# Copyright (c) 2013, Web Notes Technologies Pvt. Ltd. and Contributors # MIT License. See license.txt from __future__ import unicode_literals import time import poplib import webnotes from webnotes.utils import extract_email_id, convert_utc_to_user_timezone, now, cint from webnotes.utils.scheduler import log class EmailSizeExceededError(webnotes.ValidationError): pass class EmailTimeoutError(webnotes.ValidationError): pass class TotalSizeExceededError(webnotes.ValidationError): pass class IncomingMail: """ Single incoming email object. Extracts, text / html and attachments from the email """ def __init__(self, content): import email, email.utils import datetime self.mail = email.message_from_string(content) self.text_content = '' self.html_content = '' self.attachments = [] self.parse() self.set_content_and_type() self.set_subject() self.from_email = extract_email_id(self.mail["From"]) self.from_real_name = email.utils.parseaddr(self.mail["From"])[0] if self.mail["Date"]: utc = email.utils.mktime_tz(email.utils.parsedate_tz(self.mail["Date"])) utc_dt = datetime.datetime.utcfromtimestamp(utc) self.date = convert_utc_to_user_timezone(utc_dt).strftime('%Y-%m-%d %H:%M:%S') else: self.date = now() def parse(self): for part in self.mail.walk(): self.process_part(part) def set_subject(self): import email.header _subject = email.header.decode_header(self.mail.get("Subject", "No Subject")) self.subject = _subject[0][0] or "" if _subject[0][1]: self.subject = self.subject.decode(_subject[0][1]) else: # assume that the encoding is utf-8 self.subject = self.subject.decode("utf-8") if not self.subject: self.subject = "No Subject" def set_content_and_type(self): self.content, self.content_type = '[Blank Email]', 'text/plain' if self.text_content: self.content, self.content_type = self.text_content, 'text/plain' else: self.content, self.content_type = self.html_content, 'text/html' def process_part(self, part): content_type = part.get_content_type() charset = part.get_content_charset() if not charset: charset = self.get_charset(part) if content_type == 'text/plain': self.text_content += self.get_payload(part, charset) if content_type == 'text/html': self.html_content += self.get_payload(part, charset) if part.get_filename(): self.get_attachment(part, charset) def get_text_content(self): return self.text_content or self.html_content def get_charset(self, part): charset = part.get_content_charset() if not charset: import chardet charset = chardet.detect(str(part))['encoding'] return charset def get_payload(self, part, charset): try: return unicode(part.get_payload(decode=True),str(charset),"ignore") except LookupError: return part.get_payload() def get_attachment(self, part, charset): self.attachments.append({ 'content-type': part.get_content_type(), 'filename': part.get_filename(), 'content': part.get_payload(decode=True), }) def save_attachments_in_doc(self, doc): from webnotes.utils.file_manager import save_file, MaxFileSizeReachedError for attachment in self.attachments: try: fid = save_file(attachment['filename'], attachment['content'], doc.doctype, doc.name) except MaxFileSizeReachedError: # WARNING: bypass max file size exception pass except webnotes.DuplicateEntryError: # same file attached twice?? pass def get_thread_id(self): import re l = re.findall('(?<=\[)[\w/-]+', self.subject) return l and l[0] or None class POP3Mailbox: def __init__(self, args=None): self.setup(args) self.get_messages() def setup(self, args=None): # overrride self.settings = args or webnotes._dict() def check_mails(self): # overrride return True def process_message(self, mail): # overrride pass def connect(self): if cint(self.settings.use_ssl): self.pop = Timed_POP3_SSL(self.settings.host, timeout=webnotes.conf.get("pop_timeout")) else: self.pop = Timed_POP3(self.settings.host, timeout=webnotes.conf.get("pop_timeout")) self.pop.user(self.settings.username) self.pop.pass_(self.settings.password) def get_messages(self): if not self.check_mails(): return # nothing to do webnotes.conn.commit() self.connect() try: # track if errors arised self.errors = False pop_list = self.pop.list()[1] num = num_copy = len(pop_list) # WARNING: Hard coded max no. of messages to be popped if num > 20: num = 20 # size limits self.total_size = 0 self.max_email_size = cint(webnotes.local.conf.get("max_email_size")) self.max_total_size = 5 * self.max_email_size for i, pop_meta in enumerate(pop_list): # do not pull more than NUM emails if (i+1) > num: break try: self.retrieve_message(pop_meta, i+1) except (TotalSizeExceededError, EmailTimeoutError): break # WARNING: Mark as read - message number 101 onwards from the pop list # This is to avoid having too many messages entering the system num = num_copy if num > 100 and not self.errors: for m in xrange(101, num+1): self.pop.dele(m) finally: # no matter the exception, pop should quit if connected self.pop.quit() def retrieve_message(self, pop_meta, msg_num): incoming_mail = None try: self.validate_pop(pop_meta) msg = self.pop.retr(msg_num) incoming_mail = IncomingMail(b'\n'.join(msg[1])) webnotes.conn.begin() self.process_message(incoming_mail) webnotes.conn.commit() except (TotalSizeExceededError, EmailTimeoutError): # propagate this error to break the loop raise except: # log performs rollback and logs error in scheduler log log("receive.get_messages", self.make_error_msg(msg_num, incoming_mail)) self.errors = True webnotes.conn.rollback() self.pop.dele(msg_num) else: self.pop.dele(msg_num) def validate_pop(self, pop_meta): # throttle based on email size if not self.max_email_size: return m, size = pop_meta.split() size = cint(size) if size < self.max_email_size: self.total_size += size if self.total_size > self.max_total_size: raise TotalSizeExceededError else: raise EmailSizeExceededError def make_error_msg(self, msg_num, incoming_mail): error_msg = "Error in retrieving email." if not incoming_mail: try: # retrieve headers incoming_mail = IncomingMail(b'\n'.join(self.pop.top(msg_num, 5)[1])) except: pass if incoming_mail: error_msg += "\nDate: {date}\nFrom: {from_email}\nSubject: {subject}\n".format( date=incoming_mail.date, from_email=incoming_mail.from_email, subject=incoming_mail.subject) return error_msg class TimerMixin(object): def __init__(self, *args, **kwargs): self.timeout = kwargs.pop('timeout', 0.0) self.elapsed_time = 0.0 self._super.__init__(self, *args, **kwargs) if self.timeout: # set per operation timeout to one-fifth of total pop timeout self.sock.settimeout(self.timeout / 5.0) def _getline(self, *args, **kwargs): start_time = time.time() ret = self._super._getline(self, *args, **kwargs) self.elapsed_time += time.time() - start_time if self.timeout and self.elapsed_time > self.timeout: raise EmailTimeoutError return ret def quit(self, *args, **kwargs): self.elapsed_time = 0.0 return self._super.quit(self, *args, **kwargs) class Timed_POP3(TimerMixin, poplib.POP3): _super = poplib.POP3 class Timed_POP3_SSL(TimerMixin, poplib.POP3_SSL): _super = poplib.POP3_SSL