You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

250 lines
6.9 KiB

  1. # Copyright (c) 2013, Web Notes Technologies Pvt. Ltd. and Contributors
  2. # MIT License. See license.txt
  3. from __future__ import unicode_literals
  4. import time
  5. import poplib
  6. import webnotes
  7. from webnotes.utils import extract_email_id, convert_utc_to_user_timezone, now, cint
  8. from webnotes.utils.scheduler import log
  9. import time
  10. class EmailSizeExceededError(webnotes.ValidationError): pass
  11. class TotalSizeExceededError(webnotes.ValidationError): pass
  12. class TotalTimeExceededError(webnotes.ValidationError): pass
  13. class IncomingMail:
  14. """
  15. Single incoming email object. Extracts, text / html and attachments from the email
  16. """
  17. def __init__(self, content):
  18. import email, email.utils
  19. import datetime
  20. self.mail = email.message_from_string(content)
  21. self.text_content = ''
  22. self.html_content = ''
  23. self.attachments = []
  24. self.parse()
  25. self.set_content_and_type()
  26. self.set_subject()
  27. self.from_email = extract_email_id(self.mail["From"])
  28. self.from_real_name = email.utils.parseaddr(self.mail["From"])[0]
  29. if self.mail["Date"]:
  30. utc = email.utils.mktime_tz(email.utils.parsedate_tz(self.mail["Date"]))
  31. utc_dt = datetime.datetime.utcfromtimestamp(utc)
  32. self.date = convert_utc_to_user_timezone(utc_dt).strftime('%Y-%m-%d %H:%M:%S')
  33. else:
  34. self.date = now()
  35. def parse(self):
  36. for part in self.mail.walk():
  37. self.process_part(part)
  38. def set_subject(self):
  39. import email.header
  40. _subject = email.header.decode_header(self.mail.get("Subject", "No Subject"))
  41. self.subject = _subject[0][0] or ""
  42. if _subject[0][1]:
  43. self.subject = self.subject.decode(_subject[0][1])
  44. else:
  45. # assume that the encoding is utf-8
  46. self.subject = self.subject.decode("utf-8")
  47. def set_content_and_type(self):
  48. self.content, self.content_type = '[Blank Email]', 'text/plain'
  49. if self.text_content:
  50. self.content, self.content_type = self.text_content, 'text/plain'
  51. else:
  52. self.content, self.content_type = self.html_content, 'text/html'
  53. def process_part(self, part):
  54. content_type = part.get_content_type()
  55. charset = part.get_content_charset()
  56. if not charset: charset = self.get_charset(part)
  57. if content_type == 'text/plain':
  58. self.text_content += self.get_payload(part, charset)
  59. if content_type == 'text/html':
  60. self.html_content += self.get_payload(part, charset)
  61. if part.get_filename():
  62. self.get_attachment(part, charset)
  63. def get_text_content(self):
  64. return self.text_content or self.html_content
  65. def get_charset(self, part):
  66. charset = part.get_content_charset()
  67. if not charset:
  68. import chardet
  69. charset = chardet.detect(str(part))['encoding']
  70. return charset
  71. def get_payload(self, part, charset):
  72. try:
  73. return unicode(part.get_payload(decode=True),str(charset),"ignore")
  74. except LookupError:
  75. return part.get_payload()
  76. def get_attachment(self, part, charset):
  77. self.attachments.append({
  78. 'content-type': part.get_content_type(),
  79. 'filename': part.get_filename(),
  80. 'content': part.get_payload(decode=True),
  81. })
  82. def save_attachments_in_doc(self, doc):
  83. from webnotes.utils.file_manager import save_file, MaxFileSizeReachedError
  84. for attachment in self.attachments:
  85. try:
  86. fid = save_file(attachment['filename'], attachment['content'],
  87. doc.doctype, doc.name)
  88. except MaxFileSizeReachedError:
  89. # WARNING: bypass max file size exception
  90. pass
  91. except webnotes.DuplicateEntryError:
  92. # same file attached twice??
  93. pass
  94. def get_thread_id(self):
  95. import re
  96. l = re.findall('(?<=\[)[\w/-]+', self.subject)
  97. return l and l[0] or None
  98. class POP3Mailbox:
  99. def __init__(self, args=None):
  100. self.setup(args)
  101. self.get_messages()
  102. def setup(self, args=None):
  103. # overrride
  104. self.settings = args or webnotes._dict()
  105. def check_mails(self):
  106. # overrride
  107. return True
  108. def process_message(self, mail):
  109. # overrride
  110. pass
  111. def connect(self):
  112. if cint(self.settings.use_ssl):
  113. self.pop = Timed_POP3_SSL(self.settings.host, timeout=webnotes.conf.get("pop_timeout"))
  114. else:
  115. self.pop = Timed_POP3(self.settings.host, timeout=webnotes.conf.get("pop_timeout"))
  116. self.pop.user(self.settings.username)
  117. self.pop.pass_(self.settings.password)
  118. def get_messages(self):
  119. if not self.check_mails():
  120. return # nothing to do
  121. webnotes.conn.commit()
  122. self.connect()
  123. try:
  124. # track if errors arised
  125. self.errors = False
  126. pop_list = self.pop.list()[1]
  127. num = num_copy = len(pop_list)
  128. # WARNING: Hard coded max no. of messages to be popped
  129. if num > 20: num = 20
  130. # time limits
  131. self.start_time = time.time()
  132. self.max_email_time = cint(webnotes.local.conf.get("max_email_time"))
  133. # size limits
  134. self.total_size = 0
  135. self.max_email_size = cint(webnotes.local.conf.get("max_email_size"))
  136. self.max_total_size = 5 * self.max_email_size
  137. for i, pop_meta in enumerate(pop_list):
  138. # do not pull more than NUM emails
  139. if (i+1) > num:
  140. break
  141. try:
  142. self.retrieve_message(pop_meta, i+1)
  143. except (TotalSizeExceededError, TotalTimeExceededError):
  144. break
  145. # WARNING: Mark as read - message number 101 onwards from the pop list
  146. # This is to avoid having too many messages entering the system
  147. num = num_copy
  148. if num > 100 and not self.errors:
  149. for m in xrange(101, num+1):
  150. self.pop.dele(m)
  151. finally:
  152. # no matter the exception, pop should quit if connected
  153. self.pop.quit()
  154. def retrieve_message(self, pop_meta, msg_num):
  155. incoming_mail = None
  156. try:
  157. self.validate_pop(pop_meta)
  158. msg = self.pop.retr(msg_num)
  159. incoming_mail = IncomingMail(b'\n'.join(msg[1]))
  160. webnotes.conn.begin()
  161. self.process_message(incoming_mail)
  162. webnotes.conn.commit()
  163. except (TotalSizeExceededError, TotalTimeExceededError):
  164. # propagate this error to break the loop
  165. raise
  166. except:
  167. # log performs rollback and logs error in scheduler log
  168. log("receive.get_messages", self.make_error_msg(msg_num, incoming_mail))
  169. self.errors = True
  170. webnotes.conn.rollback()
  171. self.pop.dele(msg_num)
  172. else:
  173. self.pop.dele(msg_num)
  174. def validate_pop(self, pop_meta):
  175. # throttle based on time restriction
  176. if self.max_email_time and (time.time() - self.start_time) > self.max_email_time:
  177. raise TotalTimeExceededError
  178. # throttle based on email size
  179. if not self.max_email_size:
  180. return
  181. m, size = pop_meta.split()
  182. size = cint(size)
  183. if size < self.max_email_size:
  184. self.total_size += size
  185. if self.total_size > self.max_total_size:
  186. raise TotalSizeExceededError
  187. else:
  188. raise EmailSizeExceededError
  189. def make_error_msg(self, msg_num, incoming_mail):
  190. error_msg = "Error in retrieving email."
  191. if not incoming_mail:
  192. try:
  193. # retrieve headers
  194. incoming_mail = IncomingMail(b'\n'.join(self.pop.top(msg_num, 5)[1]))
  195. except:
  196. pass
  197. if incoming_mail:
  198. error_msg += "\nDate: {date}\nFrom: {from_email}\nSubject: {subject}\n".format(
  199. date=incoming_mail.date, from_email=incoming_mail.from_email, subject=incoming_mail.subject)
  200. return error_msg