Nie możesz wybrać więcej niż 25 tematów. Tematy muszą się zaczynać od litery lub cyfry, mogą zawierać myślniki ('-') i mogą mieć do 35 znaków.
 
 
 
 
 
 

269 wiersze
7.4 KiB

  1. # Copyright (c) 2013, Web Notes Technologies Pvt. Ltd. and Contributors
  2. # MIT License. See license.txt
  3. from __future__ import unicode_literals
  4. import time
  5. import poplib
  6. import webnotes
  7. from webnotes.utils import extract_email_id, convert_utc_to_user_timezone, now, cint
  8. from webnotes.utils.scheduler import log
  9. class EmailSizeExceededError(webnotes.ValidationError): pass
  10. class EmailTimeoutError(webnotes.ValidationError): pass
  11. class TotalSizeExceededError(webnotes.ValidationError): pass
  12. class IncomingMail:
  13. """
  14. Single incoming email object. Extracts, text / html and attachments from the email
  15. """
  16. def __init__(self, content):
  17. import email, email.utils
  18. import datetime
  19. self.mail = email.message_from_string(content)
  20. self.text_content = ''
  21. self.html_content = ''
  22. self.attachments = []
  23. self.parse()
  24. self.set_content_and_type()
  25. self.set_subject()
  26. self.from_email = extract_email_id(self.mail["From"])
  27. self.from_real_name = email.utils.parseaddr(self.mail["From"])[0]
  28. if self.mail["Date"]:
  29. utc = email.utils.mktime_tz(email.utils.parsedate_tz(self.mail["Date"]))
  30. utc_dt = datetime.datetime.utcfromtimestamp(utc)
  31. self.date = convert_utc_to_user_timezone(utc_dt).strftime('%Y-%m-%d %H:%M:%S')
  32. else:
  33. self.date = now()
  34. def parse(self):
  35. for part in self.mail.walk():
  36. self.process_part(part)
  37. def set_subject(self):
  38. import email.header
  39. _subject = email.header.decode_header(self.mail.get("Subject", "No Subject"))
  40. self.subject = _subject[0][0] or ""
  41. if _subject[0][1]:
  42. self.subject = self.subject.decode(_subject[0][1])
  43. else:
  44. # assume that the encoding is utf-8
  45. self.subject = self.subject.decode("utf-8")
  46. def set_content_and_type(self):
  47. self.content, self.content_type = '[Blank Email]', 'text/plain'
  48. if self.text_content:
  49. self.content, self.content_type = self.text_content, 'text/plain'
  50. else:
  51. self.content, self.content_type = self.html_content, 'text/html'
  52. def process_part(self, part):
  53. content_type = part.get_content_type()
  54. charset = part.get_content_charset()
  55. if not charset: charset = self.get_charset(part)
  56. if content_type == 'text/plain':
  57. self.text_content += self.get_payload(part, charset)
  58. if content_type == 'text/html':
  59. self.html_content += self.get_payload(part, charset)
  60. if part.get_filename():
  61. self.get_attachment(part, charset)
  62. def get_text_content(self):
  63. return self.text_content or self.html_content
  64. def get_charset(self, part):
  65. charset = part.get_content_charset()
  66. if not charset:
  67. import chardet
  68. charset = chardet.detect(str(part))['encoding']
  69. return charset
  70. def get_payload(self, part, charset):
  71. try:
  72. return unicode(part.get_payload(decode=True),str(charset),"ignore")
  73. except LookupError:
  74. return part.get_payload()
  75. def get_attachment(self, part, charset):
  76. self.attachments.append({
  77. 'content-type': part.get_content_type(),
  78. 'filename': part.get_filename(),
  79. 'content': part.get_payload(decode=True),
  80. })
  81. def save_attachments_in_doc(self, doc):
  82. from webnotes.utils.file_manager import save_file, MaxFileSizeReachedError
  83. for attachment in self.attachments:
  84. try:
  85. fid = save_file(attachment['filename'], attachment['content'],
  86. doc.doctype, doc.name)
  87. except MaxFileSizeReachedError:
  88. # WARNING: bypass max file size exception
  89. pass
  90. except webnotes.DuplicateEntryError:
  91. # same file attached twice??
  92. pass
  93. def get_thread_id(self):
  94. import re
  95. l = re.findall('(?<=\[)[\w/-]+', self.subject)
  96. return l and l[0] or None
  97. class POP3Mailbox:
  98. def __init__(self, args=None):
  99. self.setup(args)
  100. self.get_messages()
  101. def setup(self, args=None):
  102. # overrride
  103. self.settings = args or webnotes._dict()
  104. def check_mails(self):
  105. # overrride
  106. return True
  107. def process_message(self, mail):
  108. # overrride
  109. pass
  110. def connect(self):
  111. if cint(self.settings.use_ssl):
  112. self.pop = Timed_POP3_SSL(self.settings.host, timeout=webnotes.conf.get("pop_timeout"))
  113. else:
  114. self.pop = Timed_POP3(self.settings.host, timeout=webnotes.conf.get("pop_timeout"))
  115. self.pop.user(self.settings.username)
  116. self.pop.pass_(self.settings.password)
  117. def get_messages(self):
  118. if not self.check_mails():
  119. return # nothing to do
  120. webnotes.conn.commit()
  121. self.connect()
  122. try:
  123. # track if errors arised
  124. self.errors = False
  125. pop_list = self.pop.list()[1]
  126. num = num_copy = len(pop_list)
  127. # WARNING: Hard coded max no. of messages to be popped
  128. if num > 20: num = 20
  129. # size limits
  130. self.total_size = 0
  131. self.max_email_size = cint(webnotes.local.conf.get("max_email_size"))
  132. self.max_total_size = 5 * self.max_email_size
  133. for i, pop_meta in enumerate(pop_list):
  134. # do not pull more than NUM emails
  135. if (i+1) > num:
  136. break
  137. try:
  138. self.retrieve_message(pop_meta, i+1)
  139. except (TotalSizeExceededError, EmailTimeoutError):
  140. break
  141. # WARNING: Mark as read - message number 101 onwards from the pop list
  142. # This is to avoid having too many messages entering the system
  143. num = num_copy
  144. if num > 100 and not self.errors:
  145. for m in xrange(101, num+1):
  146. self.pop.dele(m)
  147. finally:
  148. # no matter the exception, pop should quit if connected
  149. self.pop.quit()
  150. def retrieve_message(self, pop_meta, msg_num):
  151. incoming_mail = None
  152. try:
  153. self.validate_pop(pop_meta)
  154. msg = self.pop.retr(msg_num)
  155. incoming_mail = IncomingMail(b'\n'.join(msg[1]))
  156. webnotes.conn.begin()
  157. self.process_message(incoming_mail)
  158. webnotes.conn.commit()
  159. except (TotalSizeExceededError, EmailTimeoutError):
  160. # propagate this error to break the loop
  161. raise
  162. except:
  163. # log performs rollback and logs error in scheduler log
  164. log("receive.get_messages", self.make_error_msg(msg_num, incoming_mail))
  165. self.errors = True
  166. webnotes.conn.rollback()
  167. self.pop.dele(msg_num)
  168. else:
  169. self.pop.dele(msg_num)
  170. def validate_pop(self, pop_meta):
  171. # throttle based on email size
  172. if not self.max_email_size:
  173. return
  174. m, size = pop_meta.split()
  175. size = cint(size)
  176. if size < self.max_email_size:
  177. self.total_size += size
  178. if self.total_size > self.max_total_size:
  179. raise TotalSizeExceededError
  180. else:
  181. raise EmailSizeExceededError
  182. def make_error_msg(self, msg_num, incoming_mail):
  183. error_msg = "Error in retrieving email."
  184. if not incoming_mail:
  185. try:
  186. # retrieve headers
  187. incoming_mail = IncomingMail(b'\n'.join(self.pop.top(msg_num, 5)[1]))
  188. except:
  189. pass
  190. if incoming_mail:
  191. error_msg += "\nDate: {date}\nFrom: {from_email}\nSubject: {subject}\n".format(
  192. date=incoming_mail.date, from_email=incoming_mail.from_email, subject=incoming_mail.subject)
  193. return error_msg
  194. class TimerMixin(object):
  195. def __init__(self, *args, **kwargs):
  196. self.timeout = kwargs.pop('timeout', 0.0)
  197. self.elapsed_time = 0.0
  198. self._super.__init__(self, *args, **kwargs)
  199. if self.timeout:
  200. # set per operation timeout to one-fifth of total pop timeout
  201. self.sock.settimeout(self.timeout / 5.0)
  202. def _getline(self, *args, **kwargs):
  203. start_time = time.time()
  204. ret = self._super._getline(self, *args, **kwargs)
  205. self.elapsed_time += time.time() - start_time
  206. if self.timeout and self.elapsed_time > self.timeout:
  207. raise EmailTimeoutError
  208. return ret
  209. def quit(self, *args, **kwargs):
  210. self.elapsed_time = 0.0
  211. return self._super.quit(self, *args, **kwargs)
  212. class Timed_POP3(TimerMixin, poplib.POP3):
  213. _super = poplib.POP3
  214. class Timed_POP3_SSL(TimerMixin, poplib.POP3_SSL):
  215. _super = poplib.POP3_SSL