You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

пре 11 година
пре 13 година
пре 12 година
пре 12 година
пре 13 година
пре 12 година
пре 12 година
пре 12 година
пре 12 година
пре 12 година
пре 12 година
пре 12 година
пре 12 година
пре 12 година
пре 12 година
пре 12 година
пре 14 година
пре 13 година
пре 14 година
пре 13 година
пре 12 година
пре 12 година
пре 12 година
пре 12 година
пре 12 година
пре 12 година
пре 12 година
пре 12 година
пре 12 година
пре 12 година
пре 12 година
пре 13 година
пре 11 година
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271
  1. # Copyright (c) 2013, Web Notes Technologies Pvt. Ltd. and Contributors
  2. # MIT License. See license.txt
  3. from __future__ import unicode_literals
  4. import time
  5. import poplib
  6. import webnotes
  7. from webnotes.utils import extract_email_id, convert_utc_to_user_timezone, now, cint
  8. from webnotes.utils.scheduler import log
  9. class EmailSizeExceededError(webnotes.ValidationError): pass
  10. class EmailTimeoutError(webnotes.ValidationError): pass
  11. class TotalSizeExceededError(webnotes.ValidationError): pass
  12. class IncomingMail:
  13. """
  14. Single incoming email object. Extracts, text / html and attachments from the email
  15. """
  16. def __init__(self, content):
  17. import email, email.utils
  18. import datetime
  19. self.mail = email.message_from_string(content)
  20. self.text_content = ''
  21. self.html_content = ''
  22. self.attachments = []
  23. self.parse()
  24. self.set_content_and_type()
  25. self.set_subject()
  26. self.from_email = extract_email_id(self.mail["From"])
  27. self.from_real_name = email.utils.parseaddr(self.mail["From"])[0]
  28. if self.mail["Date"]:
  29. utc = email.utils.mktime_tz(email.utils.parsedate_tz(self.mail["Date"]))
  30. utc_dt = datetime.datetime.utcfromtimestamp(utc)
  31. self.date = convert_utc_to_user_timezone(utc_dt).strftime('%Y-%m-%d %H:%M:%S')
  32. else:
  33. self.date = now()
  34. def parse(self):
  35. for part in self.mail.walk():
  36. self.process_part(part)
  37. def set_subject(self):
  38. import email.header
  39. _subject = email.header.decode_header(self.mail.get("Subject", "No Subject"))
  40. self.subject = _subject[0][0] or ""
  41. if _subject[0][1]:
  42. self.subject = self.subject.decode(_subject[0][1])
  43. else:
  44. # assume that the encoding is utf-8
  45. self.subject = self.subject.decode("utf-8")
  46. if not self.subject:
  47. self.subject = "No Subject"
  48. def set_content_and_type(self):
  49. self.content, self.content_type = '[Blank Email]', 'text/plain'
  50. if self.text_content:
  51. self.content, self.content_type = self.text_content, 'text/plain'
  52. else:
  53. self.content, self.content_type = self.html_content, 'text/html'
  54. def process_part(self, part):
  55. content_type = part.get_content_type()
  56. charset = part.get_content_charset()
  57. if not charset: charset = self.get_charset(part)
  58. if content_type == 'text/plain':
  59. self.text_content += self.get_payload(part, charset)
  60. if content_type == 'text/html':
  61. self.html_content += self.get_payload(part, charset)
  62. if part.get_filename():
  63. self.get_attachment(part, charset)
  64. def get_text_content(self):
  65. return self.text_content or self.html_content
  66. def get_charset(self, part):
  67. charset = part.get_content_charset()
  68. if not charset:
  69. import chardet
  70. charset = chardet.detect(str(part))['encoding']
  71. return charset
  72. def get_payload(self, part, charset):
  73. try:
  74. return unicode(part.get_payload(decode=True),str(charset),"ignore")
  75. except LookupError:
  76. return part.get_payload()
  77. def get_attachment(self, part, charset):
  78. self.attachments.append({
  79. 'content-type': part.get_content_type(),
  80. 'filename': part.get_filename(),
  81. 'content': part.get_payload(decode=True),
  82. })
  83. def save_attachments_in_doc(self, doc):
  84. from webnotes.utils.file_manager import save_file, MaxFileSizeReachedError
  85. for attachment in self.attachments:
  86. try:
  87. fid = save_file(attachment['filename'], attachment['content'],
  88. doc.doctype, doc.name)
  89. except MaxFileSizeReachedError:
  90. # WARNING: bypass max file size exception
  91. pass
  92. except webnotes.DuplicateEntryError:
  93. # same file attached twice??
  94. pass
  95. def get_thread_id(self):
  96. import re
  97. l = re.findall('(?<=\[)[\w/-]+', self.subject)
  98. return l and l[0] or None
  99. class POP3Mailbox:
  100. def __init__(self, args=None):
  101. self.setup(args)
  102. self.get_messages()
  103. def setup(self, args=None):
  104. # overrride
  105. self.settings = args or webnotes._dict()
  106. def check_mails(self):
  107. # overrride
  108. return True
  109. def process_message(self, mail):
  110. # overrride
  111. pass
  112. def connect(self):
  113. if cint(self.settings.use_ssl):
  114. self.pop = Timed_POP3_SSL(self.settings.host, timeout=webnotes.conf.get("pop_timeout"))
  115. else:
  116. self.pop = Timed_POP3(self.settings.host, timeout=webnotes.conf.get("pop_timeout"))
  117. self.pop.user(self.settings.username)
  118. self.pop.pass_(self.settings.password)
  119. def get_messages(self):
  120. if not self.check_mails():
  121. return # nothing to do
  122. webnotes.conn.commit()
  123. self.connect()
  124. try:
  125. # track if errors arised
  126. self.errors = False
  127. pop_list = self.pop.list()[1]
  128. num = num_copy = len(pop_list)
  129. # WARNING: Hard coded max no. of messages to be popped
  130. if num > 20: num = 20
  131. # size limits
  132. self.total_size = 0
  133. self.max_email_size = cint(webnotes.local.conf.get("max_email_size"))
  134. self.max_total_size = 5 * self.max_email_size
  135. for i, pop_meta in enumerate(pop_list):
  136. # do not pull more than NUM emails
  137. if (i+1) > num:
  138. break
  139. try:
  140. self.retrieve_message(pop_meta, i+1)
  141. except (TotalSizeExceededError, EmailTimeoutError):
  142. break
  143. # WARNING: Mark as read - message number 101 onwards from the pop list
  144. # This is to avoid having too many messages entering the system
  145. num = num_copy
  146. if num > 100 and not self.errors:
  147. for m in xrange(101, num+1):
  148. self.pop.dele(m)
  149. finally:
  150. # no matter the exception, pop should quit if connected
  151. self.pop.quit()
  152. def retrieve_message(self, pop_meta, msg_num):
  153. incoming_mail = None
  154. try:
  155. self.validate_pop(pop_meta)
  156. msg = self.pop.retr(msg_num)
  157. incoming_mail = IncomingMail(b'\n'.join(msg[1]))
  158. webnotes.conn.begin()
  159. self.process_message(incoming_mail)
  160. webnotes.conn.commit()
  161. except (TotalSizeExceededError, EmailTimeoutError):
  162. # propagate this error to break the loop
  163. raise
  164. except:
  165. # log performs rollback and logs error in scheduler log
  166. log("receive.get_messages", self.make_error_msg(msg_num, incoming_mail))
  167. self.errors = True
  168. webnotes.conn.rollback()
  169. self.pop.dele(msg_num)
  170. else:
  171. self.pop.dele(msg_num)
  172. def validate_pop(self, pop_meta):
  173. # throttle based on email size
  174. if not self.max_email_size:
  175. return
  176. m, size = pop_meta.split()
  177. size = cint(size)
  178. if size < self.max_email_size:
  179. self.total_size += size
  180. if self.total_size > self.max_total_size:
  181. raise TotalSizeExceededError
  182. else:
  183. raise EmailSizeExceededError
  184. def make_error_msg(self, msg_num, incoming_mail):
  185. error_msg = "Error in retrieving email."
  186. if not incoming_mail:
  187. try:
  188. # retrieve headers
  189. incoming_mail = IncomingMail(b'\n'.join(self.pop.top(msg_num, 5)[1]))
  190. except:
  191. pass
  192. if incoming_mail:
  193. error_msg += "\nDate: {date}\nFrom: {from_email}\nSubject: {subject}\n".format(
  194. date=incoming_mail.date, from_email=incoming_mail.from_email, subject=incoming_mail.subject)
  195. return error_msg
  196. class TimerMixin(object):
  197. def __init__(self, *args, **kwargs):
  198. self.timeout = kwargs.pop('timeout', 0.0)
  199. self.elapsed_time = 0.0
  200. self._super.__init__(self, *args, **kwargs)
  201. if self.timeout:
  202. # set per operation timeout to one-fifth of total pop timeout
  203. self.sock.settimeout(self.timeout / 5.0)
  204. def _getline(self, *args, **kwargs):
  205. start_time = time.time()
  206. ret = self._super._getline(self, *args, **kwargs)
  207. self.elapsed_time += time.time() - start_time
  208. if self.timeout and self.elapsed_time > self.timeout:
  209. raise EmailTimeoutError
  210. return ret
  211. def quit(self, *args, **kwargs):
  212. self.elapsed_time = 0.0
  213. return self._super.quit(self, *args, **kwargs)
  214. class Timed_POP3(TimerMixin, poplib.POP3):
  215. _super = poplib.POP3
  216. class Timed_POP3_SSL(TimerMixin, poplib.POP3_SSL):
  217. _super = poplib.POP3_SSL