Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.
 
 
 
 
 
 

600 rader
17 KiB

  1. # Copyright (c) 2015, Frappe Technologies Pvt. Ltd. and Contributors
  2. # MIT License. See license.txt
  3. import datetime
  4. import email
  5. import email.utils
  6. import imaplib
  7. import poplib
  8. import re
  9. import time
  10. from email.header import decode_header
  11. import _socket
  12. import chardet
  13. import six
  14. from email_reply_parser import EmailReplyParser
  15. import frappe
  16. from frappe import _, safe_decode, safe_encode
  17. from frappe.core.doctype.file.file import (MaxFileSizeReachedError,
  18. get_random_filename)
  19. from frappe.utils import (cint, convert_utc_to_user_timezone, cstr,
  20. extract_email_id, markdown, now, parse_addr, strip)
  21. class EmailSizeExceededError(frappe.ValidationError): pass
  22. class EmailTimeoutError(frappe.ValidationError): pass
  23. class TotalSizeExceededError(frappe.ValidationError): pass
  24. class LoginLimitExceeded(frappe.ValidationError): pass
  25. class EmailServer:
  26. """Wrapper for POP server to pull emails."""
  27. def __init__(self, args=None):
  28. self.setup(args)
  29. def setup(self, args=None):
  30. # overrride
  31. self.settings = args or frappe._dict()
  32. def check_mails(self):
  33. # overrride
  34. return True
  35. def process_message(self, mail):
  36. # overrride
  37. pass
  38. def connect(self):
  39. """Connect to **Email Account**."""
  40. if cint(self.settings.use_imap):
  41. return self.connect_imap()
  42. else:
  43. return self.connect_pop()
  44. def connect_imap(self):
  45. """Connect to IMAP"""
  46. try:
  47. if cint(self.settings.use_ssl):
  48. self.imap = Timed_IMAP4_SSL(self.settings.host, self.settings.incoming_port, timeout=frappe.conf.get("pop_timeout"))
  49. else:
  50. self.imap = Timed_IMAP4(self.settings.host, self.settings.incoming_port, timeout=frappe.conf.get("pop_timeout"))
  51. self.imap.login(self.settings.username, self.settings.password)
  52. # connection established!
  53. return True
  54. except _socket.error:
  55. # Invalid mail server -- due to refusing connection
  56. frappe.msgprint(_('Invalid Mail Server. Please rectify and try again.'))
  57. raise
  58. def connect_pop(self):
  59. #this method return pop connection
  60. try:
  61. if cint(self.settings.use_ssl):
  62. self.pop = Timed_POP3_SSL(self.settings.host, self.settings.incoming_port, timeout=frappe.conf.get("pop_timeout"))
  63. else:
  64. self.pop = Timed_POP3(self.settings.host, self.settings.incoming_port, timeout=frappe.conf.get("pop_timeout"))
  65. self.pop.user(self.settings.username)
  66. self.pop.pass_(self.settings.password)
  67. # connection established!
  68. return True
  69. except _socket.error:
  70. # log performs rollback and logs error in Error Log
  71. frappe.log_error("receive.connect_pop")
  72. # Invalid mail server -- due to refusing connection
  73. frappe.msgprint(_('Invalid Mail Server. Please rectify and try again.'))
  74. raise
  75. except poplib.error_proto as e:
  76. if self.is_temporary_system_problem(e):
  77. return False
  78. else:
  79. frappe.msgprint(_('Invalid User Name or Support Password. Please rectify and try again.'))
  80. raise
  81. def get_messages(self):
  82. """Returns new email messages in a list."""
  83. if not self.check_mails():
  84. return # nothing to do
  85. frappe.db.commit()
  86. if not self.connect():
  87. return
  88. uid_list = []
  89. try:
  90. # track if errors arised
  91. self.errors = False
  92. self.latest_messages = []
  93. self.seen_status = {}
  94. self.uid_reindexed = False
  95. uid_list = email_list = self.get_new_mails()
  96. if not email_list:
  97. return
  98. num = num_copy = len(email_list)
  99. # WARNING: Hard coded max no. of messages to be popped
  100. if num > 50: num = 50
  101. # size limits
  102. self.total_size = 0
  103. self.max_email_size = cint(frappe.local.conf.get("max_email_size"))
  104. self.max_total_size = 5 * self.max_email_size
  105. for i, message_meta in enumerate(email_list):
  106. # do not pull more than NUM emails
  107. if (i+1) > num:
  108. break
  109. try:
  110. self.retrieve_message(message_meta, i+1)
  111. except (TotalSizeExceededError, EmailTimeoutError, LoginLimitExceeded):
  112. break
  113. # WARNING: Mark as read - message number 101 onwards from the pop list
  114. # This is to avoid having too many messages entering the system
  115. num = num_copy
  116. if not cint(self.settings.use_imap):
  117. if num > 100 and not self.errors:
  118. for m in range(101, num+1):
  119. self.pop.dele(m)
  120. except Exception as e:
  121. if self.has_login_limit_exceeded(e):
  122. pass
  123. else:
  124. raise
  125. finally:
  126. # no matter the exception, pop should quit if connected
  127. if cint(self.settings.use_imap):
  128. self.imap.logout()
  129. else:
  130. self.pop.quit()
  131. out = { "latest_messages": self.latest_messages }
  132. if self.settings.use_imap:
  133. out.update({
  134. "uid_list": uid_list,
  135. "seen_status": self.seen_status,
  136. "uid_reindexed": self.uid_reindexed
  137. })
  138. return out
  139. def get_new_mails(self):
  140. """Return list of new mails"""
  141. if cint(self.settings.use_imap):
  142. email_list = []
  143. self.check_imap_uidvalidity()
  144. readonly = False if self.settings.email_sync_rule == "UNSEEN" else True
  145. self.imap.select("Inbox", readonly=readonly)
  146. response, message = self.imap.uid('search', None, self.settings.email_sync_rule)
  147. if message[0]:
  148. email_list = message[0].split()
  149. else:
  150. email_list = self.pop.list()[1]
  151. return email_list
  152. def check_imap_uidvalidity(self):
  153. # compare the UIDVALIDITY of email account and imap server
  154. uid_validity = self.settings.uid_validity
  155. response, message = self.imap.status("Inbox", "(UIDVALIDITY UIDNEXT)")
  156. current_uid_validity = self.parse_imap_response("UIDVALIDITY", message[0]) or 0
  157. uidnext = int(self.parse_imap_response("UIDNEXT", message[0]) or "1")
  158. frappe.db.set_value("Email Account", self.settings.email_account, "uidnext", uidnext)
  159. if not uid_validity or uid_validity != current_uid_validity:
  160. # uidvalidity changed & all email uids are reindexed by server
  161. frappe.db.sql(
  162. """update `tabCommunication` set uid=-1 where communication_medium='Email'
  163. and email_account=%s""", (self.settings.email_account,)
  164. )
  165. frappe.db.sql(
  166. """update `tabEmail Account` set uidvalidity=%s, uidnext=%s where
  167. name=%s""", (current_uid_validity, uidnext, self.settings.email_account)
  168. )
  169. # uid validity not found pulling emails for first time
  170. if not uid_validity:
  171. self.settings.email_sync_rule = "UNSEEN"
  172. return
  173. sync_count = 100 if uid_validity else int(self.settings.initial_sync_count)
  174. from_uid = 1 if uidnext < (sync_count + 1) or (uidnext - sync_count) < 1 else uidnext - sync_count
  175. # sync last 100 email
  176. self.settings.email_sync_rule = "UID {}:{}".format(from_uid, uidnext)
  177. self.uid_reindexed = True
  178. elif uid_validity == current_uid_validity:
  179. return
  180. def parse_imap_response(self, cmd, response):
  181. pattern = r"(?<={cmd} )[0-9]*".format(cmd=cmd)
  182. match = re.search(pattern, response.decode('utf-8'), re.U | re.I)
  183. if match:
  184. return match.group(0)
  185. else:
  186. return None
  187. def retrieve_message(self, message_meta, msg_num=None):
  188. incoming_mail = None
  189. try:
  190. self.validate_message_limits(message_meta)
  191. if cint(self.settings.use_imap):
  192. status, message = self.imap.uid('fetch', message_meta, '(BODY.PEEK[] BODY.PEEK[HEADER] FLAGS)')
  193. raw = message[0]
  194. self.get_email_seen_status(message_meta, raw[0])
  195. self.latest_messages.append(raw[1])
  196. else:
  197. msg = self.pop.retr(msg_num)
  198. self.latest_messages.append(b'\n'.join(msg[1]))
  199. except (TotalSizeExceededError, EmailTimeoutError):
  200. # propagate this error to break the loop
  201. self.errors = True
  202. raise
  203. except Exception as e:
  204. if self.has_login_limit_exceeded(e):
  205. self.errors = True
  206. raise LoginLimitExceeded(e)
  207. else:
  208. # log performs rollback and logs error in Error Log
  209. frappe.log_error("receive.get_messages", self.make_error_msg(msg_num, incoming_mail))
  210. self.errors = True
  211. frappe.db.rollback()
  212. if not cint(self.settings.use_imap):
  213. self.pop.dele(msg_num)
  214. else:
  215. # mark as seen if email sync rule is UNSEEN (syncing only unseen mails)
  216. if self.settings.email_sync_rule == "UNSEEN":
  217. self.imap.uid('STORE', message_meta, '+FLAGS', '(\\SEEN)')
  218. else:
  219. if not cint(self.settings.use_imap):
  220. self.pop.dele(msg_num)
  221. else:
  222. # mark as seen if email sync rule is UNSEEN (syncing only unseen mails)
  223. if self.settings.email_sync_rule == "UNSEEN":
  224. self.imap.uid('STORE', message_meta, '+FLAGS', '(\\SEEN)')
  225. def get_email_seen_status(self, uid, flag_string):
  226. """ parse the email FLAGS response """
  227. if not flag_string:
  228. return None
  229. flags = []
  230. for flag in imaplib.ParseFlags(flag_string) or []:
  231. pattern = re.compile(r"\w+")
  232. match = re.search(pattern, frappe.as_unicode(flag))
  233. flags.append(match.group(0))
  234. if "Seen" in flags:
  235. self.seen_status.update({ uid: "SEEN" })
  236. else:
  237. self.seen_status.update({ uid: "UNSEEN" })
  238. def has_login_limit_exceeded(self, e):
  239. return "-ERR Exceeded the login limit" in strip(cstr(e.message))
  240. def is_temporary_system_problem(self, e):
  241. messages = (
  242. "-ERR [SYS/TEMP] Temporary system problem. Please try again later.",
  243. "Connection timed out",
  244. )
  245. for message in messages:
  246. if message in strip(cstr(e)) or message in strip(cstr(getattr(e, 'strerror', ''))):
  247. return True
  248. return False
  249. def validate_message_limits(self, message_meta):
  250. # throttle based on email size
  251. if not self.max_email_size:
  252. return
  253. m, size = message_meta.split()
  254. size = cint(size)
  255. if size < self.max_email_size:
  256. self.total_size += size
  257. if self.total_size > self.max_total_size:
  258. raise TotalSizeExceededError
  259. else:
  260. raise EmailSizeExceededError
  261. def make_error_msg(self, msg_num, incoming_mail):
  262. error_msg = "Error in retrieving email."
  263. if not incoming_mail:
  264. try:
  265. # retrieve headers
  266. incoming_mail = Email(b'\n'.join(self.pop.top(msg_num, 5)[1]))
  267. except:
  268. pass
  269. if incoming_mail:
  270. error_msg += "\nDate: {date}\nFrom: {from_email}\nSubject: {subject}\n".format(
  271. date=incoming_mail.date, from_email=incoming_mail.from_email, subject=incoming_mail.subject)
  272. return error_msg
  273. def update_flag(self, uid_list={}):
  274. """ set all uids mails the flag as seen """
  275. if not uid_list:
  276. return
  277. if not self.connect():
  278. return
  279. self.imap.select("Inbox")
  280. for uid, operation in uid_list.items():
  281. if not uid: continue
  282. op = "+FLAGS" if operation == "Read" else "-FLAGS"
  283. try:
  284. self.imap.uid('STORE', uid, op, '(\\SEEN)')
  285. except Exception:
  286. continue
  287. class Email:
  288. """Wrapper for an email."""
  289. def __init__(self, content):
  290. """Parses headers, content, attachments from given raw message.
  291. :param content: Raw message."""
  292. if six.PY2:
  293. self.mail = email.message_from_string(safe_encode(content))
  294. else:
  295. if isinstance(content, bytes):
  296. self.mail = email.message_from_bytes(content)
  297. else:
  298. self.mail = email.message_from_string(content)
  299. self.text_content = ''
  300. self.html_content = ''
  301. self.attachments = []
  302. self.cid_map = {}
  303. self.parse()
  304. self.set_content_and_type()
  305. self.set_subject()
  306. self.set_from()
  307. self.message_id = (self.mail.get('Message-ID') or "").strip(" <>")
  308. if self.mail["Date"]:
  309. try:
  310. utc = email.utils.mktime_tz(email.utils.parsedate_tz(self.mail["Date"]))
  311. utc_dt = datetime.datetime.utcfromtimestamp(utc)
  312. self.date = convert_utc_to_user_timezone(utc_dt).strftime('%Y-%m-%d %H:%M:%S')
  313. except:
  314. self.date = now()
  315. else:
  316. self.date = now()
  317. if self.date > now():
  318. self.date = now()
  319. def parse(self):
  320. """Walk and process multi-part email."""
  321. for part in self.mail.walk():
  322. self.process_part(part)
  323. def set_subject(self):
  324. """Parse and decode `Subject` header."""
  325. _subject = decode_header(self.mail.get("Subject", "No Subject"))
  326. self.subject = _subject[0][0] or ""
  327. if _subject[0][1]:
  328. self.subject = safe_decode(self.subject, _subject[0][1])
  329. else:
  330. # assume that the encoding is utf-8
  331. self.subject = safe_decode(self.subject)[:140]
  332. if not self.subject:
  333. self.subject = "No Subject"
  334. def set_from(self):
  335. # gmail mailing-list compatibility
  336. # use X-Original-Sender if available, as gmail sometimes modifies the 'From'
  337. _from_email = self.decode_email(self.mail.get("X-Original-From") or self.mail["From"])
  338. _reply_to = self.decode_email(self.mail.get("Reply-To"))
  339. if _reply_to and not frappe.db.get_value('Email Account', {"email_id":_reply_to}, 'email_id'):
  340. self.from_email = extract_email_id(_reply_to)
  341. else:
  342. self.from_email = extract_email_id(_from_email)
  343. if self.from_email:
  344. self.from_email = self.from_email.lower()
  345. self.from_real_name = parse_addr(_from_email)[0] if "@" in _from_email else _from_email
  346. def decode_email(self, email):
  347. if not email: return
  348. decoded = ""
  349. for part, encoding in decode_header(frappe.as_unicode(email).replace("\""," ").replace("\'"," ")):
  350. if encoding:
  351. decoded += part.decode(encoding)
  352. else:
  353. decoded += safe_decode(part)
  354. return decoded
  355. def set_content_and_type(self):
  356. self.content, self.content_type = '[Blank Email]', 'text/plain'
  357. if self.html_content:
  358. self.content, self.content_type = self.html_content, 'text/html'
  359. else:
  360. self.content, self.content_type = EmailReplyParser.read(self.text_content).text.replace("\n","\n\n"), 'text/plain'
  361. def process_part(self, part):
  362. """Parse email `part` and set it to `text_content`, `html_content` or `attachments`."""
  363. content_type = part.get_content_type()
  364. if content_type == 'text/plain':
  365. self.text_content += self.get_payload(part)
  366. elif content_type == 'text/html':
  367. self.html_content += self.get_payload(part)
  368. elif content_type == 'message/rfc822':
  369. # sent by outlook when another email is sent as an attachment to this email
  370. self.show_attached_email_headers_in_content(part)
  371. elif part.get_filename() or 'image' in content_type:
  372. self.get_attachment(part)
  373. def show_attached_email_headers_in_content(self, part):
  374. # get the multipart/alternative message
  375. try:
  376. from html import escape # python 3.x
  377. except ImportError:
  378. from cgi import escape # python 2.x
  379. message = list(part.walk())[1]
  380. headers = []
  381. for key in ('From', 'To', 'Subject', 'Date'):
  382. value = cstr(message.get(key))
  383. if value:
  384. headers.append('{label}: {value}'.format(label=_(key), value=escape(value)))
  385. self.text_content += '\n'.join(headers)
  386. self.html_content += '<hr>' + '\n'.join('<p>{0}</p>'.format(h) for h in headers)
  387. if not message.is_multipart() and message.get_content_type()=='text/plain':
  388. # email.parser didn't parse it!
  389. text_content = self.get_payload(message)
  390. self.text_content += text_content
  391. self.html_content += markdown(text_content)
  392. def get_charset(self, part):
  393. """Detect charset."""
  394. charset = part.get_content_charset()
  395. if not charset:
  396. charset = chardet.detect(safe_encode(cstr(part)))['encoding']
  397. return charset
  398. def get_payload(self, part):
  399. charset = self.get_charset(part)
  400. try:
  401. return str(part.get_payload(decode=True), str(charset), "ignore")
  402. except LookupError:
  403. return part.get_payload()
  404. def get_attachment(self, part):
  405. #charset = self.get_charset(part)
  406. fcontent = part.get_payload(decode=True)
  407. if fcontent:
  408. content_type = part.get_content_type()
  409. fname = part.get_filename()
  410. if fname:
  411. try:
  412. fname = fname.replace('\n', ' ').replace('\r', '')
  413. fname = cstr(decode_header(fname)[0][0])
  414. except:
  415. fname = get_random_filename(content_type=content_type)
  416. else:
  417. fname = get_random_filename(content_type=content_type)
  418. self.attachments.append({
  419. 'content_type': content_type,
  420. 'fname': fname,
  421. 'fcontent': fcontent,
  422. })
  423. cid = (cstr(part.get("Content-Id")) or "").strip("><")
  424. if cid:
  425. self.cid_map[fname] = cid
  426. def save_attachments_in_doc(self, doc):
  427. """Save email attachments in given document."""
  428. saved_attachments = []
  429. for attachment in self.attachments:
  430. try:
  431. _file = frappe.get_doc({
  432. "doctype": "File",
  433. "file_name": attachment['fname'],
  434. "attached_to_doctype": doc.doctype,
  435. "attached_to_name": doc.name,
  436. "is_private": 1,
  437. "content": attachment['fcontent']})
  438. _file.save()
  439. saved_attachments.append(_file)
  440. if attachment['fname'] in self.cid_map:
  441. self.cid_map[_file.name] = self.cid_map[attachment['fname']]
  442. except MaxFileSizeReachedError:
  443. # WARNING: bypass max file size exception
  444. pass
  445. except frappe.FileAlreadyAttachedException:
  446. pass
  447. except frappe.DuplicateEntryError:
  448. # same file attached twice??
  449. pass
  450. return saved_attachments
  451. def get_thread_id(self):
  452. """Extract thread ID from `[]`"""
  453. l = re.findall(r'(?<=\[)[\w/-]+', self.subject)
  454. return l and l[0] or None
  455. # fix due to a python bug in poplib that limits it to 2048
  456. poplib._MAXLINE = 20480
  457. imaplib._MAXLINE = 20480
  458. class TimerMixin(object):
  459. def __init__(self, *args, **kwargs):
  460. self.timeout = kwargs.pop('timeout', 0.0)
  461. self.elapsed_time = 0.0
  462. self._super.__init__(self, *args, **kwargs)
  463. if self.timeout:
  464. # set per operation timeout to one-fifth of total pop timeout
  465. self.sock.settimeout(self.timeout / 5.0)
  466. def _getline(self, *args, **kwargs):
  467. start_time = time.time()
  468. ret = self._super._getline(self, *args, **kwargs)
  469. self.elapsed_time += time.time() - start_time
  470. if self.timeout and self.elapsed_time > self.timeout:
  471. raise EmailTimeoutError
  472. return ret
  473. def quit(self, *args, **kwargs):
  474. self.elapsed_time = 0.0
  475. return self._super.quit(self, *args, **kwargs)
class Timed_POP3(TimerMixin, poplib.POP3):
    """``poplib.POP3`` client combined with ``TimerMixin``."""
    # class that TimerMixin delegates __init__, _getline and quit to
    _super = poplib.POP3
class Timed_POP3_SSL(TimerMixin, poplib.POP3_SSL):
    """``poplib.POP3_SSL`` client combined with ``TimerMixin``."""
    # class that TimerMixin delegates __init__, _getline and quit to
    _super = poplib.POP3_SSL
class Timed_IMAP4(TimerMixin, imaplib.IMAP4):
    """``imaplib.IMAP4`` client combined with ``TimerMixin``."""
    # class that TimerMixin delegates __init__, _getline and quit to
    # NOTE(review): confirm imaplib.IMAP4 actually provides `_getline`/`quit`;
    # if not, only the per-operation socket timeout applies here.
    _super = imaplib.IMAP4
class Timed_IMAP4_SSL(TimerMixin, imaplib.IMAP4_SSL):
    """``imaplib.IMAP4_SSL`` client combined with ``TimerMixin``."""
    # class that TimerMixin delegates __init__, _getline and quit to
    # NOTE(review): confirm imaplib.IMAP4_SSL actually provides `_getline`/`quit`;
    # if not, only the per-operation socket timeout applies here.
    _super = imaplib.IMAP4_SSL