You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

189 lines
4.9 KiB

  1. # Copyright (c) 2012 Web Notes Technologies Pvt Ltd (http://erpnext.com)
  2. #
  3. # MIT License (MIT)
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a
  6. # copy of this software and associated documentation files (the "Software"),
  7. # to deal in the Software without restriction, including without limitation
  8. # the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. # and/or sell copies of the Software, and to permit persons to whom the
  10. # Software is furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
  16. # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  17. # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  18. # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
  19. # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
  20. # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21. #
  22. from __future__ import unicode_literals
  23. """
  24. This module contains classes for managing incoming emails
  25. """
  26. class IncomingMail:
  27. """
  28. Single incoming email object. Extracts, text / html and attachments from the email
  29. """
  30. def __init__(self, content):
  31. """
  32. Parse the incoming mail content
  33. """
  34. import email
  35. self.mail = email.message_from_string(content)
  36. self.text_content = ''
  37. self.html_content = ''
  38. self.attachments = []
  39. self.parse()
  40. def get_text_content(self):
  41. """
  42. Returns the text parts of the email. If None, then HTML parts
  43. """
  44. return self.text_content or self.html_content
  45. def get_charset(self, part):
  46. """
  47. Guesses character set
  48. """
  49. charset = part.get_content_charset()
  50. if not charset:
  51. import chardet
  52. charset = chardet.detect(str(part))['encoding']
  53. return charset
  54. def get_payload(self, part, charset):
  55. """
  56. get utf-8 encoded part content
  57. """
  58. try:
  59. return unicode(part.get_payload(decode=True),str(charset),"ignore").encode('utf8','replace')
  60. except LookupError, e:
  61. return part.get_payload()
  62. def get_attachment(self, part, charset):
  63. """
  64. Extracts an attachment
  65. """
  66. self.attachments.append({
  67. 'content-type': part.get_content_type(),
  68. 'filename': part.get_filename(),
  69. 'content': part.get_payload(decode=True),
  70. })
  71. def parse(self):
  72. """
  73. Extracts text, html and attachments from the mail
  74. """
  75. for part in self.mail.walk():
  76. self.process_part(part)
  77. def get_thread_id(self):
  78. """
  79. Extracts thread id of the message between first []
  80. from the subject
  81. """
  82. import re
  83. subject = self.mail.get('Subject', '')
  84. return re.findall('(?<=\[)[\w/-]+', subject)
  85. def process_part(self, part):
  86. """
  87. Process a single part of an email
  88. """
  89. charset = self.get_charset(part)
  90. content_type = part.get_content_type()
  91. if content_type == 'text/plain':
  92. self.text_content += self.get_payload(part, charset)
  93. if content_type == 'text/html':
  94. self.html_content += self.get_payload(part, charset)
  95. if part.get_filename():
  96. self.get_attachment(part, charset)
  97. class POP3Mailbox:
  98. """
  99. A simple pop3 mailbox, abstracts connection and mail extraction
  100. To use, subclass it and override method process_message(from, subject, text, thread_id)
  101. """
  102. def __init__(self, settings_doc):
  103. """
  104. settings_doc must contain
  105. use_ssl, host, username, password
  106. (by name or object)
  107. """
  108. if isinstance(settings_doc, basestring):
  109. from webnotes.model.doc import Document
  110. self.settings = Document(settings_doc, settings_doc)
  111. else:
  112. self.settings = settings_doc
  113. def connect(self):
  114. """
  115. Connects to the mailbox
  116. """
  117. import poplib
  118. if self.settings.use_ssl:
  119. self.pop = poplib.POP3_SSL(self.settings.host)
  120. else:
  121. self.pop = poplib.POP3(self.settings.host)
  122. self.pop.user(self.settings.username)
  123. self.pop.pass_(self.settings.password)
  124. def get_messages(self):
  125. """
  126. Loads messages from the mailbox and calls
  127. process_message for each message
  128. """
  129. if not self.check_mails():
  130. return # nothing to do
  131. self.connect()
  132. num = num_copy = len(self.pop.list()[1])
  133. # WARNING: Hard coded max no. of messages to be popped
  134. if num > 20: num = 20
  135. for m in xrange(1, num+1):
  136. msg = self.pop.retr(m)
  137. try:
  138. self.process_message(IncomingMail('\n'.join(msg[1])))
  139. except:
  140. pass
  141. self.pop.dele(m)
  142. # WARNING: Delete message number 101 onwards from the pop list
  143. # This is to avoid having too many messages entering the system
  144. num = num_copy
  145. if num > 100:
  146. for m in xrange(101, num+1):
  147. self.pop.dele(m)
  148. self.pop.quit()
  149. def check_mails(self):
  150. """
  151. To be overridden
  152. If mailbox is to be scanned, returns true
  153. """
  154. return True
  155. def process_message(self, mail):
  156. """
  157. To be overriden
  158. """
  159. pass