You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

188 lines
4.8 KiB

  1. # Copyright (c) 2012 Web Notes Technologies Pvt Ltd (http://erpnext.com)
  2. #
  3. # MIT License (MIT)
  4. #
  5. # Permission is hereby granted, free of charge, to any person obtaining a
  6. # copy of this software and associated documentation files (the "Software"),
  7. # to deal in the Software without restriction, including without limitation
  8. # the rights to use, copy, modify, merge, publish, distribute, sublicense,
  9. # and/or sell copies of the Software, and to permit persons to whom the
  10. # Software is furnished to do so, subject to the following conditions:
  11. #
  12. # The above copyright notice and this permission notice shall be included in
  13. # all copies or substantial portions of the Software.
  14. #
  15. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
  16. # INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
  17. # PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  18. # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
  19. # CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
  20. # OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  21. #
  22. """
  23. This module contains classes for managing incoming emails
  24. """
  25. class IncomingMail:
  26. """
  27. Single incoming email object. Extracts, text / html and attachments from the email
  28. """
  29. def __init__(self, content):
  30. """
  31. Parse the incoming mail content
  32. """
  33. import email
  34. self.mail = email.message_from_string(content)
  35. self.text_content = ''
  36. self.html_content = ''
  37. self.attachments = []
  38. self.parse()
  39. def get_text_content(self):
  40. """
  41. Returns the text parts of the email. If None, then HTML parts
  42. """
  43. return self.text_content or self.html_content
  44. def get_charset(self, part):
  45. """
  46. Guesses character set
  47. """
  48. charset = part.get_content_charset()
  49. if not charset:
  50. import chardet
  51. charset = chardet.detect(str(part))['encoding']
  52. return charset
  53. def get_payload(self, part, charset):
  54. """
  55. get utf-8 encoded part content
  56. """
  57. try:
  58. return unicode(part.get_payload(decode=True),str(charset),"ignore").encode('utf8','replace')
  59. except LookupError, e:
  60. return part.get_payload()
  61. def get_attachment(self, part, charset):
  62. """
  63. Extracts an attachment
  64. """
  65. self.attachments.append({
  66. 'content-type': part.get_content_type(),
  67. 'filename': part.get_filename(),
  68. 'content': part.get_payload(decode=True),
  69. })
  70. def parse(self):
  71. """
  72. Extracts text, html and attachments from the mail
  73. """
  74. for part in self.mail.walk():
  75. self.process_part(part)
  76. def get_thread_id(self):
  77. """
  78. Extracts thread id of the message between first []
  79. from the subject
  80. """
  81. import re
  82. subject = self.mail.get('Subject', '')
  83. return re.findall('(?<=\[)[\w/-]+', subject)
  84. def process_part(self, part):
  85. """
  86. Process a single part of an email
  87. """
  88. charset = self.get_charset(part)
  89. content_type = part.get_content_type()
  90. if content_type == 'text/plain':
  91. self.text_content += self.get_payload(part, charset)
  92. if content_type == 'text/html':
  93. self.html_content += self.get_payload(part, charset)
  94. if part.get_filename():
  95. self.get_attachment(part, charset)
  96. class POP3Mailbox:
  97. """
  98. A simple pop3 mailbox, abstracts connection and mail extraction
  99. To use, subclass it and override method process_message(from, subject, text, thread_id)
  100. """
  101. def __init__(self, settings_doc):
  102. """
  103. settings_doc must contain
  104. use_ssl, host, username, password
  105. (by name or object)
  106. """
  107. if isinstance(settings_doc, basestring):
  108. from webnotes.model.doc import Document
  109. self.settings = Document(settings_doc, settings_doc)
  110. else:
  111. self.settings = settings_doc
  112. def connect(self):
  113. """
  114. Connects to the mailbox
  115. """
  116. import poplib
  117. if self.settings.use_ssl:
  118. self.pop = poplib.POP3_SSL(self.settings.host)
  119. else:
  120. self.pop = poplib.POP3(self.settings.host)
  121. self.pop.user(self.settings.username)
  122. self.pop.pass_(self.settings.password)
  123. def get_messages(self):
  124. """
  125. Loads messages from the mailbox and calls
  126. process_message for each message
  127. """
  128. if not self.check_mails():
  129. return # nothing to do
  130. self.connect()
  131. num = num_copy = len(self.pop.list()[1])
  132. # WARNING: Hard coded max no. of messages to be popped
  133. if num > 20: num = 20
  134. for m in xrange(1, num+1):
  135. msg = self.pop.retr(m)
  136. try:
  137. self.process_message(IncomingMail('\n'.join(msg[1])))
  138. except:
  139. pass
  140. self.pop.dele(m)
  141. # WARNING: Delete message number 101 onwards from the pop list
  142. # This is to avoid having too many messages entering the system
  143. num = num_copy
  144. if num > 100:
  145. for m in xrange(101, num+1):
  146. self.pop.dele(m)
  147. self.pop.quit()
  148. def check_mails(self):
  149. """
  150. To be overridden
  151. If mailbox is to be scanned, returns true
  152. """
  153. return True
  154. def process_message(self, mail):
  155. """
  156. To be overriden
  157. """
  158. pass