您最多选择25个主题 主题必须以字母或数字开头,可以包含连字符 (-),并且长度不得超过35个字符
 
 
 
 
 
 

788 行
22 KiB

  1. # Copyright (c) 2015, Frappe Technologies Pvt. Ltd. and Contributors
  2. # MIT License. See license.txt
  3. from __future__ import unicode_literals
  4. # IMPORTANT: only import safe functions as this module will be included in jinja environment
  5. import frappe
  6. import operator
  7. import re, urllib, datetime, math
  8. import babel.dates
  9. from babel.core import UnknownLocaleError
  10. from dateutil import parser
  11. from num2words import num2words
  12. import HTMLParser
  13. from html2text import html2text
  14. DATE_FORMAT = "%Y-%m-%d"
  15. TIME_FORMAT = "%H:%M:%S.%f"
  16. DATETIME_FORMAT = DATE_FORMAT + " " + TIME_FORMAT
  17. # datetime functions
  18. def getdate(string_date=None):
  19. """
  20. Coverts string date (yyyy-mm-dd) to datetime.date object
  21. """
  22. if not string_date:
  23. return get_datetime().date()
  24. if isinstance(string_date, datetime.datetime):
  25. return string_date.date()
  26. elif isinstance(string_date, datetime.date):
  27. return string_date
  28. # dateutil parser does not agree with dates like 0000-00-00
  29. if not string_date or string_date=="0000-00-00":
  30. return None
  31. return parser.parse(string_date).date()
  32. def get_datetime(datetime_str=None):
  33. if not datetime_str:
  34. return now_datetime()
  35. if isinstance(datetime_str, (datetime.datetime, datetime.timedelta)):
  36. return datetime_str
  37. elif isinstance(datetime_str, (list, tuple)):
  38. return datetime.datetime(datetime_str)
  39. elif isinstance(datetime_str, datetime.date):
  40. return datetime.datetime.combine(datetime_str, datetime.time())
  41. # dateutil parser does not agree with dates like 0000-00-00
  42. if not datetime_str or (datetime_str or "").startswith("0000-00-00"):
  43. return None
  44. return parser.parse(datetime_str)
  45. def to_timedelta(time_str):
  46. if isinstance(time_str, basestring):
  47. t = parser.parse(time_str)
  48. return datetime.timedelta(hours=t.hour, minutes=t.minute, seconds=t.second, microseconds=t.microsecond)
  49. else:
  50. return time_str
  51. def add_to_date(date, years=0, months=0, days=0, hours=0, as_string=False, as_datetime=False):
  52. """Adds `days` to the given date"""
  53. from dateutil.relativedelta import relativedelta
  54. if date==None:
  55. date = now_datetime()
  56. if hours:
  57. as_datetime = True
  58. if isinstance(date, basestring):
  59. as_string = True
  60. if " " in date:
  61. as_datetime = True
  62. date = parser.parse(date)
  63. date = date + relativedelta(years=years, months=months, days=days, hours=hours)
  64. if as_string:
  65. if as_datetime:
  66. return date.strftime(DATETIME_FORMAT)
  67. else:
  68. return date.strftime(DATE_FORMAT)
  69. else:
  70. return date
  71. def add_days(date, days):
  72. return add_to_date(date, days=days)
  73. def add_months(date, months):
  74. return add_to_date(date, months=months)
  75. def add_years(date, years):
  76. return add_to_date(date, years=years)
  77. def date_diff(string_ed_date, string_st_date):
  78. return (getdate(string_ed_date) - getdate(string_st_date)).days
  79. def time_diff(string_ed_date, string_st_date):
  80. return get_datetime(string_ed_date) - get_datetime(string_st_date)
  81. def time_diff_in_seconds(string_ed_date, string_st_date):
  82. return time_diff(string_ed_date, string_st_date).total_seconds()
  83. def time_diff_in_hours(string_ed_date, string_st_date):
  84. return round(float(time_diff(string_ed_date, string_st_date).total_seconds()) / 3600, 6)
  85. def now_datetime():
  86. dt = convert_utc_to_user_timezone(datetime.datetime.utcnow())
  87. return dt.replace(tzinfo=None)
  88. def get_eta(from_time, percent_complete):
  89. diff = time_diff(now_datetime(), from_time).total_seconds()
  90. return str(datetime.timedelta(seconds=(100 - percent_complete) / percent_complete * diff))
  91. def _get_time_zone():
  92. return frappe.db.get_system_setting('time_zone') or 'Asia/Kolkata'
  93. def get_time_zone():
  94. if frappe.local.flags.in_test:
  95. return _get_time_zone()
  96. return frappe.cache().get_value("time_zone", _get_time_zone)
  97. def convert_utc_to_user_timezone(utc_timestamp):
  98. from pytz import timezone, UnknownTimeZoneError
  99. utcnow = timezone('UTC').localize(utc_timestamp)
  100. try:
  101. return utcnow.astimezone(timezone(get_time_zone()))
  102. except UnknownTimeZoneError:
  103. return utcnow
  104. def now():
  105. """return current datetime as yyyy-mm-dd hh:mm:ss"""
  106. if frappe.flags.current_date:
  107. return getdate(frappe.flags.current_date).strftime(DATE_FORMAT) + " " + \
  108. now_datetime().strftime(TIME_FORMAT)
  109. else:
  110. return now_datetime().strftime(DATETIME_FORMAT)
  111. def nowdate():
  112. """return current date as yyyy-mm-dd"""
  113. return now_datetime().strftime(DATE_FORMAT)
  114. def today():
  115. return nowdate()
  116. def nowtime():
  117. """return current time in hh:mm"""
  118. return now_datetime().strftime(TIME_FORMAT)
  119. def get_first_day(dt, d_years=0, d_months=0):
  120. """
  121. Returns the first day of the month for the date specified by date object
  122. Also adds `d_years` and `d_months` if specified
  123. """
  124. dt = getdate(dt)
  125. # d_years, d_months are "deltas" to apply to dt
  126. overflow_years, month = divmod(dt.month + d_months - 1, 12)
  127. year = dt.year + d_years + overflow_years
  128. return datetime.date(year, month + 1, 1)
  129. def get_last_day(dt):
  130. """
  131. Returns last day of the month using:
  132. `get_first_day(dt, 0, 1) + datetime.timedelta(-1)`
  133. """
  134. return get_first_day(dt, 0, 1) + datetime.timedelta(-1)
  135. def get_time(time_str):
  136. if isinstance(time_str, datetime.datetime):
  137. return time_str.time()
  138. elif isinstance(time_str, datetime.time):
  139. return time_str
  140. else:
  141. if isinstance(time_str, datetime.timedelta):
  142. time_str = str(time_str)
  143. return parser.parse(time_str).time()
  144. def get_datetime_str(datetime_obj):
  145. if isinstance(datetime_obj, basestring):
  146. datetime_obj = get_datetime(datetime_obj)
  147. return datetime_obj.strftime(DATETIME_FORMAT)
  148. def get_user_format():
  149. if getattr(frappe.local, "user_format", None) is None:
  150. frappe.local.user_format = frappe.db.get_default("date_format")
  151. return frappe.local.user_format or "yyyy-mm-dd"
  152. def formatdate(string_date=None, format_string=None):
  153. """
  154. Convers the given string date to :data:`user_format`
  155. User format specified in defaults
  156. Examples:
  157. * dd-mm-yyyy
  158. * mm-dd-yyyy
  159. * dd/mm/yyyy
  160. """
  161. date = getdate(string_date) if string_date else now_datetime().date()
  162. if not format_string:
  163. format_string = get_user_format().replace("mm", "MM")
  164. try:
  165. formatted_date = babel.dates.format_date(date, format_string, locale=(frappe.local.lang or "").replace("-", "_"))
  166. except UnknownLocaleError:
  167. formatted_date = date.strftime("%Y-%m-%d")
  168. return formatted_date
  169. def format_time(txt):
  170. try:
  171. formatted_time = babel.dates.format_time(get_time(txt), locale=(frappe.local.lang or "").replace("-", "_"))
  172. except UnknownLocaleError:
  173. formatted_time = get_time(txt).strftime("%H:%M:%S")
  174. return formatted_time
  175. def format_datetime(datetime_string, format_string=None):
  176. if not datetime_string:
  177. return
  178. datetime = get_datetime(datetime_string)
  179. if not format_string:
  180. format_string = get_user_format().replace("mm", "MM") + " HH:mm:ss"
  181. try:
  182. formatted_datetime = babel.dates.format_datetime(datetime, format_string, locale=(frappe.local.lang or "").replace("-", "_"))
  183. except UnknownLocaleError:
  184. formatted_datetime = datetime.strftime('%Y-%m-%d %H:%M:%S')
  185. return formatted_datetime
  186. def global_date_format(date):
  187. """returns date as 1 January 2012"""
  188. formatted_date = getdate(date).strftime("%d %B %Y")
  189. return formatted_date.startswith("0") and formatted_date[1:] or formatted_date
  190. def has_common(l1, l2):
  191. """Returns truthy value if there are common elements in lists l1 and l2"""
  192. return set(l1) & set(l2)
  193. def flt(s, precision=None):
  194. """Convert to float (ignore commas)"""
  195. if isinstance(s, basestring):
  196. s = s.replace(',','')
  197. try:
  198. num = float(s)
  199. if precision is not None:
  200. num = rounded(num, precision)
  201. except Exception:
  202. num = 0
  203. return num
  204. def cint(s):
  205. """Convert to integer"""
  206. try: num = int(float(s))
  207. except: num = 0
  208. return num
  209. def cstr(s, encoding='utf-8'):
  210. return frappe.as_unicode(s, encoding)
  211. def rounded(num, precision=0):
  212. """round method for round halfs to nearest even algorithm aka banker's rounding - compatible with python3"""
  213. precision = cint(precision)
  214. multiplier = 10 ** precision
  215. # avoid rounding errors
  216. num = round(num * multiplier if precision else num, 8)
  217. floor = math.floor(num)
  218. decimal_part = num - floor
  219. if not precision and decimal_part == 0.5:
  220. num = floor if (floor % 2 == 0) else floor + 1
  221. else:
  222. num = round(num)
  223. return (num / multiplier) if precision else num
  224. def remainder(numerator, denominator, precision=2):
  225. precision = cint(precision)
  226. multiplier = 10 ** precision
  227. if precision:
  228. _remainder = ((numerator * multiplier) % (denominator * multiplier)) / multiplier
  229. else:
  230. _remainder = numerator % denominator
  231. return flt(_remainder, precision);
  232. def round_based_on_smallest_currency_fraction(value, currency, precision=2):
  233. smallest_currency_fraction_value = flt(frappe.db.get_value("Currency",
  234. currency, "smallest_currency_fraction_value"))
  235. if smallest_currency_fraction_value:
  236. remainder_val = remainder(value, smallest_currency_fraction_value, precision)
  237. if remainder_val > (smallest_currency_fraction_value / 2):
  238. value += smallest_currency_fraction_value - remainder_val
  239. else:
  240. value -= remainder_val
  241. else:
  242. value = rounded(value)
  243. return flt(value, precision)
  244. def encode(obj, encoding="utf-8"):
  245. if isinstance(obj, list):
  246. out = []
  247. for o in obj:
  248. if isinstance(o, unicode):
  249. out.append(o.encode(encoding))
  250. else:
  251. out.append(o)
  252. return out
  253. elif isinstance(obj, unicode):
  254. return obj.encode(encoding)
  255. else:
  256. return obj
  257. def parse_val(v):
  258. """Converts to simple datatypes from SQL query results"""
  259. if isinstance(v, (datetime.date, datetime.datetime)):
  260. v = unicode(v)
  261. elif isinstance(v, datetime.timedelta):
  262. v = ":".join(unicode(v).split(":")[:2])
  263. elif isinstance(v, long):
  264. v = int(v)
  265. return v
  266. def fmt_money(amount, precision=None, currency=None):
  267. """
  268. Convert to string with commas for thousands, millions etc
  269. """
  270. number_format = None
  271. if currency:
  272. number_format = frappe.db.get_value("Currency", currency, "number_format", cache=True)
  273. if not number_format:
  274. number_format = frappe.db.get_default("number_format") or "#,###.##"
  275. decimal_str, comma_str, number_format_precision = get_number_format_info(number_format)
  276. if precision is None:
  277. precision = number_format_precision
  278. amount = '%.*f' % (precision, flt(amount))
  279. if amount.find('.') == -1:
  280. decimals = ''
  281. else:
  282. decimals = amount.split('.')[1]
  283. parts = []
  284. minus = ''
  285. if flt(amount) < 0:
  286. minus = '-'
  287. amount = cstr(abs(flt(amount))).split('.')[0]
  288. if len(amount) > 3:
  289. parts.append(amount[-3:])
  290. amount = amount[:-3]
  291. val = number_format=="#,##,###.##" and 2 or 3
  292. while len(amount) > val:
  293. parts.append(amount[-val:])
  294. amount = amount[:-val]
  295. parts.append(amount)
  296. parts.reverse()
  297. amount = comma_str.join(parts) + ((precision and decimal_str) and (decimal_str + decimals) or "")
  298. amount = minus + amount
  299. if currency and frappe.defaults.get_global_default("hide_currency_symbol") != "Yes":
  300. symbol = frappe.db.get_value("Currency", currency, "symbol") or currency
  301. amount = symbol + " " + amount
  302. return amount
  303. number_format_info = {
  304. "#,###.##": (".", ",", 2),
  305. "#.###,##": (",", ".", 2),
  306. "# ###.##": (".", " ", 2),
  307. "# ###,##": (",", " ", 2),
  308. "#'###.##": (".", "'", 2),
  309. "#, ###.##": (".", ", ", 2),
  310. "#,##,###.##": (".", ",", 2),
  311. "#,###.###": (".", ",", 3),
  312. "#.###": ("", ".", 0),
  313. "#,###": ("", ",", 0)
  314. }
  315. def get_number_format_info(format):
  316. return number_format_info.get(format) or (".", ",", 2)
  317. #
  318. # convert currency to words
  319. #
  320. def money_in_words(number, main_currency = None, fraction_currency=None):
  321. """
  322. Returns string in words with currency and fraction currency.
  323. """
  324. from frappe.utils import get_defaults
  325. _ = frappe._
  326. if not number or flt(number) < 0:
  327. return ""
  328. d = get_defaults()
  329. if not main_currency:
  330. main_currency = d.get('currency', 'INR')
  331. if not fraction_currency:
  332. fraction_currency = frappe.db.get_value("Currency", main_currency, "fraction") or _("Cent")
  333. n = "%.2f" % flt(number)
  334. main, fraction = n.split('.')
  335. if len(fraction)==1: fraction += '0'
  336. number_format = frappe.db.get_value("Currency", main_currency, "number_format", cache=True) or \
  337. frappe.db.get_default("number_format") or "#,###.##"
  338. in_million = True
  339. if number_format == "#,##,###.##": in_million = False
  340. out = main_currency + ' ' + in_words(main, in_million).title()
  341. if cint(fraction):
  342. out = out + ' ' + _('and') + ' ' + in_words(fraction, in_million).title() + ' ' + fraction_currency
  343. return out + ' ' + _('only.')
  344. #
  345. # convert number to words
  346. #
  347. def in_words(integer, in_million=True):
  348. """
  349. Returns string in words for the given integer.
  350. """
  351. locale = 'en_IN' if not in_million else frappe.local.lang
  352. integer = int(integer)
  353. try:
  354. ret = num2words(integer, lang=locale)
  355. except NotImplementedError:
  356. ret = num2words(integer, lang='en')
  357. return ret.replace('-', ' ')
  358. def is_html(text):
  359. out = False
  360. for key in ["<br>", "<p", "<img", "<div"]:
  361. if key in text:
  362. out = True
  363. break
  364. return out
  365. # from Jinja2 code
  366. _striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
  367. def strip_html(text):
  368. """removes anything enclosed in and including <>"""
  369. return _striptags_re.sub("", text)
  370. def escape_html(text):
  371. html_escape_table = {
  372. "&": "&amp;",
  373. '"': "&quot;",
  374. "'": "&apos;",
  375. ">": "&gt;",
  376. "<": "&lt;",
  377. }
  378. return "".join(html_escape_table.get(c,c) for c in text)
  379. def pretty_date(iso_datetime):
  380. """
  381. Takes an ISO time and returns a string representing how
  382. long ago the date represents.
  383. Ported from PrettyDate by John Resig
  384. """
  385. if not iso_datetime: return ''
  386. import math
  387. if isinstance(iso_datetime, basestring):
  388. iso_datetime = datetime.datetime.strptime(iso_datetime, DATETIME_FORMAT)
  389. now_dt = datetime.datetime.strptime(now(), DATETIME_FORMAT)
  390. dt_diff = now_dt - iso_datetime
  391. # available only in python 2.7+
  392. # dt_diff_seconds = dt_diff.total_seconds()
  393. dt_diff_seconds = dt_diff.days * 86400.0 + dt_diff.seconds
  394. dt_diff_days = math.floor(dt_diff_seconds / 86400.0)
  395. # differnt cases
  396. if dt_diff_seconds < 60.0:
  397. return 'just now'
  398. elif dt_diff_seconds < 120.0:
  399. return '1 minute ago'
  400. elif dt_diff_seconds < 3600.0:
  401. return '%s minutes ago' % cint(math.floor(dt_diff_seconds / 60.0))
  402. elif dt_diff_seconds < 7200.0:
  403. return '1 hour ago'
  404. elif dt_diff_seconds < 86400.0:
  405. return '%s hours ago' % cint(math.floor(dt_diff_seconds / 3600.0))
  406. elif dt_diff_days == 1.0:
  407. return 'Yesterday'
  408. elif dt_diff_days < 7.0:
  409. return '%s days ago' % cint(dt_diff_days)
  410. elif dt_diff_days < 31.0:
  411. return '%s week(s) ago' % cint(math.ceil(dt_diff_days / 7.0))
  412. elif dt_diff_days < 365.0:
  413. return '%s months ago' % cint(math.ceil(dt_diff_days / 30.0))
  414. else:
  415. return 'more than %s year(s) ago' % cint(math.floor(dt_diff_days / 365.0))
  416. def comma_or(some_list):
  417. return comma_sep(some_list, frappe._("{0} or {1}"))
  418. def comma_and(some_list):
  419. return comma_sep(some_list, frappe._("{0} and {1}"))
  420. def comma_sep(some_list, pattern):
  421. if isinstance(some_list, (list, tuple)):
  422. # list(some_list) is done to preserve the existing list
  423. some_list = [unicode(s) for s in list(some_list)]
  424. if not some_list:
  425. return ""
  426. elif len(some_list) == 1:
  427. return some_list[0]
  428. else:
  429. some_list = ["'%s'" % s for s in some_list]
  430. return pattern.format(", ".join(frappe._(s) for s in some_list[:-1]), some_list[-1])
  431. else:
  432. return some_list
  433. def new_line_sep(some_list):
  434. if isinstance(some_list, (list, tuple)):
  435. # list(some_list) is done to preserve the existing list
  436. some_list = [unicode(s) for s in list(some_list)]
  437. if not some_list:
  438. return ""
  439. elif len(some_list) == 1:
  440. return some_list[0]
  441. else:
  442. some_list = ["%s" % s for s in some_list]
  443. return format("\n ".join(some_list))
  444. else:
  445. return some_list
  446. def filter_strip_join(some_list, sep):
  447. """given a list, filter None values, strip spaces and join"""
  448. return (cstr(sep)).join((cstr(a).strip() for a in filter(None, some_list)))
  449. def get_url(uri=None, full_address=False):
  450. """get app url from request"""
  451. host_name = frappe.local.conf.host_name or frappe.local.conf.hostname
  452. if uri and (uri.startswith("http://") or uri.startswith("https://")):
  453. return uri
  454. if not host_name:
  455. if hasattr(frappe.local, "request") and frappe.local.request and frappe.local.request.host:
  456. protocol = 'https://' if 'https' == frappe.get_request_header('X-Forwarded-Proto', "") else 'http://'
  457. host_name = protocol + frappe.local.request.host
  458. elif frappe.local.site:
  459. protocol = 'http://'
  460. if frappe.local.conf.ssl_certificate:
  461. protocol = 'https://'
  462. elif frappe.local.conf.wildcard:
  463. domain = frappe.local.conf.wildcard.get('domain')
  464. if domain and frappe.local.site.endswith(domain) and frappe.local.conf.wildcard.get('ssl_certificate'):
  465. protocol = 'https://'
  466. host_name = protocol + frappe.local.site
  467. else:
  468. host_name = frappe.db.get_value("Website Settings", "Website Settings",
  469. "subdomain")
  470. if not host_name:
  471. host_name = "http://localhost"
  472. if host_name and not (host_name.startswith("http://") or host_name.startswith("https://")):
  473. host_name = "http://" + host_name
  474. if not uri and full_address:
  475. uri = frappe.get_request_header("REQUEST_URI", "")
  476. url = urllib.basejoin(host_name, uri) if uri else host_name
  477. return url
  478. def get_host_name():
  479. return get_url().rsplit("//", 1)[-1]
  480. def get_link_to_form(doctype, name, label=None):
  481. if not label: label = name
  482. return """<a href="{0}">{1}</a>""".format(get_url_to_form(doctype, name), label)
  483. def get_url_to_form(doctype, name):
  484. return get_url(uri = "desk#Form/{0}/{1}".format(quoted(doctype), quoted(name)))
  485. def get_url_to_list(doctype):
  486. return get_url(uri = "desk#List/{0}".format(quoted(doctype)))
  487. operator_map = {
  488. # startswith
  489. "^": lambda (a, b): (a or "").startswith(b),
  490. # in or not in a list
  491. "in": lambda (a, b): operator.contains(b, a),
  492. "not in": lambda (a, b): not operator.contains(b, a),
  493. # comparison operators
  494. "=": lambda (a, b): operator.eq(a, b),
  495. "!=": lambda (a, b): operator.ne(a, b),
  496. ">": lambda (a, b): operator.gt(a, b),
  497. "<": lambda (a, b): operator.lt(a, b),
  498. ">=": lambda (a, b): operator.ge(a, b),
  499. "<=": lambda (a, b): operator.le(a, b),
  500. "not None": lambda (a, b): a and True or False,
  501. "None": lambda (a, b): (not a) and True or False
  502. }
  503. def evaluate_filters(doc, filters):
  504. '''Returns true if doc matches filters'''
  505. if isinstance(filters, dict):
  506. for key, value in filters.iteritems():
  507. f = get_filter(None, {key:value})
  508. if not compare(doc.get(f.fieldname), f.operator, f.value):
  509. return False
  510. elif isinstance(filters, (list, tuple)):
  511. for d in filters:
  512. f = get_filter(None, d)
  513. if not compare(doc.get(f.fieldname), f.operator, f.value):
  514. return False
  515. return True
  516. def compare(val1, condition, val2):
  517. ret = False
  518. if condition in operator_map:
  519. ret = operator_map[condition]((val1, val2))
  520. return ret
  521. def get_filter(doctype, f):
  522. """Returns a _dict like
  523. {
  524. "doctype":
  525. "fieldname":
  526. "operator":
  527. "value":
  528. }
  529. """
  530. from frappe.model import default_fields, optional_fields
  531. if isinstance(f, dict):
  532. key, value = f.items()[0]
  533. f = make_filter_tuple(doctype, key, value)
  534. if not isinstance(f, (list, tuple)):
  535. frappe.throw("Filter must be a tuple or list (in a list)")
  536. if len(f) == 3:
  537. f = (doctype, f[0], f[1], f[2])
  538. elif len(f) != 4:
  539. frappe.throw("Filter must have 4 values (doctype, fieldname, operator, value): {0}".format(str(f)))
  540. f = frappe._dict(doctype=f[0], fieldname=f[1], operator=f[2], value=f[3])
  541. if not f.operator:
  542. # if operator is missing
  543. f.operator = "="
  544. valid_operators = ("=", "!=", ">", "<", ">=", "<=", "like", "not like", "in", "not in", "Between")
  545. if f.operator not in valid_operators:
  546. frappe.throw("Operator must be one of {0}".format(", ".join(valid_operators)))
  547. if f.doctype and (f.fieldname not in default_fields + optional_fields):
  548. # verify fieldname belongs to the doctype
  549. meta = frappe.get_meta(f.doctype)
  550. if not meta.has_field(f.fieldname):
  551. # try and match the doctype name from child tables
  552. for df in meta.get_table_fields():
  553. if frappe.get_meta(df.options).has_field(f.fieldname):
  554. f.doctype = df.options
  555. break
  556. return f
  557. def make_filter_tuple(doctype, key, value):
  558. '''return a filter tuple like [doctype, key, operator, value]'''
  559. if isinstance(value, (list, tuple)):
  560. return [doctype, key, value[0], value[1]]
  561. else:
  562. return [doctype, key, "=", value]
  563. def scrub_urls(html):
  564. html = expand_relative_urls(html)
  565. # encoding should be responsibility of the composer
  566. # html = quote_urls(html)
  567. return html
  568. def expand_relative_urls(html):
  569. # expand relative urls
  570. url = get_url()
  571. if url.endswith("/"): url = url[:-1]
  572. def _expand_relative_urls(match):
  573. to_expand = list(match.groups())
  574. if not to_expand[2].startswith("/"):
  575. to_expand[2] = "/" + to_expand[2]
  576. to_expand.insert(2, url)
  577. if 'url' in to_expand[0] and to_expand[1].startswith('(') and to_expand[-1].endswith(')'):
  578. # background-image: url('/assets/...') - workaround for wkhtmltopdf print-media-type
  579. to_expand.append(' !important')
  580. return "".join(to_expand)
  581. html = re.sub('(href|src){1}([\s]*=[\s]*[\'"]?)((?!http)[^\'" >]+)([\'"]?)', _expand_relative_urls, html)
  582. # background-image: url('/assets/...')
  583. html = re.sub('(:[\s]?url)(\([\'"]?)([^\)]*)([\'"]?\))', _expand_relative_urls, html)
  584. return html
  585. def quoted(url):
  586. return cstr(urllib.quote(encode(url), safe=b"~@#$&()*!+=:;,.?/'"))
  587. def quote_urls(html):
  588. def _quote_url(match):
  589. groups = list(match.groups())
  590. groups[2] = quoted(groups[2])
  591. return "".join(groups)
  592. return re.sub('(href|src){1}([\s]*=[\s]*[\'"]?)((?:http)[^\'">]+)([\'"]?)',
  593. _quote_url, html)
  594. def unique(seq):
  595. """use this instead of list(set()) to preserve order of the original list.
  596. Thanks to Stackoverflow: http://stackoverflow.com/questions/480214/how-do-you-remove-duplicates-from-a-list-in-python-whilst-preserving-order"""
  597. seen = set()
  598. seen_add = seen.add
  599. return [ x for x in seq if not (x in seen or seen_add(x)) ]
  600. def strip(val, chars=None):
  601. # \ufeff is no-width-break, \u200b is no-width-space
  602. return (val or "").replace("\ufeff", "").replace("\u200b", "").strip(chars)
  603. def to_markdown(html):
  604. text = None
  605. try:
  606. text = html2text(html)
  607. except HTMLParser.HTMLParseError:
  608. pass
  609. return text