Converted all usages of re.* that weren't compiling the regex separately and re-using it. Separated out the compiled patterns as global variables. Repetitive patterns could be made DRY-er. It would be nicer to have all regexes in a single module so that we could re-use them better, keep track of outdated ones, and keep checks for possible ReDoS issues, etc. (version-14)
@@ -20,6 +20,8 @@ import frappe | |||
timestamps = {} | |||
app_paths = None | |||
sites_path = os.path.abspath(os.getcwd()) | |||
WHITESPACE_PATTERN = re.compile(r"\s+") | |||
HTML_COMMENT_PATTERN = re.compile(r"(<!--.*?-->)") | |||
class AssetsNotDownloadedError(Exception): | |||
@@ -406,10 +408,10 @@ def link_assets_dir(source, target, hard_link=False): | |||
def scrub_html_template(content): | |||
"""Returns HTML content with removed whitespace and comments""" | |||
# remove whitespace to a single space | |||
content = re.sub(r"\s+", " ", content) | |||
content = WHITESPACE_PATTERN.sub(" ", content) | |||
# strip comments | |||
content = re.sub(r"(<!--.*?-->)", "", content) | |||
content = HTML_COMMENT_PATTERN.sub("", content) | |||
return content.replace("'", "'") | |||
@@ -4,6 +4,7 @@ | |||
import io | |||
import json | |||
import os | |||
import re | |||
import timeit | |||
from datetime import date, datetime | |||
@@ -22,6 +23,7 @@ INVALID_VALUES = ("", None) | |||
MAX_ROWS_IN_PREVIEW = 10 | |||
INSERT = "Insert New Records" | |||
UPDATE = "Update Existing Records" | |||
DURATION_PATTERN = re.compile(r"^(?:(\d+d)?((^|\s)\d+h)?((^|\s)\d+m)?((^|\s)\d+s)?)$") | |||
class Importer: | |||
@@ -725,10 +727,7 @@ class Row: | |||
) | |||
return | |||
elif df.fieldtype == "Duration": | |||
import re | |||
is_valid_duration = re.match(r"^(?:(\d+d)?((^|\s)\d+h)?((^|\s)\d+m)?((^|\s)\d+s)?)$", value) | |||
if not is_valid_duration: | |||
if not DURATION_PATTERN.match(value): | |||
self.warnings.append( | |||
{ | |||
"row": self.row_number, | |||
@@ -35,6 +35,12 @@ from frappe.query_builder.functions import Concat | |||
from frappe.utils import cint | |||
from frappe.website.utils import clear_cache | |||
DEPENDS_ON_PATTERN = re.compile(r'[\w\.:_]+\s*={1}\s*[\w\.@\'"]+') | |||
ILLEGAL_FIELDNAME_PATTERN = re.compile("""['",./%@()<>{}]""") | |||
WHITESPACE_PADDING_PATTERN = re.compile(r"^[ \t\n\r]+|[ \t\n\r]+$", flags=re.ASCII) | |||
START_WITH_LETTERS_PATTERN = re.compile(r"^(?![\W])[^\d_\s][\w -]+$", flags=re.ASCII) | |||
FIELD_PATTERN = re.compile("{(.*?)}", flags=re.UNICODE) | |||
class InvalidFieldNameError(frappe.ValidationError): | |||
pass | |||
@@ -357,8 +363,7 @@ class DocType(Document): | |||
else: | |||
if d.fieldname in restricted: | |||
frappe.throw(_("Fieldname {0} is restricted").format(d.fieldname), InvalidFieldNameError) | |||
d.fieldname = re.sub("""['",./%@()<>{}]""", "", d.fieldname) | |||
d.fieldname = ILLEGAL_FIELDNAME_PATTERN.sub("", d.fieldname) | |||
# fieldnames should be lowercase | |||
d.fieldname = d.fieldname.lower() | |||
@@ -842,15 +847,13 @@ class DocType(Document): | |||
_("Doctype name is limited to {0} characters ({1})").format(max_length, name), frappe.NameError | |||
) | |||
flags = {"flags": re.ASCII} | |||
# a DocType name should not start or end with an empty space | |||
if re.search(r"^[ \t\n\r]+|[ \t\n\r]+$", name, **flags): | |||
if WHITESPACE_PADDING_PATTERN.search(name): | |||
frappe.throw(_("DocType's name should not start or end with whitespace"), frappe.NameError) | |||
# a DocType's name should not start with a number or underscore | |||
# and should only contain letters, numbers, underscore, and hyphen | |||
if not re.match(r"^(?![\W])[^\d_\s][\w -]+$", name, **flags): | |||
if not START_WITH_LETTERS_PATTERN.match(name): | |||
frappe.throw( | |||
_( | |||
"A DocType's name should start with a letter and can only " | |||
@@ -1254,7 +1257,7 @@ def validate_fields(meta): | |||
if not pattern: | |||
return | |||
for fieldname in re.findall("{(.*?)}", pattern, re.UNICODE): | |||
for fieldname in FIELD_PATTERN.findall(pattern): | |||
if fieldname.startswith("{"): | |||
# edge case when double curlies are used for escape | |||
continue | |||
@@ -1336,9 +1339,7 @@ def validate_fields(meta): | |||
] | |||
for field in depends_on_fields: | |||
depends_on = docfield.get(field, None) | |||
if ( | |||
depends_on and ("=" in depends_on) and re.match(r'[\w\.:_]+\s*={1}\s*[\w\.@\'"]+', depends_on) | |||
): | |||
if depends_on and ("=" in depends_on) and DEPENDS_ON_PATTERN.match(depends_on): | |||
frappe.throw(_("Invalid {0} condition").format(frappe.unscrub(field)), frappe.ValidationError) | |||
def check_table_multiselect_option(docfield): | |||
@@ -23,6 +23,11 @@ from frappe.query_builder.functions import Count | |||
from frappe.query_builder.utils import DocType | |||
from frappe.utils import cast, get_datetime, get_table_name, getdate, now, sbool | |||
IFNULL_PATTERN = re.compile(r"ifnull\(", flags=re.IGNORECASE) | |||
INDEX_PATTERN = re.compile(r"\s*\([^)]+\)\s*") | |||
SINGLE_WORD_PATTERN = re.compile(r'([`"]?)(tab([A-Z]\w+))\1') | |||
MULTI_WORD_PATTERN = re.compile(r'([`"])(tab([A-Z]\w+)( [A-Z]\w+)+)\1') | |||
class Database(object): | |||
""" | |||
@@ -143,9 +148,8 @@ class Database(object): | |||
# remove whitespace / indentation from start and end of query | |||
query = query.strip() | |||
if re.search(r"ifnull\(", query, flags=re.IGNORECASE): | |||
# replaces ifnull in query with coalesce | |||
query = re.sub(r"ifnull\(", "coalesce(", query, flags=re.IGNORECASE) | |||
# replaces ifnull in query with coalesce | |||
query = IFNULL_PATTERN.sub("coalesce(", query) | |||
if not self._conn: | |||
self.connect() | |||
@@ -1126,8 +1130,7 @@ class Database(object): | |||
def get_index_name(fields): | |||
index_name = "_".join(fields) + "_index" | |||
# remove index length if present e.g. (10) from index name | |||
index_name = re.sub(r"\s*\([^)]+\)\s*", r"", index_name) | |||
return index_name | |||
return INDEX_PATTERN.sub(r"", index_name) | |||
def get_system_setting(self, key): | |||
def _load_system_settings(): | |||
@@ -1219,11 +1222,9 @@ class Database(object): | |||
# and are continued with multiple words that start with a captital letter | |||
# e.g. 'tabXxx' or 'tabXxx Xxx' or 'tabXxx Xxx Xxx' and so on | |||
single_word_regex = r'([`"]?)(tab([A-Z]\w+))\1' | |||
multi_word_regex = r'([`"])(tab([A-Z]\w+)( [A-Z]\w+)+)\1' | |||
tables = [] | |||
for regex in (single_word_regex, multi_word_regex): | |||
tables += [groups[1] for groups in re.findall(regex, query)] | |||
for regex in (SINGLE_WORD_PATTERN, MULTI_WORD_PATTERN): | |||
tables += [groups[1] for groups in regex.findall(query)] | |||
if frappe.flags.touched_tables is None: | |||
frappe.flags.touched_tables = set() | |||
@@ -20,6 +20,11 @@ DEC2FLOAT = psycopg2.extensions.new_type( | |||
psycopg2.extensions.register_type(DEC2FLOAT) | |||
LOCATE_SUB_PATTERN = re.compile(r"locate\(([^,]+),([^)]+)(\)?)\)", flags=re.IGNORECASE) | |||
LOCATE_QUERY_PATTERN = re.compile(r"locate\(", flags=re.IGNORECASE) | |||
PG_TRANSFORM_PATTERN = re.compile(r"([=><]+)\s*([+-]?\d+)(\.0)?(?![a-zA-Z\.\d])") | |||
FROM_TAB_PATTERN = re.compile(r"from tab([\w-]*)", flags=re.IGNORECASE) | |||
class PostgresDatabase(Database): | |||
ProgrammingError = psycopg2.ProgrammingError | |||
@@ -382,12 +387,10 @@ class PostgresDatabase(Database): | |||
def modify_query(query): | |||
""" "Modifies query according to the requirements of postgres""" | |||
# replace ` with " for definitions | |||
query = str(query) | |||
query = query.replace("`", '"') | |||
query = str(query).replace("`", '"') | |||
query = replace_locate_with_strpos(query) | |||
# select from requires "" | |||
if re.search("from tab", query, flags=re.IGNORECASE): | |||
query = re.sub(r"from tab([\w-]*)", r'from "tab\1"', query, flags=re.IGNORECASE) | |||
query = FROM_TAB_PATTERN.sub(r'from "tab\1"', query) | |||
# only find int (with/without signs), ignore decimals (with/without signs), ignore hashes (which start with numbers), | |||
# drop .0 from decimals and add quotes around them | |||
@@ -396,8 +399,7 @@ def modify_query(query): | |||
# >>> re.sub(r"([=><]+)\s*([+-]?\d+)(\.0)?(?![a-zA-Z\.\d])", r"\1 '\2'", query) | |||
# "c='abcd' , a >= '45', b = '-45', c = '40', d= '4500', e=3500.53, f=40psdfsd, g= '9092094312', h=12.00023 | |||
query = re.sub(r"([=><]+)\s*([+-]?\d+)(\.0)?(?![a-zA-Z\.\d])", r"\1 '\2'", query) | |||
return query | |||
return PG_TRANSFORM_PATTERN.sub(r"\1 '\2'", query) | |||
def modify_values(values): | |||
@@ -430,8 +432,6 @@ def modify_values(values): | |||
def replace_locate_with_strpos(query): | |||
# strpos is the locate equivalent in postgres | |||
if re.search(r"locate\(", query, flags=re.IGNORECASE): | |||
query = re.sub( | |||
r"locate\(([^,]+),([^)]+)(\)?)\)", r"strpos(\2\3, \1)", query, flags=re.IGNORECASE | |||
) | |||
if LOCATE_QUERY_PATTERN.search(query): | |||
query = LOCATE_SUB_PATTERN.sub(r"strpos(\2\3, \1)", query) | |||
return query |
@@ -9,6 +9,9 @@ from frappe.boot import get_additional_filters_from_hooks | |||
from frappe.model.db_query import get_timespan_date_range | |||
from frappe.query_builder import Criterion, Field, Order, Table | |||
TAB_PATTERN = re.compile("^tab") | |||
WORDS_PATTERN = re.compile(r"\w+") | |||
def like(key: Field, value: str) -> frappe.qb: | |||
"""Wrapper method for `LIKE` | |||
@@ -391,7 +394,7 @@ class Permission: | |||
doctype = [doctype] | |||
for dt in doctype: | |||
dt = re.sub("^tab", "", dt) | |||
dt = TAB_PATTERN.sub("", dt) | |||
if not frappe.has_permission( | |||
dt, | |||
"select", | |||
@@ -407,4 +410,4 @@ class Permission: | |||
@staticmethod | |||
def get_tables_from_query(query: str): | |||
return [table for table in re.findall(r"\w+", query) if table.startswith("tab")] | |||
return [table for table in WORDS_PATTERN.findall(query) if table.startswith("tab")] |
@@ -4,6 +4,9 @@ import frappe | |||
from frappe import _ | |||
from frappe.utils import cint, cstr, flt | |||
SPECIAL_CHAR_PATTERN = re.compile(r"[\W]", flags=re.UNICODE) | |||
VARCHAR_CAST_PATTERN = re.compile(r"varchar\(([\d]+)\)") | |||
class InvalidColumnName(frappe.ValidationError): | |||
pass | |||
@@ -130,7 +133,7 @@ class DBTable: | |||
if not current_col: | |||
continue | |||
current_type = self.current_columns[col.fieldname]["type"] | |||
current_length = re.findall(r"varchar\(([\d]+)\)", current_type) | |||
current_length = VARCHAR_CAST_PATTERN.findall(current_type) | |||
if not current_length: | |||
# case when the field is no longer a varchar | |||
continue | |||
@@ -304,8 +307,7 @@ class DbColumn: | |||
def validate_column_name(n): | |||
special_characters = re.findall(r"[\W]", n, re.UNICODE) | |||
if special_characters: | |||
if special_characters := SPECIAL_CHAR_PATTERN.findall(n): | |||
special_characters = ", ".join('"{0}"'.format(c) for c in special_characters) | |||
frappe.throw( | |||
_("Fieldname {0} cannot have special characters like {1}").format( | |||
@@ -1,16 +1,18 @@ | |||
# Copyright (c) 2015, Frappe Technologies Pvt. Ltd. and Contributors | |||
# License: MIT. See LICENSE | |||
import re | |||
import frappe | |||
from frappe.model.document import Document | |||
NAME_PATTERN = re.compile("[%'\"#*?`]") | |||
class Note(Document): | |||
def autoname(self): | |||
# replace forbidden characters | |||
import re | |||
self.name = re.sub("[%'\"#*?`]", "", self.title.strip()) | |||
self.name = NAME_PATTERN.sub("", self.title.strip()) | |||
def validate(self): | |||
if self.notify_on_login and not self.expire_notification_on: | |||
@@ -24,6 +24,8 @@ from frappe.utils import ( | |||
) | |||
from frappe.utils.pdf import get_pdf | |||
EMBED_PATTERN = re.compile("""embed=["'](.*?)["']""") | |||
def get_email( | |||
recipients, | |||
@@ -190,7 +192,7 @@ class EMail: | |||
def set_part_html(self, message, inline_images): | |||
from email.mime.text import MIMEText | |||
has_inline_images = re.search("""embed=['"].*?['"]""", message) | |||
has_inline_images = EMBED_PATTERN.search(message) | |||
if has_inline_images: | |||
# process inline images | |||
@@ -499,7 +501,7 @@ def replace_filename_with_cid(message): | |||
inline_images = [] | |||
while True: | |||
matches = re.search("""embed=["'](.*?)["']""", message) | |||
matches = EMBED_PATTERN.search(message) | |||
if not matches: | |||
break | |||
groups = matches.groups() | |||
@@ -510,7 +512,7 @@ def replace_filename_with_cid(message): | |||
filecontent = get_filecontent_from_path(img_path) | |||
if not filecontent: | |||
message = re.sub("""embed=['"]{0}['"]""".format(img_path), "", message) | |||
message = re.sub(f"""embed=['"]{img_path}['"]""", "", message) | |||
continue | |||
content_id = random_string(10) | |||
@@ -519,9 +521,7 @@ def replace_filename_with_cid(message): | |||
{"filename": filename, "filecontent": filecontent, "content_id": content_id} | |||
) | |||
message = re.sub( | |||
"""embed=['"]{0}['"]""".format(img_path), 'src="cid:{0}"'.format(content_id), message | |||
) | |||
message = re.sub(f"""embed=['"]{img_path}['"]""", f'src="cid:{content_id}"', message) | |||
return (message, inline_images) | |||
@@ -38,6 +38,9 @@ from frappe.utils.user import is_system_user | |||
# fix due to a python bug in poplib that limits it to 2048 | |||
poplib._MAXLINE = 20480 | |||
THREAD_ID_PATTERN = re.compile(r"(?<=\[)[\w/-]+") | |||
WORDS_PATTERN = re.compile(r"\w+") | |||
class EmailSizeExceededError(frappe.ValidationError): | |||
pass | |||
@@ -273,7 +276,7 @@ class EmailServer: | |||
return | |||
def parse_imap_response(self, cmd, response): | |||
pattern = r"(?<={cmd} )[0-9]*".format(cmd=cmd) | |||
pattern = rf"(?<={cmd} )[0-9]*" | |||
match = re.search(pattern, response.decode("utf-8"), re.U | re.I) | |||
if match: | |||
@@ -332,8 +335,7 @@ class EmailServer: | |||
flags = [] | |||
for flag in imaplib.ParseFlags(flag_string) or []: | |||
pattern = re.compile(r"\w+") | |||
match = re.search(pattern, frappe.as_unicode(flag)) | |||
match = WORDS_PATTERN.search(frappe.as_unicode(flag)) | |||
flags.append(match.group(0)) | |||
if "Seen" in flags: | |||
@@ -622,7 +624,7 @@ class Email: | |||
def get_thread_id(self): | |||
"""Extract thread ID from `[]`""" | |||
l = re.findall(r"(?<=\[)[\w/-]+", self.subject) | |||
l = THREAD_ID_PATTERN.findall(self.subject) | |||
return l and l[0] or None | |||
def is_reply(self): | |||
@@ -29,6 +29,14 @@ from frappe.utils import ( | |||
make_filter_tuple, | |||
) | |||
LOCATE_PATTERN = re.compile(r"locate\([^,]+,\s*[`\"]?name[`\"]?\s*\)") | |||
LOCATE_CAST_PATTERN = re.compile(r"locate\(([^,]+),\s*([`\"]?name[`\"]?)\s*\)") | |||
FUNC_IFNULL_PATTERN = re.compile(r"(strpos|ifnull|coalesce)\(\s*[`\"]?name[`\"]?\s*,") | |||
CAST_VARCHAR_PATTERN = re.compile(r"([`\"]?tab[\w`\" -]+\.[`\"]?name[`\"]?)(?!\w)") | |||
ORDER_BY_PATTERN = re.compile( | |||
r"\ order\ by\ |\ asc|\ ASC|\ desc|\ DESC", | |||
) | |||
class DatabaseQuery(object): | |||
def __init__(self, doctype, user=None): | |||
@@ -266,7 +274,7 @@ class DatabaseQuery(object): | |||
return args | |||
def prepare_select_args(self, args): | |||
order_field = re.sub(r"\ order\ by\ |\ asc|\ ASC|\ desc|\ DESC", "", args.order_by) | |||
order_field = ORDER_BY_PATTERN.sub("", args.order_by) | |||
if order_field not in args.fields: | |||
extracted_column = order_column = order_field.replace("`", "") | |||
@@ -957,16 +965,14 @@ def cast_name(column: str) -> str: | |||
kwargs = {"string": column, "flags": re.IGNORECASE} | |||
if "cast(" not in column.lower() and "::" not in column: | |||
if re.search(r"locate\([^,]+,\s*[`\"]?name[`\"]?\s*\)", **kwargs): | |||
return re.sub( | |||
r"locate\(([^,]+),\s*([`\"]?name[`\"]?)\s*\)", r"locate(\1, cast(\2 as varchar))", **kwargs | |||
) | |||
if LOCATE_PATTERN.search(**kwargs): | |||
return LOCATE_CAST_PATTERN.sub(r"locate(\1, cast(\2 as varchar))", **kwargs) | |||
elif match := re.search(r"(strpos|ifnull|coalesce)\(\s*[`\"]?name[`\"]?\s*,", **kwargs): | |||
elif match := FUNC_IFNULL_PATTERN.search(**kwargs): | |||
func = match.groups()[0] | |||
return re.sub(rf"{func}\(\s*([`\"]?name[`\"]?)\s*,", rf"{func}(cast(\1 as varchar),", **kwargs) | |||
return re.sub(r"([`\"]?tab[\w`\" -]+\.[`\"]?name[`\"]?)(?!\w)", r"cast(\1 as varchar)", **kwargs) | |||
return CAST_VARCHAR_PATTERN.sub(r"cast(\1 as varchar)", **kwargs) | |||
return column | |||
@@ -20,6 +20,7 @@ if TYPE_CHECKING: | |||
autoincremented_site_status_map = {} | |||
NAMING_SERIES_PATTERN = re.compile(r"^[\w\- \/.#{}]+$", re.UNICODE) | |||
BRACED_PARAMS_PATTERN = re.compile(r"(\{[\w | #]+\})") | |||
class InvalidNamingSeriesError(frappe.ValidationError): | |||
@@ -448,7 +449,7 @@ def validate_name(doctype: str, name: Union[int, str], case: Optional[str] = Non | |||
frappe.throw(_("Name of {0} cannot be {1}").format(doctype, name), frappe.NameError) | |||
special_characters = "<>" | |||
if re.findall("[{0}]+".format(special_characters), name): | |||
if re.findall(f"[{special_characters}]+", name): | |||
message = ", ".join("'{0}'".format(c) for c in special_characters) | |||
frappe.throw( | |||
_("Name cannot contain special characters like {0}").format(message), frappe.NameError | |||
@@ -535,6 +536,6 @@ def _format_autoname(autoname, doc): | |||
return parse_naming_series([trimmed_param], doc=doc) | |||
# Replace braced params with their parsed value | |||
name = re.sub(r"(\{[\w | #]+\})", get_param_value_for_match, autoname_value) | |||
name = BRACED_PARAMS_PATTERN.sub(get_param_value_for_match, autoname_value) | |||
return name |
@@ -21,10 +21,7 @@ STANDARD_FIELD_CONVERSION_MAP = { | |||
"_assign": "Text", | |||
"docstatus": "Int", | |||
} | |||
""" | |||
Model utilities, unclassified functions | |||
""" | |||
INCLUDE_DIRECTIVE_PATTERN = re.compile(r"""{% include\s['"](.*)['"]\s%}""") | |||
def set_default(doc, key): | |||
@@ -67,7 +64,7 @@ def render_include(content): | |||
# try 5 levels of includes | |||
for i in range(5): | |||
if "{% include" in content: | |||
paths = re.findall(r"""{% include\s['"](.*)['"]\s%}""", content) | |||
paths = INCLUDE_DIRECTIVE_PATTERN.findall(content) | |||
if not paths: | |||
frappe.throw(_("Invalid include path"), InvalidIncludePath) | |||
@@ -78,7 +75,7 @@ def render_include(content): | |||
if path.endswith(".html"): | |||
include = html_to_js_template(path, include) | |||
content = re.sub(r"""{{% include\s['"]{0}['"]\s%}}""".format(path), include, content) | |||
content = re.sub(rf"""{{% include\s['"]{path}['"]\s%}}""", include, content) | |||
else: | |||
break | |||
@@ -16,6 +16,7 @@ from frappe import _ | |||
RECORDER_INTERCEPT_FLAG = "recorder-intercept" | |||
RECORDER_REQUEST_SPARSE_HASH = "recorder-requests-sparse" | |||
RECORDER_REQUEST_HASH = "recorder-requests" | |||
TRACEBACK_PATH_PATTERN = re.compile(".*/apps/") | |||
def sql(*args, **kwargs): | |||
@@ -58,7 +59,7 @@ def get_current_stack_frames(): | |||
for frame, filename, lineno, function, context, index in list(reversed(frames))[:-2]: | |||
if "/apps/" in filename: | |||
yield { | |||
"filename": re.sub(".*/apps/", "", filename), | |||
"filename": TRACEBACK_PATH_PATTERN.sub("", filename), | |||
"lineno": lineno, | |||
"function": function, | |||
} | |||
@@ -10,6 +10,11 @@ from frappe.utils.html_utils import clean_html | |||
from frappe.website.doctype.blog_settings.blog_settings import get_comment_limit | |||
from frappe.website.utils import clear_cache | |||
URLS_COMMENT_PATTERN = re.compile( | |||
r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", re.IGNORECASE | |||
) | |||
EMAIL_PATTERN = re.compile(r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)", re.IGNORECASE) | |||
@frappe.whitelist(allow_guest=True) | |||
@rate_limit(key="reference_name", limit=get_comment_limit, seconds=60 * 60) | |||
@@ -23,12 +28,7 @@ def add_comment(comment, comment_email, comment_by, reference_doctype, reference | |||
frappe.msgprint(_("The comment cannot be empty")) | |||
return False | |||
url_regex = re.compile( | |||
r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", re.IGNORECASE | |||
) | |||
email_regex = re.compile(r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)", re.IGNORECASE) | |||
if url_regex.search(comment) or email_regex.search(comment): | |||
if URLS_COMMENT_PATTERN.search(comment) or EMAIL_PATTERN.search(comment): | |||
frappe.msgprint(_("Comments cannot have links or email addresses")) | |||
return False | |||
@@ -48,6 +48,8 @@ TRANSLATE_PATTERN = re.compile( | |||
# END: JS context search | |||
r"[\s\n]*\)" # Closing function call ignore leading whitespace/newlines | |||
) | |||
REPORT_TRANSLATE_PATTERN = re.compile('"([^:,^"]*):') | |||
CSV_STRIP_WHITESPACE_PATTERN = re.compile(r"{\s?([0-9]+)\s?}") | |||
def get_language(lang_list: List = None) -> str: | |||
@@ -602,7 +604,7 @@ def get_messages_from_report(name): | |||
messages.extend( | |||
[ | |||
(None, message) | |||
for message in re.findall('"([^:,^"]*):', report.query) | |||
for message in REPORT_TRANSLATE_PATTERN.findall(report.query) | |||
if is_translatable(message) | |||
] | |||
) | |||
@@ -801,7 +803,7 @@ def write_csv_file(path, app_messages, lang_dict): | |||
t = lang_dict.get(message, "") | |||
# strip whitespaces | |||
translated_string = re.sub(r"{\s?([0-9]+)\s?}", r"{\g<1>}", t) | |||
translated_string = CSV_STRIP_WHITESPACE_PATTERN.sub(r"{\g<1>}", t) | |||
if translated_string: | |||
w.writerow([message, translated_string, context]) | |||
@@ -27,6 +27,16 @@ import frappe | |||
from frappe.utils.data import * | |||
from frappe.utils.html_utils import sanitize_html | |||
EMAIL_NAME_PATTERN = re.compile(r"[^A-Za-z0-9\u00C0-\u024F\/\_\' ]+") | |||
EMAIL_STRING_PATTERN = re.compile(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)") | |||
NON_MD_HTML_PATTERN = re.compile(r"<p[\s]*>|<br[\s]*>") | |||
HTML_TAGS_PATTERN = re.compile(r"\<[^>]*\>") | |||
INCLUDE_DIRECTIVE_PATTERN = re.compile("""({% include ['"]([^'"]*)['"] %})""") | |||
PHONE_NUMBER_PATTERN = re.compile(r"([0-9\ \+\_\-\,\.\*\#\(\)]){1,20}$") | |||
PERSON_NAME_PATTERN = re.compile(r"^[\w][\w\'\-]*( \w[\w\'\-]*)*$") | |||
WHITESPACE_PATTERN = re.compile(r"[\t\n\r]") | |||
MULTI_EMAIL_STRING_PATTERN = re.compile(r'[,\n](?=(?:[^"]|"[^"]*")*$)') | |||
def get_fullname(user=None): | |||
"""get the full name (first name + last name) of the user from User""" | |||
@@ -116,7 +126,7 @@ def validate_phone_number(phone_number, throw=False): | |||
return False | |||
phone_number = phone_number.strip() | |||
match = re.match(r"([0-9\ \+\_\-\,\.\*\#\(\)]){1,20}$", phone_number) | |||
match = PHONE_NUMBER_PATTERN.match(phone_number) | |||
if not match and throw: | |||
frappe.throw( | |||
@@ -135,7 +145,7 @@ def validate_name(name, throw=False): | |||
return False | |||
name = name.strip() | |||
match = re.match(r"^[\w][\w\'\-]*( \w[\w\'\-]*)*$", name) | |||
match = PERSON_NAME_PATTERN.match(name) | |||
if not match and throw: | |||
frappe.throw(frappe._("{0} is not a valid Name").format(name), frappe.InvalidNameError) | |||
@@ -201,8 +211,8 @@ def split_emails(txt): | |||
email_list = [] | |||
# emails can be separated by comma or newline | |||
s = re.sub(r"[\t\n\r]", " ", cstr(txt)) | |||
for email in re.split(r'[,\n](?=(?:[^"]|"[^"]*")*$)', s): | |||
s = WHITESPACE_PATTERN.sub(" ", cstr(txt)) | |||
for email in MULTI_EMAIL_STRING_PATTERN.split(s): | |||
email = strip(cstr(email)) | |||
if email: | |||
email_list.append(email) | |||
@@ -360,7 +370,7 @@ def remove_blanks(d): | |||
def strip_html_tags(text): | |||
"""Remove html tags from text""" | |||
return re.sub(r"\<[^>]*\>", "", text) | |||
return HTML_TAGS_PATTERN.sub("", text) | |||
def get_file_timestamp(fn): | |||
@@ -584,7 +594,7 @@ def get_html_format(print_path): | |||
with open(print_path, "r") as f: | |||
html_format = f.read() | |||
for include_directive, path in re.findall("""({% include ['"]([^'"]*)['"] %})""", html_format): | |||
for include_directive, path in INCLUDE_DIRECTIVE_PATTERN.findall(html_format): | |||
for app_name in frappe.get_installed_apps(): | |||
include_path = frappe.get_app_path(app_name, *path.split(os.path.sep)) | |||
if os.path.exists(include_path): | |||
@@ -601,7 +611,7 @@ def is_markdown(text): | |||
elif "<!-- html -->" in text: | |||
return False | |||
else: | |||
return not re.search(r"<p[\s]*>|<br[\s]*>", text) | |||
return not NON_MD_HTML_PATTERN.search(text) | |||
def get_sites(sites_path=None): | |||
@@ -670,8 +680,7 @@ def parse_addr(email_string): | |||
name = get_name_from_email_string(email_string, email, name) | |||
return (name, email) | |||
else: | |||
email_regex = re.compile(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)") | |||
email_list = re.findall(email_regex, email_string) | |||
email_list = EMAIL_STRING_PATTERN.findall(email_string) | |||
if len(email_list) > 0 and check_format(email_list[0]): | |||
# take only first email address | |||
email = email_list[0] | |||
@@ -698,7 +707,7 @@ def check_format(email_id): | |||
def get_name_from_email_string(email_string, email_id, name): | |||
name = email_string.replace(email_id, "") | |||
name = re.sub(r"[^A-Za-z0-9\u00C0-\u024F\/\_\' ]+", "", name).strip() | |||
name = EMAIL_NAME_PATTERN.sub("", name).strip() | |||
if not name: | |||
name = email_id | |||
return name | |||
@@ -11,6 +11,8 @@ import git | |||
import frappe | |||
from frappe.utils import touch_file | |||
APP_TITLE_PATTERN = re.compile(r"^(?![\W])[^\d_\s][\w -]+$") | |||
def make_boilerplate(dest, app_name, no_git=False): | |||
if not os.path.exists(dest): | |||
@@ -67,7 +69,7 @@ def _get_user_inputs(app_name): | |||
def is_valid_title(title) -> bool: | |||
if not re.match(r"^(?![\W])[^\d_\s][\w -]+$", title, re.UNICODE): | |||
if not APP_TITLE_PATTERN.match(title): | |||
print( | |||
"App Title should start with a letter and it can only consist of letters, numbers, spaces and underscores" | |||
) | |||
@@ -29,7 +29,22 @@ if typing.TYPE_CHECKING: | |||
DATE_FORMAT = "%Y-%m-%d" | |||
TIME_FORMAT = "%H:%M:%S.%f" | |||
DATETIME_FORMAT = DATE_FORMAT + " " + TIME_FORMAT | |||
DATETIME_FORMAT = f"{DATE_FORMAT} {TIME_FORMAT}" | |||
TIMEDELTA_DAY_PATTERN = re.compile( | |||
r"(?P<days>[-\d]+) day[s]*, (?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d[\.\d+]*)" | |||
) | |||
TIMEDELTA_BASE_PATTERN = re.compile(r"(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d[\.\d+]*)") | |||
URLS_HTTP_TAG_PATTERN = re.compile( | |||
r'(href|src){1}([\s]*=[\s]*[\'"]?)((?:http)[^\'">]+)([\'"]?)' | |||
) # href='https://... | |||
URLS_NOT_HTTP_TAG_PATTERN = re.compile( | |||
r'(href|src){1}([\s]*=[\s]*[\'"]?)((?!http)[^\'" >]+)([\'"]?)' | |||
) # href=/assets/... | |||
URL_NOTATION_PATTERN = re.compile( | |||
r'(:[\s]?url)(\([\'"]?)((?!http)[^\'" >]+)([\'"]?\))' | |||
) # background-image: url('/assets/...') | |||
DURATION_PATTERN = re.compile(r"^(?:(\d+d)?((^|\s)\d+h)?((^|\s)\d+m)?((^|\s)\d+s)?)$") | |||
HTML_TAG_PATTERN = re.compile("<[^>]+>") | |||
class Weekday(Enum): | |||
@@ -692,10 +707,7 @@ def duration_to_seconds(duration): | |||
def validate_duration_format(duration): | |||
import re | |||
is_valid_duration = re.match(r"^(?:(\d+d)?((^|\s)\d+h)?((^|\s)\d+m)?((^|\s)\d+s)?)$", duration) | |||
if not is_valid_duration: | |||
if not DURATION_PATTERN.match(duration): | |||
frappe.throw( | |||
frappe._("Value {0} must be in the valid duration format: d h m s").format( | |||
frappe.bold(duration) | |||
@@ -1297,7 +1309,7 @@ def in_words(integer: int, in_million=True) -> str: | |||
def is_html(text: str) -> bool: | |||
if not isinstance(text, str): | |||
return False | |||
return re.search("<[^>]+>", text) | |||
return HTML_TAG_PATTERN.search(text) | |||
def is_image(filepath: str) -> bool: | |||
@@ -1851,12 +1863,8 @@ def expand_relative_urls(html: str) -> str: | |||
return "".join(to_expand) | |||
html = re.sub( | |||
r'(href|src){1}([\s]*=[\s]*[\'"]?)((?!http)[^\'" >]+)([\'"]?)', _expand_relative_urls, html | |||
) | |||
# background-image: url('/assets/...') | |||
html = re.sub(r'(:[\s]?url)(\([\'"]?)((?!http)[^\'" >]+)([\'"]?\))', _expand_relative_urls, html) | |||
html = URLS_NOT_HTTP_TAG_PATTERN.sub(_expand_relative_urls, html) | |||
html = URL_NOTATION_PATTERN.sub(_expand_relative_urls, html) | |||
return html | |||
@@ -1870,7 +1878,7 @@ def quote_urls(html: str) -> str: | |||
groups[2] = quoted(groups[2]) | |||
return "".join(groups) | |||
return re.sub(r'(href|src){1}([\s]*=[\s]*[\'"]?)((?:http)[^\'">]+)([\'"]?)', _quote_url, html) | |||
return URLS_HTTP_TAG_PATTERN.sub(_quote_url, html) | |||
def unique(seq: typing.Sequence["T"]) -> List["T"]: | |||
@@ -1891,8 +1899,7 @@ def get_string_between(start: str, string: str, end: str) -> str: | |||
if not string: | |||
return "" | |||
regex = "{0}(.*){1}".format(start, end) | |||
out = re.search(regex, string) | |||
out = re.search(f"{start}(.*){end}", string) | |||
return out.group(1) if out else string | |||
@@ -2098,10 +2105,8 @@ def format_timedelta(o: datetime.timedelta) -> str: | |||
def parse_timedelta(s: str) -> datetime.timedelta: | |||
# ref: https://stackoverflow.com/a/21074460/10309266 | |||
if "day" in s: | |||
m = re.match( | |||
r"(?P<days>[-\d]+) day[s]*, (?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d[\.\d+]*)", s | |||
) | |||
m = TIMEDELTA_DAY_PATTERN.match(s) | |||
else: | |||
m = re.match(r"(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d[\.\d+]*)", s) | |||
m = TIMEDELTA_BASE_PATTERN.match(s) | |||
return datetime.timedelta(**{key: float(val) for key, val in m.groupdict().items()}) |
@@ -20,6 +20,8 @@ from frappe.utils import ( | |||
formatdate, | |||
) | |||
BLOCK_TAGS_PATTERN = re.compile(r"(<br|<div|<p)") | |||
def format_value(value, df=None, doc=None, currency=None, translated=False, format=None): | |||
"""Format value based on given fieldtype, document reference, currency reference. | |||
@@ -97,7 +99,7 @@ def format_value(value, df=None, doc=None, currency=None, translated=False, form | |||
return "{}%".format(flt(value, 2)) | |||
elif df.get("fieldtype") in ("Text", "Small Text"): | |||
if not re.search(r"(<br|<div|<p)", value): | |||
if not BLOCK_TAGS_PATTERN.search(value): | |||
return frappe.safe_decode(value).replace("\n", "<br>") | |||
elif df.get("fieldtype") == "Markdown Editor": | |||
@@ -13,6 +13,8 @@ from frappe.utils import cint, strip_html_tags | |||
from frappe.utils.data import cstr | |||
from frappe.utils.html_utils import unescape_html | |||
HTML_TAGS_PATTERN = re.compile(r"(?s)<[\s]*(script|style).*?</\1>") | |||
def setup_global_search_table(): | |||
""" | |||
@@ -360,7 +362,7 @@ def get_formatted_value(value, field): | |||
if getattr(field, "fieldtype", None) in ["Text", "Text Editor"]: | |||
value = unescape_html(frappe.safe_decode(value)) | |||
value = re.subn(r"(?s)<[\s]*(script|style).*?</\1>", "", str(value))[0] | |||
value = HTML_TAGS_PATTERN.subn("", str(value))[0] | |||
value = " ".join(value.split()) | |||
return field.label + " : " + strip_html_tags(str(value)) | |||
@@ -5,6 +5,16 @@ from bleach_allowlist import bleach_allowlist | |||
import frappe | |||
EMOJI_PATTERN = re.compile( | |||
"(\ud83d[\ude00-\ude4f])|" | |||
"(\ud83c[\udf00-\uffff])|" | |||
"(\ud83d[\u0000-\uddff])|" | |||
"(\ud83d[\ude80-\udeff])|" | |||
"(\ud83c[\udde0-\uddff])" | |||
"+", | |||
flags=re.UNICODE, | |||
) | |||
def clean_html(html): | |||
import bleach | |||
@@ -181,28 +191,17 @@ def is_json(text): | |||
def get_icon_html(icon, small=False): | |||
from frappe.utils import is_image | |||
emoji_pattern = re.compile( | |||
"(\ud83d[\ude00-\ude4f])|" | |||
"(\ud83c[\udf00-\uffff])|" | |||
"(\ud83d[\u0000-\uddff])|" | |||
"(\ud83d[\ude80-\udeff])|" | |||
"(\ud83c[\udde0-\uddff])" | |||
"+", | |||
flags=re.UNICODE, | |||
) | |||
icon = icon or "" | |||
if icon and emoji_pattern.match(icon): | |||
return '<span class="text-muted">' + icon + "</span>" | |||
if icon and EMOJI_PATTERN.match(icon): | |||
return f'<span class="text-muted">{icon}</span>' | |||
if is_image(icon): | |||
return ( | |||
'<img style="width: 16px; height: 16px;" src="{icon}">'.format(icon=icon) | |||
if small | |||
else '<img src="{icon}">'.format(icon=icon) | |||
f'<img style="width: 16px; height: 16px;" src="{icon}">' if small else f'<img src="{icon}">' | |||
) | |||
else: | |||
return "<i class='{icon}'></i>".format(icon=icon) | |||
return f"<i class='{icon}'></i>" | |||
def unescape_html(value): | |||
@@ -177,9 +177,9 @@ def get_dictionary_match_feedback(match, is_sole_match): | |||
word = match.get("token") | |||
# Variations of the match like UPPERCASES | |||
if re.match(scoring.START_UPPER, word): | |||
if scoring.START_UPPER.match(word): | |||
suggestions.append(_("Capitalization doesn't help very much.")) | |||
elif re.match(scoring.ALL_UPPER, word): | |||
elif scoring.ALL_UPPER.match(word): | |||
suggestions.append(_("All-uppercase is almost as easy to guess as all-lowercase.")) | |||
# Match contains l33t speak substitutions | |||
@@ -40,7 +40,7 @@ def make_xlsx(data, sheet_name, wb=None, column_widths=None): | |||
if isinstance(item, str) and next(ILLEGAL_CHARACTERS_RE.finditer(value), None): | |||
# Remove illegal characters from the string | |||
value = re.sub(ILLEGAL_CHARACTERS_RE, "", value) | |||
value = ILLEGAL_CHARACTERS_RE.sub("", value) | |||
clean_row.append(value) | |||
@@ -19,6 +19,8 @@ from frappe.website.utils import ( | |||
) | |||
from frappe.website.website_generator import WebsiteGenerator | |||
# Any header-like tag: "<h" + one character + ">", e.g. <h1>..<h6>.
# NOTE(review): "." also matches <hr> — confirm that is acceptable here.
H_TAG_PATTERN = re.compile("<h.>")
class WebPage(WebsiteGenerator): | |||
def validate(self): | |||
@@ -114,7 +116,7 @@ class WebPage(WebsiteGenerator): | |||
context.header = context.title | |||
# add h1 tag to header | |||
if context.get("header") and not re.findall("<h.>", context.header): | |||
if context.get("header") and not H_TAG_PATTERN.findall(context.header): | |||
context.header = "<h1>" + context.header + "</h1>" | |||
# if title not set, set title from header | |||
@@ -15,6 +15,13 @@ from frappe import _ | |||
from frappe.model.document import Document | |||
from frappe.utils import md_to_html | |||
# Frontmatter fenced by --- or +++ at the start of the text; group(1) is the
# frontmatter body, group(2) the remaining document (re.S lets "." cross lines).
FRONTMATTER_PATTERN = re.compile(r"^\s*(?:---|\+\+\+)(.*?)(?:---|\+\+\+)\s*(.+)$", re.S | re.M)
# Text content of an <h1> tag, captured up to the next "<".
H1_TAG_PATTERN = re.compile("<h1>([^<]*)")
# src attribute value of an <img> tag (single- or double-quoted).
IMAGE_TAG_PATTERN = re.compile(r"""<img[^>]*src\s?=\s?['"]([^'"]*)['"]""")
# Page-name scrubbing: punctuation to drop entirely.
CLEANUP_PATTERN_1 = re.compile(r'[~!@#$%^&*+()<>,."\'\?]')
# Page-name scrubbing: separators to turn into hyphens.
CLEANUP_PATTERN_2 = re.compile("[:/]")
# Page-name scrubbing: runs of repeated hyphens, collapsed to one.
CLEANUP_PATTERN_3 = re.compile(r"(-)\1+")
def delete_page_cache(path): | |||
cache = frappe.cache() | |||
@@ -29,7 +36,7 @@ def delete_page_cache(path): | |||
def find_first_image(html): | |||
m = re.finditer(r"""<img[^>]*src\s?=\s?['"]([^'"]*)['"]""", html) | |||
m = IMAGE_TAG_PATTERN.finditer(html) | |||
try: | |||
return next(m).groups()[0] | |||
except StopIteration: | |||
@@ -156,17 +163,17 @@ def is_signup_disabled(): | |||
return frappe.db.get_single_value("Website Settings", "disable_signup", True) | |||
def cleanup_page_name(title: str) -> str:
	"""Make a URL-safe page name (slug) from *title*.

	Lowercases, strips punctuation, converts ":" and "/" to hyphens,
	collapses whitespace and repeated hyphens, and truncates to 140
	characters. Returns "" for falsy input.
	"""
	if not title:
		return ""

	name = title.lower()
	# drop punctuation, then turn path-like separators into hyphens
	name = CLEANUP_PATTERN_1.sub("", name)
	name = CLEANUP_PATTERN_2.sub("-", name)
	name = "-".join(name.split())

	# replace repeating hyphens
	name = CLEANUP_PATTERN_3.sub(r"\1", name)

	return name[:140]
@@ -287,8 +294,8 @@ def extract_title(source, path): | |||
if not title and "<h1>" in source: | |||
# extract title from h1 | |||
match = re.findall("<h1>([^<]*)", source) | |||
title_content = match[0].strip()[:300] | |||
		match = H1_TAG_PATTERN.search(source).group(1)
		title_content = match.strip()[:300]
if "{{" not in title_content: | |||
title = title_content | |||
@@ -308,17 +315,16 @@ def extract_title(source, path): | |||
return title | |||
def extract_comment_tag(source: str, tag: str):
	"""Extract custom tags in comments from source.

	:param source: raw template source in HTML
	:param tag: tag to search, example "title"
	:return: the tag's stripped value, or None when the tag is absent
	"""
	if f"<!-- {tag}:" in source:
		# group(1) is the captured value; a bare group() would return the
		# whole "<!-- tag: value -->" comment. re.escape guards against
		# regex metacharacters in the tag name.
		return re.search(f"<!-- {re.escape(tag)}:([^>]*) -->", source).group(1).strip()
	return None
def get_html_content_based_on_type(doc, fieldname, content_type): | |||
@@ -378,7 +384,8 @@ def get_frontmatter(string): | |||
"Reference: https://github.com/jonbeebe/frontmatter" | |||
frontmatter = "" | |||
body = "" | |||
result = re.compile(r"^\s*(?:---|\+\+\+)(.*?)(?:---|\+\+\+)\s*(.+)$", re.S | re.M).search(string) | |||
result = FRONTMATTER_PATTERN.search(string) | |||
if result: | |||
frontmatter = result.group(1) | |||
body = result.group(2) | |||
@@ -10,6 +10,9 @@ import frappe.sessions | |||
from frappe import _ | |||
from frappe.utils.jinja_globals import is_rtl | |||
# Whole <script>...</script> elements (content may not contain "<").
SCRIPT_TAG_PATTERN = re.compile(r"\<script[^<]*\</script\>")
# Any stray closing </script> tag left behind after the pass above.
CLOSING_SCRIPT_TAG_PATTERN = re.compile(r"</script\>")
def get_context(context): | |||
if frappe.session.user == "Guest": | |||
@@ -34,10 +37,10 @@ def get_context(context): | |||
boot_json = frappe.as_json(boot) | |||
# remove script tags from boot | |||
boot_json = re.sub(r"\<script[^<]*\</script\>", "", boot_json) | |||
boot_json = SCRIPT_TAG_PATTERN.sub("", boot_json) | |||
# TODO: Find better fix | |||
boot_json = re.sub(r"</script\>", "", boot_json) | |||
boot_json = CLOSING_SCRIPT_TAG_PATTERN.sub("", boot_json) | |||
context.update( | |||
{ | |||