Converted all usages of re.* that weren't compiling the regex separately and re-using it. Separated out the compiled patterns as global variables. Repetitive patterns could be made DRY-er. It would be nicer to have all regexes in a single module so that we could re-use them better, keep track of outdated ones, and keep checks for possible ReDoS issues, etc. (version-14)
@@ -20,6 +20,8 @@ import frappe | |||
timestamps = {} | |||
app_paths = None | |||
sites_path = os.path.abspath(os.getcwd()) | |||
WHITESPACE_PATTERN = re.compile(r"\s+") | |||
HTML_COMMENT_PATTERN = re.compile(r"(<!--.*?-->)") | |||
class AssetsNotDownloadedError(Exception): | |||
@@ -406,10 +408,10 @@ def link_assets_dir(source, target, hard_link=False): | |||
def scrub_html_template(content): | |||
"""Returns HTML content with removed whitespace and comments""" | |||
# remove whitespace to a single space | |||
content = re.sub(r"\s+", " ", content) | |||
content = WHITESPACE_PATTERN.sub(" ", content) | |||
# strip comments | |||
content = re.sub(r"(<!--.*?-->)", "", content) | |||
content = HTML_COMMENT_PATTERN.sub("", content) | |||
return content.replace("'", "'") | |||
@@ -4,6 +4,7 @@ | |||
import io | |||
import json | |||
import os | |||
import re | |||
import timeit | |||
from datetime import date, datetime | |||
@@ -22,6 +23,7 @@ INVALID_VALUES = ("", None) | |||
MAX_ROWS_IN_PREVIEW = 10 | |||
INSERT = "Insert New Records" | |||
UPDATE = "Update Existing Records" | |||
DURATION_PATTERN = re.compile(r"^(?:(\d+d)?((^|\s)\d+h)?((^|\s)\d+m)?((^|\s)\d+s)?)$") | |||
class Importer: | |||
@@ -725,10 +727,7 @@ class Row: | |||
) | |||
return | |||
elif df.fieldtype == "Duration": | |||
import re | |||
is_valid_duration = re.match(r"^(?:(\d+d)?((^|\s)\d+h)?((^|\s)\d+m)?((^|\s)\d+s)?)$", value) | |||
if not is_valid_duration: | |||
if not DURATION_PATTERN.match(value): | |||
self.warnings.append( | |||
{ | |||
"row": self.row_number, | |||
@@ -35,6 +35,12 @@ from frappe.query_builder.functions import Concat | |||
from frappe.utils import cint | |||
from frappe.website.utils import clear_cache | |||
DEPENDS_ON_PATTERN = re.compile(r'[\w\.:_]+\s*={1}\s*[\w\.@\'"]+') | |||
ILLEGAL_FIELDNAME_PATTERN = re.compile("""['",./%@()<>{}]""") | |||
WHITESPACE_PADDING_PATTERN = re.compile(r"^[ \t\n\r]+|[ \t\n\r]+$", flags=re.ASCII) | |||
START_WITH_LETTERS_PATTERN = re.compile(r"^(?![\W])[^\d_\s][\w -]+$", flags=re.ASCII) | |||
FIELD_PATTERN = re.compile("{(.*?)}", flags=re.UNICODE) | |||
class InvalidFieldNameError(frappe.ValidationError): | |||
pass | |||
@@ -357,8 +363,7 @@ class DocType(Document): | |||
else: | |||
if d.fieldname in restricted: | |||
frappe.throw(_("Fieldname {0} is restricted").format(d.fieldname), InvalidFieldNameError) | |||
d.fieldname = re.sub("""['",./%@()<>{}]""", "", d.fieldname) | |||
d.fieldname = ILLEGAL_FIELDNAME_PATTERN.sub("", d.fieldname) | |||
# fieldnames should be lowercase | |||
d.fieldname = d.fieldname.lower() | |||
@@ -842,15 +847,13 @@ class DocType(Document): | |||
_("Doctype name is limited to {0} characters ({1})").format(max_length, name), frappe.NameError | |||
) | |||
flags = {"flags": re.ASCII} | |||
# a DocType name should not start or end with an empty space | |||
if re.search(r"^[ \t\n\r]+|[ \t\n\r]+$", name, **flags): | |||
if WHITESPACE_PADDING_PATTERN.search(name): | |||
frappe.throw(_("DocType's name should not start or end with whitespace"), frappe.NameError) | |||
# a DocType's name should not start with a number or underscore | |||
# and should only contain letters, numbers, underscore, and hyphen | |||
if not re.match(r"^(?![\W])[^\d_\s][\w -]+$", name, **flags): | |||
if not START_WITH_LETTERS_PATTERN.match(name): | |||
frappe.throw( | |||
_( | |||
"A DocType's name should start with a letter and can only " | |||
@@ -1254,7 +1257,7 @@ def validate_fields(meta): | |||
if not pattern: | |||
return | |||
for fieldname in re.findall("{(.*?)}", pattern, re.UNICODE): | |||
for fieldname in FIELD_PATTERN.findall(pattern): | |||
if fieldname.startswith("{"): | |||
# edge case when double curlies are used for escape | |||
continue | |||
@@ -1336,9 +1339,7 @@ def validate_fields(meta): | |||
] | |||
for field in depends_on_fields: | |||
depends_on = docfield.get(field, None) | |||
if ( | |||
depends_on and ("=" in depends_on) and re.match(r'[\w\.:_]+\s*={1}\s*[\w\.@\'"]+', depends_on) | |||
): | |||
if depends_on and ("=" in depends_on) and DEPENDS_ON_PATTERN.match(depends_on): | |||
frappe.throw(_("Invalid {0} condition").format(frappe.unscrub(field)), frappe.ValidationError) | |||
def check_table_multiselect_option(docfield): | |||
@@ -23,6 +23,11 @@ from frappe.query_builder.functions import Count | |||
from frappe.query_builder.utils import DocType | |||
from frappe.utils import cast, get_datetime, get_table_name, getdate, now, sbool | |||
IFNULL_PATTERN = re.compile(r"ifnull\(", flags=re.IGNORECASE) | |||
INDEX_PATTERN = re.compile(r"\s*\([^)]+\)\s*") | |||
SINGLE_WORD_PATTERN = re.compile(r'([`"]?)(tab([A-Z]\w+))\1') | |||
MULTI_WORD_PATTERN = re.compile(r'([`"])(tab([A-Z]\w+)( [A-Z]\w+)+)\1') | |||
class Database(object): | |||
""" | |||
@@ -143,9 +148,8 @@ class Database(object): | |||
# remove whitespace / indentation from start and end of query | |||
query = query.strip() | |||
if re.search(r"ifnull\(", query, flags=re.IGNORECASE): | |||
# replaces ifnull in query with coalesce | |||
query = re.sub(r"ifnull\(", "coalesce(", query, flags=re.IGNORECASE) | |||
# replaces ifnull in query with coalesce | |||
query = IFNULL_PATTERN.sub("coalesce(", query) | |||
if not self._conn: | |||
self.connect() | |||
@@ -1126,8 +1130,7 @@ class Database(object): | |||
def get_index_name(fields): | |||
index_name = "_".join(fields) + "_index" | |||
# remove index length if present e.g. (10) from index name | |||
index_name = re.sub(r"\s*\([^)]+\)\s*", r"", index_name) | |||
return index_name | |||
return INDEX_PATTERN.sub(r"", index_name) | |||
def get_system_setting(self, key): | |||
def _load_system_settings(): | |||
@@ -1219,11 +1222,9 @@ class Database(object): | |||
# and are continued with multiple words that start with a captital letter | |||
# e.g. 'tabXxx' or 'tabXxx Xxx' or 'tabXxx Xxx Xxx' and so on | |||
single_word_regex = r'([`"]?)(tab([A-Z]\w+))\1' | |||
multi_word_regex = r'([`"])(tab([A-Z]\w+)( [A-Z]\w+)+)\1' | |||
tables = [] | |||
for regex in (single_word_regex, multi_word_regex): | |||
tables += [groups[1] for groups in re.findall(regex, query)] | |||
for regex in (SINGLE_WORD_PATTERN, MULTI_WORD_PATTERN): | |||
tables += [groups[1] for groups in regex.findall(query)] | |||
if frappe.flags.touched_tables is None: | |||
frappe.flags.touched_tables = set() | |||
@@ -20,6 +20,11 @@ DEC2FLOAT = psycopg2.extensions.new_type( | |||
psycopg2.extensions.register_type(DEC2FLOAT) | |||
LOCATE_SUB_PATTERN = re.compile(r"locate\(([^,]+),([^)]+)(\)?)\)", flags=re.IGNORECASE) | |||
LOCATE_QUERY_PATTERN = re.compile(r"locate\(", flags=re.IGNORECASE) | |||
PG_TRANSFORM_PATTERN = re.compile(r"([=><]+)\s*([+-]?\d+)(\.0)?(?![a-zA-Z\.\d])") | |||
FROM_TAB_PATTERN = re.compile(r"from tab([\w-]*)", flags=re.IGNORECASE) | |||
class PostgresDatabase(Database): | |||
ProgrammingError = psycopg2.ProgrammingError | |||
@@ -382,12 +387,10 @@ class PostgresDatabase(Database): | |||
def modify_query(query): | |||
""" "Modifies query according to the requirements of postgres""" | |||
# replace ` with " for definitions | |||
query = str(query) | |||
query = query.replace("`", '"') | |||
query = str(query).replace("`", '"') | |||
query = replace_locate_with_strpos(query) | |||
# select from requires "" | |||
if re.search("from tab", query, flags=re.IGNORECASE): | |||
query = re.sub(r"from tab([\w-]*)", r'from "tab\1"', query, flags=re.IGNORECASE) | |||
query = FROM_TAB_PATTERN.sub(r'from "tab\1"', query) | |||
# only find int (with/without signs), ignore decimals (with/without signs), ignore hashes (which start with numbers), | |||
# drop .0 from decimals and add quotes around them | |||
@@ -396,8 +399,7 @@ def modify_query(query): | |||
# >>> re.sub(r"([=><]+)\s*([+-]?\d+)(\.0)?(?![a-zA-Z\.\d])", r"\1 '\2'", query) | |||
# "c='abcd' , a >= '45', b = '-45', c = '40', d= '4500', e=3500.53, f=40psdfsd, g= '9092094312', h=12.00023 | |||
query = re.sub(r"([=><]+)\s*([+-]?\d+)(\.0)?(?![a-zA-Z\.\d])", r"\1 '\2'", query) | |||
return query | |||
return PG_TRANSFORM_PATTERN.sub(r"\1 '\2'", query) | |||
def modify_values(values): | |||
@@ -430,8 +432,6 @@ def modify_values(values): | |||
def replace_locate_with_strpos(query): | |||
# strpos is the locate equivalent in postgres | |||
if re.search(r"locate\(", query, flags=re.IGNORECASE): | |||
query = re.sub( | |||
r"locate\(([^,]+),([^)]+)(\)?)\)", r"strpos(\2\3, \1)", query, flags=re.IGNORECASE | |||
) | |||
if LOCATE_QUERY_PATTERN.search(query): | |||
query = LOCATE_SUB_PATTERN.sub(r"strpos(\2\3, \1)", query) | |||
return query |
@@ -9,6 +9,9 @@ from frappe.boot import get_additional_filters_from_hooks | |||
from frappe.model.db_query import get_timespan_date_range | |||
from frappe.query_builder import Criterion, Field, Order, Table | |||
TAB_PATTERN = re.compile("^tab") | |||
WORDS_PATTERN = re.compile(r"\w+") | |||
def like(key: Field, value: str) -> frappe.qb: | |||
"""Wrapper method for `LIKE` | |||
@@ -391,7 +394,7 @@ class Permission: | |||
doctype = [doctype] | |||
for dt in doctype: | |||
dt = re.sub("^tab", "", dt) | |||
dt = TAB_PATTERN.sub("", dt) | |||
if not frappe.has_permission( | |||
dt, | |||
"select", | |||
@@ -407,4 +410,4 @@ class Permission: | |||
@staticmethod | |||
def get_tables_from_query(query: str): | |||
return [table for table in re.findall(r"\w+", query) if table.startswith("tab")] | |||
return [table for table in WORDS_PATTERN.findall(query) if table.startswith("tab")] |
@@ -4,6 +4,9 @@ import frappe | |||
from frappe import _ | |||
from frappe.utils import cint, cstr, flt | |||
SPECIAL_CHAR_PATTERN = re.compile(r"[\W]", flags=re.UNICODE) | |||
VARCHAR_CAST_PATTERN = re.compile(r"varchar\(([\d]+)\)") | |||
class InvalidColumnName(frappe.ValidationError): | |||
pass | |||
@@ -130,7 +133,7 @@ class DBTable: | |||
if not current_col: | |||
continue | |||
current_type = self.current_columns[col.fieldname]["type"] | |||
current_length = re.findall(r"varchar\(([\d]+)\)", current_type) | |||
current_length = VARCHAR_CAST_PATTERN.findall(current_type) | |||
if not current_length: | |||
# case when the field is no longer a varchar | |||
continue | |||
@@ -304,8 +307,7 @@ class DbColumn: | |||
def validate_column_name(n): | |||
special_characters = re.findall(r"[\W]", n, re.UNICODE) | |||
if special_characters: | |||
if special_characters := SPECIAL_CHAR_PATTERN.findall(n): | |||
special_characters = ", ".join('"{0}"'.format(c) for c in special_characters) | |||
frappe.throw( | |||
_("Fieldname {0} cannot have special characters like {1}").format( | |||
@@ -1,16 +1,18 @@ | |||
# Copyright (c) 2015, Frappe Technologies Pvt. Ltd. and Contributors | |||
# License: MIT. See LICENSE | |||
import re | |||
import frappe | |||
from frappe.model.document import Document | |||
NAME_PATTERN = re.compile("[%'\"#*?`]") | |||
class Note(Document): | |||
def autoname(self): | |||
# replace forbidden characters | |||
import re | |||
self.name = re.sub("[%'\"#*?`]", "", self.title.strip()) | |||
self.name = NAME_PATTERN.sub("", self.title.strip()) | |||
def validate(self): | |||
if self.notify_on_login and not self.expire_notification_on: | |||
@@ -24,6 +24,8 @@ from frappe.utils import ( | |||
) | |||
from frappe.utils.pdf import get_pdf | |||
EMBED_PATTERN = re.compile("""embed=["'](.*?)["']""") | |||
def get_email( | |||
recipients, | |||
@@ -190,7 +192,7 @@ class EMail: | |||
def set_part_html(self, message, inline_images): | |||
from email.mime.text import MIMEText | |||
has_inline_images = re.search("""embed=['"].*?['"]""", message) | |||
has_inline_images = EMBED_PATTERN.search(message) | |||
if has_inline_images: | |||
# process inline images | |||
@@ -499,7 +501,7 @@ def replace_filename_with_cid(message): | |||
inline_images = [] | |||
while True: | |||
matches = re.search("""embed=["'](.*?)["']""", message) | |||
matches = EMBED_PATTERN.search(message) | |||
if not matches: | |||
break | |||
groups = matches.groups() | |||
@@ -510,7 +512,7 @@ def replace_filename_with_cid(message): | |||
filecontent = get_filecontent_from_path(img_path) | |||
if not filecontent: | |||
message = re.sub("""embed=['"]{0}['"]""".format(img_path), "", message) | |||
message = re.sub(f"""embed=['"]{img_path}['"]""", "", message) | |||
continue | |||
content_id = random_string(10) | |||
@@ -519,9 +521,7 @@ def replace_filename_with_cid(message): | |||
{"filename": filename, "filecontent": filecontent, "content_id": content_id} | |||
) | |||
message = re.sub( | |||
"""embed=['"]{0}['"]""".format(img_path), 'src="cid:{0}"'.format(content_id), message | |||
) | |||
message = re.sub(f"""embed=['"]{img_path}['"]""", f'src="cid:{content_id}"', message) | |||
return (message, inline_images) | |||
@@ -38,6 +38,9 @@ from frappe.utils.user import is_system_user | |||
# fix due to a python bug in poplib that limits it to 2048 | |||
poplib._MAXLINE = 20480 | |||
THREAD_ID_PATTERN = re.compile(r"(?<=\[)[\w/-]+") | |||
WORDS_PATTERN = re.compile(r"\w+") | |||
class EmailSizeExceededError(frappe.ValidationError): | |||
pass | |||
@@ -273,7 +276,7 @@ class EmailServer: | |||
return | |||
def parse_imap_response(self, cmd, response): | |||
pattern = r"(?<={cmd} )[0-9]*".format(cmd=cmd) | |||
pattern = rf"(?<={cmd} )[0-9]*" | |||
match = re.search(pattern, response.decode("utf-8"), re.U | re.I) | |||
if match: | |||
@@ -332,8 +335,7 @@ class EmailServer: | |||
flags = [] | |||
for flag in imaplib.ParseFlags(flag_string) or []: | |||
pattern = re.compile(r"\w+") | |||
match = re.search(pattern, frappe.as_unicode(flag)) | |||
match = WORDS_PATTERN.search(frappe.as_unicode(flag)) | |||
flags.append(match.group(0)) | |||
if "Seen" in flags: | |||
@@ -622,7 +624,7 @@ class Email: | |||
def get_thread_id(self): | |||
"""Extract thread ID from `[]`""" | |||
l = re.findall(r"(?<=\[)[\w/-]+", self.subject) | |||
l = THREAD_ID_PATTERN.findall(self.subject) | |||
return l and l[0] or None | |||
def is_reply(self): | |||
@@ -29,6 +29,14 @@ from frappe.utils import ( | |||
make_filter_tuple, | |||
) | |||
LOCATE_PATTERN = re.compile(r"locate\([^,]+,\s*[`\"]?name[`\"]?\s*\)") | |||
LOCATE_CAST_PATTERN = re.compile(r"locate\(([^,]+),\s*([`\"]?name[`\"]?)\s*\)") | |||
FUNC_IFNULL_PATTERN = re.compile(r"(strpos|ifnull|coalesce)\(\s*[`\"]?name[`\"]?\s*,") | |||
CAST_VARCHAR_PATTERN = re.compile(r"([`\"]?tab[\w`\" -]+\.[`\"]?name[`\"]?)(?!\w)") | |||
ORDER_BY_PATTERN = re.compile( | |||
r"\ order\ by\ |\ asc|\ ASC|\ desc|\ DESC", | |||
) | |||
class DatabaseQuery(object): | |||
def __init__(self, doctype, user=None): | |||
@@ -266,7 +274,7 @@ class DatabaseQuery(object): | |||
return args | |||
def prepare_select_args(self, args): | |||
order_field = re.sub(r"\ order\ by\ |\ asc|\ ASC|\ desc|\ DESC", "", args.order_by) | |||
order_field = ORDER_BY_PATTERN.sub("", args.order_by) | |||
if order_field not in args.fields: | |||
extracted_column = order_column = order_field.replace("`", "") | |||
@@ -957,16 +965,14 @@ def cast_name(column: str) -> str: | |||
kwargs = {"string": column, "flags": re.IGNORECASE} | |||
if "cast(" not in column.lower() and "::" not in column: | |||
if re.search(r"locate\([^,]+,\s*[`\"]?name[`\"]?\s*\)", **kwargs): | |||
return re.sub( | |||
r"locate\(([^,]+),\s*([`\"]?name[`\"]?)\s*\)", r"locate(\1, cast(\2 as varchar))", **kwargs | |||
) | |||
if LOCATE_PATTERN.search(**kwargs): | |||
return LOCATE_CAST_PATTERN.sub(r"locate(\1, cast(\2 as varchar))", **kwargs) | |||
elif match := re.search(r"(strpos|ifnull|coalesce)\(\s*[`\"]?name[`\"]?\s*,", **kwargs): | |||
elif match := FUNC_IFNULL_PATTERN.search(**kwargs): | |||
func = match.groups()[0] | |||
return re.sub(rf"{func}\(\s*([`\"]?name[`\"]?)\s*,", rf"{func}(cast(\1 as varchar),", **kwargs) | |||
return re.sub(r"([`\"]?tab[\w`\" -]+\.[`\"]?name[`\"]?)(?!\w)", r"cast(\1 as varchar)", **kwargs) | |||
return CAST_VARCHAR_PATTERN.sub(r"cast(\1 as varchar)", **kwargs) | |||
return column | |||
@@ -20,6 +20,7 @@ if TYPE_CHECKING: | |||
autoincremented_site_status_map = {} | |||
NAMING_SERIES_PATTERN = re.compile(r"^[\w\- \/.#{}]+$", re.UNICODE) | |||
BRACED_PARAMS_PATTERN = re.compile(r"(\{[\w | #]+\})") | |||
class InvalidNamingSeriesError(frappe.ValidationError): | |||
@@ -448,7 +449,7 @@ def validate_name(doctype: str, name: Union[int, str], case: Optional[str] = Non | |||
frappe.throw(_("Name of {0} cannot be {1}").format(doctype, name), frappe.NameError) | |||
special_characters = "<>" | |||
if re.findall("[{0}]+".format(special_characters), name): | |||
if re.findall(f"[{special_characters}]+", name): | |||
message = ", ".join("'{0}'".format(c) for c in special_characters) | |||
frappe.throw( | |||
_("Name cannot contain special characters like {0}").format(message), frappe.NameError | |||
@@ -535,6 +536,6 @@ def _format_autoname(autoname, doc): | |||
return parse_naming_series([trimmed_param], doc=doc) | |||
# Replace braced params with their parsed value | |||
name = re.sub(r"(\{[\w | #]+\})", get_param_value_for_match, autoname_value) | |||
name = BRACED_PARAMS_PATTERN.sub(get_param_value_for_match, autoname_value) | |||
return name |
@@ -21,10 +21,7 @@ STANDARD_FIELD_CONVERSION_MAP = { | |||
"_assign": "Text", | |||
"docstatus": "Int", | |||
} | |||
""" | |||
Model utilities, unclassified functions | |||
""" | |||
INCLUDE_DIRECTIVE_PATTERN = re.compile(r"""{% include\s['"](.*)['"]\s%}""") | |||
def set_default(doc, key): | |||
@@ -67,7 +64,7 @@ def render_include(content): | |||
# try 5 levels of includes | |||
for i in range(5): | |||
if "{% include" in content: | |||
paths = re.findall(r"""{% include\s['"](.*)['"]\s%}""", content) | |||
paths = INCLUDE_DIRECTIVE_PATTERN.findall(content) | |||
if not paths: | |||
frappe.throw(_("Invalid include path"), InvalidIncludePath) | |||
@@ -78,7 +75,7 @@ def render_include(content): | |||
if path.endswith(".html"): | |||
include = html_to_js_template(path, include) | |||
content = re.sub(r"""{{% include\s['"]{0}['"]\s%}}""".format(path), include, content) | |||
content = re.sub(rf"""{{% include\s['"]{path}['"]\s%}}""", include, content) | |||
else: | |||
break | |||
@@ -16,6 +16,7 @@ from frappe import _ | |||
RECORDER_INTERCEPT_FLAG = "recorder-intercept" | |||
RECORDER_REQUEST_SPARSE_HASH = "recorder-requests-sparse" | |||
RECORDER_REQUEST_HASH = "recorder-requests" | |||
TRACEBACK_PATH_PATTERN = re.compile(".*/apps/") | |||
def sql(*args, **kwargs): | |||
@@ -58,7 +59,7 @@ def get_current_stack_frames(): | |||
for frame, filename, lineno, function, context, index in list(reversed(frames))[:-2]: | |||
if "/apps/" in filename: | |||
yield { | |||
"filename": re.sub(".*/apps/", "", filename), | |||
"filename": TRACEBACK_PATH_PATTERN.sub("", filename), | |||
"lineno": lineno, | |||
"function": function, | |||
} | |||
@@ -10,6 +10,11 @@ from frappe.utils.html_utils import clean_html | |||
from frappe.website.doctype.blog_settings.blog_settings import get_comment_limit | |||
from frappe.website.utils import clear_cache | |||
URLS_COMMENT_PATTERN = re.compile( | |||
r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", re.IGNORECASE | |||
) | |||
EMAIL_PATTERN = re.compile(r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)", re.IGNORECASE) | |||
@frappe.whitelist(allow_guest=True) | |||
@rate_limit(key="reference_name", limit=get_comment_limit, seconds=60 * 60) | |||
@@ -23,12 +28,7 @@ def add_comment(comment, comment_email, comment_by, reference_doctype, reference | |||
frappe.msgprint(_("The comment cannot be empty")) | |||
return False | |||
url_regex = re.compile( | |||
r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+", re.IGNORECASE | |||
) | |||
email_regex = re.compile(r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)", re.IGNORECASE) | |||
if url_regex.search(comment) or email_regex.search(comment): | |||
if URLS_COMMENT_PATTERN.search(comment) or EMAIL_PATTERN.search(comment): | |||
frappe.msgprint(_("Comments cannot have links or email addresses")) | |||
return False | |||
@@ -48,6 +48,8 @@ TRANSLATE_PATTERN = re.compile( | |||
# END: JS context search | |||
r"[\s\n]*\)" # Closing function call ignore leading whitespace/newlines | |||
) | |||
REPORT_TRANSLATE_PATTERN = re.compile('"([^:,^"]*):') | |||
CSV_STRIP_WHITESPACE_PATTERN = re.compile(r"{\s?([0-9]+)\s?}") | |||
def get_language(lang_list: List = None) -> str: | |||
@@ -602,7 +604,7 @@ def get_messages_from_report(name): | |||
messages.extend( | |||
[ | |||
(None, message) | |||
for message in re.findall('"([^:,^"]*):', report.query) | |||
for message in REPORT_TRANSLATE_PATTERN.findall(report.query) | |||
if is_translatable(message) | |||
] | |||
) | |||
@@ -801,7 +803,7 @@ def write_csv_file(path, app_messages, lang_dict): | |||
t = lang_dict.get(message, "") | |||
# strip whitespaces | |||
translated_string = re.sub(r"{\s?([0-9]+)\s?}", r"{\g<1>}", t) | |||
translated_string = CSV_STRIP_WHITESPACE_PATTERN.sub(r"{\g<1>}", t) | |||
if translated_string: | |||
w.writerow([message, translated_string, context]) | |||
@@ -27,6 +27,16 @@ import frappe | |||
from frappe.utils.data import * | |||
from frappe.utils.html_utils import sanitize_html | |||
EMAIL_NAME_PATTERN = re.compile(r"[^A-Za-z0-9\u00C0-\u024F\/\_\' ]+") | |||
EMAIL_STRING_PATTERN = re.compile(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)") | |||
NON_MD_HTML_PATTERN = re.compile(r"<p[\s]*>|<br[\s]*>") | |||
HTML_TAGS_PATTERN = re.compile(r"\<[^>]*\>") | |||
INCLUDE_DIRECTIVE_PATTERN = re.compile("""({% include ['"]([^'"]*)['"] %})""") | |||
PHONE_NUMBER_PATTERN = re.compile(r"([0-9\ \+\_\-\,\.\*\#\(\)]){1,20}$") | |||
PERSON_NAME_PATTERN = re.compile(r"^[\w][\w\'\-]*( \w[\w\'\-]*)*$") | |||
WHITESPACE_PATTERN = re.compile(r"[\t\n\r]") | |||
MULTI_EMAIL_STRING_PATTERN = re.compile(r'[,\n](?=(?:[^"]|"[^"]*")*$)') | |||
def get_fullname(user=None): | |||
"""get the full name (first name + last name) of the user from User""" | |||
@@ -116,7 +126,7 @@ def validate_phone_number(phone_number, throw=False): | |||
return False | |||
phone_number = phone_number.strip() | |||
match = re.match(r"([0-9\ \+\_\-\,\.\*\#\(\)]){1,20}$", phone_number) | |||
match = PHONE_NUMBER_PATTERN.match(phone_number) | |||
if not match and throw: | |||
frappe.throw( | |||
@@ -135,7 +145,7 @@ def validate_name(name, throw=False): | |||
return False | |||
name = name.strip() | |||
match = re.match(r"^[\w][\w\'\-]*( \w[\w\'\-]*)*$", name) | |||
match = PERSON_NAME_PATTERN.match(name) | |||
if not match and throw: | |||
frappe.throw(frappe._("{0} is not a valid Name").format(name), frappe.InvalidNameError) | |||
@@ -201,8 +211,8 @@ def split_emails(txt): | |||
email_list = [] | |||
# emails can be separated by comma or newline | |||
s = re.sub(r"[\t\n\r]", " ", cstr(txt)) | |||
for email in re.split(r'[,\n](?=(?:[^"]|"[^"]*")*$)', s): | |||
s = WHITESPACE_PATTERN.sub(" ", cstr(txt)) | |||
for email in MULTI_EMAIL_STRING_PATTERN.split(s): | |||
email = strip(cstr(email)) | |||
if email: | |||
email_list.append(email) | |||
@@ -360,7 +370,7 @@ def remove_blanks(d): | |||
def strip_html_tags(text): | |||
"""Remove html tags from text""" | |||
return re.sub(r"\<[^>]*\>", "", text) | |||
return HTML_TAGS_PATTERN.sub("", text) | |||
def get_file_timestamp(fn): | |||
@@ -584,7 +594,7 @@ def get_html_format(print_path): | |||
with open(print_path, "r") as f: | |||
html_format = f.read() | |||
for include_directive, path in re.findall("""({% include ['"]([^'"]*)['"] %})""", html_format): | |||
for include_directive, path in INCLUDE_DIRECTIVE_PATTERN.findall(html_format): | |||
for app_name in frappe.get_installed_apps(): | |||
include_path = frappe.get_app_path(app_name, *path.split(os.path.sep)) | |||
if os.path.exists(include_path): | |||
@@ -601,7 +611,7 @@ def is_markdown(text): | |||
elif "<!-- html -->" in text: | |||
return False | |||
else: | |||
return not re.search(r"<p[\s]*>|<br[\s]*>", text) | |||
return not NON_MD_HTML_PATTERN.search(text) | |||
def get_sites(sites_path=None): | |||
@@ -670,8 +680,7 @@ def parse_addr(email_string): | |||
name = get_name_from_email_string(email_string, email, name) | |||
return (name, email) | |||
else: | |||
email_regex = re.compile(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)") | |||
email_list = re.findall(email_regex, email_string) | |||
email_list = EMAIL_STRING_PATTERN.findall(email_string) | |||
if len(email_list) > 0 and check_format(email_list[0]): | |||
# take only first email address | |||
email = email_list[0] | |||
@@ -698,7 +707,7 @@ def check_format(email_id): | |||
def get_name_from_email_string(email_string, email_id, name): | |||
name = email_string.replace(email_id, "") | |||
name = re.sub(r"[^A-Za-z0-9\u00C0-\u024F\/\_\' ]+", "", name).strip() | |||
name = EMAIL_NAME_PATTERN.sub("", name).strip() | |||
if not name: | |||
name = email_id | |||
return name | |||
@@ -11,6 +11,8 @@ import git | |||
import frappe | |||
from frappe.utils import touch_file | |||
APP_TITLE_PATTERN = re.compile(r"^(?![\W])[^\d_\s][\w -]+$") | |||
def make_boilerplate(dest, app_name, no_git=False): | |||
if not os.path.exists(dest): | |||
@@ -67,7 +69,7 @@ def _get_user_inputs(app_name): | |||
def is_valid_title(title) -> bool: | |||
if not re.match(r"^(?![\W])[^\d_\s][\w -]+$", title, re.UNICODE): | |||
if not APP_TITLE_PATTERN.match(title): | |||
print( | |||
"App Title should start with a letter and it can only consist of letters, numbers, spaces and underscores" | |||
) | |||
@@ -29,7 +29,22 @@ if typing.TYPE_CHECKING: | |||
DATE_FORMAT = "%Y-%m-%d" | |||
TIME_FORMAT = "%H:%M:%S.%f" | |||
DATETIME_FORMAT = DATE_FORMAT + " " + TIME_FORMAT | |||
DATETIME_FORMAT = f"{DATE_FORMAT} {TIME_FORMAT}" | |||
TIMEDELTA_DAY_PATTERN = re.compile( | |||
r"(?P<days>[-\d]+) day[s]*, (?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d[\.\d+]*)" | |||
) | |||
TIMEDELTA_BASE_PATTERN = re.compile(r"(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d[\.\d+]*)") | |||
URLS_HTTP_TAG_PATTERN = re.compile( | |||
r'(href|src){1}([\s]*=[\s]*[\'"]?)((?:http)[^\'">]+)([\'"]?)' | |||
) # href='https://... | |||
URLS_NOT_HTTP_TAG_PATTERN = re.compile( | |||
r'(href|src){1}([\s]*=[\s]*[\'"]?)((?!http)[^\'" >]+)([\'"]?)' | |||
) # href=/assets/... | |||
URL_NOTATION_PATTERN = re.compile( | |||
r'(:[\s]?url)(\([\'"]?)((?!http)[^\'" >]+)([\'"]?\))' | |||
) # background-image: url('/assets/...') | |||
DURATION_PATTERN = re.compile(r"^(?:(\d+d)?((^|\s)\d+h)?((^|\s)\d+m)?((^|\s)\d+s)?)$") | |||
HTML_TAG_PATTERN = re.compile("<[^>]+>") | |||
class Weekday(Enum): | |||
@@ -692,10 +707,7 @@ def duration_to_seconds(duration): | |||
def validate_duration_format(duration): | |||
import re | |||
is_valid_duration = re.match(r"^(?:(\d+d)?((^|\s)\d+h)?((^|\s)\d+m)?((^|\s)\d+s)?)$", duration) | |||
if not is_valid_duration: | |||
if not DURATION_PATTERN.match(duration): | |||
frappe.throw( | |||
frappe._("Value {0} must be in the valid duration format: d h m s").format( | |||
frappe.bold(duration) | |||
@@ -1297,7 +1309,7 @@ def in_words(integer: int, in_million=True) -> str: | |||
def is_html(text: str) -> bool: | |||
if not isinstance(text, str): | |||
return False | |||
return re.search("<[^>]+>", text) | |||
return HTML_TAG_PATTERN.search(text) | |||
def is_image(filepath: str) -> bool: | |||
@@ -1851,12 +1863,8 @@ def expand_relative_urls(html: str) -> str: | |||
return "".join(to_expand) | |||
html = re.sub( | |||
r'(href|src){1}([\s]*=[\s]*[\'"]?)((?!http)[^\'" >]+)([\'"]?)', _expand_relative_urls, html | |||
) | |||
# background-image: url('/assets/...') | |||
html = re.sub(r'(:[\s]?url)(\([\'"]?)((?!http)[^\'" >]+)([\'"]?\))', _expand_relative_urls, html) | |||
html = URLS_NOT_HTTP_TAG_PATTERN.sub(_expand_relative_urls, html) | |||
html = URL_NOTATION_PATTERN.sub(_expand_relative_urls, html) | |||
return html | |||
@@ -1870,7 +1878,7 @@ def quote_urls(html: str) -> str: | |||
groups[2] = quoted(groups[2]) | |||
return "".join(groups) | |||
return re.sub(r'(href|src){1}([\s]*=[\s]*[\'"]?)((?:http)[^\'">]+)([\'"]?)', _quote_url, html) | |||
return URLS_HTTP_TAG_PATTERN.sub(_quote_url, html) | |||
def unique(seq: typing.Sequence["T"]) -> List["T"]: | |||
@@ -1891,8 +1899,7 @@ def get_string_between(start: str, string: str, end: str) -> str: | |||
if not string: | |||
return "" | |||
regex = "{0}(.*){1}".format(start, end) | |||
out = re.search(regex, string) | |||
out = re.search(f"{start}(.*){end}", string) | |||
return out.group(1) if out else string | |||
@@ -2098,10 +2105,8 @@ def format_timedelta(o: datetime.timedelta) -> str: | |||
def parse_timedelta(s: str) -> datetime.timedelta: | |||
# ref: https://stackoverflow.com/a/21074460/10309266 | |||
if "day" in s: | |||
m = re.match( | |||
r"(?P<days>[-\d]+) day[s]*, (?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d[\.\d+]*)", s | |||
) | |||
m = TIMEDELTA_DAY_PATTERN.match(s) | |||
else: | |||
m = re.match(r"(?P<hours>\d+):(?P<minutes>\d+):(?P<seconds>\d[\.\d+]*)", s) | |||
m = TIMEDELTA_BASE_PATTERN.match(s) | |||
return datetime.timedelta(**{key: float(val) for key, val in m.groupdict().items()}) |
@@ -20,6 +20,8 @@ from frappe.utils import ( | |||
formatdate, | |||
) | |||
BLOCK_TAGS_PATTERN = re.compile(r"(<br|<div|<p)") | |||
def format_value(value, df=None, doc=None, currency=None, translated=False, format=None): | |||
"""Format value based on given fieldtype, document reference, currency reference. | |||
@@ -97,7 +99,7 @@ def format_value(value, df=None, doc=None, currency=None, translated=False, form | |||
return "{}%".format(flt(value, 2)) | |||
elif df.get("fieldtype") in ("Text", "Small Text"): | |||
if not re.search(r"(<br|<div|<p)", value): | |||
if not BLOCK_TAGS_PATTERN.search(value): | |||
return frappe.safe_decode(value).replace("\n", "<br>") | |||
elif df.get("fieldtype") == "Markdown Editor": | |||
@@ -13,6 +13,8 @@ from frappe.utils import cint, strip_html_tags | |||
from frappe.utils.data import cstr | |||
from frappe.utils.html_utils import unescape_html | |||
HTML_TAGS_PATTERN = re.compile(r"(?s)<[\s]*(script|style).*?</\1>") | |||
def setup_global_search_table(): | |||
""" | |||
@@ -360,7 +362,7 @@ def get_formatted_value(value, field): | |||
if getattr(field, "fieldtype", None) in ["Text", "Text Editor"]: | |||
value = unescape_html(frappe.safe_decode(value)) | |||
value = re.subn(r"(?s)<[\s]*(script|style).*?</\1>", "", str(value))[0] | |||
value = HTML_TAGS_PATTERN.subn("", str(value))[0] | |||
value = " ".join(value.split()) | |||
return field.label + " : " + strip_html_tags(str(value)) | |||
@@ -5,6 +5,16 @@ from bleach_allowlist import bleach_allowlist | |||
import frappe | |||
EMOJI_PATTERN = re.compile( | |||
"(\ud83d[\ude00-\ude4f])|" | |||
"(\ud83c[\udf00-\uffff])|" | |||
"(\ud83d[\u0000-\uddff])|" | |||
"(\ud83d[\ude80-\udeff])|" | |||
"(\ud83c[\udde0-\uddff])" | |||
"+", | |||
flags=re.UNICODE, | |||
) | |||
def clean_html(html): | |||
import bleach | |||
@@ -181,28 +191,17 @@ def is_json(text): | |||
def get_icon_html(icon, small=False): | |||
from frappe.utils import is_image | |||
emoji_pattern = re.compile( | |||
"(\ud83d[\ude00-\ude4f])|" | |||
"(\ud83c[\udf00-\uffff])|" | |||
"(\ud83d[\u0000-\uddff])|" | |||
"(\ud83d[\ude80-\udeff])|" | |||
"(\ud83c[\udde0-\uddff])" | |||
"+", | |||
flags=re.UNICODE, | |||
) | |||
icon = icon or "" | |||
if icon and emoji_pattern.match(icon): | |||
return '<span class="text-muted">' + icon + "</span>" | |||
if icon and EMOJI_PATTERN.match(icon): | |||
return f'<span class="text-muted">{icon}</span>' | |||
if is_image(icon): | |||
return ( | |||
'<img style="width: 16px; height: 16px;" src="{icon}">'.format(icon=icon) | |||
if small | |||
else '<img src="{icon}">'.format(icon=icon) | |||
f'<img style="width: 16px; height: 16px;" src="{icon}">' if small else f'<img src="{icon}">' | |||
) | |||
else: | |||
return "<i class='{icon}'></i>".format(icon=icon) | |||
return f"<i class='{icon}'></i>" | |||
def unescape_html(value): | |||
@@ -177,9 +177,9 @@ def get_dictionary_match_feedback(match, is_sole_match): | |||
word = match.get("token") | |||
# Variations of the match like UPPERCASES | |||
if re.match(scoring.START_UPPER, word): | |||
if scoring.START_UPPER.match(word): | |||
suggestions.append(_("Capitalization doesn't help very much.")) | |||
elif re.match(scoring.ALL_UPPER, word): | |||
elif scoring.ALL_UPPER.match(word): | |||
suggestions.append(_("All-uppercase is almost as easy to guess as all-lowercase.")) | |||
# Match contains l33t speak substitutions | |||
@@ -40,7 +40,7 @@ def make_xlsx(data, sheet_name, wb=None, column_widths=None): | |||
if isinstance(item, str) and next(ILLEGAL_CHARACTERS_RE.finditer(value), None): | |||
# Remove illegal characters from the string | |||
value = re.sub(ILLEGAL_CHARACTERS_RE, "", value) | |||
value = ILLEGAL_CHARACTERS_RE.sub("", value) | |||
clean_row.append(value) | |||
@@ -19,6 +19,8 @@ from frappe.website.utils import ( | |||
) | |||
from frappe.website.website_generator import WebsiteGenerator | |||
# Any header-like tag: "<h" + one character + ">", e.g. <h1>..<h6>.
# NOTE(review): "." also matches <hr> — confirm that is acceptable here.
H_TAG_PATTERN = re.compile("<h.>")
class WebPage(WebsiteGenerator): | |||
def validate(self): | |||
@@ -114,7 +116,7 @@ class WebPage(WebsiteGenerator): | |||
context.header = context.title | |||
# add h1 tag to header | |||
if context.get("header") and not re.findall("<h.>", context.header): | |||
if context.get("header") and not H_TAG_PATTERN.findall(context.header): | |||
context.header = "<h1>" + context.header + "</h1>" | |||
# if title not set, set title from header | |||
@@ -15,6 +15,13 @@ from frappe import _ | |||
from frappe.model.document import Document | |||
from frappe.utils import md_to_html | |||
# Frontmatter fenced by --- or +++ at the start of the text; group(1) is the
# frontmatter body, group(2) the remaining document (re.S lets "." cross lines).
FRONTMATTER_PATTERN = re.compile(r"^\s*(?:---|\+\+\+)(.*?)(?:---|\+\+\+)\s*(.+)$", re.S | re.M)
# Text content of an <h1> tag, captured up to the next "<".
H1_TAG_PATTERN = re.compile("<h1>([^<]*)")
# src attribute value of an <img> tag (single- or double-quoted).
IMAGE_TAG_PATTERN = re.compile(r"""<img[^>]*src\s?=\s?['"]([^'"]*)['"]""")
# Page-name scrubbing: punctuation to drop entirely.
CLEANUP_PATTERN_1 = re.compile(r'[~!@#$%^&*+()<>,."\'\?]')
# Page-name scrubbing: separators to turn into hyphens.
CLEANUP_PATTERN_2 = re.compile("[:/]")
# Page-name scrubbing: runs of repeated hyphens, collapsed to one.
CLEANUP_PATTERN_3 = re.compile(r"(-)\1+")
def delete_page_cache(path): | |||
cache = frappe.cache() | |||
@@ -29,7 +36,7 @@ def delete_page_cache(path): | |||
def find_first_image(html): | |||
m = re.finditer(r"""<img[^>]*src\s?=\s?['"]([^'"]*)['"]""", html) | |||
m = IMAGE_TAG_PATTERN.finditer(html) | |||
try: | |||
return next(m).groups()[0] | |||
except StopIteration: | |||
@@ -156,17 +163,17 @@ def is_signup_disabled(): | |||
return frappe.db.get_single_value("Website Settings", "disable_signup", True) | |||
def cleanup_page_name(title: str) -> str:
	"""Make a URL-safe page name (slug) from *title*.

	Lowercases, strips punctuation, converts ":" and "/" to hyphens,
	collapses whitespace and repeated hyphens, and truncates to 140
	characters. Returns "" for falsy input.
	"""
	if not title:
		return ""

	name = title.lower()
	# drop punctuation, then turn path-like separators into hyphens
	name = CLEANUP_PATTERN_1.sub("", name)
	name = CLEANUP_PATTERN_2.sub("-", name)
	name = "-".join(name.split())

	# replace repeating hyphens
	name = CLEANUP_PATTERN_3.sub(r"\1", name)

	return name[:140]
@@ -287,8 +294,8 @@ def extract_title(source, path): | |||
if not title and "<h1>" in source: | |||
# extract title from h1 | |||
match = re.findall("<h1>([^<]*)", source) | |||
title_content = match[0].strip()[:300] | |||
		match = H1_TAG_PATTERN.search(source).group(1)
		title_content = match.strip()[:300]
if "{{" not in title_content: | |||
title = title_content | |||
@@ -308,17 +315,16 @@ def extract_title(source, path): | |||
return title | |||
def extract_comment_tag(source: str, tag: str):
	"""Extract custom tags in comments from source.

	:param source: raw template source in HTML
	:param tag: tag to search, example "title"
	:return: the tag's stripped value, or None when the tag is absent
	"""
	if f"<!-- {tag}:" in source:
		# group(1) is the captured value; a bare group() would return the
		# whole "<!-- tag: value -->" comment. re.escape guards against
		# regex metacharacters in the tag name.
		return re.search(f"<!-- {re.escape(tag)}:([^>]*) -->", source).group(1).strip()
	return None
def get_html_content_based_on_type(doc, fieldname, content_type): | |||
@@ -378,7 +384,8 @@ def get_frontmatter(string): | |||
"Reference: https://github.com/jonbeebe/frontmatter" | |||
frontmatter = "" | |||
body = "" | |||
result = re.compile(r"^\s*(?:---|\+\+\+)(.*?)(?:---|\+\+\+)\s*(.+)$", re.S | re.M).search(string) | |||
result = FRONTMATTER_PATTERN.search(string) | |||
if result: | |||
frontmatter = result.group(1) | |||
body = result.group(2) | |||
@@ -10,6 +10,9 @@ import frappe.sessions | |||
from frappe import _ | |||
from frappe.utils.jinja_globals import is_rtl | |||
# Whole <script>...</script> elements (content may not contain "<").
SCRIPT_TAG_PATTERN = re.compile(r"\<script[^<]*\</script\>")
# Any stray closing </script> tag left behind after the pass above.
CLOSING_SCRIPT_TAG_PATTERN = re.compile(r"</script\>")
def get_context(context): | |||
if frappe.session.user == "Guest": | |||
@@ -34,10 +37,10 @@ def get_context(context): | |||
boot_json = frappe.as_json(boot) | |||
# remove script tags from boot | |||
boot_json = re.sub(r"\<script[^<]*\</script\>", "", boot_json) | |||
boot_json = SCRIPT_TAG_PATTERN.sub("", boot_json) | |||
# TODO: Find better fix | |||
boot_json = re.sub(r"</script\>", "", boot_json) | |||
boot_json = CLOSING_SCRIPT_TAG_PATTERN.sub("", boot_json) | |||
context.update( | |||
{ | |||