Use raw string + binary matching for URL regex.

A line that exceeds the length limit is allowed only if it consists solely of a URL, i.e. a long URL must be alone on its line.

Signed-off-by: Mateusz Starzyk <mateusz.starzyk@mobica.com>
This commit is contained in:
Mateusz Starzyk 2021-03-25 14:06:50 +01:00
parent 9ee8166148
commit c8f4489fa5

View file

@ -201,6 +201,8 @@ class ChangeLog:
# a version that is not yet released. Something like "3.1a" is accepted. # a version that is not yet released. Something like "3.1a" is accepted.
_version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+') _version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+')
_incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]') _incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]')
_only_url_re = re.compile(br'^\s*\w+://\S+\s*$')
_has_url_re = re.compile(br'.*://.*')
def add_categories_from_text(self, filename, line_offset, def add_categories_from_text(self, filename, line_offset,
text, allow_unknown_category): text, allow_unknown_category):
@ -219,14 +221,18 @@ class ChangeLog:
category.name.decode('utf8')) category.name.decode('utf8'))
body_split = category.body.splitlines() body_split = category.body.splitlines()
re_has_url = re.compile('.*http[s]?://.*')
for line_number, line in enumerate(body_split, 1): for line_number, line in enumerate(body_split, 1):
if not re_has_url.match(line.decode('utf-8')) and \ if not self.__class__._only_url_re.match(line) and \
len(line) > MAX_LINE_LENGTH: len(line) > MAX_LINE_LENGTH:
long_url_msg = '. URL exceeding length limit must be ' \
'alone in it\'s line.' if \
self.__class__._has_url_re.match(line) else ""
raise InputFormatError(filename, raise InputFormatError(filename,
category.body_line + line_number, category.body_line + line_number,
'Line is longer than allowed: Length {} (Max {})', 'Line is longer than allowed: '
len(line), MAX_LINE_LENGTH) 'Length {} (Max {}){}',
len(line), MAX_LINE_LENGTH,
long_url_msg)
self.categories[category.name] += category.body self.categories[category.name] += category.body