Improve comment and string stripping

Make the comment and string stripping code more readable.

Add support for // line comments.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
This commit is contained in:
Gilles Peskine 2021-11-17 20:43:35 +01:00
parent 4f04d619b5
commit 44801627d2

View file

@ -457,6 +457,12 @@ class CodeParser():
return enum_consts return enum_consts
# Matches one chunk of source text to ignore: a block comment that opens and
# closes on the same line, a `//` comment running to the end of the line, or
# a double-quoted string literal (backslash escapes allowed inside).
# All three are combined into one pattern so that whichever construct starts
# first on the line wins; e.g. a "/*" inside a string literal is consumed as
# part of the string rather than treated as a comment start.
# The named group `string` is set only when the match is a string literal,
# which lets the substitution callback tell literals apart from comments.
IGNORED_CHUNK_REGEX = re.compile(
    r'/\*.*?\*/'  # block comment entirely on one line
    r'|//.*'  # line comment
    r'|(?P<string>")(?:[^\\\"]|\\.)*"'  # string literal
)
def strip_comments_and_literals(self, line, in_block_comment): def strip_comments_and_literals(self, line, in_block_comment):
"""Strip comments and string literals from line. """Strip comments and string literals from line.
@ -476,15 +482,21 @@ class CodeParser():
if in_block_comment: if in_block_comment:
line = re.sub(r".*?\*/", r"", line, 1) line = re.sub(r".*?\*/", r"", line, 1)
in_block_comment = False in_block_comment = False
# Remove full comments and string literals
line = re.sub(r'/\*.*?\*/|(")(?:[^\\\"]|\\.)*"', # Remove full comments and string literals.
lambda s: '""' if s.group(1) else ' ', # Do it all together to handle cases like "/*" correctly.
# Note that continuation lines are not supported.
line = re.sub(self.IGNORED_CHUNK_REGEX,
lambda s: '""' if s.group('string') else ' ',
line) line)
# Start an unfinished comment? # Start an unfinished comment?
# (If `/*` was part of a complete comment, it's already been removed.)
m = re.match(r"/\*", line) m = re.match(r"/\*", line)
if m: if m:
in_block_comment = True in_block_comment = True
line = line[:m.end(0)] line = line[:m.end(0)]
return line, in_block_comment return line, in_block_comment
IDENTIFIER_REGEX = re.compile('|'.join([ IDENTIFIER_REGEX = re.compile('|'.join([