Lift some code out of parse_identifiers

Make parse_identifiers less complex. Pylint was complaining that it had too
many local variables, and it had a point.

* Lift the compiled regexes identifier_regex and exclusion_lines out of the
  function body into the class constants IDENTIFIER_REGEX and
  EXCLUSION_LINES (renamed to uppercase because they're constants).
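* Lift the body of the per-file loop into a new method
  parse_identifiers_in_file, which appends the matches it finds to a list
  supplied by the caller. parse_identifiers keeps the loop over the files
  and still returns the accumulated list.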

No intended behavior change.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
This commit is contained in:
Gilles Peskine 2021-11-16 20:56:47 +01:00
parent 7493c4017a
commit b3f4dd5c81

View file

@ -457,19 +457,7 @@ class CodeParser():
return enum_consts return enum_consts
def parse_identifiers(self, include, exclude=None): IDENTIFIER_REGEX = re.compile(
"""
Parse all lines of a header where a function/enum/struct/union/typedef
identifier is declared, based on some regex and heuristics. Highly
dependent on formatting style.
Args:
* include: A List of glob expressions to look for files through.
* exclude: A List of glob expressions for excluding files.
Returns a List of Match objects with identifiers.
"""
identifier_regex = re.compile(
# Match " something(a" or " *something(a". Functions. # Match " something(a" or " *something(a". Functions.
# Assumptions: # Assumptions:
# - function definition from return type to one of its arguments is # - function definition from return type to one of its arguments is
@ -485,7 +473,7 @@ class CodeParser():
r"}? *(\w+)[;[].*" r"}? *(\w+)[;[].*"
) )
# The regex below is indented for clarity. # The regex below is indented for clarity.
exclusion_lines = re.compile( EXCLUSION_LINES = re.compile(
r"^(" r"^("
r"extern +\"C\"|" # pylint: disable=bad-continuation r"extern +\"C\"|" # pylint: disable=bad-continuation
r"(typedef +)?(struct|union|enum)( *{)?$|" r"(typedef +)?(struct|union|enum)( *{)?$|"
@ -496,11 +484,15 @@ class CodeParser():
r")" r")"
) )
files = self.get_files(include, exclude) def parse_identifiers_in_file(self, header_file, identifiers):
self.log.debug("Looking for identifiers in {} files".format(len(files))) """
Parse all lines of a header where a function/enum/struct/union/typedef
identifier is declared, based on some regex and heuristics. Highly
dependent on formatting style.
Append found matches to the list ``identifiers``.
"""
identifiers = []
for header_file in files:
with open(header_file, "r", encoding="utf-8") as header: with open(header_file, "r", encoding="utf-8") as header:
in_block_comment = False in_block_comment = False
# The previous line variable is used for concatenating lines # The previous line variable is used for concatenating lines
@ -523,7 +515,7 @@ class CodeParser():
in_block_comment = True in_block_comment = True
line = line[:m.end(0)] line = line[:m.end(0)]
if exclusion_lines.search(line): if self.EXCLUSION_LINES.search(line):
previous_line = "" previous_line = ""
continue continue
@ -547,7 +539,7 @@ class CodeParser():
if line[0] == " ": if line[0] == " ":
continue continue
identifier = identifier_regex.search(line) identifier = self.IDENTIFIER_REGEX.search(line)
if not identifier: if not identifier:
continue continue
@ -564,6 +556,26 @@ class CodeParser():
identifier.span(), identifier.span(),
group)) group))
def parse_identifiers(self, include, exclude=None):
"""
Parse all lines of a header where a function/enum/struct/union/typedef
identifier is declared, based on some regex and heuristics. Highly
dependent on formatting style.
Args:
* include: A List of glob expressions to look for files through.
* exclude: A List of glob expressions for excluding files.
Returns a List of Match objects with identifiers.
"""
files = self.get_files(include, exclude)
self.log.debug("Looking for identifiers in {} files".format(len(files)))
identifiers = []
for header_file in files:
self.parse_identifiers_in_file(header_file, identifiers)
return identifiers return identifiers
def parse_symbols(self): def parse_symbols(self):