From 791c40c5229fdd611855a055353617fc5cbe2d17 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Tue, 18 May 2021 14:39:40 +0200 Subject: [PATCH 01/10] Switch assemble_changelog to using text strings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Changelog contents should be UTF-8 text files. There's no need to be binary-safe. So switch to using text strings in Python (str, not bytes). This commit makes the following changes: * Bytes literals (b'…') to string literals ('…'). * Subprocess output (which is all git information) is decoded as ascii. * Inject text directly in exceptions rather than calling a decode method. This is enough to make the script work as desired in a UTF-8 locale. Signed-off-by: Gilles Peskine --- scripts/assemble_changelog.py | 78 +++++++++++++++++------------------ 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/scripts/assemble_changelog.py b/scripts/assemble_changelog.py index 56d6c37e1..169995523 100755 --- a/scripts/assemble_changelog.py +++ b/scripts/assemble_changelog.py @@ -63,15 +63,15 @@ class LostContent(Exception): # The category names we use in the changelog. # If you edit this, update ChangeLog.d/README.md. 
STANDARD_CATEGORIES = ( - b'API changes', - b'Default behavior changes', - b'Requirement changes', - b'New deprecations', - b'Removals', - b'Features', - b'Security', - b'Bugfix', - b'Changes', + 'API changes', + 'Default behavior changes', + 'Requirement changes', + 'New deprecations', + 'Removals', + 'Features', + 'Security', + 'Bugfix', + 'Changes', ) # The maximum line length for an entry @@ -122,13 +122,13 @@ class ChangelogFormat: class TextChangelogFormat(ChangelogFormat): """The traditional Mbed TLS changelog format.""" - _unreleased_version_text = b'= mbed TLS x.x.x branch released xxxx-xx-xx' + _unreleased_version_text = '= mbed TLS x.x.x branch released xxxx-xx-xx' @classmethod def is_released_version(cls, title): # Look for an incomplete release date - return not re.search(br'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title) + return not re.search(r'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title) - _top_version_re = re.compile(br'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)', + _top_version_re = re.compile(r'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)', re.DOTALL) @classmethod def extract_top_version(cls, changelog_file_content): @@ -140,17 +140,17 @@ class TextChangelogFormat(ChangelogFormat): top_version_body = m.group(2) if cls.is_released_version(top_version_title): top_version_end = top_version_start - top_version_title = cls._unreleased_version_text + b'\n\n' - top_version_body = b'' + top_version_title = cls._unreleased_version_text + '\n\n' + top_version_body = '' return (changelog_file_content[:top_version_start], top_version_title, top_version_body, changelog_file_content[top_version_end:]) @classmethod def version_title_text(cls, version_title): - return re.sub(br'\n.*', version_title, re.DOTALL) + return re.sub(r'\n.*', version_title, re.DOTALL) - _category_title_re = re.compile(br'(^\w.*)\n+', re.MULTILINE) + _category_title_re = re.compile(r'(^\w.*)\n+', re.MULTILINE) @classmethod def split_categories(cls, version_body): """A category title is a line with the title in column 
0.""" @@ -163,10 +163,10 @@ class TextChangelogFormat(ChangelogFormat): title_starts = [m.start(1) for m in title_matches] body_starts = [m.end(0) for m in title_matches] body_ends = title_starts[1:] + [len(version_body)] - bodies = [version_body[body_start:body_end].rstrip(b'\n') + b'\n' + bodies = [version_body[body_start:body_end].rstrip('\n') + '\n' for (body_start, body_end) in zip(body_starts, body_ends)] - title_lines = [version_body[:pos].count(b'\n') for pos in title_starts] - body_lines = [version_body[:pos].count(b'\n') for pos in body_starts] + title_lines = [version_body[:pos].count('\n') for pos in title_starts] + body_lines = [version_body[:pos].count('\n') for pos in body_starts] return [CategoryContent(title_match.group(1), title_line, body, body_line) for title_match, title_line, body, body_line @@ -176,9 +176,9 @@ class TextChangelogFormat(ChangelogFormat): def format_category(cls, title, body): # `split_categories` ensures that each body ends with a newline. # Make sure that there is additionally a blank line between categories. - if not body.endswith(b'\n\n'): - body += b'\n' - return title + b'\n' + body + if not body.endswith('\n\n'): + body += '\n' + return title + '\n' + body class ChangeLog: """An Mbed TLS changelog. @@ -199,10 +199,10 @@ class ChangeLog: # Only accept dotted version numbers (e.g. "3.1", not "3"). # Refuse ".x" in a version number where x is a letter: this indicates # a version that is not yet released. Something like "3.1a" is accepted. 
- _version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+') - _incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]') - _only_url_re = re.compile(br'^\s*\w+://\S+\s*$') - _has_url_re = re.compile(br'.*://.*') + _version_number_re = re.compile(r'[0-9]+\.[0-9A-Za-z.]+') + _incomplete_version_number_re = re.compile(r'.*\.[A-Za-z]') + _only_url_re = re.compile(r'^\s*\w+://\S+\s*$') + _has_url_re = re.compile(r'.*://.*') def add_categories_from_text(self, filename, line_offset, text, allow_unknown_category): @@ -218,7 +218,7 @@ class ChangeLog: raise InputFormatError(filename, line_offset + category.title_line, 'Unknown category: "{}"', - category.name.decode('utf8')) + category.name) body_split = category.body.splitlines() @@ -250,8 +250,8 @@ class ChangeLog: # Split the top version section into categories. self.categories = OrderedDict() for category in STANDARD_CATEGORIES: - self.categories[category] = b'' - offset = (self.header + self.top_version_title).count(b'\n') + 1 + self.categories[category] = '' + offset = (self.header + self.top_version_title).count('\n') + 1 self.add_categories_from_text(input_stream.name, offset, top_version_body, True) @@ -264,7 +264,7 @@ class ChangeLog: def write(self, filename): """Write the changelog to the specified file. """ - with open(filename, 'wb') as out: + with open(filename, 'w') as out: out.write(self.header) out.write(self.top_version_title) for title, body in self.categories.items(): @@ -303,7 +303,7 @@ class EntryFileSortKey: hashes = subprocess.check_output(['git', 'log', '--format=%H', '--follow', '--', filename]) - m = re.search(b'(.+)$', hashes) + m = re.search('(.+)$', hashes.decode('ascii')) if not m: # The git output is empty. This means that the file was # never checked in. 
@@ -320,8 +320,8 @@ class EntryFileSortKey: """ text = subprocess.check_output(['git', 'rev-list', '--merges', *options, - b'..'.join([some_hash, target])]) - return text.rstrip(b'\n').split(b'\n') + '..'.join([some_hash, target])]) + return text.decode('ascii').rstrip('\n').split('\n') @classmethod def merge_hash(cls, some_hash): @@ -329,7 +329,7 @@ class EntryFileSortKey: Return None if the given commit was never merged. """ - target = b'HEAD' + target = 'HEAD' # List the merges from some_hash to the target in two ways. # The ancestry list is the ones that are both descendants of # some_hash and ancestors of the target. @@ -407,12 +407,12 @@ def check_output(generated_output_file, main_input_file, merged_files): is also present in an output file. This is not perfect but good enough for now. """ - generated_output = set(open(generated_output_file, 'rb')) - for line in open(main_input_file, 'rb'): + generated_output = set(open(generated_output_file, 'r')) + for line in open(main_input_file, 'r'): if line not in generated_output: raise LostContent('original file', line) for merged_file in merged_files: - for line in open(merged_file, 'rb'): + for line in open(merged_file, 'r'): if line not in generated_output: raise LostContent(merged_file, line) @@ -455,14 +455,14 @@ def merge_entries(options): Write the new changelog to options.output. Remove the merged entries if options.keep_entries is false. 
""" - with open(options.input, 'rb') as input_file: + with open(options.input, 'r') as input_file: changelog = ChangeLog(input_file, TextChangelogFormat) files_to_merge = list_files_to_merge(options) if not files_to_merge: sys.stderr.write('There are no pending changelog entries.\n') return for filename in files_to_merge: - with open(filename, 'rb') as input_file: + with open(filename, 'r') as input_file: changelog.add_file(input_file) finish_output(changelog, options.output, options.input, files_to_merge) if not options.keep_entries: From 9c6187d8ad6b15fda5d655de9caa459681ec3749 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Tue, 18 May 2021 14:49:02 +0200 Subject: [PATCH 02/10] Explicitly use UTF-8 in assemble_changelog Changelog contents should be UTF-8 text files. So explicitly open all files as UTF-8. This makes the script independent of the ambient locale (except with respect to exception messages, but we can live with that). Signed-off-by: Gilles Peskine --- scripts/assemble_changelog.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/scripts/assemble_changelog.py b/scripts/assemble_changelog.py index 169995523..b8a63c9e9 100755 --- a/scripts/assemble_changelog.py +++ b/scripts/assemble_changelog.py @@ -264,7 +264,7 @@ class ChangeLog: def write(self, filename): """Write the changelog to the specified file. """ - with open(filename, 'w') as out: + with open(filename, 'w', encoding='utf-8') as out: out.write(self.header) out.write(self.top_version_title) for title, body in self.categories.items(): @@ -407,12 +407,12 @@ def check_output(generated_output_file, main_input_file, merged_files): is also present in an output file. This is not perfect but good enough for now. 
""" - generated_output = set(open(generated_output_file, 'r')) - for line in open(main_input_file, 'r'): + generated_output = set(open(generated_output_file, 'r', encoding='utf-8')) + for line in open(main_input_file, 'r', encoding='utf-8'): if line not in generated_output: raise LostContent('original file', line) for merged_file in merged_files: - for line in open(merged_file, 'r'): + for line in open(merged_file, 'r', encoding='utf-8'): if line not in generated_output: raise LostContent(merged_file, line) @@ -455,14 +455,14 @@ def merge_entries(options): Write the new changelog to options.output. Remove the merged entries if options.keep_entries is false. """ - with open(options.input, 'r') as input_file: + with open(options.input, 'r', encoding='utf-8') as input_file: changelog = ChangeLog(input_file, TextChangelogFormat) files_to_merge = list_files_to_merge(options) if not files_to_merge: sys.stderr.write('There are no pending changelog entries.\n') return for filename in files_to_merge: - with open(filename, 'r') as input_file: + with open(filename, 'r', encoding='utf-8') as input_file: changelog.add_file(input_file) finish_output(changelog, options.output, options.input, files_to_merge) if not options.keep_entries: From 793778f6d6861fc05165f2ecd9b7f4237cff344e Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 23 Apr 2021 16:32:32 +0200 Subject: [PATCH 03/10] Make the API/ABI check optional This way we can add other checks and only run a subset of all the checks. The default remains to run all the checks. I made separate options for API and ABI, but since we use the same tool for both and it doesn't have an obvious way to check only API or only ABI, the two options must be both enabled or both disabled. 
Signed-off-by: Gilles Peskine --- scripts/abi_check.py | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/scripts/abi_check.py b/scripts/abi_check.py index 3cfd95a06..c8dad27d8 100755 --- a/scripts/abi_check.py +++ b/scripts/abi_check.py @@ -51,6 +51,8 @@ class AbiChecker: configuration.report_dir: directory for output files configuration.keep_all_reports: if false, delete old reports configuration.brief: if true, output shorter report to stdout + configuration.check_api: if true, compare ABIs + configuration.check_api: if true, compare APIs configuration.skip_file: path to file containing symbols and types to skip """ self.repo_path = "." @@ -64,6 +66,10 @@ class AbiChecker: self.old_version = old_version self.new_version = new_version self.skip_file = configuration.skip_file + self.check_abi = configuration.check_abi + self.check_api = configuration.check_api + if self.check_abi != self.check_api: + raise Exception('Checking API without ABI or vice versa is not supported') self.brief = configuration.brief self.git_command = "git" self.make_command = "make" @@ -222,8 +228,9 @@ class AbiChecker: """Generate the ABI dumps for the specified git revision.""" git_worktree_path = self._get_clean_worktree_for_git_revision(version) self._update_git_submodules(git_worktree_path, version) - self._build_shared_libraries(git_worktree_path, version) - self._get_abi_dumps_from_shared_libraries(version) + if self.check_abi: + self._build_shared_libraries(git_worktree_path, version) + self._get_abi_dumps_from_shared_libraries(version) self._cleanup_worktree(git_worktree_path) def _remove_children_with_tag(self, parent, tag): @@ -310,11 +317,14 @@ class AbiChecker: self._pretty_revision(self.new_version) )] compliance_return_code = 0 - shared_modules = list(set(self.old_version.modules.keys()) & - set(self.new_version.modules.keys())) - for mbed_module in shared_modules: - if not self._is_library_compatible(mbed_module, - 
compatibility_report): + if self.check_abi: + shared_modules = list(set(self.old_version.modules.keys()) & + set(self.new_version.modules.keys())) + for mbed_module in shared_modules: + if not self._is_library_compatible(mbed_module, + compatibility_report): + compliance_return_code = 1 + compliance_return_code = 1 for version in [self.old_version, self.new_version]: for mbed_module, mbed_module_dump in version.abi_dumps.items(): @@ -397,6 +407,18 @@ def run_main(): "(typically \"-s identifiers\" after running " "\"tests/scripts/list-identifiers.sh --internal\")") ) + parser.add_argument( + "--check-abi", + action='store_true', default=True, + help="Perform ABI comparison (default: yes)" + ) + parser.add_argument("--no-check-abi", action='store_false', dest='check_abi') + parser.add_argument( + "--check-api", + action='store_true', default=True, + help="Perform API comparison (default: yes)" + ) + parser.add_argument("--no-check-api", action='store_false', dest='check_api') parser.add_argument( "-b", "--brief", action="store_true", help="output only the list of issues to stdout, instead of a full report", @@ -430,6 +452,8 @@ def run_main(): report_dir=abi_args.report_dir, keep_all_reports=abi_args.keep_all_reports, brief=abi_args.brief, + check_abi=abi_args.check_abi, + check_api=abi_args.check_api, skip_file=abi_args.skip_file ) abi_check = AbiChecker(old_version, new_version, configuration) From cfd4fae89d1b6689bafa6e9716c8114fb2c2331b Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 23 Apr 2021 16:37:12 +0200 Subject: [PATCH 04/10] Add storage format checks to the interface checker Expand abi_check.py to look for backward incompatibilities not only in the interface exposed to application code (and to some extent driver code), but also to the interface exposed via the storage format, which is relevant when upgrading Mbed TLS on a device with a PSA keystore. 
Strictly speaking, the storage format checks look for regressions in the automatically generated storage format test data. Incompatible changes that are not covered by the generated tests will also not be covered by the interface checker. A known defect in this commit is that the --brief output is not brief for storage format checks. Signed-off-by: Gilles Peskine --- scripts/abi_check.py | 129 ++++++++++++++++++++++++++-- tests/scripts/generate_psa_tests.py | 2 + 2 files changed, 126 insertions(+), 5 deletions(-) diff --git a/scripts/abi_check.py b/scripts/abi_check.py index c8dad27d8..2051c3eae 100755 --- a/scripts/abi_check.py +++ b/scripts/abi_check.py @@ -2,13 +2,21 @@ """ Purpose -This script is a small wrapper around the abi-compliance-checker and -abi-dumper tools, applying them to compare the ABI and API of the library -files from two different Git revisions within an Mbed TLS repository. +This script compares the interfaces of two versions of Mbed TLS, looking +for backward incompatibilities between two different Git revisions within +an Mbed TLS repository. It must be run from the root of a Git working tree. + +For the source (API) and runtime (ABI) interface compatibility, this script +is a small wrapper around the abi-compliance-checker and abi-dumper tools, +applying them to compare the header and library files. + +For the storage format, this script compares the automatically generated +storage tests, and complains if there is a reduction in coverage. + The results of the comparison are either formatted as HTML and stored at a configurable location, or are given as a brief list of problems. Returns 0 on success, 1 on ABI/API non-compliance, and 2 if there is an error -while running the script. Note: must be run from Mbed TLS root. +while running the script. """ # Copyright The Mbed TLS Contributors @@ -27,6 +35,7 @@ while running the script. Note: must be run from Mbed TLS root. # limitations under the License. 
import os +import re import sys import traceback import shutil @@ -53,6 +62,7 @@ class AbiChecker: configuration.brief: if true, output shorter report to stdout configuration.check_api: if true, compare ABIs configuration.check_api: if true, compare APIs + configuration.check_storage: if true, compare storage format tests configuration.skip_file: path to file containing symbols and types to skip """ self.repo_path = "." @@ -70,6 +80,7 @@ class AbiChecker: self.check_api = configuration.check_api if self.check_abi != self.check_api: raise Exception('Checking API without ABI or vice versa is not supported') + self.check_storage_tests = configuration.check_storage self.brief = configuration.brief self.git_command = "git" self.make_command = "make" @@ -214,6 +225,65 @@ class AbiChecker: self.log.debug(abi_dump_output.decode("utf-8")) version.abi_dumps[mbed_module] = output_path + @staticmethod + def _normalize_storage_test_case_data(line): + """Eliminate cosmetic or irrelevant details in storage format test cases.""" + line = re.sub(r'\s+', r'', line) + return line + + def _read_storage_tests(self, directory, filename, storage_tests): + """Record storage tests from the given file. + + Populate the storage_tests dictionary with test cases read from + filename under directory. 
+ """ + at_paragraph_start = True + description = None + full_path = os.path.join(directory, filename) + for line_number, line in enumerate(open(full_path), 1): + line = line.strip() + if not line: + at_paragraph_start = True + continue + if line.startswith('#'): + continue + if at_paragraph_start: + description = line.strip() + at_paragraph_start = False + continue + if line.startswith('depends_on:'): + continue + # We've reached a test case data line + test_case_data = self._normalize_storage_test_case_data(line) + metadata = SimpleNamespace( + filename=filename, + line_number=line_number, + description=description + ) + storage_tests[test_case_data] = metadata + + def _get_storage_format_tests(self, version, git_worktree_path): + """Generate and record the storage format tests for the specified git version. + + The version must be checked out at git_worktree_path. + """ + full_test_list = subprocess.check_output( + ['tests/scripts/generate_psa_tests.py', '--list'], + cwd=git_worktree_path, + ).decode('ascii') + storage_test_list = [test_file + for test_file in full_test_list.split() + # If you change the following condition, update + # generate_psa_tests.py accordingly. 
+ if 'storage_format' in test_file] + subprocess.check_call( + ['tests/scripts/generate_psa_tests.py'] + storage_test_list, + cwd=git_worktree_path, + ) + for test_file in storage_test_list: + self._read_storage_tests(git_worktree_path, test_file, + version.storage_tests) + def _cleanup_worktree(self, git_worktree_path): """Remove the specified git worktree.""" shutil.rmtree(git_worktree_path) @@ -225,12 +295,14 @@ class AbiChecker: self.log.debug(worktree_output.decode("utf-8")) def _get_abi_dump_for_ref(self, version): - """Generate the ABI dumps for the specified git revision.""" + """Generate the interface information for the specified git revision.""" git_worktree_path = self._get_clean_worktree_for_git_revision(version) self._update_git_submodules(git_worktree_path, version) if self.check_abi: self._build_shared_libraries(git_worktree_path, version) self._get_abi_dumps_from_shared_libraries(version) + if self.check_storage_tests: + self._get_storage_format_tests(version, git_worktree_path) self._cleanup_worktree(git_worktree_path) def _remove_children_with_tag(self, parent, tag): @@ -308,6 +380,37 @@ class AbiChecker: os.remove(output_path) return True + @staticmethod + def _is_storage_format_compatible(old_tests, new_tests, + compatibility_report): + """Check whether all tests present in old_tests are also in new_tests. + + Append a message regarding compatibility to compatibility_report. 
+ """ + missing = frozenset(old_tests.keys()).difference(new_tests.keys()) + for test_data in sorted(missing): + metadata = old_tests[test_data] + compatibility_report.append( + 'Test case from {} line {} "{}" has disappeared: {}'.format( + metadata.filename, metadata.line_number, + metadata.description, test_data + ) + ) + compatibility_report.append( + 'FAIL: {}/{} storage format test cases have changed or disappeared.'.format( + len(missing), len(old_tests) + ) if missing else + 'PASS: All {} storage format test cases are preserved.'.format( + len(old_tests) + ) + ) + compatibility_report.append( + 'Info: number of storage format tests cases: {} -> {}.'.format( + len(old_tests), len(new_tests) + ) + ) + return not missing + def get_abi_compatibility_report(self): """Generate a report of the differences between the reference ABI and the new ABI. ABI dumps from self.old_version and self.new_version @@ -317,6 +420,7 @@ class AbiChecker: self._pretty_revision(self.new_version) )] compliance_return_code = 0 + if self.check_abi: shared_modules = list(set(self.old_version.modules.keys()) & set(self.new_version.modules.keys())) @@ -325,7 +429,13 @@ class AbiChecker: compatibility_report): compliance_return_code = 1 + if self.check_storage_tests: + if not self._is_storage_format_compatible( + self.old_version.storage_tests, + self.new_version.storage_tests, + compatibility_report): compliance_return_code = 1 + for version in [self.old_version, self.new_version]: for mbed_module, mbed_module_dump in version.abi_dumps.items(): os.remove(mbed_module_dump) @@ -419,6 +529,12 @@ def run_main(): help="Perform API comparison (default: yes)" ) parser.add_argument("--no-check-api", action='store_false', dest='check_api') + parser.add_argument( + "--check-storage", + action='store_true', default=True, + help="Perform storage tests comparison (default: yes)" + ) + parser.add_argument("--no-check-storage", action='store_false', dest='check_storage') parser.add_argument( "-b", 
"--brief", action="store_true", help="output only the list of issues to stdout, instead of a full report", @@ -435,6 +551,7 @@ def run_main(): crypto_repository=abi_args.old_crypto_repo, crypto_revision=abi_args.old_crypto_rev, abi_dumps={}, + storage_tests={}, modules={} ) new_version = SimpleNamespace( @@ -445,6 +562,7 @@ def run_main(): crypto_repository=abi_args.new_crypto_repo, crypto_revision=abi_args.new_crypto_rev, abi_dumps={}, + storage_tests={}, modules={} ) configuration = SimpleNamespace( @@ -454,6 +572,7 @@ def run_main(): brief=abi_args.brief, check_abi=abi_args.check_abi, check_api=abi_args.check_api, + check_storage=abi_args.check_storage, skip_file=abi_args.skip_file ) abi_check = AbiChecker(old_version, new_version, configuration) diff --git a/tests/scripts/generate_psa_tests.py b/tests/scripts/generate_psa_tests.py index af25feb63..89f8ce1a8 100755 --- a/tests/scripts/generate_psa_tests.py +++ b/tests/scripts/generate_psa_tests.py @@ -723,6 +723,8 @@ class TestGenerator: filename = self.filename_for(basename) test_case.write_data_file(filename, test_cases) + # Note that targets whose name containns 'test_format' have their content + # validated by `abi_check.py`. TARGETS = { 'test_suite_psa_crypto_generate_key.generated': lambda info: KeyGenerate(info).test_cases_for_key_generation(), From 2eae8d7c4099a827cefb9f1207c851d239399375 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Tue, 22 Feb 2022 19:02:44 +0100 Subject: [PATCH 05/10] Look at manually written read tests as well The storage format comparison has a dual purpose: detect format changes that lead to a loss of backward compatibility, and detect loss of test coverage. For loss of backward compatibility, the read tests are the relevant ones. For loss of test coverage, all generated test cases are potentially relevant, but this script currently focuses on storage format (where a loss of test coverage may be a symptom of a loss of backward compatibility). 
Therefore, storage format test comparison now looks at manually written storage format tests, but only if they're read tests. Signed-off-by: Gilles Peskine --- scripts/abi_check.py | 70 ++++++++++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 19 deletions(-) diff --git a/scripts/abi_check.py b/scripts/abi_check.py index 2051c3eae..b983ba405 100755 --- a/scripts/abi_check.py +++ b/scripts/abi_check.py @@ -11,11 +11,15 @@ is a small wrapper around the abi-compliance-checker and abi-dumper tools, applying them to compare the header and library files. For the storage format, this script compares the automatically generated -storage tests, and complains if there is a reduction in coverage. +storage tests and the manual read tests, and complains if there is a +reduction in coverage. A change in test data will be signalled as a +coverage reduction since the old test data is no longer present. A change in +how test data is presented will be signalled as well; this would be a false +positive. -The results of the comparison are either formatted as HTML and stored at -a configurable location, or are given as a brief list of problems. -Returns 0 on success, 1 on ABI/API non-compliance, and 2 if there is an error +The results of the API/ABI comparison are either formatted as HTML and stored +at a configurable location, or are given as a brief list of problems. +Returns 0 on success, 1 on non-compliance, and 2 if there is an error while running the script. """ @@ -34,6 +38,7 @@ while running the script. # See the License for the specific language governing permissions and # limitations under the License. +import glob import os import re import sys @@ -231,7 +236,11 @@ class AbiChecker: line = re.sub(r'\s+', r'', line) return line - def _read_storage_tests(self, directory, filename, storage_tests): + def _read_storage_tests(self, + directory, + filename, + is_generated, + storage_tests): """Record storage tests from the given file. 
Populate the storage_tests dictionary with test cases read from @@ -255,6 +264,11 @@ class AbiChecker: continue # We've reached a test case data line test_case_data = self._normalize_storage_test_case_data(line) + if not is_generated: + # In manual test data, only look at read tests. + function_name = test_case_data.split(':', 1)[0] + if 'read' not in function_name.split('_'): + continue metadata = SimpleNamespace( filename=filename, line_number=line_number, @@ -262,26 +276,44 @@ class AbiChecker: ) storage_tests[test_case_data] = metadata - def _get_storage_format_tests(self, version, git_worktree_path): - """Generate and record the storage format tests for the specified git version. - - The version must be checked out at git_worktree_path. - """ - full_test_list = subprocess.check_output( + @staticmethod + def _list_generated_test_data_files(git_worktree_path): + """List the generated test data files.""" + output = subprocess.check_output( ['tests/scripts/generate_psa_tests.py', '--list'], cwd=git_worktree_path, ).decode('ascii') - storage_test_list = [test_file - for test_file in full_test_list.split() - # If you change the following condition, update - # generate_psa_tests.py accordingly. - if 'storage_format' in test_file] + return [line for line in output.split('\n') if line] + + def _get_storage_format_tests(self, version, git_worktree_path): + """Record the storage format tests for the specified git version. + + The storage format tests are the test suite data files whose name + contains "storage_format". + + The version must be checked out at git_worktree_path. + + This function creates or updates the generated data files. + """ + # Existing test data files. This may be missing some automatically + # generated files if they haven't been generated yet. + storage_data_files = set(glob.glob( + 'tests/suites/test_suite_*storage_format*.data' + )) + # Discover and (re)generate automatically generated data files. 
+ to_be_generated = set() + for filename in self._list_generated_test_data_files(git_worktree_path): + if 'storage_format' in filename: + storage_data_files.add(filename) + to_be_generated.add(filename) subprocess.check_call( - ['tests/scripts/generate_psa_tests.py'] + storage_test_list, + ['tests/scripts/generate_psa_tests.py'] + sorted(to_be_generated), cwd=git_worktree_path, ) - for test_file in storage_test_list: - self._read_storage_tests(git_worktree_path, test_file, + for test_file in sorted(storage_data_files): + self._read_storage_tests(git_worktree_path, + test_file, + test_file in to_be_generated, version.storage_tests) def _cleanup_worktree(self, git_worktree_path): From 296aa46c040f3b4f71515c9f3b9ada32f24f2989 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Tue, 22 Feb 2022 19:16:42 +0100 Subject: [PATCH 06/10] Storage format test regressions are now checked mechanically Signed-off-by: Gilles Peskine --- docs/architecture/testing/psa-storage-format-testing.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/architecture/testing/psa-storage-format-testing.md b/docs/architecture/testing/psa-storage-format-testing.md index e293985cf..0e20a8bf8 100644 --- a/docs/architecture/testing/psa-storage-format-testing.md +++ b/docs/architecture/testing/psa-storage-format-testing.md @@ -34,7 +34,7 @@ Use a similar approach for files other than keys where possible and relevant. Test cases should normally not be removed from the code base: if something has worked before, it should keep working in future versions, so we should keep testing it. -This cannot be enforced solely by looking at a single version of Mbed TLS, since there would be no indication that more test cases used to exist. It can only be enforced through review of library changes. The review may be assisted by a tool that compares the old and the new version, in the same way that `abi-check.py` compares the library's API and ABI. 
+This cannot be enforced solely by looking at a single version of Mbed TLS, since there would be no indication that more test cases used to exist. It can only be enforced through review of library changes. The review is assisted by a tool that compares the old and the new version, which is implemented in `scripts/abi_check.py`. This tool fails the CI if a load-and-check test case disappears (changed test cases are raised as false positives). If the way certain keys are stored changes, and we don't deliberately decide to stop supporting old keys (which should only be done by retiring a version of the storage format), then we should keep the corresponding test cases in load-only mode: create a file with the expected content, load it and check the data that it contains. From f548a0ce80d78ca7537529aa61236f5125d2e33e Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Mar 2022 10:22:36 +0100 Subject: [PATCH 07/10] Don't require ABI tools if not checking the ABI Signed-off-by: Gilles Peskine --- scripts/abi_check.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/abi_check.py index b983ba405..1192f990c 100755 --- a/scripts/abi_check.py +++ b/scripts/abi_check.py @@ -480,7 +480,8 @@ class AbiChecker: """Generate a report of ABI differences between self.old_rev and self.new_rev.""" self.check_repo_path() - self.check_abi_tools_are_installed() + if self.check_api or self.check_abi: + self.check_abi_tools_are_installed() self._get_abi_dump_for_ref(self.old_version) self._get_abi_dump_for_ref(self.new_version) return self.get_abi_compatibility_report() From 56354592844e5b050e24efef8ecd72ad79562e07 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Mar 2022 10:23:09 +0100 Subject: [PATCH 08/10] Unify module documentation with --help text Signed-off-by: Gilles Peskine --- scripts/abi_check.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/scripts/abi_check.py
b/scripts/abi_check.py index 1192f990c..1ef6d663b 100755 --- a/scripts/abi_check.py +++ b/scripts/abi_check.py @@ -1,7 +1,5 @@ #!/usr/bin/env python3 """ -Purpose - This script compares the interfaces of two versions of Mbed TLS, looking for backward incompatibilities between two different Git revisions within an Mbed TLS repository. It must be run from the root of a Git working tree. @@ -21,6 +19,8 @@ The results of the API/ABI comparison are either formatted as HTML and stored at a configurable location, or are given as a brief list of problems. Returns 0 on success, 1 on non-compliance, and 2 if there is an error while running the script. + +You must run this test from an Mbed TLS root. """ # Copyright The Mbed TLS Contributors @@ -490,17 +490,7 @@ class AbiChecker: def run_main(): try: parser = argparse.ArgumentParser( - description=( - """This script is a small wrapper around the - abi-compliance-checker and abi-dumper tools, applying them - to compare the ABI and API of the library files from two - different Git revisions within an Mbed TLS repository. - The results of the comparison are either formatted as HTML and - stored at a configurable location, or are given as a brief list - of problems. Returns 0 on success, 1 on ABI/API non-compliance, - and 2 if there is an error while running the script. - Note: must be run from Mbed TLS root.""" - ) + description=__doc__ ) parser.add_argument( "-v", "--verbose", action="store_true", From 1177f37648ecfd1fb0d25936807c7c71313daa06 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 4 Mar 2022 19:59:55 +0100 Subject: [PATCH 09/10] Fix typo and align on US spelling Signed-off-by: Gilles Peskine --- scripts/abi_check.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/abi_check.py b/scripts/abi_check.py index 1ef6d663b..1f6ac53a7 100755 --- a/scripts/abi_check.py +++ b/scripts/abi_check.py @@ -10,9 +10,9 @@ applying them to compare the header and library files. 
For the storage format, this script compares the automatically generated storage tests and the manual read tests, and complains if there is a -reduction in coverage. A change in test data will be signalled as a +reduction in coverage. A change in test data will be signaled as a coverage reduction since the old test data is no longer present. A change in -how test data is presented will be signalled as well; this would be a false +how test data is presented will be signaled as well; this would be a false positive. The results of the API/ABI comparison are either formatted as HTML and stored @@ -65,7 +65,7 @@ class AbiChecker: configuration.report_dir: directory for output files configuration.keep_all_reports: if false, delete old reports configuration.brief: if true, output shorter report to stdout - configuration.check_api: if true, compare ABIs + configuration.check_abi: if true, compare ABIs configuration.check_api: if true, compare APIs configuration.check_storage: if true, compare storage format tests configuration.skip_file: path to file containing symbols and types to skip From aeb8d6652545f25ac8f74a045076cef1632d5c26 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 4 Mar 2022 20:02:00 +0100 Subject: [PATCH 10/10] Ensure files get closed when they go out of scope This is automatic in CPython but not guaranteed by the language. Be friendly to other Python implementations. 
Signed-off-by: Gilles Peskine --- scripts/abi_check.py | 51 +++++++++++++------------- scripts/assemble_changelog.py | 19 ++++++---- scripts/mbedtls_dev/macro_collector.py | 9 +++-- scripts/min_requirements.py | 25 +++++++------ 4 files changed, 56 insertions(+), 48 deletions(-) diff --git a/scripts/abi_check.py b/scripts/abi_check.py index 1f6ac53a7..f11cdf259 100755 --- a/scripts/abi_check.py +++ b/scripts/abi_check.py @@ -249,32 +249,33 @@ class AbiChecker: at_paragraph_start = True description = None full_path = os.path.join(directory, filename) - for line_number, line in enumerate(open(full_path), 1): - line = line.strip() - if not line: - at_paragraph_start = True - continue - if line.startswith('#'): - continue - if at_paragraph_start: - description = line.strip() - at_paragraph_start = False - continue - if line.startswith('depends_on:'): - continue - # We've reached a test case data line - test_case_data = self._normalize_storage_test_case_data(line) - if not is_generated: - # In manual test data, only look at read tests. - function_name = test_case_data.split(':', 1)[0] - if 'read' not in function_name.split('_'): + with open(full_path) as fd: + for line_number, line in enumerate(fd, 1): + line = line.strip() + if not line: + at_paragraph_start = True continue - metadata = SimpleNamespace( - filename=filename, - line_number=line_number, - description=description - ) - storage_tests[test_case_data] = metadata + if line.startswith('#'): + continue + if at_paragraph_start: + description = line.strip() + at_paragraph_start = False + continue + if line.startswith('depends_on:'): + continue + # We've reached a test case data line + test_case_data = self._normalize_storage_test_case_data(line) + if not is_generated: + # In manual test data, only look at read tests. 
+ function_name = test_case_data.split(':', 1)[0] + if 'read' not in function_name.split('_'): + continue + metadata = SimpleNamespace( + filename=filename, + line_number=line_number, + description=description + ) + storage_tests[test_case_data] = metadata @staticmethod def _list_generated_test_data_files(git_worktree_path): diff --git a/scripts/assemble_changelog.py b/scripts/assemble_changelog.py index b8a63c9e9..b742cc8ca 100755 --- a/scripts/assemble_changelog.py +++ b/scripts/assemble_changelog.py @@ -407,14 +407,17 @@ def check_output(generated_output_file, main_input_file, merged_files): is also present in an output file. This is not perfect but good enough for now. """ - generated_output = set(open(generated_output_file, 'r', encoding='utf-8')) - for line in open(main_input_file, 'r', encoding='utf-8'): - if line not in generated_output: - raise LostContent('original file', line) - for merged_file in merged_files: - for line in open(merged_file, 'r', encoding='utf-8'): - if line not in generated_output: - raise LostContent(merged_file, line) + with open(generated_output_file, 'r', encoding='utf-8') as out_fd: + generated_output = set(out_fd) + with open(main_input_file, 'r', encoding='utf-8') as in_fd: + for line in in_fd: + if line not in generated_output: + raise LostContent('original file', line) + for merged_file in merged_files: + with open(merged_file, 'r', encoding='utf-8') as in_fd: + for line in in_fd: + if line not in generated_output: + raise LostContent(merged_file, line) def finish_output(changelog, output_file, input_file, merged_files): """Write the changelog to the output file. 
diff --git a/scripts/mbedtls_dev/macro_collector.py b/scripts/mbedtls_dev/macro_collector.py index 3440ba74b..e93940d6e 100644 --- a/scripts/mbedtls_dev/macro_collector.py +++ b/scripts/mbedtls_dev/macro_collector.py @@ -18,7 +18,7 @@ import itertools import re -from typing import Dict, Iterable, Iterator, List, Optional, Pattern, Set, Tuple, Union +from typing import Dict, IO, Iterable, Iterator, List, Optional, Pattern, Set, Tuple, Union class ReadFileLineException(Exception): @@ -50,12 +50,13 @@ class read_file_lines: """ def __init__(self, filename: str, binary: bool = False) -> None: self.filename = filename + self.file = None #type: Optional[IO[str]] self.line_number = 'entry' #type: Union[int, str] self.generator = None #type: Optional[Iterable[Tuple[int, str]]] self.binary = binary def __enter__(self) -> 'read_file_lines': - self.generator = enumerate(open(self.filename, - 'rb' if self.binary else 'r')) + self.file = open(self.filename, 'rb' if self.binary else 'r') + self.generator = enumerate(self.file) return self def __iter__(self) -> Iterator[str]: assert self.generator is not None @@ -64,6 +65,8 @@ class read_file_lines: yield content self.line_number = 'exit' def __exit__(self, exc_type, exc_value, exc_traceback) -> None: + if self.file is not None: + self.file.close() if exc_type is not None: raise ReadFileLineException(self.filename, self.line_number) \ from exc_value diff --git a/scripts/min_requirements.py b/scripts/min_requirements.py index eecab1c1e..01c9de13c 100755 --- a/scripts/min_requirements.py +++ b/scripts/min_requirements.py @@ -56,18 +56,19 @@ class Requirements: * Comments (``#`` at the beginning of the line or after whitespace). * ``-r FILENAME`` to include another file. 
""" - for line in open(filename): - line = line.strip() - line = re.sub(r'(\A|\s+)#.*', r'', line) - if not line: - continue - m = re.match(r'-r\s+', line) - if m: - nested_file = os.path.join(os.path.dirname(filename), - line[m.end(0):]) - self.add_file(nested_file) - continue - self.requirements.append(self.adjust_requirement(line)) + with open(filename) as fd: + for line in fd: + line = line.strip() + line = re.sub(r'(\A|\s+)#.*', r'', line) + if not line: + continue + m = re.match(r'-r\s+', line) + if m: + nested_file = os.path.join(os.path.dirname(filename), + line[m.end(0):]) + self.add_file(nested_file) + continue + self.requirements.append(self.adjust_requirement(line)) def write(self, out: typing_util.Writable) -> None: """List the gathered requirements."""