From 4bda3254cf29651abc9deaff4816cbdb45e8c1c0 Mon Sep 17 00:00:00 2001
From: Gilles Peskine <Gilles.Peskine@arm.com>
Date: Sun, 10 May 2020 17:18:06 +0200
Subject: [PATCH] Check only files checked into Git

We're only interested in files that are committed and pushed to be
included in Mbed TLS, not in any other files that may be lying around.
So ask Git for the list of tracked file names (`git ls-files`).

This script is primarily intended to run on the CI, and there it runs
on a fresh Git checkout plus potentially some other checkouts or
leftovers from a previous part of the CI job. It should also run
reasonably well on developer machines, where there may be various
additional files. In both cases, git is available.

Ad hoc directory exclusions are no longer needed.

Signed-off-by: Gilles Peskine <Gilles.Peskine@arm.com>
---
 tests/scripts/check-files.py | 37 +++++++++++++++---------------------
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/tests/scripts/check-files.py b/tests/scripts/check-files.py
index b48f62b75..39a76931b 100755
--- a/tests/scripts/check-files.py
+++ b/tests/scripts/check-files.py
@@ -15,6 +15,7 @@ import argparse
 import logging
 import codecs
 import re
+import subprocess
 import sys
 
 
@@ -262,15 +263,6 @@ class IntegrityChecker:
         self.check_repo_path()
         self.logger = None
         self.setup_logger(log_file)
-        self.excluded_directories = [
-            '.git',
-            'mbed-os',
-        ]
-        self.excluded_paths = list(map(os.path.normpath, [
-            'cov-int',
-            'examples',
-            'yotta/module'
-        ]))
         self.issues_to_check = [
             PermissionIssueTracker(),
             EndOfFileNewlineIssueTracker(),
@@ -297,21 +289,22 @@ class IntegrityChecker:
             console = logging.StreamHandler()
             self.logger.addHandler(console)
 
-    def prune_branch(self, root, d):
-        if d in self.excluded_directories:
-            return True
-        if os.path.normpath(os.path.join(root, d)) in self.excluded_paths:
-            return True
-        return False
+    @staticmethod
+    def collect_files():
+        bytes_output = subprocess.check_output(['git', 'ls-files', '-z'])
+        bytes_filepaths = bytes_output.split(b'\0')[:-1]
+        ascii_filepaths = map(lambda fp: fp.decode('ascii'), bytes_filepaths)
+        # Prepend './' to files in the top-level directory so that
+        # something like `'/Makefile' in fp` matches in the top-level
+        # directory as well as in subdirectories.
+        return [fp if os.path.dirname(fp) else os.path.join(os.curdir, fp)
+                for fp in ascii_filepaths]
 
     def check_files(self):
-        for root, dirs, files in os.walk("."):
-            dirs[:] = sorted(d for d in dirs if not self.prune_branch(root, d))
-            for filename in sorted(files):
-                filepath = os.path.join(root, filename)
-                for issue_to_check in self.issues_to_check:
-                    if issue_to_check.should_check_file(filepath):
-                        issue_to_check.check_file_for_issue(filepath)
+        for issue_to_check in self.issues_to_check:
+            for filepath in self.collect_files():
+                if issue_to_check.should_check_file(filepath):
+                    issue_to_check.check_file_for_issue(filepath)
 
     def output_issues(self):
         integrity_return_code = 0