From 96377d397b941274a7ea9faf472159ecac30803d Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Tue, 21 Apr 2020 18:36:17 +0200 Subject: [PATCH] Copy the changelog entry assembling script from development This commit is the combined cherry-pick of the following commits: dba4de0a1294d14840ccfc812799ef6b63725935 b695d5e30a87a8d574ad21393a037710396f27d3 7c3f7cdeae991733573ed2ab81a213f0784cd9cb 4d977a4f4011be14b52330b7224d5b8cf88dc85a e248e83f9f23ffe07bc54579522bf3182018b68f eebf24f7a8d138f656b8cd1f724bfc73d2c4f033 6e97c439592b51c4277328a3ef0e0f61ddccc059 c68c7c8864fd5eaea891712c5a3455c87aae4d40 ac0f0860f17b319c8dd36052f8df5bda075274af 42f384c186db02c0a96b9a0ae5c95df6eae92726 98a53aa3990a6a7730bb15d27e215d3163485ac2 13dc6342822bd4158f8dc4ed30bb2f88af2de356 7fa3eb7d44bd01beaeff145bc1f00a5af37e2751 28af958ea4299eea9e0e3a112f04fcf7e8fc7049 8f46bbf46f3387fa93dd69040d039ad5cdc1ed4b 27a1facd486dc178a1c2e597ccb006b7ccc30258 afc9db8bb71bd76dce45f5246f9ea5b52d59a700 a26079613a0fb75ba616657f9e15a73912a056ef da14e8225e48250375c2f9aa07d195f7f5e9a5b8 37d670a1e17f86b3411fdf15c643b4e1caf7a503 974349d40e69b4e93102abebc79dd54e7beeb4bc d8b6c77388f2c6756aa16bfe3932d956b4d1d2d1 2b242495e12ffb9010c177fa2d62fc01854e70b8 6e91009cfe0bf578ee6afb0cba09f188a238f4af 566407d6f68e906946ff80648c2bde9024bda192 8c4a84c5ded7bb19a5a607591b38971ed79b0a11 5e39c9e94f62d924ec94f8887baa088f05930b07 974232f045781a17b494f6993b2b02e150476653 40b3f411ecb16522beb073e1131f8e727be29ea4 Signed-off-by: Gilles Peskine --- ChangeLog.d/00README.md | 67 +++++ scripts/assemble_changelog.py | 505 ++++++++++++++++++++++++++++++++++ 2 files changed, 572 insertions(+) create mode 100644 ChangeLog.d/00README.md create mode 100755 scripts/assemble_changelog.py diff --git a/ChangeLog.d/00README.md b/ChangeLog.d/00README.md new file mode 100644 index 000000000..b559e2336 --- /dev/null +++ b/ChangeLog.d/00README.md @@ -0,0 +1,67 @@ +# Pending changelog entry directory + +This directory contains changelog entries that have not yet been merged +to the changelog file ([`../ChangeLog`](../ChangeLog)). + +## Changelog entry file format + +A changelog entry file must have the extension `*.txt` and must have the +following format: + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Security + * Change description. + * Another change description. + +Features + * Yet another change description. This is a long change description that + spans multiple lines. + * Yet again another change description. + +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The permitted changelog entry categories are as follows: + + + API changes + Default behavior changes + Requirement changes + New deprecations + Removals + Features + Security + Bugfix + Changes + +Use “Changes” for anything that doesn't fit in the other categories, such as +performance, documentation and test improvements. + +## How to write a changelog entry + +Each entry starts with three spaces, an asterisk and a space. Continuation +lines start with 5 spaces. Lines wrap at 79 characters. + +Write full English sentences with proper capitalization and punctuation. Use +the present tense. Use the imperative where applicable. For example: “Fix a +bug in mbedtls_xxx() ….” + +Include GitHub issue numbers where relevant. Use the format “#1234” for an +Mbed TLS issue. Add other external references such as CVE numbers where +applicable. + +Credit the author of the contribution if the contribution is not a member of +the Mbed TLS development team. Also credit bug reporters where applicable. + +**Explain why, not how**. Remember that the audience is the users of the +library, not its developers. In particular, for a bug fix, explain the +consequences of the bug, not how the bug was fixed. For a new feature, explain +why one might be interested in the feature. For an API change or a deprecation, +explain how to update existing applications. + +See [existing entries](../ChangeLog) for examples. + +## How `ChangeLog` is updated + +Run [`../scripts/assemble_changelog.py`](../scripts/assemble_changelog.py) +from a Git working copy +to move the entries from files in `ChangeLog.d` to the main `ChangeLog` file. diff --git a/scripts/assemble_changelog.py b/scripts/assemble_changelog.py new file mode 100755 index 000000000..ffa3f161b --- /dev/null +++ b/scripts/assemble_changelog.py @@ -0,0 +1,505 @@ +#!/usr/bin/env python3 + +"""Assemble Mbed TLS change log entries into the change log file. + +Add changelog entries to the first level-2 section. +Create a new level-2 section for unreleased changes if needed. +Remove the input files unless --keep-entries is specified. + +In each level-3 section, entries are sorted in chronological order +(oldest first). From oldest to newest: +* Merged entry files are sorted according to their merge date (date of + the merge commit that brought the commit that created the file into + the target branch). +* Committed but unmerged entry files are sorted according to the date + of the commit that adds them. +* Uncommitted entry files are sorted according to their modification time. + +You must run this program from within a git working directory. +""" + +# Copyright (C) 2019, Arm Limited, All Rights Reserved +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may +# not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# This file is part of Mbed TLS (https://tls.mbed.org) + +import argparse +from collections import OrderedDict, namedtuple +import datetime +import functools +import glob +import os +import re +import subprocess +import sys + +class InputFormatError(Exception): + def __init__(self, filename, line_number, message, *args, **kwargs): + message = '{}:{}: {}'.format(filename, line_number, + message.format(*args, **kwargs)) + super().__init__(message) + +class CategoryParseError(Exception): + def __init__(self, line_offset, error_message): + self.line_offset = line_offset + self.error_message = error_message + super().__init__('{}: {}'.format(line_offset, error_message)) + +class LostContent(Exception): + def __init__(self, filename, line): + message = ('Lost content from {}: "{}"'.format(filename, line)) + super().__init__(message) + +# The category names we use in the changelog. +# If you edit this, update ChangeLog.d/README.md. +STANDARD_CATEGORIES = ( + b'API changes', + b'Default behavior changes', + b'Requirement changes', + b'New deprecations', + b'Removals', + b'Features', + b'Security', + b'Bugfix', + b'Changes', +) + +CategoryContent = namedtuple('CategoryContent', [ + 'name', 'title_line', # Title text and line number of the title + 'body', 'body_line', # Body text and starting line number of the body +]) + +class ChangelogFormat: + """Virtual class documenting how to write a changelog format class.""" + + @classmethod + def extract_top_version(cls, changelog_file_content): + """Split out the top version section. + + If the top version is already released, create a new top + version section for an unreleased version. + + Return ``(header, top_version_title, top_version_body, trailer)`` + where the "top version" is the existing top version section if it's + for unreleased changes, and a newly created section otherwise. + To assemble the changelog after modifying top_version_body, + concatenate the four pieces. + """ + raise NotImplementedError + + @classmethod + def version_title_text(cls, version_title): + """Return the text of a formatted version section title.""" + raise NotImplementedError + + @classmethod + def split_categories(cls, version_body): + """Split a changelog version section body into categories. + + Return a list of `CategoryContent` the name is category title + without any formatting. + """ + raise NotImplementedError + + @classmethod + def format_category(cls, title, body): + """Construct the text of a category section from its title and body.""" + raise NotImplementedError + +class TextChangelogFormat(ChangelogFormat): + """The traditional Mbed TLS changelog format.""" + + _unreleased_version_text = b'= mbed TLS x.x.x branch released xxxx-xx-xx' + @classmethod + def is_released_version(cls, title): + # Look for an incomplete release date + return not re.search(br'[0-9x]{4}-[0-9x]{2}-[0-9x]?x', title) + + _top_version_re = re.compile(br'(?:\A|\n)(=[^\n]*\n+)(.*?\n)(?:=|$)', + re.DOTALL) + @classmethod + def extract_top_version(cls, changelog_file_content): + """A version section starts with a line starting with '='.""" + m = re.search(cls._top_version_re, changelog_file_content) + top_version_start = m.start(1) + top_version_end = m.end(2) + top_version_title = m.group(1) + top_version_body = m.group(2) + if cls.is_released_version(top_version_title): + top_version_end = top_version_start + top_version_title = cls._unreleased_version_text + b'\n\n' + top_version_body = b'' + return (changelog_file_content[:top_version_start], + top_version_title, top_version_body, + changelog_file_content[top_version_end:]) + + @classmethod + def version_title_text(cls, version_title): + return re.sub(br'\n.*', version_title, re.DOTALL) + + _category_title_re = re.compile(br'(^\w.*)\n+', re.MULTILINE) + @classmethod + def split_categories(cls, version_body): + """A category title is a line with the title in column 0.""" + if not version_body: + return [] + title_matches = list(re.finditer(cls._category_title_re, version_body)) + if not title_matches or title_matches[0].start() != 0: + # There is junk before the first category. + raise CategoryParseError(0, 'Junk found where category expected') + title_starts = [m.start(1) for m in title_matches] + body_starts = [m.end(0) for m in title_matches] + body_ends = title_starts[1:] + [len(version_body)] + bodies = [version_body[body_start:body_end].rstrip(b'\n') + b'\n' + for (body_start, body_end) in zip(body_starts, body_ends)] + title_lines = [version_body[:pos].count(b'\n') for pos in title_starts] + body_lines = [version_body[:pos].count(b'\n') for pos in body_starts] + return [CategoryContent(title_match.group(1), title_line, + body, body_line) + for title_match, title_line, body, body_line + in zip(title_matches, title_lines, bodies, body_lines)] + + @classmethod + def format_category(cls, title, body): + # `split_categories` ensures that each body ends with a newline. + # Make sure that there is additionally a blank line between categories. + if not body.endswith(b'\n\n'): + body += b'\n' + return title + b'\n' + body + +class ChangeLog: + """An Mbed TLS changelog. + + A changelog file consists of some header text followed by one or + more version sections. The version sections are in reverse + chronological order. Each version section consists of a title and a body. + + The body of a version section consists of zero or more category + subsections. Each category subsection consists of a title and a body. + + A changelog entry file has the same format as the body of a version section. + + A `ChangelogFormat` object defines the concrete syntax of the changelog. + Entry files must have the same format as the changelog file. + """ + + # Only accept dotted version numbers (e.g. "3.1", not "3"). + # Refuse ".x" in a version number where x is a letter: this indicates + # a version that is not yet released. Something like "3.1a" is accepted. + _version_number_re = re.compile(br'[0-9]+\.[0-9A-Za-z.]+') + _incomplete_version_number_re = re.compile(br'.*\.[A-Za-z]') + + def add_categories_from_text(self, filename, line_offset, + text, allow_unknown_category): + """Parse a version section or entry file.""" + try: + categories = self.format.split_categories(text) + except CategoryParseError as e: + raise InputFormatError(filename, line_offset + e.line_offset, + e.error_message) + for category in categories: + if not allow_unknown_category and \ + category.name not in self.categories: + raise InputFormatError(filename, + line_offset + category.title_line, + 'Unknown category: "{}"', + category.name.decode('utf8')) + self.categories[category.name] += category.body + + def __init__(self, input_stream, changelog_format): + """Create a changelog object. + + Populate the changelog object from the content of the file + input_stream. + """ + self.format = changelog_format + whole_file = input_stream.read() + (self.header, + self.top_version_title, top_version_body, + self.trailer) = self.format.extract_top_version(whole_file) + # Split the top version section into categories. + self.categories = OrderedDict() + for category in STANDARD_CATEGORIES: + self.categories[category] = b'' + offset = (self.header + self.top_version_title).count(b'\n') + 1 + self.add_categories_from_text(input_stream.name, offset, + top_version_body, True) + + def add_file(self, input_stream): + """Add changelog entries from a file. + """ + self.add_categories_from_text(input_stream.name, 1, + input_stream.read(), False) + + def write(self, filename): + """Write the changelog to the specified file. + """ + with open(filename, 'wb') as out: + out.write(self.header) + out.write(self.top_version_title) + for title, body in self.categories.items(): + if not body: + continue + out.write(self.format.format_category(title, body)) + out.write(self.trailer) + + +@functools.total_ordering +class EntryFileSortKey: + """This classes defines an ordering on changelog entry files: older < newer. + + * Merged entry files are sorted according to their merge date (date of + the merge commit that brought the commit that created the file into + the target branch). + * Committed but unmerged entry files are sorted according to the date + of the commit that adds them. + * Uncommitted entry files are sorted according to their modification time. + + This class assumes that the file is in a git working directory with + the target branch checked out. + """ + + # Categories of files. A lower number is considered older. + MERGED = 0 + COMMITTED = 1 + LOCAL = 2 + + @staticmethod + def creation_hash(filename): + """Return the git commit id at which the given file was created. + + Return None if the file was never checked into git. + """ + hashes = subprocess.check_output(['git', 'log', '--format=%H', + '--follow', + '--', filename]) + m = re.search(b'(.+)$', hashes) + if not m: + # The git output is empty. This means that the file was + # never checked in. + return None + # The last commit in the log is the oldest one, which is when the + # file was created. + return m.group(0) + + @staticmethod + def list_merges(some_hash, target, *options): + """List merge commits from some_hash to target. + + Pass options to git to select which commits are included. + """ + text = subprocess.check_output(['git', 'rev-list', + '--merges', *options, + b'..'.join([some_hash, target])]) + return text.rstrip(b'\n').split(b'\n') + + @classmethod + def merge_hash(cls, some_hash): + """Return the git commit id at which the given commit was merged. + + Return None if the given commit was never merged. + """ + target = b'HEAD' + # List the merges from some_hash to the target in two ways. + # The ancestry list is the ones that are both descendants of + # some_hash and ancestors of the target. + ancestry = frozenset(cls.list_merges(some_hash, target, + '--ancestry-path')) + # The first_parents list only contains merges that are directly + # on the target branch. We want it in reverse order (oldest first). + first_parents = cls.list_merges(some_hash, target, + '--first-parent', '--reverse') + # Look for the oldest merge commit that's both on the direct path + # and directly on the target branch. That's the place where some_hash + # was merged on the target branch. See + # https://stackoverflow.com/questions/8475448/find-merge-commit-which-include-a-specific-commit + for commit in first_parents: + if commit in ancestry: + return commit + return None + + @staticmethod + def commit_timestamp(commit_id): + """Return the timestamp of the given commit.""" + text = subprocess.check_output(['git', 'show', '-s', + '--format=%ct', + commit_id]) + return datetime.datetime.utcfromtimestamp(int(text)) + + @staticmethod + def file_timestamp(filename): + """Return the modification timestamp of the given file.""" + mtime = os.stat(filename).st_mtime + return datetime.datetime.fromtimestamp(mtime) + + def __init__(self, filename): + """Determine position of the file in the changelog entry order. + + This constructor returns an object that can be used with comparison + operators, with `sort` and `sorted`, etc. Older entries are sorted + before newer entries. + """ + self.filename = filename + creation_hash = self.creation_hash(filename) + if not creation_hash: + self.category = self.LOCAL + self.datetime = self.file_timestamp(filename) + return + merge_hash = self.merge_hash(creation_hash) + if not merge_hash: + self.category = self.COMMITTED + self.datetime = self.commit_timestamp(creation_hash) + return + self.category = self.MERGED + self.datetime = self.commit_timestamp(merge_hash) + + def sort_key(self): + """"Return a concrete sort key for this entry file sort key object. + + ``ts1 < ts2`` is implemented as ``ts1.sort_key() < ts2.sort_key()``. + """ + return (self.category, self.datetime, self.filename) + + def __eq__(self, other): + return self.sort_key() == other.sort_key() + + def __lt__(self, other): + return self.sort_key() < other.sort_key() + + +def check_output(generated_output_file, main_input_file, merged_files): + """Make sanity checks on the generated output. + + The intent of these sanity checks is to have reasonable confidence + that no content has been lost. + + The sanity check is that every line that is present in an input file + is also present in an output file. This is not perfect but good enough + for now. + """ + generated_output = set(open(generated_output_file, 'rb')) + for line in open(main_input_file, 'rb'): + if line not in generated_output: + raise LostContent('original file', line) + for merged_file in merged_files: + for line in open(merged_file, 'rb'): + if line not in generated_output: + raise LostContent(merged_file, line) + +def finish_output(changelog, output_file, input_file, merged_files): + """Write the changelog to the output file. + + The input file and the list of merged files are used only for sanity + checks on the output. + """ + if os.path.exists(output_file) and not os.path.isfile(output_file): + # The output is a non-regular file (e.g. pipe). Write to it directly. + output_temp = output_file + else: + # The output is a regular file. Write to a temporary file, + # then move it into place atomically. + output_temp = output_file + '.tmp' + changelog.write(output_temp) + check_output(output_temp, input_file, merged_files) + if output_temp != output_file: + os.rename(output_temp, output_file) + +def remove_merged_entries(files_to_remove): + for filename in files_to_remove: + os.remove(filename) + +def list_files_to_merge(options): + """List the entry files to merge, oldest first. + + "Oldest" is defined by `EntryFileSortKey`. + """ + files_to_merge = glob.glob(os.path.join(options.dir, '*.txt')) + files_to_merge.sort(key=EntryFileSortKey) + return files_to_merge + +def merge_entries(options): + """Merge changelog entries into the changelog file. + + Read the changelog file from options.input. + Read entries to merge from the directory options.dir. + Write the new changelog to options.output. + Remove the merged entries if options.keep_entries is false. + """ + with open(options.input, 'rb') as input_file: + changelog = ChangeLog(input_file, TextChangelogFormat) + files_to_merge = list_files_to_merge(options) + if not files_to_merge: + sys.stderr.write('There are no pending changelog entries.\n') + return + for filename in files_to_merge: + with open(filename, 'rb') as input_file: + changelog.add_file(input_file) + finish_output(changelog, options.output, options.input, files_to_merge) + if not options.keep_entries: + remove_merged_entries(files_to_merge) + +def show_file_timestamps(options): + """List the files to merge and their timestamp. + + This is only intended for debugging purposes. + """ + files = list_files_to_merge(options) + for filename in files: + ts = EntryFileSortKey(filename) + print(ts.category, ts.datetime, filename) + +def set_defaults(options): + """Add default values for missing options.""" + output_file = getattr(options, 'output', None) + if output_file is None: + options.output = options.input + if getattr(options, 'keep_entries', None) is None: + options.keep_entries = (output_file is not None) + +def main(): + """Command line entry point.""" + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('--dir', '-d', metavar='DIR', + default='ChangeLog.d', + help='Directory to read entries from' + ' (default: ChangeLog.d)') + parser.add_argument('--input', '-i', metavar='FILE', + default='ChangeLog', + help='Existing changelog file to read from and augment' + ' (default: ChangeLog)') + parser.add_argument('--keep-entries', + action='store_true', dest='keep_entries', default=None, + help='Keep the files containing entries' + ' (default: remove them if --output/-o is not specified)') + parser.add_argument('--no-keep-entries', + action='store_false', dest='keep_entries', + help='Remove the files containing entries after they are merged' + ' (default: remove them if --output/-o is not specified)') + parser.add_argument('--output', '-o', metavar='FILE', + help='Output changelog file' + ' (default: overwrite the input)') + parser.add_argument('--list-files-only', + action='store_true', + help=('Only list the files that would be processed ' + '(with some debugging information)')) + options = parser.parse_args() + set_defaults(options) + if options.list_files_only: + show_file_timestamps(options) + return + merge_entries(options) + +if __name__ == '__main__': + main()