decodetree: Open files with encoding='utf-8'

When decodetree.py was added in commit 568ae7efae7, QEMU was using Python 2, which happily reads UTF-8 files in text mode. Python 3 requires either a UTF-8 locale or an explicit encoding passed to open(). Now that Python 3 is required, use an explicit UTF-8 encoding for decodetree source files.

To avoid further problems with the user locale, also use an explicit UTF-8 encoding for the generated C files.

Make it explicit that both input and output are plain text by using the 't' mode.

This fixes:

  $ /usr/bin/python3 scripts/decodetree.py test.decode
  Traceback (most recent call last):
    File "scripts/decodetree.py", line 1397, in <module>
      main()
    File "scripts/decodetree.py", line 1308, in main
      parse_file(f, toppat)
    File "scripts/decodetree.py", line 994, in parse_file
      for line in f:
    File "/usr/lib/python3.6/encodings/ascii.py", line 26, in decode
      return codecs.ascii_decode(input, self.errors)[0]
  UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 80: ordinal not in range(128)

Backports 4cacecaaa2bbf8af0967bd3eee43297fada475a9
2025-08-25 16:51:13 +00:00 · 2021-03-04 13:33:58 -05:00 · 2021-03-04 13:33:58 -05:00 · 296c32a8da
parent 419941c3d1
commit 296c32a8da
1 changed files with 10 additions and 7 deletions
--- a/qemu/scripts/decodetree.py
+++ b/qemu/scripts/decodetree.py
@ -4,7 +4,7 @@
 # This library is free software; you can redistribute it and/or
 # modify it under the terms of the GNU Lesser General Public
 # License as published by the Free Software Foundation; either
-# version 2 of the License, or (at your option) any later version.
+# version 2.1 of the License, or (at your option) any later version.
 #
 # This library is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
@ -20,6 +20,7 @@
 # See the syntax and semantics in docs/devel/decodetree.rst.
 #

+import io
 import os
 import re
 import sys
@ -94,7 +95,7 @@ def str_indent(c):


 def str_fields(fields):
-    """Return a string uniquely identifing FIELDS"""
+    """Return a string uniquely identifying FIELDS"""
    r = ''
    for n in sorted(fields.keys()):
        r += '_' + n
@ -814,7 +815,7 @@ def parse_generic(lineno, parent_pat, name, toks):
    arg = None
    fmt = None
    for t in toks:
-        # '&Foo' gives a format an explcit argument set.
+        # '&Foo' gives a format an explicit argument set.
        if re.fullmatch(re_arg_ident, t):
            tt = t[1:]
            if arg:
@ -903,7 +904,7 @@ def parse_generic(lineno, parent_pat, name, toks):
    elif not (is_format and width == 0) and width != insnwidth:
        error(lineno, 'definition has {0} bits'.format(width))

-    # Do not check for fields overlaping fields; one valid usage
+    # Do not check for fields overlapping fields; one valid usage
    # is to be able to duplicate fields via import.
    fieldmask = 0
    for f in flds.values():
@ -1304,7 +1305,7 @@ def main():

    for filename in args:
        input_file = filename
-        f = open(filename, 'r')
+        f = open(filename, 'rt', encoding='utf-8')
        parse_file(f, toppat)
        f.close()

@ -1324,9 +1325,11 @@ def main():
        prop_size(stree)

    if output_file:
-        output_fd = open(output_file, 'w')
+        output_fd = open(output_file, 'wt', encoding='utf-8')
    else:
-        output_fd = sys.stdout
+        output_fd = io.TextIOWrapper(sys.stdout.buffer,
+                                     encoding=sys.stdout.encoding,
+                                     errors="ignore")

    output_autogen()
    for n in sorted(arguments.keys()):