# unicorn/bindings/const_generator.py

# Unicorn Engine
# By Dang Hoang Vu, 2013
from __future__ import print_function
import sys, re, os

# Directory containing the Unicorn C headers. The path is relative, so it is
# resolved against the current working directory when the script runs.
INCL_DIR = os.path.join('..', 'include', 'unicorn')

# Header files to scan for constants; gen() produces one output file for each.
include = [
    'arm.h',
    'arm64.h',
    'mips.h',
    'x86.h',
    'sparc.h',
    'm68k.h',
    'unicorn.h',
]

# Prefixes for constant filenames of all archs, keyed by header name.
# These are case sensitive: python/go use lower case, java/dotnet capitalize.
_PREFIXES_LOWER = {
    'arm.h': 'arm',
    'arm64.h': 'arm64',
    'mips.h': 'mips',
    'x86.h': 'x86',
    'sparc.h': 'sparc',
    'm68k.h': 'm68k',
    'unicorn.h': 'unicorn',
}

_PREFIXES_CAPITALIZED = {
    'arm.h': 'Arm',
    'arm64.h': 'Arm64',
    'mips.h': 'Mips',
    'x86.h': 'X86',
    'sparc.h': 'Sparc',
    'm68k.h': 'M68k',
    'unicorn.h': 'Unicorn',
}

# The .NET binding names the unicorn.h module 'Common' instead of 'Unicorn'.
_PREFIXES_DOTNET = dict(_PREFIXES_CAPITALIZED)
_PREFIXES_DOTNET['unicorn.h'] = 'Common'


def _binding(prefixes, **settings):
    """Build one language entry: a copy of *prefixes* plus its format settings."""
    entry = dict(prefixes)
    entry.update(settings)
    return entry


# Per-language generation settings. Each entry holds:
#   header / footer             - text written at the start / end of each file
#                                 (header is %-formatted with the file prefix)
#   line_format                 - %-format for one "name = value" line
#   out_file                    - %-format for the output path (takes the prefix)
#   '<arch>.h'                  - file prefix used for that header
#   comment_open/comment_close  - text wrapped around copied header comments
template = {
    'python': _binding(
        _PREFIXES_LOWER,
        header="# For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT [%s_const.py]\n",
        footer="",
        line_format='UC_%s = %s\n',
        out_file='./python/unicorn/%s_const.py',
        comment_open='#',
        comment_close='',
    ),
    'go': _binding(
        _PREFIXES_LOWER,
        header="package unicorn\n// For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT [%s_const.go]\nconst (\n",
        footer=")",
        line_format='\t%s = %s\n',
        out_file='./go/unicorn/%s_const.go',
        comment_open='//',
        comment_close='',
    ),
    'java': _binding(
        _PREFIXES_CAPITALIZED,
        header="// For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT\n\npackage unicorn;\n\npublic interface %sConst {\n",
        footer="\n}\n",
        line_format='   public static final int UC_%s = %s;\n',
        out_file='./java/unicorn/%sConst.java',
        comment_open='//',
        comment_close='',
    ),
    'dotnet': _binding(
        _PREFIXES_DOTNET,
        header="// For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT\n\nnamespace UnicornManaged.Const\n\nopen System\n\n[<AutoOpen>]\nmodule %s =\n",
        footer="\n",
        line_format='    let UC_%s = %s\n',
        out_file=os.path.join('dotnet', 'UnicornManaged', 'Const', '%s.fs'),
        comment_open='    //',
        comment_close='',
    ),
}

# Markup for comments to be added to autogen files: a header line that starts
# with this marker is copied into the generated file as a comment (see gen()).
MARKUP = '//>'

def _resolve_rhs(rhs, previous):
    """Reduce the right-hand side of a constant definition to a number string.

    Args:
        rhs: the value text with whitespace already removed, e.g. "1<<1",
            "UC_ARM_REG_SP", "0x10" or "(UC_A|UC_B)".
        previous: dict mapping already-emitted constant names to their
            values (as strings).

    Returns:
        The evaluated value as a string.

    Raises:
        NameError / SyntaxError: from eval() if the expression still contains
            unknown names or is not a valid Python expression.
    """
    # evaluate bitshifts in constants e.g. "UC_X86 = 1 << 1"
    match = re.match(r'(?P<rhs>\s*\d+\s*<<\s*\d+\s*)', rhs)
    if match:
        rhs = str(eval(match.group(1)))
    else:
        # evaluate references to other constants e.g. "UC_ARM_REG_X = UC_ARM_REG_SP"
        # The pattern also matches values such as "-1" or "~0"; only do the
        # lookup for names actually seen, and let eval() below handle the rest.
        match = re.match(r'^([^\d]\w+)$', rhs)
        if match and match.group(1) in previous:
            rhs = previous[match.group(1)]

    if not rhs.isdigit():
        # Composite expression: substitute known constants, then evaluate.
        for k, v in previous.items():
            rhs = re.sub(r'\b%s\b' % re.escape(k), v, rhs)
        # NOTE: eval() runs on text taken from the header files under
        # INCL_DIR; only point this script at trusted headers.
        rhs = str(eval(rhs))

    return rhs


def gen(lang):
    """Generate the constant files for one language binding.

    For every header named in ``include``, reads the header from ``INCL_DIR``,
    resolves each ``UC_<PREFIX>...`` enumerator or ``#define`` to a number and
    writes it out using the formatting rules in ``template[lang]``. Header
    lines starting with ``MARKUP`` are copied to the output as comments.

    Args:
        lang: key into ``template`` selecting the binding to generate.

    Raises:
        KeyError: if ``lang`` is not a key of ``template``.
        IOError / OSError: if a header cannot be read or an output file
            cannot be written.
    """
    templ = template[lang]
    for target in include:
        prefix = templ[target]

        # Read the header first (and close it right away) so that a missing
        # header does not leave a truncated output file behind.
        with open(os.path.join(INCL_DIR, target)) as header:
            lines = header.readlines()

        # open as binary prevents windows newlines
        with open(templ['out_file'] % (prefix), 'wb') as outfile:
            outfile.write((templ['header'] % (prefix)).encode("utf-8"))
            if target == 'unicorn.h':
                # unicorn.h constants carry no arch prefix: match any "UC_" name
                prefix = ''

            previous = {}   # constant name -> value string, for this header
            count = 0       # value for the next enumerator without explicit '='
            for line in lines:
                line = line.strip()

                if line.startswith(MARKUP):  # markup for comments
                    comment = "\n%s%s%s\n" % (templ['comment_open'],
                                              line.replace(MARKUP, ''),
                                              templ['comment_close'])
                    outfile.write(comment.encode("utf-8"))
                    continue

                if line == '' or line.startswith('//'):
                    continue

                for t in line.split(','):
                    t = t.strip()
                    if not t or t.startswith('//'):
                        continue
                    f = re.split(r'\s+', t)

                    # parse #define UC_TARGET (num): rewrite it to the same
                    # token shape as an enumerator, "NAME = value..."
                    if f[0] == '#define' and len(f) >= 3:
                        f.pop(0)
                        f.insert(1, '=')

                    if not f[0].startswith("UC_" + prefix.upper()):
                        continue

                    if len(f) > 1 and f[1] not in ('//', '='):
                        print("Error: Unable to convert %s" % f)
                        continue
                    elif len(f) > 1 and f[1] == '=':
                        rhs = ''.join(f[2:])
                    else:
                        # no explicit value: continue from the previous one
                        rhs = str(count)

                    lhs = f[0].strip()
                    rhs = _resolve_rhs(rhs, previous)

                    lhs_strip = re.sub(r'^UC_', '', lhs)
                    count = int(rhs) + 1
                    if count == 1:
                        # a value of 0 gets a blank line before it
                        # (presumably the start of a new enum group)
                        outfile.write("\n".encode("utf-8"))

                    outfile.write((templ['line_format'] % (lhs_strip, rhs)).encode("utf-8"))
                    previous[lhs] = str(rhs)

            outfile.write((templ['footer']).encode("utf-8"))

def main():
    """Generate constants for the binding named by the first CLI argument.

    Raises:
        RuntimeError: if the argument is not a key of ``template``.
    """
    lang = sys.argv[1]
    if lang in template:
        gen(lang)
        return
    raise RuntimeError("Unsupported binding %s" % lang)

if __name__ == "__main__":
    if len(sys.argv) < 2:
        # Show every binding the generator supports, not just python.
        print("Usage:", sys.argv[0], "<%s>" % "|".join(sorted(template)))
        sys.exit(1)
    main()
import 2015-08-21 07:04:50 +00:00			`# Unicorn Engine`
			`# By Dang Hoang Vu, 2013`
			`from __future__ import print_function`
Refactored in order to be independent from the OS path separator, also added support for .net constants generation 2015-10-14 14:32:31 +00:00			`import sys, re, os`
import 2015-08-21 07:04:50 +00:00
Refactored in order to be independent from the OS path separator, also added support for .net constants generation 2015-10-14 14:32:31 +00:00			`INCL_DIR = os.path.join('..', 'include', 'unicorn')`
import 2015-08-21 07:04:50 +00:00
improve const generator; emit unicorn.h consts 2015-08-28 14:31:38 +00:00			`include = [ 'arm.h', 'arm64.h', 'mips.h', 'x86.h', 'sparc.h', 'm68k.h', 'unicorn.h' ]`
import 2015-08-21 07:04:50 +00:00
			`template = {`
			`'python': {`
			`'header': "# For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT [%s_const.py]\n",`
			`'footer': "",`
remove UC_ prefix for go binding consts 2015-09-08 02:25:13 +00:00			`'line_format': 'UC_%s = %s\n',`
import 2015-08-21 07:04:50 +00:00			`'out_file': './python/unicorn/%s_const.py',`
			`# prefixes for constant filenames of all archs - case sensitive`
			`'arm.h': 'arm',`
			`'arm64.h': 'arm64',`
			`'mips.h': 'mips',`
			`'x86.h': 'x86',`
			`'sparc.h': 'sparc',`
			`'m68k.h': 'm68k',`
improve const generator; emit unicorn.h consts 2015-08-28 14:31:38 +00:00			`'unicorn.h': 'unicorn',`
import 2015-08-21 07:04:50 +00:00			`'comment_open': '#',`
			`'comment_close': '',`
			`},`
add Go bindings 2015-08-28 04:13:50 +00:00			`'go': {`
improve const generator; emit unicorn.h consts 2015-08-28 14:31:38 +00:00			`'header': "package unicorn\n// For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT [%s_const.go]\nconst (\n",`
add Go bindings 2015-08-28 04:13:50 +00:00			`'footer': ")",`
improve const generator; emit unicorn.h consts 2015-08-28 14:31:38 +00:00			`'line_format': '\t%s = %s\n',`
add Go bindings 2015-08-28 04:13:50 +00:00			`'out_file': './go/unicorn/%s_const.go',`
			`# prefixes for constant filenames of all archs - case sensitive`
			`'arm.h': 'arm',`
			`'arm64.h': 'arm64',`
			`'mips.h': 'mips',`
			`'x86.h': 'x86',`
			`'sparc.h': 'sparc',`
			`'m68k.h': 'm68k',`
Fix capitalization for unicorn.h mapping in java and go binding 2015-08-29 06:33:38 +00:00			`'unicorn.h': 'unicorn',`
Initial changes to support use of const_generator.py 2015-08-29 02:41:13 +00:00			`'comment_open': '//',`
			`'comment_close': '',`
			`},`
			`'java': {`
			`'header': "// For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT\n\npackage unicorn;\n\npublic interface %sConst {\n",`
			`'footer': "\n}\n",`
remove UC_ prefix for go binding consts 2015-09-08 02:25:13 +00:00			`'line_format': ' public static final int UC_%s = %s;\n',`
Initial changes to support use of const_generator.py 2015-08-29 02:41:13 +00:00			`'out_file': './java/unicorn/%sConst.java',`
			`# prefixes for constant filenames of all archs - case sensitive`
			`'arm.h': 'Arm',`
			`'arm64.h': 'Arm64',`
			`'mips.h': 'Mips',`
			`'x86.h': 'X86',`
			`'sparc.h': 'Sparc',`
			`'m68k.h': 'M68k',`
Fix capitalization for unicorn.h mapping in java binding 2015-08-29 06:29:59 +00:00			`'unicorn.h': 'Unicorn',`
add Go bindings 2015-08-28 04:13:50 +00:00			`'comment_open': '//',`
			`'comment_close': '',`
			`},`
Refactored in order to be independent from the OS path separator, also added support for .net constants generation 2015-10-14 14:32:31 +00:00			`'dotnet': {`
Fixed spacing in constants files generation for .NET 2016-01-04 10:31:17 +00:00			`'header': "// For Unicorn Engine. AUTO-GENERATED FILE, DO NOT EDIT\n\nnamespace UnicornManaged.Const\n\nopen System\n\n[<AutoOpen>]\nmodule %s =\n",`
Refactored in order to be independent from the OS path separator, also added support for .net constants generation 2015-10-14 14:32:31 +00:00			`'footer': "\n",`
Fixed spacing in constants files generation for .NET 2016-01-04 10:31:17 +00:00			`'line_format': ' let UC_%s = %s\n',`
			`'out_file': os.path.join('dotnet', 'UnicornManaged', 'Const', '%s.fs'),`
Refactored in order to be independent from the OS path separator, also added support for .net constants generation 2015-10-14 14:32:31 +00:00			`# prefixes for constant filenames of all archs - case sensitive`
			`'arm.h': 'Arm',`
			`'arm64.h': 'Arm64',`
			`'mips.h': 'Mips',`
			`'x86.h': 'X86',`
			`'sparc.h': 'Sparc',`
			`'m68k.h': 'M68k',`
			`'unicorn.h': 'Common',`
Fixed spacing in dotnet comment 2016-01-04 15:55:20 +00:00			`'comment_open': ' //',`
Refactored in order to be independent from the OS path separator, also added support for .net constants generation 2015-10-14 14:32:31 +00:00			`'comment_close': '',`
			`},`
import 2015-08-21 07:04:50 +00:00			`}`

			`# markup for comments to be added to autogen files`
			`MARKUP = '//>'`

			`def gen(lang):`
			`global include, INCL_DIR`
			`templ = template[lang]`
			`for target in include:`
			`prefix = templ[target]`
python: better support for Python3 (adapted from Capstone code) 2015-10-04 02:55:29 +00:00			`outfile = open(templ['out_file'] %(prefix), 'wb') # open as binary prevents windows newlines`
			`outfile.write((templ['header'] % (prefix)).encode("utf-8"))`
improve const generator; emit unicorn.h consts 2015-08-28 14:31:38 +00:00			`if target == 'unicorn.h':`
			`prefix = ''`
Refactored in order to be independent from the OS path separator, also added support for .net constants generation 2015-10-14 14:32:31 +00:00			`lines = open(os.path.join(INCL_DIR, target)).readlines()`
import 2015-08-21 07:04:50 +00:00
improve const generator; emit unicorn.h consts 2015-08-28 14:31:38 +00:00			`previous = {}`
import 2015-08-21 07:04:50 +00:00			`count = 0`
			`for line in lines:`
			`line = line.strip()`

			`if line.startswith(MARKUP): # markup for comments`
python: better support for Python3 (adapted from Capstone code) 2015-10-04 02:55:29 +00:00			`outfile.write(("\n%s%s%s\n" %(templ['comment_open'], \`
			`line.replace(MARKUP, ''), templ['comment_close'])).encode("utf-8"))`
import 2015-08-21 07:04:50 +00:00			`continue`

			`if line == '' or line.startswith('//'):`
			`continue`

			`tmp = line.strip().split(',')`
			`for t in tmp:`
			`t = t.strip()`
			`if not t or t.startswith('//'): continue`
			`f = re.split('\s+', t)`

improve const generator; emit unicorn.h consts 2015-08-28 14:31:38 +00:00			`# parse #define UC_TARGET (num)`
			`define = False`
add composite const generator (fix #161) 2015-10-03 17:41:19 +00:00			`if f[0] == '#define' and len(f) >= 3:`
improve const generator; emit unicorn.h consts 2015-08-28 14:31:38 +00:00			`define = True`
			`f.pop(0)`
			`f.insert(1, '=')`

python: update consts after the latest change in the core 2015-08-24 05:16:22 +00:00			`if f[0].startswith("UC_" + prefix.upper()):`
improve const generator; emit unicorn.h consts 2015-08-28 14:31:38 +00:00			`if len(f) > 1 and f[1] not in ('//', '='):`
import 2015-08-21 07:04:50 +00:00			`print("Error: Unable to convert %s" % f)`
			`continue`
			`elif len(f) > 1 and f[1] == '=':`
			`rhs = ''.join(f[2:])`
			`else:`
			`rhs = str(count)`

improve const generator; emit unicorn.h consts 2015-08-28 14:31:38 +00:00			`lhs = f[0].strip()`
			`# evaluate bitshifts in constants e.g. "UC_X86 = 1 << 1"`
			`match = re.match(r'(?P<rhs>\s\d+\s<<\s\d+\s)', rhs)`
			`if match:`
add composite const generator (fix #161) 2015-10-03 17:41:19 +00:00			`rhs = str(eval(match.group(1)))`
improve const generator; emit unicorn.h consts 2015-08-28 14:31:38 +00:00			`else:`
			`# evaluate references to other constants e.g. "UC_ARM_REG_X = UC_ARM_REG_SP"`
			`match = re.match(r'^([^\d]\w+)$', rhs)`
			`if match:`
			`rhs = previous[match.group(1)]`

add composite const generator (fix #161) 2015-10-03 17:41:19 +00:00			`if not rhs.isdigit():`
			`for k, v in previous.items():`
			`rhs = re.sub(r'\b%s\b' % k, v, rhs)`
			`rhs = str(eval(rhs))`

remove UC_ prefix for go binding consts 2015-09-08 02:25:13 +00:00			`lhs_strip = re.sub(r'^UC_', '', lhs)`
improve const generator; emit unicorn.h consts 2015-08-28 14:31:38 +00:00			`count = int(rhs) + 1`
			`if (count == 1):`
python: better support for Python3 (adapted from Capstone code) 2015-10-04 02:55:29 +00:00			`outfile.write(("\n").encode("utf-8"))`
add composite const generator (fix #161) 2015-10-03 17:41:19 +00:00
python: better support for Python3 (adapted from Capstone code) 2015-10-04 02:55:29 +00:00			`outfile.write((templ['line_format'] % (lhs_strip, rhs)).encode("utf-8"))`
add composite const generator (fix #161) 2015-10-03 17:41:19 +00:00			`previous[lhs] = str(rhs)`
import 2015-08-21 07:04:50 +00:00
python: better support for Python3 (adapted from Capstone code) 2015-10-04 02:55:29 +00:00			`outfile.write((templ['footer']).encode("utf-8"))`
import 2015-08-21 07:04:50 +00:00			`outfile.close()`

			`def main():`
improve const generator; emit unicorn.h consts 2015-08-28 14:31:38 +00:00			`lang = sys.argv[1]`
			`if not lang in template:`
			`raise RuntimeError("Unsupported binding %s" % lang)`
			`gen(sys.argv[1])`
import 2015-08-21 07:04:50 +00:00
			`if __name__ == "__main__":`
			`if len(sys.argv) < 2:`
			`print("Usage:", sys.argv[0], " <python>")`
			`sys.exit(1)`
			`main()`