| """ |
| Process raw qstr file and output qstr data with length, hash and data bytes. |
| |
| This script works with Python 2.6, 2.7, 3.3 and 3.4. |
| """ |
| |
| from __future__ import print_function |
| |
| import re |
| import sys |
| |
# codepoint2name lives in a different module in Python 2 and in Python 3
import platform

if platform.python_version_tuple()[0] == '2':
    from htmlentitydefs import codepoint2name
else:
    from html.entities import codepoint2name

# Work on a private copy.  The imported dict is the standard library's own
# module-level table; adding entries to it in place would make the custom
# names below visible to every other user of that module in the process.
codepoint2name = dict(codepoint2name)

# add some custom names to map characters that aren't in HTML
codepoint2name.update({
    ord('-'): 'hyphen',
    ord(' '): 'space',
    ord('\''): 'squot',
    ord(','): 'comma',
    ord('.'): 'dot',
    ord(':'): 'colon',
    ord('/'): 'slash',
    ord('%'): 'percent',
    ord('#'): 'hash',
    ord('('): 'paren_open',
    ord(')'): 'paren_close',
    ord('['): 'bracket_open',
    ord(']'): 'bracket_close',
    ord('{'): 'brace_open',
    ord('}'): 'brace_close',
    ord('*'): 'star',
    ord('!'): 'bang',
    ord('\\'): 'backslash',
    ord('+'): 'plus',
})
| |
# this must match the equivalent function in qstr.c
def compute_hash(qstr, bytes_hash):
    """Return the hash of ``qstr`` reduced to ``bytes_hash`` bytes.

    The accumulator starts at 5381; for every character it is multiplied
    by 33 and XORed with the character's code point.  Only the low
    ``8 * bytes_hash`` bits of the final value are kept.

    The result is never zero: a zero hash means "hash not computed", so a
    value that reduces to zero is reported as 1 instead.
    """
    accumulator = 5381
    for ch in qstr:
        accumulator = (accumulator * 33) ^ ord(ch)

    low_bits_mask = (1 << (8 * bytes_hash)) - 1
    reduced = accumulator & low_bits_mask
    if reduced == 0:
        return 1
    return reduced
| |
def do_work(infiles):
    """Read raw qstr files and print the generated qstr data to stdout.

    Every file named in ``infiles`` is read line by line (after stripping
    surrounding whitespace):

    * ``QCFG(name, value)`` lines record a configuration value; one pair of
      parentheses enclosing the value is removed.
    * ``Q(text)`` lines define a qstr.  Its C identifier is ``text`` with
      every character outside ``[A-Za-z0-9_]`` replaced by ``_<name>_``,
      where ``<name>`` comes from ``codepoint2name``.  A qstr whose
      identifier was already seen is skipped; first-seen order is kept.
    * All other lines are ignored.

    The output is a comment header, a ``MP_QSTR_NULL`` entry, then one
    ``QDEF`` line per qstr carrying the hash bytes, the length bytes (both
    least-significant byte first) and the data.

    Exits with status 1, after a message on stderr, when no ``QCFG`` line
    was found or when a qstr's length does not fit in ``BYTES_IN_LEN``
    bytes.  Raises ``KeyError`` when ``BYTES_IN_LEN`` or ``BYTES_IN_HASH``
    is missing from the configuration, or when a qstr contains a character
    that has no entry in ``codepoint2name``.
    """
    # compile the patterns once instead of once per input line
    cfg_pattern = re.compile(r'^QCFG\((.+), (.+)\)')
    qstr_pattern = re.compile(r'^Q\((.*)\)$')
    non_ident_pattern = re.compile(r'[^A-Za-z0-9_]')

    def escape_char(m):
        # spell out a character that is not valid in a C identifier
        return "_" + codepoint2name[ord(m.group(0))] + "_"

    # read the qstrs in from the input files
    qcfgs = {}
    qstrs = {}
    for infile in infiles:
        with open(infile, 'rt') as f:
            for line in f:
                line = line.strip()

                # is this a config line?
                match = cfg_pattern.match(line)
                if match:
                    value = match.group(2)
                    if value[0] == '(' and value[-1] == ')':
                        # strip parenthesis from config value
                        value = value[1:-1]
                    qcfgs[match.group(1)] = value
                    continue

                # is this a QSTR line?
                match = qstr_pattern.match(line)
                if not match:
                    continue

                # get the qstr value
                qstr = match.group(1)
                ident = non_ident_pattern.sub(escape_char, qstr)

                # don't add duplicates
                if ident in qstrs:
                    continue

                # add the qstr to the list, with order number to retain original order in file
                qstrs[ident] = (len(qstrs), ident, qstr)

    if not qcfgs:
        sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
        sys.exit(1)

    # get config variables
    cfg_bytes_len = int(qcfgs['BYTES_IN_LEN'])
    cfg_bytes_hash = int(qcfgs['BYTES_IN_HASH'])
    cfg_max_len = 1 << (8 * cfg_bytes_len)

    # print out the starter of the generated C header file
    print('// This file was automatically generated by makeqstrdata.py')
    print('')

    # add NULL qstr with no hash or data
    print('QDEF(MP_QSTR_NULL, (const byte*)"%s%s" "")' % ('\\x00' * cfg_bytes_hash, '\\x00' * cfg_bytes_len))

    # go through each qstr and print it out
    for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
        # Calculate len of str, taking escapes into account: an escaped
        # backslash pair counts as one character and any remaining
        # backslash is not counted.
        qlen = len(qstr.replace("\\\\", "-").replace("\\", ""))
        if qlen >= cfg_max_len:
            # Report on stderr (stdout carries the generated header) and
            # exit explicitly; an assert would be stripped under -O and a
            # truncated length would then be emitted.
            sys.stderr.write("ERROR: qstr is too long: %s\n" % qstr)
            sys.exit(1)

        qhash = compute_hash(qstr, cfg_bytes_hash)
        qdata = qstr.replace('"', '\\"')
        # both fields are emitted least-significant byte first
        qlen_str = ('\\x%02x' * cfg_bytes_len) % tuple(((qlen >> (8 * i)) & 0xff) for i in range(cfg_bytes_len))
        qhash_str = ('\\x%02x' * cfg_bytes_hash) % tuple(((qhash >> (8 * i)) & 0xff) for i in range(cfg_bytes_hash))
        print('QDEF(MP_QSTR_%s, (const byte*)"%s%s" "%s")' % (ident, qhash_str, qlen_str, qdata))
| |
# When run as a script, treat every command-line argument as an input file.
if __name__ == "__main__":
    input_files = sys.argv[1:]
    do_work(input_files)