Damien George | 26b512e | 2015-05-30 23:11:16 +0100 | [diff] [blame] | 1 | """ |
| 2 | Process raw qstr file and output qstr data with length, hash and data bytes. |
| 3 | |
| 4 | This script works with Python 2.6, 2.7, 3.3 and 3.4. |
| 5 | """ |
| 6 | |
Dave Hylands | 0308f96 | 2014-03-10 00:07:35 -0700 | [diff] [blame] | 7 | from __future__ import print_function |
| 8 | |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 9 | import re |
Damien George | fdf0da5 | 2014-03-08 15:03:25 +0000 | [diff] [blame] | 10 | import sys |
Damien George | 1976bae | 2014-01-24 22:22:00 +0000 | [diff] [blame] | 11 | |
# Python 2/3 compatibility shim:
#   - bytes_cons(val, enc) yields an object that iterates as integer byte values
#   - codepoint2name is imported from whichever module provides it
import platform
if platform.python_version_tuple()[0] == '3':
    # Python 3: bytes(str, encoding) already iterates as integers
    bytes_cons = bytes
    from html.entities import codepoint2name
elif platform.python_version_tuple()[0] == '2':
    def bytes_cons(val, enc=None):
        # Python 2: wrap in a bytearray so iteration gives integers;
        # the encoding argument is accepted but not used
        return bytearray(val)
    from htmlentitydefs import codepoint2name
# end compatibility code
| 23 | |
# Extend the HTML entity table with names for characters that have no HTML
# entity name, so that they can be spelled out inside generated identifiers.
codepoint2name.update(dict((ord(char), name) for char, name in (
    ('-', 'hyphen'),
    (' ', 'space'),
    ('\'', 'squot'),
    (',', 'comma'),
    ('.', 'dot'),
    (':', 'colon'),
    (';', 'semicolon'),
    ('/', 'slash'),
    ('%', 'percent'),
    ('#', 'hash'),
    ('(', 'paren_open'),
    (')', 'paren_close'),
    ('[', 'bracket_open'),
    (']', 'bracket_close'),
    ('{', 'brace_open'),
    ('}', 'brace_close'),
    ('*', 'star'),
    ('!', 'bang'),
    ('\\', 'backslash'),
    ('+', 'plus'),
    ('$', 'dollar'),
    ('=', 'equals'),
    ('?', 'question'),
    ('@', 'at_sign'),
    ('^', 'caret'),
    ('|', 'pipe'),
    ('~', 'tilde'),
)))
Damien George | a71c83a | 2014-02-15 11:34:50 +0000 | [diff] [blame] | 53 | |
# this must match the equivalent function in qstr.c
def compute_hash(qstr, bytes_hash):
    """Return the hash of a qstr, truncated to ``bytes_hash`` bytes.

    ``qstr`` is iterated and each item is used as an integer byte value
    (callers pass the result of ``bytes_cons``).  The hash starts at 5381
    and for every byte is multiplied by 33 and xor-ed with that byte.

    The result is never zero: a hash whose truncated value is zero is
    reported as 1 instead.
    """
    mask = (1 << (8 * bytes_hash)) - 1
    value = 5381
    for b in qstr:
        # Masking on every step keeps the accumulator small; the low bits of
        # a multiply/xor depend only on the low bits of the operands, so the
        # truncated result is the same as masking once at the end.
        value = ((value * 33) ^ b) & mask
    # Make sure that valid hash is never zero, zero means "hash not computed"
    return (value & mask) or 1
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 61 | |
def qstr_escape(qst):
    """Turn a qstr into text usable as part of an identifier.

    Every character other than an ASCII letter, digit or underscore is
    replaced by ``_<name>_``, where ``<name>`` is the character's entry in
    ``codepoint2name`` or, when it has none, its code point formatted as
    ``0x`` followed by at least two hex digits.
    """
    def replace_char(match):
        codepoint = ord(match.group(0))
        label = codepoint2name.get(codepoint)
        if label is None:
            # no symbolic name known: fall back to the numeric code point
            label = '0x%02x' % codepoint
        return '_%s_' % label

    return re.sub(r'[^A-Za-z0-9_]', replace_char, qst)
Damien George | 594fa73 | 2016-01-31 12:59:59 +0000 | [diff] [blame] | 71 | |
def parse_input_headers(infiles):
    """Read config entries and qstrs from the given input files.

    Each file is scanned line by line (after stripping surrounding
    whitespace):

    - ``QCFG(name, value)`` lines are stored in a dict keyed by name; one
      pair of parentheses enclosing the value is removed.
    - ``Q(text)`` lines define a qstr.  The two-character text backslash-n
      is turned into a real newline.  Entries whose escaped identifier was
      already seen are ignored.

    Returns ``(qcfgs, qstrs)`` where ``qstrs`` maps each identifier to a
    tuple ``(order, ident, qstr)``; ``order`` is the insertion index.

    If no config entry was found at all, an error is written to stderr and
    the process exits with status 1.
    """
    cfg_pattern = re.compile(r'^QCFG\((.+), (.+)\)')
    qstr_pattern = re.compile(r'^Q\((.*)\)$')

    qcfgs = {}
    qstrs = {}
    for path in infiles:
        with open(path, 'rt') as handle:
            for raw in handle:
                text = raw.strip()

                # config line: record it and move on to the next line
                cfg = cfg_pattern.match(text)
                if cfg:
                    key, value = cfg.groups()
                    if value.startswith('(') and value.endswith(')'):
                        # strip parenthesis from config value
                        value = value[1:-1]
                    qcfgs[key] = value
                    continue

                # anything that is not a QSTR line is ignored
                found = qstr_pattern.match(text)
                if not found:
                    continue

                qstr = found.group(1)

                # special case to specify control characters
                if qstr == '\\n':
                    qstr = '\n'

                # work out the corresponding qstr name
                ident = qstr_escape(qstr)

                # first definition wins; the stored index retains the
                # original order of appearance
                if ident not in qstrs:
                    qstrs[ident] = (len(qstrs), ident, qstr)

    if not qcfgs:
        sys.stderr.write("ERROR: Empty preprocessor output - check for errors above\n")
        sys.exit(1)

    return qcfgs, qstrs
| 118 | |
def make_bytes(cfg_bytes_len, cfg_bytes_hash, qstr):
    """Render one qstr as the C expression holding its hash, length and data.

    The qstr is converted with ``bytes_cons(qstr, 'utf8')``; its byte length
    and hash are each emitted as hex escapes, least-significant byte first,
    using ``cfg_bytes_len`` and ``cfg_bytes_hash`` bytes respectively.  The
    hash field comes first, then the length field, then the data.

    Data consisting only of printable ASCII (excluding backslash and double
    quote) is emitted verbatim; anything else is emitted entirely as hex
    escapes.

    Raises ``AssertionError`` if the byte length does not fit in
    ``cfg_bytes_len`` bytes.
    """
    def le_hex(value, nbytes):
        # hex-escape the low `nbytes` bytes of `value`, least significant first
        return ''.join('\\x%02x' % ((value >> (8 * i)) & 0xff) for i in range(nbytes))

    qbytes = bytes_cons(qstr, 'utf8')
    qlen = len(qbytes)
    if qlen >= (1 << (8 * cfg_bytes_len)):
        # Raise explicitly instead of using a bare assert so the check is not
        # removed under `python -O`, and keep the message out of stdout, which
        # carries the generated header.
        raise AssertionError('qstr is too long: %s' % qstr)
    qhash = compute_hash(qbytes, cfg_bytes_hash)

    if all(32 <= ord(c) <= 126 and c != '\\' and c != '"' for c in qstr):
        # qstr is all printable ASCII so render it as-is (for easier debugging)
        qdata = qstr
    else:
        # qstr contains non-printable codes so render entire thing as hex pairs
        qdata = ''.join(('\\x%02x' % b) for b in qbytes)

    qlen_str = le_hex(qlen, cfg_bytes_len)
    qhash_str = le_hex(qhash, cfg_bytes_hash)
    return '(const byte*)"%s%s" "%s"' % (qhash_str, qlen_str, qdata)
| 135 | |
def print_qstr_data(qcfgs, qstrs):
    """Print the generated qstr definitions to stdout.

    ``qcfgs`` must provide ``BYTES_IN_LEN`` and ``BYTES_IN_HASH`` (converted
    with ``int``).  ``qstrs`` maps identifiers to ``(order, ident, qstr)``
    tuples.  Output is a header comment, a blank line, the NULL qstr entry
    and then one ``QDEF`` line per qstr in ascending ``order``.
    """
    # field widths, in bytes, taken from the config
    len_width = int(qcfgs['BYTES_IN_LEN'])
    hash_width = int(qcfgs['BYTES_IN_HASH'])

    # starter of the generated C header file
    print('// This file was automatically generated by makeqstrdata.py')
    print('')

    # NULL qstr: all-zero hash and length fields, no data
    null_hash = '\\x00' * hash_width
    null_len = '\\x00' * len_width
    print('QDEF(MP_QSTR_NULL, (const byte*)"%s%s" "")' % (null_hash, null_len))

    # emit every qstr in the order it was first recorded
    entries = list(qstrs.values())
    entries.sort(key=lambda entry: entry[0])
    for _, ident, qstr in entries:
        rendered = make_bytes(len_width, hash_width, qstr)
        print('QDEF(MP_QSTR_%s, %s)' % (ident, rendered))
| 152 | |
def do_work(infiles):
    """Parse the given input files and print the resulting qstr data."""
    parsed = parse_input_headers(infiles)
    print_qstr_data(*parsed)
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 156 | |
# When run as a script, every command-line argument is an input file.
if __name__ == "__main__":
    input_paths = sys.argv[1:]
    do_work(input_paths)