Dave Hylands | 0308f96 | 2014-03-10 00:07:35 -0700 | [diff] [blame] | 1 | from __future__ import print_function |
| 2 | |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 3 | import argparse |
| 4 | import re |
Damien George | fdf0da5 | 2014-03-08 15:03:25 +0000 | [diff] [blame] | 5 | import sys |
Damien George | 1976bae | 2014-01-24 22:22:00 +0000 | [diff] [blame] | 6 | |
| 7 | # codepoint2name is different in Python 2 to Python 3 |
| 8 | import platform |
| 9 | if platform.python_version_tuple()[0] == '2': |
| 10 | from htmlentitydefs import codepoint2name |
| 11 | elif platform.python_version_tuple()[0] == '3': |
| 12 | from html.entities import codepoint2name |
Paul Sokolovsky | 73b7027 | 2014-04-13 05:28:46 +0300 | [diff] [blame] | 13 | codepoint2name[ord('-')] = 'hyphen'; |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 14 | |
Damien George | a71c83a | 2014-02-15 11:34:50 +0000 | [diff] [blame] | 15 | # add some custom names to map characters that aren't in HTML |
Damien George | 56e1f99 | 2015-01-11 14:16:24 +0000 | [diff] [blame] | 16 | codepoint2name[ord(' ')] = 'space' |
| 17 | codepoint2name[ord('\'')] = 'squot' |
| 18 | codepoint2name[ord(',')] = 'comma' |
Damien George | a71c83a | 2014-02-15 11:34:50 +0000 | [diff] [blame] | 19 | codepoint2name[ord('.')] = 'dot' |
Paul Sokolovsky | 4aee119 | 2014-02-18 00:06:37 +0200 | [diff] [blame] | 20 | codepoint2name[ord(':')] = 'colon' |
| 21 | codepoint2name[ord('/')] = 'slash' |
Damien George | 5805111 | 2014-04-15 12:42:52 +0100 | [diff] [blame] | 22 | codepoint2name[ord('%')] = 'percent' |
Damien George | b013aea | 2014-04-15 12:50:21 +0100 | [diff] [blame] | 23 | codepoint2name[ord('#')] = 'hash' |
Damien George | 56e1f99 | 2015-01-11 14:16:24 +0000 | [diff] [blame] | 24 | codepoint2name[ord('(')] = 'paren_open' |
| 25 | codepoint2name[ord(')')] = 'paren_close' |
| 26 | codepoint2name[ord('[')] = 'bracket_open' |
| 27 | codepoint2name[ord(']')] = 'bracket_close' |
Damien George | 897fe0c | 2014-04-15 22:03:55 +0100 | [diff] [blame] | 28 | codepoint2name[ord('{')] = 'brace_open' |
| 29 | codepoint2name[ord('}')] = 'brace_close' |
Damien George | 708c073 | 2014-04-27 19:23:46 +0100 | [diff] [blame] | 30 | codepoint2name[ord('*')] = 'star' |
Damien George | 56e1f99 | 2015-01-11 14:16:24 +0000 | [diff] [blame] | 31 | codepoint2name[ord('!')] = 'bang' |
Damien George | a71c83a | 2014-02-15 11:34:50 +0000 | [diff] [blame] | 32 | |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 33 | # this must match the equivalent function in qstr.c |
| 34 | def compute_hash(qstr): |
Damien George | 6e628c4 | 2014-03-25 15:27:15 +0000 | [diff] [blame] | 35 | hash = 5381 |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 36 | for char in qstr: |
Damien George | 6e628c4 | 2014-03-25 15:27:15 +0000 | [diff] [blame] | 37 | hash = (hash * 33) ^ ord(char) |
Chris Angelico | de09caa | 2014-06-07 06:55:27 +1000 | [diff] [blame] | 38 | # Make sure that valid hash is never zero, zero means "hash not computed" |
| 39 | return (hash & 0xffff) or 1 |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 40 | |
| 41 | def do_work(infiles): |
| 42 | # read the qstrs in from the input files |
Damien George | 6942f80 | 2015-01-11 17:52:45 +0000 | [diff] [blame] | 43 | qcfgs = {} |
Paul Sokolovsky | ab5d082 | 2014-01-24 00:22:00 +0200 | [diff] [blame] | 44 | qstrs = {} |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 45 | for infile in infiles: |
| 46 | with open(infile, 'rt') as f: |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 47 | for line in f: |
Damien George | 6942f80 | 2015-01-11 17:52:45 +0000 | [diff] [blame] | 48 | line = line.strip() |
| 49 | |
| 50 | # is this a config line? |
| 51 | match = re.match(r'^QCFG\((.+), (.+)\)', line) |
| 52 | if match: |
| 53 | value = match.group(2) |
| 54 | if value[0] == '(' and value[-1] == ')': |
| 55 | # strip parenthesis from config value |
| 56 | value = value[1:-1] |
| 57 | qcfgs[match.group(1)] = value |
| 58 | continue |
| 59 | |
stijn | 1dc7f04 | 2014-05-02 21:10:47 +0200 | [diff] [blame] | 60 | # is this a QSTR line? |
Damien George | 6942f80 | 2015-01-11 17:52:45 +0000 | [diff] [blame] | 61 | match = re.match(r'^Q\((.*)\)$', line) |
stijn | 1dc7f04 | 2014-05-02 21:10:47 +0200 | [diff] [blame] | 62 | if not match: |
Damien George | 5bb7d99 | 2014-04-13 13:16:51 +0100 | [diff] [blame] | 63 | continue |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 64 | |
| 65 | # get the qstr value |
| 66 | qstr = match.group(1) |
Paul Sokolovsky | ab5d082 | 2014-01-24 00:22:00 +0200 | [diff] [blame] | 67 | ident = re.sub(r'[^A-Za-z0-9_]', lambda s: "_" + codepoint2name[ord(s.group(0))] + "_", qstr) |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 68 | |
| 69 | # don't add duplicates |
Paul Sokolovsky | ab5d082 | 2014-01-24 00:22:00 +0200 | [diff] [blame] | 70 | if ident in qstrs: |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 71 | continue |
| 72 | |
Damien George | 1976bae | 2014-01-24 22:22:00 +0000 | [diff] [blame] | 73 | # add the qstr to the list, with order number to retain original order in file |
Paul Sokolovsky | 6ea0e92 | 2014-04-11 20:36:08 +0300 | [diff] [blame] | 74 | qstrs[ident] = (len(qstrs), ident, qstr) |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 75 | |
Damien George | 95836f8 | 2015-01-11 22:27:30 +0000 | [diff] [blame] | 76 | # get config variables |
| 77 | cfg_bytes_len = int(qcfgs['BYTES_IN_LEN']) |
| 78 | cfg_max_len = 1 << (8 * cfg_bytes_len) |
| 79 | |
| 80 | # print out the starte of the generated C header file |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 81 | print('// This file was automatically generated by makeqstrdata.py') |
Dave Hylands | 7a996b1 | 2014-01-21 15:28:27 -0800 | [diff] [blame] | 82 | print('') |
Damien George | 95836f8 | 2015-01-11 22:27:30 +0000 | [diff] [blame] | 83 | |
Damien George | 6942f80 | 2015-01-11 17:52:45 +0000 | [diff] [blame] | 84 | # add NULL qstr with no hash or data |
Damien George | 95836f8 | 2015-01-11 22:27:30 +0000 | [diff] [blame] | 85 | print('QDEF(MP_QSTR_NULL, (const byte*)"\\x00\\x00%s" "")' % ('\\x00' * cfg_bytes_len)) |
| 86 | |
| 87 | # go through each qstr and print it out |
Paul Sokolovsky | 6ea0e92 | 2014-04-11 20:36:08 +0300 | [diff] [blame] | 88 | for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]): |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 89 | qhash = compute_hash(qstr) |
| 90 | qlen = len(qstr) |
Damien George | 56e1f99 | 2015-01-11 14:16:24 +0000 | [diff] [blame] | 91 | qdata = qstr.replace('"', '\\"') |
Damien George | 95836f8 | 2015-01-11 22:27:30 +0000 | [diff] [blame] | 92 | if qlen >= cfg_max_len: |
| 93 | print('qstr is too long:', qstr) |
| 94 | assert False |
Damien George | 99ab64f | 2015-01-11 22:40:38 +0000 | [diff] [blame] | 95 | qlen_str = ('\\x%02x' * cfg_bytes_len) % tuple(((qlen >> (8 * i)) & 0xff) for i in range(cfg_bytes_len)) |
Damien George | 95836f8 | 2015-01-11 22:27:30 +0000 | [diff] [blame] | 96 | print('QDEF(MP_QSTR_%s, (const byte*)"\\x%02x\\x%02x%s" "%s")' % (ident, qhash & 0xff, (qhash >> 8) & 0xff, qlen_str, qdata)) |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 97 | |
| 98 | return True |
| 99 | |
| 100 | def main(): |
| 101 | arg_parser = argparse.ArgumentParser(description='Process raw qstr file and output qstr data with length, hash and data bytes') |
| 102 | arg_parser.add_argument('files', nargs='+', help='input file(s)') |
| 103 | args = arg_parser.parse_args() |
| 104 | |
| 105 | result = do_work(args.files) |
| 106 | if not result: |
Damien George | fdf0da5 | 2014-03-08 15:03:25 +0000 | [diff] [blame] | 107 | print('exiting with error code', file=sys.stderr) |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 108 | exit(1) |
| 109 | |
| 110 | if __name__ == "__main__": |
| 111 | main() |