Dave Hylands | 0308f96 | 2014-03-10 00:07:35 -0700 | [diff] [blame] | 1 | from __future__ import print_function |
| 2 | |
Damien George | 55baff4 | 2014-01-21 21:40:13 +0000 | [diff] [blame] | 3 | import argparse |
| 4 | import re |
Damien George | fdf0da5 | 2014-03-08 15:03:25 +0000 | [diff] [blame] | 5 | import sys |
Damien George | 1976bae | 2014-01-24 22:22:00 +0000 | [diff] [blame] | 6 | |
| 7 | # codepoint2name is different in Python 2 to Python 3 |
| 8 | import platform |
# Import the HTML entity-name table from wherever this interpreter keeps it.
# Trying the Python 3 location first and falling back to the Python 2 module
# means the name is always bound (or a clear ImportError is raised), instead
# of silently staying undefined when the version string is neither '2' nor '3'.
try:
    from html.entities import codepoint2name as _html_codepoint2name
except ImportError:
    from htmlentitydefs import codepoint2name as _html_codepoint2name

# Work on a private copy so the custom names below do not leak into the
# standard library's shared table for every other user in the same process.
codepoint2name = dict(_html_codepoint2name)

# add some custom names to map characters that aren't in HTML
codepoint2name[ord('-')] = 'hyphen'
codepoint2name[ord('.')] = 'dot'
codepoint2name[ord(':')] = 'colon'
codepoint2name[ord('/')] = 'slash'
Damien George | a71c83a | 2014-02-15 11:34:50 +0000 | [diff] [blame] | 19 | |
# this must match the equivalent function in qstr.c
def compute_hash(qstr):
    """Return the 16-bit hash of the string *qstr*.

    Starting from 5381, each character is folded in as
    ``(h * 33) ^ ord(char)``; the result is the low 16 bits of that value.
    An empty string therefore hashes to 5381.
    """
    h = 5381
    for char in qstr:
        # Mask on every step: the low 16 bits of a product/XOR depend only on
        # the low 16 bits of the operands, so the result is identical while
        # the intermediate value never grows into an arbitrarily large integer.
        h = ((h * 33) ^ ord(char)) & 0xffff
    return h
| 26 | |
def do_work(infiles):
    """Read raw qstr definitions from *infiles* and print the generated C data.

    Blank lines and lines starting with ``//`` are ignored.  Lines that start
    with ``# `` and mention ``py/qstrdefs.h`` are counted; qstr lines are only
    processed once three such markers have been seen (the counter is shared
    across all input files).  Every processed line must look like
    ``Q(<text>)``.

    For each distinct qstr one ``Q(ident, data)`` line is printed to stdout,
    in first-seen order, where ``ident`` is the text with every character
    outside ``[A-Za-z0-9_]`` replaced by ``_<name>_`` taken from the
    module-level ``codepoint2name`` table.

    Returns True on success.  Returns False, after printing a
    ``(file:line)`` message to stderr, when a line is malformed or when a
    qstr contains a character that has no entry in ``codepoint2name``.
    """
    # compiled once instead of being looked up again for every input line
    qstr_line_re = re.compile(r'Q\((.+)\)$')
    non_ident_re = re.compile(r'[^A-Za-z0-9_]')

    def name_for(char_match):
        # raises KeyError when the character has no known name
        return "_" + codepoint2name[ord(char_match.group(0))] + "_"

    # read the qstrs in from the input files
    qstrs = {}
    cpp_header_blocks = 3
    for infile in infiles:
        with open(infile, 'rt') as f:
            for line_number, line in enumerate(f, 1):
                line = line.strip()

                # ignore blank lines and comments
                if not line or line.startswith('//'):
                    continue

                # We'll have 3 line-number lines for py/qstrdefs.h - initial, leaving it to
                # go into other headers, and returning to it.
                if line.startswith('# ') and 'py/qstrdefs.h' in line:
                    cpp_header_blocks -= 1
                    continue
                if cpp_header_blocks != 0:
                    continue

                # verify line is of the correct form
                match = qstr_line_re.match(line)
                if not match:
                    print('({}:{}) bad qstr format, got {}'.format(infile, line_number, line), file=sys.stderr)
                    return False

                # get the qstr value and derive its C identifier
                qstr = match.group(1)
                try:
                    ident = non_ident_re.sub(name_for, qstr)
                except KeyError:
                    # Report the offending line the same way as a malformed one
                    # rather than aborting with a bare traceback.
                    print('({}:{}) qstr contains a character with no known name, got {}'.format(infile, line_number, line), file=sys.stderr)
                    return False

                # don't add duplicates
                if ident in qstrs:
                    continue

                # add the qstr to the list, with order number to retain original order in file
                qstrs[ident] = (len(qstrs), ident, qstr)

    # process the qstrs, printing out the generated C header file
    print('// This file was automatically generated by makeqstrdata.py')
    print('')
    for order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
        qhash = compute_hash(qstr)
        qlen = len(qstr)
        # the four escaped bytes are: hash low, hash high, length low, length high
        print('Q({}, (const byte*)"\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}" "{}")'.format(ident, qhash & 0xff, (qhash >> 8) & 0xff, qlen & 0xff, (qlen >> 8) & 0xff, qstr))

    return True
| 76 | |
def main():
    """Command-line entry point.

    Parses one or more input file names from the command line, passes them to
    do_work() and terminates the process with exit status 1 if do_work()
    reports failure.
    """
    arg_parser = argparse.ArgumentParser(description='Process raw qstr file and output qstr data with length, hash and data bytes')
    arg_parser.add_argument('files', nargs='+', help='input file(s)')
    args = arg_parser.parse_args()

    if not do_work(args.files):
        print('exiting with error code', file=sys.stderr)
        # sys.exit() rather than the bare exit(): the latter is a helper added
        # by the site module and is missing under `python -S` or when frozen.
        sys.exit(1)
| 86 | |
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if "__main__" == __name__:
    main()