blob: afde7720541b9eb09864427a208d5dc9ea7d1a9b [file] [log] [blame]
Dave Hylands0308f962014-03-10 00:07:35 -07001from __future__ import print_function
2
Damien George55baff42014-01-21 21:40:13 +00003import argparse
4import re
Damien Georgefdf0da52014-03-08 15:03:25 +00005import sys
Damien George1976bae2014-01-24 22:22:00 +00006
# codepoint2name lives in a different module in Python 2 and Python 3
import platform
if platform.python_version_tuple()[0] == '2':
    from htmlentitydefs import codepoint2name
elif platform.python_version_tuple()[0] == '3':
    from html.entities import codepoint2name

# register names for characters that have no HTML entity name, so that
# they can be looked up by code point like the standard ones
codepoint2name.update({
    ord('-'): 'hyphen',
    ord('.'): 'dot',
    ord(':'): 'colon',
    ord('/'): 'slash',
    ord('%'): 'percent',
    ord('#'): 'hash',
    ord('{'): 'brace_open',
    ord('}'): 'brace_close',
})
Damien Georgea71c83a2014-02-15 11:34:50 +000023
# this must match the equivalent function in qstr.c
def compute_hash(qstr):
    """Return the 16-bit hash of a qstr.

    qstr may be text or bytes.  Text is encoded as UTF-8 first so that the
    hash is computed over bytes on both Python 2 (where strings read from a
    file are already bytes) and Python 3 (where they are Unicode text).
    For pure ASCII input the result is the same as hashing the characters.

    NOTE(review): assumes the C implementation hashes the raw UTF-8 bytes of
    the string -- confirm against qstr.c.
    """
    data = qstr if isinstance(qstr, bytes) else qstr.encode('utf-8')
    value = 5381
    # bytearray yields integers on both Python 2 and Python 3
    for byte in bytearray(data):
        value = (value * 33) ^ byte
    # only the low 16 bits are kept
    return value & 0xffff
30
# given a list of (name,regex) pairs, find the first one that matches the given line
def re_match_first(regexs, line):
    """Return (name, match) for the first regex that matches line.

    The regexs are tried in list order using re.match, so each one is
    anchored at the start of line.  If none of them match, (None, None)
    is returned.
    """
    # lazily try each regex; evaluation stops at the first successful match
    attempts = ((name, re.match(regex, line)) for name, regex in regexs)
    hits = ((name, found) for name, found in attempts if found)
    return next(hits, (None, None))
38
# regexs to recognise the kinds of line that the CPP emits
# kept as a list of (name, regex) pairs so that matching order is honoured
cpp_regexs = [
    # a qstr definition; group 1 is everything between the parentheses
    ('qstr', r'Q\((.+)\)$'),
    # a typedef or extern declaration
    ('cdecl', r'(typedef|extern) [A-Za-z0-9_* ]+;$'),
]
45
def _qstr_ident(qstr):
    """Turn a qstr value into a string that is usable in a C identifier.

    Every character outside [A-Za-z0-9_] is replaced by its name from
    codepoint2name wrapped in underscores (e.g. '.' becomes '_dot_').
    Characters that have no name fall back to their code point in hex
    (e.g. '!' becomes '_0x21_') instead of raising KeyError.
    """
    def escape(found):
        code = ord(found.group(0))
        name = codepoint2name.get(code, '0x{:02x}'.format(code))
        return '_' + name + '_'

    return re.sub(r'[^A-Za-z0-9_]', escape, qstr)


def do_work(infiles):
    """Read qstr definitions from infiles and print the generated C header.

    infiles is an iterable of paths to text files.  Blank lines, lines
    starting with '//' and lines starting with '#' are skipped; every other
    line must match one of the patterns in cpp_regexs.

    Returns True on success.  As soon as a line in an unrecognised format is
    found, a message is printed to stderr and False is returned; nothing is
    written to stdout in that case.
    """
    # read the qstrs in from the input files
    qstrs = {}
    for infile in infiles:
        with open(infile, 'rt') as f:
            for line_number, line in enumerate(f, 1):
                line = line.strip()

                # ignore blank lines, comments and preprocessor directives
                if not line or line.startswith(('//', '#')):
                    continue

                # work out what kind of line it is
                match_kind, match = re_match_first(cpp_regexs, line)
                if match_kind is None:
                    # unknown line format
                    print('({}:{}) bad qstr format, got {}'.format(infile, line_number, line), file=sys.stderr)
                    return False
                if match_kind != 'qstr':
                    # not a line with a qstr
                    continue

                # get the qstr value and the identifier derived from it
                qstr = match.group(1)
                ident = _qstr_ident(qstr)

                # don't add duplicates
                # NOTE: different qstrs that escape to the same identifier
                # also count as duplicates; only the first one is kept
                if ident in qstrs:
                    continue

                # add the qstr to the list, with order number to retain original order in file
                qstrs[ident] = (len(qstrs), ident, qstr)

    # process the qstrs, printing out the generated C header file
    print('// This file was automatically generated by makeqstrdata.py')
    print('')
    for _order, ident, qstr in sorted(qstrs.values(), key=lambda x: x[0]):
        qhash = compute_hash(qstr)
        # the length is a byte count, so measure the UTF-8 encoding of text
        # NOTE(review): assumes the generated file is written as UTF-8 -- confirm
        qlen = len(qstr if isinstance(qstr, bytes) else qstr.encode('utf-8'))
        # header bytes: hash (low, high) then length (low, high), followed by the data
        print('Q({}, (const byte*)"\\x{:02x}\\x{:02x}\\x{:02x}\\x{:02x}" "{}")'.format(ident, qhash & 0xff, (qhash >> 8) & 0xff, qlen & 0xff, (qlen >> 8) & 0xff, qstr))

    return True
90
def main():
    """Command-line entry point.

    Parses the input file names from the command line and passes them to
    do_work.  If do_work reports failure, a message is printed to stderr
    and the process exits with status 1.
    """
    arg_parser = argparse.ArgumentParser(description='Process raw qstr file and output qstr data with length, hash and data bytes')
    arg_parser.add_argument('files', nargs='+', help='input file(s)')
    args = arg_parser.parse_args()

    if not do_work(args.files):
        print('exiting with error code', file=sys.stderr)
        # use sys.exit rather than the exit() helper, which is installed by
        # the site module and is not guaranteed to exist (e.g. python -S)
        sys.exit(1)


if __name__ == "__main__":
    main()