#!/usr/bin/env python3
# Copyright (c) 2018 Linaro Limited
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#
17
#
# Generate a decoding tree from a specification file.
# See the syntax and semantics in docs/devel/decodetree.rst.
#
22
Philippe Mathieu-Daudé4caceca2021-01-10 01:02:40 +010023import io
Richard Henderson568ae7e2017-12-07 12:44:09 -080024import os
25import re
26import sys
27import getopt
Richard Henderson568ae7e2017-12-07 12:44:09 -080028
# Instruction width in bits, the width embedded in the names of the
# generated extract/deposit helpers, and the all-ones instruction mask.
insnwidth = 32
bitop_width = 32
insnmask = (1 << insnwidth) - 1
variablewidth = False

# State accumulated from the specification file.
fields = {}
arguments = {}
formats = {}
allpatterns = []
anyextern = False

# When set, error_with_file() exits with status 0 instead of 1.
testforerror = False

# Naming of the generated C entry points.
translate_prefix = 'trans'
translate_scope = 'static '
decode_function = 'decode'
insntype = 'uint32_t'

# Current input and output files.
input_file = ''
output_file = None
output_fd = None

# An identifier for C.
re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'

# Identifiers for Arguments, Fields, Formats and Patterns.
re_arg_ident = '&[a-zA-Z0-9_]*'
re_fld_ident = '%[a-zA-Z0-9_]*'
re_fmt_ident = '@[a-zA-Z0-9_]*'
re_pat_ident = '[a-zA-Z0-9_]*'
Richard Henderson568ae7e2017-12-07 12:44:09 -080056
def error_with_file(file, lineno, *args):
    """Print an error message from file:line and args and exit.

    The message goes to stderr as "FILE:LINENO: error: ARGS"; FILE and
    LINENO are omitted when false.  If an output file is open it is
    closed and, unless its path starts with "/dev", removed.  The exit
    status is 0 when testforerror is set and 1 otherwise.
    """
    prefix = ''
    if file:
        prefix += f'{file}:'
    if lineno:
        prefix += f'{lineno}:'
    if prefix:
        prefix += ' '
    print(prefix, end='error: ', file=sys.stderr)
    print(*args, file=sys.stderr)

    if output_file and output_fd:
        output_fd.close()
        # Do not try to remove e.g. -o /dev/null
        if not output_file.startswith("/dev"):
            try:
                os.remove(output_file)
            except PermissionError:
                pass
    # Use sys.exit rather than the site-provided exit(), which is meant
    # for the interactive shell and is absent under "python -S".
    sys.exit(0 if testforerror else 1)
# end error_with_file
82
Richard Henderson568ae7e2017-12-07 12:44:09 -080083
def error(lineno, *args):
    """Report ARGS as an error at LINENO of the current input file and exit."""
    return error_with_file(input_file, lineno, *args)
# end error
87
Richard Henderson568ae7e2017-12-07 12:44:09 -080088
def output(*args):
    """Write each string in ARGS, in order, to the output file."""
    output_fd.writelines(args)
93
94
def output_autogen():
    """Emit the C comment marking the output as autogenerated."""
    banner = '/* This file is autogenerated by scripts/decodetree.py. */\n\n'
    output(banner)
97
98
def str_indent(c):
    """Return a string made of C space characters"""
    return ''.ljust(c)
102
103
def str_fields(fields):
    """Return the sorted names in FIELDS joined by underscores"""
    return '_'.join(sorted(fields))
110
111
def whex(val):
    """Return a hex string for val, zero-padded to insnwidth // 4 digits"""
    digits = insnwidth // 4
    return '0x' + format(val, f'0{digits}x')
116
117
def whexC(val):
    """Return a hex string for val padded for insnwidth,
       and with the proper suffix for a C constant."""
    text = whex(val)
    # Values that need more than 32 bits get 'ull'; values with bit 31
    # set get 'u'; anything smaller is left unsuffixed.
    if val >= 0x100000000:
        return text + 'ull'
    if val >= 0x80000000:
        return text + 'u'
    return text
127
128
def str_match_bits(bits, mask):
    """Return a string pretty-printing BITS/MASK

    Bits are listed from bit insnwidth - 1 down to bit 0: '.' where
    MASK is clear, otherwise the '0' or '1' taken from BITS.
    """
    # A blank follows the characters for bits 24, 16 and 8.
    space = 0x01010100
    chars = []
    probe = 1 << (insnwidth - 1)
    while probe != 0:
        if not probe & mask:
            chars.append('.')
        else:
            chars.append('1' if probe & bits else '0')
        if probe & space:
            chars.append(' ')
        probe >>= 1
    return ''.join(chars)
148
149
def is_pow2(x):
    """Return true iff X is equal to a power of 2.

    Zero and negative values are not powers of 2 and yield False.
    """
    # A positive power of 2 has exactly one bit set, so clearing the
    # lowest set bit leaves nothing.
    return x > 0 and (x & (x - 1)) == 0
153
154
def ctz(x):
    """Return the number of times 2 factors into X (X must be non-zero)."""
    assert x != 0
    # x & -x isolates the lowest set bit; its bit index is the answer.
    return (x & -x).bit_length() - 1
162
163
def is_contiguous(bits):
    """Return the position of the lowest set bit of BITS if its set bits
    form one unbroken run, else -1 (also -1 when BITS is zero)."""
    if bits == 0:
        return -1
    low = ctz(bits)
    # A run of ones shifted down to bit 0 is one less than a power of 2.
    return low if is_pow2((bits >> low) + 1) else -1
172
173
def eq_fields_for_args(flds_a, arg):
    """Return True if argument set ARG has exactly the field names of
    FLDS_A and every type in ARG is 'int'."""
    if len(flds_a) != len(arg.fields):
        return False
    # Only allow inference on default types
    if any(t != 'int' for t in arg.types):
        return False
    return all(name in arg.fields for name in flds_a)
185
186
def eq_fields_for_fmts(flds_a, flds_b):
    """Return True if FLDS_A and FLDS_B have the same names, each mapped
    to values of the same class that do not compare unequal."""
    if len(flds_a) != len(flds_b):
        return False
    for name, mine in flds_a.items():
        if name not in flds_b:
            return False
        theirs = flds_b[name]
        if mine.__class__ != theirs.__class__:
            return False
        if mine != theirs:
            return False
    return True
197
198
class Field:
    """Class representing a simple instruction field"""
    def __init__(self, sign, pos, len):
        self.sign = sign
        self.pos = pos
        self.len = len
        # LEN one-bits positioned at bit POS.
        self.mask = ((1 << len) - 1) << pos

    def __str__(self):
        marker = 's' if self.sign else ''
        return f'{self.pos}:{marker}{self.len}'

    def str_extract(self):
        """Return the C expression extracting this field from insn"""
        prefix = 's' if self.sign else ''
        return f'{prefix}extract{bitop_width}(insn, {self.pos}, {self.len})'

    def __eq__(self, other):
        if self.sign != other.sign:
            return False
        return self.mask == other.mask

    def __ne__(self, other):
        return not self.__eq__(other)
# end Field
225
226
class MultiField:
    """Class representing a compound instruction field"""
    def __init__(self, subs, mask):
        self.subs = subs
        # The sign of the whole field is that of the first component.
        self.sign = subs[0].sign
        self.mask = mask

    def __str__(self):
        return str(self.subs)

    def str_extract(self):
        """Return the C expression assembling all components"""
        expr = '0'
        filled = 0
        # Walk the components last to first, placing each one above the
        # FILLED bits accumulated so far.
        for sub in reversed(self.subs):
            piece = sub.str_extract()
            if filled == 0:
                expr = piece
            else:
                room = bitop_width - filled
                expr = (f'deposit{bitop_width}'
                        f'({expr}, {filled}, {room}, {piece})')
            filled += sub.len
        return expr

    def __ne__(self, other):
        return not self.__eq__(other)

    def __eq__(self, other):
        if len(self.subs) != len(other.subs):
            return False
        for mine, theirs in zip(self.subs, other.subs):
            if mine.__class__ != theirs.__class__ or mine != theirs:
                return False
        return True
# end MultiField
261
262
class ConstField:
    """Class representing an argument field with constant value"""
    def __init__(self, value):
        self.value = value
        # A constant occupies no instruction bits.
        self.mask = 0
        self.sign = value < 0

    def __str__(self):
        return str(self.value)

    def str_extract(self):
        """Return the C expression for this field: the constant itself"""
        return str(self.value)

    # Python 3 never calls __cmp__, so without explicit rich comparisons
    # two ConstFields holding the same value would compare by identity.
    def __eq__(self, other):
        return isinstance(other, ConstField) and self.value == other.value

    def __ne__(self, other):
        return not self.__eq__(other)

    # Defining __eq__ would otherwise make instances unhashable.
    def __hash__(self):
        return hash(self.value)

    # Retained for any explicit caller; unused by Python 3 itself.
    def __cmp__(self, other):
        return self.value - other.value
# end ConstField
279
280
class FunctionField:
    """Class representing a field passed through a function"""
    def __init__(self, func, base):
        self.func = func
        self.base = base
        # Mask and sign are inherited from the wrapped field.
        self.mask = base.mask
        self.sign = base.sign

    def __str__(self):
        return f'{self.func}({self.base})'

    def str_extract(self):
        """Return the C call applying func to the base extraction"""
        return f'{self.func}(ctx, {self.base.str_extract()})'

    def __eq__(self, other):
        if self.func != other.func:
            return False
        return self.base == other.base

    def __ne__(self, other):
        return not self.__eq__(other)
# end FunctionField
301
302
class ParameterField:
    """Class representing a pseudo-field read from a function"""
    def __init__(self, func):
        self.func = func
        # No instruction bits are consumed.
        self.mask = 0
        self.sign = 0

    def __str__(self):
        return self.func

    def str_extract(self):
        """Return the C call producing the value"""
        return f'{self.func}(ctx)'

    def __eq__(self, other):
        return self.func == other.func

    def __ne__(self, other):
        return not self.__eq__(other)
# end ParameterField
322
323
class Arguments:
    """Class representing the extracted fields of a format"""
    def __init__(self, nm, flds, types, extern):
        self.name = nm
        self.fields = flds
        self.types = types
        self.extern = extern

    def __str__(self):
        return f'{self.name} {self.fields}'

    def struct_name(self):
        """Return the name of the C struct for this argument set"""
        return f'arg_{self.name}'

    def output_def(self):
        """Emit the C typedef, unless the argument set is extern"""
        if self.extern:
            return
        output('typedef struct {\n')
        for member, ctype in zip(self.fields, self.types):
            output(f' {ctype} {member};\n')
        output('} ', self.struct_name(), ';\n\n')
# end Arguments
345
346
class General:
    """Common code between instruction formats and instruction patterns"""
    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
        # Where the entity was defined.
        self.name = name
        self.file = input_file
        self.lineno = lineno
        self.base = base
        # Bit-level description.
        self.fixedbits = fixb
        self.fixedmask = fixm
        self.undefmask = udfm
        self.fieldmask = fldm
        self.fields = flds
        self.width = w

    def __str__(self):
        return self.name + ' ' + str_match_bits(self.fixedbits,
                                                self.fixedmask)

    def str1(self, i):
        """Return the string form indented by I spaces"""
        return str_indent(i) + self.__str__()
# end General
367
368
class Format(General):
    """Class representing an instruction format"""

    def extract_name(self):
        """Return the name of the generated C extraction function"""
        return f'{decode_function}_extract_{self.name}'

    def output_extract(self):
        """Emit the C function that fills the argument struct from insn"""
        output('static void ', self.extract_name(), '(DisasContext *ctx, ',
               self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
        for member, fld in self.fields.items():
            output(' a->', member, ' = ', fld.str_extract(), ';\n')
        output('}\n\n')
# end Format
383
384
class Pattern(General):
    """Class representing an instruction pattern"""

    def output_decl(self):
        """Emit the argument typedef and translator prototype"""
        struct = self.base.base.struct_name()
        output('typedef ', struct, ' arg_', self.name, ';\n')
        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
               '(DisasContext *ctx, arg_', self.name, ' *a);\n')

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C statements that extract and dispatch this pattern"""
        pad = str_indent(i)
        argname = self.base.base.name
        output(pad, '/* ', self.file, ':', str(self.lineno), ' */\n')
        # Extract through the format unless an enclosing node did so.
        if not extracted:
            output(pad, self.base.extract_name(),
                   '(ctx, &u.f_', argname, ', insn);\n')
        # Fields attached directly to the pattern are assigned here.
        for member, fld in self.fields.items():
            output(pad, 'u.f_', argname, '.', member, ' = ',
                   fld.str_extract(), ';\n')
        output(pad, 'if (', translate_prefix, '_', self.name,
               '(ctx, &u.f_', argname, ')) return true;\n')

    # Normal patterns do not have children.
    def build_tree(self):
        return None

    def prop_masks(self):
        return None

    def prop_format(self):
        return None

    def prop_width(self):
        return None

# end Pattern
420
421
class MultiPattern(General):
    """Class representing a set of instruction patterns"""

    def __init__(self, lineno):
        self.file = input_file
        self.lineno = lineno
        self.pats = []
        self.base = None
        self.fixedbits = 0
        self.fixedmask = 0
        self.undefmask = 0
        self.width = None

    def __str__(self):
        if self.fixedbits is None:
            return 'group'
        return 'group ' + str_match_bits(self.fixedbits, self.fixedmask)

    def output_decl(self):
        for child in self.pats:
            child.output_decl()

    def prop_masks(self):
        """Compute fixedbits/fixedmask/undefmask common to all children"""
        common_fixed = insnmask
        common_undef = insnmask

        # Collect fixedmask/undefmask for all of the children.
        for child in self.pats:
            child.prop_masks()
            common_fixed &= child.fixedmask
            common_undef &= child.undefmask

        # Narrow the mask until every child agrees on the bits under it.
        agreed = 0
        while common_fixed != 0:
            agreed = None
            for child in self.pats:
                candidate = child.fixedbits & common_fixed
                if agreed is None:
                    agreed = candidate
                elif agreed != candidate:
                    # Drop the disagreeing bits and rescan from the top.
                    common_fixed &= ~(agreed ^ candidate)
                    break
            else:
                # A full pass without disagreement: done.
                break

        self.fixedbits = agreed
        self.fixedmask = common_fixed
        self.undefmask = common_undef

    def build_tree(self):
        for child in self.pats:
            child.build_tree()

    def prop_format(self):
        for child in self.pats:
            child.prop_format()

    def prop_width(self):
        """Set width to the children's width; all of them must agree"""
        seen = None
        for child in self.pats:
            child.prop_width()
            if seen is None:
                seen = child.width
            elif seen != child.width:
                error_with_file(self.file, self.lineno,
                                'width mismatch in patterns within braces')
        self.width = seen

# end MultiPattern
496
497
class IncMultiPattern(MultiPattern):
    """Class representing an overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit each child in order, guarded by its own bit test"""
        pad = str_indent(i)
        for child in self.pats:
            if outermask == child.fixedmask:
                # Nothing further to test for this child.
                child.output_code(i, extracted,
                                  child.fixedbits, child.fixedmask)
                continue
            # Test only the bits the enclosing node has not checked.
            mask = child.fixedmask & ~outermask
            bits = child.fixedbits & ~outermask
            output(pad, f'if ((insn & {whexC(mask)}) == {whexC(bits)}) {{\n')
            output(pad, f' /* {str_match_bits(child.fixedbits, child.fixedmask)} */\n')
            child.output_code(i + 4, extracted,
                              child.fixedbits, child.fixedmask)
            output(pad, '}\n')

    def build_tree(self):
        if not self.pats:
            error_with_file(self.file, self.lineno, 'empty pattern group')
        super().build_tree()

#end IncMultiPattern
Richard Henderson0eff2df2019-02-23 11:35:36 -0800521
522
class Tree:
    """Class representing a node in a decode tree"""

    def __init__(self, fm, tm):
        # FM: the mask recorded as fixedmask for this node.
        # TM: the bits this node switches on.
        self.fixedmask = fm
        self.thismask = tm
        # List of (bits, child) pairs.
        self.subs = []
        # Format shared by everything below this node, if any.
        self.base = None

    def str1(self, i):
        """Return a multi-line description of this node indented by I"""
        ind = str_indent(i)
        r = ind + whex(self.fixedmask)
        # The shared format is stored in self.base; no attribute named
        # "format" is ever assigned on a Tree.
        if self.base:
            r += ' ' + self.base.name
        r += ' [\n'
        for (b, s) in self.subs:
            r += ind + f' {whex(b)}:\n'
            r += s.str1(i + 4) + '\n'
        r += ind + ']'
        return r

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit a C switch over thismask dispatching to each child"""
        ind = str_indent(i)

        # If we identified all nodes below have the same format,
        # extract the fields now.
        if not extracted and self.base:
            output(ind, self.base.extract_name(),
                   '(ctx, &u.f_', self.base.base.name, ', insn);\n')
            extracted = True

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.thismask)
        if sh > 0:
            # Propagate SH down into the local functions.
            def str_switch(b, sh=sh):
                return f'(insn >> {sh}) & {b >> sh:#x}'

            def str_case(b, sh=sh):
                return hex(b >> sh)
        else:
            def str_switch(b):
                return f'insn & {whexC(b)}'

            def str_case(b):
                return whexC(b)

        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
        # Order the cases by their bits only, so that child objects are
        # never compared with each other.
        for b, s in sorted(self.subs, key=lambda sub: sub[0]):
            assert (self.thismask & ~s.fixedmask) == 0
            innermask = outermask | self.thismask
            innerbits = outerbits | b
            output(ind, 'case ', str_case(b), ':\n')
            output(ind, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            s.output_code(i + 4, extracted, innerbits, innermask)
            output(ind, ' break;\n')
        output(ind, '}\n')
# end Tree
586
587
class ExcMultiPattern(MultiPattern):
    """Class representing a non-overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        # Defer everything to our decomposed Tree node
        self.tree.output_code(i, extracted, outerbits, outermask)

    @staticmethod
    def __build_tree(pats, outerbits, outermask):
        """Return a Tree distinguishing PATS by bits outside OUTERMASK"""
        # Find the intersection of all remaining fixedmask.
        innermask = ~outermask & insnmask
        for pat in pats:
            innermask &= pat.fixedmask

        if innermask == 0:
            # Edge condition: One pattern covers the entire insnmask
            if len(pats) == 1:
                leaf = Tree(outermask, innermask)
                leaf.subs.append((0, pats[0]))
                return leaf

            text = 'overlapping patterns:'
            for pat in pats:
                text += '\n' + pat.file + ':' + str(pat.lineno) + ': ' + str(pat)
            error_with_file(pats[0].file, pats[0].lineno, text)

        fullmask = outermask | innermask

        # Sort each element of pats into the bin selected by the mask.
        bins = {}
        for pat in pats:
            bins.setdefault(pat.fixedbits & innermask, []).append(pat)

        # We must recurse if any bin has more than one element or if
        # the single element in the bin has not been fully matched.
        node = Tree(fullmask, innermask)

        for key, members in bins.items():
            child = members[0]
            unmatched = child.fixedmask & ~fullmask
            if len(members) > 1 or unmatched != 0:
                child = ExcMultiPattern.__build_tree(members, key | outerbits,
                                                     fullmask)
            node.subs.append((key, child))

        return node

    def build_tree(self):
        super().build_tree()
        self.tree = self.__build_tree(self.pats, self.fixedbits,
                                      self.fixedmask)

    @staticmethod
    def __prop_format(tree):
        """Propagate Format objects into the decode tree"""

        # Depth first search.
        for (_, sub) in tree.subs:
            if isinstance(sub, Tree):
                ExcMultiPattern.__prop_format(sub)

        # If all entries in SUBS have the same format, then
        # propagate that into the tree.
        shared = None
        for (_, sub) in tree.subs:
            if shared is None:
                shared = sub.base
            if shared is None or shared is not sub.base:
                return
        tree.base = shared

    def prop_format(self):
        super().prop_format()
        self.__prop_format(self.tree)

# end ExcMultiPattern
668
669
def parse_field(lineno, name, toks):
    """Parse one instruction field from TOKS at LINENO"""
    # A "simple" field will have only one entry;
    # a "multifield" will have several.
    pieces = []
    total = 0
    func = None
    for tok in toks:
        if tok.startswith('!function='):
            if func:
                error(lineno, 'duplicate function')
            func = tok.split('=')[1]
            continue

        # POS:LEN is an unsigned extract, POS:sLEN a signed one.
        m = re.fullmatch('([0-9]+):(s?)([0-9]+)', tok)
        if m is None:
            error(lineno, f'invalid field token "{tok}"')
        po = int(m.group(1))
        sign = m.group(2) == 's'
        le = int(m.group(3))
        if po + le > insnwidth:
            error(lineno, f'field {tok} too large')
        pieces.append(Field(sign, po, le))
        total += le

    if total > insnwidth:
        error(lineno, 'field too large')

    if not pieces:
        # No bit ranges: only a bare function is acceptable.
        if func:
            f = ParameterField(func)
        else:
            error(lineno, 'field with no value')
    else:
        if len(pieces) == 1:
            f = pieces[0]
        else:
            combined = 0
            for piece in pieces:
                if combined & piece.mask:
                    error(lineno, 'field components overlap')
                combined |= piece.mask
            f = MultiField(pieces, combined)
        if func:
            f = FunctionField(func, f)

    if name in fields:
        error(lineno, 'duplicate field', name)
    fields[name] = f
# end parse_field
730
731
def parse_arguments(lineno, name, toks):
    """Parse one argument set from TOKS at LINENO"""
    global anyextern

    members = []
    ctypes = []
    extern = False
    for tok in toks:
        if tok == '!extern':
            extern = True
            anyextern = True
            continue
        # Accept NAME or NAME:TYPE; a bare NAME is typed 'int'.
        if not re.fullmatch(f'{re_C_ident}(:{re_C_ident})?', tok):
            error(lineno, f'invalid argument set token "{tok}"')
        member, _, ctype = tok.partition(':')
        if not ctype:
            ctype = 'int'
        if member in members:
            error(lineno, f'duplicate argument "{member}"')
        members.append(member)
        ctypes.append(ctype)

    if name in arguments:
        error(lineno, 'duplicate argument set', name)
    arguments[name] = Arguments(name, members, ctypes, extern)
# end parse_arguments
761
762
def lookup_field(lineno, name):
    """Return the field called NAME, or report an error at LINENO"""
    if name not in fields:
        error(lineno, 'undefined field', name)
    else:
        return fields[name]
768
769
def add_field(lineno, flds, new_name, f):
    """Store F under NEW_NAME in FLDS and return FLDS; a name already
    present is reported as an error at LINENO."""
    duplicate = new_name in flds
    if duplicate:
        error(lineno, 'duplicate field', new_name)
    flds.update({new_name: f})
    return flds
775
776
def add_field_byname(lineno, flds, new_name, old_name):
    """Add the already-defined field OLD_NAME to FLDS as NEW_NAME"""
    found = lookup_field(lineno, old_name)
    return add_field(lineno, flds, new_name, found)
779
780
def infer_argument_set(flds):
    """Return an argument set matching the names in FLDS, creating and
    registering a new all-'int' one when no existing set matches."""
    match = next((candidate for candidate in arguments.values()
                  if eq_fields_for_args(flds, candidate)), None)
    if match is not None:
        return match

    # New sets are named after the decode function plus a running count.
    name = decode_function + str(len(arguments))
    created = Arguments(name, flds.keys(), ['int'] * len(flds), False)
    arguments[name] = created
    return created
793
794
def infer_format(arg, fieldmask, flds, width):
    """Return (format, const_flds) for FLDS, reusing a registered format
    with the same argument set, field mask, width and fields if there is
    one and registering a new format otherwise."""
    const_flds = {}
    var_flds = {}
    for fname, fld in flds.items():
        # NOTE(review): this compares the field object with the class
        # itself, which looks always false, so everything lands in
        # var_flds; isinstance() was presumably intended -- confirm
        # before changing, as it alters the generated formats.
        target = const_flds if fld is ConstField else var_flds
        target[fname] = fld

    # Look for an existing format with the same argument set and fields
    for fmt in formats.values():
        same_arg = not arg or fmt.base == arg
        if (same_arg
                and fieldmask == fmt.fieldmask
                and width == fmt.width
                and eq_fields_for_fmts(flds, fmt.fields)):
            return (fmt, const_flds)

    name = decode_function + '_Fmt_' + str(len(formats))
    if not arg:
        arg = infer_argument_set(flds)

    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
    formats[name] = fmt

    return (fmt, const_flds)
# end infer_format
829
830
def parse_generic(lineno, parent_pat, name, toks):
    """Parse one format or pattern definition from TOKS at LINENO.

    When PARENT_PAT is None, NAME is a format being defined and the
    result is stored in the global FORMATS dictionary.  Otherwise NAME
    is a pattern; the result is appended to PARENT_PAT.pats and to the
    global ALLPATTERNS list.  All problems are reported via error().
    """
    is_format = parent_pat is None

    # Tables converting a run of '0', '1', '.' and '-' characters into
    # the binary digits of each accumulated mask:
    #   fixedmask: set for every required bit ('0' or '1')
    #   fixedbits: set for every bit required to be one
    #   undefmask: set for every '-'
    to_fixedmask = str.maketrans('01.-', '1100')
    to_fixedbits = str.maketrans('01.-', '0100')
    to_undefmask = str.maketrans('01.-', '0001')

    re_rename = re_C_ident + '=' + re_fld_ident
    re_const = re_C_ident + '=[+-]?[0-9]+'
    re_bitfield = re_C_ident + ':s?[0-9]+'

    fixedmask = 0
    fixedbits = 0
    undefmask = 0
    width = 0
    flds = {}
    arg = None
    fmt = None

    for tok in toks:
        # '&Foo' gives a format an explicit argument set.
        if re.fullmatch(re_arg_ident, tok):
            ident = tok[1:]
            if arg:
                error(lineno, 'multiple argument sets')
            if ident in arguments:
                arg = arguments[ident]
            else:
                error(lineno, 'undefined argument set', tok)
            continue

        # '@Foo' gives a pattern an explicit format.
        if re.fullmatch(re_fmt_ident, tok):
            ident = tok[1:]
            if fmt:
                error(lineno, 'multiple formats')
            if ident in formats:
                fmt = formats[ident]
            else:
                error(lineno, 'undefined format', tok)
            continue

        # '%Foo' imports a field under its own name.
        if re.fullmatch(re_fld_ident, tok):
            ident = tok[1:]
            flds = add_field_byname(lineno, flds, ident, ident)
            continue

        # 'Foo=%Bar' imports a field under a different name.
        if re.fullmatch(re_rename, tok):
            fname, iname = tok.split('=%')
            flds = add_field_byname(lineno, flds, fname, iname)
            continue

        # 'Foo=number' sets an argument field to a constant value.
        if re.fullmatch(re_const, tok):
            fname, text = tok.split('=')
            flds = add_field(lineno, flds, fname, ConstField(int(text)))
            continue

        # The remaining token kinds consume instruction bits; each one
        # sets SHIFT to the number of bits it covers.
        if re.fullmatch('[01.-]+', tok):
            # Required zeros, required ones, or dont-cares.
            shift = len(tok)
            fixedbits = (fixedbits << shift) | int(tok.translate(to_fixedbits), 2)
            fixedmask = (fixedmask << shift) | int(tok.translate(to_fixedmask), 2)
            undefmask = (undefmask << shift) | int(tok.translate(to_undefmask), 2)
        elif re.fullmatch(re_bitfield, tok):
            # fieldname:fieldwidth, with an optional 's' marking the
            # field as signed.
            fname, flen = tok.split(':')
            sign = flen[0] == 's'
            if sign:
                flen = flen[1:]
            shift = int(flen, 10)
            if shift + width > insnwidth:
                error(lineno, f'field {fname} exceeds insnwidth')
            # Tokens run from the most significant bit downward, so the
            # field starts SHIFT bits below what has been consumed.
            f = Field(sign, insnwidth - width - shift, shift)
            flds = add_field(lineno, flds, fname, f)
            fixedbits <<= shift
            fixedmask <<= shift
            undefmask <<= shift
        else:
            error(lineno, f'invalid token "{tok}"')
        width += shift

    if variablewidth and width < insnwidth and width % 8 == 0:
        # A short definition in variable-width mode: left-justify it
        # and mark all of the missing low bits as undefined.
        shift = insnwidth - width
        fixedbits <<= shift
        fixedmask <<= shift
        undefmask <<= shift
        undefmask |= (1 << shift) - 1

    # We should have filled in all of the bits of the instruction;
    # only a format is allowed to specify no bits at all.
    elif width != insnwidth and not (is_format and width == 0):
        error(lineno, f'definition has {width} bits')

    # Do not check for fields overlapping fields; one valid usage
    # is to be able to duplicate fields via import.
    fieldmask = 0
    for fld in flds.values():
        fieldmask |= fld.mask

    # Fix up what we've parsed to match either a format or a pattern.
    if is_format:
        # Formats cannot reference formats.
        if fmt:
            error(lineno, 'format referencing format')
        # If an argument set is given, then there should be no fields
        # without a place to store it.
        if arg:
            for fname in flds:
                if fname not in arg.fields:
                    error(lineno,
                          f'field {fname} not in argument set {arg.name}')
        else:
            arg = infer_argument_set(flds)
        if name in formats:
            error(lineno, 'duplicate format name', name)
        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
                     undefmask, fieldmask, flds, width)
        formats[name] = fmt
    else:
        # Patterns can reference a format ...
        if fmt:
            # ... but not an argument simultaneously
            if arg:
                error(lineno, 'pattern specifies both format and argument set')
            if fixedmask & fmt.fixedmask:
                error(lineno, 'pattern fixed bits overlap format fixed bits')
            if width != fmt.width:
                error(lineno, 'pattern uses format of different width')
            fieldmask |= fmt.fieldmask
            fixedbits |= fmt.fixedbits
            fixedmask |= fmt.fixedmask
            undefmask |= fmt.undefmask
        else:
            fmt, flds = infer_format(arg, fieldmask, flds, width)
        arg = fmt.base
        for fname in flds:
            if fname not in arg.fields:
                error(lineno, f'field {fname} not in argument set {arg.name}')
            if fname in fmt.fields:
                error(lineno, f'field {fname} set by format and pattern')
        for fname in arg.fields:
            if fname not in flds and fname not in fmt.fields:
                error(lineno, f'field {fname} not initialized')
        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
                      undefmask, fieldmask, flds, width)
        parent_pat.pats.append(pat)
        allpatterns.append(pat)

    # Validate the masks that we have assembled.
    if fieldmask & fixedmask:
        error(lineno, 'fieldmask overlaps fixedmask ',
              f'({whex(fieldmask)} & {whex(fixedmask)})')
    if fieldmask & undefmask:
        error(lineno, 'fieldmask overlaps undefmask ',
              f'({whex(fieldmask)} & {whex(undefmask)})')
    if fixedmask & undefmask:
        error(lineno, 'fixedmask overlaps undefmask ',
              f'({whex(fixedmask)} & {whex(undefmask)})')
    if not is_format:
        # Every bit of a pattern must be a field, fixed, or undefined.
        allbits = fieldmask | fixedmask | undefmask
        if allbits != insnmask:
            error(lineno, 'bits left unspecified ',
                  f'({whex(allbits ^ insnmask)})')
# end parse_generic
1014
Richard Henderson0eff2df2019-02-23 11:35:36 -08001015
def parse_file(f, parent_pat):
    """Parse all of the definitions within the iterable of lines F.

    Each logical line (after joining continuations) is dispatched on its
    first token: '{' / '[' open a nested pattern group under PARENT_PAT,
    '}' / ']' close one, and anything else is handed to parse_field,
    parse_arguments or parse_generic.  Nesting is tracked both by the
    brace tokens and by indentation, which must grow by exactly two
    columns per nesting level.  Problems are reported via error().
    """
    # TOKS accumulates the tokens of the logical line being assembled;
    # it is non-empty only while a continuation is pending.
    toks = []
    lineno = 0
    nesting = 0
    # Stack of the enclosing groups, innermost last.
    nesting_pats = []

    for line in f:
        lineno += 1

        # Expand and strip spaces, to find indent.
        line = line.rstrip()
        line = line.expandtabs()
        len1 = len(line)
        line = line.lstrip()
        len2 = len(line)

        # Discard comments
        end = line.find('#')
        if end >= 0:
            line = line[:end]

        t = line.split()
        if toks:
            # Next line after continuation
            toks.extend(t)
        else:
            # Allow completely blank lines.
            if len1 == 0:
                continue
            indent = len1 - len2
            # Empty line due to comment.
            if not t:
                # Indentation must be correct, even for comment lines.
                if indent != nesting:
                    error(lineno, 'indentation ', indent, ' != ', nesting)
                continue
            start_lineno = lineno
            toks = t

        # Continuation?  The backslash must be a separate final token.
        if toks[-1] == '\\':
            toks.pop()
            continue

        name = toks[0]
        del toks[0]

        # End nesting?
        if name == '}' or name == ']':
            if toks:
                error(start_lineno, 'extra tokens after close brace')

            # Make sure { } and [ ] nest properly.
            if (name == '}') != isinstance(parent_pat, IncMultiPattern):
                error(lineno, 'mismatched close brace')

            # Only an empty stack means an unmatched close; do not
            # intercept unrelated exceptions such as SystemExit.
            try:
                parent_pat = nesting_pats.pop()
            except IndexError:
                error(lineno, 'extra close brace')

            nesting -= 2
            if indent != nesting:
                error(lineno, 'indentation ', indent, ' != ', nesting)

            toks = []
            continue

        # Everything else should have current indentation.
        if indent != nesting:
            error(start_lineno, 'indentation ', indent, ' != ', nesting)

        # Start nesting?
        if name == '{' or name == '[':
            if toks:
                error(start_lineno, 'extra tokens after open brace')

            if name == '{':
                nested_pat = IncMultiPattern(start_lineno)
            else:
                nested_pat = ExcMultiPattern(start_lineno)
            parent_pat.pats.append(nested_pat)
            nesting_pats.append(parent_pat)
            parent_pat = nested_pat

            nesting += 2
            toks = []
            continue

        # Determine the type of object needing to be parsed.
        if re.fullmatch(re_fld_ident, name):
            parse_field(start_lineno, name[1:], toks)
        elif re.fullmatch(re_arg_ident, name):
            parse_arguments(start_lineno, name[1:], toks)
        elif re.fullmatch(re_fmt_ident, name):
            # A format: signalled to parse_generic by a None parent.
            parse_generic(start_lineno, None, name[1:], toks)
        elif re.fullmatch(re_pat_ident, name):
            parse_generic(start_lineno, parent_pat, name, toks)
        else:
            error(lineno, f'invalid token "{name}"')
        toks = []

    if nesting != 0:
        error(lineno, 'missing close brace')
# end parse_file
1129
1130
class SizeTree:
    """Interior node of a size decode tree.

    A node switches on the instruction bits selected by MASK; SUBS holds
    (bits, subtree) pairs, one per value of those bits.  WIDTH is the
    number of instruction bits that must be loaded to test this node.
    """

    def __init__(self, m, w):
        self.mask = m
        self.subs = []
        self.base = None
        self.width = w

    def str1(self, i):
        """Return a multi-line dump of this subtree indented by I."""
        ind = str_indent(i)
        pieces = [ind + whex(self.mask) + ' [\n']
        for (bits, sub) in self.subs:
            pieces.append(ind + f' {whex(bits)}:\n')
            pieces.append(sub.str1(i + 4) + '\n')
        pieces.append(ind + ']')
        return ''.join(pieces)

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C switch statement for this node at indent I.

        EXTRACTED is the number of instruction bits already loaded;
        OUTERBITS / OUTERMASK describe the bits matched by the
        enclosing switches and are used only for the emitted comments.
        """
        ind = str_indent(i)

        # If we need to load more bytes to test, do so now.
        if extracted < self.width:
            output(ind, f'insn = {decode_function}_load_bytes',
                   f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
            extracted = self.width

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.mask)
        if sh > 0:
            switch_expr = f'(insn >> {sh}) & {self.mask >> sh:#x}'

            def case_label(bits):
                return hex(bits >> sh)
        else:
            switch_expr = f'insn & {whexC(self.mask)}'
            case_label = whexC

        # The mask seen by every child is the same; only the bits differ.
        innermask = outermask | self.mask

        output(ind, 'switch (', switch_expr, ') {\n')
        for bits, sub in sorted(self.subs):
            innerbits = outerbits | bits
            output(ind, 'case ', case_label(bits), ':\n')
            output(ind, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            sub.output_code(i + 4, extracted, innerbits, innermask)
        output(ind, '}\n')
        output(ind, 'return insn;\n')
# end SizeTree
1189
class SizeLeaf:
    """Leaf of a size decode tree: all patterns here share one width.

    MASK is the mask of bits recorded for the leaf and WIDTH is the
    number of instruction bits to load for it.
    """

    def __init__(self, m, w):
        self.mask = m
        self.width = w

    def str1(self, i):
        """Return a one-line dump of this leaf indented by I."""
        return f'{str_indent(i)}{whex(self.mask)}'

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit C code at indent I that loads any missing bytes and returns.

        EXTRACTED is the number of instruction bits already loaded.
        OUTERBITS and OUTERMASK are accepted for interface parity with
        SizeTree.output_code and are not used here.
        """
        ind = str_indent(i)

        # If we need to load more bytes, do so now.
        if self.width > extracted:
            output(ind, f'insn = {decode_function}_load_bytes',
                   f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
        output(ind, 'return insn;\n')
# end SizeLeaf
1214
1215
def build_size_tree(pats, width, outerbits, outermask):
    """Build the tree used to determine the length of an instruction.

    PATS is a sequence of patterns providing fixedmask, fixedbits,
    width, name, file and lineno.  WIDTH is the number of instruction
    bits examined so far, counting the byte under test; OUTERBITS and
    OUTERMASK are the bits already matched by enclosing nodes.

    Returns a SizeLeaf when every pattern has the same width, otherwise
    a SizeTree that switches on the fixed bits common to all patterns.
    Patterns that cannot be told apart are reported via error_with_file().
    """
    # Collect the mask of bits that are fixed in this width: start from
    # the byte ending WIDTH bits below the top of the instruction, less
    # whatever has already been tested.
    innermask = 0xff << (insnwidth - width)
    innermask &= ~outermask

    # Despite being seeded from the first pattern, this ends up holding
    # the largest width seen among PATS.
    maxwidth = None
    onewidth = True
    for p in pats:
        innermask &= p.fixedmask
        if maxwidth is None:
            maxwidth = p.width
        elif maxwidth != p.width:
            onewidth = False
            if maxwidth < p.width:
                maxwidth = p.width

    if onewidth:
        return SizeLeaf(innermask, maxwidth)

    if innermask == 0:
        # Nothing to distinguish on in this byte; try the next one
        # while there are still bits left to look at.
        if width < maxwidth:
            return build_size_tree(pats, width + 8, outerbits, outermask)

        pnames = [p.name + ':' + p.file + ':' + str(p.lineno) for p in pats]
        error_with_file(pats[0].file, pats[0].lineno,
                        f'overlapping patterns size {width}:', pnames)

    # Group the patterns by the value of their common fixed bits.
    bins = {}
    for p in pats:
        bins.setdefault(p.fixedbits & innermask, []).append(p)

    fullmask = outermask | innermask
    if len(bins) == 1:
        # Every pattern agrees on these bits, so a switch would be
        # pointless; move on to the next byte instead.
        (b, group), = bins.items()
        return build_size_tree(group, width + 8, b | outerbits, fullmask)

    r = SizeTree(innermask, width)
    for b, group in bins.items():
        s = build_size_tree(group, width, b | outerbits, fullmask)
        r.subs.append((b, s))
    return r
# end build_size_tree
1266
1267
def prop_size(tree):
    """Propagate minimum widths up the decode size tree.

    For a SizeTree node, recurse into every subtree, set the node's
    width to the smallest width found below it, and return that value.
    Any other node (a leaf) simply reports its own width.  A SizeTree
    without subtrees keeps its current width.
    """
    if not isinstance(tree, SizeTree):
        return tree.width

    # Use the builtin rather than a local named 'min'; the default keeps
    # an empty node from comparing None against an integer.
    narrowest = min((prop_size(sub) for _, sub in tree.subs),
                    default=tree.width)
    # A node is never narrower than the point at which it was created.
    assert narrowest >= tree.width
    tree.width = narrowest
    return narrowest
# end prop_size
1283
1284
def main():
    """Command-line entry point: parse the inputs and emit the decoder.

    Processes the command-line options, parses every input file into a
    single top-level pattern group, builds the decode tree (and, for
    variable-width instructions, the size tree), and writes the
    generated C source to the output file or to standard output.
    Terminates the process with status 1 if --test-for-error was given
    and 0 otherwise.
    """
    global arguments, formats, allpatterns
    global translate_scope, translate_prefix
    global output_fd, output_file, input_file
    global insnwidth, insntype, insnmask
    global decode_function, bitop_width
    global variablewidth, anyextern, testforerror

    decode_scope = 'static '

    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
                 'static-decode=', 'varinsnwidth=', 'test-for-error']
    try:
        (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
    except getopt.GetoptError as err:
        error(0, err)
    # NOTE(review): '-v' is accepted by the option string above but has
    # no branch below, so it reaches the final assert -- confirm intent.
    for o, a in opts:
        if o in ('-o', '--output'):
            output_file = a
        elif o == '--decode':
            # A named, non-static decode function.
            decode_function = a
            decode_scope = ''
        elif o == '--static-decode':
            decode_function = a
        elif o == '--translate':
            translate_prefix = a
            translate_scope = ''
        elif o in ('-w', '--insnwidth', '--varinsnwidth'):
            if o == '--varinsnwidth':
                variablewidth = True
            insnwidth = int(a)
            if insnwidth == 16:
                insntype = 'uint16_t'
                insnmask = 0xffff
            elif insnwidth == 64:
                insntype = 'uint64_t'
                insnmask = 0xffffffffffffffff
                bitop_width = 64
            elif insnwidth != 32:
                error(0, 'cannot handle insns of width', insnwidth)
        elif o == '--test-for-error':
            testforerror = True
        else:
            assert False, 'unhandled option'

    if len(args) < 1:
        error(0, 'missing input file')

    toppat = ExcMultiPattern(0)

    for filename in args:
        input_file = filename
        # The context manager closes the file even if parsing bails out.
        with open(filename, 'rt', encoding='utf-8') as f:
            parse_file(f, toppat)

    # We do not want to compute masks for toppat, because those masks
    # are used as a starting point for build_tree.  For toppat, we must
    # insist that decode begins from naught.
    for i in toppat.pats:
        i.prop_masks()

    toppat.build_tree()
    toppat.prop_format()

    if variablewidth:
        for i in toppat.pats:
            i.prop_width()
        stree = build_size_tree(toppat.pats, 8, 0, 0)
        prop_size(stree)

    if output_file:
        output_fd = open(output_file, 'wt', encoding='utf-8')
    else:
        output_fd = io.TextIOWrapper(sys.stdout.buffer,
                                     encoding=sys.stdout.encoding,
                                     errors="ignore")

    output_autogen()
    for n in sorted(arguments.keys()):
        f = arguments[n]
        f.output_def()

    # A single translate function can be invoked for different patterns.
    # Make sure that the argument sets are the same, and declare the
    # function only once.
    #
    # If we're sharing formats, we're likely also sharing trans_* functions,
    # but we can't tell which ones.  Prevent issues from the compiler by
    # suppressing redundant declaration warnings.
    if anyextern:
        output("#pragma GCC diagnostic push\n",
               "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
               "#ifdef __clang__\n"
               "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
               "#endif\n\n")

    out_pats = {}
    for i in allpatterns:
        if i.name in out_pats:
            p = out_pats[i.name]
            if i.base.base != p.base.base:
                error(0, i.name, ' has conflicting argument sets')
        else:
            i.output_decl()
            out_pats[i.name] = i
    output('\n')

    if anyextern:
        output("#pragma GCC diagnostic pop\n\n")

    for n in sorted(formats.keys()):
        f = formats[n]
        f.output_extract()

    output(decode_scope, 'bool ', decode_function,
           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')

    i4 = str_indent(4)

    # With no patterns at all, emit only the trailing 'return false'.
    if len(allpatterns) != 0:
        output(i4, 'union {\n')
        for n in sorted(arguments.keys()):
            f = arguments[n]
            output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
        output(i4, '} u;\n\n')
        toppat.output_code(4, False, 0, 0)

    output(i4, 'return false;\n')
    output('}\n')

    if variablewidth:
        output('\n', decode_scope, insntype, ' ', decode_function,
               '_load(DisasContext *ctx)\n{\n',
               ' ', insntype, ' insn = 0;\n\n')
        stree.output_code(4, 0, 0, 0)
        output('}\n')

    if output_file:
        output_fd.close()
    # sys.exit is always available, unlike the exit() helper that the
    # site module installs for interactive use.
    sys.exit(1 if testforerror else 0)
# end main
1437
1438
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()