blob: 0861e5d503c89ac480b2cc401ec7d7be99040259 [file] [log] [blame]
Philippe Mathieu-Daudé3d004a32020-01-30 17:32:25 +01001#!/usr/bin/env python3
Richard Henderson568ae7e2017-12-07 12:44:09 -08002# Copyright (c) 2018 Linaro Limited
3#
4# This library is free software; you can redistribute it and/or
5# modify it under the terms of the GNU Lesser General Public
6# License as published by the Free Software Foundation; either
Chetan Pantd6ea4232020-10-23 12:33:53 +00007# version 2.1 of the License, or (at your option) any later version.
Richard Henderson568ae7e2017-12-07 12:44:09 -08008#
9# This library is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12# Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public
15# License along with this library; if not, see <http://www.gnu.org/licenses/>.
16#
17
18#
19# Generate a decoding tree from a specification file.
Richard Henderson3fdbf5d2019-02-23 13:00:10 -080020# See the syntax and semantics in docs/devel/decodetree.rst.
Richard Henderson568ae7e2017-12-07 12:44:09 -080021#
22
Philippe Mathieu-Daudé4caceca2021-01-10 01:02:40 +010023import io
Richard Henderson568ae7e2017-12-07 12:44:09 -080024import os
25import re
26import sys
27import getopt
Richard Henderson568ae7e2017-12-07 12:44:09 -080028
# Width of one instruction word in bits, and the mask covering that width.
insnwidth = 32
insnmask = (1 << insnwidth) - 1
# Consulted by parse_generic for definitions narrower than insnwidth.
variablewidth = False

# Registries filled in by the parse_* functions, keyed by name.
fields = dict()
arguments = dict()
formats = dict()
allpatterns = list()
# Set to True by parse_arguments once any argument set is marked !extern.
anyextern = False

# Pieces of the names and declarations emitted into the generated C code.
translate_prefix = 'trans'
translate_scope = 'static '
# Name of the file being parsed; used as the prefix of error messages.
input_file = ''
# Output path and its open file object; error_with_file() closes and
# removes the output when both are set.
output_file = None
output_fd = None
# C type used for the 'insn' parameter of the generated extract functions.
insntype = 'uint32_t'
# Prefix for generated extract-function, format and argument-set names.
decode_function = 'decode'
Richard Henderson568ae7e2017-12-07 12:44:09 -080045
# Characters allowed after the first character / sigil of an identifier.
_re_ident_tail = '[a-zA-Z0-9_]*'

# An identifier for C.
re_C_ident = '[a-zA-Z]' + _re_ident_tail

# Identifiers for Arguments, Fields, Formats and Patterns.
# Each is a sigil ('&', '%', '@' or none) followed by the common tail.
re_arg_ident = '&' + _re_ident_tail
re_fld_ident = '%' + _re_ident_tail
re_fmt_ident = '@' + _re_ident_tail
re_pat_ident = _re_ident_tail
Richard Henderson568ae7e2017-12-07 12:44:09 -080054
def error_with_file(file, lineno, *args):
    """Print an error message from file:line and args and exit.

    FILE and LINENO are each added to the message prefix only when they
    are truthy.  The message goes to stderr.  If an output file has been
    opened it is closed and removed, so that no partial output is left
    behind.  The process then exits with status 1 (raises SystemExit).
    """
    global output_file
    global output_fd

    prefix = ''
    if file:
        prefix += f'{file}:'
    if lineno:
        prefix += f'{lineno}:'
    if prefix:
        prefix += ' '
    print(prefix, end='error: ', file=sys.stderr)
    print(*args, file=sys.stderr)

    if output_file and output_fd:
        output_fd.close()
        os.remove(output_file)
    # Use sys.exit(): the bare exit() builtin is injected by the site
    # module for interactive use and is not guaranteed to exist.
    sys.exit(1)
# end error_with_file
75
Richard Henderson568ae7e2017-12-07 12:44:09 -080076
def error(lineno, *args):
    """Report an error at LINENO of the current input file and exit.

    Thin wrapper that supplies the global input_file to error_with_file.
    """
    error_with_file(input_file, lineno, *args)
# end error
80
Richard Henderson568ae7e2017-12-07 12:44:09 -080081
def output(*args):
    """Write each of ARGS, in order, to the global output_fd."""
    global output_fd
    for piece in args:
        output_fd.write(piece)
86
87
def output_autogen():
    """Emit the 'autogenerated file' banner comment to the output."""
    banner = '/* This file is autogenerated by scripts/decodetree.py. */\n\n'
    output(banner)
90
91
def str_indent(c):
    """Return a string made of C space characters."""
    return c * ' '
95
96
def str_fields(fields):
    """Return a string uniquely identifying FIELDS.

    The result is the sorted key names of FIELDS joined with '_'.
    """
    return '_'.join(sorted(fields.keys()))
103
104
def whex(val):
    """Return a hex string for VAL, zero-padded to insnwidth // 4 digits."""
    global insnwidth
    digits = insnwidth // 4
    return '0x{0:0{1}x}'.format(val, digits)
109
110
def whexC(val):
    """Return a hex string for VAL padded for insnwidth,
    and with the proper suffix for a C constant.

    Values of 0x80000000 and above get a 'u' suffix.
    """
    return whex(val) + ('u' if val >= 0x80000000 else '')
118
119
def str_match_bits(bits, mask):
    """Return a string pretty-printing BITS/MASK.

    Walking from the most significant bit of the instruction down to
    bit 0, emit '1' or '0' for a bit selected by MASK (taken from BITS)
    and '.' for any other bit.  A space follows bits 24, 16 and 8.
    """
    global insnwidth

    # Bit positions after which a separating space is inserted.
    separators = 0x01010100
    probe = 1 << (insnwidth - 1)
    chars = []
    while probe != 0:
        if not (probe & mask):
            chars.append('.')
        elif probe & bits:
            chars.append('1')
        else:
            chars.append('0')
        if probe & separators:
            chars.append(' ')
        probe >>= 1
    return ''.join(chars)
139
140
def is_pow2(x):
    """Return true iff X has at most one bit set.

    That is, X is a power of 2; note that X == 0 also yields True.
    """
    return not (x & (x - 1))
144
145
def ctz(x):
    """Return the number of times 2 factors into X.

    Equivalently, the index of the lowest set bit.  X must be non-zero.
    """
    assert x != 0
    # x & -x isolates the lowest set bit; its bit_length is index + 1.
    return (x & -x).bit_length() - 1
153
154
def is_contiguous(bits):
    """Return the position of the lowest set bit of BITS if its set bits
    form one contiguous run, otherwise -1.  BITS == 0 also gives -1."""
    if bits == 0:
        return -1
    lowest = ctz(bits)
    # A contiguous run shifted down to bit 0 is one less than a power of 2.
    return lowest if is_pow2((bits >> lowest) + 1) else -1
163
164
def eq_fields_for_args(flds_a, flds_b):
    """Return True iff FLDS_A and FLDS_B have the same length and every
    key of the mapping FLDS_A is contained in FLDS_B.

    Only names are compared; the values of FLDS_A are ignored.
    """
    same_size = len(flds_a) == len(flds_b)
    return same_size and all(k in flds_b for k, _ in flds_a.items())
172
173
def eq_fields_for_fmts(flds_a, flds_b):
    """Return True iff the mappings FLDS_A and FLDS_B have the same
    length and, for every key of FLDS_A, FLDS_B holds a value of the
    same class that does not compare unequal."""
    if len(flds_a) != len(flds_b):
        return False

    def matches(key, a):
        if key not in flds_b:
            return False
        b = flds_b[key]
        return a.__class__ == b.__class__ and not (a != b)

    return all(matches(k, a) for k, a in flds_a.items())
184
185
class Field:
    """Class representing a simple instruction field"""

    def __init__(self, sign, pos, len):
        # SIGN selects signed extraction; POS is the lowest bit of the
        # field and LEN its width in bits.
        self.sign = sign
        self.pos = pos
        self.len = len
        # LEN one-bits, shifted up to start at POS.
        self.mask = ((1 << len) - 1) << pos

    def __str__(self):
        """Return 'POS:LEN', or 'POS:sLEN' for a signed field."""
        marker = 's' if self.sign else ''
        return f'{self.pos}:{marker}{self.len}'

    def str_extract(self):
        """Return the C expression extracting this field from 'insn'."""
        extr = 'sextract32' if self.sign else 'extract32'
        return f'{extr}(insn, {self.pos}, {self.len})'

    def __eq__(self, other):
        # Two fields are equal when signedness and covered bits agree.
        return self.sign == other.sign and self.mask == other.mask

    def __ne__(self, other):
        return not self.__eq__(other)
# end Field
214
215
class MultiField:
    """Class representing a compound instruction field"""

    def __init__(self, subs, mask):
        # SUBS is the ordered list of component fields; the signedness of
        # the compound field is taken from the first component.
        self.subs = subs
        self.sign = subs[0].sign
        self.mask = mask

    def __str__(self):
        return str(self.subs)

    def str_extract(self):
        """Return the C expression assembling all components.

        The last component supplies the low bits; each earlier one is
        deposited above the bits accumulated so far.
        """
        expr = '0'
        used = 0
        for sub in reversed(self.subs):
            piece = sub.str_extract()
            if used == 0:
                expr = piece
            else:
                expr = f'deposit32({expr}, {used}, {32 - used}, {piece})'
            used += sub.len
        return expr

    def __ne__(self, other):
        if len(self.subs) != len(other.subs):
            return True
        # Components must pairwise be of the same class and compare equal.
        return any(a.__class__ != b.__class__ or a != b
                   for a, b in zip(self.subs, other.subs))

    def __eq__(self, other):
        return not self.__ne__(other)
# end MultiField
249
250
class ConstField:
    """Class representing an argument field with constant value"""

    def __init__(self, value):
        self.value = value
        # A constant occupies no instruction bits.
        self.mask = 0
        self.sign = value < 0

    def __str__(self):
        return str(self.value)

    def str_extract(self):
        """Return the C expression for this field: the constant itself."""
        return str(self.value)

    # Python 3 ignores __cmp__, so without these methods two constants
    # with the same value would only compare equal when they are the
    # very same object.
    def __eq__(self, other):
        if not isinstance(other, ConstField):
            return NotImplemented
        return self.value == other.value

    def __ne__(self, other):
        if not isinstance(other, ConstField):
            return NotImplemented
        return self.value != other.value

    def __hash__(self):
        # Keep instances hashable, consistently with __eq__.
        return hash(self.value)
# end ConstField
267
268
class FunctionField:
    """Class representing a field passed through a function"""

    def __init__(self, func, base):
        # FUNC is the name of the C function; BASE is the wrapped field,
        # whose mask and signedness are inherited.
        self.func = func
        self.base = base
        self.mask = base.mask
        self.sign = base.sign

    def __str__(self):
        return f'{self.func}({self.base})'

    def str_extract(self):
        """Return the C call applying FUNC to the extracted base field."""
        inner = self.base.str_extract()
        return f'{self.func}(ctx, {inner})'

    def __eq__(self, other):
        return self.func == other.func and self.base == other.base

    def __ne__(self, other):
        return not self.__eq__(other)
# end FunctionField
289
290
class ParameterField:
    """Class representing a pseudo-field read from a function"""

    def __init__(self, func):
        # FUNC is the name of the C function supplying the value; the
        # pseudo-field occupies no instruction bits.
        self.func = func
        self.mask = 0
        self.sign = 0

    def __str__(self):
        return self.func

    def str_extract(self):
        """Return the C call that produces the value."""
        return f'{self.func}(ctx)'

    def __eq__(self, other):
        return self.func == other.func

    def __ne__(self, other):
        return not self.__eq__(other)
# end ParameterField
310
311
class Arguments:
    """Class representing the extracted fields of a format"""

    def __init__(self, nm, flds, extern):
        # NM is the argument-set name, FLDS an iterable of member names
        # (stored sorted), EXTERN whether the struct is defined elsewhere.
        self.name = nm
        self.extern = extern
        self.fields = sorted(flds)

    def __str__(self):
        return f'{self.name} {self.fields}'

    def struct_name(self):
        """Return the name of the C struct type for this argument set."""
        return 'arg_' + self.name

    def output_def(self):
        """Emit the C struct typedef, unless the set is extern."""
        if self.extern:
            return
        output('typedef struct {\n')
        for member in self.fields:
            output(' int ', member, ';\n')
        output('} ', self.struct_name(), ';\n\n')
# end Arguments
332
333
class General:
    """Common code between instruction formats and instruction patterns"""

    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
        # Where the definition came from; the file is the one being
        # parsed at construction time.
        self.file = input_file
        self.lineno = lineno
        self.name = name
        self.base = base
        # FIXB/FIXM: fixed bits and their mask; UDFM: undefined-bits mask;
        # FLDM: mask of bits covered by fields; FLDS: the fields by name;
        # W: width of the definition.
        self.fixedbits = fixb
        self.fixedmask = fixm
        self.undefmask = udfm
        self.fieldmask = fldm
        self.fields = flds
        self.width = w

    def __str__(self):
        bits = str_match_bits(self.fixedbits, self.fixedmask)
        return self.name + ' ' + bits

    def str1(self, i):
        """Return the string form indented by I spaces."""
        return str_indent(i) + self.__str__()
# end General
354
355
class Format(General):
    """Class representing an instruction format"""

    def extract_name(self):
        """Return the name of the generated C extract function."""
        global decode_function
        return f'{decode_function}_extract_{self.name}'

    def output_extract(self):
        """Emit the C function that fills the argument struct from insn,
        one assignment per field of the format."""
        output('static void ', self.extract_name(), '(DisasContext *ctx, ',
               self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
        for fname, fld in self.fields.items():
            output(' a->', fname, ' = ', fld.str_extract(), ';\n')
        output('}\n\n')
# end Format
370
371
class Pattern(General):
    """Class representing an instruction pattern"""

    def output_decl(self):
        """Emit the per-pattern argument typedef and the prototype of
        the translator function."""
        global translate_scope
        global translate_prefix
        struct = self.base.base.struct_name()
        output('typedef ', struct, ' arg_', self.name, ';\n')
        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
               '(DisasContext *ctx, arg_', self.name, ' *a);\n')

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the code that decodes this pattern at indent I.

        When EXTRACTED is false the format's extract function is called
        first; fields set by the pattern itself are always assigned.
        OUTERBITS and OUTERMASK are not used here.
        """
        global translate_prefix
        pad = str_indent(i)
        argname = self.base.base.name
        output(pad, '/* ', self.file, ':', str(self.lineno), ' */\n')
        if not extracted:
            output(pad, self.base.extract_name(),
                   '(ctx, &u.f_', argname, ', insn);\n')
        for fname, fld in self.fields.items():
            output(pad, 'u.f_', argname, '.', fname, ' = ',
                   fld.str_extract(), ';\n')
        output(pad, 'if (', translate_prefix, '_', self.name,
               '(ctx, &u.f_', argname, ')) return true;\n')

    # Normal patterns do not have children, so the tree-walking hooks
    # below have nothing to do.
    def build_tree(self):
        return None

    def prop_masks(self):
        return None

    def prop_format(self):
        return None

    def prop_width(self):
        return None

# end Pattern
407
408
class MultiPattern(General):
    """Class representing a set of instruction patterns"""

    def __init__(self, lineno):
        # General.__init__ is not called: a group has no name or fields.
        self.file = input_file
        self.lineno = lineno
        self.pats = []
        self.base = None
        self.fixedbits = 0
        self.fixedmask = 0
        self.undefmask = 0
        self.width = None

    def __str__(self):
        # fixedbits is None after prop_masks() on a group with no members.
        if self.fixedbits is None:
            return 'group'
        return 'group' + ' ' + str_match_bits(self.fixedbits, self.fixedmask)

    def output_decl(self):
        """Emit the declarations of every member."""
        for member in self.pats:
            member.output_decl()

    def prop_masks(self):
        """Compute the fixed and undefined masks common to all members."""
        global insnmask

        common_fixed = insnmask
        common_undef = insnmask

        # Collect fixedmask/undefmask for all of the children.
        for member in self.pats:
            member.prop_masks()
            common_fixed &= member.fixedmask
            common_undef &= member.undefmask

        # Drop bits from the mask until every member agrees on the
        # fixed bits that remain under it.
        common_bits = 0
        settled = False
        while not settled and common_fixed != 0:
            common_bits = None
            settled = True
            for member in self.pats:
                masked = member.fixedbits & common_fixed
                if common_bits is None:
                    common_bits = masked
                elif common_bits != masked:
                    common_fixed &= ~(common_bits ^ masked)
                    settled = False
                    break

        self.fixedbits = common_bits
        self.fixedmask = common_fixed
        self.undefmask = common_undef

    def build_tree(self):
        """Build the decode trees of every member."""
        for member in self.pats:
            member.build_tree()

    def prop_format(self):
        # NOTE(review): this walks the members with build_tree(), not
        # prop_format().  That looks like a copy-paste slip, but any
        # subclass calling super().prop_format() to get the members'
        # trees built depends on it; audit those callers before changing.
        for member in self.pats:
            member.build_tree()

    def prop_width(self):
        """Compute the members' widths and require them to agree."""
        agreed = None
        for member in self.pats:
            member.prop_width()
            if agreed is None:
                agreed = member.width
            elif agreed != member.width:
                error_with_file(self.file, self.lineno,
                                'width mismatch in patterns within braces')
        self.width = agreed

# end MultiPattern
483
484
class IncMultiPattern(MultiPattern):
    """Class representing an overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the members in order at indent I.

        A member whose fixed mask is exactly OUTERMASK is emitted as is;
        any other member is wrapped in an 'if' testing the fixed bits
        not already covered by OUTERMASK.
        """
        global translate_prefix
        pad = str_indent(i)
        for p in self.pats:
            if outermask == p.fixedmask:
                p.output_code(i, extracted, p.fixedbits, p.fixedmask)
                continue
            innermask = p.fixedmask & ~outermask
            innerbits = p.fixedbits & ~outermask
            output(pad, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n')
            output(pad, f' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n')
            p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
            output(pad, '}\n')
#end IncMultiPattern
Richard Henderson0eff2df2019-02-23 11:35:36 -0800502
503
class Tree:
    """Class representing a node in a decode tree"""

    def __init__(self, fm, tm):
        # FM: mask of all bits decided at or above this node.
        # TM: mask of the bits this node switches on.
        self.fixedmask = fm
        self.thismask = tm
        # List of (bits, child) pairs, one per switch case.
        self.subs = []
        # Format shared by everything below this node, if any.
        self.base = None

    def str1(self, i):
        """Return a multi-line debug dump of the subtree at indent I."""
        ind = str_indent(i)
        r = ind + whex(self.fixedmask)
        # The shared format lives in self.base; no 'format' attribute is
        # ever assigned on a Tree.
        if self.base:
            r += ' ' + self.base.name
        r += ' [\n'
        for (b, s) in self.subs:
            r += ind + f' {whex(b)}:\n'
            r += s.str1(i + 4) + '\n'
        r += ind + ']'
        return r

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C switch statement for this node at indent I.

        EXTRACTED says whether the argument fields have already been
        extracted; OUTERBITS/OUTERMASK describe the bits matched so far.
        """
        ind = str_indent(i)

        # If we identified all nodes below have the same format,
        # extract the fields now.
        if not extracted and self.base:
            output(ind, self.base.extract_name(),
                   '(ctx, &u.f_', self.base.base.name, ', insn);\n')
            extracted = True

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.thismask)
        if sh > 0:
            # Switch on the field shifted down to bit 0.
            def str_switch(b):
                return f'(insn >> {sh}) & {b >> sh:#x}'

            def str_case(b):
                return hex(b >> sh)
        else:
            # Switch on the masked instruction word itself.
            def str_switch(b):
                return f'insn & {whexC(b)}'

            def str_case(b):
                return whexC(b)

        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
        # Order the cases by their bit value only; the child nodes are
        # not comparable with each other.
        for b, s in sorted(self.subs, key=lambda sub: sub[0]):
            assert (self.thismask & ~s.fixedmask) == 0
            innermask = outermask | self.thismask
            innerbits = outerbits | b
            output(ind, 'case ', str_case(b), ':\n')
            output(ind, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            s.output_code(i + 4, extracted, innerbits, innermask)
            output(ind, ' break;\n')
        output(ind, '}\n')
# end Tree
567
568
class ExcMultiPattern(MultiPattern):
    """Class representing a non-overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        # Defer everything to our decomposed Tree node
        self.tree.output_code(i, extracted, outerbits, outermask)

    @staticmethod
    def __build_tree(pats, outerbits, outermask):
        """Return a Tree that tells the members of PATS apart.

        OUTERBITS/OUTERMASK describe the bits already decided above.
        Reports an error and exits if several patterns remain that share
        no further fixed bit.
        """
        # Find the intersection of all remaining fixedmask.
        innermask = ~outermask & insnmask
        for p in pats:
            innermask &= p.fixedmask

        if innermask == 0:
            # Edge condition: One pattern covers the entire insnmask
            if len(pats) == 1:
                t = Tree(outermask, innermask)
                t.subs.append((0, pats[0]))
                return t

            text = 'overlapping patterns:'
            for p in pats:
                text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
            error_with_file(pats[0].file, pats[0].lineno, text)

        fullmask = outermask | innermask

        # Sort each element of pats into the bin selected by the mask.
        bins = {}
        for p in pats:
            bins.setdefault(p.fixedbits & innermask, []).append(p)

        # We must recurse if any bin has more than one element or if
        # the single element in the bin has not been fully matched.
        t = Tree(fullmask, innermask)

        for b, l in bins.items():
            s = l[0]
            if len(l) > 1 or s.fixedmask & ~fullmask != 0:
                s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
            t.subs.append((b, s))

        return t

    def build_tree(self):
        """Build the children's trees, then the tree for this group."""
        # Recurse with build_tree(), the tree-building walk, rather than
        # going through prop_format().
        super().build_tree()
        self.tree = self.__build_tree(self.pats, self.fixedbits,
                                      self.fixedmask)

    @staticmethod
    def __prop_format(tree):
        """Propagate Format objects into the decode tree"""

        # Depth first search.
        for (b, s) in tree.subs:
            if isinstance(s, Tree):
                ExcMultiPattern.__prop_format(s)

        # If all entries in SUBS have the same format, then
        # propagate that into the tree.
        f = None
        for (b, s) in tree.subs:
            if f is None:
                f = s.base
                if f is None:
                    return
            if f is not s.base:
                return
        tree.base = f

    def prop_format(self):
        """Propagate formats through the children, then through our tree."""
        # Walk the children explicitly so that each child's own
        # prop_format() runs, now that its tree exists.
        for p in self.pats:
            p.prop_format()
        self.__prop_format(self.tree)

# end ExcMultiPattern
649
650
def parse_field(lineno, name, toks):
    """Parse one instruction field from TOKS at LINENO.

    Each token is either '!function=NAME' or a 'POS:LEN' / 'POS:sLEN'
    bit range.  The resulting field object is stored in the global
    'fields' under NAME; any problem is reported through error().
    """
    global fields
    global insnwidth

    # A "simple" field will have only one entry;
    # a "multifield" will have several.
    parts = []
    total = 0
    func = None
    for t in toks:
        if re.match('^!function=', t):
            if func:
                error(lineno, 'duplicate function')
            func = t.split('=')[1]
            continue

        if re.fullmatch('[0-9]+:s[0-9]+', t):
            # Signed field extract
            sign, sep = True, ':s'
        elif re.fullmatch('[0-9]+:[0-9]+', t):
            # Unsigned field extract
            sign, sep = False, ':'
        else:
            error(lineno, 'invalid field token "{0}"'.format(t))
        pos_text, len_text = t.split(sep)
        po = int(pos_text)
        le = int(len_text)
        if po + le > insnwidth:
            error(lineno, 'field {0} too large'.format(t))
        parts.append(Field(sign, po, le))
        total += le

    if total > insnwidth:
        error(lineno, 'field too large')

    if parts:
        if len(parts) == 1:
            f = parts[0]
        else:
            # The components of a multifield must not share any bit.
            mask = 0
            for part in parts:
                if mask & part.mask:
                    error(lineno, 'field components overlap')
                mask |= part.mask
            f = MultiField(parts, mask)
        if func:
            f = FunctionField(func, f)
    elif func:
        # No bit ranges at all: the value comes from the function alone.
        f = ParameterField(func)
    else:
        error(lineno, 'field with no value')

    if name in fields:
        error(lineno, 'duplicate field', name)
    fields[name] = f
# end parse_field
711
712
def parse_arguments(lineno, name, toks):
    """Parse one argument set from TOKS at LINENO.

    Each token is either '!extern' or a C identifier naming a member.
    The resulting Arguments object is stored in the global 'arguments'
    under NAME; any problem is reported through error().
    """
    global arguments
    global re_C_ident
    global anyextern

    members = []
    extern = False
    for t in toks:
        if re.fullmatch('!extern', t):
            extern = anyextern = True
            continue
        if not re.fullmatch(re_C_ident, t):
            error(lineno, 'invalid argument set token "{0}"'.format(t))
        if t in members:
            error(lineno, 'duplicate argument "{0}"'.format(t))
        members.append(t)

    if name in arguments:
        error(lineno, 'duplicate argument set', name)
    arguments[name] = Arguments(name, members, extern)
# end parse_arguments
736
737
def lookup_field(lineno, name):
    """Return the field registered as NAME, reporting an error at
    LINENO if there is none."""
    global fields
    if name not in fields:
        error(lineno, 'undefined field', name)
        return None
    return fields[name]
743
744
def add_field(lineno, flds, new_name, f):
    """Store F in the mapping FLDS under NEW_NAME and return FLDS.

    An already present NEW_NAME is reported as an error at LINENO.
    """
    already_there = new_name in flds
    if already_there:
        error(lineno, 'duplicate field', new_name)
    flds[new_name] = f
    return flds
750
751
def add_field_byname(lineno, flds, new_name, old_name):
    """Look up the registered field OLD_NAME and add it to FLDS under
    NEW_NAME; return FLDS."""
    found = lookup_field(lineno, old_name)
    return add_field(lineno, flds, new_name, found)
754
755
def infer_argument_set(flds):
    """Return an argument set whose members are the names in FLDS.

    An existing set with exactly those member names is reused;
    otherwise a new, non-extern one is created and registered under a
    name made of decode_function and the current number of sets.
    """
    global arguments
    global decode_function

    for known in arguments.values():
        if eq_fields_for_args(flds, known.fields):
            return known

    name = decode_function + str(len(arguments))
    created = Arguments(name, flds.keys(), False)
    arguments[name] = created
    return created
768
769
def infer_format(arg, fieldmask, flds, width):
    """Return a (format, constant-fields) pair suitable for a pattern
    that names no format.

    An existing format is reused when its argument set (if ARG is
    given), field mask, width and fields all match; otherwise a new
    one is created and registered in 'formats'.
    """
    global arguments
    global formats
    global decode_function

    # NOTE(review): 'c is ConstField' compares each field object with
    # the class itself, so for field instances it is never true and
    # const_flds stays empty.  isinstance() may have been intended;
    # check what the caller expects before changing it.
    const_flds = {}
    var_flds = {}
    for n, c in flds.items():
        target = const_flds if c is ConstField else var_flds
        target[n] = c

    # Look for an existing format with the same argument set and fields
    for fmt in formats.values():
        if ((not arg or fmt.base == arg)
                and fieldmask == fmt.fieldmask
                and width == fmt.width
                and eq_fields_for_fmts(flds, fmt.fields)):
            return (fmt, const_flds)

    if not arg:
        arg = infer_argument_set(flds)

    name = decode_function + '_Fmt_' + str(len(formats))
    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
    formats[name] = fmt

    return (fmt, const_flds)
# end infer_format
804
805
Richard Henderson08561fc2020-05-17 10:14:11 -0700806def parse_generic(lineno, parent_pat, name, toks):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800807 """Parse one instruction format from TOKS at LINENO"""
808 global fields
809 global arguments
810 global formats
Richard Henderson0eff2df2019-02-23 11:35:36 -0800811 global allpatterns
Richard Hendersonacfdd232020-09-03 12:23:34 -0700812 global re_arg_ident
813 global re_fld_ident
814 global re_fmt_ident
815 global re_C_ident
Richard Henderson568ae7e2017-12-07 12:44:09 -0800816 global insnwidth
817 global insnmask
Richard Henderson17560e92019-01-30 18:01:29 -0800818 global variablewidth
Richard Henderson568ae7e2017-12-07 12:44:09 -0800819
Richard Henderson08561fc2020-05-17 10:14:11 -0700820 is_format = parent_pat is None
821
Richard Henderson568ae7e2017-12-07 12:44:09 -0800822 fixedmask = 0
823 fixedbits = 0
824 undefmask = 0
825 width = 0
826 flds = {}
827 arg = None
828 fmt = None
829 for t in toks:
zhaolichang65fdb3c2020-09-17 15:50:23 +0800830 # '&Foo' gives a format an explicit argument set.
Richard Hendersonacfdd232020-09-03 12:23:34 -0700831 if re.fullmatch(re_arg_ident, t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800832 tt = t[1:]
833 if arg:
834 error(lineno, 'multiple argument sets')
835 if tt in arguments:
836 arg = arguments[tt]
837 else:
838 error(lineno, 'undefined argument set', t)
839 continue
840
841 # '@Foo' gives a pattern an explicit format.
Richard Hendersonacfdd232020-09-03 12:23:34 -0700842 if re.fullmatch(re_fmt_ident, t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800843 tt = t[1:]
844 if fmt:
845 error(lineno, 'multiple formats')
846 if tt in formats:
847 fmt = formats[tt]
848 else:
849 error(lineno, 'undefined format', t)
850 continue
851
852 # '%Foo' imports a field.
Richard Hendersonacfdd232020-09-03 12:23:34 -0700853 if re.fullmatch(re_fld_ident, t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800854 tt = t[1:]
855 flds = add_field_byname(lineno, flds, tt, tt)
856 continue
857
858 # 'Foo=%Bar' imports a field with a different name.
Richard Hendersonacfdd232020-09-03 12:23:34 -0700859 if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800860 (fname, iname) = t.split('=%')
861 flds = add_field_byname(lineno, flds, fname, iname)
862 continue
863
864 # 'Foo=number' sets an argument field to a constant value
Richard Hendersonacfdd232020-09-03 12:23:34 -0700865 if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800866 (fname, value) = t.split('=')
867 value = int(value)
868 flds = add_field(lineno, flds, fname, ConstField(value))
869 continue
870
871 # Pattern of 0s, 1s, dots and dashes indicate required zeros,
872 # required ones, or dont-cares.
John Snow2d110c12020-05-13 23:52:30 -0400873 if re.fullmatch('[01.-]+', t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800874 shift = len(t)
875 fms = t.replace('0', '1')
876 fms = fms.replace('.', '0')
877 fms = fms.replace('-', '0')
878 fbs = t.replace('.', '0')
879 fbs = fbs.replace('-', '0')
880 ubm = t.replace('1', '0')
881 ubm = ubm.replace('.', '0')
882 ubm = ubm.replace('-', '1')
883 fms = int(fms, 2)
884 fbs = int(fbs, 2)
885 ubm = int(ubm, 2)
886 fixedbits = (fixedbits << shift) | fbs
887 fixedmask = (fixedmask << shift) | fms
888 undefmask = (undefmask << shift) | ubm
889 # Otherwise, fieldname:fieldwidth
Richard Hendersonacfdd232020-09-03 12:23:34 -0700890 elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800891 (fname, flen) = t.split(':')
892 sign = False
893 if flen[0] == 's':
894 sign = True
895 flen = flen[1:]
896 shift = int(flen, 10)
Richard Henderson2decfc92019-03-05 15:34:41 -0800897 if shift + width > insnwidth:
898 error(lineno, 'field {0} exceeds insnwidth'.format(fname))
Richard Henderson568ae7e2017-12-07 12:44:09 -0800899 f = Field(sign, insnwidth - width - shift, shift)
900 flds = add_field(lineno, flds, fname, f)
901 fixedbits <<= shift
902 fixedmask <<= shift
903 undefmask <<= shift
904 else:
905 error(lineno, 'invalid token "{0}"'.format(t))
906 width += shift
907
Richard Henderson17560e92019-01-30 18:01:29 -0800908 if variablewidth and width < insnwidth and width % 8 == 0:
909 shift = insnwidth - width
910 fixedbits <<= shift
911 fixedmask <<= shift
912 undefmask <<= shift
913 undefmask |= (1 << shift) - 1
914
Richard Henderson568ae7e2017-12-07 12:44:09 -0800915 # We should have filled in all of the bits of the instruction.
Richard Henderson17560e92019-01-30 18:01:29 -0800916 elif not (is_format and width == 0) and width != insnwidth:
Richard Henderson568ae7e2017-12-07 12:44:09 -0800917 error(lineno, 'definition has {0} bits'.format(width))
918
zhaolichang65fdb3c2020-09-17 15:50:23 +0800919 # Do not check for fields overlapping fields; one valid usage
Richard Henderson568ae7e2017-12-07 12:44:09 -0800920 # is to be able to duplicate fields via import.
921 fieldmask = 0
922 for f in flds.values():
923 fieldmask |= f.mask
924
925 # Fix up what we've parsed to match either a format or a pattern.
926 if is_format:
927 # Formats cannot reference formats.
928 if fmt:
929 error(lineno, 'format referencing format')
930 # If an argument set is given, then there should be no fields
931 # without a place to store it.
932 if arg:
933 for f in flds.keys():
934 if f not in arg.fields:
935 error(lineno, 'field {0} not in argument set {1}'
936 .format(f, arg.name))
937 else:
938 arg = infer_argument_set(flds)
939 if name in formats:
940 error(lineno, 'duplicate format name', name)
941 fmt = Format(name, lineno, arg, fixedbits, fixedmask,
Richard Henderson17560e92019-01-30 18:01:29 -0800942 undefmask, fieldmask, flds, width)
Richard Henderson568ae7e2017-12-07 12:44:09 -0800943 formats[name] = fmt
944 else:
945 # Patterns can reference a format ...
946 if fmt:
947 # ... but not an argument simultaneously
948 if arg:
949 error(lineno, 'pattern specifies both format and argument set')
950 if fixedmask & fmt.fixedmask:
951 error(lineno, 'pattern fixed bits overlap format fixed bits')
Richard Henderson17560e92019-01-30 18:01:29 -0800952 if width != fmt.width:
953 error(lineno, 'pattern uses format of different width')
Richard Henderson568ae7e2017-12-07 12:44:09 -0800954 fieldmask |= fmt.fieldmask
955 fixedbits |= fmt.fixedbits
956 fixedmask |= fmt.fixedmask
957 undefmask |= fmt.undefmask
958 else:
Richard Henderson17560e92019-01-30 18:01:29 -0800959 (fmt, flds) = infer_format(arg, fieldmask, flds, width)
Richard Henderson568ae7e2017-12-07 12:44:09 -0800960 arg = fmt.base
961 for f in flds.keys():
962 if f not in arg.fields:
963 error(lineno, 'field {0} not in argument set {1}'
964 .format(f, arg.name))
965 if f in fmt.fields.keys():
966 error(lineno, 'field {0} set by format and pattern'.format(f))
967 for f in arg.fields:
968 if f not in flds.keys() and f not in fmt.fields.keys():
969 error(lineno, 'field {0} not initialized'.format(f))
970 pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
Richard Henderson17560e92019-01-30 18:01:29 -0800971 undefmask, fieldmask, flds, width)
Richard Henderson08561fc2020-05-17 10:14:11 -0700972 parent_pat.pats.append(pat)
Richard Henderson0eff2df2019-02-23 11:35:36 -0800973 allpatterns.append(pat)
Richard Henderson568ae7e2017-12-07 12:44:09 -0800974
975 # Validate the masks that we have assembled.
976 if fieldmask & fixedmask:
Richard Hendersonc7cefe62021-04-28 16:27:56 -0700977 error(lineno, 'fieldmask overlaps fixedmask ',
978 f'({whex(fieldmask)} & {whex(fixedmask)})')
Richard Henderson568ae7e2017-12-07 12:44:09 -0800979 if fieldmask & undefmask:
Richard Hendersonc7cefe62021-04-28 16:27:56 -0700980 error(lineno, 'fieldmask overlaps undefmask ',
981 f'({whex(fieldmask)} & {whex(undefmask)})')
Richard Henderson568ae7e2017-12-07 12:44:09 -0800982 if fixedmask & undefmask:
Richard Hendersonc7cefe62021-04-28 16:27:56 -0700983 error(lineno, 'fixedmask overlaps undefmask ',
984 f'({whex(fixedmask)} & {whex(undefmask)})')
Richard Henderson568ae7e2017-12-07 12:44:09 -0800985 if not is_format:
986 allbits = fieldmask | fixedmask | undefmask
987 if allbits != insnmask:
Richard Hendersonc7cefe62021-04-28 16:27:56 -0700988 error(lineno, 'bits left unspecified ',
989 f'({whex(allbits ^ insnmask)})')
# end parse_generic
991
Richard Henderson0eff2df2019-02-23 11:35:36 -0800992
def parse_file(f, parent_pat):
    """Parse all of the definitions within a file.

    f is an iterable of text lines (e.g. an open text file).
    parent_pat is the multi-pattern object whose .pats list receives
    every pattern and every nested { } / [ ] group found at the top
    level of the file.

    Lines ending in a backslash token are joined with the following
    line; blank lines and '#' comments are discarded.  Each level of
    { } or [ ] nesting must be indented by two further columns.
    Problems are reported through error().
    """
    global re_arg_ident
    global re_fld_ident
    global re_fmt_ident
    global re_pat_ident

    # Tokens accumulated for the logical line being assembled.
    toks = []
    lineno = 0
    # Indentation (in columns) required at the current nesting depth.
    nesting = 0
    # Stack of enclosing parents, pushed on '{' / '[' and popped on close.
    nesting_pats = []

    for line in f:
        lineno += 1

        # Expand and strip spaces, to find indent.
        line = line.rstrip()
        line = line.expandtabs()
        len1 = len(line)
        line = line.lstrip()
        len2 = len(line)

        # Discard comments
        end = line.find('#')
        if end >= 0:
            line = line[:end]

        t = line.split()
        if toks:
            # Next line after continuation
            toks.extend(t)
        else:
            # Allow completely blank lines.
            if len1 == 0:
                continue
            indent = len1 - len2
            # Empty line due to comment.
            if not t:
                # Indentation must be correct, even for comment lines.
                if indent != nesting:
                    error(lineno, 'indentation ', indent, ' != ', nesting)
                continue
            # Remember where the logical line began, for diagnostics.
            start_lineno = lineno
            toks = t

        # Continuation?
        if toks[-1] == '\\':
            toks.pop()
            continue

        name = toks[0]
        del toks[0]

        # End nesting?
        if name in ('}', ']'):
            if toks:
                error(start_lineno, 'extra tokens after close brace')

            # Make sure { } and [ ] nest properly.
            if (name == '}') != isinstance(parent_pat, IncMultiPattern):
                error(lineno, 'mismatched close brace')

            # An empty stack is the only failure that means "no group
            # is open"; do not hide any other exception behind it.
            try:
                parent_pat = nesting_pats.pop()
            except IndexError:
                error(lineno, 'extra close brace')

            nesting -= 2
            if indent != nesting:
                error(lineno, 'indentation ', indent, ' != ', nesting)

            toks = []
            continue

        # Everything else should have current indentation.
        if indent != nesting:
            error(start_lineno, 'indentation ', indent, ' != ', nesting)

        # Start nesting?
        if name in ('{', '['):
            if toks:
                error(start_lineno, 'extra tokens after open brace')

            # '{' opens an IncMultiPattern, '[' an ExcMultiPattern.
            if name == '{':
                nested_pat = IncMultiPattern(start_lineno)
            else:
                nested_pat = ExcMultiPattern(start_lineno)
            parent_pat.pats.append(nested_pat)
            nesting_pats.append(parent_pat)
            parent_pat = nested_pat

            nesting += 2
            toks = []
            continue

        # Determine the type of object needing to be parsed.
        # For fields, argument sets and formats the first character of
        # the name is dropped before handing it to the parser.
        if re.fullmatch(re_fld_ident, name):
            parse_field(start_lineno, name[1:], toks)
        elif re.fullmatch(re_arg_ident, name):
            parse_arguments(start_lineno, name[1:], toks)
        elif re.fullmatch(re_fmt_ident, name):
            parse_generic(start_lineno, None, name[1:], toks)
        elif re.fullmatch(re_pat_ident, name):
            parse_generic(start_lineno, parent_pat, name, toks)
        else:
            error(lineno, 'invalid token "{0}"'.format(name))
        toks = []

    # Every opened group must have been closed by end of file.
    if nesting != 0:
        error(lineno, 'missing close brace')
# end parse_file
1106
1107
class SizeTree:
    """Interior node of the size decode tree.

    mask is the set of instruction bits switched on at this node,
    subs is a list of (bits, subtree) pairs, one per case, and
    width is compared against the amount already extracted to decide
    whether the generated code must load more bytes first.
    """

    def __init__(self, m, w):
        self.mask, self.width = m, w
        self.subs = []
        self.base = None

    def str1(self, i):
        """Return a multi-line dump of this subtree at indent level i."""
        pad = str_indent(i)
        pieces = [pad + whex(self.mask) + ' [\n']
        for bits, sub in self.subs:
            pieces.append(pad + f' {whex(bits)}:\n')
            pieces.append(sub.str1(i + 4) + '\n')
        pieces.append(pad + ']')
        return ''.join(pieces)

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C switch statement for this node at indent level i.

        extracted is how much of the instruction has been loaded so far;
        outerbits/outermask describe the bits already matched by the
        enclosing nodes and are only used for the generated comments.
        """
        pad = str_indent(i)

        # Load whatever additional bytes this node needs to inspect.
        if extracted < self.width:
            output(pad, 'insn = ', decode_function,
                   f'_load_bytes(ctx, insn, {extracted // 8}, '
                   f'{self.width // 8});\n')
            extracted = self.width

        # Attempt to aid the compiler in producing compact switch
        # statements: when the mask bits are contiguous, shift them
        # down so that the case labels are small integers.
        shift = is_contiguous(self.mask)
        if shift > 0:
            switch_expr = f'(insn >> {shift}) & {self.mask >> shift:#x}'

            def case_label(bits):
                return hex(bits >> shift)
        else:
            switch_expr = f'insn & {whexC(self.mask)}'
            case_label = whexC

        output(pad, 'switch (', switch_expr, ') {\n')
        # The accumulated mask is the same for every case of this node.
        innermask = outermask | self.mask
        for bits, sub in sorted(self.subs):
            innerbits = outerbits | bits
            output(pad, 'case ', case_label(bits), ':\n')
            output(pad, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            sub.output_code(i + 4, extracted, innerbits, innermask)
        output(pad, '}\n')
        output(pad, 'return insn;\n')
# end SizeTree
1167
class SizeLeaf:
    """Leaf of the size decode tree: no further switching is needed.

    mask and width are stored exactly as given to the constructor.
    """

    def __init__(self, m, w):
        self.mask, self.width = m, w

    def str1(self, i):
        """Return a one-line dump of this leaf at indent level i."""
        return ''.join((str_indent(i), whex(self.mask)))

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C code that finishes loading and returns insn.

        outerbits and outermask are accepted for interface parity with
        SizeTree.output_code but are not used here.
        """
        pad = str_indent(i)

        # Top up the instruction if this leaf is wider than what has
        # been loaded so far.
        needs_load = extracted < self.width
        if needs_load:
            output(pad, 'insn = ', decode_function,
                   f'_load_bytes(ctx, insn, {extracted // 8}, '
                   f'{self.width // 8});\n')
        output(pad, 'return insn;\n')
# end SizeLeaf
1193
1194
def build_size_tree(pats, width, outerbits, outermask):
    """Build the size decode tree for the patterns in pats.

    width selects the byte currently being examined (the mask
    0xff << (insnwidth - width)); outerbits/outermask are the bits
    already decided by the enclosing tree levels.  Returns a SizeLeaf
    when all patterns share one width, otherwise a SizeTree (possibly
    after recursing on the next byte).
    """
    global insnwidth

    # Collect the mask of bits that are fixed in this width
    innermask = (0xff << (insnwidth - width)) & ~outermask
    for pat in pats:
        innermask &= pat.fixedmask

    # Distinct pattern widths; a single value means nothing is left
    # to discriminate on.
    widths = {pat.width for pat in pats}
    widest = max(widths) if widths else None
    if len(widths) <= 1:
        return SizeLeaf(innermask, widest)

    if innermask == 0:
        # No common fixed bits in this byte: look at the next one
        # while some pattern is still wide enough to have one.
        if width < widest:
            return build_size_tree(pats, width + 8, outerbits, outermask)

        pnames = [pat.name + ':' + pat.file + ':' + str(pat.lineno)
                  for pat in pats]
        error_with_file(pats[0].file, pats[0].lineno,
                        'overlapping patterns size {0}:'.format(width), pnames)

    # Group the patterns by the value of their fixed bits under the mask.
    bins = {}
    for pat in pats:
        bins.setdefault(pat.fixedbits & innermask, []).append(pat)

    fullmask = outermask | innermask
    if len(bins) == 1:
        # Every pattern agrees on this byte; move on to the next one.
        (only_bits, group), = bins.items()
        return build_size_tree(group, width + 8,
                               only_bits | outerbits, fullmask)

    node = SizeTree(innermask, width)
    for bits, group in bins.items():
        child = build_size_tree(group, width, bits | outerbits, fullmask)
        node.subs.append((bits, child))
    return node
# end build_size_tree
1245
1246
def prop_size(tree):
    """Propagate minimum widths up the decode size tree.

    For a SizeTree node, its width is replaced by the smallest width
    found among its subtrees; any other node just reports its own
    width.  Returns the resulting width of tree.
    """
    if not isinstance(tree, SizeTree):
        return tree.width

    # Recurse first so that every child is settled before comparing.
    child_widths = [prop_size(sub) for _, sub in tree.subs]
    narrowest = min(child_widths) if child_widths else None
    assert narrowest >= tree.width
    tree.width = narrowest
    return narrowest
# end prop_size
1262
1263
def main():
    """Command-line entry point: parse the input files and emit C code.

    Options (see the getopt call below):
      -o FILE, --output=FILE  write the generated code to FILE
                              instead of standard output
      --decode=NAME           name of the decode function; also clears
                              the 'static ' scope prefix
      --static-decode=NAME    name of the decode function, scope kept
      --translate=PREFIX      prefix for translate functions; also
                              clears their 'static ' scope prefix
      -w N, --insnwidth=N     instruction width, 16 or 32
      --varinsnwidth=N        as --insnwidth, and enables
                              variable-width decoding
    The remaining arguments are the input files; at least one is
    required.
    """
    global arguments
    global formats
    global allpatterns
    global translate_scope
    global translate_prefix
    global output_fd
    global output_file
    global input_file
    global insnwidth
    global insntype
    global insnmask
    global decode_function
    global variablewidth
    global anyextern

    decode_scope = 'static '

    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
                 'static-decode=', 'varinsnwidth=']
    try:
        (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
    except getopt.GetoptError as err:
        error(0, err)
    for o, a in opts:
        if o in ('-o', '--output'):
            output_file = a
        elif o == '--decode':
            decode_function = a
            decode_scope = ''
        elif o == '--static-decode':
            decode_function = a
        elif o == '--translate':
            translate_prefix = a
            translate_scope = ''
        elif o in ('-w', '--insnwidth', '--varinsnwidth'):
            if o == '--varinsnwidth':
                variablewidth = True
            insnwidth = int(a)
            if insnwidth == 16:
                insntype = 'uint16_t'
                insnmask = 0xffff
            elif insnwidth != 32:
                error(0, 'cannot handle insns of width', insnwidth)
        else:
            # NOTE(review): '-v' is accepted by the getopt string above
            # but has no branch here, so it ends up on this assertion.
            assert False, 'unhandled option'

    if len(args) < 1:
        error(0, 'missing input file')

    toppat = ExcMultiPattern(0)

    for filename in args:
        input_file = filename
        # The context manager closes the file even if parsing raises.
        with open(filename, 'rt', encoding='utf-8') as f:
            parse_file(f, toppat)

    # We do not want to compute masks for toppat, because those masks
    # are used as a starting point for build_tree.  For toppat, we must
    # insist that decode begins from naught.
    for i in toppat.pats:
        i.prop_masks()

    toppat.build_tree()
    toppat.prop_format()

    if variablewidth:
        for i in toppat.pats:
            i.prop_width()
        stree = build_size_tree(toppat.pats, 8, 0, 0)
        prop_size(stree)

    if output_file:
        output_fd = open(output_file, 'wt', encoding='utf-8')
    else:
        # Re-wrap stdout so that unencodable characters are dropped
        # (errors="ignore") rather than raising.
        output_fd = io.TextIOWrapper(sys.stdout.buffer,
                                     encoding=sys.stdout.encoding,
                                     errors="ignore")

    output_autogen()
    for n in sorted(arguments.keys()):
        f = arguments[n]
        f.output_def()

    # A single translate function can be invoked for different patterns.
    # Make sure that the argument sets are the same, and declare the
    # function only once.
    #
    # If we're sharing formats, we're likely also sharing trans_* functions,
    # but we can't tell which ones.  Prevent issues from the compiler by
    # suppressing redundant declaration warnings.
    if anyextern:
        output("#pragma GCC diagnostic push\n",
               "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
               "#ifdef __clang__\n"
               "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
               "#endif\n\n")

    out_pats = {}
    for i in allpatterns:
        if i.name in out_pats:
            p = out_pats[i.name]
            if i.base.base != p.base.base:
                error(0, i.name, ' has conflicting argument sets')
        else:
            i.output_decl()
            out_pats[i.name] = i
    output('\n')

    if anyextern:
        output("#pragma GCC diagnostic pop\n\n")

    for n in sorted(formats.keys()):
        f = formats[n]
        f.output_extract()

    output(decode_scope, 'bool ', decode_function,
           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')

    i4 = str_indent(4)

    # The union of argument structures and the decode body are only
    # emitted when at least one pattern was parsed.
    if len(allpatterns) != 0:
        output(i4, 'union {\n')
        for n in sorted(arguments.keys()):
            f = arguments[n]
            output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
        output(i4, '} u;\n\n')
        toppat.output_code(4, False, 0, 0)

    output(i4, 'return false;\n')
    output('}\n')

    # For variable-width decoding also emit the <decode>_load function
    # generated from the size tree built above.
    if variablewidth:
        output('\n', decode_scope, insntype, ' ', decode_function,
               '_load(DisasContext *ctx)\n{\n',
               ' ', insntype, ' insn = 0;\n\n')
        stree.output_code(4, 0, 0, 0)
        output('}\n')

    if output_file:
        output_fd.close()
# end main
1407
1408
# Run the generator only when executed as a script, not on import.
if __name__ == "__main__":
    main()