blob: e4ef0a03cc4c7ee9f1a5f372f1f83f112d8ae4a4 [file] [log] [blame]
Philippe Mathieu-Daudé3d004a32020-01-30 17:32:25 +01001#!/usr/bin/env python3
Richard Henderson568ae7e2017-12-07 12:44:09 -08002# Copyright (c) 2018 Linaro Limited
3#
4# This library is free software; you can redistribute it and/or
5# modify it under the terms of the GNU Lesser General Public
6# License as published by the Free Software Foundation; either
Chetan Pantd6ea4232020-10-23 12:33:53 +00007# version 2.1 of the License, or (at your option) any later version.
Richard Henderson568ae7e2017-12-07 12:44:09 -08008#
9# This library is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12# Lesser General Public License for more details.
13#
14# You should have received a copy of the GNU Lesser General Public
15# License along with this library; if not, see <http://www.gnu.org/licenses/>.
16#
17
18#
19# Generate a decoding tree from a specification file.
Richard Henderson3fdbf5d2019-02-23 13:00:10 -080020# See the syntax and semantics in docs/devel/decodetree.rst.
Richard Henderson568ae7e2017-12-07 12:44:09 -080021#
22
Philippe Mathieu-Daudé4caceca2021-01-10 01:02:40 +010023import io
Richard Henderson568ae7e2017-12-07 12:44:09 -080024import os
25import re
26import sys
27import getopt
Richard Henderson568ae7e2017-12-07 12:44:09 -080028
# Width of one instruction in bits; whex() pads to insnwidth // 4 hex digits
# and str_match_bits() starts at bit insnwidth - 1.
insnwidth = 32
# Numeric suffix of the extractNN/depositNN helpers named in generated code.
bitop_width = 32
# Starting mask used when intersecting the fixed/undef masks of patterns.
insnmask = 0xffffffff
variablewidth = False
# Tables populated while parsing: fields, argument sets and formats by name.
fields = {}
arguments = {}
formats = {}
allpatterns = []
# Set to True once an argument set is declared with '!extern'.
anyextern = False
# When true, error_with_file() exits with status 0 instead of 1.
testforerror = False

# Name fragments spliced into the generated C code.
translate_prefix = 'trans'
translate_scope = 'static '
# Name of the specification file, used as the location prefix in errors.
input_file = ''
# Output path and stream; both are cleaned up by error_with_file() on error.
output_file = None
output_fd = None
# C type used for the 'insn' parameter of generated extract functions.
insntype = 'uint32_t'
# Prefix for generated extract function, argument set and format names.
decode_function = 'decode'

# An identifier for C.
re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'

# Identifiers for Arguments, Fields, Formats and Patterns.
re_arg_ident = '&[a-zA-Z0-9_]*'
re_fld_ident = '%[a-zA-Z0-9_]*'
re_fmt_ident = '@[a-zA-Z0-9_]*'
re_pat_ident = '[a-zA-Z0-9_]*'
Richard Henderson568ae7e2017-12-07 12:44:09 -080056
def error_with_file(file, lineno, *args):
    """Print an error message from file:line and args and exit.

    FILE and LINENO, when truthy, form a 'file:line: ' prefix.  ARGS are
    printed after 'error: ' on stderr, separated by spaces.

    If an output file is open it is closed and removed so that no
    partial output is left behind.  The process then exits with status
    1, or 0 when the global 'testforerror' is set.
    """
    prefix = ''
    if file:
        prefix += f'{file}:'
    if lineno:
        prefix += f'{lineno}:'
    if prefix:
        prefix += ' '
    print(prefix, end='error: ', file=sys.stderr)
    print(*args, file=sys.stderr)

    if output_file and output_fd:
        output_fd.close()
        os.remove(output_file)
    # sys.exit is always available; the bare exit() builtin is injected
    # by the 'site' module and is missing under 'python -S'.
    sys.exit(0 if testforerror else 1)
# end error_with_file
77
Richard Henderson568ae7e2017-12-07 12:44:09 -080078
def error(lineno, *args):
    """Report an error at LINENO of the current input file and exit."""
    where = input_file
    error_with_file(where, lineno, *args)
# end error
82
Richard Henderson568ae7e2017-12-07 12:44:09 -080083
def output(*args):
    """Write every string in ARGS, in order, to the global output stream."""
    for chunk in args:
        output_fd.write(chunk)
88
89
def output_autogen():
    """Emit the 'autogenerated' banner comment followed by a blank line."""
    banner = '/* This file is autogenerated by scripts/decodetree.py. */\n\n'
    output(banner)
92
93
def str_indent(c):
    """Return a string consisting of C space characters."""
    padding = ' ' * c
    return padding
97
98
def str_fields(fields):
    """Return a string uniquely identifying FIELDS.

    The result is the sorted key names of FIELDS joined by underscores.
    """
    return '_'.join(sorted(fields.keys()))
105
106
def whex(val):
    """Return a hex string for VAL, zero-padded to insnwidth // 4 digits."""
    digits = insnwidth // 4
    return f'0x{val:0{digits}x}'
111
112
def whexC(val):
    """Return whex(VAL) with the proper suffix for a C constant.

    Values that need more than 32 bits get 'ull', values with bit 31 set
    get 'u', and everything else gets no suffix.
    """
    text = whex(val)
    if val >= 0x100000000:
        return text + 'ull'
    if val >= 0x80000000:
        return text + 'u'
    return text + ''
122
123
def str_match_bits(bits, mask):
    """Return a string pretty-printing BITS/MASK.

    Bits are rendered from bit insnwidth - 1 down to bit 0: '1' or '0'
    for positions selected by MASK, '.' for the rest.  A space follows
    bits 24, 16 and 8.
    """
    separators = 0x01010100
    pieces = []
    bit = 1 << (insnwidth - 1)
    while bit != 0:
        if not (bit & mask):
            pieces.append('.')
        elif bit & bits:
            pieces.append('1')
        else:
            pieces.append('0')
        if bit & separators:
            pieces.append(' ')
        bit >>= 1
    return ''.join(pieces)
143
144
def is_pow2(x):
    """Return true iff X is equal to a power of 2.

    Zero and negative values are not powers of two and yield False; the
    bit trick alone would wrongly accept 0.
    """
    return x > 0 and (x & (x - 1)) == 0
148
149
def ctz(x):
    """Return the number of times 2 factors into X (X must be nonzero)."""
    assert x != 0
    # x & -x isolates the lowest set bit; its bit position is the answer.
    lowest = x & -x
    return lowest.bit_length() - 1
157
158
def is_contiguous(bits):
    """Return the shift of BITS if its set bits form one run, else -1.

    A zero mask has no run and also yields -1.
    """
    if bits == 0:
        return -1
    shift = ctz(bits)
    # After dropping the trailing zeros, a single run of ones plus one
    # is a power of two.
    run_plus_one = (bits >> shift) + 1
    return shift if is_pow2(run_plus_one) else -1
167
168
def eq_fields_for_args(flds_a, arg):
    """Return True if argument set ARG can be inferred for FLDS_A.

    That requires the same number of fields, every type in ARG to be the
    default 'int', and every key of FLDS_A to be a field of ARG.
    """
    if len(flds_a) != len(arg.fields):
        return False
    # Only allow inference on default types
    if any(t != 'int' for t in arg.types):
        return False
    return all(key in arg.fields for key in flds_a)
180
181
def eq_fields_for_fmts(flds_a, flds_b):
    """Return True if the two field dictionaries describe the same fields.

    Both must have the same keys, and for each key the two values must
    be of the same class and must not compare unequal.
    """
    if len(flds_a) != len(flds_b):
        return False
    for key, lhs in flds_a.items():
        if key not in flds_b:
            return False
        rhs = flds_b[key]
        if lhs.__class__ != rhs.__class__:
            return False
        if lhs != rhs:
            return False
    return True
192
193
class Field:
    """Class representing a simple instruction field"""

    def __init__(self, sign, pos, len):
        self.sign = sign
        self.pos = pos
        self.len = len
        # LEN one-bits shifted up to bit position POS.
        ones = (1 << len) - 1
        self.mask = ones << pos

    def __str__(self):
        marker = 's' if self.sign else ''
        return f'{self.pos}:{marker}{self.len}'

    def str_extract(self):
        """Return the C expression that extracts this field from 'insn'."""
        helper = 'sextract' if self.sign else 'extract'
        return f'{helper}{bitop_width}(insn, {self.pos}, {self.len})'

    def __eq__(self, other):
        # Fields are equal when signedness and covered bits both agree.
        if self.sign != other.sign:
            return False
        return self.mask == other.mask

    def __ne__(self, other):
        return not self.__eq__(other)
# end Field
220
221
class MultiField:
    """Class representing a compound instruction field"""

    def __init__(self, subs, mask):
        self.subs = subs
        # The compound field takes its signedness from the first sub-field.
        self.sign = subs[0].sign
        self.mask = mask

    def __str__(self):
        return str(self.subs)

    def str_extract(self):
        """Return the C expression assembling all sub-fields.

        Sub-fields are processed last to first; each later-processed one
        is deposited above the bits accumulated so far.
        """
        expr = '0'
        offset = 0
        for sub in reversed(self.subs):
            piece = sub.str_extract()
            if offset == 0:
                expr = piece
            else:
                remaining = bitop_width - offset
                expr = (f'deposit{bitop_width}({expr}, {offset}, '
                        f'{remaining}, {piece})')
            offset += sub.len
        return expr

    def __ne__(self, other):
        if len(self.subs) != len(other.subs):
            return True
        return any(a.__class__ != b.__class__ or a != b
                   for a, b in zip(self.subs, other.subs))

    def __eq__(self, other):
        return not self.__ne__(other)
# end MultiField
256
257
class ConstField:
    """Class representing an argument field with constant value"""

    def __init__(self, value):
        self.value = value
        # A constant occupies no bits of the instruction.
        self.mask = 0
        self.sign = value < 0

    def __str__(self):
        return str(self.value)

    def str_extract(self):
        """Return the constant itself as the C expression for the field."""
        return str(self.value)

    def __eq__(self, other):
        # Python 3 never calls __cmp__, so without this two constants
        # with the same value would only compare equal by identity.
        if not isinstance(other, ConstField):
            return NotImplemented
        return self.value == other.value

    def __hash__(self):
        # Defining __eq__ drops the default hash; keep instances hashable
        # and consistent with equality.
        return hash(self.value)

    def __cmp__(self, other):
        # Kept for backward compatibility only; unused by Python 3.
        return self.value - other.value
# end ConstField
274
275
class FunctionField:
    """Class representing a field passed through a function"""

    def __init__(self, func, base):
        self.func = func
        self.base = base
        # Mask and signedness are inherited from the wrapped field.
        self.mask = base.mask
        self.sign = base.sign

    def __str__(self):
        return ''.join((self.func, '(', str(self.base), ')'))

    def str_extract(self):
        """Return the C call applying the function to the base extraction."""
        inner = self.base.str_extract()
        return ''.join((self.func, '(ctx, ', inner, ')'))

    def __eq__(self, other):
        if self.func != other.func:
            return False
        return self.base == other.base

    def __ne__(self, other):
        return not self.__eq__(other)
# end FunctionField
296
297
class ParameterField:
    """Class representing a pseudo-field read from a function"""

    def __init__(self, func):
        self.func = func
        # No instruction bits are consumed by a parameter field.
        self.mask = 0
        self.sign = 0

    def __str__(self):
        return self.func

    def str_extract(self):
        """Return the C call that produces the value from the context."""
        return ''.join((self.func, '(ctx)'))

    def __eq__(self, other):
        same_function = self.func == other.func
        return same_function

    def __ne__(self, other):
        return not self.__eq__(other)
# end ParameterField
317
318
class Arguments:
    """Class representing the extracted fields of a format"""

    def __init__(self, nm, flds, types, extern):
        self.name = nm
        self.fields = flds
        self.types = types
        self.extern = extern

    def __str__(self):
        return self.name + ' ' + str(self.fields)

    def struct_name(self):
        """Return the name of the C struct for this argument set."""
        return 'arg_' + self.name

    def output_def(self):
        """Emit the C typedef for this argument set unless it is extern."""
        if self.extern:
            return
        output('typedef struct {\n')
        for member, ctype in zip(self.fields, self.types):
            output(f' {ctype} {member};\n')
        output('} ', self.struct_name(), ';\n\n')
# end Arguments
340
341
class General:
    """Common code between instruction formats and instruction patterns"""

    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
        # Where the definition came from, for diagnostics.
        self.file = input_file
        self.lineno = lineno
        self.name = name
        self.base = base
        # Bit-level description of the definition.
        self.fixedbits = fixb
        self.fixedmask = fixm
        self.undefmask = udfm
        self.fieldmask = fldm
        self.fields = flds
        self.width = w

    def __str__(self):
        pretty = str_match_bits(self.fixedbits, self.fixedmask)
        return self.name + ' ' + pretty

    def str1(self, i):
        """Return str(self) indented by I spaces."""
        return str_indent(i) + self.__str__()
# end General
362
363
class Format(General):
    """Class representing an instruction format"""

    def extract_name(self):
        """Return the name of the generated C extract function."""
        return decode_function + '_extract_' + self.name

    def output_extract(self):
        """Emit the C function that fills the argument struct from insn."""
        struct = self.base.struct_name()
        output('static void ', self.extract_name(), '(DisasContext *ctx, ',
               struct, ' *a, ', insntype, ' insn)\n{\n')
        for member, fld in self.fields.items():
            output(' a->', member, ' = ', fld.str_extract(), ';\n')
        output('}\n\n')
# end Format
378
379
class Pattern(General):
    """Class representing an instruction pattern"""

    def output_decl(self):
        """Emit the arg_<name> typedef and the translate prototype."""
        struct = self.base.base.struct_name()
        output('typedef ', struct,
               ' arg_', self.name, ';\n')
        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
               '(DisasContext *ctx, arg_', self.name, ' *a);\n')

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C code that extracts fields and calls the translator."""
        ind = str_indent(i)
        arg = self.base.base.name
        output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
        # Run the format's extract function unless a parent already did.
        if not extracted:
            output(ind, self.base.extract_name(),
                   '(ctx, &u.f_', arg, ', insn);\n')
        # Fields set directly by the pattern are assigned afterwards.
        for member, fld in self.fields.items():
            output(ind, 'u.f_', arg, '.', member, ' = ',
                   fld.str_extract(), ';\n')
        output(ind, 'if (', translate_prefix, '_', self.name,
               '(ctx, &u.f_', arg, ')) return true;\n')

    # Normal patterns do not have children, so the tree-building and
    # propagation passes have nothing to do here.
    def build_tree(self):
        return None

    def prop_masks(self):
        return None

    def prop_format(self):
        return None

    def prop_width(self):
        return None

# end Pattern
415
416
class MultiPattern(General):
    """Class representing a set of instruction patterns"""

    def __init__(self, lineno):
        self.file = input_file
        self.lineno = lineno
        self.pats = []
        self.base = None
        self.fixedbits = 0
        self.fixedmask = 0
        self.undefmask = 0
        self.width = None

    def __str__(self):
        if self.fixedbits is None:
            return 'group'
        return 'group' + ' ' + str_match_bits(self.fixedbits, self.fixedmask)

    def output_decl(self):
        """Emit the declarations of every child pattern."""
        for child in self.pats:
            child.output_decl()

    def prop_masks(self):
        """Compute the fixed/undef masks shared by all children."""
        common_fixed = insnmask
        common_undef = insnmask

        # Collect fixedmask/undefmask for all of the children.
        for child in self.pats:
            child.prop_masks()
            common_fixed &= child.fixedmask
            common_undef &= child.undefmask

        # Drop bits from the common mask until every child agrees on the
        # value of the bits that remain.
        settled = False
        common_bits = 0
        while not settled and common_fixed != 0:
            common_bits = None
            for child in self.pats:
                child_bits = child.fixedbits & common_fixed
                if common_bits is None:
                    common_bits = child_bits
                elif common_bits != child_bits:
                    # Disagreement: clear the differing bits and rescan.
                    common_fixed &= ~(common_bits ^ child_bits)
                    break
            else:
                settled = True

        self.fixedbits = common_bits
        self.fixedmask = common_fixed
        self.undefmask = common_undef

    def build_tree(self):
        for child in self.pats:
            child.build_tree()

    def prop_format(self):
        for child in self.pats:
            child.prop_format()

    def prop_width(self):
        """Propagate widths upward; all children must share one width."""
        common = None
        for child in self.pats:
            child.prop_width()
            if common is None:
                common = child.width
            elif common != child.width:
                error_with_file(self.file, self.lineno,
                                'width mismatch in patterns within braces')
        self.width = common

# end MultiPattern
491
492
class IncMultiPattern(MultiPattern):
    """Class representing an overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit each child in order, guarded by any bits not yet tested."""
        ind = str_indent(i)
        for child in self.pats:
            if outermask == child.fixedmask:
                # Nothing left to test for this child.
                child.output_code(i, extracted,
                                  child.fixedbits, child.fixedmask)
                continue
            innermask = child.fixedmask & ~outermask
            innerbits = child.fixedbits & ~outermask
            output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n')
            output(ind, f' /* {str_match_bits(child.fixedbits, child.fixedmask)} */\n')
            child.output_code(i + 4, extracted,
                              child.fixedbits, child.fixedmask)
            output(ind, '}\n')

    def build_tree(self):
        # A group without any pattern is a specification error.
        if not self.pats:
            error_with_file(self.file, self.lineno, 'empty pattern group')
        super().build_tree()

#end IncMultiPattern
Richard Henderson0eff2df2019-02-23 11:35:36 -0800516
517
class Tree:
    """Class representing a node in a decode tree"""

    def __init__(self, fm, tm):
        self.fixedmask = fm
        self.thismask = tm
        # List of (bits, child) pairs; children provide str1/output_code.
        self.subs = []
        # Format shared by every child, if one has been propagated here.
        self.base = None

    def str1(self, i):
        """Return a multi-line description of this node indented by I."""
        ind = str_indent(i)
        r = ind + whex(self.fixedmask)
        # The shared format lives in self.base; there is no 'format'
        # attribute on this class.
        if self.base:
            r += ' ' + self.base.name
        r += ' [\n'
        for (b, s) in self.subs:
            r += ind + f' {whex(b)}:\n'
            r += s.str1(i + 4) + '\n'
        r += ind + ']'
        return r

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit a C switch over the bits selected by thismask."""
        ind = str_indent(i)

        # If we identified all nodes below have the same format,
        # extract the fields now.
        if not extracted and self.base:
            output(ind, self.base.extract_name(),
                   '(ctx, &u.f_', self.base.base.name, ', insn);\n')
            extracted = True

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.thismask)
        if sh > 0:
            # Propagate SH down into the local functions.
            def str_switch(b, sh=sh):
                return f'(insn >> {sh}) & {b >> sh:#x}'

            def str_case(b, sh=sh):
                return hex(b >> sh)
        else:
            def str_switch(b):
                return f'insn & {whexC(b)}'

            def str_case(b):
                return whexC(b)

        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
        for b, s in sorted(self.subs):
            # Every child must fix all of the bits this node switches on.
            assert (self.thismask & ~s.fixedmask) == 0
            innermask = outermask | self.thismask
            innerbits = outerbits | b
            output(ind, 'case ', str_case(b), ':\n')
            output(ind, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            s.output_code(i + 4, extracted, innerbits, innermask)
            output(ind, ' break;\n')
        output(ind, '}\n')
# end Tree
581
582
class ExcMultiPattern(MultiPattern):
    """Class representing a non-overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        # Defer everything to our decomposed Tree node
        self.tree.output_code(i, extracted, outerbits, outermask)

    @staticmethod
    def __build_tree(pats, outerbits, outermask):
        """Return a Tree that distinguishes PATS by their fixed bits."""
        # Find the intersection of all remaining fixedmask.
        innermask = ~outermask & insnmask
        for pat in pats:
            innermask &= pat.fixedmask

        if innermask == 0:
            # Edge condition: One pattern covers the entire insnmask
            if len(pats) == 1:
                leaf = Tree(outermask, innermask)
                leaf.subs.append((0, pats[0]))
                return leaf

            # No remaining bit separates the patterns: report them all.
            text = 'overlapping patterns:' + ''.join(
                '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
                for p in pats)
            error_with_file(pats[0].file, pats[0].lineno, text)

        fullmask = outermask | innermask

        # Sort each element of pats into the bin selected by the mask.
        bins = {}
        for pat in pats:
            bins.setdefault(pat.fixedbits & innermask, []).append(pat)

        # We must recurse if any bin has more than one element or if
        # the single element in the bin has not been fully matched.
        node = Tree(fullmask, innermask)

        for bits, members in bins.items():
            child = members[0]
            if len(members) > 1 or child.fixedmask & ~fullmask != 0:
                child = ExcMultiPattern.__build_tree(members,
                                                     bits | outerbits,
                                                     fullmask)
            node.subs.append((bits, child))

        return node

    def build_tree(self):
        super().build_tree()
        self.tree = self.__build_tree(self.pats, self.fixedbits,
                                      self.fixedmask)

    @staticmethod
    def __prop_format(tree):
        """Propagate Format objects into the decode tree"""

        # Depth first search.
        for (_, sub) in tree.subs:
            if isinstance(sub, Tree):
                ExcMultiPattern.__prop_format(sub)

        # If all entries in SUBS have the same format, then
        # propagate that into the tree.
        shared = None
        for (_, sub) in tree.subs:
            if shared is None:
                shared = sub.base
                if shared is None:
                    return
            if shared is not sub.base:
                return
        tree.base = shared

    def prop_format(self):
        super().prop_format()
        self.__prop_format(self.tree)

# end ExcMultiPattern
663
664
def parse_field(lineno, name, toks):
    """Parse one instruction field from TOKS at LINENO.

    The resulting field object is stored in the global 'fields' table
    under NAME; any problem is reported through error().
    """
    # A "simple" field will have only one entry;
    # a "multifield" will have several.
    subs = []
    width = 0
    func = None
    for tok in toks:
        if re.match('^!function=', tok):
            if func:
                error(lineno, 'duplicate function')
            func = tok.split('=')[1]
            continue

        if re.fullmatch('[0-9]+:s[0-9]+', tok):
            # Signed field extract
            sign = True
            subtoks = tok.split(':s')
        elif re.fullmatch('[0-9]+:[0-9]+', tok):
            # Unsigned field extract
            sign = False
            subtoks = tok.split(':')
        else:
            error(lineno, f'invalid field token "{tok}"')
        po = int(subtoks[0])
        le = int(subtoks[1])
        if po + le > insnwidth:
            error(lineno, f'field {tok} too large')
        subs.append(Field(sign, po, le))
        width += le

    if width > insnwidth:
        error(lineno, 'field too large')

    if len(subs) == 0:
        # Without any bit range the field must come from a function alone.
        if func:
            f = ParameterField(func)
        else:
            error(lineno, 'field with no value')
    else:
        if len(subs) == 1:
            f = subs[0]
        else:
            # Sub-fields of a multifield may not share instruction bits.
            mask = 0
            for sub in subs:
                if mask & sub.mask:
                    error(lineno, 'field components overlap')
                mask |= sub.mask
            f = MultiField(subs, mask)
        if func:
            f = FunctionField(func, f)

    if name in fields:
        error(lineno, 'duplicate field', name)
    fields[name] = f
# end parse_field
725
726
def parse_arguments(lineno, name, toks):
    """Parse one argument set from TOKS at LINENO.

    Each token is '!extern', 'member' (type 'int') or 'member:type'.
    The resulting Arguments object is stored in the global 'arguments'
    table under NAME.
    """
    global anyextern

    typed_member = re_C_ident + ':' + re_C_ident
    members = []
    ctypes = []
    extern = False
    for tok in toks:
        if re.fullmatch('!extern', tok):
            extern = True
            anyextern = True
            continue
        if re.fullmatch(typed_member, tok):
            (member, ctype) = tok.split(':')
        elif re.fullmatch(re_C_ident, tok):
            member = tok
            ctype = 'int'
        else:
            member = tok
            error(lineno, f'invalid argument set token "{member}"')
        if member in members:
            error(lineno, f'duplicate argument "{member}"')
        members.append(member)
        ctypes.append(ctype)

    if name in arguments:
        error(lineno, 'duplicate argument set', name)
    arguments[name] = Arguments(name, members, ctypes, extern)
# end parse_arguments
756
757
def lookup_field(lineno, name):
    """Return the field named NAME, or report it as undefined."""
    if name not in fields:
        error(lineno, 'undefined field', name)
    else:
        return fields[name]
763
764
def add_field(lineno, flds, new_name, f):
    """Store F under NEW_NAME in FLDS and return FLDS.

    A name that is already present is reported through error().
    """
    already_present = new_name in flds
    if already_present:
        error(lineno, 'duplicate field', new_name)
    flds[new_name] = f
    return flds
770
771
def add_field_byname(lineno, flds, new_name, old_name):
    """Add the global field OLD_NAME to FLDS under NEW_NAME."""
    existing = lookup_field(lineno, old_name)
    return add_field(lineno, flds, new_name, existing)
774
775
def infer_argument_set(flds):
    """Return an argument set whose members are the keys of FLDS.

    An existing argument set is reused when eq_fields_for_args()
    accepts it; otherwise a new all-'int' set is created, registered in
    the global 'arguments' table and returned.
    """
    for arg in arguments.values():
        if eq_fields_for_args(flds, arg):
            return arg

    name = decode_function + str(len(arguments))
    # Snapshot the names into a list, as parse_arguments() does, rather
    # than keeping a live keys() view tied to the caller's dictionary.
    arg = Arguments(name, list(flds), ['int'] * len(flds), False)
    arguments[name] = arg
    return arg
788
789
def infer_format(arg, fieldmask, flds, width):
    """Return (format, const_fields) for a pattern without explicit format.

    An existing format is reused when its argument set (if ARG is given),
    field mask, width and fields all match; otherwise a new one is
    created and registered in the global 'formats' table.
    """
    const_flds = {}
    var_flds = {}
    # NOTE(review): 'c is ConstField' compares each value with the class
    # object itself, so it is never true for an instance and every field
    # lands in var_flds.  isinstance() looks like the intent - confirm
    # against the callers before changing, as it alters what is returned.
    for n, c in flds.items():
        target = const_flds if c is ConstField else var_flds
        target[n] = c

    # Look for an existing format with the same argument set and fields
    for fmt in formats.values():
        same_arg = not arg or fmt.base == arg
        if (same_arg
                and fieldmask == fmt.fieldmask
                and width == fmt.width
                and eq_fields_for_fmts(flds, fmt.fields)):
            return (fmt, const_flds)

    name = decode_function + '_Fmt_' + str(len(formats))
    if not arg:
        arg = infer_argument_set(flds)

    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
    formats[name] = fmt

    return (fmt, const_flds)
# end infer_format
824
825
Richard Henderson08561fc2020-05-17 10:14:11 -0700826def parse_generic(lineno, parent_pat, name, toks):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800827 """Parse one instruction format from TOKS at LINENO"""
828 global fields
829 global arguments
830 global formats
Richard Henderson0eff2df2019-02-23 11:35:36 -0800831 global allpatterns
Richard Hendersonacfdd232020-09-03 12:23:34 -0700832 global re_arg_ident
833 global re_fld_ident
834 global re_fmt_ident
835 global re_C_ident
Richard Henderson568ae7e2017-12-07 12:44:09 -0800836 global insnwidth
837 global insnmask
Richard Henderson17560e92019-01-30 18:01:29 -0800838 global variablewidth
Richard Henderson568ae7e2017-12-07 12:44:09 -0800839
Richard Henderson08561fc2020-05-17 10:14:11 -0700840 is_format = parent_pat is None
841
Richard Henderson568ae7e2017-12-07 12:44:09 -0800842 fixedmask = 0
843 fixedbits = 0
844 undefmask = 0
845 width = 0
846 flds = {}
847 arg = None
848 fmt = None
849 for t in toks:
zhaolichang65fdb3c2020-09-17 15:50:23 +0800850 # '&Foo' gives a format an explicit argument set.
Richard Hendersonacfdd232020-09-03 12:23:34 -0700851 if re.fullmatch(re_arg_ident, t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800852 tt = t[1:]
853 if arg:
854 error(lineno, 'multiple argument sets')
855 if tt in arguments:
856 arg = arguments[tt]
857 else:
858 error(lineno, 'undefined argument set', t)
859 continue
860
861 # '@Foo' gives a pattern an explicit format.
Richard Hendersonacfdd232020-09-03 12:23:34 -0700862 if re.fullmatch(re_fmt_ident, t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800863 tt = t[1:]
864 if fmt:
865 error(lineno, 'multiple formats')
866 if tt in formats:
867 fmt = formats[tt]
868 else:
869 error(lineno, 'undefined format', t)
870 continue
871
872 # '%Foo' imports a field.
Richard Hendersonacfdd232020-09-03 12:23:34 -0700873 if re.fullmatch(re_fld_ident, t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800874 tt = t[1:]
875 flds = add_field_byname(lineno, flds, tt, tt)
876 continue
877
878 # 'Foo=%Bar' imports a field with a different name.
Richard Hendersonacfdd232020-09-03 12:23:34 -0700879 if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800880 (fname, iname) = t.split('=%')
881 flds = add_field_byname(lineno, flds, fname, iname)
882 continue
883
884 # 'Foo=number' sets an argument field to a constant value
Richard Hendersonacfdd232020-09-03 12:23:34 -0700885 if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800886 (fname, value) = t.split('=')
887 value = int(value)
888 flds = add_field(lineno, flds, fname, ConstField(value))
889 continue
890
891 # Pattern of 0s, 1s, dots and dashes indicate required zeros,
892 # required ones, or dont-cares.
John Snow2d110c12020-05-13 23:52:30 -0400893 if re.fullmatch('[01.-]+', t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800894 shift = len(t)
895 fms = t.replace('0', '1')
896 fms = fms.replace('.', '0')
897 fms = fms.replace('-', '0')
898 fbs = t.replace('.', '0')
899 fbs = fbs.replace('-', '0')
900 ubm = t.replace('1', '0')
901 ubm = ubm.replace('.', '0')
902 ubm = ubm.replace('-', '1')
903 fms = int(fms, 2)
904 fbs = int(fbs, 2)
905 ubm = int(ubm, 2)
906 fixedbits = (fixedbits << shift) | fbs
907 fixedmask = (fixedmask << shift) | fms
908 undefmask = (undefmask << shift) | ubm
909 # Otherwise, fieldname:fieldwidth
Richard Hendersonacfdd232020-09-03 12:23:34 -0700910 elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800911 (fname, flen) = t.split(':')
912 sign = False
913 if flen[0] == 's':
914 sign = True
915 flen = flen[1:]
916 shift = int(flen, 10)
Richard Henderson2decfc92019-03-05 15:34:41 -0800917 if shift + width > insnwidth:
Richard Henderson9f6e2b42021-04-28 16:37:02 -0700918 error(lineno, f'field {fname} exceeds insnwidth')
Richard Henderson568ae7e2017-12-07 12:44:09 -0800919 f = Field(sign, insnwidth - width - shift, shift)
920 flds = add_field(lineno, flds, fname, f)
921 fixedbits <<= shift
922 fixedmask <<= shift
923 undefmask <<= shift
924 else:
Richard Henderson9f6e2b42021-04-28 16:37:02 -0700925 error(lineno, f'invalid token "{t}"')
Richard Henderson568ae7e2017-12-07 12:44:09 -0800926 width += shift
927
Richard Henderson17560e92019-01-30 18:01:29 -0800928 if variablewidth and width < insnwidth and width % 8 == 0:
929 shift = insnwidth - width
930 fixedbits <<= shift
931 fixedmask <<= shift
932 undefmask <<= shift
933 undefmask |= (1 << shift) - 1
934
Richard Henderson568ae7e2017-12-07 12:44:09 -0800935 # We should have filled in all of the bits of the instruction.
Richard Henderson17560e92019-01-30 18:01:29 -0800936 elif not (is_format and width == 0) and width != insnwidth:
Richard Henderson9f6e2b42021-04-28 16:37:02 -0700937 error(lineno, f'definition has {width} bits')
Richard Henderson568ae7e2017-12-07 12:44:09 -0800938
zhaolichang65fdb3c2020-09-17 15:50:23 +0800939 # Do not check for fields overlapping fields; one valid usage
Richard Henderson568ae7e2017-12-07 12:44:09 -0800940 # is to be able to duplicate fields via import.
941 fieldmask = 0
942 for f in flds.values():
943 fieldmask |= f.mask
944
945 # Fix up what we've parsed to match either a format or a pattern.
946 if is_format:
947 # Formats cannot reference formats.
948 if fmt:
949 error(lineno, 'format referencing format')
950 # If an argument set is given, then there should be no fields
951 # without a place to store it.
952 if arg:
953 for f in flds.keys():
954 if f not in arg.fields:
Richard Henderson9f6e2b42021-04-28 16:37:02 -0700955 error(lineno, f'field {f} not in argument set {arg.name}')
Richard Henderson568ae7e2017-12-07 12:44:09 -0800956 else:
957 arg = infer_argument_set(flds)
958 if name in formats:
959 error(lineno, 'duplicate format name', name)
960 fmt = Format(name, lineno, arg, fixedbits, fixedmask,
Richard Henderson17560e92019-01-30 18:01:29 -0800961 undefmask, fieldmask, flds, width)
Richard Henderson568ae7e2017-12-07 12:44:09 -0800962 formats[name] = fmt
963 else:
964 # Patterns can reference a format ...
965 if fmt:
966 # ... but not an argument simultaneously
967 if arg:
968 error(lineno, 'pattern specifies both format and argument set')
969 if fixedmask & fmt.fixedmask:
970 error(lineno, 'pattern fixed bits overlap format fixed bits')
Richard Henderson17560e92019-01-30 18:01:29 -0800971 if width != fmt.width:
972 error(lineno, 'pattern uses format of different width')
Richard Henderson568ae7e2017-12-07 12:44:09 -0800973 fieldmask |= fmt.fieldmask
974 fixedbits |= fmt.fixedbits
975 fixedmask |= fmt.fixedmask
976 undefmask |= fmt.undefmask
977 else:
Richard Henderson17560e92019-01-30 18:01:29 -0800978 (fmt, flds) = infer_format(arg, fieldmask, flds, width)
Richard Henderson568ae7e2017-12-07 12:44:09 -0800979 arg = fmt.base
980 for f in flds.keys():
981 if f not in arg.fields:
Richard Henderson9f6e2b42021-04-28 16:37:02 -0700982 error(lineno, f'field {f} not in argument set {arg.name}')
Richard Henderson568ae7e2017-12-07 12:44:09 -0800983 if f in fmt.fields.keys():
Richard Henderson9f6e2b42021-04-28 16:37:02 -0700984 error(lineno, f'field {f} set by format and pattern')
Richard Henderson568ae7e2017-12-07 12:44:09 -0800985 for f in arg.fields:
986 if f not in flds.keys() and f not in fmt.fields.keys():
Richard Henderson9f6e2b42021-04-28 16:37:02 -0700987 error(lineno, f'field {f} not initialized')
Richard Henderson568ae7e2017-12-07 12:44:09 -0800988 pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
Richard Henderson17560e92019-01-30 18:01:29 -0800989 undefmask, fieldmask, flds, width)
Richard Henderson08561fc2020-05-17 10:14:11 -0700990 parent_pat.pats.append(pat)
Richard Henderson0eff2df2019-02-23 11:35:36 -0800991 allpatterns.append(pat)
Richard Henderson568ae7e2017-12-07 12:44:09 -0800992
993 # Validate the masks that we have assembled.
994 if fieldmask & fixedmask:
Richard Hendersonc7cefe62021-04-28 16:27:56 -0700995 error(lineno, 'fieldmask overlaps fixedmask ',
996 f'({whex(fieldmask)} & {whex(fixedmask)})')
Richard Henderson568ae7e2017-12-07 12:44:09 -0800997 if fieldmask & undefmask:
Richard Hendersonc7cefe62021-04-28 16:27:56 -0700998 error(lineno, 'fieldmask overlaps undefmask ',
999 f'({whex(fieldmask)} & {whex(undefmask)})')
Richard Henderson568ae7e2017-12-07 12:44:09 -08001000 if fixedmask & undefmask:
Richard Hendersonc7cefe62021-04-28 16:27:56 -07001001 error(lineno, 'fixedmask overlaps undefmask ',
1002 f'({whex(fixedmask)} & {whex(undefmask)})')
Richard Henderson568ae7e2017-12-07 12:44:09 -08001003 if not is_format:
1004 allbits = fieldmask | fixedmask | undefmask
1005 if allbits != insnmask:
Richard Hendersonc7cefe62021-04-28 16:27:56 -07001006 error(lineno, 'bits left unspecified ',
1007 f'({whex(allbits ^ insnmask)})')
Richard Henderson568ae7e2017-12-07 12:44:09 -08001008# end parse_general
1009
Richard Henderson0eff2df2019-02-23 11:35:36 -08001010
def parse_file(f, parent_pat):
    """Parse all of the patterns within a file

    F is iterated line by line.  PARENT_PAT is the group whose 'pats'
    list receives every pattern and nested group found at the current
    nesting level.

    A '#' starts a comment.  A line whose last whitespace-separated
    token is a lone backslash is continued on the next line.  '{' and
    '[' open a nested group and '}' and ']' close it; each nesting
    level must be indented by two further columns, and comment-only
    lines are held to the same indentation rule.
    """
    global re_arg_ident
    global re_fld_ident
    global re_fmt_ident
    global re_pat_ident

    # Tokens of the logical line being assembled.  A non-empty list at
    # the top of the loop means the previous line ended in a backslash.
    toks = []
    lineno = 0
    nesting = 0
    # Stack of the enclosing groups, innermost last.
    nesting_pats = []

    for line in f:
        lineno += 1

        # Expand and strip spaces, to find indent.
        line = line.rstrip()
        line = line.expandtabs()
        len1 = len(line)
        line = line.lstrip()
        len2 = len(line)

        # Discard comments
        end = line.find('#')
        if end >= 0:
            line = line[:end]

        t = line.split()
        if toks:
            # Next line after continuation
            toks.extend(t)
        else:
            # Allow completely blank lines.
            if len1 == 0:
                continue
            indent = len1 - len2
            # Empty line due to comment.
            if not t:
                # Indentation must be correct, even for comment lines.
                if indent != nesting:
                    error(lineno, 'indentation ', indent, ' != ', nesting)
                continue
            start_lineno = lineno
            toks = t

        # Continuation?
        if toks[-1] == '\\':
            toks.pop()
            continue

        name = toks[0]
        del toks[0]

        # End nesting?
        if name in ('}', ']'):
            if toks:
                error(start_lineno, 'extra tokens after close brace')

            # Make sure { } and [ ] nest properly.
            if (name == '}') != isinstance(parent_pat, IncMultiPattern):
                error(lineno, 'mismatched close brace')

            # Only an empty stack means "too many closers"; any other
            # exception is a real failure and must not be relabelled.
            try:
                parent_pat = nesting_pats.pop()
            except IndexError:
                error(lineno, 'extra close brace')

            nesting -= 2
            if indent != nesting:
                error(lineno, 'indentation ', indent, ' != ', nesting)

            toks = []
            continue

        # Everything else should have current indentation.
        if indent != nesting:
            error(start_lineno, 'indentation ', indent, ' != ', nesting)

        # Start nesting?
        if name in ('{', '['):
            if toks:
                error(start_lineno, 'extra tokens after open brace')

            if name == '{':
                nested_pat = IncMultiPattern(start_lineno)
            else:
                nested_pat = ExcMultiPattern(start_lineno)
            parent_pat.pats.append(nested_pat)
            nesting_pats.append(parent_pat)
            parent_pat = nested_pat

            nesting += 2
            toks = []
            continue

        # Determine the type of object needing to be parsed.
        if re.fullmatch(re_fld_ident, name):
            parse_field(start_lineno, name[1:], toks)
        elif re.fullmatch(re_arg_ident, name):
            parse_arguments(start_lineno, name[1:], toks)
        elif re.fullmatch(re_fmt_ident, name):
            # Formats are parsed without a parent pattern group.
            parse_generic(start_lineno, None, name[1:], toks)
        elif re.fullmatch(re_pat_ident, name):
            parse_generic(start_lineno, parent_pat, name, toks)
        else:
            error(lineno, f'invalid token "{name}"')
        toks = []

    if nesting != 0:
        error(lineno, 'missing close brace')
# end parse_file
1124
1125
class SizeTree:
    """Interior node of the size decode tree

    'mask' selects the instruction bits tested at this node, 'subs'
    holds (bits, subtree) pairs, one per value of the masked bits, and
    'width' is the number of instruction bits associated with the node.
    """

    def __init__(self, m, w):
        self.mask = m
        self.subs = []
        self.base = None
        self.width = w

    def str1(self, i):
        """Return a multi-line dump of this subtree indented by I"""
        pad = str_indent(i)
        pieces = [pad + whex(self.mask) + ' [\n']
        for bits, child in self.subs:
            pieces.append(pad + f' {whex(bits)}:\n')
            pieces.append(child.str1(i + 4) + '\n')
        pieces.append(pad + ']')
        return ''.join(pieces)

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the switch statement for this node at indent I

        EXTRACTED is the number of instruction bits already loaded;
        OUTERBITS and OUTERMASK describe the bits already matched by
        the enclosing nodes.
        """
        pad = str_indent(i)

        # If we need to load more bytes to test, do so now.
        if extracted < self.width:
            have = extracted // 8
            want = self.width // 8
            output(pad, f'insn = {decode_function}_load_bytes',
                   f'(ctx, insn, {have}, {want});\n')
            extracted = self.width

        # Attempt to aid the compiler in producing compact switch
        # statements.  If the bits in the mask are contiguous, switch on
        # the extracted field instead of on the masked instruction.
        shift = is_contiguous(self.mask)
        if shift > 0:
            switch_expr = f'(insn >> {shift}) & {self.mask >> shift:#x}'

            def case_label(bits):
                return hex(bits >> shift)
        else:
            switch_expr = f'insn & {whexC(self.mask)}'

            def case_label(bits):
                return whexC(bits)

        # The accumulated mask is the same for every case of this node.
        innermask = outermask | self.mask

        output(pad, 'switch (', switch_expr, ') {\n')
        for bits, child in sorted(self.subs):
            innerbits = outerbits | bits
            output(pad, 'case ', case_label(bits), ':\n')
            output(pad, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            child.output_code(i + 4, extracted, innerbits, innermask)
        output(pad, '}\n')
        output(pad, 'return insn;\n')
# end SizeTree
1184
class SizeLeaf:
    """Terminal node of the size decode tree

    'mask' is the bit mask recorded for the leaf and 'width' is the
    number of instruction bits that must be loaded when it is reached.
    """

    def __init__(self, m, w):
        self.mask = m
        self.width = w

    def str1(self, i):
        """Return the one-line dump of this leaf indented by I"""
        pad = str_indent(i)
        return pad + whex(self.mask)

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the final load (if still needed) and the return of insn

        OUTERBITS and OUTERMASK are accepted so the signature matches
        SizeTree.output_code; they are not used here.
        """
        global decode_function
        pad = str_indent(i)

        # If we need to load more bytes, do so now.
        if extracted < self.width:
            have = extracted // 8
            want = self.width // 8
            output(pad, f'insn = {decode_function}_load_bytes',
                   f'(ctx, insn, {have}, {want});\n')
        output(pad, 'return insn;\n')
# end SizeLeaf
1209
1210
def build_size_tree(pats, width, outerbits, outermask):
    """Build the size decode tree for PATS, examining WIDTH bits

    OUTERBITS and OUTERMASK are the bits already decided by the
    enclosing levels.  Returns a SizeLeaf when every pattern has the
    same width, otherwise a SizeTree (or the result of retrying with
    eight more bits).
    """
    global insnwidth

    # Collect the mask of bits that are fixed in this width
    innermask = (0xff << (insnwidth - width)) & ~outermask

    # Track the widest pattern seen and whether all widths agree.
    widest = None
    uniform = True
    for pat in pats:
        innermask &= pat.fixedmask
        if widest is None:
            widest = pat.width
        elif pat.width != widest:
            uniform = False
            widest = max(widest, pat.width)

    if uniform:
        return SizeLeaf(innermask, widest)

    if innermask == 0:
        # Nothing distinguishes the patterns here; look at more bits
        # while there are any left, otherwise report the overlap.
        if width < widest:
            return build_size_tree(pats, width + 8, outerbits, outermask)

        pnames = [p.name + ':' + p.file + ':' + str(p.lineno) for p in pats]
        error_with_file(pats[0].file, pats[0].lineno,
                        f'overlapping patterns size {width}:', pnames)

    # Group the patterns by the value of their fixed bits under the mask.
    bins = {}
    for pat in pats:
        bins.setdefault(pat.fixedbits & innermask, []).append(pat)

    fullmask = outermask | innermask
    if len(bins) == 1:
        # Every pattern agrees on these bits, so no switch is needed.
        only = next(iter(bins))
        return build_size_tree(bins[only], width + 8,
                               only | outerbits, fullmask)

    node = SizeTree(innermask, width)
    for bits, members in bins.items():
        child = build_size_tree(members, width, bits | outerbits, fullmask)
        node.subs.append((bits, child))
    return node
# end build_size_tree
1261
1262
def prop_size(tree):
    """Propagate minimum widths up the decode size tree

    Sets the width of each SizeTree node to the smallest width found
    among its sub-nodes and returns the width of TREE.
    """
    # Anything that is not an interior node reports its own width.
    if not isinstance(tree, SizeTree):
        return tree.width

    narrowest = None
    for _bits, child in tree.subs:
        child_width = prop_size(child)
        if narrowest is None or child_width < narrowest:
            narrowest = child_width

    assert narrowest >= tree.width
    tree.width = narrowest
    return narrowest
# end prop_size
1278
1279
def main():
    """Parse the command line and input files, then emit the decoder

    Every positional argument is parsed as an input file into one
    top-level pattern group.  The generated code is written to the
    file given by -o/--output, or to standard output otherwise.  The
    process always ends through sys.exit(); the status is 1 when
    --test-for-error was given and 0 otherwise.
    """
    global arguments
    global formats
    global allpatterns
    global translate_scope
    global translate_prefix
    global output_fd
    global output_file
    global input_file
    global insnwidth
    global insntype
    global insnmask
    global decode_function
    global bitop_width
    global variablewidth
    global anyextern
    global testforerror

    decode_scope = 'static '

    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
                 'static-decode=', 'varinsnwidth=', 'test-for-error']
    try:
        (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
    except getopt.GetoptError as err:
        error(0, err)
    for o, a in opts:
        if o in ('-o', '--output'):
            output_file = a
        elif o == '--decode':
            # A decode function named this way is emitted without 'static'.
            decode_function = a
            decode_scope = ''
        elif o == '--static-decode':
            decode_function = a
        elif o == '--translate':
            translate_prefix = a
            translate_scope = ''
        elif o in ('-w', '--insnwidth', '--varinsnwidth'):
            if o == '--varinsnwidth':
                variablewidth = True
            insnwidth = int(a)
            # 32 keeps the values already set; only 16 and 64 adjust them.
            if insnwidth == 16:
                insntype = 'uint16_t'
                insnmask = 0xffff
            elif insnwidth == 64:
                insntype = 'uint64_t'
                insnmask = 0xffffffffffffffff
                bitop_width = 64
            elif insnwidth != 32:
                error(0, 'cannot handle insns of width', insnwidth)
        elif o == '--test-for-error':
            testforerror = True
        else:
            # NOTE(review): the short option string accepts 'v', which has
            # no branch above, so '-v' ends up here.
            assert False, 'unhandled option'

    if len(args) < 1:
        error(0, 'missing input file')

    toppat = ExcMultiPattern(0)

    for filename in args:
        input_file = filename
        # The context manager closes the file even if parsing bails out.
        with open(filename, 'rt', encoding='utf-8') as f:
            parse_file(f, toppat)

    # We do not want to compute masks for toppat, because those masks
    # are used as a starting point for build_tree.  For toppat, we must
    # insist that decode begins from naught.
    for i in toppat.pats:
        i.prop_masks()

    toppat.build_tree()
    toppat.prop_format()

    if variablewidth:
        for i in toppat.pats:
            i.prop_width()
        stree = build_size_tree(toppat.pats, 8, 0, 0)
        prop_size(stree)

    if output_file:
        output_fd = open(output_file, 'wt', encoding='utf-8')
    else:
        output_fd = io.TextIOWrapper(sys.stdout.buffer,
                                     encoding=sys.stdout.encoding,
                                     errors="ignore")

    output_autogen()
    for n in sorted(arguments.keys()):
        f = arguments[n]
        f.output_def()

    # A single translate function can be invoked for different patterns.
    # Make sure that the argument sets are the same, and declare the
    # function only once.
    #
    # If we're sharing formats, we're likely also sharing trans_* functions,
    # but we can't tell which ones.  Prevent issues from the compiler by
    # suppressing redundant declaration warnings.
    if anyextern:
        output("#pragma GCC diagnostic push\n",
               "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
               "#ifdef __clang__\n"
               "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
               "#endif\n\n")

    out_pats = {}
    for i in allpatterns:
        if i.name in out_pats:
            p = out_pats[i.name]
            if i.base.base != p.base.base:
                error(0, i.name, ' has conflicting argument sets')
        else:
            i.output_decl()
            out_pats[i.name] = i
    output('\n')

    if anyextern:
        output("#pragma GCC diagnostic pop\n\n")

    for n in sorted(formats.keys()):
        f = formats[n]
        f.output_extract()

    output(decode_scope, 'bool ', decode_function,
           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')

    i4 = str_indent(4)

    if len(allpatterns) != 0:
        output(i4, 'union {\n')
        for n in sorted(arguments.keys()):
            f = arguments[n]
            output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
        output(i4, '} u;\n\n')
        toppat.output_code(4, False, 0, 0)

    output(i4, 'return false;\n')
    output('}\n')

    if variablewidth:
        output('\n', decode_scope, insntype, ' ', decode_function,
               '_load(DisasContext *ctx)\n{\n',
               ' ', insntype, ' insn = 0;\n\n')
        stree.output_code(4, 0, 0, 0)
        output('}\n')

    if output_file:
        output_fd.close()
    # sys.exit() is always available; the bare exit() helper is installed
    # by the site module and is absent when that module is not loaded.
    sys.exit(1 if testforerror else 0)
# end main
1432
1433
1434if __name__ == '__main__':
1435 main()