blob: db019a25c6f7f1d8d42f7bf3b4979aede0c49356 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright (c) 2018 Linaro Limited
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#

#
# Generate a decoding tree from a specification file.
# See the syntax and semantics in docs/devel/decodetree.rst.
#
22
Philippe Mathieu-Daudé4caceca2021-01-10 01:02:40 +010023import io
Richard Henderson568ae7e2017-12-07 12:44:09 -080024import os
25import re
26import sys
27import getopt
Richard Henderson568ae7e2017-12-07 12:44:09 -080028
# Width of one instruction in bits.  whex() pads to insnwidth // 4 hex
# digits and parse_field() rejects fields that extend past this width.
insnwidth = 32
# Width suffix of the generated extract/deposit helper names
# (e.g. extract32 / deposit32); see Field.str_extract().
bitop_width = 32
# Mask covering every bit of an instruction; used as the starting point
# when intersecting pattern masks.
insnmask = 0xffffffff
# NOTE(review): not referenced in this part of the file; presumably
# selects variable-width instruction decoding -- confirm against the
# option parsing code.
variablewidth = False
# Field name -> field object, filled in by parse_field().
fields = {}
# Argument set name -> Arguments, filled in by parse_arguments() and
# infer_argument_set().
arguments = {}
# Format name -> Format, filled in by infer_format().
formats = {}
# NOTE(review): only declared global by parse_generic() in the visible
# code; presumably the list of all parsed top-level patterns.
allpatterns = []
# Set to True by parse_arguments() once any '!extern' argument set is seen.
anyextern = False
# When true, error_with_file() exits with status 0 instead of 1.
testforerror = False

# Prefix of the generated translator function names (<prefix>_<pattern>).
translate_prefix = 'trans'
# Storage-class text emitted in front of translator declarations.
translate_scope = 'static '
# Name of the file currently being parsed; used in error messages.
input_file = ''
# Output file name and open file object; error_with_file() closes and
# removes the output on failure.
output_file = None
output_fd = None
# C type of the 'insn' parameter in generated extract functions.
insntype = 'uint32_t'
# Base name used for generated functions, formats and argument sets.
decode_function = 'decode'

# An identifier for C.
re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'

# Identifiers for Arguments, Fields, Formats and Patterns.
re_arg_ident = '&[a-zA-Z0-9_]*'
re_fld_ident = '%[a-zA-Z0-9_]*'
re_fmt_ident = '@[a-zA-Z0-9_]*'
re_pat_ident = '[a-zA-Z0-9_]*'
Richard Henderson568ae7e2017-12-07 12:44:09 -080056
# Local implementation of a topological sort. We use the same API that
# the Python graphlib does, so that when QEMU moves forward to a
# baseline of Python 3.9 or newer this code can all be dropped and
# replaced with:
#    from graphlib import TopologicalSorter, CycleError
#
# https://docs.python.org/3.9/library/graphlib.html#graphlib.TopologicalSorter
#
# We only implement the parts of TopologicalSorter we care about:
#  ts = TopologicalSorter(graph=None)
#    create the sorter. graph is a dictionary whose keys are
#    nodes and whose values are lists of the predecessors of that node.
#    (That is, if graph contains "A" -> ["B", "C"] then we must output
#    B and C before A.)
#  ts.static_order()
#    returns a list of all the nodes in sorted order, or raises CycleError
#  CycleError
#    exception raised if there are cycles in the graph. In graphlib the
#    second element in the args attribute is a list of nodes which form
#    a cycle, in order, with the first and last element the same,
#    eg [a, b, c, a].
#    (Our implementation only lists the nodes that could not be sorted;
#    it does not give them in cycle order or repeat the first node.)
#
# For our purposes we can assume that the data set is always small
# (typically 10 nodes or less, actual links in the graph very rare),
# so we don't need to worry about efficiency of implementation.
#
# The core of this implementation is from
# https://code.activestate.com/recipes/578272-topological-sort/
# (but updated to Python 3), and is under the MIT license.
86
class CycleError(ValueError):
    """Raised by the topological sort when the graph contains a cycle.

    Derives from ValueError so callers may catch either type.
    """
90
class TopologicalSorter:
    """Topologically sort a graph.

    Minimal stand-in for graphlib.TopologicalSorter: only the constructor
    and static_order() are provided.
    """
    def __init__(self, graph=None):
        # graph maps each node to an iterable of its predecessors, i.e. the
        # nodes that must be emitted before it.
        self.graph = graph

    def static_order(self):
        """Return a list of all nodes with predecessors before successors.

        Nodes that become ready in the same round are emitted in the order
        in which they first appear in the graph, so the result does not
        depend on set iteration order (and hence not on string hash
        randomisation).

        Raises CycleError if some nodes can never become ready; the second
        element of the exception args is the list of those nodes (not in
        cycle order).
        """
        # We do the sort right here, unlike the stdlib version
        if not self.graph:
            return []

        # Snapshot the predecessor lists once so that one-shot iterables
        # are only consumed a single time.
        preds_of = {node: list(preds) for node, preds in self.graph.items()}

        # Remaining (unsatisfied) dependencies per node, as sets.
        data = {node: set(preds) for node, preds in preds_of.items()}

        # Nodes that only ever appear as predecessors depend on nothing.
        for preds in preds_of.values():
            for pred in preds:
                data.setdefault(pred, set())

        result = []
        while True:
            # Everything whose dependencies have all been emitted.
            ready = [node for node, deps in data.items() if not deps]
            if not ready:
                break
            result.extend(ready)
            done = set(ready)
            data = {node: deps - done
                    for node, deps in data.items()
                    if node not in done}

        if data:
            # This doesn't give as nice results as the stdlib, which
            # gives you the cycle by listing the nodes in order. Here
            # we only know the nodes in the cycle but not their order.
            raise CycleError('nodes are in a cycle', list(data))

        return result
# end TopologicalSorter
130
def error_with_file(file, lineno, *args):
    """Print an error message from file:line and args and exit.

    The message goes to stderr as '<file>:<lineno>: error: <args...>',
    omitting whichever of FILE and LINENO is false.  Any partially
    written output file is closed and removed.  The process then exits
    with status 1, or 0 when the global testforerror is set.
    """
    prefix = ''
    if file:
        prefix += f'{file}:'
    if lineno:
        prefix += f'{lineno}:'
    if prefix:
        prefix += ' '
    print(prefix, end='error: ', file=sys.stderr)
    print(*args, file=sys.stderr)

    if output_file and output_fd:
        output_fd.close()
        # Do not try to remove e.g. -o /dev/null
        if not output_file.startswith("/dev"):
            try:
                os.remove(output_file)
            except PermissionError:
                # Best effort only: the error has already been reported.
                pass
    # Use sys.exit() rather than the exit() helper, which is injected by
    # the site module for interactive use and is absent under 'python -S'.
    sys.exit(0 if testforerror else 1)
# end error_with_file
156
Richard Henderson568ae7e2017-12-07 12:44:09 -0800157
def error(lineno, *args):
    """Report an error at LINENO of the current input file and exit."""
    error_with_file(input_file, lineno, *args)
# end error
161
Richard Henderson568ae7e2017-12-07 12:44:09 -0800162
def output(*args):
    """Write each string in ARGS, in order, to the global output file."""
    emit = output_fd.write
    for piece in args:
        emit(piece)
167
168
def output_autogen():
    """Emit the 'autogenerated file' banner comment."""
    banner = '/* This file is autogenerated by scripts/decodetree.py. */\n\n'
    output(banner)
171
172
def str_indent(c):
    """Return a string made of C space characters"""
    return c * ' '
176
177
def str_fields(fields):
    """Return a string uniquely identifying FIELDS.

    The result is the sorted field names joined with underscores.
    """
    return '_'.join(sorted(fields.keys()))
184
185
def whex(val):
    """Return VAL as a '0x' hex string zero-padded to the instruction width"""
    digits = insnwidth // 4
    return '0x' + format(val, f'0{digits}x')
190
191
def whexC(val):
    """Return a hex string for val padded for insnwidth,
       and with the proper suffix for a C constant.

    Values that need more than 32 bits get 'ull'; values with bit 31 set
    get 'u'; anything smaller has no suffix.
    """
    text = whex(val)
    if val >= 0x100000000:
        return text + 'ull'
    if val >= 0x80000000:
        return text + 'u'
    return text
201
202
def str_match_bits(bits, mask):
    """Return a string pretty-printing BITS/MASK.

    Bits are listed from most to least significant: '.' where MASK is
    clear, otherwise '1' or '0' according to BITS.  A space follows bits
    8, 16 and 24 to group the output.
    """
    separators = 0x01010100
    chars = []
    for pos in range(insnwidth - 1, -1, -1):
        bit = 1 << pos
        if not bit & mask:
            chars.append('.')
        elif bit & bits:
            chars.append('1')
        else:
            chars.append('0')
        if bit & separators:
            chars.append(' ')
    return ''.join(chars)
222
223
def is_pow2(x):
    """Return true iff X has at most one bit set.

    That is, X is a power of 2; note that 0 is accepted as well.
    """
    return not x & (x - 1)
227
228
def ctz(x):
    """Return the number of trailing zero bits of X (X must be non-zero)."""
    assert x != 0
    # x & -x isolates the lowest set bit; its bit_length is index + 1.
    lowest = x & -x
    return lowest.bit_length() - 1
236
237
def is_contiguous(bits):
    """Return the shift of BITS if its set bits form one contiguous run.

    Returns -1 when BITS is zero or when the set bits are not contiguous.
    """
    if bits == 0:
        return -1
    shift = ctz(bits)
    # A contiguous run shifted down to bit 0 is one less than a power of 2.
    return shift if is_pow2((bits >> shift) + 1) else -1
246
247
def eq_fields_for_args(flds_a, arg):
    """Return True if argument set ARG can be inferred for fields FLDS_A.

    ARG matches when it has the same number of fields, every field of ARG
    has the default 'int' type, and every name in FLDS_A is a field of ARG.
    """
    if len(flds_a) != len(arg.fields):
        return False
    # Only allow inference on default types
    if any(t != 'int' for t in arg.types):
        return False
    return all(name in arg.fields for name in flds_a)
259
260
def eq_fields_for_fmts(flds_a, flds_b):
    """Return True if the two field dictionaries are equivalent.

    They must have the same keys, and each pair of values must be of the
    same class and compare equal.
    """
    if len(flds_a) != len(flds_b):
        return False
    try:
        pairs = [(fa, flds_b[key]) for key, fa in flds_a.items()]
    except KeyError:
        # Some name in FLDS_A is missing from FLDS_B.
        return False
    return not any(fa.__class__ != fb.__class__ or fa != fb
                   for fa, fb in pairs)
271
272
class Field:
    """Class representing a simple instruction field"""
    def __init__(self, sign, pos, len):
        # sign: true for a sign-extended field
        # pos:  bit position of the least significant bit
        # len:  number of bits
        self.sign = sign
        self.pos = pos
        self.len = len
        low_ones = (1 << len) - 1
        self.mask = low_ones << pos

    def __str__(self):
        marker = 's' if self.sign else ''
        return f'{self.pos}:{marker}{self.len}'

    def str_extract(self, lvalue_formatter):
        """Return the C expression extracting this field from 'insn'."""
        helper = 'sextract' if self.sign else 'extract'
        return f'{helper}{bitop_width}(insn, {self.pos}, {self.len})'

    def __eq__(self, other):
        # Fields are equal when they cover the same bits with the same
        # signedness.
        return self.sign == other.sign and self.mask == other.mask

    def __ne__(self, other):
        return not self.__eq__(other)
# end Field
299
300
class MultiField:
    """Class representing a compound instruction field"""
    def __init__(self, subs, mask):
        # subs: component fields, first component first
        # mask: union of the component masks
        self.subs = subs
        self.mask = mask
        # The compound field takes its signedness from the first component.
        self.sign = subs[0].sign

    def __str__(self):
        return str(self.subs)

    def str_extract(self, lvalue_formatter):
        """Return the C expression assembling this field from its parts.

        Components are combined starting from the last one, each earlier
        component being deposited above the bits already collected.
        """
        expr = '0'
        used = 0
        for sub in reversed(self.subs):
            part = sub.str_extract(lvalue_formatter)
            if used == 0:
                expr = part
            else:
                expr = (f'deposit{bitop_width}({expr}, {used}, '
                        f'{bitop_width - used}, {part})')
            used += sub.len
        return expr

    def __ne__(self, other):
        if len(self.subs) != len(other.subs):
            return True
        return any(a.__class__ != b.__class__ or a != b
                   for a, b in zip(self.subs, other.subs))

    def __eq__(self, other):
        return not self.__ne__(other)
# end MultiField
335
336
class ConstField:
    """Class representing an argument field with constant value"""
    def __init__(self, value):
        self.value = value
        # A constant occupies no instruction bits.
        self.mask = 0
        self.sign = value < 0

    def __str__(self):
        return str(self.value)

    def str_extract(self, lvalue_formatter):
        """Return the constant itself as the C expression."""
        return str(self.value)

    # NOTE(review): Python 3 never calls __cmp__, so '==' and '!=' on
    # ConstField objects fall back to identity comparison.  Kept as-is
    # because defining __eq__ would change which formats compare equal.
    def __cmp__(self, other):
        return self.value - other.value
# end ConstField
353
354
class FunctionField:
    """Class representing a field passed through a function"""
    def __init__(self, func, base):
        # func: name of the C function applied to the extracted value
        # base: the underlying field; mask and sign are taken from it
        self.func = func
        self.base = base
        self.mask = base.mask
        self.sign = base.sign

    def __str__(self):
        return self.func + '(' + str(self.base) + ')'

    def str_extract(self, lvalue_formatter):
        """Return the C call wrapping the base field's extraction."""
        inner = self.base.str_extract(lvalue_formatter)
        return self.func + '(ctx, ' + inner + ')'

    def __eq__(self, other):
        return self.func == other.func and self.base == other.base

    def __ne__(self, other):
        return not self.__eq__(other)
# end FunctionField
376
377
class ParameterField:
    """Class representing a pseudo-field read from a function"""
    def __init__(self, func):
        # func: name of the C function that supplies the value
        self.func = func
        # No instruction bits are consumed.
        self.mask = 0
        self.sign = 0

    def __str__(self):
        return self.func

    def str_extract(self, lvalue_formatter):
        """Return the C call that produces the value."""
        return f'{self.func}(ctx)'

    def __eq__(self, other):
        return self.func == other.func

    def __ne__(self, other):
        return not self.__eq__(other)
# end ParameterField
397
398
class Arguments:
    """Class representing the extracted fields of a format"""
    def __init__(self, nm, flds, types, extern):
        # nm:     argument set name
        # flds:   member names
        # types:  C type of each member, parallel to flds
        # extern: true if the struct is declared elsewhere
        self.name = nm
        self.fields = flds
        self.types = types
        self.extern = extern

    def __str__(self):
        return self.name + ' ' + str(self.fields)

    def struct_name(self):
        """Return the C typedef name for this argument set."""
        return 'arg_' + self.name

    def output_def(self):
        """Emit the C struct typedef, unless the set is extern."""
        if self.extern:
            return
        output('typedef struct {\n')
        for member, ctype in zip(self.fields, self.types):
            output(f' {ctype} {member};\n')
        output('} ', self.struct_name(), ';\n\n')
# end Arguments
420
421
class General:
    """Common code between instruction formats and instruction patterns"""
    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
        self.name = name
        # Remember where the definition came from, for diagnostics.
        self.file = input_file
        self.lineno = lineno
        self.base = base
        self.fixedbits, self.fixedmask = fixb, fixm
        self.undefmask = udfm
        self.fieldmask = fldm
        self.fields = flds
        self.width = w

    def __str__(self):
        bits = str_match_bits(self.fixedbits, self.fixedmask)
        return self.name + ' ' + bits

    def str1(self, i):
        """Return the string form indented by I spaces."""
        return str_indent(i) + self.__str__()

    def output_fields(self, indent, lvalue_formatter):
        """Emit one C assignment per field.

        LVALUE_FORMATTER maps a field name to the C lvalue that receives
        the extracted value.
        """
        for fname, fld in self.fields.items():
            output(indent, lvalue_formatter(fname), ' = ',
                   fld.str_extract(lvalue_formatter), ';\n')
# end General
447
448
class Format(General):
    """Class representing an instruction format"""

    def extract_name(self):
        """Return the name of the generated C extract function."""
        return decode_function + '_extract_' + self.name

    def output_extract(self):
        """Emit the C function that fills the argument struct from insn."""
        output('static void ', self.extract_name(), '(DisasContext *ctx, ',
               self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')

        def member(n):
            return 'a->' + n

        self.output_fields(str_indent(4), member)
        output('}\n\n')
# end Format
462
463
class Pattern(General):
    """Class representing an instruction pattern"""

    def output_decl(self):
        """Emit the argument typedef and translator prototype."""
        output('typedef ', self.base.base.struct_name(),
               ' arg_', self.name, ';\n')
        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
               '(DisasContext *ctx, arg_', self.name, ' *a);\n')

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C code that extracts arguments and calls the translator.

        When EXTRACTED is false the format's extract function is called
        first; the pattern's own fields are always assigned afterwards.
        """
        ind = str_indent(i)
        arg = self.base.base.name
        output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
        if not extracted:
            output(ind, self.base.extract_name(),
                   '(ctx, &u.f_', arg, ', insn);\n')

        def member(n):
            return 'u.f_' + arg + '.' + n

        self.output_fields(ind, member)
        output(ind, 'if (', translate_prefix, '_', self.name,
               '(ctx, &u.f_', arg, ')) return true;\n')

    # Normal patterns do not have children, so the tree-building and
    # propagation passes have nothing to do.
    def build_tree(self):
        return None

    def prop_masks(self):
        return None

    def prop_format(self):
        return None

    def prop_width(self):
        return None

# end Pattern
498
499
class MultiPattern(General):
    """Class representing a set of instruction patterns"""

    def __init__(self, lineno):
        self.file = input_file
        self.lineno = lineno
        # Child patterns and nested groups.
        self.pats = []
        self.base = None
        self.fixedbits = 0
        self.fixedmask = 0
        self.undefmask = 0
        self.width = None

    def __str__(self):
        r = 'group'
        if self.fixedbits is not None:
            r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
        return r

    def output_decl(self):
        """Emit the declarations of every child."""
        for p in self.pats:
            p.output_decl()

    def prop_masks(self):
        """Compute fixedbits/fixedmask/undefmask common to all children."""
        fixedmask = insnmask
        undefmask = insnmask

        # Collect fixedmask/undefmask for all of the children.
        for p in self.pats:
            p.prop_masks()
            fixedmask &= p.fixedmask
            undefmask &= p.undefmask

        # Narrow fixedmask until all children agree on the bits it selects.
        repeat = True
        fixedbits = 0
        while repeat and fixedmask != 0:
            fixedbits = None
            for p in self.pats:
                thisbits = p.fixedbits & fixedmask
                if fixedbits is None:
                    fixedbits = thisbits
                elif fixedbits != thisbits:
                    # Drop the bits on which the children disagree and
                    # start over with the smaller mask.
                    fixedmask &= ~(fixedbits ^ thisbits)
                    break
            else:
                repeat = False

        # If narrowing removed every bit, the loop exits right after a
        # disagreement and fixedbits still holds bits selected by the
        # previous, wider mask.  With no fixed bits left there is nothing
        # to match, so do not leave that stale value behind.
        if fixedmask == 0:
            fixedbits = 0

        self.fixedbits = fixedbits
        self.fixedmask = fixedmask
        self.undefmask = undefmask

    def build_tree(self):
        for p in self.pats:
            p.build_tree()

    def prop_format(self):
        for p in self.pats:
            p.prop_format()

    def prop_width(self):
        """Propagate widths upward; all children must share one width."""
        width = None
        for p in self.pats:
            p.prop_width()
            if width is None:
                width = p.width
            elif width != p.width:
                error_with_file(self.file, self.lineno,
                                'width mismatch in patterns within braces')
        self.width = width

# end MultiPattern
574
575
class IncMultiPattern(MultiPattern):
    """Class representing an overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit each child in order, guarded by a test of its extra bits.

        A child whose fixedmask equals OUTERMASK needs no further test
        and is emitted directly at the current indentation.
        """
        ind = str_indent(i)
        for p in self.pats:
            if outermask == p.fixedmask:
                p.output_code(i, extracted, p.fixedbits, p.fixedmask)
                continue
            # Only test the bits not already matched by the caller.
            innermask = p.fixedmask & ~outermask
            innerbits = p.fixedbits & ~outermask
            output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n')
            output(ind, f' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n')
            p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
            output(ind, '}\n')

    def build_tree(self):
        if not self.pats:
            error_with_file(self.file, self.lineno, 'empty pattern group')
        super().build_tree()

#end IncMultiPattern
Richard Henderson0eff2df2019-02-23 11:35:36 -0800599
600
class Tree:
    """Class representing a node in a decode tree"""

    def __init__(self, fm, tm):
        # fm: all bits matched at or above this node
        # tm: the bits this node itself switches on
        self.fixedmask = fm
        self.thismask = tm
        # List of (bits, child) pairs.
        self.subs = []
        # Format shared by every child, or None; set by format propagation.
        self.base = None

    def str1(self, i):
        """Return a multi-line description of the subtree, indented by I."""
        ind = str_indent(i)
        r = ind + whex(self.fixedmask)
        # The shared format lives in self.base; there is no 'format'
        # attribute on Tree, so reading one raised AttributeError.
        if self.base:
            r += ' ' + self.base.name
        r += ' [\n'
        for (b, s) in self.subs:
            r += ind + f' {whex(b)}:\n'
            r += s.str1(i + 4) + '\n'
        r += ind + ']'
        return r

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit a C switch statement over this node's children."""
        ind = str_indent(i)

        # If we identified all nodes below have the same format,
        # extract the fields now.
        if not extracted and self.base:
            output(ind, self.base.extract_name(),
                   '(ctx, &u.f_', self.base.base.name, ', insn);\n')
            extracted = True

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.thismask)
        if sh > 0:
            # Propagate SH down into the local functions.
            def str_switch(b, sh=sh):
                return f'(insn >> {sh}) & {b >> sh:#x}'

            def str_case(b, sh=sh):
                return hex(b >> sh)
        else:
            def str_switch(b):
                return f'insn & {whexC(b)}'

            def str_case(b):
                return whexC(b)

        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
        for b, s in sorted(self.subs):
            assert (self.thismask & ~s.fixedmask) == 0
            innermask = outermask | self.thismask
            innerbits = outerbits | b
            output(ind, 'case ', str_case(b), ':\n')
            output(ind, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            s.output_code(i + 4, extracted, innerbits, innermask)
            output(ind, ' break;\n')
        output(ind, '}\n')
# end Tree
664
665
class ExcMultiPattern(MultiPattern):
    """Class representing a non-overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        # Defer everything to our decomposed Tree node
        self.tree.output_code(i, extracted, outerbits, outermask)

    @staticmethod
    def __build_tree(pats, outerbits, outermask):
        """Return a Tree that distinguishes PATS by their fixed bits.

        OUTERMASK holds the bits already matched by enclosing nodes.
        """
        # Find the intersection of all remaining fixedmask.
        innermask = ~outermask & insnmask
        for pat in pats:
            innermask &= pat.fixedmask

        if innermask == 0:
            # Edge condition: One pattern covers the entire insnmask
            if len(pats) == 1:
                leaf = Tree(outermask, innermask)
                leaf.subs.append((0, pats[0]))
                return leaf

            # Several patterns with no bit left to tell them apart.
            text = 'overlapping patterns:'
            for p in pats:
                text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
            error_with_file(pats[0].file, pats[0].lineno, text)

        fullmask = outermask | innermask

        # Sort each element of pats into the bin selected by the mask.
        bins = {}
        for pat in pats:
            bins.setdefault(pat.fixedbits & innermask, []).append(pat)

        # We must recurse if any bin has more than one element or if
        # the single element in the bin has not been fully matched.
        node = Tree(fullmask, innermask)

        for bits, members in bins.items():
            sub = members[0]
            if len(members) > 1 or sub.fixedmask & ~fullmask != 0:
                sub = ExcMultiPattern.__build_tree(members, bits | outerbits,
                                                   fullmask)
            node.subs.append((bits, sub))

        return node

    def build_tree(self):
        super().build_tree()
        self.tree = self.__build_tree(self.pats, self.fixedbits,
                                      self.fixedmask)

    @staticmethod
    def __prop_format(tree):
        """Propagate Format objects into the decode tree"""

        # Depth first search.
        for _, sub in tree.subs:
            if isinstance(sub, Tree):
                ExcMultiPattern.__prop_format(sub)

        # If all entries in SUBS have the same format, then
        # propagate that into the tree.
        common = None
        for _, sub in tree.subs:
            if common is None:
                common = sub.base
                if common is None:
                    return
            elif common is not sub.base:
                return
        tree.base = common

    def prop_format(self):
        super().prop_format()
        self.__prop_format(self.tree)

# end ExcMultiPattern
746
747
def parse_field(lineno, name, toks):
    """Parse one instruction field from TOKS at LINENO.

    Each token is either '!function=NAME' or a bit range 'POS:LEN'
    ('POS:sLEN' for a signed range).  The resulting field object is
    stored in the global fields dictionary under NAME.
    """
    global fields

    # A "simple" field will have only one entry;
    # a "multifield" will have several.
    subs = []
    width = 0
    func = None
    for t in toks:
        if re.match('^!function=', t):
            if func:
                error(lineno, 'duplicate function')
            func = t.split('=')[1]
            continue

        if re.fullmatch('[0-9]+:s[0-9]+', t):
            # Signed field extract
            sign = True
            subtoks = t.split(':s')
        elif re.fullmatch('[0-9]+:[0-9]+', t):
            # Unsigned field extract
            sign = False
            subtoks = t.split(':')
        else:
            error(lineno, f'invalid field token "{t}"')
        po = int(subtoks[0])
        le = int(subtoks[1])
        if po + le > insnwidth:
            error(lineno, f'field {t} too large')
        subs.append(Field(sign, po, le))
        width += le

    if width > insnwidth:
        error(lineno, 'field too large')

    if not subs:
        # No bit ranges: only valid as a pure function parameter.
        if func:
            f = ParameterField(func)
        else:
            error(lineno, 'field with no value')
    else:
        if len(subs) == 1:
            f = subs[0]
        else:
            mask = 0
            for s in subs:
                if mask & s.mask:
                    error(lineno, 'field components overlap')
                mask |= s.mask
            f = MultiField(subs, mask)
        if func:
            f = FunctionField(func, f)

    if name in fields:
        error(lineno, 'duplicate field', name)
    fields[name] = f
# end parse_field
808
809
def parse_arguments(lineno, name, toks):
    """Parse one argument set from TOKS at LINENO.

    Each token is '!extern', a member name, or 'name:type'; members
    without an explicit type default to 'int'.  The resulting Arguments
    object is stored in the global arguments dictionary under NAME.
    """
    global arguments
    global anyextern

    members = []
    ctypes = []
    extern = False
    for tok in toks:
        if re.fullmatch('!extern', tok):
            extern = True
            anyextern = True
            continue
        if re.fullmatch(re_C_ident + ':' + re_C_ident, tok):
            (member, ctype) = tok.split(':')
        elif re.fullmatch(re_C_ident, tok):
            member = tok
            ctype = 'int'
        else:
            member = tok
            error(lineno, f'invalid argument set token "{member}"')
        if member in members:
            error(lineno, f'duplicate argument "{member}"')
        members.append(member)
        ctypes.append(ctype)

    if name in arguments:
        error(lineno, 'duplicate argument set', name)
    arguments[name] = Arguments(name, members, ctypes, extern)
# end parse_arguments
839
840
def lookup_field(lineno, name):
    """Return the field called NAME, reporting an error if undefined."""
    try:
        return fields[name]
    except KeyError:
        error(lineno, 'undefined field', name)
846
847
def add_field(lineno, flds, new_name, f):
    """Add field F to FLDS as NEW_NAME and return FLDS.

    Reports an error if NEW_NAME is already present.
    """
    duplicate = new_name in flds
    if duplicate:
        error(lineno, 'duplicate field', new_name)
    flds[new_name] = f
    return flds
853
854
def add_field_byname(lineno, flds, new_name, old_name):
    """Add the existing field OLD_NAME to FLDS under NEW_NAME."""
    existing = lookup_field(lineno, old_name)
    return add_field(lineno, flds, new_name, existing)
857
858
def infer_argument_set(flds):
    """Return an argument set suitable for the fields FLDS.

    An existing all-'int' argument set with the same member names is
    reused; otherwise a new one is created, registered and returned.
    """
    global arguments

    match = next((candidate for candidate in arguments.values()
                  if eq_fields_for_args(flds, candidate)), None)
    if match is not None:
        return match

    # Nothing suitable exists: make one with a generated name.
    name = decode_function + str(len(arguments))
    created = Arguments(name, flds.keys(), ['int'] * len(flds), False)
    arguments[name] = created
    return created
871
872
def infer_format(arg, fieldmask, flds, width):
    """Find or create a format matching FLDS, FIELDMASK and WIDTH.

    If ARG is set, only formats whose base argument set is ARG are
    considered.  When no registered format matches, a new Format is
    created and added to the global `formats` table; if ARG is unset
    at that point it is obtained from infer_argument_set(FLDS).

    Returns a tuple (fmt, const_flds).
    """
    global arguments
    global formats
    global decode_function

    # Split the fields into constants and everything else.
    # NOTE(review): `c is ConstField` is an identity test against the
    # class object itself.  Values built as ConstField(value) (as done in
    # parse_generic) are instances and cannot satisfy it, so const_flds
    # stays empty for them and they land in var_flds.  This looks like it
    # was meant to be isinstance(c, ConstField) -- confirm before
    # changing, since that would alter which fields go into the format.
    const_flds = {}
    var_flds = {}
    for n, c in flds.items():
        if c is ConstField:
            const_flds[n] = c
        else:
            var_flds[n] = c

    # Look for an existing format with the same argument set and fields
    for fmt in formats.values():
        if arg and fmt.base != arg:
            continue
        if fieldmask != fmt.fieldmask:
            continue
        if width != fmt.width:
            continue
        if not eq_fields_for_fmts(flds, fmt.fields):
            continue
        return (fmt, const_flds)

    # Nothing suitable exists: synthesize a name from the decode function
    # and the number of formats registered so far.
    name = decode_function + '_Fmt_' + str(len(formats))
    if not arg:
        arg = infer_argument_set(flds)

    # The inferred format carries no fixed bits/masks of its own, only
    # the field mask, the (non-constant) fields and the width.
    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
    formats[name] = fmt

    return (fmt, const_flds)
# end infer_format
907
908
def parse_generic(lineno, parent_pat, name, toks):
    """Parse one instruction format or pattern from TOKS at LINENO

    When PARENT_PAT is None the definition is treated as a format and
    is registered under NAME in the global `formats` table.  Otherwise
    it is treated as a pattern: a Pattern object is appended both to
    PARENT_PAT.pats and to the global `allpatterns` list.

    Problems found while parsing are reported through error().
    """
    global fields
    global arguments
    global formats
    global allpatterns
    global re_arg_ident
    global re_fld_ident
    global re_fmt_ident
    global re_C_ident
    global insnwidth
    global insnmask
    global variablewidth

    is_format = parent_pat is None

    # Masks are accumulated left to right: each bit-pattern or field
    # token shifts what has been collected so far and appends its bits.
    fixedmask = 0
    fixedbits = 0
    undefmask = 0
    width = 0
    flds = {}
    arg = None
    fmt = None
    for t in toks:
        # '&Foo' gives a format an explicit argument set.
        if re.fullmatch(re_arg_ident, t):
            tt = t[1:]
            if arg:
                error(lineno, 'multiple argument sets')
            if tt in arguments:
                arg = arguments[tt]
            else:
                error(lineno, 'undefined argument set', t)
            continue

        # '@Foo' gives a pattern an explicit format.
        if re.fullmatch(re_fmt_ident, t):
            tt = t[1:]
            if fmt:
                error(lineno, 'multiple formats')
            if tt in formats:
                fmt = formats[tt]
            else:
                error(lineno, 'undefined format', t)
            continue

        # '%Foo' imports a field.
        if re.fullmatch(re_fld_ident, t):
            tt = t[1:]
            flds = add_field_byname(lineno, flds, tt, tt)
            continue

        # 'Foo=%Bar' imports a field with a different name.
        if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
            (fname, iname) = t.split('=%')
            flds = add_field_byname(lineno, flds, fname, iname)
            continue

        # 'Foo=number' sets an argument field to a constant value
        if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
            (fname, value) = t.split('=')
            value = int(value)
            flds = add_field(lineno, flds, fname, ConstField(value))
            continue

        # Pattern of 0s, 1s, dots and dashes indicate required zeros,
        # required ones, or don't-cares.  '0' and '1' set a bit in
        # fixedmask (and '1' also in fixedbits); '-' sets a bit in
        # undefmask; '.' sets no bit in any of the three masks.
        if re.fullmatch('[01.-]+', t):
            shift = len(t)
            fms = t.replace('0', '1')
            fms = fms.replace('.', '0')
            fms = fms.replace('-', '0')
            fbs = t.replace('.', '0')
            fbs = fbs.replace('-', '0')
            ubm = t.replace('1', '0')
            ubm = ubm.replace('.', '0')
            ubm = ubm.replace('-', '1')
            fms = int(fms, 2)
            fbs = int(fbs, 2)
            ubm = int(ubm, 2)
            fixedbits = (fixedbits << shift) | fbs
            fixedmask = (fixedmask << shift) | fms
            undefmask = (undefmask << shift) | ubm
        # Otherwise, fieldname:fieldwidth
        elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
            (fname, flen) = t.split(':')
            # A leading 's' on the width marks the field as signed.
            sign = False
            if flen[0] == 's':
                sign = True
                flen = flen[1:]
            shift = int(flen, 10)
            if shift + width > insnwidth:
                error(lineno, f'field {fname} exceeds insnwidth')
            # The field position is derived from the bits consumed so far.
            f = Field(sign, insnwidth - width - shift, shift)
            flds = add_field(lineno, flds, fname, f)
            fixedbits <<= shift
            fixedmask <<= shift
            undefmask <<= shift
        else:
            error(lineno, f'invalid token "{t}"')
        width += shift

    # With variable-width decoding, a definition shorter than insnwidth
    # (in whole bytes) is shifted up to the top of the word and the
    # unused low bits are recorded in undefmask.
    if variablewidth and width < insnwidth and width % 8 == 0:
        shift = insnwidth - width
        fixedbits <<= shift
        fixedmask <<= shift
        undefmask <<= shift
        undefmask |= (1 << shift) - 1

    # We should have filled in all of the bits of the instruction.
    elif not (is_format and width == 0) and width != insnwidth:
        error(lineno, f'definition has {width} bits')

    # Do not check for fields overlapping fields; one valid usage
    # is to be able to duplicate fields via import.
    fieldmask = 0
    for f in flds.values():
        fieldmask |= f.mask

    # Fix up what we've parsed to match either a format or a pattern.
    if is_format:
        # Formats cannot reference formats.
        if fmt:
            error(lineno, 'format referencing format')
        # If an argument set is given, then there should be no fields
        # without a place to store it.
        if arg:
            for f in flds.keys():
                if f not in arg.fields:
                    error(lineno, f'field {f} not in argument set {arg.name}')
        else:
            arg = infer_argument_set(flds)
        if name in formats:
            error(lineno, 'duplicate format name', name)
        fmt = Format(name, lineno, arg, fixedbits, fixedmask,
                     undefmask, fieldmask, flds, width)
        formats[name] = fmt
    else:
        # Patterns can reference a format ...
        if fmt:
            # ... but not an argument simultaneously
            if arg:
                error(lineno, 'pattern specifies both format and argument set')
            if fixedmask & fmt.fixedmask:
                error(lineno, 'pattern fixed bits overlap format fixed bits')
            if width != fmt.width:
                error(lineno, 'pattern uses format of different width')
            # Merge the masks inherited from the format into the pattern's.
            fieldmask |= fmt.fieldmask
            fixedbits |= fmt.fixedbits
            fixedmask |= fmt.fixedmask
            undefmask |= fmt.undefmask
        else:
            # No explicit format: find or create one; flds is replaced by
            # the second element of the tuple infer_format() returns.
            (fmt, flds) = infer_format(arg, fieldmask, flds, width)
        arg = fmt.base
        for f in flds.keys():
            if f not in arg.fields:
                error(lineno, f'field {f} not in argument set {arg.name}')
            if f in fmt.fields.keys():
                error(lineno, f'field {f} set by format and pattern')
        # Every member of the argument set must come from either the
        # pattern or its format.
        for f in arg.fields:
            if f not in flds.keys() and f not in fmt.fields.keys():
                error(lineno, f'field {f} not initialized')
        pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
                      undefmask, fieldmask, flds, width)
        parent_pat.pats.append(pat)
        allpatterns.append(pat)

    # Validate the masks that we have assembled.
    if fieldmask & fixedmask:
        error(lineno, 'fieldmask overlaps fixedmask ',
              f'({whex(fieldmask)} & {whex(fixedmask)})')
    if fieldmask & undefmask:
        error(lineno, 'fieldmask overlaps undefmask ',
              f'({whex(fieldmask)} & {whex(undefmask)})')
    if fixedmask & undefmask:
        error(lineno, 'fixedmask overlaps undefmask ',
              f'({whex(fixedmask)} & {whex(undefmask)})')
    if not is_format:
        # A pattern must account for every bit of the instruction.
        allbits = fieldmask | fixedmask | undefmask
        if allbits != insnmask:
            error(lineno, 'bits left unspecified ',
                  f'({whex(allbits ^ insnmask)})')
# end parse_generic
1092
Richard Henderson0eff2df2019-02-23 11:35:36 -08001093
def parse_file(f, parent_pat):
    """Parse all of the patterns within a file

    F is any iterable of text lines.  PARENT_PAT is the multi-pattern
    that receives top-level patterns and nested groups; '{' and '['
    open a nested IncMultiPattern / ExcMultiPattern respectively, which
    becomes the parent until the matching '}' / ']'.

    Each logical line is dispatched on its first token to parse_field(),
    parse_arguments() or parse_generic().  Problems are reported through
    error().
    """
    # Read all of the lines of the file. Concatenate lines
    # ending in backslash; discard empty lines and comments.
    toks = []
    lineno = 0
    nesting = 0
    nesting_pats = []

    for line in f:
        lineno += 1

        # Expand and strip spaces, to find indent.
        line = line.rstrip()
        line = line.expandtabs()
        len1 = len(line)
        line = line.lstrip()
        len2 = len(line)

        # Discard comments
        end = line.find('#')
        if end >= 0:
            line = line[:end]

        t = line.split()
        if len(toks) != 0:
            # Next line after continuation
            toks.extend(t)
        else:
            # Allow completely blank lines.
            if len1 == 0:
                continue
            indent = len1 - len2
            # Empty line due to comment.
            if len(t) == 0:
                # Indentation must be correct, even for comment lines.
                if indent != nesting:
                    error(lineno, 'indentation ', indent, ' != ', nesting)
                continue
            # Remember where the logical line began, so that errors in a
            # continued line point at its first physical line.
            start_lineno = lineno
            toks = t

        # Continuation?
        if toks[-1] == '\\':
            toks.pop()
            continue

        name = toks[0]
        del toks[0]

        # End nesting?
        if name == '}' or name == ']':
            if len(toks) != 0:
                error(start_lineno, 'extra tokens after close brace')

            # Make sure { } and [ ] nest properly.
            if (name == '}') != isinstance(parent_pat, IncMultiPattern):
                error(lineno, 'mismatched close brace')

            # Only an empty stack is an expected failure here; catch just
            # that rather than every exception.
            try:
                parent_pat = nesting_pats.pop()
            except IndexError:
                error(lineno, 'extra close brace')

            nesting -= 2
            if indent != nesting:
                error(lineno, 'indentation ', indent, ' != ', nesting)

            toks = []
            continue

        # Everything else should have current indentation.
        if indent != nesting:
            error(start_lineno, 'indentation ', indent, ' != ', nesting)

        # Start nesting?
        if name == '{' or name == '[':
            if len(toks) != 0:
                error(start_lineno, 'extra tokens after open brace')

            if name == '{':
                nested_pat = IncMultiPattern(start_lineno)
            else:
                nested_pat = ExcMultiPattern(start_lineno)
            parent_pat.pats.append(nested_pat)
            nesting_pats.append(parent_pat)
            parent_pat = nested_pat

            nesting += 2
            toks = []
            continue

        # Determine the type of object needing to be parsed.
        if re.fullmatch(re_fld_ident, name):
            parse_field(start_lineno, name[1:], toks)
        elif re.fullmatch(re_arg_ident, name):
            parse_arguments(start_lineno, name[1:], toks)
        elif re.fullmatch(re_fmt_ident, name):
            parse_generic(start_lineno, None, name[1:], toks)
        elif re.fullmatch(re_pat_ident, name):
            parse_generic(start_lineno, parent_pat, name, toks)
        else:
            error(lineno, f'invalid token "{name}"')
        toks = []

    if nesting != 0:
        error(lineno, 'missing close brace')
# end parse_file
1207
1208
class SizeTree:
    """Interior node of a size decode tree.

    `mask` selects the instruction bits this node switches on, `subs`
    is a list of (bits, subtree) pairs, and `width` is the number of
    instruction bits that must be loaded before testing `mask`.
    """

    def __init__(self, m, w):
        self.mask = m
        self.subs = []
        self.base = None
        self.width = w

    def str1(self, i):
        """Render this node and its children, indented by I columns."""
        pad = str_indent(i)
        pieces = [pad + whex(self.mask) + ' [\n']
        for bits, child in self.subs:
            pieces.append(pad + f' {whex(bits)}:\n')
            pieces.append(child.str1(i + 4) + '\n')
        pieces.append(pad + ']')
        return ''.join(pieces)

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit a C switch over this node's mask, recursing into children.

        EXTRACTED is the number of instruction bits already loaded;
        OUTERBITS/OUTERMASK describe the bits matched by enclosing nodes.
        """
        pad = str_indent(i)

        # If we need to load more bytes to test, do so now.
        if extracted < self.width:
            output(pad, f'insn = {decode_function}_load_bytes',
                   f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
            extracted = self.width

        # Attempt to aid the compiler in producing compact switch
        # statements: when the mask bits are contiguous, shift them down
        # so that the case labels are small numbers.
        shift = is_contiguous(self.mask)
        contiguous = shift > 0
        if contiguous:
            selector = f'(insn >> {shift}) & {self.mask >> shift:#x}'
        else:
            selector = f'insn & {whexC(self.mask)}'

        # The combined mask is the same for every case of this switch.
        innermask = outermask | self.mask

        output(pad, 'switch (', selector, ') {\n')
        for bits, child in sorted(self.subs):
            innerbits = outerbits | bits
            if contiguous:
                label = hex(bits >> shift)
            else:
                label = whexC(bits)
            output(pad, 'case ', label, ':\n')
            output(pad, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            child.output_code(i + 4, extracted, innerbits, innermask)
        output(pad, '}\n')
        output(pad, 'return insn;\n')
# end SizeTree
1267
class SizeLeaf:
    """Leaf node of a size decode tree: holds a mask and a final width."""

    def __init__(self, m, w):
        self.mask = m
        self.width = w

    def str1(self, i):
        """Render the leaf's mask, indented by I columns."""
        pad = str_indent(i)
        text = whex(self.mask)
        return pad + text

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit C code that loads any missing bytes and returns insn.

        EXTRACTED is the number of instruction bits already loaded.
        OUTERBITS and OUTERMASK are accepted for signature parity with
        SizeTree.output_code and are not used here.
        """
        pad = str_indent(i)

        # If we need to load more bytes, do so now.
        needs_load = extracted < self.width
        if needs_load:
            output(pad, f'insn = {decode_function}_load_bytes',
                   f'(ctx, insn, {extracted // 8}, {self.width // 8});\n')
        output(pad, 'return insn;\n')
# end SizeLeaf
1292
1293
def build_size_tree(pats, width, outerbits, outermask):
    """Build the size decode tree for PATS, examining WIDTH bits so far.

    OUTERBITS/OUTERMASK describe the bits already decided by enclosing
    nodes.  Returns a SizeLeaf when every pattern has the same width,
    otherwise a SizeTree (possibly after widening the window by 8 bits).
    Patterns that cannot be told apart are reported via
    error_with_file().
    """
    # Collect the mask of bits that are fixed in this width
    innermask = (0xff << (insnwidth - width)) & ~outermask
    widest = None       # largest pattern width seen so far
    uniform = True      # True while all patterns share one width
    for pat in pats:
        innermask &= pat.fixedmask
        if widest is None:
            widest = pat.width
            continue
        if widest == pat.width:
            continue
        uniform = False
        if widest < pat.width:
            widest = pat.width

    if uniform:
        return SizeLeaf(innermask, widest)

    if innermask == 0:
        # No distinguishing bits in this window: look at the next byte
        # if any pattern is wide enough to have one.
        if width < widest:
            return build_size_tree(pats, width + 8, outerbits, outermask)

        pnames = [p.name + ':' + p.file + ':' + str(p.lineno) for p in pats]
        error_with_file(pats[0].file, pats[0].lineno,
                        f'overlapping patterns size {width}:', pnames)

    # Group the patterns by the value of their fixed bits under innermask.
    bins = {}
    for pat in pats:
        bins.setdefault(pat.fixedbits & innermask, []).append(pat)

    fullmask = outermask | innermask
    if len(bins) == 1:
        # Everything fell into one group: nothing to switch on here.
        (only,) = bins
        return build_size_tree(bins[only], width + 8,
                               only | outerbits, fullmask)

    node = SizeTree(innermask, width)
    for bits, members in bins.items():
        subtree = build_size_tree(members, width, bits | outerbits, fullmask)
        node.subs.append((bits, subtree))
    return node
# end build_size_tree
1344
1345
def prop_size(tree):
    """Propagate minimum widths up the decode size tree

    For a SizeTree, the smallest width found among its children is
    stored into tree.width and returned; any other node simply reports
    its own width.
    """
    if not isinstance(tree, SizeTree):
        return tree.width

    narrowest = None
    for _bits, child in tree.subs:
        child_width = prop_size(child)
        if narrowest is None or narrowest > child_width:
            narrowest = child_width
    assert narrowest >= tree.width
    tree.width = narrowest
    return narrowest
# end prop_size
1361
1362
def main():
    """Command-line entry point: parse the inputs and emit the decoder.

    Options are read from sys.argv, every input file named on the
    command line is parsed into one top-level ExcMultiPattern, and the
    generated C code is written to the --output file or to stdout.
    The process exits with status 1 if --test-for-error was given and
    0 otherwise.
    """
    global arguments
    global formats
    global allpatterns
    global translate_scope
    global translate_prefix
    global output_fd
    global output_file
    global input_file
    global insnwidth
    global insntype
    global insnmask
    global decode_function
    global bitop_width
    global variablewidth
    global anyextern
    global testforerror

    decode_scope = 'static '

    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
                 'static-decode=', 'varinsnwidth=', 'test-for-error']
    try:
        (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
    except getopt.GetoptError as err:
        error(0, err)
    for o, a in opts:
        if o in ('-o', '--output'):
            output_file = a
        elif o == '--decode':
            # --decode makes the decode function non-static.
            decode_function = a
            decode_scope = ''
        elif o == '--static-decode':
            decode_function = a
        elif o == '--translate':
            translate_prefix = a
            translate_scope = ''
        elif o in ('-w', '--insnwidth', '--varinsnwidth'):
            if o == '--varinsnwidth':
                variablewidth = True
            insnwidth = int(a)
            if insnwidth == 16:
                insntype = 'uint16_t'
                insnmask = 0xffff
            elif insnwidth == 64:
                insntype = 'uint64_t'
                insnmask = 0xffffffffffffffff
                bitop_width = 64
            elif insnwidth != 32:
                error(0, 'cannot handle insns of width', insnwidth)
        elif o == '--test-for-error':
            testforerror = True
        else:
            assert False, 'unhandled option'

    if len(args) < 1:
        error(0, 'missing input file')

    toppat = ExcMultiPattern(0)

    for filename in args:
        input_file = filename
        # Use a context manager so the file is closed even if parsing
        # aborts through error().
        with open(filename, 'rt', encoding='utf-8') as f:
            parse_file(f, toppat)

    # We do not want to compute masks for toppat, because those masks
    # are used as a starting point for build_tree. For toppat, we must
    # insist that decode begins from naught.
    for i in toppat.pats:
        i.prop_masks()

    toppat.build_tree()
    toppat.prop_format()

    if variablewidth:
        for i in toppat.pats:
            i.prop_width()
        stree = build_size_tree(toppat.pats, 8, 0, 0)
        prop_size(stree)

    if output_file:
        output_fd = open(output_file, 'wt', encoding='utf-8')
    else:
        output_fd = io.TextIOWrapper(sys.stdout.buffer,
                                     encoding=sys.stdout.encoding,
                                     errors="ignore")

    output_autogen()
    for n in sorted(arguments.keys()):
        f = arguments[n]
        f.output_def()

    # A single translate function can be invoked for different patterns.
    # Make sure that the argument sets are the same, and declare the
    # function only once.
    #
    # If we're sharing formats, we're likely also sharing trans_* functions,
    # but we can't tell which ones. Prevent issues from the compiler by
    # suppressing redundant declaration warnings.
    if anyextern:
        output("#pragma GCC diagnostic push\n",
               "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
               "#ifdef __clang__\n"
               "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
               "#endif\n\n")

    out_pats = {}
    for i in allpatterns:
        if i.name in out_pats:
            p = out_pats[i.name]
            if i.base.base != p.base.base:
                error(0, i.name, ' has conflicting argument sets')
        else:
            i.output_decl()
            out_pats[i.name] = i
    output('\n')

    if anyextern:
        output("#pragma GCC diagnostic pop\n\n")

    for n in sorted(formats.keys()):
        f = formats[n]
        f.output_extract()

    output(decode_scope, 'bool ', decode_function,
           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')

    i4 = str_indent(4)

    # Only emit the argument union and decode body when there is at
    # least one pattern to decode.
    if len(allpatterns) != 0:
        output(i4, 'union {\n')
        for n in sorted(arguments.keys()):
            f = arguments[n]
            output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
        output(i4, '} u;\n\n')
        toppat.output_code(4, False, 0, 0)

    output(i4, 'return false;\n')
    output('}\n')

    if variablewidth:
        output('\n', decode_scope, insntype, ' ', decode_function,
               '_load(DisasContext *ctx)\n{\n',
               ' ', insntype, ' insn = 0;\n\n')
        stree.output_code(4, 0, 0, 0)
        output('}\n')

    if output_file:
        output_fd.close()
    # sys.exit() rather than the site-provided exit() helper, which is
    # not guaranteed to exist (e.g. when running with python -S).
    sys.exit(1 if testforerror else 0)
# end main
1515
1516
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()