blob: 4637b633e704be81d2a048540e516258d5805ff5 [file] [log] [blame]
#!/usr/bin/env python3
# Copyright (c) 2018 Linaro Limited
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, see <http://www.gnu.org/licenses/>.
#

#
# Generate a decoding tree from a specification file.
# See the syntax and semantics in docs/devel/decodetree.rst.
#
22
Philippe Mathieu-Daudé4caceca2021-01-10 01:02:40 +010023import io
Richard Henderson568ae7e2017-12-07 12:44:09 -080024import os
25import re
26import sys
27import getopt
Richard Henderson568ae7e2017-12-07 12:44:09 -080028
# Width of one instruction word in bits, and the mask covering that width.
insnwidth = 32
insnmask = 0xffffffff
# Consulted while parsing definitions: when set, a definition narrower than
# insnwidth is accepted if its width is a whole number of bytes.
variablewidth = False
# Parsed definitions, keyed by name.
fields = {}
arguments = {}
formats = {}
# Every Pattern object created while parsing.
allpatterns = []
# Set as soon as any argument set is declared with !extern.
anyextern = False

# Prefix and storage-class text used when declaring translate functions.
translate_prefix = 'trans'
translate_scope = 'static '
# Name of the input file, used when reporting errors and recorded on each
# parsed definition.
input_file = ''
# Output file name and handle; error_with_file closes the handle and
# removes the file when both are set.
output_file = None
output_fd = None
# C type of the 'insn' parameter of generated extract functions.
insntype = 'uint32_t'
# Base name for generated extract functions, inferred formats and
# inferred argument sets.
decode_function = 'decode'

# An identifier for C.
re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*'

# Identifiers for Arguments, Fields, Formats and Patterns.
re_arg_ident = '&[a-zA-Z0-9_]*'
re_fld_ident = '%[a-zA-Z0-9_]*'
re_fmt_ident = '@[a-zA-Z0-9_]*'
re_pat_ident = '[a-zA-Z0-9_]*'
Richard Henderson568ae7e2017-12-07 12:44:09 -080054
def error_with_file(file, lineno, *args):
    """Print an error message from file:line and args and exit.

    The message is written to stderr as '<file>:<lineno>: error: <args>',
    where the file and line parts are omitted when falsy.  If an output
    file is currently open (both output_file and output_fd are set), it
    is closed and removed so that no partial output is left behind.

    Always terminates the process with exit status 1 (raises SystemExit).
    """
    prefix = ''
    if file:
        prefix += '{0}:'.format(file)
    if lineno:
        prefix += '{0}:'.format(lineno)
    if prefix:
        prefix += ' '
    print(prefix, end='error: ', file=sys.stderr)
    print(*args, file=sys.stderr)

    if output_file and output_fd:
        output_fd.close()
        os.remove(output_file)
    # sys.exit() rather than the bare exit() helper: the latter is only
    # injected by the 'site' module and is absent under 'python -S'.
    sys.exit(1)
# end error_with_file
75
Richard Henderson568ae7e2017-12-07 12:44:09 -080076
def error(lineno, *args):
    """Report an error at LINENO of the current input file and exit."""
    where = input_file
    error_with_file(where, lineno, *args)
# end error
80
Richard Henderson568ae7e2017-12-07 12:44:09 -080081
def output(*args):
    """Write every string in ARGS, in order, to the open output file."""
    for chunk in args:
        output_fd.write(chunk)
86
87
def output_autogen():
    """Emit the C comment marking the output as autogenerated."""
    banner = '/* This file is autogenerated by scripts/decodetree.py. */\n\n'
    output(banner)
90
91
def str_indent(c):
    """Return a string made of C space characters"""
    padding = c * ' '
    return padding
95
96
def str_fields(fields):
    """Return a string uniquely identifying FIELDS

    The result is the sorted key names of FIELDS joined by underscores.
    """
    ordered = sorted(fields.keys())
    return '_'.join(ordered)
103
104
def str_match_bits(bits, mask):
    """Return a string pretty-printing BITS/MASK

    Walks from bit insnwidth-1 down to bit 0.  A bit inside MASK is
    shown as '1' or '0' according to BITS; a bit outside MASK is shown
    as '.'.  A space follows bits 24, 16 and 8.
    """
    # Bits after which a separating space is emitted.
    separators = 0x01010100
    chars = []
    probe = 1 << (insnwidth - 1)
    while probe:
        if not probe & mask:
            chars.append('.')
        elif probe & bits:
            chars.append('1')
        else:
            chars.append('0')
        if probe & separators:
            chars.append(' ')
        probe >>= 1
    return ''.join(chars)
124
125
def is_pow2(x):
    """Return true if X has at most one bit set.

    That is, true for every power of 2 and also for 0.
    """
    return not (x & (x - 1))
129
130
def ctz(x):
    """Return the number of times 2 factors into X.

    Equivalently, the index of the lowest set bit.  X must be non-zero.
    """
    assert x != 0
    # x & -x isolates the lowest set bit; its bit_length is index + 1.
    lowest = x & -x
    return lowest.bit_length() - 1
138
139
def is_contiguous(bits):
    """Return the shift of BITS if its set bits form one unbroken run.

    The shift is the index of the lowest set bit.  Returns -1 when BITS
    is zero or when its set bits are not contiguous.
    """
    if bits == 0:
        return -1
    low = ctz(bits)
    # A contiguous run shifted down to bit 0 is 2**n - 1, so adding one
    # must leave a single set bit.
    return low if is_pow2((bits >> low) + 1) else -1
148
149
def eq_fields_for_args(flds_a, flds_b):
    """Return True if FLDS_A and FLDS_B name the same set of fields.

    FLDS_A is a dict keyed by field name; FLDS_B is any sized container
    of names.  Only the names are compared, not the field definitions.
    """
    if len(flds_a) != len(flds_b):
        return False
    return all(name in flds_b for name in flds_a)
157
158
def eq_fields_for_fmts(flds_a, flds_b):
    """Return True if FLDS_A and FLDS_B hold the same fields.

    Both are dicts keyed by field name.  They match when they have the
    same names and each pair of values has the same class and does not
    compare unequal.
    """
    if len(flds_a) != len(flds_b):
        return False
    for name, lhs in flds_a.items():
        try:
            rhs = flds_b[name]
        except KeyError:
            return False
        if lhs.__class__ != rhs.__class__ or lhs != rhs:
            return False
    return True
169
170
class Field:
    """Class representing a simple instruction field

    A field is LEN bits wide starting at bit POS; SIGN selects signed
    extraction.  MASK covers the bits the field occupies.
    """
    def __init__(self, sign, pos, len):
        self.sign = sign
        self.pos = pos
        self.len = len
        self.mask = ((1 << len) - 1) << pos

    def __str__(self):
        marker = 's' if self.sign else ''
        return '{0}:{1}{2}'.format(self.pos, marker, self.len)

    def str_extract(self):
        """Return the C expression extracting this field from 'insn'."""
        extr = 'sextract32' if self.sign else 'extract32'
        return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len)

    def __eq__(self, other):
        # Two fields are equal when signedness and occupied bits agree.
        if self.sign != other.sign:
            return False
        return self.mask == other.mask

    def __ne__(self, other):
        equal = self.__eq__(other)
        return not equal
# end Field
199
200
class MultiField:
    """Class representing a compound instruction field

    SUBS is the list of component fields; the first entry supplies the
    sign of the compound.  MASK is stored as given by the caller.
    """
    def __init__(self, subs, mask):
        self.subs = subs
        self.sign = subs[0].sign
        self.mask = mask

    def __str__(self):
        return str(self.subs)

    def str_extract(self):
        """Return the C expression assembling the value from all parts.

        Components are combined starting from the last entry of SUBS,
        which lands in the low bits; each earlier one is deposited above
        the bits accumulated so far.
        """
        expr = '0'
        filled = 0
        for part in self.subs[::-1]:
            piece = part.str_extract()
            if filled == 0:
                expr = piece
            else:
                expr = 'deposit32({0}, {1}, {2}, {3})' \
                       .format(expr, filled, 32 - filled, piece)
            filled += part.len
        return expr

    def __eq__(self, other):
        if len(self.subs) != len(other.subs):
            return False
        for mine, theirs in zip(self.subs, other.subs):
            if mine.__class__ != theirs.__class__ or mine != theirs:
                return False
        return True

    def __ne__(self, other):
        return not self.__eq__(other)
# end MultiField
234
235
class ConstField:
    """Class representing an argument field with constant value

    The field occupies no instruction bits (mask is 0) and is treated
    as signed when the constant is negative.
    """
    def __init__(self, value):
        self.value = value
        self.mask = 0
        self.sign = value < 0

    def __str__(self):
        return str(self.value)

    def str_extract(self):
        """Return the C expression for this field: the constant itself."""
        return str(self.value)

    def __eq__(self, other):
        # Python 3 never calls __cmp__, so without this method two
        # ConstFields holding the same value compare by identity and are
        # always reported as different.
        if not isinstance(other, ConstField):
            return NotImplemented
        return self.value == other.value

    # __ne__ is derived from __eq__ by Python 3.

    def __hash__(self):
        # Defining __eq__ would otherwise make instances unhashable.
        return hash(self.value)

    def __cmp__(self, other):
        # Unused by Python 3; retained for any caller invoking it directly.
        return self.value - other.value
# end ConstField
252
253
class FunctionField:
    """Class representing a field passed through a function

    BASE is the wrapped field, whose mask and sign are copied; FUNC is
    the name of the C function applied to the extracted value.
    """
    def __init__(self, func, base):
        self.func = func
        self.base = base
        self.sign = base.sign
        self.mask = base.mask

    def __str__(self):
        return '{0}({1})'.format(self.func, self.base)

    def str_extract(self):
        """Return the C call applying FUNC to the base extraction."""
        inner = self.base.str_extract()
        return '{0}(ctx, {1})'.format(self.func, inner)

    def __eq__(self, other):
        if self.func != other.func:
            return False
        return self.base == other.base

    def __ne__(self, other):
        return not self.__eq__(other)
# end FunctionField
274
275
class ParameterField:
    """Class representing a pseudo-field read from a function

    The field occupies no instruction bits; its value is whatever the
    C function FUNC returns when called with the context.
    """
    def __init__(self, func):
        self.func = func
        self.sign = 0
        self.mask = 0

    def __str__(self):
        return self.func

    def str_extract(self):
        """Return the C call that produces the value."""
        return '{0}(ctx)'.format(self.func)

    def __eq__(self, other):
        return other.func == self.func

    def __ne__(self, other):
        return not self.__eq__(other)
# end ParameterField
295
296
class Arguments:
    """Class representing the extracted fields of a format

    NM is the argument set name, FLDS an iterable of member names
    (stored sorted) and EXTERN tells whether the C struct is declared
    elsewhere and therefore must not be emitted.
    """
    def __init__(self, nm, flds, extern):
        self.name = nm
        self.extern = extern
        self.fields = sorted(flds)

    def __str__(self):
        return '{0} {1}'.format(self.name, self.fields)

    def struct_name(self):
        """Return the name of the C struct type for this argument set."""
        return 'arg_{0}'.format(self.name)

    def output_def(self):
        """Emit the C typedef for the struct, unless it is extern."""
        if self.extern:
            return
        output('typedef struct {\n')
        for member in self.fields:
            output(' int ', member, ';\n')
        output('} ', self.struct_name(), ';\n\n')
# end Arguments
317
318
class General:
    """Common code between instruction formats and instruction patterns"""
    def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w):
        # Where the definition came from, for diagnostics.
        self.name = name
        self.file = input_file
        self.lineno = lineno
        # The object this definition is based on.
        self.base = base
        # Bit masks accumulated while parsing the definition.
        self.fixedbits = fixb
        self.fixedmask = fixm
        self.undefmask = udfm
        self.fieldmask = fldm
        # Field objects keyed by name, and the definition width.
        self.fields = flds
        self.width = w

    def __str__(self):
        bits = str_match_bits(self.fixedbits, self.fixedmask)
        return '{0} {1}'.format(self.name, bits)

    def str1(self, i):
        """Return the string form of this object indented by I spaces."""
        return str_indent(i) + str(self)
# end General
339
340
class Format(General):
    """Class representing an instruction format"""

    def extract_name(self):
        """Return the name of the C extract function for this format."""
        return '{0}_extract_{1}'.format(decode_function, self.name)

    def output_extract(self):
        """Emit the C function that fills the argument struct from insn."""
        output('static void ', self.extract_name(), '(DisasContext *ctx, ',
               self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n')
        for member, fld in self.fields.items():
            output(' a->', member, ' = ', fld.str_extract(), ';\n')
        output('}\n\n')
# end Format
355
356
class Pattern(General):
    """Class representing an instruction pattern"""

    def output_decl(self):
        """Emit the argument typedef and translate function prototype."""
        struct = self.base.base.struct_name()
        output('typedef ', struct, ' arg_', self.name, ';\n')
        output(translate_scope, 'bool ', translate_prefix, '_', self.name,
               '(DisasContext *ctx, arg_', self.name, ' *a);\n')

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the C code that decodes and dispatches this pattern.

        I is the indentation.  When EXTRACTED is false the format's
        extract function is called first.  OUTERBITS and OUTERMASK are
        accepted for interface parity and are not used here.
        """
        ind = str_indent(i)
        arg = self.base.base.name
        output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n')
        if not extracted:
            output(ind, self.base.extract_name(),
                   '(ctx, &u.f_', arg, ', insn);\n')
        # Fields held by the pattern itself are assigned after extraction.
        for member, fld in self.fields.items():
            output(ind, 'u.f_', arg, '.', member, ' = ',
                   fld.str_extract(), ';\n')
        output(ind, 'if (', translate_prefix, '_', self.name,
               '(ctx, &u.f_', arg, ')) return true;\n')

    # Normal patterns do not have children, so the tree-walking hooks
    # below have nothing to do.
    def build_tree(self):
        return None

    def prop_masks(self):
        return None

    def prop_format(self):
        return None

    def prop_width(self):
        return None

# end Pattern
392
393
class MultiPattern(General):
    """Class representing a set of instruction patterns

    Children are kept in self.pats and may be plain patterns or nested
    groups.  General.__init__ is not invoked; only the attributes set in
    __init__ below exist on a group.
    """

    def __init__(self, lineno):
        self.file = input_file
        self.lineno = lineno
        self.pats = []
        self.base = None
        self.fixedbits = 0
        self.fixedmask = 0
        self.undefmask = 0
        self.width = None

    def __str__(self):
        r = 'group'
        if self.fixedbits is not None:
            r += ' ' + str_match_bits(self.fixedbits, self.fixedmask)
        return r

    def output_decl(self):
        """Emit the declarations of every child."""
        for p in self.pats:
            p.output_decl()

    def prop_masks(self):
        """Compute this group's fixedbits/fixedmask/undefmask.

        The masks start as the intersection of the children's masks.
        Bits are then dropped from fixedmask until every child has the
        same fixed bits under it.
        """
        fixedmask = insnmask
        undefmask = insnmask

        # Collect fixedmask/undefmask for all of the children.
        for p in self.pats:
            p.prop_masks()
            fixedmask &= p.fixedmask
            undefmask &= p.undefmask

        # Narrow fixedmask until all fixedbits match
        repeat = True
        fixedbits = 0
        while repeat and fixedmask != 0:
            fixedbits = None
            for p in self.pats:
                thisbits = p.fixedbits & fixedmask
                if fixedbits is None:
                    fixedbits = thisbits
                elif fixedbits != thisbits:
                    # Drop the disagreeing bits and rescan from the start.
                    fixedmask &= ~(fixedbits ^ thisbits)
                    break
            else:
                repeat = False

        self.fixedbits = fixedbits
        self.fixedmask = fixedmask
        self.undefmask = undefmask

    def build_tree(self):
        """Build the decode trees of every child."""
        for p in self.pats:
            p.build_tree()

    def prop_format(self):
        """Propagate formats within every child.

        This previously recursed into build_tree(), so nested groups
        never had their formats propagated.
        """
        for p in self.pats:
            p.prop_format()

    def prop_width(self):
        """Compute the group's width; all children must agree."""
        width = None
        for p in self.pats:
            p.prop_width()
            if width is None:
                width = p.width
            elif width != p.width:
                error_with_file(self.file, self.lineno,
                                'width mismatch in patterns within braces')
        self.width = width

# end MultiPattern


class IncMultiPattern(MultiPattern):
    """Class representing an overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the children in order, each guarded by its own test.

        A child whose fixedmask equals OUTERMASK needs no further test
        and is emitted directly; otherwise only the bits not already
        covered by OUTERMASK are compared.
        """
        ind = str_indent(i)
        for p in self.pats:
            if outermask != p.fixedmask:
                innermask = p.fixedmask & ~outermask
                innerbits = p.fixedbits & ~outermask
                output(ind, 'if ((insn & ',
                       '0x{0:08x}) == 0x{1:08x}'.format(innermask, innerbits),
                       ') {\n')
                output(ind, ' /* ',
                       str_match_bits(p.fixedbits, p.fixedmask), ' */\n')
                p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask)
                output(ind, '}\n')
            else:
                p.output_code(i, extracted, p.fixedbits, p.fixedmask)
#end IncMultiPattern


class Tree:
    """Class representing a node in a decode tree

    FIXEDMASK is the mask of bits decided at or above this node and
    THISMASK the bits this node switches on.  SUBS holds (bits, node)
    pairs, and BASE is the format shared by everything below, if any.
    """

    def __init__(self, fm, tm):
        self.fixedmask = fm
        self.thismask = tm
        self.subs = []
        self.base = None

    def str1(self, i):
        """Return a multi-line dump of this subtree indented by I."""
        ind = str_indent(i)
        r = '{0}{1:08x}'.format(ind, self.fixedmask)
        # The shared format lives in self.base; no 'format' attribute is
        # ever set on a Tree, so reading one raised AttributeError.
        if self.base:
            r += ' ' + self.base.name
        r += ' [\n'
        for (b, s) in self.subs:
            r += '{0} {1:08x}:\n'.format(ind, b)
            r += s.str1(i + 4) + '\n'
        r += ind + ']'
        return r

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit a C switch statement dispatching on this node's bits."""
        ind = str_indent(i)

        # If we identified all nodes below have the same format,
        # extract the fields now.
        if not extracted and self.base:
            output(ind, self.base.extract_name(),
                   '(ctx, &u.f_', self.base.base.name, ', insn);\n')
            extracted = True

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        sh = is_contiguous(self.thismask)
        if sh > 0:
            # Propagate SH down into the local functions.
            def str_switch(b, sh=sh):
                return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh)

            def str_case(b, sh=sh):
                return '0x{0:x}'.format(b >> sh)
        else:
            def str_switch(b):
                return 'insn & 0x{0:08x}'.format(b)

            def str_case(b):
                return '0x{0:08x}'.format(b)

        output(ind, 'switch (', str_switch(self.thismask), ') {\n')
        # Order by case value only, so child nodes are never compared.
        for b, s in sorted(self.subs, key=lambda sub: sub[0]):
            assert (self.thismask & ~s.fixedmask) == 0
            innermask = outermask | self.thismask
            innerbits = outerbits | b
            output(ind, 'case ', str_case(b), ':\n')
            output(ind, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            s.output_code(i + 4, extracted, innerbits, innermask)
            output(ind, ' break;\n')
        output(ind, '}\n')
# end Tree


class ExcMultiPattern(MultiPattern):
    """Class representing a non-overlapping set of instruction patterns"""

    def output_code(self, i, extracted, outerbits, outermask):
        # Defer everything to our decomposed Tree node
        self.tree.output_code(i, extracted, outerbits, outermask)

    @staticmethod
    def __build_tree(pats, outerbits, outermask):
        """Return a Tree that tells PATS apart by their fixed bits.

        OUTERBITS/OUTERMASK describe the bits already decided above.
        Reports an error and exits when several patterns cannot be
        distinguished.
        """
        # Find the intersection of all remaining fixedmask.
        innermask = ~outermask & insnmask
        for p in pats:
            innermask &= p.fixedmask

        if innermask == 0:
            # Edge condition: One pattern covers the entire insnmask
            if len(pats) == 1:
                t = Tree(outermask, innermask)
                t.subs.append((0, pats[0]))
                return t

            text = 'overlapping patterns:'
            for p in pats:
                text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p)
            error_with_file(pats[0].file, pats[0].lineno, text)

        fullmask = outermask | innermask

        # Sort each element of pats into the bin selected by the mask.
        bins = {}
        for p in pats:
            bins.setdefault(p.fixedbits & innermask, []).append(p)

        # We must recurse if any bin has more than one element or if
        # the single element in the bin has not been fully matched.
        t = Tree(fullmask, innermask)

        for b, l in bins.items():
            s = l[0]
            if len(l) > 1 or s.fixedmask & ~fullmask != 0:
                s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask)
            t.subs.append((b, s))

        return t

    def build_tree(self):
        """Build the children's trees, then this group's own tree."""
        # This previously called super().prop_format(), which only
        # worked because that method wrongly recursed into build_tree().
        super().build_tree()
        self.tree = self.__build_tree(self.pats, self.fixedbits,
                                      self.fixedmask)

    @staticmethod
    def __prop_format(tree):
        """Propagate Format objects into the decode tree"""

        # Depth first search.
        for (b, s) in tree.subs:
            if isinstance(s, Tree):
                ExcMultiPattern.__prop_format(s)

        # If all entries in SUBS have the same format, then
        # propagate that into the tree.
        f = None
        for (b, s) in tree.subs:
            if f is None:
                f = s.base
                if f is None:
                    return
            if f is not s.base:
                return
        tree.base = f

    def prop_format(self):
        """Propagate formats into the children, then into our own tree.

        Reads self.tree, so build_tree() must have run first.
        """
        super().prop_format()
        self.__prop_format(self.tree)

# end ExcMultiPattern
637
638
def parse_field(lineno, name, toks):
    """Parse one instruction field from TOKS at LINENO

    Each token is either '!function=NAME' or a 'pos:len' / 'pos:slen'
    bit range.  The resulting field object is stored in the global
    'fields' table under NAME; any problem is reported through error().
    """
    # A "simple" field will have only one entry;
    # a "multifield" will have several.
    parts = []
    total = 0
    func = None
    for tok in toks:
        if re.match('^!function=', tok):
            if func:
                error(lineno, 'duplicate function')
            func = tok.split('=')[1]
            continue

        if re.fullmatch('[0-9]+:s[0-9]+', tok):
            # Signed field extract
            pos_len = tok.split(':s')
            signed = True
        elif re.fullmatch('[0-9]+:[0-9]+', tok):
            # Unsigned field extract
            pos_len = tok.split(':')
            signed = False
        else:
            error(lineno, 'invalid field token "{0}"'.format(tok))
        pos = int(pos_len[0])
        length = int(pos_len[1])
        if pos + length > insnwidth:
            error(lineno, 'field {0} too large'.format(tok))
        parts.append(Field(signed, pos, length))
        total += length

    if total > insnwidth:
        error(lineno, 'field too large')

    if not parts:
        # No bit ranges at all: only valid as a pure function call.
        if func:
            fld = ParameterField(func)
        else:
            error(lineno, 'field with no value')
    else:
        if len(parts) == 1:
            fld = parts[0]
        else:
            mask = 0
            for part in parts:
                if mask & part.mask:
                    error(lineno, 'field components overlap')
                mask |= part.mask
            fld = MultiField(parts, mask)
        if func:
            fld = FunctionField(func, fld)

    if name in fields:
        error(lineno, 'duplicate field', name)
    fields[name] = fld
# end parse_field
699
700
def parse_arguments(lineno, name, toks):
    """Parse one argument set from TOKS at LINENO

    Each token is either '!extern' or a C identifier naming a member.
    The resulting Arguments object is stored in the global 'arguments'
    table under NAME; any problem is reported through error().
    """
    global anyextern

    members = []
    is_extern = False
    for tok in toks:
        if tok == '!extern':
            is_extern = True
            anyextern = True
            continue
        if re.fullmatch(re_C_ident, tok) is None:
            error(lineno, 'invalid argument set token "{0}"'.format(tok))
        if tok in members:
            error(lineno, 'duplicate argument "{0}"'.format(tok))
        members.append(tok)

    if name in arguments:
        error(lineno, 'duplicate argument set', name)
    arguments[name] = Arguments(name, members, is_extern)
# end parse_arguments
724
725
def lookup_field(lineno, name):
    """Return the field called NAME, or report an error at LINENO."""
    try:
        return fields[name]
    except KeyError:
        error(lineno, 'undefined field', name)
731
732
def add_field(lineno, flds, new_name, f):
    """Store field F in FLDS under NEW_NAME and return FLDS.

    A name already present in FLDS is reported as an error at LINENO.
    """
    already_present = new_name in flds
    if already_present:
        error(lineno, 'duplicate field', new_name)
    flds[new_name] = f
    return flds
738
739
def add_field_byname(lineno, flds, new_name, old_name):
    """Add the defined field OLD_NAME to FLDS as NEW_NAME; return FLDS."""
    existing = lookup_field(lineno, old_name)
    return add_field(lineno, flds, new_name, existing)
742
743
def infer_argument_set(flds):
    """Return an argument set whose members are the names in FLDS.

    An existing set with exactly those member names is reused.
    Otherwise a new non-extern set is created, named decode_function
    followed by the current number of sets, and registered.
    """
    match = next((candidate for candidate in arguments.values()
                  if eq_fields_for_args(flds, candidate.fields)), None)
    if match is not None:
        return match

    fresh_name = decode_function + str(len(arguments))
    fresh = Arguments(fresh_name, flds.keys(), False)
    arguments[fresh_name] = fresh
    return fresh
756
757
def infer_format(arg, fieldmask, flds, width):
    """Find or create a format for a pattern that names none.

    ARG is the pattern's explicit argument set, or None.  FLDS maps
    field names to field objects, FIELDMASK is the union of their masks
    and WIDTH the pattern width.

    Returns (fmt, const_flds).  FMT carries the non-constant fields;
    CONST_FLDS holds the ConstField entries, which the pattern keeps
    for itself.
    """
    # Split constants from extracted fields.  The test must be
    # isinstance(): 'c is ConstField' compares an instance against the
    # class and is never true, so constants were never split out.
    const_flds = {}
    var_flds = {}
    for n, c in flds.items():
        if isinstance(c, ConstField):
            const_flds[n] = c
        else:
            var_flds[n] = c

    # Look for an existing format with the same argument set and fields
    for fmt in formats.values():
        if arg:
            if fmt.base != arg:
                continue
        elif not eq_fields_for_args(flds, fmt.base.fields):
            # No explicit argument set: the candidate's set must cover
            # every field of the pattern, constants included.
            continue
        if fieldmask != fmt.fieldmask:
            continue
        if width != fmt.width:
            continue
        # A format holds extracted fields only, so compare those.
        if not eq_fields_for_fmts(var_flds, fmt.fields):
            continue
        return (fmt, const_flds)

    name = decode_function + '_Fmt_' + str(len(formats))
    if not arg:
        arg = infer_argument_set(flds)

    fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width)
    formats[name] = fmt

    return (fmt, const_flds)
# end infer_format
792
793
Richard Henderson08561fc2020-05-17 10:14:11 -0700794def parse_generic(lineno, parent_pat, name, toks):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800795 """Parse one instruction format from TOKS at LINENO"""
796 global fields
797 global arguments
798 global formats
Richard Henderson0eff2df2019-02-23 11:35:36 -0800799 global allpatterns
Richard Hendersonacfdd232020-09-03 12:23:34 -0700800 global re_arg_ident
801 global re_fld_ident
802 global re_fmt_ident
803 global re_C_ident
Richard Henderson568ae7e2017-12-07 12:44:09 -0800804 global insnwidth
805 global insnmask
Richard Henderson17560e92019-01-30 18:01:29 -0800806 global variablewidth
Richard Henderson568ae7e2017-12-07 12:44:09 -0800807
Richard Henderson08561fc2020-05-17 10:14:11 -0700808 is_format = parent_pat is None
809
Richard Henderson568ae7e2017-12-07 12:44:09 -0800810 fixedmask = 0
811 fixedbits = 0
812 undefmask = 0
813 width = 0
814 flds = {}
815 arg = None
816 fmt = None
817 for t in toks:
zhaolichang65fdb3c2020-09-17 15:50:23 +0800818 # '&Foo' gives a format an explicit argument set.
Richard Hendersonacfdd232020-09-03 12:23:34 -0700819 if re.fullmatch(re_arg_ident, t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800820 tt = t[1:]
821 if arg:
822 error(lineno, 'multiple argument sets')
823 if tt in arguments:
824 arg = arguments[tt]
825 else:
826 error(lineno, 'undefined argument set', t)
827 continue
828
829 # '@Foo' gives a pattern an explicit format.
Richard Hendersonacfdd232020-09-03 12:23:34 -0700830 if re.fullmatch(re_fmt_ident, t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800831 tt = t[1:]
832 if fmt:
833 error(lineno, 'multiple formats')
834 if tt in formats:
835 fmt = formats[tt]
836 else:
837 error(lineno, 'undefined format', t)
838 continue
839
840 # '%Foo' imports a field.
Richard Hendersonacfdd232020-09-03 12:23:34 -0700841 if re.fullmatch(re_fld_ident, t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800842 tt = t[1:]
843 flds = add_field_byname(lineno, flds, tt, tt)
844 continue
845
846 # 'Foo=%Bar' imports a field with a different name.
Richard Hendersonacfdd232020-09-03 12:23:34 -0700847 if re.fullmatch(re_C_ident + '=' + re_fld_ident, t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800848 (fname, iname) = t.split('=%')
849 flds = add_field_byname(lineno, flds, fname, iname)
850 continue
851
852 # 'Foo=number' sets an argument field to a constant value
Richard Hendersonacfdd232020-09-03 12:23:34 -0700853 if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800854 (fname, value) = t.split('=')
855 value = int(value)
856 flds = add_field(lineno, flds, fname, ConstField(value))
857 continue
858
859 # Pattern of 0s, 1s, dots and dashes indicate required zeros,
860 # required ones, or dont-cares.
John Snow2d110c12020-05-13 23:52:30 -0400861 if re.fullmatch('[01.-]+', t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800862 shift = len(t)
863 fms = t.replace('0', '1')
864 fms = fms.replace('.', '0')
865 fms = fms.replace('-', '0')
866 fbs = t.replace('.', '0')
867 fbs = fbs.replace('-', '0')
868 ubm = t.replace('1', '0')
869 ubm = ubm.replace('.', '0')
870 ubm = ubm.replace('-', '1')
871 fms = int(fms, 2)
872 fbs = int(fbs, 2)
873 ubm = int(ubm, 2)
874 fixedbits = (fixedbits << shift) | fbs
875 fixedmask = (fixedmask << shift) | fms
876 undefmask = (undefmask << shift) | ubm
877 # Otherwise, fieldname:fieldwidth
Richard Hendersonacfdd232020-09-03 12:23:34 -0700878 elif re.fullmatch(re_C_ident + ':s?[0-9]+', t):
Richard Henderson568ae7e2017-12-07 12:44:09 -0800879 (fname, flen) = t.split(':')
880 sign = False
881 if flen[0] == 's':
882 sign = True
883 flen = flen[1:]
884 shift = int(flen, 10)
Richard Henderson2decfc92019-03-05 15:34:41 -0800885 if shift + width > insnwidth:
886 error(lineno, 'field {0} exceeds insnwidth'.format(fname))
Richard Henderson568ae7e2017-12-07 12:44:09 -0800887 f = Field(sign, insnwidth - width - shift, shift)
888 flds = add_field(lineno, flds, fname, f)
889 fixedbits <<= shift
890 fixedmask <<= shift
891 undefmask <<= shift
892 else:
893 error(lineno, 'invalid token "{0}"'.format(t))
894 width += shift
895
Richard Henderson17560e92019-01-30 18:01:29 -0800896 if variablewidth and width < insnwidth and width % 8 == 0:
897 shift = insnwidth - width
898 fixedbits <<= shift
899 fixedmask <<= shift
900 undefmask <<= shift
901 undefmask |= (1 << shift) - 1
902
Richard Henderson568ae7e2017-12-07 12:44:09 -0800903 # We should have filled in all of the bits of the instruction.
Richard Henderson17560e92019-01-30 18:01:29 -0800904 elif not (is_format and width == 0) and width != insnwidth:
Richard Henderson568ae7e2017-12-07 12:44:09 -0800905 error(lineno, 'definition has {0} bits'.format(width))
906
zhaolichang65fdb3c2020-09-17 15:50:23 +0800907 # Do not check for fields overlapping fields; one valid usage
Richard Henderson568ae7e2017-12-07 12:44:09 -0800908 # is to be able to duplicate fields via import.
909 fieldmask = 0
910 for f in flds.values():
911 fieldmask |= f.mask
912
913 # Fix up what we've parsed to match either a format or a pattern.
914 if is_format:
915 # Formats cannot reference formats.
916 if fmt:
917 error(lineno, 'format referencing format')
918 # If an argument set is given, then there should be no fields
919 # without a place to store it.
920 if arg:
921 for f in flds.keys():
922 if f not in arg.fields:
923 error(lineno, 'field {0} not in argument set {1}'
924 .format(f, arg.name))
925 else:
926 arg = infer_argument_set(flds)
927 if name in formats:
928 error(lineno, 'duplicate format name', name)
929 fmt = Format(name, lineno, arg, fixedbits, fixedmask,
Richard Henderson17560e92019-01-30 18:01:29 -0800930 undefmask, fieldmask, flds, width)
Richard Henderson568ae7e2017-12-07 12:44:09 -0800931 formats[name] = fmt
932 else:
933 # Patterns can reference a format ...
934 if fmt:
935 # ... but not an argument simultaneously
936 if arg:
937 error(lineno, 'pattern specifies both format and argument set')
938 if fixedmask & fmt.fixedmask:
939 error(lineno, 'pattern fixed bits overlap format fixed bits')
Richard Henderson17560e92019-01-30 18:01:29 -0800940 if width != fmt.width:
941 error(lineno, 'pattern uses format of different width')
Richard Henderson568ae7e2017-12-07 12:44:09 -0800942 fieldmask |= fmt.fieldmask
943 fixedbits |= fmt.fixedbits
944 fixedmask |= fmt.fixedmask
945 undefmask |= fmt.undefmask
946 else:
Richard Henderson17560e92019-01-30 18:01:29 -0800947 (fmt, flds) = infer_format(arg, fieldmask, flds, width)
Richard Henderson568ae7e2017-12-07 12:44:09 -0800948 arg = fmt.base
949 for f in flds.keys():
950 if f not in arg.fields:
951 error(lineno, 'field {0} not in argument set {1}'
952 .format(f, arg.name))
953 if f in fmt.fields.keys():
954 error(lineno, 'field {0} set by format and pattern'.format(f))
955 for f in arg.fields:
956 if f not in flds.keys() and f not in fmt.fields.keys():
957 error(lineno, 'field {0} not initialized'.format(f))
958 pat = Pattern(name, lineno, fmt, fixedbits, fixedmask,
Richard Henderson17560e92019-01-30 18:01:29 -0800959 undefmask, fieldmask, flds, width)
Richard Henderson08561fc2020-05-17 10:14:11 -0700960 parent_pat.pats.append(pat)
Richard Henderson0eff2df2019-02-23 11:35:36 -0800961 allpatterns.append(pat)
Richard Henderson568ae7e2017-12-07 12:44:09 -0800962
963 # Validate the masks that we have assembled.
964 if fieldmask & fixedmask:
965 error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})'
966 .format(fieldmask, fixedmask))
967 if fieldmask & undefmask:
968 error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
969 .format(fieldmask, undefmask))
970 if fixedmask & undefmask:
971 error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})'
972 .format(fixedmask, undefmask))
973 if not is_format:
974 allbits = fieldmask | fixedmask | undefmask
975 if allbits != insnmask:
976 error(lineno, 'bits left unspecified (0x{0:08x})'
977 .format(allbits ^ insnmask))
# end parse_generic
979
Richard Henderson0eff2df2019-02-23 11:35:36 -0800980
def parse_file(f, parent_pat):
    """Parse all of the patterns within a file.

    Lines ending in a backslash token are joined with the following line;
    completely blank lines and comment-only lines are discarded.  Each
    logical line is then dispatched on its first token: '{' / '[' open a
    nested pattern group, '}' / ']' close one, and anything else is handed
    to parse_field, parse_arguments or parse_generic depending on which
    identifier regex (re_fld_ident, re_arg_ident, re_fmt_ident,
    re_pat_ident, all read from module scope) it matches.

    f          -- iterable yielding the lines of the input.
    parent_pat -- pattern group whose .pats list receives the patterns and
                  nested groups found at the top level of this file.

    Problems are reported through error().
    """
    toks = []
    lineno = 0
    nesting = 0
    # Stack of enclosing groups, pushed on '{' / '[' and popped on close.
    nesting_pats = []

    for line in f:
        lineno += 1

        # Expand and strip spaces, to find indent.
        line = line.rstrip()
        line = line.expandtabs()
        len1 = len(line)
        line = line.lstrip()
        len2 = len(line)

        # Discard comments
        end = line.find('#')
        if end >= 0:
            line = line[:end]

        t = line.split()
        if toks:
            # Next line after continuation
            toks.extend(t)
        else:
            # Allow completely blank lines.
            if len1 == 0:
                continue
            indent = len1 - len2
            # Empty line due to comment.
            if not t:
                # Indentation must be correct, even for comment lines.
                if indent != nesting:
                    error(lineno, 'indentation ', indent, ' != ', nesting)
                continue
            start_lineno = lineno
            toks = t

        # Continuation?
        if toks[-1] == '\\':
            toks.pop()
            continue

        name = toks.pop(0)

        # End nesting?
        if name == '}' or name == ']':
            if toks:
                error(start_lineno, 'extra tokens after close brace')

            # Make sure { } and [ ] nest properly.
            if (name == '}') != isinstance(parent_pat, IncMultiPattern):
                error(lineno, 'mismatched close brace')

            # Only an empty stack is an expected failure here; do not
            # swallow unrelated exceptions such as KeyboardInterrupt.
            try:
                parent_pat = nesting_pats.pop()
            except IndexError:
                error(lineno, 'extra close brace')

            nesting -= 2
            if indent != nesting:
                error(lineno, 'indentation ', indent, ' != ', nesting)

            toks = []
            continue

        # Everything else should have current indentation.
        if indent != nesting:
            error(start_lineno, 'indentation ', indent, ' != ', nesting)

        # Start nesting?
        if name == '{' or name == '[':
            if toks:
                error(start_lineno, 'extra tokens after open brace')

            if name == '{':
                nested_pat = IncMultiPattern(start_lineno)
            else:
                nested_pat = ExcMultiPattern(start_lineno)
            parent_pat.pats.append(nested_pat)
            nesting_pats.append(parent_pat)
            parent_pat = nested_pat

            nesting += 2
            toks = []
            continue

        # Determine the type of object needing to be parsed.
        if re.fullmatch(re_fld_ident, name):
            parse_field(start_lineno, name[1:], toks)
        elif re.fullmatch(re_arg_ident, name):
            parse_arguments(start_lineno, name[1:], toks)
        elif re.fullmatch(re_fmt_ident, name):
            parse_generic(start_lineno, None, name[1:], toks)
        elif re.fullmatch(re_pat_ident, name):
            parse_generic(start_lineno, parent_pat, name, toks)
        else:
            error(lineno, 'invalid token "{0}"'.format(name))
        toks = []

    if nesting != 0:
        error(lineno, 'missing close brace')
# end parse_file
1094
1095
class SizeTree:
    """Interior node of the size decode tree.

    mask  -- instruction bits tested at this node.
    subs  -- list of (bits, subtree) pairs, one per value of the masked bits.
    base  -- initialised to None.
    width -- width recorded for this node.
    """

    def __init__(self, m, w):
        self.mask = m
        self.subs = []
        self.base = None
        self.width = w

    def str1(self, i):
        """Return a textual dump of this subtree, indented by I."""
        pad = str_indent(i)
        pieces = ['{0}{1:08x}'.format(pad, self.mask), ' [\n']
        for bits, child in self.subs:
            pieces.append('{0} {1:08x}:\n'.format(pad, bits))
            pieces.append(child.str1(i + 4) + '\n')
        pieces.append(pad + ']')
        return ''.join(pieces)

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the switch statement for this node via output().

        EXTRACTED is the number of instruction bits already loaded;
        OUTERBITS/OUTERMASK describe the bits matched by enclosing nodes.
        """
        pad = str_indent(i)

        # If we need to load more bytes to test, do so now.
        if extracted < self.width:
            load_args = '_load_bytes(ctx, insn, {0}, {1});\n'.format(
                extracted // 8, self.width // 8)
            output(pad, 'insn = ', decode_function, load_args)
            extracted = self.width

        # Attempt to aid the compiler in producing compact switch statements.
        # If the bits in the mask are contiguous, extract them.
        shift = is_contiguous(self.mask)
        if shift > 0:
            switch_expr = '(insn >> {0}) & 0x{1:x}'.format(
                shift, self.mask >> shift)
            case_template = '0x{0:x}'
            case_shift = shift
        else:
            switch_expr = 'insn & 0x{0:08x}'.format(self.mask)
            case_template = '0x{0:08x}'
            case_shift = 0

        output(pad, 'switch (', switch_expr, ') {\n')
        # The mask seen by every case is the same; compute it once.
        innermask = outermask | self.mask
        for bits, child in sorted(self.subs):
            innerbits = outerbits | bits
            output(pad, 'case ', case_template.format(bits >> case_shift),
                   ':\n')
            output(pad, ' /* ',
                   str_match_bits(innerbits, innermask), ' */\n')
            child.output_code(i + 4, extracted, innerbits, innermask)
        output(pad, '}\n')
        output(pad, 'return insn;\n')
# end SizeTree
1156
class SizeLeaf:
    """Leaf node of the size decode tree.

    mask  -- mask value recorded for this leaf.
    width -- width recorded for this leaf.
    """

    def __init__(self, m, w):
        self.mask = m
        self.width = w

    def str1(self, i):
        """Return this leaf's mask as hex, indented by I."""
        return '{0}{1:08x}'.format(str_indent(i), self.mask)

    def __str__(self):
        return self.str1(0)

    def output_code(self, i, extracted, outerbits, outermask):
        """Emit the final load (if still needed) and the return, via output()."""
        pad = str_indent(i)

        # If we need to load more bytes, do so now.
        if extracted < self.width:
            load_args = '_load_bytes(ctx, insn, {0}, {1});\n'.format(
                extracted // 8, self.width // 8)
            output(pad, 'insn = ', decode_function, load_args)
            extracted = self.width
        output(pad, 'return insn;\n')
# end SizeLeaf
1183
1184
def build_size_tree(pats, width, outerbits, outermask):
    """Build a SizeTree/SizeLeaf hierarchy for the patterns in PATS.

    WIDTH selects the byte of the instruction being examined (the 0xff
    mask is shifted by insnwidth - WIDTH); OUTERBITS/OUTERMASK describe
    the bits already decided by enclosing nodes.  Returns a SizeLeaf when
    all patterns share one width, otherwise a SizeTree (possibly obtained
    by recursing with a larger WIDTH).
    """
    global insnwidth

    # Collect the mask of bits that are fixed in this width
    fixed_here = (0xff << (insnwidth - width)) & ~outermask
    widest = None
    uniform = True
    for pat in pats:
        fixed_here &= pat.fixedmask
        if widest is None:
            widest = pat.width
        elif pat.width != widest:
            uniform = False
            widest = max(widest, pat.width)

    if uniform:
        return SizeLeaf(fixed_here, widest)

    if fixed_here == 0:
        # Nothing distinguishes the patterns in this byte; look further
        # along if any pattern is wide enough, otherwise report overlap.
        if width < widest:
            return build_size_tree(pats, width + 8, outerbits, outermask)

        pnames = [p.name + ':' + p.file + ':' + str(p.lineno) for p in pats]
        error_with_file(pats[0].file, pats[0].lineno,
                        'overlapping patterns size {0}:'.format(width), pnames)

    # Group the patterns by the value of their fixed bits under the mask.
    bins = {}
    for pat in pats:
        bins.setdefault(pat.fixedbits & fixed_here, []).append(pat)

    fullmask = outermask | fixed_here
    if len(bins) == 1:
        (bits, members), = bins.items()
        return build_size_tree(members, width + 8, bits | outerbits, fullmask)

    node = SizeTree(fixed_here, width)
    for bits, members in bins.items():
        subtree = build_size_tree(members, width, bits | outerbits, fullmask)
        node.subs.append((bits, subtree))
    return node
# end build_size_tree
1235
1236
def prop_size(tree):
    """Propagate minimum widths up the decode size tree.

    For a SizeTree node, every subtree is visited, the node's width is
    replaced by the smallest width found below it, and that value is
    returned.  Any other node simply reports its own width.
    """
    if not isinstance(tree, SizeTree):
        return tree.width

    narrowest = None
    for _bits, child in tree.subs:
        child_width = prop_size(child)
        if narrowest is None or child_width < narrowest:
            narrowest = child_width
    assert narrowest >= tree.width
    tree.width = narrowest
    return narrowest
# end prop_size
1252
1253
def main():
    """Command-line entry point.

    Parses the options from sys.argv, reads every input file named on the
    command line with parse_file, builds the decode tree (and, for
    variable-width instruction sets, the size tree), then writes the
    generated C text either to the file given by -o/--output or to
    standard output.
    """
    global arguments
    global formats
    global allpatterns
    global translate_scope
    global translate_prefix
    global output_fd
    global output_file
    global input_file
    global insnwidth
    global insntype
    global insnmask
    global decode_function
    global variablewidth
    global anyextern

    decode_scope = 'static '

    long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=',
                 'static-decode=', 'varinsnwidth=']
    try:
        (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts)
    except getopt.GetoptError as err:
        # NOTE(review): the code below relies on error() not returning,
        # since opts/args are unbound on this path -- confirm.
        error(0, err)
    for o, a in opts:
        if o in ('-o', '--output'):
            output_file = a
        elif o == '--decode':
            # An explicitly named decode function is emitted non-static.
            decode_function = a
            decode_scope = ''
        elif o == '--static-decode':
            decode_function = a
        elif o == '--translate':
            translate_prefix = a
            translate_scope = ''
        elif o in ('-w', '--insnwidth', '--varinsnwidth'):
            if o == '--varinsnwidth':
                variablewidth = True
            insnwidth = int(a)
            if insnwidth == 16:
                insntype = 'uint16_t'
                insnmask = 0xffff
            elif insnwidth != 32:
                error(0, 'cannot handle insns of width', insnwidth)
        else:
            assert False, 'unhandled option'

    if len(args) < 1:
        error(0, 'missing input file')

    toppat = ExcMultiPattern(0)

    for filename in args:
        input_file = filename
        # The context manager closes the file even if parsing raises.
        with open(filename, 'rt', encoding='utf-8') as f:
            parse_file(f, toppat)

    # We do not want to compute masks for toppat, because those masks
    # are used as a starting point for build_tree.  For toppat, we must
    # insist that decode begins from naught.
    for i in toppat.pats:
        i.prop_masks()

    toppat.build_tree()
    toppat.prop_format()

    if variablewidth:
        for i in toppat.pats:
            i.prop_width()
        stree = build_size_tree(toppat.pats, 8, 0, 0)
        prop_size(stree)

    if output_file:
        output_fd = open(output_file, 'wt', encoding='utf-8')
    else:
        output_fd = io.TextIOWrapper(sys.stdout.buffer,
                                     encoding=sys.stdout.encoding,
                                     errors="ignore")

    output_autogen()
    for n in sorted(arguments.keys()):
        f = arguments[n]
        f.output_def()

    # A single translate function can be invoked for different patterns.
    # Make sure that the argument sets are the same, and declare the
    # function only once.
    #
    # If we're sharing formats, we're likely also sharing trans_* functions,
    # but we can't tell which ones.  Prevent issues from the compiler by
    # suppressing redundant declaration warnings.
    if anyextern:
        output("#pragma GCC diagnostic push\n",
               "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n",
               "#ifdef __clang__\n"
               "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n",
               "#endif\n\n")

    out_pats = {}
    for i in allpatterns:
        if i.name in out_pats:
            p = out_pats[i.name]
            if i.base.base != p.base.base:
                error(0, i.name, ' has conflicting argument sets')
        else:
            i.output_decl()
            out_pats[i.name] = i
    output('\n')

    if anyextern:
        output("#pragma GCC diagnostic pop\n\n")

    for n in sorted(formats.keys()):
        f = formats[n]
        f.output_extract()

    output(decode_scope, 'bool ', decode_function,
           '(DisasContext *ctx, ', insntype, ' insn)\n{\n')

    i4 = str_indent(4)

    if len(allpatterns) != 0:
        output(i4, 'union {\n')
        for n in sorted(arguments.keys()):
            f = arguments[n]
            output(i4, i4, f.struct_name(), ' f_', f.name, ';\n')
        output(i4, '} u;\n\n')
        toppat.output_code(4, False, 0, 0)

    output(i4, 'return false;\n')
    output('}\n')

    if variablewidth:
        output('\n', decode_scope, insntype, ' ', decode_function,
               '_load(DisasContext *ctx)\n{\n',
               ' ', insntype, ' insn = 0;\n\n')
        stree.output_code(4, 0, 0, 0)
        output('}\n')

    if output_file:
        output_fd.close()
# end main
1397
1398
# Run the generator only when executed as a script, not when imported.
if '__main__' == __name__:
    main()