diff options
Diffstat (limited to 'scripts/decodetree.py')
-rw-r--r--[-rwxr-xr-x] | scripts/decodetree.py | 1321 |
1 files changed, 958 insertions, 363 deletions
diff --git a/scripts/decodetree.py b/scripts/decodetree.py index 277f9a9bba..e8b72da3a9 100755..100644 --- a/scripts/decodetree.py +++ b/scripts/decodetree.py @@ -1,10 +1,10 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # Copyright (c) 2018 Linaro Limited # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either -# version 2 of the License, or (at your option) any later version. +# version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -17,136 +17,7 @@ # # Generate a decoding tree from a specification file. -# -# The tree is built from instruction "patterns". A pattern may represent -# a single architectural instruction or a group of same, depending on what -# is convenient for further processing. -# -# Each pattern has "fixedbits" & "fixedmask", the combination of which -# describes the condition under which the pattern is matched: -# -# (insn & fixedmask) == fixedbits -# -# Each pattern may have "fields", which are extracted from the insn and -# passed along to the translator. Examples of such are registers, -# immediates, and sub-opcodes. -# -# In support of patterns, one may declare fields, argument sets, and -# formats, each of which may be re-used to simplify further definitions. -# -# *** Field syntax: -# -# field_def := '%' identifier ( unnamed_field )+ ( !function=identifier )? -# unnamed_field := number ':' ( 's' ) number -# -# For unnamed_field, the first number is the least-significant bit position of -# the field and the second number is the length of the field. If the 's' is -# present, the field is considered signed. If multiple unnamed_fields are -# present, they are concatenated. In this way one can define disjoint fields. -# -# If !function is specified, the concatenated result is passed through the -# named function, taking and returning an integral value. -# -# FIXME: the fields of the structure into which this result will be stored -# is restricted to "int". Which means that we cannot expand 64-bit items. -# -# Field examples: -# -# %disp 0:s16 -- sextract(i, 0, 16) -# %imm9 16:6 10:3 -- extract(i, 16, 6) << 3 | extract(i, 10, 3) -# %disp12 0:s1 1:1 2:10 -- sextract(i, 0, 1) << 11 -# | extract(i, 1, 1) << 10 -# | extract(i, 2, 10) -# %shimm8 5:s8 13:1 !function=expand_shimm8 -# -- expand_shimm8(sextract(i, 5, 8) << 1 -# | extract(i, 13, 1)) -# -# *** Argument set syntax: -# -# args_def := '&' identifier ( args_elt )+ -# args_elt := identifier -# -# Each args_elt defines an argument within the argument set. -# Each argument set will be rendered as a C structure "arg_$name" -# with each of the fields being one of the member arguments. -# -# Argument set examples: -# -# ®3 ra rb rc -# &loadstore reg base offset -# -# *** Format syntax: -# -# fmt_def := '@' identifier ( fmt_elt )+ -# fmt_elt := fixedbit_elt | field_elt | field_ref | args_ref -# fixedbit_elt := [01.-]+ -# field_elt := identifier ':' 's'? number -# field_ref := '%' identifier | identifier '=' '%' identifier -# args_ref := '&' identifier -# -# Defining a format is a handy way to avoid replicating groups of fields -# across many instruction patterns. -# -# A fixedbit_elt describes a contiguous sequence of bits that must -# be 1, 0, [.-] for don't care. The difference between '.' and '-' -# is that '.' means that the bit will be covered with a field or a -# final [01] from the pattern, and '-' means that the bit is really -# ignored by the cpu and will not be specified. -# -# A field_elt describes a simple field only given a width; the position of -# the field is implied by its position with respect to other fixedbit_elt -# and field_elt. -# -# If any fixedbit_elt or field_elt appear then all bits must be defined. -# Padding with a fixedbit_elt of all '.' is an easy way to accomplish that. -# -# A field_ref incorporates a field by reference. This is the only way to -# add a complex field to a format. A field may be renamed in the process -# via assignment to another identifier. This is intended to allow the -# same argument set be used with disjoint named fields. -# -# A single args_ref may specify an argument set to use for the format. -# The set of fields in the format must be a subset of the arguments in -# the argument set. If an argument set is not specified, one will be -# inferred from the set of fields. -# -# It is recommended, but not required, that all field_ref and args_ref -# appear at the end of the line, not interleaving with fixedbit_elf or -# field_elt. -# -# Format examples: -# -# @opr ...... ra:5 rb:5 ... 0 ....... rc:5 -# @opi ...... ra:5 lit:8 1 ....... rc:5 -# -# *** Pattern syntax: -# -# pat_def := identifier ( pat_elt )+ -# pat_elt := fixedbit_elt | field_elt | field_ref -# | args_ref | fmt_ref | const_elt -# fmt_ref := '@' identifier -# const_elt := identifier '=' number -# -# The fixedbit_elt and field_elt specifiers are unchanged from formats. -# A pattern that does not specify a named format will have one inferred -# from a referenced argument set (if present) and the set of fields. -# -# A const_elt allows a argument to be set to a constant value. This may -# come in handy when fields overlap between patterns and one has to -# include the values in the fixedbit_elt instead. -# -# The decoder will call a translator function for each pattern matched. -# -# Pattern examples: -# -# addl_r 010000 ..... ..... .... 0000000 ..... @opr -# addl_i 010000 ..... ..... .... 0000000 ..... @opi -# -# which will, in part, invoke -# -# trans_addl_r(ctx, &arg_opr, insn) -# and -# trans_addl_i(ctx, &arg_opi, insn) +# See the syntax and semantics in docs/devel/decodetree.rst. # import io @@ -154,44 +25,139 @@ import os import re import sys import getopt -import pdb insnwidth = 32 +bitop_width = 32 insnmask = 0xffffffff +variablewidth = False fields = {} arguments = {} formats = {} -patterns = [] +allpatterns = [] +anyextern = False +testforerror = False translate_prefix = 'trans' translate_scope = 'static ' input_file = '' output_file = None output_fd = None +output_null = False insntype = 'uint32_t' +decode_function = 'decode' + +# An identifier for C. +re_C_ident = '[a-zA-Z][a-zA-Z0-9_]*' + +# Identifiers for Arguments, Fields, Formats and Patterns. +re_arg_ident = '&[a-zA-Z0-9_]*' +re_fld_ident = '%[a-zA-Z0-9_]*' +re_fmt_ident = '@[a-zA-Z0-9_]*' +re_pat_ident = '[a-zA-Z0-9_]*' + +# Local implementation of a topological sort. We use the same API that +# the Python graphlib does, so that when QEMU moves forward to a +# baseline of Python 3.9 or newer this code can all be dropped and +# replaced with: +# from graphlib import TopologicalSorter, CycleError +# +# https://docs.python.org/3.9/library/graphlib.html#graphlib.TopologicalSorter +# +# We only implement the parts of TopologicalSorter we care about: +# ts = TopologicalSorter(graph=None) +# create the sorter. graph is a dictionary whose keys are +# nodes and whose values are lists of the predecessors of that node. +# (That is, if graph contains "A" -> ["B", "C"] then we must output +# B and C before A.) +# ts.static_order() +# returns a list of all the nodes in sorted order, or raises CycleError +# CycleError +# exception raised if there are cycles in the graph. The second +# element in the args attribute is a list of nodes which form a +# cycle; the first and last element are the same, eg [a, b, c, a] +# (Our implementation doesn't give the order correctly.) +# +# For our purposes we can assume that the data set is always small +# (typically 10 nodes or less, actual links in the graph very rare), +# so we don't need to worry about efficiency of implementation. +# +# The core of this implementation is from +# https://code.activestate.com/recipes/578272-topological-sort/ +# (but updated to Python 3), and is under the MIT license. + +class CycleError(ValueError): + """Subclass of ValueError raised if cycles exist in the graph""" + pass + +class TopologicalSorter: + """Topologically sort a graph""" + def __init__(self, graph=None): + self.graph = graph + + def static_order(self): + # We do the sort right here, unlike the stdlib version + from functools import reduce + data = {} + r = [] + + if not self.graph: + return [] + + # This code wants the values in the dict to be specifically sets + for k, v in self.graph.items(): + data[k] = set(v) + + # Find all items that don't depend on anything. + extra_items_in_deps = (reduce(set.union, data.values()) + - set(data.keys())) + # Add empty dependencies where needed + data.update({item:{} for item in extra_items_in_deps}) + while True: + ordered = set(item for item, dep in data.items() if not dep) + if not ordered: + break + r.extend(ordered) + data = {item: (dep - ordered) + for item, dep in data.items() + if item not in ordered} + if data: + # This doesn't give as nice results as the stdlib, which + # gives you the cycle by listing the nodes in order. Here + # we only know the nodes in the cycle but not their order. + raise CycleError(f'nodes are in a cycle', list(data.keys())) -re_ident = '[a-zA-Z][a-zA-Z0-9_]*' - + return r +# end TopologicalSorter -def error(lineno, *args): +def error_with_file(file, lineno, *args): """Print an error message from file:line and args and exit.""" global output_file global output_fd + # For the test suite expected-errors case, don't print the + # string "error: ", so they don't turn up as false positives + # if you grep the meson logs for strings like that. + end = 'error: ' if not testforerror else 'detected: ' + prefix = '' + if file: + prefix += f'{file}:' if lineno: - r = '{0}:{1}: error:'.format(input_file, lineno) - elif input_file: - r = '{0}: error:'.format(input_file) - else: - r = 'error:' - for a in args: - r += ' ' + str(a) - r += '\n' - sys.stderr.write(r) + prefix += f'{lineno}:' + if prefix: + prefix += ' ' + print(prefix, end=end, file=sys.stderr) + print(*args, file=sys.stderr) + if output_file and output_fd: output_fd.close() os.remove(output_file) - exit(1) + exit(0 if testforerror else 1) +# end error_with_file + + +def error(lineno, *args): + error_with_file(input_file, lineno, *args) +# end error def output(*args): @@ -200,13 +166,6 @@ def output(*args): output_fd.write(a) -if sys.version_info >= (3, 0): - re_fullmatch = re.fullmatch -else: - def re_fullmatch(pat, str): - return re.match('^' + pat + '$', str) - - def output_autogen(): output('/* This file is autogenerated by scripts/decodetree.py. */\n\n') @@ -217,13 +176,30 @@ def str_indent(c): def str_fields(fields): - """Return a string uniquely identifing FIELDS""" + """Return a string uniquely identifying FIELDS""" r = '' for n in sorted(fields.keys()): r += '_' + n return r[1:] +def whex(val): + """Return a hex string for val padded for insnwidth""" + global insnwidth + return f'0x{val:0{insnwidth // 4}x}' + + +def whexC(val): + """Return a hex string for val padded for insnwidth, + and with the proper suffix for a C constant.""" + suffix = '' + if val >= 0x100000000: + suffix = 'ull' + elif val >= 0x80000000: + suffix = 'u' + return whex(val) + suffix + + def str_match_bits(bits, mask): """Return a string pretty-printing BITS/MASK""" global insnwidth @@ -252,6 +228,7 @@ def is_pow2(x): def ctz(x): """Return the number of times 2 factors into X.""" + assert x != 0 r = 0 while ((x >> r) & 1) == 0: r += 1 @@ -259,6 +236,8 @@ def ctz(x): def is_contiguous(bits): + if bits == 0: + return -1 shift = ctz(bits) if is_pow2((bits >> shift) + 1): return shift @@ -266,11 +245,15 @@ def is_contiguous(bits): return -1 -def eq_fields_for_args(flds_a, flds_b): - if len(flds_a) != len(flds_b): +def eq_fields_for_args(flds_a, arg): + if len(flds_a) != len(arg.fields): return False + # Only allow inference on default types + for t in arg.types: + if t != 'int': + return False for k, a in flds_a.items(): - if k not in flds_b: + if k not in arg.fields: return False return True @@ -300,17 +283,18 @@ class Field: s = 's' else: s = '' - return str(pos) + ':' + s + str(len) + return str(self.pos) + ':' + s + str(self.len) - def str_extract(self): - if self.sign: - extr = 'sextract32' - else: - extr = 'extract32' - return '{0}(insn, {1}, {2})'.format(extr, self.pos, self.len) + def str_extract(self, lvalue_formatter): + global bitop_width + s = 's' if self.sign else '' + return f'{s}extract{bitop_width}(insn, {self.pos}, {self.len})' + + def referenced_fields(self): + return [] def __eq__(self, other): - return self.sign == other.sign and self.sign == other.sign + return self.sign == other.sign and self.mask == other.mask def __ne__(self, other): return not self.__eq__(other) @@ -327,18 +311,25 @@ class MultiField: def __str__(self): return str(self.subs) - def str_extract(self): + def str_extract(self, lvalue_formatter): + global bitop_width ret = '0' pos = 0 for f in reversed(self.subs): + ext = f.str_extract(lvalue_formatter) if pos == 0: - ret = f.str_extract() + ret = ext else: - ret = 'deposit32({0}, {1}, {2}, {3})' \ - .format(ret, pos, 32 - pos, f.str_extract()) + ret = f'deposit{bitop_width}({ret}, {pos}, {bitop_width - pos}, {ext})' pos += f.len return ret + def referenced_fields(self): + l = [] + for f in self.subs: + l.extend(f.referenced_fields()) + return l + def __ne__(self, other): if len(self.subs) != len(other.subs): return True @@ -362,16 +353,19 @@ class ConstField: def __str__(self): return str(self.value) - def str_extract(self): + def str_extract(self, lvalue_formatter): return str(self.value) + def referenced_fields(self): + return [] + def __cmp__(self, other): return self.value - other.value # end ConstField class FunctionField: - """Class representing a field passed through an expander""" + """Class representing a field passed through a function""" def __init__(self, func, base): self.mask = base.mask self.sign = base.sign @@ -381,8 +375,12 @@ class FunctionField: def __str__(self): return self.func + '(' + str(self.base) + ')' - def str_extract(self): - return self.func + '(' + self.base.str_extract() + ')' + def str_extract(self, lvalue_formatter): + return (self.func + '(ctx, ' + + self.base.str_extract(lvalue_formatter) + ')') + + def referenced_fields(self): + return self.base.referenced_fields() def __eq__(self, other): return self.func == other.func and self.base == other.base @@ -392,11 +390,63 @@ class FunctionField: # end FunctionField +class ParameterField: + """Class representing a pseudo-field read from a function""" + def __init__(self, func): + self.mask = 0 + self.sign = 0 + self.func = func + + def __str__(self): + return self.func + + def str_extract(self, lvalue_formatter): + return self.func + '(ctx)' + + def referenced_fields(self): + return [] + + def __eq__(self, other): + return self.func == other.func + + def __ne__(self, other): + return not self.__eq__(other) +# end ParameterField + +class NamedField: + """Class representing a field already named in the pattern""" + def __init__(self, name, sign, len): + self.mask = 0 + self.sign = sign + self.len = len + self.name = name + + def __str__(self): + return self.name + + def str_extract(self, lvalue_formatter): + global bitop_width + s = 's' if self.sign else '' + lvalue = lvalue_formatter(self.name) + return f'{s}extract{bitop_width}({lvalue}, 0, {self.len})' + + def referenced_fields(self): + return [self.name] + + def __eq__(self, other): + return self.name == other.name + + def __ne__(self, other): + return not self.__eq__(other) +# end NamedField + class Arguments: """Class representing the extracted fields of a format""" - def __init__(self, nm, flds): + def __init__(self, nm, flds, types, extern): self.name = nm - self.fields = sorted(flds) + self.extern = extern + self.fields = flds + self.types = types def __str__(self): return self.name + ' ' + str(self.fields) @@ -405,17 +455,18 @@ class Arguments: return 'arg_' + self.name def output_def(self): - output('typedef struct {\n') - for n in self.fields: - output(' int ', n, ';\n') - output('} ', self.struct_name(), ';\n\n') + if not self.extern: + output('typedef struct {\n') + for (n, t) in zip(self.fields, self.types): + output(f' {t} {n};\n') + output('} ', self.struct_name(), ';\n\n') # end Arguments - class General: """Common code between instruction formats and instruction patterns""" - def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds): + def __init__(self, name, lineno, base, fixb, fixm, udfm, fldm, flds, w): self.name = name + self.file = input_file self.lineno = lineno self.base = base self.fixedbits = fixb @@ -423,18 +474,60 @@ class General: self.undefmask = udfm self.fieldmask = fldm self.fields = flds + self.width = w + self.dangling = None def __str__(self): - r = self.name - if self.base: - r = r + ' ' + self.base.name - else: - r = r + ' ' + str(self.fields) - r = r + ' ' + str_match_bits(self.fixedbits, self.fixedmask) - return r + return self.name + ' ' + str_match_bits(self.fixedbits, self.fixedmask) def str1(self, i): return str_indent(i) + self.__str__() + + def dangling_references(self): + # Return a list of all named references which aren't satisfied + # directly by this format/pattern. This will be either: + # * a format referring to a field which is specified by the + # pattern(s) using it + # * a pattern referring to a field which is specified by the + # format it uses + # * a user error (referring to a field that doesn't exist at all) + if self.dangling is None: + # Compute this once and cache the answer + dangling = [] + for n, f in self.fields.items(): + for r in f.referenced_fields(): + if r not in self.fields: + dangling.append(r) + self.dangling = dangling + return self.dangling + + def output_fields(self, indent, lvalue_formatter): + # We use a topological sort to ensure that any use of NamedField + # comes after the initialization of the field it is referencing. + graph = {} + for n, f in self.fields.items(): + refs = f.referenced_fields() + graph[n] = refs + + try: + ts = TopologicalSorter(graph) + for n in ts.static_order(): + # We only want to emit assignments for the keys + # in our fields list, not for anything that ends up + # in the tsort graph only because it was referenced as + # a NamedField. + try: + f = self.fields[n] + output(indent, lvalue_formatter(n), ' = ', + f.str_extract(lvalue_formatter), ';\n') + except KeyError: + pass + except CycleError as e: + # The second element of args is a list of nodes which form + # a cycle (there might be others too, but only one is reported). + # Pretty-print it to tell the user. + cycle = ' => '.join(e.args[1]) + error(self.lineno, 'field definitions form a cycle: ' + cycle) # end General @@ -442,13 +535,13 @@ class Format(General): """Class representing an instruction format""" def extract_name(self): - return 'extract_' + self.name + global decode_function + return decode_function + '_extract_' + self.name def output_extract(self): - output('static void ', self.extract_name(), '(', + output('static void ', self.extract_name(), '(DisasContext *ctx, ', self.base.struct_name(), ' *a, ', insntype, ' insn)\n{\n') - for n, f in self.fields.items(): - output(' a->', n, ' = ', f.str_extract(), ';\n') + self.output_fields(str_indent(4), lambda n: 'a->' + n) output('}\n\n') # end Format @@ -462,28 +555,314 @@ class Pattern(General): output('typedef ', self.base.base.struct_name(), ' arg_', self.name, ';\n') output(translate_scope, 'bool ', translate_prefix, '_', self.name, - '(DisasContext *ctx, arg_', self.name, - ' *a, ', insntype, ' insn);\n') + '(DisasContext *ctx, arg_', self.name, ' *a);\n') def output_code(self, i, extracted, outerbits, outermask): global translate_prefix ind = str_indent(i) arg = self.base.base.name - output(ind, '/* line ', str(self.lineno), ' */\n') + output(ind, '/* ', self.file, ':', str(self.lineno), ' */\n') + # We might have named references in the format that refer to fields + # in the pattern, or named references in the pattern that refer + # to fields in the format. This affects whether we extract the fields + # for the format before or after the ones for the pattern. + # For simplicity we don't allow cross references in both directions. + # This is also where we catch the syntax error of referring to + # a nonexistent field. + fmt_refs = self.base.dangling_references() + for r in fmt_refs: + if r not in self.fields: + error(self.lineno, f'format refers to undefined field {r}') + pat_refs = self.dangling_references() + for r in pat_refs: + if r not in self.base.fields: + error(self.lineno, f'pattern refers to undefined field {r}') + if pat_refs and fmt_refs: + error(self.lineno, ('pattern that uses fields defined in format ' + 'cannot use format that uses fields defined ' + 'in pattern')) + if fmt_refs: + # pattern fields first + self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n) + assert not extracted, "dangling fmt refs but it was already extracted" if not extracted: - output(ind, self.base.extract_name(), '(&u.f_', arg, ', insn);\n') - for n, f in self.fields.items(): - output(ind, 'u.f_', arg, '.', n, ' = ', f.str_extract(), ';\n') - output(ind, 'return ', translate_prefix, '_', self.name, - '(ctx, &u.f_', arg, ', insn);\n') + output(ind, self.base.extract_name(), + '(ctx, &u.f_', arg, ', insn);\n') + if not fmt_refs: + # pattern fields last + self.output_fields(ind, lambda n: 'u.f_' + arg + '.' + n) + + output(ind, 'if (', translate_prefix, '_', self.name, + '(ctx, &u.f_', arg, ')) return true;\n') + + # Normal patterns do not have children. + def build_tree(self): + return + def prop_masks(self): + return + def prop_format(self): + return + def prop_width(self): + return + # end Pattern +class MultiPattern(General): + """Class representing a set of instruction patterns""" + + def __init__(self, lineno): + self.file = input_file + self.lineno = lineno + self.pats = [] + self.base = None + self.fixedbits = 0 + self.fixedmask = 0 + self.undefmask = 0 + self.width = None + + def __str__(self): + r = 'group' + if self.fixedbits is not None: + r += ' ' + str_match_bits(self.fixedbits, self.fixedmask) + return r + + def output_decl(self): + for p in self.pats: + p.output_decl() + + def prop_masks(self): + global insnmask + + fixedmask = insnmask + undefmask = insnmask + + # Collect fixedmask/undefmask for all of the children. + for p in self.pats: + p.prop_masks() + fixedmask &= p.fixedmask + undefmask &= p.undefmask + + # Widen fixedmask until all fixedbits match + repeat = True + fixedbits = 0 + while repeat and fixedmask != 0: + fixedbits = None + for p in self.pats: + thisbits = p.fixedbits & fixedmask + if fixedbits is None: + fixedbits = thisbits + elif fixedbits != thisbits: + fixedmask &= ~(fixedbits ^ thisbits) + break + else: + repeat = False + + self.fixedbits = fixedbits + self.fixedmask = fixedmask + self.undefmask = undefmask + + def build_tree(self): + for p in self.pats: + p.build_tree() + + def prop_format(self): + for p in self.pats: + p.prop_format() + + def prop_width(self): + width = None + for p in self.pats: + p.prop_width() + if width is None: + width = p.width + elif width != p.width: + error_with_file(self.file, self.lineno, + 'width mismatch in patterns within braces') + self.width = width + +# end MultiPattern + + +class IncMultiPattern(MultiPattern): + """Class representing an overlapping set of instruction patterns""" + + def output_code(self, i, extracted, outerbits, outermask): + global translate_prefix + ind = str_indent(i) + for p in self.pats: + if outermask != p.fixedmask: + innermask = p.fixedmask & ~outermask + innerbits = p.fixedbits & ~outermask + output(ind, f'if ((insn & {whexC(innermask)}) == {whexC(innerbits)}) {{\n') + output(ind, f' /* {str_match_bits(p.fixedbits, p.fixedmask)} */\n') + p.output_code(i + 4, extracted, p.fixedbits, p.fixedmask) + output(ind, '}\n') + else: + p.output_code(i, extracted, p.fixedbits, p.fixedmask) + + def build_tree(self): + if not self.pats: + error_with_file(self.file, self.lineno, 'empty pattern group') + super().build_tree() + +#end IncMultiPattern + + +class Tree: + """Class representing a node in a decode tree""" + + def __init__(self, fm, tm): + self.fixedmask = fm + self.thismask = tm + self.subs = [] + self.base = None + + def str1(self, i): + ind = str_indent(i) + r = ind + whex(self.fixedmask) + if self.format: + r += ' ' + self.format.name + r += ' [\n' + for (b, s) in self.subs: + r += ind + f' {whex(b)}:\n' + r += s.str1(i + 4) + '\n' + r += ind + ']' + return r + + def __str__(self): + return self.str1(0) + + def output_code(self, i, extracted, outerbits, outermask): + ind = str_indent(i) + + # If we identified all nodes below have the same format, + # extract the fields now. But don't do it if the format relies + # on named fields from the insn pattern, as those won't have + # been initialised at this point. + if not extracted and self.base and not self.base.dangling_references(): + output(ind, self.base.extract_name(), + '(ctx, &u.f_', self.base.base.name, ', insn);\n') + extracted = True + + # Attempt to aid the compiler in producing compact switch statements. + # If the bits in the mask are contiguous, extract them. + sh = is_contiguous(self.thismask) + if sh > 0: + # Propagate SH down into the local functions. + def str_switch(b, sh=sh): + return f'(insn >> {sh}) & {b >> sh:#x}' + + def str_case(b, sh=sh): + return hex(b >> sh) + else: + def str_switch(b): + return f'insn & {whexC(b)}' + + def str_case(b): + return whexC(b) + + output(ind, 'switch (', str_switch(self.thismask), ') {\n') + for b, s in sorted(self.subs): + assert (self.thismask & ~s.fixedmask) == 0 + innermask = outermask | self.thismask + innerbits = outerbits | b + output(ind, 'case ', str_case(b), ':\n') + output(ind, ' /* ', + str_match_bits(innerbits, innermask), ' */\n') + s.output_code(i + 4, extracted, innerbits, innermask) + output(ind, ' break;\n') + output(ind, '}\n') +# end Tree + + +class ExcMultiPattern(MultiPattern): + """Class representing a non-overlapping set of instruction patterns""" + + def output_code(self, i, extracted, outerbits, outermask): + # Defer everything to our decomposed Tree node + self.tree.output_code(i, extracted, outerbits, outermask) + + @staticmethod + def __build_tree(pats, outerbits, outermask): + # Find the intersection of all remaining fixedmask. + innermask = ~outermask & insnmask + for i in pats: + innermask &= i.fixedmask + + if innermask == 0: + # Edge condition: One pattern covers the entire insnmask + if len(pats) == 1: + t = Tree(outermask, innermask) + t.subs.append((0, pats[0])) + return t + + text = 'overlapping patterns:' + for p in pats: + text += '\n' + p.file + ':' + str(p.lineno) + ': ' + str(p) + error_with_file(pats[0].file, pats[0].lineno, text) + + fullmask = outermask | innermask + + # Sort each element of pats into the bin selected by the mask. + bins = {} + for i in pats: + fb = i.fixedbits & innermask + if fb in bins: + bins[fb].append(i) + else: + bins[fb] = [i] + + # We must recurse if any bin has more than one element or if + # the single element in the bin has not been fully matched. + t = Tree(fullmask, innermask) + + for b, l in bins.items(): + s = l[0] + if len(l) > 1 or s.fixedmask & ~fullmask != 0: + s = ExcMultiPattern.__build_tree(l, b | outerbits, fullmask) + t.subs.append((b, s)) + + return t + + def build_tree(self): + super().build_tree() + self.tree = self.__build_tree(self.pats, self.fixedbits, + self.fixedmask) + + @staticmethod + def __prop_format(tree): + """Propagate Format objects into the decode tree""" + + # Depth first search. + for (b, s) in tree.subs: + if isinstance(s, Tree): + ExcMultiPattern.__prop_format(s) + + # If all entries in SUBS have the same format, then + # propagate that into the tree. + f = None + for (b, s) in tree.subs: + if f is None: + f = s.base + if f is None: + return + if f is not s.base: + return + tree.base = f + + def prop_format(self): + super().prop_format() + self.__prop_format(self.tree) + +# end ExcMultiPattern + + def parse_field(lineno, name, toks): """Parse one instruction field from TOKS at LINENO""" global fields - global re_ident global insnwidth + global re_C_ident # A "simple" field will have only one entry; # a "multifield" will have several. @@ -491,44 +870,69 @@ def parse_field(lineno, name, toks): width = 0 func = None for t in toks: - if re_fullmatch('!function=' + re_ident, t): + if re.match('^!function=', t): if func: error(lineno, 'duplicate function') func = t.split('=') func = func[1] continue - if re_fullmatch('[0-9]+:s[0-9]+', t): + if re.fullmatch(re_C_ident + ':s[0-9]+', t): + # Signed named field + subtoks = t.split(':') + n = subtoks[0] + le = int(subtoks[1]) + f = NamedField(n, True, le) + subs.append(f) + width += le + continue + if re.fullmatch(re_C_ident + ':[0-9]+', t): + # Unsigned named field + subtoks = t.split(':') + n = subtoks[0] + le = int(subtoks[1]) + f = NamedField(n, False, le) + subs.append(f) + width += le + continue + + if re.fullmatch('[0-9]+:s[0-9]+', t): # Signed field extract subtoks = t.split(':s') sign = True - elif re_fullmatch('[0-9]+:[0-9]+', t): + elif re.fullmatch('[0-9]+:[0-9]+', t): # Unsigned field extract subtoks = t.split(':') sign = False else: - error(lineno, 'invalid field token "{0}"'.format(t)) + error(lineno, f'invalid field token "{t}"') po = int(subtoks[0]) le = int(subtoks[1]) if po + le > insnwidth: - error(lineno, 'field {0} too large'.format(t)) + error(lineno, f'field {t} too large') f = Field(sign, po, le) subs.append(f) width += le if width > insnwidth: error(lineno, 'field too large') - if len(subs) == 1: - f = subs[0] + if len(subs) == 0: + if func: + f = ParameterField(func) + else: + error(lineno, 'field with no value') else: - mask = 0 - for s in subs: - if mask & s.mask: - error(lineno, 'field components overlap') - mask |= s.mask - f = MultiField(subs, mask) - if func: - f = FunctionField(func, f) + if len(subs) == 1: + f = subs[0] + else: + mask = 0 + for s in subs: + if mask & s.mask: + error(lineno, 'field components overlap') + mask |= s.mask + f = MultiField(subs, mask) + if func: + f = FunctionField(func, f) if name in fields: error(lineno, 'duplicate field', name) @@ -539,19 +943,31 @@ def parse_field(lineno, name, toks): def parse_arguments(lineno, name, toks): """Parse one argument set from TOKS at LINENO""" global arguments - global re_ident + global re_C_ident + global anyextern flds = [] - for t in toks: - if not re_fullmatch(re_ident, t): - error(lineno, 'invalid argument set token "{0}"'.format(t)) - if t in flds: - error(lineno, 'duplicate argument "{0}"'.format(t)) - flds.append(t) + types = [] + extern = False + for n in toks: + if re.fullmatch('!extern', n): + extern = True + anyextern = True + continue + if re.fullmatch(re_C_ident + ':' + re_C_ident, n): + (n, t) = n.split(':') + elif re.fullmatch(re_C_ident, n): + t = 'int' + else: + error(lineno, f'invalid argument set token "{n}"') + if n in flds: + error(lineno, f'duplicate argument "{n}"') + flds.append(n) + types.append(t) if name in arguments: error(lineno, 'duplicate argument set', name) - arguments[name] = Arguments(name, flds) + arguments[name] = Arguments(name, flds, types, extern) # end parse_arguments @@ -575,20 +991,22 @@ def add_field_byname(lineno, flds, new_name, old_name): def infer_argument_set(flds): global arguments + global decode_function for arg in arguments.values(): - if eq_fields_for_args(flds, arg.fields): + if eq_fields_for_args(flds, arg): return arg - name = str(len(arguments)) - arg = Arguments(name, flds.keys()) + name = decode_function + str(len(arguments)) + arg = Arguments(name, flds.keys(), ['int'] * len(flds), False) arguments[name] = arg return arg -def infer_format(arg, fieldmask, flds): +def infer_format(arg, fieldmask, flds, width): global arguments global formats + global decode_function const_flds = {} var_flds = {} @@ -604,30 +1022,38 @@ def infer_format(arg, fieldmask, flds): continue if fieldmask != fmt.fieldmask: continue + if width != fmt.width: + continue if not eq_fields_for_fmts(flds, fmt.fields): continue return (fmt, const_flds) - name = 'Fmt_' + str(len(formats)) + name = decode_function + '_Fmt_' + str(len(formats)) if not arg: arg = infer_argument_set(flds) - fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds) + fmt = Format(name, 0, arg, 0, 0, 0, fieldmask, var_flds, width) formats[name] = fmt return (fmt, const_flds) # end infer_format -def parse_generic(lineno, is_format, name, toks): +def parse_generic(lineno, parent_pat, name, toks): """Parse one instruction format from TOKS at LINENO""" global fields global arguments global formats - global patterns - global re_ident + global allpatterns + global re_arg_ident + global re_fld_ident + global re_fmt_ident + global re_C_ident global insnwidth global insnmask + global variablewidth + + is_format = parent_pat is None fixedmask = 0 fixedbits = 0 @@ -637,8 +1063,8 @@ def parse_generic(lineno, is_format, name, toks): arg = None fmt = None for t in toks: - # '&Foo' gives a format an explcit argument set. - if t[0] == '&': + # '&Foo' gives a format an explicit argument set. + if re.fullmatch(re_arg_ident, t): tt = t[1:] if arg: error(lineno, 'multiple argument sets') @@ -649,7 +1075,7 @@ def parse_generic(lineno, is_format, name, toks): continue # '@Foo' gives a pattern an explicit format. - if t[0] == '@': + if re.fullmatch(re_fmt_ident, t): tt = t[1:] if fmt: error(lineno, 'multiple formats') @@ -660,19 +1086,19 @@ def parse_generic(lineno, is_format, name, toks): continue # '%Foo' imports a field. - if t[0] == '%': + if re.fullmatch(re_fld_ident, t): tt = t[1:] flds = add_field_byname(lineno, flds, tt, tt) continue # 'Foo=%Bar' imports a field with a different name. - if re_fullmatch(re_ident + '=%' + re_ident, t): + if re.fullmatch(re_C_ident + '=' + re_fld_ident, t): (fname, iname) = t.split('=%') flds = add_field_byname(lineno, flds, fname, iname) continue # 'Foo=number' sets an argument field to a constant value - if re_fullmatch(re_ident + '=[0-9]+', t): + if re.fullmatch(re_C_ident + '=[+-]?[0-9]+', t): (fname, value) = t.split('=') value = int(value) flds = add_field(lineno, flds, fname, ConstField(value)) @@ -680,7 +1106,7 @@ def parse_generic(lineno, is_format, name, toks): # Pattern of 0s, 1s, dots and dashes indicate required zeros, # required ones, or dont-cares. - if re_fullmatch('[01.-]+', t): + if re.fullmatch('[01.-]+', t): shift = len(t) fms = t.replace('0', '1') fms = fms.replace('.', '0') @@ -697,27 +1123,36 @@ def parse_generic(lineno, is_format, name, toks): fixedmask = (fixedmask << shift) | fms undefmask = (undefmask << shift) | ubm # Otherwise, fieldname:fieldwidth - elif re_fullmatch(re_ident + ':s?[0-9]+', t): + elif re.fullmatch(re_C_ident + ':s?[0-9]+', t): (fname, flen) = t.split(':') sign = False if flen[0] == 's': sign = True flen = flen[1:] shift = int(flen, 10) + if shift + width > insnwidth: + error(lineno, f'field {fname} exceeds insnwidth') f = Field(sign, insnwidth - width - shift, shift) flds = add_field(lineno, flds, fname, f) fixedbits <<= shift fixedmask <<= shift undefmask <<= shift else: - error(lineno, 'invalid token "{0}"'.format(t)) + error(lineno, f'invalid token "{t}"') width += shift + if variablewidth and width < insnwidth and width % 8 == 0: + shift = insnwidth - width + fixedbits <<= shift + fixedmask <<= shift + undefmask <<= shift + undefmask |= (1 << shift) - 1 + # We should have filled in all of the bits of the instruction. - if not (is_format and width == 0) and width != insnwidth: - error(lineno, 'definition has {0} bits'.format(width)) + elif not (is_format and width == 0) and width != insnwidth: + error(lineno, f'definition has {width} bits') - # Do not check for fields overlaping fields; one valid usage + # Do not check for fields overlapping fields; one valid usage # is to be able to duplicate fields via import. fieldmask = 0 for f in flds.values(): @@ -733,14 +1168,13 @@ def parse_generic(lineno, is_format, name, toks): if arg: for f in flds.keys(): if f not in arg.fields: - error(lineno, 'field {0} not in argument set {1}' - .format(f, arg.name)) + error(lineno, f'field {f} not in argument set {arg.name}') else: arg = infer_argument_set(flds) if name in formats: error(lineno, 'duplicate format name', name) fmt = Format(name, lineno, arg, fixedbits, fixedmask, - undefmask, fieldmask, flds) + undefmask, fieldmask, flds, width) formats[name] = fmt else: # Patterns can reference a format ... @@ -750,54 +1184,70 @@ def parse_generic(lineno, is_format, name, toks): error(lineno, 'pattern specifies both format and argument set') if fixedmask & fmt.fixedmask: error(lineno, 'pattern fixed bits overlap format fixed bits') + if width != fmt.width: + error(lineno, 'pattern uses format of different width') fieldmask |= fmt.fieldmask fixedbits |= fmt.fixedbits fixedmask |= fmt.fixedmask undefmask |= fmt.undefmask else: - (fmt, flds) = infer_format(arg, fieldmask, flds) + (fmt, flds) = infer_format(arg, fieldmask, flds, width) arg = fmt.base for f in flds.keys(): if f not in arg.fields: - error(lineno, 'field {0} not in argument set {1}' - .format(f, arg.name)) + error(lineno, f'field {f} not in argument set {arg.name}') if f in fmt.fields.keys(): - error(lineno, 'field {0} set by format and pattern'.format(f)) + error(lineno, f'field {f} set by format and pattern') for f in arg.fields: if f not in flds.keys() and f not in fmt.fields.keys(): - error(lineno, 'field {0} not initialized'.format(f)) + error(lineno, f'field {f} not initialized') pat = Pattern(name, lineno, fmt, fixedbits, fixedmask, - undefmask, fieldmask, flds) - patterns.append(pat) + undefmask, fieldmask, flds, width) + parent_pat.pats.append(pat) + allpatterns.append(pat) # Validate the masks that we have assembled. if fieldmask & fixedmask: - error(lineno, 'fieldmask overlaps fixedmask (0x{0:08x} & 0x{1:08x})' - .format(fieldmask, fixedmask)) + error(lineno, 'fieldmask overlaps fixedmask ', + f'({whex(fieldmask)} & {whex(fixedmask)})') if fieldmask & undefmask: - error(lineno, 'fieldmask overlaps undefmask (0x{0:08x} & 0x{1:08x})' - .format(fieldmask, undefmask)) + error(lineno, 'fieldmask overlaps undefmask ', + f'({whex(fieldmask)} & {whex(undefmask)})') if fixedmask & undefmask: - error(lineno, 'fixedmask overlaps undefmask (0x{0:08x} & 0x{1:08x})' - .format(fixedmask, undefmask)) + error(lineno, 'fixedmask overlaps undefmask ', + f'({whex(fixedmask)} & {whex(undefmask)})') if not is_format: allbits = fieldmask | fixedmask | undefmask if allbits != insnmask: - error(lineno, 'bits left unspecified (0x{0:08x})' - .format(allbits ^ insnmask)) + error(lineno, 'bits left unspecified ', + f'({whex(allbits ^ insnmask)})') # end parse_general -def parse_file(f): +def parse_file(f, parent_pat): """Parse all of the patterns within a file""" + global re_arg_ident + global re_fld_ident + global re_fmt_ident + global re_pat_ident # Read all of the lines of the file. Concatenate lines # ending in backslash; discard empty lines and comments. toks = [] lineno = 0 + nesting = 0 + nesting_pats = [] + for line in f: lineno += 1 + # Expand and strip spaces, to find indent. + line = line.rstrip() + line = line.expandtabs() + len1 = len(line) + line = line.lstrip() + len2 = len(line) + # Discard comments end = line.find('#') if end >= 0: @@ -807,10 +1257,18 @@ def parse_file(f): if len(toks) != 0: # Next line after continuation toks.extend(t) - elif len(t) == 0: - # Empty line - continue else: + # Allow completely blank lines. + if len1 == 0: + continue + indent = len1 - len2 + # Empty line due to comment. + if len(t) == 0: + # Indentation must be correct, even for comment lines. + if indent != nesting: + error(lineno, 'indentation ', indent, ' != ', nesting) + continue + start_lineno = lineno toks = t # Continuation? @@ -818,42 +1276,83 @@ def parse_file(f): toks.pop() continue - if len(toks) < 2: - error(lineno, 'short line') - name = toks[0] del toks[0] + # End nesting? + if name == '}' or name == ']': + if len(toks) != 0: + error(start_lineno, 'extra tokens after close brace') + + # Make sure { } and [ ] nest properly. + if (name == '}') != isinstance(parent_pat, IncMultiPattern): + error(lineno, 'mismatched close brace') + + try: + parent_pat = nesting_pats.pop() + except: + error(lineno, 'extra close brace') + + nesting -= 2 + if indent != nesting: + error(lineno, 'indentation ', indent, ' != ', nesting) + + toks = [] + continue + + # Everything else should have current indentation. + if indent != nesting: + error(start_lineno, 'indentation ', indent, ' != ', nesting) + + # Start nesting? + if name == '{' or name == '[': + if len(toks) != 0: + error(start_lineno, 'extra tokens after open brace') + + if name == '{': + nested_pat = IncMultiPattern(start_lineno) + else: + nested_pat = ExcMultiPattern(start_lineno) + parent_pat.pats.append(nested_pat) + nesting_pats.append(parent_pat) + parent_pat = nested_pat + + nesting += 2 + toks = [] + continue + # Determine the type of object needing to be parsed. - if name[0] == '%': - parse_field(lineno, name[1:], toks) - elif name[0] == '&': - parse_arguments(lineno, name[1:], toks) - elif name[0] == '@': - parse_generic(lineno, True, name[1:], toks) + if re.fullmatch(re_fld_ident, name): + parse_field(start_lineno, name[1:], toks) + elif re.fullmatch(re_arg_ident, name): + parse_arguments(start_lineno, name[1:], toks) + elif re.fullmatch(re_fmt_ident, name): + parse_generic(start_lineno, None, name[1:], toks) + elif re.fullmatch(re_pat_ident, name): + parse_generic(start_lineno, parent_pat, name, toks) else: - parse_generic(lineno, False, name, toks) + error(lineno, f'invalid token "{name}"') toks = [] + + if nesting != 0: + error(lineno, 'missing close brace') # end parse_file -class Tree: - """Class representing a node in a decode tree""" +class SizeTree: + """Class representing a node in a size decode tree""" - def __init__(self, fm, tm): - self.fixedmask = fm - self.thismask = tm + def __init__(self, m, w): + self.mask = m self.subs = [] self.base = None + self.width = w def str1(self, i): ind = str_indent(i) - r = '{0}{1:08x}'.format(ind, self.fixedmask) - if self.format: - r += ' ' + self.format.name - r += ' [\n' + r = ind + whex(self.mask) + ' [\n' for (b, s) in self.subs: - r += '{0} {1:08x}:\n'.format(ind, b) + r += ind + f' {whex(b)}:\n' r += s.str1(i + 4) + '\n' r += ind + ']' return r @@ -864,59 +1363,97 @@ class Tree: def output_code(self, i, extracted, outerbits, outermask): ind = str_indent(i) - # If we identified all nodes below have the same format, - # extract the fields now. - if not extracted and self.base: - output(ind, self.base.extract_name(), - '(&u.f_', self.base.base.name, ', insn);\n') - extracted = True + # If we need to load more bytes to test, do so now. + if extracted < self.width: + output(ind, f'insn = {decode_function}_load_bytes', + f'(ctx, insn, {extracted // 8}, {self.width // 8});\n') + extracted = self.width # Attempt to aid the compiler in producing compact switch statements. # If the bits in the mask are contiguous, extract them. - sh = is_contiguous(self.thismask) + sh = is_contiguous(self.mask) if sh > 0: # Propagate SH down into the local functions. def str_switch(b, sh=sh): - return '(insn >> {0}) & 0x{1:x}'.format(sh, b >> sh) + return f'(insn >> {sh}) & {b >> sh:#x}' def str_case(b, sh=sh): - return '0x{0:x}'.format(b >> sh) + return hex(b >> sh) else: def str_switch(b): - return 'insn & 0x{0:08x}'.format(b) + return f'insn & {whexC(b)}' def str_case(b): - return '0x{0:08x}'.format(b) + return whexC(b) - output(ind, 'switch (', str_switch(self.thismask), ') {\n') + output(ind, 'switch (', str_switch(self.mask), ') {\n') for b, s in sorted(self.subs): - assert (self.thismask & ~s.fixedmask) == 0 - innermask = outermask | self.thismask + innermask = outermask | self.mask innerbits = outerbits | b output(ind, 'case ', str_case(b), ':\n') output(ind, ' /* ', str_match_bits(innerbits, innermask), ' */\n') s.output_code(i + 4, extracted, innerbits, innermask) output(ind, '}\n') - output(ind, 'return false;\n') -# end Tree + output(ind, 'return insn;\n') +# end SizeTree + +class SizeLeaf: + """Class representing a leaf node in a size decode tree""" + def __init__(self, m, w): + self.mask = m + self.width = w -def build_tree(pats, outerbits, outermask): - # Find the intersection of all remaining fixedmask. - innermask = ~outermask + def str1(self, i): + return str_indent(i) + whex(self.mask) + + def __str__(self): + return self.str1(0) + + def output_code(self, i, extracted, outerbits, outermask): + global decode_function + ind = str_indent(i) + + # If we need to load more bytes, do so now. + if extracted < self.width: + output(ind, f'insn = {decode_function}_load_bytes', + f'(ctx, insn, {extracted // 8}, {self.width // 8});\n') + extracted = self.width + output(ind, 'return insn;\n') +# end SizeLeaf + + +def build_size_tree(pats, width, outerbits, outermask): + global insnwidth + + # Collect the mask of bits that are fixed in this width + innermask = 0xff << (insnwidth - width) + innermask &= ~outermask + minwidth = None + onewidth = True for i in pats: innermask &= i.fixedmask + if minwidth is None: + minwidth = i.width + elif minwidth != i.width: + onewidth = False; + if minwidth < i.width: + minwidth = i.width + + if onewidth: + return SizeLeaf(innermask, minwidth) if innermask == 0: + if width < minwidth: + return build_size_tree(pats, width + 8, outerbits, outermask) + pnames = [] for p in pats: - pnames.append(p.name + ':' + str(p.lineno)) - error(pats[0].lineno, 'overlapping patterns:', pnames) + pnames.append(p.name + ':' + p.file + ':' + str(p.lineno)) + error_with_file(pats[0].file, pats[0].lineno, + f'overlapping patterns size {width}:', pnames) - fullmask = outermask | innermask - - # Sort each element of pats into the bin selected by the mask. bins = {} for i in pats: fb = i.fixedbits & innermask @@ -925,61 +1462,63 @@ def build_tree(pats, outerbits, outermask): else: bins[fb] = [i] - # We must recurse if any bin has more than one element or if - # the single element in the bin has not been fully matched. - t = Tree(fullmask, innermask) + fullmask = outermask | innermask + lens = sorted(bins.keys()) + if len(lens) == 1: + b = lens[0] + return build_size_tree(bins[b], width + 8, b | outerbits, fullmask) + r = SizeTree(innermask, width) for b, l in bins.items(): - s = l[0] - if len(l) > 1 or s.fixedmask & ~fullmask != 0: - s = build_tree(l, b | outerbits, fullmask) - t.subs.append((b, s)) - - return t -# end build_tree - + s = build_size_tree(l, width, b | outerbits, fullmask) + r.subs.append((b, s)) + return r +# end build_size_tree -def prop_format(tree): - """Propagate Format objects into the decode tree""" - # Depth first search. - for (b, s) in tree.subs: - if isinstance(s, Tree): - prop_format(s) +def prop_size(tree): + """Propagate minimum widths up the decode size tree""" - # If all entries in SUBS have the same format, then - # propagate that into the tree. - f = None - for (b, s) in tree.subs: - if f is None: - f = s.base - if f is None: - return - if f is not s.base: - return - tree.base = f -# end prop_format + if isinstance(tree, SizeTree): + min = None + for (b, s) in tree.subs: + width = prop_size(s) + if min is None or min > width: + min = width + assert min >= tree.width + tree.width = min + else: + min = tree.width + return min +# end prop_size def main(): global arguments global formats - global patterns + global allpatterns global translate_scope global translate_prefix global output_fd global output_file + global output_null global input_file global insnwidth global insntype global insnmask + global decode_function + global bitop_width + global variablewidth + global anyextern + global testforerror - decode_function = 'decode' decode_scope = 'static ' - long_opts = ['decode=', 'translate=', 'output=', 'insnwidth='] + long_opts = ['decode=', 'translate=', 'output=', 'insnwidth=', + 'static-decode=', 'varinsnwidth=', 'test-for-error', + 'output-null'] try: - (opts, args) = getopt.getopt(sys.argv[1:], 'o:w:', long_opts) + (opts, args) = getopt.gnu_getopt(sys.argv[1:], 'o:vw:', long_opts) except getopt.GetoptError as err: error(0, err) for o, a in opts: @@ -988,33 +1527,65 @@ def main(): elif o == '--decode': decode_function = a decode_scope = '' + elif o == '--static-decode': + decode_function = a elif o == '--translate': translate_prefix = a translate_scope = '' - elif o in ('-w', '--insnwidth'): + elif o in ('-w', '--insnwidth', '--varinsnwidth'): + if o == '--varinsnwidth': + variablewidth = True insnwidth = int(a) if insnwidth == 16: insntype = 'uint16_t' insnmask = 0xffff + elif insnwidth == 64: + insntype = 'uint64_t' + insnmask = 0xffffffffffffffff + bitop_width = 64 elif insnwidth != 32: error(0, 'cannot handle insns of width', insnwidth) + elif o == '--test-for-error': + testforerror = True + elif o == '--output-null': + output_null = True else: assert False, 'unhandled option' if len(args) < 1: error(0, 'missing input file') - input_file = args[0] - f = open(input_file, 'r') - parse_file(f) - f.close() - - t = build_tree(patterns, 0, 0) - prop_format(t) - if output_file: - output_fd = open(output_file, 'w') + toppat = ExcMultiPattern(0) + + for filename in args: + input_file = filename + f = open(filename, 'rt', encoding='utf-8') + parse_file(f, toppat) + f.close() + + # We do not want to compute masks for toppat, because those masks + # are used as a starting point for build_tree. For toppat, we must + # insist that decode begins from naught. + for i in toppat.pats: + i.prop_masks() + + toppat.build_tree() + toppat.prop_format() + + if variablewidth: + for i in toppat.pats: + i.prop_width() + stree = build_size_tree(toppat.pats, 8, 0, 0) + prop_size(stree) + + if output_null: + output_fd = open(os.devnull, 'wt', encoding='utf-8', errors="ignore") + elif output_file: + output_fd = open(output_file, 'wt', encoding='utf-8') else: - output_fd = sys.stdout + output_fd = io.TextIOWrapper(sys.stdout.buffer, + encoding=sys.stdout.encoding, + errors="ignore") output_autogen() for n in sorted(arguments.keys()): @@ -1024,8 +1595,19 @@ def main(): # A single translate function can be invoked for different patterns. # Make sure that the argument sets are the same, and declare the # function only once. + # + # If we're sharing formats, we're likely also sharing trans_* functions, + # but we can't tell which ones. Prevent issues from the compiler by + # suppressing redundant declaration warnings. + if anyextern: + output("#pragma GCC diagnostic push\n", + "#pragma GCC diagnostic ignored \"-Wredundant-decls\"\n", + "#ifdef __clang__\n" + "# pragma GCC diagnostic ignored \"-Wtypedef-redefinition\"\n", + "#endif\n\n") + out_pats = {} - for i in patterns: + for i in allpatterns: if i.name in out_pats: p = out_pats[i.name] if i.base.base != p.base.base: @@ -1035,6 +1617,9 @@ def main(): out_pats[i.name] = i output('\n') + if anyextern: + output("#pragma GCC diagnostic pop\n\n") + for n in sorted(formats.keys()): f = formats[n] f.output_extract() @@ -1043,18 +1628,28 @@ def main(): '(DisasContext *ctx, ', insntype, ' insn)\n{\n') i4 = str_indent(4) - output(i4, 'union {\n') - for n in sorted(arguments.keys()): - f = arguments[n] - output(i4, i4, f.struct_name(), ' f_', f.name, ';\n') - output(i4, '} u;\n\n') - t.output_code(4, False, 0, 0) + if len(allpatterns) != 0: + output(i4, 'union {\n') + for n in sorted(arguments.keys()): + f = arguments[n] + output(i4, i4, f.struct_name(), ' f_', f.name, ';\n') + output(i4, '} u;\n\n') + toppat.output_code(4, False, 0, 0) + output(i4, 'return false;\n') output('}\n') + if variablewidth: + output('\n', decode_scope, insntype, ' ', decode_function, + '_load(DisasContext *ctx)\n{\n', + ' ', insntype, ' insn = 0;\n\n') + stree.output_code(4, 0, 0, 0) + output('}\n') + if output_file: output_fd.close() + exit(1 if testforerror else 0) # end main |