Blame - tools/test_generator/parser.py - arm/vixl.git

blob: 65b76dfa46bce97fa5417d2a844ac8212d8d3a82 [file] [log] [blame]

Alexandre Rames	b78f139	2016-07-01 14:22:22 +0100	[diff] [blame]	1	# Copyright 2016, VIXL authors
Pierre Langlois	88c46b8	2016-06-02 18:15:32 +0100	[diff] [blame]	2	# All rights reserved.
				3	#
				4	# Redistribution and use in source and binary forms, with or without
				5	# modification, are permitted provided that the following conditions are met:
				6	#
				7	# * Redistributions of source code must retain the above copyright notice,
				8	# this list of conditions and the following disclaimer.
				9	# * Redistributions in binary form must reproduce the above copyright notice,
				10	# this list of conditions and the following disclaimer in the documentation
				11	# and/or other materials provided with the distribution.
				12	# * Neither the name of ARM Limited nor the names of its contributors may be
				13	# used to endorse or promote products derived from this software without
				14	# specific prior written permission.
				15	#
				16	# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
				17	# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
				18	# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
				19	# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
				20	# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
				21	# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
				22	# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
				23	# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
				24	# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				25	# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				26
				27	import json
				28	import re
				29	import os
				30	import hashlib
				31	import collections
				32	import itertools
				33
				34	from test_generator import data_types
				35	from test_generator import generator
				36
				37	class DataTypeBuilder(object):
				38	"""
				39	Factory object for building `data_types.Operand` and `data_types.Input`
				40	objects. This object stores information about all operand and input types
				41	described in JSON as dictionnaries indexed by their identifier. See
				42	`test/a32/config/data-types.json` as a reference.
				43
				44	Attributes:
				45	operand_types Dictionnary of type names corresponding to the JSON
				46	"type" field.
				47	operand_variants Dictionnary of (variants, default) tuples.
				48
				49	input_types Dictionnary of type names corresponding to the JSON
				50	"type" field.
				51	input_values Dictionnary of (values, default) tuples.
				52	"""
				53
				54	def __init__(self, operand_types, operand_variants, input_types,
				55	input_values):
				56	self.operand_types = operand_types
				57	self.operand_variants = operand_variants
				58	self.input_types = input_types
				59	self.input_values = input_values
				60
				61	def BuildOperand(self, name, identifier):
				62	"""
				63	Build a `data_types.Operand` object with the name `name`. `identifier`
				64	identifies which type we want to create, as declared in JSON.
				65	"""
				66	type_name = self.operand_types[identifier]
				67	variants, default = self.operand_variants[identifier]
				68	# We simply pass the `type_name` as a parameter which will be used verbatim
				69	# in the code.
				70	return data_types.Operand(name, type_name, variants, default)
				71
				72	def BuildInput(self, name, identifier):
				73	"""
				74	Build a `data_types.Input` object with the name `name`. `identifier`
				75	identifies which type we want to create, as declared in JSON.
				76	"""
				77	type_name = self.input_types[identifier]
				78	values, default = self.input_values[identifier]
				79	# For `data_types.Input` types, the `type_name` refers to the actual name of
				80	# the Python class, inheriting from `Input`. This is done so that different
				81	# input types can generate different C++ code by overriding the `Load` and
				82	# `Store` methods.
				83	input_constructor = getattr(data_types, type_name)
				84	return input_constructor(name, values, default)
				85
				86
				87	def LoadJSON(filename):
				88	"""
				89	Read `filename`, strip its comments and load as JSON.
				90	"""
				91	with open(filename, "r") as f:
				92	match_cpp_comments = re.compile("//.*\n")
				93	# The order in which structures are described in JSON matters as we use them
				94	# as a seed. Computing a hash from a unordered dict always gives a different
				95	# value. We use the `object_pairs_hook` to make the json module create
				96	# `OrderedDict` objects instead of builtin `dict` objects.
				97	return json.loads(match_cpp_comments.sub("", f.read()),
				98	object_pairs_hook=collections.OrderedDict)
				99
				100
				101	def ParseDataTypes(json_data_types):
				102	"""
				103	Build a `DataTypeBuilder` object containing all information from the JSON
				104	description in `json_data_types`.
				105
				106	~~~
				107	{
				108	"operands": [
				109	{
				110	"identifier": "AllRegistersButPC"
				111	"type": "Register"
				112	"variants": [
				113	"r0",
				114	"r1",
				115	"r2",
				116	"r3"
				117	]
				118	"default": "r0"
				119	},
				120	{
				121	...
				122	}
				123	],
				124	"inputs": [
				125	{
				126	"identifier": "Register"
				127	"type": "Register"
				128	"values": [
				129	"0x00000000",
				130	"0xffffffff",
				131	"0xabababab"
				132	]
				133	"default": "0xabababab"
				134	},
				135	{
				136	...
				137	}
				138	]
				139	}
				140	~~~
				141	"""
				142	operand_types = {
				143	json_operand_type["identifier"]: json_operand_type["type"]
				144	for json_operand_type in json_data_types["operands"]
				145	}
				146	operand_variants = {
				147	json_operand_type["identifier"]:
				148	(json_operand_type["variants"], json_operand_type["default"])
				149	for json_operand_type in json_data_types["operands"]
				150	}
				151	input_types = {
				152	json_input_type["identifier"]: json_input_type["type"]
				153	for json_input_type in json_data_types["inputs"]
				154	}
				155	input_values = {
				156	json_input_type["identifier"]:
				157	(json_input_type["values"], json_input_type["default"])
				158	for json_input_type in json_data_types["inputs"]
				159	}
				160	return DataTypeBuilder(operand_types, operand_variants, input_types, input_values)
				161
				162
				163	def ParseDescription(data_type_builder, json_description):
				164	"""
				165	Parse the instruction description into a
				166	(`generator.OperandList`, `generator.InputList`) tuple.
				167
Josh Soref	b43d6ef	2022-08-03 12:47:14 -0400	[diff] [blame^]	168	Example for an instruction that takes a condition code, two registers and an
Pierre Langlois	88c46b8	2016-06-02 18:15:32 +0100	[diff] [blame]	169	immediate as operand. It will also need inputs for the registers, as well as
				170	NZCV flags.
				171	~~~
				172	{
				173	"operands": [
				174	{
				175	"name": "cond",
				176	"type": "Condition",
				177	},
				178	{
				179	"name": "rd",
				180	"type": "RegisterScratch",
				181	},
				182	{
				183	"name": "rn",
				184	"type": "RegisterScratch",
				185	},
				186	// The last operand needs to be wrapped into a C++ `Operand` object. We
				187	// declare the operands that need to be wrapped as a list.
				188	{
				189	"name": "op",
				190	"wrapper": "Operand",
				191	"operands": [
				192	{
				193	"name": "immediate",
				194	"type": "ModifiedImmediate",
				195	}
				196	]
				197	}
				198	],
				199	"inputs": [
				200	{
				201	"name": "apsr",
				202	"type": "NZCV"
				203	},
				204	{
				205	"name": "rd",
				206	"type": "Register"
				207	},
				208	{
				209	"name": "rn",
				210	"type": "Register"
				211	}
				212	]
				213	]
				214	~~~
				215	"""
				216
				217	operands = []
				218	for json_operand in json_description["operands"]:
				219	if "name" in json_operand and "type" in json_operand:
				220	operands.append(data_type_builder.BuildOperand(json_operand["name"],
				221	json_operand["type"]))
				222	elif "name" in json_operand and \
				223	"wrapper" in json_operand and \
				224	"operands" in json_operand:
				225	wrapped_operands = [
				226	data_type_builder.BuildOperand(json_wrapped_operand["name"],
				227	json_wrapped_operand["type"])
				228	for json_wrapped_operand in json_operand["operands"]
				229	]
				230	operands.append(data_types.OperandWrapper(json_operand["name"],
				231	json_operand["wrapper"],
				232	wrapped_operands))
				233	else:
				234	raise Exception("Parser failed to recognize JSON \"description\".")
				235	operand_list = generator.OperandList(operands)
				236
				237	json_description_inputs = json_description["inputs"]
				238	input_list = generator.InputList([
				239	data_type_builder.BuildInput(json_input["name"], json_input["type"])
				240	for json_input in json_description_inputs
				241	])
				242
				243	return operand_list, input_list
				244
				245
				246	def ParseTestCase(json_test_case):
				247	"""
				248	Build a `generator.TestCase` object from its JSON description.
				249
				250	~~~
				251	{
				252	"name": "RdIsNotRn",
				253	"operands": [
				254	"rd", "rn"
				255	],
				256	"inputs": [
				257	"rd", "rn"
				258	],
				259	"operand-filter": "rd != rn", // Python code to limit operand generation.
				260	"operand-limit": 10 // Choose a random sample of 10 operands.
				261	}
				262	...
				263	{
				264	"name": "Flags",
				265	"operands": [
				266	"cond"
				267	],
				268	"inputs": [
				269	"apsr", "q"
				270	],
				271	"input-filter": "q == \"QFlag\"", // Python code to limit input generation
				272	"input-limit": 200 // Choose a random sample of 200 inputs.
				273	}
				274	...
				275	{
				276	"name": "InITBlock",
				277	"operands": [
				278	"cond", "rd", "rn", "rm"
				279	],
Pierre Langlois	5b0cbc8	2016-09-26 14:00:30 +0100	[diff] [blame]	280	"in-it-block": "{cond}", // Generate an extra IT instruction. This string
				281	// will be used as the operand passed to IT. One
				282	// needs to specify under what name the condition
				283	// operand is represented, in braces.
Pierre Langlois	88c46b8	2016-06-02 18:15:32 +0100	[diff] [blame]	284	"operand-filter": "cond != 'al' and rd == rm"
				285	}
				286	~~~
				287	"""
				288
				289	# TODO: The fields in "operands" and "inputs" respectively refer to operands
				290	# and inputs declared in the instruction description (see `ParseDescription`).
				291	# We should assert that the user hasn't miss typed them and raise an
				292	# exception.
				293
				294	# If the fields are not present, give them default values (empty list,
				295	# "True", or "None").
				296	operand_names = json_test_case["operands"] \
				297	if "operands" in json_test_case else []
				298	input_names = json_test_case["inputs"] if "inputs" in json_test_case else []
				299	operand_filter = json_test_case["operand-filter"] \
				300	if "operand-filter" in json_test_case else "True"
				301	input_filter = json_test_case["input-filter"] \
				302	if "input-filter" in json_test_case else "True"
				303	operand_limit = json_test_case["operand-limit"] \
				304	if "operand-limit" in json_test_case else None
				305	input_limit = json_test_case["input-limit"] \
				306	if "input-limit" in json_test_case else None
Pierre Langlois	5b0cbc8	2016-09-26 14:00:30 +0100	[diff] [blame]	307	in_it_block = json_test_case["in-it-block"] \
				308	if "in-it-block" in json_test_case else None
Pierre Langlois	88c46b8	2016-06-02 18:15:32 +0100	[diff] [blame]	309
				310	# Create a seed from the test case description. It will only change if the
				311	# test case has changed.
				312	md5 = hashlib.md5(str(json_test_case).encode())
				313	seed = md5.hexdigest()
				314
				315	return generator.TestCase(json_test_case["name"], seed, operand_names, input_names,
				316	operand_filter, input_filter, operand_limit,
Pierre Langlois	5b0cbc8	2016-09-26 14:00:30 +0100	[diff] [blame]	317	input_limit, in_it_block)
Pierre Langlois	88c46b8	2016-06-02 18:15:32 +0100	[diff] [blame]	318
				319
				320	def ParseTestFile(test_name, test_isa, mnemonics, operand_list, input_list,
				321	json_test_file):
				322	"""
				323	Build a `generator.Generator` object from a test file description. We have one
				324	for each generated test files.
				325
				326	~~~
				327	{
				328	"type": "simulator", // Type of the test. This will control the prefix we
				329	// use when naming the file to generate.
				330	"name": "special-case", // Optional name that will be included in the
				331	// generated filename.
				332	"mnemonics": [ // Optional list of instruction, overriding the top-level
				333	"Adc", // one.
				334	"Add",
				335	...
				336	],
				337	"test-cases": [
				338	... // Test case descriptions parsed with `ParseTestCase`.
				339	]
				340	}
				341	~~~
				342	"""
				343	name = json_test_file["name"] if "name" in json_test_file else ""
				344	if name is not "":
				345	test_name = test_name + "-" + name
				346	# Override the top-level mnemonics list with a subset.
				347	if "mnemonics" in json_test_file:
				348	if set(json_test_file["mnemonics"]) == set(mnemonics):
				349	raise Exception(
				350	"Overriding mnemonic list is identical to the top-level list")
				351	if not(set(json_test_file["mnemonics"]) < set(mnemonics)):
				352	raise Exception(
				353	"Overriding mnemonic list should a subset of the top-level list")
				354	mnemonics = json_test_file["mnemonics"]
				355	test_cases = [
				356	ParseTestCase(json_test_case)
				357	for json_test_case in json_test_file["test-cases"]
				358	]
				359	return generator.Generator(test_name, test_isa, json_test_file["type"],
				360	mnemonics, operand_list, input_list, test_cases)
				361
				362
def ParseConfig(test_name, test_isas, data_type_builder, json_config):
    """
    Return an iterator over `generator.Generator` objects built from a JSON
    description. This is the top-level description.

    One generator is created for every combination of an ISA in `test_isas`
    and an entry in "test-files". All generators are built before this
    function returns; they are grouped by ISA, in the order of `test_isas`.

    ~~~
    {
      "mnemonics": [
        "Adc",
        "Add",
        ...
      ],
      "description": [
        ... // Instruction description parsed with `ParseDescription`.
      ],
      "test-files": [
        ... // Test files descriptions parsed with `ParseTestFile`.
      ]
    }
    ~~~
    """
    mnemonics = json_config["mnemonics"]
    operand_list, input_list = ParseDescription(data_type_builder,
                                                json_config["description"])

    generators_per_isa = []
    for test_isa in test_isas:
        generators_for_isa = []
        for json_test_file in json_config["test-files"]:
            generators_for_isa.append(
                ParseTestFile(test_name, test_isa, mnemonics, operand_list,
                              input_list, json_test_file))
        generators_per_isa.append(generators_for_isa)

    # Flatten the per-ISA lists into a single stream of generators.
    return itertools.chain.from_iterable(generators_per_isa)
				395
				396
				397	def GetTestNameAndISAFromFileName(filename):
				398	"""
				399	Return a tuple (name, [isa, ...]) extracted from the file name.
				400	"""
				401	# Strip the ".json" extension
				402	stripped_basename = os.path.splitext(os.path.basename(filename))[0]
Josh Soref	b43d6ef	2022-08-03 12:47:14 -0400	[diff] [blame^]	403	# The ISA is the last element in the filename, separated with "-".
Pierre Langlois	d1bf278	2016-09-27 15:05:07 +0100	[diff] [blame]	404	if stripped_basename.endswith(('-a32', '-t32')):
				405	isa = [stripped_basename[-3:]]
				406	test_name = stripped_basename[:-4]
				407	else:
Josh Soref	b43d6ef	2022-08-03 12:47:14 -0400	[diff] [blame^]	408	# If the ISA is omitted, support both.
Pierre Langlois	d1bf278	2016-09-27 15:05:07 +0100	[diff] [blame]	409	isa = ["a32", "t32"]
				410	test_name = stripped_basename
				411
				412	return (test_name, isa)
Pierre Langlois	88c46b8	2016-06-02 18:15:32 +0100	[diff] [blame]	413
				414
				415	def GetTestNameFromFileName(filename):
				416	"""
Pierre Langlois	d1bf278	2016-09-27 15:05:07 +0100	[diff] [blame]	417	Return the name given to this test from its file name, stripped of the
				418	optional "a32" or "t32" at the end.
Pierre Langlois	88c46b8	2016-06-02 18:15:32 +0100	[diff] [blame]	419	"""
Pierre Langlois	d1bf278	2016-09-27 15:05:07 +0100	[diff] [blame]	420	test_name, _ = GetTestNameAndISAFromFileName(filename)
				421	return test_name
Pierre Langlois	88c46b8	2016-06-02 18:15:32 +0100	[diff] [blame]	422
				423
def GetISAsFromFileName(filename):
    """
    Return the list of ISAs the test supports, as encoded in `filename`:
    ["a32"], ["t32"], or both when the file name does not specify one.
    """
    # The ISA list is the second element of the (name, isas) tuple.
    return GetTestNameAndISAFromFileName(filename)[1]
Pierre Langlois	88c46b8	2016-06-02 18:15:32 +0100	[diff] [blame]	432
				433	def Parse(data_type_file, config_files):
				434	"""
				435	Parse the `data_type_file` and `test_case_files` json description files into a
				436	list of (name, test_case) tuples. Test cases are `generator.TestCase`
				437	objects that can be used to generate C++.
				438	"""
				439
				440	# Create a `DataTypeBuilder` object. This object will passed down and used to
				441	# instantiate `data_types.Operand` and `data_types.Input` objects.
				442	data_type_builder = ParseDataTypes(LoadJSON(data_type_file))
				443
				444	# Build a list of (name, JSON) tuples to represent the new tests.
				445	json_configs = [
				446	# We create the name of the test by looking at the file name stripped of
				447	# its extension.
Pierre Langlois	d1bf278	2016-09-27 15:05:07 +0100	[diff] [blame]	448	(GetTestNameFromFileName(config_file), GetISAsFromFileName(config_file),
Pierre Langlois	88c46b8	2016-06-02 18:15:32 +0100	[diff] [blame]	449	LoadJSON(config_file))
				450	for config_file in config_files
				451	]
				452
				453	# Return a list of Generator objects. The generator is the entry point to
				454	# generate a file.
				455	# Note that `ParseConfig` returns a list of generators already. We use `chain`
				456	# here to flatten a list of lists into just a list.
				457	return itertools.chain(*[
Pierre Langlois	d1bf278	2016-09-27 15:05:07 +0100	[diff] [blame]	458	ParseConfig(test_name, test_isas, data_type_builder, json_config)
				459	for test_name, test_isas, json_config in json_configs
Pierre Langlois	88c46b8	2016-06-02 18:15:32 +0100	[diff] [blame]	460	])