Pierre Langlois | 88c46b8 | 2016-06-02 18:15:32 +0100 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | |
Alexandre Rames | b78f139 | 2016-07-01 14:22:22 +0100 | [diff] [blame] | 3 | # Copyright 2016, VIXL authors |
Pierre Langlois | 88c46b8 | 2016-06-02 18:15:32 +0100 | [diff] [blame] | 4 | # All rights reserved. |
| 5 | # |
| 6 | # Redistribution and use in source and binary forms, with or without |
| 7 | # modification, are permitted provided that the following conditions are met: |
| 8 | # |
| 9 | # * Redistributions of source code must retain the above copyright notice, |
| 10 | # this list of conditions and the following disclaimer. |
| 11 | # * Redistributions in binary form must reproduce the above copyright notice, |
| 12 | # this list of conditions and the following disclaimer in the documentation |
| 13 | # and/or other materials provided with the distribution. |
| 14 | # * Neither the name of ARM Limited nor the names of its contributors may be |
| 15 | # used to endorse or promote products derived from this software without |
| 16 | # specific prior written permission. |
| 17 | # |
| 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND |
| 19 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| 20 | # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE |
| 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 28 | |
| 29 | """ |
| 30 | Verify generated AArch32 assembler traces against `llvm-mc`. |
| 31 | |
This script will find all files in `test/aarch32/traces/` with names starting
with `assembler`, and check them against `llvm-mc`. It checks our assembler is
Josh Soref | b43d6ef | 2022-08-03 12:47:14 -0400 | [diff] [blame] | 34 | correct by looking up what instruction we meant to assemble, assemble it with |
Pierre Langlois | 88c46b8 | 2016-06-02 18:15:32 +0100 | [diff] [blame] | 35 | `llvm` and check the result is bit identical to what our assembler generated. |
| 36 | |
| 37 | You may run the script with no arguments from VIXL's top-level directory as long |
| 38 | as `llvm-mc` is in your PATH. You may provide a different `llvm-mc` path with |
| 39 | the `--llvm-mc` option. This script relies on version 3.8 or higher of |
| 40 | LLVM. Previous versions refuse to assemble some instructions that ARMv8 allows, |
| 41 | but ARMv7 did not. |
| 42 | |
| 43 | For example, let's say we have the following assembler trace for CLZ |
| 44 | (the real trace is a lot bigger): |
| 45 | |
| 46 | ~~~ |
| 47 | static const byte kInstruction_Clz_eq_r0_r0[] = { |
| 48 | 0x10, 0x0f, 0x6f, 0x01 // Clz eq r0 r0 |
| 49 | }; |
| 50 | static const byte kInstruction_Clz_eq_r0_r1[] = { |
| 51 | 0x11, 0x0f, 0x6f, 0x01 // Clz eq r0 r1 |
| 52 | }; |
| 53 | static const byte kInstruction_Clz_eq_r0_r2[] = { |
| 54 | 0x12, 0x0f, 0x6f, 0x01 // Clz eq r0 r2 |
| 55 | }; |
| 56 | static const TestResult kReferenceClz[] = { |
| 57 | { |
| 58 | ARRAY_SIZE(kInstruction_Clz_eq_r0_r0), |
| 59 | kInstruction_Clz_eq_r0_r0, |
| 60 | }, |
| 61 | { |
| 62 | ARRAY_SIZE(kInstruction_Clz_eq_r0_r1), |
| 63 | kInstruction_Clz_eq_r0_r1, |
| 64 | }, |
| 65 | { |
| 66 | ARRAY_SIZE(kInstruction_Clz_eq_r0_r2), |
| 67 | kInstruction_Clz_eq_r0_r2, |
| 68 | }, |
| 69 | }; |
| 70 | ~~~ |
| 71 | |
| 72 | The traces contain both the list of bytes that were encoded as well as a comment |
| 73 | with a description of the instruction this is. This script searches for these |
| 74 | lines and checks them. |
| 75 | |
| 76 | With our example, the script will find the following: |
| 77 | |
| 78 | [ |
| 79 | ("Clz eq r0 r0", ["0x10", "0x0f", "0x6f", "0x01"]), |
| 80 | ("Clz eq r0 r1", ["0x11", "0x0f", "0x6f", "0x01"]), |
| 81 | ("Clz eq r0 r2", ["0x12", "0x0f", "0x6f", "0x01"]) |
| 82 | ] |
| 83 | |
| 84 | Then the tricky part is to convert the description of the instruction into the |
| 85 | following valid assembly syntax: |
| 86 | |
| 87 | clzeq r0, r0 |
| 88 | clzeq r0, r1 |
| 89 | clzeq r0, r2 |
| 90 | |
| 91 | Our example is easy, but it gets more complicated with load and store |
| 92 | instructions for example. We can feed this as input to `llvm-mc`: |
| 93 | |
| 94 | $ echo " |
| 95 | clzeq r0, r0 |
| 96 | clzeq r0, r1 |
| 97 | clzeq r0, r2 |
| 98 | " | llvm-mc -assemble -arch=arm -mattr=v8,crc -show-encoding |
| 99 | |
| 100 | And we will get the following output: |
| 101 | |
| 102 | .text |
| 103 | clzeq r0, r0 @ encoding: [0x10,0x0f,0x6f,0x01] |
| 104 | clzeq r0, r1 @ encoding: [0x11,0x0f,0x6f,0x01] |
| 105 | clzeq r0, r2 @ encoding: [0x12,0x0f,0x6f,0x01] |
| 106 | |
| 107 | The script will finally extract the encoding and compare it to what VIXL |
| 108 | generated. |
| 109 | """ |
| 110 | |
| 111 | import argparse |
| 112 | import subprocess |
| 113 | import os |
| 114 | import re |
| 115 | import itertools |
| 116 | import types |
| 117 | |
def BuildOptions():
  """
  Parse the command line and return the resulting `argparse` namespace.

  Two options are recognised: `--llvm-mc`, the path of the `llvm-mc` binary
  (defaults to `llvm-mc`), and the boolean flag `--verbose` / `-v`.
  """
  parser = argparse.ArgumentParser(
      formatter_class=argparse.ArgumentDefaultsHelpFormatter,
      description='Use `llvm-mc` to check the assembler traces are correct.')
  parser.add_argument('--llvm-mc', help='Path to llvm-mc', default='llvm-mc')
  parser.add_argument('--verbose', '-v', action='store_true')
  return parser.parse_args()
| 125 | |
| 126 | |
def CheckLLVMVersion(llvm_mc):
  """
  Make sure the `llvm-mc` binary at `llvm_mc` is recent enough.

  Runs `llvm_mc -version` and looks for "LLVM version <major>.<minor>" in its
  output. Raises an `Exception` if no version can be found or if the version is
  lower than 3.8.
  """
  version = subprocess.check_output([llvm_mc, '-version']).decode()
  # Each component may have several digits (LLVM 10 and later), and the patch
  # level may be followed by a suffix, so only anchor on major and minor.
  m = re.search(r"LLVM version (\d+)\.(\d+)", version)
  if m is None:
    raise Exception(
        "Failed to find the LLVM version in the output of "
        "`{} -version`.".format(llvm_mc))
  major, minor = (int(component) for component in m.groups())
  if (major, minor) < (3, 8):
    raise Exception("This script requires LLVM version 3.8 or higher.")
| 133 | |
| 134 | |
def ConvertToLLVMFormat(vixl_instruction, triple):
  """
  Take a string representing an instruction and convert it to assembly syntax
  for LLVM. VIXL's test generation framework will print instruction
  representations as a space separated list. The first element is the mnemonic
  and the following elements are operands.

  `vixl_instruction` may hold several instructions separated by ';'; the result
  then has one line per instruction. `triple` selects extra rules: "thumbv8"
  enables the T32-specific workarounds below.

  An `Exception` is raised as soon as one of the instructions does not match
  any converter, or if there is no instruction at all.
  """

  def DtUntypedToLLVM(matches):
    """
    Build `<mnemonic>.<size> <dd>, <dn>, <dm>` from an "untyped<size>" data
    type; LLVM only wants the size for untyped data types.
    """
    sizes = {"untyped8": "8", "untyped16": "16", "untyped32": "32"}
    if matches[1] not in sizes:
      raise Exception("Unexpected untyped data type {}.".format(matches[1]))
    return "{}.{} {}, {}, {}".format(
        matches[0], sizes[matches[1]], matches[2], matches[3], matches[4])

  # Dictionary of patterns. The key is an identifier used in
  # `llvm_mc_instruction_converters` below. The value needs to be a capturing
  # regular expression.
  pattern_matchers = {
    # Allow an optional underscore in case this is an "and" instruction.
    "mnemonic": r"(\w+?)_?",
    "condition":
      r"(al|eq|ne|cs|cc|mi|pl|vs|vc|hi|ls|ge|lt|gt|le)",
    "register":
      r"(r0|r1|r2|r3|r4|r5|r6|r7|r8|r9|r10|r11|r12|r13|r14|r15|pc|sp|lr)",
    "immediate": r"(0x[0-9a-f]+|[0-9]+)",
    "shift": r"(lsl|lsr|asr|ror)",
    "dregister": r"(d[0-9]|d[12][0-9]|d3[01])",
    "dt": r"(s8|s16|s32|s64|u8|u16|u32|u64|f16|f32|f64|i8|i16|i32|i64|p8|p64)",
    "dt_untyped": r"(untyped8|untyped16|untyped32)"
  }

  # List of converters. Each of them represents an instruction form and what to
  # convert it to. This list needs to be complete; an exception is raised if we
  # couldn't find a converter for the instruction.
  #
  # The first part of each tuple is a pattern to match. It's simply a regular
  # expression. Additionally, each identifier in curly braces is replaced by the
  # corresponding pattern from `pattern_matchers`.
  #
  # The second part of the tuple is either a string that describes what the
  # result will look like (empty curly braces are replaced by matches, in
  # order), or a function taking the tuple of matches and returning the result.
  #
  # Converters are tried in order and the first match wins.
  llvm_mc_instruction_converters = [
    ("it {condition}", "it {}"),
    ("{mnemonic} {condition} {register} {immediate}",
     "{}{} {}, #{}"),
    ("{mnemonic} {condition} {register} {register} {immediate}",
     "{}{} {}, {}, #{}"),
    ("{mnemonic} {condition} {register} {register}",
     "{}{} {}, {}"),
    ("{mnemonic} {condition} {register} {register} {register}",
     "{}{} {}, {}, {}"),
    ("{mnemonic} {register} {register} {register}",
     "{} {}, {}, {}"),
    ("{mnemonic} {condition} {register} {register} {register} {shift} "
     "{immediate}",
     "{}{} {}, {}, {}, {} #{}"),
    ("{mnemonic} {condition} {register} {register} {register} {shift} "
     "{register}",
     "{}{} {}, {}, {}, {} {}"),
    ("{mnemonic} {condition} {register} {register} {shift} {immediate}",
     "{}{} {}, {}, {} #{}"),
    ("{mnemonic} {condition} {register} {register} {shift} {register}",
     "{}{} {}, {}, {} {}"),
    ("{mnemonic} {condition} {register} {register} plus {immediate} offset",
     "{}{} {}, [{}, #{}]"),
    ("{mnemonic} {condition} {register} {register} minus {immediate} offset",
     "{}{} {}, [{}, #-{}]"),
    ("{mnemonic} {condition} {register} {register} plus {immediate} postindex",
     "{}{} {}, [{}], #{}"),
    ("{mnemonic} {condition} {register} {register} minus {immediate} "
     "postindex",
     "{}{} {}, [{}], #-{}"),
    ("{mnemonic} {condition} {register} {register} plus {immediate} preindex",
     "{}{} {}, [{}, #{}]!"),
    ("{mnemonic} {condition} {register} {register} minus {immediate} "
     "preindex",
     "{}{} {}, [{}, #-{}]!"),
    ("{mnemonic} {condition} {register} {register} plus {register} offset",
     "{}{} {}, [{}, {}]"),
    ("{mnemonic} {condition} {register} {register} minus {register} offset",
     "{}{} {}, [{}, -{}]"),
    ("{mnemonic} {condition} {register} {register} plus {register} postindex",
     "{}{} {}, [{}], {}"),
    ("{mnemonic} {condition} {register} {register} minus {register} "
     "postindex",
     "{}{} {}, [{}], -{}"),
    ("{mnemonic} {condition} {register} {register} plus {register} preindex",
     "{}{} {}, [{}, {}]!"),
    ("{mnemonic} {condition} {register} {register} minus {register} preindex",
     "{}{} {}, [{}, -{}]!"),
    ("{mnemonic} {condition} {register} {register} plus {register} {shift} "
     "{immediate} offset",
     "{}{} {}, [{}, {}, {} #{}]"),
    ("{mnemonic} {condition} {register} {register} minus {register} {shift} "
     "{immediate} offset",
     "{}{} {}, [{}, -{}, {} #{}]"),
    ("{mnemonic} {condition} {register} {register} plus {register} {shift} "
     "{immediate} postindex",
     "{}{} {}, [{}], {}, {} #{}"),
    ("{mnemonic} {condition} {register} {register} minus {register} {shift} "
     "{immediate} postindex",
     "{}{} {}, [{}], -{}, {} #{}"),
    ("{mnemonic} {condition} {register} {register} plus {register} {shift} "
     "{immediate} preindex",
     "{}{} {}, [{}, {}, {} #{}]!"),
    ("{mnemonic} {condition} {register} {register} minus {register} {shift} "
     "{immediate} preindex",
     "{}{} {}, [{}, -{}, {} #{}]!"),
    ("{mnemonic} {dt} {dregister} {dregister} {dregister}",
     "{}.{} {}, {}, {}"),
    ("{mnemonic} {dt_untyped} {dregister} {dregister} {dregister}",
     DtUntypedToLLVM)
  ]

  # Work around issues in LLVM 3.8.
  if triple == "thumbv8":
    def ConvertMovRdImm(matches):
      """
      LLVM chooses the T3 encoding for `mov <rd>, #<immediate>` when the
      immediate fits both into a modified immediate (T2 encoding) and 16
      bits (T3 encoding). Adding the `.W` modifier forces the T2 encoding to
      be used.
      """
      # The immediate is the second capture in "mov al {register} {immediate}".
      # The `immediate` pattern accepts both hexadecimal and decimal text, so
      # pick the base from the prefix rather than always assuming base 16.
      text = matches[1]
      imm = int(text, 16) if text.startswith("0x") else int(text, 10)
      if imm <= 0xffff:
        lsb = imm & -imm
        if (imm >> 8) < lsb:
          return "mov.w {}, #{}".format(*matches)
      # Fall back to LLVM making the right decision.
      return "mov {}, #{}".format(*matches)

    # These rules go first so that they take precedence over the generic ones.
    llvm_mc_instruction_converters[:0] = [
      # The ARM ARM specifies that if <Rn> is PC in either an ADD or SUB
      # instruction with an immediate, the assembler should use the ADR
      # encoding. LLVM does not know about this subtlety. We get around this
      # by manually translating the instruction to their ADR form.
      ("add al {register} pc {immediate}", "adr {}, #{}"),
      ("sub al {register} pc {immediate}", "adr {}, #-{}"),

      # LLVM is (rightfully) being helpful by swapping register operands so
      # that the 16 bit encoding of the following instructions is used.
      # However, VIXL does not do this. These rules specifically add the `.w`
      # modifier to force LLVM to use the 32 bit encoding if the last register
      # is identical to first one. But at the same time, we should still use
      # the narrow encoding if all registers are the same.
      ("adcs al {register} (\\1) (\\1)", "adcs.n {}, {}, {}"),
      ("adcs al {register} {register} (\\1)", "adcs.w {}, {}, {}"),
      ("orrs al {register} (\\1) (\\1)", "orrs.n {}, {}, {}"),
      ("orrs al {register} {register} (\\1)", "orrs.w {}, {}, {}"),
      ("eors al {register} (\\1) (\\1)", "eors.n {}, {}, {}"),
      ("eors al {register} {register} (\\1)", "eors.w {}, {}, {}"),
      ("ands al {register} (\\1) (\\1)", "ands.n {}, {}, {}"),
      ("ands al {register} {register} (\\1)", "ands.w {}, {}, {}"),
      # Solve the same issue as for the previous rules, however, we need to
      # take into account that ADD instructions with the stack pointer have
      # additional 16 bit forms.
      ("add al {register} (\\1) (\\1)", "add.n {}, {}, {}"),
      ("add al {register} (\\1) r13", "add.w {}, {}, sp"),
      ("add al {register} r13 (\\1)", "add.n {}, sp, {}"),
      ("add al {register} {register} (\\1)", "add.w {}, {}, {}"),
      ("mov al {register} {immediate}", ConvertMovRdImm)
    ]

  # Our test generator framework uses mnemonics starting with a capital letter.
  # We need everything to be lower case for LLVM.
  vixl_instruction = vixl_instruction.lower()

  llvm_instruction = []

  # VIXL may have generated more than one instruction separated by ';'
  # (an IT instruction for example).
  for instruction in vixl_instruction.split(';'):
    # Strip out extra white spaces and tolerate empty pieces (for example
    # after a trailing ';').
    instruction = instruction.strip()
    if not instruction:
      continue
    # Try all converters in the list.
    for pattern, result in llvm_mc_instruction_converters:
      # Build the regular expression for this converter.
      instruction_matcher = "^" + pattern.format(**pattern_matchers) + "$"
      match = re.match(instruction_matcher, instruction)
      if match is None:
        continue
      # If we have a match, the object will contain a tuple of substrings.
      if callable(result):
        # `result` is a function, call it to produce the instruction.
        llvm_instruction.append(result(match.groups()))
      else:
        # `result` is a string, use it as the format string.
        llvm_instruction.append(result.format(*match.groups()))
      break
    else:
      # No converter worked for this instruction. Raise here rather than only
      # when nothing at all was converted, otherwise an unsupported instruction
      # would silently vanish from a multi-instruction sequence.
      raise Exception("Unsupported instruction {}.".format(instruction))

  if not llvm_instruction:
    # The input did not contain any instruction.
    raise Exception("Unsupported instruction {}.".format(vixl_instruction))

  return "\n".join(llvm_instruction)
| 338 | |
| 339 | |
def ReadTrace(trace):
  """
  Receive the content of an assembler trace, extract the relevant information
  and return it as a list of tuples. The first part of each tuple is a string
  representing the instruction. The second part is a list of bytes representing
  the encoding.

  For example:

      [
        ("Clz eq r0 r0", ["0x10", "0x0f", "0x6f", "0x01"]),
        ("Clz eq r0 r1", ["0x11", "0x0f", "0x6f", "0x01"]),
        ("Clz eq r0 r2", ["0x12", "0x0f", "0x6f", "0x01"])
      ]

  Only lines made of at least two comma separated bytes followed by a `//`
  comment are considered; an empty list is returned if there are none.
  """

  # Accept any indentation in front of the bytes. The byte group is
  # non-capturing: only the two named groups are of interest.
  pattern = re.compile(
      r"^[ \t]*(?P<encoding>(?:0x[0-9a-f]{2}, )+0x[0-9a-f]{2}) // "
      r"(?P<instruction>.*)$",
      re.M)
  return [
    (m.group('instruction'), m.group('encoding').replace(" ", "").split(","))
    for m in pattern.finditer(trace)
  ]
| 363 | |
| 364 | |
def VerifyInstructionsWithLLVMMC(llvm_mc, f, triple):
  """
  Extract all instructions from `f`, feed them to `llvm-mc` and make sure it's
  encoded them the same way as VIXL. `triple` allows us to specify either
  "thumbv8" or "armv8".

  `llvm_mc` is the path of the `llvm-mc` binary and `f` is an open trace file.
  Problems are reported by printing to the standard output; nothing is
  returned.
  """

  vixl_reference = ReadTrace(f.read())
  if not vixl_reference:
    # Unpacking `zip(*[])` below would fail with an obscure error, report the
    # actual problem instead.
    print("Error: no instructions found in the trace.")
    return
  vixl_instructions, vixl_encodings = zip(*vixl_reference)
  instructions = [
    ConvertToLLVMFormat(instruction, triple)
    for instruction in vixl_instructions
  ]
  llvm_mc_proc = subprocess.Popen(
      [llvm_mc, '-assemble', '-triple={}'.format(triple), '-mattr=v8,crc',
       # LLVM fails to recognize some instructions as valid T32 when we do not
       # set `-mcpu`.
       '-mcpu=cortex-a53', '-show-encoding'],
      stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  out, err = llvm_mc_proc.communicate("\n".join(instructions).encode())
  # If `llvm-mc` printed something to stderr then stop.
  if err:
    print(err.decode())
    return

  # Extract list of bytes from `llvm-mc` output. It's in the following form:
  #
  #         clzeq   r0, r0     @ encoding: [0x10,0x0f,0x6f,0x01]
  #                                         ^^^^ ^^^^ ^^^^ ^^^^
  llvm_encodings = [
    match_object.group('encoding').replace(" ", "").split(",")
    for match_object in re.finditer(r".*@ encoding: \[(?P<encoding>.*)\]",
                                    out.decode())
  ]

  # If LLVM has generated exactly twice as many instructions, we assume this is
  # due to IT instructions preceding every instruction under test. VIXL's
  # assembly reference files will contain a single array of 4 bytes encoding
  # both the IT and the following instruction. While LLVM will have decoded them
  # into two separate 2 bytes arrays.
  if len(llvm_encodings) == 2 * len(vixl_encodings):
    llvm_encodings = [
      it_encoding + instruction_encoding
      for it_encoding, instruction_encoding
      in zip(llvm_encodings[0::2], llvm_encodings[1::2])
    ]

  # Check the encodings from LLVM are identical to VIXL's.
  if len(llvm_encodings) != len(vixl_encodings):
    print("Error: llvm-mc generated {} instructions than there are in the\n"
          "generated trace.\n".format(
              "fewer" if len(llvm_encodings) < len(vixl_encodings) else "more"))
    return

  for instruction, llvm, vixl in zip(vixl_instructions, llvm_encodings,
                                     vixl_encodings):
    if llvm != vixl:
      print("Error: llvm-mc disagrees on the encoding of \"{instruction}\":\n"
            "  LLVM-MC: {llvm}\n"
            "  VIXL:    {vixl}\n".format(
                instruction=instruction.replace("\n", "; "),
                llvm=llvm,
                vixl=vixl))
| 425 | |
| 426 | |
if __name__ == "__main__":
  args = BuildOptions()

  CheckLLVMVersion(args.llvm_mc)

  trace_dir = 'test/aarch32/traces/'
  # Marker looked up in the trace file name and the triple to verify it with.
  # Markers are tried in this order.
  isa_triples = (("t32", "thumbv8"), ("a32", "armv8"))

  # Go through the assembler traces in alphabetical order.
  for trace_file in sorted(name
                           for name in os.listdir(trace_dir)
                           if name.startswith("assembler-")):
    if args.verbose:
      print("Verifying \"" + trace_file + "\".")
    with open(os.path.join(trace_dir, trace_file), "r") as f:
      for marker, triple in isa_triples:
        if marker in trace_file:
          VerifyInstructionsWithLLVMMC(args.llvm_mc, f, triple)
          break
      else:
        # The file name mentions neither T32 nor A32.
        raise Exception("Failed to recognize the ISA in \"" + trace_file + "\".")