#!/usr/bin/env perl
# This script uses llvm-mc to disassemble a byte pattern given as a 16-bit or
# 32-bit hexadecimal value, trying each supported instruction set in turn:
#   * four bytes: ARMv7, Thumb-2 (thumbv7), x86 (i686) and x86_64
#   * two bytes:  16-bit Thumb
# NOTE: aarch64 is not attempted; only the triples listed in fix_endian() are.
#
# Usage: <script> 0xFFFF(FFFF)
# llvm-mc must be available on PATH.

use strict;
use warnings;
use Data::Dumper;

#vpush-vpop.s:@ CHECK-ARM: vpop {s8, s9, s10, s11, s12} @ encoding: [0x05,0x4a,0xbd,0xec]
#vpush-vpop.s:@ CHECK-THUMB: vpush {d8, d9, d10, d11, d12} @ encoding: [0x2d,0xed,0x0a,0x8b]

my $syntax = "Syntax: $0 0xFFFF(FFFF)\n";

# Run the driver only when executed as a script, so that the file can also be
# loaded (require/do) without side effects.
main($ARGV[0]) unless caller;

# main($hex)
#
# Prints, for every candidate architecture, the byte sequence handed to
# llvm-mc followed by whatever llvm-mc writes to its standard output.
# Dies with the usage message when $hex is missing or malformed.
sub main {
    my ($hex) = @_;

    my @hexs = fix_endian($hex);
    #die Dumper \@hexs;

    foreach my $candidate (@hexs) {
        my $code = $candidate->{'code'};
        my $arch = $candidate->{'arch'};

        print "$arch: $code: ";

        # Both interpolated values are safe for the shell: $code is built only
        # from hex digits validated by fix_endian() and $arch is a constant.
        print `echo "$code" | llvm-mc -disassemble -triple $arch`;
    }

    return;
}

# fix_endian($hex)
#
# Splits a hexadecimal value (optionally prefixed with "0x") into bytes and
# returns a list of hash references, one per candidate architecture:
#   code => the bytes as "0xNN 0xNN ..." in the order passed to llvm-mc
#   arch => the llvm-mc triple to disassemble them with
#
# Exactly two or four bytes (4 or 8 hex digits) are accepted; anything else,
# including an undefined argument, dies with the usage message.
# The parsed bytes are echoed to STDOUT, most significant first.
sub fix_endian {
    my ($hex) = @_;

    die $syntax unless defined $hex;

    # Only hex digits, and only whole 16-bit groups: this rejects stray
    # characters and odd byte counts instead of relying on the truthiness of
    # the captures (a lone "0" capture is false in Perl).
    my ($digits) = $hex =~ /\A0?x?((?:[[:xdigit:]]{4}){1,2})\z/
        or die $syntax;

    my @bytes = $digits =~ /(..)/g;

    # Always print four columns; the empty trailing columns of a 16-bit value
    # keep the historical output format.
    print join(' ', @bytes, ('') x (4 - @bytes)), "\n";

    my ($b0, $b1, $b2, $b3) = @bytes;

    # ARM/T2/x86
    if (@bytes == 4) {
        return (
            # 32-bit word, fully byte-reversed.
            { 'code' => "0x$b3 0x$b2 0x$b1 0x$b0", 'arch' => "armv7" },
            # Two 16-bit halfwords, each byte-swapped on its own.
            { 'code' => "0x$b1 0x$b0 0x$b3 0x$b2", 'arch' => "thumbv7" },
            { 'code' => "0x$b3 0x$b2 0x$b1 0x$b0", 'arch' => "i686" },
            { 'code' => "0x$b3 0x$b2 0x$b1 0x$b0", 'arch' => "x86_64" },
        );
    }

    # Single 16-bit Thumb instruction, byte-swapped.
    return (
        { 'code' => "0x$b1 0x$b0", 'arch' => "thumb" },
    );
}