about | summary | refs | log | tree | commit | diff
path: root/target-ppc/int_helper.c
diff options
context:
space:
mode:
author: Tom Musta <tommusta@gmail.com> 2014-02-12 15:23:17 -0600
committer: Alexander Graf <agraf@suse.de> 2014-03-05 03:06:59 +0100
commit: 557d52fa697c938aeff2784b79df55952c3bfcc1 (patch)
tree: cadb732d788c6cae4a14700ef5909da8f47724d8 /target-ppc/int_helper.c
parent: e8f7b27b9942d02ece7df34ae2b2a09cb9da7196 (diff)
target-ppc: Altivec 2.07: AES Instructions
This patch adds the Vector AES instructions introduced in Power ISA Version 2.07:

- Vector AES Cipher (vcipher)
- Vector AES Cipher Last (vcipherlast)
- Vector AES Inverse Cipher (vncipher)
- Vector AES Inverse Cipher Last (vncipherlast)
- Vector AES SubBytes (vsbox)

Note that the implementation of vncipher deviates from the RTL in ISA V2.07. However, it does match the verbal description in the third paragraph. The RTL will be fixed in ISA V2.07B.

The implementation here has been tested against actual P8 hardware.

Signed-off-by: Tom Musta <tommusta@gmail.com>
Signed-off-by: Alexander Graf <agraf@suse.de>
Diffstat (limited to 'target-ppc/int_helper.c')
-rw-r--r-- target-ppc/int_helper.c | 280
1 files changed, 280 insertions, 0 deletions
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index ce7c6a0ea0..cd04e8ab71 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -2338,6 +2338,286 @@ uint32_t helper_bcdsub(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b, uint32_t ps)
return helper_bcdadd(r, a, &bcopy, ps);
}
+/*
+ * AES forward substitution box: SBOX[v] is the byte that replaces v in
+ * SubBytes().  The table is only ever read, so it is declared const to
+ * keep it in read-only storage and to catch accidental writes.
+ */
+static const uint8_t SBOX[256] = {
+    0x63, 0x7C, 0x77, 0x7B, 0xF2, 0x6B, 0x6F, 0xC5,
+    0x30, 0x01, 0x67, 0x2B, 0xFE, 0xD7, 0xAB, 0x76,
+    0xCA, 0x82, 0xC9, 0x7D, 0xFA, 0x59, 0x47, 0xF0,
+    0xAD, 0xD4, 0xA2, 0xAF, 0x9C, 0xA4, 0x72, 0xC0,
+    0xB7, 0xFD, 0x93, 0x26, 0x36, 0x3F, 0xF7, 0xCC,
+    0x34, 0xA5, 0xE5, 0xF1, 0x71, 0xD8, 0x31, 0x15,
+    0x04, 0xC7, 0x23, 0xC3, 0x18, 0x96, 0x05, 0x9A,
+    0x07, 0x12, 0x80, 0xE2, 0xEB, 0x27, 0xB2, 0x75,
+    0x09, 0x83, 0x2C, 0x1A, 0x1B, 0x6E, 0x5A, 0xA0,
+    0x52, 0x3B, 0xD6, 0xB3, 0x29, 0xE3, 0x2F, 0x84,
+    0x53, 0xD1, 0x00, 0xED, 0x20, 0xFC, 0xB1, 0x5B,
+    0x6A, 0xCB, 0xBE, 0x39, 0x4A, 0x4C, 0x58, 0xCF,
+    0xD0, 0xEF, 0xAA, 0xFB, 0x43, 0x4D, 0x33, 0x85,
+    0x45, 0xF9, 0x02, 0x7F, 0x50, 0x3C, 0x9F, 0xA8,
+    0x51, 0xA3, 0x40, 0x8F, 0x92, 0x9D, 0x38, 0xF5,
+    0xBC, 0xB6, 0xDA, 0x21, 0x10, 0xFF, 0xF3, 0xD2,
+    0xCD, 0x0C, 0x13, 0xEC, 0x5F, 0x97, 0x44, 0x17,
+    0xC4, 0xA7, 0x7E, 0x3D, 0x64, 0x5D, 0x19, 0x73,
+    0x60, 0x81, 0x4F, 0xDC, 0x22, 0x2A, 0x90, 0x88,
+    0x46, 0xEE, 0xB8, 0x14, 0xDE, 0x5E, 0x0B, 0xDB,
+    0xE0, 0x32, 0x3A, 0x0A, 0x49, 0x06, 0x24, 0x5C,
+    0xC2, 0xD3, 0xAC, 0x62, 0x91, 0x95, 0xE4, 0x79,
+    0xE7, 0xC8, 0x37, 0x6D, 0x8D, 0xD5, 0x4E, 0xA9,
+    0x6C, 0x56, 0xF4, 0xEA, 0x65, 0x7A, 0xAE, 0x08,
+    0xBA, 0x78, 0x25, 0x2E, 0x1C, 0xA6, 0xB4, 0xC6,
+    0xE8, 0xDD, 0x74, 0x1F, 0x4B, 0xBD, 0x8B, 0x8A,
+    0x70, 0x3E, 0xB5, 0x66, 0x48, 0x03, 0xF6, 0x0E,
+    0x61, 0x35, 0x57, 0xB9, 0x86, 0xC1, 0x1D, 0x9E,
+    0xE1, 0xF8, 0x98, 0x11, 0x69, 0xD9, 0x8E, 0x94,
+    0x9B, 0x1E, 0x87, 0xE9, 0xCE, 0x55, 0x28, 0xDF,
+    0x8C, 0xA1, 0x89, 0x0D, 0xBF, 0xE6, 0x42, 0x68,
+    0x41, 0x99, 0x2D, 0x0F, 0xB0, 0x54, 0xBB, 0x16,
+};
+
+/*
+ * Replace every byte of *a with its SBOX entry and store the result in *r.
+ * Each output byte depends only on the input byte at the same index.
+ */
+static void SubBytes(ppc_avr_t *r, ppc_avr_t *a)
+{
+    int idx;
+
+    VECTOR_FOR_INORDER_I(idx, u8) {
+        const uint8_t in = a->u8[idx];
+
+        r->u8[idx] = SBOX[in];
+    }
+}
+
+/*
+ * AES inverse substitution box: InvSBOX[v] is the byte that replaces v in
+ * InvSubBytes().  The table is only ever read, so it is declared const to
+ * keep it in read-only storage and to catch accidental writes.
+ */
+static const uint8_t InvSBOX[256] = {
+    0x52, 0x09, 0x6A, 0xD5, 0x30, 0x36, 0xA5, 0x38,
+    0xBF, 0x40, 0xA3, 0x9E, 0x81, 0xF3, 0xD7, 0xFB,
+    0x7C, 0xE3, 0x39, 0x82, 0x9B, 0x2F, 0xFF, 0x87,
+    0x34, 0x8E, 0x43, 0x44, 0xC4, 0xDE, 0xE9, 0xCB,
+    0x54, 0x7B, 0x94, 0x32, 0xA6, 0xC2, 0x23, 0x3D,
+    0xEE, 0x4C, 0x95, 0x0B, 0x42, 0xFA, 0xC3, 0x4E,
+    0x08, 0x2E, 0xA1, 0x66, 0x28, 0xD9, 0x24, 0xB2,
+    0x76, 0x5B, 0xA2, 0x49, 0x6D, 0x8B, 0xD1, 0x25,
+    0x72, 0xF8, 0xF6, 0x64, 0x86, 0x68, 0x98, 0x16,
+    0xD4, 0xA4, 0x5C, 0xCC, 0x5D, 0x65, 0xB6, 0x92,
+    0x6C, 0x70, 0x48, 0x50, 0xFD, 0xED, 0xB9, 0xDA,
+    0x5E, 0x15, 0x46, 0x57, 0xA7, 0x8D, 0x9D, 0x84,
+    0x90, 0xD8, 0xAB, 0x00, 0x8C, 0xBC, 0xD3, 0x0A,
+    0xF7, 0xE4, 0x58, 0x05, 0xB8, 0xB3, 0x45, 0x06,
+    0xD0, 0x2C, 0x1E, 0x8F, 0xCA, 0x3F, 0x0F, 0x02,
+    0xC1, 0xAF, 0xBD, 0x03, 0x01, 0x13, 0x8A, 0x6B,
+    0x3A, 0x91, 0x11, 0x41, 0x4F, 0x67, 0xDC, 0xEA,
+    0x97, 0xF2, 0xCF, 0xCE, 0xF0, 0xB4, 0xE6, 0x73,
+    0x96, 0xAC, 0x74, 0x22, 0xE7, 0xAD, 0x35, 0x85,
+    0xE2, 0xF9, 0x37, 0xE8, 0x1C, 0x75, 0xDF, 0x6E,
+    0x47, 0xF1, 0x1A, 0x71, 0x1D, 0x29, 0xC5, 0x89,
+    0x6F, 0xB7, 0x62, 0x0E, 0xAA, 0x18, 0xBE, 0x1B,
+    0xFC, 0x56, 0x3E, 0x4B, 0xC6, 0xD2, 0x79, 0x20,
+    0x9A, 0xDB, 0xC0, 0xFE, 0x78, 0xCD, 0x5A, 0xF4,
+    0x1F, 0xDD, 0xA8, 0x33, 0x88, 0x07, 0xC7, 0x31,
+    0xB1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xEC, 0x5F,
+    0x60, 0x51, 0x7F, 0xA9, 0x19, 0xB5, 0x4A, 0x0D,
+    0x2D, 0xE5, 0x7A, 0x9F, 0x93, 0xC9, 0x9C, 0xEF,
+    0xA0, 0xE0, 0x3B, 0x4D, 0xAE, 0x2A, 0xF5, 0xB0,
+    0xC8, 0xEB, 0xBB, 0x3C, 0x83, 0x53, 0x99, 0x61,
+    0x17, 0x2B, 0x04, 0x7E, 0xBA, 0x77, 0xD6, 0x26,
+    0xE1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0C, 0x7D,
+};
+
+/*
+ * Replace every byte of *a with its InvSBOX entry and store the result in
+ * *r.  Each output byte depends only on the input byte at the same index.
+ */
+static void InvSubBytes(ppc_avr_t *r, ppc_avr_t *a)
+{
+    int idx;
+
+    VECTOR_FOR_INORDER_I(idx, u8) {
+        const uint8_t in = a->u8[idx];
+
+        r->u8[idx] = InvSBOX[in];
+    }
+}
+
+/*
+ * Rotate the 8-bit value x left by n bit positions.  Callers in this file
+ * pass n in the range 1..3.
+ */
+static uint8_t ROTL8(uint8_t x, int n)
+{
+    const unsigned int wide = x;
+    const unsigned int high = wide << n;
+    const unsigned int low = wide >> (8 - n);
+
+    /* Truncation to uint8_t drops the bits shifted out past bit 7. */
+    return (uint8_t)(high | low);
+}
+
+/*
+ * Return 1 if bit n of x is set, 0 otherwise.  Bits are numbered from the
+ * most significant end: n == 0 selects 0x80 and n == 7 selects 0x01.
+ */
+static inline int BIT8(uint8_t x, int n)
+{
+    const uint8_t mask = 0x80 >> n;
+
+    return (x & mask) ? 1 : 0;
+}
+
+/*
+ * Multiply x by 0x02 in the AES field GF(2^8).
+ *
+ * The rotate moves the top bit of x into bit 0; when that bit was set the
+ * extra XOR with 0x1A turns the rotated-in 0x01 into the reduction
+ * constant 0x1B.
+ */
+static uint8_t GFx02(uint8_t x)
+{
+    uint8_t product = ROTL8(x, 1);
+
+    if (BIT8(x, 0)) {
+        product ^= 0x1A;
+    }
+    return product;
+}
+
+/*
+ * Multiply x by 0x03 in the AES field GF(2^8), computed as x XOR (x * 0x02).
+ * The doubling is done with a rotate plus a conditional 0x1A correction
+ * when the top bit of x is set.
+ */
+static uint8_t GFx03(uint8_t x)
+{
+    uint8_t product = x ^ ROTL8(x, 1);
+
+    if (BIT8(x, 0)) {
+        product ^= 0x1A;
+    }
+    return product;
+}
+
+/*
+ * Multiply x by 0x09 in the AES field GF(2^8), i.e. x XOR (x * 0x08).
+ *
+ * ROTL8(x, 3) supplies the shifted value plus the three wrapped-around top
+ * bits; each of those top bits then needs a fixed correction so that its
+ * total contribution equals the reduced power of two it stands for.  The
+ * constants below are the per-bit corrections combined into one value.
+ */
+static uint8_t GFx09(uint8_t x)
+{
+    uint8_t acc = x ^ ROTL8(x, 3);
+
+    if (BIT8(x, 0)) {
+        acc ^= 0x68;
+    }
+    if (BIT8(x, 1)) {
+        acc ^= 0x34;        /* 0x14 ^ 0x20 */
+    }
+    if (BIT8(x, 2)) {
+        acc ^= 0x1A;        /* 0x02 ^ 0x18 */
+    }
+    return acc;
+}
+
+/*
+ * Multiply x by 0x0B in the AES field GF(2^8), i.e.
+ * x XOR (x * 0x02) XOR (x * 0x08).
+ *
+ * The rotate-by-one and the truncated shift-by-three provide the unreduced
+ * parts; the three top bits of x each add a fixed correction constant that
+ * accounts for the modular reduction.
+ */
+static uint8_t GFx0B(uint8_t x)
+{
+    uint8_t acc = x ^ ROTL8(x, 1) ^ (uint8_t)(x << 3);
+
+    if (BIT8(x, 0)) {
+        acc ^= 0x76;        /* 0x06 ^ 0x70 */
+    }
+    if (BIT8(x, 1)) {
+        acc ^= 0x36;        /* 0x06 ^ 0x30 */
+    }
+    if (BIT8(x, 2)) {
+        acc ^= 0x1B;        /* 0x01 ^ 0x08 ^ 0x02 ^ 0x10 */
+    }
+    return acc;
+}
+
+/*
+ * Multiply x by 0x0D in the AES field GF(2^8), i.e.
+ * x XOR (x * 0x04) XOR (x * 0x08).
+ *
+ * The rotate-by-two and the truncated shift-by-three provide the unreduced
+ * parts; the three top bits of x each add a fixed correction constant that
+ * accounts for the modular reduction.
+ */
+static uint8_t GFx0D(uint8_t x)
+{
+    uint8_t acc = x ^ ROTL8(x, 2) ^ (uint8_t)(x << 3);
+
+    if (BIT8(x, 0)) {
+        acc ^= 0x58;
+    }
+    if (BIT8(x, 1)) {
+        acc ^= 0x2C;        /* 0x04 ^ 0x20 ^ 0x08 */
+    }
+    if (BIT8(x, 2)) {
+        acc ^= 0x1B;        /* 0x03 ^ 0x10 ^ 0x08 */
+    }
+    return acc;
+}
+
+/*
+ * Multiply x by 0x0E in the AES field GF(2^8), i.e.
+ * (x * 0x02) XOR (x * 0x04) XOR (x * 0x08).
+ *
+ * The rotate-by-one and the truncated shifts by two and three provide the
+ * unreduced parts; the three top bits of x each add a fixed correction
+ * constant that accounts for the modular reduction.
+ */
+static uint8_t GFx0E(uint8_t x)
+{
+    uint8_t acc = ROTL8(x, 1) ^ (uint8_t)(x << 2) ^ (uint8_t)(x << 3);
+
+    if (BIT8(x, 0)) {
+        acc ^= 0x40;
+    }
+    if (BIT8(x, 1)) {
+        acc ^= 0x2D;        /* 0x01 ^ 0x04 ^ 0x28 */
+    }
+    if (BIT8(x, 2)) {
+        acc ^= 0x1B;        /* 0x02 ^ 0x01 ^ 0x10 ^ 0x08 */
+    }
+    return acc;
+}
+
+/*
+ * MCB(x, i, b): lvalue for byte (i * 4 + b) of x->u8, counted from index 0
+ * on big-endian hosts and mirrored (15 - index) on little-endian hosts.
+ * The users below pass a column number as i and the byte position within
+ * that column as b, both in the range 0..3.
+ */
+#if defined(HOST_WORDS_BIGENDIAN)
+#define MCB(x, i, b) ((x)->u8[(i)*4 + (b)])
+#else
+#define MCB(x, i, b) ((x)->u8[15 - ((i)*4 + (b))])
+#endif
+
+/*
+ * AES MixColumns: for each of the four columns of *x, write the mixed
+ * column into *r.  Output byte `row` of a column is
+ *
+ *   02 * in[row] ^ 03 * in[row + 1] ^ in[row + 2] ^ in[row + 3]
+ *
+ * with the byte positions taken modulo 4 and the products computed in
+ * GF(2^8).  Callers pass distinct r and x.
+ */
+static void MixColumns(ppc_avr_t *r, ppc_avr_t *x)
+{
+    int col, row;
+
+    for (col = 0; col < 4; col++) {
+        for (row = 0; row < 4; row++) {
+            MCB(r, col, row) = GFx02(MCB(x, col, row)) ^
+                               GFx03(MCB(x, col, (row + 1) & 3)) ^
+                               MCB(x, col, (row + 2) & 3) ^
+                               MCB(x, col, (row + 3) & 3);
+        }
+    }
+}
+
+/*
+ * AES InvMixColumns: for each of the four columns of *x, write the
+ * inverse-mixed column into *r.  Output byte `row` of a column is
+ *
+ *   0E * in[row] ^ 0B * in[row + 1] ^ 0D * in[row + 2] ^ 09 * in[row + 3]
+ *
+ * with the byte positions taken modulo 4 and the products computed in
+ * GF(2^8).  Callers pass distinct r and x.
+ */
+static void InvMixColumns(ppc_avr_t *r, ppc_avr_t *x)
+{
+    int col, row;
+
+    for (col = 0; col < 4; col++) {
+        for (row = 0; row < 4; row++) {
+            MCB(r, col, row) = GFx0E(MCB(x, col, row)) ^
+                               GFx0B(MCB(x, col, (row + 1) & 3)) ^
+                               GFx0D(MCB(x, col, (row + 2) & 3)) ^
+                               GFx09(MCB(x, col, (row + 3) & 3));
+        }
+    }
+}
+
+/*
+ * AES ShiftRows: byte position `row` of output column `col` is taken from
+ * the same byte position of input column (col + row) mod 4, so row 0 is
+ * copied unchanged and rows 1..3 are rotated by 1..3 columns.
+ * Callers pass distinct r and x.
+ */
+static void ShiftRows(ppc_avr_t *r, ppc_avr_t *x)
+{
+    int row, col;
+
+    for (row = 0; row < 4; row++) {
+        for (col = 0; col < 4; col++) {
+            MCB(r, col, row) = MCB(x, (col + row) & 3, row);
+        }
+    }
+}
+
+/*
+ * AES InvShiftRows: byte position `row` of output column `col` is taken
+ * from the same byte position of input column (col - row) mod 4, undoing
+ * the rotation applied by ShiftRows().  Callers pass distinct r and x.
+ */
+static void InvShiftRows(ppc_avr_t *r, ppc_avr_t *x)
+{
+    int row, col;
+
+    for (row = 0; row < 4; row++) {
+        for (col = 0; col < 4; col++) {
+            /* Adding 4 keeps the column index non-negative before masking. */
+            MCB(r, col, row) = MCB(x, (col + 4 - row) & 3, row);
+        }
+    }
+}
+
+/* MCB is only needed by the column/row helpers above. */
+#undef MCB
+
+/*
+ * Helper for the Vector AES SubBytes (vsbox) instruction: stores the
+ * S-box substitution of every byte of *a into *r.
+ */
+void helper_vsbox(ppc_avr_t *r, ppc_avr_t *a)
+{
+ SubBytes(r, a);
+}
+
+/*
+ * Helper for the Vector AES Cipher (vcipher) instruction:
+ * r = MixColumns(ShiftRows(SubBytes(a))) XOR b.
+ * The intermediate results live in temporaries, so r may be the same
+ * register as a or b.
+ */
+void helper_vcipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    ppc_avr_t substituted, shifted, mixed;
+    int half;
+
+    SubBytes(&substituted, a);
+    ShiftRows(&shifted, &substituted);
+    MixColumns(&mixed, &shifted);
+
+    /* XOR with b, one 64-bit half at a time. */
+    for (half = 0; half < 2; half++) {
+        r->u64[half] = mixed.u64[half] ^ b->u64[half];
+    }
+}
+
+/*
+ * Helper for the Vector AES Cipher Last (vcipherlast) instruction:
+ * r = ShiftRows(SubBytes(a)) XOR b (no MixColumns step).
+ * The intermediate results live in temporaries, so r may be the same
+ * register as a or b.
+ */
+void helper_vcipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    ppc_avr_t substituted, shifted;
+    int half;
+
+    SubBytes(&substituted, a);
+    ShiftRows(&shifted, &substituted);
+
+    /* XOR with b, one 64-bit half at a time. */
+    for (half = 0; half < 2; half++) {
+        r->u64[half] = shifted.u64[half] ^ b->u64[half];
+    }
+}
+
+/*
+ * Helper for the Vector AES Inverse Cipher (vncipher) instruction:
+ * r = InvMixColumns(InvSubBytes(InvShiftRows(a)) XOR b).
+ *
+ * This differs from what is written in ISA V2.07.  The RTL there is
+ * incorrect and will be fixed in V2.07B.
+ */
+void helper_vncipher(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    ppc_avr_t unshifted, keyed;
+    int half;
+
+    InvShiftRows(&unshifted, a);
+    InvSubBytes(&keyed, &unshifted);
+
+    /* XOR with b before the final InvMixColumns step. */
+    for (half = 0; half < 2; half++) {
+        keyed.u64[half] ^= b->u64[half];
+    }
+
+    InvMixColumns(r, &keyed);
+}
+
+/*
+ * Helper for the Vector AES Inverse Cipher Last (vncipherlast) instruction:
+ * r = InvSubBytes(InvShiftRows(a)) XOR b (no InvMixColumns step).
+ * The intermediate results live in temporaries, so r may be the same
+ * register as a or b.
+ */
+void helper_vncipherlast(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)
+{
+    ppc_avr_t unshifted, substituted;
+    int half;
+
+    InvShiftRows(&unshifted, a);
+    InvSubBytes(&substituted, &unshifted);
+
+    /* XOR with b, one 64-bit half at a time. */
+    for (half = 0; half < 2; half++) {
+        r->u64[half] = substituted.u64[half] ^ b->u64[half];
+    }
+}
+
#undef VECTOR_FOR_INORDER_I
#undef HI_IDX
#undef LO_IDX