aboutsummaryrefslogtreecommitdiff
path: root/crypto/gf128mul.c
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/gf128mul.c')
-rw-r--r--crypto/gf128mul.c171
1 files changed, 136 insertions, 35 deletions
diff --git a/crypto/gf128mul.c b/crypto/gf128mul.c
index 5276607c72d0..f3d9f6da0767 100644
--- a/crypto/gf128mul.c
+++ b/crypto/gf128mul.c
@@ -44,7 +44,7 @@
---------------------------------------------------------------------------
Issue 31/01/2006
- This file provides fast multiplication in GF(128) as required by several
+ This file provides fast multiplication in GF(2^128) as required by several
cryptographic authentication modes
*/
@@ -88,37 +88,52 @@
q(0xf8), q(0xf9), q(0xfa), q(0xfb), q(0xfc), q(0xfd), q(0xfe), q(0xff) \
}
-/* Given the value i in 0..255 as the byte overflow when a field element
- in GHASH is multiplied by x^8, this function will return the values that
- are generated in the lo 16-bit word of the field value by applying the
- modular polynomial. The values lo_byte and hi_byte are returned via the
- macro xp_fun(lo_byte, hi_byte) so that the values can be assembled into
- memory as required by a suitable definition of this macro operating on
- the table above
-*/
-
-#define xx(p, q) 0x##p##q
+/*
+ * Given a value i in 0..255 as the byte overflow when a field element
+ * in GF(2^128) is multiplied by x^8, the following macro returns the
+ * 16-bit value that must be XOR-ed into the low-degree end of the
+ * product to reduce it modulo the irreducible polynomial x^128 + x^7 +
+ * x^2 + x + 1.
+ *
+ * There are two versions of the macro, and hence two tables: one for
+ * the "be" convention where the highest-order bit is the coefficient of
+ * the highest-degree polynomial term, and one for the "le" convention
+ * where the highest-order bit is the coefficient of the lowest-degree
+ * polynomial term. In both cases the values are stored in CPU byte
+ * endianness such that the coefficients are ordered consistently across
+ * bytes, i.e. in the "be" table bits 15..0 of the stored value
+ * correspond to the coefficients of x^15..x^0, and in the "le" table
+ * bits 15..0 correspond to the coefficients of x^0..x^15.
+ *
+ * Therefore, provided that the appropriate byte endianness conversions
+ * are done by the multiplication functions (and these must be in place
+ * anyway to support both little endian and big endian CPUs), the "be"
+ * table can be used for multiplications of both "bbe" and "ble"
+ * elements, and the "le" table can be used for multiplications of both
+ * "lle" and "lbe" elements.
+ */
-#define xda_bbe(i) ( \
- (i & 0x80 ? xx(43, 80) : 0) ^ (i & 0x40 ? xx(21, c0) : 0) ^ \
- (i & 0x20 ? xx(10, e0) : 0) ^ (i & 0x10 ? xx(08, 70) : 0) ^ \
- (i & 0x08 ? xx(04, 38) : 0) ^ (i & 0x04 ? xx(02, 1c) : 0) ^ \
- (i & 0x02 ? xx(01, 0e) : 0) ^ (i & 0x01 ? xx(00, 87) : 0) \
+#define xda_be(i) ( \
+ (i & 0x80 ? 0x4380 : 0) ^ (i & 0x40 ? 0x21c0 : 0) ^ \
+ (i & 0x20 ? 0x10e0 : 0) ^ (i & 0x10 ? 0x0870 : 0) ^ \
+ (i & 0x08 ? 0x0438 : 0) ^ (i & 0x04 ? 0x021c : 0) ^ \
+ (i & 0x02 ? 0x010e : 0) ^ (i & 0x01 ? 0x0087 : 0) \
)
-#define xda_lle(i) ( \
- (i & 0x80 ? xx(e1, 00) : 0) ^ (i & 0x40 ? xx(70, 80) : 0) ^ \
- (i & 0x20 ? xx(38, 40) : 0) ^ (i & 0x10 ? xx(1c, 20) : 0) ^ \
- (i & 0x08 ? xx(0e, 10) : 0) ^ (i & 0x04 ? xx(07, 08) : 0) ^ \
- (i & 0x02 ? xx(03, 84) : 0) ^ (i & 0x01 ? xx(01, c2) : 0) \
+#define xda_le(i) ( \
+ (i & 0x80 ? 0xe100 : 0) ^ (i & 0x40 ? 0x7080 : 0) ^ \
+ (i & 0x20 ? 0x3840 : 0) ^ (i & 0x10 ? 0x1c20 : 0) ^ \
+ (i & 0x08 ? 0x0e10 : 0) ^ (i & 0x04 ? 0x0708 : 0) ^ \
+ (i & 0x02 ? 0x0384 : 0) ^ (i & 0x01 ? 0x01c2 : 0) \
)
-static const u16 gf128mul_table_lle[256] = gf128mul_dat(xda_lle);
-static const u16 gf128mul_table_bbe[256] = gf128mul_dat(xda_bbe);
+static const u16 gf128mul_table_le[256] = gf128mul_dat(xda_le);
+static const u16 gf128mul_table_be[256] = gf128mul_dat(xda_be);
-/* These functions multiply a field element by x, by x^4 and by x^8
- * in the polynomial field representation. It uses 32-bit word operations
- * to gain speed but compensates for machine endianess and hence works
+/*
+ * The following functions multiply a field element by x or by x^8 in
+ * the polynomial field representation. They use 64-bit word operations
+ * to gain speed but compensate for machine endianness and hence work
* correctly on both styles of machine.
*/
@@ -126,7 +141,7 @@ static void gf128mul_x_lle(be128 *r, const be128 *x)
{
u64 a = be64_to_cpu(x->a);
u64 b = be64_to_cpu(x->b);
- u64 _tt = gf128mul_table_lle[(b << 7) & 0xff];
+ u64 _tt = gf128mul_table_le[(b << 7) & 0xff];
r->b = cpu_to_be64((b >> 1) | (a << 63));
r->a = cpu_to_be64((a >> 1) ^ (_tt << 48));
@@ -136,7 +151,7 @@ static void gf128mul_x_bbe(be128 *r, const be128 *x)
{
u64 a = be64_to_cpu(x->a);
u64 b = be64_to_cpu(x->b);
- u64 _tt = gf128mul_table_bbe[a >> 63];
+ u64 _tt = gf128mul_table_be[a >> 63];
r->a = cpu_to_be64((a << 1) | (b >> 63));
r->b = cpu_to_be64((b << 1) ^ _tt);
@@ -146,7 +161,7 @@ void gf128mul_x_ble(be128 *r, const be128 *x)
{
u64 a = le64_to_cpu(x->a);
u64 b = le64_to_cpu(x->b);
- u64 _tt = gf128mul_table_bbe[b >> 63];
+ u64 _tt = gf128mul_table_be[b >> 63];
r->a = cpu_to_le64((a << 1) ^ _tt);
r->b = cpu_to_le64((b << 1) | (a >> 63));
@@ -157,7 +172,7 @@ static void gf128mul_x8_lle(be128 *x)
{
u64 a = be64_to_cpu(x->a);
u64 b = be64_to_cpu(x->b);
- u64 _tt = gf128mul_table_lle[b & 0xff];
+ u64 _tt = gf128mul_table_le[b & 0xff];
x->b = cpu_to_be64((b >> 8) | (a << 56));
x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
@@ -167,12 +182,22 @@ static void gf128mul_x8_bbe(be128 *x)
{
u64 a = be64_to_cpu(x->a);
u64 b = be64_to_cpu(x->b);
- u64 _tt = gf128mul_table_bbe[a >> 56];
+ u64 _tt = gf128mul_table_be[a >> 56];
x->a = cpu_to_be64((a << 8) | (b >> 56));
x->b = cpu_to_be64((b << 8) ^ _tt);
}
+static void gf128mul_x8_ble(be128 *x)
+{
+ u64 a = le64_to_cpu(x->b);
+ u64 b = le64_to_cpu(x->a);
+ u64 _tt = gf128mul_table_be[a >> 56];
+
+ x->b = cpu_to_le64((a << 8) | (b >> 56));
+ x->a = cpu_to_le64((b << 8) ^ _tt);
+}
+
void gf128mul_lle(be128 *r, const be128 *b)
{
be128 p[8];
@@ -249,9 +274,48 @@ void gf128mul_bbe(be128 *r, const be128 *b)
}
EXPORT_SYMBOL(gf128mul_bbe);
+void gf128mul_ble(be128 *r, const be128 *b)
+{
+ be128 p[8];
+ int i;
+
+ p[0] = *r;
+ for (i = 0; i < 7; ++i)
+ gf128mul_x_ble((be128 *)&p[i + 1], (be128 *)&p[i]);
+
+ memset(r, 0, sizeof(*r));
+ for (i = 0;;) {
+ u8 ch = ((u8 *)b)[15 - i];
+
+ if (ch & 0x80)
+ be128_xor(r, r, &p[7]);
+ if (ch & 0x40)
+ be128_xor(r, r, &p[6]);
+ if (ch & 0x20)
+ be128_xor(r, r, &p[5]);
+ if (ch & 0x10)
+ be128_xor(r, r, &p[4]);
+ if (ch & 0x08)
+ be128_xor(r, r, &p[3]);
+ if (ch & 0x04)
+ be128_xor(r, r, &p[2]);
+ if (ch & 0x02)
+ be128_xor(r, r, &p[1]);
+ if (ch & 0x01)
+ be128_xor(r, r, &p[0]);
+
+ if (++i >= 16)
+ break;
+
+ gf128mul_x8_ble(r);
+ }
+}
+EXPORT_SYMBOL(gf128mul_ble);
+
+
/* This version uses 64k bytes of table space.
A 16 byte buffer has to be multiplied by a 16 byte key
- value in GF(128). If we consider a GF(128) value in
+ value in GF(2^128). If we consider a GF(2^128) value in
the buffer's lowest byte, we can construct a table of
the 256 16 byte values that result from the 256 values
of this byte. This requires 4096 bytes. But we also
@@ -352,8 +416,8 @@ void gf128mul_free_64k(struct gf128mul_64k *t)
int i;
for (i = 0; i < 16; i++)
- kfree(t->t[i]);
- kfree(t);
+ kzfree(t->t[i]);
+ kzfree(t);
}
EXPORT_SYMBOL(gf128mul_free_64k);
@@ -385,7 +449,7 @@ EXPORT_SYMBOL(gf128mul_64k_bbe);
/* This version uses 4k bytes of table space.
A 16 byte buffer has to be multiplied by a 16 byte key
- value in GF(128). If we consider a GF(128) value in a
+ value in GF(2^128). If we consider a GF(2^128) value in a
single byte, we can construct a table of the 256 16 byte
values that result from the 256 values of this byte.
This requires 4096 bytes. If we take the highest byte in
@@ -443,6 +507,28 @@ out:
}
EXPORT_SYMBOL(gf128mul_init_4k_bbe);
+struct gf128mul_4k *gf128mul_init_4k_ble(const be128 *g)
+{
+ struct gf128mul_4k *t;
+ int j, k;
+
+ t = kzalloc(sizeof(*t), GFP_KERNEL);
+ if (!t)
+ goto out;
+
+ t->t[1] = *g;
+ for (j = 1; j <= 64; j <<= 1)
+ gf128mul_x_ble(&t->t[j + j], &t->t[j]);
+
+ for (j = 2; j < 256; j += j)
+ for (k = 1; k < j; ++k)
+ be128_xor(&t->t[j + k], &t->t[j], &t->t[k]);
+
+out:
+ return t;
+}
+EXPORT_SYMBOL(gf128mul_init_4k_ble);
+
void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t)
{
u8 *ap = (u8 *)a;
@@ -473,5 +559,20 @@ void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t)
}
EXPORT_SYMBOL(gf128mul_4k_bbe);
+void gf128mul_4k_ble(be128 *a, struct gf128mul_4k *t)
+{
+ u8 *ap = (u8 *)a;
+ be128 r[1];
+ int i = 15;
+
+ *r = t->t[ap[15]];
+ while (i--) {
+ gf128mul_x8_ble(r);
+ be128_xor(r, r, &t->t[ap[i]]);
+ }
+ *a = *r;
+}
+EXPORT_SYMBOL(gf128mul_4k_ble);
+
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Functions for multiplying elements of GF(2^128)");