crypto: twofish - add AVX2/x86_64 assembler implementation of twofish cipher
This patch adds an AVX2/x86-64 implementation of the Twofish cipher, which
requires 16 parallel blocks of input (256 bytes). Table look-ups are performed
with the vpgatherdd instruction directly from vector registers and thus should
be faster than in earlier implementations. The implementation also uses 256-bit
wide YMM registers, which should give an additional speed-up compared to the
AVX implementation.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index f3effb42..fea7841 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -1651,6 +1651,9 @@
.alg = "__cbc-twofish-avx",
.test = alg_test_null,
}, {
+ .alg = "__cbc-twofish-avx2",
+ .test = alg_test_null,
+ }, {
.alg = "__driver-cbc-aes-aesni",
.test = alg_test_null,
.fips_allowed = 1,
@@ -1676,6 +1679,9 @@
.alg = "__driver-cbc-twofish-avx",
.test = alg_test_null,
}, {
+ .alg = "__driver-cbc-twofish-avx2",
+ .test = alg_test_null,
+ }, {
.alg = "__driver-ecb-aes-aesni",
.test = alg_test_null,
.fips_allowed = 1,
@@ -1701,6 +1707,9 @@
.alg = "__driver-ecb-twofish-avx",
.test = alg_test_null,
}, {
+ .alg = "__driver-ecb-twofish-avx2",
+ .test = alg_test_null,
+ }, {
.alg = "__ghash-pclmulqdqni",
.test = alg_test_null,
.fips_allowed = 1,
@@ -1985,6 +1994,9 @@
.alg = "cryptd(__driver-ecb-twofish-avx)",
.test = alg_test_null,
}, {
+ .alg = "cryptd(__driver-ecb-twofish-avx2)",
+ .test = alg_test_null,
+ }, {
.alg = "cryptd(__driver-gcm-aes-aesni)",
.test = alg_test_null,
.fips_allowed = 1,