Commit 3e1a29b3 authored by Linus Torvalds

Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "API:

   - Decryption test vectors are now automatically generated from
     encryption test vectors.

  Algorithms:

   - Fix unaligned access issues in crc32/crc32c.

   - Add zstd compression algorithm (a usage sketch follows this message).

   - Add AEGIS.

   - Add MORUS.

  Drivers:

   - Add accelerated AEGIS/MORUS on x86.

   - Add accelerated SM4 on arm64.

   - Remove the x86 salsa20 assembly implementation, as it is slower
     than the C version.

   - Add authenc(hmac(sha*), cbc(aes)) support in inside-secure.

   - Add ctr(aes) support in crypto4xx.

   - Add hardware key support in ccree.

   - Add support for new Centaur CPU in via-rng"

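For context, a minimal sketch (not part of this merge) of how a kernel user might drive the newly added "zstd" algorithm through the existing synchronous compression API; the buffer sizes and payload below are illustrative assumptions:

#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/string.h>

/* Compress a trivially compressible buffer with the "zstd" algorithm. */
static int zstd_compress_example(void)
{
	struct crypto_comp *tfm;
	u8 src[128], dst[256];
	unsigned int dlen = sizeof(dst);
	int ret;

	tfm = crypto_alloc_comp("zstd", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	memset(src, 'A', sizeof(src));		/* repetitive input */
	ret = crypto_comp_compress(tfm, src, sizeof(src), dst, &dlen);
	/* on success, dst holds dlen bytes of zstd-compressed output */

	crypto_free_comp(tfm);
	return ret;
}
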
* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (112 commits)
  crypto: chtls - free beyond end rspq_skb_cache
  crypto: chtls - kbuild warnings
  crypto: chtls - dereference null variable
  crypto: chtls - wait for memory sendmsg, sendpage
  crypto: chtls - key len correction
  crypto: salsa20 - Revert "crypto: salsa20 - export generic helpers"
  crypto: x86/salsa20 - remove x86 salsa20 implementations
  crypto: ccp - Add GET_ID SEV command
  crypto: ccp - Add DOWNLOAD_FIRMWARE SEV command
  crypto: qat - Add MODULE_FIRMWARE for all qat drivers
  crypto: ccree - silence debug prints
  crypto: ccree - better clock handling
  crypto: ccree - correct host regs offset
  crypto: chelsio - Remove separate buffer used for DMA map B0 block in CCM
  crypto: chelsio - Send IV as Immediate for cipher algo
  crypto: chelsio - Return -ENOSPC for transient busy indication.
  crypto: caam/qi - fix warning in init_cgr()
  crypto: caam - fix rfc4543 descriptors
  crypto: caam - fix MC firmware detection
  crypto: clarify licensing of OpenSSL asm code
  ...
parents fd59ccc5 b268b350
#define __ARM_ARCH__ __LINUX_ARM_ARCH__
@ SPDX-License-Identifier: GPL-2.0
@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
@ has relicensed it under the GPLv2. Therefore this program is free software;
@ you can redistribute it and/or modify it under the terms of the GNU General
@ Public License version 2 as published by the Free Software Foundation.
@
@ The original headers, including the original license headers, are
@ included below for completeness.
@ ====================================================================
@ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
......
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
#
# Permission to use under GPL terms is granted.
# ====================================================================
# SHA256 block procedure for ARMv4. May 2007.
......
@ SPDX-License-Identifier: GPL-2.0
@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
@ has relicensed it under the GPLv2. Therefore this program is free software;
@ you can redistribute it and/or modify it under the terms of the GNU General
@ Public License version 2 as published by the Free Software Foundation.
@
@ The original headers, including the original license headers, are
@ included below for completeness.
@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================
@ SHA256 block procedure for ARMv4. May 2007.
......
#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
# This code is taken from the OpenSSL project but the author (Andy Polyakov)
# has relicensed it under the GPLv2. Therefore this program is free software;
# you can redistribute it and/or modify it under the terms of the GNU General
# Public License version 2 as published by the Free Software Foundation.
#
# The original headers, including the original license headers, are
# included below for completeness.
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
#
# Permission to use under GPL terms is granted.
# ====================================================================
# SHA512 block procedure for ARMv4. September 2007.
......
@ SPDX-License-Identifier: GPL-2.0
@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
@ has relicensed it under the GPLv2. Therefore this program is free software;
@ you can redistribute it and/or modify it under the terms of the GNU General
@ Public License version 2 as published by the Free Software Foundation.
@
@ The original headers, including the original license headers, are
@ included below for completeness.
@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================
@ SHA512 block procedure for ARMv4. September 2007.
......
@@ -47,6 +47,12 @@ config CRYPTO_SM3_ARM64_CE
select CRYPTO_HASH
select CRYPTO_SM3
config CRYPTO_SM4_ARM64_CE
tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)"
depends on KERNEL_MODE_NEON
select CRYPTO_ALGAPI
select CRYPTO_SM4
config CRYPTO_GHASH_ARM64_CE
tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions"
depends on KERNEL_MODE_NEON
......
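For context, a minimal sketch (not part of this merge) of driving the SM4 cipher enabled above from C. The algorithm name "sm4" matches the generic implementation the new Kconfig entry selects; when the ARMv8.2 Crypto Extensions module is loaded, it is expected to win selection by priority. The key and input values are illustrative assumptions:

#include <linux/crypto.h>
#include <linux/err.h>

static int sm4_encrypt_one_block(void)
{
	struct crypto_cipher *tfm;
	u8 key[16] = { 0 };			/* SM4 takes a 128-bit key */
	u8 in[16] = { 0 }, out[16];
	int ret;

	tfm = crypto_alloc_cipher("sm4", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	ret = crypto_cipher_setkey(tfm, key, sizeof(key));
	if (!ret)
		crypto_cipher_encrypt_one(tfm, out, in);	/* one 16-byte block */

	crypto_free_cipher(tfm);
	return ret;
}
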
@@ -23,6 +23,9 @@ sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o
sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o
obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce.o
sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o
obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o
ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o
......
@@ -19,24 +19,33 @@
* u32 *macp, u8 const rk[], u32 rounds);
*/
ENTRY(ce_aes_ccm_auth_data)
ldr w8, [x3] /* leftover from prev round? */
frame_push 7
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
ldr w25, [x22] /* leftover from prev round? */
ld1 {v0.16b}, [x0] /* load mac */
cbz w8, 1f
sub w8, w8, #16
cbz w25, 1f
sub w25, w25, #16
eor v1.16b, v1.16b, v1.16b
0: ldrb w7, [x1], #1 /* get 1 byte of input */
subs w2, w2, #1
add w8, w8, #1
0: ldrb w7, [x20], #1 /* get 1 byte of input */
subs w21, w21, #1
add w25, w25, #1
ins v1.b[0], w7
ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */
beq 8f /* out of input? */
cbnz w8, 0b
cbnz w25, 0b
eor v0.16b, v0.16b, v1.16b
1: ld1 {v3.4s}, [x4] /* load first round key */
prfm pldl1strm, [x1]
cmp w5, #12 /* which key size? */
add x6, x4, #16
sub w7, w5, #2 /* modified # of rounds */
1: ld1 {v3.4s}, [x23] /* load first round key */
prfm pldl1strm, [x20]
cmp w24, #12 /* which key size? */
add x6, x23, #16
sub w7, w24, #2 /* modified # of rounds */
bmi 2f
bne 5f
mov v5.16b, v3.16b
@@ -55,33 +64,43 @@ ENTRY(ce_aes_ccm_auth_data)
ld1 {v5.4s}, [x6], #16 /* load next round key */
bpl 3b
aese v0.16b, v4.16b
subs w2, w2, #16 /* last data? */
subs w21, w21, #16 /* last data? */
eor v0.16b, v0.16b, v5.16b /* final round */
bmi 6f
ld1 {v1.16b}, [x1], #16 /* load next input block */
ld1 {v1.16b}, [x20], #16 /* load next input block */
eor v0.16b, v0.16b, v1.16b /* xor with mac */
bne 1b
6: st1 {v0.16b}, [x0] /* store mac */
beq 6f
if_will_cond_yield_neon
st1 {v0.16b}, [x19] /* store mac */
do_cond_yield_neon
ld1 {v0.16b}, [x19] /* reload mac */
endif_yield_neon
b 1b
6: st1 {v0.16b}, [x19] /* store mac */
beq 10f
adds w2, w2, #16
adds w21, w21, #16
beq 10f
mov w8, w2
7: ldrb w7, [x1], #1
mov w25, w21
7: ldrb w7, [x20], #1
umov w6, v0.b[0]
eor w6, w6, w7
strb w6, [x0], #1
subs w2, w2, #1
strb w6, [x19], #1
subs w21, w21, #1
beq 10f
ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */
b 7b
8: mov w7, w8
add w8, w8, #16
8: mov w7, w25
add w25, w25, #16
9: ext v1.16b, v1.16b, v1.16b, #1
adds w7, w7, #1
bne 9b
eor v0.16b, v0.16b, v1.16b
st1 {v0.16b}, [x0]
10: str w8, [x3]
st1 {v0.16b}, [x19]
10: str w25, [x22]
frame_pop
ret
ENDPROC(ce_aes_ccm_auth_data)
@@ -126,19 +145,29 @@ ENTRY(ce_aes_ccm_final)
ENDPROC(ce_aes_ccm_final)
.macro aes_ccm_do_crypt,enc
ldr x8, [x6, #8] /* load lower ctr */
ld1 {v0.16b}, [x5] /* load mac */
CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
frame_push 8
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
mov x25, x6
ldr x26, [x25, #8] /* load lower ctr */
ld1 {v0.16b}, [x24] /* load mac */
CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */
0: /* outer loop */
ld1 {v1.8b}, [x6] /* load upper ctr */
prfm pldl1strm, [x1]
add x8, x8, #1
rev x9, x8
cmp w4, #12 /* which key size? */
sub w7, w4, #2 /* get modified # of rounds */
ld1 {v1.8b}, [x25] /* load upper ctr */
prfm pldl1strm, [x20]
add x26, x26, #1
rev x9, x26
cmp w23, #12 /* which key size? */
sub w7, w23, #2 /* get modified # of rounds */
ins v1.d[1], x9 /* no carry in lower ctr */
ld1 {v3.4s}, [x3] /* load first round key */
add x10, x3, #16
ld1 {v3.4s}, [x22] /* load first round key */
add x10, x22, #16
bmi 1f
bne 4f
mov v5.16b, v3.16b
@@ -165,9 +194,9 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
bpl 2b
aese v0.16b, v4.16b
aese v1.16b, v4.16b
subs w2, w2, #16
bmi 6f /* partial block? */
ld1 {v2.16b}, [x1], #16 /* load next input block */
subs w21, w21, #16
bmi 7f /* partial block? */
ld1 {v2.16b}, [x20], #16 /* load next input block */
.if \enc == 1
eor v2.16b, v2.16b, v5.16b /* final round enc+mac */
eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */
@@ -176,18 +205,29 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */
eor v1.16b, v2.16b, v5.16b /* final round enc */
.endif
eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */
st1 {v1.16b}, [x0], #16 /* write output block */
bne 0b
CPU_LE( rev x8, x8 )
st1 {v0.16b}, [x5] /* store mac */
str x8, [x6, #8] /* store lsb end of ctr (BE) */
5: ret
6: eor v0.16b, v0.16b, v5.16b /* final round mac */
st1 {v1.16b}, [x19], #16 /* write output block */
beq 5f
if_will_cond_yield_neon
st1 {v0.16b}, [x24] /* store mac */
do_cond_yield_neon
ld1 {v0.16b}, [x24] /* reload mac */
endif_yield_neon
b 0b
5:
CPU_LE( rev x26, x26 )
st1 {v0.16b}, [x24] /* store mac */
str x26, [x25, #8] /* store lsb end of ctr (BE) */
6: frame_pop
ret
7: eor v0.16b, v0.16b, v5.16b /* final round mac */
eor v1.16b, v1.16b, v5.16b /* final round enc */
st1 {v0.16b}, [x5] /* store mac */
add w2, w2, #16 /* process partial tail block */
7: ldrb w9, [x1], #1 /* get 1 byte of input */
st1 {v0.16b}, [x24] /* store mac */
add w21, w21, #16 /* process partial tail block */
8: ldrb w9, [x20], #1 /* get 1 byte of input */
umov w6, v1.b[0] /* get top crypted ctr byte */
umov w7, v0.b[0] /* get top mac byte */
.if \enc == 1
@@ -197,13 +237,13 @@ CPU_LE( rev x8, x8 )
eor w9, w9, w6
eor w7, w7, w9
.endif
strb w9, [x0], #1 /* store out byte */
strb w7, [x5], #1 /* store mac byte */
subs w2, w2, #1
beq 5b
strb w9, [x19], #1 /* store out byte */
strb w7, [x24], #1 /* store mac byte */
subs w21, w21, #1
beq 6b
ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */
ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */
b 7b
b 8b
.endm
/*
......
@@ -30,18 +30,21 @@
.endm
/* prepare for encryption with key in rk[] */
.macro enc_prepare, rounds, rk, ignore
load_round_keys \rounds, \rk
.macro enc_prepare, rounds, rk, temp
mov \temp, \rk
load_round_keys \rounds, \temp
.endm
/* prepare for encryption (again) but with new key in rk[] */
.macro enc_switch_key, rounds, rk, ignore
load_round_keys \rounds, \rk
.macro enc_switch_key, rounds, rk, temp
mov \temp, \rk
load_round_keys \rounds, \temp
.endm
/* prepare for decryption with key in rk[] */
.macro dec_prepare, rounds, rk, ignore
load_round_keys \rounds, \rk
.macro dec_prepare, rounds, rk, temp
mov \temp, \rk
load_round_keys \rounds, \temp
.endm
.macro do_enc_Nx, de, mc, k, i0, i1, i2, i3
......
......
......
@@ -100,9 +100,10 @@
dCONSTANT .req d0
qCONSTANT .req q0
BUF .req x0
LEN .req x1
CRC .req x2
BUF .req x19
LEN .req x20
CRC .req x21
CONST .req x22
vzr .req v9
@@ -123,7 +124,14 @@ ENTRY(crc32_pmull_le)
ENTRY(crc32c_pmull_le)
adr_l x3, .Lcrc32c_constants
0: bic LEN, LEN, #15
0: frame_push 4, 64
mov BUF, x0
mov LEN, x1
mov CRC, x2
mov CONST, x3
bic LEN, LEN, #15
ld1 {v1.16b-v4.16b}, [BUF], #0x40
movi vzr.16b, #0
fmov dCONSTANT, CRC
@@ -132,7 +140,7 @@ ENTRY(crc32c_pmull_le)
cmp LEN, #0x40
b.lt less_64
ldr qCONSTANT, [x3]
ldr qCONSTANT, [CONST]
loop_64: /* 64 bytes Full cache line folding */
sub LEN, LEN, #0x40
@@ -162,10 +170,21 @@ loop_64: /* 64 bytes Full cache line folding */
eor v4.16b, v4.16b, v8.16b
cmp LEN, #0x40
b.ge loop_64
b.lt less_64
if_will_cond_yield_neon
stp q1, q2, [sp, #.Lframe_local_offset]
stp q3, q4, [sp, #.Lframe_local_offset + 32]
do_cond_yield_neon
ldp q1, q2, [sp, #.Lframe_local_offset]
ldp q3, q4, [sp, #.Lframe_local_offset + 32]
ldr qCONSTANT, [CONST]
movi vzr.16b, #0
endif_yield_neon
b loop_64
less_64: /* Folding cache line into 128bit */
ldr qCONSTANT, [x3, #16]
ldr qCONSTANT, [CONST, #16]
pmull2 v5.1q, v1.2d, vCONSTANT.2d
pmull v1.1q, v1.1d, vCONSTANT.1d
@@ -204,8 +223,8 @@ fold_64:
eor v1.16b, v1.16b, v2.16b
/* final 32-bit fold */
ldr dCONSTANT, [x3, #32]
ldr d3, [x3, #40]
ldr dCONSTANT, [CONST, #32]
ldr d3, [CONST, #40]
ext v2.16b, v1.16b, vzr.16b, #4
and v1.16b, v1.16b, v3.16b
@@ -213,7 +232,7 @@ fold_64:
eor v1.16b, v1.16b, v2.16b
/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
ldr qCONSTANT, [x3, #48]
ldr qCONSTANT, [CONST, #48]
and v2.16b, v1.16b, v3.16b
ext v2.16b, vzr.16b, v2.16b, #8
@@ -223,6 +242,7 @@ fold_64:
eor v1.16b, v1.16b, v2.16b
mov w0, v1.s[1]
frame_pop
ret
ENDPROC(crc32_pmull_le)
ENDPROC(crc32c_pmull_le)
......
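The crc32 hunks above show the pattern threaded through these arm64 changes: arguments move into callee-saved registers under frame_push/frame_pop so that if_will_cond_yield_neon can spill live NEON state, yield via do_cond_yield_neon when a reschedule is due, and reload on resume. A rough C-side analogue (an illustrative assumption, not code from this merge) is to bound the non-preemptible NEON region by chunking; the 4 KiB chunk size and the crc32_pmull_le prototype below are assumed:

#include <asm/neon.h>
#include <linux/kernel.h>
#include <linux/linkage.h>
#include <linux/sizes.h>
#include <linux/types.h>

asmlinkage u32 crc32_pmull_le(const u8 *buf, u64 len, u32 init_crc);

static u32 crc32_neon_chunked(const u8 *buf, u64 len, u32 crc)
{
	while (len) {
		u64 chunk = min_t(u64, len, SZ_4K);	/* multiple of 16 */

		kernel_neon_begin();
		crc = crc32_pmull_le(buf, chunk, crc);
		kernel_neon_end();	/* preemption point between chunks */

		buf += chunk;
		len -= chunk;
	}
	return crc;
}
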
@@ -74,13 +74,19 @@
.text
.cpu generic+crypto
arg1_low32 .req w0
arg2 .req x1
arg3 .req x2
arg1_low32 .req w19
arg2 .req x20
arg3 .req x21
vzr .req v13
ENTRY(crc_t10dif_pmull)
frame_push 3, 128
mov arg1_low32, w0
mov arg2, x1
mov arg3, x2
movi vzr.16b, #0 // init zero register
// adjust the 16-bit initial_crc value, scale it to 32 bits
@@ -175,8 +181,25 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 )
subs arg3, arg3, #128
// check if there is another 64B in the buffer to be able to fold
b.ge _fold_64_B_loop
b.lt _fold_64_B_end
if_will_cond_yield_neon
stp q0, q1, [sp, #.Lframe_local_offset]
stp q2, q3, [sp, #.Lframe_local_offset + 32]
stp q4, q5, [sp, #.Lframe_local_offset + 64]
stp q6, q7, [sp, #.Lframe_local_offset + 96]
do_cond_yield_neon
ldp q0, q1, [sp, #.Lframe_local_offset]
ldp q2, q3, [sp, #.Lframe_local_offset + 32]
ldp q4, q5, [sp, #.Lframe_local_offset + 64]
ldp q6, q7, [sp, #.Lframe_local_offset + 96]
ldr_l q10, rk3, x8
movi vzr.16b, #0 // init zero register
endif_yield_neon
b _fold_64_B_loop
_fold_64_B_end:
// at this point, the buffer pointer is pointing at the last y Bytes
// of the buffer the 64B of folded data is in 4 of the vector
// registers: v0, v1, v2, v3
@@ -304,6 +327,7 @@ _barrett:
_cleanup:
// scale the result back to 16 bits
lsr x0, x0, #16
frame_pop
ret
_less_than_128:
......
@@ -213,22 +213,31 @@
.endm
.macro __pmull_ghash, pn
ld1 {SHASH.2d}, [x3]
ld1 {XL.2d}, [x1]
frame_push 5
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
0: ld1 {SHASH.2d}, [x22]
ld1 {XL.2d}, [x20]
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
eor SHASH2.16b, SHASH2.16b, SHASH.16b
__pmull_pre_\pn
/* do the head block first, if supplied */
cbz x4, 0f
ld1 {T1.2d}, [x4]
b 1f
cbz x23, 1f
ld1 {T1.2d}, [x23]
mov x23, xzr
b 2f
0: ld1 {T1.2d}, [x2], #16
sub w0, w0, #1
1: ld1 {T1.2d}, [x21], #16
sub w19, w19, #1
1: /* multiply XL by SHASH in GF(2^128) */
2: /* multiply XL by SHASH in GF(2^128) */
CPU_LE( rev64 T1.16b, T1.16b )
ext T2.16b, XL.16b, XL.16b, #8
@@ -250,9 +259,18 @@ CPU_LE( rev64 T1.16b, T1.16b )
eor T2.16b, T2.16b, XH.16b
eor XL.16b, XL.16b, T2.16b
cbnz w0, 0b
cbz w19, 3f
if_will_cond_yield_neon
st1 {XL.2d}, [x20]
do_cond_yield_neon
b 0b
endif_yield_neon
b 1b
st1 {XL.2d}, [x1]
3: st1 {XL.2d}, [x20]
frame_pop
ret
.endm
@@ -304,38 +322,55 @@ ENDPROC(pmull_ghash_update_p8)
.endm
.macro pmull_gcm_do_crypt, enc
ld1 {SHASH.2d}, [x4]
ld1 {XL.2d}, [x1]
ldr x8, [x5, #8] // load lower counter
frame_push 10
mov x19, x0
mov x20, x1
mov x21, x2
mov x22, x3
mov x23, x4
mov x24, x5
mov x25, x6
mov x26, x7
.if \enc == 1
ldr x27, [sp, #96] // first stacked arg
.endif
ldr x28, [x24, #8] // load lower counter
CPU_LE( rev x28, x28 )
0: mov x0, x25
load_round_keys w26, x0
ld1 {SHASH.2d}, [x23]
ld1 {XL.2d}, [x20]
movi MASK.16b, #0xe1
ext SHASH2.16b, SHASH.16b, SHASH.16b, #8
CPU_LE( rev x8, x8 )
shl MASK.2d, MASK.2d, #57
eor SHASH2.16b, SHASH2.16b, SHASH.16b
.if \enc == 1
ld1 {KS.16b}, [x7]
ld1 {KS.16b}, [x27]
.endif
0: ld1 {CTR.8b}, [x5] // load upper counter
ld1 {INP.16b}, [x3], #16
rev x9, x8
add x8, x8, #1
sub w0, w0, #1
1: ld1 {CTR.8b}, [x24] // load upper counter
ld1 {INP.16b}, [x22], #16
rev x9, x28
add x28, x28, #1
sub w19, w19, #1
ins CTR.d[1], x9 // set lower counter
.if \enc == 1
eor INP.16b, INP.16b, KS.16b // encrypt input
st1 {INP.16b}, [x2], #16
st1 {INP.16b}, [x21], #16
.endif
rev64 T1.16b, INP.16b