183 lines
6.4 KiB
Diff
183 lines
6.4 KiB
Diff
From 80835d048cb2a241605beb49d17bf129ab2f5ae5 Mon Sep 17 00:00:00 2001
|
|
From: Xu Yizhou <xuyizhou1@huawei.com>
|
|
Date: Mon, 15 May 2023 11:41:59 +0800
|
|
Subject: [PATCH] Fix SM4-XTS build failure using clang
|
|
|
|
The OpenSSL community also has similar issues, and the corresponding
|
|
solutions can be found in this [PR]
|
|
(https://github.com/openssl/openssl/pull/20202). Moreover, the
|
|
community has added restrictions in the arm-xlate.pl file to recognize
|
|
the 'LDR REG, =VALUE' pseudo instruction on Neon, as shown in this [PR]
|
|
(https://github.com/openssl/openssl/pull/20222).
|
|
|
|
Signed-off-by: Xu Yizhou <xuyizhou1@huawei.com>
|
|
---
|
|
crypto/perlasm/arm-xlate.pl | 10 ++++++++++
|
|
crypto/sm4/asm/sm4-armv8.pl | 12 ++++++-----
|
|
crypto/sm4/asm/vpsm4_ex-armv8.pl | 34 ++++++++++++++++++++------------
|
|
3 files changed, 38 insertions(+), 18 deletions(-)
|
|
|
|
diff --git a/crypto/perlasm/arm-xlate.pl b/crypto/perlasm/arm-xlate.pl
|
|
index 48819be..a2f3838 100755
|
|
--- a/crypto/perlasm/arm-xlate.pl
|
|
+++ b/crypto/perlasm/arm-xlate.pl
|
|
@@ -170,6 +170,16 @@ while(my $line=<>) {
|
|
}
|
|
}
|
|
|
|
+ # ldr REG, =VALUE pseudo-instruction - avoid clang issue with Neon registers
|
|
+ #
|
|
+ if ($line =~ /^\s*ldr\s+([qd]\d\d?)\s*,\s*=(\w+)/i) {
|
|
+ # Immediate load via literal pool into qN or DN - clang max is 2^32-1
|
|
+ my ($reg, $value) = ($1, $2);
|
|
+ # If $value is hex, 0x + 8 hex chars = 10 chars total will be okay
|
|
+ # If $value is decimal, 2^32 - 1 = 4294967295 will be okay (also 10 chars)
|
|
+ die("$line: immediate load via literal pool into $reg: value too large for clang - redo manually") if length($value) > 10;
|
|
+ }
|
|
+
|
|
print $line if ($line);
|
|
print "\n";
|
|
}
|
|
diff --git a/crypto/sm4/asm/sm4-armv8.pl b/crypto/sm4/asm/sm4-armv8.pl
|
|
index 923c1c0..07ba53a 100644
|
|
--- a/crypto/sm4/asm/sm4-armv8.pl
|
|
+++ b/crypto/sm4/asm/sm4-armv8.pl
|
|
@@ -244,6 +244,8 @@ $code.=<<___;
|
|
.long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
|
|
.Lfk:
|
|
.long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
|
|
+.Lxts_magic:
|
|
+ .dword 0x0101010101010187,0x0101010101010101
|
|
___
|
|
}}}
|
|
|
|
@@ -604,7 +606,7 @@ $code.=<<___;
|
|
.globl ${prefix}_ctr32_encrypt_blocks
|
|
.type ${prefix}_ctr32_encrypt_blocks,%function
|
|
.align 5
|
|
-${prefix}_ctr32_encrypt_blocks:
|
|
+${prefix}_ctr32_encrypt_blocks:
|
|
stp d8,d9,[sp, #-16]!
|
|
|
|
ld1 {$ivec.4s},[$ivp]
|
|
@@ -736,7 +738,7 @@ $code.=<<___;
|
|
.align 5
|
|
${prefix}_xts_do_cipher${standard}:
|
|
mov w$magic,0x87
|
|
- ldr $qMagic, =0x01010101010101010101010101010187
|
|
+ ldr $qMagic, .Lxts_magic
|
|
// used to encrypt the XORed plaintext blocks
|
|
ld1 {@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk2],#64
|
|
ld1 {@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk2]
|
|
@@ -963,7 +965,7 @@ $code.=<<___;
|
|
cmp $remain,0
|
|
b.eq 99f
|
|
|
|
-// This brance calculates the last two tweaks,
|
|
+// This branch calculates the last two tweaks,
|
|
// while the encryption/decryption length is larger than 32
|
|
.last_2blks_tweak${standard}:
|
|
___
|
|
@@ -974,7 +976,7 @@ $code.=<<___;
|
|
b .check_dec${standard}
|
|
|
|
|
|
-// This brance calculates the last two tweaks,
|
|
+// This branch calculates the last two tweaks,
|
|
// while the encryption/decryption length is less than 32, who only need two tweaks
|
|
.only_2blks_tweak${standard}:
|
|
mov @tweak[1].16b,@tweak[0].16b
|
|
@@ -1018,7 +1020,7 @@ $code.=<<___;
|
|
strb w$tmp1,[$lastBlk,$remain]
|
|
strb w$tmp0,[$out,$remain]
|
|
b.gt .loop${standard}
|
|
- ld1 {@dat[0].4s}, [$lastBlk]
|
|
+ ld1 {@dat[0].4s}, [$lastBlk]
|
|
eor @dat[0].16b, @dat[0].16b, @tweak[2].16b
|
|
___
|
|
&rev32(@dat[0],@dat[0]);
|
|
diff --git a/crypto/sm4/asm/vpsm4_ex-armv8.pl b/crypto/sm4/asm/vpsm4_ex-armv8.pl
|
|
index 86a6f89..4fd2975 100644
|
|
--- a/crypto/sm4/asm/vpsm4_ex-armv8.pl
|
|
+++ b/crypto/sm4/asm/vpsm4_ex-armv8.pl
|
|
@@ -108,12 +108,12 @@ ___
|
|
|
|
sub load_sbox_matrix () {
|
|
$code.=<<___;
|
|
- ldr $MaskQ, =0x0306090c0f0205080b0e0104070a0d00
|
|
- ldr $TAHMatQ, =0x22581a6002783a4062185a2042387a00
|
|
- ldr $TALMatQ, =0xc10bb67c4a803df715df62a89e54e923
|
|
- ldr $ATAHMatQ, =0x1407c6d56c7fbeadb9aa6b78c1d21300
|
|
- ldr $ATALMatQ, =0xe383c1a1fe9edcbc6404462679195b3b
|
|
- ldr $ANDMaskQ, =0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
|
|
+ ldr $MaskQ, .Lsbox_magic
|
|
+ ldr $TAHMatQ, .Lsbox_magic+16
|
|
+ ldr $TALMatQ, .Lsbox_magic+32
|
|
+ ldr $ATAHMatQ, .Lsbox_magic+48
|
|
+ ldr $ATALMatQ, .Lsbox_magic+64
|
|
+ ldr $ANDMaskQ, .Lsbox_magic+80
|
|
___
|
|
}
|
|
# matrix multiplication Mat*x = (lowerMat*x) ^ (higherMat*x)
|
|
@@ -505,7 +505,7 @@ sub compute_tweak_vec() {
|
|
my $des = shift;
|
|
&rbit(@vtmp[2],$src);
|
|
$code.=<<___;
|
|
- ldr @qtmp[0], =0x01010101010101010101010101010187
|
|
+ ldr @qtmp[0], .Lxts_magic
|
|
shl $des.16b, @vtmp[2].16b, #1
|
|
ext @vtmp[1].16b, @vtmp[2].16b, @vtmp[2].16b,#15
|
|
ushr @vtmp[1].16b, @vtmp[1].16b, #7
|
|
@@ -569,10 +569,18 @@ ${prefix}_consts:
|
|
.long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209
|
|
.long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
|
|
.Lfk:
|
|
- .long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
|
|
+ .long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
|
|
.Lshuffles:
|
|
- .long 0x07060504, 0x0B0A0908, 0x0F0E0D0C, 0x03020100
|
|
-
|
|
+ .long 0x07060504, 0x0B0A0908, 0x0F0E0D0C, 0x03020100
|
|
+.Lxts_magic:
|
|
+ .dword 0x0101010101010187,0x0101010101010101
|
|
+.Lsbox_magic:
|
|
+ .dword 0x0b0e0104070a0d00,0x0306090c0f020508
|
|
+ .dword 0x62185a2042387a00,0x22581a6002783a40
|
|
+ .dword 0x15df62a89e54e923,0xc10bb67c4a803df7
|
|
+ .dword 0xb9aa6b78c1d21300,0x1407c6d56c7fbead
|
|
+ .dword 0x6404462679195b3b,0xe383c1a1fe9edcbc
|
|
+ .dword 0x0f0f0f0f0f0f0f0f,0x0f0f0f0f0f0f0f0f
|
|
.size ${prefix}_consts,.-${prefix}_consts
|
|
___
|
|
|
|
@@ -1033,7 +1041,7 @@ $code.=<<___;
|
|
cmp $remain,0
|
|
b.eq .return${standard}
|
|
|
|
-// This brance calculates the last two tweaks,
|
|
+// This branch calculates the last two tweaks,
|
|
// while the encryption/decryption length is larger than 32
|
|
.last_2blks_tweak${standard}:
|
|
___
|
|
@@ -1044,7 +1052,7 @@ $code.=<<___;
|
|
b .check_dec${standard}
|
|
|
|
|
|
-// This brance calculates the last two tweaks,
|
|
+// This branch calculates the last two tweaks,
|
|
// while the encryption/decryption length is equal to 32, who only need two tweaks
|
|
.only_2blks_tweak${standard}:
|
|
mov @tweak[1].16b,@tweak[0].16b
|
|
@@ -1087,7 +1095,7 @@ $code.=<<___;
|
|
strb $wtmp1,[$lastBlk,$remain]
|
|
strb $wtmp0,[$outp,$remain]
|
|
b.gt .loop${standard}
|
|
- ld1 {@data[0].4s}, [$lastBlk]
|
|
+ ld1 {@data[0].4s}, [$lastBlk]
|
|
eor @data[0].16b, @data[0].16b, @tweak[2].16b
|
|
___
|
|
&rev32(@data[0],@data[0]);
|
|
--
|
|
2.36.1
|
|
|