Files
openGauss-third_party/dependency/openssl/Fix-SM4-XTS-build-failure-using-clang.patch
2023-12-26 10:47:01 +08:00

183 lines
6.4 KiB
Diff

From 80835d048cb2a241605beb49d17bf129ab2f5ae5 Mon Sep 17 00:00:00 2001
From: Xu Yizhou <xuyizhou1@huawei.com>
Date: Mon, 15 May 2023 11:41:59 +0800
Subject: [PATCH] Fix SM4-XTS build failure using clang

Upstream OpenSSL hit the same build failure; its fix is in this PR:
https://github.com/openssl/openssl/pull/20202. Upstream also added a
check to arm-xlate.pl that detects the 'LDR REG, =VALUE' pseudo
instruction targeting Neon registers and rejects values too large for
clang, as shown in this PR:
https://github.com/openssl/openssl/pull/20222.

Signed-off-by: Xu Yizhou <xuyizhou1@huawei.com>
---
crypto/perlasm/arm-xlate.pl | 10 ++++++++++
crypto/sm4/asm/sm4-armv8.pl | 12 ++++++-----
crypto/sm4/asm/vpsm4_ex-armv8.pl | 34 ++++++++++++++++++++------------
3 files changed, 38 insertions(+), 18 deletions(-)
diff --git a/crypto/perlasm/arm-xlate.pl b/crypto/perlasm/arm-xlate.pl
index 48819be..a2f3838 100755
--- a/crypto/perlasm/arm-xlate.pl
+++ b/crypto/perlasm/arm-xlate.pl
@@ -170,6 +170,16 @@ while(my $line=<>) {
}
}
+ # ldr REG, =VALUE pseudo-instruction - avoid clang issue with Neon registers
+ #
+ if ($line =~ /^\s*ldr\s+([qd]\d\d?)\s*,\s*=(\w+)/i) {
+ # Immediate load via literal pool into qN or dN - clang's max value is 2^32-1
+ my ($reg, $value) = ($1, $2);
+ # If $value is hex, 0x + 8 hex chars = 10 chars total will be okay
+ # If $value is decimal, 2^32 - 1 = 4294967295 will be okay (also 10 chars)
+ die("$line: immediate load via literal pool into $reg: value too large for clang - redo manually") if length($value) > 10;
+ }
+
print $line if ($line);
print "\n";
}
diff --git a/crypto/sm4/asm/sm4-armv8.pl b/crypto/sm4/asm/sm4-armv8.pl
index 923c1c0..07ba53a 100644
--- a/crypto/sm4/asm/sm4-armv8.pl
+++ b/crypto/sm4/asm/sm4-armv8.pl
@@ -244,6 +244,8 @@ $code.=<<___;
.long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
.Lfk:
.long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+.Lxts_magic:
+ .dword 0x0101010101010187,0x0101010101010101
___
}}}
@@ -604,7 +606,7 @@ $code.=<<___;
.globl ${prefix}_ctr32_encrypt_blocks
.type ${prefix}_ctr32_encrypt_blocks,%function
.align 5
-${prefix}_ctr32_encrypt_blocks:
+${prefix}_ctr32_encrypt_blocks:
stp d8,d9,[sp, #-16]!
ld1 {$ivec.4s},[$ivp]
@@ -736,7 +738,7 @@ $code.=<<___;
.align 5
${prefix}_xts_do_cipher${standard}:
mov w$magic,0x87
- ldr $qMagic, =0x01010101010101010101010101010187
+ ldr $qMagic, .Lxts_magic
// used to encrypt the XORed plaintext blocks
ld1 {@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk2],#64
ld1 {@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk2]
@@ -963,7 +965,7 @@ $code.=<<___;
cmp $remain,0
b.eq 99f
-// This brance calculates the last two tweaks,
+// This brance calculates the last two tweaks,
// while the encryption/decryption length is larger than 32
.last_2blks_tweak${standard}:
___
@@ -974,7 +976,7 @@ $code.=<<___;
b .check_dec${standard}
-// This brance calculates the last two tweaks,
+// This brance calculates the last two tweaks,
// while the encryption/decryption length is less than 32, who only need two tweaks
.only_2blks_tweak${standard}:
mov @tweak[1].16b,@tweak[0].16b
@@ -1018,7 +1020,7 @@ $code.=<<___;
strb w$tmp1,[$lastBlk,$remain]
strb w$tmp0,[$out,$remain]
b.gt .loop${standard}
- ld1 {@dat[0].4s}, [$lastBlk]
+ ld1 {@dat[0].4s}, [$lastBlk]
eor @dat[0].16b, @dat[0].16b, @tweak[2].16b
___
&rev32(@dat[0],@dat[0]);
diff --git a/crypto/sm4/asm/vpsm4_ex-armv8.pl b/crypto/sm4/asm/vpsm4_ex-armv8.pl
index 86a6f89..4fd2975 100644
--- a/crypto/sm4/asm/vpsm4_ex-armv8.pl
+++ b/crypto/sm4/asm/vpsm4_ex-armv8.pl
@@ -108,12 +108,12 @@ ___
sub load_sbox_matrix () {
$code.=<<___;
- ldr $MaskQ, =0x0306090c0f0205080b0e0104070a0d00
- ldr $TAHMatQ, =0x22581a6002783a4062185a2042387a00
- ldr $TALMatQ, =0xc10bb67c4a803df715df62a89e54e923
- ldr $ATAHMatQ, =0x1407c6d56c7fbeadb9aa6b78c1d21300
- ldr $ATALMatQ, =0xe383c1a1fe9edcbc6404462679195b3b
- ldr $ANDMaskQ, =0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
+ ldr $MaskQ, .Lsbox_magic
+ ldr $TAHMatQ, .Lsbox_magic+16
+ ldr $TALMatQ, .Lsbox_magic+32
+ ldr $ATAHMatQ, .Lsbox_magic+48
+ ldr $ATALMatQ, .Lsbox_magic+64
+ ldr $ANDMaskQ, .Lsbox_magic+80
___
}
# matrix multiplication Mat*x = (lowerMat*x) ^ (higherMat*x)
@@ -505,7 +505,7 @@ sub compute_tweak_vec() {
my $des = shift;
&rbit(@vtmp[2],$src);
$code.=<<___;
- ldr @qtmp[0], =0x01010101010101010101010101010187
+ ldr @qtmp[0], .Lxts_magic
shl $des.16b, @vtmp[2].16b, #1
ext @vtmp[1].16b, @vtmp[2].16b, @vtmp[2].16b,#15
ushr @vtmp[1].16b, @vtmp[1].16b, #7
@@ -569,10 +569,18 @@ ${prefix}_consts:
.long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209
.long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
.Lfk:
- .long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+ .long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
.Lshuffles:
- .long 0x07060504, 0x0B0A0908, 0x0F0E0D0C, 0x03020100
-
+ .long 0x07060504, 0x0B0A0908, 0x0F0E0D0C, 0x03020100
+.Lxts_magic:
+ .dword 0x0101010101010187,0x0101010101010101
+.Lsbox_magic:
+ .dword 0x0b0e0104070a0d00,0x0306090c0f020508
+ .dword 0x62185a2042387a00,0x22581a6002783a40
+ .dword 0x15df62a89e54e923,0xc10bb67c4a803df7
+ .dword 0xb9aa6b78c1d21300,0x1407c6d56c7fbead
+ .dword 0x6404462679195b3b,0xe383c1a1fe9edcbc
+ .dword 0x0f0f0f0f0f0f0f0f,0x0f0f0f0f0f0f0f0f
.size ${prefix}_consts,.-${prefix}_consts
___
@@ -1033,7 +1041,7 @@ $code.=<<___;
cmp $remain,0
b.eq .return${standard}
-// This brance calculates the last two tweaks,
+// This brance calculates the last two tweaks,
// while the encryption/decryption length is larger than 32
.last_2blks_tweak${standard}:
___
@@ -1044,7 +1052,7 @@ $code.=<<___;
b .check_dec${standard}
-// This brance calculates the last two tweaks,
+// This brance calculates the last two tweaks,
// while the encryption/decryption length is equal to 32, who only need two tweaks
.only_2blks_tweak${standard}:
mov @tweak[1].16b,@tweak[0].16b
@@ -1087,7 +1095,7 @@ $code.=<<___;
strb $wtmp1,[$lastBlk,$remain]
strb $wtmp0,[$outp,$remain]
b.gt .loop${standard}
- ld1 {@data[0].4s}, [$lastBlk]
+ ld1 {@data[0].4s}, [$lastBlk]
eor @data[0].16b, @data[0].16b, @tweak[2].16b
___
&rev32(@data[0],@data[0]);
--
2.36.1