openGauss-third_party/dependency/openssl/Fix-SM4-XTS-build-failure-using-clang.patch

From 80835d048cb2a241605beb49d17bf129ab2f5ae5 Mon Sep 17 00:00:00 2001
From: Xu Yizhou <xuyizhou1@huawei.com>
Date: Mon, 15 May 2023 11:41:59 +0800
Subject: [PATCH] Fix SM4-XTS build failure using clang

The OpenSSL community hit the same build failure; the corresponding
fix can be found in this [PR]
(https://github.com/openssl/openssl/pull/20202). In addition, the
community added a check to arm-xlate.pl that detects the
'LDR REG, =VALUE' pseudo-instruction on Neon registers and rejects
values too large for clang, as shown in this [PR]
(https://github.com/openssl/openssl/pull/20222).

Signed-off-by: Xu Yizhou <xuyizhou1@huawei.com>
---
 crypto/perlasm/arm-xlate.pl      | 10 ++++++++++
 crypto/sm4/asm/sm4-armv8.pl      | 12 ++++++-----
 crypto/sm4/asm/vpsm4_ex-armv8.pl | 34 ++++++++++++++++++++------------
 3 files changed, 38 insertions(+), 18 deletions(-)

diff --git a/crypto/perlasm/arm-xlate.pl b/crypto/perlasm/arm-xlate.pl
index 48819be..a2f3838 100755
--- a/crypto/perlasm/arm-xlate.pl
+++ b/crypto/perlasm/arm-xlate.pl
@@ -170,6 +170,16 @@ while(my $line=<>) {
 	}
     }

+    # ldr REG, =VALUE pseudo-instruction - avoid clang issue with Neon registers
+    #
+    if ($line =~ /^\s*ldr\s+([qd]\d\d?)\s*,\s*=(\w+)/i) {
+        # Immediate load via literal pool into qN or dN - clang accepts at most 2^32-1
+        my ($reg, $value) = ($1, $2);
+        # If $value is hex, 0x + 8 hex chars = 10 chars total will be okay
+        # If $value is decimal, 2^32 - 1 = 4294967295 will be okay (also 10 chars)
+        die("$line: immediate load via literal pool into $reg: value too large for clang - redo manually") if length($value) > 10;
+    }
+
     print $line if ($line);
     print "\n";
 }
diff --git a/crypto/sm4/asm/sm4-armv8.pl b/crypto/sm4/asm/sm4-armv8.pl
index 923c1c0..07ba53a 100644
--- a/crypto/sm4/asm/sm4-armv8.pl
+++ b/crypto/sm4/asm/sm4-armv8.pl
@@ -244,6 +244,8 @@ $code.=<<___;
 	.long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
 .Lfk:
 	.long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+.Lxts_magic:
+	.dword 0x0101010101010187,0x0101010101010101
 ___
 }}}

@@ -604,7 +606,7 @@ $code.=<<___;
 .globl	${prefix}_ctr32_encrypt_blocks
 .type	${prefix}_ctr32_encrypt_blocks,%function
 .align	5
-${prefix}_ctr32_encrypt_blocks:
+${prefix}_ctr32_encrypt_blocks:
 	stp	d8,d9,[sp, #-16]!

 	ld1	{$ivec.4s},[$ivp]
@@ -736,7 +738,7 @@ $code.=<<___;
 .align    5
 ${prefix}_xts_do_cipher${standard}:
 	mov w$magic,0x87
-    ldr $qMagic, =0x01010101010101010101010101010187
+    ldr $qMagic, .Lxts_magic
 	// used to encrypt the XORed plaintext blocks
 	ld1	{@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk2],#64
 	ld1	{@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk2]
@@ -963,7 +965,7 @@ $code.=<<___;
     cmp $remain,0
     b.eq 99f

-// This brance calculates the last two tweaks,
+// This brance calculates the last two tweaks,
 // while the encryption/decryption length is larger than 32
 .last_2blks_tweak${standard}:
 ___
@@ -974,7 +976,7 @@ $code.=<<___;
     b .check_dec${standard}


-// This brance calculates the last two tweaks,
+// This brance calculates the last two tweaks,
 // while the encryption/decryption length is less than 32, who only need two tweaks
 .only_2blks_tweak${standard}:
     mov @tweak[1].16b,@tweak[0].16b
@@ -1018,7 +1020,7 @@ $code.=<<___;
         strb    w$tmp1,[$lastBlk,$remain]
         strb    w$tmp0,[$out,$remain]
     b.gt .loop${standard}
-    ld1        {@dat[0].4s}, [$lastBlk]
+    ld1        {@dat[0].4s}, [$lastBlk]
     eor @dat[0].16b, @dat[0].16b, @tweak[2].16b
 ___
 	&rev32(@dat[0],@dat[0]);
diff --git a/crypto/sm4/asm/vpsm4_ex-armv8.pl b/crypto/sm4/asm/vpsm4_ex-armv8.pl
index 86a6f89..4fd2975 100644
--- a/crypto/sm4/asm/vpsm4_ex-armv8.pl
+++ b/crypto/sm4/asm/vpsm4_ex-armv8.pl
@@ -108,12 +108,12 @@ ___

 sub load_sbox_matrix () {
 $code.=<<___;
-    ldr $MaskQ,       =0x0306090c0f0205080b0e0104070a0d00
-    ldr $TAHMatQ,    =0x22581a6002783a4062185a2042387a00
-    ldr $TALMatQ,    =0xc10bb67c4a803df715df62a89e54e923
-    ldr $ATAHMatQ,   =0x1407c6d56c7fbeadb9aa6b78c1d21300
-    ldr $ATALMatQ,   =0xe383c1a1fe9edcbc6404462679195b3b
-    ldr $ANDMaskQ,    =0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f
+	ldr $MaskQ, .Lsbox_magic
+	ldr $TAHMatQ, .Lsbox_magic+16
+	ldr $TALMatQ, .Lsbox_magic+32
+	ldr $ATAHMatQ, .Lsbox_magic+48
+	ldr $ATALMatQ, .Lsbox_magic+64
+	ldr $ANDMaskQ, .Lsbox_magic+80
 ___
 }
 # matrix multiplication Mat*x = (lowerMat*x) ^ (higherMat*x)
@@ -505,7 +505,7 @@ sub compute_tweak_vec() {
     my $des = shift;
     &rbit(@vtmp[2],$src);
 $code.=<<___;
-    ldr  @qtmp[0], =0x01010101010101010101010101010187
+    ldr  @qtmp[0], .Lxts_magic
     shl  $des.16b, @vtmp[2].16b, #1
     ext  @vtmp[1].16b, @vtmp[2].16b, @vtmp[2].16b,#15
     ushr @vtmp[1].16b, @vtmp[1].16b, #7
@@ -569,10 +569,18 @@ ${prefix}_consts:
 	.long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209
 	.long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
 .Lfk:
-    .long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+	.long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
 .Lshuffles:
-    .long 0x07060504, 0x0B0A0908, 0x0F0E0D0C, 0x03020100
-
+	.long 0x07060504, 0x0B0A0908, 0x0F0E0D0C, 0x03020100
+.Lxts_magic:
+	.dword 0x0101010101010187,0x0101010101010101
+.Lsbox_magic:
+	.dword 0x0b0e0104070a0d00,0x0306090c0f020508
+	.dword 0x62185a2042387a00,0x22581a6002783a40
+	.dword 0x15df62a89e54e923,0xc10bb67c4a803df7
+	.dword 0xb9aa6b78c1d21300,0x1407c6d56c7fbead
+	.dword 0x6404462679195b3b,0xe383c1a1fe9edcbc
+	.dword 0x0f0f0f0f0f0f0f0f,0x0f0f0f0f0f0f0f0f
 .size	${prefix}_consts,.-${prefix}_consts
 ___

@@ -1033,7 +1041,7 @@ $code.=<<___;
     cmp $remain,0
     b.eq .return${standard}

-// This brance calculates the last two tweaks,
+// This brance calculates the last two tweaks,
 // while the encryption/decryption length is larger than 32
 .last_2blks_tweak${standard}:
 ___
@@ -1044,7 +1052,7 @@ $code.=<<___;
     b .check_dec${standard}


-// This brance calculates the last two tweaks,
+// This brance calculates the last two tweaks,
 // while the encryption/decryption length is equal to 32, who only need two tweaks
 .only_2blks_tweak${standard}:
     mov @tweak[1].16b,@tweak[0].16b
@@ -1087,7 +1095,7 @@ $code.=<<___;
         strb    $wtmp1,[$lastBlk,$remain]
         strb    $wtmp0,[$outp,$remain]
     b.gt .loop${standard}
-    ld1        {@data[0].4s}, [$lastBlk]
+    ld1        {@data[0].4s}, [$lastBlk]
     eor @data[0].16b, @data[0].16b, @tweak[2].16b
 ___
     &rev32(@data[0],@data[0]);
--
2.36.1