493 lines
15 KiB
Diff
493 lines
15 KiB
Diff
From 4d2e328357ac4b468d4762a5a5f615d7e7bf46a6 Mon Sep 17 00:00:00 2001
|
|
From: Xu Yizhou <xuyizhou1@huawei.com>
|
|
Date: Thu, 27 Oct 2022 20:49:34 +0800
|
|
Subject: [PATCH 1/3] SM3 acceleration with SM3 hardware instruction on aarch64
|
|
|
|
This patch contains the following two PRs,
|
|
|
|
1. SM3 acceleration with SM3 hardware instruction on aarch64
|
|
|
|
The SM3 hardware instruction is an optional feature of the crypto extension for
|
|
aarch64. This implementation accelerates SM3 via SM3 instructions. For
|
|
platforms that do not support the SM3 instruction, the original C
|
|
implementation still works. Thanks to Alibaba for testing and reporting
|
|
the following perf numbers for Yitian710:
|
|
|
|
Benchmark on T-Head Yitian-710 2.75GHz:
|
|
|
|
Before:
|
|
type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes
|
|
sm3 49297.82k 121062.63k 223106.05k 283371.52k 307574.10k 309400.92k
|
|
|
|
After (33% - 74% faster):
|
|
type 16 bytes 64 bytes 256 bytes 1024 bytes 8192 bytes 16384 bytes
|
|
sm3 65640.01k 179121.79k 359854.59k 481448.96k 534055.59k 538274.47k
|
|
|
|
Reviewed-by: Paul Dale <pauli@openssl.org>
|
|
Reviewed-by: Tomas Mraz <tomas@openssl.org>
|
|
(Merged from https://github.com/openssl/openssl/pull/17454)
|
|
|
|
2. Fix sm3ss1 translation issue in sm3-armv8.pl
|
|
|
|
Reviewed-by: Tomas Mraz <tomas@openssl.org>
|
|
Reviewed-by: Matt Caswell <matt@openssl.org>
|
|
Reviewed-by: Paul Dale <pauli@openssl.org>
|
|
(Merged from https://github.com/openssl/openssl/pull/17542)
|
|
|
|
Signed-off-by: Xu Yizhou <xuyizhou1@huawei.com>
|
|
---
|
|
Configurations/00-base-templates.conf | 1 +
|
|
Configure | 4 +
|
|
crypto/arm64cpuid.pl | 7 +
|
|
crypto/arm_arch.h | 1 +
|
|
crypto/armcap.c | 10 +
|
|
crypto/sm3/asm/sm3-armv8.pl | 280 ++++++++++++++++++++++++++
|
|
crypto/sm3/build.info | 15 +-
|
|
crypto/sm3/sm3_local.h | 16 +-
|
|
8 files changed, 332 insertions(+), 2 deletions(-)
|
|
create mode 100644 crypto/sm3/asm/sm3-armv8.pl
|
|
|
|
diff --git a/Configurations/00-base-templates.conf b/Configurations/00-base-templates.conf
|
|
index 1d35012..a67ae65 100644
|
|
--- a/Configurations/00-base-templates.conf
|
|
+++ b/Configurations/00-base-templates.conf
|
|
@@ -322,6 +322,7 @@ my %targets=(
|
|
poly1305_asm_src=> "poly1305-armv8.S",
|
|
keccak1600_asm_src => "keccak1600-armv8.S",
|
|
sm4_asm_src => "vpsm4_ex-armv8.S",
|
|
+ sm3_asm_src => "sm3-armv8.S",
|
|
},
|
|
parisc11_asm => {
|
|
template => 1,
|
|
diff --git a/Configure b/Configure
|
|
index 3bfe360..fce460d 100755
|
|
--- a/Configure
|
|
+++ b/Configure
|
|
@@ -1423,6 +1423,9 @@ unless ($disabled{asm}) {
|
|
if ($target{sm4_asm_src} ne "") {
|
|
push @{$config{lib_defines}}, "VPSM4_EX_ASM";
|
|
}
|
|
+ if ($target{sm3_asm_src} ne "") {
|
|
+ push @{$config{lib_defines}}, "SM3_ASM";
|
|
+ }
|
|
}
|
|
|
|
my %predefined_C = compiler_predefined($config{CROSS_COMPILE}.$config{CC});
|
|
@@ -3379,6 +3382,7 @@ sub print_table_entry
|
|
"multilib",
|
|
"build_scheme",
|
|
"sm4_asm_src",
|
|
+ "sm3_asm_src",
|
|
);
|
|
|
|
if ($type eq "TABLE") {
|
|
diff --git a/crypto/arm64cpuid.pl b/crypto/arm64cpuid.pl
|
|
index 319927e..1e9b167 100755
|
|
--- a/crypto/arm64cpuid.pl
|
|
+++ b/crypto/arm64cpuid.pl
|
|
@@ -78,6 +78,13 @@ _armv8_sha512_probe:
|
|
ret
|
|
.size _armv8_sha512_probe,.-_armv8_sha512_probe
|
|
|
|
+.globl _armv8_sm3_probe
|
|
+.type _armv8_sm3_probe,%function
|
|
+_armv8_sm3_probe:
|
|
+ .long 0xce63c004 // sm3partw1 v4.4s, v0.4s, v3.4s
|
|
+ ret
|
|
+.size _armv8_sm3_probe,.-_armv8_sm3_probe
|
|
+
|
|
.globl OPENSSL_cleanse
|
|
.type OPENSSL_cleanse,%function
|
|
.align 5
|
|
diff --git a/crypto/arm_arch.h b/crypto/arm_arch.h
|
|
index 8b71055..8839b21 100644
|
|
--- a/crypto/arm_arch.h
|
|
+++ b/crypto/arm_arch.h
|
|
@@ -80,5 +80,6 @@ extern unsigned int OPENSSL_armcap_P;
|
|
# define ARMV8_SHA256 (1<<4)
|
|
# define ARMV8_PMULL (1<<5)
|
|
# define ARMV8_SHA512 (1<<6)
|
|
+# define ARMV8_SM3 (1<<9)
|
|
|
|
#endif
|
|
diff --git a/crypto/armcap.c b/crypto/armcap.c
|
|
index 48c5d4d..8b2f4a5 100644
|
|
--- a/crypto/armcap.c
|
|
+++ b/crypto/armcap.c
|
|
@@ -47,6 +47,7 @@ void _armv8_sha1_probe(void);
|
|
void _armv8_sha256_probe(void);
|
|
void _armv8_pmull_probe(void);
|
|
# ifdef __aarch64__
|
|
+void _armv8_sm3_probe(void);
|
|
void _armv8_sha512_probe(void);
|
|
# endif
|
|
uint32_t _armv7_tick(void);
|
|
@@ -130,6 +131,7 @@ static unsigned long getauxval(unsigned long key)
|
|
# define HWCAP_CE_PMULL (1 << 4)
|
|
# define HWCAP_CE_SHA1 (1 << 5)
|
|
# define HWCAP_CE_SHA256 (1 << 6)
|
|
+# define HWCAP_CE_SM3 (1 << 18)
|
|
# define HWCAP_CE_SHA512 (1 << 21)
|
|
# endif
|
|
|
|
@@ -190,6 +192,9 @@ void OPENSSL_cpuid_setup(void)
|
|
# ifdef __aarch64__
|
|
if (hwcap & HWCAP_CE_SHA512)
|
|
OPENSSL_armcap_P |= ARMV8_SHA512;
|
|
+
|
|
+ if (hwcap & HWCAP_CE_SM3)
|
|
+ OPENSSL_armcap_P |= ARMV8_SM3;
|
|
# endif
|
|
}
|
|
# endif
|
|
@@ -233,6 +238,11 @@ void OPENSSL_cpuid_setup(void)
|
|
_armv8_sha512_probe();
|
|
OPENSSL_armcap_P |= ARMV8_SHA512;
|
|
}
|
|
+
|
|
+ if (sigsetjmp(ill_jmp, 1) == 0) {
|
|
+ _armv8_sm3_probe();
|
|
+ OPENSSL_armcap_P |= ARMV8_SM3;
|
|
+ }
|
|
# endif
|
|
}
|
|
# endif
|
|
diff --git a/crypto/sm3/asm/sm3-armv8.pl b/crypto/sm3/asm/sm3-armv8.pl
|
|
new file mode 100644
|
|
index 0000000..677ca52
|
|
--- /dev/null
|
|
+++ b/crypto/sm3/asm/sm3-armv8.pl
|
|
@@ -0,0 +1,280 @@
|
|
+#! /usr/bin/env perl
|
|
+# Copyright 2021-2022 The OpenSSL Project Authors. All Rights Reserved.
|
|
+#
|
|
+# Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
+# this file except in compliance with the License. You can obtain a copy
|
|
+# in the file LICENSE in the source distribution or at
|
|
+# https://www.openssl.org/source/license.html
|
|
+#
|
|
+# This module implements support for Armv8 SM3 instructions
|
|
+
|
|
+# $output is the last argument if it looks like a file (it has an extension)
|
|
+# $flavour is the first argument if it doesn't look like a file
|
|
+$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
|
|
+$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
|
|
+
|
|
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
|
|
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
|
|
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
|
|
+die "can't locate arm-xlate.pl";
|
|
+
|
|
+open OUT,"| \"$^X\" $xlate $flavour \"$output\""
|
|
+ or die "can't call $xlate: $!";
|
|
+*STDOUT=*OUT;
|
|
+
|
|
+# Message expanding:
|
|
+# Wj <- P1(W[j-16]^W[j-9]^(W[j-3]<<<15))^(W[j-13]<<<7)^W[j-6]
|
|
+# Input: s0, s1, s2, s3
|
|
+# s0 = w0 | w1 | w2 | w3
|
|
+# s1 = w4 | w5 | w6 | w7
|
|
+# s2 = w8 | w9 | w10 | w11
|
|
+# s3 = w12 | w13 | w14 | w15
|
|
+# Output: s4
|
|
+sub msg_exp () {
|
|
+my $s0 = shift;
|
|
+my $s1 = shift;
|
|
+my $s2 = shift;
|
|
+my $s3 = shift;
|
|
+my $s4 = shift;
|
|
+my $vtmp1 = shift;
|
|
+my $vtmp2 = shift;
|
|
+$code.=<<___;
|
|
+ // s4 = w7 | w8 | w9 | w10
|
|
+ ext $s4.16b, $s1.16b, $s2.16b, #12
|
|
+ // vtmp1 = w3 | w4 | w5 | w6
|
|
+ ext $vtmp1.16b, $s0.16b, $s1.16b, #12
|
|
+ // vtmp2 = w10 | w11 | w12 | w13
|
|
+ ext $vtmp2.16b, $s2.16b, $s3.16b, #8
|
|
+ sm3partw1 $s4.4s, $s0.4s, $s3.4s
|
|
+ sm3partw2 $s4.4s, $vtmp2.4s, $vtmp1.4s
|
|
+___
|
|
+}
|
|
+
|
|
+# A round of the compression function
|
|
+# Input:
|
|
+# ab - choose instruction among sm3tt1a, sm3tt1b, sm3tt2a, sm3tt2b
|
|
+# vstate0 - vstate1, store digest status(A - H)
|
|
+# vconst0 - vconst1, used alternately to store Tj <<< j
|
|
+# vtmp - temporary register
|
|
+# vw - for sm3tt1ab, vw = s0 eor s1
|
|
+# s0 - for sm3tt2ab, s0 is used unmodified
|
|
+# i, choose wj' or wj from vw
|
|
+sub round () {
|
|
+my $ab = shift;
|
|
+my $vstate0 = shift;
|
|
+my $vstate1 = shift;
|
|
+my $vconst0 = shift;
|
|
+my $vconst1 = shift;
|
|
+my $vtmp = shift;
|
|
+my $vw = shift;
|
|
+my $s0 = shift;
|
|
+my $i = shift;
|
|
+$code.=<<___;
|
|
+ sm3ss1 $vtmp.4s, $vstate0.4s, $vconst0.4s, $vstate1.4s
|
|
+ shl $vconst1.4s, $vconst0.4s, #1
|
|
+ sri $vconst1.4s, $vconst0.4s, #31
|
|
+ sm3tt1$ab $vstate0.4s, $vtmp.4s, $vw.4s[$i]
|
|
+ sm3tt2$ab $vstate1.4s, $vtmp.4s, $s0.4s[$i]
|
|
+___
|
|
+}
|
|
+
|
|
+sub qround () {
|
|
+my $ab = shift;
|
|
+my $vstate0 = shift;
|
|
+my $vstate1 = shift;
|
|
+my $vconst0 = shift;
|
|
+my $vconst1 = shift;
|
|
+my $vtmp1 = shift;
|
|
+my $vtmp2 = shift;
|
|
+my $s0 = shift;
|
|
+my $s1 = shift;
|
|
+my $s2 = shift;
|
|
+my $s3 = shift;
|
|
+my $s4 = shift;
|
|
+ if($s4) {
|
|
+ &msg_exp($s0, $s1, $s2, $s3, $s4, $vtmp1, $vtmp2);
|
|
+ }
|
|
+$code.=<<___;
|
|
+ eor $vtmp1.16b, $s0.16b, $s1.16b
|
|
+___
|
|
+ &round($ab, $vstate0, $vstate1, $vconst0, $vconst1, $vtmp2,
|
|
+ $vtmp1, $s0, 0);
|
|
+ &round($ab, $vstate0, $vstate1, $vconst1, $vconst0, $vtmp2,
|
|
+ $vtmp1, $s0, 1);
|
|
+ &round($ab, $vstate0, $vstate1, $vconst0, $vconst1, $vtmp2,
|
|
+ $vtmp1, $s0, 2);
|
|
+ &round($ab, $vstate0, $vstate1, $vconst1, $vconst0, $vtmp2,
|
|
+ $vtmp1, $s0, 3);
|
|
+}
|
|
+
|
|
+$code=<<___;
|
|
+#include "arm_arch.h"
|
|
+.arch armv8.2-a
|
|
+.text
|
|
+___
|
|
+
|
|
+{{{
|
|
+my ($pstate,$pdata,$num)=("x0","x1","w2");
|
|
+my ($state1,$state2)=("v5","v6");
|
|
+my ($sconst1, $sconst2)=("s16","s17");
|
|
+my ($vconst1, $vconst2)=("v16","v17");
|
|
+my ($s0,$s1,$s2,$s3,$s4)=map("v$_",(0..4));
|
|
+my ($bkstate1,$bkstate2)=("v18","v19");
|
|
+my ($vconst_tmp1,$vconst_tmp2)=("v20","v21");
|
|
+my ($vtmp1,$vtmp2)=("v22","v23");
|
|
+my $constaddr="x8";
|
|
+# void ossl_hwsm3_block_data_order(SM3_CTX *c, const void *p, size_t num)
|
|
+$code.=<<___;
|
|
+.globl ossl_hwsm3_block_data_order
|
|
+.type ossl_hwsm3_block_data_order,%function
|
|
+.align 5
|
|
+ossl_hwsm3_block_data_order:
|
|
+ // load state
|
|
+ ld1 {$state1.4s-$state2.4s}, [$pstate]
|
|
+ rev64 $state1.4s, $state1.4s
|
|
+ rev64 $state2.4s, $state2.4s
|
|
+ ext $state1.16b, $state1.16b, $state1.16b, #8
|
|
+ ext $state2.16b, $state2.16b, $state2.16b, #8
|
|
+
|
|
+ adr $constaddr, .Tj
|
|
+ ldp $sconst1, $sconst2, [$constaddr]
|
|
+
|
|
+.Loop:
|
|
+ // load input
|
|
+ ld1 {$s0.16b-$s3.16b}, [$pdata], #64
|
|
+ sub $num, $num, #1
|
|
+
|
|
+ mov $bkstate1.16b, $state1.16b
|
|
+ mov $bkstate2.16b, $state2.16b
|
|
+
|
|
+#ifndef __ARMEB__
|
|
+ rev32 $s0.16b, $s0.16b
|
|
+ rev32 $s1.16b, $s1.16b
|
|
+ rev32 $s2.16b, $s2.16b
|
|
+ rev32 $s3.16b, $s3.16b
|
|
+#endif
|
|
+
|
|
+ ext $vconst_tmp1.16b, $vconst1.16b, $vconst1.16b, #4
|
|
+___
|
|
+ &qround("a",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
+ $s0,$s1,$s2,$s3,$s4);
|
|
+ &qround("a",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
+ $s1,$s2,$s3,$s4,$s0);
|
|
+ &qround("a",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
+ $s2,$s3,$s4,$s0,$s1);
|
|
+ &qround("a",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
+ $s3,$s4,$s0,$s1,$s2);
|
|
+
|
|
+$code.=<<___;
|
|
+ ext $vconst_tmp1.16b, $vconst2.16b, $vconst2.16b, #4
|
|
+___
|
|
+
|
|
+ &qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
+ $s4,$s0,$s1,$s2,$s3);
|
|
+ &qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
+ $s0,$s1,$s2,$s3,$s4);
|
|
+ &qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
+ $s1,$s2,$s3,$s4,$s0);
|
|
+ &qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
+ $s2,$s3,$s4,$s0,$s1);
|
|
+ &qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
+ $s3,$s4,$s0,$s1,$s2);
|
|
+ &qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
+ $s4,$s0,$s1,$s2,$s3);
|
|
+ &qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
+ $s0,$s1,$s2,$s3,$s4);
|
|
+ &qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
+ $s1,$s2,$s3,$s4,$s0);
|
|
+ &qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
+ $s2,$s3,$s4,$s0,$s1);
|
|
+ &qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
+ $s3,$s4);
|
|
+ &qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
+ $s4,$s0);
|
|
+ &qround("b",$state1,$state2,$vconst_tmp1,$vconst_tmp2,$vtmp1,$vtmp2,
|
|
+ $s0,$s1);
|
|
+
|
|
+$code.=<<___;
|
|
+ eor $state1.16b, $state1.16b, $bkstate1.16b
|
|
+ eor $state2.16b, $state2.16b, $bkstate2.16b
|
|
+
|
|
+ // any remained blocks?
|
|
+ cbnz $num, .Loop
|
|
+
|
|
+ // save state
|
|
+ rev64 $state1.4s, $state1.4s
|
|
+ rev64 $state2.4s, $state2.4s
|
|
+ ext $state1.16b, $state1.16b, $state1.16b, #8
|
|
+ ext $state2.16b, $state2.16b, $state2.16b, #8
|
|
+ st1 {$state1.4s-$state2.4s}, [$pstate]
|
|
+ ret
|
|
+.size ossl_hwsm3_block_data_order,.-ossl_hwsm3_block_data_order
|
|
+
|
|
+.align 3
|
|
+.Tj:
|
|
+.word 0x79cc4519, 0x9d8a7a87
|
|
+___
|
|
+}}}
|
|
+
|
|
+#########################################
|
|
+my %sm3partopcode = (
|
|
+ "sm3partw1" => 0xce60C000,
|
|
+ "sm3partw2" => 0xce60C400);
|
|
+
|
|
+my %sm3ss1opcode = (
|
|
+ "sm3ss1" => 0xce400000);
|
|
+
|
|
+my %sm3ttopcode = (
|
|
+ "sm3tt1a" => 0xce408000,
|
|
+ "sm3tt1b" => 0xce408400,
|
|
+ "sm3tt2a" => 0xce408800,
|
|
+ "sm3tt2b" => 0xce408C00);
|
|
+
|
|
+sub unsm3part {
|
|
+ my ($mnemonic,$arg)=@_;
|
|
+
|
|
+ $arg=~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)/o
|
|
+ &&
|
|
+ sprintf ".inst\t0x%08x\t//%s %s",
|
|
+ $sm3partopcode{$mnemonic}|$1|($2<<5)|($3<<16),
|
|
+ $mnemonic,$arg;
|
|
+}
|
|
+
|
|
+sub unsm3ss1 {
|
|
+ my ($mnemonic,$arg)=@_;
|
|
+
|
|
+ $arg=~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)/o
|
|
+ &&
|
|
+ sprintf ".inst\t0x%08x\t//%s %s",
|
|
+ $sm3ss1opcode{$mnemonic}|$1|($2<<5)|($3<<16)|($4<<10),
|
|
+ $mnemonic,$arg;
|
|
+}
|
|
+
|
|
+sub unsm3tt {
|
|
+ my ($mnemonic,$arg)=@_;
|
|
+
|
|
+ $arg=~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*\[([0-3])\]/o
|
|
+ &&
|
|
+ sprintf ".inst\t0x%08x\t//%s %s",
|
|
+ $sm3ttopcode{$mnemonic}|$1|($2<<5)|($3<<16)|($4<<12),
|
|
+ $mnemonic,$arg;
|
|
+}
|
|
+
|
|
+open SELF,$0;
|
|
+while(<SELF>) {
|
|
+ next if (/^#!/);
|
|
+ last if (!s/^#/\/\// and !/^$/);
|
|
+ print;
|
|
+}
|
|
+close SELF;
|
|
+
|
|
+foreach(split("\n",$code)) {
|
|
+ s/\`([^\`]*)\`/eval($1)/ge;
|
|
+
|
|
+ s/\b(sm3partw[1-2])\s+([qv].*)/unsm3part($1,$2)/ge;
|
|
+ s/\b(sm3ss1)\s+([qv].*)/unsm3ss1($1,$2)/ge;
|
|
+ s/\b(sm3tt[1-2][a-b])\s+([qv].*)/unsm3tt($1,$2)/ge;
|
|
+ print $_,"\n";
|
|
+}
|
|
+
|
|
+close STDOUT or die "error closing STDOUT: $!";
|
|
diff --git a/crypto/sm3/build.info b/crypto/sm3/build.info
|
|
index 6009b19..e113729 100644
|
|
--- a/crypto/sm3/build.info
|
|
+++ b/crypto/sm3/build.info
|
|
@@ -1,2 +1,15 @@
|
|
LIBS=../../libcrypto
|
|
-SOURCE[../../libcrypto]=sm3.c m_sm3.c
|
|
+SOURCE[../../libcrypto]=\
|
|
+ sm3.c m_sm3.c {- $target{sm3_asm_src} -}
|
|
+
|
|
+GENERATE[sm3-armv8.S]=asm/sm3-armv8.pl $(PERLASM_SCHEME)
|
|
+INCLUDE[sm3-armv8.o]=..
|
|
+
|
|
+BEGINRAW[Makefile]
|
|
+##### SM3 assembler implementations
|
|
+
|
|
+# GNU make "catch all"
|
|
+{- $builddir -}/sm3-%.S: {- $sourcedir -}/asm/sm3-%.pl
|
|
+ CC="$(CC)" $(PERL) $< $(PERLASM_SCHEME) $@
|
|
+
|
|
+ENDRAW[Makefile]
|
|
\ No newline at end of file
|
|
diff --git a/crypto/sm3/sm3_local.h b/crypto/sm3/sm3_local.h
|
|
index 7171de5..aafff63 100644
|
|
--- a/crypto/sm3/sm3_local.h
|
|
+++ b/crypto/sm3/sm3_local.h
|
|
@@ -32,7 +32,21 @@
|
|
ll=(c)->G; (void)HOST_l2c(ll, (s)); \
|
|
ll=(c)->H; (void)HOST_l2c(ll, (s)); \
|
|
} while (0)
|
|
-#define HASH_BLOCK_DATA_ORDER sm3_block_data_order
|
|
+
|
|
+#if defined(SM3_ASM)
|
|
+# if defined(__aarch64__)
|
|
+# include "crypto/arm_arch.h"
|
|
+# define HWSM3_CAPABLE (OPENSSL_armcap_P & ARMV8_SM3)
|
|
+void ossl_hwsm3_block_data_order(SM3_CTX *c, const void *p, size_t num);
|
|
+# endif
|
|
+#endif
|
|
+
|
|
+#if defined(HWSM3_CAPABLE)
|
|
+# define HASH_BLOCK_DATA_ORDER (HWSM3_CAPABLE ? ossl_hwsm3_block_data_order \
|
|
+ : sm3_block_data_order)
|
|
+#else
|
|
+# define HASH_BLOCK_DATA_ORDER sm3_block_data_order
|
|
+#endif
|
|
|
|
void sm3_transform(SM3_CTX *c, const unsigned char *data);
|
|
|
|
--
|
|
2.36.1
|
|
|