From 4f7e522f7fda2c55c4915396d08f8c9cf3b3fba8 Mon Sep 17 00:00:00 2001
From: Xu Yizhou <xuyizhou1@huawei.com>
Date: Fri, 28 Oct 2022 11:24:28 +0800
Subject: [PATCH 2/3] SM4 optimization for ARM by HW instruction

This patch is a copy of the following PR, with
some extra supporting code.

1. SM4 optimization for ARM by HW instruction

This patch implements the SM4 optimization for ARM processors,
using the SM4 HW instructions, which are an optional feature of
the crypto extension for aarch64 (ARMv8).

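For illustration only (not part of this patch): runtime detection
mirrors the HWCAP logic added to crypto/armcap.c below. A minimal
user-space sketch, assuming Linux/AArch64, where bit 19 is the SM4
extension exactly as in the HWCAP_CE_SM4 define added here:

    #include <stdio.h>
    #include <sys/auxv.h>              /* getauxval(), AT_HWCAP */

    #define HWCAP_CE_SM4 (1UL << 19)   /* same bit armcap.c tests */

    int main(void)
    {
        /* The kernel advertises the optional SM4 instructions here;
         * when absent, sm4e/sm4ekey would trap as illegal instructions,
         * which is what the sigsetjmp probe below guards against. */
        if (getauxval(AT_HWCAP) & HWCAP_CE_SM4)
            printf("SM4 HW instructions available\n");
        else
            printf("SM4 HW instructions not available\n");
        return 0;
    }
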
Tested on some modern ARM micro-architectures with SM4 support, the
performance uplift observed is around 8X~40X over the existing
C implementation in OpenSSL. Algorithms that can be parallelized
(like CTR, ECB and CBC decryption) are on the higher end, while
algorithms like CBC encryption are on the lower end (due to the
inter-block dependency).

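The dependency difference can be sketched in C as follows (schematic
only, not code from this patch; block_encrypt is a toy stand-in for
one SM4 block operation):

    #include <stddef.h>
    #include <string.h>

    /* Toy 16-byte "cipher" so the sketch is self-contained; NOT SM4. */
    static void block_encrypt(unsigned char out[16],
                              const unsigned char in[16])
    {
        for (int i = 0; i < 16; i++)
            out[i] = (unsigned char)(in[i] ^ 0x5a);
    }

    /* CBC encryption: block i consumes ciphertext i-1, so the blocks
     * must be processed one at a time. */
    void cbc_encrypt_sketch(unsigned char *c, const unsigned char *p,
                            size_t nblk, unsigned char iv[16])
    {
        for (size_t i = 0; i < nblk; i++) {
            unsigned char t[16];
            for (int j = 0; j < 16; j++)
                t[j] = p[16 * i + j] ^ iv[j];
            block_encrypt(c + 16 * i, t);
            memcpy(iv, c + 16 * i, 16);   /* serial dependency */
        }
    }

CTR, ECB and CBC decryption have no such chain, which is why the
assembly below can feed 4 or 8 blocks per iteration into the SM4
pipeline for those modes.
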
Perf data on Yitian-710 2.75GHz hardware, before and after optimization:

Before:
type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes  16384 bytes
SM4-CTR         105787.80k   107837.87k   108380.84k   108462.08k   108549.46k   108554.92k
SM4-ECB         111924.58k   118173.76k   119776.00k   120093.70k   120264.02k   120274.94k
SM4-CBC         106428.09k   109190.98k   109674.33k   109774.51k   109827.41k   109827.41k

After (7.4x - 36.6x faster):
type             16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes  16384 bytes
SM4-CTR         781979.02k  2432994.28k  3437753.86k  3834177.88k  3963715.58k  3974556.33k
SM4-ECB         937590.69k  2941689.02k  3945751.81k  4328655.87k  4459181.40k  4468692.31k
SM4-CBC         890639.88k  1027746.58k  1050621.78k  1056696.66k  1058613.93k  1058701.31k

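Such figures are reachable through the standard EVP interface, so an
application picks up the HW path unchanged. A minimal sketch against
the public OpenSSL EVP API (demo key/IV, error handling reduced to a
single bail-out):

    #include <stdio.h>
    #include <openssl/evp.h>

    int main(void)
    {
        unsigned char key[16] = {0}, iv[16] = {0};   /* demo values only */
        unsigned char in[64] = {0}, out[64 + 16];
        int outl = 0, tmpl = 0;
        EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();

        if (ctx == NULL
            || !EVP_EncryptInit_ex(ctx, EVP_sm4_ctr(), NULL, key, iv)
            || !EVP_EncryptUpdate(ctx, out, &outl, in, sizeof(in))
            || !EVP_EncryptFinal_ex(ctx, out + outl, &tmpl)) {
            EVP_CIPHER_CTX_free(ctx);
            return 1;
        }
        /* sm4_init_key() below installs the HWSM4_* routines when
         * ARMV8_SM4 is set, so this code gets the speedup as-is. */
        printf("encrypted %d bytes\n", outl + tmpl);
        EVP_CIPHER_CTX_free(ctx);
        return 0;
    }
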
Signed-off-by: Daniel Hu <Daniel.Hu@arm.com>

Reviewed-by: Paul Dale <pauli@openssl.org>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/17455)

Signed-off-by: Xu Yizhou <xuyizhou1@huawei.com>
---
 Configurations/00-base-templates.conf |   2 +-
 Configure                             |   3 +-
 crypto/arm64cpuid.pl                  |   7 +
 crypto/arm_arch.h                     |   1 +
 crypto/armcap.c                       |  10 +
 crypto/evp/e_sm4.c                    |  88 ++--
 crypto/sm4/asm/sm4-armv8.pl           | 629 ++++++++++++++++++++++++++
 crypto/sm4/build.info                 |  13 +-
 include/crypto/sm4_platform.h         |  70 +++
 9 files changed, 788 insertions(+), 35 deletions(-)
 create mode 100644 crypto/sm4/asm/sm4-armv8.pl
 create mode 100644 include/crypto/sm4_platform.h

diff --git a/Configurations/00-base-templates.conf b/Configurations/00-base-templates.conf
index a67ae65..a26d081 100644
--- a/Configurations/00-base-templates.conf
+++ b/Configurations/00-base-templates.conf
@@ -321,7 +321,7 @@ my %targets=(
         chacha_asm_src => "chacha-armv8.S",
         poly1305_asm_src=> "poly1305-armv8.S",
         keccak1600_asm_src => "keccak1600-armv8.S",
-        sm4_asm_src => "vpsm4_ex-armv8.S",
+        sm4_asm_src => "sm4-armv8.S vpsm4_ex-armv8.S",
         sm3_asm_src => "sm3-armv8.S",
     },
     parisc11_asm => {
diff --git a/Configure b/Configure
index fce460d..d013204 100755
--- a/Configure
+++ b/Configure
@@ -1421,7 +1421,8 @@ unless ($disabled{asm}) {
         push @{$config{lib_defines}}, "POLY1305_ASM";
     }
     if ($target{sm4_asm_src} ne "") {
-        push @{$config{lib_defines}}, "VPSM4_EX_ASM";
+        push @{$config{lib_defines}}, "SM4_ASM" if ($target{sm4_asm_src} =~ m/sm4/);
+        push @{$config{lib_defines}}, "VPSM4_EX_ASM" if ($target{sm4_asm_src} =~ m/vpsm4_ex/);
     }
     if ($target{sm3_asm_src} ne "") {
         push @{$config{lib_defines}}, "SM3_ASM";
diff --git a/crypto/arm64cpuid.pl b/crypto/arm64cpuid.pl
index 1e9b167..341167b 100755
--- a/crypto/arm64cpuid.pl
+++ b/crypto/arm64cpuid.pl
@@ -71,6 +71,13 @@ _armv8_pmull_probe:
     ret
 .size _armv8_pmull_probe,.-_armv8_pmull_probe

+.globl _armv8_sm4_probe
+.type _armv8_sm4_probe,%function
+_armv8_sm4_probe:
+    .long 0xcec08400 // sm4e v0.4s, v0.4s
+    ret
+.size _armv8_sm4_probe,.-_armv8_sm4_probe
+
 .globl _armv8_sha512_probe
 .type _armv8_sha512_probe,%function
 _armv8_sha512_probe:
diff --git a/crypto/arm_arch.h b/crypto/arm_arch.h
index 8839b21..0f6f7ca 100644
--- a/crypto/arm_arch.h
+++ b/crypto/arm_arch.h
@@ -81,5 +81,6 @@ extern unsigned int OPENSSL_armcap_P;
 # define ARMV8_PMULL (1<<5)
 # define ARMV8_SHA512 (1<<6)
 # define ARMV8_SM3 (1<<9)
+# define ARMV8_SM4 (1<<10)

 #endif
diff --git a/crypto/armcap.c b/crypto/armcap.c
index 8b2f4a5..73bcad1 100644
--- a/crypto/armcap.c
+++ b/crypto/armcap.c
@@ -48,6 +48,7 @@ void _armv8_sha256_probe(void);
 void _armv8_pmull_probe(void);
 # ifdef __aarch64__
 void _armv8_sm3_probe(void);
+void _armv8_sm4_probe(void);
 void _armv8_sha512_probe(void);
 # endif
 uint32_t _armv7_tick(void);
@@ -132,6 +133,7 @@ static unsigned long getauxval(unsigned long key)
 #  define HWCAP_CE_SHA1 (1 << 5)
 #  define HWCAP_CE_SHA256 (1 << 6)
 #  define HWCAP_CE_SM3 (1 << 18)
+#  define HWCAP_CE_SM4 (1 << 19)
 #  define HWCAP_CE_SHA512 (1 << 21)
 # endif

@@ -190,6 +192,9 @@ void OPENSSL_cpuid_setup(void)
             OPENSSL_armcap_P |= ARMV8_SHA256;

 # ifdef __aarch64__
+        if (hwcap & HWCAP_CE_SM4)
+            OPENSSL_armcap_P |= ARMV8_SM4;
+
         if (hwcap & HWCAP_CE_SHA512)
             OPENSSL_armcap_P |= ARMV8_SHA512;

@@ -234,6 +239,11 @@ void OPENSSL_cpuid_setup(void)
         OPENSSL_armcap_P |= ARMV8_SHA256;
     }
 # if defined(__aarch64__) && !defined(__APPLE__)
+    if (sigsetjmp(ill_jmp, 1) == 0) {
+        _armv8_sm4_probe();
+        OPENSSL_armcap_P |= ARMV8_SM4;
+    }
+
     if (sigsetjmp(ill_jmp, 1) == 0) {
         _armv8_sha512_probe();
         OPENSSL_armcap_P |= ARMV8_SHA512;
diff --git a/crypto/evp/e_sm4.c b/crypto/evp/e_sm4.c
index 169d6c7..eaa5ba0 100644
--- a/crypto/evp/e_sm4.c
+++ b/crypto/evp/e_sm4.c
@@ -15,17 +15,11 @@
 # include <openssl/modes.h>
 # include "crypto/sm4.h"
 # include "crypto/evp.h"
+# include "crypto/sm4_platform.h"
 # include "evp_local.h"
 # include "modes_local.h"

-#if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__))
-# include "arm_arch.h"
-# if __ARM_MAX_ARCH__>=7
-#  if defined(VPSM4_EX_ASM)
-#   define VPSM4_EX_CAPABLE (OPENSSL_armcap_P & ARMV8_AES)
-#  endif
-# endif
-#endif
+

 typedef struct {
     union {
@@ -35,28 +29,11 @@ typedef struct {
     block128_f block;
     union {
         ecb128_f ecb;
+        cbc128_f cbc;
+        ctr128_f ctr;
     } stream;
 } EVP_SM4_KEY;

-#ifdef VPSM4_EX_CAPABLE
-void vpsm4_ex_set_encrypt_key(const unsigned char *userKey, SM4_KEY *key);
-void vpsm4_ex_set_decrypt_key(const unsigned char *userKey, SM4_KEY *key);
-#define vpsm4_ex_encrypt SM4_encrypt
-#define vpsm4_ex_decrypt SM4_encrypt
-void vpsm4_ex_ecb_encrypt(
-    const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key, const int enc);
-/* xts mode in GB/T 17964-2021 */
-void vpsm4_ex_xts_encrypt_gb(const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key1,
-    const SM4_KEY *key2, const uint8_t iv[16]);
-void vpsm4_ex_xts_decrypt_gb(const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key1,
-    const SM4_KEY *key2, const uint8_t iv[16]);
-/* xts mode in IEEE Std 1619-2007 */
-void vpsm4_ex_xts_encrypt(const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key1,
-    const SM4_KEY *key2, const uint8_t iv[16]);
-void vpsm4_ex_xts_decrypt(const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key1,
-    const SM4_KEY *key2, const uint8_t iv[16]);
-#endif
-
 # define BLOCK_CIPHER_generic(nid,blocksize,ivlen,nmode,mode,MODE,flags) \
 static const EVP_CIPHER sm4_##mode = { \
         nid##_##nmode,blocksize,128/8,ivlen, \
@@ -84,6 +61,21 @@ static int sm4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,

     mode = EVP_CIPHER_CTX_mode(ctx);
     if ((mode == EVP_CIPH_ECB_MODE || mode == EVP_CIPH_CBC_MODE) && !enc) {
+#ifdef HWSM4_CAPABLE
+        if (HWSM4_CAPABLE) {
+            HWSM4_set_decrypt_key(key, &dat->ks.ks);
+            dat->block = (block128_f) HWSM4_decrypt;
+            dat->stream.cbc = NULL;
+# ifdef HWSM4_cbc_encrypt
+            if (mode == EVP_CIPH_CBC_MODE)
+                dat->stream.cbc = (cbc128_f) HWSM4_cbc_encrypt;
+# endif
+# ifdef HWSM4_ecb_encrypt
+            if (mode == EVP_CIPH_ECB_MODE)
+                dat->stream.ecb = (ecb128_f) HWSM4_ecb_encrypt;
+# endif
+        } else
+#endif
 #ifdef VPSM4_EX_CAPABLE
         if (VPSM4_EX_CAPABLE) {
             vpsm4_ex_set_decrypt_key(key, &dat->ks.ks);
@@ -97,6 +89,29 @@ static int sm4_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
             SM4_set_key(key, EVP_CIPHER_CTX_get_cipher_data(ctx));
         }
     } else {
+#ifdef HWSM4_CAPABLE
+        if (HWSM4_CAPABLE) {
+            HWSM4_set_encrypt_key(key, &dat->ks.ks);
+            dat->block = (block128_f) HWSM4_encrypt;
+            dat->stream.cbc = NULL;
+# ifdef HWSM4_cbc_encrypt
+            if (mode == EVP_CIPH_CBC_MODE)
+                dat->stream.cbc = (cbc128_f) HWSM4_cbc_encrypt;
+            else
+# endif
+# ifdef HWSM4_ecb_encrypt
+            if (mode == EVP_CIPH_ECB_MODE)
+                dat->stream.ecb = (ecb128_f) HWSM4_ecb_encrypt;
+            else
+# endif
+# ifdef HWSM4_ctr32_encrypt_blocks
+            if (mode == EVP_CIPH_CTR_MODE)
+                dat->stream.ctr = (ctr128_f) HWSM4_ctr32_encrypt_blocks;
+            else
+# endif
+                (void)0;        /* terminate potentially open 'else' */
+        } else
+#endif
 #ifdef VPSM4_EX_CAPABLE
         if (VPSM4_EX_CAPABLE) {
             vpsm4_ex_set_encrypt_key(key, &dat->ks.ks);
@@ -118,7 +133,10 @@ static int sm4_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
 {
     EVP_SM4_KEY *dat = EVP_C_DATA(EVP_SM4_KEY,ctx);

-    if (EVP_CIPHER_CTX_encrypting(ctx))
+    if (dat->stream.cbc)
+        (*dat->stream.cbc) (in, out, len, &dat->ks.ks, ctx->iv,
+                            EVP_CIPHER_CTX_encrypting(ctx));
+    else if (EVP_CIPHER_CTX_encrypting(ctx))
         CRYPTO_cbc128_encrypt(in, out, len, &dat->ks.ks,
                               EVP_CIPHER_CTX_iv_noconst(ctx), dat->block);
     else
@@ -183,10 +201,16 @@ static int sm4_ctr_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
         return 0;
     num = (unsigned int)n;

-    CRYPTO_ctr128_encrypt(in, out, len, &dat->ks.ks,
-                          ctx->iv,
-                          EVP_CIPHER_CTX_buf_noconst(ctx), &num,
-                          dat->block);
+    if (dat->stream.ctr)
+        CRYPTO_ctr128_encrypt_ctr32(in, out, len, &dat->ks,
+                                    ctx->iv,
+                                    EVP_CIPHER_CTX_buf_noconst(ctx),
+                                    &num, dat->stream.ctr);
+    else
+        CRYPTO_ctr128_encrypt(in, out, len, &dat->ks.ks,
+                              ctx->iv,
+                              EVP_CIPHER_CTX_buf_noconst(ctx), &num,
+                              dat->block);
     EVP_CIPHER_CTX_set_num(ctx, num);
     return 1;
 }
diff --git a/crypto/sm4/asm/sm4-armv8.pl b/crypto/sm4/asm/sm4-armv8.pl
new file mode 100644
index 0000000..dbacad2
--- /dev/null
+++ b/crypto/sm4/asm/sm4-armv8.pl
@@ -0,0 +1,629 @@
+#! /usr/bin/env perl
+# Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the Apache License 2.0 (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
+#
+# This module implements support for SM4 hw support on aarch64
+# Oct 2021
+#
+
+# $output is the last argument if it looks like a file (it has an extension)
+# $flavour is the first argument if it doesn't look like a file
+$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
+$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
+die "can't locate arm-xlate.pl";
+
+open OUT,"| \"$^X\" $xlate $flavour \"$output\""
+    or die "can't call $xlate: $!";
+*STDOUT=*OUT;
+
+$prefix="sm4_v8";
+my @rks=map("v$_",(0..7));
+
+sub rev32() {
+my $dst = shift;
+my $src = shift;
+$code.=<<___;
+#ifndef __ARMEB__
+    rev32 $dst.16b,$src.16b
+#endif
+___
+}
+
+sub enc_blk () {
+my $data = shift;
+$code.=<<___;
+    sm4e $data.4s,@rks[0].4s
+    sm4e $data.4s,@rks[1].4s
+    sm4e $data.4s,@rks[2].4s
+    sm4e $data.4s,@rks[3].4s
+    sm4e $data.4s,@rks[4].4s
+    sm4e $data.4s,@rks[5].4s
+    sm4e $data.4s,@rks[6].4s
+    sm4e $data.4s,@rks[7].4s
+    rev64 $data.4S,$data.4S
+    ext $data.16b,$data.16b,$data.16b,#8
+___
+}
+
+sub enc_4blks () {
+my $data0 = shift;
+my $data1 = shift;
+my $data2 = shift;
+my $data3 = shift;
+$code.=<<___;
+    sm4e $data0.4s,@rks[0].4s
+    sm4e $data1.4s,@rks[0].4s
+    sm4e $data2.4s,@rks[0].4s
+    sm4e $data3.4s,@rks[0].4s
+
+    sm4e $data0.4s,@rks[1].4s
+    sm4e $data1.4s,@rks[1].4s
+    sm4e $data2.4s,@rks[1].4s
+    sm4e $data3.4s,@rks[1].4s
+
+    sm4e $data0.4s,@rks[2].4s
+    sm4e $data1.4s,@rks[2].4s
+    sm4e $data2.4s,@rks[2].4s
+    sm4e $data3.4s,@rks[2].4s
+
+    sm4e $data0.4s,@rks[3].4s
+    sm4e $data1.4s,@rks[3].4s
+    sm4e $data2.4s,@rks[3].4s
+    sm4e $data3.4s,@rks[3].4s
+
+    sm4e $data0.4s,@rks[4].4s
+    sm4e $data1.4s,@rks[4].4s
+    sm4e $data2.4s,@rks[4].4s
+    sm4e $data3.4s,@rks[4].4s
+
+    sm4e $data0.4s,@rks[5].4s
+    sm4e $data1.4s,@rks[5].4s
+    sm4e $data2.4s,@rks[5].4s
+    sm4e $data3.4s,@rks[5].4s
+
+    sm4e $data0.4s,@rks[6].4s
+    sm4e $data1.4s,@rks[6].4s
+    sm4e $data2.4s,@rks[6].4s
+    sm4e $data3.4s,@rks[6].4s
+
+    sm4e $data0.4s,@rks[7].4s
+    rev64 $data0.4S,$data0.4S
+    sm4e $data1.4s,@rks[7].4s
+    ext $data0.16b,$data0.16b,$data0.16b,#8
+    rev64 $data1.4S,$data1.4S
+    sm4e $data2.4s,@rks[7].4s
+    ext $data1.16b,$data1.16b,$data1.16b,#8
+    rev64 $data2.4S,$data2.4S
+    sm4e $data3.4s,@rks[7].4s
+    ext $data2.16b,$data2.16b,$data2.16b,#8
+    rev64 $data3.4S,$data3.4S
+    ext $data3.16b,$data3.16b,$data3.16b,#8
+___
+}
+
+$code=<<___;
+#include "arm_arch.h"
+.arch armv8-a+crypto
+.text
+___
+
+{{{
+$code.=<<___;
+.align 6
+.Lck:
+    .long 0x00070E15, 0x1C232A31, 0x383F464D, 0x545B6269
+    .long 0x70777E85, 0x8C939AA1, 0xA8AFB6BD, 0xC4CBD2D9
+    .long 0xE0E7EEF5, 0xFC030A11, 0x181F262D, 0x343B4249
+    .long 0x50575E65, 0x6C737A81, 0x888F969D, 0xA4ABB2B9
+    .long 0xC0C7CED5, 0xDCE3EAF1, 0xF8FF060D, 0x141B2229
+    .long 0x30373E45, 0x4C535A61, 0x686F767D, 0x848B9299
+    .long 0xA0A7AEB5, 0xBCC3CAD1, 0xD8DFE6ED, 0xF4FB0209
+    .long 0x10171E25, 0x2C333A41, 0x484F565D, 0x646B7279
+.Lfk:
+    .long 0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc
+___
+}}}
+
+{{{
+my ($key,$keys)=("x0","x1");
+my ($tmp)=("x2");
+my ($key0,$key1,$key2,$key3,$key4,$key5,$key6,$key7)=map("v$_",(0..7));
+my ($const0,$const1,$const2,$const3,$const4,$const5,$const6,$const7)=map("v$_",(16..23));
+my ($fkconst) = ("v24");
+$code.=<<___;
+.globl ${prefix}_set_encrypt_key
+.type ${prefix}_set_encrypt_key,%function
+.align 5
+${prefix}_set_encrypt_key:
+    ld1 {$key0.4s},[$key]
+    adr $tmp,.Lfk
+    ld1 {$fkconst.4s},[$tmp]
+    adr $tmp,.Lck
+    ld1 {$const0.4s,$const1.4s,$const2.4s,$const3.4s},[$tmp],64
+___
+    &rev32($key0, $key0);
+$code.=<<___;
+    ld1 {$const4.4s,$const5.4s,$const6.4s,$const7.4s},[$tmp]
+    eor $key0.16b,$key0.16b,$fkconst.16b;
+    sm4ekey $key0.4S,$key0.4S,$const0.4S
+    sm4ekey $key1.4S,$key0.4S,$const1.4S
+    sm4ekey $key2.4S,$key1.4S,$const2.4S
+    sm4ekey $key3.4S,$key2.4S,$const3.4S
+    sm4ekey $key4.4S,$key3.4S,$const4.4S
+    st1 {$key0.4s,$key1.4s,$key2.4s,$key3.4s},[$keys],64
+    sm4ekey $key5.4S,$key4.4S,$const5.4S
+    sm4ekey $key6.4S,$key5.4S,$const6.4S
+    sm4ekey $key7.4S,$key6.4S,$const7.4S
+    st1 {$key4.4s,$key5.4s,$key6.4s,$key7.4s},[$keys]
+    ret
+.size ${prefix}_set_encrypt_key,.-${prefix}_set_encrypt_key
+___
+}}}
+
+{{{
+my ($key,$keys)=("x0","x1");
+my ($tmp)=("x2");
+my ($key7,$key6,$key5,$key4,$key3,$key2,$key1,$key0)=map("v$_",(0..7));
+my ($const0,$const1,$const2,$const3,$const4,$const5,$const6,$const7)=map("v$_",(16..23));
+my ($fkconst) = ("v24");
+$code.=<<___;
+.globl ${prefix}_set_decrypt_key
+.type ${prefix}_set_decrypt_key,%function
+.align 5
+${prefix}_set_decrypt_key:
+    ld1 {$key0.4s},[$key]
+    adr $tmp,.Lfk
+    ld1 {$fkconst.4s},[$tmp]
+    adr $tmp, .Lck
+    ld1 {$const0.4s,$const1.4s,$const2.4s,$const3.4s},[$tmp],64
+___
+    &rev32($key0, $key0);
+$code.=<<___;
+    ld1 {$const4.4s,$const5.4s,$const6.4s,$const7.4s},[$tmp]
+    eor $key0.16b, $key0.16b,$fkconst.16b;
+    sm4ekey $key0.4S,$key0.4S,$const0.4S
+    sm4ekey $key1.4S,$key0.4S,$const1.4S
+    sm4ekey $key2.4S,$key1.4S,$const2.4S
+    rev64 $key0.4s,$key0.4s
+    rev64 $key1.4s,$key1.4s
+    ext $key0.16b,$key0.16b,$key0.16b,#8
+    ext $key1.16b,$key1.16b,$key1.16b,#8
+    sm4ekey $key3.4S,$key2.4S,$const3.4S
+    sm4ekey $key4.4S,$key3.4S,$const4.4S
+    rev64 $key2.4s,$key2.4s
+    rev64 $key3.4s,$key3.4s
+    ext $key2.16b,$key2.16b,$key2.16b,#8
+    ext $key3.16b,$key3.16b,$key3.16b,#8
+    sm4ekey $key5.4S,$key4.4S,$const5.4S
+    sm4ekey $key6.4S,$key5.4S,$const6.4S
+    rev64 $key4.4s,$key4.4s
+    rev64 $key5.4s,$key5.4s
+    ext $key4.16b,$key4.16b,$key4.16b,#8
+    ext $key5.16b,$key5.16b,$key5.16b,#8
+    sm4ekey $key7.4S,$key6.4S,$const7.4S
+    rev64 $key6.4s, $key6.4s
+    rev64 $key7.4s, $key7.4s
+    ext $key6.16b,$key6.16b,$key6.16b,#8
+    ext $key7.16b,$key7.16b,$key7.16b,#8
+    st1 {$key7.4s,$key6.4s,$key5.4s,$key4.4s},[$keys],64
+    st1 {$key3.4s,$key2.4s,$key1.4s,$key0.4s},[$keys]
+    ret
+.size ${prefix}_set_decrypt_key,.-${prefix}_set_decrypt_key
+___
+}}}
+
+{{{
+sub gen_block () {
+my $dir = shift;
+my ($inp,$out,$rk)=map("x$_",(0..2));
+my ($data)=("v16");
+$code.=<<___;
+.globl ${prefix}_${dir}crypt
+.type ${prefix}_${dir}crypt,%function
+.align 5
+${prefix}_${dir}crypt:
+    ld1 {$data.4s},[$inp]
+    ld1 {@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk],64
+    ld1 {@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk]
+___
+    &rev32($data,$data);
+    &enc_blk($data);
+    &rev32($data,$data);
+$code.=<<___;
+    st1 {$data.4s},[$out]
+    ret
+.size ${prefix}_${dir}crypt,.-${prefix}_${dir}crypt
+___
+}
+
+&gen_block("en");
+&gen_block("de");
+}}}
+
+{{{
+my ($inp,$out,$len,$rk)=map("x$_",(0..3));
+my ($enc) = ("w4");
+my @dat=map("v$_",(16..23));
+$code.=<<___;
+.globl ${prefix}_ecb_encrypt
+.type ${prefix}_ecb_encrypt,%function
+.align 5
+${prefix}_ecb_encrypt:
+    ld1 {@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk],#64
+    ld1 {@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk]
+1:
+    cmp $len,#64
+    b.lt 1f
+    ld1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$inp],#64
+    cmp $len,#128
+    b.lt 2f
+    ld1 {@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$inp],#64
+    // 8 blocks
+___
+    &rev32(@dat[0],@dat[0]);
+    &rev32(@dat[1],@dat[1]);
+    &rev32(@dat[2],@dat[2]);
+    &rev32(@dat[3],@dat[3]);
+    &rev32(@dat[4],@dat[4]);
+    &rev32(@dat[5],@dat[5]);
+    &rev32(@dat[6],@dat[6]);
+    &rev32(@dat[7],@dat[7]);
+    &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]);
+    &enc_4blks(@dat[4],@dat[5],@dat[6],@dat[7]);
+    &rev32(@dat[0],@dat[0]);
+    &rev32(@dat[1],@dat[1]);
+    &rev32(@dat[2],@dat[2]);
+    &rev32(@dat[3],@dat[3]);
+    &rev32(@dat[4],@dat[4]);
+    &rev32(@dat[5],@dat[5]);
+$code.=<<___;
+    st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64
+___
+    &rev32(@dat[6],@dat[6]);
+    &rev32(@dat[7],@dat[7]);
+$code.=<<___;
+    st1 {@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$out],#64
+    subs $len,$len,#128
+    b.gt 1b
+    ret
+    // 4 blocks
+2:
+___
+    &rev32(@dat[0],@dat[0]);
+    &rev32(@dat[1],@dat[1]);
+    &rev32(@dat[2],@dat[2]);
+    &rev32(@dat[3],@dat[3]);
+    &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]);
+    &rev32(@dat[0],@dat[0]);
+    &rev32(@dat[1],@dat[1]);
+    &rev32(@dat[2],@dat[2]);
+    &rev32(@dat[3],@dat[3]);
+$code.=<<___;
+    st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64
+    subs $len,$len,#64
+    b.gt 1b
+1:
+    subs $len,$len,#16
+    b.lt 1f
+    ld1 {@dat[0].4s},[$inp],#16
+___
+    &rev32(@dat[0],@dat[0]);
+    &enc_blk(@dat[0]);
+    &rev32(@dat[0],@dat[0]);
+$code.=<<___;
+    st1 {@dat[0].4s},[$out],#16
+    b.ne 1b
+1:
+    ret
+.size ${prefix}_ecb_encrypt,.-${prefix}_ecb_encrypt
+___
+}}}
+
+{{{
+my ($inp,$out,$len,$rk,$ivp)=map("x$_",(0..4));
+my ($enc) = ("w5");
+my @dat=map("v$_",(16..23));
+my @in=map("v$_",(24..31));
+my ($ivec) = ("v8");
+$code.=<<___;
+.globl ${prefix}_cbc_encrypt
+.type ${prefix}_cbc_encrypt,%function
+.align 5
+${prefix}_cbc_encrypt:
+    stp d8,d9,[sp, #-16]!
+
+    ld1 {@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk],#64
+    ld1 {@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk]
+    ld1 {$ivec.4s},[$ivp]
+    cmp $enc,#0
+    b.eq .Ldec
+1:
+    cmp $len, #64
+    b.lt 1f
+    ld1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$inp],#64
+    eor @dat[0].16b,@dat[0].16b,$ivec.16b
+___
+    &rev32(@dat[1],@dat[1]);
+    &rev32(@dat[0],@dat[0]);
+    &rev32(@dat[2],@dat[2]);
+    &rev32(@dat[3],@dat[3]);
+    &enc_blk(@dat[0]);
+$code.=<<___;
+    eor @dat[1].16b,@dat[1].16b,@dat[0].16b
+___
+    &enc_blk(@dat[1]);
+    &rev32(@dat[0],@dat[0]);
+$code.=<<___;
+    eor @dat[2].16b,@dat[2].16b,@dat[1].16b
+___
+    &enc_blk(@dat[2]);
+    &rev32(@dat[1],@dat[1]);
+$code.=<<___;
+    eor @dat[3].16b,@dat[3].16b,@dat[2].16b
+___
+    &enc_blk(@dat[3]);
+    &rev32(@dat[2],@dat[2]);
+    &rev32(@dat[3],@dat[3]);
+$code.=<<___;
+    mov $ivec.16b,@dat[3].16b
+    st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64
+    subs $len,$len,#64
+    b.ne 1b
+1:
+    subs $len,$len,#16
+    b.lt 3f
+    ld1 {@dat[0].4s},[$inp],#16
+    eor $ivec.16b,$ivec.16b,@dat[0].16b
+___
+    &rev32($ivec,$ivec);
+    &enc_blk($ivec);
+    &rev32($ivec,$ivec);
+$code.=<<___;
+    st1 {$ivec.16b},[$out],#16
+    b.ne 1b
+    b 3f
+.Ldec:
+1:
+    cmp $len, #64
+    b.lt 1f
+    ld1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$inp]
+    ld1 {@in[0].4s,@in[1].4s,@in[2].4s,@in[3].4s},[$inp],#64
+    cmp $len,#128
+    b.lt 2f
+    // 8 blocks mode
+    ld1 {@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$inp]
+    ld1 {@in[4].4s,@in[5].4s,@in[6].4s,@in[7].4s},[$inp],#64
+___
+    &rev32(@dat[0],@dat[0]);
+    &rev32(@dat[1],@dat[1]);
+    &rev32(@dat[2],@dat[2]);
+    &rev32(@dat[3],$dat[3]);
+    &rev32(@dat[4],@dat[4]);
+    &rev32(@dat[5],@dat[5]);
+    &rev32(@dat[6],@dat[6]);
+    &rev32(@dat[7],$dat[7]);
+    &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]);
+    &enc_4blks(@dat[4],@dat[5],@dat[6],@dat[7]);
+    &rev32(@dat[0],@dat[0]);
+    &rev32(@dat[1],@dat[1]);
+    &rev32(@dat[2],@dat[2]);
+    &rev32(@dat[3],@dat[3]);
+    &rev32(@dat[4],@dat[4]);
+    &rev32(@dat[5],@dat[5]);
+    &rev32(@dat[6],@dat[6]);
+    &rev32(@dat[7],@dat[7]);
+$code.=<<___;
+    eor @dat[0].16b,@dat[0].16b,$ivec.16b
+    eor @dat[1].16b,@dat[1].16b,@in[0].16b
+    eor @dat[2].16b,@dat[2].16b,@in[1].16b
+    mov $ivec.16b,@in[7].16b
+    eor @dat[3].16b,$dat[3].16b,@in[2].16b
+    eor @dat[4].16b,$dat[4].16b,@in[3].16b
+    eor @dat[5].16b,$dat[5].16b,@in[4].16b
+    eor @dat[6].16b,$dat[6].16b,@in[5].16b
+    eor @dat[7].16b,$dat[7].16b,@in[6].16b
+    st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64
+    st1 {@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$out],#64
+    subs $len,$len,128
+    b.gt 1b
+    b 3f
+    // 4 blocks mode
+2:
+___
+    &rev32(@dat[0],@dat[0]);
+    &rev32(@dat[1],@dat[1]);
+    &rev32(@dat[2],@dat[2]);
+    &rev32(@dat[3],$dat[3]);
+    &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]);
+    &rev32(@dat[0],@dat[0]);
+    &rev32(@dat[1],@dat[1]);
+    &rev32(@dat[2],@dat[2]);
+    &rev32(@dat[3],@dat[3]);
+$code.=<<___;
+    eor @dat[0].16b,@dat[0].16b,$ivec.16b
+    eor @dat[1].16b,@dat[1].16b,@in[0].16b
+    mov $ivec.16b,@in[3].16b
+    eor @dat[2].16b,@dat[2].16b,@in[1].16b
+    eor @dat[3].16b,$dat[3].16b,@in[2].16b
+    st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64
+    subs $len,$len,#64
+    b.gt 1b
+1:
+    subs $len,$len,#16
+    b.lt 3f
+    ld1 {@dat[0].4s},[$inp],#16
+    mov @in[0].16b,@dat[0].16b
+___
+    &rev32(@dat[0],@dat[0]);
+    &enc_blk(@dat[0]);
+    &rev32(@dat[0],@dat[0]);
+$code.=<<___;
+    eor @dat[0].16b,@dat[0].16b,$ivec.16b
+    mov $ivec.16b,@in[0].16b
+    st1 {@dat[0].16b},[$out],#16
+    b.ne 1b
+3:
+    // save back IV
+    st1 {$ivec.16b},[$ivp]
+    ldp d8,d9,[sp],#16
+    ret
+.size ${prefix}_cbc_encrypt,.-${prefix}_cbc_encrypt
+___
+}}}
+
+{{{
+my ($inp,$out,$len,$rk,$ivp)=map("x$_",(0..4));
+my ($ctr)=("w5");
+my @dat=map("v$_",(16..23));
+my @in=map("v$_",(24..31));
+my ($ivec)=("v8");
+$code.=<<___;
+.globl ${prefix}_ctr32_encrypt_blocks
+.type ${prefix}_ctr32_encrypt_blocks,%function
+.align 5
+${prefix}_ctr32_encrypt_blocks:
+    stp d8,d9,[sp, #-16]!
+
+    ld1 {$ivec.4s},[$ivp]
+    ld1 {@rks[0].4s,@rks[1].4s,@rks[2].4s,@rks[3].4s},[$rk],64
+    ld1 {@rks[4].4s,@rks[5].4s,@rks[6].4s,@rks[7].4s},[$rk]
+___
+    &rev32($ivec,$ivec);
+$code.=<<___;
+    mov $ctr,$ivec.s[3]
+1:
+    cmp $len,#4
+    b.lt 1f
+    ld1 {@in[0].4s,@in[1].4s,@in[2].4s,@in[3].4s},[$inp],#64
+    mov @dat[0].16b,$ivec.16b
+    mov @dat[1].16b,$ivec.16b
+    mov @dat[2].16b,$ivec.16b
+    mov @dat[3].16b,$ivec.16b
+    add $ctr,$ctr,#1
+    mov $dat[1].s[3],$ctr
+    add $ctr,$ctr,#1
+    mov @dat[2].s[3],$ctr
+    add $ctr,$ctr,#1
+    mov @dat[3].s[3],$ctr
+    cmp $len,#8
+    b.lt 2f
+    ld1 {@in[4].4s,@in[5].4s,@in[6].4s,@in[7].4s},[$inp],#64
+    mov @dat[4].16b,$ivec.16b
+    mov @dat[5].16b,$ivec.16b
+    mov @dat[6].16b,$ivec.16b
+    mov @dat[7].16b,$ivec.16b
+    add $ctr,$ctr,#1
+    mov $dat[4].s[3],$ctr
+    add $ctr,$ctr,#1
+    mov @dat[5].s[3],$ctr
+    add $ctr,$ctr,#1
+    mov @dat[6].s[3],$ctr
+    add $ctr,$ctr,#1
+    mov @dat[7].s[3],$ctr
+___
+    &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]);
+    &enc_4blks(@dat[4],@dat[5],@dat[6],@dat[7]);
+    &rev32(@dat[0],@dat[0]);
+    &rev32(@dat[1],@dat[1]);
+    &rev32(@dat[2],@dat[2]);
+    &rev32(@dat[3],@dat[3]);
+    &rev32(@dat[4],@dat[4]);
+    &rev32(@dat[5],@dat[5]);
+    &rev32(@dat[6],@dat[6]);
+    &rev32(@dat[7],@dat[7]);
+$code.=<<___;
+    eor @dat[0].16b,@dat[0].16b,@in[0].16b
+    eor @dat[1].16b,@dat[1].16b,@in[1].16b
+    eor @dat[2].16b,@dat[2].16b,@in[2].16b
+    eor @dat[3].16b,@dat[3].16b,@in[3].16b
+    eor @dat[4].16b,@dat[4].16b,@in[4].16b
+    eor @dat[5].16b,@dat[5].16b,@in[5].16b
+    eor @dat[6].16b,@dat[6].16b,@in[6].16b
+    eor @dat[7].16b,@dat[7].16b,@in[7].16b
+    st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64
+    st1 {@dat[4].4s,@dat[5].4s,@dat[6].4s,@dat[7].4s},[$out],#64
+    subs $len,$len,#8
+    b.eq 3f
+    add $ctr,$ctr,#1
+    mov $ivec.s[3],$ctr
+    b 1b
+2:
+___
+    &enc_4blks(@dat[0],@dat[1],@dat[2],@dat[3]);
+    &rev32(@dat[0],@dat[0]);
+    &rev32(@dat[1],@dat[1]);
+    &rev32(@dat[2],@dat[2]);
+    &rev32(@dat[3],@dat[3]);
+$code.=<<___;
+    eor @dat[0].16b,@dat[0].16b,@in[0].16b
+    eor @dat[1].16b,@dat[1].16b,@in[1].16b
+    eor @dat[2].16b,@dat[2].16b,@in[2].16b
+    eor @dat[3].16b,@dat[3].16b,@in[3].16b
+    st1 {@dat[0].4s,@dat[1].4s,@dat[2].4s,@dat[3].4s},[$out],#64
+    subs $len,$len,#4
+    b.eq 3f
+    add $ctr,$ctr,#1
+    mov $ivec.s[3],$ctr
+    b 1b
+1:
+    subs $len,$len,#1
+    b.lt 3f
+    mov $dat[0].16b,$ivec.16b
+    ld1 {@in[0].4s},[$inp],#16
+___
+    &enc_blk(@dat[0]);
+    &rev32(@dat[0],@dat[0]);
+$code.=<<___;
+    eor $dat[0].16b,$dat[0].16b,@in[0].16b
+    st1 {$dat[0].4s},[$out],#16
+    b.eq 3f
+    add $ctr,$ctr,#1
+    mov $ivec.s[3],$ctr
+    b 1b
+3:
+    ldp d8,d9,[sp],#16
+    ret
+.size ${prefix}_ctr32_encrypt_blocks,.-${prefix}_ctr32_encrypt_blocks
+___
+}}}
+########################################
+{ my %opcode = (
+    "sm4e" => 0xcec08400,
+    "sm4ekey" => 0xce60c800);
+
+    sub unsm4 {
+    my ($mnemonic,$arg)=@_;
+
+    $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o
+    &&
+    sprintf ".inst\t0x%08x\t//%s %s",
+    $opcode{$mnemonic}|$1|($2<<5)|($3<<16),
+    $mnemonic,$arg;
+    }
+}
+
+open SELF,$0;
+while(<SELF>) {
+    next if (/^#!/);
+    last if (!s/^#/\/\// and !/^$/);
+    print;
+}
+close SELF;
+
+foreach(split("\n",$code)) {
+    s/\`([^\`]*)\`/eval($1)/ge;
+
+    s/\b(sm4\w+)\s+([qv].*)/unsm4($1,$2)/ge;
+    print $_,"\n";
+}
+
+close STDOUT or die "error closing STDOUT: $!";
diff --git a/crypto/sm4/build.info b/crypto/sm4/build.info
index bb042c5..4d26ede 100644
--- a/crypto/sm4/build.info
+++ b/crypto/sm4/build.info
@@ -2,6 +2,17 @@ LIBS=../../libcrypto
 SOURCE[../../libcrypto]=\
         sm4.c {- $target{sm4_asm_src} -}

+GENERATE[sm4-armv8.S]=asm/sm4-armv8.pl $(PERLASM_SCHEME)
+INCLUDE[sm4-armv8.o]=..

 GENERATE[vpsm4_ex-armv8.S]=asm/vpsm4_ex-armv8.pl $(PERLASM_SCHEME)
-INCLUDE[vpsm4_ex-armv8.o]=..
\ No newline at end of file
+INCLUDE[vpsm4_ex-armv8.o]=..
+
+BEGINRAW[Makefile]
+##### SM4 assembler implementations
+
+# GNU make "catch all"
+{- $builddir -}/sm4-%.S: {- $sourcedir -}/asm/sm4-%.pl
+	CC="$(CC)" $(PERL) $< $(PERLASM_SCHEME) $@
+
+ENDRAW[Makefile]
diff --git a/include/crypto/sm4_platform.h b/include/crypto/sm4_platform.h
new file mode 100644
index 0000000..2f5a6cf
--- /dev/null
+++ b/include/crypto/sm4_platform.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2022 The OpenSSL Project Authors. All Rights Reserved.
+ *
+ * Licensed under the Apache License 2.0 (the "License"). You may not use
+ * this file except in compliance with the License. You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ */
+
+#ifndef OSSL_SM4_PLATFORM_H
+# define OSSL_SM4_PLATFORM_H
+# pragma once
+
+# if defined(OPENSSL_CPUID_OBJ)
+#  if (defined(__arm__) || defined(__arm) || defined(__aarch64__))
+#   include "arm_arch.h"
+#   if __ARM_MAX_ARCH__>=7
+#    if defined(VPSM4_EX_ASM)
+#     define VPSM4_EX_CAPABLE (OPENSSL_armcap_P & ARMV8_AES)
+#    endif
+#    define HWSM4_CAPABLE (OPENSSL_armcap_P & ARMV8_SM4)
+#    define HWSM4_set_encrypt_key sm4_v8_set_encrypt_key
+#    define HWSM4_set_decrypt_key sm4_v8_set_decrypt_key
+#    define HWSM4_encrypt sm4_v8_encrypt
+#    define HWSM4_decrypt sm4_v8_decrypt
+#    define HWSM4_cbc_encrypt sm4_v8_cbc_encrypt
+#    define HWSM4_ecb_encrypt sm4_v8_ecb_encrypt
+#    define HWSM4_ctr32_encrypt_blocks sm4_v8_ctr32_encrypt_blocks
+#   endif
+#  endif
+# endif /* OPENSSL_CPUID_OBJ */
+
+# if defined(HWSM4_CAPABLE)
+int HWSM4_set_encrypt_key(const unsigned char *userKey, SM4_KEY *key);
+int HWSM4_set_decrypt_key(const unsigned char *userKey, SM4_KEY *key);
+void HWSM4_encrypt(const unsigned char *in, unsigned char *out,
+                   const SM4_KEY *key);
+void HWSM4_decrypt(const unsigned char *in, unsigned char *out,
+                   const SM4_KEY *key);
+void HWSM4_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                       size_t length, const SM4_KEY *key,
+                       unsigned char *ivec, const int enc);
+void HWSM4_ecb_encrypt(const unsigned char *in, unsigned char *out,
+                       size_t length, const SM4_KEY *key,
+                       const int enc);
+void HWSM4_ctr32_encrypt_blocks(const unsigned char *in, unsigned char *out,
+                                size_t len, const void *key,
+                                const unsigned char ivec[16]);
+# endif /* HWSM4_CAPABLE */
+
+#ifdef VPSM4_EX_CAPABLE
+void vpsm4_ex_set_encrypt_key(const unsigned char *userKey, SM4_KEY *key);
+void vpsm4_ex_set_decrypt_key(const unsigned char *userKey, SM4_KEY *key);
+#define vpsm4_ex_encrypt SM4_encrypt
+#define vpsm4_ex_decrypt SM4_encrypt
+void vpsm4_ex_ecb_encrypt(
+    const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key, const int enc);
+/* xts mode in GB/T 17964-2021 */
+void vpsm4_ex_xts_encrypt_gb(const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key1,
+    const SM4_KEY *key2, const uint8_t iv[16]);
+void vpsm4_ex_xts_decrypt_gb(const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key1,
+    const SM4_KEY *key2, const uint8_t iv[16]);
+/* xts mode in IEEE Std 1619-2007 */
+void vpsm4_ex_xts_encrypt(const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key1,
+    const SM4_KEY *key2, const uint8_t iv[16]);
+void vpsm4_ex_xts_decrypt(const unsigned char *in, unsigned char *out, size_t length, const SM4_KEY *key1,
+    const SM4_KEY *key2, const uint8_t iv[16]);
+#endif /* VPSM4_EX_CAPABLE */
+
+#endif /* OSSL_SM4_PLATFORM_H */
\ No newline at end of file
--
2.36.1