diff --git a/src/common_audio/signal_processing/Android.mk b/src/common_audio/signal_processing/Android.mk index bdc8796e62..26ebb30d41 100644 --- a/src/common_audio/signal_processing/Android.mk +++ b/src/common_audio/signal_processing/Android.mk @@ -57,7 +57,8 @@ ifeq ($(ARCH_ARM_HAVE_NEON),true) LOCAL_SRC_FILES += \ cross_correlation_neon.s \ downsample_fast_neon.s \ - min_max_operations_neon.s + min_max_operations_neon.s \ + vector_scaling_operations_neon.s LOCAL_CFLAGS += \ $(MY_ARM_CFLAGS_NEON) else diff --git a/src/common_audio/signal_processing/ilbc_specific_functions.c b/src/common_audio/signal_processing/ilbc_specific_functions.c index 5a9e5773b3..3588ba411d 100644 --- a/src/common_audio/signal_processing/ilbc_specific_functions.c +++ b/src/common_audio/signal_processing/ilbc_specific_functions.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -11,35 +11,16 @@ /* * This file contains implementations of the iLBC specific functions - * WebRtcSpl_ScaleAndAddVectorsWithRound() * WebRtcSpl_ReverseOrderMultArrayElements() * WebRtcSpl_ElementwiseVectorMult() * WebRtcSpl_AddVectorsAndShift() * WebRtcSpl_AddAffineVectorToVector() * WebRtcSpl_AffineTransformVector() * - * The description header can be found in signal_processing_library.h - * */ #include "signal_processing_library.h" -void WebRtcSpl_ScaleAndAddVectorsWithRound(WebRtc_Word16 *vector1, WebRtc_Word16 scale1, - WebRtc_Word16 *vector2, WebRtc_Word16 scale2, - WebRtc_Word16 right_shifts, WebRtc_Word16 *out, - WebRtc_Word16 vector_length) -{ - int i; - WebRtc_Word16 roundVal; - roundVal = 1 << right_shifts; - roundVal = roundVal >> 1; - for (i = 0; i < vector_length; i++) - { - out[i] = (WebRtc_Word16)((WEBRTC_SPL_MUL_16_16(vector1[i], scale1) - + 
WEBRTC_SPL_MUL_16_16(vector2[i], scale2) + roundVal) >> right_shifts); - } -} - void WebRtcSpl_ReverseOrderMultArrayElements(WebRtc_Word16 *out, G_CONST WebRtc_Word16 *in, G_CONST WebRtc_Word16 *win, WebRtc_Word16 vector_length, diff --git a/src/common_audio/signal_processing/include/signal_processing_library.h b/src/common_audio/signal_processing/include/signal_processing_library.h index 03e3eda1ba..348b5c8f13 100644 --- a/src/common_audio/signal_processing/include/signal_processing_library.h +++ b/src/common_audio/signal_processing/include/signal_processing_library.h @@ -268,17 +268,37 @@ void WebRtcSpl_ScaleAndAddVectors(G_CONST WebRtc_Word16* in_vector1, WebRtc_Word16 gain2, int right_shifts2, WebRtc_Word16* out_vector, int vector_length); + +// Performs the vector operation: +// out_vector[k] = ((scale1 * in_vector1[k]) + (scale2 * in_vector2[k]) +// + round_value) >> right_shifts, +// where round_value = (1 << right_shifts) >> 1. +// +// Input: +// - in_vector1 : Input vector 1 +// - in_vector1_scale : Gain to be used for vector 1 +// - in_vector2 : Input vector 2 +// - in_vector2_scale : Gain to be used for vector 2 +// - right_shifts : Number of right bit shifts to be applied +// - length : Number of elements in the input vectors +// +// Output: +// - out_vector : Output vector +// Return value : 0 if OK, -1 if (in_vector1 == NULL +// || in_vector2 == NULL || out_vector == NULL +// || length <= 0 || right_shift < 0). +int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1, + int16_t in_vector1_scale, + const int16_t* in_vector2, + int16_t in_vector2_scale, + int right_shifts, + int16_t* out_vector, + int length); + // End: Vector scaling operations. // iLBC specific functions. Implementations in ilbc_specific_functions.c. // Description at bottom of file. 
-void WebRtcSpl_ScaleAndAddVectorsWithRound(WebRtc_Word16* in_vector1, - WebRtc_Word16 scale1, - WebRtc_Word16* in_vector2, - WebRtc_Word16 scale2, - WebRtc_Word16 right_shifts, - WebRtc_Word16* out_vector, - WebRtc_Word16 vector_length); void WebRtcSpl_ReverseOrderMultArrayElements(WebRtc_Word16* out_vector, G_CONST WebRtc_Word16* in_vector, G_CONST WebRtc_Word16* window, @@ -991,30 +1011,6 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band, // - out_vector : Output vector // -// -// WebRtcSpl_ScaleAndAddVectorsWithRound(...) -// -// Performs the vector operation: -// -// out_vector[k] = ((scale1*in_vector1[k]) + (scale2*in_vector2[k]) -// + round_value) >> right_shifts -// -// where: -// -// round_value = (1<>1 -// -// Input: -// - in_vector1 : Input vector 1 -// - scale1 : Gain to be used for vector 1 -// - in_vector2 : Input vector 2 -// - scale2 : Gain to be used for vector 2 -// - right_shifts : Number of right bit shifts to be applied -// - vector_length : Number of elements in the input vectors -// -// Output: -// - out_vector : Output vector -// - // // WebRtcSpl_ReverseOrderMultArrayElements(...) // diff --git a/src/common_audio/signal_processing/vector_scaling_operations.c b/src/common_audio/signal_processing/vector_scaling_operations.c index 20d239cabe..76601ad6bd 100644 --- a/src/common_audio/signal_processing/vector_scaling_operations.c +++ b/src/common_audio/signal_processing/vector_scaling_operations.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))
// Reference C implementation; compiled out when the Android NEON assembly
// version of this function is used instead.
//
// For every k in [0, length) computes
//   out_vector[k] = (int16_t)((in_vector1[k] * in_vector1_scale +
//                              in_vector2[k] * in_vector2_scale +
//                              round_value) >> right_shifts),
// where round_value = (1 << right_shifts) >> 1.
//
// Input:
//      - in_vector1       : Input vector 1
//      - in_vector1_scale : Gain to be used for vector 1
//      - in_vector2       : Input vector 2
//      - in_vector2_scale : Gain to be used for vector 2
//      - right_shifts     : Number of right bit shifts to be applied, 0..30
//      - length           : Number of elements in the input vectors
//
// Output:
//      - out_vector       : Output vector
//
// Return value            : 0 if OK, -1 if any pointer is NULL, length <= 0,
//                           right_shifts < 0 or right_shifts > 30. Nothing is
//                           written to out_vector when -1 is returned.
int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
                                          int16_t in_vector1_scale,
                                          const int16_t* in_vector2,
                                          int16_t in_vector2_scale,
                                          int right_shifts,
                                          int16_t* out_vector,
                                          int length) {
  int i = 0;
  int round_value = 0;

  // Validate before touching right_shifts: shifting by a negative count, or
  // shifting 1 into/past the sign bit, is undefined behavior in C.
  if (in_vector1 == NULL || in_vector2 == NULL || out_vector == NULL ||
      length <= 0 || right_shifts < 0 || right_shifts > 30) {
    return -1;
  }

  // Half of the divisor, so the right shift rounds instead of truncating.
  round_value = (1 << right_shifts) >> 1;

  for (i = 0; i < length; i++) {
    // Integer promotion widens the int16_t operands to int before the
    // multiplication, so each product is formed without 16-bit truncation.
    int scaled1 = in_vector1[i] * in_vector1_scale;
    int scaled2 = in_vector2[i] * in_vector2_scale;

    out_vector[i] = (int16_t)((scaled1 + scaled2 + round_value)
                              >> right_shifts);
  }

  return 0;
}
#endif
@

@ vector_scaling_operations_neon.s
@ This file contains the function WebRtcSpl_ScaleAndAddVectorsWithRound(),
@ optimized for ARM Neon platform. Output is bit-exact with the reference
@ C code in vector_scaling_operations.c.
@
@ C prototype:
@   int WebRtcSpl_ScaleAndAddVectorsWithRound(const int16_t* in_vector1,
@                                             int16_t in_vector1_scale,
@                                             const int16_t* in_vector2,
@                                             int16_t in_vector2_scale,
@                                             int right_shifts,
@                                             int16_t* out_vector,
@                                             int length);
@
@ Register usage:
@   r0: in_vector1 (post-incremented), return value on exit
@   r1: in_vector1_scale
@   r2: in_vector2 (post-incremented)
@   r3: in_vector2_scale
@   r4: length, later the count of elements left for the scalar loop
@   r5: out_vector (post-incremented)
@   r6: right_shifts
@   r7, r8: scratch / loop counter
@   r9: round_value for the scalar loop
@
@ Returns 0 in r0 on success. Returns -1 without writing any output if a
@ vector pointer is NULL, length <= 0, right_shifts < 0 or right_shifts > 30.

.arch armv7-a
.fpu neon

.align  2
.global WebRtcSpl_ScaleAndAddVectorsWithRound

WebRtcSpl_ScaleAndAddVectorsWithRound:
.fnstart

  push {r4-r9}

  ldr r4, [sp, #32]               @ length
  ldr r5, [sp, #28]               @ out_vector
  ldr r6, [sp, #24]               @ right_shifts, an int: load the full word.

  @ Argument checks; any failure returns -1.
  cmp r0, #0
  beq INVALID_ARGUMENT            @ in_vector1 == NULL.
  cmp r2, #0
  beq INVALID_ARGUMENT            @ in_vector2 == NULL.
  cmp r5, #0
  beq INVALID_ARGUMENT            @ out_vector == NULL.
  cmp r4, #0
  ble INVALID_ARGUMENT            @ length <= 0.
  cmp r6, #30
  bhi INVALID_ARGUMENT            @ Unsigned compare: right_shifts < 0 or > 30.

  cmp r4, #8
  blt SET_ROUND_VALUE             @ Too short for the unrolled loop.

  vdup.16 d26, r1                 @ in_vector1_scale
  vdup.16 d27, r3                 @ in_vector2_scale

  @ Neon instructions can only right shift by an immediate value. To shift
  @ right by a register value, we have to do a left shift by the negative
  @ value.
  rsb r7, r6, #0
  vdup.16 q12, r7                 @ -right_shifts

  bic r7, r4, #7                  @ Counter for LOOP_UNROLLED_BY_8:
                                  @ length / 8 * 8.

LOOP_UNROLLED_BY_8:
  vld1.16 {d28, d29}, [r0]!       @ in_vector1[]
  vld1.16 {d30, d31}, [r2]!       @ in_vector2[]
  vmull.s16 q0, d28, d26
  vmull.s16 q1, d29, d26
  vmull.s16 q2, d30, d27
  vmull.s16 q3, d31, d27
  vadd.s32 q0, q2
  vadd.s32 q1, q3
  vrshl.s32 q0, q12               @ Round shift right by right_shifts.
  vrshl.s32 q1, q12
  vmovn.i32 d0, q0                @ Cast to 16 bit values.
  vmovn.i32 d1, q1
  subs r7, #8
  vst1.16 {d0, d1}, [r5]!
  bgt LOOP_UNROLLED_BY_8

  @ Only the elements not covered by the unrolled loop remain. The mask must
  @ be 7 (length % 8); a wider mask makes the scalar loop run past the end of
  @ all three vectors.
  ands r4, #7                     @ Counter for LOOP_NO_UNROLLING: length % 8.
  beq END

SET_ROUND_VALUE:
  mov r9, #1
  lsl r9, r6
  lsr r9, #1                      @ round_value = (1 << right_shifts) >> 1.

LOOP_NO_UNROLLING:
  ldrh  r7, [r0], #2
  ldrh  r8, [r2], #2
  smulbb r7, r7, r1
  smulbb r8, r8, r3
  subs r4, #1                     @ Sets the flags tested by bne below.
  add r7, r9
  add r7, r8
  asr r7, r6
  strh r7, [r5], #2
  bne LOOP_NO_UNROLLING

END:
  mov r0, #0                      @ Return 0: success.
  pop {r4-r9}
  bx  lr

INVALID_ARGUMENT:
  mvn r0, #0                      @ Return -1: invalid argument.
  pop {r4-r9}
  bx  lr

.fnend