[FFmpeg-devel] [PATCH v3 2/5] avcodec/ac3: Implement ac3_exponent_min for aarch64 NEON
Geoff Hill
geoff at geoffhill.org
Wed Apr 3 09:43:26 EEST 2024
Signed-off-by: Geoff Hill <geoff at geoffhill.org>
---
libavcodec/aarch64/ac3dsp_init_aarch64.c | 2 ++
libavcodec/aarch64/ac3dsp_neon.S | 16 +++++++++
tests/checkasm/ac3dsp.c | 41 ++++++++++++++++++++++++
3 files changed, 59 insertions(+)
diff --git a/libavcodec/aarch64/ac3dsp_init_aarch64.c b/libavcodec/aarch64/ac3dsp_init_aarch64.c
index e3320de0f5..8874b41393 100644
--- a/libavcodec/aarch64/ac3dsp_init_aarch64.c
+++ b/libavcodec/aarch64/ac3dsp_init_aarch64.c
@@ -25,6 +25,7 @@
#include "libavcodec/ac3dsp.h"
#include "config.h"
+void ff_ac3_exponent_min_neon(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
void ff_float_to_fixed24_neon(int32_t *dst, const float *src, size_t len);
av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
@@ -32,5 +33,6 @@ av_cold void ff_ac3dsp_init_aarch64(AC3DSPContext *c)
int cpu_flags = av_get_cpu_flags();
if (!have_neon(cpu_flags)) return;
+ c->ac3_exponent_min = ff_ac3_exponent_min_neon;
c->float_to_fixed24 = ff_float_to_fixed24_neon;
}
diff --git a/libavcodec/aarch64/ac3dsp_neon.S b/libavcodec/aarch64/ac3dsp_neon.S
index 6924645b7e..c8bdbb1bd3 100644
--- a/libavcodec/aarch64/ac3dsp_neon.S
+++ b/libavcodec/aarch64/ac3dsp_neon.S
@@ -20,6 +20,22 @@
#include "libavutil/aarch64/asm.S"
+function ff_ac3_exponent_min_neon, export=1 // in place: each byte of exp becomes the unsigned min of itself and the bytes 256*k ahead, k = 1..num_reuse_blocks
+ cbz w1, 2f                       // w1 = num_reuse_blocks; zero means return without touching exp
+0: ld1 {v0.16b}, [x0]             // x0 = exp; v0 = 16 bytes at the current position
+ mov w3, w1                       // w3 = inner-loop counter, reloaded for every 16-byte group
+ add x4, x0, #256                 // x4 = same position 256 bytes further on (presumably the per-block stride - confirm against caller)
+1: ld1 {v1.16b}, [x4]             // v1 = 16 bytes at x4
+ umin v0.16b, v0.16b, v1.16b      // keep the per-byte unsigned minimum in v0
+ add x4, x4, #256                 // step x4 forward another 256 bytes
+ subs w3, w3, #1                  // decrement the counter and set flags for b.gt
+ b.gt 1b                          // repeat while w3 > 0 (signed compare)
+ st1 {v0.16b}, [x0], #16          // write the minima back in place, then x0 += 16
+ subs w2, w2, #16                 // w2 = nb_coefs; 16 coefficients consumed
+ b.gt 0b                          // loop while w2 > 0; a partial final group is still handled as a full 16 bytes
+2: ret
+endfunc
+
function ff_float_to_fixed24_neon, export=1
0: ld1 {v0.4s, v1.4s}, [x1], #32
fcvtzs v0.4s, v0.4s, #24
diff --git a/tests/checkasm/ac3dsp.c b/tests/checkasm/ac3dsp.c
index 344e1fe5c2..acb00b6fe1 100644
--- a/tests/checkasm/ac3dsp.c
+++ b/tests/checkasm/ac3dsp.c
@@ -27,6 +27,14 @@
#include "checkasm.h"
+/* Fill buf[0..len) with bytes taken from rnd() (each value cast to uint8_t). */
+#define randomize_exp(buf, len)           \
+    do {                                  \
+        int i;                            \
+        for (i = 0; i < (len); i++)       \
+            (buf)[i] = (uint8_t)rnd();    \
+    } while (0)
+
#define randomize_float(buf, len) \
do { \
int i; \
@@ -36,6 +44,38 @@
} \
} while (0)
+/* Verify c->ac3_exponent_min for every reuse count below MAX_CTXT. */
+static void check_ac3_exponent_min(AC3DSPContext *c) {
+#define MAX_COEFS 256
+#define MAX_CTXT 6
+#define EXP_SIZE (MAX_CTXT * MAX_COEFS)
+    LOCAL_ALIGNED_16(uint8_t, input, [EXP_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, ref_exp, [EXP_SIZE]);
+    LOCAL_ALIGNED_16(uint8_t, new_exp, [EXP_SIZE]);
+    int reuse;
+
+    declare_func(void, uint8_t *, int, int);
+
+    for (reuse = 0; reuse < MAX_CTXT; reuse++) {
+        if (!check_func(c->ac3_exponent_min, "ac3_exponent_min_reuse%d", reuse))
+            continue;
+
+        randomize_exp(input, EXP_SIZE);
+        memcpy(ref_exp, input, EXP_SIZE);
+        memcpy(new_exp, input, EXP_SIZE);
+
+        /* Both versions run in place on identical copies of the input. */
+        call_ref(ref_exp, reuse, MAX_COEFS);
+        call_new(new_exp, reuse, MAX_COEFS);
+        if (memcmp(ref_exp, new_exp, EXP_SIZE))
+            fail();
+
+        bench_new(new_exp, reuse, MAX_COEFS);
+    }
+
+    report("ac3_exponent_min");
+}
+
static void check_float_to_fixed24(AC3DSPContext *c) {
#define BUF_SIZE 1024
LOCAL_ALIGNED_32(float, src, [BUF_SIZE]);
@@ -66,5 +106,6 @@ void checkasm_check_ac3dsp(void)
AC3DSPContext c;
ff_ac3dsp_init(&c);
+ check_ac3_exponent_min(&c);
check_float_to_fixed24(&c);
}
--
2.44.0
More information about the ffmpeg-devel
mailing list