[FFmpeg-devel] [PATCH 1/2] swresample: Refactor resample asm and port it to yasm

James Almer jamrial at gmail.com
Wed Mar 19 22:45:03 CET 2014


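This reduces code duplication and makes it easier to implement new asm
functions in the future. The inline-asm int16 cores are replaced by yasm
functions selected at init time through a new scalarproduct function
pointer in ResampleContext.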

Signed-off-by: James Almer <jamrial at gmail.com>
---
 libswresample/resample.c            | 96 ++++++++++---------------------------
 libswresample/resample_template.c   | 49 +++++++------------
 libswresample/swresample_internal.h | 24 ++++++++++
 libswresample/x86/Makefile          |  1 +
 libswresample/x86/resample.asm      | 64 +++++++++++++++++++++++++
 libswresample/x86/resample_mmx.h    | 74 ----------------------------
 libswresample/x86/swresample_x86.c  | 16 +++++++
 7 files changed, 148 insertions(+), 176 deletions(-)
 create mode 100644 libswresample/x86/resample.asm
 delete mode 100644 libswresample/x86/resample_mmx.h

diff --git a/libswresample/resample.c b/libswresample/resample.c
index 034b47a..c7e9c02 100644
--- a/libswresample/resample.c
+++ b/libswresample/resample.c
@@ -29,29 +29,6 @@
 #include "libavutil/avassert.h"
 #include "swresample_internal.h"
 
-
-typedef struct ResampleContext {
-    const AVClass *av_class;
-    uint8_t *filter_bank;
-    int filter_length;
-    int filter_alloc;
-    int ideal_dst_incr;
-    int dst_incr;
-    int index;
-    int frac;
-    int src_incr;
-    int compensation_distance;
-    int phase_shift;
-    int phase_mask;
-    int linear;
-    enum SwrFilterType filter_type;
-    int kaiser_beta;
-    double factor;
-    enum AVSampleFormat format;
-    int felem_size;
-    int filter_shift;
-} ResampleContext;
-
 /**
  * 0th order modified bessel function of the first kind.
  */
@@ -195,6 +172,22 @@ static int build_filter(ResampleContext *c, void *filter, double factor, int tap
     return 0;
 }
 
+#define TEMPLATE_RESAMPLE_S16
+#include "resample_template.c"
+#undef TEMPLATE_RESAMPLE_S16
+
+#define TEMPLATE_RESAMPLE_S32
+#include "resample_template.c"
+#undef TEMPLATE_RESAMPLE_S32
+
+#define TEMPLATE_RESAMPLE_FLT
+#include "resample_template.c"
+#undef TEMPLATE_RESAMPLE_FLT
+
+#define TEMPLATE_RESAMPLE_DBL
+#include "resample_template.c"
+#undef TEMPLATE_RESAMPLE_DBL
+
 static ResampleContext *resample_init(ResampleContext *c, int out_rate, int in_rate, int filter_size, int phase_shift, int linear,
                                     double cutoff0, enum AVSampleFormat format, enum SwrFilterType filter_type, int kaiser_beta,
                                     double precision, int cheby){
@@ -216,13 +209,19 @@ static ResampleContext *resample_init(ResampleContext *c, int out_rate, int in_r
         switch(c->format){
         case AV_SAMPLE_FMT_S16P:
             c->filter_shift = 15;
+            c->scalarproduct = scalarproduct_int16;
             break;
         case AV_SAMPLE_FMT_S32P:
             c->filter_shift = 30;
+            c->scalarproduct = scalarproduct_int32;
             break;
         case AV_SAMPLE_FMT_FLTP:
+            c->filter_shift = 0;
+            c->scalarproduct = scalarproduct_float;
+            break;
         case AV_SAMPLE_FMT_DBLP:
             c->filter_shift = 0;
+            c->scalarproduct = scalarproduct_double;
             break;
         default:
             av_log(NULL, AV_LOG_ERROR, "Unsupported sample format\n");
@@ -254,6 +253,9 @@ static ResampleContext *resample_init(ResampleContext *c, int out_rate, int in_r
     c->index= -phase_count*((c->filter_length-1)/2);
     c->frac= 0;
 
+    if (ARCH_X86)
+        swri_audio_resample_init_x86(c);
+
     return c;
 error:
     av_freep(&c->filter_bank);
@@ -277,62 +279,16 @@ static int set_compensation(ResampleContext *c, int sample_delta, int compensati
     return 0;
 }
 
-#define TEMPLATE_RESAMPLE_S16
-#include "resample_template.c"
-#undef TEMPLATE_RESAMPLE_S16
-
-#define TEMPLATE_RESAMPLE_S32
-#include "resample_template.c"
-#undef TEMPLATE_RESAMPLE_S32
-
-#define TEMPLATE_RESAMPLE_FLT
-#include "resample_template.c"
-#undef TEMPLATE_RESAMPLE_FLT
-
-#define TEMPLATE_RESAMPLE_DBL
-#include "resample_template.c"
-#undef TEMPLATE_RESAMPLE_DBL
-
-// XXX FIXME the whole C loop should be written in asm so this x86 specific code here isnt needed
-#if HAVE_MMXEXT_INLINE
-
-#include "x86/resample_mmx.h"
-
-#define TEMPLATE_RESAMPLE_S16_MMX2
-#include "resample_template.c"
-#undef TEMPLATE_RESAMPLE_S16_MMX2
-
-#if HAVE_SSE2_INLINE
-#define TEMPLATE_RESAMPLE_S16_SSE2
-#include "resample_template.c"
-#undef TEMPLATE_RESAMPLE_S16_SSE2
-#endif
-
-#endif // HAVE_MMXEXT_INLINE
-
 static int multiple_resample(ResampleContext *c, AudioData *dst, int dst_size, AudioData *src, int src_size, int *consumed){
     int i, ret= -1;
-    int av_unused mm_flags = av_get_cpu_flags();
-    int need_emms= 0;
 
     for(i=0; i<dst->ch_count; i++){
-#if HAVE_MMXEXT_INLINE
-#if HAVE_SSE2_INLINE
-             if(c->format == AV_SAMPLE_FMT_S16P && (mm_flags&AV_CPU_FLAG_SSE2)) ret= swri_resample_int16_sse2 (c, (int16_t*)dst->ch[i], (const int16_t*)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
-        else
-#endif
-             if(c->format == AV_SAMPLE_FMT_S16P && (mm_flags&AV_CPU_FLAG_MMX2 )){
-                 ret= swri_resample_int16_mmx2 (c, (int16_t*)dst->ch[i], (const int16_t*)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
-                 need_emms= 1;
-             } else
-#endif
              if(c->format == AV_SAMPLE_FMT_S16P) ret= swri_resample_int16(c, (int16_t*)dst->ch[i], (const int16_t*)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
         else if(c->format == AV_SAMPLE_FMT_S32P) ret= swri_resample_int32(c, (int32_t*)dst->ch[i], (const int32_t*)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
         else if(c->format == AV_SAMPLE_FMT_FLTP) ret= swri_resample_float(c, (float  *)dst->ch[i], (const float  *)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
         else if(c->format == AV_SAMPLE_FMT_DBLP) ret= swri_resample_double(c,(double *)dst->ch[i], (const double *)src->ch[i], consumed, src_size, dst_size, i+1==dst->ch_count);
     }
-    if(need_emms)
-        emms_c();
+
     return ret;
 }
 
diff --git a/libswresample/resample_template.c b/libswresample/resample_template.c
index f11053d..123786a 100644
--- a/libswresample/resample_template.c
+++ b/libswresample/resample_template.c
@@ -55,10 +55,8 @@
 #    define OUT(d, v) v = (v + (1<<(FILTER_SHIFT-1)))>>FILTER_SHIFT;\
                       d = (uint64_t)(v + 0x80000000) > 0xFFFFFFFF ? (v>>63) ^ 0x7FFFFFFF : v
 
-#elif    defined(TEMPLATE_RESAMPLE_S16)      \
-      || defined(TEMPLATE_RESAMPLE_S16_MMX2) \
-      || defined(TEMPLATE_RESAMPLE_S16_SSE2)
-
+#elif defined(TEMPLATE_RESAMPLE_S16)
+#    define RENAME(N) N ## _int16
 #    define FILTER_SHIFT 15
 #    define DELEM  int16_t
 #    define FELEM  int16_t
@@ -68,18 +66,21 @@
 #    define FELEM_MIN INT16_MIN
 #    define OUT(d, v) v = (v + (1<<(FILTER_SHIFT-1)))>>FILTER_SHIFT;\
                       d = (unsigned)(v + 32768) > 65535 ? (v>>31) ^ 32767 : v
+#endif
 
-#    if defined(TEMPLATE_RESAMPLE_S16)
-#        define RENAME(N) N ## _int16
-#    elif defined(TEMPLATE_RESAMPLE_S16_MMX2)
-#        define COMMON_CORE COMMON_CORE_INT16_MMX2
-#        define RENAME(N) N ## _int16_mmx2
-#    elif defined(TEMPLATE_RESAMPLE_S16_SSE2)
-#        define COMMON_CORE COMMON_CORE_INT16_SSE2
-#        define RENAME(N) N ## _int16_sse2
-#    endif
+static void RENAME(scalarproduct)(const void *source, void *dest, void *filter, int len) /* C kernel: writes one output sample, the sum of src[i]*flt[i] over len taps */
+{
+    const DELEM *src = (const DELEM*)source; /* the untyped pointers are cast to this template instance's sample type */
+    DELEM *dst = (DELEM*)dest;
+    FELEM *flt = (FELEM*)filter;             /* filter coefficients use the template's FELEM type */
+    FELEM2 val=0;                            /* accumulator in the wider FELEM2 type */
+    int i;
 
-#endif
+    for(i = 0; i < len; i++){
+        val += src[i] * (FELEM2)flt[i];      /* coefficient is widened to FELEM2 before the multiply */
+    }
+    OUT(*dst, val);                          /* per-format store macro; the S16/S32 variants round, shift by FILTER_SHIFT and clip */
+}
 
 int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int *consumed, int src_size, int dst_size, int update_ctx){
     int dst_index, i;
@@ -118,15 +119,7 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int
             if(sample_index + c->filter_length > src_size){
                 break;
             }else{
-#ifdef COMMON_CORE
-                COMMON_CORE
-#else
-                FELEM2 val=0;
-                for(i=0; i<c->filter_length; i++){
-                    val += src[sample_index + i] * (FELEM2)filter[i];
-                }
-                OUT(dst[dst_index], val);
-#endif
+                c->scalarproduct(src+sample_index, dst+dst_index, filter, c->filter_length);
             }
 
             frac += dst_incr_frac;
@@ -162,14 +155,7 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int
                 val+=(v2-val)*(FELEML)frac / c->src_incr;
                 OUT(dst[dst_index], val);
             }else{
-#ifdef COMMON_CORE
-                COMMON_CORE
-#else
-                for(i=0; i<c->filter_length; i++){
-                    val += src[sample_index + i] * (FELEM2)filter[i];
-                }
-                OUT(dst[dst_index], val);
-#endif
+                c->scalarproduct(src+sample_index, dst+dst_index, filter, c->filter_length);
             }
 
             frac += dst_incr_frac;
@@ -204,7 +190,6 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int
     return dst_index;
 }
 
-#undef COMMON_CORE
 #undef RENAME
 #undef FILTER_SHIFT
 #undef DELEM
diff --git a/libswresample/swresample_internal.h b/libswresample/swresample_internal.h
index ab19f21..95a803c 100644
--- a/libswresample/swresample_internal.h
+++ b/libswresample/swresample_internal.h
@@ -66,6 +66,29 @@ struct DitherContext {
     int output_sample_bits;                         ///< the number of used output bits, needed to scale dither correctly
 };
 
+typedef struct ResampleContext {
+    const AVClass *av_class;
+    uint8_t *filter_bank;
+    int filter_length;                              ///< passed as the length argument of scalarproduct by the resample loops
+    int filter_alloc;
+    int ideal_dst_incr;
+    int dst_incr;
+    int index;                                      ///< initialised by resample_init() from phase_count and filter_length
+    int frac;                                       ///< reset to 0 by resample_init()
+    int src_incr;
+    int compensation_distance;
+    int phase_shift;
+    int phase_mask;
+    int linear;
+    enum SwrFilterType filter_type;
+    int kaiser_beta;
+    double factor;
+    enum AVSampleFormat format;                     ///< selects filter_shift and the scalarproduct kernel in resample_init()
+    int felem_size;
+    int filter_shift;                               ///< 15 for S16P, 30 for S32P, 0 for FLTP and DBLP (set in resample_init())
+    void (*scalarproduct)(const void *src, void *dst, void *filter, int length); ///< per-format kernel producing one output sample; swri_audio_resample_init_x86() may replace it with an asm version
+} ResampleContext;
+
 struct SwrContext {
     const AVClass *av_class;                        ///< AVClass used for AVOption and av_log()
     int log_level_offset;                           ///< logging level offset
@@ -196,4 +219,5 @@ void swri_audio_convert_init_x86(struct AudioConvert *ac,
                                  enum AVSampleFormat out_fmt,
                                  enum AVSampleFormat in_fmt,
                                  int channels);
+void swri_audio_resample_init_x86(struct ResampleContext *c);
 #endif
diff --git a/libswresample/x86/Makefile b/libswresample/x86/Makefile
index 1d1ab6e..0f8e75d 100644
--- a/libswresample/x86/Makefile
+++ b/libswresample/x86/Makefile
@@ -1,5 +1,6 @@
 YASM-OBJS                       += x86/swresample_x86.o\
                                    x86/audio_convert.o\
                                    x86/rematrix.o\
+                                   x86/resample.o\
 
 OBJS-$(CONFIG_XMM_CLOBBER_TEST) += x86/w64xmmtest.o
diff --git a/libswresample/x86/resample.asm b/libswresample/x86/resample.asm
new file mode 100644
index 0000000..0204387
--- /dev/null
+++ b/libswresample/x86/resample.asm
@@ -0,0 +1,64 @@
+;******************************************************************************
+;* Copyright (c) 2012 Michael Niedermayer
+;* Copyright (c) 2014 James Almer
+;*
+;* This file is part of FFmpeg.
+;*
+;* FFmpeg is free software; you can redistribute it and/or
+;* modify it under the terms of the GNU Lesser General Public
+;* License as published by the Free Software Foundation; either
+;* version 2.1 of the License, or (at your option) any later version.
+;*
+;* FFmpeg is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;* Lesser General Public License for more details.
+;*
+;* You should have received a copy of the GNU Lesser General Public
+;* License along with FFmpeg; if not, write to the Free Software
+;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+;******************************************************************************
+
+%include "libavutil/x86/x86util.asm"
+
+
+SECTION_RODATA
+round: dq 0x4000, 0
+
+SECTION .text
+
+%macro RESAMPLE_SCALARPRODUCT_INT16 0
+cglobal resample_scalarproduct_int16, 4,4,2, src, dst, filter, len
+    shl      lend, 1                 ; int16 count -> byte count; 32-bit op because len is a C int, and it zero-extends into lenq on x86-64
+    neg      lenq                    ; lenq = -bytes, counts up towards zero
+    sub      srcq, lenq              ; point src past the end so [srcq + lenq] starts at the first sample
+    sub      filterq, lenq           ; same for the filter coefficients
+    mova     m0, [round]             ; accumulator starts at 0x4000 in its low dword = half of the 1<<15 divisor, for rounding
+.loop:
+    movu     m1, [srcq + lenq]       ; unaligned load of mmsize bytes of samples
+    pmaddwd  m1, [filterq + lenq]    ; multiply int16 pairs and add adjacent products; NOTE(review): memory operand - confirm the filter is mmsize-aligned for SSE2
+    paddd    m0, m1                  ; accumulate the 32-bit partial sums
+    add      lenq, mmsize            ; NOTE(review): always steps a full register - confirm filter_length is padded to a multiple of mmsize/2
+    js .loop                         ; continue while the offset is still negative
+%if mmsize == 8
+    pshufw   m1, m0, 0xe             ; bring the high dword down to the low dword
+    paddd    m0, m1                  ; low dword = total sum
+%else
+    pshufd   m1, m0, 0xe             ; bring the upper two dwords down
+    paddd    m0, m1
+    pshufd   m1, m0, 1               ; bring dword 1 down to dword 0
+    paddd    m0, m1                  ; low dword = total sum
+%endif
+    psrad    m0, 15                  ; drop the 15 fractional bits of the filter scale
+    packssdw m0, m0                  ; saturate to int16
+    movd     [dstq], m0              ; NOTE(review): stores 32 bits, so the int16 slot after *dst is written too - confirm callers tolerate this
+%if mmsize == 8
+    emms                             ; leave MMX state on every call
+%endif
+    RET
+%endmacro
+
+INIT_MMX mmxext
+RESAMPLE_SCALARPRODUCT_INT16
+INIT_XMM sse2
+RESAMPLE_SCALARPRODUCT_INT16
diff --git a/libswresample/x86/resample_mmx.h b/libswresample/x86/resample_mmx.h
deleted file mode 100644
index f366cc7..0000000
--- a/libswresample/x86/resample_mmx.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2012 Michael Niedermayer <michaelni at gmx.at>
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/x86/asm.h"
-#include "libavutil/cpu.h"
-#include "libswresample/swresample_internal.h"
-
-int swri_resample_int16_mmx2 (struct ResampleContext *c, int16_t *dst, const int16_t *src, int *consumed, int src_size, int dst_size, int update_ctx);
-int swri_resample_int16_sse2 (struct ResampleContext *c, int16_t *dst, const int16_t *src, int *consumed, int src_size, int dst_size, int update_ctx);
-
-DECLARE_ALIGNED(16, const uint64_t, ff_resample_int16_rounder)[2]    = { 0x0000000000004000ULL, 0x0000000000000000ULL};
-
-#define COMMON_CORE_INT16_MMX2 \
-    x86_reg len= -2*c->filter_length;\
-__asm__ volatile(\
-    "movq "MANGLE(ff_resample_int16_rounder)", %%mm0 \n\t"\
-    "1:                         \n\t"\
-    "movq    (%1, %0), %%mm1    \n\t"\
-    "pmaddwd (%2, %0), %%mm1    \n\t"\
-    "paddd  %%mm1, %%mm0        \n\t"\
-    "add       $8, %0           \n\t"\
-    " js 1b                     \n\t"\
-    "pshufw $0x0E, %%mm0, %%mm1 \n\t"\
-    "paddd %%mm1, %%mm0         \n\t"\
-    "psrad    $15, %%mm0        \n\t"\
-    "packssdw %%mm0, %%mm0      \n\t"\
-    "movd %%mm0, (%3)           \n\t"\
-    : "+r" (len)\
-    : "r" (((uint8_t*)(src+sample_index))-len),\
-      "r" (((uint8_t*)filter)-len),\
-      "r" (dst+dst_index)\
-      NAMED_CONSTRAINTS_ADD(ff_resample_int16_rounder)\
-);
-
-#define COMMON_CORE_INT16_SSE2 \
-    x86_reg len= -2*c->filter_length;\
-__asm__ volatile(\
-    "movdqa "MANGLE(ff_resample_int16_rounder)", %%xmm0 \n\t"\
-    "1:                           \n\t"\
-    "movdqu  (%1, %0), %%xmm1     \n\t"\
-    "pmaddwd (%2, %0), %%xmm1     \n\t"\
-    "paddd  %%xmm1, %%xmm0        \n\t"\
-    "add       $16, %0            \n\t"\
-    " js 1b                       \n\t"\
-    "pshufd $0x0E, %%xmm0, %%xmm1 \n\t"\
-    "paddd %%xmm1, %%xmm0         \n\t"\
-    "pshufd $0x01, %%xmm0, %%xmm1 \n\t"\
-    "paddd %%xmm1, %%xmm0         \n\t"\
-    "psrad    $15, %%xmm0         \n\t"\
-    "packssdw %%xmm0, %%xmm0      \n\t"\
-    "movd %%xmm0, (%3)            \n\t"\
-    : "+r" (len)\
-    : "r" (((uint8_t*)(src+sample_index))-len),\
-      "r" (((uint8_t*)filter)-len),\
-      "r" (dst+dst_index)\
-      NAMED_CONSTRAINTS_ADD(ff_resample_int16_rounder)\
-);
diff --git a/libswresample/x86/swresample_x86.c b/libswresample/x86/swresample_x86.c
index 7483ba0..f38b069 100644
--- a/libswresample/x86/swresample_x86.c
+++ b/libswresample/x86/swresample_x86.c
@@ -18,6 +18,7 @@
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/x86/cpu.h"
 #include "libswresample/swresample_internal.h"
 #include "libswresample/audioconvert.h"
 
@@ -198,3 +199,18 @@ av_cold void swri_rematrix_init_x86(struct SwrContext *s){
         memcpy(s->native_simd_one, s->native_one, sizeof(float));
     }
 }
+
+void ff_resample_scalarproduct_int16_mmxext(const void *src, void *dst, void *filter, int length);
+void ff_resample_scalarproduct_int16_sse2  (const void *src, void *dst, void *filter, int length);
+
+av_cold void swri_audio_resample_init_x86(ResampleContext *c) /* init-time only: replaces c->scalarproduct with an x86 asm kernel when one applies */
+{
+    int cpuflags = av_get_cpu_flags();
+
+    if (c->format == AV_SAMPLE_FMT_S16P) { /* asm kernels are declared only for the int16 planar format */
+        if (EXTERNAL_MMXEXT(cpuflags))
+            c->scalarproduct = ff_resample_scalarproduct_int16_mmxext;
+        if (EXTERNAL_SSE2(cpuflags)) /* tested last so that SSE2 overrides MMXEXT when both are usable */
+            c->scalarproduct = ff_resample_scalarproduct_int16_sse2;
+    }
+}
-- 
1.8.3.2



More information about the ffmpeg-devel mailing list