[FFmpeg-devel] [PATCH 4/6] pp: move optim templating config to template itself.

Michael Niedermayer michaelni at gmx.at
Thu Nov 15 01:10:51 CET 2012


On Wed, Nov 14, 2012 at 11:29:58PM +0100, Clément Bœsch wrote:
> Also avoid messing up the HAVE_* flags.
> ---
>  libpostproc/postprocess.c                  |  73 ++--------
>  libpostproc/postprocess_altivec_template.c |   2 +-
>  libpostproc/postprocess_template.c         | 226 +++++++++++++++++------------
>  3 files changed, 146 insertions(+), 155 deletions(-)
> 
> diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c
> index f0d97d3..7001778 100644
> --- a/libpostproc/postprocess.c
> +++ b/libpostproc/postprocess.c
> @@ -538,84 +538,33 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, int stride,
>  //Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
>  //Plain C versions
>  //we always compile C for testing which needs bitexactness
> -#define COMPILE_C
> +#define COMPILE_C 1
> +#include "postprocess_template.c"
>  
>  #if HAVE_ALTIVEC
> -#define COMPILE_ALTIVEC
> +#define COMPILE_ALTIVEC 1
> +#include "postprocess_altivec_template.c"
> +#include "postprocess_template.c"
>  #endif //HAVE_ALTIVEC
>  
>  #if ARCH_X86 && HAVE_INLINE_ASM
>  
>  #if (HAVE_MMX_INLINE && !HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
> -#define COMPILE_MMX
> -#endif
> -
> -#if HAVE_MMXEXT_INLINE || CONFIG_RUNTIME_CPUDETECT
> -#define COMPILE_MMX2
> -#endif
> -
> -#if (HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
> -#define COMPILE_3DNOW
> -#endif
> -#endif /* ARCH_X86 */
> -
> -#undef HAVE_MMX_INLINE
> -#define HAVE_MMX_INLINE 0
> -#undef HAVE_MMXEXT_INLINE
> -#define HAVE_MMXEXT_INLINE 0
> -#undef HAVE_AMD3DNOW_INLINE
> -#define HAVE_AMD3DNOW_INLINE 0
> -#undef HAVE_ALTIVEC
> -#define HAVE_ALTIVEC 0
> -
> -#ifdef COMPILE_C
> -#define RENAME(a) a ## _C
> +#define COMPILE_MMX 1
>  #include "postprocess_template.c"
>  #endif
>  
> -#ifdef COMPILE_ALTIVEC
> -#undef RENAME
> -#undef HAVE_ALTIVEC
> -#define HAVE_ALTIVEC 1
> -#define RENAME(a) a ## _altivec
> -#include "postprocess_altivec_template.c"
> -#include "postprocess_template.c"
> -#endif
> -
> -//MMX versions
> -#ifdef COMPILE_MMX
> -#undef RENAME
> -#undef HAVE_MMX_INLINE
> -#define HAVE_MMX_INLINE 1
> -#define RENAME(a) a ## _MMX
> -#include "postprocess_template.c"
> -#endif
> -
> -//MMX2 versions
> -#ifdef COMPILE_MMX2
> -#undef RENAME
> -#undef HAVE_MMX_INLINE
> -#undef HAVE_MMXEXT_INLINE
> -#define HAVE_MMX_INLINE 1
> -#define HAVE_MMXEXT_INLINE 1
> -#define RENAME(a) a ## _MMX2
> +#if HAVE_MMXEXT_INLINE || CONFIG_RUNTIME_CPUDETECT
> +#define COMPILE_MMXEXT 1
>  #include "postprocess_template.c"
>  #endif
>  
> -//3DNOW versions
> -#ifdef COMPILE_3DNOW
> -#undef RENAME
> -#undef HAVE_MMX_INLINE
> -#undef HAVE_MMXEXT_INLINE
> -#undef HAVE_AMD3DNOW_INLINE
> -#define HAVE_MMX_INLINE 1
> -#define HAVE_MMXEXT_INLINE 0
> -#define HAVE_AMD3DNOW_INLINE 1
> -#define RENAME(a) a ## _3DNow
> +#if (HAVE_AMD3DNOW_INLINE && !HAVE_MMXEXT_INLINE) || CONFIG_RUNTIME_CPUDETECT
> +#define COMPILE_3DNOW 1
>  #include "postprocess_template.c"
>  #endif
>  
> -// minor note: the HAVE_xyz is messed up after that line so do not use it.
> +#endif /* ARCH_X86 && HAVE_INLINE_ASM */
>  
>  static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
>          const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
> diff --git a/libpostproc/postprocess_altivec_template.c b/libpostproc/postprocess_altivec_template.c
> index 3a37562..fa6ebe2 100644
> --- a/libpostproc/postprocess_altivec_template.c
> +++ b/libpostproc/postprocess_altivec_template.c
> @@ -825,7 +825,7 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
>  #define doHorizDefFilter_altivec(a...) doHorizDefFilter_C(a)
>  #define do_a_deblock_altivec(a...) do_a_deblock_C(a)
>  
> -static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
> +static inline void tempNoiseReducer_altivec(uint8_t *src, int stride,
>                                              uint8_t *tempBlurred, uint32_t *tempBlurredPast, int *maxNoise)
>  {
>      const vector signed char neg1 = vec_splat_s8(-1);
> diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c
> index f45ccf8..6611670 100644
> --- a/libpostproc/postprocess_template.c
> +++ b/libpostproc/postprocess_template.c
> @@ -25,37 +25,72 @@
>  
>  #include "libavutil/x86/asm.h"
>  
> +#ifdef COMPILE_C
> +# define RENAME(a) a ## _C
> +#else
> +# define COMPILE_C 0
> +#endif
> +

> +#ifdef COMPILE_ALTIVEC
> +# define COMPILE_ALTIVEC 1

this looks wrong


> +# define RENAME(a) a ## _altivec
> +#else
> +# define COMPILE_ALTIVEC 0
> +#endif
> +
> +#ifdef COMPILE_MMX
> +# define RENAME(a) a ## _MMX
> +#else
> +# define COMPILE_MMX 0
> +#endif
> +
> +#ifdef COMPILE_MMXEXT
> +# undef  COMPILE_MMX
> +# define COMPILE_MMX 1
> +# define RENAME(a) a ## _MMX2
> +#else
> +# define COMPILE_MMXEXT 0
> +#endif
> +
> +#ifdef COMPILE_3DNOW
> +# undef  COMPILE_MMX
> +# define COMPILE_MMX 1
> +# define RENAME(a) a ## _3DNow
> +#else
> +# define COMPILE_3DNOW 0
> +#endif
> +
>  #undef REAL_PAVGB
>  #undef PAVGB
>  #undef PMINUB
>  #undef PMAXUB
>  
> -#if   HAVE_MMXEXT_INLINE
> +#if   COMPILE_MMXEXT
>  #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t"
> -#elif HAVE_AMD3DNOW_INLINE
> +#elif COMPILE_3DNOW
>  #define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t"
>  #endif
>  #define PAVGB(a,b)  REAL_PAVGB(a,b)
>  
> -#if   HAVE_MMXEXT_INLINE
> +#if   COMPILE_MMXEXT
>  #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
> -#elif HAVE_MMX_INLINE
> +#elif COMPILE_MMX
>  #define PMINUB(b,a,t) \
>      "movq " #a ", " #t " \n\t"\
>      "psubusb " #b ", " #t " \n\t"\
>      "psubb " #t ", " #a " \n\t"
>  #endif
>  
> -#if   HAVE_MMXEXT_INLINE
> +#if   COMPILE_MMXEXT
>  #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
> -#elif HAVE_MMX_INLINE
> +#elif COMPILE_MMX
>  #define PMAXUB(a,b) \
>      "psubusb " #a ", " #b " \n\t"\
>      "paddb " #a ", " #b " \n\t"
>  #endif
>  
>  //FIXME? |255-0| = 1 (should not be a problem ...)
> -#if HAVE_MMX_INLINE
> +#if COMPILE_MMX

These changes dont feel correct
For the template in its instance of compilation the question is
do i HAVE MMX/MMX2/3dnow/SSE on the target i get compiled for

or to say it differently
one could compile for MMX2 but not MMX in this case COMPILE_MMX2
would be set but not MMX. Yet the template should have its define
for MMX+MMX2 set. Aka i get COMPILED for MMX2 and HAVE MMX+MMX2

If you use COMPILE_* for this then COMPILE_* has a different semantic
meaning inside and outside of teh template, this appears bad to me.


[...]
-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Avoid a single point of failure, be that a person or equipment.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20121115/d02946f5/attachment.asc>


More information about the ffmpeg-devel mailing list