/*
 * Chinese AVS video (AVS1-P2, JiZhun profile) decoder.
 * Copyright (c) 2006 Stefan Gehrer <stefan.gehrer@gmx.de>
 *
 * MMX-optimized DSP functions, based on H.264 optimizations by
 * Michael Niedermayer and Loren Merritt
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/attributes.h"
#include "libavutil/common.h"
#include "libavutil/cpu.h"
#include "libavutil/mem_internal.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/cavsdsp.h"
#include "libavcodec/idctdsp.h"
#include "constants.h"
#include "fpel.h"
#include "idctdsp.h"
#include "config.h"


#if HAVE_MMX_EXTERNAL

void ff_cavs_idct8_mmx(int16_t *out, const int16_t *in);

static void cavs_idct8_add_mmx(uint8_t *dst, int16_t *block, ptrdiff_t stride)
{
    LOCAL_ALIGNED(16, int16_t, b2, [64]);
    ff_cavs_idct8_mmx(b2, block);
    ff_add_pixels_clamped_mmx(b2, dst, stride);
}

void ff_cavs_idct8_sse2(int16_t *out, const int16_t *in);

static void cavs_idct8_add_sse2(uint8_t *dst, int16_t *block, ptrdiff_t stride)
{
    LOCAL_ALIGNED(16, int16_t, b2, [64]);
    ff_cavs_idct8_sse2(b2, block);
    ff_add_pixels_clamped_sse2(b2, dst, stride);
}

#endif /* HAVE_MMX_EXTERNAL */
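
/* For reference: both wrappers above follow the same pattern: run the asm
 * IDCT into an aligned temporary, then add the result to dst with unsigned
 * saturation.  A plain-C sketch of that final add step (the
 * ff_add_pixels_clamped_* asm routines implement this same operation):
 */
static av_unused void cavs_add_pixels_clamped_c(const int16_t *block,
                                                uint8_t *dst, ptrdiff_t stride)
{
    int i, j;

    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++)
            dst[j] = av_clip_uint8(dst[j] + block[8 * i + j]);
        dst += stride;
    }
}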

#if (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE)

/*****************************************************************************
 *
 *  motion compensation
 *
 ****************************************************************************/

/* vertical filter [-1 -2 96 42 -7 0] */
#define QPEL_CAVSV1(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
        "movd (%0), "#F"            \n\t"\
        "movq "#C", %%mm6           \n\t"\
        "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
        "movq "#D", %%mm7           \n\t"\
        "pmullw "MANGLE(MUL2)", %%mm7\n\t"\
        "psllw $3, "#E"             \n\t"\
        "psubw "#E", %%mm6          \n\t"\
        "psraw $3, "#E"             \n\t"\
        "paddw %%mm7, %%mm6         \n\t"\
        "paddw "#E", %%mm6          \n\t"\
        "paddw "#B", "#B"           \n\t"\
        "pxor %%mm7, %%mm7          \n\t"\
        "add %2, %0                 \n\t"\
        "punpcklbw %%mm7, "#F"      \n\t"\
        "psubw "#B", %%mm6          \n\t"\
        "psraw $1, "#B"             \n\t"\
        "psubw "#A", %%mm6          \n\t"\
        "paddw "MANGLE(ADD)", %%mm6 \n\t"\
        "psraw $7, %%mm6            \n\t"\
        "packuswb %%mm6, %%mm6      \n\t"\
        OP(%%mm6, (%1), A, d)            \
        "add %3, %1                 \n\t"

/* vertical filter [ 0 -1 5 5 -1 0] */
#define QPEL_CAVSV2(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
        "movd (%0), "#F"            \n\t"\
        "movq "#C", %%mm6           \n\t"\
        "paddw "#D", %%mm6          \n\t"\
        "pmullw "MANGLE(MUL1)", %%mm6\n\t"\
        "add %2, %0                 \n\t"\
        "punpcklbw %%mm7, "#F"      \n\t"\
        "psubw "#B", %%mm6          \n\t"\
        "psubw "#E", %%mm6          \n\t"\
        "paddw "MANGLE(ADD)", %%mm6 \n\t"\
        "psraw $3, %%mm6            \n\t"\
        "packuswb %%mm6, %%mm6      \n\t"\
        OP(%%mm6, (%1), A, d)            \
        "add %3, %1                 \n\t"

/* vertical filter [ 0 -7 42 96 -2 -1] */
#define QPEL_CAVSV3(A,B,C,D,E,F,OP,ADD, MUL1, MUL2) \
        "movd (%0), "#F"            \n\t"\
        "movq "#C", %%mm6           \n\t"\
        "pmullw "MANGLE(MUL2)", %%mm6\n\t"\
        "movq "#D", %%mm7           \n\t"\
        "pmullw "MANGLE(MUL1)", %%mm7\n\t"\
        "psllw $3, "#B"             \n\t"\
        "psubw "#B", %%mm6          \n\t"\
        "psraw $3, "#B"             \n\t"\
        "paddw %%mm7, %%mm6         \n\t"\
        "paddw "#B", %%mm6          \n\t"\
        "paddw "#E", "#E"           \n\t"\
        "pxor %%mm7, %%mm7          \n\t"\
        "add %2, %0                 \n\t"\
        "punpcklbw %%mm7, "#F"      \n\t"\
        "psubw "#E", %%mm6          \n\t"\
        "psraw $1, "#E"             \n\t"\
        "psubw "#F", %%mm6          \n\t"\
        "paddw "MANGLE(ADD)", %%mm6 \n\t"\
        "psraw $7, %%mm6            \n\t"\
        "packuswb %%mm6, %%mm6      \n\t"\
        OP(%%mm6, (%1), A, d)            \
        "add %3, %1                 \n\t"
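
/* For reference: a plain-C sketch of what one step of the vertical macros
 * above computes.  taps[] is one of the six-tap rows noted in the comments
 * (e.g. {-1,-2,96,42,-7,0} with rnd=64, shift=7 for V1, or {0,-1,5,5,-1,0}
 * with rnd=4, shift=3 for V2); the asm reaches the same result with shifts
 * and adds in place of full multiplies where it can.
 */
static av_unused uint8_t cavs_filt_v_c(const uint8_t *src, ptrdiff_t stride,
                                       const int taps[6], int rnd, int shift)
{
    int i, sum = 0;

    for (i = 0; i < 6; i++)
        sum += taps[i] * src[(i - 2) * stride];
    return av_clip_uint8((sum + rnd) >> shift);
}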


#define QPEL_CAVSVNUM(VOP,OP,ADD,MUL1,MUL2)\
    int w= 2;\
    src -= 2*srcStride;\
    \
    while(w--){\
        __asm__ volatile(\
            "pxor %%mm7, %%mm7          \n\t"\
            "movd (%0), %%mm0           \n\t"\
            "add %2, %0                 \n\t"\
            "movd (%0), %%mm1           \n\t"\
            "add %2, %0                 \n\t"\
            "movd (%0), %%mm2           \n\t"\
            "add %2, %0                 \n\t"\
            "movd (%0), %%mm3           \n\t"\
            "add %2, %0                 \n\t"\
            "movd (%0), %%mm4           \n\t"\
            "add %2, %0                 \n\t"\
            "punpcklbw %%mm7, %%mm0     \n\t"\
            "punpcklbw %%mm7, %%mm1     \n\t"\
            "punpcklbw %%mm7, %%mm2     \n\t"\
            "punpcklbw %%mm7, %%mm3     \n\t"\
            "punpcklbw %%mm7, %%mm4     \n\t"\
            VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
            VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
            VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
            VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
            VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
            VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
            VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
            VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
            \
            : "+a"(src), "+c"(dst)\
            : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
              NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
            : "memory"\
        );\
        if(h==16){\
            __asm__ volatile(\
                VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
                VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
                VOP(%%mm4, %%mm5, %%mm0, %%mm1, %%mm2, %%mm3, OP, ADD, MUL1, MUL2)\
                VOP(%%mm5, %%mm0, %%mm1, %%mm2, %%mm3, %%mm4, OP, ADD, MUL1, MUL2)\
                VOP(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP, ADD, MUL1, MUL2)\
                VOP(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP, ADD, MUL1, MUL2)\
                VOP(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP, ADD, MUL1, MUL2)\
                VOP(%%mm3, %%mm4, %%mm5, %%mm0, %%mm1, %%mm2, OP, ADD, MUL1, MUL2)\
                \
                : "+a"(src), "+c"(dst)\
                : "S"((x86_reg)srcStride), "r"((x86_reg)dstStride)\
                  NAMED_CONSTRAINTS_ADD(ADD,MUL1,MUL2)\
                : "memory"\
            );\
        }\
        src += 4-(h+5)*srcStride;\
        dst += 4-h*dstStride;\
    }
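
/* Note on QPEL_CAVSVNUM above: mm0-mm4 are primed with five source rows,
 * then each VOP invocation loads one further row while the register
 * arguments rotate, so every output row costs a single new load.  Eight
 * rows are unrolled per asm block (plus eight more when h == 16), and the
 * while (w--) loop runs twice because each movd pass filters a
 * 4-pixel-wide column of the 8-pixel-wide block. */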

#define QPEL_CAVS(OPNAME, OP, MMX)\
static void OPNAME ## cavs_qpel8_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{\
    int h=8;\
    __asm__ volatile(\
        "pxor %%mm7, %%mm7          \n\t"\
        "movq "MANGLE(ff_pw_5)", %%mm6\n\t"\
        "1:                         \n\t"\
        "movq    (%0), %%mm0        \n\t"\
        "movq   1(%0), %%mm2        \n\t"\
        "movq %%mm0, %%mm1          \n\t"\
        "movq %%mm2, %%mm3          \n\t"\
        "punpcklbw %%mm7, %%mm0     \n\t"\
        "punpckhbw %%mm7, %%mm1     \n\t"\
        "punpcklbw %%mm7, %%mm2     \n\t"\
        "punpckhbw %%mm7, %%mm3     \n\t"\
        "paddw %%mm2, %%mm0         \n\t"\
        "paddw %%mm3, %%mm1         \n\t"\
        "pmullw %%mm6, %%mm0        \n\t"\
        "pmullw %%mm6, %%mm1        \n\t"\
        "movq   -1(%0), %%mm2       \n\t"\
        "movq    2(%0), %%mm4       \n\t"\
        "movq %%mm2, %%mm3          \n\t"\
        "movq %%mm4, %%mm5          \n\t"\
        "punpcklbw %%mm7, %%mm2     \n\t"\
        "punpckhbw %%mm7, %%mm3     \n\t"\
        "punpcklbw %%mm7, %%mm4     \n\t"\
        "punpckhbw %%mm7, %%mm5     \n\t"\
        "paddw %%mm4, %%mm2         \n\t"\
        "paddw %%mm3, %%mm5         \n\t"\
        "psubw %%mm2, %%mm0         \n\t"\
        "psubw %%mm5, %%mm1         \n\t"\
        "movq "MANGLE(ff_pw_4)", %%mm5\n\t"\
        "paddw %%mm5, %%mm0         \n\t"\
        "paddw %%mm5, %%mm1         \n\t"\
        "psraw $3, %%mm0            \n\t"\
        "psraw $3, %%mm1            \n\t"\
        "packuswb %%mm1, %%mm0      \n\t"\
        OP(%%mm0, (%1),%%mm5, q)         \
        "add %3, %0                 \n\t"\
        "add %4, %1                 \n\t"\
        "decl %2                    \n\t"\
        " jnz 1b                    \n\t"\
        : "+a"(src), "+c"(dst), "+m"(h)\
        : "d"((x86_reg)srcStride), "S"((x86_reg)dstStride)\
          NAMED_CONSTRAINTS_ADD(ff_pw_4,ff_pw_5)\
        : "memory"\
    );\
}\
\
static inline void OPNAME ## cavs_qpel8or16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
{ \
    QPEL_CAVSVNUM(QPEL_CAVSV1,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
}\
\
static inline void OPNAME ## cavs_qpel8or16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
{ \
    QPEL_CAVSVNUM(QPEL_CAVSV2,OP,ff_pw_4,ff_pw_5,ff_pw_42) \
}\
\
static inline void OPNAME ## cavs_qpel8or16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride, int h)\
{ \
    QPEL_CAVSVNUM(QPEL_CAVSV3,OP,ff_pw_64,ff_pw_96,ff_pw_42) \
}\
\
static void OPNAME ## cavs_qpel8_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v1_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v1_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel8_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v2_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v2_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel8_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst  , src  , dstStride, srcStride, 8);\
}\
static void OPNAME ## cavs_qpel16_v3_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst  , src  , dstStride, srcStride, 16);\
    OPNAME ## cavs_qpel8or16_v3_ ## MMX(dst+8, src+8, dstStride, srcStride, 16);\
}\
\
static void OPNAME ## cavs_qpel16_h_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t dstStride, ptrdiff_t srcStride)\
{ \
    OPNAME ## cavs_qpel8_h_ ## MMX(dst  , src  , dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst  , src  , dstStride, srcStride);\
    OPNAME ## cavs_qpel8_h_ ## MMX(dst+8, src+8, dstStride, srcStride);\
}

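/* For reference: the horizontal kernel above computes, for each pixel,
 * (-src[-1] + 5*(src[0] + src[1]) - src[2] + 4) >> 3, clamped to 8 bits;
 * ff_pw_5 and ff_pw_4 supply the multiplier and the rounding term.
 * A plain-C sketch of the put variant:
 */
static av_unused void cavs_qpel8_h_c(uint8_t *dst, const uint8_t *src,
                                     ptrdiff_t dstStride, ptrdiff_t srcStride)
{
    int i, j;

    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++)
            dst[j] = av_clip_uint8((-src[j - 1] + 5 * (src[j] + src[j + 1])
                                    - src[j + 2] + 4) >> 3);
        src += srcStride;
        dst += dstStride;
    }
}
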
#define CAVS_MC(OPNAME, SIZE, MMX) \
static void OPNAME ## cavs_qpel ## SIZE ## _mc20_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _h_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## cavs_qpel ## SIZE ## _mc01_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _v1_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## cavs_qpel ## SIZE ## _mc02_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _v2_ ## MMX(dst, src, stride, stride);\
}\
\
static void OPNAME ## cavs_qpel ## SIZE ## _mc03_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## cavs_qpel ## SIZE ## _v3_ ## MMX(dst, src, stride, stride);\
}

#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b "    \n\t"
#define AVG_3DNOW_OP(a,b,temp, size) \
"mov" #size " " #b ", " #temp "   \n\t"\
"pavgusb " #temp ", " #a "        \n\t"\
"mov" #size " " #a ", " #b "      \n\t"
#define AVG_MMXEXT_OP(a, b, temp, size) \
"mov" #size " " #b ", " #temp "   \n\t"\
"pavgb " #temp ", " #a "          \n\t"\
"mov" #size " " #a ", " #b "      \n\t"

#endif /* (HAVE_MMXEXT_INLINE || HAVE_AMD3DNOW_INLINE) */
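
/* For reference: PUT_OP simply stores the filtered result, while the two
 * AVG ops average it with the bytes already in dst; pavgb and pavgusb both
 * round up, i.e. per byte roughly dst = (dst + src + 1) >> 1.  A plain-C
 * sketch of that averaging store:
 */
static av_unused void cavs_op_avg_c(uint8_t *dst, const uint8_t *src, int n)
{
    int i;

    for (i = 0; i < n; i++)
        dst[i] = (dst[i] + src[i] + 1) >> 1;
}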

#if HAVE_MMX_EXTERNAL
static void put_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                    ptrdiff_t stride)
{
    ff_put_pixels8_mmx(dst, src, stride, 8);
}

static void avg_cavs_qpel8_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                    ptrdiff_t stride)
{
    ff_avg_pixels8_mmx(dst, src, stride, 8);
}

static void avg_cavs_qpel8_mc00_mmxext(uint8_t *dst, const uint8_t *src,
                                       ptrdiff_t stride)
{
    ff_avg_pixels8_mmxext(dst, src, stride, 8);
}

static void put_cavs_qpel16_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                     ptrdiff_t stride)
{
    ff_put_pixels16_mmx(dst, src, stride, 16);
}

static void avg_cavs_qpel16_mc00_mmx(uint8_t *dst, const uint8_t *src,
                                     ptrdiff_t stride)
{
    ff_avg_pixels16_mmx(dst, src, stride, 16);
}

static void avg_cavs_qpel16_mc00_mmxext(uint8_t *dst, const uint8_t *src,
                                        ptrdiff_t stride)
{
    ff_avg_pixels16_mmxext(dst, src, stride, 16);
}

static void put_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
{
    ff_put_pixels16_sse2(dst, src, stride, 16);
}

static void avg_cavs_qpel16_mc00_sse2(uint8_t *dst, const uint8_t *src,
                                      ptrdiff_t stride)
{
    ff_avg_pixels16_sse2(dst, src, stride, 16);
}
#endif

static av_cold void cavsdsp_init_mmx(CAVSDSPContext *c,
                                     AVCodecContext *avctx)
{
#if HAVE_MMX_EXTERNAL
    c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_mmx;
    c->put_cavs_qpel_pixels_tab[1][0] = put_cavs_qpel8_mc00_mmx;
    c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmx;
    c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmx;

    c->cavs_idct8_add = cavs_idct8_add_mmx;
    c->idct_perm      = FF_IDCT_PERM_TRANSPOSE;
#endif /* HAVE_MMX_EXTERNAL */
}

#define DSPFUNC(PFX, IDX, NUM, EXT) \
    c->PFX ## _cavs_qpel_pixels_tab[IDX][ 2] = PFX ## _cavs_qpel ## NUM ## _mc20_ ## EXT; \
    c->PFX ## _cavs_qpel_pixels_tab[IDX][ 4] = PFX ## _cavs_qpel ## NUM ## _mc01_ ## EXT; \
    c->PFX ## _cavs_qpel_pixels_tab[IDX][ 8] = PFX ## _cavs_qpel ## NUM ## _mc02_ ## EXT; \
    c->PFX ## _cavs_qpel_pixels_tab[IDX][12] = PFX ## _cavs_qpel ## NUM ## _mc03_ ## EXT;

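/* The qpel tables filled by DSPFUNC are indexed by x + 4*y over the 4x4
 * quarter-pel grid: [2] is the horizontal half-pel position (1/2, 0) and
 * [4]/[8]/[12] are the purely vertical positions (0, 1/4), (0, 1/2) and
 * (0, 3/4).  Together with [0] (full-pel, set in cavsdsp_init_mmx) these
 * are the only positions given x86 implementations here. */
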
#if HAVE_MMXEXT_INLINE
QPEL_CAVS(put_,        PUT_OP, mmxext)
QPEL_CAVS(avg_, AVG_MMXEXT_OP, mmxext)

CAVS_MC(put_,  8, mmxext)
CAVS_MC(put_, 16, mmxext)
CAVS_MC(avg_,  8, mmxext)
CAVS_MC(avg_, 16, mmxext)
#endif /* HAVE_MMXEXT_INLINE */

#if HAVE_AMD3DNOW_INLINE
QPEL_CAVS(put_,       PUT_OP, 3dnow)
QPEL_CAVS(avg_, AVG_3DNOW_OP, 3dnow)

CAVS_MC(put_,  8, 3dnow)
CAVS_MC(put_, 16, 3dnow)
CAVS_MC(avg_,  8, 3dnow)
CAVS_MC(avg_, 16, 3dnow)

static av_cold void cavsdsp_init_3dnow(CAVSDSPContext *c,
                                       AVCodecContext *avctx)
{
    DSPFUNC(put, 0, 16, 3dnow);
    DSPFUNC(put, 1,  8, 3dnow);
    DSPFUNC(avg, 0, 16, 3dnow);
    DSPFUNC(avg, 1,  8, 3dnow);
}
#endif /* HAVE_AMD3DNOW_INLINE */

av_cold void ff_cavsdsp_init_x86(CAVSDSPContext *c, AVCodecContext *avctx)
{
    int cpu_flags = av_get_cpu_flags();

    if (X86_MMX(cpu_flags))
        cavsdsp_init_mmx(c, avctx);

#if HAVE_AMD3DNOW_INLINE
    if (INLINE_AMD3DNOW(cpu_flags))
        cavsdsp_init_3dnow(c, avctx);
#endif /* HAVE_AMD3DNOW_INLINE */
#if HAVE_MMXEXT_INLINE
    if (INLINE_MMXEXT(cpu_flags)) {
        DSPFUNC(put, 0, 16, mmxext);
        DSPFUNC(put, 1,  8, mmxext);
        DSPFUNC(avg, 0, 16, mmxext);
        DSPFUNC(avg, 1,  8, mmxext);
    }
#endif
#if HAVE_MMX_EXTERNAL
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_mmxext;
        c->avg_cavs_qpel_pixels_tab[1][0] = avg_cavs_qpel8_mc00_mmxext;
    }
#endif
#if HAVE_SSE2_EXTERNAL
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->put_cavs_qpel_pixels_tab[0][0] = put_cavs_qpel16_mc00_sse2;
        c->avg_cavs_qpel_pixels_tab[0][0] = avg_cavs_qpel16_mc00_sse2;

        c->cavs_idct8_add = cavs_idct8_add_sse2;
        c->idct_perm      = FF_IDCT_PERM_TRANSPOSE;
    }
#endif
}