FFmpeg
fdctdsp.c
Go to the documentation of this file.
1 /*
2  * Copyright (C) 2003 James Klicman <james@klicman.org>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "config.h"
22 
23 #include "libavutil/attributes.h"
24 #include "libavutil/cpu.h"
25 #include "libavutil/ppc/cpu.h"
27 
28 #include "libavcodec/avcodec.h"
29 #include "libavcodec/fdctdsp.h"
30 
31 #include "fdct.h"
32 
33 #if HAVE_ALTIVEC
34 
/* Shorthand casts from any vector value to a specific AltiVec vector type.
 * Within the visible part of this file only vs16, vs32 and vu32 are used;
 * vu8 and vu16 have no visible user. */
35 #define vs16(v) ((vector signed short) (v))
36 #define vs32(v) ((vector signed int) (v))
37 #define vu8(v) ((vector unsigned char) (v))
38 #define vu16(v) ((vector unsigned short) (v))
39 #define vu32(v) ((vector unsigned int) (v))
40 
/* Ck = cos(k * PI / 16) for k = 1..7, spelled out as decimal literals. */
#define C1 0.98078528040323044912618224 /* cos(1 * PI / 16) */
#define C2 0.92387953251128675612818319 /* cos(2 * PI / 16) */
#define C3 0.83146961230254523707878838 /* cos(3 * PI / 16) */
#define C4 0.70710678118654752440084436 /* cos(4 * PI / 16) */
#define C5 0.55557023301960222474283081 /* cos(5 * PI / 16) */
#define C6 0.38268343236508977172845998 /* cos(6 * PI / 16) */
#define C7 0.19509032201612826784828487 /* cos(7 * PI / 16) */

/*
 * W0..WB: the twelve multiplier weights built from C1..C7. They are packed,
 * in this order, into the fdctconsts table and fetched through LD_W0..LD_WB.
 * W2..WB additionally need M_SQRT2, which must come from an included header.
 * Every expansion is fully parenthesized so a weight can be dropped into any
 * larger expression.
 */
#define W0 (-(2 * C2))
#define W1 (2 * C6)
#define W2 (M_SQRT2 * C6)
#define W3 (M_SQRT2 * C3)
#define W4 (M_SQRT2 * (-C1 + C3 + C5 - C7))
#define W5 (M_SQRT2 * (C1 + C3 - C5 + C7))
#define W6 (M_SQRT2 * (C1 + C3 + C5 - C7))
#define W7 (M_SQRT2 * (C1 + C3 - C5 - C7))
#define W8 (M_SQRT2 * (C7 - C3))
#define W9 (M_SQRT2 * (-C1 - C3))
#define WA (M_SQRT2 * (-C3 - C5))
#define WB (M_SQRT2 * (C5 - C3))
61 
/* The twelve weights W0..WB packed four per vector: element 0 holds W0..W3,
 * element 1 holds W4..W7, element 2 holds W8..WB. ff_fdct_altivec loads the
 * three elements into cnsts0, cnsts1 and cnsts2. */
62 static const vector float fdctconsts[3] = {
63  { W0, W1, W2, W3 },
64  { W4, W5, W6, W7 },
65  { W8, W9, WA, WB }
66 };
67 
/* LD_Wn yields a vector with weight Wn in every lane: vec_splat picks lane
 * (n % 4) of cnsts(n / 4). The macros expect cnsts0, cnsts1 and cnsts2 to be
 * variables in the expanding scope, already loaded from fdctconsts. */
68 #define LD_W0 vec_splat(cnsts0, 0)
69 #define LD_W1 vec_splat(cnsts0, 1)
70 #define LD_W2 vec_splat(cnsts0, 2)
71 #define LD_W3 vec_splat(cnsts0, 3)
72 #define LD_W4 vec_splat(cnsts1, 0)
73 #define LD_W5 vec_splat(cnsts1, 1)
74 #define LD_W6 vec_splat(cnsts1, 2)
75 #define LD_W7 vec_splat(cnsts1, 3)
76 #define LD_W8 vec_splat(cnsts2, 0)
77 #define LD_W9 vec_splat(cnsts2, 1)
78 #define LD_WA vec_splat(cnsts2, 2)
79 #define LD_WB vec_splat(cnsts2, 3)
80 
/*
 * FDCTROW(b0..b7): one 1-D forward-DCT pass over eight vector float
 * operands, computed lane by lane, with all eight results written back
 * into b0..b7.
 *
 * The macro expands to a plain statement sequence; the last statement has
 * no ';' so the invocation supplies it. It relies on names from the
 * expanding scope: temporaries x0..x8 and cnst, the addend mzero (so that
 * vec_madd acts as a pure multiply where noted), and cnsts0..cnsts2 through
 * the LD_W* macros. Each argument is named several times and assigned to,
 * so arguments must be simple lvalues.
 *
 * Not invoked in the visible part of this file; FDCTCOL has the same
 * statement sequence.
 */
81 #define FDCTROW(b0, b1, b2, b3, b4, b5, b6, b7) /* {{{ */ \
82  x0 = vec_add(b0, b7); /* x0 = b0 + b7; */ \
83  x7 = vec_sub(b0, b7); /* x7 = b0 - b7; */ \
84  x1 = vec_add(b1, b6); /* x1 = b1 + b6; */ \
85  x6 = vec_sub(b1, b6); /* x6 = b1 - b6; */ \
86  x2 = vec_add(b2, b5); /* x2 = b2 + b5; */ \
87  x5 = vec_sub(b2, b5); /* x5 = b2 - b5; */ \
88  x3 = vec_add(b3, b4); /* x3 = b3 + b4; */ \
89  x4 = vec_sub(b3, b4); /* x4 = b3 - b4; */ \
90  \
91  b7 = vec_add(x0, x3); /* b7 = x0 + x3; */ \
92  b1 = vec_add(x1, x2); /* b1 = x1 + x2; */ \
93  b0 = vec_add(b7, b1); /* b0 = b7 + b1; */ \
94  b4 = vec_sub(b7, b1); /* b4 = b7 - b1; */ \
95  \
96  b2 = vec_sub(x0, x3); /* b2 = x0 - x3; */ \
97  b6 = vec_sub(x1, x2); /* b6 = x1 - x2; */ \
98  b5 = vec_add(b6, b2); /* b5 = b6 + b2; */ \
99  cnst = LD_W2; \
100  b5 = vec_madd(cnst, b5, mzero); /* b5 = b5 * W2; */ \
101  cnst = LD_W1; \
102  b2 = vec_madd(cnst, b2, b5); /* b2 = b5 + b2 * W1; */ \
103  cnst = LD_W0; \
104  b6 = vec_madd(cnst, b6, b5); /* b6 = b5 + b6 * W0; */ \
105  \
106  x0 = vec_add(x4, x7); /* x0 = x4 + x7; */ \
107  x1 = vec_add(x5, x6); /* x1 = x5 + x6; */ \
108  x2 = vec_add(x4, x6); /* x2 = x4 + x6; */ \
109  x3 = vec_add(x5, x7); /* x3 = x5 + x7; */ \
110  x8 = vec_add(x2, x3); /* x8 = x2 + x3; */ \
111  cnst = LD_W3; \
112  x8 = vec_madd(cnst, x8, mzero); /* x8 = x8 * W3; */ \
113  \
114  cnst = LD_W8; \
115  x0 = vec_madd(cnst, x0, mzero); /* x0 *= W8; */ \
116  cnst = LD_W9; \
117  x1 = vec_madd(cnst, x1, mzero); /* x1 *= W9; */ \
118  cnst = LD_WA; \
119  x2 = vec_madd(cnst, x2, x8); /* x2 = x2 * WA + x8; */ \
120  cnst = LD_WB; \
121  x3 = vec_madd(cnst, x3, x8); /* x3 = x3 * WB + x8; */ \
122  \
123  cnst = LD_W4; \
124  b7 = vec_madd(cnst, x4, x0); /* b7 = x4 * W4 + x0; */ \
125  cnst = LD_W5; \
126  b5 = vec_madd(cnst, x5, x1); /* b5 = x5 * W5 + x1; */ \
127  cnst = LD_W6; \
128  b3 = vec_madd(cnst, x6, x1); /* b3 = x6 * W6 + x1; */ \
129  cnst = LD_W7; \
130  b1 = vec_madd(cnst, x7, x0); /* b1 = x7 * W7 + x0; */ \
131  \
132  b7 = vec_add(b7, x2); /* b7 = b7 + x2; */ \
133  b5 = vec_add(b5, x3); /* b5 = b5 + x3; */ \
134  b3 = vec_add(b3, x2); /* b3 = b3 + x2; */ \
135  b1 = vec_add(b1, x3) /* b1 = b1 + x3; */ \
136  /* }}} */
137 
/*
 * FDCTCOL(b0..b7): one 1-D forward-DCT pass over eight vector float
 * operands, computed lane by lane, with all eight results written back
 * into b0..b7. ff_fdct_altivec invokes it twice for its second pass, once
 * per half of the float data.
 *
 * The statement sequence is the same as in FDCTROW; only comment wording
 * differs. The last statement has no ';' so the invocation supplies it.
 * Names taken from the expanding scope: temporaries x0..x8 and cnst, the
 * addend mzero (making vec_madd a pure multiply where noted), and
 * cnsts0..cnsts2 through the LD_W* macros. Each argument is named several
 * times and assigned to, so arguments must be simple lvalues.
 */
138 #define FDCTCOL(b0, b1, b2, b3, b4, b5, b6, b7) /* {{{ */ \
139  x0 = vec_add(b0, b7); /* x0 = b0 + b7; */ \
140  x7 = vec_sub(b0, b7); /* x7 = b0 - b7; */ \
141  x1 = vec_add(b1, b6); /* x1 = b1 + b6; */ \
142  x6 = vec_sub(b1, b6); /* x6 = b1 - b6; */ \
143  x2 = vec_add(b2, b5); /* x2 = b2 + b5; */ \
144  x5 = vec_sub(b2, b5); /* x5 = b2 - b5; */ \
145  x3 = vec_add(b3, b4); /* x3 = b3 + b4; */ \
146  x4 = vec_sub(b3, b4); /* x4 = b3 - b4; */ \
147  \
148  b7 = vec_add(x0, x3); /* b7 = x0 + x3; */ \
149  b1 = vec_add(x1, x2); /* b1 = x1 + x2; */ \
150  b0 = vec_add(b7, b1); /* b0 = b7 + b1; */ \
151  b4 = vec_sub(b7, b1); /* b4 = b7 - b1; */ \
152  \
153  b2 = vec_sub(x0, x3); /* b2 = x0 - x3; */ \
154  b6 = vec_sub(x1, x2); /* b6 = x1 - x2; */ \
155  b5 = vec_add(b6, b2); /* b5 = b6 + b2; */ \
156  cnst = LD_W2; \
157  b5 = vec_madd(cnst, b5, mzero); /* b5 = b5 * W2; */ \
158  cnst = LD_W1; \
159  b2 = vec_madd(cnst, b2, b5); /* b2 = b5 + b2 * W1; */ \
160  cnst = LD_W0; \
161  b6 = vec_madd(cnst, b6, b5); /* b6 = b5 + b6 * W0; */ \
162  \
163  x0 = vec_add(x4, x7); /* x0 = x4 + x7; */ \
164  x1 = vec_add(x5, x6); /* x1 = x5 + x6; */ \
165  x2 = vec_add(x4, x6); /* x2 = x4 + x6; */ \
166  x3 = vec_add(x5, x7); /* x3 = x5 + x7; */ \
167  x8 = vec_add(x2, x3); /* x8 = x2 + x3; */ \
168  cnst = LD_W3; \
169  x8 = vec_madd(cnst, x8, mzero); /* x8 = x8 * W3; */ \
170  \
171  cnst = LD_W8; \
172  x0 = vec_madd(cnst, x0, mzero); /* x0 *= W8; */ \
173  cnst = LD_W9; \
174  x1 = vec_madd(cnst, x1, mzero); /* x1 *= W9; */ \
175  cnst = LD_WA; \
176  x2 = vec_madd(cnst, x2, x8); /* x2 = x2 * WA + x8; */ \
177  cnst = LD_WB; \
178  x3 = vec_madd(cnst, x3, x8); /* x3 = x3 * WB + x8; */ \
179  \
180  cnst = LD_W4; \
181  b7 = vec_madd(cnst, x4, x0); /* b7 = x4 * W4 + x0; */ \
182  cnst = LD_W5; \
183  b5 = vec_madd(cnst, x5, x1); /* b5 = x5 * W5 + x1; */ \
184  cnst = LD_W6; \
185  b3 = vec_madd(cnst, x6, x1); /* b3 = x6 * W6 + x1; */ \
186  cnst = LD_W7; \
187  b1 = vec_madd(cnst, x7, x0); /* b1 = x7 * W7 + x0; */ \
188  \
189  b7 = vec_add(b7, x2); /* b7 += x2; */ \
190  b5 = vec_add(b5, x3); /* b5 += x3; */ \
191  b3 = vec_add(b3, x2); /* b3 += x2; */ \
192  b1 = vec_add(b1, x3) /* b1 += x3; */ \
193  /* }}} */
194 
/*
 * Forward 2-D DCT of one block, computed in place with AltiVec.
 *
 * Sequence, as visible in the body:
 *   1. load eight vectors of signed 16-bit values through block and
 *      transpose them as an 8x8 matrix of shorts;
 *   2. first 1-D pass: the opening additions/subtractions run in 16-bit
 *      integer arithmetic, then the data is widened and converted to
 *      float and the weighted part is finished in float;
 *   3. transpose the float data (held as two 4-lane vectors per line);
 *   4. second 1-D pass via FDCTCOL, once per half;
 *   5. round, convert to 32-bit integers, pack to 16 bit and store eight
 *      vectors back over the input.
 *
 * Naming: bN0 / bN1 are the two halves belonging to line N; after the
 * float conversion bN0 carries the lanes produced by vec_unpackh and bN1
 * those produced by vec_unpackl.
 *
 * block: data to transform; overwritten with the result.
 *        NOTE(review): it is only accessed through vec_ld/vec_st, so it is
 *        presumably required to be 16-byte aligned -- confirm with callers.
 */
195 /* two dimensional discrete cosine transform */
196 void ff_fdct_altivec(int16_t *block)
197 {
198  vector signed short *bp;
199  const vector float *cp = fdctconsts;
200  vector float b00, b10, b20, b30, b40, b50, b60, b70;
201  vector float b01, b11, b21, b31, b41, b51, b61, b71;
202  vector float mzero, cnst, cnsts0, cnsts1, cnsts2;
203  vector float x0, x1, x2, x3, x4, x5, x6, x7, x8;
204 
205  /* setup constants {{{ */
206  /* mzero = -0.0 */
 /* Built without a memory load: splat all-ones, then shift every lane left
  * by itself. vec_sl uses the count modulo the lane width, i.e. 31 here,
  * which leaves only the sign bit set. */
207  mzero = ((vector float) vec_splat_u32(-1));
208  mzero = ((vector float) vec_sl(vu32(mzero), vu32(mzero)));
 /* The three elements of fdctconsts, read by the LD_W* macros. */
209  cnsts0 = vec_ld(0, cp);
210  cp++;
211  cnsts1 = vec_ld(0, cp);
212  cp++;
213  cnsts2 = vec_ld(0, cp);
214  /* }}} */
215 
216  /* 8x8 matrix transpose (vector short[8]) {{{ */
217 #define MERGE_S16(hl, a, b) vec_merge ## hl(vs16(a), vs16(b))
218 
 /* The float-typed variables only carry raw 16-bit data in this section;
  * every access goes through the vs16() cast. Each vec_ld(16 * 4, bp)
  * fetches the vector 64 bytes (four vectors) past the one at bp. */
219  bp = (vector signed short *) block;
220  b00 = ((vector float) vec_ld(0, bp));
221  b40 = ((vector float) vec_ld(16 * 4, bp));
222  b01 = ((vector float) MERGE_S16(h, b00, b40));
223  b11 = ((vector float) MERGE_S16(l, b00, b40));
224  bp++;
225  b10 = ((vector float) vec_ld(0, bp));
226  b50 = ((vector float) vec_ld(16 * 4, bp));
227  b21 = ((vector float) MERGE_S16(h, b10, b50));
228  b31 = ((vector float) MERGE_S16(l, b10, b50));
229  bp++;
230  b20 = ((vector float) vec_ld(0, bp));
231  b60 = ((vector float) vec_ld(16 * 4, bp));
232  b41 = ((vector float) MERGE_S16(h, b20, b60));
233  b51 = ((vector float) MERGE_S16(l, b20, b60));
234  bp++;
235  b30 = ((vector float) vec_ld(0, bp));
236  b70 = ((vector float) vec_ld(16 * 4, bp));
237  b61 = ((vector float) MERGE_S16(h, b30, b70));
238  b71 = ((vector float) MERGE_S16(l, b30, b70));
239 
240  x0 = ((vector float) MERGE_S16(h, b01, b41));
241  x1 = ((vector float) MERGE_S16(l, b01, b41));
242  x2 = ((vector float) MERGE_S16(h, b11, b51));
243  x3 = ((vector float) MERGE_S16(l, b11, b51));
244  x4 = ((vector float) MERGE_S16(h, b21, b61));
245  x5 = ((vector float) MERGE_S16(l, b21, b61));
246  x6 = ((vector float) MERGE_S16(h, b31, b71));
247  x7 = ((vector float) MERGE_S16(l, b31, b71));
248 
249  b00 = ((vector float) MERGE_S16(h, x0, x4));
250  b10 = ((vector float) MERGE_S16(l, x0, x4));
251  b20 = ((vector float) MERGE_S16(h, x1, x5));
252  b30 = ((vector float) MERGE_S16(l, x1, x5));
253  b40 = ((vector float) MERGE_S16(h, x2, x6));
254  b50 = ((vector float) MERGE_S16(l, x2, x6));
255  b60 = ((vector float) MERGE_S16(h, x3, x7));
256  b70 = ((vector float) MERGE_S16(l, x3, x7));
257 
258 #undef MERGE_S16
259  /* }}} */
260 
261  /* Some of the initial calculations can be done as vector short
262  * before conversion to vector float. The following code section
263  * takes advantage of this. */
264 
265  /* fdct rows {{{ */
 /* Same opening butterflies as FDCTROW, but on 16-bit integer lanes. */
266  x0 = ((vector float) vec_add(vs16(b00), vs16(b70)));
267  x7 = ((vector float) vec_sub(vs16(b00), vs16(b70)));
268  x1 = ((vector float) vec_add(vs16(b10), vs16(b60)));
269  x6 = ((vector float) vec_sub(vs16(b10), vs16(b60)));
270  x2 = ((vector float) vec_add(vs16(b20), vs16(b50)));
271  x5 = ((vector float) vec_sub(vs16(b20), vs16(b50)));
272  x3 = ((vector float) vec_add(vs16(b30), vs16(b40)));
273  x4 = ((vector float) vec_sub(vs16(b30), vs16(b40)));
274 
275  b70 = ((vector float) vec_add(vs16(x0), vs16(x3)));
276  b10 = ((vector float) vec_add(vs16(x1), vs16(x2)));
277 
278  b00 = ((vector float) vec_add(vs16(b70), vs16(b10)));
279  b40 = ((vector float) vec_sub(vs16(b70), vs16(b10)));
280 
 /* CTF0(n): widen the eight shorts held in bn0 into two vectors of four
  * 32-bit ints and convert both to float (scale argument 0). bn1 is derived
  * first because bn0 is both the source and the second destination. */
281 #define CTF0(n) \
282  b ## n ## 1 = ((vector float) vec_unpackl(vs16(b ## n ## 0))); \
283  b ## n ## 0 = ((vector float) vec_unpackh(vs16(b ## n ## 0))); \
284  b ## n ## 1 = vec_ctf(vs32(b ## n ## 1), 0); \
285  b ## n ## 0 = vec_ctf(vs32(b ## n ## 0), 0)
286 
287  CTF0(0);
288  CTF0(4);
289 
290  b20 = ((vector float) vec_sub(vs16(x0), vs16(x3)));
291  b60 = ((vector float) vec_sub(vs16(x1), vs16(x2)));
292 
293  CTF0(2);
294  CTF0(6);
295 
296 #undef CTF0
297 
 /* From here on the data is float. This is the W2/W1/W0 step of FDCTROW,
  * done for the b?0 half (via x0) and the b?1 half (via x1) side by side. */
298  x0 = vec_add(b60, b20);
299  x1 = vec_add(b61, b21);
300 
301  cnst = LD_W2;
302  x0 = vec_madd(cnst, x0, mzero);
303  x1 = vec_madd(cnst, x1, mzero);
304  cnst = LD_W1;
305  b20 = vec_madd(cnst, b20, x0);
306  b21 = vec_madd(cnst, b21, x1);
307  cnst = LD_W0;
308  b60 = vec_madd(cnst, b60, x0);
309  b61 = vec_madd(cnst, b61, x1);
310 
 /* CTFX(x, b): widen the eight shorts in x and convert to float, placing
  * the vec_unpackh half in b##0 and the vec_unpackl half in b##1. */
311 #define CTFX(x, b) \
312  b ## 0 = ((vector float) vec_unpackh(vs16(x))); \
313  b ## 1 = ((vector float) vec_unpackl(vs16(x))); \
314  b ## 0 = vec_ctf(vs32(b ## 0), 0); \
315  b ## 1 = vec_ctf(vs32(b ## 1), 0)
316 
 /* The 16-bit differences x4..x7 become the float pairs b7?, b5?, b3?, b1?,
  * i.e. each lands in the variable that will receive its weighted result. */
317  CTFX(x4, b7);
318  CTFX(x5, b5);
319  CTFX(x6, b3);
320  CTFX(x7, b1);
321 
322 #undef CTFX
323 
 /* W3..WB part for the b?0 half: same arithmetic as the second half of
  * FDCTROW, with b70, b50, b30, b10 standing in for x4, x5, x6, x7. */
324  x0 = vec_add(b70, b10);
325  x1 = vec_add(b50, b30);
326  x2 = vec_add(b70, b30);
327  x3 = vec_add(b50, b10);
328  x8 = vec_add(x2, x3);
329  cnst = LD_W3;
330  x8 = vec_madd(cnst, x8, mzero);
331 
332  cnst = LD_W8;
333  x0 = vec_madd(cnst, x0, mzero);
334  cnst = LD_W9;
335  x1 = vec_madd(cnst, x1, mzero);
336  cnst = LD_WA;
337  x2 = vec_madd(cnst, x2, x8);
338  cnst = LD_WB;
339  x3 = vec_madd(cnst, x3, x8);
340 
341  cnst = LD_W4;
342  b70 = vec_madd(cnst, b70, x0);
343  cnst = LD_W5;
344  b50 = vec_madd(cnst, b50, x1);
345  cnst = LD_W6;
346  b30 = vec_madd(cnst, b30, x1);
347  cnst = LD_W7;
348  b10 = vec_madd(cnst, b10, x0);
349 
350  b70 = vec_add(b70, x2);
351  b50 = vec_add(b50, x3);
352  b30 = vec_add(b30, x2);
353  b10 = vec_add(b10, x3);
354 
 /* The identical sequence again for the b?1 half. */
355  x0 = vec_add(b71, b11);
356  x1 = vec_add(b51, b31);
357  x2 = vec_add(b71, b31);
358  x3 = vec_add(b51, b11);
359  x8 = vec_add(x2, x3);
360  cnst = LD_W3;
361  x8 = vec_madd(cnst, x8, mzero);
362 
363  cnst = LD_W8;
364  x0 = vec_madd(cnst, x0, mzero);
365  cnst = LD_W9;
366  x1 = vec_madd(cnst, x1, mzero);
367  cnst = LD_WA;
368  x2 = vec_madd(cnst, x2, x8);
369  cnst = LD_WB;
370  x3 = vec_madd(cnst, x3, x8);
371 
372  cnst = LD_W4;
373  b71 = vec_madd(cnst, b71, x0);
374  cnst = LD_W5;
375  b51 = vec_madd(cnst, b51, x1);
376  cnst = LD_W6;
377  b31 = vec_madd(cnst, b31, x1);
378  cnst = LD_W7;
379  b11 = vec_madd(cnst, b11, x0);
380 
381  b71 = vec_add(b71, x2);
382  b51 = vec_add(b51, x3);
383  b31 = vec_add(b31, x2);
384  b11 = vec_add(b11, x3);
385  /* }}} */
386 
387  /* 8x8 matrix transpose (vector float[8][2]) {{{ */
 /* Done as four 4x4 groups: b00..b30 and b41..b71 are rewritten from their
  * own group, while the b01..b31 and b40..b70 groups trade places, which is
  * why both of those are merged into x0..x7 before either is overwritten. */
388  x0 = vec_mergel(b00, b20);
389  x1 = vec_mergeh(b00, b20);
390  x2 = vec_mergel(b10, b30);
391  x3 = vec_mergeh(b10, b30);
392 
393  b00 = vec_mergeh(x1, x3);
394  b10 = vec_mergel(x1, x3);
395  b20 = vec_mergeh(x0, x2);
396  b30 = vec_mergel(x0, x2);
397 
398  x4 = vec_mergel(b41, b61);
399  x5 = vec_mergeh(b41, b61);
400  x6 = vec_mergel(b51, b71);
401  x7 = vec_mergeh(b51, b71);
402 
403  b41 = vec_mergeh(x5, x7);
404  b51 = vec_mergel(x5, x7);
405  b61 = vec_mergeh(x4, x6);
406  b71 = vec_mergel(x4, x6);
407 
408  x0 = vec_mergel(b01, b21);
409  x1 = vec_mergeh(b01, b21);
410  x2 = vec_mergel(b11, b31);
411  x3 = vec_mergeh(b11, b31);
412 
413  x4 = vec_mergel(b40, b60);
414  x5 = vec_mergeh(b40, b60);
415  x6 = vec_mergel(b50, b70);
416  x7 = vec_mergeh(b50, b70);
417 
418  b40 = vec_mergeh(x1, x3);
419  b50 = vec_mergel(x1, x3);
420  b60 = vec_mergeh(x0, x2);
421  b70 = vec_mergel(x0, x2);
422 
423  b01 = vec_mergeh(x5, x7);
424  b11 = vec_mergel(x5, x7);
425  b21 = vec_mergeh(x4, x6);
426  b31 = vec_mergel(x4, x6);
427  /* }}} */
428 
 /* Second 1-D pass, entirely in float, once per half. */
429  FDCTCOL(b00, b10, b20, b30, b40, b50, b60, b70);
430  FDCTCOL(b01, b11, b21, b31, b41, b51, b61, b71);
431 
432  /* round, convert back to short {{{ */
 /* CTS(n): round both halves of line n, convert them to 32-bit integers
  * (scale argument 0), pack the two into one vector of eight shorts and
  * store it at bp.
  * NOTE(review): vec_pack is the non-saturating pack, so results outside
  * the int16_t range presumably wrap rather than clamp -- confirm. */
433 #define CTS(n) \
434  b ## n ## 0 = vec_round(b ## n ## 0); \
435  b ## n ## 1 = vec_round(b ## n ## 1); \
436  b ## n ## 0 = ((vector float) vec_cts(b ## n ## 0, 0)); \
437  b ## n ## 1 = ((vector float) vec_cts(b ## n ## 1, 0)); \
438  b ## n ## 0 = ((vector float) vec_pack(vs32(b ## n ## 0), \
439  vs32(b ## n ## 1))); \
440  vec_st(vs16(b ## n ## 0), 0, bp)
441 
 /* Rewind bp to the start of block; line n is stored to vector n. */
442  bp = (vector signed short *) block;
443  CTS(0);
444  bp++;
445  CTS(1);
446  bp++;
447  CTS(2);
448  bp++;
449  CTS(3);
450  bp++;
451  CTS(4);
452  bp++;
453  CTS(5);
454  bp++;
455  CTS(6);
456  bp++;
457  CTS(7);
458 
459 #undef CTS
460  /* }}} */
461 }
462 
463 #endif /* HAVE_ALTIVEC */
464 
466  unsigned high_bit_depth)
467 {
468 #if HAVE_ALTIVEC
470  return;
471 
472  if (!high_bit_depth) {
473  if (avctx->dct_algo == FF_DCT_AUTO ||
474  avctx->dct_algo == FF_DCT_ALTIVEC) {
475  c->fdct = ff_fdct_altivec;
476  }
477  }
478 #endif /* HAVE_ALTIVEC */
479 }
WA
static const int WA[80]
Definition: ripemd.c:75
W4
#define W4
Definition: simple_idct_mmi.c:34
W1
#define W1
Definition: simple_idct_mmi.c:31
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:107
FDCTDSPContext
Definition: fdctdsp.h:28
b1
static double b1(void *priv, double x, double y)
Definition: vf_xfade.c:2034
AVCodecContext::dct_algo
int dct_algo
DCT algorithm, see FF_DCT_* below.
Definition: avcodec.h:1538
av_cold
#define av_cold
Definition: attributes.h:90
float
float
Definition: af_crystalizer.c:122
b3
static double b3(void *priv, double x, double y)
Definition: vf_xfade.c:2036
ff_fdctdsp_init_ppc
av_cold void ff_fdctdsp_init_ppc(FDCTDSPContext *c, AVCodecContext *avctx, unsigned high_bit_depth)
Definition: fdctdsp.c:465
W2
#define W2
Definition: simple_idct_mmi.c:32
W3
#define W3
Definition: simple_idct_mmi.c:33
WB
static const int WB[80]
Definition: ripemd.c:83
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
FF_DCT_ALTIVEC
#define FF_DCT_ALTIVEC
Definition: avcodec.h:1543
PPC_ALTIVEC
#define PPC_ALTIVEC(flags)
Definition: cpu.h:25
cpu.h
W7
#define W7
Definition: simple_idct_mmi.c:37
attributes.h
fdctdsp.h
avcodec.h
fdct.h
AVCodecContext
main external API structure.
Definition: avcodec.h:445
FF_DCT_AUTO
#define FF_DCT_AUTO
Definition: avcodec.h:1539
util_altivec.h
cpu.h
block
The exact code depends on how similar the blocks are and how related they are to the block
Definition: filter_design.txt:207
W6
#define W6
Definition: simple_idct_mmi.c:36
h
h
Definition: vp9dsp_template.c:2070
ff_fdct_altivec
void ff_fdct_altivec(int16_t *block)
W5
#define W5
Definition: simple_idct_mmi.c:35
W0
#define W0
Definition: wmv2dsp_mmi.c:28