Go to the documentation of this file.
/*
 * Store operations selected through OP_U8_ALTIVEC before each inclusion of
 * h264qpel_template.c:
 *   - PUT_OP_U8_ALTIVEC assigns s to d (its dst argument is unused);
 *   - AVG_OP_U8_ALTIVEC assigns vec_avg(dst, s) to d.
 */
35 #define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
36 #define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)
/*
 * First inclusion: OP_U8_ALTIVEC is the "put" operation and every PREFIX_*
 * name is mapped to its put_* / altivec_put_* identifier.
 * NOTE(review): the template presumably defines its functions under these
 * PREFIX_* names; its contents are not visible here -- confirm.
 */
38 #define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
39 #define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
40 #define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
41 #define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
42 #define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
43 #define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
44 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
45 #include "h264qpel_template.c"
/* Drop the put_* mappings so the PREFIX_* names can be remapped below. */
47 #undef PREFIX_h264_qpel16_h_lowpass_altivec
48 #undef PREFIX_h264_qpel16_h_lowpass_num
49 #undef PREFIX_h264_qpel16_v_lowpass_altivec
50 #undef PREFIX_h264_qpel16_v_lowpass_num
51 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
52 #undef PREFIX_h264_qpel16_hv_lowpass_num
/*
 * Second inclusion: same template, now with the "avg" operation and the
 * avg_* / altivec_avg_* identifiers.
 * NOTE(review): no "#undef OP_U8_ALTIVEC" is visible in this listing before
 * the redefinition below -- confirm it exists in the full file.
 */
54 #define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
55 #define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
56 #define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
57 #define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
58 #define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
59 #define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
60 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
61 #include "h264qpel_template.c"
/* Drop the avg_* mappings again after the second inclusion. */
63 #undef PREFIX_h264_qpel16_h_lowpass_altivec
64 #undef PREFIX_h264_qpel16_h_lowpass_num
65 #undef PREFIX_h264_qpel16_v_lowpass_altivec
66 #undef PREFIX_h264_qpel16_v_lowpass_num
67 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
68 #undef PREFIX_h264_qpel16_hv_lowpass_num
/*
 * H264_MC(OPNAME, SIZE, CODETYPE): expands to a family of static functions
 * named <OPNAME>h264_qpel<SIZE>_mcXY_<CODETYPE>, each taking
 * (uint8_t *dst, const uint8_t *src, ptrdiff_t stride).
 *
 * As visible in the bodies below:
 *   - mc00 forwards to ff_<OPNAME>pixels<SIZE>_<CODETYPE>(dst, src, stride, SIZE);
 *   - mc20, mc02 and mc22 call the <OPNAME> h / v / hv lowpass routine
 *     directly on dst (mc22 passes an int16_t scratch buffer of
 *     SIZE*(SIZE+8) elements);
 *   - every other variant first fills 16-byte-aligned SIZE*SIZE temporaries
 *     (written with row stride SIZE) through the put_ lowpass routines, then
 *     combines two inputs into dst with <OPNAME>pixels<SIZE>_l2_<CODETYPE>.
 *
 * NOTE(review): XY presumably encodes the quarter-sample offset (X
 * horizontal, Y vertical) -- confirm against the H.264 qpel table layout.
 * NOTE(review): the brace lines delimiting each generated function body are
 * not present in this listing.
 */
70 #define H264_MC(OPNAME, SIZE, CODETYPE) \
71 static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
73 ff_ ## OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
76 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
78 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
79 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
80 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
83 static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
85 OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
88 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
90 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
91 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
92 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
95 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
97 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
98 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
99 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
102 static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
104 OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
107 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
109 DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
110 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
111 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
114 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
116 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
117 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
118 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
119 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
120 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
123 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
125 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
126 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
127 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
128 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
129 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
132 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
134 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
135 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
136 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
137 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
138 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
141 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
143 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
144 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
145 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
146 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
147 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
150 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
152 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
153 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
156 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
158 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
159 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
160 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
161 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
162 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
163 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
166 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
168 DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
169 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
170 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
171 put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
172 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
173 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
176 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
178 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
179 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
180 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
181 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
182 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
183 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
186 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
188 DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
189 DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
190 DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
191 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
192 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
193 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
/*
 * put_unligned_store(s, dest): write the 16-byte vector s to dest.
 *
 * Two alternative definitions follow; the conditional-compilation lines that
 * select between them are not present in this listing.
 *
 * First definition: loads the vectors at offsets 0 and 15 from dest, builds
 * "edges" from them with the vec_lvsl mask, permutes s against edges with the
 * vec_lvsr mask, and stores the two results back at offsets 15 and 0.
 * It relies on the expansion site declaring tmp1, tmp2, mask, edges and
 * align.
 * NOTE(review): this looks like the usual AltiVec read-modify-write idiom for
 * a store to an address that is not 16-byte aligned -- confirm.
 *
 * Second definition: a single vec_vsx_st(s, 0, dest). Its body already ends
 * in ';', so "put_unligned_store(d, dst);" expands to an extra empty
 * statement.
 */
197 #define put_unligned_store(s, dest) { \
198 tmp1 = vec_ld(0, dest); \
199 mask = vec_lvsl(0, dest); \
200 tmp2 = vec_ld(15, dest); \
201 edges = vec_perm(tmp2, tmp1, mask); \
202 align = vec_lvsr(0, dest); \
203 tmp2 = vec_perm(s, edges, align); \
204 tmp1 = vec_perm(edges, s, align); \
205 vec_st(tmp2, 15, dest); \
206 vec_st(tmp1, 0 , dest); \
209 #define put_unligned_store(s, dest) vec_vsx_st(s, 0, dest);
213 const uint8_t * src2,
int dst_stride,
214 int src_stride1,
int h)
220 mask_ = vec_lvsl(0, src2);
223 for (
i = 0;
i <
h;
i++) {
224 a = unaligned_load(
i * src_stride1,
src1);
225 b = load_with_perm_vec(
i * 16, src2, mask_);
227 put_unligned_store(d, dst);
/*
 * avg_unligned_store(s, dest): average the vector s with the 16 bytes
 * currently stored at dest (vec_avg) and write the result back to dest.
 *
 * Two alternative definitions follow; the conditional-compilation lines that
 * select between them are not present in this listing.
 *
 * First definition: same load / permute / store sequence as the plain store
 * macro, with the value to store replaced by vec_avg of the current
 * destination contents and s. It relies on the expansion site declaring
 * tmp1, tmp2, mask, edges, align and a.
 *
 * Second definition: VSX load, average, VSX store; needs only the local a.
 * It addresses memory through the macro parameter dest. It previously named
 * the caller's variable dst directly, which only worked because the call site
 * happens to pass a variable called dst.
 */
233 #define avg_unligned_store(s, dest){ \
234 tmp1 = vec_ld(0, dest); \
235 mask = vec_lvsl(0, dest); \
236 tmp2 = vec_ld(15, dest); \
237 a = vec_avg(vec_perm(tmp1, tmp2, mask), s); \
238 edges = vec_perm(tmp2, tmp1, mask); \
239 align = vec_lvsr(0, dest); \
240 tmp2 = vec_perm(a, edges, align); \
241 tmp1 = vec_perm(edges, a, align); \
242 vec_st(tmp2, 15, dest); \
243 vec_st(tmp1, 0 , dest); \
246 #define avg_unligned_store(s, dest){ \
247 a = vec_avg(vec_vsx_ld(0, dest), s); \
248 vec_vsx_st(a, 0, dest); \
253 const uint8_t * src2,
int dst_stride,
254 int src_stride1,
int h)
261 mask_ = vec_lvsl(0, src2);
264 for (
i = 0;
i <
h;
i++) {
265 a = unaligned_load(
i * src_stride1,
src1);
266 b = load_with_perm_vec(
i * 16, src2, mask_);
268 avg_unligned_store(d, dst);
285 const int high_bit_depth =
bit_depth > 8;
290 if (!high_bit_depth) {
/*
 * dspfunc(PFX, IDX, NUM): fill the 16 entries of c->PFX_pixels_tab[IDX] with
 * the functions named <PFX><NUM>_mcXY_altivec.
 *
 * The entry for mcXY is stored at index X + 4*Y (mc00 -> 0, mc10 -> 1, ...,
 * mc01 -> 4, ..., mc33 -> 15).
 *
 * The expansion uses a variable named c from the surrounding scope, and the
 * last assignment carries no ';', so the invocation must supply it.
 */
291 #define dspfunc(PFX, IDX, NUM) \
292 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
293 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
294 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
295 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
296 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
297 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
298 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
299 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
300 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
301 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
302 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
303 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
304 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
305 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
306 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
307 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
static void bit_depth(AudioStatsContext *s, uint64_t mask, uint64_t imask, AVRational *depth)
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
static const uint16_t mask[17]
av_cold void ff_h264qpel_init_ppc(H264QpelContext *c, int bit_depth)
#define H264_MC(OPNAME, SIZE)
Undefined Behavior: In the C language, some operations are undefined, like signed integer overflow, dereferencing freed pointers, accessing outside allocated space, ... Undefined Behavior must not occur in a C program; it is not safe even if the output of undefined operations is unused. The unsafety may seem nit picking, but optimizing compilers have in fact optimized code on the assumption that no undefined behavior occurs. Optimizing code based on wrong assumptions can and has in some cases led to effects beyond the output of computations. The signed integer overflow problem in speed critical code: Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
#define PPC_ALTIVEC(flags)
The reader does not expect b to be semantically signed here, and if the code is changed by maybe adding a cast, a division or other, the signedness will almost certainly be mistaken. To avoid this confusion a new type was introduced: SUINT is the C unsigned type, but it holds a signed int. To use the same example: SUINT a
#define i(width, name, range_min, range_max)
#define dspfunc(PFX, IDX, NUM)