FFmpeg
rv40dsp.c
Go to the documentation of this file.
1 /*
2  * RV40 decoder motion compensation functions
3  * Copyright (c) 2008 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21 
22 /**
23  * @file
24  * RV40 decoder motion compensation functions
25  */
26 
27 #include "libavutil/common.h"
28 #include "libavutil/intreadwrite.h"
29 #include "h264qpel.h"
30 #include "mathops.h"
31 #include "pixels.h"
32 #include "rnd_avg.h"
33 #include "rv34dsp.h"
34 #include "libavutil/avassert.h"
35 
/**
 * Generate the RV40 luma lowpass kernels for one store operator.
 *
 * OPNAME is the function-name prefix and OP(dst, value) is the store
 * operator; OP may reference the local lookup pointer `cm`
 * (ff_crop_tab + MAX_NEG_CROP), presumably a clamp-to-byte table.
 *
 * Each output sample is a six-tap filter over src[-2..3] with the taps
 * (1, -5, C1, C2, -5, 1), rounded by 1 << (SHIFT - 1) and shifted right
 * by SHIFT.
 *
 * The 8-wide kernels process an 8-sample run per iteration:
 *  - *_qpel8_h_lowpass filters along a row and iterates over h rows;
 *  - *_qpel8_v_lowpass filters along a column (8 output rows) and iterates
 *    over w columns.
 * The 16-wide kernels are composed of four calls to the 8-wide ones: two
 * for the upper 8 rows, then two for the rows starting 8 lines further
 * down, with the remaining size (h - 8 or w - 8) passed as the count.
 */
#define RV40_LOWPASS(OPNAME, OP) \
static void OPNAME ## rv40_qpel8_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
                                           const int h, const int C1, const int C2, const int SHIFT){\
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
    int i;\
    for(i = 0; i < h; i++)\
    {\
        OP(dst[0], (src[-2] + src[ 3] - 5*(src[-1]+src[2]) + src[0]*C1 + src[1]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[1], (src[-1] + src[ 4] - 5*(src[ 0]+src[3]) + src[1]*C1 + src[2]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[2], (src[ 0] + src[ 5] - 5*(src[ 1]+src[4]) + src[2]*C1 + src[3]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[3], (src[ 1] + src[ 6] - 5*(src[ 2]+src[5]) + src[3]*C1 + src[4]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[4], (src[ 2] + src[ 7] - 5*(src[ 3]+src[6]) + src[4]*C1 + src[5]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[5], (src[ 3] + src[ 8] - 5*(src[ 4]+src[7]) + src[5]*C1 + src[6]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[6], (src[ 4] + src[ 9] - 5*(src[ 5]+src[8]) + src[6]*C1 + src[7]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[7], (src[ 5] + src[10] - 5*(src[ 6]+src[9]) + src[7]*C1 + src[8]*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        dst += dstStride;\
        src += srcStride;\
    }\
}\
\
static void OPNAME ## rv40_qpel8_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
                                           const int w, const int C1, const int C2, const int SHIFT){\
    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;\
    int i;\
    for(i = 0; i < w; i++)\
    {\
        /* the whole 13-sample column is loaded before any store */\
        const int srcB  = src[-2*srcStride];\
        const int srcA  = src[-1*srcStride];\
        const int src0  = src[0 *srcStride];\
        const int src1  = src[1 *srcStride];\
        const int src2  = src[2 *srcStride];\
        const int src3  = src[3 *srcStride];\
        const int src4  = src[4 *srcStride];\
        const int src5  = src[5 *srcStride];\
        const int src6  = src[6 *srcStride];\
        const int src7  = src[7 *srcStride];\
        const int src8  = src[8 *srcStride];\
        const int src9  = src[9 *srcStride];\
        const int src10 = src[10*srcStride];\
        OP(dst[0*dstStride], (srcB + src3  - 5*(srcA+src2) + src0*C1 + src1*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[1*dstStride], (srcA + src4  - 5*(src0+src3) + src1*C1 + src2*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[2*dstStride], (src0 + src5  - 5*(src1+src4) + src2*C1 + src3*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[3*dstStride], (src1 + src6  - 5*(src2+src5) + src3*C1 + src4*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[4*dstStride], (src2 + src7  - 5*(src3+src6) + src4*C1 + src5*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[5*dstStride], (src3 + src8  - 5*(src4+src7) + src5*C1 + src6*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[6*dstStride], (src4 + src9  - 5*(src5+src8) + src6*C1 + src7*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        OP(dst[7*dstStride], (src5 + src10 - 5*(src6+src9) + src7*C1 + src8*C2 + (1<<(SHIFT-1))) >> SHIFT);\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## rv40_qpel16_v_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
                                            const int w, const int C1, const int C2, const int SHIFT){\
    OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## rv40_qpel8_v_lowpass(dst  , src  , dstStride, srcStride, w-8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride, w-8, C1, C2, SHIFT);\
}\
\
static void OPNAME ## rv40_qpel16_h_lowpass(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride,\
                                            const int h, const int C1, const int C2, const int SHIFT){\
    OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, 8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, 8, C1, C2, SHIFT);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## rv40_qpel8_h_lowpass(dst  , src  , dstStride, srcStride, h-8, C1, C2, SHIFT);\
    OPNAME ## rv40_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride, h-8, C1, C2, SHIFT);\
}
108 
/**
 * Generate the RV40 quarter-pel motion compensation entry points
 * OPNAME rv40_qpel SIZE _mcXY_c for one block size.
 *
 * The suffix digits X and Y select the horizontal and vertical filter:
 *   1 -> coefficients (52, 20), shift 6
 *   2 -> coefficients (20, 20), shift 5
 *   3 -> coefficients (20, 52), shift 6
 *   0 -> no filtering in that direction
 *
 * Positions with both X and Y non-zero run in two passes: the horizontal
 * filter always stores (put_) SIZE+5 rows, starting two rows above the
 * block, into the local buffer `full`; the vertical filter then reads from
 * `full_mid` (two rows into that buffer) and applies OPNAME's store
 * operator to dst. mc33 is not generated by this macro.
 */
#define RV40_MC(OPNAME, SIZE) \
static void OPNAME ## rv40_qpel ## SIZE ## _mc10_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc30_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv40_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc01_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc11_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc21_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc31_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 52, 20, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc12_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc22_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc32_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 52, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 20, 5);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc03_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, src, stride, stride, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc13_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 52, 20, 6);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
}\
\
static void OPNAME ## rv40_qpel ## SIZE ## _mc23_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\
{\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid = full + SIZE*2;\
    put_rv40_qpel ## SIZE ## _h_lowpass(full, src - 2*stride, SIZE, stride, SIZE+5, 20, 20, 5);\
    OPNAME ## rv40_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE, SIZE, 20, 52, 6);\
}
194 
195 #define op_avg(a, b) a = (((a)+cm[b]+1)>>1)
196 #define op_put(a, b) a = cm[b]
197 
198 RV40_LOWPASS(put_ , op_put)
199 RV40_LOWPASS(avg_ , op_avg)
200 
201 #undef op_avg
202 #undef op_put
203 
204 RV40_MC(put_, 8)
205 RV40_MC(put_, 16)
206 RV40_MC(avg_, 8)
207 RV40_MC(avg_, 16)
208 
/**
 * Generate OPNAME_pixels8_xy2_8_c and, through CALL_2X_PIXELS,
 * OPNAME_pixels16_xy2_8_c.
 *
 * Each output byte is (p00 + p01 + p10 + p11 + 2) >> 2 over the 2x2
 * neighbourhood starting at the same position in `pixels`. Four bytes are
 * processed per 32-bit word: the low two bits of every byte are summed
 * separately (l0/l1, with the rounding constant folded into l0) from the
 * upper six bits (h0/h1), so that no carry crosses a byte boundary.
 *
 * The outer loop handles the two 4-byte column halves of the 8-wide block;
 * the inner loop emits two rows per iteration, so h is consumed in steps
 * of two. Stores go through a uint32_t pointer cast of `block`
 * (presumably callers guarantee suitable alignment - TODO confirm).
 */
#define PIXOP2(OPNAME, OP)                                              \
static inline void OPNAME ## _pixels8_xy2_8_c(uint8_t *block,           \
                                              const uint8_t *pixels,    \
                                              ptrdiff_t line_size,      \
                                              int h)                    \
{                                                                       \
    /* FIXME HIGH BIT DEPTH */                                          \
    int j;                                                              \
                                                                        \
    for (j = 0; j < 2; j++) {                                           \
        int i;                                                          \
        const uint32_t a0 = AV_RN32(pixels);                            \
        const uint32_t b0 = AV_RN32(pixels + 1);                        \
        uint32_t l0 = (a0 & 0x03030303UL) +                             \
                      (b0 & 0x03030303UL) +                             \
                           0x02020202UL;                                \
        uint32_t h0 = ((a0 & 0xFCFCFCFCUL) >> 2) +                      \
                      ((b0 & 0xFCFCFCFCUL) >> 2);                       \
        uint32_t l1, h1;                                                \
                                                                        \
        pixels += line_size;                                            \
        for (i = 0; i < h; i += 2) {                                    \
            uint32_t a = AV_RN32(pixels);                               \
            uint32_t b = AV_RN32(pixels + 1);                           \
            l1 = (a & 0x03030303UL) +                                   \
                 (b & 0x03030303UL);                                    \
            h1 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
                 ((b & 0xFCFCFCFCUL) >> 2);                             \
            OP(*((uint32_t *) block),                                   \
               h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));            \
            pixels += line_size;                                        \
            block  += line_size;                                        \
            a  = AV_RN32(pixels);                                       \
            b  = AV_RN32(pixels + 1);                                   \
            l0 = (a & 0x03030303UL) +                                   \
                 (b & 0x03030303UL) +                                   \
                      0x02020202UL;                                     \
            h0 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
                 ((b & 0xFCFCFCFCUL) >> 2);                             \
            OP(*((uint32_t *) block),                                   \
               h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));            \
            pixels += line_size;                                        \
            block  += line_size;                                        \
        }                                                               \
        /* rewind to the top row and move to the next 4-byte column */  \
        pixels += 4 - line_size * (h + 1);                              \
        block  += 4 - line_size * h;                                    \
    }                                                                   \
}                                                                       \
                                                                        \
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_8_c,                             \
               OPNAME ## _pixels8_xy2_8_c,                              \
               8)
261 
262 #define op_avg(a, b) a = rnd_avg32(a, b)
263 #define op_put(a, b) a = b
264 PIXOP2(avg, op_avg)
265 PIXOP2(put, op_put)
266 #undef op_avg
267 #undef op_put
268 
/** 16x16 "put" for position mc33: delegates to the 2x2 averaging xy2 kernel. */
static void put_rv40_qpel16_mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
/** 16x16 "avg" for position mc33: delegates to the 2x2 averaging xy2 kernel. */
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
/** 8x8 "put" for position mc33: delegates to the 2x2 averaging xy2 kernel. */
static void put_rv40_qpel8_mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
/** 8x8 "avg" for position mc33: delegates to the 2x2 averaging xy2 kernel. */
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
{
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
285 
/**
 * Rounding bias added to the chroma interpolation sum before the final
 * shift; indexed as rv40_bias[y >> 1][x >> 1] by the chroma MC functions.
 */
static const int rv40_bias[4][4] = {
    {  0, 16, 32, 16 },
    { 32, 28, 32, 28 },
    {  0, 32, 16, 32 },
    { 32, 28, 32, 28 }
};
292 
/**
 * Generate the RV40 chroma motion compensation functions
 * OPNAME rv40_chroma_mc4_c (4 samples per row) and
 * OPNAME rv40_chroma_mc8_c (8 samples per row).
 *
 * x and y are the fractional offsets, asserted to lie in 0..7. The four
 * bilinear weights A, B, C, D are products of (8 - x) / x and (8 - y) / y,
 * so they sum to 64. OP(dst, value) receives the weighted sum plus a
 * position-dependent bias from rv40_bias; the final scaling is left to OP.
 *
 * When D is zero (x == 0 or y == 0) only two taps are needed: the second
 * tap is taken `step` samples away, where step is `stride` if C is
 * non-zero and 1 otherwise, with weight E = B + C.
 *
 * The bias lookup is performed after the range assertion so that the
 * table is not indexed before x and y have been checked.
 */
#define RV40_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## rv40_chroma_mc4_c(uint8_t *dst /*align 8*/,\
                                        const uint8_t *src /*align 1*/,\
                                        ptrdiff_t stride, int h, int x, int y)\
{\
    const int A = (8-x) * (8-y);\
    const int B = (  x) * (8-y);\
    const int C = (8-x) * (  y);\
    const int D = (  x) * (  y);\
    int i;\
    int bias;\
    \
    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
    bias = rv40_bias[y>>1][x>>1];\
\
    if(D){\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }else{\
        const int E = B + C;\
        const ptrdiff_t step = C ? stride : 1;\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
            OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
            OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
            OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }\
}\
\
static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/,\
                                        const uint8_t *src/*align 1*/,\
                                        ptrdiff_t stride, int h, int x, int y)\
{\
    const int A = (8-x) * (8-y);\
    const int B = (  x) * (8-y);\
    const int C = (8-x) * (  y);\
    const int D = (  x) * (  y);\
    int i;\
    int bias;\
    \
    av_assert2(x<8 && y<8 && x>=0 && y>=0);\
    bias = rv40_bias[y>>1][x>>1];\
\
    if(D){\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + bias));\
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + bias));\
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + bias));\
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + bias));\
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + bias));\
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + bias));\
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + bias));\
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }else{\
        const int E = B + C;\
        const ptrdiff_t step = C ? stride : 1;\
        for(i = 0; i < h; i++){\
            OP(dst[0], (A*src[0] + E*src[step+0] + bias));\
            OP(dst[1], (A*src[1] + E*src[step+1] + bias));\
            OP(dst[2], (A*src[2] + E*src[step+2] + bias));\
            OP(dst[3], (A*src[3] + E*src[step+3] + bias));\
            OP(dst[4], (A*src[4] + E*src[step+4] + bias));\
            OP(dst[5], (A*src[5] + E*src[step+5] + bias));\
            OP(dst[6], (A*src[6] + E*src[step+6] + bias));\
            OP(dst[7], (A*src[7] + E*src[step+7] + bias));\
            dst += stride;\
            src += stride;\
        }\
    }\
}
373 
374 #define op_avg(a, b) a = (((a)+((b)>>6)+1)>>1)
375 #define op_put(a, b) a = ((b)>>6)
376 
377 RV40_CHROMA_MC(put_, op_put)
378 RV40_CHROMA_MC(avg_, op_avg)
379 
/**
 * Generate the two weighted-prediction functions for a size x size block.
 *
 * Both blend src1 (weighted by w2) with src2 (weighted by w1) and store
 * the result in dst; all three buffers advance by `stride` per row.
 *  - rv40_weight_func_rnd_<size>: each product is shifted right by 9
 *    before the two terms are added, then 0x10 is added and the sum is
 *    shifted right by 5.
 *  - rv40_weight_func_nornd_<size>: the products are added unshifted,
 *    then 0x10 is added and the sum is shifted right by 5.
 * The weights are converted to unsigned before multiplying.
 */
#define RV40_WEIGHT_FUNC(size) \
static void rv40_weight_func_rnd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
{\
    int i, j;\
\
    for (j = 0; j < size; j++) {\
        for (i = 0; i < size; i++)\
            dst[i] = ((((unsigned)w2 * src1[i]) >> 9) + (((unsigned)w1 * src2[i]) >> 9) + 0x10) >> 5;\
        src1 += stride;\
        src2 += stride;\
        dst  += stride;\
    }\
}\
static void rv40_weight_func_nornd_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, ptrdiff_t stride)\
{\
    int i, j;\
\
    for (j = 0; j < size; j++) {\
        for (i = 0; i < size; i++)\
            dst[i] = ((unsigned)w2 * src1[i] + (unsigned)w1 * src2[i] + 0x10) >> 5;\
        src1 += stride;\
        src2 += stride;\
        dst  += stride;\
    }\
}

/* 16x16 and 8x8 instances, referenced by ff_rv40dsp_init() */
RV40_WEIGHT_FUNC(16)
RV40_WEIGHT_FUNC(8)
408 
/**
 * dither values for deblocking filter - left/top values
 *
 * Indexed as rv40_dither_l[dmode + i] by the strong loop filter, where i
 * is the line number 0..3 within the filtered edge segment.
 */
static const uint8_t rv40_dither_l[16] = {
    0x40, 0x50, 0x20, 0x60, 0x30, 0x50, 0x40, 0x30,
    0x50, 0x40, 0x50, 0x30, 0x60, 0x20, 0x50, 0x40
};
416 
/**
 * dither values for deblocking filter - right/bottom values
 *
 * Indexed as rv40_dither_r[dmode + i] by the strong loop filter, where i
 * is the line number 0..3 within the filtered edge segment.
 */
static const uint8_t rv40_dither_r[16] = {
    0x40, 0x30, 0x60, 0x20, 0x50, 0x30, 0x30, 0x40,
    0x40, 0x40, 0x50, 0x30, 0x20, 0x60, 0x30, 0x40
};
424 
/** Clip a to the symmetric range [-b, b] using av_clip(). */
#define CLIP_SYMM(a, b) av_clip(a, -(b), b)
426 /**
427  * weaker deblocking very similar to the one described in 4.4.2 of JVT-A003r1
428  */
430  const int step,
431  const ptrdiff_t stride,
432  const int filter_p1,
433  const int filter_q1,
434  const int alpha,
435  const int beta,
436  const int lim_p0q0,
437  const int lim_q1,
438  const int lim_p1)
439 {
440  const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
441  int i, t, u, diff;
442 
443  for (i = 0; i < 4; i++, src += stride) {
444  int diff_p1p0 = src[-2*step] - src[-1*step];
445  int diff_q1q0 = src[ 1*step] - src[ 0*step];
446  int diff_p1p2 = src[-2*step] - src[-3*step];
447  int diff_q1q2 = src[ 1*step] - src[ 2*step];
448 
449  t = src[0*step] - src[-1*step];
450  if (!t)
451  continue;
452 
453  u = (alpha * FFABS(t)) >> 7;
454  if (u > 3 - (filter_p1 && filter_q1))
455  continue;
456 
457  t *= 1 << 2;
458  if (filter_p1 && filter_q1)
459  t += src[-2*step] - src[1*step];
460 
461  diff = CLIP_SYMM((t + 4) >> 3, lim_p0q0);
462  src[-1*step] = cm[src[-1*step] + diff];
463  src[ 0*step] = cm[src[ 0*step] - diff];
464 
465  if (filter_p1 && FFABS(diff_p1p2) <= beta) {
466  t = (diff_p1p0 + diff_p1p2 - diff) >> 1;
467  src[-2*step] = cm[src[-2*step] - CLIP_SYMM(t, lim_p1)];
468  }
469 
470  if (filter_q1 && FFABS(diff_q1q2) <= beta) {
471  t = (diff_q1q0 + diff_q1q2 + diff) >> 1;
472  src[ 1*step] = cm[src[ 1*step] - CLIP_SYMM(t, lim_q1)];
473  }
474  }
475 }
476 
/**
 * "h" variant of the weak loop filter: calls rv40_weak_loop_filter() with
 * step = stride and a line advance of 1, i.e. the filter taps are `stride`
 * apart and the four filtered lines are adjacent in memory.
 */
static void rv40_h_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                    const int filter_p1, const int filter_q1,
                                    const int alpha, const int beta,
                                    const int lim_p0q0, const int lim_q1,
                                    const int lim_p1)
{
    rv40_weak_loop_filter(src, stride, 1, filter_p1, filter_q1,
                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
}
486 
/**
 * "v" variant of the weak loop filter: calls rv40_weak_loop_filter() with
 * step = 1 and a line advance of `stride`, i.e. the filter taps are
 * adjacent in memory and the four filtered lines are `stride` apart.
 */
static void rv40_v_weak_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                    const int filter_p1, const int filter_q1,
                                    const int alpha, const int beta,
                                    const int lim_p0q0, const int lim_q1,
                                    const int lim_p1)
{
    rv40_weak_loop_filter(src, 1, stride, filter_p1, filter_q1,
                          alpha, beta, lim_p0q0, lim_q1, lim_p1);
}
496 
498  const int step,
499  const ptrdiff_t stride,
500  const int alpha,
501  const int lims,
502  const int dmode,
503  const int chroma)
504 {
505  int i;
506 
507  for(i = 0; i < 4; i++, src += stride){
508  int sflag, p0, q0, p1, q1;
509  int t = src[0*step] - src[-1*step];
510 
511  if (!t)
512  continue;
513 
514  sflag = (alpha * FFABS(t)) >> 7;
515  if (sflag > 1)
516  continue;
517 
518  p0 = (25*src[-3*step] + 26*src[-2*step] + 26*src[-1*step] +
519  26*src[ 0*step] + 25*src[ 1*step] +
520  rv40_dither_l[dmode + i]) >> 7;
521 
522  q0 = (25*src[-2*step] + 26*src[-1*step] + 26*src[ 0*step] +
523  26*src[ 1*step] + 25*src[ 2*step] +
524  rv40_dither_r[dmode + i]) >> 7;
525 
526  if (sflag) {
527  p0 = av_clip(p0, src[-1*step] - lims, src[-1*step] + lims);
528  q0 = av_clip(q0, src[ 0*step] - lims, src[ 0*step] + lims);
529  }
530 
531  p1 = (25*src[-4*step] + 26*src[-3*step] + 26*src[-2*step] + 26*p0 +
532  25*src[ 0*step] + rv40_dither_l[dmode + i]) >> 7;
533  q1 = (25*src[-1*step] + 26*q0 + 26*src[ 1*step] + 26*src[ 2*step] +
534  25*src[ 3*step] + rv40_dither_r[dmode + i]) >> 7;
535 
536  if (sflag) {
537  p1 = av_clip(p1, src[-2*step] - lims, src[-2*step] + lims);
538  q1 = av_clip(q1, src[ 1*step] - lims, src[ 1*step] + lims);
539  }
540 
541  src[-2*step] = p1;
542  src[-1*step] = p0;
543  src[ 0*step] = q0;
544  src[ 1*step] = q1;
545 
546  if(!chroma){
547  src[-3*step] = (25*src[-1*step] + 26*src[-2*step] +
548  51*src[-3*step] + 26*src[-4*step] + 64) >> 7;
549  src[ 2*step] = (25*src[ 0*step] + 26*src[ 1*step] +
550  51*src[ 2*step] + 26*src[ 3*step] + 64) >> 7;
551  }
552  }
553 }
554 
/**
 * "h" variant of the strong loop filter: calls rv40_strong_loop_filter()
 * with step = stride and a line advance of 1.
 */
static void rv40_h_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                      const int alpha, const int lims,
                                      const int dmode, const int chroma)
{
    rv40_strong_loop_filter(src, stride, 1, alpha, lims, dmode, chroma);
}
561 
/**
 * "v" variant of the strong loop filter: calls rv40_strong_loop_filter()
 * with step = 1 and a line advance of `stride`.
 */
static void rv40_v_strong_loop_filter(uint8_t *src, const ptrdiff_t stride,
                                      const int alpha, const int lims,
                                      const int dmode, const int chroma)
{
    rv40_strong_loop_filter(src, 1, stride, alpha, lims, dmode, chroma);
}
568 
570  int step, ptrdiff_t stride,
571  int beta, int beta2,
572  int edge,
573  int *p1, int *q1)
574 {
575  int sum_p1p0 = 0, sum_q1q0 = 0, sum_p1p2 = 0, sum_q1q2 = 0;
576  int strong0 = 0, strong1 = 0;
577  uint8_t *ptr;
578  int i;
579 
580  for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
581  sum_p1p0 += ptr[-2*step] - ptr[-1*step];
582  sum_q1q0 += ptr[ 1*step] - ptr[ 0*step];
583  }
584 
585  *p1 = FFABS(sum_p1p0) < (beta << 2);
586  *q1 = FFABS(sum_q1q0) < (beta << 2);
587 
588  if(!*p1 && !*q1)
589  return 0;
590 
591  if (!edge)
592  return 0;
593 
594  for (i = 0, ptr = src; i < 4; i++, ptr += stride) {
595  sum_p1p2 += ptr[-2*step] - ptr[-3*step];
596  sum_q1q2 += ptr[ 1*step] - ptr[ 2*step];
597  }
598 
599  strong0 = *p1 && (FFABS(sum_p1p2) < beta2);
600  strong1 = *q1 && (FFABS(sum_q1q2) < beta2);
601 
602  return strong0 && strong1;
603 }
604 
/**
 * "h" variant of the strength decision: calls rv40_loop_filter_strength()
 * with step = stride and a line advance of 1.
 */
static int rv40_h_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
                                       int beta, int beta2, int edge,
                                       int *p1, int *q1)
{
    return rv40_loop_filter_strength(src, stride, 1, beta, beta2, edge, p1, q1);
}
611 
/**
 * "v" variant of the strength decision: calls rv40_loop_filter_strength()
 * with step = 1 and a line advance of `stride`.
 */
static int rv40_v_loop_filter_strength(uint8_t *src, ptrdiff_t stride,
                                       int beta, int beta2, int edge,
                                       int *p1, int *q1)
{
    return rv40_loop_filter_strength(src, 1, stride, beta, beta2, edge, p1, q1);
}
618 
620 {
621  H264QpelContext qpel;
622 
624  ff_h264qpel_init(&qpel, 8);
625 
626  c->put_pixels_tab[0][ 0] = qpel.put_h264_qpel_pixels_tab[0][0];
627  c->put_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c;
628  c->put_pixels_tab[0][ 2] = qpel.put_h264_qpel_pixels_tab[0][2];
629  c->put_pixels_tab[0][ 3] = put_rv40_qpel16_mc30_c;
630  c->put_pixels_tab[0][ 4] = put_rv40_qpel16_mc01_c;
631  c->put_pixels_tab[0][ 5] = put_rv40_qpel16_mc11_c;
632  c->put_pixels_tab[0][ 6] = put_rv40_qpel16_mc21_c;
633  c->put_pixels_tab[0][ 7] = put_rv40_qpel16_mc31_c;
634  c->put_pixels_tab[0][ 8] = qpel.put_h264_qpel_pixels_tab[0][8];
635  c->put_pixels_tab[0][ 9] = put_rv40_qpel16_mc12_c;
636  c->put_pixels_tab[0][10] = put_rv40_qpel16_mc22_c;
637  c->put_pixels_tab[0][11] = put_rv40_qpel16_mc32_c;
638  c->put_pixels_tab[0][12] = put_rv40_qpel16_mc03_c;
639  c->put_pixels_tab[0][13] = put_rv40_qpel16_mc13_c;
640  c->put_pixels_tab[0][14] = put_rv40_qpel16_mc23_c;
641  c->put_pixels_tab[0][15] = put_rv40_qpel16_mc33_c;
642  c->avg_pixels_tab[0][ 0] = qpel.avg_h264_qpel_pixels_tab[0][0];
643  c->avg_pixels_tab[0][ 1] = avg_rv40_qpel16_mc10_c;
644  c->avg_pixels_tab[0][ 2] = qpel.avg_h264_qpel_pixels_tab[0][2];
645  c->avg_pixels_tab[0][ 3] = avg_rv40_qpel16_mc30_c;
646  c->avg_pixels_tab[0][ 4] = avg_rv40_qpel16_mc01_c;
647  c->avg_pixels_tab[0][ 5] = avg_rv40_qpel16_mc11_c;
648  c->avg_pixels_tab[0][ 6] = avg_rv40_qpel16_mc21_c;
649  c->avg_pixels_tab[0][ 7] = avg_rv40_qpel16_mc31_c;
650  c->avg_pixels_tab[0][ 8] = qpel.avg_h264_qpel_pixels_tab[0][8];
651  c->avg_pixels_tab[0][ 9] = avg_rv40_qpel16_mc12_c;
652  c->avg_pixels_tab[0][10] = avg_rv40_qpel16_mc22_c;
653  c->avg_pixels_tab[0][11] = avg_rv40_qpel16_mc32_c;
654  c->avg_pixels_tab[0][12] = avg_rv40_qpel16_mc03_c;
655  c->avg_pixels_tab[0][13] = avg_rv40_qpel16_mc13_c;
656  c->avg_pixels_tab[0][14] = avg_rv40_qpel16_mc23_c;
657  c->avg_pixels_tab[0][15] = avg_rv40_qpel16_mc33_c;
658  c->put_pixels_tab[1][ 0] = qpel.put_h264_qpel_pixels_tab[1][0];
659  c->put_pixels_tab[1][ 1] = put_rv40_qpel8_mc10_c;
660  c->put_pixels_tab[1][ 2] = qpel.put_h264_qpel_pixels_tab[1][2];
661  c->put_pixels_tab[1][ 3] = put_rv40_qpel8_mc30_c;
662  c->put_pixels_tab[1][ 4] = put_rv40_qpel8_mc01_c;
663  c->put_pixels_tab[1][ 5] = put_rv40_qpel8_mc11_c;
664  c->put_pixels_tab[1][ 6] = put_rv40_qpel8_mc21_c;
665  c->put_pixels_tab[1][ 7] = put_rv40_qpel8_mc31_c;
666  c->put_pixels_tab[1][ 8] = qpel.put_h264_qpel_pixels_tab[1][8];
667  c->put_pixels_tab[1][ 9] = put_rv40_qpel8_mc12_c;
668  c->put_pixels_tab[1][10] = put_rv40_qpel8_mc22_c;
669  c->put_pixels_tab[1][11] = put_rv40_qpel8_mc32_c;
670  c->put_pixels_tab[1][12] = put_rv40_qpel8_mc03_c;
671  c->put_pixels_tab[1][13] = put_rv40_qpel8_mc13_c;
672  c->put_pixels_tab[1][14] = put_rv40_qpel8_mc23_c;
673  c->put_pixels_tab[1][15] = put_rv40_qpel8_mc33_c;
674  c->avg_pixels_tab[1][ 0] = qpel.avg_h264_qpel_pixels_tab[1][0];
675  c->avg_pixels_tab[1][ 1] = avg_rv40_qpel8_mc10_c;
676  c->avg_pixels_tab[1][ 2] = qpel.avg_h264_qpel_pixels_tab[1][2];
677  c->avg_pixels_tab[1][ 3] = avg_rv40_qpel8_mc30_c;
678  c->avg_pixels_tab[1][ 4] = avg_rv40_qpel8_mc01_c;
679  c->avg_pixels_tab[1][ 5] = avg_rv40_qpel8_mc11_c;
680  c->avg_pixels_tab[1][ 6] = avg_rv40_qpel8_mc21_c;
681  c->avg_pixels_tab[1][ 7] = avg_rv40_qpel8_mc31_c;
682  c->avg_pixels_tab[1][ 8] = qpel.avg_h264_qpel_pixels_tab[1][8];
683  c->avg_pixels_tab[1][ 9] = avg_rv40_qpel8_mc12_c;
684  c->avg_pixels_tab[1][10] = avg_rv40_qpel8_mc22_c;
685  c->avg_pixels_tab[1][11] = avg_rv40_qpel8_mc32_c;
686  c->avg_pixels_tab[1][12] = avg_rv40_qpel8_mc03_c;
687  c->avg_pixels_tab[1][13] = avg_rv40_qpel8_mc13_c;
688  c->avg_pixels_tab[1][14] = avg_rv40_qpel8_mc23_c;
689  c->avg_pixels_tab[1][15] = avg_rv40_qpel8_mc33_c;
690 
691  c->put_chroma_pixels_tab[0] = put_rv40_chroma_mc8_c;
692  c->put_chroma_pixels_tab[1] = put_rv40_chroma_mc4_c;
693  c->avg_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c;
694  c->avg_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c;
695 
696  c->rv40_weight_pixels_tab[0][0] = rv40_weight_func_rnd_16;
697  c->rv40_weight_pixels_tab[0][1] = rv40_weight_func_rnd_8;
698  c->rv40_weight_pixels_tab[1][0] = rv40_weight_func_nornd_16;
699  c->rv40_weight_pixels_tab[1][1] = rv40_weight_func_nornd_8;
700 
701  c->rv40_weak_loop_filter[0] = rv40_h_weak_loop_filter;
702  c->rv40_weak_loop_filter[1] = rv40_v_weak_loop_filter;
703  c->rv40_strong_loop_filter[0] = rv40_h_strong_loop_filter;
704  c->rv40_strong_loop_filter[1] = rv40_v_strong_loop_filter;
705  c->rv40_loop_filter_strength[0] = rv40_h_loop_filter_strength;
706  c->rv40_loop_filter_strength[1] = rv40_v_loop_filter_strength;
707 
708 #if ARCH_AARCH64
710 #elif ARCH_ARM
712 #elif ARCH_RISCV
714 #elif ARCH_X86
716 #endif
717 }
rv40_weak_loop_filter
static av_always_inline void rv40_weak_loop_filter(uint8_t *src, const int step, const ptrdiff_t stride, const int filter_p1, const int filter_q1, const int alpha, const int beta, const int lim_p0q0, const int lim_q1, const int lim_p1)
weaker deblocking very similar to the one described in 4.4.2 of JVT-A003r1
Definition: rv40dsp.c:429
q1
static const uint8_t q1[256]
Definition: twofish.c:100
av_clip
#define av_clip
Definition: common.h:100
u
#define u(width, name, range_min, range_max)
Definition: cbs_h2645.c:251
rv40_v_strong_loop_filter
static void rv40_v_strong_loop_filter(uint8_t *src, const ptrdiff_t stride, const int alpha, const int lims, const int dmode, const int chroma)
Definition: rv40dsp.c:562
step
trying all byte sequences megabyte in length and selecting the best looking sequence will yield cases to try But a word about which is also called distortion Distortion can be quantified by almost any quality measurement one chooses the sum of squared differences is used but more complex methods that consider psychovisual effects can be used as well It makes no difference in this discussion First step
Definition: rate_distortion.txt:58
op_put
#define op_put(a, b)
Definition: rv40dsp.c:375
RV40_MC
#define RV40_MC(OPNAME, SIZE)
Definition: rv40dsp.c:109
chroma
static av_always_inline void chroma(WaveformContext *s, AVFrame *in, AVFrame *out, int component, int intensity, int offset_y, int offset_x, int column, int mirror, int jobnr, int nb_jobs)
Definition: vf_waveform.c:1639
pixels.h
ff_rv34dsp_init
av_cold void ff_rv34dsp_init(RV34DSPContext *c)
Definition: rv34dsp.c:131
rv40_h_weak_loop_filter
static void rv40_h_weak_loop_filter(uint8_t *src, const ptrdiff_t stride, const int filter_p1, const int filter_q1, const int alpha, const int beta, const int lim_p0q0, const int lim_q1, const int lim_p1)
Definition: rv40dsp.c:477
ff_crop_tab
#define ff_crop_tab
Definition: motionpixels_tablegen.c:26
H264QpelContext::avg_h264_qpel_pixels_tab
qpel_mc_func avg_h264_qpel_pixels_tab[4][16]
Definition: h264qpel.h:29
CLIP_SYMM
#define CLIP_SYMM(a, b)
Definition: rv40dsp.c:425
h264qpel.h
rv40_loop_filter_strength
static av_always_inline int rv40_loop_filter_strength(uint8_t *src, int step, ptrdiff_t stride, int beta, int beta2, int edge, int *p1, int *q1)
Definition: rv40dsp.c:569
ff_rv40dsp_init_riscv
av_cold void ff_rv40dsp_init_riscv(RV34DSPContext *c)
Definition: rv40dsp_init.c:38
PIXOP2
#define PIXOP2(OPNAME, OP)
Definition: rv40dsp.c:209
ff_h264qpel_init
av_cold void ff_h264qpel_init(H264QpelContext *c, int bit_depth)
Definition: h264qpel.c:49
avassert.h
rv40_v_weak_loop_filter
static void rv40_v_weak_loop_filter(uint8_t *src, const ptrdiff_t stride, const int filter_p1, const int filter_q1, const int alpha, const int beta, const int lim_p0q0, const int lim_q1, const int lim_p1)
Definition: rv40dsp.c:487
av_cold
#define av_cold
Definition: attributes.h:90
intreadwrite.h
rv40_dither_r
static const uint8_t rv40_dither_r[16]
dither values for deblocking filter - right/bottom values
Definition: rv40dsp.c:420
rv40_dither_l
static const uint8_t rv40_dither_l[16]
dither values for deblocking filter - left/top values
Definition: rv40dsp.c:412
rv40_strong_loop_filter
static av_always_inline void rv40_strong_loop_filter(uint8_t *src, const int step, const ptrdiff_t stride, const int alpha, const int lims, const int dmode, const int chroma)
Definition: rv40dsp.c:497
q0
static const uint8_t q0[256]
Definition: twofish.c:81
FFABS
#define FFABS(a)
Absolute value, Note, INT_MIN / INT64_MIN result in undefined behavior as they are not representable ...
Definition: common.h:74
mathops.h
put_rv40_qpel16_mc33_c
static void put_rv40_qpel16_mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: rv40dsp.c:269
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
RV34DSPContext
Definition: rv34dsp.h:57
put_rv40_qpel8_mc33_c
static void put_rv40_qpel8_mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: rv40dsp.c:277
rv34dsp.h
avg
#define avg(a, b, c, d)
Definition: colorspacedsp_template.c:28
diff
static av_always_inline int diff(const struct color_info *a, const struct color_info *b, const int trans_thresh)
Definition: vf_paletteuse.c:164
RV40_LOWPASS
#define RV40_LOWPASS(OPNAME, OP)
Definition: rv40dsp.c:36
rv40_v_loop_filter_strength
static int rv40_v_loop_filter_strength(uint8_t *src, ptrdiff_t stride, int beta, int beta2, int edge, int *p1, int *q1)
Definition: rv40dsp.c:612
op_avg
#define op_avg(a, b)
Definition: rv40dsp.c:374
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:256
RV40_WEIGHT_FUNC
#define RV40_WEIGHT_FUNC(size)
Definition: rv40dsp.c:380
common.h
av_always_inline
#define av_always_inline
Definition: attributes.h:49
ff_rv40dsp_init_aarch64
av_cold void ff_rv40dsp_init_aarch64(RV34DSPContext *c)
Definition: rv40dsp_init_aarch64.c:38
H264QpelContext
Definition: h264qpel.h:27
stride
#define stride
Definition: h264pred_template.c:537
rnd_avg.h
avg_rv40_qpel16_mc33_c
static void avg_rv40_qpel16_mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: rv40dsp.c:273
avg_rv40_qpel8_mc33_c
static void avg_rv40_qpel8_mc33_c(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)
Definition: rv40dsp.c:281
H264QpelContext::put_h264_qpel_pixels_tab
qpel_mc_func put_h264_qpel_pixels_tab[4][16]
Definition: h264qpel.h:28
cm
#define cm
Definition: dvbsubdec.c:40
ff_rv40dsp_init
av_cold void ff_rv40dsp_init(RV34DSPContext *c)
Definition: rv40dsp.c:619
RV40_CHROMA_MC
#define RV40_CHROMA_MC(OPNAME, OP)
Definition: rv40dsp.c:293
rv40_bias
static const int rv40_bias[4][4]
Definition: rv40dsp.c:286
alpha
static const int16_t alpha[]
Definition: ilbcdata.h:55
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
ff_rv40dsp_init_x86
void ff_rv40dsp_init_x86(RV34DSPContext *c)
Definition: rv40dsp_init.c:188
ff_rv40dsp_init_arm
av_cold void ff_rv40dsp_init_arm(RV34DSPContext *c)
Definition: rv40dsp_init_arm.c:144
rv40_h_loop_filter_strength
static int rv40_h_loop_filter_strength(uint8_t *src, ptrdiff_t stride, int beta, int beta2, int edge, int *p1, int *q1)
Definition: rv40dsp.c:605
MAX_NEG_CROP
#define MAX_NEG_CROP
Definition: mathops.h:31
rv40_h_strong_loop_filter
static void rv40_h_strong_loop_filter(uint8_t *src, const ptrdiff_t stride, const int alpha, const int lims, const int dmode, const int chroma)
Definition: rv40dsp.c:555