FFmpeg
swscale.c
Go to the documentation of this file.
1 /*
2  * This file is part of FFmpeg.
3  *
4  * FFmpeg is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 2.1 of the License, or (at your option) any later version.
8  *
9  * FFmpeg is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with FFmpeg; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17  */
18 
#include "config.h"
#include "libavutil/attributes.h"
#include "libswscale/swscale.h"
#include "libswscale/swscale_internal.h"
#include "libavutil/aarch64/cpu.h"
24 
/*
 * Low-level NEON horizontal scalers (the `_asm` suffix suggests they are
 * implemented in assembly outside this file -- not visible here).
 * All six share one signature.  The leading `shift` argument is supplied by
 * the C wrappers in this file, which derive it from the source pixel format.
 * The name encodes the 16 -> 15 / 16 -> 19 variant and the filter-size
 * variant (4, X8, X4).
 */

/* 16 -> 15 variants */
void ff_hscale16to15_4_neon_asm(int shift, int16_t *_dst, int dstW,
                                const uint8_t *_src, const int16_t *filter,
                                const int32_t *filterPos, int filterSize);
void ff_hscale16to15_X8_neon_asm(int shift, int16_t *_dst, int dstW,
                                 const uint8_t *_src, const int16_t *filter,
                                 const int32_t *filterPos, int filterSize);
void ff_hscale16to15_X4_neon_asm(int shift, int16_t *_dst, int dstW,
                                 const uint8_t *_src, const int16_t *filter,
                                 const int32_t *filterPos, int filterSize);

/* 16 -> 19 variants */
void ff_hscale16to19_4_neon_asm(int shift, int16_t *_dst, int dstW,
                                const uint8_t *_src, const int16_t *filter,
                                const int32_t *filterPos, int filterSize);
void ff_hscale16to19_X8_neon_asm(int shift, int16_t *_dst, int dstW,
                                 const uint8_t *_src, const int16_t *filter,
                                 const int32_t *filterPos, int filterSize);
void ff_hscale16to19_X4_neon_asm(int shift, int16_t *_dst, int dstW,
                                 const uint8_t *_src, const int16_t *filter,
                                 const int32_t *filterPos, int filterSize);
43 
44 static void ff_hscale16to15_4_neon(SwsContext *c, int16_t *_dst, int dstW,
45  const uint8_t *_src, const int16_t *filter,
46  const int32_t *filterPos, int filterSize)
47 {
48  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
49  int sh = desc->comp[0].depth - 1;
50 
51  if (sh<15) {
52  sh = isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : (desc->comp[0].depth - 1);
53  } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
54  sh = 16 - 1;
55  }
56  ff_hscale16to15_4_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
57 
58 }
59 
60 static void ff_hscale16to15_X8_neon(SwsContext *c, int16_t *_dst, int dstW,
61  const uint8_t *_src, const int16_t *filter,
62  const int32_t *filterPos, int filterSize)
63 {
64  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
65  int sh = desc->comp[0].depth - 1;
66 
67  if (sh<15) {
68  sh = isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : (desc->comp[0].depth - 1);
69  } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
70  sh = 16 - 1;
71  }
72  ff_hscale16to15_X8_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
73 
74 }
75 
76 static void ff_hscale16to15_X4_neon(SwsContext *c, int16_t *_dst, int dstW,
77  const uint8_t *_src, const int16_t *filter,
78  const int32_t *filterPos, int filterSize)
79 {
80  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
81  int sh = desc->comp[0].depth - 1;
82 
83  if (sh<15) {
84  sh = isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8 ? 13 : (desc->comp[0].depth - 1);
85  } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
86  sh = 16 - 1;
87  }
88  ff_hscale16to15_X4_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
89 }
90 
91 static void ff_hscale16to19_4_neon(SwsContext *c, int16_t *_dst, int dstW,
92  const uint8_t *_src, const int16_t *filter,
93  const int32_t *filterPos, int filterSize)
94 {
95  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
96  int bits = desc->comp[0].depth - 1;
97  int sh = bits - 4;
98 
99  if ((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth<16) {
100  sh = 9;
101  } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
102  sh = 16 - 1 - 4;
103  }
104 
105  ff_hscale16to19_4_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
106 
107 }
108 
109 static void ff_hscale16to19_X8_neon(SwsContext *c, int16_t *_dst, int dstW,
110  const uint8_t *_src, const int16_t *filter,
111  const int32_t *filterPos, int filterSize)
112 {
113  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
114  int bits = desc->comp[0].depth - 1;
115  int sh = bits - 4;
116 
117  if ((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth<16) {
118  sh = 9;
119  } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
120  sh = 16 - 1 - 4;
121  }
122 
123  ff_hscale16to19_X8_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
124 
125 }
126 
127 static void ff_hscale16to19_X4_neon(SwsContext *c, int16_t *_dst, int dstW,
128  const uint8_t *_src, const int16_t *filter,
129  const int32_t *filterPos, int filterSize)
130 {
131  const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->srcFormat);
132  int bits = desc->comp[0].depth - 1;
133  int sh = bits - 4;
134 
135  if ((isAnyRGB(c->srcFormat) || c->srcFormat==AV_PIX_FMT_PAL8) && desc->comp[0].depth<16) {
136  sh = 9;
137  } else if (desc->flags & AV_PIX_FMT_FLAG_FLOAT) { /* float input are process like uint 16bpc */
138  sh = 16 - 1 - 4;
139  }
140 
141  ff_hscale16to19_X4_neon_asm(sh, _dst, dstW, _src, filter, filterPos, filterSize);
142 
143 }
144 
145 #define SCALE_FUNC(filter_n, from_bpc, to_bpc, opt) \
146 void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( \
147  SwsContext *c, int16_t *data, \
148  int dstW, const uint8_t *src, \
149  const int16_t *filter, \
150  const int32_t *filterPos, int filterSize)
151 #define SCALE_FUNCS(filter_n, opt) \
152  SCALE_FUNC(filter_n, 8, 15, opt); \
153  SCALE_FUNC(filter_n, 8, 19, opt);
154 #define ALL_SCALE_FUNCS(opt) \
155  SCALE_FUNCS(4, opt); \
156  SCALE_FUNCS(X8, opt); \
157  SCALE_FUNCS(X4, opt)
158 
159 ALL_SCALE_FUNCS(neon);
160 
/*
 * NEON vertical-scaler output routines for 8-bit destinations; defined
 * outside this file.  ff_yuv2planeX_8_neon takes a filter and an array of
 * source rows, ff_yuv2plane1_8_neon takes a single source row.
 */
void ff_yuv2planeX_8_neon(const int16_t *filter, int filterSize,
                          const int16_t **src, uint8_t *dest, int dstW,
                          const uint8_t *dither, int offset);
void ff_yuv2plane1_8_neon(
        const int16_t *src,
        uint8_t *dest,
        int dstW,
        const uint8_t *dither,
        int offset);
170 
/*
 * Store into `hscalefn` the horizontal scaler matching the context's bit
 * depths for one filter-size variant.  Expects a pointer named `c` with
 * `srcBpc` and `dstBpc` members to be in scope at the expansion site.
 *
 *   srcBpc == 8, dstBpc <= 14  ->  ff_hscale8to15_<filtersize>_<opt>
 *   srcBpc == 8, dstBpc >  14  ->  ff_hscale8to19_<filtersize>_<opt>
 *   srcBpc != 8, dstBpc <= 14  ->  ff_hscale16to15_<filtersize>_<opt>
 *   srcBpc != 8, dstBpc >  14  ->  ff_hscale16to19_<filtersize>_<opt>
 */
#define ASSIGN_SCALE_FUNC2(hscalefn, filtersize, opt) do {                   \
    if (c->srcBpc == 8) {                                                    \
        hscalefn = c->dstBpc <= 14                                           \
                 ? ff_hscale8to15_ ## filtersize ## _ ## opt                 \
                 : ff_hscale8to19_ ## filtersize ## _ ## opt;                \
    } else {                                                                 \
        hscalefn = c->dstBpc <= 14                                           \
                 ? ff_hscale16to15_ ## filtersize ## _ ## opt                \
                 : ff_hscale16to19_ ## filtersize ## _ ## opt;               \
    }                                                                        \
} while (0)
188 
/*
 * Pick the horizontal-scaler variant by filter size and delegate the
 * bit-depth choice to ASSIGN_SCALE_FUNC2:
 *
 *   filtersize == 4            -> "4"  variant
 *   filtersize multiple of 8   -> "X8" variant
 *   other multiples of 4       -> "X4" variant
 *   anything else              -> `hscalefn` is left untouched
 *
 * `filtersize` is parenthesized so that an expression argument is evaluated
 * as a whole.  It may be evaluated up to three times, so it must be free of
 * side effects.
 */
#define ASSIGN_SCALE_FUNC(hscalefn, filtersize, opt) do {                    \
    if ((filtersize) == 4) {                                                 \
        ASSIGN_SCALE_FUNC2(hscalefn, 4, opt);                                \
    } else if ((filtersize) % 8 == 0) {                                      \
        ASSIGN_SCALE_FUNC2(hscalefn, X8, opt);                               \
    } else if ((filtersize) % 4 == 0) {                                      \
        ASSIGN_SCALE_FUNC2(hscalefn, X4, opt);                               \
    }                                                                        \
} while (0)
197 
/*
 * Store ff_yuv2plane1_8_<opt> into `vscalefn` when the destination is
 * 8 bits per component; for any other depth `vscalefn` is left unchanged.
 * Expects a pointer named `c` with a `dstBpc` member at the expansion site.
 */
#define ASSIGN_VSCALE_FUNC(vscalefn, opt) do {                               \
    if (c->dstBpc == 8) {                                                    \
        vscalefn = ff_yuv2plane1_8_ ## opt;                                  \
    }                                                                        \
} while (0)
203 
/*
 * Declare the three NEON input converters for one packed pixel layout:
 *   ff_<name>ToY_neon        (one destination pointer)
 *   ff_<name>ToUV_neon       (two destination pointers)
 *   ff_<name>ToUV_half_neon  (two destination pointers; selected below when
 *                             c->chrSrcHSubSample is set)
 * The expansion carries no trailing semicolon; the caller provides it.
 */
#define NEON_INPUT(name)                                                     \
void ff_##name##ToY_neon(uint8_t *dst, const uint8_t *src,                   \
                         const uint8_t *, const uint8_t *,                   \
                         int w, uint32_t *coeffs, void *);                   \
void ff_##name##ToUV_neon(uint8_t *, uint8_t *,                              \
                          const uint8_t *, const uint8_t *, const uint8_t *, \
                          int w, uint32_t *coeffs, void *);                  \
void ff_##name##ToUV_half_neon(uint8_t *, uint8_t *,                         \
                               const uint8_t *, const uint8_t *,             \
                               const uint8_t *,                              \
                               int w, uint32_t *coeffs, void *)

/* One instantiation per source format handled in ff_sws_init_swscale_aarch64(). */
NEON_INPUT(abgr32);
NEON_INPUT(argb32);
NEON_INPUT(bgr24);
NEON_INPUT(bgra32);
NEON_INPUT(rgb24);
NEON_INPUT(rgba32);
220 
/*
 * NEON range-conversion routines, defined outside this file.  The luma
 * variants take one int16_t buffer, the chroma variants take separate U and V
 * buffers; `width` is presumably the number of samples per buffer (not
 * verifiable from this file).
 */
void ff_lumRangeFromJpeg_neon(int16_t *dst, int width);
void ff_lumRangeToJpeg_neon(int16_t *dst, int width);
void ff_chrRangeFromJpeg_neon(int16_t *dstU, int16_t *dstV, int width);
void ff_chrRangeToJpeg_neon(int16_t *dstU, int16_t *dstV, int width);
225 
227 {
228  if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) {
229  if (c->dstBpc <= 14) {
230  if (c->srcRange) {
231  c->lumConvertRange = ff_lumRangeFromJpeg_neon;
232  c->chrConvertRange = ff_chrRangeFromJpeg_neon;
233  } else {
234  c->lumConvertRange = ff_lumRangeToJpeg_neon;
235  c->chrConvertRange = ff_chrRangeToJpeg_neon;
236  }
237  }
238  }
239 }
240 
242 {
243  int cpu_flags = av_get_cpu_flags();
244 
245  if (have_neon(cpu_flags)) {
246  ASSIGN_SCALE_FUNC(c->hyScale, c->hLumFilterSize, neon);
247  ASSIGN_SCALE_FUNC(c->hcScale, c->hChrFilterSize, neon);
248  ASSIGN_VSCALE_FUNC(c->yuv2plane1, neon);
249  if (c->dstBpc == 8) {
250  c->yuv2planeX = ff_yuv2planeX_8_neon;
251  }
252  switch (c->srcFormat) {
253  case AV_PIX_FMT_ABGR:
254  c->lumToYV12 = ff_abgr32ToY_neon;
255  if (c->chrSrcHSubSample)
256  c->chrToYV12 = ff_abgr32ToUV_half_neon;
257  else
258  c->chrToYV12 = ff_abgr32ToUV_neon;
259  break;
260 
261  case AV_PIX_FMT_ARGB:
262  c->lumToYV12 = ff_argb32ToY_neon;
263  if (c->chrSrcHSubSample)
264  c->chrToYV12 = ff_argb32ToUV_half_neon;
265  else
266  c->chrToYV12 = ff_argb32ToUV_neon;
267  break;
268  case AV_PIX_FMT_BGR24:
269  c->lumToYV12 = ff_bgr24ToY_neon;
270  if (c->chrSrcHSubSample)
271  c->chrToYV12 = ff_bgr24ToUV_half_neon;
272  else
273  c->chrToYV12 = ff_bgr24ToUV_neon;
274  break;
275  case AV_PIX_FMT_BGRA:
276  c->lumToYV12 = ff_bgra32ToY_neon;
277  if (c->chrSrcHSubSample)
278  c->chrToYV12 = ff_bgra32ToUV_half_neon;
279  else
280  c->chrToYV12 = ff_bgra32ToUV_neon;
281  break;
282  case AV_PIX_FMT_RGB24:
283  c->lumToYV12 = ff_rgb24ToY_neon;
284  if (c->chrSrcHSubSample)
285  c->chrToYV12 = ff_rgb24ToUV_half_neon;
286  else
287  c->chrToYV12 = ff_rgb24ToUV_neon;
288  break;
289  case AV_PIX_FMT_RGBA:
290  c->lumToYV12 = ff_rgba32ToY_neon;
291  if (c->chrSrcHSubSample)
292  c->chrToYV12 = ff_rgba32ToUV_half_neon;
293  else
294  c->chrToYV12 = ff_rgba32ToUV_neon;
295  break;
296  default:
297  break;
298  }
300  }
301 }
ff_lumRangeFromJpeg_neon
void ff_lumRangeFromJpeg_neon(int16_t *dst, int width)
ff_hscale16to19_X4_neon_asm
void ff_hscale16to19_X4_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
av_pix_fmt_desc_get
const AVPixFmtDescriptor * av_pix_fmt_desc_get(enum AVPixelFormat pix_fmt)
Definition: pixdesc.c:2965
AV_PIX_FMT_FLAG_FLOAT
#define AV_PIX_FMT_FLAG_FLOAT
The pixel format contains IEEE-754 floating point values.
Definition: pixdesc.h:158
ff_yuv2planeX_8_neon
void ff_yuv2planeX_8_neon(const int16_t *filter, int filterSize, const int16_t **src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
ff_yuv2plane1_8_neon
void ff_yuv2plane1_8_neon(const int16_t *src, uint8_t *dest, int dstW, const uint8_t *dither, int offset)
filter
void(* filter)(uint8_t *src, int stride, int qscale)
Definition: h263dsp.c:29
ff_hscale16to15_X4_neon
static void ff_hscale16to15_X4_neon(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale.c:76
AV_PIX_FMT_BGR24
@ AV_PIX_FMT_BGR24
packed RGB 8:8:8, 24bpp, BGRBGR...
Definition: pixfmt.h:76
AV_PIX_FMT_BGRA
@ AV_PIX_FMT_BGRA
packed BGRA 8:8:8:8, 32bpp, BGRABGRA...
Definition: pixfmt.h:102
av_get_cpu_flags
int av_get_cpu_flags(void)
Return the flags which specify extensions supported by the CPU.
Definition: cpu.c:103
cpu_flags
static atomic_int cpu_flags
Definition: cpu.c:52
ff_hscale16to15_X4_neon_asm
void ff_hscale16to15_X4_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
ff_sws_init_swscale_aarch64
av_cold void ff_sws_init_swscale_aarch64(SwsContext *c)
Definition: swscale.c:241
ff_hscale16to15_4_neon
static void ff_hscale16to15_4_neon(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale.c:44
NEON_INPUT
#define NEON_INPUT(name)
Definition: swscale.c:204
ff_hscale16to19_X4_neon
static void ff_hscale16to19_X4_neon(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale.c:127
ff_hscale16to15_X8_neon_asm
void ff_hscale16to15_X8_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
av_cold
#define av_cold
Definition: attributes.h:90
width
#define width
ff_hscale16to19_X8_neon
static void ff_hscale16to19_X8_neon(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale.c:109
bits
uint8_t bits
Definition: vp3data.h:128
ff_chrRangeToJpeg_neon
void ff_chrRangeToJpeg_neon(int16_t *dstU, int16_t *dstV, int width)
AV_PIX_FMT_RGBA
@ AV_PIX_FMT_RGBA
packed RGBA 8:8:8:8, 32bpp, RGBARGBA...
Definition: pixfmt.h:100
ff_hscale16to19_X8_neon_asm
void ff_hscale16to19_X8_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
ff_sws_init_range_convert_aarch64
av_cold void ff_sws_init_range_convert_aarch64(SwsContext *c)
Definition: swscale.c:226
AV_PIX_FMT_ABGR
@ AV_PIX_FMT_ABGR
packed ABGR 8:8:8:8, 32bpp, ABGRABGR...
Definition: pixfmt.h:101
c
Undefined Behavior In the C some operations are like signed integer dereferencing freed accessing outside allocated Undefined Behavior must not occur in a C it is not safe even if the output of undefined operations is unused The unsafety may seem nit picking but Optimizing compilers have in fact optimized code on the assumption that no undefined Behavior occurs Optimizing code based on wrong assumptions can and has in some cases lead to effects beyond the output of computations The signed integer overflow problem in speed critical code Code which is highly optimized and works with signed integers sometimes has the problem that often the output of the computation does not c
Definition: undefined.txt:32
ff_chrRangeFromJpeg_neon
void ff_chrRangeFromJpeg_neon(int16_t *dstU, int16_t *dstV, int width)
ff_hscale16to19_4_neon
static void ff_hscale16to19_4_neon(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale.c:91
AV_PIX_FMT_RGB24
@ AV_PIX_FMT_RGB24
packed RGB 8:8:8, 24bpp, RGBRGB...
Definition: pixfmt.h:75
shift
static int shift(int a, int b)
Definition: bonk.c:261
isAnyRGB
static av_always_inline int isAnyRGB(enum AVPixelFormat pix_fmt)
Definition: swscale_internal.h:835
have_neon
#define have_neon(flags)
Definition: cpu.h:26
offset
it s the only field you need to keep assuming you have a context There is some magic you don t need to care about around this just let it vf offset
Definition: writing_filters.txt:86
attributes.h
ff_hscale16to19_4_neon_asm
void ff_hscale16to19_4_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
AV_PIX_FMT_ARGB
@ AV_PIX_FMT_ARGB
packed ARGB 8:8:8:8, 32bpp, ARGBARGB...
Definition: pixfmt.h:99
swscale_internal.h
ASSIGN_VSCALE_FUNC
#define ASSIGN_VSCALE_FUNC(vscalefn, opt)
Definition: swscale.c:198
AV_PIX_FMT_PAL8
@ AV_PIX_FMT_PAL8
8 bits with AV_PIX_FMT_RGB32 palette
Definition: pixfmt.h:84
ASSIGN_SCALE_FUNC
#define ASSIGN_SCALE_FUNC(hscalefn, filtersize, opt)
Definition: swscale.c:189
desc
const char * desc
Definition: libsvtav1.c:79
ff_hscale16to15_4_neon_asm
void ff_hscale16to15_4_neon_asm(int shift, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
AVPixFmtDescriptor
Descriptor that unambiguously describes how the bits of a pixel are stored in the up to 4 data planes...
Definition: pixdesc.h:69
src
INIT_CLIP pixel * src
Definition: h264pred_template.c:418
ff_hscale16to15_X8_neon
static void ff_hscale16to15_X8_neon(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, const int16_t *filter, const int32_t *filterPos, int filterSize)
Definition: swscale.c:60
int32_t
int32_t
Definition: audioconvert.c:56
cpu.h
ALL_SCALE_FUNCS
#define ALL_SCALE_FUNCS(opt)
Definition: swscale.c:154
SwsContext
Definition: swscale_internal.h:299
ff_lumRangeToJpeg_neon
void ff_lumRangeToJpeg_neon(int16_t *dst, int width)
swscale.h
dither
static const uint8_t dither[8][8]
Definition: vf_fspp.c:62