23 #define _SVID_SOURCE // needed for MAP_ANONYMOUS
24 #define _DARWIN_C_SOURCE // needed for MAP_ANON
31 #if defined(MAP_ANON) && !defined(MAP_ANONYMOUS)
32 #define MAP_ANONYMOUS MAP_ANON
36 #define WIN32_LEAN_AND_MEAN
63 return FFMPEG_CONFIGURATION;
68 #define LICENSE_PREFIX "libswscale license: "
72 #define RET 0xC3 // near return opcode for x86
229 #if FF_API_SWS_FORMAT_NAME
236 return "Unknown format";
244 return ((d * dist + c) * dist + b) * dist +
a;
247 b + 2.0 * c + 3.0 * d,
249 -b - 3.0 * c - 6.0 * d,
254 int *outFilterSize,
int xInc,
int srcW,
255 int dstW,
int filterAlign,
int one,
265 int64_t *filter2 = NULL;
266 const int64_t fone = 1LL << (54 -
FFMIN(
av_log2(srcW/dstW), 8));
274 if (
FFABS(xInc - 0x10000) < 10) {
278 dstW *
sizeof(*filter) * filterSize, fail);
280 for (i = 0; i < dstW; i++) {
281 filter[i * filterSize] = fone;
289 dstW *
sizeof(*filter) * filterSize, fail);
291 xDstInSrc = xInc / 2 - 0x8000;
292 for (i = 0; i < dstW; i++) {
293 int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
295 (*filterPos)[i] = xx;
299 }
else if ((xInc <= (1 << 16) && (flags &
SWS_AREA)) ||
305 dstW *
sizeof(*filter) * filterSize, fail);
307 xDstInSrc = xInc / 2 - 0x8000;
308 for (i = 0; i < dstW; i++) {
309 int xx = (xDstInSrc - ((filterSize - 1) << 15) + (1 << 15)) >> 16;
312 (*filterPos)[i] = xx;
314 for (j = 0; j < filterSize; j++) {
315 int64_t
coeff= fone -
FFABS(((int64_t)xx<<16) - xDstInSrc)*(fone>>16);
318 filter[i * filterSize + j] =
coeff;
329 else if (flags &
SWS_X)
348 filterSize = 1 + sizeFactor;
350 filterSize = 1 + (sizeFactor * srcW + dstW - 1) / dstW;
352 filterSize =
FFMIN(filterSize, srcW - 2);
353 filterSize =
FFMAX(filterSize, 1);
356 dstW *
sizeof(*filter) * filterSize, fail);
358 xDstInSrc = xInc - 0x10000;
359 for (i = 0; i < dstW; i++) {
360 int xx = (xDstInSrc - ((filterSize - 2) << 16)) / (1 << 17);
362 (*filterPos)[i] = xx;
363 for (j = 0; j < filterSize; j++) {
364 int64_t d = (
FFABS(((int64_t)xx << 17) - xDstInSrc)) << 13;
370 floatd = d * (1.0 / (1 << 30));
372 if (flags & SWS_BICUBIC) {
376 if (d >= 1LL << 31) {
379 int64_t dd = (d * d) >> 30;
380 int64_t ddd = (dd * d) >> 30;
383 coeff = (12 * (1 << 24) - 9 * B - 6 * C) * ddd +
384 (-18 * (1 << 24) + 12 * B + 6 * C) * dd +
385 (6 * (1 << 24) - 2 * B) * (1 << 30);
387 coeff = (-B - 6 * C) * ddd +
388 (6 * B + 30 * C) * dd +
389 (-12 * B - 48 * C) * d +
390 (8 * B + 24 * C) * (1 << 30);
392 coeff /= (1LL<<54)/fone;
395 else if (flags & SWS_X) {
396 double p = param ? param * 0.01 : 0.3;
397 coeff = d ? sin(d *
M_PI) / (d *
M_PI) : 1.0;
398 coeff *= pow(2.0, -p * d * d);
401 else if (flags & SWS_X) {
406 c = cos(floatd *
M_PI);
413 coeff = (c * 0.5 + 0.5) * fone;
414 }
else if (flags & SWS_AREA) {
415 int64_t d2 = d - (1 << 29);
416 if (d2 * xInc < -(1LL << (29 + 16)))
417 coeff = 1.0 * (1LL << (30 + 16));
418 else if (d2 * xInc < (1LL << (29 + 16)))
419 coeff = -d2 * xInc + (1LL << (29 + 16));
422 coeff *= fone >> (30 + 16);
423 }
else if (flags & SWS_GAUSS) {
425 coeff = (pow(2.0, -p * floatd * floatd)) * fone;
426 }
else if (flags & SWS_SINC) {
427 coeff = (d ? sin(floatd *
M_PI) / (floatd *
M_PI) : 1.0) * fone;
428 }
else if (flags & SWS_LANCZOS) {
430 coeff = (d ? sin(floatd *
M_PI) * sin(floatd * M_PI / p) /
431 (floatd * floatd * M_PI * M_PI / p) : 1.0) * fone;
434 }
else if (flags & SWS_BILINEAR) {
435 coeff = (1 << 30) - d;
439 }
else if (flags & SWS_SPLINE) {
440 double p = -2.196152422706632;
446 filter[i * filterSize + j] =
coeff;
449 xDstInSrc += 2 * xInc;
457 filter2Size = filterSize;
459 filter2Size += srcFilter->
length - 1;
461 filter2Size += dstFilter->
length - 1;
465 for (i = 0; i < dstW; i++) {
469 for (k = 0; k < srcFilter->
length; k++) {
470 for (j = 0; j < filterSize; j++)
471 filter2[i * filter2Size + k + j] +=
472 srcFilter->
coeff[k] * filter[i * filterSize + j];
475 for (j = 0; j < filterSize; j++)
476 filter2[i * filter2Size + j] = filter[i * filterSize + j];
480 (*filterPos)[i] += (filterSize - 1) / 2 - (filter2Size - 1) / 2;
487 for (i = dstW - 1; i >= 0; i--) {
488 int min = filter2Size;
490 int64_t cutOff = 0.0;
493 for (j = 0; j < filter2Size; j++) {
495 cutOff +=
FFABS(filter2[i * filter2Size]);
502 if (i < dstW - 1 && (*filterPos)[i] >= (*filterPos)[i + 1])
506 for (k = 1; k < filter2Size; k++)
507 filter2[i * filter2Size + k - 1] = filter2[i * filter2Size + k];
508 filter2[i * filter2Size + k - 1] = 0;
514 for (j = filter2Size - 1; j > 0; j--) {
515 cutOff +=
FFABS(filter2[i * filter2Size + j]);
522 if (min > minFilterSize)
528 if (minFilterSize < 5)
534 if (minFilterSize < 3)
540 if (minFilterSize == 1 && filterAlign == 2)
545 filterSize = (minFilterSize + (filterAlign - 1)) & (~(filterAlign - 1));
547 filter =
av_malloc(filterSize * dstW *
sizeof(*filter));
550 av_log(NULL,
AV_LOG_ERROR,
"sws: filterSize %d is too large, try less extreem scaling or increase MAX_FILTER_SIZE and recompile\n", filterSize);
553 *outFilterSize = filterSize;
557 "SwScaler: reducing / aligning filtersize %d -> %d\n",
558 filter2Size, filterSize);
560 for (i = 0; i < dstW; i++) {
563 for (j = 0; j < filterSize; j++) {
564 if (j >= filter2Size)
565 filter[i * filterSize + j] = 0;
567 filter[i * filterSize + j] = filter2[i * filter2Size + j];
569 filter[i * filterSize + j] = 0;
576 for (i = 0; i < dstW; i++) {
578 if ((*filterPos)[i] < 0) {
580 for (j = 1; j < filterSize; j++) {
581 int left =
FFMAX(j + (*filterPos)[i], 0);
582 filter[i * filterSize + left] += filter[i * filterSize + j];
583 filter[i * filterSize + j] = 0;
588 if ((*filterPos)[i] + filterSize > srcW) {
589 int shift = (*filterPos)[i] + filterSize - srcW;
591 for (j = filterSize - 2; j >= 0; j--) {
592 int right =
FFMIN(j + shift, filterSize - 1);
593 filter[i * filterSize + right] += filter[i * filterSize + j];
594 filter[i * filterSize + j] = 0;
596 (*filterPos)[i]= srcW - filterSize;
603 *outFilterSize * (dstW + 3) *
sizeof(int16_t), fail);
606 for (i = 0; i < dstW; i++) {
611 for (j = 0; j < filterSize; j++) {
612 sum += filter[i * filterSize + j];
614 sum = (sum + one / 2) / one;
615 for (j = 0; j < *outFilterSize; j++) {
616 int64_t
v = filter[i * filterSize + j] + error;
618 (*outFilter)[i * (*outFilterSize) + j] = intV;
619 error = v - intV * sum;
623 (*filterPos)[dstW + 0] =
624 (*filterPos)[dstW + 1] =
625 (*filterPos)[dstW + 2] = (*filterPos)[dstW - 1];
627 for (i = 0; i < *outFilterSize; i++) {
628 int k = (dstW - 1) * (*outFilterSize) + i;
629 (*outFilter)[k + 1 * (*outFilterSize)] =
630 (*outFilter)[k + 2 * (*outFilterSize)] =
631 (*outFilter)[k + 3 * (*outFilterSize)] = (*outFilter)[k];
644 #if HAVE_MMXEXT_INLINE
645 static av_cold int init_hscaler_mmxext(
int dstW,
int xInc,
uint8_t *filterCode,
674 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
675 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
676 "movd 1(%%"REG_c
", %%"REG_S
"), %%mm1 \n\t"
677 "punpcklbw %%mm7, %%mm1 \n\t"
678 "punpcklbw %%mm7, %%mm0 \n\t"
679 "pshufw $0xFF, %%mm1, %%mm1 \n\t"
681 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
683 "psubw %%mm1, %%mm0 \n\t"
684 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
685 "pmullw %%mm3, %%mm0 \n\t"
686 "psllw $7, %%mm1 \n\t"
687 "paddw %%mm1, %%mm0 \n\t"
689 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
691 "add $8, %%"REG_a
" \n\t"
706 :
"=r" (fragmentA),
"=r" (imm8OfPShufW1A),
"=r" (imm8OfPShufW2A),
707 "=r" (fragmentLengthA)
714 "movq (%%"REG_d
", %%"REG_a
"), %%mm3 \n\t"
715 "movd (%%"REG_c
", %%"REG_S
"), %%mm0 \n\t"
716 "punpcklbw %%mm7, %%mm0 \n\t"
717 "pshufw $0xFF, %%mm0, %%mm1 \n\t"
719 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
721 "psubw %%mm1, %%mm0 \n\t"
722 "movl 8(%%"REG_b
", %%"REG_a
"), %%esi \n\t"
723 "pmullw %%mm3, %%mm0 \n\t"
724 "psllw $7, %%mm1 \n\t"
725 "paddw %%mm1, %%mm0 \n\t"
727 "movq %%mm0, (%%"REG_D
", %%"REG_a
") \n\t"
729 "add $8, %%"REG_a
" \n\t"
744 :
"=r" (fragmentB),
"=r" (imm8OfPShufW1B),
"=r" (imm8OfPShufW2B),
745 "=r" (fragmentLengthB)
751 for (i = 0; i < dstW / numSplits; i++) {
756 int b = ((xpos + xInc) >> 16) - xx;
757 int c = ((xpos + xInc * 2) >> 16) - xx;
758 int d = ((xpos + xInc * 3) >> 16) - xx;
759 int inc = (d + 1 < 4);
760 uint8_t *fragment = (d + 1 < 4) ? fragmentB : fragmentA;
761 x86_reg imm8OfPShufW1 = (d + 1 < 4) ? imm8OfPShufW1B : imm8OfPShufW1A;
762 x86_reg imm8OfPShufW2 = (d + 1 < 4) ? imm8OfPShufW2B : imm8OfPShufW2A;
763 x86_reg fragmentLength = (d + 1 < 4) ? fragmentLengthB : fragmentLengthA;
764 int maxShift = 3 - (d + inc);
768 filter[i] = ((xpos & 0xFFFF) ^ 0xFFFF) >> 9;
769 filter[i + 1] = (((xpos + xInc) & 0xFFFF) ^ 0xFFFF) >> 9;
770 filter[i + 2] = (((xpos + xInc * 2) & 0xFFFF) ^ 0xFFFF) >> 9;
771 filter[i + 3] = (((xpos + xInc * 3) & 0xFFFF) ^ 0xFFFF) >> 9;
772 filterPos[i / 2] = xx;
774 memcpy(filterCode + fragmentPos, fragment, fragmentLength);
776 filterCode[fragmentPos + imm8OfPShufW1] = (a + inc) |
780 filterCode[fragmentPos + imm8OfPShufW2] = a | (b << 2) |
784 if (i + 4 - inc >= dstW)
786 else if ((filterPos[i / 2] & 3) <= maxShift)
787 shift = filterPos[i / 2] & 3;
789 if (shift && i >= shift) {
790 filterCode[fragmentPos + imm8OfPShufW1] += 0x55 *
shift;
791 filterCode[fragmentPos + imm8OfPShufW2] += 0x55 *
shift;
792 filterPos[i / 2] -=
shift;
796 fragmentPos += fragmentLength;
799 filterCode[fragmentPos] =
RET;
804 filterPos[((i / 2) + 1) & (~1)] = xpos >> 16;
806 return fragmentPos + 1;
819 int64_t
W,
V, Z, Cy, Cu, Cv;
820 int64_t vr = table[0];
821 int64_t ub = table[1];
822 int64_t ug = -table[2];
823 int64_t vg = -table[3];
828 static const int8_t map[] = {
853 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ,
854 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ,
855 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ,
856 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ,
857 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ,
858 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ,
859 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ,
860 -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 ,
897 c->input_rgb2yuv_table[
BY_IDX] = ((int)(0.114 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
898 c->input_rgb2yuv_table[
BV_IDX] = (-(int)(0.081 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
899 c->input_rgb2yuv_table[
BU_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
900 c->input_rgb2yuv_table[
GY_IDX] = ((int)(0.587 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
901 c->input_rgb2yuv_table[
GV_IDX] = (-(int)(0.419 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
902 c->input_rgb2yuv_table[
GU_IDX] = (-(int)(0.331 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
903 c->input_rgb2yuv_table[
RY_IDX] = ((int)(0.299 * 219 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
904 c->input_rgb2yuv_table[
RV_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
905 c->input_rgb2yuv_table[
RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5));
908 AV_WL16(p + 16*4 + 2*i, map[i] >= 0 ? c->input_rgb2yuv_table[map[i]] : 0);
916 static const int16_t xyz2rgb_matrix[3][4] = {
917 {13270, -6295, -2041},
919 { 228, -835, 4329} };
920 static int16_t xyzgamma_tab[4096], rgbgamma_tab[4096];
926 if (rgbgamma_tab[4095])
930 for (i = 0; i < 4096; i++) {
931 xyzgamma_tab[i] =
lrint(pow(i / 4095.0, xyzgamma) * 4095.0);
932 rgbgamma_tab[i] =
lrint(pow(i / 4095.0, rgbgamma) * 4095.0);
937 int srcRange,
const int table[4],
int dstRange,
938 int brightness,
int contrast,
int saturation)
966 contrast, saturation);
971 contrast, saturation);
980 int *srcRange,
int **
table,
int *dstRange,
981 int *brightness,
int *contrast,
int *saturation)
1058 int usesVFilter, usesHFilter;
1060 SwsFilter dummyFilter = { NULL, NULL, NULL, NULL };
1065 int dst_stride =
FFALIGN(dstW *
sizeof(int16_t) + 66, 16);
1078 unscaled = (srcW == dstW && srcH == dstH);
1083 av_log(c,
AV_LOG_WARNING,
"deprecated pixel format used, make sure you did set range correctly\n");
1119 if (!i || (i & (i - 1))) {
1124 if (srcW < 1 || srcH < 1 || dstW < 1 || dstH < 1) {
1128 srcW, srcH, dstW, dstH);
1133 dstFilter = &dummyFilter;
1135 srcFilter = &dummyFilter;
1137 c->
lumXInc = (((int64_t)srcW << 16) + (dstW >> 1)) / dstW;
1138 c->
lumYInc = (((int64_t)srcH << 16) + (dstH >> 1)) / dstH;
1141 c->
vRounder = 4 * 0x0001000100010001ULL;
1143 usesVFilter = (srcFilter->
lumV && srcFilter->
lumV->
length > 1) ||
1147 usesHFilter = (srcFilter->
lumH && srcFilter->
lumH->
length > 1) ||
1169 "Error diffusion dither is only supported in full chroma interpolation for destination format '%s'\n",
1176 "Ordered dither is not supported in full chroma interpolation for destination format '%s'\n",
1183 if (!(flags & SWS_FULL_CHR_H_INT)) {
1185 "%s output is not supported with half chroma resolution, switching to full\n",
1194 if (flags & SWS_FULL_CHR_H_INT &&
1209 "full chroma interpolation for destination format '%s' not yet implemented\n",
1211 flags &= ~SWS_FULL_CHR_H_INT;
1214 if (
isAnyRGB(dstFormat) && !(flags & SWS_FULL_CHR_H_INT))
1246 if (unscaled && !usesHFilter && !usesVFilter &&
1253 "using unscaled %s -> %s special converter\n",
1272 (srcW & 15) == 0) ? 1 : 0;
1278 "output width is not a multiple of 32 -> no MMXEXT scaler\n");
1295 if (flags & SWS_FAST_BILINEAR) {
1302 c->
lumXInc = ((int64_t)(srcW - 2) << 16) / (dstW - 2) - 20;
1307 #define USE_MMAP (HAVE_MMAP && HAVE_MPROTECT && defined MAP_ANONYMOUS)
1311 #if HAVE_MMXEXT_INLINE
1317 NULL, NULL, NULL, 4);
1321 PROT_READ | PROT_WRITE,
1322 MAP_PRIVATE | MAP_ANONYMOUS,
1325 PROT_READ | PROT_WRITE,
1326 MAP_PRIVATE | MAP_ANONYMOUS,
1328 #elif HAVE_VIRTUALALLOC
1332 PAGE_EXECUTE_READWRITE);
1336 PAGE_EXECUTE_READWRITE);
1342 #ifdef MAP_ANONYMOUS
1369 const int filterAlign =
1376 srcW, dstW, filterAlign, 1 << 14,
1378 cpu_flags, srcFilter->
lumH, dstFilter->
lumH,
1384 (flags & SWS_BICUBLIN) ? (flags |
SWS_BILINEAR) : flags,
1385 cpu_flags, srcFilter->
chrH, dstFilter->
chrH,
1393 const int filterAlign =
1399 c->
lumYInc, srcH, dstH, filterAlign, (1 << 12),
1406 filterAlign, (1 << 12),
1418 short *p = (
short *)&c->vYCoeffsBank[i];
1419 for (j = 0; j < 8; j++)
1425 short *p = (
short *)&c->vCCoeffsBank[i];
1426 for (j = 0; j < 8; j++)
1435 for (i = 0; i < dstH; i++) {
1436 int chrI = (int64_t)i * c->
chrDstH / dstH;
1451 for (i = 0; i < 4; i++)
1466 dst_stride + 16, fail);
1474 dst_stride * 2 + 32, fail);
1479 if (CONFIG_SWSCALE_ALPHA && c->
alpPixBuf)
1482 dst_stride + 16, fail);
1490 for(j=0; j<dst_stride/2+1; j++)
1493 for(j=0; j<dst_stride+1; j++)
1499 if (flags & SWS_FAST_BILINEAR)
1505 else if (flags &
SWS_X)
1549 "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1552 "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
1563 #if FF_API_SWS_GETCONTEXT
1567 SwsFilter *dstFilter,
const double *param)
1589 c->
param[0] = param[0];
1590 c->
param[1] = param[1];
1606 float lumaSharpen,
float chromaSharpen,
1607 float chromaHShift,
float chromaVShift,
1614 if (lumaGBlur != 0.0) {
1622 if (chromaGBlur != 0.0) {
1630 if (chromaSharpen != 0.0) {
1639 if (lumaSharpen != 0.0) {
1648 if (chromaHShift != 0.0)
1651 if (chromaVShift != 0.0)
1671 if(length <= 0 || length > INT_MAX/
sizeof(
double))
1686 const int length = (int)(variance * quality + 0.5) | 1;
1688 double middle = (length - 1) * 0.5;
1691 if(variance < 0 || quality < 0)
1699 for (i = 0; i <
length; i++) {
1700 double dist = i - middle;
1701 vec->
coeff[i] = exp(-dist * dist / (2 * variance * variance)) /
1702 sqrt(2 * variance *
M_PI);
1718 for (i = 0; i <
length; i++)
1734 for (i = 0; i < a->
length; i++)
1744 for (i = 0; i < a->
length; i++)
1745 a->
coeff[i] *= scalar;
1762 for (i = 0; i < a->
length; i++) {
1763 for (j = 0; j < b->
length; j++) {
1780 for (i = 0; i < a->
length; i++)
1797 for (i = 0; i < a->
length; i++)
1815 for (i = 0; i < a->
length; i++) {
1816 vec->
coeff[i + (length - 1) / 2 -
1867 for (i = 0; i < a->
length; i++)
1880 for (i = 0; i < a->
length; i++)
1881 if (a->
coeff[i] > max)
1884 for (i = 0; i < a->
length; i++)
1885 if (a->
coeff[i] < min)
1890 for (i = 0; i < a->
length; i++) {
1891 int x = (int)((a->
coeff[i] - min) * 60.0 / range + 0.5);
1892 av_log(log_ctx, log_level,
"%1.3f ", a->
coeff[i]);
1894 av_log(log_ctx, log_level,
" ");
1895 av_log(log_ctx, log_level,
"|\n");
1943 if (CONFIG_SWSCALE_ALPHA && c->
alpPixBuf) {
1949 for (i = 0; i < 4; i++)
1972 #elif HAVE_VIRTUALALLOC
1997 const double *
param)
2003 param = default_param;
2006 (context->
srcW != srcW ||
2007 context->
srcH != srcH ||
2009 context->
dstW != dstW ||
2010 context->
dstH != dstH ||
2012 context->
flags != flags ||
2013 context->
param[0] != param[0] ||
2014 context->
param[1] != param[1])) {
2035 context->
param[0] = param[0];
2036 context->
param[1] = param[1];
2040 context->
dstRange, 0, 1 << 16, 1 << 16);