[FFmpeg-trac] #3451(swscale:new): sws_scale crashes in high resolutions when using mmx optimization

FFmpeg trac at avcodec.org
Thu Mar 13 10:09:14 CET 2014


#3451: sws_scale crashes in high resolutions when using mmx optimization
-------------------------------------+-----------------------------------
             Reporter:  miro82       |                    Owner:
                 Type:  defect       |                   Status:  new
             Priority:  normal       |                Component:  swscale
              Version:  unspecified  |               Resolution:
             Keywords:  crash        |               Blocked By:
             Blocking:               |  Reproduced by developer:  0
Analyzed by developer:  0            |
-------------------------------------+-----------------------------------

Comment (by miro82):

 I discovered that --enable-shared must be set in order to trigger the
 crash. This time FFmpeg was configured and built with:

 {{{
 ./configure --disable-yasm --disable-iconv --enable-libx265 --enable-libx264 --enable-gpl --enable-shared --disable-stripping
 }}}

 Debug output from the application:

 {{{
 Miroslavs-MacBook-Pro:bin miran46$ lldb mmx_test2
 Current executable set to 'mmx_test2' (x86_64).
 (lldb) r
 Process 29388 launched:
 '/Users/miran46/code/projects/FFMpegCapture/mmx_test2_build/bin/mmx_test2'
 (x86_64)
 x265 [info]: using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX AVX2
 FMA3 LZCNT BMI2
 x265 [info]: Main profile, Level-5.1 (High tier)
 x265 [info]: WPP streams / pool / frames         : 34 / 4 / 1
 x265 [info]: CU size                             : 64
 x265 [info]: Max RQT depth inter / intra         : 1 / 1
 x265 [info]: ME / range / subpel / merge         : hex / 57 / 2 / 2
 x265 [info]: Keyframe min / max / scenecut       : 25 / 250 / 40
 x265 [info]: Lookahead / bframes / badapt        : 20 / 4 / 2
 x265 [info]: b-pyramid / weightp / refs          : 1 / 1 / 3
 x265 [info]: Rate Control / AQ-Strength / CUTree : ABR-104857 kbps / 1.0 /
 1
 x265 [info]: tools: rect amp rd=3 lft sao-lcu sign-hide
 Process 29388 stopped
 * thread #1: tid = 0x14f790, 0x000000010112e8b8
 libswscale.2.dylib`rgb24toyv12_mmxext(src=0x000000010c396300,
 ydst=0x000000010a000000, udst=0x000000010a7e9000, vdst=0x000000010abdd080,
 width=3840, height=2160, lumStride=<unavailable>,
 chromStride=<unavailable>, srcStride=<unavailable>,
 rgb2yuv=0x0000000109805340) + 440 at rgb2rgb_template.c:1629, queue =
 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=1,
 address=0x10c399000)
     frame #0: 0x000000010112e8b8
 libswscale.2.dylib`rgb24toyv12_mmxext(src=0x000000010c396300,
 ydst=0x000000010a000000, udst=0x000000010a7e9000, vdst=0x000000010abdd080,
 width=3840, height=2160, lumStride=<unavailable>,
 chromStride=<unavailable>, srcStride=<unavailable>,
 rgb2yuv=0x0000000109805340) + 440 at rgb2rgb_template.c:1629
    1626     for (y=0; y<height-2; y+=2) {
    1627         int i;
    1628         for (i=0; i<2; i++) {
 -> 1629             __asm__ volatile(
    1630                 "mov                        %2, %%"REG_a"   \n\t"
    1631                 "movq          "BGR2Y_IDX"(%3), %%mm6       \n\t"
    1632                 "movq       "MANGLE(ff_w1111)", %%mm5       \n\t"
 (lldb) bt
 * thread #1: tid = 0x14f790, 0x000000010112e8b8
 libswscale.2.dylib`rgb24toyv12_mmxext(src=0x000000010c396300,
 ydst=0x000000010a000000, udst=0x000000010a7e9000, vdst=0x000000010abdd080,
 width=3840, height=2160, lumStride=<unavailable>,
 chromStride=<unavailable>, srcStride=<unavailable>,
 rgb2yuv=0x0000000109805340) + 440 at rgb2rgb_template.c:1629, queue =
 'com.apple.main-thread', stop reason = EXC_BAD_ACCESS (code=1,
 address=0x10c399000)
   * frame #0: 0x000000010112e8b8
 libswscale.2.dylib`rgb24toyv12_mmxext(src=0x000000010c396300,
 ydst=0x000000010a000000, udst=0x000000010a7e9000, vdst=0x000000010abdd080,
 width=3840, height=2160, lumStride=<unavailable>,
 chromStride=<unavailable>, srcStride=<unavailable>,
 rgb2yuv=0x0000000109805340) + 440 at rgb2rgb_template.c:1629
     frame #1: 0x0000000101116b49
 libswscale.2.dylib`bgr24ToYv12Wrapper(c=0x0000000109801200,
 src=<unavailable>, srcStride=<unavailable>, srcSliceY=0, srcSliceH=2160,
 dst=0x00007fff5fbffa00, dstStride=0x00007fff5fbff9e0) + 137 at
 swscale_unscaled.c:1314
     frame #2: 0x00000001011136b7
 libswscale.2.dylib`sws_scale(c=<unavailable>, srcSlice=<unavailable>,
 srcStride=<unavailable>, srcSliceY=<unavailable>, srcSliceH=<unavailable>,
 dst=<unavailable>, dstStride=<unavailable>) + 2919 at swscale.c:1101
     frame #3: 0x0000000100004894 mmx_test2`Encoder::addFrame(unsigned
 char*) + 212
     frame #4: 0x0000000100003769 mmx_test2`main + 345
 (lldb) disassemble --pc
 libswscale.2.dylib`rgb24toyv12_mmxext + 440 at rgb2rgb_template.c:1629:
 -> 0x10112e8b8:  movd   0x15(%rbx,%rdx), %mm3
    0x10112e8bd:  punpcklbw %mm7, %mm2
    0x10112e8c0:  punpcklbw %mm7, %mm3
    0x10112e8c3:  pmaddwd %mm6, %mm4
 (lldb) info all-registers
 error: 'info' is not a valid command.
 (lldb) register read --all
 General Purpose Registers:
        rax = 0xfffffffffffffff8
        rbx = 0x000000010c399000
        rcx = 0x0000000000000000
        rdx = 0xffffffffffffffe8
        rdi = 0x000000000000086c
        rsi = 0x0000000000000f00
        rbp = 0x0000000000000780
        rsp = 0x00007fff5fbff8b0
         r8 = 0x0000000000000f00
         r9 = 0x0000000000002d00
        r10 = 0x0000000000000000
        r11 = 0x000000010a000f00
        r12 = 0x0000000109805340
        r13 = 0x000000010a7e9000
        r14 = 0xfffffffffffff100
        r15 = 0x000000010c396300
        rip = 0x000000010112e8b8  libswscale.2.dylib`rgb24toyv12_mmxext +
 440 at rgb2rgb_template.c:1629
     rflags = 0x0000000000010282
         cs = 0x000000000000002b
         fs = 0x00000000ffff0000
         gs = 0x00000000ffff0000
        eax = 0xfffffff8
        ebx = 0x0c399000
        ecx = 0x00000000
        edx = 0xffffffe8
        edi = 0x0000086c
        esi = 0x00000f00
        ebp = 0x00000780
        esp = 0x5fbff8b0
        r8d = 0x00000f00
        r9d = 0x00002d00
       r10d = 0x00000000
       r11d = 0x0a000f00
       r12d = 0x09805340
       r13d = 0x0a7e9000
       r14d = 0xfffff100
       r15d = 0x0c396300
         ax = 0xfff8
         bx = 0x9000
         cx = 0x0000
         dx = 0xffe8
         di = 0x086c
         si = 0x0f00
         bp = 0x0780
         sp = 0xf8b0
        r8w = 0x0f00
        r9w = 0x2d00
       r10w = 0x0000
       r11w = 0x0f00
       r12w = 0x5340
       r13w = 0x9000
       r14w = 0xf100
       r15w = 0x6300
         ah = 0xff
         bh = 0x90
         ch = 0x00
         dh = 0xff
         al = 0xf8
         bl = 0x00
         cl = 0x00
         dl = 0xe8
        dil = 0x6c
        sil = 0x00
        bpl = 0x80
        spl = 0xb0
        r8l = 0x00
        r9l = 0x00
       r10l = 0x00
       r11l = 0x00
       r12l = 0x40
       r13l = 0x00
       r14l = 0x00
       r15l = 0x00

 Floating Point Registers:
      fctrl = 0x037f
      fstat = 0x0000
       ftag = 0xff
        fop = 0x0000
      fioff = 0x00000000
      fiseg = 0x0000
      fooff = 0x00000000
      foseg = 0x0000
      mxcsr = 0x00001fa0
   mxcsrmask = 0x0000ffff
      stmm0 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff}
      stmm1 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff}
      stmm2 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff}
      stmm3 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff}
      stmm4 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff}
      stmm5 = {0x01 0x00 0x01 0x00 0x01 0x00 0x01 0x00 0xff 0xff}
      stmm6 = {0x88 0x0c 0x87 0x40 0xde 0x20 0x00 0x00 0xff 0xff}
      stmm7 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0xff 0xff}
       ymm0 = {0x00 0x0f 0x00 0x00 0x80 0x07 0x00 0x00 0x80 0x07 0x00 0x00
 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00}
       ymm1 = {0x00 0x2d 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00}
       ymm2 = {0x00 0x00 0x00 0x00 0x00 0x00 0xe0 0x43 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00}
       ymm3 = {0x66 0xa9 0x49 0x15 0x00 0x00 0x00 0x10 0xdf 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00}
       ymm4 = {0x6b 0xae 0x54 0x16 0x00 0x00 0x00 0x10 0x30 0x01 0x00 0x00
 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00}
       ymm5 = {0x00 0x00 0x00 0x00 0x00 0x00 0xf0 0x3f 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00}
       ymm6 = {0xae 0x72 0x46 0xe8 0x8f 0x1d 0xe4 0x3f 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00}
       ymm7 = {0x6b 0xc8 0xb8 0xbe 0xd3 0xb9 0x0b 0x40 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00}
       ymm8 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00}
       ymm9 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00}
      ymm10 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00}
      ymm11 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00}
      ymm12 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00}
      ymm13 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00}
      ymm14 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00}
      ymm15 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00 0x00}
       xmm0 = {0x00 0x0f 0x00 0x00 0x80 0x07 0x00 0x00 0x80 0x07 0x00 0x00
 0x00 0x00 0x00 0x00}
       xmm1 = {0x00 0x2d 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00}
       xmm2 = {0x00 0x00 0x00 0x00 0x00 0x00 0xe0 0x43 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00}
       xmm3 = {0x66 0xa9 0x49 0x15 0x00 0x00 0x00 0x10 0xdf 0x00 0x00 0x00
 0x00 0x00 0x00 0x00}
       xmm4 = {0x6b 0xae 0x54 0x16 0x00 0x00 0x00 0x10 0x30 0x01 0x00 0x00
 0x00 0x00 0x00 0x00}
       xmm5 = {0x00 0x00 0x00 0x00 0x00 0x00 0xf0 0x3f 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00}
       xmm6 = {0xae 0x72 0x46 0xe8 0x8f 0x1d 0xe4 0x3f 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00}
       xmm7 = {0x6b 0xc8 0xb8 0xbe 0xd3 0xb9 0x0b 0x40 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00}
       xmm8 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00}
       xmm9 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00}
      xmm10 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00}
      xmm11 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00}
      xmm12 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00}
      xmm13 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00}
      xmm14 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00}
      xmm15 = {0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00 0x00
 0x00 0x00 0x00 0x00}

 Exception State Registers:
     trapno = 0x0000000e
        err = 0x00000004
   faultvaddr = 0x000000010c399000

 (lldb)
 }}}

 If I simplify my code and remove all FFmpeg encoding and file-writing code
 then the application doesn't crash. The code below works, while the
 attached code doesn't.


 {{{
 extern "C"
 {
 #ifndef __STDC_CONSTANT_MACROS
 #define __STDC_CONSTANT_MACROS
 #endif
 #include <libavcodec/avcodec.h>
 #include <libavutil/imgutils.h>
 #include <libavformat/avformat.h>
 #include <libswscale/swscale.h>
 }

 #include <new>

 // Stripped-down test program from the ticket: builds a zero-filled
 // 3840x2160 BGR24 buffer, flips it vertically by way of a negative
 // linesize, and converts it to YUV420P with a single sws_scale() call.
 // No encoding or file writing is done here.
 int main()
 {
     fprintf(stderr, "Running mmx test\n");

     av_register_all();

     // Frame geometry used for both the source and the destination picture.
     int mWidth = 3840;
     int mHeight = 2160;
     int ret;

     // Packed source buffer, 3 bytes per pixel.
     // NOTE(review): the nothrow allocation is not checked for NULL before
     // the memset below dereferences it.
     uint8_t * pixels = new (std::nothrow) uint8_t[mWidth * mHeight * 3];
     //fill buffer with zeros
     memset(pixels, 0, mWidth * mHeight * 3);

     SwsContext * sContext = NULL;
     // Create the context for the frame conversion: same size in and out,
     // BGR24 -> YUV420P, so only the pixel format changes.
         sContext = sws_getContext(mWidth, mHeight, AV_PIX_FMT_BGR24,
                               mWidth, mHeight, AV_PIX_FMT_YUV420P,
                               SWS_FAST_BILINEAR, NULL, NULL, NULL);
         if (!sContext)
         {
                 // NOTE(review): the string literal below is split over two
                 // lines, apparently by mail line wrapping; it has to be
                 // rejoined before this compiles.
                 fprintf(stderr, "Could not allocate frame convertion
 context!\n");
                 return EXIT_FAILURE;
         }

     //allocate frames
     AVFrame * tmpFrame = NULL;
     AVFrame * outFrame = NULL;

     outFrame = av_frame_alloc();
         if (!outFrame)
         {
                 fprintf(stderr, "Could not create output frame\n");
         // NOTE(review): `false` converts to 0, so this and the later
         // `return false` paths exit with a success status, unlike the
         // EXIT_FAILURE used above. These early returns also skip the
         // cleanup at the end of main.
         return false;
         }

     // NOTE(review): PIX_FMT_* is used from here on while sws_getContext
     // above uses AV_PIX_FMT_*; presumably these are aliases for the same
     // values - confirm against the FFmpeg headers in use.
     outFrame->format = PIX_FMT_YUV420P;
         outFrame->width = mWidth;
         outFrame->height = mHeight;

         // Allocate the destination planes into outFrame->data/linesize;
         // the last argument (32) is the requested buffer alignment.
         ret = av_image_alloc(outFrame->data, outFrame->linesize, mWidth,
 mHeight, PIX_FMT_YUV420P, 32);
         if (ret < 0)
         {
                 fprintf(stderr, "Could not allocate output frame\n");
         return false;
         }

     // tmpFrame only describes the source picture; its pixel data is the
     // `pixels` buffer attached further down.
     tmpFrame = av_frame_alloc();
         if (!tmpFrame)
         {
                 fprintf(stderr, "Could not create swap frame\n");
                 return false;
         }

     tmpFrame->width = mWidth;
     tmpFrame->height = mHeight;
     tmpFrame->format = PIX_FMT_BGR24;

     //fill with random values
     //for(unsigned int i=0; i<mWidth * mHeight * 3;i++)
     //    pixels[i]= static_cast<uint8_t>( rand()%256 );

     //convert
     // Fill picture with image: sets tmpFrame's data/linesize for `pixels`
     // (presumably without copying - TODO confirm against avpicture_fill).
     avpicture_fill((AVPicture*)tmpFrame, pixels, PIX_FMT_BGR24, mWidth,
 mHeight);

     // Flipping frame: advance data[0] by (mHeight-1) rows, i.e. to the
     // start of the last row ...
     tmpFrame->data[0] += tmpFrame->linesize[0]*(mHeight-1);
     // ... and negate the stride so that successive rows are addressed
     // going backwards through the buffer.
     tmpFrame->linesize[0] = -tmpFrame->linesize[0];

     fprintf(stderr, "Converting to YUV420\n");

     //convert BGR24 to YUV420
     // The whole picture is passed as one slice (rows 0 .. mHeight).
     ret = sws_scale(sContext, tmpFrame->data, tmpFrame->linesize, 0,
 mHeight, outFrame->data, outFrame->linesize);

     // A failure is only logged; execution continues into the cleanup.
     if (ret < 0)
     {
         fprintf(stderr, "Failed to convert frame to YUV420!\n");
     }

     //Cleanup
     if (tmpFrame)
         {
                 av_frame_free(&tmpFrame);
         }

     if (outFrame)
         {
                 // Release the plane buffer obtained from av_image_alloc
                 // before freeing the frame itself.
                 av_freep(&outFrame->data[0]);
                 av_frame_free(&outFrame);
         }

     if (sContext)
                 sws_freeContext(sContext);


     delete [] pixels;

     // Exit program
         exit( EXIT_SUCCESS );
 }
 }}}

--
Ticket URL: <https://trac.ffmpeg.org/ticket/3451#comment:5>
FFmpeg <https://ffmpeg.org>
FFmpeg issue tracker


More information about the FFmpeg-trac mailing list