[Ffmpeg-devel] Segfault when encoding MPEG with MMX support

Tobias Grimm listaccount
Tue Jul 12 01:04:41 CEST 2005


Michael Niedermayer schrieb:

> or split it before "mov $-128, %%"REG_a"

Ok - I'm not sure I fully understand gcc's extended asm. I tried it
this way:

/*
 * Setup half of the split quantizer loop.
 *
 * Moves last_non_zero_p1 from REG_a into mm3 and runs SPREADW on it
 * (presumably replicating the low word across the register -- confirm
 * against the macro definition), clears mm7 and mm4, loads qmat[0] into
 * mm5 and computes -bias[0] into mm6.
 *
 * The first instruction reads REG_a, so last_non_zero_p1 is bound to that
 * register with an explicit "a" input. Without the operand the compiler is
 * free to leave anything in the register at this point. The operand is
 * listed last so that %0 and %1 still refer to qmat and bias.
 *
 * NOTE(review): mm3..mm7 have to stay intact until the next asm statement
 * consumes them, and nothing in the constraints expresses that. Confirm
 * the compiler cannot place MMX/x87 code between the two statements.
 */
asm volatile(
    "movd %%"REG_a", %%mm3		\n\t" // last_non_zero_p1
    SPREADW(%%mm3)
    "pxor %%mm7, %%mm7			\n\t" // 0
    "pxor %%mm4, %%mm4			\n\t" // 0
    "movq (%0), %%mm5			\n\t" // qmat[0]
    "pxor %%mm6, %%mm6			\n\t"
    "psubw (%1), %%mm6			\n\t" // -bias[0]
    :: "r" (qmat), "r" (bias), "a" (last_non_zero_p1)
);
/*
 * Loop half of the split quantizer.
 *
 * REG_a runs from -128 up to 0 in steps of 8 and is used as a byte offset
 * from the end pointers passed in (block+64, inv_zigzag_direct16+64,
 * temp_block+64), so each iteration handles one 8-byte group.
 *
 * Register state expected on entry, set up by the preceding asm statement:
 *   mm3 = running maximum, mm4 = 0, mm5 = qmat[0], mm6 = -bias[0], mm7 = 0.
 *
 * Operands:
 *   %0 last_non_zero_p1 (REG_a, in/out; overwritten by the loop counter and
 *      finally by the zero-extended low byte of the reduced maximum)
 *   %1 block+64                (read, then zeroed)
 *   %2 inv_zigzag_direct16+64  (read)
 *   %3 temp_block+64           (written)
 *
 * Comments that had been split across two lines by mail wrapping are kept
 * on a single line each, so no comment text ends up between the string
 * literals.
 */
asm volatile(
    "mov $-128, %%"REG_a"		\n\t"
    ".balign 16				\n\t"
    "1:					\n\t"
    "pxor %%mm1, %%mm1			\n\t" // 0
    "movq (%1, %%"REG_a"), %%mm0	\n\t" // block[i]
    "pcmpgtw %%mm0, %%mm1		\n\t" // block[i] < 0 ? 0xFFFF : 0x0000
    "pxor %%mm1, %%mm0			\n\t"
    "psubw %%mm1, %%mm0			\n\t" // ABS(block[i])
    "psubusw %%mm6, %%mm0		\n\t" // ABS(block[i]) + bias[0]
    // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
    "pmulhw %%mm5, %%mm0		\n\t"
    "por %%mm0, %%mm4			\n\t"
    "pxor %%mm1, %%mm0			\n\t"
    // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
    "psubw %%mm1, %%mm0			\n\t"
    "movq %%mm0, (%3, %%"REG_a")	\n\t"
    "pcmpeqw %%mm7, %%mm0		\n\t" // out==0 ? 0xFFFF : 0x0000
    "movq (%2, %%"REG_a"), %%mm1	\n\t"
    "movq %%mm7, (%1, %%"REG_a")	\n\t" // 0
    "pandn %%mm1, %%mm0			\n\t"
    PMAXW(%%mm0, %%mm3)
    "add $8, %%"REG_a"			\n\t"
    " js 1b				\n\t"
    // fold the per-lane values in mm3 down into its lowest lane
    "movq %%mm3, %%mm0			\n\t"
    "psrlq $32, %%mm3			\n\t"
    PMAXW(%%mm0, %%mm3)
    "movq %%mm3, %%mm0			\n\t"
    "psrlq $16, %%mm3			\n\t"
    PMAXW(%%mm0, %%mm3)
    "movd %%mm3, %%"REG_a"		\n\t"
    "movzb %%al, %%"REG_a"		\n\t" // last_non_zero_p1
    : "+a" (last_non_zero_p1)
    : "r" (block+64),
      "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
);

Is this correct?

There's another asm block with the same problem:

/*
 * Quantizer loop variant that reloads bias and qmat on every iteration
 * instead of using one fixed value for the whole block.
 *
 * REG_a first carries last_non_zero_p1 in (moved to mm3, then passed
 * through SPREADW), then serves as a byte offset running from -128 up to 0
 * in steps of 8 relative to the end pointers passed as operands.
 *
 * Operands:
 *   %0 last_non_zero_p1 (REG_a, in/out)
 *   %1 block+64                (read, then zeroed)
 *   %2 qmat+64                 (read)
 *   %3 bias+64                 (read)
 *   %4 inv_zigzag_direct16+64  (read)
 *   %5 temp_block+64           (written)
 *
 * Comments that had been split across two lines by mail wrapping are kept
 * on a single line each, so no comment text ends up between the string
 * literals. They also use [i] for bias/qmat, matching the indexed loads.
 *
 * NOTE(review): this statement needs REG_a plus five further general
 * registers at once; the surrounding thread suggests that is the source of
 * the problem being discussed -- confirm before relying on it.
 */
asm volatile(
    "movd %%"REG_a", %%mm3		\n\t" // last_non_zero_p1
    SPREADW(%%mm3)
    "pxor %%mm7, %%mm7			\n\t" // 0
    "pxor %%mm4, %%mm4			\n\t" // 0
    "mov $-128, %%"REG_a"		\n\t"
    ".balign 16				\n\t"
    "1:					\n\t"
    "pxor %%mm1, %%mm1			\n\t" // 0
    "movq (%1, %%"REG_a"), %%mm0	\n\t" // block[i]
    "pcmpgtw %%mm0, %%mm1		\n\t" // block[i] < 0 ? 0xFFFF : 0x0000
    "pxor %%mm1, %%mm0			\n\t"
    "psubw %%mm1, %%mm0			\n\t" // ABS(block[i])
    "movq (%3, %%"REG_a"), %%mm6	\n\t" // bias[i]
    "paddusw %%mm6, %%mm0		\n\t" // ABS(block[i]) + bias[i]
    "movq (%2, %%"REG_a"), %%mm5	\n\t" // qmat[i]
    // (ABS(block[i])*qmat[i] + bias[i]*qmat[i])>>16
    "pmulhw %%mm5, %%mm0		\n\t"
    "por %%mm0, %%mm4			\n\t"
    "pxor %%mm1, %%mm0			\n\t"
    // out=((ABS(block[i])*qmat[i] + bias[i]*qmat[i])>>16)*sign(block[i])
    "psubw %%mm1, %%mm0			\n\t"
    "movq %%mm0, (%5, %%"REG_a")	\n\t"
    "pcmpeqw %%mm7, %%mm0		\n\t" // out==0 ? 0xFFFF : 0x0000
    "movq (%4, %%"REG_a"), %%mm1	\n\t"
    "movq %%mm7, (%1, %%"REG_a")	\n\t" // 0
    "pandn %%mm1, %%mm0			\n\t"
    PMAXW(%%mm0, %%mm3)
    "add $8, %%"REG_a"			\n\t"
    " js 1b				\n\t"
    // fold the per-lane values in mm3 down into its lowest lane
    "movq %%mm3, %%mm0			\n\t"
    "psrlq $32, %%mm3			\n\t"
    PMAXW(%%mm0, %%mm3)
    "movq %%mm3, %%mm0			\n\t"
    "psrlq $16, %%mm3			\n\t"
    PMAXW(%%mm0, %%mm3)
    "movd %%mm3, %%"REG_a"		\n\t"
    "movzb %%al, %%"REG_a"		\n\t" // last_non_zero_p1
    : "+a" (last_non_zero_p1)
    : "r" (block+64), "r" (qmat+64), "r" (bias+64),
      "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
);

I couldn't find a way to split this without breaking the code. Maybe a
temporary variable needs to be introduced?

Tobias





More information about the ffmpeg-devel mailing list