[FFmpeg-devel] FATE & Regressions (and PPC is broken)

Michael Niedermayer michaelni
Sat Mar 15 02:24:41 CET 2008


On Sat, Mar 15, 2008 at 12:36:36AM +0000, M?ns Rullg?rd wrote:
> Mike Melanson <mike at multimedia.cx> writes:
> 
> > Hi,
> >
> > The following regression test is broken on PowerPC (gcc 4.1.2):
> >
> > diff -u -w
> > "/home/melanson/ffmpeg/ffmpeg-main"/tests/ffmpeg.regression.ref
> > tests/data/vsynth.regression
> > --- /home/melanson/ffmpeg/ffmpeg-main/tests/ffmpeg.regression.ref
> > 2008-03-08 19:10:07.000000000 -0800
> > +++ tests/data/vsynth.regression        2008-03-12 14:27:53.000000000 -0700
> > @@ -197,7 +197,7 @@
> >  353368 ./tests/data/a-flac.flac
> >  c4228df189aad9567a037727d0e763e4 *./tests/data/flac.vsynth.out.wav
> >  stddev: 33.31 PSNR:65.87 bytes:1040384
> > -a1e56bb034c2773438ba1c830c4cea07 *./tests/data/a-wmav1.asf
> > +59575b4756d674cd8c8d53d86c08e8cb *./tests/data/a-wmav1.asf
> >  106004 ./tests/data/a-wmav1.asf
> >  stddev:12251.50 PSNR:14.56 bytes:1056768
> >  stddev:2106.00 PSNR:29.85 bytes:1048576
> > make: *** [codectest] Error 1
> 
> OK, I tracked this one down.  It turns out lrint() rounds slightly
> differently for whatever reason (bug or otherwise).  Attached patch
> changes it to lround() (which has well-defined rounding behaviour).
> With this change, the results are the same on x86_64 and ppc64.
> 
> This raises the broader question of lrint() usage in FFmpeg.  The
> documentation for the lrint() family says:
> 
>   These functions shall round their argument to the nearest integer
>   value, rounding according to the current rounding direction.
> 
> Clearly, in a library we do not know the current rounding direction,
> and it would be rude of us to change it.  The lround() family, on the
> other hand, always rounds the same way:
> 
>   These functions shall round their argument to the nearest integer
>   value, rounding halfway cases away from zero, regardless of the
>   current rounding direction.

We have to use lrint* because its faster see:

Dump of assembler code for function lrint:
<lrint+0>:	fldl   0x4(%esp)
<lrint+4>:	sub    $0x4,%esp
<lrint+7>:	fistpl (%esp)
<lrint+10>:	fwait
<lrint+11>:	pop    %eax
<lrint+12>:	ret    

Dump of assembler code for function lround:
<lround+0>:	push   %ebp
<lround+1>:	mov    %esp,%ebp
<lround+3>:	push   %edi
<lround+4>:	push   %esi
<lround+5>:	sub    $0x28,%esp
<lround+8>:	fldl   0x8(%ebp)
<lround+11>:	fstl   0xffffffd8(%ebp)
<lround+14>:	fstpl  0xffffffe0(%ebp)
<lround+17>:	mov    0xffffffe4(%ebp),%eax
<lround+20>:	mov    0xffffffe0(%ebp),%edx
<lround+23>:	mov    %eax,%edi
<lround+25>:	mov    %edx,0xffffffec(%ebp)
<lround+28>:	mov    %eax,%edx
<lround+30>:	and    $0xfffff,%eax
<lround+35>:	shr    $0x14,%edx
<lround+38>:	and    $0x7ff,%edx
<lround+44>:	sar    $0x1f,%edi
<lround+47>:	lea    0xfffffc01(%edx),%esi
<lround+53>:	or     $0x1,%edi
<lround+56>:	mov    %eax,0xffffffd4(%ebp)
<lround+59>:	orl    $0x100000,0xffffffd4(%ebp)
<lround+66>:	cmp    $0x13,%esi
<lround+69>:	jg     <lround+112>
<lround+71>:	test   %esi,%esi
<lround+73>:	js     <lround+211>
<lround+79>:	mov    %esi,%ecx
<lround+81>:	mov    $0x80000,%eax
<lround+86>:	sar    %cl,%eax
<lround+88>:	mov    $0x14,%ecx
<lround+93>:	add    0xffffffd4(%ebp),%eax
<lround+96>:	sub    %esi,%ecx
<lround+98>:	shr    %cl,%eax
<lround+100>:	imul   %edi,%eax
<lround+103>:	add    $0x28,%esp
<lround+106>:	pop    %esi
<lround+107>:	pop    %edi
<lround+108>:	pop    %ebp
<lround+109>:	ret    
<lround+110>:	data16
<lround+111>:	nop    
<lround+112>:	cmp    $0x1e,%esi
<lround+115>:	jg     <lround+176>
<lround+117>:	sub    $0x413,%edx
<lround+123>:	mov    $0x80000000,%eax
<lround+128>:	mov    %edx,%ecx
<lround+130>:	shr    %cl,%eax
<lround+132>:	mov    0xffffffec(%ebp),%ecx
<lround+135>:	mov    %edx,0xffffffe8(%ebp)
<lround+138>:	lea    (%eax,%ecx,1),%edx
<lround+141>:	cmp    %ecx,%edx
<lround+143>:	adcl   $0x0,0xffffffd4(%ebp)
<lround+147>:	cmp    $0x14,%esi
<lround+150>:	mov    0xffffffd4(%ebp),%eax
<lround+153>:	je     <lround+100>
<lround+155>:	movzbl 0xffffffe8(%ebp),%ecx
<lround+159>:	shl    %cl,%eax
<lround+161>:	mov    $0x34,%ecx
<lround+166>:	sub    %esi,%ecx
<lround+168>:	shr    %cl,%edx
<lround+170>:	or     %edx,%eax
<lround+172>:	jmp    <lround+100>
<lround+174>:	data16
<lround+175>:	nop    
<lround+176>:	fnstcw 0xfffffff6(%ebp)
<lround+179>:	fldl   0xffffffd8(%ebp)
<lround+182>:	movzwl 0xfffffff6(%ebp),%eax
<lround+186>:	mov    $0xc,%ah
<lround+188>:	mov    %ax,0xfffffff4(%ebp)
<lround+192>:	fldcw  0xfffffff4(%ebp)
<lround+195>:	fistpl 0xfffffff0(%ebp)
<lround+198>:	fldcw  0xfffffff6(%ebp)
<lround+201>:	mov    0xfffffff0(%ebp),%eax
<lround+204>:	add    $0x28,%esp
<lround+207>:	pop    %esi
<lround+208>:	pop    %edi
<lround+209>:	pop    %ebp
<lround+210>:	ret    
<lround+211>:	xor    %eax,%eax
<lround+213>:	cmp    $0xffffffff,%esi
<lround+216>:	setne  %al
<lround+219>:	add    $0x28,%esp
<lround+222>:	sub    $0x1,%eax
<lround+225>:	and    %edi,%eax
<lround+227>:	pop    %esi
<lround+228>:	pop    %edi
<lround+229>:	pop    %ebp
<lround+230>:	ret    
-------------

so the problem has to be solved differently, maybe a
if(flags & CODEC_FLAG_BITEXACT)
    for()
        lround()
else
    for()
        lrint()

[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

Good people do not need laws to tell them to act responsibly, while bad
people will find a way around the laws. -- Plato
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 189 bytes
Desc: Digital signature
URL: <http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/attachments/20080315/d1f0cc73/attachment.pgp>



More information about the ffmpeg-devel mailing list