[FFmpeg-devel] [PATCH] sws/aarch64/yuv2rgb: honor iOS calling convention

Clément Bœsch u at pkh.me
Fri Apr 8 13:41:01 CEST 2016


From: Clément Bœsch <clement at stupeflix.com>

On iOS, y_offset and y_coeff, being successive 32-bit integer arguments
passed on the stack, are packed into 8 bytes instead of occupying 2x8
bytes.

See https://developer.apple.com/library/ios/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html

> iOS diverges from Procedure Call Standard for the ARM 64-bit
> Architecture in several ways
[...]
> In the generic procedure call standard, all function arguments passed
> on the stack consume slots in multiples of 8 bytes. In iOS, this
> requirement is dropped, and values consume only the space required.
[...]
> Padding is still inserted on the stack to satisfy arguments’ alignment
> requirements.
---
 libswscale/aarch64/yuv2rgb_neon.S | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/libswscale/aarch64/yuv2rgb_neon.S b/libswscale/aarch64/yuv2rgb_neon.S
index 8cefe22..b7446aa 100644
--- a/libswscale/aarch64/yuv2rgb_neon.S
+++ b/libswscale/aarch64/yuv2rgb_neon.S
@@ -21,10 +21,18 @@
 
 #include "libavutil/aarch64/asm.S"
 
+.macro load_yoff_ycoeff yoff ycoeff
+#if defined(__APPLE__)
+    ldp                 w9, w10, [sp, #\yoff]                           // packed 32-bit pair: w9 = y_offset, w10 = y_coeff (\ycoeff unused)
+#else
+    ldr                 w9,  [sp, #\yoff]                               // w9  = y_offset, read from its own stack offset
+    ldr                 w10, [sp, #\ycoeff]                             // w10 = y_coeff, read from its own stack offset
+#endif
+.endm
+
 .macro load_args_nv12
     ldr                 x8,  [sp]                                       // table
-    ldr                 w9,  [sp, #8]                                   // y_offset
-    ldr                 w10, [sp, #16]                                  // y_coeff
+    load_yoff_ycoeff    8, 16                                           // y_offset, y_coeff
     ld1                 {v1.1D}, [x8]
     dup                 v0.8H, w10
     dup                 v3.8H, w9
@@ -42,8 +50,7 @@
     ldr                 x13, [sp]                                       // srcV
     ldr                 w14, [sp, #8]                                   // linesizeV
     ldr                 x8,  [sp, #16]                                  // table
-    ldr                 w9,  [sp, #24]                                  // y_offset
-    ldr                 w10, [sp, #32]                                  // y_coeff
+    load_yoff_ycoeff    24, 32                                          // y_offset, y_coeff
     ld1                 {v1.1D}, [x8]
     dup                 v0.8H, w10
     dup                 v3.8H, w9
@@ -59,8 +66,7 @@
     ldr                 x13, [sp]                                       // srcV
     ldr                 w14, [sp, #8]                                   // linesizeV
     ldr                 x8,  [sp, #16]                                  // table
-    ldr                 w9,  [sp, #24]                                  // y_offset
-    ldr                 w10, [sp, #32]                                  // y_coeff
+    load_yoff_ycoeff    24, 32                                          // y_offset, y_coeff
     ld1                 {v1.1D}, [x8]
     dup                 v0.8H, w10
     dup                 v3.8H, w9
-- 
2.8.0



More information about the ffmpeg-devel mailing list