[FFmpeg-cvslog] swscale/x86/rgb2rgb: add optimized versions of the remaining shuffle_bytes functions

James Almer git at videolan.org
Sat Nov 2 20:05:59 EET 2024


ffmpeg | branch: master | James Almer <jamrial at gmail.com> | Tue Oct 29 16:30:51 2024 -0300| [78ba06928a69ecb464221f71dbdedabc9d714176] | committer: James Almer

swscale/x86/rgb2rgb: add optimized versions of the remaining shuffle_bytes functions

Signed-off-by: James Almer <jamrial at gmail.com>

> http://git.videolan.org/gitweb.cgi/ffmpeg.git/?a=commit;h=78ba06928a69ecb464221f71dbdedabc9d714176
---

 libswscale/x86/rgb2rgb.c     | 16 ++++++++++++++++
 libswscale/x86/rgb_2_rgb.asm | 12 ++++++++++++
 2 files changed, 28 insertions(+)

diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
index 456bbc7898..6790551a38 100644
--- a/libswscale/x86/rgb2rgb.c
+++ b/libswscale/x86/rgb2rgb.c
@@ -2348,6 +2348,10 @@ void ff_shuffle_bytes_0321_ssse3(const uint8_t *src, uint8_t *dst, int src_size)
 void ff_shuffle_bytes_1230_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
 void ff_shuffle_bytes_3012_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
 void ff_shuffle_bytes_3210_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_3102_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_2013_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_2130_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_1203_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
 
 #if ARCH_X86_64
 void ff_shuffle_bytes_2103_avx2(const uint8_t *src, uint8_t *dst, int src_size);
@@ -2355,6 +2359,10 @@ void ff_shuffle_bytes_0321_avx2(const uint8_t *src, uint8_t *dst, int src_size);
 void ff_shuffle_bytes_1230_avx2(const uint8_t *src, uint8_t *dst, int src_size);
 void ff_shuffle_bytes_3012_avx2(const uint8_t *src, uint8_t *dst, int src_size);
 void ff_shuffle_bytes_3210_avx2(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_3102_avx2(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_2013_avx2(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_2130_avx2(const uint8_t *src, uint8_t *dst, int src_size);
+void ff_shuffle_bytes_1203_avx2(const uint8_t *src, uint8_t *dst, int src_size);
 
 void ff_uyvytoyuv422_sse2(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
                           const uint8_t *src, int width, int height,
@@ -2424,6 +2432,10 @@ av_cold void rgb2rgb_init_x86(void)
         shuffle_bytes_1230 = ff_shuffle_bytes_1230_ssse3;
         shuffle_bytes_3012 = ff_shuffle_bytes_3012_ssse3;
         shuffle_bytes_3210 = ff_shuffle_bytes_3210_ssse3;
+        shuffle_bytes_3102 = ff_shuffle_bytes_3102_ssse3;
+        shuffle_bytes_2013 = ff_shuffle_bytes_2013_ssse3;
+        shuffle_bytes_2130 = ff_shuffle_bytes_2130_ssse3;
+        shuffle_bytes_1203 = ff_shuffle_bytes_1203_ssse3;
     }
 #if HAVE_AVX_EXTERNAL
     if (EXTERNAL_AVX(cpu_flags)) {
@@ -2437,6 +2449,10 @@ av_cold void rgb2rgb_init_x86(void)
         shuffle_bytes_1230 = ff_shuffle_bytes_1230_avx2;
         shuffle_bytes_3012 = ff_shuffle_bytes_3012_avx2;
         shuffle_bytes_3210 = ff_shuffle_bytes_3210_avx2;
+        shuffle_bytes_3102 = ff_shuffle_bytes_3102_avx2;
+        shuffle_bytes_2013 = ff_shuffle_bytes_2013_avx2;
+        shuffle_bytes_2130 = ff_shuffle_bytes_2130_avx2;
+        shuffle_bytes_1203 = ff_shuffle_bytes_1203_avx2;
     }
     if (EXTERNAL_AVX2_FAST(cpu_flags)) {
         uyvytoyuv422 = ff_uyvytoyuv422_avx2;
diff --git a/libswscale/x86/rgb_2_rgb.asm b/libswscale/x86/rgb_2_rgb.asm
index 45a21dd0bf..b468beb12d 100644
--- a/libswscale/x86/rgb_2_rgb.asm
+++ b/libswscale/x86/rgb_2_rgb.asm
@@ -30,6 +30,10 @@ pb_shuffle0321: db 0, 3, 2, 1, 4, 7, 6, 5, 8, 11, 10, 9, 12, 15, 14, 13
 pb_shuffle1230: db 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12
 pb_shuffle3012: db 3, 0, 1, 2, 7, 4, 5, 6, 11, 8, 9, 10, 15, 12, 13, 14
 pb_shuffle3210: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
+pb_shuffle3102: db 3, 1, 0, 2, 7, 5, 4, 6, 11, 9, 8, 10, 15, 13, 12, 14
+pb_shuffle2013: db 2, 0, 1, 3, 6, 4, 5, 7, 10, 8, 9, 11, 14, 12, 13, 15
+pb_shuffle2130: db 2, 1, 3, 0, 6, 5, 7, 4, 10, 9, 11, 8, 14, 13, 15, 12
+pb_shuffle1203: db 1, 2, 0, 3, 5, 6, 4, 7, 9, 10, 8, 11, 13, 14, 12, 15
 
 SECTION .text
 
@@ -98,6 +102,10 @@ SHUFFLE_BYTES 0, 3, 2, 1
 SHUFFLE_BYTES 1, 2, 3, 0
 SHUFFLE_BYTES 3, 0, 1, 2
 SHUFFLE_BYTES 3, 2, 1, 0
+SHUFFLE_BYTES 3, 1, 0, 2
+SHUFFLE_BYTES 2, 0, 1, 3
+SHUFFLE_BYTES 2, 1, 3, 0
+SHUFFLE_BYTES 1, 2, 0, 3
 
 %if ARCH_X86_64
 %if HAVE_AVX2_EXTERNAL
@@ -107,6 +115,10 @@ SHUFFLE_BYTES 0, 3, 2, 1
 SHUFFLE_BYTES 1, 2, 3, 0
 SHUFFLE_BYTES 3, 0, 1, 2
 SHUFFLE_BYTES 3, 2, 1, 0
+SHUFFLE_BYTES 3, 1, 0, 2
+SHUFFLE_BYTES 2, 0, 1, 3
+SHUFFLE_BYTES 2, 1, 3, 0
+SHUFFLE_BYTES 1, 2, 0, 3
 %endif
 %endif
 



More information about the ffmpeg-cvslog mailing list