[FFmpeg-devel] [PATCH] avcodec: use looking up crop table method when do clip

Fri Aug 7 11:30:01 CEST 2015

This patch is based on the last h264qpel optimization patch I pushed.
Do I need to split this patch in two, since one of the changed files is specific to the Loongson architecture?
Using the crop table lookup method may speed up decoding on Loongson, and with this patch x86 still passes FATE,
but I have not tested it on other architectures.


Tested on Loongson-3B:
time ./ffmpeg -i 1280x720.mp4 -f rawvideo -an -vframes 4096 -y /dev/null


no patch:


real 0m58.2s
user 4m59.1s
sys 0m5.8s


with patch:


real 0m53.9s
user 4m33.2s
sys 0m6.2s


---
From 68e88b17d113875d829a9936284d3551fd499139 Mon Sep 17 00:00:00 2001
From: ZhouXiaoyong <zhouxiaoyong at loongson.cn>
Date: Fri, 7 Aug 2015 16:33:10 +0800
Subject: [PATCH] avcodec: use looking up crop table method when do clip


Signed-off-by: ZhouXiaoyong <zhouxiaoyong at loongson.cn>
---
 libavcodec/bit_depth_template.c | 4 ++--
 libavcodec/mips/h264qpel_mmi.c  | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/libavcodec/bit_depth_template.c b/libavcodec/bit_depth_template.c
index 8018489..759cd30 100644
--- a/libavcodec/bit_depth_template.c
+++ b/libavcodec/bit_depth_template.c
@@ -72,7 +72,7 @@
 #   define pixel4 uint32_t
 #   define dctcoef int16_t
 
-#   define INIT_CLIP
+#   define INIT_CLIP const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP;
 #   define no_rnd_avg_pixel4 no_rnd_avg32
 #   define    rnd_avg_pixel4    rnd_avg32
 #   define AV_RN2P  AV_RN16
@@ -84,7 +84,7 @@
 #   define PIXEL_SPLAT_X4(x) ((x)*0x01010101U)
 
 #   define av_clip_pixel(a) av_clip_uint8(a)
-#   define CLIP(a) av_clip_uint8(a)
+#   define CLIP(a) cm[a]
 #endif
 
 #define FUNC3(a, b, c)  a ## _ ## b ## c
diff --git a/libavcodec/mips/h264qpel_mmi.c b/libavcodec/mips/h264qpel_mmi.c
index e04a2d5..ebb21c7 100644
--- a/libavcodec/mips/h264qpel_mmi.c
+++ b/libavcodec/mips/h264qpel_mmi.c
@@ -1308,6 +1308,7 @@ static void avg_h264_qpel16_v_lowpass_mmi(uint8_t *dst, const uint8_t *src,
 static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
         int dstStride, int srcStride)
 {
+    INIT_CLIP
     int i;
     int16_t _tmp[36];
     int16_t *tmp = _tmp;
@@ -1376,6 +1377,7 @@ static void put_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
 static void put_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
         int dstStride, int srcStride)
 {
+    INIT_CLIP
     int16_t _tmp[104];
     int16_t *tmp = _tmp;
     int i;
@@ -1479,6 +1481,7 @@ static void put_h264_qpel16_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
 static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
         int dstStride, int srcStride)
 {
+    INIT_CLIP
     int i;
     int16_t _tmp[36];
     int16_t *tmp = _tmp;
@@ -1549,6 +1552,7 @@ static void avg_h264_qpel4_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
 static void avg_h264_qpel8_hv_lowpass_mmi(uint8_t *dst, const uint8_t *src,
         int dstStride, int srcStride)
 {
+    INIT_CLIP
     int16_t _tmp[104];
     int16_t *tmp = _tmp;
     int i;
-- 
2.1.0