[FFmpeg-devel] [PATCH] libavutil/libavfilter:add opencl warpper and opencl deshake filter

Wei Gao highgod0401 at gmail.com
Fri Mar 8 03:29:49 CET 2013


Hi,

Thank you for your comments, I have some questions as follow.

Thanks
Best regards

2013/3/8 Stefano Sabatini <stefasab at gmail.com>

> Sorry again for the slow reply.
>
> On date Friday 2013-02-22 15:31:09 +0800, Wei Gao encoded:
> >
> > From 4824a3cb8108afe95756f46cfe5a185d3a6308dd Mon Sep 17 00:00:00 2001
> > From: highgod0401 <highgod0401 at gmail.com>
> > Date: Fri, 22 Feb 2013 15:25:48 +0800
> > Subject: [PATCH] add opencl warpper and opencl deshake filter
> >
> > ---
> >  configure                        |   5 +
> >  libavfilter/Makefile             |   3 +
> >  libavfilter/all_filter_kernels.c |  18 +
> >  libavfilter/all_filter_kernels.h |   9 +
> >  libavfilter/allfilters.c         |   8 +-
> >  libavfilter/deshake_kernel.h     | 182 +++++++++
> >  libavfilter/transform_opencl.c   | 157 ++++++++
> >  libavfilter/transform_opencl.h   |  38 ++
> >  libavfilter/vf_deshake.c         | 213 ++++++++++-
>
> >  libavutil/Makefile               |   4 +
> >  libavutil/openclwrapper.c        | 808
> +++++++++++++++++++++++++++++++++++++++
> >  libavutil/openclwrapper.h        | 198 ++++++++++
>
> Look, since deshake/opencl is the only user for the moment, I suggest
> to keep this all in libavfilter as internal, so you avoid binary and
> API issues. It can be moved to libavutil in a second moment if there
> is the need, so we have some time to test the API.
>
 We will submit opendecode and openencode patch after the openclwrapper
patch accepted. And they are codecs in libavcodec,they will use
openclwrapper too.

>
> >  12 files changed, 1641 insertions(+), 2 deletions(-)
> >  create mode 100644 libavfilter/all_filter_kernels.c
> >  create mode 100644 libavfilter/all_filter_kernels.h
> >  create mode 100644 libavfilter/deshake_kernel.h
> >  create mode 100644 libavfilter/transform_opencl.c
> >  create mode 100644 libavfilter/transform_opencl.h
> >  create mode 100644 libavutil/openclwrapper.c
> >  create mode 100644 libavutil/openclwrapper.h
> >
> > diff --git a/configure b/configure
> > index b61359c..e3a7f3a 100755
> > --- a/configure
> > +++ b/configure
> > @@ -140,6 +140,7 @@ Component options:
> >    --disable-rdft           disable RDFT code
> >    --disable-fft            disable FFT code
> >    --enable-dxva2           enable DXVA2 code
> > +  --enable-opencl          enable OpenCL code
> >    --enable-vaapi           enable VAAPI code [autodetect]
> >    --enable-vda             enable VDA code   [autodetect]
> >    --enable-vdpau           enable VDPAU code [autodetect]
> > @@ -1196,6 +1197,7 @@ CONFIG_LIST="
> >      network
> >      nonfree
> >      openal
> > +    opencl
> >      openssl
> >      pic
> >      rdft
> > @@ -1990,6 +1992,7 @@ cropdetect_filter_deps="gpl"
> >  decimate_filter_deps="gpl avcodec"
> >  delogo_filter_deps="gpl"
> >  deshake_filter_deps="avcodec"
> > +deshake_opencl_filter_deps="opencl deshake_filter"
> >  drawtext_filter_deps="libfreetype"
> >  ebur128_filter_deps="gpl"
> >  flite_filter_deps="libflite"
> > @@ -3885,6 +3888,7 @@ enabled openal     && { { for al_libs in
> "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32
> >                          die "ERROR: openal not found"; } &&
> >                        { check_cpp_condition "AL/al.h"
> "defined(AL_VERSION_1_1)" ||
> >                          die "ERROR: openal must be installed and
> version must be 1.1 or compatible"; }
> > +enabled opencl    && require2 opencl CL/cl.h clEnqueueNDRangeKernel
> -lOpenCL
> >  enabled openssl    && { check_lib openssl/ssl.h SSL_library_init -lssl
> -lcrypto ||
> >                          check_lib openssl/ssl.h SSL_library_init
> -lssl32 -leay32 ||
> >                          check_lib openssl/ssl.h SSL_library_init -lssl
> -lcrypto -lws2_32 -lgdi32 ||
> > @@ -4295,6 +4299,7 @@ echo "libx264 enabled           ${libx264-no}"
> >  echo "libxavs enabled           ${libxavs-no}"
> >  echo "libxvid enabled           ${libxvid-no}"
> >  echo "openal enabled            ${openal-no}"
> > +echo "opencl enabled            ${opencl-no}"
> >  echo "openssl enabled           ${openssl-no}"
> >  echo "zlib enabled              ${zlib-no}"
> >  echo "bzlib enabled             ${bzlib-no}"
> > diff --git a/libavfilter/Makefile b/libavfilter/Makefile
> > index 938b183..d56f380 100644
> > --- a/libavfilter/Makefile
> > +++ b/libavfilter/Makefile
> > @@ -9,6 +9,7 @@ FFLIBS-$(CONFIG_ASYNCTS_FILTER)              +=
> avresample
> >  FFLIBS-$(CONFIG_ATEMPO_FILTER)               += avcodec
> >  FFLIBS-$(CONFIG_DECIMATE_FILTER)             += avcodec
> >  FFLIBS-$(CONFIG_DESHAKE_FILTER)              += avcodec
> > +FFLIBS-$(CONFIG_DESHAKE_OPENCL_FILTER)       += avcodec
> >  FFLIBS-$(CONFIG_MOVIE_FILTER)                += avformat avcodec
> >  FFLIBS-$(CONFIG_MP_FILTER)                   += avcodec
> >  FFLIBS-$(CONFIG_PAN_FILTER)                  += swresample
> > @@ -49,6 +50,7 @@ OBJS = allfilters.o
>                   \
> >  OBJS-$(CONFIG_AVCODEC)                       += avcodec.o
> >  OBJS-$(CONFIG_AVFORMAT)                      += lavfutils.o
> >  OBJS-$(CONFIG_SWSCALE)                       += lswsutils.o
> > +OBJS-$(CONFIG_OPENCL)                        += all_filter_kernels.o
> >
> >  OBJS-$(CONFIG_ACONVERT_FILTER)               += af_aconvert.o
> >  OBJS-$(CONFIG_AFADE_FILTER)                  += af_afade.o
> > @@ -108,6 +110,7 @@ OBJS-$(CONFIG_CROPDETECT_FILTER)             +=
> vf_cropdetect.o
> >  OBJS-$(CONFIG_DECIMATE_FILTER)               += vf_decimate.o
> >  OBJS-$(CONFIG_DELOGO_FILTER)                 += vf_delogo.o
> >  OBJS-$(CONFIG_DESHAKE_FILTER)                += vf_deshake.o
> > +OBJS-$(CONFIG_DESHAKE_OPENCL_FILTER)         += vf_deshake.o
> transform_opencl.o all_filter_kernels.o
> >  OBJS-$(CONFIG_DRAWBOX_FILTER)                += vf_drawbox.o
> >  OBJS-$(CONFIG_DRAWTEXT_FILTER)               += vf_drawtext.o
> >  OBJS-$(CONFIG_EDGEDETECT_FILTER)             += vf_edgedetect.o
> > diff --git a/libavfilter/all_filter_kernels.c
> b/libavfilter/all_filter_kernels.c
> > new file mode 100644
>
> > index 0000000..56ee3c2
> > --- /dev/null
> > +++ b/libavfilter/all_filter_kernels.c
> > @@ -0,0 +1,18 @@
> > +#include "avfilter.h"
> > +#include "config.h"
> > +#include "all_filter_kernels.h"
> > +#include "libavutil/openclwrapper.h"
> > +#include "deshake_kernel.h"
> > +
> > +#define REGISTER_FILTER_KERNEL(X, x, y)
>        \
> > +    {
>        \
> > +        extern AVFilter avfilter_##y##_##x;
>        \
> > +        if (CONFIG_##X##_FILTER)
>         \
> > +
>  ff_opencl_regist_kernel((avfilter_##y##_##x).name,ff_kernel_##x);  \
> > +    }
> > +
> > +void ff_kernels_register_all(void)
> > +{
> > +    REGISTER_FILTER_KERNEL(DESHAKE_OPENCL,     deshake_opencl,
> vf);
> > +}
>
> Missing license. Also you may move this to allfilters.c, in order to
> avoid too many files around.
>
> > +
> > diff --git a/libavfilter/all_filter_kernels.h
> b/libavfilter/all_filter_kernels.h
> > new file mode 100644
> > index 0000000..4d59e5a
> > --- /dev/null
> > +++ b/libavfilter/all_filter_kernels.h
> > @@ -0,0 +1,9 @@
> > +#ifndef AVFILTER_ALL_AVFILTER_KERNELS_H
> > +#define AVFILTER_ALL_AVFILTER_KERNELS_H
> > +
> > +void ff_kernels_register_all(void);
> > +
> > +#endif
>
> Same for this.
>
> > +
> > +
> > +
> > diff --git a/libavfilter/allfilters.c b/libavfilter/allfilters.c
> > index 47158f9..e92cf8b 100644
> > --- a/libavfilter/allfilters.c
> > +++ b/libavfilter/allfilters.c
> > @@ -21,7 +21,9 @@
> >
> >  #include "avfilter.h"
> >  #include "config.h"
> > -
> > +#if CONFIG_OPENCL
> > +#include "all_filter_kernels.h"
> > +#endif
>
> avoid indirections, just insert the code here.
>
The function " ff_opencl_regist_kernel" is defined in openclwrapper, and
only can be compiled if config CONFIG_OPENCL,so should I code here and
still use indirections?

>
> >
> >  #define REGISTER_FILTER(X, x, y)
>  \
> >      {
> \
> > @@ -102,6 +104,7 @@ void avfilter_register_all(void)
> >      REGISTER_FILTER(DECIMATE,       decimate,       vf);
> >      REGISTER_FILTER(DELOGO,         delogo,         vf);
> >      REGISTER_FILTER(DESHAKE,        deshake,        vf);
> > +    REGISTER_FILTER(DESHAKE_OPENCL, deshake_opencl, vf);
> >      REGISTER_FILTER(DRAWBOX,        drawbox,        vf);
> >      REGISTER_FILTER(DRAWTEXT,       drawtext,       vf);
> >      REGISTER_FILTER(EDGEDETECT,     edgedetect,     vf);
> > @@ -192,4 +195,7 @@ void avfilter_register_all(void)
> >      REGISTER_FILTER_UNCONDITIONAL(vsink_buffer);
> >      REGISTER_FILTER_UNCONDITIONAL(af_afifo);
> >      REGISTER_FILTER_UNCONDITIONAL(vf_fifo);
> > +#if CONFIG_OPENCL
> > +    ff_kernels_register_all();
> > +#endif
> >  }
> > diff --git a/libavfilter/deshake_kernel.h b/libavfilter/deshake_kernel.h
> > new file mode 100644
> > index 0000000..958fcbe
> > --- /dev/null
> > +++ b/libavfilter/deshake_kernel.h
> > @@ -0,0 +1,182 @@
> > +#include "libavutil/openclwrapper.h"
> > +
> > +const char *ff_kernel_deshake_opencl = FF_OPENCL_KERNEL(
> > +
>
> > +inline unsigned char pixel(global const unsigned char *src,float x,
> float y,int w, int h,int stride, unsigned char def)
> > +{
> > +    return (x < 0 || y < 0) ? def : ((x >= w || y >= h) ? def
> :src[(int)x + (int)y * stride]);
>
> you can merge the two checks:
> x < 0 || y < 0 || x >= w || y >= h ? def : ...
>
> > +}
> > +unsigned char interpolate_nearest(float x, float y, global const
> unsigned char *src,
> > +                        int width, int height, int stride, unsigned
> char def)
> > +{
> > +    return pixel(src, (int)(x + 0.5), (int)(y + 0.5), width, height,
> stride, def);
> > +}
> > +
> > +unsigned char interpolate_bilinear(float x, float y, global const
> unsigned char *src,
> > +                        int width, int height, int stride, unsigned
> char def)
> > +{
> > +    int x_c, x_f, y_c, y_f;
> > +    int v1, v2, v3, v4;
> > +
> > +    if (x < -1 || x > width || y < -1 || y > height) {
> > +        return def;
> > +    } else {
> > +        x_f = (int)x;
> > +        x_c = x_f + 1;
> > +
> > +        y_f = (int)y;
> > +        y_c = y_f + 1;
> > +
> > +        v1 = pixel(src, x_c, y_c, width, height, stride, def);
> > +        v2 = pixel(src, x_c, y_f, width, height, stride, def);
> > +        v3 = pixel(src, x_f, y_c, width, height, stride, def);
> > +        v4 = pixel(src, x_f, y_f, width, height, stride, def);
> > +
> > +        return (v1*(x - x_f)*(y - y_f) + v2*((x - x_f)*(y_c - y)) +
> > +                v3*(x_c - x)*(y - y_f) + v4*((x_c - x)*(y_c - y)));
> > +    }
> > +}
> > +
> > +unsigned char interpolate_biquadratic(float x, float y, global const
> unsigned char *src,
> > +                        int width, int height, int stride, unsigned
> char def)
> > +{
> > +    int     x_c, x_f, y_c, y_f;
> > +    unsigned char v1,  v2,  v3,  v4;
> > +    float   f1,  f2,  f3,  f4;
> > +
> > +    if (x < - 1 || x > width || y < -1 || y > height)
> > +        return def;
> > +    else {
> > +        x_f = (int)x;
> > +        x_c = x_f + 1;
> > +        y_f = (int)y;
> > +        y_c = y_f + 1;
> > +
> > +        v1 = pixel(src, x_c, y_c, width, height, stride, def);
> > +        v2 = pixel(src, x_c, y_f, width, height, stride, def);
> > +        v3 = pixel(src, x_f, y_c, width, height, stride, def);
> > +        v4 = pixel(src, x_f, y_f, width, height, stride, def);
> > +
> > +        f1 = 1 - sqrt((x_c - x) * (y_c - y));
> > +        f2 = 1 - sqrt((x_c - x) * (y - y_f));
> > +        f3 = 1 - sqrt((x - x_f) * (y_c - y));
> > +        f4 = 1 - sqrt((x - x_f) * (y - y_f));
> > +        return (v1 * f1 + v2 * f2 + v3 * f3 + v4 * f4) / (f1 + f2 + f3
> + f4);
> > +    }
> > +}
>
> > +
> > +
>
> Nit here and below: just one empty line after each function
>
> > +inline const float clipf(float a, float amin, float amax)
> > +{
>
> > +    if      (a < amin) return amin;
> > +    else if (a > amax) return amax;
> > +    else               return a;
>
> Nit: return a < amin ? amin : a > amax ? amax : a;
>
> > +}
> > +
> > +
> > +kernel void avfilter_transform(global  unsigned char *src,
> > +                               global  unsigned char *dst,
> > +                               global          float *matrix,
> > +                               global          float *matrix2,
>
> > +                                                 int interpolate,
> > +                                                 int fillmethod,
>
> you may use some enum here, they greatly help debugging and
> readability
>
Sorry, I don't get what you mean, it is a OpenCL kernel function,and each
parameter is set from c code using AV_OPENCLCHECK_SET_KERNEL_ARG, and they
should be set one by one.So I think it may be easy to write like this.

>
> > +                                                 int src_stride_lu,
> > +                                                 int dst_stride_lu,
> > +                                                 int src_stride_ch,
> > +                                                 int dst_stride_ch,
> > +                                                 int height,
> > +                                                 int width,
> > +                                                 int ch,
> > +                                                 int cw)
> > +{
> > +     int global_id = get_global_id(0);
> > +
> > +     global unsigned char *dstY = dst;
> > +     global unsigned char *dstU = dstY + height * dst_stride_lu;
> > +     global unsigned char *dstV = dstU + ch * dst_stride_ch;
> > +
> > +     global unsigned char *srcY = src;
> > +     global unsigned char *srcU = srcY + height * src_stride_lu;
> > +     global unsigned char *srcV = srcU + ch * src_stride_ch;
>
> Style: please avoid camelStyle, dst_y, dst_u, etc. are favored.
>
> > +
> > +     global unsigned char *tempdst;
> > +     global unsigned char *tempsrc;
> > +
> > +     int x;
> > +     int y;
> > +     float x_s;
> > +     float y_s;
> > +     int tempsrc_stride;
> > +     int tempdst_stride;
> > +     int temp_height;
> > +     int temp_width;
> > +     int curpos;
> > +     unsigned char def;
> > +     if (global_id < width*height) {
> > +        y = global_id/width;
> > +        x = global_id%width;
> > +        x_s = x * matrix[0] + y * matrix[1] + matrix[2];
> > +        y_s = x * matrix[3] + y * matrix[4] + matrix[5];
> > +        tempdst = dstY;
> > +        tempsrc = srcY;
> > +        tempsrc_stride = src_stride_lu;
> > +        tempdst_stride = dst_stride_lu;
> > +        temp_height = height;
> > +        temp_width = width;
> > +     }
> > +     else if ((global_id >= width*height)&&(global_id < width*height +
> ch*cw)) {
> > +        y = (global_id - width*height)/cw;
> > +        x = (global_id - width*height)%cw;
> > +        x_s = x * matrix2[0] + y * matrix2[1] + matrix2[2];
> > +        y_s = x * matrix2[3] + y * matrix2[4] + matrix2[5];
> > +        tempdst = dstU;
> > +        tempsrc = srcU;
> > +        tempsrc_stride = src_stride_ch;
> > +        tempdst_stride = dst_stride_ch;
> > +        temp_height = height;
> > +        temp_width = width;
> > +        temp_height = ch;
> > +        temp_width = cw;
> > +     }
> > +     else {
> > +        y = (global_id - width*height - ch*cw)/cw;
> > +        x = (global_id - width*height - ch*cw)%cw;
> > +        x_s = x * matrix2[0] + y * matrix2[1] + matrix2[2];
> > +        y_s = x * matrix2[3] + y * matrix2[4] + matrix2[5];
> > +        tempdst = dstV;
> > +        tempsrc = srcV;
> > +        tempsrc_stride = src_stride_ch;
> > +        tempdst_stride = dst_stride_ch;
> > +        temp_height = ch;
> > +        temp_width = cw;
> > +     }
> > +     curpos = y * tempdst_stride + x;
> > +     switch (fillmethod) {
> > +        case 1:
> > +            def = tempsrc[y*tempsrc_stride+x];
> > +            break;
> > +        case 2:
> > +            y_s = clipf(y_s, 0, temp_height - 1);
> > +            x_s = clipf(x_s, 0, temp_width - 1);
> > +            def = tempsrc[(int)y_s * tempsrc_stride + (int)x_s];
> > +            break;
> > +        case 3:
> > +            y_s = (y_s < 0) ? -y_s : (y_s >= temp_height) ?
> (temp_height + temp_height - y_s) : y_s;
> > +            x_s = (x_s < 0) ? -x_s : (x_s >= temp_width) ? (temp_width
> + temp_width - x_s) : x_s;
> > +            def = tempsrc[(int)y_s * tempsrc_stride + (int)x_s];
> > +            break;
> > +         }
> > +    switch (interpolate) {
> > +        case 0:
> > +            tempdst[curpos] = interpolate_nearest(x_s, y_s, tempsrc,
> temp_width, temp_height, tempsrc_stride, def);
> > +            break;
> > +        case 1:
> > +            tempdst[curpos] = interpolate_bilinear(x_s, y_s, tempsrc,
> temp_width, temp_height, tempsrc_stride, def);
> > +            break;
> > +        case 2:
> > +            tempdst[curpos] = interpolate_biquadratic(x_s, y_s,
> tempsrc, temp_width, temp_height, tempsrc_stride, def);
> > +            break;
> > +        }
> > +}
> > +
> > +);
> > diff --git a/libavfilter/transform_opencl.c
> b/libavfilter/transform_opencl.c
> > new file mode 100644
> > index 0000000..4acec2b
> > --- /dev/null
> > +++ b/libavfilter/transform_opencl.c
> > @@ -0,0 +1,157 @@
> > +/*
> > + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> > + *
> > + *
>
> Nit: an empty line is enough
>
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> > + */
> > +
> > +/**
> > + * @file
> > + * transform input video
> > + */
> > +
> > +#include "libavutil/common.h"
> > +#include "libavutil/avassert.h"
> > +#include "libavutil/avstring.h"
> > +#include "libavutil/openclwrapper.h"
> > +#include "transform.h"
> > +#include "transform_opencl.h"
> > +
> > +
> > +
> > +static int ff_filter_transform_func(void **userdata, AV_KernelEnv *kenv)
> > +{
> > +    cl_mem src = (cl_mem)userdata[0];
> > +    cl_mem dst = (cl_mem)userdata[1];
> > +    int src_stride_lu = (int)userdata[2];
> > +    int dst_stride_lu = (int)userdata[3];
> > +    int src_stride_ch = (int)userdata[4];
> > +    int dst_stride_ch = (int)userdata[5];
> > +    int width      = (int)userdata[6];
> > +    int height     = (int)userdata[7];
> > +    int cw          = (int)userdata[8];
> > +    int ch     = (int)userdata[9];
> > +    float *matrix  = (float *)userdata[10];
> > +    float *matrix2  = (float *)userdata[11];
> > +    int interpolate = (int)userdata[12];
> > +    int fillmethod  = (int)userdata[13];
> > +    cl_mem matrix_buf = (cl_mem)userdata[14];
> > +    cl_mem matrix_buf2  = (cl_mem)userdata[15];
> > +    AV_KernelEnv *env  = (AV_KernelEnv *)userdata[16];
> > +    cl_uint status;
> > +    void *mapped;
> > +    const size_t global_work_size = width * height + 2 * ch * cw;
> > +    int m_size = 6;
> > +    cl_kernel kernel;
> > +    int arg_no;
> > +
> > +
> > +    mapped = clEnqueueMapBuffer(kenv->command_queue, matrix_buf,
> CL_TRUE, CL_MAP_WRITE, 0, m_size*sizeof(cl_float),0,NULL, NULL, NULL);
> > +    memcpy(mapped,matrix,m_size*sizeof(cl_float));
> > +    clEnqueueUnmapMemObject(kenv->command_queue, matrix_buf, mapped, 0,
> NULL, NULL);
> > +
> > +    mapped = clEnqueueMapBuffer(kenv->command_queue, matrix_buf2,
> CL_TRUE, CL_MAP_WRITE, 0, m_size*sizeof(cl_float),0,NULL, NULL, NULL);
> > +    memcpy(mapped,matrix2,m_size*sizeof(cl_float));
> > +    clEnqueueUnmapMemObject(kenv->command_queue, matrix_buf2, mapped,
> 0, NULL, NULL);
> > +
> > +    if (!env->kernel) {
> > +        status =  av_opencl_create_kernel("avfilter_transform", kenv);
> > +        if (status) {
> > +            av_log(NULL,AV_LOG_ERROR,"clCreateKernel Error
> %s\n","avfilter_transform");
> > +            return 0;
> > +        }
> > +        env->command_queue = kenv->command_queue;
> > +        env->context = kenv->context;
> > +        env->kernel = kenv->kernel;
> > +        av_strlcpy(env->kernel_name,kenv->kernel_name,150);
> > +        env->program = kenv->program;
> > +    }
> > +    kernel = env->kernel;
> > +    arg_no = 0;
>
> > +    AV_OPENCLCHECK_SET_KERNEL_ARG(sizeof(cl_mem),&src);
> > +    AV_OPENCLCHECK_SET_KERNEL_ARG(sizeof(cl_mem),&dst);
> > +    AV_OPENCLCHECK_SET_KERNEL_ARG(sizeof(cl_mem),&matrix_buf);
> > +    AV_OPENCLCHECK_SET_KERNEL_ARG(sizeof(cl_mem),&matrix_buf2);
> > +    AV_OPENCLCHECK_SET_KERNEL_ARG(sizeof(cl_int),&interpolate);
> > +    AV_OPENCLCHECK_SET_KERNEL_ARG(sizeof(cl_int),&fillmethod);
> > +    AV_OPENCLCHECK_SET_KERNEL_ARG(sizeof(cl_int),&src_stride_lu);
> > +    AV_OPENCLCHECK_SET_KERNEL_ARG(sizeof(cl_int),&dst_stride_lu);
> > +    AV_OPENCLCHECK_SET_KERNEL_ARG(sizeof(cl_int),&src_stride_ch);
> > +    AV_OPENCLCHECK_SET_KERNEL_ARG(sizeof(cl_int),&dst_stride_ch);
> > +    AV_OPENCLCHECK_SET_KERNEL_ARG(sizeof(cl_int),&height);
> > +    AV_OPENCLCHECK_SET_KERNEL_ARG(sizeof(cl_int),&width);
> > +    AV_OPENCLCHECK_SET_KERNEL_ARG(sizeof(cl_int),&ch);
> > +    AV_OPENCLCHECK_SET_KERNEL_ARG(sizeof(cl_int),&cw);
> > +
> > +    AV_OPENCLCHECK( clEnqueueNDRangeKernel, env->command_queue,
> env->kernel, 1, NULL,
> > +              &global_work_size, NULL, 0, NULL, NULL);
>
> since this is the only user, move the macro definition locally to this
> file (rule: avoid indirections unless strictly necessary).
>

 This is because we only submit deshake filter recently,other OpenCL
filters we will submit later will use the macro
AV_OPENCLCHECK_SET_KERNEL_ARG and AV_OPENCLCHECK to easy the code write.So
we add it in opencl wraper.If not, every OpenCL filter will add the two
macro.


> > +    clFinish(kenv->command_queue);//add for time test
> > +    return 1;
> > +}
> > +
> > +
> > +void avfilter_transform_cl( void *src,  void *dst,
> > +                        int src_stride_lu, int dst_stride_lu,
> > +                        int src_stride_ch, int dst_stride_ch,
> > +                        int width, int height, int cw, int ch,
> > +                        const float *matrix, const float *matrix2,
> > +                        const void *matrix_cl, const void *matrix2_cl,
> > +                        enum InterpolateMethod interpolate,
> > +                        enum FillMethod fill,AV_KernelEnv *env)
> > +
> > +{
> > +        int interpolate_t = interpolate;
> > +        int fillmethod    = fill;
> > +        void *userdata[17];
> > +
> > +        userdata[0] = (void *)src;
> > +        userdata[1] = (void *)dst;
> > +        userdata[2] = (void *)src_stride_lu;
> > +        userdata[3] = (void *)dst_stride_lu;
> > +        userdata[4] = (void *)src_stride_ch;
> > +        userdata[5] = (void *)dst_stride_ch;
> > +        userdata[6] = (void *)width;
> > +        userdata[7] = (void *)height;
> > +        userdata[8] = (void *)cw;
> > +        userdata[9] = (void *)ch;
> > +        userdata[10] = (void *)matrix;
> > +        userdata[11] = (void *)matrix2;
> > +        userdata[12] = (void *)interpolate_t;
> > +        userdata[13] = (void *)fillmethod;
> > +        userdata[14] = (void *)matrix_cl;
> > +        userdata[15] = (void *)matrix2_cl;
> > +        userdata[16] = (void *)env;
> > +
> > +
> > +
> > +        if(!av_opencl_run_kernel("deshake_opencl", userdata)) {
> > +            av_log( NULL,AV_LOG_ERROR,"run kernel[%s] faild\n",
> "deshake_opencl" );
> > +            return;
> > +        }
> > +}
> > +
> > +int ff_init_transform(void)
> > +{
> > +    int st = av_opencl_register_kernel_wrapper( "deshake_opencl",
> ff_filter_transform_func);
> > +    if (!st) {
>
> > +        av_log(NULL,AV_LOG_ERROR, "register kernel[%s] faild\n",
> "avfilter_transform" );
>
>
> > +        return -1;
>
> please return meaningful error code here
>
> > +    }
> > +    return 0;
> > +}
> > +
> > +
> > diff --git a/libavfilter/transform_opencl.h
> b/libavfilter/transform_opencl.h
> > new file mode 100644
> > index 0000000..aa1b4e0
> > --- /dev/null
> > +++ b/libavfilter/transform_opencl.h
> > @@ -0,0 +1,38 @@
> > +/*
> > + * Copyright (C) 2013 Wei Gao <weigao at multicorewareinc.com>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> > + */
> > +
> > +#ifndef AVFILTER_TRANSFORM_OPENCL_H
> > +#define AVFILTER_TRANSFORM_OPENCL_H
> > +
> > +#include <stdint.h>
> > +
> > +void avfilter_transform_cl( void *src,  void *dst,
> > +                        int src_stride_lu, int dst_stride_lu,
> > +                        int src_stride_ch, int dst_stride_ch,
> > +                        int width, int height, int cw, int ch,
> > +                        const float *matrix, const float *matrix2,
> > +                        const void *matrix_cl, const void *matrix2_cl,
>
> > +                        enum InterpolateMethod interpolate,
> > +                        enum FillMethod fill, AV_KernelEnv *env);
>
> where are these enums defined?
>
They are defined in "transform.h", I move the header file here.

>
> > +int ff_init_transform(void);
>
> Is this file supposed to be public? If not you should avoid to use
> avfilter_ prefix (reserved for public functions).
>
> I suggest something along the lines of:
>
> ff_opencl_transform()
> ff_opencl_transform_init()
>
This file,I referenced "transform.h",this file will be used in
deshake_opencl filter.

>
> > +
> > +
> > +
> > +#endif /* AVFILTER_TRANSFORM_H */
> > diff --git a/libavfilter/vf_deshake.c b/libavfilter/vf_deshake.c
> > index c03919c..3e25e73 100644
> > --- a/libavfilter/vf_deshake.c
> > +++ b/libavfilter/vf_deshake.c
> > @@ -1,6 +1,7 @@
> >  /*
> >   * Copyright (C) 2010 Georg Martius <georg.martius at web.de>
> >   * Copyright (C) 2010 Daniel G. Taylor <dan at programmer-art.org>
>
> > + * Modified by Wei Gao <weigao at multicorewareinc.com>
>
> Just add your Copyright if you think so.
>
> >   *
> >   * This file is part of FFmpeg.
> >   *
> > @@ -59,6 +60,10 @@
> >  #include "libavcodec/dsputil.h"
> >
> >  #include "transform.h"
> > +#if CONFIG_DESHAKE_OPENCL_FILTER
> > +#include "libavutil/openclwrapper.h"
> > +#include "transform_opencl.h"
> > +#endif
> >
> >  #define CHROMA_WIDTH(link)  -((-link->w) >>
> av_pix_fmt_desc_get(link->format)->log2_chroma_w)
> >  #define CHROMA_HEIGHT(link) -((-link->h) >>
> av_pix_fmt_desc_get(link->format)->log2_chroma_h)
> > @@ -84,7 +89,17 @@ typedef struct {
> >      double angle;         ///< Angle of rotation
> >      double zoom;          ///< Zoom percentage
> >  } Transform;
> > -
>
> > +#if CONFIG_DESHAKE_OPENCL_FILTER
> > +typedef struct {
> > +    int pre_filter_type;
> > +    int next_filter_type;
> > +    void *cl_inbuf;
> > +    void *cl_outbuf;
> > +    void *matrix_buf;
> > +    void *matrix_buf2;
> > +    AV_KernelEnv kernelev;
> > +}Deshake_opencl_ev;
>
> DeshakeOpenclEv for consistency (CamelStyle is OK for struct names).
>
> > +#endif
> >  typedef struct {
> >      AVClass av_class;
> >      AVFilterBufferRef *ref;    ///< Previous frame
> > @@ -104,6 +119,9 @@ typedef struct {
> >      int ch;
> >      int cx;
> >      int cy;
> > +#if CONFIG_DESHAKE_OPENCL_FILTER
> > +    Deshake_opencl_ev opencl_ev;
> > +#endif
> >  } DeshakeContext;
> >
> >  static int cmp(const double *a, const double *b)
> > @@ -536,6 +554,163 @@ static int filter_frame(AVFilterLink *link,
> AVFilterBufferRef *in)
> >
> >      return ff_filter_frame(outlink, out);
> >  }
> > +#if CONFIG_DESHAKE_OPENCL_FILTER
> > +static av_cold int init_opencl(AVFilterContext *ctx, const char *args)
> > +{
> > +    DeshakeContext *deshake = ctx->priv;
>
> > +    init(ctx,args);
>
> need to check for init return value
>
> > +    if (av_opencl_init_run_env(0,NULL,"-I.",NULL)) {
> > +        av_log(ctx,AV_LOG_ERROR,"Init OpenCL Failed\n");
> > +    }
> > +    memset(&(deshake->opencl_ev),0,sizeof(Deshake_opencl_ev));
> > +    deshake->opencl_ev.cl_inbuf= NULL;
> > +    deshake->opencl_ev.cl_outbuf = NULL;
> > +    av_opencl_create_buffer(&(deshake->opencl_ev.matrix_buf),
> CL_MEM_READ_ONLY,6*sizeof(cl_float),NULL);
> > +
>  av_opencl_create_buffer(&(deshake->opencl_ev.matrix_buf2),CL_MEM_READ_ONLY,6*sizeof(cl_float),NULL);
>
> > +    if (ff_init_transform())
> > +        return -1;
> > +    return 0;
>
> return ff_init_transform()
>
> > +}
> > +
> > +static av_cold void uninit_opencl(AVFilterContext *ctx)
> > +{
> > +    DeshakeContext *deshake = ctx->priv;
> > +    if (deshake->opencl_ev.cl_inbuf) {
> > +        av_opencl_release_buffer(deshake->opencl_ev.cl_inbuf);
> > +    }
> > +    if (deshake->opencl_ev.cl_outbuf) {
> > +        av_opencl_release_buffer(deshake->opencl_ev.cl_outbuf);
> > +    }
> > +    if (deshake->opencl_ev.matrix_buf) {
> > +        av_opencl_release_buffer(deshake->opencl_ev.matrix_buf);
> > +    }
> > +    if (deshake->opencl_ev.matrix_buf2) {
> > +        av_opencl_release_buffer(deshake->opencl_ev.matrix_buf2);
> > +    }
> > +    av_opencl_release_opencl_run_env();
> > +    uninit(ctx);
> > +}
> > +
> > +static int filter_frame_opencl(AVFilterLink *link, AVFilterBufferRef
> *in)
> > +{
> > +    DeshakeContext *deshake = link->dst->priv;
> > +    AVFilterLink *outlink = link->dst->outputs[0];
> > +    AVFilterBufferRef *out;
> > +    Transform t = {{0},0}, orig = {{0},0};
> > +    float alpha = 2.0 / deshake->refcount;
> > +    char tmp[256];
> > +    float matrixY[9];
> > +    float matrixUV[9];
> > +
> > +    out = ff_get_video_buffer(outlink, AV_PERM_WRITE, outlink->w,
> outlink->h);
> > +    if (!out) {
> > +        avfilter_unref_bufferp(&in);
> > +        return AVERROR(ENOMEM);
> > +    }
> > +    avfilter_copy_buffer_ref_props(out, in);
> > +
> > +    if (!deshake->opencl_ev.cl_inbuf) {
> > +
>  av_opencl_create_buffer(&(deshake->opencl_ev.cl_inbuf),CL_MEM_READ_ONLY,(in->linesize[0]
> * in->video->h) + (in->linesize[1] * (in->video->h>>1)) + (in->linesize[2]
> * (in->video->h>>1)),NULL);
> > +    }
> > +    if (!deshake->opencl_ev.cl_outbuf) {
> > +
>  av_opencl_create_buffer(&(deshake->opencl_ev.cl_outbuf),CL_MEM_READ_WRITE,(out->linesize[0]
> * out->video->h) + (out->linesize[1] * (out->video->h>>1)) +
> (out->linesize[2] * (out->video->h>>1)),NULL);
> > +    }
>
> please split these overly long lines, same below.
>
> > +
> > +
>  av_opencl_write_cl_buffer(deshake->opencl_ev.cl_inbuf,in->data[0],in->data[1],in->data[2],in->linesize[0],in->linesize[1],in->linesize[2],link->h,0);
>
> nit here and below: deshake->opencl_ev.cl_inbuf, in->data[0], ...
> that is, space after comma.
>
> > +    if (deshake->cx < 0 || deshake->cy < 0 || deshake->cw < 0 ||
> deshake->ch < 0) {
> > +        // Find the most likely global motion for the current frame
> > +        find_motion(deshake, (deshake->ref == NULL) ? in->data[0] :
> deshake->ref->data[0], in->data[0], link->w, link->h, in->linesize[0], &t);
> > +    } else {
> > +        uint8_t *src1 = (deshake->ref == NULL) ? in->data[0] :
> deshake->ref->data[0];
> > +        uint8_t *src2 = in->data[0];
> > +
> > +        deshake->cx = FFMIN(deshake->cx, link->w);
> > +        deshake->cy = FFMIN(deshake->cy, link->h);
> > +
> > +        if ((unsigned)deshake->cx + (unsigned)deshake->cw > link->w)
> deshake->cw = link->w - deshake->cx;
> > +        if ((unsigned)deshake->cy + (unsigned)deshake->ch > link->h)
> deshake->ch = link->h - deshake->cy;
> > +
> > +        // Quadword align right margin
> > +        deshake->cw &= ~15;
> > +
> > +        src1 += deshake->cy * in->linesize[0] + deshake->cx;
> > +        src2 += deshake->cy * in->linesize[0] + deshake->cx;
> > +
> > +        find_motion(deshake, src1, src2, deshake->cw, deshake->ch,
> in->linesize[0], &t);
> > +    }
> > +
> > +
> > +    // Copy transform so we can output it later to compare to the
> smoothed value
> > +    orig.vector.x = t.vector.x;
> > +    orig.vector.y = t.vector.y;
> > +    orig.angle = t.angle;
> > +    orig.zoom = t.zoom;
> > +
> > +    // Generate a one-sided moving exponential average
> > +    deshake->avg.vector.x = alpha * t.vector.x + (1.0 - alpha) *
> deshake->avg.vector.x;
> > +    deshake->avg.vector.y = alpha * t.vector.y + (1.0 - alpha) *
> deshake->avg.vector.y;
> > +    deshake->avg.angle = alpha * t.angle + (1.0 - alpha) *
> deshake->avg.angle;
> > +    deshake->avg.zoom = alpha * t.zoom + (1.0 - alpha) *
> deshake->avg.zoom;
> > +
> > +    // Remove the average from the current motion to detect the motion
> that
> > +    // is not on purpose, just as jitter from bumping the camera
> > +    t.vector.x -= deshake->avg.vector.x;
> > +    t.vector.y -= deshake->avg.vector.y;
> > +    t.angle -= deshake->avg.angle;
> > +    t.zoom -= deshake->avg.zoom;
> > +
> > +    // Invert the motion to undo it
> > +    t.vector.x *= -1;
> > +    t.vector.y *= -1;
> > +    t.angle *= -1;
> > +
> > +    // Write statistics to file
> > +    if (deshake->fp) {
> > +        snprintf(tmp, 256, "%f, %f, %f, %f, %f, %f, %f, %f, %f, %f, %f,
> %f\n", orig.vector.x, deshake->avg.vector.x, t.vector.x, orig.vector.y,
> deshake->avg.vector.y, t.vector.y, orig.angle, deshake->avg.angle, t.angle,
> orig.zoom, deshake->avg.zoom, t.zoom);
> > +        fwrite(tmp, sizeof(char), strlen(tmp), deshake->fp);
> > +    }
> > +
> > +    // Turn relative current frame motion into absolute by adding it to
> the
> > +    // last absolute motion
> > +    t.vector.x += deshake->last.vector.x;
> > +    t.vector.y += deshake->last.vector.y;
> > +    t.angle += deshake->last.angle;
> > +    t.zoom += deshake->last.zoom;
> > +
> > +    // Shrink motion by 10% to keep things centered in the camera frame
> > +    t.vector.x *= 0.9;
> > +    t.vector.y *= 0.9;
> > +    t.angle *= 0.9;
> > +
> > +    // Store the last absolute motion information
> > +    deshake->last.vector.x = t.vector.x;
> > +    deshake->last.vector.y = t.vector.y;
> > +    deshake->last.angle = t.angle;
> > +    deshake->last.zoom = t.zoom;
> > +
> > +    avfilter_get_matrix(t.vector.x, t.vector.y, t.angle, 1.0 + t.zoom /
> 100.0, matrixY);
> > +    avfilter_get_matrix(t.vector.x / (link->w / CHROMA_WIDTH(link)),
> t.vector.y / (link->h / CHROMA_HEIGHT(link)), t.angle, 1.0 + t.zoom /
> 100.0, matrixUV);
> > +
>  avfilter_transform_cl(deshake->opencl_ev.cl_inbuf,deshake->opencl_ev.cl_outbuf,
> > +                    in->linesize[0], out->linesize[0],
> > +                    in->linesize[1], out->linesize[1],
> > +                    link->w, link->h, CHROMA_WIDTH(link),
> CHROMA_HEIGHT(link),
> > +                    matrixY, matrixUV,
> > +                    deshake->opencl_ev.matrix_buf,
> deshake->opencl_ev.matrix_buf2,
> > +                    INTERPOLATE_BILINEAR,
> deshake->edge,&(deshake->opencl_ev.kernelev));
> > +    if (!deshake->opencl_ev.next_filter_type)
> > +
>  av_opencl_read_to_frame_buffer(deshake->opencl_ev.cl_outbuf,out->data[0],out->data[1],out->data[2],out->linesize[0],out->linesize[1],out->linesize[2],link->h);
> > +
> > +    // Cleanup the old reference frame
> > +    avfilter_unref_buffer(deshake->ref);
> > +
> > +    // Store the current frame as the reference frame for calculating
> the
> > +    // motion of the next frame
> > +    deshake->ref = in;
> > +
> > +    return ff_filter_frame(outlink, out);
> > +}
> > +#endif
> > +
> >
> >  static const AVFilterPad deshake_inputs[] = {
> >      {
> > @@ -566,3 +741,39 @@ AVFilter avfilter_vf_deshake = {
> >      .inputs        = deshake_inputs,
> >      .outputs       = deshake_outputs,
> >  };
> > +
> > +
> > +
> > +
> > +#if CONFIG_DESHAKE_OPENCL_FILTER
> > +
> > +static const AVFilterPad deshake_opencl_inputs[] = {
> > +    {
> > +        .name         = "default",
> > +        .type         = AVMEDIA_TYPE_VIDEO,
> > +        .filter_frame = filter_frame_opencl,
> > +        .config_props = config_props,
> > +        .min_perms    = AV_PERM_READ | AV_PERM_PRESERVE,
> > +    },
> > +    { NULL }
> > +};
> > +
> > +static const AVFilterPad deshake_opencl_outputs[] = {
> > +    {
> > +        .name = "default",
> > +        .type = AVMEDIA_TYPE_VIDEO,
> > +    },
> > +    { NULL }
> > +};
> > +
> > +AVFilter avfilter_vf_deshake_opencl = {
> > +    .name          = "deshake_opencl",
> > +    .description   = NULL_IF_CONFIG_SMALL("Stabilize shaky video using
> OpenCL."),
> > +    .priv_size     = sizeof(DeshakeContext),
> > +    .init          = init_opencl,
> > +    .uninit        = uninit_opencl,
> > +    .query_formats = query_formats,
> > +    .inputs        = deshake_opencl_inputs,
> > +    .outputs       = deshake_opencl_outputs,
> > +};
> > +#endif
>
> General question: is this filter supposed to be binary compatible with
> deshake? Or it just shares the same syntax?
>

Sorry, I don't get what your mean.Our idea is to make the deshake_opencl
filter can be used both in ffmpeg.exe and a filter in libavfilter.lib witch
can be used in other application program.


> > diff --git a/libavutil/Makefile b/libavutil/Makefile
> > index 544c33f..307d2f8 100644
> > --- a/libavutil/Makefile
> > +++ b/libavutil/Makefile
> > @@ -50,6 +50,8 @@ HEADERS = adler32.h
>                   \
> >
> >  HEADERS-$(CONFIG_LZO)                   += lzo.h
> >
> > +HEADERS-$(CONFIG_OPENCL)                += openclwrapper.h
> > +
> >  ARCH_HEADERS = bswap.h
>  \
> >                 intmath.h
>  \
> >                 intreadwrite.h
> \
> > @@ -104,6 +106,8 @@ OBJS-$(CONFIG_LZO)                      += lzo.o
> >
> >  OBJS += $(COMPAT_OBJS:%=../compat/%)
> >
> > +OBJS-$(CONFIG_OPENCL)                   += openclwrapper.o
> > +
> >  SKIPHEADERS          = old_pix_fmts.h
> >
> >  TESTPROGS = adler32
> \
> > diff --git a/libavutil/openclwrapper.c b/libavutil/openclwrapper.c
> > new file mode 100644
> > index 0000000..65cb460
> > --- /dev/null
> > +++ b/libavutil/openclwrapper.c
> > @@ -0,0 +1,808 @@
> > +/*
> > + * Copyright (C) 2012 Peng Gao <peng at multicorewareinc.com>
> > + * Copyright (C) 2012 Li   Cao <li at multicorewareinc.com>
> > + * Copyright (C) 2012 Wei  Gao <weigao at multicorewareinc.com>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> > + */
> > +
> > +#include "openclwrapper.h"
> > +#include "avstring.h"
> > +#include "log.h"
> > +
> > +
> > +#define MAX_KERNEL_STRING_LEN   64
> > +#define MAX_CLFILE_NUM 50
> > +#define MAX_CLKERNEL_NUM 200
> > +#define MAX_CLFILE_PATH 255
> > +#define MAX_KERNEL_NUM  50
> > +#define MAX_KERNEL_NAME_LEN 64
> > +#define MAX_FILTER_NAME_LEN 64
> > +#define MAX_FILTER_NUM 200
> > +
> > +typedef struct OpenCLEnv {
> > +    cl_platform_id platform;
> > +    cl_context   context;
> > +    cl_device_id devices;
> > +    cl_command_queue command_queue;
> > +}OpenCLEnv;
> > +
> > +typedef struct GPUEnv {
> > +    //share vb in all modules in hb library
> > +    cl_platform_id platform;
> > +    cl_device_type devide_type;
> > +    cl_context context;
> > +    cl_device_id *devices;
> > +    cl_device_id  dev;
> > +    cl_command_queue command_queue;
> > +    cl_kernel kernels[MAX_CLFILE_NUM];
> > +    cl_program programs[MAX_CLFILE_NUM]; //one program object maps one
> kernel source file
> > +    char  kernel_srcfile[MAX_CLFILE_NUM][256];   //the max len of
> kernel file name is 256
> > +    int file_count; // only one kernel file
> > +
> > +    char kernel_names[MAX_CLKERNEL_NUM][MAX_KERNEL_STRING_LEN+1];
> > +    av_cl_kernel_function kernel_functions[MAX_CLKERNEL_NUM];
> > +    const char *kernel_code[MAX_CLKERNEL_NUM];
> > +    int kernel_count;
> > +    int reg_kernel_count;
> > +    int is_user_created; // 1: created , 0:no create and needed to
> create by opencl wrapper
> > +    uint8_t *temp_buffer;
> > +    int temp_buffer_size;
> > +}GPUEnv;
> > +
> > +typedef struct filter_buffer_node {
> > +    char filter_name[MAX_FILTER_NAME_LEN+1];
> > +    void *cl_inbuf;
> > +}filter_buffer_node;
>
> please keep consisten naming, this could be FilterBufferNode
>
> > +
> > +typedef struct OpenclUtils {
> > +    const AVClass *class;
> > +    int   log_offset;
> > +    void *log_ctx;
> > +} OpenclUtils;
> > +
>
> > +static const AVClass openclwrapperutils_class = { "OPENCLWRAPPERUTILS",
> av_default_item_name, NULL, LIBAVUTIL_VERSION_INT, offsetof(OpenclUtils,
> log_offset), offsetof(OpenclUtils, log_ctx) };
>
> split long line
>
> > +static OpenclUtils openclwrapperutils =
> {&openclwrapperutils_class,0,NULL};
> > +
> > +
> > +static GPUEnv gpu_env = {0};
> > +static filter_buffer_node filter_buffer[MAX_FILTER_NUM] = {{"", NULL}};
> > +static int isinited = 0;
> > +
> > +
>
> > +#define ADD_FILTER_KERNEL_CFG(s,p){\
> > +        gpu_env.kernel_code[gpu_env.kernel_count] = p;\
> > +
>  av_strlcpy(gpu_env.kernel_names[gpu_env.kernel_count],s,MAX_KERNEL_STRING_LEN+1);\
> > +        gpu_env.kernel_count++;}
> > +
> > +
> > +void ff_opencl_regist_kernel(const char *kernel_name,const char
> *kernel_code)
> > +{
> > +    ADD_FILTER_KERNEL_CFG(kernel_name,kernel_code)
> > +}
>
> avoid macros used just once, they obfuscate the code for no gain.
>
> > +
> > +static int binary_generated(cl_context context, const char *
> cl_file_name, FILE ** fhandle)
> > +{
> > +    int i = 0;
> > +    cl_int status;
> > +    size_t numDevices;
>
> num_devices
>
> > +    cl_device_id *devices;
> > +    FILE * fd = NULL;
> > +    status = clGetContextInfo(context,
> > +                              CL_CONTEXT_NUM_DEVICES,
> > +                              sizeof(numDevices),
> > +                              &numDevices,
> > +                              NULL);
> > +    if (status != CL_SUCCESS)
> > +        return AVERROR_EXTERNAL;
> > +
> > +    devices = (cl_device_id*)av_malloc(sizeof(cl_device_id) *
> numDevices);
> > +    if (!devices)
> > +        return AVERROR(ENOMEM);
> > +
> > +    /* grab the handles to all of the devices in the context. */
> > +    status = clGetContextInfo(context,
> > +                              CL_CONTEXT_DEVICES,
> > +                              sizeof(cl_device_id) * numDevices,
> > +                              devices,
> > +                              NULL);
> > +
> > +    status = 0;
> > +    /* dump out each binary into its own separate file. */
> > +    for (i = 0; i < numDevices; i++) {
>
> > +        char fileName[256] = {0};
>
> ditto
>
> > +        char cl_name[128] = {0};
> > +        if (devices[i] != 0) {
> > +            char deviceName[1024];
> > +            status = clGetDeviceInfo(devices[i],
> > +                                     CL_DEVICE_NAME,
> > +                                     sizeof(deviceName),
> > +                                     deviceName,
> > +                                     NULL);
> > +            memcpy(cl_name,cl_file_name,strlen(cl_file_name));
> > +            cl_name[strlen(cl_file_name) + 1] = '\0';
> > +            snprintf( fileName, sizeof(fileName),"./%s-%s.bin",
> cl_name, deviceName );
> > +            fd = fopen( fileName, "rb" );
> > +            status = (fd != NULL) ? 1 : 0;
> > +        }
> > +    }
> > +
> > +    if (devices)
> > +        av_free(devices);
> > +
> > +    if(fd)
> > +        *fhandle = fd;
> > +    return status;
> > +}
> > +
> > +static int write_binary_to_file(const char* fileName, const char*
> birary, size_t numBytes)
>
> birary seems a typo
>
> > +{
> > +    FILE *output = NULL;
> > +    output = fopen(fileName, "wb");
> > +    if(!output)
> > +        return 0;
> > +
> > +    fwrite( birary, sizeof(char), numBytes, output );
> > +    fclose( output );
> > +
> > +    return 1;
> > +}
>
> also: do you need a function for this? (I see that it is only called
> once).
>
>
> > +
> > +
> > +static int generat_bin_from_kernel_source(cl_program program, const
> char * cl_file_name)
> > +{
> > +    int i = 0;
> > +    cl_int status;
> > +    size_t *binarysizes, numdevices;
> > +    cl_device_id *devices;
> > +    char **binaries;
> > +    status = clGetProgramInfo(program,
> > +                              CL_PROGRAM_NUM_DEVICES,
> > +                              sizeof(numdevices),
> > +                              &numdevices,
> > +                              NULL);
> > +    if (status != CL_SUCCESS) {
> > +
>  av_log(&openclwrapperutils,AV_LOG_ERROR,"generat_bin_from_kernel_source:clGetProgramInfo
> error,status = %d\n",status);
> > +        return AVERROR_EXTERNAL;
> > +    }
>
> > +    devices = (cl_device_id*)av_malloc(sizeof(cl_device_id) *
> numdevices);
>
> useless cast, same below
>
> > +    if(!devices)
> > +        return AVERROR(ENOMEM);
> > +    /* grab the handles to all of the devices in the program. */
> > +    status = clGetProgramInfo(program,
> > +                              CL_PROGRAM_DEVICES,
> > +                              sizeof(cl_device_id) * numdevices,
> > +                              devices,
> > +                              NULL);
> > +
> > +    /* figure out the sizes of each of the binaries. */
> > +    binarysizes = (size_t*)av_malloc(sizeof(size_t) * numdevices);
> > +
> > +    status = clGetProgramInfo(program,
> > +                              CL_PROGRAM_BINARY_SIZES,
> > +                              sizeof(size_t) * numdevices,
> > +                              binarysizes, NULL);
> > +    if (status != CL_SUCCESS) {
> > +
>  av_log(&openclwrapperutils,AV_LOG_ERROR,"generat_bin_from_kernel_source:clGetProgramInfo
> error,status = %d\n",status);
> > +        return AVERROR_EXTERNAL;
> > +    }
> > +    /* copy over all of the generated binaries. */
> > +    binaries = (char**)av_malloc(sizeof(char *) * numdevices);
> > +    if(!binaries)
> > +        return AVERROR(ENOMEM);
> > +    memset(binaries,0,sizeof(char *) * numdevices);
> > +    for (i = 0; i < numdevices; i++) {
> > +        if (binarysizes[i] != 0) {
> > +            binaries[i] = (char*)av_malloc(sizeof(char) *
> binarysizes[i]);
> > +            if(!binaries[i])
> > +                return AVERROR(ENOMEM);
> > +        }
> > +    }
> > +
> > +    status = clGetProgramInfo(program,
> > +                              CL_PROGRAM_BINARIES,
> > +                              sizeof(char *) * numdevices,
> > +                              binaries,
> > +                              NULL);
> > +    if (status != CL_SUCCESS) {
> > +
>  av_log(&openclwrapperutils,AV_LOG_ERROR,"generat_bin_from_kernel_source:clGetProgramInfo
> error,status = %d\n",status);
> > +        return AVERROR_EXTERNAL;
> > +    }
> > +    /* dump out each binary into its own separate file. */
> > +    for (i = 0; i < numdevices; i++) {
> > +        char filename[256] = {0};
> > +        char cl_name[128] = {0};
> > +        if (binarysizes[i] != 0) {
> > +            char devicename[1024];
> > +            status = clGetDeviceInfo(devices[i],
> > +                                     CL_DEVICE_NAME,
> > +                                     sizeof(devicename),
> > +                                     devicename,
> > +                                     NULL);
> > +            if (status != CL_SUCCESS) {
> > +
>  av_log(&openclwrapperutils,AV_LOG_ERROR,"generat_bin_from_kernel_source:clGetDeviceInfo
> error,status = %d\n",status);
> > +                return AVERROR_EXTERNAL;
> > +            }
> > +            memcpy(cl_name, cl_file_name, strlen(cl_file_name));
> > +            cl_name[strlen(cl_file_name) + 1] = '\0';
> > +            snprintf(filename,sizeof(filename), "./%s-%s.bin", cl_name,
> devicename);
> > +
> > +            if (!write_binary_to_file( filename, binaries[i],
> binarysizes[i])) {
> > +
>  av_log(&openclwrapperutils,AV_LOG_ERROR,"generat_bin_from_kernel_source:clGetDeviceInfo
> write_binary_to_file\n");
> > +                return AVERROR_EXTERNAL;
> > +            }
> > +        }
> > +    }
> > +
> > +    // Release all resouces and memory
> > +    for (i = 0;i < numdevices;i++ ) {
> > +        if (binaries[i])
> > +            av_free(binaries[i]);
> > +    }
> > +    if (binaries)
> > +        av_free(binaries);
> > +
> > +    if (binarysizes)
> > +        av_free(binarysizes);
> > +
> > +    if (devices)
> > +        av_free(devices);
> > +    return 0;
> > +}
> > +
> > +
> > +int av_opencl_create_kernel(const char * kernelname, AV_KernelEnv * env)
> > +{
> > +    int status;
> > +    env->kernel = clCreateKernel(gpu_env.programs[0], kernelname,
> &status);
> > +    env->context = gpu_env.context;
> > +    env->command_queue = gpu_env.command_queue;
>
> > +    if (status != CL_SUCCESS) {
> > +        av_log(&openclwrapperutils,AV_LOG_ERROR,"status = %d\n",status);
>
> Error messages should be intelligible, reporting just a status is not
> enough.
>

There are a lot of statuses,and you can find the status in CL.h(this file
is the OpenCL SDK),so I think just return the status then you can find the
error.Also if you think it should return the error exactly as string, I
will write a function to get the error string.


> > +    }
> > +    return status != CL_SUCCESS ? 1 : 0;
> > +}
> > +
> > +int av_opencl_release_kernel(AV_KernelEnv * env)
> > +{
> > +    int status = clReleaseKernel(env->kernel);
> > +    return status != CL_SUCCESS ? 1 : 0;
> > +}
> > +
> > +
> > +static int init_opencl_env(GPUEnv *gpu_info,void *ext_opencl_info)
> > +{
> > +    size_t length;
> > +    cl_int status;
> > +    cl_uint numplatforms, numdevices;
> > +    cl_platform_id *platforms;
> > +    cl_context_properties cps[3];
> > +    char platform_name[100];
> > +    unsigned int i;
> > +    AV_ExtOpenCLInfo *opencl_info = (AV_ExtOpenCLInfo *)ext_opencl_info;
> > +    if (opencl_info) {
> > +        if(gpu_info->is_user_created)
> > +            return 1;
> > +        gpu_info->platform = opencl_info->platform;
> > +        gpu_info->is_user_created = 1;
> > +        gpu_info->command_queue = opencl_info->command_queue;
> > +        gpu_info->context = opencl_info->context;
> > +        gpu_info->devices = opencl_info->devices;
> > +        gpu_info->dev = opencl_info->dev;
> > +        gpu_info->devide_type = opencl_info->devide_type;
> > +    } else {
> > +        if (!gpu_info->is_user_created) {
> > +            status = clGetPlatformIDs(0,NULL,&numplatforms);
> > +            if (status != CL_SUCCESS) {
> > +                return 1;
> > +            }
> > +            gpu_info->platform = NULL;
> > +            if (0 < numplatforms) {
> > +                platforms = (cl_platform_id*)av_malloc(
> > +                    numplatforms * sizeof(cl_platform_id));
> > +                if (platforms == (cl_platform_id*)NULL) {
> > +                    return 1;
> > +                }
> > +                status = clGetPlatformIDs(numplatforms, platforms,
> NULL);
> > +                if (status != CL_SUCCESS) {
> > +                    return 1;
> > +                }
> > +                for (i = 0; i < numplatforms; i++) {
> > +                    status = clGetPlatformInfo(platforms[i],
> CL_PLATFORM_VENDOR,
> > +                                               sizeof(platform_name),
> platform_name,
> > +                                               NULL);
> > +
> > +                    if ( status != CL_SUCCESS ) {
> > +                        return 1;
> > +                    }
> > +                    gpu_info->platform = platforms[i];
> > +                    av_free(gpu_info->devices);
> > +                    gpu_info->devices = NULL;
> > +                    status = clGetDeviceIDs(gpu_info->platform /*
> platform */,
> > +                                            CL_DEVICE_TYPE_GPU /*
> device_type */,
> > +                                            0 /* num_entries */,
> > +                                            NULL /* devices */,
> > +                                            &numdevices);
> > +
> > +                    if (0 == numdevices) {
> > +                        //find CPU device
> > +                        status = clGetDeviceIDs( gpu_info->platform /*
> platform */,
> > +                                             CL_DEVICE_TYPE_CPU /*
> device_type */,
> > +                                             0 /* num_entries */,
> > +                                             NULL /* devices */,
> > +                                             &numdevices );
> > +                    }
> > +                    if (status != CL_SUCCESS)
> > +                       return 1;
> > +
> > +                    if(numdevices)
> > +                       break;
> > +
> > +                }
> > +                av_free(platforms);
> > +            }
> > +            if (!gpu_info->platform) {
> > +                return 1;
> > +            }
> > +
> > +            /*
> > +             * Use available platform.
> > +             */
> > +            av_log(&openclwrapperutils,AV_LOG_INFO,"Platform Name:
> %s\n",platform_name);
> > +            cps[0] = CL_CONTEXT_PLATFORM;
> > +            cps[1] = (cl_context_properties)gpu_info->platform;
> > +            cps[2] = 0;
> > +            /* Check for GPU. */
> > +            gpu_info->devide_type = CL_DEVICE_TYPE_GPU;
> > +            gpu_info->context = clCreateContextFromType(
> > +                cps, gpu_info->devide_type, NULL, NULL, &status );
> > +            if ((gpu_info->context == (cl_context)NULL) || (status !=
> CL_SUCCESS)) {
> > +                gpu_info->devide_type = CL_DEVICE_TYPE_CPU;
> > +                gpu_info->context = clCreateContextFromType(
> > +                    cps, gpu_info->devide_type, NULL, NULL, &status );
> > +            }
> > +            if ((gpu_info->context == (cl_context)NULL) || (status !=
> CL_SUCCESS)) {
> > +                gpu_info->devide_type = CL_DEVICE_TYPE_DEFAULT;
> > +                gpu_info->context = clCreateContextFromType(
> > +                    cps, gpu_info->devide_type, NULL, NULL, &status );
> > +            }
> > +            if ((gpu_info->context == (cl_context)NULL) || (status !=
> CL_SUCCESS)) {
> > +                return 1;
> > +            }
> > +            /* Detect OpenCL devices. */
> > +            /* First, get the size of device list data */
> > +            status = clGetContextInfo(gpu_info->context,
> CL_CONTEXT_DEVICES,
> > +                                      0, NULL, &length);
> > +            if ((status != CL_SUCCESS) || (length == 0)) {
> > +                return 1;
> > +            }
> > +            /* Now allocate memory for device list based on the size we
> got earlier */
> > +            gpu_info->devices = (cl_device_id*)av_malloc( length );
> > +            if (gpu_info->devices == (cl_device_id*)NULL) {
> > +                return 1;
> > +            }
> > +            /* Now, get the device list data */
> > +            status = clGetContextInfo(gpu_info->context,
> CL_CONTEXT_DEVICES, length,
> > +                                      gpu_info->devices, NULL);
> > +            if (status != CL_SUCCESS) {
> > +                return 1;
> > +            }
> > +            /* Create OpenCL command queue. */
> > +            gpu_info->command_queue =
> clCreateCommandQueue(gpu_info->context,
> > +
> gpu_info->devices[0],
> > +                                                           0, &status);
> > +            if (status != CL_SUCCESS)
> > +                return 1;
> > +        }
> > +    }
> > +    return 0;
> > +}
> > +
> > +
> > +static int release_opencl_env( GPUEnv *gpu_info )
> > +{
> > +    int i, status;
> > +    if (!isinited)
> > +        return 1;
> > +    gpu_info->reg_kernel_count--;
> > +    if (!gpu_info->reg_kernel_count) {
> > +        for (i = 0; i<gpu_env.file_count; i++) {
> > +            if (gpu_env.programs[i]) {
> > +                status = clReleaseProgram(gpu_env.programs[i]);
> > +                if (status != CL_SUCCESS) {
> > +
>  av_log(&openclwrapperutils,AV_LOG_ERROR,"release_opencl_env:clReleaseProgram
> status = %d\n",status);
> > +                }
> > +                gpu_env.programs[i] = NULL;
> > +            }
> > +        }
> > +        if (gpu_env.command_queue) {
> > +            clReleaseCommandQueue(gpu_env.command_queue);
> > +            gpu_env.command_queue = NULL;
> > +        }
> > +        if (gpu_env.context) {
> > +            clReleaseContext(gpu_env.context);
> > +            gpu_env.context = NULL;
> > +        }
> > +        isinited = 0;
> > +        gpu_info->is_user_created = 0;
> > +    }
> > +    return 1;
> > +}
> > +
> > +
> > +int av_opencl_register_kernel_wrapper(const char *kernel_name,
> av_cl_kernel_function function)
> > +{
> > +    for (int i = 0; i < gpu_env.kernel_count; i++) {
> > +        if (av_strcasecmp(kernel_name, gpu_env.kernel_names[i])==0) {
> > +            gpu_env.kernel_functions[i] = function;
> > +            gpu_env.reg_kernel_count++;
> > +            return 1;
> > +        }
> > +    }
> > +    return 0;
> > +}
> > +
> > +static int cached_of_kerner_prg(const GPUEnv *gpu_env, const char *
> cl_file_name)
> > +{
> > +    for (int i = 0; i < gpu_env->file_count; i++) {
> > +        if (av_strcasecmp(gpu_env->kernel_srcfile[i], cl_file_name)==0)
> {
> > +            if(gpu_env->programs[i])
> > +                return 1;
> > +        }
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int compile_kernel_file(const char *filename, GPUEnv *gpu_info,
> > +                            int indx, const char *build_option)
> > +{
> > +    cl_int status;
> > +    size_t length;
> > +    char *source_str;
> > +    const char *source;
> > +    size_t source_size[1];
> > +    char *buildLog = NULL;
> > +    int b_error, binary_status, binary_existed;
> > +    char * binary;
> > +    char *temp;
> > +    size_t numdevices;
> > +    cl_device_id *devices;
> > +    FILE * fd;
> > +    FILE * fd1;
> > +    int idx;
> > +    int kernel_src_size = 0;
> > +    if (cached_of_kerner_prg(gpu_info, filename) == 1)
> > +        return 1;
> > +
> > +    idx = gpu_info->file_count;
> > +    for (int i = 0;i < gpu_env.kernel_count;i++) {
> > +        kernel_src_size += strlen(gpu_env.kernel_code[i]);
> > +    }
> > +    source_str = (char*)av_malloc(kernel_src_size + 2);
> > +    temp = source_str;
> > +    memset(source_str,0x00,kernel_src_size + 2);
> > +    for (int i = 0;i < gpu_env.kernel_count;i++) {
> > +
>  memcpy(temp,gpu_env.kernel_code[i],strlen(gpu_env.kernel_code[i]));
> > +        temp += strlen(gpu_env.kernel_code[i]);
> > +    }
> > +    source = source_str;
> > +    source_size[0] = strlen(source);
> > +
> > +    binary_existed = 0;
> > +    if ((binary_existed = binary_generated(gpu_info->context, filename,
> &fd)) == 1) {
> > +        status = clGetContextInfo(gpu_info->context,
> > +                                  CL_CONTEXT_NUM_DEVICES,
> > +                                  sizeof(numdevices),
> > +                                  &numdevices,
> > +                                  NULL);
> > +        if(status != CL_SUCCESS)
> > +            return 0;
> > +
> > +        devices = (cl_device_id*)av_malloc(sizeof(cl_device_id) *
> numdevices);
> > +        if(!devices)
> > +            return 0;
> > +
> > +        b_error = 0;
> > +        length = 0;
> > +        b_error |= fseek( fd, 0, SEEK_END ) < 0;
> > +        b_error |= ( length = ftell( fd ) ) <= 0;
> > +        b_error |= fseek( fd, 0, SEEK_SET ) < 0;
> > +        if(b_error)
> > +            return 0;
> > +        binary = (char*)av_malloc(length);
> > +        if(!binary)
> > +            return 0;
> > +
> > +        memset(binary, 0, length);
> > +        b_error |= fread( binary, 1, length, fd ) != length;
> > +        fclose( fd );
> > +        fd = NULL;
> > +        /* grab the handles to all of the devices in the context. */
> > +        status = clGetContextInfo(gpu_info->context,
> > +                                  CL_CONTEXT_DEVICES,
> > +                                  sizeof(cl_device_id) * numdevices,
> > +                                  devices,
> > +                                  NULL);
> > +
> > +        gpu_info->programs[idx] =
> clCreateProgramWithBinary(gpu_info->context,
> > +                                                            numdevices,
> > +                                                            devices,
> > +                                                            &length,
> > +                                                            (const
> uint8_t**)&binary,
> > +
>  &binary_status,
> > +                                                            &status );
> > +
> > +        av_freep(devices);
> > +    } else {
> > +
> > +        /* create a CL program using the kernel source */
> > +        gpu_info->programs[idx] =
> clCreateProgramWithSource(gpu_info->context,
> > +                                                            1,
> > +                                                            &source,
> > +                                                            source_size,
> > +                                                            &status);
> > +    }
> > +
> > +    if ((gpu_info->programs[idx] == (cl_program)NULL) || (status !=
> CL_SUCCESS))
> > +        return 0;
> > +
> > +
> > +    /* create a cl program executable for all the devices specified */
> > +    if (!gpu_info->is_user_created)
> > +        status = clBuildProgram( gpu_info->programs[idx], 1,
> gpu_info->devices,
> > +                                 build_option, NULL, NULL );
> > +    else
> > +        status = clBuildProgram( gpu_info->programs[idx], 1,
> &(gpu_info->dev),
> > +                                 build_option, NULL, NULL );
> > +
> > +    if (status != CL_SUCCESS) {
> > +        if (!gpu_info->is_user_created)
> > +            status = clGetProgramBuildInfo(gpu_info->programs[idx],
> > +                                           gpu_info->devices[0],
> > +                                           CL_PROGRAM_BUILD_LOG, 0,
> NULL, &length );
> > +        else
> > +            status = clGetProgramBuildInfo(gpu_info->programs[idx],
> > +                                           gpu_info->dev,
> > +                                           CL_PROGRAM_BUILD_LOG, 0,
> NULL, &length );
> > +
> > +        if (status != CL_SUCCESS) {
> > +            av_log(&openclwrapperutils,AV_LOG_ERROR,"opencl create
> build log fail\n");
> > +            return 0;
> > +        }
> > +        buildLog = (char*)av_malloc( length );
> > +        if (buildLog == (char*)NULL) {
> > +            return 0;
> > +        }
> > +        if (!gpu_info->is_user_created)
> > +            status = clGetProgramBuildInfo( gpu_info->programs[idx],
> gpu_info->devices[0],
> > +                                            CL_PROGRAM_BUILD_LOG,
> length, buildLog, &length );
> > +        else
> > +            status = clGetProgramBuildInfo( gpu_info->programs[idx],
> gpu_info->dev,
> > +                                            CL_PROGRAM_BUILD_LOG,
> length, buildLog, &length );
> > +
> > +        fd1 = fopen( "kernel-build.log", "w+" );
> > +        if (fd1) {
> > +            fwrite(buildLog, sizeof(char), length, fd1);
> > +            fclose(fd1);
> > +        }
> > +        av_free(buildLog);
> > +        return 0;
> > +    }
> > +    av_strlcpy(gpu_env.kernel_srcfile[idx],filename,256);
> > +    if (binary_existed == 0)
> > +        generat_bin_from_kernel_source(gpu_env.programs[idx], filename);
> > +    gpu_info->file_count += 1;
> > +    av_free(source_str);
> > +    return 1;
> > +}
> > +
> > +
> > +static int get_kernel_env_and_func(const char *kernel_name,
> > +                                AV_KernelEnv *env,
> > +                                av_cl_kernel_function *function)
> > +{
> > +    for (int i = 0; i < gpu_env.kernel_count; i++) {
> > +        if (av_strcasecmp(kernel_name, gpu_env.kernel_names[i])==0) {
> > +            //program_idx = 0;
> > +            //GetProgramIndex(i, &gpu_env, &program_idx);
> > +            env->context = gpu_env.context;
> > +            env->command_queue = gpu_env.command_queue;
> > +            env->program = gpu_env.programs[0];
> > +            env->kernel = gpu_env.kernels[i];
> > +            *function = gpu_env.kernel_functions[i];
> > +            return 1;
> > +        }
> > +    }
> > +    return 0;
> > +}
> > +
> > +
> > +int av_opencl_run_kernel(const char *kernel_name, void **userdata)
> > +{
> > +    AV_KernelEnv env;
> > +    av_cl_kernel_function function;
> > +    int status;
> > +    memset(&env, 0, sizeof(AV_KernelEnv));
> > +    status = get_kernel_env_and_func(kernel_name, &env, &function);
> > +    av_strlcpy(env.kernel_name,kernel_name,150);
> > +    if (status == 1) {
> > +        return(function(userdata, &env));
> > +    }
> > +    return 0;
> > +}
> > +
> > +int av_opencl_init_run_env( int argc, char **argv, const char
> *build_option,void *ext_opencl_info)
> > +{
> > +    int status = 0;
> > +    if (MAX_CLKERNEL_NUM <= 0)
> > +        return 1;
> > +    if ((argc > MAX_CLFILE_NUM) || (argc<0))
> > +        return 1;
> > +    if (!isinited) {
> > +        /*initialize devices, context, comand_queue*/
> > +        status = init_opencl_env(&gpu_env,ext_opencl_info);
> > +        if (status) {
> > +            av_log(&openclwrapperutils,AV_LOG_ERROR,"init_opencl_env
> Failed\n");
> > +            return 1;
> > +        }
> > +        /*initialize program, kernel_name, kernel_count*/
> > +        //file_name = argv[i];
> > +        status = compile_kernel_file("ffmpeg-kernels", &gpu_env, 0,
> build_option);
> > +
> > +        if (status == 0 || gpu_env.kernel_count == 0) {
> > +
>  av_log(&openclwrapperutils,AV_LOG_ERROR,"compile_kernel_file Failed status
> = %d,kernel_count = %d\n",status,gpu_env.kernel_count);
> > +            return 1;
> > +        }
> > +        isinited = 1;
> > +    }
> > +    return 0;
> > +}
> > +
> > +
> > +int av_opencl_release_opencl_run_env(void)
> > +{
> > +    return release_opencl_env(&gpu_env);
> > +}
> > +
> > +
> > +int av_opencl_stats(void)
> > +{
> > +    return isinited;
> > +}
> > +
> > +int av_opencl_get_kernel_env(AV_KernelEnv *env)
> > +{
> > +    env->context = gpu_env.context;
> > +    env->command_queue = gpu_env.command_queue;
> > +    env->program = gpu_env.programs[0];
> > +    return 0;
> > +}
> > +
> > +int av_opencl_create_buffer(void **cl_Buf,int flags,int size,void
> *host_ptr)
> > +{
> > +    int status;
> > +    *cl_Buf = clCreateBuffer(gpu_env.context, (flags), (size),
> host_ptr, &status);
> > +
> > +    if (status != CL_SUCCESS) {
> > +        av_log(&openclwrapperutils,AV_LOG_ERROR, "clCreateBuffer error
> '%d'\n", status );
> > +        return AVERROR_EXTERNAL;
> > +    }
> > +    return 0;
> > +}
> > +
> > +void av_opencl_release_buffer(void *cl_Buf)
> > +{
> > +    clReleaseMemObject(cl_Buf);
> > +}
> > +
> > +int av_opencl_read_cl_buffer(void *cl_inBuf,uint8_t *outbuf,int size)
> > +{
> > +    int status;
> > +    void *mapped = clEnqueueMapBuffer(gpu_env.command_queue, cl_inBuf,
>  CL_TRUE,CL_MAP_READ, 0, sizeof(uint8_t) * size, 0, NULL, NULL, &status);
> > +
> > +    if (status != CL_SUCCESS) {
> > +        av_log(&openclwrapperutils,AV_LOG_ERROR,
> "av_opencl_read_cl_buffer error '%d'\n", status );
> > +        return 0;
> > +    }
> > +    memcpy(outbuf,mapped,sizeof(uint8_t) * size);
> > +
> > +    status = clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inBuf,
> mapped, 0, NULL, NULL);
> > +    if (status != CL_SUCCESS) {
> > +        av_log(&openclwrapperutils,AV_LOG_ERROR,
> "av_opencl_read_cl_buffer error '%d'\n", status);
> > +        return 0;
> > +    }
> > +    return 1;
> > +}
> > +int av_opencl_write_cl_buffer(void *cl_inBuf, uint8_t *Ybuf, uint8_t
> *Ubuf, uint8_t *Vbuf, int linesize0, int linesize1, int linesize2, int
> height, int offset)
> > +{
> > +    int chrH = -(-height >> 1);
> > +    int buffersize = sizeof(uint8_t) * (linesize0 * height + linesize1
> * chrH * 2);
> > +
> > +    void *mapped = clEnqueueMapBuffer(gpu_env.command_queue, cl_inBuf,
>  CL_TRUE,CL_MAP_WRITE, 0, buffersize + offset, 0, NULL, NULL, NULL);
> > +    uint8_t *temp = (uint8_t *)mapped;
> > +    temp += offset;
> > +    memcpy(temp,Ybuf,sizeof(uint8_t) * linesize0 * height);
> > +    memcpy(temp + sizeof(uint8_t) * linesize0 *
> height,Ubuf,sizeof(uint8_t) * linesize1 * chrH);
> > +    memcpy(temp + sizeof(uint8_t) * (linesize0 * height + linesize1 *
> chrH),Vbuf,sizeof(uint8_t) * linesize2 * chrH);
> > +    clEnqueueUnmapMemObject(gpu_env.command_queue, cl_inBuf, mapped, 0,
> NULL, NULL );
> > +    return 1;
> > +}
> > +
> > +cl_device_id av_opencl_get_device_id(void)
> > +{
> > +    return *(gpu_env.devices);
> > +}
> > +
> > +cl_context av_opencl_get_context(void)
> > +{
> > +    return gpu_env.context;
> > +}
> > +cl_command_queue av_opencl_get_command_queue(void)
> > +{
> > +    return gpu_env.command_queue;
> > +}
>
> > +int av_opencl_read_to_frame_buffer(void *cl_inBuf, uint8_t *Ybuf,
> uint8_t *Ubuf, uint8_t *Vbuf, int linesize0, int linesize1, int linesize2,
> int height)
>
> ditto about style, avoid long lines and mixed camelCase and c_style
> variables
>
> > +{
> > +
> > +    int chrH = -(-height >> 1);
> > +    int size = sizeof(uint8_t) * (linesize0 * height + linesize1 * chrH
> * 2);
>
> sizeof(uint8_t) = 1
>
> > +    if (!(gpu_env.temp_buffer)) {
> > +        gpu_env.temp_buffer = (uint8_t *)av_malloc(size);
> > +        gpu_env.temp_buffer_size = size;
> > +    }
> > +
> > +    if(size > gpu_env.temp_buffer_size) {
> > +        av_free(gpu_env.temp_buffer);
> > +        gpu_env.temp_buffer = (uint8_t *)av_malloc(size);
> > +        gpu_env.temp_buffer_size = size;
> > +    }
> > +
> > +    if
> (av_opencl_read_cl_buffer(cl_inBuf,gpu_env.temp_buffer,sizeof(uint8_t)*(linesize0
> + linesize1)*height)) {
> > +        memcpy(Ybuf,gpu_env.temp_buffer,linesize0 * height);
> > +        memcpy(Ubuf,gpu_env.temp_buffer + linesize0 * height,linesize1
> *chrH);
> > +        memcpy(Vbuf,gpu_env.temp_buffer + linesize0 * height +
> linesize1 * chrH,linesize2 * chrH);
> > +    }
> > +    return 1;
> > +}
> > +void av_opencl_save_buffer(const char *filtername,void *cl_inbuf)
> > +{
> > +    int i = 0;
> > +    while (strlen(filter_buffer[i].filter_name)) {
> > +        i++;
> > +    }
> > +    if (i > (MAX_FILTER_NUM - 1)) {
> > +        av_log(&openclwrapperutils,AV_LOG_ERROR,"filter num is too
> large\n");
> > +        return;
> > +    }
> > +    if(strlen(filtername) > MAX_FILTER_NAME_LEN) {
> > +        av_log(&openclwrapperutils,AV_LOG_ERROR,"filter name is too
> long\n");
> > +        return;
> > +    }
> > +
>  av_strlcpy(filter_buffer[i].filter_name,filtername,MAX_FILTER_NAME_LEN+1);
> > +    filter_buffer[i].cl_inbuf = cl_inbuf;
> > +    return;
>
> aren't you supposed to return an error in case of too long buffers?
>

This functions is used to save the OpenCL memory pointer and if the follow
filter is OpenCL filter, it can get the OpenCL buffer as input, it can
avoid copy data between GPU and CPU.So the "cl_inbuf" is the buffer
per-OpenCL filter has created,this function just save the pointer.


>
> > +}
> > +void *av_opencl_get_buffer(const char *filtername)
> > +{
> > +    for (int i = 0;i < MAX_FILTER_NUM;i++) {
> > +        if (!strcmp(filtername,filter_buffer[i].filter_name))
> > +            return filter_buffer[i].cl_inbuf;
> > +    }
> > +    return NULL;
> > +}
> > +
> > diff --git a/libavutil/openclwrapper.h b/libavutil/openclwrapper.h
> > new file mode 100644
> > index 0000000..a3ac8c2
> > --- /dev/null
> > +++ b/libavutil/openclwrapper.h
> > @@ -0,0 +1,198 @@
> > +/*
> > + * Copyright (C) 2012 Peng Gao <peng at multicorewareinc.com>
> > + * Copyright (C) 2012 Li   Cao <li at multicorewareinc.com>
> > + * Copyright (C) 2012 Wei  Gao <weigao at multicorewareinc.com>
> > + *
> > + * This file is part of FFmpeg.
> > + *
> > + * FFmpeg is free software; you can redistribute it and/or
> > + * modify it under the terms of the GNU Lesser General Public
> > + * License as published by the Free Software Foundation; either
> > + * version 2.1 of the License, or (at your option) any later version.
> > + *
> > + * FFmpeg is distributed in the hope that it will be useful,
> > + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> > + * Lesser General Public License for more details.
> > + *
> > + * You should have received a copy of the GNU Lesser General Public
> > + * License along with FFmpeg; if not, write to the Free Software
> > + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
> 02110-1301 USA
> > + */
> > +
> > +#include "config.h"
> > +
> > +#ifndef LIBAVUTIL_OPENCLWRAPPER_H
> > +#define LIBAVUTIL_OPENCLWRAPPER_H
> > +
> > +#include <CL/cl.h>
> > +
>
> > +#define AV_OPENCLCHECK(method, ...)\
> > +    status = method( __VA_ARGS__ ); if( status != CL_SUCCESS ) {\
> > +        av_log(NULL,AV_LOG_ERROR, " error %s %d\n", # method, status );
>  return status; }
> > +
> > +#define AV_OPENCLCHECK_SET_KERNEL_ARG(arg_size,arg_ptr) AV_OPENCLCHECK(
> clSetKernelArg,kernel,(arg_no++),(arg_size),(void*)(arg_ptr))
>
> avoid nested macros
>
> > +
> > +#define FF_OPENCL_KERNEL( ... )# __VA_ARGS__
> > +
> > +typedef struct AV_KernelEnv {
> > +    cl_context context;
> > +    cl_command_queue command_queue;
> > +    cl_program program;
> > +    cl_kernel kernel;
> > +    char kernel_name[150];
> > +}AV_KernelEnv;
>
> AVKernelEnv?
>
> > +
> > +typedef struct AV_ExtOpenCLInfo {
> > +    cl_platform_id platform;
>
> > +    cl_device_type devide_type;
>
> typo?
>
> > +    cl_context context;
> > +    cl_device_id *devices;
>
> devices_id?
>
> > +    cl_device_id  dev;
> > +    cl_command_queue command_queue;
> > +    char *platformName;
>
> camelCase
>
> > +}AV_ExtOpenCLInfo;
>
> AVExtOpenCLInfo
>
> also what's "Ext" for?
>

Let me explain it. There are two cases to use OpenCL filters, the first one
is just as ffmpeg.exe,the application program doesn't create OpenCL
 environment but it want to use OpenCL filter in ffmpeg or
libavfilter.lib,under this condition,libavfilter(ffmpeg) should create the
OpenCL filter it self,so it doesn't use AVExtOpenCLInfo.The other one is
that the application program has created the OpenCL environment, under this
condition, libavfilter(ffmpeg) may use the OpenCL environment wtich
application program created,so the application program just set the
AVExtOpenCLInfo and pass it two the API "av_opencl_init_run_env",
libavfilter will use it to init opencl filter.


> > +
>
> > +
> > +/**
> > + * user defined, this is function wrapper which is used to set the
> input parameters.
> > + * luanch kernel and copy data from GPU to CPU or CPU to GPU.
>
> typo
>
> > + */
> > +
> > +typedef int (*av_cl_kernel_function)(void **userdata, AV_KernelEnv
> *kenv);
> > +
> > +
>
> > +/**
> > + * register a wapper for running the kernel specified by the kernel
> name.
>
> typo
>
> > + *
> > + */
> > +
> > +int av_opencl_register_kernel_wrapper(const char *kernel_name,
> av_cl_kernel_function function);
>
> > +/**
> > + *launch kernel , user call this function to luanch kernel.
> > + *@param kernel_name: this kernel name is used to find the kernel in
> opencl runtime environment.
> > + *@param userdata: this userdata is the all parameters for running the
> kernel specified by kernel name
> > + */
> > +int av_opencl_run_kernel(const char *kernel_name, void **userdata);
>
> Please fix spacing and casing. Correct style is:
>
> /**
>  * Launch OpenCL kernel.
>  *
>  * @param kernel_name  name used to find the kernel in then OpenCL runtime
> environment
>  * @param userdata     parameters used for running the kernel specified by
> kernel_name
>  * @return ...
>  */
> int av_opencl_run_kernel(const char *kernel_name, void **userdata);
>
> > +
> > +/**
> > + * init the run time environment , this function must be called befor
> calling any function related to opencl.
> > + *the argc must be set zero , argv must be set NULL, build_option is
> the options for build the kernel.
> > + *
> > + */
> > +
> > +int av_opencl_init_run_env(int argc, char **argv, const char
> *build_option,void *extOpenCLInfo);
> > +
> > +/**
> > + * relase all resource about the opencl , this function must be called
> after calling any functions related to opencl.
> > + */
> > +
> > +int av_opencl_release_opencl_run_env(void);
> > +/**
> > + *  get the opencl status , 0: not init ; 1, inited; this function is
> used the check whether or not the opencl run time has been created.
> > + *
> > + */
> > +int av_opencl_stats(void);
> > +
> > +
> > +/**
> > + * create kernel object  by a kernel name on the specified opencl run
> time indicated by env parameter.
> > + *
> > + */
> > +
> > +int av_opencl_create_kernel(const char * kernelname, AV_KernelEnv *
> env);
> > +
> > +/**
> > + *  release kernel object which is generated by calling the
> hb_create_kernel api.
> > + *
> > + */
> > +int av_opencl_release_kernel(AV_KernelEnv * env);
> > +
> > +/**
> > + *  get the kernel environment.
> > + *
> > + */
> > +
> > +int av_opencl_get_kernel_env(AV_KernelEnv *env);
> > +
> > +/**
> > + *  create opencl buffer.
> > + *
> > + */
> > +
> > +
> > +int av_opencl_create_buffer(void **cl_Buf, int flags, int size,void
> *host_ptr);
> > +
> > +/**
> > + *  read opencl buffer data to memory.
> > + *
> > + */
> > +int av_opencl_read_cl_buffer(void *cl_inBuf, uint8_t *outbuf, int size);
> > +
> > +/**
> > + *  write data from memroy to opencl buffer.
> > + *
> > + */
> > +
> > +int av_opencl_write_cl_buffer(void *cl_inBuf, uint8_t *Ybuf, uint8_t
> *Ubuf, uint8_t *Vbuf, int linesize0, int linesize1, int linesize2, int
> height, int offset);
> > +
> > +/**
> > + *  get opencl device id.
> > + *
> > + */
> > +
> > +cl_device_id av_opencl_get_device_id(void);
> > +
> > +/**
> > + *  get opencl context.
> > + *
> > + */
> > +
> > +cl_context av_opencl_get_context(void);
> > +
> > +/**
> > + *  get opencl command queue.
> > + *
> > + */
> > +
> > +cl_command_queue av_opencl_get_command_queue(void);
> > +
> > +/**
> > + *  release opencl buffer.
> > + *
> > + */
> > +
> > +void av_opencl_release_buffer(void *cl_Buf);
> > +
> > +/**
> > + *  read frame data form opencl buffer to frame buffer.
> > + *
> > + */
> > +
> > +int av_opencl_read_to_frame_buffer(void *cl_inBuf, uint8_t *Ybuf,
> uint8_t *Ubuf, uint8_t *Vbuf, int linesize0, int linesize1, int linesize2,
> int height);
> > +
> > +/**
> > + *  save opencl buffer as share buffer.
> > + *
> > + */
> > +
> > +void av_opencl_save_buffer(const char *filtername,void *cl_inbuf);
> > +
> > +/**
> > + *  get the opencl share buffer.
> > + *
> > + */
> > +
> > +void *av_opencl_get_buffer(const char *filtername);
> > +
> > +/**
> > + *  regist kernels.
> > + *
> > + */
> > +
> > +
> > +void ff_opencl_regist_kernel(const char *kernel_name,const char
> *kernel_code);
>
> mixed ff_ and av_
>
> I suggest to move the code to an internal header in libavfilter, this
> can be exported later to libavutil if there is the need.
> --
> FFmpeg = Forgiving and Fanciful Miracolous Pure Elected Game
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel at ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>


More information about the ffmpeg-devel mailing list