[FFmpeg-devel] [PATCH 1/2] libavutil/libavfilter: opencl wrapper based on comments on 20130326

Michael Niedermayer michaelni at gmx.at
Tue Mar 26 23:48:55 CET 2013


On Tue, Mar 26, 2013 at 06:55:05PM +0800, Wei Gao wrote:
> 

>  configure          |    4 
>  libavutil/Makefile |    3 
>  libavutil/opencl.c |  653 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  libavutil/opencl.h |  219 +++++++++++++++++
>  4 files changed, 879 insertions(+)
> 0aff9bdd8d853bf2cab9fd00fad14c9b9db8fbc7  0001-opencl-wrapper-based-on-comments-on-20130326.patch
> From f91df6a8315a1b7bdc7b69517831fc745fcbd4fd Mon Sep 17 00:00:00 2001
> From: highgod0401 <highgod0401 at gmail.com>
> Date: Tue, 26 Mar 2013 18:43:00 +0800
> Subject: [PATCH 1/2] opencl wrapper based on comments on 20130326
> 
> ---
>  configure          |   4 +
>  libavutil/Makefile |   3 +
>  libavutil/opencl.c | 653 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  libavutil/opencl.h | 219 ++++++++++++++++++
>  4 files changed, 879 insertions(+)
>  create mode 100644 libavutil/opencl.c
>  create mode 100644 libavutil/opencl.h
> 
> diff --git a/configure b/configure
> index 8443db4..9c42a85 100755
> --- a/configure
> +++ b/configure
> @@ -233,6 +233,7 @@ External library support:
>    --enable-libxvid         enable Xvid encoding via xvidcore,
>                             native MPEG-4/Xvid encoder exists [no]
>    --enable-openal          enable OpenAL 1.1 capture support [no]
> +  --enable-opencl          enable OpenCL code
>    --enable-openssl         enable openssl [no]
>    --enable-x11grab         enable X11 grabbing [no]
>    --enable-zlib            enable zlib [autodetect]
> @@ -1178,6 +1179,7 @@ EXTERNAL_LIBRARY_LIST="
>      libxavs
>      libxvid
>      openal
> +    opencl
>      openssl
>      x11grab
>      zlib
> @@ -3982,6 +3984,7 @@ enabled openal     && { { for al_libs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32
>                          die "ERROR: openal not found"; } &&
>                        { check_cpp_condition "AL/al.h" "defined(AL_VERSION_1_1)" ||
>                          die "ERROR: openal must be installed and version must be 1.1 or compatible"; }
> +enabled opencl     && require2 opencl CL/cl.h clEnqueueNDRangeKernel -lOpenCL
>  enabled openssl    && { check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto ||
>                          check_lib openssl/ssl.h SSL_library_init -lssl32 -leay32 ||
>                          check_lib openssl/ssl.h SSL_library_init -lssl -lcrypto -lws2_32 -lgdi32 ||
> @@ -4350,6 +4353,7 @@ echo "network support           ${network-no}"
>  echo "threading support         ${thread_type-no}"
>  echo "safe bitstream reader     ${safe_bitstream_reader-no}"
>  echo "SDL support               ${sdl-no}"
> +echo "opencl enabled            ${opencl-no}"
>  echo "texi2html enabled         ${texi2html-no}"
>  echo "perl enabled              ${perl-no}"
>  echo "pod2man enabled           ${pod2man-no}"
> diff --git a/libavutil/Makefile b/libavutil/Makefile
> index 103ce5e..6375e10 100644
> --- a/libavutil/Makefile
> +++ b/libavutil/Makefile
> @@ -52,6 +52,8 @@ HEADERS = adler32.h                                                     \
>  
>  HEADERS-$(CONFIG_LZO)                   += lzo.h
>  
> +HEADERS-$(CONFIG_OPENCL)                += opencl.h
> +
>  ARCH_HEADERS = bswap.h                                                  \
>                 intmath.h                                                \
>                 intreadwrite.h                                           \
> @@ -115,6 +117,7 @@ SKIPHEADERS-$(HAVE_MACHINE_RW_BARRIER)          += atomic_suncc.h
>  SKIPHEADERS-$(HAVE_MEMORYBARRIER)               += atomic_win32.h
>  SKIPHEADERS-$(HAVE_SYNC_VAL_COMPARE_AND_SWAP)   += atomic_gcc.h
>  
> +OBJS-$(CONFIG_OPENCL)                   += opencl.o
>  TESTPROGS = adler32                                                     \
>              aes                                                         \
>              atomic                                                      \
> diff --git a/libavutil/opencl.c b/libavutil/opencl.c
> new file mode 100644
> index 0000000..929aae7
> --- /dev/null
> +++ b/libavutil/opencl.c
> @@ -0,0 +1,653 @@
> +/*
> + * Copyright (C) 2012 Peng Gao <peng at multicorewareinc.com>
> + * Copyright (C) 2012 Li   Cao <li at multicorewareinc.com>
> + * Copyright (C) 2012 Wei  Gao <weigao at multicorewareinc.com>
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "opencl.h"
> +#include "avstring.h"
> +#include "log.h"
> +#include "avassert.h"
> +
> +#define MAX_KERNEL_NAME_LEN  64
> +#define MAX_KERNEL_NUM 200
> +
> +typedef struct GPUEnv {
> +    cl_platform_id platform;
> +    cl_device_type device_type;
> +    cl_context context;
> +    cl_device_id *device_ids;
> +    cl_device_id device_id;
> +    cl_command_queue command_queue;
> +    cl_program program;
> +    char kernel_names[MAX_KERNEL_NUM][MAX_KERNEL_NAME_LEN+1];
> +    av_opencl_kernel_function kernel_functions[MAX_KERNEL_NUM];
> +    const char *kernel_code[MAX_KERNEL_NUM];
> +    int kernel_count;
> +    int runtime_kernel_count;
> +    int is_user_created; // 1: the opencl env is created by user and use AVOpenCLExternalInfo to pass to ffmpeg ,0:created by opencl wrapper
> +    uint8_t *temp_buffer;
> +    int temp_buffer_size;
> +} GPUEnv;
> +
> +typedef struct OpenclErrorMsg {
> +    int err_code;
> +    const char *err_str;
> +} OpenclErrorMsg;
> +

> +static OpenclErrorMsg opencl_err_msg[] = {

missing const


[...]
> +int av_opencl_create_kernel(const char *kernel_name, AVOpenCLKernelEnv *env)
> +{
> +    int status;
> +    if (!env->kernel) {
> +        env->kernel        = clCreateKernel(gpu_env.program, kernel_name, &status);
> +        env->context       = gpu_env.context;
> +        env->command_queue = gpu_env.command_queue;
> +        env->program       = gpu_env.program;

> +        av_strlcpy(env->kernel_name, kernel_name, AV_OPENCL_MAX_KERNEL_NAME_SIZE);

should use sizeof(env->kernel_name)


[...]

> +static int compile_kernel_file(GPUEnv *gpu_env, const char *build_option)
> +{
> +    cl_int status;
> +    size_t kernel_code_length = 0;
> +    char *source_str = NULL;
> +    char *temp;
> +    int ret = 0;
> +    int i;
> +
> +    if (gpu_env->program)
> +        return ret;
> +
> +    for (i = 0; i < gpu_env->kernel_count; i++) {
> +        kernel_code_length += strlen(gpu_env->kernel_code[i]);
> +    }
> +    source_str = av_mallocz(kernel_code_length + 1);
> +    if (!source_str) {
> +        return AVERROR(ENOMEM);
> +    }
> +    temp = source_str;
> +    for (i = 0; i < gpu_env->kernel_count; i++) {
> +        memcpy(temp, gpu_env->kernel_code[i], strlen(gpu_env->kernel_code[i]));
> +        temp += strlen(gpu_env->kernel_code[i]);
> +    }
> +    /* create a CL program using the kernel source */
> +    gpu_env->program = clCreateProgramWithSource(gpu_env->context, 1, (const char **)(&source_str),
> +                                                 &kernel_code_length, &status);
> +    if(status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not create OpenCL program with source code: %s\n",
> +               opencl_errstr(status));
> +        ret = AVERROR_EXTERNAL;
> +        goto end;
> +    }
> +    if (!gpu_env->program) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Created program is NULL\n");
> +        ret = AVERROR_EXTERNAL;
> +        goto end;
> +    }
> +    /* create a cl program executable for all the devices specified */
> +    if (!gpu_env->is_user_created)
> +        status = clBuildProgram(gpu_env->program, 1, gpu_env->device_ids,
> +                                build_option, NULL, NULL);
> +    else
> +        status = clBuildProgram(gpu_env->program, 1, &(gpu_env->device_id),
> +                                 build_option, NULL, NULL);
> +
> +    if (status != CL_SUCCESS) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not compile OpenCL kernel: %s\n", opencl_errstr(status));
> +        ret = AVERROR_EXTERNAL;
> +        goto end;
> +    }
> +end:
> +    av_free(source_str);
> +    return ret;
> +}
> +
> +int av_opencl_run_kernel(const char *kernel_name, void **userdata)
> +{
> +    av_opencl_kernel_function function = NULL;
> +    int i;
> +    for (i = 0; i < gpu_env.kernel_count; i++) {
> +        if (av_strcasecmp(kernel_name, gpu_env.kernel_names[i]) == 0) {
> +            function = gpu_env.kernel_functions[i];
> +            break;
> +        }
> +    }
> +    if (!function) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Could not find kernel: %s\n", kernel_name);
> +        return AVERROR(EINVAL);
> +    }
> +    return(function(userdata));
> +}
> +
> +int av_opencl_init(const char *build_option, AVOpenCLExternalInfo *ext_opencl_info)
> +{
> +    int ret;
> +    if (!isinited) {
> +        /*initialize devices, context, command_queue*/
> +        ret = init_opencl_env(&gpu_env, ext_opencl_info);
> +        if (ret) {
> +            return ret;
> +        }
> +        /*initialize program, kernel_name, kernel_count*/
> +        ret = compile_kernel_file(&gpu_env, build_option);
> +        if (ret) {
> +            return ret;
> +        }
> +        av_assert1(gpu_env.kernel_count > 0);
> +        isinited = 1;
> +    }
> +    return 0;
> +}
> +
> +void av_opencl_uninit(void)
> +{
> +    int status;
> +    if (!isinited)
> +        return;
> +    av_freep(&(gpu_env.temp_buffer));
> +    if (gpu_env.is_user_created)
> +        return;
> +    gpu_env.runtime_kernel_count--;
> +    if (!gpu_env.runtime_kernel_count) {
> +        if (gpu_env.program) {
> +            status = clReleaseProgram(gpu_env.program);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not release OpenCL program: %s\n", opencl_errstr(status));
> +            }
> +            gpu_env.program = NULL;
> +        }
> +        if (gpu_env.command_queue) {
> +            status = clReleaseCommandQueue(gpu_env.command_queue);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not release OpenCL command queue: %s\n", opencl_errstr(status));
> +            }
> +            gpu_env.command_queue = NULL;
> +        }
> +        if (gpu_env.context) {
> +            status = clReleaseContext(gpu_env.context);
> +            if (status != CL_SUCCESS) {
> +                av_log(&openclutils, AV_LOG_ERROR, "Could not release OpenCL context: %s\n", opencl_errstr(status));
> +            }
> +            gpu_env.context = NULL;
> +        }
> +        av_freep(&(gpu_env.device_ids));
> +        isinited = 0;
> +    }
> +}

These functions are not thread safe but are used from multiple threads
(that is 2 deshake filter instances in the 2nd patch for example)



[...]

> +int av_opencl_buffer_read_image(uint8_t **dst_data, int *plane_size, int plane_num,
> +                                       void *src_cl_inbuf, size_t cl_buffer_size)
> +{
> +    int buffer_size = 0;
> +    int ret = 0;
> +    uint8_t *temp;
> +    int i;
> +    if ((unsigned int)plane_num > 8) {
> +        return AVERROR(EINVAL);
> +    }
> +    for (i = 0;i < plane_num;i++) {
> +        buffer_size += plane_size[i];
> +    }
> +    if (buffer_size > cl_buffer_size) {
> +        av_log(&openclutils, AV_LOG_ERROR, "Cannot write image to CPU buffer: OpenCL buffer too small\n");
> +        return AVERROR_EXTERNAL;
> +    }
> +    if (!gpu_env.temp_buffer) {
> +        gpu_env.temp_buffer = av_malloc(buffer_size);

There can be multiple AVFilter instances and they can run at the same
time from multiple threads. This is missing some kind of thread
synchronization if global non constant global variables cannot be
avoided. This issue affects more code than just above


[...]
> +/**
> + * Get the OpenCL status, this function is used the check whether or not the OpenCL environment has been created.
> + *
> + *@return  0 not inited, 1, inited;
> + */
> +int av_opencl_is_inited(void);

What is this function supposed to be used for ?
Iam asking because if it returns a value of 0 or 1 by the time the
caller sees the value a 2nd thread could have inited or uninited
opencl and the value would be wrong.


[...]

> +/**
> + *  Create OpenCL buffer, the buffer is used to save the data which is used by OpenCL kernel.
> + *
> + *@param cl_buf         the pointer of OpenCL buffer.
> + *@param cl_buf_size  size in bytes of the OpenCL buffer to create
> + *@param flags           the flags which used to control buffer attribute
> + *@param host_ptr      the host pointer of OpenCL buffer
> + *@return  >=0 on success, a negative error code in case of failure
> + */
> +int av_opencl_buffer_create(void **cl_buf, size_t cl_buf_size, int flags, void *host_ptr);
> +


> +/**
> + *  Release OpenCL buffer.
> + *
> + */
> +void av_opencl_buffer_release(void *cl_buf);

This needs more elaborate documentation, and both the create and
release docs should refer to each other


> +
> +/**
> + * Read data from OpenCL buffer to memory buffer.
> + *
> + * @param src_buf           pointer to destination buffer (CPU memory)
> + * @param dst_cl_buf        pointer to source OpenCL buffer
> + * @param buf_size          size in bytes of the source and destination buffers
> + * @return >=0 on success, a negative error code in case of failure
> + */
> +int av_opencl_buffer_write(void *dst_cl_buf, uint8_t *src_buf, size_t buf_size);
> +

> +/**
> + * Read data from OpenCL buffer to memory buffer.
> + *
> + * @param dst_buf           pointer to destination buffer (CPU memory)
> + * @param src_cl_buf        pointer to source OpenCL buffer
> + * @param buf_size          size in bytes of the source and destination buffers
> + * @return >=0 on success, a negative error code in case of failure
> + */
> +int av_opencl_buffer_read(uint8_t *dst_buf, void *src_cl_buf, size_t buf_size);

a more specific type than void* would allow detecting type mismatches


[...]

-- 
Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB

DNS cache poisoning attacks, popular search engine, Google internet authority
dont be evil, please
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 198 bytes
Desc: Digital signature
URL: <http://ffmpeg.org/pipermail/ffmpeg-devel/attachments/20130326/f7ee19c7/attachment.asc>


More information about the ffmpeg-devel mailing list