[FFmpeg-devel] [PATCH]opencl: automatically select the fastest opencl device

Stefano Sabatini stefasab at gmail.com
Mon Dec 2 12:46:49 CET 2013


On date Monday 2013-12-02 05:26:20 -0600, Lenny Wang encoded:
> On Mon, Dec 2, 2013 at 2:50 AM, Stefano Sabatini <stefasab at gmail.com> wrote:
[...]
> From: Lenny Wang <lwanghpc at gmail.com>
> Date: Mon, 2 Dec 2013 05:20:00 -0600
> Subject: [PATCH] cmdutils & opencl: add -opencl_bench option to test and show available OpenCL devices
> 
> ---
>  cmdutils.c                     | 165 +++++++++++++++++++++++++++++++++++++++++
>  cmdutils.h                     |  15 ++++
>  cmdutils_common_opts.h         |   1 +
>  cmdutils_opencl_bench_kernel.h |  86 +++++++++++++++++++++
>  doc/fftools-common-opts.texi   |   4 +
>  doc/utils.texi                 |   4 +-
>  libavutil/opencl.c             |  42 +++++++++++
>  libavutil/opencl.h             |  16 ++++
>  libavutil/version.h            |   2 +-
>  9 files changed, 332 insertions(+), 3 deletions(-)

Still missing doc/APIchanges entry.

> diff --git a/cmdutils.c b/cmdutils.c
> index 46ade3f..6b7f51a 100644
> --- a/cmdutils.c
> +++ b/cmdutils.c
> @@ -60,6 +60,8 @@
>  #endif
>  #if CONFIG_OPENCL
>  #include "libavutil/opencl.h"
> +#include "libavutil/time.h"
> +#include "cmdutils_opencl_bench_kernel.h"
>  #endif
>  
>  
> @@ -986,6 +988,169 @@ int opt_timelimit(void *optctx, const char *opt, const char *arg)
>  }
>  
>  #if CONFIG_OPENCL
> +#define OCLCHECK(method, ... )                                                 \
> +do {                                                                           \
> +    status = method(__VA_ARGS__);                                              \
> +    if (status != CL_SUCCESS) {                                                \
> +        av_log(NULL, AV_LOG_ERROR, # method " error '%s'\n",                   \
> +               av_opencl_errstr(status));                                      \
> +        ret = AVERROR_EXTERNAL;                                                \
> +        goto end;                                                              \
> +    }                                                                          \
> +} while (0)
> +
> +#define CREATEBUF(out, flags, size)                                            \
> +do {                                                                           \
> +    out = clCreateBuffer(ext_opencl_env->context, flags, size, NULL, &status); \
> +    if (status != CL_SUCCESS) {                                                \
> +        av_log(NULL, AV_LOG_ERROR, "Could not create OpenCL buffer\n");        \
> +        ret = AVERROR_EXTERNAL;                                                \
> +        goto end;                                                              \
> +    }                                                                          \
> +} while (0)
> +
> +static void fill_rand_int(int *data, int n)
> +{
> +    int i;
> +    srand(av_gettime());
> +    for (i = 0; i < n; i++)
> +        data[i] = rand();
> +}
> +
> +#define OPENCL_NB_ITER 5
> +static int64_t run_opencl_bench(AVOpenCLExternalEnv *ext_opencl_env)
> +{
> +    int i, arg = 0, width = 1920, height = 1088;
> +    int64_t start, ret = 0;
> +    cl_int status;
> +    size_t kernel_len;
> +    char *inbuf;
> +    int *mask;
> +    int buf_size = width * height * sizeof(char);
> +    int mask_size = sizeof(uint32_t) * 128;
> +
> +    cl_mem cl_mask, cl_inbuf, cl_outbuf;
> +    cl_kernel kernel = NULL;
> +    cl_program program = NULL;
> +    size_t local_work_size_2d[2] = {16, 16};
> +    size_t global_work_size_2d[2] = {(size_t)width, (size_t)height};
> +
> +    if (!(inbuf = av_malloc(buf_size)) || !(mask = av_malloc(mask_size))) {
> +        av_log(NULL, AV_LOG_ERROR, "Out of memory\n");
> +        ret = AVERROR(ENOMEM);
> +        goto end;
> +    }
> +    fill_rand_int((int*)inbuf, buf_size/4);
> +    fill_rand_int(mask, mask_size/4);
> +
> +    CREATEBUF(cl_mask, CL_MEM_READ_ONLY, mask_size);
> +    CREATEBUF(cl_inbuf, CL_MEM_READ_ONLY, buf_size);
> +    CREATEBUF(cl_outbuf, CL_MEM_READ_WRITE, buf_size);
> +
> +    kernel_len = strlen(ocl_bench_source);
> +    program = clCreateProgramWithSource(ext_opencl_env->context, 1, &ocl_bench_source,
> +                                        &kernel_len, &status);
> +    if (status != CL_SUCCESS || !program) {
> +        av_log(NULL, AV_LOG_ERROR, "OpenCL unable to create benchmark program\n");
> +        ret = AVERROR_EXTERNAL;
> +        goto end;
> +    }
> +    status = clBuildProgram(program, 1, &(ext_opencl_env->device_id), NULL, NULL, NULL);
> +    if (status != CL_SUCCESS) {
> +        av_log(NULL, AV_LOG_ERROR, "OpenCL unable to build benchmark program\n");
> +        ret = AVERROR_EXTERNAL;
> +        goto end;
> +    }
> +    kernel = clCreateKernel(program, "unsharp_bench", &status);
> +    if (status != CL_SUCCESS) {
> +        av_log(NULL, AV_LOG_ERROR, "OpenCL unable to create benchmark kernel\n");
> +        ret = AVERROR_EXTERNAL;
> +        goto end;
> +    }
> +
> +    OCLCHECK(clEnqueueWriteBuffer, ext_opencl_env->command_queue, cl_inbuf, CL_TRUE, 0,
> +             buf_size, inbuf, 0, NULL, NULL);
> +    OCLCHECK(clEnqueueWriteBuffer, ext_opencl_env->command_queue, cl_mask, CL_TRUE, 0,
> +             mask_size, mask, 0, NULL, NULL);
> +    OCLCHECK(clSetKernelArg, kernel, arg++, sizeof(cl_mem), &cl_inbuf);
> +    OCLCHECK(clSetKernelArg, kernel, arg++, sizeof(cl_mem), &cl_outbuf);
> +    OCLCHECK(clSetKernelArg, kernel, arg++, sizeof(cl_mem), &cl_mask);
> +    OCLCHECK(clSetKernelArg, kernel, arg++, sizeof(cl_int), &width);
> +    OCLCHECK(clSetKernelArg, kernel, arg++, sizeof(cl_int), &height);
> +
> +    start = av_gettime();

> +    for (i = 0; i < OPENCL_NB_ITER; i++)
> +	    OCLCHECK(clEnqueueNDRangeKernel, ext_opencl_env->command_queue, kernel, 2, NULL,
> +                 global_work_size_2d, local_work_size_2d, 0, NULL, NULL);
> +	clFinish(ext_opencl_env->command_queue);

weird indent

> +    ret = (av_gettime() - start)/OPENCL_NB_ITER;
> +end:
> +    if (kernel)
> +        clReleaseKernel(kernel);
> +    if (program)
> +        clReleaseProgram(program);
> +    if (cl_inbuf)
> +        clReleaseMemObject(cl_inbuf);
> +    if (cl_outbuf)
> +        clReleaseMemObject(cl_outbuf);
> +    if (cl_mask)
> +        clReleaseMemObject(cl_mask);
> +    av_free(inbuf);
> +    av_free(mask);
> +    return ret;
> +}
> +
> +static int compare_ocl_device_desc(const void *a, const void *b)
> +{
> +    return ((OpenCLDeviceBenchmark*)a)->runtime - ((OpenCLDeviceBenchmark*)b)->runtime;
> +}
> +
> +int opt_opencl_bench(void *optctx, const char *opt, const char *arg)
> +{
> +    int i, j, nb_devices = 0, count = 0;
> +    int64_t score = 0;
> +    AVOpenCLDeviceList *device_list;
> +    AVOpenCLDeviceNode *device_node = NULL;
> +    OpenCLDeviceBenchmark *devices = NULL;
> +    cl_platform_id platform;
> +
> +    av_opencl_get_device_list(&device_list);
> +    for (i = 0; i < device_list->platform_num; i++)
> +        nb_devices += device_list->platform_node[i]->device_num;
> +    if (!nb_devices) {
> +        av_log(NULL, AV_LOG_ERROR, "No OpenCL device detected!\n");
> +        return AVERROR(EINVAL);
> +    }
> +    if (!(devices = av_malloc(sizeof(OpenCLDeviceBenchmark) * nb_devices))) {
> +        av_log(NULL, AV_LOG_ERROR, "Could not allocate buffer\n");
> +        return AVERROR(ENOMEM);
> +    }
> +
> +    for (i = 0; i < device_list->platform_num; i++) {
> +        for (j = 0; j < device_list->platform_node[i]->device_num; j++) {
> +            device_node = device_list->platform_node[i]->device_node[j];
> +            platform = device_list->platform_node[i]->platform_id;
> +            score = av_opencl_benchmark(device_node, platform, run_opencl_bench);
> +            if (score > 0) {
> +                devices[count].platform_idx = i;
> +                devices[count].device_idx = j;
> +                devices[count].runtime = score;
> +                strcpy(devices[count].device_name, device_node->device_name);
> +                count++;
> +            }
> +        }
> +    }

> +    qsort(devices, count, sizeof(OpenCLDeviceBenchmark), compare_ocl_device_desc);

You could print an header here, something like:

printf("PLATFORM_IDX\tDEVICE_IDX\tDEVICE_NAME\tRUNTIME\n",

Ideally you should print the header to stderr, but it seems most table
headers are printed to stdout (which is IMO silly and wrong, since
they're not supposed to be parsed).

> +    for (i = 0; i < count; i++)
> +        printf("platform_idx=%d\tdevice_idx=%d\tdevice_name=%s\truntime=%"PRId64"\n",
> +               devices[i].platform_idx, devices[i].device_idx,
> +               devices[i].device_name, devices[i].runtime);
> +
> +    av_opencl_free_device_list(&device_list);
> +    av_free(devices);
> +    return 0;
> +}
> +

[...]

No more comments from me, thanks.
-- 
FFmpeg = Faithful & Fast Minimal Pacific Exuberant Gadget


More information about the ffmpeg-devel mailing list