FFmpeg
dnn-layer-conv2d.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2019 Guo Yejun
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include <stdio.h>
22 #include <string.h>
23 #include <math.h>
25 
/* Largest absolute difference allowed between a computed output value and its expected value; the tests below fail on anything larger. */
26 #define EPSON 0.00001
27 
28 static int test_with_same_dilate(void)
29 {
30  // the input data and expected data are generated with below python code.
31  /*
32  x = tf.placeholder(tf.float32, shape=[1, None, None, 3])
33  y = tf.layers.conv2d(x, 2, 3, activation=tf.nn.tanh, padding='same', dilation_rate=(2, 2), bias_initializer=tf.keras.initializers.he_normal())
34  data = np.random.rand(1, 5, 6, 3);
35 
36  sess=tf.Session()
37  sess.run(tf.global_variables_initializer())
38 
39  weights = dict([(var.name, sess.run(var)) for var in tf.trainable_variables()])
40  kernel = weights['conv2d/kernel:0']
41  kernel = np.transpose(kernel, [3, 0, 1, 2])
42  print("kernel:")
43  print(kernel.shape)
44  print(list(kernel.flatten()))
45 
46  bias = weights['conv2d/bias:0']
47  print("bias:")
48  print(bias.shape)
49  print(list(bias.flatten()))
50 
51  output = sess.run(y, feed_dict={x: data})
52 
53  print("input:")
54  print(data.shape)
55  print(list(data.flatten()))
56 
57  print("output:")
58  print(output.shape)
59  print(list(output.flatten()))
60  */
61 
62  ConvolutionalParams params;
63  DnnOperand operands[2];
64  int32_t input_indexes[1];
65  float input[1*5*6*3] = {
66  0.7012556460308194, 0.4233847954643357, 0.19515900664313612, 0.16343083004926495, 0.5758261611052848, 0.9510767434014871, 0.11014085055947687,
67  0.906327053637727, 0.8136794715542507, 0.45371764543639526, 0.5768443343523952, 0.19543668786046986, 0.15648326047898609, 0.2099500241141279,
68  0.17658777090552413, 0.059335724777169196, 0.1729991838469117, 0.8150514704819208, 0.4435535466703049, 0.3752188477566878, 0.749936650421431,
69  0.6823494635284907, 0.10776389679424747, 0.34247481674596836, 0.5147867256244629, 0.9063709728129032, 0.12423605800856818, 0.6064872945412728,
70  0.5891681538551459, 0.9865836236466314, 0.9002163879294677, 0.003968273184274618, 0.8628374809643967, 0.1327176268279583, 0.8449799925703798,
71  0.1937671869354366, 0.41524410152707425, 0.02038786604756837, 0.49792466069597496, 0.8881874553848784, 0.9683921035597336, 0.4122972568010813,
72  0.843553550993252, 0.9588482762501964, 0.5190350762645546, 0.4283584264145317, 0.09781496073714646, 0.9501058833776156, 0.8665541760152776,
73  0.31669272550095806, 0.07133074675453632, 0.606438007334886, 0.7007157020538224, 0.4827996264130444, 0.5167615606392761, 0.6385043039312651,
74  0.23069664707810555, 0.058233497329354456, 0.06323892961591071, 0.24816458893245974, 0.8646369065257812, 0.24742185893094837, 0.09991225948167437,
75  0.625700606979606, 0.7678541502111257, 0.6215834594679912, 0.5623003956582483, 0.07389123942681242, 0.7659100715711249, 0.486061471642225,
76  0.9947455699829012, 0.9094911797643259, 0.7644355876253265, 0.05384315321492239, 0.13565394382783613, 0.9810628204953316, 0.007386389078887889,
77  0.226182754156241, 0.2609021390764772, 0.24182802076928933, 0.13264782451941648, 0.2035816485767682, 0.005504188177612557, 0.7014619934040155,
78  0.956215988391991, 0.5670398541013633, 0.9809764721750784, 0.6886338100487461, 0.5758152317218274, 0.7137823176776179
79  };
80  float expected_output[1*5*6*2] = {
81  -0.9480655, -0.7169147, -0.9404794, -0.5567385, -0.8991124, -0.8306558, -0.94487447, -0.8932543, -0.88238764, -0.7301602,
82  -0.8974813, -0.7026703, -0.8858988, -0.53203243, -0.92881465, -0.5648504, -0.8871471, -0.7000097, -0.91754407, -0.79684794,
83  -0.760465, -0.117928326, -0.88302773, -0.8975289, -0.70615053, 0.19231977, -0.8318776, -0.386184, -0.80698484, -0.8556624,
84  -0.7336671, -0.6168619, -0.7658234, -0.63449603, -0.73314047, -0.87502456, -0.58158904, -0.4184259, -0.52618927, -0.13613208,
85  -0.5093187, -0.21027721, -0.39455596, -0.44507834, -0.22269244, -0.73400885, -0.77655095, -0.74408925, -0.57313335, -0.15333457,
86  -0.74620694, -0.34858236, -0.42586932, -0.5240488, 0.1634339, -0.2447881, -0.57927346, -0.62732303, -0.82287043, -0.8474058
87  };
88  float *output;
89  float kernel[2*3*3*3] = {
90  0.26025516, 0.16536498, -0.24351254, 0.33892477, -0.34005195, 0.35202783, 0.34056443, 0.01422739, 0.13799345, 0.29489166,
91  0.2781723, 0.178585, 0.22122234, 0.044115514, 0.13134438, 0.31705368, 0.22527462, -0.021323413, 0.115134746, -0.18216397,
92  -0.21197563, -0.027848959, -0.01704529, -0.12401503, -0.23415318, -0.12661739, -0.35338148, 0.20049328, -0.076153606,
93  -0.23642601, -0.3125769, -0.025851756, -0.30006272, 0.050762743, 0.32003498, 0.3052225, -0.0017385483, 0.25337684, -0.25664508,
94  0.27846587, -0.3112659, 0.2066065, 0.31499845, 0.113178134, 0.09449363, -0.11828774, -0.12671001, -0.36259216, 0.2710235,
95  -0.19676702, 0.023612618, -0.2596915, -0.34949252, -0.108270735
96  };
97  float bias[2] = { -1.6574852, -0.72915393 };
98 
100  ctx.class = NULL;
101  ctx.options.conv2d_threads = 1;
102 
103  params.activation = TANH;
104  params.has_bias = 1;
105  params.biases = bias;
106  params.dilation = 2;
107  params.input_num = 3;
108  params.kernel = kernel;
109  params.kernel_size = 3;
110  params.output_num = 2;
111  params.padding_method = SAME;
112 
113  operands[0].data = input;
114  operands[0].dims[0] = 1;
115  operands[0].dims[1] = 5;
116  operands[0].dims[2] = 6;
117  operands[0].dims[3] = 3;
118  operands[1].data = NULL;
119 
120  input_indexes[0] = 0;
121  ff_dnn_execute_layer_conv2d(operands, input_indexes, 1, &params, &ctx);
122 
123  output = operands[1].data;
124  for (int i = 0; i < sizeof(expected_output) / sizeof(float); i++) {
125  if (fabs(output[i] - expected_output[i]) > EPSON) {
126  printf("at index %d, output: %f, expected_output: %f\n", i, output[i], expected_output[i]);
127  av_freep(&output);
128  return 1;
129  }
130  }
131 
132  av_freep(&output);
133  return 0;
134 }
135 
136 static int test_with_valid(void)
137 {
138  // the input data and expected data are generated with below python code.
139  /*
140  x = tf.placeholder(tf.float32, shape=[1, None, None, 3])
141  y = tf.layers.conv2d(x, 2, 3, activation=tf.nn.tanh, padding='valid', bias_initializer=tf.keras.initializers.he_normal())
142  data = np.random.rand(1, 5, 6, 3);
143 
144  sess=tf.Session()
145  sess.run(tf.global_variables_initializer())
146 
147  weights = dict([(var.name, sess.run(var)) for var in tf.trainable_variables()])
148  kernel = weights['conv2d/kernel:0']
149  kernel = np.transpose(kernel, [3, 0, 1, 2])
150  print("kernel:")
151  print(kernel.shape)
152  print(list(kernel.flatten()))
153 
154  bias = weights['conv2d/bias:0']
155  print("bias:")
156  print(bias.shape)
157  print(list(bias.flatten()))
158 
159  output = sess.run(y, feed_dict={x: data})
160 
161  print("input:")
162  print(data.shape)
163  print(list(data.flatten()))
164 
165  print("output:")
166  print(output.shape)
167  print(list(output.flatten()))
168  */
169 
170  ConvolutionalParams params;
171  DnnOperand operands[2];
172  int32_t input_indexes[1];
173  float input[1*5*6*3] = {
174  0.26126657468269665, 0.42762216215337556, 0.7466274030131497, 0.802550266787863, 0.3709323443076644, 0.5919817068197668, 0.49274512279324967,
175  0.7170132295090351, 0.0911793215410649, 0.5134213878288361, 0.670132600785118, 0.49417034512633484, 0.03887389460089885, 0.436785102836845,
176  0.1490231658611978, 0.6413606121498127, 0.8595987991375995, 0.9132593077586231, 0.7075959004873255, 0.17754995944845464, 0.5212507214937141,
177  0.35379732738215475, 0.25205107358505296, 0.3928792840544273, 0.09485294189485782, 0.8685115437448666, 0.6489046799288605, 0.509253797582924,
178  0.8993255536791972, 0.18740056466602373, 0.34237617336313986, 0.3871438962989183, 0.1488532571774911, 0.5187002331293636, 0.8137098818752955,
179  0.521761863717401, 0.4622312310118274, 0.29038411334638825, 0.16194915718170566, 0.5175999923925211, 0.8852230040101133, 0.0218263385047206,
180  0.08482355352852367, 0.3463638568376264, 0.28627127120619733, 0.9553293378948409, 0.4803391055970835, 0.841635695030805, 0.3556828280031952,
181  0.06778527221541808, 0.28193560357091596, 0.8399957619031576, 0.03305536359456385, 0.6625039162109645, 0.9300552020023897, 0.8551529138204146,
182  0.6133216915522418, 0.222427800857393, 0.1315422686800336, 0.6189144989185527, 0.5346184916866876, 0.8348888624532548, 0.6544834567840291,
183  0.2844062293389934, 0.28780026600883324, 0.5372272015684924, 0.6250226011503823, 0.28119106062279453, 0.49655812908420094, 0.6451488959145951,
184  0.7362580606834843, 0.44815578616664087, 0.6454760235835586, 0.6794062414265861, 0.045378883014935756, 0.9008388543865096, 0.7949752851269782,
185  0.4179928876222264, 0.28733419007048644, 0.996902319501908, 0.5690851338677467, 0.9511814013279738, 0.025323788678181636, 0.5594359732604794,
186  0.1213732595086251, 0.7172624313368294, 0.6759328959074691, 0.07252138454885071, 0.17557735158403442, 0.5988895455048769
187  };
188  float expected_output[1*3*4*2] = {
189  -0.556947, -0.42143887, -0.092070885, 0.27404794, -0.41886684, 0.0862887, -0.25001016, -0.342721, 0.020730592, 0.04016919, -0.69839877,
190  -0.06136704, 0.14186388, -0.11655602, -0.23489095, -0.3845829, -0.19017771, 0.1595885, -0.18308741, -0.3071209, -0.5848686, -0.22509028,
191  -0.6023201, -0.14448485
192  };
193  float *output;
194  float kernel[2*3*3*3] = {
195  -0.25291282, 0.22402048, 0.028642118, -0.14615723, -0.27362752, -0.34801802, -0.2759148, 0.19594926, -0.25029412, 0.34606284, 0.10376671,
196  -0.1015394, 0.23616093, 0.2134214, 0.35285157, 0.05893758, 0.0024731457, -0.17143056, 0.35758412, 0.2186206, -0.28384736, -0.21206513,
197  -0.20871592, 0.27070445, 0.25878823, 0.11136332, -0.33737376, 0.08353335, -0.34290665, 0.041805506, -0.09738535, 0.3284936, -0.16838405,
198  -0.032494456, -0.29193437, 0.033259362, -0.09272635, -0.2802651, -0.28648436, 0.3542878, 0.2432127, -0.24551713, 0.27813476, 0.21024024,
199  -0.013690501, -0.1350077, -0.07826337, -0.34563828, 0.3220685, -0.07571727, 0.19420576, 0.20783454, 0.18738335, 0.16672492
200  };
201  float bias[2] = { -0.4773722, -0.19620377 };
202 
204  ctx.class = NULL;
205  ctx.options.conv2d_threads = 1;
206 
207  params.activation = TANH;
208  params.has_bias = 1;
209  params.biases = bias;
210  params.dilation = 1;
211  params.input_num = 3;
212  params.kernel = kernel;
213  params.kernel_size = 3;
214  params.output_num = 2;
215  params.padding_method = VALID;
216 
217  operands[0].data = input;
218  operands[0].dims[0] = 1;
219  operands[0].dims[1] = 5;
220  operands[0].dims[2] = 6;
221  operands[0].dims[3] = 3;
222  operands[1].data = NULL;
223 
224  input_indexes[0] = 0;
225  ff_dnn_execute_layer_conv2d(operands, input_indexes, 1, &params, &ctx);
226 
227  output = operands[1].data;
228  for (int i = 0; i < sizeof(expected_output) / sizeof(float); i++) {
229  if (fabs(output[i] - expected_output[i]) > EPSON) {
230  printf("at index %d, output: %f, expected_output: %f\n", i, output[i], expected_output[i]);
231  av_freep(&output);
232  return 1;
233  }
234  }
235 
236  av_freep(&output);
237  return 0;
238 }
239 
/**
 * Test program entry point.
 *
 * Runs the 'valid' padding test first and the dilated 'same' padding test
 * second; the second one is skipped when the first one fails.
 *
 * @param argc not used
 * @param argv not used
 * @return 0 when both tests pass, 1 as soon as one of them fails
 */
int main(int argc, char **argv)
{
    /* The short-circuit || keeps the original order and stops at the first
     * failing test. */
    const int any_failed = test_with_valid() || test_with_same_dilate();

    return any_failed ? 1 : 0;
}
output
filter_frame: For filters that do not use the activate() callback, this method is called when a frame is pushed to the filter's input. It can be called at any time except in a reentrant way. If the input frame is enough to produce output, ...
Definition: filter_design.txt:225
main
int main(int argc, char **argv)
Definition: dnn-layer-conv2d.c:240
ConvolutionalParams::kernel
float * kernel
Definition: dnn_backend_native_layer_conv2d.h:33
ConvolutionalParams::input_num
int32_t input_num
Definition: dnn_backend_native_layer_conv2d.h:28
TANH
@ TANH
Definition: dnn_backend_native.h:55
DnnOperand::data
void * data
data pointer with data length in bytes.
Definition: dnn_backend_native.h:104
ConvolutionalParams::activation
DNNActivationFunc activation
Definition: dnn_backend_native_layer_conv2d.h:29
ctx
AVFormatContext * ctx
Definition: movenc.c:48
ConvolutionalParams::has_bias
int32_t has_bias
Definition: dnn_backend_native_layer_conv2d.h:32
fabs
static __device__ float fabs(float a)
Definition: cuda_runtime.h:182
NULL
#define NULL
Definition: coverity.c:32
ff_dnn_execute_layer_conv2d
int ff_dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_indexes, int32_t output_operand_index, const void *parameters, NativeContext *ctx)
Execute the 2D Convolution Layer.
Definition: dnn_backend_native_layer_conv2d.c:187
DnnOperand::dims
int32_t dims[4]
there are two memory layouts, NHWC or NCHW, so we use dims, dims[0] is Number.
Definition: dnn_backend_native.h:74
SAME
@ SAME
Definition: dnn_backend_native.h:54
ConvolutionalParams::kernel_size
int32_t kernel_size
Definition: dnn_backend_native_layer_conv2d.h:28
NativeContext
Definition: dnn_backend_native.h:118
VALID
@ VALID
Definition: dnn_backend_native.h:54
dnn_backend_native_layer_conv2d.h
printf
printf("static const uint8_t my_array[100] = {\n")
input
and forward the test the status of outputs and forward it to the corresponding return FFERROR_NOT_READY If the filters stores internally one or a few frame for some input
Definition: filter_design.txt:172
ConvolutionalParams::output_num
int32_t output_num
Definition: dnn_backend_native_layer_conv2d.h:28
i
#define i(width, name, range_min, range_max)
Definition: cbs_h2645.c:271
ConvolutionalParams::padding_method
DNNPaddingParam padding_method
Definition: dnn_backend_native_layer_conv2d.h:30
DnnOperand
Definition: dnn_backend_native.h:69
test_with_same_dilate
static int test_with_same_dilate(void)
Definition: dnn-layer-conv2d.c:28
av_freep
#define av_freep(p)
Definition: tableprint_vlc.h:35
test_with_valid
static int test_with_valid(void)
Definition: dnn-layer-conv2d.c:136
int32_t
int32_t
Definition: audioconvert.c:56
EPSON
#define EPSON
Definition: dnn-layer-conv2d.c:26
ConvolutionalParams
Definition: dnn_backend_native_layer_conv2d.h:27
ConvolutionalParams::dilation
int32_t dilation
Definition: dnn_backend_native_layer_conv2d.h:31
ConvolutionalParams::biases
float * biases
Definition: dnn_backend_native_layer_conv2d.h:34