| Message ID | 20200914113154.61946-2-xujunzz@sjtu.edu.cn |
| --- | --- |
| State | Superseded |
| Series | [FFmpeg-devel,1/2] dnn_backend_native_layer_conv2d.c: fix memory allocation bug in multithread function. |
| Context | Check | Description |
| --- | --- | --- |
| andriy/default | pending | |
| andriy/make_warn | warning | New warnings during build |
| andriy/make | success | Make finished |
| andriy/make_fate | success | Make fate finished |
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of xujunzz@sjtu.edu.cn
> Sent: September 14, 2020 19:32
> To: ffmpeg-devel@ffmpeg.org
> Cc: xujunzz@sjtu.edu.cn
> Subject: [FFmpeg-devel] [PATCH 2/2] dnn_backend_native_layer_conv2d.c: refine code.
>
> From: Xu Jun <xujunzz@sjtu.edu.cn>
>
> Move the thread area allocation out of the thread function into the main thread.
>
> Signed-off-by: Xu Jun <xujunzz@sjtu.edu.cn>
> ---
>  .../dnn/dnn_backend_native_layer_conv2d.c | 29 +++++++++----------
>  1 file changed, 13 insertions(+), 16 deletions(-)
>
> diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> index 5ed1851512..57659a1283 100644
> --- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> +++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> @@ -33,12 +33,11 @@ typedef struct thread_common_param{
>      const void *parameters;
>      NativeContext *ctx;
>      float *output_data;
> -    int thread_num;
>  } thread_common_param;
>
>  typedef struct thread_param{
>      thread_common_param *thread_common_param;
> -    int thread_index;
> +    int thread_start, thread_end;
>  } thread_param;
>
>  int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
> @@ -126,16 +125,12 @@ static void * dnn_execute_layer_conv2d_thread(void *threadarg)
>      int filter_size = conv_params->kernel_size * filter_linesize;
>      int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
>
> -    int thread_stride = (height - pad_size * 2) / thread_common_param->thread_num;
> -    int thread_start = thread_stride * thread_param->thread_index + pad_size;
> -    int thread_end = (thread_param->thread_index == thread_common_param->thread_num - 1) ? (height - pad_size) : (thread_start + thread_stride);
> -
>      float *output = thread_common_param->output_data;
> -    output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_start - pad_size);
> +    output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_param->thread_start - pad_size);
>
>      av_assert0(channel == conv_params->input_num);
>
> -    for (int y = thread_start; y < thread_end; ++y) {
> +    for (int y = thread_param->thread_start; y < thread_param->thread_end; ++y) {
>          for (int x = pad_size; x < width - pad_size; ++x) {
>              for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
>                  if (conv_params->has_bias)
> @@ -207,11 +202,13 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
>
>      //alloc memory
>      const ConvolutionalParams *conv_params = (const ConvolutionalParams *)(parameters);
> +    int height = operands[input_operand_indexes[0]].dims[1];
> +    int width = operands[input_operand_indexes[0]].dims[2];
>      int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
>      DnnOperand *output_operand = &operands[output_operand_index];
>      output_operand->dims[0] = operands[input_operand_indexes[0]].dims[0];
> -    output_operand->dims[1] = operands[input_operand_indexes[0]].dims[1] - pad_size * 2;
> -    output_operand->dims[2] = operands[input_operand_indexes[0]].dims[2] - pad_size * 2;
> +    output_operand->dims[1] = height - pad_size * 2;
> +    output_operand->dims[2] = width - pad_size * 2;
>      output_operand->dims[3] = conv_params->output_num;
>      output_operand->data_type = operands[input_operand_indexes[0]].data_type;
>      output_operand->length = calculate_operand_data_length(output_operand);
> @@ -227,13 +224,13 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
>      thread_common_param.output_data = output_operand->data;
>
>  #if HAVE_PTHREAD_CANCEL
> -    thread_common_param.thread_num = thread_num;
> -
> +    int thread_stride = (height - pad_size * 2) / thread_num;

please fix the build warning, move 'int thread_stride' up.
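The warning flagged by andriy/make_warn is presumably a declaration-after-statement warning: inside `#if HAVE_PTHREAD_CANCEL`, `int thread_stride` is declared after the `thread_common_param.output_data = ...` assignment, and FFmpeg builds warn on that pattern. A minimal sketch of the requested fix, not the actual FFmpeg code (the helper `do_setup` and the sample values are hypothetical stand-ins for the surrounding statements):

```c
#include <stdio.h>

/* Hypothetical stand-in for the statements that precede the declaration,
 * e.g. filling thread_common_param. */
static void do_setup(void) { }

/* Before: a declaration follows a statement inside the block, which
 * triggers -Wdeclaration-after-statement in builds that enable it. */
static int stride_before(int height, int pad_size, int thread_num)
{
    do_setup();
    int thread_stride = (height - pad_size * 2) / thread_num;
    return thread_stride;
}

/* After: the declaration is moved to the top of the block and the value
 * is assigned where the old declaration stood. */
static int stride_after(int height, int pad_size, int thread_num)
{
    int thread_stride;

    do_setup();
    thread_stride = (height - pad_size * 2) / thread_num;
    return thread_stride;
}

int main(void)
{
    /* Both variants compute the same stride; only the declaration moved. */
    printf("%d %d\n", stride_before(100, 1, 4), stride_after(100, 1, 4));
    return 0;
}
```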
diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
index 5ed1851512..57659a1283 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
@@ -33,12 +33,11 @@ typedef struct thread_common_param{
     const void *parameters;
     NativeContext *ctx;
     float *output_data;
-    int thread_num;
 } thread_common_param;
 
 typedef struct thread_param{
     thread_common_param *thread_common_param;
-    int thread_index;
+    int thread_start, thread_end;
 } thread_param;
 
 int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
@@ -126,16 +125,12 @@ static void * dnn_execute_layer_conv2d_thread(void *threadarg)
     int filter_size = conv_params->kernel_size * filter_linesize;
     int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
 
-    int thread_stride = (height - pad_size * 2) / thread_common_param->thread_num;
-    int thread_start = thread_stride * thread_param->thread_index + pad_size;
-    int thread_end = (thread_param->thread_index == thread_common_param->thread_num - 1) ? (height - pad_size) : (thread_start + thread_stride);
-
     float *output = thread_common_param->output_data;
-    output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_start - pad_size);
+    output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_param->thread_start - pad_size);
 
     av_assert0(channel == conv_params->input_num);
 
-    for (int y = thread_start; y < thread_end; ++y) {
+    for (int y = thread_param->thread_start; y < thread_param->thread_end; ++y) {
         for (int x = pad_size; x < width - pad_size; ++x) {
             for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
                 if (conv_params->has_bias)
@@ -207,11 +202,13 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
 
     //alloc memory
     const ConvolutionalParams *conv_params = (const ConvolutionalParams *)(parameters);
+    int height = operands[input_operand_indexes[0]].dims[1];
+    int width = operands[input_operand_indexes[0]].dims[2];
     int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
     DnnOperand *output_operand = &operands[output_operand_index];
     output_operand->dims[0] = operands[input_operand_indexes[0]].dims[0];
-    output_operand->dims[1] = operands[input_operand_indexes[0]].dims[1] - pad_size * 2;
-    output_operand->dims[2] = operands[input_operand_indexes[0]].dims[2] - pad_size * 2;
+    output_operand->dims[1] = height - pad_size * 2;
+    output_operand->dims[2] = width - pad_size * 2;
     output_operand->dims[3] = conv_params->output_num;
     output_operand->data_type = operands[input_operand_indexes[0]].data_type;
     output_operand->length = calculate_operand_data_length(output_operand);
@@ -227,13 +224,13 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
     thread_common_param.output_data = output_operand->data;
 
 #if HAVE_PTHREAD_CANCEL
-    thread_common_param.thread_num = thread_num;
-
+    int thread_stride = (height - pad_size * 2) / thread_num;
     //create threads
     for (int i = 0; i < thread_num; i++){
         thread_param[i] = av_malloc(sizeof(**thread_param));
         thread_param[i]->thread_common_param = &thread_common_param;
-        thread_param[i]->thread_index = i;
+        thread_param[i]->thread_start = thread_stride * i + pad_size;
+        thread_param[i]->thread_end = (i == thread_num - 1) ? (height - pad_size) : (thread_param[i]->thread_start + thread_stride);
         pthread_create(&thread_id[i], NULL, dnn_execute_layer_conv2d_thread, (void *)thread_param[i]);
     }
@@ -249,10 +246,10 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
         av_free(thread_param[i]);
     }
 #else
-    thread_common_param.thread_num = 1;
-    thread_param[0] = av_malloc(sizeof(thread_param));
+    thread_param[0] = av_malloc(sizeof(**thread_param));
     thread_param[0]->thread_common_param = &thread_common_param;
-    thread_param[0]->thread_index = 0;
+    thread_param[0]->thread_start = 0;
+    thread_param[0]->thread_end = height - pad_size;
     dnn_execute_layer_conv2d_thread((void *)thread_param[0]);
     av_free(thread_param[0]);
 #endif
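For context, the row partitioning that this patch moves into the main thread can be sketched standalone: each worker covers `thread_stride` rows starting after the top padding, and the last worker also absorbs the integer-division remainder up to `height - pad_size`. This is an illustration with made-up dimensions, not FFmpeg code; the variable names simply mirror the patch.

```c
#include <stdio.h>

/* Standalone illustration of the per-thread work split computed in
 * dnn_execute_layer_conv2d: rows [pad_size, height - pad_size) are
 * divided into thread_num contiguous chunks; the last chunk is extended
 * to cover the remainder of the division. */
int main(void)
{
    int height = 107, pad_size = 1, thread_num = 4;   /* made-up sizes */
    int thread_stride = (height - pad_size * 2) / thread_num;

    for (int i = 0; i < thread_num; i++) {
        int thread_start = thread_stride * i + pad_size;
        /* the last thread picks up the leftover rows */
        int thread_end = (i == thread_num - 1) ? (height - pad_size)
                                               : (thread_start + thread_stride);
        printf("thread %d: rows [%d, %d)\n", i, thread_start, thread_end);
    }
    return 0;
}
```

With these numbers it prints rows [1, 27), [27, 53), [53, 79), and [79, 106): all 105 output rows are covered, with the remainder going to the last thread.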