diff mbox series

[FFmpeg-devel,2/2] dnn_backend_native_layer_conv2d.c: refine code.

Message ID 20200914113154.61946-2-xujunzz@sjtu.edu.cn
State Superseded
Headers show
Series [FFmpeg-devel,1/2] dnn_backend_native_layer_conv2d.c: fix memory allocation bug in multithread function.
Related show

Checks

Context Check Description
andriy/default pending
andriy/make_warn warning New warnings during build
andriy/make success Make finished
andriy/make_fate success Make fate finished

Commit Message

Xu Jun Sept. 14, 2020, 11:31 a.m. UTC
From: Xu Jun <xujunzz@sjtu.edu.cn>

Move the per-thread work-area allocation out of the thread function and into the
main thread.

Signed-off-by: Xu Jun <xujunzz@sjtu.edu.cn>
---
 .../dnn/dnn_backend_native_layer_conv2d.c     | 29 +++++++++----------
 1 file changed, 13 insertions(+), 16 deletions(-)

Comments

Guo, Yejun Sept. 15, 2020, 12:39 a.m. UTC | #1
> -----Original Message-----
> From: ffmpeg-devel <ffmpeg-devel-bounces@ffmpeg.org> On Behalf Of
> xujunzz@sjtu.edu.cn
> Sent: 2020年9月14日 19:32
> To: ffmpeg-devel@ffmpeg.org
> Cc: xujunzz@sjtu.edu.cn
> Subject: [FFmpeg-devel] [PATCH 2/2] dnn_backend_native_layer_conv2d.c:
> refine code.
> 
> From: Xu Jun <xujunzz@sjtu.edu.cn>
> 
> Move thread area allocate out of thread function into main thread.
> 
> Signed-off-by: Xu Jun <xujunzz@sjtu.edu.cn>
> ---
>  .../dnn/dnn_backend_native_layer_conv2d.c     | 29 +++++++++----------
>  1 file changed, 13 insertions(+), 16 deletions(-)
> 
> diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> index 5ed1851512..57659a1283 100644
> --- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> +++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
> @@ -33,12 +33,11 @@ typedef struct thread_common_param{
>      const void *parameters;
>      NativeContext *ctx;
>      float *output_data;
> -    int thread_num;
>  } thread_common_param;
> 
>  typedef struct thread_param{
>      thread_common_param *thread_common_param;
> -    int thread_index;
> +    int thread_start, thread_end;
>  } thread_param;
> 
>  int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int
> file_size, int operands_num) @@ -126,16 +125,12 @@ static void *
> dnn_execute_layer_conv2d_thread(void *threadarg)
>      int filter_size = conv_params->kernel_size * filter_linesize;
>      int pad_size = (conv_params->padding_method == VALID) ?
> (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
> 
> -    int thread_stride = (height - pad_size * 2) /
> thread_common_param->thread_num;
> -    int thread_start = thread_stride * thread_param->thread_index +
> pad_size;
> -    int thread_end = (thread_param->thread_index ==
> thread_common_param->thread_num - 1) ? (height - pad_size) : (thread_start
> + thread_stride);
> -
>      float *output = thread_common_param->output_data;
> -    output += (conv_params->output_num) * (width - 2 * pad_size) *
> (thread_start - pad_size);
> +    output += (conv_params->output_num) * (width - 2 * pad_size) *
> + (thread_param->thread_start - pad_size);
> 
>      av_assert0(channel == conv_params->input_num);
> 
> -    for (int y = thread_start; y < thread_end; ++y) {
> +    for (int y = thread_param->thread_start; y <
> + thread_param->thread_end; ++y) {
>          for (int x = pad_size; x < width - pad_size; ++x) {
>              for (int n_filter = 0; n_filter < conv_params->output_num;
> ++n_filter) {
>                  if (conv_params->has_bias) @@ -207,11 +202,13 @@ int
> dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t
> *input_operand_
> 
>      //alloc memory
>      const ConvolutionalParams *conv_params = (const ConvolutionalParams
> *)(parameters);
> +    int height = operands[input_operand_indexes[0]].dims[1];
> +    int width = operands[input_operand_indexes[0]].dims[2];
>      int pad_size = (conv_params->padding_method == VALID) ?
> (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
>      DnnOperand *output_operand = &operands[output_operand_index];
>      output_operand->dims[0] = operands[input_operand_indexes[0]].dims[0];
> -    output_operand->dims[1] = operands[input_operand_indexes[0]].dims[1] -
> pad_size * 2;
> -    output_operand->dims[2] = operands[input_operand_indexes[0]].dims[2] -
> pad_size * 2;
> +    output_operand->dims[1] = height - pad_size * 2;
> +    output_operand->dims[2] = width - pad_size * 2;
>      output_operand->dims[3] = conv_params->output_num;
>      output_operand->data_type =
> operands[input_operand_indexes[0]].data_type;
>      output_operand->length =
> calculate_operand_data_length(output_operand);
> @@ -227,13 +224,13 @@ int dnn_execute_layer_conv2d(DnnOperand
> *operands, const int32_t *input_operand_
>      thread_common_param.output_data = output_operand->data;
> 
>  #if HAVE_PTHREAD_CANCEL
> -    thread_common_param.thread_num = thread_num;
> -
> +    int thread_stride = (height - pad_size * 2) / thread_num;
Please fix the build warning: move the 'int thread_stride' declaration up, before the preceding statements (the warning is presumably a declaration-after-statement warning).
diff mbox series

Patch

diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
index 5ed1851512..57659a1283 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
@@ -33,12 +33,11 @@  typedef struct thread_common_param{
     const void *parameters;
     NativeContext *ctx;
     float *output_data;
-    int thread_num;
 } thread_common_param;
 
 typedef struct thread_param{
     thread_common_param *thread_common_param;
-    int thread_index;
+    int thread_start, thread_end;
 } thread_param;
 
 int dnn_load_layer_conv2d(Layer *layer, AVIOContext *model_file_context, int file_size, int operands_num)
@@ -126,16 +125,12 @@  static void * dnn_execute_layer_conv2d_thread(void *threadarg)
     int filter_size = conv_params->kernel_size * filter_linesize;
     int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
 
-    int thread_stride = (height - pad_size * 2) / thread_common_param->thread_num;
-    int thread_start = thread_stride * thread_param->thread_index + pad_size;
-    int thread_end = (thread_param->thread_index == thread_common_param->thread_num - 1) ? (height - pad_size) : (thread_start + thread_stride);
-
     float *output = thread_common_param->output_data;
-    output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_start - pad_size);
+    output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_param->thread_start - pad_size);
 
     av_assert0(channel == conv_params->input_num);
 
-    for (int y = thread_start; y < thread_end; ++y) {
+    for (int y = thread_param->thread_start; y < thread_param->thread_end; ++y) {
         for (int x = pad_size; x < width - pad_size; ++x) {
             for (int n_filter = 0; n_filter < conv_params->output_num; ++n_filter) {
                 if (conv_params->has_bias)
@@ -207,11 +202,13 @@  int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
 
     //alloc memory
     const ConvolutionalParams *conv_params = (const ConvolutionalParams *)(parameters);
+    int height = operands[input_operand_indexes[0]].dims[1];
+    int width = operands[input_operand_indexes[0]].dims[2];
     int pad_size = (conv_params->padding_method == VALID) ? (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
     DnnOperand *output_operand = &operands[output_operand_index];
     output_operand->dims[0] = operands[input_operand_indexes[0]].dims[0];
-    output_operand->dims[1] = operands[input_operand_indexes[0]].dims[1] - pad_size * 2;
-    output_operand->dims[2] = operands[input_operand_indexes[0]].dims[2] - pad_size * 2;
+    output_operand->dims[1] = height - pad_size * 2;
+    output_operand->dims[2] = width - pad_size * 2;
     output_operand->dims[3] = conv_params->output_num;
     output_operand->data_type = operands[input_operand_indexes[0]].data_type;
     output_operand->length = calculate_operand_data_length(output_operand);
@@ -227,13 +224,13 @@  int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
     thread_common_param.output_data = output_operand->data;
 
 #if HAVE_PTHREAD_CANCEL
-    thread_common_param.thread_num = thread_num;
-
+    int thread_stride = (height - pad_size * 2) / thread_num;
     //create threads
     for (int i = 0; i < thread_num; i++){
         thread_param[i] = av_malloc(sizeof(**thread_param));
         thread_param[i]->thread_common_param = &thread_common_param;
-        thread_param[i]->thread_index = i;
+        thread_param[i]->thread_start = thread_stride * i + pad_size;
+        thread_param[i]->thread_end = (i == thread_num - 1) ? (height - pad_size) : (thread_param[i]->thread_start + thread_stride);
         pthread_create(&thread_id[i], NULL, dnn_execute_layer_conv2d_thread, (void *)thread_param[i]);
     }
 
@@ -249,10 +246,10 @@  int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
         av_free(thread_param[i]);
     }
 #else
-    thread_common_param.thread_num = 1;
-    thread_param[0] = av_malloc(sizeof(thread_param));
+    thread_param[0] = av_malloc(sizeof(**thread_param));
     thread_param[0]->thread_common_param = &thread_common_param;
-    thread_param[0]->thread_index = 0;
+    thread_param[0]->thread_start = 0;
+    thread_param[0]->thread_end = height - pad_size;
     dnn_execute_layer_conv2d_thread((void *)thread_param[0]);
     av_free(thread_param[0]);
 #endif