From patchwork Wed Sep 16 10:07:17 2020
X-Patchwork-Submitter: Xu Jun
X-Patchwork-Id: 22442
From: Xu Jun <xujunzz@sjtu.edu.cn>
To: ffmpeg-devel@ffmpeg.org
Date: Wed, 16 Sep 2020 18:07:17 +0800
Message-Id: <20200916100717.3142217-1-xujunzz@sjtu.edu.cn>
Subject: [FFmpeg-devel] [PATCH v3 1/2] dnn_backend_native_layer_conv2d.c: fix memory allocation bug in multithread function.

Before this patch, the output buffer was reallocated inside each worker
thread, so several threads could call av_realloc() on the same operand
concurrently; the memory could be allocated more than once and crash.
After this patch, the buffer is allocated once in the main thread before
the workers are spawned, and a pointer to it is passed into the thread
functions through thread_common_param.

Signed-off-by: Xu Jun <xujunzz@sjtu.edu.cn>
---
v3: fix build warnings
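As a standalone illustration of the pattern the fix adopts (allocate the
shared output once in the main thread, hand each worker only a pointer and
its slice index), here is a minimal sketch. It is not code from the tree;
common_param/worker_param are made-up names that loosely mirror
thread_common_param/thread_param in the diff below.

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

enum { THREAD_NUM = 4, ROWS = 64, COLS = 64 };

/* Shared state set up once by the main thread. */
typedef struct common_param {
    float *output_data;   /* allocated exactly once, before threads start */
    int thread_num;
} common_param;

/* Per-thread state: a pointer to the shared part plus a slice index. */
typedef struct worker_param {
    common_param *common;
    int thread_index;
} worker_param;

static void *worker(void *arg)
{
    worker_param *p = arg;
    /* Each thread writes only its own band of rows, so no allocation
     * and no overlapping writes happen inside the threads. */
    int band  = ROWS / p->common->thread_num;
    int start = band * p->thread_index;
    int end   = (p->thread_index == p->common->thread_num - 1)
                ? ROWS : start + band;
    for (int r = start; r < end; r++)
        for (int c = 0; c < COLS; c++)
            p->common->output_data[r * COLS + c] = (float)(r + c);
    return NULL;
}

int main(void)
{
    common_param common = { malloc(ROWS * COLS * sizeof(float)), THREAD_NUM };
    worker_param params[THREAD_NUM];
    pthread_t tid[THREAD_NUM];

    if (!common.output_data)
        return 1;
    for (int i = 0; i < THREAD_NUM; i++) {
        params[i].common = &common;
        params[i].thread_index = i;
        pthread_create(&tid[i], NULL, worker, &params[i]);
    }
    for (int i = 0; i < THREAD_NUM; i++)
        pthread_join(tid[i], NULL);
    printf("last value: %g\n", common.output_data[ROWS * COLS - 1]);
    free(common.output_data);
    return 0;
}

Build with e.g. "gcc -pthread"; since the bands are disjoint, the single
up-front allocation is safe to share across the workers.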
 .../dnn/dnn_backend_native_layer_conv2d.c | 57 +++++++++----------
 1 file changed, 26 insertions(+), 31 deletions(-)

diff --git a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
index c52725aa2b..5c313454f7 100644
--- a/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
+++ b/libavfilter/dnn/dnn_backend_native_layer_conv2d.c
@@ -32,6 +32,7 @@ typedef struct thread_common_param{
     int32_t output_operand_index;
     const void *parameters;
     NativeContext *ctx;
+    float *output_data;
     int thread_num;
 } thread_common_param;
 
@@ -111,9 +112,7 @@ static void * dnn_execute_layer_conv2d_thread(void *threadarg)
     thread_param *thread_param = (struct thread_param *)threadarg;
     thread_common_param *thread_common_param = thread_param->thread_common_param;
     DnnOperand *operands = thread_common_param->operands;
-    float *output;
     int32_t input_operand_index = thread_common_param->input_operand_indexes[0];
-    int number = operands[input_operand_index].dims[0];
     int height = operands[input_operand_index].dims[1];
     int width = operands[input_operand_index].dims[2];
     int channel = operands[input_operand_index].dims[3];
@@ -130,24 +129,7 @@ static void * dnn_execute_layer_conv2d_thread(void *threadarg)
     int thread_start = thread_stride * thread_param->thread_index + pad_size;
     int thread_end = (thread_param->thread_index == thread_common_param->thread_num - 1) ? (height - pad_size) : (thread_start + thread_stride);
 
-    DnnOperand *output_operand = &operands[thread_common_param->output_operand_index];
-    output_operand->dims[0] = number;
-    output_operand->dims[1] = height - pad_size * 2;
-    output_operand->dims[2] = width - pad_size * 2;
-    output_operand->dims[3] = conv_params->output_num;
-    output_operand->data_type = operands[input_operand_index].data_type;
-    output_operand->length = calculate_operand_data_length(output_operand);
-    if (output_operand->length <= 0) {
-        av_log(thread_common_param->ctx, AV_LOG_ERROR, "The output data length overflow\n");
-        return (void *)DNN_ERROR;
-    }
-    output_operand->data = av_realloc(output_operand->data, output_operand->length);
-    if (!output_operand->data) {
-        av_log(thread_common_param->ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
-        return (void *)DNN_ERROR;
-    }
-
-    output = output_operand->data;
+    float *output = thread_common_param->output_data;
     output += (conv_params->output_num) * (width - 2 * pad_size) * (thread_start - pad_size);
 
     av_assert0(channel == conv_params->input_num);
@@ -213,16 +195,33 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
     pthread_t *thread_id = av_malloc(thread_num * sizeof(pthread_t));
 #endif
     thread_param **thread_param = av_malloc(thread_num * sizeof(*thread_param));
-    void *res;
-    int error_flag = DNN_SUCCESS;
-
-    //struct used to pass parameters
     thread_common_param thread_common_param;
+    const ConvolutionalParams *conv_params = (const ConvolutionalParams *)(parameters);
+    int pad_size = (conv_params->padding_method == VALID) ?
+                   (conv_params->kernel_size - 1) / 2 * conv_params->dilation : 0;
+    DnnOperand *output_operand = &operands[output_operand_index];
+
+    output_operand->dims[0] = operands[input_operand_indexes[0]].dims[0];
+    output_operand->dims[1] = operands[input_operand_indexes[0]].dims[1] - pad_size * 2;
+    output_operand->dims[2] = operands[input_operand_indexes[0]].dims[2] - pad_size * 2;
+    output_operand->dims[3] = conv_params->output_num;
+    output_operand->data_type = operands[input_operand_indexes[0]].data_type;
+    output_operand->length = calculate_operand_data_length(output_operand);
+    if (output_operand->length <= 0) {
+        av_log(ctx, AV_LOG_ERROR, "The output data length overflow\n");
+        return DNN_ERROR;
+    }
+    output_operand->data = av_realloc(output_operand->data, output_operand->length);
+    if (!output_operand->data) {
+        av_log(ctx, AV_LOG_ERROR, "Failed to reallocate memory for output\n");
+        return DNN_ERROR;
+    }
+    thread_common_param.output_data = output_operand->data;
     thread_common_param.operands = operands;
     thread_common_param.input_operand_indexes = input_operand_indexes;
     thread_common_param.output_operand_index = output_operand_index;
     thread_common_param.parameters = parameters;
     thread_common_param.ctx = ctx;
+
 #if HAVE_PTHREAD_CANCEL
     thread_common_param.thread_num = thread_num;
 
@@ -236,9 +235,7 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
 
     //join threads, res gets function return
     for (int i = 0; i < thread_num; i++){
-        pthread_join(thread_id[i], &res);
-        if ((int)res != DNN_SUCCESS)
-            error_flag = (int)res;
+        pthread_join(thread_id[i], NULL);
     }
 
     //release memory
@@ -252,12 +249,10 @@ int dnn_execute_layer_conv2d(DnnOperand *operands, const int32_t *input_operand_
     thread_param[0] = av_malloc(sizeof(thread_param));
     thread_param[0]->thread_common_param = &thread_common_param;
     thread_param[0]->thread_index = 0;
-    res = dnn_execute_layer_conv2d_thread((void *)thread_param[0]);
-    if ((int)res != DNN_SUCCESS)
-        error_flag = (int)res;
+    dnn_execute_layer_conv2d_thread((void *)thread_param[0]);
    av_free(thread_param[0]);
 #endif
 
     av_free(thread_param);
-    return error_flag;
+    return DNN_SUCCESS;
 }
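For reference, the per-thread row split that the workers keep using can be
sanity-checked in isolation. A standalone sketch with made-up sizes,
assuming thread_stride is computed as (height - pad_size * 2) / thread_num
(its definition is not visible in the quoted hunks):

#include <stdio.h>

int main(void)
{
    int height = 100, pad_size = 1, thread_num = 4;
    int thread_stride = (height - pad_size * 2) / thread_num;

    for (int thread_index = 0; thread_index < thread_num; thread_index++) {
        int thread_start = thread_stride * thread_index + pad_size;
        /* the last thread absorbs the remainder rows */
        int thread_end = (thread_index == thread_num - 1) ?
                         (height - pad_size) : (thread_start + thread_stride);
        printf("thread %d: rows [%d, %d)\n", thread_index, thread_start, thread_end);
    }
    return 0;
}

With height = 100 and pad_size = 1 this prints the bands [1,25), [25,49),
[49,73), [73,99): every non-padded row is covered exactly once, which is
why the workers can share the one buffer allocated in the main thread
without overlapping writes.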