From patchwork Thu Jun 15 13:34:23 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: James Darnley X-Patchwork-Id: 3986 Delivered-To: ffmpegpatchwork@gmail.com Received: by 10.103.22.4 with SMTP id 4csp806980vsw; Thu, 15 Jun 2017 06:48:23 -0700 (PDT) X-Received: by 10.223.168.6 with SMTP id l6mr3674506wrc.78.1497534503039; Thu, 15 Jun 2017 06:48:23 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1497534502; cv=none; d=google.com; s=arc-20160816; b=WyD4ljvHse9kkmJTwYK0LAfGbit4VKfrDdPEPfj1H2ZG1ykGEjWe4HBbpUaqs/BFVe XC05ZZ9n76OOtapEnW+AivO/nfDcKYYFKweqZFyIXh8li5MitO+TbwNKc26fz6M7Dx5O kd6vYl5eTouW6hDuWWrMFJoeqAXeNdNNY2n3r4enZr0uOtoSeyY0CuJaw2f5VH1QiKyb CAGReBmpEf1ljcGjYzjCgJJlaxQxll0TR2ON5gGfkh3AwTg7UvNPPiNADkd2qN2cPWFG fxrPIetiVtG1ZDx1hZ1EsRpdcJsoFmZ20F1J1mziMNY9uqC/DHaYtfn+c6KEF4MzwnxK 6W/g== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=sender:errors-to:content-transfer-encoding:mime-version:reply-to :list-subscribe:list-help:list-post:list-archive:list-unsubscribe :list-id:precedence:subject:references:in-reply-to:message-id:date :to:from:dkim-signature:delivered-to:arc-authentication-results; bh=B5yTMiB+WclP8iY9MUx89YC1yhEtk+SmPfaRcKt/LlY=; b=PHPfWqoSD03rI4A5WjzxckIchCncofEW+szGSh3Qoj74VEZXq9B4J2hSd92aXA2SLU loc7HnCdWwyq46xVSIWe3XI2yQ816F+dMlA+MD+1liIWzHEhhQnKXr9zdCVOdxXxPBca 9TNxChVBLTQFSB7SM6gT4u0UlBqAO/hbE057VGELHKlQlLTbndNcCWKBvCMxdkejTZIb EC+Mjj6FgIwOHxNyikJIqPTLlXiWzduVhjePD4WfTB+Sx9HHdwM9JHvTgiccT5wrGzZ5 cLQ++tmGCn1uIu0VaweQak8+Kuxtk1rnNRC1o4CbS6nUEOHidTq0mjtkDl5cCqe4jFjc lcjw== ARC-Authentication-Results: i=1; mx.google.com; dkim=neutral (body hash did not verify) header.i=@ob-encoder-com.20150623.gappssmtp.com header.b=baV5I/m9; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Return-Path: Received: from ffbox0-bg.mplayerhq.hu 
(ffbox0-bg.ffmpeg.org. [79.124.17.100]) by mx.google.com with ESMTP id w67si164744wmg.5.2017.06.15.06.48.22; Thu, 15 Jun 2017 06:48:22 -0700 (PDT) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; dkim=neutral (body hash did not verify) header.i=@ob-encoder-com.20150623.gappssmtp.com header.b=baV5I/m9; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id DA7D368A32B; Thu, 15 Jun 2017 16:48:16 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mail-wm0-f67.google.com (mail-wm0-f67.google.com [74.125.82.67]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id B1EEB68A315 for ; Thu, 15 Jun 2017 16:48:09 +0300 (EEST) Received: by mail-wm0-f67.google.com with SMTP id 70so8195wme.1 for ; Thu, 15 Jun 2017 06:48:12 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=ob-encoder-com.20150623.gappssmtp.com; s=20150623; h=sender:from:to:subject:date:message-id:in-reply-to:references; bh=ooLMLUIqR+e9B5fxJ//sq7JTAszgAoPdxmoxYHAeFLw=; b=baV5I/m9s8W2hDJtCAqvclK5KoAM3bcjTaxKrznQAUZmZjPyh62fLrKXquzWgSVg+3 5+UNw/sj63x7nSKOpzoFLMuJnNLAJSYFjjrg3tyTKHCgg8ewQjGoNWOtslwi/lQPlM9b /yTHey0OKXLUMrj+E9mpGPIOrd84w4YXvt5VaeYl4PADI/wO5hnrIc3IadhvUv6Bi1IX JNZeoL9P1zVn5uW/dmLDRvvus6QLQut+Ca1iK58ubyoEuSdiaNNrWfzWjaxzQWaGbJ+e cEwvnBJJNPF5ZJ47OQtzJcRR/rlxCBEg9cI01HVA/cHWBxqhehxrclUUQM83sZFmTaEp 47jQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:sender:from:to:subject:date:message-id :in-reply-to:references; bh=ooLMLUIqR+e9B5fxJ//sq7JTAszgAoPdxmoxYHAeFLw=; b=gyF4BVtoyKT3/DY/G5MB9TZ2CZMef7QoecgFmNJdMQgp6dA2L7j6a1++G/ljsKFsGX 
epGaVStR8AXXJVG6xu30MFrDUu6wqQjmIZGXZbR2z6uz9NfZE3VwARaBPw4hNAL0zoII PMCkmotsr6Q6IfWe6vd7wPrEhMx5TLyckl0d+TvA+cGooLzAZYAfD53xxa8ivDw+1Bwh Pg9Abbb+wowBaZBwXAMIwivuOOicJshu8q2DoFa8wMu2+QcmHX+fenDxb9u8LZyMhL9d EFDcLnivsyeCbbl8NuabEfH8GBq5g9FInED0oxVn0sUUdPnzJy3dQMWt/7cAo/80d9Iq XgXQ== X-Gm-Message-State: AKS2vOyXxDiDM8v8xBjdeimPdOladwFJZfzv2AOZKYAQwncy8rQuLEmc oHHbicH3/jHWARAc5C4= X-Received: by 10.28.11.84 with SMTP id 81mr3797697wml.82.1497534053381; Thu, 15 Jun 2017 06:40:53 -0700 (PDT) Received: from Ifrit.systemlords.lan (d51a44418.access.telenet.be. [81.164.68.24]) by smtp.gmail.com with ESMTPSA id c55sm211027wrc.7.2017.06.15.06.40.52 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Thu, 15 Jun 2017 06:40:53 -0700 (PDT) From: James Darnley To: FFmpeg development discussions and patches Date: Thu, 15 Jun 2017 15:34:23 +0200 Message-Id: <20170615133426.4484-4-jdarnley@obe.tv> X-Mailer: git-send-email 2.13.1 In-Reply-To: <20170615133426.4484-1-jdarnley@obe.tv> References: <20170615133426.4484-1-jdarnley@obe.tv> Subject: [FFmpeg-devel] [PATCH 3/6] avcodec/x86: modify simple_idct10 macros to add an action parameter X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" --- libavcodec/x86/proresdsp.asm | 2 +- libavcodec/x86/simple_idct10.asm | 8 +++---- libavcodec/x86/simple_idct10_template.asm | 37 +++++++++++++++++-------------- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/libavcodec/x86/proresdsp.asm b/libavcodec/x86/proresdsp.asm index 8318a81c5e..3be0ff7757 100644 --- a/libavcodec/x86/proresdsp.asm +++ b/libavcodec/x86/proresdsp.asm @@ -52,7 +52,7 @@ SECTION .text %macro idct_fn 0 cglobal prores_idct_put_10, 4, 4, 15, pixels, lsize, block, 
qmat - IDCT_FN pw_1, 15, pw_88, 18, pw_4, pw_1019, r3 + IDCT_FN pw_1, 15, pw_88, 18, "put", pw_4, pw_1019, r3 RET %endmacro diff --git a/libavcodec/x86/simple_idct10.asm b/libavcodec/x86/simple_idct10.asm index 7cfd33eaa3..1a5a2eae9b 100644 --- a/libavcodec/x86/simple_idct10.asm +++ b/libavcodec/x86/simple_idct10.asm @@ -69,24 +69,24 @@ SECTION .text %macro idct_fn 0 cglobal simple_idct10, 1, 1, 16, block - IDCT_FN "", 12, "", 19 + IDCT_FN "", 12, "", 19, "store" RET cglobal simple_idct10_put, 3, 3, 16, pixels, lsize, block - IDCT_FN "", 12, "", 19, 0, pw_1023 + IDCT_FN "", 12, "", 19, "put", 0, pw_1023 RET cglobal simple_idct12, 1, 1, 16, block ; coeffs are already 15bits, adding the offset would cause ; overflow in the input - IDCT_FN "", 15, pw_2, 16 + IDCT_FN "", 15, pw_2, 16, "store" RET cglobal simple_idct12_put, 3, 3, 16, pixels, lsize, block ; range isn't known, so the C simple_idct range is used ; Also, using a bias on input overflows, so use the bias ; on output of the first butterfly instead - IDCT_FN "", 15, pw_2, 16, 0, pw_4095 + IDCT_FN "", 15, pw_2, 16, "put", 0, pw_4095 RET %endmacro diff --git a/libavcodec/x86/simple_idct10_template.asm b/libavcodec/x86/simple_idct10_template.asm index 3f398985a5..8367011dfd 100644 --- a/libavcodec/x86/simple_idct10_template.asm +++ b/libavcodec/x86/simple_idct10_template.asm @@ -218,11 +218,12 @@ ; %2 = row bias macro ; %3 = column shift ; %4 = column bias macro -; %5 = min pixel value -; %6 = max pixel value -; %7 = qmat (for prores) +; %5 = final action (nothing, "store", "put", "add") +; %6 = min pixel value +; %7 = max pixel value +; %8 = qmat (for prores) -%macro IDCT_FN 4-7 +%macro IDCT_FN 4-8 ; for (i = 0; i < 8; i++) ; idctRowCondDC(block + i*8); mova m10,[blockq+ 0] ; { row[0] }[0-7] @@ -230,13 +231,13 @@ mova m13,[blockq+64] ; { row[4] }[0-7] mova m12,[blockq+96] ; { row[6] }[0-7] -%if %0 == 7 - pmullw m10,[%7+ 0] - pmullw m8, [%7+32] - pmullw m13,[%7+64] - pmullw m12,[%7+96] +%if %0 == 8 + pmullw 
m10,[%8+ 0] + pmullw m8, [%8+32] + pmullw m13,[%8+64] + pmullw m12,[%8+96] - IDCT_1D %1, %2, %7 + IDCT_1D %1, %2, %8 %else IDCT_1D %1, %2 %endif @@ -257,7 +258,8 @@ IDCT_1D %3, %4 ; clip/store -%if %0 == 4 +%if %0 >= 5 +%ifidn %5,"store" ; No clamping, means pure idct mova [blockq+ 0], m8 mova [blockq+ 16], m0 @@ -267,13 +269,13 @@ mova [blockq+ 80], m11 mova [blockq+ 96], m9 mova [blockq+112], m10 -%else -%ifidn %5, 0 +%elifidn %5,"put" +%ifidn %6, 0 pxor m3, m3 %else - mova m3, [%5] -%endif - mova m5, [%6] + mova m3, [%6] +%endif ; ifidn %6, 0 + mova m5, [%7] pmaxsw m8, m3 pmaxsw m0, m3 pmaxsw m1, m3 @@ -301,7 +303,8 @@ mova [r0+r1 ], m11 mova [r0+r1*2], m9 mova [r0+r2 ], m10 -%endif +%endif ; %5 action +%endif; if %0 >= 5 %endmacro %endif