From patchwork Mon Jun 19 15:11:00 2017 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: James Darnley X-Patchwork-Id: 4043 Delivered-To: ffmpegpatchwork@gmail.com Received: by 10.103.22.4 with SMTP id 4csp961844vsw; Mon, 19 Jun 2017 08:24:12 -0700 (PDT) X-Received: by 10.223.152.130 with SMTP id w2mr1489327wrb.118.1497885852136; Mon, 19 Jun 2017 08:24:12 -0700 (PDT) ARC-Seal: i=1; a=rsa-sha256; t=1497885851; cv=none; d=google.com; s=arc-20160816; b=fGIcqTGfnTebh17u4OV4AZ79KfYVtUvGLNeqGrUWrHOTfN7UleEK/LnG/q5m2t4UEn 34tPERwCG0PPzgcEdFgo/sHLraCUBJda2irbWWxX1yBLDGZjwmzxSVJFNxcmRHUpyxre KSKudqB+2gQbNV1dclIec9kP17tvzhnur+PQmHuLYJeTeDxolyHfvYWCPzUfjVFEry8s 5A+zB8oDIB0FbsDeMOZ/8t3RcdYGXHWiJmxiTMMKHTF2X0eK8cflUb+V+LRDR7RRRybJ N9KRNqQZhYnQPevOGZUOj649a7s/Zx4IygIGZxVONVWqjKY+i/+43U8l97xlU4QaZa9J Ub5A== ARC-Message-Signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=google.com; s=arc-20160816; h=sender:errors-to:content-transfer-encoding:mime-version:reply-to :list-subscribe:list-help:list-post:list-archive:list-unsubscribe :list-id:precedence:subject:references:in-reply-to:message-id:date :to:from:dkim-signature:delivered-to:arc-authentication-results; bh=B5yTMiB+WclP8iY9MUx89YC1yhEtk+SmPfaRcKt/LlY=; b=YswXchumBSgiJJk62rP9D5rBOWi6OUYbNRgQP0s+bySY+JKzqW1r5q1j/DWnDEdiAq f8Ekpyne36MRS0W3CA//SeVEFLfKaUvEctwHZMxRs4dem54JrdsCx9U3YuM/+pkfKrPF HooE7MA1gDtkDeJoETkOdp0DoyD6Fnh8zInIJ0/YzmuwK2jLvwlgD+ix9m49tcjgCVoS YJl6PYUggo9273NtrwweCLKcy07CvUq3oPr04o5nRHlKZ5SzqSUQ604+KYDfm9iN2Ldz Sg6xxqSa9qcrlePQsR68Xq/0bYDwqQUi1YBxtEHyDMjDKXF7e0SuAieJSfqlRh21g2td XcDA== ARC-Authentication-Results: i=1; mx.google.com; dkim=neutral (body hash did not verify) header.i=@ob-encoder-com.20150623.gappssmtp.com header.b=IQJIUYFV; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Return-Path: Received: from ffbox0-bg.mplayerhq.hu 
(ffbox0-bg.ffmpeg.org. [79.124.17.100]) by mx.google.com with ESMTP id d4si10626409wra.285.2017.06.19.08.24.11; Mon, 19 Jun 2017 08:24:11 -0700 (PDT) Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; Authentication-Results: mx.google.com; dkim=neutral (body hash did not verify) header.i=@ob-encoder-com.20150623.gappssmtp.com header.b=IQJIUYFV; spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 0F55B68A51F; Mon, 19 Jun 2017 18:23:46 +0300 (EEST) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mail-wm0-f66.google.com (mail-wm0-f66.google.com [74.125.82.66]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 6ED4F68A502 for ; Mon, 19 Jun 2017 18:23:37 +0300 (EEST) Received: by mail-wm0-f66.google.com with SMTP id f90so17050616wmh.0 for ; Mon, 19 Jun 2017 08:23:42 -0700 (PDT) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=ob-encoder-com.20150623.gappssmtp.com; s=20150623; h=sender:from:to:subject:date:message-id:in-reply-to:references; bh=ooLMLUIqR+e9B5fxJ//sq7JTAszgAoPdxmoxYHAeFLw=; b=IQJIUYFVKL73cObh2nvxwo1dcEIfGUJ0sdienqenGr/uoi5c23vYoC+j0EOzDWHup4 wP5haVKGrg0rzhBnte3tAMtDZtwEIJGDQrzxG5En3T+HUmythy4pspcSUmPVzaC0LduN 53I2Sn7JGjlo31Gnjhnwh3XElivHrJmsML0e1LyuM2qRiEJLC+8/pvZkVUeo9fQ2kvfg kY47VtbRS+dxxILFSGkpSyP6qujBNfeQXg/IxDJYGuu3YW16UcwK6LH2/Juo+6qTqHsG CEXLx5AXG+RfCuaL2ySXFv+GQLLdjhVOjmdiItlwVb5AapFsBdJZyjb6rv0AgMr29ANd 6LLQ== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:sender:from:to:subject:date:message-id :in-reply-to:references; bh=ooLMLUIqR+e9B5fxJ//sq7JTAszgAoPdxmoxYHAeFLw=; b=f6ePaNBvxvPOZlNTtA8LdCLiUdZlBT5ErMUEcNyp45TmIIcKnxYn0fJR9BeUtB3zVJ 
7SkrhAZNo6LRaExNu2ZsvjpeG56iOEIucvXLPJYLvAGZOWUPiOdofxqdHy8h8+otVEBl kRKGz5dC9O5As9AbpOgM01beIHKXwmC2uerQ/sHtJY3rrL5ZigDdikC9EJWtQsAJiijl oXjYXZSA04Ha8tSSH/NqkR4z9iioc0CYDiwQo9ci8aYw1Rz6jpbUpfydK6VxF5Ognxdk zanT9VKEpatMbwMEijFlF2BrCoZJkE8/WY6+QORl7+STAbsxJpnxx42aFAYCaaf6ASpC 9EzQ== X-Gm-Message-State: AKS2vOwxjvZV6xFShmPUB2vQuio3hI/oAjYGJB3fZJHgkL+uqP30KSxV ecrOV29bp4QnvTI/Agk= X-Received: by 10.28.4.145 with SMTP id 139mr16608413wme.118.1497885463018; Mon, 19 Jun 2017 08:17:43 -0700 (PDT) Received: from Ifrit.systemlords.lan (d51a44418.access.telenet.be. [81.164.68.24]) by smtp.gmail.com with ESMTPSA id 6sm8059540wrg.61.2017.06.19.08.17.42 for (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Mon, 19 Jun 2017 08:17:42 -0700 (PDT) From: James Darnley To: FFmpeg development discussions and patches Date: Mon, 19 Jun 2017 17:11:00 +0200 Message-Id: <20170619151104.31273-8-jdarnley@obe.tv> X-Mailer: git-send-email 2.13.1 In-Reply-To: <20170619151104.31273-1-jdarnley@obe.tv> References: <20170619151104.31273-1-jdarnley@obe.tv> Subject: [FFmpeg-devel] [PATCH 07/11] avcodec/x86: modify simple_idct10 macros to add an action parameter X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" --- libavcodec/x86/proresdsp.asm | 2 +- libavcodec/x86/simple_idct10.asm | 8 +++---- libavcodec/x86/simple_idct10_template.asm | 37 +++++++++++++++++-------------- 3 files changed, 25 insertions(+), 22 deletions(-) diff --git a/libavcodec/x86/proresdsp.asm b/libavcodec/x86/proresdsp.asm index 8318a81c5e..3be0ff7757 100644 --- a/libavcodec/x86/proresdsp.asm +++ b/libavcodec/x86/proresdsp.asm @@ -52,7 +52,7 @@ SECTION .text %macro idct_fn 0 cglobal prores_idct_put_10, 4, 4, 15, pixels, lsize, 
block, qmat - IDCT_FN pw_1, 15, pw_88, 18, pw_4, pw_1019, r3 + IDCT_FN pw_1, 15, pw_88, 18, "put", pw_4, pw_1019, r3 RET %endmacro diff --git a/libavcodec/x86/simple_idct10.asm b/libavcodec/x86/simple_idct10.asm index 7cfd33eaa3..1a5a2eae9b 100644 --- a/libavcodec/x86/simple_idct10.asm +++ b/libavcodec/x86/simple_idct10.asm @@ -69,24 +69,24 @@ SECTION .text %macro idct_fn 0 cglobal simple_idct10, 1, 1, 16, block - IDCT_FN "", 12, "", 19 + IDCT_FN "", 12, "", 19, "store" RET cglobal simple_idct10_put, 3, 3, 16, pixels, lsize, block - IDCT_FN "", 12, "", 19, 0, pw_1023 + IDCT_FN "", 12, "", 19, "put", 0, pw_1023 RET cglobal simple_idct12, 1, 1, 16, block ; coeffs are already 15bits, adding the offset would cause ; overflow in the input - IDCT_FN "", 15, pw_2, 16 + IDCT_FN "", 15, pw_2, 16, "store" RET cglobal simple_idct12_put, 3, 3, 16, pixels, lsize, block ; range isn't known, so the C simple_idct range is used ; Also, using a bias on input overflows, so use the bias ; on output of the first butterfly instead - IDCT_FN "", 15, pw_2, 16, 0, pw_4095 + IDCT_FN "", 15, pw_2, 16, "put", 0, pw_4095 RET %endmacro diff --git a/libavcodec/x86/simple_idct10_template.asm b/libavcodec/x86/simple_idct10_template.asm index 3f398985a5..8367011dfd 100644 --- a/libavcodec/x86/simple_idct10_template.asm +++ b/libavcodec/x86/simple_idct10_template.asm @@ -218,11 +218,12 @@ ; %2 = row bias macro ; %3 = column shift ; %4 = column bias macro -; %5 = min pixel value -; %6 = max pixel value -; %7 = qmat (for prores) +; %5 = final action (nothing, "store", "put", "add") +; %6 = min pixel value +; %7 = max pixel value +; %8 = qmat (for prores) -%macro IDCT_FN 4-7 +%macro IDCT_FN 4-8 ; for (i = 0; i < 8; i++) ; idctRowCondDC(block + i*8); mova m10,[blockq+ 0] ; { row[0] }[0-7] @@ -230,13 +231,13 @@ mova m13,[blockq+64] ; { row[4] }[0-7] mova m12,[blockq+96] ; { row[6] }[0-7] -%if %0 == 7 - pmullw m10,[%7+ 0] - pmullw m8, [%7+32] - pmullw m13,[%7+64] - pmullw m12,[%7+96] +%if %0 == 8 + pmullw 
m10,[%8+ 0] + pmullw m8, [%8+32] + pmullw m13,[%8+64] + pmullw m12,[%8+96] - IDCT_1D %1, %2, %7 + IDCT_1D %1, %2, %8 %else IDCT_1D %1, %2 %endif @@ -257,7 +258,8 @@ IDCT_1D %3, %4 ; clip/store -%if %0 == 4 +%if %0 >= 5 +%ifidn %5,"store" ; No clamping, means pure idct mova [blockq+ 0], m8 mova [blockq+ 16], m0 @@ -267,13 +269,13 @@ mova [blockq+ 80], m11 mova [blockq+ 96], m9 mova [blockq+112], m10 -%else -%ifidn %5, 0 +%elifidn %5,"put" +%ifidn %6, 0 pxor m3, m3 %else - mova m3, [%5] -%endif - mova m5, [%6] + mova m3, [%6] +%endif ; ifidn %6, 0 + mova m5, [%7] pmaxsw m8, m3 pmaxsw m0, m3 pmaxsw m1, m3 @@ -301,7 +303,8 @@ mova [r0+r1 ], m11 mova [r0+r1*2], m9 mova [r0+r2 ], m10 -%endif +%endif ; %5 action +%endif; if %0 >= 5 %endmacro %endif