From patchwork Sat Feb 9 13:10:21 2019 Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit X-Patchwork-Submitter: matthew.w.fearnley@gmail.com X-Patchwork-Id: 12005 Return-Path: X-Original-To: patchwork@ffaux-bg.ffmpeg.org Delivered-To: patchwork@ffaux-bg.ffmpeg.org Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org [79.124.17.100]) by ffaux.localdomain (Postfix) with ESMTP id A522144854F for ; Sat, 9 Feb 2019 15:19:11 +0200 (EET) Received: from [127.0.1.1] (localhost [127.0.0.1]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 863D168AA42; Sat, 9 Feb 2019 15:19:11 +0200 (EET) X-Original-To: ffmpeg-devel@ffmpeg.org Delivered-To: ffmpeg-devel@ffmpeg.org Received: from mail-wm1-f66.google.com (mail-wm1-f66.google.com [209.85.128.66]) by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 5761668A2D7 for ; Sat, 9 Feb 2019 15:19:05 +0200 (EET) Received: by mail-wm1-f66.google.com with SMTP id p6so8161145wmc.1 for ; Sat, 09 Feb 2019 05:19:05 -0800 (PST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025; h=from:to:cc:subject:date:message-id:in-reply-to:references; bh=7p4Dl5AG/m1peWqfzA05hwNf3W9v6vEbCc/XOgEoIOk=; b=URUWQyITbUvawCVQXPMKn+sWo0CAlG4zl8Zk+rOCwoWmN7Mxg/SyXksG90ag3b/CNa 9twFTe0LWaxt4KKPtYPhTA7Qh6u7ok8PohuOrX3QMKC9qKbPz0gvUHnwWCzmobW8kJHq nA+twH7zysZCiVqxNajmUW7D67VV4VZ1Dgl5g8uz7iKunlPO7TOlDY1APi+hL9kRS1hC Bv2UmyfexmpLGSN5cg3DEvt+SF/tSEHKCqlnejOIsCcrAZdck3doTCCWq79R12ThqLjC Hy+Au2pK7ir5R0auwdT209T+3rjMUi8e/rxdecZ5+qqaggUh8B8cvHr8w6B+Kh5LrbKr Uv8A== X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=1e100.net; s=20161025; h=x-gm-message-state:from:to:cc:subject:date:message-id:in-reply-to :references; bh=7p4Dl5AG/m1peWqfzA05hwNf3W9v6vEbCc/XOgEoIOk=; b=k/VpwMzwhsiwsZbnOSnkSfLCzn5ocTQQVfuspXpskMvYmp4rhYlgvQitBPgUy2i0QT HfIbPDol9ZTV+b78DEMeIPK8Wb6OI9PwCJ5fIPAvVzEf9nUsD0lORfEb6ugKYTPXeStc vp8bvb6f7hMPlYED6v7c7yFHoIun7kOP9IjyxPtwDdUUqe1i2hwXx5rUiJJuFswizLyW 433bfSeeqS0Xd9IhMampcHdjg7sARwhnVKmchoBwzkKeFSiii6Z/agG6vVrkrPaPypcB yL+WXcwvaeiZ2RuOYouj8iaOUkQ/Wu9T5/THSoKHsFW5TKMm/q6MRvChfJyX9b3toNDm +Cbw== X-Gm-Message-State: AHQUAuYFebG9POa18NduM6pFJ3PkbOVAELFEPi/ugzWWJ8zgtfsRh332 AEEcass1IBxrjP0O8BkqfLuINATm X-Google-Smtp-Source: AHgI3IYcTUm7062jX7E6ijhSIFvJTXHPYxV7IocNQdTMMo6WL1Zd5seMfE/CR0ItsW/aUrhrZvXudQ== X-Received: by 2002:adf:eb01:: with SMTP id s1mr11872363wrn.101.1549717962793; Sat, 09 Feb 2019 05:12:42 -0800 (PST) Received: from localhost.localdomain (cpc131498-bagu18-2-0-cust88.know.cable.virginm.net. [86.9.33.89]) by smtp.gmail.com with ESMTPSA id u25sm8878557wml.31.2019.02.09.05.12.41 (version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Sat, 09 Feb 2019 05:12:41 -0800 (PST) From: Matthew Fearnley To: ffmpeg-devel@ffmpeg.org Date: Sat, 9 Feb 2019 13:10:21 +0000 Message-Id: <20190209131021.9959-2-matthew.w.fearnley@gmail.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20190209131021.9959-1-matthew.w.fearnley@gmail.com> References: <20190209131021.9959-1-matthew.w.fearnley@gmail.com> Subject: [FFmpeg-devel] [PATCH 2/2] libavcodec/zmbvenc: motion estimation improvements/bug fixes: X-BeenThere: ffmpeg-devel@ffmpeg.org X-Mailman-Version: 2.1.20 Precedence: list List-Id: FFmpeg development discussions and patches List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , Reply-To: FFmpeg development discussions and patches Cc: Matthew Fearnley MIME-Version: 1.0 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" - Clamp ME range to -64..63 (prevents corruption when me_range is too high) - Allow MV's up to *and including* the positive range limit - Allow out-of-edge ME by padding the prev buffer with a border of 0's - Try previous MV before checking the rest (improves speed in some cases) - More robust logic in code - ensure *mx,*my,*xored are updated together --- libavcodec/zmbvenc.c | 64 +++++++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/libavcodec/zmbvenc.c b/libavcodec/zmbvenc.c index 3df6e724c8..e92193478b 100644 --- a/libavcodec/zmbvenc.c +++ b/libavcodec/zmbvenc.c @@ -45,11 +45,11 @@ typedef struct ZmbvEncContext { AVCodecContext *avctx; - int range; + int lrange, urange; uint8_t *comp_buf, *work_buf; uint8_t pal[768]; uint32_t pal2[256]; //for quick comparisons - uint8_t *prev; + uint8_t *prev, *prev_buf; int pstride; int comp_size; int keyint, curfrm; @@ -61,7 +61,6 @@ typedef struct ZmbvEncContext { /** Block comparing function * XXX should be optimized and moved to DSPContext - * TODO handle out of edge ME */ static inline int block_cmp(ZmbvEncContext *c, uint8_t *src, int stride, uint8_t *src2, int stride2, int bw, int bh, @@ -100,23 +99,42 @@ static inline int block_cmp(ZmbvEncContext *c, uint8_t *src, int stride, static int zmbv_me(ZmbvEncContext *c, uint8_t *src, int sstride, uint8_t *prev, int pstride, int x, int y, int *mx, int *my, int *xored) { - int dx, dy, tx, ty, tv, bv, bw, bh; + int dx, dy, txored, tv, bv, bw, bh; + int mx0, my0; - *mx = *my = 0; + mx0 = *mx; + my0 = *my; bw = FFMIN(ZMBV_BLOCK, c->avctx->width - x); bh = FFMIN(ZMBV_BLOCK, c->avctx->height - y); + + /* Try (0,0) */ bv = block_cmp(c, src, sstride, prev, pstride, bw, bh, xored); + *mx = *my = 0; if(!bv) return 0; - for(ty = FFMAX(y - c->range, 0); ty < FFMIN(y + c->range, c->avctx->height - bh); ty++){ - for(tx = FFMAX(x - c->range, 0); tx < FFMIN(x + c->range, c->avctx->width - bw); tx++){ - if(tx == x && ty == y) continue; // we already tested this block - dx = tx - x; - dy = ty - y; - tv = block_cmp(c, src, sstride, prev + dx + dy * pstride, pstride, bw, bh, xored); + + /* Try previous block's MV (if not 0,0) */ + if (mx0 || my0){ + tv = block_cmp(c, src, sstride, prev + mx0 + my0 * pstride, pstride, bw, bh, &txored); + if(tv < bv){ + bv = tv; + *mx = mx0; + *my = my0; + *xored = txored; + if(!bv) return 0; + } + } + + /* Try other MVs from top-to-bottom, left-to-right */ + for(dy = -c->lrange; dy <= c->urange; dy++){ + for(dx = -c->lrange; dx <= c->urange; dx++){ + if(!dx && !dy) continue; // we already tested this block + if(dx == mx0 && dy == my0) continue; // this one too + tv = block_cmp(c, src, sstride, prev + dx + dy * pstride, pstride, bw, bh, &txored); if(tv < bv){ bv = tv; *mx = dx; *my = dy; + *xored = txored; if(!bv) return 0; } } @@ -181,7 +199,7 @@ FF_ENABLE_DEPRECATION_WARNINGS int x, y, bh2, bw2, xored; uint8_t *tsrc, *tprev; uint8_t *mv; - int mx, my; + int mx = 0, my = 0; bw = (avctx->width + ZMBV_BLOCK - 1) / ZMBV_BLOCK; bh = (avctx->height + ZMBV_BLOCK - 1) / ZMBV_BLOCK; @@ -269,7 +287,7 @@ static av_cold int encode_end(AVCodecContext *avctx) av_freep(&c->work_buf); deflateEnd(&c->zstream); - av_freep(&c->prev); + av_freep(&c->prev_buf); return 0; } @@ -283,6 +301,7 @@ static av_cold int encode_init(AVCodecContext *avctx) int zret; // Zlib return code int i; int lvl = 9; + int prev_size, prev_offset; /* Entropy-based score tables for comparing blocks. * Suitable for blocks up to (ZMBV_BLOCK * ZMBV_BLOCK) bytes. @@ -295,9 +314,13 @@ static av_cold int encode_init(AVCodecContext *avctx) c->curfrm = 0; c->keyint = avctx->keyint_min; - c->range = 8; - if(avctx->me_range > 0) - c->range = FFMIN(avctx->me_range, 127); + + /* Motion estimation range: maximum distance is -64..63 */ + c->lrange = c->urange = 8; + if(avctx->me_range > 0){ + c->lrange = FFMIN(avctx->me_range, 64); + c->urange = FFMIN(avctx->me_range, 63); + } if(avctx->compression_level >= 0) lvl = avctx->compression_level; @@ -323,11 +346,16 @@ static av_cold int encode_init(AVCodecContext *avctx) av_log(avctx, AV_LOG_ERROR, "Can't allocate compression buffer.\n"); return AVERROR(ENOMEM); } - c->pstride = FFALIGN(avctx->width, 16); - if (!(c->prev = av_malloc(c->pstride * avctx->height))) { + + /* Allocate prev buffer - leave border around the outside for out of edge ME */ + c->pstride = FFALIGN(avctx->width + c->lrange, 16); + prev_offset = FFALIGN(c->lrange + (c->pstride * c->lrange), 16); + prev_size = prev_offset + (c->pstride * (avctx->height + c->urange)); + if (!(c->prev_buf = av_mallocz(prev_size))) { av_log(avctx, AV_LOG_ERROR, "Can't allocate picture.\n"); return AVERROR(ENOMEM); } + c->prev = c->prev_buf + prev_offset; c->zstream.zalloc = Z_NULL; c->zstream.zfree = Z_NULL;