From patchwork Wed Apr 12 22:26:34 2017
Content-Type: text/plain; charset="utf-8"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
X-Patchwork-Submitter: Rostislav Pehlivanov <atomnuker@gmail.com>
X-Patchwork-Id: 3380
Delivered-To: ffmpegpatchwork@gmail.com
Received: by 10.103.3.129 with SMTP id 123csp451755vsd;
	Wed, 12 Apr 2017 15:34:13 -0700 (PDT)
X-Received: by 10.28.159.136 with SMTP id i130mr388628wme.29.1492036453249;
	Wed, 12 Apr 2017 15:34:13 -0700 (PDT)
Return-Path: <ffmpeg-devel-bounces@ffmpeg.org>
Received: from ffbox0-bg.mplayerhq.hu (ffbox0-bg.ffmpeg.org. [79.124.17.100])
	by mx.google.com with ESMTP id
	e7si30118684wrc.122.2017.04.12.15.34.12;
	Wed, 12 Apr 2017 15:34:13 -0700 (PDT)
Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org
	designates 79.124.17.100 as permitted sender)
	client-ip=79.124.17.100;
Authentication-Results: mx.google.com;
	dkim=neutral (body hash did not verify) header.i=@gmail.com;
	spf=pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org
	designates 79.124.17.100 as permitted sender)
	smtp.mailfrom=ffmpeg-devel-bounces@ffmpeg.org;
	dmarc=fail (p=NONE sp=NONE dis=NONE) header.from=gmail.com
Received: from [127.0.1.1] (localhost [127.0.0.1])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTP id 561C46897C1;
	Thu, 13 Apr 2017 01:34:04 +0300 (EEST)
X-Original-To: ffmpeg-devel@ffmpeg.org
Delivered-To: ffmpeg-devel@ffmpeg.org
Received: from mail-wr0-f196.google.com (mail-wr0-f196.google.com
	[209.85.128.196])
	by ffbox0-bg.mplayerhq.hu (Postfix) with ESMTPS id 73C5A6891F0
	for <ffmpeg-devel@ffmpeg.org>; Thu, 13 Apr 2017 01:33:57 +0300 (EEST)
Received: by mail-wr0-f196.google.com with SMTP id u18so6254774wrc.1
	for <ffmpeg-devel@ffmpeg.org>; Wed, 12 Apr 2017 15:34:03 -0700 (PDT)
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gmail.com; s=20161025;
	h=from:to:cc:subject:date:message-id;
	bh=aegH/V8Z6drYo5B9bm8nCKyKIGNgg4x2QegoMb4CuGI=;
	b=QBl65zDI86G4j6+9OUrLR4a4rFTH9oaUcfDFIh+gSK+aSRwjSIZSOOp3RupRVkyoFy
	/UA6Ot+u9BLA/ijsb+nqqTv1up/L2NT6i5ODY7OoQjW38yKqeiQOcwWO4mNpJvzR7oJy
	lu+4jjUv3xJZLE3Ia53phj+N8xMaM3CV3ERpzWC4cL+eiO0HHHKCd/6kONZBchmmzTTy
	xrXVzKwUGz946jgj7uZdNOEcQ2hj9q+IzJr+1NG8iTPqow7AFhwJrpyYOecjAN671p8W
	/BTzqkzXAr8jgoYCEjpHMtGM+3OOvzsC4nXcuDLs1uNlLLmVs3YsRatqZoXXYi106nkw
	UChA==
X-Google-DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed;
	d=1e100.net; s=20161025;
	h=x-gm-message-state:from:to:cc:subject:date:message-id;
	bh=aegH/V8Z6drYo5B9bm8nCKyKIGNgg4x2QegoMb4CuGI=;
	b=H2ftRsZAwscDY+nBYIDlegmDugEq59WiHeXAG7cpYF+An/pZvjOrY4EEIXaiJqzMXc
	jA283MkPLNZuEgW1Z4eWbwMfqlFL/IkieG1AqXdJt+XudPoD440oMdZMcWWihmD1GaKp
	5aWOUNSbFG9BxDVCIixwSgLEhgy+2yxyY59naN3a6f7MuGQkq5wX0u9bR3+l/Q7Q6Jl2
	YYlHy6PjCjzQDXaLQ5GfgbFImEbK77v6bXrcrDEe569Sk5UbsZJVO7O6xckftnGVhye/
	VTPZU8MbSVYSp8mOdvZKsdv/YJzsCcb0GpsA8BAKb7HKaeAjABDorFHcWMhIGR+EyNHU
	PfWw==
X-Gm-Message-State: AN3rC/7RiJvJLXTeqKLd7PxcklE8c1xn1ROwN9nGueVd/uwP5p/7rrd4
	xNz6BezOdCkDtV9ghos=
X-Received: by 10.223.164.195 with SMTP id h3mr4907594wrb.171.1492035999306;
	Wed, 12 Apr 2017 15:26:39 -0700 (PDT)
Received: from moonbase.lan (host86-136-239-66.range86-136.btcentralplus.com.
	[86.136.239.66]) by smtp.gmail.com with ESMTPSA id
	75sm9501251wmp.2.2017.04.12.15.26.38
	(version=TLS1_2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128);
	Wed, 12 Apr 2017 15:26:38 -0700 (PDT)
From: Rostislav Pehlivanov <atomnuker@gmail.com>
To: ffmpeg-devel@ffmpeg.org
Date: Wed, 12 Apr 2017 23:26:34 +0100
Message-Id: <20170412222635.26793-1-atomnuker@gmail.com>
X-Mailer: git-send-email 2.12.2.762.g0e3151a226
Subject: [FFmpeg-devel] [PATCH 1/2] opus_pvq: add resynth support and band
	encoding cost function
X-BeenThere: ffmpeg-devel@ffmpeg.org
X-Mailman-Version: 2.1.20
Precedence: list
List-Id: FFmpeg development discussions and patches <ffmpeg-devel.ffmpeg.org>
List-Unsubscribe: <http://ffmpeg.org/mailman/options/ffmpeg-devel>,
	<mailto:ffmpeg-devel-request@ffmpeg.org?subject=unsubscribe>
List-Archive: <http://ffmpeg.org/pipermail/ffmpeg-devel/>
List-Post: <mailto:ffmpeg-devel@ffmpeg.org>
List-Help: <mailto:ffmpeg-devel-request@ffmpeg.org?subject=help>
List-Subscribe: <http://ffmpeg.org/mailman/listinfo/ffmpeg-devel>,
	<mailto:ffmpeg-devel-request@ffmpeg.org?subject=subscribe>
Reply-To: FFmpeg development discussions and patches
	<ffmpeg-devel@ffmpeg.org>
Cc: Rostislav Pehlivanov <atomnuker@gmail.com>
MIME-Version: 1.0
Errors-To: ffmpeg-devel-bounces@ffmpeg.org
Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
---
 libavcodec/opus_pvq.c | 154 ++++++++++++++++++++++++++++++++++++++++++++------
 libavcodec/opus_pvq.h |   3 +
 2 files changed, 141 insertions(+), 16 deletions(-)

diff --git a/libavcodec/opus_pvq.c b/libavcodec/opus_pvq.c
index ce93c4731d..508555531b 100644
--- a/libavcodec/opus_pvq.c
+++ b/libavcodec/opus_pvq.c
@@ -389,10 +389,10 @@ static inline float celt_decode_pulses(OpusRangeCoder *rc, int *y, uint32_t N, u
  * Faster than libopus's search, operates entirely in the signed domain.
  * Slightly worse/better depending on N, K and the input vector.
  */
-static void celt_pvq_search(float *X, int *y, int K, int N)
+static int celt_pvq_search(float *X, int *y, int K, int N)
 {
-    int i;
-    float res = 0.0f, y_norm = 0.0f, xy_norm = 0.0f;
+    int i, y_norm = 0;
+    float res = 0.0f, xy_norm = 0.0f;
 
     for (i = 0; i < N; i++)
         res += FFABS(X[i]);
@@ -407,8 +407,8 @@ static void celt_pvq_search(float *X, int *y, int K, int N)
     }
 
     while (K) {
-        int max_idx = 0, phase = FFSIGN(K);
-        float max_den = 1.0f, max_num = 0.0f;
+        int max_idx = 0, max_den = 1, phase = FFSIGN(K);
+        float max_num = 0.0f;
         y_norm += 1.0f;
 
         for (i = 0; i < N; i++) {
@@ -416,8 +416,8 @@ static void celt_pvq_search(float *X, int *y, int K, int N)
              * to it, attempting to decrease it further will actually increase the
              * sum. Prevent this by disregarding any 0 positions when decrementing. */
             const int ca = 1 ^ ((y[i] == 0) & (phase < 0));
+            const int y_new = y_norm  + 2*phase*FFABS(y[i]);
             float xy_new = xy_norm + 1*phase*FFABS(X[i]);
-            float y_new  = y_norm  + 2*phase*FFABS(y[i]);
             xy_new = xy_new * xy_new;
             if (ca && (max_den*xy_new) > (y_new*max_num)) {
                 max_den = y_new;
@@ -433,6 +433,8 @@ static void celt_pvq_search(float *X, int *y, int K, int N)
         y_norm  += 2*phase*y[max_idx];
         y[max_idx] += phase;
     }
+
+    return y_norm;
 }
 
 static uint32_t celt_alg_quant(OpusRangeCoder *rc, float *X, uint32_t N, uint32_t K,
@@ -441,8 +443,10 @@ static uint32_t celt_alg_quant(OpusRangeCoder *rc, float *X, uint32_t N, uint32_
     int y[176];
 
     celt_exp_rotation(X, N, blocks, K, spread, 1);
-    celt_pvq_search(X, y, K, N);
+    gain /= sqrtf(celt_pvq_search(X, y, K, N));
     celt_encode_pulses(rc, y,  N, K);
+    celt_normalize_residual(y, X, N, gain);
+    celt_exp_rotation(X, N, blocks, K, spread, 0);
     return celt_extract_collapse_mask(y, N, blocks);
 }
 
@@ -844,7 +848,7 @@ static void celt_stereo_is_decouple(float *X, float *Y, float e_l, float e_r, in
 static void celt_stereo_ms_decouple(float *X, float *Y, int N)
 {
     int i;
-    const float decouple_norm = 1.0f/sqrtf(2.0f);
+    const float decouple_norm = 1.0f/sqrtf(1.0f + 1.0f);
     for (i = 0; i < N; i++) {
         const float Xret = X[i];
         X[i] = (X[i] + Y[i])*decouple_norm;
@@ -860,9 +864,9 @@ uint32_t ff_celt_encode_band(CeltFrame *f, OpusRangeCoder *rc, const int band,
     const uint8_t *cache;
     int dualstereo, split;
     int imid = 0, iside = 0;
-    //uint32_t N0 = N;
+    uint32_t N0 = N;
     int N_B = N / blocks;
-    //int N_B0 = N_B;
+    int N_B0 = N_B;
     int B0 = blocks;
     int time_divide = 0;
     int recombine = 0;
@@ -883,6 +887,7 @@ uint32_t ff_celt_encode_band(CeltFrame *f, OpusRangeCoder *rc, const int band,
                 f->remaining2 -= 1 << 3;
                 b             -= 1 << 3;
             }
+            x[0] = 1.0f - 2.0f*(x[0] < 0);
             x = Y;
         }
         if (lowband_out)
@@ -922,7 +927,7 @@ uint32_t ff_celt_encode_band(CeltFrame *f, OpusRangeCoder *rc, const int band,
             tf_change++;
         }
         B0 = blocks;
-        //N_B0 = N_B;
+        N_B0 = N_B;
 
         /* Reorganize the samples in time order instead of frequency order */
         if (B0 > 1)
@@ -977,19 +982,20 @@ uint32_t ff_celt_encode_band(CeltFrame *f, OpusRangeCoder *rc, const int band,
 
             if (dualstereo) {
                 if (itheta == 0)
-                    celt_stereo_is_decouple(X, Y, f->block[0].lin_energy[band], f->block[1].lin_energy[band], N);
+                    celt_stereo_is_decouple(X, Y, f->block[0].lin_energy[band],
+                                            f->block[1].lin_energy[band], N);
                 else
                     celt_stereo_ms_decouple(X, Y, N);
             }
         } else if (dualstereo) {
              inv = itheta > 8192;
-             if (inv)
-             {
+             if (inv) {
                 int j;
-                for (j=0;j<N;j++)
+                for (j = 0; j < N; j++)
                    Y[j] = -Y[j];
              }
-             celt_stereo_is_decouple(X, Y, f->block[0].lin_energy[band], f->block[1].lin_energy[band], N);
+             celt_stereo_is_decouple(X, Y, f->block[0].lin_energy[band],
+                                     f->block[1].lin_energy[band], N);
 
             if (b > 2 << 3 && f->remaining2 > 2 << 3) {
                 ff_opus_rc_enc_log(rc, inv, 2);
@@ -1153,8 +1159,124 @@ uint32_t ff_celt_encode_band(CeltFrame *f, OpusRangeCoder *rc, const int band,
             /* Finally do the actual quantization */
             cm = celt_alg_quant(rc, X, N, (q < 8) ? q : (8 + (q & 7)) << ((q >> 3) - 1),
                                 f->spread, blocks, gain);
+        } else {
+            /* If there's no pulse, fill the band anyway */
+            int j;
+            uint32_t cm_mask = (1 << blocks) - 1;
+            fill &= cm_mask;
+            if (!fill) {
+                for (j = 0; j < N; j++)
+                    X[j] = 0.0f;
+            } else {
+                if (!lowband) {
+                    /* Noise */
+                    for (j = 0; j < N; j++)
+                        X[j] = (((int32_t)celt_rng(f)) >> 20);
+                    cm = cm_mask;
+                } else {
+                    /* Folded spectrum */
+                    for (j = 0; j < N; j++) {
+                        /* About 48 dB below the "normal" folding level */
+                        X[j] = lowband[j] + (((celt_rng(f)) & 0x8000) ? 1.0f / 256 : -1.0f / 256);
+                    }
+                    cm = fill;
+                }
+                celt_renormalize_vector(X, N, gain);
+            }
+        }
+    }
+
+    /* This code is used by the decoder and by the resynthesis-enabled encoder */
+    if (dualstereo) {
+        int j;
+        if (N != 2)
+            celt_stereo_merge(X, Y, mid, N);
+        if (inv) {
+            for (j = 0; j < N; j++)
+                Y[j] *= -1;
+        }
+    } else if (level == 0) {
+        int k;
+
+        /* Undo the sample reorganization going from time order to frequency order */
+        if (B0 > 1)
+            celt_interleave_hadamard(f->scratch, X, N_B >> recombine,
+                                     B0<<recombine, longblocks);
+
+        /* Undo time-freq changes that we did earlier */
+        N_B = N_B0;
+        blocks = B0;
+        for (k = 0; k < time_divide; k++) {
+            blocks >>= 1;
+            N_B <<= 1;
+            cm |= cm >> blocks;
+            celt_haar1(X, N_B, blocks);
         }
+
+        for (k = 0; k < recombine; k++) {
+            cm = ff_celt_bit_deinterleave[cm];
+            celt_haar1(X, N0>>k, 1<<k);
+        }
+        blocks <<= recombine;
+
+        /* Scale output for later folding */
+        if (lowband_out) {
+            int j;
+            float n = sqrtf(N0);
+            for (j = 0; j < N0; j++)
+                lowband_out[j] = n * X[j];
+        }
+        cm = av_mod_uintp2(cm, blocks);
     }
 
     return cm;
 }
+
+/**
+ * Encode copies of one band's coefficients between a range coder checkpoint and
+ * its rollback; adds the cost to *bits and returns lambda * distortion * cost.
+ */
+float ff_celt_quant_band_cost(CeltFrame *f, OpusRangeCoder *rc, int band, float *bits,
+                              float lambda)
+{
+    int i, b = 0;
+    uint32_t cm[2] = { (1 << f->blocks) - 1, (1 << f->blocks) - 1 };
+    const int band_size = ff_celt_freq_range[band] << f->size;
+    float buf[352], lowband_scratch[176], norm1[176], norm2[176];
+    float dist, cost, err_x = 0.0f, err_y = 0.0f;
+    float *X = buf;
+    float *X_orig = f->block[0].coeffs + (ff_celt_freq_bands[band] << f->size);
+    float *Y = (f->channels == 2) ? &buf[176] : NULL;
+    float *Y_orig = f->block[1].coeffs + (ff_celt_freq_bands[band] << f->size);
+    OPUS_RC_CHECKPOINT_SPAWN(rc);
+
+    memcpy(X, X_orig, band_size*sizeof(float));
+    if (Y)
+        memcpy(Y, Y_orig, band_size*sizeof(float));
+    f->remaining2 = ((f->framebits << 3) - f->anticollapse_needed) - opus_rc_tell_frac(rc) - 1;
+    if (band <= f->coded_bands - 1) {
+        int curr_balance = f->remaining / FFMIN(3, f->coded_bands - band);
+        b = av_clip_uintp2(FFMIN(f->remaining2 + 1, f->pulses[band] + curr_balance), 14);
+    }
+
+    if (f->dual_stereo) {
+        ff_celt_encode_band(f, rc, band, X, NULL, band_size, b / 2, f->blocks, NULL,
+                            f->size, norm1, 0, 1.0f, lowband_scratch, cm[0]);
+        ff_celt_encode_band(f, rc, band, Y, NULL, band_size, b / 2, f->blocks, NULL,
+                            f->size, norm2, 0, 1.0f, lowband_scratch, cm[1]);
+    } else {
+        ff_celt_encode_band(f, rc, band, X, Y, band_size, b, f->blocks, NULL, f->size,
+                            norm1, 0, 1.0f, lowband_scratch, cm[0] | cm[1]);
+    }
+
+    for (i = 0; i < band_size; i++) {
+        err_x += (X[i] - X_orig[i])*(X[i] - X_orig[i]);
+        if (Y) /* Y is NULL unless f->channels == 2; never dereference it then */
+            err_y += (Y[i] - Y_orig[i])*(Y[i] - Y_orig[i]);
+    }
+    dist = sqrtf(err_x) + sqrtf(err_y);
+    cost = OPUS_RC_CHECKPOINT_BITS(rc)/8.0f;
+    *bits += cost;
+    OPUS_RC_CHECKPOINT_ROLLBACK(rc);
+    return lambda*dist*cost;
+}
diff --git a/libavcodec/opus_pvq.h b/libavcodec/opus_pvq.h
index d414b47a42..045015406b 100644
--- a/libavcodec/opus_pvq.h
+++ b/libavcodec/opus_pvq.h
@@ -38,4 +38,7 @@ uint32_t ff_celt_encode_band(CeltFrame *f, OpusRangeCoder *rc, const int band,
                              float *lowband, int duration, float *lowband_out, int level,
                              float gain, float *lowband_scratch, int fill);
 
+float ff_celt_quant_band_cost(CeltFrame *f, OpusRangeCoder *rc, int band,
+                              float *bits, float lambda);
+
 #endif /* AVCODEC_OPUS_PVQ_H */