diff mbox series

[FFmpeg-devel,11/11] avcodec/vvcdec: add Intra Block Copy decoder

Message ID TYSPR06MB6433D9436D9F75AEB1A2B5D2AA562@TYSPR06MB6433.apcprd06.prod.outlook.com
State New
Headers show
Series Add Intra Block Copy support to the vvc decoder | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Nuo Mi Feb. 22, 2024, 7:14 a.m. UTC
From: Wu Jianhua <toqsxw@outlook.com>

Introduction at https://ieeexplore.ieee.org/document/9408666

passed files:
    10b444_A_Kwai_3.bit
    10b444_B_Kwai_3.bit
    CodingToolsSets_D_Tencent_2.bit
    IBC_A_Tencent_2.bit
    IBC_B_Tencent_2.bit
    IBC_C_Tencent_2.bit
    IBC_D_Tencent_2.bit
    IBC_E_Tencent_1.bit
    LOSSLESS_B_HHI_3.bit

Signed-off-by: Wu Jianhua <toqsxw@outlook.com>
Signed-off-by: Nuo Mi <nuomi2021@gmail.com>
---
 libavcodec/vvc/vvc_intra.c | 81 ++++++++++++++++++++++++++++++++++++++
 libavcodec/vvc/vvcdec.c    | 25 ++++++++++++
 libavcodec/vvc/vvcdec.h    |  3 ++
 3 files changed, 109 insertions(+)

Comments

Nuo Mi Feb. 23, 2024, 1:03 p.m. UTC | #1
On Thu, Feb 22, 2024 at 3:15 PM Nuo Mi <nuomi2021@gmail.com> wrote:

> From: Wu Jianhua <toqsxw@outlook.com>
>
> Introduction at https://ieeexplore.ieee.org/document/9408666
>
> passed files:
>     10b444_A_Kwai_3.bit
>     10b444_B_Kwai_3.bit
>     CodingToolsSets_D_Tencent_2.bit
>     IBC_A_Tencent_2.bit
>     IBC_B_Tencent_2.bit
>     IBC_C_Tencent_2.bit
>     IBC_D_Tencent_2.bit
>     IBC_E_Tencent_1.bit
>     LOSSLESS_B_HHI_3.bit
>
Will push tomorrow if there are no objections.

>
>
>
Nuo Mi Feb. 24, 2024, 12:33 p.m. UTC | #2
On Fri, Feb 23, 2024 at 9:03 PM Nuo Mi <nuomi2021@gmail.com> wrote:

>
>
> On Thu, Feb 22, 2024 at 3:15 PM Nuo Mi <nuomi2021@gmail.com> wrote:
>
>> From: Wu Jianhua <toqsxw@outlook.com>
>>
>> Introduction at https://ieeexplore.ieee.org/document/9408666
>>
>> passed files:
>>     10b444_A_Kwai_3.bit
>>     10b444_B_Kwai_3.bit
>>     CodingToolsSets_D_Tencent_2.bit
>>     IBC_A_Tencent_2.bit
>>     IBC_B_Tencent_2.bit
>>     IBC_C_Tencent_2.bit
>>     IBC_D_Tencent_2.bit
>>     IBC_E_Tencent_1.bit
>>     LOSSLESS_B_HHI_3.bit
>>
> Will push tomorrow if there are no objections.
>
pushed.
Ronald S. Bultje Feb. 24, 2024, 1:20 p.m. UTC | #3
Hi,

On Thu, Feb 22, 2024 at 2:15 AM Nuo Mi <nuomi2021@gmail.com> wrote:

> +static void ibc_fill_vir_buf(const VVCLocalContext *lc, const CodingUnit
> *cu)
> [..]
>
> +        av_image_copy_plane(ibc_buf, ibc_stride, src, src_stride,
> cu->cb_width >> hs << ps , cu->cb_height >> vs);
>

I'm admittedly not super-familiar with VVC, but I wonder why we need the
double buffering here (from ref_pos in pic to ibc_buf, and then back from
ibc_buf back to cur block in pic)? In AV1, this is done with just a single
copy. Why is this done this way?

Ronald
Nuo Mi Feb. 25, 2024, 3:07 a.m. UTC | #4
On Sat, Feb 24, 2024 at 9:20 PM Ronald S. Bultje <rsbultje@gmail.com> wrote:

> Hi,
>
> On Thu, Feb 22, 2024 at 2:15 AM Nuo Mi <nuomi2021@gmail.com> wrote:
>
>> +static void ibc_fill_vir_buf(const VVCLocalContext *lc, const CodingUnit
>> *cu)
>> [..]
>>
>> +        av_image_copy_plane(ibc_buf, ibc_stride, src, src_stride,
>> cu->cb_width >> hs << ps , cu->cb_height >> vs);
>>
>
> I'm admittedly not super-familiar with VVC, but I wonder why we need the
> double buffering here (from ref_pos in pic to ibc_buf, and then back from
> ibc_buf back to cur block in pic)? In AV1, this is done with just a single
> copy. Why is this done this way?
>
Hi Ronald,
Two major differences between AV1 and VVC are:
1. AV1 disables all in-loop filters for IBC, while VVC does not.
2. AV1 can refer to any reconstructed super blocks, except the delayed
super block, whereas VVC can only refer to the left and current CTU.
Therefore, in VVC, we need to allocate memory for each line to save pixels
before applying filters. VVC refers to this memory as IbcVirBuf, which is a
2D cyclic buffer.
Every new reconstructed Coding Block will be copied to this buffer, and we
can only copy pixels from this buffer.

Best Regards.

>
> Ronald
>
diff mbox series

Patch

diff --git a/libavcodec/vvc/vvc_intra.c b/libavcodec/vvc/vvc_intra.c
index fb001d6713..58dd492478 100644
--- a/libavcodec/vvc/vvc_intra.c
+++ b/libavcodec/vvc/vvc_intra.c
@@ -20,11 +20,13 @@ 
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "libavutil/frame.h"
+#include "libavutil/imgutils.h"
 
 #include "vvc_data.h"
 #include "vvc_inter.h"
 #include "vvc_intra.h"
 #include "vvc_itx_1d.h"
+#include "vvc_mvs.h"
 
 static int is_cclm(enum IntraPredMode mode)
 {
@@ -580,6 +582,81 @@  static int reconstruct(VVCLocalContext *lc)
     return 0;
 }
 
+#define POS(c_idx, x, y)    /* address of position (x, y) in frame plane c_idx; x and y are shifted down by hshift/vshift here; expects `fc` in the expanding scope */ \
+    &fc->frame->data[c_idx][((y) >> fc->ps.sps->vshift[c_idx]) * fc->frame->linesize[c_idx] +   \
+        (((x) >> fc->ps.sps->hshift[c_idx]) << fc->ps.sps->pixel_shift)]
+
+#define IBC_POS(c_idx, x, y) /* address inside fc->tab.ibc_vir_buf[c_idx]; expects fc, cu, sps, ps, vs and ibc_stride in the expanding scope */ \
+    (fc->tab.ibc_vir_buf[c_idx] + \
+        (x << ps) + (y + ((cu->y0 & ~(sps->ctb_size_y - 1)) >> vs)) * ibc_stride) /* row base is cu->y0 with its low (ctb_size_y - 1) bits cleared, then >> vs; x and y are NOT parenthesized, so pass plain identifiers or literals */
+#define IBC_X(x)  ((x) & ((fc->tab.sz.ibc_buffer_width >> hs) - 1)) /* wrap x into the buffer width of this plane; mask form assumes a power-of-two width -- TODO confirm; expects fc and hs in scope */
+#define IBC_Y(y)  ((y) & ((1 << sps->ctb_log2_size_y >> vs) - 1)) /* wrap y into (1 << ctb_log2_size_y) >> vs rows; expects sps and vs in scope */
+
+static void intra_block_copy(const VVCLocalContext *lc, const int c_idx) // fill plane c_idx of the current CU (lc->cu) in the frame by copying from the IBC virtual buffer
+{
+    const CodingUnit *cu      = lc->cu;
+    const PredictionUnit *pu  = &cu->pu;
+    const VVCFrameContext *fc = lc->fc;
+    const VVCSPS *sps         = fc->ps.sps;
+    const Mv *bv              = &pu->mi.mv[L0][0]; // displacement used below to locate the reference block
+    const int hs              = sps->hshift[c_idx];
+    const int vs              = sps->vshift[c_idx];
+    const int ps              = sps->pixel_shift;
+    const int ref_x           = IBC_X((cu->x0 >> hs) + (bv->x >> (4 + hs))); // wrapped source column; the extra >> 4 presumably drops fractional vector bits -- TODO confirm units
+    const int ref_y           = IBC_Y((cu->y0 >> vs) + (bv->y >> (4 + vs))); // wrapped source row, same scaling as ref_x
+    const int w               = cu->cb_width >> hs; // CU width in this plane
+    const int h               = cu->cb_height >> vs; // CU height in this plane
+    const int ibc_buf_width   = fc->tab.sz.ibc_buffer_width >> hs;    ///< IbcBufWidthY and IbcBufWidthC
+    const int rw              = FFMIN(w, ibc_buf_width - ref_x); // columns available from ref_x up to the right edge of the buffer
+    const int ibc_stride      = ibc_buf_width << ps; // also consumed implicitly by IBC_POS
+    const int dst_stride      = fc->frame->linesize[c_idx];
+    const uint8_t *ibc_buf    = IBC_POS(c_idx, ref_x, ref_y);
+    uint8_t *dst              = POS(c_idx, cu->x0, cu->y0);
+
+    av_image_copy_plane(dst, dst_stride, ibc_buf, ibc_stride, rw << ps, h); // first (or only) part: rw columns, h rows
+
+    if (w > rw) {
+        // wrap around: the remaining (w - rw) columns are read from column 0 of the same buffer rows
+        ibc_buf = IBC_POS(c_idx, 0, ref_y);
+        dst  += rw << ps; // advance past the columns already written
+        av_image_copy_plane(dst, dst_stride, ibc_buf, ibc_stride, (w - rw) << ps, h);
+    }
+}
+
+static void vvc_predict_ibc(const VVCLocalContext *lc) // run intra_block_copy() for every plane the current CU covers
+{
+    const H266RawSPS *rsps = lc->fc->ps.sps->r;
+
+    intra_block_copy(lc, LUMA); // luma is always copied
+    if (lc->cu->tree_type == SINGLE_TREE && rsps->sps_chroma_format_idc) { // chroma only for single-tree CUs when sps_chroma_format_idc is non-zero
+        intra_block_copy(lc, CB);
+        intra_block_copy(lc, CR);
+    }
+}
+
+static void ibc_fill_vir_buf(const VVCLocalContext *lc, const CodingUnit *cu) // copy the frame samples of cu into the IBC virtual buffer, one plane at a time
+{
+    const VVCFrameContext *fc = lc->fc;
+    const VVCSPS *sps         = fc->ps.sps;
+    const int has_chroma      = sps->r->sps_chroma_format_idc && cu->tree_type != DUAL_TREE_LUMA;
+    const int start           = cu->tree_type == DUAL_TREE_CHROMA; // first plane index: 1 for DUAL_TREE_CHROMA, otherwise 0
+    const int end             = has_chroma ? CR : LUMA; // last plane index, inclusive
+
+    for (int c_idx = start; c_idx <= end; c_idx++) {
+        const int hs = sps->hshift[c_idx];
+        const int vs = sps->vshift[c_idx];
+        const int ps = sps->pixel_shift;
+        const int x  = IBC_X(cu->x0 >> hs); // destination column, wrapped into the buffer width
+        const int y  = IBC_Y(cu->y0 >> vs); // destination row, wrapped by IBC_Y
+        const int src_stride = fc->frame->linesize[c_idx];
+        const int ibc_stride = fc->tab.sz.ibc_buffer_width >> hs << ps; // also consumed implicitly by IBC_POS
+        const uint8_t *src   = POS(c_idx, cu->x0, cu->y0);
+        uint8_t *ibc_buf     = IBC_POS(c_idx, x, y);
+
+        av_image_copy_plane(ibc_buf, ibc_stride, src, src_stride, cu->cb_width >> hs << ps , cu->cb_height >> vs); // NOTE(review): single copy with no wrap-around split; presumably a CU never crosses the buffer's right edge -- confirm
+    }
+}
+
 int ff_vvc_reconstruct(VVCLocalContext *lc, const int rs, const int rx, const int ry)
 {
     const VVCFrameContext *fc   = lc->fc;
@@ -599,6 +676,8 @@  int ff_vvc_reconstruct(VVCLocalContext *lc, const int rs, const int rx, const in
 
         if (cu->ciip_flag)
             ff_vvc_predict_ciip(lc);
+        else if (cu->pred_mode == MODE_IBC)
+            vvc_predict_ibc(lc);
         if (cu->coded_flag) {
             ret = reconstruct(lc);
         } else {
@@ -607,6 +686,8 @@  int ff_vvc_reconstruct(VVCLocalContext *lc, const int rs, const int rx, const in
             if (sps->r->sps_chroma_format_idc && cu->tree_type != DUAL_TREE_LUMA)
                 add_reconstructed_area(lc, CHROMA, cu->x0, cu->y0, cu->cb_width, cu->cb_height);
         }
+        if (sps->r->sps_ibc_enabled_flag)
+            ibc_fill_vir_buf(lc, cu);
         cu = cu->next;
     }
     ff_vvc_ctu_free_cus(ctu);
diff --git a/libavcodec/vvc/vvcdec.c b/libavcodec/vvc/vvcdec.c
index e88e746de4..caba1b28c6 100644
--- a/libavcodec/vvc/vvcdec.c
+++ b/libavcodec/vvc/vvcdec.c
@@ -262,6 +262,30 @@  static void ispmf_tl_init(TabList *l, VVCFrameContext *fc)
     TL_ADD(ispmf, w64 * h64);
 }
 
+static void ibc_tl_init(TabList *l, VVCFrameContext *fc) // set fc->tab.sz.ibc_buffer_width and add one ibc_vir_buf[] entry per sample array to the table list
+{
+    const VVCSPS *sps    = fc->ps.sps;
+    const VVCPPS *pps    = fc->ps.pps;
+    const int ctu_height = pps ? pps->ctb_height : 0; // every size below falls back to 0 when sps/pps is NULL
+    const int ctu_size   = sps ? sps->ctb_size_y : 0;
+    const int ps         = sps ? sps->pixel_shift : 0;
+    const int chroma_idc = sps ? sps->r->sps_chroma_format_idc : 0;
+    const int changed    = fc->tab.sz.chroma_format_idc != chroma_idc || // non-zero when any value differs from the one stored in fc->tab.sz
+        fc->tab.sz.ctu_height != ctu_height ||
+        fc->tab.sz.ctu_size != ctu_size ||
+        fc->tab.sz.pixel_shift != ps;
+
+    fc->tab.sz.ibc_buffer_width = ctu_size ? 2 * MAX_CTU_SIZE * MAX_CTU_SIZE / ctu_size : 0; // guarded so a zero ctu_size never divides
+
+    tl_init(l, 0, changed); // NOTE(review): the meaning of the 0 argument is defined by tl_init, which is not visible here
+
+    for (int i = LUMA; i < VVC_MAX_SAMPLE_ARRAYS; i++) {
+        const int hs = sps ? sps->hshift[i] : 0;
+        const int vs = sps ? sps->vshift[i] : 0;
+        TL_ADD(ibc_vir_buf[i], fc->tab.sz.ibc_buffer_width * ctu_size * ctu_height << ps >> hs >> vs); // size: width * ctu_size * ctu_height, scaled by pixel_shift and this plane's hshift/vshift
+    }
+}
+
 typedef void (*tl_init_fn)(TabList *l, VVCFrameContext *fc);
 
 static int frame_context_for_each_tl(VVCFrameContext *fc, int (*unary_fn)(TabList *l))
@@ -276,6 +300,7 @@  static int frame_context_for_each_tl(VVCFrameContext *fc, int (*unary_fn)(TabLis
         pixel_buffer_nz_tl_init,
         msm_tl_init,
         ispmf_tl_init,
+        ibc_tl_init,
     };
 
     for (int i = 0; i < FF_ARRAY_ELEMS(init); i++) {
diff --git a/libavcodec/vvc/vvcdec.h b/libavcodec/vvc/vvcdec.h
index 4f7ef3a32f..aa3d715524 100644
--- a/libavcodec/vvc/vvcdec.h
+++ b/libavcodec/vvc/vvcdec.h
@@ -170,6 +170,8 @@  typedef struct VVCFrameContext {
         int         *coeffs;
         struct CTU  *ctus;
 
+        uint8_t *ibc_vir_buf[VVC_MAX_SAMPLE_ARRAYS];    ///< IbcVirBuf[]
+
         //used in arrays_init only
         struct {
             int ctu_count;
@@ -185,6 +187,7 @@  typedef struct VVCFrameContext {
             int pixel_shift;
             int bs_width;
             int bs_height;
+            int ibc_buffer_width;       ///< IbcBufWidth
         } sz;
     } tab;
 } VVCFrameContext;