diff mbox series

[FFmpeg-devel,2/3] lavc/dvx: use texdsp funcs for texture block decompression

Message ID 20240128211335.58631-2-connorbworley@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/3] lavc/dxv: move tag definitions to common header | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Connor Worley Jan. 28, 2024, 9:13 p.m. UTC
Signed-off-by: Connor Worley <connorbworley@gmail.com>
---
 libavcodec/dxv.c | 289 ++++++++++++-----------------------------------
 1 file changed, 75 insertions(+), 214 deletions(-)
diff mbox series

Patch

diff --git a/libavcodec/dxv.c b/libavcodec/dxv.c
index a2ae070984..cae5d8a92f 100644
--- a/libavcodec/dxv.c
+++ b/libavcodec/dxv.c
@@ -38,15 +38,12 @@  typedef struct DXVContext {
     GetByteContext gbc;
 
     uint8_t *tex_data;   // Compressed texture
-    uint8_t *ctex_data;  // Compressed texture
+    uint8_t *ctex_data;  // Compressed chroma texture
     int tex_rat;         // Compression ratio
     int tex_step;        // Distance between blocks
     int ctex_step;       // Distance between blocks
     int64_t tex_size;    // Texture size
-    int64_t ctex_size;   // Texture size
-
-    /* Optimal number of slices for parallel decoding */
-    int slice_count;
+    int64_t ctex_size;   // Chroma texture size
 
     uint8_t *op_data[4]; // Opcodes
     int64_t op_size[4];  // Opcodes size
@@ -56,198 +53,8 @@  typedef struct DXVContext {
 
     int ctexture_block_w;
     int ctexture_block_h;
-
-    /* Pointer to the selected decompression function */
-    int (*tex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
-    int (*tex_funct_planar[2])(uint8_t *plane0, ptrdiff_t stride0,
-                               uint8_t *plane1, ptrdiff_t stride1,
-                               const uint8_t *block);
 } DXVContext;
 
-static void decompress_indices(uint8_t *dst, const uint8_t *src)
-{
-    int block, i;
-
-    for (block = 0; block < 2; block++) {
-        int tmp = AV_RL24(src);
-
-        /* Unpack 8x3 bit from last 3 byte block */
-        for (i = 0; i < 8; i++)
-            dst[i] = (tmp >> (i * 3)) & 0x7;
-
-        src += 3;
-        dst += 8;
-    }
-}
-
-static int extract_component(int yo0, int yo1, int code)
-{
-    int yo;
-
-    if (yo0 == yo1) {
-        yo = yo0;
-    } else if (code == 0) {
-        yo = yo0;
-    } else if (code == 1) {
-        yo = yo1;
-    } else {
-        if (yo0 > yo1) {
-            yo = (uint8_t) (((8 - code) * yo0 +
-                             (code - 1) * yo1) / 7);
-        } else {
-            if (code == 6) {
-                yo = 0;
-            } else if (code == 7) {
-                yo = 255;
-            } else {
-                yo = (uint8_t) (((6 - code) * yo0 +
-                                 (code - 1) * yo1) / 5);
-            }
-        }
-    }
-
-    return yo;
-}
-
-static int cocg_block(uint8_t *plane0, ptrdiff_t stride0,
-                      uint8_t *plane1, ptrdiff_t stride1,
-                      const uint8_t *block)
-{
-    uint8_t co_indices[16];
-    uint8_t cg_indices[16];
-    uint8_t co0 = *(block);
-    uint8_t co1 = *(block + 1);
-    uint8_t cg0 = *(block + 8);
-    uint8_t cg1 = *(block + 9);
-    int x, y;
-
-    decompress_indices(co_indices, block + 2);
-    decompress_indices(cg_indices, block + 10);
-
-    for (y = 0; y < 4; y++) {
-        for (x = 0; x < 4; x++) {
-            int co_code = co_indices[x + y * 4];
-            int cg_code = cg_indices[x + y * 4];
-
-            plane0[x] = extract_component(cg0, cg1, cg_code);
-            plane1[x] = extract_component(co0, co1, co_code);
-        }
-        plane0 += stride0;
-        plane1 += stride1;
-    }
-
-    return 16;
-}
-
-static void yao_subblock(uint8_t *dst, uint8_t *yo_indices,
-                        ptrdiff_t stride, const uint8_t *block)
-{
-    uint8_t yo0 = *(block);
-    uint8_t yo1 = *(block + 1);
-    int x, y;
-
-    decompress_indices(yo_indices, block + 2);
-
-    for (y = 0; y < 4; y++) {
-        for (x = 0; x < 4; x++) {
-            int yo_code = yo_indices[x + y * 4];
-
-            dst[x] = extract_component(yo0, yo1, yo_code);
-        }
-        dst += stride;
-    }
-}
-
-static int yo_block(uint8_t *dst, ptrdiff_t stride,
-                    uint8_t *unused0, ptrdiff_t unused1,
-                    const uint8_t *block)
-{
-    uint8_t yo_indices[16];
-
-    yao_subblock(dst,      yo_indices, stride, block);
-    yao_subblock(dst + 4,  yo_indices, stride, block + 8);
-    yao_subblock(dst + 8,  yo_indices, stride, block + 16);
-    yao_subblock(dst + 12, yo_indices, stride, block + 24);
-
-    return 32;
-}
-
-static int yao_block(uint8_t *plane0, ptrdiff_t stride0,
-                     uint8_t *plane3, ptrdiff_t stride1,
-                     const uint8_t *block)
-{
-    uint8_t yo_indices[16];
-    uint8_t a_indices[16];
-
-    yao_subblock(plane0,      yo_indices, stride0, block);
-    yao_subblock(plane3,      a_indices,  stride1, block + 8);
-    yao_subblock(plane0 + 4,  yo_indices, stride0, block + 16);
-    yao_subblock(plane3 + 4,  a_indices,  stride1, block + 24);
-    yao_subblock(plane0 + 8,  yo_indices, stride0, block + 32);
-    yao_subblock(plane3 + 8,  a_indices,  stride1, block + 40);
-    yao_subblock(plane0 + 12, yo_indices, stride0, block + 48);
-    yao_subblock(plane3 + 12, a_indices,  stride1, block + 56);
-
-    return 64;
-}
-
-static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
-                                     int slice, int thread_nb)
-{
-    const DXVContext *ctx = avctx->priv_data;
-    AVFrame *frame = arg;
-    const uint8_t *d = ctx->tex_data;
-    int w_block = avctx->coded_width / ctx->texture_block_w;
-    int h_block = avctx->coded_height / ctx->texture_block_h;
-    int x, y;
-    int start_slice, end_slice;
-
-    start_slice = h_block * slice / ctx->slice_count;
-    end_slice = h_block * (slice + 1) / ctx->slice_count;
-
-    if (ctx->tex_funct) {
-        for (y = start_slice; y < end_slice; y++) {
-            uint8_t *p = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h;
-            int off = y * w_block;
-            for (x = 0; x < w_block; x++) {
-                ctx->tex_funct(p + x * 4 * ctx->texture_block_w, frame->linesize[0],
-                               d + (off + x) * ctx->tex_step);
-            }
-        }
-    } else {
-        const uint8_t *c = ctx->ctex_data;
-
-        for (y = start_slice; y < end_slice; y++) {
-            uint8_t *p0 = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h;
-            uint8_t *p3 = ctx->tex_step != 64 ? NULL : frame->data[3] + y * frame->linesize[3] * ctx->texture_block_h;
-            int off = y * w_block;
-            for (x = 0; x < w_block; x++) {
-                ctx->tex_funct_planar[0](p0 + x * ctx->texture_block_w, frame->linesize[0],
-                                         p3 != NULL ? p3 + x * ctx->texture_block_w : NULL, frame->linesize[3],
-                                         d + (off + x) * ctx->tex_step);
-            }
-        }
-
-        w_block = (avctx->coded_width / 2) / ctx->ctexture_block_w;
-        h_block = (avctx->coded_height / 2) / ctx->ctexture_block_h;
-        start_slice = h_block * slice / ctx->slice_count;
-        end_slice = h_block * (slice + 1) / ctx->slice_count;
-
-        for (y = start_slice; y < end_slice; y++) {
-            uint8_t *p0 = frame->data[1] + y * frame->linesize[1] * ctx->ctexture_block_h;
-            uint8_t *p1 = frame->data[2] + y * frame->linesize[2] * ctx->ctexture_block_h;
-            int off = y * w_block;
-            for (x = 0; x < w_block; x++) {
-                ctx->tex_funct_planar[1](p0 + x * ctx->ctexture_block_w, frame->linesize[1],
-                                         p1 + x * ctx->ctexture_block_w, frame->linesize[2],
-                                         c + (off + x) * ctx->ctex_step);
-            }
-        }
-    }
-
-    return 0;
-}
-
 /* This scheme addresses already decoded elements depending on 2-bit status:
  *   0 -> copy new element
  *   1 -> copy one element from position -x
@@ -1044,6 +851,8 @@  static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
 {
     DXVContext *ctx = avctx->priv_data;
     GetByteContext *gbc = &ctx->gbc;
+    AVCodecContext cavctx = *avctx;
+    TextureDSPThreadContext texdsp_ctx, ctexdsp_ctx;
     int (*decompress_tex)(AVCodecContext *avctx);
     const char *msgcomp, *msgtext;
     uint32_t tag;
@@ -1053,21 +862,22 @@  static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
 
     bytestream2_init(gbc, avpkt->data, avpkt->size);
 
+    cavctx.coded_height = avctx->coded_height / 2;
+    cavctx.coded_width  = avctx->coded_width  / 2;
+
     ctx->texture_block_h = 4;
     ctx->texture_block_w = 4;
 
     avctx->pix_fmt = AV_PIX_FMT_RGBA;
     avctx->colorspace = AVCOL_SPC_RGB;
 
-    ctx->tex_funct = NULL;
-    ctx->tex_funct_planar[0] = NULL;
-    ctx->tex_funct_planar[1] = NULL;
-
     tag = bytestream2_get_le32(gbc);
     switch (tag) {
     case DXV_FMT_DXT1:
         decompress_tex = dxv_decompress_dxt1;
-        ctx->tex_funct = ctx->texdsp.dxt1_block;
+        texdsp_ctx.tex_funct = ctx->texdsp.dxt1_block;
+        texdsp_ctx.tex_ratio = 8;
+        texdsp_ctx.raw_ratio = 16;
         ctx->tex_rat   = 8;
         ctx->tex_step  = 8;
         msgcomp = "DXTR1";
@@ -1075,7 +885,9 @@  static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
         break;
     case DXV_FMT_DXT5:
         decompress_tex = dxv_decompress_dxt5;
-        ctx->tex_funct = ctx->texdsp.dxt5_block;
+        texdsp_ctx.tex_funct = ctx->texdsp.dxt5_block;
+        texdsp_ctx.tex_ratio = 16;
+        texdsp_ctx.raw_ratio = 16;
         ctx->tex_rat   = 4;
         ctx->tex_step  = 16;
         msgcomp = "DXTR5";
@@ -1083,8 +895,12 @@  static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
         break;
     case DXV_FMT_YCG6:
         decompress_tex = dxv_decompress_ycg6;
-        ctx->tex_funct_planar[0] = yo_block;
-        ctx->tex_funct_planar[1] = cocg_block;
+        texdsp_ctx.tex_funct  = ctx->texdsp.rgtc1u_gray_block;
+        texdsp_ctx.tex_ratio  = 8;
+        texdsp_ctx.raw_ratio  = 4;
+        ctexdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block;
+        ctexdsp_ctx.tex_ratio = 16;
+        ctexdsp_ctx.raw_ratio = 4;
         ctx->tex_rat   = 8;
         ctx->tex_step  = 32;
         ctx->ctex_step = 16;
@@ -1100,8 +916,12 @@  static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
         break;
     case DXV_FMT_YG10:
         decompress_tex = dxv_decompress_yg10;
-        ctx->tex_funct_planar[0] = yao_block;
-        ctx->tex_funct_planar[1] = cocg_block;
+        texdsp_ctx.tex_funct  = ctx->texdsp.rgtc1u_gray_block;
+        texdsp_ctx.tex_ratio  = 16;
+        texdsp_ctx.raw_ratio  = 4;
+        ctexdsp_ctx.tex_funct = ctx->texdsp.rgtc1u_gray_block;
+        ctexdsp_ctx.tex_ratio = 16;
+        ctexdsp_ctx.raw_ratio = 4;
         ctx->tex_rat   = 4;
         ctx->tex_step  = 64;
         ctx->ctex_step = 16;
@@ -1130,14 +950,20 @@  static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
         }
 
         if (old_type & 0x40) {
+            tag = DXV_FMT_DXT5;
             msgtext = "DXT5";
 
-            ctx->tex_funct = ctx->texdsp.dxt5_block;
+            texdsp_ctx.tex_funct = ctx->texdsp.dxt5_block;
+            texdsp_ctx.tex_ratio = 16;
+            texdsp_ctx.raw_ratio = 16;
             ctx->tex_step  = 16;
         } else if (old_type & 0x20 || version_major == 1) {
+            tag = DXV_FMT_DXT1;
             msgtext = "DXT1";
 
-            ctx->tex_funct = ctx->texdsp.dxt1_block;
+            texdsp_ctx.tex_funct = ctx->texdsp.dxt1_block;
+            texdsp_ctx.tex_ratio = 8;
+            texdsp_ctx.raw_ratio = 16;
             ctx->tex_step  = 8;
         } else {
             av_log(avctx, AV_LOG_ERROR, "Unsupported header (0x%08"PRIX32")\n.", tag);
@@ -1147,10 +973,10 @@  static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
         break;
     }
 
-    ctx->slice_count = av_clip(avctx->thread_count, 1,
-                               avctx->coded_height / FFMAX(ctx->texture_block_h,
-                                                           ctx->ctexture_block_h));
-
+    texdsp_ctx.slice_count  = av_clip(avctx->thread_count, 1,
+                                      avctx->coded_height / TEXTURE_BLOCK_H);
+    ctexdsp_ctx.slice_count = av_clip(avctx->thread_count, 1,
+                                      cavctx.coded_height / TEXTURE_BLOCK_H);
     /* New header is 12 bytes long. */
     if (!old_type) {
         version_major = bytestream2_get_byte(gbc) - 1;
@@ -1215,9 +1041,44 @@  static int dxv_decode(AVCodecContext *avctx, AVFrame *frame,
     if (ret < 0)
         return ret;
 
-    /* Now decompress the texture with the standard functions. */
-    avctx->execute2(avctx, decompress_texture_thread,
-                    frame, NULL, ctx->slice_count);
+    switch (tag) {
+    case DXV_FMT_YG10:
+        /* BC5 texture with alpha in the second half of each block */
+        texdsp_ctx.tex_data.in    = ctx->tex_data + texdsp_ctx.tex_ratio / 2;
+        texdsp_ctx.frame_data.out = frame->data[3];
+        texdsp_ctx.stride         = frame->linesize[3];
+        ret = ff_texturedsp_exec_decompress_threads(avctx, &texdsp_ctx);
+        if (ret < 0)
+            return ret;
+        /* fallthrough */
+    case DXV_FMT_YCG6:
+        /* BC5 texture with Co in the first half of each block and Cg in the second */
+        ctexdsp_ctx.tex_data.in    = ctx->ctex_data;
+        ctexdsp_ctx.frame_data.out = frame->data[2];
+        ctexdsp_ctx.stride         = frame->linesize[2];
+        ret = ff_texturedsp_exec_decompress_threads(&cavctx, &ctexdsp_ctx);
+        if (ret < 0)
+            return ret;
+        ctexdsp_ctx.tex_data.in    = ctx->ctex_data + ctexdsp_ctx.tex_ratio / 2;
+        ctexdsp_ctx.frame_data.out = frame->data[1];
+        ctexdsp_ctx.stride         = frame->linesize[1];
+        ret = ff_texturedsp_exec_decompress_threads(&cavctx, &ctexdsp_ctx);
+        if (ret < 0)
+            return ret;
+        /* fallthrough */
+    case DXV_FMT_DXT1:
+    case DXV_FMT_DXT5:
+        /* For DXT1 and DXT5, self explanatory
+         * For YCG6, BC4 texture for Y
+         * For YG10, BC5 texture with Y in the first half of each block */
+        texdsp_ctx.tex_data.in    = ctx->tex_data;
+        texdsp_ctx.frame_data.out = frame->data[0];
+        texdsp_ctx.stride         = frame->linesize[0];
+        ret = ff_texturedsp_exec_decompress_threads(avctx, &texdsp_ctx);
+        if (ret < 0)
+            return ret;
+        break;
+    }
 
     /* Frame is ready to be output. */
     frame->pict_type = AV_PICTURE_TYPE_I;