diff mbox

[FFmpeg-devel] avcodec/dxv: add support for "high" quality mode

Message ID 20180414194647.12814-1-onemda@gmail.com
State New
Headers show

Commit Message

Paul B Mahol April 14, 2018, 7:46 p.m. UTC
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavcodec/dxv.c | 815 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 780 insertions(+), 35 deletions(-)

Comments

Rostislav Pehlivanov April 14, 2018, 8:21 p.m. UTC | #1
On 14 April 2018 at 20:46, Paul B Mahol <onemda@gmail.com> wrote:

> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
>  libavcodec/dxv.c | 815 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 780 insertions(+), 35 deletions(-)
>
> diff --git a/libavcodec/dxv.c b/libavcodec/dxv.c
> index 529e211258..101fe78481 100644
> --- a/libavcodec/dxv.c
> +++ b/libavcodec/dxv.c
> @@ -1,6 +1,7 @@
>  /*
>   * Resolume DXV decoder
>   * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
> + * Copyright (C) 2018 Paul B Mahol
>   *
>   * This file is part of FFmpeg.
>   *
> @@ -23,6 +24,7 @@
>
>  #include "libavutil/imgutils.h"
>
> +#include "mathops.h"
>  #include "avcodec.h"
>  #include "bytestream.h"
>  #include "internal.h"
> @@ -34,50 +36,211 @@ typedef struct DXVContext {
>      TextureDSPContext texdsp;
>      GetByteContext gbc;
>
> -    uint8_t *tex_data;  // Compressed texture
> -    int tex_rat;        // Compression ratio
> -    int tex_step;       // Distance between blocks
> -    int64_t tex_size;   // Texture size
> +    uint8_t *tex_data;   // Compressed texture
> +    uint8_t *ctex_data;  // Compressed texture
> +    int tex_rat;         // Compression ratio
> +    int tex_step;        // Distance between blocks
> +    int ctex_step;       // Distance between blocks
> +    int64_t tex_size;    // Texture size
> +    int64_t ctex_size;   // Texture size
>
>      /* Optimal number of slices for parallel decoding */
>      int slice_count;
>
> +    uint8_t *op_data[4]; // Opcodes
> +    int64_t op_size[4];  // Opcodes size
> +
> +    int texture_block_w;
> +    int texture_block_h;
> +
> +    int ctexture_block_w;
> +    int ctexture_block_h;
> +
>      /* Pointer to the selected decompression function */
>      int (*tex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
> +    int (*tex_funct_planar[2])(uint8_t *plane0, ptrdiff_t stride0,
> +                               uint8_t *plane1, ptrdiff_t stride1,
> +                               const uint8_t *block);
>  } DXVContext;
>
> +static void decompress_indices(uint8_t *dst, const uint8_t *src)
> +{
> +    int block, i;
> +
> +    for (block = 0; block < 2; block++) {
> +        int tmp = AV_RL24(src);
> +
> +        /* Unpack 8x3 bit from last 3 byte block */
> +        for (i = 0; i < 8; i++)
> +            dst[i] = (tmp >> (i * 3)) & 0x7;
> +
> +        src += 3;
> +        dst += 8;
> +    }
> +}
> +
> +static int extract_component(int yo0, int yo1, int code)
> +{
> +    int yo;
> +
> +    if (yo0 == yo1) {
> +        yo = yo0;
> +    } else if (code == 0) {
> +        yo = yo0;
> +    } else if (code == 1) {
> +        yo = yo1;
> +    } else {
> +        if (yo0 > yo1) {
> +            yo = (uint8_t) (((8 - code) * yo0 +
> +                             (code - 1) * yo1) / 7);
> +        } else {
> +            if (code == 6) {
> +                yo = 0;
> +            } else if (code == 7) {
> +                yo = 255;
> +            } else {
> +                yo = (uint8_t) (((6 - code) * yo0 +
> +                                 (code - 1) * yo1) / 5);
> +            }
> +        }
> +    }
> +
> +    return yo;
> +}
> +
> +static int cocg_block(uint8_t *plane0, ptrdiff_t stride0,
> +                      uint8_t *plane1, ptrdiff_t stride1,
> +                      const uint8_t *block)
> +{
> +    uint8_t co_indices[16];
> +    uint8_t cg_indices[16];
> +    uint8_t co0 = *(block);
> +    uint8_t co1 = *(block + 1);
> +    uint8_t cg0 = *(block + 8);
> +    uint8_t cg1 = *(block + 9);
> +    int x, y;
> +
> +    decompress_indices(co_indices, block + 2);
> +    decompress_indices(cg_indices, block + 10);
> +
> +    for (y = 0; y < 4; y++) {
> +        for (x = 0; x < 4; x++) {
> +            int co_code = co_indices[x + y * 4];
> +            int cg_code = cg_indices[x + y * 4];
> +
> +            plane0[x] = extract_component(cg0, cg1, cg_code);
> +            plane1[x] = extract_component(co0, co1, co_code);
> +        }
> +        plane0 += stride0;
> +        plane1 += stride1;
> +    }
> +
> +    return 16;
> +}
> +
> +static void yao_subblock(uint8_t *dst, uint8_t *yo_indices,
> +                        ptrdiff_t stride, const uint8_t *block)
> +{
> +    uint8_t yo0 = *(block);
> +    uint8_t yo1 = *(block + 1);
> +    int x, y;
> +
> +    decompress_indices(yo_indices, block + 2);
> +
> +    for (y = 0; y < 4; y++) {
> +        for (x = 0; x < 4; x++) {
> +            int yo_code = yo_indices[x + y * 4];
> +
> +            dst[x] = extract_component(yo0, yo1, yo_code);
> +        }
> +        dst += stride;
> +    }
> +}
> +
> +static int yo_block(uint8_t *dst, ptrdiff_t stride,
> +                    uint8_t *unused0, ptrdiff_t unused1,
> +                    const uint8_t *block)
> +{
> +    uint8_t yo_indices[16];
> +
> +    yao_subblock(dst,      yo_indices, stride, block);
> +    yao_subblock(dst + 4,  yo_indices, stride, block + 8);
> +    yao_subblock(dst + 8,  yo_indices, stride, block + 16);
> +    yao_subblock(dst + 12, yo_indices, stride, block + 24);
> +
> +    return 32;
> +}
> +
> +static int yao_block(uint8_t *plane0, ptrdiff_t stride0,
> +                     uint8_t *plane3, ptrdiff_t stride1,
> +                     const uint8_t *block)
> +{
> +    uint8_t yo_indices[16];
> +    uint8_t a_indices[16];
> +
> +    yao_subblock(plane0,      yo_indices, stride0, block);
> +    yao_subblock(plane3,      a_indices,  stride1, block + 8);
> +    yao_subblock(plane0 + 4,  yo_indices, stride0, block + 16);
> +    yao_subblock(plane3 + 4,  a_indices,  stride1, block + 24);
> +    yao_subblock(plane0 + 8,  yo_indices, stride0, block + 32);
> +    yao_subblock(plane3 + 8,  a_indices,  stride1, block + 40);
> +    yao_subblock(plane0 + 12, yo_indices, stride0, block + 48);
> +    yao_subblock(plane3 + 12, a_indices,  stride1, block + 56);
> +
> +    return 64;
> +}
> +
>  static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
>                                       int slice, int thread_nb)
>  {
>      DXVContext *ctx = avctx->priv_data;
>      AVFrame *frame = arg;
>      const uint8_t *d = ctx->tex_data;
> -    int w_block = avctx->coded_width / TEXTURE_BLOCK_W;
> -    int h_block = avctx->coded_height / TEXTURE_BLOCK_H;
> +    int w_block = avctx->coded_width / ctx->texture_block_w;
> +    int h_block = avctx->coded_height / ctx->texture_block_h;
>      int x, y;
>      int start_slice, end_slice;
> -    int base_blocks_per_slice = h_block / ctx->slice_count;
> -    int remainder_blocks = h_block % ctx->slice_count;
> -
> -    /* When the frame height (in blocks) doesn't divide evenly between the
> -     * number of slices, spread the remaining blocks evenly between the first
> -     * operations */
> -    start_slice = slice * base_blocks_per_slice;
> -    /* Add any extra blocks (one per slice) that have been added
> -     * before this slice */
> -    start_slice += FFMIN(slice, remainder_blocks);
> -
> -    end_slice = start_slice + base_blocks_per_slice;
> -    /* Add an extra block if there are remainder blocks to be accounted for */
> -    if (slice < remainder_blocks)
> -        end_slice++;
> -
> -    for (y = start_slice; y < end_slice; y++) {
> -        uint8_t *p = frame->data[0] + y * frame->linesize[0] * TEXTURE_BLOCK_H;
> -        int off  = y * w_block;
> -        for (x = 0; x < w_block; x++) {
> -            ctx->tex_funct(p + x * 16, frame->linesize[0],
> -                           d + (off + x) * ctx->tex_step);
> +
> +    start_slice = h_block * slice / ctx->slice_count;
> +    end_slice = h_block * (slice + 1) / ctx->slice_count;
> +
> +    if (ctx->tex_funct) {
> +        for (y = start_slice; y < end_slice; y++) {
> +            uint8_t *p = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h;
> +            int off = y * w_block;
> +            for (x = 0; x < w_block; x++) {
> +                ctx->tex_funct(p + x * 4 * ctx->texture_block_w, frame->linesize[0],
> +                               d + (off + x) * ctx->tex_step);
> +            }
> +        }
> +    } else {
> +        const uint8_t *c = ctx->ctex_data;
> +
> +        for (y = start_slice; y < end_slice; y++) {
> +            uint8_t *p0 = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h;
> +            uint8_t *p3 = ctx->tex_step != 64 ? NULL : frame->data[3] + y * frame->linesize[3] * ctx->texture_block_h;
> +            int off = y * w_block;
> +            for (x = 0; x < w_block; x++) {
> +                ctx->tex_funct_planar[0](p0 + x * ctx->texture_block_w, frame->linesize[0],
> +                                         p3 != NULL ? p3 + x * ctx->texture_block_w : NULL, frame->linesize[3],
> +                                         d + (off + x) * ctx->tex_step);
> +            }
> +        }
> +
> +        w_block = (avctx->coded_width / 2) / ctx->ctexture_block_w;
> +        h_block = (avctx->coded_height / 2) / ctx->ctexture_block_h;
> +        start_slice = h_block * slice / ctx->slice_count;
> +        end_slice = h_block * (slice + 1) / ctx->slice_count;
> +
> +        for (y = start_slice; y < end_slice; y++) {
> +            uint8_t *p0 = frame->data[1] + y * frame->linesize[1] * ctx->ctexture_block_h;
> +            uint8_t *p1 = frame->data[2] + y * frame->linesize[2] * ctx->ctexture_block_h;
> +            int off = y * w_block;
> +            for (x = 0; x < w_block; x++) {
> +                ctx->tex_funct_planar[1](p0 + x * ctx->ctexture_block_w, frame->linesize[1],
> +                                         p1 + x * ctx->ctexture_block_w, frame->linesize[2],
> +                                         c + (off + x) * ctx->ctex_step);
> +            }
>          }
>      }
>
> @@ -169,6 +332,529 @@ static int dxv_decompress_dxt1(AVCodecContext *avctx)
>      return 0;
>  }
>
> +typedef struct OpcodeTable {
> +    int16_t next;
> +    uint8_t val1;
> +    uint8_t val2;
> +} OpcodeTable;
> +
> +static int fill_ltable(GetByteContext *gb, uint32_t *table, int *nb_elements)
> +{
> +    unsigned half = 512, bits = 1023, left = 1024, input, mask;
> +    int value, counter = 0, rshift = 10, lshift = 30;
> +
> +    mask = bytestream2_get_le32(gb) >> 2;
> +    while (left) {
> +        if (counter >= 256)
> +            return AVERROR_INVALIDDATA;
> +        value = bits & mask;
> +        left -= bits & mask;
> +        mask >>= rshift;
> +        lshift -= rshift;
> +        table[counter++] = value;
> +        if (lshift < 16) {
> +            if (bytestream2_get_bytes_left(gb) <= 0)
> +                return AVERROR_INVALIDDATA;
> +
> +            input = bytestream2_get_le16(gb);
> +            mask += input << lshift;
> +            lshift += 16;
> +        }
> +        if (left < half) {
> +            half >>= 1;
> +            bits >>= 1;
> +            rshift--;
> +        }
> +    }
> +
> +    for (; !table[counter - 1]; counter--)
> +        if (counter <= 0)
> +            return AVERROR_INVALIDDATA;
> +
> +    *nb_elements = counter;
> +
> +    if (counter < 256)
> +        memset(&table[counter], 0, 4 * (256 - counter));
> +
> +    if (lshift >= 16)
> +        bytestream2_seek(gb, -2, SEEK_CUR);
> +
> +    return 0;
> +}
> +
> +static int fill_optable(unsigned *table0, OpcodeTable *table1, int nb_elements)
> +{
> +    unsigned table2[256] = { 0 };
> +    unsigned x = 0;
> +    int val0, val1, i, j = 2, k = 0;
> +
> +    table2[0] = table0[0];
> +    for (i = 0; i < nb_elements - 1; i++, table2[i] = val0) {
> +        val0 = table0[i + 1] + table2[i];
> +    }
> +
> +    if (!table2[0]) {
> +        do {
> +            k++;
> +        } while (!table2[k]);
> +    }
> +
> +    j = 2;
> +    for (i = 1024; i > 0; i--) {
> +        for (table1[x].val1 = k; k < 256 && j > table2[k]; k++);
> +        x = (x - 383) & 0x3FF;
> +        j++;
> +    }
> +
> +    if (nb_elements > 0)
> +        memcpy(&table2[0], table0, 4 * nb_elements);
> +
> +    for (i = 0; i < 1024; i++) {
> +        val0 = table1[i].val1;
> +        val1 = table2[val0];
> +        table2[val0]++;
> +        x = 31 - ff_clz(val1);
> +        if (x > 10)
> +            return AVERROR_INVALIDDATA;
> +        table1[i].val2 = 10 - x;
> +        table1[i].next = (val1 << table1[i].val2) - 1024;
> +    }
> +
> +    return 0;
> +}
> +
> +static int get_opcodes(GetByteContext *gb, uint32_t *table, uint8_t *dst, int op_size, int nb_elements)
> +{
> +    OpcodeTable optable[1024];
> +    int sum, x, val, lshift, rshift, ret, size_in_bits, i, idx;
> +    unsigned endoffset, newoffset, offset;
> +    unsigned next;
> +    uint8_t *src = (uint8_t *)gb->buffer;
> +
> +    ret = fill_optable(table, optable, nb_elements);
> +    if (ret < 0)
> +        return ret;
> +
> +    size_in_bits = bytestream2_get_le32(gb);
> +    endoffset = ((size_in_bits + 7) >> 3) - 4;
> +    if (endoffset <= 0 || bytestream2_get_bytes_left(gb) < endoffset)
> +        return AVERROR_INVALIDDATA;
> +
> +    offset = endoffset;
> +    next = AV_RL32(src + endoffset);
> +    rshift = (((size_in_bits & 0xFF) - 1) & 7) + 15;
> +    lshift = 32 - rshift;
> +    idx = (next >> rshift) & 0x3FF;
> +    for (i = 0; i < op_size; i++) {
> +        dst[i] = optable[idx].val1;
> +        val = optable[idx].val2;
> +        sum = val + lshift;
> +        x = (next << lshift) >> 1 >> (31 - val);
> +        newoffset = offset - (sum >> 3);
> +        lshift = sum & 7;
> +        idx = x + optable[idx].next;
> +        offset = newoffset;
> +        if (offset > endoffset)
> +            return AVERROR_INVALIDDATA;
> +        next = AV_RL32(src + offset);
> +    }
> +
> +    bytestream2_skip(gb, (size_in_bits + 7 >> 3) - 4);
> +
> +    return 0;
> +}
> +
> +static int dxv_decompress_opcodes(GetByteContext *gb, void *dstp, size_t op_size)
> +{
> +    int pos = bytestream2_tell(gb);
> +    int flag = bytestream2_peek_byte(gb);
> +
> +    if ((flag & 3) == 0) {
> +        bytestream2_skip(gb, 1);
> +        bytestream2_get_buffer(gb, dstp, op_size);
> +    } else if ((flag & 3) == 1) {
> +        bytestream2_skip(gb, 1);
> +        memset(dstp, bytestream2_get_byte(gb), op_size);
> +    } else {
> +        uint32_t table[256];
> +        int ret, elements = 0;
> +
> +        ret = fill_ltable(gb, table, &elements);
> +        if (ret < 0)
> +            return ret;
> +        ret = get_opcodes(gb, table, dstp, op_size, elements);
> +        if (ret < 0)
> +            return ret;
> +    }
> +    return bytestream2_tell(gb) - pos;
> +}
> +
> +static int dxv_decompress_cgo(DXVContext *ctx, GetByteContext *gb,
> +                              uint8_t *tex_data, int tex_size,
> +                              uint8_t *op_data, int *oindex,
> +                              int op_size,
> +                              uint8_t **dstp, int *statep,
> +                              uint8_t **tab0, uint8_t **tab1,
> +                              int offset)
> +{
> +    uint8_t *dst = *dstp;
> +    uint8_t *tptr0, *tptr1, *tptr3;
> +    int oi = *oindex;
> +    int state = *statep;
> +    int opcode, v, vv;
> +
> +    if (state <= 0) {
> +        if (oi >= op_size)
> +            return AVERROR_INVALIDDATA;
> +        opcode = op_data[oi++];
> +        if (!opcode) {
> +            v = bytestream2_get_byte(gb);
> +            if (v == 255) {
> +                do {
> +                    if (bytestream2_get_bytes_left(gb) <= 0)
> +                        return AVERROR_INVALIDDATA;
> +                    opcode = bytestream2_get_le16(gb);
> +                    v += opcode;
> +                } while (opcode == 0xFFFF);
> +            }
> +            AV_WL32(dst, AV_RL32(dst - 16));
> +            AV_WL32(dst + 4, AV_RL32(dst - 12));
> +            state = v + 4;
> +            goto done;
> +        }
> +
> +        switch (opcode) {
> +        case 1:
> +            AV_WL32(dst, AV_RL32(dst - (8 + offset)));
> +            AV_WL32(dst + 4, AV_RL32(dst - (4 + offset)));
> +            break;
> +        case 2:
> +            vv = (8 + offset) * (bytestream2_get_le16(gb) + 1);
> +            if (vv < 0 || vv > dst - tex_data)
> +                return AVERROR_INVALIDDATA;
> +            tptr0 = dst - vv;
> +            v = AV_RL32(tptr0);
> +            AV_WL32(dst, AV_RL32(tptr0));
> +            AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> +            tab0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
> +            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
> +            break;
> +        case 3:
> +            AV_WL32(dst, bytestream2_get_le32(gb));
> +            AV_WL32(dst + 4, bytestream2_get_le32(gb));
> +            tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> +            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
> +            break;
> +        case 4:
> +            tptr3 = tab1[bytestream2_get_byte(gb)];
> +            if (!tptr3)
> +                return AVERROR_INVALIDDATA;
> +            AV_WL16(dst, bytestream2_get_le16(gb));
> +            AV_WL16(dst + 2, AV_RL16(tptr3));
> +            dst[4] = tptr3[2];
> +            AV_WL16(dst + 5, bytestream2_get_le16(gb));
> +            dst[7] = bytestream2_get_byte(gb);
> +            tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> +            break;
> +        case 5:
> +            tptr3 = tab1[bytestream2_get_byte(gb)];
> +            if (!tptr3)
> +                return AVERROR_INVALIDDATA;
> +            AV_WL16(dst, bytestream2_get_le16(gb));
> +            AV_WL16(dst + 2, bytestream2_get_le16(gb));
> +            dst[4] = bytestream2_get_byte(gb);
> +            AV_WL16(dst + 5, AV_RL16(tptr3));
> +            dst[7] = tptr3[2];
> +            tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> +            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
> +            break;
> +        case 6:
> +            tptr0 = tab1[bytestream2_get_byte(gb)];
> +            if (!tptr0)
> +                return AVERROR_INVALIDDATA;
> +            tptr1 = tab1[bytestream2_get_byte(gb)];
> +            if (!tptr1)
> +                return AVERROR_INVALIDDATA;
> +            AV_WL16(dst, bytestream2_get_le16(gb));
> +            AV_WL16(dst + 2, AV_RL16(tptr0));
> +            dst[4] = tptr0[2];
> +            AV_WL16(dst + 5, AV_RL16(tptr1));
> +            dst[7] = tptr1[2];
> +            tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> +            break;
> +        case 7:
> +            v = (8 + offset) * (bytestream2_get_le16(gb) + 1);
> +            if (v < 0 || v > dst - tex_data)
> +                return AVERROR_INVALIDDATA;
> +            tptr0 = dst - v;
> +            AV_WL16(dst, bytestream2_get_le16(gb));
> +            AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> +            AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> +            tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> +            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
> +            break;
> +        case 8:
> +            tptr1 = tab0[bytestream2_get_byte(gb)];
> +            if (!tptr1)
> +                return AVERROR_INVALIDDATA;
> +            AV_WL16(dst, AV_RL16(tptr1));
> +            AV_WL16(dst + 2, bytestream2_get_le16(gb));
> +            AV_WL32(dst + 4, bytestream2_get_le32(gb));
> +            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
> +            break;
> +        case 9:
> +            tptr1 = tab0[bytestream2_get_byte(gb)];
> +            if (!tptr1)
> +                return AVERROR_INVALIDDATA;
> +            tptr3 = tab1[bytestream2_get_byte(gb)];
> +            if (!tptr3)
> +                return AVERROR_INVALIDDATA;
> +            AV_WL16(dst, AV_RL16(tptr1));
> +            AV_WL16(dst + 2, AV_RL16(tptr3));
> +            dst[4] = tptr3[2];
> +            AV_WL16(dst + 5, bytestream2_get_le16(gb));
> +            dst[7] = bytestream2_get_byte(gb);
> +            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
> +            break;
> +        case 10:
> +            tptr1 = tab0[bytestream2_get_byte(gb)];
> +            if (!tptr1)
> +                return AVERROR_INVALIDDATA;
> +            tptr3 = tab1[bytestream2_get_byte(gb)];
> +            if (!tptr3)
> +                return AVERROR_INVALIDDATA;
> +            AV_WL16(dst, AV_RL16(tptr1));
> +            AV_WL16(dst + 2, bytestream2_get_le16(gb));
> +            dst[4] = bytestream2_get_byte(gb);
> +            AV_WL16(dst + 5, AV_RL16(tptr3));
> +            dst[7] = tptr3[2];
> +            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
> +            break;
> +        case 11:
> +            tptr0 = tab0[bytestream2_get_byte(gb)];
> +            if (!tptr0)
> +                return AVERROR_INVALIDDATA;
> +            tptr3 = tab1[bytestream2_get_byte(gb)];
> +            if (!tptr3)
> +                return AVERROR_INVALIDDATA;
> +            tptr1 = tab1[bytestream2_get_byte(gb)];
> +            if (!tptr1)
> +                return AVERROR_INVALIDDATA;
> +            AV_WL16(dst, AV_RL16(tptr0));
> +            AV_WL16(dst + 2, AV_RL16(tptr3));
> +            dst[4] = tptr3[2];
> +            AV_WL16(dst + 5, AV_RL16(tptr1));
> +            dst[7] = tptr1[2];
> +            break;
> +        case 12:
> +            tptr1 = tab0[bytestream2_get_byte(gb)];
> +            if (!tptr1)
> +                return AVERROR_INVALIDDATA;
> +            v = (8 + offset) * (bytestream2_get_le16(gb) + 1);
> +            if (v < 0 || v > dst - tex_data)
> +                return AVERROR_INVALIDDATA;
> +            tptr0 = dst - v;
> +            AV_WL16(dst, AV_RL16(tptr1));
> +            AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> +            AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> +            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
> +            break;
> +        case 13:
> +            AV_WL16(dst, AV_RL16(dst - (8 + offset)));
> +            AV_WL16(dst + 2, bytestream2_get_le16(gb));
> +            AV_WL32(dst + 4, bytestream2_get_le32(gb));
> +            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
> +            break;
> +        case 14:
> +            tptr3 = tab1[bytestream2_get_byte(gb)];
> +            if (!tptr3)
> +                return AVERROR_INVALIDDATA;
> +            AV_WL16(dst, AV_RL16(dst - (8 + offset)));
> +            AV_WL16(dst + 2, AV_RL16(tptr3));
> +            dst[4] = tptr3[2];
> +            AV_WL16(dst + 5, bytestream2_get_le16(gb));
> +            dst[7] = bytestream2_get_byte(gb);
> +            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
> +            break;
> +        case 15:
> +            tptr3 = tab1[bytestream2_get_byte(gb)];
> +            if (!tptr3)
> +                return AVERROR_INVALIDDATA;
> +            AV_WL16(dst, AV_RL16(dst - (8 + offset)));
> +            AV_WL16(dst + 2, bytestream2_get_le16(gb));
> +            dst[4] = bytestream2_get_byte(gb);
> +            AV_WL16(dst + 5, AV_RL16(tptr3));
> +            dst[7] = tptr3[2];
> +            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
> +            break;
> +        case 16:
> +            tptr3 = tab1[bytestream2_get_byte(gb)];
> +            if (!tptr3)
> +                return AVERROR_INVALIDDATA;
> +            tptr1 = tab1[bytestream2_get_byte(gb)];
> +            if (!tptr1)
> +                return AVERROR_INVALIDDATA;
> +            AV_WL16(dst, AV_RL16(dst - (8 + offset)));
> +            AV_WL16(dst + 2, AV_RL16(tptr3));
> +            dst[4] = tptr3[2];
> +            AV_WL16(dst + 5, AV_RL16(tptr1));
> +            dst[7] = tptr1[2];
> +            break;
> +        case 17:
> +            v = (8 + offset) * (bytestream2_get_le16(gb) + 1);
> +            if (v < 0 || v > dst - tex_data)
> +                return AVERROR_INVALIDDATA;
> +            AV_WL16(dst, AV_RL16(dst - (8 + offset)));
> +            AV_WL16(dst + 2, AV_RL16(&dst[-v + 2]));
> +            AV_WL32(dst + 4, AV_RL32(&dst[-v + 4]));
> +            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
> +            break;
> +        default:
> +            break;
> +        }
> +    } else {
> +done:
> +        AV_WL32(dst, AV_RL32(dst - (8 + offset)));
> +        AV_WL32(dst + 4, AV_RL32(dst - (4 + offset)));
> +        state--;
> +    }
> +    if (dst - tex_data + 8 > tex_size)
> +        return AVERROR_INVALIDDATA;
> +    dst += 8;
> +
> +    *oindex = oi;
> +    *dstp = dst;
> +    *statep = state;
> +
> +    return 0;
> +}
> +
> +static int dxv_decompress_cocg(DXVContext *ctx, GetByteContext *gb,
> +                               uint8_t *tex_data, int tex_size,
> +                               uint8_t *op_data0, uint8_t *op_data1,
> +                               int max_op_size0, int max_op_size1)
> +{
> +    uint8_t *dst, *tab2[256] = { 0 }, *tab0[256] = { 0 }, *tab3[256] = { 0 }, *tab1[256] = { 0 };
> +    int op_offset = bytestream2_get_le32(gb);
> +    unsigned op_size0 = bytestream2_get_le32(gb);
> +    unsigned op_size1 = bytestream2_get_le32(gb);
> +    int data_start = bytestream2_tell(gb);
> +    int skip0, skip1, oi0 = 0, oi1 = 0;
> +    int ret, state0 = 0, state1 = 0;
> +
> +    dst = tex_data;
> +    bytestream2_skip(gb, op_offset - 12);
> +    if (op_size0 > max_op_size0)
> +        return AVERROR_INVALIDDATA;
> +    skip0 = dxv_decompress_opcodes(gb, op_data0, op_size0);
> +    if (skip0 < 0)
> +        return skip0;
> +    bytestream2_seek(gb, data_start + op_offset + skip0 - 12, SEEK_SET);
> +    if (op_size1 > max_op_size1)
> +        return AVERROR_INVALIDDATA;
> +    skip1 = dxv_decompress_opcodes(gb, op_data1, op_size1);
> +    if (skip1 < 0)
> +        return skip1;
> +    bytestream2_seek(gb, data_start, SEEK_SET);
> +
> +    AV_WL32(dst, bytestream2_get_le32(gb));
> +    AV_WL32(dst + 4, bytestream2_get_le32(gb));
> +    AV_WL32(dst + 8, bytestream2_get_le32(gb));
> +    AV_WL32(dst + 12, bytestream2_get_le32(gb));
> +
> +    tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> +    tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
> +    tab2[0x9E3779B1 * AV_RL16(dst + 8) >> 24] = dst + 8;
> +    tab3[0x9E3779B1 * (AV_RL32(dst + 10) & 0xFFFFFF) >> 24] = dst + 10;
> +    dst += 16;
> +    while (dst + 10 < tex_data + tex_size) {
> +        ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data0, &oi0, op_size0,
> +                                 &dst, &state0, tab0, tab1, 8);
> +        if (ret < 0)
> +            return ret;
> +        ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data1, &oi1, op_size1,
> +                                 &dst, &state1, tab2, tab3, 8);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    bytestream2_seek(gb, data_start + op_offset + skip0 + skip1 - 12, SEEK_SET);
> +
> +    return 0;
> +}
> +
> +static int dxv_decompress_yo(DXVContext *ctx, GetByteContext *gb,
> +                             uint8_t *tex_data, int tex_size,
> +                             uint8_t *op_data, int max_op_size)
> +{
> +    int op_offset = bytestream2_get_le32(gb);
> +    unsigned op_size = bytestream2_get_le32(gb);
> +    int data_start = bytestream2_tell(gb);
> +    uint8_t *dst, *table0[256] = { 0 }, *table1[256] = { 0 };
> +    int ret, state = 0, skip, oi = 0, v, vv;
> +
> +    dst = tex_data;
> +    bytestream2_skip(gb, op_offset - 8);
> +    if (op_size > max_op_size)
> +        return AVERROR_INVALIDDATA;
> +    skip = dxv_decompress_opcodes(gb, op_data, op_size);
> +    if (skip < 0)
> +        return skip;
> +    bytestream2_seek(gb, data_start, SEEK_SET);
> +
> +    v = bytestream2_get_le32(gb);
> +    AV_WL32(dst, v);
> +    vv = bytestream2_get_le32(gb);
> +    table0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
> +    AV_WL32(dst + 4, vv);
> +    table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
> +    dst += 8;
> +
> +    while (dst < tex_data + tex_size) {
> +        ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data, &oi, op_size,
> +                                 &dst, &state, table0, table1, 0);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    bytestream2_seek(gb, data_start + op_offset + skip - 8, SEEK_SET);
> +
> +    return 0;
> +}
> +
> +static int dxv_decompress_ycg6(AVCodecContext *avctx)
> +{
> +    DXVContext *ctx = avctx->priv_data;
> +    GetByteContext *gb = &ctx->gbc;
> +    int ret;
> +
> +    ret = dxv_decompress_yo(ctx, gb, ctx->tex_data, ctx->tex_size,
> +                            ctx->op_data[0], ctx->op_size[0]);
> +    if (ret < 0)
> +        return ret;
> +
> +    return dxv_decompress_cocg(ctx, gb, ctx->ctex_data, ctx->ctex_size,
> +                               ctx->op_data[1], ctx->op_data[2],
> +                               ctx->op_size[1], ctx->op_size[2]);
> +}
> +
> +static int dxv_decompress_yg10(AVCodecContext *avctx)
> +{
> +    DXVContext *ctx = avctx->priv_data;
> +    GetByteContext *gb = &ctx->gbc;
> +    int ret;
> +
> +    ret = dxv_decompress_cocg(ctx, gb, ctx->tex_data, ctx->tex_size,
> +                              ctx->op_data[0], ctx->op_data[3],
> +                              ctx->op_size[0], ctx->op_size[3]);
> +    if (ret < 0)
> +        return ret;
> +
> +    return dxv_decompress_cocg(ctx, gb, ctx->ctex_data, ctx->ctex_size,
> +                               ctx->op_data[1], ctx->op_data[2],
> +                               ctx->op_size[1], ctx->op_size[2]);
> +}
> +
>  static int dxv_decompress_dxt5(AVCodecContext *avctx)
>  {
>      DXVContext *ctx = avctx->priv_data;
> @@ -359,6 +1045,12 @@ static int dxv_decode(AVCodecContext *avctx, void *data,
>
>      bytestream2_init(gbc, avpkt->data, avpkt->size);
>
> +    ctx->texture_block_h = 4;
> +    ctx->texture_block_w = 4;
> +
> +    avctx->pix_fmt = AV_PIX_FMT_RGBA;
> +    avctx->colorspace = AVCOL_SPC_RGB;
> +
>      tag = bytestream2_get_le32(gbc);
>      switch (tag) {
>      case MKBETAG('D', 'X', 'T', '1'):
> @@ -378,9 +1070,39 @@ static int dxv_decode(AVCodecContext *avctx, void *data,
>          msgtext = "DXT5";
>          break;
>      case MKBETAG('Y', 'C', 'G', '6'):
> +        decompress_tex = dxv_decompress_ycg6;
> +        ctx->tex_funct_planar[0] = yo_block;
> +        ctx->tex_funct_planar[1] = cocg_block;
> +        ctx->tex_rat   = 8;
> +        ctx->tex_step  = 32;
> +        ctx->ctex_step = 16;
> +        msgcomp = "YOCOCG6";
> +        msgtext = "YCG6";
> +        ctx->ctex_size = avctx->coded_width * avctx->coded_height / 4;
> +        ctx->texture_block_h = 4;
> +        ctx->texture_block_w = 16;
> +        ctx->ctexture_block_h = 4;
> +        ctx->ctexture_block_w = 4;
> +        avctx->pix_fmt = AV_PIX_FMT_YUV420P;
> +        avctx->colorspace = AVCOL_SPC_YCOCG;
> +        break;
>      case MKBETAG('Y', 'G', '1', '0'):
> -        avpriv_report_missing_feature(avctx, "Tag 0x%08"PRIX32, tag);
> -        return AVERROR_PATCHWELCOME;
> +        decompress_tex = dxv_decompress_yg10;
> +        ctx->tex_funct_planar[0] = yao_block;
> +        ctx->tex_funct_planar[1] = cocg_block;
> +        ctx->tex_rat   = 4;
> +        ctx->tex_step  = 64;
> +        ctx->ctex_step = 16;
> +        msgcomp = "YAOCOCG10";
> +        msgtext = "YG10";
> +        ctx->ctex_size = avctx->coded_width * avctx->coded_height / 4;
> +        ctx->texture_block_h = 4;
> +        ctx->texture_block_w = 16;
> +        ctx->ctexture_block_h = 4;
> +        ctx->ctexture_block_w = 4;
> +        avctx->pix_fmt = AV_PIX_FMT_YUVA420P;
> +        avctx->colorspace = AVCOL_SPC_YCOCG;
> +        break;
>      default:
>          /* Old version does not have a real header, just size and type. */
>          size = tag & 0x00FFFFFF;
> @@ -413,6 +1135,10 @@ static int dxv_decode(AVCodecContext *avctx, void *data,
>          break;
>      }
>
> +    ctx->slice_count = av_clip(avctx->thread_count, 1,
> +                               avctx->coded_height / FFMAX(ctx->texture_block_h,
> +                                                           ctx->ctexture_block_h));
> +
>      /* New header is 12 bytes long. */
>      if (!old_type) {
>          version_major = bytestream2_get_byte(gbc) - 1;
> @@ -440,10 +1166,28 @@ static int dxv_decode(AVCodecContext *avctx, void *data,
>      }
>
>      ctx->tex_size = avctx->coded_width * avctx->coded_height * 4 / ctx->tex_rat;
> -    ret = av_reallocp(&ctx->tex_data, ctx->tex_size);
> +    ret = av_reallocp(&ctx->tex_data, ctx->tex_size + AV_INPUT_BUFFER_PADDING_SIZE);
>      if (ret < 0)
>          return ret;
>
> +    if (ctx->ctex_size) {
> +        int i;
> +
> +        ctx->op_size[0] = avctx->coded_width * avctx->coded_height / 16;
> +        ctx->op_size[1] = avctx->coded_width * avctx->coded_height / 32;
> +        ctx->op_size[2] = avctx->coded_width * avctx->coded_height / 32;
> +        ctx->op_size[3] = avctx->coded_width * avctx->coded_height / 16;
> +
> +        ret = av_reallocp(&ctx->ctex_data, ctx->ctex_size + AV_INPUT_BUFFER_PADDING_SIZE);
> +        if (ret < 0)
> +            return ret;
> +        for (i = 0; i < 4; i++) {
> +            ret = av_reallocp(&ctx->op_data[i], ctx->op_size[i]);
> +            if (ret < 0)
> +                return ret;
> +        }
> +    }
> +
>      /* Decompress texture out of the intermediate compression. */
>      ret = decompress_tex(avctx);
>      if (ret < 0)
> @@ -482,10 +1226,6 @@ static int dxv_init(AVCodecContext *avctx)
>      avctx->coded_height = FFALIGN(avctx->height, 16);
>
>      ff_texturedsp_init(&ctx->texdsp);
> -    avctx->pix_fmt = AV_PIX_FMT_RGBA;
> -
> -    ctx->slice_count = av_clip(avctx->thread_count, 1,
> -                               avctx->coded_height / TEXTURE_BLOCK_H);
>
>      return 0;
>  }
> @@ -495,6 +1235,11 @@ static int dxv_close(AVCodecContext *avctx)
>      DXVContext *ctx = avctx->priv_data;
>
>      av_freep(&ctx->tex_data);
> +    av_freep(&ctx->ctex_data);
> +    av_freep(&ctx->op_data[0]);
> +    av_freep(&ctx->op_data[1]);
> +    av_freep(&ctx->op_data[2]);
> +    av_freep(&ctx->op_data[3]);
>
>      return 0;
>  }
> --
> 2.11.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>

LGTM
Paul B Mahol April 15, 2018, 7:41 a.m. UTC | #2
On 4/14/18, Rostislav Pehlivanov <atomnuker@gmail.com> wrote:
> On 14 April 2018 at 20:46, Paul B Mahol <onemda@gmail.com> wrote:
>
>> Signed-off-by: Paul B Mahol <onemda@gmail.com>
>> ---
>>  libavcodec/dxv.c | 815 ++++++++++++++++++++++++++++++
>> ++++++++++++++++++++++---
>>  1 file changed, 780 insertions(+), 35 deletions(-)
>>
>
> LGTM

Applied. Thanks.
Carl Eugen Hoyos April 15, 2018, 10:23 p.m. UTC | #3
2018-04-15 9:41 GMT+02:00, Paul B Mahol <onemda@gmail.com>:
> On 4/14/18, Rostislav Pehlivanov <atomnuker@gmail.com> wrote:
>> On 14 April 2018 at 20:46, Paul B Mahol <onemda@gmail.com> wrote:
>>
>>> Signed-off-by: Paul B Mahol <onemda@gmail.com>
>>> ---
>>>  libavcodec/dxv.c | 815 ++++++++++++++++++++++++++++++
>>> ++++++++++++++++++++++---
>>>  1 file changed, 780 insertions(+), 35 deletions(-)
>>>
>>
>> LGTM
>
> Applied. Thanks.

Thank you for working on formats like this one, I consider this
highly important!

Sorry for not testing your patch earlier...

The committed change is not helpful afaict, if you cannot fix
the colourspace issue (which I perfectly understand), please
commit your original hack (with a note), this variant cannot
help users.

Thank you, Carl Eugen
Paul B Mahol April 16, 2018, 6:37 a.m. UTC | #4
On 4/16/18, Carl Eugen Hoyos <ceffmpeg@gmail.com> wrote:
> 2018-04-15 9:41 GMT+02:00, Paul B Mahol <onemda@gmail.com>:
>> On 4/14/18, Rostislav Pehlivanov <atomnuker@gmail.com> wrote:
>>> On 14 April 2018 at 20:46, Paul B Mahol <onemda@gmail.com> wrote:
>>>
>>>> Signed-off-by: Paul B Mahol <onemda@gmail.com>
>>>> ---
>>>>  libavcodec/dxv.c | 815 ++++++++++++++++++++++++++++++
>>>> ++++++++++++++++++++++---
>>>>  1 file changed, 780 insertions(+), 35 deletions(-)
>>>>
>>>
>>> LGTM
>>
>> Applied. Thanks.
>
> Thank you for working on formats like this one, I consider this
> highly important!
>
> Sorry for not testing your patch earlier...
>
> The committed change is not helpful afaict, if you cannot fix
> the colourspace issue (which I perfectly understand), please
> commit your original hack (with a note), this variant cannot
> help users.
>
> Thank you, Carl Eugen

Please, leave for the good of the world future.
Hendrik Leppkes April 16, 2018, 7:20 a.m. UTC | #5
On Mon, Apr 16, 2018 at 12:23 AM, Carl Eugen Hoyos <ceffmpeg@gmail.com> wrote:
> 2018-04-15 9:41 GMT+02:00, Paul B Mahol <onemda@gmail.com>:
>> On 4/14/18, Rostislav Pehlivanov <atomnuker@gmail.com> wrote:
>>> On 14 April 2018 at 20:46, Paul B Mahol <onemda@gmail.com> wrote:
>>>
>>>> Signed-off-by: Paul B Mahol <onemda@gmail.com>
>>>> ---
>>>>  libavcodec/dxv.c | 815 ++++++++++++++++++++++++++++++
>>>> ++++++++++++++++++++++---
>>>>  1 file changed, 780 insertions(+), 35 deletions(-)
>>>>
>>>
>>> LGTM
>>
>> Applied. Thanks.
>
> Thank you for working on formats like this one, I consider this
> highly important!
>
> Sorry for not testing your patch earlier...
>
> The committed change is not helpful afaict, if you cannot fix
> the colourspace issue (which I perfectly understand), please
> commit your original hack (with a note), this variant cannot
> help users.
>

This is not the first decoder to output YCoCg, I have H.264 files with
that as well. Just because swscale cannot convert that doesn't mean we
should be doing anything but non-native output from a decoder. My
playback chain can support that just fine.
Feel free to contribute YCoCg conversion to swscale to resolve this
for "the users".

- Hendrik
wm4 April 16, 2018, 2:21 p.m. UTC | #6
On Mon, 16 Apr 2018 09:20:09 +0200
Hendrik Leppkes <h.leppkes@gmail.com> wrote:

> On Mon, Apr 16, 2018 at 12:23 AM, Carl Eugen Hoyos <ceffmpeg@gmail.com> wrote:
> > 2018-04-15 9:41 GMT+02:00, Paul B Mahol <onemda@gmail.com>:  
> >> On 4/14/18, Rostislav Pehlivanov <atomnuker@gmail.com> wrote:  
> >>> On 14 April 2018 at 20:46, Paul B Mahol <onemda@gmail.com> wrote:
> >>>  
> >>>> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> >>>> ---
> >>>>  libavcodec/dxv.c | 815 ++++++++++++++++++++++++++++++
> >>>> ++++++++++++++++++++++---
> >>>>  1 file changed, 780 insertions(+), 35 deletions(-)
> >>>>  
> >>>
> >>> LGTM  
> >>
> >> Applied. Thanks.  
> >
> > Thank you for working on formats like this one, I consider this
> > highly important!
> >
> > Sorry for not testing your patch earlier...
> >
> > The committed change is not helpful afaict, if you cannot fix
> > the colourspace issue (which I perfectly understand), please
> > commit your original hack (with a note), this variant cannot
> > help users.
> >  
> 
> This is not the first decoder to output YCoCg, I have H.264 files with
> that as well. Just because swscale cannot convert that doesn't mean we
> should be doing anything but non-native output from a decoder. My
> playback chain can support that just fine.
> Feel free to contribute YCoCg conversion to swscale to resolve this
> for "the users".

+1
diff mbox

Patch

diff --git a/libavcodec/dxv.c b/libavcodec/dxv.c
index 529e211258..101fe78481 100644
--- a/libavcodec/dxv.c
+++ b/libavcodec/dxv.c
@@ -1,6 +1,7 @@ 
 /*
  * Resolume DXV decoder
  * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
+ * Copyright (C) 2018 Paul B Mahol
  *
  * This file is part of FFmpeg.
  *
@@ -23,6 +24,7 @@ 
 
 #include "libavutil/imgutils.h"
 
+#include "mathops.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
@@ -34,50 +36,211 @@  typedef struct DXVContext {
     TextureDSPContext texdsp;
     GetByteContext gbc;
 
-    uint8_t *tex_data;  // Compressed texture
-    int tex_rat;        // Compression ratio
-    int tex_step;       // Distance between blocks
-    int64_t tex_size;   // Texture size
+    uint8_t *tex_data;   // Compressed texture
+    uint8_t *ctex_data;  // Compressed texture
+    int tex_rat;         // Compression ratio
+    int tex_step;        // Distance between blocks
+    int ctex_step;       // Distance between blocks
+    int64_t tex_size;    // Texture size
+    int64_t ctex_size;   // Texture size
 
     /* Optimal number of slices for parallel decoding */
     int slice_count;
 
+    uint8_t *op_data[4]; // Opcodes
+    int64_t op_size[4];  // Opcodes size
+
+    int texture_block_w;
+    int texture_block_h;
+
+    int ctexture_block_w;
+    int ctexture_block_h;
+
     /* Pointer to the selected decompression function */
     int (*tex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*tex_funct_planar[2])(uint8_t *plane0, ptrdiff_t stride0,
+                               uint8_t *plane1, ptrdiff_t stride1,
+                               const uint8_t *block);
 } DXVContext;
 
+static void decompress_indices(uint8_t *dst, const uint8_t *src)
+{
+    int block, i;
+
+    for (block = 0; block < 2; block++) {
+        int tmp = AV_RL24(src);
+
+        /* Unpack 8x3 bit from last 3 byte block */
+        for (i = 0; i < 8; i++)
+            dst[i] = (tmp >> (i * 3)) & 0x7;
+
+        src += 3;
+        dst += 8;
+    }
+}
+
+static int extract_component(int yo0, int yo1, int code)
+{
+    int yo;
+
+    if (yo0 == yo1) {
+        yo = yo0;
+    } else if (code == 0) {
+        yo = yo0;
+    } else if (code == 1) {
+        yo = yo1;
+    } else {
+        if (yo0 > yo1) {
+            yo = (uint8_t) (((8 - code) * yo0 +
+                             (code - 1) * yo1) / 7);
+        } else {
+            if (code == 6) {
+                yo = 0;
+            } else if (code == 7) {
+                yo = 255;
+            } else {
+                yo = (uint8_t) (((6 - code) * yo0 +
+                                 (code - 1) * yo1) / 5);
+            }
+        }
+    }
+
+    return yo;
+}
+
+static int cocg_block(uint8_t *plane0, ptrdiff_t stride0,
+                      uint8_t *plane1, ptrdiff_t stride1,
+                      const uint8_t *block)
+{
+    uint8_t co_indices[16];
+    uint8_t cg_indices[16];
+    uint8_t co0 = *(block);
+    uint8_t co1 = *(block + 1);
+    uint8_t cg0 = *(block + 8);
+    uint8_t cg1 = *(block + 9);
+    int x, y;
+
+    decompress_indices(co_indices, block + 2);
+    decompress_indices(cg_indices, block + 10);
+
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            int co_code = co_indices[x + y * 4];
+            int cg_code = cg_indices[x + y * 4];
+
+            plane0[x] = extract_component(cg0, cg1, cg_code);
+            plane1[x] = extract_component(co0, co1, co_code);
+        }
+        plane0 += stride0;
+        plane1 += stride1;
+    }
+
+    return 16;
+}
+
+static void yao_subblock(uint8_t *dst, uint8_t *yo_indices,
+                        ptrdiff_t stride, const uint8_t *block)
+{
+    uint8_t yo0 = *(block);
+    uint8_t yo1 = *(block + 1);
+    int x, y;
+
+    decompress_indices(yo_indices, block + 2);
+
+    for (y = 0; y < 4; y++) {
+        for (x = 0; x < 4; x++) {
+            int yo_code = yo_indices[x + y * 4];
+
+            dst[x] = extract_component(yo0, yo1, yo_code);
+        }
+        dst += stride;
+    }
+}
+
+static int yo_block(uint8_t *dst, ptrdiff_t stride,
+                    uint8_t *unused0, ptrdiff_t unused1,
+                    const uint8_t *block)
+{
+    uint8_t yo_indices[16];
+
+    yao_subblock(dst,      yo_indices, stride, block);
+    yao_subblock(dst + 4,  yo_indices, stride, block + 8);
+    yao_subblock(dst + 8,  yo_indices, stride, block + 16);
+    yao_subblock(dst + 12, yo_indices, stride, block + 24);
+
+    return 32;
+}
+
+static int yao_block(uint8_t *plane0, ptrdiff_t stride0,
+                     uint8_t *plane3, ptrdiff_t stride1,
+                     const uint8_t *block)
+{
+    uint8_t yo_indices[16];
+    uint8_t a_indices[16];
+
+    yao_subblock(plane0,      yo_indices, stride0, block);
+    yao_subblock(plane3,      a_indices,  stride1, block + 8);
+    yao_subblock(plane0 + 4,  yo_indices, stride0, block + 16);
+    yao_subblock(plane3 + 4,  a_indices,  stride1, block + 24);
+    yao_subblock(plane0 + 8,  yo_indices, stride0, block + 32);
+    yao_subblock(plane3 + 8,  a_indices,  stride1, block + 40);
+    yao_subblock(plane0 + 12, yo_indices, stride0, block + 48);
+    yao_subblock(plane3 + 12, a_indices,  stride1, block + 56);
+
+    return 64;
+}
+
 static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
                                      int slice, int thread_nb)
 {
     DXVContext *ctx = avctx->priv_data;
     AVFrame *frame = arg;
     const uint8_t *d = ctx->tex_data;
-    int w_block = avctx->coded_width / TEXTURE_BLOCK_W;
-    int h_block = avctx->coded_height / TEXTURE_BLOCK_H;
+    int w_block = avctx->coded_width / ctx->texture_block_w;
+    int h_block = avctx->coded_height / ctx->texture_block_h;
     int x, y;
     int start_slice, end_slice;
-    int base_blocks_per_slice = h_block / ctx->slice_count;
-    int remainder_blocks = h_block % ctx->slice_count;
-
-    /* When the frame height (in blocks) doesn't divide evenly between the
-     * number of slices, spread the remaining blocks evenly between the first
-     * operations */
-    start_slice = slice * base_blocks_per_slice;
-    /* Add any extra blocks (one per slice) that have been added
-     * before this slice */
-    start_slice += FFMIN(slice, remainder_blocks);
-
-    end_slice = start_slice + base_blocks_per_slice;
-    /* Add an extra block if there are remainder blocks to be accounted for */
-    if (slice < remainder_blocks)
-        end_slice++;
-
-    for (y = start_slice; y < end_slice; y++) {
-        uint8_t *p = frame->data[0] + y * frame->linesize[0] * TEXTURE_BLOCK_H;
-        int off  = y * w_block;
-        for (x = 0; x < w_block; x++) {
-            ctx->tex_funct(p + x * 16, frame->linesize[0],
-                           d + (off + x) * ctx->tex_step);
+
+    start_slice = h_block * slice / ctx->slice_count;
+    end_slice = h_block * (slice + 1) / ctx->slice_count;
+
+    if (ctx->tex_funct) {
+        for (y = start_slice; y < end_slice; y++) {
+            uint8_t *p = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h;
+            int off = y * w_block;
+            for (x = 0; x < w_block; x++) {
+                ctx->tex_funct(p + x * 4 * ctx->texture_block_w, frame->linesize[0],
+                               d + (off + x) * ctx->tex_step);
+            }
+        }
+    } else {
+        const uint8_t *c = ctx->ctex_data;
+
+        for (y = start_slice; y < end_slice; y++) {
+            uint8_t *p0 = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h;
+            uint8_t *p3 = ctx->tex_step != 64 ? NULL : frame->data[3] + y * frame->linesize[3] * ctx->texture_block_h;
+            int off = y * w_block;
+            for (x = 0; x < w_block; x++) {
+                ctx->tex_funct_planar[0](p0 + x * ctx->texture_block_w, frame->linesize[0],
+                                         p3 != NULL ? p3 + x * ctx->texture_block_w : NULL, frame->linesize[3],
+                                         d + (off + x) * ctx->tex_step);
+            }
+        }
+
+        w_block = (avctx->coded_width / 2) / ctx->ctexture_block_w;
+        h_block = (avctx->coded_height / 2) / ctx->ctexture_block_h;
+        start_slice = h_block * slice / ctx->slice_count;
+        end_slice = h_block * (slice + 1) / ctx->slice_count;
+
+        for (y = start_slice; y < end_slice; y++) {
+            uint8_t *p0 = frame->data[1] + y * frame->linesize[1] * ctx->ctexture_block_h;
+            uint8_t *p1 = frame->data[2] + y * frame->linesize[2] * ctx->ctexture_block_h;
+            int off = y * w_block;
+            for (x = 0; x < w_block; x++) {
+                ctx->tex_funct_planar[1](p0 + x * ctx->ctexture_block_w, frame->linesize[1],
+                                         p1 + x * ctx->ctexture_block_w, frame->linesize[2],
+                                         c + (off + x) * ctx->ctex_step);
+            }
         }
     }
 
@@ -169,6 +332,529 @@  static int dxv_decompress_dxt1(AVCodecContext *avctx)
     return 0;
 }
 
+typedef struct OpcodeTable {
+    int16_t next;
+    uint8_t val1;
+    uint8_t val2;
+} OpcodeTable;
+
+static int fill_ltable(GetByteContext *gb, uint32_t *table, int *nb_elements)
+{
+    unsigned half = 512, bits = 1023, left = 1024, input, mask;
+    int value, counter = 0, rshift = 10, lshift = 30;
+
+    mask = bytestream2_get_le32(gb) >> 2;
+    while (left) {
+        if (counter >= 256)
+            return AVERROR_INVALIDDATA;
+        value = bits & mask;
+        left -= bits & mask;
+        mask >>= rshift;
+        lshift -= rshift;
+        table[counter++] = value;
+        if (lshift < 16) {
+            if (bytestream2_get_bytes_left(gb) <= 0)
+                return AVERROR_INVALIDDATA;
+
+            input = bytestream2_get_le16(gb);
+            mask += input << lshift;
+            lshift += 16;
+        }
+        if (left < half) {
+            half >>= 1;
+            bits >>= 1;
+            rshift--;
+        }
+    }
+
+    for (; !table[counter - 1]; counter--)
+        if (counter <= 0)
+            return AVERROR_INVALIDDATA;
+
+    *nb_elements = counter;
+
+    if (counter < 256)
+        memset(&table[counter], 0, 4 * (256 - counter));
+
+    if (lshift >= 16)
+        bytestream2_seek(gb, -2, SEEK_CUR);
+
+    return 0;
+}
+
+static int fill_optable(unsigned *table0, OpcodeTable *table1, int nb_elements)
+{
+    unsigned table2[256] = { 0 };
+    unsigned x = 0;
+    int val0, val1, i, j = 2, k = 0;
+
+    table2[0] = table0[0];
+    for (i = 0; i < nb_elements - 1; i++, table2[i] = val0) {
+        val0 = table0[i + 1] + table2[i];
+    }
+
+    if (!table2[0]) {
+        do {
+            k++;
+        } while (!table2[k]);
+    }
+
+    j = 2;
+    for (i = 1024; i > 0; i--) {
+        for (table1[x].val1 = k; k < 256 && j > table2[k]; k++);
+        x = (x - 383) & 0x3FF;
+        j++;
+    }
+
+    if (nb_elements > 0)
+        memcpy(&table2[0], table0, 4 * nb_elements);
+
+    for (i = 0; i < 1024; i++) {
+        val0 = table1[i].val1;
+        val1 = table2[val0];
+        table2[val0]++;
+        x = 31 - ff_clz(val1);
+        if (x > 10)
+            return AVERROR_INVALIDDATA;
+        table1[i].val2 = 10 - x;
+        table1[i].next = (val1 << table1[i].val2) - 1024;
+    }
+
+    return 0;
+}
+
+static int get_opcodes(GetByteContext *gb, uint32_t *table, uint8_t *dst, int op_size, int nb_elements)
+{
+    OpcodeTable optable[1024];
+    int sum, x, val, lshift, rshift, ret, size_in_bits, i, idx;
+    unsigned endoffset, newoffset, offset;
+    unsigned next;
+    uint8_t *src = (uint8_t *)gb->buffer;
+
+    ret = fill_optable(table, optable, nb_elements);
+    if (ret < 0)
+        return ret;
+
+    size_in_bits = bytestream2_get_le32(gb);
+    endoffset = ((size_in_bits + 7) >> 3) - 4;
+    if (endoffset <= 0 || bytestream2_get_bytes_left(gb) < endoffset)
+        return AVERROR_INVALIDDATA;
+
+    offset = endoffset;
+    next = AV_RL32(src + endoffset);
+    rshift = (((size_in_bits & 0xFF) - 1) & 7) + 15;
+    lshift = 32 - rshift;
+    idx = (next >> rshift) & 0x3FF;
+    for (i = 0; i < op_size; i++) {
+        dst[i] = optable[idx].val1;
+        val = optable[idx].val2;
+        sum = val + lshift;
+        x = (next << lshift) >> 1 >> (31 - val);
+        newoffset = offset - (sum >> 3);
+        lshift = sum & 7;
+        idx = x + optable[idx].next;
+        offset = newoffset;
+        if (offset > endoffset)
+            return AVERROR_INVALIDDATA;
+        next = AV_RL32(src + offset);
+    }
+
+    bytestream2_skip(gb, (size_in_bits + 7 >> 3) - 4);
+
+    return 0;
+}
+
+static int dxv_decompress_opcodes(GetByteContext *gb, void *dstp, size_t op_size)
+{
+    int pos = bytestream2_tell(gb);
+    int flag = bytestream2_peek_byte(gb);
+
+    if ((flag & 3) == 0) {
+        bytestream2_skip(gb, 1);
+        bytestream2_get_buffer(gb, dstp, op_size);
+    } else if ((flag & 3) == 1) {
+        bytestream2_skip(gb, 1);
+        memset(dstp, bytestream2_get_byte(gb), op_size);
+    } else {
+        uint32_t table[256];
+        int ret, elements = 0;
+
+        ret = fill_ltable(gb, table, &elements);
+        if (ret < 0)
+            return ret;
+        ret = get_opcodes(gb, table, dstp, op_size, elements);
+        if (ret < 0)
+            return ret;
+    }
+    return bytestream2_tell(gb) - pos;
+}
+
+static int dxv_decompress_cgo(DXVContext *ctx, GetByteContext *gb,
+                              uint8_t *tex_data, int tex_size,
+                              uint8_t *op_data, int *oindex,
+                              int op_size,
+                              uint8_t **dstp, int *statep,
+                              uint8_t **tab0, uint8_t **tab1,
+                              int offset)
+{
+    uint8_t *dst = *dstp;
+    uint8_t *tptr0, *tptr1, *tptr3;
+    int oi = *oindex;
+    int state = *statep;
+    int opcode, v, vv;
+
+    if (state <= 0) {
+        if (oi >= op_size)
+            return AVERROR_INVALIDDATA;
+        opcode = op_data[oi++];
+        if (!opcode) {
+            v = bytestream2_get_byte(gb);
+            if (v == 255) {
+                do {
+                    if (bytestream2_get_bytes_left(gb) <= 0)
+                        return AVERROR_INVALIDDATA;
+                    opcode = bytestream2_get_le16(gb);
+                    v += opcode;
+                } while (opcode == 0xFFFF);
+            }
+            AV_WL32(dst, AV_RL32(dst - 16));
+            AV_WL32(dst + 4, AV_RL32(dst - 12));
+            state = v + 4;
+            goto done;
+        }
+
+        switch (opcode) {
+        case 1:
+            AV_WL32(dst, AV_RL32(dst - (8 + offset)));
+            AV_WL32(dst + 4, AV_RL32(dst - (4 + offset)));
+            break;
+        case 2:
+            vv = (8 + offset) * (bytestream2_get_le16(gb) + 1);
+            if (vv < 0 || vv > dst - tex_data)
+                return AVERROR_INVALIDDATA;
+            tptr0 = dst - vv;
+            v = AV_RL32(tptr0);
+            AV_WL32(dst, AV_RL32(tptr0));
+            AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
+            tab0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 3:
+            AV_WL32(dst, bytestream2_get_le32(gb));
+            AV_WL32(dst + 4, bytestream2_get_le32(gb));
+            tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 4:
+            tptr3 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr3)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, bytestream2_get_le16(gb));
+            AV_WL16(dst + 2, AV_RL16(tptr3));
+            dst[4] = tptr3[2];
+            AV_WL16(dst + 5, bytestream2_get_le16(gb));
+            dst[7] = bytestream2_get_byte(gb);
+            tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+            break;
+        case 5:
+            tptr3 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr3)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, bytestream2_get_le16(gb));
+            AV_WL16(dst + 2, bytestream2_get_le16(gb));
+            dst[4] = bytestream2_get_byte(gb);
+            AV_WL16(dst + 5, AV_RL16(tptr3));
+            dst[7] = tptr3[2];
+            tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 6:
+            tptr0 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr0)
+                return AVERROR_INVALIDDATA;
+            tptr1 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr1)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, bytestream2_get_le16(gb));
+            AV_WL16(dst + 2, AV_RL16(tptr0));
+            dst[4] = tptr0[2];
+            AV_WL16(dst + 5, AV_RL16(tptr1));
+            dst[7] = tptr1[2];
+            tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+            break;
+        case 7:
+            v = (8 + offset) * (bytestream2_get_le16(gb) + 1);
+            if (v < 0 || v > dst - tex_data)
+                return AVERROR_INVALIDDATA;
+            tptr0 = dst - v;
+            AV_WL16(dst, bytestream2_get_le16(gb));
+            AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
+            AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
+            tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 8:
+            tptr1 = tab0[bytestream2_get_byte(gb)];
+            if (!tptr1)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, AV_RL16(tptr1));
+            AV_WL16(dst + 2, bytestream2_get_le16(gb));
+            AV_WL32(dst + 4, bytestream2_get_le32(gb));
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 9:
+            tptr1 = tab0[bytestream2_get_byte(gb)];
+            if (!tptr1)
+                return AVERROR_INVALIDDATA;
+            tptr3 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr3)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, AV_RL16(tptr1));
+            AV_WL16(dst + 2, AV_RL16(tptr3));
+            dst[4] = tptr3[2];
+            AV_WL16(dst + 5, bytestream2_get_le16(gb));
+            dst[7] = bytestream2_get_byte(gb);
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 10:
+            tptr1 = tab0[bytestream2_get_byte(gb)];
+            if (!tptr1)
+                return AVERROR_INVALIDDATA;
+            tptr3 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr3)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, AV_RL16(tptr1));
+            AV_WL16(dst + 2, bytestream2_get_le16(gb));
+            dst[4] = bytestream2_get_byte(gb);
+            AV_WL16(dst + 5, AV_RL16(tptr3));
+            dst[7] = tptr3[2];
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 11:
+            tptr0 = tab0[bytestream2_get_byte(gb)];
+            if (!tptr0)
+                return AVERROR_INVALIDDATA;
+            tptr3 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr3)
+                return AVERROR_INVALIDDATA;
+            tptr1 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr1)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, AV_RL16(tptr0));
+            AV_WL16(dst + 2, AV_RL16(tptr3));
+            dst[4] = tptr3[2];
+            AV_WL16(dst + 5, AV_RL16(tptr1));
+            dst[7] = tptr1[2];
+            break;
+        case 12:
+            tptr1 = tab0[bytestream2_get_byte(gb)];
+            if (!tptr1)
+                return AVERROR_INVALIDDATA;
+            v = (8 + offset) * (bytestream2_get_le16(gb) + 1);
+            if (v < 0 || v > dst - tex_data)
+                return AVERROR_INVALIDDATA;
+            tptr0 = dst - v;
+            AV_WL16(dst, AV_RL16(tptr1));
+            AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
+            AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 13:
+            AV_WL16(dst, AV_RL16(dst - (8 + offset)));
+            AV_WL16(dst + 2, bytestream2_get_le16(gb));
+            AV_WL32(dst + 4, bytestream2_get_le32(gb));
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 14:
+            tptr3 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr3)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, AV_RL16(dst - (8 + offset)));
+            AV_WL16(dst + 2, AV_RL16(tptr3));
+            dst[4] = tptr3[2];
+            AV_WL16(dst + 5, bytestream2_get_le16(gb));
+            dst[7] = bytestream2_get_byte(gb);
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 15:
+            tptr3 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr3)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, AV_RL16(dst - (8 + offset)));
+            AV_WL16(dst + 2, bytestream2_get_le16(gb));
+            dst[4] = bytestream2_get_byte(gb);
+            AV_WL16(dst + 5, AV_RL16(tptr3));
+            dst[7] = tptr3[2];
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        case 16:
+            tptr3 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr3)
+                return AVERROR_INVALIDDATA;
+            tptr1 = tab1[bytestream2_get_byte(gb)];
+            if (!tptr1)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, AV_RL16(dst - (8 + offset)));
+            AV_WL16(dst + 2, AV_RL16(tptr3));
+            dst[4] = tptr3[2];
+            AV_WL16(dst + 5, AV_RL16(tptr1));
+            dst[7] = tptr1[2];
+            break;
+        case 17:
+            v = (8 + offset) * (bytestream2_get_le16(gb) + 1);
+            if (v < 0 || v > dst - tex_data)
+                return AVERROR_INVALIDDATA;
+            AV_WL16(dst, AV_RL16(dst - (8 + offset)));
+            AV_WL16(dst + 2, AV_RL16(&dst[-v + 2]));
+            AV_WL32(dst + 4, AV_RL32(&dst[-v + 4]));
+            tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+            break;
+        default:
+            break;
+        }
+    } else {
+done:
+        AV_WL32(dst, AV_RL32(dst - (8 + offset)));
+        AV_WL32(dst + 4, AV_RL32(dst - (4 + offset)));
+        state--;
+    }
+    if (dst - tex_data + 8 > tex_size)
+        return AVERROR_INVALIDDATA;
+    dst += 8;
+
+    *oindex = oi;
+    *dstp = dst;
+    *statep = state;
+
+    return 0;
+}
+
+static int dxv_decompress_cocg(DXVContext *ctx, GetByteContext *gb,
+                               uint8_t *tex_data, int tex_size,
+                               uint8_t *op_data0, uint8_t *op_data1,
+                               int max_op_size0, int max_op_size1)
+{
+    uint8_t *dst, *tab2[256] = { 0 }, *tab0[256] = { 0 }, *tab3[256] = { 0 }, *tab1[256] = { 0 };
+    int op_offset = bytestream2_get_le32(gb);
+    unsigned op_size0 = bytestream2_get_le32(gb);
+    unsigned op_size1 = bytestream2_get_le32(gb);
+    int data_start = bytestream2_tell(gb);
+    int skip0, skip1, oi0 = 0, oi1 = 0;
+    int ret, state0 = 0, state1 = 0;
+
+    dst = tex_data;
+    bytestream2_skip(gb, op_offset - 12);
+    if (op_size0 > max_op_size0)
+        return AVERROR_INVALIDDATA;
+    skip0 = dxv_decompress_opcodes(gb, op_data0, op_size0);
+    if (skip0 < 0)
+        return skip0;
+    bytestream2_seek(gb, data_start + op_offset + skip0 - 12, SEEK_SET);
+    if (op_size1 > max_op_size1)
+        return AVERROR_INVALIDDATA;
+    skip1 = dxv_decompress_opcodes(gb, op_data1, op_size1);
+    if (skip1 < 0)
+        return skip1;
+    bytestream2_seek(gb, data_start, SEEK_SET);
+
+    AV_WL32(dst, bytestream2_get_le32(gb));
+    AV_WL32(dst + 4, bytestream2_get_le32(gb));
+    AV_WL32(dst + 8, bytestream2_get_le32(gb));
+    AV_WL32(dst + 12, bytestream2_get_le32(gb));
+
+    tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+    tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
+    tab2[0x9E3779B1 * AV_RL16(dst + 8) >> 24] = dst + 8;
+    tab3[0x9E3779B1 * (AV_RL32(dst + 10) & 0xFFFFFF) >> 24] = dst + 10;
+    dst += 16;
+    while (dst + 10 < tex_data + tex_size) {
+        ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data0, &oi0, op_size0,
+                                 &dst, &state0, tab0, tab1, 8);
+        if (ret < 0)
+            return ret;
+        ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data1, &oi1, op_size1,
+                                 &dst, &state1, tab2, tab3, 8);
+        if (ret < 0)
+            return ret;
+    }
+
+    bytestream2_seek(gb, data_start + op_offset + skip0 + skip1 - 12, SEEK_SET);
+
+    return 0;
+}
+
+static int dxv_decompress_yo(DXVContext *ctx, GetByteContext *gb,
+                             uint8_t *tex_data, int tex_size,
+                             uint8_t *op_data, int max_op_size)
+{
+    int op_offset = bytestream2_get_le32(gb);
+    unsigned op_size = bytestream2_get_le32(gb);
+    int data_start = bytestream2_tell(gb);
+    uint8_t *dst, *table0[256] = { 0 }, *table1[256] = { 0 };
+    int ret, state = 0, skip, oi = 0, v, vv;
+
+    dst = tex_data;
+    bytestream2_skip(gb, op_offset - 8);
+    if (op_size > max_op_size)
+        return AVERROR_INVALIDDATA;
+    skip = dxv_decompress_opcodes(gb, op_data, op_size);
+    if (skip < 0)
+        return skip;
+    bytestream2_seek(gb, data_start, SEEK_SET);
+
+    v = bytestream2_get_le32(gb);
+    AV_WL32(dst, v);
+    vv = bytestream2_get_le32(gb);
+    table0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
+    AV_WL32(dst + 4, vv);
+    table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
+    dst += 8;
+
+    while (dst < tex_data + tex_size) {
+        ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data, &oi, op_size,
+                                 &dst, &state, table0, table1, 0);
+        if (ret < 0)
+            return ret;
+    }
+
+    bytestream2_seek(gb, data_start + op_offset + skip - 8, SEEK_SET);
+
+    return 0;
+}
+
+static int dxv_decompress_ycg6(AVCodecContext *avctx)
+{
+    DXVContext *ctx = avctx->priv_data;
+    GetByteContext *gb = &ctx->gbc;
+    int ret;
+
+    ret = dxv_decompress_yo(ctx, gb, ctx->tex_data, ctx->tex_size,
+                            ctx->op_data[0], ctx->op_size[0]);
+    if (ret < 0)
+        return ret;
+
+    return dxv_decompress_cocg(ctx, gb, ctx->ctex_data, ctx->ctex_size,
+                               ctx->op_data[1], ctx->op_data[2],
+                               ctx->op_size[1], ctx->op_size[2]);
+}
+
+static int dxv_decompress_yg10(AVCodecContext *avctx)
+{
+    DXVContext *ctx = avctx->priv_data;
+    GetByteContext *gb = &ctx->gbc;
+    int ret;
+
+    ret = dxv_decompress_cocg(ctx, gb, ctx->tex_data, ctx->tex_size,
+                              ctx->op_data[0], ctx->op_data[3],
+                              ctx->op_size[0], ctx->op_size[3]);
+    if (ret < 0)
+        return ret;
+
+    return dxv_decompress_cocg(ctx, gb, ctx->ctex_data, ctx->ctex_size,
+                               ctx->op_data[1], ctx->op_data[2],
+                               ctx->op_size[1], ctx->op_size[2]);
+}
+
 static int dxv_decompress_dxt5(AVCodecContext *avctx)
 {
     DXVContext *ctx = avctx->priv_data;
@@ -359,6 +1045,12 @@  static int dxv_decode(AVCodecContext *avctx, void *data,
 
     bytestream2_init(gbc, avpkt->data, avpkt->size);
 
+    ctx->texture_block_h = 4;
+    ctx->texture_block_w = 4;
+
+    avctx->pix_fmt = AV_PIX_FMT_RGBA;
+    avctx->colorspace = AVCOL_SPC_RGB;
+
     tag = bytestream2_get_le32(gbc);
     switch (tag) {
     case MKBETAG('D', 'X', 'T', '1'):
@@ -378,9 +1070,39 @@  static int dxv_decode(AVCodecContext *avctx, void *data,
         msgtext = "DXT5";
         break;
     case MKBETAG('Y', 'C', 'G', '6'):
+        decompress_tex = dxv_decompress_ycg6;
+        ctx->tex_funct_planar[0] = yo_block;
+        ctx->tex_funct_planar[1] = cocg_block;
+        ctx->tex_rat   = 8;
+        ctx->tex_step  = 32;
+        ctx->ctex_step = 16;
+        msgcomp = "YOCOCG6";
+        msgtext = "YCG6";
+        ctx->ctex_size = avctx->coded_width * avctx->coded_height / 4;
+        ctx->texture_block_h = 4;
+        ctx->texture_block_w = 16;
+        ctx->ctexture_block_h = 4;
+        ctx->ctexture_block_w = 4;
+        avctx->pix_fmt = AV_PIX_FMT_YUV420P;
+        avctx->colorspace = AVCOL_SPC_YCOCG;
+        break;
     case MKBETAG('Y', 'G', '1', '0'):
-        avpriv_report_missing_feature(avctx, "Tag 0x%08"PRIX32, tag);
-        return AVERROR_PATCHWELCOME;
+        decompress_tex = dxv_decompress_yg10;
+        ctx->tex_funct_planar[0] = yao_block;
+        ctx->tex_funct_planar[1] = cocg_block;
+        ctx->tex_rat   = 4;
+        ctx->tex_step  = 64;
+        ctx->ctex_step = 16;
+        msgcomp = "YAOCOCG10";
+        msgtext = "YG10";
+        ctx->ctex_size = avctx->coded_width * avctx->coded_height / 4;
+        ctx->texture_block_h = 4;
+        ctx->texture_block_w = 16;
+        ctx->ctexture_block_h = 4;
+        ctx->ctexture_block_w = 4;
+        avctx->pix_fmt = AV_PIX_FMT_YUVA420P;
+        avctx->colorspace = AVCOL_SPC_YCOCG;
+        break;
     default:
         /* Old version does not have a real header, just size and type. */
         size = tag & 0x00FFFFFF;
@@ -413,6 +1135,10 @@  static int dxv_decode(AVCodecContext *avctx, void *data,
         break;
     }
 
+    ctx->slice_count = av_clip(avctx->thread_count, 1,
+                               avctx->coded_height / FFMAX(ctx->texture_block_h,
+                                                           ctx->ctexture_block_h));
+
     /* New header is 12 bytes long. */
     if (!old_type) {
         version_major = bytestream2_get_byte(gbc) - 1;
@@ -440,10 +1166,28 @@  static int dxv_decode(AVCodecContext *avctx, void *data,
     }
 
     ctx->tex_size = avctx->coded_width * avctx->coded_height * 4 / ctx->tex_rat;
-    ret = av_reallocp(&ctx->tex_data, ctx->tex_size);
+    ret = av_reallocp(&ctx->tex_data, ctx->tex_size + AV_INPUT_BUFFER_PADDING_SIZE);
     if (ret < 0)
         return ret;
 
+    if (ctx->ctex_size) {
+        int i;
+
+        ctx->op_size[0] = avctx->coded_width * avctx->coded_height / 16;
+        ctx->op_size[1] = avctx->coded_width * avctx->coded_height / 32;
+        ctx->op_size[2] = avctx->coded_width * avctx->coded_height / 32;
+        ctx->op_size[3] = avctx->coded_width * avctx->coded_height / 16;
+
+        ret = av_reallocp(&ctx->ctex_data, ctx->ctex_size + AV_INPUT_BUFFER_PADDING_SIZE);
+        if (ret < 0)
+            return ret;
+        for (i = 0; i < 4; i++) {
+            ret = av_reallocp(&ctx->op_data[i], ctx->op_size[i]);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
     /* Decompress texture out of the intermediate compression. */
     ret = decompress_tex(avctx);
     if (ret < 0)
@@ -482,10 +1226,6 @@  static int dxv_init(AVCodecContext *avctx)
     avctx->coded_height = FFALIGN(avctx->height, 16);
 
     ff_texturedsp_init(&ctx->texdsp);
-    avctx->pix_fmt = AV_PIX_FMT_RGBA;
-
-    ctx->slice_count = av_clip(avctx->thread_count, 1,
-                               avctx->coded_height / TEXTURE_BLOCK_H);
 
     return 0;
 }
@@ -495,6 +1235,11 @@  static int dxv_close(AVCodecContext *avctx)
     DXVContext *ctx = avctx->priv_data;
 
     av_freep(&ctx->tex_data);
+    av_freep(&ctx->ctex_data);
+    av_freep(&ctx->op_data[0]);
+    av_freep(&ctx->op_data[1]);
+    av_freep(&ctx->op_data[2]);
+    av_freep(&ctx->op_data[3]);
 
     return 0;
 }