diff mbox

[FFmpeg-devel] avcodec/dxv: add support for "high" quality mode

Message ID 20180412103104.8910-1-onemda@gmail.com
State Superseded
Headers show

Commit Message

Paul B Mahol April 12, 2018, 10:31 a.m. UTC
Signed-off-by: Paul B Mahol <onemda@gmail.com>
---
 libavcodec/dxv.c | 1006 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 978 insertions(+), 28 deletions(-)

Comments

Rostislav Pehlivanov April 12, 2018, 12:57 p.m. UTC | #1
On 12 April 2018 at 11:31, Paul B Mahol <onemda@gmail.com> wrote:

> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
>  libavcodec/dxv.c | 1006 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 978 insertions(+), 28 deletions(-)
>
>
> +
> +static av_always_inline uint32_t yacocg2rgba(int yo, int co, int cg, int
> a)
> +{
> +    int r, g, b;
> +
> +    co = co - 127;
> +    cg = cg - 127;
> +
> +    r = av_clip_uint8(yo + co - cg);
> +    g = av_clip_uint8(yo + cg);
> +    b = av_clip_uint8(yo - co - cg);
> +
> +    return (a << 24) | (b << 16) | (g << 8) | (r);
> +}


Cinepak all over again? We do not do, and are never going to do, conversion
inside decoders. Output it as YCoCg by writing directly to the data planes;
we support it. It doesn't matter if it's perfectly mappable to RGB, if you
allow for 2 more bits of precision.

On 12 April 2018 at 11:31, Paul B Mahol <onemda@gmail.com> wrote:

> Signed-off-by: Paul B Mahol <onemda@gmail.com>
> ---
>  libavcodec/dxv.c | 1006 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 978 insertions(+), 28 deletions(-)
>
> diff --git a/libavcodec/dxv.c b/libavcodec/dxv.c
> index 529e211258..6308163735 100644
> --- a/libavcodec/dxv.c
> +++ b/libavcodec/dxv.c
> @@ -1,6 +1,7 @@
>  /*
>   * Resolume DXV decoder
>   * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
> + * Copyright (C) 2018 Paul B Mahol
>   *
>   * This file is part of FFmpeg.
>   *
> @@ -23,6 +24,7 @@
>
>  #include "libavutil/imgutils.h"
>
> +#include "mathops.h"
>  #include "avcodec.h"
>  #include "bytestream.h"
>  #include "internal.h"
> @@ -34,53 +36,250 @@ typedef struct DXVContext {
>      TextureDSPContext texdsp;
>      GetByteContext gbc;
>
> -    uint8_t *tex_data;  // Compressed texture
> -    int tex_rat;        // Compression ratio
> -    int tex_step;       // Distance between blocks
> -    int64_t tex_size;   // Texture size
> +    uint8_t *tex_data;   // Compressed texture
> +    uint8_t *ctex_data;  // Compressed texture
> +    int tex_rat;         // Compression ratio
> +    int tex_step;        // Distance between blocks
> +    int ctex_step;       // Distance between blocks
> +    int64_t tex_size;    // Texture size
> +    int64_t ctex_size;   // Texture size
>
>      /* Optimal number of slices for parallel decoding */
>      int slice_count;
>
> +    uint8_t *op_data[4]; // Opcodes
> +    int64_t op_size[4];  // Opcodes size
> +
> +    int texture_block_w;
> +    int texture_block_h;
> +
> +    int ctexture_block_w;
> +    int ctexture_block_h;
> +
>      /* Pointer to the selected decompression function */
>      int (*tex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t
> *block);
> +    int (*ctex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t
> *block);
>  } DXVContext;
>
> +static void decompress_indices(uint8_t *dst, const uint8_t *src)
> +{
> +    int block, i;
> +
> +    for (block = 0; block < 2; block++) {
> +        int tmp = AV_RL24(src);
> +
> +        /* Unpack 8x3 bit from last 3 byte block */
> +        for (i = 0; i < 8; i++)
> +            dst[i] = (tmp >> (i * 3)) & 0x7;
> +
> +        src += 3;
> +        dst += 8;
> +    }
> +}
> +
> +static int extract_component(int yo0, int yo1, int code)
> +{
> +    int yo;
> +
> +    if (yo0 == yo1) {
> +        yo = yo0;
> +    } else if (code == 0) {
> +        yo = yo0;
> +    } else if (code == 1) {
> +        yo = yo1;
> +    } else {
> +        if (yo0 > yo1) {
> +            yo = (uint8_t) (((8 - code) * yo0 +
> +                             (code - 1) * yo1) / 7);
> +        } else {
> +            if (code == 6) {
> +                yo = 0;
> +            } else if (code == 7) {
> +                yo = 255;
> +            } else {
> +                yo = (uint8_t) (((6 - code) * yo0 +
> +                                 (code - 1) * yo1) / 5);
> +            }
> +        }
> +    }
> +
> +    return yo;
> +}
> +
> +static av_always_inline uint32_t yacocg2rgba(int yo, int co, int cg, int
> a)
> +{
> +    int r, g, b;
> +
> +    co = co - 127;
> +    cg = cg - 127;
> +
> +    r = av_clip_uint8(yo + co - cg);
> +    g = av_clip_uint8(yo + cg);
> +    b = av_clip_uint8(yo - co - cg);
> +
> +    return (a << 24) | (b << 16) | (g << 8) | (r);
> +}
> +
> +static int cocg_block(uint8_t *dst, ptrdiff_t stride,
> +                      const uint8_t *block)
> +{
> +    uint8_t co_indices[16];
> +    uint8_t cg_indices[16];
> +    uint8_t co0 = *(block);
> +    uint8_t co1 = *(block + 1);
> +    uint8_t cg0 = *(block + 8);
> +    uint8_t cg1 = *(block + 9);
> +    int x, y;
> +
> +    decompress_indices(co_indices, block + 2);
> +    decompress_indices(cg_indices, block + 10);
> +
> +    for (y = 0; y < 4; y++) {
> +        for (x = 0; x < 4; x++) {
> +            int co_code = co_indices[x + y * 4];
> +            int cg_code = cg_indices[x + y * 4];
> +            uint8_t co, cg;
> +
> +            co = extract_component(co0, co1, co_code);
> +            cg = extract_component(cg0, cg1, cg_code);
> +
> +            dst[x * 8 + 1] = co;
> +            dst[x * 8 + 2] = cg;
> +            dst[x * 8 + 5] = co;
> +            dst[x * 8 + 6] = cg;
> +            dst[x * 8 + stride + 1] = co;
> +            dst[x * 8 + stride + 2] = cg;
> +            dst[x * 8 + stride + 5] = co;
> +            dst[x * 8 + stride + 6] = cg;
> +        }
> +        dst += 2 * stride;
> +    }
> +
> +    return 16;
> +}
> +
> +static void yo_subblock(uint8_t *dst, uint8_t *yo_indices,
> +                        ptrdiff_t stride, const uint8_t *block)
> +{
> +    uint8_t yo0 = *(block);
> +    uint8_t yo1 = *(block + 1);
> +    int x, y;
> +
> +    decompress_indices(yo_indices, block + 2);
> +
> +    for (y = 0; y < 4; y++) {
> +        for (x = 0; x < 4; x++) {
> +            int yo_code = yo_indices[x + y * 4];
> +            uint8_t yo;
> +
> +            yo = extract_component(yo0, yo1, yo_code);
> +
> +            AV_WL32(dst + x * 4, 255u << 24 | yo);
> +        }
> +        dst += stride;
> +    }
> +}
> +
> +static int yo_block(uint8_t *dst, ptrdiff_t stride,
> +                    const uint8_t *block)
> +{
> +    uint8_t yo_indices[16];
> +
> +    yo_subblock(dst,      yo_indices, stride, block);
> +    yo_subblock(dst + 16, yo_indices, stride, block + 8);
> +    yo_subblock(dst + 32, yo_indices, stride, block + 16);
> +    yo_subblock(dst + 48, yo_indices, stride, block + 24);
> +
> +    return 32;
> +}
> +
> +static void a_subblock(uint8_t *dst, uint8_t *a_indices,
> +                       ptrdiff_t stride, const uint8_t *block)
> +{
> +    uint8_t a0 = *(block);
> +    uint8_t a1 = *(block + 1);
> +    int x, y;
> +
> +    decompress_indices(a_indices, block + 2);
> +
> +    for (y = 0; y < 4; y++) {
> +        for (x = 0; x < 4; x++) {
> +            int a_code = a_indices[x + y * 4];
> +            uint8_t a;
> +
> +            a = extract_component(a0, a1, a_code);
> +            dst[x * 4 + 3] = a;
> +        }
> +        dst += stride;
> +    }
> +}
> +
> +static int yao_block(uint8_t *dst, ptrdiff_t stride,
> +                    const uint8_t *block)
> +{
> +    uint8_t yo_indices[16];
> +    uint8_t a_indices[16];
> +
> +    yo_subblock(dst,      yo_indices, stride, block);
> +    a_subblock(dst,       a_indices,  stride, block + 8);
> +    yo_subblock(dst + 16, yo_indices, stride, block + 16);
> +    a_subblock(dst  + 16, a_indices,  stride, block + 24);
> +    yo_subblock(dst + 32, yo_indices, stride, block + 32);
> +    a_subblock(dst  + 32, a_indices,  stride, block + 40);
> +    yo_subblock(dst + 48, yo_indices, stride, block + 48);
> +    a_subblock(dst  + 48, a_indices,  stride, block + 56);
> +
> +    return 64;
> +}
> +
>  static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
>                                       int slice, int thread_nb)
>  {
>      DXVContext *ctx = avctx->priv_data;
>      AVFrame *frame = arg;
>      const uint8_t *d = ctx->tex_data;
> -    int w_block = avctx->coded_width / TEXTURE_BLOCK_W;
> -    int h_block = avctx->coded_height / TEXTURE_BLOCK_H;
> +    const uint8_t *c = ctx->ctex_data;
> +    int w_block = avctx->coded_width / ctx->texture_block_w;
> +    int h_block = avctx->coded_height / ctx->texture_block_h;
>      int x, y;
>      int start_slice, end_slice;
> -    int base_blocks_per_slice = h_block / ctx->slice_count;
> -    int remainder_blocks = h_block % ctx->slice_count;
> -
> -    /* When the frame height (in blocks) doesn't divide evenly between the
> -     * number of slices, spread the remaining blocks evenly between the
> first
> -     * operations */
> -    start_slice = slice * base_blocks_per_slice;
> -    /* Add any extra blocks (one per slice) that have been added
> -     * before this slice */
> -    start_slice += FFMIN(slice, remainder_blocks);
> -
> -    end_slice = start_slice + base_blocks_per_slice;
> -    /* Add an extra block if there are remainder blocks to be accounted
> for */
> -    if (slice < remainder_blocks)
> -        end_slice++;
> +
> +    start_slice = h_block * slice / ctx->slice_count;
> +    end_slice = h_block * (slice + 1) / ctx->slice_count;
>
>      for (y = start_slice; y < end_slice; y++) {
> -        uint8_t *p = frame->data[0] + y * frame->linesize[0] *
> TEXTURE_BLOCK_H;
> +        uint8_t *p = frame->data[0] + y * frame->linesize[0] *
> ctx->texture_block_h;
>          int off  = y * w_block;
>          for (x = 0; x < w_block; x++) {
> -            ctx->tex_funct(p + x * 16, frame->linesize[0],
> +            ctx->tex_funct(p + x * 4 * ctx->texture_block_w,
> frame->linesize[0],
>                             d + (off + x) * ctx->tex_step);
>          }
>      }
>
> +    if (ctx->ctex_funct) {
> +        w_block = avctx->coded_width / ctx->ctexture_block_w;
> +        h_block = avctx->coded_height / ctx->ctexture_block_h;
> +
> +        start_slice = h_block * slice / ctx->slice_count;
> +        end_slice = h_block * (slice + 1) / ctx->slice_count;
> +
> +        for (y = start_slice; y < end_slice; y++) {
> +            uint8_t *p = frame->data[0] + y * frame->linesize[0] *
> ctx->ctexture_block_h;
> +            int off  = y * w_block;
> +            for (x = 0; x < w_block; x++) {
> +                ctx->ctex_funct(p + x * 4 * ctx->ctexture_block_w,
> frame->linesize[0],
> +                                c + (off + x) * ctx->ctex_step);
> +            }
> +        }
> +        for (y = start_slice * ctx->ctexture_block_h; y < FFMIN(end_slice
> * ctx->ctexture_block_h, avctx->height); y++) {
> +            uint8_t *p8 = frame->data[0] + y * frame->linesize[0];
> +            uint32_t *p = (uint32_t *)p8;
> +            for (x = 0; x < avctx->width; x++) {
> +                p[x] = yacocg2rgba(p8[4 * x], p8[4 * x + 1], p8[4 * x +
> 2], p8[4 * x + 3]);
> +            }
> +        }
> +    }
> +
>      return 0;
>  }
>
> @@ -169,6 +368,705 @@ static int dxv_decompress_dxt1(AVCodecContext
> *avctx)
>      return 0;
>  }
>
> +typedef struct OpcodeTable {
> +    int16_t next;
> +    uint8_t val1;
> +    uint8_t val2;
> +} OpcodeTable;
> +
> +static int fill_ltable(GetByteContext *gb, uint32_t *table, int
> *nb_elements)
> +{
> +    unsigned half = 512, bits = 1023, left = 1024, input, mask;
> +    int value, counter = 0, rshift = 10, lshift = 30;
> +
> +    mask = bytestream2_get_le32(gb) >> 2;
> +    while (left) {
> +        if (bytestream2_get_bytes_left(gb) < 0 || counter >= 256)
> +            return AVERROR_INVALIDDATA;
> +
> +        value = bits & mask;
> +        left -= bits & mask;
> +        mask >>= rshift;
> +        lshift -= rshift;
> +        table[counter++] = value;
> +        if (lshift < 16) {
> +            input = bytestream2_get_le16(gb);
> +            mask += input << lshift;
> +            lshift += 16;
> +        }
> +        if (left < half) {
> +            half >>= 1;
> +            bits >>= 1;
> +            rshift--;
> +        }
> +    }
> +
> +    for (; !table[counter - 1]; counter--)
> +        if (counter <= 0)
> +            return AVERROR_INVALIDDATA;
> +
> +    *nb_elements = counter;
> +
> +    if (counter < 256)
> +        memset(&table[counter], 0, 4 * (256 - counter));
> +
> +    if (lshift >= 16)
> +        bytestream2_seek(gb, -2, SEEK_CUR);
> +
> +    return 0;
> +}
> +
> +static int fill_optable(unsigned *table0, OpcodeTable *table1, int
> nb_elements)
> +{
> +    unsigned table2[256];
> +    unsigned x = 0;
> +    int val0, val1, i, j = 2, k = 0;
> +
> +    table2[0] = table0[0];
> +    for (i = 0; i < nb_elements - 1; i++, table2[i] = val0) {
> +        val0 = table0[i + 1] + table2[i];
> +    }
> +
> +    if (!table2[0]) {
> +        do {
> +            k++;
> +        } while (!table2[k]);
> +    }
> +
> +    j = 2;
> +    for (i = 1024; i > 0; i--) {
> +        for (table1[x].val1 = k; k < 256 && j > table2[k]; k++)
> +            ;
> +        x = (x - 383) & 0x3FF;
> +        j++;
> +    }
> +
> +    if (nb_elements > 0)
> +        memcpy(&table2[0], table0, 4 * nb_elements);
> +
> +    for (i = 0; i < 1024; i++) {
> +        val0 = table1[i].val1;
> +        val1 = table2[val0];
> +        table2[val0]++;
> +        x = 31 - ff_clz(val1);
> +        if (x > 10)
> +            return AVERROR_INVALIDDATA;
> +        table1[i].val2 = 10 - x;
> +        table1[i].next = (val1 << table1[i].val2) - 1024;
> +    }
> +
> +    return 0;
> +}
> +
> +static int get_opcodes(GetByteContext *gb, uint32_t *table, uint8_t *dst,
> int op_size, int nb_elements)
> +{
> +    OpcodeTable optable[1024];
> +    int sum, x, val, lshift, rshift, ret, size_in_bits, i, idx;
> +    int endoffset, newoffset, offset;
> +    unsigned next;
> +    uint8_t *src = (uint8_t *)gb->buffer;
> +
> +    ret = fill_optable(table, optable, nb_elements);
> +    if (ret < 0)
> +        return ret;
> +
> +    size_in_bits = bytestream2_get_le32(gb);
> +    endoffset = ((size_in_bits + 7) >> 3) - 4;
> +    if (endoffset <= 0 || bytestream2_get_bytes_left(gb) < endoffset)
> +        return AVERROR_INVALIDDATA;
> +
> +    offset = endoffset;
> +    next = AV_RL32(src + endoffset);
> +    rshift = (((size_in_bits & 0xFF) - 1) & 7) + 15;
> +    lshift = 32 - rshift;
> +    idx = (next >> rshift) & 0x3FF;
> +    for (i = 0; i < op_size; i++) {
> +        dst[i] = optable[idx].val1;
> +        val = optable[idx].val2;
> +        sum = val + lshift;
> +        x = (next << lshift) >> 1 >> (31 - val);
> +        newoffset = offset - (sum >> 3);
> +        lshift = sum & 7;
> +        idx = x + optable[idx].next;
> +        offset = newoffset;
> +        next = AV_RL32(src + offset);
> +    }
> +
> +    bytestream2_skip(gb, (size_in_bits + 7 >> 3) - 4);
> +
> +    return 0;
> +}
> +
> +static int dxv_decompress_opcodes(GetByteContext *gb, void *dstp, size_t
> op_size)
> +{
> +    int pos = bytestream2_tell(gb);
> +    int flag = bytestream2_peek_byte(gb);
> +
> +    if ((flag & 3) == 0) {
> +        bytestream2_skip(gb, 1);
> +        bytestream2_get_buffer(gb, dstp, op_size);
> +    } else if ((flag & 3) == 1) {
> +        bytestream2_skip(gb, 1);
> +        memset(dstp, bytestream2_get_byte(gb), op_size);
> +    } else {
> +        uint32_t table[256];
> +        int ret, elements = 0;
> +
> +        ret = fill_ltable(gb, table, &elements);
> +        if (ret < 0)
> +            return ret;
> +        ret = get_opcodes(gb, table, dstp, op_size, elements);
> +        if (ret < 0)
> +            return ret;
> +    }
> +    return bytestream2_tell(gb) - pos;
> +}
> +
> +static int dxv_decompress_cgo(DXVContext *ctx, GetByteContext *gb,
> +                              uint8_t *tex_data, int tex_size,
> +                              uint8_t *op_data, int *oindex,
> +                              uint8_t **dstp, int *statep,
> +                              uint8_t **tab0, uint8_t **tab1)
> +{
> +    uint8_t *dst = *dstp;
> +    uint8_t *tptr0, *tptr1, *tptr3;
> +    int oi = *oindex;
> +    int state = *statep;
> +    int opcode, v, vv;
> +
> +    if (state <= 0) {
> +        opcode = op_data[oi++];
> +        if (opcode) {
> +            switch (opcode) {
> +            case 1:
> +                AV_WL32(dst, AV_RL32(dst - 16));
> +                AV_WL32(dst + 4, AV_RL32(dst - 12));
> +                break;
> +            case 2:
> +                vv = 16 * ((uint16_t)bytestream2_get_le16(gb) + 1);
> +                if (vv < 0 || vv > dst - tex_data)
> +                    return AVERROR_INVALIDDATA;
> +                tptr0 = dst - vv;
> +                v = AV_RL32(tptr0);
> +                AV_WL32(dst, AV_RL32(tptr0));
> +                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> +                tab0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
> +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> +                break;
> +            case 3:
> +                AV_WL32(dst, bytestream2_get_le32(gb));
> +                AV_WL32(dst + 4, bytestream2_get_le32(gb));
> +                tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> +                break;
> +            case 4:
> +                tptr3 = tab1[bytestream2_get_byte(gb)];
> +                AV_WL16(dst, bytestream2_get_le16(gb));
> +                AV_WL16(dst + 2, AV_RL16(tptr3));
> +                dst[4] = tptr3[2];
> +                AV_WL16(dst + 5, bytestream2_get_le16(gb));
> +                dst[7] = bytestream2_get_byte(gb);
> +                tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> +                break;
> +            case 5:
> +                tptr3 = tab1[bytestream2_get_byte(gb)];
> +                if (!tptr3)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, bytestream2_get_le16(gb));
> +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> +                dst[4] = bytestream2_get_byte(gb);
> +                AV_WL16(dst + 5, AV_RL16(tptr3));
> +                dst[7] = tptr3[2];
> +                tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> +                break;
> +            case 6:
> +                tptr0 = tab1[bytestream2_get_byte(gb)];
> +                if (!tptr0)
> +                    return AVERROR_INVALIDDATA;
> +                tptr1 = tab1[bytestream2_get_byte(gb)];
> +                if (!tptr1)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, bytestream2_get_le16(gb));
> +                AV_WL16(dst + 2, AV_RL16(tptr0));
> +                dst[4] = tptr0[2];
> +                AV_WL16(dst + 5, AV_RL16(tptr1));
> +                dst[7] = tptr1[2];
> +                tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> +                break;
> +            case 7:
> +                v = 16 * ((uint16_t)bytestream2_get_le16(gb) + 1);
> +                if (v < 0 || v > dst - tex_data)
> +                    return AVERROR_INVALIDDATA;
> +                tptr0 = dst - v;
> +                AV_WL16(dst, bytestream2_get_le16(gb));
> +                AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> +                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> +                tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> +                break;
> +            case 8:
> +                tptr1 = tab0[bytestream2_get_byte(gb)];
> +                if (!tptr1)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, AV_RL16(tptr1));
> +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> +                AV_WL32(dst + 4, bytestream2_get_le32(gb));
> +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> +                break;
> +            case 9:
> +                tptr1 = tab0[bytestream2_get_byte(gb)];
> +                if (!tptr1)
> +                    return AVERROR_INVALIDDATA;
> +                tptr3 = tab1[bytestream2_get_byte(gb)];
> +                if (!tptr3)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, AV_RL16(tptr1));
> +                AV_WL16(dst + 2, AV_RL16(tptr3));
> +                dst[4] = tptr3[2];
> +                AV_WL16(dst + 5, bytestream2_get_le16(gb));
> +                dst[7] = bytestream2_get_byte(gb);
> +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> +                break;
> +            case 10:
> +                tptr1 = tab0[bytestream2_get_byte(gb)];
> +                if (!tptr1)
> +                    return AVERROR_INVALIDDATA;
> +                tptr3 = tab1[bytestream2_get_byte(gb)];
> +                if (!tptr3)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, AV_RL16(tptr1));
> +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> +                dst[4] = bytestream2_get_byte(gb);
> +                AV_WL16(dst + 5, AV_RL16(tptr3));
> +                dst[7] = tptr3[2];
> +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> +                break;
> +            case 11:
> +                tptr0 = tab0[bytestream2_get_byte(gb)];
> +                if (!tptr0)
> +                    return AVERROR_INVALIDDATA;
> +                tptr3 = tab1[bytestream2_get_byte(gb)];
> +                if (!tptr3)
> +                    return AVERROR_INVALIDDATA;
> +                tptr1 = tab1[bytestream2_get_byte(gb)];
> +                if (!tptr1)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, AV_RL16(tptr0));
> +                AV_WL16(dst + 2, AV_RL16(tptr3));
> +                dst[4] = tptr3[2];
> +                AV_WL16(dst + 5, AV_RL16(tptr1));
> +                dst[7] = tptr1[2];
> +                break;
> +            case 12:
> +                tptr1 = tab0[bytestream2_get_byte(gb)];
> +                if (!tptr1)
> +                    return AVERROR_INVALIDDATA;
> +                v = 16 * ((uint16_t)bytestream2_get_le16(gb) + 1);
> +                if (v < 0 || v > dst - tex_data)
> +                    return AVERROR_INVALIDDATA;
> +                tptr0 = dst - v;
> +                AV_WL16(dst, AV_RL16(tptr1));
> +                AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> +                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> +                break;
> +            case 13:
> +                AV_WL16(dst, AV_RL16(dst - 16));
> +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> +                AV_WL32(dst + 4, bytestream2_get_le32(gb));
> +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> +                break;
> +            case 14:
> +                tptr3 = tab1[bytestream2_get_byte(gb)];
> +                if (!tptr3)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, AV_RL16(dst - 16));
> +                AV_WL16(dst + 2, AV_RL16(tptr3));
> +                dst[4] = tptr3[2];
> +                AV_WL16(dst + 5, bytestream2_get_le16(gb));
> +                dst[7] = bytestream2_get_byte(gb);
> +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> +                break;
> +            case 15:
> +                tptr3 = tab1[bytestream2_get_byte(gb)];
> +                if (!tptr3)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, AV_RL16(dst - 16));
> +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> +                dst[4] = bytestream2_get_byte(gb);
> +                AV_WL16(dst + 5, AV_RL16(tptr3));
> +                dst[7] = tptr3[2];
> +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> +                break;
> +            case 16:
> +                tptr3 = tab1[bytestream2_get_byte(gb)];
> +                if (!tptr3)
> +                    return AVERROR_INVALIDDATA;
> +                tptr1 = tab1[bytestream2_get_byte(gb)];
> +                if (!tptr1)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, AV_RL16(dst - 16));
> +                AV_WL16(dst + 2, AV_RL16(tptr3));
> +                dst[4] = tptr3[2];
> +                AV_WL16(dst + 5, AV_RL16(tptr1));
> +                dst[7] = tptr1[2];
> +                break;
> +            case 17:
> +                v = 16 * ((uint16_t)bytestream2_get_le16(gb) + 1);
> +                if (v < 0 || v > dst - tex_data)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, AV_RL16(dst - 16));
> +                AV_WL16(dst + 2, AV_RL16(&dst[-v + 2]));
> +                AV_WL32(dst + 4, AV_RL32(&dst[-v + 4]));
> +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> dst + 2;
> +                break;
> +            default:
> +                break;
> +            }
> +        } else {
> +            v = bytestream2_get_byte(gb);
> +            if (v == 255) {
> +                do {
> +                    if (bytestream2_get_bytes_left(gb) <= 0)
> +                        return AVERROR_INVALIDDATA;
> +                    opcode = bytestream2_get_le16(gb);
> +                    v += opcode;
> +                } while (opcode == 0xFFFF);
> +            }
> +            AV_WL32(dst, AV_RL32(dst - 16));
> +            AV_WL32(dst + 4, AV_RL32(dst - 12));
> +            state = v + 3;
> +        }
> +    } else {
> +        AV_WL32(dst, AV_RL32(dst - 16));
> +        AV_WL32(dst + 4, AV_RL32(dst - 12));
> +        state--;
> +    }
> +    dst += 8;
> +
> +    *oindex = oi;
> +    *dstp = dst;
> +    *statep = state;
> +
> +    return 0;
> +}
> +
> +static int dxv_decompress_cocg(DXVContext *ctx, GetByteContext *gb,
> +                               uint8_t *tex_data, int tex_size,
> +                               uint8_t *op_data0, uint8_t *op_data1)
> +{
> +    uint8_t *dst, *tab2[256], *tab0[256], *tab3[256], *tab1[256];
> +    int op_offset = bytestream2_get_le32(gb);
> +    int op_size0 = bytestream2_get_le32(gb);
> +    int op_size1 = bytestream2_get_le32(gb);
> +    int data_start = bytestream2_tell(gb);
> +    int skip0, skip1, oi0 = 0, oi1 = 0;
> +    int ret, state0 = 0, state1 = 0;
> +
> +    dst = tex_data;
> +    bytestream2_skip(gb, op_offset - 12);
> +    skip0 = dxv_decompress_opcodes(gb, op_data0, op_size0);
> +    if (skip0 < 0)
> +        return skip0;
> +    bytestream2_seek(gb, data_start + op_offset + skip0 - 12, SEEK_SET);
> +    skip1 = dxv_decompress_opcodes(gb, op_data1, op_size1);
> +    if (skip1 < 0)
> +        return skip1;
> +    bytestream2_seek(gb, data_start, SEEK_SET);
> +
> +    AV_WL32(dst, bytestream2_get_le32(gb));
> +    AV_WL32(dst + 4, bytestream2_get_le32(gb));
> +    AV_WL32(dst + 8, bytestream2_get_le32(gb));
> +    AV_WL32(dst + 12, bytestream2_get_le32(gb));
> +
> +    tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> +    tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
> +    tab2[0x9E3779B1 * AV_RL16(dst + 8) >> 24] = dst + 8;
> +    tab3[0x9E3779B1 * (AV_RL32(dst + 10) & 0xFFFFFF) >> 24] = dst + 10;
> +    dst += 16;
> +    while (dst + 10 < tex_data + tex_size) {
> +        ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data0,
> &oi0, &dst, &state0, tab0, tab1);
> +        if (ret < 0)
> +            return ret;
> +        ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data1,
> &oi1, &dst, &state1, tab2, tab3);
> +        if (ret < 0)
> +            return ret;
> +    }
> +
> +    bytestream2_seek(gb, data_start + op_offset + skip0 + skip1 - 12,
> SEEK_SET);
> +
> +    return 0;
> +}
> +
> +static int dxv_decompress_yo(DXVContext *ctx, GetByteContext *gb,
> +                             uint8_t *tex_data, int tex_size, uint8_t
> *op_data)
> +{
> +    int op_offset  = bytestream2_get_le32(gb);
> +    int op_size    = bytestream2_get_le32(gb);
> +    int data_start = bytestream2_tell(gb);
> +    uint8_t *dst, *tptr0, *tptr1, *table0[256], *table1[256];
> +    int opcode, skip, oi = 0, v, vv;
> +
> +    dst = tex_data;
> +    bytestream2_skip(gb, op_offset - 8);
> +    skip = dxv_decompress_opcodes(gb, op_data, op_size);
> +    if (skip < 0)
> +        return skip;
> +    bytestream2_seek(gb, data_start, SEEK_SET);
> +
> +    v = bytestream2_get_le32(gb);
> +    AV_WL32(dst, v);
> +    vv = bytestream2_get_le32(gb);
> +    table0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
> +    AV_WL32(dst + 4, vv);
> +    table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
> +    dst += 8;
> +
> +    while (dst < tex_data + tex_size) {
> +        opcode = op_data[oi++];
> +        if (opcode) {
> +            switch (opcode) {
> +            case 1:
> +                AV_WL32(dst, AV_RL32(dst - 8));
> +                AV_WL32(dst + 4, AV_RL32(dst - 4));
> +                break;
> +            case 2:
> +                vv = 8 * (uint16_t)bytestream2_get_le16(gb) + 8;
> +                if (vv > dst - tex_data)
> +                    return AVERROR_INVALIDDATA;
> +                tptr0 = dst - vv;
> +                v = AV_RL32(tptr0);
> +                AV_WL32(dst, AV_RL32(tptr0));
> +                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> +                table0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
> +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFU) >> 24]
> = dst + 2;
> +                break;
> +            case 3:
> +                v = bytestream2_get_le32(gb);
> +                AV_WL32(dst, v);
> +                AV_WL32(dst + 4, bytestream2_get_le32(gb));
> +                vv = AV_RL32(dst + 2);
> +                table0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
> +                table1[0x9E3779B1 * (vv & 0xFFFFFF) >> 24] = dst + 2;
> +                break;
> +            case 4:
> +                tptr1 = table1[bytestream2_get_byte(gb)];
> +                if (!tptr1)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, bytestream2_get_le16(gb));
> +                AV_WL16(dst + 2, AV_RL16(tptr1));
> +                dst[4] = tptr1[2];
> +                AV_WL16(dst + 5, bytestream2_get_le16(gb));
> +                dst[7] = bytestream2_get_byte(gb);
> +                v = 0x9E3779B1 * AV_RL16(dst) >> 24;
> +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> = dst + 2;
> +                table0[v] = dst;
> +                break;
> +            case 5:
> +                tptr1 = table1[bytestream2_get_byte(gb)];
> +                if (!tptr1)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, bytestream2_get_le16(gb));
> +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> +                dst[4] = bytestream2_get_byte(gb);
> +                AV_WL16(dst + 5, AV_RL16(tptr1));
> +                dst[7] = tptr1[2];
> +                table0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> = dst + 2;
> +                break;
> +            case 6:
> +                tptr0 = table1[bytestream2_get_byte(gb)];
> +                if (!tptr0)
> +                    return AVERROR_INVALIDDATA;
> +                tptr1 = table1[bytestream2_get_byte(gb)];
> +                if (!tptr1)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, bytestream2_get_le16(gb));
> +                AV_WL16(dst + 2, AV_RL16(tptr0));
> +                dst[4] = tptr0[2];
> +                AV_WL16(dst + 5, AV_RL16(tptr1));
> +                dst[7] = tptr1[2];
> +                table0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> +                break;
> +            case 7:
> +                v = 8 * (uint16_t)bytestream2_get_le16(gb) + 8;
> +                if (v > dst - tex_data)
> +                    return AVERROR_INVALIDDATA;
> +                tptr0 = dst - v;
> +                AV_WL16(dst, bytestream2_get_le16(gb));
> +                AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> +                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> +                table0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> = dst + 2;
> +                break;
> +            case 8:
> +                tptr0 = table0[bytestream2_get_byte(gb)];
> +                if (!tptr0)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, AV_RL16(tptr0));
> +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> +                AV_WL32(dst + 4, bytestream2_get_le32(gb));
> +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> = dst + 2;
> +                break;
> +            case 9:
> +                tptr0 = table0[bytestream2_get_byte(gb)];
> +                if (!tptr0)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, AV_RL16(tptr0));
> +                tptr1 = table1[bytestream2_get_byte(gb)];
> +                if (!tptr1)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst + 2, AV_RL16(tptr1));
> +                dst[4] = tptr1[2];
> +                AV_WL16(dst + 5, bytestream2_get_le16(gb));
> +                dst[7] = bytestream2_get_byte(gb);
> +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> = dst + 2;
> +                break;
> +            case 10:
> +                tptr0 = table0[bytestream2_get_byte(gb)];
> +                if (!tptr0)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, AV_RL16(tptr0));
> +                tptr1 = table1[bytestream2_get_byte(gb)];
> +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> +                dst[4] = bytestream2_get_byte(gb);
> +                AV_WL16(dst + 5, AV_RL16(tptr1));
> +                dst[7] = tptr1[2];
> +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> = dst + 2;
> +                break;
> +            case 11:
> +                tptr0 = table0[bytestream2_get_byte(gb)];
> +                if (!tptr0)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, AV_RL16(tptr0));
> +                tptr0 = table1[bytestream2_get_byte(gb)];
> +                if (!tptr0)
> +                    return AVERROR_INVALIDDATA;
> +                tptr1 = table1[bytestream2_get_byte(gb)];
> +                if (!tptr1)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst + 2, AV_RL16(tptr0));
> +                dst[4] = tptr0[2];
> +                AV_WL16(dst + 5, AV_RL16(tptr1));
> +                dst[7] = tptr1[2];
> +                break;
> +            case 12:
> +                tptr0 = table0[bytestream2_get_byte(gb)];
> +                if (!tptr0)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, AV_RL16(tptr0));
> +                tptr0 = dst - (8 * bytestream2_get_le16(gb) + 8);
> +                AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> +                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24]
> = dst + 2;
> +                break;
> +            case 13:
> +                AV_WL16(dst, AV_RL16(dst - 8));
> +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> +                AV_WL32(dst + 4, bytestream2_get_le32(gb));
> +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24]
> = dst + 2;
> +                break;
> +            case 14:
> +                tptr1 = table1[bytestream2_get_byte(gb)];
> +                if (!tptr1)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, AV_RL16(dst - 8));
> +                AV_WL16(dst + 2, AV_RL16(tptr1));
> +                dst[4] = tptr1[2];
> +                AV_WL16(dst + 5, bytestream2_get_le16(gb));
> +                dst[7] = bytestream2_get_byte(gb);
> +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24]
> = dst + 2;
> +                break;
> +            case 15:
> +                tptr1 = table1[bytestream2_get_byte(gb)];
> +                if (!tptr1)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, AV_RL16(dst - 8));
> +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> +                dst[4] = bytestream2_get_byte(gb);
> +                AV_WL16(dst + 5, AV_RL16(tptr1));
> +                dst[7] = tptr1[2];
> +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> = dst + 2;
> +                break;
> +            case 16:
> +                tptr0 = table1[bytestream2_get_byte(gb)];
> +                if (!tptr0)
> +                    return AVERROR_INVALIDDATA;
> +                tptr1 = table1[bytestream2_get_byte(gb)];
> +                if (!tptr1)
> +                    return AVERROR_INVALIDDATA;
> +                AV_WL16(dst, AV_RL16(dst - 8));
> +                AV_WL16(dst + 2, AV_RL16(tptr0));
> +                dst[4] = tptr0[2];
> +                AV_WL16(dst + 5, AV_RL16(tptr1));
> +                dst[7] = tptr1[2];
> +                break;
> +            case 17:
> +                v = 8 * bytestream2_get_le16(gb) + 8;
> +                if (v > dst - tex_data)
> +                    return AVERROR_INVALIDDATA;
> +                tptr0 = dst - v;
> +                AV_WL16(dst, AV_RL16(dst - 8));
> +                AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> +                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> = dst + 2;
> +                break;
> +            default:
> +                break;
> +            }
> +            dst += 8;
> +        } else {
> +            v = bytestream2_get_byte(gb);
> +            if (v == 255) {
> +                do {
> +                    if (bytestream2_get_bytes_left(gb) <= 0)
> +                        return AVERROR_INVALIDDATA;
> +                    opcode = bytestream2_get_le16(gb);
> +                    v += opcode;
> +                } while (opcode == 0xFFFF);
> +            }
> +            vv = v + 4;
> +            do {
> +                AV_WL32(dst, AV_RL32(dst - 8));
> +                AV_WL32(dst + 4, AV_RL32(dst - 4));
> +                dst += 8;
> +                --vv;
> +            } while (vv);
> +        }
> +    }
> +
> +    bytestream2_seek(gb, data_start + op_offset + skip - 8, SEEK_SET);
> +
> +    return 0;
> +}
> +
> +static int dxv_decompress_ycg6(AVCodecContext *avctx)
> +{
> +    DXVContext *ctx = avctx->priv_data;
> +    GetByteContext *gb = &ctx->gbc;
> +    int ret;
> +
> +    ret = dxv_decompress_yo(ctx, gb, ctx->tex_data, ctx->tex_size,
> ctx->op_data[0]);
> +    if (ret < 0)
> +        return ret;
> +
> +    return dxv_decompress_cocg(ctx, gb, ctx->ctex_data, ctx->ctex_size,
> ctx->op_data[1], ctx->op_data[2]);
> +}
> +
> +static int dxv_decompress_yg10(AVCodecContext *avctx)
> +{
> +    DXVContext *ctx = avctx->priv_data;
> +    GetByteContext *gb = &ctx->gbc;
> +    int ret;
> +
> +    ret = dxv_decompress_cocg(ctx, gb, ctx->tex_data, ctx->tex_size,
> ctx->op_data[0], ctx->op_data[3]);
> +    if (ret < 0)
> +        return ret;
> +
> +    return dxv_decompress_cocg(ctx, gb, ctx->ctex_data, ctx->ctex_size,
> ctx->op_data[1], ctx->op_data[2]);
> +}
> +
>  static int dxv_decompress_dxt5(AVCodecContext *avctx)
>  {
>      DXVContext *ctx = avctx->priv_data;
> @@ -359,6 +1257,9 @@ static int dxv_decode(AVCodecContext *avctx, void
> *data,
>
>      bytestream2_init(gbc, avpkt->data, avpkt->size);
>
> +    ctx->texture_block_h = 4;
> +    ctx->texture_block_w = 4;
> +
>      tag = bytestream2_get_le32(gbc);
>      switch (tag) {
>      case MKBETAG('D', 'X', 'T', '1'):
> @@ -378,9 +1279,35 @@ static int dxv_decode(AVCodecContext *avctx, void
> *data,
>          msgtext = "DXT5";
>          break;
>      case MKBETAG('Y', 'C', 'G', '6'):
> +        decompress_tex = dxv_decompress_ycg6;
> +        ctx->tex_funct = yo_block;
> +        ctx->ctex_funct = cocg_block;
> +        ctx->tex_rat   = 8;
> +        ctx->tex_step  = 32;
> +        ctx->ctex_step = 16;
> +        msgcomp = "YOCOCG6";
> +        msgtext = "YCG6";
> +        ctx->ctex_size = avctx->coded_width * avctx->coded_height / 4;
> +        ctx->texture_block_h = 4;
> +        ctx->texture_block_w = 16;
> +        ctx->ctexture_block_h = 8;
> +        ctx->ctexture_block_w = 8;
> +        break;
>      case MKBETAG('Y', 'G', '1', '0'):
> -        avpriv_report_missing_feature(avctx, "Tag 0x%08"PRIX32, tag);
> -        return AVERROR_PATCHWELCOME;
> +        decompress_tex = dxv_decompress_yg10;
> +        ctx->tex_funct = yao_block;
> +        ctx->ctex_funct = cocg_block;
> +        ctx->tex_rat   = 4;
> +        ctx->tex_step  = 64;
> +        ctx->ctex_step = 16;
> +        msgcomp = "YAOCOCG10";
> +        msgtext = "YG10";
> +        ctx->ctex_size = avctx->coded_width * avctx->coded_height / 4;
> +        ctx->texture_block_h = 4;
> +        ctx->texture_block_w = 16;
> +        ctx->ctexture_block_h = 8;
> +        ctx->ctexture_block_w = 8;
> +        break;
>      default:
>          /* Old version does not have a real header, just size and type. */
>          size = tag & 0x00FFFFFF;
> @@ -413,6 +1340,10 @@ static int dxv_decode(AVCodecContext *avctx, void
> *data,
>          break;
>      }
>
> +    ctx->slice_count = av_clip(avctx->thread_count, 1,
> +                               avctx->coded_height /
> FFMAX(ctx->texture_block_h,
> +
>  ctx->ctexture_block_h));
> +
>      /* New header is 12 bytes long. */
>      if (!old_type) {
>          version_major = bytestream2_get_byte(gbc) - 1;
> @@ -444,6 +1375,24 @@ static int dxv_decode(AVCodecContext *avctx, void
> *data,
>      if (ret < 0)
>          return ret;
>
> +    if (ctx->ctex_size) {
> +        int i;
> +
> +        ctx->op_size[0] = avctx->coded_width * avctx->coded_height / 16;
> +        ctx->op_size[1] = avctx->coded_width * avctx->coded_height / 32;
> +        ctx->op_size[2] = avctx->coded_width * avctx->coded_height / 32;
> +        ctx->op_size[3] = avctx->coded_width * avctx->coded_height / 16;
> +
> +        ret = av_reallocp(&ctx->ctex_data, ctx->ctex_size);
> +        if (ret < 0)
> +            return ret;
> +        for (i = 0; i < 4; i++) {
> +            ret = av_reallocp(&ctx->op_data[i], ctx->op_size[i]);
> +            if (ret < 0)
> +                return ret;
> +        }
> +    }
> +
>      /* Decompress texture out of the intermediate compression. */
>      ret = decompress_tex(avctx);
>      if (ret < 0)
> @@ -484,9 +1433,6 @@ static int dxv_init(AVCodecContext *avctx)
>      ff_texturedsp_init(&ctx->texdsp);
>      avctx->pix_fmt = AV_PIX_FMT_RGBA;
>
> -    ctx->slice_count = av_clip(avctx->thread_count, 1,
> -                               avctx->coded_height / TEXTURE_BLOCK_H);
> -
>      return 0;
>  }
>
> @@ -495,6 +1441,10 @@ static int dxv_close(AVCodecContext *avctx)
>      DXVContext *ctx = avctx->priv_data;
>
>      av_freep(&ctx->tex_data);
> +    av_freep(&ctx->ctex_data);
> +    av_freep(&ctx->op_data[0]);
> +    av_freep(&ctx->op_data[1]);
> +    av_freep(&ctx->op_data[2]);
>
>      return 0;
>  }
> --
> 2.11.0
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
Paul B Mahol April 12, 2018, 1 p.m. UTC | #2
On 4/12/18, Rostislav Pehlivanov <atomnuker@gmail.com> wrote:
> On 12 April 2018 at 11:31, Paul B Mahol <onemda@gmail.com> wrote:
>
>> Signed-off-by: Paul B Mahol <onemda@gmail.com>
>> ---
>>  libavcodec/dxv.c | 1006 ++++++++++++++++++++++++++++++
>> ++++++++++++++++++++++--
>>  1 file changed, 978 insertions(+), 28 deletions(-)
>>
>>
>> +
>> +static av_always_inline uint32_t yacocg2rgba(int yo, int co, int cg, int
>> a)
>> +{
>> +    int r, g, b;
>> +
>> +    co = co - 127;
>> +    cg = cg - 127;
>> +
>> +    r = av_clip_uint8(yo + co - cg);
>> +    g = av_clip_uint8(yo + cg);
>> +    b = av_clip_uint8(yo - co - cg);
>> +
>> +    return (a << 24) | (b << 16) | (g << 8) | (r);
>> +}
>
>
> Cinepak all over again? We're not doing and are never going to do
> conversion inside decoders. Output it as YCoCg by writing directly to the
> data planes. We support it. It doesn't matter if it's perfectly mappable to
> RGB, if you allow for 2 more bits of precision.

This is subsampled YCgCo, which is never going to be implemented in libswscale.

And there is already code in texturedsp which deals with this, but it is
for the non-subsampled case.
Michael Niedermayer April 12, 2018, 1:49 p.m. UTC | #3
On Thu, Apr 12, 2018 at 01:57:34PM +0100, Rostislav Pehlivanov wrote:
> On 12 April 2018 at 11:31, Paul B Mahol <onemda@gmail.com> wrote:
> 
> > Signed-off-by: Paul B Mahol <onemda@gmail.com>
> > ---
> >  libavcodec/dxv.c | 1006 ++++++++++++++++++++++++++++++
> > ++++++++++++++++++++++--
> >  1 file changed, 978 insertions(+), 28 deletions(-)
> >
> >
> > +
> > +static av_always_inline uint32_t yacocg2rgba(int yo, int co, int cg, int
> > a)
> > +{
> > +    int r, g, b;
> > +
> > +    co = co - 127;
> > +    cg = cg - 127;
> > +
> > +    r = av_clip_uint8(yo + co - cg);
> > +    g = av_clip_uint8(yo + cg);
> > +    b = av_clip_uint8(yo - co - cg);
> > +
> > +    return (a << 24) | (b << 16) | (g << 8) | (r);
> > +}
> 
> 
> Cinepak all over again? We're not doing and are never going to do

No, it's different.
The cinepak case was a volunteer who wanted to maintain the cinepak code, had
a patch that made the decoder several fold faster (in his practical use) and
possibly closer in output to the binary reference. That was done by applying the
colorspace conversion to the vector quantizer data tables, IIRC.

He was attacked.
Cinepak in ffmpeg has been unmaintained since then, and there has been no further
contribution from him since then either.

This case here is very different; no one depends on this, IIUC.
So I have no strong opinion on this.

The mention of cinepak as a "good" example is what makes me a bit upset.
I do not think we should be proud of how that cinepak case was handled.


> conversion inside decoders. Output it as YCoCg by writing directly to the
> data planes. We support it. It doesn't matter if it's perfectly mappable to
> RGB, if you allow for 2 more bits of precision.


Thanks

[...]
wm4 April 12, 2018, 2:26 p.m. UTC | #4
On Thu, 12 Apr 2018 15:49:30 +0200
Michael Niedermayer <michael@niedermayer.cc> wrote:

> On Thu, Apr 12, 2018 at 01:57:34PM +0100, Rostislav Pehlivanov wrote:
> > On 12 April 2018 at 11:31, Paul B Mahol <onemda@gmail.com> wrote:
> >   
> > > Signed-off-by: Paul B Mahol <onemda@gmail.com>
> > > ---
> > >  libavcodec/dxv.c | 1006 ++++++++++++++++++++++++++++++
> > > ++++++++++++++++++++++--
> > >  1 file changed, 978 insertions(+), 28 deletions(-)
> > >
> > >
> > > +
> > > +static av_always_inline uint32_t yacocg2rgba(int yo, int co, int cg, int
> > > a)
> > > +{
> > > +    int r, g, b;
> > > +
> > > +    co = co - 127;
> > > +    cg = cg - 127;
> > > +
> > > +    r = av_clip_uint8(yo + co - cg);
> > > +    g = av_clip_uint8(yo + cg);
> > > +    b = av_clip_uint8(yo - co - cg);
> > > +
> > > +    return (a << 24) | (b << 16) | (g << 8) | (r);
> > > +}  
> > 
> > 
> > Cinepak all over again? We're not doing and are never going to do  
> 
> no, its different.
> The cinepak case was a volunteer who wanted to maintain the cinepak code, had
> a patch that made the decoder several fold faster (in his practical use) and 
> possibly closer in output to the binary reference. That by doing colorspace
> convertion to the vector quantizer data tables IIRC
> 
> He was attacked
> cinepak in ffmpeg is unmaintained since then and there has been no further
> contribution from him since then either.
> 
> This case here is very different, noone depends on this IIUC.
> So i have no strong oppinion on this.
> 
> The mention of cinepak as a "good" example is what makes me a bit upset.
> I do not think we should be proud of how that cinepak case was handled.

No, he posted a patch that was unacceptable for several reasons. These
were pointed out. He then made a drama out of it, implying something
about how FFmpeg will fade into irrelevance because Cinepak support is
not the "best". And now you drag out this drama, for whatever reason.

He also failed to provide explanations why this was supposed to be
necessary. Keep in mind that he wanted to decode directly to obscure
packed RGB formats and such.

Also Cinepak is an obscure garbage codec, so it's not a big loss that
it's a bit slower. Nobody complained about Cinepak support ever, except
that one guy.
wm4 April 12, 2018, 2:27 p.m. UTC | #5
On Thu, 12 Apr 2018 13:57:34 +0100
Rostislav Pehlivanov <atomnuker@gmail.com> wrote:

> On 12 April 2018 at 11:31, Paul B Mahol <onemda@gmail.com> wrote:
> 
> > Signed-off-by: Paul B Mahol <onemda@gmail.com>
> > ---
> >  libavcodec/dxv.c | 1006 ++++++++++++++++++++++++++++++
> > ++++++++++++++++++++++--
> >  1 file changed, 978 insertions(+), 28 deletions(-)
> >
> >
> > +
> > +static av_always_inline uint32_t yacocg2rgba(int yo, int co, int cg, int
> > a)
> > +{
> > +    int r, g, b;
> > +
> > +    co = co - 127;
> > +    cg = cg - 127;
> > +
> > +    r = av_clip_uint8(yo + co - cg);
> > +    g = av_clip_uint8(yo + cg);
> > +    b = av_clip_uint8(yo - co - cg);
> > +
> > +    return (a << 24) | (b << 16) | (g << 8) | (r);
> > +}  
> 
> 
> Cinepak all over again? We're not doing and are never going to do
> conversion inside decoders. Output it as YCoCg by writing directly to the
> data planes. We support it. It doesn't matter if it's perfectly mappable to
> RGB, if you allow for 2 more bits of precision.

+1

> On 12 April 2018 at 11:31, Paul B Mahol <onemda@gmail.com> wrote:
> 
> > Signed-off-by: Paul B Mahol <onemda@gmail.com>
> > ---
> >  libavcodec/dxv.c | 1006 ++++++++++++++++++++++++++++++
> > ++++++++++++++++++++++--
> >  1 file changed, 978 insertions(+), 28 deletions(-)
> >
> > diff --git a/libavcodec/dxv.c b/libavcodec/dxv.c
> > index 529e211258..6308163735 100644
> > --- a/libavcodec/dxv.c
> > +++ b/libavcodec/dxv.c
> > @@ -1,6 +1,7 @@
> >  /*
> >   * Resolume DXV decoder
> >   * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
> > + * Copyright (C) 2018 Paul B Mahol
> >   *
> >   * This file is part of FFmpeg.
> >   *
> > @@ -23,6 +24,7 @@
> >
> >  #include "libavutil/imgutils.h"
> >
> > +#include "mathops.h"
> >  #include "avcodec.h"
> >  #include "bytestream.h"
> >  #include "internal.h"
> > @@ -34,53 +36,250 @@ typedef struct DXVContext {
> >      TextureDSPContext texdsp;
> >      GetByteContext gbc;
> >
> > -    uint8_t *tex_data;  // Compressed texture
> > -    int tex_rat;        // Compression ratio
> > -    int tex_step;       // Distance between blocks
> > -    int64_t tex_size;   // Texture size
> > +    uint8_t *tex_data;   // Compressed texture
> > +    uint8_t *ctex_data;  // Compressed texture
> > +    int tex_rat;         // Compression ratio
> > +    int tex_step;        // Distance between blocks
> > +    int ctex_step;       // Distance between blocks
> > +    int64_t tex_size;    // Texture size
> > +    int64_t ctex_size;   // Texture size
> >
> >      /* Optimal number of slices for parallel decoding */
> >      int slice_count;
> >
> > +    uint8_t *op_data[4]; // Opcodes
> > +    int64_t op_size[4];  // Opcodes size
> > +
> > +    int texture_block_w;
> > +    int texture_block_h;
> > +
> > +    int ctexture_block_w;
> > +    int ctexture_block_h;
> > +
> >      /* Pointer to the selected decompression function */
> >      int (*tex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t
> > *block);
> > +    int (*ctex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t
> > *block);
> >  } DXVContext;
> >
> > +static void decompress_indices(uint8_t *dst, const uint8_t *src)
> > +{
> > +    int block, i;
> > +
> > +    for (block = 0; block < 2; block++) {
> > +        int tmp = AV_RL24(src);
> > +
> > +        /* Unpack 8x3 bit from last 3 byte block */
> > +        for (i = 0; i < 8; i++)
> > +            dst[i] = (tmp >> (i * 3)) & 0x7;
> > +
> > +        src += 3;
> > +        dst += 8;
> > +    }
> > +}
> > +
> > +static int extract_component(int yo0, int yo1, int code)
> > +{
> > +    int yo;
> > +
> > +    if (yo0 == yo1) {
> > +        yo = yo0;
> > +    } else if (code == 0) {
> > +        yo = yo0;
> > +    } else if (code == 1) {
> > +        yo = yo1;
> > +    } else {
> > +        if (yo0 > yo1) {
> > +            yo = (uint8_t) (((8 - code) * yo0 +
> > +                             (code - 1) * yo1) / 7);
> > +        } else {
> > +            if (code == 6) {
> > +                yo = 0;
> > +            } else if (code == 7) {
> > +                yo = 255;
> > +            } else {
> > +                yo = (uint8_t) (((6 - code) * yo0 +
> > +                                 (code - 1) * yo1) / 5);
> > +            }
> > +        }
> > +    }
> > +
> > +    return yo;
> > +}
> > +
> > +static av_always_inline uint32_t yacocg2rgba(int yo, int co, int cg, int
> > a)
> > +{
> > +    int r, g, b;
> > +
> > +    co = co - 127;
> > +    cg = cg - 127;
> > +
> > +    r = av_clip_uint8(yo + co - cg);
> > +    g = av_clip_uint8(yo + cg);
> > +    b = av_clip_uint8(yo - co - cg);
> > +
> > +    return (a << 24) | (b << 16) | (g << 8) | (r);
> > +}
> > +
> > +static int cocg_block(uint8_t *dst, ptrdiff_t stride,
> > +                      const uint8_t *block)
> > +{
> > +    uint8_t co_indices[16];
> > +    uint8_t cg_indices[16];
> > +    uint8_t co0 = *(block);
> > +    uint8_t co1 = *(block + 1);
> > +    uint8_t cg0 = *(block + 8);
> > +    uint8_t cg1 = *(block + 9);
> > +    int x, y;
> > +
> > +    decompress_indices(co_indices, block + 2);
> > +    decompress_indices(cg_indices, block + 10);
> > +
> > +    for (y = 0; y < 4; y++) {
> > +        for (x = 0; x < 4; x++) {
> > +            int co_code = co_indices[x + y * 4];
> > +            int cg_code = cg_indices[x + y * 4];
> > +            uint8_t co, cg;
> > +
> > +            co = extract_component(co0, co1, co_code);
> > +            cg = extract_component(cg0, cg1, cg_code);
> > +
> > +            dst[x * 8 + 1] = co;
> > +            dst[x * 8 + 2] = cg;
> > +            dst[x * 8 + 5] = co;
> > +            dst[x * 8 + 6] = cg;
> > +            dst[x * 8 + stride + 1] = co;
> > +            dst[x * 8 + stride + 2] = cg;
> > +            dst[x * 8 + stride + 5] = co;
> > +            dst[x * 8 + stride + 6] = cg;
> > +        }
> > +        dst += 2 * stride;
> > +    }
> > +
> > +    return 16;
> > +}
> > +
> > +static void yo_subblock(uint8_t *dst, uint8_t *yo_indices,
> > +                        ptrdiff_t stride, const uint8_t *block)
> > +{
> > +    uint8_t yo0 = *(block);
> > +    uint8_t yo1 = *(block + 1);
> > +    int x, y;
> > +
> > +    decompress_indices(yo_indices, block + 2);
> > +
> > +    for (y = 0; y < 4; y++) {
> > +        for (x = 0; x < 4; x++) {
> > +            int yo_code = yo_indices[x + y * 4];
> > +            uint8_t yo;
> > +
> > +            yo = extract_component(yo0, yo1, yo_code);
> > +
> > +            AV_WL32(dst + x * 4, 255u << 24 | yo);
> > +        }
> > +        dst += stride;
> > +    }
> > +}
> > +
> > +static int yo_block(uint8_t *dst, ptrdiff_t stride,
> > +                    const uint8_t *block)
> > +{
> > +    uint8_t yo_indices[16];
> > +
> > +    yo_subblock(dst,      yo_indices, stride, block);
> > +    yo_subblock(dst + 16, yo_indices, stride, block + 8);
> > +    yo_subblock(dst + 32, yo_indices, stride, block + 16);
> > +    yo_subblock(dst + 48, yo_indices, stride, block + 24);
> > +
> > +    return 32;
> > +}
> > +
> > +static void a_subblock(uint8_t *dst, uint8_t *a_indices,
> > +                       ptrdiff_t stride, const uint8_t *block)
> > +{
> > +    uint8_t a0 = *(block);
> > +    uint8_t a1 = *(block + 1);
> > +    int x, y;
> > +
> > +    decompress_indices(a_indices, block + 2);
> > +
> > +    for (y = 0; y < 4; y++) {
> > +        for (x = 0; x < 4; x++) {
> > +            int a_code = a_indices[x + y * 4];
> > +            uint8_t a;
> > +
> > +            a = extract_component(a0, a1, a_code);
> > +            dst[x * 4 + 3] = a;
> > +        }
> > +        dst += stride;
> > +    }
> > +}
> > +
> > +static int yao_block(uint8_t *dst, ptrdiff_t stride,
> > +                    const uint8_t *block)
> > +{
> > +    uint8_t yo_indices[16];
> > +    uint8_t a_indices[16];
> > +
> > +    yo_subblock(dst,      yo_indices, stride, block);
> > +    a_subblock(dst,       a_indices,  stride, block + 8);
> > +    yo_subblock(dst + 16, yo_indices, stride, block + 16);
> > +    a_subblock(dst  + 16, a_indices,  stride, block + 24);
> > +    yo_subblock(dst + 32, yo_indices, stride, block + 32);
> > +    a_subblock(dst  + 32, a_indices,  stride, block + 40);
> > +    yo_subblock(dst + 48, yo_indices, stride, block + 48);
> > +    a_subblock(dst  + 48, a_indices,  stride, block + 56);
> > +
> > +    return 64;
> > +}
> > +
> >  static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
> >                                       int slice, int thread_nb)
> >  {
> >      DXVContext *ctx = avctx->priv_data;
> >      AVFrame *frame = arg;
> >      const uint8_t *d = ctx->tex_data;
> > -    int w_block = avctx->coded_width / TEXTURE_BLOCK_W;
> > -    int h_block = avctx->coded_height / TEXTURE_BLOCK_H;
> > +    const uint8_t *c = ctx->ctex_data;
> > +    int w_block = avctx->coded_width / ctx->texture_block_w;
> > +    int h_block = avctx->coded_height / ctx->texture_block_h;
> >      int x, y;
> >      int start_slice, end_slice;
> > -    int base_blocks_per_slice = h_block / ctx->slice_count;
> > -    int remainder_blocks = h_block % ctx->slice_count;
> > -
> > -    /* When the frame height (in blocks) doesn't divide evenly between the
> > -     * number of slices, spread the remaining blocks evenly between the
> > first
> > -     * operations */
> > -    start_slice = slice * base_blocks_per_slice;
> > -    /* Add any extra blocks (one per slice) that have been added
> > -     * before this slice */
> > -    start_slice += FFMIN(slice, remainder_blocks);
> > -
> > -    end_slice = start_slice + base_blocks_per_slice;
> > -    /* Add an extra block if there are remainder blocks to be accounted
> > for */
> > -    if (slice < remainder_blocks)
> > -        end_slice++;
> > +
> > +    start_slice = h_block * slice / ctx->slice_count;
> > +    end_slice = h_block * (slice + 1) / ctx->slice_count;
> >
> >      for (y = start_slice; y < end_slice; y++) {
> > -        uint8_t *p = frame->data[0] + y * frame->linesize[0] *
> > TEXTURE_BLOCK_H;
> > +        uint8_t *p = frame->data[0] + y * frame->linesize[0] *
> > ctx->texture_block_h;
> >          int off  = y * w_block;
> >          for (x = 0; x < w_block; x++) {
> > -            ctx->tex_funct(p + x * 16, frame->linesize[0],
> > +            ctx->tex_funct(p + x * 4 * ctx->texture_block_w,
> > frame->linesize[0],
> >                             d + (off + x) * ctx->tex_step);
> >          }
> >      }
> >
> > +    if (ctx->ctex_funct) {
> > +        w_block = avctx->coded_width / ctx->ctexture_block_w;
> > +        h_block = avctx->coded_height / ctx->ctexture_block_h;
> > +
> > +        start_slice = h_block * slice / ctx->slice_count;
> > +        end_slice = h_block * (slice + 1) / ctx->slice_count;
> > +
> > +        for (y = start_slice; y < end_slice; y++) {
> > +            uint8_t *p = frame->data[0] + y * frame->linesize[0] *
> > ctx->ctexture_block_h;
> > +            int off  = y * w_block;
> > +            for (x = 0; x < w_block; x++) {
> > +                ctx->ctex_funct(p + x * 4 * ctx->ctexture_block_w,
> > frame->linesize[0],
> > +                                c + (off + x) * ctx->ctex_step);
> > +            }
> > +        }
> > +        for (y = start_slice * ctx->ctexture_block_h; y < FFMIN(end_slice
> > * ctx->ctexture_block_h, avctx->height); y++) {
> > +            uint8_t *p8 = frame->data[0] + y * frame->linesize[0];
> > +            uint32_t *p = (uint32_t *)p8;
> > +            for (x = 0; x < avctx->width; x++) {
> > +                p[x] = yacocg2rgba(p8[4 * x], p8[4 * x + 1], p8[4 * x +
> > 2], p8[4 * x + 3]);
> > +            }
> > +        }
> > +    }
> > +
> >      return 0;
> >  }
> >
> > @@ -169,6 +368,705 @@ static int dxv_decompress_dxt1(AVCodecContext
> > *avctx)
> >      return 0;
> >  }
> >
> > +typedef struct OpcodeTable {
> > +    int16_t next;
> > +    uint8_t val1;
> > +    uint8_t val2;
> > +} OpcodeTable;
> > +
> > +static int fill_ltable(GetByteContext *gb, uint32_t *table, int
> > *nb_elements)
> > +{
> > +    unsigned half = 512, bits = 1023, left = 1024, input, mask;
> > +    int value, counter = 0, rshift = 10, lshift = 30;
> > +
> > +    mask = bytestream2_get_le32(gb) >> 2;
> > +    while (left) {
> > +        if (bytestream2_get_bytes_left(gb) < 0 || counter >= 256)
> > +            return AVERROR_INVALIDDATA;
> > +
> > +        value = bits & mask;
> > +        left -= bits & mask;
> > +        mask >>= rshift;
> > +        lshift -= rshift;
> > +        table[counter++] = value;
> > +        if (lshift < 16) {
> > +            input = bytestream2_get_le16(gb);
> > +            mask += input << lshift;
> > +            lshift += 16;
> > +        }
> > +        if (left < half) {
> > +            half >>= 1;
> > +            bits >>= 1;
> > +            rshift--;
> > +        }
> > +    }
> > +
> > +    for (; !table[counter - 1]; counter--)
> > +        if (counter <= 0)
> > +            return AVERROR_INVALIDDATA;
> > +
> > +    *nb_elements = counter;
> > +
> > +    if (counter < 256)
> > +        memset(&table[counter], 0, 4 * (256 - counter));
> > +
> > +    if (lshift >= 16)
> > +        bytestream2_seek(gb, -2, SEEK_CUR);
> > +
> > +    return 0;
> > +}
> > +
> > +static int fill_optable(unsigned *table0, OpcodeTable *table1, int
> > nb_elements)
> > +{
> > +    unsigned table2[256];
> > +    unsigned x = 0;
> > +    int val0, val1, i, j = 2, k = 0;
> > +
> > +    table2[0] = table0[0];
> > +    for (i = 0; i < nb_elements - 1; i++, table2[i] = val0) {
> > +        val0 = table0[i + 1] + table2[i];
> > +    }
> > +
> > +    if (!table2[0]) {
> > +        do {
> > +            k++;
> > +        } while (!table2[k]);
> > +    }
> > +
> > +    j = 2;
> > +    for (i = 1024; i > 0; i--) {
> > +        for (table1[x].val1 = k; k < 256 && j > table2[k]; k++)
> > +            ;
> > +        x = (x - 383) & 0x3FF;
> > +        j++;
> > +    }
> > +
> > +    if (nb_elements > 0)
> > +        memcpy(&table2[0], table0, 4 * nb_elements);
> > +
> > +    for (i = 0; i < 1024; i++) {
> > +        val0 = table1[i].val1;
> > +        val1 = table2[val0];
> > +        table2[val0]++;
> > +        x = 31 - ff_clz(val1);
> > +        if (x > 10)
> > +            return AVERROR_INVALIDDATA;
> > +        table1[i].val2 = 10 - x;
> > +        table1[i].next = (val1 << table1[i].val2) - 1024;
> > +    }
> > +
> > +    return 0;
> > +}
> > +
> > +static int get_opcodes(GetByteContext *gb, uint32_t *table, uint8_t *dst,
> > int op_size, int nb_elements)
> > +{
> > +    OpcodeTable optable[1024];
> > +    int sum, x, val, lshift, rshift, ret, size_in_bits, i, idx;
> > +    int endoffset, newoffset, offset;
> > +    unsigned next;
> > +    uint8_t *src = (uint8_t *)gb->buffer;
> > +
> > +    ret = fill_optable(table, optable, nb_elements);
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    size_in_bits = bytestream2_get_le32(gb);
> > +    endoffset = ((size_in_bits + 7) >> 3) - 4;
> > +    if (endoffset <= 0 || bytestream2_get_bytes_left(gb) < endoffset)
> > +        return AVERROR_INVALIDDATA;
> > +
> > +    offset = endoffset;
> > +    next = AV_RL32(src + endoffset);
> > +    rshift = (((size_in_bits & 0xFF) - 1) & 7) + 15;
> > +    lshift = 32 - rshift;
> > +    idx = (next >> rshift) & 0x3FF;
> > +    for (i = 0; i < op_size; i++) {
> > +        dst[i] = optable[idx].val1;
> > +        val = optable[idx].val2;
> > +        sum = val + lshift;
> > +        x = (next << lshift) >> 1 >> (31 - val);
> > +        newoffset = offset - (sum >> 3);
> > +        lshift = sum & 7;
> > +        idx = x + optable[idx].next;
> > +        offset = newoffset;
> > +        next = AV_RL32(src + offset);
> > +    }
> > +
> > +    bytestream2_skip(gb, (size_in_bits + 7 >> 3) - 4);
> > +
> > +    return 0;
> > +}
> > +
> > +static int dxv_decompress_opcodes(GetByteContext *gb, void *dstp, size_t
> > op_size)
> > +{
> > +    int pos = bytestream2_tell(gb);
> > +    int flag = bytestream2_peek_byte(gb);
> > +
> > +    if ((flag & 3) == 0) {
> > +        bytestream2_skip(gb, 1);
> > +        bytestream2_get_buffer(gb, dstp, op_size);
> > +    } else if ((flag & 3) == 1) {
> > +        bytestream2_skip(gb, 1);
> > +        memset(dstp, bytestream2_get_byte(gb), op_size);
> > +    } else {
> > +        uint32_t table[256];
> > +        int ret, elements = 0;
> > +
> > +        ret = fill_ltable(gb, table, &elements);
> > +        if (ret < 0)
> > +            return ret;
> > +        ret = get_opcodes(gb, table, dstp, op_size, elements);
> > +        if (ret < 0)
> > +            return ret;
> > +    }
> > +    return bytestream2_tell(gb) - pos;
> > +}
> > +
> > +static int dxv_decompress_cgo(DXVContext *ctx, GetByteContext *gb,
> > +                              uint8_t *tex_data, int tex_size,
> > +                              uint8_t *op_data, int *oindex,
> > +                              uint8_t **dstp, int *statep,
> > +                              uint8_t **tab0, uint8_t **tab1)
> > +{
> > +    uint8_t *dst = *dstp;
> > +    uint8_t *tptr0, *tptr1, *tptr3;
> > +    int oi = *oindex;
> > +    int state = *statep;
> > +    int opcode, v, vv;
> > +
> > +    if (state <= 0) {
> > +        opcode = op_data[oi++];
> > +        if (opcode) {
> > +            switch (opcode) {
> > +            case 1:
> > +                AV_WL32(dst, AV_RL32(dst - 16));
> > +                AV_WL32(dst + 4, AV_RL32(dst - 12));
> > +                break;
> > +            case 2:
> > +                vv = 16 * ((uint16_t)bytestream2_get_le16(gb) + 1);
> > +                if (vv < 0 || vv > dst - tex_data)
> > +                    return AVERROR_INVALIDDATA;
> > +                tptr0 = dst - vv;
> > +                v = AV_RL32(tptr0);
> > +                AV_WL32(dst, AV_RL32(tptr0));
> > +                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> > +                tab0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
> > +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> > dst + 2;
> > +                break;
> > +            case 3:
> > +                AV_WL32(dst, bytestream2_get_le32(gb));
> > +                AV_WL32(dst + 4, bytestream2_get_le32(gb));
> > +                tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> > +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> > dst + 2;
> > +                break;
> > +            case 4:
> > +                tptr3 = tab1[bytestream2_get_byte(gb)];
> > +                AV_WL16(dst, bytestream2_get_le16(gb));
> > +                AV_WL16(dst + 2, AV_RL16(tptr3));
> > +                dst[4] = tptr3[2];
> > +                AV_WL16(dst + 5, bytestream2_get_le16(gb));
> > +                dst[7] = bytestream2_get_byte(gb);
> > +                tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> > +                break;
> > +            case 5:
> > +                tptr3 = tab1[bytestream2_get_byte(gb)];
> > +                if (!tptr3)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, bytestream2_get_le16(gb));
> > +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> > +                dst[4] = bytestream2_get_byte(gb);
> > +                AV_WL16(dst + 5, AV_RL16(tptr3));
> > +                dst[7] = tptr3[2];
> > +                tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> > +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> > dst + 2;
> > +                break;
> > +            case 6:
> > +                tptr0 = tab1[bytestream2_get_byte(gb)];
> > +                if (!tptr0)
> > +                    return AVERROR_INVALIDDATA;
> > +                tptr1 = tab1[bytestream2_get_byte(gb)];
> > +                if (!tptr1)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, bytestream2_get_le16(gb));
> > +                AV_WL16(dst + 2, AV_RL16(tptr0));
> > +                dst[4] = tptr0[2];
> > +                AV_WL16(dst + 5, AV_RL16(tptr1));
> > +                dst[7] = tptr1[2];
> > +                tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> > +                break;
> > +            case 7:
> > +                v = 16 * ((uint16_t)bytestream2_get_le16(gb) + 1);
> > +                if (v < 0 || v > dst - tex_data)
> > +                    return AVERROR_INVALIDDATA;
> > +                tptr0 = dst - v;
> > +                AV_WL16(dst, bytestream2_get_le16(gb));
> > +                AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> > +                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> > +                tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> > +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> > dst + 2;
> > +                break;
> > +            case 8:
> > +                tptr1 = tab0[bytestream2_get_byte(gb)];
> > +                if (!tptr1)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, AV_RL16(tptr1));
> > +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> > +                AV_WL32(dst + 4, bytestream2_get_le32(gb));
> > +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> > dst + 2;
> > +                break;
> > +            case 9:
> > +                tptr1 = tab0[bytestream2_get_byte(gb)];
> > +                if (!tptr1)
> > +                    return AVERROR_INVALIDDATA;
> > +                tptr3 = tab1[bytestream2_get_byte(gb)];
> > +                if (!tptr3)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, AV_RL16(tptr1));
> > +                AV_WL16(dst + 2, AV_RL16(tptr3));
> > +                dst[4] = tptr3[2];
> > +                AV_WL16(dst + 5, bytestream2_get_le16(gb));
> > +                dst[7] = bytestream2_get_byte(gb);
> > +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> > dst + 2;
> > +                break;
> > +            case 10:
> > +                tptr1 = tab0[bytestream2_get_byte(gb)];
> > +                if (!tptr1)
> > +                    return AVERROR_INVALIDDATA;
> > +                tptr3 = tab1[bytestream2_get_byte(gb)];
> > +                if (!tptr3)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, AV_RL16(tptr1));
> > +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> > +                dst[4] = bytestream2_get_byte(gb);
> > +                AV_WL16(dst + 5, AV_RL16(tptr3));
> > +                dst[7] = tptr3[2];
> > +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> > dst + 2;
> > +                break;
> > +            case 11:
> > +                tptr0 = tab0[bytestream2_get_byte(gb)];
> > +                if (!tptr0)
> > +                    return AVERROR_INVALIDDATA;
> > +                tptr3 = tab1[bytestream2_get_byte(gb)];
> > +                if (!tptr3)
> > +                    return AVERROR_INVALIDDATA;
> > +                tptr1 = tab1[bytestream2_get_byte(gb)];
> > +                if (!tptr1)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, AV_RL16(tptr0));
> > +                AV_WL16(dst + 2, AV_RL16(tptr3));
> > +                dst[4] = tptr3[2];
> > +                AV_WL16(dst + 5, AV_RL16(tptr1));
> > +                dst[7] = tptr1[2];
> > +                break;
> > +            case 12:
> > +                tptr1 = tab0[bytestream2_get_byte(gb)];
> > +                if (!tptr1)
> > +                    return AVERROR_INVALIDDATA;
> > +                v = 16 * ((uint16_t)bytestream2_get_le16(gb) + 1);
> > +                if (v < 0 || v > dst - tex_data)
> > +                    return AVERROR_INVALIDDATA;
> > +                tptr0 = dst - v;
> > +                AV_WL16(dst, AV_RL16(tptr1));
> > +                AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> > +                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> > +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> > dst + 2;
> > +                break;
> > +            case 13:
> > +                AV_WL16(dst, AV_RL16(dst - 16));
> > +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> > +                AV_WL32(dst + 4, bytestream2_get_le32(gb));
> > +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> > dst + 2;
> > +                break;
> > +            case 14:
> > +                tptr3 = tab1[bytestream2_get_byte(gb)];
> > +                if (!tptr3)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, AV_RL16(dst - 16));
> > +                AV_WL16(dst + 2, AV_RL16(tptr3));
> > +                dst[4] = tptr3[2];
> > +                AV_WL16(dst + 5, bytestream2_get_le16(gb));
> > +                dst[7] = bytestream2_get_byte(gb);
> > +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> > dst + 2;
> > +                break;
> > +            case 15:
> > +                tptr3 = tab1[bytestream2_get_byte(gb)];
> > +                if (!tptr3)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, AV_RL16(dst - 16));
> > +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> > +                dst[4] = bytestream2_get_byte(gb);
> > +                AV_WL16(dst + 5, AV_RL16(tptr3));
> > +                dst[7] = tptr3[2];
> > +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> > dst + 2;
> > +                break;
> > +            case 16:
> > +                tptr3 = tab1[bytestream2_get_byte(gb)];
> > +                if (!tptr3)
> > +                    return AVERROR_INVALIDDATA;
> > +                tptr1 = tab1[bytestream2_get_byte(gb)];
> > +                if (!tptr1)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, AV_RL16(dst - 16));
> > +                AV_WL16(dst + 2, AV_RL16(tptr3));
> > +                dst[4] = tptr3[2];
> > +                AV_WL16(dst + 5, AV_RL16(tptr1));
> > +                dst[7] = tptr1[2];
> > +                break;
> > +            case 17:
> > +                v = 16 * ((uint16_t)bytestream2_get_le16(gb) + 1);
> > +                if (v < 0 || v > dst - tex_data)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, AV_RL16(dst - 16));
> > +                AV_WL16(dst + 2, AV_RL16(&dst[-v + 2]));
> > +                AV_WL32(dst + 4, AV_RL32(&dst[-v + 4]));
> > +                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] =
> > dst + 2;
> > +                break;
> > +            default:
> > +                break;
> > +            }
> > +        } else {
> > +            v = bytestream2_get_byte(gb);
> > +            if (v == 255) {
> > +                do {
> > +                    if (bytestream2_get_bytes_left(gb) <= 0)
> > +                        return AVERROR_INVALIDDATA;
> > +                    opcode = bytestream2_get_le16(gb);
> > +                    v += opcode;
> > +                } while (opcode == 0xFFFF);
> > +            }
> > +            AV_WL32(dst, AV_RL32(dst - 16));
> > +            AV_WL32(dst + 4, AV_RL32(dst - 12));
> > +            state = v + 3;
> > +        }
> > +    } else {
> > +        AV_WL32(dst, AV_RL32(dst - 16));
> > +        AV_WL32(dst + 4, AV_RL32(dst - 12));
> > +        state--;
> > +    }
> > +    dst += 8;
> > +
> > +    *oindex = oi;
> > +    *dstp = dst;
> > +    *statep = state;
> > +
> > +    return 0;
> > +}
> > +
> > +static int dxv_decompress_cocg(DXVContext *ctx, GetByteContext *gb,
> > +                               uint8_t *tex_data, int tex_size,
> > +                               uint8_t *op_data0, uint8_t *op_data1)
> > +{
> > +    uint8_t *dst, *tab2[256], *tab0[256], *tab3[256], *tab1[256];
> > +    int op_offset = bytestream2_get_le32(gb);
> > +    int op_size0 = bytestream2_get_le32(gb);
> > +    int op_size1 = bytestream2_get_le32(gb);
> > +    int data_start = bytestream2_tell(gb);
> > +    int skip0, skip1, oi0 = 0, oi1 = 0;
> > +    int ret, state0 = 0, state1 = 0;
> > +
> > +    dst = tex_data;
> > +    bytestream2_skip(gb, op_offset - 12);
> > +    skip0 = dxv_decompress_opcodes(gb, op_data0, op_size0);
> > +    if (skip0 < 0)
> > +        return skip0;
> > +    bytestream2_seek(gb, data_start + op_offset + skip0 - 12, SEEK_SET);
> > +    skip1 = dxv_decompress_opcodes(gb, op_data1, op_size1);
> > +    if (skip1 < 0)
> > +        return skip1;
> > +    bytestream2_seek(gb, data_start, SEEK_SET);
> > +
> > +    AV_WL32(dst, bytestream2_get_le32(gb));
> > +    AV_WL32(dst + 4, bytestream2_get_le32(gb));
> > +    AV_WL32(dst + 8, bytestream2_get_le32(gb));
> > +    AV_WL32(dst + 12, bytestream2_get_le32(gb));
> > +
> > +    tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> > +    tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
> > +    tab2[0x9E3779B1 * AV_RL16(dst + 8) >> 24] = dst + 8;
> > +    tab3[0x9E3779B1 * (AV_RL32(dst + 10) & 0xFFFFFF) >> 24] = dst + 10;
> > +    dst += 16;
> > +    while (dst + 10 < tex_data + tex_size) {
> > +        ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data0,
> > &oi0, &dst, &state0, tab0, tab1);
> > +        if (ret < 0)
> > +            return ret;
> > +        ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data1,
> > &oi1, &dst, &state1, tab2, tab3);
> > +        if (ret < 0)
> > +            return ret;
> > +    }
> > +
> > +    bytestream2_seek(gb, data_start + op_offset + skip0 + skip1 - 12,
> > SEEK_SET);
> > +
> > +    return 0;
> > +}
> > +
> > +static int dxv_decompress_yo(DXVContext *ctx, GetByteContext *gb,
> > +                             uint8_t *tex_data, int tex_size, uint8_t
> > *op_data)
> > +{
> > +    int op_offset  = bytestream2_get_le32(gb);
> > +    int op_size    = bytestream2_get_le32(gb);
> > +    int data_start = bytestream2_tell(gb);
> > +    uint8_t *dst, *tptr0, *tptr1, *table0[256], *table1[256];
> > +    int opcode, skip, oi = 0, v, vv;
> > +
> > +    dst = tex_data;
> > +    bytestream2_skip(gb, op_offset - 8);
> > +    skip = dxv_decompress_opcodes(gb, op_data, op_size);
> > +    if (skip < 0)
> > +        return skip;
> > +    bytestream2_seek(gb, data_start, SEEK_SET);
> > +
> > +    v = bytestream2_get_le32(gb);
> > +    AV_WL32(dst, v);
> > +    vv = bytestream2_get_le32(gb);
> > +    table0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
> > +    AV_WL32(dst + 4, vv);
> > +    table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
> > +    dst += 8;
> > +
> > +    while (dst < tex_data + tex_size) {
> > +        opcode = op_data[oi++];
> > +        if (opcode) {
> > +            switch (opcode) {
> > +            case 1:
> > +                AV_WL32(dst, AV_RL32(dst - 8));
> > +                AV_WL32(dst + 4, AV_RL32(dst - 4));
> > +                break;
> > +            case 2:
> > +                vv = 8 * (uint16_t)bytestream2_get_le16(gb) + 8;
> > +                if (vv > dst - tex_data)
> > +                    return AVERROR_INVALIDDATA;
> > +                tptr0 = dst - vv;
> > +                v = AV_RL32(tptr0);
> > +                AV_WL32(dst, AV_RL32(tptr0));
> > +                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> > +                table0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
> > +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFU) >> 24]
> > = dst + 2;
> > +                break;
> > +            case 3:
> > +                v = bytestream2_get_le32(gb);
> > +                AV_WL32(dst, v);
> > +                AV_WL32(dst + 4, bytestream2_get_le32(gb));
> > +                vv = AV_RL32(dst + 2);
> > +                table0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
> > +                table1[0x9E3779B1 * (vv & 0xFFFFFF) >> 24] = dst + 2;
> > +                break;
> > +            case 4:
> > +                tptr1 = table1[bytestream2_get_byte(gb)];
> > +                if (!tptr1)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, bytestream2_get_le16(gb));
> > +                AV_WL16(dst + 2, AV_RL16(tptr1));
> > +                dst[4] = tptr1[2];
> > +                AV_WL16(dst + 5, bytestream2_get_le16(gb));
> > +                dst[7] = bytestream2_get_byte(gb);
> > +                v = 0x9E3779B1 * AV_RL16(dst) >> 24;
> > +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> > = dst + 2;
> > +                table0[v] = dst;
> > +                break;
> > +            case 5:
> > +                tptr1 = table1[bytestream2_get_byte(gb)];
> > +                if (!tptr1)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, bytestream2_get_le16(gb));
> > +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> > +                dst[4] = bytestream2_get_byte(gb);
> > +                AV_WL16(dst + 5, AV_RL16(tptr1));
> > +                dst[7] = tptr1[2];
> > +                table0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> > +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> > = dst + 2;
> > +                break;
> > +            case 6:
> > +                tptr0 = table1[bytestream2_get_byte(gb)];
> > +                if (!tptr0)
> > +                    return AVERROR_INVALIDDATA;
> > +                tptr1 = table1[bytestream2_get_byte(gb)];
> > +                if (!tptr1)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, bytestream2_get_le16(gb));
> > +                AV_WL16(dst + 2, AV_RL16(tptr0));
> > +                dst[4] = tptr0[2];
> > +                AV_WL16(dst + 5, AV_RL16(tptr1));
> > +                dst[7] = tptr1[2];
> > +                table0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> > +                break;
> > +            case 7:
> > +                v = 8 * (uint16_t)bytestream2_get_le16(gb) + 8;
> > +                if (v > dst - tex_data)
> > +                    return AVERROR_INVALIDDATA;
> > +                tptr0 = dst - v;
> > +                AV_WL16(dst, bytestream2_get_le16(gb));
> > +                AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> > +                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> > +                table0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
> > +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> > = dst + 2;
> > +                break;
> > +            case 8:
> > +                tptr0 = table0[bytestream2_get_byte(gb)];
> > +                if (!tptr0)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, AV_RL16(tptr0));
> > +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> > +                AV_WL32(dst + 4, bytestream2_get_le32(gb));
> > +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> > = dst + 2;
> > +                break;
> > +            case 9:
> > +                tptr0 = table0[bytestream2_get_byte(gb)];
> > +                if (!tptr0)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, AV_RL16(tptr0));
> > +                tptr1 = table1[bytestream2_get_byte(gb)];
> > +                if (!tptr1)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst + 2, AV_RL16(tptr1));
> > +                dst[4] = tptr1[2];
> > +                AV_WL16(dst + 5, bytestream2_get_le16(gb));
> > +                dst[7] = bytestream2_get_byte(gb);
> > +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> > = dst + 2;
> > +                break;
> > +            case 10:
> > +                tptr0 = table0[bytestream2_get_byte(gb)];
> > +                if (!tptr0)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, AV_RL16(tptr0));
> > +                tptr1 = table1[bytestream2_get_byte(gb)];
> > +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> > +                dst[4] = bytestream2_get_byte(gb);
> > +                AV_WL16(dst + 5, AV_RL16(tptr1));
> > +                dst[7] = tptr1[2];
> > +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> > = dst + 2;
> > +                break;
> > +            case 11:
> > +                tptr0 = table0[bytestream2_get_byte(gb)];
> > +                if (!tptr0)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, AV_RL16(tptr0));
> > +                tptr0 = table1[bytestream2_get_byte(gb)];
> > +                if (!tptr0)
> > +                    return AVERROR_INVALIDDATA;
> > +                tptr1 = table1[bytestream2_get_byte(gb)];
> > +                if (!tptr1)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst + 2, AV_RL16(tptr0));
> > +                dst[4] = tptr0[2];
> > +                AV_WL16(dst + 5, AV_RL16(tptr1));
> > +                dst[7] = tptr1[2];
> > +                break;
> > +            case 12:
> > +                tptr0 = table0[bytestream2_get_byte(gb)];
> > +                if (!tptr0)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, AV_RL16(tptr0));
> > +                tptr0 = dst - (8 * bytestream2_get_le16(gb) + 8);
> > +                AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> > +                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> > +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24]
> > = dst + 2;
> > +                break;
> > +            case 13:
> > +                AV_WL16(dst, AV_RL16(dst - 8));
> > +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> > +                AV_WL32(dst + 4, bytestream2_get_le32(gb));
> > +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24]
> > = dst + 2;
> > +                break;
> > +            case 14:
> > +                tptr1 = table1[bytestream2_get_byte(gb)];
> > +                if (!tptr1)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, AV_RL16(dst - 8));
> > +                AV_WL16(dst + 2, AV_RL16(tptr1));
> > +                dst[4] = tptr1[2];
> > +                AV_WL16(dst + 5, bytestream2_get_le16(gb));
> > +                dst[7] = bytestream2_get_byte(gb);
> > +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24]
> > = dst + 2;
> > +                break;
> > +            case 15:
> > +                tptr1 = table1[bytestream2_get_byte(gb)];
> > +                if (!tptr1)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, AV_RL16(dst - 8));
> > +                AV_WL16(dst + 2, bytestream2_get_le16(gb));
> > +                dst[4] = bytestream2_get_byte(gb);
> > +                AV_WL16(dst + 5, AV_RL16(tptr1));
> > +                dst[7] = tptr1[2];
> > +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> > = dst + 2;
> > +                break;
> > +            case 16:
> > +                tptr0 = table1[bytestream2_get_byte(gb)];
> > +                if (!tptr0)
> > +                    return AVERROR_INVALIDDATA;
> > +                tptr1 = table1[bytestream2_get_byte(gb)];
> > +                if (!tptr1)
> > +                    return AVERROR_INVALIDDATA;
> > +                AV_WL16(dst, AV_RL16(dst - 8));
> > +                AV_WL16(dst + 2, AV_RL16(tptr0));
> > +                dst[4] = tptr0[2];
> > +                AV_WL16(dst + 5, AV_RL16(tptr1));
> > +                dst[7] = tptr1[2];
> > +                break;
> > +            case 17:
> > +                v = 8 * bytestream2_get_le16(gb) + 8;
> > +                if (v > dst - tex_data)
> > +                    return AVERROR_INVALIDDATA;
> > +                tptr0 = dst - v;
> > +                AV_WL16(dst, AV_RL16(dst - 8));
> > +                AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
> > +                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
> > +                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24]
> > = dst + 2;
> > +                break;
> > +            default:
> > +                break;
> > +            }
> > +            dst += 8;
> > +        } else {
> > +            v = bytestream2_get_byte(gb);
> > +            if (v == 255) {
> > +                do {
> > +                    if (bytestream2_get_bytes_left(gb) <= 0)
> > +                        return AVERROR_INVALIDDATA;
> > +                    opcode = bytestream2_get_le16(gb);
> > +                    v += opcode;
> > +                } while (opcode == 0xFFFF);
> > +            }
> > +            vv = v + 4;
> > +            do {
> > +                AV_WL32(dst, AV_RL32(dst - 8));
> > +                AV_WL32(dst + 4, AV_RL32(dst - 4));
> > +                dst += 8;
> > +                --vv;
> > +            } while (vv);
> > +        }
> > +    }
> > +
> > +    bytestream2_seek(gb, data_start + op_offset + skip - 8, SEEK_SET);
> > +
> > +    return 0;
> > +}
> > +
> > +static int dxv_decompress_ycg6(AVCodecContext *avctx)
> > +{
> > +    DXVContext *ctx = avctx->priv_data;
> > +    GetByteContext *gb = &ctx->gbc;
> > +    int ret;
> > +
> > +    ret = dxv_decompress_yo(ctx, gb, ctx->tex_data, ctx->tex_size,
> > ctx->op_data[0]);
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    return dxv_decompress_cocg(ctx, gb, ctx->ctex_data, ctx->ctex_size,
> > ctx->op_data[1], ctx->op_data[2]);
> > +}
> > +
> > +static int dxv_decompress_yg10(AVCodecContext *avctx)
> > +{
> > +    DXVContext *ctx = avctx->priv_data;
> > +    GetByteContext *gb = &ctx->gbc;
> > +    int ret;
> > +
> > +    ret = dxv_decompress_cocg(ctx, gb, ctx->tex_data, ctx->tex_size,
> > ctx->op_data[0], ctx->op_data[3]);
> > +    if (ret < 0)
> > +        return ret;
> > +
> > +    return dxv_decompress_cocg(ctx, gb, ctx->ctex_data, ctx->ctex_size,
> > ctx->op_data[1], ctx->op_data[2]);
> > +}
> > +
> >  static int dxv_decompress_dxt5(AVCodecContext *avctx)
> >  {
> >      DXVContext *ctx = avctx->priv_data;
> > @@ -359,6 +1257,9 @@ static int dxv_decode(AVCodecContext *avctx, void *data,
> >
> >      bytestream2_init(gbc, avpkt->data, avpkt->size);
> >
> > +    ctx->texture_block_h = 4;
> > +    ctx->texture_block_w = 4;
> > +
> >      tag = bytestream2_get_le32(gbc);
> >      switch (tag) {
> >      case MKBETAG('D', 'X', 'T', '1'):
> > @@ -378,9 +1279,35 @@ static int dxv_decode(AVCodecContext *avctx, void *data,
> >          msgtext = "DXT5";
> >          break;
> >      case MKBETAG('Y', 'C', 'G', '6'):
> > +        decompress_tex = dxv_decompress_ycg6;
> > +        ctx->tex_funct = yo_block;
> > +        ctx->ctex_funct = cocg_block;
> > +        ctx->tex_rat   = 8;
> > +        ctx->tex_step  = 32;
> > +        ctx->ctex_step = 16;
> > +        msgcomp = "YOCOCG6";
> > +        msgtext = "YCG6";
> > +        ctx->ctex_size = avctx->coded_width * avctx->coded_height / 4;
> > +        ctx->texture_block_h = 4;
> > +        ctx->texture_block_w = 16;
> > +        ctx->ctexture_block_h = 8;
> > +        ctx->ctexture_block_w = 8;
> > +        break;
> >      case MKBETAG('Y', 'G', '1', '0'):
> > -        avpriv_report_missing_feature(avctx, "Tag 0x%08"PRIX32, tag);
> > -        return AVERROR_PATCHWELCOME;
> > +        decompress_tex = dxv_decompress_yg10;
> > +        ctx->tex_funct = yao_block;
> > +        ctx->ctex_funct = cocg_block;
> > +        ctx->tex_rat   = 4;
> > +        ctx->tex_step  = 64;
> > +        ctx->ctex_step = 16;
> > +        msgcomp = "YAOCOCG10";
> > +        msgtext = "YG10";
> > +        ctx->ctex_size = avctx->coded_width * avctx->coded_height / 4;
> > +        ctx->texture_block_h = 4;
> > +        ctx->texture_block_w = 16;
> > +        ctx->ctexture_block_h = 8;
> > +        ctx->ctexture_block_w = 8;
> > +        break;
> >      default:
> >          /* Old version does not have a real header, just size and type. */
> >          size = tag & 0x00FFFFFF;
> > @@ -413,6 +1340,10 @@ static int dxv_decode(AVCodecContext *avctx, void *data,
> >          break;
> >      }
> >
> > +    ctx->slice_count = av_clip(avctx->thread_count, 1,
> > +                               avctx->coded_height / FFMAX(ctx->texture_block_h,
> > +                                                           ctx->ctexture_block_h));
> > +
> >      /* New header is 12 bytes long. */
> >      if (!old_type) {
> >          version_major = bytestream2_get_byte(gbc) - 1;
> > @@ -444,6 +1375,24 @@ static int dxv_decode(AVCodecContext *avctx, void *data,
> >      if (ret < 0)
> >          return ret;
> >
> > +    if (ctx->ctex_size) {
> > +        int i;
> > +
> > +        ctx->op_size[0] = avctx->coded_width * avctx->coded_height / 16;
> > +        ctx->op_size[1] = avctx->coded_width * avctx->coded_height / 32;
> > +        ctx->op_size[2] = avctx->coded_width * avctx->coded_height / 32;
> > +        ctx->op_size[3] = avctx->coded_width * avctx->coded_height / 16;
> > +
> > +        ret = av_reallocp(&ctx->ctex_data, ctx->ctex_size);
> > +        if (ret < 0)
> > +            return ret;
> > +        for (i = 0; i < 4; i++) {
> > +            ret = av_reallocp(&ctx->op_data[i], ctx->op_size[i]);
> > +            if (ret < 0)
> > +                return ret;
> > +        }
> > +    }
> > +
> >      /* Decompress texture out of the intermediate compression. */
> >      ret = decompress_tex(avctx);
> >      if (ret < 0)
> > @@ -484,9 +1433,6 @@ static int dxv_init(AVCodecContext *avctx)
> >      ff_texturedsp_init(&ctx->texdsp);
> >      avctx->pix_fmt = AV_PIX_FMT_RGBA;
> >
> > -    ctx->slice_count = av_clip(avctx->thread_count, 1,
> > -                               avctx->coded_height / TEXTURE_BLOCK_H);
> > -
> >      return 0;
> >  }
> >
> > @@ -495,6 +1441,10 @@ static int dxv_close(AVCodecContext *avctx)
> >      DXVContext *ctx = avctx->priv_data;
> >
> >      av_freep(&ctx->tex_data);
> > +    av_freep(&ctx->ctex_data);
> > +    av_freep(&ctx->op_data[0]);
> > +    av_freep(&ctx->op_data[1]);
> > +    av_freep(&ctx->op_data[2]);
> >
> >      return 0;
> >  }
> > --
> > 2.11.0
> >
> > _______________________________________________
> > ffmpeg-devel mailing list
> > ffmpeg-devel@ffmpeg.org
> > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> >  
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel

What is the purpose of not shortening irrelevant quoted text?
diff mbox

Patch

diff --git a/libavcodec/dxv.c b/libavcodec/dxv.c
index 529e211258..6308163735 100644
--- a/libavcodec/dxv.c
+++ b/libavcodec/dxv.c
@@ -1,6 +1,7 @@ 
 /*
  * Resolume DXV decoder
  * Copyright (C) 2015 Vittorio Giovara <vittorio.giovara@gmail.com>
+ * Copyright (C) 2018 Paul B Mahol
  *
  * This file is part of FFmpeg.
  *
@@ -23,6 +24,7 @@ 
 
 #include "libavutil/imgutils.h"
 
+#include "mathops.h"
 #include "avcodec.h"
 #include "bytestream.h"
 #include "internal.h"
@@ -34,53 +36,250 @@  typedef struct DXVContext {
     TextureDSPContext texdsp;
     GetByteContext gbc;
 
-    uint8_t *tex_data;  // Compressed texture
-    int tex_rat;        // Compression ratio
-    int tex_step;       // Distance between blocks
-    int64_t tex_size;   // Texture size
+    uint8_t *tex_data;   // Compressed texture
+    uint8_t *ctex_data;  // Compressed texture
+    int tex_rat;         // Compression ratio
+    int tex_step;        // Distance between blocks
+    int ctex_step;       // Distance between blocks
+    int64_t tex_size;    // Texture size
+    int64_t ctex_size;   // Texture size
 
     /* Optimal number of slices for parallel decoding */
     int slice_count;
 
+    uint8_t *op_data[4]; // Opcodes
+    int64_t op_size[4];  // Opcodes size
+
+    int texture_block_w;
+    int texture_block_h;
+
+    int ctexture_block_w;
+    int ctexture_block_h;
+
     /* Pointer to the selected decompression function */
     int (*tex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
+    int (*ctex_funct)(uint8_t *dst, ptrdiff_t stride, const uint8_t *block);
 } DXVContext;
 
+/* Expand two packed 24-bit little-endian words read from src into sixteen
+ * 3-bit indices, stored one per byte in dst (lowest bits first). */
+static void decompress_indices(uint8_t *dst, const uint8_t *src)
+{
+    int half, n;
+
+    for (half = 0; half < 2; half++) {
+        int packed   = AV_RL24(src + 3 * half);
+        uint8_t *out = dst + 8 * half;
+
+        /* Peel off eight 3-bit fields from the 24-bit word */
+        for (n = 0; n < 8; n++) {
+            out[n]   = packed & 0x7;
+            packed >>= 3;
+        }
+    }
+}
+
+/* Resolve a 3-bit index against the two endpoint values yo0 and yo1.
+ * Index 0 selects yo0 and index 1 selects yo1. When yo0 > yo1 the other
+ * indices give six interpolated steps; otherwise indices 2..5 give four
+ * interpolated steps and indices 6 and 7 map to 0 and 255. */
+static int extract_component(int yo0, int yo1, int code)
+{
+    if (yo0 == yo1 || code == 0)
+        return yo0;
+    if (code == 1)
+        return yo1;
+
+    if (yo0 > yo1)
+        return (uint8_t) (((8 - code) * yo0 + (code - 1) * yo1) / 7);
+
+    switch (code) {
+    case 6:
+        return 0;
+    case 7:
+        return 255;
+    default:
+        return (uint8_t) (((6 - code) * yo0 + (code - 1) * yo1) / 5);
+    }
+}
+
+/*
+ * Convert one YCoCg pixel plus alpha into a packed 32-bit word with R in
+ * bits 0-7, G in bits 8-15, B in bits 16-23 and A in bits 24-31.
+ * co and cg carry a bias of 127 that is removed before mixing; each colour
+ * channel is clipped to 0..255.
+ */
+static av_always_inline uint32_t yacocg2rgba(int yo, int co, int cg, int a)
+{
+    int r, g, b;
+
+    co = co - 127;
+    cg = cg - 127;
+
+    r = av_clip_uint8(yo + co - cg);
+    g = av_clip_uint8(yo + cg);
+    b = av_clip_uint8(yo - co - cg);
+
+    /* Shift alpha as unsigned: (int)a << 24 is undefined for a >= 128. */
+    return ((uint32_t)a << 24) | (b << 16) | (g << 8) | (r);
+}
+
+/* Decode one 16-byte Co/Cg block: bytes 0-7 hold the Co endpoints and
+ * indices, bytes 8-15 the Cg ones. Each of the 4x4 decoded samples is
+ * written to a 2x2 square of 4-byte pixels (Co at byte 1, Cg at byte 2 of
+ * every pixel), covering 8x8 pixels in dst. Returns 16. */
+static int cocg_block(uint8_t *dst, ptrdiff_t stride,
+                      const uint8_t *block)
+{
+    uint8_t co_idx[16], cg_idx[16];
+    const uint8_t co_e0 = block[0], co_e1 = block[1];
+    const uint8_t cg_e0 = block[8], cg_e1 = block[9];
+    int i;
+
+    decompress_indices(co_idx, block + 2);
+    decompress_indices(cg_idx, block + 10);
+
+    for (i = 0; i < 16; i++) {
+        /* sample (i & 3, i >> 2) maps to two pixel rows and two pixel columns */
+        uint8_t *top    = dst + (i >> 2) * 2 * stride + (i & 3) * 8;
+        uint8_t *bottom = top + stride;
+        const uint8_t co = extract_component(co_e0, co_e1, co_idx[i]);
+        const uint8_t cg = extract_component(cg_e0, cg_e1, cg_idx[i]);
+
+        top[1] = top[5] = bottom[1] = bottom[5] = co;
+        top[2] = top[6] = bottom[2] = bottom[6] = cg;
+    }
+
+    return 16;
+}
+
+/* Decode one 8-byte luma sub-block (two endpoints followed by sixteen 3-bit
+ * indices) into a 4x4 area of 4-byte pixels at dst. Every pixel is written
+ * as a little-endian 32-bit word with the luma value in the low byte and
+ * 255 in the top byte. yo_indices is caller-provided scratch for the
+ * sixteen unpacked indices. */
+static void yo_subblock(uint8_t *dst, uint8_t *yo_indices,
+                        ptrdiff_t stride, const uint8_t *block)
+{
+    const uint8_t e0 = block[0];
+    const uint8_t e1 = block[1];
+    int i;
+
+    decompress_indices(yo_indices, block + 2);
+
+    for (i = 0; i < 16; i++) {
+        uint8_t *px  = dst + (i >> 2) * stride + (i & 3) * 4;
+        uint8_t luma = extract_component(e0, e1, yo_indices[i]);
+
+        AV_WL32(px, 255u << 24 | luma);
+    }
+}
+
+/* Decode four consecutive 8-byte luma sub-blocks, placed side by side at
+ * 16-byte steps in dst (a 16x4 pixel strip). Returns 32. */
+static int yo_block(uint8_t *dst, ptrdiff_t stride,
+                    const uint8_t *block)
+{
+    uint8_t yo_indices[16];
+    int n;
+
+    for (n = 0; n < 4; n++)
+        yo_subblock(dst + 16 * n, yo_indices, stride, block + 8 * n);
+
+    return 32;
+}
+
+/* Decode one 8-byte alpha sub-block (two endpoints followed by sixteen
+ * 3-bit indices) and store each value in byte 3 of the matching 4-byte
+ * pixel of a 4x4 area at dst. a_indices is caller-provided scratch. */
+static void a_subblock(uint8_t *dst, uint8_t *a_indices,
+                       ptrdiff_t stride, const uint8_t *block)
+{
+    const uint8_t e0 = block[0];
+    const uint8_t e1 = block[1];
+    int i;
+
+    decompress_indices(a_indices, block + 2);
+
+    for (i = 0; i < 16; i++) {
+        uint8_t *px = dst + (i >> 2) * stride + (i & 3) * 4;
+
+        px[3] = extract_component(e0, e1, a_indices[i]);
+    }
+}
+
+/* Decode four luma/alpha sub-block pairs (8 bytes of luma followed by
+ * 8 bytes of alpha each), placed side by side at 16-byte steps in dst.
+ * Luma is written first because yo_subblock() overwrites the whole pixel.
+ * Returns 64. */
+static int yao_block(uint8_t *dst, ptrdiff_t stride,
+                    const uint8_t *block)
+{
+    uint8_t yo_indices[16];
+    uint8_t a_indices[16];
+    int n;
+
+    for (n = 0; n < 4; n++) {
+        uint8_t *out       = dst   + 16 * n;
+        const uint8_t *in  = block + 16 * n;
+
+        yo_subblock(out, yo_indices, stride, in);
+        a_subblock(out,  a_indices,  stride, in + 8);
+    }
+
+    return 64;
+}
+
 static int decompress_texture_thread(AVCodecContext *avctx, void *arg,
                                      int slice, int thread_nb)
 {
     DXVContext *ctx = avctx->priv_data;
     AVFrame *frame = arg;
     const uint8_t *d = ctx->tex_data;
-    int w_block = avctx->coded_width / TEXTURE_BLOCK_W;
-    int h_block = avctx->coded_height / TEXTURE_BLOCK_H;
+    const uint8_t *c = ctx->ctex_data;
+    int w_block = avctx->coded_width / ctx->texture_block_w;
+    int h_block = avctx->coded_height / ctx->texture_block_h;
     int x, y;
     int start_slice, end_slice;
-    int base_blocks_per_slice = h_block / ctx->slice_count;
-    int remainder_blocks = h_block % ctx->slice_count;
-
-    /* When the frame height (in blocks) doesn't divide evenly between the
-     * number of slices, spread the remaining blocks evenly between the first
-     * operations */
-    start_slice = slice * base_blocks_per_slice;
-    /* Add any extra blocks (one per slice) that have been added
-     * before this slice */
-    start_slice += FFMIN(slice, remainder_blocks);
-
-    end_slice = start_slice + base_blocks_per_slice;
-    /* Add an extra block if there are remainder blocks to be accounted for */
-    if (slice < remainder_blocks)
-        end_slice++;
+
+    start_slice = h_block * slice / ctx->slice_count;
+    end_slice = h_block * (slice + 1) / ctx->slice_count;
 
     for (y = start_slice; y < end_slice; y++) {
-        uint8_t *p = frame->data[0] + y * frame->linesize[0] * TEXTURE_BLOCK_H;
+        uint8_t *p = frame->data[0] + y * frame->linesize[0] * ctx->texture_block_h;
         int off  = y * w_block;
         for (x = 0; x < w_block; x++) {
-            ctx->tex_funct(p + x * 16, frame->linesize[0],
+            ctx->tex_funct(p + x * 4 * ctx->texture_block_w, frame->linesize[0],
                            d + (off + x) * ctx->tex_step);
         }
     }
 
+    if (ctx->ctex_funct) {
+        w_block = avctx->coded_width / ctx->ctexture_block_w;
+        h_block = avctx->coded_height / ctx->ctexture_block_h;
+
+        start_slice = h_block * slice / ctx->slice_count;
+        end_slice = h_block * (slice + 1) / ctx->slice_count;
+
+        for (y = start_slice; y < end_slice; y++) {
+            uint8_t *p = frame->data[0] + y * frame->linesize[0] * ctx->ctexture_block_h;
+            int off  = y * w_block;
+            for (x = 0; x < w_block; x++) {
+                ctx->ctex_funct(p + x * 4 * ctx->ctexture_block_w, frame->linesize[0],
+                                c + (off + x) * ctx->ctex_step);
+            }
+        }
+        for (y = start_slice * ctx->ctexture_block_h; y < FFMIN(end_slice * ctx->ctexture_block_h, avctx->height); y++) {
+            uint8_t *p8 = frame->data[0] + y * frame->linesize[0];
+            uint32_t *p = (uint32_t *)p8;
+            for (x = 0; x < avctx->width; x++) {
+                p[x] = yacocg2rgba(p8[4 * x], p8[4 * x + 1], p8[4 * x + 2], p8[4 * x + 3]);
+            }
+        }
+    }
+
     return 0;
 }
 
@@ -169,6 +368,705 @@  static int dxv_decompress_dxt1(AVCodecContext *avctx)
     return 0;
 }
 
+typedef struct OpcodeTable { /* one entry of the 1024-slot table built by fill_optable() and walked by get_opcodes() */
+    int16_t next; /* added to the freshly read bits to form the next table index */
+    uint8_t val1; /* byte emitted to the output when this entry is visited */
+    uint8_t val2; /* number of new bits read from the stream after this entry */
+} OpcodeTable;
+
+static int fill_ltable(GetByteContext *gb, uint32_t *table, int *nb_elements) /* read counts that add up to 1024 into table[]; returns 0 or AVERROR_INVALIDDATA */
+{
+    unsigned half = 512, bits = 1023, left = 1024, input, mask;
+    int value, counter = 0, rshift = 10, lshift = 30;
+
+    mask = bytestream2_get_le32(gb) >> 2; /* drop the two low bits that dxv_decompress_opcodes() tested as the mode flag */
+    while (left) { /* loop until the counts read so far total 1024 */
+        if (bytestream2_get_bytes_left(gb) < 0 || counter >= 256) /* at most 256 entries; NOTE(review): confirm bytes_left can ever be < 0 here */
+            return AVERROR_INVALIDDATA;
+
+        value = bits & mask; /* next count, currently rshift bits wide */
+        left -= bits & mask;
+        mask >>= rshift;
+        lshift -= rshift; /* lshift tracks how many unread bits mask still holds */
+        table[counter++] = value;
+        if (lshift < 16) { /* refill: place 16 more bits above the unread ones */
+            input = bytestream2_get_le16(gb);
+            mask += input << lshift;
+            lshift += 16;
+        }
+        if (left < half) { /* remainder now fits in one bit less: narrow the field */
+            half >>= 1;
+            bits >>= 1;
+            rshift--;
+        }
+    }
+
+    for (; !table[counter - 1]; counter--) /* trim trailing zero counts */
+        if (counter <= 0)
+            return AVERROR_INVALIDDATA;
+
+    *nb_elements = counter;
+
+    if (counter < 256)
+        memset(&table[counter], 0, 4 * (256 - counter)); /* zero the unused tail, 4 bytes per entry */
+
+    if (lshift >= 16)
+        bytestream2_seek(gb, -2, SEEK_CUR); /* a whole 16-bit word is still unread: give it back */
+
+    return 0;
+}
+
+/*
+ * Build the 1024-entry decoding table walked by get_opcodes().
+ *
+ * table0       per-symbol counts, as produced by fill_ltable()
+ * table1       output table, 1024 entries
+ * nb_elements  number of leading entries of table0 that are in use
+ *
+ * Returns 0 on success, or AVERROR_INVALIDDATA when a slot's running count
+ * would need a bit length outside 0..10.
+ */
+static int fill_optable(unsigned *table0, OpcodeTable *table1, int nb_elements)
+{
+    /* Only the first nb_elements entries are filled in below, but the scans
+     * can read up to index 255; start from zero so those reads are defined. */
+    unsigned table2[256] = { 0 };
+    unsigned x = 0;
+    int val0, val1, i, j, k = 0;
+
+    /* table2[] = running (cumulative) sums of the counts */
+    table2[0] = table0[0];
+    for (i = 0; i < nb_elements - 1; i++, table2[i] = val0) {
+        val0 = table0[i + 1] + table2[i];
+    }
+
+    /* Skip leading symbols whose count is zero */
+    if (!table2[0]) {
+        do {
+            k++;
+        } while (!table2[k]);
+    }
+
+    /* Assign a symbol to each of the 1024 slots, visiting the slots with a
+     * fixed stride of -383 modulo 1024 and advancing to the next symbol
+     * whenever the running position passes its cumulative sum. */
+    j = 2;
+    for (i = 1024; i > 0; i--) {
+        for (table1[x].val1 = k; k < 256 && j > table2[k]; k++)
+            ;
+        x = (x - 383) & 0x3FF;
+        j++;
+    }
+
+    /* table2[] now becomes a per-symbol running counter seeded with the counts */
+    if (nb_elements > 0)
+        memcpy(&table2[0], table0, 4 * nb_elements);
+
+    for (i = 0; i < 1024; i++) {
+        val0 = table1[i].val1;
+        val1 = table2[val0];
+        table2[val0]++;
+        x = 31 - ff_clz(val1);          /* position of the highest set bit */
+        if (x > 10)
+            return AVERROR_INVALIDDATA;
+        table1[i].val2 = 10 - x;        /* bits to read after this slot */
+        table1[i].next = (val1 << table1[i].val2) - 1024;
+    }
+
+    return 0;
+}
+
+/*
+ * Decode op_size opcode bytes into dst using the table derived from
+ * table[] / nb_elements (see fill_optable()).
+ *
+ * The stream starts with a 32-bit little-endian size in bits; the payload
+ * is consumed from its end towards its start, 32 bits at a time. On
+ * success gb is advanced past the payload.
+ *
+ * Returns 0 on success, a negative error from fill_optable(), or
+ * AVERROR_INVALIDDATA when the size field is inconsistent with the input
+ * or the backward read position runs off the front of the stream.
+ */
+static int get_opcodes(GetByteContext *gb, uint32_t *table, uint8_t *dst, int op_size, int nb_elements)
+{
+    OpcodeTable optable[1024];
+    int sum, x, val, lshift, rshift, ret, size_in_bits, i, idx;
+    int endoffset, newoffset, offset;
+    unsigned next;
+    const uint8_t *src = gb->buffer;    /* offsets below are relative to the size field */
+
+    ret = fill_optable(table, optable, nb_elements);
+    if (ret < 0)
+        return ret;
+
+    size_in_bits = bytestream2_get_le32(gb);
+    endoffset = ((size_in_bits + 7) >> 3) - 4;
+    if (endoffset <= 0 || bytestream2_get_bytes_left(gb) < endoffset)
+        return AVERROR_INVALIDDATA;
+
+    offset = endoffset;
+    next = AV_RL32(src + endoffset);
+    rshift = (((size_in_bits & 0xFF) - 1) & 7) + 15;
+    lshift = 32 - rshift;
+    idx = (next >> rshift) & 0x3FF;     /* initial 10-bit table index */
+    for (i = 0; i < op_size; i++) {
+        dst[i] = optable[idx].val1;
+        val = optable[idx].val2;
+        sum = val + lshift;
+        x = (next << lshift) >> 1 >> (31 - val);    /* next val bits of the window */
+        newoffset = offset - (sum >> 3);
+        lshift = sum & 7;
+        idx = x + optable[idx].next;
+        offset = newoffset;
+        /* The window only ever moves backwards; refuse to read before src. */
+        if (offset < 0)
+            return AVERROR_INVALIDDATA;
+        next = AV_RL32(src + offset);
+    }
+
+    bytestream2_skip(gb, endoffset);
+
+    return 0;
+}
+
+/* Fill dstp with op_size opcode bytes read from gb. The two low bits of
+ * the first byte select the storage mode: 0 = raw copy, 1 = a single byte
+ * value repeated, anything else = table-coded stream. Returns the number
+ * of input bytes consumed, or a negative error code. */
+static int dxv_decompress_opcodes(GetByteContext *gb, void *dstp, size_t op_size)
+{
+    const int start = bytestream2_tell(gb);
+    const int mode  = bytestream2_peek_byte(gb) & 3;
+
+    switch (mode) {
+    case 0:
+        bytestream2_skip(gb, 1);
+        bytestream2_get_buffer(gb, dstp, op_size);
+        break;
+    case 1:
+        bytestream2_skip(gb, 1);
+        memset(dstp, bytestream2_get_byte(gb), op_size);
+        break;
+    default: {
+        /* The flag byte is not skipped here: fill_ltable() reads it as
+         * part of its first 32-bit word. */
+        uint32_t table[256];
+        int err, elements = 0;
+
+        err = fill_ltable(gb, table, &elements);
+        if (err < 0)
+            return err;
+        err = get_opcodes(gb, table, dstp, op_size, elements);
+        if (err < 0)
+            return err;
+        break;
+    }
+    }
+
+    return bytestream2_tell(gb) - start;
+}
+
+/*
+ * Produce the next 8 output bytes of one chroma channel at *dstp.
+ *
+ * While *statep is positive a run is in progress: the 8 bytes located 16
+ * bytes back are repeated and the run counter is decremented. Otherwise
+ * one opcode is taken from op_data[*oindex]:
+ *   0      start a run: copy from 16 bytes back and load the run counter
+ *          from the stream (one byte, extended by 16-bit words);
+ *   1..17  assemble the 8 bytes from a mix of stream literals, the bytes
+ *          16 back, a back-reference at a multiple of 16 bytes, and
+ *          pointers remembered in the two hash tables.
+ * tab0 is keyed by a hash of the 16-bit value at dst, tab1 by a hash of
+ * the 24-bit value at dst + 2; both hashes are the top 8 bits of the
+ * value multiplied by 0x9E3779B1.
+ *
+ * ctx and tex_size are not used by this function. *oindex, *dstp and
+ * *statep are updated on success.
+ *
+ * Returns 0 on success, AVERROR_INVALIDDATA for an out-of-range
+ * back-reference, an empty hash slot, or a truncated run length.
+ */
+static int dxv_decompress_cgo(DXVContext *ctx, GetByteContext *gb,
+                              uint8_t *tex_data, int tex_size,
+                              uint8_t *op_data, int *oindex,
+                              uint8_t **dstp, int *statep,
+                              uint8_t **tab0, uint8_t **tab1)
+{
+    uint8_t *dst = *dstp;
+    uint8_t *tptr0, *tptr1, *tptr3;
+    int oi = *oindex;
+    int state = *statep;
+    int opcode, v, vv;
+
+    if (state <= 0) {
+        opcode = op_data[oi++];
+        if (opcode) {
+            switch (opcode) {
+            case 1:
+                AV_WL32(dst, AV_RL32(dst - 16));
+                AV_WL32(dst + 4, AV_RL32(dst - 12));
+                break;
+            case 2:
+                vv = 16 * ((uint16_t)bytestream2_get_le16(gb) + 1);
+                if (vv < 0 || vv > dst - tex_data)
+                    return AVERROR_INVALIDDATA;
+                tptr0 = dst - vv;
+                v = AV_RL32(tptr0);
+                AV_WL32(dst, AV_RL32(tptr0));
+                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
+                tab0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
+                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 3:
+                AV_WL32(dst, bytestream2_get_le32(gb));
+                AV_WL32(dst + 4, bytestream2_get_le32(gb));
+                tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 4:
+                tptr3 = tab1[bytestream2_get_byte(gb)];
+                /* Same guard as every other table lookup: the slot may
+                 * never have been filled. */
+                if (!tptr3)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, bytestream2_get_le16(gb));
+                AV_WL16(dst + 2, AV_RL16(tptr3));
+                dst[4] = tptr3[2];
+                AV_WL16(dst + 5, bytestream2_get_le16(gb));
+                dst[7] = bytestream2_get_byte(gb);
+                tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+                break;
+            case 5:
+                tptr3 = tab1[bytestream2_get_byte(gb)];
+                if (!tptr3)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, bytestream2_get_le16(gb));
+                AV_WL16(dst + 2, bytestream2_get_le16(gb));
+                dst[4] = bytestream2_get_byte(gb);
+                AV_WL16(dst + 5, AV_RL16(tptr3));
+                dst[7] = tptr3[2];
+                tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 6:
+                tptr0 = tab1[bytestream2_get_byte(gb)];
+                if (!tptr0)
+                    return AVERROR_INVALIDDATA;
+                tptr1 = tab1[bytestream2_get_byte(gb)];
+                if (!tptr1)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, bytestream2_get_le16(gb));
+                AV_WL16(dst + 2, AV_RL16(tptr0));
+                dst[4] = tptr0[2];
+                AV_WL16(dst + 5, AV_RL16(tptr1));
+                dst[7] = tptr1[2];
+                tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+                break;
+            case 7:
+                v = 16 * ((uint16_t)bytestream2_get_le16(gb) + 1);
+                if (v < 0 || v > dst - tex_data)
+                    return AVERROR_INVALIDDATA;
+                tptr0 = dst - v;
+                AV_WL16(dst, bytestream2_get_le16(gb));
+                AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
+                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
+                tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 8:
+                tptr1 = tab0[bytestream2_get_byte(gb)];
+                if (!tptr1)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, AV_RL16(tptr1));
+                AV_WL16(dst + 2, bytestream2_get_le16(gb));
+                AV_WL32(dst + 4, bytestream2_get_le32(gb));
+                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 9:
+                tptr1 = tab0[bytestream2_get_byte(gb)];
+                if (!tptr1)
+                    return AVERROR_INVALIDDATA;
+                tptr3 = tab1[bytestream2_get_byte(gb)];
+                if (!tptr3)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, AV_RL16(tptr1));
+                AV_WL16(dst + 2, AV_RL16(tptr3));
+                dst[4] = tptr3[2];
+                AV_WL16(dst + 5, bytestream2_get_le16(gb));
+                dst[7] = bytestream2_get_byte(gb);
+                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 10:
+                tptr1 = tab0[bytestream2_get_byte(gb)];
+                if (!tptr1)
+                    return AVERROR_INVALIDDATA;
+                tptr3 = tab1[bytestream2_get_byte(gb)];
+                if (!tptr3)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, AV_RL16(tptr1));
+                AV_WL16(dst + 2, bytestream2_get_le16(gb));
+                dst[4] = bytestream2_get_byte(gb);
+                AV_WL16(dst + 5, AV_RL16(tptr3));
+                dst[7] = tptr3[2];
+                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 11:
+                tptr0 = tab0[bytestream2_get_byte(gb)];
+                if (!tptr0)
+                    return AVERROR_INVALIDDATA;
+                tptr3 = tab1[bytestream2_get_byte(gb)];
+                if (!tptr3)
+                    return AVERROR_INVALIDDATA;
+                tptr1 = tab1[bytestream2_get_byte(gb)];
+                if (!tptr1)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, AV_RL16(tptr0));
+                AV_WL16(dst + 2, AV_RL16(tptr3));
+                dst[4] = tptr3[2];
+                AV_WL16(dst + 5, AV_RL16(tptr1));
+                dst[7] = tptr1[2];
+                break;
+            case 12:
+                tptr1 = tab0[bytestream2_get_byte(gb)];
+                if (!tptr1)
+                    return AVERROR_INVALIDDATA;
+                v = 16 * ((uint16_t)bytestream2_get_le16(gb) + 1);
+                if (v < 0 || v > dst - tex_data)
+                    return AVERROR_INVALIDDATA;
+                tptr0 = dst - v;
+                AV_WL16(dst, AV_RL16(tptr1));
+                AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
+                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
+                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 13:
+                AV_WL16(dst, AV_RL16(dst - 16));
+                AV_WL16(dst + 2, bytestream2_get_le16(gb));
+                AV_WL32(dst + 4, bytestream2_get_le32(gb));
+                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 14:
+                tptr3 = tab1[bytestream2_get_byte(gb)];
+                if (!tptr3)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, AV_RL16(dst - 16));
+                AV_WL16(dst + 2, AV_RL16(tptr3));
+                dst[4] = tptr3[2];
+                AV_WL16(dst + 5, bytestream2_get_le16(gb));
+                dst[7] = bytestream2_get_byte(gb);
+                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 15:
+                tptr3 = tab1[bytestream2_get_byte(gb)];
+                if (!tptr3)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, AV_RL16(dst - 16));
+                AV_WL16(dst + 2, bytestream2_get_le16(gb));
+                dst[4] = bytestream2_get_byte(gb);
+                AV_WL16(dst + 5, AV_RL16(tptr3));
+                dst[7] = tptr3[2];
+                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 16:
+                tptr3 = tab1[bytestream2_get_byte(gb)];
+                if (!tptr3)
+                    return AVERROR_INVALIDDATA;
+                tptr1 = tab1[bytestream2_get_byte(gb)];
+                if (!tptr1)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, AV_RL16(dst - 16));
+                AV_WL16(dst + 2, AV_RL16(tptr3));
+                dst[4] = tptr3[2];
+                AV_WL16(dst + 5, AV_RL16(tptr1));
+                dst[7] = tptr1[2];
+                break;
+            case 17:
+                v = 16 * ((uint16_t)bytestream2_get_le16(gb) + 1);
+                if (v < 0 || v > dst - tex_data)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, AV_RL16(dst - 16));
+                AV_WL16(dst + 2, AV_RL16(&dst[-v + 2]));
+                AV_WL32(dst + 4, AV_RL32(&dst[-v + 4]));
+                tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            default:
+                break;
+            }
+        } else {
+            /* Opcode 0: run of repeats. A length byte of 255 is extended by
+             * 16-bit words for as long as each word equals 0xFFFF. */
+            v = bytestream2_get_byte(gb);
+            if (v == 255) {
+                do {
+                    if (bytestream2_get_bytes_left(gb) <= 0)
+                        return AVERROR_INVALIDDATA;
+                    opcode = bytestream2_get_le16(gb);
+                    v += opcode;
+                } while (opcode == 0xFFFF);
+            }
+            AV_WL32(dst, AV_RL32(dst - 16));
+            AV_WL32(dst + 4, AV_RL32(dst - 12));
+            state = v + 3;
+        }
+    } else {
+        /* Run in progress: repeat the bytes 16 back without using an opcode */
+        AV_WL32(dst, AV_RL32(dst - 16));
+        AV_WL32(dst + 4, AV_RL32(dst - 12));
+        state--;
+    }
+    dst += 8;
+
+    *oindex = oi;
+    *dstp = dst;
+    *statep = state;
+
+    return 0;
+}
+
+/*
+ * Decode the two-channel chroma texture into tex_data.
+ *
+ * The input starts with three 32-bit little-endian fields: the offset of
+ * the opcode streams (measured from the start of these fields) and the
+ * sizes of the two opcode arrays. Both opcode arrays are unpacked into
+ * op_data0 / op_data1 first; then the first 16 output bytes are read
+ * literally and the rest is produced 8 bytes at a time by alternating
+ * dxv_decompress_cgo() calls, one per channel, each with its own opcode
+ * array, run state and pair of hash tables. On success gb is left just
+ * past the second opcode stream.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int dxv_decompress_cocg(DXVContext *ctx, GetByteContext *gb,
+                               uint8_t *tex_data, int tex_size,
+                               uint8_t *op_data0, uint8_t *op_data1)
+{
+    /* Every hash slot starts empty so that the NULL checks in
+     * dxv_decompress_cgo() catch references to slots never filled. */
+    uint8_t *tab0[256] = { 0 }, *tab1[256] = { 0 };
+    uint8_t *tab2[256] = { 0 }, *tab3[256] = { 0 };
+    uint8_t *dst;
+    int op_offset = bytestream2_get_le32(gb);
+    int op_size0 = bytestream2_get_le32(gb);
+    int op_size1 = bytestream2_get_le32(gb);
+    int data_start = bytestream2_tell(gb);
+    int skip0, skip1, oi0 = 0, oi1 = 0;
+    int ret, state0 = 0, state1 = 0;
+
+    /* The 12 header bytes are already consumed; the remaining distance to
+     * the opcode streams must be non-negative and lie inside the input. */
+    if (op_offset < 12 || op_offset - 12 > bytestream2_get_bytes_left(gb))
+        return AVERROR_INVALIDDATA;
+
+    dst = tex_data;
+    bytestream2_skip(gb, op_offset - 12);
+    skip0 = dxv_decompress_opcodes(gb, op_data0, op_size0);
+    if (skip0 < 0)
+        return skip0;
+    bytestream2_seek(gb, data_start + op_offset + skip0 - 12, SEEK_SET);
+    skip1 = dxv_decompress_opcodes(gb, op_data1, op_size1);
+    if (skip1 < 0)
+        return skip1;
+    bytestream2_seek(gb, data_start, SEEK_SET);
+
+    /* First 16 bytes (8 per channel) are stored literally */
+    AV_WL32(dst, bytestream2_get_le32(gb));
+    AV_WL32(dst + 4, bytestream2_get_le32(gb));
+    AV_WL32(dst + 8, bytestream2_get_le32(gb));
+    AV_WL32(dst + 12, bytestream2_get_le32(gb));
+
+    tab0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+    tab1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
+    tab2[0x9E3779B1 * AV_RL16(dst + 8) >> 24] = dst + 8;
+    tab3[0x9E3779B1 * (AV_RL32(dst + 10) & 0xFFFFFF) >> 24] = dst + 10;
+    dst += 16;
+    while (dst + 10 < tex_data + tex_size) {
+        ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data0, &oi0, &dst, &state0, tab0, tab1);
+        if (ret < 0)
+            return ret;
+        ret = dxv_decompress_cgo(ctx, gb, tex_data, tex_size, op_data1, &oi1, &dst, &state1, tab2, tab3);
+        if (ret < 0)
+            return ret;
+    }
+
+    bytestream2_seek(gb, data_start + op_offset + skip0 + skip1 - 12, SEEK_SET);
+
+    return 0;
+}
+
+static int dxv_decompress_yo(DXVContext *ctx, GetByteContext *gb,
+                             uint8_t *tex_data, int tex_size, uint8_t *op_data)
+{
+    int op_offset  = bytestream2_get_le32(gb);
+    int op_size    = bytestream2_get_le32(gb);
+    int data_start = bytestream2_tell(gb);
+    uint8_t *dst, *tptr0, *tptr1, *table0[256], *table1[256];
+    int opcode, skip, oi = 0, v, vv;
+
+    dst = tex_data;
+    bytestream2_skip(gb, op_offset - 8);
+    skip = dxv_decompress_opcodes(gb, op_data, op_size);
+    if (skip < 0)
+        return skip;
+    bytestream2_seek(gb, data_start, SEEK_SET);
+
+    v = bytestream2_get_le32(gb);
+    AV_WL32(dst, v);
+    vv = bytestream2_get_le32(gb);
+    table0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
+    AV_WL32(dst + 4, vv);
+    table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
+    dst += 8;
+
+    while (dst < tex_data + tex_size) {
+        opcode = op_data[oi++];
+        if (opcode) {
+            switch (opcode) {
+            case 1:
+                AV_WL32(dst, AV_RL32(dst - 8));
+                AV_WL32(dst + 4, AV_RL32(dst - 4));
+                break;
+            case 2:
+                vv = 8 * (uint16_t)bytestream2_get_le16(gb) + 8;
+                if (vv > dst - tex_data)
+                    return AVERROR_INVALIDDATA;
+                tptr0 = dst - vv;
+                v = AV_RL32(tptr0);
+                AV_WL32(dst, AV_RL32(tptr0));
+                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
+                table0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
+                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFU) >> 24] = dst + 2;
+                break;
+            case 3:
+                v = bytestream2_get_le32(gb);
+                AV_WL32(dst, v);
+                AV_WL32(dst + 4, bytestream2_get_le32(gb));
+                vv = AV_RL32(dst + 2);
+                table0[0x9E3779B1 * (uint16_t)v >> 24] = dst;
+                table1[0x9E3779B1 * (vv & 0xFFFFFF) >> 24] = dst + 2;
+                break;
+            case 4:
+                tptr1 = table1[bytestream2_get_byte(gb)];
+                if (!tptr1)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, bytestream2_get_le16(gb));
+                AV_WL16(dst + 2, AV_RL16(tptr1));
+                dst[4] = tptr1[2];
+                AV_WL16(dst + 5, bytestream2_get_le16(gb));
+                dst[7] = bytestream2_get_byte(gb);
+                v = 0x9E3779B1 * AV_RL16(dst) >> 24;
+                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                table0[v] = dst;
+                break;
+            case 5:
+                tptr1 = table1[bytestream2_get_byte(gb)];
+                if (!tptr1)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, bytestream2_get_le16(gb));
+                AV_WL16(dst + 2, bytestream2_get_le16(gb));
+                dst[4] = bytestream2_get_byte(gb);
+                AV_WL16(dst + 5, AV_RL16(tptr1));
+                dst[7] = tptr1[2];
+                table0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 6:
+                tptr0 = table1[bytestream2_get_byte(gb)];
+                if (!tptr0)
+                    return AVERROR_INVALIDDATA;
+                tptr1 = table1[bytestream2_get_byte(gb)];
+                if (!tptr1)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, bytestream2_get_le16(gb));
+                AV_WL16(dst + 2, AV_RL16(tptr0));
+                dst[4] = tptr0[2];
+                AV_WL16(dst + 5, AV_RL16(tptr1));
+                dst[7] = tptr1[2];
+                table0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+                break;
+            case 7:
+                v = 8 * (uint16_t)bytestream2_get_le16(gb) + 8;
+                if (v > dst - tex_data)
+                    return AVERROR_INVALIDDATA;
+                tptr0 = dst - v;
+                AV_WL16(dst, bytestream2_get_le16(gb));
+                AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
+                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
+                table0[0x9E3779B1 * AV_RL16(dst) >> 24] = dst;
+                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 8:
+                tptr0 = table0[bytestream2_get_byte(gb)];
+                if (!tptr0)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, AV_RL16(tptr0));
+                AV_WL16(dst + 2, bytestream2_get_le16(gb));
+                AV_WL32(dst + 4, bytestream2_get_le32(gb));
+                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 9:
+                tptr0 = table0[bytestream2_get_byte(gb)];
+                if (!tptr0)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, AV_RL16(tptr0));
+                tptr1 = table1[bytestream2_get_byte(gb)];
+                if (!tptr1)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst + 2, AV_RL16(tptr1));
+                dst[4] = tptr1[2];
+                AV_WL16(dst + 5, bytestream2_get_le16(gb));
+                dst[7] = bytestream2_get_byte(gb);
+                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 10:
+                tptr0 = table0[bytestream2_get_byte(gb)];
+                if (!tptr0)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, AV_RL16(tptr0));
+                tptr1 = table1[bytestream2_get_byte(gb)];
+                AV_WL16(dst + 2, bytestream2_get_le16(gb));
+                dst[4] = bytestream2_get_byte(gb);
+                AV_WL16(dst + 5, AV_RL16(tptr1));
+                dst[7] = tptr1[2];
+                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 11:
+                tptr0 = table0[bytestream2_get_byte(gb)];
+                if (!tptr0)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, AV_RL16(tptr0));
+                tptr0 = table1[bytestream2_get_byte(gb)];
+                if (!tptr0)
+                    return AVERROR_INVALIDDATA;
+                tptr1 = table1[bytestream2_get_byte(gb)];
+                if (!tptr1)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst + 2, AV_RL16(tptr0));
+                dst[4] = tptr0[2];
+                AV_WL16(dst + 5, AV_RL16(tptr1));
+                dst[7] = tptr1[2];
+                break;
+            case 12:
+                tptr0 = table0[bytestream2_get_byte(gb)];
+                if (!tptr0)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, AV_RL16(tptr0));
+                tptr0 = dst - (8 * bytestream2_get_le16(gb) + 8);
+                AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
+                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
+                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
+                break;
+            case 13:
+                AV_WL16(dst, AV_RL16(dst - 8));
+                AV_WL16(dst + 2, bytestream2_get_le16(gb));
+                AV_WL32(dst + 4, bytestream2_get_le32(gb));
+                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
+                break;
+            case 14:
+                tptr1 = table1[bytestream2_get_byte(gb)];
+                if (!tptr1)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, AV_RL16(dst - 8));
+                AV_WL16(dst + 2, AV_RL16(tptr1));
+                dst[4] = tptr1[2];
+                AV_WL16(dst + 5, bytestream2_get_le16(gb));
+                dst[7] = bytestream2_get_byte(gb);
+                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFF) >> 24] = dst + 2;
+                break;
+            case 15:
+                tptr1 = table1[bytestream2_get_byte(gb)];
+                if (!tptr1)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, AV_RL16(dst - 8));
+                AV_WL16(dst + 2, bytestream2_get_le16(gb));
+                dst[4] = bytestream2_get_byte(gb);
+                AV_WL16(dst + 5, AV_RL16(tptr1));
+                dst[7] = tptr1[2];
+                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            case 16:
+                tptr0 = table1[bytestream2_get_byte(gb)];
+                if (!tptr0)
+                    return AVERROR_INVALIDDATA;
+                tptr1 = table1[bytestream2_get_byte(gb)];
+                if (!tptr1)
+                    return AVERROR_INVALIDDATA;
+                AV_WL16(dst, AV_RL16(dst - 8));
+                AV_WL16(dst + 2, AV_RL16(tptr0));
+                dst[4] = tptr0[2];
+                AV_WL16(dst + 5, AV_RL16(tptr1));
+                dst[7] = tptr1[2];
+                break;
+            case 17:
+                v = 8 * bytestream2_get_le16(gb) + 8;
+                if (v > dst - tex_data)
+                    return AVERROR_INVALIDDATA;
+                tptr0 = dst - v;
+                AV_WL16(dst, AV_RL16(dst - 8));
+                AV_WL16(dst + 2, AV_RL16(tptr0 + 2));
+                AV_WL32(dst + 4, AV_RL32(tptr0 + 4));
+                table1[0x9E3779B1 * (AV_RL32(dst + 2) & 0xFFFFFFu) >> 24] = dst + 2;
+                break;
+            default:
+                break;
+            }
+            dst += 8;
+        } else {
+            v = bytestream2_get_byte(gb);
+            if (v == 255) {
+                do {
+                    if (bytestream2_get_bytes_left(gb) <= 0)
+                        return AVERROR_INVALIDDATA;
+                    opcode = bytestream2_get_le16(gb);
+                    v += opcode;
+                } while (opcode == 0xFFFF);
+            }
+            vv = v + 4;
+            do {
+                AV_WL32(dst, AV_RL32(dst - 8));
+                AV_WL32(dst + 4, AV_RL32(dst - 4));
+                dst += 8;
+                --vv;
+            } while (vv);
+        }
+    }
+
+    bytestream2_seek(gb, data_start + op_offset + skip - 8, SEEK_SET);
+
+    return 0;
+}
+
+/* YCG6: run the yo pass over tex_data, then the cocg pass over ctex_data. */
+static int dxv_decompress_ycg6(AVCodecContext *avctx)
+{
+    DXVContext *const s = avctx->priv_data;
+    GetByteContext *const bc = &s->gbc;
+    const int err = dxv_decompress_yo(s, bc, s->tex_data, s->tex_size, s->op_data[0]);
+
+    if (err < 0)
+        return err; /* first pass failed: hand its error code to the caller */
+    return dxv_decompress_cocg(s, bc, s->ctex_data, s->ctex_size,
+                               s->op_data[1], s->op_data[2]);
+}
+
+/* YG10: run the cocg pass over tex_data, then again over ctex_data. */
+static int dxv_decompress_yg10(AVCodecContext *avctx)
+{
+    DXVContext *const s = avctx->priv_data;
+    GetByteContext *const bc = &s->gbc;
+    const int err = dxv_decompress_cocg(s, bc, s->tex_data, s->tex_size, s->op_data[0], s->op_data[3]);
+
+    if (err < 0)
+        return err; /* first pass failed: hand its error code to the caller */
+    return dxv_decompress_cocg(s, bc, s->ctex_data, s->ctex_size,
+                               s->op_data[1], s->op_data[2]);
+}
+
 static int dxv_decompress_dxt5(AVCodecContext *avctx)
 {
     DXVContext *ctx = avctx->priv_data;
@@ -359,6 +1257,9 @@  static int dxv_decode(AVCodecContext *avctx, void *data,
 
     bytestream2_init(gbc, avpkt->data, avpkt->size);
 
+    ctx->texture_block_h = 4;
+    ctx->texture_block_w = 4;
+
     tag = bytestream2_get_le32(gbc);
     switch (tag) {
     case MKBETAG('D', 'X', 'T', '1'):
@@ -378,9 +1279,35 @@  static int dxv_decode(AVCodecContext *avctx, void *data,
         msgtext = "DXT5";
         break;
     case MKBETAG('Y', 'C', 'G', '6'):
+        decompress_tex = dxv_decompress_ycg6;
+        ctx->tex_funct = yo_block;
+        ctx->ctex_funct = cocg_block;
+        ctx->tex_rat   = 8;
+        ctx->tex_step  = 32;
+        ctx->ctex_step = 16;
+        msgcomp = "YOCOCG6";
+        msgtext = "YCG6";
+        ctx->ctex_size = avctx->coded_width * avctx->coded_height / 4;
+        ctx->texture_block_h = 4;
+        ctx->texture_block_w = 16;
+        ctx->ctexture_block_h = 8;
+        ctx->ctexture_block_w = 8;
+        break;
     case MKBETAG('Y', 'G', '1', '0'):
-        avpriv_report_missing_feature(avctx, "Tag 0x%08"PRIX32, tag);
-        return AVERROR_PATCHWELCOME;
+        decompress_tex = dxv_decompress_yg10;
+        ctx->tex_funct = yao_block;
+        ctx->ctex_funct = cocg_block;
+        ctx->tex_rat   = 4;
+        ctx->tex_step  = 64;
+        ctx->ctex_step = 16;
+        msgcomp = "YAOCOCG10";
+        msgtext = "YG10";
+        ctx->ctex_size = avctx->coded_width * avctx->coded_height / 4;
+        ctx->texture_block_h = 4;
+        ctx->texture_block_w = 16;
+        ctx->ctexture_block_h = 8;
+        ctx->ctexture_block_w = 8;
+        break;
     default:
         /* Old version does not have a real header, just size and type. */
         size = tag & 0x00FFFFFF;
@@ -413,6 +1340,10 @@  static int dxv_decode(AVCodecContext *avctx, void *data,
         break;
     }
 
+    ctx->slice_count = av_clip(avctx->thread_count, 1,
+                               avctx->coded_height / FFMAX(ctx->texture_block_h,
+                                                           ctx->ctexture_block_h));
+
     /* New header is 12 bytes long. */
     if (!old_type) {
         version_major = bytestream2_get_byte(gbc) - 1;
@@ -444,6 +1375,24 @@  static int dxv_decode(AVCodecContext *avctx, void *data,
     if (ret < 0)
         return ret;
 
+    if (ctx->ctex_size) {
+        int i;
+
+        ctx->op_size[0] = avctx->coded_width * avctx->coded_height / 16;
+        ctx->op_size[1] = avctx->coded_width * avctx->coded_height / 32;
+        ctx->op_size[2] = avctx->coded_width * avctx->coded_height / 32;
+        ctx->op_size[3] = avctx->coded_width * avctx->coded_height / 16;
+
+        ret = av_reallocp(&ctx->ctex_data, ctx->ctex_size);
+        if (ret < 0)
+            return ret;
+        for (i = 0; i < 4; i++) {
+            ret = av_reallocp(&ctx->op_data[i], ctx->op_size[i]);
+            if (ret < 0)
+                return ret;
+        }
+    }
+
     /* Decompress texture out of the intermediate compression. */
     ret = decompress_tex(avctx);
     if (ret < 0)
@@ -484,9 +1433,6 @@  static int dxv_init(AVCodecContext *avctx)
     ff_texturedsp_init(&ctx->texdsp);
     avctx->pix_fmt = AV_PIX_FMT_RGBA;
 
-    ctx->slice_count = av_clip(avctx->thread_count, 1,
-                               avctx->coded_height / TEXTURE_BLOCK_H);
-
     return 0;
 }
 
@@ -495,6 +1441,12 @@  static int dxv_close(AVCodecContext *avctx)
     DXVContext *ctx = avctx->priv_data;
 
     av_freep(&ctx->tex_data);
+    av_freep(&ctx->ctex_data);
+    /* dxv_decode() allocates four opcode buffers (op_data[0..3]); free them all. */
+    av_freep(&ctx->op_data[0]);
+    av_freep(&ctx->op_data[1]);
+    av_freep(&ctx->op_data[2]);
+    av_freep(&ctx->op_data[3]);
 
     return 0;
 }