[FFmpeg-devel] Fix for paletteuse to support transparency

Submitted by Bjorn Roche on Oct. 6, 2017, 8:59 p.m.

Details

Message ID 20171006205955.93052-1-bjorn@giphy.com
State New
Headers show

Commit Message

Bjorn Roche Oct. 6, 2017, 8:59 p.m.
---
 libavfilter/vf_paletteuse.c | 175 ++++++++++++++++++++++++++++----------------
 1 file changed, 112 insertions(+), 63 deletions(-)

Comments

Carl Eugen Hoyos Oct. 6, 2017, 9:23 p.m.
2017-10-06 22:59 GMT+02:00 Bjorn Roche <bjorn@giphy.com>:

>  libavfilter/vf_paletteuse.c | 175

Works fine for me, passes fate, needs a review from Clément.

Do you think an option to set the value of the transparent
colour to something else than 0xFF00 would make sense?
And/or another default?
You can use ffplay on an output png file to test if it's not
clear what I mean.

Thank you, Carl Eugen
Clément Bœsch Oct. 7, 2017, 11:48 a.m.
> Subject: Re: [FFmpeg-devel] [PATCH] Fix for paletteuse to support transparency

"lavf/paletteuse: add support for transparency"

On Fri, Oct 06, 2017 at 04:59:55PM -0400, Bjorn Roche wrote:
> ---
>  libavfilter/vf_paletteuse.c | 175 ++++++++++++++++++++++++++++----------------
>  1 file changed, 112 insertions(+), 63 deletions(-)
> 
> diff --git a/libavfilter/vf_paletteuse.c b/libavfilter/vf_paletteuse.c
> index ffd37bf1da..4203543843 100644
> --- a/libavfilter/vf_paletteuse.c
> +++ b/libavfilter/vf_paletteuse.c
> @@ -56,7 +56,7 @@ enum diff_mode {
>  };
>  
>  struct color_node {
> -    uint8_t val[3];
> +    uint8_t val[4];
>      uint8_t palette_id;
>      int split;
>      int left_id, right_id;
> @@ -86,6 +86,7 @@ typedef struct PaletteUseContext {
>      struct cache_node cache[CACHE_SIZE];    /* lookup cache */
>      struct color_node map[AVPALETTE_COUNT]; /* 3D-Tree (KD-Tree with K=3) for reverse colormap */
>      uint32_t palette[AVPALETTE_COUNT];
> +    int transparency_index; /* index in the palette of transparency. -1 if there isn't a transparency. */
>      int palette_loaded;
>      int dither;
>      int new;
> @@ -108,6 +109,7 @@ typedef struct PaletteUseContext {
>  #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
>  static const AVOption paletteuse_options[] = {
>      { "dither", "select dithering mode", OFFSET(dither), AV_OPT_TYPE_INT, {.i64=DITHERING_SIERRA2_4A}, 0, NB_DITHERING-1, FLAGS, "dithering_mode" },

> +        { "none",            "no dither",                                                              0, AV_OPT_TYPE_CONST, {.i64=DITHERING_NONE},            INT_MIN, INT_MAX, FLAGS, "dithering_mode" },

I think none is already supported as builtin in AVOption, isn't it? In any
case, this should probably be in a separated patch if deemed useful.

>          { "bayer",           "ordered 8x8 bayer dithering (deterministic)",                            0, AV_OPT_TYPE_CONST, {.i64=DITHERING_BAYER},           INT_MIN, INT_MAX, FLAGS, "dithering_mode" },
>          { "heckbert",        "dithering as defined by Paul Heckbert in 1982 (simple error diffusion)", 0, AV_OPT_TYPE_CONST, {.i64=DITHERING_HECKBERT},        INT_MIN, INT_MAX, FLAGS, "dithering_mode" },
>          { "floyd_steinberg", "Floyd and Steingberg dithering (error diffusion)",                       0, AV_OPT_TYPE_CONST, {.i64=DITHERING_FLOYD_STEINBERG}, INT_MIN, INT_MAX, FLAGS, "dithering_mode" },
> @@ -157,7 +159,8 @@ static int query_formats(AVFilterContext *ctx)
>  
>  static av_always_inline int dither_color(uint32_t px, int er, int eg, int eb, int scale, int shift)
>  {
> -    return av_clip_uint8((px >> 16 & 0xff) + ((er * scale) / (1<<shift))) << 16

> +    return av_clip_uint8((px >> 24 & 0xff)                              ) << 24

Here and several times later, I believe you can drop the `& 0xff`.

> +         | av_clip_uint8((px >> 16 & 0xff) + ((er * scale) / (1<<shift))) << 16
>           | av_clip_uint8((px >>  8 & 0xff) + ((eg * scale) / (1<<shift))) <<  8
>           | av_clip_uint8((px       & 0xff) + ((eb * scale) / (1<<shift)));
>  }
> @@ -165,10 +168,18 @@ static av_always_inline int dither_color(uint32_t px, int er, int eg, int eb, in
>  static av_always_inline int diff(const uint8_t *c1, const uint8_t *c2)
>  {
>      // XXX: try L*a*b with CIE76 (dL*dL + da*da + db*db)
> -    const int dr = c1[0] - c2[0];
> -    const int dg = c1[1] - c2[1];
> -    const int db = c1[2] - c2[2];
> -    return dr*dr + dg*dg + db*db;
> +    const static int max_diff = 255*255 + 255*255 + 255*255;
> +    const int dr = c1[1] - c2[1];
> +    const int dg = c1[2] - c2[2];
> +    const int db = c1[3] - c2[3];
> +
> +    if (c1[0] == 0 && c2[0] == 0) {
> +        return 0;
> +    } else if (c1[0] == c2[0]) {
> +        return dr*dr + dg*dg + db*db;
> +    } else {
> +        return max_diff;
> +    }

So if both alpha are 0, you consider the color identical, and otherwise if
both alpha are different, you consider the color completely different?

I guess that's OK. Please inline 255*255 + 255*255 + 255*255 in the return
though, I don't trust compilers into optimizing that static int.

>  }
>  
>  static av_always_inline uint8_t colormap_nearest_bruteforce(const uint32_t *palette, const uint8_t *rgb)
> @@ -179,18 +190,20 @@ static av_always_inline uint8_t colormap_nearest_bruteforce(const uint32_t *pale
>          const uint32_t c = palette[i];
>  
>          if ((c & 0xff000000) == 0xff000000) { // ignore transparent entry
> -            const uint8_t palrgb[] = {
> +            const uint8_t palargb[] = {
> +                palette[i]>>24 & 0xff,

according to the condition just above, this is always 0xff. Is this what
you want?

>                  palette[i]>>16 & 0xff,
>                  palette[i]>> 8 & 0xff,
>                  palette[i]     & 0xff,
>              };
> -            const int d = diff(palrgb, rgb);
> +            const int d = diff(palargb, rgb);
>              if (d < min_dist) {
>                  pal_id = i;
>                  min_dist = d;
>              }
>          }
>      }

> +

unrelated change

>      return pal_id;
>  }
>  
> @@ -325,14 +338,15 @@ end:
>   * Note: r, g, and b are the component of c but are passed as well to avoid
>   * recomputing them (they are generally computed by the caller for other uses).
>   */

This comment probably needs adjustment for `a` (and yes, it refers to `c`
instead of `color`, this needs to be adjusted too).

> -static av_always_inline int color_get(struct cache_node *cache, uint32_t color,
> -                                      uint8_t r, uint8_t g, uint8_t b,

> +static av_always_inline int color_get(struct cache_node *cache, uint32_t argb,

I'd prefer if you'd keep "color" instead of "argb", it would reduce the
diff.

> +                                      uint8_t a, uint8_t r, uint8_t g, uint8_t b,
> +                                      int transparency_index,
>                                        const struct color_node *map,
>                                        const uint32_t *palette,
>                                        const enum color_search_method search_method)
>  {
>      int i;
> -    const uint8_t rgb[] = {r, g, b};

> +    const uint8_t argb_elts[] = {a, r, g, b};

elts?

>      const uint8_t rhash = r & ((1<<NBITS)-1);
>      const uint8_t ghash = g & ((1<<NBITS)-1);
>      const uint8_t bhash = b & ((1<<NBITS)-1);
> @@ -340,9 +354,14 @@ static av_always_inline int color_get(struct cache_node *cache, uint32_t color,
>      struct cache_node *node = &cache[hash];
>      struct cached_color *e;
>  
> +    // first, check for transparency
> +    if (a == 0 && transparency_index >= 0) {
> +        return transparency_index;
> +    }
> +
>      for (i = 0; i < node->nb_entries; i++) {
>          e = &node->entries[i];
> -        if (e->color == color)
> +        if (e->color == argb)
>              return e->pal_entry;
>      }
>  
> @@ -350,21 +369,24 @@ static av_always_inline int color_get(struct cache_node *cache, uint32_t color,
>                           sizeof(*node->entries), NULL);
>      if (!e)
>          return AVERROR(ENOMEM);
> -    e->color = color;
> -    e->pal_entry = COLORMAP_NEAREST(search_method, palette, map, rgb);
> +    e->color = argb;
> +    e->pal_entry = COLORMAP_NEAREST(search_method, palette, map, argb_elts);
> +
>      return e->pal_entry;
>  }
>  
>  static av_always_inline int get_dst_color_err(struct cache_node *cache,

> -                                              uint32_t c, const struct color_node *map,
> +                                              uint32_t argb, const struct color_node *map,

ditto, please keep c

>                                                const uint32_t *palette,
> +                                              int transparency_index,
>                                                int *er, int *eg, int *eb,
>                                                const enum color_search_method search_method)
[...]
> -    i = 0;
> +    pal_index = 0;
>      for (y = 0; y < palette_frame->height; y++) {
> -        for (x = 0; x < palette_frame->width; x++)
> -            s->palette[i++] = p[x];
> +        for (x = 0; x < palette_frame->width; x++) {
> +            s->palette[pal_index] = p[x];
> +            if ((p[x]>>24 & 0xff) == 0) {
> +                s->transparency_index = pal_index; // we are assuming at most one transparent color
> +            }
> +            ++pal_index;

please just keep `i` as in the original code to keep diff small.

> +        }
>          p += p_linesize;
>      }
>  
> -    load_colormap(s);

> +    load_colormap(s, pal_index);

Why do you need to pass that `pal_index` as `nb_colors`; in what situation
do you end up with nb_colors ≠ AVPALETTE_COUNT?

[...]

Overall this looks pretty fine, but I didn't test yet.

I have a few concerns that may or may not be justified:

- I hope RGB32 pix fmt always set the alpha to 0xff in the rest of the
  framework, otherwise this is going to break pretty hard. We may need to
  add some extra check at some point if we find "too much" transparency
  colors.
- I don't remember if diff should be linear somehow in the kdtree
  algorithm (I think that was originally the reason I didn't  use L*a*b
  instead of RGB, but I might be wrong).
- I'll have to check all the debug options to make sure something didn't
  go wrong.

So I'll test all of this at your next iteration.

Thanks for working on this.
Clément Bœsch Oct. 7, 2017, 11:49 a.m.
On Sat, Oct 07, 2017 at 01:48:16PM +0200, Clément Bœsch wrote:
> > Subject: Re: [FFmpeg-devel] [PATCH] Fix for paletteuse to support transparency
> 
> "lavf/paletteuse: add support for transparency"
> 

Sorry, "lavfi", not "lavf".

Also, can you add the trac ticket reference for this?

[...]
Carl Eugen Hoyos Oct. 7, 2017, 1:53 p.m.
2017-10-06 23:23 GMT+02:00 Carl Eugen Hoyos <ceffmpeg@gmail.com>:
> 2017-10-06 22:59 GMT+02:00 Bjorn Roche <bjorn@giphy.com>:
>
>>  libavfilter/vf_paletteuse.c | 175
>
> Works fine for me

I had tested with the sample from ticket #503 yesterday.

Using the following sample:
http://samples.ffmpeg.org/FLV/flash_with_alpha/300x180-Scr-f8-056alpha.flv
I get less nice results:
$ ffmpeg -i 300x180-Scr-f8-056alpha.flv -vframes 1 -vf palettegen pal.png
$ ffmpeg -i 300x180-Scr-f8-056alpha.flv -i pal.png -lavfi paletteuse
-vframes 1 out.png
Transparency in the output file is not correct, I wonder if the line
"if (c1[0] == 0 && c2[0] == 0" should be more something like
"if (c1[0] < 128 && c2[0] < 128)" (or with a threshold variable).

The results get worse with more frames and apng output:
Large blocks "disappear" from the output file, this looks
quite nice without your patch (but without transparency).

Thank you, Carl Eugen
Bjorn Roche Oct. 10, 2017, 7:48 p.m.
On Fri, Oct 6, 2017 at 5:23 PM, Carl Eugen Hoyos <ceffmpeg@gmail.com> wrote:

> 2017-10-06 22:59 GMT+02:00 Bjorn Roche <bjorn@giphy.com>:
>
> >  libavfilter/vf_paletteuse.c | 175
>
> Works fine for me, passes fate, needs a review from Clément.
>
> Do you think an option to set the value of the transparent
> colour to something else than 0xFF00 would make sense?
> And/or another default?
> You can use ffplay on an output png file to test if it's not
> clear what I mean.
>

I am pretty sure this is an issue in palettegen, since the 0x0000ff00 is
there as well.
Bjorn Roche Oct. 10, 2017, 8:12 p.m.
On Sat, Oct 7, 2017 at 7:48 AM, Clément Bœsch <u@pkh.me> wrote:

> > Subject: Re: [FFmpeg-devel] [PATCH] Fix for paletteuse to support
> transparency
>
> "lavf/paletteuse: add support for transparency"
>
> On Fri, Oct 06, 2017 at 04:59:55PM -0400, Bjorn Roche wrote:
> > ---
>

<snip>


> > +        { "none",            "no dither",
>                                 0, AV_OPT_TYPE_CONST,
> {.i64=DITHERING_NONE},            INT_MIN, INT_MAX, FLAGS, "dithering_mode"
> },
>
> I think none is already supported as builtin in AVOption, isn't it? In any
> case, this should probably be in a separated patch if deemed useful.
>

I'm not sure how that works, but apparently so.



> <snip>
> > +    if (c1[0] == 0 && c2[0] == 0) {
> > +        return 0;
> > +    } else if (c1[0] == c2[0]) {
> > +        return dr*dr + dg*dg + db*db;
> > +    } else {
> > +        return max_diff;
> > +    }
>
> So if both alpha are 0, you consider the color identical, and otherwise if
> both alpha are different, you consider the color completely different?
>

I don't see another clear solution.

<snip>

> >          if ((c & 0xff000000) == 0xff000000) { // ignore transparent
> entry
> > -            const uint8_t palrgb[] = {
> > +            const uint8_t palargb[] = {
> > +                palette[i]>>24 & 0xff,
>
> according to the condition just above, this is always 0xff. Is this what
> you want?
>

There is some ambiguity about dealing with transparency from the incoming
image, and I will have to change that condition anyway (more later).


<snip>

>
> >      return pal_id;
> >  }
> >
> > @@ -325,14 +338,15 @@ end:
> >   * Note: r, g, and b are the component of c but are passed as well to
> avoid
> >   * recomputing them (they are generally computed by the caller for
> other uses).
> >   */
>
> This comment probably needs adjustment for `a` (and yes, it refers to `c`
> instead of `color`, this needs to be adjusted too).
>
> > -static av_always_inline int color_get(struct cache_node *cache,
> uint32_t color,
> > -                                      uint8_t r, uint8_t g, uint8_t b,
>
> > +static av_always_inline int color_get(struct cache_node *cache,
> uint32_t argb,
>
> I'd prefer if you'd keep "color" instead of "argb", it would reduce the
> diff.
>

Changing the variable name was not accidental. "c" and "color" are
ambiguous when converting from rgb to indexed, and this patch no longer
works with rgb, but argb, so I believe being explicit is better. I was very
confused trying to follow which c/color variables where what when diving
into this code.


>
> > +                                      uint8_t a, uint8_t r, uint8_t g,
> uint8_t b,
> > +                                      int transparency_index,
> >                                        const struct color_node *map,
> >                                        const uint32_t *palette,
> >                                        const enum color_search_method
> search_method)
> >  {
> >      int i;
> > -    const uint8_t rgb[] = {r, g, b};
>
> > +    const uint8_t argb_elts[] = {a, r, g, b};
>
> elts?
>

this variable is the argb color, with elements (elts) separated into an
array. It is necessary for the diff function. Is there another variable
name you would prefer?


>
> > +        }
> >          p += p_linesize;
> >      }
> >
> > -    load_colormap(s);
>
> > +    load_colormap(s, pal_index);
>
> Why do you need to pass that `pal_index` as `nb_colors`; in what situation
> do you end up with nb_colors ≠ AVPALETTE_COUNT?
>

Sorry, that's an artifact of a prior version.


>
> [...]
>
> Overall this looks pretty fine, but I didn't test yet.
>
> I have a few concerns that may or may not be justified:
>
> - I hope RGB32 pix fmt always set the alpha to 0xff in the rest of the
>   framework, otherwise this is going to break pretty hard. We may need to
>   add some extra check at some point if we find "too much" transparency
>   colors.
>

I can't speak to this except for the testing I've done, and FATE.


> - I don't remember if diff should be linear somehow in the kdtree
>   algorithm (I think that was originally the reason I didn't  use L*a*b
>   instead of RGB, but I might be wrong).
>

I don't see anything to this effect, but that's part of the reason I kept
transparent colors out of the tree.

- I'll have to check all the debug options to make sure something didn't
>   go wrong.
>
Bjorn Roche Oct. 10, 2017, 8:16 p.m.
On Sat, Oct 7, 2017 at 7:49 AM, Clément Bœsch <u@pkh.me> wrote:

> On Sat, Oct 07, 2017 at 01:48:16PM +0200, Clément Bœsch wrote:
> > > Subject: Re: [FFmpeg-devel] [PATCH] Fix for paletteuse to support
> transparency
> >
> > "lavf/paletteuse: add support for transparency"
> >
>
> Sorry, "lavfi", not "lavf".
>
> Also, can you add the trac ticket reference for this?
>
>
Do you mean this bug?

https://trac.ffmpeg.org/ticket/4443

I'm not sure I consider this a fix, since animated gifs will still be
broken.
Bjorn Roche Oct. 10, 2017, 8:23 p.m.
On Sat, Oct 7, 2017 at 9:53 AM, Carl Eugen Hoyos <ceffmpeg@gmail.com> wrote:

> 2017-10-06 23:23 GMT+02:00 Carl Eugen Hoyos <ceffmpeg@gmail.com>:
> > 2017-10-06 22:59 GMT+02:00 Bjorn Roche <bjorn@giphy.com>:
> >
> >>  libavfilter/vf_paletteuse.c | 175
> >
> > Works fine for me
>
> I had tested with the sample from ticket #503 yesterday.
>
> Using the following sample:
> http://samples.ffmpeg.org/FLV/flash_with_alpha/300x180-Scr-f8-056alpha.flv
> I get less nice results:
> $ ffmpeg -i 300x180-Scr-f8-056alpha.flv -vframes 1 -vf palettegen pal.png
> $ ffmpeg -i 300x180-Scr-f8-056alpha.flv -i pal.png -lavfi paletteuse
> -vframes 1 out.png
> Transparency in the output file is not correct, I wonder if the line
> "if (c1[0] == 0 && c2[0] == 0" should be more something like
> "if (c1[0] < 128 && c2[0] < 128)" (or with a threshold variable).
>
> The results get worse with more frames and apng output:
> Large blocks "disappear" from the output file, this looks
> quite nice without your patch (but without transparency).
>
>
Thanks for checking that. While working on this, I wasn't sure what to
consider transparent in the source image, so I gave your threshold
suggestion a try setting the threshold as a #define and it works fine on
that file and my tests. Everything I've tested so far looks fine, but I am
not really sure what (if any) impact this has on dither.

I don't know how to make that a configurable argument, but I'll take a look.
Carl Eugen Hoyos Oct. 17, 2017, 1:14 a.m.
2017-10-10 21:48 GMT+02:00 Bjorn Roche <bjorn@giphy.com>:
> On Fri, Oct 6, 2017 at 5:23 PM, Carl Eugen Hoyos <ceffmpeg@gmail.com> wrote:

>> Do you think an option to set the value of the transparent
>> colour to something else than 0xFF00 would make sense?
>> And/or another default?
>> You can use ffplay on an output png file to test if it's not
>> clear what I mean.
>>
>
> I am pretty sure this is an issue in palettegen, since the
> 0x0000ff00 is there as well.

You are correct.

Thank you, Carl Eugen

Patch hide | download patch | download mbox

diff --git a/libavfilter/vf_paletteuse.c b/libavfilter/vf_paletteuse.c
index ffd37bf1da..4203543843 100644
--- a/libavfilter/vf_paletteuse.c
+++ b/libavfilter/vf_paletteuse.c
@@ -56,7 +56,7 @@  enum diff_mode {
 };
 
 struct color_node {
-    uint8_t val[3];
+    uint8_t val[4];
     uint8_t palette_id;
     int split;
     int left_id, right_id;
@@ -86,6 +86,7 @@  typedef struct PaletteUseContext {
     struct cache_node cache[CACHE_SIZE];    /* lookup cache */
     struct color_node map[AVPALETTE_COUNT]; /* 3D-Tree (KD-Tree with K=3) for reverse colormap */
     uint32_t palette[AVPALETTE_COUNT];
+    int transparency_index; /* index in the palette of transparency. -1 if there isn't a transparency. */
     int palette_loaded;
     int dither;
     int new;
@@ -108,6 +109,7 @@  typedef struct PaletteUseContext {
 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
 static const AVOption paletteuse_options[] = {
     { "dither", "select dithering mode", OFFSET(dither), AV_OPT_TYPE_INT, {.i64=DITHERING_SIERRA2_4A}, 0, NB_DITHERING-1, FLAGS, "dithering_mode" },
+        { "none",            "no dither",                                                              0, AV_OPT_TYPE_CONST, {.i64=DITHERING_NONE},            INT_MIN, INT_MAX, FLAGS, "dithering_mode" },
         { "bayer",           "ordered 8x8 bayer dithering (deterministic)",                            0, AV_OPT_TYPE_CONST, {.i64=DITHERING_BAYER},           INT_MIN, INT_MAX, FLAGS, "dithering_mode" },
         { "heckbert",        "dithering as defined by Paul Heckbert in 1982 (simple error diffusion)", 0, AV_OPT_TYPE_CONST, {.i64=DITHERING_HECKBERT},        INT_MIN, INT_MAX, FLAGS, "dithering_mode" },
         { "floyd_steinberg", "Floyd and Steingberg dithering (error diffusion)",                       0, AV_OPT_TYPE_CONST, {.i64=DITHERING_FLOYD_STEINBERG}, INT_MIN, INT_MAX, FLAGS, "dithering_mode" },
@@ -157,7 +159,8 @@  static int query_formats(AVFilterContext *ctx)
 
 static av_always_inline int dither_color(uint32_t px, int er, int eg, int eb, int scale, int shift)
 {
-    return av_clip_uint8((px >> 16 & 0xff) + ((er * scale) / (1<<shift))) << 16
+    return av_clip_uint8((px >> 24 & 0xff)                              ) << 24
+         | av_clip_uint8((px >> 16 & 0xff) + ((er * scale) / (1<<shift))) << 16
          | av_clip_uint8((px >>  8 & 0xff) + ((eg * scale) / (1<<shift))) <<  8
          | av_clip_uint8((px       & 0xff) + ((eb * scale) / (1<<shift)));
 }
@@ -165,10 +168,18 @@  static av_always_inline int dither_color(uint32_t px, int er, int eg, int eb, in
 static av_always_inline int diff(const uint8_t *c1, const uint8_t *c2)
 {
     // XXX: try L*a*b with CIE76 (dL*dL + da*da + db*db)
-    const int dr = c1[0] - c2[0];
-    const int dg = c1[1] - c2[1];
-    const int db = c1[2] - c2[2];
-    return dr*dr + dg*dg + db*db;
+    const static int max_diff = 255*255 + 255*255 + 255*255;
+    const int dr = c1[1] - c2[1];
+    const int dg = c1[2] - c2[2];
+    const int db = c1[3] - c2[3];
+
+    if (c1[0] == 0 && c2[0] == 0) {
+        return 0;
+    } else if (c1[0] == c2[0]) {
+        return dr*dr + dg*dg + db*db;
+    } else {
+        return max_diff;
+    }
 }
 
 static av_always_inline uint8_t colormap_nearest_bruteforce(const uint32_t *palette, const uint8_t *rgb)
@@ -179,18 +190,20 @@  static av_always_inline uint8_t colormap_nearest_bruteforce(const uint32_t *pale
         const uint32_t c = palette[i];
 
         if ((c & 0xff000000) == 0xff000000) { // ignore transparent entry
-            const uint8_t palrgb[] = {
+            const uint8_t palargb[] = {
+                palette[i]>>24 & 0xff,
                 palette[i]>>16 & 0xff,
                 palette[i]>> 8 & 0xff,
                 palette[i]     & 0xff,
             };
-            const int d = diff(palrgb, rgb);
+            const int d = diff(palargb, rgb);
             if (d < min_dist) {
                 pal_id = i;
                 min_dist = d;
             }
         }
     }
+
     return pal_id;
 }
 
@@ -325,14 +338,15 @@  end:
  * Note: r, g, and b are the component of c but are passed as well to avoid
  * recomputing them (they are generally computed by the caller for other uses).
  */
-static av_always_inline int color_get(struct cache_node *cache, uint32_t color,
-                                      uint8_t r, uint8_t g, uint8_t b,
+static av_always_inline int color_get(struct cache_node *cache, uint32_t argb,
+                                      uint8_t a, uint8_t r, uint8_t g, uint8_t b,
+                                      int transparency_index,
                                       const struct color_node *map,
                                       const uint32_t *palette,
                                       const enum color_search_method search_method)
 {
     int i;
-    const uint8_t rgb[] = {r, g, b};
+    const uint8_t argb_elts[] = {a, r, g, b};
     const uint8_t rhash = r & ((1<<NBITS)-1);
     const uint8_t ghash = g & ((1<<NBITS)-1);
     const uint8_t bhash = b & ((1<<NBITS)-1);
@@ -340,9 +354,14 @@  static av_always_inline int color_get(struct cache_node *cache, uint32_t color,
     struct cache_node *node = &cache[hash];
     struct cached_color *e;
 
+    // first, check for transparency
+    if (a == 0 && transparency_index >= 0) {
+        return transparency_index;
+    }
+
     for (i = 0; i < node->nb_entries; i++) {
         e = &node->entries[i];
-        if (e->color == color)
+        if (e->color == argb)
             return e->pal_entry;
     }
 
@@ -350,21 +369,24 @@  static av_always_inline int color_get(struct cache_node *cache, uint32_t color,
                          sizeof(*node->entries), NULL);
     if (!e)
         return AVERROR(ENOMEM);
-    e->color = color;
-    e->pal_entry = COLORMAP_NEAREST(search_method, palette, map, rgb);
+    e->color = argb;
+    e->pal_entry = COLORMAP_NEAREST(search_method, palette, map, argb_elts);
+
     return e->pal_entry;
 }
 
 static av_always_inline int get_dst_color_err(struct cache_node *cache,
-                                              uint32_t c, const struct color_node *map,
+                                              uint32_t argb, const struct color_node *map,
                                               const uint32_t *palette,
+                                              int transparency_index,
                                               int *er, int *eg, int *eb,
                                               const enum color_search_method search_method)
 {
-    const uint8_t r = c >> 16 & 0xff;
-    const uint8_t g = c >>  8 & 0xff;
-    const uint8_t b = c       & 0xff;
-    const int dstx = color_get(cache, c, r, g, b, map, palette, search_method);
+    const uint8_t a = argb >> 24 & 0xff;
+    const uint8_t r = argb >> 16 & 0xff;
+    const uint8_t g = argb >>  8 & 0xff;
+    const uint8_t b = argb       & 0xff;
+    const int dstx = color_get(cache, argb, a, r, g, b, transparency_index, map, palette, search_method);
     const uint32_t dstc = palette[dstx];
     *er = r - (dstc >> 16 & 0xff);
     *eg = g - (dstc >>  8 & 0xff);
@@ -385,6 +407,7 @@  static av_always_inline int set_frame(PaletteUseContext *s, AVFrame *out, AVFram
     const int dst_linesize = out->linesize[0];
     uint32_t *src = ((uint32_t *)in ->data[0]) + y_start*src_linesize;
     uint8_t  *dst =              out->data[0]  + y_start*dst_linesize;
+    int transparency_index = s->transparency_index;
 
     w += x_start;
     h += y_start;
@@ -395,14 +418,14 @@  static av_always_inline int set_frame(PaletteUseContext *s, AVFrame *out, AVFram
 
             if (dither == DITHERING_BAYER) {
                 const int d = s->ordered_dither[(y & 7)<<3 | (x & 7)];
+                const uint8_t a8 = src[x] >> 24 & 0xff;
                 const uint8_t r8 = src[x] >> 16 & 0xff;
                 const uint8_t g8 = src[x] >>  8 & 0xff;
                 const uint8_t b8 = src[x]       & 0xff;
                 const uint8_t r = av_clip_uint8(r8 + d);
                 const uint8_t g = av_clip_uint8(g8 + d);
                 const uint8_t b = av_clip_uint8(b8 + d);
-                const uint32_t c = r<<16 | g<<8 | b;
-                const int color = color_get(cache, c, r, g, b, map, palette, search_method);
+                const int color = color_get(cache, src[x], a8, r, g, b, transparency_index, map, palette, search_method);
 
                 if (color < 0)
                     return color;
@@ -410,7 +433,7 @@  static av_always_inline int set_frame(PaletteUseContext *s, AVFrame *out, AVFram
 
             } else if (dither == DITHERING_HECKBERT) {
                 const int right = x < w - 1, down = y < h - 1;
-                const int color = get_dst_color_err(cache, src[x], map, palette, &er, &eg, &eb, search_method);
+                const int color = get_dst_color_err(cache, src[x], map, palette, transparency_index, &er, &eg, &eb, search_method);
 
                 if (color < 0)
                     return color;
@@ -422,7 +445,7 @@  static av_always_inline int set_frame(PaletteUseContext *s, AVFrame *out, AVFram
 
             } else if (dither == DITHERING_FLOYD_STEINBERG) {
                 const int right = x < w - 1, down = y < h - 1, left = x > x_start;
-                const int color = get_dst_color_err(cache, src[x], map, palette, &er, &eg, &eb, search_method);
+                const int color = get_dst_color_err(cache, src[x], map, palette, transparency_index, &er, &eg, &eb, search_method);
 
                 if (color < 0)
                     return color;
@@ -436,7 +459,7 @@  static av_always_inline int set_frame(PaletteUseContext *s, AVFrame *out, AVFram
             } else if (dither == DITHERING_SIERRA2) {
                 const int right  = x < w - 1, down  = y < h - 1, left  = x > x_start;
                 const int right2 = x < w - 2,                    left2 = x > x_start + 1;
-                const int color = get_dst_color_err(cache, src[x], map, palette, &er, &eg, &eb, search_method);
+                const int color = get_dst_color_err(cache, src[x], map, palette, transparency_index, &er, &eg, &eb, search_method);
 
                 if (color < 0)
                     return color;
@@ -455,7 +478,7 @@  static av_always_inline int set_frame(PaletteUseContext *s, AVFrame *out, AVFram
 
             } else if (dither == DITHERING_SIERRA2_4A) {
                 const int right = x < w - 1, down = y < h - 1, left = x > x_start;
-                const int color = get_dst_color_err(cache, src[x], map, palette, &er, &eg, &eb, search_method);
+                const int color = get_dst_color_err(cache, src[x], map, palette, transparency_index, &er, &eg, &eb, search_method);
 
                 if (color < 0)
                     return color;
@@ -466,10 +489,11 @@  static av_always_inline int set_frame(PaletteUseContext *s, AVFrame *out, AVFram
                 if (         down) src[src_linesize + x    ] = dither_color(src[src_linesize + x    ], er, eg, eb, 1, 2);
 
             } else {
+                const uint8_t a = src[x] >> 24 & 0xff;
                 const uint8_t r = src[x] >> 16 & 0xff;
                 const uint8_t g = src[x] >>  8 & 0xff;
                 const uint8_t b = src[x]       & 0xff;
-                const int color = color_get(cache, src[x] & 0xffffff, r, g, b, map, palette, search_method);
+                const int color = color_get(cache, src[x], a, r, g, b, transparency_index, map, palette, search_method);
 
                 if (color < 0)
                     return color;
@@ -489,19 +513,19 @@  static void disp_node(AVBPrint *buf,
                       int depth)
 {
     const struct color_node *node = &map[node_id];
-    const uint32_t fontcolor = node->val[0] > 0x50 &&
-                               node->val[1] > 0x50 &&
-                               node->val[2] > 0x50 ? 0 : 0xffffff;
+    const uint32_t fontcolor = node->val[1] > 0x50 &&
+                               node->val[2] > 0x50 &&
+                               node->val[3] > 0x50 ? 0 : 0xffffff;
     av_bprintf(buf, "%*cnode%d ["
                "label=\"%c%02X%c%02X%c%02X%c\" "
                "fillcolor=\"#%02x%02x%02x\" "
                "fontcolor=\"#%06"PRIX32"\"]\n",
                depth*INDENT, ' ', node->palette_id,
-               "[  "[node->split], node->val[0],
-               "][ "[node->split], node->val[1],
-               " ]["[node->split], node->val[2],
+               "[  "[node->split], node->val[1],
+               "][ "[node->split], node->val[2],
+               " ]["[node->split], node->val[3],
                "  ]"[node->split],
-               node->val[0], node->val[1], node->val[2],
+               node->val[1], node->val[2], node->val[3],
                fontcolor);
     if (parent_id != -1)
         av_bprintf(buf, "%*cnode%d -> node%d\n", depth*INDENT, ' ',
@@ -584,17 +608,18 @@  static int cmp_##name(const void *pa, const void *pb)   \
 {                                                       \
     const struct color *a = pa;                         \
     const struct color *b = pb;                         \
-    return   (a->value >> (8 * (2 - (pos))) & 0xff)     \
-           - (b->value >> (8 * (2 - (pos))) & 0xff);    \
+    return   (a->value >> (8 * (3 - (pos))) & 0xff)     \
+           - (b->value >> (8 * (3 - (pos))) & 0xff);    \
 }
 
-DECLARE_CMP_FUNC(r, 0)
-DECLARE_CMP_FUNC(g, 1)
-DECLARE_CMP_FUNC(b, 2)
+DECLARE_CMP_FUNC(a, 0)
+DECLARE_CMP_FUNC(r, 1)
+DECLARE_CMP_FUNC(g, 2)
+DECLARE_CMP_FUNC(b, 3)
 
-static const cmp_func cmp_funcs[] = {cmp_r, cmp_g, cmp_b};
+static const cmp_func cmp_funcs[] = {cmp_a, cmp_r, cmp_g, cmp_b};
 
-static int get_next_color(const uint8_t *color_used, const uint32_t *palette,
+static int get_next_color(const uint8_t *color_used, const uint32_t *palette, const int nb_colors,
                           int *component, const struct color_rect *box)
 {
     int wr, wg, wb;
@@ -607,13 +632,18 @@  static int get_next_color(const uint8_t *color_used, const uint32_t *palette,
     ranges.min[0] = ranges.min[1] = ranges.min[2] = 0xff;
     ranges.max[0] = ranges.max[1] = ranges.max[2] = 0x00;
 
-    for (i = 0; i < AVPALETTE_COUNT; i++) {
+    for (i = 0; i < nb_colors; i++) {
         const uint32_t c = palette[i];
+        const uint8_t a = c >> 24 & 0xff;
         const uint8_t r = c >> 16 & 0xff;
         const uint8_t g = c >>  8 & 0xff;
         const uint8_t b = c       & 0xff;
 
-        if (color_used[i] ||
+        if (a != 0xff) {
+            continue;
+        }
+
+        if (color_used[i] || (a != 0xff) ||
             r < box->min[0] || g < box->min[1] || b < box->min[2] ||
             r > box->max[0] || g > box->max[1] || b > box->max[2])
             continue;
@@ -639,9 +669,9 @@  static int get_next_color(const uint8_t *color_used, const uint32_t *palette,
     wr = ranges.max[0] - ranges.min[0];
     wg = ranges.max[1] - ranges.min[1];
     wb = ranges.max[2] - ranges.min[2];
-    if (wr >= wg && wr >= wb) longest = 0;
-    if (wg >= wr && wg >= wb) longest = 1;
-    if (wb >= wr && wb >= wg) longest = 2;
+    if (wr >= wg && wr >= wb) longest = 1;
+    if (wg >= wr && wg >= wb) longest = 2;
+    if (wb >= wr && wb >= wg) longest = 3;
     cmpf = cmp_funcs[longest];
     *component = longest;
 
@@ -655,6 +685,7 @@  static int colormap_insert(struct color_node *map,
                            uint8_t *color_used,
                            int *nb_used,
                            const uint32_t *palette,
+                           const int nb_colors,
                            const struct color_rect *box)
 {
     uint32_t c;
@@ -662,7 +693,7 @@  static int colormap_insert(struct color_node *map,
     int node_left_id = -1, node_right_id = -1;
     struct color_node *node;
     struct color_rect box1, box2;
-    const int pal_id = get_next_color(color_used, palette, &component, box);
+    const int pal_id = get_next_color(color_used, palette, nb_colors, &component, box);
 
     if (pal_id < 0)
         return -1;
@@ -673,21 +704,22 @@  static int colormap_insert(struct color_node *map,
     node = &map[cur_id];
     node->split = component;
     node->palette_id = pal_id;
-    node->val[0] = c>>16 & 0xff;
-    node->val[1] = c>> 8 & 0xff;
-    node->val[2] = c     & 0xff;
+    node->val[0] = c>>24 & 0xff;
+    node->val[1] = c>>16 & 0xff;
+    node->val[2] = c>> 8 & 0xff;
+    node->val[3] = c     & 0xff;
 
     color_used[pal_id] = 1;
 
     /* get the two boxes this node creates */
     box1 = box2 = *box;
-    box1.max[component] = node->val[component];
-    box2.min[component] = node->val[component] + 1;
+    box1.max[component-1] = node->val[component];
+    box2.min[component-1] = node->val[component] + 1;
 
-    node_left_id = colormap_insert(map, color_used, nb_used, palette, &box1);
+    node_left_id = colormap_insert(map, color_used, nb_used, palette, nb_colors, &box1);
 
-    if (box2.min[component] <= box2.max[component])
-        node_right_id = colormap_insert(map, color_used, nb_used, palette, &box2);
+    if (box2.min[component-1] <= box2.max[component-1])
+        node_right_id = colormap_insert(map, color_used, nb_used, palette, nb_colors, &box2);
 
     node->left_id  = node_left_id;
     node->right_id = node_right_id;
@@ -702,7 +734,7 @@  static int cmp_pal_entry(const void *a, const void *b)
     return c1 - c2;
 }
 
-static void load_colormap(PaletteUseContext *s)
+static void load_colormap(PaletteUseContext *s, int nb_colors)
 {
     int i, nb_used = 0;
     uint8_t color_used[AVPALETTE_COUNT] = {0};
@@ -710,8 +742,18 @@  static void load_colormap(PaletteUseContext *s)
     struct color_rect box;
 
     /* disable transparent colors and dups */
-    qsort(s->palette, AVPALETTE_COUNT, sizeof(*s->palette), cmp_pal_entry);
-    for (i = 0; i < AVPALETTE_COUNT; i++) {
+    qsort(s->palette, nb_colors, sizeof(*s->palette), cmp_pal_entry);
+    // update transparency index:
+    if (s->transparency_index >= 0) {
+        for (i = 0; i < nb_colors; i++) {
+            if ((s->palette[i]>>24 & 0xff) == 0) {
+                s->transparency_index = i; // we are assuming at most one transparent color
+                break;
+            }
+        }
+    }
+
+    for (i = 0; i < nb_colors; i++) {
         const uint32_t c = s->palette[i];
         if (i != 0 && c == last_color) {
             color_used[i] = 1;
@@ -727,7 +769,7 @@  static void load_colormap(PaletteUseContext *s)
     box.min[0] = box.min[1] = box.min[2] = 0x00;
     box.max[0] = box.max[1] = box.max[2] = 0xff;
 
-    colormap_insert(s->map, color_used, &nb_used, s->palette, &box);
+    colormap_insert(s->map, color_used, &nb_used, s->palette, nb_colors, &box);
 
     if (s->dot_filename)
         disp_tree(s->map, s->dot_filename);
@@ -937,10 +979,12 @@  static int config_input_palette(AVFilterLink *inlink)
 
 static void load_palette(PaletteUseContext *s, const AVFrame *palette_frame)
 {
-    int i, x, y;
+    int pal_index, i, x, y;
     const uint32_t *p = (const uint32_t *)palette_frame->data[0];
     const int p_linesize = palette_frame->linesize[0] >> 2;
 
+    s->transparency_index = -1;
+
     if (s->new) {
         memset(s->palette, 0, sizeof(s->palette));
         memset(s->map, 0, sizeof(s->map));
@@ -949,14 +993,19 @@  static void load_palette(PaletteUseContext *s, const AVFrame *palette_frame)
         memset(s->cache, 0, sizeof(s->cache));
     }
 
-    i = 0;
+    pal_index = 0;
     for (y = 0; y < palette_frame->height; y++) {
-        for (x = 0; x < palette_frame->width; x++)
-            s->palette[i++] = p[x];
+        for (x = 0; x < palette_frame->width; x++) {
+            s->palette[pal_index] = p[x];
+            if ((p[x]>>24 & 0xff) == 0) {
+                s->transparency_index = pal_index; // we are assuming at most one transparent color
+            }
+            ++pal_index;
+        }
         p += p_linesize;
     }
 
-    load_colormap(s);
+    load_colormap(s, pal_index);
 
     if (!s->new)
         s->palette_loaded = 1;