@@ -51,6 +51,7 @@ typedef struct ColorBalanceContext {
uint8_t rgba_map[4];
int step;
+ int bps;
int (*apply_lut)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
} ColorBalanceContext;
@@ -96,161 +97,89 @@ static int query_formats(AVFilterContext *ctx)
return ff_set_common_formats(ctx, fmts_list);
}
-static int apply_lut8_p(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
-{
- ColorBalanceContext *s = ctx->priv;
- ThreadData *td = arg;
- AVFrame *in = td->in;
- AVFrame *out = td->out;
- const int slice_start = (out->height * jobnr) / nb_jobs;
- const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
- const uint8_t *srcg = in->data[0] + slice_start * in->linesize[0];
- const uint8_t *srcb = in->data[1] + slice_start * in->linesize[1];
- const uint8_t *srcr = in->data[2] + slice_start * in->linesize[2];
- const uint8_t *srca = in->data[3] + slice_start * in->linesize[3];
- uint8_t *dstg = out->data[0] + slice_start * out->linesize[0];
- uint8_t *dstb = out->data[1] + slice_start * out->linesize[1];
- uint8_t *dstr = out->data[2] + slice_start * out->linesize[2];
- uint8_t *dsta = out->data[3] + slice_start * out->linesize[3];
- int i, j;
-
- for (i = slice_start; i < slice_end; i++) {
- for (j = 0; j < out->width; j++) {
- dstg[j] = s->lut[G][srcg[j]];
- dstb[j] = s->lut[B][srcb[j]];
- dstr[j] = s->lut[R][srcr[j]];
- if (in != out && out->linesize[3])
- dsta[j] = srca[j];
- }
-
- srcg += in->linesize[0];
- srcb += in->linesize[1];
- srcr += in->linesize[2];
- srca += in->linesize[3];
- dstg += out->linesize[0];
- dstb += out->linesize[1];
- dstr += out->linesize[2];
- dsta += out->linesize[3];
- }
-
- return 0;
+/* Defines lut_planar_<nbits>(): a slice job that maps its share of rows of planes 0/1/2
+ * through lut[G]/lut[B]/lut[R], reading and writing `type` samples, and copies plane 3
+ * when the output is a separate frame that has one. The generated function returns 0. */
+#define DEF_PLANAR_LUT_FUNC(type, nbits) \
+static int lut_planar_##nbits(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
+{ \
+    ColorBalanceContext *s = ctx->priv; \
+    ThreadData *td = arg; \
+    AVFrame *in = td->in; \
+    AVFrame *out = td->out; \
+    const int slice_start = (out->height * jobnr) / nb_jobs; \
+    const int slice_end = (out->height * (jobnr+1)) / nb_jobs; \
+    /* Loop-invariant; plane 3 pointers are only formed when this is true. */ \
+    const int copy_alpha = in != out && out->linesize[3]; \
+    int i, j; \
+ \
+    for (i = slice_start; i < slice_end; i++) { \
+        /* Offset the byte pointers by whole rows first, then view the row as samples. */ \
+        const type *srcg = (const type *)(in->data[0] + i * in->linesize[0]); \
+        const type *srcb = (const type *)(in->data[1] + i * in->linesize[1]); \
+        const type *srcr = (const type *)(in->data[2] + i * in->linesize[2]); \
+        type *dstg = (type *)(out->data[0] + i * out->linesize[0]); \
+        type *dstb = (type *)(out->data[1] + i * out->linesize[1]); \
+        type *dstr = (type *)(out->data[2] + i * out->linesize[2]); \
+ \
+        for (j = 0; j < out->width; j++) { \
+            dstg[j] = s->lut[G][srcg[j]]; \
+            dstb[j] = s->lut[B][srcb[j]]; \
+            dstr[j] = s->lut[R][srcr[j]]; \
+        } \
+        if (copy_alpha) { \
+            const type *srca = (const type *)(in->data[3] + i * in->linesize[3]); \
+            type *dsta = (type *)(out->data[3] + i * out->linesize[3]); \
+            for (j = 0; j < out->width; j++) \
+                dsta[j] = srca[j]; \
+        } \
+    } \
+ \
+    return 0; \
}
-
-static int apply_lut16_p(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
-{
- ColorBalanceContext *s = ctx->priv;
- ThreadData *td = arg;
- AVFrame *in = td->in;
- AVFrame *out = td->out;
- const int slice_start = (out->height * jobnr) / nb_jobs;
- const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
- const uint16_t *srcg = (const uint16_t *)in->data[0] + slice_start * in->linesize[0] / 2;
- const uint16_t *srcb = (const uint16_t *)in->data[1] + slice_start * in->linesize[1] / 2;
- const uint16_t *srcr = (const uint16_t *)in->data[2] + slice_start * in->linesize[2] / 2;
- const uint16_t *srca = (const uint16_t *)in->data[3] + slice_start * in->linesize[3] / 2;
- uint16_t *dstg = (uint16_t *)out->data[0] + slice_start * out->linesize[0] / 2;
- uint16_t *dstb = (uint16_t *)out->data[1] + slice_start * out->linesize[1] / 2;
- uint16_t *dstr = (uint16_t *)out->data[2] + slice_start * out->linesize[2] / 2;
- uint16_t *dsta = (uint16_t *)out->data[3] + slice_start * out->linesize[3] / 2;
- int i, j;
-
- for (i = slice_start; i < slice_end; i++) {
- for (j = 0; j < out->width; j++) {
- dstg[j] = s->lut[G][srcg[j]];
- dstb[j] = s->lut[B][srcb[j]];
- dstr[j] = s->lut[R][srcr[j]];
- if (in != out && out->linesize[3])
- dsta[j] = srca[j];
- }
-
- srcg += in->linesize[0] / 2;
- srcb += in->linesize[1] / 2;
- srcr += in->linesize[2] / 2;
- srca += in->linesize[3] / 2;
- dstg += out->linesize[0] / 2;
- dstb += out->linesize[1] / 2;
- dstr += out->linesize[2] / 2;
- dsta += out->linesize[3] / 2;
- }
-
- return 0;
-}
-
-static int apply_lut8(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
-{
- ColorBalanceContext *s = ctx->priv;
- ThreadData *td = arg;
- AVFrame *in = td->in;
- AVFrame *out = td->out;
- AVFilterLink *outlink = ctx->outputs[0];
- const int slice_start = (out->height * jobnr) / nb_jobs;
- const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
- const uint8_t *srcrow = in->data[0] + slice_start * in->linesize[0];
- const uint8_t roffset = s->rgba_map[R];
- const uint8_t goffset = s->rgba_map[G];
- const uint8_t boffset = s->rgba_map[B];
- const uint8_t aoffset = s->rgba_map[A];
- const int step = s->step;
- uint8_t *dstrow;
- int i, j;
-
- dstrow = out->data[0] + slice_start * out->linesize[0];
- for (i = slice_start; i < slice_end; i++) {
- const uint8_t *src = srcrow;
- uint8_t *dst = dstrow;
-
- for (j = 0; j < outlink->w * step; j += step) {
- dst[j + roffset] = s->lut[R][src[j + roffset]];
- dst[j + goffset] = s->lut[G][src[j + goffset]];
- dst[j + boffset] = s->lut[B][src[j + boffset]];
- if (in != out && step == 4)
- dst[j + aoffset] = src[j + aoffset];
- }
-
- srcrow += in->linesize[0];
- dstrow += out->linesize[0];
- }
-
- return 0;
-}
-
-static int apply_lut16(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
-{
- ColorBalanceContext *s = ctx->priv;
- ThreadData *td = arg;
- AVFrame *in = td->in;
- AVFrame *out = td->out;
- AVFilterLink *outlink = ctx->outputs[0];
- const int slice_start = (out->height * jobnr) / nb_jobs;
- const int slice_end = (out->height * (jobnr+1)) / nb_jobs;
- const uint16_t *srcrow = (const uint16_t *)in->data[0] + slice_start * in->linesize[0] / 2;
- const uint8_t roffset = s->rgba_map[R];
- const uint8_t goffset = s->rgba_map[G];
- const uint8_t boffset = s->rgba_map[B];
- const uint8_t aoffset = s->rgba_map[A];
- const int step = s->step / 2;
- uint16_t *dstrow;
- int i, j;
-
- dstrow = (uint16_t *)out->data[0] + slice_start * out->linesize[0] / 2;
- for (i = slice_start; i < slice_end; i++) {
- const uint16_t *src = srcrow;
- uint16_t *dst = dstrow;
-
- for (j = 0; j < outlink->w * step; j += step) {
- dst[j + roffset] = s->lut[R][src[j + roffset]];
- dst[j + goffset] = s->lut[G][src[j + goffset]];
- dst[j + boffset] = s->lut[B][src[j + boffset]];
- if (in != out && step == 4)
- dst[j + aoffset] = src[j + aoffset];
- }
-
- srcrow += in->linesize[0] / 2;
- dstrow += out->linesize[0] / 2;
- }
-
- return 0;
+DEF_PLANAR_LUT_FUNC(uint16_t, 16) /* lut_planar_16; no ';': the macro is a full definition */
+DEF_PLANAR_LUT_FUNC(uint8_t, 8)   /* lut_planar_8 */
+
+/* Defines lut_packed_<nbits>(): a slice job over plane 0 of a packed frame. Each pixel's
+ * R/G/B samples (located via rgba_map) go through lut[R]/lut[G]/lut[B]; the A sample is
+ * copied when the output is a separate frame and pixels have 4 samples. Returns 0. */
+#define DEF_PACKED_LUT_FUNC(type, nbits) \
+static int lut_packed_##nbits(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \
+{ \
+    ColorBalanceContext *s = ctx->priv; \
+    ThreadData *td = arg; \
+    AVFrame *in = td->in; \
+    AVFrame *out = td->out; \
+    AVFilterLink *outlink = ctx->outputs[0]; \
+    const int slice_start = (out->height * jobnr) / nb_jobs; \
+    const int slice_end = (out->height * (jobnr+1)) / nb_jobs; \
+    const uint8_t roffset = s->rgba_map[R]; \
+    const uint8_t goffset = s->rgba_map[G]; \
+    const uint8_t boffset = s->rgba_map[B]; \
+    const uint8_t aoffset = s->rgba_map[A]; \
+    const int step = s->step; \
+    /* step is used as a stride in `type` samples; both values below are loop-invariant. */ \
+    const int copy_alpha = in != out && step == 4; \
+    const int row_len = outlink->w * step; \
+    int i, j; \
+ \
+    for (i = slice_start; i < slice_end; i++) { \
+        const type *src = (const type *)(in->data[0] + i * in->linesize[0]); \
+        type *dst = (type *)(out->data[0] + i * out->linesize[0]); \
+ \
+        for (j = 0; j < row_len; j += step) { \
+            dst[j + roffset] = s->lut[R][src[j + roffset]]; \
+            dst[j + goffset] = s->lut[G][src[j + goffset]]; \
+            dst[j + boffset] = s->lut[B][src[j + boffset]]; \
+            if (copy_alpha) \
+                dst[j + aoffset] = src[j + aoffset]; \
+        } \
+    } \
+ \
+    return 0; \
}
+DEF_PACKED_LUT_FUNC(uint16_t, 16)
+DEF_PACKED_LUT_FUNC(uint8_t, 8)
static int config_output(AVFilterLink *outlink)
{
@@ -259,19 +188,15 @@ static int config_output(AVFilterLink *outlink)
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(outlink->format);
const int depth = desc->comp[0].depth;
const int max = 1 << depth;
- const int planar = av_pix_fmt_count_planes(outlink->format) > 1;
+ const int is_planar = desc->flags & AV_PIX_FMT_FLAG_PLANAR;
double *shadows, *midtones, *highlights, *buffer;
int i, r, g, b;
- if (max == 256 && planar) {
- s->apply_lut = apply_lut8_p;
- } else if (planar) {
- s->apply_lut = apply_lut16_p;
- } else if (max == 256) {
- s->apply_lut = apply_lut8;
- } else {
- s->apply_lut = apply_lut16;
- }
+ s->bps = depth > 8 ? 2 : 1;
+ if (!is_planar)
+ s->apply_lut = (depth <= 8) ? lut_packed_8 : lut_packed_16;
+ else
+ s->apply_lut = (depth <= 8) ? lut_planar_8 : lut_planar_16;
buffer = av_malloc(max * 3 * sizeof(*buffer));
if (!buffer)
@@ -317,7 +242,7 @@ static int config_output(AVFilterLink *outlink)
av_free(buffer);
ff_fill_rgba_map(s->rgba_map, outlink->format);
- s->step = av_get_padded_bits_per_pixel(desc) >> 3;
+ s->step = (av_get_padded_bits_per_pixel(desc) >> 3) / s->bps;
return 0;
}