diff mbox

[FFmpeg-devel,v7,2/2] lavc/tiff: Decode embedded JPEGs in DNG images

Message ID 20190725153506.1848-2-velocityra@gmail.com
State Superseded
Headers show

Commit Message

velocityra@gmail.com July 25, 2019, 3:35 p.m. UTC
From: Nick Renieris <velocityra@gmail.com>

Used a technique similar to lavc/tdsc.c for invoking the MJPEG decoder.

This commit adds support for:
- DNG tiles
- DNG tile huffman lossless JPEG decoding
- DNG 8-bpp ("packed" as dcraw calls it) decoding
- DNG color scaling [1]
  - LinearizationTable tag
  - BlackLevel tag

[1]: As specified in the DNG Specification - Chapter 5

Signed-off-by: Nick Renieris <velocityra@gmail.com>
---
 configure           |   1 +
 libavcodec/Makefile |   2 +-
 libavcodec/tiff.c   | 315 +++++++++++++++++++++++++++++++++++++++++++-
 libavcodec/tiff.h   |   2 +
 4 files changed, 312 insertions(+), 8 deletions(-)

Comments

Reimar Döffinger July 25, 2019, 11:21 p.m. UTC | #1
On 25.07.2019, at 17:35, velocityra@gmail.com wrote:

> +    // Lookup table lookup
> +    if (lut)
> +        value = lut[value];

As this function is in the innermost loop, doing the if here instead of having 2 different implementations is likely not ideal speed-wise.

> +    // Color scaling
> +    value = av_clip_uint16_c((unsigned)(((float)value * scale_factor) * 0xFFFF));

As input and output are both 16 bit I wonder if floating-point isn't rather overkill compared to doing fixed-point arithmetic.

> 
> +    if (is_u16) {
> +        for (line = 0; line < height; line++) {
> +            uint16_t *dst_u16 = (uint16_t *)dst;
> +            uint16_t *src_u16 = (uint16_t *)src;
> +
> +            for (col = 0; col < width; col++)
> +                *dst_u16++ = dng_raw_to_linear16(*src_u16++, s->dng_lut, s->black_level, scale_factor);
> +
> +            dst += dst_stride * sizeof(uint16_t);
> +            src += src_stride * sizeof(uint16_t);

Is all this casting working correctly on e.g. big-endian?
Also using sizeof on uint16_t and uint8_t seems a bit overkill.
Also not sure if since these are essentially brightness/contrast adjustments if we should't rather just have a way to export the transform to use...


> @@ -1519,6 +1773,26 @@ again:
>         return AVERROR_INVALIDDATA;
>     }
> 
> +    /* Handle DNG images with JPEG-compressed tiles */
> +
> +    if (s->tiff_type == TIFF_TYPE_DNG || s->tiff_type == TIFF_TYPE_CINEMADNG) {
> +        if (s->is_jpeg) {
> +            if (s->is_bayer) {
> +                if ((ret = dng_decode(avctx, (AVFrame*)data, avpkt)) > 0)
> +                    *got_frame = 1;
> +                return ret;
> +            } else {
> +                avpriv_report_missing_feature(avctx, "DNG JPG-compressed non-bayer-encoded images");
> +                return AVERROR_PATCHWELCOME;
> +            }
> +        } else if (s->is_tiled) {
> +            avpriv_report_missing_feature(avctx, "DNG uncompressed tiled images");
> +            return AVERROR_PATCHWELCOME;
> +        }

There is no need for an "else" block if the "if" block ends in a return.
Also putting the error handling first/at the deepest indentation level results in more readable code generally.
velocityra@gmail.com July 26, 2019, 7:34 a.m. UTC | #2
Στις Παρ, 26 Ιουλ 2019 στις 2:21 π.μ., ο/η Reimar Döffinger
<Reimar.Doeffinger@gmx.de> έγραψε:
>
> On 25.07.2019, at 17:35, velocityra@gmail.com wrote:
>
> > +    // Lookup table lookup
> > +    if (lut)
> > +        value = lut[value];
>
> As this function is in the innermost loop, doing the if here instead of having 2 different implementations is likely not ideal speed-wise.

If this were C++ this'd be trivial, but as it stands I don't see a way
to do this without sacrificing code readability and/or size.
I believe branch prediction and instruction pipelining will hide this
delay. I doubt it has any measurable effect on performance.

> > +    // Color scaling
> > +    value = av_clip_uint16_c((unsigned)(((float)value * scale_factor) * 0xFFFF));
>
> As input and output are both 16 bit I wonder if floating-point isn't rather overkill compared to doing fixed-point arithmetic.

Scaling in the [0.0, 1.0] range is mentioned in the DNG spec and it's
also what dcraw does.

> >
> > +    if (is_u16) {
> > +        for (line = 0; line < height; line++) {
> > +            uint16_t *dst_u16 = (uint16_t *)dst;
> > +            uint16_t *src_u16 = (uint16_t *)src;
> > +
> > +            for (col = 0; col < width; col++)
> > +                *dst_u16++ = dng_raw_to_linear16(*src_u16++, s->dng_lut, s->black_level, scale_factor);
> > +
> > +            dst += dst_stride * sizeof(uint16_t);
> > +            src += src_stride * sizeof(uint16_t);
>
> Is all this casting working correctly on e.g. big-endian?

Not sure, I don't see why not, considering I've seen such casting in
other parts of ffmpeg.

> Also using sizeof on uint16_t and uint8_t seems a bit overkill.

It makes the programmer's intention clear, I think that's worth the
few extra characters.

> Also not sure if since these are essentially brightness/contrast adjustments if we should't rather just have a way to export the transform to use...

Export to where? I don't see why you'd need to complicate this by
calling into other components, the transformation code is concise,
clear and accurate for this use case.

>
> > @@ -1519,6 +1773,26 @@ again:
> >         return AVERROR_INVALIDDATA;
> >     }
> >
> > +    /* Handle DNG images with JPEG-compressed tiles */
> > +
> > +    if (s->tiff_type == TIFF_TYPE_DNG || s->tiff_type == TIFF_TYPE_CINEMADNG) {
> > +        if (s->is_jpeg) {
> > +            if (s->is_bayer) {
> > +                if ((ret = dng_decode(avctx, (AVFrame*)data, avpkt)) > 0)
> > +                    *got_frame = 1;
> > +                return ret;
> > +            } else {
> > +                avpriv_report_missing_feature(avctx, "DNG JPG-compressed non-bayer-encoded images");
> > +                return AVERROR_PATCHWELCOME;
> > +            }
> > +        } else if (s->is_tiled) {
> > +            avpriv_report_missing_feature(avctx, "DNG uncompressed tiled images");
> > +            return AVERROR_PATCHWELCOME;
> > +        }
>
> There is no need for an "else" block if the "if" block ends in a return.

Indeed, but in my opinion it makes the code clearer on first glance
(if you miss the return). I can change it if you insist.

> Also putting the error handling first/at the deepest indentation level results in more readable code generally.

That's true, I will do that.
Reimar Döffinger July 27, 2019, 10:30 p.m. UTC | #3
On 26.07.2019, at 09:34, Nick Renieris <velocityra@gmail.com> wrote:

> Στις Παρ, 26 Ιουλ 2019 στις 2:21 π.μ., ο/η Reimar Döffinger
> <Reimar.Doeffinger@gmx.de> έγραψε:
>> 
>> On 25.07.2019, at 17:35, velocityra@gmail.com wrote:
>> 
>>> +    // Lookup table lookup
>>> +    if (lut)
>>> +        value = lut[value];
>> 
>> As this function is in the innermost loop, doing the if here instead of having 2 different implementations is likely not ideal speed-wise.
> 
> If this were C++ this'd be trivial, but as it stands I don't see a way
> to do this without sacrificing code readability and/or size.

Huh? Are you thinking of templates? always_inline can usually be used the same way.
I haven't looked into the how or anything, it was just a comment in principle.

> I believe branch prediction and instruction pipelining will hide this
> delay. I doubt it has any measurable effect on performance.

There are CPUs without that.

>>> +    // Color scaling
>>> +    value = av_clip_uint16_c((unsigned)(((float)value * scale_factor) * 0xFFFF));
>> 
>> As input and output are both 16 bit I wonder if floating-point isn't rather overkill compared to doing fixed-point arithmetic.
> 
> Scaling in the [0.0, 1.0] range is mentioned in the DNG spec and it's
> also what dcraw does.

I don't see the connection? The point is as input and output are 16 bit this calculation can be done as integer operations without the need for floating point and all the conversion.
Depending on required precision with either 32 or 64 bit intermediate values.
Essentially by simply changing
(value * scale_factor) * 0xFFFF
to something along the lines of 
(value * (unsigned)(scale_factor * 0xFFFF * (1<<8))) >> 8
With of course most all but one multiply and shift outside the loop.
Of course would need to look at what the actual requirements are concerning precision, rounding and range. But that should be done anyway.

>>> +    if (is_u16) {
>>> +        for (line = 0; line < height; line++) {
>>> +            uint16_t *dst_u16 = (uint16_t *)dst;
>>> +            uint16_t *src_u16 = (uint16_t *)src;
>>> +
>>> +            for (col = 0; col < width; col++)
>>> +                *dst_u16++ = dng_raw_to_linear16(*src_u16++, s->dng_lut, s->black_level, scale_factor);
>>> +
>>> +            dst += dst_stride * sizeof(uint16_t);
>>> +            src += src_stride * sizeof(uint16_t);
>> 
>> Is all this casting working correctly on e.g. big-endian?
> 
> Not sure, I don't see why not, considering I've seen such casting in
> other parts of ffmpeg.

Well, I did not find it obvious where src and dst are from and what format they are in.
Essentially if they are decoder output it's likely fine, if they are read from a file without decoding step it's likely wrong.

>> Also not sure if since these are essentially brightness/contrast adjustments if we should't rather just have a way to export the transform to use...
> 
> Export to where?

Just provide as metadata and leave to e.g. libavfilter.

> I don't see why you'd need to complicate this by
> calling into other components, the transformation code is concise,
> clear and accurate for this use case.

Slow and unoptimized and in it's current form hard to SIMD optimize, especially without changing output as well though (in addition to the large bit width of floats reducing the benefit, denormals can be an issue for SIMD-accelerating float code).
Unless I miss a reason why nobody would ever want this to be faster?

>>> @@ -1519,6 +1773,26 @@ again:
>>>        return AVERROR_INVALIDDATA;
>>>    }
>>> 
>>> +    /* Handle DNG images with JPEG-compressed tiles */
>>> +
>>> +    if (s->tiff_type == TIFF_TYPE_DNG || s->tiff_type == TIFF_TYPE_CINEMADNG) {
>>> +        if (s->is_jpeg) {
>>> +            if (s->is_bayer) {
>>> +                if ((ret = dng_decode(avctx, (AVFrame*)data, avpkt)) > 0)
>>> +                    *got_frame = 1;
>>> +                return ret;
>>> +            } else {
>>> +                avpriv_report_missing_feature(avctx, "DNG JPG-compressed non-bayer-encoded images");
>>> +                return AVERROR_PATCHWELCOME;
>>> +            }
>>> +        } else if (s->is_tiled) {
>>> +            avpriv_report_missing_feature(avctx, "DNG uncompressed tiled images");
>>> +            return AVERROR_PATCHWELCOME;
>>> +        }
>> 
>> There is no need for an "else" block if the "if" block ends in a return.
> 
> Indeed, but in my opinion it makes the code clearer on first glance
> (if you miss the return). I can change it if you insist.

The second comment was the more relevant one actually.
I only really care about finding some way to make this part a whole lot more readable.
Paul B Mahol July 28, 2019, 6:55 a.m. UTC | #4
On Sun, Jul 28, 2019 at 12:30 AM Reimar Döffinger <Reimar.Doeffinger@gmx.de>
wrote:

> On 26.07.2019, at 09:34, Nick Renieris <velocityra@gmail.com> wrote:
>
> > Στις Παρ, 26 Ιουλ 2019 στις 2:21 π.μ., ο/η Reimar Döffinger
> > <Reimar.Doeffinger@gmx.de> έγραψε:
> >>
> >> On 25.07.2019, at 17:35, velocityra@gmail.com wrote:
> >>
> >>> +    // Lookup table lookup
> >>> +    if (lut)
> >>> +        value = lut[value];
> >>
> >> As this function is in the innermost loop, doing the if here instead of
> having 2 different implementations is likely not ideal speed-wise.
> >
> > If this were C++ this'd be trivial, but as it stands I don't see a way
> > to do this without sacrificing code readability and/or size.
>
> Huh? Are you thinking of templates? always_inline can usually be used the
> same way.
> I haven't looked into the how or anything, it was just a comment in
> principle.
>
> > I believe branch prediction and instruction pipelining will hide this
> > delay. I doubt it has any measurable effect on performance.
>
> There are CPUs without that.
>
> >>> +    // Color scaling
> >>> +    value = av_clip_uint16_c((unsigned)(((float)value * scale_factor)
> * 0xFFFF));
> >>
> >> As input and output are both 16 bit I wonder if floating-point isn't
> rather overkill compared to doing fixed-point arithmetic.
> >
> > Scaling in the [0.0, 1.0] range is mentioned in the DNG spec and it's
> > also what dcraw does.
>
> I don't see the connection? The point is as input and output are 16 bit
> this calculation can be done as integer operations without the need for
> floating point and all the conversion.
> Depending on required precision with either 32 or 64 bit intermediate
> values.
> Essentially by simply changing
> (value * scale_factor) * 0xFFFF
> to something along the lines of
> (value * (unsigned)(scale_factor * 0xFFFF * (1<<8))) >> 8
> With of course most all but one multiply and shift outside the loop.
> Of course would need to look at what the actual requirements are
> concerning precision, rounding and range. But that should be done anyway.
>
> >>> +    if (is_u16) {
> >>> +        for (line = 0; line < height; line++) {
> >>> +            uint16_t *dst_u16 = (uint16_t *)dst;
> >>> +            uint16_t *src_u16 = (uint16_t *)src;
> >>> +
> >>> +            for (col = 0; col < width; col++)
> >>> +                *dst_u16++ = dng_raw_to_linear16(*src_u16++,
> s->dng_lut, s->black_level, scale_factor);
> >>> +
> >>> +            dst += dst_stride * sizeof(uint16_t);
> >>> +            src += src_stride * sizeof(uint16_t);
> >>
> >> Is all this casting working correctly on e.g. big-endian?
> >
> > Not sure, I don't see why not, considering I've seen such casting in
> > other parts of ffmpeg.
>
> Well, I did not find it obvious where src and dst are from and what format
> they are in.
> Essentially if they are decoder output it's likely fine, if they are read
> from a file without decoding step it's likely wrong.
>
> >> Also not sure if since these are essentially brightness/contrast
> adjustments if we should't rather just have a way to export the transform
> to use...
> >
> > Export to where?
>
> Just provide as metadata and leave to e.g. libavfilter.
>

That does not follow DNG specification.
I really do not have time to comment on other irrelevant stuff you pointed
in your review.


>
> > I don't see why you'd need to complicate this by
> > calling into other components, the transformation code is concise,
> > clear and accurate for this use case.
>
> Slow and unoptimized and in it's current form hard to SIMD optimize,
> especially without changing output as well though (in addition to the large
> bit width of floats reducing the benefit, denormals can be an issue for
> SIMD-accelerating float code).
> Unless I miss a reason why nobody would ever want this to be faster?
>
> >>> @@ -1519,6 +1773,26 @@ again:
> >>>        return AVERROR_INVALIDDATA;
> >>>    }
> >>>
> >>> +    /* Handle DNG images with JPEG-compressed tiles */
> >>> +
> >>> +    if (s->tiff_type == TIFF_TYPE_DNG || s->tiff_type ==
> TIFF_TYPE_CINEMADNG) {
> >>> +        if (s->is_jpeg) {
> >>> +            if (s->is_bayer) {
> >>> +                if ((ret = dng_decode(avctx, (AVFrame*)data, avpkt))
> > 0)
> >>> +                    *got_frame = 1;
> >>> +                return ret;
> >>> +            } else {
> >>> +                avpriv_report_missing_feature(avctx, "DNG
> JPG-compressed non-bayer-encoded images");
> >>> +                return AVERROR_PATCHWELCOME;
> >>> +            }
> >>> +        } else if (s->is_tiled) {
> >>> +            avpriv_report_missing_feature(avctx, "DNG uncompressed
> tiled images");
> >>> +            return AVERROR_PATCHWELCOME;
> >>> +        }
> >>
> >> There is no need for an "else" block if the "if" block ends in a return.
> >
> > Indeed, but in my opinion it makes the code clearer on first glance
> > (if you miss the return). I can change it if you insist.
>
> The second comment was the more relevant one actually.
> I only really care about finding some way to make this part a whole lot
> more readable.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
velocityra@gmail.com July 28, 2019, 7:40 a.m. UTC | #5
Στις Κυρ, 28 Ιουλ 2019 στις 1:30 π.μ., ο/η Reimar Döffinger
<Reimar.Doeffinger@gmx.de> έγραψε:
>
> Huh? Are you thinking of templates? always_inline can usually be used the same way.
> I haven't looked into the how or anything, it was just a comment in principle.

You're totally right (I checked on godbolt just to make sure), I
hadn't considered that. Would need a trivial change (plus the function
is already inlined) so I'll do it.

> > I believe branch prediction and instruction pipelining will hide this
> > delay. I doubt it has any measurable effect on performance.
>
> There are CPUs without that.

Yes, but with neither? Branch prediction, sure, but I'd like to hear
one example of a CPU FFmpeg runs on without pipelining (I genuinely
don't know if there are).

> Of course would need to look at what the actual requirements are concerning precision, rounding and range. But that should be done anyway.
Like I said, I did. It's not possible :)

> Well, I did not find it obvious where src and dst are from and what format they are in.
> Essentially if they are decoder output it's likely fine, if they are read from a file without decoding step it's likely wrong.

Right, I see your concern. They are read from a file and they can be
both LE/BE, but that's specified in the file and there is logic in
tiff.c to account for it. So when we process them they are guaranteed
to be the same endianness as the host.

> >> Also not sure if since these are essentially brightness/contrast adjustments if we should't rather just have a way to export the transform to use...
> >
> > Export to where?
>
> Just provide as metadata and leave to e.g. libavfilter.

Not sure how I'd do that or if it'd be appropriate/feasible.

> > I don't see why you'd need to complicate this by
> > calling into other components, the transformation code is concise,
> > clear and accurate for this use case.
>
> Slow and unoptimized and in it's current form hard to SIMD optimize, especially without changing output as well though (in addition to the large bit width of floats reducing the benefit, denormals can be an issue for SIMD-accelerating float code).
> Unless I miss a reason why nobody would ever want this to be faster?

I'll take out the comparison which should make it faster, I'll talk
with my mentor if he thinks I should use libavfilter.

> >> There is no need for an "else" block if the "if" block ends in a return.
> >
> > Indeed, but in my opinion it makes the code clearer on first glance
> > (if you miss the return). I can change it if you insist.
>
> The second comment was the more relevant one actually.
> I only really care about finding some way to make this part a whole lot more readable.

Ok, hopefully the code I posted right after your review is readable enough.
velocityra@gmail.com July 28, 2019, 8:40 a.m. UTC | #6
Actually, I checked a more accurate version of my loop, and GCC
optimizes away the LUT check anyway:
https://godbolt.org/z/G1e1R4
As you can see it's smart enough to create 2 versions of my functions
(started at L3 with a lookup and L7 without it) and it does the check
outside.

There's no guarantee this is happening with the actual version of
course (it could be slower, or even faster if it also optimizes it
through dng_blit).
I could check the actual disasm in FFmpeg, but I don't think it's
worth it at this point (my mentor agrees).

Στις Κυρ, 28 Ιουλ 2019 στις 10:40 π.μ., ο/η Nick Renieris
<velocityra@gmail.com> έγραψε:
>
> Στις Κυρ, 28 Ιουλ 2019 στις 1:30 π.μ., ο/η Reimar Döffinger
> <Reimar.Doeffinger@gmx.de> έγραψε:
> >
> > Huh? Are you thinking of templates? always_inline can usually be used the same way.
> > I haven't looked into the how or anything, it was just a comment in principle.
>
> You're totally right (I checked on godbolt just to make sure), I
> hadn't considered that. Would need a trivial change (plus the function
> is already inlined) so I'll do it.
>
> > > I believe branch prediction and instruction pipelining will hide this
> > > delay. I doubt it has any measurable effect on performance.
> >
> > There are CPUs without that.
>
> Yes, but with neither? Branch prediction, sure, but I'd like to hear
> one example of a CPU FFmpeg runs on without pipelining (I genuinely
> don't know if there are).
>
> > Of course would need to look at what the actual requirements are concerning precision, rounding and range. But that should be done anyway.
> Like I said, I did. It's not possible :)
>
> > Well, I did not find it obvious where src and dst are from and what format they are in.
> > Essentially if they are decoder output it's likely fine, if they are read from a file without decoding step it's likely wrong.
>
> Right, I see your concern. They are read from a file and they can be
> both LE/BE, but that's specified in the file and there is logic in
> tiff.c to account for it. So when we process them they are guaranteed
> to be the same endianness as the host.
>
> > >> Also not sure if since these are essentially brightness/contrast adjustments if we should't rather just have a way to export the transform to use...
> > >
> > > Export to where?
> >
> > Just provide as metadata and leave to e.g. libavfilter.
>
> Not sure how I'd do that or if it'd be appropriate/feasible.
>
> > > I don't see why you'd need to complicate this by
> > > calling into other components, the transformation code is concise,
> > > clear and accurate for this use case.
> >
> > Slow and unoptimized and in it's current form hard to SIMD optimize, especially without changing output as well though (in addition to the large bit width of floats reducing the benefit, denormals can be an issue for SIMD-accelerating float code).
> > Unless I miss a reason why nobody would ever want this to be faster?
>
> I'll take out the comparison which should make it faster, I'll talk
> with my mentor if he thinks I should use libavfilter.
>
> > >> There is no need for an "else" block if the "if" block ends in a return.
> > >
> > > Indeed, but in my opinion it makes the code clearer on first glance
> > > (if you miss the return). I can change it if you insist.
> >
> > The second comment was the more relevant one actually.
> > I only really care about finding some way to make this part a whole lot more readable.
>
> Ok, hopefully the code I posted right after your review is readable enough.
Reimar Döffinger July 28, 2019, 2:29 p.m. UTC | #7
On 28.07.2019, at 10:40, Nick Renieris <velocityra@gmail.com> wrote:

> Actually, I checked a more accurate version of my loop, and GCC
> optimizes away the LUT check anyway:
> https://godbolt.org/z/G1e1R4
> As you can see it's smart enough to create 2 versions of my functions
> (started at L3 with a lookup and L7 without it) and it does the check
> outside.
> 
> There's no guarantee this is happening with the actual version of
> course (it could be slower, or even faster if it also optimizes it
> through dng_blit).
> I could check the actual disasm in FFmpeg, but I don't think it's
> worth it at this point (my mentor agrees).

Sorry, I did not know you already had discussions with an FFmpeg developer about the design.
I tend to review in a way that I just comment on anything I feel is not optimal.
I understand that much of it might not be reasonable to do differently in the end,
but feel that it is often enough to make it worth discussing things.
But I realize it can come across the wrong way, so sorry if I gave you a bad review experience,
and thanks for considering my comments.

Best regards,
Reimar
Reimar Döffinger July 28, 2019, 2:39 p.m. UTC | #8
On 28.07.2019, at 08:55, Paul B Mahol <onemda@gmail.com> wrote:

>> 
>> Just provide as metadata and leave to e.g. libavfilter.
>> 
> 
> That does not follow DNG specification.
> I really do not have time to comment on other irrelevant stuff you pointed
> in your review.

If you had just told me that I should back off in a polite manner instead of this dismissive-insulting
way it would have taken me much less effort to acknowledge your point.
And over time you could save a lot of time thanks to not having to complain
about people being biased against you.
Paul B Mahol July 28, 2019, 2:45 p.m. UTC | #9
On Sun, Jul 28, 2019 at 4:39 PM Reimar Döffinger <Reimar.Doeffinger@gmx.de>
wrote:

> On 28.07.2019, at 08:55, Paul B Mahol <onemda@gmail.com> wrote:
>
> >>
> >> Just provide as metadata and leave to e.g. libavfilter.
> >>
> >
> > That does not follow DNG specification.
> > I really do not have time to comment on other irrelevant stuff you
> pointed
> > in your review.
>
> If you had just told me that I should back off in a polite manner instead
> of this dismissive-insulting
> way it would have taken me much less effort to acknowledge your point.
> And over time you could save a lot of time thanks to not having to complain
> about people being biased against you.


Sorry if you feel attacked, but I simply do not like it.
Instead of complaining you could send patch one or two.
Reimar Döffinger July 28, 2019, 3:45 p.m. UTC | #10
On 28.07.2019, at 16:45, Paul B Mahol <onemda@gmail.com> wrote:

> On Sun, Jul 28, 2019 at 4:39 PM Reimar Döffinger <Reimar.Doeffinger@gmx.de>
> wrote:
> 
>> On 28.07.2019, at 08:55, Paul B Mahol <onemda@gmail.com> wrote:
>> 
>>>> 
>>>> Just provide as metadata and leave to e.g. libavfilter.
>>>> 
>>> 
>>> That does not follow DNG specification.
>>> I really do not have time to comment on other irrelevant stuff you
>> pointed
>>> in your review.
>> 
>> If you had just told me that I should back off in a polite manner instead
>> of this dismissive-insulting
>> way it would have taken me much less effort to acknowledge your point.
>> And over time you could save a lot of time thanks to not having to complain
>> about people being biased against you.
> 
> 
> Sorry if you feel attacked, but I simply do not like it.

That is fine to tell me. I'll at least make an effort. Just the wording discourages from doing so.

> Instead of complaining you could send patch one or two.

(Assuming you mean the review by "complaining")
Sorry, it was just meant to be a review, not complaining, but I know I tend to write in a way that is too much like complaining.
There are quite a few patches that go unreviewed because (I assume) there are not enough reviewers as usual, so I've rather been reviewing than developing.
Plus I have no personal itch to scratch to send a patch for currently.
But if more people think there are more useful ways to contribute I'll certainly listen.
diff mbox

Patch

diff --git a/configure b/configure
index 5a4f507246..6726883d5b 100755
--- a/configure
+++ b/configure
@@ -2811,6 +2811,7 @@  tdsc_decoder_deps="zlib"
 tdsc_decoder_select="mjpeg_decoder"
 theora_decoder_select="vp3_decoder"
 thp_decoder_select="mjpeg_decoder"
+tiff_decoder_select="mjpeg_decoder"
 tiff_decoder_suggest="zlib lzma"
 tiff_encoder_suggest="zlib"
 truehd_decoder_select="mlp_parser"
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3cd73fbcc6..f814c69996 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -616,7 +616,7 @@  OBJS-$(CONFIG_TARGA_ENCODER)           += targaenc.o rle.o
 OBJS-$(CONFIG_TARGA_Y216_DECODER)      += targa_y216dec.o
 OBJS-$(CONFIG_TDSC_DECODER)            += tdsc.o
 OBJS-$(CONFIG_TIERTEXSEQVIDEO_DECODER) += tiertexseqv.o
-OBJS-$(CONFIG_TIFF_DECODER)            += tiff.o lzw.o faxcompr.o tiff_data.o tiff_common.o
+OBJS-$(CONFIG_TIFF_DECODER)            += tiff.o lzw.o faxcompr.o tiff_data.o tiff_common.o mjpegdec.o
 OBJS-$(CONFIG_TIFF_ENCODER)            += tiffenc.o rle.o lzwenc.o tiff_data.o
 OBJS-$(CONFIG_TMV_DECODER)             += tmv.o cga_data.o
 OBJS-$(CONFIG_TRUEHD_DECODER)          += mlpdec.o mlpdsp.o
diff --git a/libavcodec/tiff.c b/libavcodec/tiff.c
index c520d7df83..033339cec2 100644
--- a/libavcodec/tiff.c
+++ b/libavcodec/tiff.c
@@ -35,6 +35,7 @@ 
 
 #include "libavutil/attributes.h"
 #include "libavutil/avstring.h"
+#include "libavutil/error.h"
 #include "libavutil/intreadwrite.h"
 #include "libavutil/imgutils.h"
 #include "libavutil/opt.h"
@@ -46,6 +47,7 @@ 
 #include "mathops.h"
 #include "tiff.h"
 #include "tiff_data.h"
+#include "mjpegdec.h"
 #include "thread.h"
 #include "get_bits.h"
 
@@ -54,6 +56,10 @@  typedef struct TiffContext {
     AVCodecContext *avctx;
     GetByteContext gb;
 
+    /* JPEG decoding for DNG */
+    AVCodecContext *avctx_mjpeg; // wrapper context for MJPEG
+    AVFrame *jpgframe;           // decoded JPEG tile
+
     int get_subimage;
     uint16_t get_page;
     int get_thumbnail;
@@ -76,7 +82,9 @@  typedef struct TiffContext {
 
     int is_bayer;
     uint8_t pattern[4];
+    unsigned black_level;
     unsigned white_level;
+    const uint16_t *dng_lut; // Pointer to DNG linearization table
 
     uint32_t sub_ifd;
     uint16_t cur_page;
@@ -86,6 +94,14 @@  typedef struct TiffContext {
     int stripsizesoff, stripsize, stripoff, strippos;
     LZWState *lzw;
 
+    /* Tile support */
+    int is_tiled;
+    int tile_byte_counts_offset, tile_offsets_offset;
+    int tile_width, tile_length;
+    int tile_count;
+
+    int is_jpeg;
+
     uint8_t *deinvert_buf;
     int deinvert_buf_size;
     uint8_t *yuv_line;
@@ -257,6 +273,9 @@  static int add_metadata(int count, int type,
     };
 }
 
+static void av_always_inline dng_blit(TiffContext *s, uint8_t *dst, int dst_stride,
+                                      const uint8_t *src, int src_stride, int width, int height, int is_u16);
+
 static void av_always_inline horizontal_fill(TiffContext *s,
                                              unsigned int bpp, uint8_t* dst,
                                              int usePtr, const uint8_t *src,
@@ -712,6 +731,204 @@  static int tiff_unpack_strip(TiffContext *s, AVFrame *p, uint8_t *dst, int strid
     return 0;
 }
 
+/**
+ * Map stored raw sensor values into linear reference values.
+ * See: DNG Specification - Chapter 5
+ */
+static uint16_t av_always_inline dng_raw_to_linear16(uint16_t value,
+                                                    const uint16_t *lut,
+                                                    uint16_t black_level,
+                                                    float scale_factor) {
+    // Lookup table lookup
+    if (lut)
+        value = lut[value];
+
+    // Black level subtraction
+    value = av_clip_uint16_c((unsigned)value - black_level);
+
+    // Color scaling
+    value = av_clip_uint16_c((unsigned)(((float)value * scale_factor) * 0xFFFF));
+
+    return value;
+}
+
+static uint16_t av_always_inline dng_raw_to_linear8(uint16_t value,
+                                                    const uint16_t *lut,
+                                                    uint16_t black_level,
+                                                    float scale_factor) {
+    return dng_raw_to_linear16(value, lut, black_level, scale_factor) >> 8;
+}
+
+static void dng_blit(TiffContext *s, uint8_t *dst, int dst_stride,
+                     const uint8_t *src, int src_stride,
+                     int width, int height, int is_u16)
+{
+    int line, col;
+    float scale_factor;
+
+    scale_factor = 1.0f / (s->white_level - s->black_level);
+
+    if (is_u16) {
+        for (line = 0; line < height; line++) {
+            uint16_t *dst_u16 = (uint16_t *)dst;
+            uint16_t *src_u16 = (uint16_t *)src;
+
+            for (col = 0; col < width; col++)
+                *dst_u16++ = dng_raw_to_linear16(*src_u16++, s->dng_lut, s->black_level, scale_factor);
+
+            dst += dst_stride * sizeof(uint16_t);
+            src += src_stride * sizeof(uint16_t);
+        }
+    } else {
+        for (line = 0; line < height; line++) {
+            for (col = 0; col < width; col++)
+                *dst++ = dng_raw_to_linear8(*src++, s->dng_lut, s->black_level, scale_factor);
+
+            dst += dst_stride;
+            src += src_stride;
+        }
+    }
+}
+
+static int dng_decode_jpeg_tile(AVCodecContext *avctx, AVFrame *frame,
+                                int tile_byte_count, int x, int y, int w, int h)
+{
+    TiffContext *s = avctx->priv_data;
+    AVPacket jpkt;
+    uint8_t *dst_data, *src_data;
+    uint32_t dst_offset; /* offset from dst buffer in pixels */
+    int is_u16, pixel_size;
+    int ret;
+
+    /* Prepare a packet and send to the MJPEG decoder */
+    av_init_packet(&jpkt);
+    jpkt.data = (uint8_t*)s->gb.buffer;
+    jpkt.size = tile_byte_count;
+
+    ret = avcodec_send_packet(s->avctx_mjpeg, &jpkt);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Error submitting a packet for decoding\n");
+        return ret;
+    }
+
+    ret = avcodec_receive_frame(s->avctx_mjpeg, s->jpgframe);
+    if (ret < 0) {
+        av_log(avctx, AV_LOG_ERROR, "JPEG decoding error: %s.\n", av_err2str(ret));
+
+        /* Normally skip, error if explode */
+        if (avctx->err_recognition & AV_EF_EXPLODE)
+            return AVERROR_INVALIDDATA;
+        else
+            return 0;
+    }
+
+    /* Copy the outputted tile's pixels from 'jpgframe' to 'frame' (final buffer */
+
+    is_u16 = (s->bpp > 8);
+    pixel_size = (is_u16 ? sizeof(uint16_t) : sizeof(uint8_t));
+
+    dst_offset = x + frame->linesize[0] * y / pixel_size;
+    dst_data = frame->data[0] + dst_offset * pixel_size;
+    src_data = s->jpgframe->data[0];
+
+    dng_blit(s,
+             dst_data,
+             frame->linesize[0] / pixel_size,
+             src_data,
+             s->jpgframe->linesize[0] / pixel_size,
+             w,
+             h,
+             is_u16);
+
+    av_frame_unref(s->jpgframe);
+
+    return 0;
+}
+
+static int dng_decode_tiles(AVCodecContext *avctx, AVFrame *frame)
+{
+    TiffContext *s = avctx->priv_data;
+    int tile_idx;
+    int tile_offset_offset, tile_offset;
+    int tile_byte_count_offset, tile_byte_count;
+    int tile_count_x, tile_count_y;
+    int tile_width, tile_length;
+    int tile_x = 0, tile_y = 0;
+    int pos_x = 0, pos_y = 0;
+    int ret;
+
+    /* Calculate tile counts (round up) */
+    tile_count_x = (s->width + s->tile_width - 1) / s->tile_width;
+    tile_count_y = (s->height + s->tile_length - 1) / s->tile_length;
+
+    /* Iterate over the number of tiles */
+    for (tile_idx = 0; tile_idx < s->tile_count; tile_idx++) {
+        tile_x = tile_idx % tile_count_x;
+        tile_y = tile_idx / tile_count_x;
+
+        if (tile_x == tile_count_x - 1) // If on the right edge
+            tile_width = s->width % s->tile_width;
+        else
+            tile_width = s->tile_width;
+
+        if (tile_y == tile_count_y - 1) // If on the bottom edge
+            tile_length = s->height % s->tile_length;
+        else
+            tile_length = s->tile_length;
+
+        /* Read tile offset */
+        tile_offset_offset = s->tile_offsets_offset + tile_idx * sizeof(int);
+        bytestream2_seek(&s->gb, tile_offset_offset, SEEK_SET);
+        tile_offset = ff_tget_long(&s->gb, s->le);
+
+        /* Read tile byte size */
+        tile_byte_count_offset = s->tile_byte_counts_offset + tile_idx * sizeof(int);
+        bytestream2_seek(&s->gb, tile_byte_count_offset, SEEK_SET);
+        tile_byte_count = ff_tget_long(&s->gb, s->le);
+
+        /* Seek to tile data */
+        bytestream2_seek(&s->gb, tile_offset, SEEK_SET);
+
+        /* Decode JPEG tile and copy it in the reference frame */
+        ret = dng_decode_jpeg_tile(avctx, frame, tile_byte_count, pos_x, pos_y, tile_width, tile_length);
+
+        if (ret < 0)
+            return ret;
+
+        /* Advance current positions */
+        pos_x += tile_width;
+        if (tile_x == tile_count_x - 1) { // If on the right edge
+            pos_x = 0;
+            pos_y += tile_length;
+        }
+    }
+
+    return 0;
+}
+
+static int dng_decode(AVCodecContext *avctx, AVFrame *frame, AVPacket *avpkt) {
+    int ret;
+
+    TiffContext *s = avctx->priv_data;
+
+    s->jpgframe->width  = s->tile_width;
+    s->jpgframe->height = s->tile_length;
+
+    s->avctx_mjpeg->width = s->tile_width;
+    s->avctx_mjpeg->height = s->tile_length;
+
+    /* Decode all tiles in a frame */
+    ret = dng_decode_tiles(avctx, frame);
+    if (ret < 0)
+        return ret;
+
+    /* Frame is ready to be output */
+    frame->pict_type = AV_PICTURE_TYPE_I;
+    frame->key_frame = 1;
+
+    return avpkt->size;
+}
+
 static int init_image(TiffContext *s, ThreadFrame *frame)
 {
     int ret;
@@ -923,7 +1140,7 @@  static void set_sar(TiffContext *s, unsigned tag, unsigned num, unsigned den)
 
 static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
 {
-    unsigned tag, type, count, off, value = 0, value2 = 0;
+    unsigned tag, type, count, off, value = 0, value2 = 1; // value2 is a denominator so init. to 1
     int i, start;
     int pos;
     int ret;
@@ -1029,8 +1246,8 @@  static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
 #endif
         case TIFF_JPEG:
         case TIFF_NEWJPEG:
-            avpriv_report_missing_feature(s->avctx, "JPEG compression");
-            return AVERROR_PATCHWELCOME;
+            s->is_jpeg = 1;
+            break;
         case TIFF_LZMA:
 #if CONFIG_LZMA
             break;
@@ -1085,12 +1302,19 @@  static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
     case TIFF_YRES:
         set_sar(s, tag, value, value2);
         break;
+    case TIFF_TILE_OFFSETS:
+        s->tile_offsets_offset = off;
+        s->tile_count = count;
+        s->is_tiled = 1;
+        break;
     case TIFF_TILE_BYTE_COUNTS:
+        s->tile_byte_counts_offset = off;
+        break;
     case TIFF_TILE_LENGTH:
-    case TIFF_TILE_OFFSETS:
+        s->tile_length = value;
+        break;
     case TIFF_TILE_WIDTH:
-        av_log(s->avctx, AV_LOG_ERROR, "Tiled images are not supported\n");
-        return AVERROR_PATCHWELCOME;
+        s->tile_width = value;
         break;
     case TIFF_PREDICTOR:
         s->predictor = value;
@@ -1101,6 +1325,32 @@  static int tiff_decode_tag(TiffContext *s, AVFrame *frame)
         else if (count > 1)
             s->sub_ifd = ff_tget(&s->gb, TIFF_LONG, s->le); /** Only get the first SubIFD */
         break;
+    case DNG_LINEARIZATION_TABLE: {
+        uint32_t lut_offset = value;
+        uint32_t lut_size = count;
+        uint32_t lut_wanted_size = 1 << s->bpp;
+        if (lut_wanted_size != lut_size)
+            av_log(s->avctx, AV_LOG_WARNING, "DNG contains LUT with invalid size (%"PRIu32"), disabling LUT\n", lut_size);
+        else if (lut_offset >= bytestream2_size(&s->gb))
+            av_log(s->avctx, AV_LOG_WARNING, "DNG contains LUT with invalid offset (%"PRIu32"), disabling LUT\n", lut_offset);
+        else
+            s->dng_lut = (uint16_t*)(s->gb.buffer + lut_offset);
+        break;
+    }
+    case DNG_BLACK_LEVEL:
+        if (count > 1) {    /* Use the first value in the pattern (assume they're all the same) */
+            if (type == TIFF_RATIONAL) {
+                value  = ff_tget(&s->gb, TIFF_LONG, s->le);
+                value2 = ff_tget(&s->gb, TIFF_LONG, s->le);
+
+                s->black_level = value / value2;
+            } else
+                s->black_level = ff_tget(&s->gb, type, s->le);
+            av_log(s->avctx, AV_LOG_WARNING, "Assuming black level pattern values are identical\n");
+        } else {
+            s->black_level = value / value2;
+        }
+        break;
     case DNG_WHITE_LEVEL:
         s->white_level = value;
         break;
@@ -1420,6 +1670,8 @@  static int decode_frame(AVCodecContext *avctx,
     }
     s->le          = le;
     // TIFF_BPP is not a required tag and defaults to 1
+
+    s->tiff_type   = TIFF_TYPE_TIFF;
 again:
     s->is_thumbnail = 0;
     s->bppcount    = s->bpp = 1;
@@ -1428,8 +1680,10 @@  again:
     s->fill_order  = 0;
     s->white_level = 0;
     s->is_bayer    = 0;
+    s->is_tiled    = 0;
+    s->is_jpeg     = 0;
     s->cur_page    = 0;
-    s->tiff_type   = TIFF_TYPE_TIFF;
+    s->dng_lut     = NULL;
     free_geotags(s);
 
     // Reset these offsets so we can tell if they were set this frame
@@ -1519,6 +1773,26 @@  again:
         return AVERROR_INVALIDDATA;
     }
 
+    /* Handle DNG images with JPEG-compressed tiles */
+
+    if (s->tiff_type == TIFF_TYPE_DNG || s->tiff_type == TIFF_TYPE_CINEMADNG) {
+        if (s->is_jpeg) {
+            if (s->is_bayer) {
+                if ((ret = dng_decode(avctx, (AVFrame*)data, avpkt)) > 0)
+                    *got_frame = 1;
+                return ret;
+            } else {
+                avpriv_report_missing_feature(avctx, "DNG JPG-compressed non-bayer-encoded images");
+                return AVERROR_PATCHWELCOME;
+            }
+        } else if (s->is_tiled) {
+            avpriv_report_missing_feature(avctx, "DNG uncompressed tiled images");
+            return AVERROR_PATCHWELCOME;
+        }
+    }
+
+    /* Handle TIFF images and DNG images with uncompressed strips (non-tiled) */
+
     planes = s->planar ? s->bppcount : 1;
     for (plane = 0; plane < planes; plane++) {
         uint8_t *five_planes = NULL;
@@ -1678,6 +1952,8 @@  again:
 static av_cold int tiff_init(AVCodecContext *avctx)
 {
     TiffContext *s = avctx->priv_data;
+    const AVCodec *codec;
+    int ret;
 
     s->width  = 0;
     s->height = 0;
@@ -1689,6 +1965,29 @@  static av_cold int tiff_init(AVCodecContext *avctx)
         return AVERROR(ENOMEM);
     ff_ccitt_unpack_init();
 
+    /* Allocate JPEG frame */
+    s->jpgframe = av_frame_alloc();
+    if (!s->jpgframe)
+        return AVERROR(ENOMEM);
+
+    /* Prepare everything needed for JPEG decoding */
+    codec = avcodec_find_decoder(AV_CODEC_ID_MJPEG);
+    if (!codec)
+        return AVERROR_BUG;
+    s->avctx_mjpeg = avcodec_alloc_context3(codec);
+    if (!s->avctx_mjpeg)
+        return AVERROR(ENOMEM);
+    s->avctx_mjpeg->flags = avctx->flags;
+    s->avctx_mjpeg->flags2 = avctx->flags2;
+    s->avctx_mjpeg->dct_algo = avctx->dct_algo;
+    s->avctx_mjpeg->idct_algo = avctx->idct_algo;
+    ret = ff_codec_open2_recursive(s->avctx_mjpeg, codec, NULL);
+    if (ret < 0) {
+        av_frame_free(&s->jpgframe);
+        avcodec_free_context(&s->avctx_mjpeg);
+        return ret;
+    }
+
     return 0;
 }
 
@@ -1705,6 +2004,8 @@  static av_cold int tiff_end(AVCodecContext *avctx)
     s->yuv_line_size = 0;
     av_freep(&s->fax_buffer);
     s->fax_buffer_size = 0;
+    av_frame_free(&s->jpgframe);
+    avcodec_free_context(&s->avctx_mjpeg);
     return 0;
 }
 
diff --git a/libavcodec/tiff.h b/libavcodec/tiff.h
index 81913c6b1a..2184c2c829 100644
--- a/libavcodec/tiff.h
+++ b/libavcodec/tiff.h
@@ -101,6 +101,8 @@  enum TiffTags {
 enum DngTags {
     DNG_VERSION             = 0xC612,
     DNG_BACKWARD_VERSION    = 0xC613,
+    DNG_LINEARIZATION_TABLE = 0xC618,
+    DNG_BLACK_LEVEL         = 0xC61A,
     DNG_WHITE_LEVEL         = 0xC61D,
 };