diff mbox series

[FFmpeg-devel,24/46] avcodec/jpeglsenc: Avoid intermediate buffer, allow user-supplied buffers

Message ID HE1PR0301MB21541F2E1F394BB7F8DBA4D78F5F9@HE1PR0301MB2154.eurprd03.prod.outlook.com
State Accepted
Commit f249c32eff1514b86e2863f6f651daf7c761fdcb
Headers show
Series [FFmpeg-devel,01/46] avcodec/a64multienc: Avoid intermediate buffer
Related show

Checks

Context Check Description
andriy/x86_make success Make finished
andriy/x86_make_fate success Make fate finished
andriy/PPC64_make success Make finished
andriy/PPC64_make_fate success Make fate finished

Commit Message

Andreas Rheinhardt April 29, 2021, 11:56 p.m. UTC
Up until now, the JPEG-LS encoder allocated a worst-case-sized packet
at the beginning of each encode2 call; then it wrote the packet header
into its destination buffer and encoded the actual packet data;
said data is written into another worst-case-sized buffer, because it
needs to be escaped before being written into the packet buffer.
Finally, because the packet buffer is worst-case-sized, the generic
code copies the actually used part into a fresh buffer.

This commit changes this: Allocating the packet and writing the header
into it is deferred until the actual data has been encoded and its size
is known. This gives a good upper bound for the needed size of the packet
buffer (the upper bound might be 1/15 too large) and so one can avoid the
implicit intermediate buffer and support user-supplied buffers by using
ff_get_encode_buffer().

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
---
 libavcodec/jpeglsenc.c | 102 ++++++++++++++++++++++++-----------------
 1 file changed, 59 insertions(+), 43 deletions(-)

Comments

James Almer May 4, 2021, 6:50 p.m. UTC | #1
On 4/29/2021 8:56 PM, Andreas Rheinhardt wrote:
> Up until now, the JPEG-LS encoder allocated a worst-case-sized packet
> at the beginning of each encode2 call; then it wrote the packet header
> into its destination buffer and encoded the actual packet data;
> said data is written into another worst-case-sized buffer, because it
> needs to be escaped before being written into the packet buffer.
> Finally, because the packet buffer is worst-case-sized, the generic
> code copies the actually used part into a fresh buffer.
> 
> This commit changes this: Allocating the packet and writing the header
> into it is deferred until the actual data has been encoded and its size
> is known. This gives a good upper bound for the needed size of the packet
> buffer (the upper bound might be 1/15 too large) and so one can avoid the
> implicit intermediate buffer and support user-supplied buffers by using
> ff_get_encode_buffer().
> 
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
> ---
>   libavcodec/jpeglsenc.c | 102 ++++++++++++++++++++++++-----------------
>   1 file changed, 59 insertions(+), 43 deletions(-)
> 
> diff --git a/libavcodec/jpeglsenc.c b/libavcodec/jpeglsenc.c
> index 17d46c0449..a7bcd78275 100644
> --- a/libavcodec/jpeglsenc.c
> +++ b/libavcodec/jpeglsenc.c
> @@ -27,6 +27,7 @@
>   
>   #include "avcodec.h"
>   #include "bytestream.h"
> +#include "encode.h"
>   #include "get_bits.h"
>   #include "put_bits.h"
>   #include "golomb.h"
> @@ -283,53 +284,23 @@ static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
>       const uint8_t *in;
>       uint8_t *last = NULL;
>       JLSState state = { 0 };
> -    int i, size, ret;
> +    size_t size;
> +    int i, ret, size_in_bits;
>       int comps;
>   
> -    if ((ret = ff_alloc_packet2(avctx, pkt, ctx->size, 0)) < 0)
> -        return ret;
> -
>       last = av_mallocz(FFABS(p->linesize[0]));
>       if (!last)
>           return AVERROR(ENOMEM);
>   
> -    bytestream2_init_writer(&pb, pkt->data, pkt->size);
>       init_put_bits(&pb2, ctx->buf, ctx->size);
>   
> -    /* write our own JPEG header, can't use mjpeg_picture_header */
>       comps = ctx->comps;
> -    put_marker_byteu(&pb, SOI);
> -    put_marker_byteu(&pb, SOF48);
> -    bytestream2_put_be16u(&pb, 8 + comps * 3); // header size depends on components
> -    bytestream2_put_byteu(&pb, (avctx->pix_fmt == AV_PIX_FMT_GRAY16) ? 16 : 8);  // bpp
> -    bytestream2_put_be16u(&pb, avctx->height);
> -    bytestream2_put_be16u(&pb, avctx->width);
> -    bytestream2_put_byteu(&pb, comps);          // components
> -    for (i = 1; i <= comps; i++) {
> -        bytestream2_put_byteu(&pb, i);     // component ID
> -        bytestream2_put_byteu(&pb, 0x11);  // subsampling: none
> -        bytestream2_put_byteu(&pb, 0);     // Tiq, used by JPEG-LS ext
> -    }
> -
> -    put_marker_byteu(&pb, SOS);
> -    bytestream2_put_be16u(&pb, 6 + comps * 2);
> -    bytestream2_put_byteu(&pb, comps);
> -    for (i = 1; i <= comps; i++) {
> -        bytestream2_put_byteu(&pb, i);   // component ID
> -        bytestream2_put_byteu(&pb, 0);   // mapping index: none
> -    }
> -    bytestream2_put_byteu(&pb, ctx->pred);
> -    bytestream2_put_byteu(&pb, (comps > 1) ? 1 : 0);  // interleaving: 0 - plane, 1 - line
> -    bytestream2_put_byteu(&pb, 0);  // point transform: none
> -
>       /* initialize JPEG-LS state from JPEG parameters */
>       state.near = ctx->pred;
>       state.bpp  = (avctx->pix_fmt == AV_PIX_FMT_GRAY16) ? 16 : 8;
>       ff_jpegls_reset_coding_parameters(&state, 0);
>       ff_jpegls_init_state(&state);
>   
> -    ls_store_lse(&state, &pb);
> -
>       in = p->data[0];
>       if (avctx->pix_fmt == AV_PIX_FMT_GRAY8) {
>           int t = 0;
> @@ -378,17 +349,63 @@ static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
>               in += p->linesize[0];
>           }
>       }
> -
> -    /* the specification says that after doing 0xff escaping unused bits in
> -     * the last byte must be set to 0, so just append 7 "optional" zero bits
> -     * to avoid special-casing. */
> +    av_free(last);
> +    /* Now the actual image data has been written, which enables us to estimate
> +     * the needed packet size: For every 15 input bits, an escape bit might be
> +     * added below; and if put_bits_count % 15 is >= 8, then another bit might
> +     * be added.
> +     * Furthermore the specification says that after doing 0xff escaping unused
> +     * bits in the last byte must be set to 0, so just append 7 "optional" zero
> +     * bits to avoid special-casing. This also simplifies the size calculation:
> +     * Properly rounding up is now automatically baked-in. */
>       put_bits(&pb2, 7, 0);
> -    size = put_bits_count(&pb2);
> +    /* Make sure that the bit count + padding is representable in an int;
> +       necessary for put_bits_count() as well as for using a GetBitContext. */
> +    if (put_bytes_count(&pb2, 0) > INT_MAX / 8 - AV_INPUT_BUFFER_PADDING_SIZE)
> +        return AVERROR(ERANGE);
> +    size_in_bits = put_bits_count(&pb2);
>       flush_put_bits(&pb2);
> +    size  = size_in_bits * 2U / 15;
> +    size += 2 + 2 + 2 + 1 + 2 + 2 + 1 + comps * (1 + 1 + 1) + 2 + 2 + 1
> +            + comps * (1 + 1) + 1 + 1 + 1; /* Header */
> +    size += 2 + 2 + 1 + 2 + 2 + 2 + 2 + 2; /* LSE */
> +    size += 2; /* EOI */
> +    if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0)
> +        return ret;
> +
> +    bytestream2_init_writer(&pb, pkt->data, pkt->size);
> +
> +    /* write our own JPEG header, can't use mjpeg_picture_header */
> +    put_marker_byteu(&pb, SOI);
> +    put_marker_byteu(&pb, SOF48);
> +    bytestream2_put_be16u(&pb, 8 + comps * 3); // header size depends on components
> +    bytestream2_put_byteu(&pb, (avctx->pix_fmt == AV_PIX_FMT_GRAY16) ? 16 : 8);  // bpp
> +    bytestream2_put_be16u(&pb, avctx->height);
> +    bytestream2_put_be16u(&pb, avctx->width);
> +    bytestream2_put_byteu(&pb, comps);          // components
> +    for (i = 1; i <= comps; i++) {
> +        bytestream2_put_byteu(&pb, i);     // component ID
> +        bytestream2_put_byteu(&pb, 0x11);  // subsampling: none
> +        bytestream2_put_byteu(&pb, 0);     // Tiq, used by JPEG-LS ext
> +    }
> +
> +    put_marker_byteu(&pb, SOS);
> +    bytestream2_put_be16u(&pb, 6 + comps * 2);
> +    bytestream2_put_byteu(&pb, comps);
> +    for (i = 1; i <= comps; i++) {
> +        bytestream2_put_byteu(&pb, i);   // component ID
> +        bytestream2_put_byteu(&pb, 0);   // mapping index: none
> +    }
> +    bytestream2_put_byteu(&pb, ctx->pred);
> +    bytestream2_put_byteu(&pb, (comps > 1) ? 1 : 0);  // interleaving: 0 - plane, 1 - line
> +    bytestream2_put_byteu(&pb, 0);  // point transform: none
> +
> +    ls_store_lse(&state, &pb);
> +
>       /* do escape coding */
> -    init_get_bits(&gb, pb2.buf, size);
> -    size -= 7;
> -    while (get_bits_count(&gb) < size) {
> +    init_get_bits(&gb, pb2.buf, size_in_bits);
> +    size_in_bits -= 7;
> +    while (get_bits_count(&gb) < size_in_bits) {
>           int v;
>           v = get_bits(&gb, 8);
>           bytestream2_put_byte(&pb, v);
> @@ -397,15 +414,14 @@ static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
>               bytestream2_put_byte(&pb, v);
>           }
>       }
> -    av_freep(&last);
>   
>       /* End of image */
>       put_marker_byte(&pb, EOI);
>   
>       emms_c();
>   
> -    pkt->size   = bytestream2_tell_p(&pb);
>       pkt->flags |= AV_PKT_FLAG_KEY;
> +    av_shrink_packet(pkt, bytestream2_tell_p(&pb));

You're shrinking the packet allocated by ff_get_encode_buffer(). Do we 
really want that? The doxy for AVCodec.get_encode_buffer() does not say 
that the requested size is not final and may change. It is obviously 
implicit that it will not grow, but the user may not expect it to shrink 
either.
Think an scenario like having a single buffer meant to be propagated 
through the network, where the user writes a header at a given offset, 
sets pkt->data in their custom get_encode_buffer() implementation to an 
offset immediately after said header, and then keeps the offset for the 
next packet around (data + size) to be used in the next 
get_encode_buffer() call. If pkt->size changes when the packet is 
returned by avcodec_receive_frame(), this would break.

Do you think it's worth amending the doxy with a comment that pkt->size 
may change (shrink) during the process after get_encode_buffer()? Or 
internally ensure no encoder changes it instead? Or is it too much of an 
obscure scenario and the user should only consider the returned packet 
size as the actual final one?

>       *got_packet = 1;
>       return 0;
>   }
> @@ -468,9 +484,9 @@ const AVCodec ff_jpegls_encoder = {
>       .long_name      = NULL_IF_CONFIG_SMALL("JPEG-LS"),
>       .type           = AVMEDIA_TYPE_VIDEO,
>       .id             = AV_CODEC_ID_JPEGLS,
> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>       .priv_data_size = sizeof(JPEGLSContext),
>       .priv_class     = &jpegls_class,
> -    .capabilities   = AV_CODEC_CAP_FRAME_THREADS,
>       .init           = encode_jpegls_init,
>       .encode2        = encode_picture_ls,
>       .close          = encode_jpegls_close,
>
Andreas Rheinhardt May 4, 2021, 7:20 p.m. UTC | #2
James Almer:
> On 4/29/2021 8:56 PM, Andreas Rheinhardt wrote:
>> Up until now, the JPEG-LS encoder allocated a worst-case-sized packet
>> at the beginning of each encode2 call; then it wrote the packet header
>> into its destination buffer and encoded the actual packet data;
>> said data is written into another worst-case-sized buffer, because it
>> needs to be escaped before being written into the packet buffer.
>> Finally, because the packet buffer is worst-case-sized, the generic
>> code copies the actually used part into a fresh buffer.
>>
>> This commit changes this: Allocating the packet and writing the header
>> into it is deferred until the actual data has been encoded and its size
>> is known. This gives a good upper bound for the needed size of the packet
>> buffer (the upper bound might be 1/15 too large) and so one can avoid the
>> implicit intermediate buffer and support user-supplied buffers by using
>> ff_get_encode_buffer().
>>
>> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
>> ---
>>   libavcodec/jpeglsenc.c | 102 ++++++++++++++++++++++++-----------------
>>   1 file changed, 59 insertions(+), 43 deletions(-)
>>
>> diff --git a/libavcodec/jpeglsenc.c b/libavcodec/jpeglsenc.c
>> index 17d46c0449..a7bcd78275 100644
>> --- a/libavcodec/jpeglsenc.c
>> +++ b/libavcodec/jpeglsenc.c
>> @@ -27,6 +27,7 @@
>>     #include "avcodec.h"
>>   #include "bytestream.h"
>> +#include "encode.h"
>>   #include "get_bits.h"
>>   #include "put_bits.h"
>>   #include "golomb.h"
>> @@ -283,53 +284,23 @@ static int encode_picture_ls(AVCodecContext
>> *avctx, AVPacket *pkt,
>>       const uint8_t *in;
>>       uint8_t *last = NULL;
>>       JLSState state = { 0 };
>> -    int i, size, ret;
>> +    size_t size;
>> +    int i, ret, size_in_bits;
>>       int comps;
>>   -    if ((ret = ff_alloc_packet2(avctx, pkt, ctx->size, 0)) < 0)
>> -        return ret;
>> -
>>       last = av_mallocz(FFABS(p->linesize[0]));
>>       if (!last)
>>           return AVERROR(ENOMEM);
>>   -    bytestream2_init_writer(&pb, pkt->data, pkt->size);
>>       init_put_bits(&pb2, ctx->buf, ctx->size);
>>   -    /* write our own JPEG header, can't use mjpeg_picture_header */
>>       comps = ctx->comps;
>> -    put_marker_byteu(&pb, SOI);
>> -    put_marker_byteu(&pb, SOF48);
>> -    bytestream2_put_be16u(&pb, 8 + comps * 3); // header size depends
>> on components
>> -    bytestream2_put_byteu(&pb, (avctx->pix_fmt == AV_PIX_FMT_GRAY16)
>> ? 16 : 8);  // bpp
>> -    bytestream2_put_be16u(&pb, avctx->height);
>> -    bytestream2_put_be16u(&pb, avctx->width);
>> -    bytestream2_put_byteu(&pb, comps);          // components
>> -    for (i = 1; i <= comps; i++) {
>> -        bytestream2_put_byteu(&pb, i);     // component ID
>> -        bytestream2_put_byteu(&pb, 0x11);  // subsampling: none
>> -        bytestream2_put_byteu(&pb, 0);     // Tiq, used by JPEG-LS ext
>> -    }
>> -
>> -    put_marker_byteu(&pb, SOS);
>> -    bytestream2_put_be16u(&pb, 6 + comps * 2);
>> -    bytestream2_put_byteu(&pb, comps);
>> -    for (i = 1; i <= comps; i++) {
>> -        bytestream2_put_byteu(&pb, i);   // component ID
>> -        bytestream2_put_byteu(&pb, 0);   // mapping index: none
>> -    }
>> -    bytestream2_put_byteu(&pb, ctx->pred);
>> -    bytestream2_put_byteu(&pb, (comps > 1) ? 1 : 0);  //
>> interleaving: 0 - plane, 1 - line
>> -    bytestream2_put_byteu(&pb, 0);  // point transform: none
>> -
>>       /* initialize JPEG-LS state from JPEG parameters */
>>       state.near = ctx->pred;
>>       state.bpp  = (avctx->pix_fmt == AV_PIX_FMT_GRAY16) ? 16 : 8;
>>       ff_jpegls_reset_coding_parameters(&state, 0);
>>       ff_jpegls_init_state(&state);
>>   -    ls_store_lse(&state, &pb);
>> -
>>       in = p->data[0];
>>       if (avctx->pix_fmt == AV_PIX_FMT_GRAY8) {
>>           int t = 0;
>> @@ -378,17 +349,63 @@ static int encode_picture_ls(AVCodecContext
>> *avctx, AVPacket *pkt,
>>               in += p->linesize[0];
>>           }
>>       }
>> -
>> -    /* the specification says that after doing 0xff escaping unused
>> bits in
>> -     * the last byte must be set to 0, so just append 7 "optional"
>> zero bits
>> -     * to avoid special-casing. */
>> +    av_free(last);
>> +    /* Now the actual image data has been written, which enables us
>> to estimate
>> +     * the needed packet size: For every 15 input bits, an escape bit
>> might be
>> +     * added below; and if put_bits_count % 15 is >= 8, then another
>> bit might
>> +     * be added.
>> +     * Furthermore the specification says that after doing 0xff
>> escaping unused
>> +     * bits in the last byte must be set to 0, so just append 7
>> "optional" zero
>> +     * bits to avoid special-casing. This also simplifies the size
>> calculation:
>> +     * Properly rounding up is now automatically baked-in. */
>>       put_bits(&pb2, 7, 0);
>> -    size = put_bits_count(&pb2);
>> +    /* Make sure that the bit count + padding is representable in an
>> int;
>> +       necessary for put_bits_count() as well as for using a
>> GetBitContext. */
>> +    if (put_bytes_count(&pb2, 0) > INT_MAX / 8 -
>> AV_INPUT_BUFFER_PADDING_SIZE)
>> +        return AVERROR(ERANGE);
>> +    size_in_bits = put_bits_count(&pb2);
>>       flush_put_bits(&pb2);
>> +    size  = size_in_bits * 2U / 15;
>> +    size += 2 + 2 + 2 + 1 + 2 + 2 + 1 + comps * (1 + 1 + 1) + 2 + 2 + 1
>> +            + comps * (1 + 1) + 1 + 1 + 1; /* Header */
>> +    size += 2 + 2 + 1 + 2 + 2 + 2 + 2 + 2; /* LSE */
>> +    size += 2; /* EOI */
>> +    if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0)
>> +        return ret;
>> +
>> +    bytestream2_init_writer(&pb, pkt->data, pkt->size);
>> +
>> +    /* write our own JPEG header, can't use mjpeg_picture_header */
>> +    put_marker_byteu(&pb, SOI);
>> +    put_marker_byteu(&pb, SOF48);
>> +    bytestream2_put_be16u(&pb, 8 + comps * 3); // header size depends
>> on components
>> +    bytestream2_put_byteu(&pb, (avctx->pix_fmt == AV_PIX_FMT_GRAY16)
>> ? 16 : 8);  // bpp
>> +    bytestream2_put_be16u(&pb, avctx->height);
>> +    bytestream2_put_be16u(&pb, avctx->width);
>> +    bytestream2_put_byteu(&pb, comps);          // components
>> +    for (i = 1; i <= comps; i++) {
>> +        bytestream2_put_byteu(&pb, i);     // component ID
>> +        bytestream2_put_byteu(&pb, 0x11);  // subsampling: none
>> +        bytestream2_put_byteu(&pb, 0);     // Tiq, used by JPEG-LS ext
>> +    }
>> +
>> +    put_marker_byteu(&pb, SOS);
>> +    bytestream2_put_be16u(&pb, 6 + comps * 2);
>> +    bytestream2_put_byteu(&pb, comps);
>> +    for (i = 1; i <= comps; i++) {
>> +        bytestream2_put_byteu(&pb, i);   // component ID
>> +        bytestream2_put_byteu(&pb, 0);   // mapping index: none
>> +    }
>> +    bytestream2_put_byteu(&pb, ctx->pred);
>> +    bytestream2_put_byteu(&pb, (comps > 1) ? 1 : 0);  //
>> interleaving: 0 - plane, 1 - line
>> +    bytestream2_put_byteu(&pb, 0);  // point transform: none
>> +
>> +    ls_store_lse(&state, &pb);
>> +
>>       /* do escape coding */
>> -    init_get_bits(&gb, pb2.buf, size);
>> -    size -= 7;
>> -    while (get_bits_count(&gb) < size) {
>> +    init_get_bits(&gb, pb2.buf, size_in_bits);
>> +    size_in_bits -= 7;
>> +    while (get_bits_count(&gb) < size_in_bits) {
>>           int v;
>>           v = get_bits(&gb, 8);
>>           bytestream2_put_byte(&pb, v);
>> @@ -397,15 +414,14 @@ static int encode_picture_ls(AVCodecContext
>> *avctx, AVPacket *pkt,
>>               bytestream2_put_byte(&pb, v);
>>           }
>>       }
>> -    av_freep(&last);
>>         /* End of image */
>>       put_marker_byte(&pb, EOI);
>>         emms_c();
>>   -    pkt->size   = bytestream2_tell_p(&pb);
>>       pkt->flags |= AV_PKT_FLAG_KEY;
>> +    av_shrink_packet(pkt, bytestream2_tell_p(&pb));
> 
> You're shrinking the packet allocated by ff_get_encode_buffer(). Do we
> really want that? The doxy for AVCodec.get_encode_buffer() does not say
> that the requested size is not final and may change. It is obviously
> implicit that it will not grow, but the user may not expect it to shrink
> either.
> Think an scenario like having a single buffer meant to be propagated
> through the network, where the user writes a header at a given offset,
> sets pkt->data in their custom get_encode_buffer() implementation to an
> offset immediately after said header, and then keeps the offset for the
> next packet around (data + size) to be used in the next
> get_encode_buffer() call. If pkt->size changes when the packet is
> returned by avcodec_receive_frame(), this would break.
> 
> Do you think it's worth amending the doxy with a comment that pkt->size
> may change (shrink) during the process after get_encode_buffer()? Or
> internally ensure no encoder changes it instead? Or is it too much of an
> obscure scenario and the user should only consider the returned packet
> size as the actual final one?

There are some encoders (at least this one, xavs and vc2enc) where one
has a very good estimate of the final size, but not the final size. If
we don't allow the packet to shrink a bit, then we would effectively
worsen vc2enc for users that don't use your scenario.

I actually never thought this be controversial: The doxy does not say
that the size is final and the old API also didn't support such a
scenario as you are describing. If you think this needs a doxy change,
so be it.
(Maybe we could add a flag to inform the user about whether the packet
can shrink after get_encode_buffer()?)

> 
>>       *got_packet = 1;
>>       return 0;
>>   }
>> @@ -468,9 +484,9 @@ const AVCodec ff_jpegls_encoder = {
>>       .long_name      = NULL_IF_CONFIG_SMALL("JPEG-LS"),
>>       .type           = AVMEDIA_TYPE_VIDEO,
>>       .id             = AV_CODEC_ID_JPEGLS,
>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>>       .priv_data_size = sizeof(JPEGLSContext),
>>       .priv_class     = &jpegls_class,
>> -    .capabilities   = AV_CODEC_CAP_FRAME_THREADS,
>>       .init           = encode_jpegls_init,
>>       .encode2        = encode_picture_ls,
>>       .close          = encode_jpegls_close,
>>
>
James Almer May 4, 2021, 7:49 p.m. UTC | #3
On 5/4/2021 4:20 PM, Andreas Rheinhardt wrote:
> James Almer:
>> On 4/29/2021 8:56 PM, Andreas Rheinhardt wrote:
>>> Up until now, the JPEG-LS encoder allocated a worst-case-sized packet
>>> at the beginning of each encode2 call; then it wrote the packet header
>>> into its destination buffer and encoded the actual packet data;
>>> said data is written into another worst-case-sized buffer, because it
>>> needs to be escaped before being written into the packet buffer.
>>> Finally, because the packet buffer is worst-case-sized, the generic
>>> code copies the actually used part into a fresh buffer.
>>>
>>> This commit changes this: Allocating the packet and writing the header
>>> into it is deferred until the actual data has been encoded and its size
>>> is known. This gives a good upper bound for the needed size of the packet
>>> buffer (the upper bound might be 1/15 too large) and so one can avoid the
>>> implicit intermediate buffer and support user-supplied buffers by using
>>> ff_get_encode_buffer().
>>>
>>> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
>>> ---
>>>    libavcodec/jpeglsenc.c | 102 ++++++++++++++++++++++++-----------------
>>>    1 file changed, 59 insertions(+), 43 deletions(-)
>>>
>>> diff --git a/libavcodec/jpeglsenc.c b/libavcodec/jpeglsenc.c
>>> index 17d46c0449..a7bcd78275 100644
>>> --- a/libavcodec/jpeglsenc.c
>>> +++ b/libavcodec/jpeglsenc.c
>>> @@ -27,6 +27,7 @@
>>>      #include "avcodec.h"
>>>    #include "bytestream.h"
>>> +#include "encode.h"
>>>    #include "get_bits.h"
>>>    #include "put_bits.h"
>>>    #include "golomb.h"
>>> @@ -283,53 +284,23 @@ static int encode_picture_ls(AVCodecContext
>>> *avctx, AVPacket *pkt,
>>>        const uint8_t *in;
>>>        uint8_t *last = NULL;
>>>        JLSState state = { 0 };
>>> -    int i, size, ret;
>>> +    size_t size;
>>> +    int i, ret, size_in_bits;
>>>        int comps;
>>>    -    if ((ret = ff_alloc_packet2(avctx, pkt, ctx->size, 0)) < 0)
>>> -        return ret;
>>> -
>>>        last = av_mallocz(FFABS(p->linesize[0]));
>>>        if (!last)
>>>            return AVERROR(ENOMEM);
>>>    -    bytestream2_init_writer(&pb, pkt->data, pkt->size);
>>>        init_put_bits(&pb2, ctx->buf, ctx->size);
>>>    -    /* write our own JPEG header, can't use mjpeg_picture_header */
>>>        comps = ctx->comps;
>>> -    put_marker_byteu(&pb, SOI);
>>> -    put_marker_byteu(&pb, SOF48);
>>> -    bytestream2_put_be16u(&pb, 8 + comps * 3); // header size depends
>>> on components
>>> -    bytestream2_put_byteu(&pb, (avctx->pix_fmt == AV_PIX_FMT_GRAY16)
>>> ? 16 : 8);  // bpp
>>> -    bytestream2_put_be16u(&pb, avctx->height);
>>> -    bytestream2_put_be16u(&pb, avctx->width);
>>> -    bytestream2_put_byteu(&pb, comps);          // components
>>> -    for (i = 1; i <= comps; i++) {
>>> -        bytestream2_put_byteu(&pb, i);     // component ID
>>> -        bytestream2_put_byteu(&pb, 0x11);  // subsampling: none
>>> -        bytestream2_put_byteu(&pb, 0);     // Tiq, used by JPEG-LS ext
>>> -    }
>>> -
>>> -    put_marker_byteu(&pb, SOS);
>>> -    bytestream2_put_be16u(&pb, 6 + comps * 2);
>>> -    bytestream2_put_byteu(&pb, comps);
>>> -    for (i = 1; i <= comps; i++) {
>>> -        bytestream2_put_byteu(&pb, i);   // component ID
>>> -        bytestream2_put_byteu(&pb, 0);   // mapping index: none
>>> -    }
>>> -    bytestream2_put_byteu(&pb, ctx->pred);
>>> -    bytestream2_put_byteu(&pb, (comps > 1) ? 1 : 0);  //
>>> interleaving: 0 - plane, 1 - line
>>> -    bytestream2_put_byteu(&pb, 0);  // point transform: none
>>> -
>>>        /* initialize JPEG-LS state from JPEG parameters */
>>>        state.near = ctx->pred;
>>>        state.bpp  = (avctx->pix_fmt == AV_PIX_FMT_GRAY16) ? 16 : 8;
>>>        ff_jpegls_reset_coding_parameters(&state, 0);
>>>        ff_jpegls_init_state(&state);
>>>    -    ls_store_lse(&state, &pb);
>>> -
>>>        in = p->data[0];
>>>        if (avctx->pix_fmt == AV_PIX_FMT_GRAY8) {
>>>            int t = 0;
>>> @@ -378,17 +349,63 @@ static int encode_picture_ls(AVCodecContext
>>> *avctx, AVPacket *pkt,
>>>                in += p->linesize[0];
>>>            }
>>>        }
>>> -
>>> -    /* the specification says that after doing 0xff escaping unused
>>> bits in
>>> -     * the last byte must be set to 0, so just append 7 "optional"
>>> zero bits
>>> -     * to avoid special-casing. */
>>> +    av_free(last);
>>> +    /* Now the actual image data has been written, which enables us
>>> to estimate
>>> +     * the needed packet size: For every 15 input bits, an escape bit
>>> might be
>>> +     * added below; and if put_bits_count % 15 is >= 8, then another
>>> bit might
>>> +     * be added.
>>> +     * Furthermore the specification says that after doing 0xff
>>> escaping unused
>>> +     * bits in the last byte must be set to 0, so just append 7
>>> "optional" zero
>>> +     * bits to avoid special-casing. This also simplifies the size
>>> calculation:
>>> +     * Properly rounding up is now automatically baked-in. */
>>>        put_bits(&pb2, 7, 0);
>>> -    size = put_bits_count(&pb2);
>>> +    /* Make sure that the bit count + padding is representable in an
>>> int;
>>> +       necessary for put_bits_count() as well as for using a
>>> GetBitContext. */
>>> +    if (put_bytes_count(&pb2, 0) > INT_MAX / 8 -
>>> AV_INPUT_BUFFER_PADDING_SIZE)
>>> +        return AVERROR(ERANGE);
>>> +    size_in_bits = put_bits_count(&pb2);
>>>        flush_put_bits(&pb2);
>>> +    size  = size_in_bits * 2U / 15;
>>> +    size += 2 + 2 + 2 + 1 + 2 + 2 + 1 + comps * (1 + 1 + 1) + 2 + 2 + 1
>>> +            + comps * (1 + 1) + 1 + 1 + 1; /* Header */
>>> +    size += 2 + 2 + 1 + 2 + 2 + 2 + 2 + 2; /* LSE */
>>> +    size += 2; /* EOI */
>>> +    if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0)
>>> +        return ret;
>>> +
>>> +    bytestream2_init_writer(&pb, pkt->data, pkt->size);
>>> +
>>> +    /* write our own JPEG header, can't use mjpeg_picture_header */
>>> +    put_marker_byteu(&pb, SOI);
>>> +    put_marker_byteu(&pb, SOF48);
>>> +    bytestream2_put_be16u(&pb, 8 + comps * 3); // header size depends
>>> on components
>>> +    bytestream2_put_byteu(&pb, (avctx->pix_fmt == AV_PIX_FMT_GRAY16)
>>> ? 16 : 8);  // bpp
>>> +    bytestream2_put_be16u(&pb, avctx->height);
>>> +    bytestream2_put_be16u(&pb, avctx->width);
>>> +    bytestream2_put_byteu(&pb, comps);          // components
>>> +    for (i = 1; i <= comps; i++) {
>>> +        bytestream2_put_byteu(&pb, i);     // component ID
>>> +        bytestream2_put_byteu(&pb, 0x11);  // subsampling: none
>>> +        bytestream2_put_byteu(&pb, 0);     // Tiq, used by JPEG-LS ext
>>> +    }
>>> +
>>> +    put_marker_byteu(&pb, SOS);
>>> +    bytestream2_put_be16u(&pb, 6 + comps * 2);
>>> +    bytestream2_put_byteu(&pb, comps);
>>> +    for (i = 1; i <= comps; i++) {
>>> +        bytestream2_put_byteu(&pb, i);   // component ID
>>> +        bytestream2_put_byteu(&pb, 0);   // mapping index: none
>>> +    }
>>> +    bytestream2_put_byteu(&pb, ctx->pred);
>>> +    bytestream2_put_byteu(&pb, (comps > 1) ? 1 : 0);  //
>>> interleaving: 0 - plane, 1 - line
>>> +    bytestream2_put_byteu(&pb, 0);  // point transform: none
>>> +
>>> +    ls_store_lse(&state, &pb);
>>> +
>>>        /* do escape coding */
>>> -    init_get_bits(&gb, pb2.buf, size);
>>> -    size -= 7;
>>> -    while (get_bits_count(&gb) < size) {
>>> +    init_get_bits(&gb, pb2.buf, size_in_bits);
>>> +    size_in_bits -= 7;
>>> +    while (get_bits_count(&gb) < size_in_bits) {
>>>            int v;
>>>            v = get_bits(&gb, 8);
>>>            bytestream2_put_byte(&pb, v);
>>> @@ -397,15 +414,14 @@ static int encode_picture_ls(AVCodecContext
>>> *avctx, AVPacket *pkt,
>>>                bytestream2_put_byte(&pb, v);
>>>            }
>>>        }
>>> -    av_freep(&last);
>>>          /* End of image */
>>>        put_marker_byte(&pb, EOI);
>>>          emms_c();
>>>    -    pkt->size   = bytestream2_tell_p(&pb);
>>>        pkt->flags |= AV_PKT_FLAG_KEY;
>>> +    av_shrink_packet(pkt, bytestream2_tell_p(&pb));
>>
>> You're shrinking the packet allocated by ff_get_encode_buffer(). Do we
>> really want that? The doxy for AVCodec.get_encode_buffer() does not say
>> that the requested size is not final and may change. It is obviously
>> implicit that it will not grow, but the user may not expect it to shrink
>> either.
>> Think an scenario like having a single buffer meant to be propagated
>> through the network, where the user writes a header at a given offset,
>> sets pkt->data in their custom get_encode_buffer() implementation to an
>> offset immediately after said header, and then keeps the offset for the
>> next packet around (data + size) to be used in the next
>> get_encode_buffer() call. If pkt->size changes when the packet is
>> returned by avcodec_receive_frame(), this would break.
>>
>> Do you think it's worth amending the doxy with a comment that pkt->size
>> may change (shrink) during the process after get_encode_buffer()? Or
>> internally ensure no encoder changes it instead? Or is it too much of an
>> obscure scenario and the user should only consider the returned packet
>> size as the actual final one?
> 
> There are some encoders (at least this one, xavs and vc2enc) where one
> has a very good estimate of the final size, but not the final size. If
> we don't allow the packet to shrink a bit, then we would effectively
> worsen vc2enc for users that don't use your scenario.
> 
> I actually never thought this be controversial: The doxy does not say
> that the size is final and the old API also didn't support such a
> scenario as you are describing. If you think this needs a doxy change,
> so be it.

It's not controversial, seeing the shrink call simply made me think 
about an scenario like the above, and i figured it would probably be 
best to either explicitly mention size may change in the doxy, or 
prevent it from happening internally if encoders can be easily adapted.
I guess i was just thinking out loud.

Also, the old API did not tell the user how big the buffer should be 
because it was not requested as part of the encoding process, as there 
was no callback of any kind. The user would pass a custom buffer of 
whatever size they wanted to the main encode function, and if it was not 
big enough, the encoding process would fail.
The scenario i described was possible with it simply by passing an 
offset to the single big buffer in pkt->data, setting pkt->size to its 
remaining size, and then looking at the final pkt->size post-encoding to 
know what offset to use for the following packet's data pointer.

> (Maybe we could add a flag to inform the user about whether the packet
> can shrink after get_encode_buffer()?)

Writing the above made me realize that the user should definitely expect 
that the final size is the one in the returned packet and not the one 
requested in get_encode_buffer() (Same as it was with custom buffers 
with the old encode API), so at most, a simple comment in the doxy 
stating that the encoder may not make use of the entire buffer and 
update the size field in the packet accordingly should be enough.

> 
>>
>>>        *got_packet = 1;
>>>        return 0;
>>>    }
>>> @@ -468,9 +484,9 @@ const AVCodec ff_jpegls_encoder = {
>>>        .long_name      = NULL_IF_CONFIG_SMALL("JPEG-LS"),
>>>        .type           = AVMEDIA_TYPE_VIDEO,
>>>        .id             = AV_CODEC_ID_JPEGLS,
>>> +    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
>>>        .priv_data_size = sizeof(JPEGLSContext),
>>>        .priv_class     = &jpegls_class,
>>> -    .capabilities   = AV_CODEC_CAP_FRAME_THREADS,
>>>        .init           = encode_jpegls_init,
>>>        .encode2        = encode_picture_ls,
>>>        .close          = encode_jpegls_close,
>>>
>>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
diff mbox series

Patch

diff --git a/libavcodec/jpeglsenc.c b/libavcodec/jpeglsenc.c
index 17d46c0449..a7bcd78275 100644
--- a/libavcodec/jpeglsenc.c
+++ b/libavcodec/jpeglsenc.c
@@ -27,6 +27,7 @@ 
 
 #include "avcodec.h"
 #include "bytestream.h"
+#include "encode.h"
 #include "get_bits.h"
 #include "put_bits.h"
 #include "golomb.h"
@@ -283,53 +284,23 @@  static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
     const uint8_t *in;
     uint8_t *last = NULL;
     JLSState state = { 0 };
-    int i, size, ret;
+    size_t size;
+    int i, ret, size_in_bits;
     int comps;
 
-    if ((ret = ff_alloc_packet2(avctx, pkt, ctx->size, 0)) < 0)
-        return ret;
-
     last = av_mallocz(FFABS(p->linesize[0]));
     if (!last)
         return AVERROR(ENOMEM);
 
-    bytestream2_init_writer(&pb, pkt->data, pkt->size);
     init_put_bits(&pb2, ctx->buf, ctx->size);
 
-    /* write our own JPEG header, can't use mjpeg_picture_header */
     comps = ctx->comps;
-    put_marker_byteu(&pb, SOI);
-    put_marker_byteu(&pb, SOF48);
-    bytestream2_put_be16u(&pb, 8 + comps * 3); // header size depends on components
-    bytestream2_put_byteu(&pb, (avctx->pix_fmt == AV_PIX_FMT_GRAY16) ? 16 : 8);  // bpp
-    bytestream2_put_be16u(&pb, avctx->height);
-    bytestream2_put_be16u(&pb, avctx->width);
-    bytestream2_put_byteu(&pb, comps);          // components
-    for (i = 1; i <= comps; i++) {
-        bytestream2_put_byteu(&pb, i);     // component ID
-        bytestream2_put_byteu(&pb, 0x11);  // subsampling: none
-        bytestream2_put_byteu(&pb, 0);     // Tiq, used by JPEG-LS ext
-    }
-
-    put_marker_byteu(&pb, SOS);
-    bytestream2_put_be16u(&pb, 6 + comps * 2);
-    bytestream2_put_byteu(&pb, comps);
-    for (i = 1; i <= comps; i++) {
-        bytestream2_put_byteu(&pb, i);   // component ID
-        bytestream2_put_byteu(&pb, 0);   // mapping index: none
-    }
-    bytestream2_put_byteu(&pb, ctx->pred);
-    bytestream2_put_byteu(&pb, (comps > 1) ? 1 : 0);  // interleaving: 0 - plane, 1 - line
-    bytestream2_put_byteu(&pb, 0);  // point transform: none
-
     /* initialize JPEG-LS state from JPEG parameters */
     state.near = ctx->pred;
     state.bpp  = (avctx->pix_fmt == AV_PIX_FMT_GRAY16) ? 16 : 8;
     ff_jpegls_reset_coding_parameters(&state, 0);
     ff_jpegls_init_state(&state);
 
-    ls_store_lse(&state, &pb);
-
     in = p->data[0];
     if (avctx->pix_fmt == AV_PIX_FMT_GRAY8) {
         int t = 0;
@@ -378,17 +349,63 @@  static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
             in += p->linesize[0];
         }
     }
-
-    /* the specification says that after doing 0xff escaping unused bits in
-     * the last byte must be set to 0, so just append 7 "optional" zero bits
-     * to avoid special-casing. */
+    av_free(last);
+    /* Now the actual image data has been written, which enables us to estimate
+     * the needed packet size: For every 15 input bits, an escape bit might be
+     * added below; and if put_bits_count % 15 is >= 8, then another bit might
+     * be added.
+     * Furthermore the specification says that after doing 0xff escaping unused
+     * bits in the last byte must be set to 0, so just append 7 "optional" zero
+     * bits to avoid special-casing. This also simplifies the size calculation:
+     * Properly rounding up is now automatically baked-in. */
     put_bits(&pb2, 7, 0);
-    size = put_bits_count(&pb2);
+    /* Make sure that the bit count + padding is representable in an int;
+       necessary for put_bits_count() as well as for using a GetBitContext. */
+    if (put_bytes_count(&pb2, 0) > INT_MAX / 8 - AV_INPUT_BUFFER_PADDING_SIZE)
+        return AVERROR(ERANGE);
+    size_in_bits = put_bits_count(&pb2);
     flush_put_bits(&pb2);
+    size  = size_in_bits * 2U / 15;
+    size += 2 + 2 + 2 + 1 + 2 + 2 + 1 + comps * (1 + 1 + 1) + 2 + 2 + 1
+            + comps * (1 + 1) + 1 + 1 + 1; /* Header */
+    size += 2 + 2 + 1 + 2 + 2 + 2 + 2 + 2; /* LSE */
+    size += 2; /* EOI */
+    if ((ret = ff_get_encode_buffer(avctx, pkt, size, 0)) < 0)
+        return ret;
+
+    bytestream2_init_writer(&pb, pkt->data, pkt->size);
+
+    /* write our own JPEG header, can't use mjpeg_picture_header */
+    put_marker_byteu(&pb, SOI);
+    put_marker_byteu(&pb, SOF48);
+    bytestream2_put_be16u(&pb, 8 + comps * 3); // header size depends on components
+    bytestream2_put_byteu(&pb, (avctx->pix_fmt == AV_PIX_FMT_GRAY16) ? 16 : 8);  // bpp
+    bytestream2_put_be16u(&pb, avctx->height);
+    bytestream2_put_be16u(&pb, avctx->width);
+    bytestream2_put_byteu(&pb, comps);          // components
+    for (i = 1; i <= comps; i++) {
+        bytestream2_put_byteu(&pb, i);     // component ID
+        bytestream2_put_byteu(&pb, 0x11);  // subsampling: none
+        bytestream2_put_byteu(&pb, 0);     // Tiq, used by JPEG-LS ext
+    }
+
+    put_marker_byteu(&pb, SOS);
+    bytestream2_put_be16u(&pb, 6 + comps * 2);
+    bytestream2_put_byteu(&pb, comps);
+    for (i = 1; i <= comps; i++) {
+        bytestream2_put_byteu(&pb, i);   // component ID
+        bytestream2_put_byteu(&pb, 0);   // mapping index: none
+    }
+    bytestream2_put_byteu(&pb, ctx->pred);
+    bytestream2_put_byteu(&pb, (comps > 1) ? 1 : 0);  // interleaving: 0 - plane, 1 - line
+    bytestream2_put_byteu(&pb, 0);  // point transform: none
+
+    ls_store_lse(&state, &pb);
+
     /* do escape coding */
-    init_get_bits(&gb, pb2.buf, size);
-    size -= 7;
-    while (get_bits_count(&gb) < size) {
+    init_get_bits(&gb, pb2.buf, size_in_bits);
+    size_in_bits -= 7;
+    while (get_bits_count(&gb) < size_in_bits) {
         int v;
         v = get_bits(&gb, 8);
         bytestream2_put_byte(&pb, v);
@@ -397,15 +414,14 @@  static int encode_picture_ls(AVCodecContext *avctx, AVPacket *pkt,
             bytestream2_put_byte(&pb, v);
         }
     }
-    av_freep(&last);
 
     /* End of image */
     put_marker_byte(&pb, EOI);
 
     emms_c();
 
-    pkt->size   = bytestream2_tell_p(&pb);
     pkt->flags |= AV_PKT_FLAG_KEY;
+    av_shrink_packet(pkt, bytestream2_tell_p(&pb));
     *got_packet = 1;
     return 0;
 }
@@ -468,9 +484,9 @@  const AVCodec ff_jpegls_encoder = {
     .long_name      = NULL_IF_CONFIG_SMALL("JPEG-LS"),
     .type           = AVMEDIA_TYPE_VIDEO,
     .id             = AV_CODEC_ID_JPEGLS,
+    .capabilities   = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
     .priv_data_size = sizeof(JPEGLSContext),
     .priv_class     = &jpegls_class,
-    .capabilities   = AV_CODEC_CAP_FRAME_THREADS,
     .init           = encode_jpegls_init,
     .encode2        = encode_picture_ls,
     .close          = encode_jpegls_close,