[FFmpeg-devel,2/3] doc/examples/demuxing_decoding: convert to new decoding API

Message ID 20200411150614.24578-2-anton@khirnov.net
State Accepted
Commit 3bfe20389de0cb81fdff7dcb92c3e85fbacb960d
Series [FFmpeg-devel,1/3] doc/examples/demuxing_decoding: drop -refcount

Checks

Context           Check     Description
andriy/default    pending
andriy/make       success   Make finished
andriy/make_fate  success   Make fate finished

Commit Message

Anton Khirnov April 11, 2020, 3:06 p.m. UTC
---
 doc/examples/demuxing_decoding.c | 177 ++++++++++++++++---------------
 1 file changed, 91 insertions(+), 86 deletions(-)

Comments

James Almer April 11, 2020, 5:46 p.m. UTC | #1
On 4/11/2020 12:06 PM, Anton Khirnov wrote:
> ---
>  doc/examples/demuxing_decoding.c | 177 ++++++++++++++++---------------
>  1 file changed, 91 insertions(+), 86 deletions(-)
> 
> diff --git a/doc/examples/demuxing_decoding.c b/doc/examples/demuxing_decoding.c
> index 9bde927321..803e35d25c 100644
> --- a/doc/examples/demuxing_decoding.c
> +++ b/doc/examples/demuxing_decoding.c
> @@ -55,87 +55,93 @@ static AVPacket pkt;
>  static int video_frame_count = 0;
>  static int audio_frame_count = 0;
>  
> -static int decode_packet(int *got_frame, int cached)
> +static int output_video_frame(AVFrame *frame)
> +{
> +    if (frame->width != width || frame->height != height ||
> +        frame->format != pix_fmt) {
> +        /* To handle this change, one could call av_image_alloc again and
> +         * decode the following frames into another rawvideo file. */
> +        fprintf(stderr, "Error: Width, height and pixel format have to be "
> +                "constant in a rawvideo file, but the width, height or "
> +                "pixel format of the input video changed:\n"
> +                "old: width = %d, height = %d, format = %s\n"
> +                "new: width = %d, height = %d, format = %s\n",
> +                width, height, av_get_pix_fmt_name(pix_fmt),
> +                frame->width, frame->height,
> +                av_get_pix_fmt_name(frame->format));
> +        return -1;
> +    }
> +
> +    printf("video_frame n:%d coded_n:%d\n",
> +           video_frame_count++, frame->coded_picture_number);
> +
> +    /* copy decoded frame to destination buffer:
> +     * this is required since rawvideo expects non aligned data */
> +    av_image_copy(video_dst_data, video_dst_linesize,
> +                  (const uint8_t **)(frame->data), frame->linesize,
> +                  pix_fmt, width, height);
> +
> +    /* write to rawvideo file */
> +    fwrite(video_dst_data[0], 1, video_dst_bufsize, video_dst_file);
> +    return 0;
> +}
> +
> +static int output_audio_frame(AVFrame *frame)
> +{
> +    size_t unpadded_linesize = frame->nb_samples * av_get_bytes_per_sample(frame->format);
> +    printf("audio_frame n:%d nb_samples:%d pts:%s\n",
> +           audio_frame_count++, frame->nb_samples,
> +           av_ts2timestr(frame->pts, &audio_dec_ctx->time_base));
> +
> +    /* Write the raw audio data samples of the first plane. This works
> +     * fine for packed formats (e.g. AV_SAMPLE_FMT_S16). However,
> +     * most audio decoders output planar audio, which uses a separate
> +     * plane of audio samples for each channel (e.g. AV_SAMPLE_FMT_S16P).
> +     * In other words, this code will write only the first audio channel
> +     * in these cases.
> +     * You should use libswresample or libavfilter to convert the frame
> +     * to packed data. */
> +    fwrite(frame->extended_data[0], 1, unpadded_linesize, audio_dst_file);
> +
> +    return 0;
> +}
> +
> +static int decode_packet(AVCodecContext *dec, const AVPacket *pkt)
>  {
>      int ret = 0;
> -    int decoded = pkt.size;
>  
> -    *got_frame = 0;
> +    // submit the packet to the decoder
> +    ret = avcodec_send_packet(dec, pkt);
> +    if (ret < 0) {
> +        fprintf(stderr, "Error submitting a packet for decoding (%s)\n", av_err2str(ret));
> +        return ret;
> +    }
>  
> -    if (pkt.stream_index == video_stream_idx) {
> -        /* decode video frame */
> -        ret = avcodec_decode_video2(video_dec_ctx, frame, got_frame, &pkt);
> +    // get all the available frames from the decoder

Instead of this, it might be better to call receive_frame() once after
each call to send_packet(), regardless of whether the latter consumes the
packet or not. If it doesn't, you just keep the packet around until a
later call consumes it, and only then fetch the next one.

I say this because if you do

ret = avcodec_send_packet(dec, pkt);
av_packet_unref(pkt);
if (ret < 0)
    return ret;
do {
    ret = avcodec_receive_frame(dec, frame);
    if (!ret)
        output_frame(frame);
} while (!ret);

You'll be draining the decoder of all the frames it may have generated,
which may be detrimental in frame threading scenarios, versus something
like (Untested PoC):

do {
    ret = avcodec_send_packet(dec, pkt);
    if (ret < 0) {
        if (ret != AVERROR(EAGAIN))
            return ret;
    } else
        av_packet_unref(pkt);
    ret = avcodec_receive_frame(dec, frame);
    if (ret < 0) {
        if (ret != AVERROR(EAGAIN))
            return ret;
    } else
        output_frame(frame);
} while (!ret || pkt->size);

Which would constantly keep the decoder fed with packets as you retrieve
generated frames.

This is something I've noticed when writing the libdav1d decoder
wrapper. I don't know if it also applies to our frame threading logic,
or if it depends on the decoder used, but if it does, then the CLI would
also benefit from this.
Anton Khirnov April 11, 2020, 5:59 p.m. UTC | #2
Quoting James Almer (2020-04-11 19:46:03)
> On 4/11/2020 12:06 PM, Anton Khirnov wrote:
> 
> Instead of this, it might be better to call receive_frame() once after
> each call to send_packet(), regardless of whether the latter consumes the
> packet or not. If it doesn't, you just keep the packet around until a
> later call consumes it, and only then fetch the next one.
> 
> I say this because if you do
> 
> ret = avcodec_send_packet(dec, pkt);
> av_packet_unref(pkt);
> if (ret < 0)
>     return ret;
> do {
>     ret = avcodec_receive_frame(dec, frame);
>     if (!ret)
>         output_frame(frame);
> } while (!ret);
> 
> You'll be draining the decoder of all the frames it may have generated,
> which may be detrimental in frame threading scenarios, versus something
> like (Untested PoC):
> 
> do {
>     ret = avcodec_send_packet(dec, pkt);
>     if (ret < 0) {
>         if (ret != AVERROR(EAGAIN))
>             return ret;
>     } else
>         av_packet_unref(pkt);
>     ret = avcodec_receive_frame(dec, frame);
>     if (ret < 0) {
>         if (ret != AVERROR(EAGAIN))
>             return ret;
>     } else
>         output_frame(frame);
> } while (!ret || pkt->size);
> 
> Which would constantly keep the decoder fed with packets as you retrieve
> generated frames.
> 
> This is something I've noticed when writing the libdav1d decoder
> wrapper. I don't know if it also applies to our frame threading logic,
> or if it depends on the decoder used, but if it does, then the CLI would
> also benefit from this.

I don't know how libdav1d works, but lavc frame threading will not give
you any output unless all the workers are busy, i.e. the delay imposed
is constant and you cannot change it by calling the functions in a
different pattern.

There was a patch to change that recently, but people were generally
against it.

Also, the way I'm doing it in this patch -- one send_packet(), then
iterate receive_frame() until failure -- is the officially recommended
one in the doxy somewhere around the top of avcodec.h
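
A minimal sketch of that recommended pattern, assuming an opened
AVCodecContext *dec, the current AVPacket *pkt (NULL to flush) and an
allocated AVFrame *frame, with error handling trimmed:

ret = avcodec_send_packet(dec, pkt);   /* pkt == NULL enters draining mode */
if (ret < 0)
    return ret;

/* fetch every frame the decoder is ready to return at this point */
while ((ret = avcodec_receive_frame(dec, frame)) >= 0) {
    /* ... process the decoded frame ... */
    av_frame_unref(frame);
}

/* EAGAIN (more input needed) and EOF (fully drained) are not errors here */
if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
    ret = 0;
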
James Almer April 11, 2020, 6:16 p.m. UTC | #3
On 4/11/2020 2:59 PM, Anton Khirnov wrote:
> Quoting James Almer (2020-04-11 19:46:03)
>> On 4/11/2020 12:06 PM, Anton Khirnov wrote:
>>
>> Instead of this, it might be better to call receive_frame() once after
>> each call to send_packet(), regardless of whether the latter consumes the
>> packet or not. If it doesn't, you just keep the packet around until a
>> later call consumes it, and only then fetch the next one.
>>
>> I say this because if you do
>>
>> ret = avcodec_send_packet(dec, pkt);
>> av_packet_unref(pkt);
>> if (ret < 0)
>>     return ret;
>> do {
>>     ret = avcodec_receive_frame(dec, frame);
>>     if (!ret)
>>         output_frame(frame);
>> } while (!ret);
>>
>> You'll be draining the decoder of all the frames it may have generated,
>> which may be detrimental in frame threading scenarios, versus something
>> like (Untested PoC):
>>
>> do {
>>     ret = avcodec_send_packet(dec, pkt);
>>     if (ret < 0) {
>>         if (ret != AVERROR(EAGAIN))
>>             return ret;
>>     } else
>>         av_packet_unref(pkt);
>>     ret = avcodec_receive_frame(dec, frame);
>>     if (ret < 0) {
>>         if (ret != AVERROR(EAGAIN))
>>             return ret;
>>     } else
>>         output_frame(frame);
>> } while (!ret || pkt->size);
>>
>> Which would constantly keep the decoder fed with packets as you retrieve
>> generated frames.
>>
>> This is something I've noticed when writing the libdav1d decoder
>> wrapper. I don't know if it also applies to our frame threading logic,
>> or if it depends on the decoder used, but if it does, then the CLI would
>> also benefit from this.
> 
> I don't know how libdav1d works, but lavc frame threading will not give
> you any output unless all the workers are busy, i.e. the delay imposed
> is constant and you cannot change it by calling the functions in a
> different pattern.
> 
> There was a patch to change that recently, but people were generally
> against it.
> 
> Also, the way I'm doing it in this patch -- one send_packet(), then
> iterate receive_frame() until failure -- is the officially recommended
> one in the doxy somewhere around the top of avcodec.h

Ok, if it's the recommended way described in the doxy then it makes
sense to use it in the examples, so LGTM if tested.
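
Condensed from the patch below (setup, error handling and cleanup omitted),
the caller side of the converted example now reads roughly:

while (av_read_frame(fmt_ctx, &pkt) >= 0) {
    /* only feed packets from the streams we selected */
    if (pkt.stream_index == video_stream_idx)
        ret = decode_packet(video_dec_ctx, &pkt);
    else if (pkt.stream_index == audio_stream_idx)
        ret = decode_packet(audio_dec_ctx, &pkt);
    av_packet_unref(&pkt);
    if (ret < 0)
        break;
}

/* flush the decoders by sending them a NULL packet */
if (video_dec_ctx)
    decode_packet(video_dec_ctx, NULL);
if (audio_dec_ctx)
    decode_packet(audio_dec_ctx, NULL);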

Patch

diff --git a/doc/examples/demuxing_decoding.c b/doc/examples/demuxing_decoding.c
index 9bde927321..803e35d25c 100644
--- a/doc/examples/demuxing_decoding.c
+++ b/doc/examples/demuxing_decoding.c
@@ -55,87 +55,93 @@  static AVPacket pkt;
 static int video_frame_count = 0;
 static int audio_frame_count = 0;
 
-static int decode_packet(int *got_frame, int cached)
+static int output_video_frame(AVFrame *frame)
+{
+    if (frame->width != width || frame->height != height ||
+        frame->format != pix_fmt) {
+        /* To handle this change, one could call av_image_alloc again and
+         * decode the following frames into another rawvideo file. */
+        fprintf(stderr, "Error: Width, height and pixel format have to be "
+                "constant in a rawvideo file, but the width, height or "
+                "pixel format of the input video changed:\n"
+                "old: width = %d, height = %d, format = %s\n"
+                "new: width = %d, height = %d, format = %s\n",
+                width, height, av_get_pix_fmt_name(pix_fmt),
+                frame->width, frame->height,
+                av_get_pix_fmt_name(frame->format));
+        return -1;
+    }
+
+    printf("video_frame n:%d coded_n:%d\n",
+           video_frame_count++, frame->coded_picture_number);
+
+    /* copy decoded frame to destination buffer:
+     * this is required since rawvideo expects non aligned data */
+    av_image_copy(video_dst_data, video_dst_linesize,
+                  (const uint8_t **)(frame->data), frame->linesize,
+                  pix_fmt, width, height);
+
+    /* write to rawvideo file */
+    fwrite(video_dst_data[0], 1, video_dst_bufsize, video_dst_file);
+    return 0;
+}
+
+static int output_audio_frame(AVFrame *frame)
+{
+    size_t unpadded_linesize = frame->nb_samples * av_get_bytes_per_sample(frame->format);
+    printf("audio_frame n:%d nb_samples:%d pts:%s\n",
+           audio_frame_count++, frame->nb_samples,
+           av_ts2timestr(frame->pts, &audio_dec_ctx->time_base));
+
+    /* Write the raw audio data samples of the first plane. This works
+     * fine for packed formats (e.g. AV_SAMPLE_FMT_S16). However,
+     * most audio decoders output planar audio, which uses a separate
+     * plane of audio samples for each channel (e.g. AV_SAMPLE_FMT_S16P).
+     * In other words, this code will write only the first audio channel
+     * in these cases.
+     * You should use libswresample or libavfilter to convert the frame
+     * to packed data. */
+    fwrite(frame->extended_data[0], 1, unpadded_linesize, audio_dst_file);
+
+    return 0;
+}
+
+static int decode_packet(AVCodecContext *dec, const AVPacket *pkt)
 {
     int ret = 0;
-    int decoded = pkt.size;
 
-    *got_frame = 0;
+    // submit the packet to the decoder
+    ret = avcodec_send_packet(dec, pkt);
+    if (ret < 0) {
+        fprintf(stderr, "Error submitting a packet for decoding (%s)\n", av_err2str(ret));
+        return ret;
+    }
 
-    if (pkt.stream_index == video_stream_idx) {
-        /* decode video frame */
-        ret = avcodec_decode_video2(video_dec_ctx, frame, got_frame, &pkt);
+    // get all the available frames from the decoder
+    while (ret >= 0) {
+        ret = avcodec_receive_frame(dec, frame);
         if (ret < 0) {
-            fprintf(stderr, "Error decoding video frame (%s)\n", av_err2str(ret));
-            return ret;
-        }
+            // those two return values are special and mean there is no output
+            // frame available, but there were no errors during decoding
+            if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN))
+                return 0;
 
-        if (*got_frame) {
-
-            if (frame->width != width || frame->height != height ||
-                frame->format != pix_fmt) {
-                /* To handle this change, one could call av_image_alloc again and
-                 * decode the following frames into another rawvideo file. */
-                fprintf(stderr, "Error: Width, height and pixel format have to be "
-                        "constant in a rawvideo file, but the width, height or "
-                        "pixel format of the input video changed:\n"
-                        "old: width = %d, height = %d, format = %s\n"
-                        "new: width = %d, height = %d, format = %s\n",
-                        width, height, av_get_pix_fmt_name(pix_fmt),
-                        frame->width, frame->height,
-                        av_get_pix_fmt_name(frame->format));
-                return -1;
-            }
-
-            printf("video_frame%s n:%d coded_n:%d\n",
-                   cached ? "(cached)" : "",
-                   video_frame_count++, frame->coded_picture_number);
-
-            /* copy decoded frame to destination buffer:
-             * this is required since rawvideo expects non aligned data */
-            av_image_copy(video_dst_data, video_dst_linesize,
-                          (const uint8_t **)(frame->data), frame->linesize,
-                          pix_fmt, width, height);
-
-            /* write to rawvideo file */
-            fwrite(video_dst_data[0], 1, video_dst_bufsize, video_dst_file);
-        }
-    } else if (pkt.stream_index == audio_stream_idx) {
-        /* decode audio frame */
-        ret = avcodec_decode_audio4(audio_dec_ctx, frame, got_frame, &pkt);
-        if (ret < 0) {
-            fprintf(stderr, "Error decoding audio frame (%s)\n", av_err2str(ret));
+            fprintf(stderr, "Error during decoding (%s)\n", av_err2str(ret));
             return ret;
         }
-        /* Some audio decoders decode only part of the packet, and have to be
-         * called again with the remainder of the packet data.
-         * Sample: fate-suite/lossless-audio/luckynight-partial.shn
-         * Also, some decoders might over-read the packet. */
-        decoded = FFMIN(ret, pkt.size);
-
-        if (*got_frame) {
-            size_t unpadded_linesize = frame->nb_samples * av_get_bytes_per_sample(frame->format);
-            printf("audio_frame%s n:%d nb_samples:%d pts:%s\n",
-                   cached ? "(cached)" : "",
-                   audio_frame_count++, frame->nb_samples,
-                   av_ts2timestr(frame->pts, &audio_dec_ctx->time_base));
-
-            /* Write the raw audio data samples of the first plane. This works
-             * fine for packed formats (e.g. AV_SAMPLE_FMT_S16). However,
-             * most audio decoders output planar audio, which uses a separate
-             * plane of audio samples for each channel (e.g. AV_SAMPLE_FMT_S16P).
-             * In other words, this code will write only the first audio channel
-             * in these cases.
-             * You should use libswresample or libavfilter to convert the frame
-             * to packed data. */
-            fwrite(frame->extended_data[0], 1, unpadded_linesize, audio_dst_file);
-        }
-    }
 
-    if (*got_frame)
+        // write the frame data to output file
+        if (dec->codec->type == AVMEDIA_TYPE_VIDEO)
+            ret = output_video_frame(frame);
+        else
+            ret = output_audio_frame(frame);
+
         av_frame_unref(frame);
+        if (ret < 0)
+            return ret;
+    }
 
-    return decoded;
+    return 0;
 }
 
 static int open_codec_context(int *stream_idx,
@@ -221,7 +227,7 @@  static int get_format_from_sample_fmt(const char **fmt,
 
 int main (int argc, char **argv)
 {
-    int ret = 0, got_frame;
+    int ret = 0;
 
     if (argc != 4) {
         fprintf(stderr, "usage: %s  input_file video_output_file audio_output_file\n"
@@ -309,23 +315,22 @@  int main (int argc, char **argv)
 
     /* read frames from the file */
     while (av_read_frame(fmt_ctx, &pkt) >= 0) {
-        AVPacket orig_pkt = pkt;
-        do {
-            ret = decode_packet(&got_frame, 0);
-            if (ret < 0)
-                break;
-            pkt.data += ret;
-            pkt.size -= ret;
-        } while (pkt.size > 0);
-        av_packet_unref(&orig_pkt);
+        // check if the packet belongs to a stream we are interested in, otherwise
+        // skip it
+        if (pkt.stream_index == video_stream_idx)
+            ret = decode_packet(video_dec_ctx, &pkt);
+        else if (pkt.stream_index == audio_stream_idx)
+            ret = decode_packet(audio_dec_ctx, &pkt);
+        av_packet_unref(&pkt);
+        if (ret < 0)
+            break;
     }
 
-    /* flush cached frames */
-    pkt.data = NULL;
-    pkt.size = 0;
-    do {
-        decode_packet(&got_frame, 1);
-    } while (got_frame);
+    /* flush the decoders */
+    if (video_dec_ctx)
+        decode_packet(video_dec_ctx, NULL);
+    if (audio_dec_ctx)
+        decode_packet(audio_dec_ctx, NULL);
 
     printf("Demuxing succeeded.\n");