diff mbox series

[FFmpeg-devel] doc/examples/muxing: code rewrite with improved readability and fixed issues

Message ID 122266341.9130638.1655553986479@mail.yahoo.com
State New
Headers show
Series [FFmpeg-devel] doc/examples/muxing: code rewrite with improved readability and fixed issues | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished
andriy/make_armv7_RPi4 success Make finished
andriy/make_fate_armv7_RPi4 success Make fate finished

Commit Message

Paolo Prete June 18, 2022, 12:06 p.m. UTC
Please review this. It's a code rewrite of doc/examples/muxing.c which improves readability and fixes issues.
More specifically:
*) Original functions of muxing.c have generic/unclear/ambiguous names and they don't group logically related blocks of code: this makes the code hard to read. See for example open_audio/video() and add_stream(), which mix initialization of the encoders' and muxers' stuff. A redundant structure with an ambiguous name ("struct OutputStream"), which is not part of the API, is widely used too. The patch uses functions with clearer names and with blocks of code strictly logically related, such as: init_encoder(), init_avframe(), init_muxer(), convert_frame(), encode_frame(), mux_encoded_pkt()...
*) Errors are not always properly managed: in some cases they are not propagated to the main() function and a call to exit(1) is forced, causing memory leaks. This has been fixed as well.
*) The program doesn't work for some extensions (try for example: m4v or mjpeg). In addition, it gives weird results when the filename doesn't have an extension. This has been fixed by checking the filename and supporting a fixed set of extensions.
*) The program claims to output raw images by using '%%d' in the filename, but it actually doesn't. This has been fixed as well.
*) Input parameters such as width, height, sample_rate and ch_layout are now exposed at the level of the main() function and they are much easier to customize.
*) fprintf() has been replaced with av_log()

Comments

Leo Izen June 18, 2022, 3:17 p.m. UTC | #1
On 6/18/22 08:06, Paolo Prete wrote:
> Please review this. It's a code rewrite of doc/examples/muxing.c which improves readability and fixes issues.
> From 8a4e942a001ae49dc052899f331ed43abf954dda Mon Sep 17 00:00:00 2001
> From: paolo <paolopr976@gmail.com>
> Date: Sat, 18 Jun 2022 13:53:55 +0200
> Subject: [PATCH] doc/examples/muxing: code rewrite with improved readability
>  and fixed issues
This commit message is too long, you can just truncate "and fixed 
issues" since it doesn't say much.

> 
>                Improved readability with functions that have clearer prototypes and that don't mix logically unrelated blocks of code
> 
>                Fixed issues in case of unsupported extensions
> 
>                Fixed memory leaks on errors, which are now properly propagated to the main() function
> 
>                Fixed issue on raw images output
> 
>                fprintf() replaced with av_log()
> 
>                Input A/V parameters exposed in the main() function and easier to customize
> ---
>  doc/examples/muxing.c | 905 +++++++++++++++++++-----------------------
>  1 file changed, 406 insertions(+), 499 deletions(-)
> 
> diff --git a/doc/examples/muxing.c b/doc/examples/muxing.c
> index 3acb778322..04739995d8 100644
> --- a/doc/examples/muxing.c
> +++ b/doc/examples/muxing.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2003 Fabrice Bellard
> + * Copyright (c) 2022 Paolo Prete (paolopr976 at gmail.com) after Fabrice Bellard
Don't remove the original copyright, just add yourself below it. You 
also don't need to put your email address in the copyright line.
>   *
>   * Permission is hereby granted, free of charge, to any person obtaining a copy
>   * of this software and associated documentation files (the "Software"), to deal
> @@ -24,625 +24,532 @@
>   * @file
>   * libavformat API example.
>   *
> - * Output a media file in any supported libavformat format. The default
> + * Output a media file in a set of supported libavformat formats. The default
>   * codecs are used.
>   * @example muxing.c
>   */
>  
> -#include <stdlib.h>
> -#include <stdio.h>
> -#include <string.h>
> -#include <math.h>
> -
> -#include <libavutil/avassert.h>
> -#include <libavutil/channel_layout.h>
> -#include <libavutil/opt.h>
> -#include <libavutil/mathematics.h>
> -#include <libavutil/timestamp.h>
>  #include <libavcodec/avcodec.h>
>  #include <libavformat/avformat.h>
> -#include <libswscale/swscale.h>
> +#include <libavutil/timestamp.h>
Make sure these stay sorted.
>  #include <libswresample/swresample.h>
> +#include <libswscale/swscale.h>
>  
> -#define STREAM_DURATION   10.0
> -#define STREAM_FRAME_RATE 25 /* 25 images/s */
> -#define STREAM_PIX_FMT    AV_PIX_FMT_YUV420P /* default pix_fmt */
> -
> -#define SCALE_FLAGS SWS_BICUBIC
> +#define VIDEO_FRAME_RATE 25 /* 25 images/s */
> +#define VIDEO_SCALE_FLAGS SWS_BICUBIC
> +#define STREAM_DURATION 10.0 /* 10 seconds */
>  
> -// a wrapper around a single output AVStream
> -typedef struct OutputStream {
> -    AVStream *st;
> -    AVCodecContext *enc;
> +static void log_error(const char *s, int *num)
> +{
> +    if (num)
> +        av_log(NULL, AV_LOG_ERROR, "%s (error '%s')\n", s, av_err2str(*num));
> +    else
> +        av_log(NULL, AV_LOG_ERROR, "%s\n", s);
> +}
This does not need to be a pointer. Convention is that negative values 
are errors and nonnegative values are not. So you could always use 
something like: if (num < 0).

> -    /* pts of the next frame that will be generated */
> -    int64_t next_pts;
> -    int samples_count;
> +static int mux_encoded_pkt(AVPacket *out_pkt, AVFormatContext *out_fmt_ctx,
> +                              enum AVMediaType type)
> +{
> +    int ret;
> +    AVRational enc_time_base, str_time_base;
>  
> -    AVFrame *frame;
> -    AVFrame *tmp_frame;
> +    if (out_fmt_ctx->streams[0]->codecpar->codec_type == type)
> +        out_pkt->stream_index = 0;
> +    else if ((out_fmt_ctx->nb_streams > 1) && (type == AVMEDIA_TYPE_VIDEO))
> +        out_pkt->stream_index = 1;
> +    str_time_base = out_fmt_ctx->streams[out_pkt->stream_index]->time_base;
>  
> -    AVPacket *tmp_pkt;
> +    if (type == AVMEDIA_TYPE_AUDIO)
> +        enc_time_base = ((AVRational *)out_fmt_ctx->opaque)[0];
> +    else
> +        enc_time_base = ((AVRational *)out_fmt_ctx->opaque)[1];
> 
Why are you referencing the opaque elements of out_fmt_ctx?

> -    float t, tincr, tincr2;
> +    av_packet_rescale_ts(out_pkt, enc_time_base, str_time_base);
>  
> -    struct SwsContext *sws_ctx;
> -    struct SwrContext *swr_ctx;
> -} OutputStream;
> +    av_log(NULL, AV_LOG_INFO, "stream_index=%d, size=%d, pts_time=%s\n",
> +           out_pkt->stream_index,
> +           out_pkt->size, av_ts2timestr(out_pkt->pts, &str_time_base));
>  
> -static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
> -{
> -    AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
> +    if ((ret = av_interleaved_write_frame(out_fmt_ctx, out_pkt)) < 0)
> +        log_error("Error calling av_interleaved_write_frame()", &ret);
>  
> -    printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
> -           av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
> -           av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
> -           av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
> -           pkt->stream_index);
> +    return ret;
>  }
>  
> -static int write_frame(AVFormatContext *fmt_ctx, AVCodecContext *c,
> -                       AVStream *st, AVFrame *frame, AVPacket *pkt)
> +static int is_extension_supported(const char *filename)
Why are you artificially limiting what is permitted?
>  {
> -    int ret;
> +    const char *extensions[] = {".aac", ".avi", ".bmp", ".jpeg", ".mka",
> +                                ".mkv", ".mov", ".mp4", ".flv",  ".ts"};
> +    int i, size = sizeof(extensions) / sizeof(extensions[0]);
> +    char *dot = strrchr(filename, '.');
>  
> -    // send the frame to the encoder
> -    ret = avcodec_send_frame(c, frame);
> -    if (ret < 0) {
> -        fprintf(stderr, "Error sending a frame to the encoder: %s\n",
> -                av_err2str(ret));
> -        exit(1);
> +    for (i = 0; i < size; i++) {
> +        if (dot && !strcmp(dot, extensions[i]))
> +            return 1;
>      }
>  
> -    while (ret >= 0) {
> -        ret = avcodec_receive_packet(c, pkt);
> -        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
> -            break;
> -        else if (ret < 0) {
> -            fprintf(stderr, "Error encoding a frame: %s\n", av_err2str(ret));
> -            exit(1);
> -        }
> +    log_error("File extension not supported", NULL);
> +    av_log(NULL, AV_LOG_WARNING, "Please choose one of the following extensions: ");
> +    for (i = 0; i < size - 1; i++)
> +        av_log(NULL, AV_LOG_WARNING, "%s, ", extensions[i]);
> +    av_log(NULL, AV_LOG_WARNING, "%s\n", extensions[size-1]);
>  
> -        /* rescale output packet timestamp values from codec to stream timebase */
> -        av_packet_rescale_ts(pkt, c->time_base, st->time_base);
> -        pkt->stream_index = st->index;
> -
> -        /* Write the compressed frame to the media file. */
> -        log_packet(fmt_ctx, pkt);
> -        ret = av_interleaved_write_frame(fmt_ctx, pkt);
> -        /* pkt is now blank (av_interleaved_write_frame() takes ownership of
> -         * its contents and resets pkt), so that no unreferencing is necessary.
> -         * This would be different if one used av_write_frame(). */
> -        if (ret < 0) {
> -            fprintf(stderr, "Error while writing output packet: %s\n", av_err2str(ret));
> -            exit(1);
> -        }
> -    }
> -
> -    return ret == AVERROR_EOF ? 1 : 0;
> +    return 0;
>  }
>  
> -/* Add an output stream. */
> -static void add_stream(OutputStream *ost, AVFormatContext *oc,
> -                       const AVCodec **codec,
> -                       enum AVCodecID codec_id)
> +static int get_default_enc_params(AVCodecParameters *params,
> +                                  const char *fname, enum AVMediaType type)
>  {
> -    AVCodecContext *c;
> -    int i;
> -
> -    /* find the encoder */
> -    *codec = avcodec_find_encoder(codec_id);
> -    if (!(*codec)) {
> -        fprintf(stderr, "Could not find encoder for '%s'\n",
> -                avcodec_get_name(codec_id));
> -        exit(1);
> +    AVFormatContext *tmp_fctx;
> +    enum AVCodecID id;
> +    const AVCodec *c;
> +    int ret = 0;
> +
> +    if ((ret = avformat_alloc_output_context2(&tmp_fctx, NULL, NULL, fname)) < 0) {
> +        log_error("Could not get default encoder", &ret);
> +        return AVERROR_EXIT;
>      }
>  
> -    ost->tmp_pkt = av_packet_alloc();
> -    if (!ost->tmp_pkt) {
> -        fprintf(stderr, "Could not allocate AVPacket\n");
> -        exit(1);
> -    }
> +    id = (type == AVMEDIA_TYPE_AUDIO) ? tmp_fctx->oformat->audio_codec :
> +                                        tmp_fctx->oformat->video_codec;
>  
> -    ost->st = avformat_new_stream(oc, NULL);
> -    if (!ost->st) {
> -        fprintf(stderr, "Could not allocate stream\n");
> -        exit(1);
> -    }
> -    ost->st->id = oc->nb_streams-1;
> -    c = avcodec_alloc_context3(*codec);
> -    if (!c) {
> -        fprintf(stderr, "Could not alloc an encoding context\n");
> -        exit(1);
> +    if (!(c = avcodec_find_encoder(id))) {
> +        avformat_free_context(tmp_fctx);
> +        return ret;
You probably don't want to return "ret" here as you don't assign it.

>      }
> -    ost->enc = c;
> -
> -    switch ((*codec)->type) {
> -    case AVMEDIA_TYPE_AUDIO:
> -        c->sample_fmt  = (*codec)->sample_fmts ?
> -            (*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
> -        c->bit_rate    = 64000;
> -        c->sample_rate = 44100;
> -        if ((*codec)->supported_samplerates) {
> -            c->sample_rate = (*codec)->supported_samplerates[0];
> -            for (i = 0; (*codec)->supported_samplerates[i]; i++) {
> -                if ((*codec)->supported_samplerates[i] == 44100)
> -                    c->sample_rate = 44100;
> -            }
> -        }
> -        av_channel_layout_copy(&c->ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO);
> -        ost->st->time_base = (AVRational){ 1, c->sample_rate };
> -        break;
> -
> -    case AVMEDIA_TYPE_VIDEO:
> -        c->codec_id = codec_id;
> -
> -        c->bit_rate = 400000;
> -        /* Resolution must be a multiple of two. */
> -        c->width    = 352;
> -        c->height   = 288;
> -        /* timebase: This is the fundamental unit of time (in seconds) in terms
> -         * of which frame timestamps are represented. For fixed-fps content,
> -         * timebase should be 1/framerate and timestamp increments should be
> -         * identical to 1. */
> -        ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE };
> -        c->time_base       = ost->st->time_base;
> -
> -        c->gop_size      = 12; /* emit one intra frame every twelve frames at most */
> -        c->pix_fmt       = STREAM_PIX_FMT;
> -        if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
> -            /* just for testing, we also add B-frames */
> -            c->max_b_frames = 2;
> -        }
> -        if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
> -            /* Needed to avoid using macroblocks in which some coeffs overflow.
> -             * This does not happen with normal video, it just happens here as
> -             * the motion of the chroma plane does not match the luma plane. */
> -            c->mb_decision = 2;
> -        }
> -        break;
>  
> -    default:
> -        break;
> +    params->codec_type = c->type;
> +    params->codec_id   = c-> id;
> +    if (c->type == AVMEDIA_TYPE_AUDIO) {
> +        params->format      = c->sample_fmts ?
> +                              c->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
> +        params->ch_layout   = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO;
> +        params->sample_rate = c->supported_samplerates ?
> +                              c->supported_samplerates[0] : 44100;
> +    } else if (c->type == AVMEDIA_TYPE_VIDEO) {
> +        params->format = c->pix_fmts ? c->pix_fmts[0] : AV_PIX_FMT_YUV420P;
>      }
> +    avformat_free_context(tmp_fctx);
>  
> -    /* Some formats want stream headers to be separate. */
> -    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
> -        c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
> +    return ret;
Again, why are you returning ret if you are not assigning to it?

>  }
>  
> -/**************************************************************/
> -/* audio output */
> -
> -static AVFrame *alloc_audio_frame(enum AVSampleFormat sample_fmt,
> -                                  const AVChannelLayout *channel_layout,
> -                                  int sample_rate, int nb_samples)
> +static int init_encoder(AVCodecContext **enc_ctx, AVCodecParameters *params)
>  {
> -    AVFrame *frame = av_frame_alloc();
> +    const AVCodec *codec = NULL;
>      int ret;
>  
> -    if (!frame) {
> -        fprintf(stderr, "Error allocating an audio frame\n");
> -        exit(1);
> +    codec = avcodec_find_encoder(params->codec_id);
> +    *enc_ctx = avcodec_alloc_context3(codec);
Don't attempt to allocate anything until after you check if the codec is 
found.
> +    if (!codec) {
> +        log_error("Could not allocate the encoding context", NULL);
This error message does not match the check, which is if the codec is found.
> +        return AVERROR_EXIT;
return AVERROR_ENCODER_NOT_FOUND;
>      }
>  
> -    frame->format = sample_fmt;
> -    av_channel_layout_copy(&frame->ch_layout, channel_layout);
> -    frame->sample_rate = sample_rate;
> -    frame->nb_samples = nb_samples;
> -
> -    if (nb_samples) {
> -        ret = av_frame_get_buffer(frame, 0);
> -        if (ret < 0) {
> -            fprintf(stderr, "Error allocating an audio buffer\n");
> -            exit(1);
> -        }
> +    (*enc_ctx)->codec_id   = params->codec_id;
> +    (*enc_ctx)->codec_type = params->codec_type;
> +    if (params->codec_type == AVMEDIA_TYPE_AUDIO) {
> +        (*enc_ctx)->sample_fmt  = params->format;
> +        (*enc_ctx)->sample_rate = params->sample_rate;
> +        (*enc_ctx)->time_base   = (AVRational){1, params->sample_rate};
Use av_make_q to avoid casting.
> +        (*enc_ctx)->ch_layout   = params->ch_layout;
> +    } else if (params->codec_type == AVMEDIA_TYPE_VIDEO) {
> +        (*enc_ctx)->width      = params->width;
> +        (*enc_ctx)->height     = params->height;
> +        (*enc_ctx)->time_base  = (AVRational){ 1, VIDEO_FRAME_RATE };
Better off just defining VIDEO_FRAME_RATE_Q to be an AVRational with
value {1, 25};
> +        (*enc_ctx)->gop_size   = 12;
> +        (*enc_ctx)->pix_fmt    = params->format;
>      }
>  
> -    return frame;
> +    if ((ret = avcodec_open2(*enc_ctx, codec, NULL)) < 0) {
> +        log_error("Could not open input codec", &ret);
> +        return ret;
> +    } else
> +        return 0;
This violates the coding style: you need to use braces {} for the else
block if you also use them for the if block.
>  }
>  
> -static void open_audio(AVFormatContext *oc, const AVCodec *codec,
> -                       OutputStream *ost, AVDictionary *opt_arg)
> +static int init_avframe(AVFrame **frame, AVCodecParameters *params)
>  {
> -    AVCodecContext *c;
> -    int nb_samples;
>      int ret;
> -    AVDictionary *opt = NULL;
>  
> -    c = ost->enc;
> +    if (!(*frame = av_frame_alloc())) {
> +        log_error("Could not allocate AVFrame", NULL);
> +        return AVERROR(ENOMEM);
> +    }
>  
> -    /* open it */
> -    av_dict_copy(&opt, opt_arg, 0);
> -    ret = avcodec_open2(c, codec, &opt);
> -    av_dict_free(&opt);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not open audio codec: %s\n", av_err2str(ret));
> -        exit(1);
> +    (*frame)->opaque = &params->codec_type;
> +    if (params->codec_type == AVMEDIA_TYPE_AUDIO) {
> +        (*frame)->nb_samples  = params->frame_size;
> +        (*frame)->sample_rate = params->sample_rate;
> +        (*frame)->format      = params->format;
> +        (*frame)->ch_layout   = params->ch_layout;
> +    } else {
> +        (*frame)->width  = params->width;
> +        (*frame)->height = params->height;
> +        (*frame)->format = params->format;
>      }
>  
> -    /* init signal generator */
> -    ost->t     = 0;
> -    ost->tincr = 2 * M_PI * 110.0 / c->sample_rate;
> -    /* increment frequency by 110 Hz per second */
> -    ost->tincr2 = 2 * M_PI * 110.0 / c->sample_rate / c->sample_rate;
> +    /* Allocate the frame's data buffer */
> +    if ((ret = av_frame_get_buffer(*frame, 0)) < 0) {
> +        log_error("Could not allocate buffer for AVFrame", &ret);
> +        return AVERROR(ENOMEM);
> +    } else
> +        return 0;
You don't need the else block here at all.
> +}
>  
> -    if (c->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
> -        nb_samples = 10000;
> -    else
> -        nb_samples = c->frame_size;
> +static int init_audio_convert(struct SwrContext **ctx, AVCodecParameters *in_params,
> +                              AVCodecParameters *out_params)
> +{
> +    swr_alloc_set_opts2(ctx,
> +                        &(out_params->ch_layout),
> +                        out_params->format, out_params->sample_rate,
> +                        &(in_params->ch_layout),
> +                        in_params->format, in_params->sample_rate,
> +                        0, NULL);
> +    if (!*ctx) {
> +        log_error("Could not allocate resample context", NULL);
> +        return AVERROR(ENOMEM);
> +    } else
> +        return 0;
> +}
You don't need the else block here at all.
>  
> -    ost->frame     = alloc_audio_frame(c->sample_fmt, &c->ch_layout,
> -                                       c->sample_rate, nb_samples);
> -    ost->tmp_frame = alloc_audio_frame(AV_SAMPLE_FMT_S16, &c->ch_layout,
> -                                       c->sample_rate, nb_samples);
> +static int init_video_convert(struct SwsContext **ctx, AVCodecParameters *in_params,
> +                              AVCodecParameters *out_params)
> +{
This paper-thin wrapper function is unnecessary, just inline it.
> +    *ctx = sws_getContext(in_params->width, in_params->height,
> +                          in_params->format,
> +                          out_params->width, out_params->height,
> +                          out_params->codec_id == out_params->format,
> +                          VIDEO_SCALE_FLAGS, NULL, NULL, NULL);
> +    if (!*ctx) {
> +        log_error("Could not allocate scale context", NULL);
> +        return AVERROR(ENOMEM);
> +    } else
> +        return 0;
> +}
>  
> -    /* copy the stream parameters to the muxer */
> -    ret = avcodec_parameters_from_context(ost->st->codecpar, c);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not copy the stream parameters\n");
> -        exit(1);
> -    }
> +static int init_muxer(AVFormatContext **out_fmt_ctx, AVCodecContext *audio_enc_ctx,
> +                      AVCodecContext *video_enc_ctx, const char *filename)
> +{
> +    int ret;
> +    AVStream *out_audio_str, *out_video_str;
>  
> -    /* create resampler context */
> -    ost->swr_ctx = swr_alloc();
> -    if (!ost->swr_ctx) {
> -        fprintf(stderr, "Could not allocate resampler context\n");
> -        exit(1);
> +    if ((ret = avformat_alloc_output_context2(out_fmt_ctx, NULL, NULL, filename)) < 0) {
> +        log_error("Could not create output context", &ret);
> +        return ret;
>      }
>  
> -    /* set options */
> -    av_opt_set_chlayout  (ost->swr_ctx, "in_chlayout",       &c->ch_layout,      0);
> -    av_opt_set_int       (ost->swr_ctx, "in_sample_rate",     c->sample_rate,    0);
> -    av_opt_set_sample_fmt(ost->swr_ctx, "in_sample_fmt",      AV_SAMPLE_FMT_S16, 0);
> -    av_opt_set_chlayout  (ost->swr_ctx, "out_chlayout",      &c->ch_layout,      0);
> -    av_opt_set_int       (ost->swr_ctx, "out_sample_rate",    c->sample_rate,    0);
> -    av_opt_set_sample_fmt(ost->swr_ctx, "out_sample_fmt",     c->sample_fmt,     0);
> -
> -    /* initialize the resampling context */
> -    if ((ret = swr_init(ost->swr_ctx)) < 0) {
> -        fprintf(stderr, "Failed to initialize the resampling context\n");
> -        exit(1);
> +    /* open the output file, if needed */
> +    if (!((*out_fmt_ctx)->oformat->flags & AVFMT_NOFILE)) {
> +        if ((ret = avio_open(&(*out_fmt_ctx)->pb, filename, AVIO_FLAG_WRITE)) < 0) {
> +            log_error("Could not open output file", &ret);
> +            return ret;
> +        }
>      }
> -}
>  
> -/* Prepare a 16 bit dummy audio frame of 'frame_size' samples and
> - * 'nb_channels' channels. */
> -static AVFrame *get_audio_frame(OutputStream *ost)
> -{
> -    AVFrame *frame = ost->tmp_frame;
> -    int j, i, v;
> -    int16_t *q = (int16_t*)frame->data[0];
> -
> -    /* check if we want to generate more frames */
> -    if (av_compare_ts(ost->next_pts, ost->enc->time_base,
> -                      STREAM_DURATION, (AVRational){ 1, 1 }) > 0)
> -        return NULL;
> +    if (audio_enc_ctx) {
> +        if (!(out_audio_str = avformat_new_stream(*out_fmt_ctx, NULL))) {
> +            log_error("Could not create new stream", NULL);
> +            return AVERROR(ENOMEM);
> +        }
> +        out_audio_str->id = (*out_fmt_ctx)->nb_streams - 1;
> +        avcodec_parameters_from_context(out_audio_str->codecpar, audio_enc_ctx);
> +    }
>  
> -    for (j = 0; j <frame->nb_samples; j++) {
> -        v = (int)(sin(ost->t) * 10000);
> -        for (i = 0; i < ost->enc->ch_layout.nb_channels; i++)
> -            *q++ = v;
> -        ost->t     += ost->tincr;
> -        ost->tincr += ost->tincr2;
> +    if (video_enc_ctx) {
> +        if (!(out_video_str = avformat_new_stream(*out_fmt_ctx, NULL))) {
> +            log_error("Could not create new stream", NULL);
> +            return AVERROR(ENOMEM);
> +        }
> +        out_video_str->id = (*out_fmt_ctx)->nb_streams - 1;
> +        avcodec_parameters_from_context(out_video_str->codecpar, video_enc_ctx);
>      }
>  
> -    frame->pts = ost->next_pts;
> -    ost->next_pts  += frame->nb_samples;
> +    av_dump_format(*out_fmt_ctx, 0, filename, 1);
>  
> -    return frame;
> +    /* Write the stream header, if any. */
> +    if (avformat_write_header(*out_fmt_ctx, NULL) < 0) {
> +        log_error("avformat_write_header() error", NULL);
> +        return AVERROR_EXIT;
> +    } else
> +        return 0;
This else block is unnecessary.
>  }
>  
> -/*
> - * encode one audio frame and send it to the muxer
> - * return 1 when encoding is finished, 0 otherwise
> - */
> -static int write_audio_frame(AVFormatContext *oc, OutputStream *ost)
> +static void fill_dummy_s16_frame(AVFrame *frame)
>  {
> -    AVCodecContext *c;
> -    AVFrame *frame;
> -    int ret;
> -    int dst_nb_samples;
> -
> -    c = ost->enc;
> -
> -    frame = get_audio_frame(ost);
> -
> -    if (frame) {
> -        /* convert samples from native format to destination codec format, using the resampler */
> -        /* compute destination number of samples */
> -        dst_nb_samples = av_rescale_rnd(swr_get_delay(ost->swr_ctx, c->sample_rate) + frame->nb_samples,
> -                                        c->sample_rate, c->sample_rate, AV_ROUND_UP);
> -        av_assert0(dst_nb_samples == frame->nb_samples);
> -
> -        /* when we pass a frame to the encoder, it may keep a reference to it
> -         * internally;
> -         * make sure we do not overwrite it here
> -         */
> -        ret = av_frame_make_writable(ost->frame);
> -        if (ret < 0)
> -            exit(1);
> -
> -        /* convert to destination format */
> -        ret = swr_convert(ost->swr_ctx,
> -                          ost->frame->data, dst_nb_samples,
> -                          (const uint8_t **)frame->data, frame->nb_samples);
> -        if (ret < 0) {
> -            fprintf(stderr, "Error while converting\n");
> -            exit(1);
> -        }
> -        frame = ost->frame;
> -
> -        frame->pts = av_rescale_q(ost->samples_count, (AVRational){1, c->sample_rate}, c->time_base);
> -        ost->samples_count += dst_nb_samples;
> +    int j, i, v;
> +    static float t, tincr, tincr2;
> +    int16_t *data = (int16_t*)frame->data[0];
> +    static int frame_ctr;
> +
> +    if (!tincr) {
> +        t       = 0;
> +        tincr   = 2 * M_PI * 110.0 / frame->sample_rate;
> +        /* increment frequency by 110 Hz per second */
> +        tincr2  = tincr / frame->sample_rate;
What are you doing here? Why are you doing it?

>      }
> -
> -    return write_frame(oc, c, ost->st, frame, ost->tmp_pkt);
> +    for (j = 0; j <frame->nb_samples; j++) {
> +        v = (int)(sin(t) * 10000);
> +        for (i = 0; i < frame->ch_layout.nb_channels; i++)
> +            *data++ = v;
> +        t     += tincr;
> +        tincr += tincr2;
> +    }
> +    frame->pts = frame->nb_samples*(++frame_ctr);
If you're trying to populate a stream, you should be using the aevalsrc 
filter, which exists for exactly this purpose. Otherwise just populate 
it with zeroes (silence).
>  }
>  
> -/**************************************************************/
> -/* video output */
> -
> -static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
> +static void fill_dummy_yuv420p_frame(AVFrame *frame)
>  {
There's a testsrc filter, or just fill a frame with zeroes (black). 
Don't reinvent the wheel in an example, that discourages people from 
using features that exist.

> -    AVFrame *picture;
> -    int ret;
> -
> -    picture = av_frame_alloc();
> -    if (!picture)
> -        return NULL;
> +    int x, y;
> +    static int idx;
>  
> -    picture->format = pix_fmt;
> -    picture->width  = width;
> -    picture->height = height;
> +    /* Y */
> +    for (y = 0; y < frame->width; y++)
> +        for (x = 0; x < frame->width; x++)
> +            frame->data[0][y * frame->linesize[0] + x] = x + y + idx * 3;
>  
> -    /* allocate the buffers for the frame data */
> -    ret = av_frame_get_buffer(picture, 0);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not allocate frame data.\n");
> -        exit(1);
> +    /* Cb and Cr */
> +    for (y = 0; y < frame->height / 2; y++) {
> +        for (x = 0; x < frame->width / 2; x++) {
> +            frame->data[1][y * frame->linesize[1] + x] = 128 + y + idx * 2;
> +            frame->data[2][y * frame->linesize[2] + x] = 64 + x + idx * 5;
> +        }
>      }
>  
> -    return picture;
> +    frame->pts = idx++;
>  }
>  
> -static void open_video(AVFormatContext *oc, const AVCodec *codec,
> -                       OutputStream *ost, AVDictionary *opt_arg)
> +static int convert_frame(void *convert_ctx, AVFrame *in_frame, AVFrame *out_frame)
>  {
>      int ret;
> -    AVCodecContext *c = ost->enc;
> -    AVDictionary *opt = NULL;
> -
> -    av_dict_copy(&opt, opt_arg, 0);
> +    enum AVMediaType *type = (enum AVMediaType *)(in_frame->opaque);
>  
> -    /* open the codec */
> -    ret = avcodec_open2(c, codec, &opt);
> -    av_dict_free(&opt);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not open video codec: %s\n", av_err2str(ret));
> -        exit(1);
> -    }
> -
> -    /* allocate and init a re-usable frame */
> -    ost->frame = alloc_picture(c->pix_fmt, c->width, c->height);
> -    if (!ost->frame) {
> -        fprintf(stderr, "Could not allocate video frame\n");
> -        exit(1);
> +    if (av_frame_make_writable(out_frame) < 0) {
  if ((ret = av_frame_make_writable(out_frame)) < 0) {
> +        log_error("av_frame_make_writable() error", NULL);
> +        return AVERROR_EXIT;
      return ret;
>      }
>  
> -    /* If the output format is not YUV420P, then a temporary YUV420P
> -     * picture is needed too. It is then converted to the required
> -     * output format. */
> -    ost->tmp_frame = NULL;
> -    if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
> -        ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, c->width, c->height);
> -        if (!ost->tmp_frame) {
> -            fprintf(stderr, "Could not allocate temporary picture\n");
> -            exit(1);
> +    if (*type == AVMEDIA_TYPE_AUDIO) {
> +        if ((ret = swr_convert_frame((struct SwrContext *)convert_ctx, out_frame,
> +                                     (const AVFrame *)in_frame)) != 0) {
> +            log_error("Error converting AVFrame", &ret);
> +            return ret;
>          }
> +    } else {
> +        sws_scale((struct SwsContext *)convert_ctx, (const uint8_t * const *)in_frame->data,
> +                  in_frame->linesize, 0, in_frame->height, out_frame->data,
> +                  out_frame->linesize);
>      }
>  
> -    /* copy the stream parameters to the muxer */
> -    ret = avcodec_parameters_from_context(ost->st->codecpar, c);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not copy the stream parameters\n");
> -        exit(1);
> -    }
> +    out_frame->pts = in_frame->pts;
> +    return 0;
>  }
>  
> -/* Prepare a dummy image. */
> -static void fill_yuv_image(AVFrame *pict, int frame_index,
> -                           int width, int height)
> +static int encode_frame(AVCodecContext *ctx, AVFrame *in_frame, AVPacket *out_pkt)
>  {
> -    int x, y, i;
> +    static int is_flushing_audio = 0, is_flushing_video = 0;
> +    int ret = 0;
> +    int is_audio = ctx->codec->type == AVMEDIA_TYPE_AUDIO;
>  
> -    i = frame_index;
> -
> -    /* Y */
> -    for (y = 0; y < height; y++)
> -        for (x = 0; x < width; x++)
> -            pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
> -
> -    /* Cb and Cr */
> -    for (y = 0; y < height / 2; y++) {
> -        for (x = 0; x < width / 2; x++) {
> -            pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
> -            pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
> -        }
> +    if ((is_audio && !is_flushing_audio) || (!is_audio && !is_flushing_video)) {
> +        ret = avcodec_send_frame(ctx, in_frame);
>      }
> -}
> -
> -static AVFrame *get_video_frame(OutputStream *ost)
> -{
> -    AVCodecContext *c = ost->enc;
> -
> -    /* check if we want to generate more frames */
> -    if (av_compare_ts(ost->next_pts, c->time_base,
> -                      STREAM_DURATION, (AVRational){ 1, 1 }) > 0)
> -        return NULL;
> -
> -    /* when we pass a frame to the encoder, it may keep a reference to it
> -     * internally; make sure we do not overwrite it here */
> -    if (av_frame_make_writable(ost->frame) < 0)
> -        exit(1);
> -
> -    if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
> -        /* as we only generate a YUV420P picture, we must convert it
> -         * to the codec pixel format if needed */
> -        if (!ost->sws_ctx) {
> -            ost->sws_ctx = sws_getContext(c->width, c->height,
> -                                          AV_PIX_FMT_YUV420P,
> -                                          c->width, c->height,
> -                                          c->pix_fmt,
> -                                          SCALE_FLAGS, NULL, NULL, NULL);
> -            if (!ost->sws_ctx) {
> -                fprintf(stderr,
> -                        "Could not initialize the conversion context\n");
> -                exit(1);
> -            }
> +    if (ret < 0) {
You need to check for AVERROR(EAGAIN).
> +        av_log(NULL, AV_LOG_ERROR,
> +               "Error sending frame to the encoder (error '%s')\n", av_err2str(ret));
> +        return ret;
> +    } else if (ret == 0) {
> +        ret = avcodec_receive_packet(ctx, out_pkt);
> +        if ((ret < 0) && (ret != AVERROR(EAGAIN)) && (ret != AVERROR_EOF)) {
> +            av_log(NULL, AV_LOG_ERROR,
> +                   "Error receiving encoded packet (error '%s')\n", av_err2str(ret));
> +            return ret;
>          }
> -        fill_yuv_image(ost->tmp_frame, ost->next_pts, c->width, c->height);
> -        sws_scale(ost->sws_ctx, (const uint8_t * const *) ost->tmp_frame->data,
> -                  ost->tmp_frame->linesize, 0, c->height, ost->frame->data,
> -                  ost->frame->linesize);
> -    } else {
> -        fill_yuv_image(ost->frame, ost->next_pts, c->width, c->height);
>      }
>  
> -    ost->frame->pts = ost->next_pts++;
> +    if (is_audio)
> +        is_flushing_audio = (in_frame == NULL);
> +    else
> +        is_flushing_video = (in_frame == NULL);
>  
> -    return ost->frame;
> +    return ret;
>  }
>  
> -/*
> - * encode one video frame and send it to the muxer
> - * return 1 when encoding is finished, 0 otherwise
> - */
> -static int write_video_frame(AVFormatContext *oc, OutputStream *ost)
> +static int frame_exceeds_stream_duration(AVFrame *fr)
>  {
> -    return write_frame(oc, ost->enc, ost->st, get_video_frame(ost), ost->tmp_pkt);
> +    enum AVMediaType *type = (enum AVMediaType *)(fr->opaque);
Why are you reading from the opaque structure of the frame. Are you sure 
this is what you wanted to do?
> +    AVRational tb = (*type == AVMEDIA_TYPE_AUDIO) ? (AVRational){ 1, fr->sample_rate} :
> +                                                    (AVRational){ 1, VIDEO_FRAME_RATE};
> +
> +    return av_compare_ts(fr->pts, tb ,STREAM_DURATION, (AVRational){ 1, 1 }) > 0;
>  }
>  
> -static void close_stream(AVFormatContext *oc, OutputStream *ost)
> +static enum AVMediaType media_type_of_earlier_frame(AVFrame *audio_fr,
> +                                                    AVFrame *video_fr)
>  {
> -    avcodec_free_context(&ost->enc);
> -    av_frame_free(&ost->frame);
> -    av_frame_free(&ost->tmp_frame);
> -    av_packet_free(&ost->tmp_pkt);
> -    sws_freeContext(ost->sws_ctx);
> -    swr_free(&ost->swr_ctx);
> +    if (!audio_fr)
> +        return AVMEDIA_TYPE_VIDEO;
> +    if (!video_fr)
> +        return AVMEDIA_TYPE_AUDIO;
> +
> +    if (av_compare_ts(audio_fr->pts, (AVRational){ 1, audio_fr->sample_rate},
> +                      video_fr->pts, (AVRational){ 1, VIDEO_FRAME_RATE}) < 0)
> +        return AVMEDIA_TYPE_AUDIO;
> +    else
> +        return AVMEDIA_TYPE_VIDEO;
>  }
>  
> -/**************************************************************/
> -/* media file output */
> -
>  int main(int argc, char **argv)
>  {
> -    OutputStream video_st = { 0 }, audio_st = { 0 };
> -    const AVOutputFormat *fmt;
> -    const char *filename;
> -    AVFormatContext *oc;
> -    const AVCodec *audio_codec, *video_codec;
> -    int ret;
> -    int have_video = 0, have_audio = 0;
> -    int encode_video = 0, encode_audio = 0;
> -    AVDictionary *opt = NULL;
> -    int i;
> -
> -    if (argc < 2) {
> +    const char *fname;
> +    AVCodecContext *audio_enc_ctx = NULL, *video_enc_ctx = NULL, *enc_ctx = NULL;
> +
> +    /* NOTE: if you want to modify the audio/video input ".format" parameter,
> +     * you need to modify the corresponding fill_dummy_XXX_frame() function(s) too */
> +    AVCodecParameters audio_in_params = {
> +        .codec_type  = AVMEDIA_TYPE_AUDIO,
> +        .format      = AV_SAMPLE_FMT_S16,
> +        .sample_rate = 44100,
> +        .ch_layout   = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO
> +    },
> +    video_in_params = {
> +        .codec_type = AVMEDIA_TYPE_VIDEO,
> +        .width      = 352,
> +        .height     = 288,
> +        .format     = AV_PIX_FMT_YUV420P
> +    },
> +    video_enc_params  = { 0 }, audio_enc_params = { 0 };
> +    struct AVRational enc_timebases[2];
> +    AVFrame *in_audio_frame = NULL, *converted_audio_frame = NULL,
> +            *in_video_frame = NULL, *converted_video_frame = NULL,
> +            *frame_to_encode = NULL;
> +    struct SwrContext *audio_convert_ctx = NULL;
> +    struct SwsContext *video_convert_ctx = NULL;
> +    enum AVMediaType media_type;
> +    AVFormatContext *out_fmt_ctx = NULL;
> +    AVPacket *out_pkt = av_packet_alloc();
> +    int ret = 0, process_audio = 0, process_video = 0;
> +
> +    if (argc != 2) {
>          printf("usage: %s output_file\n"
>                 "API example program to output a media file with libavformat.\n"
> -               "This program generates a synthetic audio and video stream, encodes and\n"
> +               "This program generates a synthetic audio and/or video stream, encodes and\n"
>                 "muxes them into a file named output_file.\n"
>                 "The output format is automatically guessed according to the file extension.\n"
> -               "Raw images can also be output by using '%%d' in the filename.\n"
> +               "BMP or JPEG images can also be output by using '%%d' in the filename.\n"
>                 "\n", argv[0]);
> -        return 1;
> +        return AVERROR_EXIT;
This return value is sent to the operating system with the exit() system 
call so you don't actually want to return an AVERROR value here.

>      }
>  
> -    filename = argv[1];
> -    for (i = 2; i+1 < argc; i+=2) {
> -        if (!strcmp(argv[i], "-flags") || !strcmp(argv[i], "-fflags"))
> -            av_dict_set(&opt, argv[i]+1, argv[i+1], 0);
> +    fname = argv[1];
> +    if (!is_extension_supported(fname)) {
> +        ret = AVERROR_EXIT;
> +        goto end;
>      }
>  
> -    /* allocate the output media context */
> -    avformat_alloc_output_context2(&oc, NULL, NULL, filename);
> -    if (!oc) {
> -        printf("Could not deduce output format from file extension: using MPEG.\n");
> -        avformat_alloc_output_context2(&oc, NULL, "mpeg", filename);
> -    }
> -    if (!oc)
> -        return 1;
> -
> -    fmt = oc->oformat;
> -
> -    /* Add the audio and video streams using the default format codecs
> -     * and initialize the codecs. */
> -    if (fmt->video_codec != AV_CODEC_ID_NONE) {
> -        add_stream(&video_st, oc, &video_codec, fmt->video_codec);
> -        have_video = 1;
> -        encode_video = 1;
> +    /* Desume the default codecs and their default parameters from the filename */
> +    if ((ret = get_default_enc_params(&audio_enc_params, fname, AVMEDIA_TYPE_AUDIO)) < 0)
> +        goto end;
> +    if ((ret = get_default_enc_params(&video_enc_params, fname, AVMEDIA_TYPE_VIDEO)) < 0)
> +        goto end;
> +    process_audio = audio_enc_params.codec_id != AV_CODEC_ID_NONE;
> +    process_video = video_enc_params.codec_id != AV_CODEC_ID_NONE;
> +    if (!process_audio && !process_video) {
> +        log_error("Could not get default encoder(s)", NULL);
> +        ret = AVERROR_EXIT;
> +        goto end;
>      }
> -    if (fmt->audio_codec != AV_CODEC_ID_NONE) {
> -        add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec);
> -        have_audio = 1;
> -        encode_audio = 1;
> -    }
> -
> -    /* Now that all the parameters are set, we can open the audio and
> -     * video codecs and allocate the necessary encode buffers. */
> -    if (have_video)
> -        open_video(oc, video_codec, &video_st, opt);
> -
> -    if (have_audio)
> -        open_audio(oc, audio_codec, &audio_st, opt);
> -
> -    av_dump_format(oc, 0, filename, 1);
>  
> -    /* open the output file, if needed */
> -    if (!(fmt->flags & AVFMT_NOFILE)) {
> -        ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);
> -        if (ret < 0) {
> -            fprintf(stderr, "Could not open '%s': %s\n", filename,
> -                    av_err2str(ret));
> -            return 1;
> -        }
> +    if (process_audio) {
> +        /* Prepare the audio encoder*/
> +        if ((ret = init_encoder(&audio_enc_ctx, &audio_enc_params)) < 0)
> +            goto end;
> +        enc_timebases[0] = audio_enc_ctx->time_base;
> +        audio_in_params.frame_size  = audio_enc_params.frame_size  = audio_enc_ctx->frame_size;
> +
> +        /* Allocate an audio resampler and its input and output AVFrames */
> +        if ((ret = init_audio_convert(&audio_convert_ctx, &audio_in_params,
> +                                      &audio_enc_params)) < 0)
> +            goto end;
> +        if ((ret = init_avframe(&in_audio_frame, &audio_in_params)) < 0)
> +            goto end;
> +        if ((ret = init_avframe(&converted_audio_frame, &audio_enc_params)) < 0)
> +            goto end;
>      }
>  
> -    /* Write the stream header, if any. */
> -    ret = avformat_write_header(oc, &opt);
> -    if (ret < 0) {
> -        fprintf(stderr, "Error occurred when opening output file: %s\n",
> -                av_err2str(ret));
> -        return 1;
> +    if (process_video) {
> +        video_enc_params.width  = video_in_params.width;
> +        video_enc_params.height = video_in_params.height;
> +        if ((ret = init_encoder(&video_enc_ctx, &video_enc_params)) < 0)
> +            goto end;
> +        enc_timebases[1] = video_enc_ctx->time_base;
> +        if ((ret = init_video_convert(&video_convert_ctx,&video_in_params,
> +                                      &video_enc_params)) < 0)
> +            goto end;
> +        if ((ret = init_avframe(&in_video_frame, &video_in_params)) < 0)
> +            goto end;
> +        if ((ret = init_avframe(&converted_video_frame, &video_enc_params)) < 0)
> +            goto end;
>      }
>  
> -    while (encode_video || encode_audio) {
> -        /* select the stream to encode */
> -        if (encode_video &&
> -            (!encode_audio || av_compare_ts(video_st.next_pts, video_st.enc->time_base,
> -                                            audio_st.next_pts, audio_st.enc->time_base) <= 0)) {
> -            encode_video = !write_video_frame(oc, &video_st);
> +    /* Create the output container for the encoded frames */
> +    if ((ret = init_muxer(&out_fmt_ctx, audio_enc_ctx, video_enc_ctx, fname)) < 0)
> +        goto end;
> +    out_fmt_ctx->opaque = &enc_timebases;
> +
> +    while (process_audio || process_video) {
> +
> +        frame_to_encode = NULL;
> +        media_type = media_type_of_earlier_frame(in_audio_frame, in_video_frame);
> +
> +        /* fill and convert the input frames */
> +        if (media_type == AVMEDIA_TYPE_AUDIO) {
> +            enc_ctx = audio_enc_ctx;
> +            fill_dummy_s16_frame(in_audio_frame);
> +            if ((ret = convert_frame(audio_convert_ctx, in_audio_frame,
> +                                     converted_audio_frame)) != 0)
> +                goto end;
> +            if (!frame_exceeds_stream_duration(converted_audio_frame))
> +                frame_to_encode = converted_audio_frame;
>          } else {
> -            encode_audio = !write_audio_frame(oc, &audio_st);
> +            enc_ctx = video_enc_ctx;
> +            fill_dummy_yuv420p_frame(in_video_frame);
> +            if ((ret = convert_frame(video_convert_ctx, in_video_frame,
> +                                     converted_video_frame)) != 0)
> +                goto end;
> +            if (!frame_exceeds_stream_duration(in_video_frame))
> +                frame_to_encode = converted_video_frame;
>          }
> -    }
> -
> -    av_write_trailer(oc);
>  
> -    /* Close each codec. */
> -    if (have_video)
> -        close_stream(oc, &video_st);
> -    if (have_audio)
> -        close_stream(oc, &audio_st);
> +        /* encode the converted frames and mux the encoded packets */
> +        if ((ret = encode_frame(enc_ctx, frame_to_encode, out_pkt)) == 0) {
> +            if ((ret = mux_encoded_pkt(out_pkt, out_fmt_ctx, media_type)) < 0)
> +               goto end;
> +        }
>  
> -    if (!(fmt->flags & AVFMT_NOFILE))
> -        /* Close the output file. */
> -        avio_closep(&oc->pb);
> +        /* check if the encoders have been fully flushed */
> +        process_audio &= !((ret == AVERROR_EOF) && (media_type == AVMEDIA_TYPE_AUDIO));
> +        process_video &= !((ret == AVERROR_EOF) && (media_type == AVMEDIA_TYPE_VIDEO));
>  
> -    /* free the stream */
> -    avformat_free_context(oc);
> +    }
>  
> -    return 0;
> +    av_write_trailer(out_fmt_ctx);
> +    ret = 0;
> +
> +end:
> +
> +    avcodec_free_context(&audio_enc_ctx);
> +    avcodec_free_context(&video_enc_ctx);
> +    av_frame_free(&in_audio_frame);
> +    av_frame_free(&in_video_frame);
> +    av_frame_free(&converted_audio_frame);
> +    av_frame_free(&converted_video_frame);
> +    swr_free(&audio_convert_ctx);
> +    sws_freeContext(video_convert_ctx);
> +    if (out_fmt_ctx)
> +        avio_closep(&out_fmt_ctx->pb);
> +    avformat_free_context(out_fmt_ctx);
> +    av_packet_free(&out_pkt);
> +
> +    return ret;
>  }
> -- 
> 2.32.0

This isn't a thorough review since I'm not familiar enough with the mux 
API to really say whether or not it was used correctly, but this is what 
I noticed on first glance.
Andreas Rheinhardt June 18, 2022, 5:08 p.m. UTC | #2
Paolo Prete:
> Please review this. It's a code rewrite of doc/examples/muxing.c which improves readability and fixes issues.
> More specifically:
> *) Original functions of muxing.c have generic/unclear/ambiguous names and they don't group logically related blocks of code: this makes the code hard to read. See for example open_audio/video() and add_stream(), which mix initialization of the encoders' and muxers' stuff. A redundant structure with an ambiguous name ("struct OutputStream"), which is not part of API, is widely used too. The patch uses functions with clearer names and with blocks of code strictly logically related, such as: init_encoder(), init_avframe(), init_muxer(), convert_frame(), encode_frame(), mux_encoded_pkt()...

Using a structure for the user's data is actually intentional, because
users will probably use one, too. And given that it is the user's
structure it is of course not part of the public API.

> *) Errors are not always properly managed: in some cases they are not propagated to the main() function and a call to exit(1) is forced, causing memory leaks. This has been fixed as well.
> *) The program doesn't work for some extensions (try for example: m4v or mjpeg). In addition, it gives weird results when the filename doesn't have an extension. This has been fixed by checking the filename and supporting a fixed set of extensions.
> *) The program claims to output raw images by using '%%d' in the filename, but it actually doesn't. This has been fixed as well.
> *) Input parameters such as width, height, sample_rate and ch_layout are now exposed at the level of the main() function and they are much easier to customize.
> *) fprintf() has been replaced with av_log()
> 

This list alone implies that this patch must be split into small,
self-contained pieces that can be reviewed (whereas the current patch
can't).

There is just one thing that I immediately noticed:

> 
> +    AVCodecParameters audio_in_params = {
> +        .codec_type  = AVMEDIA_TYPE_AUDIO,
> +        .format      = AV_SAMPLE_FMT_S16,
> +        .sample_rate = 44100,
> +        .ch_layout   = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO
> +    },
> +    video_in_params = {
> +        .codec_type = AVMEDIA_TYPE_VIDEO,
> +        .width      = 352,
> +        .height     = 288,
> +        .format     = AV_PIX_FMT_YUV420P
> +    },
> +    video_enc_params  = { 0 }, audio_enc_params = { 0 };

sizeof(AVCodecParameters) is not public, you must not put
AVCodecParameters on the stack.

- Andreas
Paolo Prete June 19, 2022, 12:25 a.m. UTC | #3
Il sabato 18 giugno 2022, 17:18:18 CEST, Leo Izen <leo.izen@gmail.com> ha scritto:

>> On 6/18/22 08:06, Paolo Prete wrote:
>> +{
>> +    if (num)
>> +        av_log(NULL, AV_LOG_ERROR, "%s (error '%s')\n", s, av_err2str(*num));
>> +    else
>> +        av_log(NULL, AV_LOG_ERROR, "%s\n", s);
>> +}
> This does not need to be a pointer. Convention is that negative values
> are errors and nonnegative values are not. So you could always use
> something like: if (num < 0).
The pointer makes clearer, when I call the function, that I'm not managing a specific error with a number id.
>> -    AVPacket *tmp_pkt;>> +    if (type == AVMEDIA_TYPE_AUDIO)>> +        enc_time_base = ((AVRational *)out_fmt_ctx->opaque)[0];>> +    else>> +        enc_time_base = ((AVRational *)out_fmt_ctx->opaque)[1];>>>Why are you referencing the opaque elements of out_fmt_ctx?
I pass, through the opaque element, user data to the muxer. That data (the timebases of the audio and video encoders) will be used for rescaling ts to the muxer timebases. Otherwise I would have to pass them through the function's params, which would make the function prototype longer and less readable.
>>>> -static int write_frame(AVFormatContext *fmt_ctx, AVCodecContext *c,>> -                      AVStream *st, AVFrame *frame, AVPacket *pkt)>> +static int is_extension_supported(const char *filename)>Why are you artificially limiting what is permitted?
From what I see, not all extensions are supported without modifying the default settings. In fact, as specified in the commit msg, some extensions don't work and the original muxing.c fails with them.

>> +    if (!(c = avcodec_find_encoder(id))) {>> +        avformat_free_context(tmp_fctx);>> +        return ret;>You probably don't want to return "ret" here as you don't assign it.
ret is assigned to 0 (= success) at the definition of the variable. And in this case, the function succeeds: the returned AVCodecParameter is set with AV_CODEC_ID_NONE

>> -    if (oc->oformat->flags & AVFMT_GLOBALHEADER)>> -        c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;>> +    return ret;> Again, why are you returning ret if you are not assigning to it?
as above, ret is assigned to 0.
>> +    codec = avcodec_find_encoder(params->codec_id);>> +    *enc_ctx = avcodec_alloc_context3(codec);>Don't attempt to allocate anything until after you check if the codec is>found.
Is it really necessary? It has already been checked in the line with "if (process_audio/video) {", and the API doxy says that a NULL parameter only means that codec-specific defaults won't be initialized (so it appears safe to me).
>> +    if (!codec) {>> +        log_error("Could not allocate the encoding context", NULL);>This error message does not match the check, which is if the codec is found.>> +        return AVERROR_EXIT;>return AVERROR_CODEC_NOT_FOUND;
Instead of changing the error msg, I think that what was wrong is the check. I fixed it with: if (!(*enc_ctx = avcodec_alloc_context3(codec))) {
>> +        (*enc_ctx)->sample_rate = params->sample_rate;>> +        (*enc_ctx)->time_base  = (AVRational){1, params->sample_rate};>Use av_make_q to avoid casting.
Done, thanks.
> +        return ret;> +    } else> +        return 0;This violates the coding style, you need to use braces {} for the elseblock if you also use it for the if block.
Done, thanks.
> +    if ((ret = av_frame_get_buffer(*frame, 0)) < 0) {> +        log_error("Could not allocate buffer for AVFrame", &ret);> +        return AVERROR(ENOMEM);> +    } else> +        return 0;You don't need the else block here at all.
Done, thanks.
>> +static int init_video_convert(struct SwsContext **ctx, AVCodecParameters *in_params,>> +                              AVCodecParameters *out_params)>> +{>This paper-thin wrapper function is unnecessary, just inline it.
This is intentional: even if it's a paper-thin wrapper, it shortens the code of the main() function by making it quicker to understand that the audio/video converters are initialized with their respective AVCodecParameters. I would wait for feedback about this from other readers before changing the code.
> +> +    if (!tincr) {> +        t      = 0;> +        tincr  = 2 * M_PI * 110.0 / frame->sample_rate;> +        /* increment frequency by 110 Hz per second */> +        tincr2  = tincr / frame->sample_rate;What are you doing here? Why are you doing it?>>      }>> ->> -    return write_frame(oc, c, ost->st, frame, ost->tmp_pkt);>> +    for (j = 0; j <frame->nb_samples; j++) {>> +        v = (int)(sin(t) * 10000);>> +        for (i = 0; i < frame->ch_layout.nb_channels; i++)>> +            *data++ = v;>> +        t    += tincr;>> +        tincr += tincr2;>> +    }>> +    frame->pts = frame->nb_samples*(++frame_ctr);>If you're trying to populate a stream, you should be using the aevalsrc>filter, which exists for exactly this purpose. Otherwise just populate>it with zeroes (silence).>>  }>>>> -/**************************************************************/>> -/* video output */>> ->> -static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)>> +static void fill_dummy_yuv420p_frame(AVFrame *frame)>>  {>There's a testsrc filter, or just fill a frame with zeroes (black).>Don't reinvent the wheel in an example, that discourages people from>using features that exist.
This is all copied from the original muxing.c example. These dummy audio/video frames consist in few lines of code and they are common in doc/example files. See also encode-audio.c, encode-video.c. Adding a filtering context to the current example would consequently require to patch (and maybe rename) the other files as well. And if you patch in that way, for example, "encode-audio.c" the reader won't focus on the encoding task, because the filtering block of code would be somewhat distracting.
>> -            }>> +    if (ret < 0) {>You need to check for AVERROR(EAGAIN).>> +        av_log(NULL, AV_LOG_ERROR,>> +              "Error sending frame to the encoder (error '%s')\n", av_err2str(ret));>> +        return ret;
Is it really necessary to check AVERROR(EAGAIN) when sending the frame to the encoder, in this specific case? The function is written in a way that the encoder's output is always read before sending new frames. Note that the original muxing.c does not check this either.
>> -    return write_frame(oc, ost->enc, ost->st, get_video_frame(ost), ost->tmp_pkt);>> +    enum AVMediaType *type = (enum AVMediaType *)(fr->opaque);>Why are you reading from the opaque structure of the frame. Are you sure>this is what you wanted to do?
Yes, it just stores an additional info for the frame (it says if the frame contains video or audio data), which will be used later in "frame_exceeds_stream_duration()" function.
>>                "The output format is automatically guessed according to the file extension.\n">> -              "Raw images can also be output by using '%%d' in the filename.\n">> +              "BMP or JPEG images can also be output by using '%%d' in the filename.\n">>                "\n", argv[0]);>> -        return 1;>> +        return AVERROR_EXIT;>This return value is sent to the operating system with the exit() system>call so you don't actually want to return an AVERROR value here.
I replaced it with return 1. I also allocated AVCodecParameters with the proper alloc() function, as Andreas suggested. A new patch is attached to this mail.
    Il sabato 18 giugno 2022, 17:18:18 CEST, Leo Izen <leo.izen@gmail.com> ha scritto:  
 
 On 6/18/22 08:06, Paolo Prete wrote:
> Please review this. It's a code rewrite of doc/examples/muxing.c which improves readability and fixes issues.
> From 8a4e942a001ae49dc052899f331ed43abf954dda Mon Sep 17 00:00:00 2001
> From: paolo <paolopr976@gmail.com>
> Date: Sat, 18 Jun 2022 13:53:55 +0200
> Subject: [PATCH] doc/examples/muxing: code rewrite with improved readability
>  and fixed issues
This commit message is too long, you can just truncate "and fixed 
issues" since it doesn't say much.

> 
>                Improved readability with functions that have clearer prototypes and that don't mix logically unrelated blocks of code
> 
>                Fixed issues in case of unsupported extensions
> 
>                Fixed memory leaks on errors, which are now properly propagated to the main() function
> 
>                Fixed issue on raw images output
> 
>                fprintf() replaced with av_log()
> 
>                Input A/V parameters exposed in the main() function and easier to customize
> ---
>  doc/examples/muxing.c | 905 +++++++++++++++++++-----------------------
>  1 file changed, 406 insertions(+), 499 deletions(-)
> 
> diff --git a/doc/examples/muxing.c b/doc/examples/muxing.c
> index 3acb778322..04739995d8 100644
> --- a/doc/examples/muxing.c
> +++ b/doc/examples/muxing.c
> @@ -1,5 +1,5 @@
>  /*
> - * Copyright (c) 2003 Fabrice Bellard
> + * Copyright (c) 2022 Paolo Prete (paolopr976 at gmail.com) after Fabrice Bellard
Don't remove the original copyright, just add yourself below it. You 
also don't need to put your email address in the copyright line.
>  *
>  * Permission is hereby granted, free of charge, to any person obtaining a copy
>  * of this software and associated documentation files (the "Software"), to deal
> @@ -24,625 +24,532 @@
>  * @file
>  * libavformat API example.
>  *
> - * Output a media file in any supported libavformat format. The default
> + * Output a media file in a set of supported libavformat formats. The default
>  * codecs are used.
>  * @example muxing.c
>  */

> -#include <stdlib.h>
> -#include <stdio.h>
> -#include <string.h>
> -#include <math.h>
> -
> -#include <libavutil/avassert.h>
> -#include <libavutil/channel_layout.h>
> -#include <libavutil/opt.h>
> -#include <libavutil/mathematics.h>
> -#include <libavutil/timestamp.h>
>  #include <libavcodec/avcodec.h>
>  #include <libavformat/avformat.h>
> -#include <libswscale/swscale.h>
> +#include <libavutil/timestamp.h>
Make sure these stay sorted.
>  #include <libswresample/swresample.h>
> +#include <libswscale/swscale.h>

> -#define STREAM_DURATION  10.0
> -#define STREAM_FRAME_RATE 25 /* 25 images/s */
> -#define STREAM_PIX_FMT    AV_PIX_FMT_YUV420P /* default pix_fmt */
> -
> -#define SCALE_FLAGS SWS_BICUBIC
> +#define VIDEO_FRAME_RATE 25 /* 25 images/s */
> +#define VIDEO_SCALE_FLAGS SWS_BICUBIC
> +#define STREAM_DURATION 10.0 /* 10 seconds */

> -// a wrapper around a single output AVStream
> -typedef struct OutputStream {
> -    AVStream *st;
> -    AVCodecContext *enc;
> +static void log_error(const char *s, int *num)
> +{
> +    if (num)
> +        av_log(NULL, AV_LOG_ERROR, "%s (error '%s')\n", s, av_err2str(*num));
> +    else
> +        av_log(NULL, AV_LOG_ERROR, "%s\n", s);
> +}
This does not need to be a pointer. Convention is that negative values 
are errors and nonnegative values are not. So you could always use 
something like: if (num < 0).

> -    /* pts of the next frame that will be generated */
> -    int64_t next_pts;
> -    int samples_count;
> +static int mux_encoded_pkt(AVPacket *out_pkt, AVFormatContext *out_fmt_ctx,
> +                              enum AVMediaType type)
> +{
> +    int ret;
> +    AVRational enc_time_base, str_time_base;

> -    AVFrame *frame;
> -    AVFrame *tmp_frame;
> +    if (out_fmt_ctx->streams[0]->codecpar->codec_type == type)
> +        out_pkt->stream_index = 0;
> +    else if ((out_fmt_ctx->nb_streams > 1) && (type == AVMEDIA_TYPE_VIDEO))
> +        out_pkt->stream_index = 1;
> +    str_time_base = out_fmt_ctx->streams[out_pkt->stream_index]->time_base;

> -    AVPacket *tmp_pkt;
> +    if (type == AVMEDIA_TYPE_AUDIO)
> +        enc_time_base = ((AVRational *)out_fmt_ctx->opaque)[0];
> +    else
> +        enc_time_base = ((AVRational *)out_fmt_ctx->opaque)[1];
> 
Why are you referencing the opaque elements of out_fmt_ctx?

> -    float t, tincr, tincr2;
> +    av_packet_rescale_ts(out_pkt, enc_time_base, str_time_base);

> -    struct SwsContext *sws_ctx;
> -    struct SwrContext *swr_ctx;
> -} OutputStream;
> +    av_log(NULL, AV_LOG_INFO, "stream_index=%d, size=%d, pts_time=%s\n",
> +          out_pkt->stream_index,
> +          out_pkt->size, av_ts2timestr(out_pkt->pts, &str_time_base));

> -static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
> -{
> -    AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
> +    if ((ret = av_interleaved_write_frame(out_fmt_ctx, out_pkt)) < 0)
> +        log_error("Error calling av_interleaved_write_frame()", &ret);

> -    printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
> -          av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
> -          av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
> -          av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
> -          pkt->stream_index);
> +    return ret;
>  }

> -static int write_frame(AVFormatContext *fmt_ctx, AVCodecContext *c,
> -                      AVStream *st, AVFrame *frame, AVPacket *pkt)
> +static int is_extension_supported(const char *filename)
Why are you artificially limiting what is permitted?
>  {
> -    int ret;
> +    const char *extensions[] = {".aac", ".avi", ".bmp", ".jpeg", ".mka",
> +                                ".mkv", ".mov", ".mp4", ".flv",  ".ts"};
> +    int i, size = sizeof(extensions) / sizeof(extensions[0]);
> +    char *dot = strrchr(filename, '.');

> -    // send the frame to the encoder
> -    ret = avcodec_send_frame(c, frame);
> -    if (ret < 0) {
> -        fprintf(stderr, "Error sending a frame to the encoder: %s\n",
> -                av_err2str(ret));
> -        exit(1);
> +    for (i = 0; i < size; i++) {
> +        if (dot && !strcmp(dot, extensions[i]))
> +            return 1;
>      }

> -    while (ret >= 0) {
> -        ret = avcodec_receive_packet(c, pkt);
> -        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
> -            break;
> -        else if (ret < 0) {
> -            fprintf(stderr, "Error encoding a frame: %s\n", av_err2str(ret));
> -            exit(1);
> -        }
> +    log_error("File extension not supported", NULL);
> +    av_log(NULL, AV_LOG_WARNING, "Please choose one of the following extensions: ");
> +    for (i = 0; i < size - 1; i++)
> +        av_log(NULL, AV_LOG_WARNING, "%s, ", extensions[i]);
> +    av_log(NULL, AV_LOG_WARNING, "%s\n", extensions[size-1]);

> -        /* rescale output packet timestamp values from codec to stream timebase */
> -        av_packet_rescale_ts(pkt, c->time_base, st->time_base);
> -        pkt->stream_index = st->index;
> -
> -        /* Write the compressed frame to the media file. */
> -        log_packet(fmt_ctx, pkt);
> -        ret = av_interleaved_write_frame(fmt_ctx, pkt);
> -        /* pkt is now blank (av_interleaved_write_frame() takes ownership of
> -        * its contents and resets pkt), so that no unreferencing is necessary.
> -        * This would be different if one used av_write_frame(). */
> -        if (ret < 0) {
> -            fprintf(stderr, "Error while writing output packet: %s\n", av_err2str(ret));
> -            exit(1);
> -        }
> -    }
> -
> -    return ret == AVERROR_EOF ? 1 : 0;
> +    return 0;
>  }

> -/* Add an output stream. */
> -static void add_stream(OutputStream *ost, AVFormatContext *oc,
> -                      const AVCodec **codec,
> -                      enum AVCodecID codec_id)
> +static int get_default_enc_params(AVCodecParameters *params,
> +                                  const char *fname, enum AVMediaType type)
>  {
> -    AVCodecContext *c;
> -    int i;
> -
> -    /* find the encoder */
> -    *codec = avcodec_find_encoder(codec_id);
> -    if (!(*codec)) {
> -        fprintf(stderr, "Could not find encoder for '%s'\n",
> -                avcodec_get_name(codec_id));
> -        exit(1);
> +    AVFormatContext *tmp_fctx;
> +    enum AVCodecID id;
> +    const AVCodec *c;
> +    int ret = 0;
> +
> +    if ((ret = avformat_alloc_output_context2(&tmp_fctx, NULL, NULL, fname)) < 0) {
> +        log_error("Could not get default encoder", &ret);
> +        return AVERROR_EXIT;
>      }

> -    ost->tmp_pkt = av_packet_alloc();
> -    if (!ost->tmp_pkt) {
> -        fprintf(stderr, "Could not allocate AVPacket\n");
> -        exit(1);
> -    }
> +    id = (type == AVMEDIA_TYPE_AUDIO) ? tmp_fctx->oformat->audio_codec :
> +                                        tmp_fctx->oformat->video_codec;

> -    ost->st = avformat_new_stream(oc, NULL);
> -    if (!ost->st) {
> -        fprintf(stderr, "Could not allocate stream\n");
> -        exit(1);
> -    }
> -    ost->st->id = oc->nb_streams-1;
> -    c = avcodec_alloc_context3(*codec);
> -    if (!c) {
> -        fprintf(stderr, "Could not alloc an encoding context\n");
> -        exit(1);
> +    if (!(c = avcodec_find_encoder(id))) {
> +        avformat_free_context(tmp_fctx);
> +        return ret;
You probably don't want to return "ret" here as you don't assign it.

>      }
> -    ost->enc = c;
> -
> -    switch ((*codec)->type) {
> -    case AVMEDIA_TYPE_AUDIO:
> -        c->sample_fmt  = (*codec)->sample_fmts ?
> -            (*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
> -        c->bit_rate    = 64000;
> -        c->sample_rate = 44100;
> -        if ((*codec)->supported_samplerates) {
> -            c->sample_rate = (*codec)->supported_samplerates[0];
> -            for (i = 0; (*codec)->supported_samplerates[i]; i++) {
> -                if ((*codec)->supported_samplerates[i] == 44100)
> -                    c->sample_rate = 44100;
> -            }
> -        }
> -        av_channel_layout_copy(&c->ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO);
> -        ost->st->time_base = (AVRational){ 1, c->sample_rate };
> -        break;
> -
> -    case AVMEDIA_TYPE_VIDEO:
> -        c->codec_id = codec_id;
> -
> -        c->bit_rate = 400000;
> -        /* Resolution must be a multiple of two. */
> -        c->width    = 352;
> -        c->height  = 288;
> -        /* timebase: This is the fundamental unit of time (in seconds) in terms
> -        * of which frame timestamps are represented. For fixed-fps content,
> -        * timebase should be 1/framerate and timestamp increments should be
> -        * identical to 1. */
> -        ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE };
> -        c->time_base      = ost->st->time_base;
> -
> -        c->gop_size      = 12; /* emit one intra frame every twelve frames at most */
> -        c->pix_fmt      = STREAM_PIX_FMT;
> -        if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
> -            /* just for testing, we also add B-frames */
> -            c->max_b_frames = 2;
> -        }
> -        if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
> -            /* Needed to avoid using macroblocks in which some coeffs overflow.
> -            * This does not happen with normal video, it just happens here as
> -            * the motion of the chroma plane does not match the luma plane. */
> -            c->mb_decision = 2;
> -        }
> -        break;

> -    default:
> -        break;
> +    params->codec_type = c->type;
> +    params->codec_id  = c-> id;
> +    if (c->type == AVMEDIA_TYPE_AUDIO) {
> +        params->format      = c->sample_fmts ?
> +                              c->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
> +        params->ch_layout  = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO;
> +        params->sample_rate = c->supported_samplerates ?
> +                              c->supported_samplerates[0] : 44100;
> +    } else if (c->type == AVMEDIA_TYPE_VIDEO) {
> +        params->format = c->pix_fmts ? c->pix_fmts[0] : AV_PIX_FMT_YUV420P;
>      }
> +    avformat_free_context(tmp_fctx);

> -    /* Some formats want stream headers to be separate. */
> -    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
> -        c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
> +    return ret;
Again, why are you returning ret if you are not assigning to it?

>  }

> -/**************************************************************/
> -/* audio output */
> -
> -static AVFrame *alloc_audio_frame(enum AVSampleFormat sample_fmt,
> -                                  const AVChannelLayout *channel_layout,
> -                                  int sample_rate, int nb_samples)
> +static int init_encoder(AVCodecContext **enc_ctx, AVCodecParameters *params)
>  {
> -    AVFrame *frame = av_frame_alloc();
> +    const AVCodec *codec = NULL;
>      int ret;

> -    if (!frame) {
> -        fprintf(stderr, "Error allocating an audio frame\n");
> -        exit(1);
> +    codec = avcodec_find_encoder(params->codec_id);
> +    *enc_ctx = avcodec_alloc_context3(codec);
Don't attempt to allocate anything until after you check if the codec is 
found.
> +    if (!codec) {
> +        log_error("Could not allocate the encoding context", NULL);
This error message does not match the check, which is if the codec is found.
> +        return AVERROR_EXIT;
return AVERROR_ENCODER_NOT_FOUND;
>      }

> -    frame->format = sample_fmt;
> -    av_channel_layout_copy(&frame->ch_layout, channel_layout);
> -    frame->sample_rate = sample_rate;
> -    frame->nb_samples = nb_samples;
> -
> -    if (nb_samples) {
> -        ret = av_frame_get_buffer(frame, 0);
> -        if (ret < 0) {
> -            fprintf(stderr, "Error allocating an audio buffer\n");
> -            exit(1);
> -        }
> +    (*enc_ctx)->codec_id  = params->codec_id;
> +    (*enc_ctx)->codec_type = params->codec_type;
> +    if (params->codec_type == AVMEDIA_TYPE_AUDIO) {
> +        (*enc_ctx)->sample_fmt  = params->format;
> +        (*enc_ctx)->sample_rate = params->sample_rate;
> +        (*enc_ctx)->time_base  = (AVRational){1, params->sample_rate};
Use av_make_q to avoid casting.
> +        (*enc_ctx)->ch_layout  = params->ch_layout;
> +    } else if (params->codec_type == AVMEDIA_TYPE_VIDEO) {
> +        (*enc_ctx)->width      = params->width;
> +        (*enc_ctx)->height    = params->height;
> +        (*enc_ctx)->time_base  = (AVRational){ 1, VIDEO_FRAME_RATE };
Better off just defining VIDEO_FRAME_RATE_Q to be an AVRational with
value {1, 25};
> +        (*enc_ctx)->gop_size  = 12;
> +        (*enc_ctx)->pix_fmt    = params->format;
>      }

> -    return frame;
> +    if ((ret = avcodec_open2(*enc_ctx, codec, NULL)) < 0) {
> +        log_error("Could not open input codec", &ret);
> +        return ret;
> +    } else
> +        return 0;
This violates the coding style: you need to use braces {} for the else
block if you also use them for the if block.
>  }

> -static void open_audio(AVFormatContext *oc, const AVCodec *codec,
> -                      OutputStream *ost, AVDictionary *opt_arg)
> +static int init_avframe(AVFrame **frame, AVCodecParameters *params)
>  {
> -    AVCodecContext *c;
> -    int nb_samples;
>      int ret;
> -    AVDictionary *opt = NULL;

> -    c = ost->enc;
> +    if (!(*frame = av_frame_alloc())) {
> +        log_error("Could not allocate AVFrame", NULL);
> +        return AVERROR(ENOMEM);
> +    }

> -    /* open it */
> -    av_dict_copy(&opt, opt_arg, 0);
> -    ret = avcodec_open2(c, codec, &opt);
> -    av_dict_free(&opt);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not open audio codec: %s\n", av_err2str(ret));
> -        exit(1);
> +    (*frame)->opaque = &params->codec_type;
> +    if (params->codec_type == AVMEDIA_TYPE_AUDIO) {
> +        (*frame)->nb_samples  = params->frame_size;
> +        (*frame)->sample_rate = params->sample_rate;
> +        (*frame)->format      = params->format;
> +        (*frame)->ch_layout  = params->ch_layout;
> +    } else {
> +        (*frame)->width  = params->width;
> +        (*frame)->height = params->height;
> +        (*frame)->format = params->format;
>      }

> -    /* init signal generator */
> -    ost->t    = 0;
> -    ost->tincr = 2 * M_PI * 110.0 / c->sample_rate;
> -    /* increment frequency by 110 Hz per second */
> -    ost->tincr2 = 2 * M_PI * 110.0 / c->sample_rate / c->sample_rate;
> +    /* Allocate the frame's data buffer */
> +    if ((ret = av_frame_get_buffer(*frame, 0)) < 0) {
> +        log_error("Could not allocate buffer for AVFrame", &ret);
> +        return AVERROR(ENOMEM);
> +    } else
> +        return 0;
You don't need the else block here at all.
> +}

> -    if (c->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
> -        nb_samples = 10000;
> -    else
> -        nb_samples = c->frame_size;
> +static int init_audio_convert(struct SwrContext **ctx, AVCodecParameters *in_params,
> +                              AVCodecParameters *out_params)
> +{
> +    swr_alloc_set_opts2(ctx,
> +                        &(out_params->ch_layout),
> +                        out_params->format, out_params->sample_rate,
> +                        &(in_params->ch_layout),
> +                        in_params->format, in_params->sample_rate,
> +                        0, NULL);
> +    if (!*ctx) {
> +        log_error("Could not allocate resample context", NULL);
> +        return AVERROR(ENOMEM);
> +    } else
> +        return 0;
> +}
You don't need the else block here at all.

> -    ost->frame    = alloc_audio_frame(c->sample_fmt, &c->ch_layout,
> -                                      c->sample_rate, nb_samples);
> -    ost->tmp_frame = alloc_audio_frame(AV_SAMPLE_FMT_S16, &c->ch_layout,
> -                                      c->sample_rate, nb_samples);
> +static int init_video_convert(struct SwsContext **ctx, AVCodecParameters *in_params,
> +                              AVCodecParameters *out_params)
> +{
This paper-thin wrapper function is unnecessary, just inline it.
> +    *ctx = sws_getContext(in_params->width, in_params->height,
> +                          in_params->format,
> +                          out_params->width, out_params->height,
> +                          out_params->codec_id == out_params->format,
> +                          VIDEO_SCALE_FLAGS, NULL, NULL, NULL);
> +    if (!*ctx) {
> +        log_error("Could not allocate scale context", NULL);
> +        return AVERROR(ENOMEM);
> +    } else
> +        return 0;
> +}

> -    /* copy the stream parameters to the muxer */
> -    ret = avcodec_parameters_from_context(ost->st->codecpar, c);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not copy the stream parameters\n");
> -        exit(1);
> -    }
> +static int init_muxer(AVFormatContext **out_fmt_ctx, AVCodecContext *audio_enc_ctx,
> +                      AVCodecContext *video_enc_ctx, const char *filename)
> +{
> +    int ret;
> +    AVStream *out_audio_str, *out_video_str;

> -    /* create resampler context */
> -    ost->swr_ctx = swr_alloc();
> -    if (!ost->swr_ctx) {
> -        fprintf(stderr, "Could not allocate resampler context\n");
> -        exit(1);
> +    if ((ret = avformat_alloc_output_context2(out_fmt_ctx, NULL, NULL, filename)) < 0) {
> +        log_error("Could not create output context", &ret);
> +        return ret;
>      }

> -    /* set options */
> -    av_opt_set_chlayout  (ost->swr_ctx, "in_chlayout",      &c->ch_layout,      0);
> -    av_opt_set_int      (ost->swr_ctx, "in_sample_rate",    c->sample_rate,    0);
> -    av_opt_set_sample_fmt(ost->swr_ctx, "in_sample_fmt",      AV_SAMPLE_FMT_S16, 0);
> -    av_opt_set_chlayout  (ost->swr_ctx, "out_chlayout",      &c->ch_layout,      0);
> -    av_opt_set_int      (ost->swr_ctx, "out_sample_rate",    c->sample_rate,    0);
> -    av_opt_set_sample_fmt(ost->swr_ctx, "out_sample_fmt",    c->sample_fmt,    0);
> -
> -    /* initialize the resampling context */
> -    if ((ret = swr_init(ost->swr_ctx)) < 0) {
> -        fprintf(stderr, "Failed to initialize the resampling context\n");
> -        exit(1);
> +    /* open the output file, if needed */
> +    if (!((*out_fmt_ctx)->oformat->flags & AVFMT_NOFILE)) {
> +        if ((ret = avio_open(&(*out_fmt_ctx)->pb, filename, AVIO_FLAG_WRITE)) < 0) {
> +            log_error("Could not open output file", &ret);
> +            return ret;
> +        }
>      }
> -}

> -/* Prepare a 16 bit dummy audio frame of 'frame_size' samples and
> - * 'nb_channels' channels. */
> -static AVFrame *get_audio_frame(OutputStream *ost)
> -{
> -    AVFrame *frame = ost->tmp_frame;
> -    int j, i, v;
> -    int16_t *q = (int16_t*)frame->data[0];
> -
> -    /* check if we want to generate more frames */
> -    if (av_compare_ts(ost->next_pts, ost->enc->time_base,
> -                      STREAM_DURATION, (AVRational){ 1, 1 }) > 0)
> -        return NULL;
> +    if (audio_enc_ctx) {
> +        if (!(out_audio_str = avformat_new_stream(*out_fmt_ctx, NULL))) {
> +            log_error("Could not create new stream", NULL);
> +            return AVERROR(ENOMEM);
> +        }
> +        out_audio_str->id = (*out_fmt_ctx)->nb_streams - 1;
> +        avcodec_parameters_from_context(out_audio_str->codecpar, audio_enc_ctx);
> +    }

> -    for (j = 0; j <frame->nb_samples; j++) {
> -        v = (int)(sin(ost->t) * 10000);
> -        for (i = 0; i < ost->enc->ch_layout.nb_channels; i++)
> -            *q++ = v;
> -        ost->t    += ost->tincr;
> -        ost->tincr += ost->tincr2;
> +    if (video_enc_ctx) {
> +        if (!(out_video_str = avformat_new_stream(*out_fmt_ctx, NULL))) {
> +            log_error("Could not create new stream", NULL);
> +            return AVERROR(ENOMEM);
> +        }
> +        out_video_str->id = (*out_fmt_ctx)->nb_streams - 1;
> +        avcodec_parameters_from_context(out_video_str->codecpar, video_enc_ctx);
>      }

> -    frame->pts = ost->next_pts;
> -    ost->next_pts  += frame->nb_samples;
> +    av_dump_format(*out_fmt_ctx, 0, filename, 1);

> -    return frame;
> +    /* Write the stream header, if any. */
> +    if (avformat_write_header(*out_fmt_ctx, NULL) < 0) {
> +        log_error("avformat_write_header() error", NULL);
> +        return AVERROR_EXIT;
> +    } else
> +        return 0;
This else block is unnecessary.
>  }

> -/*
> - * encode one audio frame and send it to the muxer
> - * return 1 when encoding is finished, 0 otherwise
> - */
> -static int write_audio_frame(AVFormatContext *oc, OutputStream *ost)
> +static void fill_dummy_s16_frame(AVFrame *frame)
>  {
> -    AVCodecContext *c;
> -    AVFrame *frame;
> -    int ret;
> -    int dst_nb_samples;
> -
> -    c = ost->enc;
> -
> -    frame = get_audio_frame(ost);
> -
> -    if (frame) {
> -        /* convert samples from native format to destination codec format, using the resampler */
> -        /* compute destination number of samples */
> -        dst_nb_samples = av_rescale_rnd(swr_get_delay(ost->swr_ctx, c->sample_rate) + frame->nb_samples,
> -                                        c->sample_rate, c->sample_rate, AV_ROUND_UP);
> -        av_assert0(dst_nb_samples == frame->nb_samples);
> -
> -        /* when we pass a frame to the encoder, it may keep a reference to it
> -        * internally;
> -        * make sure we do not overwrite it here
> -        */
> -        ret = av_frame_make_writable(ost->frame);
> -        if (ret < 0)
> -            exit(1);
> -
> -        /* convert to destination format */
> -        ret = swr_convert(ost->swr_ctx,
> -                          ost->frame->data, dst_nb_samples,
> -                          (const uint8_t **)frame->data, frame->nb_samples);
> -        if (ret < 0) {
> -            fprintf(stderr, "Error while converting\n");
> -            exit(1);
> -        }
> -        frame = ost->frame;
> -
> -        frame->pts = av_rescale_q(ost->samples_count, (AVRational){1, c->sample_rate}, c->time_base);
> -        ost->samples_count += dst_nb_samples;
> +    int j, i, v;
> +    static float t, tincr, tincr2;
> +    int16_t *data = (int16_t*)frame->data[0];
> +    static int frame_ctr;
> +
> +    if (!tincr) {
> +        t      = 0;
> +        tincr  = 2 * M_PI * 110.0 / frame->sample_rate;
> +        /* increment frequency by 110 Hz per second */
> +        tincr2  = tincr / frame->sample_rate;
What are you doing here? Why are you doing it?

>      }
> -
> -    return write_frame(oc, c, ost->st, frame, ost->tmp_pkt);
> +    for (j = 0; j <frame->nb_samples; j++) {
> +        v = (int)(sin(t) * 10000);
> +        for (i = 0; i < frame->ch_layout.nb_channels; i++)
> +            *data++ = v;
> +        t    += tincr;
> +        tincr += tincr2;
> +    }
> +    frame->pts = frame->nb_samples*(++frame_ctr);
If you're trying to populate a stream, you should be using the aevalsrc 
filter, which exists for exactly this purpose. Otherwise just populate 
it with zeroes (silence).
>  }

> -/**************************************************************/
> -/* video output */
> -
> -static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
> +static void fill_dummy_yuv420p_frame(AVFrame *frame)
>  {
There's a testsrc filter, or just fill a frame with zeroes (black). 
Don't reinvent the wheel in an example, that discourages people from 
using features that exist.

> -    AVFrame *picture;
> -    int ret;
> -
> -    picture = av_frame_alloc();
> -    if (!picture)
> -        return NULL;
> +    int x, y;
> +    static int idx;

> -    picture->format = pix_fmt;
> -    picture->width  = width;
> -    picture->height = height;
> +    /* Y */
> +    for (y = 0; y < frame->width; y++)
> +        for (x = 0; x < frame->width; x++)
> +            frame->data[0][y * frame->linesize[0] + x] = x + y + idx * 3;

> -    /* allocate the buffers for the frame data */
> -    ret = av_frame_get_buffer(picture, 0);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not allocate frame data.\n");
> -        exit(1);
> +    /* Cb and Cr */
> +    for (y = 0; y < frame->height / 2; y++) {
> +        for (x = 0; x < frame->width / 2; x++) {
> +            frame->data[1][y * frame->linesize[1] + x] = 128 + y + idx * 2;
> +            frame->data[2][y * frame->linesize[2] + x] = 64 + x + idx * 5;
> +        }
>      }
>
> -    return picture;
> +    frame->pts = idx++;
>  }

> -static void open_video(AVFormatContext *oc, const AVCodec *codec,
> -                      OutputStream *ost, AVDictionary *opt_arg)
> +static int convert_frame(void *convert_ctx, AVFrame *in_frame, AVFrame *out_frame)
>  {
>      int ret;
> -    AVCodecContext *c = ost->enc;
> -    AVDictionary *opt = NULL;
> -
> -    av_dict_copy(&opt, opt_arg, 0);
> +    enum AVMediaType *type = (enum AVMediaType *)(in_frame->opaque);

> -    /* open the codec */
> -    ret = avcodec_open2(c, codec, &opt);
> -    av_dict_free(&opt);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not open video codec: %s\n", av_err2str(ret));
> -        exit(1);
> -    }
> -
> -    /* allocate and init a re-usable frame */
> -    ost->frame = alloc_picture(c->pix_fmt, c->width, c->height);
> -    if (!ost->frame) {
> -        fprintf(stderr, "Could not allocate video frame\n");
> -        exit(1);
> +    if (av_frame_make_writable(out_frame) < 0) {
  if ((ret = av_frame_make_writable(out_frame)) < 0) {
> +        log_error("av_frame_make_writable() error", NULL);
> +        return AVERROR_EXIT;
      return ret;
>      }

> -    /* If the output format is not YUV420P, then a temporary YUV420P
> -    * picture is needed too. It is then converted to the required
> -    * output format. */
> -    ost->tmp_frame = NULL;
> -    if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
> -        ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, c->width, c->height);
> -        if (!ost->tmp_frame) {
> -            fprintf(stderr, "Could not allocate temporary picture\n");
> -            exit(1);
> +    if (*type == AVMEDIA_TYPE_AUDIO) {
> +        if ((ret = swr_convert_frame((struct SwrContext *)convert_ctx, out_frame,
> +                                    (const AVFrame *)in_frame)) != 0) {
> +            log_error("Error converting AVFrame", &ret);
> +            return ret;
>          }
> +    } else {
> +        sws_scale((struct SwsContext *)convert_ctx, (const uint8_t * const *)in_frame->data,
> +                  in_frame->linesize, 0, in_frame->height, out_frame->data,
> +                  out_frame->linesize);
>      }

> -    /* copy the stream parameters to the muxer */
> -    ret = avcodec_parameters_from_context(ost->st->codecpar, c);
> -    if (ret < 0) {
> -        fprintf(stderr, "Could not copy the stream parameters\n");
> -        exit(1);
> -    }
> +    out_frame->pts = in_frame->pts;
> +    return 0;
>  }

> -/* Prepare a dummy image. */
> -static void fill_yuv_image(AVFrame *pict, int frame_index,
> -                          int width, int height)
> +static int encode_frame(AVCodecContext *ctx, AVFrame *in_frame, AVPacket *out_pkt)
>  {
> -    int x, y, i;
> +    static int is_flushing_audio = 0, is_flushing_video = 0;
> +    int ret = 0;
> +    int is_audio = ctx->codec->type == AVMEDIA_TYPE_AUDIO;

> -    i = frame_index;
> -
> -    /* Y */
> -    for (y = 0; y < height; y++)
> -        for (x = 0; x < width; x++)
> -            pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
> -
> -    /* Cb and Cr */
> -    for (y = 0; y < height / 2; y++) {
> -        for (x = 0; x < width / 2; x++) {
> -            pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
> -            pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
> -        }
> +    if ((is_audio && !is_flushing_audio) || (!is_audio && !is_flushing_video)) {
> +        ret = avcodec_send_frame(ctx, in_frame);
>      }
> -}
> -
> -static AVFrame *get_video_frame(OutputStream *ost)
> -{
> -    AVCodecContext *c = ost->enc;
> -
> -    /* check if we want to generate more frames */
> -    if (av_compare_ts(ost->next_pts, c->time_base,
> -                      STREAM_DURATION, (AVRational){ 1, 1 }) > 0)
> -        return NULL;
> -
> -    /* when we pass a frame to the encoder, it may keep a reference to it
> -    * internally; make sure we do not overwrite it here */
> -    if (av_frame_make_writable(ost->frame) < 0)
> -        exit(1);
> -
> -    if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
> -        /* as we only generate a YUV420P picture, we must convert it
> -        * to the codec pixel format if needed */
> -        if (!ost->sws_ctx) {
> -            ost->sws_ctx = sws_getContext(c->width, c->height,
> -                                          AV_PIX_FMT_YUV420P,
> -                                          c->width, c->height,
> -                                          c->pix_fmt,
> -                                          SCALE_FLAGS, NULL, NULL, NULL);
> -            if (!ost->sws_ctx) {
> -                fprintf(stderr,
> -                        "Could not initialize the conversion context\n");
> -                exit(1);
> -            }
> +    if (ret < 0) {
You need to check for AVERROR(EAGAIN).
> +        av_log(NULL, AV_LOG_ERROR,
> +              "Error sending frame to the encoder (error '%s')\n", av_err2str(ret));
> +        return ret;
> +    } else if (ret == 0) {
> +        ret = avcodec_receive_packet(ctx, out_pkt);
> +        if ((ret < 0) && (ret != AVERROR(EAGAIN)) && (ret != AVERROR_EOF)) {
> +            av_log(NULL, AV_LOG_ERROR,
> +                  "Error receiving encoded packet (error '%s')\n", av_err2str(ret));
> +            return ret;
>          }
> -        fill_yuv_image(ost->tmp_frame, ost->next_pts, c->width, c->height);
> -        sws_scale(ost->sws_ctx, (const uint8_t * const *) ost->tmp_frame->data,
> -                  ost->tmp_frame->linesize, 0, c->height, ost->frame->data,
> -                  ost->frame->linesize);
> -    } else {
> -        fill_yuv_image(ost->frame, ost->next_pts, c->width, c->height);
>      }

> -    ost->frame->pts = ost->next_pts++;
> +    if (is_audio)
> +        is_flushing_audio = (in_frame == NULL);
> +    else
> +        is_flushing_video = (in_frame == NULL);

> -    return ost->frame;
> +    return ret;
>  }

> -/*
> - * encode one video frame and send it to the muxer
> - * return 1 when encoding is finished, 0 otherwise
> - */
> -static int write_video_frame(AVFormatContext *oc, OutputStream *ost)
> +static int frame_exceeds_stream_duration(AVFrame *fr)
>  {
> -    return write_frame(oc, ost->enc, ost->st, get_video_frame(ost), ost->tmp_pkt);
> +    enum AVMediaType *type = (enum AVMediaType *)(fr->opaque);
Why are you reading from the opaque structure of the frame? Are you sure
this is what you wanted to do?
> +    AVRational tb = (*type == AVMEDIA_TYPE_AUDIO) ? (AVRational){ 1, fr->sample_rate} :
> +                                                    (AVRational){ 1, VIDEO_FRAME_RATE};
> +
> +    return av_compare_ts(fr->pts, tb ,STREAM_DURATION, (AVRational){ 1, 1 }) > 0;
>  }

> -static void close_stream(AVFormatContext *oc, OutputStream *ost)
> +static enum AVMediaType media_type_of_earlier_frame(AVFrame *audio_fr,
> +                                                    AVFrame *video_fr)
>  {
> -    avcodec_free_context(&ost->enc);
> -    av_frame_free(&ost->frame);
> -    av_frame_free(&ost->tmp_frame);
> -    av_packet_free(&ost->tmp_pkt);
> -    sws_freeContext(ost->sws_ctx);
> -    swr_free(&ost->swr_ctx);
> +    if (!audio_fr)
> +        return AVMEDIA_TYPE_VIDEO;
> +    if (!video_fr)
> +        return AVMEDIA_TYPE_AUDIO;
> +
> +    if (av_compare_ts(audio_fr->pts, (AVRational){ 1, audio_fr->sample_rate},
> +                      video_fr->pts, (AVRational){ 1, VIDEO_FRAME_RATE}) < 0)
> +        return AVMEDIA_TYPE_AUDIO;
> +    else
> +        return AVMEDIA_TYPE_VIDEO;
>  }

> -/**************************************************************/
> -/* media file output */
> -
>  int main(int argc, char **argv)
>  {
> -    OutputStream video_st = { 0 }, audio_st = { 0 };
> -    const AVOutputFormat *fmt;
> -    const char *filename;
> -    AVFormatContext *oc;
> -    const AVCodec *audio_codec, *video_codec;
> -    int ret;
> -    int have_video = 0, have_audio = 0;
> -    int encode_video = 0, encode_audio = 0;
> -    AVDictionary *opt = NULL;
> -    int i;
> -
> -    if (argc < 2) {
> +    const char *fname;
> +    AVCodecContext *audio_enc_ctx = NULL, *video_enc_ctx = NULL, *enc_ctx = NULL;
> +
> +    /* NOTE: if you want to modify the audio/video input ".format" parameter,
> +    * you need to modify the corresponding fill_dummy_XXX_frame() function(s) too */
> +    AVCodecParameters audio_in_params = {
> +        .codec_type  = AVMEDIA_TYPE_AUDIO,
> +        .format      = AV_SAMPLE_FMT_S16,
> +        .sample_rate = 44100,
> +        .ch_layout  = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO
> +    },
> +    video_in_params = {
> +        .codec_type = AVMEDIA_TYPE_VIDEO,
> +        .width      = 352,
> +        .height    = 288,
> +        .format    = AV_PIX_FMT_YUV420P
> +    },
> +    video_enc_params  = { 0 }, audio_enc_params = { 0 };
> +    struct AVRational enc_timebases[2];
> +    AVFrame *in_audio_frame = NULL, *converted_audio_frame = NULL,
> +            *in_video_frame = NULL, *converted_video_frame = NULL,
> +            *frame_to_encode = NULL;
> +    struct SwrContext *audio_convert_ctx = NULL;
> +    struct SwsContext *video_convert_ctx = NULL;
> +    enum AVMediaType media_type;
> +    AVFormatContext *out_fmt_ctx = NULL;
> +    AVPacket *out_pkt = av_packet_alloc();
> +    int ret = 0, process_audio = 0, process_video = 0;
> +
> +    if (argc != 2) {
>          printf("usage: %s output_file\n"
>                "API example program to output a media file with libavformat.\n"
> -              "This program generates a synthetic audio and video stream, encodes and\n"
> +              "This program generates a synthetic audio and/or video stream, encodes and\n"
>                "muxes them into a file named output_file.\n"
>                "The output format is automatically guessed according to the file extension.\n"
> -              "Raw images can also be output by using '%%d' in the filename.\n"
> +              "BMP or JPEG images can also be output by using '%%d' in the filename.\n"
>                "\n", argv[0]);
> -        return 1;
> +        return AVERROR_EXIT;
This return value is sent to the operating system with the exit() system 
call so you don't actually want to return an AVERROR value here.

>      }

> -    filename = argv[1];
> -    for (i = 2; i+1 < argc; i+=2) {
> -        if (!strcmp(argv[i], "-flags") || !strcmp(argv[i], "-fflags"))
> -            av_dict_set(&opt, argv[i]+1, argv[i+1], 0);
> +    fname = argv[1];
> +    if (!is_extension_supported(fname)) {
> +        ret = AVERROR_EXIT;
> +        goto end;
>      }

> -    /* allocate the output media context */
> -    avformat_alloc_output_context2(&oc, NULL, NULL, filename);
> -    if (!oc) {
> -        printf("Could not deduce output format from file extension: using MPEG.\n");
> -        avformat_alloc_output_context2(&oc, NULL, "mpeg", filename);
> -    }
> -    if (!oc)
> -        return 1;
> -
> -    fmt = oc->oformat;
> -
> -    /* Add the audio and video streams using the default format codecs
> -    * and initialize the codecs. */
> -    if (fmt->video_codec != AV_CODEC_ID_NONE) {
> -        add_stream(&video_st, oc, &video_codec, fmt->video_codec);
> -        have_video = 1;
> -        encode_video = 1;
> +    /* Desume the default codecs and their default parameters from the filename */
> +    if ((ret = get_default_enc_params(&audio_enc_params, fname, AVMEDIA_TYPE_AUDIO)) < 0)
> +        goto end;
> +    if ((ret = get_default_enc_params(&video_enc_params, fname, AVMEDIA_TYPE_VIDEO)) < 0)
> +        goto end;
> +    process_audio = audio_enc_params.codec_id != AV_CODEC_ID_NONE;
> +    process_video = video_enc_params.codec_id != AV_CODEC_ID_NONE;
> +    if (!process_audio && !process_video) {
> +        log_error("Could not get default encoder(s)", NULL);
> +        ret = AVERROR_EXIT;
> +        goto end;
>      }
> -    if (fmt->audio_codec != AV_CODEC_ID_NONE) {
> -        add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec);
> -        have_audio = 1;
> -        encode_audio = 1;
> -    }
> -
> -    /* Now that all the parameters are set, we can open the audio and
> -    * video codecs and allocate the necessary encode buffers. */
> -    if (have_video)
> -        open_video(oc, video_codec, &video_st, opt);
> -
> -    if (have_audio)
> -        open_audio(oc, audio_codec, &audio_st, opt);
> -
> -    av_dump_format(oc, 0, filename, 1);

> -    /* open the output file, if needed */
> -    if (!(fmt->flags & AVFMT_NOFILE)) {
> -        ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);
> -        if (ret < 0) {
> -            fprintf(stderr, "Could not open '%s': %s\n", filename,
> -                    av_err2str(ret));
> -            return 1;
> -        }
> +    if (process_audio) {
> +        /* Prepare the audio encoder*/
> +        if ((ret = init_encoder(&audio_enc_ctx, &audio_enc_params)) < 0)
> +            goto end;
> +        enc_timebases[0] = audio_enc_ctx->time_base;
> +        audio_in_params.frame_size  = audio_enc_params.frame_size  = audio_enc_ctx->frame_size;
> +
> +        /* Allocate an audio resampler and its input and output AVFrames */
> +        if ((ret = init_audio_convert(&audio_convert_ctx, &audio_in_params,
> +                                      &audio_enc_params)) < 0)
> +            goto end;
> +        if ((ret = init_avframe(&in_audio_frame, &audio_in_params)) < 0)
> +            goto end;
> +        if ((ret = init_avframe(&converted_audio_frame, &audio_enc_params)) < 0)
> +            goto end;
>      }

> -    /* Write the stream header, if any. */
> -    ret = avformat_write_header(oc, &opt);
> -    if (ret < 0) {
> -        fprintf(stderr, "Error occurred when opening output file: %s\n",
> -                av_err2str(ret));
> -        return 1;
> +    if (process_video) {
> +        video_enc_params.width  = video_in_params.width;
> +        video_enc_params.height = video_in_params.height;
> +        if ((ret = init_encoder(&video_enc_ctx, &video_enc_params)) < 0)
> +            goto end;
> +        enc_timebases[1] = video_enc_ctx->time_base;
> +        if ((ret = init_video_convert(&video_convert_ctx,&video_in_params,
> +                                      &video_enc_params)) < 0)
> +            goto end;
> +        if ((ret = init_avframe(&in_video_frame, &video_in_params)) < 0)
> +            goto end;
> +        if ((ret = init_avframe(&converted_video_frame, &video_enc_params)) < 0)
> +            goto end;
>      }

> -    while (encode_video || encode_audio) {
> -        /* select the stream to encode */
> -        if (encode_video &&
> -            (!encode_audio || av_compare_ts(video_st.next_pts, video_st.enc->time_base,
> -                                            audio_st.next_pts, audio_st.enc->time_base) <= 0)) {
> -            encode_video = !write_video_frame(oc, &video_st);
> +    /* Create the output container for the encoded frames */
> +    if ((ret = init_muxer(&out_fmt_ctx, audio_enc_ctx, video_enc_ctx, fname)) < 0)
> +        goto end;
> +    out_fmt_ctx->opaque = &enc_timebases;
> +
> +    while (process_audio || process_video) {
> +
> +        frame_to_encode = NULL;
> +        media_type = media_type_of_earlier_frame(in_audio_frame, in_video_frame);
> +
> +        /* fill and convert the input frames */
> +        if (media_type == AVMEDIA_TYPE_AUDIO) {
> +            enc_ctx = audio_enc_ctx;
> +            fill_dummy_s16_frame(in_audio_frame);
> +            if ((ret = convert_frame(audio_convert_ctx, in_audio_frame,
> +                                    converted_audio_frame)) != 0)
> +                goto end;
> +            if (!frame_exceeds_stream_duration(converted_audio_frame))
> +                frame_to_encode = converted_audio_frame;
>          } else {
> -            encode_audio = !write_audio_frame(oc, &audio_st);
> +            enc_ctx = video_enc_ctx;
> +            fill_dummy_yuv420p_frame(in_video_frame);
> +            if ((ret = convert_frame(video_convert_ctx, in_video_frame,
> +                                    converted_video_frame)) != 0)
> +                goto end;
> +            if (!frame_exceeds_stream_duration(in_video_frame))
> +                frame_to_encode = converted_video_frame;
>          }
> -    }
> -
> -    av_write_trailer(oc);

> -    /* Close each codec. */
> -    if (have_video)
> -        close_stream(oc, &video_st);
> -    if (have_audio)
> -        close_stream(oc, &audio_st);
> +        /* encode the converted frames and mux the encoded packets */
> +        if ((ret = encode_frame(enc_ctx, frame_to_encode, out_pkt)) == 0) {
> +            if ((ret = mux_encoded_pkt(out_pkt, out_fmt_ctx, media_type)) < 0)
> +              goto end;
> +        }

> -    if (!(fmt->flags & AVFMT_NOFILE))
> -        /* Close the output file. */
> -        avio_closep(&oc->pb);
> +        /* check if the encoders have been fully flushed */
> +        process_audio &= !((ret == AVERROR_EOF) && (media_type == AVMEDIA_TYPE_AUDIO));
> +        process_video &= !((ret == AVERROR_EOF) && (media_type == AVMEDIA_TYPE_VIDEO));

> -    /* free the stream */
> -    avformat_free_context(oc);
> +    }

> -    return 0;
> +    av_write_trailer(out_fmt_ctx);
> +    ret = 0;
> +
> +end:
> +
> +    avcodec_free_context(&audio_enc_ctx);
> +    avcodec_free_context(&video_enc_ctx);
> +    av_frame_free(&in_audio_frame);
> +    av_frame_free(&in_video_frame);
> +    av_frame_free(&converted_audio_frame);
> +    av_frame_free(&converted_video_frame);
> +    swr_free(&audio_convert_ctx);
> +    sws_freeContext(video_convert_ctx);
> +    if (out_fmt_ctx)
> +        avio_closep(&out_fmt_ctx->pb);
> +    avformat_free_context(out_fmt_ctx);
> +    av_packet_free(&out_pkt);
> +
> +    return ret;
>  }
> -- 
> 2.32.0

This isn't a thorough review since I'm not familiar enough with the mux 
API to really say whether or not it was used correctly, but this is what 
I noticed on first glance.
Paolo Prete June 19, 2022, 12:39 a.m. UTC | #4
Il sabato 18 giugno 2022, 19:07:11 CEST, Andreas Rheinhardt <andreas.rheinhardt@outlook.com> ha scritto:  
 
 >>Paolo Prete:
>> Please review this. It's a code rewrite of doc/examples/muxing.c which improves readability and fixes issues.
>> More specifically:
>> *) Original functions of muxing.c have generic/unclear/ambiguous names and they don't group logically related blocks of code: this makes the code hard to read. See for example open_audio/video() and add_stream(), which mix initialization of the encoders' and muxers' stuff. A redundant structure with an ambiguous name ("struct OutputStream"), which is not part of API, is widely used too. The patch uses functions with clearer names and with blocks of code strictly logically related, such as: init_encoder(), init_avframe(), init_muxer(), convert_frame(), encode_frame(), mux_encoded_pkt()...

>Using a structure for the user's data is actually intentional, because
>users will probably use one, too. And given that it is the user's
>structure it is of course not part of the public API.
This is not what I meant. The API already has all the structures the user needs for managing the whole flow of an A/V pipeline. There's no need for another struct (which is therefore redundant, and has a meaningless name) for exchanging data between functions. Please consider the code of the patch from a different perspective. In the main() function it explicitly creates an encoder, a converter and a muxer and the required input/output frames: nothing more, nothing else. And it simply calls fill_frame(), convert_frame(), encode_frame(), mux_encoded_pkt(). This is what I suggest as readable code which represents an A/V pipeline. These functions have names that represent exactly what they do, instead of using generic/misleading names such as "open_audio()" or "write_frame()" etc. To do that I had to rewrite the code from scratch, so it's not possible to split the patch into small pieces, as you ask.


> There is just one thing that I immediately noticed:
>
>
> sizeof(AVCodecParameters) is not public, you must not put
> AVCodecParameters on the stack.

I just fixed this, thanks. See the attached patch in my response to Leo Izen.
Paolo Prete June 19, 2022, 1:05 a.m. UTC | #5
Sorry: I had problems with my email client in formatting the previous message. I just try to resend it.

>> Il sabato 18 giugno 2022, 17:18:18 CEST, Leo Izen <leo.izen@gmail.com> ha scritto:
>>On 6/18/22 08:06, Paolo Prete wrote:
>> +{
>> +    if (num)
>> +        av_log(NULL, AV_LOG_ERROR, "%s (error '%s')\n", s, av_err2str(*num));
>> +    else
>> +        av_log(NULL, AV_LOG_ERROR, "%s\n", s);
> +}
>This does not need to be a pointer. Convention is that negative values
>are errors and nonnegative values are not. So you could always use
>something like: if (num < 0).

The pointer makes it clearer, when I call the function, that I'm not managing an error with a number id.

>> -    AVPacket *tmp_pkt;
>> +    if (type == AVMEDIA_TYPE_AUDIO)
>> +        enc_time_base = ((AVRational *)out_fmt_ctx->opaque)[0];
>> +    else
>> +        enc_time_base = ((AVRational *)out_fmt_ctx->opaque)[1];
>>
>Why are you referencing the opaque elements of out_fmt_ctx?

I pass, through the opaque element, user data to the muxer. That data (the timebases of the audio and video encoders) will be used for rescaling ts to the
muxer timebases. Otherwise I would have to pass them through the function's params, which would make the function prototype longer and less readable.

>>
>> -static int write_frame(AVFormatContext *fmt_ctx, AVCodecContext *c,
>> -                      AVStream *st, AVFrame *frame, AVPacket *pkt)
>> +static int is_extension_supported(const char *filename)
>Why are you artificially limiting what is permitted?

From what I see, not all extensions are supported without modifying the default settings. In fact, as specified in the commit msg, some extensions don't work and the original muxing.c fails with them.

>> +    if (!(c = avcodec_find_encoder(id))) {
>> +        avformat_free_context(tmp_fctx);
>> +        return ret;
>You probably don't want to return "ret" here as you don't assign it.

ret is initialized to 0 (= success) at the definition of the variable. And in this case, the function succeeds: the returned AVCodecParameters is left with AV_CODEC_ID_NONE.

>> -    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
>> -        c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
>> +    return ret;
> Again, why are you returning ret if you are not assigning to it?

as above, ret is assigned to 0.

>> +    codec = avcodec_find_encoder(params->codec_id);
>> +    *enc_ctx = avcodec_alloc_context3(codec);
>Don't attempt to allocate anything until after you check if the codec is
>found.

Is it really necessary?
It has been already checked in the line with "if (process_audio/video) {" and the API doxy says that a NULL parameter will only cause that codec-specific defaults won't be initialized (so  it appears safe to me).

>> +    if (!codec) {
>> +        log_error("Could not allocate the encoding context", NULL);
>This error message does not match the check, which is if the codec is found.
>> +        return AVERROR_EXIT;
>return AVERROR_CODEC_NOT_FOUND;

Instead of changing the error msg, I think that what was wrong is the check. I fixed it with
if (!(*enc_ctx = avcodec_alloc_context3(codec))) {

>> +        (*enc_ctx)->sample_rate = params->sample_rate;
>> +        (*enc_ctx)->time_base  = (AVRational){1, params->sample_rate};
>Use av_make_q to avoid casting.

Done, thanks.

> +        return ret;
> +    } else
> +        return 0;
This violates the coding style, you need to use braces {} for the else
block if you also use it for the if block.

Done, thanks.

> +    if ((ret = av_frame_get_buffer(*frame, 0)) < 0) {
> +        log_error("Could not allocate buffer for AVFrame", &ret);
> +        return AVERROR(ENOMEM);
> +    } else
> +        return 0;
You don't need the else block here at all.

Done, thanks.

>> +static int init_video_convert(struct SwsContext **ctx, AVCodecParameters *in_params,
>> +                              AVCodecParameters *out_params)
>> +{
>This paper-thin wrapper function is unnecessary, just inline it.

This is intentional: even if it's a paper-thin wrapper, it shortens the code of the main() function
by making it quicker to understand that the audio/video converters are initialized with their respective AVCodecParameters. I would wait for feedback about this from other readers, before changing the code.

> +
> +    if (!tincr) {
> +        t      = 0;
> +        tincr  = 2 * M_PI * 110.0 / frame->sample_rate;
> +        /* increment frequency by 110 Hz per second */
> +        tincr2  = tincr / frame->sample_rate;
> What are you doing here? Why are you doing it?
>>      }
>> -
>> -    return write_frame(oc, c, ost->st, frame, ost->tmp_pkt);
>> +    for (j = 0; j <frame->nb_samples; j++) {
>> +        v = (int)(sin(t) * 10000);
>> +        for (i = 0; i < frame->ch_layout.nb_channels; i++)
>> +            *data++ = v;
>> +        t    += tincr;
>> +        tincr += tincr2;
>> +    }
>> +    frame->pts = frame->nb_samples*(++frame_ctr);
>If you're trying to populate a stream, you should be using the aevalsrc
>filter, which exists for exactly this purpose. Otherwise just populate
>it with zeroes (silence).
>>  }
>>
>> -/**************************************************************/
>> -/* video output */
>> -
>> -static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
>> +static void fill_dummy_yuv420p_frame(AVFrame *frame)
>>  {
>There's a testsrc filter, or just fill a frame with zeroes (black).
>Don't reinvent the wheel in an example, that discourages people from
>using features that exist.

This is all copied from the original muxing.c example. These dummy audio/video frames consist of a few lines of code and they are common in the doc/examples files. See also encode-audio.c, encode-video.c. Adding a filtering context to the current example would consequently require patching (and maybe renaming) the other files as well. And if you patch, for example, "encode-audio.c" in that way, the reader won't focus on the encoding task, because the filtering block of code would be somewhat distracting.

>> -            }
>> +    if (ret < 0) {
>You need to check for AVERROR(EAGAIN).
>> +        av_log(NULL, AV_LOG_ERROR,
>> +              "Error sending frame to the encoder (error '%s')\n", av_err2str(ret));
>> +        return ret;

Is it really necessary to check for AVERROR(EAGAIN) when sending the frame to the encoder, in this specific case?
The function is written in such a way that the encoder's output is always read before sending new frames. Note that the original muxing.c does not check this either.

>> -    return write_frame(oc, ost->enc, ost->st, get_video_frame(ost), ost->tmp_pkt);
>> +    enum AVMediaType *type = (enum AVMediaType *)(fr->opaque);
>Why are you reading from the opaque structure of the frame. Are you sure
>this is what you wanted to do?

Yes, it just stores additional info for the frame (it says whether the frame contains video or audio data), which will be used later in the "frame_exceeds_stream_duration()" function.

>>                "The output format is automatically guessed according to the file extension.\n"
>> -              "Raw images can also be output by using '%%d' in the filename.\n"
>> +              "BMP or JPEG images can also be output by using '%%d' in the filename.\n"
>>                "\n", argv[0]);
>> -        return 1;
>> +        return AVERROR_EXIT;
>This return value is sent to the operating system with the exit() system
>call so you don't actually want to return an AVERROR value here.

I replaced it with return 1.
I also allocated AVCodecParameters with the proper alloc() function, as Andreas suggested.
A new patch is attached to this mail.
Leo Izen June 19, 2022, 9:21 p.m. UTC | #6
On 6/18/22 21:05, Paolo Prete wrote:
> A new patch is attached to this mail.

Don't forget to add -v2 to your git format-patch line, which changes the 
patch header so it says [PATCH v2], which makes it easier for other 
readers to keep track of things. Also, it's usually a good idea to 
provide a shortlist of what changed in the new patch to help out
reviewers.

- Leo Izen (thebombzen)
diff mbox series

Patch

From 8a4e942a001ae49dc052899f331ed43abf954dda Mon Sep 17 00:00:00 2001
From: paolo <paolopr976@gmail.com>
Date: Sat, 18 Jun 2022 13:53:55 +0200
Subject: [PATCH] doc/examples/muxing: code rewrite with improved readability
 and fixed issues

               Improved readability with functions that have clearer prototypes and that don't mix logically unrelated blocks of code

               Fixed issues in case of unsupported extensions

               Fixed memory leaks on errors, which are now properly propagated to the main() function

               Fixed issue on raw images output

               fprintf() replaced with av_log()

               Input A/V parameters exposed in the main() function and easier to customize
---
 doc/examples/muxing.c | 905 +++++++++++++++++++-----------------------
 1 file changed, 406 insertions(+), 499 deletions(-)

diff --git a/doc/examples/muxing.c b/doc/examples/muxing.c
index 3acb778322..04739995d8 100644
--- a/doc/examples/muxing.c
+++ b/doc/examples/muxing.c
@@ -1,5 +1,5 @@ 
 /*
- * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2022 Paolo Prete (paolopr976 at gmail.com) after Fabrice Bellard
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -24,625 +24,532 @@ 
  * @file
  * libavformat API example.
  *
- * Output a media file in any supported libavformat format. The default
+ * Output a media file in a set of supported libavformat formats. The default
  * codecs are used.
  * @example muxing.c
  */
 
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-
-#include <libavutil/avassert.h>
-#include <libavutil/channel_layout.h>
-#include <libavutil/opt.h>
-#include <libavutil/mathematics.h>
-#include <libavutil/timestamp.h>
 #include <libavcodec/avcodec.h>
 #include <libavformat/avformat.h>
-#include <libswscale/swscale.h>
+#include <libavutil/timestamp.h>
 #include <libswresample/swresample.h>
+#include <libswscale/swscale.h>
 
-#define STREAM_DURATION   10.0
-#define STREAM_FRAME_RATE 25 /* 25 images/s */
-#define STREAM_PIX_FMT    AV_PIX_FMT_YUV420P /* default pix_fmt */
-
-#define SCALE_FLAGS SWS_BICUBIC
+#define VIDEO_FRAME_RATE 25 /* 25 images/s */
+#define VIDEO_SCALE_FLAGS SWS_BICUBIC
+#define STREAM_DURATION 10.0 /* 10 seconds */
 
-// a wrapper around a single output AVStream
-typedef struct OutputStream {
-    AVStream *st;
-    AVCodecContext *enc;
+static void log_error(const char *s, int *num)
+{
+    if (num)
+        av_log(NULL, AV_LOG_ERROR, "%s (error '%s')\n", s, av_err2str(*num));
+    else
+        av_log(NULL, AV_LOG_ERROR, "%s\n", s);
+}
 
-    /* pts of the next frame that will be generated */
-    int64_t next_pts;
-    int samples_count;
+static int mux_encoded_pkt(AVPacket *out_pkt, AVFormatContext *out_fmt_ctx,
+                              enum AVMediaType type)
+{
+    int ret;
+    AVRational enc_time_base, str_time_base;
 
-    AVFrame *frame;
-    AVFrame *tmp_frame;
+    if (out_fmt_ctx->streams[0]->codecpar->codec_type == type)
+        out_pkt->stream_index = 0;
+    else if ((out_fmt_ctx->nb_streams > 1) && (type == AVMEDIA_TYPE_VIDEO))
+        out_pkt->stream_index = 1;
+    str_time_base = out_fmt_ctx->streams[out_pkt->stream_index]->time_base;
 
-    AVPacket *tmp_pkt;
+    if (type == AVMEDIA_TYPE_AUDIO)
+        enc_time_base = ((AVRational *)out_fmt_ctx->opaque)[0];
+    else
+        enc_time_base = ((AVRational *)out_fmt_ctx->opaque)[1];
 
-    float t, tincr, tincr2;
+    av_packet_rescale_ts(out_pkt, enc_time_base, str_time_base);
 
-    struct SwsContext *sws_ctx;
-    struct SwrContext *swr_ctx;
-} OutputStream;
+    av_log(NULL, AV_LOG_INFO, "stream_index=%d, size=%d, pts_time=%s\n",
+           out_pkt->stream_index,
+           out_pkt->size, av_ts2timestr(out_pkt->pts, &str_time_base));
 
-static void log_packet(const AVFormatContext *fmt_ctx, const AVPacket *pkt)
-{
-    AVRational *time_base = &fmt_ctx->streams[pkt->stream_index]->time_base;
+    if ((ret = av_interleaved_write_frame(out_fmt_ctx, out_pkt)) < 0)
+        log_error("Error calling av_interleaved_write_frame()", &ret);
 
-    printf("pts:%s pts_time:%s dts:%s dts_time:%s duration:%s duration_time:%s stream_index:%d\n",
-           av_ts2str(pkt->pts), av_ts2timestr(pkt->pts, time_base),
-           av_ts2str(pkt->dts), av_ts2timestr(pkt->dts, time_base),
-           av_ts2str(pkt->duration), av_ts2timestr(pkt->duration, time_base),
-           pkt->stream_index);
+    return ret;
 }
 
-static int write_frame(AVFormatContext *fmt_ctx, AVCodecContext *c,
-                       AVStream *st, AVFrame *frame, AVPacket *pkt)
+static int is_extension_supported(const char *filename)
 {
-    int ret;
+    const char *extensions[] = {".aac", ".avi", ".bmp", ".jpeg", ".mka",
+                                ".mkv", ".mov", ".mp4", ".flv",  ".ts"};
+    int i, size = sizeof(extensions) / sizeof(extensions[0]);
+    char *dot = strrchr(filename, '.');
 
-    // send the frame to the encoder
-    ret = avcodec_send_frame(c, frame);
-    if (ret < 0) {
-        fprintf(stderr, "Error sending a frame to the encoder: %s\n",
-                av_err2str(ret));
-        exit(1);
+    for (i = 0; i < size; i++) {
+        if (dot && !strcmp(dot, extensions[i]))
+            return 1;
     }
 
-    while (ret >= 0) {
-        ret = avcodec_receive_packet(c, pkt);
-        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
-            break;
-        else if (ret < 0) {
-            fprintf(stderr, "Error encoding a frame: %s\n", av_err2str(ret));
-            exit(1);
-        }
+    log_error("File extension not supported", NULL);
+    av_log(NULL, AV_LOG_WARNING, "Please choose one of the following extensions: ");
+    for (i = 0; i < size - 1; i++)
+        av_log(NULL, AV_LOG_WARNING, "%s, ", extensions[i]);
+    av_log(NULL, AV_LOG_WARNING, "%s\n", extensions[size-1]);
 
-        /* rescale output packet timestamp values from codec to stream timebase */
-        av_packet_rescale_ts(pkt, c->time_base, st->time_base);
-        pkt->stream_index = st->index;
-
-        /* Write the compressed frame to the media file. */
-        log_packet(fmt_ctx, pkt);
-        ret = av_interleaved_write_frame(fmt_ctx, pkt);
-        /* pkt is now blank (av_interleaved_write_frame() takes ownership of
-         * its contents and resets pkt), so that no unreferencing is necessary.
-         * This would be different if one used av_write_frame(). */
-        if (ret < 0) {
-            fprintf(stderr, "Error while writing output packet: %s\n", av_err2str(ret));
-            exit(1);
-        }
-    }
-
-    return ret == AVERROR_EOF ? 1 : 0;
+    return 0;
 }
 
-/* Add an output stream. */
-static void add_stream(OutputStream *ost, AVFormatContext *oc,
-                       const AVCodec **codec,
-                       enum AVCodecID codec_id)
+static int get_default_enc_params(AVCodecParameters *params,
+                                  const char *fname, enum AVMediaType type)
 {
-    AVCodecContext *c;
-    int i;
-
-    /* find the encoder */
-    *codec = avcodec_find_encoder(codec_id);
-    if (!(*codec)) {
-        fprintf(stderr, "Could not find encoder for '%s'\n",
-                avcodec_get_name(codec_id));
-        exit(1);
+    AVFormatContext *tmp_fctx;
+    enum AVCodecID id;
+    const AVCodec *c;
+    int ret = 0;
+
+    if ((ret = avformat_alloc_output_context2(&tmp_fctx, NULL, NULL, fname)) < 0) {
+        log_error("Could not get default encoder", &ret);
+        return AVERROR_EXIT;
     }
 
-    ost->tmp_pkt = av_packet_alloc();
-    if (!ost->tmp_pkt) {
-        fprintf(stderr, "Could not allocate AVPacket\n");
-        exit(1);
-    }
+    id = (type == AVMEDIA_TYPE_AUDIO) ? tmp_fctx->oformat->audio_codec :
+                                        tmp_fctx->oformat->video_codec;
 
-    ost->st = avformat_new_stream(oc, NULL);
-    if (!ost->st) {
-        fprintf(stderr, "Could not allocate stream\n");
-        exit(1);
-    }
-    ost->st->id = oc->nb_streams-1;
-    c = avcodec_alloc_context3(*codec);
-    if (!c) {
-        fprintf(stderr, "Could not alloc an encoding context\n");
-        exit(1);
+    if (!(c = avcodec_find_encoder(id))) {
+        avformat_free_context(tmp_fctx);
+        return ret;
     }
-    ost->enc = c;
-
-    switch ((*codec)->type) {
-    case AVMEDIA_TYPE_AUDIO:
-        c->sample_fmt  = (*codec)->sample_fmts ?
-            (*codec)->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
-        c->bit_rate    = 64000;
-        c->sample_rate = 44100;
-        if ((*codec)->supported_samplerates) {
-            c->sample_rate = (*codec)->supported_samplerates[0];
-            for (i = 0; (*codec)->supported_samplerates[i]; i++) {
-                if ((*codec)->supported_samplerates[i] == 44100)
-                    c->sample_rate = 44100;
-            }
-        }
-        av_channel_layout_copy(&c->ch_layout, &(AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO);
-        ost->st->time_base = (AVRational){ 1, c->sample_rate };
-        break;
-
-    case AVMEDIA_TYPE_VIDEO:
-        c->codec_id = codec_id;
-
-        c->bit_rate = 400000;
-        /* Resolution must be a multiple of two. */
-        c->width    = 352;
-        c->height   = 288;
-        /* timebase: This is the fundamental unit of time (in seconds) in terms
-         * of which frame timestamps are represented. For fixed-fps content,
-         * timebase should be 1/framerate and timestamp increments should be
-         * identical to 1. */
-        ost->st->time_base = (AVRational){ 1, STREAM_FRAME_RATE };
-        c->time_base       = ost->st->time_base;
-
-        c->gop_size      = 12; /* emit one intra frame every twelve frames at most */
-        c->pix_fmt       = STREAM_PIX_FMT;
-        if (c->codec_id == AV_CODEC_ID_MPEG2VIDEO) {
-            /* just for testing, we also add B-frames */
-            c->max_b_frames = 2;
-        }
-        if (c->codec_id == AV_CODEC_ID_MPEG1VIDEO) {
-            /* Needed to avoid using macroblocks in which some coeffs overflow.
-             * This does not happen with normal video, it just happens here as
-             * the motion of the chroma plane does not match the luma plane. */
-            c->mb_decision = 2;
-        }
-        break;
 
-    default:
-        break;
+    params->codec_type = c->type;
+    params->codec_id   = c-> id;
+    if (c->type == AVMEDIA_TYPE_AUDIO) {
+        params->format      = c->sample_fmts ?
+                              c->sample_fmts[0] : AV_SAMPLE_FMT_FLTP;
+        params->ch_layout   = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO;
+        params->sample_rate = c->supported_samplerates ?
+                              c->supported_samplerates[0] : 44100;
+    } else if (c->type == AVMEDIA_TYPE_VIDEO) {
+        params->format = c->pix_fmts ? c->pix_fmts[0] : AV_PIX_FMT_YUV420P;
     }
+    avformat_free_context(tmp_fctx);
 
-    /* Some formats want stream headers to be separate. */
-    if (oc->oformat->flags & AVFMT_GLOBALHEADER)
-        c->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+    return ret;
 }
 
-/**************************************************************/
-/* audio output */
-
-static AVFrame *alloc_audio_frame(enum AVSampleFormat sample_fmt,
-                                  const AVChannelLayout *channel_layout,
-                                  int sample_rate, int nb_samples)
+/*
+ * Look up the encoder for params->codec_id, allocate its context into
+ * *enc_ctx, copy the audio or video settings from params and open it.
+ *
+ * Returns 0 on success or a negative AVERROR code. On failure *enc_ctx may
+ * still hold an allocated context; the caller is expected to release it
+ * with avcodec_free_context().
+ */
+static int init_encoder(AVCodecContext **enc_ctx, AVCodecParameters *params)
 {
-    AVFrame *frame = av_frame_alloc();
+    const AVCodec *codec = NULL;
     int ret;
 
-    if (!frame) {
-        fprintf(stderr, "Error allocating an audio frame\n");
-        exit(1);
+    codec = avcodec_find_encoder(params->codec_id);
+    if (!codec) {
+        log_error("Could not find the encoder", NULL);
+        return AVERROR_ENCODER_NOT_FOUND;
+    }
+
+    /* Check the allocation itself: the context is dereferenced below. */
+    *enc_ctx = avcodec_alloc_context3(codec);
+    if (!*enc_ctx) {
+        log_error("Could not allocate the encoding context", NULL);
+        return AVERROR(ENOMEM);
     }
 
-    frame->format = sample_fmt;
-    av_channel_layout_copy(&frame->ch_layout, channel_layout);
-    frame->sample_rate = sample_rate;
-    frame->nb_samples = nb_samples;
-
-    if (nb_samples) {
-        ret = av_frame_get_buffer(frame, 0);
-        if (ret < 0) {
-            fprintf(stderr, "Error allocating an audio buffer\n");
-            exit(1);
-        }
+    (*enc_ctx)->codec_id   = params->codec_id;
+    (*enc_ctx)->codec_type = params->codec_type;
+    if (params->codec_type == AVMEDIA_TYPE_AUDIO) {
+        (*enc_ctx)->sample_fmt  = params->format;
+        (*enc_ctx)->sample_rate = params->sample_rate;
+        (*enc_ctx)->time_base   = (AVRational){1, params->sample_rate};
+        (*enc_ctx)->ch_layout   = params->ch_layout;
+    } else if (params->codec_type == AVMEDIA_TYPE_VIDEO) {
+        (*enc_ctx)->width      = params->width;
+        (*enc_ctx)->height     = params->height;
+        (*enc_ctx)->time_base  = (AVRational){ 1, VIDEO_FRAME_RATE };
+        (*enc_ctx)->gop_size   = 12;
+        (*enc_ctx)->pix_fmt    = params->format;
     }
 
-    return frame;
+    if ((ret = avcodec_open2(*enc_ctx, codec, NULL)) < 0) {
+        log_error("Could not open the encoder", &ret);
+        return ret;
+    } else
+        return 0;
 }
 
-static void open_audio(AVFormatContext *oc, const AVCodec *codec,
-                       OutputStream *ost, AVDictionary *opt_arg)
+/*
+ * Allocate *frame, set its audio or video properties from params and
+ * allocate its data buffers.
+ *
+ * frame->opaque is set to point at params->codec_type, so params must stay
+ * valid for as long as the frame's opaque pointer is read.
+ *
+ * Returns 0 on success or a negative AVERROR code. On failure *frame may
+ * still be allocated; the caller is expected to call av_frame_free().
+ */
+static int init_avframe(AVFrame **frame, AVCodecParameters *params)
 {
-    AVCodecContext *c;
-    int nb_samples;
     int ret;
-    AVDictionary *opt = NULL;
 
-    c = ost->enc;
+    if (!(*frame = av_frame_alloc())) {
+        log_error("Could not allocate AVFrame", NULL);
+        return AVERROR(ENOMEM);
+    }
 
-    /* open it */
-    av_dict_copy(&opt, opt_arg, 0);
-    ret = avcodec_open2(c, codec, &opt);
-    av_dict_free(&opt);
-    if (ret < 0) {
-        fprintf(stderr, "Could not open audio codec: %s\n", av_err2str(ret));
-        exit(1);
+    (*frame)->opaque = &params->codec_type;
+    if (params->codec_type == AVMEDIA_TYPE_AUDIO) {
+        (*frame)->nb_samples  = params->frame_size;
+        (*frame)->sample_rate = params->sample_rate;
+        (*frame)->format      = params->format;
+        /* AVChannelLayout must not be copied by plain assignment. */
+        if ((ret = av_channel_layout_copy(&(*frame)->ch_layout,
+                                          &params->ch_layout)) < 0) {
+            log_error("Could not copy the channel layout", &ret);
+            return ret;
+        }
+    } else {
+        (*frame)->width  = params->width;
+        (*frame)->height = params->height;
+        (*frame)->format = params->format;
     }
 
-    /* init signal generator */
-    ost->t     = 0;
-    ost->tincr = 2 * M_PI * 110.0 / c->sample_rate;
-    /* increment frequency by 110 Hz per second */
-    ost->tincr2 = 2 * M_PI * 110.0 / c->sample_rate / c->sample_rate;
+    /* Allocate the frame's data buffer */
+    if ((ret = av_frame_get_buffer(*frame, 0)) < 0) {
+        log_error("Could not allocate buffer for AVFrame", &ret);
+        /* Propagate the real cause; it is not necessarily ENOMEM. */
+        return ret;
+    } else
+        return 0;
+}
 
-    if (c->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE)
-        nb_samples = 10000;
-    else
-        nb_samples = c->frame_size;
+/*
+ * Allocate and initialize a resampler in *ctx that converts audio from the
+ * layout/format/rate in in_params to those in out_params.
+ *
+ * Returns 0 on success or a negative AVERROR code. On failure *ctx may
+ * still be allocated; the caller is expected to call swr_free().
+ */
+static int init_audio_convert(struct SwrContext **ctx, AVCodecParameters *in_params,
+                              AVCodecParameters *out_params)
+{
+    int ret;
+
+    ret = swr_alloc_set_opts2(ctx,
+                              &(out_params->ch_layout),
+                              out_params->format, out_params->sample_rate,
+                              &(in_params->ch_layout),
+                              in_params->format, in_params->sample_rate,
+                              0, NULL);
+    if (ret < 0 || !*ctx) {
+        if (ret >= 0)
+            ret = AVERROR(ENOMEM);
+        log_error("Could not allocate resample context", &ret);
+        return ret;
+    }
+
+    /* Initialize now so that an unsupported conversion is reported here
+     * rather than on the first converted frame. */
+    if ((ret = swr_init(*ctx)) < 0) {
+        log_error("Could not initialize resample context", &ret);
+        return ret;
+    }
+
+    return 0;
+}
 
-    ost->frame     = alloc_audio_frame(c->sample_fmt, &c->ch_layout,
-                                       c->sample_rate, nb_samples);
-    ost->tmp_frame = alloc_audio_frame(AV_SAMPLE_FMT_S16, &c->ch_layout,
-                                       c->sample_rate, nb_samples);
+/*
+ * Create in *ctx a scaler that converts pictures from the size and pixel
+ * format in in_params to the size and pixel format in out_params.
+ *
+ * Returns 0 on success, AVERROR(ENOMEM) if the context cannot be created.
+ */
+static int init_video_convert(struct SwsContext **ctx, AVCodecParameters *in_params,
+                              AVCodecParameters *out_params)
+{
+    /* The destination format is out_params->format; comparing it with the
+     * codec id would pass 0 or 1 as the pixel format. */
+    *ctx = sws_getContext(in_params->width, in_params->height,
+                          in_params->format,
+                          out_params->width, out_params->height,
+                          out_params->format,
+                          VIDEO_SCALE_FLAGS, NULL, NULL, NULL);
+    if (!*ctx) {
+        log_error("Could not allocate scale context", NULL);
+        return AVERROR(ENOMEM);
+    } else
+        return 0;
+}
 
-    /* copy the stream parameters to the muxer */
-    ret = avcodec_parameters_from_context(ost->st->codecpar, c);
-    if (ret < 0) {
-        fprintf(stderr, "Could not copy the stream parameters\n");
-        exit(1);
-    }
+/*
+ * Create the output context for filename in *out_fmt_ctx, open the output
+ * file when the format needs one, add one stream per non-NULL encoder
+ * context (audio first, then video) and write the container header.
+ *
+ * Returns 0 on success or a negative AVERROR code. On failure *out_fmt_ctx
+ * may still be allocated; the caller is expected to close and free it.
+ */
+static int init_muxer(AVFormatContext **out_fmt_ctx, AVCodecContext *audio_enc_ctx,
+                      AVCodecContext *video_enc_ctx, const char *filename)
+{
+    int ret;
+    AVStream *out_audio_str, *out_video_str;
 
-    /* create resampler context */
-    ost->swr_ctx = swr_alloc();
-    if (!ost->swr_ctx) {
-        fprintf(stderr, "Could not allocate resampler context\n");
-        exit(1);
+    if ((ret = avformat_alloc_output_context2(out_fmt_ctx, NULL, NULL, filename)) < 0) {
+        log_error("Could not create output context", &ret);
+        return ret;
     }
 
-    /* set options */
-    av_opt_set_chlayout  (ost->swr_ctx, "in_chlayout",       &c->ch_layout,      0);
-    av_opt_set_int       (ost->swr_ctx, "in_sample_rate",     c->sample_rate,    0);
-    av_opt_set_sample_fmt(ost->swr_ctx, "in_sample_fmt",      AV_SAMPLE_FMT_S16, 0);
-    av_opt_set_chlayout  (ost->swr_ctx, "out_chlayout",      &c->ch_layout,      0);
-    av_opt_set_int       (ost->swr_ctx, "out_sample_rate",    c->sample_rate,    0);
-    av_opt_set_sample_fmt(ost->swr_ctx, "out_sample_fmt",     c->sample_fmt,     0);
-
-    /* initialize the resampling context */
-    if ((ret = swr_init(ost->swr_ctx)) < 0) {
-        fprintf(stderr, "Failed to initialize the resampling context\n");
-        exit(1);
+    /* open the output file, if needed */
+    if (!((*out_fmt_ctx)->oformat->flags & AVFMT_NOFILE)) {
+        if ((ret = avio_open(&(*out_fmt_ctx)->pb, filename, AVIO_FLAG_WRITE)) < 0) {
+            log_error("Could not open output file", &ret);
+            return ret;
+        }
     }
-}
 
-/* Prepare a 16 bit dummy audio frame of 'frame_size' samples and
- * 'nb_channels' channels. */
-static AVFrame *get_audio_frame(OutputStream *ost)
-{
-    AVFrame *frame = ost->tmp_frame;
-    int j, i, v;
-    int16_t *q = (int16_t*)frame->data[0];
-
-    /* check if we want to generate more frames */
-    if (av_compare_ts(ost->next_pts, ost->enc->time_base,
-                      STREAM_DURATION, (AVRational){ 1, 1 }) > 0)
-        return NULL;
+    if (audio_enc_ctx) {
+        if (!(out_audio_str = avformat_new_stream(*out_fmt_ctx, NULL))) {
+            log_error("Could not create new stream", NULL);
+            return AVERROR(ENOMEM);
+        }
+        out_audio_str->id = (*out_fmt_ctx)->nb_streams - 1;
+        /* copy the encoder parameters to the muxer stream */
+        if ((ret = avcodec_parameters_from_context(out_audio_str->codecpar,
+                                                   audio_enc_ctx)) < 0) {
+            log_error("Could not copy the stream parameters", &ret);
+            return ret;
+        }
+    }
 
-    for (j = 0; j <frame->nb_samples; j++) {
-        v = (int)(sin(ost->t) * 10000);
-        for (i = 0; i < ost->enc->ch_layout.nb_channels; i++)
-            *q++ = v;
-        ost->t     += ost->tincr;
-        ost->tincr += ost->tincr2;
+    if (video_enc_ctx) {
+        if (!(out_video_str = avformat_new_stream(*out_fmt_ctx, NULL))) {
+            log_error("Could not create new stream", NULL);
+            return AVERROR(ENOMEM);
+        }
+        out_video_str->id = (*out_fmt_ctx)->nb_streams - 1;
+        /* copy the encoder parameters to the muxer stream */
+        if ((ret = avcodec_parameters_from_context(out_video_str->codecpar,
+                                                   video_enc_ctx)) < 0) {
+            log_error("Could not copy the stream parameters", &ret);
+            return ret;
+        }
     }
 
-    frame->pts = ost->next_pts;
-    ost->next_pts  += frame->nb_samples;
+    av_dump_format(*out_fmt_ctx, 0, filename, 1);
 
-    return frame;
+    /* Write the stream header, if any. */
+    if ((ret = avformat_write_header(*out_fmt_ctx, NULL)) < 0) {
+        log_error("avformat_write_header() error", &ret);
+        return ret;
+    } else
+        return 0;
 }
 
-/*
- * encode one audio frame and send it to the muxer
- * return 1 when encoding is finished, 0 otherwise
- */
-static int write_audio_frame(AVFormatContext *oc, OutputStream *ost)
+/*
+ * Fill frame with the next chunk of a synthetic sine sweep and set its pts.
+ *
+ * The same 16-bit sample value is written for every channel, consecutively
+ * into frame->data[0]. The generator state and the frame counter are
+ * function-local statics, so this serves a single audio stream only.
+ * pts is expressed in samples and starts at 0 for the first frame.
+ */
+static void fill_dummy_s16_frame(AVFrame *frame)
 {
-    AVCodecContext *c;
-    AVFrame *frame;
-    int ret;
-    int dst_nb_samples;
-
-    c = ost->enc;
-
-    frame = get_audio_frame(ost);
-
-    if (frame) {
-        /* convert samples from native format to destination codec format, using the resampler */
-        /* compute destination number of samples */
-        dst_nb_samples = av_rescale_rnd(swr_get_delay(ost->swr_ctx, c->sample_rate) + frame->nb_samples,
-                                        c->sample_rate, c->sample_rate, AV_ROUND_UP);
-        av_assert0(dst_nb_samples == frame->nb_samples);
-
-        /* when we pass a frame to the encoder, it may keep a reference to it
-         * internally;
-         * make sure we do not overwrite it here
-         */
-        ret = av_frame_make_writable(ost->frame);
-        if (ret < 0)
-            exit(1);
-
-        /* convert to destination format */
-        ret = swr_convert(ost->swr_ctx,
-                          ost->frame->data, dst_nb_samples,
-                          (const uint8_t **)frame->data, frame->nb_samples);
-        if (ret < 0) {
-            fprintf(stderr, "Error while converting\n");
-            exit(1);
-        }
-        frame = ost->frame;
-
-        frame->pts = av_rescale_q(ost->samples_count, (AVRational){1, c->sample_rate}, c->time_base);
-        ost->samples_count += dst_nb_samples;
+    int j, i, v;
+    static float t, tincr, tincr2;
+    int16_t *data = (int16_t*)frame->data[0];
+    static int frame_ctr;
+
+    /* Lazily initialize the signal generator on the first call. */
+    if (!tincr) {
+        t       = 0;
+        tincr   = 2 * M_PI * 110.0 / frame->sample_rate;
+        /* increment frequency by 110 Hz per second */
+        tincr2  = tincr / frame->sample_rate;
     }
-
-    return write_frame(oc, c, ost->st, frame, ost->tmp_pkt);
+    for (j = 0; j <frame->nb_samples; j++) {
+        v = (int)(sin(t) * 10000);
+        for (i = 0; i < frame->ch_layout.nb_channels; i++)
+            *data++ = v;
+        t     += tincr;
+        tincr += tincr2;
+    }
+    /* Post-increment so that the first frame starts at pts 0. */
+    frame->pts = (int64_t)frame->nb_samples * frame_ctr++;
 }
 
-/**************************************************************/
-/* video output */
-
-static AVFrame *alloc_picture(enum AVPixelFormat pix_fmt, int width, int height)
+/*
+ * Fill frame with the next picture of a synthetic moving pattern and set
+ * its pts to the picture index (0 for the first picture).
+ *
+ * Writes a full-size plane 0 and half-size planes 1 and 2. The picture
+ * index is a function-local static, so this serves a single video stream.
+ */
+static void fill_dummy_yuv420p_frame(AVFrame *frame)
 {
-    AVFrame *picture;
-    int ret;
-
-    picture = av_frame_alloc();
-    if (!picture)
-        return NULL;
+    int x, y;
+    static int idx;
 
-    picture->format = pix_fmt;
-    picture->width  = width;
-    picture->height = height;
+    /* Y: rows are bounded by the height, not the width */
+    for (y = 0; y < frame->height; y++)
+        for (x = 0; x < frame->width; x++)
+            frame->data[0][y * frame->linesize[0] + x] = x + y + idx * 3;
 
-    /* allocate the buffers for the frame data */
-    ret = av_frame_get_buffer(picture, 0);
-    if (ret < 0) {
-        fprintf(stderr, "Could not allocate frame data.\n");
-        exit(1);
+    /* Cb and Cr */
+    for (y = 0; y < frame->height / 2; y++) {
+        for (x = 0; x < frame->width / 2; x++) {
+            frame->data[1][y * frame->linesize[1] + x] = 128 + y + idx * 2;
+            frame->data[2][y * frame->linesize[2] + x] = 64 + x + idx * 5;
+        }
     }
 
-    return picture;
+    frame->pts = idx++;
 }
 
-static void open_video(AVFormatContext *oc, const AVCodec *codec,
-                       OutputStream *ost, AVDictionary *opt_arg)
+/*
+ * Convert in_frame into out_frame and copy the pts across.
+ *
+ * in_frame->opaque must point to the frame's enum AVMediaType: for audio
+ * convert_ctx is used as a struct SwrContext, otherwise as a struct
+ * SwsContext.
+ *
+ * Returns 0 on success or a negative AVERROR code.
+ */
+static int convert_frame(void *convert_ctx, AVFrame *in_frame, AVFrame *out_frame)
 {
     int ret;
-    AVCodecContext *c = ost->enc;
-    AVDictionary *opt = NULL;
-
-    av_dict_copy(&opt, opt_arg, 0);
+    enum AVMediaType *type = (enum AVMediaType *)(in_frame->opaque);
 
-    /* open the codec */
-    ret = avcodec_open2(c, codec, &opt);
-    av_dict_free(&opt);
-    if (ret < 0) {
-        fprintf(stderr, "Could not open video codec: %s\n", av_err2str(ret));
-        exit(1);
-    }
-
-    /* allocate and init a re-usable frame */
-    ost->frame = alloc_picture(c->pix_fmt, c->width, c->height);
-    if (!ost->frame) {
-        fprintf(stderr, "Could not allocate video frame\n");
-        exit(1);
+    /* The encoder may still hold a reference to out_frame's buffers. */
+    if ((ret = av_frame_make_writable(out_frame)) < 0) {
+        log_error("av_frame_make_writable() error", &ret);
+        return ret;
     }
 
-    /* If the output format is not YUV420P, then a temporary YUV420P
-     * picture is needed too. It is then converted to the required
-     * output format. */
-    ost->tmp_frame = NULL;
-    if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
-        ost->tmp_frame = alloc_picture(AV_PIX_FMT_YUV420P, c->width, c->height);
-        if (!ost->tmp_frame) {
-            fprintf(stderr, "Could not allocate temporary picture\n");
-            exit(1);
+    if (*type == AVMEDIA_TYPE_AUDIO) {
+        if ((ret = swr_convert_frame((struct SwrContext *)convert_ctx, out_frame,
+                                     (const AVFrame *)in_frame)) != 0) {
+            log_error("Error converting AVFrame", &ret);
+            return ret;
         }
+    } else {
+        /* sws_scale() returns the output slice height; negative is an error */
+        ret = sws_scale((struct SwsContext *)convert_ctx,
+                        (const uint8_t * const *)in_frame->data,
+                        in_frame->linesize, 0, in_frame->height, out_frame->data,
+                        out_frame->linesize);
+        if (ret < 0) {
+            log_error("Error converting AVFrame", &ret);
+            return ret;
+        }
     }
 
-    /* copy the stream parameters to the muxer */
-    ret = avcodec_parameters_from_context(ost->st->codecpar, c);
-    if (ret < 0) {
-        fprintf(stderr, "Could not copy the stream parameters\n");
-        exit(1);
-    }
+    out_frame->pts = in_frame->pts;
+    return 0;
 }
 
-/* Prepare a dummy image. */
-static void fill_yuv_image(AVFrame *pict, int frame_index,
-                           int width, int height)
+/*
+ * Send in_frame to the encoder ctx and try to receive one packet into
+ * out_pkt.
+ *
+ * A NULL in_frame starts flushing. The flushing state is remembered in
+ * function-local statics, one flag per media type (not per context), and
+ * while it is set avcodec_send_frame() is skipped.
+ *
+ * Returns the negative error of avcodec_send_frame() if sending fails,
+ * otherwise the result of avcodec_receive_packet(): 0 when out_pkt holds a
+ * packet, AVERROR(EAGAIN), AVERROR_EOF, or another negative error (logged).
+ */
+static int encode_frame(AVCodecContext *ctx, AVFrame *in_frame, AVPacket *out_pkt)
 {
-    int x, y, i;
+    static int is_flushing_audio = 0, is_flushing_video = 0;
+    int ret = 0;
+    int is_audio = ctx->codec->type == AVMEDIA_TYPE_AUDIO;
 
-    i = frame_index;
-
-    /* Y */
-    for (y = 0; y < height; y++)
-        for (x = 0; x < width; x++)
-            pict->data[0][y * pict->linesize[0] + x] = x + y + i * 3;
-
-    /* Cb and Cr */
-    for (y = 0; y < height / 2; y++) {
-        for (x = 0; x < width / 2; x++) {
-            pict->data[1][y * pict->linesize[1] + x] = 128 + y + i * 2;
-            pict->data[2][y * pict->linesize[2] + x] = 64 + x + i * 5;
-        }
+    /* Feed the encoder only if flushing of this media type has not begun */
+    if ((is_audio && !is_flushing_audio) || (!is_audio && !is_flushing_video)) {
+        ret = avcodec_send_frame(ctx, in_frame);
     }
-}
-
-static AVFrame *get_video_frame(OutputStream *ost)
-{
-    AVCodecContext *c = ost->enc;
-
-    /* check if we want to generate more frames */
-    if (av_compare_ts(ost->next_pts, c->time_base,
-                      STREAM_DURATION, (AVRational){ 1, 1 }) > 0)
-        return NULL;
-
-    /* when we pass a frame to the encoder, it may keep a reference to it
-     * internally; make sure we do not overwrite it here */
-    if (av_frame_make_writable(ost->frame) < 0)
-        exit(1);
-
-    if (c->pix_fmt != AV_PIX_FMT_YUV420P) {
-        /* as we only generate a YUV420P picture, we must convert it
-         * to the codec pixel format if needed */
-        if (!ost->sws_ctx) {
-            ost->sws_ctx = sws_getContext(c->width, c->height,
-                                          AV_PIX_FMT_YUV420P,
-                                          c->width, c->height,
-                                          c->pix_fmt,
-                                          SCALE_FLAGS, NULL, NULL, NULL);
-            if (!ost->sws_ctx) {
-                fprintf(stderr,
-                        "Could not initialize the conversion context\n");
-                exit(1);
-            }
+    if (ret < 0) {
+        av_log(NULL, AV_LOG_ERROR,
+               "Error sending frame to the encoder (error '%s')\n", av_err2str(ret));
+        return ret;
+    } else if (ret == 0) {
+        /* EAGAIN and EOF are passed back to the caller without logging */
+        ret = avcodec_receive_packet(ctx, out_pkt);
+        if ((ret < 0) && (ret != AVERROR(EAGAIN)) && (ret != AVERROR_EOF)) {
+            av_log(NULL, AV_LOG_ERROR,
+                   "Error receiving encoded packet (error '%s')\n", av_err2str(ret));
+            return ret;
         }
-        fill_yuv_image(ost->tmp_frame, ost->next_pts, c->width, c->height);
-        sws_scale(ost->sws_ctx, (const uint8_t * const *) ost->tmp_frame->data,
-                  ost->tmp_frame->linesize, 0, c->height, ost->frame->data,
-                  ost->frame->linesize);
-    } else {
-        fill_yuv_image(ost->frame, ost->next_pts, c->width, c->height);
     }
 
-    ost->frame->pts = ost->next_pts++;
+    /* Remember whether this call requested a flush (NULL frame) */
+    if (is_audio)
+        is_flushing_audio = (in_frame == NULL);
+    else
+        is_flushing_video = (in_frame == NULL);
 
-    return ost->frame;
+    return ret;
 }
 
-/*
- * encode one video frame and send it to the muxer
- * return 1 when encoding is finished, 0 otherwise
- */
-static int write_video_frame(AVFormatContext *oc, OutputStream *ost)
+/*
+ * Tell whether fr->pts lies beyond STREAM_DURATION seconds.
+ *
+ * fr->opaque must point to the frame's enum AVMediaType; audio pts is
+ * interpreted in 1/sample_rate units, anything else in 1/VIDEO_FRAME_RATE.
+ * Returns non-zero when the frame is past the stream duration.
+ */
+static int frame_exceeds_stream_duration(AVFrame *fr)
 {
-    return write_frame(oc, ost->enc, ost->st, get_video_frame(ost), ost->tmp_pkt);
+    const enum AVMediaType kind = *(enum AVMediaType *)fr->opaque;
+    const AVRational one_second = { 1, 1 };
+    AVRational frame_tb;
+
+    if (kind == AVMEDIA_TYPE_AUDIO)
+        frame_tb = (AVRational){ 1, fr->sample_rate };
+    else
+        frame_tb = (AVRational){ 1, VIDEO_FRAME_RATE };
+
+    return av_compare_ts(fr->pts, frame_tb, STREAM_DURATION, one_second) > 0;
 }
 
-static void close_stream(AVFormatContext *oc, OutputStream *ost)
+/*
+ * Pick the media type whose frame has the earlier timestamp.
+ *
+ * A NULL audio frame selects video and (otherwise) a NULL video frame
+ * selects audio. With both present, audio wins only when its pts is
+ * strictly earlier than the video pts; ties go to video.
+ */
+static enum AVMediaType media_type_of_earlier_frame(AVFrame *audio_fr,
+                                                    AVFrame *video_fr)
 {
-    avcodec_free_context(&ost->enc);
-    av_frame_free(&ost->frame);
-    av_frame_free(&ost->tmp_frame);
-    av_packet_free(&ost->tmp_pkt);
-    sws_freeContext(ost->sws_ctx);
-    swr_free(&ost->swr_ctx);
+    AVRational audio_tb, video_tb;
+
+    if (!audio_fr)
+        return AVMEDIA_TYPE_VIDEO;
+    if (!video_fr)
+        return AVMEDIA_TYPE_AUDIO;
+
+    audio_tb = (AVRational){ 1, audio_fr->sample_rate };
+    video_tb = (AVRational){ 1, VIDEO_FRAME_RATE };
+
+    return av_compare_ts(audio_fr->pts, audio_tb, video_fr->pts, video_tb) < 0 ?
+           AVMEDIA_TYPE_AUDIO : AVMEDIA_TYPE_VIDEO;
 }
 
-/**************************************************************/
-/* media file output */
-
 int main(int argc, char **argv)
 {
+    /* Generate synthetic audio and/or video frames, convert them to the
+     * encoders' formats, encode them and mux the packets into argv[1].
+     * Every exit path after the declarations goes through the "end" label,
+     * which releases whatever has been allocated so far. */
-    OutputStream video_st = { 0 }, audio_st = { 0 };
-    const AVOutputFormat *fmt;
-    const char *filename;
-    AVFormatContext *oc;
-    const AVCodec *audio_codec, *video_codec;
-    int ret;
-    int have_video = 0, have_audio = 0;
-    int encode_video = 0, encode_audio = 0;
-    AVDictionary *opt = NULL;
-    int i;
-
-    if (argc < 2) {
+    const char *fname;
+    AVCodecContext *audio_enc_ctx = NULL, *video_enc_ctx = NULL, *enc_ctx = NULL;
+
+    /* NOTE: if you want to modify the audio/video input ".format" parameter,
+     * you need to modify the corresponding fill_dummy_XXX_frame() function(s) too */
+    AVCodecParameters audio_in_params = {
+        .codec_type  = AVMEDIA_TYPE_AUDIO,
+        .format      = AV_SAMPLE_FMT_S16,
+        .sample_rate = 44100,
+        .ch_layout   = (AVChannelLayout)AV_CHANNEL_LAYOUT_STEREO
+    },
+    video_in_params = {
+        .codec_type = AVMEDIA_TYPE_VIDEO,
+        .width      = 352,
+        .height     = 288,
+        .format     = AV_PIX_FMT_YUV420P
+    },
+    video_enc_params  = { 0 }, audio_enc_params = { 0 };
+    struct AVRational enc_timebases[2];
+    AVFrame *in_audio_frame = NULL, *converted_audio_frame = NULL,
+            *in_video_frame = NULL, *converted_video_frame = NULL,
+            *frame_to_encode = NULL;
+    struct SwrContext *audio_convert_ctx = NULL;
+    struct SwsContext *video_convert_ctx = NULL;
+    enum AVMediaType media_type;
+    AVFormatContext *out_fmt_ctx = NULL;
+    AVPacket *out_pkt = av_packet_alloc();
+    int ret = 0, process_audio = 0, process_video = 0;
+
+    if (argc != 2) {
         printf("usage: %s output_file\n"
                "API example program to output a media file with libavformat.\n"
-               "This program generates a synthetic audio and video stream, encodes and\n"
+               "This program generates a synthetic audio and/or video stream, encodes and\n"
                "muxes them into a file named output_file.\n"
                "The output format is automatically guessed according to the file extension.\n"
-               "Raw images can also be output by using '%%d' in the filename.\n"
+               "BMP or JPEG images can also be output by using '%%d' in the filename.\n"
                "\n", argv[0]);
-        return 1;
+        /* out_pkt is already allocated: leave through the cleanup path */
+        ret = AVERROR_EXIT;
+        goto end;
     }
 
+    if (!out_pkt) {
+        log_error("Could not allocate AVPacket", NULL);
+        ret = AVERROR(ENOMEM);
+        goto end;
+    }
+
-    filename = argv[1];
-    for (i = 2; i+1 < argc; i+=2) {
-        if (!strcmp(argv[i], "-flags") || !strcmp(argv[i], "-fflags"))
-            av_dict_set(&opt, argv[i]+1, argv[i+1], 0);
+    fname = argv[1];
+    if (!is_extension_supported(fname)) {
+        ret = AVERROR_EXIT;
+        goto end;
     }
 
-    /* allocate the output media context */
-    avformat_alloc_output_context2(&oc, NULL, NULL, filename);
-    if (!oc) {
-        printf("Could not deduce output format from file extension: using MPEG.\n");
-        avformat_alloc_output_context2(&oc, NULL, "mpeg", filename);
-    }
-    if (!oc)
-        return 1;
-
-    fmt = oc->oformat;
-
-    /* Add the audio and video streams using the default format codecs
-     * and initialize the codecs. */
-    if (fmt->video_codec != AV_CODEC_ID_NONE) {
-        add_stream(&video_st, oc, &video_codec, fmt->video_codec);
-        have_video = 1;
-        encode_video = 1;
+    /* Deduce the default codecs and their default parameters from the filename */
+    if ((ret = get_default_enc_params(&audio_enc_params, fname, AVMEDIA_TYPE_AUDIO)) < 0)
+        goto end;
+    if ((ret = get_default_enc_params(&video_enc_params, fname, AVMEDIA_TYPE_VIDEO)) < 0)
+        goto end;
+    process_audio = audio_enc_params.codec_id != AV_CODEC_ID_NONE;
+    process_video = video_enc_params.codec_id != AV_CODEC_ID_NONE;
+    if (!process_audio && !process_video) {
+        log_error("Could not get default encoder(s)", NULL);
+        ret = AVERROR_EXIT;
+        goto end;
     }
-    if (fmt->audio_codec != AV_CODEC_ID_NONE) {
-        add_stream(&audio_st, oc, &audio_codec, fmt->audio_codec);
-        have_audio = 1;
-        encode_audio = 1;
-    }
-
-    /* Now that all the parameters are set, we can open the audio and
-     * video codecs and allocate the necessary encode buffers. */
-    if (have_video)
-        open_video(oc, video_codec, &video_st, opt);
-
-    if (have_audio)
-        open_audio(oc, audio_codec, &audio_st, opt);
-
-    av_dump_format(oc, 0, filename, 1);
 
-    /* open the output file, if needed */
-    if (!(fmt->flags & AVFMT_NOFILE)) {
-        ret = avio_open(&oc->pb, filename, AVIO_FLAG_WRITE);
-        if (ret < 0) {
-            fprintf(stderr, "Could not open '%s': %s\n", filename,
-                    av_err2str(ret));
-            return 1;
-        }
+    if (process_audio) {
+        /* Prepare the audio encoder*/
+        if ((ret = init_encoder(&audio_enc_ctx, &audio_enc_params)) < 0)
+            goto end;
+        enc_timebases[0] = audio_enc_ctx->time_base;
+        audio_in_params.frame_size  = audio_enc_params.frame_size  = audio_enc_ctx->frame_size;
+
+        /* Allocate an audio resampler and its input and output AVFrames */
+        if ((ret = init_audio_convert(&audio_convert_ctx, &audio_in_params,
+                                      &audio_enc_params)) < 0)
+            goto end;
+        if ((ret = init_avframe(&in_audio_frame, &audio_in_params)) < 0)
+            goto end;
+        if ((ret = init_avframe(&converted_audio_frame, &audio_enc_params)) < 0)
+            goto end;
     }
 
-    /* Write the stream header, if any. */
-    ret = avformat_write_header(oc, &opt);
-    if (ret < 0) {
-        fprintf(stderr, "Error occurred when opening output file: %s\n",
-                av_err2str(ret));
-        return 1;
+    if (process_video) {
+        /* Prepare the video encoder, the scaler and its AVFrames */
+        video_enc_params.width  = video_in_params.width;
+        video_enc_params.height = video_in_params.height;
+        if ((ret = init_encoder(&video_enc_ctx, &video_enc_params)) < 0)
+            goto end;
+        enc_timebases[1] = video_enc_ctx->time_base;
+        if ((ret = init_video_convert(&video_convert_ctx,&video_in_params,
+                                      &video_enc_params)) < 0)
+            goto end;
+        if ((ret = init_avframe(&in_video_frame, &video_in_params)) < 0)
+            goto end;
+        if ((ret = init_avframe(&converted_video_frame, &video_enc_params)) < 0)
+            goto end;
     }
 
-    while (encode_video || encode_audio) {
-        /* select the stream to encode */
-        if (encode_video &&
-            (!encode_audio || av_compare_ts(video_st.next_pts, video_st.enc->time_base,
-                                            audio_st.next_pts, audio_st.enc->time_base) <= 0)) {
-            encode_video = !write_video_frame(oc, &video_st);
+    /* Create the output container for the encoded frames */
+    if ((ret = init_muxer(&out_fmt_ctx, audio_enc_ctx, video_enc_ctx, fname)) < 0)
+        goto end;
+    out_fmt_ctx->opaque = &enc_timebases;
+
+    while (process_audio || process_video) {
+
+        frame_to_encode = NULL;
+        media_type = media_type_of_earlier_frame(in_audio_frame, in_video_frame);
+
+        /* fill and convert the input frames */
+        if (media_type == AVMEDIA_TYPE_AUDIO) {
+            enc_ctx = audio_enc_ctx;
+            fill_dummy_s16_frame(in_audio_frame);
+            if ((ret = convert_frame(audio_convert_ctx, in_audio_frame,
+                                     converted_audio_frame)) != 0)
+                goto end;
+            if (!frame_exceeds_stream_duration(converted_audio_frame))
+                frame_to_encode = converted_audio_frame;
         } else {
-            encode_audio = !write_audio_frame(oc, &audio_st);
+            enc_ctx = video_enc_ctx;
+            fill_dummy_yuv420p_frame(in_video_frame);
+            if ((ret = convert_frame(video_convert_ctx, in_video_frame,
+                                     converted_video_frame)) != 0)
+                goto end;
+            if (!frame_exceeds_stream_duration(in_video_frame))
+                frame_to_encode = converted_video_frame;
         }
-    }
-
-    av_write_trailer(oc);
 
-    /* Close each codec. */
-    if (have_video)
-        close_stream(oc, &video_st);
-    if (have_audio)
-        close_stream(oc, &audio_st);
+        /* encode the converted frames and mux the encoded packets;
+         * a NULL frame_to_encode asks the encoder to flush */
+        if ((ret = encode_frame(enc_ctx, frame_to_encode, out_pkt)) == 0) {
+            if ((ret = mux_encoded_pkt(out_pkt, out_fmt_ctx, media_type)) < 0)
+               goto end;
+        }
 
-    if (!(fmt->flags & AVFMT_NOFILE))
-        /* Close the output file. */
-        avio_closep(&oc->pb);
+        /* check if the encoders have been fully flushed */
+        process_audio &= !((ret == AVERROR_EOF) && (media_type == AVMEDIA_TYPE_AUDIO));
+        process_video &= !((ret == AVERROR_EOF) && (media_type == AVMEDIA_TYPE_VIDEO));
 
-    /* free the stream */
-    avformat_free_context(oc);
+    }
 
-    return 0;
+    /* Report a failure to finalize the file instead of hiding it */
+    if ((ret = av_write_trailer(out_fmt_ctx)) < 0)
+        log_error("Could not write the output trailer", &ret);
+
+end:
+
+    avcodec_free_context(&audio_enc_ctx);
+    avcodec_free_context(&video_enc_ctx);
+    av_frame_free(&in_audio_frame);
+    av_frame_free(&in_video_frame);
+    av_frame_free(&converted_audio_frame);
+    av_frame_free(&converted_video_frame);
+    swr_free(&audio_convert_ctx);
+    sws_freeContext(video_convert_ctx);
+    if (out_fmt_ctx)
+        avio_closep(&out_fmt_ctx->pb);
+    avformat_free_context(out_fmt_ctx);
+    av_packet_free(&out_pkt);
+
+    return ret;
 }
-- 
2.32.0