diff mbox series

[FFmpeg-devel,3/3] fftools/ffmpeg_demux: merge streams in a LCEVC stream group

Message ID 20240924144308.1196-3-jamrial@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/3] avcodec: add an LCEVC merger bsf | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

James Almer Sept. 24, 2024, 2:43 p.m. UTC
Add the LCEVC data stream payloads as packet side data to the main video
stream, ensuring the former is always output by the demuxer even if not
used by the process.

Signed-off-by: James Almer <jamrial@gmail.com>
---
 configure              |   2 +-
 fftools/ffmpeg.h       |   6 +
 fftools/ffmpeg_demux.c | 296 ++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 300 insertions(+), 4 deletions(-)

Comments

Anton Khirnov Sept. 26, 2024, 9:54 a.m. UTC | #1
Quoting James Almer (2024-09-24 16:43:08)
> Add the LCEVC data stream payloads as packet side data to the main video
> stream, ensuring the former is always output by the demuxer even if not
> used by the process.
> 
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>  configure              |   2 +-
>  fftools/ffmpeg.h       |   6 +
>  fftools/ffmpeg_demux.c | 296 ++++++++++++++++++++++++++++++++++++++++-
>  3 files changed, 300 insertions(+), 4 deletions(-)

Overall this patch seems like overgeneralizing from a single sample. Is
there a reason to think a generic stream group would need its own
bitstream filters? Or a "main stream for merged output"? It seems
cleaner to me to just make all of this explicitly LCEVC-specific and not
try to generalize until there actually is more than one user.

> 
> diff --git a/configure b/configure
> index d77a55b653..434c73776b 100755
> --- a/configure
> +++ b/configure
> @@ -4051,7 +4051,7 @@ ffmpeg_deps="avcodec avfilter avformat threads"
>  ffmpeg_select="aformat_filter anull_filter atrim_filter crop_filter
>                 format_filter hflip_filter null_filter rotate_filter
>                 transpose_filter trim_filter vflip_filter"
> -ffmpeg_suggest="ole32 psapi shell32"
> +ffmpeg_suggest="ole32 psapi shell32 lcevc_merge_bsf"
>  ffplay_deps="avcodec avformat avfilter swscale swresample sdl2"
>  ffplay_select="crop_filter transpose_filter hflip_filter vflip_filter rotate_filter"
>  ffplay_suggest="shell32 libplacebo vulkan"
> diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
> index 733d551fa4..f598f6a46f 100644
> --- a/fftools/ffmpeg.h
> +++ b/fftools/ffmpeg.h
> @@ -492,6 +492,12 @@ typedef struct InputFile {
>       * if new streams appear dynamically during demuxing */
>      InputStream    **streams;
>      int           nb_streams;
> +
> +    /**
> +     * stream groups that ffmpeg is aware of
> +     */
> +    struct InputStreamGroup **stream_groups;
> +    int             nb_stream_groups;

There seems to be no reason for this to be public, so it should not be.

>  } InputFile;
>  
>  enum forced_keyframes_const {
> diff --git a/fftools/ffmpeg_demux.c b/fftools/ffmpeg_demux.c
> index 108a4a94bf..6aa194e7ad 100644
> --- a/fftools/ffmpeg_demux.c
> +++ b/fftools/ffmpeg_demux.c
> @@ -40,6 +40,18 @@
>  
>  #include "libavformat/avformat.h"
>  
> +// Defined here until it's needed in other modules and moved to ffmpeg.h

"Local until/unless it needs to be public" applies to everything, not
just this struct.

> +typedef struct InputStreamGroup {
> +    const AVClass        *class;
> +
> +    /* parent source */
> +    struct InputFile     *file;
> +
> +    int                   index;
> +
> +    AVStreamGroup        *stg;
> +} InputStreamGroup;
> +
>  typedef struct DemuxStream {
>      InputStream              ist;
>  
> @@ -56,7 +68,7 @@ typedef struct DemuxStream {
>  #define DECODING_FOR_OST    1
>  #define DECODING_FOR_FILTER 2
>  
> -    /* true if stream data should be discarded */
> +    /* non-0 if stream data is not strictly used by an output */

This is backwards, and highly confusing. It should be a bitmask of "how
the stream is used".
diff mbox series

Patch

diff --git a/configure b/configure
index d77a55b653..434c73776b 100755
--- a/configure
+++ b/configure
@@ -4051,7 +4051,7 @@  ffmpeg_deps="avcodec avfilter avformat threads"
 ffmpeg_select="aformat_filter anull_filter atrim_filter crop_filter
                format_filter hflip_filter null_filter rotate_filter
                transpose_filter trim_filter vflip_filter"
-ffmpeg_suggest="ole32 psapi shell32"
+ffmpeg_suggest="ole32 psapi shell32 lcevc_merge_bsf"
 ffplay_deps="avcodec avformat avfilter swscale swresample sdl2"
 ffplay_select="crop_filter transpose_filter hflip_filter vflip_filter rotate_filter"
 ffplay_suggest="shell32 libplacebo vulkan"
diff --git a/fftools/ffmpeg.h b/fftools/ffmpeg.h
index 733d551fa4..f598f6a46f 100644
--- a/fftools/ffmpeg.h
+++ b/fftools/ffmpeg.h
@@ -492,6 +492,12 @@  typedef struct InputFile {
      * if new streams appear dynamically during demuxing */
     InputStream    **streams;
     int           nb_streams;
+
+    /**
+     * stream groups that ffmpeg is aware of
+     */
+    struct InputStreamGroup **stream_groups;
+    int             nb_stream_groups;
 } InputFile;
 
 enum forced_keyframes_const {
diff --git a/fftools/ffmpeg_demux.c b/fftools/ffmpeg_demux.c
index 108a4a94bf..6aa194e7ad 100644
--- a/fftools/ffmpeg_demux.c
+++ b/fftools/ffmpeg_demux.c
@@ -40,6 +40,18 @@ 
 
 #include "libavformat/avformat.h"
 
+// Wrapper around one AVStreamGroup of an input file; defined here until it's needed in other modules and moved to ffmpeg.h
+typedef struct InputStreamGroup {
+    const AVClass        *class;   // logging class, set to &input_stream_group_class on allocation
+
+    /* parent source: the InputFile this group was read from */
+    struct InputFile     *file;
+
+    int                   index;   // copy of stg->index
+
+    AVStreamGroup        *stg;     // the libavformat stream group being wrapped
+} InputStreamGroup;
+
 typedef struct DemuxStream {
     InputStream              ist;
 
@@ -56,7 +68,7 @@  typedef struct DemuxStream {
 #define DECODING_FOR_OST    1
 #define DECODING_FOR_FILTER 2
 
-    /* true if stream data should be discarded */
+    /* 0 if stream data is used by an output, otherwise one of the DEMUXER_* values */
     int                      discard;
 
     // scheduler returned EOF for this stream
@@ -90,12 +102,29 @@  typedef struct DemuxStream {
 
     AVBSFContext            *bsf;
 
+    InputStreamGroup       **stream_groups;
+    int                      nb_stream_groups;
+
     /* number of packets successfully read for this stream */
     uint64_t                 nb_packets;
     // combined size of all the packets read
     uint64_t                 data_size;
 } DemuxStream;
 
+typedef struct DemuxStreamGroup {   // demuxer-private state for one stream group
+    InputStreamGroup         istg;  // must stay first: dsg_from_istg() casts an InputStreamGroup* to this type
+
+    // main stream for merged output (for LCEVC groups: the video stream that is not the enhancement one)
+    InputStream             *stream;
+
+    // name used for logging, formatted as "istg#<file index>:<group index>/<group type name>"
+    char                     log_name[32];
+
+    int                      discard; // set to 1 when the group is added; cleared by ist_use() once the main stream is used
+
+    AVBSFContext            *bsf;     // group-level bitstream filter (for LCEVC groups: "lcevc_merge"); freed in istg_free()
+} DemuxStreamGroup;
+
 typedef struct Demuxer {
     InputFile             f;
 
@@ -142,13 +171,26 @@  typedef struct DemuxThreadContext {
     AVPacket *pkt_demux;
     // packet for reading from BSFs
     AVPacket *pkt_bsf;
+    AVPacket *pkt_group_bsf;
 } DemuxThreadContext;
 
+enum {  // non-zero values stored in DemuxStream.discard
+    // Stream is to be discarded without further processing (initial state set by ist_add())
+    DEMUXER_DISCARD = 1,
+    // Stream is to be discarded after being processed in some form (only fed to the bsf of a group it belongs to)
+    DEMUXER_FILTER = 2,
+};
+
 static DemuxStream *ds_from_ist(InputStream *ist)
 {
     return (DemuxStream*)ist;
 }
 
+static DemuxStreamGroup *dsg_from_istg(InputStreamGroup *istg)  // public group handle -> demuxer-private wrapper
+{
+    return (DemuxStreamGroup*)istg;  // valid because istg is the first member of DemuxStreamGroup
+}
+
 static Demuxer *demuxer_from_ifile(InputFile *f)
 {
     return (Demuxer*)f;
@@ -587,6 +629,17 @@  static int demux_send(Demuxer *d, DemuxThreadContext *dt, DemuxStream *ds,
     // pkt can be NULL only when flushing BSFs
     av_assert0(ds->bsf || pkt);
 
+    // the stream needs to be used by an output in some form
+    av_assert0(ds->discard != DEMUXER_DISCARD);
+
+    // create a reference for the packet to be filtered by group bsfs
+    if (pkt && ds->nb_stream_groups) {
+        av_packet_unref(dt->pkt_group_bsf);
+        ret = av_packet_ref(dt->pkt_group_bsf, pkt);
+        if (ret < 0)
+            return ret;
+    }
+
     // send heartbeat for sub2video streams
     if (d->pkt_heartbeat && pkt && pkt->pts != AV_NOPTS_VALUE) {
         for (int i = 0; i < f->nb_streams; i++) {
@@ -605,6 +658,26 @@  static int demux_send(Demuxer *d, DemuxThreadContext *dt, DemuxStream *ds,
         }
     }
 
+    for (int i = 0; i < ds->nb_stream_groups; i++) {
+        DemuxStreamGroup *dsg = dsg_from_istg(ds->stream_groups[i]);
+
+        // if the main stream is not used by an output, we don't want to filter
+        if (ds == ds_from_ist(dsg->stream) && ds->discard)
+            continue;
+
+        ret = demux_filter(d, dt, ds_from_ist(dsg->stream), dsg->bsf,
+                           pkt ? dt->pkt_group_bsf : NULL, dsg);
+        if (ret < 0)
+            return ret;
+
+        // TODO handle streams belonging to more than one Stream group
+        if (i == (ds->nb_stream_groups - 1) && ds == ds_from_ist(dsg->stream))
+            return 0;
+    }
+
+    if (ds->discard)
+        return 0;
+
     if (ds->bsf) {
         ret = demux_filter(d, dt, ds, ds->bsf, pkt, ds);
         if (ret < 0)
@@ -672,6 +745,7 @@  static void demux_thread_uninit(DemuxThreadContext *dt)
 {
     av_packet_free(&dt->pkt_demux);
     av_packet_free(&dt->pkt_bsf);
+    av_packet_free(&dt->pkt_group_bsf);
 
     memset(dt, 0, sizeof(*dt));
 }
@@ -688,6 +762,10 @@  static int demux_thread_init(DemuxThreadContext *dt)
     if (!dt->pkt_bsf)
         return AVERROR(ENOMEM);
 
+    dt->pkt_group_bsf = av_packet_alloc();
+    if (!dt->pkt_group_bsf)
+        return AVERROR(ENOMEM);
+
     return 0;
 }
 
@@ -760,11 +838,25 @@  static int input_thread(void *arg)
            dynamically in stream : we ignore them */
         ds = dt.pkt_demux->stream_index < f->nb_streams ?
              ds_from_ist(f->streams[dt.pkt_demux->stream_index]) : NULL;
-        if (!ds || ds->discard || ds->finished) {
+        if (!ds || ds->finished) {
             report_new_stream(d, dt.pkt_demux);
             av_packet_unref(dt.pkt_demux);
             continue;
         }
+        if (ds->discard == DEMUXER_DISCARD) {
+            int i;
+            /* is the stream known and unused, but still needed to handle a group? */
+            for (i = 0; i < ds->nb_stream_groups; i++)
+                if (!dsg_from_istg(ds->stream_groups[i])->discard)
+                    break;
+            /* like above, if it's a new stream, we ignore it */
+            if (i == ds->nb_stream_groups) {
+                report_new_stream(d, dt.pkt_demux);
+                av_packet_unref(dt.pkt_demux);
+                continue;
+            }
+            ds->discard = DEMUXER_FILTER;
+        }
 
         if (dt.pkt_demux->flags & AV_PKT_FLAG_CORRUPT) {
             av_log(d, exit_on_error ? AV_LOG_FATAL : AV_LOG_WARNING,
@@ -861,9 +953,25 @@  static void ist_free(InputStream **pist)
 
     av_bsf_free(&ds->bsf);
 
+    av_freep(&ds->stream_groups);
+
     av_freep(pist);
 }
 
+static void istg_free(InputStreamGroup **pistg)  // releases the group's bsf and then the group itself
+{
+    InputStreamGroup *istg = *pistg;
+    DemuxStreamGroup *dsg;
+
+    if (!istg)  // nothing to free
+        return;
+    dsg = dsg_from_istg(istg);
+
+    av_bsf_free(&dsg->bsf);
+
+    av_freep(pistg);  // the InputStreamGroup is embedded at the start of the DemuxStreamGroup allocation
+}
+
 void ifile_close(InputFile **pf)
 {
     InputFile *f = *pf;
@@ -878,6 +986,9 @@  void ifile_close(InputFile **pf)
     for (int i = 0; i < f->nb_streams; i++)
         ist_free(&f->streams[i]);
     av_freep(&f->streams);
+    for (int i = 0; i < f->nb_stream_groups; i++)
+        istg_free(&f->stream_groups[i]);
+    av_freep(&f->stream_groups);
 
     avformat_close_input(&f->ctx);
 
@@ -984,6 +1095,19 @@  static int ist_use(InputStream *ist, int decoding_needed,
                SCH_DSTREAM(d->f.index, ds->sch_idx_stream);
     }
 
+    // if this stream is the main one in any group, enable said group and
+    // all its streams, so lavf will return their packets
+    for (int i = 0; i < ds->nb_stream_groups; i++) {
+        DemuxStreamGroup *dsg = dsg_from_istg(ds->stream_groups[i]);
+        AVStreamGroup *stg = ds->stream_groups[i]->stg;
+
+        if (ist != dsg->stream)
+            continue;
+        for (int j = 0; j < stg->nb_streams; j++)
+            stg->streams[j]->discard = 0;
+        dsg->discard = 0;
+    }
+
     return 0;
 }
 
@@ -1269,7 +1393,7 @@  static int ist_add(const OptionsContext *o, Demuxer *d, AVStream *st, AVDictiona
 
     ist = &ds->ist;
 
-    ds->discard     = 1;
+    ds->discard     = DEMUXER_DISCARD;
     st->discard  = AVDISCARD_ALL;
     ds->first_dts   = AV_NOPTS_VALUE;
     ds->next_dts    = AV_NOPTS_VALUE;
@@ -1550,6 +1674,163 @@  static int ist_add(const OptionsContext *o, Demuxer *d, AVStream *st, AVDictiona
     return 0;
 }
 
+static const char *input_stream_group_item_name(void *obj)  // AVClass.item_name callback of input_stream_group_class
+{
+    const DemuxStreamGroup *dsg = obj;  // obj is the logging context; groups are allocated as DemuxStreamGroup
+
+    return dsg->log_name;  // filled in by demux_stream_group_alloc()
+}
+
+static const AVClass input_stream_group_class = {  // logging class assigned to every InputStreamGroup
+    .class_name = "InputStreamGroup",
+    .version    = LIBAVUTIL_VERSION_INT,
+    .item_name  = input_stream_group_item_name,  // returns the group's log_name
+    .category   = AV_CLASS_CATEGORY_DEMUXER,
+};
+
+static DemuxStreamGroup *demux_stream_group_alloc(Demuxer *d, AVStreamGroup *stg)  // returns the new group, or NULL on failure
+{
+    InputFile    *f = &d->f;
+    DemuxStreamGroup *dsg;
+
+    dsg = allocate_array_elem(&f->stream_groups, sizeof(*dsg), &f->nb_stream_groups);  // presumably appends a zeroed element of the full DemuxStreamGroup size - TODO confirm
+    if (!dsg)
+        return NULL;
+
+    dsg->istg.stg       = stg;
+    dsg->istg.file      = f;
+    dsg->istg.index     = stg->index;
+    dsg->istg.class     = &input_stream_group_class;
+
+    snprintf(dsg->log_name, sizeof(dsg->log_name), "istg#%d:%d/%s",  // "istg#<file index>:<group index>/<group type name>"
+             d->f.index, stg->index, avformat_stream_group_name(stg->type));
+
+    return dsg;
+}
+
+static int istg_set_bidirectional_references(Demuxer *d, DemuxStreamGroup *dsg)  // returns 0 or a negative AVERROR code
+{
+    InputFile    *f = &d->f;
+    AVStreamGroup *stg = dsg->istg.stg;
+
+    /* since the API lets us know what streams belong to a given group, but
+     * not what groups a given stream is part of, add a pointer to the
+     * DemuxStreamGroup to all relevant DemuxStream structs for this purpose */
+    for (int i = 0; i < stg->nb_streams; i++) {
+        DemuxStreamGroup **dsg1;  // slot appended to ds->stream_groups
+        DemuxStream *ds;
+        int j;
+
+        for (j = 0; j < f->nb_streams; j++)  // find the InputStream wrapping this AVStream
+            if (stg->streams[i] == f->streams[j]->st)
+                break;
+
+        if (j == f->nb_streams)  // a group member with no matching InputStream is treated as an internal error
+            return AVERROR_BUG;
+
+        ds = ds_from_ist(f->streams[j]);
+        dsg1 = av_dynarray2_add((void **)&ds->stream_groups, &ds->nb_stream_groups,
+                                sizeof(*dsg1), NULL);
+        if (!dsg1)
+            return AVERROR(ENOMEM);
+
+        *dsg1 = dsg;  // stored as a DemuxStreamGroup*, read back through InputStreamGroup* (istg is the first member)
+    }
+
+    return 0;
+}
+
+/* Register an LCEVC stream group: pick its base (main) video stream and set up
+ * the "lcevc_merge" bitstream filter for it. Returns 0 on success,
+ * AVERROR(ENOSYS) if the bsf is not available, or another negative AVERROR code. */
+static int istg_add_lcevc_group(Demuxer *d, AVStreamGroup *stg)
+{
+    InputFile    *f = &d->f;
+    const AVBitStreamFilter *filter = av_bsf_get_by_name("lcevc_merge");
+    DemuxStreamGroup *dsg;
+    int base_idx = -1, enhancement_idx = -1;
+    int ret;
+
+    if (!filter)
+        return AVERROR(ENOSYS);
+
+    if (stg->nb_streams != 2) {
+        av_log(d, AV_LOG_WARNING, "LCEVC stream groups that do not have exactly "
+                                  "2 streams are not supported.\n");
+        return AVERROR_PATCHWELCOME;
+    }
+
+    dsg = demux_stream_group_alloc(d, stg);
+    if (!dsg)
+        return AVERROR(ENOMEM);
+
+    // the stream that is not the enhancement one becomes the group's main stream
+    for (int i = 0; i < stg->nb_streams; i++) {
+        const AVStreamGroupLCEVC *lcevc = stg->params.lcevc;
+        int j;
+
+        if (i == lcevc->lcevc_index) {
+            enhancement_idx = stg->streams[lcevc->lcevc_index]->index;
+            continue;
+        }
+
+        for (j = 0; j < f->nb_streams; j++)
+            if (stg->streams[i] == f->streams[j]->st)
+                break;
+
+        if (j == f->nb_streams)
+            return AVERROR_BUG;
+
+        // exactly one base stream is expected, and it must be video
+        if (stg->streams[i]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO || base_idx >= 0)
+            return AVERROR_BUG;
+
+        dsg->stream = f->streams[j];
+        base_idx = f->streams[j]->st->index;
+    }
+
+    ret = av_bsf_alloc(filter, &dsg->bsf);
+    if (ret < 0)
+        return ret;
+
+    av_opt_set_int(dsg->bsf->priv_data, "base_idx", base_idx, 0);
+    av_opt_set_int(dsg->bsf->priv_data, "enhancement_idx", enhancement_idx, 0);
+
+    // base_idx is a stream index within the input file, NOT a position in stg->streams[]
+    dsg->bsf->time_base_in = dsg->stream->st->time_base;
+
+    return av_bsf_init(dsg->bsf);
+}
+
+static int istg_add(Demuxer *d, AVStreamGroup *stg)  // returns 0 (also for ignored groups) or a negative AVERROR code
+{
+    InputFile    *f = &d->f;
+    DemuxStreamGroup *dsg;
+    int ret = AVERROR_BUG;
+
+    switch (stg->type) {
+    case AV_STREAM_GROUP_PARAMS_LCEVC:
+        ret = istg_add_lcevc_group(d, stg);
+        if (ret == AVERROR(ENOSYS))  // "lcevc_merge" bsf unavailable: silently ignore the group
+            return 0;
+        break;
+    default:  // other group types are not handled here
+        return 0;
+    }
+    // NOTE(review): AVERROR_PATCHWELCOME (unsupported stream count) is logged only as a warning but fails ifile_open() from here - confirm intended
+    if (ret < 0)
+        return ret;
+
+    dsg = dsg_from_istg(f->stream_groups[f->nb_stream_groups - 1]);  // the group just appended by istg_add_lcevc_group()
+    ret = istg_set_bidirectional_references(d, dsg);
+    if (ret < 0)
+        return ret;
+
+    dsg->discard = 1;  // stays disabled until ist_use() is called on its main stream
+
+    return 0;
+}
+
 static int dump_attachment(InputStream *ist, const char *filename)
 {
     AVStream *st = ist->st;
@@ -1904,6 +2185,15 @@  int ifile_open(const OptionsContext *o, const char *filename, Scheduler *sch)
         }
     }
 
+    /* Add all the stream groups from the given input file to the demuxer */
+    for (int i = 0; i < ic->nb_stream_groups; i++) {
+        ret = istg_add(d, ic->stream_groups[i]);
+        if (ret < 0) {
+            av_dict_free(&opts_used);
+            return ret;
+        }
+    }
+
     /* dump the file content */
     av_dump_format(ic, f->index, filename, 0);