diff mbox series

[FFmpeg-devel,16/23] avcodec/hevc/refs: export Stereo 3D side data

Message ID 20240914111036.17164-17-anton@khirnov.net
State New
Headers show
Series [FFmpeg-devel,01/23] compat: drop gcc, suncc, and pthreads stdatomic emulation | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Anton Khirnov Sept. 14, 2024, 10:45 a.m. UTC
From: James Almer <jamrial@gmail.com>

Use the 3D Reference Displays Info SEI message to link a view_id with
an eye.

Signed-off-by: James Almer <jamrial@gmail.com>
---
 libavcodec/hevc/hevcdec.c |  1 +
 libavcodec/hevc/refs.c    | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)

Comments

James Almer Sept. 14, 2024, 10:12 p.m. UTC | #1
On 9/14/2024 7:45 AM, Anton Khirnov wrote:
> From: James Almer <jamrial@gmail.com>
> 
> Use the 3D Reference Displays Info SEI message to link a view_id with
> an eye.
> 
> Signed-off-by: James Almer <jamrial@gmail.com>
> ---
>   libavcodec/hevc/hevcdec.c |  1 +
>   libavcodec/hevc/refs.c    | 19 +++++++++++++++++++
>   2 files changed, 20 insertions(+)
> 
> diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c
> index 692f19e97e..b784b10bcf 100644
> --- a/libavcodec/hevc/hevcdec.c
> +++ b/libavcodec/hevc/hevcdec.c
> @@ -3968,6 +3968,7 @@ static int hevc_update_thread_context(AVCodecContext *dst,
>       s->sei.common.mastering_display    = s0->sei.common.mastering_display;
>       s->sei.common.content_light        = s0->sei.common.content_light;
>       s->sei.common.aom_film_grain       = s0->sei.common.aom_film_grain;
> +    s->sei.tdrdi                       = s0->sei.tdrdi;
>   
>       return 0;
>   }
> diff --git a/libavcodec/hevc/refs.c b/libavcodec/hevc/refs.c
> index b9b08ca416..ac1b07a308 100644
> --- a/libavcodec/hevc/refs.c
> +++ b/libavcodec/hevc/refs.c
> @@ -22,6 +22,7 @@
>    */
>   
>   #include "libavutil/mem.h"
> +#include "libavutil/stereo3d.h"
>   
>   #include "container_fifo.h"
>   #include "decode.h"
> @@ -94,6 +95,7 @@ static HEVCFrame *alloc_frame(HEVCContext *s, HEVCLayerContext *l)
>   
>           // add view ID side data if it's nontrivial
>           if (vps->nb_layers > 1 || view_id) {
> +            HEVCSEITDRDI *tdrdi = &s->sei.tdrdi;
>               AVFrameSideData *sd = av_frame_side_data_new(&frame->f->side_data,
>                                                            &frame->f->nb_side_data,
>                                                            AV_FRAME_DATA_VIEW_ID,
> @@ -101,6 +103,23 @@ static HEVCFrame *alloc_frame(HEVCContext *s, HEVCLayerContext *l)
>               if (!sd)
>                   goto fail;
>               *(int*)sd->data = view_id;
> +
> +            if (tdrdi->num_ref_displays) {
> +                AVStereo3D *stereo_3d;
> +
> +                av_frame_remove_side_data(frame->f, AV_FRAME_DATA_STEREO3D);

As this is now being called before ff_progress_frame_get_buffer() (is 
there a reason you wanted the view_id side data and this one applied to 
the frame before get_buffer()?), it became a no-op and any stereo 3d 
side data in the input packet will be appended to the frame, resulting 
in something like:

> [Parsed_showinfo_0 @ 00000281481551c0]   side data - View ID: view id: 0
> [Parsed_showinfo_0 @ 00000281481551c0]   side data - Stereo 3D: type - frame alternate, view - right, primary_eye - none
> [Parsed_showinfo_0 @ 00000281481551c0]   side data - Spherical Mapping: rectilinear
> [Parsed_showinfo_0 @ 00000281481551c0]   side data - Stereo 3D: type - unspecified, view - packed, primary_eye - none, baseline: 19240, horizontal_disparity_adjustment: 0.0200, horizontal_field_of_view: 63.400

We don't really want to lose the information that's coded in the 
container but not in the bitstream (Which happened in the previous 
version of the patch too), so we should instead amend the container 
level side data with the bitstream information.

Something like:

> diff --git a/libavcodec/hevc/refs.c b/libavcodec/hevc/refs.c
> index ac1b07a308..f4c2b18e83 100644
> --- a/libavcodec/hevc/refs.c
> +++ b/libavcodec/hevc/refs.c
> @@ -93,21 +93,32 @@ static HEVCFrame *alloc_frame(HEVCContext *s, HEVCLayerContext *l)
>          if (ret < 0)
>              return NULL;
> 
> +        if (!(s->layers_active_output & (1 << s->cur_layer)))
> +            frame->f->flags |= AV_FRAME_FLAG_DISCARD;
> +
> +        ret = ff_progress_frame_get_buffer(s->avctx, &frame->tf,
> +                                           AV_GET_BUFFER_FLAG_REF);
> +        if (ret < 0)
> +            return NULL;
> +
>          // add view ID side data if it's nontrivial
>          if (vps->nb_layers > 1 || view_id) {
>              HEVCSEITDRDI *tdrdi = &s->sei.tdrdi;
> -            AVFrameSideData *sd = av_frame_side_data_new(&frame->f->side_data,
> -                                                         &frame->f->nb_side_data,
> -                                                         AV_FRAME_DATA_VIEW_ID,
> -                                                         sizeof(int), 0);
> +            AVFrameSideData *sd;
> +
> +            av_frame_remove_side_data(frame->f, AV_FRAME_DATA_VIEW_ID);
> +            sd = av_frame_new_side_data(frame->f, AV_FRAME_DATA_VIEW_ID, sizeof(int));
>              if (!sd)
>                  goto fail;
>              *(int*)sd->data = view_id;
> 
>              if (tdrdi->num_ref_displays) {
> -                AVStereo3D *stereo_3d;
> +                AVStereo3D *stereo_3d = NULL;
> 
> -                av_frame_remove_side_data(frame->f, AV_FRAME_DATA_STEREO3D);
> +                sd = av_frame_get_side_data(frame->f, AV_FRAME_DATA_STEREO3D);
> +                if (sd)
> +                    stereo_3d = (AVStereo3D *)sd->data;
> +                else
>                  stereo_3d = av_stereo3d_create_side_data(frame->f);
>                  if (!stereo_3d)
>                      goto fail;
> @@ -122,14 +133,6 @@ static HEVCFrame *alloc_frame(HEVCContext *s, HEVCLayerContext *l)
>              }
>          }
> 
> -        if (!(s->layers_active_output & (1 << s->cur_layer)))
> -            frame->f->flags |= AV_FRAME_FLAG_DISCARD;
> -
> -        ret = ff_progress_frame_get_buffer(s->avctx, &frame->tf,
> -                                           AV_GET_BUFFER_FLAG_REF);
> -        if (ret < 0)
> -            return NULL;
> -
>          frame->rpl = ff_refstruct_allocz(s->pkt.nb_nals * sizeof(*frame->rpl));
>          if (!frame->rpl)
>              goto fail;

Which results in

> [Parsed_showinfo_0 @ 000001f776dc6020]   side data - Spherical Mapping: rectilinear
> [Parsed_showinfo_0 @ 000001f776dc6020]   side data - Stereo 3D: type - frame alternate, view - right, primary_eye - none, baseline: 19240, horizontal_disparity_adjustment: 0.0200, horizontal_field_of_view: 63.400
> [Parsed_showinfo_0 @ 000001f776dc6020]   side data - View ID: view id: 0
James Almer Sept. 15, 2024, 3:18 a.m. UTC | #2
On 9/14/2024 7:12 PM, James Almer wrote:
> On 9/14/2024 7:45 AM, Anton Khirnov wrote:
>> From: James Almer <jamrial@gmail.com>
>>
>> Use the 3D Reference Displays Info SEI message to link a view_id with
>> an eye.
>>
>> Signed-off-by: James Almer <jamrial@gmail.com>
>> ---
>>   libavcodec/hevc/hevcdec.c |  1 +
>>   libavcodec/hevc/refs.c    | 19 +++++++++++++++++++
>>   2 files changed, 20 insertions(+)
>>
>> diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c
>> index 692f19e97e..b784b10bcf 100644
>> --- a/libavcodec/hevc/hevcdec.c
>> +++ b/libavcodec/hevc/hevcdec.c
>> @@ -3968,6 +3968,7 @@ static int 
>> hevc_update_thread_context(AVCodecContext *dst,
>>       s->sei.common.mastering_display    = s0- 
>> >sei.common.mastering_display;
>>       s->sei.common.content_light        = s0->sei.common.content_light;
>>       s->sei.common.aom_film_grain       = s0->sei.common.aom_film_grain;
>> +    s->sei.tdrdi                       = s0->sei.tdrdi;
>>       return 0;
>>   }
>> diff --git a/libavcodec/hevc/refs.c b/libavcodec/hevc/refs.c
>> index b9b08ca416..ac1b07a308 100644
>> --- a/libavcodec/hevc/refs.c
>> +++ b/libavcodec/hevc/refs.c
>> @@ -22,6 +22,7 @@
>>    */
>>   #include "libavutil/mem.h"
>> +#include "libavutil/stereo3d.h"
>>   #include "container_fifo.h"
>>   #include "decode.h"
>> @@ -94,6 +95,7 @@ static HEVCFrame *alloc_frame(HEVCContext *s, 
>> HEVCLayerContext *l)
>>           // add view ID side data if it's nontrivial
>>           if (vps->nb_layers > 1 || view_id) {
>> +            HEVCSEITDRDI *tdrdi = &s->sei.tdrdi;
>>               AVFrameSideData *sd = av_frame_side_data_new(&frame->f- 
>> >side_data,
>>                                                            &frame->f- 
>> >nb_side_data,
>>                                                            
>> AV_FRAME_DATA_VIEW_ID,
>> @@ -101,6 +103,23 @@ static HEVCFrame *alloc_frame(HEVCContext *s, 
>> HEVCLayerContext *l)
>>               if (!sd)
>>                   goto fail;
>>               *(int*)sd->data = view_id;
>> +
>> +            if (tdrdi->num_ref_displays) {
>> +                AVStereo3D *stereo_3d;
>> +
>> +                av_frame_remove_side_data(frame->f, 
>> AV_FRAME_DATA_STEREO3D);
> 
> As this is now being called before ff_progress_frame_get_buffer() (is 
> there a reason you wanted the view_id side data and this one applied to 
> the frame before get_buffer()?), it became a no-op and any stereo 3d 
> side data in the input packet will be appended to the frame, resulting 
> in something like:
> 
>> [Parsed_showinfo_0 @ 00000281481551c0]   side data - View ID: view id: 0
>> [Parsed_showinfo_0 @ 00000281481551c0]   side data - Stereo 3D: type - 
>> frame alternate, view - right, primary_eye - none
>> [Parsed_showinfo_0 @ 00000281481551c0]   side data - Spherical 
>> Mapping: rectilinear
>> [Parsed_showinfo_0 @ 00000281481551c0]   side data - Stereo 3D: type - 
>> unspecified, view - packed, primary_eye - none, baseline: 19240, 
>> horizontal_disparity_adjustment: 0.0200, horizontal_field_of_view: 63.400
> 
> We don't really want to lose the information that's coded in the 
> container but not in the bitstream (Which happened in the previous 
> version of the patch too), so we should instead amend the container 
> level side data with the bitstream information.
> 
> Something like:
> 
>> diff --git a/libavcodec/hevc/refs.c b/libavcodec/hevc/refs.c
>> index ac1b07a308..f4c2b18e83 100644
>> --- a/libavcodec/hevc/refs.c
>> +++ b/libavcodec/hevc/refs.c
>> @@ -93,21 +93,32 @@ static HEVCFrame *alloc_frame(HEVCContext *s, 
>> HEVCLayerContext *l)
>>          if (ret < 0)
>>              return NULL;
>>
>> +        if (!(s->layers_active_output & (1 << s->cur_layer)))
>> +            frame->f->flags |= AV_FRAME_FLAG_DISCARD;
>> +
>> +        ret = ff_progress_frame_get_buffer(s->avctx, &frame->tf,
>> +                                           AV_GET_BUFFER_FLAG_REF);
>> +        if (ret < 0)
>> +            return NULL;
>> +
>>          // add view ID side data if it's nontrivial
>>          if (vps->nb_layers > 1 || view_id) {
>>              HEVCSEITDRDI *tdrdi = &s->sei.tdrdi;
>> -            AVFrameSideData *sd = av_frame_side_data_new(&frame->f- 
>> >side_data,
>> -                                                         &frame->f- 
>> >nb_side_data,
>> -                                                         
>> AV_FRAME_DATA_VIEW_ID,
>> -                                                         sizeof(int), 
>> 0);
>> +            AVFrameSideData *sd;
>> +
>> +            av_frame_remove_side_data(frame->f, AV_FRAME_DATA_VIEW_ID);
>> +            sd = av_frame_new_side_data(frame->f, 
>> AV_FRAME_DATA_VIEW_ID, sizeof(int));
>>              if (!sd)
>>                  goto fail;
>>              *(int*)sd->data = view_id;
>>
>>              if (tdrdi->num_ref_displays) {
>> -                AVStereo3D *stereo_3d;
>> +                AVStereo3D *stereo_3d = NULL;
>>
>> -                av_frame_remove_side_data(frame->f, 
>> AV_FRAME_DATA_STEREO3D);
>> +                sd = av_frame_get_side_data(frame->f, 
>> AV_FRAME_DATA_STEREO3D);
>> +                if (sd)
>> +                    stereo_3d = (AVStereo3D *)sd->data;
>> +                else
>>                  stereo_3d = av_stereo3d_create_side_data(frame->f);
>>                  if (!stereo_3d)
>>                      goto fail;
>> @@ -122,14 +133,6 @@ static HEVCFrame *alloc_frame(HEVCContext *s, 
>> HEVCLayerContext *l)
>>              }
>>          }
>>
>> -        if (!(s->layers_active_output & (1 << s->cur_layer)))
>> -            frame->f->flags |= AV_FRAME_FLAG_DISCARD;
>> -
>> -        ret = ff_progress_frame_get_buffer(s->avctx, &frame->tf,
>> -                                           AV_GET_BUFFER_FLAG_REF);
>> -        if (ret < 0)
>> -            return NULL;
>> -
>>          frame->rpl = ff_refstruct_allocz(s->pkt.nb_nals * 
>> sizeof(*frame->rpl));
>>          if (!frame->rpl)
>>              goto fail;
> 
> Which results in
> 
>> [Parsed_showinfo_0 @ 000001f776dc6020]   side data - Spherical 
>> Mapping: rectilinear
>> [Parsed_showinfo_0 @ 000001f776dc6020]   side data - Stereo 3D: type - 
>> frame alternate, view - right, primary_eye - none, baseline: 19240, 
>> horizontal_disparity_adjustment: 0.0200, horizontal_field_of_view: 63.400
>> [Parsed_showinfo_0 @ 000001f776dc6020]   side data - View ID: view id: 0
> 

Turns out doing this results in the output muxer/encoder context having 
this as global side data, because enc_open() in ffmpeg_enc.c initializes 
avctx.decoded_side_data with the first frame. Which would maybe be nice 
if not for the fact you get "view - right" even if you output both.

It does not happen with your patch as is because the loop in enc_open() 
will use the last side data of a given type, and the second one for 
stereo 3d in this scenario is coincidentally the source container one.
Anton Khirnov Sept. 17, 2024, 11:53 a.m. UTC | #3
Quoting James Almer (2024-09-15 00:12:52)
> (is there a reason you wanted the view_id side data and this one
> applied to the frame before get_buffer()?),

So that the caller knows what view is get_buffer() called for.

> As this is now being called before ff_progress_frame_get_buffer() it
> became a no-op and any stereo 3d side data in the input packet will be
> appended to the frame, resulting in something like:
> 
> > [Parsed_showinfo_0 @ 00000281481551c0]   side data - View ID: view id: 0
> > [Parsed_showinfo_0 @ 00000281481551c0]   side data - Stereo 3D: type - frame alternate, view - right, primary_eye - none
> > [Parsed_showinfo_0 @ 00000281481551c0]   side data - Spherical Mapping: rectilinear
> > [Parsed_showinfo_0 @ 00000281481551c0]   side data - Stereo 3D: type - unspecified, view - packed, primary_eye - none, baseline: 19240, horizontal_disparity_adjustment: 0.0200, horizontal_field_of_view: 63.400
> 
> We don't really want to lose the information that's coded in the 
> container but not in the bitstream (Which happened in the previous 
> version of the patch too), so we should instead amend the container 
> level side data with the bitstream information.
> 
> Something like:

That results in this information NOT being available to caller in
get_buffer().

The least bad solution I see is have ff_decode_frame_props() merge
container information into existing side data, if set.
James Almer Sept. 17, 2024, 4:44 p.m. UTC | #4
On 9/17/2024 8:53 AM, Anton Khirnov wrote:
> Quoting James Almer (2024-09-15 00:12:52)
>> (is there a reason you wanted the view_id side data and this one
>> applied to the frame before get_buffer()?),
> 
> So that the caller knows what view is get_buffer() called for.
> 
>> As this is now being called before ff_progress_frame_get_buffer() it
>> became a no-op and any stereo 3d side data in the input packet will be
>> appended to the frame, resulting in something like:
>>
>>> [Parsed_showinfo_0 @ 00000281481551c0]   side data - View ID: view id: 0
>>> [Parsed_showinfo_0 @ 00000281481551c0]   side data - Stereo 3D: type - frame alternate, view - right, primary_eye - none
>>> [Parsed_showinfo_0 @ 00000281481551c0]   side data - Spherical Mapping: rectilinear
>>> [Parsed_showinfo_0 @ 00000281481551c0]   side data - Stereo 3D: type - unspecified, view - packed, primary_eye - none, baseline: 19240, horizontal_disparity_adjustment: 0.0200, horizontal_field_of_view: 63.400
>>
>> We don't really want to lose the information that's coded in the
>> container but not in the bitstream (Which happened in the previous
>> version of the patch too), so we should instead amend the container
>> level side data with the bitstream information.
>>
>> Something like:
> 
> That results in this information NOT being available to caller in
> get_buffer().
> 
> The least bad solution I see is have ff_decode_frame_props() merge
> container information into existing side data, if set.

We need to ensure two things:

- Frame side data as output by lavc should contain the stereo3d 
information from the decoder alongside any from avctx. This should be in 
a single frame side data entry.
- Output stream side data as passed to lavf by the CLI should contain 
information that applies to the entire stream, which includes all the 
views packets may contain. This obviously also applies to what is passed 
to the encoder beforehand.

The former can be done, like you said above, having 
ff_decode_frame_props() merge the side data. The latter however would 
require changes to the CLI so the output encoder is not blindly 
initialized with the side data from the first frame that comes out of 
lavfi, which for mv-hevc always reports one specific view (and frame 
alternate type).
The CLI would need to use the knowledge of what the user requested to 
the decoder (one view? all views?) to update the global side data that's 
passed to the encoder and ultimately makes it to the muxer.
Anton Khirnov Sept. 18, 2024, 9:53 a.m. UTC | #5
Quoting James Almer (2024-09-17 18:44:00)
> - Output stream side data as passed to lavf by the CLI should contain 
> information that applies to the entire stream, which includes all the 
> views packets may contain. This obviously also applies to what is passed 
> to the encoder beforehand.
> 
> The latter however would 
> require changes to the CLI so the output encoder is not blindly 
> initialized with the side data from the first frame that comes out of 
> lavfi, which for mv-hevc always reports one specific view (and frame 
> alternate type).
> The CLI would need to use the knowledge of what the user requested to 
> the decoder (one view? all views?) to update the global side data that's 
> passed to the encoder and ultimately makes it to the muxer.

I've always disliked this practice of treating first frame's side data
as global, but the big problem with changing it is that we'd need to add
global side data propagation to lavfi.

Or as a short-term workaround, I could have the decoder signal this
directly to encoder via opaque_ref, bypassing lavfi. The issue with that
would be that the user then cannot override this information inside
lavfi - e.g. if you have a complex filtergraph that puts the two views
side by side.
diff mbox series

Patch

diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c
index 692f19e97e..b784b10bcf 100644
--- a/libavcodec/hevc/hevcdec.c
+++ b/libavcodec/hevc/hevcdec.c
@@ -3968,6 +3968,7 @@  static int hevc_update_thread_context(AVCodecContext *dst,
     s->sei.common.mastering_display    = s0->sei.common.mastering_display;
     s->sei.common.content_light        = s0->sei.common.content_light;
     s->sei.common.aom_film_grain       = s0->sei.common.aom_film_grain;
+    s->sei.tdrdi                       = s0->sei.tdrdi;
 
     return 0;
 }
diff --git a/libavcodec/hevc/refs.c b/libavcodec/hevc/refs.c
index b9b08ca416..ac1b07a308 100644
--- a/libavcodec/hevc/refs.c
+++ b/libavcodec/hevc/refs.c
@@ -22,6 +22,7 @@ 
  */
 
 #include "libavutil/mem.h"
+#include "libavutil/stereo3d.h"
 
 #include "container_fifo.h"
 #include "decode.h"
@@ -94,6 +95,7 @@  static HEVCFrame *alloc_frame(HEVCContext *s, HEVCLayerContext *l)
 
         // add view ID side data if it's nontrivial
         if (vps->nb_layers > 1 || view_id) {
+            HEVCSEITDRDI *tdrdi = &s->sei.tdrdi;
             AVFrameSideData *sd = av_frame_side_data_new(&frame->f->side_data,
                                                          &frame->f->nb_side_data,
                                                          AV_FRAME_DATA_VIEW_ID,
@@ -101,6 +103,23 @@  static HEVCFrame *alloc_frame(HEVCContext *s, HEVCLayerContext *l)
             if (!sd)
                 goto fail;
             *(int*)sd->data = view_id;
+
+            if (tdrdi->num_ref_displays) {
+                AVStereo3D *stereo_3d;
+
+                av_frame_remove_side_data(frame->f, AV_FRAME_DATA_STEREO3D);
+                stereo_3d = av_stereo3d_create_side_data(frame->f);
+                if (!stereo_3d)
+                    goto fail;
+
+                stereo_3d->type = AV_STEREO3D_FRAMESEQUENCE;
+                if (tdrdi->left_view_id[0] == view_id)
+                    stereo_3d->view = AV_STEREO3D_VIEW_LEFT;
+                else if (tdrdi->right_view_id[0] == view_id)
+                    stereo_3d->view = AV_STEREO3D_VIEW_RIGHT;
+                else
+                    stereo_3d->view = AV_STEREO3D_VIEW_UNSPEC;
+            }
         }
 
         if (!(s->layers_active_output & (1 << s->cur_layer)))