diff mbox series

[FFmpeg-devel,v2,2/5] avutil/stereo3d: Fill out stereo info provided by Vision Pro files

Message ID 20240617134141.224614-3-derek.buitenhuis@gmail.com
State New
Headers show
Series Vision Pro Spatial Data | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Derek Buitenhuis June 17, 2024, 1:41 p.m. UTC
Based on what is in the files themselves, and what the API provides
to users.

URLs:
  * https://developer.apple.com/documentation/videotoolbox/kvtcompressionpropertykey_heroeye
  * https://developer.apple.com/documentation/videotoolbox/kvtcompressionpropertykey_stereocamerabaseline
  * https://developer.apple.com/documentation/videotoolbox/kvtcompressionpropertykey_horizontaldisparityadjustment
  * https://developer.apple.com/documentation/coremedia/kcmformatdescriptionextension_horizontalfieldofview

Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
---
 libavutil/stereo3d.c | 52 +++++++++++++++++++++++++++++
 libavutil/stereo3d.h | 78 ++++++++++++++++++++++++++++++++++++++++++++
 libavutil/version.h  |  2 +-
 3 files changed, 131 insertions(+), 1 deletion(-)

Comments

James Almer June 17, 2024, 4:53 p.m. UTC | #1
On 6/17/2024 10:41 AM, Derek Buitenhuis wrote:
> Based on what is in the files themselves, and what the API provides
> to users.
> 
> URLs:
>    * https://developer.apple.com/documentation/videotoolbox/kvtcompressionpropertykey_heroeye
>    * https://developer.apple.com/documentation/videotoolbox/kvtcompressionpropertykey_stereocamerabaseline
>    * https://developer.apple.com/documentation/videotoolbox/kvtcompressionpropertykey_horizontaldisparityadjustment
>    * https://developer.apple.com/documentation/coremedia/kcmformatdescriptionextension_horizontalfieldofview
> 
> Signed-off-by: Derek Buitenhuis <derek.buitenhuis@gmail.com>
> ---
>   libavutil/stereo3d.c | 52 +++++++++++++++++++++++++++++
>   libavutil/stereo3d.h | 78 ++++++++++++++++++++++++++++++++++++++++++++
>   libavutil/version.h  |  2 +-
>   3 files changed, 131 insertions(+), 1 deletion(-)
> 
> diff --git a/libavutil/stereo3d.c b/libavutil/stereo3d.c
> index 9c29ab01b5..a40a9439bb 100644
> --- a/libavutil/stereo3d.c
> +++ b/libavutil/stereo3d.c
> @@ -55,6 +55,18 @@ static const char * const stereo3d_type_names[] = {
>       [AV_STEREO3D_COLUMNS]             = "interleaved columns",
>   };
>   
> +static const char * const stereo3d_view_names[] = {
> +    [AV_STEREO3D_VIEW_PACKED] = "packed",
> +    [AV_STEREO3D_VIEW_LEFT]   = "left",
> +    [AV_STEREO3D_VIEW_RIGHT]  = "right",
> +};
> +
> +static const char * const stereo3d_primary_eye_names[] = {
> +    [AV_PRIMARY_EYE_NONE]  = "none",
> +    [AV_PRIMARY_EYE_LEFT]  = "left",
> +    [AV_PRIMARY_EYE_RIGHT] = "right",
> +};
> +
>   const char *av_stereo3d_type_name(unsigned int type)
>   {
>       if (type >= FF_ARRAY_ELEMS(stereo3d_type_names))
> @@ -74,3 +86,43 @@ int av_stereo3d_from_name(const char *name)
>   
>       return -1;
>   }
> +
> +const char *av_stereo3d_view_name(unsigned int view)
> +{
> +    if (view >= FF_ARRAY_ELEMS(stereo3d_view_names))
> +        return "unknown";
> +
> +    return stereo3d_view_names[view];
> +}
> +
> +int av_stereo3d_view_from_name(const char *name)
> +{
> +    int i;
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(stereo3d_view_names); i++) {
> +        if (av_strstart(name, stereo3d_view_names[i], NULL))
> +            return i;
> +    }
> +
> +    return -1;
> +}
> +
> +const char *av_stereo3d_primary_eye_name(unsigned int eye)
> +{
> +    if (eye >= FF_ARRAY_ELEMS(stereo3d_primary_eye_names))
> +        return "unknown";
> +
> +    return stereo3d_primary_eye_names[eye];
> +}
> +
> +int av_stereo3d_primary_eye_from_name(const char *name)
> +{
> +    int i;
> +
> +    for (i = 0; i < FF_ARRAY_ELEMS(stereo3d_primary_eye_names); i++) {
> +        if (av_strstart(name, stereo3d_primary_eye_names[i], NULL))
> +            return i;
> +    }
> +
> +    return -1;
> +}
> diff --git a/libavutil/stereo3d.h b/libavutil/stereo3d.h
> index 3aab959b79..d35e46e670 100644
> --- a/libavutil/stereo3d.h
> +++ b/libavutil/stereo3d.h
> @@ -158,6 +158,26 @@ enum AVStereo3DView {
>       AV_STEREO3D_VIEW_RIGHT,
>   };
>   
> +/**
> + * List of possible primary eyes.
> + */
> +enum AVStereo3DPrimaryEye {
> +    /**
> +     * Neither eye.
> +     */
> +    AV_PRIMARY_EYE_NONE,
> +
> +    /**
> +     * Left eye.
> +     */
> +    AV_PRIMARY_EYE_LEFT,
> +
> +    /**
> +     * Right eye.
> +     */
> +    AV_PRIMARY_EYE_RIGHT,
> +};
> +
>   /**
>    * Inverted views, Right/Bottom represents the left view.
>    */
> @@ -185,6 +205,28 @@ typedef struct AVStereo3D {
>        * Determines which views are packed.
>        */
>       enum AVStereo3DView view;
> +
> +    /**
> +     * Which eye is the primary eye when rendering in 2D.
> +     */
> +    enum AVStereo3DPrimaryEye primary_eye;
> +
> +    /**
> +     * The distance between the centres of the lenses of the camera system,
> +     * in micrometers. Zero if unset.
> +     */
> +    uint32_t baseline;
> +
> +    /**
> +     * Relative shift of the left and right images, which changes the zero parallax plane.
> +     * Range -10000 to 10000, mapped to -1.0 to 1.0. Zero if unset.

Maybe this should be an AVRational then.

> +     */
> +    int32_t horizontal_disparity_adjustment;
> +
> +    /**
> +     * Horizontal field of view in thousandths of a degree. Zero if unset.
> +     */
> +    uint32_t horizontal_field_of_view;
>   } AVStereo3D;
>   
>   /**
> @@ -222,6 +264,42 @@ const char *av_stereo3d_type_name(unsigned int type);
>    */
>   int av_stereo3d_from_name(const char *name);
>   
> +/**
> + * Provide a human-readable name of a given stereo3d view.
> + *
> + * @param view The input stereo3d view value.
> + *
> + * @return The name of the stereo3d view value, or "unknown".
> + */
> +const char *av_stereo3d_view_name(unsigned int view);
> +
> +/**
> + * Get the AVStereo3DView from a human-readable name.
> + *
> + * @param name The input string.
> + *
> + * @return The AVStereo3DView value, or -1 if not found.
> + */
> +int av_stereo3d_view_from_name(const char *name);
> +
> +/**
> + * Provide a human-readable name of a given stereo3d primary eye.
> + *
> + * @param eye The input stereo3d primary eye value.
> + *
> + * @return The name of the stereo3d primary eye value, or "unknown".
> + */
> +const char *av_stereo3d_primary_eye_name(unsigned int eye);
> +
> +/**
> + * Get the AVStereo3DPrimaryEye from a human-readable name.
> + *
> + * @param name The input string.
> + *
> + * @return The AVStereo3DPrimaryEye value, or -1 if not found.
> + */
> +int av_stereo3d_primary_eye_from_name(const char *name);
> +
>   /**
>    * @}
>    */
> diff --git a/libavutil/version.h b/libavutil/version.h
> index 7df546ee22..8044fd3935 100644
> --- a/libavutil/version.h
> +++ b/libavutil/version.h
> @@ -79,7 +79,7 @@
>    */
>   
>   #define LIBAVUTIL_VERSION_MAJOR  59
> -#define LIBAVUTIL_VERSION_MINOR  23
> +#define LIBAVUTIL_VERSION_MINOR  24
>   #define LIBAVUTIL_VERSION_MICRO 100
>   
>   #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
Derek Buitenhuis June 17, 2024, 5:07 p.m. UTC | #2
On 6/17/2024 5:53 PM, James Almer wrote:
> Maybe this should be an AVRational then.

While that is probably 'more correct', it does mean that in 100% of the places
this could be used, it'll have to be converted back to the -10000 to 10000
range. Is there a simple way to do that with an AVRational that doesn't
involve a round trip to a double or float (i.e. lossy)?

- Derek
James Almer June 17, 2024, 6:09 p.m. UTC | #3
On 6/17/2024 2:07 PM, Derek Buitenhuis wrote:
> On 6/17/2024 5:53 PM, James Almer wrote:
>> Maybe this should be an AVRational then.
> 
> While that is probably 'more correct', it does mean that in 100% of the places
> this could be used, it'll have to be converted back to the -10000 to 10000
> range. Is there a simple way to do that with an AVRational that doesn't
> involve a round trip to a double or float (i.e. lossy)?

No, it's av_d2q(), av_q2d(), and av_rescale() as needed. Same as we do 
for Mastering Display and Ambient Viewing Environment Metadata.
The reason to use AVRational is that in this specific spec the values 
have a denominator of 10000, but in others it doesn't need to, allowing 
for more precise values (Matroska would store it as a double, in fact).

So we shouldn't define our API for one specific implementation but 
rather in a generic way that should accommodate any potential 
implementation. I think we already did the former with a Google 
implementation (x.y fixed point values), and I want to avoid doing it again.

> 
> - Derek
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Derek Buitenhuis June 17, 2024, 7:02 p.m. UTC | #4
On 6/17/2024 7:09 PM, James Almer wrote:
> No, it's av_d2q(), av_q2d(), and av_rescale() as needed. Same as we do 
> for Mastering Display and Ambient Viewing Environment Metadata.
> The reason to use AVRational is that in this specific spec the values 
> have a denominator of 10000, but in others it doesn't need to, allowing 
> for more precise values (Matroska would store it as a double, in fact).

This is unfortunate. Possibly we should add some util func for this case,
as it's a case I know I've personally hit more than once (with bugs caused
by lossy roundtrip) in my own code - I ended up manually using num/den in
the end.

> So we shouldn't define our API for one specific implementation but 
> rather in a generic way that should accommodate any potential 
> implementation. I think we already did the former with a Google 
> implementation (x.y fixed point values), and I want to avoid doing it again.

Will send a v3 set using AVRational, then.

- Derek
diff mbox series

Patch

diff --git a/libavutil/stereo3d.c b/libavutil/stereo3d.c
index 9c29ab01b5..a40a9439bb 100644
--- a/libavutil/stereo3d.c
+++ b/libavutil/stereo3d.c
@@ -55,6 +55,18 @@  static const char * const stereo3d_type_names[] = {
     [AV_STEREO3D_COLUMNS]             = "interleaved columns",
 };
 
+static const char * const stereo3d_view_names[] = {
+    [AV_STEREO3D_VIEW_PACKED] = "packed",
+    [AV_STEREO3D_VIEW_LEFT]   = "left",
+    [AV_STEREO3D_VIEW_RIGHT]  = "right",
+};
+
+static const char * const stereo3d_primary_eye_names[] = {
+    [AV_PRIMARY_EYE_NONE]  = "none",
+    [AV_PRIMARY_EYE_LEFT]  = "left",
+    [AV_PRIMARY_EYE_RIGHT] = "right",
+};
+
 const char *av_stereo3d_type_name(unsigned int type)
 {
     if (type >= FF_ARRAY_ELEMS(stereo3d_type_names))
@@ -74,3 +86,43 @@  int av_stereo3d_from_name(const char *name)
 
     return -1;
 }
+
+const char *av_stereo3d_view_name(unsigned int view)
+{
+    if (view >= FF_ARRAY_ELEMS(stereo3d_view_names))
+        return "unknown";
+
+    return stereo3d_view_names[view];
+}
+
+int av_stereo3d_view_from_name(const char *name)
+{
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(stereo3d_view_names); i++) {
+        if (av_strstart(name, stereo3d_view_names[i], NULL))
+            return i;
+    }
+
+    return -1;
+}
+
+const char *av_stereo3d_primary_eye_name(unsigned int eye)
+{
+    if (eye >= FF_ARRAY_ELEMS(stereo3d_primary_eye_names))
+        return "unknown";
+
+    return stereo3d_primary_eye_names[eye];
+}
+
+int av_stereo3d_primary_eye_from_name(const char *name)
+{
+    int i;
+
+    for (i = 0; i < FF_ARRAY_ELEMS(stereo3d_primary_eye_names); i++) {
+        if (av_strstart(name, stereo3d_primary_eye_names[i], NULL))
+            return i;
+    }
+
+    return -1;
+}
diff --git a/libavutil/stereo3d.h b/libavutil/stereo3d.h
index 3aab959b79..d35e46e670 100644
--- a/libavutil/stereo3d.h
+++ b/libavutil/stereo3d.h
@@ -158,6 +158,26 @@  enum AVStereo3DView {
     AV_STEREO3D_VIEW_RIGHT,
 };
 
+/**
+ * List of possible primary eyes.
+ */
+enum AVStereo3DPrimaryEye {
+    /**
+     * Neither eye.
+     */
+    AV_PRIMARY_EYE_NONE,
+
+    /**
+     * Left eye.
+     */
+    AV_PRIMARY_EYE_LEFT,
+
+    /**
+     * Right eye.
+     */
+    AV_PRIMARY_EYE_RIGHT,
+};
+
 /**
  * Inverted views, Right/Bottom represents the left view.
  */
@@ -185,6 +205,28 @@  typedef struct AVStereo3D {
      * Determines which views are packed.
      */
     enum AVStereo3DView view;
+
+    /**
+     * Which eye is the primary eye when rendering in 2D.
+     */
+    enum AVStereo3DPrimaryEye primary_eye;
+
+    /**
+     * The distance between the centres of the lenses of the camera system,
+     * in micrometers. Zero if unset.
+     */
+    uint32_t baseline;
+
+    /**
+     * Relative shift of the left and right images, which changes the zero parallax plane.
+     * Range -10000 to 10000, mapped to -1.0 to 1.0. Zero if unset.
+     */
+    int32_t horizontal_disparity_adjustment;
+
+    /**
+     * Horizontal field of view in thousandths of a degree. Zero if unset.
+     */
+    uint32_t horizontal_field_of_view;
 } AVStereo3D;
 
 /**
@@ -222,6 +264,42 @@  const char *av_stereo3d_type_name(unsigned int type);
  */
 int av_stereo3d_from_name(const char *name);
 
+/**
+ * Provide a human-readable name of a given stereo3d view.
+ *
+ * @param view The input stereo3d view value.
+ *
+ * @return The name of the stereo3d view value, or "unknown".
+ */
+const char *av_stereo3d_view_name(unsigned int view);
+
+/**
+ * Get the AVStereo3DView from a human-readable name.
+ *
+ * @param name The input string.
+ *
+ * @return The AVStereo3DView value, or -1 if not found.
+ */
+int av_stereo3d_view_from_name(const char *name);
+
+/**
+ * Provide a human-readable name of a given stereo3d primary eye.
+ *
+ * @param eye The input stereo3d primary eye value.
+ *
+ * @return The name of the stereo3d primary eye value, or "unknown".
+ */
+const char *av_stereo3d_primary_eye_name(unsigned int eye);
+
+/**
+ * Get the AVStereo3DPrimaryEye from a human-readable name.
+ *
+ * @param name The input string.
+ *
+ * @return The AVStereo3DPrimaryEye value, or -1 if not found.
+ */
+int av_stereo3d_primary_eye_from_name(const char *name);
+
 /**
  * @}
  */
diff --git a/libavutil/version.h b/libavutil/version.h
index 7df546ee22..8044fd3935 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,7 +79,7 @@ 
  */
 
 #define LIBAVUTIL_VERSION_MAJOR  59
-#define LIBAVUTIL_VERSION_MINOR  23
+#define LIBAVUTIL_VERSION_MINOR  24
 #define LIBAVUTIL_VERSION_MICRO 100
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \