diff mbox

[FFmpeg-devel,3/3] mov: Export spherical information

Message ID 20161111224902.83581-3-vittorio.giovara@gmail.com
State Superseded
Headers show

Commit Message

Vittorio Giovara Nov. 11, 2016, 10:49 p.m. UTC
This implements Spherical Video V1 and V2, as described in the
spatial-media collection by Google.

Signed-off-by: Vittorio Giovara <vittorio.giovara@gmail.com>
---
Please CC.
Vittorio

 libavformat/isom.h |   7 ++
 libavformat/mov.c  | 281 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 287 insertions(+), 1 deletion(-)

Comments

James Almer Nov. 11, 2016, 11:23 p.m. UTC | #1
On 11/11/2016 7:49 PM, Vittorio Giovara wrote:
> This implements Spherical Video V1 and V2, as described in the
> spatial-media collection by Google.
> 
> Signed-off-by: Vittorio Giovara <vittorio.giovara@gmail.com>
> ---
> Please CC.
> Vittorio
> 
>  libavformat/isom.h |   7 ++
>  libavformat/mov.c  | 281 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 287 insertions(+), 1 deletion(-)

[...]

> @@ -5682,6 +5927,40 @@ static int mov_read_header(AVFormatContext *s)
>                  sd->data = (uint8_t*)sc->display_matrix;
>                  sc->display_matrix = NULL;
>              }
> +            if (sc->stereo3d) {
> +                AVPacketSideData *sd, *tmp;
> +
> +                tmp = av_realloc_array(st->side_data,
> +                                       st->nb_side_data + 1, sizeof(*tmp));
> +                if (!tmp)
> +                    return AVERROR(ENOMEM);
> +
> +                st->side_data = tmp;
> +                st->nb_side_data++;
> +
> +                sd = &st->side_data[st->nb_side_data - 1];
> +                sd->type = AV_PKT_DATA_STEREO3D;
> +                sd->size = sizeof(*sc->stereo3d);
> +                sd->data = (uint8_t *)sc->stereo3d;
> +                sc->stereo3d = NULL;
> +            }
> +            if (sc->spherical) {
> +                AVPacketSideData *sd, *tmp;
> +
> +                tmp = av_realloc_array(st->side_data,
> +                                       st->nb_side_data + 1, sizeof(*tmp));
> +                if (!tmp)
> +                    return AVERROR(ENOMEM);
> +
> +                st->side_data = tmp;
> +                st->nb_side_data++;
> +
> +                sd = &st->side_data[st->nb_side_data - 1];
> +                sd->type = AV_PKT_DATA_SPHERICAL;
> +                sd->size = sc->spherical_size;
> +                sd->data = (uint8_t *)sc->spherical;
> +                sc->spherical = NULL;
> +            }

Why isn't this using av_stream_new_side_data()?

The question also goes for the display_matrix case already in the tree.

>              break;
>          }
>      }
>
Vittorio Giovara Nov. 12, 2016, 5:32 p.m. UTC | #2
On Fri, Nov 11, 2016 at 6:23 PM, James Almer <jamrial@gmail.com> wrote:
> On 11/11/2016 7:49 PM, Vittorio Giovara wrote:
>> This implements Spherical Video V1 and V2, as described in the
>> spatial-media collection by Google.
>>
>> Signed-off-by: Vittorio Giovara <vittorio.giovara@gmail.com>
>> ---
>> Please CC.
>> Vittorio
>>
>>  libavformat/isom.h |   7 ++
>>  libavformat/mov.c  | 281 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
>>  2 files changed, 287 insertions(+), 1 deletion(-)
>
> [...]
>
>> @@ -5682,6 +5927,40 @@ static int mov_read_header(AVFormatContext *s)
>>                  sd->data = (uint8_t*)sc->display_matrix;
>>                  sc->display_matrix = NULL;
>>              }
>> +            if (sc->stereo3d) {
>> +                AVPacketSideData *sd, *tmp;
>> +
>> +                tmp = av_realloc_array(st->side_data,
>> +                                       st->nb_side_data + 1, sizeof(*tmp));
>> +                if (!tmp)
>> +                    return AVERROR(ENOMEM);
>> +
>> +                st->side_data = tmp;
>> +                st->nb_side_data++;
>> +
>> +                sd = &st->side_data[st->nb_side_data - 1];
>> +                sd->type = AV_PKT_DATA_STEREO3D;
>> +                sd->size = sizeof(*sc->stereo3d);
>> +                sd->data = (uint8_t *)sc->stereo3d;
>> +                sc->stereo3d = NULL;
>> +            }
>> +            if (sc->spherical) {
>> +                AVPacketSideData *sd, *tmp;
>> +
>> +                tmp = av_realloc_array(st->side_data,
>> +                                       st->nb_side_data + 1, sizeof(*tmp));
>> +                if (!tmp)
>> +                    return AVERROR(ENOMEM);
>> +
>> +                st->side_data = tmp;
>> +                st->nb_side_data++;
>> +
>> +                sd = &st->side_data[st->nb_side_data - 1];
>> +                sd->type = AV_PKT_DATA_SPHERICAL;
>> +                sd->size = sc->spherical_size;
>> +                sd->data = (uint8_t *)sc->spherical;
>> +                sc->spherical = NULL;
>> +            }
>
> Why isn't this using av_stream_new_side_data()?

I didn't want to mix refactors and new code in a single patch;
coalescing that portion of code can be done later.
diff mbox

Patch

diff --git a/libavformat/isom.h b/libavformat/isom.h
index 02bfedd..0fd9eb0 100644
--- a/libavformat/isom.h
+++ b/libavformat/isom.h
@@ -24,6 +24,9 @@ 
 #ifndef AVFORMAT_ISOM_H
 #define AVFORMAT_ISOM_H
 
+#include "libavutil/spherical.h"
+#include "libavutil/stereo3d.h"
+
 #include "avio.h"
 #include "internal.h"
 #include "dv.h"
@@ -177,6 +180,10 @@  typedef struct MOVStreamContext {
     int stsd_count;
 
     int32_t *display_matrix;
+    AVStereo3D *stereo3d;
+    AVSphericalMapping *spherical;
+    size_t spherical_size;
+
     uint32_t format;
 
     int has_sidx;  // If there is an sidx entry for this stream.
diff --git a/libavformat/mov.c b/libavformat/mov.c
index cb3c61c..d36c935 100644
--- a/libavformat/mov.c
+++ b/libavformat/mov.c
@@ -42,6 +42,8 @@ 
 #include "libavutil/aes.h"
 #include "libavutil/aes_ctr.h"
 #include "libavutil/sha.h"
+#include "libavutil/spherical.h"
+#include "libavutil/stereo3d.h"
 #include "libavutil/timecode.h"
 #include "libavcodec/ac3tab.h"
 #include "libavcodec/mpegaudiodecheader.h"
@@ -4498,8 +4500,230 @@  static int mov_read_tmcd(MOVContext *c, AVIOContext *pb, MOVAtom atom)
     return 0;
 }
 
+/**
+ * Parse a Stereoscopic 3D Video box ('st3d', Spherical Video V2).
+ *
+ * The box is a FullBox: one version byte and three flag bytes precede the
+ * single stereo_mode byte. The decoded layout is stored in sc->stereo3d of
+ * the most recently created stream; a value left there by an earlier box is
+ * released first so that the last box wins without leaking.
+ *
+ * @param c    demuxer context; the last stream of c->fc receives the data
+ * @param pb   I/O context the box payload is read from
+ * @param atom box descriptor; atom.size is checked against the bytes needed
+ * @return 0 on success or when the box is ignored (no stream yet, unknown
+ *         mode), AVERROR_INVALIDDATA if the box is too short,
+ *         AVERROR(ENOMEM) on allocation failure
+ */
+static int mov_read_st3d(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    AVStream *st;
+    MOVStreamContext *sc;
+    enum AVStereo3DType type;
+    int mode;
+
+    if (c->fc->nb_streams < 1)
+        return 0;
+
+    st = c->fc->streams[c->fc->nb_streams - 1];
+    sc = st->priv_data;
+
+    /* version (1) + flags (3) + stereo_mode (1) */
+    if (atom.size < 5) {
+        av_log(c->fc, AV_LOG_ERROR, "Empty stereoscopic video box\n");
+        return AVERROR_INVALIDDATA;
+    }
+    avio_skip(pb, 4); /* version + flags */
+
+    mode = avio_r8(pb);
+    switch (mode) {
+    case 0:
+        type = AV_STEREO3D_2D;
+        break;
+    case 1:
+        type = AV_STEREO3D_TOPBOTTOM;
+        break;
+    case 2:
+        type = AV_STEREO3D_SIDEBYSIDE;
+        break;
+    default:
+        av_log(c->fc, AV_LOG_WARNING, "Unknown st3d mode value %d\n", mode);
+        return 0;
+    }
+
+    /* Drop any earlier allocation instead of overwriting the pointer. */
+    av_freep(&sc->stereo3d);
+    sc->stereo3d = av_stereo3d_alloc();
+    if (!sc->stereo3d)
+        return AVERROR(ENOMEM);
+
+    sc->stereo3d->type = type;
+    return 0;
+}
+
+/**
+ * Parse a Spherical Video box ('sv3d', Spherical Video V2).
+ *
+ * Expected layout, read sequentially from pb:
+ *   svhd (FullBox): version/flags, metadata_source string
+ *   proj (Box):     container for the two boxes below
+ *   prhd (FullBox): version/flags, yaw, pitch, roll as 16.16 fixed point
+ *   cbmp (FullBox): version/flags, layout, padding            -- or --
+ *   equi (FullBox): version/flags, top, bottom, left, right bounds
+ *
+ * Every child size is validated against both its minimum length and
+ * atom.size before it is used to compute a skip distance, so a malformed
+ * size can never turn into a backwards seek. The result is stored in
+ * sc->spherical / sc->spherical_size of the most recently created stream;
+ * an earlier allocation is released first.
+ *
+ * @param c    demuxer context; the last stream of c->fc receives the data
+ * @param pb   I/O context the box payload is read from
+ * @param atom box descriptor; child sizes may not exceed atom.size
+ * @return 0 on success or when the box is ignored (no stream yet, missing
+ *         or unknown child box), AVERROR_INVALIDDATA on a bad size,
+ *         AVERROR(ENOMEM) on allocation failure
+ */
+static int mov_read_sv3d(MOVContext *c, AVIOContext *pb, MOVAtom atom)
+{
+    AVStream *st;
+    MOVStreamContext *sc;
+    int size;
+    int32_t yaw, pitch, roll;
+    uint32_t tag;
+    unsigned l, t, r, b;
+    enum AVSphericalProjection projection;
+
+    if (c->fc->nb_streams < 1)
+        return 0;
+
+    st = c->fc->streams[c->fc->nb_streams - 1];
+    sc = st->priv_data;
+
+    if (atom.size < 4) {
+        av_log(c->fc, AV_LOG_ERROR, "Empty spherical video box\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* svhd: size (4) + tag (4) + version/flags (4) + metadata_source */
+    size = avio_rb32(pb);
+    if (size < 12 || size > atom.size)
+        return AVERROR_INVALIDDATA;
+
+    tag = avio_rl32(pb);
+    if (tag != MKTAG('s','v','h','d')) {
+        av_log(c->fc, AV_LOG_ERROR, "Missing spherical video header\n");
+        return 0;
+    }
+    avio_skip(pb, size - 8); /* version + flags, metadata_source */
+
+    /* proj: plain container, only its own header is consumed here */
+    size = avio_rb32(pb);
+    if (size < 8 || size > atom.size)
+        return AVERROR_INVALIDDATA;
+
+    tag = avio_rl32(pb);
+    if (tag != MKTAG('p','r','o','j')) {
+        av_log(c->fc, AV_LOG_ERROR, "Missing projection box\n");
+        return 0;
+    }
+
+    /* prhd: size (4) + tag (4) + version/flags (4) + 3 * int32 */
+    size = avio_rb32(pb);
+    if (size < 24 || size > atom.size)
+        return AVERROR_INVALIDDATA;
+
+    tag = avio_rl32(pb);
+    if (tag != MKTAG('p','r','h','d')) {
+        av_log(c->fc, AV_LOG_ERROR, "Missing projection header box\n");
+        return 0;
+    }
+    avio_skip(pb, 4); /* version + flags */
+
+    /* 16.16 fixed point */
+    yaw   = avio_rb32(pb);
+    pitch = avio_rb32(pb);
+    roll  = avio_rb32(pb);
+
+    avio_skip(pb, size - 24); /* any trailing bytes of prhd */
+
+    /* projection data box: size (4) + tag (4) + version/flags (4) + data */
+    size = avio_rb32(pb);
+    if (size < 12 || size > atom.size)
+        return AVERROR_INVALIDDATA;
+
+    tag = avio_rl32(pb);
+    switch (tag) {
+    case MKTAG('c','b','m','p'):
+        if (size < 20)
+            return AVERROR_INVALIDDATA;
+        projection = AV_SPHERICAL_CUBEMAP;
+        avio_skip(pb, 4); /* version + flags */
+        avio_skip(pb, 4); /* layout */
+        l = t = r = b = avio_rb32(pb); /* padding, applied to all sides */
+        break;
+    case MKTAG('e','q','u','i'):
+        if (size < 28)
+            return AVERROR_INVALIDDATA;
+        projection = AV_SPHERICAL_EQUIRECTANGULAR;
+        avio_skip(pb, 4); /* version + flags */
+        t = avio_rb32(pb);
+        b = avio_rb32(pb);
+        l = avio_rb32(pb);
+        r = avio_rb32(pb);
+        break;
+    default:
+        av_log(c->fc, AV_LOG_ERROR, "Unknown projection type\n");
+        return 0;
+    }
+
+    /* Drop any earlier allocation instead of overwriting the pointer. */
+    av_freep(&sc->spherical);
+    sc->spherical = av_spherical_alloc(&sc->spherical_size);
+    if (!sc->spherical)
+        return AVERROR(ENOMEM);
+
+    sc->spherical->projection = projection;
+
+    sc->spherical->yaw   = ((double) yaw)   / (1 << 16);
+    sc->spherical->pitch = ((double) pitch) / (1 << 16);
+    sc->spherical->roll  = ((double) roll)  / (1 << 16);
+
+    sc->spherical->left_offset   = l;
+    sc->spherical->top_offset    = t;
+    sc->spherical->right_offset  = r;
+    sc->spherical->bottom_offset = b;
+
+    return 0;
+}
+
+/**
+ * Locate tag in xml (case-insensitively) and return a pointer to the first
+ * character after it, i.e. the start of the element's text, or NULL when
+ * the tag does not occur.
+ */
+static const char *mov_xml_tag_value(const char *xml, const char *tag)
+{
+    const char *pos = av_stristr(xml, tag);
+
+    return pos ? pos + strlen(tag) : NULL;
+}
+
+/**
+ * Parse Spherical Video V1 metadata (GSpherical XML carried in a uuid box).
+ *
+ * Reads len bytes from pb into a NUL-terminated scratch buffer and scans
+ * it with plain substring searches (best effort, not a real XML parser).
+ * When the mandatory keys are present, sc->spherical is (re)allocated and
+ * filled in; an optional StereoMode element also (re)allocates
+ * sc->stereo3d. Values left there earlier are released first.
+ *
+ * @param sc  stream context receiving spherical / stereo3d data
+ * @param pb  I/O context positioned at the XML payload
+ * @param len number of payload bytes to read
+ * @return a negative AVERROR code on read or allocation failure, otherwise
+ *         the non-negative result of ffio_read_size(); sc->spherical stays
+ *         unset when the mandatory keys are missing
+ */
+static int mov_parse_uuid_spherical(MOVStreamContext *sc, AVIOContext *pb, size_t len)
+{
+    int ret = 0;
+    uint8_t *buffer = av_malloc(len + 1);
+    const char *xml = (const char *)buffer;
+    const char *val;
+
+    if (!buffer)
+        return AVERROR(ENOMEM);
+    buffer[len] = '\0';
+
+    ret = ffio_read_size(pb, buffer, len);
+    if (ret < 0)
+        goto out;
+
+    /* Check for mandatory keys and values, try to support XML as best-effort */
+    if (av_stristr(xml, "<GSpherical:StitchingSoftware>") &&
+        (val = av_stristr(xml, "<GSpherical:Spherical>")) &&
+        av_stristr(val, "true") &&
+        (val = av_stristr(xml, "<GSpherical:Stitched>")) &&
+        av_stristr(val, "true") &&
+        (val = av_stristr(xml, "<GSpherical:ProjectionType>")) &&
+        av_stristr(val, "equirectangular")) {
+        /* Drop any earlier allocation instead of overwriting the pointer. */
+        av_freep(&sc->spherical);
+        sc->spherical = av_spherical_alloc(&sc->spherical_size);
+        if (!sc->spherical) {
+            ret = AVERROR(ENOMEM);
+            goto out;
+        }
+
+        sc->spherical->projection = AV_SPHERICAL_EQUIRECTANGULAR;
+
+        val = av_stristr(xml, "<GSpherical:StereoMode>");
+        if (val) {
+            enum AVStereo3DType mode;
+
+            /* Search from the tag onwards so unrelated text before it
+             * cannot select the layout. */
+            if (av_stristr(val, "left-right"))
+                mode = AV_STEREO3D_SIDEBYSIDE;
+            else if (av_stristr(val, "top-bottom"))
+                mode = AV_STEREO3D_TOPBOTTOM;
+            else
+                mode = AV_STEREO3D_2D;
+
+            av_freep(&sc->stereo3d);
+            sc->stereo3d = av_stereo3d_alloc();
+            if (!sc->stereo3d) {
+                ret = AVERROR(ENOMEM);
+                goto out;
+            }
+
+            sc->stereo3d->type = mode;
+        }
+
+        /* orientation: numbers start right after the opening tag */
+        val = mov_xml_tag_value(xml, "<GSpherical:InitialViewHeadingDegrees>");
+        if (val)
+            sc->spherical->yaw = strtol(val, NULL, 10);
+        val = mov_xml_tag_value(xml, "<GSpherical:InitialViewPitchDegrees>");
+        if (val)
+            sc->spherical->pitch = strtol(val, NULL, 10);
+        val = mov_xml_tag_value(xml, "<GSpherical:InitialViewRollDegrees>");
+        if (val)
+            sc->spherical->roll = strtol(val, NULL, 10);
+
+        /* cropping */
+        val = mov_xml_tag_value(xml, "<GSpherical:CroppedAreaLeftPixels>");
+        if (val)
+            sc->spherical->left_offset = strtol(val, NULL, 10);
+        val = mov_xml_tag_value(xml, "<GSpherical:CroppedAreaTopPixels>");
+        if (val)
+            sc->spherical->top_offset = strtol(val, NULL, 10);
+        /* right/bottom are derived from the stream size minus the cropped
+         * area's origin and extent */
+        val = mov_xml_tag_value(xml, "<GSpherical:CroppedAreaImageWidthPixels>");
+        if (val)
+            sc->spherical->right_offset =
+                sc->width - sc->spherical->left_offset - strtol(val, NULL, 10);
+        val = mov_xml_tag_value(xml, "<GSpherical:CroppedAreaImageHeightPixels>");
+        if (val)
+            sc->spherical->bottom_offset =
+                sc->height - sc->spherical->top_offset - strtol(val, NULL, 10);
+    }
+
+out:
+    av_free(buffer);
+    return ret;
+}
+
 static int mov_read_uuid(MOVContext *c, AVIOContext *pb, MOVAtom atom)
 {
+    AVStream *st;
+    MOVStreamContext *sc;
     int ret;
     uint8_t uuid[16];
     static const uint8_t uuid_isml_manifest[] = {
@@ -4510,10 +4734,19 @@  static int mov_read_uuid(MOVContext *c, AVIOContext *pb, MOVAtom atom)
         0xbe, 0x7a, 0xcf, 0xcb, 0x97, 0xa9, 0x42, 0xe8,
         0x9c, 0x71, 0x99, 0x94, 0x91, 0xe3, 0xaf, 0xac
     };
+    static const uint8_t uuid_spherical[] = {
+        0xff, 0xcc, 0x82, 0x63, 0xf8, 0x55, 0x4a, 0x93,
+        0x88, 0x14, 0x58, 0x7a, 0x02, 0x52, 0x1f, 0xdd,
+    };
 
     if (atom.size < sizeof(uuid) || atom.size == INT64_MAX)
         return AVERROR_INVALIDDATA;
 
+    if (c->fc->nb_streams < 1)
+        return 0;
+    st = c->fc->streams[c->fc->nb_streams - 1];
+    sc = st->priv_data;
+
     ret = avio_read(pb, uuid, sizeof(uuid));
     if (ret < 0) {
         return ret;
@@ -4585,7 +4818,14 @@  static int mov_read_uuid(MOVContext *c, AVIOContext *pb, MOVAtom atom)
             av_dict_set(&c->fc->metadata, "xmp", buffer, 0);
         }
         av_free(buffer);
-    }
+    } else if (!memcmp(uuid, uuid_spherical, sizeof(uuid))) {
+        size_t len = atom.size - sizeof(uuid);
+        ret = mov_parse_uuid_spherical(sc, pb, len);
+        if (ret < 0)
+            return ret;
+        if (!sc->spherical)
+            av_log(c->fc, AV_LOG_WARNING, "Invalid spherical metadata found\n");    }
+
     return 0;
 }
 
@@ -4935,6 +5175,8 @@  static const MOVParseTableEntry mov_default_parse_table[] = {
 { MKTAG('f','r','m','a'), mov_read_frma },
 { MKTAG('s','e','n','c'), mov_read_senc },
 { MKTAG('s','a','i','z'), mov_read_saiz },
+{ MKTAG('s','t','3','d'), mov_read_st3d }, /* stereoscopic 3D video box */
+{ MKTAG('s','v','3','d'), mov_read_sv3d }, /* spherical video box */
 { 0, NULL }
 };
 
@@ -5355,6 +5597,9 @@  static int mov_read_close(AVFormatContext *s)
         av_freep(&sc->cenc.auxiliary_info);
         av_freep(&sc->cenc.auxiliary_info_sizes);
         av_aes_ctr_free(sc->cenc.aes_ctr);
+
+        av_freep(&sc->stereo3d);
+        av_freep(&sc->spherical);
     }
 
     if (mov->dv_demux) {
@@ -5682,6 +5927,40 @@  static int mov_read_header(AVFormatContext *s)
                 sd->data = (uint8_t*)sc->display_matrix;
                 sc->display_matrix = NULL;
             }
+            if (sc->stereo3d) {
+                AVPacketSideData *sd, *tmp;
+
+                tmp = av_realloc_array(st->side_data,
+                                       st->nb_side_data + 1, sizeof(*tmp));
+                if (!tmp)
+                    return AVERROR(ENOMEM);
+
+                st->side_data = tmp;
+                st->nb_side_data++;
+
+                sd = &st->side_data[st->nb_side_data - 1];
+                sd->type = AV_PKT_DATA_STEREO3D;
+                sd->size = sizeof(*sc->stereo3d);
+                sd->data = (uint8_t *)sc->stereo3d;
+                sc->stereo3d = NULL;
+            }
+            if (sc->spherical) {
+                AVPacketSideData *sd, *tmp;
+
+                tmp = av_realloc_array(st->side_data,
+                                       st->nb_side_data + 1, sizeof(*tmp));
+                if (!tmp)
+                    return AVERROR(ENOMEM);
+
+                st->side_data = tmp;
+                st->nb_side_data++;
+
+                sd = &st->side_data[st->nb_side_data - 1];
+                sd->type = AV_PKT_DATA_SPHERICAL;
+                sd->size = sc->spherical_size;
+                sd->data = (uint8_t *)sc->spherical;
+                sc->spherical = NULL;
+            }
             break;
         }
     }