diff mbox

[FFmpeg-devel,1/1] segafilm: fetch duration from the container

Message ID 20180418192916.46055-2-misty@brew.sh
State New
Headers show

Commit Message

misty@brew.sh April 18, 2018, 7:29 p.m. UTC
From: Misty De Meo <mistydemeo@gmail.com>

---
 libavformat/segafilm.c | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

Comments

James Almer April 20, 2018, 3:31 a.m. UTC | #1
On 4/18/2018 4:29 PM, misty@brew.sh wrote:
> From: Misty De Meo <mistydemeo@gmail.com>
> 
> ---
>  libavformat/segafilm.c | 20 +++-----------------
>  1 file changed, 3 insertions(+), 17 deletions(-)
> 
> diff --git a/libavformat/segafilm.c b/libavformat/segafilm.c
> index e72c26b144..1529fc385e 100644
> --- a/libavformat/segafilm.c
> +++ b/libavformat/segafilm.c
> @@ -43,6 +43,7 @@ typedef struct film_sample {
>    int64_t sample_offset;
>    unsigned int sample_size;
>    int64_t pts;
> +  uint32_t duration;
>    int keyframe;
>  } film_sample;
>  
> @@ -226,6 +227,7 @@ static int film_read_header(AVFormatContext *s)
>              ret = AVERROR_INVALIDDATA;
>              goto fail;
>          }
> +        film->sample_table[i].duration = AV_RB32(&scratch[12]);

While for video tracks this field seems to report the same packet
durations that were being calculated pre patch, this field for audio
tracks is always 1.

Before this patch a codec copy of the sample logo-capcom.cpk in the FATE
suite gave:

1,          0,          0,    22048,    44096, 0xafd250ae
0,          2,          2,        2,    11080, 0xac3a462b
0,          4,          4,        2,    11300, 0xd8ee7f3e
0,          6,          6,        2,    21612, 0x73c3a3f9, F=0x0
0,          8,          8,        2,    21628, 0x00a5b4b9, F=0x0
0,         10,         10,        2,    14772, 0x1332b44f
0,         12,         12,        2,    14744, 0x5ce5d59b
0,         14,         14,        2,    14736, 0xd5ac2877
1,      22048,      22048,    11028,    22056, 0xe08a0f01

Whereas after applying it becomes:

1,          0,          0,        1,    44096, 0xafd250ae
0,          2,          2,        2,    11080, 0xac3a462b
0,          4,          4,        2,    11300, 0xd8ee7f3e
0,          6,          6,        2,    21612, 0x73c3a3f9, F=0x0
0,          8,          8,        2,    21628, 0x00a5b4b9, F=0x0
0,         10,         10,        2,    14772, 0x1332b44f
0,         12,         12,        2,    14744, 0x5ce5d59b
0,         14,         14,        2,    14736, 0xd5ac2877
1,      22048,      22048,        1,    22056, 0xe08a0f01

For reference, decoding it always gives the following with or without
this patch:

0,          0,          0,        1,   215040, 0x067c5362
1,          0,          0,    22048,    88192, 0x23bb50ae
0,          2,          2,        1,   215040, 0xd9eacb98
0,          4,          4,        1,   215040, 0x3c8a4cbd
0,          6,          6,        1,   215040, 0xbdf996e1
0,          8,          8,        1,   215040, 0x1b7fa123
0,         10,         10,        1,   215040, 0x834b4a8d
0,         12,         12,        1,   215040, 0xf4b1bebe
0,         14,         14,        1,   215040, 0x088c3802
1,      22048,      22048,    11028,    44112, 0x79600f01

Is this change desired/intended?

Also, unrelated to this patch, but as you can see decoding outputs one
extra video frame at the beginning that codec copy doesn't. this is
because packet keyframes are being set wrong: A 0 in the top bit of the
sample info dword on each sample table entry signals an Intra coded
frame, whereas 1 signals an Inter coded frame. The demuxer is assuming
the opposite.

>          if (AV_RB32(&scratch[8]) == 0xFFFFFFFF) {
>              film->sample_table[i].stream = film->audio_stream_index;
>              film->sample_table[i].pts = audio_frame_counter;
> @@ -270,7 +272,6 @@ static int film_read_packet(AVFormatContext *s,
>      FilmDemuxContext *film = s->priv_data;
>      AVIOContext *pb = s->pb;
>      film_sample *sample;
> -    film_sample *next_sample = NULL;
>      int next_sample_id;
>      int ret = 0;
>  
> @@ -279,20 +280,6 @@ static int film_read_packet(AVFormatContext *s,
>  
>      sample = &film->sample_table[film->current_sample];
>  
> -    /* Find the next sample from the same stream, assuming there is one;
> -     * this is used to calculate the duration below */
> -    next_sample_id = film->current_sample + 1;
> -    while (next_sample == NULL) {
> -        if (next_sample_id >= film->sample_count)
> -            break;
> -
> -        next_sample = &film->sample_table[next_sample_id];
> -        if (next_sample->stream != sample->stream) {
> -            next_sample = NULL;
> -            next_sample_id++;
> -        }
> -    }
> -
>      /* position the stream (will probably be there anyway) */
>      avio_seek(pb, sample->sample_offset, SEEK_SET);
>  
> @@ -304,8 +291,7 @@ static int film_read_packet(AVFormatContext *s,
>      pkt->dts = sample->pts;
>      pkt->pts = sample->pts;
>      pkt->flags |= sample->keyframe ? AV_PKT_FLAG_KEY : 0;
> -    if (next_sample != NULL)
> -        pkt->duration = next_sample->pts - sample->pts;
> +    pkt->duration = sample->duration;
>  
>      film->current_sample++;
misty@brew.sh April 20, 2018, 5:39 a.m. UTC | #2
On Thu, Apr 19, 2018 at 8:31 PM, James Almer <jamrial@gmail.com> wrote:
>
> > +        film->sample_table[i].duration = AV_RB32(&scratch[12]);
>
> While for video tracks this field seems to report the same packet
> durations that were being calculated pre patch, this field for audio
> tracks is always 1.
>
> Before this patch a codec copy of the sample logo-capcom.cpk in the FATE
> suite gave:
>
> 1,          0,          0,    22048,    44096, 0xafd250ae
> 0,          2,          2,        2,    11080, 0xac3a462b
> 0,          4,          4,        2,    11300, 0xd8ee7f3e
> 0,          6,          6,        2,    21612, 0x73c3a3f9, F=0x0
> 0,          8,          8,        2,    21628, 0x00a5b4b9, F=0x0
> 0,         10,         10,        2,    14772, 0x1332b44f
> 0,         12,         12,        2,    14744, 0x5ce5d59b
> 0,         14,         14,        2,    14736, 0xd5ac2877
> 1,      22048,      22048,    11028,    22056, 0xe08a0f01
>
> Whereas after applying it becomes:
>
> 1,          0,          0,        1,    44096, 0xafd250ae
> 0,          2,          2,        2,    11080, 0xac3a462b
> 0,          4,          4,        2,    11300, 0xd8ee7f3e
> 0,          6,          6,        2,    21612, 0x73c3a3f9, F=0x0
> 0,          8,          8,        2,    21628, 0x00a5b4b9, F=0x0
> 0,         10,         10,        2,    14772, 0x1332b44f
> 0,         12,         12,        2,    14744, 0x5ce5d59b
> 0,         14,         14,        2,    14736, 0xd5ac2877
> 1,      22048,      22048,        1,    22056, 0xe08a0f01
>
> For reference, decoding it always gives the following with or without
> this patch:
>
> 0,          0,          0,        1,   215040, 0x067c5362
> 1,          0,          0,    22048,    88192, 0x23bb50ae
> 0,          2,          2,        1,   215040, 0xd9eacb98
> 0,          4,          4,        1,   215040, 0x3c8a4cbd
> 0,          6,          6,        1,   215040, 0xbdf996e1
> 0,          8,          8,        1,   215040, 0x1b7fa123
> 0,         10,         10,        1,   215040, 0x834b4a8d
> 0,         12,         12,        1,   215040, 0xf4b1bebe
> 0,         14,         14,        1,   215040, 0x088c3802
> 1,      22048,      22048,    11028,    44112, 0x79600f01
>
> Is this change desired/intended?

This is actually intended; since that value is what's in the original
container, it's what should get passed through, especially for stream
copying back into a new FILM file.

>
> Also, unrelated to this patch, but as you can see decoding outputs one
> extra video frame at the beginning that codec copy doesn't. this is
> because packet keyframes are being set wrong: A 0 in the top bit of the
> sample info dword on each sample table entry signals an Intra coded
> frame, whereas 1 signals an Inter coded frame. The demuxer is assuming
> the opposite.

Here's a thread with context on the change to the intra setting:
http://ffmpeg.org/pipermail/ffmpeg-devel/2018-March/227014.html

The keyframe logic was intentionally inverted in
cfe1a9d311de6c36641cf295004cdbc77d7b600c
James Almer April 20, 2018, 3:05 p.m. UTC | #3
On 4/20/2018 2:39 AM, Misty De Meo wrote:
> On Thu, Apr 19, 2018 at 8:31 PM, James Almer <jamrial@gmail.com> wrote:
>>
>>> +        film->sample_table[i].duration = AV_RB32(&scratch[12]);
>>
>> While for video tracks this field seems to report the same packet
>> durations that were being calculated pre patch, this field for audio
>> tracks is always 1.
>>
>> Before this patch a codec copy of the sample logo-capcom.cpk in the FATE
>> suite gave:
>>
>> 1,          0,          0,    22048,    44096, 0xafd250ae
>> 0,          2,          2,        2,    11080, 0xac3a462b
>> 0,          4,          4,        2,    11300, 0xd8ee7f3e
>> 0,          6,          6,        2,    21612, 0x73c3a3f9, F=0x0
>> 0,          8,          8,        2,    21628, 0x00a5b4b9, F=0x0
>> 0,         10,         10,        2,    14772, 0x1332b44f
>> 0,         12,         12,        2,    14744, 0x5ce5d59b
>> 0,         14,         14,        2,    14736, 0xd5ac2877
>> 1,      22048,      22048,    11028,    22056, 0xe08a0f01
>>
>> Whereas after applying it becomes:
>>
>> 1,          0,          0,        1,    44096, 0xafd250ae
>> 0,          2,          2,        2,    11080, 0xac3a462b
>> 0,          4,          4,        2,    11300, 0xd8ee7f3e
>> 0,          6,          6,        2,    21612, 0x73c3a3f9, F=0x0
>> 0,          8,          8,        2,    21628, 0x00a5b4b9, F=0x0
>> 0,         10,         10,        2,    14772, 0x1332b44f
>> 0,         12,         12,        2,    14744, 0x5ce5d59b
>> 0,         14,         14,        2,    14736, 0xd5ac2877
>> 1,      22048,      22048,        1,    22056, 0xe08a0f01
>>
>> For reference, decoding it always gives the following with or without
>> this patch:
>>
>> 0,          0,          0,        1,   215040, 0x067c5362
>> 1,          0,          0,    22048,    88192, 0x23bb50ae
>> 0,          2,          2,        1,   215040, 0xd9eacb98
>> 0,          4,          4,        1,   215040, 0x3c8a4cbd
>> 0,          6,          6,        1,   215040, 0xbdf996e1
>> 0,          8,          8,        1,   215040, 0x1b7fa123
>> 0,         10,         10,        1,   215040, 0x834b4a8d
>> 0,         12,         12,        1,   215040, 0xf4b1bebe
>> 0,         14,         14,        1,   215040, 0x088c3802
>> 1,      22048,      22048,    11028,    44112, 0x79600f01
>>
>> Is this change desired/intended?
> 
> This is actually intended; since that value is what's in the original
> container, it's what should get passed through, especially for stream
> copying back into a new FILM file.

Maybe the muxer should ignore the input packet duration when writing
this field in that case, and hardcode it to 1. A duration of 1 for an
audio track with these presentation times is not correct.

You should set this value only for video. If pkt->duration is not set,
libavformat will calculate it on its own.

> 
>>
>> Also, unrelated to this patch, but as you can see decoding outputs one
>> extra video frame at the beginning that codec copy doesn't. this is
>> because packet keyframes are being set wrong: A 0 in the top bit of the
>> sample info dword on each sample table entry signals an Intra coded
>> frame, whereas 1 signals an Inter coded frame. The demuxer is assuming
>> the opposite.
> 
> Here's a thread with context on the change to the intra setting:
> http://ffmpeg.org/pipermail/ffmpeg-devel/2018-March/227014.html
> 
> The keyframe logic was intentionally inverted in
> cfe1a9d311de6c36641cf295004cdbc77d7b600c

I don't understand the reasoning to invert the logic here. The current
behavior is clearly not right. We're marking Inter coded frames as key
frames and Intra coded frames as non keyframes. The result as you can
see is at least one frame/packet lost during codec copy.

Setting pkt->flags was the only change that patch should have introduced.
Gyan April 20, 2018, 3:34 p.m. UTC | #4
On 4/20/2018 8:35 PM, James Almer wrote:


> I don't understand the reasoning to invert the logic here. The current
> behavior is clearly not right. We're marking Inter coded frames as key
> frames and Intra coded frames as non keyframes.

This behaviour was how it was originally. See the sample attached to 
#7091. Before inversion, first frame on that file is not marked as KF 
but all remaining frames are.

If you streamcopy the FATE sample to a new CPK starting at a frame 
currently not marked as KF, with -copyinkf, you should see 
artifact/incorrect decoding in the output.


Regards,
Gyan
James Almer April 20, 2018, 3:52 p.m. UTC | #5
On 4/20/2018 12:34 PM, Gyan Doshi wrote:
> 
> 
> On 4/20/2018 8:35 PM, James Almer wrote:
> 
> 
>> I don't understand the reasoning to invert the logic here. The current
>> behavior is clearly not right. We're marking Inter coded frames as key
>> frames and Intra coded frames as non keyframes.
> 
> This behaviour was how it was originally. See the sample attached to
> #7091. Before inversion, first frame on that file is not marked as KF
> but all remaining frames are.
> 
> If you streamcopy the FATE sample to a new CPK starting at a frame
> currently not marked as KF, with -copyinkf, you should see
> artifact/incorrect decoding in the output.

Wouldn't this hint that the file in question is broken? I see it was
generated with our muxer, which is indeed setting keyframes wrong.

In line 72 from segafilmenc.c I see

        if (pkt->keyframe)
            info1 |= (1 << 31);

So basically, a fifteen years old demuxer got its logic inverted to
match the behavior of an evidently faulty month old muxer.
Paul B Mahol April 20, 2018, 4:09 p.m. UTC | #6
On 4/20/18, James Almer <jamrial@gmail.com> wrote:
> On 4/20/2018 12:34 PM, Gyan Doshi wrote:
>>
>>
>> On 4/20/2018 8:35 PM, James Almer wrote:
>>
>>
>>> I don't understand the reasoning to invert the logic here. The current
>>> behavior is clearly not right. We're marking Inter coded frames as key
>>> frames and Intra coded frames as non keyframes.
>>
>> This behaviour was how it was originally. See the sample attached to
>> #7091. Before inversion, first frame on that file is not marked as KF
>> but all remaining frames are.
>>
>> If you streamcopy the FATE sample to a new CPK starting at a frame
>> currently not marked as KF, with -copyinkf, you should see
>> artifact/incorrect decoding in the output.
>
> Wouldn't this hint that the file in question is broken? I see it was
> generated with our muxer, which is indeed setting keyframes wrong.
>
> In line 72 from segafilmenc.c I see
>
>         if (pkt->keyframe)
>             info1 |= (1 << 31);
>
> So basically, a fifteen years old demuxer got its logic inverted to
> match the behavior of an evidently faulty month old muxer.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>


Take real sample from game, if first frame is not marked as keyframe,
demuxer is wrong.
Gyan April 20, 2018, 4:09 p.m. UTC | #7
On 4/20/2018 9:22 PM, James Almer wrote:
> 
> Wouldn't this hint that the file in question is broken?

I don't follow. If the inversion was a mistake, then frames currently 
not marked by demuxer are in fact keyframes and a stream decoded from 
that frame onwards should show correct output. I see precisely the 
opposite. Output generated by streamcopying from currently marked KF 
decode correctly and those generated by streamcopying from currently 
marked non-KF decode incorrectly*.

This should rule out errors in the muxer as well.

* ffmpeg -i logo-capcom.cpk -ss 5.4 -c copy -copyinkf -an copy-test.cpk 
&& ffplay copy-test.cpk


It's very much possible I'm missing something but I don't see what.

Regards,
Gyan
James Almer April 20, 2018, 4:11 p.m. UTC | #8
On 4/20/2018 1:09 PM, Paul B Mahol wrote:
> On 4/20/18, James Almer <jamrial@gmail.com> wrote:
>> On 4/20/2018 12:34 PM, Gyan Doshi wrote:
>>>
>>>
>>> On 4/20/2018 8:35 PM, James Almer wrote:
>>>
>>>
>>>> I don't understand the reasoning to invert the logic here. The current
>>>> behavior is clearly not right. We're marking Inter coded frames as key
>>>> frames and Intra coded frames as non keyframes.
>>>
>>> This behaviour was how it was originally. See the sample attached to
>>> #7091. Before inversion, first frame on that file is not marked as KF
>>> but all remaining frames are.
>>>
>>> If you streamcopy the FATE sample to a new CPK starting at a frame
>>> currently not marked as KF, with -copyinkf, you should see
>>> artifact/incorrect decoding in the output.
>>
>> Wouldn't this hint that the file in question is broken? I see it was
>> generated with our muxer, which is indeed setting keyframes wrong.
>>
>> In line 72 from segafilmenc.c I see
>>
>>         if (pkt->keyframe)
>>             info1 |= (1 << 31);
>>
>> So basically, a fifteen years old demuxer got its logic inverted to
>> match the behavior of an evidently faulty month old muxer.
>> _______________________________________________
>> ffmpeg-devel mailing list
>> ffmpeg-devel@ffmpeg.org
>> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>>
> 
> 
> Take real sample from game, if first frame is not marked as keyframe,
> demuxer is wrong.

That's exactly what happens right now with the capcom logo sample in the
FATE suite, as i mentioned earlier.
Gyan April 20, 2018, 4:25 p.m. UTC | #9
On 4/20/2018 9:39 PM, Gyan Doshi wrote:
> 
> It's very much possible I'm missing something but I don't see what.

Yup. I see that the decoder sets the frame flags correctly. The ticket 
sample file is wrongly muxed, and so the inversion is wrong.

Will send patch tomorrow unless someone gets to it first.

Regards,
Gyan
diff mbox

Patch

diff --git a/libavformat/segafilm.c b/libavformat/segafilm.c
index e72c26b144..1529fc385e 100644
--- a/libavformat/segafilm.c
+++ b/libavformat/segafilm.c
@@ -43,6 +43,7 @@  typedef struct film_sample {
   int64_t sample_offset;
   unsigned int sample_size;
   int64_t pts;
+  uint32_t duration;
   int keyframe;
 } film_sample;
 
@@ -226,6 +227,7 @@  static int film_read_header(AVFormatContext *s)
             ret = AVERROR_INVALIDDATA;
             goto fail;
         }
+        film->sample_table[i].duration = AV_RB32(&scratch[12]);
         if (AV_RB32(&scratch[8]) == 0xFFFFFFFF) {
             film->sample_table[i].stream = film->audio_stream_index;
             film->sample_table[i].pts = audio_frame_counter;
@@ -270,7 +272,6 @@  static int film_read_packet(AVFormatContext *s,
     FilmDemuxContext *film = s->priv_data;
     AVIOContext *pb = s->pb;
     film_sample *sample;
-    film_sample *next_sample = NULL;
     int next_sample_id;
     int ret = 0;
 
@@ -279,20 +280,6 @@  static int film_read_packet(AVFormatContext *s,
 
     sample = &film->sample_table[film->current_sample];
 
-    /* Find the next sample from the same stream, assuming there is one;
-     * this is used to calculate the duration below */
-    next_sample_id = film->current_sample + 1;
-    while (next_sample == NULL) {
-        if (next_sample_id >= film->sample_count)
-            break;
-
-        next_sample = &film->sample_table[next_sample_id];
-        if (next_sample->stream != sample->stream) {
-            next_sample = NULL;
-            next_sample_id++;
-        }
-    }
-
     /* position the stream (will probably be there anyway) */
     avio_seek(pb, sample->sample_offset, SEEK_SET);
 
@@ -304,8 +291,7 @@  static int film_read_packet(AVFormatContext *s,
     pkt->dts = sample->pts;
     pkt->pts = sample->pts;
     pkt->flags |= sample->keyframe ? AV_PKT_FLAG_KEY : 0;
-    if (next_sample != NULL)
-        pkt->duration = next_sample->pts - sample->pts;
+    pkt->duration = sample->duration;
 
     film->current_sample++;