diff mbox

[FFmpeg-devel,2/2] avformat/mxfdec: use binary search in mxf_absolute_bodysid_offset

Message ID 20180301214104.13935-2-cus@passwd.hu
State Accepted
Commit 90756e67a0a1de762d27c2fe01a30ac8434a3631
Headers show

Commit Message

Marton Balint March 1, 2018, 9:41 p.m. UTC
Signed-off-by: Marton Balint <cus@passwd.hu>
---
 libavformat/mxfdec.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

Comments

Tomas Härdin March 4, 2018, 8:42 p.m. UTC | #1
tor 2018-03-01 klockan 22:41 +0100 skrev Marton Balint:
> > Signed-off-by: Marton Balint <cus@passwd.hu>
> ---
>  libavformat/mxfdec.c | 22 ++++++++++++++--------
>  1 file changed, 14 insertions(+), 8 deletions(-)
> 
> diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c
> index d4291f5dc7..70091e0dc9 100644
> --- a/libavformat/mxfdec.c
> +++ b/libavformat/mxfdec.c
> @@ -1347,24 +1347,30 @@ static int mxf_get_sorted_table_segments(MXFContext *mxf, int *nb_sorted_segment
>   */
>  static int mxf_absolute_bodysid_offset(MXFContext *mxf, int body_sid, int64_t offset, int64_t *offset_out)
>  {
> -    int x;
>      MXFPartition *last_p = NULL;
> +    int a, b, m, m0;
>  
>      if (offset < 0)
>          return AVERROR(EINVAL);
>  
> -    for (x = 0; x < mxf->partitions_count; x++) {
> -        MXFPartition *p = &mxf->partitions[x];
> +    a = -1;

I've got a bad feeling about this -1

> +    b = mxf->partitions_count;
>  
> -        if (p->body_sid != body_sid)
> -            continue;
> +    while (b - a > 1) {
> +        m0 = m = (a + b) >> 1;

Could overflow with a specially crafted file. But I guess it would have
to be on the order of 1 TiB.

It also looks like this might behave incorrectly when a=-1, b=0

>  
> -        if (p->body_offset > offset)
> -            break;
> +        while (m < b && mxf->partitions[m].body_sid != body_sid)
> +            m++;
>  
> -        last_p = p;
> +        if (m < b && mxf->partitions[m].body_offset <= offset)
> +            a = m;
> +        else
> +            b = m0;
>      }
>  
> +    if (a >= 0)
> +        last_p = &mxf->partitions[a];
> +
>      if (last_p && (!last_p->essence_length || last_p->essence_length > (offset - last_p->body_offset))) {
>          *offset_out = last_p->essence_offset + (offset - last_p->body_offset);
>          return 0;

/Tomas
Marton Balint March 5, 2018, 1:28 a.m. UTC | #2
On Sun, 4 Mar 2018, Tomas Härdin wrote:

> tor 2018-03-01 klockan 22:41 +0100 skrev Marton Balint:
>> > Signed-off-by: Marton Balint <cus@passwd.hu>
>> ---
>>  libavformat/mxfdec.c | 22 ++++++++++++++--------
>>  1 file changed, 14 insertions(+), 8 deletions(-)
>> 
>> diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c
>> index d4291f5dc7..70091e0dc9 100644
>> --- a/libavformat/mxfdec.c
>> +++ b/libavformat/mxfdec.c
>> @@ -1347,24 +1347,30 @@ static int mxf_get_sorted_table_segments(MXFContext *mxf, int *nb_sorted_segment
>>   */
>>  static int mxf_absolute_bodysid_offset(MXFContext *mxf, int body_sid, int64_t offset, int64_t *offset_out)
>>  {
>> -    int x;
>>      MXFPartition *last_p = NULL;
>> +    int a, b, m, m0;
>>  
>>      if (offset < 0)
>>          return AVERROR(EINVAL);
>>  
>> -    for (x = 0; x < mxf->partitions_count; x++) {
>> -        MXFPartition *p = &mxf->partitions[x];
>> +    a = -1;
>
> I've got a bad feeling about this -1

There is an explicit check after the loop when we actually use the value 
of 'a' to see if it remained -1 or not. Other than that using this 
construct (a = -1, b = count) is also used in other places throughout the 
codebase for binary search.

>
>> +    b = mxf->partitions_count;
>>  
>> -        if (p->body_sid != body_sid)
>> -            continue;
>> +    while (b - a > 1) {
>> +        m0 = m = (a + b) >> 1;
>
> Could overflow with a specially crafted file. But I guess it would have
> to be on the order of 1 TiB.

I guess we could limit the number of partitions to INT_MAX / 2, although 
it really needs a *huge* crafted file and parsing it would probably take 
ages for the demuxer anyway...

>
> It also looks like this might behave incorrectly when a=-1, b=0

That can't happen as the loop condition would be false in that case.

Regards,
Marton
Marton Balint March 9, 2018, 12:28 a.m. UTC | #3
On Mon, 5 Mar 2018, Marton Balint wrote:

>
>
> On Sun, 4 Mar 2018, Tomas Härdin wrote:
>
>> tor 2018-03-01 klockan 22:41 +0100 skrev Marton Balint:
>>> > Signed-off-by: Marton Balint <cus@passwd.hu>
>>> ---
>>>  libavformat/mxfdec.c | 22 ++++++++++++++--------
>>>  1 file changed, 14 insertions(+), 8 deletions(-)
>>> 
>>> diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c
>>> index d4291f5dc7..70091e0dc9 100644
>>> --- a/libavformat/mxfdec.c
>>> +++ b/libavformat/mxfdec.c
>>> @@ -1347,24 +1347,30 @@ static int 
> mxf_get_sorted_table_segments(MXFContext *mxf, int *nb_sorted_segment
>>>   */
>>>  static int mxf_absolute_bodysid_offset(MXFContext *mxf, int body_sid, 
> int64_t offset, int64_t *offset_out)
>>>  {
>>> -    int x;
>>>      MXFPartition *last_p = NULL;
>>> +    int a, b, m, m0;
>>>  
>>>      if (offset < 0)
>>>          return AVERROR(EINVAL);
>>>  
>>> -    for (x = 0; x < mxf->partitions_count; x++) {
>>> -        MXFPartition *p = &mxf->partitions[x];
>>> +    a = -1;
>>
>> I've got a bad feeling about this -1
>
> There is an explicit check after the loop when we actually use the value 
> of 'a' to see if it remained -1 or not. Other than that using this 
> construct (a = -1, b = count) is also used in other places throughout the 
> codebase for binary search.
>
>>
>>> +    b = mxf->partitions_count;
>>>  
>>> -        if (p->body_sid != body_sid)
>>> -            continue;
>>> +    while (b - a > 1) {
>>> +        m0 = m = (a + b) >> 1;
>>
>> Could overflow with a specially crafted file. But I guess it would have
>> to be on the order of 1 TiB.
>
> I guess we could limit the number of partitions to INT_MAX / 2, although 
> it really needs a *huge* crafted file and parsing it would probably take 
> ages for the demuxer anyway...
>
>>
>> It also looks like this might behave incorrectly when a=-1, b=0
>
> That can't happen as the loop condition would be false in that case.
>

Will push this soon.

Regards,
Marton
Tomas Härdin March 9, 2018, 9:57 a.m. UTC | #4
On 2018-03-09 01:28, Marton Balint wrote:
>
>
> On Mon, 5 Mar 2018, Marton Balint wrote:
>
>>
>>
>> On Sun, 4 Mar 2018, Tomas Härdin wrote:
>>
>>> tor 2018-03-01 klockan 22:41 +0100 skrev Marton Balint:
>>>> > Signed-off-by: Marton Balint <cus@passwd.hu>
>>>> ---
>>>>  libavformat/mxfdec.c | 22 ++++++++++++++--------
>>>>  1 file changed, 14 insertions(+), 8 deletions(-)
>>>>
>>>> diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c
>>>> index d4291f5dc7..70091e0dc9 100644
>>>> --- a/libavformat/mxfdec.c
>>>> +++ b/libavformat/mxfdec.c
>>>> @@ -1347,24 +1347,30 @@ static int 
>> mxf_get_sorted_table_segments(MXFContext *mxf, int *nb_sorted_segment
>>>>   */
>>>>  static int mxf_absolute_bodysid_offset(MXFContext *mxf, int body_sid, 
>> int64_t offset, int64_t *offset_out)
>>>>  {
>>>> -    int x;
>>>>      MXFPartition *last_p = NULL;
>>>> +    int a, b, m, m0;
>>>>
>>>>      if (offset < 0)
>>>>          return AVERROR(EINVAL);
>>>>
>>>> -    for (x = 0; x < mxf->partitions_count; x++) {
>>>> -        MXFPartition *p = &mxf->partitions[x];
>>>> +    a = -1;
>>>
>>> I've got a bad feeling about this -1
>>
>> There is an explicit check after the loop when we actually use the 
>> value of 'a' to see if it remained -1 or not. Other than that using 
>> this construct (a = -1, b = count) is also used in other places 
>> throughout the codebase for binary search.
>>
>>>
>>>> +    b = mxf->partitions_count;
>>>>
>>>> -        if (p->body_sid != body_sid)
>>>> -            continue;
>>>> +    while (b - a > 1) {
>>>> +        m0 = m = (a + b) >> 1;
>>>
>>> Could overflow with a specially crafted file. But I guess it would have
>>> to be on the order of 1 TiB.
>>
>> I guess we could limit the number of partitions to INT_MAX / 2, 
>> although it really needs a *huge* crafted file and parsing it would 
>> probably take ages for the demuxer anyway...
>>
>>>
>>> It also looks like this might behave incorrectly when a=-1, b=0
>>
>> That can't happen as the loop condition would be false in that case.
>>
>
> Will push this soon.

INT_MAX/2 sounds like a decent solution.

/Tomas
Marton Balint March 9, 2018, 8:14 p.m. UTC | #5
On Fri, 9 Mar 2018, Tomas Härdin wrote:

> On 2018-03-09 01:28, Marton Balint wrote:
>>
>>
>> On Mon, 5 Mar 2018, Marton Balint wrote:
>>
>>>
>>>
>>> On Sun, 4 Mar 2018, Tomas Härdin wrote:
>>>
>>>> tor 2018-03-01 klockan 22:41 +0100 skrev Marton Balint:
>>>>> > Signed-off-by: Marton Balint <cus@passwd.hu>
>>>>> ---
>>>>>  libavformat/mxfdec.c | 22 ++++++++++++++--------
>>>>>  1 file changed, 14 insertions(+), 8 deletions(-)
>>>>>
>>>>> diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c
>>>>> index d4291f5dc7..70091e0dc9 100644
>>>>> --- a/libavformat/mxfdec.c
>>>>> +++ b/libavformat/mxfdec.c
>>>>> @@ -1347,24 +1347,30 @@ static int 
>>> mxf_get_sorted_table_segments(MXFContext *mxf, int *nb_sorted_segment
>>>>>   */
>>>>>  static int mxf_absolute_bodysid_offset(MXFContext *mxf, int body_sid, 
>>> int64_t offset, int64_t *offset_out)
>>>>>  {
>>>>> -    int x;
>>>>>      MXFPartition *last_p = NULL;
>>>>> +    int a, b, m, m0;
>>>>>
>>>>>      if (offset < 0)
>>>>>          return AVERROR(EINVAL);
>>>>>
>>>>> -    for (x = 0; x < mxf->partitions_count; x++) {
>>>>> -        MXFPartition *p = &mxf->partitions[x];
>>>>> +    a = -1;
>>>>
>>>> I've got a bad feeling about this -1
>>>
>>> There is an explicit check after the loop when we actually use the 
>>> value of 'a' to see if it remained -1 or not. Other than that using 
>>> this construct (a = -1, b = count) is also used in other places 
>>> throughout the codebase for binary search.
>>>
>>>>
>>>>> +    b = mxf->partitions_count;
>>>>>
>>>>> -        if (p->body_sid != body_sid)
>>>>> -            continue;
>>>>> +    while (b - a > 1) {
>>>>> +        m0 = m = (a + b) >> 1;
>>>>
>>>> Could overflow with a specially crafted file. But I guess it would have
>>>> to be on the order of 1 TiB.
>>>
>>> I guess we could limit the number of partitions to INT_MAX / 2, 
>>> although it really needs a *huge* crafted file and parsing it would 
>>> probably take ages for the demuxer anyway...
>>>
>>>>
>>>> It also looks like this might behave incorrectly when a=-1, b=0
>>>
>>> That can't happen as the loop condition would be false in that case.
>>>
>>
>> Will push this soon.
>
> INT_MAX/2 sounds like a decent solution.

Ok, pushed the series with an additional patch limiting the partition 
count.

Thanks,
Marton
diff mbox

Patch

diff --git a/libavformat/mxfdec.c b/libavformat/mxfdec.c
index d4291f5dc7..70091e0dc9 100644
--- a/libavformat/mxfdec.c
+++ b/libavformat/mxfdec.c
@@ -1347,24 +1347,30 @@  static int mxf_get_sorted_table_segments(MXFContext *mxf, int *nb_sorted_segment
  */
 static int mxf_absolute_bodysid_offset(MXFContext *mxf, int body_sid, int64_t offset, int64_t *offset_out)
 {
-    int x;
     MXFPartition *last_p = NULL;
+    int a, b, m, m0;
 
     if (offset < 0)
         return AVERROR(EINVAL);
 
-    for (x = 0; x < mxf->partitions_count; x++) {
-        MXFPartition *p = &mxf->partitions[x];
+    a = -1;
+    b = mxf->partitions_count;
 
-        if (p->body_sid != body_sid)
-            continue;
+    while (b - a > 1) {
+        m0 = m = (a + b) >> 1;
 
-        if (p->body_offset > offset)
-            break;
+        while (m < b && mxf->partitions[m].body_sid != body_sid)
+            m++;
 
-        last_p = p;
+        if (m < b && mxf->partitions[m].body_offset <= offset)
+            a = m;
+        else
+            b = m0;
     }
 
+    if (a >= 0)
+        last_p = &mxf->partitions[a];
+
     if (last_p && (!last_p->essence_length || last_p->essence_length > (offset - last_p->body_offset))) {
         *offset_out = last_p->essence_offset + (offset - last_p->body_offset);
         return 0;