diff mbox series

[FFmpeg-devel,1/7] avformat/avc: Fix undefined shift and assert when reading exp-golomb num

Message ID 20200709103542.19909-1-andreas.rheinhardt@gmail.com
State New
Headers show
Series [FFmpeg-devel,1/7] avformat/avc: Fix undefined shift and assert when reading exp-golomb num | expand

Checks

Context Check Description
andriy/default pending
andriy/make success Make finished
andriy/make_fate success Make fate finished

Commit Message

Andreas Rheinhardt July 9, 2020, 10:35 a.m. UTC
The current code for parsing unsigned exponential-golomb codes is not
suitable for long codes: If there are enough leading zeroes, it
left-shifts 1 by 32 places and uses get_bitsz to read 32 bits, although
it is only suitable to read 0-25 bits. There is an av_assert2 to
ensure this when using the uncached bitstream reader; with valid input
one can still run into the assert and left-shift 1 by 31 which is
undefined.

This commit changes this. All valid data is parsed correctly; invalid
data does no longer lead to undefined behaviour or to asserts. Parsing
all valid data correctly also necessitated changing the return value to
unsigned; also an intermediate variable used for parsing signed
exponential-golomb codes needed to be made unsigned, too, in order to
parse long signed codes correctly.

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
---
Moving the function to the beginning is done in preparation for another
commit that I'll send soon.

 libavformat/avc.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

Comments

James Almer July 9, 2020, 7:21 p.m. UTC | #1
On 7/9/2020 7:35 AM, Andreas Rheinhardt wrote:
> The current code for parsing unsigned exponential-golomb codes is not
> suitable for long codes: If there are enough leading zeroes, it
> left-shifts 1 by 32 places and uses get_bitsz to read 32 bits, although
> it is only suitable to read 0-25 bits. There is an av_assert2 to
> ensure this when using the uncached bitstream reader; with valid input
> one can still run into the assert and left-shift 1 by 31 which is
> undefined.
> 
> This commit changes this. All valid data is parsed correctly; invalid
> data does no longer lead to undefined behaviour or to asserts. Parsing
> all valid data correctly also necessitated changing the return value to
> unsigned; also an intermediate variable used for parsing signed
> exponential-golomb codes needed to be made unsigned, too, in order to
> parse long signed codes correctly.
> 
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
> ---
> Moving the function to the beginning is done in preparation for another
> commit that I'll send soon.
> 
>  libavformat/avc.c | 24 ++++++++++++++++--------
>  1 file changed, 16 insertions(+), 8 deletions(-)
> 
> diff --git a/libavformat/avc.c b/libavformat/avc.c
> index b5e2921388..55494eb08a 100644
> --- a/libavformat/avc.c
> +++ b/libavformat/avc.c
> @@ -27,6 +27,21 @@
>  #include "avc.h"
>  #include "avio_internal.h"
>  
> +static inline unsigned get_ue_golomb(GetBitContext *gb)
> +{
> +    int i;
> +    for (i = 1; i <= 32; i++) {
> +        if (get_bits_left(gb) < i)
> +            return 0;
> +        if (show_bits1(gb))
> +            break;
> +        skip_bits1(gb);
> +    }
> +    if (i > 32)
> +        return 0;
> +    return get_bits_long(gb, i) - 1;
> +}
> +
>  static const uint8_t *avc_find_startcode_internal(const uint8_t *p, const uint8_t *end)
>  {
>      const uint8_t *a = p + 4 - ((intptr_t)p & 3);
> @@ -318,15 +333,8 @@ static const AVRational avc_sample_aspect_ratio[17] = {
>      {   2,  1 },
>  };
>  
> -static inline int get_ue_golomb(GetBitContext *gb) {
> -    int i;
> -    for (i = 0; i < 32 && !get_bits1(gb); i++)
> -        ;
> -    return get_bitsz(gb, i) + (1 << i) - 1;
> -}
> -
>  static inline int get_se_golomb(GetBitContext *gb) {
> -    int v = get_ue_golomb(gb) + 1;
> +    unsigned v = get_ue_golomb(gb) + 1;
>      int sign = -(v & 1);
>      return ((v >> 1) ^ sign) - sign;
>  }

I think it's best if we use the lavc golomb code instead. When i
suggested to re implement it here i wasn't aware it was shared with lavf
within the golomb_tab.c source file.
Andreas Rheinhardt July 10, 2020, 3:31 p.m. UTC | #2
James Almer:
> On 7/9/2020 7:35 AM, Andreas Rheinhardt wrote:
>> The current code for parsing unsigned exponential-golomb codes is not
>> suitable for long codes: If there are enough leading zeroes, it
>> left-shifts 1 by 32 places and uses get_bitsz to read 32 bits, although
>> it is only suitable to read 0-25 bits. There is an av_assert2 to
>> ensure this when using the uncached bitstream reader; with valid input
>> one can still run into the assert and left-shift 1 by 31 which is
>> undefined.
>>
>> This commit changes this. All valid data is parsed correctly; invalid
>> data does no longer lead to undefined behaviour or to asserts. Parsing
>> all valid data correctly also necessitated changing the return value to
>> unsigned; also an intermediate variable used for parsing signed
>> exponential-golomb codes needed to be made unsigned, too, in order to
>> parse long signed codes correctly.
>>
>> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
>> ---
>> Moving the function to the beginning is done in preparation for another
>> commit that I'll send soon.
>>
>>  libavformat/avc.c | 24 ++++++++++++++++--------
>>  1 file changed, 16 insertions(+), 8 deletions(-)
>>
>> diff --git a/libavformat/avc.c b/libavformat/avc.c
>> index b5e2921388..55494eb08a 100644
>> --- a/libavformat/avc.c
>> +++ b/libavformat/avc.c
>> @@ -27,6 +27,21 @@
>>  #include "avc.h"
>>  #include "avio_internal.h"
>>  
>> +static inline unsigned get_ue_golomb(GetBitContext *gb)
>> +{
>> +    int i;
>> +    for (i = 1; i <= 32; i++) {
>> +        if (get_bits_left(gb) < i)
>> +            return 0;
>> +        if (show_bits1(gb))
>> +            break;
>> +        skip_bits1(gb);
>> +    }
>> +    if (i > 32)
>> +        return 0;
>> +    return get_bits_long(gb, i) - 1;
>> +}
>> +
>>  static const uint8_t *avc_find_startcode_internal(const uint8_t *p, const uint8_t *end)
>>  {
>>      const uint8_t *a = p + 4 - ((intptr_t)p & 3);
>> @@ -318,15 +333,8 @@ static const AVRational avc_sample_aspect_ratio[17] = {
>>      {   2,  1 },
>>  };
>>  
>> -static inline int get_ue_golomb(GetBitContext *gb) {
>> -    int i;
>> -    for (i = 0; i < 32 && !get_bits1(gb); i++)
>> -        ;
>> -    return get_bitsz(gb, i) + (1 << i) - 1;
>> -}
>> -
>>  static inline int get_se_golomb(GetBitContext *gb) {
>> -    int v = get_ue_golomb(gb) + 1;
>> +    unsigned v = get_ue_golomb(gb) + 1;
>>      int sign = -(v & 1);
>>      return ((v >> 1) ^ sign) - sign;
>>  }
> 
> I think it's best if we use the lavc golomb code instead. When i
> suggested to re implement it here i wasn't aware it was shared with lavf
> within the golomb_tab.c source file.

I actually checked these functions and none did what I wanted to do
(notice that this is only one of two patches for this function, the rest
is in 14/17):
1. None of these functions check explicitly for overreads/end of buffer.
They only do so implicitly if the safe bitstream reader is on.
2. All of these functions are declared as inline; yet I do not really
think that this is needed here given that parsing of golomb stuff
happens only very rarely here (only at the beginning of the muxing process).
3. get_ue_golomb_long does not allow to check for invalid values (and do
actually all arch-specific versions of av_log2 obey av_log2(0) == 0?).
4. get_ue_golomb_31 is wrongly named: It can actually only read golomb
codes that are at most 9 bits long, i.e. at most four leading zeroes and
followed by five value bits. And therefore it can be used for values
0..30, but not 31; in particular, it must not be used for the SPS id,
yet it is. (But there are some places where it is not used where it
could be.)
5. The offset_for_* values that need to be parsed when
pic_order_cnt_type == 1 have a value of -2^31 + 1..2^31 - 1; when one
uses the corresponding unsigned function to parse them, one needs to be
able to parse values in the range 0..2^32 - 2, i.e. not get_ue_golomb.
But there is no such function with proper error checking.

- Andreas
James Almer July 10, 2020, 3:58 p.m. UTC | #3
On 7/10/2020 12:31 PM, Andreas Rheinhardt wrote:
> James Almer:
>> On 7/9/2020 7:35 AM, Andreas Rheinhardt wrote:
>>> The current code for parsing unsigned exponential-golomb codes is not
>>> suitable for long codes: If there are enough leading zeroes, it
>>> left-shifts 1 by 32 places and uses get_bitsz to read 32 bits, although
>>> it is only suitable to read 0-25 bits. There is an av_assert2 to
>>> ensure this when using the uncached bitstream reader; with valid input
>>> one can still run into the assert and left-shift 1 by 31 which is
>>> undefined.
>>>
>>> This commit changes this. All valid data is parsed correctly; invalid
>>> data does no longer lead to undefined behaviour or to asserts. Parsing
>>> all valid data correctly also necessitated changing the return value to
>>> unsigned; also an intermediate variable used for parsing signed
>>> exponential-golomb codes needed to be made unsigned, too, in order to
>>> parse long signed codes correctly.
>>>
>>> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@gmail.com>
>>> ---
>>> Moving the function to the beginning is done in preparation for another
>>> commit that I'll send soon.
>>>
>>>  libavformat/avc.c | 24 ++++++++++++++++--------
>>>  1 file changed, 16 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/libavformat/avc.c b/libavformat/avc.c
>>> index b5e2921388..55494eb08a 100644
>>> --- a/libavformat/avc.c
>>> +++ b/libavformat/avc.c
>>> @@ -27,6 +27,21 @@
>>>  #include "avc.h"
>>>  #include "avio_internal.h"
>>>  
>>> +static inline unsigned get_ue_golomb(GetBitContext *gb)
>>> +{
>>> +    int i;
>>> +    for (i = 1; i <= 32; i++) {
>>> +        if (get_bits_left(gb) < i)
>>> +            return 0;
>>> +        if (show_bits1(gb))
>>> +            break;
>>> +        skip_bits1(gb);
>>> +    }
>>> +    if (i > 32)
>>> +        return 0;
>>> +    return get_bits_long(gb, i) - 1;
>>> +}
>>> +
>>>  static const uint8_t *avc_find_startcode_internal(const uint8_t *p, const uint8_t *end)
>>>  {
>>>      const uint8_t *a = p + 4 - ((intptr_t)p & 3);
>>> @@ -318,15 +333,8 @@ static const AVRational avc_sample_aspect_ratio[17] = {
>>>      {   2,  1 },
>>>  };
>>>  
>>> -static inline int get_ue_golomb(GetBitContext *gb) {
>>> -    int i;
>>> -    for (i = 0; i < 32 && !get_bits1(gb); i++)
>>> -        ;
>>> -    return get_bitsz(gb, i) + (1 << i) - 1;
>>> -}
>>> -
>>>  static inline int get_se_golomb(GetBitContext *gb) {
>>> -    int v = get_ue_golomb(gb) + 1;
>>> +    unsigned v = get_ue_golomb(gb) + 1;
>>>      int sign = -(v & 1);
>>>      return ((v >> 1) ^ sign) - sign;
>>>  }
>>
>> I think it's best if we use the lavc golomb code instead. When i
>> suggested to re implement it here i wasn't aware it was shared with lavf
>> within the golomb_tab.c source file.
> 
> I actually checked these functions and none did what I wanted to do
> (notice that this is only one of two patches for this function, the rest
> is in 14/17):

I am aware that this is the first patch of a set, and that it's not the
only one changing the lavf golomb functions. I'm saying that, since
after all we could have reused the lavc ones to being with, then ideally
we should.

But if you need them to be strict and robust, which as Lynne said would
slow down several hot loops in the decoder, then i guess this
duplication is preferable.

> 1. None of these functions check explicitly for overreads/end of buffer.
> They only do so implicitly if the safe bitstream reader is on.
> 2. All of these functions are declared as inline; yet I do not really
> think that this is needed here given that parsing of golomb stuff
> happens only very rarely here (only at the beginning of the muxing process).
> 3. get_ue_golomb_long does not allow to check for invalid values (and do
> actually all arch-specific versions of av_log2 obey av_log2(0) == 0?).
> 4. get_ue_golomb_31 is wrongly named: It can actually only read golomb
> codes that are at most 9 bits long, i.e. at most four leading zeroes and
> followed by five value bits. And therefore it can be used for values
> 0..30, but not 31; in particular, it must not be used for the SPS id,
> yet it is. (But there are some places where it is not used where it
> could be.)
> 5. The offset_for_* values that need to be parsed when
> pic_order_cnt_type == 1 have a value of -2^31 + 1..2^31 - 1; when one
> uses the corresponding unsigned function to parse them, one needs to be
> able to parse values in the range 0..2^32 - 2, i.e. not get_ue_golomb.
> But there is no such function with proper error checking.

Most of this (Like making get_ue_golomb_long() do error checking) could
be fixed in lavc if needed as well, but it probably isn't desirable
given the speed critical code that makes use of it (get_ue_golomb_31
could be renamed since it's just cosmetic, though).

> 
> - Andreas
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
> 
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
diff mbox series

Patch

diff --git a/libavformat/avc.c b/libavformat/avc.c
index b5e2921388..55494eb08a 100644
--- a/libavformat/avc.c
+++ b/libavformat/avc.c
@@ -27,6 +27,21 @@ 
 #include "avc.h"
 #include "avio_internal.h"
 
+static inline unsigned get_ue_golomb(GetBitContext *gb)
+{
+    int i;
+    for (i = 1; i <= 32; i++) {
+        if (get_bits_left(gb) < i)
+            return 0;
+        if (show_bits1(gb))
+            break;
+        skip_bits1(gb);
+    }
+    if (i > 32)
+        return 0;
+    return get_bits_long(gb, i) - 1;
+}
+
 static const uint8_t *avc_find_startcode_internal(const uint8_t *p, const uint8_t *end)
 {
     const uint8_t *a = p + 4 - ((intptr_t)p & 3);
@@ -318,15 +333,8 @@  static const AVRational avc_sample_aspect_ratio[17] = {
     {   2,  1 },
 };
 
-static inline int get_ue_golomb(GetBitContext *gb) {
-    int i;
-    for (i = 0; i < 32 && !get_bits1(gb); i++)
-        ;
-    return get_bitsz(gb, i) + (1 << i) - 1;
-}
-
 static inline int get_se_golomb(GetBitContext *gb) {
-    int v = get_ue_golomb(gb) + 1;
+    unsigned v = get_ue_golomb(gb) + 1;
     int sign = -(v & 1);
     return ((v >> 1) ^ sign) - sign;
 }