Message ID | f6da67bd-c092-ab7f-71be-c7ff66866542@jkqxz.net |
---|---|
State | Accepted |
Commit | c53f9f436440be4e180aa3895920ef21019c076f |
Headers | show |
Series | [FFmpeg-devel] cbs_h265: Ensure that a predicted RPS doesn't contain too many pictures | expand |
Context | Check | Description |
---|---|---|
andriy/default | pending | |
andriy/make | success | Make finished |
andriy/make_fate | success | Make fate finished |
On 5/3/2020 12:30 PM, Mark Thompson wrote: > If the RPS we are predicting from has maximum size then at least one of > the pictures in it must be discarded before adding the current one. > > Also revert 588114cea4ee434c9c61353ed91ffc817d2965f5, which added > now-redundant checks for the special case of a too-large RPS with all > pictures being in the same direction from the current one. You could also revert it in a separate commit after this patch using git revert, so this patch only contains the actual fix. > --- > It would be helpful to test this on the fuzzing samples from 20446/clusterfuzz-testcase-minimized-ffmpeg_BSF_HEVC_METADATA_fuzzer-5707770718584832 which prompted the original incomplete fix. Is there somewhere I can find them? Michael can send it to you. Poke him on IRC if he misses this patch. > > libavcodec/cbs_h265_syntax_template.c | 16 +++++++++++----- > 1 file changed, 11 insertions(+), 5 deletions(-) > > diff --git a/libavcodec/cbs_h265_syntax_template.c b/libavcodec/cbs_h265_syntax_template.c > index 5f5531944c..85ff5e5dc5 100644 > --- a/libavcodec/cbs_h265_syntax_template.c > +++ b/libavcodec/cbs_h265_syntax_template.c > @@ -522,7 +522,7 @@ static int FUNC(st_ref_pic_set)(CodedBitstreamContext *ctx, RWContext *rw, > infer(inter_ref_pic_set_prediction_flag, 0); > > if (current->inter_ref_pic_set_prediction_flag) { > - unsigned int ref_rps_idx, num_delta_pocs; > + unsigned int ref_rps_idx, num_delta_pocs, num_ref_pics; > const H265RawSTRefPicSet *ref; > int delta_rps, d_poc; > int ref_delta_poc_s0[HEVC_MAX_REFS], ref_delta_poc_s1[HEVC_MAX_REFS]; > @@ -538,18 +538,28 @@ static int FUNC(st_ref_pic_set)(CodedBitstreamContext *ctx, RWContext *rw, > ref_rps_idx = st_rps_idx - (current->delta_idx_minus1 + 1); > ref = &sps->st_ref_pic_set[ref_rps_idx]; > num_delta_pocs = ref->num_negative_pics + ref->num_positive_pics; > + av_assert0(num_delta_pocs < HEVC_MAX_DPB_SIZE); > > flag(delta_rps_sign); > ue(abs_delta_rps_minus1, 0, INT16_MAX); > delta_rps = (1 - 2 * current->delta_rps_sign) * > (current->abs_delta_rps_minus1 + 1); > > + num_ref_pics = 0; > for (j = 0; j <= num_delta_pocs; j++) { > flags(used_by_curr_pic_flag[j], 1, j); > if (!current->used_by_curr_pic_flag[j]) > flags(use_delta_flag[j], 1, j); > else > infer(use_delta_flag[j], 1); > + if (current->use_delta_flag[i]) > + ++num_ref_pics; > + } > + if (num_ref_pics >= HEVC_MAX_DPB_SIZE) { > + av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid stream: " > + "short-term ref pic set %d " > + "contains too many pictures.\n", st_rps_idx); > + return AVERROR_INVALIDDATA; > } > > // Since the stored form of an RPS here is actually the delta-step > @@ -601,8 +611,6 @@ static int FUNC(st_ref_pic_set)(CodedBitstreamContext *ctx, RWContext *rw, > } > } > > - if (i > 15) > - return AVERROR_INVALIDDATA; > infer(num_negative_pics, i); > for (i = 0; i < current->num_negative_pics; i++) { > infer(delta_poc_s0_minus1[i], > @@ -632,8 +640,6 @@ static int FUNC(st_ref_pic_set)(CodedBitstreamContext *ctx, RWContext *rw, > } > } > > - if (i + current->num_negative_pics > 15) > - return AVERROR_INVALIDDATA; > infer(num_positive_pics, i); > for (i = 0; i < current->num_positive_pics; i++) { > infer(delta_poc_s1_minus1[i], >
On Sun, May 03, 2020 at 04:30:00PM +0100, Mark Thompson wrote: > If the RPS we are predicting from has maximum size then at least one of > the pictures in it must be discarded before adding the current one. > > Also revert 588114cea4ee434c9c61353ed91ffc817d2965f5, which added > now-redundant checks for the special case of a too-large RPS with all > pictures being in the same direction from the current one. > --- > It would be helpful to test this on the fuzzing samples from 20446/clusterfuzz-testcase-minimized-ffmpeg_BSF_HEVC_METADATA_fuzzer-5707770718584832 which prompted the original incomplete fix. Is there somewhere I can find them? yes, the samples are automatically made public based on some rules like timelimits and if they are fixed, if they are reproduceable, ... this one should be public since 3 days and here: https://oss-fuzz.com/download?testcase_id=5707770718584832 also your patch shows no regression with it here thanks [...]
On 03/05/2020 19:14, Michael Niedermayer wrote: > On Sun, May 03, 2020 at 04:30:00PM +0100, Mark Thompson wrote: >> If the RPS we are predicting from has maximum size then at least one of >> the pictures in it must be discarded before adding the current one. >> >> Also revert 588114cea4ee434c9c61353ed91ffc817d2965f5, which added >> now-redundant checks for the special case of a too-large RPS with all >> pictures being in the same direction from the current one. >> --- > >> It would be helpful to test this on the fuzzing samples from 20446/clusterfuzz-testcase-minimized-ffmpeg_BSF_HEVC_METADATA_fuzzer-5707770718584832 which prompted the original incomplete fix. Is there somewhere I can find them? > > yes, the samples are automatically made public based on some rules > like timelimits and if they are fixed, if they are reproduceable, ... > this one should be public since 3 days and here: > https://oss-fuzz.com/download?testcase_id=5707770718584832 Ha, cute. A stream with enough RPS entries hits this by overwriting with all-ones in the SPS, because an all-ones RPS reads as inter predicted from the previous RPS with all pictures used. > also your patch shows no regression with it here Confirmed here as well. Will apply tomorrow if there are no other comments. Thanks, - Mark
On 03/05/2020 23:38, Mark Thompson wrote: > On 03/05/2020 19:14, Michael Niedermayer wrote: >> On Sun, May 03, 2020 at 04:30:00PM +0100, Mark Thompson wrote: >>> If the RPS we are predicting from has maximum size then at least one of >>> the pictures in it must be discarded before adding the current one. >>> >>> Also revert 588114cea4ee434c9c61353ed91ffc817d2965f5, which added >>> now-redundant checks for the special case of a too-large RPS with all >>> pictures being in the same direction from the current one. >>> --- >> >>> It would be helpful to test this on the fuzzing samples from 20446/clusterfuzz-testcase-minimized-ffmpeg_BSF_HEVC_METADATA_fuzzer-5707770718584832 which prompted the original incomplete fix. Is there somewhere I can find them? >> >> yes, the samples are automatically made public based on some rules >> like timelimits and if they are fixed, if they are reproduceable, ... >> this one should be public since 3 days and here: >> https://oss-fuzz.com/download?testcase_id=5707770718584832 > > Ha, cute. A stream with enough RPS entries hits this by overwriting with all-ones in the SPS, because an all-ones RPS reads as inter predicted from the previous RPS with all pictures used. > >> also your patch shows no regression with it here > > Confirmed here as well. > > Will apply tomorrow if there are no other comments. Tomorrow came slightly later than expected. Applied. Thanks, - Mark
diff --git a/libavcodec/cbs_h265_syntax_template.c b/libavcodec/cbs_h265_syntax_template.c index 5f5531944c..85ff5e5dc5 100644 --- a/libavcodec/cbs_h265_syntax_template.c +++ b/libavcodec/cbs_h265_syntax_template.c @@ -522,7 +522,7 @@ static int FUNC(st_ref_pic_set)(CodedBitstreamContext *ctx, RWContext *rw, infer(inter_ref_pic_set_prediction_flag, 0); if (current->inter_ref_pic_set_prediction_flag) { - unsigned int ref_rps_idx, num_delta_pocs; + unsigned int ref_rps_idx, num_delta_pocs, num_ref_pics; const H265RawSTRefPicSet *ref; int delta_rps, d_poc; int ref_delta_poc_s0[HEVC_MAX_REFS], ref_delta_poc_s1[HEVC_MAX_REFS]; @@ -538,18 +538,28 @@ static int FUNC(st_ref_pic_set)(CodedBitstreamContext *ctx, RWContext *rw, ref_rps_idx = st_rps_idx - (current->delta_idx_minus1 + 1); ref = &sps->st_ref_pic_set[ref_rps_idx]; num_delta_pocs = ref->num_negative_pics + ref->num_positive_pics; + av_assert0(num_delta_pocs < HEVC_MAX_DPB_SIZE); flag(delta_rps_sign); ue(abs_delta_rps_minus1, 0, INT16_MAX); delta_rps = (1 - 2 * current->delta_rps_sign) * (current->abs_delta_rps_minus1 + 1); + num_ref_pics = 0; for (j = 0; j <= num_delta_pocs; j++) { flags(used_by_curr_pic_flag[j], 1, j); if (!current->used_by_curr_pic_flag[j]) flags(use_delta_flag[j], 1, j); else infer(use_delta_flag[j], 1); + if (current->use_delta_flag[i]) + ++num_ref_pics; + } + if (num_ref_pics >= HEVC_MAX_DPB_SIZE) { + av_log(ctx->log_ctx, AV_LOG_ERROR, "Invalid stream: " + "short-term ref pic set %d " + "contains too many pictures.\n", st_rps_idx); + return AVERROR_INVALIDDATA; } // Since the stored form of an RPS here is actually the delta-step @@ -601,8 +611,6 @@ static int FUNC(st_ref_pic_set)(CodedBitstreamContext *ctx, RWContext *rw, } } - if (i > 15) - return AVERROR_INVALIDDATA; infer(num_negative_pics, i); for (i = 0; i < current->num_negative_pics; i++) { infer(delta_poc_s0_minus1[i], @@ -632,8 +640,6 @@ static int FUNC(st_ref_pic_set)(CodedBitstreamContext *ctx, RWContext *rw, } } - if (i + current->num_negative_pics > 15) - return AVERROR_INVALIDDATA; infer(num_positive_pics, i); for (i = 0; i < current->num_positive_pics; i++) { infer(delta_poc_s1_minus1[i],