diff mbox series

[FFmpeg-devel,28/42] lavc/hevc/ps: implement SPS parsing for nuh_layer_id>0

Message ID 20240827154041.13846-30-anton@khirnov.net
State New
Headers show
Series [FFmpeg-devel,01/42] lavu/opt: add API for setting array-type option values | expand

Commit Message

Anton Khirnov Aug. 27, 2024, 3:05 p.m. UTC
Cf. F.7.3.2.2 "Sequence parameter set RBSP syntax", which extends normal
SPS parsing with special clauses depending on MultiLayerExtSpsFlag.
---
 libavcodec/hevc/hevcdec.c |  2 +-
 libavcodec/hevc/parse.c   |  3 +-
 libavcodec/hevc/parser.c  |  2 +-
 libavcodec/hevc/ps.c      | 62 +++++++++++++++++++++++++++++++++++----
 libavcodec/hevc/ps.h      |  7 +++--
 libavcodec/qsvenc_hevc.c  |  2 +-
 6 files changed, 65 insertions(+), 13 deletions(-)

Comments

James Almer Aug. 30, 2024, 2:52 a.m. UTC | #1
On 8/27/2024 12:05 PM, Anton Khirnov wrote:
> Cf. F.7.3.2.2 "Sequence parameter set RBSP syntax", which extends normal
> SPS parsing with special clauses depending on MultiLayerExtSpsFlag.
> ---
>   libavcodec/hevc/hevcdec.c |  2 +-
>   libavcodec/hevc/parse.c   |  3 +-
>   libavcodec/hevc/parser.c  |  2 +-
>   libavcodec/hevc/ps.c      | 62 +++++++++++++++++++++++++++++++++++----
>   libavcodec/hevc/ps.h      |  7 +++--
>   libavcodec/qsvenc_hevc.c  |  2 +-
>   6 files changed, 65 insertions(+), 13 deletions(-)
> 
> diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c
> index 6b596f1573..260b9abef0 100644
> --- a/libavcodec/hevc/hevcdec.c
> +++ b/libavcodec/hevc/hevcdec.c
> @@ -3256,7 +3256,7 @@ static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
>           break;
>       case HEVC_NAL_SPS:
>           ret = ff_hevc_decode_nal_sps(&gb, s->avctx, &s->ps,
> -                                     s->apply_defdispwin);
> +                                     nal->nuh_layer_id, s->apply_defdispwin);
>           if (ret < 0)
>               goto fail;
>           break;
> diff --git a/libavcodec/hevc/parse.c b/libavcodec/hevc/parse.c
> index ec8d1aeacf..ad84b7b152 100644
> --- a/libavcodec/hevc/parse.c
> +++ b/libavcodec/hevc/parse.c
> @@ -49,7 +49,8 @@ static int hevc_decode_nal_units(const uint8_t *buf, int buf_size, HEVCParamSets
>                   goto done;
>               break;
>           case HEVC_NAL_SPS:
> -            ret = ff_hevc_decode_nal_sps(&nal->gb, logctx, ps, apply_defdispwin);
> +            ret = ff_hevc_decode_nal_sps(&nal->gb, logctx, ps,
> +                                         nal->nuh_layer_id, apply_defdispwin);
>               if (ret < 0)
>                   goto done;
>               break;
> diff --git a/libavcodec/hevc/parser.c b/libavcodec/hevc/parser.c
> index 8db56e259e..a10f38941b 100644
> --- a/libavcodec/hevc/parser.c
> +++ b/libavcodec/hevc/parser.c
> @@ -209,7 +209,7 @@ static int parse_nal_units(AVCodecParserContext *s, const uint8_t *buf,
>               ff_hevc_decode_nal_vps(gb, avctx, ps);
>               break;
>           case HEVC_NAL_SPS:
> -            ff_hevc_decode_nal_sps(gb, avctx, ps, 1);
> +            ff_hevc_decode_nal_sps(gb, avctx, ps, nal->nuh_layer_id, 1);
>               break;
>           case HEVC_NAL_PPS:
>               ff_hevc_decode_nal_pps(gb, avctx, ps);
> diff --git a/libavcodec/hevc/ps.c b/libavcodec/hevc/ps.c
> index 0e084958be..0b34dd10a8 100644
> --- a/libavcodec/hevc/ps.c
> +++ b/libavcodec/hevc/ps.c
> @@ -1145,12 +1145,12 @@ static int map_pixel_format(AVCodecContext *avctx, HEVCSPS *sps)
>   }
>   
>   int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
> -                      int apply_defdispwin, const HEVCVPS * const *vps_list,
> -                      AVCodecContext *avctx)
> +                      unsigned nuh_layer_id, int apply_defdispwin,
> +                      const HEVCVPS * const *vps_list, AVCodecContext *avctx)
>   {
>       HEVCWindow *ow;
>       int ret = 0;
> -    int bit_depth_chroma, start, num_comps;
> +    int bit_depth_chroma, num_comps, multi_layer_ext;
>       int i;
>   
>       // Coded parameters
> @@ -1167,16 +1167,26 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
>       }
>   
>       sps->max_sub_layers = get_bits(gb, 3) + 1;
> +    multi_layer_ext = nuh_layer_id > 0 &&
> +                      sps->max_sub_layers == HEVC_MAX_SUB_LAYERS + 1;
> +    if (multi_layer_ext) {
> +        if (!sps->vps)
> +            return AVERROR(EINVAL);
> +
> +        sps->max_sub_layers = sps->vps->vps_max_sub_layers;
> +    }
>       if (sps->max_sub_layers > HEVC_MAX_SUB_LAYERS) {

Not strictly related to this patch, but sps->max_sub_layers should 
always be <= vps->vps_max_sub_layers (see F.7.4.3.2.1). So the presence 
of vps should be checked for.

>           av_log(avctx, AV_LOG_ERROR, "sps_max_sub_layers out of range: %d\n",
>                  sps->max_sub_layers);
>           return AVERROR_INVALIDDATA;
>       }
>   
> +    if (!multi_layer_ext) {
>       sps->temporal_id_nesting = get_bits(gb, 1);

Similarly (not strictly related to this patch), this needs to be 1 if 
sps->max_sub_layers is 1 or if vps->vps_temporal_id_nesting_flag is 1.

>   
>       if ((ret = parse_ptl(gb, avctx, 1, &sps->ptl, sps->max_sub_layers)) < 0)
>           return ret;
> +    }

(Actually related to this patch this time) If multi_layer_ext is true, 
sps->temporal_id_nesting needs to be set to 
vps->vps_temporal_id_nesting_flag when sps->max_sub_layers > 1, or 
hardcoded to 1 when sps->max_sub_layers is 1.

>   
>       *sps_id = get_ue_golomb_long(gb);
>       if (*sps_id >= HEVC_MAX_SPS_COUNT) {
> @@ -1184,6 +1194,28 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
>           return AVERROR_INVALIDDATA;
>       }
>   
> +    if (multi_layer_ext) {
> +        const RepFormat *rf = &sps->vps->rep_format;
> +
> +        if (get_bits1(gb) &&    // update_rep_format_flag
> +            get_bits(gb, 8)) {  // sps_rep_format_idx
> +            av_log(avctx, AV_LOG_ERROR, "sps_rep_format_idx!=0\n");
> +            return AVERROR_PATCHWELCOME;
> +        }
> +
> +        sps->separate_colour_plane = rf->separate_colour_plane_flag;
> +        sps->chroma_format_idc     = sps->separate_colour_plane ? 0 :
> +                                     rf->chroma_format_idc;
> +        sps->bit_depth             = rf->bit_depth_luma;
> +        sps->width                 = rf->pic_width_in_luma_samples;
> +        sps->height                = rf->pic_height_in_luma_samples;
> +
> +        sps->pic_conf_win.left_offset   = rf->conf_win_left_offset;
> +        sps->pic_conf_win.right_offset  = rf->conf_win_right_offset;
> +        sps->pic_conf_win.top_offset    = rf->conf_win_top_offset;
> +        sps->pic_conf_win.bottom_offset = rf->conf_win_bottom_offset;
> +
> +    } else {
>       sps->chroma_format_idc = get_ue_golomb_long(gb);
>       if (sps->chroma_format_idc > 3U) {
>           av_log(avctx, AV_LOG_ERROR, "chroma_format_idc %d is invalid\n", sps->chroma_format_idc);
> @@ -1225,7 +1257,6 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
>               sps->pic_conf_win.top_offset    =
>               sps->pic_conf_win.bottom_offset = 0;
>           }
> -        sps->output_window = sps->pic_conf_win;
>       }
>   
>       sps->bit_depth = get_ue_golomb_31(gb) + 8;
> @@ -1248,6 +1279,9 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
>           return AVERROR_INVALIDDATA;
>       }
>       sps->bit_depth_chroma = bit_depth_chroma;
> +    }
> +
> +    sps->output_window = sps->pic_conf_win;
>   
>       ret = map_pixel_format(avctx, sps);
>       if (ret < 0)
> @@ -1260,6 +1294,9 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
>           return AVERROR_INVALIDDATA;
>       }
>   
> +    if (!multi_layer_ext) {
> +    int start;
> +
>       sps->sublayer_ordering_info = get_bits1(gb);
>       start = sps->sublayer_ordering_info ? 0 : sps->max_sub_layers - 1;
>       for (i = start; i < sps->max_sub_layers; i++) {
> @@ -1289,6 +1326,13 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
>               sps->temporal_layer[i].max_latency_increase  = sps->temporal_layer[start].max_latency_increase;
>           }
>       }
> +    } else {
> +        for (int i = 0; i < sps->max_sub_layers; i++) {
> +            sps->temporal_layer[i].max_dec_pic_buffering = sps->vps->dpb_size.max_dec_pic_buffering;
> +            sps->temporal_layer[i].num_reorder_pics      = sps->vps->dpb_size.max_num_reorder_pics;
> +            sps->temporal_layer[i].max_latency_increase  = sps->vps->dpb_size.max_latency_increase;
> +        }
> +    }
>   
>       sps->log2_min_cb_size                       = get_ue_golomb_long(gb) + 3;
>       sps->log2_diff_max_min_coding_block_size    = get_ue_golomb_long(gb);
> @@ -1325,6 +1369,11 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
>       if (sps->scaling_list_enabled) {
>           set_default_scaling_list_data(&sps->scaling_list);
>   
> +        if (multi_layer_ext && get_bits1(gb)) { // sps_infer_scaling_list_flag
> +            av_log(avctx, AV_LOG_ERROR, "sps_infer_scaling_list_flag=1 not supported\n");
> +            return AVERROR_PATCHWELCOME;
> +        }
> +
>           if (get_bits1(gb)) {
>               ret = scaling_list_data(gb, avctx, &sps->scaling_list, sps);
>               if (ret < 0)
> @@ -1579,7 +1628,8 @@ static int compare_sps(const HEVCSPS *sps1, const HEVCSPS *sps2)
>   }
>   
>   int ff_hevc_decode_nal_sps(GetBitContext *gb, AVCodecContext *avctx,
> -                           HEVCParamSets *ps, int apply_defdispwin)
> +                           HEVCParamSets *ps, unsigned nuh_layer_id,
> +                           int apply_defdispwin)
>   {
>       HEVCSPS *sps = ff_refstruct_alloc_ext(sizeof(*sps), 0, NULL, hevc_sps_free);
>       unsigned int sps_id;
> @@ -1598,7 +1648,7 @@ int ff_hevc_decode_nal_sps(GetBitContext *gb, AVCodecContext *avctx,
>       }
>   
>       ret = ff_hevc_parse_sps(sps, gb, &sps_id,
> -                            apply_defdispwin,
> +                            nuh_layer_id, apply_defdispwin,
>                               ps->vps_list, avctx);
>       if (ret < 0)
>           goto err;
> diff --git a/libavcodec/hevc/ps.h b/libavcodec/hevc/ps.h
> index bd1acf12e6..6f5b1f8755 100644
> --- a/libavcodec/hevc/ps.h
> +++ b/libavcodec/hevc/ps.h
> @@ -521,13 +521,14 @@ typedef struct HEVCParamSets {
>    *                 to an existing VPS
>    */
>   int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
> -                      int apply_defdispwin, const HEVCVPS * const *vps_list,
> -                      AVCodecContext *avctx);
> +                      unsigned nuh_layer_id, int apply_defdispwin,
> +                      const HEVCVPS * const *vps_list, AVCodecContext *avctx);
>   
>   int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
>                              HEVCParamSets *ps);
>   int ff_hevc_decode_nal_sps(GetBitContext *gb, AVCodecContext *avctx,
> -                           HEVCParamSets *ps, int apply_defdispwin);
> +                           HEVCParamSets *ps, unsigned nuh_layer_id,
> +                           int apply_defdispwin);
>   int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
>                              HEVCParamSets *ps);
>   
> diff --git a/libavcodec/qsvenc_hevc.c b/libavcodec/qsvenc_hevc.c
> index e6c038e67d..1479d4cb8b 100644
> --- a/libavcodec/qsvenc_hevc.c
> +++ b/libavcodec/qsvenc_hevc.c
> @@ -99,7 +99,7 @@ static int generate_fake_vps(QSVEncContext *q, AVCodecContext *avctx)
>       }
>       get_bits(&gb, 9);
>   
> -    ret = ff_hevc_parse_sps(&sps, &gb, &sps_id, 0, NULL, avctx);
> +    ret = ff_hevc_parse_sps(&sps, &gb, &sps_id, 0, 0, NULL, avctx);
>       av_freep(&sps_rbsp.rbsp_buffer);
>       if (ret < 0) {
>           av_log(avctx, AV_LOG_ERROR, "Error parsing the SPS\n");
Anton Khirnov Sept. 3, 2024, 9:39 a.m. UTC | #2
Quoting James Almer (2024-08-30 04:52:39)
> On 8/27/2024 12:05 PM, Anton Khirnov wrote:
> > @@ -1167,16 +1167,26 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
> >       }
> >   
> >       sps->max_sub_layers = get_bits(gb, 3) + 1;
> > +    multi_layer_ext = nuh_layer_id > 0 &&
> > +                      sps->max_sub_layers == HEVC_MAX_SUB_LAYERS + 1;
> > +    if (multi_layer_ext) {
> > +        if (!sps->vps)
> > +            return AVERROR(EINVAL);
> > +
> > +        sps->max_sub_layers = sps->vps->vps_max_sub_layers;
> > +    }
> >       if (sps->max_sub_layers > HEVC_MAX_SUB_LAYERS) {
> 
> Not strictly related to this patch, but sps->max_sub_layers should 
> always be <= vps->vps_max_sub_layers (see F.7.4.3.2.1). So the presence 
> of vps should be checked for.
> 
> >           av_log(avctx, AV_LOG_ERROR, "sps_max_sub_layers out of range: %d\n",
> >                  sps->max_sub_layers);
> >           return AVERROR_INVALIDDATA;
> >       }
> >   
> > +    if (!multi_layer_ext) {
> >       sps->temporal_id_nesting = get_bits(gb, 1);
> 
> > Similarly (not strictly related to this patch), this needs to be 1 if 
> > sps->max_sub_layers is 1 or if vps->vps_temporal_id_nesting_flag is 1.
> 
> >   
> >       if ((ret = parse_ptl(gb, avctx, 1, &sps->ptl, sps->max_sub_layers)) < 0)
> >           return ret;
> > +    }
> 
> > (Actually related to this patch this time) If multi_layer_ext is true, 
> > sps->temporal_id_nesting needs to be set to 
> > vps->vps_temporal_id_nesting_flag when sps->max_sub_layers > 1, or 
> > hardcoded to 1 when sps->max_sub_layers is 1.

Honestly this feels like pointless churn. We don't support any of this
temporal id stuff in the decoder, so why bother?
James Almer Sept. 3, 2024, 12:58 p.m. UTC | #3
On 9/3/2024 6:39 AM, Anton Khirnov wrote:
> Quoting James Almer (2024-08-30 04:52:39)
>> On 8/27/2024 12:05 PM, Anton Khirnov wrote:
>>> @@ -1167,16 +1167,26 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
>>>        }
>>>    
>>>        sps->max_sub_layers = get_bits(gb, 3) + 1;
>>> +    multi_layer_ext = nuh_layer_id > 0 &&
>>> +                      sps->max_sub_layers == HEVC_MAX_SUB_LAYERS + 1;
>>> +    if (multi_layer_ext) {
>>> +        if (!sps->vps)
>>> +            return AVERROR(EINVAL);
>>> +
>>> +        sps->max_sub_layers = sps->vps->vps_max_sub_layers;
>>> +    }
>>>        if (sps->max_sub_layers > HEVC_MAX_SUB_LAYERS) {
>>
>> Not strictly related to this patch, but sps->max_sub_layers should
>> always be <= vps->vps_max_sub_layers (see F.7.4.3.2.1). So the presence
>> of vps should be checked for.
>>
>>>            av_log(avctx, AV_LOG_ERROR, "sps_max_sub_layers out of range: %d\n",
>>>                   sps->max_sub_layers);
>>>            return AVERROR_INVALIDDATA;
>>>        }
>>>    
>>> +    if (!multi_layer_ext) {
>>>        sps->temporal_id_nesting = get_bits(gb, 1);
>>
>> Similarly (not strictly related to this patch), this needs to be 1 if
>> sps->max_sub_layers is 1 or if vps->vps_temporal_id_nesting_flag is 1.
>>
>>>    
>>>        if ((ret = parse_ptl(gb, avctx, 1, &sps->ptl, sps->max_sub_layers)) < 0)
>>>            return ret;
>>> +    }
>>
>> (Actually related to this patch this time) If multi_layer_ext is true,
>> sps->temporal_id_nesting needs to be set to
>> vps->vps_temporal_id_nesting_flag when sps->max_sub_layers > 1, or
>> hardcoded to 1 when sps->max_sub_layers is 1.
> 
> Honestly this feels like pointless churn. We don't support any of this
> temporal id stuff in the decoder, so why bother?

I see it used in videotoolbox.c and vulkan_hevc.c, so IMO it's better to 
make sure it's set correctly, just in case.
diff mbox series

Patch

diff --git a/libavcodec/hevc/hevcdec.c b/libavcodec/hevc/hevcdec.c
index 6b596f1573..260b9abef0 100644
--- a/libavcodec/hevc/hevcdec.c
+++ b/libavcodec/hevc/hevcdec.c
@@ -3256,7 +3256,7 @@  static int decode_nal_unit(HEVCContext *s, const H2645NAL *nal)
         break;
     case HEVC_NAL_SPS:
         ret = ff_hevc_decode_nal_sps(&gb, s->avctx, &s->ps,
-                                     s->apply_defdispwin);
+                                     nal->nuh_layer_id, s->apply_defdispwin);
         if (ret < 0)
             goto fail;
         break;
diff --git a/libavcodec/hevc/parse.c b/libavcodec/hevc/parse.c
index ec8d1aeacf..ad84b7b152 100644
--- a/libavcodec/hevc/parse.c
+++ b/libavcodec/hevc/parse.c
@@ -49,7 +49,8 @@  static int hevc_decode_nal_units(const uint8_t *buf, int buf_size, HEVCParamSets
                 goto done;
             break;
         case HEVC_NAL_SPS:
-            ret = ff_hevc_decode_nal_sps(&nal->gb, logctx, ps, apply_defdispwin);
+            ret = ff_hevc_decode_nal_sps(&nal->gb, logctx, ps,
+                                         nal->nuh_layer_id, apply_defdispwin);
             if (ret < 0)
                 goto done;
             break;
diff --git a/libavcodec/hevc/parser.c b/libavcodec/hevc/parser.c
index 8db56e259e..a10f38941b 100644
--- a/libavcodec/hevc/parser.c
+++ b/libavcodec/hevc/parser.c
@@ -209,7 +209,7 @@  static int parse_nal_units(AVCodecParserContext *s, const uint8_t *buf,
             ff_hevc_decode_nal_vps(gb, avctx, ps);
             break;
         case HEVC_NAL_SPS:
-            ff_hevc_decode_nal_sps(gb, avctx, ps, 1);
+            ff_hevc_decode_nal_sps(gb, avctx, ps, nal->nuh_layer_id, 1);
             break;
         case HEVC_NAL_PPS:
             ff_hevc_decode_nal_pps(gb, avctx, ps);
diff --git a/libavcodec/hevc/ps.c b/libavcodec/hevc/ps.c
index 0e084958be..0b34dd10a8 100644
--- a/libavcodec/hevc/ps.c
+++ b/libavcodec/hevc/ps.c
@@ -1145,12 +1145,12 @@  static int map_pixel_format(AVCodecContext *avctx, HEVCSPS *sps)
 }
 
 int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
-                      int apply_defdispwin, const HEVCVPS * const *vps_list,
-                      AVCodecContext *avctx)
+                      unsigned nuh_layer_id, int apply_defdispwin,
+                      const HEVCVPS * const *vps_list, AVCodecContext *avctx)
 {
     HEVCWindow *ow;
     int ret = 0;
-    int bit_depth_chroma, start, num_comps;
+    int bit_depth_chroma, num_comps, multi_layer_ext;
     int i;
 
     // Coded parameters
@@ -1167,16 +1167,26 @@  int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
     }
 
     sps->max_sub_layers = get_bits(gb, 3) + 1;
+    multi_layer_ext = nuh_layer_id > 0 &&
+                      sps->max_sub_layers == HEVC_MAX_SUB_LAYERS + 1;
+    if (multi_layer_ext) {
+        if (!sps->vps)
+            return AVERROR(EINVAL);
+
+        sps->max_sub_layers = sps->vps->vps_max_sub_layers;
+    }
     if (sps->max_sub_layers > HEVC_MAX_SUB_LAYERS) {
         av_log(avctx, AV_LOG_ERROR, "sps_max_sub_layers out of range: %d\n",
                sps->max_sub_layers);
         return AVERROR_INVALIDDATA;
     }
 
+    if (!multi_layer_ext) {
     sps->temporal_id_nesting = get_bits(gb, 1);
 
     if ((ret = parse_ptl(gb, avctx, 1, &sps->ptl, sps->max_sub_layers)) < 0)
         return ret;
+    }
 
     *sps_id = get_ue_golomb_long(gb);
     if (*sps_id >= HEVC_MAX_SPS_COUNT) {
@@ -1184,6 +1194,28 @@  int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
         return AVERROR_INVALIDDATA;
     }
 
+    if (multi_layer_ext) {
+        const RepFormat *rf = &sps->vps->rep_format;
+
+        if (get_bits1(gb) &&    // update_rep_format_flag
+            get_bits(gb, 8)) {  // sps_rep_format_idx
+            av_log(avctx, AV_LOG_ERROR, "sps_rep_format_idx!=0\n");
+            return AVERROR_PATCHWELCOME;
+        }
+
+        sps->separate_colour_plane = rf->separate_colour_plane_flag;
+        sps->chroma_format_idc     = sps->separate_colour_plane ? 0 :
+                                     rf->chroma_format_idc;
+        sps->bit_depth             = rf->bit_depth_luma;
+        sps->width                 = rf->pic_width_in_luma_samples;
+        sps->height                = rf->pic_height_in_luma_samples;
+
+        sps->pic_conf_win.left_offset   = rf->conf_win_left_offset;
+        sps->pic_conf_win.right_offset  = rf->conf_win_right_offset;
+        sps->pic_conf_win.top_offset    = rf->conf_win_top_offset;
+        sps->pic_conf_win.bottom_offset = rf->conf_win_bottom_offset;
+
+    } else {
     sps->chroma_format_idc = get_ue_golomb_long(gb);
     if (sps->chroma_format_idc > 3U) {
         av_log(avctx, AV_LOG_ERROR, "chroma_format_idc %d is invalid\n", sps->chroma_format_idc);
@@ -1225,7 +1257,6 @@  int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
             sps->pic_conf_win.top_offset    =
             sps->pic_conf_win.bottom_offset = 0;
         }
-        sps->output_window = sps->pic_conf_win;
     }
 
     sps->bit_depth = get_ue_golomb_31(gb) + 8;
@@ -1248,6 +1279,9 @@  int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
         return AVERROR_INVALIDDATA;
     }
     sps->bit_depth_chroma = bit_depth_chroma;
+    }
+
+    sps->output_window = sps->pic_conf_win;
 
     ret = map_pixel_format(avctx, sps);
     if (ret < 0)
@@ -1260,6 +1294,9 @@  int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
         return AVERROR_INVALIDDATA;
     }
 
+    if (!multi_layer_ext) {
+    int start;
+
     sps->sublayer_ordering_info = get_bits1(gb);
     start = sps->sublayer_ordering_info ? 0 : sps->max_sub_layers - 1;
     for (i = start; i < sps->max_sub_layers; i++) {
@@ -1289,6 +1326,13 @@  int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
             sps->temporal_layer[i].max_latency_increase  = sps->temporal_layer[start].max_latency_increase;
         }
     }
+    } else {
+        for (int i = 0; i < sps->max_sub_layers; i++) {
+            sps->temporal_layer[i].max_dec_pic_buffering = sps->vps->dpb_size.max_dec_pic_buffering;
+            sps->temporal_layer[i].num_reorder_pics      = sps->vps->dpb_size.max_num_reorder_pics;
+            sps->temporal_layer[i].max_latency_increase  = sps->vps->dpb_size.max_latency_increase;
+        }
+    }
 
     sps->log2_min_cb_size                       = get_ue_golomb_long(gb) + 3;
     sps->log2_diff_max_min_coding_block_size    = get_ue_golomb_long(gb);
@@ -1325,6 +1369,11 @@  int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
     if (sps->scaling_list_enabled) {
         set_default_scaling_list_data(&sps->scaling_list);
 
+        if (multi_layer_ext && get_bits1(gb)) { // sps_infer_scaling_list_flag
+            av_log(avctx, AV_LOG_ERROR, "sps_infer_scaling_list_flag=1 not supported\n");
+            return AVERROR_PATCHWELCOME;
+        }
+
         if (get_bits1(gb)) {
             ret = scaling_list_data(gb, avctx, &sps->scaling_list, sps);
             if (ret < 0)
@@ -1579,7 +1628,8 @@  static int compare_sps(const HEVCSPS *sps1, const HEVCSPS *sps2)
 }
 
 int ff_hevc_decode_nal_sps(GetBitContext *gb, AVCodecContext *avctx,
-                           HEVCParamSets *ps, int apply_defdispwin)
+                           HEVCParamSets *ps, unsigned nuh_layer_id,
+                           int apply_defdispwin)
 {
     HEVCSPS *sps = ff_refstruct_alloc_ext(sizeof(*sps), 0, NULL, hevc_sps_free);
     unsigned int sps_id;
@@ -1598,7 +1648,7 @@  int ff_hevc_decode_nal_sps(GetBitContext *gb, AVCodecContext *avctx,
     }
 
     ret = ff_hevc_parse_sps(sps, gb, &sps_id,
-                            apply_defdispwin,
+                            nuh_layer_id, apply_defdispwin,
                             ps->vps_list, avctx);
     if (ret < 0)
         goto err;
diff --git a/libavcodec/hevc/ps.h b/libavcodec/hevc/ps.h
index bd1acf12e6..6f5b1f8755 100644
--- a/libavcodec/hevc/ps.h
+++ b/libavcodec/hevc/ps.h
@@ -521,13 +521,14 @@  typedef struct HEVCParamSets {
  *                 to an existing VPS
  */
 int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
-                      int apply_defdispwin, const HEVCVPS * const *vps_list,
-                      AVCodecContext *avctx);
+                      unsigned nuh_layer_id, int apply_defdispwin,
+                      const HEVCVPS * const *vps_list, AVCodecContext *avctx);
 
 int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
                            HEVCParamSets *ps);
 int ff_hevc_decode_nal_sps(GetBitContext *gb, AVCodecContext *avctx,
-                           HEVCParamSets *ps, int apply_defdispwin);
+                           HEVCParamSets *ps, unsigned nuh_layer_id,
+                           int apply_defdispwin);
 int ff_hevc_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
                            HEVCParamSets *ps);
 
diff --git a/libavcodec/qsvenc_hevc.c b/libavcodec/qsvenc_hevc.c
index e6c038e67d..1479d4cb8b 100644
--- a/libavcodec/qsvenc_hevc.c
+++ b/libavcodec/qsvenc_hevc.c
@@ -99,7 +99,7 @@  static int generate_fake_vps(QSVEncContext *q, AVCodecContext *avctx)
     }
     get_bits(&gb, 9);
 
-    ret = ff_hevc_parse_sps(&sps, &gb, &sps_id, 0, NULL, avctx);
+    ret = ff_hevc_parse_sps(&sps, &gb, &sps_id, 0, 0, NULL, avctx);
     av_freep(&sps_rbsp.rbsp_buffer);
     if (ret < 0) {
         av_log(avctx, AV_LOG_ERROR, "Error parsing the SPS\n");