@@ -100,11 +100,14 @@ enum HEVCSliceType {
enum {
// 7.4.3.1: vps_max_layers_minus1 is in [0, 62].
- HEVC_MAX_LAYERS = 63,
+ HEVC_MAX_LAYERS = 63,
// 7.4.3.1: vps_max_sub_layers_minus1 is in [0, 6].
- HEVC_MAX_SUB_LAYERS = 7,
+ HEVC_MAX_SUB_LAYERS = 7,
// 7.4.3.1: vps_num_layer_sets_minus1 is in [0, 1023].
- HEVC_MAX_LAYER_SETS = 1024,
+ HEVC_MAX_LAYER_SETS = 1024,
+ // 7.4.3.1: vps_max_layer_id is in [0, 63].
+ HEVC_MAX_LAYER_ID = 63,
+ HEVC_MAX_NUH_LAYER_ID = 62,
// 7.4.2.1: vps_video_parameter_set_id is u(4).
HEVC_MAX_VPS_COUNT = 16,
@@ -311,11 +311,17 @@ static int decode_profile_tier_level(GetBitContext *gb, AVCodecContext *avctx,
}
static int parse_ptl(GetBitContext *gb, AVCodecContext *avctx,
- PTL *ptl, int max_num_sub_layers)
+ int profile_present, PTL *ptl, int max_num_sub_layers)
{
- int i;
- if (decode_profile_tier_level(gb, avctx, &ptl->general_ptl) < 0 ||
- get_bits_left(gb) < 8 + (8*2 * (max_num_sub_layers - 1 > 0))) {
+ int i, status = 0;
+
+ if (profile_present) {
+ status = decode_profile_tier_level(gb, avctx, &ptl->general_ptl);
+ } else {
+ memset(&ptl->general_ptl, 0, sizeof(ptl->general_ptl));
+ }
+
+ if (status < 0 || get_bits_left(gb) < 8 + (8*2 * (max_num_sub_layers - 1 > 0))) {
av_log(avctx, AV_LOG_ERROR, "PTL information too short\n");
return -1;
}
@@ -444,13 +450,273 @@ static void hevc_vps_free(FFRefStructOpaque opaque, void *obj)
av_freep(&vps->data);
}
+enum ScalabilityMask {
+ HEVC_SCALABILITY_DEPTH = 0,
+ HEVC_SCALABILITY_MULTIVIEW = 1,
+ HEVC_SCALABILITY_SPATIAL = 2,
+ HEVC_SCALABILITY_AUXILIARY = 3,
+ HEVC_SCALABILITY_MASK_MAX = 15,
+};
+
+enum DependencyType {
+ HEVC_DEP_TYPE_SAMPLE = 0,
+ HEVC_DEP_TYPE_MV = 1,
+ HEVC_DEP_TYPE_BOTH = 2,
+};
+
+static int decode_vps_ext(GetBitContext *gb, AVCodecContext *avctx, HEVCVPS *vps,
+ uint64_t layer1_id_included)
+{
+ PTL ptl_dummy;
+ uint8_t max_sub_layers[HEVC_MAX_LAYERS];
+
+ int splitting_flag, dimension_id_len, view_id_len, num_add_olss,
+ default_output_layer_idc, direct_dep_type_len, direct_dep_type,
+ sub_layers_max_present, sub_layer_flag_info_present_flag, nb_ptl;
+ unsigned non_vui_extension_length;
+
+ if (vps->vps_max_layers == 1 || vps->vps_num_layer_sets == 1) {
+ av_log(avctx, AV_LOG_VERBOSE, "Ignoring VPS extensions with a single layer\n");
+ return 0;
+ }
+
+ if (vps->vps_max_layers > 2) {
+ av_log(avctx, AV_LOG_ERROR,
+ "VPS has %d layers, only 2 layers are supported\n",
+ vps->vps_max_layers);
+ return AVERROR_PATCHWELCOME;
+ }
+ if (vps->vps_num_layer_sets > 2) {
+ av_log(avctx, AV_LOG_ERROR,
+ "VPS has %d layer sets, only 2 layer sets are supported\n",
+ vps->vps_num_layer_sets);
+ return AVERROR_PATCHWELCOME;
+ }
+
+ align_get_bits(gb);
+
+ /**
+ * For stereoscopic MV-HEVC, the following simplifying assumptions are made:
+ *
+ * - vps_max_layers = 2 (one base layer, one multiview layer)
+ * - vps_num_layer_sets = 2 (one output layer set for each view)
+ * - NumScalabilityTypes = 1 (only HEVC_SCALABILITY_MULTIVIEW)
+ * - direct_dependency_flag[1][0] = 1 (second layer depends on first)
+ * - num_add_olss = 0 (no extra output layer sets)
+ * - default_output_layer_idc = 0 (1:1 mapping between OLSs and layers)
+ * - layer_id_included_flag[1] = {1, 1} (consequence of layer dependencies)
+ * - vps_num_rep_formats_minus1 = 0 (all layers have the same size)
+ *
+ * Which results in the following derived variables:
+ * - ViewOrderIdx = {0, 1}
+ * - NumViews = 2
+ * - DependencyFlag[1][0] = 1
+ * - NumDirectRefLayers = {0, 1}
+ * - NumRefLayers = {0, 1}
+ * - NumPredictedLayers = {1, 0}
+ * - NumIndependentLayers = 1
+ * - NumLayersInTreePartition = {2}
+ * - NumLayerSets = 2
+ * - NumOutputLayerSets = 2
+ * - OlsIdxToLsIdx = {0, 1}
+ * - LayerIdxInVps = {0, 1}
+ * - NumLayersInIdList = {1, 2}
+ * - NumNecessaryLayers = {1, 2}
+ * - NecessaryLayerFlag = {{1, 0}, {1, 1}}
+ * - NumOutputLayersInOutputLayerSet = {1, 2}
+ * - OutputLayerFlag = {{1, 0}, {1, 1}}
+ */
+ vps->nb_layers = 2;
+
+ if (parse_ptl(gb, avctx, 0, &ptl_dummy, vps->vps_max_sub_layers) < 0)
+ return AVERROR_INVALIDDATA;
+
+ splitting_flag = get_bits1(gb);
+ for (int i = 0; i <= HEVC_SCALABILITY_MASK_MAX; i++) {
+ int scalability_mask_flag = get_bits1(gb);
+ if (scalability_mask_flag != (i == HEVC_SCALABILITY_MULTIVIEW)) {
+ av_log(avctx, AV_LOG_ERROR, "Scalability type %d not supported\n", i);
+ return AVERROR_PATCHWELCOME;
+ }
+ }
+
+ if (!splitting_flag)
+ dimension_id_len = get_bits(gb, 3) + 1;
+
+ if (get_bits1(gb)) { /* vps_nuh_layer_id_present_flag */
+ int layer_id_in_nuh = get_bits(gb, 6);
+ if (layer_id_in_nuh >= FF_ARRAY_ELEMS(vps->layer_idx)) {
+ av_log(avctx, AV_LOG_ERROR, "Invalid layer_id_in_nuh[1]: %d\n",
+ layer_id_in_nuh);
+ return AVERROR_INVALIDDATA;
+ }
+ vps->layer_idx[layer_id_in_nuh] = 1;
+ vps->layer_id_in_nuh[1] = layer_id_in_nuh;
+ } else {
+ vps->layer_idx[1] = 1;
+ vps->layer_id_in_nuh[1] = 1;
+ }
+
+ if (!splitting_flag) {
+ int view_idx = get_bits(gb, dimension_id_len);
+ if (view_idx != 1) {
+ av_log(avctx, AV_LOG_ERROR, "Unexpected ViewOrderIdx: %d\n", view_idx);
+ return AVERROR_PATCHWELCOME;
+ }
+ }
+
+ view_id_len = get_bits(gb, 4);
+ if (view_id_len)
+ for (int i = 0; i < 2 /* NumViews */; i++)
+ vps->view_id[i] = get_bits(gb, view_id_len);
+
+ if (!get_bits1(gb) /* direct_dependency_flag */) {
+ av_log(avctx, AV_LOG_WARNING, "Independent output layers not supported\n");
+ return AVERROR_PATCHWELCOME;
+ }
+ vps->num_direct_ref_layers[1] = 1;
+
+ sub_layers_max_present = get_bits1(gb); // vps_sub_layers_max_minus1_present_flag
+ for (int i = 0; i < vps->vps_max_layers; i++)
+ max_sub_layers[i] = sub_layers_max_present ? get_bits(gb, 3) + 1 :
+ vps->vps_max_sub_layers;
+
+ if (get_bits1(gb) /* max_tid_ref_present_flag */)
+ skip_bits(gb, 3); // max_tid_il_ref_pics_plus1
+
+ vps->default_ref_layers_active = get_bits1(gb);
+
+ nb_ptl = get_ue_golomb(gb) + 1;
+ /* idx [0] is signalled in base VPS, idx [1] is signalled at the
+ * start of VPS extension, indices 2+ are signalled here;
+ * we ignore all but the first one anyway */
+ for (int i = 2; i < nb_ptl; i++) {
+ int profile_present = get_bits1(gb);
+ if (parse_ptl(gb, avctx, profile_present, &ptl_dummy, vps->vps_max_sub_layers) < 0)
+ return AVERROR_INVALIDDATA;
+ }
+
+ num_add_olss = get_ue_golomb(gb);
+ if (num_add_olss != 0) {
+ /* Since we don't implement support for independent output layer sets
+ * and auxiliary layers, this should never nonzero */
+ av_log(avctx, AV_LOG_ERROR, "Unexpected num_add_olss: %d\n", num_add_olss);
+ return AVERROR_PATCHWELCOME;
+ }
+
+ default_output_layer_idc = get_bits(gb, 2);
+ if (default_output_layer_idc != 0) {
+ av_log(avctx, AV_LOG_WARNING, "Unsupported default_output_layer_idc: %d\n",
+ default_output_layer_idc);
+ return AVERROR_PATCHWELCOME;
+ }
+
+ /* Consequence of established layer dependencies */
+ if (layer1_id_included != ((1 << vps->layer_id_in_nuh[0]) |
+ (1 << vps->layer_id_in_nuh[1]))) {
+ av_log(avctx, AV_LOG_ERROR, "Dependent layer not included in layer ID?\n");
+ return AVERROR_PATCHWELCOME;
+ }
+
+ vps->num_output_layer_sets = 2;
+ vps->ols[1] = 3;
+
+ for (int j = 0; j < av_popcount64(vps->ols[1]); j++) {
+ int ptl_idx = get_bits(gb, av_ceil_log2(nb_ptl));
+ if (ptl_idx < 1 || ptl_idx >= nb_ptl) {
+ av_log(avctx, AV_LOG_ERROR, "Invalid PTL index: %d\n", ptl_idx);
+ return AVERROR_INVALIDDATA;
+ }
+ }
+
+ if (get_ue_golomb_31(gb) != 0 /* vps_num_rep_formats_minus1 */) {
+ av_log(avctx, AV_LOG_ERROR, "Unexpected extra rep formats\n");
+ return AVERROR_INVALIDDATA;
+ }
+
+ vps->rep_format.pic_width_in_luma_samples = get_bits(gb, 16);
+ vps->rep_format.pic_height_in_luma_samples = get_bits(gb, 16);
+
+ if (!get_bits1(gb) /* chroma_and_bit_depth_vps_present_flag */) {
+ av_log(avctx, AV_LOG_ERROR,
+ "chroma_and_bit_depth_vps_present_flag=0 in first rep_format\n");
+ return AVERROR_INVALIDDATA;
+ }
+ vps->rep_format.chroma_format_idc = get_bits(gb, 2);
+ if (vps->rep_format.chroma_format_idc == 3)
+ vps->rep_format.separate_colour_plane_flag = get_bits1(gb);
+ vps->rep_format.bit_depth_luma = get_bits(gb, 4) + 8;
+ vps->rep_format.bit_depth_chroma = get_bits(gb, 4) + 8;
+ if (vps->rep_format.bit_depth_luma > 16 ||
+ vps->rep_format.bit_depth_chroma > 16 ||
+ vps->rep_format.bit_depth_luma != vps->rep_format.bit_depth_chroma) {
+ av_log(avctx, AV_LOG_ERROR, "Unsupported bit depth: %"PRIu8" %"PRIu8"\n",
+ vps->rep_format.bit_depth_luma, vps->rep_format.bit_depth_chroma);
+ return AVERROR_PATCHWELCOME;
+ }
+
+ if (get_bits1(gb) /* conformance_window_vps_flag */) {
+ int vert_mult = hevc_sub_height_c[vps->rep_format.chroma_format_idc];
+ int horiz_mult = hevc_sub_width_c[vps->rep_format.chroma_format_idc];
+ vps->rep_format.conf_win_left_offset = get_ue_golomb(gb) * horiz_mult;
+ vps->rep_format.conf_win_right_offset = get_ue_golomb(gb) * horiz_mult;
+ vps->rep_format.conf_win_top_offset = get_ue_golomb(gb) * vert_mult;
+ vps->rep_format.conf_win_bottom_offset = get_ue_golomb(gb) * vert_mult;
+ }
+
+ vps->max_one_active_ref_layer = get_bits1(gb);
+ vps->poc_lsb_aligned = get_bits1(gb);
+
+ sub_layer_flag_info_present_flag = get_bits1(gb);
+ for (int j = 0; j < FFMAX(max_sub_layers[0], max_sub_layers[1]); j++) {
+ int sub_layer_dpb_info_present_flag = 1;
+ if (j > 0 && sub_layer_flag_info_present_flag)
+ sub_layer_dpb_info_present_flag = get_bits1(gb);
+ if (sub_layer_dpb_info_present_flag) {
+ for (int k = 0; k < av_popcount64(vps->ols[1]); k++)
+ vps->dpb_size.max_dec_pic_buffering = get_ue_golomb_long(gb) + 1;
+ vps->dpb_size.max_num_reorder_pics = get_ue_golomb_long(gb);
+ vps->dpb_size.max_latency_increase = get_ue_golomb_long(gb) - 1;
+ }
+ }
+
+ direct_dep_type_len = get_ue_golomb_31(gb) + 2;
+ if (direct_dep_type_len > 32) {
+ av_log(avctx, AV_LOG_ERROR, "Invalid direct_dep_type_len: %d\n",
+ direct_dep_type_len);
+ return AVERROR_INVALIDDATA;
+ }
+
+ skip_bits1(gb); /* direct_depenency_all_layers_flag */
+ direct_dep_type = get_bits_long(gb, direct_dep_type_len);
+ if (direct_dep_type > HEVC_DEP_TYPE_BOTH) {
+ av_log(avctx, AV_LOG_WARNING, "Unsupported direct_dep_type: %d\n",
+ direct_dep_type);
+ return AVERROR_PATCHWELCOME;
+ }
+
+ non_vui_extension_length = get_ue_golomb(gb);
+ if (non_vui_extension_length > 4096) {
+ av_log(avctx, AV_LOG_ERROR, "vps_non_vui_extension_length too large: %u\n",
+ non_vui_extension_length);
+ return AVERROR_INVALIDDATA;
+ }
+ skip_bits_long(gb, non_vui_extension_length * 8);
+
+ if (get_bits1(gb)) // vps_vui_present_flag
+ av_log(avctx, AV_LOG_WARNING, "VPS VUI not supported\n");
+
+ return 0;
+}
+
int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
HEVCParamSets *ps)
{
- int i,j;
+ int i;
int vps_id = get_bits(gb, 4);
ptrdiff_t nal_size = gb->buffer_end - gb->buffer;
int ret = AVERROR_INVALIDDATA;
+ uint64_t layer1_id_included = 0;
HEVCVPS *vps;
if (ps->vps_list[vps_id]) {
@@ -494,7 +760,7 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
goto err;
}
- if (parse_ptl(gb, avctx, &vps->ptl, vps->vps_max_sub_layers) < 0)
+ if (parse_ptl(gb, avctx, 1, &vps->ptl, vps->vps_max_sub_layers) < 0)
goto err;
vps->vps_sub_layer_ordering_info_present_flag = get_bits1(gb);
@@ -526,9 +792,14 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
goto err;
}
- for (i = 1; i < vps->vps_num_layer_sets; i++)
- for (j = 0; j <= vps->vps_max_layer_id; j++)
- skip_bits(gb, 1); // layer_id_included_flag[i][j]
+ vps->num_output_layer_sets = 1;
+ vps->ols[0] = 1;
+
+ // we support at most 2 layers, so ignore the others
+ if (vps->vps_num_layer_sets > 1)
+ layer1_id_included = get_bits64(gb, vps->vps_max_layer_id + 1); // layer_id_included_flag
+ if (vps->vps_num_layer_sets > 2)
+ skip_bits_long(gb, (vps->vps_num_layer_sets - 2) * (vps->vps_max_layer_id + 1));
vps->vps_timing_info_present_flag = get_bits1(gb);
if (vps->vps_timing_info_present_flag) {
@@ -560,7 +831,21 @@ int ff_hevc_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
vps->vps_max_sub_layers);
}
}
- get_bits1(gb); /* vps_extension_flag */
+
+ vps->nb_layers = 1;
+ vps->layer_idx[0] = 0;
+ for (int i = 1; i < FF_ARRAY_ELEMS(vps->layer_idx); i++)
+ vps->layer_idx[i] = -1;
+
+ if (vps->vps_max_layers > 1 && get_bits1(gb)) { /* vps_extension_flag */
+ int ret = decode_vps_ext(gb, avctx, vps, layer1_id_included);
+ if (ret == AVERROR_PATCHWELCOME) {
+ vps->nb_layers = 1;
+ av_log(avctx, AV_LOG_WARNING, "Ignoring unsupported VPS extension\n");
+ ret = 0;
+ } else if (ret < 0)
+ goto err;
+ }
if (get_bits_left(gb) < 0) {
av_log(avctx, AV_LOG_ERROR,
@@ -893,7 +1178,7 @@ int ff_hevc_parse_sps(HEVCSPS *sps, GetBitContext *gb, unsigned int *sps_id,
sps->temporal_id_nesting = get_bits(gb, 1);
- if ((ret = parse_ptl(gb, avctx, &sps->ptl, sps->max_sub_layers)) < 0)
+ if ((ret = parse_ptl(gb, avctx, 1, &sps->ptl, sps->max_sub_layers)) < 0)
return ret;
*sps_id = get_ue_golomb_long(gb);
@@ -32,6 +32,8 @@
#include "hevc.h"
+#define HEVC_VPS_MAX_LAYERS 2
+
typedef struct HEVCSublayerHdrParams {
uint32_t bit_rate_value_minus1[HEVC_MAX_CPB_CNT];
uint32_t cpb_size_value_minus1[HEVC_MAX_CPB_CNT];
@@ -153,6 +155,19 @@ typedef struct PTL {
uint8_t sub_layer_level_present_flag[HEVC_MAX_SUB_LAYERS];
} PTL;
+typedef struct RepFormat {
+ uint16_t pic_width_in_luma_samples;
+ uint16_t pic_height_in_luma_samples;
+ uint8_t chroma_format_idc;
+ uint8_t separate_colour_plane_flag;
+ uint8_t bit_depth_luma; ///< bit_depth_vps_luma_minus8 + 8
+ uint8_t bit_depth_chroma; ///< bit_depth_vps_chroma_minus8 + 8
+ uint16_t conf_win_left_offset;
+ uint16_t conf_win_right_offset;
+ uint16_t conf_win_top_offset;
+ uint16_t conf_win_bottom_offset;
+} RepFormat;
+
typedef struct HEVCVPS {
unsigned int vps_id;
@@ -176,6 +191,53 @@ typedef struct HEVCVPS {
HEVCHdrParams *hdr;
+ /* VPS extension */
+
+ /* Number of layers this VPS was parsed for, between 1 and
+ * min(HEVC_VPS_MAX_LAYERS, vps_max_layers).
+ *
+ * Note that vps_max_layers contains the layer count declared in the
+ * bitstream, while nb_layers contains the number of layers exported to
+ * users of this API (which may be smaller as we only support a subset of
+ * multilayer extensions).
+ *
+ * Arrays below documented as [layer_idx] have nb_layers valid entries.
+ */
+ int nb_layers;
+
+ // LayerIdxInVps[nuh_layer_id], i.e. a mapping of nuh_layer_id to VPS layer
+ // indices. Valid values are between 0 and HEVC_VPS_MAX_LAYERS. Entries for
+ // unmapped values of nuh_layer_id are set to -1.
+ int8_t layer_idx[HEVC_MAX_NUH_LAYER_ID + 1];
+
+ uint8_t layer_id_in_nuh[HEVC_VPS_MAX_LAYERS];
+
+ uint8_t default_ref_layers_active;
+ uint8_t max_one_active_ref_layer;
+ uint8_t poc_lsb_aligned;
+ // bitmask of poc_lsb_not_present[layer_idx]
+ uint8_t poc_lsb_not_present;
+
+ struct {
+ unsigned max_dec_pic_buffering; // max_vps_dec_pic_buffering_minus1 + 1
+ unsigned max_num_reorder_pics; // max_vps_num_reorder_pics
+ unsigned max_latency_increase; // max_vps_latency_increase_plus1 - 1
+ } dpb_size;
+
+ // ViewId[layer_idx]
+ uint16_t view_id[HEVC_VPS_MAX_LAYERS];
+
+ // NumOutputLayerSets
+ uint8_t num_output_layer_sets;
+ // Bitmasks specifying output layer sets. i-th bit set means layer with VPS
+ // index i is present in the layer set.
+ uint64_t ols[HEVC_VPS_MAX_LAYERS];
+
+ // NumDirectRefLayers[layer_idx]
+ uint8_t num_direct_ref_layers[HEVC_VPS_MAX_LAYERS];
+
+ RepFormat rep_format;
+
uint8_t *data;
int data_size;
} HEVCVPS;