@@ -163,88 +163,6 @@ static int vaapi_encode_wait(AVCodecContext *avctx,
return 0;
}
-static int vaapi_encode_make_row_slice(AVCodecContext *avctx,
- VAAPIEncodePicture *pic)
-{
- VAAPIEncodeContext *ctx = avctx->priv_data;
- VAAPIEncodeSlice *slice;
- int i, rounding;
-
- for (i = 0; i < pic->nb_slices; i++)
- pic->slices[i].row_size = ctx->slice_size;
-
- rounding = ctx->slice_block_rows - ctx->nb_slices * ctx->slice_size;
- if (rounding > 0) {
- // Place rounding error at top and bottom of frame.
- av_assert0(rounding < pic->nb_slices);
- // Some Intel drivers contain a bug where the encoder will fail
- // if the last slice is smaller than the one before it. Since
- // that's straightforward to avoid here, just do so.
- if (rounding <= 2) {
- for (i = 0; i < rounding; i++)
- ++pic->slices[i].row_size;
- } else {
- for (i = 0; i < (rounding + 1) / 2; i++)
- ++pic->slices[pic->nb_slices - i - 1].row_size;
- for (i = 0; i < rounding / 2; i++)
- ++pic->slices[i].row_size;
- }
- } else if (rounding < 0) {
- // Remove rounding error from last slice only.
- av_assert0(rounding < ctx->slice_size);
- pic->slices[pic->nb_slices - 1].row_size += rounding;
- }
-
- for (i = 0; i < pic->nb_slices; i++) {
- slice = &pic->slices[i];
- slice->index = i;
- if (i == 0) {
- slice->row_start = 0;
- slice->block_start = 0;
- } else {
- const VAAPIEncodeSlice *prev = &pic->slices[i - 1];
- slice->row_start = prev->row_start + prev->row_size;
- slice->block_start = prev->block_start + prev->block_size;
- }
- slice->block_size = slice->row_size * ctx->slice_block_cols;
-
- av_log(avctx, AV_LOG_DEBUG, "Slice %d: %d-%d (%d rows), "
- "%d-%d (%d blocks).\n", i, slice->row_start,
- slice->row_start + slice->row_size - 1, slice->row_size,
- slice->block_start, slice->block_start + slice->block_size - 1,
- slice->block_size);
- }
-
- return 0;
-}
-
-static int vaapi_encode_make_tile_slice(AVCodecContext *avctx,
- VAAPIEncodePicture *pic)
-{
- VAAPIEncodeContext *ctx = avctx->priv_data;
- VAAPIEncodeSlice *slice;
- int i, j, index;
-
- for (i = 0; i < ctx->tile_cols; i++) {
- for (j = 0; j < ctx->tile_rows; j++) {
- index = j * ctx->tile_cols + i;
- slice = &pic->slices[index];
- slice->index = index;
-
- pic->slices[index].block_start = ctx->col_bd[i] +
- ctx->row_bd[j] * ctx->slice_block_cols;
- pic->slices[index].block_size = ctx->row_height[j] * ctx->col_width[i];
-
- av_log(avctx, AV_LOG_DEBUG, "Slice %2d: (%2d, %2d) start at: %4d "
- "width:%2d height:%2d (%d blocks).\n", index, ctx->col_bd[i],
- ctx->row_bd[j], slice->block_start, ctx->col_width[i],
- ctx->row_height[j], slice->block_size);
- }
- }
-
- return 0;
-}
-
static int vaapi_encode_issue(AVCodecContext *avctx,
VAAPIEncodePicture *pic)
{
@@ -433,16 +351,17 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
err = AVERROR(ENOMEM);
goto fail;
}
-
- if (ctx->tile_rows && ctx->tile_cols)
- vaapi_encode_make_tile_slice(avctx, pic);
- else
- vaapi_encode_make_row_slice(avctx, pic);
}
for (i = 0; i < pic->nb_slices; i++) {
slice = &pic->slices[i];
+ *slice = (VAAPIEncodeSlice) {
+ .index = i,
+ .block_start = ctx->slice_regions[i].block_start,
+ .block_size = ctx->slice_regions[i].block_size,
+ };
+
if (ctx->codec->slice_params_size > 0) {
slice->codec_slice_params = av_mallocz(ctx->codec->slice_params_size);
if (!slice->codec_slice_params) {
@@ -1884,108 +1803,252 @@ static av_cold int vaapi_encode_init_gop_structure(AVCodecContext *avctx)
return 0;
}
-static av_cold int vaapi_encode_init_row_slice_structure(AVCodecContext *avctx,
- uint32_t slice_structure)
+// Strategies for dividing a run of blocks into consecutive regions; the
+// value is passed as spacing_type to vaapi_encode_pick_spacing().
+enum {
+ // Uniform: all regions approximately the same size, with the ends
+ // being slightly larger if necessary to accommodate rounding error.
+ REGION_SPACING_UNIFORM,
+ // Equal: all regions the same size except the last one, which can't be
+ // larger. If the target count is a power of two then this matches the
+ // behaviour of AV1 uniform_tile_spacing_flag (see AV1 5.9.15).
+ REGION_SPACING_EQUAL,
+ // Power of two: pick the largest power of two which will divide into
+ // the target count, then set all regions to that size except possibly
+ // the final one.
+ REGION_SPACING_EQUAL_POWER_OF_TWO,
+ // Single: every region has size one.
+ REGION_SPACING_SINGLE,
+ // H.265 with uniform_spacing_flag: see H.265 6.5.1. Similar to
+ // UNIFORM, but the rounding error is more randomly distributed rather
+ // than concentrated on the ends.
+ REGION_SPACING_UNIFORM_H265,
+};
+
+/**
+ * Split a run of `total` blocks into consecutive regions.
+ *
+ * @param sizes         Output: size of each region, in blocks.  The caller
+ *                      must provide room for at least max_count entries.
+ * @param actual_count  Output: number of entries written to sizes.  This
+ *                      can differ from target_count, depending on the mode.
+ * @param target_count  Requested number of regions (not used by
+ *                      REGION_SPACING_SINGLE, which always makes `total`
+ *                      regions).
+ * @param max_count     Upper bound on the number of regions.
+ * @param total         Number of blocks to distribute.
+ * @param spacing_type  One of the REGION_SPACING_* values.  Any value not
+ *                      handled explicitly is treated like
+ *                      REGION_SPACING_UNIFORM.
+ * @return 0 on success, AVERROR(EINVAL) if the arguments are out of range
+ *         or the layout would need more than max_count regions.
+ */
+static int vaapi_encode_pick_spacing(int *sizes, int *actual_count,
+                                     int target_count, int max_count,
+                                     int total, int spacing_type)
+{
+    int i, count;
+
+    // An empty run or an empty budget cannot be laid out, and most modes
+    // below would divide by a zero (or negative) count.
+    if (total < 1 || max_count < 1)
+        return AVERROR(EINVAL);
+    if (spacing_type != REGION_SPACING_SINGLE && target_count < 1)
+        return AVERROR(EINVAL);
+
+    // Never ask for more regions than allowed, or than there are blocks.
+    count = target_count;
+    if (count > max_count)
+        count = max_count;
+    if (count > total)
+        count = total;
+
+    if (spacing_type == REGION_SPACING_EQUAL) {
+        int size, start;
+        // Round the region size up, so the number of regions produced
+        // can only be less than or equal to the clamped count.
+        size = (total + count - 1) / count;
+        i = 0;
+        for (start = 0; start < total - size; start += size) {
+            sizes[i] = size;
+            ++i;
+        }
+        // The final region takes whatever is left (at most `size`).
+        sizes[i] = total - start;
+        count = i + 1;
+
+    } else if (spacing_type == REGION_SPACING_EQUAL_POWER_OF_TWO) {
+        int size;
+        if (count == 1) {
+            sizes[0] = total;
+        } else {
+            for (size = 1;; size *= 2) {
+                if (2 * size * (count - 1) + 1 >= total)
+                    break;
+            }
+            count = (total + size - 1) / size;
+            // Rounding the size to a power of two can produce more
+            // regions than were asked for.  Refuse before writing past
+            // the end of sizes[] rather than asserting afterwards.
+            if (count > max_count)
+                return AVERROR(EINVAL);
+            for (i = 0; i < count - 1; i++)
+                sizes[i] = size;
+            sizes[i] = total - size * (count - 1);
+        }
+
+    } else if (spacing_type == REGION_SPACING_SINGLE) {
+        if (total > max_count)
+            return AVERROR(EINVAL);
+        count = total;
+        for (i = 0; i < count; i++)
+            sizes[i] = 1;
+
+    } else if (spacing_type == REGION_SPACING_UNIFORM_H265) {
+        for (i = 0; i < count; i++) {
+            sizes[i] = (i + 1) * total / count -
+                             i * total / count;
+        }
+
+    } else {
+        int size, error;
+        size  = total / count;
+        error = total - count * size;
+        for (i = 0; i < count; i++)
+            sizes[i] = size;
+        // Spread the remainder over the ends: the larger half goes to
+        // the last regions, the smaller half to the first ones.
+        for (i = 0; i < (error + 1) / 2; i++)
+            ++sizes[count - i - 1];
+        for (i = 0; i < error / 2; i++)
+            ++sizes[i];
+    }
+
+    av_assert0(count <= max_count);
+    *actual_count = count;
+    return 0;
+}
+
+// Choose the width of every tile column and the height of every tile row,
+// in blocks, using the given REGION_SPACING_* mode. If the spacing helper
+// returns a different count from the one requested, ctx->tile_cols /
+// ctx->tile_rows are updated and a warning is logged. Sets ctx->nb_tiles.
+// Returns 0 on success or the negative error from the spacing helper.
+static av_cold int vaapi_encode_pick_tile_sizes(AVCodecContext *avctx,
+ int tile_spacing)
{
VAAPIEncodeContext *ctx = avctx->priv_data;
- int req_slices;
-
- // For fixed-size slices currently we only support whole rows, making
- // rectangular slices. This could be extended to arbitrary runs of
- // blocks, but since slices tend to be a conformance requirement and
- // most cases (such as broadcast or bluray) want rectangular slices
- // only it would need to be gated behind another option.
- if (avctx->slices > ctx->slice_block_rows) {
- av_log(avctx, AV_LOG_WARNING, "Not enough rows to use "
- "configured number of slices (%d < %d); using "
- "maximum.\n", ctx->slice_block_rows, avctx->slices);
- req_slices = ctx->slice_block_rows;
+ int count, err;
+
+ // Pick the horizontal block sizes of tiles.
+ if (ctx->tile_cols > 1) {
+ err = vaapi_encode_pick_spacing(ctx->tile_col_width, &count,
+ ctx->tile_cols, MAX_TILE_COLS,
+ ctx->slice_block_cols,
+ tile_spacing);
+ if (err < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to satisfy tile column "
+ "constraints.\n");
+ return err;
+ }
+ if (count != ctx->tile_cols) {
+ av_log(avctx, AV_LOG_WARNING, "Tile column count modified to "
+ "%d (from %d) due to driver constraints on tile "
+ "structure.\n", count, ctx->tile_cols);
+ ctx->tile_cols = count;
+ }
} else {
- req_slices = avctx->slices;
+ // A single column spans the full width of the frame.
+ ctx->tile_col_width[0] = ctx->slice_block_cols;
}
- if (slice_structure & VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS ||
- slice_structure & VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS) {
- ctx->nb_slices = req_slices;
- ctx->slice_size = ctx->slice_block_rows / ctx->nb_slices;
- } else if (slice_structure & VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS) {
- int k;
- for (k = 1;; k *= 2) {
- if (2 * k * (req_slices - 1) + 1 >= ctx->slice_block_rows)
- break;
+
+ // Pick the vertical block sizes of tiles.
+ if (ctx->tile_rows > 1) {
+ err = vaapi_encode_pick_spacing(ctx->tile_row_height, &count,
+ ctx->tile_rows, MAX_TILE_ROWS,
+ ctx->slice_block_rows,
+ tile_spacing);
+ if (err < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to satisfy tile row "
+ "constraints.\n");
+ return err;
+ }
+ if (count != ctx->tile_rows) {
+ av_log(avctx, AV_LOG_WARNING, "Tile row count modified to "
+ "%d (from %d) due to driver constraints on tile "
+ "structure.\n", count, ctx->tile_rows);
+ ctx->tile_rows = count;
}
- ctx->nb_slices = (ctx->slice_block_rows + k - 1) / k;
- ctx->slice_size = k;
-#if VA_CHECK_VERSION(1, 0, 0)
- } else if (slice_structure & VA_ENC_SLICE_STRUCTURE_EQUAL_ROWS) {
- ctx->nb_slices = ctx->slice_block_rows;
- ctx->slice_size = 1;
-#endif
} else {
- av_log(avctx, AV_LOG_ERROR, "Driver does not support any usable "
- "slice structure modes (%#x).\n", slice_structure);
- return AVERROR(EINVAL);
+ // A single row spans the full height of the frame.
+ ctx->tile_row_height[0] = ctx->slice_block_rows;
}
+ ctx->nb_tiles = ctx->tile_cols * ctx->tile_rows;
return 0;
}
-static av_cold int vaapi_encode_init_tile_slice_structure(AVCodecContext *avctx,
- uint32_t slice_structure)
+// Decide how many slices each tile receives (ctx->slices_per_tile).
+// Reads ctx->nb_tiles, ctx->nb_slices and the tile dimensions; tiles are
+// indexed row-major (index = row * tile_cols + col). Always returns 0.
+static av_cold int vaapi_encode_pick_tile_slices(AVCodecContext *avctx)
{
VAAPIEncodeContext *ctx = avctx->priv_data;
- int i, req_tiles;
-
- if (!(slice_structure & VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS ||
- (slice_structure & VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS &&
- ctx->tile_cols == 1))) {
- av_log(avctx, AV_LOG_ERROR, "Supported slice structure (%#x) doesn't work for "
- "current tile requirement.\n", slice_structure);
- return AVERROR(EINVAL);
+ // There is no driver constraint on how we do this distribution, so we
+ // try to make slice sizes as even as possible by placing slices in
+ // tiles in proportion to the number of blocks in those tiles. This is
+ // O(N^2), but N shouldn't be particularly large - if it is, then this
+ // could be improved by maintaining a priority queue of tiles in order
+ // of slices per block.
+ double spb, best_spb;
+ int i, slice, tile;
+
+ // All tiles start with one slice.
+ for (i = 0; i < ctx->nb_tiles; i++)
+ ctx->slices_per_tile[i] = 1;
+ // Repeatedly place the next slice in the tile with the lowest number
+ // of slices per block.
+ for (slice = ctx->nb_tiles; slice < ctx->nb_slices; slice++) {
+ // tile == -1 marks "no candidate yet", so best_spb is always
+ // assigned on the first inner iteration before it is compared.
+ tile = -1;
+ for (i = 0; i < ctx->nb_tiles; i++) {
+ spb = (double)ctx->slices_per_tile[i] /
+ (ctx->tile_col_width [i % ctx->tile_cols] *
+ ctx->tile_row_height[i / ctx->tile_cols]);
+ if (tile == -1 || spb < best_spb) {
+ tile = i;
+ best_spb = spb;
+ }
+ }
+ av_assert0(tile >= 0 && tile < ctx->nb_tiles);
+ ++ctx->slices_per_tile[tile];
}
+ return 0;
+}
- if (ctx->tile_rows > ctx->slice_block_rows ||
- ctx->tile_cols > ctx->slice_block_cols) {
- av_log(avctx, AV_LOG_WARNING, "Not enough block rows/cols (%d x %d) "
- "for configured number of tile (%d x %d); ",
- ctx->slice_block_rows, ctx->slice_block_cols,
- ctx->tile_rows, ctx->tile_cols);
- ctx->tile_rows = ctx->tile_rows > ctx->slice_block_rows ?
- ctx->slice_block_rows : ctx->tile_rows;
- ctx->tile_cols = ctx->tile_cols > ctx->slice_block_cols ?
- ctx->slice_block_cols : ctx->tile_cols;
- av_log(avctx, AV_LOG_WARNING, "using allowed maximum (%d x %d).\n",
- ctx->tile_rows, ctx->tile_cols);
+// Build ctx->slice_regions: for every tile (row-major order), split the
+// tile's block rows into slices with the given REGION_SPACING_* mode and
+// record each slice's tile position, first block and block count.
+// max_slices bounds the total number of slices over all tiles. On success
+// ctx->nb_slices is set to the number of regions written. Returns 0, or a
+// negative AVERROR code if the layout cannot be produced.
+static av_cold int vaapi_encode_pick_slices(AVCodecContext *avctx,
+ int slice_spacing,
+ int max_slices)
+{
+ VAAPIEncodeContext *ctx = avctx->priv_data;
+ int tile_block_start, slice_block_start;
+ int slice, count, i, j, err;
+ int rows_per_slice[MAX_SLICES];
+
+ // If we didn't have any tile settings then this should be treated as
+ // a single tile filling the frame.
+ if (ctx->nb_tiles == 0) {
+ ctx->nb_tiles = 1;
+ ctx->tile_cols = 1;
+ ctx->tile_rows = 1;
+ ctx->tile_col_width[0] = ctx->slice_block_cols;
+ ctx->tile_row_height[0] = ctx->slice_block_rows;
+ ctx->slices_per_tile[0] = ctx->nb_slices;
+ }
+
+ // Iterate over tiles to pick the slice structure within each tile.
+ tile_block_start = 0;
+ slice = 0;
+ for (i = 0; i < ctx->nb_tiles; i++) {
+ int col = i % ctx->tile_cols;
+ int row = i / ctx->tile_cols;
+
+ // Every tile needs at least one slice. If the budget is already
+ // used up, the spacing helper would be asked for zero regions
+ // (and divide by zero), so fail cleanly here instead.
+ if (slice >= max_slices) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to fit a slice for tile "
+ "(%d,%d) within the limit of %d slices.\n",
+ row, col, max_slices);
+ return AVERROR(EINVAL);
+ }
+
+ // Pick slice rows within the tile.
+ err = vaapi_encode_pick_spacing(rows_per_slice, &count,
+ ctx->slices_per_tile[i],
+ max_slices - slice,
+ ctx->tile_row_height[row],
+ slice_spacing);
+ if (err < 0) {
+ av_log(avctx, AV_LOG_ERROR, "Unable to satisfy slice "
+ "constraints in tile (%d,%d).\n", row, col);
+ return err;
+ }
+ if (ctx->nb_tiles == 1 && count > 1) {
+ // Workaround: some Intel drivers contain a bug where the
+ // encoder will fail if the last slice is smaller than the
+ // one before it. Avoid ever hitting this by moving a small
+ // rounding error at the bottom of the frame to the top.
+ if (rows_per_slice[count - 1] > rows_per_slice[count - 2]) {
+ // Scan with j, not i: i is the tile index of the enclosing
+ // loop and must not be clobbered. The scan always stops by
+ // count - 2, which is known to be smaller than the last.
+ for (j = 0; j < count - 1; j++)
+ if (rows_per_slice[j] < rows_per_slice[count - 1])
+ break;
+ --rows_per_slice[count - 1];
+ ++rows_per_slice[j];
+ }
+ }
+ slice_block_start = tile_block_start;
+ for (j = 0; j < count; j++) {
+ ctx->slice_regions[slice++] = (VAAPIEncodeSliceRegion) {
+ .tile_col = col,
+ .tile_row = row,
+ .block_start = slice_block_start,
+ .block_size = rows_per_slice[j] * ctx->tile_col_width[col],
+ };
+ // Step down by whole frame rows: block indices are counted
+ // across the full frame width, not the tile width.
+ slice_block_start +=
+ rows_per_slice[j] * ctx->slice_block_cols;
+ }
+ // Advance to the top-left block of the next tile: one tile width to
+ // the right, plus the rest of this tile row's height when wrapping
+ // to the next row of tiles.
+ if (col == ctx->tile_cols - 1)
+ tile_block_start +=
+ (ctx->tile_row_height[row] - 1) * ctx->slice_block_cols;
+ tile_block_start += ctx->tile_col_width[col];
}
- req_tiles = ctx->tile_rows * ctx->tile_cols;
-
- // Tile slice is not allowed to cross the boundary of a tile due to
- // the constraints of media-driver. Currently we support one slice
- // per tile. This could be extended to multiple slices per tile.
- if (avctx->slices != req_tiles)
- av_log(avctx, AV_LOG_WARNING, "The number of requested slices "
- "mismatches with configured number of tile (%d != %d); "
- "using requested tile number for slice.\n",
- avctx->slices, req_tiles);
-
- ctx->nb_slices = req_tiles;
-
- // Default in uniform spacing
- // 6-3, 6-5
- for (i = 0; i < ctx->tile_cols; i++) {
- ctx->col_width[i] = ( i + 1 ) * ctx->slice_block_cols / ctx->tile_cols -
- i * ctx->slice_block_cols / ctx->tile_cols;
- ctx->col_bd[i + 1] = ctx->col_bd[i] + ctx->col_width[i];
- }
- // 6-4, 6-6
- for (i = 0; i < ctx->tile_rows; i++) {
- ctx->row_height[i] = ( i + 1 ) * ctx->slice_block_rows / ctx->tile_rows -
- i * ctx->slice_block_rows / ctx->tile_rows;
- ctx->row_bd[i + 1] = ctx->row_bd[i] + ctx->row_height[i];
+ if (slice > ctx->nb_slices && avctx->slices > 0) {
+ av_log(avctx, AV_LOG_WARNING, "Slice count rounded up to "
+ "%d (from %d) due to driver constraints on slice "
+ "structure.\n", slice, avctx->slices);
}
-
- av_log(avctx, AV_LOG_VERBOSE, "Encoding pictures with %d x %d tile.\n",
- ctx->tile_rows, ctx->tile_cols);
+ ctx->nb_slices = slice;
return 0;
}
@@ -1993,33 +2056,64 @@ static av_cold int vaapi_encode_init_tile_slice_structure(AVCodecContext *avctx,
static av_cold int vaapi_encode_init_slice_structure(AVCodecContext *avctx)
{
VAAPIEncodeContext *ctx = avctx->priv_data;
- VAConfigAttrib attr[3] = { { VAConfigAttribEncMaxSlices },
- { VAConfigAttribEncSliceStructure },
+ VAConfigAttrib attr[] = {
+ { VAConfigAttribEncMaxSlices },
+ { VAConfigAttribEncSliceStructure },
#if VA_CHECK_VERSION(1, 1, 0)
- { VAConfigAttribEncTileSupport },
+ { VAConfigAttribEncTileSupport },
#endif
- };
+ };
VAStatus vas;
uint32_t max_slices, slice_structure;
- int ret;
+ int slice_spacing, tile_spacing;
+ int slices_wanted, tiles_wanted;
+ int err;
- if (!(ctx->codec->flags & FLAG_SLICE_CONTROL)) {
- if (avctx->slices > 0) {
- av_log(avctx, AV_LOG_WARNING, "Multiple slices were requested "
- "but this codec does not support controlling slices.\n");
- }
+ if (!(ctx->codec->flags & FLAG_SLICES)) {
+ // No slices at all in this codec, but we may still need to
+ // provide one slice parameter buffer.
+ ctx->nb_slices = 1;
return 0;
}
- ctx->slice_block_rows = (avctx->height + ctx->slice_block_height - 1) /
- ctx->slice_block_height;
ctx->slice_block_cols = (avctx->width + ctx->slice_block_width - 1) /
ctx->slice_block_width;
+ ctx->slice_block_rows = (avctx->height + ctx->slice_block_height - 1) /
+ ctx->slice_block_height;
- if (avctx->slices <= 1 && !ctx->tile_rows && !ctx->tile_cols) {
- ctx->nb_slices = 1;
- ctx->slice_size = ctx->slice_block_rows;
- return 0;
+ // Target values; these may be modified if we are unable to achieve
+ // them exactly. Since numbers of slices/tiles are generally a
+ // conformance requirement, we will try to avoid modifying them
+ // downwards unless higher values are not possible (and should always
+ // warn in such cases).
+ ctx->nb_slices = FFMAX(avctx->slices, 1);
+ ctx->tile_cols = FFMAX(ctx->tile_cols, 1);
+ ctx->tile_rows = FFMAX(ctx->tile_rows, 1);
+
+ if (ctx->codec->flags & FLAG_FIXED_SLICE_ROWS) {
+ ctx->nb_slices = ctx->slice_block_rows;
+ return vaapi_encode_pick_slices(avctx, REGION_SPACING_SINGLE,
+ ctx->nb_slices);
+ }
+
+ slices_wanted = ctx->nb_slices > 1;
+ tiles_wanted = ctx->tile_cols > 1 || ctx->tile_rows > 1;
+
+ if (!slices_wanted && !tiles_wanted) {
+ // No need to query anything because we are filling the frame
+ // with a single slice.
+ return vaapi_encode_pick_slices(avctx, REGION_SPACING_UNIFORM, 1);
+ }
+
+ if (slices_wanted && !(ctx->codec->flags & FLAG_SLICE_CONTROL)) {
+ av_log(avctx, AV_LOG_ERROR, "Multiple slices were requested, "
+ "but this codec does not support controlling slices.\n");
+ return AVERROR(EINVAL);
+ }
+ if (tiles_wanted && !(ctx->codec->flags & FLAG_TILE_CONTROL)) {
+ av_log(avctx, AV_LOG_ERROR, "Multiple tiles were requested "
+ "but this codec does not support controlling tiles.\n");
+ return AVERROR(EINVAL);
}
vas = vaGetConfigAttributes(ctx->hwctx->display,
@@ -2027,7 +2121,7 @@ static av_cold int vaapi_encode_init_slice_structure(AVCodecContext *avctx)
ctx->va_entrypoint,
attr, FF_ARRAY_ELEMS(attr));
if (vas != VA_STATUS_SUCCESS) {
- av_log(avctx, AV_LOG_ERROR, "Failed to query slice "
+ av_log(avctx, AV_LOG_ERROR, "Failed to query slice/tile "
"attributes: %d (%s).\n", vas, vaErrorStr(vas));
return AVERROR_EXTERNAL;
}
@@ -2039,8 +2133,33 @@ static av_cold int vaapi_encode_init_slice_structure(AVCodecContext *avctx)
"pictures as multiple slices.\n.");
return AVERROR(EINVAL);
}
+ if (max_slices > MAX_SLICES)
+ max_slices = MAX_SLICES;
- if (ctx->tile_rows && ctx->tile_cols) {
+ // For fixed-size slices currently we only support whole rows (making
+ // rectangular slices) so these two cases are equivalent. This could
+ // be extended to arbitrary runs of blocks, but since slices tend to
+ // be a conformance requirement and most cases (such as broadcast or
+ // bluray) want rectangular slices only it would need to be gated
+ // behind another option.
+ if (slice_structure & VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS ||
+ slice_structure & VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS) {
+ slice_spacing = REGION_SPACING_UNIFORM;
+#if VA_CHECK_VERSION(1, 8, 0)
+ } else if (slice_structure & VA_ENC_SLICE_STRUCTURE_EQUAL_MULTI_ROWS) {
+ slice_spacing = REGION_SPACING_EQUAL;
+#endif
+ } else if (slice_structure & VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS) {
+ slice_spacing = REGION_SPACING_EQUAL_POWER_OF_TWO;
+ } else if (slice_structure & VA_ENC_SLICE_STRUCTURE_EQUAL_ROWS) {
+ slice_spacing = REGION_SPACING_SINGLE;
+ } else {
+ av_log(avctx, AV_LOG_ERROR, "Driver does not support any usable "
+ "slice structure modes (%#x).\n", slice_structure);
+ return AVERROR(EINVAL);
+ }
+
+ if (tiles_wanted) {
#if VA_CHECK_VERSION(1, 1, 0)
uint32_t tile_support = attr[2].value;
if (tile_support == VA_ATTRIB_NOT_SUPPORTED) {
@@ -2048,25 +2167,43 @@ static av_cold int vaapi_encode_init_slice_structure(AVCodecContext *avctx)
"pictures as multiple tiles.\n.");
return AVERROR(EINVAL);
}
+
+ tile_spacing = slice_spacing;
+ // If we are encoding H.265 then use the default uniform tiling there
+ // to save a bit of header space rather than making our own which would
+ // be very similar.
+ if (avctx->codec_id == AV_CODEC_ID_HEVC &&
+ tile_spacing == REGION_SPACING_UNIFORM)
+ tile_spacing = REGION_SPACING_UNIFORM_H265;
+
+ err = vaapi_encode_pick_tile_sizes(avctx, tile_spacing);
+ if (err < 0)
+ return err;
+
+ if (ctx->nb_slices < ctx->nb_tiles) {
+ if (avctx->slices > 0) {
+ // The user specified a number of slices, but we need more.
+ av_log(avctx, AV_LOG_WARNING, "Slice count rounded up to %d "
+ "because VAAPI requires at least one slice per tile.\n",
+ ctx->nb_tiles);
+ }
+ ctx->nb_slices = ctx->nb_tiles;
+ }
+
+ // Distribute the slice budget over the tiles. Propagate any failure
+ // rather than reporting success with a half-initialised layout.
+ err = vaapi_encode_pick_tile_slices(avctx);
+ if (err < 0)
+ return err;
#else
av_log(avctx, AV_LOG_ERROR, "Tile encoding option is "
- "not supported with this VAAPI version.\n");
+ "not supported with this VAAPI version.\n");
return AVERROR(EINVAL);
#endif
}
- if (ctx->tile_rows && ctx->tile_cols)
- ret = vaapi_encode_init_tile_slice_structure(avctx, slice_structure);
- else
- ret = vaapi_encode_init_row_slice_structure(avctx, slice_structure);
- if (ret < 0)
- return ret;
+ err = vaapi_encode_pick_slices(avctx, slice_spacing, max_slices);
+ if (err < 0)
+ return err;
- if (ctx->nb_slices > avctx->slices) {
- av_log(avctx, AV_LOG_WARNING, "Slice count rounded up to "
- "%d (from %d) due to driver constraints on slice "
- "structure.\n", ctx->nb_slices, avctx->slices);
- }
if (ctx->nb_slices > max_slices) {
av_log(avctx, AV_LOG_ERROR, "Driver does not support "
"encoding with %d slices (max %"PRIu32").\n",
@@ -2074,8 +2211,18 @@ static av_cold int vaapi_encode_init_slice_structure(AVCodecContext *avctx)
return AVERROR(EINVAL);
}
- av_log(avctx, AV_LOG_VERBOSE, "Encoding pictures with %d slices.\n",
- ctx->nb_slices);
+ av_log(avctx, AV_LOG_VERBOSE, "Encoding pictures with %d slices "
+ "in %d tiles.\n", ctx->nb_slices, ctx->nb_tiles);
+
+ for (int i = 0; i < ctx->nb_slices; i++) {
+ av_log(avctx, AV_LOG_DEBUG, "Slice %d in tile (%d,%d) "
+ "block start %d size %d.\n", i,
+ ctx->slice_regions[i].tile_col,
+ ctx->slice_regions[i].tile_row,
+ ctx->slice_regions[i].block_start,
+ ctx->slice_regions[i].block_size);
+ }
+
return 0;
}
@@ -43,10 +43,10 @@ enum {
MAX_PICTURE_REFERENCES = 2,
MAX_REORDER_DELAY = 16,
MAX_PARAM_BUFFER_SIZE = 1024,
- // A.4.1: table A.6 allows at most 22 tile rows for any level.
- MAX_TILE_ROWS = 22,
- // A.4.1: table A.6 allows at most 20 tile columns for any level.
+ MAX_SLICES = 1024,
+ // Tile constraints match H.265, increase if needed for other codecs.
MAX_TILE_COLS = 20,
+ MAX_TILE_ROWS = 22,
};
extern const AVCodecHWConfigInternal *ff_vaapi_encode_hw_configs[];
@@ -58,10 +58,15 @@ enum {
PICTURE_TYPE_B = 3,
};
+// Placement of one slice within the frame, as filled in by
+// vaapi_encode_pick_slices().
+typedef struct VAAPIEncodeSliceRegion {
+ // Column index of the tile containing this slice.
+ int tile_col;
+ // Row index of the tile containing this slice.
+ int tile_row;
+ // Index of the first block of the slice, counted across full frame rows.
+ int block_start;
+ // Number of blocks in the slice (slice rows times tile column width).
+ int block_size;
+} VAAPIEncodeSliceRegion;
+
typedef struct VAAPIEncodeSlice {
int index;
- int row_start;
- int row_size;
int block_start;
int block_size;
void *codec_slice_params;
@@ -299,23 +304,17 @@ typedef struct VAAPIEncodeContext {
int64_t dts_pts_diff;
int64_t ts_ring[MAX_REORDER_DELAY * 3];
- // Slice structure.
- int slice_block_rows;
- int slice_block_cols;
- int nb_slices;
- int slice_size;
-
- // Tile encoding.
- int tile_cols;
- int tile_rows;
- // Tile width of the i-th column.
- int col_width[MAX_TILE_COLS];
- // Tile height of i-th row.
- int row_height[MAX_TILE_ROWS];
- // Location of the i-th tile column boundary.
- int col_bd[MAX_TILE_COLS + 1];
- // Location of the i-th tile row boundary.
- int row_bd[MAX_TILE_ROWS + 1];
+ // Slice and tile structure.
+ int slice_block_cols;
+ int slice_block_rows;
+ int nb_slices;
+ int nb_tiles;
+ int tile_cols;
+ int tile_rows;
+ int tile_col_width[MAX_TILE_COLS];
+ int tile_row_height[MAX_TILE_ROWS];
+ int slices_per_tile[MAX_TILE_COLS * MAX_TILE_ROWS];
+ VAAPIEncodeSliceRegion slice_regions[MAX_SLICES];
// Frame type decision.
int gop_size;
@@ -348,8 +347,8 @@ typedef struct VAAPIEncodeContext {
} VAAPIEncodeContext;
enum {
- // Codec supports controlling the subdivision of pictures into slices.
- FLAG_SLICE_CONTROL = 1 << 0,
+ // Codec supports the subdivision of pictures into slices.
+ FLAG_SLICES = 1 << 0,
// Codec only supports constant quality (no rate control).
FLAG_CONSTANT_QUALITY_ONLY = 1 << 1,
// Codec is intra-only.
@@ -361,6 +360,12 @@ enum {
// Codec supports non-IDR key pictures (that is, key pictures do
// not necessarily empty the DPB).
FLAG_NON_IDR_KEY_PICTURES = 1 << 5,
+ // Codec requires fixed slice rows.
+ FLAG_FIXED_SLICE_ROWS = 1 << 6,
+ // Codec supports controlling the layout of slices.
+ FLAG_SLICE_CONTROL = 1 << 7,
+ // Codec supports controlling the layout of tiles.
+ FLAG_TILE_CONTROL = 1 << 8,
};
typedef struct VAAPIEncodeType {
@@ -1145,7 +1145,8 @@ static const VAAPIEncodeProfile vaapi_encode_h264_profiles[] = {
static const VAAPIEncodeType vaapi_encode_type_h264 = {
.profiles = vaapi_encode_h264_profiles,
- .flags = FLAG_SLICE_CONTROL |
+ .flags = FLAG_SLICES |
+ FLAG_SLICE_CONTROL |
FLAG_B_PICTURES |
FLAG_B_PICTURE_REFERENCES |
FLAG_NON_IDR_KEY_PICTURES,
@@ -555,7 +555,7 @@ static int vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
pps->cu_qp_delta_enabled_flag = (ctx->va_rc_mode != VA_RC_CQP);
pps->diff_cu_qp_delta_depth = 0;
- if (ctx->tile_rows && ctx->tile_cols) {
+ if (ctx->tile_cols > 1 || ctx->tile_rows > 1) {
int uniform_spacing;
pps->tiles_enabled_flag = 1;
@@ -567,14 +567,14 @@ static int vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
uniform_spacing = 1;
for (i = 0; i <= pps->num_tile_columns_minus1 &&
uniform_spacing; i++) {
- if (ctx->col_width[i] !=
+ if (ctx->tile_col_width[i] !=
(i + 1) * ctx->slice_block_cols / ctx->tile_cols -
i * ctx->slice_block_cols / ctx->tile_cols)
uniform_spacing = 0;
}
for (i = 0; i <= pps->num_tile_rows_minus1 &&
uniform_spacing; i++) {
- if (ctx->row_height[i] !=
+ if (ctx->tile_row_height[i] !=
(i + 1) * ctx->slice_block_rows / ctx->tile_rows -
i * ctx->slice_block_rows / ctx->tile_rows)
uniform_spacing = 0;
@@ -582,9 +582,9 @@ static int vaapi_encode_h265_init_sequence_params(AVCodecContext *avctx)
pps->uniform_spacing_flag = uniform_spacing;
for (i = 0; i <= pps->num_tile_columns_minus1; i++)
- pps->column_width_minus1[i] = ctx->col_width[i] - 1;
+ pps->column_width_minus1[i] = ctx->tile_col_width[i] - 1;
for (i = 0; i <= pps->num_tile_rows_minus1; i++)
- pps->row_height_minus1[i] = ctx->row_height[i] - 1;
+ pps->row_height_minus1[i] = ctx->tile_row_height[i] - 1;
pps->loop_filter_across_tiles_enabled_flag = 1;
}
@@ -1196,7 +1196,9 @@ static const VAAPIEncodeProfile vaapi_encode_h265_profiles[] = {
static const VAAPIEncodeType vaapi_encode_type_h265 = {
.profiles = vaapi_encode_h265_profiles,
- .flags = FLAG_SLICE_CONTROL |
+ .flags = FLAG_SLICES |
+ FLAG_SLICE_CONTROL |
+ FLAG_TILE_CONTROL |
FLAG_B_PICTURES |
FLAG_B_PICTURE_REFERENCES |
FLAG_NON_IDR_KEY_PICTURES,
@@ -545,12 +545,6 @@ static av_cold int vaapi_encode_mpeg2_configure(AVCodecContext *avctx)
priv->quant_b = 16;
}
- ctx->slice_block_rows = FFALIGN(avctx->height, 16) / 16;
- ctx->slice_block_cols = FFALIGN(avctx->width, 16) / 16;
-
- ctx->nb_slices = ctx->slice_block_rows;
- ctx->slice_size = 1;
-
ctx->roi_quant_range = 31;
return 0;
@@ -565,7 +559,9 @@ static const VAAPIEncodeProfile vaapi_encode_mpeg2_profiles[] = {
static const VAAPIEncodeType vaapi_encode_type_mpeg2 = {
.profiles = vaapi_encode_mpeg2_profiles,
- .flags = FLAG_B_PICTURES,
+ .flags = FLAG_SLICES |
+ FLAG_FIXED_SLICE_ROWS |
+ FLAG_B_PICTURES,
.configure = &vaapi_encode_mpeg2_configure,
@@ -625,6 +621,8 @@ static av_cold int vaapi_encode_mpeg2_init(AVCodecContext *avctx)
ctx->surface_width = FFALIGN(avctx->width, 16);
ctx->surface_height = FFALIGN(avctx->height, 16);
+ ctx->slice_block_height = ctx->slice_block_width = 16;
+
return ff_vaapi_encode_init(avctx);
}