diff mbox

[FFmpeg-devel,v2,1/3] libaomenc: Add support for tiles

Message ID 20180917234746.27125-1-sw@jkqxz.net
State Accepted
Headers show

Commit Message

Mark Thompson Sept. 17, 2018, 11:47 p.m. UTC
Adds an option to specify the number of tile rows and columns, then uses
equal-sized tiles to fill the frame.
---
 libavcodec/libaomenc.c | 54 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

Comments

James Almer Sept. 18, 2018, 12:12 a.m. UTC | #1
On 9/17/2018 8:47 PM, Mark Thompson wrote:
> Adds an option to specify the number of tile rows and columns, then uses
> equal-sized tiles to fill the frame.
> ---
>  libavcodec/libaomenc.c | 54 ++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 54 insertions(+)
> 
> diff --git a/libavcodec/libaomenc.c b/libavcodec/libaomenc.c
> index 6a79d9b873..3ccff0e0fb 100644
> --- a/libavcodec/libaomenc.c
> +++ b/libavcodec/libaomenc.c
> @@ -68,6 +68,7 @@ typedef struct AOMEncoderContext {
>      int static_thresh;
>      int drop_threshold;
>      int noise_sensitivity;
> +    int tile_cols, tile_rows;
>  } AOMContext;
>  
>  static const char *const ctlidstr[] = {
> @@ -79,6 +80,7 @@ static const char *const ctlidstr[] = {
>      [AV1E_SET_COLOR_PRIMARIES]  = "AV1E_SET_COLOR_PRIMARIES",
>      [AV1E_SET_MATRIX_COEFFICIENTS] = "AV1E_SET_MATRIX_COEFFICIENTS",
>      [AV1E_SET_TRANSFER_CHARACTERISTICS] = "AV1E_SET_TRANSFER_CHARACTERISTICS",
> +    [AV1E_SET_SUPERBLOCK_SIZE]  = "AV1E_SET_SUPERBLOCK_SIZE",
>  };
>  
>  static av_cold void log_encoder_error(AVCodecContext *avctx, const char *desc)
> @@ -143,6 +145,10 @@ static av_cold void dump_enc_cfg(AVCodecContext *avctx,
>             width, "kf_mode:",     cfg->kf_mode,
>             width, "kf_min_dist:", cfg->kf_min_dist,
>             width, "kf_max_dist:", cfg->kf_max_dist);
> +    av_log(avctx, level, "tile settings\n"
> +                         "  %*s%d\n  %*s%d\n",
> +           width, "tile_width_count:",  cfg->tile_width_count,
> +           width, "tile_height_count:", cfg->tile_height_count);
>      av_log(avctx, level, "\n");
>  }
>  
> @@ -294,6 +300,7 @@ static av_cold int aom_init(AVCodecContext *avctx,
>      int res;
>      aom_img_fmt_t img_fmt;
>      aom_codec_caps_t codec_caps = aom_codec_get_caps(iface);
> +    aom_superblock_size_t superblock_size = AOM_SUPERBLOCK_SIZE_DYNAMIC;
>  
>      av_log(avctx, AV_LOG_INFO, "%s\n", aom_codec_version_str());
>      av_log(avctx, AV_LOG_VERBOSE, "%s\n", aom_codec_build_config());
> @@ -431,6 +438,50 @@ static av_cold int aom_init(AVCodecContext *avctx,
>  
>      enccfg.g_error_resilient = ctx->error_resilient;
>  
> +    if (ctx->tile_cols && ctx->tile_rows) {
> +        int sb_size, sb_width, sb_height;
> +        int cols_per_tile, cols_step, rows_per_tile, rows_step, i;
> +
> +        // Default to 128x128 superblocks if they fit, otherwise use 64x64.
> +        if ((avctx->width  + 127) / 128 < ctx->tile_cols ||
> +            (avctx->height + 127) / 128 < ctx->tile_rows) {
> +            sb_size = 64;
> +            superblock_size = AOM_SUPERBLOCK_SIZE_64X64;
> +        } else {
> +            sb_size = 128;
> +            superblock_size = AOM_SUPERBLOCK_SIZE_128X128;
> +        }
> +
> +        if ((avctx->width  + sb_size - 1) / sb_size < ctx->tile_cols ||
> +            (avctx->height + sb_size - 1) / sb_size < ctx->tile_rows) {
> +            av_log(avctx, AV_LOG_ERROR, "Invalid tile sizing: frame not "
> +                   "large enough to fit specified tile arrangement.\n");
> +            return AVERROR(EINVAL);
> +        }
> +        if (ctx->tile_cols > MAX_TILE_WIDTHS ||
> +            ctx->tile_rows > MAX_TILE_HEIGHTS) {
> +            av_log(avctx, AV_LOG_ERROR, "Invalid tile sizing: at most %dx%d "
> +                   "tiles allowed.\n", MAX_TILE_WIDTHS, MAX_TILE_HEIGHTS);
> +            return AVERROR(EINVAL);
> +        }
> +
> +        enccfg.tile_width_count  = ctx->tile_cols;
> +        enccfg.tile_height_count = ctx->tile_rows;
> +
> +        sb_width  = (avctx->width  + sb_size - 1) / sb_size;
> +        sb_height = (avctx->height + sb_size - 1) / sb_size;
> +
> +        cols_per_tile = sb_width / ctx->tile_cols;
> +        cols_step     = sb_width % ctx->tile_cols;
> +        for (i = 0; i < ctx->tile_cols; i++)
> +            enccfg.tile_widths[i] = cols_per_tile + (i < cols_step);
> +
> +        rows_per_tile = sb_height / ctx->tile_rows;
> +        rows_step     = sb_height % ctx->tile_rows;
> +        for (i = 0; i < ctx->tile_rows; i++)
> +            enccfg.tile_heights[i] = rows_per_tile + (i < rows_step);
> +    }
> +
>      dump_enc_cfg(avctx, &enccfg);
>      /* Construct Encoder Context */
>      res = aom_codec_enc_init(&ctx->encoder, iface, &enccfg, flags);
> @@ -454,6 +505,8 @@ static av_cold int aom_init(AVCodecContext *avctx,
>      codecctl_int(avctx, AV1E_SET_TRANSFER_CHARACTERISTICS, avctx->color_trc);
>      set_color_range(avctx);
>  
> +    codecctl_int(avctx, AV1E_SET_SUPERBLOCK_SIZE, superblock_size);
> +
>      // provide dummy value to initialize wrapper, values will be updated each _encode()
>      aom_img_wrap(&ctx->rawimg, img_fmt, avctx->width, avctx->height, 1,
>                   (unsigned char*)1);
> @@ -742,6 +795,7 @@ static const AVOption options[] = {
>      { "static-thresh",    "A change threshold on blocks below which they will be skipped by the encoder", OFFSET(static_thresh), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
>      { "drop-threshold",   "Frame drop threshold", offsetof(AOMContext, drop_threshold), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, VE },
>      { "noise-sensitivity", "Noise sensitivity", OFFSET(noise_sensitivity), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 4, VE},
> +    { "tiles",            "Tile rows x columns", OFFSET(tile_cols), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, VE },

Using separate tile-columns and tile-rows AV_OPT_TYPE_INT options would
be more consistent with the libvpx wrapper, which already has options
with those names and also shares a lot of other option names with the
libaom wrapper.

LGTM otherwise.

>      { NULL }
>  };
>  
>
Mark Thompson Sept. 18, 2018, 10:55 p.m. UTC | #2
On 18/09/18 01:12, James Almer wrote:
> On 9/17/2018 8:47 PM, Mark Thompson wrote:
>> Adds an option to specify the number of tile rows and columns, then uses
>> equal-sized tiles to fill the frame.
>> ---
>>  libavcodec/libaomenc.c | 54 ++++++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 54 insertions(+)
>>
>> diff --git a/libavcodec/libaomenc.c b/libavcodec/libaomenc.c
>> index 6a79d9b873..3ccff0e0fb 100644
>> --- a/libavcodec/libaomenc.c
>> +++ b/libavcodec/libaomenc.c
>> ...
>> @@ -742,6 +795,7 @@ static const AVOption options[] = {
>>      { "static-thresh",    "A change threshold on blocks below which they will be skipped by the encoder", OFFSET(static_thresh), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
>>      { "drop-threshold",   "Frame drop threshold", offsetof(AOMContext, drop_threshold), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, VE },
>>      { "noise-sensitivity", "Noise sensitivity", OFFSET(noise_sensitivity), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 4, VE},
>> +    { "tiles",            "Tile rows x columns", OFFSET(tile_cols), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, VE },
> 
> Using separate tile-columns and tile-rows AV_OPT_TYPE_INT options would
> be more consistent with the libvpx wrapper, which already has them
> called like that and also shares a lot of other option names with the
> libaom.

The options on libvpx-vp9 are actually log2 of the value, so "-tile-rows 3 -tile-columns 2" gives you 8 tile rows by 4 tile columns.  (VP9 requires that the number of tiles in each dimension is a power of two, while AV1 lets you set arbitrary sizes.)

I don't really mind how this works - I just thought the IMAGE_SIZE method looked nicer.  What do you prefer?

- Mark
James Almer Sept. 18, 2018, 11:15 p.m. UTC | #3
On 9/18/2018 7:55 PM, Mark Thompson wrote:
> On 18/09/18 01:12, James Almer wrote:
>> On 9/17/2018 8:47 PM, Mark Thompson wrote:
>>> Adds an option to specify the number of tile rows and columns, then uses
>>> equal-sized tiles to fill the frame.
>>> ---
>>>  libavcodec/libaomenc.c | 54 ++++++++++++++++++++++++++++++++++++++++++
>>>  1 file changed, 54 insertions(+)
>>>
>>> diff --git a/libavcodec/libaomenc.c b/libavcodec/libaomenc.c
>>> index 6a79d9b873..3ccff0e0fb 100644
>>> --- a/libavcodec/libaomenc.c
>>> +++ b/libavcodec/libaomenc.c
>>> ...
>>> @@ -742,6 +795,7 @@ static const AVOption options[] = {
>>>      { "static-thresh",    "A change threshold on blocks below which they will be skipped by the encoder", OFFSET(static_thresh), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
>>>      { "drop-threshold",   "Frame drop threshold", offsetof(AOMContext, drop_threshold), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, VE },
>>>      { "noise-sensitivity", "Noise sensitivity", OFFSET(noise_sensitivity), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 4, VE},
>>> +    { "tiles",            "Tile rows x columns", OFFSET(tile_cols), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, VE },
>>
>> Using separate tile-columns and tile-rows AV_OPT_TYPE_INT options would
>> be more consistent with the libvpx wrapper, which already has them
>> called like that and also shares a lot of other option names with the
>> libaom.
> 
> The options on libvpx-vp9 are actually log2 of the value, so "-tile-rows 3 -tile-columns 2" gives you 8x4 tiles.  (VP9 requires that the number of tiles in each dimension is a power of two, while AV1 lets you set arbitrary sizes.)
> 
> I don't really mind how this works - I just thought the IMAGE_SIZE method looked nicer.  What do you prefer?

I usually prefer consistency in options for similar modules, but the
equivalent of the VP9 options would be to set the AV1E_SET_TILE_* codec
control IDs instead of what you're doing here, so your IMAGE_SIZE method
is fine.

For that matter, there's a conflicting patch called "lavc/libaomenc: Add
-tile-columns/-tile-rows" by Kagami Hiiragi that sets the aforementioned
codec control IDs with the same option names as the VP9 ones. Maybe both
patches can be applied, with the encoder aborting if they are used at
the same time? Otherwise, apply yours alone, since it allows arbitrary sizes.

> 
> - Mark
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
Mark Thompson Sept. 23, 2018, 10:55 p.m. UTC | #4
On 19/09/18 00:15, James Almer wrote:
> On 9/18/2018 7:55 PM, Mark Thompson wrote:
>> On 18/09/18 01:12, James Almer wrote:
>>> On 9/17/2018 8:47 PM, Mark Thompson wrote:
>>>> Adds an option to specify the number of tile rows and columns, then uses
>>>> equal-sized tiles to fill the frame.
>>>> ---
>>>>  libavcodec/libaomenc.c | 54 ++++++++++++++++++++++++++++++++++++++++++
>>>>  1 file changed, 54 insertions(+)
>>>>
>>>> diff --git a/libavcodec/libaomenc.c b/libavcodec/libaomenc.c
>>>> index 6a79d9b873..3ccff0e0fb 100644
>>>> --- a/libavcodec/libaomenc.c
>>>> +++ b/libavcodec/libaomenc.c
>>>> ...
>>>> @@ -742,6 +795,7 @@ static const AVOption options[] = {
>>>>      { "static-thresh",    "A change threshold on blocks below which they will be skipped by the encoder", OFFSET(static_thresh), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
>>>>      { "drop-threshold",   "Frame drop threshold", offsetof(AOMContext, drop_threshold), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, VE },
>>>>      { "noise-sensitivity", "Noise sensitivity", OFFSET(noise_sensitivity), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 4, VE},
>>>> +    { "tiles",            "Tile rows x columns", OFFSET(tile_cols), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, VE },
>>>
>>> Using separate tile-columns and tile-rows AV_OPT_TYPE_INT options would
>>> be more consistent with the libvpx wrapper, which already has them
>>> called like that and also shares a lot of other option names with the
>>> libaom.
>>
>> The options on libvpx-vp9 are actually log2 of the value, so "-tile-rows 3 -tile-columns 2" gives you 8x4 tiles.  (VP9 requires that the number of tiles in each dimension is a power of two, while AV1 lets you set arbitrary sizes.)
>>
>> I don't really mind how this works - I just thought the IMAGE_SIZE method looked nicer.  What do you prefer?
> 
> I usually prefer consistency in options for similar modules, but the
> equivalent of the VP9 options would be to set the AV1E_SET_TILE_* codec
> control IDs instead of what you're doing here, so your IMAGE_SIZE method
> is fine.
> 
> There's for that matter a conflicting patch called "lavc/libaomenc: Add
> -tile-columns/-tile-rows" by Kagami Hiiragi that sets the aforementioned
> codec control IDs with the same option names as the VP9 ones. Maybe both
> patches can be applied, and have the encoder abort if they are used at
> the same time? Otherwise apply yours alone since it allows arbitrary sizes.

I think having both would make sense - the two different options reflect the different values of uniform_tile_spacing_flag.  Explicit sizing with the flag off can produce the same result as having the flag on, but will require more bits in every frame header to do so.

On matching behaviour with libvpx, I don't think tile_cols/rows_log2 in VP9 and AV1 (with uniform_tile_spacing_flag = 1) actually have the same effect - VP9 ensures that you get exactly 2^tile_cols_log2 tile columns, while for AV1 it's an upper bound and the actual count depends on the width.  For example, given a width of 576 (i.e. nine 64x64 superblocks) and tile_cols_log2 = 2, VP9 looks like it gives you { 2, 2, 2, 3 } while AV1 will give you { 3, 3, 3 } as the tile column widths.  That should probably be mentioned in the documentation, and maybe merits the options not matching precisely.

So, I'll look into supporting both cases in some consistent way.

Thanks,

- Mark
diff mbox

Patch

diff --git a/libavcodec/libaomenc.c b/libavcodec/libaomenc.c
index 6a79d9b873..3ccff0e0fb 100644
--- a/libavcodec/libaomenc.c
+++ b/libavcodec/libaomenc.c
@@ -68,6 +68,7 @@  typedef struct AOMEncoderContext {
     int static_thresh;
     int drop_threshold;
     int noise_sensitivity;
+    int tile_cols, tile_rows;
 } AOMContext;
 
 static const char *const ctlidstr[] = {
@@ -79,6 +80,7 @@  static const char *const ctlidstr[] = {
     [AV1E_SET_COLOR_PRIMARIES]  = "AV1E_SET_COLOR_PRIMARIES",
     [AV1E_SET_MATRIX_COEFFICIENTS] = "AV1E_SET_MATRIX_COEFFICIENTS",
     [AV1E_SET_TRANSFER_CHARACTERISTICS] = "AV1E_SET_TRANSFER_CHARACTERISTICS",
+    [AV1E_SET_SUPERBLOCK_SIZE]  = "AV1E_SET_SUPERBLOCK_SIZE",
 };
 
 static av_cold void log_encoder_error(AVCodecContext *avctx, const char *desc)
@@ -143,6 +145,10 @@  static av_cold void dump_enc_cfg(AVCodecContext *avctx,
            width, "kf_mode:",     cfg->kf_mode,
            width, "kf_min_dist:", cfg->kf_min_dist,
            width, "kf_max_dist:", cfg->kf_max_dist);
+    av_log(avctx, level, "tile settings\n"
+                         "  %*s%d\n  %*s%d\n",
+           width, "tile_width_count:",  cfg->tile_width_count,
+           width, "tile_height_count:", cfg->tile_height_count);
     av_log(avctx, level, "\n");
 }
 
@@ -294,6 +300,7 @@  static av_cold int aom_init(AVCodecContext *avctx,
     int res;
     aom_img_fmt_t img_fmt;
     aom_codec_caps_t codec_caps = aom_codec_get_caps(iface);
+    aom_superblock_size_t superblock_size = AOM_SUPERBLOCK_SIZE_DYNAMIC;
 
     av_log(avctx, AV_LOG_INFO, "%s\n", aom_codec_version_str());
     av_log(avctx, AV_LOG_VERBOSE, "%s\n", aom_codec_build_config());
@@ -431,6 +438,50 @@  static av_cold int aom_init(AVCodecContext *avctx,
 
     enccfg.g_error_resilient = ctx->error_resilient;
 
+    if (ctx->tile_cols && ctx->tile_rows) {
+        int sb_size, sb_width, sb_height;
+        int cols_per_tile, cols_step, rows_per_tile, rows_step, i;
+
+        // Default to 128x128 superblocks if they fit, otherwise use 64x64.
+        if ((avctx->width  + 127) / 128 < ctx->tile_cols ||
+            (avctx->height + 127) / 128 < ctx->tile_rows) {
+            sb_size = 64;
+            superblock_size = AOM_SUPERBLOCK_SIZE_64X64;
+        } else {
+            sb_size = 128;
+            superblock_size = AOM_SUPERBLOCK_SIZE_128X128;
+        }
+
+        if ((avctx->width  + sb_size - 1) / sb_size < ctx->tile_cols ||
+            (avctx->height + sb_size - 1) / sb_size < ctx->tile_rows) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid tile sizing: frame not "
+                   "large enough to fit specified tile arrangement.\n");
+            return AVERROR(EINVAL);
+        }
+        if (ctx->tile_cols > MAX_TILE_WIDTHS ||
+            ctx->tile_rows > MAX_TILE_HEIGHTS) {
+            av_log(avctx, AV_LOG_ERROR, "Invalid tile sizing: at most %dx%d "
+                   "tiles allowed.\n", MAX_TILE_WIDTHS, MAX_TILE_HEIGHTS);
+            return AVERROR(EINVAL);
+        }
+
+        enccfg.tile_width_count  = ctx->tile_cols;
+        enccfg.tile_height_count = ctx->tile_rows;
+
+        sb_width  = (avctx->width  + sb_size - 1) / sb_size;
+        sb_height = (avctx->height + sb_size - 1) / sb_size;
+
+        cols_per_tile = sb_width / ctx->tile_cols;
+        cols_step     = sb_width % ctx->tile_cols;
+        for (i = 0; i < ctx->tile_cols; i++)
+            enccfg.tile_widths[i] = cols_per_tile + (i < cols_step);
+
+        rows_per_tile = sb_height / ctx->tile_rows;
+        rows_step     = sb_height % ctx->tile_rows;
+        for (i = 0; i < ctx->tile_rows; i++)
+            enccfg.tile_heights[i] = rows_per_tile + (i < rows_step);
+    }
+
     dump_enc_cfg(avctx, &enccfg);
     /* Construct Encoder Context */
     res = aom_codec_enc_init(&ctx->encoder, iface, &enccfg, flags);
@@ -454,6 +505,8 @@  static av_cold int aom_init(AVCodecContext *avctx,
     codecctl_int(avctx, AV1E_SET_TRANSFER_CHARACTERISTICS, avctx->color_trc);
     set_color_range(avctx);
 
+    codecctl_int(avctx, AV1E_SET_SUPERBLOCK_SIZE, superblock_size);
+
     // provide dummy value to initialize wrapper, values will be updated each _encode()
     aom_img_wrap(&ctx->rawimg, img_fmt, avctx->width, avctx->height, 1,
                  (unsigned char*)1);
@@ -742,6 +795,7 @@  static const AVOption options[] = {
     { "static-thresh",    "A change threshold on blocks below which they will be skipped by the encoder", OFFSET(static_thresh), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
     { "drop-threshold",   "Frame drop threshold", offsetof(AOMContext, drop_threshold), AV_OPT_TYPE_INT, {.i64 = 0 }, INT_MIN, INT_MAX, VE },
     { "noise-sensitivity", "Noise sensitivity", OFFSET(noise_sensitivity), AV_OPT_TYPE_INT, {.i64 = 0 }, 0, 4, VE},
+    { "tiles",            "Tile rows x columns", OFFSET(tile_cols), AV_OPT_TYPE_IMAGE_SIZE, { .str = NULL }, 0, 0, VE },
     { NULL }
 };