[FFmpeg-devel,2/2] vp8: make mv_min/max thread-local if using partition threading.

Message ID	1491423604-66388-2-git-send-email-rsbultje@gmail.com
State	Accepted
Commit	fed92adbb3fc6cbf735e3df9a2f7d0a2917fcfbd
Headers	show Delivered-To: ffmpegpatchwork@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; From: "Ronald S. Bultje" <rsbultje@gmail.com> To: ffmpeg-devel@ffmpeg.org Date: Wed, 5 Apr 2017 16:20:04 -0400 Message-Id: <1491423604-66388-2-git-send-email-rsbultje@gmail.com> In-Reply-To: <1491423604-66388-1-git-send-email-rsbultje@gmail.com> References: <1491423604-66388-1-git-send-email-rsbultje@gmail.com> Subject: [FFmpeg-devel] [PATCH 2/2] vp8: make mv_min/max thread-local if using partition threading. Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Cc: "Ronald S. Bultje" <rsbultje@gmail.com> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

diff --git a/libavcodec/vp8.c b/libavcodec/vp8.c index 9bc1d95..fe7aa23 100644 --- a/libavcodec/vp8.c +++ b/libavcodec/vp8.c @@ -772,7 +772,7 @@ static int vp8_decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_si } static av_always_inline -void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src) +void clamp_mv(VP8mvbounds *s, VP56mv *dst, const VP56mv *src) { dst->x = av_clip(src->x, av_clip(s->mv_min.x, INT16_MIN, INT16_MAX), av_clip(s->mv_max.x, INT16_MIN, INT16_MAX)); @@ -1031,7 +1031,7 @@ void vp7_decode_mvs(VP8Context *s, VP8Macroblock *mb, } static av_always_inline -void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb, +void vp8_decode_mvs(VP8Context *s, VP8mvbounds *mv_bounds, VP8Macroblock *mb, int mb_x, int mb_y, int layout) { VP8Macroblock *mb_edge[3] = { 0 /* top */, @@ -1102,7 +1102,7 @@ void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb, if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) { if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) { /* Choose the best mv out of 0,0 and the nearest mv */ - clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]); + clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]); cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) + (mb_edge[VP8_EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 + (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT); @@ -1116,11 +1116,11 @@ void vp8_decode_mvs(VP8Context *s, VP8Macroblock *mb, mb->bmv[0] = mb->mv; } } else { - clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]); + clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAR]); mb->bmv[0] = mb->mv; } } else { - clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]); + clamp_mv(mv_bounds, &mb->mv, &near_mv[CNT_NEAREST]); mb->bmv[0] = mb->mv; } } else { @@ -1166,7 +1166,8 @@ void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, } static av_always_inline -void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, +void decode_mb_mode(VP8Context *s, VP8mvbounds *mv_bounds, + VP8Macroblock *mb, int mb_x, int mb_y, uint8_t *segment, uint8_t *ref, int layout, int is_vp7) { VP56RangeCoder *c = &s->c; @@ -1230,7 +1231,7 @@ void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, if (is_vp7) vp7_decode_mvs(s, mb, mb_x, mb_y, layout); else - vp8_decode_mvs(s, mb, mb_x, mb_y, layout); + vp8_decode_mvs(s, mv_bounds, mb, mb_x, mb_y, layout); } else { // intra MB, 16.1 mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); @@ -2205,8 +2206,8 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe, VP8Context *s = avctx->priv_data; int mb_x, mb_y; - s->mv_min.y = -MARGIN; - s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; + s->mv_bounds.mv_min.y = -MARGIN; + s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; for (mb_y = 0; mb_y < s->mb_height; mb_y++) { VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width + 1) * (mb_y + 1) + 1); @@ -2214,20 +2215,20 @@ void vp78_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe, AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED * 0x01010101); - s->mv_min.x = -MARGIN; - s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; + s->mv_bounds.mv_min.x = -MARGIN; + s->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { if (mb_y == 0) AV_WN32A((mb - s->mb_width - 1)->intra4x4_pred_mode_top, DC_PRED * 0x01010101); - decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy, + decode_mb_mode(s, &s->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy, prev_frame && prev_frame->seg_map ? prev_frame->seg_map->data + mb_xy : NULL, 1, is_vp7); - s->mv_min.x -= 64; - s->mv_max.x -= 64; + s->mv_bounds.mv_min.x -= 64; + s->mv_bounds.mv_max.x -= 64; } - s->mv_min.y -= 64; - s->mv_max.y -= 64; + s->mv_bounds.mv_min.y -= 64; + s->mv_bounds.mv_max.y -= 64; } } @@ -2325,8 +2326,8 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void if (!is_vp7 || mb_y == 0) memset(td->left_nnz, 0, sizeof(td->left_nnz)); - s->mv_min.x = -MARGIN; - s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; + td->mv_bounds.mv_min.x = -MARGIN; + td->mv_bounds.mv_max.x = ((s->mb_width - 1) << 6) + MARGIN; for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) { if (c->end <= c->buffer && c->bits >= 0) @@ -2350,7 +2351,7 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void dst[2] - dst[1], 2); if (!s->mb_layout) - decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy, + decode_mb_mode(s, &td->mv_bounds, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy, prev_frame && prev_frame->seg_map ? prev_frame->seg_map->data + mb_xy : NULL, 0, is_vp7); @@ -2397,8 +2398,8 @@ static av_always_inline int decode_mb_row_no_filter(AVCodecContext *avctx, void dst[0] += 16; dst[1] += 8; dst[2] += 8; - s->mv_min.x -= 64; - s->mv_max.x -= 64; + td->mv_bounds.mv_min.x -= 64; + td->mv_bounds.mv_max.x -= 64; if (mb_x == s->mb_width + 1) { update_pos(td, mb_y, s->mb_width + 3); @@ -2504,6 +2505,8 @@ int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr, int ret; td->thread_nr = threadnr; + td->mv_bounds.mv_min.y = -MARGIN - 64 * threadnr; + td->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN - 64 * threadnr; for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) { atomic_store(&td->thread_mb_pos, mb_y << 16); ret = s->decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr); @@ -2515,8 +2518,8 @@ int vp78_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata, int jobnr, s->filter_mb_row(avctx, tdata, jobnr, threadnr); update_pos(td, mb_y, INT_MAX & 0xFFFF); - s->mv_min.y -= 64; - s->mv_max.y -= 64; + td->mv_bounds.mv_min.y -= 64 * num_jobs; + td->mv_bounds.mv_max.y -= 64 * num_jobs; if (avctx->active_thread_type == FF_THREAD_FRAME) ff_thread_report_progress(&curframe->tf, mb_y, 0); @@ -2662,8 +2665,8 @@ int vp78_decode_frame(AVCodecContext *avctx, void *data, int *got_frame, s->num_jobs = num_jobs; s->curframe = curframe; s->prev_frame = prev_frame; - s->mv_min.y = -MARGIN; - s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; + s->mv_bounds.mv_min.y = -MARGIN; + s->mv_bounds.mv_max.y = ((s->mb_height - 1) << 6) + MARGIN; for (i = 0; i < MAX_THREADS; i++) { VP8ThreadData *td = &s->thread_data[i]; atomic_init(&td->thread_mb_pos, 0); diff --git a/libavcodec/vp8.h b/libavcodec/vp8.h index d7e7680..8263997 100644 --- a/libavcodec/vp8.h +++ b/libavcodec/vp8.h @@ -93,6 +93,16 @@ typedef struct VP8Macroblock { VP56mv bmv[16]; } VP8Macroblock; +typedef struct VP8intmv { + int x; + int y; +} VP8intmv; + +typedef struct VP8mvbounds { + VP8intmv mv_min; + VP8intmv mv_max; +} VP8mvbounds; + typedef struct VP8ThreadData { DECLARE_ALIGNED(16, int16_t, block)[6][4][16]; DECLARE_ALIGNED(16, int16_t, block_dc)[16]; @@ -122,6 +132,7 @@ typedef struct VP8ThreadData { #define EDGE_EMU_LINESIZE 32 DECLARE_ALIGNED(16, uint8_t, edge_emu_buffer)[21 * EDGE_EMU_LINESIZE]; VP8FilterStrength *filter_strength; + VP8mvbounds mv_bounds; } VP8ThreadData; typedef struct VP8Frame { @@ -129,11 +140,6 @@ typedef struct VP8Frame { AVBufferRef *seg_map; } VP8Frame; -typedef struct VP8intmv { - int x; - int y; -} VP8intmv; - #define MAX_THREADS 8 typedef struct VP8Context { VP8ThreadData *thread_data; @@ -152,8 +158,7 @@ typedef struct VP8Context { uint8_t deblock_filter; uint8_t mbskip_enabled; uint8_t profile; - VP8intmv mv_min; - VP8intmv mv_max; + VP8mvbounds mv_bounds; int8_t sign_bias[4]; ///< one state [0, 1] per ref frame type int ref_count[3];

[FFmpeg-devel,2/2] vp8: make mv_min/max thread-local if using partition threading.

Commit Message

Patch