[FFmpeg-devel] avcodec/magicyuv: add SIMD for median of 10bits

Message ID	1482514373-28939-1-git-send-email-onemda@gmail.com
State	Rejected
Headers	show Delivered-To: ffmpegpatchwork@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; From: Paul B Mahol <onemda@gmail.com> To: ffmpeg-devel@ffmpeg.org Date: Fri, 23 Dec 2016 18:32:53 +0100 Message-Id: <1482514373-28939-1-git-send-email-onemda@gmail.com> Subject: [FFmpeg-devel] [PATCH] avcodec/magicyuv: add SIMD for median of 10bits Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: base64 Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

diff --git a/libavcodec/lossless_videodsp.c b/libavcodec/lossless_videodsp.c index 3491621..15189f1 100644 --- a/libavcodec/lossless_videodsp.c +++ b/libavcodec/lossless_videodsp.c @@ -77,6 +77,23 @@ static void add_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src, con *left_top = lt; } +static void add_magy_median_pred_int16_c(uint16_t *dst, const uint16_t *src, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top){ + int i; + uint16_t l, lt; + + l = *left; + lt = *left_top; + + for(i=0; i<w; i++){ + l = (mid_pred(l, src[i], (l + src[i] - lt)) + diff[i]) & mask; + lt = src[i]; + dst[i] = l; + } + + *left = l; + *left_top = lt; +} + static void sub_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top){ int i; uint16_t l, lt; @@ -122,6 +139,7 @@ void ff_llviddsp_init(LLVidDSPContext *c, AVCodecContext *avctx) c->add_hfyu_left_pred_int16 = add_hfyu_left_pred_int16_c; c->add_hfyu_median_pred_int16 = add_hfyu_median_pred_int16_c; c->sub_hfyu_median_pred_int16 = sub_hfyu_median_pred_int16_c; + c->add_magy_median_pred_int16 = add_magy_median_pred_int16_c; if (ARCH_X86) ff_llviddsp_init_x86(c, avctx); diff --git a/libavcodec/lossless_videodsp.h b/libavcodec/lossless_videodsp.h index 040902e..c7a6881 100644 --- a/libavcodec/lossless_videodsp.h +++ b/libavcodec/lossless_videodsp.h @@ -32,6 +32,7 @@ typedef struct LLVidDSPContext { void (*sub_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top); void (*add_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top); int (*add_hfyu_left_pred_int16)(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned left); + void (*add_magy_median_pred_int16)(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top); } LLVidDSPContext; void ff_llviddsp_init(LLVidDSPContext *llviddsp, AVCodecContext *avctx); diff --git a/libavcodec/magicyuv.c b/libavcodec/magicyuv.c index 16d7027..f65c434 100644 --- a/libavcodec/magicyuv.c +++ b/libavcodec/magicyuv.c @@ -144,27 +144,6 @@ static int huff_build(VLC *vlc, uint8_t *len) syms, sizeof(*syms), sizeof(*syms), 0); } -static void magicyuv_median_pred10(uint16_t *dst, const uint16_t *src1, - const uint16_t *diff, intptr_t w, - int *left, int *left_top) -{ - int i; - uint16_t l, lt; - - l = *left; - lt = *left_top; - - for (i = 0; i < w; i++) { - l = mid_pred(l, src1[i], (l + src1[i] - lt)) + diff[i]; - l &= 0x3FF; - lt = src1[i]; - dst[i] = l; - } - - *left = l; - *left_top = lt; -} - static int magy_decode_slice10(AVCodecContext *avctx, void *tdata, int j, int threadnr) { @@ -265,7 +244,7 @@ static int magy_decode_slice10(AVCodecContext *avctx, void *tdata, dst += stride; } for (k = 1 + interlaced; k < height; k++) { - magicyuv_median_pred10(dst, dst - fake_stride, dst, width, &left, &lefttop); + s->llviddsp.add_magy_median_pred_int16(dst, dst - fake_stride, dst, 1023, width, &left, &lefttop); lefttop = left = dst[0]; dst += stride; } diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm index f06fcdf..8a2eb26 100644 --- a/libavcodec/x86/lossless_videodsp.asm +++ b/libavcodec/x86/lossless_videodsp.asm @@ -292,3 +292,65 @@ cglobal sub_hfyu_median_pred_int16, 7,7,0, dst, src1, src2, mask, w, left, left_ movzx maskd, word [src2q + wq - 2] mov [leftq], maskd RET + +cglobal add_magy_median_pred_int16, 7,7,0, dst, top, diff, mask, w, left, left_top + add wd, wd + movd mm6, maskd + SPLATW mm6, mm6 + movq mm0, [topq] + movq mm2, mm0 + movd mm4, [left_topq] + psllq mm2, 16 + movq mm1, mm0 + por mm4, mm2 + movd mm3, [leftq] + psubw mm0, mm4 ; t-tl + add dstq, wq + add topq, wq + add diffq, wq + neg wq + jmp .skip +.loop: + movq mm4, [topq+wq] + movq mm0, mm4 + psllq mm4, 16 + por mm4, mm1 + movq mm1, mm0 ; t + psubw mm0, mm4 ; t-tl +.skip: + movq mm2, [diffq+wq] +%assign i 0 +%rep 4 + movq mm4, mm0 + paddw mm4, mm3 ; t-tl+l + movq mm5, mm3 + pmaxsw mm3, mm1 + pminsw mm5, mm1 + pminsw mm3, mm4 + pmaxsw mm3, mm5 ; median + paddw mm3, mm2 ; +residual + pand mm3, mm6 +%if i==0 + movq mm7, mm3 + psllq mm7, 48 +%else + movq mm4, mm3 + psrlq mm7, 16 + psllq mm4, 48 + por mm7, mm4 +%endif +%if i<3 + psrlq mm0, 16 + psrlq mm1, 16 + psrlq mm2, 16 +%endif +%assign i i+1 +%endrep + movq [dstq+wq], mm7 + add wq, 8 + jl .loop + movzx r2d, word [dstq-2] + mov [leftq], r2d + movzx r2d, word [topq-2] + mov [left_topq], r2d + RET diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c index 548d043..8112c70 100644 --- a/libavcodec/x86/lossless_videodsp_init.c +++ b/libavcodec/x86/lossless_videodsp_init.c @@ -30,6 +30,7 @@ int ff_add_hfyu_left_pred_int16_ssse3(uint16_t *dst, const uint16_t *src, unsign int ff_add_hfyu_left_pred_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, unsigned acc); void ff_add_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top); void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top); +void ff_add_magy_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top); void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx) @@ -44,6 +45,7 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx) if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) { c->add_hfyu_median_pred_int16 = ff_add_hfyu_median_pred_int16_mmxext; + c->add_magy_median_pred_int16 = ff_add_magy_median_pred_int16_mmxext; c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext; }

[FFmpeg-devel] avcodec/magicyuv: add SIMD for median of 10bits

Commit Message

Comments

Patch