[FFmpeg-devel,6/9] avcodec: [loongson] optimize get_cabac_inline with asm.

Submitted by Shiyou Yin on July 11, 2018, 10:22 a.m.

Details

Message ID 1531304524-9625-1-git-send-email-yinshiyou-hf@loongson.cn
State Superseded
Headers show

Commit Message

Shiyou Yin July 11, 2018, 10:22 a.m.
Optimize function get_cabac_inline with asm for loongson platform.

Change-Id: I538ad5c2d2fc20793aa36b6697c956a1b5ddbc78
Signed-off-by: Shiyou Yin <yinshiyou-hf@loongson.cn>
---
 libavcodec/cabac_functions.h |   3 ++
 libavcodec/mips/cabac.h      | 119 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 122 insertions(+)
 create mode 100644 libavcodec/mips/cabac.h

Comments

Carl Eugen Hoyos July 11, 2018, noon
2018-07-11 12:22 GMT+02:00, Shiyou Yin <yinshiyou-hf@loongson.cn>:
> Optimize function get_cabac_inline with asm for loongson platform.

The commit message should contain some numbers about the
speed-up.

Does this patch (and do the other patches) work on both mips
platforms? (There are two that are currently supported by
FFmpeg, right?)

Carl Eugen
Shiyou Yin July 12, 2018, 12:38 p.m.
> 主题: Re: [FFmpeg-devel] [PATCH 6/9] avcodec: [loongson] optimize get_cabac_inline with asm.

> 

> 2018-07-11 12:22 GMT+02:00, Shiyou Yin <yinshiyou-hf@loongson.cn>:

> > Optimize function get_cabac_inline with asm for loongson platform.

> 

> The commit message should contain some numbers about the

> speed-up.

> 

> Does this patch (and do the other patches) work on both mips

> platforms? (There are two that are currently supported by

> FFmpeg, right?)


This optimization has only been tested on the Loongson platform so far.
In theory, it can work on the MIPS platform too.

Patch hide | download patch | download mbox

diff --git a/libavcodec/cabac_functions.h b/libavcodec/cabac_functions.h
index fe72a82..bb2b421 100644
--- a/libavcodec/cabac_functions.h
+++ b/libavcodec/cabac_functions.h
@@ -45,6 +45,9 @@ 
 #if ARCH_X86
 #   include "x86/cabac.h"
 #endif
+#if ARCH_MIPS
+#   include "mips/cabac.h"
+#endif
 
 static const uint8_t * const ff_h264_norm_shift = ff_h264_cabac_tables + H264_NORM_SHIFT_OFFSET;
 static const uint8_t * const ff_h264_lps_range = ff_h264_cabac_tables + H264_LPS_RANGE_OFFSET;
diff --git a/libavcodec/mips/cabac.h b/libavcodec/mips/cabac.h
new file mode 100644
index 0000000..a22c1ea
--- /dev/null
+++ b/libavcodec/mips/cabac.h
@@ -0,0 +1,119 @@ 
+/*
+ * Loongson optimized CABAC functions (get_cabac_inline)
+ *
+ * Copyright (c) 2015 Loongson Technology Corporation Limited
+ * Copyright (c) 2015 Shiyou Yin <yinshiyou-hf@loongson.cn>
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MIPS_CABAC_H
+#define AVCODEC_MIPS_CABAC_H
+
+#include "libavcodec/cabac.h"
+#include "libavutil/mips/mmiutils.h"
+#include "config.h"
+
+#define get_cabac_inline get_cabac_inline_mips /* renames the function defined below */
+static av_always_inline int get_cabac_inline(CABACContext *c,
+                                             uint8_t * const state){
+    mips_reg tmp0, tmp1, tmp2, bit; /* asm scratch registers; bit also carries the result */
+    /* Asm get_cabac_inline: updates c->range, c->low, c->bytestream and *state; returns 0 or 1. */
+    __asm__ volatile ( /* bit = *state; tmp0 = tables + 2 * (c_range & 0xC0) + bit */
+        "lbu          %[bit],        0(%[state])                   \n\t"
+        "and          %[tmp0],       %[c_range],     0xC0          \n\t"
+        PTR_ADDU     "%[tmp0],       %[tmp0],        %[tmp0]       \n\t"
+        PTR_ADDU     "%[tmp0],       %[tmp0],        %[tables]     \n\t"
+        PTR_ADDU     "%[tmp0],       %[tmp0],        %[bit]        \n\t"
+        /* tmp1: RangeLPS, the byte at offset lps_off (H264_LPS_RANGE_OFFSET) from tmp0 */
+        "lbu          %[tmp1],       %[lps_off](%[tmp0])           \n\t"
+        /* c_range -= RangeLPS; tmp0 = (c_range << 0x11) - c_low */
+        PTR_SUBU     "%[c_range],    %[c_range],     %[tmp1]       \n\t"
+        PTR_SLL      "%[tmp0],       %[c_range],     0x11          \n\t"
+        PTR_SUBU     "%[tmp0],       %[tmp0],        %[c_low]      \n\t"
+        /* NOTE(review): 0x11 here and 0x0f / 0X02 below look tied to CABAC_BITS == 16, yet a #else branch exists; confirm */
+        /* tmp2: lps_mask = tmp0 shifted right by 0x1F (PTR_SRA is defined outside this file; presumably arithmetic) */
+        PTR_SRA      "%[tmp2],       %[tmp0],        0x1F          \n\t"
+        /* If tmp0 <  0, lps_mask == 0xffffffff: fall through to the adjustment below */
+        /* If tmp0 >= 0, lps_mask == 0x00000000: beqz jumps over it to the first label 1 */
+        "beqz         %[tmp2],       1f                            \n\t"
+        PTR_SLL      "%[tmp0],       %[c_range],     0x11          \n\t"
+        PTR_SUBU     "%[c_low],      %[c_low],       %[tmp0]       \n\t"
+        PTR_SUBU     "%[tmp0],       %[tmp1],        %[c_range]    \n\t"
+        PTR_ADDU     "%[c_range],    %[c_range],     %[tmp0]       \n\t"
+        "xor          %[bit],        %[bit],         %[tmp2]       \n\t"
+        /* the five instructions above: c_low -= c_range << 0x11; c_range = RangeLPS (tmp1); bit ^= lps_mask */
+        "1:                                                        \n\t"
+        /* tmp1: new value for *state, the byte at tables + mlps_off + bit */
+        PTR_ADDU     "%[tmp0],       %[tables],      %[bit]        \n\t"
+        "lbu          %[tmp1],       %[mlps_off](%[tmp0])          \n\t"
+        /* tmp2: shift count, the byte at tables + norm_off + c_range */
+        PTR_ADDU     "%[tmp0],       %[tables],      %[c_range]    \n\t"
+        "lbu          %[tmp2],       %[norm_off](%[tmp0])          \n\t"
+        /* store the new state, keep only bit 0 of bit, shift c_range and c_low left by tmp2 */
+        "sb           %[tmp1],       0(%[state])                   \n\t"
+        "and          %[bit],        %[bit],         0x01          \n\t"
+        PTR_SLL      "%[c_range],    %[c_range],     %[tmp2]       \n\t"
+        PTR_SLL      "%[c_low],      %[c_low],       %[tmp2]       \n\t"
+        /* if (c_low & cabac_mask) != 0 jump to the final label; else tmp2 = norm_off byte at (c_low ^ (c_low - 1)) >> 0x0f */
+        "and          %[tmp0],       %[c_low],       %[cabac_mask] \n\t"
+        "bnez         %[tmp0],       1f                            \n\t"
+        PTR_ADDI     "%[tmp0],       %[c_low],       -0X01         \n\t"
+        "xor          %[tmp0],       %[c_low],       %[tmp0]       \n\t"
+        PTR_SRA      "%[tmp0],       %[tmp0],        0x0f          \n\t"
+        PTR_ADDU     "%[tmp0],       %[tmp0],        %[tables]     \n\t"
+        "lbu          %[tmp2],       %[norm_off](%[tmp0])          \n\t"
+#if CABAC_BITS == 16 /* two input bytes: tmp0 = (c_bytestream[0] << 9) + (c_bytestream[1] << 1) */
+        "lbu          %[tmp0],       0(%[c_bytestream])            \n\t"
+        "lbu          %[tmp1],       1(%[c_bytestream])            \n\t"
+        PTR_SLL      "%[tmp0],       %[tmp0],        0x09          \n\t"
+        PTR_SLL      "%[tmp1],       %[tmp1],        0x01          \n\t"
+        PTR_ADDU     "%[tmp0],       %[tmp0],        %[tmp1]       \n\t"
+#else /* one input byte: tmp0 = c_bytestream[0] << 1 */
+        "lbu          %[tmp0],       0(%[c_bytestream])            \n\t"
+        PTR_SLL      "%[tmp0],       %[tmp0],        0x01          \n\t"
+#endif
+        PTR_SUBU     "%[tmp0],       %[tmp0],        %[cabac_mask] \n\t"
+        /* tmp0 now holds the loaded value minus cabac_mask; c_low += tmp0 << (7 - tmp2) */
+        "li           %[tmp1],       0x07                          \n\t"
+        PTR_SUBU     "%[tmp1],       %[tmp1],        %[tmp2]       \n\t"
+        PTR_SLL      "%[tmp0],       %[tmp0],        %[tmp1]       \n\t"
+        PTR_ADDU     "%[c_low],      %[c_low],       %[tmp0]       \n\t"
+        /* finally advance c_bytestream by 2 (see the NOTE(review) above about the fixed step) */
+#if !UNCHECKED_BITSTREAM_READER /* checked reader: skip the advance once c_bytestream >= c_bytestream_end */
+        "bge          %[c_bytestream], %[c_bytestream_end], 1f     \n\t"
+#endif /* NOTE(review): bge is a signed compare; confirm that is acceptable for pointer operands */
+        PTR_ADDIU    "%[c_bytestream], %[c_bytestream],     0X02   \n\t"
+        "1:                                                        \n\t"
+    : [bit]"=&r"(bit), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [tmp2]"=&r"(tmp2),
+      [c_range]"+&r"(c->range), [c_low]"+&r"(c->low),
+      [c_bytestream]"+&r"(c->bytestream)
+    : [state]"r"(state), [tables]"r"(ff_h264_cabac_tables),
+#if !UNCHECKED_BITSTREAM_READER
+      [c_bytestream_end]"r"(c->bytestream_end),
+#endif
+      [lps_off]"i"(H264_LPS_RANGE_OFFSET),
+      [mlps_off]"i"(H264_MLPS_STATE_OFFSET + 128), /* +128: presumably because bit ^ lps_mask can be negative; confirm */
+      [norm_off]"i"(H264_NORM_SHIFT_OFFSET),
+      [cabac_mask]"i"(CABAC_MASK)
+    : "memory" /* the asm loads through state, tables and c_bytestream and stores to *state */
+    );
+    /* bit was masked to 0 or 1 inside the asm */
+    return bit;
+}
+
+#endif /* AVCODEC_MIPS_CABAC_H */