diff mbox

[FFmpeg-devel] avcodec/h264: enable sse2 chroma deblock/loop filter functions

Message ID 20170222002745.5680-2-jdarnley@obe.tv
State Accepted
Commit 33de0fee2c33c492aae96f643ed7bbaa393043dc
Headers show

Commit Message

James Darnley Feb. 22, 2017, 12:27 a.m. UTC
---
 libavcodec/x86/h264_deblock.asm |  1 +
 libavcodec/x86/h264dsp_init.c   | 10 ++++++++++
 2 files changed, 11 insertions(+)

Comments

James Darnley Feb. 27, 2017, 10:56 a.m. UTC | #1
On 2017-02-22 01:27, James Darnley wrote:
> ---
>  libavcodec/x86/h264_deblock.asm |  1 +
>  libavcodec/x86/h264dsp_init.c   | 10 ++++++++++
>  2 files changed, 11 insertions(+)
> 
> diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
> index 32aa3d3..6702ae9 100644
> --- a/libavcodec/x86/h264_deblock.asm
> +++ b/libavcodec/x86/h264_deblock.asm
> @@ -1252,6 +1252,7 @@ RET
>  
>  %endmacro ; DEBLOCK_CHROMA_XMM
>  
> +DEBLOCK_CHROMA_XMM sse2
>  DEBLOCK_CHROMA_XMM avx
>  
>  ;-----------------------------------------------------------------------------
> diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
> index 51082e8..0643b37 100644
> --- a/libavcodec/x86/h264dsp_init.c
> +++ b/libavcodec/x86/h264dsp_init.c
> @@ -304,6 +304,16 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
>  #if ARCH_X86_64
>              c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_sse2;
>  #endif
> +
> +            c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_sse2;
> +            c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_sse2;
> +            if (chroma_format_idc <= 1) {
> +                c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma_8_sse2;
> +                c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_sse2;
> +            } else {
> +                c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma422_8_sse2;
> +                c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_sse2;
> +            }
>          }
>          if (EXTERNAL_SSSE3(cpu_flags)) {
>              c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
> 

Does anyone have any comments on the patch set?  For example: should I
merge this sse2 patch into the others?
Paul B Mahol Feb. 27, 2017, 11:13 a.m. UTC | #2
On 2/27/17, James Darnley <jdarnley@obe.tv> wrote:
> On 2017-02-22 01:27, James Darnley wrote:
>> ---
>>  libavcodec/x86/h264_deblock.asm |  1 +
>>  libavcodec/x86/h264dsp_init.c   | 10 ++++++++++
>>  2 files changed, 11 insertions(+)
>>
>> diff --git a/libavcodec/x86/h264_deblock.asm
>> b/libavcodec/x86/h264_deblock.asm
>> index 32aa3d3..6702ae9 100644
>> --- a/libavcodec/x86/h264_deblock.asm
>> +++ b/libavcodec/x86/h264_deblock.asm
>> @@ -1252,6 +1252,7 @@ RET
>>
>>  %endmacro ; DEBLOCK_CHROMA_XMM
>>
>> +DEBLOCK_CHROMA_XMM sse2
>>  DEBLOCK_CHROMA_XMM avx
>>
>>
>> ;-----------------------------------------------------------------------------
>> diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
>> index 51082e8..0643b37 100644
>> --- a/libavcodec/x86/h264dsp_init.c
>> +++ b/libavcodec/x86/h264dsp_init.c
>> @@ -304,6 +304,16 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c,
>> const int bit_depth,
>>  #if ARCH_X86_64
>>              c->h264_h_loop_filter_luma_mbaff =
>> ff_deblock_h_luma_mbaff_8_sse2;
>>  #endif
>> +
>> +            c->h264_v_loop_filter_chroma       =
>> ff_deblock_v_chroma_8_sse2;
>> +            c->h264_v_loop_filter_chroma_intra =
>> ff_deblock_v_chroma_intra_8_sse2;
>> +            if (chroma_format_idc <= 1) {
>> +                c->h264_h_loop_filter_chroma       =
>> ff_deblock_h_chroma_8_sse2;
>> +                c->h264_h_loop_filter_chroma_intra =
>> ff_deblock_h_chroma_intra_8_sse2;
>> +            } else {
>> +                c->h264_h_loop_filter_chroma       =
>> ff_deblock_h_chroma422_8_sse2;
>> +                c->h264_h_loop_filter_chroma_intra =
>> ff_deblock_h_chroma422_intra_8_sse2;
>> +            }
>>          }
>>          if (EXTERNAL_SSSE3(cpu_flags)) {
>>              c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
>>
>
> Does anyone have any comments on the patch set?  For example: should I
> merge this sse2 patch into the others?

probably not, just commit.
James Darnley Feb. 27, 2017, 12:08 p.m. UTC | #3
On 2017-02-27 12:13, Paul B Mahol wrote:
> On 2/27/17, James Darnley <jdarnley@obe.tv> wrote:
>>
>> Does anyone have any comments on the patch set?  For example: should I
>> merge this sse2 patch into the others?
> 
> probably not, just commit.

Will do.  I have added two short lines about the performance increases
to the sse2 commit message.
Carl Eugen Hoyos Feb. 27, 2017, 12:10 p.m. UTC | #4
2017-02-27 13:08 GMT+01:00 James Darnley <jdarnley@obe.tv>:
> On 2017-02-27 12:13, Paul B Mahol wrote:
>> On 2/27/17, James Darnley <jdarnley@obe.tv> wrote:
>>>
>>> Does anyone have any comments on the patch set?  For example: should I
>>> merge this sse2 patch into the others?
>>
>> probably not, just commit.
>
> Will do.  I have added to the sse2 commit message two short lines about
> performance increases.

You created a new branch; I don't think this was intended.

Carl Eugen
diff mbox

Patch

diff --git a/libavcodec/x86/h264_deblock.asm b/libavcodec/x86/h264_deblock.asm
index 32aa3d3..6702ae9 100644
--- a/libavcodec/x86/h264_deblock.asm
+++ b/libavcodec/x86/h264_deblock.asm
@@ -1252,6 +1252,7 @@  RET
 
 %endmacro ; DEBLOCK_CHROMA_XMM
 
+DEBLOCK_CHROMA_XMM sse2
 DEBLOCK_CHROMA_XMM avx
 
 ;-----------------------------------------------------------------------------
diff --git a/libavcodec/x86/h264dsp_init.c b/libavcodec/x86/h264dsp_init.c
index 51082e8..0643b37 100644
--- a/libavcodec/x86/h264dsp_init.c
+++ b/libavcodec/x86/h264dsp_init.c
@@ -304,6 +304,16 @@  av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
 #if ARCH_X86_64
             c->h264_h_loop_filter_luma_mbaff = ff_deblock_h_luma_mbaff_8_sse2;
 #endif
+
+            c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_sse2;
+            c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_sse2;
+            if (chroma_format_idc <= 1) {
+                c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma_8_sse2;
+                c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_sse2;
+            } else {
+                c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma422_8_sse2;
+                c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_sse2;
+            }
         }
         if (EXTERNAL_SSSE3(cpu_flags)) {
             c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;