diff mbox series

[FFmpeg-devel] pixblockdsp, avdct: Add get_pixels_unaligned

Message ID 20200512082534.24992-1-martin@martin.st
State Accepted
Headers show
Series [FFmpeg-devel] pixblockdsp, avdct: Add get_pixels_unaligned | expand

Checks

Context Check Description
andriy/default pending
andriy/make success Make finished
andriy/make_fate success Make fate finished

Commit Message

Martin Storsjö May 12, 2020, 8:25 a.m. UTC
Use this in vf_spp.c, where the get_pixels operation is done on
unaligned source addresses.

This fixes fate-filter-spp on armv7.
---
People more familiar with the other assembly implementations of
get_pixels (in particular, x86) can hook them up to
get_pixels_unaligned if unaligned use explicitly is ok; as far as I
can read at least ff_get_pixels_mmx, it looks like it expects the source
to be aligned, but in practice it does seem to run fine even if
ff_get_pixels_sse2 is disabled.
---
 libavcodec/avdct.c       | 1 +
 libavcodec/avdct.h       | 4 ++++
 libavcodec/pixblockdsp.c | 2 ++
 libavcodec/pixblockdsp.h | 3 +++
 libavfilter/vf_spp.c     | 2 +-
 5 files changed, 11 insertions(+), 1 deletion(-)

Comments

Michael Niedermayer May 12, 2020, 6:54 p.m. UTC | #1
On Tue, May 12, 2020 at 11:25:34AM +0300, Martin Storsjö wrote:
> Use this in vf_spp.c, where the get_pixels operation is done on
> unaligned source addresses.
> 
> This fixes fate-filter-spp on armv7.

LGTM


> ---
> People more familiar with the other assembly implementations of
> get_pixels (in particular, x86) can hook them up to
> get_pixels_unaligned if unaligned use explicitly is ok; as far as I
> can read at least ff_get_pixels_mmx, it looks like it expects the source
> to be aligned, but in practice it does seem to run fine even if
> ff_get_pixels_sse2 is disabled.
> ---

id suggest that code that has been tested with unaligned data and works
is enabled

thx


[...]
Martin Storsjö May 12, 2020, 7:04 p.m. UTC | #2
On Tue, 12 May 2020, Michael Niedermayer wrote:

> On Tue, May 12, 2020 at 11:25:34AM +0300, Martin Storsjö wrote:
>> Use this in vf_spp.c, where the get_pixels operation is done on
>> unaligned source addresses.
>>
>> This fixes fate-filter-spp on armv7.
>
> LGTM
>
>
>> ---
>> People more familiar with the other assembly implementations of
>> get_pixels (in particular, x86) can hook them up to
>> get_pixels_unaligned if unaligned use explicitly is ok; as far as I
>> can read at least ff_get_pixels_mmx, it looks like it expects the source
>> to be aligned, but in practice it does seem to run fine even if
>> ff_get_pixels_sse2 is disabled.
>> ---
>
> id suggest that code that has been tested with unaligned data and works
> is enabled

Well ff_get_pixels_mmx uses "mova", which I'd presume is "aligned", but it 
still doesn't seem to crash when used with vf_spp today, even if that one 
uses it on unaligned addresses. So I'm not sure if I'm just being lucky, 
or is it guaranteed that this "mova" always will work on unaligned 
addresses?

As someone who doesn't do much x86 asm, I wouldn't base such an assignment 
just on "it seems to work for me".

// Martin
Michael Niedermayer May 12, 2020, 7:45 p.m. UTC | #3
On Tue, May 12, 2020 at 10:04:13PM +0300, Martin Storsjö wrote:
> On Tue, 12 May 2020, Michael Niedermayer wrote:
> 
> >On Tue, May 12, 2020 at 11:25:34AM +0300, Martin Storsjö wrote:
> >>Use this in vf_spp.c, where the get_pixels operation is done on
> >>unaligned source addresses.
> >>
> >>This fixes fate-filter-spp on armv7.
> >
> >LGTM
> >
> >
> >>---
> >>People more familiar with the other assembly implementations of
> >>get_pixels (in particular, x86) can hook them up to
> >>get_pixels_unaligned if unaligned use explicitly is ok; as far as I
> >>can read at least ff_get_pixels_mmx, it looks like it expects the source
> >>to be aligned, but in practice it does seem to run fine even if
> >>ff_get_pixels_sse2 is disabled.
> >>---
> >
> >id suggest that code that has been tested with unaligned data and works
> >is enabled
> 
> Well ff_get_pixels_mmx uses "mova", which I'd presume is "aligned", but it
> still doesn't seem to crash when used with vf_spp today, even if that one
> uses it on unaligned addresses. So I'm not sure if I'm just being lucky, or
> is it guaranteed that this "mova" always will work on unaligned addresses?
> 
> As someone who doesn't do much x86 asm, I wouldn't base such an assignment
> just on "it seems to work for me".

you can look at libavutil/x86/x86inc.asm
for mmx both mova and movu are mapped to the same instruction

    %define mova movq
    %define movu movq

Also there is intels instruction set reference that should document alignment
requirements but that doesnt seem to
document alignment requirements in a way that i can easily find from
starting with a random instruction name

but likely someone reading has a link to nice short / table / summary of 
alignment requirements 

thx
    
[...]
diff mbox series

Patch

diff --git a/libavcodec/avdct.c b/libavcodec/avdct.c
index 7c761cf39a..e8fa41f73b 100644
--- a/libavcodec/avdct.c
+++ b/libavcodec/avdct.c
@@ -120,6 +120,7 @@  int avcodec_dct_init(AVDCT *dsp)
         PixblockDSPContext pdsp;
         ff_pixblockdsp_init(&pdsp, avctx);
         COPY(pdsp, get_pixels);
+        COPY(pdsp, get_pixels_unaligned);
     }
 #endif
 
diff --git a/libavcodec/avdct.h b/libavcodec/avdct.h
index 272422e44c..6411fab6f6 100644
--- a/libavcodec/avdct.h
+++ b/libavcodec/avdct.h
@@ -67,6 +67,10 @@  typedef struct AVDCT {
                        ptrdiff_t line_size);
 
     int bits_per_sample;
+
+    void (*get_pixels_unaligned)(int16_t *block /* align 16 */,
+                       const uint8_t *pixels,
+                       ptrdiff_t line_size);
 } AVDCT;
 
 /**
diff --git a/libavcodec/pixblockdsp.c b/libavcodec/pixblockdsp.c
index 50e1d1d735..a79e547776 100644
--- a/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@ -90,10 +90,12 @@  av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx)
     case 10:
     case 12:
     case 14:
+        c->get_pixels_unaligned =
         c->get_pixels = get_pixels_16_c;
         break;
     default:
         if (avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
+            c->get_pixels_unaligned =
             c->get_pixels = get_pixels_8_c;
         }
         break;
diff --git a/libavcodec/pixblockdsp.h b/libavcodec/pixblockdsp.h
index e036700ff0..fddb467212 100644
--- a/libavcodec/pixblockdsp.h
+++ b/libavcodec/pixblockdsp.h
@@ -29,6 +29,9 @@  typedef struct PixblockDSPContext {
     void (*get_pixels)(int16_t *av_restrict block /* align 16 */,
                        const uint8_t *pixels /* align 8 */,
                        ptrdiff_t stride);
+    void (*get_pixels_unaligned)(int16_t *av_restrict block /* align 16 */,
+                       const uint8_t *pixels,
+                       ptrdiff_t stride);
     void (*diff_pixels)(int16_t *av_restrict block /* align 16 */,
                         const uint8_t *s1 /* align 8 */,
                         const uint8_t *s2 /* align 8 */,
diff --git a/libavfilter/vf_spp.c b/libavfilter/vf_spp.c
index 6bee91b309..a83b1195c0 100644
--- a/libavfilter/vf_spp.c
+++ b/libavfilter/vf_spp.c
@@ -283,7 +283,7 @@  static void filter(SPPContext *p, uint8_t *dst, uint8_t *src,
                 const int x1 = x + offset[i + count - 1][0];
                 const int y1 = y + offset[i + count - 1][1];
                 const int index = x1 + y1*linesize;
-                p->dct->get_pixels(block, p->src + sample_bytes*index, sample_bytes*linesize);
+                p->dct->get_pixels_unaligned(block, p->src + sample_bytes*index, sample_bytes*linesize);
                 p->dct->fdct(block);
                 p->requantize(block2, block, qp, p->dct->idct_permutation);
                 p->dct->idct(block2);