diff mbox series

[FFmpeg-devel,v3,1/3] swscale: make yuv2interleavedX more asm-friendly

Message ID 1587868623-97667-2-git-send-email-negomez@linux.microsoft.com
State New
Headers show
Series swscale: add AVX2 version of yuv2nv12cX
Related show

Checks

Context Check Description
andriy/default pending
andriy/make success Make finished
andriy/make_fate success Make fate finished

Commit Message

Nelson Gomez April 26, 2020, 2:37 a.m. UTC
From: Nelson Gomez <nelson.gomez@microsoft.com>

Extracting information from SwsContext in assembly is difficult, and
rearranging SwsContext just for asm access didn't look good. These
functions only need a couple of fields from it anyway, so just make
them parameters in their own right.

Signed-off-by: Nelson Gomez <nelson.gomez@microsoft.com>
---
 libswscale/output.c           | 12 +++++-------
 libswscale/swscale_internal.h |  5 +++--
 libswscale/vscale.c           |  2 +-
 3 files changed, 9 insertions(+), 10 deletions(-)

Comments

Michael Niedermayer June 3, 2020, 11:29 p.m. UTC | #1
On Sat, Apr 25, 2020 at 07:37:01PM -0700, Nelson Gomez wrote:
> From: Nelson Gomez <nelson.gomez@microsoft.com>
> 
> Extracting information from SwsContext in assembly is difficult, and
> rearranging SwsContext just for asm access didn't look good. These
> functions only need a couple of fields from it anyway, so just make
> them parameters in their own right.
> 
> Signed-off-by: Nelson Gomez <nelson.gomez@microsoft.com>
> ---
>  libswscale/output.c           | 12 +++++-------
>  libswscale/swscale_internal.h |  5 +++--
>  libswscale/vscale.c           |  2 +-
>  3 files changed, 9 insertions(+), 10 deletions(-)

This doesn't apply cleanly anymore, it seems:

Applying: swscale: make yuv2interleavedX more asm-friendly
Using index info to reconstruct a base tree...
M	libswscale/output.c
M	libswscale/swscale_internal.h
M	libswscale/vscale.c
Falling back to patching base and 3-way merge...
Auto-merging libswscale/vscale.c
CONFLICT (content): Merge conflict in libswscale/vscale.c
Auto-merging libswscale/swscale_internal.h
Auto-merging libswscale/output.c
error: Failed to merge in the changes.
Patch failed at 0001 swscale: make yuv2interleavedX more asm-friendly
Use 'git am --show-current-patch' to see the failed patch
When you have resolved this problem, run "git am --continue".
If you prefer to skip this patch, run "git am --skip" instead.
To restore the original branch and stop patching, run "git am --abort".

[...]
diff mbox series

Patch

diff --git a/libswscale/output.c b/libswscale/output.c
index 68f43ffba3..2e5d6076ab 100644
--- a/libswscale/output.c
+++ b/libswscale/output.c
@@ -180,7 +180,7 @@  yuv2planeX_16_c_template(const int16_t *filter, int filterSize,
     }
 }
 
-static void yuv2p016cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
+static void yuv2p016cX_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither, const int16_t *chrFilter, int chrFilterSize,
                          const int16_t **chrUSrc, const int16_t **chrVSrc,
                          uint8_t *dest8, int chrDstW)
 {
@@ -188,7 +188,7 @@  static void yuv2p016cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterS
     const int32_t **uSrc = (const int32_t **)chrUSrc;
     const int32_t **vSrc = (const int32_t **)chrVSrc;
     int shift = 15;
-    int big_endian = c->dstFormat == AV_PIX_FMT_P016BE;
+    int big_endian = dstFormat == AV_PIX_FMT_P016BE;
     int i, j;
 
     for (i = 0; i < chrDstW; i++) {
@@ -402,12 +402,10 @@  static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW,
     }
 }
 
-static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
+static void yuv2nv12cX_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither, const int16_t *chrFilter, int chrFilterSize,
                         const int16_t **chrUSrc, const int16_t **chrVSrc,
                         uint8_t *dest, int chrDstW)
 {
-    enum AVPixelFormat dstFormat = c->dstFormat;
-    const uint8_t *chrDither = c->chrDither8;
     int i;
 
     if (dstFormat == AV_PIX_FMT_NV12 ||
@@ -477,13 +475,13 @@  static void yuv2p010lX_c(const int16_t *filter, int filterSize,
     }
 }
 
-static void yuv2p010cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize,
+static void yuv2p010cX_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither, const int16_t *chrFilter, int chrFilterSize,
                          const int16_t **chrUSrc, const int16_t **chrVSrc,
                          uint8_t *dest8, int chrDstW)
 {
     uint16_t *dest = (uint16_t*)dest8;
     int shift = 17;
-    int big_endian = c->dstFormat == AV_PIX_FMT_P010BE;
+    int big_endian = dstFormat == AV_PIX_FMT_P010BE;
     int i, j;
 
     for (i = 0; i < chrDstW; i++) {
diff --git a/libswscale/swscale_internal.h b/libswscale/swscale_internal.h
index 9dda53eead..42fd87e887 100644
--- a/libswscale/swscale_internal.h
+++ b/libswscale/swscale_internal.h
@@ -119,7 +119,8 @@  typedef void (*yuv2planarX_fn)(const int16_t *filter, int filterSize,
  * Write one line of horizontally scaled chroma to interleaved output
  * with multi-point vertical scaling between input pixels.
  *
- * @param c             SWS scaling context
+ * @param dstFormat     destination pixel format
+ * @param chrDither     ordered dither array of type uint8_t and size 8
  * @param chrFilter     vertical chroma scaling coefficients, 12 bits [0,4096]
  * @param chrUSrc       scaled chroma (U) source data, 15 bits for 8-10-bit
  *                      output, 19 bits for 16-bit output (in int32_t)
@@ -130,7 +131,7 @@  typedef void (*yuv2planarX_fn)(const int16_t *filter, int filterSize,
  *                      output, this is in uint16_t
  * @param dstW          width of chroma planes
  */
-typedef void (*yuv2interleavedX_fn)(struct SwsContext *c,
+typedef void (*yuv2interleavedX_fn)(enum AVPixelFormat dstFormat, const uint8_t *chrDither,
                                     const int16_t *chrFilter,
                                     int chrFilterSize,
                                     const int16_t **chrUSrc,
diff --git a/libswscale/vscale.c b/libswscale/vscale.c
index 72352dedb3..cac85921da 100644
--- a/libswscale/vscale.c
+++ b/libswscale/vscale.c
@@ -85,7 +85,7 @@  static int chr_planar_vscale(SwsContext *c, SwsFilterDescriptor *desc, int slice
         uint16_t *filter = inst->filter[0] + (inst->isMMX ? 0 : chrSliceY * inst->filter_size);
 
         if (c->yuv2nv12cX) {
-            ((yuv2interleavedX_fn)inst->pfn)(c, filter, inst->filter_size, (const int16_t**)src1, (const int16_t**)src2, dst1[0], dstW);
+            ((yuv2interleavedX_fn)inst->pfn)(c->dstFormat, c->chrDither8, filter, inst->filter_size, (const int16_t**)src1, (const int16_t**)src2, dst1[0], dstW);
         } else if (inst->filter_size == 1) {
             ((yuv2planar1_fn)inst->pfn)((const int16_t*)src1[0], dst1[0], dstW, c->chrDither8, 0);
             ((yuv2planar1_fn)inst->pfn)((const int16_t*)src2[0], dst2[0], dstW, c->chrDither8, 3);