diff mbox

[FFmpeg-devel,1/2] avutil/hwcontext_cuda: align allocated frames

Message ID 20161002165813.17366-1-timo@rothenpieler.org
State Superseded
Headers show

Commit Message

Timo Rothenpieler Oct. 2, 2016, 4:58 p.m. UTC
---
 libavutil/hwcontext_cuda.c | 43 ++++++++++++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 15 deletions(-)

Comments

Philip Langdale Oct. 2, 2016, 5:07 p.m. UTC | #1
On Sun,  2 Oct 2016 18:58:12 +0200
Timo Rothenpieler <timo@rothenpieler.org> wrote:

> ---
>  libavutil/hwcontext_cuda.c | 43 ++++++++++++++++++++++++++++---------------
>  1 file changed, 28 insertions(+), 15 deletions(-)
> 
> diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
> index 40d2971..706d195 100644
> --- a/libavutil/hwcontext_cuda.c
> +++ b/libavutil/hwcontext_cuda.c
> @@ -16,6 +16,8 @@
>   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
>   */
>  
> +#include "libavutil/intmath.h"
> +
>  #include "buffer.h"
>  #include "common.h"
>  #include "hwcontext.h"
> @@ -35,6 +37,14 @@ static const enum AVPixelFormat supported_formats[] = {
>      AV_PIX_FMT_YUV444P,
>  };
>  
> +static unsigned int next_pow2(unsigned int a)
> +{
> +    if (a <= 1)
> +        return 1;
> +    a = 1 << (sizeof(a) * 8 - ff_clz(a - 1));
> +    return FFALIGN(a, 256);
> +}
> +
>  static void cuda_buffer_free(void *opaque, uint8_t *data)
>  {
>      AVHWFramesContext *ctx = opaque;
> @@ -83,6 +93,7 @@ fail:
>  static int cuda_frames_init(AVHWFramesContext *ctx)
>  {
>      CUDAFramesContext *priv = ctx->internal->priv;
> +    int aligned_width = next_pow2(ctx->width);
>      int i;
>  
>      for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
> @@ -103,10 +114,10 @@ static int cuda_frames_init(AVHWFramesContext *ctx)
>          switch (ctx->sw_format) {
>          case AV_PIX_FMT_NV12:
>          case AV_PIX_FMT_YUV420P:
> -            size = ctx->width * ctx->height * 3 / 2;
> +            size = aligned_width * ctx->height * 3 / 2;
>              break;
>          case AV_PIX_FMT_YUV444P:
> -            size = ctx->width * ctx->height * 3;
> +            size = aligned_width * ctx->height * 3;
>              break;
>          }
>  
> @@ -120,6 +131,8 @@ static int cuda_frames_init(AVHWFramesContext *ctx)
>  
>  static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
>  {
> +    int aligned_width = next_pow2(ctx->width);
> +
>      frame->buf[0] = av_buffer_pool_get(ctx->pool);
>      if (!frame->buf[0])
>          return AVERROR(ENOMEM);
> @@ -127,25 +140,25 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
>      switch (ctx->sw_format) {
>      case AV_PIX_FMT_NV12:
>          frame->data[0]     = frame->buf[0]->data;
> -        frame->data[1]     = frame->data[0] + ctx->width *
> ctx->height;
> -        frame->linesize[0] = ctx->width;
> -        frame->linesize[1] = ctx->width;
> +        frame->data[1]     = frame->data[0] + aligned_width *
> ctx->height;
> +        frame->linesize[0] = aligned_width;
> +        frame->linesize[1] = aligned_width;
>          break;
>      case AV_PIX_FMT_YUV420P:
>          frame->data[0]     = frame->buf[0]->data;
> -        frame->data[2]     = frame->data[0] + ctx->width *
> ctx->height;
> -        frame->data[1]     = frame->data[2] + ctx->width *
> ctx->height / 4;
> -        frame->linesize[0] = ctx->width;
> -        frame->linesize[1] = ctx->width / 2;
> -        frame->linesize[2] = ctx->width / 2;
> +        frame->data[2]     = frame->data[0] + aligned_width *
> ctx->height;
> +        frame->data[1]     = frame->data[2] + aligned_width *
> ctx->height / 4;
> +        frame->linesize[0] = aligned_width;
> +        frame->linesize[1] = aligned_width / 2;
> +        frame->linesize[2] = aligned_width / 2;
>          break;
>      case AV_PIX_FMT_YUV444P:
>          frame->data[0]     = frame->buf[0]->data;
> -        frame->data[1]     = frame->data[0] + ctx->width *
> ctx->height;
> -        frame->data[2]     = frame->data[1] + ctx->width *
> ctx->height;
> -        frame->linesize[0] = ctx->width;
> -        frame->linesize[1] = ctx->width;
> -        frame->linesize[2] = ctx->width;
> +        frame->data[1]     = frame->data[0] + aligned_width *
> ctx->height;
> +        frame->data[2]     = frame->data[1] + aligned_width *
> ctx->height;
> +        frame->linesize[0] = aligned_width;
> +        frame->linesize[1] = aligned_width;
> +        frame->linesize[2] = aligned_width;
>          break;
>      default:
>          av_frame_unref(frame);

Looks good to me (and I tried it out).


--phil
diff mbox

Patch

diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c
index 40d2971..706d195 100644
--- a/libavutil/hwcontext_cuda.c
+++ b/libavutil/hwcontext_cuda.c
@@ -16,6 +16,8 @@ 
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
+#include "libavutil/intmath.h"
+
 #include "buffer.h"
 #include "common.h"
 #include "hwcontext.h"
@@ -35,6 +37,14 @@  static const enum AVPixelFormat supported_formats[] = {
     AV_PIX_FMT_YUV444P,
 };
 
+static unsigned int next_pow2(unsigned int a)
+{
+    if (a <= 1)
+        return 1;
+    a = 1 << (sizeof(a) * 8 - ff_clz(a - 1));
+    return FFALIGN(a, 256);
+}
+
 static void cuda_buffer_free(void *opaque, uint8_t *data)
 {
     AVHWFramesContext *ctx = opaque;
@@ -83,6 +93,7 @@  fail:
 static int cuda_frames_init(AVHWFramesContext *ctx)
 {
     CUDAFramesContext *priv = ctx->internal->priv;
+    int aligned_width = next_pow2(ctx->width);
     int i;
 
     for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) {
@@ -103,10 +114,10 @@  static int cuda_frames_init(AVHWFramesContext *ctx)
         switch (ctx->sw_format) {
         case AV_PIX_FMT_NV12:
         case AV_PIX_FMT_YUV420P:
-            size = ctx->width * ctx->height * 3 / 2;
+            size = aligned_width * ctx->height * 3 / 2;
             break;
         case AV_PIX_FMT_YUV444P:
-            size = ctx->width * ctx->height * 3;
+            size = aligned_width * ctx->height * 3;
             break;
         }
 
@@ -120,6 +131,8 @@  static int cuda_frames_init(AVHWFramesContext *ctx)
 
 static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
 {
+    int aligned_width = next_pow2(ctx->width);
+
     frame->buf[0] = av_buffer_pool_get(ctx->pool);
     if (!frame->buf[0])
         return AVERROR(ENOMEM);
@@ -127,25 +140,25 @@  static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame)
     switch (ctx->sw_format) {
     case AV_PIX_FMT_NV12:
         frame->data[0]     = frame->buf[0]->data;
-        frame->data[1]     = frame->data[0] + ctx->width * ctx->height;
-        frame->linesize[0] = ctx->width;
-        frame->linesize[1] = ctx->width;
+        frame->data[1]     = frame->data[0] + aligned_width * ctx->height;
+        frame->linesize[0] = aligned_width;
+        frame->linesize[1] = aligned_width;
         break;
     case AV_PIX_FMT_YUV420P:
         frame->data[0]     = frame->buf[0]->data;
-        frame->data[2]     = frame->data[0] + ctx->width * ctx->height;
-        frame->data[1]     = frame->data[2] + ctx->width * ctx->height / 4;
-        frame->linesize[0] = ctx->width;
-        frame->linesize[1] = ctx->width / 2;
-        frame->linesize[2] = ctx->width / 2;
+        frame->data[2]     = frame->data[0] + aligned_width * ctx->height;
+        frame->data[1]     = frame->data[2] + aligned_width * ctx->height / 4;
+        frame->linesize[0] = aligned_width;
+        frame->linesize[1] = aligned_width / 2;
+        frame->linesize[2] = aligned_width / 2;
         break;
     case AV_PIX_FMT_YUV444P:
         frame->data[0]     = frame->buf[0]->data;
-        frame->data[1]     = frame->data[0] + ctx->width * ctx->height;
-        frame->data[2]     = frame->data[1] + ctx->width * ctx->height;
-        frame->linesize[0] = ctx->width;
-        frame->linesize[1] = ctx->width;
-        frame->linesize[2] = ctx->width;
+        frame->data[1]     = frame->data[0] + aligned_width * ctx->height;
+        frame->data[2]     = frame->data[1] + aligned_width * ctx->height;
+        frame->linesize[0] = aligned_width;
+        frame->linesize[1] = aligned_width;
+        frame->linesize[2] = aligned_width;
         break;
     default:
         av_frame_unref(frame);