| Field | Value |
|---|---|
| Message ID | 20161002165813.17366-1-timo@rothenpieler.org |
| State | Superseded |
| Headers | show |
On Sun, 2 Oct 2016 18:58:12 +0200 Timo Rothenpieler <timo@rothenpieler.org> wrote: > --- > libavutil/hwcontext_cuda.c | 43 > ++++++++++++++++++++++++++++--------------- 1 file changed, 28 > insertions(+), 15 deletions(-) > > diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c > index 40d2971..706d195 100644 > --- a/libavutil/hwcontext_cuda.c > +++ b/libavutil/hwcontext_cuda.c > @@ -16,6 +16,8 @@ > * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA > 02110-1301 USA */ > > +#include "libavutil/intmath.h" > + > #include "buffer.h" > #include "common.h" > #include "hwcontext.h" > @@ -35,6 +37,14 @@ static const enum AVPixelFormat > supported_formats[] = { AV_PIX_FMT_YUV444P, > }; > > +static unsigned int next_pow2(unsigned int a) > +{ > + if (a <= 1) > + return 1; > + a = 1 << (sizeof(a) * 8 - ff_clz(a - 1)); > + return FFALIGN(a, 256); > +} > + > static void cuda_buffer_free(void *opaque, uint8_t *data) > { > AVHWFramesContext *ctx = opaque; > @@ -83,6 +93,7 @@ fail: > static int cuda_frames_init(AVHWFramesContext *ctx) > { > CUDAFramesContext *priv = ctx->internal->priv; > + int aligned_width = next_pow2(ctx->width); > int i; > > for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) { > @@ -103,10 +114,10 @@ static int cuda_frames_init(AVHWFramesContext > *ctx) switch (ctx->sw_format) { > case AV_PIX_FMT_NV12: > case AV_PIX_FMT_YUV420P: > - size = ctx->width * ctx->height * 3 / 2; > + size = aligned_width * ctx->height * 3 / 2; > break; > case AV_PIX_FMT_YUV444P: > - size = ctx->width * ctx->height * 3; > + size = aligned_width * ctx->height * 3; > break; > } > > @@ -120,6 +131,8 @@ static int cuda_frames_init(AVHWFramesContext > *ctx) > static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) > { > + int aligned_width = next_pow2(ctx->width); > + > frame->buf[0] = av_buffer_pool_get(ctx->pool); > if (!frame->buf[0]) > return AVERROR(ENOMEM); > @@ -127,25 +140,25 @@ static int cuda_get_buffer(AVHWFramesContext > *ctx, AVFrame 
*frame) switch (ctx->sw_format) { > case AV_PIX_FMT_NV12: > frame->data[0] = frame->buf[0]->data; > - frame->data[1] = frame->data[0] + ctx->width * > ctx->height; > - frame->linesize[0] = ctx->width; > - frame->linesize[1] = ctx->width; > + frame->data[1] = frame->data[0] + aligned_width * > ctx->height; > + frame->linesize[0] = aligned_width; > + frame->linesize[1] = aligned_width; > break; > case AV_PIX_FMT_YUV420P: > frame->data[0] = frame->buf[0]->data; > - frame->data[2] = frame->data[0] + ctx->width * > ctx->height; > - frame->data[1] = frame->data[2] + ctx->width * > ctx->height / 4; > - frame->linesize[0] = ctx->width; > - frame->linesize[1] = ctx->width / 2; > - frame->linesize[2] = ctx->width / 2; > + frame->data[2] = frame->data[0] + aligned_width * > ctx->height; > + frame->data[1] = frame->data[2] + aligned_width * > ctx->height / 4; > + frame->linesize[0] = aligned_width; > + frame->linesize[1] = aligned_width / 2; > + frame->linesize[2] = aligned_width / 2; > break; > case AV_PIX_FMT_YUV444P: > frame->data[0] = frame->buf[0]->data; > - frame->data[1] = frame->data[0] + ctx->width * > ctx->height; > - frame->data[2] = frame->data[1] + ctx->width * > ctx->height; > - frame->linesize[0] = ctx->width; > - frame->linesize[1] = ctx->width; > - frame->linesize[2] = ctx->width; > + frame->data[1] = frame->data[0] + aligned_width * > ctx->height; > + frame->data[2] = frame->data[1] + aligned_width * > ctx->height; > + frame->linesize[0] = aligned_width; > + frame->linesize[1] = aligned_width; > + frame->linesize[2] = aligned_width; > break; > default: > av_frame_unref(frame); Looks good to me (and I tried it out). --phil
diff --git a/libavutil/hwcontext_cuda.c b/libavutil/hwcontext_cuda.c index 40d2971..706d195 100644 --- a/libavutil/hwcontext_cuda.c +++ b/libavutil/hwcontext_cuda.c @@ -16,6 +16,8 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/intmath.h" + #include "buffer.h" #include "common.h" #include "hwcontext.h" @@ -35,6 +37,14 @@ static const enum AVPixelFormat supported_formats[] = { AV_PIX_FMT_YUV444P, }; +static unsigned int next_pow2(unsigned int a) +{ + if (a <= 1) + return 1; + a = 1 << (sizeof(a) * 8 - ff_clz(a - 1)); + return FFALIGN(a, 256); +} + static void cuda_buffer_free(void *opaque, uint8_t *data) { AVHWFramesContext *ctx = opaque; @@ -83,6 +93,7 @@ fail: static int cuda_frames_init(AVHWFramesContext *ctx) { CUDAFramesContext *priv = ctx->internal->priv; + int aligned_width = next_pow2(ctx->width); int i; for (i = 0; i < FF_ARRAY_ELEMS(supported_formats); i++) { @@ -103,10 +114,10 @@ static int cuda_frames_init(AVHWFramesContext *ctx) switch (ctx->sw_format) { case AV_PIX_FMT_NV12: case AV_PIX_FMT_YUV420P: - size = ctx->width * ctx->height * 3 / 2; + size = aligned_width * ctx->height * 3 / 2; break; case AV_PIX_FMT_YUV444P: - size = ctx->width * ctx->height * 3; + size = aligned_width * ctx->height * 3; break; } @@ -120,6 +131,8 @@ static int cuda_frames_init(AVHWFramesContext *ctx) static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) { + int aligned_width = next_pow2(ctx->width); + frame->buf[0] = av_buffer_pool_get(ctx->pool); if (!frame->buf[0]) return AVERROR(ENOMEM); @@ -127,25 +140,25 @@ static int cuda_get_buffer(AVHWFramesContext *ctx, AVFrame *frame) switch (ctx->sw_format) { case AV_PIX_FMT_NV12: frame->data[0] = frame->buf[0]->data; - frame->data[1] = frame->data[0] + ctx->width * ctx->height; - frame->linesize[0] = ctx->width; - frame->linesize[1] = ctx->width; + frame->data[1] = frame->data[0] + aligned_width * ctx->height; + frame->linesize[0] = aligned_width; + 
frame->linesize[1] = aligned_width; break; case AV_PIX_FMT_YUV420P: frame->data[0] = frame->buf[0]->data; - frame->data[2] = frame->data[0] + ctx->width * ctx->height; - frame->data[1] = frame->data[2] + ctx->width * ctx->height / 4; - frame->linesize[0] = ctx->width; - frame->linesize[1] = ctx->width / 2; - frame->linesize[2] = ctx->width / 2; + frame->data[2] = frame->data[0] + aligned_width * ctx->height; + frame->data[1] = frame->data[2] + aligned_width * ctx->height / 4; + frame->linesize[0] = aligned_width; + frame->linesize[1] = aligned_width / 2; + frame->linesize[2] = aligned_width / 2; break; case AV_PIX_FMT_YUV444P: frame->data[0] = frame->buf[0]->data; - frame->data[1] = frame->data[0] + ctx->width * ctx->height; - frame->data[2] = frame->data[1] + ctx->width * ctx->height; - frame->linesize[0] = ctx->width; - frame->linesize[1] = ctx->width; - frame->linesize[2] = ctx->width; + frame->data[1] = frame->data[0] + aligned_width * ctx->height; + frame->data[2] = frame->data[1] + aligned_width * ctx->height; + frame->linesize[0] = aligned_width; + frame->linesize[1] = aligned_width; + frame->linesize[2] = aligned_width; break; default: av_frame_unref(frame);