[FFmpeg-devel] libavcodec/cedrus264: h264 hardware encoding for Allwinner H2/H3

Submitted by Daniel Kucera on May 26, 2018, 1:51 p.m.

Details

Message ID 1527342677-10265-1-git-send-email-daniel.kucera@gmail.com
State New
Headers show

Commit Message

Daniel Kucera May 26, 2018, 1:51 p.m.
From: Daniel Kucera <daniel.kucera@gmail.com>

Signed-off-by: Daniel Kucera <daniel.kucera@gmail.com>
---
 libavcodec/Makefile              |   1 +
 libavcodec/allcodecs.c           |   1 +
 libavcodec/arm/sunxi/cedar_ve.h  |  79 ++++++
 libavcodec/arm/sunxi/ion.h       | 377 ++++++++++++++++++++++++++
 libavcodec/arm/sunxi/ion_sunxi.h | 108 ++++++++
 libavcodec/arm/sunxi/ve.c        | 552 +++++++++++++++++++++++++++++++++++++++
 libavcodec/arm/sunxi/ve.h        | 193 ++++++++++++++
 libavcodec/cedrus264.c           | 402 ++++++++++++++++++++++++++++
 8 files changed, 1713 insertions(+)
 create mode 100644 libavcodec/arm/sunxi/cedar_ve.h
 create mode 100644 libavcodec/arm/sunxi/ion.h
 create mode 100644 libavcodec/arm/sunxi/ion_sunxi.h
 create mode 100755 libavcodec/arm/sunxi/ve.c
 create mode 100755 libavcodec/arm/sunxi/ve.h
 create mode 100755 libavcodec/cedrus264.c

Comments

Mark Thompson May 26, 2018, 3:42 p.m.
On 26/05/18 14:51, daniel.kucera@gmail.com wrote:
> From: Daniel Kucera <daniel.kucera@gmail.com>
> 
> Signed-off-by: Daniel Kucera <daniel.kucera@gmail.com>
> ---
>  libavcodec/Makefile              |   1 +
>  libavcodec/allcodecs.c           |   1 +
>  libavcodec/arm/sunxi/cedar_ve.h  |  79 ++++++
>  libavcodec/arm/sunxi/ion.h       | 377 ++++++++++++++++++++++++++
>  libavcodec/arm/sunxi/ion_sunxi.h | 108 ++++++++
>  libavcodec/arm/sunxi/ve.c        | 552 +++++++++++++++++++++++++++++++++++++++
>  libavcodec/arm/sunxi/ve.h        | 193 ++++++++++++++
>  libavcodec/cedrus264.c           | 402 ++++++++++++++++++++++++++++
>  8 files changed, 1713 insertions(+)
>  create mode 100644 libavcodec/arm/sunxi/cedar_ve.h
>  create mode 100644 libavcodec/arm/sunxi/ion.h
>  create mode 100644 libavcodec/arm/sunxi/ion_sunxi.h
>  create mode 100755 libavcodec/arm/sunxi/ve.c
>  create mode 100755 libavcodec/arm/sunxi/ve.h
>  create mode 100755 libavcodec/cedrus264.c
> 
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 3ab071a..f3821af 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -987,6 +987,7 @@ OBJS-$(CONFIG_LIBX265_ENCODER)            += libx265.o
>  OBJS-$(CONFIG_LIBXAVS_ENCODER)            += libxavs.o
>  OBJS-$(CONFIG_LIBXVID_ENCODER)            += libxvid.o
>  OBJS-$(CONFIG_LIBZVBI_TELETEXT_DECODER)   += libzvbi-teletextdec.o ass.o
> +OBJS-$(CONFIG_CEDRUS264_ENCODER)          += cedrus264.o arm/sunxi/ve.o
>  
>  # parsers
>  OBJS-$(CONFIG_AAC_LATM_PARSER)         += latm_parser.o
> diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
> index 7b7a8c7..a153576 100644
> --- a/libavcodec/allcodecs.c
> +++ b/libavcodec/allcodecs.c
> @@ -66,6 +66,7 @@ extern AVCodec ff_c93_decoder;
>  extern AVCodec ff_cavs_decoder;
>  extern AVCodec ff_cdgraphics_decoder;
>  extern AVCodec ff_cdxl_decoder;
> +extern AVCodec ff_cedrus264_encoder;

This needs to go further down the list so that it isn't picked by default for H.264 encoding everywhere.

>  extern AVCodec ff_cfhd_decoder;
>  extern AVCodec ff_cinepak_encoder;
>  extern AVCodec ff_cinepak_decoder;


> diff --git a/libavcodec/arm/sunxi/cedar_ve.h b/libavcodec/arm/sunxi/cedar_ve.h

This file looks copied.  Where did it come from?  What licence is it used under?

> diff --git a/libavcodec/arm/sunxi/ion.h b/libavcodec/arm/sunxi/ion.h
> diff --git a/libavcodec/arm/sunxi/ion_sunxi.h b/libavcodec/arm/sunxi/ion_sunxi.h

These are Linux (/Android?) header files.  They should be included from there, not pasted into libavcodec.

> diff --git a/libavcodec/arm/sunxi/ve.c b/libavcodec/arm/sunxi/ve.c
> diff --git a/libavcodec/arm/sunxi/ve.h b/libavcodec/arm/sunxi/ve.h

These files expose an interface to map and talk to the device.  They might work better as an external library?  If not, they need to be rewritten to conform to the FFmpeg style and interfaces.


> diff --git a/libavcodec/cedrus264.c b/libavcodec/cedrus264.c
> new file mode 100755
> index 0000000..5e62361
> --- /dev/null
> +++ b/libavcodec/cedrus264.c
> @@ -0,0 +1,402 @@
> +/*
> + * Cedrus 264 Video Encoder
> + * Copyright (c) 2014 Julien Folly
> + *
> + * This file is part of Libav.

Is it?  This patch is being submitted to FFmpeg, but the header says the file is part of Libav.

> + *
> + * Libav is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * Libav is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with Libav; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * Cedrus 264 Encoder
> + */
> +
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <sys/stat.h>
> +
> +#include "libavutil/internal.h"
> +#include "libavutil/opt.h"
> +#include "libavutil/mem.h"
> +#include "libavutil/pixdesc.h"
> +#include "avcodec.h"
> +#include "internal.h"
> +
> +#include "arm/sunxi/ve.h"
> +
> +/* byte stream utils from:
> + * https://github.com/jemk/cedrus/tree/master/h264enc

This link indicates that the source is GPL2?  If you have copied any GPL2 code then this is also GPL2, and that needs to be noted in configure.

> + */
> +static void put_bits(void* regs, uint32_t x, int num)
> +{
> +	writel(x, (uint8_t *)regs + VE_AVC_BASIC_BITS);
> +	writel(0x1 | ((num & 0x1f) << 8), (uint8_t *)regs + VE_AVC_TRIGGER);
> +	// again the problem, how to check for finish?
> +}
> +
> +static void put_ue(void* regs, uint32_t x)
> +{
> +	x++;
> +	put_bits(regs, x, (32 - __builtin_clz(x)) * 2 - 1);
> +}
> +
> +static void put_se(void* regs, int x)
> +{
> +	x = 2 * x - 1;
> +	x ^= (x >> 31);
> +	put_ue(regs, x);
> +}
> +
> +static void put_start_code(void* regs)
> +{
> +	uint32_t tmp = readl((uint8_t *)regs + VE_AVC_PARAM);
> +
> +	/* Disable emulation_prevention_three_byte */
> +	writel(tmp | (0x1 << 31), (uint8_t *)regs + VE_AVC_PARAM);
> +
> +	put_bits(regs, 0, 31);
> +	put_bits(regs, 1, 1);
> +
> +	writel(tmp, (uint8_t *)regs + VE_AVC_PARAM);
> +}
> +
> +static void put_rbsp_trailing_bits(void* regs)
> +{
> +	unsigned int cur_bs_len = readl((uint8_t *)regs + VE_AVC_VLE_LENGTH);
> +
> +	int num_zero_bits = 8 - ((cur_bs_len + 1) & 0x7);
> +	put_bits(regs, 1 << num_zero_bits, num_zero_bits + 1);
> +}
> +
> +static void put_seq_parameter_set(void* regs, int width, int height)
> +{
> +	put_bits(regs, 3 << 5 | 7 << 0, 8);	// NAL Header
> +	put_bits(regs, 77, 8);			// profile_idc
> +	put_bits(regs, 0x0, 8);			// constraints
> +	put_bits(regs, 4 * 10 + 1, 8);		// level_idc
> +	put_ue(regs, 0);			// seq_parameter_set_id
> +
> +	put_ue(regs, 0);			// log2_max_frame_num_minus4
> +	put_ue(regs, 0);			// pic_order_cnt_type
> +	// if (pic_order_cnt_type == 0)
> +		put_ue(regs, 4);		// log2_max_pic_order_cnt_lsb_minus4
> +
> +	put_ue(regs, 1);			// max_num_ref_frames
> +	put_bits(regs, 0, 1);			// gaps_in_frame_num_value_allowed_flag
> +
> +	put_ue(regs, width - 1);		// pic_width_in_mbs_minus1
> +	put_ue(regs, height - 1);		// pic_height_in_map_units_minus1
> +
> +	put_bits(regs, 1, 1);			// frame_mbs_only_flag
> +	// if (!frame_mbs_only_flag)
> +
> +	put_bits(regs, 1, 1);			// direct_8x8_inference_flag
> +	put_bits(regs, 0, 1);			// frame_cropping_flag
> +	// if (frame_cropping_flag)
> +
> +	put_bits(regs, 0, 1);			// vui_parameters_present_flag
> +	// if (vui_parameters_present_flag)
> +}
> +
> +static void put_pic_parameter_set(void *regs, int qp_minus30)
> +{
> +	put_bits(regs, 3 << 5 | 8 << 0, 8);	// NAL Header
> +	put_ue(regs, 0);			// pic_parameter_set_id
> +	put_ue(regs, 0);			// seq_parameter_set_id
> +	put_bits(regs, 1, 1);			// entropy_coding_mode_flag
> +	put_bits(regs, 0, 1);			// bottom_field_pic_order_in_frame_present_flag
> +	put_ue(regs, 0);			// num_slice_groups_minus1
> +	// if (num_slice_groups_minus1 > 0)
> +
> +	put_ue(regs, 0);			// num_ref_idx_l0_default_active_minus1
> +	put_ue(regs, 0);			// num_ref_idx_l1_default_active_minus1
> +	put_bits(regs, 0, 1);			// weighted_pred_flag
> +	put_bits(regs, 0, 2);			// weighted_bipred_idc
> +	//put_se(regs, 0);			// pic_init_qp_minus26 (minus slice_qp_delta)
> +	//put_se(regs, 0);			// pic_init_qs_minus26
> +	put_se(regs, qp_minus30);		// pic_init_qp_minus26 (minus slice_qp_delta)
> +	put_se(regs, qp_minus30);		// pic_init_qs_minus26
> +	put_se(regs, 4);			// chroma_qp_index_offset
> +	put_bits(regs, 1, 1);			// deblocking_filter_control_present_flag
> +	put_bits(regs, 0, 1);			// constrained_intra_pred_flag
> +	put_bits(regs, 0, 1);			// redundant_pic_cnt_present_flag
> +}
> +
> +static void put_slice_header(void* regs)
> +{
> +	put_bits(regs, 3 << 5 | 5 << 0, 8);	// NAL Header
> +
> +	put_ue(regs, 0);			// first_mb_in_slice
> +	put_ue(regs, 2);			// slice_type
> +	put_ue(regs, 0);			// pic_parameter_set_id
> +	put_bits(regs, 0, 4);			// frame_num
> +
> +	// if (IdrPicFlag)
> +		put_ue(regs, 0);		// idr_pic_id
> +
> +	// if (pic_order_cnt_type == 0)
> +		put_bits(regs, 0, 8);		// pic_order_cnt_lsb
> +
> +	// dec_ref_pic_marking
> +		put_bits(regs, 0, 1);		// no_output_of_prior_pics_flag
> +		put_bits(regs, 0, 1);		// long_term_reference_flag
> +
> +	put_se(regs, 4);			// slice_qp_delta
> +
> +	// if (deblocking_filter_control_present_flag)
> +		put_ue(regs, 0);		// disable_deblocking_filter_idc
> +		// if (disable_deblocking_filter_idc != 1)
> +			put_se(regs, 0);	// slice_alpha_c0_offset_div2
> +			put_se(regs, 0);	// slice_beta_offset_div2
> +}
> +
> +static void put_aud(void* regs)
> +{
> +	put_bits(regs, 0 << 5 | 9 << 0, 8);	// NAL Header
> +
> +	put_bits(regs, 7, 3);			// primary_pic_type
> +}

See libavcodec/{cbs,cbs_h264}.h for safe code to write arbitrary H.264 headers.

> +
> +#define CEDAR_OUTPUT_BUF_SIZE	1*1024*1024

Where has this limit come from?

> +typedef struct cedrus264Context {
> +	AVClass *class;
> +	uint8_t *ve_regs;
> +	struct ve_mem *input_buf, *output_buf, *reconstruct_buf, *small_luma_buf, *mb_info_buf;
> +	unsigned int tile_w, tile_w2, tile_h, tile_h2, mb_w, mb_h, plane_size, frame_size;
> +	unsigned int frame_num;
> +	int qp, vewait;
> +} cedrus264Context;
> +
> +static av_cold int cedrus264_encode_init(AVCodecContext *avctx)
> +{
> +	cedrus264Context *c4 = avctx->priv_data;
> +	
> +	/* Check pixel format */
> +	if(avctx->pix_fmt != AV_PIX_FMT_NV12){
> +		av_log(avctx, AV_LOG_FATAL, "Unsupported pixel format (use -pix_fmt nv12)!\n");
> +		return AVERROR(EINVAL);

Tabs are not allowed in the FFmpeg codebase.  (Also in many places below.)

> +	}
> +
> +	/* Check width */
> +	if(avctx->width % 32 != 0){
> +		av_log(avctx, AV_LOG_FATAL, "Input width is not a multiple of 32!\n");
> +		return AVERROR(EINVAL);
> +	}

Is the multiple-of-32 width a hardware constraint?  Even if it is, you could avoid this being a problem by encoding at that size but setting the cropping window in the SPS to the actual size.

Is there any minimum/maximum size constraint which should also be checked here?

> +
> +	/* Check if VE is available */
> +	while(!ve_lock()){
> +		if (c4->vewait <= 0){
> +			av_log(avctx, AV_LOG_ERROR, "VE in use!\n");
> +			return AVERROR(ENOMEM);
> +		}
> +		av_log(avctx, AV_LOG_INFO, "VE in use, wait %i seconds.\r", c4->vewait--);
> +		sleep(1);

This is because the hardware device can only be used by one process at a time?  I think it might be cleaner just to fail in this case, and the user can open it again when appropriate.

> +	}
> +
> +	/* Open VE */
> +	if(!ve_open()){
> +		av_log(avctx, AV_LOG_ERROR, "VE Open error.\n");
> +		return AVERROR(ENOMEM);
> +	}
> +	
> +
> +	/* Compute tile, macroblock and plane size */
> +	c4->tile_w = (avctx->width + 31) & ~31;

"FFALIGN(avctx->width, 32)"

and others below.

> +	c4->tile_w2 = (avctx->width / 2 + 31) & ~31;
> +	c4->tile_h = (avctx->height + 31) & ~31;
> +	c4->tile_h2 = (avctx->height / 2 + 31) & ~31;
> +	c4->mb_w = (avctx->width + 15) / 16;
> +	c4->mb_h = (avctx->height + 15) / 16;
> +	c4->plane_size = c4->mb_w * 16 * c4->mb_h * 16;
> +	c4->frame_size = c4->plane_size + c4->plane_size / 2;
> +
> +	/* Alloc buffers */
> +	c4->input_buf = ve_malloc(c4->frame_size);
> +	c4->output_buf = ve_malloc(CEDAR_OUTPUT_BUF_SIZE);
> +	c4->reconstruct_buf = ve_malloc(c4->tile_w * c4->tile_h + c4->tile_w * c4->tile_h2);
> +	c4->small_luma_buf = ve_malloc(c4->tile_w2 * c4->tile_h2);
> +	c4->mb_info_buf = ve_malloc(0x1000);
> +	if(!c4->input_buf || !c4->output_buf || !c4->reconstruct_buf || !c4->small_luma_buf || !c4->mb_info_buf){
> +		av_log(avctx, AV_LOG_FATAL, "Cannot allocate frame.\n");
> +		return AVERROR(ENOMEM);

Do the others need to be freed on this failure case, or is ve_malloc() magic somehow?

> +	}
> +
> +	/* Activate AVC engine */
> +	c4->ve_regs = ve_get(VE_ENGINE_AVC, 0);
> +
> +	/* ---- Part to put in cedrus264_encode if engine is used by multiple process (Need to be checked) */
> +
> +	/* Input size */
> +	writel(c4->mb_w << 16, c4->ve_regs + VE_ISP_INPUT_STRIDE);
> +	writel((c4->mb_w << 16) | (c4->mb_h << 0), c4->ve_regs + VE_ISP_INPUT_SIZE);
> +
> +	/* Input buffer */
> +	writel(c4->input_buf->phys, c4->ve_regs + VE_ISP_INPUT_LUMA);
> +	writel(c4->input_buf->phys + c4->plane_size, c4->ve_regs + VE_ISP_INPUT_CHROMA);
> +	
> +	/* Reference output */
> +	writel(c4->reconstruct_buf->phys, c4->ve_regs + VE_AVC_REC_LUMA);
> +	writel(c4->reconstruct_buf->phys + c4->tile_w * c4->tile_h, c4->ve_regs + VE_AVC_REC_CHROMA);
> +	writel(c4->small_luma_buf->phys, c4->ve_regs + VE_AVC_REC_SLUMA);
> +	writel(c4->mb_info_buf->phys, c4->ve_regs + VE_AVC_MB_INFO);
> +
> +	/* Encoding parameters */
> +	writel(0x00000100, c4->ve_regs + VE_AVC_PARAM);
> +	writel(0x00040000 | (c4->qp<<8) | c4->qp, c4->ve_regs + VE_AVC_QP);
> +	//writel(0x00041e1e, c4->ve_regs + VE_AVC_QP); // Fixed QP=30
> +	writel(0x00000104, c4->ve_regs + VE_AVC_MOTION_EST);

This sort of messing with memory-mapped registers really doesn't feel like it should be in libavcodec.

> +
> +	/* ---- Part end ---- */
> +
> +	/* Alloc Frame */
> +	avctx->coded_frame = av_frame_alloc();

AVCodecContext.coded_frame is deprecated, just drop this part.

> +	if(!avctx->coded_frame){
> +		av_log(avctx, AV_LOG_FATAL, "Cannot allocate frame.\n");
> +		return AVERROR(ENOMEM);
> +	}
> +
> +	/* Init variables */
> +	c4->frame_num = 0;
> +	avctx->coded_frame->quality = c4->qp * FF_QP2LAMBDA;
> +
> +	return 0;
> +}
> +
> +static int cedrus264_encode(AVCodecContext *avctx, AVPacket *pkt,
> +                      const AVFrame *frame, int *got_packet)
> +{
> +	cedrus264Context *c4 = avctx->priv_data;
> +	unsigned int size;
> +	int result;
> +
> +	/* Copy data */
> +	result = avpicture_layout((const AVPicture *)frame, AV_PIX_FMT_NV12,
> +		avctx->width, avctx->height, c4->input_buf->virt, c4->frame_size);

avpicture_* is deprecated; look at av_image_copy_to_buffer().  I'm guessing the input_buf here is magic memory, and we can't just use a pointer to the frame data?

> + 	if(result < 0){
> +		av_log(avctx, AV_LOG_ERROR, "Input buffer too small.\n");
> +		return AVERROR(ENOMEM);
> +	}
> +	ve_flush_cache(c4->input_buf);
> +
> +	/* flush output buffer, otherwise we might read old cached data */
> +	ve_flush_cache(c4->output_buf);
> +	
> +	/* Set output buffer */
> +	writel(0x0, c4->ve_regs + VE_AVC_VLE_OFFSET);
> +	writel(c4->output_buf->phys, c4->ve_regs + VE_AVC_VLE_ADDR);
> +	writel(c4->output_buf->phys + CEDAR_OUTPUT_BUF_SIZE - 1, c4->ve_regs + VE_AVC_VLE_END);
> +
> +	writel(0x04000000, c4->ve_regs + 0xb8c); // ???
> +	
> +	put_start_code(c4->ve_regs);
> +	put_aud(c4->ve_regs);
> +	put_rbsp_trailing_bits(c4->ve_regs);
> +
> +	if (c4->frame_num == 0)
> +	{
> +		put_start_code(c4->ve_regs);
> +		put_seq_parameter_set(c4->ve_regs, c4->mb_w, c4->mb_h);
> +		put_rbsp_trailing_bits(c4->ve_regs);
> +
> +		put_start_code(c4->ve_regs);
> +		put_pic_parameter_set(c4->ve_regs, c4->qp - 30);
> +		put_rbsp_trailing_bits(c4->ve_regs);
> +	}
> +
> +	put_start_code(c4->ve_regs);
> +	put_slice_header(c4->ve_regs);
> +
> +	writel(readl(c4->ve_regs + VE_AVC_CTRL) | 0xf, c4->ve_regs + VE_AVC_CTRL);
> +	writel(readl(c4->ve_regs + VE_AVC_STATUS) | 0x7, c4->ve_regs + VE_AVC_STATUS);
> +
> +	writel(0x8, c4->ve_regs + VE_AVC_TRIGGER);
> +	ve_wait(1);
> +
> +	writel(readl(c4->ve_regs + VE_AVC_STATUS), c4->ve_regs + VE_AVC_STATUS);
> +
> +	size = readl(c4->ve_regs + VE_AVC_VLE_LENGTH) / 8;
> +	if(size > 0){
> +		if ((result = ff_alloc_packet(pkt, size)) < 0){
> +			av_log(avctx, AV_LOG_ERROR, "Packet allocation error.\n");
> +			return result;
> +		}
> +		memcpy(pkt->data, c4->output_buf->virt, size);
> +
> +		pkt->pts = pkt->dts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);

What is this trying to do?  Since all frames are IDR there isn't any reordering, so you should be able to just use the input pts.

> +		pkt->flags |= AV_PKT_FLAG_KEY;
> +		*got_packet = 1;
> +	}else *got_packet = 0;
> +
> +	c4->frame_num++;
> +
> +	return 0;
> +}
> +
> +static av_cold int cedrus264_close(AVCodecContext *avctx)
> +{
> +	cedrus264Context *c4 = avctx->priv_data;
> +
> +	/* Close AVC engine */
> +	ve_put();
> +
> +	/* Free buffers */
> +	ve_free(c4->input_buf);
> +	ve_free(c4->output_buf);
> +	ve_free(c4->reconstruct_buf);
> +	ve_free(c4->small_luma_buf);
> +	ve_free(c4->mb_info_buf);
> +
> +	/* Disable and close VE */
> +	ve_close();
> +	ve_unlock();
> +
> +	/* Free Frame */
> +	av_frame_free(&avctx->coded_frame);
> +
> +	return 0;
> +}
> +
> +#define OFFSET(x) offsetof(cedrus264Context, x)
> +#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
> +static const AVOption options[] = {
> +	 /* Quality range form 0 to 51 not working, good is between 2 and 47 */
> +	{ "qp",		"Constant quantization parameter rate control method", OFFSET(qp), AV_OPT_TYPE_INT, { .i64 = 30 }, 2, 47, VE },
> +	{ "vewait",	"Time to wait if the VE is busy (default 0)", OFFSET(vewait), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
> +	{ NULL },
> +};
> +
> +static const AVClass cedrus264_class = {
> +	.class_name = "cedrus264",
> +	.item_name  = av_default_item_name,
> +	.option     = options,
> +	.version    = LIBAVUTIL_VERSION_INT,
> +};
> +
> +AVCodec ff_cedrus264_encoder = {
> +	.name           = "cedrus264",

This encoder should be called something like "h264_cedrus" (compare other hardware H.264 encoders: h264_v4l2m2m, h264_omx, h264_vaapi, etc.).  That probably makes it clearer where it should go in the Makefile and allcodecs, too.

> +	.long_name      = NULL_IF_CONFIG_SMALL("Cedrus H.264 Encoder"),
> +	.type           = AVMEDIA_TYPE_VIDEO,
> +	.id             = AV_CODEC_ID_H264,
> +	.priv_data_size = sizeof(cedrus264Context),
> +	.init           = cedrus264_encode_init,
> +	.encode2        = cedrus264_encode,
> +	.close          = cedrus264_close,
> +	.priv_class	= &cedrus264_class,
> +};
> 

This is an intra-only encoder, which seems slightly insane.  Is that a hardware constraint, or would a complete implementation work more sensibly?

What hardware would be most convenient to test this code?  (Some SBC, I'm guessing.)

- Mark
James Almer May 26, 2018, 3:53 p.m.
On 5/26/2018 10:51 AM, daniel.kucera@gmail.com wrote:
> From: Daniel Kucera <daniel.kucera@gmail.com>
> 
> Signed-off-by: Daniel Kucera <daniel.kucera@gmail.com>
> ---
>  libavcodec/Makefile              |   1 +
>  libavcodec/allcodecs.c           |   1 +

>  libavcodec/arm/sunxi/cedar_ve.h  |  79 ++++++
>  libavcodec/arm/sunxi/ion.h       | 377 ++++++++++++++++++++++++++
>  libavcodec/arm/sunxi/ion_sunxi.h | 108 ++++++++
>  libavcodec/arm/sunxi/ve.c        | 552 +++++++++++++++++++++++++++++++++++++++
>  libavcodec/arm/sunxi/ve.h        | 193 ++++++++++++++

This stuff does not belong in libavcodec, and much less in the
arch-specific assembly code folder.

>  libavcodec/cedrus264.c           | 402 ++++++++++++++++++++++++++++
>  8 files changed, 1713 insertions(+)
>  create mode 100644 libavcodec/arm/sunxi/cedar_ve.h
>  create mode 100644 libavcodec/arm/sunxi/ion.h
>  create mode 100644 libavcodec/arm/sunxi/ion_sunxi.h
>  create mode 100755 libavcodec/arm/sunxi/ve.c
>  create mode 100755 libavcodec/arm/sunxi/ve.h
>  create mode 100755 libavcodec/cedrus264.c

Patch hide | download patch | download mbox

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 3ab071a..f3821af 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -987,6 +987,7 @@  OBJS-$(CONFIG_LIBX265_ENCODER)            += libx265.o
 OBJS-$(CONFIG_LIBXAVS_ENCODER)            += libxavs.o
 OBJS-$(CONFIG_LIBXVID_ENCODER)            += libxvid.o
 OBJS-$(CONFIG_LIBZVBI_TELETEXT_DECODER)   += libzvbi-teletextdec.o ass.o
+OBJS-$(CONFIG_CEDRUS264_ENCODER)          += cedrus264.o arm/sunxi/ve.o
 
 # parsers
 OBJS-$(CONFIG_AAC_LATM_PARSER)         += latm_parser.o
diff --git a/libavcodec/allcodecs.c b/libavcodec/allcodecs.c
index 7b7a8c7..a153576 100644
--- a/libavcodec/allcodecs.c
+++ b/libavcodec/allcodecs.c
@@ -66,6 +66,7 @@  extern AVCodec ff_c93_decoder;
 extern AVCodec ff_cavs_decoder;
 extern AVCodec ff_cdgraphics_decoder;
 extern AVCodec ff_cdxl_decoder;
+extern AVCodec ff_cedrus264_encoder;
 extern AVCodec ff_cfhd_decoder;
 extern AVCodec ff_cinepak_encoder;
 extern AVCodec ff_cinepak_decoder;
diff --git a/libavcodec/arm/sunxi/cedar_ve.h b/libavcodec/arm/sunxi/cedar_ve.h
new file mode 100644
index 0000000..92faf59
--- /dev/null
+++ b/libavcodec/arm/sunxi/cedar_ve.h
@@ -0,0 +1,79 @@ 
+#ifndef _CEDAR_VE_H_
+#define _CEDAR_VE_H_
+
+enum IOCTL_CMD {
+	IOCTL_UNKOWN = 0x100,
+	IOCTL_GET_ENV_INFO,
+	IOCTL_WAIT_VE_DE,
+	IOCTL_WAIT_VE_EN,
+	IOCTL_RESET_VE,
+	IOCTL_ENABLE_VE,
+	IOCTL_DISABLE_VE,
+	IOCTL_SET_VE_FREQ,
+
+	IOCTL_CONFIG_AVS2 = 0x200,
+	IOCTL_GETVALUE_AVS2 ,
+	IOCTL_PAUSE_AVS2 ,
+	IOCTL_START_AVS2 ,
+	IOCTL_RESET_AVS2 ,
+	IOCTL_ADJUST_AVS2,
+	IOCTL_ENGINE_REQ,
+	IOCTL_ENGINE_REL,
+	IOCTL_ENGINE_CHECK_DELAY,
+	IOCTL_GET_IC_VER,
+	IOCTL_ADJUST_AVS2_ABS,
+	IOCTL_FLUSH_CACHE,
+	IOCTL_SET_REFCOUNT,
+
+	IOCTL_READ_REG = 0x300,
+	IOCTL_WRITE_REG,
+
+	IOCTL_SET_VOL = 0x400,
+
+#if defined CONFIG_ARCH_SUN8IW8P1
+	IOCTL_WAIT_JPEG_DEC = 0x500,
+#endif
+};
+
+struct cedarv_env_infomation{
+	unsigned int phymem_start;
+	int  phymem_total_size;
+	unsigned int  address_macc;
+};
+
+struct cedarv_cache_range{
+	long start;
+	long end;
+};
+
+/*struct __cedarv_task {
+	int task_prio;
+	int ID;
+	unsigned long timeout;
+	unsigned int frametime;
+	unsigned int block_mode;
+};
+
+struct cedarv_engine_task {
+	struct __cedarv_task t;
+	struct list_head list;
+	struct task_struct *task_handle;
+	unsigned int status;
+	unsigned int running;
+	unsigned int is_first_task;
+};
+
+struct cedarv_engine_task_info {
+	int task_prio;
+	unsigned int frametime;
+	unsigned int total_time;
+};*/
+
+struct cedarv_regop {
+    unsigned int addr;
+    unsigned int value;
+};
+/*--------------------------------------------------------------------------------*/
+
+
+#endif
diff --git a/libavcodec/arm/sunxi/ion.h b/libavcodec/arm/sunxi/ion.h
new file mode 100644
index 0000000..e777c69
--- /dev/null
+++ b/libavcodec/arm/sunxi/ion.h
@@ -0,0 +1,377 @@ 
+/*
+ * include/linux/ion.h
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _LINUX_ION_H
+#define _LINUX_ION_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+
+struct ion_handle;
+/**
+ * enum ion_heap_types - list of all possible types of heaps
+ * @ION_HEAP_TYPE_SYSTEM:	 memory allocated via vmalloc
+ * @ION_HEAP_TYPE_SYSTEM_CONTIG: memory allocated via kmalloc
+ * @ION_HEAP_TYPE_CARVEOUT:	 memory allocated from a prereserved
+ * 				 carveout heap, allocations are physically
+ * 				 contiguous
+ * @ION_HEAP_TYPE_DMA:		 memory allocated via DMA API
+ * @ION_NUM_HEAPS:		 helper for iterating over heaps, a bit mask
+ * 				 is used to identify the heaps, so only 32
+ * 				 total heap types are supported
+ */
+enum ion_heap_type {
+	ION_HEAP_TYPE_SYSTEM,
+	ION_HEAP_TYPE_SYSTEM_CONTIG,
+	ION_HEAP_TYPE_CARVEOUT,
+	ION_HEAP_TYPE_CHUNK,
+	ION_HEAP_TYPE_DMA,
+	ION_HEAP_TYPE_CUSTOM, /* must be last so device specific heaps always
+				 are at the end of this enum */
+	ION_NUM_HEAPS = 16,
+};
+
+#define ION_HEAP_SYSTEM_MASK		(1 << ION_HEAP_TYPE_SYSTEM)
+#define ION_HEAP_SYSTEM_CONTIG_MASK	(1 << ION_HEAP_TYPE_SYSTEM_CONTIG)
+#define ION_HEAP_CARVEOUT_MASK		(1 << ION_HEAP_TYPE_CARVEOUT)
+#define ION_HEAP_TYPE_DMA_MASK          (1 << ION_HEAP_TYPE_DMA)
+
+#define ION_NUM_HEAP_IDS		sizeof(unsigned int) * 8
+
+/**
+ * allocation flags - the lower 16 bits are used by core ion, the upper 16
+ * bits are reserved for use by the heaps themselves.
+ */
+#define ION_FLAG_CACHED 1		/* mappings of this buffer should be
+					   cached, ion will do cache
+					   maintenance when the buffer is
+					   mapped for dma */
+#define ION_FLAG_CACHED_NEEDS_SYNC 2	/* mappings of this buffer will created
+					   at mmap time, if this is set
+					   caches must be managed manually */
+
+#ifdef __KERNEL__
+struct ion_device;
+struct ion_heap;
+struct ion_mapper;
+struct ion_client;
+struct ion_buffer;
+
+/* This should be removed some day when phys_addr_t's are fully
+   plumbed in the kernel, and all instances of ion_phys_addr_t should
+   be converted to phys_addr_t.  For the time being many kernel interfaces
+   do not accept phys_addr_t's that would have to */
+#define ion_phys_addr_t unsigned long
+
+/**
+ * struct ion_platform_heap - defines a heap in the given platform
+ * @type:	type of the heap from ion_heap_type enum
+ * @id:		unique identifier for heap.  When allocating higher numbers
+ * 		will be allocated from first.  At allocation these are passed
+ *		as a bit mask and therefore can not exceed ION_NUM_HEAP_IDS.
+ * @name:	used for debug purposes
+ * @base:	base address of heap in physical memory if applicable
+ * @size:	size of the heap in bytes if applicable
+ * @align:	required alignment in physical memory if applicable
+ * @priv:	private info passed from the board file
+ *
+ * Provided by the board file.
+ */
+struct ion_platform_heap {
+	enum ion_heap_type type;
+	unsigned int id;
+	const char *name;
+	ion_phys_addr_t base;
+	size_t size;
+	ion_phys_addr_t align;
+	void *priv;
+};
+
+/**
+ * struct ion_platform_data - array of platform heaps passed from board file
+ * @nr:		number of structures in the array
+ * @heaps:	array of platform_heap structions
+ *
+ * Provided by the board file in the form of platform data to a platform device.
+ */
+struct ion_platform_data {
+	int nr;
+	struct ion_platform_heap heaps[];
+};
+
+/**
+ * ion_reserve() - reserve memory for ion heaps if applicable
+ * @data:	platform data specifying starting physical address and
+ *		size
+ *
+ * Calls memblock reserve to set aside memory for heaps that are
+ * located at specific memory addresses or of specfic sizes not
+ * managed by the kernel
+ */
+void ion_reserve(struct ion_platform_data *data);
+
+/**
+ * ion_client_create() -  allocate a client and returns it
+ * @dev:		the global ion device
+ * @heap_type_mask:	mask of heaps this client can allocate from
+ * @name:		used for debugging
+ */
+struct ion_client *ion_client_create(struct ion_device *dev,
+				     const char *name);
+
+/**
+ * ion_client_destroy() -  free's a client and all it's handles
+ * @client:	the client
+ *
+ * Free the provided client and all it's resources including
+ * any handles it is holding.
+ */
+void ion_client_destroy(struct ion_client *client);
+
+/**
+ * ion_alloc - allocate ion memory
+ * @client:		the client
+ * @len:		size of the allocation
+ * @align:		requested allocation alignment, lots of hardware blocks
+ *			have alignment requirements of some kind
+ * @heap_id_mask:	mask of heaps to allocate from, if multiple bits are set
+ *			heaps will be tried in order from highest to lowest
+ *			id
+ * @flags:		heap flags, the low 16 bits are consumed by ion, the
+ *			high 16 bits are passed on to the respective heap and
+ *			can be heap custom
+ *
+ * Allocate memory in one of the heaps provided in heap mask and return
+ * an opaque handle to it.
+ */
+struct ion_handle *ion_alloc(struct ion_client *client, size_t len,
+			     size_t align, unsigned int heap_id_mask,
+			     unsigned int flags);
+
+/**
+ * ion_free - free a handle
+ * @client:	the client
+ * @handle:	the handle to free
+ *
+ * Free the provided handle.
+ */
+void ion_free(struct ion_client *client, struct ion_handle *handle);
+
+/**
+ * ion_phys - returns the physical address and len of a handle
+ * @client:	the client
+ * @handle:	the handle
+ * @addr:	a pointer to put the address in
+ * @len:	a pointer to put the length in
+ *
+ * This function queries the heap for a particular handle to get the
+ * handle's physical address.  It't output is only correct if
+ * a heap returns physically contiguous memory -- in other cases
+ * this api should not be implemented -- ion_sg_table should be used
+ * instead.  Returns -EINVAL if the handle is invalid.  This has
+ * no implications on the reference counting of the handle --
+ * the returned value may not be valid if the caller is not
+ * holding a reference.
+ */
+int ion_phys(struct ion_client *client, struct ion_handle *handle,
+	     ion_phys_addr_t *addr, size_t *len);
+
+/**
+ * ion_map_dma - return an sg_table describing a handle
+ * @client:	the client
+ * @handle:	the handle
+ *
+ * This function returns the sg_table describing
+ * a particular ion handle.
+ */
+struct sg_table *ion_sg_table(struct ion_client *client,
+			      struct ion_handle *handle);
+
+/**
+ * ion_map_kernel - create mapping for the given handle
+ * @client:	the client
+ * @handle:	handle to map
+ *
+ * Map the given handle into the kernel and return a kernel address that
+ * can be used to access this address.
+ */
+void *ion_map_kernel(struct ion_client *client, struct ion_handle *handle);
+
+/**
+ * ion_unmap_kernel() - destroy a kernel mapping for a handle
+ * @client:	the client
+ * @handle:	handle to unmap
+ */
+void ion_unmap_kernel(struct ion_client *client, struct ion_handle *handle);
+
+/**
+ * ion_share_dma_buf() - share buffer as dma-buf
+ * @client:	the client
+ * @handle:	the handle
+ */
+struct dma_buf *ion_share_dma_buf(struct ion_client *client,
+						struct ion_handle *handle);
+
+/**
+ * ion_share_dma_buf_fd() - given an ion client, create a dma-buf fd
+ * @client:	the client
+ * @handle:	the handle
+ */
+int ion_share_dma_buf_fd(struct ion_client *client, struct ion_handle *handle);
+
+/**
+ * ion_import_dma_buf() - given an dma-buf fd from the ion exporter get handle
+ * @client:	the client
+ * @fd:		the dma-buf fd
+ *
+ * Given an dma-buf fd that was allocated through ion via ion_share_dma_buf,
+ * import that fd and return a handle representing it.  If a dma-buf from
+ * another exporter is passed in this function will return ERR_PTR(-EINVAL)
+ */
+struct ion_handle *ion_import_dma_buf(struct ion_client *client, int fd);
+
+#endif /* __KERNEL__ */
+
+/**
+ * DOC: Ion Userspace API
+ *
+ * create a client by opening /dev/ion
+ * most operations handled via following ioctls
+ *
+ */
+
+/**
+ * struct ion_allocation_data - metadata passed from userspace for allocations
+ * @len:		size of the allocation
+ * @align:		required alignment of the allocation
+ * @heap_id_mask:	mask of heap ids to allocate from
+ * @flags:		flags passed to heap
+ * @handle:		pointer that will be populated with a cookie to use to
+ *			refer to this allocation
+ *
+ * Provided by userspace as an argument to the ioctl
+ */
+struct ion_allocation_data {
+	size_t len;
+	size_t align;
+	unsigned int heap_id_mask;
+	unsigned int flags;
+	struct ion_handle *handle;
+};
+
+/**
+ * struct ion_fd_data - metadata passed to/from userspace for a handle/fd pair
+ * @handle:	a handle
+ * @fd:		a file descriptor representing that handle
+ *
+ * For ION_IOC_SHARE or ION_IOC_MAP userspace populates the handle field with
+ * the handle returned from ion alloc, and the kernel returns the file
+ * descriptor to share or map in the fd field.  For ION_IOC_IMPORT, userspace
+ * provides the file descriptor and the kernel returns the handle.
+ */
+struct ion_fd_data {
+	struct ion_handle *handle;
+	int fd;
+};
+
+/**
+ * struct ion_handle_data - a handle passed to/from the kernel
+ * @handle:	a handle
+ */
+struct ion_handle_data {
+	struct ion_handle *handle;
+};
+
+/**
+ * struct ion_custom_data - metadata passed to/from userspace for a custom ioctl
+ * @cmd:	the custom ioctl function to call
+ * @arg:	additional data to pass to the custom ioctl, typically a user
+ *		pointer to a predefined structure
+ *
+ * This works just like the regular cmd and arg fields of an ioctl.
+ */
+struct ion_custom_data {
+	unsigned int cmd;
+	unsigned long arg;
+};
+
+#define ION_IOC_MAGIC		'I'
+
+/**
+ * DOC: ION_IOC_ALLOC - allocate memory
+ *
+ * Takes an ion_allocation_data struct and returns it with the handle field
+ * populated with the opaque handle for the allocation.
+ */
+#define ION_IOC_ALLOC		_IOWR(ION_IOC_MAGIC, 0, \
+				      struct ion_allocation_data)
+
+/**
+ * DOC: ION_IOC_FREE - free memory
+ *
+ * Takes an ion_handle_data struct and frees the handle.
+ */
+#define ION_IOC_FREE		_IOWR(ION_IOC_MAGIC, 1, struct ion_handle_data)
+
+/**
+ * DOC: ION_IOC_MAP - get a file descriptor to mmap
+ *
+ * Takes an ion_fd_data struct with the handle field populated with a valid
+ * opaque handle.  Returns the struct with the fd field set to a file
+ * descriptor open in the current address space.  This file descriptor
+ * can then be used as an argument to mmap.
+ */
+#define ION_IOC_MAP		_IOWR(ION_IOC_MAGIC, 2, struct ion_fd_data)
+
+/**
+ * DOC: ION_IOC_SHARE - creates a file descriptor to use to share an allocation
+ *
+ * Takes an ion_fd_data struct with the handle field populated with a valid
+ * opaque handle.  Returns the struct with the fd field set to a file
+ * descriptor open in the current address space.  This file descriptor
+ * can then be passed to another process.  The corresponding opaque handle can
+ * be retrieved via ION_IOC_IMPORT.
+ */
+#define ION_IOC_SHARE		_IOWR(ION_IOC_MAGIC, 4, struct ion_fd_data)
+
+/**
+ * DOC: ION_IOC_IMPORT - imports a shared file descriptor
+ *
+ * Takes an ion_fd_data struct with the fd field populated with a valid file
+ * descriptor obtained from ION_IOC_SHARE and returns the struct with the handle
+ * field set to the corresponding opaque handle.
+ */
+#define ION_IOC_IMPORT		_IOWR(ION_IOC_MAGIC, 5, struct ion_fd_data)
+
+/**
+ * DOC: ION_IOC_SYNC - syncs a shared file descriptor to memory
+ *
+ * Deprecated in favor of using the dma_buf APIs correctly (syncing
+ * will happen automatically when the buffer is mapped to a device).
+ * If necessary, it should be used after touching a cached buffer from the CPU;
+ * this will make the buffer in memory coherent.
+ */
+#define ION_IOC_SYNC		_IOWR(ION_IOC_MAGIC, 7, struct ion_fd_data)
+
+/**
+ * DOC: ION_IOC_CUSTOM - call architecture specific ion ioctl
+ *
+ * Takes the argument of the architecture specific ioctl to call and
+ * passes appropriate userdata for that ioctl
+ */
+#define ION_IOC_CUSTOM		_IOWR(ION_IOC_MAGIC, 6, struct ion_custom_data)
+
+#endif /* _LINUX_ION_H */
diff --git a/libavcodec/arm/sunxi/ion_sunxi.h b/libavcodec/arm/sunxi/ion_sunxi.h
new file mode 100644
index 0000000..d7468ae
--- /dev/null
+++ b/libavcodec/arm/sunxi/ion_sunxi.h
@@ -0,0 +1,108 @@ 
+/*
+ * include/linux/ion_sunxi.h
+ *
+ * Copyright(c) 2013-2015 Allwinnertech Co., Ltd.
+ *      http://www.allwinnertech.com
+ *
+ * Author: liugang <liugang@allwinnertech.com>
+ *
+ * sunxi ion header file
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __ION_SUNXI_H
+#define __ION_SUNXI_H
+
+#define ION_HEAP_TYPE_SUNXI_START (ION_HEAP_TYPE_CUSTOM + 1)
+#define ION_HEAP_TYPE_SECURE	  (ION_HEAP_TYPE_SUNXI_START)
+
+/*
+ * Address range handed to the ION_IOC_SUNXI_FLUSH_RANGE custom ioctl.
+ * ve_flush_cache() in ve.c fills it with a buffer's user-space virtual
+ * address (start) and that address plus the buffer size (end).
+ */
+typedef struct {
+	long 	start;
+	long 	end;
+}sunxi_cache_range;
+
+/*
+ * Argument of the ION_IOC_SUNXI_PHYS_ADDR custom ioctl.
+ * ion_malloc() in ve.c sets @handle to the ion handle of an allocation and
+ * reads @phys_addr back after the ioctl.  @size is not touched by the code
+ * in this patch (presumably filled in by the kernel - TODO confirm).
+ */
+typedef struct {
+	void *handle;
+	unsigned long phys_addr;
+	unsigned long size;
+}sunxi_phys_data;
+
+#define DMA_BUF_MAXCNT 	8
+
+/*
+ * One source/destination pair of a DMA copy request, each given as a virtual
+ * (va) and a physical (pa) address plus a size.
+ * NOTE(review): not used by the code in this patch; presumably the element
+ * type for ION_IOC_SUNXI_DMA_COPY requests - confirm against the kernel driver.
+ */
+typedef struct {
+	unsigned int src_va;
+	unsigned int src_pa;
+	unsigned int dst_va;
+	unsigned int dst_pa;
+	unsigned int size;
+}dma_buf_item;
+
+/*
+ * A batch of up to DMA_BUF_MAXCNT dma_buf_item entries; @cnt is presumably
+ * the number of valid entries in @item (TODO confirm).
+ * NOTE(review): not used by the code in this patch; the meaning of
+ * @multi_dma cannot be determined from here.
+ */
+typedef struct {
+	int multi_dma;
+	unsigned int cnt;
+	dma_buf_item item[DMA_BUF_MAXCNT];
+}dma_buf_group;
+
+#define ION_IOC_SUNXI_FLUSH_RANGE           5
+#define ION_IOC_SUNXI_FLUSH_ALL             6
+#define ION_IOC_SUNXI_PHYS_ADDR             7
+#define ION_IOC_SUNXI_DMA_COPY              8
+#define ION_IOC_SUNXI_DUMP                  9
+
+#ifdef __KERNEL__
+
+int flush_clean_user_range(long start, long end);
+int flush_user_range(long start, long end);
+void flush_dcache_all(void);
+
+/**
+ * sunxi_buf_alloc - alloc phys contiguous memory in SUNXI platform.
+ * @size: size in bytes to allocate.
+ * @paddr: store the start phys address allocated.
+ *
+ * return the start virtual address, or 0 if failed.
+ */
+void *sunxi_buf_alloc(unsigned int size, unsigned int *paddr);
+/**
+ * sunxi_buf_free - free buffer allocated by sunxi_buf_alloc.
+ * @vaddr: the kernel virt addr of the area.
+ * @paddr: the start phys addr of the area.
+ * @size: size in bytes of the area.
+ */
+void sunxi_buf_free(void *vaddr, unsigned int paddr, unsigned int size);
+/**
+ * sunxi_alloc_phys - alloc phys contiguous memory in SUNXI platform.
+ * @size: size in bytes to allocate.
+ *
+ * return the start phys addr, or 0 if failed.
+ */
+u32 sunxi_alloc_phys(size_t size);
+/**
+ * sunxi_free_phys - free phys contiguous memory allocated by sunxi_alloc_phys.
+ * @paddr: the start phys addr of the area.
+ * @size: size in bytes of the area.
+ */
+void sunxi_free_phys(u32 paddr, size_t size);
+/**
+ * sunxi_map_kernel - map phys contiguous memory to kernel virtual space.
+ * @paddr: the start phys addr of the area.
+ * @size: size in bytes of the area.
+ *
+ * return the start virt addr which is in vmalloc space, or NULL if failed.
+ */
+void *sunxi_map_kernel(unsigned int paddr, unsigned int size);
+/**
+ * sunxi_unmap_kernel - unmap phys contiguous memory from kernel space.
+ * @vaddr: the kernel virt addr of the area.
+ * @paddr: the start phys addr of the area.
+ * @size: size in bytes of the area.
+ */
+void sunxi_unmap_kernel(void *vaddr, unsigned int paddr, unsigned int size);
+
+#endif /* __KERNEL__ */
+
+#endif
diff --git a/libavcodec/arm/sunxi/ve.c b/libavcodec/arm/sunxi/ve.c
new file mode 100755
index 0000000..b44b17e
--- /dev/null
+++ b/libavcodec/arm/sunxi/ve.c
@@ -0,0 +1,552 @@ 
+/*
+ * Copyright (c) 2013-2014 Jens Kuske <jenskuske@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#include <fcntl.h>
+#include <pthread.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/file.h>
+#include "ve.h"
+#include "ion.h"
+#include "ion_sunxi.h"
+#include "cedar_ve.h"
+
+#define LOCKFILE "/tmp/cedar_dev.lck"
+#define DEVICE "/dev/cedar_dev"
+#define PAGE_OFFSET (0xc0000000) // from kernel
+#define PAGE_SIZE (4096)
+
+#define typeof __typeof__
+
+/*
+ * Given a pointer @ptr to the field @member embedded in a struct of type
+ * @type, yield a pointer to the enclosing struct.  The temporary __mptr only
+ * exists so the compiler checks that @ptr has the member's type.
+ * Relies on GNU statement expressions and __typeof__.
+ */
+#define container_of(ptr, type, member) ({                      \
+	const typeof( ((type *)0)->member ) *__mptr = (ptr);    \
+	(type *)( (char *)__mptr - offsetof(type,member) );})
+
+/*
+ * Descriptor of the flock()ed LOCKFILE, or -1 while no lock file is open.
+ * (The device descriptor itself lives in ve.fd.)
+ */
+static int lockfd = -1;
+
+/*
+ * Node of the singly linked list that partitions the VE's reserved memory
+ * region (non-ion mode).  A chunk whose mem.virt is NULL is free; ve_malloc()
+ * splits chunks and ve_free() merges adjacent free ones again.
+ */
+struct memchunk_t
+{
+	struct ve_mem mem;
+	struct memchunk_t *next;
+};
+
+/*
+ * Bookkeeping for one ion allocation.  Callers only ever see the embedded
+ * @mem; ion_free() recovers the enclosing struct with container_of().
+ */
+struct ion_mem
+{
+	struct ion_handle *handle;	/* handle returned by ION_IOC_ALLOC */
+	int fd;				/* mmap-able fd returned by ION_IOC_MAP */
+	struct ve_mem mem;		/* public description handed to callers */
+};
+
+/*
+ * State of the single VE device instance used by this file.
+ *  fd             - descriptor of DEVICE, -1 while closed
+ *  ion_fd         - descriptor of /dev/ion; != -1 selects the ion allocator
+ *  regs           - mmap()ed register window (0x800 bytes), see ve_open()
+ *  version        - upper 16 bits of the VE_VERSION register
+ *  first_memchunk - head of the chunk list used by the non-ion allocator
+ *  memory_lock    - taken for writing by ve_malloc()/ve_free()
+ *  device_lock    - held between ve_get() and ve_put()
+ */
+static struct
+{
+	int fd;
+	int ion_fd;
+	void *regs;
+	int version;
+	struct memchunk_t first_memchunk;
+	pthread_rwlock_t memory_lock;
+	pthread_mutex_t device_lock;
+} ve = { .fd = -1, .ion_fd = -1, .memory_lock = PTHREAD_RWLOCK_INITIALIZER, .device_lock = PTHREAD_MUTEX_INITIALIZER };
+
+
+/*
+ * Singly linked list of the ve_mem descriptors handed out by ion_malloc().
+ * It only references the descriptors (no ownership) and exists so that
+ * ve_virt2phys() can translate addresses in ion mode.
+ * The list head is private to this file, hence static.
+ */
+static struct mem_list {
+	struct ve_mem *mem;
+	struct mem_list *next;
+} *memlist = NULL;
+
+/*
+ * Append @mem to the tail of memlist so that ve_virt2phys() can later
+ * translate addresses that fall inside it.
+ *
+ * The list stores only the pointer and does not take ownership of @mem.
+ * If the list node cannot be allocated, the failure is reported with
+ * perror() and @mem is left untracked instead of dereferencing NULL.
+ */
+static void memlist_add(struct ve_mem *mem) {
+	struct mem_list **tail = &memlist;
+	struct mem_list *node = malloc(sizeof(*node));
+
+	if (!node) {
+		perror("malloc mem_list failed");
+		return;
+	}
+	node->mem = mem;
+	node->next = NULL;
+
+	/* Walk to the NULL link that ends the list (the head itself when empty). */
+	while (*tail)
+		tail = &(*tail)->next;
+	*tail = node;
+}
+
+/*
+ * Return the memlist node that tracks @mem, or NULL when @mem is not in
+ * the list.
+ */
+static struct mem_list *memlist_find(struct ve_mem *mem) {
+	struct mem_list *node;
+
+	for (node = memlist; node != NULL; node = node->next) {
+		if (node->mem == mem)
+			break;
+	}
+	/* node is NULL here exactly when the loop ran off the end */
+	return node;
+}
+
+/*
+ * Remove the node that tracks @mem from memlist and free the node (not @mem).
+ *
+ * Returns 0 when a node was removed, -1 when @mem was not in the list.
+ *
+ * The walk keeps a pointer to the link that points at the current node, so
+ * removing the first node correctly updates the list head as well.
+ */
+static int memlist_del(struct ve_mem *mem) {
+	struct mem_list **link;
+
+	for (link = &memlist; *link != NULL; link = &(*link)->next) {
+		struct mem_list *node = *link;
+
+		if (node->mem == mem) {
+			*link = node->next;
+			free(node);
+			return 0;
+		}
+	}
+	return -1;
+}
+
+/*
+ * Free every node of memlist and leave the list empty.  The ve_mem
+ * descriptors referenced by the nodes are not freed.
+ */
+static void memlist_del_all(void) {
+	while (memlist) {
+		struct mem_list *head = memlist;
+
+		memlist = head->next;
+		free(head);
+	}
+}
+
+/*
+ * Open the VE device, map its register window and bring the engine up.
+ *
+ * Returns 1 on success and 0 on failure.
+ * NOTE(review): 0 is also returned when the device is already open
+ * (ve.fd != -1), so a second call in the same process looks like a failure
+ * to callers - confirm that this is intended.
+ */
+int ve_open(void)
+{
+	if (ve.fd != -1)
+		return 0;
+
+	struct cedarv_env_infomation info;
+
+	ve.fd = open(DEVICE, O_RDWR);
+	if (ve.fd == -1)
+		return 0;
+
+	if (ioctl(ve.fd, IOCTL_GET_ENV_INFO, (void *)(&info)) == -1)
+		goto close;
+
+	/* Map 0x800 bytes of registers at the offset reported by the driver. */
+	ve.regs = mmap(NULL, 0x800, PROT_READ | PROT_WRITE, MAP_SHARED, ve.fd, info.address_macc);
+	if (ve.regs == MAP_FAILED)
+		goto close;
+
+	/* The whole reserved region starts out as one free chunk. */
+	ve.first_memchunk.mem.phys = info.phymem_start - PAGE_OFFSET;
+	ve.first_memchunk.mem.size = info.phymem_total_size;
+
+	/* No reserved region reported: use /dev/ion for allocations instead. */
+	if (ve.first_memchunk.mem.size == 0)
+	{
+		ve.ion_fd = open("/dev/ion", O_RDONLY);
+		if (ve.ion_fd == -1)
+			goto unmap;
+	}
+
+	/* NOTE(review): the results of these ioctls are not checked. */
+	ioctl(ve.fd, IOCTL_ENGINE_REQ, 0);
+	ioctl(ve.fd, IOCTL_ENABLE_VE, 0);
+	ioctl(ve.fd, IOCTL_SET_VE_FREQ, 320);
+	ioctl(ve.fd, IOCTL_RESET_VE, 0);
+
+	/* Same VE_CTRL value that ve_put() restores after an engine was used. */
+	writel(0x00130007, ve.regs + VE_CTRL);
+
+	/* The version is kept in the upper 16 bits of VE_VERSION. */
+	ve.version = readl(ve.regs + VE_VERSION) >> 16;
+	/* NOTE(review): prints to stdout from library code. */
+	printf("[VDPAU SUNXI] VE version 0x%04x opened.\n", ve.version);
+
+	return 1;
+
+unmap:
+	munmap(ve.regs, 0x800);
+close:
+	close(ve.fd);
+	ve.fd = -1;
+	return 0;
+}
+
+/*
+ * Try to take the inter-process VE lock without blocking.
+ *
+ * The lock is an flock() on a separate file rather than on /dev/cedar_dev
+ * itself, because merely opening the device already causes problems: an
+ * opened device is closed at program exit, which disables the VE interrupt
+ * even if another process is still using it.
+ *
+ * Returns 1 when the exclusive lock was obtained, 0 otherwise.
+ */
+int ve_lock(void) {
+	if (lockfd == -1) {
+		lockfd = open(LOCKFILE, O_CREAT | O_RDWR, 0666);
+		if (lockfd == -1)
+			return 0;
+	}
+
+	return flock(lockfd, LOCK_EX | LOCK_NB) < 0 ? 0 : 1;
+}
+
+/*
+ * Release the lock taken by ve_lock() and close the lock file.
+ * Does nothing when no lock file is open.
+ */
+void ve_unlock(void) {
+	if (lockfd != -1) {
+		flock(lockfd, LOCK_UN);
+		close(lockfd);
+		lockfd = -1;
+	}
+	/* The lock file is deliberately not unlinked: doing so causes race conditions. */
+}
+
+/*
+ * Shut the VE down and release everything ve_open() acquired: the engine,
+ * the register mapping, the ion descriptor (if any) and the device itself.
+ * Does nothing when the device is not open.
+ */
+void ve_close(void)
+{
+	if (ve.fd == -1)
+		return;
+
+	ioctl(ve.fd, IOCTL_DISABLE_VE, 0);
+	ioctl(ve.fd, IOCTL_ENGINE_REL, 0);
+
+	munmap(ve.regs, 0x800);
+	ve.regs = NULL;
+
+	if (ve.ion_fd != -1)
+	{
+		close(ve.ion_fd);
+		/* Forget the closed descriptor so a later ve_open() starts clean. */
+		ve.ion_fd = -1;
+	}
+
+	close(ve.fd);
+	ve.fd = -1;
+}
+
+/*
+ * Return the VE version that ve_open() read from the upper 16 bits of the
+ * VE_VERSION register.
+ */
+int ve_get_version(void)
+{
+	return ve.version;
+}
+
+/*
+ * Wait for the VE via the driver's wait ioctl, passing @timeout through
+ * unchanged.  Versions >= 0x1633 use IOCTL_WAIT_VE_EN, older ones
+ * IOCTL_WAIT_VE_DE.
+ *
+ * Returns the ioctl() result, or 0 when the device is not open.
+ */
+int ve_wait(int timeout)
+{
+	if (ve.fd == -1)
+		return 0;
+
+	return ioctl(ve.fd,
+		     ve_get_version() >= 0x1633 ? IOCTL_WAIT_VE_EN : IOCTL_WAIT_VE_DE,
+		     timeout);
+}
+
+/*
+ * Take the device lock and select @engine by writing VE_CTRL.
+ *
+ * The low four bits of the register value come from @engine, all other
+ * bits of @flags are OR-ed in, and the base value depends on the VE version.
+ *
+ * Returns the mapped register base, or NULL when the lock could not be
+ * taken.  Must be paired with ve_put().
+ */
+void *ve_get(int engine, uint32_t flags)
+{
+	uint32_t ctrl;
+
+	if (pthread_mutex_lock(&ve.device_lock) != 0)
+		return NULL;
+
+	ctrl = (ve_get_version() >= 0x1633) ? 0x001300C0 : 0x00130000;
+	ctrl |= (engine & 0xf) | (flags & ~0xf);
+	writel(ctrl, ve.regs + VE_CTRL);
+
+	return ve.regs;
+}
+
+/*
+ * Counterpart of ve_get(): write 0x00130007 to VE_CTRL (the value ve_open()
+ * also programs) and release the device lock.
+ */
+void ve_put(void)
+{
+	writel(0x00130007, ve.regs + VE_CTRL);
+	pthread_mutex_unlock(&ve.device_lock);
+}
+
+/*
+ * Allocate @size bytes through /dev/ion, map them into this process and
+ * look up their physical address.
+ *
+ * Steps: ION_IOC_ALLOC -> ION_IOC_MAP (fd) -> mmap() -> ION_IOC_SUNXI_PHYS_ADDR.
+ * The resulting descriptor is registered in memlist for ve_virt2phys().
+ *
+ * Returns the ve_mem embedded in a freshly allocated struct ion_mem, or NULL
+ * on failure.  On failure every resource acquired so far (mapping, fd, ion
+ * handle, bookkeeping struct) is released again; the cause is reported with
+ * perror() before the cleanup so errno is still meaningful.
+ */
+static struct ve_mem *ion_malloc(int size)
+{
+	struct ion_allocation_data alloc = {
+		.len = size,
+		.align = 4096,
+		.heap_id_mask = ION_HEAP_TYPE_DMA,
+		.flags = ION_FLAG_CACHED | ION_FLAG_CACHED_NEEDS_SYNC,
+	};
+	struct ion_fd_data map = { 0 };
+	struct ion_handle_data handle = { 0 };
+	sunxi_phys_data phys = { 0 };
+	struct ion_custom_data custom = {
+		.cmd = ION_IOC_SUNXI_PHYS_ADDR,
+		.arg = (unsigned long)(&phys),
+	};
+	struct ion_mem *imem = calloc(1, sizeof(*imem));
+
+	if (!imem)
+	{
+		perror("calloc ion_buffer failed");
+		return NULL;
+	}
+
+	if (ioctl(ve.ion_fd, ION_IOC_ALLOC, &alloc))
+	{
+		perror("ION_IOC_ALLOC failed");
+		goto err_free_imem;
+	}
+
+	imem->handle = alloc.handle;
+	imem->mem.size = size;
+
+	map.handle = imem->handle;
+	if (ioctl(ve.ion_fd, ION_IOC_MAP, &map))
+	{
+		perror("ION_IOC_MAP failed");
+		goto err_free_handle;
+	}
+
+	imem->fd = map.fd;
+
+	imem->mem.virt = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, imem->fd, 0);
+	if (imem->mem.virt == MAP_FAILED)
+	{
+		perror("mmap failed");
+		goto err_close_fd;
+	}
+
+	phys.handle = imem->handle;
+	if (ioctl(ve.ion_fd, ION_IOC_CUSTOM, &custom))
+	{
+		perror("ION_IOC_CUSTOM(SUNXI_PHYS_ADDR) failed");
+		goto err_unmap;
+	}
+
+	/* The VE is programmed with addresses relative to 0x40000000. */
+	imem->mem.phys = phys.phys_addr - 0x40000000;
+
+	memlist_add(&imem->mem);
+
+	return &imem->mem;
+
+err_unmap:
+	munmap(imem->mem.virt, size);
+err_close_fd:
+	close(imem->fd);
+err_free_handle:
+	handle.handle = imem->handle;
+	ioctl(ve.ion_fd, ION_IOC_FREE, &handle);
+err_free_imem:
+	free(imem);
+	return NULL;
+}
+
+/*
+ * Allocate @size bytes of VE-accessible memory.
+ *
+ * In ion mode the request is forwarded to ion_malloc().  Otherwise @size is
+ * rounded up to a multiple of PAGE_SIZE and the smallest free chunk of the
+ * reserved region that fits is mapped (best fit); any remainder of that
+ * chunk becomes a new free chunk directly behind it.
+ *
+ * Returns a descriptor with the virtual and physical address of the buffer,
+ * or NULL when the device is closed, no chunk fits, or mapping/allocation
+ * fails.  Free it with ve_free().
+ */
+struct ve_mem *ve_malloc(int size)
+{
+	struct memchunk_t *c, *best_chunk = NULL, *rest = NULL;
+	struct ve_mem *ret = NULL;
+	void *addr;
+	int left_size;
+
+	if (ve.fd == -1)
+		return NULL;
+
+	if (ve.ion_fd != -1)
+		return ion_malloc(size);
+
+	if (pthread_rwlock_wrlock(&ve.memory_lock))
+		return NULL;
+
+	size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
+	for (c = &ve.first_memchunk; c != NULL; c = c->next)
+	{
+		if(c->mem.virt == NULL && c->mem.size >= size)
+		{
+			if (best_chunk == NULL || c->mem.size < best_chunk->mem.size)
+				best_chunk = c;
+
+			/* An exact fit cannot be improved on. */
+			if (c->mem.size == size)
+				break;
+		}
+	}
+
+	if (!best_chunk)
+		goto out;
+
+	/*
+	 * Allocate the node for the remainder before touching the chunk, so
+	 * that an out-of-memory condition leaves the list unchanged.
+	 */
+	left_size = best_chunk->mem.size - size;
+	if (left_size > 0)
+	{
+		rest = malloc(sizeof(*rest));
+		if (!rest)
+			goto out;
+	}
+
+	addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, ve.fd, best_chunk->mem.phys + PAGE_OFFSET);
+	if (addr == MAP_FAILED)
+	{
+		free(rest);
+		goto out;
+	}
+
+	best_chunk->mem.virt = addr;
+	best_chunk->mem.size = size;
+
+	if (rest)
+	{
+		rest->mem.phys = best_chunk->mem.phys + size;
+		rest->mem.size = left_size;
+		rest->mem.virt = NULL;
+		rest->next = best_chunk->next;
+		best_chunk->next = rest;
+	}
+
+	ret = &best_chunk->mem;
+out:
+	pthread_rwlock_unlock(&ve.memory_lock);
+	return ret;
+}
+
+/*
+ * Release a buffer obtained from ion_malloc(): unmap it, drop it from
+ * memlist, close its fd, free the ion handle and finally free the
+ * bookkeeping struct that embeds @mem.
+ *
+ * If munmap() fails nothing else is released, because the buffer is then
+ * still mapped and tracked.  A failing ION_IOC_FREE is reported but does not
+ * prevent the bookkeeping struct from being freed.
+ */
+static void ion_free(struct ve_mem *mem)
+{
+	struct ion_mem *imem;
+	struct ion_handle_data handle;
+
+	if (ve.ion_fd == -1 || !mem)
+		return;
+
+	imem = container_of(mem, struct ion_mem, mem);
+
+	if (munmap(mem->virt, mem->size))
+	{
+		perror("munmap failed");
+		return;
+	}
+
+	memlist_del(mem);
+
+	close(imem->fd);
+
+	handle.handle = imem->handle;
+	if (ioctl(ve.ion_fd, ION_IOC_FREE, &handle))
+		perror("ION_IOC_FREE failed");
+
+	/* @mem lives inside imem, so it is invalid from here on. */
+	free(imem);
+}
+
+/*
+ * Free a buffer returned by ve_malloc().  NULL is ignored, as is any call
+ * made while the device is closed.
+ *
+ * In ion mode the buffer is handed to ion_free() and nothing else is done;
+ * the chunk list below only describes the reserved region of the non-ion
+ * allocator.  In non-ion mode the chunk is unmapped, marked free, and
+ * neighbouring free chunks are merged.
+ */
+void ve_free(struct ve_mem *mem)
+{
+	struct memchunk_t *c;
+
+	if (ve.fd == -1)
+		return;
+
+	if (mem == NULL)
+		return;
+
+	if (ve.ion_fd != -1)
+	{
+		ion_free(mem);
+		return;
+	}
+
+	if (pthread_rwlock_wrlock(&ve.memory_lock))
+		return;
+
+	/* Unmap the chunk that embeds @mem and mark it free. */
+	for (c = &ve.first_memchunk; c != NULL; c = c->next)
+	{
+		if (&c->mem == mem)
+		{
+			munmap(c->mem.virt, c->mem.size);
+			c->mem.virt = NULL;
+			break;
+		}
+	}
+
+	/* Coalesce every run of adjacent free chunks into its first chunk. */
+	for (c = &ve.first_memchunk; c != NULL; c = c->next)
+	{
+		if (c->mem.virt == NULL)
+		{
+			while (c->next != NULL && c->next->mem.virt == NULL)
+			{
+				struct memchunk_t *n = c->next;
+				c->mem.size += n->mem.size;
+				c->next = n->next;
+				free(n);
+			}
+		}
+	}
+
+	pthread_rwlock_unlock(&ve.memory_lock);
+}
+
+/*
+ * Translate @ptr through one buffer descriptor.
+ *
+ * Returns 1 and stores the VE address in *@phys when @ptr is the start of
+ * the buffer or lies inside it; returns 0 (leaving *@phys untouched) when
+ * @mem is NULL, unmapped, or does not contain @ptr.
+ */
+static int ve_mem_translate(const struct ve_mem *mem, const void *ptr, uint32_t *phys)
+{
+	const uint8_t *base, *p = ptr;
+
+	if (mem == NULL || mem->virt == NULL)
+		return 0;
+
+	base = mem->virt;
+	if (p == base)
+	{
+		*phys = mem->phys;
+		return 1;
+	}
+	if (p > base && p < base + mem->size)
+	{
+		*phys = mem->phys + (uint32_t)(p - base);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Convert a virtual address inside a ve_malloc()ed buffer into the address
+ * the VE uses for it.
+ *
+ * In ion mode the buffers registered in memlist are searched, otherwise the
+ * chunk list of the reserved region.  Returns 0 when the device is closed or
+ * @ptr does not belong to any known buffer.
+ *
+ * Every list entry is visited exactly once; entries without a mapping are
+ * skipped.  No per-entry debug output is written to stdout.
+ *
+ * NOTE(review): the lists are walked without taking ve.memory_lock, as in
+ * the original code (the read lock was commented out there) - confirm
+ * whether concurrent ve_malloc()/ve_free() calls need to be excluded.
+ */
+uint32_t ve_virt2phys(void *ptr)
+{
+	uint32_t addr = 0;
+	struct memchunk_t *c;
+
+	if (ve.fd == -1)
+		return 0;
+
+	if (ve.ion_fd != -1) {
+		struct mem_list *m;
+
+		for (m = memlist; m != NULL; m = m->next) {
+			if (ve_mem_translate(m->mem, ptr, &addr))
+				break;
+		}
+		return addr;
+	}
+
+	for (c = &ve.first_memchunk; c != NULL; c = c->next)
+	{
+		if (ve_mem_translate(&c->mem, ptr, &addr))
+			break;
+	}
+
+	return addr;
+}
+
+
+/*
+ * Flush the CPU cache for the whole buffer described by @mem, i.e. the
+ * virtual range [virt, virt + size).
+ *
+ * Uses the ION_IOC_SUNXI_FLUSH_RANGE custom ioctl in ion mode and the
+ * device's IOCTL_FLUSH_CACHE otherwise.  Does nothing when the device is
+ * closed or when @mem is NULL or not mapped.
+ */
+void ve_flush_cache(struct ve_mem *mem)
+{
+	if (ve.fd == -1)
+		return;
+
+	/* Same leniency as ve_free(): tolerate a missing or unmapped buffer. */
+	if (mem == NULL || mem->virt == NULL)
+		return;
+
+	if (ve.ion_fd != -1)
+	{
+		sunxi_cache_range range = {
+			.start = (long)mem->virt,
+			.end = (long)mem->virt + mem->size,
+		};
+
+		struct ion_custom_data cache = {
+			.cmd = ION_IOC_SUNXI_FLUSH_RANGE,
+			.arg = (unsigned long)(&range),
+		};
+
+		if (ioctl(ve.ion_fd, ION_IOC_CUSTOM, &cache))
+			perror("ION_IOC_CUSTOM(SUNXI_FLUSH_RANGE) failed");
+	}
+	else
+	{
+		struct cedarv_cache_range range =
+		{
+			.start = (int)mem->virt,
+			.end = (int)mem->virt + mem->size
+		};
+
+		ioctl(ve.fd, IOCTL_FLUSH_CACHE, (void*)(&range));
+	}
+}
diff --git a/libavcodec/arm/sunxi/ve.h b/libavcodec/arm/sunxi/ve.h
new file mode 100755
index 0000000..c5dcc72
--- /dev/null
+++ b/libavcodec/arm/sunxi/ve.h
@@ -0,0 +1,193 @@ 
+/*
+ * Copyright (c) 2013 Jens Kuske <jenskuske@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+ *
+ */
+
+#ifndef __VE_H__
+#define __VE_H__
+
+#include <stdint.h>
+
+int ve_lock(void);
+void ve_unlock(void);
+int ve_open(void);
+void ve_close(void);
+int ve_get_version(void);
+int ve_wait(int timeout);
+void *ve_get(int engine, uint32_t flags);
+void ve_put(void);
+
+/*
+ * Description of one buffer returned by ve_malloc().
+ * The struct is owned by the allocator; release it with ve_free().
+ */
+struct ve_mem
+{
+	void *virt;	/* address of the buffer in this process (from mmap) */
+	uint32_t phys;	/* address to program into VE registers */
+	int size;	/* size of the buffer in bytes */
+};
+
+struct ve_mem *ve_malloc(int size);
+void ve_free(struct ve_mem *mem);
+void ve_flush_cache(struct ve_mem *mem);
+uint32_t ve_virt2phys(void *ptr);
+
+/* Store the byte @val at @addr with a single volatile 8-bit access. */
+static inline void writeb(uint8_t val, void *addr)
+{
+	volatile uint8_t *reg = addr;
+
+	*reg = val;
+}
+
+/* Store the 32-bit word @val at @addr with a single volatile access. */
+static inline void writel(uint32_t val, void *addr)
+{
+	volatile uint32_t *reg = addr;
+
+	*reg = val;
+}
+
+/* Load and return the 32-bit word at @addr with a single volatile access. */
+static inline uint32_t readl(void *addr)
+{
+	volatile uint32_t *reg = addr;
+
+	return *reg;
+}
+
+#define VE_ENGINE_MPEG			0x0
+#define VE_ENGINE_H264			0x1
+#define VE_ENGINE_HEVC			0x4
+#define VE_ENGINE_AVC			0xb
+
+#define VE_CTRL				0x000
+#define VE_EXTRA_OUT_FMT_OFFSET		0x0e8
+#define VE_VERSION			0x0f0
+
+#define VE_MPEG_PIC_HDR			0x100
+#define VE_MPEG_VOP_HDR			0x104
+#define VE_MPEG_SIZE			0x108
+#define VE_MPEG_FRAME_SIZE		0x10c
+#define VE_MPEG_MBA			0x110
+#define VE_MPEG_CTRL			0x114
+#define VE_MPEG_TRIGGER			0x118
+#define VE_MPEG_STATUS			0x11c
+#define VE_MPEG_TRBTRD_FIELD		0x120
+#define VE_MPEG_TRBTRD_FRAME		0x124
+#define VE_MPEG_VLD_ADDR		0x128
+#define VE_MPEG_VLD_OFFSET		0x12c
+#define VE_MPEG_VLD_LEN			0x130
+#define VE_MPEG_VLD_END			0x134
+#define VE_MPEG_MBH_ADDR		0x138
+#define VE_MPEG_DCAC_ADDR		0x13c
+#define VE_MPEG_NCF_ADDR		0x144
+#define VE_MPEG_REC_LUMA		0x148
+#define VE_MPEG_REC_CHROMA		0x14c
+#define VE_MPEG_FWD_LUMA		0x150
+#define VE_MPEG_FWD_CHROMA		0x154
+#define VE_MPEG_BACK_LUMA		0x158
+#define VE_MPEG_BACK_CHROMA		0x15c
+#define VE_MPEG_IQ_MIN_INPUT		0x180
+#define VE_MPEG_QP_INPUT		0x184
+
+#define VE_MPEG_ROT_LUMA		0x1cc
+#define VE_MPEG_ROT_CHROMA		0x1d0
+#define VE_MPEG_SDROT_CTRL		0x1d4
+
+#define VE_H264_FRAME_SIZE		0x200
+#define VE_H264_PIC_HDR			0x204
+#define VE_H264_SLICE_HDR		0x208
+#define VE_H264_SLICE_HDR2		0x20c
+#define VE_H264_PRED_WEIGHT		0x210
+#define VE_H264_QP_PARAM		0x21c
+#define VE_H264_CTRL			0x220
+#define VE_H264_TRIGGER			0x224
+#define VE_H264_STATUS			0x228
+#define VE_H264_CUR_MB_NUM		0x22c
+#define VE_H264_VLD_ADDR		0x230
+#define VE_H264_VLD_OFFSET		0x234
+#define VE_H264_VLD_LEN			0x238
+#define VE_H264_VLD_END			0x23c
+#define VE_H264_SDROT_CTRL		0x240
+#define VE_H264_SDROT_LUMA		0x244
+#define VE_H264_SDROT_CHROMA		0x248
+#define VE_H264_OUTPUT_FRAME_IDX	0x24c
+#define VE_H264_EXTRA_BUFFER1		0x250
+#define VE_H264_EXTRA_BUFFER2		0x254
+#define VE_H264_BASIC_BITS		0x2dc
+#define VE_H264_RAM_WRITE_PTR		0x2e0
+#define VE_H264_RAM_WRITE_DATA		0x2e4
+
+#define VE_SRAM_H264_PRED_WEIGHT_TABLE	0x000
+#define VE_SRAM_H264_FRAMEBUFFER_LIST	0x400
+#define VE_SRAM_H264_REF_LIST0		0x640
+#define VE_SRAM_H264_REF_LIST1		0x664
+#define VE_SRAM_H264_SCALING_LISTS	0x800
+
+#define VE_HEVC_NAL_HDR			0x500
+#define VE_HEVC_SPS			0x504
+#define VE_HEVC_PIC_SIZE		0x508
+#define VE_HEVC_PCM_HDR			0x50c
+#define VE_HEVC_PPS0			0x510
+#define VE_HEVC_PPS1			0x514
+#define VE_HEVC_SLICE_HDR0		0x520
+#define VE_HEVC_SLICE_HDR1		0x524
+#define VE_HEVC_SLICE_HDR2		0x528
+#define VE_HEVC_CTB_ADDR		0x52c
+#define VE_HEVC_CTRL			0x530
+#define VE_HEVC_TRIG			0x534
+#define VE_HEVC_STATUS			0x538
+#define VE_HEVC_CTU_NUM			0x53c
+#define VE_HEVC_BITS_ADDR		0x540
+#define VE_HEVC_BITS_OFFSET		0x544
+#define VE_HEVC_BITS_LEN		0x548
+#define VE_HEVC_BITS_END_ADDR		0x54c
+#define VE_HEVC_REC_BUF_IDX 		0x55c
+#define VE_HEVC_NEIGHBOR_INFO_ADDR	0x560
+#define VE_HEVC_TILE_LIST_ADDR		0x564
+#define VE_HEVC_TILE_START_CTB		0x568
+#define VE_HEVC_TILE_END_CTB		0x56c
+#define VE_HEVC_BITS_DATA		0x5dc
+#define VE_HEVC_SRAM_ADDR		0x5e0
+#define VE_HEVC_SRAM_DATA		0x5e4
+
+#define VE_SRAM_HEVC_PRED_WEIGHT_LUMA_L0	0x000
+#define VE_SRAM_HEVC_PRED_WEIGHT_CHROMA_L0	0x020
+#define VE_SRAM_HEVC_PRED_WEIGHT_LUMA_L1	0x060
+#define VE_SRAM_HEVC_PRED_WEIGHT_CHROMA_L1	0x080
+#define VE_SRAM_HEVG_PIC_LIST			0x400
+#define VE_SRAM_HEVC_REF_PIC_LIST0		0xc00
+#define VE_SRAM_HEVC_REF_PIC_LIST1		0xc10
+
+#define VE_ISP_INPUT_SIZE		0xa00
+#define VE_ISP_INPUT_STRIDE		0xa04
+#define VE_ISP_CTRL			0xa08
+#define VE_ISP_INPUT_LUMA		0xa78
+#define VE_ISP_INPUT_CHROMA		0xa7c
+
+#define VE_AVC_PARAM			0xb04
+#define VE_AVC_QP			0xb08
+#define VE_AVC_MOTION_EST		0xb10
+#define VE_AVC_CTRL			0xb14
+#define VE_AVC_TRIGGER			0xb18
+#define VE_AVC_STATUS			0xb1c
+#define VE_AVC_BASIC_BITS		0xb20
+#define VE_AVC_UNK_BUF			0xb60
+#define VE_AVC_VLE_ADDR			0xb80
+#define VE_AVC_VLE_END			0xb84
+#define VE_AVC_VLE_OFFSET		0xb88
+#define VE_AVC_VLE_MAX			0xb8c
+#define VE_AVC_VLE_LENGTH		0xb90
+#define VE_AVC_REF_LUMA			0xba0
+#define VE_AVC_REF_CHROMA		0xba4
+#define VE_AVC_REC_LUMA			0xbb0
+#define VE_AVC_REC_CHROMA		0xbb4
+#define VE_AVC_REF_SLUMA		0xbb8
+#define VE_AVC_REC_SLUMA		0xbbc
+#define VE_AVC_MB_INFO			0xbc0
+
+
+#endif
diff --git a/libavcodec/cedrus264.c b/libavcodec/cedrus264.c
new file mode 100755
index 0000000..5e62361
--- /dev/null
+++ b/libavcodec/cedrus264.c
@@ -0,0 +1,402 @@ 
+/*
+ * Cedrus 264 Video Encoder
+ * Copyright (c) 2014 Julien Folly
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Cedrus 264 Encoder
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/stat.h>
+
+#include "libavutil/internal.h"
+#include "libavutil/opt.h"
+#include "libavutil/mem.h"
+#include "libavutil/pixdesc.h"
+#include "avcodec.h"
+#include "internal.h"
+
+#include "arm/sunxi/ve.h"
+
+/* byte stream utils from:
+ * https://github.com/jemk/cedrus/tree/master/h264enc
+ */
+/*
+ * Hand @x to the AVC engine's bit writer: the value goes to
+ * VE_AVC_BASIC_BITS, then VE_AVC_TRIGGER is written with bit 0 set and the
+ * bit count @num (masked to 5 bits) in bits 8..12.
+ */
+static void put_bits(void* regs, uint32_t x, int num)
+{
+	uint8_t *base = regs;
+	uint32_t trigger = 0x1 | ((num & 0x1f) << 8);
+
+	writel(x, base + VE_AVC_BASIC_BITS);
+	writel(trigger, base + VE_AVC_TRIGGER);
+	/* Open question from the original author: how to check for completion? */
+}
+
+/*
+ * Write @x as an unsigned Exp-Golomb code, ue(v): the value x + 1 is emitted
+ * in 2 * n - 1 bits, where n is the number of significant bits of x + 1, so
+ * the leading zeros of the code come from the field width.
+ */
+static void put_ue(void* regs, uint32_t x)
+{
+	uint32_t code = x + 1;
+	int significant = 32 - __builtin_clz(code);
+
+	put_bits(regs, code, 2 * significant - 1);
+}
+
+/*
+ * Write @x as a signed Exp-Golomb code, se(v).
+ *
+ * Positive values map to 2 * x - 1 and non-positive values to -2 * x; the
+ * XOR with the sign mask turns a negative 2 * x - 1 into -2 * x.  The mapped
+ * value is then written with put_ue().
+ */
+static void put_se(void* regs, int x)
+{
+	int mapped = 2 * x - 1;
+
+	mapped ^= (mapped >> 31);
+	put_ue(regs, mapped);
+}
+
+/*
+ * Write a four-byte start code (31 zero bits followed by a one bit).
+ *
+ * Bit 31 of VE_AVC_PARAM is set while the start code is written so that no
+ * emulation_prevention_three_byte is inserted into it; the previous
+ * register value is restored afterwards.
+ */
+static void put_start_code(void* regs)
+{
+	uint32_t saved_param = readl((uint8_t *)regs + VE_AVC_PARAM);
+
+	/* Disable emulation_prevention_three_byte (unsigned shift: 1 << 31 on int is undefined) */
+	writel(saved_param | (UINT32_C(1) << 31), (uint8_t *)regs + VE_AVC_PARAM);
+
+	put_bits(regs, 0, 31);
+	put_bits(regs, 1, 1);
+
+	writel(saved_param, (uint8_t *)regs + VE_AVC_PARAM);
+}
+
+/*
+ * Write rbsp_trailing_bits: a single one bit followed by zero bits.
+ *
+ * The number of zero bits is derived from the current value of
+ * VE_AVC_VLE_LENGTH (presumably the number of bits written so far - TODO
+ * confirm against the hardware documentation).
+ * NOTE(review): when (cur_bs_len + 1) is a multiple of 8 this emits the one
+ * bit plus 8 zero bits rather than no zero bits at all - confirm that the
+ * extra zero byte is intended.
+ */
+static void put_rbsp_trailing_bits(void* regs)
+{
+	unsigned int cur_bs_len = readl((uint8_t *)regs + VE_AVC_VLE_LENGTH);
+
+	int num_zero_bits = 8 - ((cur_bs_len + 1) & 0x7);
+	put_bits(regs, 1 << num_zero_bits, num_zero_bits + 1);
+}
+
+/*
+ * Write a sequence parameter set NAL unit (header byte plus payload, without
+ * start code and trailing bits).
+ *
+ * @width and @height are given in macroblocks: they are written as
+ * pic_width_in_mbs_minus1 and pic_height_in_map_units_minus1.
+ * Everything else is fixed: profile_idc 77, level_idc 41, frame_num of 4
+ * bits, pic_order_cnt_type 0 with an 8-bit POC LSB, one reference frame,
+ * frames only, no cropping and no VUI.
+ */
+static void put_seq_parameter_set(void* regs, int width, int height)
+{
+	put_bits(regs, 3 << 5 | 7 << 0, 8);	// NAL Header
+	put_bits(regs, 77, 8);			// profile_idc
+	put_bits(regs, 0x0, 8);			// constraints
+	put_bits(regs, 4 * 10 + 1, 8);		// level_idc
+	put_ue(regs, 0);			// seq_parameter_set_id
+
+	put_ue(regs, 0);			// log2_max_frame_num_minus4
+	put_ue(regs, 0);			// pic_order_cnt_type
+	// if (pic_order_cnt_type == 0)
+		put_ue(regs, 4);		// log2_max_pic_order_cnt_lsb_minus4
+
+	put_ue(regs, 1);			// max_num_ref_frames
+	put_bits(regs, 0, 1);			// gaps_in_frame_num_value_allowed_flag
+
+	put_ue(regs, width - 1);		// pic_width_in_mbs_minus1
+	put_ue(regs, height - 1);		// pic_height_in_map_units_minus1
+
+	put_bits(regs, 1, 1);			// frame_mbs_only_flag
+	// if (!frame_mbs_only_flag)
+
+	put_bits(regs, 1, 1);			// direct_8x8_inference_flag
+	put_bits(regs, 0, 1);			// frame_cropping_flag
+	// if (frame_cropping_flag)
+
+	put_bits(regs, 0, 1);			// vui_parameters_present_flag
+	// if (vui_parameters_present_flag)
+}
+
+/*
+ * Write a picture parameter set NAL unit (header byte plus payload, without
+ * start code and trailing bits).
+ *
+ * @qp_minus30 is written as both pic_init_qp_minus26 and pic_init_qs_minus26.
+ * The name reflects that put_slice_header() always adds a slice_qp_delta of
+ * 4, so the slice QP ends up as 26 + qp_minus30 + 4.
+ * The PPS selects entropy_coding_mode_flag = 1, a single slice group, no
+ * weighted prediction, and chroma_qp_index_offset = 4.
+ */
+static void put_pic_parameter_set(void *regs, int qp_minus30)
+{
+	put_bits(regs, 3 << 5 | 8 << 0, 8);	// NAL Header
+	put_ue(regs, 0);			// pic_parameter_set_id
+	put_ue(regs, 0);			// seq_parameter_set_id
+	put_bits(regs, 1, 1);			// entropy_coding_mode_flag
+	put_bits(regs, 0, 1);			// bottom_field_pic_order_in_frame_present_flag
+	put_ue(regs, 0);			// num_slice_groups_minus1
+	// if (num_slice_groups_minus1 > 0)
+
+	put_ue(regs, 0);			// num_ref_idx_l0_default_active_minus1
+	put_ue(regs, 0);			// num_ref_idx_l1_default_active_minus1
+	put_bits(regs, 0, 1);			// weighted_pred_flag
+	put_bits(regs, 0, 2);			// weighted_bipred_idc
+	//put_se(regs, 0);			// pic_init_qp_minus26 (minus slice_qp_delta)
+	//put_se(regs, 0);			// pic_init_qs_minus26
+	put_se(regs, qp_minus30);		// pic_init_qp_minus26 (minus slice_qp_delta)
+	put_se(regs, qp_minus30);		// pic_init_qs_minus26
+	put_se(regs, 4);			// chroma_qp_index_offset
+	put_bits(regs, 1, 1);			// deblocking_filter_control_present_flag
+	put_bits(regs, 0, 1);			// constrained_intra_pred_flag
+	put_bits(regs, 0, 1);			// redundant_pic_cnt_present_flag
+}
+
+/*
+ * Write the NAL header and slice header of a slice with nal_unit_type 5 and
+ * slice_type 2, covering the picture from macroblock 0.
+ *
+ * All fields are constants; frame_num (4 bits) and pic_order_cnt_lsb
+ * (8 bits) use the widths announced by put_seq_parameter_set().
+ *
+ * The lines indented below a "// if (...)" comment are unconditional: the
+ * comment names the syntax condition, which always holds for the parameter
+ * sets written by this file.
+ */
+static void put_slice_header(void* regs)
+{
+	put_bits(regs, 3 << 5 | 5 << 0, 8);	// NAL Header
+
+	put_ue(regs, 0);			// first_mb_in_slice
+	put_ue(regs, 2);			// slice_type
+	put_ue(regs, 0);			// pic_parameter_set_id
+	put_bits(regs, 0, 4);			// frame_num
+
+	// if (IdrPicFlag)
+		put_ue(regs, 0);		// idr_pic_id
+
+	// if (pic_order_cnt_type == 0)
+		put_bits(regs, 0, 8);		// pic_order_cnt_lsb
+
+	// dec_ref_pic_marking
+		put_bits(regs, 0, 1);		// no_output_of_prior_pics_flag
+		put_bits(regs, 0, 1);		// long_term_reference_flag
+
+	put_se(regs, 4);			// slice_qp_delta
+
+	// if (deblocking_filter_control_present_flag)
+		put_ue(regs, 0);		// disable_deblocking_filter_idc
+		// if (disable_deblocking_filter_idc != 1)
+			put_se(regs, 0);	// slice_alpha_c0_offset_div2
+			put_se(regs, 0);	// slice_beta_offset_div2
+}
+
+/*
+ * Write an access unit delimiter: NAL header with nal_ref_idc 0 and
+ * nal_unit_type 9, followed by the 3-bit primary_pic_type 7.
+ * Start code and trailing bits are not written here.
+ */
+static void put_aud(void* regs)
+{
+	put_bits(regs, 0 << 5 | 9 << 0, 8);	// NAL Header
+
+	put_bits(regs, 7, 3);			// primary_pic_type
+}
+
+/* Size in bytes of the bitstream output buffer (1 MiB); parenthesized so it is safe inside larger expressions. */
+#define CEDAR_OUTPUT_BUF_SIZE	(1*1024*1024)
+/*
+ * Private encoder state (avctx->priv_data).
+ *
+ *  class        - AVClass pointer (presumably for AVOptions - TODO confirm)
+ *  ve_regs      - register base returned by ve_get()
+ *  *_buf        - VE buffers allocated with ve_malloc() in
+ *                 cedrus264_encode_init()
+ *  tile_w/h     - picture width/height rounded up to a multiple of 32
+ *  tile_w2/h2   - half the picture width/height rounded up to a multiple of 32
+ *  mb_w/mb_h    - picture size in 16x16 macroblocks, rounded up
+ *  plane_size   - bytes of the luma plane at macroblock-aligned size
+ *  frame_size   - plane_size plus plane_size / 2
+ *  frame_num    - not used in the part of the file visible here
+ *  qp           - value written to both low bytes of VE_AVC_QP
+ *  vewait       - seconds to keep retrying ve_lock() before giving up
+ */
+typedef struct cedrus264Context {
+	AVClass *class;
+	uint8_t *ve_regs;
+	struct ve_mem *input_buf, *output_buf, *reconstruct_buf, *small_luma_buf, *mb_info_buf;
+	unsigned int tile_w, tile_w2, tile_h, tile_h2, mb_w, mb_h, plane_size, frame_size;
+	unsigned int frame_num;
+	int qp, vewait;
+} cedrus264Context;
+
+/**
+ * Initialise the encoder.
+ *
+ * Validates the input format, takes the VE lock (optionally waiting up to
+ * the "vewait" option in seconds), opens the VE, allocates the working
+ * buffers, activates the AVC engine and programs the registers that do not
+ * change between frames.
+ *
+ * Every resource acquired here is released again if a later step fails, so
+ * a failed init does not leave the VE locked or buffers allocated.
+ *
+ * @param avctx codec context; priv_data is a cedrus264Context
+ * @return 0 on success, a negative AVERROR code on failure
+ */
+static av_cold int cedrus264_encode_init(AVCodecContext *avctx)
+{
+	cedrus264Context *c4 = avctx->priv_data;
+	int ret;
+
+	/* Check pixel format */
+	if(avctx->pix_fmt != AV_PIX_FMT_NV12){
+		av_log(avctx, AV_LOG_FATAL, "Unsupported pixel format (use -pix_fmt nv12)!\n");
+		return AVERROR(EINVAL);
+	}
+
+	/* Check width */
+	if(avctx->width % 32 != 0){
+		av_log(avctx, AV_LOG_FATAL, "Input width is not a multiple of 32!\n");
+		return AVERROR(EINVAL);
+	}
+
+	/* Check if VE is available, retrying once per second while vewait lasts */
+	while(!ve_lock()){
+		if (c4->vewait <= 0){
+			av_log(avctx, AV_LOG_ERROR, "VE in use!\n");
+			/* the device is busy, not out of memory */
+			return AVERROR(EBUSY);
+		}
+		av_log(avctx, AV_LOG_INFO, "VE in use, wait %i seconds.\r", c4->vewait--);
+		sleep(1);
+	}
+
+	/* Open VE */
+	if(!ve_open()){
+		av_log(avctx, AV_LOG_ERROR, "VE Open error.\n");
+		ret = AVERROR(ENOMEM);
+		goto fail_unlock;
+	}
+
+	/* Compute tile, macroblock and plane size */
+	c4->tile_w = (avctx->width + 31) & ~31;
+	c4->tile_w2 = (avctx->width / 2 + 31) & ~31;
+	c4->tile_h = (avctx->height + 31) & ~31;
+	c4->tile_h2 = (avctx->height / 2 + 31) & ~31;
+	c4->mb_w = (avctx->width + 15) / 16;
+	c4->mb_h = (avctx->height + 15) / 16;
+	c4->plane_size = c4->mb_w * 16 * c4->mb_h * 16;
+	c4->frame_size = c4->plane_size + c4->plane_size / 2;
+
+	/* Alloc buffers */
+	c4->input_buf = ve_malloc(c4->frame_size);
+	c4->output_buf = ve_malloc(CEDAR_OUTPUT_BUF_SIZE);
+	c4->reconstruct_buf = ve_malloc(c4->tile_w * c4->tile_h + c4->tile_w * c4->tile_h2);
+	c4->small_luma_buf = ve_malloc(c4->tile_w2 * c4->tile_h2);
+	c4->mb_info_buf = ve_malloc(0x1000);
+	if(!c4->input_buf || !c4->output_buf || !c4->reconstruct_buf || !c4->small_luma_buf || !c4->mb_info_buf){
+		av_log(avctx, AV_LOG_FATAL, "Cannot allocate frame.\n");
+		ret = AVERROR(ENOMEM);
+		goto fail_free;
+	}
+
+	/* Activate AVC engine */
+	c4->ve_regs = ve_get(VE_ENGINE_AVC, 0);
+
+	/* ---- Part to put in cedrus264_encode if engine is used by multiple process (Need to be checked) */
+
+	/* Input size */
+	writel(c4->mb_w << 16, c4->ve_regs + VE_ISP_INPUT_STRIDE);
+	writel((c4->mb_w << 16) | (c4->mb_h << 0), c4->ve_regs + VE_ISP_INPUT_SIZE);
+
+	/* Input buffer: luma at the start, chroma plane_size bytes further on */
+	writel(c4->input_buf->phys, c4->ve_regs + VE_ISP_INPUT_LUMA);
+	writel(c4->input_buf->phys + c4->plane_size, c4->ve_regs + VE_ISP_INPUT_CHROMA);
+
+	/* Reference output */
+	writel(c4->reconstruct_buf->phys, c4->ve_regs + VE_AVC_REC_LUMA);
+	writel(c4->reconstruct_buf->phys + c4->tile_w * c4->tile_h, c4->ve_regs + VE_AVC_REC_CHROMA);
+	writel(c4->small_luma_buf->phys, c4->ve_regs + VE_AVC_REC_SLUMA);
+	writel(c4->mb_info_buf->phys, c4->ve_regs + VE_AVC_MB_INFO);
+
+	/* Encoding parameters */
+	writel(0x00000100, c4->ve_regs + VE_AVC_PARAM);
+	writel(0x00040000 | (c4->qp<<8) | c4->qp, c4->ve_regs + VE_AVC_QP);
+	//writel(0x00041e1e, c4->ve_regs + VE_AVC_QP); // Fixed QP=30
+	writel(0x00000104, c4->ve_regs + VE_AVC_MOTION_EST);
+
+	/* ---- Part end ---- */
+
+	/* Alloc Frame */
+	avctx->coded_frame = av_frame_alloc();
+	if(!avctx->coded_frame){
+		av_log(avctx, AV_LOG_FATAL, "Cannot allocate frame.\n");
+		ret = AVERROR(ENOMEM);
+		goto fail_put;
+	}
+
+	/* Init variables */
+	c4->frame_num = 0;
+	avctx->coded_frame->quality = c4->qp * FF_QP2LAMBDA;
+
+	return 0;
+
+	/* Unwind in the same order cedrus264_close() tears things down */
+fail_put:
+	ve_put();
+fail_free:
+	/* only some of the buffers may exist; free just those */
+	if(c4->input_buf)
+		ve_free(c4->input_buf);
+	if(c4->output_buf)
+		ve_free(c4->output_buf);
+	if(c4->reconstruct_buf)
+		ve_free(c4->reconstruct_buf);
+	if(c4->small_luma_buf)
+		ve_free(c4->small_luma_buf);
+	if(c4->mb_info_buf)
+		ve_free(c4->mb_info_buf);
+	c4->input_buf = c4->output_buf = c4->reconstruct_buf = NULL;
+	c4->small_luma_buf = c4->mb_info_buf = NULL;
+	ve_close();
+fail_unlock:
+	ve_unlock();
+	return ret;
+}
+
+/**
+ * Encode one NV12 frame into one packet.
+ *
+ * The frame is copied into the VE input buffer, the NAL headers (AUD,
+ * and SPS/PPS on the first frame) plus the slice header are emitted
+ * through the put_* helpers, the engine is triggered, and the resulting
+ * bitstream is copied out of the output buffer into pkt.
+ *
+ * @param avctx      codec context; priv_data is a cedrus264Context
+ * @param pkt        packet that receives the encoded data
+ * @param frame      input picture in NV12 layout
+ * @param got_packet set to 1 if pkt was filled, 0 if the engine reported no data
+ * @return 0 on success, a negative AVERROR code if the packet cannot be allocated
+ */
+static int cedrus264_encode(AVCodecContext *avctx, AVPacket *pkt,
+                      const AVFrame *frame, int *got_packet)
+{
+	cedrus264Context *c4 = avctx->priv_data;
+	uint8_t *dst = (uint8_t *)c4->input_buf->virt;
+	const int chroma_h = (avctx->height + 1) / 2;
+	unsigned int size;
+	int result, y;
+
+	/*
+	 * Copy data plane by plane. The engine reads chroma from
+	 * input_buf + plane_size (height rounded up to 16 rows), so chroma must
+	 * start there and not directly after the last visible luma row; the two
+	 * positions only coincide when the height is a multiple of 16.
+	 * Rows are packed at avctx->width bytes; init guarantees width % 32 == 0,
+	 * so this equals mb_w * 16 and both planes fit inside frame_size.
+	 */
+	for (y = 0; y < avctx->height; y++)
+		memcpy(dst + (size_t)y * avctx->width,
+		       frame->data[0] + y * frame->linesize[0], avctx->width);
+	for (y = 0; y < chroma_h; y++)
+		memcpy(dst + c4->plane_size + (size_t)y * avctx->width,
+		       frame->data[1] + y * frame->linesize[1], avctx->width);
+	ve_flush_cache(c4->input_buf);
+
+	/* flush output buffer, otherwise we might read old cached data */
+	ve_flush_cache(c4->output_buf);
+
+	/* Set output buffer */
+	writel(0x0, c4->ve_regs + VE_AVC_VLE_OFFSET);
+	writel(c4->output_buf->phys, c4->ve_regs + VE_AVC_VLE_ADDR);
+	writel(c4->output_buf->phys + CEDAR_OUTPUT_BUF_SIZE - 1, c4->ve_regs + VE_AVC_VLE_END);
+
+	writel(0x04000000, c4->ve_regs + 0xb8c); // ???
+
+	/* Access unit delimiter */
+	put_start_code(c4->ve_regs);
+	put_aud(c4->ve_regs);
+	put_rbsp_trailing_bits(c4->ve_regs);
+
+	/* Parameter sets are emitted once, in front of the first frame */
+	if (c4->frame_num == 0)
+	{
+		put_start_code(c4->ve_regs);
+		put_seq_parameter_set(c4->ve_regs, c4->mb_w, c4->mb_h);
+		put_rbsp_trailing_bits(c4->ve_regs);
+
+		put_start_code(c4->ve_regs);
+		put_pic_parameter_set(c4->ve_regs, c4->qp - 30);
+		put_rbsp_trailing_bits(c4->ve_regs);
+	}
+
+	put_start_code(c4->ve_regs);
+	put_slice_header(c4->ve_regs);
+
+	writel(readl(c4->ve_regs + VE_AVC_CTRL) | 0xf, c4->ve_regs + VE_AVC_CTRL);
+	writel(readl(c4->ve_regs + VE_AVC_STATUS) | 0x7, c4->ve_regs + VE_AVC_STATUS);
+
+	/* Start the engine and wait for it */
+	writel(0x8, c4->ve_regs + VE_AVC_TRIGGER);
+	ve_wait(1);
+
+	/* Write the status register back to itself */
+	writel(readl(c4->ve_regs + VE_AVC_STATUS), c4->ve_regs + VE_AVC_STATUS);
+
+	/* The length register is divided by 8 to obtain a byte count */
+	size = readl(c4->ve_regs + VE_AVC_VLE_LENGTH) / 8;
+	if(size > 0){
+		if ((result = ff_alloc_packet(pkt, size)) < 0){
+			av_log(avctx, AV_LOG_ERROR, "Packet allocation error.\n");
+			return result;
+		}
+		memcpy(pkt->data, c4->output_buf->virt, size);
+
+		pkt->pts = pkt->dts = frame->pts - ff_samples_to_time_base(avctx, avctx->delay);
+		pkt->flags |= AV_PKT_FLAG_KEY;
+		*got_packet = 1;
+	}else *got_packet = 0;
+
+	c4->frame_num++;
+
+	return 0;
+}
+
+/**
+ * Tear down the encoder: release the AVC engine, free the five VE
+ * buffers, close and unlock the VE, then free the coded frame.
+ *
+ * @return always 0
+ */
+static av_cold int cedrus264_close(AVCodecContext *avctx)
+{
+	cedrus264Context *c4 = avctx->priv_data;
+	/* buffers listed in the order they are released */
+	struct ve_mem *const bufs[] = {
+		c4->input_buf,
+		c4->output_buf,
+		c4->reconstruct_buf,
+		c4->small_luma_buf,
+		c4->mb_info_buf,
+	};
+	unsigned int i;
+
+	/* give the AVC engine back first */
+	ve_put();
+
+	for (i = 0; i < sizeof(bufs) / sizeof(bufs[0]); i++)
+		ve_free(bufs[i]);
+
+	/* shut the VE down and drop the lock taken in init */
+	ve_close();
+	ve_unlock();
+
+	av_frame_free(&avctx->coded_frame);
+
+	return 0;
+}
+
+/* Byte offset of a cedrus264Context member, for the option table below */
+#define OFFSET(x) offsetof(cedrus264Context, x)
+/* Flags shared by every option; parenthesized so the macro is safe in any expression */
+#define VE (AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM)
+static const AVOption options[] = {
+	/* The full quality range from 0 to 51 does not work; good values are between 2 and 47 */
+	{ "qp",		"Constant quantization parameter rate control method", OFFSET(qp), AV_OPT_TYPE_INT, { .i64 = 30 }, 2, 47, VE },
+	/* Number of one-second retries init makes while the VE lock is held elsewhere */
+	{ "vewait",	"Time to wait if the VE is busy (default 0)", OFFSET(vewait), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, INT_MAX, VE },
+	{ NULL },
+};
+
+/* AVClass that exposes the private "qp" and "vewait" options of this encoder */
+static const AVClass cedrus264_class = {
+	.version    = LIBAVUTIL_VERSION_INT,
+	.option     = options,
+	.item_name  = av_default_item_name,
+	.class_name = "cedrus264",
+};
+
+/* Codec registration entry for the Cedrus hardware H.264 encoder */
+AVCodec ff_cedrus264_encoder = {
+	.name           = "cedrus264",
+	.long_name      = NULL_IF_CONFIG_SMALL("Cedrus H.264 Encoder"),
+	.type           = AVMEDIA_TYPE_VIDEO,
+	.id             = AV_CODEC_ID_H264,
+	.priv_data_size = sizeof(cedrus264Context),
+	.init           = cedrus264_encode_init,
+	.encode2        = cedrus264_encode,
+	.close          = cedrus264_close,
+	.priv_class	= &cedrus264_class,
+	/* advertise the only input format init accepts, so callers can negotiate it */
+	.pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_NV12, AV_PIX_FMT_NONE },
+};