Message ID | 20170327025833.GA1669@tdjones879 |
---|---|
State | New |
Headers | show |
On Sun, Mar 26, 2017 at 08:58:33PM -0600, Tyler Jones wrote: > The existing AAC psychoacoustic system is used to detect transients within the > vorbis encoder. This is useful, in general, as an initial step in later utilizing > a complex psychoacoustic model for the vorbis encoder, but more specifically > allows the cacellation of pre-echo effects that frequently occur with this > codec. > > Signed-off-by: Tyler Jones <tdjones879@gmail.com> > --- > libavcodec/psymodel.c | 1 + > libavcodec/vorbisenc.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 61 insertions(+) This changes the output of make fate: stddev: 6521.39 PSNR: 20.04 MAXDIFF:55190 bytes: 1675800/ 1675264 stddev: |6521.39 - 296| >= 30 Test vorbis-encode failed. Look at tests/data/fate/vorbis-encode.err for details. make: *** [fate-vorbis-encode] Error 1 If the change is intended, the reference must be updated [...]
On 27 March 2017 at 03:58, Tyler Jones <tdjones879@gmail.com> wrote: > The existing AAC psychoacoustic system is used to detect transients within > the > vorbis encoder. This is useful, in general, as an initial step in later > utilizing > a complex psychoacoustic model for the vorbis encoder, but more > specifically > allows the cacellation of pre-echo effects that frequently occur with this > codec. > > Signed-off-by: Tyler Jones <tdjones879@gmail.com> > --- > libavcodec/psymodel.c | 1 + > libavcodec/vorbisenc.c | 60 ++++++++++++++++++++++++++++++ > ++++++++++++++++++++ > 2 files changed, 61 insertions(+) > > diff --git a/libavcodec/psymodel.c b/libavcodec/psymodel.c > index 2b5f111..38831ce 100644 > --- a/libavcodec/psymodel.c > +++ b/libavcodec/psymodel.c > @@ -62,6 +62,7 @@ av_cold int ff_psy_init(FFPsyContext *ctx, > AVCodecContext *avctx, int num_lens, > > switch (ctx->avctx->codec_id) { > case AV_CODEC_ID_AAC: > + case AV_CODEC_ID_VORBIS: > ctx->model = &ff_aac_psy_model; > break; > } > diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c > index 2974ca2..e4ec822 100644 > --- a/libavcodec/vorbisenc.c > +++ b/libavcodec/vorbisenc.c > @@ -33,6 +33,8 @@ > #include "vorbis.h" > #include "vorbis_enc_data.h" > > +#include "psymodel.h" > + > #define BITSTREAM_WRITER_LE > #include "put_bits.h" > > @@ -126,6 +128,9 @@ typedef struct vorbis_enc_context { > vorbis_enc_mode *modes; > > int64_t next_pts; > + > + FFPsyContext psy; > + struct FFPsyPreprocessContext* psypp; > } vorbis_enc_context; > > #define MAX_CHANNELS 2 > @@ -1024,10 +1029,38 @@ static int vorbis_encode_frame(AVCodecContext > *avctx, AVPacket *avpkt, > vorbis_enc_context *venc = avctx->priv_data; > float **audio = frame ? (float **)frame->extended_data : NULL; > int samples = frame ? 
frame->nb_samples : 0; > + float *samples2, *la, *overlap; > vorbis_enc_mode *mode; > vorbis_enc_mapping *mapping; > PutBitContext pb; > int i, ret; > + int start_ch, ch, chans, cur_channel; > + FFPsyWindowInfo windows[MAX_CHANNELS]; > + enum WindowSequence window_sequence[MAX_CHANNELS]; > + > + if (!avctx->frame_number) > + return 0; > + > + if (venc->psypp) > + ff_psy_preprocess(venc->psypp, audio, venc->channels); > + > + if (frame) { > + start_ch = 0; > + cur_channel = 0; > + for (i = 0; i < venc->channels - 1; i++) { > + FFPsyWindowInfo* wi = windows + start_ch; > + chans = 2; > + for (ch = 0; ch < 2; ch++) { > + cur_channel = start_ch + ch; > + overlap = &audio[cur_channel][0]; > + samples2 = overlap + 1024; > + la = samples2 + (448+64); > + wi[ch] = venc->psy.model->window(&venc->psy, samples2, > la, > + cur_channel, > window_sequence[0]); > window_sequence[0] must point to the previous frame's type, not the current. You'll need to add enum WindowSequence window_sequence[MAX_CHANNELS]; to the main encoder context. After that, check the wi[i] for EIGHT_SHORT, and if it is EIGHT_SHORT, you'll need to modify the encoder to do 8 small transforms and signal that so that the decoder knows what to do. 
> + } > + start_ch += chans; > + } > + } > > if (!apply_window_and_mdct(venc, audio, samples)) > return 0; > @@ -1158,7 +1191,10 @@ static av_cold int vorbis_encode_close(AVCodecContext > *avctx) > > ff_mdct_end(&venc->mdct[0]); > ff_mdct_end(&venc->mdct[1]); > + ff_psy_end(&venc->psy); > > + if (venc->psypp) > + ff_psy_preprocess_end(venc->psypp); > av_freep(&avctx->extradata); > > return 0 ; > @@ -1168,6 +1204,10 @@ static av_cold int vorbis_encode_init(AVCodecContext > *avctx) > { > vorbis_enc_context *venc = avctx->priv_data; > int ret; > + const uint8_t *sizes[MAX_CHANNELS]; > + uint8_t grouping[MAX_CHANNELS]; > + int lengths[MAX_CHANNELS]; > + int samplerate_index; > > if (avctx->channels != 2) { > av_log(avctx, AV_LOG_ERROR, "Current FFmpeg Vorbis encoder only > supports 2 channels.\n"); > @@ -1190,6 +1230,26 @@ static av_cold int vorbis_encode_init(AVCodecContext > *avctx) > > avctx->frame_size = 1 << (venc->log2_blocksize[0] - 1); > > + for (samplerate_index = 0; samplerate_index < 16; samplerate_index++) > + if (avctx->sample_rate == mpeg4audio_sample_rates[ > samplerate_index]) > + break; > + if (samplerate_index == 16 || > + samplerate_index >= ff_vorbis_swb_size_1024_len || > + samplerate_index >= ff_vorbis_swb_size_128_len) > + av_log(avctx, AV_LOG_ERROR, "Unsupported sample rate %d\n", > avctx->sample_rate); > + > + sizes[0] = ff_vorbis_swb_size_1024[samplerate_index]; > + sizes[1] = ff_vorbis_swb_size_128[samplerate_index]; > + lengths[0] = ff_vorbis_num_swb_1024[samplerate_index]; > + lengths[1] = ff_vorbis_num_swb_128[samplerate_index]; > + grouping[0] = 1; > + > + if ((ret = ff_psy_init(&venc->psy, avctx, 2, > + sizes, lengths, > + 1, grouping)) < 0) > + goto error; > + venc->psypp = ff_psy_preprocess_init(avctx); > + > return 0; > error: > vorbis_encode_close(avctx); > -- > 2.7.4 > > _______________________________________________ > ffmpeg-devel mailing list > ffmpeg-devel@ffmpeg.org > http://ffmpeg.org/mailman/listinfo/ffmpeg-devel >
diff --git a/libavcodec/psymodel.c b/libavcodec/psymodel.c index 2b5f111..38831ce 100644 --- a/libavcodec/psymodel.c +++ b/libavcodec/psymodel.c @@ -62,6 +62,7 @@ av_cold int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, switch (ctx->avctx->codec_id) { case AV_CODEC_ID_AAC: + case AV_CODEC_ID_VORBIS: ctx->model = &ff_aac_psy_model; break; } diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c index 2974ca2..e4ec822 100644 --- a/libavcodec/vorbisenc.c +++ b/libavcodec/vorbisenc.c @@ -33,6 +33,8 @@ #include "vorbis.h" #include "vorbis_enc_data.h" +#include "psymodel.h" + #define BITSTREAM_WRITER_LE #include "put_bits.h" @@ -126,6 +128,9 @@ typedef struct vorbis_enc_context { vorbis_enc_mode *modes; int64_t next_pts; + + FFPsyContext psy; + struct FFPsyPreprocessContext* psypp; } vorbis_enc_context; #define MAX_CHANNELS 2 @@ -1024,10 +1029,38 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, vorbis_enc_context *venc = avctx->priv_data; float **audio = frame ? (float **)frame->extended_data : NULL; int samples = frame ? 
frame->nb_samples : 0; + float *samples2, *la, *overlap; vorbis_enc_mode *mode; vorbis_enc_mapping *mapping; PutBitContext pb; int i, ret; + int start_ch, ch, chans, cur_channel; + FFPsyWindowInfo windows[MAX_CHANNELS]; + enum WindowSequence window_sequence[MAX_CHANNELS]; + + if (!avctx->frame_number) + return 0; + + if (venc->psypp) + ff_psy_preprocess(venc->psypp, audio, venc->channels); + + if (frame) { + start_ch = 0; + cur_channel = 0; + for (i = 0; i < venc->channels - 1; i++) { + FFPsyWindowInfo* wi = windows + start_ch; + chans = 2; + for (ch = 0; ch < 2; ch++) { + cur_channel = start_ch + ch; + overlap = &audio[cur_channel][0]; + samples2 = overlap + 1024; + la = samples2 + (448+64); + wi[ch] = venc->psy.model->window(&venc->psy, samples2, la, + cur_channel, window_sequence[0]); + } + start_ch += chans; + } + } if (!apply_window_and_mdct(venc, audio, samples)) return 0; @@ -1158,7 +1191,10 @@ static av_cold int vorbis_encode_close(AVCodecContext *avctx) ff_mdct_end(&venc->mdct[0]); ff_mdct_end(&venc->mdct[1]); + ff_psy_end(&venc->psy); + if (venc->psypp) + ff_psy_preprocess_end(venc->psypp); av_freep(&avctx->extradata); return 0 ; @@ -1168,6 +1204,10 @@ static av_cold int vorbis_encode_init(AVCodecContext *avctx) { vorbis_enc_context *venc = avctx->priv_data; int ret; + const uint8_t *sizes[MAX_CHANNELS]; + uint8_t grouping[MAX_CHANNELS]; + int lengths[MAX_CHANNELS]; + int samplerate_index; if (avctx->channels != 2) { av_log(avctx, AV_LOG_ERROR, "Current FFmpeg Vorbis encoder only supports 2 channels.\n"); @@ -1190,6 +1230,26 @@ static av_cold int vorbis_encode_init(AVCodecContext *avctx) avctx->frame_size = 1 << (venc->log2_blocksize[0] - 1); + for (samplerate_index = 0; samplerate_index < 16; samplerate_index++) + if (avctx->sample_rate == mpeg4audio_sample_rates[samplerate_index]) + break; + if (samplerate_index == 16 || + samplerate_index >= ff_vorbis_swb_size_1024_len || + samplerate_index >= ff_vorbis_swb_size_128_len) + av_log(avctx, AV_LOG_ERROR, 
"Unsupported sample rate %d\n", avctx->sample_rate); + + sizes[0] = ff_vorbis_swb_size_1024[samplerate_index]; + sizes[1] = ff_vorbis_swb_size_128[samplerate_index]; + lengths[0] = ff_vorbis_num_swb_1024[samplerate_index]; + lengths[1] = ff_vorbis_num_swb_128[samplerate_index]; + grouping[0] = 1; + + if ((ret = ff_psy_init(&venc->psy, avctx, 2, + sizes, lengths, + 1, grouping)) < 0) + goto error; + venc->psypp = ff_psy_preprocess_init(avctx); + return 0; error: vorbis_encode_close(avctx);
The existing AAC psychoacoustic system is used to detect transients within the vorbis encoder. This is useful, in general, as an initial step in later utilizing a complex psychoacoustic model for the vorbis encoder, but more specifically allows the cancellation of pre-echo effects that frequently occur with this codec. Signed-off-by: Tyler Jones <tdjones879@gmail.com> --- libavcodec/psymodel.c | 1 + libavcodec/vorbisenc.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+)