diff mbox

[FFmpeg-devel,V3,1/2] avcodec/vorbisenc: Add pre-echo detection

Message ID 20170727212245.GA32538@tdjones.localdomain
State Superseded
Headers show

Commit Message

Tyler Jones July 27, 2017, 9:22 p.m. UTC
The encoder will attempt to determine the existence of transient
signals by applying a 4th order highpass filter to remove dominant
low frequency waveforms. Frames are then split up into blocks
where the variance is calculated and compared with blocks from
the previous frame. A preecho is only likely to be noticeable when
relatively quiet audio is followed by a loud transient signal.

Signed-off-by: Tyler Jones <tdjones879@gmail.com>
---
V3: Use normal float notation
    Don't check before freeing NULL pointers
    Remove unnecessary includes	

V2: Provide proper prefix for non-static function

 libavcodec/Makefile    |   2 +-
 libavcodec/vorbisenc.c |  27 +++++++--
 libavcodec/vorbispsy.c | 148 +++++++++++++++++++++++++++++++++++++++++++++++++
 libavcodec/vorbispsy.h |  79 ++++++++++++++++++++++++++
 4 files changed, 251 insertions(+), 5 deletions(-)
 create mode 100644 libavcodec/vorbispsy.c
 create mode 100644 libavcodec/vorbispsy.h

Comments

James Almer July 27, 2017, 10:04 p.m. UTC | #1
On 7/27/2017 6:22 PM, Tyler Jones wrote:
> The encoder will attempt to determine the existence of transient
> signals by applying a 4th order highpass filter to remove dominant
> low frequency waveforms. Frames are then split up into blocks
> where the variance is calculated and compared with blocks from
> the previous frame. A preecho is only likely to be noticeable when
> relatively quiet audio is followed by a loud transient signal.
> 
> Signed-off-by: Tyler Jones <tdjones879@gmail.com>
> ---
> V3: Use normal float notation
>     Don't check before freeing NULL pointers
>     Remove unnecessary includes	
> 
> V2: Provide proper prefix for non-static function
> 
>  libavcodec/Makefile    |   2 +-
>  libavcodec/vorbisenc.c |  27 +++++++--
>  libavcodec/vorbispsy.c | 148 +++++++++++++++++++++++++++++++++++++++++++++++++
>  libavcodec/vorbispsy.h |  79 ++++++++++++++++++++++++++
>  4 files changed, 251 insertions(+), 5 deletions(-)
>  create mode 100644 libavcodec/vorbispsy.c
>  create mode 100644 libavcodec/vorbispsy.h
> 
> diff --git a/libavcodec/Makefile b/libavcodec/Makefile
> index 357fa1a361..08acbc723e 100644
> --- a/libavcodec/Makefile
> +++ b/libavcodec/Makefile
> @@ -611,7 +611,7 @@ OBJS-$(CONFIG_VMNC_DECODER)            += vmnc.o
>  OBJS-$(CONFIG_VORBIS_DECODER)          += vorbisdec.o vorbisdsp.o vorbis.o \
>                                            vorbis_data.o
>  OBJS-$(CONFIG_VORBIS_ENCODER)          += vorbisenc.o vorbis.o \
> -                                          vorbis_data.o
> +                                          vorbis_data.o vorbispsy.o
>  OBJS-$(CONFIG_VP3_DECODER)             += vp3.o
>  OBJS-$(CONFIG_VP5_DECODER)             += vp5.o vp56.o vp56data.o vp56rac.o
>  OBJS-$(CONFIG_VP6_DECODER)             += vp6.o vp56.o vp56data.o \
> diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c
> index bf21a3b1ff..1330b1b376 100644
> --- a/libavcodec/vorbisenc.c
> +++ b/libavcodec/vorbisenc.c
> @@ -33,6 +33,7 @@
>  #include "mathops.h"
>  #include "vorbis.h"
>  #include "vorbis_enc_data.h"
> +#include "vorbispsy.h"
>  
>  #include "audio_frame_queue.h"
>  #include "libavfilter/bufferqueue.h"
> @@ -136,6 +137,7 @@ typedef struct vorbis_enc_context {
>      int64_t next_pts;
>  
>      AVFloatDSPContext *fdsp;
> +    VorbisPsyContext *vpctx;

Why a pointer? I don't see the benefit. It means an unnecessary malloc
and free call.

>  } vorbis_enc_context;
>  
>  #define MAX_CHANNELS     2
> @@ -272,11 +274,12 @@ static int create_vorbis_context(vorbis_enc_context *venc,
>      vorbis_enc_floor   *fc;
>      vorbis_enc_residue *rc;
>      vorbis_enc_mapping *mc;
> -    int i, book, ret;
> +    int i, book, ret, blocks;
>  
>      venc->channels    = avctx->channels;
>      venc->sample_rate = avctx->sample_rate;
> -    venc->log2_blocksize[0] = venc->log2_blocksize[1] = 11;
> +    venc->log2_blocksize[0] = 8;
> +    venc->log2_blocksize[1] = 11;
>  
>      venc->ncodebooks = FF_ARRAY_ELEMS(cvectors);
>      venc->codebooks  = av_malloc(sizeof(vorbis_enc_codebook) * venc->ncodebooks);
> @@ -464,6 +467,12 @@ static int create_vorbis_context(vorbis_enc_context *venc,
>      if ((ret = dsp_init(avctx, venc)) < 0)
>          return ret;
>  
> +    blocks = 1 << (venc->log2_blocksize[1] - venc->log2_blocksize[0]);
> +    venc->vpctx = av_mallocz(sizeof(VorbisPsyContext));
> +    if (!venc->vpctx || (ret = ff_psy_vorbis_init(venc->vpctx, venc->sample_rate,
> +                                                  venc->channels, blocks)) < 0)
> +        return AVERROR(ENOMEM);
> +
>      return 0;
>  }
>  
> @@ -1078,15 +1087,17 @@ static void move_audio(vorbis_enc_context *venc, int sf_size)
>          av_frame_free(&cur);
>      }
>      venc->have_saved = 1;
> -    memcpy(venc->scratch, venc->samples, 2 * venc->channels * frame_size);
> +    memcpy(venc->scratch, venc->samples, sizeof(float) * venc->channels * 2 * frame_size);
>  }
>  
>  static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
>                                 const AVFrame *frame, int *got_packet_ptr)
>  {
>      vorbis_enc_context *venc = avctx->priv_data;
> -    int i, ret, need_more;
> +    int i, ret, need_more, ch;
> +    int curr_win = 1;
>      int frame_size = 1 << (venc->log2_blocksize[1] - 1);
> +    int block_size = 1 << (venc->log2_blocksize[0] - 1);
>      vorbis_enc_mode *mode;
>      vorbis_enc_mapping *mapping;
>      PutBitContext pb;
> @@ -1121,6 +1132,13 @@ static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
>  
>      move_audio(venc, avctx->frame_size);
>  
> +    for (ch = 0; ch < venc->channels; ch++) {
> +        float *scratch = venc->scratch + 2 * ch * frame_size + frame_size;
> +
> +        if (!ff_psy_vorbis_block_frame(venc->vpctx, scratch, ch, frame_size, block_size))
> +            curr_win = 0;
> +    }
> +
>      if (!apply_window_and_mdct(venc))
>          return 0;
>  
> @@ -1252,6 +1270,7 @@ static av_cold int vorbis_encode_close(AVCodecContext *avctx)
>      ff_mdct_end(&venc->mdct[1]);
>      ff_af_queue_close(&venc->afq);
>      ff_bufqueue_discard_all(&venc->bufqueue);
> +    ff_psy_vorbis_close(venc->vpctx);

You should pass a pointer to venc->vpctx instead, regardless of what you
do with the comment above.

>  
>      av_freep(&avctx->extradata);
>  
> diff --git a/libavcodec/vorbispsy.c b/libavcodec/vorbispsy.c
> new file mode 100644
> index 0000000000..ef48f6ac8c
> --- /dev/null
> +++ b/libavcodec/vorbispsy.c
> @@ -0,0 +1,148 @@
> +/*
> + * Vorbis encoder psychoacoustic model
> + * Copyright (C) 2017 Tyler Jones
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +#include "libavutil/ffmath.h"
> +
> +#include "avcodec.h"
> +#include "vorbispsy.h"
> +
> +/**
> + * Generate the coefficients for a highpass biquad filter
> + *
> + * @param filter Instance of biquad filter to be initialized
> + * @param Fs     Input's sampling frequency
> + * @param Fc     Critical frequency for samples to be passed
> + * @param Q      Quality factor
> + */
> +static av_cold void biquad_filter_init(IIRFilter *filter, int Fs, int Fc, float Q)
> +{
> +    float k = tan(M_PI * Fc / Fs);
> +    float normalize = 1 / (1 + k / Q + k * k);
> +
> +    filter->b[0] = normalize;
> +    filter->b[1] = -2 * normalize;
> +    filter->b[2] = normalize;
> +
> +    filter->a[0] = 1;
> +    filter->a[1] = 2 * (k * k - 1) * normalize;
> +    filter->a[2] = (1 - k / Q + k * k) * normalize;
> +}
> +
> +/**
> + * Direct Form II implementation for a second order digital filter
> + *
> + * @param filter Filter to be applied to input samples
> + * @param in     Single input sample to be filtered
> + * @param delay  Array of IIR feedback values
> + * @return       Filtered sample
> + */
> +static float apply_filter(IIRFilter *filter, float in, float *delay)
> +{
> +    float ret, w;
> +
> +    w   = filter->a[0] * in - filter->a[1] * delay[0] - filter->a[2] * delay[1];
> +    ret = filter->b[0] * w  + filter->b[1] * delay[0] + filter->b[2] * delay[1];
> +
> +    delay[1] = delay[0];
> +    delay[0] = w;
> +
> +    return ret;
> +}
> +
> +/**
> + * Calculate the variance of a block of samples
> + *
> + * @param in     Array of input samples
> + * @param length Number of input samples being analyzed
> + * @return       The variance for the current block
> + */
> +static float variance(const float *in, int length)
> +{
> +    int i;
> +    float mean = 0.0f, square_sum = 0.0f;
> +
> +    for (i = 0; i < length; i++) {
> +        mean += in[i];
> +        square_sum += in[i] * in[i];

Can't you use AVFloatDSPContext's scalarproduct_float for square_sum?
The constrains are lax. 16 byte alignment for in and length a multiple
of 4. You can pad the buffer if needed to achieve that.

> +    }
> +
> +    mean /= length;
> +    return (square_sum - length * mean * mean) / (length - 1);
> +}
> +
> +av_cold int ff_psy_vorbis_init(VorbisPsyContext *vpctx, int sample_rate,
> +                               int channels, int blocks)
> +{
> +    int crit_freq;
> +    float Q[2] = {.54, 1.31}; // Quality values for maximally flat cascaded filters

const float Q[2]

> +
> +    vpctx->filter_delay = av_mallocz_array(4 * channels, sizeof(vpctx->filter_delay[0]));
> +    if (!vpctx->filter_delay)
> +        return AVERROR(ENOMEM);
> +
> +    crit_freq = sample_rate / 4;
> +    biquad_filter_init(&vpctx->filter[0], sample_rate, crit_freq, Q[0]);
> +    biquad_filter_init(&vpctx->filter[1], sample_rate, crit_freq, Q[1]);
> +
> +    vpctx->variance = av_mallocz_array(channels * blocks, sizeof(vpctx->variance[0]));
> +    if (!vpctx->variance)
> +        return AVERROR(ENOMEM);
> +
> +    vpctx->preecho_thresh = 100.0f;
> +
> +    return 0;
> +}
> +
> +int ff_psy_vorbis_block_frame(VorbisPsyContext *vpctx, float *audio,
> +                              int ch, int frame_size, int block_size)
> +{
> +    int i, block_flag = 1;
> +    int blocks = frame_size / block_size;
> +    float last_var;
> +    const float eps = 0.0001f;
> +    float *var = vpctx->variance + ch * blocks;
> +
> +    for (i = 0; i < frame_size; i++) {
> +        apply_filter(&vpctx->filter[0], audio[i], vpctx->filter_delay + 4 * ch);
> +        apply_filter(&vpctx->filter[1], audio[i], vpctx->filter_delay + 4 * ch + 2);
> +    }
> +
> +    for (i = 0; i < blocks; i++) {
> +        last_var = var[i];
> +        var[i] = variance(audio + i * block_size, block_size);
> +
> +        /* A small constant is added to the threshold in order to prevent false
> +         * transients from being detected when quiet sounds follow near-silence */
> +        if (var[i] > vpctx->preecho_thresh * last_var + eps)
> +            block_flag = 0;
> +    }
> +
> +    return block_flag;
> +}
> +
> +av_cold void ff_psy_vorbis_close(VorbisPsyContext *vpctx)
> +{
> +    if (vpctx) {
> +        av_freep(&vpctx->filter_delay);
> +        av_freep(&vpctx->variance);
> +    }
> +    av_freep(&vpctx);
> +}
> diff --git a/libavcodec/vorbispsy.h b/libavcodec/vorbispsy.h
> new file mode 100644
> index 0000000000..021a5e8a28
> --- /dev/null
> +++ b/libavcodec/vorbispsy.h
> @@ -0,0 +1,79 @@
> +/*
> + * Vorbis encoder psychoacoustic model
> + * Copyright (C) 2017 Tyler Jones
> + *
> + * This file is part of FFmpeg.
> + *
> + * FFmpeg is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2.1 of the License, or (at your option) any later version.
> + *
> + * FFmpeg is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with FFmpeg; if not, write to the Free Software
> + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
> + */
> +
> +/**
> + * @file
> + * Vorbis psychoacoustic model
> + */
> +
> +#ifndef AVCODEC_VORBISPSY_H
> +#define AVCODEC_VORBISPSY_H
> +
> +#include "libavutil/attributes.h"
> +
> +/**
> + * Second order IIR Filter
> + */
> +typedef struct IIRFilter {
> +    float b[3]; ///< Normalized cofficients for numerator of transfer function
> +    float a[3]; ///< Normalized coefficiets for denominator of transfer function
> +} IIRFilter;
> +
> +typedef struct VorbisPsyContext {
> +    IIRFilter filter[2];
> +    float *filter_delay;  ///< Direct Form II delay registers for each channel
> +    float *variance;      ///< Saved variances from previous sub-blocks for each channel
> +    float preecho_thresh; ///< Threshold for determining prescence of a preecho
> +} VorbisPsyContext;
> +
> +/**
> + * Initializes the psychoacoustic model context
> + *
> + * @param vpctx       Uninitialized pointer to the model context
> + * @param sample_rate Input audio sample rate
> + * @param channels    Number of channels being analyzed
> + * @param blocks      Number of short blocks for every frame of input
> + * @return            0 on success, negative on failure
> + */
> +av_cold int ff_psy_vorbis_init(VorbisPsyContext *vpctx, int sample_rate,
> +                               int channels, int blocks);
> +
> +/**
> + * Suggest the type of block to use for encoding the current frame
> + *
> + * Each frame of input is passed through a highpass filter to remove dominant
> + * low-frequency waveforms and the variance of each short block of input is
> + * then calculated. If the variance over this block is significantly more than
> + * blocks from the previous frame, a transient signal is likely present.
> + *
> + * @param audio      Pointer to the current channel's input samples
> + * @param ch         Current channel being analyzed
> + * @param frame_size Size of a full frame, i.e. the size of the long block
> + * @param block_size Size of the short block
> + * @return           The correct blockflag to use for encoding, 0 short and 1 long
> + */
> +int ff_psy_vorbis_block_frame(VorbisPsyContext *vpctx, float *audio,
> +                              int ch, int frame_size, int block_size);
> +/**
> + * Closes and frees the memory used by the psychoacoustic model
> + */
> +av_cold void ff_psy_vorbis_close(VorbisPsyContext *vpctx);
> +#endif /* AVCODEC_VORBISPSY_H */
> 
> 
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
Tyler Jones July 28, 2017, 2:59 p.m. UTC | #2
> > --- a/libavcodec/vorbisenc.c
> > +++ b/libavcodec/vorbisenc.c
> > @@ -33,6 +33,7 @@
> >  #include "mathops.h"
> >  #include "vorbis.h"
> >  #include "vorbis_enc_data.h"
> > +#include "vorbispsy.h"
> >  
> >  #include "audio_frame_queue.h"
> >  #include "libavfilter/bufferqueue.h"
> > @@ -136,6 +137,7 @@ typedef struct vorbis_enc_context {
> >      int64_t next_pts;
> >  
> >      AVFloatDSPContext *fdsp;
> > +    VorbisPsyContext *vpctx;
> 
> Why a pointer? I don't see the benefit. It means an unnecessary malloc
> and free call.

You're probably right. It's changed now.

> > @@ -1252,6 +1270,7 @@ static av_cold int vorbis_encode_close(AVCodecContext *avctx)
> >      ff_mdct_end(&venc->mdct[1]);
> >      ff_af_queue_close(&venc->afq);
> >      ff_bufqueue_discard_all(&venc->bufqueue);
> > +    ff_psy_vorbis_close(venc->vpctx);
> 
> You should pass a pointer to venc->vpctx instead, regardless of what you
> do with the comment above.

I'm not sure I understand what you mean. It is passing a pointer to a
VorbisPsyContext, please see the prototype:

    av_cold void ff_psy_vorbis_close(VorbisPsyContext *vpctx);    

> > +/**
> > + * Calculate the variance of a block of samples
> > + *
> > + * @param in     Array of input samples
> > + * @param length Number of input samples being analyzed
> > + * @return       The variance for the current block
> > + */
> > +static float variance(const float *in, int length)
> > +{
> > +    int i;
> > +    float mean = 0.0f, square_sum = 0.0f;
> > +
> > +    for (i = 0; i < length; i++) {
> > +        mean += in[i];
> > +        square_sum += in[i] * in[i];
> 
> Can't you use AVFloatDSPContext's scalarproduct_float for square_sum?
> The constrains are lax. 16 byte alignment for in and length a multiple
> of 4. You can pad the buffer if needed to achieve that.

You are correct, it is switched over now.

> > +    }
> > +
> > +    mean /= length;
> > +    return (square_sum - length * mean * mean) / (length - 1);
> > +}
> > +
> > +av_cold int ff_psy_vorbis_init(VorbisPsyContext *vpctx, int sample_rate,
> > +                               int channels, int blocks)
> > +{
> > +    int crit_freq;
> > +    float Q[2] = {.54, 1.31}; // Quality values for maximally flat cascaded filters
> 
> const float Q[2]

Fixed.

Thank you for catching these mistakes and providing suggestions. A new version
of this patch will be sent soon.

Thanks again,

Tyler Jones
James Almer July 28, 2017, 3:51 p.m. UTC | #3
On 7/28/2017 11:59 AM, Tyler Jones wrote:
>>> --- a/libavcodec/vorbisenc.c
>>> +++ b/libavcodec/vorbisenc.c
>>> @@ -33,6 +33,7 @@
>>>  #include "mathops.h"
>>>  #include "vorbis.h"
>>>  #include "vorbis_enc_data.h"
>>> +#include "vorbispsy.h"
>>>  
>>>  #include "audio_frame_queue.h"
>>>  #include "libavfilter/bufferqueue.h"
>>> @@ -136,6 +137,7 @@ typedef struct vorbis_enc_context {
>>>      int64_t next_pts;
>>>  
>>>      AVFloatDSPContext *fdsp;
>>> +    VorbisPsyContext *vpctx;
>>
>> Why a pointer? I don't see the benefit. It means an unnecessary malloc
>> and free call.
> 
> You're probably right. It's changed now.
> 
>>> @@ -1252,6 +1270,7 @@ static av_cold int vorbis_encode_close(AVCodecContext *avctx)
>>>      ff_mdct_end(&venc->mdct[1]);
>>>      ff_af_queue_close(&venc->afq);
>>>      ff_bufqueue_discard_all(&venc->bufqueue);
>>> +    ff_psy_vorbis_close(venc->vpctx);
>>
>> You should pass a pointer to venc->vpctx instead, regardless of what you
>> do with the comment above.
> 
> I'm not sure I understand what you mean. It is passing a pointer to a
> VorbisPsyContext, please see the prototype:
> 
>     av_cold void ff_psy_vorbis_close(VorbisPsyContext *vpctx);    

For this version (but not V4) it should have been a pointer to the
pointer in vorbis_enc_context, that is **vpctx. Otherwise, the
av_freep() call in ff_psy_vorbis_close() would have zeroed a copy of
said pointer.

> 
>>> +/**
>>> + * Calculate the variance of a block of samples
>>> + *
>>> + * @param in     Array of input samples
>>> + * @param length Number of input samples being analyzed
>>> + * @return       The variance for the current block
>>> + */
>>> +static float variance(const float *in, int length)
>>> +{
>>> +    int i;
>>> +    float mean = 0.0f, square_sum = 0.0f;
>>> +
>>> +    for (i = 0; i < length; i++) {
>>> +        mean += in[i];
>>> +        square_sum += in[i] * in[i];
>>
>> Can't you use AVFloatDSPContext's scalarproduct_float for square_sum?
>> The constrains are lax. 16 byte alignment for in and length a multiple
>> of 4. You can pad the buffer if needed to achieve that.
> 
> You are correct, it is switched over now.
> 
>>> +    }
>>> +
>>> +    mean /= length;
>>> +    return (square_sum - length * mean * mean) / (length - 1);
>>> +}
>>> +
>>> +av_cold int ff_psy_vorbis_init(VorbisPsyContext *vpctx, int sample_rate,
>>> +                               int channels, int blocks)
>>> +{
>>> +    int crit_freq;
>>> +    float Q[2] = {.54, 1.31}; // Quality values for maximally flat cascaded filters
>>
>> const float Q[2]
> 
> Fixed.
> 
> Thank you for catching these mistakes and providing suggestions. A new version
> of this patch will be sent soon.
> 
> Thanks again,
> 
> Tyler Jones
> 
> 
> 
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> http://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
diff mbox

Patch

diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index 357fa1a361..08acbc723e 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -611,7 +611,7 @@  OBJS-$(CONFIG_VMNC_DECODER)            += vmnc.o
 OBJS-$(CONFIG_VORBIS_DECODER)          += vorbisdec.o vorbisdsp.o vorbis.o \
                                           vorbis_data.o
 OBJS-$(CONFIG_VORBIS_ENCODER)          += vorbisenc.o vorbis.o \
-                                          vorbis_data.o
+                                          vorbis_data.o vorbispsy.o
 OBJS-$(CONFIG_VP3_DECODER)             += vp3.o
 OBJS-$(CONFIG_VP5_DECODER)             += vp5.o vp56.o vp56data.o vp56rac.o
 OBJS-$(CONFIG_VP6_DECODER)             += vp6.o vp56.o vp56data.o \
diff --git a/libavcodec/vorbisenc.c b/libavcodec/vorbisenc.c
index bf21a3b1ff..1330b1b376 100644
--- a/libavcodec/vorbisenc.c
+++ b/libavcodec/vorbisenc.c
@@ -33,6 +33,7 @@ 
 #include "mathops.h"
 #include "vorbis.h"
 #include "vorbis_enc_data.h"
+#include "vorbispsy.h"
 
 #include "audio_frame_queue.h"
 #include "libavfilter/bufferqueue.h"
@@ -136,6 +137,7 @@  typedef struct vorbis_enc_context {
     int64_t next_pts;
 
     AVFloatDSPContext *fdsp;
+    VorbisPsyContext *vpctx;
 } vorbis_enc_context;
 
 #define MAX_CHANNELS     2
@@ -272,11 +274,12 @@  static int create_vorbis_context(vorbis_enc_context *venc,
     vorbis_enc_floor   *fc;
     vorbis_enc_residue *rc;
     vorbis_enc_mapping *mc;
-    int i, book, ret;
+    int i, book, ret, blocks;
 
     venc->channels    = avctx->channels;
     venc->sample_rate = avctx->sample_rate;
-    venc->log2_blocksize[0] = venc->log2_blocksize[1] = 11;
+    venc->log2_blocksize[0] = 8;
+    venc->log2_blocksize[1] = 11;
 
     venc->ncodebooks = FF_ARRAY_ELEMS(cvectors);
     venc->codebooks  = av_malloc(sizeof(vorbis_enc_codebook) * venc->ncodebooks);
@@ -464,6 +467,12 @@  static int create_vorbis_context(vorbis_enc_context *venc,
     if ((ret = dsp_init(avctx, venc)) < 0)
         return ret;
 
+    blocks = 1 << (venc->log2_blocksize[1] - venc->log2_blocksize[0]);
+    venc->vpctx = av_mallocz(sizeof(VorbisPsyContext));
+    if (!venc->vpctx || (ret = ff_psy_vorbis_init(venc->vpctx, venc->sample_rate,
+                                                  venc->channels, blocks)) < 0)
+        return AVERROR(ENOMEM);
+
     return 0;
 }
 
@@ -1078,15 +1087,17 @@  static void move_audio(vorbis_enc_context *venc, int sf_size)
         av_frame_free(&cur);
     }
     venc->have_saved = 1;
-    memcpy(venc->scratch, venc->samples, 2 * venc->channels * frame_size);
+    memcpy(venc->scratch, venc->samples, sizeof(float) * venc->channels * 2 * frame_size);
 }
 
 static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
                                const AVFrame *frame, int *got_packet_ptr)
 {
     vorbis_enc_context *venc = avctx->priv_data;
-    int i, ret, need_more;
+    int i, ret, need_more, ch;
+    int curr_win = 1;
     int frame_size = 1 << (venc->log2_blocksize[1] - 1);
+    int block_size = 1 << (venc->log2_blocksize[0] - 1);
     vorbis_enc_mode *mode;
     vorbis_enc_mapping *mapping;
     PutBitContext pb;
@@ -1121,6 +1132,13 @@  static int vorbis_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 
     move_audio(venc, avctx->frame_size);
 
+    for (ch = 0; ch < venc->channels; ch++) {
+        float *scratch = venc->scratch + 2 * ch * frame_size + frame_size;
+
+        if (!ff_psy_vorbis_block_frame(venc->vpctx, scratch, ch, frame_size, block_size))
+            curr_win = 0;
+    }
+
     if (!apply_window_and_mdct(venc))
         return 0;
 
@@ -1252,6 +1270,7 @@  static av_cold int vorbis_encode_close(AVCodecContext *avctx)
     ff_mdct_end(&venc->mdct[1]);
     ff_af_queue_close(&venc->afq);
     ff_bufqueue_discard_all(&venc->bufqueue);
+    ff_psy_vorbis_close(venc->vpctx);
 
     av_freep(&avctx->extradata);
 
diff --git a/libavcodec/vorbispsy.c b/libavcodec/vorbispsy.c
new file mode 100644
index 0000000000..ef48f6ac8c
--- /dev/null
+++ b/libavcodec/vorbispsy.c
@@ -0,0 +1,148 @@ 
+/*
+ * Vorbis encoder psychoacoustic model
+ * Copyright (C) 2017 Tyler Jones
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/ffmath.h"
+
+#include "avcodec.h"
+#include "vorbispsy.h"
+
+/**
+ * Generate the coefficients for a highpass biquad filter
+ *
+ * @param filter Instance of biquad filter to be initialized
+ * @param Fs     Input's sampling frequency
+ * @param Fc     Critical frequency for samples to be passed
+ * @param Q      Quality factor
+ */
+static av_cold void biquad_filter_init(IIRFilter *filter, int Fs, int Fc, float Q)
+{
+    float k = tan(M_PI * Fc / Fs);
+    float normalize = 1 / (1 + k / Q + k * k);
+
+    filter->b[0] = normalize;
+    filter->b[1] = -2 * normalize;
+    filter->b[2] = normalize;
+
+    filter->a[0] = 1;
+    filter->a[1] = 2 * (k * k - 1) * normalize;
+    filter->a[2] = (1 - k / Q + k * k) * normalize;
+}
+
+/**
+ * Direct Form II implementation for a second order digital filter
+ *
+ * @param filter Filter to be applied to input samples
+ * @param in     Single input sample to be filtered
+ * @param delay  Array of IIR feedback values
+ * @return       Filtered sample
+ */
+static float apply_filter(IIRFilter *filter, float in, float *delay)
+{
+    float ret, w;
+
+    w   = filter->a[0] * in - filter->a[1] * delay[0] - filter->a[2] * delay[1];
+    ret = filter->b[0] * w  + filter->b[1] * delay[0] + filter->b[2] * delay[1];
+
+    delay[1] = delay[0];
+    delay[0] = w;
+
+    return ret;
+}
+
+/**
+ * Calculate the variance of a block of samples
+ *
+ * @param in     Array of input samples
+ * @param length Number of input samples being analyzed
+ * @return       The variance for the current block
+ */
+static float variance(const float *in, int length)
+{
+    int i;
+    float mean = 0.0f, square_sum = 0.0f;
+
+    for (i = 0; i < length; i++) {
+        mean += in[i];
+        square_sum += in[i] * in[i];
+    }
+
+    mean /= length;
+    return (square_sum - length * mean * mean) / (length - 1);
+}
+
+av_cold int ff_psy_vorbis_init(VorbisPsyContext *vpctx, int sample_rate,
+                               int channels, int blocks)
+{
+    int crit_freq;
+    float Q[2] = {.54, 1.31}; // Quality values for maximally flat cascaded filters
+
+    vpctx->filter_delay = av_mallocz_array(4 * channels, sizeof(vpctx->filter_delay[0]));
+    if (!vpctx->filter_delay)
+        return AVERROR(ENOMEM);
+
+    crit_freq = sample_rate / 4;
+    biquad_filter_init(&vpctx->filter[0], sample_rate, crit_freq, Q[0]);
+    biquad_filter_init(&vpctx->filter[1], sample_rate, crit_freq, Q[1]);
+
+    vpctx->variance = av_mallocz_array(channels * blocks, sizeof(vpctx->variance[0]));
+    if (!vpctx->variance)
+        return AVERROR(ENOMEM);
+
+    vpctx->preecho_thresh = 100.0f;
+
+    return 0;
+}
+
+int ff_psy_vorbis_block_frame(VorbisPsyContext *vpctx, float *audio,
+                              int ch, int frame_size, int block_size)
+{
+    int i, block_flag = 1;
+    int blocks = frame_size / block_size;
+    float last_var;
+    const float eps = 0.0001f;
+    float *var = vpctx->variance + ch * blocks;
+
+    for (i = 0; i < frame_size; i++) {
+        apply_filter(&vpctx->filter[0], audio[i], vpctx->filter_delay + 4 * ch);
+        apply_filter(&vpctx->filter[1], audio[i], vpctx->filter_delay + 4 * ch + 2);
+    }
+
+    for (i = 0; i < blocks; i++) {
+        last_var = var[i];
+        var[i] = variance(audio + i * block_size, block_size);
+
+        /* A small constant is added to the threshold in order to prevent false
+         * transients from being detected when quiet sounds follow near-silence */
+        if (var[i] > vpctx->preecho_thresh * last_var + eps)
+            block_flag = 0;
+    }
+
+    return block_flag;
+}
+
+av_cold void ff_psy_vorbis_close(VorbisPsyContext *vpctx)
+{
+    if (vpctx) {
+        av_freep(&vpctx->filter_delay);
+        av_freep(&vpctx->variance);
+    }
+    av_freep(&vpctx);
+}
diff --git a/libavcodec/vorbispsy.h b/libavcodec/vorbispsy.h
new file mode 100644
index 0000000000..021a5e8a28
--- /dev/null
+++ b/libavcodec/vorbispsy.h
@@ -0,0 +1,79 @@ 
+/*
+ * Vorbis encoder psychoacoustic model
+ * Copyright (C) 2017 Tyler Jones
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * @file
+ * Vorbis psychoacoustic model
+ */
+
+#ifndef AVCODEC_VORBISPSY_H
+#define AVCODEC_VORBISPSY_H
+
+#include "libavutil/attributes.h"
+
+/**
+ * Second order IIR Filter
+ */
+typedef struct IIRFilter {
+    float b[3]; ///< Normalized cofficients for numerator of transfer function
+    float a[3]; ///< Normalized coefficiets for denominator of transfer function
+} IIRFilter;
+
+typedef struct VorbisPsyContext {
+    IIRFilter filter[2];
+    float *filter_delay;  ///< Direct Form II delay registers for each channel
+    float *variance;      ///< Saved variances from previous sub-blocks for each channel
+    float preecho_thresh; ///< Threshold for determining prescence of a preecho
+} VorbisPsyContext;
+
+/**
+ * Initializes the psychoacoustic model context
+ *
+ * @param vpctx       Uninitialized pointer to the model context
+ * @param sample_rate Input audio sample rate
+ * @param channels    Number of channels being analyzed
+ * @param blocks      Number of short blocks for every frame of input
+ * @return            0 on success, negative on failure
+ */
+av_cold int ff_psy_vorbis_init(VorbisPsyContext *vpctx, int sample_rate,
+                               int channels, int blocks);
+
+/**
+ * Suggest the type of block to use for encoding the current frame
+ *
+ * Each frame of input is passed through a highpass filter to remove dominant
+ * low-frequency waveforms and the variance of each short block of input is
+ * then calculated. If the variance over this block is significantly more than
+ * blocks from the previous frame, a transient signal is likely present.
+ *
+ * @param audio      Pointer to the current channel's input samples
+ * @param ch         Current channel being analyzed
+ * @param frame_size Size of a full frame, i.e. the size of the long block
+ * @param block_size Size of the short block
+ * @return           The correct blockflag to use for encoding, 0 short and 1 long
+ */
+int ff_psy_vorbis_block_frame(VorbisPsyContext *vpctx, float *audio,
+                              int ch, int frame_size, int block_size);
+/**
+ * Closes and frees the memory used by the psychoacoustic model
+ */
+av_cold void ff_psy_vorbis_close(VorbisPsyContext *vpctx);
+#endif /* AVCODEC_VORBISPSY_H */