[FFmpeg-devel,GSOC,3/3] lavc/cfhd:frame threading support for 3d transform progressive and interlaced samples

Submitted by Gagandeep Singh on Aug. 14, 2018, 7:44 a.m.

Details

Message ID CAOi=zRv-+_9Ch5K97eH=c5ri+wWTgoitK58n_0sbrFCiK8T3BQ@mail.gmail.com
State New
Headers show

Commit Message

Gagandeep Singh Aug. 14, 2018, 7:44 a.m.
This final patch adds frame-threading support for IP samples in both the progressive
and interlaced versions.

Gagandeep Singh

Comments

Gagandeep Singh Sept. 6, 2018, 6:07 a.m.
Hi,
The new patch is to be applied on top of the new patches sent.

Thanks
Gagandeep Singh

On Tue, Aug 14, 2018 at 1:14 PM Gagandeep Singh <deepgagan231197@gmail.com>
wrote:

> Last patch adding frame thread support for ip samples in both progressive
> and interlaced versions.
>
> Gagandeep Singh
>

Patch hide | download patch | download mbox

From fa23549c61a6d8413cdc79c570376c53795a6ff1 Mon Sep 17 00:00:00 2001
From: Gagandeep Singh <deepgagan231197@gmail.com>
Date: Tue, 14 Aug 2018 12:43:20 +0530
Subject: [GSOC][FFmpeg-devel][PATCH 3/3] lavc/cfhd:frame threading support for 3d transform
 progressive and interlaced samples

---
 libavcodec/cfhd.c | 378 ++++++++++++++++++++++++++++------------------
 libavcodec/cfhd.h |   8 +-
 2 files changed, 242 insertions(+), 144 deletions(-)

diff --git a/libavcodec/cfhd.c b/libavcodec/cfhd.c
index 2c538f0bbd..7c298056ca 100644
--- a/libavcodec/cfhd.c
+++ b/libavcodec/cfhd.c
@@ -63,13 +63,23 @@  enum CFHDParam {
 
 static av_cold int cfhd_init(AVCodecContext *avctx)
 {
+    int ret;
+
     CFHDContext *s = avctx->priv_data;
+    if (!avctx->internal->is_copy) {
+        avctx->internal->allocate_progress = 1;
+        ret = ff_cfhd_init_vlcs(s);
+    } else
+        ret = 0;
 
     avctx->bits_per_raw_sample = 10;
     s->avctx                   = avctx;
     s->progressive             = 0;
+    s->i_frame.f = av_frame_alloc();
+    s->p_frame.f = av_frame_alloc();
 
-    return ff_cfhd_init_vlcs(s);
+
+    return ret;
 }
 
 static void init_plane_defaults(CFHDContext *s)
@@ -268,15 +278,18 @@  static void free_buffers(CFHDContext *s)
     for (i = 0; i < FF_ARRAY_ELEMS(s->plane); i++) {
         av_freep(&s->plane[i].idwt_buf);
         av_freep(&s->plane[i].idwt_tmp);
-        if (s->transform_type == 0)
+        if (s->transform_type == 0) {
             for (j = 0; j < 9; j++)
                 s->plane[i].subband[j] = NULL;
-        else
+            for (j = 0; j < 8; j++)
+                s->plane[i].l_h[j] = NULL;
+        }
+        else {
             for (j = 0; j < 17; j++)
                 s->plane[i].subband[j] = NULL;
-
-        for (j = 0; j < 8; j++)
-            s->plane[i].l_h[j] = NULL;
+            for (j = 0; j < 12; j++)
+                s->plane[i].l_h[j] = NULL;
+        }
     }
     s->a_height = 0;
     s->a_width  = 0;
@@ -394,8 +407,10 @@  static int alloc_buffers(AVCodecContext *avctx)
         s->plane[i].l_h[7] = s->plane[i].idwt_tmp + 2 * w2 * h2;
         if (s->transform_type == 2) {
             frame2 = s->plane[i].idwt_tmp + 4 * w2 * h2;
-            s->plane[i].l_h[8] = frame2;
-            s->plane[i].l_h[9] = frame2 + 2 * w2 * h2;
+            s->plane[i].l_h[8]  = frame2;
+            s->plane[i].l_h[9]  = frame2 + 2 * w4 * h4;
+            s->plane[i].l_h[10] = frame2;
+            s->plane[i].l_h[11] = frame2 + 2 * w2 * h2;
             }
     }
 
@@ -406,14 +421,28 @@  static int alloc_buffers(AVCodecContext *avctx)
     return 0;
 }
 
+static int update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
+{
+    CFHDContext *csrc = src->priv_data;
+    CFHDContext *cdst = dst->priv_data;
+    cdst->transform_type = csrc->transform_type;
+    if (csrc->sample_type != 1 && csrc->transform_type != 0) {
+        cdst->progressive = csrc->progressive;
+        cdst->picture = &csrc->p_frame;
+        cdst->connection = &csrc->i_frame;
+        cdst->buffers = csrc->plane;
+    }
+
+    return 0;
+}
+
 static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
 {
     CFHDContext *s = avctx->priv_data;
     GetByteContext gb;
     ThreadFrame frame = { .f = data };
-    AVFrame *pic = data;
-    int ret = 0, i, j, planes, plane, got_buffer = 0;
+    int ret = 0, i, j, planes, plane, got_buffer = 0, progress1 = 1, progress2 = 1;
     int16_t *coeff_data;
 
     s->coded_format = AV_PIX_FMT_YUV422P10;
@@ -537,7 +566,9 @@  static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
         } else if (tag == 1) {
             s->sample_type = data;
             if (data == 2)
-                s->pframe = 1;
+                s->pframe  = 1;
+            else if (data == 1)
+                s->transform_type = 2;
             av_log(avctx, AV_LOG_DEBUG, "Sample type? %"PRIu16"\n", data);
         } else if (tag == 10) {
             s->transform_type = data;
@@ -657,21 +688,54 @@  static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
                     return ret;
                 }
             }
-            ret = ff_set_dimensions(avctx, s->coded_width, s->coded_height);
-            if (ret < 0)
-                return ret;
-            if (s->cropped_height)
-                avctx->height = s->cropped_height;
-            frame.f->width =
-            frame.f->height = 0;
-
-            if ((ret = ff_thread_get_buffer(avctx, &frame, 0)) < 0)
-                return ret;
-
+            if (s->transform_type == 2) {
+                if (s->sample_type != 1) {
+                    s->picture = &s->i_frame;
+                    s->connection = &s->p_frame;
+                    s->buffers = s->plane;
+                }
+                ret = ff_set_dimensions(avctx, s->coded_width, s->coded_height);
+                if (ret < 0)
+                    return ret;
+                if (s->sample_type != 1) {
+                    if (s->i_frame.f->data[0])
+                        ff_thread_release_buffer(avctx, &s->i_frame);
+                    if (s->p_frame.f->data[0])
+                        ff_thread_release_buffer(avctx, &s->p_frame);
+                    av_frame_copy_props(s->i_frame.f, frame.f);
+                    av_frame_copy_props(s->p_frame.f, frame.f);
+                    if (s->cropped_height)
+                        avctx->height = s->cropped_height;
+                    s->picture->f->width =
+                    s->picture->f->height = 0;
+                    s->connection->f->width =
+                    s->connection->f->height = 0;
+                    if ((ret = ff_thread_get_buffer(avctx, s->picture, 0)) < 0)
+                        return ret;
+                    if ((ret = ff_thread_get_buffer(avctx, s->connection, 0)) < 0)
+                        return ret;
+                }
+            } else {
+                s->picture = &s->i_frame;
+                s->buffers = s->plane;
+                if (s->picture->f->data[0])
+                    ff_thread_release_buffer(avctx, s->picture);
+                av_frame_copy_props(s->i_frame.f, frame.f);
+                ret = ff_set_dimensions(avctx, s->coded_width, s->coded_height);
+                if (ret < 0)
+                    return ret;
+                if (s->cropped_height)
+                    avctx->height = s->cropped_height;
+                s->picture->f->width =
+                s->picture->f->height = 0;
+                if ((ret = ff_thread_get_buffer(avctx, s->picture, 0)) < 0)
+                    return ret;
+            }
             s->coded_width = 0;
             s->coded_height = 0;
             s->coded_format = AV_PIX_FMT_NONE;
             got_buffer = 1;
+            ff_thread_finish_setup(avctx);
         }
         coeff_data = s->plane[s->channel_num].subband[s->subband_num_actual];
 
@@ -835,6 +899,8 @@  static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
                        &coeff_data[(highpass_height - 1) * highpass_stride],
                        highpass_stride * sizeof(*coeff_data));
             }
+            if (s->transform_type == 2 && s->subband_num_actual == 10)
+                ff_thread_report_progress(s->picture, progress1 += 1, 0);
         }
     }
     //disabled to run mountain sample file
@@ -975,7 +1041,6 @@  static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
             ret = AVERROR(EINVAL);
             goto end;
         }
-
         av_log(avctx, AV_LOG_DEBUG, "Level 3 plane %i %i %i %i\n", plane, lowpass_height, lowpass_width, highpass_stride);
         if (s->progressive) {
             low    = s->plane[plane].subband[0];
@@ -998,18 +1063,18 @@  static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
                 output++;
             }
 
-            dst = (int16_t *)pic->data[act_plane];
+            dst = (int16_t *)s->picture->f->data[act_plane];
             low  = s->plane[plane].l_h[6];
             high = s->plane[plane].l_h[7];
             for (i = 0; i < lowpass_height * 2; i++) {
                 horiz_filter_clip(dst, low, high, lowpass_width, s->bpc);
                 low  += lowpass_width;
                 high += lowpass_width;
-                dst  += pic->linesize[act_plane] / 2;
+                dst  += s->picture->f->linesize[act_plane] / 2;
             }
         } else {
-            av_log(avctx, AV_LOG_DEBUG, "interlaced frame ? %d", pic->interlaced_frame);
-            pic->interlaced_frame = 1;
+            av_log(avctx, AV_LOG_DEBUG, "interlaced frame ? %d", s->picture->f->interlaced_frame);
+            s->picture->f->interlaced_frame = 1;
             low    = s->plane[plane].subband[0];
             high   = s->plane[plane].subband[7];
             output = s->plane[plane].l_h[6];
@@ -1030,23 +1095,23 @@  static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
                 output += lowpass_width * 2;
             }
 
-            dst  = (int16_t *)pic->data[act_plane];
+            dst  = (int16_t *)s->picture->f->data[act_plane];
             low  = s->plane[plane].l_h[6];
             high = s->plane[plane].l_h[7];
             for (i = 0; i < lowpass_height; i++) {
-                inverse_temporal_filter(dst, low, high, lowpass_width * 2,  pic->linesize[act_plane]/2, 0);
+                inverse_temporal_filter(dst, low, high, lowpass_width * 2,  s->picture->f->linesize[act_plane]/2, 0);
                 low  += lowpass_width * 2;
                 high += lowpass_width * 2;
-                dst  += pic->linesize[act_plane];
+                dst  += s->picture->f->linesize[act_plane];
             }
         }
     }
-    //this is the serial version on ip sample decoding so buffers allocated using alloc_buffers() are not freed,
-    //so the stored decoded coefficients data is used for generating the second frame once empty packet is passed in sample_type = 1
+    av_frame_ref(frame.f, s->picture->f);
+    ff_thread_report_progress(s->picture, INT_MAX, 0);
     } else if (s->transform_type == 2 && s->sample_type != 1) {
         for (plane = 0; plane < planes && !ret; plane++) {
-            int lowpass_height  = s->plane[plane].band[0][0].height;
-            int lowpass_width   = s->plane[plane].band[0][0].width;
+            int lowpass_height  = s->plane[plane].band[0][1].height;
+            int lowpass_width   = s->plane[plane].band[0][1].width;
             int highpass_stride = s->plane[plane].band[0][1].stride;
             int act_plane = plane == 1 ? 2 : plane == 2 ? 1 : plane;
             int16_t *low, *high, *output, *dst;
@@ -1058,8 +1123,6 @@  static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
                 goto end;
             }
 
-            av_log(avctx, AV_LOG_DEBUG, "Decoding level 1 plane %i %i %i %i\n", plane, lowpass_height, lowpass_width, highpass_stride);
-
             low    = s->plane[plane].subband[0];
             high   = s->plane[plane].subband[2];
             output = s->plane[plane].l_h[0];
@@ -1110,8 +1173,6 @@  static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
                 goto end;
             }
 
-            av_log(avctx, AV_LOG_DEBUG, "Level 2 lowpass plane %i %i %i %i\n", plane, lowpass_height, lowpass_width, highpass_stride);
-
             low    = s->plane[plane].subband[0];
             high   = s->plane[plane].subband[5];
             output = s->plane[plane].l_h[3];
@@ -1149,40 +1210,9 @@  static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
                 output += lowpass_width * 2;
             }
 
-            low    = s->plane[plane].subband[7];
-            high   = s->plane[plane].subband[9];
-            output = s->plane[plane].l_h[3];
-            for (i = 0; i < lowpass_width; i++) {
-                vert_filter(output, lowpass_width, low, lowpass_width, high, highpass_stride, lowpass_height);
-                low++;
-                high++;
-                output++;
-            }
-
-            low    = s->plane[plane].subband[8];
-            high   = s->plane[plane].subband[10];
-            output = s->plane[plane].l_h[4];
-            for (i = 0; i < lowpass_width; i++) {
-                vert_filter(output, lowpass_width, low, highpass_stride, high, highpass_stride, lowpass_height);
-                low++;
-                high++;
-                output++;
-            }
-
-            low    = s->plane[plane].l_h[3];
-            high   = s->plane[plane].l_h[4];
-            output = s->plane[plane].subband[7];
-            for (i = 0; i < lowpass_height * 2; i++) {
-                horiz_filter(output, low, high, lowpass_width);
-                low    += lowpass_width;
-                high   += lowpass_width;
-                output += lowpass_width * 2;
-            }
-
             lowpass_height  = s->plane[plane].band[4][1].height;
             lowpass_width   = s->plane[plane].band[4][1].width;
             highpass_stride = s->plane[plane].band[4][1].stride;
-            av_log(avctx, AV_LOG_DEBUG, "temporal level %i %i %i %i\n", plane, lowpass_height, lowpass_width, highpass_stride);
 
             if (lowpass_height > s->plane[plane].band[4][1].a_height || lowpass_width > s->plane[plane].band[4][1].a_width ||
                 !highpass_stride || s->plane[plane].band[4][1].width > s->plane[plane].band[4][1].a_width) {
@@ -1190,7 +1220,7 @@  static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
                 ret = AVERROR(EINVAL);
                 goto end;
             }
-
+            ff_thread_await_progress(s->connection, progress2 += 1, 0);
             low    = s->plane[plane].subband[0];
             high   = s->plane[plane].subband[7];
             output = s->plane[plane].subband[0];
@@ -1199,6 +1229,7 @@  static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
                 low    += lowpass_width;
                 high   += lowpass_width;
             }
+            ff_thread_report_progress(s->picture, progress1 += 1, 0);
             if (s->progressive) {
                 low    = s->plane[plane].subband[0];
                 high   = s->plane[plane].subband[15];
@@ -1220,37 +1251,17 @@  static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
                     output++;
                 }
 
-                low    = s->plane[plane].subband[7];
-                high   = s->plane[plane].subband[12];
-                output = s->plane[plane].l_h[8];
-                for (i = 0; i < lowpass_width; i++) {
-                    vert_filter(output, lowpass_width, low, lowpass_width, high, highpass_stride, lowpass_height);
-                    low++;
-                    high++;
-                    output++;
-                }
-
-                low    = s->plane[plane].subband[11];
-                high   = s->plane[plane].subband[13];
-                output = s->plane[plane].l_h[9];
-                for (i = 0; i < lowpass_width; i++) {
-                    vert_filter(output, lowpass_width, low, highpass_stride, high, highpass_stride, lowpass_height);
-                    low++;
-                    high++;
-                    output++;
-                }
-
-                dst = (int16_t *)pic->data[act_plane];
+                dst = (int16_t *)s->picture->f->data[act_plane];
                 low  = s->plane[plane].l_h[6];
                 high = s->plane[plane].l_h[7];
                 for (i = 0; i < lowpass_height * 2; i++) {
-                    horiz_filter(dst, low, high, lowpass_width);
+                    horiz_filter_clip(dst, low, high, lowpass_width, s->bpc);
                     low  += lowpass_width;
                     high += lowpass_width;
-                    dst  += pic->linesize[act_plane] / 2;
+                    dst  += s->picture->f->linesize[act_plane] / 2;
                 }
             } else {
-                pic->interlaced_frame = 1;
+                s->picture->f->interlaced_frame = 1;
                 low    = s->plane[plane].subband[0];
                 high   = s->plane[plane].subband[14];
                 output = s->plane[plane].l_h[6];
@@ -1271,67 +1282,137 @@  static int cfhd_decode(AVCodecContext *avctx, void *data, int *got_frame,
                     output += lowpass_width * 2;
                 }
 
-                low    = s->plane[plane].subband[7];
-                high   = s->plane[plane].subband[11];
-                output = s->plane[plane].l_h[8];
-                for (i = 0; i < lowpass_height; i++) {
-                    horiz_filter(output, low, high, lowpass_width);
-                    low    += lowpass_width;
-                    high   += lowpass_width;
-                    output += lowpass_width * 2;
-                }
-
-                low    = s->plane[plane].subband[12];
-                high   = s->plane[plane].subband[13];
-                output = s->plane[plane].l_h[9];
-                for (i = 0; i < lowpass_height; i++) {
-                    horiz_filter(output, low, high, lowpass_width);
-                    low    += lowpass_width;
-                    high   += lowpass_width;
-                    output += lowpass_width * 2;
-                }
-
-
-                dst  = (int16_t *)pic->data[act_plane];
+                dst  = (int16_t *)s->picture->f->data[act_plane];
                 low  = s->plane[plane].l_h[6];
                 high = s->plane[plane].l_h[7];
                 for (i = 0; i < lowpass_height; i++) {
-                    inverse_temporal_filter(dst, low, high, lowpass_width * 2,  pic->linesize[act_plane]/2, 0);
+                    inverse_temporal_filter(dst, low, high, lowpass_width * 2,  s->picture->f->linesize[act_plane]/2, 0);
                     low  += lowpass_width * 2;
                     high += lowpass_width * 2;
-                    dst  += pic->linesize[act_plane];
+                    dst  += s->picture->f->linesize[act_plane];
                 }
             }
         }
+        ff_thread_report_progress(s->picture, INT_MAX, 0);
+        ff_thread_await_progress(s->connection, INT_MAX, 0);
+        av_frame_ref(frame.f, s->picture->f);
     } else if (s->sample_type == 1) {
-        int16_t *low, *high, *dst;
-        int lowpass_height, lowpass_width;
+        int16_t *low, *high, *dst, *output;
+        int lowpass_height, lowpass_width, highpass_stride, act_plane;
+        progress1 = 1, progress2 = 1;
         for (plane = 0; plane < planes && !ret; plane++) {
-            int act_plane = plane == 1 ? 2 : plane == 2 ? 1 : plane;
-            lowpass_height  = s->plane[plane].band[4][1].height;
-            lowpass_width   = s->plane[plane].band[4][1].width;
+            ff_thread_await_progress(s->connection, progress1 += 1, 0);
+            // highpass inverse for temporal
+            lowpass_height  = s->buffers[plane].band[1][1].a_height;
+            lowpass_width   = s->buffers[plane].band[1][1].a_width;
+            highpass_stride = s->buffers[plane].band[1][1].a_width;
+
+            low    = s->buffers[plane].subband[7];
+            high   = s->buffers[plane].subband[9];
+            output = s->buffers[plane].l_h[8];
+            for (i = 0; i < lowpass_width; i++) {
+                vert_filter(output, lowpass_width, low, lowpass_width, high, highpass_stride, lowpass_height);
+                low++;
+                high++;
+                output++;
+            }
+
+            low    = s->buffers[plane].subband[8];
+            high   = s->buffers[plane].subband[10];
+            output = s->buffers[plane].l_h[9];
+            for (i = 0; i < lowpass_width; i++) {
+                vert_filter(output, lowpass_width, low, highpass_stride, high, highpass_stride, lowpass_height);
+                low++;
+                high++;
+                output++;
+            }
+
+            low    = s->buffers[plane].l_h[8];
+            high   = s->buffers[plane].l_h[9];
+            output = s->buffers[plane].subband[7];
+            for (i = 0; i < lowpass_height * 2; i++) {
+                horiz_filter(output, low, high, lowpass_width);
+                low    += lowpass_width;
+                high   += lowpass_width;
+                output += lowpass_width * 2;
+            }
+            ff_thread_report_progress(s->picture, progress2 += 1, 0);
+        }
+        for (plane = 0; plane < planes && !ret; plane++) {
+            ff_thread_await_progress(s->connection, progress1 += 1, 0);
+
+            act_plane = plane == 1 ? 2 : plane == 2 ? 1 : plane;
+            lowpass_height  = s->buffers[plane].band[4][1].a_height;
+            lowpass_width   = s->buffers[plane].band[4][1].a_width;
+            highpass_stride = s->buffers[plane].band[4][1].a_width;
+
             if (s->progressive) {
-                dst = (int16_t *)pic->data[act_plane];
-                low  = s->plane[plane].l_h[8];
-                high = s->plane[plane].l_h[9];
+                low    = s->buffers[plane].subband[7];
+                high   = s->buffers[plane].subband[12];
+                output = s->buffers[plane].l_h[10];
+                for (i = 0; i < lowpass_width; i++) {
+                    vert_filter(output, lowpass_width, low, lowpass_width, high, highpass_stride, lowpass_height);
+                    low++;
+                    high++;
+                    output++;
+                }
+
+                low    = s->buffers[plane].subband[11];
+                high   = s->buffers[plane].subband[13];
+                output = s->buffers[plane].l_h[11];
+                for (i = 0; i < lowpass_width; i++) {
+                    vert_filter(output, lowpass_width, low, highpass_stride, high, highpass_stride, lowpass_height);
+                    low++;
+                    high++;
+                    output++;
+                }
+
+                dst = (int16_t *)s->picture->f->data[act_plane];
+                low  = s->buffers[plane].l_h[10];
+                high = s->buffers[plane].l_h[11];
                 for (i = 0; i < lowpass_height * 2; i++) {
-                    horiz_filter(dst, low, high, lowpass_width);
+                    horiz_filter_clip(dst, low, high, lowpass_width, s->bpc);
                     low  += lowpass_width;
                     high += lowpass_width;
-                    dst  += pic->linesize[act_plane] / 2;
+                    dst  += s->picture->f->linesize[act_plane] / 2;
                 }
             } else {
-                dst  = (int16_t *)pic->data[act_plane];
-                low  = s->plane[plane].l_h[8];
-                high = s->plane[plane].l_h[9];
+                av_log(avctx, AV_LOG_DEBUG, "interlaced frame ? %d", s->picture->f->interlaced_frame);
+                s->picture->f->interlaced_frame = 1;
+                low    = s->buffers[plane].subband[7];
+                high   = s->buffers[plane].subband[11];
+                output = s->buffers[plane].l_h[10];
+                for (i = 0; i < lowpass_height; i++) {
+                    horiz_filter(output, low, high, lowpass_width);
+                    low    += lowpass_width;
+                    high   += lowpass_width;
+                    output += lowpass_width * 2;
+                }
+
+                low    = s->buffers[plane].subband[12];
+                high   = s->buffers[plane].subband[13];
+                output = s->buffers[plane].l_h[11];
                 for (i = 0; i < lowpass_height; i++) {
-                    inverse_temporal_filter(dst, low, high, lowpass_width * 2,  pic->linesize[act_plane]/2, 0);
+                    horiz_filter(output, low, high, lowpass_width);
+                    low    += lowpass_width;
+                    high   += lowpass_width;
+                    output += lowpass_width * 2;
+                }
+
+                dst  = (int16_t *)s->picture->f->data[act_plane];
+                low  = s->buffers[plane].l_h[10];
+                high = s->buffers[plane].l_h[11];
+                for (i = 0; i < lowpass_height; i++) {
+                    inverse_temporal_filter(dst, low, high, lowpass_width * 2,  s->picture->f->linesize[act_plane]/2, 0);
                     low  += lowpass_width * 2;
                     high += lowpass_width * 2;
-                    dst  += pic->linesize[act_plane];
+                    dst  += s->picture->f->linesize[act_plane];
                 }
             }
         }
+        ff_thread_report_progress(s->picture, INT_MAX, 0);
+        ff_thread_await_progress(s->connection, INT_MAX, 0);
+        av_frame_ref(frame.f, s->picture->f);
     }
 
 end:
@@ -1352,19 +1433,30 @@  static av_cold int cfhd_close(AVCodecContext *avctx)
         ff_free_vlc(&s->vlc_9);
         ff_free_vlc(&s->vlc_18);
     }
+    if (s->i_frame.f && s->i_frame.f->data[0])
+        ff_thread_release_buffer(avctx, &s->i_frame);
+    if (s->p_frame.f && s->p_frame.f->data[0])
+        ff_thread_release_buffer(avctx, &s->p_frame);
+
+    if (s->i_frame.f)
+        av_frame_free(&s->i_frame.f);
+    if (s->p_frame.f)
+        av_frame_free(&s->p_frame.f);
 
     return 0;
 }
 
 AVCodec ff_cfhd_decoder = {
-    .name             = "cfhd",
-    .long_name        = NULL_IF_CONFIG_SMALL("Cineform HD"),
-    .type             = AVMEDIA_TYPE_VIDEO,
-    .id               = AV_CODEC_ID_CFHD,
-    .priv_data_size   = sizeof(CFHDContext),
-    .init             = cfhd_init,
-    .close            = cfhd_close,
-    .decode           = cfhd_decode,
-    .capabilities     = AV_CODEC_CAP_DR1,
-    .caps_internal    = FF_CODEC_CAP_INIT_CLEANUP,
+    .name                  = "cfhd",
+    .long_name             = NULL_IF_CONFIG_SMALL("Cineform HD"),
+    .type                  = AVMEDIA_TYPE_VIDEO,
+    .id                    = AV_CODEC_ID_CFHD,
+    .priv_data_size        = sizeof(CFHDContext),
+    .init                  = cfhd_init,
+    .close                 = cfhd_close,
+    .decode                = cfhd_decode,
+    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(cfhd_init),
+    .update_thread_context = ONLY_IF_THREADS_ENABLED(update_thread_context),
+    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS,
+    .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
 };
diff --git a/libavcodec/cfhd.h b/libavcodec/cfhd.h
index 047c0f2028..d7a2ffe0a7 100644
--- a/libavcodec/cfhd.h
+++ b/libavcodec/cfhd.h
@@ -29,6 +29,7 @@ 
 #include "bytestream.h"
 #include "get_bits.h"
 #include "vlc.h"
+#include "thread.h"
 
 #define VLC_BITS       9
 #define SUBBAND_COUNT 17
@@ -63,7 +64,7 @@  typedef struct Plane {
 
     /* TODO: merge this into SubBand structure */
     int16_t *subband[SUBBAND_COUNT];
-    int16_t *l_h[10];
+    int16_t *l_h[12];
 
     SubBand band[DWT_LEVELS][4];
 } Plane;
@@ -76,6 +77,10 @@  typedef struct Peak {
 
 typedef struct CFHDContext {
     AVCodecContext *avctx;
+    ThreadFrame i_frame;
+    ThreadFrame p_frame;
+    ThreadFrame *connection;
+    ThreadFrame *picture;
 
     CFHD_RL_VLC_ELEM table_9_rl_vlc[2088];
     VLC vlc_9;
@@ -116,6 +121,7 @@  typedef struct CFHDContext {
 
     uint8_t prescale_shift[3];
     Plane plane[4];
+    Plane *buffers;
     Peak peak;
 } CFHDContext;
 
-- 
2.17.1