diff mbox series

[FFmpeg-devel,v9,09/13] vvcdec: add LMCS, Deblocking, SAO, and ALF filters

Message ID TYSPR06MB64332F4BBEAFD23E9463DCCFAA62A@TYSPR06MB6433.apcprd06.prod.outlook.com
State New
Headers show
Series Add vvc decoder | expand

Checks

Context Check Description
yinshiyou/make_loongarch64 success Make finished
yinshiyou/make_fate_loongarch64 success Make fate finished
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

Nuo Mi Jan. 1, 2024, 2:12 p.m. UTC
Co-authored-by: Xu Mu <toxumu@outlook.com>
Co-authored-by: Frank Plowman <post@frankplowman.com>
Co-authored-by: Shaun Loo <shaunloo10@gmail.com>
Co-authored-by: Wu Jianhua <toqsxw@outlook.com>
---
 libavcodec/vvc/Makefile              |    1 +
 libavcodec/vvc/vvc_ctu.h             |    1 +
 libavcodec/vvc/vvc_filter.c          | 1332 ++++++++++++++++++++++++++
 libavcodec/vvc/vvc_filter.h          |   71 ++
 libavcodec/vvc/vvc_filter_template.c | 1135 ++++++++++++++++++++++
 5 files changed, 2540 insertions(+)
 create mode 100644 libavcodec/vvc/vvc_filter.c
 create mode 100644 libavcodec/vvc/vvc_filter.h
 create mode 100644 libavcodec/vvc/vvc_filter_template.c

Comments

Lynne Jan. 1, 2024, 3:17 p.m. UTC | #1
Jan 1, 2024, 15:15 by nuomi2021@gmail.com:

> Co-authored-by: Xu Mu <toxumu@outlook.com>
> Co-authored-by: Frank Plowman <post@frankplowman.com>
> Co-authored-by: Shaun Loo <shaunloo10@gmail.com>
> Co-authored-by: Wu Jianhua <toqsxw@outlook.com>
> ---
>  libavcodec/vvc/Makefile              |    1 +
>  libavcodec/vvc/vvc_ctu.h             |    1 +
>  libavcodec/vvc/vvc_filter.c          | 1332 ++++++++++++++++++++++++++
>  libavcodec/vvc/vvc_filter.h          |   71 ++
>  libavcodec/vvc/vvc_filter_template.c | 1135 ++++++++++++++++++++++
>  5 files changed, 2540 insertions(+)
>  create mode 100644 libavcodec/vvc/vvc_filter.c
>  create mode 100644 libavcodec/vvc/vvc_filter.h
>  create mode 100644 libavcodec/vvc/vvc_filter_template.c
>

Is there really absolutely nothing you can reuse from the HEVC decoder?
Nuo Mi Jan. 2, 2024, 9:47 a.m. UTC | #2
On Mon, Jan 1, 2024 at 11:18 PM Lynne <dev@lynne.ee> wrote:

> Jan 1, 2024, 15:15 by nuomi2021@gmail.com:
>
> > Co-authored-by: Xu Mu <toxumu@outlook.com>
> > Co-authored-by: Frank Plowman <post@frankplowman.com>
> > Co-authored-by: Shaun Loo <shaunloo10@gmail.com>
> > Co-authored-by: Wu Jianhua <toqsxw@outlook.com>
> > ---
> >  libavcodec/vvc/Makefile              |    1 +
> >  libavcodec/vvc/vvc_ctu.h             |    1 +
> >  libavcodec/vvc/vvc_filter.c          | 1332 ++++++++++++++++++++++++++
> >  libavcodec/vvc/vvc_filter.h          |   71 ++
> >  libavcodec/vvc/vvc_filter_template.c | 1135 ++++++++++++++++++++++
> >  5 files changed, 2540 insertions(+)
> >  create mode 100644 libavcodec/vvc/vvc_filter.c
> >  create mode 100644 libavcodec/vvc/vvc_filter.h
> >  create mode 100644 libavcodec/vvc/vvc_filter_template.c
> >
>
> Is there really absolutely nothing you can reuse from the HEVC decoder?
>
LMCS and ALF are new filters. SAO is entirely reusable, and deblock is
partially reusable.
However, all of them require some changes to the HEVC code.
Currently, I prefer not to touch the HEVC code as there is no HEVC
maintainer, and any HEVC code change requires a very long review time.

> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Jean-Baptiste Kempf Jan. 2, 2024, 11:38 a.m. UTC | #3
On Tue, 2 Jan 2024, at 10:47, Nuo Mi wrote:
> On Mon, Jan 1, 2024 at 11:18 PM Lynne <dev@lynne.ee> wrote:
>
>> Jan 1, 2024, 15:15 by nuomi2021@gmail.com:
>>
>> > Co-authored-by: Xu Mu <toxumu@outlook.com>
>> > Co-authored-by: Frank Plowman <post@frankplowman.com>
>> > Co-authored-by: Shaun Loo <shaunloo10@gmail.com>
>> > Co-authored-by: Wu Jianhua <toqsxw@outlook.com>
>> > ---
>> >  libavcodec/vvc/Makefile              |    1 +
>> >  libavcodec/vvc/vvc_ctu.h             |    1 +
>> >  libavcodec/vvc/vvc_filter.c          | 1332 ++++++++++++++++++++++++++
>> >  libavcodec/vvc/vvc_filter.h          |   71 ++
>> >  libavcodec/vvc/vvc_filter_template.c | 1135 ++++++++++++++++++++++
>> >  5 files changed, 2540 insertions(+)
>> >  create mode 100644 libavcodec/vvc/vvc_filter.c
>> >  create mode 100644 libavcodec/vvc/vvc_filter.h
>> >  create mode 100644 libavcodec/vvc/vvc_filter_template.c
>> >
>>
>> Is there really absolutely nothing you can reuse from the HEVC decoder?
>>
> LMCS and ALF are new filters. SAO is entirely reusable, and deblock is
> partially reusable.
> However, all of them require some changes to the HEVC code.
> Currently, I prefer not to touch the HEVC code as there is no HEVC
> maintainer, and any HEVC code change requires a very long review time.

Yes, keep it up like this.
Lynne Jan. 2, 2024, 3:51 p.m. UTC | #4
Jan 2, 2024, 10:48 by nuomi2021@gmail.com:

> On Mon, Jan 1, 2024 at 11:18 PM Lynne <dev@lynne.ee> wrote:
>
>> Jan 1, 2024, 15:15 by nuomi2021@gmail.com:
>>
>> > Co-authored-by: Xu Mu <toxumu@outlook.com>
>> > Co-authored-by: Frank Plowman <post@frankplowman.com>
>> > Co-authored-by: Shaun Loo <shaunloo10@gmail.com>
>> > Co-authored-by: Wu Jianhua <toqsxw@outlook.com>
>> > ---
>> >  libavcodec/vvc/Makefile              |    1 +
>> >  libavcodec/vvc/vvc_ctu.h             |    1 +
>> >  libavcodec/vvc/vvc_filter.c          | 1332 ++++++++++++++++++++++++++
>> >  libavcodec/vvc/vvc_filter.h          |   71 ++
>> >  libavcodec/vvc/vvc_filter_template.c | 1135 ++++++++++++++++++++++
>> >  5 files changed, 2540 insertions(+)
>> >  create mode 100644 libavcodec/vvc/vvc_filter.c
>> >  create mode 100644 libavcodec/vvc/vvc_filter.h
>> >  create mode 100644 libavcodec/vvc/vvc_filter_template.c
>> >
>>
>> Is there really absolutely nothing you can reuse from the HEVC decoder?
>>
> LMCS and ALF are new filters. SAO is entirely reusable, and deblock is
> partially reusable.
> However, all of them require some changes to the HEVC code.
> Currently, I prefer not to touch the HEVC code as there is no HEVC
> maintainer, and any HEVC code change requires a very long review time.
>

I disagree with this. There doesn't need to be an HEVC maintainer to merge
patches, many of us are experienced enough to review them.
If this does not get done now, it will never get done, and it'll be a nightmare
for someone trying to clean it up in 5 years.

Particularly, we've been trying to clean up and deduplicate code and tables
as much as possible in libavcodec, so I'd rather not have all our effort go to
waste.

Please, make them share the code. Keep it in libavcodec/, since it's
common to both.
Nuo Mi Jan. 3, 2024, 1:14 a.m. UTC | #5
On Tue, Jan 2, 2024 at 11:51 PM Lynne <dev@lynne.ee> wrote:

> Jan 2, 2024, 10:48 by nuomi2021@gmail.com:
>
> > On Mon, Jan 1, 2024 at 11:18 PM Lynne <dev@lynne.ee> wrote:
> >
> >> Jan 1, 2024, 15:15 by nuomi2021@gmail.com:
> >>
> >> > Co-authored-by: Xu Mu <toxumu@outlook.com>
> >> > Co-authored-by: Frank Plowman <post@frankplowman.com>
> >> > Co-authored-by: Shaun Loo <shaunloo10@gmail.com>
> >> > Co-authored-by: Wu Jianhua <toqsxw@outlook.com>
> >> > ---
> >> >  libavcodec/vvc/Makefile              |    1 +
> >> >  libavcodec/vvc/vvc_ctu.h             |    1 +
> >> >  libavcodec/vvc/vvc_filter.c          | 1332
> ++++++++++++++++++++++++++
> >> >  libavcodec/vvc/vvc_filter.h          |   71 ++
> >> >  libavcodec/vvc/vvc_filter_template.c | 1135 ++++++++++++++++++++++
> >> >  5 files changed, 2540 insertions(+)
> >> >  create mode 100644 libavcodec/vvc/vvc_filter.c
> >> >  create mode 100644 libavcodec/vvc/vvc_filter.h
> >> >  create mode 100644 libavcodec/vvc/vvc_filter_template.c
> >> >
> >>
> >> Is there really absolutely nothing you can reuse from the HEVC decoder?
> >>
> > LMCS and ALF are new filters. SAO is entirely reusable, and deblock is
> > partially reusable.
> > However, all of them require some changes to the HEVC code.
> > Currently, I prefer not to touch the HEVC code as there is no HEVC
> > maintainer, and any HEVC code change requires a very long review time.
> >
>
> I disagree with this. There doesn't need to be an HEVC maintainer to merge
> patches, many of us are experienced enough to review them.
> If this does not get done now, it will never get done, and it'll be a
> nightmare
> for someone trying to clean it up in 5 years.
>

Version 7.0 will be released in February. I hope we can include VVC decode.
I also want to give people some time to try and report issues about VVC.
Sending the VVC enabling patch set again may make the filter change
reviewer lose focus.
How about I merge this patch set today, and later, my first patch will
focus on fixing this.
I promise I won't make you wait for so long.
Thank you


> Particularly, we've been trying to clean up and deduplicate code and tables
> as much as possible in libavcodec, so I'd rather not have all our effort
> go to
> waste.
>
> Please, make them share the code. Put keep in in libavcodec/, since it's
> common to both.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Michael Niedermayer Jan. 3, 2024, 6:38 p.m. UTC | #6
On Tue, Jan 02, 2024 at 05:47:53PM +0800, Nuo Mi wrote:
[...]
> maintainer, and any HEVC code change requires a very long review time.

I think changes that just move code around or rename functions would be
quickly reviewed
I don't know if things are 1:1 identical or if some complex changes are needed

I think if things are 1:1 identical it can be done now otherwise it may be
better to do after the initial merge

thx

[...]
Nuo Mi Jan. 4, 2024, 11:45 a.m. UTC | #7
On Thu, Jan 4, 2024 at 2:38 AM Michael Niedermayer <michael@niedermayer.cc>
wrote:

> On Tue, Jan 02, 2024 at 05:47:53PM +0800, Nuo Mi wrote:
> [...]
> > maintainer, and any HEVC code change requires a very long review time.
>
> I think changes that just move code around or rename functions would be
> quickly reviewed
> I dont know if things are 1:1 identical or some complex changes are needed
>
> I think if things are 1:1 identical it can be done now otherwise it may be
> better to do after the initial merge
>
Agree,

Taking   https://patchwork.ffmpeg.org/project/ffmpeg/list/?series=10404  as
an example,
The following items are worth discussing, though they won't impact the
functionality:
1. Can we use "XVC" as a common name for HEVC and VVC, or should we use
"h2656" instead of "XVC"?
2. Considering that we will share inter/mc code between VVC and HEVC, do we
need to create a new directory for the common code?
Sending a separate patch will help us focus on discussions related to code
reuse and how to name files.
This is why I suggest we merge the initial patch first.


> thx
>
> [...]
> --
> Michael     GnuPG fingerprint: 9FF2128B147EF6730BADF133611EC787040B0FAB
>
> Avoid a single point of failure, be that a person or equipment.
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Martin Storsjö Jan. 4, 2024, 12:06 p.m. UTC | #8
On Thu, 4 Jan 2024, Nuo Mi wrote:

> On Thu, Jan 4, 2024 at 2:38 AM Michael Niedermayer <michael@niedermayer.cc>
> wrote:
>
>> On Tue, Jan 02, 2024 at 05:47:53PM +0800, Nuo Mi wrote:
>> [...]
>> > maintainer, and any HEVC code change requires a very long review time.
>>
>> I think changes that just move code around or rename functions would be
>> quickly reviewed
>> I dont know if things are 1:1 identical or some complex changes are needed
>>
>> I think if things are 1:1 identical it can be done now otherwise it may be
>> better to do after the initial merge
>>
> Agree,
>
> Taking   https://patchwork.ffmpeg.org/project/ffmpeg/list/?series=10404  as
> an example,
> The following items are worth discussing, though they won't impact the
> functionality:
> 1. Can we use "XVC" as a common name for HEVC and VVC, or should we use
> "h2656" instead of "XVC"?

The name "xvc" is potentially confusing - there is another video codec 
with the name xvc as well - see https://xvc.io.

// Martin
Nuo Mi Jan. 4, 2024, 1:57 p.m. UTC | #9
On Thu, Jan 4, 2024 at 8:07 PM Martin Storsjö <martin@martin.st> wrote:

> On Thu, 4 Jan 2024, Nuo Mi wrote:
>
> > On Thu, Jan 4, 2024 at 2:38 AM Michael Niedermayer <
> michael@niedermayer.cc>
> > wrote:
> >
> >> On Tue, Jan 02, 2024 at 05:47:53PM +0800, Nuo Mi wrote:
> >> [...]
> >> > maintainer, and any HEVC code change requires a very long review time.
> >>
> >> I think changes that just move code around or rename functions would be
> >> quickly reviewed
> >> I dont know if things are 1:1 identical or some complex changes are
> needed
> >>
> >> I think if things are 1:1 identical it can be done now otherwise it may
> be
> >> better to do after the initial merge
> >>
> > Agree,
> >
> > Taking   https://patchwork.ffmpeg.org/project/ffmpeg/list/?series=10404
> as
> > an example,
> > The following items are worth discussing, though they won't impact the
> > functionality:
> > 1. Can we use "XVC" as a common name for HEVC and VVC, or should we use
> > "h2656" instead of "XVC"?
>
> The name "xvc" is potentially confusing - there is another video codec
> with the name xvc as well - see https://xvc.io.
>
Yes, it's been dead for 4 years :). https://github.com/divideon/xvc
Not sure if the new codec will be named as xvc or not.
h2656 is safer but ugly


> // Martin
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
James Almer Jan. 4, 2024, 2:53 p.m. UTC | #10
On 1/4/2024 10:57 AM, Nuo Mi wrote:
> On Thu, Jan 4, 2024 at 8:07 PM Martin Storsjö <martin@martin.st> wrote:
> 
>> On Thu, 4 Jan 2024, Nuo Mi wrote:
>>
>>> On Thu, Jan 4, 2024 at 2:38 AM Michael Niedermayer <
>> michael@niedermayer.cc>
>>> wrote:
>>>
>>>> On Tue, Jan 02, 2024 at 05:47:53PM +0800, Nuo Mi wrote:
>>>> [...]
>>>>> maintainer, and any HEVC code change requires a very long review time.
>>>>
>>>> I think changes that just move code around or rename functions would be
>>>> quickly reviewed
>>>> I dont know if things are 1:1 identical or some complex changes are
>> needed
>>>>
>>>> I think if things are 1:1 identical it can be done now otherwise it may
>> be
>>>> better to do after the initial merge
>>>>
>>> Agree,
>>>
>>> Taking   https://patchwork.ffmpeg.org/project/ffmpeg/list/?series=10404
>> as
>>> an example,
>>> The following items are worth discussing, though they won't impact the
>>> functionality:
>>> 1. Can we use "XVC" as a common name for HEVC and VVC, or should we use
>>> "h2656" instead of "XVC"?
>>
>> The name "xvc" is potentially confusing - there is another video codec
>> with the name xvc as well - see https://xvc.io.
>>
> Yes, it's been dead for 4 years :). https://github.com/divideon/xvc
> Not sure if the new codec will be named as xvc or not.
> h2656 is safer but ugly

How about h26x? h264 also shares some code with hevc and vvc, like the 
NALu splitting code.
Nuo Mi Jan. 5, 2024, 12:02 a.m. UTC | #11
On Thu, Jan 4, 2024 at 10:53 PM James Almer <jamrial@gmail.com> wrote:

> On 1/4/2024 10:57 AM, Nuo Mi wrote:
> > On Thu, Jan 4, 2024 at 8:07 PM Martin Storsjö <martin@martin.st> wrote:
> >
> >> On Thu, 4 Jan 2024, Nuo Mi wrote:
> >>
> >>> On Thu, Jan 4, 2024 at 2:38 AM Michael Niedermayer <
> >> michael@niedermayer.cc>
> >>> wrote:
> >>>
> >>>> On Tue, Jan 02, 2024 at 05:47:53PM +0800, Nuo Mi wrote:
> >>>> [...]
> >>>>> maintainer, and any HEVC code change requires a very long review
> time.
> >>>>
> >>>> I think changes that just move code around or rename functions would
> be
> >>>> quickly reviewed
> >>>> I dont know if things are 1:1 identical or some complex changes are
> >> needed
> >>>>
> >>>> I think if things are 1:1 identical it can be done now otherwise it
> may
> >> be
> >>>> better to do after the initial merge
> >>>>
> >>> Agree,
> >>>
> >>> Taking
> https://patchwork.ffmpeg.org/project/ffmpeg/list/?series=10404
> >> as
> >>> an example,
> >>> The following items are worth discussing, though they won't impact the
> >>> functionality:
> >>> 1. Can we use "XVC" as a common name for HEVC and VVC, or should we use
> >>> "h2656" instead of "XVC"?
> >>
> >> The name "xvc" is potentially confusing - there is another video codec
> >> with the name xvc as well - see https://xvc.io.
> >>
> > Yes, it's been dead for 4 years :). https://github.com/divideon/xvc
> > Not sure if the new codec will be named as xvc or not.
> > h2656 is safer but ugly
>
> How about h26x? h264 also shares some code with hevc and vvc, like the
> NALu splitting code.
>
Good idea!
I will place them in "h26x/" and rename them to "h26x_sao_template.c" and
"h26x_deblock_template.c". and send patches later.
Thank you for the suggestion.

> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
Martin Storsjö Jan. 5, 2024, 11:55 a.m. UTC | #12
On Thu, 4 Jan 2024, James Almer wrote:

> On 1/4/2024 10:57 AM, Nuo Mi wrote:
>> On Thu, Jan 4, 2024 at 8:07 PM Martin Storsjö <martin@martin.st> wrote:
>> 
>>> On Thu, 4 Jan 2024, Nuo Mi wrote:
>>>
>>>> On Thu, Jan 4, 2024 at 2:38 AM Michael Niedermayer <
>>> michael@niedermayer.cc>
>>>> wrote:
>>>>
>>>>> On Tue, Jan 02, 2024 at 05:47:53PM +0800, Nuo Mi wrote:
>>>>> [...]
>>>>>> maintainer, and any HEVC code change requires a very long review time.
>>>>>
>>>>> I think changes that just move code around or rename functions would be
>>>>> quickly reviewed
>>>>> I dont know if things are 1:1 identical or some complex changes are
>>> needed
>>>>>
>>>>> I think if things are 1:1 identical it can be done now otherwise it may
>>> be
>>>>> better to do after the initial merge
>>>>>
>>>> Agree,
>>>>
>>>> Taking   https://patchwork.ffmpeg.org/project/ffmpeg/list/?series=10404
>>> as
>>>> an example,
>>>> The following items are worth discussing, though they won't impact the
>>>> functionality:
>>>> 1. Can we use "XVC" as a common name for HEVC and VVC, or should we use
>>>> "h2656" instead of "XVC"?
>>>
>>> The name "xvc" is potentially confusing - there is another video codec
>>> with the name xvc as well - see https://xvc.io.
>>>
>> Yes, it's been dead for 4 years :). https://github.com/divideon/xvc
>> Not sure if the new codec will be named as xvc or not.
>> h2656 is safer but ugly
>
> How about h26x? h264 also shares some code with hevc and vvc, like the 
> NALu splitting code.

h26x looks clearer, not that ugly, and less ambiguous to me, so that 
sounds reasonable to me.

// Martin
Michael Niedermayer Jan. 5, 2024, 8:47 p.m. UTC | #13
On Fri, Jan 05, 2024 at 01:55:08PM +0200, Martin Storsjö wrote:
> On Thu, 4 Jan 2024, James Almer wrote:
> 
> > On 1/4/2024 10:57 AM, Nuo Mi wrote:
> > > On Thu, Jan 4, 2024 at 8:07 PM Martin Storsjö <martin@martin.st> wrote:
> > > 
> > > > On Thu, 4 Jan 2024, Nuo Mi wrote:
> > > > 
> > > > > On Thu, Jan 4, 2024 at 2:38 AM Michael Niedermayer <
> > > > michael@niedermayer.cc>
> > > > > wrote:
> > > > > 
> > > > > > On Tue, Jan 02, 2024 at 05:47:53PM +0800, Nuo Mi wrote:
> > > > > > [...]
> > > > > > > maintainer, and any HEVC code change requires a very long review time.
> > > > > > 
> > > > > > I think changes that just move code around or rename functions would be
> > > > > > quickly reviewed
> > > > > > I dont know if things are 1:1 identical or some complex changes are
> > > > needed
> > > > > > 
> > > > > > I think if things are 1:1 identical it can be done now otherwise it may
> > > > be
> > > > > > better to do after the initial merge
> > > > > > 
> > > > > Agree,
> > > > > 
> > > > > Taking   https://patchwork.ffmpeg.org/project/ffmpeg/list/?series=10404
> > > > as
> > > > > an example,
> > > > > The following items are worth discussing, though they won't impact the
> > > > > functionality:
> > > > > 1. Can we use "XVC" as a common name for HEVC and VVC, or should we use
> > > > > "h2656" instead of "XVC"?
> > > > 
> > > > The name "xvc" is potentially confusing - there is another video codec
> > > > with the name xvc as well - see https://xvc.io.
> > > > 
> > > Yes, it's been dead for 4 years :). https://github.com/divideon/xvc
> > > Not sure if the new codec will be named as xvc or not.
> > > h2656 is safer but ugly
> > 
> > How about h26x? h264 also shares some code with hevc and vvc, like the
> > NALu splitting code.
> 
> h264x looks clearer, not that ugly, and less ambiguous to me, so that sounds
> reasonable to me.

I agree.
There are h261, h262/mpeg2, h263/mpeg4;
it would be better for clarity if these groups can be distinguished by their
names or location.

thx

[...]
diff mbox series

Patch

diff --git a/libavcodec/vvc/Makefile b/libavcodec/vvc/Makefile
index 35bb565680..57e2a2af75 100644
--- a/libavcodec/vvc/Makefile
+++ b/libavcodec/vvc/Makefile
@@ -4,6 +4,7 @@  clean::
 OBJS-$(CONFIG_VVC_DECODER)          +=  vvc/vvc_cabac.o         \
                                         vvc/vvc_ctu.o           \
                                         vvc/vvc_data.o          \
+                                        vvc/vvc_filter.o        \
                                         vvc/vvc_inter.o         \
                                         vvc/vvc_intra.o         \
                                         vvc/vvc_itx_1d.o        \
diff --git a/libavcodec/vvc/vvc_ctu.h b/libavcodec/vvc/vvc_ctu.h
index 47c9f181bb..f3b1b4b7e3 100644
--- a/libavcodec/vvc/vvc_ctu.h
+++ b/libavcodec/vvc/vvc_ctu.h
@@ -463,6 +463,7 @@  typedef struct ALFParams {
 void ff_vvc_set_neighbour_available(VVCLocalContext *lc, int x0, int y0, int w, int h);
 void ff_vvc_decode_neighbour(VVCLocalContext *lc, int x_ctb, int y_ctb, int rx, int ry, int rs);
 void ff_vvc_ctu_free_cus(CTU *ctu);
+int ff_vvc_get_qPy(const VVCFrameContext *fc, int xc, int yc);
 void ff_vvc_ep_init_stat_coeff(EntryPoint *ep, int bit_depth, int persistent_rice_adaptation_enabled_flag);
 
 #endif // AVCODEC_VVC_VVC_CTU_H
diff --git a/libavcodec/vvc/vvc_filter.c b/libavcodec/vvc/vvc_filter.c
new file mode 100644
index 0000000000..e5cd89b8a3
--- /dev/null
+++ b/libavcodec/vvc/vvc_filter.c
@@ -0,0 +1,1332 @@ 
+/*
+ * VVC filters
+ *
+ * Copyright (C) 2021 Nuo Mi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#include "libavutil/frame.h"
+
+#include "vvc_ctu.h"
+#include "vvc_data.h"
+#include "vvc_filter.h"
+#include "vvc_refs.h"
+/* Indices of the four CTB borders, in the same order as edges[] in ff_vvc_sao_filter(). */
+#define LEFT        0
+#define TOP         1
+#define RIGHT       2
+#define BOTTOM      3
+#define MAX_EDGES   4 /* count of the border indices above */
+
+#define DEFAULT_INTRA_TC_OFFSET 2 /* NOTE(review): not referenced in the visible part of this file -- confirm where it applies */
+
+// tc' part of Table 43 "Derivation of threshold variables beta' and tc' from input Q" (66 entries)
+static const uint16_t tctable[66] = {
+      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+      0,   0,   3,   4,   4,   4,   4,   5,   5,   5,   5,   7,   7,   8,   9,  10,
+     10,  11,  13,  14,  15,  17,  19,  21,  24,  25,  29,  33,  36,  41,  45,  51,
+     57,  64,  71,  80,  89, 100, 112, 125, 141, 157, 177, 198, 222, 250, 280, 314,
+    352, 395,
+};
+
+// beta' part of Table 43 "Derivation of threshold variables beta' and tc' from input Q" (64 entries)
+static const uint8_t betatable[64] = {
+      0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
+      6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  20,  22,  24,
+     26,  28,  30,  32,  34,  36,  38,  40,  42,  44,  46,  48,  50,  52,  54,  56,
+     58,  60,  62,  64,  66,  68,  70,  72,  74,  76,  78,  80,  82,  84,  86,  88,
+};
+/* Return the entry of fc->tab.qp[chroma] for the min-TU grid cell that contains position (x0, y0). */
+static int get_qPc(const VVCFrameContext *fc, const int x0, const int y0, const int chroma)
+{
+    const int x            = x0 >> MIN_TU_LOG2; /* column in the min-TU grid */
+    const int y            = y0 >> MIN_TU_LOG2; /* row in the min-TU grid */
+    const int min_tu_width = fc->ps.pps->min_tu_width; /* used as the row pitch of the qp table */
+    return fc->tab.qp[chroma][x + y * min_tu_width];
+}
+/* Copy `height` rows of `width` bytes from src to dst; after each row both pointers advance by their own stride. */
+static void copy_ctb(uint8_t *dst, const uint8_t *src, const int width, const int height,
+    const ptrdiff_t dst_stride, const ptrdiff_t src_stride)
+{
+    for (int y = 0; y < height; y++) {
+        memcpy(dst, src, width); /* width is a byte count here, not a pixel count */
+
+        dst += dst_stride;
+        src += src_stride;
+    }
+}
+/* Copy a single pixel: one uint16_t when pixel_shift is non-zero, otherwise one byte. */
+static void copy_pixel(uint8_t *dst, const uint8_t *src, const int pixel_shift)
+{
+    if (pixel_shift)
+        *(uint16_t *)dst = *(uint16_t *)src; /* NOTE(review): assumes dst/src are suitably aligned for uint16_t -- confirm */
+    else
+        *dst = *src;
+}
+/* Copy `height` pixels one at a time, advancing dst and src by their byte strides after each pixel. */
+static void copy_vert(uint8_t *dst, const uint8_t *src, const int pixel_shift, const int height,
+    const ptrdiff_t dst_stride, const ptrdiff_t src_stride)
+{
+    int i;
+    if (pixel_shift == 0) { /* one byte per pixel */
+        for (i = 0; i < height; i++) {
+            *dst = *src;
+            dst += dst_stride;
+            src += src_stride;
+        }
+    } else { /* pixels are copied as uint16_t */
+        for (i = 0; i < height; i++) {
+            *(uint16_t *)dst = *(uint16_t *)src;
+            dst += dst_stride;
+            src += src_stride;
+        }
+    }
+}
+/* Save border pixels of one plane block into fc->tab.sao_pixel_buffer_h/_v: the top row if `top`, else the bottom row and both side columns. */
+static void copy_ctb_to_hv(VVCFrameContext *fc, const uint8_t *src,
+    const ptrdiff_t src_stride, const int x, const int y, const int width, const int height,
+    const int c_idx, const int x_ctb, const int y_ctb, const int top)
+{
+    const int ps = fc->ps.sps->pixel_shift; /* shift turning pixel offsets into byte offsets */
+    const int w  = fc->ps.sps->width >> fc->ps.sps->hshift[c_idx]; /* sps width scaled for plane c_idx */
+    const int h  = fc->ps.sps->height >> fc->ps.sps->vshift[c_idx]; /* sps height scaled for plane c_idx */
+
+    if (top) {
+        /* top: first row of src goes to row slot 2 * y_ctb of the horizontal buffer, at column x */
+        memcpy(fc->tab.sao_pixel_buffer_h[c_idx] + (((2 * y_ctb) * w + x) << ps),
+            src, width << ps);
+    } else {
+        /* bottom: last row of src goes to row slot 2 * y_ctb + 1 of the horizontal buffer, at column x */
+        memcpy(fc->tab.sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 1) * w + x) << ps),
+            src + src_stride * (height - 1), width << ps);
+
+        /* copy vertical edges: left column to slot 2 * x_ctb and right column to slot 2 * x_ctb + 1, each starting at row y */
+        copy_vert(fc->tab.sao_pixel_buffer_v[c_idx] + (((2 * x_ctb) * h + y) << ps), src, ps, height, 1 << ps, src_stride);
+        copy_vert(fc->tab.sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 1) * h + y) << ps), src + ((width - 1) << ps), ps, height, 1 << ps, src_stride);
+    }
+}
+/* For every plane of CTB (rx, ry), locate its pixels in fc->frame and pass them to copy_ctb_to_hv(), clipped to the picture size. */
+static void sao_copy_ctb_to_hv(VVCLocalContext *lc, const int rx, const int ry, const int top)
+{
+    VVCFrameContext *fc  = lc->fc;
+    const int ctb_size_y = fc->ps.sps->ctb_size_y;
+    const int x0         = rx << fc->ps.sps->ctb_log2_size_y; /* CTB column index -> position, before per-plane shift */
+    const int y0         = ry << fc->ps.sps->ctb_log2_size_y; /* CTB row index -> position, before per-plane shift */
+
+    for (int c_idx = 0; c_idx < (fc->ps.sps->r->sps_chroma_format_idc ? 3 : 1); c_idx++) { /* 3 planes, or 1 when sps_chroma_format_idc is 0 */
+        const int x                = x0 >> fc->ps.sps->hshift[c_idx];
+        const int y                = y0 >> fc->ps.sps->vshift[c_idx];
+        const ptrdiff_t src_stride = fc->frame->linesize[c_idx];
+        const int ctb_size_h       = ctb_size_y >> fc->ps.sps->hshift[c_idx];
+        const int ctb_size_v       = ctb_size_y >> fc->ps.sps->vshift[c_idx];
+        const int width            = FFMIN(ctb_size_h, (fc->ps.sps->width  >> fc->ps.sps->hshift[c_idx]) - x); /* clip to plane width */
+        const int height           = FFMIN(ctb_size_v, (fc->ps.sps->height >> fc->ps.sps->vshift[c_idx]) - y); /* clip to plane height */
+        const uint8_t *src          = &fc->frame->data[c_idx][y * src_stride + (x << fc->ps.sps->pixel_shift)];
+        copy_ctb_to_hv(fc, src, src_stride, x, y, width, height, c_idx, rx, ry, top);
+    }
+}
+/* Save the border pixels around CTB (rx, ry) via sao_copy_ctb_to_hv(); see the per-call comments for which borders are taken. */
+void ff_vvc_sao_copy_ctb_to_hv(VVCLocalContext *lc, const int rx, const int ry, const int last_row)
+{
+    if (ry)
+        sao_copy_ctb_to_hv(lc, rx, ry - 1, 0); /* CTB above: bottom row and side columns */
+
+    sao_copy_ctb_to_hv(lc, rx, ry, 1); /* this CTB: top row */
+
+    if (last_row)
+        sao_copy_ctb_to_hv(lc, rx, ry, 0); /* no CTB row below will do it, so save this CTB's bottom row and side columns now */
+}
+
+void ff_vvc_sao_filter(VVCLocalContext *lc, int x, int y)
+{
+    VVCFrameContext *fc  = lc->fc;
+    const int ctb_size_y = fc->ps.sps->ctb_size_y;
+    static const uint8_t sao_tab[16] = { 0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8 };
+    int c_idx;
+    int edges[4];  // 0 left 1 top 2 right 3 bottom
+    const int x_ctb      = x >> fc->ps.sps->ctb_log2_size_y;
+    const int y_ctb      = y >> fc->ps.sps->ctb_log2_size_y;
+    const SAOParams *sao = &CTB(fc->tab.sao, x_ctb, y_ctb);
+    // flags indicating unfilterable edges
+    uint8_t vert_edge[]          = { 0, 0 };
+    uint8_t horiz_edge[]         = { 0, 0 };
+    uint8_t diag_edge[]          = { 0, 0, 0, 0 };
+    const uint8_t lfase          = fc->ps.pps->r->pps_loop_filter_across_slices_enabled_flag;
+    const uint8_t no_tile_filter = fc->ps.pps->r->num_tiles_in_pic > 1 &&
+                               !fc->ps.pps->r->pps_loop_filter_across_tiles_enabled_flag;
+    const uint8_t restore        = no_tile_filter || !lfase;
+    uint8_t left_tile_edge   = 0;
+    uint8_t right_tile_edge  = 0;
+    uint8_t up_tile_edge     = 0;
+    uint8_t bottom_tile_edge = 0;
+
+    edges[LEFT]   = x_ctb == 0;
+    edges[TOP]    = y_ctb == 0;
+    edges[RIGHT]  = x_ctb == fc->ps.pps->ctb_width  - 1;
+    edges[BOTTOM] = y_ctb == fc->ps.pps->ctb_height - 1;
+
+    if (restore) {
+        if (!edges[LEFT]) {
+            left_tile_edge  = no_tile_filter && fc->ps.pps->ctb_to_col_bd[x_ctb] == x_ctb;
+            vert_edge[0]    = (!lfase && CTB(fc->tab.slice_idx, x_ctb, y_ctb) != CTB(fc->tab.slice_idx, x_ctb - 1, y_ctb)) || left_tile_edge;
+        }
+        if (!edges[RIGHT]) {
+            right_tile_edge = no_tile_filter && fc->ps.pps->ctb_to_col_bd[x_ctb] != fc->ps.pps->ctb_to_col_bd[x_ctb + 1];
+            vert_edge[1]    = (!lfase && CTB(fc->tab.slice_idx, x_ctb, y_ctb) != CTB(fc->tab.slice_idx, x_ctb + 1, y_ctb)) || right_tile_edge;
+        }
+        if (!edges[TOP]) {
+            up_tile_edge     = no_tile_filter && fc->ps.pps->ctb_to_row_bd[y_ctb] == y_ctb;
+            horiz_edge[0]    = (!lfase && CTB(fc->tab.slice_idx, x_ctb, y_ctb) != CTB(fc->tab.slice_idx, x_ctb, y_ctb - 1)) || up_tile_edge;
+        }
+        if (!edges[BOTTOM]) {
+            bottom_tile_edge = no_tile_filter && fc->ps.pps->ctb_to_row_bd[y_ctb] != fc->ps.pps->ctb_to_row_bd[y_ctb + 1];
+            horiz_edge[1]    = (!lfase && CTB(fc->tab.slice_idx, x_ctb, y_ctb) != CTB(fc->tab.slice_idx, x_ctb, y_ctb + 1)) || bottom_tile_edge;
+        }
+        if (!edges[LEFT] && !edges[TOP]) {
+            diag_edge[0] = (!lfase && CTB(fc->tab.slice_idx, x_ctb, y_ctb) != CTB(fc->tab.slice_idx, x_ctb - 1, y_ctb - 1)) || left_tile_edge || up_tile_edge;
+        }
+        if (!edges[TOP] && !edges[RIGHT]) {
+            diag_edge[1] = (!lfase && CTB(fc->tab.slice_idx, x_ctb, y_ctb) != CTB(fc->tab.slice_idx, x_ctb + 1, y_ctb - 1)) || right_tile_edge || up_tile_edge;
+        }
+        if (!edges[RIGHT] && !edges[BOTTOM]) {
+            diag_edge[2] = (!lfase && CTB(fc->tab.slice_idx, x_ctb, y_ctb) != CTB(fc->tab.slice_idx, x_ctb + 1, y_ctb + 1)) || right_tile_edge || bottom_tile_edge;
+        }
+        if (!edges[LEFT] && !edges[BOTTOM]) {
+            diag_edge[3] = (!lfase && CTB(fc->tab.slice_idx, x_ctb, y_ctb) != CTB(fc->tab.slice_idx, x_ctb - 1, y_ctb + 1)) || left_tile_edge || bottom_tile_edge;
+        }
+    }
+
+    for (c_idx = 0; c_idx < (fc->ps.sps->r->sps_chroma_format_idc ? 3 : 1); c_idx++) {
+        int x0       = x >> fc->ps.sps->hshift[c_idx];
+        int y0       = y >> fc->ps.sps->vshift[c_idx];
+        ptrdiff_t src_stride = fc->frame->linesize[c_idx];
+        int ctb_size_h = ctb_size_y >> fc->ps.sps->hshift[c_idx];
+        int ctb_size_v = ctb_size_y >> fc->ps.sps->vshift[c_idx];
+        int width    = FFMIN(ctb_size_h, (fc->ps.sps->width  >> fc->ps.sps->hshift[c_idx]) - x0);
+        int height   = FFMIN(ctb_size_v, (fc->ps.sps->height >> fc->ps.sps->vshift[c_idx]) - y0);
+        int tab      = sao_tab[(FFALIGN(width, 8) >> 3) - 1];
+        uint8_t *src = &fc->frame->data[c_idx][y0 * src_stride + (x0 << fc->ps.sps->pixel_shift)];
+        ptrdiff_t dst_stride;
+        uint8_t *dst;
+
+        switch (sao->type_idx[c_idx]) {
+        case SAO_BAND:
+            fc->vvcdsp.sao.band_filter[tab](src, src, src_stride, src_stride,
+                sao->offset_val[c_idx], sao->band_position[c_idx], width, height);
+            break;
+        case SAO_EDGE:
+        {
+            const int w = fc->ps.sps->width >> fc->ps.sps->hshift[c_idx];
+            const int h = fc->ps.sps->height >> fc->ps.sps->vshift[c_idx];
+            const int sh = fc->ps.sps->pixel_shift;
+
+            dst_stride = 2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE;
+            dst = lc->sao_buffer + dst_stride + AV_INPUT_BUFFER_PADDING_SIZE;
+
+            if (!edges[TOP]) {
+                const int left = 1 - edges[LEFT];
+                const int right = 1 - edges[RIGHT];
+                const uint8_t *src1;
+                uint8_t *dst1;
+                int pos = 0;
+
+                dst1 = dst - dst_stride - (left << sh);
+                src1 = fc->tab.sao_pixel_buffer_h[c_idx] + (((2 * y_ctb - 1) * w + x0 - left) << sh);
+                if (left) {
+                    copy_pixel(dst1, src1, sh);
+                    pos += (1 << sh);
+                }
+                memcpy(dst1 + pos, src1 + pos, width << sh);
+                if (right) {
+                    pos += width << sh;
+                    copy_pixel(dst1 + pos, src1 + pos, sh);
+                }
+            }
+            if (!edges[BOTTOM]) {
+                const int left = 1 - edges[LEFT];
+                const int right = 1 - edges[RIGHT];
+                const uint8_t *src1;
+                uint8_t *dst1;
+                int pos = 0;
+
+                dst1 = dst + height * dst_stride - (left << sh);
+                src1 = fc->tab.sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 2) * w + x0 - left) << sh);
+                if (left) {
+                    copy_pixel(dst1, src1, sh);
+                    pos += (1 << sh);
+                }
+                memcpy(dst1 + pos, src1 + pos, width << sh);
+                if (right) {
+                    pos += width << sh;
+                    copy_pixel(dst1 + pos, src1 + pos, sh);
+                }
+            }
+            if (!edges[LEFT]) {
+                copy_vert(dst - (1 << sh),
+                    fc->tab.sao_pixel_buffer_v[c_idx] + (((2 * x_ctb - 1) * h + y0) << sh),
+                    sh, height, dst_stride, 1 << sh);
+            }
+            if (!edges[RIGHT]) {
+                copy_vert(dst + (width << sh),
+                    fc->tab.sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 2) * h + y0) << sh),
+                    sh, height, dst_stride, 1 << sh);
+            }
+
+            copy_ctb(dst, src,  width << sh, height, dst_stride, src_stride);
+            fc->vvcdsp.sao.edge_filter[tab](src, dst, src_stride, sao->offset_val[c_idx],
+                sao->eo_class[c_idx], width, height);
+            fc->vvcdsp.sao.edge_restore[restore](src, dst, src_stride, dst_stride,
+                sao, edges, width, height, c_idx, vert_edge, horiz_edge, diag_edge);
+            break;
+        }
+        }
+    }
+}
+
+#define TAB_BS(t, x, y)       (t)[((y) >> 2) * (fc->tab.sz.bs_width) + ((x) >> 2)] // entry of table t for position (x, y): one entry per 4x4 unit, row stride bs_width; needs `fc` in scope
+#define TAB_MAX_LEN(t, x, y)  (t)[((y) >> 2) * (fc->tab.sz.bs_width) + ((x) >> 2)] // same indexing as TAB_BS, used for the max-filter-length tables
+
+// the deblocking loops advance DEBLOCK_STEP (8) samples at a time along an edge
+#define DEBLOCK_STEP            8
+#define LUMA_GRID               4 // step between candidate luma edges in the deblocking loops
+#define CHROMA_GRID             8 // chroma edge spacing; shifted left by hshift/vshift where it is used
+
+// Non-zero when the two motion vectors differ by at least 8 units in x or in y.
+static int bs_mv_far_apart(const Mv *a, const Mv *b)
+{
+    return FFABS(a->x - b->x) >= 8 || FFABS(a->y - b->y) >= 8;
+}
+
+/*
+ * Motion-based boundary strength between the current block and a neighbour.
+ *
+ * curr is resolved against the current slice's reference lists (lc->sc->rpl),
+ * neigh against neigh_rpl. Returns 0 when both sides use the same reference
+ * pictures with motion vectors closer than 8 units per component, 1 otherwise
+ * (including when exactly one side is bi-predicted).
+ */
+static int boundary_strength(const VVCLocalContext *lc, const MvField *curr, const MvField *neigh,
+    const RefPicList *neigh_rpl)
+{
+    const RefPicList *rpl = lc->sc->rpl;
+    const int curr_bi     = curr->pred_flag  == PF_BI;
+    const int neigh_bi    = neigh->pred_flag == PF_BI;
+
+    if (curr_bi && neigh_bi) {
+        // both sides use two references: compare the lists straight and crossed
+        const int same_l0    = rpl[0].list[curr->ref_idx[0]] == neigh_rpl[0].list[neigh->ref_idx[0]];
+        const int same_l1    = rpl[1].list[curr->ref_idx[1]] == neigh_rpl[1].list[neigh->ref_idx[1]];
+        const int cross_n1c0 = neigh_rpl[1].list[neigh->ref_idx[1]] == rpl[0].list[curr->ref_idx[0]];
+        const int cross_n0c1 = neigh_rpl[0].list[neigh->ref_idx[0]] == rpl[1].list[curr->ref_idx[1]];
+        const int curr_twin  = rpl[0].list[curr->ref_idx[0]] == rpl[1].list[curr->ref_idx[1]];
+        const int neigh_twin = neigh_rpl[0].list[neigh->ref_idx[0]] == neigh_rpl[1].list[neigh->ref_idx[1]];
+        const int far_straight = bs_mv_far_apart(&neigh->mv[0], &curr->mv[0]) ||
+                                 bs_mv_far_apart(&neigh->mv[1], &curr->mv[1]);
+        const int far_crossed  = bs_mv_far_apart(&neigh->mv[1], &curr->mv[0]) ||
+                                 bs_mv_far_apart(&neigh->mv[0], &curr->mv[1]);
+
+        // all four references are the same picture: either pairing may match
+        if (same_l0 && curr_twin && neigh_twin)
+            return far_straight && far_crossed;
+        if (same_l0 && same_l1)
+            return far_straight;
+        if (cross_n1c0 && cross_n0c1)
+            return far_crossed;
+        return 1;
+    }
+
+    if (!curr_bi && !neigh_bi) {
+        // one motion vector per side: bit 0 of pred_flag selects list 0, otherwise list 1
+        const int lx_curr   = !(curr->pred_flag  & 1);
+        const int lx_neigh  = !(neigh->pred_flag & 1);
+        const int ref_curr  = rpl[lx_curr].list[curr->ref_idx[lx_curr]];
+        const int ref_neigh = neigh_rpl[lx_neigh].list[neigh->ref_idx[lx_neigh]];
+
+        if (ref_curr != ref_neigh)
+            return 1;
+        return bs_mv_far_apart(&curr->mv[lx_curr], &neigh->mv[lx_neigh]);
+    }
+
+    // one side bi-predicted, the other not
+    return 1;
+}
+
+/*
+ * Part of 8.8.3.3 Derivation process of transform block boundary.
+ *
+ * Derives the maximum luma filter lengths on the P side (left of / above the
+ * edge) and the Q side (at (qx, qy)) from the transform block sizes on both
+ * sides. vertical selects a vertical edge (P is at qx - 1), otherwise a
+ * horizontal edge (P is at qy - 1). is_intra is not used.
+ */
+static void derive_max_filter_length_luma(const VVCFrameContext *fc, const int qx, const int qy,
+                                          const int is_intra, const int has_subblock, const int vertical, uint8_t *max_len_p, uint8_t *max_len_q)
+{
+    const VVCPPS *pps      = fc->ps.pps;
+    const int px           = vertical ? qx - 1 : qx;
+    const int py           = vertical ? qy : qy - 1;
+    const uint8_t *tb_size = vertical ? fc->tab.tb_width[LUMA] : fc->tab.tb_height[LUMA];
+    const int tu_stride    = pps->min_tu_width;
+    const int size_p       = tb_size[(py >> MIN_TU_LOG2) * tu_stride + (px >> MIN_TU_LOG2)];
+    const int size_q       = tb_size[(qy >> MIN_TU_LOG2) * tu_stride + (qx >> MIN_TU_LOG2)];
+    const int cb_log2      = fc->ps.sps->min_cb_log2_size_y;
+    const int off_p        = (py >> cb_log2) * pps->min_cb_width + (px >> cb_log2);
+    int len_p, len_q;
+
+    if (size_p <= 4 || size_q <= 4) {
+        // a narrow block on either side limits both sides to one sample
+        len_p = len_q = 1;
+    } else {
+        len_p = size_p >= 32 ? 7 : 3;
+        len_q = size_q >= 32 ? 7 : 3;
+    }
+
+    // subblock flags cap each side at 5: Q from the caller, P from the msf/iaf tables
+    if (has_subblock)
+        len_q = FFMIN(5, len_q);
+    if (fc->tab.msf[off_p] || fc->tab.iaf[off_p])
+        len_p = FFMIN(5, len_p);
+
+    *max_len_p = len_p;
+    *max_len_q = len_q;
+}
+
+/*
+ * Boundary strength and max filter lengths for the vertical subblock edges
+ * inside the block at (x0, y0) of size width x height.
+ *
+ * Edges lie every 8 samples, measured from the coding block origin cb_x.
+ * Results go to fc->tab.vertical_bs[LUMA], vertical_p and vertical_q.
+ * cb_y is not used for vertical edges.
+ */
+static void vvc_deblock_subblock_bs_vertical(const VVCLocalContext *lc,
+    const int cb_x, const int cb_y, const int x0, const int y0, const int width, const int height)
+{
+    const VVCFrameContext *fc = lc->fc;
+    const MvField *mvf_tab    = fc->tab.mvf;
+    const RefPicList *rpl     = lc->sc->rpl;
+    const int pu_stride       = fc->ps.pps->min_pu_width;
+    const int first_edge      = 8 - ((x0 - cb_x) % 8);
+
+    for (int dx = first_edge; dx < width; dx += 8) {
+        const int x     = x0 + dx;
+        const int xp_pu = (x - 1) >> MIN_PU_LOG2;
+        const int xq_pu =  x      >> MIN_PU_LOG2;
+        uint8_t max_len;
+
+        // the filter length shrinks for edges 4 or 8 samples from either side of the block
+        if (dx == 4 || dx == width - 4)
+            max_len = 1;
+        else if (dx == 8 || dx == width - 8)
+            max_len = 2;
+        else
+            max_len = 3;
+
+        for (int dy = 0; dy < height; dy += 4) {
+            const int y         = y0 + dy;
+            const int pu_row    = (y >> MIN_PU_LOG2) * pu_stride;
+            const MvField *left = &mvf_tab[pu_row + xp_pu];
+            const MvField *curr = &mvf_tab[pu_row + xq_pu];
+
+            TAB_BS(fc->tab.vertical_bs[LUMA], x, y) = boundary_strength(lc, curr, left, rpl);
+            TAB_MAX_LEN(fc->tab.vertical_p, x, y)   = max_len;
+            TAB_MAX_LEN(fc->tab.vertical_q, x, y)   = max_len;
+        }
+    }
+}
+
+/*
+ * Boundary strength and max filter lengths for the horizontal subblock edges
+ * inside the block at (x0, y0) of size width x height.
+ *
+ * Edges lie every 8 samples, measured from the coding block origin cb_y.
+ * Results go to fc->tab.horizontal_bs[LUMA], horizontal_p and horizontal_q.
+ * cb_x is not used for horizontal edges.
+ */
+static void vvc_deblock_subblock_bs_horizontal(const VVCLocalContext *lc,
+    const int cb_x, const int cb_y, const int x0, const int y0, const int width, const int height)
+{
+    const VVCFrameContext *fc = lc->fc;
+    const MvField *mvf_tab    = fc->tab.mvf;
+    const RefPicList *rpl     = lc->sc->rpl;
+    const int pu_stride       = fc->ps.pps->min_pu_width;
+    const int first_edge      = 8 - ((y0 - cb_y) % 8);
+
+    for (int dy = first_edge; dy < height; dy += 8) {
+        const int y      = y0 + dy;
+        const int row_p  = ((y - 1) >> MIN_PU_LOG2) * pu_stride;
+        const int row_q  = ( y      >> MIN_PU_LOG2) * pu_stride;
+        uint8_t max_len;
+
+        // FIXME (carried over from the original code): the conditions
+        // "edgeTbFlags[x - sbW][y] is equal to 1" and
+        // "edgeTbFlags[x + sbW][y] is equal to 1" are not evaluated here
+        if (dy == 4 || dy == height - 4)
+            max_len = 1;
+        else if (dy == 8 || dy == height - 8)
+            max_len = 2;
+        else
+            max_len = 3;
+
+        for (int dx = 0; dx < width; dx += 4) {
+            const int x         = x0 + dx;
+            const int x_pu      = x >> MIN_PU_LOG2;
+            const MvField *top  = &mvf_tab[row_p + x_pu];
+            const MvField *curr = &mvf_tab[row_q + x_pu];
+
+            TAB_BS(fc->tab.horizontal_bs[LUMA], x, y) = boundary_strength(lc, curr, top, rpl);
+            TAB_MAX_LEN(fc->tab.horizontal_p, x, y)   = max_len;
+            TAB_MAX_LEN(fc->tab.horizontal_q, x, y)   = max_len;
+        }
+    }
+}
+
+/*
+ * Fill the luma boundary-strength and max-filter-length tables for the left
+ * edge of the transform block at (x0, y0), then for the internal subblock
+ * edges when the block is not intra and is flagged in fc->tab.msf or
+ * fc->tab.iaf.
+ */
+static void vvc_deblock_bs_luma_vertical(const VVCLocalContext *lc,
+    const int x0, const int y0, const int width, const int height)
+{
+    const VVCFrameContext *fc = lc->fc;
+    const VVCSPS *sps         = fc->ps.sps;
+    const VVCPPS *pps         = fc->ps.pps;
+    const MvField *tab_mvf    = fc->tab.mvf;
+    const int pu_stride       = pps->min_pu_width;
+    const int tu_stride       = pps->min_tu_width;
+    const int cb_log2         = sps->min_cb_log2_size_y;
+    const int off_q           = (y0 >> cb_log2) * pps->min_cb_width + (x0 >> cb_log2);
+    const int cb_x            = fc->tab.cb_pos_x[LUMA][off_q];
+    const int cb_y            = fc->tab.cb_pos_y[LUMA][off_q];
+    const int cb_width        = fc->tab.cb_width[LUMA][off_q];
+    const int is_intra        = tab_mvf[(y0 >> MIN_PU_LOG2) * pu_stride +
+        (x0 >> MIN_PU_LOG2)].pred_flag == PF_INTRA;
+    // subblock edges are only considered for non-intra blocks flagged in msf/iaf
+    const int has_subblocks   = !is_intra && (fc->tab.msf[off_q] || fc->tab.iaf[off_q]);
+    const int has_vertical_sb = has_subblocks && cb_width > 8;
+    // a CTB-aligned edge on a slice/tile boundary is dropped when the PPS forbids filtering across it
+    const int ctb_aligned     = (x0 % (1 << sps->ctb_log2_size_y)) == 0;
+    const int slice_blocked   = !pps->r->pps_loop_filter_across_slices_enabled_flag &&
+        (lc->boundary_flags & BOUNDARY_LEFT_SLICE) && ctb_aligned;
+    const int tile_blocked    = !pps->r->pps_loop_filter_across_tiles_enabled_flag &&
+        (lc->boundary_flags & BOUNDARY_LEFT_TILE) && ctb_aligned;
+    const int boundary_left   = x0 > 0 && !(x0 & 3) && !slice_blocked && !tile_blocked;
+
+    // bs for vertical TU boundaries
+    if (boundary_left) {
+        const RefPicList *rpl_left =
+            (lc->boundary_flags & BOUNDARY_LEFT_SLICE) ? ff_vvc_get_ref_list(fc, fc->ref, x0 - 1, y0) : lc->sc->rpl;
+        const int xp_pu = (x0 - 1) >> MIN_PU_LOG2;
+        const int xq_pu =  x0      >> MIN_PU_LOG2;
+        const int xp_tu = (x0 - 1) >> MIN_TU_LOG2;
+        const int xq_tu =  x0      >> MIN_TU_LOG2;
+        const int off_x = cb_x - x0;
+        // inside a cu, not aligned to 8 or with no subblocks: no motion-based strength
+        const int skip_motion = off_x && ((off_x % 8) || !has_vertical_sb);
+
+        for (int dy = 0; dy < height; dy += 4) {
+            const int y         = y0 + dy;
+            const int pu_row    = (y >> MIN_PU_LOG2) * pu_stride;
+            const int tu_row    = (y >> MIN_TU_LOG2) * tu_stride;
+            const MvField *left = &tab_mvf[pu_row + xp_pu];
+            const MvField *curr = &tab_mvf[pu_row + xq_pu];
+            const uint8_t left_cbf_luma = fc->tab.tu_coded_flag[LUMA][tu_row + xp_tu];
+            const uint8_t curr_cbf_luma = fc->tab.tu_coded_flag[LUMA][tu_row + xq_tu];
+            const uint8_t pcmf          = fc->tab.pcmf[LUMA][tu_row + xp_tu] &&
+                fc->tab.pcmf[LUMA][tu_row + xq_tu];
+            uint8_t max_len_p, max_len_q;
+            int bs;
+
+            if (pcmf)
+                bs = 0;
+            else if (curr->pred_flag == PF_INTRA || left->pred_flag == PF_INTRA || curr->ciip_flag || left->ciip_flag)
+                bs = 2;
+            else if (curr_cbf_luma || left_cbf_luma)
+                bs = 1;
+            else if (skip_motion)
+                bs = 0;
+            else
+                bs = boundary_strength(lc, curr, left, rpl_left);
+
+            TAB_BS(fc->tab.vertical_bs[LUMA], x0, y) = bs;
+
+            derive_max_filter_length_luma(fc, x0, y, is_intra, has_vertical_sb, 1, &max_len_p, &max_len_q);
+            TAB_MAX_LEN(fc->tab.vertical_p, x0, y) = max_len_p;
+            TAB_MAX_LEN(fc->tab.vertical_q, x0, y) = max_len_q;
+        }
+    }
+
+    if (has_subblocks)
+        vvc_deblock_subblock_bs_vertical(lc, cb_x, cb_y, x0, y0, width, height);
+}
+
+/*
+ * Fill the luma boundary-strength and max-filter-length tables for the top
+ * edge of the transform block at (x0, y0), then for the internal subblock
+ * edges when the block is not intra and is flagged in fc->tab.msf or
+ * fc->tab.iaf.
+ */
+static void vvc_deblock_bs_luma_horizontal(const VVCLocalContext *lc,
+    const int x0, const int y0, const int width, const int height)
+{
+    const VVCFrameContext *fc   = lc->fc;
+    const VVCSPS *sps           = fc->ps.sps;
+    const VVCPPS *pps           = fc->ps.pps;
+    const MvField *tab_mvf      = fc->tab.mvf;
+    const int pu_stride         = pps->min_pu_width;
+    const int tu_stride         = pps->min_tu_width;
+    const int cb_log2           = sps->min_cb_log2_size_y;
+    const int off_q             = (y0 >> cb_log2) * pps->min_cb_width + (x0 >> cb_log2);
+    const int cb_x              = fc->tab.cb_pos_x[LUMA][off_q];
+    const int cb_y              = fc->tab.cb_pos_y[LUMA][off_q];
+    const int cb_height         = fc->tab.cb_height[LUMA][off_q];
+    const int is_intra          = tab_mvf[(y0 >> MIN_PU_LOG2) * pu_stride +
+        (x0 >> MIN_PU_LOG2)].pred_flag == PF_INTRA;
+    // subblock edges are only considered for non-intra blocks flagged in msf/iaf
+    const int has_subblocks     = !is_intra && (fc->tab.msf[off_q] || fc->tab.iaf[off_q]);
+    const int has_horizontal_sb = has_subblocks && cb_height > 8;
+    // a CTB-aligned edge on a slice/tile boundary is dropped when the PPS forbids filtering across it
+    const int ctb_aligned       = (y0 % (1 << sps->ctb_log2_size_y)) == 0;
+    const int slice_blocked     = !pps->r->pps_loop_filter_across_slices_enabled_flag &&
+        (lc->boundary_flags & BOUNDARY_UPPER_SLICE) && ctb_aligned;
+    const int tile_blocked      = !pps->r->pps_loop_filter_across_tiles_enabled_flag &&
+        (lc->boundary_flags & BOUNDARY_UPPER_TILE) && ctb_aligned;
+    const int boundary_upper    = y0 > 0 && !(y0 & 3) && !slice_blocked && !tile_blocked;
+
+    // bs for horizontal TU boundaries
+    if (boundary_upper) {
+        const RefPicList *rpl_top =
+            (lc->boundary_flags & BOUNDARY_UPPER_SLICE) ? ff_vvc_get_ref_list(fc, fc->ref, x0, y0 - 1) : lc->sc->rpl;
+        const int pu_row_p = ((y0 - 1) >> MIN_PU_LOG2) * pu_stride;
+        const int pu_row_q = ( y0      >> MIN_PU_LOG2) * pu_stride;
+        const int tu_row_p = ((y0 - 1) >> MIN_TU_LOG2) * tu_stride;
+        const int tu_row_q = ( y0      >> MIN_TU_LOG2) * tu_stride;
+        const int off_y    = y0 - cb_y;
+        // inside a cu, not aligned to 8 or with no subblocks: no motion-based strength
+        const int skip_motion = off_y && ((off_y % 8) || !has_horizontal_sb);
+
+        for (int dx = 0; dx < width; dx += 4) {
+            const int x         = x0 + dx;
+            const int x_pu      = x >> MIN_PU_LOG2;
+            const int x_tu      = x >> MIN_TU_LOG2;
+            const MvField *top  = &tab_mvf[pu_row_p + x_pu];
+            const MvField *curr = &tab_mvf[pu_row_q + x_pu];
+            const uint8_t top_cbf_luma  = fc->tab.tu_coded_flag[LUMA][tu_row_p + x_tu];
+            const uint8_t curr_cbf_luma = fc->tab.tu_coded_flag[LUMA][tu_row_q + x_tu];
+            const uint8_t pcmf          = fc->tab.pcmf[LUMA][tu_row_p + x_tu] &&
+                fc->tab.pcmf[LUMA][tu_row_q + x_tu];
+            uint8_t max_len_p, max_len_q;
+            int bs;
+
+            if (pcmf)
+                bs = 0;
+            else if (curr->pred_flag == PF_INTRA || top->pred_flag == PF_INTRA || curr->ciip_flag || top->ciip_flag)
+                bs = 2;
+            else if (curr_cbf_luma || top_cbf_luma)
+                bs = 1;
+            else if (skip_motion)
+                bs = 0;
+            else
+                bs = boundary_strength(lc, curr, top, rpl_top);
+
+            TAB_BS(fc->tab.horizontal_bs[LUMA], x, y0) = bs;
+
+            derive_max_filter_length_luma(fc, x, y0, is_intra, has_horizontal_sb, 0, &max_len_p, &max_len_q);
+            TAB_MAX_LEN(fc->tab.horizontal_p, x, y0) = max_len_p;
+            TAB_MAX_LEN(fc->tab.horizontal_q, x, y0) = max_len_q;
+        }
+    }
+
+    if (has_subblocks)
+        vvc_deblock_subblock_bs_horizontal(lc, cb_x, cb_y, x0, y0, width, height);
+}
+
+/*
+ * Derive the boundary strength of the left edge of the chroma transform
+ * block at (x0, y0) and store it for Cb and Cr in fc->tab.vertical_bs.
+ *
+ * The edge is processed only when x0 lies on the chroma deblocking grid
+ * (CHROMA_GRID scaled by the horizontal chroma shift). A CTB-aligned edge on
+ * a slice/tile boundary is skipped when the PPS disables filtering across it.
+ * width is not used for a vertical edge.
+ *
+ * Strength per 2-sample step along the edge:
+ *   0 when both sides are flagged in fc->tab.pcmf[CHROMA],
+ *   2 when either side is intra or has ciip_flag set,
+ *   1 when either side has a coded residual for the component or a joint
+ *     Cb-Cr residual,
+ *   0 otherwise.
+ */
+static void vvc_deblock_bs_chroma_vertical(const VVCLocalContext *lc,
+    const int x0, const int y0, const int width, const int height)
+{
+    const VVCFrameContext *fc  = lc->fc;
+    const MvField *tab_mvf     = fc->tab.mvf;
+    const int log2_min_pu_size = MIN_PU_LOG2;
+    // pcmf / tu_coded_flag / tu_joint_cbcr_residual_flag are TU-granular
+    // (row stride min_tu_width), so they are indexed with the TU shift, as
+    // the luma functions do
+    const int log2_min_tu_size = MIN_TU_LOG2;
+    const int min_pu_width     = fc->ps.pps->min_pu_width;
+    const int min_tu_width     = fc->ps.pps->min_tu_width;
+    int boundary_left;
+
+    // bs for vertical TU boundaries
+    boundary_left = x0 > 0 && !(x0 & ((CHROMA_GRID << fc->ps.sps->hshift[1]) - 1));
+    if (boundary_left &&
+        ((!fc->ps.pps->r->pps_loop_filter_across_slices_enabled_flag &&
+          lc->boundary_flags & BOUNDARY_LEFT_SLICE &&
+          (x0 % (1 << fc->ps.sps->ctb_log2_size_y)) == 0) ||
+         (!fc->ps.pps->r->pps_loop_filter_across_tiles_enabled_flag &&
+          lc->boundary_flags & BOUNDARY_LEFT_TILE &&
+          (x0 % (1 << fc->ps.sps->ctb_log2_size_y)) == 0)))
+        boundary_left = 0;
+
+    if (boundary_left) {
+        const int xp_pu = (x0 - 1) >> log2_min_pu_size;
+        const int xq_pu =  x0      >> log2_min_pu_size;
+        const int xp_tu = (x0 - 1) >> log2_min_tu_size;
+        const int xq_tu =  x0      >> log2_min_tu_size;
+
+        for (int i = 0; i < height; i += 2) {
+            const int y_pu      = (y0 + i) >> log2_min_pu_size;
+            const int y_tu      = (y0 + i) >> log2_min_tu_size;
+            const MvField *left = &tab_mvf[y_pu * min_pu_width + xp_pu];
+            const MvField *curr = &tab_mvf[y_pu * min_pu_width + xq_pu];
+            const int left_tu   = y_tu * min_tu_width + xp_tu;
+            const int curr_tu   = y_tu * min_tu_width + xq_tu;
+            const uint8_t pcmf  = fc->tab.pcmf[CHROMA][left_tu] && fc->tab.pcmf[CHROMA][curr_tu];
+
+            for (int c = CB; c <= CR; c++) {
+                // residual present on either side, per component or joint Cb-Cr
+                uint8_t cbf = fc->tab.tu_coded_flag[c][left_tu] |
+                    fc->tab.tu_coded_flag[c][curr_tu] |
+                    fc->tab.tu_joint_cbcr_residual_flag[left_tu] |
+                    fc->tab.tu_joint_cbcr_residual_flag[curr_tu];
+                int bs = 0;
+
+                if (pcmf)
+                    bs = 0;
+                else if (curr->pred_flag == PF_INTRA || left->pred_flag == PF_INTRA || curr->ciip_flag || left->ciip_flag)
+                    bs = 2;
+                else if (cbf)
+                    bs = 1;
+                TAB_BS(fc->tab.vertical_bs[c], x0, (y0 + i)) = bs;
+            }
+        }
+    }
+}
+
+/*
+ * Derive the boundary strength of the top edge of the chroma transform
+ * block at (x0, y0) and store it for Cb and Cr in fc->tab.horizontal_bs.
+ *
+ * The edge is processed only when y0 lies on the chroma deblocking grid
+ * (CHROMA_GRID scaled by the vertical chroma shift). A CTB-aligned edge on
+ * a slice/tile boundary is skipped when the PPS disables filtering across it.
+ * height is not used for a horizontal edge.
+ *
+ * Strength per 2-sample step along the edge:
+ *   0 when both sides are flagged in fc->tab.pcmf[CHROMA],
+ *   2 when either side is intra or has ciip_flag set,
+ *   1 when either side has a coded residual for the component or a joint
+ *     Cb-Cr residual,
+ *   0 otherwise.
+ */
+static void vvc_deblock_bs_chroma_horizontal(const VVCLocalContext *lc,
+    const int x0, const int y0, const int width, const int height)
+{
+    const VVCFrameContext *fc  = lc->fc;
+    const MvField *tab_mvf     = fc->tab.mvf;
+    const int log2_min_pu_size = MIN_PU_LOG2;
+    // pcmf / tu_coded_flag / tu_joint_cbcr_residual_flag are TU-granular
+    // (row stride min_tu_width), so they are indexed with the TU shift, as
+    // the luma functions do
+    const int log2_min_tu_size = MIN_TU_LOG2;
+    const int min_pu_width     = fc->ps.pps->min_pu_width;
+    const int min_tu_width     = fc->ps.pps->min_tu_width;
+    int boundary_upper;
+
+    // bs for horizontal TU boundaries
+    boundary_upper = y0 > 0 && !(y0 & ((CHROMA_GRID << fc->ps.sps->vshift[1]) - 1));
+    if (boundary_upper &&
+        ((!fc->ps.pps->r->pps_loop_filter_across_slices_enabled_flag &&
+          lc->boundary_flags & BOUNDARY_UPPER_SLICE &&
+          (y0 % (1 << fc->ps.sps->ctb_log2_size_y)) == 0) ||
+         (!fc->ps.pps->r->pps_loop_filter_across_tiles_enabled_flag &&
+          lc->boundary_flags & BOUNDARY_UPPER_TILE &&
+          (y0 % (1 << fc->ps.sps->ctb_log2_size_y)) == 0)))
+        boundary_upper = 0;
+
+    if (boundary_upper) {
+        const int yp_pu = (y0 - 1) >> log2_min_pu_size;
+        const int yq_pu =  y0      >> log2_min_pu_size;
+        const int yp_tu = (y0 - 1) >> log2_min_tu_size;
+        const int yq_tu =  y0      >> log2_min_tu_size;
+
+        for (int i = 0; i < width; i += 2) {
+            const int x_pu      = (x0 + i) >> log2_min_pu_size;
+            const int x_tu      = (x0 + i) >> log2_min_tu_size;
+            const MvField *top  = &tab_mvf[yp_pu * min_pu_width + x_pu];
+            const MvField *curr = &tab_mvf[yq_pu * min_pu_width + x_pu];
+            const int top_tu    = yp_tu * min_tu_width + x_tu;
+            const int curr_tu   = yq_tu * min_tu_width + x_tu;
+            const uint8_t pcmf  = fc->tab.pcmf[CHROMA][top_tu] && fc->tab.pcmf[CHROMA][curr_tu];
+
+            for (int c = CB; c <= CR; c++) {
+                // residual present on either side, per component or joint Cb-Cr
+                uint8_t cbf = fc->tab.tu_coded_flag[c][top_tu] |
+                    fc->tab.tu_coded_flag[c][curr_tu] |
+                    fc->tab.tu_joint_cbcr_residual_flag[top_tu] |
+                    fc->tab.tu_joint_cbcr_residual_flag[curr_tu];
+                int bs = 0;
+
+                if (pcmf)
+                    bs = 0;
+                else if (curr->pred_flag == PF_INTRA || top->pred_flag == PF_INTRA || curr->ciip_flag || top->ciip_flag)
+                    bs = 2;
+                else if (cbf)
+                    bs = 1;
+                TAB_BS(fc->tab.horizontal_bs[c], x0 + i, y0) = bs;
+            }
+        }
+    }
+}
+
+// Signature shared by the four per-transform-block boundary strength routines.
+typedef void (*deblock_bs_fn)(const VVCLocalContext *lc, const int x0, const int y0,
+    const int width, const int height);
+
+/*
+ * Run the boundary strength derivation for every transform block that starts
+ * inside the CTB at (x0, y0), first for luma and then for chroma.
+ * vertical selects the vertical-edge (1) or horizontal-edge (0) routines.
+ */
+static void vvc_deblock_bs(const VVCLocalContext *lc, const int x0, const int y0, const int vertical)
+{
+    const VVCFrameContext *fc = lc->fc;
+    const VVCSPS *sps         = fc->ps.sps;
+    const VVCPPS *pps         = fc->ps.pps;
+    const int ctb_size        = sps->ctb_size_y;
+    // CTB extent in min-TU units, clipped to the picture
+    const int tu_x_begin      = x0 >> MIN_TU_LOG2;
+    const int tu_y_begin      = y0 >> MIN_TU_LOG2;
+    const int tu_x_end        = FFMIN(x0 + ctb_size, pps->width) >> MIN_TU_LOG2;
+    const int tu_y_end        = FFMIN(y0 + ctb_size, pps->height) >> MIN_TU_LOG2;
+    const deblock_bs_fn handlers[2][2] = {
+        { vvc_deblock_bs_luma_horizontal, vvc_deblock_bs_chroma_horizontal },
+        { vvc_deblock_bs_luma_vertical,   vvc_deblock_bs_chroma_vertical   }
+    };
+
+    for (int is_chroma = 0; is_chroma <= 1; is_chroma++) {
+        const int hs = sps->hshift[is_chroma];
+        const int vs = sps->vshift[is_chroma];
+
+        for (int tu_y = tu_y_begin; tu_y < tu_y_end; tu_y++) {
+            for (int tu_x = tu_x_begin; tu_x < tu_x_end; tu_x++) {
+                const int off = tu_y * pps->min_tu_width + tu_x;
+
+                // only the unit holding a transform block's origin triggers the derivation
+                if ((fc->tab.tb_pos_x0[is_chroma][off] >> MIN_TU_LOG2) != tu_x ||
+                    (fc->tab.tb_pos_y0[is_chroma][off] >> MIN_TU_LOG2) != tu_y)
+                    continue;
+
+                handlers[vertical][is_chroma](lc, tu_x << MIN_TU_LOG2, tu_y << MIN_TU_LOG2,
+                    fc->tab.tb_width[is_chroma][off] << hs, fc->tab.tb_height[is_chroma][off] << vs);
+            }
+        }
+    }
+}
+
+// Part of 8.8.3.3 Derivation process of transform block boundary:
+// read back the luma max filter lengths stored for the edge at (qx, qy).
+static void max_filter_length_luma(const VVCFrameContext *fc, const int qx, const int qy,
+                                   const int vertical, uint8_t *max_len_p, uint8_t *max_len_q)
+{
+    if (vertical) {
+        *max_len_p = TAB_MAX_LEN(fc->tab.vertical_p, qx, qy);
+        *max_len_q = TAB_MAX_LEN(fc->tab.vertical_q, qx, qy);
+    } else {
+        *max_len_p = TAB_MAX_LEN(fc->tab.horizontal_p, qx, qy);
+        *max_len_q = TAB_MAX_LEN(fc->tab.horizontal_q, qx, qy);
+    }
+}
+
+/*
+ * Part of 8.8.3.3 Derivation process of transform block boundary.
+ *
+ * Chroma max filter lengths for the edge whose Q side is at (qx, qy); the P
+ * side is one sample to the left (vertical edge) or above (horizontal edge).
+ */
+static void max_filter_length_chroma(const VVCFrameContext *fc, const int qx, const int qy,
+                                     const int vertical, const int horizontal_ctu_edge, const int bs, uint8_t *max_len_p, uint8_t *max_len_q)
+{
+    const int px           = vertical ? qx - 1 : qx;
+    const int py           = vertical ? qy : qy - 1;
+    const int tu_stride    = fc->ps.pps->min_tu_width;
+    const uint8_t *tb_size = vertical ? fc->tab.tb_width[CHROMA] : fc->tab.tb_height[CHROMA];
+    const int size_p       = tb_size[(py >> MIN_TU_LOG2) * tu_stride + (px >> MIN_TU_LOG2)];
+    const int size_q       = tb_size[(qy >> MIN_TU_LOG2) * tu_stride + (qx >> MIN_TU_LOG2)];
+
+    if (size_p < 8 || size_q < 8) {
+        // part of 8.8.3.6.4 Decision process for chroma block edges:
+        // small blocks are filtered (length 1) only when bs is 2
+        *max_len_p = *max_len_q = (bs == 2);
+        return;
+    }
+
+    // both blocks are at least 8 wide/tall; the P side is shortened on a horizontal CTU edge
+    *max_len_q = 3;
+    *max_len_p = horizontal_ctu_edge ? 1 : 3;
+}
+
+// Dispatch to the luma (c_idx == 0) or chroma max filter length derivation.
+static void max_filter_length(const VVCFrameContext *fc, const int qx, const int qy,
+    const int c_idx, const int vertical, const int horizontal_ctu_edge, const int bs, uint8_t *max_len_p, uint8_t *max_len_q)
+{
+    if (c_idx) {
+        max_filter_length_chroma(fc, qx, qy, vertical, horizontal_ctu_edge, bs, max_len_p, max_len_q);
+        return;
+    }
+    // horizontal_ctu_edge and bs are only needed by the chroma path
+    max_filter_length_luma(fc, qx, qy, vertical, max_len_p, max_len_q);
+}
+
+#define TC_CALC(qp, bs)                                                 \
+    tctable[av_clip((qp) + DEFAULT_INTRA_TC_OFFSET * ((bs) - 1) +       \
+                    (tc_offset & -2),                                   \
+                    0, MAX_QP + DEFAULT_INTRA_TC_OFFSET)] // tctable lookup at the clipped index; reads `tc_offset` from the caller's scope with its lowest bit masked off
+
+/*
+ * Part of 8.8.3.6.2 Decision process for luma block edges.
+ *
+ * Returns the rounded average of the luma QPs on both sides of the edge at
+ * (x, y). When sps_ladf_enabled_flag is set, a QP offset chosen from the
+ * level reported by the ladf_level DSP function for src is added.
+ */
+static int get_qp_y(const VVCFrameContext *fc, const uint8_t *src, const int x, const int y, const int vertical)
+{
+    const VVCSPS *sps = fc->ps.sps;
+    const int qp_p    = ff_vvc_get_qPy(fc, x - vertical, y - !vertical);
+    const int qp_q    = ff_vvc_get_qPy(fc, x, y);
+    const int qp      = (qp_p + qp_q + 1) >> 1;
+
+    if (sps->r->sps_ladf_enabled_flag) {
+        const int level = fc->vvcdsp.lf.ladf_level[vertical](src, fc->frame->linesize[LUMA]);
+        int qp_offset   = sps->r->sps_ladf_lowest_interval_qp_offset;
+        int i           = 0;
+
+        // walk up the intervals while the level exceeds the next lower bound
+        while (i < sps->num_ladf_intervals - 1 && level > sps->ladf_interval_lower_bound[i + 1]) {
+            qp_offset = sps->r->sps_ladf_qp_offset[i];
+            i++;
+        }
+        return qp + qp_offset;
+    }
+
+    return qp;
+}
+
+// Chroma counterpart of get_qp_y(): rounded average of the chroma QPs
+// (get_qPc) on both sides of the edge at (x, y), each reduced by qp_bd_offset.
+static int get_qp_c(const VVCFrameContext *fc, const int x, const int y, const int c_idx, const int vertical)
+{
+    const VVCSPS *sps = fc->ps.sps;
+    const int qp_p    = get_qPc(fc, x - vertical, y - !vertical, c_idx);
+    const int qp_q    = get_qPc(fc, x, y, c_idx);
+
+    return (qp_p + qp_q - 2 * sps->qp_bd_offset + 1) >> 1;
+}
+
+// Edge QP for component c_idx: luma (c_idx == 0) uses src, chroma does not.
+static int get_qp(const VVCFrameContext *fc, const uint8_t *src, const int x, const int y, const int c_idx, const int vertical)
+{
+    return c_idx ? get_qp_c(fc, x, y, c_idx, vertical)
+                 : get_qp_y(fc, src, x, y, vertical);
+}
+
+/*
+ * Deblock the vertical edges of the CTB whose top-left corner is (x0, y0).
+ *
+ * The boundary strengths for the CTB are derived first; then, for every
+ * colour component and every edge position on that component's grid, the
+ * per-segment beta/tc/max-length parameters are gathered and handed to the
+ * vertical luma or chroma DSP filter.
+ */
+void ff_vvc_deblock_vertical(const VVCLocalContext *lc, int x0, int y0)
+{
+    VVCFrameContext *fc    = lc->fc;
+    const VVCSPS *sps      = fc->ps.sps;
+    const int c_end        = sps->r->sps_chroma_format_idc ? VVC_MAX_SAMPLE_ARRAYS : 1;
+    const int ctb_log2     = sps->ctb_log2_size_y;
+    const int ctb_size     = 1 << ctb_log2;
+    const int ctb_addr     = (x0 >> ctb_log2) + (y0 >> ctb_log2) * fc->ps.pps->ctb_width;
+    const DBParams *params = fc->tab.deblock + ctb_addr;
+    // always zero for now; the original author notes they may be needed by plt
+    const uint8_t no_p[4]  = { 0 };
+    const uint8_t no_q[4]  = { 0 };
+    int x_end, y_end;
+
+    vvc_deblock_bs(lc, x0, y0, 1);
+
+    // clip the CTB to the picture
+    x_end = FFMIN(x0 + ctb_size, sps->width);
+    y_end = FFMIN(y0 + ctb_size, sps->height);
+
+    for (int c_idx = 0; c_idx < c_end; c_idx++) {
+        const int hs          = sps->hshift[c_idx];
+        const int vs          = sps->vshift[c_idx];
+        const int grid        = c_idx ? (CHROMA_GRID << hs) : LUMA_GRID;
+        const int tc_offset   = params->tc_offset[c_idx];   // read by TC_CALC
+        const int beta_offset = params->beta_offset[c_idx];
+        const ptrdiff_t stride = fc->frame->linesize[c_idx];
+        const int segments    = DEBLOCK_STEP >> (2 - vs);
+
+        for (int y = y0; y < y_end; y += (DEBLOCK_STEP << vs)) {
+            // the picture's left border (x == 0) is never filtered
+            for (int x = x0 ? x0 : grid; x < x_end; x += grid) {
+                int32_t bs[4], beta[4], tc[4], all_zero_bs = 1;
+                uint8_t max_len_p[4], max_len_q[4];
+
+                for (int i = 0; i < segments; i++) {
+                    const int yy = y + (i << 2);
+                    const uint8_t *seg;
+                    int qp;
+
+                    bs[i] = (yy < y_end) ? TAB_BS(fc->tab.vertical_bs[c_idx], x, yy) : 0;
+                    if (!bs[i]) {
+                        tc[i] = 0;
+                        continue;
+                    }
+
+                    seg = &fc->frame->data[c_idx][(yy >> vs) * stride + ((x >> hs) << sps->pixel_shift)];
+                    qp  = get_qp(fc, seg, x, yy, c_idx, 1);
+
+                    beta[i] = betatable[av_clip(qp + beta_offset, 0, MAX_QP)];
+                    max_filter_length(fc, x, yy, c_idx, 1, 0, bs[i], &max_len_p[i], &max_len_q[i]);
+                    tc[i]   = TC_CALC(qp, bs[i]);
+                    all_zero_bs = 0;
+                }
+
+                if (!all_zero_bs) {
+                    uint8_t *src = &fc->frame->data[c_idx][(y >> vs) * stride + ((x >> hs) << sps->pixel_shift)];
+
+                    if (c_idx)
+                        fc->vvcdsp.lf.filter_chroma[1](src, stride,
+                            beta, tc, no_p, no_q, max_len_p, max_len_q, vs);
+                    else
+                        fc->vvcdsp.lf.filter_luma[1](src, stride,
+                            beta, tc, no_p, no_q, max_len_p, max_len_q, 0);
+                }
+            }
+        }
+    }
+}
+
+/**
+ * Deblock the horizontal edges of the CTU located at (x0, y0).
+ *
+ * vvc_deblock_bs(lc, x0, y0, 0) is called first; afterwards every component is
+ * walked on its own edge grid and, for each segment that has at least one
+ * non-zero entry in fc->tab.horizontal_bs, the luma or chroma
+ * fc->vvcdsp.lf.filter_*[0] function is invoked on the frame buffer.
+ *
+ * @param lc local context for CTU
+ * @param x0 x position for the CTU
+ * @param y0 y position for the CTU
+ */
+void ff_vvc_deblock_horizontal(const VVCLocalContext *lc, int x0, int y0)
+{
+    VVCFrameContext *fc = lc->fc;
+    const VVCSPS *sps   = fc->ps.sps;
+    // luma only when sps_chroma_format_idc is 0, otherwise all sample arrays
+    const int c_end     = fc->ps.sps->r->sps_chroma_format_idc ? VVC_MAX_SAMPLE_ARRAYS : 1;
+    uint8_t* src;
+    int x, y, qp;
+
+    //not used yet, may be needed by plt.
+    const uint8_t no_p[4] = { 0 };
+    const uint8_t no_q[4] = { 0 } ;
+
+    const int ctb_log2_size_y = fc->ps.sps->ctb_log2_size_y;
+    int x_end, y_end;
+    const int ctb_size = 1 << ctb_log2_size_y;
+    // raster index of this CTU, used to pick its deblock parameters
+    const int ctb = (x0 >> ctb_log2_size_y) +
+        (y0 >> ctb_log2_size_y) * fc->ps.pps->ctb_width;
+    const DBParams *params = fc->tab.deblock + ctb;
+
+    vvc_deblock_bs(lc, x0, y0, 0);
+
+    // clamp the CTU extent to the picture dimensions
+    x_end = x0 + ctb_size;
+    if (x_end > fc->ps.sps->width)
+        x_end = fc->ps.sps->width;
+    y_end = y0 + ctb_size;
+    if (y_end > fc->ps.sps->height)
+        y_end = fc->ps.sps->height;
+
+    for (int c_idx = 0; c_idx < c_end; c_idx++) {
+        const int hs          = sps->hshift[c_idx];
+        const int vs          = sps->vshift[c_idx];
+        // x and y stay in luma units, so the chroma grid is scaled by the vertical shift
+        const int grid        = c_idx ? (CHROMA_GRID << vs) : LUMA_GRID;
+        const int beta_offset = params->beta_offset[c_idx];
+        // NOTE(review): tc_offset is not referenced by name below; presumably it is
+        // picked up inside the TC_CALC macro - confirm
+        const int tc_offset   = params->tc_offset[c_idx];
+
+        for (y = y0; y < y_end; y += grid) {
+            const uint8_t horizontal_ctu_edge = !(y % fc->ps.sps->ctb_size_y);
+            // y == 0 is skipped: there is no row above it to filter against
+            if (!y)
+                continue;
+
+            // "x0 ? x0: 0" always evaluates to x0
+            for (x = x0 ? x0: 0; x < x_end; x += (DEBLOCK_STEP << hs)) {
+                int32_t bs[4], beta[4], tc[4], all_zero_bs = 1;
+                uint8_t max_len_p[4], max_len_q[4];
+
+                // gather bs/beta/tc/max filter lengths for each 4-sample sub-segment
+                for (int i = 0; i < DEBLOCK_STEP >> (2 - hs); i++) {
+                    const int dx = i << 2;
+
+                    // positions at or beyond x_end are treated as "no edge"
+                    bs[i] = (x + dx < x_end) ? TAB_BS(fc->tab.horizontal_bs[c_idx], x + dx, y) : 0;
+                    if (bs[i]) {
+                        src = &fc->frame->data[c_idx][(y >> vs) * fc->frame->linesize[c_idx] + (((x + dx)>> hs) << fc->ps.sps->pixel_shift)];
+                        qp = get_qp(fc, src, x + dx, y, c_idx, 0);
+
+                        beta[i] = betatable[av_clip(qp + beta_offset, 0, MAX_QP)];
+
+                        max_filter_length(fc, x + dx, y, c_idx, 0, horizontal_ctu_edge, bs[i], &max_len_p[i], &max_len_q[i]);
+                        all_zero_bs = 0;
+                    }
+                    // qp is only read here when bs[i] != 0, i.e. after it was set just above
+                    tc[i] = bs[i] ? TC_CALC(qp, bs[i]) : 0;
+                }
+                if (!all_zero_bs) {
+                    // filter the whole segment starting at (x, y) in one DSP call
+                    src = &fc->frame->data[c_idx][(y >> vs) * fc->frame->linesize[c_idx] + ((x >> hs) << fc->ps.sps->pixel_shift)];
+                    if (!c_idx) {
+                        fc->vvcdsp.lf.filter_luma[0](src, fc->frame->linesize[c_idx],
+                            beta, tc, no_p, no_q, max_len_p, max_len_q, horizontal_ctu_edge);
+                    } else {
+                        fc->vvcdsp.lf.filter_chroma[0](src, fc->frame->linesize[c_idx],
+                            beta, tc, no_p, no_q, max_len_p, max_len_q, hs);
+                    }
+                }
+            }
+        }
+    }
+}
+
+/**
+ * Copy a block of height rows, each width samples wide, from src to dst.
+ *
+ * @param pixel_shift log2 of the sample size in bytes
+ * @param width       row width in samples
+ * @param dst_stride  distance between destination rows, in bytes
+ * @param src_stride  distance between source rows, in bytes
+ */
+static void alf_copy_border(uint8_t *dst, const uint8_t *src,
+    const int pixel_shift, int width, const int height, const ptrdiff_t dst_stride, const ptrdiff_t src_stride)
+{
+    const int row_bytes = width << pixel_shift;
+
+    for (int row = 0; row < height; row++)
+        memcpy(dst + row * dst_stride, src + row * src_stride, row_bytes);
+}
+
+/**
+ * For each of height rows, replicate the first sample of the source row
+ * across width samples of the destination row.
+ *
+ * Source and destination share the same stride (in bytes). Samples are one
+ * byte wide when pixel_shift is 0 and handled as uint16_t otherwise.
+ */
+static void alf_extend_vert(uint8_t *_dst, const uint8_t *_src,
+    const int pixel_shift, const int width, const int height, ptrdiff_t stride)
+{
+    if (pixel_shift) {
+        // 16-bit path: turn the byte stride into a sample stride
+        const ptrdiff_t sample_stride = stride >> pixel_shift;
+        const uint16_t *in            = (const uint16_t *)_src;
+        uint16_t *out                 = (uint16_t *)_dst;
+
+        for (int row = 0; row < height; row++, in += sample_stride, out += sample_stride) {
+            for (int col = 0; col < width; col++)
+                out[col] = *in;
+        }
+        return;
+    }
+
+    // 8-bit path: one memset per row
+    for (int row = 0; row < height; row++)
+        memset(_dst + row * stride, _src[row * stride], width);
+}
+
+/**
+ * Write the same source row (width samples) into height consecutive
+ * destination rows that are stride bytes apart.
+ */
+static void alf_extend_horz(uint8_t *dst, const uint8_t *src,
+    const int pixel_shift, int width, const int height, const ptrdiff_t stride)
+{
+    const int row_bytes = width << pixel_shift;
+    uint8_t *out        = dst;
+    int rows_left       = height;
+
+    // src is intentionally not advanced: every row gets the same data
+    while (rows_left-- > 0) {
+        memcpy(out, src, row_bytes);
+        out += stride;
+    }
+}
+
+/**
+ * Save the border samples of one component of a CTB into the frame's ALF
+ * pixel buffers.
+ *
+ * alf_pixel_buffer_h[c_idx][0]/[1] receive the first/last border_pixels rows,
+ * alf_pixel_buffer_v[c_idx][0]/[1] the first/last border_pixels columns.
+ *
+ * @param src           first sample of the CTB in the component plane
+ * @param x, y          CTB position in component samples
+ * @param width, height CTB size in component samples
+ * @param x_ctb, y_ctb  CTB column and row
+ */
+static void alf_copy_ctb_to_hv(VVCFrameContext *fc, const uint8_t *src, const ptrdiff_t src_stride,
+    const int x, const int y, const int width, const int height, const int x_ctb, const int y_ctb, const int c_idx)
+{
+    const int ps     = fc->ps.sps->pixel_shift;
+    const int pic_w  = fc->ps.sps->width >> fc->ps.sps->hshift[c_idx];
+    const int pic_h  = fc->ps.sps->height >> fc->ps.sps->vshift[c_idx];
+    const int border = c_idx ? ALF_BORDER_CHROMA : ALF_BORDER_LUMA;
+    // byte offsets of this CTB inside the horizontal / vertical save buffers
+    const int h_pos  = (border * y_ctb * pic_w + x) << ps;
+    const int v_pos  = (pic_h * x_ctb + y) * (border << ps);
+
+    /* rows: i == 0 is the top of the CTB, i == 1 the bottom */
+    for (int i = 0; i < 2; i++) {
+        const int first_row = i ? height - border : 0;
+
+        alf_copy_border(fc->tab.alf_pixel_buffer_h[c_idx][i] + h_pos,
+            src + first_row * src_stride, ps, width, border, pic_w << ps, src_stride);
+    }
+
+    /* columns: i == 0 is the left of the CTB, i == 1 the right */
+    for (int i = 0; i < 2; i++) {
+        const int first_col = i ? width - border : 0;
+
+        alf_copy_border(fc->tab.alf_pixel_buffer_v[c_idx][i] + v_pos,
+            src + (first_col << ps), ps, border, height, border << ps, src_stride);
+    }
+}
+
+/**
+ * Fill border_pixels rows of dst.
+ *
+ * With edge set, the single row at border is repeated; otherwise the rows
+ * are copied from src.
+ */
+static void alf_fill_border_h(uint8_t *dst, const ptrdiff_t dst_stride, const uint8_t *src, const ptrdiff_t src_stride,
+    const uint8_t *border, const int width, const int border_pixels, const int ps, const int edge)
+{
+    if (!edge) {
+        alf_copy_border(dst, src, ps, width, border_pixels, dst_stride, src_stride);
+        return;
+    }
+    alf_extend_horz(dst, border, ps, width, border_pixels, dst_stride);
+}
+
+/**
+ * Fill a border_pixels wide column strip of dst, including its corner rows.
+ *
+ * With edge set, the first sample of each row starting at border is
+ * replicated over height + 2 * border_pixels rows. Otherwise the strip is
+ * copied from src (rows are border_pixels samples apart there); the corner
+ * rows on a side flagged in edges[TOP] / edges[BOTTOM] are not copied but
+ * repeated from the adjacent row already written to dst.
+ */
+static void alf_fill_border_v(uint8_t *dst, const ptrdiff_t dst_stride, const uint8_t *src,
+    const uint8_t *border, const int border_pixels, const int height, const int pixel_shift, const int *edges, const int edge)
+{
+    const ptrdiff_t src_stride = (border_pixels << pixel_shift);
+
+    if (!edge) {
+        const int top    = edges[TOP];
+        const int bottom = edges[BOTTOM];
+        // rows skipped at the start when the top corner is an edge
+        const int skip   = border_pixels * top;
+        // corner rows are only copied on the sides that are not edges
+        const int rows   = height + (!top + !bottom) * border_pixels;
+
+        //left/right
+        alf_copy_border(dst + dst_stride * skip, src + src_stride * skip,
+            pixel_shift, border_pixels, rows, dst_stride, src_stride);
+
+        //top left/right
+        if (top)
+            alf_extend_horz(dst, dst + dst_stride * border_pixels, pixel_shift, border_pixels, border_pixels, dst_stride);
+
+        //bottom left/right
+        if (bottom) {
+            uint8_t *below = dst + dst_stride * (border_pixels + height);
+
+            alf_extend_horz(below, below - dst_stride, pixel_shift, border_pixels, border_pixels, dst_stride);
+        }
+        return;
+    }
+
+    alf_extend_vert(dst, border, pixel_shift, border_pixels, height + 2 * border_pixels, dst_stride);
+}
+
+/**
+ * Build the padded ALF input for one component of a CTB.
+ *
+ * The CTB samples are copied from _src to _dst, then border_pixels samples are
+ * written on each of the four sides of _dst. A side flagged in edges[] is
+ * filled by repeating the nearest CTB row/column; any other side is read from
+ * the alf_pixel_buffer_h / alf_pixel_buffer_v save buffers at the position of
+ * the neighbouring CTB.
+ *
+ * @param _dst          first CTB sample inside the padded buffer
+ * @param _src          first CTB sample inside the frame plane
+ * @param x, y          CTB position in component samples
+ * @param x_ctb, y_ctb  CTB column and row
+ * @param width, height CTB size in component samples
+ * @param edges         per-side flags, indexed by LEFT/TOP/RIGHT/BOTTOM
+ */
+static void alf_prepare_buffer(VVCFrameContext *fc, uint8_t *_dst, const uint8_t *_src, const int x, const int y,
+    const int x_ctb, const int y_ctb, const int width, const int height, const ptrdiff_t dst_stride, const ptrdiff_t src_stride,
+    const int c_idx, const int *edges)
+{
+    const int ps = fc->ps.sps->pixel_shift;
+    const int w = fc->ps.sps->width >> fc->ps.sps->hshift[c_idx];
+    const int h = fc->ps.sps->height >> fc->ps.sps->vshift[c_idx];
+    const int border_pixels = c_idx == 0 ? ALF_BORDER_LUMA : ALF_BORDER_CHROMA;
+    uint8_t *dst, *src;
+
+    copy_ctb(_dst, _src, width << ps, height, dst_stride, src_stride);
+
+    //top: buffer [1] of CTB row y_ctb - 1 (filled by alf_copy_ctb_to_hv() from offset height - border_pixels)
+    src = fc->tab.alf_pixel_buffer_h[c_idx][1] + (((border_pixels * (y_ctb - 1)) * w + x) << ps);
+    dst = _dst - border_pixels * dst_stride;
+    alf_fill_border_h(dst, dst_stride, src, w  << ps, _dst, width, border_pixels, ps, edges[TOP]);
+
+    //bottom: buffer [0] of CTB row y_ctb + 1 (filled by alf_copy_ctb_to_hv() from offset 0)
+    src = fc->tab.alf_pixel_buffer_h[c_idx][0] + ((border_pixels * (y_ctb + 1) * w + x) << ps);
+    dst = _dst + height * dst_stride;
+    alf_fill_border_h(dst, dst_stride, src, w  << ps, _dst + (height - 1) * dst_stride, width, border_pixels, ps, edges[BOTTOM]);
+
+
+    //left: buffer [1] of CTB column x_ctb - 1; src and dst both start border_pixels rows above the CTB
+    src = fc->tab.alf_pixel_buffer_v[c_idx][1] + (h * (x_ctb - 1) + y - border_pixels) * (border_pixels << ps);
+    dst = _dst - (border_pixels << ps) - border_pixels * dst_stride;
+    alf_fill_border_v(dst, dst_stride, src,  dst + (border_pixels << ps), border_pixels, height, ps, edges, edges[LEFT]);
+
+    //right: buffer [0] of CTB column x_ctb + 1; the replication source is the sample just left of dst
+    src = fc->tab.alf_pixel_buffer_v[c_idx][0] + (h * (x_ctb + 1) + y - border_pixels) * (border_pixels << ps);
+    dst = _dst + (width << ps) - border_pixels * dst_stride;
+    alf_fill_border_v(dst, dst_stride, src,  dst - (1 << ps), border_pixels, height, ps, edges, edges[RIGHT]);
+}
+
+/* number of ALF_BLOCK_SIZE x ALF_BLOCK_SIZE blocks in a MAX_CTU_SIZE x MAX_CTU_SIZE CTU */
+#define ALF_MAX_BLOCKS_IN_CTU   (MAX_CTU_SIZE * MAX_CTU_SIZE / ALF_BLOCK_SIZE / ALF_BLOCK_SIZE)
+/* ALF_NUM_COEFF_LUMA entries for every one of those blocks (an element count, not a byte size) */
+#define ALF_MAX_FILTER_SIZE     (ALF_MAX_BLOCKS_IN_CTU * ALF_NUM_COEFF_LUMA)
+
+/**
+ * Derive the per-block luma ALF coefficients and clip values of a CTB.
+ *
+ * The filter set is selected by alf->ctb_filt_set_idx_y: values below 16 use
+ * the fixed coefficient table together with an all-zero clip index table,
+ * other values use the ALF APS referenced by the slice header. The blocks are
+ * then classified and the result is expanded into coeff and clip by the DSP
+ * functions.
+ *
+ * @param coeff  output coefficients
+ * @param clip   output clip values
+ * @param src    padded luma samples of the CTB
+ * @param vb_pos forwarded unchanged to the classify function
+ */
+static void alf_get_coeff_and_clip(VVCLocalContext *lc, int16_t *coeff, int16_t *clip,
+    const uint8_t *src, ptrdiff_t src_stride, int width, int height, int vb_pos, ALFParams *alf)
+{
+    const VVCFrameContext *fc = lc->fc;
+    const int nb_blocks       = width * height / ALF_BLOCK_SIZE / ALF_BLOCK_SIZE;
+    uint8_t zero_clip_idx[ALF_NUM_FILTERS_LUMA][ALF_NUM_COEFF_LUMA] = { 0 };
+    int class_idx[ALF_MAX_BLOCKS_IN_CTU];
+    int transpose_idx[ALF_MAX_BLOCKS_IN_CTU];
+    const int16_t *coeff_set;
+    const uint8_t *clip_idx_set;
+    const uint8_t *class_to_filt;
+
+    if (alf->ctb_filt_set_idx_y >= 16) {
+        // signalled filters: look the APS up through the slice header
+        const H266RawSliceHeader *rsh = lc->sc->sh.r;
+        const int aps_id              = rsh->sh_alf_aps_id_luma[alf->ctb_filt_set_idx_y - 16];
+        const VVCALF *aps             = fc->ps.alf_list[aps_id];
+
+        coeff_set     = &aps->luma_coeff[0][0];
+        clip_idx_set  = &aps->luma_clip_idx[0][0];
+        class_to_filt = ff_vvc_alf_aps_class_to_filt_map;
+    } else {
+        // fixed filters
+        coeff_set     = &ff_vvc_alf_fix_filt_coeff[0][0];
+        clip_idx_set  = &zero_clip_idx[0][0];
+        class_to_filt = ff_vvc_alf_class_to_filt_map[alf->ctb_filt_set_idx_y];
+    }
+
+    fc->vvcdsp.alf.classify(class_idx, transpose_idx, src, src_stride, width, height,
+        vb_pos, lc->alf_gradient_tmp);
+    fc->vvcdsp.alf.recon_coeff_and_clip(coeff, clip, class_idx, transpose_idx, nb_blocks,
+        coeff_set, clip_idx_set, class_to_filt);
+}
+
+/**
+ * Apply the luma ALF to one CTB.
+ *
+ * Per-block coefficients and clip values are derived into lc->tmp and
+ * lc->tmp1 (both used as int16_t storage) by alf_get_coeff_and_clip() and then
+ * passed to the LUMA ALF DSP function.
+ *
+ * @param dst     destination samples
+ * @param src     padded source samples
+ * @param x0      not used by this function
+ * @param y0      subtracted from _vb_pos so that vb_pos is relative to the CTB
+ * @param _vb_pos vb position in the same coordinates as y0
+ */
+static void alf_filter_luma(VVCLocalContext *lc, uint8_t *dst, const uint8_t *src,
+    const ptrdiff_t dst_stride, const ptrdiff_t src_stride, const int x0, const int y0,
+    const int width, const int height, const int _vb_pos, ALFParams *alf)
+{
+    const VVCFrameContext *fc = lc->fc;
+    int vb_pos                = _vb_pos - y0;
+    int16_t *coeff            = (int16_t*)lc->tmp;
+    int16_t *clip             = (int16_t *)lc->tmp1;
+
+    // ALF_MAX_FILTER_SIZE counts elements, so scale it by the element size
+    // before comparing it with the byte size of each scratch buffer
+    av_assert0(ALF_MAX_FILTER_SIZE * sizeof(*coeff) <= sizeof(lc->tmp));
+    av_assert0(ALF_MAX_FILTER_SIZE * sizeof(*clip)  <= sizeof(lc->tmp1));
+
+    alf_get_coeff_and_clip(lc, coeff, clip, src, src_stride, width, height, vb_pos, alf);
+    fc->vvcdsp.alf.filter[LUMA](dst, dst_stride, src, src_stride, width, height, coeff, clip, vb_pos);
+}
+
+/**
+ * Convert a clip index into a clip value.
+ *
+ * @param idx index into the table { 0, 3, 5, 7 }
+ * @return 1 << (sps->bit_depth - table[idx])
+ */
+static int alf_clip_from_idx(const VVCFrameContext *fc, const int idx)
+{
+    const int shift_reduction[] = { 0, 3, 5, 7 };
+    const int shift             = fc->ps.sps->bit_depth - shift_reduction[idx];
+
+    return 1 << shift;
+}
+
+/**
+ * Apply the chroma ALF to one CTB of component c_idx.
+ *
+ * Coefficients come from the ALF APS selected by sh_alf_aps_id_chroma, using
+ * the alternative chosen in alf->alf_ctb_filter_alt_idx[c_idx - 1]; the clip
+ * indices of that alternative are converted to clip values first.
+ */
+static void alf_filter_chroma(VVCLocalContext *lc, uint8_t *dst, const uint8_t *src,
+    const ptrdiff_t dst_stride, const ptrdiff_t src_stride, const int c_idx,
+    const int width, const int height, const int vb_pos, ALFParams *alf)
+{
+    VVCFrameContext *fc = lc->fc;
+    const int aps_id    = lc->sc->sh.r->sh_alf_aps_id_chroma;
+    const VVCALF *aps   = fc->ps.alf_list[aps_id];
+    const int alt       = alf->alf_ctb_filter_alt_idx[c_idx - 1];
+    int16_t clip[ALF_NUM_COEFF_CHROMA];
+    int i               = 0;
+
+    while (i < ALF_NUM_COEFF_CHROMA) {
+        clip[i] = alf_clip_from_idx(fc, aps->chroma_clip_idx[alt][i]);
+        i++;
+    }
+
+    fc->vvcdsp.alf.filter[CHROMA](dst, dst_stride, src, src_stride, width, height,
+        aps->chroma_coeff[alt], clip, vb_pos);
+}
+
+/**
+ * Apply the cross-component ALF to one CTB of chroma component c_idx.
+ *
+ * The APS id is taken from sh_alf_cc_cb_aps_id for CB and from
+ * sh_alf_cc_cr_aps_id otherwise. Nothing is done when that APS is not
+ * present in fc->ps.alf_list.
+ *
+ * @param dst  chroma samples, updated in place by the DSP function
+ * @param luma padded luma samples
+ */
+static void alf_filter_cc(VVCLocalContext *lc, uint8_t *dst, const uint8_t *luma,
+    const ptrdiff_t dst_stride, const ptrdiff_t luma_stride, const int c_idx,
+    const int width, const int height, const int hs, const int vs, const int vb_pos, ALFParams *alf)
+{
+    const VVCFrameContext *fc     = lc->fc;
+    const H266RawSliceHeader *rsh = lc->sc->sh.r;
+    const int chroma              = c_idx - 1;
+    const VVCALF *aps;
+    const int16_t *coeff;
+
+    if (c_idx == CB)
+        aps = fc->ps.alf_list[rsh->sh_alf_cc_cb_aps_id];
+    else
+        aps = fc->ps.alf_list[rsh->sh_alf_cc_cr_aps_id];
+
+    if (!aps)
+        return;
+
+    // ctb_cc_idc is 1-based here: the filter index is idc - 1
+    coeff = aps->cc_coeff[chroma][alf->ctb_cc_idc[chroma] - 1];
+    fc->vvcdsp.alf.filter_cc(dst, dst_stride, luma, luma_stride, width, height, hs, vs, coeff, vb_pos);
+}
+
+/**
+ * Save the ALF border samples of every component of the CTU at (x0, y0).
+ *
+ * @param lc local context for CTU
+ * @param x0 x position for the CTU
+ * @param y0 y position for the CTU
+ */
+void ff_vvc_alf_copy_ctu_to_hv(VVCLocalContext* lc, const int x0, const int y0)
+{
+    VVCFrameContext *fc = lc->fc;
+    const int log2_ctb  = fc->ps.sps->ctb_log2_size_y;
+    const int ctb_size  = fc->ps.sps->ctb_size_y;
+    const int ps        = fc->ps.sps->pixel_shift;
+    const int nb_planes = fc->ps.sps->r->sps_chroma_format_idc ? VVC_MAX_SAMPLE_ARRAYS : 1;
+    // CTU extent in luma samples, clipped to the picture
+    const int luma_w    = FFMIN(fc->ps.sps->width - x0, ctb_size);
+    const int luma_h    = FFMIN(fc->ps.sps->height - y0, ctb_size);
+
+    for (int c_idx = 0; c_idx < nb_planes; c_idx++) {
+        const int hs     = fc->ps.sps->hshift[c_idx];
+        const int vs     = fc->ps.sps->vshift[c_idx];
+        const int cx     = x0 >> hs;
+        const int cy     = y0 >> vs;
+        const int stride = fc->frame->linesize[c_idx];
+        uint8_t *plane   = fc->frame->data[c_idx] + (cy * stride + (cx << ps));
+
+        alf_copy_ctb_to_hv(fc, plane, stride, cx, cy, luma_w >> hs, luma_h >> vs,
+            x0 >> log2_ctb, y0 >> log2_ctb, c_idx);
+    }
+}
+
+/**
+ * Run the adaptive loop filter on the CTU at (x0, y0).
+ *
+ * For each component a padded copy of the CTB is prepared when it is needed,
+ * then the luma/chroma ALF and, for chroma, the cross-component ALF are
+ * applied to the frame buffer. alf->applied[c_idx] is set for every component
+ * that was visited, whether or not a filter ran.
+ *
+ * @param lc local context for CTU
+ * @param x0 x position for the CTU
+ * @param y0 y position for the CTU
+ */
+void ff_vvc_alf_filter(VVCLocalContext *lc, const int x0, const int y0)
+{
+    VVCFrameContext *fc     = lc->fc;
+    const VVCPPS *pps       = fc->ps.pps;
+    const int x_ctb         = x0 >> fc->ps.sps->ctb_log2_size_y;
+    const int y_ctb         = y0 >> fc->ps.sps->ctb_log2_size_y;
+    const int ctb_size_y    = fc->ps.sps->ctb_size_y;
+    const int ps            = fc->ps.sps->pixel_shift;
+    const int padded_stride = EDGE_EMU_BUFFER_STRIDE << ps;
+    // skip ALF_PADDING_SIZE rows and columns so borders can be written around the CTB
+    const int padded_offset = padded_stride * ALF_PADDING_SIZE + (ALF_PADDING_SIZE << ps);
+    const int c_end         = fc->ps.sps->r->sps_chroma_format_idc ? VVC_MAX_SAMPLE_ARRAYS : 1;
+    ALFParams *alf          = &CTB(fc->tab.alf, x_ctb, y_ctb);
+    // picture boundaries; NOTE(review): the initializer order presumably matches
+    // the LEFT, TOP, RIGHT, BOTTOM indices used below - confirm against the enum
+    int edges[MAX_EDGES]    = { x_ctb == 0, y_ctb == 0, x_ctb == pps->ctb_width - 1, y_ctb == pps->ctb_height - 1 };
+
+    // tile boundaries also count as edges when filtering across tiles is disabled
+    if (!pps->r->pps_loop_filter_across_tiles_enabled_flag) {
+        edges[LEFT]   = edges[LEFT] || (lc->boundary_flags & BOUNDARY_LEFT_TILE);
+        edges[TOP]    = edges[TOP] || (lc->boundary_flags & BOUNDARY_UPPER_TILE);
+        edges[RIGHT]  = edges[RIGHT] || pps->ctb_to_col_bd[x_ctb] != pps->ctb_to_col_bd[x_ctb + 1];
+        edges[BOTTOM] = edges[BOTTOM] || pps->ctb_to_row_bd[y_ctb] != pps->ctb_to_row_bd[y_ctb + 1];
+    }
+
+    // same for slice boundaries; the || short-circuit keeps the x_ctb + 1 / y_ctb + 1
+    // lookups from being evaluated once the side is already known to be an edge
+    if (!pps->r->pps_loop_filter_across_slices_enabled_flag) {
+        edges[LEFT]   = edges[LEFT] || (lc->boundary_flags & BOUNDARY_LEFT_SLICE);
+        edges[TOP]    = edges[TOP] || (lc->boundary_flags & BOUNDARY_UPPER_SLICE);
+        edges[RIGHT]  = edges[RIGHT] || CTB(fc->tab.slice_idx, x_ctb, y_ctb) != CTB(fc->tab.slice_idx, x_ctb + 1, y_ctb);
+        edges[BOTTOM] = edges[BOTTOM] || CTB(fc->tab.slice_idx, x_ctb, y_ctb) != CTB(fc->tab.slice_idx, x_ctb, y_ctb + 1);
+    }
+
+    for (int c_idx = 0; c_idx < c_end; c_idx++) {
+        const int hs = fc->ps.sps->hshift[c_idx];
+        const int vs = fc->ps.sps->vshift[c_idx];
+        const int ctb_size_h = ctb_size_y >> hs;
+        const int ctb_size_v = ctb_size_y >> vs;
+        const int x = x0 >> hs;
+        const int y = y0 >> vs;
+        const int pic_width = fc->ps.sps->width >> hs;
+        const int pic_height = fc->ps.sps->height >> vs;
+        const int width  = FFMIN(pic_width  - x, ctb_size_h);
+        const int height = FFMIN(pic_height - y, ctb_size_v);
+        const int src_stride = fc->frame->linesize[c_idx];
+        uint8_t *src = &fc->frame->data[c_idx][y * src_stride + (x << ps)];
+        uint8_t *padded;
+
+        // the padded luma copy is also required when only cross-component ALF is
+        // enabled, because alf_filter_cc() below reads lc->alf_buffer_luma
+        if (alf->ctb_flag[c_idx] || (!c_idx && (alf->ctb_cc_idc[0] || alf->ctb_cc_idc[1]))) {
+            padded = (c_idx ? lc->alf_buffer_chroma : lc->alf_buffer_luma) + padded_offset;
+            alf_prepare_buffer(fc, padded, src, x, y, x_ctb, y_ctb, width, height,
+                padded_stride, src_stride, c_idx, edges);
+        }
+        // padded is always assigned here: ctb_flag[c_idx] also satisfies the condition above
+        if (alf->ctb_flag[c_idx]) {
+            if (!c_idx)  {
+                // luma passes the vb position including y; alf_filter_luma() subtracts y0 again
+                alf_filter_luma(lc, src, padded, src_stride, padded_stride, x, y,
+                    width, height, y + ctb_size_v - ALF_VB_POS_ABOVE_LUMA, alf);
+            } else {
+                alf_filter_chroma(lc, src, padded, src_stride, padded_stride, c_idx,
+                    width, height, ctb_size_v - ALF_VB_POS_ABOVE_CHROMA, alf);
+            }
+        }
+        if (c_idx && alf->ctb_cc_idc[c_idx - 1]) {
+            // cross-component ALF reads the padded luma buffer prepared at c_idx == 0
+            padded = lc->alf_buffer_luma + padded_offset;
+            alf_filter_cc(lc, src, padded, src_stride, padded_stride, c_idx,
+                width, height, hs, vs, (ctb_size_v << vs) - ALF_VB_POS_ABOVE_LUMA, alf);
+        }
+
+        alf->applied[c_idx] = 1;
+    }
+}
+
+
+/**
+ * Apply the LMCS luma filter to the CTU at (x, y).
+ *
+ * The luma samples of the CTU, clipped to the picture size, are passed through
+ * the inverse LUT in place. Nothing happens unless sh_lmcs_used_flag is set.
+ *
+ * @param lc local context for CTU
+ * @param x  x position for the CTU
+ * @param y  y position for the CTU
+ */
+void ff_vvc_lmcs_filter(const VVCLocalContext *lc, const int x, const int y)
+{
+    const VVCFrameContext *fc = lc->fc;
+    const int lmcs_enabled    = lc->sc->sh.r->sh_lmcs_used_flag;
+    const int ctb_size        = fc->ps.sps->ctb_size_y;
+    const int w               = FFMIN(fc->ps.pps->width  - x, ctb_size);
+    const int h               = FFMIN(fc->ps.pps->height - y, ctb_size);
+    uint8_t *luma             = fc->frame->data[LUMA] + y * fc->frame->linesize[LUMA] + (x << fc->ps.sps->pixel_shift);
+
+    if (!lmcs_enabled)
+        return;
+
+    fc->vvcdsp.lmcs.filter(luma, fc->frame->linesize[LUMA], w, h, fc->ps.lmcs.inv_lut);
+}
diff --git a/libavcodec/vvc/vvc_filter.h b/libavcodec/vvc/vvc_filter.h
new file mode 100644
index 0000000000..2ae4c33e2d
--- /dev/null
+++ b/libavcodec/vvc/vvc_filter.h
@@ -0,0 +1,71 @@ 
+/*
+ * VVC filters
+ *
+ * Copyright (C) 2022 Nuo Mi
+ *
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef AVCODEC_VVC_VVC_FILTER_H
+#define AVCODEC_VVC_VVC_FILTER_H
+
+#include "vvcdec.h"
+
+/**
+ * lmcs filter for the CTU
+ * @param lc local context for CTU
+ * @param x0 x position for the CTU
+ * @param y0 y position for the CTU
+ */
+void ff_vvc_lmcs_filter(const VVCLocalContext *lc, const int x0, const int y0);
+
+/**
+ * vertical deblock filter for the CTU
+ * @param lc local context for CTU
+ * @param x0 x position for the CTU
+ * @param y0 y position for the CTU
+ */
+void ff_vvc_deblock_vertical(const VVCLocalContext *lc, int x0, int y0);
+
+/**
+ * horizontal deblock filter for the CTU
+ * @param lc local context for CTU
+ * @param x0 x position for the CTU
+ * @param y0 y position for the CTU
+ */
+void ff_vvc_deblock_horizontal(const VVCLocalContext *lc, int x0, int y0);
+
+/**
+ * sao filter for the CTU
+ * @param lc local context for CTU
+ * @param x0 x position for the CTU
+ * @param y0 y position for the CTU
+ */
+void ff_vvc_sao_filter(VVCLocalContext *lc, const int x0, const int y0);
+
+/**
+ * NOTE(review): the definition is not part of this header; presumably this
+ * saves the border samples of a CTB for the SAO filter - confirm.
+ * @param lc local context for CTU
+ * @param rx presumably the CTB column - confirm
+ * @param ry presumably the CTB row - confirm
+ * @param last_row presumably non-zero for the last CTB row - confirm
+ */
+void ff_vvc_sao_copy_ctb_to_hv(VVCLocalContext* lc, int rx, int ry, int last_row);
+
+/**
+ * save the border rows and columns of every component of the CTU into the
+ * alf pixel buffers of the frame
+ * @param lc local context for CTU
+ * @param x0 x position for the CTU
+ * @param y0 y position for the CTU
+ */
+void ff_vvc_alf_copy_ctu_to_hv(VVCLocalContext* lc, int x0, int y0);
+
+/**
+ * alf filter for the CTU
+ * @param lc local context for CTU
+ * @param x0 x position for the CTU
+ * @param y0 y position for the CTU
+ */
+void ff_vvc_alf_filter(VVCLocalContext *lc, const int x0, const int y0);
+
+#endif // AVCODEC_VVC_VVC_FILTER_H
diff --git a/libavcodec/vvc/vvc_filter_template.c b/libavcodec/vvc/vvc_filter_template.c
new file mode 100644
index 0000000000..a4f1792ec4
--- /dev/null
+++ b/libavcodec/vvc/vvc_filter_template.c
@@ -0,0 +1,1135 @@ 
+/*
+ * VVC filters DSP
+ *
+ * Copyright (C) 2022 Nuo Mi
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/**
+ * Replace every sample of a width x height block by its entry in the LUT.
+ *
+ * @param _dst       samples, modified in place
+ * @param dst_stride row stride in bytes
+ * @param _lut       table indexed by sample value, holding pixel-sized entries
+ */
+static void FUNC(lmcs_filter_luma)(uint8_t *_dst, ptrdiff_t dst_stride, const int width, const int height, const uint8_t *_lut)
+{
+    const pixel *lut       = (const pixel *)_lut;
+    const ptrdiff_t stride = dst_stride / sizeof(pixel);
+    pixel *row             = (pixel*)_dst;
+
+    for (int y = 0; y < height; y++, row += stride) {
+        for (int x = 0; x < width; x++)
+            row[x] = lut[row[x]];
+    }
+}
+
+/**
+ * SAO band offset for a width x height block.
+ *
+ * Samples are split into 32 bands by their top five bits. The four bands
+ * starting at sao_left_class (wrapping modulo 32) get the offsets
+ * sao_offset_val[1..4], all other bands an offset of 0. The result is clipped
+ * to the pixel range.
+ *
+ * @param dst_stride, src_stride row strides in bytes
+ */
+static void FUNC(sao_band_filter)(uint8_t *_dst, const uint8_t *_src, ptrdiff_t dst_stride, ptrdiff_t src_stride,
+    const int16_t *sao_offset_val, const int sao_left_class, const int width, const int height)
+{
+    const int shift            = BIT_DEPTH - 5;
+    const ptrdiff_t out_stride = dst_stride / sizeof(pixel);
+    const ptrdiff_t in_stride  = src_stride / sizeof(pixel);
+    pixel *out                 = (pixel *)_dst;
+    const pixel *in            = (const pixel *)_src;
+    int band_offset[32]        = { 0 };
+
+    for (int band = 0; band < 4; band++)
+        band_offset[(band + sao_left_class) & 31] = sao_offset_val[band + 1];
+
+    for (int row = 0; row < height; row++, out += out_stride, in += in_stride) {
+        for (int col = 0; col < width; col++) {
+            const int sample = in[col];
+
+            out[col] = av_clip_pixel(sample + band_offset[sample >> shift]);
+        }
+    }
+}
+
+#define CMP(a, b) (((a) > (b)) - ((a) < (b)))
+
+/**
+ * SAO edge offset for a width x height block.
+ *
+ * Every sample is compared with its two neighbours along the direction
+ * selected by eo; the pair of comparison results picks one entry of
+ * sao_offset_val, which is added to the sample before clipping.
+ *
+ * The source stride is not a parameter: it is fixed to
+ * 2 * MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE bytes.
+ *
+ * @param dst_stride destination row stride in bytes
+ * @param eo         direction index into the neighbour table (0..3)
+ */
+static void FUNC(sao_edge_filter)(uint8_t *_dst, const uint8_t *_src, ptrdiff_t dst_stride,
+    const int16_t *sao_offset_val, const int eo, const int width, const int height)
+{
+    static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
+    static const int8_t pos[4][2][2] = {
+        { { -1,  0 }, {  1, 0 } }, // horizontal
+        { {  0, -1 }, {  0, 1 } }, // vertical
+        { { -1, -1 }, {  1, 1 } }, // 45 degree
+        { {  1, -1 }, { -1, 1 } }, // 135 degree
+    };
+    const ptrdiff_t src_stride = (2 * MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE) / sizeof(pixel);
+    const ptrdiff_t out_stride = dst_stride / sizeof(pixel);
+    // sample offsets of the two neighbours, each as {dx, dy}
+    const int nb_a             = pos[eo][0][0] + pos[eo][0][1] * src_stride;
+    const int nb_b             = pos[eo][1][0] + pos[eo][1][1] * src_stride;
+    const pixel *in            = (const pixel *)_src;
+    pixel *out                 = (pixel *)_dst;
+
+    for (int row = 0; row < height; row++, in += src_stride, out += out_stride) {
+        for (int col = 0; col < width; col++) {
+            // each comparison yields -1, 0 or 1, so the sum indexes edge_idx[0..4]
+            const int sign_a = CMP(in[col], in[col + nb_a]);
+            const int sign_b = CMP(in[col], in[col + nb_b]);
+
+            out[col] = av_clip_pixel(in[col] + sao_offset_val[edge_idx[2 + sign_a + sign_b]]);
+        }
+    }
+}
+
+/**
+ * Rewrite the outer samples of a block on the sides flagged in borders[]:
+ * each such sample becomes src + sao_offset_val[0], clipped to the pixel range.
+ *
+ * The first/last column is handled unless the class is SAO_EO_VERT, the
+ * first/last row unless it is SAO_EO_HORIZ.
+ * NOTE(review): borders[] is presumably ordered left, top, right, bottom -
+ * confirm against the caller.
+ * vert_edge, horiz_edge and diag_edge are not used by this variant.
+ */
+static void FUNC(sao_edge_restore_0)(uint8_t *_dst, const uint8_t *_src,
+    ptrdiff_t dst_stride, ptrdiff_t src_stride, const SAOParams *sao,
+    const int *borders, const int _width, const int _height, const int c_idx,
+    const uint8_t *vert_edge, const uint8_t *horiz_edge, const uint8_t *diag_edge)
+{
+    int x, y;
+    pixel *dst                      = (pixel *)_dst;
+    const pixel *src                = (pixel *)_src;
+    const int16_t *sao_offset_val   = sao->offset_val[c_idx];
+    const int sao_eo_class          = sao->eo_class[c_idx];
+    int init_x = 0, width = _width, height = _height;
+
+    // strides are given in bytes, convert them to samples
+    dst_stride /= sizeof(pixel);
+    src_stride /= sizeof(pixel);
+
+    if (sao_eo_class != SAO_EO_VERT) {
+        if (borders[0]) {
+            // first column
+            int offset_val = sao_offset_val[0];
+            for (y = 0; y < height; y++) {
+                dst[y * dst_stride] = av_clip_pixel(src[y * src_stride] + offset_val);
+            }
+            // the row loops below start after this column
+            init_x = 1;
+        }
+        if (borders[2]) {
+            // last column; x is used as the row index in this loop
+            int offset_val = sao_offset_val[0];
+            int offset     = width - 1;
+            for (x = 0; x < height; x++) {
+                dst[x * dst_stride + offset] = av_clip_pixel(src[x * src_stride + offset] + offset_val);
+            }
+            // the row loops below stop before this column
+            width--;
+        }
+    }
+    if (sao_eo_class != SAO_EO_HORIZ) {
+        if (borders[1]) {
+            // first row
+            int offset_val = sao_offset_val[0];
+            for (x = init_x; x < width; x++)
+                dst[x] = av_clip_pixel(src[x] + offset_val);
+        }
+        if (borders[3]) {
+            // last row
+            int offset_val   = sao_offset_val[0];
+            ptrdiff_t y_dst_stride = dst_stride * (height - 1);
+            ptrdiff_t y_src_stride = src_stride * (height - 1);
+            for (x = init_x; x < width; x++)
+                dst[x + y_dst_stride] = av_clip_pixel(src[x + y_src_stride] + offset_val);
+            // height is not read again after this point
+            height--;
+        }
+    }
+}
+
+/**
+ * Same border handling as sao_edge_restore_0, followed by a restore pass:
+ * samples on the sides flagged in vert_edge / horiz_edge and on the corners
+ * flagged in diag_edge are copied back unchanged from src.
+ *
+ * NOTE(review): borders[] is presumably ordered left, top, right, bottom, and
+ * diag_edge[] upper-left, upper-right, lower-right, lower-left (this matches
+ * the save_* names and the corner indices used below) - confirm against the
+ * caller.
+ */
+static void FUNC(sao_edge_restore_1)(uint8_t *_dst, const uint8_t *_src,
+    ptrdiff_t dst_stride, ptrdiff_t src_stride, const SAOParams *sao,
+    const int *borders, const int _width, const int _height, const int c_idx,
+    const uint8_t *vert_edge, const uint8_t *horiz_edge, const uint8_t *diag_edge)
+{
+    int x, y;
+    pixel *dst                      = (pixel *)_dst;
+    const pixel *src                = (pixel *)_src;
+    const int16_t *sao_offset_val   = sao->offset_val[c_idx];
+    const int sao_eo_class          = sao->eo_class[c_idx];
+    int init_x = 0, init_y = 0, width = _width, height = _height;
+
+    // strides are given in bytes, convert them to samples
+    dst_stride /= sizeof(pixel);
+    src_stride /= sizeof(pixel);
+
+    if (sao_eo_class != SAO_EO_VERT) {
+        if (borders[0]) {
+            // first column
+            int offset_val = sao_offset_val[0];
+            for (y = 0; y < height; y++) {
+                dst[y * dst_stride] = av_clip_pixel(src[y * src_stride] + offset_val);
+            }
+            init_x = 1;
+        }
+        if (borders[2]) {
+            // last column; x is used as the row index in this loop
+            int offset_val = sao_offset_val[0];
+            int offset     = width - 1;
+            for (x = 0; x < height; x++) {
+                dst[x * dst_stride + offset] = av_clip_pixel(src[x * src_stride + offset] + offset_val);
+            }
+            width--;
+        }
+    }
+    if (sao_eo_class != SAO_EO_HORIZ) {
+        if (borders[1]) {
+            // first row
+            int offset_val = sao_offset_val[0];
+            for (x = init_x; x < width; x++)
+                dst[x] = av_clip_pixel(src[x] + offset_val);
+            init_y = 1;
+        }
+        if (borders[3]) {
+            // last row
+            int offset_val   = sao_offset_val[0];
+            ptrdiff_t y_dst_stride = dst_stride * (height - 1);
+            ptrdiff_t y_src_stride = src_stride * (height - 1);
+            for (x = init_x; x < width; x++)
+                dst[x + y_dst_stride] = av_clip_pixel(src[x + y_src_stride] + offset_val);
+            height--;
+        }
+    }
+
+    {
+        // a corner sample is kept (not restored) when its diagonal is not flagged,
+        // the class runs along that diagonal and neither adjacent side is a border;
+        // each flag shortens the restore loops below by one sample at that corner
+        int save_upper_left  = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
+        int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D  && !borders[1] && !borders[2];
+        int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3];
+        int save_lower_left  = !diag_edge[3] && sao_eo_class == SAO_EO_45D  && !borders[0] && !borders[3];
+
+        // Restore pixels that can't be modified
+        // first column
+        if (vert_edge[0] && sao_eo_class != SAO_EO_VERT) {
+            for (y = init_y + save_upper_left; y < height - save_lower_left; y++)
+                dst[y * dst_stride] = src[y * src_stride];
+        }
+        // last column (width may already have been reduced above)
+        if (vert_edge[1] && sao_eo_class != SAO_EO_VERT) {
+            for (y = init_y + save_upper_right; y < height - save_lower_right; y++)
+                dst[y * dst_stride + width - 1] = src[y * src_stride + width - 1];
+        }
+
+        // first row
+        if (horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) {
+            for (x = init_x + save_upper_left; x < width - save_upper_right; x++)
+                dst[x] = src[x];
+        }
+        // last row (height may already have been reduced above)
+        if (horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) {
+            for (x = init_x + save_lower_left; x < width - save_lower_right; x++)
+                dst[(height - 1) * dst_stride + x] = src[(height - 1) * src_stride + x];
+        }
+        // single corner samples
+        if (diag_edge[0] && sao_eo_class == SAO_EO_135D)
+            dst[0] = src[0];
+        if (diag_edge[1] && sao_eo_class == SAO_EO_45D)
+            dst[width - 1] = src[width - 1];
+        if (diag_edge[2] && sao_eo_class == SAO_EO_135D)
+            dst[dst_stride * (height - 1) + width - 1] = src[src_stride * (height - 1) + width - 1];
+        if (diag_edge[3] && sao_eo_class == SAO_EO_45D)
+            dst[dst_stride * (height - 1)] = src[src_stride * (height - 1)];
+
+    }
+}
+
+#undef CMP
+
+/**
+ * Sum of the differences (v0 - curr) and (v1 - curr), each limited to
+ * [-clip, clip] before being added.
+ */
+static av_always_inline int16_t FUNC(alf_clip)(pixel curr, pixel v0, pixel v1, int16_t clip)
+{
+    const int d0 = av_clip(v0 - curr, -clip, clip);
+    const int d1 = av_clip(v1 - curr, -clip, clip);
+
+    return d0 + d1;
+}
+/**
+ * Apply the 12-coefficient luma ALF to a width x height area, one
+ * ALF_BLOCK_SIZE x ALF_BLOCK_SIZE block at a time.
+ *
+ * filter and clip are advanced by ALF_NUM_COEFF_LUMA after every block, so
+ * they are consumed as one coefficient/clip set per block in raster order.
+ * dst_stride and src_stride are divided by sizeof(pixel) before use.
+ * For rows close to vb_pos the vertical tap rows are folded towards the
+ * current row, so rows above vb_pos never read rows at or below it and
+ * vice versa.
+ */
+static void FUNC(alf_filter_luma)(uint8_t *_dst, ptrdiff_t dst_stride, const uint8_t *_src, ptrdiff_t src_stride,
+    const int width, const int height, const int16_t *filter, const int16_t *clip, const int vb_pos)
+{
+    const pixel *src    = (pixel *)_src;
+    const int shift     = 7; // normalisation shift of the weighted sum
+    const int offset    = 1 << ( shift - 1 );
+    const int vb_above  = vb_pos - 4; // first row above vb_pos that enters the folding branch
+    const int vb_below  = vb_pos + 3; // last row below vb_pos that enters the folding branch
+
+    dst_stride /= sizeof(pixel);
+    src_stride /= sizeof(pixel);
+
+    for (int y = 0; y < height; y += ALF_BLOCK_SIZE) {
+        for (int x = 0; x < width; x += ALF_BLOCK_SIZE) {
+            const pixel *s0 = src + y * src_stride + x; // current row
+            const pixel *s1 = s0 + src_stride;          // row + 1
+            const pixel *s2 = s0 - src_stride;          // row - 1
+            const pixel *s3 = s1 + src_stride;          // row + 2
+            const pixel *s4 = s2 - src_stride;          // row - 2
+            const pixel *s5 = s3 + src_stride;          // row + 3
+            const pixel *s6 = s4 - src_stride;          // row - 3
+
+            for (int i = 0; i < ALF_BLOCK_SIZE; i++) {
+                pixel *dst = (pixel *)_dst + (y + i) * dst_stride + x;
+
+                const pixel *p0 = s0 + i * src_stride;
+                const pixel *p1 = s1 + i * src_stride;
+                const pixel *p2 = s2 + i * src_stride;
+                const pixel *p3 = s3 + i * src_stride;
+                const pixel *p4 = s4 + i * src_stride;
+                const pixel *p5 = s5 + i * src_stride;
+                const pixel *p6 = s6 + i * src_stride;
+
+                const int is_near_vb_above = (y + i <  vb_pos) && (y + i >= vb_pos - 1); // true only for row vb_pos - 1
+                const int is_near_vb_below = (y + i >= vb_pos) && (y + i <= vb_pos);     // true only for row vb_pos
+                const int is_near_vb = is_near_vb_above || is_near_vb_below;
+
+                if ((y + i < vb_pos) && ((y + i) >= vb_above)) {
+                    p1 = (y + i == vb_pos - 1) ? p0 : p1; // each assignment uses the already remapped nearer row
+                    p3 = (y + i >= vb_pos - 2) ? p1 : p3;
+                    p5 = (y + i >= vb_pos - 3) ? p3 : p5;
+
+                    p2 = (y + i == vb_pos - 1) ? p0 : p2; // mirror the same folding on the opposite side
+                    p4 = (y + i >= vb_pos - 2) ? p2 : p4;
+                    p6 = (y + i >= vb_pos - 3) ? p4 : p6;
+                } else if ((y + i >= vb_pos) && ((y + i) <= vb_below)) {
+                    p2 = (y + i == vb_pos    ) ? p0 : p2;
+                    p4 = (y + i <= vb_pos + 1) ? p2 : p4;
+                    p6 = (y + i <= vb_pos + 2) ? p4 : p6;
+
+                    p1 = (y + i == vb_pos    ) ? p0 : p1;
+                    p3 = (y + i <= vb_pos + 1) ? p1 : p3;
+                    p5 = (y + i <= vb_pos + 2) ? p3 : p5;
+                }
+
+                for (int j = 0; j < ALF_BLOCK_SIZE; j++) {
+                    int sum = 0;
+                    const pixel curr = *p0;
+
+                    sum += filter[0]  * FUNC(alf_clip)(curr, p5[+0], p6[+0], clip[0]);
+                    sum += filter[1]  * FUNC(alf_clip)(curr, p3[+1], p4[-1], clip[1]);
+                    sum += filter[2]  * FUNC(alf_clip)(curr, p3[+0], p4[+0], clip[2]);
+                    sum += filter[3]  * FUNC(alf_clip)(curr, p3[-1], p4[+1], clip[3]);
+                    sum += filter[4]  * FUNC(alf_clip)(curr, p1[+2], p2[-2], clip[4]);
+                    sum += filter[5]  * FUNC(alf_clip)(curr, p1[+1], p2[-1], clip[5]);
+                    sum += filter[6]  * FUNC(alf_clip)(curr, p1[+0], p2[+0], clip[6]);
+                    sum += filter[7]  * FUNC(alf_clip)(curr, p1[-1], p2[+1], clip[7]);
+                    sum += filter[8]  * FUNC(alf_clip)(curr, p1[-2], p2[+2], clip[8]);
+                    sum += filter[9]  * FUNC(alf_clip)(curr, p0[+3], p0[-3], clip[9]);
+                    sum += filter[10] * FUNC(alf_clip)(curr, p0[+2], p0[-2], clip[10]);
+                    sum += filter[11] * FUNC(alf_clip)(curr, p0[+1], p0[-1], clip[11]);
+
+                    if (!is_near_vb)
+                        sum = (sum + offset) >> shift;
+                    else
+                        sum = (sum + (1 << ((shift + 3) - 1))) >> (shift + 3); // rows vb_pos - 1 and vb_pos: 3 extra bits of down-shift
+                    sum += curr;
+                    dst[j] = CLIP(sum);
+
+                    p0++;
+                    p1++;
+                    p2++;
+                    p3++;
+                    p4++;
+                    p5++;
+                    p6++;
+                }
+            }
+            filter += ALF_NUM_COEFF_LUMA;
+            clip += ALF_NUM_COEFF_LUMA;
+        }
+    }
+}
+
+/**
+ * Apply the 6-coefficient chroma ALF to a width x height area, one
+ * ALF_BLOCK_SIZE x ALF_BLOCK_SIZE block at a time.
+ *
+ * The same filter / clip set is used for every block (the pointers are not
+ * advanced). dst_stride and src_stride are divided by sizeof(pixel) before
+ * use. The kernel reads at most two rows above and below the current row;
+ * for rows close to vb_pos these rows are folded towards the current row so
+ * that rows above vb_pos never read rows at or below it and vice versa.
+ */
+static void FUNC(alf_filter_chroma)(uint8_t* _dst, ptrdiff_t dst_stride, const uint8_t* _src, ptrdiff_t src_stride,
+    const int width, const int height, const int16_t* filter, const int16_t* clip, const int vb_pos)
+{
+    const pixel *src = (pixel *)_src;
+    const int shift  = 7;
+    const int offset = 1 << ( shift - 1 );
+    const int vb_above  = vb_pos - 2;
+    const int vb_below  = vb_pos + 1;
+
+    dst_stride /= sizeof(pixel);
+    src_stride /= sizeof(pixel);
+
+    for (int y = 0; y < height; y += ALF_BLOCK_SIZE) {
+        for (int x = 0; x < width; x += ALF_BLOCK_SIZE) {
+            const pixel *s0 = src + y * src_stride + x; // current row
+            const pixel *s1 = s0 + src_stride;          // row + 1
+            const pixel *s2 = s0 - src_stride;          // row - 1
+            const pixel *s3 = s1 + src_stride;          // row + 2
+            const pixel *s4 = s2 - src_stride;          // row - 2
+
+            for (int i = 0; i < ALF_BLOCK_SIZE; i++) {
+                pixel *dst = (pixel *)_dst + (y + i) * dst_stride + x;
+
+                const pixel *p0 = s0 + i * src_stride;
+                const pixel *p1 = s1 + i * src_stride;
+                const pixel *p2 = s2 + i * src_stride;
+                const pixel *p3 = s3 + i * src_stride;
+                const pixel *p4 = s4 + i * src_stride;
+
+                // the two rows touching vb_pos use a stronger down-shift below
+                const int is_near_vb = (y + i == vb_pos - 1) || (y + i == vb_pos);
+
+                if ((y + i < vb_pos) && ((y + i) >= vb_above)) {
+                    // each assignment uses the already remapped nearer row
+                    p1 = (y + i == vb_pos - 1) ? p0 : p1;
+                    p3 = (y + i >= vb_pos - 2) ? p1 : p3;
+
+                    p2 = (y + i == vb_pos - 1) ? p0 : p2;
+                    p4 = (y + i >= vb_pos - 2) ? p2 : p4;
+                } else if ((y + i >= vb_pos) && ((y + i) <= vb_below)) {
+                    p2 = (y + i == vb_pos    ) ? p0 : p2;
+                    p4 = (y + i <= vb_pos + 1) ? p2 : p4;
+
+                    p1 = (y + i == vb_pos    ) ? p0 : p1;
+                    p3 = (y + i <= vb_pos + 1) ? p1 : p3;
+                }
+
+                for (int j = 0; j < ALF_BLOCK_SIZE; j++) {
+                    int sum = 0;
+                    const pixel curr = *p0;
+
+                    sum += filter[0]  * FUNC(alf_clip)(curr, p3[+0], p4[+0], clip[0]);
+                    sum += filter[1]  * FUNC(alf_clip)(curr, p1[+1], p2[-1], clip[1]);
+                    sum += filter[2]  * FUNC(alf_clip)(curr, p1[+0], p2[+0], clip[2]);
+                    sum += filter[3]  * FUNC(alf_clip)(curr, p1[-1], p2[+1], clip[3]);
+                    sum += filter[4]  * FUNC(alf_clip)(curr, p0[+2], p0[-2], clip[4]);
+                    sum += filter[5]  * FUNC(alf_clip)(curr, p0[+1], p0[-1], clip[5]);
+
+                    if (!is_near_vb)
+                        sum = (sum + offset) >> shift;
+                    else
+                        sum = (sum + (1 << ((shift + 3) - 1))) >> (shift + 3);
+                    sum += curr;
+                    dst[j] = CLIP(sum);
+
+                    p0++;
+                    p1++;
+                    p2++;
+                    p3++;
+                    p4++;
+                }
+            }
+        }
+    }
+}
+/**
+ * Cross-component ALF: add a correction derived from luma to each sample of
+ * a width x height chroma area.
+ *
+ * The chroma sample (x, y) uses the luma sample at (x << hs, y << vs) as the
+ * centre of a 7-tap kernel. The weighted sum of differences to the centre is
+ * rounded, clipped to the signed BIT_DEPTH-bit range and added to *dst.
+ * dst_stride and luma_stride are divided by sizeof(pixel) before use.
+ */
+static void FUNC(alf_filter_cc)(uint8_t *_dst, ptrdiff_t dst_stride, const uint8_t *_luma, const ptrdiff_t luma_stride,
+    const int width, const int height, const int hs, const int vs, const int16_t *filter, const int vb_pos)
+{
+    const ptrdiff_t stride = luma_stride / sizeof(pixel);
+
+    dst_stride /= sizeof(pixel);
+
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < width; x++) {
+            int sum = 0;
+            pixel *dst  = (pixel *)_dst  + y * dst_stride + x;
+            const pixel *src  = (pixel *)_luma + (y << vs) * stride + (x << hs);
+
+            const pixel *s0 = src - stride;     // luma row - 1
+            const pixel *s1 = src;              // luma row of the centre sample
+            const pixel *s2 = src + stride;     // luma row + 1
+            const pixel *s3 = src + 2 * stride; // luma row + 2
+
+            const int pos = y << vs; // luma row matching this chroma row
+            if (!vs && (pos == vb_pos || pos == vb_pos + 1)) // without vertical subsampling these two rows are left unfiltered
+                continue;
+
+            if (pos == (vb_pos - 2) || pos == (vb_pos + 1))
+                s3 = s2; // drop the row + 2 tap
+            else  if (pos == (vb_pos - 1) || pos == vb_pos)
+                s3 = s2 = s0 = s1; // all vertical taps collapse onto the centre row
+
+
+            sum += filter[0] * (*s0 - *src);
+            sum += filter[1] * (*(s1 - 1) - *src);
+            sum += filter[2] * (*(s1 + 1) - *src);
+            sum += filter[3] * (*(s2 - 1) - *src);
+            sum += filter[4] * (*s2 - *src);
+            sum += filter[5] * (*(s2 + 1) - *src);
+            sum += filter[6] * (*s3 - *src);
+            sum = av_clip((sum + 64) >> 7, -(1 << (BIT_DEPTH - 1)), (1 << (BIT_DEPTH - 1)) - 1);
+            sum += *dst;
+            *dst = av_clip_pixel(sum);
+        }
+    }
+}
+
+#define ALF_DIR_VERT        0 /* gradient against the samples above and below */
+#define ALF_DIR_HORZ        1 /* gradient against the samples left and right */
+#define ALF_DIR_DIGA0       2 /* gradient against the upper-left and lower-right samples */
+#define ALF_DIR_DIGA1       3 /* gradient against the upper-right and lower-left samples */
+
+/**
+ * Derive the ALF class index and transpose index of one block from its four
+ * directional gradient sums.
+ *
+ * @param class_idx      receives the quantised activity (0..4) plus
+ *                       5 * directionality, i.e. a value in 0..24
+ * @param transpose_idx  receives 2 * (DIGA0 <= DIGA1) + (VERT <= HORZ)
+ * @param sum            gradient sums indexed by ALF_DIR_*
+ * @param ac             scale factor applied to the activity before it is
+ *                       shifted and quantised
+ */
+static void FUNC(alf_get_idx)(int *class_idx, int *transpose_idx, const int *sum, const int ac)
+{
+    static const int activity_lut[] = {0, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4 };
+
+    const int vert  = sum[ALF_DIR_VERT];
+    const int horz  = sum[ALF_DIR_HORZ];
+    const int diag0 = sum[ALF_DIR_DIGA0];
+    const int diag1 = sum[ALF_DIR_DIGA1];
+
+    const int horz_ge_vert   = vert  <= horz;
+    const int diag1_ge_diag0 = diag0 <= diag1;
+
+    const int hv_max = horz_ge_vert   ? horz  : vert;
+    const int hv_min = horz_ge_vert   ? vert  : horz;
+    const int d_max  = diag1_ge_diag0 ? diag1 : diag0;
+    const int d_min  = diag1_ge_diag0 ? diag0 : diag1;
+
+    // compare the max/min ratios by cross-multiplication; promote to avoid overflow
+    const int hv_dominant = (uint64_t)d_max * hv_min <= (uint64_t)hv_max * d_min;
+    const int strong      = hv_dominant ? hv_max : d_max;
+    const int weak        = hv_dominant ? hv_min : d_min;
+
+    const int activity = av_clip_uintp2((horz + vert) * ac >> (BIT_DEPTH - 1), 4);
+    int cls = activity_lut[activity];
+
+    if (strong * 2 > 9 * weak)
+        cls += ((hv_dominant << 1) + 2) * 5;
+    else if (strong > 2 * weak)
+        cls += ((hv_dominant << 1) + 1) * 5;
+
+    *class_idx     = cls;
+    *transpose_idx = diag1_ge_diag0 * 2 + horz_ge_vert;
+}
+/**
+ * Classify every ALF_BLOCK_SIZE x ALF_BLOCK_SIZE block of a width x height
+ * area, writing one class index and one transpose index per block in raster
+ * order.
+ *
+ * Pass 1 fills gradient_tmp with ALF_NUM_DIR gradients for every
+ * ALF_GRADIENT_STEP-th position of the area extended by ALF_GRADIENT_BORDER
+ * on each side. Pass 2 sums the gradients in the window around each block
+ * and hands the sums to alf_get_idx().
+ * NOTE(review): gradient_tmp presumably has to hold every entry written by
+ * pass 1 for the extended area - confirm the size against the caller.
+ */
+static void FUNC(alf_classify)(int *class_idx, int *transpose_idx,
+    const uint8_t *_src, const ptrdiff_t _src_stride, const int width, const int height,
+    const int vb_pos, int *gradient_tmp)
+{
+    int *grad;
+
+    const int h = height + ALF_GRADIENT_BORDER * 2;
+    const int w = width  + ALF_GRADIENT_BORDER * 2;
+    const int size = (ALF_BLOCK_SIZE + ALF_GRADIENT_BORDER * 2) / ALF_GRADIENT_STEP; // gradient entries per window row
+    const int gstride = (w / ALF_GRADIENT_STEP) * ALF_NUM_DIR; // ints per row of gradient entries
+
+    const pixel *src           = (const pixel *)_src;
+    const ptrdiff_t src_stride = _src_stride / sizeof(pixel);
+    src -= (ALF_GRADIENT_BORDER + 1) * src_stride + ALF_GRADIENT_BORDER; // one extra row up: s1, not s0, is the first centre row
+
+    grad = gradient_tmp;
+    for (int y = 0; y < h; y += ALF_GRADIENT_STEP) {
+        const pixel *s0  = src + y * src_stride;
+        const pixel *s1  = s0 + src_stride;
+        const pixel *s2  = s1 + src_stride;
+        const pixel *s3  = s2 + src_stride;
+
+        if (y == vb_pos)          //above
+            s3 = s2;
+        else if (y == vb_pos + ALF_GRADIENT_BORDER)
+            s0 = s1;
+
+        for (int x = 0; x < w; x += ALF_GRADIENT_STEP) {
+            //two points a time
+            const pixel *a0  = s0 + x; // first point: centre p0 on row s1, rows above/below are s0/s2
+            const pixel *p0  = s1 + x;
+            const pixel *b0  = s2 + x;
+            const int val0   = (*p0) << 1;
+
+            const pixel *a1  = s1 + x + 1; // second point: one column right and one row down
+            const pixel *p1  = s2 + x + 1;
+            const pixel *b1  = s3 + x + 1;
+            const int val1   = (*p1) << 1;
+
+            grad[ALF_DIR_VERT]  = FFABS(val0 - *a0 - *b0) + FFABS(val1 - *a1 - *b1);
+            grad[ALF_DIR_HORZ]  = FFABS(val0 - *(p0 - 1) - *(p0 + 1)) + FFABS(val1 - *(p1 - 1) - *(p1 + 1));
+            grad[ALF_DIR_DIGA0] = FFABS(val0 - *(a0 - 1) - *(b0 + 1)) + FFABS(val1 - *(a1 - 1) - *(b1 + 1));
+            grad[ALF_DIR_DIGA1] = FFABS(val0 - *(a0 + 1) - *(b0 - 1)) + FFABS(val1 - *(a1 + 1) - *(b1 - 1));
+            grad += ALF_NUM_DIR;
+        }
+    }
+
+    for (int y = 0; y < height ; y += ALF_BLOCK_SIZE ) {
+        int start = 0;
+        int end   = (ALF_BLOCK_SIZE + ALF_GRADIENT_BORDER * 2) / ALF_GRADIENT_STEP;
+        int ac    = 2;
+        if (y + ALF_BLOCK_SIZE == vb_pos) { // block ends at vb_pos: drop the window rows below it
+            end -= ALF_GRADIENT_BORDER / ALF_GRADIENT_STEP;
+            ac = 3;
+        } else if (y == vb_pos) { // block starts at vb_pos: drop the window rows above it
+            start += ALF_GRADIENT_BORDER / ALF_GRADIENT_STEP;
+            ac = 3;
+        }
+        for (int x = 0; x < width; x += ALF_BLOCK_SIZE) {
+            const int xg = x / ALF_GRADIENT_STEP;
+            const int yg = y / ALF_GRADIENT_STEP;
+            int sum[ALF_NUM_DIR] = { 0 };
+
+            grad = gradient_tmp + (yg + start) * gstride + xg * ALF_NUM_DIR;
+            //todo: optimize this loop
+            for (int i = start; i < end; i++) {
+                for (int j = 0; j < size; j++) {
+                    sum[ALF_DIR_VERT]  += grad[ALF_DIR_VERT];
+                    sum[ALF_DIR_HORZ]  += grad[ALF_DIR_HORZ];
+                    sum[ALF_DIR_DIGA0] += grad[ALF_DIR_DIGA0];
+                    sum[ALF_DIR_DIGA1] += grad[ALF_DIR_DIGA1];
+                    grad += ALF_NUM_DIR;
+                }
+                grad += gstride - size * ALF_NUM_DIR; // step to the start of the next window row
+            }
+            FUNC(alf_get_idx)(class_idx, transpose_idx, sum, ac);
+
+            class_idx++;
+            transpose_idx++;
+        }
+    }
+
+}
+
+/**
+ * Expand per-class ALF parameters into per-block coefficient and clip arrays.
+ *
+ * For each of the size blocks, ALF_NUM_COEFF_LUMA coefficients and clip
+ * bounds are appended to coeff and clip, permuted according to the block's
+ * transpose index.
+ *
+ * @param coeff          output, size * ALF_NUM_COEFF_LUMA coefficients
+ * @param clip           output, size * ALF_NUM_COEFF_LUMA clip bounds
+ * @param class_idx      class index of each block
+ * @param transpose_idx  transpose index (0..3) of each block
+ * @param size           number of blocks
+ * @param coeff_set      coefficient sets, addressed through class_to_filt
+ * @param clip_idx_set   clip index sets, addressed directly by class index
+ * @param class_to_filt  maps a class index to a set within coeff_set
+ */
+static void FUNC(alf_recon_coeff_and_clip)(int16_t *coeff, int16_t *clip,
+    const int *class_idx, const int *transpose_idx, const int size,
+    const int16_t *coeff_set, const uint8_t *clip_idx_set, const uint8_t *class_to_filt)
+{
+    // source position of every output coefficient, one row per transpose index
+    static const int index[][ALF_NUM_COEFF_LUMA] = {
+        { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11 },
+        { 9, 4, 10, 8, 1, 5, 11, 7, 3, 0, 2, 6 },
+        { 0, 3, 2, 1, 8, 7, 6, 5, 4, 9, 10, 11 },
+        { 9, 8, 10, 4, 3, 7, 11, 5, 1, 0, 2, 6 },
+    };
+
+    // clip bound selected by each clip index; constant for a given BIT_DEPTH
+    static const int16_t clip_set[] = {
+        1 << BIT_DEPTH, 1 << (BIT_DEPTH - 3), 1 << (BIT_DEPTH - 5), 1 << (BIT_DEPTH - 7)
+    };
+
+    for (int i = 0; i < size; i++) {
+        const int16_t  *src_coeff = coeff_set + class_to_filt[class_idx[i]] * ALF_NUM_COEFF_LUMA;
+        const uint8_t *clip_idx  = clip_idx_set + class_idx[i] * ALF_NUM_COEFF_LUMA;
+
+        for (int j = 0; j < ALF_NUM_COEFF_LUMA; j++) {
+            const int idx = index[transpose_idx[i]][j];
+            *coeff++ = src_coeff[idx];
+            *clip++  = clip_set[clip_idx[idx]];
+        }
+    }
+}
+
+#undef ALF_DIR_HORZ
+#undef ALF_DIR_VERT
+#undef ALF_DIR_DIGA0
+#undef ALF_DIR_DIGA1
+
+// line zero: Pn is n + 1 steps of xstride before pix, Qn is n steps of xstride from pix
+#define P7 pix[-8 * xstride]
+#define P6 pix[-7 * xstride]
+#define P5 pix[-6 * xstride]
+#define P4 pix[-5 * xstride]
+#define P3 pix[-4 * xstride]
+#define P2 pix[-3 * xstride]
+#define P1 pix[-2 * xstride]
+#define P0 pix[-1 * xstride]
+#define Q0 pix[0 * xstride]
+#define Q1 pix[1 * xstride]
+#define Q2 pix[2 * xstride]
+#define Q3 pix[3 * xstride]
+#define Q4 pix[4 * xstride]
+#define Q5 pix[5 * xstride]
+#define Q6 pix[6 * xstride]
+#define Q7 pix[7 * xstride]
+#define P(x) pix[(-(x)-1) * xstride]
+#define Q(x) pix[(x)      * xstride]
+
+// line three (same samples offset by 3 * ystride). used only for deblocking decision
+#define TP7 pix[-8 * xstride + 3 * ystride]
+#define TP6 pix[-7 * xstride + 3 * ystride]
+#define TP5 pix[-6 * xstride + 3 * ystride]
+#define TP4 pix[-5 * xstride + 3 * ystride]
+#define TP3 pix[-4 * xstride + 3 * ystride]
+#define TP2 pix[-3 * xstride + 3 * ystride]
+#define TP1 pix[-2 * xstride + 3 * ystride]
+#define TP0 pix[-1 * xstride + 3 * ystride]
+#define TQ0 pix[0  * xstride + 3 * ystride]
+#define TQ1 pix[1  * xstride + 3 * ystride]
+#define TQ2 pix[2  * xstride + 3 * ystride]
+#define TQ3 pix[3  * xstride + 3 * ystride]
+#define TQ4 pix[4  * xstride + 3 * ystride]
+#define TQ5 pix[5  * xstride + 3 * ystride]
+#define TQ6 pix[6  * xstride + 3 * ystride]
+#define TQ7 pix[7  * xstride + 3 * ystride]
+#define TP(x) pix[(-(x)-1) * xstride + 3 * ystride]
+#define TQ(x) pix[(x)      * xstride + 3 * ystride]
+// line one (same samples offset by 1 * ystride). read by the chroma decision when its shift argument is set
+#define FP3 pix[-4 * xstride + 1 * ystride]
+#define FP2 pix[-3 * xstride + 1 * ystride]
+#define FP1 pix[-2 * xstride + 1 * ystride]
+#define FP0 pix[-1 * xstride + 1 * ystride]
+#define FQ0 pix[0  * xstride + 1 * ystride]
+#define FQ1 pix[1  * xstride + 1 * ystride]
+#define FQ2 pix[2  * xstride + 1 * ystride]
+#define FQ3 pix[3  * xstride + 1 * ystride]
+/**
+ * Long-tap luma deblocking of four consecutive lines across one edge.
+ *
+ * For every line a middle value m is computed from the unfiltered samples,
+ * using the formula selected by the (max_len_p, max_len_q) pair. Each output
+ * sample is then a weighted blend of m and a per-side reference value
+ * (refp / refq) whose weights sum to 64, with the change against the
+ * unfiltered sample limited to +-(tc * k >> 1).
+ *
+ * no_p / no_q disable all writes on the P / Q side. max_len_p / max_len_q
+ * choose, per side, the variant that rewrites 3, 5 or (for any other value)
+ * 7 samples.
+ */
+static void FUNC(loop_filter_luma_large)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride, const int32_t tc,
+    const uint8_t no_p, const uint8_t no_q, const uint8_t max_len_p, const uint8_t max_len_q)
+{
+    for (int d = 0; d < 4; d++) {
+        const int p6 = P6; // snapshot all inputs before any sample of this line is overwritten
+        const int p5 = P5;
+        const int p4 = P4;
+        const int p3 = P3;
+        const int p2 = P2;
+        const int p1 = P1;
+        const int p0 = P0;
+        const int q0 = Q0;
+        const int q1 = Q1;
+        const int q2 = Q2;
+        const int q3 = Q3;
+        const int q4 = Q4;
+        const int q5 = Q5;
+        const int q6 = Q6;
+        int m;
+        if (max_len_p == 5 && max_len_q == 5)
+            m = (p4 + p3 + 2 * (p2 + p1 + p0 + q0 + q1 + q2) + q3 + q4 + 8) >> 4;
+        else if (max_len_p == max_len_q)
+            m = (p6 + p5 + p4 + p3 + p2 + p1 + 2 * (p0 + q0) + q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
+        else if (max_len_p + max_len_q == 12)
+            m = (p5 + p4 + p3 + p2 + 2 * (p1 + p0 + q0 + q1) + q2 + q3 + q4 + q5 + 8) >> 4;
+        else if (max_len_p + max_len_q == 8)
+            m = (p3 + p2 + p1 + p0 + q0 + q1 + q2 + q3 + 4) >> 3;
+        else if (max_len_q == 7)
+            m = (2 * (p2 + p1 + p0 + q0) + p0 + p1 + q1 + q2 + q3 + q4 + q5 + q6 + 8) >> 4;
+        else
+            m = (p6 + p5 + p4 + p3 + p2 + p1 + 2 * (q2 + q1 + q0 + p0) + q0 + q1 + 8) >> 4;
+        if (!no_p) {
+            const int refp = (P(max_len_p) + P(max_len_p - 1) + 1) >> 1; // rounded mean of the two samples at the far end of the P side
+            if (max_len_p == 3) {
+                P0 = p0 + av_clip(((m * 53 + refp * 11 + 32) >> 6) - p0, -(tc * 6 >> 1), (tc * 6 >> 1));
+                P1 = p1 + av_clip(((m * 32 + refp * 32 + 32) >> 6) - p1, -(tc * 4 >> 1), (tc * 4 >> 1));
+                P2 = p2 + av_clip(((m * 11 + refp * 53 + 32) >> 6) - p2, -(tc * 2 >> 1), (tc * 2 >> 1));
+            } else if (max_len_p == 5) {
+                P0 = p0 + av_clip(((m * 58 + refp *  6 + 32) >> 6) - p0, -(tc * 6 >> 1), (tc * 6 >> 1));
+                P1 = p1 + av_clip(((m * 45 + refp * 19 + 32) >> 6) - p1, -(tc * 5 >> 1), (tc * 5 >> 1));
+                P2 = p2 + av_clip(((m * 32 + refp * 32 + 32) >> 6) - p2, -(tc * 4 >> 1), (tc * 4 >> 1));
+                P3 = p3 + av_clip(((m * 19 + refp * 45 + 32) >> 6) - p3, -(tc * 3 >> 1), (tc * 3 >> 1));
+                P4 = p4 + av_clip(((m *  6 + refp * 58 + 32) >> 6) - p4, -(tc * 2 >> 1), (tc * 2 >> 1));
+            } else {
+                P0 = p0 + av_clip(((m * 59 + refp *  5 + 32) >> 6) - p0, -(tc * 6 >> 1), (tc * 6 >> 1));
+                P1 = p1 + av_clip(((m * 50 + refp * 14 + 32) >> 6) - p1, -(tc * 5 >> 1), (tc * 5 >> 1));
+                P2 = p2 + av_clip(((m * 41 + refp * 23 + 32) >> 6) - p2, -(tc * 4 >> 1), (tc * 4 >> 1));
+                P3 = p3 + av_clip(((m * 32 + refp * 32 + 32) >> 6) - p3, -(tc * 3 >> 1), (tc * 3 >> 1));
+                P4 = p4 + av_clip(((m * 23 + refp * 41 + 32) >> 6) - p4, -(tc * 2 >> 1), (tc * 2 >> 1));
+                P5 = p5 + av_clip(((m * 14 + refp * 50 + 32) >> 6) - p5, -(tc * 1 >> 1), (tc * 1 >> 1));
+                P6 = p6 + av_clip(((m *  5 + refp * 59 + 32) >> 6) - p6, -(tc * 1 >> 1), (tc * 1 >> 1));
+            }
+        }
+        if (!no_q) {
+            const int refq = (Q(max_len_q) + Q(max_len_q - 1) + 1) >> 1; // rounded mean of the two samples at the far end of the Q side
+            if (max_len_q == 3) {
+                Q0 = q0 + av_clip(((m * 53 + refq * 11 + 32) >> 6) - q0,  -(tc * 6 >> 1), (tc * 6 >> 1));
+                Q1 = q1 + av_clip(((m * 32 + refq * 32 + 32) >> 6) - q1,  -(tc * 4 >> 1), (tc * 4 >> 1));
+                Q2 = q2 + av_clip(((m * 11 + refq * 53 + 32) >> 6) - q2,  -(tc * 2 >> 1), (tc * 2 >> 1));
+            } else if (max_len_q == 5) {
+                Q0 = q0 + av_clip(((m * 58 + refq *  6 + 32) >> 6) - q0, -(tc * 6 >> 1), (tc * 6 >> 1));
+                Q1 = q1 + av_clip(((m * 45 + refq * 19 + 32) >> 6) - q1, -(tc * 5 >> 1), (tc * 5 >> 1));
+                Q2 = q2 + av_clip(((m * 32 + refq * 32 + 32) >> 6) - q2, -(tc * 4 >> 1), (tc * 4 >> 1));
+                Q3 = q3 + av_clip(((m * 19 + refq * 45 + 32) >> 6) - q3, -(tc * 3 >> 1), (tc * 3 >> 1));
+                Q4 = q4 + av_clip(((m *  6 + refq * 58 + 32) >> 6) - q4, -(tc * 2 >> 1), (tc * 2 >> 1));
+            } else {
+                Q0 = q0 + av_clip(((m * 59 + refq *  5 + 32) >> 6) - q0, -(tc * 6 >> 1), (tc * 6 >> 1));
+                Q1 = q1 + av_clip(((m * 50 + refq * 14 + 32) >> 6) - q1, -(tc * 5 >> 1), (tc * 5 >> 1));
+                Q2 = q2 + av_clip(((m * 41 + refq * 23 + 32) >> 6) - q2, -(tc * 4 >> 1), (tc * 4 >> 1));
+                Q3 = q3 + av_clip(((m * 32 + refq * 32 + 32) >> 6) - q3, -(tc * 3 >> 1), (tc * 3 >> 1));
+                Q4 = q4 + av_clip(((m * 23 + refq * 41 + 32) >> 6) - q4, -(tc * 2 >> 1), (tc * 2 >> 1));
+                Q5 = q5 + av_clip(((m * 14 + refq * 50 + 32) >> 6) - q5, -(tc * 1 >> 1), (tc * 1 >> 1));
+                Q6 = q6 + av_clip(((m *  5 + refq * 59 + 32) >> 6) - q6, -(tc * 1 >> 1), (tc * 1 >> 1));
+            }
+
+        }
+        pix += ystride;
+    }
+}
+
+/**
+ * Strong luma deblocking of four consecutive lines across one edge.
+ *
+ * Three samples per side are replaced by low-pass values; the change against
+ * the unfiltered sample is limited to 3 * tc, 2 * tc and tc for the samples
+ * at distance 0, 1 and 2 from the edge. no_p / no_q leave the P / Q side
+ * untouched.
+ */
+static void FUNC(loop_filter_luma_strong)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride, const int32_t tc,
+    const uint8_t no_p, const uint8_t no_q)
+{
+    const int lim0 = 3 * tc;
+    const int lim1 = 2 * tc;
+    const int lim2 = tc;
+
+    for (int line = 0; line < 4; line++, pix += ystride) {
+        // snapshot the inputs: both sides are filtered from unmodified samples
+        const int p0 = P0, p1 = P1, p2 = P2, p3 = P3;
+        const int q0 = Q0, q1 = Q1, q2 = Q2, q3 = Q3;
+
+        if (!no_p) {
+            const int f0 = (p2 + 2 * (p1 + p0 + q0) + q1 + 4) >> 3;
+            const int f1 = (p2 + p1 + p0 + q0 + 2) >> 2;
+            const int f2 = (2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3;
+
+            P0 = p0 + av_clip(f0 - p0, -lim0, lim0);
+            P1 = p1 + av_clip(f1 - p1, -lim1, lim1);
+            P2 = p2 + av_clip(f2 - p2, -lim2, lim2);
+        }
+        if (!no_q) {
+            const int f0 = (p1 + 2 * (p0 + q0 + q1) + q2 + 4) >> 3;
+            const int f1 = (p0 + q0 + q1 + q2 + 2) >> 2;
+            const int f2 = (2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3;
+
+            Q0 = q0 + av_clip(f0 - q0, -lim0, lim0);
+            Q1 = q1 + av_clip(f1 - q1, -lim1, lim1);
+            Q2 = q2 + av_clip(f2 - q2, -lim2, lim2);
+        }
+    }
+}
+
+/**
+ * Weak luma deblocking of four consecutive lines across one edge.
+ *
+ * A line is only modified when the magnitude of its edge delta is below
+ * 10 * tc. The delta, clipped to +-tc, is added to P0 and subtracted from Q0;
+ * P1 / Q1 are additionally corrected (limited to +-(tc >> 1)) when nd_p /
+ * nd_q is greater than 1. no_p / no_q leave the P / Q side untouched.
+ * The beta argument is not read by this function.
+ */
+static void FUNC(loop_filter_luma_weak)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride,
+    const int32_t tc, const int32_t beta, const uint8_t no_p, const uint8_t no_q, const int nd_p, const int nd_q)
+{
+    const int half_tc = tc >> 1;
+
+    for (int line = 0; line < 4; line++, pix += ystride) {
+        const int p0 = P0, p1 = P1, p2 = P2;
+        const int q0 = Q0, q1 = Q1, q2 = Q2;
+        int delta = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
+
+        // step too large to be a blocking artefact: leave this line alone
+        if (abs(delta) >= 10 * tc)
+            continue;
+
+        delta = av_clip(delta, -tc, tc);
+
+        if (!no_p) {
+            P0 = av_clip_pixel(p0 + delta);
+            if (nd_p > 1) {
+                const int corr = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta) >> 1, -half_tc, half_tc);
+                P1 = av_clip_pixel(p1 + corr);
+            }
+        }
+        if (!no_q) {
+            Q0 = av_clip_pixel(q0 - delta);
+            if (nd_q > 1) {
+                const int corr = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta) >> 1, -half_tc, half_tc);
+                Q1 = av_clip_pixel(q1 + corr);
+            }
+        }
+    }
+
+}
+
+/**
+ * Deblock one 8-line luma edge, handled as two segments of four lines.
+ *
+ * For each segment the filter decision is taken from line 0 and line 3 and
+ * one of the long-tap, strong or weak filters is applied to all four lines,
+ * or the segment is left unchanged. Segments whose scaled tc is 0 are skipped.
+ *
+ * @param _pix           first Q-side sample of the edge
+ * @param _xstride       byte step across the edge (P side is negative)
+ * @param _ystride       byte step along the edge
+ * @param _beta, _tc     per-segment thresholds, scaled here to BIT_DEPTH
+ * @param _no_p, _no_q   per-segment flags suppressing writes on one side
+ * @param _max_len_p, _max_len_q  per-segment maximum filter lengths
+ * @param hor_ctu_edge   when non-zero the P side never uses the long filter
+ */
+static void FUNC(vvc_loop_filter_luma)(uint8_t* _pix, ptrdiff_t _xstride, ptrdiff_t _ystride,
+    const int32_t *_beta, const int32_t *_tc, const uint8_t *_no_p, const uint8_t *_no_q,
+    const uint8_t *_max_len_p, const uint8_t *_max_len_q, int hor_ctu_edge)
+{
+    const ptrdiff_t xstride = _xstride / sizeof(pixel);
+    const ptrdiff_t ystride = _ystride / sizeof(pixel);
+
+    for (int i = 0; i < 2; i++) {
+        pixel* pix      = (pixel*)_pix + i * 4 * ystride;
+        // second differences on line 0 (P*/Q*) and line 3 (TP*/TQ*)
+        const int dp0   = abs(P2 - 2 * P1 + P0);
+        const int dq0   = abs(Q2 - 2 * Q1 + Q0);
+        const int dp3   = abs(TP2 - 2 * TP1 + TP0);
+        const int dq3   = abs(TQ2 - 2 * TQ1 + TQ0);
+        const int d0    = dp0 + dq0;
+        const int d3    = dp3 + dq3;
+#if BIT_DEPTH < 10
+        const int tc    = (_tc[i] + (1 << (9 - BIT_DEPTH))) >> (10 - BIT_DEPTH);
+#else
+        const int tc    = _tc[i] << (BIT_DEPTH - 10);
+#endif
+        const int tc25  = ((tc * 5 + 1) >> 1);
+
+        const int no_p  = _no_p[i];
+        const int no_q  = _no_q[i];
+
+        int max_len_p   = _max_len_p[i];
+        int max_len_q   = _max_len_q[i];
+
+        const int large_p = (max_len_p > 3 && !hor_ctu_edge);
+        const int large_q = max_len_q > 3;
+        // explicit parentheses: the shift amount is BIT_DEPTH - 8
+        const int beta = _beta[i] << (BIT_DEPTH - 8);
+
+        const int beta_3 = beta >> 3;
+        const int beta_2 = beta >> 2;
+
+        if (!tc)
+            continue;
+
+        if (large_p || large_q) {
+            const int dp0l = large_p ? ((dp0 + abs(P5 - 2 * P4 + P3) + 1) >> 1) : dp0;
+            const int dq0l = large_q ? ((dq0 + abs(Q5 - 2 * Q4 + Q3) + 1) >> 1) : dq0;
+            const int dp3l = large_p ? ((dp3 + abs(TP5 - 2 * TP4 + TP3) + 1) >> 1) : dp3;
+            const int dq3l = large_q ? ((dq3 + abs(TQ5 - 2 * TQ4 + TQ3) + 1) >> 1) : dq3;
+            const int d0l = dp0l + dq0l;
+            const int d3l = dp3l + dq3l;
+            const int beta53 = beta * 3 >> 5;
+            const int beta_4 = beta >> 4;
+            // a side that is not "large" takes part with length 3
+            max_len_p = large_p ? max_len_p : 3;
+            max_len_q = large_q ? max_len_q : 3;
+
+            if (d0l + d3l < beta) {
+                const int sp0l = abs(P3 - P0) + (max_len_p == 7 ? abs(P7 - P6 - P5 + P4) : 0);
+                const int sq0l = abs(Q0 - Q3) + (max_len_q == 7 ? abs(Q4 - Q5 - Q6 + Q7) : 0);
+                const int sp3l = abs(TP3 - TP0) + (max_len_p == 7 ? abs(TP7 - TP6 - TP5 + TP4) : 0);
+                const int sq3l = abs(TQ0 - TQ3) + (max_len_q == 7 ? abs(TQ4 - TQ5 - TQ6 + TQ7) : 0);
+                const int sp0 = large_p ? ((sp0l + abs(P3 -   P(max_len_p)) + 1) >> 1) : sp0l;
+                const int sp3 = large_p ? ((sp3l + abs(TP3 - TP(max_len_p)) + 1) >> 1) : sp3l;
+                const int sq0 = large_q ? ((sq0l + abs(Q3 -   Q(max_len_q)) + 1) >> 1) : sq0l;
+                const int sq3 = large_q ? ((sq3l + abs(TQ3 - TQ(max_len_q)) + 1) >> 1) : sq3l;
+                if (sp0 + sq0 < beta53 && abs(P0 - Q0) < tc25 &&
+                    sp3 + sq3 < beta53 && abs(TP0 - TQ0) < tc25 &&
+                    (d0l << 1) < beta_4 && (d3l << 1) < beta_4) {
+                    FUNC(loop_filter_luma_large)(pix, xstride, ystride, tc, no_p, no_q, max_len_p, max_len_q);
+                    continue;
+                }
+            }
+        }
+        // long filter not chosen: fall back to the strong / weak decision
+        if (d0 + d3 < beta) {
+            if (max_len_p > 2 && max_len_q > 2 &&
+                abs(P3 - P0) + abs(Q3 - Q0) < beta_3 && abs(P0 - Q0) < tc25 &&
+                abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
+                (d0 << 1) < beta_2 && (d3 << 1) < beta_2) {
+                FUNC(loop_filter_luma_strong)(pix, xstride, ystride, tc, no_p, no_q);
+            } else { // weak filtering
+                int nd_p = 1;
+                int nd_q = 1;
+                if (max_len_p > 1 && max_len_q > 1) {
+                    if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
+                        nd_p = 2;
+                    if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
+                        nd_q = 2;
+                }
+                FUNC(loop_filter_luma_weak)(pix, xstride, ystride, tc, beta, no_p, no_q, nd_p, nd_q);
+            }
+        }
+    }
+}
+
+/**
+ * Strong chroma deblocking of size consecutive lines across one edge.
+ *
+ * Three samples on each side are replaced by 8-tap averages, each result
+ * kept within +-tc of the unfiltered sample. no_p / no_q leave the P / Q
+ * side untouched.
+ */
+static void FUNC(loop_filter_chroma_strong)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride,
+    const int size, const int32_t tc, const uint8_t no_p, const uint8_t no_q)
+{
+    for (int line = 0; line < size; line++, pix += ystride) {
+        // snapshot the inputs: both sides are filtered from unmodified samples
+        const int p0 = P0, p1 = P1, p2 = P2, p3 = P3;
+        const int q0 = Q0, q1 = Q1, q2 = Q2, q3 = Q3;
+
+        if (!no_p) {
+            const int f0 = (p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2 + 4) >> 3;
+            const int f1 = (2 * p3 + p2 + 2 * p1 + p0 + q0 + q1 + 4) >> 3;
+            const int f2 = (3 * p3 + 2 * p2 + p1 + p0 + q0 + 4) >> 3;
+
+            P0 = av_clip(f0, p0 - tc, p0 + tc);
+            P1 = av_clip(f1, p1 - tc, p1 + tc);
+            P2 = av_clip(f2, p2 - tc, p2 + tc);
+        }
+        if (!no_q) {
+            const int f0 = (p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4) >> 3;
+            const int f1 = (p1 + p0 + q0 + 2 * q1 + q2 + 2 * q3 + 4) >> 3;
+            const int f2 = (p0 + q0 + q1 + 2 * q2 + 3 * q3 + 4) >> 3;
+
+            Q0 = av_clip(f0, q0 - tc, q0 + tc);
+            Q1 = av_clip(f1, q1 - tc, q1 + tc);
+            Q2 = av_clip(f2, q2 - tc, q2 + tc);
+        }
+    }
+}
+
+/**
+ * One-sided strong chroma deblocking of size consecutive lines.
+ *
+ * Only P0 is rewritten on the P side, using P1 and P0 as the sole P-side
+ * inputs, while Q0..Q2 are rewritten on the Q side. Each result is kept
+ * within +-tc of the unfiltered sample. no_p / no_q leave the P / Q side
+ * untouched.
+ */
+static void FUNC(loop_filter_chroma_strong_one_side)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride,
+    const int size, const int32_t tc, const uint8_t no_p, const uint8_t no_q)
+{
+    for (int line = 0; line < size; line++, pix += ystride) {
+        const int p0 = P0, p1 = P1;
+        const int q0 = Q0, q1 = Q1, q2 = Q2, q3 = Q3;
+
+        if (!no_p) {
+            const int f0 = (3 * p1 + 2 * p0 + q0 + q1 + q2 + 4) >> 3;
+
+            P0 = av_clip(f0, p0 - tc, p0 + tc);
+        }
+        if (!no_q) {
+            const int f0 = (2 * p1 + p0 + 2 * q0 + q1 + q2 + q3 + 4) >> 3;
+            const int f1 = (p1 + p0 + q0 + 2 * q1 + q2 + 2 * q3 + 4) >> 3;
+            const int f2 = (p0 + q0 + q1 + 2 * q2 + 3 * q3 + 4) >> 3;
+
+            Q0 = av_clip(f0, q0 - tc, q0 + tc);
+            Q1 = av_clip(f1, q1 - tc, q1 + tc);
+            Q2 = av_clip(f2, q2 - tc, q2 + tc);
+        }
+    }
+}
+
+/**
+ * Weak chroma deblocking of size consecutive lines across one edge.
+ *
+ * A delta clipped to +-tc is added to P0 and subtracted from Q0; no other
+ * sample is modified. no_p / no_q leave the P / Q side untouched.
+ */
+static void FUNC(loop_filter_chroma_weak)(pixel *pix, const ptrdiff_t xstride, const ptrdiff_t ystride,
+    const int size, const int32_t tc, const uint8_t no_p, const uint8_t no_q)
+{
+    for (int line = 0; line < size; line++, pix += ystride) {
+        const int p0 = P0, p1 = P1;
+        const int q0 = Q0, q1 = Q1;
+        const int delta = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
+
+        if (!no_p)
+            P0 = av_clip_pixel(p0 + delta);
+        if (!no_q)
+            Q0 = av_clip_pixel(q0 - delta);
+    }
+}
+/**
+ * Deblock 8 chroma samples along one edge, in segments of size lines
+ * (2 lines per segment when shift is set, 4 otherwise).
+ *
+ * A segment is skipped when its max_len_p, max_len_q or scaled tc is 0.
+ * When max_len_q is 3, a decision on line 0 and on a second line of the
+ * segment either keeps the lengths or reduces both to 1. The segment is then
+ * filtered with the strong (3/3), one-sided strong (q == 3 only) or weak
+ * filter.
+ * _beta and _tc are scaled to BIT_DEPTH here; _xstride and _ystride are
+ * divided by sizeof(pixel) before use.
+ */
+static void FUNC(vvc_loop_filter_chroma)(uint8_t *_pix, const ptrdiff_t  _xstride, const ptrdiff_t _ystride,
+    const int32_t *_beta, const int32_t *_tc, const uint8_t *_no_p, const uint8_t *_no_q,
+    const uint8_t *_max_len_p, const uint8_t *_max_len_q, const int shift)
+{
+    const ptrdiff_t xstride = _xstride / sizeof(pixel);
+    const ptrdiff_t ystride = _ystride / sizeof(pixel);
+    const int size          = shift ? 2 : 4;
+    const int end           = 8 / size;         // 8 samples a loop
+
+    for (int i = 0; i < end; i++) {
+        pixel *pix          = (pixel *)_pix + i * size * ystride;
+        const uint8_t no_p  = _no_p[i];
+        const uint8_t no_q  = _no_q[i];
+        const int beta      = _beta[i] << (BIT_DEPTH - 8);
+        const int beta_3    = beta >> 3;
+        const int beta_2    = beta >> 2;
+
+#if BIT_DEPTH < 10
+        const int tc = (_tc[i] + (1 << (9 - BIT_DEPTH))) >> (10 - BIT_DEPTH);
+#else
+        const int tc = _tc[i] << (BIT_DEPTH - 10);
+#endif
+        const int tc25      = ((tc * 5 + 1) >> 1);
+
+        uint8_t max_len_p   = _max_len_p[i];
+        uint8_t max_len_q   = _max_len_q[i];
+
+        if (!max_len_p || !max_len_q || !tc)
+            continue;
+
+        if (max_len_q == 3){
+            const int p1n  = shift ? FP1 : TP1; // *n: samples of the second decision line (line 1 if shift, else line 3)
+            const int p2n = max_len_p == 1 ? p1n : (shift ? FP2 : TP2); // with max_len_p == 1, p1 stands in for p2 and p3
+            const int p0n  = shift ? FP0 : TP0;
+            const int q0n  = shift ? FQ0 : TQ0;
+            const int q1n  = shift ? FQ1 : TQ1;
+            const int q2n  = shift ? FQ2 : TQ2;
+            const int p3   = max_len_p == 1 ? P1 : P3;
+            const int p2   = max_len_p == 1 ? P1 : P2;
+            const int p1   = P1;
+            const int p0   = P0;
+            const int dp0  = abs(p2 - 2 * p1 + p0);
+            const int dq0  = abs(Q2 - 2 * Q1 + Q0);
+
+            const int dp1 = abs(p2n - 2 * p1n + p0n);
+            const int dq1 = abs(q2n - 2 * q1n + q0n);
+            const int d0  = dp0 + dq0;
+            const int d1  = dp1 + dq1;
+
+            if (d0 + d1 < beta) {
+                const int p3n = max_len_p == 1 ? p1n : (shift ? FP3 : TP3);
+                const int q3n = shift ? FQ3 : TQ3;
+                const int dsam0 = (d0 << 1) < beta_2 && (abs(p3 - p0) + abs(Q0 - Q3)     < beta_3) &&
+                    abs(p0 - Q0)   < tc25;
+                const int dsam1 = (d1 << 1) < beta_2 && (abs(p3n - p0n) + abs(q0n - q3n) < beta_3) &&
+                    abs(p0n - q0n) < tc25;
+                if (!dsam0 || !dsam1) // both decision lines must pass, otherwise fall back to the weak filter
+                    max_len_p = max_len_q = 1;
+            } else {
+                max_len_p = max_len_q = 1;
+            }
+        }
+
+        if (max_len_p == 3 && max_len_q == 3)
+            FUNC(loop_filter_chroma_strong)(pix, xstride, ystride, size, tc, no_p, no_q);
+        else if (max_len_q == 3)
+            FUNC(loop_filter_chroma_strong_one_side)(pix, xstride, ystride, size, tc, no_p, no_q);
+        else
+            FUNC(loop_filter_chroma_weak)(pix, xstride, ystride, size, tc, no_p, no_q);
+    }
+}
+
+/**
+ * Chroma loop filter variant that hands `stride` to the shared implementation
+ * as its x stride and one pixel (sizeof(pixel) bytes) as its y stride.
+ * Every other argument is forwarded unchanged.
+ */
+static void FUNC(vvc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
+    const int32_t *beta, const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q,
+    const uint8_t *max_len_p, const uint8_t *max_len_q, int shift)
+{
+    const ptrdiff_t xstride = stride;
+    const ptrdiff_t ystride = sizeof(pixel);
+
+    FUNC(vvc_loop_filter_chroma)(pix, xstride, ystride, beta, tc, no_p, no_q,
+                                 max_len_p, max_len_q, shift);
+}
+
+/**
+ * Chroma loop filter variant that hands one pixel (sizeof(pixel) bytes) to
+ * the shared implementation as its x stride and `stride` as its y stride.
+ * Every other argument is forwarded unchanged.
+ */
+static void FUNC(vvc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
+    const int32_t *beta, const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q,
+    const uint8_t *max_len_p, const uint8_t *max_len_q, int shift)
+{
+    const ptrdiff_t xstride = sizeof(pixel);
+    const ptrdiff_t ystride = stride;
+
+    FUNC(vvc_loop_filter_chroma)(pix, xstride, ystride, beta, tc, no_p, no_q,
+                                 max_len_p, max_len_q, shift);
+}
+
+/**
+ * Luma loop filter variant that hands `stride` to the shared implementation
+ * as its x stride and one pixel (sizeof(pixel) bytes) as its y stride.
+ * Every other argument, including hor_ctu_edge, is forwarded unchanged.
+ */
+static void FUNC(vvc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
+    const int32_t *beta, const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q,
+    const uint8_t *max_len_p, const uint8_t *max_len_q, int hor_ctu_edge)
+{
+    const ptrdiff_t xstride = stride;
+    const ptrdiff_t ystride = sizeof(pixel);
+
+    FUNC(vvc_loop_filter_luma)(pix, xstride, ystride, beta, tc, no_p, no_q,
+                               max_len_p, max_len_q, hor_ctu_edge);
+}
+
+/**
+ * Luma loop filter variant that hands one pixel (sizeof(pixel) bytes) to the
+ * shared implementation as its x stride and `stride` as its y stride.
+ * Every other argument, including hor_ctu_edge, is forwarded unchanged.
+ */
+static void FUNC(vvc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
+    const int32_t *beta, const int32_t *tc, const uint8_t *no_p, const uint8_t *no_q,
+    const uint8_t *max_len_p, const uint8_t *max_len_q, int hor_ctu_edge)
+{
+    const ptrdiff_t xstride = sizeof(pixel);
+    const ptrdiff_t ystride = stride;
+
+    FUNC(vvc_loop_filter_luma)(pix, xstride, ystride, beta, tc, no_p, no_q,
+                               max_len_p, max_len_q, hor_ctu_edge);
+}
+
+/**
+ * Return the truncated mean of four samples: (P0 + TP0 + Q0 + TQ0) >> 2.
+ *
+ * Strides are given in bytes and converted to pixel units.
+ *
+ * NOTE(review): P0, TP0, Q0 and TQ0 are macros defined outside this function
+ * and presumably read through the local names pix, xstride and ystride, so
+ * these names must be kept - confirm against the macro definitions.
+ */
+static int FUNC(vvc_loop_ladf_level)(const uint8_t *_pix, const ptrdiff_t _xstride, const ptrdiff_t _ystride)
+{
+    const pixel *pix        = (const pixel *)_pix;
+    const ptrdiff_t xstride = _xstride / sizeof(pixel);
+    const ptrdiff_t ystride = _ystride / sizeof(pixel);
+    const int sum           = P0 + TP0 + Q0 + TQ0;
+
+    return sum >> 2;
+}
+
+/**
+ * LADF level variant: `stride` becomes the x stride and one pixel
+ * (sizeof(pixel) bytes) the y stride of the shared implementation.
+ */
+static int FUNC(vvc_h_loop_ladf_level)(const uint8_t *pix, ptrdiff_t stride)
+{
+    const ptrdiff_t xstride = stride;
+    const ptrdiff_t ystride = sizeof(pixel);
+
+    return FUNC(vvc_loop_ladf_level)(pix, xstride, ystride);
+}
+
+/**
+ * LADF level variant: one pixel (sizeof(pixel) bytes) becomes the x stride
+ * and `stride` the y stride of the shared implementation.
+ */
+static int FUNC(vvc_v_loop_ladf_level)(const uint8_t *pix, ptrdiff_t stride)
+{
+    const ptrdiff_t xstride = sizeof(pixel);
+    const ptrdiff_t ystride = stride;
+
+    return FUNC(vvc_loop_ladf_level)(pix, xstride, ystride);
+}
+
+#undef P7
+#undef P6
+#undef P5
+#undef P4
+#undef P3
+#undef P2
+#undef P1
+#undef P0
+#undef Q0
+#undef Q1
+#undef Q2
+#undef Q3
+#undef Q4
+#undef Q5
+#undef Q6
+#undef Q7
+
+#undef TP7
+#undef TP6
+#undef TP5
+#undef TP4
+#undef TP3
+#undef TP2
+#undef TP1
+#undef TP0
+#undef TQ0
+#undef TQ1
+#undef TQ2
+#undef TQ3
+#undef TQ4
+#undef TQ5
+#undef TQ6
+#undef TQ7
+
+/**
+ * Point the LMCS context's filter callback at this template instance's
+ * lmcs_filter_luma.
+ */
+static void FUNC(ff_vvc_lmcs_dsp_init)(VVCLMCSDSPContext *const lmcs)
+{
+    lmcs->filter = FUNC(lmcs_filter_luma);
+}
+
+/**
+ * Fill the loop-filter context with this template instance's callbacks.
+ * Slot 0 of each table receives the h_ variant, slot 1 the v_ variant.
+ */
+static void FUNC(ff_vvc_lf_dsp_init)(VVCLFDSPContext *const lf)
+{
+    /* slot 0: h_ variants */
+    lf->ladf_level[0]    = FUNC(vvc_h_loop_ladf_level);
+    lf->filter_luma[0]   = FUNC(vvc_h_loop_filter_luma);
+    lf->filter_chroma[0] = FUNC(vvc_h_loop_filter_chroma);
+
+    /* slot 1: v_ variants */
+    lf->ladf_level[1]    = FUNC(vvc_v_loop_ladf_level);
+    lf->filter_luma[1]   = FUNC(vvc_v_loop_filter_luma);
+    lf->filter_chroma[1] = FUNC(vvc_v_loop_filter_chroma);
+}
+
+/**
+ * Fill the SAO context with this template instance's callbacks.
+ * Every band_filter and edge_filter slot gets the same function;
+ * edge_restore gets one function per slot.
+ */
+static void FUNC(ff_vvc_sao_dsp_init)(VVCSAODSPContext *const sao)
+{
+    const int nb_band = FF_ARRAY_ELEMS(sao->band_filter);
+    const int nb_edge = FF_ARRAY_ELEMS(sao->edge_filter);
+
+    sao->edge_restore[0] = FUNC(sao_edge_restore_0);
+    sao->edge_restore[1] = FUNC(sao_edge_restore_1);
+
+    for (int k = 0; k < nb_band; k++) {
+        sao->band_filter[k] = FUNC(sao_band_filter);
+    }
+    for (int k = 0; k < nb_edge; k++) {
+        sao->edge_filter[k] = FUNC(sao_edge_filter);
+    }
+}
+
+/**
+ * Fill the ALF context with this template instance's callbacks: the
+ * classifier, the coefficient/clip reconstruction, the per-component
+ * filters (indexed by LUMA and CHROMA) and the cross-component filter.
+ */
+static void FUNC(ff_vvc_alf_dsp_init)(VVCALFDSPContext *const alf)
+{
+    alf->classify             = FUNC(alf_classify);
+    alf->recon_coeff_and_clip = FUNC(alf_recon_coeff_and_clip);
+
+    alf->filter[LUMA]         = FUNC(alf_filter_luma);
+    alf->filter[CHROMA]       = FUNC(alf_filter_chroma);
+    alf->filter_cc            = FUNC(alf_filter_cc);
+}