diff mbox series

[FFmpeg-devel] lavc/pgssubdec: Correct rendering of palette updates.

Message ID CWLP123MB6857374E8CB6FF8E1BFF4940C6469@CWLP123MB6857.GBRP123.PROD.OUTLOOK.COM
State New
Headers show
Series [FFmpeg-devel] lavc/pgssubdec: Correct rendering of palette updates. | expand

Checks

Context Check Description
andriy/make_x86 success Make finished
andriy/make_fate_x86 success Make fate finished

Commit Message

cubi cibo May 25, 2023, 3:45 p.m. UTC
Reference decoders (BD players) have a graphic plane to store the
rendered output. That plane is only refreshed by a window segment.
It is evaluated with the last palette and reused on palette updates.

This patch adds a conditionally writable graphic plane. The decoder
now differentiates appropriately palette updates, new compositions,
and display sets that perform no visible actions.
---
 libavcodec/pgssubdec.c | 339 +++++++++++++++++++++++++----------------
 1 file changed, 210 insertions(+), 129 deletions(-)

Comments

cubi cibo Nov. 6, 2023, 3:50 p.m. UTC | #1
Impolitely pushing this out of oblivion for review.

The limited capabilities of FFmpeg HDMV_PGS decoder deters the usage of advanced functionalities in hobbyist and professional applications. E.g !40981 is paramount for in-spec special effects.
I have merged both !40981 and !41826 (this) in the following patch, may that be handy for review (probably not!): https://raw.githubusercontent.com/HandBrake/HandBrake/d35545dbc21713e89c6d9d0d3404557c26a0523c/contrib/ffmpeg/A22-add-cropping-and-multi-objects-palette-update-to-PGS.patch
Paul B Mahol Nov. 6, 2023, 9:36 p.m. UTC | #2
Please provide sample(s) so this can be validated.
cubi cibo Nov. 8, 2023, 5:06 p.m. UTC | #3
Sure.
!40981: ffmpeg/incoming/00019.m2ts  - multiplexed video and PG stream, courtesy of the author of !40981.
!41826: ffmpeg/incoming/graphicplane_test.sup
The behaviour observed on reference decoders is described in the associated text files.

I updated the aggregate patch to fix a bug introduced in !40981, caught by graphicplane_test.sup: "Normal Case" object redefinition was broken.
https://raw.githubusercontent.com/cubicibo/HandBrake/75ac11994fa4a32380d45c55a6add8344327773c/contrib/ffmpeg/A22-Add-cropping-and-graphic-plane-to-PGS-decoder.patch
Paul B Mahol Nov. 8, 2023, 6:25 p.m. UTC | #4
On Wed, Nov 8, 2023 at 6:07 PM cubicibo cubicibo <cubicibo@outlook.com>
wrote:

> Sure.
> !40981: ffmpeg/incoming/00019.m2ts  - multiplexed video and PG stream,
> courtesy of the author of !40981.
> !41826: ffmpeg/incoming/graphicplane_test.sup
>

Does those have direct links now?

The behaviour observed on reference decoders is described in the associated
> text files.
>
> I updated the aggregate patch to fix a bug introduced in !40981, caught by
> graphicplane_test.sup: "Normal Case" object redefinition was broken.
>
> https://raw.githubusercontent.com/cubicibo/HandBrake/75ac11994fa4a32380d45c55a6add8344327773c/contrib/ffmpeg/A22-Add-cropping-and-graphic-plane-to-PGS-decoder.patch
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
>
cubi cibo Nov. 8, 2023, 6:28 p.m. UTC | #5
My bad.
Append .txt to the URLs to get the matching test description.

!40981: https://streams.videolan.org/ffmpeg/incoming/00019.m2ts
!41826: https://streams.videolan.org/ffmpeg/incoming/graphicplane_test.sup
diff mbox series

Patch

diff --git a/libavcodec/pgssubdec.c b/libavcodec/pgssubdec.c
index 5f76f12615..e7588b727b 100644
--- a/libavcodec/pgssubdec.c
+++ b/libavcodec/pgssubdec.c
@@ -47,57 +47,76 @@  enum SegmentType {
 };
 
 typedef struct PGSSubObjectRef {
-    int     id;
-    int     window_id;
-    uint8_t composition_flag;
-    int     x;
-    int     y;
-    int     crop_x;
-    int     crop_y;
-    int     crop_w;
-    int     crop_h;
+    uint16_t id;
+    uint8_t  window_id;
+    uint8_t  composition_flag;
+    uint16_t x;
+    uint16_t y;
+    uint16_t crop_x;
+    uint16_t crop_y;
+    uint16_t crop_w;
+    uint16_t crop_h;
 } PGSSubObjectRef;
 
 typedef struct PGSSubPresentation {
-    int id_number;
-    int palette_id;
-    int object_count;
+    uint8_t         palette_flag;
+    uint8_t         palette_id;
+    uint8_t         object_count;
     PGSSubObjectRef objects[MAX_OBJECT_REFS];
-    int64_t pts;
+    int64_t         pts;
 } PGSSubPresentation;
 
 typedef struct PGSSubObject {
-    int          id;
-    int          w;
-    int          h;
+    uint16_t     id;
+    uint16_t     w;
+    uint16_t     h;
     uint8_t      *rle;
     unsigned int rle_buffer_size, rle_data_len;
     unsigned int rle_remaining_len;
 } PGSSubObject;
 
 typedef struct PGSSubObjects {
-    int          count;
+    uint8_t      count;
     PGSSubObject object[MAX_EPOCH_OBJECTS];
 } PGSSubObjects;
 
 typedef struct PGSSubPalette {
-    int         id;
-    uint32_t    clut[256];
+    uint8_t     id;
+    uint32_t    clut[AVPALETTE_COUNT];
 } PGSSubPalette;
 
 typedef struct PGSSubPalettes {
-    int           count;
+    uint8_t       count;
     PGSSubPalette palette[MAX_EPOCH_PALETTES];
 } PGSSubPalettes;
 
+typedef struct PGSGraphicPlane {
+   uint8_t        count;
+   uint8_t        writable;
+   AVSubtitleRect visible_rect[MAX_OBJECT_REFS];
+} PGSGraphicPlane;
+
 typedef struct PGSSubContext {
     AVClass *class;
     PGSSubPresentation presentation;
     PGSSubPalettes     palettes;
     PGSSubObjects      objects;
+    PGSGraphicPlane    plane;
     int forced_subs_only;
 } PGSSubContext;
 
+static void clear_graphic_plane(PGSSubContext *ctx)
+{
+    int i;
+
+    for (i = 0; i < ctx->plane.count; i++) {
+       av_freep(&ctx->plane.visible_rect[i].data[0]);
+       memset(&ctx->plane.visible_rect[i], 0, sizeof(ctx->plane.visible_rect[i]));
+    }
+    ctx->plane.writable = 0;
+    ctx->plane.count = 0;
+}
+
 static void flush_cache(AVCodecContext *avctx)
 {
     PGSSubContext *ctx = avctx->priv_data;
@@ -143,6 +162,7 @@  static av_cold int init_decoder(AVCodecContext *avctx)
 
 static av_cold int close_decoder(AVCodecContext *avctx)
 {
+    clear_graphic_plane((PGSSubContext *)avctx->priv_data);
     flush_cache(avctx);
 
     return 0;
@@ -166,6 +186,8 @@  static int decode_rle(AVCodecContext *avctx, AVSubtitleRect *rect,
 
     rle_bitmap_end = buf + buf_size;
 
+    // Only decode if the plane has been cleared.
+    av_assert1(!rect->data[0]);
     rect->data[0] = av_malloc_array(rect->w, rect->h);
 
     if (!rect->data[0])
@@ -317,7 +339,7 @@  static int parse_object_segment(AVCodecContext *avctx,
  * Parse the palette segment packet.
  *
  * The palette segment contains details of the palette,
- * a maximum of 256 colors can be defined.
+ * a maximum of 256 colors (AVPALETTE_COUNT) can be defined.
  *
  * @param avctx contains the current codec context
  * @param buf pointer to the packet to process
@@ -390,13 +412,17 @@  static int parse_presentation_segment(AVCodecContext *avctx,
                                       int64_t pts)
 {
     PGSSubContext *ctx = avctx->priv_data;
-    int i, state, ret;
+    int ret;
+    uint8_t i, state;
     const uint8_t *buf_end = buf + buf_size;
 
     // Video descriptor
     int w = bytestream_get_be16(&buf);
     int h = bytestream_get_be16(&buf);
 
+    // On a new display set, reset writability of the graphic plane
+    ctx->plane.writable = 0;
+
     ctx->presentation.pts = pts;
 
     ff_dlog(avctx, "Video Dimensions %dx%d\n",
@@ -405,77 +431,80 @@  static int parse_presentation_segment(AVCodecContext *avctx,
     if (ret < 0)
         return ret;
 
-    /* Skip 1 bytes of unknown, frame rate */
-    buf++;
+    /* Skip 3 bytes: framerate (1), presentation id number (2) */
+    buf+=3;
 
-    // Composition descriptor
-    ctx->presentation.id_number = bytestream_get_be16(&buf);
     /*
-     * state is a 2 bit field that defines pgs epoch boundaries
+     * State is a 2 bit field that defines pgs epoch boundaries
      * 00 - Normal, previously defined objects and palettes are still valid
      * 01 - Acquisition point, previous objects and palettes can be released
      * 10 - Epoch start, previous objects and palettes can be released
      * 11 - Epoch continue, previous objects and palettes can be released
      *
-     * reserved 6 bits discarded
+     * Reserved 6 bits discarded
      */
     state = bytestream_get_byte(&buf) >> 6;
     if (state != 0) {
         flush_cache(avctx);
     }
 
+    /* Reserved 7 bits discarded. */
+    ctx->presentation.palette_flag = bytestream_get_byte(&buf) & 0x80;
+    ctx->presentation.palette_id = bytestream_get_byte(&buf);
+
     /*
-     * skip palette_update_flag (0x80),
+     * On palette update, don't parse the compositions references,
+     * just evaluate the existing graphic plane with the new palette.
+     * (On palette update, reference decoders expects a segment with one
+     *  composition information whose content is not decoded.)
      */
-    buf += 1;
-    ctx->presentation.palette_id = bytestream_get_byte(&buf);
-    ctx->presentation.object_count = bytestream_get_byte(&buf);
-    if (ctx->presentation.object_count > MAX_OBJECT_REFS) {
-        av_log(avctx, AV_LOG_ERROR,
-               "Invalid number of presentation objects %d\n",
-               ctx->presentation.object_count);
-        ctx->presentation.object_count = 2;
-        if (avctx->err_recognition & AV_EF_EXPLODE) {
-            return AVERROR_INVALIDDATA;
+    if (!ctx->presentation.palette_flag) {
+        ctx->presentation.object_count = bytestream_get_byte(&buf);
+        if (ctx->presentation.object_count > MAX_OBJECT_REFS) {
+            av_log(avctx, AV_LOG_ERROR,
+                   "Invalid number of presentation objects %d\n",
+                   ctx->presentation.object_count);
+            ctx->presentation.object_count = 2;
+            if (avctx->err_recognition & AV_EF_EXPLODE) {
+                return AVERROR_INVALIDDATA;
+            }
         }
-    }
 
+        for (i = 0; i < ctx->presentation.object_count; i++) {
+            PGSSubObjectRef *const object = &ctx->presentation.objects[i];
 
-    for (i = 0; i < ctx->presentation.object_count; i++)
-    {
-        PGSSubObjectRef *const object = &ctx->presentation.objects[i];
-
-        if (buf_end - buf < 8) {
-            av_log(avctx, AV_LOG_ERROR, "Insufficent space for object\n");
-            ctx->presentation.object_count = i;
-            return AVERROR_INVALIDDATA;
-        }
+            if (buf_end - buf < 8) {
+                av_log(avctx, AV_LOG_ERROR, "Insufficent space for object\n");
+                ctx->presentation.object_count = i;
+                return AVERROR_INVALIDDATA;
+            }
 
-        object->id               = bytestream_get_be16(&buf);
-        object->window_id        = bytestream_get_byte(&buf);
-        object->composition_flag = bytestream_get_byte(&buf);
+            object->id               = bytestream_get_be16(&buf);
+            object->window_id        = bytestream_get_byte(&buf);
+            object->composition_flag = bytestream_get_byte(&buf);
 
-        object->x = bytestream_get_be16(&buf);
-        object->y = bytestream_get_be16(&buf);
+            object->x = bytestream_get_be16(&buf);
+            object->y = bytestream_get_be16(&buf);
 
-        // If cropping
-        if (object->composition_flag & 0x80) {
-            object->crop_x = bytestream_get_be16(&buf);
-            object->crop_y = bytestream_get_be16(&buf);
-            object->crop_w = bytestream_get_be16(&buf);
-            object->crop_h = bytestream_get_be16(&buf);
-        }
+            // If cropping
+            if (object->composition_flag & 0x80) {
+                object->crop_x = bytestream_get_be16(&buf);
+                object->crop_y = bytestream_get_be16(&buf);
+                object->crop_w = bytestream_get_be16(&buf);
+                object->crop_h = bytestream_get_be16(&buf);
+            }
 
-        ff_dlog(avctx, "Subtitle Placement x=%d, y=%d\n",
-                object->x, object->y);
+            ff_dlog(avctx, "Subtitle Placement x=%d, y=%d\n",
+                    object->x, object->y);
 
-        if (object->x > avctx->width || object->y > avctx->height) {
-            av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x = %d, y = %d, video width = %d, video height = %d.\n",
-                   object->x, object->y,
-                    avctx->width, avctx->height);
-            object->y = object->x = 0;
-            if (avctx->err_recognition & AV_EF_EXPLODE) {
-                return AVERROR_INVALIDDATA;
+            if (object->x > avctx->width || object->y > avctx->height) {
+                av_log(avctx, AV_LOG_ERROR, "Subtitle out of video bounds. x = %d, y = %d, video width = %d, video height = %d.\n",
+                       object->x, object->y,
+                        avctx->width, avctx->height);
+                object->y = object->x = 0;
+                if (avctx->err_recognition & AV_EF_EXPLODE) {
+                    return AVERROR_INVALIDDATA;
+                }
             }
         }
     }
@@ -483,10 +512,47 @@  static int parse_presentation_segment(AVCodecContext *avctx,
     return 0;
 }
 
+/**
+ * Parse the window segment packet.
+ *
+ * The window segment instructs the decoder to redraw the graphic plane
+ * with the composition references provided in the presentation segment
+ *
+ * @param avctx contains the current codec context
+ */
+static int parse_window_segment(AVCodecContext *avctx, const uint8_t *buf,
+                                int buf_size)
+{
+    PGSSubContext *ctx = (PGSSubContext *)avctx->priv_data;
+
+    // 1 byte: number of windows defined
+    if (bytestream_get_byte(&buf) > MAX_OBJECT_REFS) {
+        av_log(avctx, AV_LOG_ERROR, "Too many windows defined.\n");
+        return AVERROR_INVALIDDATA;
+    }
+
+    /* TODO: mask objects with windows when transfering to the graphic plane
+     * Window Segment Structure
+     *     {
+     *       1 byte : window id,
+     *       2 bytes: X position of window,
+     *       2 bytes: Y position of window,
+     *       2 bytes: Width of window,
+     *       2 bytes: Height of window.
+     *     }
+     */
+    // Flush the graphic plane, it will be redrawn.
+    clear_graphic_plane(ctx);
+    ctx->plane.writable = 1;
+    ctx->plane.count = ctx->presentation.object_count;
+    return 0;
+}
+
 /**
  * Parse the display segment packet.
  *
- * The display segment controls the updating of the display.
+ * The display segment closes the display set. The inferred data is used
+ * to decide if the display should be updated.
  *
  * @param avctx contains the current codec context
  * @param data pointer to the data pertaining the subtitle to display
@@ -505,20 +571,28 @@  static int display_end_segment(AVCodecContext *avctx, AVSubtitle *sub,
     memset(sub, 0, sizeof(*sub));
     sub->pts = pts;
     ctx->presentation.pts = AV_NOPTS_VALUE;
-    sub->start_display_time = 0;
     // There is no explicit end time for PGS subtitles.  The end time
     // is defined by the start of the next sub which may contain no
     // objects (i.e. clears the previous sub)
     sub->end_display_time   = UINT32_MAX;
-    sub->format             = 0;
 
-    // Blank if last object_count was 0.
-    if (!ctx->presentation.object_count)
+    if (!ctx->presentation.palette_flag && !ctx->plane.writable) {
+        // This display set does not perform a display update
+        // E.g. it only defines new objects or palettes for future usage.
+        return 0;
+    }
+
+    // Blank if object_count is 0 (undisplay). Stream is damaged if the
+    // palette update flag is set but plane count is zero.
+    if (!ctx->plane.count) {
+        avsubtitle_free(sub);
         return 1;
-    sub->rects = av_calloc(ctx->presentation.object_count, sizeof(*sub->rects));
-    if (!sub->rects) {
-        return AVERROR(ENOMEM);
     }
+
+    sub->rects = av_calloc(ctx->plane.count, sizeof(*sub->rects));
+    if (!sub->rects)
+        return AVERROR(ENOMEM);
+
     palette = find_palette(ctx->presentation.palette_id, &ctx->palettes);
     if (!palette) {
         // Missing palette.  Should only happen with damaged streams.
@@ -527,57 +601,71 @@  static int display_end_segment(AVCodecContext *avctx, AVSubtitle *sub,
         avsubtitle_free(sub);
         return AVERROR_INVALIDDATA;
     }
-    for (i = 0; i < ctx->presentation.object_count; i++) {
-        AVSubtitleRect *const rect = av_mallocz(sizeof(*rect));
-        PGSSubObject *object;
 
-        if (!rect)
-            return AVERROR(ENOMEM);
-        sub->rects[sub->num_rects++] = rect;
-        rect->type = SUBTITLE_BITMAP;
-
-        /* Process bitmap */
-        object = find_object(ctx->presentation.objects[i].id, &ctx->objects);
-        if (!object) {
-            // Missing object.  Should only happen with damaged streams.
-            av_log(avctx, AV_LOG_ERROR, "Invalid object id %d\n",
-                   ctx->presentation.objects[i].id);
-            if (avctx->err_recognition & AV_EF_EXPLODE)
-                return AVERROR_INVALIDDATA;
-            // Leaves rect empty with 0 width and height.
-            continue;
-        }
-        if (ctx->presentation.objects[i].composition_flag & 0x40)
-            rect->flags |= AV_SUBTITLE_FLAG_FORCED;
+    for (i = 0; i < ctx->plane.count; i++) {
+        AVSubtitleRect *const gp_rect = &ctx->plane.visible_rect[i];
+        AVSubtitleRect *rect;
+        gp_rect->type = SUBTITLE_BITMAP;
+
+        // Compose the graphic plane if a window segment has been provided
+        if (ctx->plane.writable) {
+            PGSSubObject *object;
+
+            // Process bitmap
+            object = find_object(ctx->presentation.objects[i].id, &ctx->objects);
+            if (!object) {
+                // Missing object.  Should only happen with damaged streams.
+                av_log(avctx, AV_LOG_ERROR, "Invalid object id %d\n",
+                       ctx->presentation.objects[i].id);
+                if (avctx->err_recognition & AV_EF_EXPLODE)
+                    return AVERROR_INVALIDDATA;
+                // Leaves rect empty with 0 width and height.
+                continue;
+            }
+            if (ctx->presentation.objects[i].composition_flag & 0x40)
+                gp_rect->flags |= AV_SUBTITLE_FLAG_FORCED;
 
-        rect->x    = ctx->presentation.objects[i].x;
-        rect->y    = ctx->presentation.objects[i].y;
+            gp_rect->x    = ctx->presentation.objects[i].x;
+            gp_rect->y    = ctx->presentation.objects[i].y;
 
-        if (object->rle) {
-            rect->w    = object->w;
-            rect->h    = object->h;
+            if (object->rle) {
+                gp_rect->w    = object->w;
+                gp_rect->h    = object->h;
 
-            rect->linesize[0] = object->w;
+                gp_rect->linesize[0] = object->w;
 
-            if (object->rle_remaining_len) {
-                av_log(avctx, AV_LOG_ERROR, "RLE data length %u is %u bytes shorter than expected\n",
-                       object->rle_data_len, object->rle_remaining_len);
-                if (avctx->err_recognition & AV_EF_EXPLODE)
-                    return AVERROR_INVALIDDATA;
-            }
-            ret = decode_rle(avctx, rect, object->rle, object->rle_data_len);
-            if (ret < 0) {
-                if ((avctx->err_recognition & AV_EF_EXPLODE) ||
-                    ret == AVERROR(ENOMEM)) {
-                    return ret;
+                if (object->rle_remaining_len) {
+                    av_log(avctx, AV_LOG_ERROR, "RLE data missing %u bytes out of %u.\n",
+                           object->rle_remaining_len, object->rle_data_len);
+                    if (avctx->err_recognition & AV_EF_EXPLODE)
+                        return AVERROR_INVALIDDATA;
+                }
+                ret = decode_rle(avctx, gp_rect, object->rle, object->rle_data_len);
+                if (ret < 0) {
+                    if ((avctx->err_recognition & AV_EF_EXPLODE) ||
+                        ret == AVERROR(ENOMEM)) {
+                        return ret;
+                    }
+                    gp_rect->w = 0;
+                    gp_rect->h = 0;
+                    continue;
                 }
-                rect->w = 0;
-                rect->h = 0;
-                continue;
             }
         }
-        /* Allocate memory for colors */
-        rect->nb_colors = 256;
+        // Export graphic plane content with latest palette
+        rect = av_memdup(gp_rect, sizeof(*gp_rect));
+        if (!rect)
+            return AVERROR(ENOMEM);
+
+        sub->rects[sub->num_rects++] = rect;
+        if (gp_rect->data[0]) {
+            rect->data[0] = av_memdup(gp_rect->data[0], rect->w*rect->h);
+            if (!rect->data[0])
+                return AVERROR(ENOMEM);
+        }
+
+        // Allocate memory for colors
+        rect->nb_colors = AVPALETTE_COUNT;
         rect->data[1]   = av_mallocz(AVPALETTE_SIZE);
         if (!rect->data[1])
             return AVERROR(ENOMEM);
@@ -640,14 +728,7 @@  static int decode(AVCodecContext *avctx, AVSubtitle *sub,
             ret = parse_presentation_segment(avctx, buf, segment_length, sub->pts);
             break;
         case WINDOW_SEGMENT:
-            /*
-             * Window Segment Structure (No new information provided):
-             *     2 bytes: Unknown,
-             *     2 bytes: X position of subtitle,
-             *     2 bytes: Y position of subtitle,
-             *     2 bytes: Width of subtitle,
-             *     2 bytes: Height of subtitle.
-             */
+            ret = parse_window_segment(avctx, buf, segment_length);
             break;
         case DISPLAY_SEGMENT:
             if (*got_sub_ptr) {