diff mbox

[FFmpeg-devel] avcodec: add a subcharenc mode that disables UTF-8 check

Message ID 20180324124321.25932-1-nfxjfg@googlemail.com
State Accepted
Commit b7d0d912ef9b60eae962e4622d72860af31a8b00
Headers show

Commit Message

wm4 March 24, 2018, 12:43 p.m. UTC
This is for applications which want to explicitly check for invalid
UTF-8 manually, and take actions that are better than dropping invalid
subtitles silently. (It's pretty much silent because sporadic avcodec
error messages are so common that you can't reasonably display them in a
prominent and meaningful way in an application GUI.)
---
 doc/APIchanges             | 3 +++
 libavcodec/avcodec.h       | 1 +
 libavcodec/decode.c        | 3 ++-
 libavcodec/options_table.h | 1 +
 libavcodec/version.h       | 2 +-
 5 files changed, 8 insertions(+), 2 deletions(-)

Comments

Philip Langdale March 25, 2018, 4 p.m. UTC | #1
On Sat, 24 Mar 2018 13:43:21 +0100
wm4 <nfxjfg@googlemail.com> wrote:

> This is for applications which want to explicitly check for invalid
> UTF-8 manually, and take actions that are better than dropping invalid
> subtitles silently. (It's pretty much silent because sporadic avcodec
> error messages are so common that you can't reasonably display them
> in a prominent and meaningful way in an application GUI.)
> ---
>  doc/APIchanges             | 3 +++
>  libavcodec/avcodec.h       | 1 +
>  libavcodec/decode.c        | 3 ++-
>  libavcodec/options_table.h | 1 +
>  libavcodec/version.h       | 2 +-
>  5 files changed, 8 insertions(+), 2 deletions(-)
> 
> diff --git a/doc/APIchanges b/doc/APIchanges
> index a099afd9bc..95b5cd772f 100644
> --- a/doc/APIchanges
> +++ b/doc/APIchanges
> @@ -15,6 +15,9 @@ libavutil:     2017-10-21
>  
>  API changes, most recent first:
>  
> +2018-03-xx - xxxxxxx - lavc 58.16.100 - avcodec.h
> +  Add FF_SUB_CHARENC_MODE_IGNORE.
> +
>  2018-xx-xx - xxxxxxx - lavu 56.8.100 - encryption_info.h
>    Add AVEncryptionInitInfo and AVEncryptionInfo structures to hold
> new side-data for encryption info.
> diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
> index 495242faf0..50c34dbff9 100644
> --- a/libavcodec/avcodec.h
> +++ b/libavcodec/avcodec.h
> @@ -3092,6 +3092,7 @@ typedef struct AVCodecContext {
>  #define FF_SUB_CHARENC_MODE_DO_NOTHING  -1  ///< do nothing (demuxer
> outputs a stream supposed to be already in UTF-8, or the codec is
> bitmap for instance) #define FF_SUB_CHARENC_MODE_AUTOMATIC    0  ///<
> libavcodec will select the mode itself #define
> FF_SUB_CHARENC_MODE_PRE_DECODER  1  ///< the AVPacket data needs to
> be recoded to UTF-8 before being fed to the decoder, requires iconv
> +#define FF_SUB_CHARENC_MODE_IGNORE       2  ///< neither convert the
> subtitles, nor check them for valid UTF-8 /**
>       * Skip processing alpha if supported by codec.
> diff --git a/libavcodec/decode.c b/libavcodec/decode.c
> index ea2168ad0c..40c8a8855c 100644
> --- a/libavcodec/decode.c
> +++ b/libavcodec/decode.c
> @@ -1057,7 +1057,8 @@ int avcodec_decode_subtitle2(AVCodecContext
> *avctx, AVSubtitle *sub, sub->format = 1;
>  
>              for (i = 0; i < sub->num_rects; i++) {
> -                if (sub->rects[i]->ass
> && !utf8_check(sub->rects[i]->ass)) {
> +                if (avctx->sub_charenc_mode !=
> FF_SUB_CHARENC_MODE_IGNORE &&
> +                    sub->rects[i]->ass
> && !utf8_check(sub->rects[i]->ass)) { av_log(avctx, AV_LOG_ERROR,
>                             "Invalid UTF-8 in decoded subtitles text;
> " "maybe missing -sub_charenc option\n");
> diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
> index 5a5eae65fb..099261e168 100644
> --- a/libavcodec/options_table.h
> +++ b/libavcodec/options_table.h
> @@ -447,6 +447,7 @@ static const AVOption avcodec_options[] = {
>  {"do_nothing",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 =
> FF_SUB_CHARENC_MODE_DO_NOTHING},  INT_MIN, INT_MAX, S|D,
> "sub_charenc_mode"}, {"auto",        NULL, 0, AV_OPT_TYPE_CONST,
> {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC},   INT_MIN, INT_MAX, S|D,
> "sub_charenc_mode"}, {"pre_decoder", NULL, 0, AV_OPT_TYPE_CONST,
> {.i64 = FF_SUB_CHARENC_MODE_PRE_DECODER}, INT_MIN, INT_MAX, S|D,
> "sub_charenc_mode"}, +{"ignore",      NULL, 0, AV_OPT_TYPE_CONST,
> {.i64 = FF_SUB_CHARENC_MODE_IGNORE},      INT_MIN, INT_MAX, S|D,
> "sub_charenc_mode"}, #if FF_API_ASS_TIMING {"sub_text_format", "set
> decoded text subtitle format", OFFSET(sub_text_format),
> AV_OPT_TYPE_INT, {.i64 = FF_SUB_TEXT_FMT_ASS_WITH_TIMINGS}, 0, 1,
> S|D, "sub_text_format"}, #else diff --git a/libavcodec/version.h
> b/libavcodec/version.h index a5b7f752d1..8ac4626da7 100644 ---
> a/libavcodec/version.h +++ b/libavcodec/version.h @@ -28,7 +28,7 @@
> #include "libavutil/version.h" #define LIBAVCODEC_VERSION_MAJOR  58
> -#define LIBAVCODEC_VERSION_MINOR  15
> +#define LIBAVCODEC_VERSION_MINOR  16
>  #define LIBAVCODEC_VERSION_MICRO 100
>  
>  #define LIBAVCODEC_VERSION_INT
> AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \

LGTM.


--phil
wm4 March 25, 2018, 5:30 p.m. UTC | #2
On Sun, 25 Mar 2018 09:00:45 -0700
Philip Langdale <philipl@overt.org> wrote:

> On Sat, 24 Mar 2018 13:43:21 +0100
> wm4 <nfxjfg@googlemail.com> wrote:
> 
> > This is for applications which want to explicitly check for invalid
> > UTF-8 manually, and take actions that are better than dropping invalid
> > subtitles silently. (It's pretty much silent because sporadic avcodec
> > error messages are so common that you can't reasonably display them
> > in a prominent and meaningful way in an application GUI.)
> > ---
> >  doc/APIchanges             | 3 +++
> >  libavcodec/avcodec.h       | 1 +
> >  libavcodec/decode.c        | 3 ++-
> >  libavcodec/options_table.h | 1 +
> >  libavcodec/version.h       | 2 +-
> >  5 files changed, 8 insertions(+), 2 deletions(-)
> > 
> > diff --git a/doc/APIchanges b/doc/APIchanges
> > index a099afd9bc..95b5cd772f 100644
> > --- a/doc/APIchanges
> > +++ b/doc/APIchanges
> > @@ -15,6 +15,9 @@ libavutil:     2017-10-21
> >  
> >  API changes, most recent first:
> >  
> > +2018-03-xx - xxxxxxx - lavc 58.16.100 - avcodec.h
> > +  Add FF_SUB_CHARENC_MODE_IGNORE.
> > +
> >  2018-xx-xx - xxxxxxx - lavu 56.8.100 - encryption_info.h
> >    Add AVEncryptionInitInfo and AVEncryptionInfo structures to hold
> > new side-data for encryption info.
> > diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
> > index 495242faf0..50c34dbff9 100644
> > --- a/libavcodec/avcodec.h
> > +++ b/libavcodec/avcodec.h
> > @@ -3092,6 +3092,7 @@ typedef struct AVCodecContext {
> >  #define FF_SUB_CHARENC_MODE_DO_NOTHING  -1  ///< do nothing (demuxer
> > outputs a stream supposed to be already in UTF-8, or the codec is
> > bitmap for instance) #define FF_SUB_CHARENC_MODE_AUTOMATIC    0  ///<
> > libavcodec will select the mode itself #define
> > FF_SUB_CHARENC_MODE_PRE_DECODER  1  ///< the AVPacket data needs to
> > be recoded to UTF-8 before being fed to the decoder, requires iconv
> > +#define FF_SUB_CHARENC_MODE_IGNORE       2  ///< neither convert the
> > subtitles, nor check them for valid UTF-8 /**
> >       * Skip processing alpha if supported by codec.
> > diff --git a/libavcodec/decode.c b/libavcodec/decode.c
> > index ea2168ad0c..40c8a8855c 100644
> > --- a/libavcodec/decode.c
> > +++ b/libavcodec/decode.c
> > @@ -1057,7 +1057,8 @@ int avcodec_decode_subtitle2(AVCodecContext
> > *avctx, AVSubtitle *sub, sub->format = 1;
> >  
> >              for (i = 0; i < sub->num_rects; i++) {
> > -                if (sub->rects[i]->ass
> > && !utf8_check(sub->rects[i]->ass)) {
> > +                if (avctx->sub_charenc_mode !=
> > FF_SUB_CHARENC_MODE_IGNORE &&
> > +                    sub->rects[i]->ass
> > && !utf8_check(sub->rects[i]->ass)) { av_log(avctx, AV_LOG_ERROR,
> >                             "Invalid UTF-8 in decoded subtitles text;
> > " "maybe missing -sub_charenc option\n");
> > diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
> > index 5a5eae65fb..099261e168 100644
> > --- a/libavcodec/options_table.h
> > +++ b/libavcodec/options_table.h
> > @@ -447,6 +447,7 @@ static const AVOption avcodec_options[] = {
> >  {"do_nothing",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 =
> > FF_SUB_CHARENC_MODE_DO_NOTHING},  INT_MIN, INT_MAX, S|D,
> > "sub_charenc_mode"}, {"auto",        NULL, 0, AV_OPT_TYPE_CONST,
> > {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC},   INT_MIN, INT_MAX, S|D,
> > "sub_charenc_mode"}, {"pre_decoder", NULL, 0, AV_OPT_TYPE_CONST,
> > {.i64 = FF_SUB_CHARENC_MODE_PRE_DECODER}, INT_MIN, INT_MAX, S|D,
> > "sub_charenc_mode"}, +{"ignore",      NULL, 0, AV_OPT_TYPE_CONST,
> > {.i64 = FF_SUB_CHARENC_MODE_IGNORE},      INT_MIN, INT_MAX, S|D,
> > "sub_charenc_mode"}, #if FF_API_ASS_TIMING {"sub_text_format", "set
> > decoded text subtitle format", OFFSET(sub_text_format),
> > AV_OPT_TYPE_INT, {.i64 = FF_SUB_TEXT_FMT_ASS_WITH_TIMINGS}, 0, 1,
> > S|D, "sub_text_format"}, #else diff --git a/libavcodec/version.h
> > b/libavcodec/version.h index a5b7f752d1..8ac4626da7 100644 ---
> > a/libavcodec/version.h +++ b/libavcodec/version.h @@ -28,7 +28,7 @@
> > #include "libavutil/version.h" #define LIBAVCODEC_VERSION_MAJOR  58
> > -#define LIBAVCODEC_VERSION_MINOR  15
> > +#define LIBAVCODEC_VERSION_MINOR  16
> >  #define LIBAVCODEC_VERSION_MICRO 100
> >  
> >  #define LIBAVCODEC_VERSION_INT
> > AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \  
> 
> LGTM.

Pushed.
diff mbox

Patch

diff --git a/doc/APIchanges b/doc/APIchanges
index a099afd9bc..95b5cd772f 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -15,6 +15,9 @@  libavutil:     2017-10-21
 
 API changes, most recent first:
 
+2018-03-xx - xxxxxxx - lavc 58.16.100 - avcodec.h
+  Add FF_SUB_CHARENC_MODE_IGNORE.
+
 2018-xx-xx - xxxxxxx - lavu 56.8.100 - encryption_info.h
   Add AVEncryptionInitInfo and AVEncryptionInfo structures to hold new side-data
   for encryption info.
diff --git a/libavcodec/avcodec.h b/libavcodec/avcodec.h
index 495242faf0..50c34dbff9 100644
--- a/libavcodec/avcodec.h
+++ b/libavcodec/avcodec.h
@@ -3092,6 +3092,7 @@  typedef struct AVCodecContext {
 #define FF_SUB_CHARENC_MODE_DO_NOTHING  -1  ///< do nothing (demuxer outputs a stream supposed to be already in UTF-8, or the codec is bitmap for instance)
 #define FF_SUB_CHARENC_MODE_AUTOMATIC    0  ///< libavcodec will select the mode itself
 #define FF_SUB_CHARENC_MODE_PRE_DECODER  1  ///< the AVPacket data needs to be recoded to UTF-8 before being fed to the decoder, requires iconv
+#define FF_SUB_CHARENC_MODE_IGNORE       2  ///< neither convert the subtitles, nor check them for valid UTF-8
 
     /**
      * Skip processing alpha if supported by codec.
diff --git a/libavcodec/decode.c b/libavcodec/decode.c
index ea2168ad0c..40c8a8855c 100644
--- a/libavcodec/decode.c
+++ b/libavcodec/decode.c
@@ -1057,7 +1057,8 @@  int avcodec_decode_subtitle2(AVCodecContext *avctx, AVSubtitle *sub,
                 sub->format = 1;
 
             for (i = 0; i < sub->num_rects; i++) {
-                if (sub->rects[i]->ass && !utf8_check(sub->rects[i]->ass)) {
+                if (avctx->sub_charenc_mode != FF_SUB_CHARENC_MODE_IGNORE &&
+                    sub->rects[i]->ass && !utf8_check(sub->rects[i]->ass)) {
                     av_log(avctx, AV_LOG_ERROR,
                            "Invalid UTF-8 in decoded subtitles text; "
                            "maybe missing -sub_charenc option\n");
diff --git a/libavcodec/options_table.h b/libavcodec/options_table.h
index 5a5eae65fb..099261e168 100644
--- a/libavcodec/options_table.h
+++ b/libavcodec/options_table.h
@@ -447,6 +447,7 @@  static const AVOption avcodec_options[] = {
 {"do_nothing",  NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_DO_NOTHING},  INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
 {"auto",        NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_AUTOMATIC},   INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
 {"pre_decoder", NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_PRE_DECODER}, INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
+{"ignore",      NULL, 0, AV_OPT_TYPE_CONST, {.i64 = FF_SUB_CHARENC_MODE_IGNORE},      INT_MIN, INT_MAX, S|D, "sub_charenc_mode"},
 #if FF_API_ASS_TIMING
 {"sub_text_format", "set decoded text subtitle format", OFFSET(sub_text_format), AV_OPT_TYPE_INT, {.i64 = FF_SUB_TEXT_FMT_ASS_WITH_TIMINGS}, 0, 1, S|D, "sub_text_format"},
 #else
diff --git a/libavcodec/version.h b/libavcodec/version.h
index a5b7f752d1..8ac4626da7 100644
--- a/libavcodec/version.h
+++ b/libavcodec/version.h
@@ -28,7 +28,7 @@ 
 #include "libavutil/version.h"
 
 #define LIBAVCODEC_VERSION_MAJOR  58
-#define LIBAVCODEC_VERSION_MINOR  15
+#define LIBAVCODEC_VERSION_MINOR  16
 #define LIBAVCODEC_VERSION_MICRO 100
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \