Message ID | 20200618004743.95896-3-rcombs@rcombs.me |
---|---|
State | New |
Headers | show |
Series | [FFmpeg-devel,1/5] lavf: matroska subtitle muxer | expand |
Context | Check | Description |
---|---|---|
andriy/default | pending | |
andriy/make | success | Make finished |
andriy/make_fate | success | Make fate finished |
rcombs: > Fixes ticket #5641 > --- > libavformat/matroska.c | 11 ++-- > libavformat/matroskadec.c | 111 ++++++++++++++++++++++++-------------- > 2 files changed, 77 insertions(+), 45 deletions(-) > > diff --git a/libavformat/matroska.c b/libavformat/matroska.c > index 7c56aba403..962fa496f4 100644 > --- a/libavformat/matroska.c > +++ b/libavformat/matroska.c > @@ -60,16 +60,12 @@ const CodecTags ff_mkv_codec_tags[]={ > {"A_VORBIS" , AV_CODEC_ID_VORBIS}, > {"A_WAVPACK4" , AV_CODEC_ID_WAVPACK}, > > - {"D_WEBVTT/SUBTITLES" , AV_CODEC_ID_WEBVTT}, > - {"D_WEBVTT/CAPTIONS" , AV_CODEC_ID_WEBVTT}, > - {"D_WEBVTT/DESCRIPTIONS", AV_CODEC_ID_WEBVTT}, > - {"D_WEBVTT/METADATA" , AV_CODEC_ID_WEBVTT}, > - > {"S_TEXT/UTF8" , AV_CODEC_ID_SUBRIP}, > {"S_TEXT/UTF8" , AV_CODEC_ID_TEXT}, > {"S_TEXT/ASCII" , AV_CODEC_ID_TEXT}, > {"S_TEXT/ASS" , AV_CODEC_ID_ASS}, > {"S_TEXT/SSA" , AV_CODEC_ID_ASS}, > + {"S_TEXT/WEBVTT" , AV_CODEC_ID_WEBVTT}, > {"S_ASS" , AV_CODEC_ID_ASS}, > {"S_SSA" , AV_CODEC_ID_ASS}, > {"S_VOBSUB" , AV_CODEC_ID_DVD_SUBTITLE}, > @@ -77,6 +73,11 @@ const CodecTags ff_mkv_codec_tags[]={ > {"S_HDMV/PGS" , AV_CODEC_ID_HDMV_PGS_SUBTITLE}, > {"S_HDMV/TEXTST" , AV_CODEC_ID_HDMV_TEXT_SUBTITLE}, > > + {"D_WEBVTT/SUBTITLES" , AV_CODEC_ID_WEBVTT}, > + {"D_WEBVTT/CAPTIONS" , AV_CODEC_ID_WEBVTT}, > + {"D_WEBVTT/DESCRIPTIONS", AV_CODEC_ID_WEBVTT}, > + {"D_WEBVTT/METADATA" , AV_CODEC_ID_WEBVTT}, > + > {"V_AV1" , AV_CODEC_ID_AV1}, > {"V_DIRAC" , AV_CODEC_ID_DIRAC}, > {"V_FFV1" , AV_CODEC_ID_FFV1}, > diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c > index cff7f0cb54..1e4947e209 100644 > --- a/libavformat/matroskadec.c > +++ b/libavformat/matroskadec.c > @@ -3305,62 +3305,81 @@ static int matroska_parse_webvtt(MatroskaDemuxContext *matroska, > uint8_t *data, int data_len, > uint64_t timecode, > uint64_t duration, > - int64_t pos) > + int64_t pos, > + uint8_t *additional, uint64_t additional_id, int additional_size) > { > AVPacket pktl, *pkt = &pktl; > - uint8_t *id, *settings, *text, *buf; > - int id_len, settings_len, text_len; > + uint8_t *id, *settings, *comment, *text, *buf; > + int id_len = 0, settings_len = 0, comment_len = 0, text_len; > uint8_t *p, *q; > int err; > + int webm_style = !strncmp(track->codec_id, "D_WEBVTT/", 9); > > if (data_len <= 0) > return AVERROR_INVALIDDATA; > > - p = data; > - q = data + data_len; > - > - id = p; > - id_len = -1; > - while (p < q) { > - if (*p == '\r' || *p == '\n') { > - id_len = p - id; > - if (*p == '\r') > - p++; > - break; > + p = webm_style ? data : additional; > + q = webm_style ? (data + data_len) : (additional + additional_size); > + Unfortunately all pointer arithmetic involving NULL is undefined behaviour in C, even NULL + 0. So the above is ub when not using webm-style and when there is no BlockAddition. > + if (p) { > + id = p; > + id_len = -1; This is unnecessary. Initializing to zero (as is already done above) works just fine. > + while (p < q) { > + if (*p == '\r' || *p == '\n') { > + id_len = p - id; > + if (*p == '\r') > + p++; > + break; > + } > + p++; > } > + > + if (p >= q || *p != '\n') > + return AVERROR_INVALIDDATA; > p++; > - } > > - if (p >= q || *p != '\n') > - return AVERROR_INVALIDDATA; > - p++; > - > - settings = p; > - settings_len = -1; > - while (p < q) { > - if (*p == '\r' || *p == '\n') { > - settings_len = p - settings; > - if (*p == '\r') > - p++; > - break; > + settings = p; > + settings_len = -1; > + while (p < q) { > + if (*p == '\r' || *p == '\n') { > + settings_len = p - settings; > + if (*p == '\r') > + p++; > + break; > + } > + p++; > } > + > + if (p >= q || *p != '\n') > + return AVERROR_INVALIDDATA; You're repeating yourself. > p++; > + > + if (!webm_style && p < q) { > + if (q[-1] != '\r' && q[-1] != '\n') The current Matroska codec mapping indeed requires the terminating WebVTT line terminator to be retained, yet mkvmerge doesn't do so, so we should not be so picky with this. > + return AVERROR_INVALIDDATA; > + > + comment = p; > + comment_len = q - p; > + } > } > > - if (p >= q || *p != '\n') > - return AVERROR_INVALIDDATA; > - p++; > - > - text = p; > - text_len = q - p; > - while (text_len > 0) { > - const int len = text_len - 1; > - const uint8_t c = p[len]; > - if (c != '\r' && c != '\n') > - break; > - text_len = len; > + if (webm_style) { > + text = p; > + text_len = q - p; > + > + while (text_len > 0) { > + const int len = text_len - 1; > + const uint8_t c = p[len]; > + if (c != '\r' && c != '\n') > + break; > + text_len = len; > + } > + } else { > + text = data; > + text_len = data_len; > } > > + > if (text_len <= 0) > return AVERROR_INVALIDDATA; > > @@ -3393,6 +3412,17 @@ static int matroska_parse_webvtt(MatroskaDemuxContext *matroska, > memcpy(buf, settings, settings_len); > } > > + if (comment_len > 0) { > + buf = av_packet_new_side_data(pkt, > + AV_PKT_DATA_WEBVTT_COMMENT, > + comment_len); > + if (!buf) { > + av_packet_unref(pkt); > + return AVERROR(ENOMEM); > + } > + memcpy(buf, comment, comment_len); > + } > + > // Do we need this for subtitles? > // pkt->flags = AV_PKT_FLAG_KEY; > > @@ -3656,7 +3686,8 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, AVBufferRef *buf > res = matroska_parse_webvtt(matroska, track, st, > out_data, out_size, > timecode, lace_duration, > - pos); > + pos, > + additional, additional_id, additional_size); > if (!buf) > av_free(out_data); > if (res) >
Andreas Rheinhardt (12020-06-20): > Unfortunately all pointer arithmetic involving NULL is undefined > behaviour in C, even NULL + 0. I am pretty sure the UB is not there, and that NULL+0 is valid. Do you have the reference? Anyway, even if it is valid, it is not good style, I agree on that. Regards,
Nicolas George: > Andreas Rheinhardt (12020-06-20): >> Unfortunately all pointer arithmetic involving NULL is undefined >> behaviour in C, even NULL + 0. > > I am pretty sure the UB is not there, and that NULL+0 is valid. Do you > have the reference? > C99, 6.5.6 (the part about additive operators) says that pointer arithmetic is only defined in two cases: First, if it involves a pointer to an array (an object that is not an element of an array is treated as an array with exactly one element for the purposes of pointer arithmetic) and an offset so small so that it contains an element offset places away from the pointer. And secondly, if the pointer points to the last element of an array and the offset is 1. (For C90, this stuff is contained in 6.3.6.) A nullpointer does not point to any object, hence it is undefined. Notice that UBSan in Clang 10 started warning about this (see ticket 8592 or see https://github.com/llvm/llvm-project/commit/536b0ee40ab97f2878dc124a321cf9108ee3d233). > Anyway, even if it is valid, it is not good style, I agree on that. While it would be very bad style to write NULL + 0 or something like that when it is known at compile-time that this is the only case that will happen, I don't consider an expression that might lead to NULL + 0 to be bad style. I actually think that this is one of the insanities of the C standard (C++ btw explicitly allows nullptr + 0). - Andreas
diff --git a/libavformat/matroska.c b/libavformat/matroska.c index 7c56aba403..962fa496f4 100644 --- a/libavformat/matroska.c +++ b/libavformat/matroska.c @@ -60,16 +60,12 @@ const CodecTags ff_mkv_codec_tags[]={ {"A_VORBIS" , AV_CODEC_ID_VORBIS}, {"A_WAVPACK4" , AV_CODEC_ID_WAVPACK}, - {"D_WEBVTT/SUBTITLES" , AV_CODEC_ID_WEBVTT}, - {"D_WEBVTT/CAPTIONS" , AV_CODEC_ID_WEBVTT}, - {"D_WEBVTT/DESCRIPTIONS", AV_CODEC_ID_WEBVTT}, - {"D_WEBVTT/METADATA" , AV_CODEC_ID_WEBVTT}, - {"S_TEXT/UTF8" , AV_CODEC_ID_SUBRIP}, {"S_TEXT/UTF8" , AV_CODEC_ID_TEXT}, {"S_TEXT/ASCII" , AV_CODEC_ID_TEXT}, {"S_TEXT/ASS" , AV_CODEC_ID_ASS}, {"S_TEXT/SSA" , AV_CODEC_ID_ASS}, + {"S_TEXT/WEBVTT" , AV_CODEC_ID_WEBVTT}, {"S_ASS" , AV_CODEC_ID_ASS}, {"S_SSA" , AV_CODEC_ID_ASS}, {"S_VOBSUB" , AV_CODEC_ID_DVD_SUBTITLE}, @@ -77,6 +73,11 @@ const CodecTags ff_mkv_codec_tags[]={ {"S_HDMV/PGS" , AV_CODEC_ID_HDMV_PGS_SUBTITLE}, {"S_HDMV/TEXTST" , AV_CODEC_ID_HDMV_TEXT_SUBTITLE}, + {"D_WEBVTT/SUBTITLES" , AV_CODEC_ID_WEBVTT}, + {"D_WEBVTT/CAPTIONS" , AV_CODEC_ID_WEBVTT}, + {"D_WEBVTT/DESCRIPTIONS", AV_CODEC_ID_WEBVTT}, + {"D_WEBVTT/METADATA" , AV_CODEC_ID_WEBVTT}, + {"V_AV1" , AV_CODEC_ID_AV1}, {"V_DIRAC" , AV_CODEC_ID_DIRAC}, {"V_FFV1" , AV_CODEC_ID_FFV1}, diff --git a/libavformat/matroskadec.c b/libavformat/matroskadec.c index cff7f0cb54..1e4947e209 100644 --- a/libavformat/matroskadec.c +++ b/libavformat/matroskadec.c @@ -3305,62 +3305,81 @@ static int matroska_parse_webvtt(MatroskaDemuxContext *matroska, uint8_t *data, int data_len, uint64_t timecode, uint64_t duration, - int64_t pos) + int64_t pos, + uint8_t *additional, uint64_t additional_id, int additional_size) { AVPacket pktl, *pkt = &pktl; - uint8_t *id, *settings, *text, *buf; - int id_len, settings_len, text_len; + uint8_t *id, *settings, *comment, *text, *buf; + int id_len = 0, settings_len = 0, comment_len = 0, text_len; uint8_t *p, *q; int err; + int webm_style = !strncmp(track->codec_id, "D_WEBVTT/", 9); if (data_len <= 0) return AVERROR_INVALIDDATA; - p = data; - q = data + data_len; - - id = p; - id_len = -1; - while (p < q) { - if (*p == '\r' || *p == '\n') { - id_len = p - id; - if (*p == '\r') - p++; - break; + p = webm_style ? data : additional; + q = webm_style ? (data + data_len) : (additional + additional_size); + + if (p) { + id = p; + id_len = -1; + while (p < q) { + if (*p == '\r' || *p == '\n') { + id_len = p - id; + if (*p == '\r') + p++; + break; + } + p++; } + + if (p >= q || *p != '\n') + return AVERROR_INVALIDDATA; p++; - } - if (p >= q || *p != '\n') - return AVERROR_INVALIDDATA; - p++; - - settings = p; - settings_len = -1; - while (p < q) { - if (*p == '\r' || *p == '\n') { - settings_len = p - settings; - if (*p == '\r') - p++; - break; + settings = p; + settings_len = -1; + while (p < q) { + if (*p == '\r' || *p == '\n') { + settings_len = p - settings; + if (*p == '\r') + p++; + break; + } + p++; } + + if (p >= q || *p != '\n') + return AVERROR_INVALIDDATA; p++; + + if (!webm_style && p < q) { + if (q[-1] != '\r' && q[-1] != '\n') + return AVERROR_INVALIDDATA; + + comment = p; + comment_len = q - p; + } } - if (p >= q || *p != '\n') - return AVERROR_INVALIDDATA; - p++; - - text = p; - text_len = q - p; - while (text_len > 0) { - const int len = text_len - 1; - const uint8_t c = p[len]; - if (c != '\r' && c != '\n') - break; - text_len = len; + if (webm_style) { + text = p; + text_len = q - p; + + while (text_len > 0) { + const int len = text_len - 1; + const uint8_t c = p[len]; + if (c != '\r' && c != '\n') + break; + text_len = len; + } + } else { + text = data; + text_len = data_len; } + if (text_len <= 0) return AVERROR_INVALIDDATA; @@ -3393,6 +3412,17 @@ static int matroska_parse_webvtt(MatroskaDemuxContext *matroska, memcpy(buf, settings, settings_len); } + if (comment_len > 0) { + buf = av_packet_new_side_data(pkt, + AV_PKT_DATA_WEBVTT_COMMENT, + comment_len); + if (!buf) { + av_packet_unref(pkt); + return AVERROR(ENOMEM); + } + memcpy(buf, comment, comment_len); + } + // Do we need this for subtitles? // pkt->flags = AV_PKT_FLAG_KEY; @@ -3656,7 +3686,8 @@ static int matroska_parse_block(MatroskaDemuxContext *matroska, AVBufferRef *buf res = matroska_parse_webvtt(matroska, track, st, out_data, out_size, timecode, lace_duration, - pos); + pos, + additional, additional_id, additional_size); if (!buf) av_free(out_data); if (res)