[FFmpeg-devel] Improved SAMI support

Message ID	CAFgjPJ8A1cQRTBTYDVjM1fvgudfNA+SetwM_RvWKv9=njLzXnA@mail.gmail.com
State	Changes Requested
Headers	show Delivered-To: ffmpegpatchwork@gmail.com Received-SPF: pass (google.com: domain of ffmpeg-devel-bounces@ffmpeg.org designates 79.124.17.100 as permitted sender) client-ip=79.124.17.100; MIME-Version: 1.0 From: =?UTF-8?B?SmVoYW4gUGFnw6hz?= <jehan.marmottard@gmail.com> Date: Fri, 30 Sep 2016 00:28:14 +0200 Message-ID: <CAFgjPJ8A1cQRTBTYDVjM1fvgudfNA+SetwM_RvWKv9=njLzXnA@mail.gmail.com> To: ffmpeg-devel@ffmpeg.org Content-Type: multipart/mixed; boundary=94eb2c1b27fa8da183053dacff58 Subject: [FFmpeg-devel] Improved SAMI support Precedence: list Reply-To: FFmpeg development discussions and patches <ffmpeg-devel@ffmpeg.org> Errors-To: ffmpeg-devel-bounces@ffmpeg.org Sender: "ffmpeg-devel" <ffmpeg-devel-bounces@ffmpeg.org>

From 0cd4d92b98ecbf364891038b851740291cf75219 Mon Sep 17 00:00:00 2001 From: Jehan <jehan@girinstud.io> Date: Wed, 28 Sep 2016 03:28:52 +0200 Subject: [PATCH] avformat: basic language support in SAMI subtitles. Different languages are not yet extracted as separate subtitle tracks. This first basic version simply uses the default language and ignore any others. The spec says: "If the user (or author) has not explicitly selected a language, the first Class (language) definition will be used by default." See: https://msdn.microsoft.com/en-us/library/ms971327.aspx --- libavformat/realtextdec.c | 14 ++-- libavformat/samidec.c | 167 ++++++++++++++++++++++++++++++++++++++++++---- libavformat/subtitles.c | 22 +++++- libavformat/subtitles.h | 5 +- 4 files changed, 187 insertions(+), 21 deletions(-) diff --git a/libavformat/realtextdec.c b/libavformat/realtextdec.c index 618d4f7..c0b0e44 100644 --- a/libavformat/realtextdec.c +++ b/libavformat/realtextdec.c @@ -85,10 +85,12 @@ static int realtext_read_header(AVFormatContext *s) if (!av_strncasecmp(buf.str, "<window", 7)) { /* save header to extradata */ - const char *p = ff_smil_get_attr_ptr(buf.str, "duration"); + char *p = ff_smil_get_attr_ptr(buf.str, "duration"); - if (p) + if (p) { duration = read_ts(p); + av_free(p); + } st->codecpar->extradata = av_strdup(buf.str); if (!st->codecpar->extradata) { res = AVERROR(ENOMEM); @@ -105,12 +107,16 @@ static int realtext_read_header(AVFormatContext *s) goto end; } if (!merge) { - const char *begin = ff_smil_get_attr_ptr(buf.str, "begin"); - const char *end = ff_smil_get_attr_ptr(buf.str, "end"); + char *begin = ff_smil_get_attr_ptr(buf.str, "begin"); + char *end = ff_smil_get_attr_ptr(buf.str, "end"); sub->pos = pos; sub->pts = begin ? read_ts(begin) : 0; sub->duration = end ? (read_ts(end) - sub->pts) : duration; + if (begin) + av_free(begin); + if (end) + av_free(end); } } av_bprint_clear(&buf); diff --git a/libavformat/samidec.c b/libavformat/samidec.c index 7ea1bdf..3bd16ea 100644 --- a/libavformat/samidec.c +++ b/libavformat/samidec.c @@ -26,6 +26,7 @@ #include "avformat.h" #include "internal.h" +#include "regex.h" #include "subtitles.h" #include "libavcodec/internal.h" #include "libavutil/avstring.h" @@ -34,6 +35,10 @@ typedef struct { FFDemuxSubtitlesQueue q; + char **lang_names; + char **lang_codes; + char **lang_ids; + int n_langs; } SAMIContext; static int sami_probe(AVProbeData *p) @@ -51,11 +56,19 @@ static int sami_read_header(AVFormatContext *s) SAMIContext *sami = s->priv_data; AVStream *st = avformat_new_stream(s, NULL); AVBPrint buf, hdr_buf; + int is_style = 0; char c = 0; int res = 0, got_first_sync_point = 0; + int ignore_sub; FFTextReader tr; + int64_t last_pos; + int64_t last_pts; + int new_sub = 0; + ff_text_init_avio(s, &tr, s->pb); + sami->n_langs = 0; + if (!st) return AVERROR(ENOMEM); avpriv_set_pts_info(st, 64, 1, 1000); @@ -65,10 +78,87 @@ static int sami_read_header(AVFormatContext *s) av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED); av_bprint_init(&hdr_buf, 0, AV_BPRINT_SIZE_UNLIMITED); + /* Read header */ + while (!ff_text_eof(&tr)) { + int header_end; + int n = ff_smil_extract_next_text_chunk(&tr, &buf, &c); + + if (n == 0) + break; + + header_end = !av_strncasecmp(buf.str, "</Head", 6); + if (header_end) { + av_bprint_clear(&buf); + break; + } + + if (!av_strncasecmp(buf.str, "<Style", 6)) { + is_style = 1; + av_bprint_clear(&buf); + continue; + } + + if (is_style) { + if (!av_strncasecmp(buf.str, "</Style", 7)) { + is_style = 0; + av_bprint_clear(&buf); + continue; + } + else { + regex_t regex; + regmatch_t matchptr[4]; + + if (!regcomp(&regex, "\\.\\([a-z]\\+\\)\\s*{\\s*Name:\\s*\\([^;}]\\+\\);\\s*Lang:\\s*\\([^;}]\\+\\)", REG_ICASE)) { + char *style_text = buf.str; + + while (!regexec(&regex, style_text, 4, matchptr, REG_ICASE)) { + char **lang_names; + char **lang_codes; + char **lang_ids; + + lang_names = av_realloc(sami->lang_names, + (sami->n_langs + 1) * sizeof(char*)); + lang_codes = av_realloc(sami->lang_codes, + (sami->n_langs + 1) * sizeof(char*)); + lang_ids = av_realloc(sami->lang_ids, + (sami->n_langs + 1) * sizeof(char*)); + if (lang_names && lang_codes && lang_ids) { + char *lang_name; + char *lang_code; + char *lang_id; + + lang_name = av_strndup(style_text + matchptr[2].rm_so, + matchptr[2].rm_eo - matchptr[2].rm_so); + + lang_code = av_strndup(style_text + matchptr[3].rm_so, + matchptr[3].rm_eo - matchptr[3].rm_so); + + lang_id = av_strndup(style_text + matchptr[1].rm_so, + matchptr[1].rm_eo - matchptr[1].rm_so); + + lang_names[sami->n_langs] = lang_name; + lang_codes[sami->n_langs] = lang_code; + lang_ids[sami->n_langs++] = lang_id; + + sami->lang_names = lang_names; + sami->lang_codes = lang_codes; + sami->lang_ids = lang_ids; + } + style_text = style_text + matchptr[0].rm_eo; + } + } + regfree(&regex); + } + } + av_bprint_clear(&buf); + } + + /* Read body. */ while (!ff_text_eof(&tr)) { AVPacket *sub; const int64_t pos = ff_text_pos(&tr) - (c != 0); - int is_sync, is_body, n = ff_smil_extract_next_text_chunk(&tr, &buf, &c); + int is_sync, is_body, is_p; + int n = ff_smil_extract_next_text_chunk(&tr, &buf, &c); if (n == 0) break; @@ -80,22 +170,60 @@ static int sami_read_header(AVFormatContext *s) } is_sync = !av_strncasecmp(buf.str, "<SYNC", 5); - if (is_sync) + if (is_sync) { + char *p = ff_smil_get_attr_ptr(buf.str, "Start"); + got_first_sync_point = 1; + ignore_sub = 0; + new_sub = 1; + + last_pos = pos; + last_pts = p ? strtol(p, NULL, 10) : 0; + if (p) + av_free(p); + } + + is_p = !av_strncasecmp(buf.str, "<P", 2); + if (is_p) { + char *class = ff_smil_get_attr_ptr(buf.str, "Class"); + if (class && !sami->n_langs) { + /* If no languages were set in <STYLE>, let's just + * assume that the first encountered Class is the + * default language. */ + sami->lang_names = av_malloc(sizeof(char*)); + sami->lang_codes = av_malloc(sizeof(char*)); + sami->lang_ids = av_malloc(sizeof(char*)); + sami->lang_names[0] = av_strdup(class); + sami->lang_codes[0] = av_strdup(class); + sami->lang_ids[0] = av_strdup(class); + sami->n_langs = 1; + } + if (class && av_strcasecmp(sami->lang_ids[0], class)) { + /* Ignore subtitle for a different language. */ + ignore_sub = 1; + } + else { + ignore_sub = 0; + } + if (class) + av_free(class); + } if (!got_first_sync_point) { av_bprintf(&hdr_buf, "%s", buf.str); } else { - sub = ff_subtitles_queue_insert(&sami->q, buf.str, buf.len, !is_sync); - if (!sub) { - res = AVERROR(ENOMEM); - goto end; - } - if (is_sync) { - const char *p = ff_smil_get_attr_ptr(buf.str, "Start"); - sub->pos = pos; - sub->pts = p ? strtol(p, NULL, 10) : 0; - sub->duration = -1; + if (!ignore_sub && !is_sync) { + sub = ff_subtitles_queue_insert(&sami->q, buf.str, buf.len, !new_sub); + if (!sub) { + res = AVERROR(ENOMEM); + goto end; + } + if (new_sub) { + new_sub = 0; + sub->duration = -1; + sub->pos = last_pos; + sub->pts = last_pts; + } } } av_bprint_clear(&buf); @@ -130,6 +258,21 @@ static int sami_read_close(AVFormatContext *s) { SAMIContext *sami = s->priv_data; ff_subtitles_queue_clean(&sami->q); + if (sami->lang_names) { + for (int i = 0; i < sami->n_langs; i++) + av_free(sami->lang_names[i]); + av_free(sami->lang_names); + } + if (sami->lang_codes) { + for (int i = 0; i < sami->n_langs; i++) + av_free(sami->lang_codes[i]); + av_free(sami->lang_codes); + } + if (sami->lang_ids) { + for (int i = 0; i < sami->n_langs; i++) + av_free(sami->lang_ids[i]); + av_free(sami->lang_ids); + } return 0; } diff --git a/libavformat/subtitles.c b/libavformat/subtitles.c index 108f909..4d22daf 100644 --- a/libavformat/subtitles.c +++ b/libavformat/subtitles.c @@ -327,7 +327,7 @@ int ff_smil_extract_next_text_chunk(FFTextReader *tr, AVBPrint *buf, char *c) return i; } -const char *ff_smil_get_attr_ptr(const char *s, const char *attr) +char *ff_smil_get_attr_ptr(const char *s, const char *attr) { int in_quotes = 0; const size_t len = strlen(attr); @@ -341,8 +341,24 @@ const char *ff_smil_get_attr_ptr(const char *s, const char *attr) } while (av_isspace(*s)) s++; - if (!av_strncasecmp(s, attr, len) && s[len] == '=') - return s + len + 1 + (s[len + 1] == '"'); + if (!av_strncasecmp(s, attr, len) && s[len] == '=') { + const char *value = NULL; + int quoted_value = 0; + + quoted_value = (s[len + 1] == '"'); + value = s + len + 1 + quoted_value; + s = value; + + while (*s) { + if ((quoted_value && *s == '"') || + (!quoted_value && (av_isspace(*s) || *s == '>')) || + *s == '\0') + { + return av_strndup(value, s - value); + } + s++; + } + } } return NULL; } diff --git a/libavformat/subtitles.h b/libavformat/subtitles.h index ca78db2..0e1a2ef 100644 --- a/libavformat/subtitles.h +++ b/libavformat/subtitles.h @@ -151,12 +151,13 @@ void ff_subtitles_queue_clean(FFDemuxSubtitlesQueue *q); int ff_smil_extract_next_text_chunk(FFTextReader *tr, AVBPrint *buf, char *c); /** - * SMIL helper to point on the value of an attribute in the given tag. + * SMIL helper returning a newly allocated copy of the value of an + * attribute in the given tag. * * @param s SMIL tag ("<...>") * @param attr the attribute to look for */ -const char *ff_smil_get_attr_ptr(const char *s, const char *attr); +char *ff_smil_get_attr_ptr(const char *s, const char *attr); /** * @brief Same as ff_subtitles_read_text_chunk(), but read from an AVIOContext. -- 2.7.4

[FFmpeg-devel] Improved SAMI support

Commit Message

Comments

Patch