diff mbox

[FFmpeg-devel] lavf/mpegts: Convert service name and service provider to utf-8

Message ID CAB0OVGpHfERWdvH3ORs2CcOLKWqaPiE8XbvdZO24XXE-5GpWRg@mail.gmail.com
State Superseded
Headers show

Commit Message

Carl Eugen Hoyos Feb. 8, 2019, 11:51 p.m. UTC
2019-02-08 23:09 GMT+01:00, Marton Balint <cus@passwd.hu>:
>
>
> On Fri, 8 Feb 2019, Carl Eugen Hoyos wrote:
>
>> Hi!
>>
>> Attached patch fixes ticket #6320, tested with the sample from ticket
>> #7069.
>>
>> Please comment, Carl Eugen
>>
>> From fdcd141a29f336925681193a9cdd3f4eaa5c368e Mon Sep 17 00:00:00 2001
>> From: Carl Eugen Hoyos <ceffmpeg@gmail.com>
>> Date: Fri, 8 Feb 2019 01:35:33 +0100
>> Subject: [PATCH] lavf/mpegts: Convert service_name and service_provider to
>>  utf-8.
>>
>> Fixes ticket #6320.
>> ---
>>  libavformat/mpegts.c |   33 +++++++++++++++++++++++++++++++++
>>  1 file changed, 33 insertions(+)
>>
>> diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
>> index b04fd7b..dde610f 100644
>> --- a/libavformat/mpegts.c
>> +++ b/libavformat/mpegts.c
>> @@ -37,6 +37,9 @@
>>  #include "avio_internal.h"
>>  #include "mpeg.h"
>>  #include "isom.h"
>> +#if CONFIG_ICONV
>> +#include <iconv.h>
>> +#endif
>>
>>  /* maximum size in which we look for synchronization if
>>   * synchronization is lost */
>> @@ -674,6 +677,36 @@ static char *getstr8(const uint8_t **pp, const
>> uint8_t *p_end)
>>          return NULL;
>>      if (len > p_end - p)
>>          return NULL;
>> +#if CONFIG_ICONV
>> +    if (len && *p < 0x20) {
>> +        const char *encodings[] = {
>> +            "ISO6937", "ISO-8859-5", "ISO-8859-6", "ISO-8859-7",
>> "ISO-8859-8",
>> +            "ISO-8859-9", "ISO-8859-10", "ISO-8859-11", "",
>> "ISO-8859-13",
>> +            "ISO-8859-14", "ISO-8859-15", "", "", "", "",
>> +            "", "ISO-10646", "KSC_5601", "GB2312", "ISO-10646", "UTF-8",
>> "",
>> +            "", "", "", "", "", "", "", "", ""
>> +        };
>> +        iconv_t cd;
>> +        char *in, *out;
>> +        size_t inlen = len - 1, outlen = inlen * 6;
>> +        cd = iconv_open("UTF-8", encodings[*p]);
>
> Can you add support for the ISO-8859-any case where
> p[0] == 0x10, p[1] == 0x00
> and p[2] == any? I will upload a sample to the trac ticket.

New patch attached.

Thank you, Carl Eugen

Comments

Marton Balint Feb. 9, 2019, 4:42 p.m. UTC | #1
On Sat, 9 Feb 2019, Carl Eugen Hoyos wrote:

> From 9033f0a18727a7a576c4cc06b9985d6d922d46ad Mon Sep 17 00:00:00 2001
> From: Carl Eugen Hoyos <ceffmpeg@gmail.com>
> Date: Sat, 9 Feb 2019 00:49:51 +0100
> Subject: [PATCH] lavf/mpegts: Convert service_name and service_provider to
>  utf-8.
> 
> Fixes ticket #6320.
> ---
>  libavformat/mpegts.c |   48 ++++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 48 insertions(+)
> 
> diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
> index b04fd7b..1e27500 100644
> --- a/libavformat/mpegts.c
> +++ b/libavformat/mpegts.c
> @@ -37,6 +37,9 @@
>  #include "avio_internal.h"
>  #include "mpeg.h"
>  #include "isom.h"
> +#if CONFIG_ICONV
> +#include <iconv.h>
> +#endif
>
>  /* maximum size in which we look for synchronization if
>   * synchronization is lost */
> @@ -674,6 +677,51 @@ static char *getstr8(const uint8_t **pp, const uint8_t *p_end)
>          return NULL;
>      if (len > p_end - p)
>          return NULL;
> +#if CONFIG_ICONV
> +    if (len && *p < 0x20) {
> +        char iso8859[] = "ISO-8859-00";
> +        const char *encodings[] = {
> +            "ISO6937", "ISO-8859-5", "ISO-8859-6", "ISO-8859-7", "ISO-8859-8",
> +            "ISO-8859-9", "ISO-8859-10", "ISO-8859-11", "", "ISO-8859-13",
> +            "ISO-8859-14", "ISO-8859-15", "", "", "", "",
> +            "", "ISO-10646", "KSC_5601", "GB2312", "ISO-10646", "UTF-8", "",
> +            "", "", "", "", "", "", "", "", ""
> +        };
> +        iconv_t cd;
> +        char *in, *out;
> +        size_t inlen = len - 1, outlen = inlen * 6 + 1;
> +        if (len >= 3 && p[0] == 0x10 && !p[1] && p[2] && p[2] <= 0xf && p[2] != 0xc) {
> +            if (p[2] < 10) {
> +                iso8859[9] += p[2];
> +                iso8859[10] = 0;
> +            } else {
> +                iso8859[9]++;
> +                iso8859[10] += p[2] - 10;
> +            }

I think this would be much more readable:

char iso8859[16];
snprintf(iso8859, sizeof(iso8859), "ISO-8859-%d", p[2]);

Also inlen should be len - 3 here, no?

> +            in = (char *)p + 3;
> +            cd = iconv_open("UTF-8", iso8859);
> +        } else {
> +            in = (char *)p + 1;
> +            cd = iconv_open("UTF-8", encodings[*p]);
> +        }
> +        if (cd == (iconv_t)-1)
> +            goto no_iconv;
> +        str = out = av_malloc(outlen);
> +        if (!str) {
> +            iconv_close(cd);
> +            return NULL;
> +        }
> +        if (iconv(cd, &in, &inlen, &out, &outlen) == -1) {
> +            iconv_close(cd);

You are leaking str here.

> +            goto no_iconv;
> +        }
> +        iconv_close(cd);
> +        *out = 0;
> +        *pp = p + len;
> +        return str;
> +    }
> +no_iconv:
> +#endif
>      str = av_malloc(len + 1);
>      if (!str)
>          return NULL;
> -- 
> 1.7.10.4

Thanks,
Marton
diff mbox

Patch

From 9033f0a18727a7a576c4cc06b9985d6d922d46ad Mon Sep 17 00:00:00 2001
From: Carl Eugen Hoyos <ceffmpeg@gmail.com>
Date: Sat, 9 Feb 2019 00:49:51 +0100
Subject: [PATCH] lavf/mpegts: Convert service_name and service_provider to
 utf-8.

Fixes ticket #6320.
---
 libavformat/mpegts.c |   48 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/libavformat/mpegts.c b/libavformat/mpegts.c
index b04fd7b..1e27500 100644
--- a/libavformat/mpegts.c
+++ b/libavformat/mpegts.c
@@ -37,6 +37,9 @@ 
 #include "avio_internal.h"
 #include "mpeg.h"
 #include "isom.h"
+#if CONFIG_ICONV
+#include <iconv.h>
+#endif
 
 /* maximum size in which we look for synchronization if
  * synchronization is lost */
@@ -674,6 +677,51 @@  static char *getstr8(const uint8_t **pp, const uint8_t *p_end)
         return NULL;
     if (len > p_end - p)
         return NULL;
+#if CONFIG_ICONV
+    if (len && *p < 0x20) {
+        char iso8859[] = "ISO-8859-00";
+        const char *encodings[] = {
+            "ISO6937", "ISO-8859-5", "ISO-8859-6", "ISO-8859-7", "ISO-8859-8",
+            "ISO-8859-9", "ISO-8859-10", "ISO-8859-11", "", "ISO-8859-13",
+            "ISO-8859-14", "ISO-8859-15", "", "", "", "",
+            "", "ISO-10646", "KSC_5601", "GB2312", "ISO-10646", "UTF-8", "",
+            "", "", "", "", "", "", "", "", ""
+        };
+        iconv_t cd;
+        char *in, *out;
+        size_t inlen = len - 1, outlen = inlen * 6 + 1;
+        if (len >= 3 && p[0] == 0x10 && !p[1] && p[2] && p[2] <= 0xf && p[2] != 0xc) {
+            if (p[2] < 10) {
+                iso8859[9] += p[2];
+                iso8859[10] = 0;
+            } else {
+                iso8859[9]++;
+                iso8859[10] += p[2] - 10;
+            }
+            in = (char *)p + 3;
+            cd = iconv_open("UTF-8", iso8859);
+        } else {
+            in = (char *)p + 1;
+            cd = iconv_open("UTF-8", encodings[*p]);
+        }
+        if (cd == (iconv_t)-1)
+            goto no_iconv;
+        str = out = av_malloc(outlen);
+        if (!str) {
+            iconv_close(cd);
+            return NULL;
+        }
+        if (iconv(cd, &in, &inlen, &out, &outlen) == -1) {
+            iconv_close(cd);
+            goto no_iconv;
+        }
+        iconv_close(cd);
+        *out = 0;
+        *pp = p + len;
+        return str;
+    }
+no_iconv:
+#endif
     str = av_malloc(len + 1);
     if (!str)
         return NULL;
-- 
1.7.10.4