[FFmpeg-devel,3/4] avformat/hashenc: add option to create hash per stream

Submitted by Moritz Barsnick on Aug. 11, 2019, 12:47 p.m.

Details

Message ID 08b3f8a989581481678dcd0ce5a06105269cdccf.1565527501.git.barsnick@gmx.net
State New
Headers show

Commit Message

Moritz Barsnick Aug. 11, 2019, 12:47 p.m.
Non-frame based muxers only, the frame based ones are already per
stream.

Signed-off-by: Moritz Barsnick <barsnick@gmx.net>
---
 doc/muxers.texi       |  5 +++++
 libavformat/hashenc.c | 41 +++++++++++++++++++++++++++++------------
 2 files changed, 34 insertions(+), 12 deletions(-)

--
2.20.1

Comments

Paul B Mahol Sept. 3, 2019, 2:48 p.m.
I think better thing to do is to add streamhash muxer instead of doing
it in hash muxer.

On 8/11/19, Moritz Barsnick <barsnick@gmx.net> wrote:
> Non-frame based muxers only, the frame based ones are already per
> stream.
>
> Signed-off-by: Moritz Barsnick <barsnick@gmx.net>
> ---
>  doc/muxers.texi       |  5 +++++
>  libavformat/hashenc.c | 41 +++++++++++++++++++++++++++++------------
>  2 files changed, 34 insertions(+), 12 deletions(-)
>
> diff --git a/doc/muxers.texi b/doc/muxers.texi
> index bc38cf6029..34ca7f07cb 100644
> --- a/doc/muxers.texi
> +++ b/doc/muxers.texi
> @@ -511,6 +511,11 @@ Supported values include @code{MD5}, @code{murmur3},
> @code{RIPEMD128},
>  @code{SHA224}, @code{SHA256} (default), @code{SHA512/224},
> @code{SHA512/256},
>  @code{SHA384}, @code{SHA512}, @code{CRC32} and @code{adler32}.
>
> +@item per_stream @var{bool}
> +Whether to calculate a hash per stream, instead of combined over all
> +packets' payload. Each stream's hash is prefixed with its stream index.
> +Default is @code{false}.
> +
>  @end table
>
>  @subsection Examples
> diff --git a/libavformat/hashenc.c b/libavformat/hashenc.c
> index 96e00f580c..394b8a0fce 100644
> --- a/libavformat/hashenc.c
> +++ b/libavformat/hashenc.c
> @@ -31,6 +31,7 @@ struct HashContext {
>      const AVClass *avclass;
>      struct AVHashContext **hashes;
>      char *hash_name;
> +    int per_stream;
>      int format_version;
>  };
>
> @@ -40,10 +41,13 @@ struct HashContext {
>      { "hash", "set hash to use", OFFSET(hash_name), AV_OPT_TYPE_STRING,
> {.str = defaulttype}, 0, 0, ENC }
>  #define FORMAT_VERSION_OPT \
>      { "format_version", "file format version", OFFSET(format_version),
> AV_OPT_TYPE_INT, {.i64 = 2}, 1, 2, ENC }
> +#define PER_STREAM_OPT \
> +    { "per_stream", "whether to calculate a hash per stream",
> OFFSET(per_stream), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, ENC }
>
>  #if CONFIG_HASH_MUXER
>  static const AVOption hash_options[] = {
>      HASH_OPT("sha256"),
> +    PER_STREAM_OPT,
>      { NULL },
>  };
>  #endif
> @@ -59,6 +63,7 @@ static const AVOption framehash_options[] = {
>  #if CONFIG_MD5_MUXER
>  static const AVOption md5_options[] = {
>      HASH_OPT("md5"),
> +    PER_STREAM_OPT,
>      { NULL },
>  };
>  #endif
> @@ -74,39 +79,51 @@ static const AVOption framemd5_options[] = {
>  #if CONFIG_HASH_MUXER || CONFIG_MD5_MUXER
>  static int hash_write_header(struct AVFormatContext *s)
>  {
> -    int res;
> +    int i, res;
>      struct HashContext *c = s->priv_data;
> -    c->hashes = av_malloc_array(1, sizeof(c->hashes));
> +    int num_hashes = c->per_stream ? s->nb_streams : 1;
> +    c->hashes = av_malloc_array(num_hashes, sizeof(c->hashes));
>      if (!c->hashes)
>          return AVERROR(ENOMEM);
> -    res = av_hash_alloc(&c->hashes[0], c->hash_name);
> -    if (res < 0) {
> -        av_freep(&c->hashes);
> -        return res;
> +    for (i = 0; i < num_hashes; i++) {
> +    res = av_hash_alloc(&c->hashes[i], c->hash_name);
> +    if (res < 0)
> +        goto err;
> +    av_hash_init(c->hashes[i]);
>      }
> -    av_hash_init(c->hashes[0]);
>      return 0;
> +err:
> +    for (int j = 0; j < i; j++)
> +        av_hash_freep(&c->hashes[j]);
> +    av_freep(&c->hashes);
> +    return res;
>  }
>
>  static int hash_write_packet(struct AVFormatContext *s, AVPacket *pkt)
>  {
>      struct HashContext *c = s->priv_data;
> -    av_hash_update(c->hashes[0], pkt->data, pkt->size);
> +    av_hash_update(c->hashes[c->per_stream ? pkt->stream_index : 0],
> pkt->data, pkt->size);
>      return 0;
>  }
>
>  static int hash_write_trailer(struct AVFormatContext *s)
>  {
>      struct HashContext *c = s->priv_data;
> +    int num_hashes = c->per_stream ? s->nb_streams : 1;
> +    for (int i = 0; i < num_hashes; i++) {
>      char buf[AV_HASH_MAX_SIZE*2+128];
> -    snprintf(buf, sizeof(buf) - 200, "%s=",
> av_hash_get_name(c->hashes[0]));
> -
> -    av_hash_final_hex(c->hashes[0], buf + strlen(buf), sizeof(buf) -
> strlen(buf));
> +    if (c->per_stream) {
> +        snprintf(buf, sizeof(buf) - 200, "%d,%s=", i,
> av_hash_get_name(c->hashes[i]));
> +    } else {
> +        snprintf(buf, sizeof(buf) - 200, "%s=",
> av_hash_get_name(c->hashes[i]));
> +    }
> +    av_hash_final_hex(c->hashes[i], buf + strlen(buf), sizeof(buf) -
> strlen(buf));
>      av_strlcatf(buf, sizeof(buf), "\n");
>      avio_write(s->pb, buf, strlen(buf));
>      avio_flush(s->pb);
>
> -    av_hash_freep(&c->hashes[0]);
> +    av_hash_freep(&c->hashes[i]);
> +    }
>      av_freep(&c->hashes);
>      return 0;
>  }
> --
> 2.20.1
>
> _______________________________________________
> ffmpeg-devel mailing list
> ffmpeg-devel@ffmpeg.org
> https://ffmpeg.org/mailman/listinfo/ffmpeg-devel
>
> To unsubscribe, visit link above, or email
> ffmpeg-devel-request@ffmpeg.org with subject "unsubscribe".
Moritz Barsnick Sept. 4, 2019, 2:12 p.m.
On Tue, Sep 03, 2019 at 16:48:39 +0200, Paul B Mahol wrote:
> I think better thing to do is to add streamhash muxer instead of doing
> it in hash muxer.

If that's the general wish, I will reimplement it as such.

The code reuse will be similar, so I assume the other approaches are
okay? Especially cleaning up the options definitions before adding new
ones, and expanding the hash store to an array.

I will also make use of a deinit() as asked by Nicolas.

Thanks,
Moritz
Moritz Barsnick Sept. 11, 2019, 1:34 p.m.
Adds a streamhash muxer, much like the hash muxer, but analyzing each stream
independently. I chose not to add a "streammd5" muxer, as the other *md5
muxers are just legacy versions of the *hash muxers with a different default
algorithm.

The first two patches re-arrange the code in preparation for new muxer.
The patch "use an array of hashes" makes no sense without the subsequent
addition of the new muxer, but the patch "rearrange options definition"
could be seen independently. Do squash if you will.

Moritz Barsnick (3):
  avformat/hashenc: rearrange options definition
  avformat/hashenc: use an array of hashes
  avformat/hashenc: add streamhash muxer

 Changelog                |   1 +
 doc/muxers.texi          |  46 +++++++++
 libavformat/Makefile     |   1 +
 libavformat/allformats.c |   1 +
 libavformat/hashenc.c    | 197 ++++++++++++++++++++++++++++++---------
 libavformat/version.h    |   4 +-
 6 files changed, 205 insertions(+), 45 deletions(-)

--
2.20.1
Moritz Barsnick Sept. 12, 2019, 9:23 a.m.
Adds a streamhash muxer, much like the hash muxer, but analyzing each stream
independently. I chose not to add a "streammd5" muxer, as the other *md5
muxers are just legacy versions of the *hash muxers with a different default
algorithm.

The first two patches re-arrange the code in preparation for new muxer.
The patch "use an array of hashes" makes no sense without the subsequent
addition of the new muxer, but the patch "rearrange options definition"
could be seen independently. Do squash if you will.

Changes in V3 vs V2:
- NULL pointer check in deinit() callbacks
- don't free resources in init() callbacks, deinit() is called on failure
- add a streamtype indicator character to the streamhash output

Moritz Barsnick (3):
  avformat/hashenc: rearrange options definition
  avformat/hashenc: use an array of hashes
  avformat/hashenc: add streamhash muxer

 Changelog                |   1 +
 doc/muxers.texi          |  47 +++++++++
 libavformat/Makefile     |   1 +
 libavformat/allformats.c |   1 +
 libavformat/hashenc.c    | 207 +++++++++++++++++++++++++++++++--------
 libavformat/version.h    |   4 +-
 6 files changed, 218 insertions(+), 43 deletions(-)

--
2.20.1
Moritz Barsnick Sept. 18, 2019, 1:47 p.m.
On Thu, Sep 12, 2019 at 11:23:03 +0200, Moritz Barsnick wrote:
> Moritz Barsnick (3):
>   avformat/hashenc: rearrange options definition
>   avformat/hashenc: use an array of hashes
>   avformat/hashenc: add streamhash muxer

Friendly ping for the patch set.

Moritz
James Almer Sept. 18, 2019, 7:02 p.m.
On 9/18/2019 10:47 AM, Moritz Barsnick wrote:
> On Thu, Sep 12, 2019 at 11:23:03 +0200, Moritz Barsnick wrote:
>> Moritz Barsnick (3):
>>   avformat/hashenc: rearrange options definition
>>   avformat/hashenc: use an array of hashes
>>   avformat/hashenc: add streamhash muxer
> 
> Friendly ping for the patch set.
> 
> Moritz

I tested it and seems to work as intended, so LGTM.
Michael Niedermayer Sept. 20, 2019, 9:31 p.m.
On Wed, Sep 18, 2019 at 04:02:55PM -0300, James Almer wrote:
> On 9/18/2019 10:47 AM, Moritz Barsnick wrote:
> > On Thu, Sep 12, 2019 at 11:23:03 +0200, Moritz Barsnick wrote:
> >> Moritz Barsnick (3):
> >>   avformat/hashenc: rearrange options definition
> >>   avformat/hashenc: use an array of hashes
> >>   avformat/hashenc: add streamhash muxer
> > 
> > Friendly ping for the patch set.
> > 
> > Moritz
> 
> I tested it and seems to work as intended, so LGTM.

applied


thx

[...]
Gyan Sept. 21, 2019, 4:37 a.m.
On 21-09-2019 03:01 AM, Michael Niedermayer wrote:
> On Wed, Sep 18, 2019 at 04:02:55PM -0300, James Almer wrote:
>> On 9/18/2019 10:47 AM, Moritz Barsnick wrote:
>>> On Thu, Sep 12, 2019 at 11:23:03 +0200, Moritz Barsnick wrote:
>>>> Moritz Barsnick (3):
>>>>    avformat/hashenc: rearrange options definition
>>>>    avformat/hashenc: use an array of hashes
>>>>    avformat/hashenc: add streamhash muxer
>>> Friendly ping for the patch set.
>>>
>>> Moritz
>> I tested it and seems to work as intended, so LGTM.
> applied
Coverity reports three issues arising from this patchset, actually one 
issue, having an instance in each of the three init () functions.

CID 1453867, 1453866 & 1453865

Code maintainability issues  (SIZEOF_MISMATCH)

For

     c->hashes = av_mallocz_array(1, sizeof(c->hashes));


it says,

"Passing argument "8UL /* sizeof (c->hashes) */" to function 
"av_mallocz_array" and then casting the return value to "struct 
AVHashContext **" is suspicious. In this particular case "sizeof (struct 
AVHashContext **)" happens to be equal to "sizeof (struct AVHashContext 
*)", but this is not a portable assumption."

Gyan
Moritz Barsnick Sept. 22, 2019, 12:28 p.m.
On Sat, Sep 21, 2019 at 10:07:25 +0530, Gyan wrote:
[...]
> Coverity reports three issues arising from this patchset, actually one
> issue, having an instance in each of the three init () functions.
[...]
>      c->hashes = av_mallocz_array(1, sizeof(c->hashes));
[...]
> "Passing argument "8UL /* sizeof (c->hashes) */" to function
> "av_mallocz_array" and then casting the return value to "struct
> AVHashContext **" is suspicious. In this particular case "sizeof (struct
> AVHashContext **)" happens to be equal to "sizeof (struct AVHashContext
> *)", but this is not a portable assumption."

Now that you point it out, it's obvious. Shame on me, it wasn't before.

(For the record, for those who want to learn what I learned:
    c->hashes = av_mallocz_array(1, sizeof(*c->hashes));
would have been correct.)

Too bad we don't have other static analysis tools to catch this. (Or
would cppcheck have caught this?) I'm aware that Coverity only operates
on the actual repo, not on submitted patches.

Anyway, a patch to fix this is posted.
Thanks,
Moritz

Patch hide | download patch | download mbox

diff --git a/doc/muxers.texi b/doc/muxers.texi
index bc38cf6029..34ca7f07cb 100644
--- a/doc/muxers.texi
+++ b/doc/muxers.texi
@@ -511,6 +511,11 @@  Supported values include @code{MD5}, @code{murmur3}, @code{RIPEMD128},
 @code{SHA224}, @code{SHA256} (default), @code{SHA512/224}, @code{SHA512/256},
 @code{SHA384}, @code{SHA512}, @code{CRC32} and @code{adler32}.

+@item per_stream @var{bool}
+Whether to calculate a hash per stream, instead of combined over all
+packets' payload. Each stream's hash is prefixed with its stream index.
+Default is @code{false}.
+
 @end table

 @subsection Examples
diff --git a/libavformat/hashenc.c b/libavformat/hashenc.c
index 96e00f580c..394b8a0fce 100644
--- a/libavformat/hashenc.c
+++ b/libavformat/hashenc.c
@@ -31,6 +31,7 @@  struct HashContext {
     const AVClass *avclass;
     struct AVHashContext **hashes;
     char *hash_name;
+    int per_stream;
     int format_version;
 };

@@ -40,10 +41,13 @@  struct HashContext {
     { "hash", "set hash to use", OFFSET(hash_name), AV_OPT_TYPE_STRING, {.str = defaulttype}, 0, 0, ENC }
 #define FORMAT_VERSION_OPT \
     { "format_version", "file format version", OFFSET(format_version), AV_OPT_TYPE_INT, {.i64 = 2}, 1, 2, ENC }
+#define PER_STREAM_OPT \
+    { "per_stream", "whether to calculate a hash per stream", OFFSET(per_stream), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, ENC }

 #if CONFIG_HASH_MUXER
 static const AVOption hash_options[] = {
     HASH_OPT("sha256"),
+    PER_STREAM_OPT,
     { NULL },
 };
 #endif
@@ -59,6 +63,7 @@  static const AVOption framehash_options[] = {
 #if CONFIG_MD5_MUXER
 static const AVOption md5_options[] = {
     HASH_OPT("md5"),
+    PER_STREAM_OPT,
     { NULL },
 };
 #endif
@@ -74,39 +79,51 @@  static const AVOption framemd5_options[] = {
 #if CONFIG_HASH_MUXER || CONFIG_MD5_MUXER
 static int hash_write_header(struct AVFormatContext *s)
 {
-    int res;
+    int i, res;
     struct HashContext *c = s->priv_data;
-    c->hashes = av_malloc_array(1, sizeof(c->hashes));
+    int num_hashes = c->per_stream ? s->nb_streams : 1;
+    c->hashes = av_malloc_array(num_hashes, sizeof(c->hashes));
     if (!c->hashes)
         return AVERROR(ENOMEM);
-    res = av_hash_alloc(&c->hashes[0], c->hash_name);
-    if (res < 0) {
-        av_freep(&c->hashes);
-        return res;
+    for (i = 0; i < num_hashes; i++) {
+    res = av_hash_alloc(&c->hashes[i], c->hash_name);
+    if (res < 0)
+        goto err;
+    av_hash_init(c->hashes[i]);
     }
-    av_hash_init(c->hashes[0]);
     return 0;
+err:
+    for (int j = 0; j < i; j++)
+        av_hash_freep(&c->hashes[j]);
+    av_freep(&c->hashes);
+    return res;
 }

 static int hash_write_packet(struct AVFormatContext *s, AVPacket *pkt)
 {
     struct HashContext *c = s->priv_data;
-    av_hash_update(c->hashes[0], pkt->data, pkt->size);
+    av_hash_update(c->hashes[c->per_stream ? pkt->stream_index : 0], pkt->data, pkt->size);
     return 0;
 }

 static int hash_write_trailer(struct AVFormatContext *s)
 {
     struct HashContext *c = s->priv_data;
+    int num_hashes = c->per_stream ? s->nb_streams : 1;
+    for (int i = 0; i < num_hashes; i++) {
     char buf[AV_HASH_MAX_SIZE*2+128];
-    snprintf(buf, sizeof(buf) - 200, "%s=", av_hash_get_name(c->hashes[0]));
-
-    av_hash_final_hex(c->hashes[0], buf + strlen(buf), sizeof(buf) - strlen(buf));
+    if (c->per_stream) {
+        snprintf(buf, sizeof(buf) - 200, "%d,%s=", i, av_hash_get_name(c->hashes[i]));
+    } else {
+        snprintf(buf, sizeof(buf) - 200, "%s=", av_hash_get_name(c->hashes[i]));
+    }
+    av_hash_final_hex(c->hashes[i], buf + strlen(buf), sizeof(buf) - strlen(buf));
     av_strlcatf(buf, sizeof(buf), "\n");
     avio_write(s->pb, buf, strlen(buf));
     avio_flush(s->pb);

-    av_hash_freep(&c->hashes[0]);
+    av_hash_freep(&c->hashes[i]);
+    }
     av_freep(&c->hashes);
     return 0;
 }