Message ID | 20181121183431.6404-2-andreas.rheinhardt@googlemail.com |
---|---|
State | Accepted |
Headers | show |
On 21/11/18 18:34, Andreas Rheinhardt wrote:
> Now memcpy is avoided for NAL units that don't contain 0x03 escape
> characters.
>
> Improves performance of cbs_h2645_fragment_add_nals from 36940
> decicycles to 6364 decicycles based on 8 runs with a 5.1 Mb/s H.264
> sample (262144 runs each).
>
> Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@googlemail.com>
> ---
>  libavcodec/cbs_h2645.c   | 29 +++++++++++++++++------------
>  libavcodec/h2645_parse.h |  5 +++++
>  2 files changed, 22 insertions(+), 12 deletions(-)
>
> diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c
> index a2d0170e97..6846cad0bb 100644
> --- a/libavcodec/cbs_h2645.c
> +++ b/libavcodec/cbs_h2645.c
> @@ -532,24 +532,29 @@ static int cbs_h2645_fragment_add_nals(CodedBitstreamContext *ctx,
>      for (i = 0; i < packet->nb_nals; i++) {
>          const H2645NAL *nal = &packet->nals[i];
>          size_t size = nal->size;
> -        uint8_t *data;
> -
>          // Remove trailing zeroes.
>          while (size > 0 && nal->data[size - 1] == 0)
>              --size;
>          av_assert0(size > 0);
>
> -        data = av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
> -        if (!data)
> -            return AVERROR(ENOMEM);
> -        memcpy(data, nal->data, size);
> -        memset(data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
> +        if (nal->data == nal->raw_data) {
> +            err = ff_cbs_insert_unit_data(ctx, frag, -1, nal->type,
> +                            (uint8_t*)nal->data, size, frag->data_ref);
> +            if (err < 0)
> +                return err;
> +        } else {
> +            uint8_t *data = av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
> +            if (!data)
> +                return AVERROR(ENOMEM);
> +            memcpy(data, nal->data, size);
> +            memset(data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
>
> -        err = ff_cbs_insert_unit_data(ctx, frag, -1, nal->type,
> -                                      data, size, NULL);
> -        if (err < 0) {
> -            av_freep(&data);
> -            return err;
> +            err = ff_cbs_insert_unit_data(ctx, frag, -1, nal->type,
> +                                          data, size, NULL);
> +            if (err < 0) {
> +                av_freep(&data);
> +                return err;
> +            }
>          }
>      }
>
> diff --git a/libavcodec/h2645_parse.h b/libavcodec/h2645_parse.h
> index 2e29ad26cb..a0a5ca5868 100644
> --- a/libavcodec/h2645_parse.h
> +++ b/libavcodec/h2645_parse.h
> @@ -86,6 +86,11 @@ int ff_h2645_extract_rbsp(const uint8_t *src, int length, H2645RBSP *rbsp,
>
>  /**
>   * Split an input packet into NAL units.
> + * If data == raw_data holds true for a NAL unit of the returned pkt, then
> + * said NAL unit does not contain any emulation_prevention_three_byte and
> + * the data is contained in the input buffer pointed to by buf.
> + * Otherwise, the unescaped data is part of the rbsp_buffer described by the
> + * packet's H2645RBSP.
>   */
>  int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length,
>                            void *logctx, int is_nalff, int nal_length_size,
>

LGTM, tested, applied.

Thanks,

- Mark
diff --git a/libavcodec/cbs_h2645.c b/libavcodec/cbs_h2645.c
index a2d0170e97..6846cad0bb 100644
--- a/libavcodec/cbs_h2645.c
+++ b/libavcodec/cbs_h2645.c
@@ -532,24 +532,29 @@ static int cbs_h2645_fragment_add_nals(CodedBitstreamContext *ctx,
     for (i = 0; i < packet->nb_nals; i++) {
         const H2645NAL *nal = &packet->nals[i];
         size_t size = nal->size;
-        uint8_t *data;
-
         // Remove trailing zeroes.
         while (size > 0 && nal->data[size - 1] == 0)
             --size;
         av_assert0(size > 0);

-        data = av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
-        if (!data)
-            return AVERROR(ENOMEM);
-        memcpy(data, nal->data, size);
-        memset(data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);
+        if (nal->data == nal->raw_data) {
+            err = ff_cbs_insert_unit_data(ctx, frag, -1, nal->type,
+                            (uint8_t*)nal->data, size, frag->data_ref);
+            if (err < 0)
+                return err;
+        } else {
+            uint8_t *data = av_malloc(size + AV_INPUT_BUFFER_PADDING_SIZE);
+            if (!data)
+                return AVERROR(ENOMEM);
+            memcpy(data, nal->data, size);
+            memset(data + size, 0, AV_INPUT_BUFFER_PADDING_SIZE);

-        err = ff_cbs_insert_unit_data(ctx, frag, -1, nal->type,
-                                      data, size, NULL);
-        if (err < 0) {
-            av_freep(&data);
-            return err;
+            err = ff_cbs_insert_unit_data(ctx, frag, -1, nal->type,
+                                          data, size, NULL);
+            if (err < 0) {
+                av_freep(&data);
+                return err;
+            }
         }
     }

diff --git a/libavcodec/h2645_parse.h b/libavcodec/h2645_parse.h
index 2e29ad26cb..a0a5ca5868 100644
--- a/libavcodec/h2645_parse.h
+++ b/libavcodec/h2645_parse.h
@@ -86,6 +86,11 @@ int ff_h2645_extract_rbsp(const uint8_t *src, int length, H2645RBSP *rbsp,

 /**
  * Split an input packet into NAL units.
+ * If data == raw_data holds true for a NAL unit of the returned pkt, then
+ * said NAL unit does not contain any emulation_prevention_three_byte and
+ * the data is contained in the input buffer pointed to by buf.
+ * Otherwise, the unescaped data is part of the rbsp_buffer described by the
+ * packet's H2645RBSP.
  */
 int ff_h2645_packet_split(H2645Packet *pkt, const uint8_t *buf, int length,
                           void *logctx, int is_nalff, int nal_length_size,
Now memcpy is avoided for NAL units that don't contain 0x03 escape
characters.

Improves performance of cbs_h2645_fragment_add_nals from 36940
decicycles to 6364 decicycles based on 8 runs with a 5.1 Mb/s H.264
sample (262144 runs each).

Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@googlemail.com>
---
 libavcodec/cbs_h2645.c   | 29 +++++++++++++++++------------
 libavcodec/h2645_parse.h |  5 +++++
 2 files changed, 22 insertions(+), 12 deletions(-)