[FFmpeg-devel] aacenc: WIP support for PCEs

Submitted by Rostislav Pehlivanov on Oct. 3, 2016, 6:53 p.m.

Details

Message ID 20161003185311.31035-1-atomnuker@gmail.com
State New
Headers show

Commit Message

Rostislav Pehlivanov Oct. 3, 2016, 6:53 p.m.
Hopefully whoever wants to have support for crazy formats can help.
The table in aacenc.h (temporary position) tells the encoder what
to put in the bitstream and how to encode. Problem is, the specifications
don't specify anything. That's because I've not been able to find any bloody
specifications and had to work with what the decoder does. And there was
plenty of guessing there because the decoder does some magic on layout_map
which I can't figure out, nor do I know if it's correct (it seems to be
for the formats I've tested).
Then there's the problem with the exact order that the channels have to be in.
Again a guessing game, since you essentially have no idea what the index part
of the map is supposed to be, whether it has to be incremented starting from
the first channel or reset for every front/side/back channel group. At
least the map that instructs the encoder is straightforward.

Anyway, help appreciated.

Applies cleanly on 543142990b6f7b8757753c13ea6dbc56275c5c7e, but should
work fine with newer versions.
---
 libavcodec/aacenc.c    | 69 +++++++++++++++++++++++++++++++++++++++++++++-----
 libavcodec/aacenc.h    | 50 +++++++++++++++++++++++++++++++++++-
 libavcodec/aacenctab.h | 13 +++++++++-
 3 files changed, 124 insertions(+), 8 deletions(-)

Comments

Claudio Freire Oct. 3, 2016, 10:19 p.m.
On Mon, Oct 3, 2016 at 3:53 PM, Rostislav Pehlivanov
<atomnuker@gmail.com> wrote:
> Hopefully whoever wants to have support for crazy formats can help.
> The table in aacenc.h (temporary position) tells the encoder what
> to put in the bitstream and how to encode. Problem is, the specifications
> don't specify anything. That's because I've not been able to find any bloody
> specifications and had to work with what the decoder does. And there was
> plenty of guessing there because the decoder does some magic on layout_map
> which I can't figure out, nor do I know if it's correct (it seems to be
> for the formats I've tested).
> Then there's the problem with the exact order that the channels have to be in.
> Again a guessing game, since you essentially have no idea what the index part
> of the map is supposed to be, whether it has to be incremented starting from
> the first channel or reset for every front/side/back channel group. At
> least the map that instructs the encoder is straightforward.
>
> Anyway, help appreciated.

Will take a deeper look later, but on a shallow review, shouldn't it
have some tests?

(seems fairly easily testable)

Patch hide | download patch | download mbox

diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 2653cef..c138699 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -50,6 +50,40 @@ 
 
 static AVOnce aac_table_init = AV_ONCE_INIT;
 
+static void put_pce(PutBitContext *pb, AVCodecContext *avctx) /* Writes the encoder's AACPCEInfo (s->pce) into pb as a program config element */
+{
+    int i, j; /* i: element group (front, side, back, LFE); j: element within that group */
+    AACEncContext *s = avctx->priv_data;
+    AACPCEInfo *pce = &s->pce;
+
+    put_bits(pb, 4, 0); /* 4 zero bits; presumably the element instance tag -- TODO confirm */
+
+    put_bits(pb, 2, avctx->profile); /* NOTE(review): value must fit in 2 bits; put_audio_specific_config() writes s->profile instead -- confirm which is intended */
+    put_bits(pb, 4, s->samplerate_index);
+
+    put_bits(pb, 4, pce->num_ele[0]); /* Number of front elements */
+    put_bits(pb, 4, pce->num_ele[1]); /* Number of side elements */
+    put_bits(pb, 4, pce->num_ele[2]); /* Number of back elements */
+    put_bits(pb, 2, pce->num_ele[3]); /* Number of LFE elements (only 2 bits wide) */
+    put_bits(pb, 3, 0); /* Assoc data: none written */
+    put_bits(pb, 4, 0); /* CCs: none written */
+
+    put_bits(pb, 1, 0); /* Stereo mixdown flag, unset. NOTE(review): ISO/IEC 14496-3 appears to order the mono flag first; harmless while both are 0 -- confirm */
+    put_bits(pb, 1, 0); /* Mono mixdown flag, unset (see note on the previous flag) */
+    put_bits(pb, 1, 0); /* Third 1-bit flag, unset; presumably "matrix mixdown index present" -- TODO confirm */
+
+    for (i = 0; i < 4; i++) { /* groups are visited in the same order as the counts above */
+        for (j = 0; j < pce->num_ele[i]; j++) {
+            if (i < 3) /* LFE elements (i == 3) get no pairing bit */
+                put_bits(pb, 1, pce->pairing[i][j]);
+            put_bits(pb, 4, pce->index[i][j]);
+        }
+    }
+
+    avpriv_align_put_bits(pb); /* align the writer before the trailing byte */
+    put_bits(pb, 8, 0); /* one zero byte; presumably the comment field length -- TODO confirm */
+}
+
 /**
  * Make AAC audio config object.
  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
@@ -58,7 +92,7 @@  static void put_audio_specific_config(AVCodecContext *avctx)
 {
     PutBitContext pb;
     AACEncContext *s = avctx->priv_data;
-    int channels = s->channels - (s->channels == 8 ? 1 : 0);
+    int channels = (!s->needs_pce)*(s->channels - (s->channels == 8 ? 1 : 0));
 
     init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
     put_bits(&pb, 5, s->profile+1); //profile
@@ -68,6 +102,8 @@  static void put_audio_specific_config(AVCodecContext *avctx)
     put_bits(&pb, 1, 0); //frame length - 1024 samples
     put_bits(&pb, 1, 0); //does not depend on core coder
     put_bits(&pb, 1, 0); //is not extension
+    if (s->needs_pce)
+        put_pce(&pb, avctx);
 
     //Explicitly Mark SBR absent
     put_bits(&pb, 11, 0x2b7); //sync extension
@@ -488,7 +524,7 @@  static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
 {
     int ch;
     int end = 2048 + (frame ? frame->nb_samples : 0);
-    const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
+    const uint8_t *channel_map = s->reorder_map;
 
     /* copy and remap input samples */
     for (ch = 0; ch < s->channels; ch++) {
@@ -923,16 +959,36 @@  static av_cold int aac_encode_init(AVCodecContext *avctx)
 
     /* Constants */
     s->last_frame_pb_count = 0;
-    avctx->extradata_size = 5;
+    avctx->extradata_size = 20;
     avctx->frame_size = 1024;
     avctx->initial_padding = 1024;
     s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
 
     /* Channel map and unspecified bitrate guessing */
     s->channels = avctx->channels;
-    ERROR_IF(s->channels > AAC_MAX_CHANNELS || s->channels == 7,
-             "Unsupported number of channels: %d\n", s->channels);
-    s->chan_map = aac_chan_configs[s->channels-1];
+
+    s->needs_pce = 1;
+    for (i = 0; i < FF_ARRAY_ELEMS(aac_normal_chan_layouts); i++) {
+        if (avctx->channel_layout == aac_normal_chan_layouts[i]) {
+            s->needs_pce = s->options.pce;
+            break;
+        }
+    }
+
+    if (s->needs_pce) {
+        for (i = 0; i < FF_ARRAY_ELEMS(aac_pce_configs); i++)
+            if (avctx->channel_layout == aac_pce_configs[i].layout)
+                break;
+        ERROR_IF(i == FF_ARRAY_ELEMS(aac_pce_configs), "Unsupported channel layout\n");
+        WARN_IF(1, "Using a PCE!\n");
+        s->pce = aac_pce_configs[i];
+        s->reorder_map = s->pce.reorder_map;
+        s->chan_map = s->pce.config_map;
+    } else {
+        s->reorder_map = aac_chan_maps[s->channels - 1];
+        s->chan_map = aac_chan_configs[s->channels - 1];
+    }
+
     if (!avctx->bit_rate) {
         for (i = 1; i <= s->chan_map[0]; i++) {
             avctx->bit_rate += s->chan_map[i] == TYPE_CPE ? 128000 : /* Pair */
@@ -1059,6 +1115,7 @@  static const AVOption aacenc_options[] = {
     {"aac_tns", "Temporal noise shaping", offsetof(AACEncContext, options.tns), AV_OPT_TYPE_BOOL, {.i64 = 1}, -1, 1, AACENC_FLAGS},
     {"aac_ltp", "Long term prediction", offsetof(AACEncContext, options.ltp), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
     {"aac_pred", "AAC-Main prediction", offsetof(AACEncContext, options.pred), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
+    {"aac_pce", "Forces the use of PCEs", offsetof(AACEncContext, options.pce), AV_OPT_TYPE_BOOL, {.i64 = 0}, -1, 1, AACENC_FLAGS},
     {NULL}
 };
 
diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h
index 63e7893..9441f29 100644
--- a/libavcodec/aacenc.h
+++ b/libavcodec/aacenc.h
@@ -46,6 +46,7 @@  typedef struct AACEncOptions {
     int pns;
     int tns;
     int ltp;
+    int pce;
     int pred;
     int mid_side;
     int intensity_stereo;
@@ -90,6 +91,50 @@  typedef struct AACQuantizeBandCostCacheEntry {
     uint16_t generation;
 } AACQuantizeBandCostCacheEntry;
 
+typedef struct AACPCEInfo {
+    int64_t layout;                              ///< channel layout this entry describes; aac_encode_init() compares it with avctx->channel_layout
+    int num_ele[4];                              ///< element counts per group: front, side, back, lfe
+    int pairing[3][8];                           ///< 1-bit flag written per element by put_pce(): front, side, back (presumably 1 = channel pair -- TODO confirm)
+    int index[4][8];                             ///< 4-bit value written per element by put_pce(): front, side, back, lfe
+    uint8_t config_map[16];                      ///< becomes the encoder's chan_map: [0] is the element count, then one element type per element
+    uint8_t reorder_map[16];                     ///< maps channels from lavc to aac order
+} AACPCEInfo;
+
+static const AACPCEInfo aac_pce_configs[] = { /* PCE description per layout; aac_encode_init() rejects a PCE request for any layout without an entry here */
+    {
+        .layout = AV_CH_LAYOUT_MONO,
+        .num_ele = { 1, 0, 0, 0 },
+        .pairing = { { 0 }, },
+        .index = { { 0 }, },
+        .config_map = { 1, TYPE_SCE, },
+        .reorder_map = { 0 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_STEREO,
+        .num_ele = { 1, 0, 0, 0 },
+        .pairing = { { 1 }, },
+        .index = { { 0 }, },
+        .config_map = { 1, TYPE_CPE, },
+        .reorder_map = { 0, 1 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_SURROUND,
+        .num_ele = { 2, 0, 0, 0 },
+        .pairing = { { 1, 0 }, }, /* NOTE(review): flags the first front element as paired, yet config_map lists TYPE_SCE first -- confirm the intended order */
+        .index = { { 0, 1 }, },
+        .config_map = { 2, TYPE_SCE, TYPE_CPE },
+        .reorder_map = { 2, 0, 1 },
+    },
+    {
+        .layout = AV_CH_LAYOUT_4POINT0,
+        .num_ele = { 2, 0, 1, 0 }, /* two front elements, one back element */
+        .pairing = { { 1, 0 }, { 0 }, { 0 }, }, /* NOTE(review): same front-order mismatch with config_map as in the SURROUND entry -- confirm */
+        .index = { { 0, 1 }, { 0 }, { 0 } },
+        .config_map = { 3, TYPE_SCE, TYPE_CPE, TYPE_SCE },
+        .reorder_map = { 2, 0, 1, 3 },
+    },
+};
+
 /**
  * AAC encoder context
  */
@@ -101,12 +146,15 @@  typedef struct AACEncContext {
     FFTContext mdct128;                          ///< short (128 samples) frame transform context
     AVFloatDSPContext *fdsp;
     AVLFG lfg;                                   ///< PRNG needed for PNS
-    float *planar_samples[8];                    ///< saved preprocessed input
+    AACPCEInfo pce;                              ///< PCE data, if needed
+    float *planar_samples[16];                   ///< saved preprocessed input
 
     int profile;                                 ///< copied from avctx
+    int needs_pce;                               ///< flag for non-standard layout
     LPCContext lpc;                              ///< used by TNS
     int samplerate_index;                        ///< MPEG-4 samplerate index
     int channels;                                ///< channel count
+    const uint8_t *reorder_map;                  ///< lavc to aac reorder map
     const uint8_t *chan_map;                     ///< channel configuration map
 
     ChannelElement *cpe;                         ///< channel elements
diff --git a/libavcodec/aacenctab.h b/libavcodec/aacenctab.h
index 5fc9411..c852a29 100644
--- a/libavcodec/aacenctab.h
+++ b/libavcodec/aacenctab.h
@@ -36,13 +36,24 @@ 
 /** Total number of codebooks, including special ones **/
 #define CB_TOT_ALL 15
 
-#define AAC_MAX_CHANNELS 8
+#define AAC_MAX_CHANNELS 16
 
 extern const uint8_t *ff_aac_swb_size_1024[];
 extern const int      ff_aac_swb_size_1024_len;
 extern const uint8_t *ff_aac_swb_size_128[];
 extern const int      ff_aac_swb_size_128_len;
 
+/* Layouts the encoder can signal without a PCE; aac_encode_init() forces a PCE for any layout not listed here */
+static const int64_t aac_normal_chan_layouts[7] = {
+    AV_CH_LAYOUT_MONO,
+    AV_CH_LAYOUT_STEREO,
+    AV_CH_LAYOUT_SURROUND,
+    AV_CH_LAYOUT_4POINT0,
+    AV_CH_LAYOUT_5POINT0,
+    AV_CH_LAYOUT_5POINT1,
+    AV_CH_LAYOUT_7POINT1,
+};
+
 /** default channel configurations */
 static const uint8_t aac_chan_configs[AAC_MAX_CHANNELS][6] = {
     {1, TYPE_SCE},                                         // 1 channel  - single channel element