305 lines
9.3 KiB
C
Executable file
305 lines
9.3 KiB
C
Executable file
/*
 * Libaac-next encoder (libxaac based)
 * Copyright (c) 2025 Wrapper
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
|
|
|
/**
 * @file
 * Interface to libaac-next encoder.
 */
|
|
|
|
#include <libaac.h>
|
|
|
|
#include "libavutil/channel_layout.h"
|
|
#include "libavutil/internal.h"
|
|
#include "libavutil/intreadwrite.h"
|
|
#include "libavutil/log.h"
|
|
#include "libavutil/opt.h"
|
|
#include "libavutil/mem.h"
|
|
#include "avcodec.h"
|
|
#include "defs.h"
|
|
#include "audio_frame_queue.h"
|
|
#include "codec_internal.h"
|
|
#include "encode.h"
|
|
#include "libavutil/samplefmt.h"
|
|
#include "profiles.h"
|
|
|
|
typedef struct
|
|
{
|
|
const AVClass *class;
|
|
AACContext *encoder;
|
|
int delay_sent;
|
|
int flush_delay;
|
|
|
|
int eld_v2;
|
|
int esbr;
|
|
int frame_length;
|
|
int iq;
|
|
int tns;
|
|
|
|
AudioFrameQueue afq;
|
|
} libaacEncodeCTX;
|
|
|
|
static const AVOption aac_enc_options[] = {
|
|
{ "eld_v2", "Enable ELDv2 (LD-MPS extension for ELD stereo signals)", offsetof(libaacEncodeCTX, eld_v2), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
|
|
{ "esbr", "Enable the use of Enhanced SBR", offsetof(libaacEncodeCTX, esbr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
|
|
{ "frame_length", "The desired frame length", offsetof(libaacEncodeCTX, frame_length), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1024, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
|
|
{ "iq", "Inverse quantization", offsetof(libaacEncodeCTX, frame_length), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, 2, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
|
|
{ "tns", "Temporal Noise Shaping", offsetof(libaacEncodeCTX, tns), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1, AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
|
|
FF_AAC_PROFILE_OPTS
|
|
{ NULL }
|
|
};
|
|
|
|
static const AVClass aac_enc_class = {
|
|
.class_name = "libaac",
|
|
.item_name = av_default_item_name,
|
|
.option = aac_enc_options,
|
|
.version = LIBAVUTIL_VERSION_INT,
|
|
};
|
|
|
|
static void aac_enc_error_handler(uint32_t errorCode, const char *section, const char *errorMsg, bool isFatal, void *handle) {
|
|
AVCodecContext *ctx = (AVCodecContext *)handle;
|
|
av_log(ctx, AV_LOG_ERROR, "%s: %s (0x%08X)\n", section, errorMsg, errorCode);
|
|
}
|
|
|
|
/**
 * Open the libaac encoder and configure the codec context from it.
 *
 * Fills an AACSettings structure from the AVCodecContext fields and the
 * private options, opens the encoder, derives frame_size / initial_padding
 * from the opened encoder and, when AV_CODEC_FLAG_GLOBAL_HEADER is set,
 * exports the encoder's ASC blob as extradata.
 *
 * @param avctx codec context whose priv_data is a libaacEncodeCTX
 * @return 0 on success, AVERROR(EINVAL) for an unsupported channel count or
 *         profile or when the encoder cannot be opened, AVERROR(ENOMEM) when
 *         the extradata buffer cannot be allocated
 */
static av_cold int libaac_encode_init(AVCodecContext *avctx)
{
    libaacEncodeCTX *s = avctx->priv_data;
    AACSettings cfg = {0};

    /* number of channels */
    if (avctx->ch_layout.nb_channels < 1 || avctx->ch_layout.nb_channels > 6) {
        av_log(avctx, AV_LOG_ERROR, "encoding %d channel(s) is not allowed\n", avctx->ch_layout.nb_channels);
        return AVERROR(EINVAL);
    }

    cfg.sampleRate     = avctx->sample_rate;
    cfg.noChannels     = avctx->ch_layout.nb_channels;
    cfg.bitsPerSamples = avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? 32 : 16;
    cfg.bitrate        = avctx->bit_rate;
    /* ADTS framing is requested only when no global header is wanted */
    cfg.adts           = !(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER);
    cfg.cutoff         = avctx->cutoff;

    switch (avctx->profile) {
    case AV_PROFILE_AAC_LOW:
    case AV_PROFILE_UNKNOWN:
        cfg.profile = AAC_LC;
        break;

    case AV_PROFILE_AAC_HE:
        cfg.profile = AAC_HE;
        break;

    case AV_PROFILE_AAC_HE_V2:
        cfg.profile = AAC_HEV2;
        break;

    case AV_PROFILE_AAC_LD:
        cfg.profile = AAC_LD;
        break;

    case AV_PROFILE_AAC_ELD:
        cfg.profile = AAC_ELD;
        break;

    default:
        av_log(avctx, AV_LOG_ERROR, "unsupported profile, supported profiles are LC, HE, HEv2, LD and ELD\n");
        return AVERROR(EINVAL);
    }

    /* NOTE(review): the eld_v2 option is not forwarded to the settings here;
     * confirm whether AACSettings has a matching field. */
    cfg.tns       = s->tns;
    cfg.frameSize = s->frame_length;
    cfg.eSBR      = s->esbr;
    cfg.iq        = s->iq;

    cfg.errorHandleCtx = avctx;
    cfg.errorHandler   = aac_enc_error_handler;

    s->encoder = aac_encode_open(cfg);
    if (!s->encoder)
        return AVERROR(EINVAL);

    avctx->frame_size      = s->encoder->no_samples / avctx->ch_layout.nb_channels;
    avctx->initial_padding = s->encoder->inputDelay;
    s->flush_delay         = s->encoder->inputDelay;

    av_log(avctx, AV_LOG_TRACE, "frame size: %d, initial delay: %d\n", avctx->frame_size, avctx->initial_padding);

    ff_af_queue_init(avctx, &s->afq);

    /* Set decoder specific info */
    avctx->extradata_size = 0;
    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
        avctx->extradata = av_mallocz(s->encoder->ascSize + AV_INPUT_BUFFER_PADDING_SIZE);
        if (!avctx->extradata) {
            /* The codec does not request cleanup on init failure, so the
             * encoder handle has to be released here to avoid leaking it. */
            aac_encode_close(s->encoder);
            s->encoder = NULL;
            return AVERROR(ENOMEM);
        }

        memcpy(avctx->extradata, s->encoder->asc, s->encoder->ascSize);
        /* Publish the size, otherwise the copied header is invisible to users
         * of avctx->extradata. */
        avctx->extradata_size = s->encoder->ascSize;
    }

    return 0;
}
|
|
|
|
static int libaac_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
|
|
const AVFrame *frame, int *got_packet)
|
|
{
|
|
libaacEncodeCTX *s = avctx->priv_data;
|
|
int ret;
|
|
int discard_padding;
|
|
|
|
if ((ret = ff_alloc_packet(avctx, pkt, s->encoder->max_out_bytes)) < 0)
|
|
return ret;
|
|
|
|
if (!frame)
|
|
{
|
|
av_log(avctx, AV_LOG_TRACE, "flush_delay: %d\n", s->flush_delay);
|
|
|
|
if (s->flush_delay <= 0)
|
|
return 0;
|
|
|
|
/* Flushing */
|
|
if ((ret = aac_encode(s->encoder, NULL, 0, pkt->data, (unsigned int *)&pkt->size)) < 0)
|
|
{
|
|
return AVERROR(EINVAL);
|
|
}
|
|
|
|
s->flush_delay -= avctx->frame_size;
|
|
}
|
|
else
|
|
{
|
|
/* Encoding */
|
|
if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
|
|
return ret;
|
|
|
|
int encodeSize = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->ch_layout.nb_channels * frame->nb_samples;
|
|
av_log(avctx, AV_LOG_TRACE, "encode size: %d\n", encodeSize);
|
|
|
|
if ((ret = aac_encode(s->encoder, frame->data[0], encodeSize, pkt->data, (unsigned int *)&pkt->size)) < 0)
|
|
{
|
|
return AVERROR(EINVAL);
|
|
}
|
|
}
|
|
|
|
ff_af_queue_remove(&s->afq, avctx->frame_size, &pkt->pts, &pkt->duration);
|
|
|
|
/* discard padding copied from fdkaac encoder */
|
|
discard_padding = avctx->frame_size - pkt->duration;
|
|
|
|
// Check if subtraction resulted in an overflow
|
|
if ((discard_padding < avctx->frame_size) != (pkt->duration > 0))
|
|
{
|
|
av_log(avctx, AV_LOG_ERROR, "discard padding overflow\n");
|
|
return AVERROR(EINVAL);
|
|
}
|
|
|
|
if ((!s->delay_sent && avctx->initial_padding > 0) || discard_padding > 0)
|
|
{
|
|
uint8_t *side_data =
|
|
av_packet_new_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
|
|
if (!side_data)
|
|
return AVERROR(ENOMEM);
|
|
if (!s->delay_sent)
|
|
{
|
|
AV_WL32(side_data, avctx->initial_padding);
|
|
s->delay_sent = 1;
|
|
}
|
|
AV_WL32(side_data + 4, discard_padding);
|
|
}
|
|
|
|
pkt->flags |= AV_PKT_FLAG_KEY;
|
|
*got_packet = 1;
|
|
return 0;
|
|
}
|
|
|
|
/**
 * Flush callback: drop queued frame metadata and push one drain call
 * through the encoder, discarding its output.
 *
 * @param avctx codec context whose priv_data is a libaacEncodeCTX
 */
static void libaac_encode_flush(AVCodecContext *avctx)
{
    libaacEncodeCTX *s = avctx->priv_data;
    uint8_t sink_null[32768];
    int64_t pts, duration;
    /* The encode path hands aac_encode() a size that is preset to the output
     * buffer capacity, so the same is done here instead of passing an
     * indeterminate value. */
    uint32_t out_bytes = sizeof(sink_null);

    av_log(avctx, AV_LOG_TRACE, "encoder flush\n");
    ff_af_queue_remove(&s->afq, s->afq.frame_count, &pts, &duration);
    aac_encode(s->encoder, NULL, 0, sink_null, &out_bytes);

    /* Re-arm draining with the same value init uses, so a drain after this
     * flush is not cut short by a counter left at or below zero.
     * NOTE(review): assumes the encoder's delay is unchanged after a flush. */
    s->flush_delay = s->encoder->inputDelay;
}
|
|
|
|
/**
 * Close callback: close the encoder handle if one was opened and close the
 * audio frame queue.
 *
 * @param avctx codec context whose priv_data is a libaacEncodeCTX
 * @return always 0
 */
static av_cold int libaac_encode_close(AVCodecContext *avctx)
{
    libaacEncodeCTX *priv = avctx->priv_data;

    if (priv->encoder != NULL) {
        aac_encode_close(priv->encoder);
    }

    ff_af_queue_close(&priv->afq);

    return 0;
}
|
|
|
|
static const FFCodecDefault defaults[] = {
|
|
{"b", "128000"},
|
|
{NULL}};
|
|
|
|
static const AVProfile libaac_profiles[] = {
|
|
{ AV_PROFILE_AAC_LOW, "LC" },
|
|
{ AV_PROFILE_AAC_HE, "HE-AAC" },
|
|
{ AV_PROFILE_AAC_HE_V2, "HE-AACv2" },
|
|
{ AV_PROFILE_AAC_LD, "LD" },
|
|
{ AV_PROFILE_AAC_ELD, "ELD" },
|
|
{AV_PROFILE_UNKNOWN},
|
|
};
|
|
|
|
/* Sample rates advertised by the encoder in Hz, highest first, 0-terminated. */
static const int aac_sample_rates[] = {
    96000, 88200, 64000,
    48000, 44100, 32000,
    24000, 22050, 16000,
    12000, 11025,  8000,
    0 /* terminator */
};
|
|
|
|
static const AVChannelLayout aac_ch_layouts[6] = {
|
|
AV_CHANNEL_LAYOUT_MONO,
|
|
AV_CHANNEL_LAYOUT_STEREO,
|
|
AV_CHANNEL_LAYOUT_SURROUND,
|
|
AV_CHANNEL_LAYOUT_4POINT0,
|
|
AV_CHANNEL_LAYOUT_5POINT0_BACK,
|
|
AV_CHANNEL_LAYOUT_5POINT1_BACK,
|
|
};
|
|
|
|
const FFCodec ff_libaac_next_encoder = {
|
|
.p.name = "libaac",
|
|
CODEC_LONG_NAME("custom libxaac-based AAC encoder"),
|
|
.p.type = AVMEDIA_TYPE_AUDIO,
|
|
.p.id = AV_CODEC_ID_AAC,
|
|
.p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_ENCODER_FLUSH,
|
|
.caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE,
|
|
.priv_data_size = sizeof(libaacEncodeCTX),
|
|
.init = libaac_encode_init,
|
|
FF_CODEC_ENCODE_CB(libaac_encode_frame),
|
|
.flush = libaac_encode_flush,
|
|
.close = libaac_encode_close,
|
|
CODEC_SAMPLEFMTS(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16),
|
|
.p.priv_class = &aac_enc_class,
|
|
.defaults = defaults,
|
|
.p.profiles = libaac_profiles,
|
|
CODEC_SAMPLERATES_ARRAY(aac_sample_rates),
|
|
.p.wrapper_name = "libaac",
|
|
CODEC_CH_LAYOUTS_ARRAY(aac_ch_layouts),
|
|
};
|