FFmpeg/libavcodec/libaac_nextenc.c
2025-08-18 15:49:02 +07:00

305 lines
9.3 KiB
C
Executable file

/*
* Libaac-next encoder (libxaac based)
* Copyright (c) 2025 Wrapper
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
* @file
* Interface to libaac-next encoder.
*/
#include <libaac.h>
#include "libavutil/channel_layout.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/log.h"
#include "libavutil/opt.h"
#include "libavutil/mem.h"
#include "avcodec.h"
#include "defs.h"
#include "audio_frame_queue.h"
#include "codec_internal.h"
#include "encode.h"
#include "libavutil/samplefmt.h"
#include "profiles.h"
/**
 * Private context of the libaac encoder wrapper.
 *
 * The AVClass pointer is kept as the first member, as is customary for
 * FFmpeg private contexts that expose AVOptions.
 */
typedef struct
{
    const AVClass *class;   /* AVOptions / logging class */

    /* encoder state */
    AACContext *encoder;    /* handle returned by aac_encode_open() */
    AudioFrameQueue afq;    /* queue used to derive packet pts and duration */
    int delay_sent;         /* 1 once the initial padding was written to side data */
    int flush_delay;        /* samples still to drain; starts at the encoder inputDelay */

    /* user options, see aac_enc_options */
    int eld_v2;             /* "eld_v2" option value */
    int esbr;               /* copied to AACSettings.eSBR at init */
    int frame_length;       /* copied to AACSettings.frameSize at init */
    int iq;                 /* copied to AACSettings.iq at init */
    int tns;                /* copied to AACSettings.tns at init */
} libaacEncodeCTX;
/**
 * Private options of the encoder.
 *
 * Every entry stores its value in the libaacEncodeCTX member of the same
 * name. The "iq" entry used to point at frame_length, which made "-iq"
 * overwrite the frame length (and made its default of 2 the frame length
 * default) while leaving the iq member untouched.
 */
static const AVOption aac_enc_options[] = {
    { "eld_v2", "Enable ELDv2 (LD-MPS extension for ELD stereo signals)",
      offsetof(libaacEncodeCTX, eld_v2), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1,
      AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
    { "esbr", "Enable the use of Enhanced SBR",
      offsetof(libaacEncodeCTX, esbr), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1,
      AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
    { "frame_length", "The desired frame length",
      offsetof(libaacEncodeCTX, frame_length), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1024,
      AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
    { "iq", "Inverse quantization",
      offsetof(libaacEncodeCTX, iq), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, 2,
      AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
    { "tns", "Temporal Noise Shaping",
      offsetof(libaacEncodeCTX, tns), AV_OPT_TYPE_INT, { .i64 = 1 }, 0, 1,
      AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM },
    FF_AAC_PROFILE_OPTS
    { NULL }
};
/* AVClass tying the private option table to the encoder context. */
static const AVClass aac_enc_class = {
    .version    = LIBAVUTIL_VERSION_INT,
    .class_name = "libaac",
    .option     = aac_enc_options,
    .item_name  = av_default_item_name,
};
/**
 * Error callback handed to the encoder library (AACSettings.errorHandler).
 *
 * @param errorCode library error code, printed as 8 hex digits
 * @param section   name of the part of the library reporting the error
 * @param errorMsg  human readable message
 * @param isFatal   severity hint from the library; currently not used,
 *                  everything is logged at AV_LOG_ERROR
 * @param handle    the AVCodecContext registered as AACSettings.errorHandleCtx
 */
static void aac_enc_error_handler(uint32_t errorCode, const char *section, const char *errorMsg, bool isFatal, void *handle) {
    AVCodecContext *ctx = handle;

    /* Passing NULL to %s is undefined, and %X expects an unsigned int:
     * substitute placeholders and convert the code explicitly. */
    av_log(ctx, AV_LOG_ERROR, "%s: %s (0x%08X)\n",
           section  ? section  : "(null)",
           errorMsg ? errorMsg : "(null)",
           (unsigned)errorCode);
}
/**
 * Open the encoder and publish its parameters on the codec context.
 *
 * Fills an AACSettings structure from the codec context and the private
 * options, opens the encoder, then sets frame_size and initial_padding and,
 * when global headers are requested, exports the AudioSpecificConfig as
 * extradata.
 *
 * @return 0 on success, AVERROR(EINVAL) for an unsupported channel count or
 *         profile or when the encoder cannot be opened, AVERROR(ENOMEM) when
 *         the extradata buffer cannot be allocated
 */
static av_cold int libaac_encode_init(AVCodecContext *avctx)
{
    libaacEncodeCTX *s = avctx->priv_data;
    AACSettings cfg = {0};

    /* number of channels */
    if (avctx->ch_layout.nb_channels < 1 || avctx->ch_layout.nb_channels > 6) {
        av_log(avctx, AV_LOG_ERROR, "encoding %d channel(s) is not allowed\n", avctx->ch_layout.nb_channels);
        return AVERROR(EINVAL);
    }

    cfg.sampleRate     = avctx->sample_rate;
    cfg.noChannels     = avctx->ch_layout.nb_channels;
    cfg.bitsPerSamples = avctx->sample_fmt == AV_SAMPLE_FMT_FLT ? 32 : 16;
    cfg.bitrate        = avctx->bit_rate;
    /* ADTS framing is only used when no global header is requested */
    cfg.adts           = !(avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER);
    cfg.cutoff         = avctx->cutoff;

    switch (avctx->profile) {
    case AV_PROFILE_AAC_LOW:
    case AV_PROFILE_UNKNOWN:
        cfg.profile = AAC_LC;
        break;
    case AV_PROFILE_AAC_HE:
        cfg.profile = AAC_HE;
        break;
    case AV_PROFILE_AAC_HE_V2:
        cfg.profile = AAC_HEV2;
        break;
    case AV_PROFILE_AAC_LD:
        cfg.profile = AAC_LD;
        break;
    case AV_PROFILE_AAC_ELD:
        cfg.profile = AAC_ELD;
        break;
    default:
        av_log(avctx, AV_LOG_ERROR, "unsupported profile, supported profiles are LC, HE, HEv2, LD and ELD\n");
        return AVERROR(EINVAL);
    }

    /* NOTE(review): s->eld_v2 is not forwarded to the encoder settings here;
     * confirm which AACSettings field (if any) it is meant to drive. */
    cfg.tns            = s->tns;
    cfg.frameSize      = s->frame_length;
    cfg.eSBR           = s->esbr;
    cfg.iq             = s->iq;
    cfg.errorHandleCtx = avctx;
    cfg.errorHandler   = aac_enc_error_handler;

    s->encoder = aac_encode_open(cfg);
    if (!s->encoder)
        return AVERROR(EINVAL);

    /* no_samples is divided by the channel count to get samples per channel */
    avctx->frame_size      = s->encoder->no_samples / avctx->ch_layout.nb_channels;
    avctx->initial_padding = s->encoder->inputDelay;
    s->flush_delay         = s->encoder->inputDelay;
    av_log(avctx, AV_LOG_TRACE, "frame size: %d, initial delay: %d\n", avctx->frame_size, avctx->initial_padding);
    ff_af_queue_init(avctx, &s->afq);

    /* Set decoder specific info */
    avctx->extradata_size = 0;
    if (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
        avctx->extradata = av_mallocz(s->encoder->ascSize + AV_INPUT_BUFFER_PADDING_SIZE);
        if (!avctx->extradata) {
            /* The codec does not request automatic cleanup after a failed
             * init, so release the encoder here to avoid leaking it. */
            aac_encode_close(s->encoder);
            s->encoder = NULL;
            return AVERROR(ENOMEM);
        }
        memcpy(avctx->extradata, s->encoder->asc, s->encoder->ascSize);
        /* Without the size the copied configuration is invisible to muxers. */
        avctx->extradata_size = s->encoder->ascSize;
    }
    return 0;
}
/**
 * Encode one frame, or drain the encoder when frame is NULL.
 *
 * @param avctx      codec context
 * @param pkt        output packet, allocated here with the encoder's
 *                   max_out_bytes
 * @param frame      interleaved input samples (data[0] is used), or NULL to
 *                   drain
 * @param got_packet set to 1 when pkt holds output
 * @return 0 on success (also when draining has finished and no packet is
 *         produced), a negative AVERROR code on failure
 */
static int libaac_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
                               const AVFrame *frame, int *got_packet)
{
    libaacEncodeCTX *s = avctx->priv_data;
    unsigned int out_size;
    int discard_padding;
    int ret;

    if (!frame) {
        av_log(avctx, AV_LOG_TRACE, "flush_delay: %d\n", s->flush_delay);
        /* Nothing left to drain: return before allocating a packet that
         * would only be thrown away. */
        if (s->flush_delay <= 0)
            return 0;
    }

    if ((ret = ff_alloc_packet(avctx, pkt, s->encoder->max_out_bytes)) < 0)
        return ret;

    /* The encoder reports the produced size through an unsigned int; use a
     * local of that type instead of casting the address of pkt->size. */
    out_size = pkt->size;

    if (!frame) {
        /* Flushing */
        if (aac_encode(s->encoder, NULL, 0, pkt->data, &out_size) < 0)
            return AVERROR(EINVAL);
        s->flush_delay -= avctx->frame_size;
    } else {
        int encodeSize;

        /* Encoding */
        if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
            return ret;
        /* input size in bytes: all channels are interleaved in data[0] */
        encodeSize = av_get_bytes_per_sample(avctx->sample_fmt) * avctx->ch_layout.nb_channels * frame->nb_samples;
        av_log(avctx, AV_LOG_TRACE, "encode size: %d\n", encodeSize);
        if (aac_encode(s->encoder, frame->data[0], encodeSize, pkt->data, &out_size) < 0)
            return AVERROR(EINVAL);
    }
    pkt->size = out_size;

    ff_af_queue_remove(&s->afq, avctx->frame_size, &pkt->pts, &pkt->duration);

    /* discard padding copied from fdkaac encoder */
    discard_padding = avctx->frame_size - pkt->duration;
    // Check if subtraction resulted in an overflow
    if ((discard_padding < avctx->frame_size) != (pkt->duration > 0)) {
        av_log(avctx, AV_LOG_ERROR, "discard padding overflow\n");
        return AVERROR(EINVAL);
    }
    if ((!s->delay_sent && avctx->initial_padding > 0) || discard_padding > 0) {
        uint8_t *side_data =
            av_packet_new_side_data(pkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
        if (!side_data)
            return AVERROR(ENOMEM);
        /* bytes 0-3: samples to skip at the start, written only once */
        if (!s->delay_sent) {
            AV_WL32(side_data, avctx->initial_padding);
            s->delay_sent = 1;
        }
        /* bytes 4-7: samples to discard at the end of this packet */
        AV_WL32(side_data + 4, discard_padding);
    }

    pkt->flags |= AV_PKT_FLAG_KEY;
    *got_packet = 1;
    return 0;
}
/**
 * Flush callback: drop queued timing information and push one empty input
 * through the encoder, discarding whatever it outputs.
 */
static void libaac_encode_flush(AVCodecContext *avctx)
{
    libaacEncodeCTX *s = avctx->priv_data;
    uint8_t sink_null[32768];
    int64_t pts, duration;
    /* Initialised to the sink capacity so the encoder never sees an
     * indeterminate value, whether or not it reads this argument. */
    uint32_t out_bytes = sizeof(sink_null);

    av_log(avctx, AV_LOG_TRACE, "encoder flush\n");
    ff_af_queue_remove(&s->afq, s->afq.frame_count, &pts, &duration);
    aac_encode(s->encoder, NULL, 0, sink_null, &out_bytes);

    /* Draining decrements flush_delay down to <= 0. Re-arm it with the value
     * used at init so that an encoder reused after a flush drains again at
     * its next end of stream. */
    s->flush_delay = s->encoder->inputDelay;
}
/**
 * Release the encoder handle (when one was opened) and the audio frame queue.
 *
 * @return always 0
 */
static av_cold int libaac_encode_close(AVCodecContext *avctx)
{
    libaacEncodeCTX *priv = avctx->priv_data;
    AACContext *enc = priv->encoder;

    if (enc != NULL) {
        aac_encode_close(enc);
    }

    ff_af_queue_close(&priv->afq);

    return 0;
}
/* Codec-level option defaults: bitrate ("b") of 128000. */
static const FFCodecDefault defaults[] = {
    { "b", "128000" },
    { NULL },
};
/* Profiles advertised by the encoder; the list ends with AV_PROFILE_UNKNOWN. */
static const AVProfile libaac_profiles[] = {
    { AV_PROFILE_AAC_LOW,   "LC"       },
    { AV_PROFILE_AAC_HE,    "HE-AAC"   },
    { AV_PROFILE_AAC_HE_V2, "HE-AACv2" },
    { AV_PROFILE_AAC_LD,    "LD"       },
    { AV_PROFILE_AAC_ELD,   "ELD"      },
    { AV_PROFILE_UNKNOWN },
};
/* Supported sample rates in Hz, highest first; the list is 0-terminated. */
static const int aac_sample_rates[] = {
    96000,
    88200,
    64000,
    48000,
    44100,
    32000,
    24000,
    22050,
    16000,
    12000,
    11025,
    8000,
    0
};
/*
 * Supported channel layouts, one per channel count from 1 to 6.
 *
 * The list is walked until a zeroed entry is found, so it has to end with
 * one. The previous fixed size of 6 left no room for that terminator and
 * let readers run past the end of the array.
 */
static const AVChannelLayout aac_ch_layouts[] = {
    AV_CHANNEL_LAYOUT_MONO,
    AV_CHANNEL_LAYOUT_STEREO,
    AV_CHANNEL_LAYOUT_SURROUND,
    AV_CHANNEL_LAYOUT_4POINT0,
    AV_CHANNEL_LAYOUT_5POINT0_BACK,
    AV_CHANNEL_LAYOUT_5POINT1_BACK,
    { 0 },
};
const FFCodec ff_libaac_next_encoder = {
.p.name = "libaac",
CODEC_LONG_NAME("custom libxaac-based AAC encoder"),
.p.type = AVMEDIA_TYPE_AUDIO,
.p.id = AV_CODEC_ID_AAC,
.p.capabilities = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_ENCODER_FLUSH,
.caps_internal = FF_CODEC_CAP_NOT_INIT_THREADSAFE,
.priv_data_size = sizeof(libaacEncodeCTX),
.init = libaac_encode_init,
FF_CODEC_ENCODE_CB(libaac_encode_frame),
.flush = libaac_encode_flush,
.close = libaac_encode_close,
CODEC_SAMPLEFMTS(AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16),
.p.priv_class = &aac_enc_class,
.defaults = defaults,
.p.profiles = libaac_profiles,
CODEC_SAMPLERATES_ARRAY(aac_sample_rates),
.p.wrapper_name = "libaac",
CODEC_CH_LAYOUTS_ARRAY(aac_ch_layouts),
};