1076 lines
36 KiB
C
1076 lines
36 KiB
C
/******************************************************************************
 *                                                                            *
 * Copyright (C) 2023 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *****************************************************************************
 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
 */
|
|
#include <string.h>
|
|
#include <math.h>
|
|
#include "iusace_type_def.h"
|
|
#include "iusace_cnst.h"
|
|
|
|
#include "iusace_fd_quant.h"
|
|
#include "iusace_bitbuffer.h"
|
|
#include "impd_drc_common_enc.h"
|
|
#include "impd_drc_uni_drc.h"
|
|
#include "impd_drc_api.h"
|
|
#include "impd_drc_uni_drc_eq.h"
|
|
#include "impd_drc_uni_drc_filter_bank.h"
|
|
#include "impd_drc_gain_enc.h"
|
|
#include "impd_drc_struct_def.h"
|
|
|
|
#include "ixheaace_memory_standards.h"
|
|
#include "iusace_tns_usac.h"
|
|
#include "iusace_psy_mod.h"
|
|
#include "iusace_config.h"
|
|
#include "iusace_signal_classifier.h"
|
|
#include "iusace_fft.h"
|
|
#include "iusace_block_switch_const.h"
|
|
#include "iusace_block_switch_struct_def.h"
|
|
#include "iusace_cnst.h"
|
|
#include "iusace_ms.h"
|
|
#include "ixheaace_adjust_threshold_data.h"
|
|
#include "iusace_fd_qc_util.h"
|
|
#include "ixheaace_sbr_header.h"
|
|
#include "ixheaace_config.h"
|
|
#include "ixheaace_asc_write.h"
|
|
#include "iusace_main.h"
|
|
|
|
/**
 * Turns an interleaved complex spectrum into a power density spectrum and
 * shifts it so that the strongest bin ends up at 96 dB.
 *
 * For each bin k in [0, ccfl / 2) the value
 *     10 * (log10(re_k^2 + im_k^2) - log_ccfl_base_10) + 10e-15,
 * floored at MIN_POW, is written to ptr_input[2 * k] (the slot that held the
 * real part). The odd slots ptr_input[2 * k + 1] are read but never written.
 *
 * Fix over the previous revision: bin 0 contributed to the running maximum
 * but its power was never stored, so ptr_input[0] kept the raw real part of
 * the transform and then had the dB offset added to it. The bin-0 power is
 * now stored like every other bin before the normalisation pass.
 *
 * @param ptr_input  in/out buffer of interleaved (re, im) pairs; indices
 *                   0 .. ccfl - 1 are accessed
 * @param ccfl       frame length; 1024 selects LOG_1024_BASE_10, every other
 *                   value selects LOG_768_BASE_10
 */
static VOID iusace_calc_pds(FLOAT32 *ptr_input, WORD32 ccfl) {
  WORD32 i;
  FLOAT32 re, im;
  FLOAT64 pow_db;
  FLOAT64 max_pow, delta;
  FLOAT64 log_ccfl_base_10 = (ccfl == 1024) ? LOG_1024_BASE_10 : LOG_768_BASE_10;

  /* Bin 0: seed the running maximum and store the power in place. */
  re = ptr_input[0];
  im = ptr_input[1];
  pow_db = 10 * (log10(re * re + im * im) - log_ccfl_base_10) + 10e-15;
  max_pow = MAX(pow_db, MIN_POW);
  ptr_input[0] = (FLOAT32)max_pow;

  for (i = 1; i < (ccfl >> 1); i++) {
    /* removed the sqrt along with clubbing the for loops */
    re = ptr_input[2 * i];
    im = ptr_input[2 * i + 1];

    /* Computed once up front so log10() is not evaluated inside the MAX
     * arguments. */
    pow_db = 10 * (log10(re * re + im * im) - log_ccfl_base_10) + 10e-15;
    ptr_input[2 * i] = (FLOAT32)MAX(pow_db, MIN_POW);

    max_pow = MAX(max_pow, ptr_input[2 * i]);
  }

  /* Normalized to reference sound pressure level 96 dB */
  delta = 96 - max_pow;

  for (i = 0; i < (ccfl >> 1); i++) {
    ptr_input[2 * i] = ptr_input[2 * i] + (FLOAT32)delta;
  }
  return;
}
|
|
|
|
/**
 * Checks whether the spectral peak at ptr_spec[idx] stands out from its
 * surroundings.
 *
 * The peak qualifies when ptr_spec[idx] - ptr_spec[idx + j] >= 7 holds for
 * every j in [-reach, -2] and every j in [2, reach]. The lower side is tested
 * first (farthest neighbour first), then the upper side; the first failing
 * neighbour ends the test.
 *
 * @param ptr_spec  spectrum values; indices idx - reach .. idx + reach are read
 * @param idx       index of the candidate peak
 * @param reach     largest neighbour distance to test (>= 2)
 * @return 1 when every tested neighbour is at least 7 below the peak, else 0
 */
static WORD32 iusace_tonal_peak_is_prominent(const FLOAT32 *ptr_spec, WORD32 idx,
                                             WORD32 reach) {
  WORD32 j;

  for (j = -reach; j <= -2; j++) {
    if (!(ptr_spec[idx] - ptr_spec[idx + j] >= 7)) {
      return 0;
    }
  }
  for (j = 2; j <= reach; j++) {
    if (!(ptr_spec[idx] - ptr_spec[idx + j] >= 7)) {
      return 0;
    }
  }
  return 1;
}

/**
 * Marks the tonal bins of a power density spectrum.
 *
 * Steps:
 *  1. Copy the even slots of ptr_input (ptr_input[2 * k], k < ccfl / 2) into
 *     ptr_scratch.
 *  2. Clear ptr_tonal_flag for 512 bins (ccfl == FRAME_LEN_LONG) or 384 bins
 *     (any other ccfl).
 *  3. For every local maximum in [2, 500) resp. [2, 375), set the flag when
 *     the peak exceeds its neighbours at distance 2..reach by at least 7.
 *     reach depends on the bin index:
 *        long frame : [2,62) -> 2, [62,126) -> 3, [126,254) -> 6, [254,500) -> 12
 *        otherwise  : [2,47) -> 2, [47,95)  -> 3, [95,194)  -> 5, [194,375) -> 9
 *  4. Clear the flag again when the summed level of the peak and its two
 *     direct neighbours is below the absolute threshold table entry (raised
 *     by 20 from bin 324 resp. 243 upwards).
 *
 * This replaces eight copy-pasted neighbourhood checks with one helper; the
 * result is unchanged.
 *
 * NOTE(review): the previous code tested `95 <= i < 194` and then
 * `191 <= i < 375` in an else-if chain, so the effective split was at 194.
 * That split is kept here; confirm whether 191 was the intended boundary.
 *
 * @param ptr_input       power density spectrum in the even slots of an
 *                        interleaved buffer (read only)
 * @param ptr_tonal_flag  out: 1 for tonal bins, 0 otherwise
 * @param ptr_scratch     work buffer receiving the de-interleaved spectrum;
 *                        indices 0 .. ccfl / 2 - 1 are written
 * @param ccfl            frame length; FRAME_LEN_LONG selects the 1024 tables,
 *                        every other value the 768 tables
 */
static VOID iusace_find_tonal(FLOAT32 *ptr_input, WORD32 *ptr_tonal_flag, FLOAT32 *ptr_scratch,
                              WORD32 ccfl) {
  WORD32 i;
  WORD32 reach;
  FLOAT64 tonal_spl;
  FLOAT64 absolute_threshold_xm;
  const WORD32 is_long_frame = (ccfl == FRAME_LEN_LONG);
  const WORD32 num_bins = is_long_frame ? 512 : 384;
  const WORD32 peak_search_end = is_long_frame ? 500 : 375;

  for (i = 0; i < (ccfl >> 1); i++) {
    ptr_scratch[i] = ptr_input[2 * i];
  }

  for (i = 0; i < num_bins; i++) {
    ptr_tonal_flag[i] = 0;
  }

  /* Pass 1: local maxima that dominate their neighbourhood. */
  for (i = 2; i < peak_search_end; i++) {
    if (!(ptr_scratch[i] > ptr_scratch[i - 1] && ptr_scratch[i] >= ptr_scratch[i + 1])) {
      continue;
    }

    /* Verify it meets the condition: ptr_scratch[i]-ptr_scratch[i+j]>=7 */
    if (is_long_frame) {
      reach = (i < 62) ? 2 : (i < 126) ? 3 : (i < 254) ? 6 : 12;
    } else {
      reach = (i < 47) ? 2 : (i < 95) ? 3 : (i < 194) ? 5 : 9;
    }

    if (iusace_tonal_peak_is_prominent(ptr_scratch, i, reach)) {
      ptr_tonal_flag[i] = 1;
    }
  }

  /* Pass 2: drop peaks whose level is below the absolute threshold. Flags can
   * only be set for i >= 2, so the i - 1 access below never underflows. */
  for (i = 0; i < num_bins; i++) {
    if (ptr_tonal_flag[i] != 1) {
      continue;
    }

    /* compute the SPL of tonal */
    tonal_spl =
        10 * log10(pow(10, (ptr_scratch[i - 1] / 10)) + pow(10, (ptr_scratch[i] / 10)) +
                   pow(10, (ptr_scratch[i + 1] / 10)));

    if (is_long_frame) {
      if (i >= 324) {
        absolute_threshold_xm = iusace_classify_arrays.absolute_threshold_1024[i] + 20;
      } else {
        absolute_threshold_xm = iusace_classify_arrays.absolute_threshold_1024[i];
      }
    } else {
      if (i >= 243) {
        absolute_threshold_xm = iusace_classify_arrays.absolute_threshold_768[i] + 20;
      } else {
        absolute_threshold_xm = iusace_classify_arrays.absolute_threshold_768[i];
      }
    }

    if (tonal_spl < absolute_threshold_xm) {
      ptr_tonal_flag[i] = 0;
    }
  }
  return;
}
|
|
|
|
static VOID iusace_tonal_analysis(ia_tonal_params_struct *pstr_ton_params,
|
|
iusace_scratch_mem *pstr_scratch, WORD32 ccfl) {
|
|
FLOAT32 *ptr_complex_fft = pstr_scratch->p_complex_fft;
|
|
WORD32 *ptr_tonal_flag = pstr_scratch->p_tonal_flag;
|
|
FLOAT32 *ptr_time_sig = pstr_ton_params->time_signal;
|
|
WORD32 framecnt_xm = pstr_ton_params->framecnt_xm;
|
|
WORD32 *ptr_n_tonal = pstr_ton_params->n_tonal;
|
|
WORD32 *ptr_n_tonal_low_frequency = pstr_ton_params->n_tonal_low_frequency;
|
|
FLOAT32 *ptr_n_tonal_low_frequency_ratio = pstr_ton_params->n_tonal_low_frequency_ratio;
|
|
FLOAT32 *ave_n_tonal = pstr_ton_params->ave_n_tonal;
|
|
FLOAT32 *ave_n_tonal_short = pstr_ton_params->ave_n_tonal_short;
|
|
WORD32 i;
|
|
WORD32 fft_size = ccfl;
|
|
|
|
WORD32 frame_length;
|
|
WORD32 n_tonal_total, n_tonal_low_frequency_total;
|
|
|
|
for (i = 0; i < ccfl; i++) {
|
|
ptr_complex_fft[2 * i] = (FLOAT32)(
|
|
ptr_time_sig[i] * ((ccfl == 1024) ? iusace_classify_arrays.hanning_window_1024[i]
|
|
: iusace_classify_arrays.hanning_window_768[i]));
|
|
ptr_complex_fft[2 * i + 1] = 0;
|
|
}
|
|
|
|
iusace_complex_fft(ptr_complex_fft, fft_size, pstr_scratch);
|
|
|
|
/* compute power density spectrum */
|
|
/* re_fft contains the resulting pds */
|
|
iusace_calc_pds(ptr_complex_fft, ccfl);
|
|
|
|
/* detect tonal */
|
|
iusace_find_tonal(ptr_complex_fft, ptr_tonal_flag, pstr_scratch->p_pow_spec, ccfl);
|
|
|
|
/* update n_tonal, n_tonal_low_frequency */
|
|
for (i = 0; i < 99; i++) {
|
|
ptr_n_tonal[i] = ptr_n_tonal[i + 1];
|
|
ptr_n_tonal_low_frequency[i] = ptr_n_tonal_low_frequency[i + 1];
|
|
}
|
|
ptr_n_tonal[99] = 0;
|
|
for (i = 0; i<ccfl>> 1; i++) {
|
|
ptr_n_tonal[99] += ptr_tonal_flag[i];
|
|
}
|
|
ptr_n_tonal_low_frequency[99] = 0;
|
|
for (i = 0; i < INDEXOFLOWFREQUENCY; i++) {
|
|
ptr_n_tonal_low_frequency[99] += ptr_tonal_flag[i];
|
|
}
|
|
|
|
/* compute long-term AVE and the ratio of distribution in low-frequency domain */
|
|
if (framecnt_xm < AVE_TONAL_LENGTH) {
|
|
frame_length = framecnt_xm;
|
|
} else {
|
|
frame_length = AVE_TONAL_LENGTH;
|
|
}
|
|
|
|
n_tonal_total = 0;
|
|
n_tonal_low_frequency_total = 0;
|
|
for (i = 0; i < frame_length; i++) {
|
|
n_tonal_total += ptr_n_tonal[99 - i];
|
|
n_tonal_low_frequency_total += ptr_n_tonal_low_frequency[99 - i];
|
|
}
|
|
|
|
*ave_n_tonal = (FLOAT32)n_tonal_total / frame_length;
|
|
|
|
if (n_tonal_total == 0) {
|
|
*ptr_n_tonal_low_frequency_ratio = 1;
|
|
} else {
|
|
*ptr_n_tonal_low_frequency_ratio = (FLOAT32)n_tonal_low_frequency_total / n_tonal_total;
|
|
}
|
|
|
|
/* compute the short-term AVE */
|
|
if (framecnt_xm < AVE_TONAL_LENGTH_SHORT) {
|
|
frame_length = framecnt_xm;
|
|
} else {
|
|
frame_length = AVE_TONAL_LENGTH_SHORT;
|
|
}
|
|
|
|
n_tonal_total = 0;
|
|
for (i = 0; i < frame_length; i++) {
|
|
n_tonal_total += ptr_n_tonal[99 - i];
|
|
}
|
|
|
|
*ave_n_tonal_short = (FLOAT32)n_tonal_total / frame_length;
|
|
return;
|
|
}
|
|
|
|
static VOID iusace_spectral_tilt_analysis(ia_spec_tilt_params_struct *ptr_spec_params,
|
|
WORD32 ccfl) {
|
|
FLOAT32 *ptr_time_signal = ptr_spec_params->time_signal;
|
|
WORD32 framecnt_xm = ptr_spec_params->framecnt_xm;
|
|
FLOAT32 *ptr_spec_tilt_buf = ptr_spec_params->spec_tilt_buf;
|
|
FLOAT32 *ptr_msd_spec_tilt = ptr_spec_params->msd_spec_tilt;
|
|
FLOAT32 *ptr_msd_spec_tilt_short = ptr_spec_params->msd_spec_tilt_short;
|
|
WORD32 i;
|
|
WORD32 frame_length;
|
|
|
|
FLOAT32 r0, r1;
|
|
FLOAT32 spec_tilt;
|
|
FLOAT32 ave_spec_tilt;
|
|
|
|
/* compute spectral tilt */
|
|
r0 = 0;
|
|
r1 = 0;
|
|
for (i = 0; i < ccfl - 1; i++) {
|
|
r0 += ptr_time_signal[i] * ptr_time_signal[i];
|
|
r1 += ptr_time_signal[i] * ptr_time_signal[i + 1];
|
|
}
|
|
r0 += ptr_time_signal[i] * ptr_time_signal[i];
|
|
|
|
if (r0 == 0) {
|
|
spec_tilt = 1.0f;
|
|
} else {
|
|
spec_tilt = r1 / r0;
|
|
}
|
|
|
|
/* update spec_tilt_buf */
|
|
for (i = 0; i < 100 - 1; i++) {
|
|
ptr_spec_tilt_buf[i] = ptr_spec_tilt_buf[i + 1];
|
|
}
|
|
ptr_spec_tilt_buf[99] = spec_tilt;
|
|
|
|
/* compute the long-term mean square deviation of the spectral tilt */
|
|
if (framecnt_xm < SPECTRAL_TILT_LENGTH) {
|
|
frame_length = framecnt_xm;
|
|
} else {
|
|
frame_length = SPECTRAL_TILT_LENGTH;
|
|
}
|
|
|
|
ave_spec_tilt = 0;
|
|
for (i = 0; i < frame_length; i++) {
|
|
ave_spec_tilt += ptr_spec_tilt_buf[99 - i];
|
|
}
|
|
ave_spec_tilt /= frame_length;
|
|
|
|
*ptr_msd_spec_tilt = 0;
|
|
for (i = 0; i < frame_length; i++) {
|
|
*ptr_msd_spec_tilt +=
|
|
(ptr_spec_tilt_buf[99 - i] - ave_spec_tilt) * (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt);
|
|
}
|
|
*ptr_msd_spec_tilt /= frame_length;
|
|
|
|
/* compute the short-term mean square deviation of the spectral tilt */
|
|
if (framecnt_xm < SPECTRAL_TILT_LENGTH_SHORT) {
|
|
frame_length = framecnt_xm;
|
|
} else {
|
|
frame_length = SPECTRAL_TILT_LENGTH_SHORT;
|
|
}
|
|
|
|
ave_spec_tilt = 0;
|
|
for (i = 0; i < frame_length; i++) {
|
|
ave_spec_tilt += ptr_spec_tilt_buf[99 - i];
|
|
}
|
|
ave_spec_tilt /= frame_length;
|
|
|
|
*ptr_msd_spec_tilt_short = 0;
|
|
for (i = 0; i < frame_length; i++) {
|
|
*ptr_msd_spec_tilt_short +=
|
|
(ptr_spec_tilt_buf[99 - i] - ave_spec_tilt) * (ptr_spec_tilt_buf[99 - i] - ave_spec_tilt);
|
|
}
|
|
*ptr_msd_spec_tilt_short /= frame_length;
|
|
|
|
/* compute the energy of current frame */
|
|
if (r0 <= 1) {
|
|
ptr_spec_params->frame_energy = 0;
|
|
} else {
|
|
ptr_spec_params->frame_energy = (FLOAT32)(10 * log(r0) / log(10));
|
|
}
|
|
return;
|
|
}
|
|
|
|
/**
 * Initial (unsmoothed) speech/music decision for the current frame.
 *
 * Inputs are read from pstr_mode_params; the function
 *   - resets *flag_border to NO_BORDER and may then set it to one of the
 *     BORDER_* values,
 *   - sets *framecnt_xm to 10 whenever a border is raised,
 *   - shifts the 5-entry histories msd_spec_tilt_buf, msd_spec_tilt_short_buf,
 *     ave_n_tonal_short_buf and ave_n_tonal_buf and appends the current values,
 *   - returns one of MUSIC_DEFINITE, SPEECH_DEFINITE, MUSIC or SPEECH (the
 *     result starts as TBD and the last rule maps any remaining TBD to SPEECH).
 *
 * NOTE(review): every run counter below and `last_border` are automatic
 * variables that start at 0 on each call. They therefore never exceed 1 inside
 * this function, so the tests `>= 15`, `>= 100` and `>= 400` cannot succeed and
 * `last_border` is always 0 until it is assigned near the end. This looks like
 * state that was meant to persist across frames; moving it into persistent
 * storage needs a struct change outside this function - confirm and fix there.
 * The logic is kept exactly as it was.
 *
 * @param pstr_mode_params  feature values, histories and in/out pointers
 * @return the initial mode decision for this frame
 */
static WORD32 iusace_init_mode_decision(ia_mode_params_struct *pstr_mode_params) {
  WORD32 k;
  WORD32 total_frames = pstr_mode_params->framecnt;
  WORD32 *p_frames_xm = pstr_mode_params->framecnt_xm;
  WORD32 *p_border = pstr_mode_params->flag_border;
  FLOAT32 tonal_short = pstr_mode_params->ave_n_tonal_short;
  FLOAT32 tonal_long = pstr_mode_params->ave_n_tonal;
  FLOAT32 *tonal_short_hist = pstr_mode_params->ave_n_tonal_short_buf;
  FLOAT32 *tonal_long_hist = pstr_mode_params->ave_n_tonal_buf;
  FLOAT32 tilt_msd = pstr_mode_params->msd_spec_tilt;
  FLOAT32 tilt_msd_short = pstr_mode_params->msd_spec_tilt_short;
  FLOAT32 *tilt_msd_hist = pstr_mode_params->msd_spec_tilt_buf;
  FLOAT32 *tilt_msd_short_hist = pstr_mode_params->msd_spec_tilt_short_buf;
  FLOAT32 low_freq_ratio = pstr_mode_params->n_tonal_low_frequency_ratio;
  FLOAT32 energy = pstr_mode_params->frame_energy;
  WORD32 decision = TBD;
  WORD32 run_strict_speech_music = 0;
  WORD32 run_loose_speech_music = 0;
  WORD32 run_music_speech = 0;
  WORD32 tonal_music_speech_seen = 0;
  WORD32 run_music = 0;
  WORD32 last_border = 0;
  WORD32 run_energy_above_60 = 0;

  *p_border = NO_BORDER;

  /* ---- border decision from the spectral tilt ---- */

  /* age the 5-entry tilt MSD histories and append the current values */
  for (k = 1; k < 5; k++) {
    tilt_msd_hist[k - 1] = tilt_msd_hist[k];
    tilt_msd_short_hist[k - 1] = tilt_msd_short_hist[k];
  }
  tilt_msd_hist[4] = tilt_msd;
  tilt_msd_short_hist[4] = tilt_msd_short;

  /* speech->music, strict border */
  run_strict_speech_music =
      ((tilt_msd >= 0.014) && (tilt_msd_short <= 0.000005)) ? run_strict_speech_music + 1 : 0;

  if ((*p_border != BORDER_SPEECH_MUSIC_DEFINITE) &&
      (*p_border != BORDER_MUSIC_SPEECH_DEFINITE) &&
      (last_border != BORDER_SPEECH_MUSIC_DEFINITE) && (run_strict_speech_music >= 15) &&
      (*p_frames_xm >= 300)) {
    *p_frames_xm = 10;
    *p_border = BORDER_SPEECH_MUSIC;
  }

  /* speech->music, relatively loose border */
  run_loose_speech_music =
      ((tilt_msd >= 0.0025) && (tilt_msd_short <= 0.000003)) ? run_loose_speech_music + 1 : 0;

  if ((*p_border != BORDER_SPEECH_MUSIC_DEFINITE) &&
      (*p_border != BORDER_MUSIC_SPEECH_DEFINITE) &&
      (last_border != BORDER_SPEECH_MUSIC_DEFINITE) && (run_loose_speech_music >= 15) &&
      (*p_frames_xm >= 300)) {
    *p_frames_xm = 10;
    *p_border = BORDER_SPEECH_MUSIC;
  }

  /* music->speech; this counter has no reset branch */
  if ((tilt_msd_hist[0] <= 0.0003) && (tilt_msd_short_hist[0] <= 0.0002)) {
    run_music_speech++;
  }

  if ((*p_border != BORDER_SPEECH_MUSIC_DEFINITE) &&
      (*p_border != BORDER_MUSIC_SPEECH_DEFINITE) &&
      (last_border != BORDER_MUSIC_SPEECH_DEFINITE) && (run_music_speech >= 100) &&
      (tilt_msd >= 0.0008) && (tilt_msd_short >= 0.0025) && (*p_frames_xm >= 20)) {
    *p_frames_xm = 10;
    *p_border = BORDER_MUSIC_SPEECH;
  }

  /* ---- border decision from the tonal averages ---- */

  /* age the 5-entry tonal histories and append the current values */
  for (k = 1; k < 5; k++) {
    tonal_short_hist[k - 1] = tonal_short_hist[k];
    tonal_long_hist[k - 1] = tonal_long_hist[k];
  }
  tonal_short_hist[4] = tonal_short;
  tonal_long_hist[4] = tonal_long;

  /* music->speech */
  if ((tonal_long_hist[0] >= 12) && (tonal_long_hist[0] < 15) &&
      (tonal_long_hist[0] - tonal_short_hist[0] >= 5) && (*p_frames_xm >= 20) &&
      (tonal_short - tonal_short_hist[0] < 5)) {
    *p_frames_xm = 10;
    tonal_music_speech_seen = 1;
    *p_border = BORDER_MUSIC_SPEECH_DEFINITE;
  }

  /* ---- border update from the frame energy ---- */
  run_energy_above_60 = (energy <= 60) ? 0 : run_energy_above_60 + 1;

  if ((*p_border == BORDER_MUSIC_SPEECH) && (run_energy_above_60 <= 5)) {
    *p_border = BORDER_MUSIC_SPEECH_DEFINITE;
    *p_frames_xm = 10;
  }

  /* ---- MUSIC_DEFINITE / SPEECH_DEFINITE from short-term features ---- */

  /* short-term tonal average */
  if ((decision == TBD) && (tonal_short >= 19)) {
    decision = MUSIC_DEFINITE;
  }
  if ((decision == TBD) && (tonal_short <= 1.5)) {
    decision = SPEECH_DEFINITE;
  }

  /* short-term tilt MSD; the first rule overrides any earlier decision */
  if (tilt_msd_short >= 0.02) {
    decision = SPEECH_DEFINITE;
  }
  if ((decision == TBD) && (tilt_msd_short <= 0.00000025) && (total_frames >= 10)) {
    decision = MUSIC_DEFINITE;
  }

  /* ---- SPEECH right after a tonal music->speech border ---- */
  if ((decision == TBD) && (tonal_music_speech_seen == 1)) {
    if ((tonal_short <= 12) && (*p_frames_xm <= 150)) {
      decision = SPEECH;
    }
  }

  /* ---- MUSIC_DEFINITE / SPEECH_DEFINITE from long-term features ---- */

  /* long-term tonal average */
  if ((decision == TBD) && (tonal_long <= 3)) {
    decision = SPEECH_DEFINITE;
  }
  if ((decision == TBD) && (tonal_long >= 15)) {
    decision = MUSIC_DEFINITE;
  }

  /* short-term tonal average, lower bar */
  if ((decision == TBD) && (tonal_short >= 17)) {
    decision = MUSIC_DEFINITE;
  }

  /* long-term tilt MSD */
  if ((decision == TBD) && (tilt_msd >= 0.01)) {
    decision = SPEECH_DEFINITE;
  }
  if ((decision == TBD) && (total_frames >= 10) && (tilt_msd <= 0.00004)) {
    decision = MUSIC_DEFINITE;
  }

  /* low-frequency tonal ratio */
  if ((decision == TBD) && (low_freq_ratio <= 0.91)) {
    decision = MUSIC_DEFINITE;
  }

  /* ---- MUSIC / SPEECH ---- */

  /* long-term tilt MSD */
  if ((decision == TBD) && (tilt_msd <= 0.0002) && (*p_frames_xm >= 15)) {
    decision = MUSIC;
  }

  /* low-frequency tonal ratio */
  if ((decision == TBD) && (low_freq_ratio >= 0.95)) {
    decision = SPEECH;
  }
  if ((decision == TBD) && (low_freq_ratio <= 0.935)) {
    decision = MUSIC;
  }

  /* everything still undecided becomes SPEECH */
  if (decision == TBD) {
    decision = SPEECH;
  }

  /* ---- MUSIC from a sustained low tilt MSD ---- */
  if ((tilt_msd <= 0.007) && (decision != SPEECH_DEFINITE)) {
    if (decision != SPEECH) {
      run_music++;
    }
  } else {
    run_music = 0;
  }

  if ((decision != SPEECH_DEFINITE) && (run_music >= 400) &&
      (last_border != BORDER_MUSIC_SPEECH_DEFINITE)) {
    decision = MUSIC;
  }

  /* ---- border flag bookkeeping ---- */
  if (*p_border != NO_BORDER) {
    last_border = *p_border;
  }

  /* a music->speech border followed by MUSIC_DEFINITE flips to speech->music */
  if (((last_border == BORDER_MUSIC_SPEECH) || (last_border == BORDER_MUSIC_SPEECH_DEFINITE)) &&
      (decision == MUSIC_DEFINITE) && (*p_frames_xm >= 20)) {
    *p_border = BORDER_SPEECH_MUSIC_DEFINITE;
    *p_frames_xm = 10;
    last_border = *p_border;
  }

  /* a speech->music border followed by SPEECH_DEFINITE flips to music->speech */
  if (((last_border == BORDER_SPEECH_MUSIC) || (last_border == BORDER_SPEECH_MUSIC_DEFINITE)) &&
      (decision == SPEECH_DEFINITE) && (*p_frames_xm >= 20)) {
    *p_border = BORDER_MUSIC_SPEECH_DEFINITE;
    *p_frames_xm = 10;
  }

  return decision;
}
|
|
|
|
/**
 * Smooths the initial speech/music decision using past results, frame
 * energies and border flags.
 *
 * State held in pstr_smooth_param and updated here:
 *   - init_result_behind (100 entries) and flag_border_buf_behind /
 *     frame_energy_buf_behind (10 entries each) are shifted by one and
 *     receive element 0 of the matching *_ahead buffer
 *   - element NFRAMEAHEAD - 1 of each *_ahead buffer receives the current
 *     init_mode_decision_result / flag_border / frame_energy
 *   - smoothing_result_buf (entries 0..99) is shifted, or reset to TBD when
 *     the newest "behind" border flag is not NO_BORDER; entry 99 is the
 *     newest result
 *   - num_smoothing, flag_speech_definite, flag_music_definite and
 *     count_small_energy
 *
 * Fix over the previous revision: the majority vote read
 * smoothing_result_buf[100 - i], i.e. index 100 for i == 0. Every write in
 * this function addresses indices 0..99 with 99 as the newest entry, so the
 * vote skipped the newest result and read one element past the window. The
 * vote now reads [99 - i].
 *
 * NOTE(review): the *_ahead buffers are never shifted here; element 0 is
 * read and element NFRAMEAHEAD - 1 is written. That is only a proper queue
 * if NFRAMEAHEAD is 1 - confirm.
 *
 * @param pstr_smooth_param  smoothing state plus the current frame's
 *                           init_mode_decision_result, flag_border and
 *                           frame_energy
 * @return the smoothed mode decision
 */
static WORD32 iusace_smoothing_mode_decision(ia_smooth_params_struct *pstr_smooth_param) {
  WORD32 *ptr_init_result_ahead = pstr_smooth_param->init_result_ahead;
  WORD32 flag_border = pstr_smooth_param->flag_border;
  WORD32 *ptr_flag_border_buf_behind = pstr_smooth_param->flag_border_buf_behind;
  WORD32 *ptr_flag_border_buf_ahead = pstr_smooth_param->flag_border_buf_ahead;
  FLOAT32 frame_energy = pstr_smooth_param->frame_energy;
  FLOAT32 *ptr_frame_energy_buf_behind = pstr_smooth_param->frame_energy_buf_behind;
  FLOAT32 *ptr_frame_energy_buf_ahead = pstr_smooth_param->frame_energy_buf_ahead;
  WORD32 *ptr_smoothing_result_buf = pstr_smooth_param->smoothing_result_buf;
  WORD32 *ptr_init_result_behind = pstr_smooth_param->init_result_behind;
  WORD32 init_mode_decision_result = pstr_smooth_param->init_mode_decision_result;
  WORD32 i;
  WORD32 past_result;
  WORD32 mode_decision_result;
  WORD32 num_music, num_speech;

  /* update init_result_behind, init_result_ahead */
  for (i = 0; i < 99; i++) {
    ptr_init_result_behind[i] = ptr_init_result_behind[i + 1];
  }
  ptr_init_result_behind[99] = ptr_init_result_ahead[0];

  ptr_init_result_ahead[NFRAMEAHEAD - 1] = init_mode_decision_result;

  /* update flag_border_buf_behind, flag_border_buf_ahead and
   * frame_energy_buf_behind, frame_energy_buf_ahead */
  for (i = 0; i < 9; i++) {
    ptr_flag_border_buf_behind[i] = ptr_flag_border_buf_behind[i + 1];
    ptr_frame_energy_buf_behind[i] = ptr_frame_energy_buf_behind[i + 1];
  }
  ptr_flag_border_buf_behind[9] = ptr_flag_border_buf_ahead[0];
  ptr_frame_energy_buf_behind[9] = ptr_frame_energy_buf_ahead[0];

  ptr_flag_border_buf_ahead[NFRAMEAHEAD - 1] = flag_border;

  ptr_frame_energy_buf_ahead[NFRAMEAHEAD - 1] = frame_energy;

  /* smoothing according to past results: start from the newest delayed
   * initial decision */
  mode_decision_result = ptr_init_result_behind[99];

  /* update smoothing_result_buf: keep shifting while no border was seen,
   * otherwise restart the smoothing window */
  if (ptr_flag_border_buf_behind[9] == NO_BORDER) {
    for (i = 0; i < 99; i++) {
      ptr_smoothing_result_buf[i] = ptr_smoothing_result_buf[i + 1];
    }
    pstr_smooth_param->num_smoothing++;
  } else {
    for (i = 0; i < 99; i++) {
      ptr_smoothing_result_buf[i] = TBD;
    }
    pstr_smooth_param->num_smoothing = 1;
  }
  ptr_smoothing_result_buf[99] = ptr_init_result_behind[99];

  if (pstr_smooth_param->num_smoothing >= SMOOTHING_LENGTH) {
    num_music = 0;
    num_speech = 0;

    /* majority vote over the newest SMOOTHING_LENGTH results, newest first;
     * anything that is not SPEECH / SPEECH_DEFINITE counts as music */
    for (i = 0; i < SMOOTHING_LENGTH; i++) {
      past_result = ptr_smoothing_result_buf[99 - i];
      if ((past_result == SPEECH) || (past_result == SPEECH_DEFINITE)) {
        num_speech++;
      } else {
        num_music++;
      }
    }

    /* smoothing: the vote never overrides a *_DEFINITE initial decision of
     * the opposite class */
    if ((num_speech > num_music) && (init_mode_decision_result != MUSIC_DEFINITE)) {
      mode_decision_result = SPEECH;
    }
    if ((num_music > num_speech) && (init_mode_decision_result != SPEECH_DEFINITE)) {
      mode_decision_result = MUSIC;
    }
  }

  /* correct according to energies and ahead mode decision results */
  if ((mode_decision_result == MUSIC) && (ptr_frame_energy_buf_behind[9] <= 60)) {
    for (i = 0; i < NFRAMEAHEAD; i++) {
      if ((ptr_init_result_ahead[i] == SPEECH_DEFINITE) || (ptr_init_result_ahead[i] == SPEECH)) {
        pstr_smooth_param->flag_speech_definite = 1;
      }
    }
  }
  if ((pstr_smooth_param->flag_speech_definite == 1) && (mode_decision_result == MUSIC)) {
    mode_decision_result = SPEECH;
  } else {
    pstr_smooth_param->flag_speech_definite = 0;
  }

  /* correct MUSIC mode; count_small_energy is reset when the energy is <= 65
   * and incremented otherwise */
  if (ptr_frame_energy_buf_behind[9] <= 65) {
    pstr_smooth_param->count_small_energy = 0;
  } else {
    pstr_smooth_param->count_small_energy++;
  }
  if (((ptr_flag_border_buf_ahead[NFRAMEAHEAD - 1] == BORDER_SPEECH_MUSIC) ||
       (ptr_flag_border_buf_ahead[NFRAMEAHEAD - 1] == BORDER_SPEECH_MUSIC_DEFINITE)) &&
      (pstr_smooth_param->count_small_energy <= 30)) {
    pstr_smooth_param->flag_music_definite = 1;
  }
  if ((pstr_smooth_param->flag_music_definite == 1) &&
      ((mode_decision_result == SPEECH) || (mode_decision_result == SPEECH_DEFINITE))) {
    mode_decision_result = MUSIC;
  } else {
    pstr_smooth_param->flag_music_definite = 0;
  }

  return mode_decision_result;
}
|
|
|
|
static WORD32 iusace_classification_ccfl(ia_classification_struct *pstr_sig_class,
|
|
FLOAT32 *ptr_time_signal,
|
|
iusace_scratch_mem *pstr_scratch, WORD32 ccfl) {
|
|
WORD32 i;
|
|
ia_tonal_params_struct pstr_ton_params;
|
|
ia_smooth_params_struct smooth_param;
|
|
ia_mode_params_struct pstr_mode_params;
|
|
ia_spec_tilt_params_struct ptr_spec_params;
|
|
|
|
ia_classification_buf_struct *pstr_buffers = &(pstr_sig_class->buffers);
|
|
pFLOAT32 spec_tilt_buf = pstr_sig_class->spec_tilt_buf;
|
|
pWORD32 n_tonal = pstr_sig_class->n_tonal;
|
|
pWORD32 n_tonal_low_frequency = pstr_sig_class->n_tonal_low_frequency;
|
|
pWORD32 framecnt_xm = &(pstr_sig_class->framecnt_xm);
|
|
pWORD32 framecnt = &(pstr_sig_class->framecnt);
|
|
pFLOAT32 ave_n_tonal_short_buf = pstr_sig_class->ave_n_tonal_short_buf;
|
|
pFLOAT32 ave_n_tonal_buf = pstr_sig_class->ave_n_tonal_buf;
|
|
pFLOAT32 msd_spec_tilt_buf = pstr_sig_class->msd_spec_tilt_buf;
|
|
pFLOAT32 msd_spec_tilt_short_buf = pstr_sig_class->msd_spec_tilt_short_buf;
|
|
|
|
FLOAT32 n_tonal_low_frequency_ratio; /* the ratio of distribution of the numbers */
|
|
/* of tonal in the low frequency domain */
|
|
FLOAT32 ave_n_tonal, ave_n_tonal_short; /**< the number of tonal */
|
|
FLOAT32 msd_spec_tilt; /* the long-term MSD of spectral tilt */
|
|
FLOAT32 msd_spec_tilt_short; /* the short-term MSD of spectral tilt */
|
|
|
|
WORD32 init_mode_decision_result; /* the initial mode decision */
|
|
WORD32 flag_border = NO_BORDER; /* flag of current border */
|
|
|
|
WORD32 mode_decision_result; /* final mode decision result */
|
|
|
|
if (pstr_sig_class->init_flag == 0) {
|
|
/* initialize */
|
|
pstr_sig_class->init_flag = 1;
|
|
|
|
for (i = 0; i < 5; i++) {
|
|
n_tonal[i] = 0;
|
|
n_tonal_low_frequency[i] = 0;
|
|
spec_tilt_buf[i] = 0;
|
|
pstr_buffers->init_result_behind[i] = TBD;
|
|
pstr_buffers->smoothing_result_buf[i] = TBD;
|
|
|
|
ave_n_tonal_short_buf[i] = 0;
|
|
ave_n_tonal_buf[i] = 0;
|
|
msd_spec_tilt_buf[i] = 0;
|
|
msd_spec_tilt_short_buf[i] = 0;
|
|
|
|
pstr_buffers->frame_energy_buf_behind[i] = 0;
|
|
pstr_buffers->flag_border_buf_behind[i] = NO_BORDER;
|
|
}
|
|
for (; i < 10; i++) {
|
|
n_tonal[i] = 0;
|
|
n_tonal_low_frequency[i] = 0;
|
|
spec_tilt_buf[i] = 0;
|
|
pstr_buffers->init_result_behind[i] = TBD;
|
|
pstr_buffers->smoothing_result_buf[i] = TBD;
|
|
|
|
pstr_buffers->frame_energy_buf_behind[i] = 0;
|
|
pstr_buffers->flag_border_buf_behind[i] = NO_BORDER;
|
|
}
|
|
|
|
for (; i < 100; i++) {
|
|
n_tonal[i] = 0;
|
|
n_tonal_low_frequency[i] = 0;
|
|
spec_tilt_buf[i] = 0;
|
|
pstr_buffers->init_result_behind[i] = TBD;
|
|
pstr_buffers->smoothing_result_buf[i] = TBD;
|
|
}
|
|
for (i = 0; i < NFRAMEAHEAD; i++) {
|
|
pstr_buffers->frame_energy_buf_ahead[i] = 0;
|
|
pstr_buffers->flag_border_buf_ahead[i] = NO_BORDER;
|
|
pstr_buffers->init_result_ahead[i] = TBD;
|
|
}
|
|
}
|
|
|
|
*framecnt += 1;
|
|
*framecnt_xm += 1;
|
|
|
|
pstr_ton_params.time_signal = (FLOAT32 *)ptr_time_signal;
|
|
pstr_ton_params.framecnt_xm = *framecnt_xm;
|
|
pstr_ton_params.n_tonal = n_tonal;
|
|
pstr_ton_params.n_tonal_low_frequency = n_tonal_low_frequency;
|
|
pstr_ton_params.n_tonal_low_frequency_ratio = &n_tonal_low_frequency_ratio;
|
|
pstr_ton_params.ave_n_tonal = &ave_n_tonal;
|
|
pstr_ton_params.ave_n_tonal_short = &ave_n_tonal_short;
|
|
/** analysis tonal
|
|
*/
|
|
iusace_tonal_analysis(&pstr_ton_params, pstr_scratch, ccfl);
|
|
|
|
ptr_spec_params.time_signal = ptr_time_signal;
|
|
ptr_spec_params.framecnt_xm = *framecnt_xm;
|
|
ptr_spec_params.spec_tilt_buf = spec_tilt_buf;
|
|
ptr_spec_params.msd_spec_tilt = &msd_spec_tilt;
|
|
ptr_spec_params.msd_spec_tilt_short = &msd_spec_tilt_short;
|
|
/** analysis spectral tilt
|
|
*/
|
|
iusace_spectral_tilt_analysis(&ptr_spec_params, ccfl);
|
|
|
|
pstr_mode_params.framecnt = *framecnt;
|
|
pstr_mode_params.framecnt_xm = framecnt_xm;
|
|
pstr_mode_params.flag_border = &flag_border;
|
|
pstr_mode_params.ave_n_tonal_short = ave_n_tonal_short;
|
|
pstr_mode_params.ave_n_tonal = ave_n_tonal;
|
|
pstr_mode_params.ave_n_tonal_short_buf = ave_n_tonal_short_buf;
|
|
pstr_mode_params.ave_n_tonal_buf = ave_n_tonal_buf;
|
|
pstr_mode_params.msd_spec_tilt = msd_spec_tilt;
|
|
pstr_mode_params.msd_spec_tilt_short = msd_spec_tilt_short;
|
|
pstr_mode_params.msd_spec_tilt_buf = msd_spec_tilt_buf;
|
|
pstr_mode_params.msd_spec_tilt_short_buf = msd_spec_tilt_short_buf;
|
|
pstr_mode_params.n_tonal_low_frequency_ratio = n_tonal_low_frequency_ratio;
|
|
pstr_mode_params.frame_energy = ptr_spec_params.frame_energy;
|
|
/** initial mode decision and boundary decisions
|
|
*/
|
|
init_mode_decision_result = iusace_init_mode_decision(&pstr_mode_params);
|
|
|
|
smooth_param.flag_border_buf_behind = pstr_buffers->flag_border_buf_behind;
|
|
smooth_param.flag_border_buf_ahead = pstr_buffers->flag_border_buf_ahead;
|
|
smooth_param.frame_energy = ptr_spec_params.frame_energy;
|
|
smooth_param.frame_energy_buf_behind = pstr_buffers->frame_energy_buf_behind;
|
|
smooth_param.frame_energy_buf_ahead = pstr_buffers->frame_energy_buf_ahead;
|
|
smooth_param.smoothing_result_buf = pstr_buffers->smoothing_result_buf;
|
|
smooth_param.init_result_ahead = pstr_buffers->init_result_ahead;
|
|
smooth_param.flag_border = flag_border;
|
|
smooth_param.init_result_behind = pstr_buffers->init_result_behind;
|
|
smooth_param.init_mode_decision_result = init_mode_decision_result;
|
|
smooth_param.flag_speech_definite = 0;
|
|
smooth_param.count_small_energy = 0;
|
|
smooth_param.flag_music_definite = 0;
|
|
smooth_param.num_smoothing = 0;
|
|
/* smoothing */
|
|
mode_decision_result = iusace_smoothing_mode_decision(&smooth_param);
|
|
|
|
return mode_decision_result;
|
|
}
|
|
|
|
VOID iusace_classification(ia_classification_struct *pstr_sig_class,
|
|
iusace_scratch_mem *pstr_scratch, WORD32 ccfl) {
|
|
WORD32 n_frames, n_class, avg_cls, nf;
|
|
WORD32 i;
|
|
FLOAT32 *ptr_time_signal = pstr_scratch->p_time_signal;
|
|
WORD32 mode_decision_result;
|
|
|
|
n_frames = pstr_sig_class->n_buffer_samples / ccfl;
|
|
|
|
for (nf = 0; nf < n_frames; nf++) {
|
|
for (i = 0; i < ccfl; i++) {
|
|
ptr_time_signal[i] = pstr_sig_class->input_samples[ccfl * nf + i];
|
|
}
|
|
|
|
/* classification of ccfl-frame */
|
|
mode_decision_result =
|
|
iusace_classification_ccfl(pstr_sig_class, ptr_time_signal, pstr_scratch, ccfl);
|
|
|
|
/* coding mode decision of 1024-frame */
|
|
if ((mode_decision_result == MUSIC) || (mode_decision_result == MUSIC_DEFINITE)) {
|
|
pstr_sig_class->coding_mode = FD_MODE;
|
|
} else if ((mode_decision_result == SPEECH) || (mode_decision_result == SPEECH_DEFINITE)) {
|
|
pstr_sig_class->coding_mode = TD_MODE;
|
|
}
|
|
|
|
pstr_sig_class->class_buf[pstr_sig_class->n_buf_class + nf] = pstr_sig_class->coding_mode;
|
|
pstr_sig_class->pre_mode = pstr_sig_class->coding_mode;
|
|
}
|
|
|
|
/* merge ccfl-frame results */
|
|
pstr_sig_class->n_buf_class += n_frames;
|
|
n_class = (pstr_sig_class->n_class_frames > pstr_sig_class->n_buf_class)
|
|
? pstr_sig_class->n_buf_class
|
|
: pstr_sig_class->n_class_frames;
|
|
{
|
|
WORD32 min_cls, max_cls;
|
|
|
|
min_cls = max_cls = pstr_sig_class->class_buf[0];
|
|
for (i = 1; i < n_class; i++) {
|
|
if (pstr_sig_class->class_buf[i] > max_cls) {
|
|
max_cls = pstr_sig_class->class_buf[i];
|
|
} else if (pstr_sig_class->class_buf[i] < min_cls) {
|
|
min_cls = pstr_sig_class->class_buf[i];
|
|
}
|
|
}
|
|
|
|
avg_cls = 0;
|
|
for (i = 0; i < n_class; i++) {
|
|
if (pstr_sig_class->class_buf[i] == max_cls) {
|
|
avg_cls += 1;
|
|
}
|
|
if (pstr_sig_class->class_buf[i] == min_cls) {
|
|
avg_cls += -1;
|
|
}
|
|
}
|
|
|
|
if (avg_cls > 0) {
|
|
pstr_sig_class->coding_mode = max_cls;
|
|
} else {
|
|
pstr_sig_class->coding_mode = min_cls;
|
|
}
|
|
}
|
|
|
|
/* shift, save pre_mode and unused class */
|
|
if (n_class > 0) {
|
|
pstr_sig_class->pre_mode = pstr_sig_class->class_buf[n_class - 1];
|
|
}
|
|
pstr_sig_class->n_buf_class -= n_class;
|
|
pstr_sig_class->n_buffer_samples -= ccfl * n_frames;
|
|
|
|
WORD32 minimum = MIN(pstr_sig_class->n_buf_class, pstr_sig_class->n_buffer_samples);
|
|
if (minimum == pstr_sig_class->n_buf_class) {
|
|
for (i = 0; i < minimum; i++) {
|
|
pstr_sig_class->class_buf[i] = pstr_sig_class->class_buf[i + n_class];
|
|
pstr_sig_class->input_samples[i] = pstr_sig_class->input_samples[i + ccfl * n_frames];
|
|
}
|
|
|
|
/* shift, save unused samples */
|
|
for (; i < pstr_sig_class->n_buffer_samples; i++) {
|
|
pstr_sig_class->input_samples[i] = pstr_sig_class->input_samples[i + ccfl * n_frames];
|
|
}
|
|
} else {
|
|
for (i = 0; i < minimum; i++) {
|
|
pstr_sig_class->class_buf[i] = pstr_sig_class->class_buf[i + n_class];
|
|
pstr_sig_class->input_samples[i] = pstr_sig_class->input_samples[i + ccfl * n_frames];
|
|
}
|
|
|
|
/* shift, save unused samples */
|
|
for (; i < pstr_sig_class->n_buf_class; i++) {
|
|
pstr_sig_class->class_buf[i] = pstr_sig_class->class_buf[i + n_class];
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
 * Resets the signal classifier state to its start-up values.
 *
 * Counters and sample/class fill levels are zeroed, the previous mode is set to
 * FD_MODE, the merge window is set to 2 frames, mode switching is enabled and
 * all history buffers are cleared. init_flag is cleared as well, so the
 * per-frame classifier performs its own first-call initialization again.
 *
 * @param pstr_sig_class  classifier state to reset
 */
VOID iusace_init_classification(ia_classification_struct *pstr_sig_class) {
  /* scalar state */
  pstr_sig_class->init_flag = 0;
  pstr_sig_class->framecnt = 0;
  pstr_sig_class->framecnt_xm = 0;
  pstr_sig_class->n_buffer_samples = 0;
  pstr_sig_class->n_buf_class = 0;
  pstr_sig_class->n_class_frames = 2;
  pstr_sig_class->is_switch_mode = 1;
  pstr_sig_class->pre_mode = FD_MODE;

  /* pending input samples */
  memset(pstr_sig_class->input_samples, 0, sizeof(FLOAT32) * (3840 * 2));

  /* decision history */
  memset(&pstr_sig_class->buffers, 0, sizeof(pstr_sig_class->buffers));

  /* 100-entry analysis histories */
  memset(pstr_sig_class->spec_tilt_buf, 0, 100 * sizeof(FLOAT32));
  memset(pstr_sig_class->n_tonal, 0, 100 * sizeof(WORD32));
  memset(pstr_sig_class->n_tonal_low_frequency, 0, 100 * sizeof(WORD32));

  /* 5-entry feature histories */
  memset(pstr_sig_class->msd_spec_tilt_buf, 0, 5 * sizeof(FLOAT32));
  memset(pstr_sig_class->msd_spec_tilt_short_buf, 0, 5 * sizeof(FLOAT32));
  memset(pstr_sig_class->ave_n_tonal_short_buf, 0, 5 * sizeof(FLOAT32));
  memset(pstr_sig_class->ave_n_tonal_buf, 0, 5 * sizeof(FLOAT32));
}
|