mirror of
https://github.com/ittiam-systems/libxaac.git
synced 2026-04-02 20:30:47 +07:00
1586 lines
No EOL
43 KiB
C
1586 lines
No EOL
43 KiB
C
/******************************************************************************
|
|
* *
|
|
* Copyright (C) 2023 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at:
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*
|
|
*****************************************************************************
|
|
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
|
*/
|
|
|
|
#include <string.h>
|
|
#include "ixheaac_type_def.h"
|
|
#include "ixheaace_adjust_threshold_data.h"
|
|
#include "iusace_cnst.h"
|
|
#include "iusace_block_switch_const.h"
|
|
#include "iusace_rom.h"
|
|
#include "iusace_bitbuffer.h"
|
|
|
|
/* DRC */
|
|
#include "impd_drc_common_enc.h"
|
|
#include "impd_drc_uni_drc.h"
|
|
#include "impd_drc_tables.h"
|
|
#include "impd_drc_api.h"
|
|
#include "impd_drc_uni_drc_eq.h"
|
|
#include "impd_drc_uni_drc_filter_bank.h"
|
|
#include "impd_drc_gain_enc.h"
|
|
#include "impd_drc_struct_def.h"
|
|
|
|
#include "iusace_tns_usac.h"
|
|
#include "iusace_psy_mod.h"
|
|
#include "iusace_config.h"
|
|
#include "iusace_fft.h"
|
|
#include "iusace_basic_ops_flt.h"
|
|
#include "ixheaac_constants.h"
|
|
#include "ixheaace_aac_constants.h"
|
|
#include "ixheaac_basic_ops32.h"
|
|
#include "ixheaace_common_utils.h"
|
|
#include "ixheaac_error_standards.h"
|
|
#include "ixheaace_error_codes.h"
|
|
|
|
/* Base-4 digit reversal of index (i), producing (j).
 * The three swap stages reverse the 16 base-4 digit pairs of a 32-bit word
 * pairwise (2-bit groups, then nibbles, then bytes); the final right shift
 * by (m) drops the unused low digits so that (j) indexes the bit-reversed
 * position for an FFT of the actual transform length.
 * Multi-statement macro wrapped in do/while(0); the temporary `_` avoids
 * evaluating (i) more than once. */
#define DIG_REV(i, m, j)                                          \
  do {                                                            \
    unsigned _ = (i);                                             \
    _ = ((_ & 0x33333333) << 2) | ((_ & ~0x33333333) >> 2);       \
    _ = ((_ & 0x0F0F0F0F) << 4) | ((_ & ~0x0F0F0F0F) >> 4);       \
    _ = ((_ & 0x00FF00FF) << 8) | ((_ & ~0x00FF00FF) >> 8);       \
    (j) = _ >> (m);                                               \
  } while (0)
|
|
|
|
/* Returns the normalization shift count of a 32-bit value: the number of
 * left shifts needed to bring the magnitude into [0x40000000, 0x7FFFFFFF].
 * Both 0 and -1 (0xffffffff) are reported as the maximum norm of 31,
 * matching the classic fixed-point norm_l() convention. */
static PLATFORM_INLINE WORD8 iusace_calc_norm(WORD32 a) {
  WORD8 shift_cnt = 0;

  /* Degenerate inputs with no significant magnitude bits. */
  if ((a == 0) || (a == (WORD32)0xffffffffL)) {
    return 31;
  }

  /* Fold negative values onto the non-negative range (one's complement,
   * as in the standard norm operator). */
  if (a < 0) {
    a = ~a;
  }

  /* Count shifts until the value reaches the normalized range. */
  while (a < (WORD32)0x40000000L) {
    a <<= 1;
    shift_cnt++;
  }

  return shift_cnt;
}
|
|
|
|
/* 3-point complex DFT of interleaved (re, im) input, written to ptr_out.
 * Implements the standard radix-3 butterfly:
 *   X0 = x0 + x1 + x2
 *   X1 = x0 - (x1+x2)/2 - j*sin(pi/3)*(x1-x2)
 *   X2 = x0 - (x1+x2)/2 + j*sin(pi/3)*(x1-x2)
 * Input and output are 3 complex values, i.e. 6 floats each. */
static PLATFORM_INLINE VOID iusace_complex_3point_fft(FLOAT32 *ptr_in, FLOAT32 *ptr_out) {
  /* sin(pi/3) kept in double precision for the rotation products. */
  FLOAT64 sin_mu = 0.866025403784439;
  FLOAT32 sum_re, diff_re;
  FLOAT32 sum_im, diff_im;
  FLOAT32 acc_re, acc_im, re_minus_half;
  FLOAT32 half_sum_re, half_sum_im, rot_im, rot_re;

  /* Partial sum of x0 and x1 (real/imag). */
  acc_re = ptr_in[0] + ptr_in[2];
  acc_im = ptr_in[1] + ptr_in[3];

  /* Sum and difference of x1 and x2. */
  sum_re = ptr_in[2] + ptr_in[4];
  sum_im = ptr_in[3] + ptr_in[5];
  diff_re = ptr_in[2] - ptr_in[4];
  diff_im = ptr_in[3] - ptr_in[5];

  /* (x1+x2)/2 and sin(pi/3)*(x1-x2) terms. */
  half_sum_re = sum_re / (FLOAT32)2.0;
  half_sum_im = sum_im / (FLOAT32)2.0;
  rot_im = (FLOAT32)((FLOAT64)diff_im * sin_mu);
  rot_re = (FLOAT32)((FLOAT64)diff_re * sin_mu);

  re_minus_half = ptr_in[0] - half_sum_re;

  ptr_out[0] = acc_re + ptr_in[4];               /* X0 real */
  ptr_out[1] = acc_im + ptr_in[5];               /* X0 imag */
  ptr_out[2] = re_minus_half + rot_im;           /* X1 real */
  ptr_out[3] = (ptr_in[1] - rot_re) - half_sum_im; /* X1 imag */
  ptr_out[4] = re_minus_half - rot_im;           /* X2 real */
  ptr_out[5] = (ptr_in[1] + rot_re) - half_sum_im; /* X2 imag */

  return;
}
|
|
|
|
/* In-place power-of-two complex FFT on interleaved (re, im) data.
 *
 * ptr_x             : nlength complex values (2*nlength floats), overwritten
 *                     with the transform result.
 * nlength           : transform length; must be a power of two.
 * scratch_fft_p2_y  : scratch of at least 2*nlength floats.
 *
 * Structure: a first radix-4 pass with digit-reversed input addressing,
 * then log4(n)-1 radix-4 stages using double-precision twiddles from
 * iusace_twiddle_table_fft_32x32 (table laid out so that the matching
 * sine is 257 entries after the cosine), and a final radix-2 stage when
 * log2(n) is odd. The butterfly code depends on exact statement order
 * (values are updated in place, e.g. x2r = x0r - 2*x2r reconstructs the
 * difference after x0r already holds the sum), so nothing here should be
 * reordered. */
VOID iusace_complex_fft_p2(FLOAT32 *ptr_x, WORD32 nlength, FLOAT32 *scratch_fft_p2_y) {
  WORD32 i, j, k, n_stages, h2;
  FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
  FLOAT32 tmp;
  WORD32 del, nodespacing, in_loop_cnt;
  WORD32 not_power_4;
  WORD32 dig_rev_shift;
  FLOAT32 *y = scratch_fft_p2_y;
  WORD32 mpass = nlength;
  WORD32 npoints = nlength;
  FLOAT32 *ptr_y = y;
  const FLOAT64 *ptr_w;

  /* Shift that aligns DIG_REV output with this transform length. */
  dig_rev_shift = iusace_calc_norm(mpass) + 1 - 16;
  /* n_stages = log2(npoints). */
  n_stages = 30 - iusace_calc_norm(mpass);
  /* Odd log2 -> an extra radix-2 stage is needed at the end. */
  not_power_4 = n_stages & 1;

  n_stages = n_stages >> 1;

  ptr_w = iusace_twiddle_table_fft_32x32;

  if (dig_rev_shift < 0) {
    dig_rev_shift = 0;
  }

  /* First radix-4 stage: gather four inputs at digit-reversed offsets,
   * butterfly them, and write the results contiguously into scratch y. */
  for (i = 0; i < npoints; i += 4) {
    FLOAT32 *inp = ptr_x;
    FLOAT32 tmk;

    DIG_REV(i, dig_rev_shift, h2);
    if (not_power_4) {
      /* Round the reversed index up to even for odd-log2 lengths. */
      h2 += 1;
      h2 &= ~1;
    }
    inp += (h2);

    x0r = *inp;
    x0i = *(inp + 1);
    inp += (npoints >> 1);

    x1r = *inp;
    x1i = *(inp + 1);
    inp += (npoints >> 1);

    x2r = *inp;
    x2i = *(inp + 1);
    inp += (npoints >> 1);

    x3r = *inp;
    x3i = *(inp + 1);

    /* Radix-4 butterfly; each "tmk = a - b; b = tmk - b" pair yields
     * b = a - 2b, i.e. the difference of the original operands. */
    x0r = x0r + x2r;
    x0i = x0i + x2i;

    tmk = x0r - x2r;
    x2r = tmk - x2r;
    tmk = x0i - x2i;
    x2i = tmk - x2i;

    x1r = x1r + x3r;
    x1i = x1i + x3i;

    tmk = x1r - x3r;
    x3r = tmk - x3r;
    tmk = x1i - x3i;
    x3i = tmk - x3i;

    x0r = x0r + x1r;
    x0i = x0i + x1i;

    tmk = x0r - x1r;
    x1r = tmk - x1r;
    tmk = x0i - x1i;
    x1i = tmk - x1i;

    /* Multiply (x3r, x3i) by -j, folded into the output ordering. */
    x2r = x2r + x3i;
    x2i = x2i - x3r;

    tmk = x2r - x3i;
    x3i = tmk - x3i;
    tmk = x2i + x3r;
    x3r = tmk + x3r;

    *ptr_y++ = x0r;
    *ptr_y++ = x0i;
    *ptr_y++ = x2r;
    *ptr_y++ = x2i;
    *ptr_y++ = x1r;
    *ptr_y++ = x1i;
    *ptr_y++ = x3i;
    *ptr_y++ = x3r;
  }
  ptr_y -= 2 * npoints; /* rewind to start of scratch */
  del = 4;              /* butterfly span (complex elements) */
  nodespacing = 64;     /* twiddle-table stride for this stage */
  in_loop_cnt = npoints >> 4;
  /* Remaining radix-4 stages, operating in place on scratch y. */
  for (i = n_stages - 1; i > 0; i--) {
    const FLOAT64 *twiddles = ptr_w;
    FLOAT32 *data = ptr_y;
    FLOAT64 w_1, w_2, w_3, w_4, w_5, w_6;
    WORD32 sec_loop_cnt;

    /* j == 0 butterflies: twiddles are all 1, no rotation needed. */
    for (k = in_loop_cnt; k != 0; k--) {
      x0r = (*data);
      x0i = (*(data + 1));
      data += ((SIZE_T)del << 1);

      x1r = (*data);
      x1i = (*(data + 1));
      data += ((SIZE_T)del << 1);

      x2r = (*data);
      x2i = (*(data + 1));
      data += ((SIZE_T)del << 1);

      x3r = (*data);
      x3i = (*(data + 1));
      data -= 3 * (del << 1);

      x0r = x0r + x2r;
      x0i = x0i + x2i;
      x2r = x0r - (x2r * 2);
      x2i = x0i - (x2i * 2);
      x1r = x1r + x3r;
      x1i = x1i + x3i;
      x3r = x1r - (x3r * 2);
      x3i = x1i - (x3i * 2);

      x0r = x0r + x1r;
      x0i = x0i + x1i;
      x1r = x0r - (x1r * 2);
      x1i = x0i - (x1i * 2);
      x2r = x2r + x3i;
      x2i = x2i - x3r;
      x3i = x2r - (x3i * 2);
      x3r = x2i + (x3r * 2);

      *data = x0r;
      *(data + 1) = x0i;
      data += ((SIZE_T)del << 1);

      *data = x2r;
      *(data + 1) = x2i;
      data += ((SIZE_T)del << 1);

      *data = x1r;
      *(data + 1) = x1i;
      data += ((SIZE_T)del << 1);

      *data = x3i;
      *(data + 1) = x3r;
      data += ((SIZE_T)del << 1);
    }
    data = ptr_y + 2;

    /* Boundary below which all three twiddle reads (j, 2j, 3j) fall in
     * the first table octant; approximates (nodespacing*del)*(0.25 +
     * 0.125 - ...) via the alternating-division series. */
    sec_loop_cnt = (nodespacing * del);
    sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
                   (sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
                   (sec_loop_cnt / 256);

    /* Region 1: cos at +0, sin at +257 for all of w1/w2/w3. */
    for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
      w_1 = *(twiddles + j);
      w_4 = *(twiddles + j + 257);
      w_2 = *(twiddles + ((SIZE_T)j << 1));
      w_5 = *(twiddles + ((SIZE_T)j << 1) + 257);
      w_3 = *(twiddles + j + ((SIZE_T)j << 1));
      w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 257);

      for (k = in_loop_cnt; k != 0; k--) {
        data += ((SIZE_T)del << 1);

        x1r = *data;
        x1i = *(data + 1);
        data += ((SIZE_T)del << 1);

        x2r = *data;
        x2i = *(data + 1);
        data += ((SIZE_T)del << 1);

        x3r = *data;
        x3i = *(data + 1);
        data -= 3 * (del << 1);

        /* Complex rotations by w1, w2, w3. */
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_1) - ixheaace_dmult((FLOAT64)x1i, w_4));
        x1i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1r, w_4), (FLOAT64)x1i, w_1);
        x1r = tmp;

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2r, w_2) - ixheaace_dmult((FLOAT64)x2i, w_5));
        x2i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2r, w_5), (FLOAT64)x2i, w_2);
        x2r = tmp;

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3r, w_3) - ixheaace_dmult((FLOAT64)x3i, w_6));
        x3i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3r, w_6), (FLOAT64)x3i, w_3);
        x3r = tmp;

        /* x0 is loaded after the rotations to reduce register pressure. */
        x0r = (*data);
        x0i = (*(data + 1));

        x0r = x0r + (x2r);
        x0i = x0i + (x2i);
        x2r = x0r - (x2r * 2);
        x2i = x0i - (x2i * 2);
        x1r = x1r + x3r;
        x1i = x1i + x3i;
        x3r = x1r - (x3r * 2);
        x3i = x1i - (x3i * 2);

        x0r = x0r + (x1r);
        x0i = x0i + (x1i);
        x1r = x0r - (x1r * 2);
        x1i = x0i - (x1i * 2);
        x2r = x2r + (x3i);
        x2i = x2i - (x3r);
        x3i = x2r - (x3i * 2);
        x3r = x2i + (x3r * 2);

        *data = x0r;
        *(data + 1) = x0i;
        data += ((SIZE_T)del << 1);

        *data = x2r;
        *(data + 1) = x2i;
        data += ((SIZE_T)del << 1);

        *data = x1r;
        *(data + 1) = x1i;
        data += ((SIZE_T)del << 1);

        *data = x3i;
        *(data + 1) = x3r;
        data += ((SIZE_T)del << 1);
      }
      data -= 2 * npoints;
      data += 2;
    }
    /* Region 2: 3j has wrapped past the quarter table; w3 is fetched with
     * swapped cos/sin offsets and the x3 rotation formula changes sign. */
    for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
      w_1 = *(twiddles + j);
      w_4 = *(twiddles + j + 257);
      w_2 = *(twiddles + ((SIZE_T)j << 1));
      w_5 = *(twiddles + ((SIZE_T)j << 1) + 257);
      w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
      w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);

      for (k = in_loop_cnt; k != 0; k--) {
        data += ((SIZE_T)del << 1);

        x1r = *data;
        x1i = *(data + 1);
        data += ((SIZE_T)del << 1);

        x2r = *data;
        x2i = *(data + 1);
        data += ((SIZE_T)del << 1);

        x3r = *data;
        x3i = *(data + 1);
        data -= 3 * (del << 1);

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_1) - ixheaace_dmult((FLOAT64)x1i, w_4));
        x1i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1r, w_4), (FLOAT64)x1i, w_1);
        x1r = tmp;

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2r, w_2) - ixheaace_dmult((FLOAT64)x2i, w_5));
        x2i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x2r, w_5), (FLOAT64)x2i, w_2);
        x2r = tmp;

        /* Quadrant-adjusted rotation for w3. */
        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3r, w_6) + ixheaace_dmult((FLOAT64)x3i, w_3));
        x3i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x3r, w_3) + ixheaace_dmult((FLOAT64)x3i, w_6));
        x3r = tmp;

        x0r = (*data);
        x0i = (*(data + 1));

        x0r = x0r + (x2r);
        x0i = x0i + (x2i);
        x2r = x0r - (x2r * 2);
        x2i = x0i - (x2i * 2);
        x1r = x1r + x3r;
        x1i = x1i + x3i;
        x3r = x1r - (x3r * 2);
        x3i = x1i - (x3i * 2);

        x0r = x0r + (x1r);
        x0i = x0i + (x1i);
        x1r = x0r - (x1r * 2);
        x1i = x0i - (x1i * 2);
        x2r = x2r + (x3i);
        x2i = x2i - (x3r);
        x3i = x2r - (x3i * 2);
        x3r = x2i + (x3r * 2);

        *data = x0r;
        *(data + 1) = x0i;
        data += ((SIZE_T)del << 1);

        *data = x2r;
        *(data + 1) = x2i;
        data += ((SIZE_T)del << 1);

        *data = x1r;
        *(data + 1) = x1i;
        data += ((SIZE_T)del << 1);

        *data = x3i;
        *(data + 1) = x3r;
        data += ((SIZE_T)del << 1);
      }
      data -= 2 * npoints;
      data += 2;
    }
    /* Region 3: both 2j and 3j are past the quarter table; w2 and w3 use
     * the swapped offsets and sign-adjusted rotations. */
    for (; j <= sec_loop_cnt * 2; j += nodespacing) {
      w_1 = *(twiddles + j);
      w_4 = *(twiddles + j + 257);
      w_2 = *(twiddles + ((SIZE_T)j << 1) - 256);
      w_5 = *(twiddles + ((SIZE_T)j << 1) + 1);
      w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
      w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);

      for (k = in_loop_cnt; k != 0; k--) {
        data += ((SIZE_T)del << 1);

        x1r = *data;
        x1i = *(data + 1);
        data += ((SIZE_T)del << 1);

        x2r = *data;
        x2i = *(data + 1);
        data += ((SIZE_T)del << 1);

        x3r = *data;
        x3i = *(data + 1);
        data -= 3 * (del << 1);

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_1) - ixheaace_dmult((FLOAT64)x1i, w_4));
        x1i = (FLOAT32)ixheaace_dmac(ixheaace_dmult(x1r, w_4), x1i, w_1);
        x1r = tmp;

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2r, w_5) + ixheaace_dmult((FLOAT64)x2i, w_2));
        x2i = (FLOAT32)(-ixheaace_dmult(x2r, w_2) + ixheaace_dmult(x2i, w_5));
        x2r = tmp;

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x3r, w_6) + ixheaace_dmult((FLOAT64)x3i, w_3));
        x3i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x3r, w_3) + ixheaace_dmult((FLOAT64)x3i, w_6));
        x3r = tmp;

        x0r = (*data);
        x0i = (*(data + 1));

        x0r = x0r + (x2r);
        x0i = x0i + (x2i);
        x2r = x0r - (x2r * 2);
        x2i = x0i - (x2i * 2);
        x1r = x1r + x3r;
        x1i = x1i + x3i;
        x3r = x1r - (x3r * 2);
        x3i = x1i - (x3i * 2);

        x0r = x0r + (x1r);
        x0i = x0i + (x1i);
        x1r = x0r - (x1r * 2);
        x1i = x0i - (x1i * 2);
        x2r = x2r + (x3i);
        x2i = x2i - (x3r);
        x3i = x2r - (x3i * 2);
        x3r = x2i + (x3r * 2);

        *data = x0r;
        *(data + 1) = x0i;
        data += ((SIZE_T)del << 1);

        *data = x2r;
        *(data + 1) = x2i;
        data += ((SIZE_T)del << 1);

        *data = x1r;
        *(data + 1) = x1i;
        data += ((SIZE_T)del << 1);

        *data = x3i;
        *(data + 1) = x3r;
        data += ((SIZE_T)del << 1);
      }
      data -= 2 * npoints;
      data += 2;
    }
    /* Region 4: 3j past the half table; w3 offsets wrap by 512 and both
     * the rotation and the butterfly adjust their signs for x3. */
    for (; j < nodespacing * del; j += nodespacing) {
      w_1 = *(twiddles + j);
      w_4 = *(twiddles + j + 257);
      w_2 = *(twiddles + ((SIZE_T)j << 1) - 256);
      w_5 = *(twiddles + ((SIZE_T)j << 1) + 1);
      w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 512);
      w_6 = *(twiddles + j + ((SIZE_T)j << 1) - 512 + 257);

      for (k = in_loop_cnt; k != 0; k--) {
        data += ((SIZE_T)del << 1);

        x1r = *data;
        x1i = *(data + 1);
        data += ((SIZE_T)del << 1);

        x2r = *data;
        x2i = *(data + 1);
        data += ((SIZE_T)del << 1);

        x3r = *data;
        x3i = *(data + 1);
        data -= 3 * ((SIZE_T)del << 1);

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_1) - ixheaace_dmult((FLOAT64)x1i, w_4));
        x1i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1r, w_4), (FLOAT64)x1i, w_1);
        x1r = tmp;

        tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x2r, w_5) + ixheaace_dmult((FLOAT64)x2i, w_2));
        x2i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x2r, w_2) + ixheaace_dmult((FLOAT64)x2i, w_5));
        x2r = tmp;

        tmp = (FLOAT32)(-ixheaace_dmult((FLOAT64)x3r, w_3) + ixheaace_dmult((FLOAT64)x3i, w_6));
        x3i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x3r, w_6), (FLOAT64)x3i, w_3);
        x3r = tmp;

        x0r = (*data);
        x0i = (*(data + 1));

        x0r = x0r + (x2r);
        x0i = x0i + (x2i);
        x2r = x0r - (x2r * 2);
        x2i = x0i - (x2i * 2);
        /* Note the sign flips on the x3 imaginary path in this region. */
        x1r = x1r + x3r;
        x1i = x1i - x3i;
        x3r = x1r - (x3r * 2);
        x3i = x1i + (x3i * 2);

        x0r = x0r + (x1r);
        x0i = x0i + (x1i);
        x1r = x0r - (x1r * 2);
        x1i = x0i - (x1i * 2);
        x2r = x2r + (x3i);
        x2i = x2i - (x3r);
        x3i = x2r - (x3i * 2);
        x3r = x2i + (x3r * 2);

        *data = x0r;
        *(data + 1) = x0i;
        data += ((SIZE_T)del << 1);

        *data = x2r;
        *(data + 1) = x2i;
        data += ((SIZE_T)del << 1);

        *data = x1r;
        *(data + 1) = x1i;
        data += ((SIZE_T)del << 1);

        *data = x3i;
        *(data + 1) = x3r;
        data += ((SIZE_T)del << 1);
      }
      data -= 2 * npoints;
      data += 2;
    }
    nodespacing >>= 2;
    del <<= 2;
    in_loop_cnt >>= 2;
  }
  /* Final radix-2 stage for odd log2(npoints); split into two half loops
   * because the twiddle table only covers half the required angles
   * (the second half uses the swapped cos/sin form). */
  if (not_power_4) {
    const FLOAT64 *twiddles = ptr_w;
    nodespacing <<= 1;

    for (j = del / 2; j != 0; j--) {
      FLOAT64 w_1 = *twiddles;
      FLOAT64 w_4 = *(twiddles + 257);
      twiddles += nodespacing;

      x0r = *ptr_y;
      x0i = *(ptr_y + 1);
      ptr_y += ((SIZE_T)del << 1);

      x1r = *ptr_y;
      x1i = *(ptr_y + 1);

      tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_1) - ixheaace_dmult((FLOAT64)x1i, w_4));
      x1i = (FLOAT32)ixheaace_dmac(ixheaace_dmult((FLOAT64)x1r, w_4), (FLOAT64)x1i, w_1);
      x1r = tmp;

      *ptr_y = (x0r) - (x1r);
      *(ptr_y + 1) = (x0i) - (x1i);
      ptr_y -= ((SIZE_T)del << 1);

      *ptr_y = (x0r) + (x1r);
      *(ptr_y + 1) = (x0i) + (x1i);
      ptr_y += 2;
    }
    twiddles = ptr_w;
    for (j = del / 2; j != 0; j--) {
      FLOAT64 w_1 = *twiddles;
      FLOAT64 w_4 = *(twiddles + 257);
      twiddles += nodespacing;

      x0r = *ptr_y;
      x0i = *(ptr_y + 1);
      ptr_y += ((SIZE_T)del << 1);

      x1r = *ptr_y;
      x1i = *(ptr_y + 1);

      /* Second-half rotation: swapped cos/sin roles. */
      tmp = (FLOAT32)(ixheaace_dmult((FLOAT64)x1r, w_4) + ixheaace_dmult((FLOAT64)x1i, w_1));
      x1i = (FLOAT32)(-ixheaace_dmult((FLOAT64)x1r, w_1) + ixheaace_dmult((FLOAT64)x1i, w_4));
      x1r = tmp;

      *ptr_y = (x0r) - (x1r);
      *(ptr_y + 1) = (x0i) - (x1i);
      ptr_y -= ((SIZE_T)del << 1);

      *ptr_y = (x0r) + (x1r);
      *(ptr_y + 1) = (x0i) + (x1i);
      ptr_y += 2;
    }
  }

  /* Copy the result from scratch back into the caller's buffer. */
  for (i = 0; i < nlength; i++) {
    *(ptr_x + 2 * i) = y[2 * i];
    *(ptr_x + 2 * i + 1) = y[2 * i + 1];
  }
}
|
|
|
|
/* In-place mixed-radix complex FFT for lengths of the form 3 * 2^k
 * (interleaved re/im data). Decimates the input into 3 interleaved
 * sequences, runs a power-of-two FFT on each, applies inter-sequence
 * twiddles, and combines with 3-point DFTs. Scratch buffers come from
 * pstr_scratch (p_fft_p3_data_3, p_fft_p3_y, p_fft_p2_y). */
static VOID iusace_complex_fft_p3(FLOAT32 *data, WORD32 nlength,
                                  iusace_scratch_mem *pstr_scratch) {
  WORD32 i, j;
  FLOAT32 *data_3 = pstr_scratch->p_fft_p3_data_3;
  FLOAT32 *y = pstr_scratch->p_fft_p3_y;
  WORD32 cnfac;
  WORD32 mpass = nlength;
  FLOAT32 *ptr_x = data;
  FLOAT32 *ptr_y = y;

  /* Strip factors of 3; mpass becomes the power-of-two sub-FFT length. */
  cnfac = 0;
  while (mpass % 3 == 0) {
    mpass /= 3;
    cnfac++;
  }

  /* De-interleave each of the 3 (per factor) subsequences, FFT it with
   * the power-of-two kernel, and interleave it back. */
  for (i = 0; i < 3 * cnfac; i++) {
    for (j = 0; j < mpass; j++) {
      data_3[2 * j] = data[3 * (2 * j) + (2 * i)];
      data_3[2 * j + 1] = data[3 * (2 * j) + 1 + (2 * i)];
    }
    iusace_complex_fft_p2(data_3, mpass, pstr_scratch->p_fft_p2_y);

    for (j = 0; j < mpass; j++) {
      data[3 * (2 * j) + (2 * i)] = data_3[2 * j];
      data[3 * (2 * j) + 1 + (2 * i)] = data_3[2 * j + 1];
    }
  }

  /* Twiddle each group of 3 complex values before the radix-3 combine;
   * tables iusace_twiddle_table_3pr/3pi hold the cos/sin factors, and the
   * final stride skip selects the table rows for this mpass. */
  {
    const FLOAT64 *w1r, *w1i;
    FLOAT32 tmp;
    w1r = iusace_twiddle_table_3pr;
    w1i = iusace_twiddle_table_3pi;

    for (i = 0; i < nlength; i += 3) {
      tmp = (FLOAT32)((FLOAT64)data[2 * i] * (*w1r) - (FLOAT64)data[2 * i + 1] * (*w1i));
      data[2 * i + 1] =
          (FLOAT32)((FLOAT64)data[2 * i] * (*w1i) + (FLOAT64)data[2 * i + 1] * (*w1r));
      data[2 * i] = tmp;

      w1r++;
      w1i++;

      tmp = (FLOAT32)((FLOAT64)data[2 * (i + 1)] * (*w1r) -
                      (FLOAT64)data[2 * (i + 1) + 1] * (*w1i));
      data[2 * (i + 1) + 1] = (FLOAT32)((FLOAT64)data[2 * (i + 1)] * (*w1i) +
                                        (FLOAT64)data[2 * (i + 1) + 1] * (*w1r));
      data[2 * (i + 1)] = tmp;

      w1r++;
      w1i++;

      tmp = (FLOAT32)((FLOAT64)data[2 * (i + 2)] * (*w1r) -
                      (FLOAT64)data[2 * (i + 2) + 1] * (*w1i));
      data[2 * (i + 2) + 1] = (FLOAT32)((FLOAT64)data[2 * (i + 2)] * (*w1i) +
                                        (FLOAT64)data[2 * (i + 2) + 1] * (*w1r));
      data[2 * (i + 2)] = tmp;

      /* Skip to the next relevant table row (table built for mpass=128). */
      w1r += 3 * (128 / mpass - 1) + 1;
      w1i += 3 * (128 / mpass - 1) + 1;
    }
  }

  /* Combine consecutive complex triples with 3-point DFTs into y. */
  for (i = 0; i < mpass; i++) {
    iusace_complex_3point_fft(ptr_x, ptr_y);

    ptr_x = ptr_x + 6;
    ptr_y = ptr_y + 6;
  }

  /* Re-order: the three 3-point outputs go to bins i, i+mpass, i+2*mpass. */
  for (i = 0; i < mpass; i++) {
    data[2 * i] = y[6 * i];
    data[2 * i + 1] = y[6 * i + 1];
  }

  for (i = 0; i < mpass; i++) {
    data[2 * (i + mpass)] = y[6 * i + 2];
    data[2 * (i + mpass) + 1] = y[6 * i + 3];
  }

  for (i = 0; i < mpass; i++) {
    data[2 * (i + 2 * mpass)] = y[6 * i + 4];
    data[2 * (i + 2 * mpass) + 1] = y[6 * i + 5];
  }
}
|
|
|
|
/* Variant of iusace_complex_fft_p3 that uses local buffers instead of a
 * caller-provided scratch struct; algorithm is otherwise identical (see
 * iusace_complex_fft_p3).
 * NOTE(review): the fixed local arrays total ~15 KB of stack
 * (800 + 1024 + 2048 floats) — callers must have that much headroom, and
 * nlength must fit these sizes; TODO confirm against call sites. */
VOID iusace_complex_fft_p3_no_scratch(FLOAT32 *data, WORD32 nlength) {
  WORD32 i, j;

  FLOAT32 data_3[800];
  FLOAT32 y[1024];
  FLOAT32 p_fft_p2_y[2048];
  WORD32 cnfac;
  WORD32 mpass = nlength;
  FLOAT32 *ptr_x = data;
  FLOAT32 *ptr_y = y;

  /* Strip factors of 3; mpass becomes the power-of-two sub-FFT length. */
  cnfac = 0;
  while (mpass % 3 == 0) {
    mpass /= 3;
    cnfac++;
  }

  /* De-interleave each subsequence, FFT it, and interleave it back. */
  for (i = 0; i < 3 * cnfac; i++) {
    for (j = 0; j < mpass; j++) {
      data_3[2 * j] = data[3 * (2 * j) + (2 * i)];
      data_3[2 * j + 1] = data[3 * (2 * j) + 1 + (2 * i)];
    }
    iusace_complex_fft_p2(data_3, mpass, p_fft_p2_y);

    for (j = 0; j < mpass; j++) {
      data[3 * (2 * j) + (2 * i)] = data_3[2 * j];
      data[3 * (2 * j) + 1 + (2 * i)] = data_3[2 * j + 1];
    }
  }

  /* Twiddle each complex triple before the radix-3 combine. */
  {
    const FLOAT64 *w1r, *w1i;
    FLOAT32 tmp;
    w1r = iusace_twiddle_table_3pr;
    w1i = iusace_twiddle_table_3pi;

    for (i = 0; i < nlength; i += 3) {
      tmp = (FLOAT32)((FLOAT64)data[2 * i] * (*w1r) - (FLOAT64)data[2 * i + 1] * (*w1i));
      data[2 * i + 1] =
          (FLOAT32)((FLOAT64)data[2 * i] * (*w1i) + (FLOAT64)data[2 * i + 1] * (*w1r));
      data[2 * i] = tmp;

      w1r++;
      w1i++;

      tmp = (FLOAT32)((FLOAT64)data[2 * (i + 1)] * (*w1r) -
                      (FLOAT64)data[2 * (i + 1) + 1] * (*w1i));
      data[2 * (i + 1) + 1] = (FLOAT32)((FLOAT64)data[2 * (i + 1)] * (*w1i) +
                                        (FLOAT64)data[2 * (i + 1) + 1] * (*w1r));
      data[2 * (i + 1)] = tmp;

      w1r++;
      w1i++;

      tmp = (FLOAT32)((FLOAT64)data[2 * (i + 2)] * (*w1r) -
                      (FLOAT64)data[2 * (i + 2) + 1] * (*w1i));
      data[2 * (i + 2) + 1] = (FLOAT32)((FLOAT64)data[2 * (i + 2)] * (*w1i) +
                                        (FLOAT64)data[2 * (i + 2) + 1] * (*w1r));
      data[2 * (i + 2)] = tmp;

      /* Skip to the next relevant table row (table built for mpass=128). */
      w1r += 3 * (128 / mpass - 1) + 1;
      w1i += 3 * (128 / mpass - 1) + 1;
    }
  }

  /* Combine consecutive complex triples with 3-point DFTs into y. */
  for (i = 0; i < mpass; i++) {
    iusace_complex_3point_fft(ptr_x, ptr_y);

    ptr_x = ptr_x + 6;
    ptr_y = ptr_y + 6;
  }

  /* Re-order: the three 3-point outputs go to bins i, i+mpass, i+2*mpass. */
  for (i = 0; i < mpass; i++) {
    data[2 * i] = y[6 * i];
    data[2 * i + 1] = y[6 * i + 1];
  }

  for (i = 0; i < mpass; i++) {
    data[2 * (i + mpass)] = y[6 * i + 2];
    data[2 * (i + mpass) + 1] = y[6 * i + 3];
  }

  for (i = 0; i < mpass; i++) {
    data[2 * (i + 2 * mpass)] = y[6 * i + 4];
    data[2 * (i + 2 * mpass) + 1] = y[6 * i + 5];
  }
}
|
|
|
|
/* Pre-twiddle for the FFT-based MDCT: folds the npoints-sample windowed
 * input into npoints/4 complex values and rotates them by cos/sin twiddles,
 * writing interleaved (re, im) into fft_ptr.
 *
 * ptr_in   : time-domain input of length npoints; MUTATED when tx_flag == 0
 *            (spectrally reversed in place to reuse the MDCT as an MDST-like
 *            transform).
 * fft_ptr  : output, npoints/4 interleaved complex values (npoints/2 floats).
 * npoints  : twice the MDCT length (caller passes npoints << 1).
 * cos_ptr/sin_ptr : per-bin twiddle tables, consumed sequentially.
 * tx_flag  : transform variant selector (0 triggers the reversal above). */
static VOID iusace_calc_pre_twid_enc(FLOAT64 *ptr_in, FLOAT32 *fft_ptr, WORD32 npoints,
                                     const FLOAT64 *cos_ptr, const FLOAT64 *sin_ptr,
                                     const WORD32 tx_flag) {
  WORD32 i, n;
  WORD32 b = npoints >> 1;
  WORD32 a = npoints - b;        /* a == b for even npoints; kept symbolic */
  WORD32 nlength = npoints >> 2; /* number of complex FFT bins produced */
  FLOAT64 tempr, tempi;

  if (tx_flag == 0) {
    FLOAT64 norm;
    /* Reverse the input so the same MDCT machinery computes the
     * alternate transform ("norm" here is just a swap temporary). */
    for (i = 0; i < b; i++) {
      norm = ptr_in[i]; /* reuse MDCT: spectrally reverse all bins */
      ptr_in[i] = ptr_in[npoints - 1 - i];
      ptr_in[npoints - 1 - i] = norm;
    }
  }
  /* Fold the four quarters of the input window into complex pairs; the
   * branch selects between wrap-around (+) and mirrored (-) folding
   * depending on which quarter the source index falls in. */
  for (i = 0; i < nlength; i++) {
    n = npoints / 2 - 1 - 2 * i;
    if (i < b / 4) {
      tempr = ptr_in[a / 2 + n] + ptr_in[npoints + a / 2 - 1 - n];
    } else {
      tempr = ptr_in[a / 2 + n] - ptr_in[a / 2 - 1 - n];
    }
    n = 2 * i;
    if (i < a / 4) {
      tempi = ptr_in[a / 2 + n] - ptr_in[a / 2 - 1 - n];
    } else {
      tempi = ptr_in[a / 2 + n] + ptr_in[npoints + a / 2 - 1 - n];
    }

    /* Rotate by the twiddle (complex multiply by (cos, -sin)) and store
     * interleaved single-precision output; tables advance one bin here. */
    fft_ptr[2 * i] = (FLOAT32)(tempr * (*cos_ptr) + tempi * (*sin_ptr));
    fft_ptr[2 * i + 1] = (FLOAT32)(tempi * (*cos_ptr++) - tempr * (*sin_ptr++));
  }
}
|
|
|
|
/* Dispatch a complex FFT of interleaved (re, im) data to the matching
 * kernel: power-of-two lengths use the radix-4/2 kernel, everything else
 * (the 3 * 2^k lengths) uses the radix-3 path. */
VOID iusace_complex_fft(FLOAT32 *data, WORD32 nlength, iusace_scratch_mem *pstr_scratch) {
  /* n & (n-1) == 0 exactly when n is a power of two. */
  const WORD32 is_pow_of_two = ((nlength & (nlength - 1)) == 0);

  if (is_pow_of_two) {
    iusace_complex_fft_p2(data, nlength, pstr_scratch->p_fft_p2_y);
  } else {
    iusace_complex_fft_p3(data, nlength, pstr_scratch);
  }
}
|
|
|
|
/* Post-twiddle for the FFT-based MDCT: rotates the npoints/4 complex FFT
 * outputs by the cos/sin tables and scatters the real/imag parts into the
 * four symmetric positions of the npoints-long output.
 *
 * ptr_out  : output buffer of npoints doubles.
 * fft_ptr  : npoints/4 interleaved complex FFT results.
 * cos_ptr/sin_ptr : twiddle tables, consumed one entry per bin.
 * tx_flag  : when 0, flips the sign of every even-indexed output to
 *            complete the spectral-reversal transform variant. */
static VOID iusace_calc_post_twid_enc(FLOAT64 *ptr_out, FLOAT32 *fft_ptr, WORD32 npoints,
                                      const FLOAT64 *cos_ptr, const FLOAT64 *sin_ptr,
                                      const WORD32 tx_flag) {
  WORD32 bin;
  WORD32 num_bins = npoints >> 2;
  FLOAT64 out_re, out_im;

  for (bin = 0; bin < num_bins; bin++) {
    /* Complex multiply by (cos, -sin), scaled by 2; tables advance on the
     * second use so both products see the same entry. */
    out_re = 2 * ((FLOAT64)(fft_ptr[2 * bin]) * (*cos_ptr) +
                  (FLOAT64)(fft_ptr[2 * bin + 1]) * (*sin_ptr));
    out_im = 2 * ((FLOAT64)(fft_ptr[2 * bin + 1]) * (*cos_ptr++) -
                  (FLOAT64)(fft_ptr[2 * bin]) * (*sin_ptr++));

    /* Scatter with the MDCT's odd symmetry into all four quadrants. */
    ptr_out[2 * bin] = -out_re;
    ptr_out[npoints / 2 - 1 - 2 * bin] = out_im;
    ptr_out[npoints / 2 + 2 * bin] = -out_im;
    ptr_out[npoints - 1 - 2 * bin] = out_re;
  }

  if (tx_flag == 0) {
    /* Reuse MDCT: negate the even-indexed outputs. */
    for (bin = 0; bin < npoints; bin += 2) {
      ptr_out[bin] = -ptr_out[bin];
    }
  }
}
|
|
|
|
/* Forward MDCT of length npoints computed via pre-twiddle + complex FFT
 * of length npoints/2 + post-twiddle.
 *
 * ptr_in       : 2*npoints time-domain samples (mutated by the pre-twiddle
 *                when tx_flag == 0).
 * ptr_out      : npoints MDCT coefficients (doubles).
 * npoints      : MDCT length; must be 96, 128, 768 or 1024.
 * tx_flag      : transform variant, forwarded to the twiddle helpers.
 * pstr_scratch : scratch memory (FFT buffer and sub-FFT scratch).
 *
 * Returns IA_NO_ERROR, or
 * IA_EXHEAACE_EXE_FATAL_USAC_INVALID_WINDOW_LENGTH for an unsupported
 * npoints (no twiddle tables exist for other lengths). */
IA_ERRORCODE iusace_fft_based_mdct(FLOAT64 *ptr_in, FLOAT64 *ptr_out, WORD32 npoints,
                                   const WORD32 tx_flag, iusace_scratch_mem *pstr_scratch) {
  FLOAT32 *ptr_fft_buf = pstr_scratch->p_fft_mdct_buf;
  const FLOAT64 *ptr_cos = NULL;
  const FLOAT64 *ptr_sin = NULL;
  WORD32 fft_len = npoints >> 1;     /* complex FFT length */
  WORD32 n_total = npoints << 1;     /* full (doubled) window length */

  /* Clear 2*n_total scratch floats before the pre-twiddle fills them. */
  memset(ptr_fft_buf, 0, ((SIZE_T)n_total << 1) * sizeof(*ptr_fft_buf));

  /* Pick the pre/post twiddle tables for the doubled window length. */
  switch (npoints) {
    case (96):
      ptr_cos = iexheaac_pre_post_twid_cos_192;
      ptr_sin = iexheaac_pre_post_twid_sin_192;
      break;
    case (128):
      ptr_cos = iusace_pre_post_twid_cos_256;
      ptr_sin = iusace_pre_post_twid_sin_256;
      break;
    case (768):
      ptr_cos = iexheaac_pre_post_twid_cos_1536;
      ptr_sin = iexheaac_pre_post_twid_sin_1536;
      break;
    case (1024):
      ptr_cos = iusace_pre_post_twid_cos_2048;
      ptr_sin = iusace_pre_post_twid_sin_2048;
      break;
    default:
      return IA_EXHEAACE_EXE_FATAL_USAC_INVALID_WINDOW_LENGTH;
  }

  /* pre-twiddle: fold and rotate input into fft_len complex values */
  iusace_calc_pre_twid_enc(ptr_in, ptr_fft_buf, npoints << 1, ptr_cos, ptr_sin, tx_flag);

  /* complex FFT */
  iusace_complex_fft(ptr_fft_buf, fft_len, pstr_scratch);

  /* post-twiddle: rotate and scatter into the MDCT output */
  iusace_calc_post_twid_enc(ptr_out, ptr_fft_buf, npoints << 1, ptr_cos, ptr_sin, tx_flag);

  return IA_NO_ERROR;
}
|
|
|
|
/* 2048-point complex FFT built from two 1024-point FFTs plus one radix-2
 * combine stage (decimation in time). ptr_x holds 2048 interleaved complex
 * values: the first 1024 in ptr_x[0..2047], the second in ptr_x[2048..4095].
 * scratch_fft is the scratch required by the 1024-point kernel. */
VOID iusace_complex_fft_2048(FLOAT32 *ptr_x, FLOAT32 *scratch_fft) {
  WORD32 k;
  FLOAT32 hi_re, hi_im, cw, sw, rot_re, rot_im;
  FLOAT32 lo_re, lo_im;
  FLOAT32 *ptr_lo, *ptr_hi;
  FLOAT32 *ptr_cos, *ptr_sin;

  /* Half-length FFTs on the even/odd decimated halves. */
  iusace_complex_fft_p2(ptr_x, 1024, scratch_fft);
  iusace_complex_fft_p2(ptr_x + 2048, 1024, scratch_fft);

  ptr_lo = ptr_x;
  ptr_hi = ptr_x + 2048;
  ptr_cos = (FLOAT32 *)&iusace_twiddle_cos_2048[0];
  ptr_sin = (FLOAT32 *)&iusace_twiddle_sin_2048[0];

  /* Combine: rotate the upper half by the twiddle and butterfly with the
   * lower half, writing both results back in place. */
  for (k = 0; k < 1024; k++) {
    hi_re = ptr_hi[2 * k];
    hi_im = ptr_hi[2 * k + 1];
    cw = ptr_cos[k];
    sw = ptr_sin[k];

    /* (hi_re + j*hi_im) * (cw - j*sw) */
    rot_re = (hi_re * cw) + (hi_im * sw);
    rot_im = -(hi_re * sw) + (hi_im * cw);

    lo_re = ptr_lo[2 * k];
    lo_im = ptr_lo[2 * k + 1];

    ptr_lo[2 * k] = lo_re + rot_re;
    ptr_lo[2 * k + 1] = lo_im + rot_im;
    ptr_hi[2 * k] = lo_re - rot_re;
    ptr_hi[2 * k + 1] = lo_im - rot_im;
  }
}
|
|
static VOID ixheaace_rad2_cplx_fft(FLOAT32 *ptr_real, FLOAT32 *ptr_imag, WORD32 n_points,
|
|
FLOAT32 *ptr_scratch) {
|
|
WORD32 i, j, k, n_stages, h2;
|
|
FLOAT32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
|
|
WORD32 del, nodespacing, in_loop_cnt;
|
|
WORD32 not_power_4;
|
|
WORD32 dig_rev_shift;
|
|
WORD32 m_points = n_points;
|
|
FLOAT32 *ptr_x = ptr_scratch;
|
|
FLOAT32 *y = ptr_scratch + 2048;
|
|
FLOAT32 *ptr_y = y;
|
|
const FLOAT32 *ptr_w;
|
|
|
|
dig_rev_shift = ixheaac_norm32(m_points) + 1 - 16;
|
|
n_stages = 30 - ixheaac_norm32(m_points);
|
|
not_power_4 = n_stages & 1;
|
|
|
|
n_stages = n_stages >> 1;
|
|
|
|
ptr_w = ia_fft_twiddle_table_float;
|
|
|
|
for (i = 0; i < n_points; i++) {
|
|
ptr_x[2 * i] = ptr_real[i];
|
|
ptr_x[2 * i + 1] = ptr_imag[i];
|
|
}
|
|
dig_rev_shift = max(dig_rev_shift, 0);
|
|
for (i = 0; i < n_points; i += 4) {
|
|
FLOAT32 *inp = ptr_x;
|
|
FLOAT32 tmk;
|
|
|
|
DIG_REV(i, dig_rev_shift, h2);
|
|
if (not_power_4) {
|
|
h2 += 1;
|
|
h2 &= ~1;
|
|
}
|
|
inp += (h2);
|
|
|
|
x0r = *inp;
|
|
x0i = *(inp + 1);
|
|
inp += (n_points >> 1);
|
|
|
|
x1r = *inp;
|
|
x1i = *(inp + 1);
|
|
inp += (n_points >> 1);
|
|
|
|
x2r = *inp;
|
|
x2i = *(inp + 1);
|
|
inp += (n_points >> 1);
|
|
|
|
x3r = *inp;
|
|
x3i = *(inp + 1);
|
|
|
|
x0r = ia_add_flt(x0r, x2r);
|
|
x0i = ia_add_flt(x0i, x2i);
|
|
|
|
tmk = ia_sub_flt(x0r, x2r);
|
|
x2r = ia_sub_flt(tmk, x2r);
|
|
tmk = ia_sub_flt(x0i, x2i);
|
|
x2i = ia_sub_flt(tmk, x2i);
|
|
|
|
x1r = ia_add_flt(x1r, x3r);
|
|
x1i = ia_add_flt(x1i, x3i);
|
|
|
|
tmk = ia_sub_flt(x1r, x3r);
|
|
x3r = ia_sub_flt(tmk, x3r);
|
|
tmk = ia_sub_flt(x1i, x3i);
|
|
x3i = ia_sub_flt(tmk, x3i);
|
|
|
|
x0r = ia_add_flt(x0r, x1r);
|
|
x0i = ia_add_flt(x0i, x1i);
|
|
|
|
tmk = ia_sub_flt(x0r, x1r);
|
|
x1r = ia_sub_flt(tmk, x1r);
|
|
tmk = ia_sub_flt(x0i, x1i);
|
|
x1i = ia_sub_flt(tmk, x1i);
|
|
|
|
x2r = ia_add_flt(x2r, x3i);
|
|
x2i = ia_sub_flt(x2i, x3r);
|
|
|
|
tmk = ia_sub_flt(x2r, x3i);
|
|
x3i = ia_sub_flt(tmk, x3i);
|
|
tmk = ia_add_flt(x2i, x3r);
|
|
x3r = ia_add_flt(tmk, x3r);
|
|
|
|
*ptr_y++ = x0r;
|
|
*ptr_y++ = x0i;
|
|
*ptr_y++ = x2r;
|
|
*ptr_y++ = x2i;
|
|
*ptr_y++ = x1r;
|
|
*ptr_y++ = x1i;
|
|
*ptr_y++ = x3i;
|
|
*ptr_y++ = x3r;
|
|
}
|
|
ptr_y -= 2 * n_points;
|
|
del = 4;
|
|
nodespacing = 64;
|
|
in_loop_cnt = n_points >> 4;
|
|
for (i = n_stages - 1; i > 0; i--) {
|
|
const FLOAT32 *twiddles = ptr_w;
|
|
FLOAT32 *data = ptr_y;
|
|
FLOAT32 w_1, w_2, w_3, w_4, w_5, w_6;
|
|
WORD32 sec_loop_cnt;
|
|
|
|
for (k = in_loop_cnt; k != 0; k--) {
|
|
x0r = (*data);
|
|
x0i = (*(data + 1));
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
x1r = (*data);
|
|
x1i = (*(data + 1));
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
x2r = (*data);
|
|
x2i = (*(data + 1));
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
x3r = (*data);
|
|
x3i = (*(data + 1));
|
|
data -= 3 * (del << 1);
|
|
|
|
x0r = ia_add_flt(x0r, x2r);
|
|
x0i = ia_add_flt(x0i, x2i);
|
|
x2r = ia_msu_flt(x0r, x2r, 2);
|
|
x2i = ia_msu_flt(x0i, x2i, 2);
|
|
x1r = ia_add_flt(x1r, x3r);
|
|
x1i = ia_add_flt(x1i, x3i);
|
|
x3r = ia_msu_flt(x1r, x3r, 2);
|
|
x3i = ia_msu_flt(x1i, x3i, 2);
|
|
|
|
x0r = ia_add_flt(x0r, x1r);
|
|
x0i = ia_add_flt(x0i, x1i);
|
|
x1r = ia_msu_flt(x0r, x1r, 2);
|
|
x1i = ia_msu_flt(x0i, x1i, 2);
|
|
x2r = ia_add_flt(x2r, x3i);
|
|
x2i = ia_sub_flt(x2i, x3r);
|
|
x3i = ia_msu_flt(x2r, x3i, 2);
|
|
x3r = ia_mac_flt(x2i, x3r, 2);
|
|
|
|
*data = x0r;
|
|
*(data + 1) = x0i;
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
*data = x2r;
|
|
*(data + 1) = x2i;
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
*data = x1r;
|
|
*(data + 1) = x1i;
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
*data = x3i;
|
|
*(data + 1) = x3r;
|
|
data += ((SIZE_T)del << 1);
|
|
}
|
|
data = ptr_y + 2;
|
|
|
|
sec_loop_cnt = (nodespacing * del);
|
|
sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - (sec_loop_cnt / 16) +
|
|
(sec_loop_cnt / 32) - (sec_loop_cnt / 64) + (sec_loop_cnt / 128) -
|
|
(sec_loop_cnt / 256);
|
|
|
|
for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) {
|
|
w_1 = *(twiddles + j);
|
|
w_4 = *(twiddles + j + 257);
|
|
w_2 = *(twiddles + ((SIZE_T)j << 1));
|
|
w_5 = *(twiddles + ((SIZE_T)j << 1) + 257);
|
|
w_3 = *(twiddles + j + ((SIZE_T)j << 1));
|
|
w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 257);
|
|
|
|
for (k = in_loop_cnt; k != 0; k--) {
|
|
FLOAT32 tmp;
|
|
/*x0 is loaded later to avoid register crunch*/
|
|
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
x1r = *data;
|
|
x1i = *(data + 1);
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
x2r = *data;
|
|
x2i = *(data + 1);
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
x3r = *data;
|
|
x3i = *(data + 1);
|
|
data -= 3 * (del << 1);
|
|
|
|
tmp = ia_sub_flt(ia_mul_flt(x1r, w_1), ia_mul_flt(x1i, w_4));
|
|
x1i = ia_mac_flt(ia_mul_flt(x1r, w_4), x1i, w_1);
|
|
x1r = tmp;
|
|
|
|
tmp = ia_sub_flt(ia_mul_flt(x2r, w_2), ia_mul_flt(x2i, w_5));
|
|
x2i = ia_mac_flt(ia_mul_flt(x2r, w_5), x2i, w_2);
|
|
x2r = tmp;
|
|
|
|
tmp = ia_sub_flt(ia_mul_flt(x3r, w_3), ia_mul_flt(x3i, w_6));
|
|
x3i = ia_mac_flt(ia_mul_flt(x3r, w_6), x3i, w_3);
|
|
x3r = tmp;
|
|
|
|
x0r = (*data);
|
|
x0i = (*(data + 1));
|
|
|
|
x0r = ia_add_flt(x0r, (x2r));
|
|
x0i = ia_add_flt(x0i, (x2i));
|
|
x2r = ia_msu_flt(x0r, x2r, 2);
|
|
x2i = ia_msu_flt(x0i, x2i, 2);
|
|
x1r = ia_add_flt(x1r, x3r);
|
|
x1i = ia_add_flt(x1i, x3i);
|
|
x3r = ia_msu_flt(x1r, x3r, 2);
|
|
x3i = ia_msu_flt(x1i, x3i, 2);
|
|
|
|
x0r = ia_add_flt(x0r, (x1r));
|
|
x0i = ia_add_flt(x0i, (x1i));
|
|
x1r = ia_msu_flt(x0r, x1r, 2);
|
|
x1i = ia_msu_flt(x0i, x1i, 2);
|
|
x2r = ia_add_flt(x2r, (x3i));
|
|
x2i = ia_sub_flt(x2i, (x3r));
|
|
x3i = ia_msu_flt(x2r, x3i, 2);
|
|
x3r = ia_mac_flt(x2i, x3r, 2);
|
|
|
|
*data = x0r;
|
|
*(data + 1) = x0i;
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
*data = x2r;
|
|
*(data + 1) = x2i;
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
*data = x1r;
|
|
*(data + 1) = x1i;
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
*data = x3i;
|
|
*(data + 1) = x3r;
|
|
data += ((SIZE_T)del << 1);
|
|
}
|
|
data -= 2 * n_points;
|
|
data += 2;
|
|
}
|
|
for (; j <= (nodespacing * del) >> 1; j += nodespacing) {
|
|
w_1 = *(twiddles + j);
|
|
w_4 = *(twiddles + j + 257);
|
|
w_2 = *(twiddles + ((SIZE_T)j << 1));
|
|
w_5 = *(twiddles + ((SIZE_T)j << 1) + 257);
|
|
w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
|
|
w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);
|
|
|
|
for (k = in_loop_cnt; k != 0; k--) {
|
|
FLOAT32 tmp;
|
|
/*x0 is loaded later to avoid register crunch*/
|
|
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
x1r = *data;
|
|
x1i = *(data + 1);
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
x2r = *data;
|
|
x2i = *(data + 1);
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
x3r = *data;
|
|
x3i = *(data + 1);
|
|
data -= 3 * (del << 1);
|
|
|
|
tmp = ia_sub_flt(ia_mul_flt(x1r, w_1), ia_mul_flt(x1i, w_4));
|
|
x1i = ia_mac_flt(ia_mul_flt(x1r, w_4), x1i, w_1);
|
|
x1r = tmp;
|
|
|
|
tmp = ia_sub_flt(ia_mul_flt(x2r, w_2), ia_mul_flt(x2i, w_5));
|
|
x2i = ia_mac_flt(ia_mul_flt(x2r, w_5), x2i, w_2);
|
|
x2r = tmp;
|
|
|
|
tmp = ia_add_flt(ia_mul_flt(x3r, w_6), ia_mul_flt(x3i, w_3));
|
|
x3i = ia_add_flt(ia_negate_flt(ia_mul_flt(x3r, w_3)), ia_mul_flt(x3i, w_6));
|
|
x3r = tmp;
|
|
|
|
x0r = (*data);
|
|
x0i = (*(data + 1));
|
|
|
|
x0r = ia_add_flt(x0r, (x2r));
|
|
x0i = ia_add_flt(x0i, (x2i));
|
|
x2r = ia_msu_flt(x0r, x2r, 2);
|
|
x2i = ia_msu_flt(x0i, x2i, 2);
|
|
x1r = ia_add_flt(x1r, x3r);
|
|
x1i = ia_add_flt(x1i, x3i);
|
|
x3r = ia_msu_flt(x1r, x3r, 2);
|
|
x3i = ia_msu_flt(x1i, x3i, 2);
|
|
|
|
x0r = ia_add_flt(x0r, (x1r));
|
|
x0i = ia_add_flt(x0i, (x1i));
|
|
x1r = ia_msu_flt(x0r, x1r, 2);
|
|
x1i = ia_msu_flt(x0i, x1i, 2);
|
|
x2r = ia_add_flt(x2r, (x3i));
|
|
x2i = ia_sub_flt(x2i, (x3r));
|
|
x3i = ia_msu_flt(x2r, x3i, 2);
|
|
x3r = ia_mac_flt(x2i, x3r, 2);
|
|
|
|
*data = x0r;
|
|
*(data + 1) = x0i;
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
*data = x2r;
|
|
*(data + 1) = x2i;
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
*data = x1r;
|
|
*(data + 1) = x1i;
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
*data = x3i;
|
|
*(data + 1) = x3r;
|
|
data += ((SIZE_T)del << 1);
|
|
}
|
|
data -= 2 * n_points;
|
|
data += 2;
|
|
}
|
|
for (; j <= sec_loop_cnt * 2; j += nodespacing) {
|
|
w_1 = *(twiddles + j);
|
|
w_4 = *(twiddles + j + 257);
|
|
w_2 = *(twiddles + ((SIZE_T)j << 1) - 256);
|
|
w_5 = *(twiddles + ((SIZE_T)j << 1) + 1);
|
|
w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 256);
|
|
w_6 = *(twiddles + j + ((SIZE_T)j << 1) + 1);
|
|
|
|
for (k = in_loop_cnt; k != 0; k--) {
|
|
FLOAT32 tmp;
|
|
/*x0 is loaded later to avoid register crunch*/
|
|
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
x1r = *data;
|
|
x1i = *(data + 1);
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
x2r = *data;
|
|
x2i = *(data + 1);
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
x3r = *data;
|
|
x3i = *(data + 1);
|
|
data -= 3 * (del << 1);
|
|
|
|
tmp = ia_sub_flt(ia_mul_flt(x1r, w_1), ia_mul_flt(x1i, w_4));
|
|
x1i = ia_mac_flt(ia_mul_flt(x1r, w_4), x1i, w_1);
|
|
x1r = tmp;
|
|
|
|
tmp = ia_add_flt(ia_mul_flt(x2r, w_5), ia_mul_flt(x2i, w_2));
|
|
x2i = ia_add_flt(ia_negate_flt(ia_mul_flt(x2r, w_2)), ia_mul_flt(x2i, w_5));
|
|
x2r = tmp;
|
|
|
|
tmp = ia_add_flt(ia_mul_flt(x3r, w_6), ia_mul_flt(x3i, w_3));
|
|
x3i = ia_add_flt(ia_negate_flt(ia_mul_flt(x3r, w_3)), ia_mul_flt(x3i, w_6));
|
|
x3r = tmp;
|
|
|
|
x0r = (*data);
|
|
x0i = (*(data + 1));
|
|
|
|
x0r = ia_add_flt(x0r, (x2r));
|
|
x0i = ia_add_flt(x0i, (x2i));
|
|
x2r = ia_msu_flt(x0r, x2r, 2);
|
|
x2i = ia_msu_flt(x0i, x2i, 2);
|
|
x1r = ia_add_flt(x1r, x3r);
|
|
x1i = ia_add_flt(x1i, x3i);
|
|
x3r = ia_msu_flt(x1r, x3r, 2);
|
|
x3i = ia_msu_flt(x1i, x3i, 2);
|
|
|
|
x0r = ia_add_flt(x0r, (x1r));
|
|
x0i = ia_add_flt(x0i, (x1i));
|
|
x1r = ia_msu_flt(x0r, x1r, 2);
|
|
x1i = ia_msu_flt(x0i, x1i, 2);
|
|
x2r = ia_add_flt(x2r, (x3i));
|
|
x2i = ia_sub_flt(x2i, (x3r));
|
|
x3i = ia_msu_flt(x2r, x3i, 2);
|
|
x3r = ia_mac_flt(x2i, x3r, 2);
|
|
|
|
*data = x0r;
|
|
*(data + 1) = x0i;
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
*data = x2r;
|
|
*(data + 1) = x2i;
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
*data = x1r;
|
|
*(data + 1) = x1i;
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
*data = x3i;
|
|
*(data + 1) = x3r;
|
|
data += ((SIZE_T)del << 1);
|
|
}
|
|
data -= 2 * n_points;
|
|
data += 2;
|
|
}
|
|
for (; j < nodespacing * del; j += nodespacing) {
|
|
w_1 = *(twiddles + j);
|
|
w_4 = *(twiddles + j + 257);
|
|
w_2 = *(twiddles + ((SIZE_T)j << 1) - 256);
|
|
w_5 = *(twiddles + ((SIZE_T)j << 1) + 1);
|
|
w_3 = *(twiddles + j + ((SIZE_T)j << 1) - 512);
|
|
w_6 = *(twiddles + j + ((SIZE_T)j << 1) - 512 + 257);
|
|
|
|
for (k = in_loop_cnt; k != 0; k--) {
|
|
FLOAT32 tmp;
|
|
/*x0 is loaded later to avoid register crunch*/
|
|
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
x1r = *data;
|
|
x1i = *(data + 1);
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
x2r = *data;
|
|
x2i = *(data + 1);
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
x3r = *data;
|
|
x3i = *(data + 1);
|
|
data -= 3 * (del << 1);
|
|
|
|
tmp = ia_sub_flt(ia_mul_flt(x1r, w_1), ia_mul_flt(x1i, w_4));
|
|
x1i = ia_mac_flt(ia_mul_flt(x1r, w_4), x1i, w_1);
|
|
x1r = tmp;
|
|
|
|
tmp = ia_add_flt(ia_mul_flt(x2r, w_5), ia_mul_flt(x2i, w_2));
|
|
x2i = ia_add_flt(ia_negate_flt(ia_mul_flt(x2r, w_2)), ia_mul_flt(x2i, w_5));
|
|
x2r = tmp;
|
|
|
|
tmp = ia_add_flt(ia_negate_flt(ia_mul_flt(x3r, w_3)), ia_mul_flt(x3i, w_6));
|
|
x3i = ia_mac_flt(ia_mul_flt(x3r, w_6), x3i, w_3);
|
|
x3r = tmp;
|
|
|
|
x0r = (*data);
|
|
x0i = (*(data + 1));
|
|
|
|
x0r = ia_add_flt(x0r, (x2r));
|
|
x0i = ia_add_flt(x0i, (x2i));
|
|
x2r = ia_msu_flt(x0r, x2r, 2);
|
|
x2i = ia_msu_flt(x0i, x2i, 2);
|
|
x1r = ia_add_flt(x1r, x3r);
|
|
x1i = ia_sub_flt(x1i, x3i);
|
|
x3r = ia_msu_flt(x1r, x3r, 2);
|
|
x3i = ia_mac_flt(x1i, x3i, 2);
|
|
|
|
x0r = ia_add_flt(x0r, (x1r));
|
|
x0i = ia_add_flt(x0i, (x1i));
|
|
x1r = ia_msu_flt(x0r, x1r, 2);
|
|
x1i = ia_msu_flt(x0i, x1i, 2);
|
|
x2r = ia_add_flt(x2r, (x3i));
|
|
x2i = ia_sub_flt(x2i, (x3r));
|
|
x3i = ia_msu_flt(x2r, x3i, 2);
|
|
x3r = ia_mac_flt(x2i, x3r, 2);
|
|
|
|
*data = x0r;
|
|
*(data + 1) = x0i;
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
*data = x2r;
|
|
*(data + 1) = x2i;
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
*data = x1r;
|
|
*(data + 1) = x1i;
|
|
data += ((SIZE_T)del << 1);
|
|
|
|
*data = x3i;
|
|
*(data + 1) = x3r;
|
|
data += ((SIZE_T)del << 1);
|
|
}
|
|
data -= 2 * n_points;
|
|
data += 2;
|
|
}
|
|
nodespacing >>= 2;
|
|
del <<= 2;
|
|
in_loop_cnt >>= 2;
|
|
}
|
|
if (not_power_4) {
|
|
const FLOAT32 *twiddles = ptr_w;
|
|
nodespacing <<= 1;
|
|
|
|
for (j = del / 2; j != 0; j--) {
|
|
FLOAT32 w_1 = *twiddles;
|
|
FLOAT32 w_4 = *(twiddles + 257);
|
|
FLOAT32 tmp;
|
|
twiddles += nodespacing;
|
|
|
|
x0r = *ptr_y;
|
|
x0i = *(ptr_y + 1);
|
|
ptr_y += ((SIZE_T)del << 1);
|
|
|
|
x1r = *ptr_y;
|
|
x1i = *(ptr_y + 1);
|
|
|
|
tmp = ia_sub_flt(ia_mul_flt(x1r, w_1), ia_mul_flt(x1i, w_4));
|
|
x1i = (FLOAT32)ia_mac_flt(ia_mul_flt(x1r, w_4), x1i, w_1);
|
|
x1r = tmp;
|
|
|
|
*ptr_y = ia_sub_flt((x0r), (x1r));
|
|
*(ptr_y + 1) = ia_sub_flt((x0i), (x1i));
|
|
ptr_y -= ((SIZE_T)del << 1);
|
|
|
|
*ptr_y = ia_add_flt((x0r), (x1r));
|
|
*(ptr_y + 1) = ia_add_flt((x0i), (x1i));
|
|
ptr_y += 2;
|
|
}
|
|
twiddles = ptr_w;
|
|
for (j = del / 2; j != 0; j--) {
|
|
FLOAT32 w_1 = *twiddles;
|
|
FLOAT32 w_4 = *(twiddles + 257);
|
|
FLOAT32 tmp;
|
|
twiddles += nodespacing;
|
|
|
|
x0r = *ptr_y;
|
|
x0i = *(ptr_y + 1);
|
|
ptr_y += ((SIZE_T)del << 1);
|
|
|
|
x1r = *ptr_y;
|
|
x1i = *(ptr_y + 1);
|
|
|
|
tmp = ia_add_flt(ia_mul_flt(x1r, w_4), ia_mul_flt(x1i, w_1));
|
|
x1i = ia_add_flt(ia_negate_flt(ia_mul_flt(x1r, w_1)), ia_mul_flt(x1i, w_4));
|
|
x1r = tmp;
|
|
|
|
*ptr_y = ia_sub_flt((x0r), (x1r));
|
|
*(ptr_y + 1) = ia_sub_flt((x0i), (x1i));
|
|
ptr_y -= ((SIZE_T)del << 1);
|
|
|
|
*ptr_y = ia_add_flt((x0r), (x1r));
|
|
*(ptr_y + 1) = ia_add_flt((x0i), (x1i));
|
|
ptr_y += 2;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < n_points; i++) {
|
|
ptr_real[i] = y[2 * i];
|
|
ptr_imag[i] = y[2 * i + 1];
|
|
}
|
|
}
|
|
/*
 * In-place 4-point complex FFT (single radix-4 butterfly).
 *
 * x_r : real parts of the 4 complex inputs; overwritten with output reals.
 * x_i : imaginary parts of the 4 complex inputs; overwritten with output imags.
 *
 * Implements the standard DFT-4:
 *   X[0] = (x0 + x2) + (x1 + x3)
 *   X[2] = (x0 + x2) - (x1 + x3)
 *   X[1] = (x0 - x2) - j*(x1 - x3)
 *   X[3] = (x0 - x2) + j*(x1 - x3)
 * All eight intermediates are formed before any store, so the in-place
 * update cannot clobber an input that is still needed.
 */
static VOID ixheaace_cplx_fft_4(FLOAT32 *x_r, FLOAT32 *x_i) {
  /* Sums and differences of the even-index pair (x0, x2). */
  FLOAT32 sum02_r = ia_add_flt(x_r[0], x_r[2]);
  FLOAT32 sum02_i = ia_add_flt(x_i[0], x_i[2]);
  FLOAT32 dif02_r = ia_sub_flt(x_r[0], x_r[2]);
  FLOAT32 dif02_i = ia_sub_flt(x_i[0], x_i[2]);

  /* Sums and differences of the odd-index pair (x1, x3). */
  FLOAT32 sum13_r = ia_add_flt(x_r[1], x_r[3]);
  FLOAT32 sum13_i = ia_add_flt(x_i[1], x_i[3]);
  FLOAT32 dif13_r = ia_sub_flt(x_r[1], x_r[3]);
  FLOAT32 dif13_i = ia_sub_flt(x_i[1], x_i[3]);

  /* Combine; the -j twiddle on (x1 - x3) swaps its real/imag parts
     with a sign flip for bins 1 and 3. */
  x_r[0] = ia_add_flt(sum02_r, sum13_r);
  x_i[0] = ia_add_flt(sum02_i, sum13_i);
  x_r[2] = ia_sub_flt(sum02_r, sum13_r);
  x_i[2] = ia_sub_flt(sum02_i, sum13_i);

  x_r[1] = ia_add_flt(dif02_r, dif13_i);
  x_i[1] = ia_sub_flt(dif02_i, dif13_r);
  x_r[3] = ia_sub_flt(dif02_r, dif13_i);
  x_i[3] = ia_add_flt(dif02_i, dif13_r);
}
|
|
/*
 * 4096-point FFT via mixed-radix decomposition: 4 column FFTs of length
 * 1024 (radix-2 kernel), a twiddle rotation, then 1024 row FFTs of
 * length 4, and a final transpose back into the caller's buffers.
 *
 * ptr_x_r         : in  - 4096 real input samples
 *                   out - real part of the 4096-point spectrum
 * ptr_x_i         : out - imaginary part of the spectrum.
 *                   NOTE(review): the imaginary INPUT is never read -
 *                   the working imaginary parts are zero-initialized
 *                   below, i.e. this behaves as a real-input FFT.
 * ptr_scratch_buf : scratch, at least 4 * 4096 floats: the first
 *                   2 * 4096 hold the interleaved column data, the rest
 *                   is the interim/scratch region.
 *
 * Scratch layout is "planar interleaved": block (2*k) holds reals and
 * block (2*k + 1) holds imaginaries for working row/column k.
 */
VOID iusace_complex_fft_4096(FLOAT32 *ptr_x_r, FLOAT32 *ptr_x_i, FLOAT32 *ptr_scratch_buf) {
  WORD32 fft_len = 4096;
  WORD32 dim2 = fft_len >> 10;  /* number of columns = 4 */
  WORD32 dim1 = fft_len / dim2; /* column FFT length = 1024 */
  WORD32 fac = 4;               /* stride into the shared twiddle tables */
  FLOAT32 *ptr_fft_interim_buf = ptr_scratch_buf + 2 * fft_len;
  FLOAT32 *ptr_data_r;
  FLOAT32 *ptr_data_i;
  WORD32 row, col;

  /* Stage 1: de-interleave the input into dim2 columns (column `col`
     takes samples col, col+dim2, col+2*dim2, ...) and run a 1024-point
     complex FFT on each, with imaginary parts forced to zero. */
  for (col = 0; col < dim2; col++) {
    ptr_data_r = ptr_scratch_buf + (2 * col) * dim1;
    ptr_data_i = ptr_scratch_buf + (2 * col + 1) * dim1;
    for (row = 0; row < dim1; row++) {
      ptr_data_r[row] = ptr_x_r[dim2 * row + col];
      ptr_data_i[row] = 0;
    }
    ixheaace_rad2_cplx_fft(ptr_data_r, ptr_data_i, dim1, ptr_fft_interim_buf);
  }

  /* Stage 2: apply the inter-dimension twiddle factors while
     transposing the dim2 x dim1 result into dim1 rows of length dim2
     inside the interim buffer. Rotation is (re + j*im) * (cos - j*sin). */
  ptr_data_r = ptr_scratch_buf;
  ptr_data_i = ptr_scratch_buf;
  for (row = 0; row < dim1; row++) {
    FLOAT32 *ptr_cos_val = (FLOAT32 *)&ia_mixed_rad_twiddle_cos[row * dim2 * fac];
    FLOAT32 *ptr_sin_val = (FLOAT32 *)&ia_mixed_rad_twiddle_sin[row * dim2 * fac];
    for (col = 0; col < dim2; col++) {
      FLOAT32 re = ptr_data_r[(2 * col) * dim1 + row];
      FLOAT32 im = ptr_data_i[(2 * col + 1) * dim1 + row];
      FLOAT32 cos_val = ptr_cos_val[col * fac];
      FLOAT32 sin_val = ptr_sin_val[col * fac];
      FLOAT32 rot_re = (FLOAT32)(re * cos_val + im * sin_val);
      FLOAT32 rot_im = (FLOAT32)(im * cos_val - re * sin_val);
      ptr_fft_interim_buf[(2 * row) * dim2 + col] = rot_re;
      ptr_fft_interim_buf[(2 * row + 1) * dim2 + col] = rot_im;
    }
  }

  /* Stage 3: 4-point FFT across each of the dim1 rows, in place. */
  for (row = 0; row < dim1; row++) {
    ptr_data_r = ptr_fft_interim_buf + (2 * row) * dim2;
    ptr_data_i = ptr_fft_interim_buf + (2 * row + 1) * dim2;
    ixheaace_cplx_fft_4(ptr_data_r, ptr_data_i);
  }

  /* Stage 4: transpose the dim1 x dim2 grid back into natural bin
     order in the caller's output buffers. */
  ptr_data_r = ptr_fft_interim_buf;
  ptr_data_i = ptr_fft_interim_buf;
  for (row = 0; row < dim1; row++) {
    for (col = 0; col < dim2; col++) {
      ptr_x_r[col * dim1 + row] = ptr_data_r[(2 * row) * dim2 + col];
      ptr_x_i[col * dim1 + row] = ptr_data_i[(2 * row + 1) * dim2 + col];
    }
  }
}