Import xHE-AAC decoder from Ittiam

Clean copy of version XHEAAC_V1_36 as delivered 2018/4/13
Sole change is to map cr/lf line termination to unix lf.

Added appropriate LICENSE, MODULE_LICENSE_APACHE2, and NOTICE files
as part of folding into internal master.

Bug: 77287124
Test: CTS audio/media
Change-Id: I3c8d124033f967b29d6e384cce5c843ee17a7bb1
This commit is contained in:
Ray Essick 2018-05-02 10:47:09 -07:00
parent 61cd144fc5
commit 48f5fd9660
372 changed files with 148215 additions and 0 deletions

293
Android.bp Normal file
View file

@ -0,0 +1,293 @@
// Soong build rule for libia_xaacdec: the Ittiam xHE-AAC (USAC) audio
// decoder, built as a vendor-available VNDK static library.
cc_library_static {
name: "libia_xaacdec",
vendor_available: true,
vndk: {
enabled: true,
},
// Decode paths are performance critical; always build fully optimized.
cflags: [
"-O3"
],
// Public headers live directly under decoder/.
export_include_dirs: [
"decoder"
],
// Architecture-independent C sources: core AAC decode, SBR/eSBR,
// MPS (MPEG Surround) and the drc_src/ MPEG-D DRC post-processor.
srcs: [
"decoder/ixheaacd_aacdecoder.c",
"decoder/ixheaacd_aacpluscheck.c",
"decoder/ixheaacd_aac_imdct.c",
"decoder/ixheaacd_aac_rom.c",
"decoder/ixheaacd_aac_tns.c",
"decoder/ixheaacd_acelp_bitparse.c",
"decoder/ixheaacd_acelp_decode.c",
"decoder/ixheaacd_acelp_mdct.c",
"decoder/ixheaacd_acelp_tools.c",
"decoder/ixheaacd_adts_crc_check.c",
"decoder/ixheaacd_api.c",
"decoder/ixheaacd_arith_dec.c",
"decoder/ixheaacd_avq_dec.c",
"decoder/ixheaacd_avq_rom.c",
"decoder/ixheaacd_basic_ops.c",
"decoder/ixheaacd_bitbuffer.c",
"decoder/ixheaacd_block.c",
"decoder/ixheaacd_channel.c",
"decoder/ixheaacd_common_initfuncs.c",
"decoder/ixheaacd_common_lpfuncs.c",
"decoder/ixheaacd_common_rom.c",
"decoder/ixheaacd_create.c",
"decoder/ixheaacd_decode_main.c",
"decoder/ixheaacd_dsp_fft32x32s.c",
"decoder/ixheaacd_env_calc.c",
"decoder/ixheaacd_env_dec.c",
"decoder/ixheaacd_env_extr.c",
"decoder/ixheaacd_esbr_envcal.c",
"decoder/ixheaacd_esbr_polyphase.c",
"decoder/ixheaacd_esbr_rom.c",
"decoder/ixheaacd_esbr_fft.c",
"decoder/ixheaacd_ext_ch_ele.c",
"decoder/ixheaacd_fft.c",
"decoder/ixheaacd_freq_sca.c",
"decoder/ixheaacd_fwd_alias_cnx.c",
"decoder/ixheaacd_hbe_trans.c",
"decoder/ixheaacd_headerdecode.c",
"decoder/ixheaacd_hufftables.c",
"decoder/ixheaacd_huff_tools.c",
"decoder/ixheaacd_hybrid.c",
"decoder/ixheaacd_imdct.c",
"decoder/ixheaacd_initfuncs.c",
"decoder/ixheaacd_init_config.c",
"decoder/ixheaacd_longblock.c",
"decoder/ixheaacd_lpc.c",
"decoder/ixheaacd_lpc_dec.c",
"decoder/ixheaacd_lpfuncs.c",
"decoder/ixheaacd_lpp_tran.c",
"decoder/ixheaacd_lt_predict.c",
"decoder/ixheaacd_mps_dec.c",
"decoder/ixheaacd_mps_decorr.c",
"decoder/ixheaacd_mps_hybrid_filt.c",
"decoder/ixheaacd_mps_parse.c",
"decoder/ixheaacd_mps_pre_mix.c",
"decoder/ixheaacd_mps_rom.c",
"decoder/ixheaacd_mps_smoothing.c",
"decoder/ixheaacd_mps_temp_process.c",
"decoder/ixheaacd_mps_temp_reshape.c",
"decoder/ixheaacd_pns_js_thumb.c",
"decoder/ixheaacd_pred_vec_block.c",
"decoder/ixheaacd_process.c",
"decoder/ixheaacd_ps_bitdec.c",
"decoder/ixheaacd_ps_dec.c",
"decoder/ixheaacd_pvc_rom.c",
"decoder/ixheaacd_rom.c",
"decoder/ixheaacd_sbrdecoder.c",
"decoder/ixheaacd_sbrdec_initfuncs.c",
"decoder/ixheaacd_sbrdec_lpfuncs.c",
"decoder/ixheaacd_sbr_crc.c",
"decoder/ixheaacd_sbr_dec.c",
"decoder/ixheaacd_sbr_rom.c",
"decoder/ixheaacd_spectrum_dec.c",
"decoder/ixheaacd_stereo.c",
"decoder/ixheaacd_tcx_fwd_alcnx.c",
"decoder/ixheaacd_tcx_fwd_mdct.c",
"decoder/ixheaacd_thumb_ps_dec.c",
"decoder/ixheaacd_tns.c",
"decoder/ixheaacd_basic_funcs.c",
"decoder/ixheaacd_Windowing.c",
"decoder/ixheaacd_latmdemux.c",
"decoder/ixheaacd_multichannel.c",
"decoder/ixheaacd_drc_freq_dec.c",
"decoder/ixheaacd_mps_poly_filt.c",
"decoder/ixheaacd_huff_code_reorder.c",
"decoder/ixheaacd_rev_vlc.c",
"decoder/drc_src/impd_drc_api.c",
"decoder/drc_src/impd_drc_bitbuffer.c",
"decoder/drc_src/impd_drc_dec.c",
"decoder/drc_src/impd_drc_dynamic_payload.c",
"decoder/drc_src/impd_drc_eq.c",
"decoder/drc_src/impd_drc_extr_delta_coded_info.c",
"decoder/drc_src/impd_drc_filter_bank.c",
"decoder/drc_src/impd_drc_gain_dec.c",
"decoder/drc_src/impd_drc_gain_decoder.c",
"decoder/drc_src/impd_drc_host_params.c",
"decoder/drc_src/impd_drc_init.c",
"decoder/drc_src/impd_drc_interface_decoder.c",
"decoder/drc_src/impd_drc_interface_parser.c",
"decoder/drc_src/impd_drc_loudness_control.c",
"decoder/drc_src/impd_drc_main_qmf_process.c",
"decoder/drc_src/impd_drc_main_stft_process.c",
"decoder/drc_src/impd_drc_main_td_process.c",
"decoder/drc_src/impd_drc_main_td_qmf_process.c",
"decoder/drc_src/impd_drc_multiband.c",
"decoder/drc_src/impd_drc_parametric_dec.c",
"decoder/drc_src/impd_drc_peak_limiter.c",
"decoder/drc_src/impd_drc_process.c",
"decoder/drc_src/impd_drc_rom.c",
"decoder/drc_src/impd_drc_selection_process.c",
"decoder/drc_src/impd_drc_selection_process_drcset_selection.c",
"decoder/drc_src/impd_drc_selection_process_init.c",
"decoder/drc_src/impd_drc_shape_filter.c",
"decoder/drc_src/impd_drc_static_payload.c",
],
// Media-codec hardening: trap on integer overflow, and enable
// Control Flow Integrity with diagnostics.
sanitize: {
misc_undefined: [
"unsigned-integer-overflow",
"signed-integer-overflow",
],
cfi: true,
diag: {
cfi: true,
},
},
// Per-architecture overrides: ARM variants add hand-written assembly
// kernels; x86/x86_64 use the generic C implementations.
arch: {
arm: {
local_include_dirs: [
"decoder/armv7",
"decoder"
],
srcs: [
"decoder/armv7/ixheaacd_qmf_dec.c",
"decoder/armv7/ixheaacd_fft_armv7.c",
"decoder/armv7/ixheaacd_function_selector_armv7.c",
"decoder/armv7/ixheaacd_overlap_add1.s",
"decoder/armv7/ixheaacd_overlap_add2.s",
"decoder/armv7/ixheaacd_lap1.s",
"decoder/armv7/ixheaacd_dec_DCT2_64_asm.s",
"decoder/armv7/ixheaacd_apply_rot.s",
"decoder/armv7/ixheaacd_autocorr_st2.s",
"decoder/armv7/ixheaacd_auto_corr.s",
"decoder/armv7/ixheaacd_calcmaxspectralline.s",
"decoder/armv7/ixheaacd_conv_ergtoamplitude.s",
"decoder/armv7/ixheaacd_conv_ergtoamplitudelp.s",
"decoder/armv7/ixheaacd_cos_sin_mod.s",
"decoder/armv7/ixheaacd_dct3_32.s",
"decoder/armv7/ixheaacd_decorr_filter2.s",
"decoder/armv7/ixheaacd_enery_calc_per_subband.s",
"decoder/armv7/ixheaacd_expsubbandsamples.s",
"decoder/armv7/ixheaacd_ffr_divide16.s",
"decoder/armv7/ixheaacd_fwd_modulation.s",
"decoder/armv7/ixheaacd_harm_idx_zerotwolp.s",
"decoder/armv7/ixheaacd_imdct_using_fft.s",
"decoder/armv7/ixheaacd_inv_dit_fft_8pt.s",
"decoder/armv7/ixheaacd_no_lap1.s",
"decoder/armv7/ixheaacd_post_radix_compute2.s",
"decoder/armv7/ixheaacd_post_radix_compute4.s",
"decoder/armv7/ixheaacd_post_twiddle.s",
"decoder/armv7/ixheaacd_pre_twiddle_compute.s",
"decoder/armv7/ixheaacd_post_twiddle_overlap.s",
"decoder/armv7/ixheaacd_radix4_bfly.s",
"decoder/armv7/ixheaacd_rescale_subbandsamples.s",
"decoder/armv7/ixheaacd_sbr_imdct_using_fft.s",
"decoder/armv7/ixheaacd_sbr_qmfanal32_winadds.s",
"decoder/armv7/ixheaacd_sbr_qmfsyn64_winadd.s",
"decoder/armv7/ixheaacd_shiftrountine.s",
"decoder/armv7/ixheaacd_shiftrountine_with_round.s",
"decoder/armv7/ixheaacd_tns_ar_filter_fixed_32x16.s",
"decoder/armv7/ixheaacd_tns_parcor2lpc_32x16.s",
"decoder/armv7/ixheaacd_esbr_radix4bfly.s",
"decoder/armv7/ixheaacd_esbr_cos_sin_mod_loop1.s",
"decoder/armv7/ixheaacd_esbr_qmfsyn64_winadd.s",
"decoder/armv7/ixheaacd_complex_ifft_p2.s",
"decoder/armv7/ixheaacd_complex_fft_p2.s",
"decoder/armv7/ixheaacd_esbr_cos_sin_mod_loop2.s",
"decoder/armv7/ixheaacd_shiftrountine_with_round_hq.s",
"decoder/armv7/ixheaacd_mps_complex_fft_64_asm.s",
"decoder/armv7/ixheaacd_esbr_fwd_modulation.s",
"decoder/armv7/ixheaacd_mps_synt_pre_twiddle.s",
"decoder/armv7/ixheaacd_mps_synt_post_twiddle.s",
"decoder/armv7/ixheaacd_calc_pre_twid.s",
"decoder/armv7/ixheaacd_calc_post_twid.s",
"decoder/armv7/ixheaacd_mps_synt_out_calc.s",
"decoder/armv7/ixheaacd_mps_synt_post_fft_twiddle.s",
"decoder/armv7/ixheaacd_sbr_qmfanal32_winadds_eld.s",
"decoder/armv7/ixheaacd_shiftrountine_with_rnd_eld.s",
"decoder/armv7/ixheaacd_eld_decoder_sbr_pre_twiddle.s",
"decoder/armv7/ixheaacd_fft_15_ld.s",
"decoder/armv7/ixheaacd_aac_ld_dec_rearrange.s",
"decoder/armv7/ixheaacd_fft32x32_ld2_armv7.s",
"decoder/armv7/ixheaacd_apply_scale_fac.s"
],
// The .s sources use NEON; target a NEON-capable softfp baseline.
cflags: [
"-mfloat-abi=softfp",
"-mfpu=neon",
"-mcpu=cortex-a8",
],
// Intentionally empty: placeholder for NEON-variant-specific
// sources/flags should any diverge from the arm defaults above.
armv7_a_neon: {
srcs: [
],
cflags: [
],
},
},
arm64: {
cflags: [
"-march=armv8-a",
],
local_include_dirs: [
"decoder/armv8",
"decoder"
],
srcs: [
"decoder/armv8/ixheaacd_qmf_dec.c",
"decoder/armv8/ixheaacd_function_selector_armv8.c",
"decoder/armv8/ixheaacd_calcmaxspectralline.s",
"decoder/armv8/ixheaacd_sbr_imdct_using_fft.s",
"decoder/armv8/ixheaacd_imdct_using_fft.s",
"decoder/armv8/ixheaacd_no_lap1.s",
"decoder/armv8/ixheaacd_post_twiddle.s",
"decoder/armv8/ixheaacd_pre_twiddle.s",
"decoder/armv8/ixheaacd_sbr_qmfsyn64_winadd.s",
"decoder/armv8/ixheaacd_overlap_add1.s",
"decoder/armv8/ixheaacd_overlap_add2.s",
"decoder/armv8/ixheaacd_shiftrountine_with_round_eld.s",
"decoder/armv8/ixheaacd_fft32x32_ld2_armv8.s",
"decoder/armv8/ixheaacd_inv_dit_fft_8pt.s",
"decoder/armv8/ixheaacd_shiftrountine_with_round.s",
"decoder/armv8/ixheaacd_sbr_qmf_analysis32_neon.s",
"decoder/armv8/ixheaacd_postradixcompute4.s",
"decoder/armv8/ixheaacd_apply_scale_factors.s",
"decoder/armv8/ixheaacd_cos_sin_mod_loop1.s",
"decoder/armv8/ixheaacd_cos_sin_mod_loop2.s",
],
},
// x86/x86_64: no assembly kernels; build the portable C QMF path and
// the per-arch function-selector that routes to the generic routines.
x86: {
local_include_dirs: [
"decoder"
],
srcs: [
"decoder/ixheaacd_qmf_dec.c",
"decoder/x86/ixheaacd_function_selector_x86.c",
],
cflags: [
],
},
x86_64: {
local_include_dirs: [
"decoder"
],
srcs: [
"decoder/ixheaacd_qmf_dec.c",
"decoder/x86_64/ixheaacd_function_selector_x86_64.c",
],
cflags: [
],
},
},
}
// Recurse into the test/ subdirectory for its build rules.
subdirs = ["test"]

203
LICENSE Normal file
View file

@ -0,0 +1,203 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
Create Short Link

0
MODULE_LICENSE_APACHE2 Normal file
View file

19
NOTICE Normal file
View file

@ -0,0 +1,19 @@
/******************************************************************************
* *
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/

View file

@ -0,0 +1,46 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_mps_mulshift
@ ixheaacd_mps_mulshift - vectorized element-wise fixed-point multiply.
@   R0 -> input array a (32-bit words), advanced as it is read
@   R1 -> input array b (32-bit words), advanced as it is read
@   R2 -> output array (32-bit words), advanced as it is written
@   R3 =  element count, consumed 8 per iteration
@ Each pair is combined with VQDMULL (saturating doubling multiply long,
@ giving 2*a*b as 64 bits); the VUZP de-interleave then leaves the upper
@ 32-bit halves of those products in Q6/Q7, which are stored - i.e. the
@ stored value corresponds to (a*b) >> 31 with saturation.
@ NOTE(review): assumes R3 is a positive multiple of 8 and all three
@ buffers are large enough - confirm at call sites.
ixheaacd_mps_mulshift:
STMFD sp!, {R4-R12}        @ save callee-saved core registers
VPUSH {d8 - d15}           @ save callee-saved NEON registers
LOOP:
VLD1.32 {Q0, Q1}, [R0]! @ load 8 words of a
VLD1.32 {Q2, Q3}, [R1]! @ load 8 words of b
VQDMULL.S32 Q4, D0, D4  @ 2*a*b -> 64-bit lanes (saturating)
VQDMULL.S32 Q5, D2, D6
VQDMULL.S32 Q6, D1, D5
VQDMULL.S32 Q7, D3, D7
VUZP.32 Q4, Q6          @ un-zip: gather high 32-bit halves into Q6
VUZP.32 Q5, Q7          @ ... and Q7
VST1.32 {Q6, Q7}, [R2]! @ store 8 results
SUBS R3, R3, #8         @ 8 elements done
BGT LOOP
VPOP {d8 - d15}            @ restore NEON registers
LDMFD sp!, {R4-R12}        @ restore core registers
BX LR

View file

@ -0,0 +1,240 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_mps_mulshift_acc
@ ixheaacd_mps_mulshift_acc - multiply-accumulate of fixed-point vectors
@ with a final saturating narrow to 32 bits.
@   R0 -> Sr_fix   (32-bit input, re-read for each output lane)
@   R1 -> Si_fix   (32-bit input, re-read for each output lane)
@   R2 -> N.real_fix (32-bit input, consumed across the four inner loops)
@   R3 -> N.imag_fix (32-bit input, consumed across the four inner loops)
@   [SP,#104] -> V_fix output buffer
@   [SP,#108] =  2*resolution (loop count; 4 outputs per outer iteration)
@ Products are formed as (x*y) >> 31 in 64 bits; each inner loop pairs
@ Sr*Nre - Si*Nim terms (complex-style real-part combine - presumably;
@ confirm against the C reference ixheaacd_mps_mulshift_acc) and reduces
@ them into one 64-bit lane of Q14/Q15.  VQMOVN then saturates the four
@ accumulators back to 32 bits for storage.
ixheaacd_mps_mulshift_acc:
STMFD sp!, {R4-R12, R14}   @ 10 words saved -> stack args at SP+104
VPUSH {d8 - d15}           @ 64 more bytes; offsets below include this
LDR R4, [SP, #104] @Argument V_fix
LDR R5, [SP, #108] @Argument 2*resolution
ASR R6, R5, #1             @ R6 = resolution (inner-loop element count)
MOV R7, R4
MOV R11, #40
MUL R11, R11, R5           @ R11 = 40 * (2*resolution)
ADD R7, R7, R11            @ R7 = end of destination region in V_fix
LSL R8, R5, #2
SUB R8, R7, R8             @ R8 = end of source region (4*(2*res) lower)
MOV R10, #9
MUL R9, R5, R10            @ R9 = 9*(2*resolution) words to move
@ COPYLOOP: move a block of V_fix upward, walking backwards 32 bytes at a
@ time so overlapping source/destination regions copy safely.
COPYLOOP:
SUB R8, R8, #32
VLD1.32 {Q0, Q1}, [R8]
SUB R7, R7, #32
VST1.32 {Q0, Q1}, [R7]
SUBS R9, R9, #8
BGT COPYLOOP
@ Outer loop: produce 4 output words per pass (lanes D28,D29,D30,D31).
LOOP:
VMOV.I64 Q14, #0           @ clear the four 64-bit accumulators
VMOV.I64 Q15, #0
MOV R7, R6                 @ rewind element count ...
MOV R8, R0                 @ ... and the Sr_fix / Si_fix read pointers
MOV R9, R1                 @ (R2/R3 keep advancing - fresh N data per lane)
@ LOOP1: accumulate output lane 0 into D28.
LOOP1:
VLD1.32 {Q0, Q1}, [R8]! @LOADING values from R0 Sr_fix
VLD1.32 {Q2, Q3}, [R9]! @LOADING values from R1 Si_fix
VLD1.32 {Q4, Q5}, [R2]! @LOADING values from R2 N.real_fix
VLD1.32 {Q6, Q7}, [R3]! @LOADING values from R3 N.imag_fix
VMULL.S32 Q12, D0, D8   @ Sr*Nre products (64-bit)
VMULL.S32 Q10, D1, D9
VMULL.S32 Q11, D3, D11
VMULL.S32 Q13, D2, D10
VMULL.S32 Q0, D4, D12   @ Si*Nim products (64-bit)
VMULL.S32 Q4, D5, D13
VMULL.S32 Q5, D7, D15
VMULL.S32 Q1, D6, D14
VSHR.S64 Q0, Q0, #31    @ scale every product: (x*y) >> 31
VSHR.S64 Q1, Q1, #31
VSHR.S64 Q4, Q4, #31
VSHR.S64 Q5, Q5, #31
VSHR.S64 Q12, Q12, #31
VSHR.S64 Q13, Q13, #31
VSHR.S64 Q10, Q10, #31
VSHR.S64 Q11, Q11, #31
VSUB.I64 Q12, Q12, Q0   @ Sr*Nre - Si*Nim, pairwise
VSUB.I64 Q13, Q13, Q1
VSUB.I64 Q10, Q10, Q4
VSUB.I64 Q11, Q11, Q5
VADD.I64 Q12, Q12, Q13  @ tree-reduce the 8 terms ...
VADD.I64 Q10, Q10, Q11
VADD.I64 Q12, Q12, Q10
VADD.I64 D24, D24, D25  @ ... down to a single 64-bit sum
VADD.I64 D28, D28, D24  @ accumulate into lane 0
SUBS R7, R7, #8
BGT LOOP1
MOV R7, R6                 @ rewind Sr/Si for the next lane
MOV R8, R0
MOV R9, R1
@ LOOP2: same computation, accumulating output lane 1 into D29.
LOOP2:
VLD1.32 {Q0, Q1}, [R8]! @LOADING values from R0 Sr_fix
VLD1.32 {Q2, Q3}, [R9]! @LOADING values from R1 Si_fix
VLD1.32 {Q4, Q5}, [R2]! @LOADING values from R2 N.real_fix
VLD1.32 {Q6, Q7}, [R3]! @LOADING values from R3 N.imag_fix
VMULL.S32 Q12, D0, D8
VMULL.S32 Q10, D1, D9
VMULL.S32 Q11, D3, D11
VMULL.S32 Q13, D2, D10
VMULL.S32 Q0, D4, D12
VMULL.S32 Q4, D5, D13
VMULL.S32 Q5, D7, D15
VMULL.S32 Q1, D6, D14
VSHR.S64 Q12, Q12, #31
VSHR.S64 Q13, Q13, #31
VSHR.S64 Q10, Q10, #31
VSHR.S64 Q11, Q11, #31
VSHR.S64 Q0, Q0, #31
VSHR.S64 Q1, Q1, #31
VSHR.S64 Q4, Q4, #31
VSHR.S64 Q5, Q5, #31
VSUB.I64 Q12, Q12, Q0
VSUB.I64 Q13, Q13, Q1
VSUB.I64 Q10, Q10, Q4
VSUB.I64 Q11, Q11, Q5
VADD.I64 Q12, Q12, Q13
VADD.I64 Q10, Q10, Q11
VADD.I64 Q12, Q12, Q10
VADD.I64 D24, D24, D25
VADD.I64 D29, D29, D24  @ accumulate into lane 1
SUBS R7, R7, #8
BGT LOOP2
MOV R7, R6
MOV R8, R0
MOV R9, R1
@ LOOP3: same computation, accumulating output lane 2 into D30.
LOOP3:
VLD1.32 {Q0, Q1}, [R8]! @LOADING values from R0 Sr_fix
VLD1.32 {Q2, Q3}, [R9]! @LOADING values from R1 Si_fix
VLD1.32 {Q4, Q5}, [R2]! @LOADING values from R2 N.real_fix
VLD1.32 {Q6, Q7}, [R3]! @LOADING values from R3 N.imag_fix
VMULL.S32 Q12, D0, D8
VMULL.S32 Q10, D1, D9
VMULL.S32 Q11, D3, D11
VMULL.S32 Q13, D2, D10
VMULL.S32 Q0, D4, D12
VMULL.S32 Q4, D5, D13
VMULL.S32 Q5, D7, D15
VMULL.S32 Q1, D6, D14
VSHR.S64 Q12, Q12, #31
VSHR.S64 Q13, Q13, #31
VSHR.S64 Q10, Q10, #31
VSHR.S64 Q11, Q11, #31
VSHR.S64 Q0, Q0, #31
VSHR.S64 Q1, Q1, #31
VSHR.S64 Q4, Q4, #31
VSHR.S64 Q5, Q5, #31
VSUB.I64 Q12, Q12, Q0
VSUB.I64 Q13, Q13, Q1
VSUB.I64 Q10, Q10, Q4
VSUB.I64 Q11, Q11, Q5
VADD.I64 Q12, Q12, Q13
VADD.I64 Q10, Q10, Q11
VADD.I64 Q12, Q12, Q10
VADD.I64 D24, D24, D25
VADD.I64 D30, D30, D24  @ accumulate into lane 2
SUBS R7, R7, #8
BGT LOOP3
MOV R7, R6
MOV R8, R0
MOV R9, R1
@ LOOP4: same computation, accumulating output lane 3 into D31.
LOOP4:
VLD1.32 {Q0, Q1}, [R8]! @LOADING values from R0 Sr_fix
VLD1.32 {Q2, Q3}, [R9]! @LOADING values from R1 Si_fix
VLD1.32 {Q4, Q5}, [R2]! @LOADING values from R2 N.real_fix
VLD1.32 {Q6, Q7}, [R3]! @LOADING values from R3 N.imag_fix
VMULL.S32 Q12, D0, D8
VMULL.S32 Q10, D1, D9
VMULL.S32 Q11, D3, D11
VMULL.S32 Q13, D2, D10
VMULL.S32 Q0, D4, D12
VMULL.S32 Q4, D5, D13
VMULL.S32 Q5, D7, D15
VMULL.S32 Q1, D6, D14
VSHR.S64 Q12, Q12, #31
VSHR.S64 Q13, Q13, #31
VSHR.S64 Q10, Q10, #31
VSHR.S64 Q11, Q11, #31
VSHR.S64 Q0, Q0, #31
VSHR.S64 Q1, Q1, #31
VSHR.S64 Q4, Q4, #31
VSHR.S64 Q5, Q5, #31
VSUB.I64 Q12, Q12, Q0
VSUB.I64 Q13, Q13, Q1
VSUB.I64 Q10, Q10, Q4
VSUB.I64 Q11, Q11, Q5
VADD.I64 Q12, Q12, Q13
VADD.I64 Q10, Q10, Q11
VADD.I64 Q12, Q12, Q10
VADD.I64 D24, D24, D25
VADD.I64 D31, D31, D24  @ accumulate into lane 3
SUBS R7, R7, #8
BGT LOOP4
VQMOVN.S64 D0, Q14         @ saturating narrow lanes 0,1 -> 32-bit
VQMOVN.S64 D1, Q15         @ saturating narrow lanes 2,3 -> 32-bit
VST1.32 {Q0}, [R4]! @ store 4 results to V_fix
SUBS R5, R5, #4            @ 4 outputs produced this pass
BGT LOOP
VPOP {d8 - d15}
LDMFD sp!, {R4-R12, R14}
BX LR

View file

@ -0,0 +1,50 @@
.text
.p2align 2
.global ia_aac_ld_dec_rearrange_armv7
@ ia_aac_ld_dec_rearrange_armv7 - table-driven gather of 8-byte elements.
@   r0 -> inp: source buffer of (re,im) 32-bit word pairs
@   r1 -> buf1: destination, written sequentially
@   r2 =  total pair count; divided by 8 below (loop is 8x unrolled)
@   r3 -> re_arr_tab: byte indices into inp (one byte per pair)
@ For each table entry idx, copies inp[2*idx] and inp[2*idx+1] to the next
@ two destination words.  NOTE(review): assumes r2 is a positive multiple
@ of 8 and every table index fits in a byte - confirm at call sites.
ia_aac_ld_dec_rearrange_armv7:
STMFD r13!, {r4 - r12, r14}
@ASR r2,r2,#3 @
MOV R2, R2, ASR #3          @ r2 = number of 8-pair groups
LOOP_REARRANGE:
@ Fetch 8 table indices, then scale each to a byte offset (idx * 8,
@ since each element is two 4-byte words).
LDRB r4, [r3], #1 @ idx = mdct_tables_ptr->re_arr_tab[n]
LDRB r5, [r3], #1 @ idx = mdct_tables_ptr->re_arr_tab[n]
LDRB r6, [r3], #1 @ idx = mdct_tables_ptr->re_arr_tab[n]
LDRB r7, [r3], #1 @ idx = mdct_tables_ptr->re_arr_tab[n]
LDRB r8, [r3], #1 @ idx = mdct_tables_ptr->re_arr_tab[n]
LDRB r9, [r3], #1 @ idx = mdct_tables_ptr->re_arr_tab[n]
LDRB r10, [r3], #1 @ idx = mdct_tables_ptr->re_arr_tab[n]
LDRB r11, [r3], #1 @ idx = mdct_tables_ptr->re_arr_tab[n]
ADD r4, r0, r4, lsl #3
ADD r5, r0, r5, lsl #3
ADD r6, r0, r6, lsl #3
ADD r7, r0, r7, lsl #3
ADD r8, r0, r8, lsl #3
ADD r9, r0, r9, lsl #3
ADD r10, r0, r10, lsl #3
ADD r11, r0, r11, lsl #3
@ Gather each pair and append it to the destination.
LDMIA r4, {r12, r14} @ r12 = inp[idx] and r14 = inp[idx+1]
STMIA r1!, {r12, r14} @ *buf1++ = inp[idx] and *buf1++ = inp[idx+1]
LDMIA r5, {r12, r14} @ r12 = inp[idx] and r14 = inp[idx+1]
STMIA r1!, {r12, r14} @ *buf1++ = inp[idx] and *buf1++ = inp[idx+1]
LDMIA r6, {r12, r14} @ r12 = inp[idx] and r14 = inp[idx+1]
STMIA r1!, {r12, r14} @ *buf1++ = inp[idx] and *buf1++ = inp[idx+1]
LDMIA r7, {r12, r14} @ r12 = inp[idx] and r14 = inp[idx+1]
STMIA r1!, {r12, r14} @ *buf1++ = inp[idx] and *buf1++ = inp[idx+1]
LDMIA r8, {r12, r14} @ r12 = inp[idx] and r14 = inp[idx+1]
STMIA r1!, {r12, r14} @ *buf1++ = inp[idx] and *buf1++ = inp[idx+1]
LDMIA r9, {r12, r14} @ r12 = inp[idx] and r14 = inp[idx+1]
STMIA r1!, {r12, r14} @ *buf1++ = inp[idx] and *buf1++ = inp[idx+1]
LDMIA r10, {r12, r14} @ r12 = inp[idx] and r14 = inp[idx+1]
STMIA r1!, {r12, r14} @ *buf1++ = inp[idx] and *buf1++ = inp[idx+1]
LDMIA r11, {r12, r14} @ r12 = inp[idx] and r14 = inp[idx+1]
STMIA r1!, {r12, r14} @ *buf1++ = inp[idx] and *buf1++ = inp[idx+1]
SUBS r2, r2, #1
BGT LOOP_REARRANGE
@ Return: restoring r15 (pc) from the saved r14 performs the branch back.
LDMFD r13!, {r4 - r12, r15}

View file

@ -0,0 +1,229 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_apply_rot_armv7
@ ixheaacd_apply_rot_armv7 - applies a rotation/mixing stage over a
@ decoder state structure addressed through hard-coded byte offsets.
@   R0 -> state/context structure (base for offsets 0x59e, 0x53C, 0x6c2,
@         0x564, and the pointer stored at [R0,#44])
@   R1..R3, plus stack arguments -> output/coefficient arrays
@ NOTE(review): the exact structure layout and fixed-point formats are not
@ visible here; offsets must track the C-side struct definition exactly.
@ A 512-byte scratch table is built on the stack in LOOP3 and consumed in
@ LOOP5; all [SP,#...] argument offsets after the "SUB SP, SP, #512" are
@ rebased accordingly.
ixheaacd_apply_rot_armv7:
STMFD SP!, {R4-R12, R14}   @ 10 registers saved (40 bytes)
LDR R5, =0x59e
MOV R4, #22                @ LOOP1 runs 11 times (2 per pass)
ADD R11, R0, R5            @ R11 -> 16-bit data region at base+0x59e
@ LOOP1: pairwise 16-bit butterflies - each value at a negative offset is
@ summed with its partner ~192 bytes ahead and stored back in place.
LOOP1:
LDRSH R5, [R11, #-98]
LDRSH R6, [R11, #94]
LDRSH R7, [R11, #-96]
LDRSH R8, [R11, #96]
ADD R9, R5, R6
STRH R9, [R11, #-98]
ADD R10, R7, R8
STRH R10, [R11, #-96]
LDRSH R5, [R11, #-2]
LDRSH R6, [R11, #190]
LDRSH R7, [R11]
LDRSH R8, [R11, #192]
ADD R9, R5, R6
STRH R9, [R11, #-2]
ADD R10, R7, R8
STRH R10, [R11], #4        @ advance base by 4 after the store
LDRSH R5, [R11, #-98]
LDRSH R6, [R11, #94]
LDRSH R7, [R11, #-96]
LDRSH R8, [R11, #96]
ADD R9, R5, R6
STRH R9, [R11, #-98]
ADD R10, R7, R8
STRH R10, [R11, #-96]
SUBS R4, R4, #2
LDRSH R5, [R11, #-2]
LDRSH R6, [R11, #190]
LDRSH R7, [R11]
LDRSH R8, [R11, #192]
ADD R9, R5, R6
STRH R9, [R11, #-2]
ADD R10, R7, R8
STRH R10, [R11], #4
BGT LOOP1
LDR R4, =0x53C
LDR R12, [R0, #44]         @ R12 -> 32-bit buffer pointed to by the state
ADD R11, R0, R4            @ R11 -> coefficient words at base+0x53C
MOV R4, #10
@ LOOP2: complex-style rotate of buffer pairs 0x80 apart using Q15-style
@ coefficients: SMULWB/SMULWT take the 32x16 high product, QADD combines
@ with saturation, and the <<2 restores the result scaling.
LOOP2:
LDR R5, [R12]
LDR R7, [R11], #4
LDR R6, [R12, #0x80]
LDR R8, [R11, #92]
SMULWB R9, R5, R7
SMULWB R10, R6, R8
SMULWT R14, R5, R7
QADD R5, R9, R10
SMULWT R6, R6, R8
MOV R5, R5, LSL #2
QADD R14, R14, R6
STR R5, [R12], #4
MOV R14, R14, LSL #2
STR R14, [R12, #0x7c]
LDR R5, [R12, #0x3c]
LDR R6, [R12, #0xbc]
SMULWB R9, R5, R7
SMULWB R10, R6, R8
SMULWT R14, R5, R7
QADD R5, R9, R10
SMULWT R6, R6, R8
MOV R5, R5, LSL #2
QADD R14, R14, R6
STR R5, [R12, #0x3c]
MOV R14, R14, LSL #2
STR R14, [R12, #0xbc]
SUBS R4, R4, #1
BGT LOOP2
LDR R11, =0x6c2
LDR R5, =0x564
LDRSH R14, [R0, R11]       @ R14 = 16-bit limit value at base+0x6c2
ADD R11, R0, R5            @ R11 -> word pairs at base+0x564
LDR R5, [SP, #44]          @ 2nd stacked argument (before SP adjustment)
SUB SP, SP, #512           @ reserve the scratch expansion table
MOV R12, SP                @ R12 -> scratch table base
LDR R6, [R5, #12]
MOV R4, #12
ADD R6, R6, #0xb8          @ R6 -> 16-bit band-boundary table
@ LOOP3: expand 12 band entries - replicate each (value, value+96B) word
@ pair across the scratch-table slots between two table boundaries.
LOOP3:
LDRSH R5, [R6], #2
LDRSH R7, [R6, #-4]
LDR R10, [R11, #96]
LDR R9, [R11], #4
CMP R14, R5
SUB R8, R14, R7            @ run length = limit - start ...
SUBGT R8, R5, R7           @ ... clipped to the next boundary if smaller
ADD R5, R12, R7, LSL #3    @ destination slot = base + start*8
LOOP3INN1:
STR R10, [R5, #4]
STR R9, [R5], #8
SUBS R8, R8, #1
BGT LOOP3INN1
SUBS R4, R4, #1
BGT LOOP3
MOV R4, #3
LDR R12, [R0, #44]
LDR R9, [SP, #48+512]      @ stacked args, rebased past the scratch table
LDR R0, [SP, #40+512]      @ R0 is repurposed as the 4th output pointer
STR R14, [SP, #-4]!        @ spill the limit value (reloaded after LOOP4)
@ LOOP4: partial sums - for 3 groups, saturating-accumulate up to 6
@ consecutive buffer entries from each of 4 interleaved columns (0x40
@ apart) into the four output streams R1, R2, R3, R0.
LOOP4:
LDR R5, [R12], #4
LDR R6, [R12, #0x3c]
LDR R7, [R12, #0x7c]
LDRSH R10, [R9], #2
LDR R8, [R12, #0xbc]
MOV R11, #5
CMP R10, #6
SUBLT R11, R10, #1         @ fewer than 6 entries: shorten the inner run
LOOP4INN1:
LDR R10, [R12], #4
LDR R14, [R12, #0x3C]
QADD R5, R5, R10
QADD R6, R6, R14
LDR R10, [R12, #0x7C]
LDR R14, [R12, #0xBC]
QADD R7, R7, R10
QADD R8, R8, R14
SUBS R11, R11, #1
BGT LOOP4INN1
STR R5, [R1], #4
STR R6, [R2], #4
STR R7, [R3], #4
STR R8, [R0], #4
SUBS R4, R4, #1
BGT LOOP4
LDR R14, [SP]              @ reload the spilled limit value
ADD R11, SP, #28           @ R11 -> scratch-table entries (skipping spill)
SUB R4, R14, #3            @ remaining elements beyond the 3 LOOP4 groups
@ LOOP5: rotate the remaining elements of all four output streams using
@ the coefficient pairs expanded into the scratch table by LOOP3.
LOOP5:
LDR R5, [R1]
LDR R7, [R11], #4
LDR R6, [R3]
LDR R8, [R11], #4
SMULWB R9, R5, R7
SMULWB R10, R6, R8
SMULWT R14, R5, R7
QADD R5, R9, R10
SMULWT R6, R6, R8
MOV R5, R5, LSL #2
QADD R14, R14, R6
STR R5, [R1], #4
MOV R14, R14, LSL #2
STR R14, [R3], #4
SUBS R4, R4, #1
LDR R5, [R2]
LDR R6, [R0]
SMULWB R9, R5, R7
SMULWB R10, R6, R8
SMULWT R14, R5, R7
QADD R5, R9, R10
SMULWT R6, R6, R8
MOV R5, R5, LSL #2
QADD R14, R14, R6
STR R5, [R2], #4
MOV R14, R14, LSL #2
STR R14, [R0], #4
BGT LOOP5
ADD SP, SP, #516           @ drop scratch table (512) + spill slot (4)
@ Return: popping r15 (pc) from the saved r14 branches back to the caller.
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,147 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_scale_factor_process_armv7
@ Apply per-band scale factors to 32-bit spectral data in place.
@   r0        : 32-bit coefficient buffer, updated in place
@   r1        : 16-bit scale-factor array, one entry per band
@   r2        : number of bands (no-op when <= 0)
@   r3        : 8-bit per-band coefficient counts
@   [sp,#0x28]: 4-entry 32-bit mantissa table, indexed by (sf & 3)
@   [sp,#0x2c]: selector; <= 2 picks shift bias 0x25, else 0x22
@ NOTE(review): [sp,#0x30]/[sp,#0x34] are loaded into r6/r7 every band, but
@ both registers are overwritten before any use (SUBS r6,... or the LDRD
@ pair load) on every path; the loads look dead - confirm against the C
@ reference implementation.
ixheaacd_scale_factor_process_armv7:
STMFD sp!, {r4-r12, r14}
LDR r9, [sp, #0x28]
LDR r11, [sp, #0x2c]
CMP r2, #0
BLE END
MOV r10, #0
CMP r11, #2
MOVLE r11, #0x25
MOVGT r11, #0x22
@ ---- one iteration per scale-factor band ----
TBANDS_LOOP:
LDRSH r5, [r1], #2
LDRB r4, [r3], #1
LDR r6, [sp, #0x30]
LDR r7, [sp, #0x34]
CMP r5, #0x18
BGE SCALE_FACTOR_GE_12
CMP r4, #0
BLE OFFSET_ZERO
@ scale factor below 24: flush the band to zero, four words per pass
SCALE_FACTOR_LT_12:
STR r10, [r0], #4
STR r10, [r0], #4
STR r10, [r0], #4
STR r10, [r0], #4
SUBS r4, r4, #4
BGT SCALE_FACTOR_LT_12
B OFFSET_ZERO
@ r6 = shift = bias - (sf >> 2); r5 = mantissa = table[sf & 3]
SCALE_FACTOR_GE_12:
SUBS r6, r11, r5, ASR #2
AND r5, r5, #3
LDR r5, [r9, r5, LSL #2]
BLE SHIFT_LE_ZERO
SUB r14, r6, #1
@ shift > 0: coef = ((coef * mant) >> 16) >> (shift - 1), four per pass
@ (LDRD/STRD move the r6/r7 pair, SMULWB is a 32 x low-16 multiply)
SHIFT_POSITIVE:
LDRD r6, [r0, #0]
SMULWB r6, r6, r5
SMULWB r7, r7, r5
MOV r6, r6, ASR r14
MOV r7, r7, ASR r14
STRD r6, [r0], #8
LDRD r6, [r0, #0]
SMULWB r6, r6, r5
SMULWB r7, r7, r5
SUBS r4, r4, #4
MOV r6, r6, ASR r14
MOV r7, r7, ASR r14
STRD r6, [r0], #8
BGT SHIFT_POSITIVE
B OFFSET_ZERO
SHIFT_LE_ZERO:
RSBS r14, r6, #0
BGT SHIFT_NEGTIVE1
@ shift == 0: coef = ((coef * mant) >> 16) << 1, two per pass
SHIFT_ZERO:
LDRD r6, [r0, #0]
SMULWB r6, r6, r5
SMULWB r7, r7, r5
MOV r6, r6, LSL #1
MOV r7, r7, LSL #1
STRD r6, [r0], #8
SUBS r4, r4, #2
BGT SHIFT_ZERO
B OFFSET_ZERO
SHIFT_NEGTIVE1:
SUB r14, r14, #1
@ shift < 0: coef = (((coef << (-shift - 1)) * mant) >> 16) << 2
SHIFT_NEGTIVE:
LDRD r6, [r0, #0]
MOV r6, r6, LSL r14
MOV r7, r7, LSL r14
SMULWB r6, r6, r5
SMULWB r7, r7, r5
MOV r6, r6, LSL #2
MOV r7, r7, LSL #2
STRD r6, [r0], #8
SUBS r4, r4, #2
BGT SHIFT_NEGTIVE
OFFSET_ZERO:
SUBS r2, r2, #1
BGT TBANDS_LOOP
END:
@ restores r4-r12 and returns (saved lr popped into pc)
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,155 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_covariance_matrix_calc_armv7
@ Accumulate autocorrelation sums for one subband column per iteration.
@   r0 : 32-bit subband samples; successive time slots are 256 bytes
@        (64 words) apart, the first taps are read at byte offsets
@        9728/9984 (slots 38/39); advanced by 4 per subband
@   r1 : output; 6 words of a 9-word record are written per subband
@   r2 : number of subbands (no-op when <= 0)
@ Samples are pre-scaled by >> 3; products use SMULWT/SMLAWT
@ (32 x top-16 multiplies), so every sum carries a 2^-16 factor.
ixheaacd_covariance_matrix_calc_armv7:
STMFD sp!, {r4-r12, r14}
MOVS r12, r2
BLE EXIT
AUTOCORR:
MOV r12, r0
MOV r5, #9728
LDR r4, [r12, r5]
ADD r5, r5, #256
LDR r3, [r12, r5]
LDR r6, [r12], #256
LDR r5, [r12], #256
MOV r4, r4, ASR #3
MOV r3, r3, ASR #3
MOV r6, r6, ASR #3
MOV r5, r5, ASR #3
@ end-point correction terms built from the first/last tap pairs
SMULWT r11, r3, r4
SMULWT r9, r5, r6
SMULWT r14, r4, r4
SUB r11, r9, r11
SMULWT r9, r6, r6
MOV r3, #12
SUB r14, r9, r14
MOV r7, #0
MOV r8, #0
MOV r9, #0
@ 12 unrolled passes, three new samples each: r9 accumulates squares,
@ r7/r8 accumulate cross-products at the two adjacent lags
AUTO_CORR_RIGHT:
LDR r4, [r12], #256
LDR r10, [r12], #256
MOV r4, r4, ASR #3
SMLAWT r9, r5, r5, r9
SMLAWT r7, r4, r5, r7
SMLAWT r8, r4, r6, r8
MOV r6, r10, ASR #3
SMLAWT r9, r4, r4, r9
SMLAWT r8, r6, r5, r8
LDR r5, [r12], #256
SMLAWT r7, r6, r4, r7
MOV r5, r5, ASR #3
SMLAWT r9, r6, r6, r9
SMLAWT r7, r5, r6, r7
SMLAWT r8, r5, r4, r8
SUBS r3, r3, #1
BNE AUTO_CORR_RIGHT
@ tail: two further samples outside the unrolled loop
LDR r4, [r12], #256
MOV r4, r4, ASR #3
SMLAWT r9, r5, r5, r9
SMLAWT r7, r4, r5, r7
SMLAWT r8, r4, r6, r8
LDR r6, [r12], #256
MOV r6, r6, ASR #3
SMLAWT r9, r4, r4, r9
SMLAWT r7, r6, r4, r7
SMLAWT r8, r6, r5, r8
@ normalize every sum to a common CLZ-derived headroom, then store
CAL_AUTOCORR:
ADD r12, r7, r11
ADD r14, r9, r14
EOR r5, r7, r7, ASR #31
EOR r6, r8, r8, ASR #31
ORR r5, r6, r5
EOR r6, r12, r12, ASR #31
ORR r5, r6, r5
ORR r5, r9, r5
ORR r5, r14, r5
CLZ r5, r5
SUB r5, r5, #1
MOV r7, r7, LSL r5
MOV r8, r8, LSL r5
MOV r9, r9, LSL r5
MOV r12, r12, LSL r5
MOV r14, r14, LSL r5
STR r9, [r1], #4
STR r14, [r1], #4
STR r7, [r1], #4
@ determinant-style term: hi32(r9*r14) - hi32(r12*r12), saturated
SMULL r6, r5, r9, r14
SMULL r6, r10, r12, r12
STR r8, [r1], #4
STR r12, [r1], #4
QSUB r10, r5, r10
ADD r0, r0, #4
ADD r1, r1, #12
STR r10, [r1], #4
SUBS r2, r2, #1
BNE AUTOCORR
EXIT:
@ restores r4-r12 and returns (saved lr popped into pc)
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,403 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_covariance_matrix_calc_2_armv7
@ Covariance accumulation over complex subband samples, one subband per
@ outer iteration.
@   r0 : output, one 9-word record per subband (advanced by 4*9 each pass)
@   r1 : input; real part at [r1], imaginary part at [r1 + 4*64],
@        successive time slots 4*128 bytes apart (advanced by 4 each pass)
@   r2 : number of subbands
@   r3 : number of time slots
@ Samples are pre-scaled by >> 3; products use SMULWT/SMLAWT
@ (32 x top-16 multiplies), so every sum carries a 2^-16 factor.
ixheaacd_covariance_matrix_calc_2_armv7:
STMFD sp!, {r4-r12, r14}
AUTO_CORR_LOOP:
STR r0 , [sp, #-4]!
STR r1 , [sp, #-4]!
LDR r1 , [sp], #4
@ seed the sums from the complex samples at slots -1 and 0
LDR r4 , [r1, #-4*128]
LDR r5 , [r1, #4*(64-128)]
LDR r6 , [r1]
LDR r7 , [r1, #4*64]
MOV r4, r4, ASR #3
MOV r5, r5, ASR #3
MOV r6, r6, ASR #3
MOV r7, r7, ASR #3
SMULWT r8 , r6 , r4
SMULWT r9 , r7 , r4
SMULWT r10, r6 , r5
SMLAWT r8 , r7 , r5, r8
SMULWT r11, r4 , r4
SUB r9 , r9 , r10
SMLAWT r11, r5 , r5, r11
MOV r10, r1
ADD r12, r1, #64*4
MOV r4 , r6
MOV r5 , r7
SUB r14, r3 , #2
MOVS r14, r14, LSR #1
BEQ ENDLOOP2
@ main loop, two time slots per pass: r8/r9 collect real/imag parts of
@ adjacent-slot products, r11 collects energies
LOOP2:
LDR r6 , [r10, #4*128]!
LDR r7 , [r12, #4*128]!
MOV r6, r6, ASR #3
MOV r7, r7, ASR #3
SMLAWT r8 , r6 , r4, r8
SMLAWT r9 , r7 , r4, r9
SMLAWT r8 , r7 , r5, r8
SMULWT r0 , r6 , r5
SMLAWT r11, r4 , r4, r11
SUB r9 , r9 , r0
SMLAWT r11, r5 , r5, r11
LDR r4 , [r10, #4*128]!
LDR r5 , [r12, #4*128]!
MOV r4, r4, ASR #3
MOV r5, r5, ASR #3
SUBS r14, r14, #1
SMLAWT r8 , r4 , r6, r8
SMLAWT r9 , r5 , r6, r9
SMLAWT r8 , r5 , r7, r8
SMULWT r0 , r4 , r7
SMLAWT r11, r6 , r6, r11
SUB r9 , r9 , r0
SMLAWT r11, r7 , r7, r11
BNE LOOP2
ANDS r0, r3, #0x01
BEQ ENDLOOP2
@ odd slot count: one extra slot outside the unrolled pair
ODDLOOP:
LDR r6 , [r10, #4*128]!
LDR r7 , [r12, #4*128]!
MOV r6, r6, ASR #3
MOV r7, r7, ASR #3
SMLAWT r8 , r6 , r4, r8
SMLAWT r9 , r7 , r4, r9
SMLAWT r8 , r7 , r5, r8
SMULWT r0 , r6 , r5
SMLAWT r11, r4 , r4, r11
SUB r9 , r9 , r0
SMLAWT r11, r5 , r5, r11
@ end-point corrections, then store the first batch of results
ENDLOOP2:
MOV r12, r11
LDR r6 , [r1, #-8*128]
LDR r7 , [r1, #4*64-8*128]
MOV r6, r6, ASR #3
MOV r7, r7, ASR #3
SMLAWT r12, r6 , r6, r12
SUB r10, r3, #2
SMLAWT r12, r7 , r7, r12
MOV r0, r10, LSL #(2+7)
ADD r0, r0, #0x100
LDR r4 , [r1, r10, LSL #(2+7)]
LDR r5 , [r1, r0]
MOV r4, r4, ASR #3
MOV r5, r5, ASR #3
SMLAWT r11, r4, r4, r11
LDR r0 , [sp], #4
SMLAWT r11, r5, r5, r11
STR r12, [r0, #4]
STR r11, [r0]
MOV r11, r8
LDR r12, [r1, #-4*128]
LDR r14, [r1, #4*(64-128)]
MOV r12, r12, ASR #3
MOV r14, r14, ASR #3
SMLAWT r11, r12, r6, r11
ADD r10, r10, #1
LDR r12, [r1, r10, LSL#(2+7)]
SMLAWT r11, r14, r7, r11
MOV r14, r10, LSL #(2+7)
ADD r14, r14, #0x100
MOV r12, r12, ASR #3
LDR r14, [r1, r14]
SMLAWT r8 , r12, r4, r8
MOV r14, r14, ASR #3
MOV r10, r9
SMLAWT r8 , r14, r5, r8
STR r11, [r0, #16]
STR r8 , [r0, #8]
SMLAWT r9 , r14, r4 , r9
SMULWT r8 , r12, r5
LDR r14, [r1, #4*(64-128)]
SUB r9 , r9 , r8
MOV r14, r14, ASR #3
LDR r12, [r1, #-4*128]
SMLAWT r10, r14, r6 , r10
MOV r12, r12, ASR #3
SMULWT r8 , r12, r7
STR r9 , [r0, #20]
SUB r10, r10, r8
STR r10, [r0, #28]
@ lag-2 sums: real accumulator in r12, imaginary in r3 (later r4)
STR r1 , [sp, #-4]!
STMFD sp!, {r0, r3}
MOVS r0 , r3 , LSR #2
MOV r12, #0
MOV r3 , #0
LDR r5 , [r1, #-8*128]
LDR r7 , [r1, #-4*128]
LDR r9 , [r1, #4*(64-256)]
LDR r11, [r1, #4*(64-128)]
MOV r5, r5, ASR #3
MOV r7, r7, ASR #3
MOV r9, r9, ASR #3
MOV r11, r11, ASR #3
BEQ ENDLOOP3
@ four time slots per pass
LOOP3:
LDR r4 , [r1], #4*128
LDR r8 , [r1, #4*(64-128)]
MOV r4, r4, ASR #3
MOV r8, r8, ASR #3
SMLAWT r12, r4 , r5 , r12
SMLAWT r12, r8 , r9 , r12
SMULWT r14, r4 , r9
SMLAWT r3 , r8 , r5 , r3
LDR r6 , [r1], #4*128
SUB r3 , r3 , r14
LDR r10, [r1, #4*(64-128)]
MOV r6, r6, ASR #3
MOV r10, r10, ASR #3
SMLAWT r12, r6 , r7 , r12
SMLAWT r12, r10, r11, r12
SMULWT r14, r6 , r11
SMLAWT r3 , r10, r7 , r3
LDR r5 , [r1], #4*128
SUB r3 , r3 , r14
LDR r9 , [r1, #4*(64-128)]
MOV r5, r5, ASR #3
MOV r9, r9, ASR #3
SMLAWT r12, r5 , r4 , r12
SMLAWT r12, r9 , r8 , r12
SMULWT r14, r5 , r8
SMLAWT r3 , r9 , r4 , r3
LDR r7 , [r1], #4*128
SUB r3 , r3 , r14
LDR r11, [r1, #4*(64-128)]
MOV r7, r7, ASR #3
MOV r11, r11, ASR #3
SMLAWT r12, r7 , r6 , r12
SMLAWT r12, r11, r10, r12
SMULWT r14, r7 , r10
SMLAWT r3 , r11, r6 , r3
SUBS r0 , r0 , #1
SUB r3 , r3 , r14
BNE LOOP3
ENDLOOP3:
MOV r4 , r3
LDMFD sp!, {r0, r3}
ANDS r5 , r3 , #3
BEQ ENDLOOP4
@ remaining 1..3 slots, one per pass
LOOP4:
LDR r6 , [r1, #-8*128]
LDR r10, [r1, #4*(64-256)]
LDR r7 , [r1], #4*128
LDR r11, [r1, #4*(64-128)]
MOV r6, r6, ASR #3
MOV r7, r7, ASR #3
MOV r10, r10, ASR #3
MOV r11, r11, ASR #3
SMLAWT r12, r7 , r6 , r12
SMLAWT r12, r11, r10, r12
SMULWT r14, r7 , r10
SMLAWT r4 , r11, r6 , r4
SUBSNE r5 , r5 , #1
SUB r4 , r4 , r14
BNE LOOP4
ENDLOOP4:
STR r12, [r0, #12]
STR r4 , [r0, #24]
LDR r1 , [sp], #4
@ advance to the next subband column
SUBS R2, R2, #1
ADD r0, r0, #4*9
ADD r1, r1, #4
BGT AUTO_CORR_LOOP
END_OF_AUT0:
@ restores r4-r12 and returns (saved lr popped into pc)
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,79 @@
/******************************************************************************
* *
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#ifndef IXHEAACD_BASIC_OP_H
#define IXHEAACD_BASIC_OP_H
// Plain-integer add/subtract helpers (no saturation).
#define add_d(a, b) ((a) + (b))
#define sub_d(a, b) ((a) - (b))
// Cube root of the reciprocal.  The argument is parenthesized so that
// compound expressions such as ixheaacd_cbrt_calc(x + y) divide by the
// whole sum rather than binding only to the first operand.
#define ixheaacd_cbrt_calc(a) cbrt(1.0f / (a))
// Dual 32x16 fixed-point multiply-subtract: returns (a*c1 - b*c2) >> 16,
// built from separate low/high halfword partial products so the 32x16
// products never need a 64-bit intermediate.
static PLATFORM_INLINE WORD32 msu32x16in32_dual(WORD32 a, WORD16 c1, WORD32 b,
                                                WORD16 c2) {
  UWORD32 lsb_a = a & 65535;
  UWORD32 lsb_b = b & 65535;
  WORD32 msb_a = a >> 16;
  WORD32 msb_b = b >> 16;
  // low halves: unsigned partial products, difference kept modulo 2^32
  WORD32 low = (WORD32)(((UWORD32)lsb_a * (UWORD32)c1) -
                        (UWORD32)lsb_b * (UWORD32)c2);
  low = low >> 16;  // arithmetic shift of the signed reinterpretation
  // high halves carry the sign of the full products
  return low + ((msb_a * (WORD32)c1) - (msb_b * (WORD32)c2));
}
// Dual 32x16 fixed-point multiply-accumulate: returns (a*c1 + b*c2) >> 16,
// built from separate low/high halfword partial products.
static PLATFORM_INLINE WORD32 mac32x16in32_dual(WORD32 a, WORD16 c1, WORD32 b,
                                                WORD16 c2) {
  UWORD32 lsb_a = a & 65535;
  UWORD32 lsb_b = b & 65535;
  WORD32 msb_a = a >> 16;
  WORD32 msb_b = b >> 16;
  // low halves: unsigned sum, logical shift (as in the original)
  UWORD32 low = (UWORD32)lsb_a * (UWORD32)c1 + (UWORD32)lsb_b * (UWORD32)c2;
  low = low >> 16;
  // high halves carry the sign of the full products
  return (WORD32)low + ((msb_a * (WORD32)c1)) + ((msb_b * (WORD32)c2));
}
// 32x32 -> 64-bit multiply-accumulate: returns c + (WORD64)a * b.
static PLATFORM_INLINE WORD64 mac32x32in64_dual(WORD32 a, WORD32 b, WORD64 c) {
  return c + (WORD64)a * (WORD64)b;
}
#endif

View file

@ -0,0 +1,403 @@
/******************************************************************************
* *
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#ifndef IXHEAACD_BASIC_OPS16_H
#define IXHEAACD_BASIC_OPS16_H
// Saturate a 32-bit value to the signed 16-bit range [-32768, 32767].
// If op1 >= 0x8000 the MVN produces 0xffff7fff, and if op1 < -0x8000 the
// MOV produces 0x8000; the truncating WORD16 cast on return turns these
// into 0x7fff / 0x8000 respectively.
static PLATFORM_INLINE WORD16 ixheaacd_sat16(WORD32 op1) {
WORD32 var_out;
__asm__(
" MOV %0, %1 \n\t"
" CMP %0, #0x8000 \n\t"
" ITEE GE \n\t"
" MVNGE %0, #0x8000 \n\t"
" CMNLT %0, #0x00008000 \n\t"
" MOVLT %0, #0x00008000 \n\t"
: "=r"(var_out)
: "r"(op1)
: "cc");
return ((WORD16)var_out);
}
// Add two 16-bit values; the result wraps into 16 bits (no saturation).
static PLATFORM_INLINE WORD16 ixheaacd_add16(WORD16 op1, WORD16 op2) {
  return (WORD16)(op1 + op2);
}
// Add two 16-bit values with saturation to [-32768, 32767].
static PLATFORM_INLINE WORD16 ixheaacd_add16_sat(WORD16 op1, WORD16 op2) {
  WORD32 wide_sum = (WORD32)op1 + (WORD32)op2;
  return ixheaacd_sat16(wide_sum);
}
// Subtract two 16-bit values; the result wraps into 16 bits (no saturation).
static PLATFORM_INLINE WORD16 ixheaacd_sub16(WORD16 op1, WORD16 op2) {
  return (WORD16)(op1 - op2);
}
// Subtract two 16-bit values with saturation to [-32768, 32767].
static PLATFORM_INLINE WORD16 ixheaacd_sub16_sat(WORD16 op1, WORD16 op2) {
  WORD32 wide_diff = (WORD32)op1 - op2;
  return ixheaacd_sat16(wide_diff);
}
// Multiply two 16-bit values and keep bits 31..16 of the 32-bit product.
static PLATFORM_INLINE WORD16 ixheaacd_mult16(WORD16 op1, WORD16 op2) {
  WORD32 product = (WORD32)op1 * (WORD32)op2;
  return (WORD16)(product >> 16);
}
// Multiply two 16-bit values and keep bits 30..15 of the 32-bit product
// (one redundant sign bit removed, no saturation).
static PLATFORM_INLINE WORD16 ixheaacd_mult16_shl(WORD16 op1, WORD16 op2) {
  WORD32 product = (WORD32)op1 * (WORD32)op2;
  return (WORD16)(product >> 15);
}
// multiply 2 16 bit variables and return 30 to 15 bits with saturation
// SMULBB then >> 15; the single overflow case, (-32768)*(-32768) -> 0x8000,
// is decremented to 0x7fff.  The WORD32 temp is truncated to WORD16 by the
// implicit conversion on return.
static PLATFORM_INLINE WORD16 ixheaacd_mult16_shl_sat(WORD16 op1, WORD16 op2) {
WORD32 temp;
__asm__(
" SMULBB %0, %1, %2 \n\t"
" MOV %0, %0, ASR #15 \n\t"
" CMP %0, #0x00008000 \n\t"
" IT EQ \n\t"
" SUBEQ %0, %0, #1 \n\t"
: "=r"(temp)
: "r"(op1), "r"(op2)
: "cc");
return (temp);
}
// Shift a 16-bit value left; the result wraps into 16 bits (no saturation).
static PLATFORM_INLINE WORD16 ixheaacd_shl16(WORD16 op1, WORD16 shift) {
  return (WORD16)(op1 << shift);
}
// Shift a 16-bit value left and saturate the result to 16 bits.
// Shift counts above 15 are clamped to 15.
static PLATFORM_INLINE WORD16 ixheaacd_shl16_sat(WORD16 op1, WORD16 shift) {
  WORD16 clamped = (shift > 15) ? (WORD16)15 : shift;
  WORD32 widened = (WORD32)(op1 << clamped);
  return ixheaacd_sat16(widened);
}
// Arithmetic right shift of a 16-bit value.
static PLATFORM_INLINE WORD16 ixheaacd_shr16(WORD16 op1, WORD16 shift) {
  return (WORD16)(op1 >> shift);
}
// Directional shift: left for positive counts, right for negative ones
// (no saturation in either direction).
static PLATFORM_INLINE WORD16 shl16_dir(WORD16 op1, WORD16 shift) {
  return (shift > 0) ? ixheaacd_shl16(op1, shift)
                     : ixheaacd_shr16(op1, (WORD16)(-shift));
}
// Directional shift: right for non-negative counts, left for negative ones
// (no saturation in either direction).
static PLATFORM_INLINE WORD16 shr16_dir(WORD16 op1, WORD16 shift) {
  return (shift < 0) ? ixheaacd_shl16(op1, (WORD16)(-shift))
                     : ixheaacd_shr16(op1, shift);
}
// Directional shift with saturation on the left branch: left (saturating)
// for positive counts, plain right shift for negative ones.
static PLATFORM_INLINE WORD16 shl16_dir_sat(WORD16 op1, WORD16 shift) {
  return (shift > 0) ? ixheaacd_shl16_sat(op1, shift)
                     : ixheaacd_shr16(op1, (WORD16)(-shift));
}
// Directional shift with saturation on the left branch: right for
// non-negative counts, saturating left shift for negative ones.
static PLATFORM_INLINE WORD16 ixheaacd_shr16_dir_sat(WORD16 op1, WORD16 shift) {
  return (shift < 0) ? ixheaacd_shl16_sat(op1, (WORD16)(-shift))
                     : ixheaacd_shr16(op1, shift);
}
// finds a value which normalizes the input to 16 bit
// The operand is packed into the top halfword, negatives are inverted,
// then CLZ counts the redundant sign bits; norm16(0) yields 16 - 1 = 15.
// NOTE(review): the conditional MVNMIS/MOVEQ/CLZGT carry no IT block, so
// this assembles only in ARM state - confirm the build targets ARM mode.
static PLATFORM_INLINE WORD16 norm16(WORD16 var1) {
WORD16 var_out;
__asm__(
"MOVS %1, %1, LSL #16 \n\t"
"MVNMIS %1, %1 \n\t"
"MOVEQ %0, #16 \n\t"
" CLZGT %0, %1 \n\t"
"SUB %0, %0, #1 \n\t"
: "=r"(var_out)
: "r"(var1)
: "cc");
return (var_out);
}
// Number of significant bits excluding the sign bit: 15 - norm16(op1).
// For an input of zero this evaluates to 15 - 15 = 0.
static PLATFORM_INLINE WORD16 bin_expo16(WORD16 op1) {
  return (WORD16)(15 - norm16(op1));
}
// returns a 16 bit absolute value of a given signed 16 bit value
// Packs into the top halfword, negates when negative (RSBLTS), unpacks.
// No saturation: abs16(-32768) wraps back to -32768.
static PLATFORM_INLINE WORD16 ixheaacd_abs16(WORD16 var1) {
WORD16 var_out;
__asm__(
" MOVS %0, %1, LSL #16 \n\t"
" RSBLTS %0 , %0, #0 \n\t"
" MOV %0, %0, ASR #16 \n\t"
: "=r"(var_out)
: "r"(var1)
: "cc");
return (var_out);
}
// returns a 16 bit absolute value of a given signed 16 bit value with
// saturation
// Same packed negate as ixheaacd_abs16, but the extra MOVMI clamps the
// -32768 case (whose negation is still negative) so the result is 0x7fff.
static PLATFORM_INLINE WORD16 ixheaacd_abs16_sat(WORD16 var1) {
WORD16 var_out;
__asm__(
" MOVS %0, %1, LSL #16 \n\t"
" RSBLTS %0 , %0, #0 \n\t"
" MOVMI %0, #0x7fffffff \n\t"
" MOV %0, %0, ASR #16 \n\t"
: "=r"(var_out)
: "r"(var1)
: "cc");
return (var_out);
}
// Negate a 16-bit value; -32768 saturates to MAX_16 instead of wrapping.
static PLATFORM_INLINE WORD16 ixheaacd_negate16(WORD16 op1) {
  return (op1 == -32768) ? (WORD16)MAX_16 : (WORD16)(-op1);
}
// Smaller of two 16-bit values.
static PLATFORM_INLINE WORD16 ixheaacd_min16(WORD16 op1, WORD16 op2) {
  return (op2 < op1) ? op2 : op1;
}
// Larger of two 16-bit values.
static PLATFORM_INLINE WORD16 ixheaacd_max16(WORD16 op1, WORD16 op2) {
  return (op2 > op1) ? op2 : op1;
}
/*****************************************************************************/
/* div16 : divides two 16-bit fixed-point values (op1/op2) using a           */
/* non-restoring shift-and-subtract loop, producing a 15-bit quotient.      */
/* The Q-format of the quotient, (14 + norm(op1) - norm(op2)), is written   */
/* through q_format.  If op2 is zero, *q_format is set to 0 and op1 is      */
/* returned unchanged (sign handling is skipped in that case).              */
/*****************************************************************************/
static PLATFORM_INLINE WORD16 div16(WORD16 op1, WORD16 op2, WORD16 *q_format) {
  WORD32 quotient = 0;
  UWORD16 mantissa_nr, mantissa_dr;
  WORD16 sign = 0;
  LOOPIDX i;
  WORD16 q_nr, q_dr;

  /* work on magnitudes; remember the sign of the result */
  if (op1 < 0 && op2 != 0) {
    op1 = -op1;
    sign = (WORD16)(sign ^ -1);
  }
  if (op2 < 0) {
    op2 = -op2;
    sign = (WORD16)(sign ^ -1);
  }
  if (op2 == 0) {
    *q_format = 0;
    return (op1);
  }

  /* normalize numerator and denominator to full 16-bit mantissas */
  q_nr = norm16(op1);
  mantissa_nr = (UWORD16)op1 << (q_nr);
  q_dr = norm16(op2);
  mantissa_dr = (UWORD16)op2 << (q_dr);
  *q_format = (WORD16)(14 + q_nr - q_dr);

  /* 15 iterations of shift-and-subtract build the quotient bit by bit */
  for (i = 0; i < 15; i++) {
    quotient = quotient << 1;
    if (mantissa_nr >= mantissa_dr) {
      mantissa_nr = mantissa_nr - mantissa_dr;
      quotient += 1;
    }
    mantissa_nr = (UWORD32)mantissa_nr << 1;
  }

  if (sign < 0) {
    quotient = -quotient;
  }
  return (WORD16)quotient;
}
// Multiply-accumulate: c + ((op1 * op2) >> 16), wrapping into 16 bits.
static PLATFORM_INLINE WORD16 mac16(WORD16 c, WORD16 op1, WORD16 op2) {
  return ixheaacd_add16(c, ixheaacd_mult16(op1, op2));
}
// Multiply-accumulate: c + ((op1 * op2) >> 16) with a saturating add.
static PLATFORM_INLINE WORD16 mac16_sat(WORD16 c, WORD16 op1, WORD16 op2) {
  return ixheaacd_add16_sat(c, ixheaacd_mult16(op1, op2));
}
// Multiply-accumulate: c + ((op1 * op2) >> 15), wrapping into 16 bits.
static PLATFORM_INLINE WORD16 mac16_shl(WORD16 c, WORD16 op1, WORD16 op2) {
  return ixheaacd_add16(c, ixheaacd_mult16_shl(op1, op2));
}
// Multiply-accumulate: the 32-bit sum c + ((op1 * op2) >> 15) is
// saturated back into the 16-bit range.
static PLATFORM_INLINE WORD16 mac16_shl_sat(WORD16 c, WORD16 op1, WORD16 op2) {
  WORD32 acc = (((WORD32)op1 * (WORD32)op2) >> 15) + c;
  return ixheaacd_sat16(acc);
}
// rounds a 32 bit variable to a 16 bit variable with saturation
// ADDS 0x8000 rounds to nearest; on signed overflow (V set) MVNVS clamps
// to 0x7fffffff; the arithmetic >> 16 plus the truncating cast yield the
// 16-bit result.
static PLATFORM_INLINE WORD16 ixheaacd_round16(WORD32 op1) {
WORD16 var_out;
__asm__(
" ADDS %0, %1, #0x8000 \n\t"
" IT VS \n\t"
" MVNVS %0, #0x80000000 \n\t"
" MOV %0, %0, ASR #16 \n\t"
: "=r"(var_out)
: "r"(op1)
: "cc");
return (var_out);
}
#endif

View file

@ -0,0 +1,516 @@
/******************************************************************************
* *
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#ifndef IXHEAACD_BASIC_OPS32_H
#define IXHEAACD_BASIC_OPS32_H
// returns the minima of 2 32 bit variables
// (conditional moves on the CMP flags; the ITE block makes this valid in
// both ARM and Thumb-2 state)
static PLATFORM_INLINE WORD32 ixheaacd_min32(WORD32 a, WORD32 b) {
WORD32 min_val;
__asm__ __volatile__(
" CMP %1, %2 \n\t"
" ITE GT \n\t"
" MOVGT %0, %2 \n\t"
" MOVLE %0, %1 \n\t"
: "=r"(min_val)
: "r"(a), "r"(b)
: "cc");
return min_val;
}
// returns the maxima of 2 32 bit variables
// NOTE(review): unlike ixheaacd_min32 above there is no IT block before the
// conditional MOVLE/MOVGT, so this only assembles in ARM state - confirm
// the build targets ARM (not Thumb-2) mode.
static PLATFORM_INLINE WORD32 ixheaacd_max32(WORD32 a, WORD32 b) {
WORD32 max_val;
__asm__ __volatile__(
" CMP %1, %2 \n\t"
" MOVLE %0, %2 \n\t"
" MOVGT %0, %1 \n\t"
: "=r"(max_val)
: "r"(a), "r"(b)
: "cc");
return max_val;
}
// shifts a 32-bit value left by specified bits
// Register-controlled LSL: per ARM semantics only the low byte of b is
// used and shift amounts >= 32 produce 0 (unlike C's << which would be
// undefined).
static PLATFORM_INLINE WORD32 ixheaacd_shl32(WORD32 a, WORD b) {
WORD32 out_val;
__asm__(
" MOV %0, %1, LSL %2 \n\t"
: "=r"(out_val)
: "r"(a), "r"(b));
return (out_val);
}
// shifts a 32-bit value right by specified bits
// Register-controlled ASR: per ARM semantics shift amounts >= 32 yield
// 0 or -1 according to the sign of a.
static PLATFORM_INLINE WORD32 ixheaacd_shr32(WORD32 a, WORD b) {
WORD32 out_val;
__asm__(
" MOV %0, %1, ASR %2 \n\t"
: "=r"(out_val)
: "r"(a), "r"(b));
return out_val;
}
// shifts a 32-bit value left by specified bits and saturates it to 32 bits
// r3 = a >> (31 - b) recovers the bits (including the sign) that the left
// shift would discard: if r3 is 0 or -1 the shift is exact and a << b is
// returned; otherwise the result is clamped to 0x80000000 / 0x7fffffff.
static PLATFORM_INLINE WORD32 ixheaacd_shl32_sat(WORD32 a, WORD b) {
WORD32 out_val = a;
// WORD32 dummy1=0/*,dummy2=0*/;
__asm__ __volatile__(
" RSBS r3, %2, #31 \n\t"
" MOVS r3, %1, ASR r3 \n\t"
" ITT LT \n\t"
" CMNLT r3, #1 \n\t"
" MOVLT %0, #0x80000000 \n\t"
" IT GT \n\t"
" MOVGT %0, #0x7fffffff \n\t"
" IT EQ \n\t"
" MOVEQ %0, %1, LSL %2 \n\t"
: "=r"(out_val)
: "r"(a), "r"(b)
: "cc", "r3");
return (out_val);
}
// shifts a 32-bit value left by specified bits, shifts
// it right if specified no. of bits is negative (no saturation)
// NOTE(review): the conditional MOVMI/MOVPL have no IT block (unlike
// ixheaacd_shr32_dir below), so this only assembles in ARM state -
// confirm the build targets ARM mode.
static PLATFORM_INLINE WORD32 ixheaacd_shl32_dir(WORD32 a, WORD b) {
WORD32 out_val = 0;
// WORD32 dummy=0;
__asm__ __volatile__(
" RSBS r3, %2, #0 \n\t"
" MOVMI %0, %1, LSL %2 \n\t"
" MOVPL %0, %1, ASR r3 \n\t"
: "=r"(out_val)
: "r"(a), "r"((WORD)b)
: "cc", "r3");
return out_val;
}
// Directional shift with saturation on the left branch: saturating left
// shift for non-negative counts, plain right shift for negative ones.
static PLATFORM_INLINE WORD32 ixheaacd_shl32_dir_sat(WORD32 a, WORD b) {
  return (b < 0) ? ixheaacd_shr32(a, -b) : ixheaacd_shl32_sat(a, b);
}
// shifts a 32-bit value right by specified bits, shifts
// it left if specified no. of bits is negative
// (IT blocks make the conditional moves valid in ARM and Thumb-2 state)
static PLATFORM_INLINE WORD32 ixheaacd_shr32_dir(WORD32 a, WORD b) {
WORD32 out_val = 0;
__asm__ __volatile__(
" RSBS r3, %2, #0 \n\t"
" IT MI \n\t"
" MOVMI %0, %1, ASR %2 \n\t"
" IT PL \n\t"
" MOVPL %0, %1, LSL r3 \n\t"
: "=r"(out_val)
: "r"(a), "r"(b)
: "cc", "r3");
return out_val;
}
// Directional shift: right for non-negative counts, saturating left shift
// for negative ones.
static PLATFORM_INLINE WORD32 shr32_dir_sat(WORD32 a, WORD b) {
  return (b < 0) ? ixheaacd_shl32_sat(a, -b) : ixheaacd_shr32(a, b);
}
// multiplies two 16 bit numbers and returns their 32-bit result
// (SMULBB: signed multiply of the low halfwords of both operands)
static PLATFORM_INLINE WORD32 ixheaacd_mult16x16in32(WORD16 a, WORD16 b) {
WORD32 product;
__asm__(
" SMULBB %0 , %1, %2 \n\t"
: "=r"(product)
: "r"(a), "r"(b));
return product;
}
// multiplies two 16 bit numbers and returns their 32-bit
// result after removing 1 redundant sign bit
// (SMULBB then LSL #1; no saturation, so (-32768)*(-32768) wraps to
// 0x80000000)
static PLATFORM_INLINE WORD32 ixheaacd_mult16x16in32_shl(WORD16 a, WORD16 b) {
WORD32 product;
__asm__(
" SMULBB %0 , %1, %2 \n\t"
" MOV %0, %0, LSL #1 \n\t"
: "=r"(product)
: "r"(a), "r"(b));
return product;
}
// multiplies two 16 bit numbers and returns their 32-bit
// result after removing 1 redundant sign bit with saturation
// (QADD of the product with itself doubles with saturation, so
// (-32768)*(-32768) yields 0x7fffffff)
static PLATFORM_INLINE WORD32 ixheaacd_mult16x16in32_shl_sat(WORD16 a,
WORD16 b) {
WORD32 product;
__asm__(
" SMULBB %0 , %1, %2 \n\t"
" QADD %0, %0, %0 \n\t"
: "=r"(product)
: "r"(a), "r"(b));
return product;
}
// adds 2 32 bit variables (plain wrapping ADD, no saturation)
static PLATFORM_INLINE WORD32 ixheaacd_add32(WORD32 a, WORD32 b) {
WORD32 sum;
__asm__(
" ADD %0 , %1, %2 \n\t"
: "=r"(sum)
: "r"(a), "r"(b));
return (sum);
}
// subtract 2 32 bit variables (plain wrapping SUB, no saturation)
static PLATFORM_INLINE WORD32 ixheaacd_sub32(WORD32 a, WORD32 b) {
WORD32 diff;
__asm__(
" SUB %0 , %1, %2 \n\t"
: "=r"(diff)
: "r"(a), "r"(b));
return (diff);
}
// adds 2 32 bit variables with saturation
// (QADD clamps to 0x7fffffff / 0x80000000 on overflow)
static PLATFORM_INLINE WORD32 ixheaacd_add32_sat(WORD32 a, WORD32 b) {
WORD32 sum;
__asm__(
" QADD %0 , %1, %2 \n\t"
: "=r"(sum)
: "r"(a), "r"(b));
return (sum);
}
// subtract 2 32 bit variables with saturation
// (QSUB clamps to 0x7fffffff / 0x80000000 on overflow)
static PLATFORM_INLINE WORD32 ixheaacd_sub32_sat(WORD32 a, WORD32 b) {
WORD32 diff;
__asm__(
" QSUB %0 , %1, %2 \n\t"
: "=r"(diff)
: "r"(a), "r"(b));
return (diff);
}
// returns number of redundant sign bits in a 32-bit value
// (the EOR with the arithmetic-shifted sign turns sign bits into leading
// zeros, which CLZ counts; note that for a == 0 this returns 31, not 0,
// since CLZ(0) is 32 and one is subtracted)
static PLATFORM_INLINE WORD ixheaacd_norm32(WORD32 a) {
WORD32 norm_val;
__asm__(
" eor %0 , %1, %1,asr #31 \n\t"
" CLZ %0, %0 \n\t"
" SUB %0, %0, #1 \n\t"
: "=r"(norm_val)
: "r"(a));
return norm_val;
}
// CLZ(a) - 1: redundant sign bits for an input assumed strictly positive
// (a == 0 yields 31; a negative input yields -1 since CLZ is 0)
static PLATFORM_INLINE WORD ixheaacd_pnorm32(WORD32 a) {
WORD32 norm_val;
__asm__(
" CLZ %0, %1 \n\t"
" SUB %0, %0, #1 \n\t"
: "=r"(norm_val)
: "r"(a));
return norm_val;
}
// Position of the most significant non-redundant bit: 31 - norm32(a).
static PLATFORM_INLINE WORD bin_expo32(WORD32 a) {
  return 31 - ixheaacd_norm32(a);
}
// Absolute value of a 32-bit number (no saturation of the INT32_MIN case).
static PLATFORM_INLINE WORD32 ixheaacd_abs32(WORD32 a) {
  return (a < 0) ? -a : a;
}
// Approximate absolute value for normalization: negatives use one's
// complement (~a) instead of negation, avoiding the INT32_MIN overflow.
static PLATFORM_INLINE WORD32 ixheaacd_abs32_nrm(WORD32 a) {
  return (a < 0) ? ~a : a;
}
// returns the absolute value of 32-bit number with saturation
// Negates negative inputs; the trailing MOVMI catches 0x80000000, whose
// negation is still negative, and clamps it to 0x7fffffff.
static PLATFORM_INLINE WORD32 ixheaacd_abs32_sat(WORD32 a) {
WORD32 abs_val;
__asm__ __volatile__(
" MOVS %0 , %1 \n\t"
" IT MI \n\t"
" RSBSMI %0 , %1 , #0 \n\t"
" IT MI \n\t"
" MOVMI %0 , #0x7fffffff \n\t"
: "=r"(abs_val)
: "r"(a)
: "cc");
return abs_val;
}
// returns the negated value of 32-bit number
// (RSB 0 - a; 0x80000000 wraps back to itself, no saturation)
static PLATFORM_INLINE WORD32 ixheaacd_negate32(WORD32 a) {
WORD32 neg_val;
__asm__(" RSB %0, %1, #0 \n\t" : "=r"(neg_val) : "r"(a));
return neg_val;
}
// returns the negated value of 32-bit number with saturation
// (RSBS sets V when a == 0x80000000; MVNVS then clamps to 0x7fffffff)
static PLATFORM_INLINE WORD32 ixheaacd_negate32_sat(WORD32 a) {
WORD32 neg_val;
__asm__(
" RSBS %0, %1, #0 \n\t"
" IT VS \n\t"
" MVNVS %0, #0x80000000 \n\t"
: "=r"(neg_val)
: "r"(a)
: "cc");
return neg_val;
}
// Divides two 32-bit values (a/b) using a non-restoring shift-and-subtract
// loop, producing a 31-bit quotient.  The Q-format of the quotient,
// (30 + norm(a) - norm(b)), is written through q_format.  If b is zero,
// *q_format is set to 0 and a is returned unchanged (sign handling is
// skipped in that case).
static PLATFORM_INLINE WORD32 div32(WORD32 a, WORD32 b, WORD *q_format) {
  WORD32 quotient = 0;
  UWORD32 mantissa_nr, mantissa_dr;
  WORD16 sign = 0;
  LOOPINDEX i;
  WORD q_nr, q_dr;

  /* work on magnitudes; remember the sign of the result */
  if ((a < 0) && (0 != b)) {
    a = -a;
    sign = (WORD16)(sign ^ -1);
  }
  if (b < 0) {
    b = -b;
    sign = (WORD16)(sign ^ -1);
  }
  if (0 == b) {
    *q_format = 0;
    return (a);
  }

  /* normalize numerator and denominator to full 32-bit mantissas */
  q_nr = ixheaacd_norm32(a);
  mantissa_nr = (UWORD32)a << (q_nr);
  q_dr = ixheaacd_norm32(b);
  mantissa_dr = (UWORD32)b << (q_dr);
  *q_format = (WORD)(30 + q_nr - q_dr);

  /* 31 iterations of shift-and-subtract build the quotient bit by bit */
  for (i = 0; i < 31; i++) {
    quotient = quotient << 1;
    if (mantissa_nr >= mantissa_dr) {
      mantissa_nr = mantissa_nr - mantissa_dr;
      quotient += 1;
    }
    mantissa_nr = (UWORD32)mantissa_nr << 1;
  }

  if (sign < 0) {
    quotient = -quotient;
  }
  return quotient;
}
// multiplies two 16 bit numbers and accumulates their result in a 32 bit
// variable
// (SMLABB: a + low16(b) * low16(c), wrapping, no saturation)
static PLATFORM_INLINE WORD32 ixheaacd_mac16x16in32(WORD32 a, WORD16 b,
WORD16 c) {
WORD32 acc;
__asm__(
" SMLABB %0, %2, %3, %1 \n\t"
: "=r"(acc)
: "r"(a), "r"(b), "r"(c)
);
return acc;
}
// Accumulate low16(b) * high16(c) into a (wrapping add).
static PLATFORM_INLINE WORD32 mac16x16hin32(WORD32 a, WORD32 b, WORD32 c) {
  WORD32 partial = ixheaacd_mult16x16in32((WORD16)b, (WORD16)(c >> 16));
  return ixheaacd_add32(a, partial);
}
// Accumulate (b * c) << 1 into a (wrapping add, no saturation).
static PLATFORM_INLINE WORD32 ixheaacd_mac16x16in32_shl(WORD32 a, WORD16 b,
                                                        WORD16 c) {
  return ixheaacd_add32(a, ixheaacd_mult16x16in32_shl(b, c));
}
// Accumulate (b * c) << 1 into a, with saturation in both the product
// doubling and the final add.
static PLATFORM_INLINE WORD32 ixheaacd_mac16x16in32_shl_sat(WORD32 a, WORD16 b,
                                                            WORD16 c) {
  return ixheaacd_add32_sat(a, ixheaacd_mult16x16in32_shl_sat(b, c));
}
// Subtract the 16x16 product b * c from a (wrapping, no saturation).
static PLATFORM_INLINE WORD32 msu16x16in32(WORD32 a, WORD16 b, WORD16 c) {
  return ixheaacd_sub32(a, ixheaacd_mult16x16in32(b, c));
}
// multiplies two 16 bit numbers and subtracts their
// result from a 32 bit variable after removing a redundant sign bit in the
// product
// Multiplies two 16-bit numbers, removes the redundant sign bit from the
// product, and subtracts the result from the accumulator a.
static PLATFORM_INLINE WORD32 msu16x16in32_shl(WORD32 a, WORD16 b, WORD16 c) {
  WORD32 prod = ixheaacd_mult16x16in32_shl(b, c);
  return ixheaacd_sub32(a, prod);
}
// multiplies two 16 bit numbers and subtracts their
// result from a 32 bit variable with saturation
// after removing a redundant sign bit in the product
// Multiplies two 16-bit numbers and subtracts the left-shifted product from
// the accumulator a, saturating both the multiply and the subtraction.
static PLATFORM_INLINE WORD32 msu16x16in32_shl_sat(WORD32 a, WORD16 b,
                                                   WORD16 c) {
  WORD32 prod = ixheaacd_mult16x16in32_shl_sat(b, c);
  return ixheaacd_sub32_sat(a, prod);
}
// adding two 32 bit numbers and taking care of overflow
// by downshifting both numbers before addition
// Adds two 32-bit numbers overflow-safely by halving each operand before
// the addition.
static PLATFORM_INLINE WORD32 add32_shr(WORD32 a, WORD32 b) {
  return ixheaacd_add32(ixheaacd_shr32(a, 1), ixheaacd_shr32(b, 1));
}
// subtracting two 32 bit numbers and taking care of
// overflow by downshifting both numbers before addition
// Subtracts two 32-bit numbers overflow-safely by halving each operand
// before the subtraction.
static PLATFORM_INLINE WORD32 sub32_shr(WORD32 a, WORD32 b) {
  return ixheaacd_sub32(ixheaacd_shr32(a, 1), ixheaacd_shr32(b, 1));
}
#endif

View file

@ -0,0 +1,400 @@
/******************************************************************************
* *
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#ifndef IXHEAACD_BASIC_OPS40_H
#define IXHEAACD_BASIC_OPS40_H
/* NOTE(review): these accessors assume a little-endian 64-bit layout and
   read the value through incompatible 32-bit pointers (strict-aliasing
   hazard) - confirm the build flags/targets tolerate this. */
#define lo64(a) (((unsigned *)&a)[0]) /* low 32 bits of a long long */
#define hi64(a) (((WORD32 *)&a)[1]) /* high 32 bits of a long long */
// normalize input to 32 bits, return denormalizing info
// Normalizes the 40-bit value at *in in place and returns the shift that
// was applied: a positive return means *in fit in 32 bits and was shifted
// left by that amount; a negative return means *in exceeded 32 bits and was
// shifted right. A zero input is left untouched and reports 31.
static PLATFORM_INLINE WORD16 norm40(WORD40 *in) {
  WORD16 expo;
  WORD32 tempo;
  if (0 == (*in)) return 31;
  // Value already representable as WORD32: normalize left with norm32.
  if (((*in) <= 0x7fffffff) && ((WORD40)(*in) >= (WORD40)0xFFFFFFFF80000000)) {
    tempo = (WORD32)(*in);
    expo = ixheaacd_norm32(tempo);
    *in = tempo << expo;
    return (expo);
  }
  // Value needs the extended 40-bit range: measure the top bits via a
  // 31-bit right shift and denormalize right until it fits in 32 bits.
  tempo = (WORD32)((*in) >> 31);
  expo = 31 - (ixheaacd_norm32(tempo));
  *in = (*in) >> expo;
  return (-expo);
}
// adds two numbers and right shifts by 1
// Adds two 32-bit values in 40-bit precision and halves the result, so the
// sum cannot overflow before the shift.
static PLATFORM_INLINE WORD32 add32_shr40(WORD32 a, WORD32 b) {
  WORD40 total = (WORD40)a + (WORD40)b;
  return (WORD32)(total >> 1);
}
// subtracts and right shifts by one
// Subtracts two 32-bit values in 40-bit precision and halves the result, so
// the difference cannot overflow before the shift.
static PLATFORM_INLINE WORD32 sub32_shr40(WORD32 a, WORD32 b) {
  WORD40 diff = (WORD40)a - (WORD40)b;
  return (WORD32)(diff >> 1);
}
// multiply WORD32 with WORD16 return bits 46 to 15
// Multiplies a WORD32 by a WORD16 and returns bits 46..15 of the product.
// SMULWB computes the top 32 bits of the 48-bit (32x16) product; the extra
// left shift by one removes the redundant sign bit. No saturation.
static PLATFORM_INLINE WORD32 ixheaacd_mult32x16in32_shl(WORD32 a, WORD16 b) {
  WORD32 result;

  __asm__(
      " SMULWB %0, %1, %2 \n\t"
      " MOV %0, %0, LSL #1 \n\t"
      : "=r"(result)
      : "r"(a), "r"(b));

  return result;
}
// multiply WORD32 with higher 16 bits of second data and return bits 46 to 15
// Multiplies a with the upper 16 bits of b and returns the product high
// part left-shifted by one (bits 46..15 of the 48-bit result).
static PLATFORM_INLINE WORD32 mult32x16hin32_shl(WORD32 a, WORD32 b) {
  WORD64 wide = (WORD64)a * (WORD64)(b >> 16);
  WORD32 high = (WORD32)(wide >> 16);
  return (high << 1);
}
// multiply WORD32 with WORD16 return bits 47 to 16
// Multiplies a WORD32 by a WORD16 and returns bits 47..16 of the product.
// SMULWB yields the top 32 bits of the 48-bit (32x16) signed product
// directly; no shift or saturation is applied.
static PLATFORM_INLINE WORD32 ixheaacd_mult32x16in32(WORD32 a, WORD16 b) {
  WORD32 result;

  __asm__(
      " SMULWB %0, %1, %2 \n\t"
      : "=r"(result)
      : "r"(a), "r"(b));

  return (result);
}
// multiply WORD32 with WORD16 return bits 46 to 15 with saturation
// Multiplies a WORD32 by a WORD16 and returns bits 46..15 of the product
// with saturation. QADD of the result with itself performs the x2 (left
// shift by one) using the ARM saturating adder, clamping on overflow.
static PLATFORM_INLINE WORD32 ixheaacd_mult32x16in32_shl_sat(WORD32 a,
                                                             WORD16 b) {
  WORD32 result;

  __asm__(
      " SMULWB %0, %1, %2 \n\t"
      " QADD %0, %0, %0 \n\t"
      : "=r"(result)
      : "r"(a), "r"(b));

  return (result);
}
// multiply WORD32 with WORD32 return bits 62 to 31
// Multiplies two WORD32 values and returns bits 62..31 of the 64-bit
// product. SMULL produces the full 64-bit product (low half discarded in
// r3); the left shift by one removes the redundant sign bit. No saturation.
static PLATFORM_INLINE WORD32 ixheaacd_mult32_shl(WORD32 a, WORD32 b) {
  WORD32 resultHi;

  __asm__(
      " smull r3, %0, %1, %2 \n\t"
      " MOV %0, %0, LSL #1 \n\t"
      : "=r"(resultHi)
      : "r"(a), "r"(b)
      : "cc", "r3");

  return resultHi;
}
// multiply WORD32 with WORD32 return bits 63 to 32
// Multiplies two WORD32 values and returns bits 63..32 (the high word) of
// the 64-bit product. SMULL's low half is discarded via the r3 scratch
// register declared in the clobber list.
static PLATFORM_INLINE WORD32 ixheaacd_mult32(WORD32 a, WORD32 b) {
  WORD32 resultHi;

  __asm__(
      " smull r3, %0, %1, %2 \n\t"
      : "=r"(resultHi)
      : "r"(a), "r"(b)
      : "r3");

  return resultHi;
}
// multiply WORD32 with WORD32 return bits 62 to 31 with saturation
// Multiplies two WORD32 values and returns bits 62..31 of the product,
// saturating the single overflow case (0x80000000 * 0x80000000).
static PLATFORM_INLINE WORD32 ixheaacd_mult32_shl_sat(WORD32 a, WORD32 b) {
  if ((a == (WORD32)0x80000000) && (b == (WORD32)0x80000000)) {
    return 0x7fffffff;
  }
  return ixheaacd_mult32_shl(a, b);
}
// multiply WORD32 with WORD16 add bits 47 to 16 to accumulator
// Multiplies a WORD32 by a WORD16 and adds bits 47..16 of the product to
// the accumulator a.
static PLATFORM_INLINE WORD32 ixheaacd_mac32x16in32(WORD32 a, WORD32 b,
                                                    WORD16 c) {
  return a + ixheaacd_mult32x16in32(b, c);
}
// multiply WORD32 with WORD16 add bits 46 to 15 to accumulator
// Multiplies a WORD32 by a WORD16 and adds bits 46..15 of the product to
// the accumulator a.
static PLATFORM_INLINE WORD32 ixheaacd_mac32x16in32_shl(WORD32 a, WORD32 b,
                                                        WORD16 c) {
  return a + ixheaacd_mult32x16in32_shl(b, c);
}
// multiply WORD32 with WORD16 add bits 46 to 15 to accumulator with saturation
// Multiplies a WORD32 by a WORD16 and adds bits 46..15 of the product to
// the accumulator a, saturating the multiply and the add.
static PLATFORM_INLINE WORD32 mac32x16in32_shl_sat(WORD32 a, WORD32 b,
                                                   WORD16 c) {
  WORD32 prod = ixheaacd_mult32x16in32_shl_sat(b, c);
  return ixheaacd_add32_sat(a, prod);
}
// multiply WORD32 with WORD32 add bits 63 to 32 to accumulator
// Multiplies two WORD32 values and adds bits 63..32 of the product to the
// accumulator a.
static PLATFORM_INLINE WORD32 ixheaacd_mac32(WORD32 a, WORD32 b, WORD32 c) {
  return a + ixheaacd_mult32(b, c);
}
// multiply WORD32 with WORD32 add bits 62 to 31 to accumulator
// Multiplies two WORD32 values and adds bits 62..31 of the product to the
// accumulator a.
static PLATFORM_INLINE WORD32 mac32_shl(WORD32 a, WORD32 b, WORD32 c) {
  return a + ixheaacd_mult32_shl(b, c);
}
// multiply WORD32 with WORD32 add bits 62 to 31 to accumulator with saturation
// Multiplies two WORD32 values and adds bits 62..31 of the product to the
// accumulator a, saturating the multiply and the add.
static PLATFORM_INLINE WORD32 mac32_shl_sat(WORD32 a, WORD32 b, WORD32 c) {
  WORD32 prod = ixheaacd_mult32_shl_sat(b, c);
  return ixheaacd_add32_sat(a, prod);
}
// multiply WORD32 with WORD16 sub bits 47 to 16 from accumulator
// Multiplies a WORD32 by a WORD16 and subtracts bits 47..16 of the product
// from the accumulator a.
static PLATFORM_INLINE WORD32 msu32x16in32(WORD32 a, WORD32 b, WORD16 c) {
  return a - ixheaacd_mult32x16in32(b, c);
}
// multiply WORD32 with WORD16 sub bits 46 to 15 from accumulator
// Multiplies a WORD32 by a WORD16 and subtracts bits 46..15 of the product
// from the accumulator a.
static PLATFORM_INLINE WORD32 msu32x16in32_shl(WORD32 a, WORD32 b, WORD16 c) {
  return a - ixheaacd_mult32x16in32_shl(b, c);
}
// multiply WORD32 with WORD16 sub bits 46 to 15 from accumulator with
// saturation
// Multiplies a WORD32 by a WORD16 and subtracts bits 46..15 of the product
// from the accumulator a, saturating the multiply and the subtraction.
static PLATFORM_INLINE WORD32 msu32x16in32_shl_sat(WORD32 a, WORD32 b,
                                                   WORD16 c) {
  WORD32 prod = ixheaacd_mult32x16in32_shl_sat(b, c);
  return ixheaacd_sub32_sat(a, prod);
}
// multiply WORD32 with WORD32 sub bits 63 to 32 from accumulator
// Multiplies two WORD32 values and subtracts bits 63..32 of the product
// from the accumulator a.
static PLATFORM_INLINE WORD32 msu32(WORD32 a, WORD32 b, WORD32 c) {
  return a - ixheaacd_mult32(b, c);
}
// multiply WORD32 with WORD32 sub bits 62 to 31 from accumulator
// Multiplies two WORD32 values and subtracts bits 62..31 of the product
// from the accumulator a.
static PLATFORM_INLINE WORD32 msu32_shl(WORD32 a, WORD32 b, WORD32 c) {
  return a - ixheaacd_mult32_shl(b, c);
}
// multiply WORD32 with WORD32 sub bits 62 to 31 from accumulator with
// saturation
// Multiplies two WORD32 values and subtracts bits 62..31 of the product
// from the accumulator a, saturating the multiply and the subtraction.
static PLATFORM_INLINE WORD32 msu32_shl_sat(WORD32 a, WORD32 b, WORD32 c) {
  WORD32 prod = ixheaacd_mult32_shl_sat(b, c);
  return ixheaacd_sub32_sat(a, prod);
}
// returns normalized 32 bit accumulated result
// Dot product of a WORD32 vector with a WORD16 vector, accumulated in
// 40-bit precision. The accumulator is normalized before returning; the
// applied shift is written through q_val.
static PLATFORM_INLINE WORD32 mac3216_arr40(WORD32 *x, WORD16 *y,
                                            LOOPINDEX length, WORD16 *q_val) {
  WORD40 acc = 0;
  LOOPINDEX idx;
  for (idx = 0; idx < length; idx++) {
    acc += (WORD40)(ixheaacd_mult32x16in32(x[idx], y[idx]));
  }
  *q_val = norm40(&acc);
  return (WORD32)acc;
}
// returns normalized 32 bit accumulated result
// Dot product of two WORD32 vectors (high-word products), accumulated in
// 40-bit precision. The accumulator is normalized before returning; the
// applied shift is written through q_val.
static PLATFORM_INLINE WORD32 mac32_arr40(WORD32 *x, WORD32 *y,
                                          LOOPINDEX length, WORD16 *q_val) {
  WORD40 acc = 0;
  LOOPINDEX idx;
  for (idx = 0; idx < length; idx++) {
    acc += (WORD40)(ixheaacd_mult32(x[idx], y[idx]));
  }
  *q_val = norm40(&acc);
  return ((WORD32)acc);
}
// returns normalized 32 bit accumulated result
// Dot product of two WORD16 vectors, accumulated in 40-bit precision. The
// accumulator is normalized before returning; the applied shift is written
// through q_val.
static PLATFORM_INLINE WORD32 mac16_arr40(WORD16 *x, WORD16 *y,
                                          LOOPINDEX length, WORD16 *q_val) {
  WORD40 acc = 0;
  LOOPINDEX idx;
  for (idx = 0; idx < length; idx++) {
    acc += (WORD40)((WORD32)x[idx] * (WORD32)y[idx]);
  }
  *q_val = norm40(&acc);
  return ((WORD32)acc);
}
// returns normalized 32 bit accumulated result
// Sums a WORD32 array in 40-bit precision. The accumulator is normalized
// before returning; the applied shift is written through q_val.
static PLATFORM_INLINE WORD32 add32_arr40(WORD32 *in_arr, LOOPINDEX length,
                                          WORD16 *q_val) {
  WORD40 acc = 0;
  LOOPINDEX idx;
  for (idx = 0; idx < length; idx++) {
    acc += (WORD40)in_arr[idx];
  }
  *q_val = norm40(&acc);
  return ((WORD32)acc);
}
// multiply WORD32 with WORD32 return WORD64
// Multiplies two WORD32 values and returns the full 64-bit product.
static PLATFORM_INLINE WORD64 ixheaacd_mult32x32in64(WORD32 a, WORD32 b) {
  return (WORD64)a * (WORD64)b;
}
// multiply WORD32 with WORD32 and accumulate the 64 bit result
// Multiplies two WORD32 values and accumulates the full 64-bit product
// into sum.
static PLATFORM_INLINE WORD64 ixheaacd_mac32x32in64(WORD64 sum, WORD32 a,
                                                    WORD32 b) {
  return sum + (WORD64)a * (WORD64)b;
}
// Seven-term dot product of a[0..6] (WORD32) with b[0..6] (WORD16), computed
// in 64-bit precision.
// NOTE(review): the incoming 'sum' argument is OVERWRITTEN by the first
// product (sum = ..., not sum += ...), so despite the "mac" name this does
// not accumulate onto the caller's value. Confirm callers depend on this
// before changing it.
static PLATFORM_INLINE WORD64 ixheaacd_mac32x32in64_7(WORD64 sum,
                                                      const WORD32 *a,
                                                      const WORD16 *b) {
  sum = (WORD64)a[0] * (WORD64)b[0];
  sum += (WORD64)a[1] * (WORD64)b[1];
  sum += (WORD64)a[2] * (WORD64)b[2];
  sum += (WORD64)a[3] * (WORD64)b[3];
  sum += (WORD64)a[4] * (WORD64)b[4];
  sum += (WORD64)a[5] * (WORD64)b[5];
  sum += (WORD64)a[6] * (WORD64)b[6];
  return (sum);
}
// Accumulates n products a[k]*b[k] into sum in 64-bit precision. The first
// element is always processed (even for n <= 1), matching the original
// unrolled-first-iteration form.
static PLATFORM_INLINE WORD64 ixheaacd_mac32x32in64_n(WORD64 sum,
                                                      const WORD32 *a,
                                                      const WORD16 *b,
                                                      WORD32 n) {
  WORD32 idx = 0;
  do {
    sum += (WORD64)a[idx] * (WORD64)b[idx];
    idx++;
  } while (idx < n);
  return (sum);
}
// Multiplies two WORD32 values and returns the full 64-bit product.
static PLATFORM_INLINE WORD64 ixheaacd_mult64(WORD32 a, WORD32 b) {
  return (WORD64)a * (WORD64)b;
}
// Adds two 64-bit values without saturation.
static PLATFORM_INLINE WORD64 ixheaacd_add64(WORD64 a, WORD64 b) {
  return a + b;
}
// Subtracts two 64-bit values without saturation.
static PLATFORM_INLINE WORD64 ixheaacd_sub64(WORD64 a, WORD64 b) {
  return (WORD64)a - (WORD64)b;
}
// Subtracts two 64-bit values with saturation. Overflow is only possible
// when the operands have opposite signs (their sign bits differ under
// MIN_64); if the raw difference's sign then differs from a's, the result
// wrapped and is clamped to MIN_64 or MAX_64 based on a's sign.
static PLATFORM_INLINE WORD64 ixheaacd_sub64_sat(WORD64 a, WORD64 b) {
  WORD64 diff;
  diff = ixheaacd_sub64(a, b);
  if ((((WORD64)a ^ (WORD64)b) & (WORD64)MIN_64) != 0) {
    if (((WORD64)diff ^ (WORD64)a) & (WORD64)MIN_64) {
      diff = (a < 0L) ? MIN_64 : MAX_64;
    }
  }
  return (diff);
}
// Multiplies two WORD32 values in 64-bit precision and returns the product
// arithmetically shifted right by 'shift'.
static PLATFORM_INLINE WORD32 ixheaacd_mul32_sh(WORD32 a, WORD32 b,
                                                WORD8 shift) {
  WORD64 wide = (WORD64)a * (WORD64)b;
  return (WORD32)(wide >> shift);
}
#endif

View file

@ -0,0 +1,109 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http:@www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_calc_post_twid_armv7

@ NEON post-twiddle kernel. Register use, as read from the code below:
@   r0      : output pointer, stores advance forward 8 bytes at a time
@   r6      : r0 + r3*8 - 4, mirrored output pointer walked backward
@   r1, r2  : two input vectors, read 4 words per iteration
@   r3      : sample count, 4 samples consumed per loop pass
@   r4, r5  : table pointers from the 5th/6th stack arguments
@             (presumably cos/sin twiddle tables - confirm against caller)
@ Each pass forms 64-bit cross products (VMULL.S32), keeps the high 32 bits
@ of each product (VSHRN #32), combines them as difference/sum pairs,
@ negates both results and interleaves the stores from both ends of the
@ output buffer.
ixheaacd_calc_post_twid_armv7:
    STMFD sp!, {r4-r12, r14}
    VPUSH {D8-D15}
    @ 10 words + 16 D-regs pushed = 104 bytes; args 5 and 6 sit above that.
    LDR R4, [SP, #104]
    LDR R5, [SP, #108]
    @ r6 points at the last output word (r0 + r3*8 - 4), stepped by -8.
    ADD R6, R0, R3, LSL #3
    SUB R6, R6, #4
    MOV R7, #-8
    MOV R8, #8
LOOP1:
    VLD1.32 {D0, D1}, [R4]!
    VLD1.32 {D2, D3}, [R5]!
    VLD1.32 {D4, D5}, [R1]!
    VLD1.32 {D6, D7}, [R2]!
    @ 32x32 -> 64-bit cross products of inputs with both tables.
    VMULL.S32 Q4, D4, D0
    VMULL.S32 Q5, D6, D2
    VMULL.S32 Q6, D6, D0
    VMULL.S32 Q7, D4, D2
    VMULL.S32 Q8, D5, D1
    VMULL.S32 Q9, D7, D3
    VMULL.S32 Q10, D7, D1
    VMULL.S32 Q11, D5, D3
    @ Keep the high 32 bits of each 64-bit product.
    VSHRN.S64 D6, Q4, #32
    VSHRN.S64 D8, Q5, #32
    VSHRN.S64 D10, Q6, #32
    VSHRN.S64 D12, Q7, #32
    VSHRN.S64 D7, Q8, #32
    VSHRN.S64 D9, Q9, #32
    VSHRN.S64 D11, Q10, #32
    VSHRN.S64 D13, Q11, #32
    @ Combine as (p0 - p1) and (p2 + p3) pairs, then negate both lanes.
    VSUB.I32 D0, D6, D8
    VADD.I32 D1, D10, D12
    VSUB.I32 D2, D7, D9
    VADD.I32 D3, D11, D13
    VNEG.S32 Q0, Q0
    VNEG.S32 Q1, Q1
    SUBS R3, R3, #4
    @ Interleave stores: forward stream via r0, backward stream via r6.
    VST1.32 {D0[0]}, [R0], R8
    VST1.32 {D1[0]}, [R6], R7
    VST1.32 {D0[1]}, [R0], R8
    VST1.32 {D1[1]}, [R6], R7
    VST1.32 {D2[0]}, [R0], R8
    VST1.32 {D3[0]}, [R6], R7
    VST1.32 {D2[1]}, [R0], R8
    VST1.32 {D3[1]}, [R6], R7
    BGT LOOP1
    VPOP {D8-D15}
    @ Restore callee-saved registers and return (pop lr into pc).
    LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,107 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http:@www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_calc_pre_twid_armv7

@ NEON pre-twiddle kernel. Register use, as read from the code below:
@   r0      : input buffer, read forward 4 complex (de-interleaved) pairs
@   r6      : r0 + r3*8 - 12, mirrored input pointer walked backward
@   r1, r2  : two output vectors, written 4 words per iteration
@   r3      : sample count, 4 samples consumed per loop pass
@   r4, r5  : table pointers from the 5th/6th stack arguments
@             (presumably cos/sin twiddle tables - confirm against caller)
@ Each pass reverses the tail-end samples (VREV64), negates the forward
@ reals, forms 64-bit products (VMULL.S32), keeps their high 32 bits
@ (VSHRN #32) and writes difference combinations to the two outputs.
ixheaacd_calc_pre_twid_armv7:
    STMFD sp!, {r4-r12, r14}
    VPUSH {D8-D15}
    @ 10 words + 16 D-regs pushed = 104 bytes; args 5 and 6 sit above that.
    LDR R4, [SP, #104]
    LDR R5, [SP, #108]
    @ r6 points near the end of the input (r0 + r3*8 - 12), stepped by -16.
    ADD R6, R0, R3, LSL #3
    SUB R6, R6, #12
    MOV R7, #-16
LOOP1:
    VLD1.32 {D0, D1}, [R4]!
    VLD1.32 {D2, D3}, [R5]!
    @ Forward samples de-interleaved from r0, backward samples from r6.
    VLD2.32 {D4, D5}, [R0]!
    VLD2.32 {D6, D7}, [R0]!
    VLD2.32 {D8, D9}, [R6], R7
    VLD2.32 {D10, D11}, [R6], R7
    @ Reverse the backward-stream lanes into ascending order.
    VREV64.32 D8, D8
    VREV64.32 D9, D10
    @ Negate the forward odd lanes before multiplying.
    VNEG.S32 D5, D4
    VNEG.S32 D7, D6
    VMULL.S32 Q6, D0, D5
    VMULL.S32 Q7, D2, D8
    VMULL.S32 Q8, D0, D8
    VMULL.S32 Q9, D2, D4
    VMULL.S32 Q10, D1, D7
    VMULL.S32 Q11, D9, D3
    VMULL.S32 Q12, D1, D9
    VMULL.S32 Q13, D3, D6
    @ Keep the high 32 bits of each 64-bit product.
    VSHRN.S64 D12, Q6, #32
    VSHRN.S64 D14, Q7, #32
    VSHRN.S64 D16, Q8, #32
    VSHRN.S64 D18, Q9, #32
    VSHRN.S64 D20, Q10, #32
    VSHRN.S64 D22, Q11, #32
    VSHRN.S64 D24, Q12, #32
    VSHRN.S64 D26, Q13, #32
    @ Difference combinations feed the two output streams.
    VSUB.I32 D0, D12, D14
    VSUB.I32 D2, D16, D18
    VSUB.I32 D1, D20, D22
    VSUB.I32 D3, D24, D26
    SUBS R3, R3, #4
    VST1.32 {D0, D1}, [R1]!
    VST1.32 {D2, D3}, [R2]!
    BGT LOOP1
    VPOP {D8-D15}
    @ Restore callee-saved registers and return (pop lr into pc).
    LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,82 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_calc_max_spectral_line_armv7

@ Returns the number of redundant sign bits across an array of spectral
@ lines: ORs together the absolute values of all r1 elements (8 at a time
@ with NEON, remainder handled scalar), then computes CLZ(result) - 1.
@   r0 : pointer to the WORD32 spectral data (clobbered, reused for result)
@   r1 : element count (need not be a multiple of 8)
ixheaacd_calc_max_spectral_line_armv7:
    STMFD sp!, {R4-R12, R14}
    @ r4 = count / 8 vector iterations; r6 = elements covered by them.
    MOV R4, R1, LSR #3
    MOV R6, R4, LSL #3
    VMOV.S32 D6, #0x00000000
    VMOV.S32 D7, #0x00000000
LOOP_1:
    VLD1.32 {D0, D1}, [R0]!
    VLD1.32 {D2, D3}, [R0]!
    @ Accumulate |x| of 8 lanes into the running OR in Q3.
    VABS.S32 Q0, Q0
    VABS.S32 Q1, Q1
    SUBS R4, R4, #1
    VORR Q3, Q0, Q3
    VORR Q3, Q1, Q3
    BGT LOOP_1
    @ r7 = leftover element count; fold the 4 OR lanes into r4.
    SUBS R7, R1, R6
    VMOV.32 R4, D6[0]
    VMOV.32 R1, D6[1]
    VMOV.32 R2, D7[0]
    ORR R4, R4, R1
    VMOV.32 R3, D7[1]
    ORR R4, R4, R2
    ORR R4, R4, R3
    BEQ END_FUNC
LOOP_2:
    @ Scalar tail: negate negatives, OR the magnitude into r4.
    LDR R2, [R0], #4
    MOVS R2, R2
    RSBMI R2, R2, #0
    ORR R4, R4, R2
    SUBS R7, R7, #1
    BGT LOOP_2
END_FUNC:
    @ Invert if still negative (|INT_MIN| case), then CLZ - 1 = free
    @ headroom bits.
    MOVS R0, R4
    MVNMI R0, R0
    CLZ R0, R0
    SUB R0, R0, #1
    LDMFD sp!, {R4-R12, R15}

View file

@ -0,0 +1,809 @@
.text
.p2align 2
.global ixheaacd_complex_fft_p2_asm
ixheaacd_complex_fft_p2_asm:
STMFD sp!, {r0-r12, lr}
SUB sp, sp, #0x28
LDR r0, [sp, #0x2c]
@LDR r12,[sp,#0x5c+4]
EOR r0, r0, r0, ASR #31
CLZ r0, r0
SUB r12, r0, #16 @dig_rev_shift = norm32(npoints) + 1 -16@
SUB r0, r0, #1
RSB r0, r0, #0x1e
AND r1, r0, #1
STR r1, [sp, #0x14]
MOV r1, r0, ASR #1
LDR r0, [sp, #0x2c] @npoints
STR r1, [sp, #-4]!
MOV lr, r0, LSL #1 @(npoints >>1) * 4
MOV r0, #0
FIRST_STAGE_R4:
LDR r4, =0x33333333
LDR r5, =0x0F0F0F0F
AND r6, r4, r0
AND r7, r4, r0, LSR #2
ORR r4, r7, r6, LSL #2
AND r6, r5, r4
AND r7, r5, r4, LSR #4
ORR r4, r7, r6, LSL #4
BIC r6, r4, #0x0000FF00
BIC r7, r4, #0x00FF0000
MOV r7, r7, LSR #8
ORR r4, r7, r6, LSL #8
LDR r5, [sp, #0x18]
MOV r10, r4, LSR r12
CMP r5, #0
ADDNE r10, r10, #1
BICNE r10, r10, #1
ADD r1, r2, r10, LSL #2
LDRD r4, [r1] @r4=x0r, r5=x0i
ADD r1, r1, lr
LDRD r8, [r1] @r8=x1r, r9=x1i
ADD r1, r1, lr
LDRD r6, [r1] @r6=x2r, r7=x2i
ADD r1, r1, lr
LDRD r10, [r1] @r10=x3r, r11=x3i
ADD r0, r0, #4
CMP r0, lr, ASR #1
ADD r4, r4, r6 @x0r = x0r + x2r@
ADD r5, r5, r7 @x0i = x0i + x2i@
SUB r6, r4, r6, lsl#1 @x2r = x0r - (x2r << 1)@
SUB r7, r5, r7, lsl#1 @x2i = x0i - (x2i << 1)@
ADD r8, r8, r10 @x1r = x1r + x3r@
ADD r9, r9, r11 @x1i = x1i + x3i@
SUB r1, r8, r10, lsl#1 @x3r = x1r - (x3r << 1)@
SUB r11, r9, r11, lsl#1 @x3i = x1i - (x3i << 1)@
ADD r4, r4, r8 @x0r = x0r + x1r@
ADD r5, r5, r9 @x0i = x0i + x1i@
SUB r8, r4, r8, lsl#1 @x1r = x0r - (x1r << 1)@
SUB r9, r5, r9, lsl#1 @x1i = x0i - (x1i << 1)
ADD r6, r6, r11 @x2r = x2r + x3i@
SUB r7, r7, r1 @x2i = x2i - x3r@
SUB r10, r6, r11, lsl#1 @x3i = x2r - (x3i << 1)@
ADD r11, r7, r1, lsl#1 @x3r = x2i + (x3r << 1)@
STMIA r3!, {r4-r11}
BLT FIRST_STAGE_R4
LDR r1, [sp], #4
LDR r0, [sp, #0x2c]
MOV r12, #0x40 @nodespacing = 64@
STR r12, [sp, #0x1c]
LDR r12, [sp, #0x2c]
SUB r3, r3, r0, LSL #3
SUBS r1, r1, #1
STR r3, [sp, #0x34]
MOV r4, r12, ASR #4
MOV r0, #4
STR r4, [sp, #0x18]
STR r1, [sp, #0x20]
BLE RADIX2
OUTER_LOOP:
LDR r1, [sp, #0x28]
LDR r12, [sp, #0x34] @WORD32 *data = ptr_y@
STR r1, [sp, #0x10]
LDR r1, [sp, #0x18]
MOV r0, r0, LSL #3 @(del<<1) * 4
LOOP_TRIVIAL_TWIDDLE:
LDRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0
LDRD r6, [r12] @r6=x1r, r7=x1i
ADD r12, r12, r0
LDRD r8, [r12] @r8=x2r, r9=x2i
ADD r12, r12, r0
LDRD r10, [r12] @r10=x3r, r11=x3i
@MOV r4,r4,ASR #1
@MOV r5,r5,ASR #1
@MOV r6,r6,ASR #1
@MOV r7,r7,ASR #1
@MOV r8,r8,ASR #1
@MOV r9,r9,ASR #1
@MOV r10,r10,ASR #1
@MOV r11,r11,ASR #1
ADD r4, r4, r8 @x0r = x0r + x2r@
ADD r5, r5, r9 @x0i = x0i + x2i@
SUB r8, r4, r8, lsl #1 @x2r = x0r - (x2r << 1)@
SUB r9, r5, r9, lsl #1 @x2i = x0i - (x2i << 1)@
ADD r6, r6, r10 @x1r = x1r + x3r@
ADD r7, r7, r11 @x1i = x1i + x3i@
SUB r2, r6, r10, lsl #1 @x3r = x1r - (x3r << 1)@
SUB r11, r7, r11, lsl #1 @x3i = x1i - (x3i << 1)@
ADD r4, r4, r6 @x0r = x0r + x1r@
ADD r5, r5, r7 @x0i = x0i + x1i@
@MOV r4,r4,ASR #1
@MOV r5,r5,ASR #1
SUB r6, r4, r6, lsl #1 @x1r = x0r - (x1r << 1)@
SUB r7, r5, r7, lsl #1 @x1i = x0i - (x1i << 1)
ADD r8, r8, r11 @x2r = x2r + x3i@
SUB r9, r9, r2 @x2i = x2i - x3r@
SUB r10, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
ADD r11, r9, r2, lsl#1 @x3r = x2i + (x3r << 1)
STRD r10, [r12] @r10=x3r, r11=x3i
SUB r12, r12, r0
STRD r6, [r12] @r6=x1r, r7=x1i
SUB r12, r12, r0
STRD r8, [r12] @r8=x2r, r9=x2i
SUB r12, r12, r0
STRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0, lsl #2
SUBS r1, r1, #1
BNE LOOP_TRIVIAL_TWIDDLE
MOV r0, r0, ASR #3
LDR r4, [sp, #0x1c]
LDR r3, [sp, #0x34]
MUL r1, r0, r4
ADD r12, r3, #8
STR r1, [sp, #0x24]
MOV r3, r1, ASR #2
ADD r3, r3, r1, ASR #3
SUB r3, r3, r1, ASR #4
ADD r3, r3, r1, ASR #5
SUB r3, r3, r1, ASR #6
ADD r3, r3, r1, ASR #7
SUB r3, r3, r1, ASR #8
STR r3, [sp, #-4]!
SECOND_LOOP:
LDR r3, [sp, #0x10+4]
LDR r14, [sp, #0x18+4]
MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4]
STR r1, [sp, #-4]
STR r2, [sp, #-8]
STR r5, [sp, #-12]
STR r6, [sp, #-16]
STR r7, [sp, #-20]
STR r8, [sp, #-24]
RADIX4_BFLY:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1
LDR r1, [sp, #-4]
LDR r2, [sp, #-8]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
LSR r3, r3, #31
ORR r6, r3, r6, LSL#1
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r7, r3, r7, LSL#1
ADD r7, r7, r6
SUB r6, r4, r5 @
LDR r1, [sp, #-12]
LDR r2, [sp, #-16]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
LSR r3, r3, #31
ORR r8, r3, r8, LSL#1
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r9, r3, r9, LSL#1
ADD r9, r9, r8
SUB r8, r4, r5 @
LDR r1, [sp, #-20]
LDR r2, [sp, #-24]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
LSR r3, r3, #31
ORR r10, r3, r10, LSL#1
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
LSR r3, r3, #31
ORR r11, r3, r11, LSL#1
ADD r11, r11, r10
SUB r10, r4, r5 @
@SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4]
ADD r4, r8, r4 @x0r = x0r + x2r@
ADD r5, r9, r5 @x0i = x0i + x2i@
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
ADD r6, r6, r10 @x1r = x1r + x3r@
ADD r7, r7, r11 @x1i = x1i + x3i@
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
ADD r4, r4, r6 @x0r = x0r + x1r@
ADD r5, r5, r7 @x0i = x0i + x1i@
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
STRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0
ADD r8, r8, r11 @x2r = x2r + x3i@
SUB r9, r9, r10 @x2i = x2i - x3r@
SUB r4, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
ADD r5, r9, r10, lsl#1 @x3r = x2i + (x3r << 1)
STRD r8, [r12] @r8=x2r, r9=x2i
ADD r12, r12, r0
STRD r6, [r12] @r6=x1r, r7=x1i
ADD r12, r12, r0
STRD r4, [r12] @r10=x3r, r11=x3i
ADD r12, r12, r0
BNE RADIX4_BFLY
MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4]
LDR r4, [sp, #8+4]
SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4]
ADD r12, r1, #8
LDR r7, [sp, #0]
ADD r4, r4, r6
CMP r4, r7
BLE SECOND_LOOP
SECOND_LOOP_2:
LDR r3, [sp, #0x10+4]
LDR r14, [sp, #0x18+4]
MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
SUB r3, r3, #2048 @ 512 *4
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4]
STR r1, [sp, #-4]
STR r2, [sp, #-8]
STR r5, [sp, #-12]
STR r6, [sp, #-16]
STR r7, [sp, #-20]
STR r8, [sp, #-24]
RADIX4_BFLY_2:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1
LDR r1, [sp, #-4]
LDR r2, [sp, #-8]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
LSR r3, r3, #31
ORR r6, r3, r6, LSL#1
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r7, r3, r7, LSL#1
ADD r7, r7, r6
SUB r6, r4, r5 @
LDR r1, [sp, #-12]
LDR r2, [sp, #-16]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
LSR r3, r3, #31
ORR r8, r3, r8, LSL#1
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r9, r3, r9, LSL#1
ADD r9, r9, r8
SUB r8, r4, r5 @
LDR r1, [sp, #-20]
LDR r2, [sp, #-24]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
LSR r3, r3, #31
ORR r10, r3, r10, LSL#1
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
LSR r3, r3, #31
ORR r11, r3, r11, LSL#1
ADD r10, r11, r10
SUB r11, r5, r4 @
@SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4]
ADD r4, r8, r4 @x0r = x0r + x2r@
ADD r5, r9, r5 @x0i = x0i + x2i@
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
ADD r6, r6, r10 @x1r = x1r + x3r@
ADD r7, r7, r11 @x1i = x1i + x3i@
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
ADD r4, r4, r6 @x0r = x0r + x1r@
ADD r5, r5, r7 @x0i = x0i + x1i@
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
STRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0
ADD r8, r8, r11 @x2r = x2r + x3i@
SUB r9, r9, r10 @x2i = x2i - x3r@
SUB r4, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
ADD r5, r9, r10, lsl#1 @x3r = x2i + (x3r << 1)
STRD r8, [r12] @r8=x2r, r9=x2i
ADD r12, r12, r0
STRD r6, [r12] @r6=x1r, r7=x1i
ADD r12, r12, r0
STRD r4, [r12] @r10=x3r, r11=x3i
ADD r12, r12, r0
BNE RADIX4_BFLY_2
MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4]
LDR r4, [sp, #8+4]
SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4]
ADD r12, r1, #8
LDR r7, [sp, #0x24+4]
ADD r4, r4, r6
CMP r4, r7, ASR #1
BLE SECOND_LOOP_2
LDR r7, [sp, #0]
CMP r4, r7, LSL #1
BGT SECOND_LOOP_4
SECOND_LOOP_3:
LDR r3, [sp, #0x10+4]
LDR r14, [sp, #0x18+4]
MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
SUB r3, r3, #2048 @ 512 *4
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4]
STR r1, [sp, #-4]
STR r2, [sp, #-8]
STR r5, [sp, #-12]
STR r6, [sp, #-16]
STR r7, [sp, #-20]
STR r8, [sp, #-24]
RADIX4_BFLY_3:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1
LDR r1, [sp, #-4]
LDR r2, [sp, #-8]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
LSR r3, r3, #31
ORR r6, r3, r6, LSL#1
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r7, r3, r7, LSL#1
ADD r7, r7, r6
SUB r6, r4, r5 @
LDR r1, [sp, #-12]
LDR r2, [sp, #-16]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
LSR r3, r3, #31
ORR r8, r3, r8, LSL#1
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r9, r3, r9, LSL#1
ADD r8, r9, r8
SUB r9, r5, r4 @
LDR r1, [sp, #-20]
LDR r2, [sp, #-24]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
LSR r3, r3, #31
ORR r10, r3, r10, LSL#1
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
LSR r3, r3, #31
ORR r11, r3, r11, LSL#1
ADD r10, r11, r10
SUB r11, r5, r4 @
@SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4]
ADD r4, r8, r4 @x0r = x0r + x2r@
ADD r5, r9, r5 @x0i = x0i + x2i@
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
ADD r6, r6, r10 @x1r = x1r + x3r@
ADD r7, r7, r11 @x1i = x1i + x3i@
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
ADD r4, r4, r6 @x0r = x0r + x1r@
ADD r5, r5, r7 @x0i = x0i + x1i@
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
STRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0
ADD r8, r8, r11 @x2r = x2r + x3i@
SUB r9, r9, r10 @x2i = x2i - x3r@
SUB r4, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
ADD r5, r9, r10, lsl#1 @x3r = x2i + (x3r << 1)
STRD r8, [r12] @r8=x2r, r9=x2i
ADD r12, r12, r0
STRD r6, [r12] @r6=x1r, r7=x1i
ADD r12, r12, r0
STRD r4, [r12] @r10=x3r, r11=x3i
ADD r12, r12, r0
BNE RADIX4_BFLY_3
MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4]
LDR r4, [sp, #8+4]
SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4]
ADD r12, r1, #8
LDR r7, [sp, #0]
ADD r4, r4, r6
CMP r4, r7, LSL #1
BLE SECOND_LOOP_3
SECOND_LOOP_4:
LDR r3, [sp, #0x10+4]
LDR r14, [sp, #0x18+4]
MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
SUB r3, r3, #2048 @ 512 *4
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
SUB r3, r3, #2048 @ 512 *4
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4]
STR r1, [sp, #-4]
STR r2, [sp, #-8]
STR r5, [sp, #-12]
STR r6, [sp, #-16]
STR r7, [sp, #-20]
STR r8, [sp, #-24]
RADIX4_BFLY_4:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1
LDR r1, [sp, #-4]
LDR r2, [sp, #-8]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
LSR r3, r3, #31
ORR r6, r3, r6, LSL#1
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r7, r3, r7, LSL#1
ADD r7, r7, r6
SUB r6, r4, r5 @
LDR r1, [sp, #-12]
LDR r2, [sp, #-16]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
LSR r3, r3, #31
ORR r8, r3, r8, LSL#1
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r9, r3, r9, LSL#1
ADD r8, r9, r8
SUB r9, r5, r4 @
LDR r1, [sp, #-20]
LDR r2, [sp, #-24]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
LSR r3, r3, #31
ORR r10, r3, r10, LSL#1
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
LSR r3, r3, #31
ORR r11, r3, r11, LSL#1
ADD r11, r11, r10
SUB r10, r5, r4 @
@SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4]
ADD r4, r8, r4 @x0r = x0r + x2r@
ADD r5, r9, r5 @x0i = x0i + x2i@
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
ADD r6, r6, r10 @x1r = x1r + x3r@
SUB r7, r7, r11 @x1i = x1i - x3i@
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
ADD r11, r7, r11, lsl#1 @x3i = x1i + (x3i << 1)@
ADD r4, r4, r6 @x0r = x0r + x1r@
ADD r5, r5, r7 @x0i = x0i + x1i@
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
STRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0
ADD r8, r8, r11 @x2r = x2r + x3i@
SUB r9, r9, r10 @x2i = x2i - x3r@
SUB r4, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
ADD r5, r9, r10, lsl#1 @x3r = x2i + (x3r << 1)
STRD r8, [r12] @r8=x2r, r9=x2i
ADD r12, r12, r0
STRD r6, [r12] @r6=x1r, r7=x1i
ADD r12, r12, r0
STRD r4, [r12] @r10=x3r, r11=x3i
ADD r12, r12, r0
BNE RADIX4_BFLY_4
MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4]
LDR r4, [sp, #8+4]
SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4]
ADD r12, r1, #8
LDR r7, [sp, #0x24+4]
ADD r4, r4, r6
CMP r4, r7
BLT SECOND_LOOP_4
ADD sp, sp, #4
LDR r1, [sp, #0x1c]
MOV r0, r0, LSL #2
MOV r1, r1, ASR #2
STR r1, [sp, #0x1c]
LDR r1, [sp, #0x18]
MOV r1, r1, ASR #2
STR r1, [sp, #0x18]
LDR r1, [sp, #0x20]
SUBS r1, r1, #1
STR r1, [sp, #0x20]
BGT OUTER_LOOP
RADIX2:
LDR r1, [sp, #0x14]
CMP r1, #0
BEQ EXIT
LDR r12, [sp, #0x1c]
LDR r1, [sp, #0x28]
CMP r12, #0
LDRNE r12, [sp, #0x1c]
MOVEQ r4, #1
MOVNE r4, r12, LSL #1
MOVS r3, r0
BEQ EXIT
MOV r3, r3, ASR #1
LDR r5, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4
STR r1, [sp, #-4]
RADIX2_BFLY:
LDR r1, [sp, #-4]
LDRD r6, [r5] @r6 = x0r
ADD r5, r5, r0
LDRD r8, [r5] @r8 = x1r
LDR r2, [r1]
SUBS r3, r3, #1
SMULL r1, r11, r8, r2 @mult32x16hin32(x1r,W1h)
LSR r1, r1, #31
ORR r11, r1, r11, LSL#1
SMULL r1, r10, r9, r2 @mult32x16hin32(x1i,W1h)
LSR r1, r1, #31
ORR r10, r1, r10, LSL#1
LDR r1, [sp, #-4]
LDR r2, [r1, #4]
ADD r1, r1, r4, LSL #3
STR r1, [sp, #-4]
SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l)
LSR r1, r1, #31
ORR r8, r1, r8, LSL#1
SMULL r1, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r1, r1, #31
ORR r9, r1, r9, LSL#1
SUB r8, r8, r10
ADD r9, r9, r11
ADD r10, r8, r6 @(x0r/2) + (x1r/2)
ASR r10, r10, #1
ADD r11, r9, r7 @(x0i/2) + (x1i/2)@
ASR r11, r11, #1
SUB r8, r6, r8 @(x0r/2) - (x1r/2)
ASR r8, r8, #1
SUB r9, r7, r9 @(x0i/2) - (x1i/2)@
ASR r9, r9, #1
STRD r8, [r5]
SUB r5, r5, r0
STRD r10, [r5], #8
BNE RADIX2_BFLY
LDR r1, [sp, #0x28]
MOV r3, r0, ASR #4
STR r1, [sp, #-4]
RADIX2_BFLY_2:
LDR r1, [sp, #-4]
LDRD r6, [r5] @r6 = x0r
ADD r5, r5, r0
LDRD r8, [r5] @r8 = x1r
LDR r2, [r1]
SUBS r3, r3, #1
SMULL r1, r11, r8, r2 @mult32x16hin32(x1r,W1h)
LSR r1, r1, #31
ORR r11, r1, r11, LSL#1
SMULL r1, r10, r9, r2 @mult32x16hin32(x1i,W1h)
LSR r1, r1, #31
ORR r10, r1, r10, LSL#1
LDR r1, [sp, #-4]
LDR r2, [r1, #4]
ADD r1, r1, r4, LSL #3
STR r1, [sp, #-4]
SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l)
LSR r1, r1, #31
ORR r8, r1, r8, LSL#1
SMULL r1, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r1, r1, #31
ORR r9, r1, r9, LSL#1
ADD r11, r11, r9
SUB r9, r10, r8 @
MOV r8, r11
ADD r10, r8, r6 @(x0r>>1) + (x1r)
ASR r10, r10, #1
ADD r11, r9, r7 @(x0i>>1) + (x1i)@
ASR r11, r11, #1
SUB r8, r6, r8 @(x0r>>1) - (x1r)
ASR r8, r8, #1
SUB r9, r7, r9 @(x0i>>1) - (x1i)@
ASR r9, r9, #1
STRD r8, [r5]
SUB r5, r5, r0
STRD r10, [r5], #8
BNE RADIX2_BFLY_2
EXIT:
ADD sp, sp, #0x38
LDMFD sp!, {r4-r12, pc}

View file

@ -0,0 +1,809 @@
.text
.p2align 2
@ ixheaacd_complex_ifft_p2_asm: fixed-point complex IFFT for power-of-2 sizes.
@ Structure (from the labels below): a bit-reversed radix-4 first stage
@ (FIRST_STAGE_R4), radix-4 middle stages (OUTER_LOOP), and an optional
@ final radix-2 stage (RADIX2) taken when the stage count is odd.
@ NOTE(review): per AAPCS, r0-r3 presumably carry (npoints-related args,
@ src buffer r2, work buffer r3) -- confirm against the C prototype; this
@ file does not show the caller.
.global ixheaacd_complex_ifft_p2_asm
ixheaacd_complex_ifft_p2_asm:
STMFD sp!, {r0-r12, lr}
@ Reserve 0x28 bytes of locals; [sp,#0x2c] then aliases the saved r0 (npoints).
SUB sp, sp, #0x28
LDR r0, [sp, #0x2c]
@LDR r12,[sp,#0x5c+4]
EOR r0, r0, r0, ASR #31
CLZ r0, r0
SUB r12, r0, #16 @dig_rev_shift = norm32(npoints) + 1 -16@
SUB r0, r0, #1
RSB r0, r0, #0x1e
@ r0 now holds log2-derived stage count; bit 0 decides whether the final
@ radix-2 pass is needed (stored at [sp,#0x14], tested at label RADIX2).
AND r1, r0, #1
STR r1, [sp, #0x14]
MOV r1, r0, ASR #1
LDR r0, [sp, #0x2c] @npoints
STR r1, [sp, #-4]!
MOV lr, r0, LSL #1 @(npoints >>1) * 4
MOV r0, #0
@ First radix-4 stage: digit-reverse the input index (mask/shuffle trick
@ with 0x33333333 / 0x0F0F0F0F / byte swap), gather four complex inputs
@ from the source buffer (r2), do one radix-4 butterfly, and stream the
@ result to the work buffer (r3) with STMIA.
FIRST_STAGE_R4:
LDR r4, =0x33333333
LDR r5, =0x0F0F0F0F
@ 2-bit, 4-bit, then 8-bit swaps: r4 becomes the bit-reversed form of r0.
AND r6, r4, r0
AND r7, r4, r0, LSR #2
ORR r4, r7, r6, LSL #2
AND r6, r5, r4
AND r7, r5, r4, LSR #4
ORR r4, r7, r6, LSL #4
BIC r6, r4, #0x0000FF00
BIC r7, r4, #0x00FF0000
MOV r7, r7, LSR #8
ORR r4, r7, r6, LSL #8
LDR r5, [sp, #0x18]
MOV r10, r4, LSR r12
@ [sp,#0x18] flag (odd stage count) rounds the reversed index to even.
CMP r5, #0
ADDNE r10, r10, #1
BICNE r10, r10, #1
ADD r1, r2, r10, LSL #2
LDRD r4, [r1] @r4=x0r, r5=x0i
ADD r1, r1, lr
LDRD r8, [r1] @r8=x1r, r9=x1i
ADD r1, r1, lr
LDRD r6, [r1] @r6=x2r, r7=x2i
ADD r1, r1, lr
LDRD r10, [r1] @r10=x3r, r11=x3i
ADD r0, r0, #4
CMP r0, lr, ASR #1
@ Trivial-twiddle radix-4 butterfly (twiddle factors are +/-1, +/-j).
ADD r4, r4, r6 @x0r = x0r + x2r@
ADD r5, r5, r7 @x0i = x0i + x2i@
SUB r6, r4, r6, lsl#1 @x2r = x0r - (x2r << 1)@
SUB r7, r5, r7, lsl#1 @x2i = x0i - (x2i << 1)@
ADD r8, r8, r10 @x1r = x1r + x3r@
ADD r9, r9, r11 @x1i = x1i + x3i@
SUB r1, r8, r10, lsl#1 @x3r = x1r - (x3r << 1)@
SUB r11, r9, r11, lsl#1 @x3i = x1i - (x3i << 1)@
ADD r4, r4, r8 @x0r = x0r + x1r@
ADD r5, r5, r9 @x0i = x0i + x1i@
SUB r8, r4, r8, lsl#1 @x1r = x0r - (x1r << 1)@
SUB r9, r5, r9, lsl#1 @x1i = x0i - (x1i << 1)
SUB r6, r6, r11 @x2r = x2r - x3i@
ADD r7, r7, r1 @x2i = x2i + x3r@
ADD r10, r6, r11, lsl#1 @x3i = x2r + (x3i << 1)@
SUB r11, r7, r1, lsl#1 @x3r = x2i - (x3r << 1)@
STMIA r3!, {r4-r11}
BLT FIRST_STAGE_R4
@ Stage-1 done: rewind r3 to the output base, set nodespacing = 64,
@ initialise per-stage counters, and skip to RADIX2 if only one stage.
LDR r1, [sp], #4
LDR r0, [sp, #0x2c]
MOV r12, #0x40 @nodespacing = 64@
STR r12, [sp, #0x1c]
LDR r12, [sp, #0x2c]
SUB r3, r3, r0, LSL #3
SUBS r1, r1, #1
STR r3, [sp, #0x34]
MOV r4, r12, ASR #4
MOV r0, #4
STR r4, [sp, #0x18]
STR r1, [sp, #0x20]
BLE RADIX2
@ One middle radix-4 stage per OUTER_LOOP iteration. The first butterfly
@ group of each stage has trivial twiddles (LOOP_TRIVIAL_TWIDDLE); the
@ remaining groups run through the SECOND_LOOP* variants below.
OUTER_LOOP:
LDR r1, [sp, #0x28]
LDR r12, [sp, #0x34] @WORD32 *data = ptr_y@
STR r1, [sp, #0x10]
LDR r1, [sp, #0x18]
MOV r0, r0, LSL #3 @(del<<1) * 4
LOOP_TRIVIAL_TWIDDLE:
LDRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0
LDRD r6, [r12] @r6=x1r, r7=x1i
ADD r12, r12, r0
LDRD r8, [r12] @r8=x2r, r9=x2i
ADD r12, r12, r0
LDRD r10, [r12] @r10=x3r, r11=x3i
@MOV r4,r4,ASR #1
@MOV r5,r5,ASR #1
@MOV r6,r6,ASR #1
@MOV r7,r7,ASR #1
@MOV r8,r8,ASR #1
@MOV r9,r9,ASR #1
@MOV r10,r10,ASR #1
@MOV r11,r11,ASR #1
ADD r4, r4, r8 @x0r = x0r + x2r@
ADD r5, r5, r9 @x0i = x0i + x2i@
SUB r8, r4, r8, lsl #1 @x2r = x0r - (x2r << 1)@
SUB r9, r5, r9, lsl #1 @x2i = x0i - (x2i << 1)@
ADD r6, r6, r10 @x1r = x1r + x3r@
ADD r7, r7, r11 @x1i = x1i + x3i@
SUB r2, r6, r10, lsl #1 @x3r = x1r - (x3r << 1)@
SUB r11, r7, r11, lsl #1 @x3i = x1i - (x3i << 1)@
ADD r4, r4, r6 @x0r = x0r + x1r@
ADD r5, r5, r7 @x0i = x0i + x1i@
@MOV r4,r4,ASR #1
@MOV r5,r5,ASR #1
SUB r6, r4, r6, lsl #1 @x1r = x0r - (x1r << 1)@
SUB r7, r5, r7, lsl #1 @x1i = x0i - (x1i << 1)
SUB r8, r8, r11 @x2r = x2r - x3i@
ADD r9, r9, r2 @x2i = x2i + x3r@
ADD r10, r8, r11, lsl#1 @x3i = x2r + (x3i << 1)@
SUB r11, r9, r2, lsl#1 @x3r = x2i - (x3r << 1)
@ Write the four outputs back in place, stepping r12 back then forward.
STRD r10, [r12] @r10=x3r, r11=x3i
SUB r12, r12, r0
STRD r6, [r12] @r6=x1r, r7=x1i
SUB r12, r12, r0
STRD r8, [r12] @r8=x2r, r9=x2i
SUB r12, r12, r0
STRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0, lsl #2
SUBS r1, r1, #1
BNE LOOP_TRIVIAL_TWIDDLE
MOV r0, r0, ASR #3
LDR r4, [sp, #0x1c]
LDR r3, [sp, #0x34]
MUL r1, r0, r4
ADD r12, r3, #8
STR r1, [sp, #0x24]
@ r3 = r1 * (1/4 + 1/8 - 1/16 + 1/32 - ...) -- a shift/add approximation
@ used as the first quadrant boundary for the SECOND_LOOP j-ranges.
MOV r3, r1, ASR #2
ADD r3, r3, r1, ASR #3
SUB r3, r3, r1, ASR #4
ADD r3, r3, r1, ASR #5
SUB r3, r3, r1, ASR #6
ADD r3, r3, r1, ASR #7
SUB r3, r3, r1, ASR #8
STR r3, [sp, #-4]!
@ SECOND_LOOP: radix-4 butterflies for twiddle indices j in the first
@ quadrant of the table. w1/w2/w3 (high/low parts) are fetched once per j,
@ cached below sp, then RADIX4_BFLY applies them to every butterfly in
@ the group (r14 counts butterflies).
@ The SMULL/LSR #31/ORR ,LSL#1 triples implement a 32x32 fixed-point
@ multiply keeping bits [62:31] of the 64-bit product (Q31-style mult32).
SECOND_LOOP:
LDR r3, [sp, #0x10+4]
LDR r14, [sp, #0x18+4]
MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4]
@ Cache the six twiddle words in the red zone below sp for the inner loop.
STR r1, [sp, #-4]
STR r2, [sp, #-8]
STR r5, [sp, #-12]
STR r6, [sp, #-16]
STR r7, [sp, #-20]
STR r8, [sp, #-24]
RADIX4_BFLY:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1
LDR r1, [sp, #-4]
LDR r2, [sp, #-8]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
LSR r3, r3, #31
ORR r6, r3, r6, LSL#1
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r7, r3, r7, LSL#1
SUB r7, r7, r6
ADD r6, r4, r5 @
LDR r1, [sp, #-12]
LDR r2, [sp, #-16]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
LSR r3, r3, #31
ORR r8, r3, r8, LSL#1
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r9, r3, r9, LSL#1
SUB r9, r9, r8
ADD r8, r4, r5 @
LDR r1, [sp, #-20]
LDR r2, [sp, #-24]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
LSR r3, r3, #31
ORR r10, r3, r10, LSL#1
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r11, r11, r2 @ixheacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
LSR r3, r3, #31
ORR r11, r3, r11, LSL#1
SUB r11, r11, r10
ADD r10, r4, r5 @
@SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4]
ADD r4, r8, r4 @x0r = x0r + x2r@
ADD r5, r9, r5 @x0i = x0i + x2i@
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
ADD r6, r6, r10 @x1r = x1r + x3r@
ADD r7, r7, r11 @x1i = x1i + x3i@
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
ADD r4, r4, r6 @x0r = x0r + x1r@
ADD r5, r5, r7 @x0i = x0i + x1i@
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
STRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0
SUB r8, r8, r11 @x2r = x2r - x3i@
ADD r9, r9, r10 @x2i = x2i + x3r@
ADD r4, r8, r11, lsl#1 @x3i = x2r + (x3i << 1)@
SUB r5, r9, r10, lsl#1 @x3r = x2i - (x3r << 1)
STRD r8, [r12] @r8=x2r, r9=x2i
ADD r12, r12, r0
STRD r6, [r12] @r6=x1r, r7=x1i
ADD r12, r12, r0
STRD r4, [r12] @r10=x3r, r11=x3i
ADD r12, r12, r0
BNE RADIX4_BFLY
@ Advance j by nodespacing; stay in SECOND_LOOP while j <= first boundary.
MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4]
LDR r4, [sp, #8+4]
SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4]
ADD r12, r1, #8
LDR r7, [sp, #0]
ADD r4, r4, r6
CMP r4, r7
BLE SECOND_LOOP
@ SECOND_LOOP_2: same radix-4 butterfly for the second j-quadrant.
@ Only w3 now wraps past the table end (SUB r3,r3,#2048 before the w3
@ fetch), and the sign/combination of the w2 products differs from
@ SECOND_LOOP to account for the quadrant symmetry of the twiddles.
SECOND_LOOP_2:
LDR r3, [sp, #0x10+4]
LDR r14, [sp, #0x18+4]
MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
SUB r3, r3, #2048 @ 512 *4
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4]
STR r1, [sp, #-4]
STR r2, [sp, #-8]
STR r5, [sp, #-12]
STR r6, [sp, #-16]
STR r7, [sp, #-20]
STR r8, [sp, #-24]
RADIX4_BFLY_2:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1
LDR r1, [sp, #-4]
LDR r2, [sp, #-8]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
LSR r3, r3, #31
ORR r6, r3, r6, LSL#1
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r7, r3, r7, LSL#1
SUB r7, r7, r6
ADD r6, r4, r5 @
LDR r1, [sp, #-12]
LDR r2, [sp, #-16]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
LSR r3, r3, #31
ORR r8, r3, r8, LSL#1
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r9, r3, r9, LSL#1
SUB r9, r9, r8
ADD r8, r4, r5 @
LDR r1, [sp, #-20]
LDR r2, [sp, #-24]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
LSR r3, r3, #31
ORR r10, r3, r10, LSL#1
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
LSR r3, r3, #31
ORR r11, r3, r11, LSL#1
SUB r10, r10, r11
ADD r11, r5, r4 @
@SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4]
ADD r4, r8, r4 @x0r = x0r + x2r@
ADD r5, r9, r5 @x0i = x0i + x2i@
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
ADD r6, r6, r10 @x1r = x1r + x3r@
ADD r7, r7, r11 @x1i = x1i + x3i@
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
ADD r4, r4, r6 @x0r = x0r + x1r@
ADD r5, r5, r7 @x0i = x0i + x1i@
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
STRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0
SUB r8, r8, r11 @x2r = x2r - x3i@
ADD r9, r9, r10 @x2i = x2i + x3r@
ADD r4, r8, r11, lsl#1 @x3i = x2r + (x3i << 1)@
SUB r5, r9, r10, lsl#1 @x3r = x2i - (x3r << 1)
STRD r8, [r12] @r8=x2r, r9=x2i
ADD r12, r12, r0
STRD r6, [r12] @r6=x1r, r7=x1i
ADD r12, r12, r0
STRD r4, [r12] @r10=x3r, r11=x3i
ADD r12, r12, r0
BNE RADIX4_BFLY_2
@ Loop while j <= mid/2 boundary; then fall into SECOND_LOOP_3 unless j
@ already passed the third boundary (then jump straight to SECOND_LOOP_4).
MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4]
LDR r4, [sp, #8+4]
SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4]
ADD r12, r1, #8
LDR r7, [sp, #0x24+4]
ADD r4, r4, r6
CMP r4, r7, ASR #1
BLE SECOND_LOOP_2
LDR r7, [sp, #0]
CMP r4, r7, LSL #1
BGT SECOND_LOOP_4
@ SECOND_LOOP_3: third j-quadrant. Here w2 and w3 both come from the
@ wrapped part of the table (SUB r3,r3,#2048 after the w1 fetch), so the
@ w2/w3 product combinations carry different signs than SECOND_LOOP.
SECOND_LOOP_3:
LDR r3, [sp, #0x10+4]
LDR r14, [sp, #0x18+4]
MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
SUB r3, r3, #2048 @ 512 *4
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4]
STR r1, [sp, #-4]
STR r2, [sp, #-8]
STR r5, [sp, #-12]
STR r6, [sp, #-16]
STR r7, [sp, #-20]
STR r8, [sp, #-24]
RADIX4_BFLY_3:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1
LDR r1, [sp, #-4]
LDR r2, [sp, #-8]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
LSR r3, r3, #31
ORR r6, r3, r6, LSL#1
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r7, r3, r7, LSL#1
SUB r7, r7, r6
ADD r6, r4, r5 @
LDR r1, [sp, #-12]
LDR r2, [sp, #-16]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
LSR r3, r3, #31
ORR r8, r3, r8, LSL#1
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r9, r3, r9, LSL#1
SUB r8, r8, r9
ADD r9, r5, r4 @
LDR r1, [sp, #-20]
LDR r2, [sp, #-24]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
LSR r3, r3, #31
ORR r10, r3, r10, LSL#1
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
LSR r3, r3, #31
ORR r11, r3, r11, LSL#1
SUB r10, r10, r11
ADD r11, r5, r4 @
@SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4]
ADD r4, r8, r4 @x0r = x0r + x2r@
ADD r5, r9, r5 @x0i = x0i + x2i@
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
ADD r6, r6, r10 @x1r = x1r + x3r@
ADD r7, r7, r11 @x1i = x1i + x3i@
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
ADD r4, r4, r6 @x0r = x0r + x1r@
ADD r5, r5, r7 @x0i = x0i + x1i@
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
STRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0
SUB r8, r8, r11 @x2r = x2r - x3i@
ADD r9, r9, r10 @x2i = x2i + x3r@
ADD r4, r8, r11, lsl#1 @x3i = x2r + (x3i << 1)@
SUB r5, r9, r10, lsl#1 @x3r = x2i - (x3r << 1)
STRD r8, [r12] @r8=x2r, r9=x2i
ADD r12, r12, r0
STRD r6, [r12] @r6=x1r, r7=x1i
ADD r12, r12, r0
STRD r4, [r12] @r10=x3r, r11=x3i
ADD r12, r12, r0
BNE RADIX4_BFLY_3
@ Loop while j <= 2 * first boundary, then fall through to SECOND_LOOP_4.
MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4]
LDR r4, [sp, #8+4]
SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4]
ADD r12, r1, #8
LDR r7, [sp, #0]
ADD r4, r4, r6
CMP r4, r7, LSL #1
BLE SECOND_LOOP_3
@ SECOND_LOOP_4: last j-quadrant. w2 and w3 each wrap once more (two
@ SUB r3,r3,#2048 adjustments); the w3 product is negated (RSB) and the
@ x1/x3 recombination uses flipped signs relative to the other variants.
@ After the quadrant finishes, per-stage counters are rescaled (del *= 4,
@ nodespacing /= 4, butterfly count /= 4) and control returns to
@ OUTER_LOOP for the next radix-4 stage.
SECOND_LOOP_4:
LDR r3, [sp, #0x10+4]
LDR r14, [sp, #0x18+4]
MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
SUB r3, r3, #2048 @ 512 *4
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
SUB r3, r3, #2048 @ 512 *4
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4]
STR r1, [sp, #-4]
STR r2, [sp, #-8]
STR r5, [sp, #-12]
STR r6, [sp, #-16]
STR r7, [sp, #-20]
STR r8, [sp, #-24]
RADIX4_BFLY_4:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1
LDR r1, [sp, #-4]
LDR r2, [sp, #-8]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
LSR r3, r3, #31
ORR r6, r3, r6, LSL#1
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r7, r3, r7, LSL#1
SUB r7, r7, r6
ADD r6, r4, r5 @
LDR r1, [sp, #-12]
LDR r2, [sp, #-16]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
LSR r3, r3, #31
ORR r8, r3, r8, LSL#1
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r9, r3, r9, LSL#1
SUB r8, r8, r9
ADD r9, r5, r4 @
LDR r1, [sp, #-20]
LDR r2, [sp, #-24]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
LSR r3, r3, #31
ORR r10, r3, r10, LSL#1
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
LSR r3, r3, #31
ORR r11, r3, r11, LSL#1
SUB r11, r11, r10
ADD r10, r5, r4 @
@ Negate x3r: this quadrant's twiddle symmetry flips its sign.
RSB r10, r10, #0
@SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4]
ADD r4, r8, r4 @x0r = x0r + x2r@
ADD r5, r9, r5 @x0i = x0i + x2i@
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
ADD r6, r6, r10 @x1r = x1r + x3r@
SUB r7, r7, r11 @x1i = x1i - x3i@
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
ADD r11, r7, r11, lsl#1 @x3i = x1i + (x3i << 1)@
ADD r4, r4, r6 @x0r = x0r + x1r@
ADD r5, r5, r7 @x0i = x0i + x1i@
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
STRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0
SUB r8, r8, r11 @x2r = x2r - x3i@
ADD r9, r9, r10 @x2i = x2i + x3r@
ADD r4, r8, r11, lsl#1 @x3i = x2r + (x3i << 1)@
SUB r5, r9, r10, lsl#1 @x3r = x2i - (x3r << 1)
STRD r8, [r12] @r8=x2r, r9=x2i
ADD r12, r12, r0
STRD r6, [r12] @r6=x1r, r7=x1i
ADD r12, r12, r0
STRD r4, [r12] @r10=x3r, r11=x3i
ADD r12, r12, r0
BNE RADIX4_BFLY_4
MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4]
LDR r4, [sp, #8+4]
SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4]
ADD r12, r1, #8
LDR r7, [sp, #0x24+4]
ADD r4, r4, r6
CMP r4, r7
BLT SECOND_LOOP_4
@ End of stage: pop the quadrant boundary, rescale del/nodespacing/
@ butterfly count by 4, decrement the stage counter, and loop.
ADD sp, sp, #4
LDR r1, [sp, #0x1c]
MOV r0, r0, LSL #2
MOV r1, r1, ASR #2
STR r1, [sp, #0x1c]
LDR r1, [sp, #0x18]
MOV r1, r1, ASR #2
STR r1, [sp, #0x18]
LDR r1, [sp, #0x20]
SUBS r1, r1, #1
STR r1, [sp, #0x20]
BGT OUTER_LOOP
@ Final radix-2 stage, executed only when [sp,#0x14] (odd stage count
@ flag set in the prologue) is nonzero. Two butterfly loops cover the two
@ halves of the twiddle range; outputs are halved (ASR #1) to keep the
@ fixed-point scaling consistent.
RADIX2:
LDR r1, [sp, #0x14]
CMP r1, #0
BEQ EXIT
LDR r12, [sp, #0x1c]
LDR r1, [sp, #0x28]
CMP r12, #0
LDRNE r12, [sp, #0x1c]
MOVEQ r4, #1
MOVNE r4, r12, LSL #1
MOVS r3, r0
BEQ EXIT
MOV r3, r3, ASR #1
LDR r5, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4
STR r1, [sp, #-4]
RADIX2_BFLY:
LDR r1, [sp, #-4]
LDRD r6, [r5] @r6 = x0r
ADD r5, r5, r0
LDRD r8, [r5] @r8 = x1r
LDR r2, [r1]
SUBS r3, r3, #1
SMULL r1, r11, r8, r2 @mult32x16hin32(x1r,W1h)
LSR r1, r1, #31
ORR r11, r1, r11, LSL#1
SMULL r1, r10, r9, r2 @mult32x16hin32(x1i,W1h)
LSR r1, r1, #31
ORR r10, r1, r10, LSL#1
LDR r1, [sp, #-4]
LDR r2, [r1, #4]
@ Advance the cached twiddle pointer by the radix-2 node spacing.
ADD r1, r1, r4, LSL #3
STR r1, [sp, #-4]
SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l)
LSR r1, r1, #31
ORR r8, r1, r8, LSL#1
SMULL r1, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r1, r1, #31
ORR r9, r1, r9, LSL#1
ADD r8, r8, r10
SUB r9, r9, r11
ASR r8, r8, #1
ASR r6, r6, #1
ASR r9, r9, #1
ASR r7, r7, #1
ADD r10, r8, r6 @(x0r/2) + (x1r/2)
ADD r11, r9, r7 @(x0i/2) + (x1i/2)@
SUB r8, r6, r8 @(x0r/2) - (x1r/2)
SUB r9, r7, r9 @(x0i/2) - (x1i/2)@
STRD r8, [r5]
SUB r5, r5, r0
STRD r10, [r5], #8
BNE RADIX2_BFLY
@ Second half: same butterfly, but the twiddle products swap roles
@ (real/imag exchanged via r10/r11) to cover the mirrored twiddles.
LDR r1, [sp, #0x28]
MOV r3, r0, ASR #4
STR r1, [sp, #-4]
RADIX2_BFLY_2:
LDR r1, [sp, #-4]
LDRD r6, [r5] @r6 = x0r
ADD r5, r5, r0
LDRD r8, [r5] @r8 = x1r
LDR r2, [r1]
SUBS r3, r3, #1
SMULL r1, r11, r8, r2 @mult32x16hin32(x1r,W1h)
LSR r1, r1, #31
ORR r11, r1, r11, LSL#1
SMULL r1, r10, r9, r2 @mult32x16hin32(x1i,W1h)
LSR r1, r1, #31
ORR r10, r1, r10, LSL#1
LDR r1, [sp, #-4]
LDR r2, [r1, #4]
ADD r1, r1, r4, LSL #3
STR r1, [sp, #-4]
SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l)
LSR r1, r1, #31
ORR r8, r1, r8, LSL#1
SMULL r1, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r1, r1, #31
ORR r9, r1, r9, LSL#1
SUB r11, r11, r9
ADD r9, r10, r8 @
MOV r8, r11
ASR r8, r8, #1
ASR r6, r6, #1
ASR r9, r9, #1
ASR r7, r7, #1
ADD r10, r8, r6 @(x0r>>1) + (x1r)
ADD r11, r9, r7 @(x0i>>1) + (x1i)@
SUB r8, r6, r8 @(x0r>>1) - (x1r)
SUB r9, r7, r9 @(x0i>>1) - (x1i)@
STRD r8, [r5]
SUB r5, r5, r0
STRD r10, [r5], #8
BNE RADIX2_BFLY_2
EXIT:
@ Drop locals plus the r0-r3 saved by the prologue STMFD (0x28 + 0x10),
@ then restore callee-saved registers and return (pc <- saved lr).
ADD sp, sp, #0x38
LDMFD sp!, {r4-r12, pc}

View file

@ -0,0 +1,132 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
@ ixheaacd_conv_ergtoamplitude_armv7: converts three streams of
@ (mantissa, exponent) 16-bit pairs (pointed to by R2, R3, and the fifth
@ arg R4) from energy to amplitude in place, R0 pairs per stream.
@ Mechanism per value: normalise the positive mantissa (CLZ), index a
@ 16-bit lookup table at R5 (sixth stack arg) -- presumably a sqrt
@ mantissa table, TODO confirm against the C reference -- halve the
@ exponent, and multiply by 0x5A82 (23170 ~= 1/sqrt(2) in Q15) when the
@ exponent was odd. Non-positive mantissas map to (0, -16).
@ The third stream additionally rescales its result against limit R1.
.global ixheaacd_conv_ergtoamplitude_armv7
ixheaacd_conv_ergtoamplitude_armv7:
STMFD sp!, {r4-r12, r14}
LDR R5, [SP, #44]
LDR R4, [SP, #40]
LDR R14, =0x1FF
LDR R10, =0x5A82
LOOP1:
@ --- stream 1 (R2): load mantissa/exponent, post-incrementing R2 ---
LDRSH R6, [R2], #2
LDRSH R7, [R2], #2
MOV R12, #0
MOV R9, #-16
MOVS R6, R6
BLE ENDIF1_1
CLZ R8, R6
SUB R8, R8, #17
SUB R7, R7, R8
MOV R11, R6, LSL R8
MOV R11, R11, ASR #5
ANDS R11, R11, R14
BIC R11, R11, #1
LDRH R12, [R11, R5]
TST R7, #1
ADDNE R7, R7, #3
SMULWBNE R12, R12, R10
MOV R9, R7, ASR #1
ENDIF1_1:
STRH R12, [R2, #-4]
STRH R9, [R2, #-2]
@ --- stream 2 (R3): identical conversion ---
LDRSH R6, [R3], #2
LDRSH R7, [R3], #2
MOV R8, #0
MOV R9, #-16
MOVS R6, R6
BLE ENDIF1_2
CLZ R8, R6
SUB R8, R8, #17
SUB R7, R7, R8
MOV R11, R6, LSL R8
MOV R11, R11, ASR #5
ANDS R11, R11, R14
BIC R11, R11, #1
LDRH R8, [R11, R5]
TST R7, #1
ADDNE R7, R7, #3
SMULWBNE R8, R8, R10
MOV R9, R7, ASR #1
ENDIF1_2:
STRH R8, [R3, #-4]
STRH R9, [R3, #-2]
@ --- stream 3 (R4): same conversion, then shift mantissa by (R1 - exp - 4) ---
LDRSH R6, [R4], #2
LDRSH R7, [R4], #2
MOV R8, #0
MOV R9, #-16
MOVS R6, R6
BLE ENDIF1_3
CLZ R8, R6
SUB R8, R8, #17
SUB R7, R7, R8
MOV R11, R6, LSL R8
MOV R11, R11, ASR #5
ANDS R11, R11, R14
BIC R11, R11, #1
LDRH R8, [R11, R5]
TST R7, #1
ADDNE R7, R7, #3
SMULWBNE R8, R8, R10
MOV R9, R7, ASR #1
ENDIF1_3:
STRH R9, [R4, #-2]
SUB R6, R1, R9
SUBS R6, R6, #4
RSBLE R6, R6, #0
MOVGT R8, R8, ASR R6
MOVLE R8, R8, LSL R6
STRH R8, [R4, #-4]
SUBS R0, R0, #1
BGT LOOP1
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,148 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
@ ixheaacd_conv_ergtoamplitudelp_armv7: low-power variant of the
@ energy-to-amplitude conversion above. Same per-value recipe (CLZ
@ normalise, 16-bit table lookup at R5, halve exponent, 0x5A82 ~=
@ 1/sqrt(2) Q15 correction when the exponent is odd), applied to three
@ interleaved streams at R2, R3 and R4 (fifth stack arg), R0 iterations.
@ Stream 1's mantissa is additionally renormalised against R1: left-shift
@ saturated to +/-0x7FFF when exp > R1 (fall-through path), arithmetic
@ right shift otherwise (ELSE1).
.global ixheaacd_conv_ergtoamplitudelp_armv7
ixheaacd_conv_ergtoamplitudelp_armv7:
STMFD sp!, {r4-r12, r14}
LDR R5, [SP, #44]
LDR R4, [SP, #40]
LDR R11, =0x5A82
LDR R10, =0x1FF
LOOP1:
@ --- stream 1 (R2): mantissa at [R2], exponent at [R2,#2] ---
LDRSH R6, [R2, #0]
MOV R12, #0
MOV R14, #-16
MOVS R6, R6
BLE ENDIF1_1
LDRSH R7, [R2, #2]
CLZ R8, R6
SUB R8, R8, #17
SUB R7, R7, R8
MOV R6, R6, LSL R8
MOV R6, R6, ASR #5
AND R6, R6, R10
TST R7, #1
BIC R6, R6, #1
LDRH R12, [R6, R5]
ADDNE R7, R7, #3
MOV R14, R7, ASR #1
SMULWBNE R12, R12, R11
ENDIF1_1:
STRH R14, [R2, #2]
@ --- stream 2 (R3): converted and stored immediately ---
LDRSH R6, [R3, #0]
MOV R8, #0
MOV R9, #-16
MOVS R6, R6
BLE ENDIF1_2
LDRSH R7, [R3, #2]
CLZ R8, R6
SUB R8, R8, #17
SUB R7, R7, R8
MOV R6, R6, LSL R8
MOV R6, R6, ASR #5
AND R6, R6, R10
TST R7, #1
BIC R6, R6, #1
LDRH R8, [R6, R5]
ADDNE R7, R7, #3
MOV R9, R7, ASR #1
SMULWBNE R8, R8, R11
ENDIF1_2:
STRH R9, [R3, #2]
STRH R8, [R3], #4
@ --- stream 3 (R4): converted, then rescaled against limit R1 ---
LDRSH R6, [R4, #0]
MOV R8, #0
MOV R9, #-16
MOVS R6, R6
BLE ENDIF1_3
LDRSH R7, [R4, #2]
CLZ R8, R6
SUB R8, R8, #17
SUB R7, R7, R8
MOV R6, R6, LSL R8
MOV R6, R6, ASR #5
ANDS R6, R6, R10
TST R7, #1
BIC R6, R6, #1
LDRH R8, [R6, R5]
ADDNE R7, R7, #3
MOV R9, R7, ASR #1
SMULWBNE R8, R8, R11
ENDIF1_3:
STRH R9, [R4, #2]
SUB R6, R1, R9
SUBS R6, R6, #4
RSBLE R6, R6, #0
MOVGT R8, R8, ASR R6
MOVLE R8, R8, LSL R6
STRH R8, [R4], #4
@ Stream 1 mantissa renormalisation: exp > R1 -> saturating left shift
@ (clamped to [-0x8000 .. 0x7FFF]); otherwise right shift at ELSE1.
SUBS R6, R14, R1
BLE ELSE1
CMP R6, #15
MOVGT R6, #15
MOV R12, R12, LSL R6
CMP R12, #0x8000
MVNGE R12, #0x8000
CMNLT R12, #0x00008000
MOVLT R12, #0x00008000
STRH R12, [R2], #4
SUBS R0, R0, #1
BGT LOOP1
ELSE1:
RSB R6, R6, #0
MOV R12, R12, ASR R6
STRH R12, [R2], #4
SUBS R0, R0, #1
BGT LOOP1
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,472 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.extern ixheaacd_radix4bfly
.hidden ixheaacd_radix4bfly
.extern ixheaacd_postradixcompute2
.hidden ixheaacd_postradixcompute2
.extern ixheaacd_postradixcompute4
.hidden ixheaacd_postradixcompute4
.extern ixheaacd_sbr_imdct_using_fft
.hidden ixheaacd_sbr_imdct_using_fft
@ ixheaacd_cos_sin_mod: cos/sin modulation used by the SBR QMF filterbank.
@ Flow: LOOP1 pre-twiddles the input (Q31 x Q15 SMULWT/SMULWB products
@ with QSUB saturation) into a 516-byte 8-byte-aligned stack workspace;
@ then two calls to ixheaacd_sbr_imdct_using_fft transform the real and
@ imaginary halves (length 32 or 16, chosen by the band count at [R1]);
@ finally LOOP2_PRO/LOOP2 apply the post-modulation twiddles in place.
@ NOTE(review): R0 presumably points to the sample buffer and R1 to a
@ handle struct ([R1] = num bands, [R1,#12] = pre-twiddle table,
@ [R12,#20] = post-twiddle table) -- confirm against the C caller.
.global ixheaacd_cos_sin_mod
ixheaacd_cos_sin_mod:
STMFD SP!, {R4-R12, R14}
LDR R5, [R1]
MOV R7, R5, ASR #1
LDR R4, [R1, #12]
MOV R5, R7, ASR #2
MOV R8, R0
MOV R6, R7, LSL #3
@ Carve out 516 bytes of scratch and align R10 to 8 bytes.
SUB R10, SP, #516
SUB SP, SP, #516
AND R12, R10, #7
CMP R12, #0
ADDNE R10, R10, #4
@ Save the original R0-R3 so args survive the BL calls below.
STMFD SP!, {R0-R3}
SUB R6, R6, #4
ADD R9, R0, R6
LDR R2, [R4], #4
LDR R1, [R9], #-4
LDR R0, [R8], #4
ADD R11, R10, R6
LOOP1:
@ Pre-modulation: walk the buffer from both ends (R8 up, R9 down),
@ writing twiddled pairs to both ends of the workspace (R10 up, R11 down).
@ Four unrolled complex multiplies per iteration.
SUBS R5, R5, #1
SMULWT R12, R1, R2
SMULWB R6, R0, R2
SMULWT R14, R0, R2
LDR R0, [R8, #0xFC]
QSUB R12, R12, R6
SMLAWB R14, R1, R2, R14
LDR R1, [R9, #0x104]
STR R12, [R10, #4]
STR R14, [R10], #8
SMULWT R6, R0, R2
SMULWB R12, R1, R2
SMULWT R14, R1, R2
LDR R1, [R8], #4
QSUB R12, R12, R6
SMLAWB R14, R0, R2, R14
LDR R2, [R4], #4
LDR R0, [R9], #-4
STR R12, [R10, #0xF8]
STR R14, [R10, #0xFC]
SMULWT R3, R1, R2
SMULWB R6, R0, R2
SMULWT R12, R0, R2
LDR R0, [R9, #0x104]
QSUB R3, R3, R6
SMLAWB R12, R1, R2, R12
LDR R1, [R8, #0xFC]
STR R12, [R11, #-4]
STR R3, [R11], #-8
SMULWT R6, R0, R2
SMULWB R14, R1, R2
SMULWT R12, R1, R2
LDR R1, [R9], #-4
QSUB R14, R14, R6
SMLAWB R3, R0, R2, R12
LDR R2, [R4], #4
LDR R0, [R8], #4
STR R3, [R11, #0x108]
STR R14, [R11, #0x104]
SMULWT R12, R1, R2
SMULWB R6, R0, R2
SMULWT R14, R0, R2
LDR R0, [R8, #0xFC]
QSUB R12, R12, R6
SMLAWB R14, R1, R2, R14
LDR R1, [R9, #0x104]
STR R12, [R10, #4]
STR R14, [R10], #8
SMULWT R6, R0, R2
SMULWB R12, R1, R2
SMULWT R14, R1, R2
LDR R1, [R8], #4
QSUB R12, R12, R6
SMLAWB R14, R0, R2, R14
LDR R2, [R4], #4
LDR R0, [R9], #-4
STR R12, [R10, #0xF8]
STR R14, [R10, #0xFC]
SMULWT R3, R1, R2
SMULWB R6, R0, R2
SMULWT R12, R0, R2
LDR R0, [R9, #0x104]
QSUB R3, R3, R6
SMLAWB R12, R1, R2, R12
LDR R1, [R8, #0xFC]
STR R3, [R11], #-4
STR R12, [R11], #-4
SMULWT R6, R0, R2
SMULWB R3, R1, R2
SMULWT R12, R1, R2
LDRGT R1, [R9], #-4
QSUB R3, R3, R6
SMLAWB R12, R0, R2, R12
LDRGT R2, [R4], #4
LDRGT R0, [R8], #4
STR R3, [R11, #0x104]
STR R12, [R11, #0x108]
BGT LOOP1
@ Choose transform length: 64 bands -> two length-32 FFTs, else
@ (THIRTY2BAND) two length-16 FFTs. Each BL gets 4 stack words of the
@ scale argument pushed and popped around it.
LDR R1, [SP, #4]
LDR R5, [R1]
LDR R4, [SP, #8]
LDR R0, [SP, #8]
ADD R1, SP, #16
AND R2, R1, #7
CMP R2, #0
ADDNE R1, R1, #4
CMP R5, #64
LDR R5, [SP, #12]
MOV R2, #1
BNE THIRTY2BAND
MOV R2, R1
MOV R1, #32
LDR R3, [SP]
STR R5, [SP, #-4]!
STR R5, [SP, #-4]!
STR R5, [SP, #-4]!
STR R5, [SP, #-4]!
BL ixheaacd_sbr_imdct_using_fft
ADD SP, SP, #16
MOV R0, R4
MOV R1, #32
ADD R2, SP, #16
AND R6, R2, #7
CMP R6, #0
ADDNE R2, R2, #4
LDR R3, [SP]
ADD R2, R2, #256
ADD R3, R3, #256
STR R5, [SP, #-4]!
STR R5, [SP, #-4]!
STR R5, [SP, #-4]!
STR R5, [SP, #-4]!
BL ixheaacd_sbr_imdct_using_fft
ADD SP, SP, #16
LDR R8, [SP]
LDR R12, [SP, #4]
MOV R3, #32
LDR R6, [R8]
LDR R11, [R8, #4]
ADD R9, R8, #252
B LOOP2_PRO
THIRTY2BAND:
MOV R2, R1
MOV R1, #16
LDR R3, [SP]
STR R5, [SP, #-4]!
STR R5, [SP, #-4]!
STR R5, [SP, #-4]!
STR R5, [SP, #-4]!
BL ixheaacd_sbr_imdct_using_fft
ADD SP, SP, #16
MOV R0, R4
MOV R1, #16
ADD R2, SP, #16
AND R6, R2, #7
CMP R6, #0
ADDNE R2, R2, #4
LDR R3, [SP]
ADD R2, R2, #256
ADD R3, R3, #256
STR R5, [SP, #-4]!
STR R5, [SP, #-4]!
STR R5, [SP, #-4]!
STR R5, [SP, #-4]!
BL ixheaacd_sbr_imdct_using_fft
ADD SP, SP, #16
LDR R8, [SP]
LDR R12, [SP, #4]
LDR R6, [R8]
LDR R11, [R8, #4]
ADD R9, R8, #124
LOOP2_PRO:
@ Post-modulation prologue: handle the first/boundary samples (halving
@ and negating as required) before the steady-state LOOP2 below.
LDR R4, [R12, #20]
MOV R6, R6, ASR #1
STR R6, [R8], #4
LDR R0, [R9]
LDR R2, [R4], #4
MOV R11, R11, ASR #1
LDR R1, [R9, #-4]
RSB R12, R11, #0
STR R12, [R9], #-4
SMULWT R14, R1, R2
SMULWB R6, R0, R2
SMULWT R12, R0, R2
LDR R0, [R9, #260]
QSUB R14, R14, R6
SMLAWB R12, R1, R2, R12
LDR R6, [R8, #252]
LDR R11, [R8, #256]
STR R14, [R8], #4
STR R12, [R9], #-4
MOV R6, R6, ASR #1
MOV R11, R11, ASR #1
LDR R1, [R9, #260]
RSB R6, R6, #0
STR R6, [R9, #264]
STR R11, [R8, #248]
SMULWT R12, R0, R2
SMULWT R14, R1, R2
SMULWB R6, R0, R2
SMLAWB R12, R1, R2, R12
MOV R11, #0
QSUB R14, R6, R14
QSUB R12, R11, R12
LDR R0, [R8, #4]
LDR R1, [R8]
STR R12, [R8, #252]
STR R14, [R9, #260]
@ Steady-state count: bands/4 - 2 iterations of LOOP2.
LDR R5, [SP, #4]
LDR R5, [R5]
MOV R5, R5, ASR #2
SUB R5, R5, #2
LOOP2:
@ Post-twiddle the FFT outputs in place, two complex pairs from each
@ half per iteration (R8 ascends, R9 descends; R11 stays 0 for the
@ negations via QSUB R11,...).
SMULWB R12, R0, R2
SMULWB R14, R1, R2
SMULWT R6, R0, R2
SMLAWT R12, R1, R2, R12
LDR R10, [R9]
QSUB R14, R14, R6
LDR R0, [R8, #260]
LDR R1, [R8, #256]
STR R12, [R8], #4
STR R14, [R9], #-4
SMULWB R3, R0, R2
SMULWT R6, R0, R2
SMULWB R14, R1, R2
SMLAWT R3, R1, R2, R3
LDR R7, [R9, #260]
QSUB R6, R6, R14
QSUB R3, R11, R3
LDR R2, [R4], #4
LDR R1, [R9]
STR R3, [R9, #260]
STR R6, [R8, #252]
SMULWT R12, R10, R2
SMULWT R14, R1, R2
SMULWB R6, R10, R2
SMLAWB R12, R1, R2, R12
LDR R1, [R9, #256]
QSUB R14, R14, R6
STR R12, [R9], #-4
STR R14, [R8], #4
SUBS R5, R5, #1
SMULWT R12, R7, R2
SMULWT R14, R1, R2
SMULWB R6, R7, R2
SMLAWB R12, R1, R2, R12
LDRGE R0, [R8, #4]
LDRGE R1, [R8]
QSUB R12, R11, R12
QSUB R14, R6, R14
STR R12, [R8, #252]
STR R14, [R9, #260]
BGE LOOP2
ENDLOOP2:
@ Release the 516-byte workspace plus the 16 bytes of saved R0-R3.
ADD SP, SP, #532
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,506 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_dct3_32
.extern ixheaacd_radix4bfly
.hidden ixheaacd_radix4bfly
.extern ixheaacd_postradixcompute4
.hidden ixheaacd_postradixcompute4
@ ixheaacd_dct3_32: 32-point DCT-III kernel (ARMv7 + NEON).
@ AAPCS arguments r0-r3: presumably r0 = input/work buffer, r1 = output
@ buffer, r2/r3 = packed 16-bit twiddle/cosine tables -- TODO confirm
@ against the C prototype of ixheaacd_dct3_32.
@ Structure: unrolled NEON pre-twiddle passes over the input
@ (VLD4.16 twiddles, 16x16->32 VMULL/VMLAL multiply-accumulate split into
@ unsigned-low/signed-high halves), then a call into the FFT core
@ (ixheaacd_radix4bfly + ixheaacd_postradixcompute4), then a scalar
@ de-interleave/reorder loop (BACK3) into the output buffer.
@ Two extra arguments are read from the caller's stack at [sp,#104] and
@ [sp,#108] (offsets account for the 10-word STMFD + 64-byte VPUSH frame).
@ Callee-saved state (r4-r12, d8-d15) is restored before return.
ixheaacd_dct3_32:
STMFD sp!, {R4-R12, R14}
VPUSH {D8 - D15}
@ --- NEON pre-twiddle passes (fully unrolled; front half read forward
@ via R6, back half read in reverse via R7 with negative strides) ---
ADD R6, R0, #196
SUB R7, R6, #8
ADD R10, R7, #4
MOV R9, #0
VDUP.32 D0, R9
ADD R4, R2, #8
MOV R8, R1
VLD1.32 D0[0], [R10]
MOV R11, #-4
VSHR.S32 D0, D0, #7
VLD4.16 {D12, D13, D14, D15}, [R4]!
MOV R12, #-16
VST1.32 D0, [R8]!
SUB R7, R7, #12
VLD1.32 {Q0}, [R6]!
VLD1.32 {Q1}, [R7], R12
SUB R9, R6, #144
VREV64.32 Q1, Q1
SUB R5, R7, #112
VSWP D2, D3
VSHR.S32 Q0, Q0, #7
VSHR.S32 Q1, Q1, #7
VLD1.32 {Q3}, [R9]!
VADD.I32 Q2, Q1, Q0
VUZP.16 D4, D5
VSHR.S32 Q3, Q3, #7
VLD1.32 {Q4}, [R5], R12
VMULL.U16 Q15, D4, D12
VREV64.32 Q4, Q4
VMULL.U16 Q14, D4, D13
VSWP D8, D9
VSHR.S32 Q4, Q4, #7
VLD1.32 {Q0}, [R6]!
VSUB.I32 Q5, Q3, Q4
VUZP.16 D10, D11
VMLAL.U16 Q15, D10, D13
VLD1.32 {Q1}, [R7], R12
VMLSL.U16 Q14, D10, D12
VREV64.32 Q1, Q1
VSHR.S32 Q0, Q0, #7
VSWP D2, D3
VSHR.U32 Q15, Q15, #16
VSHR.S32 Q1, Q1, #7
VMLAL.S16 Q15, D5, D12
VMLAL.S16 Q15, D11, D13
VSHR.S32 Q14, Q14, #16
VMLAL.S16 Q14, D5, D13
VADD.I32 Q2, Q1, Q0
VMLSL.S16 Q14, D11, D12
VUZP.16 D4, D5
SUB R9, R6, #144
VSWP Q15, Q14
SUB R5, R7, #112
VST2.32 {Q14, Q15}, [R8]!
VLD1.32 {Q3}, [R9]!
VLD1.32 {Q4}, [R5], R12
VSHR.S32 Q3, Q3, #7
VREV64.32 Q4, Q4
VSWP D8, D9
VSHR.S32 Q4, Q4, #7
VLD4.16 {D12, D13, D14, D15}, [R4]!
VSUB.I32 Q5, Q3, Q4
VUZP.16 D10, D11
VMULL.U16 Q15, D4, D12
VMLAL.U16 Q15, D10, D13
VMULL.U16 Q14, D4, D13
VLD1.32 {Q0}, [R6]!
VMLSL.U16 Q14, D10, D12
VLD1.32 {Q1}, [R7], R11
VSHR.U32 Q15, Q15, #16
VREV64.32 Q1, Q1
VSHR.S32 Q14, Q14, #16
VSWP D2, D3
VSHR.S32 Q0, Q0, #7
SUB R9, R6, #144
SUB R5, R7, #124
VLD1.32 {Q3}, [R9]!
VMLAL.S16 Q14, D5, D13
VMLSL.S16 Q14, D11, D12
VLD1.32 {Q4}, [R5], R11
VMLAL.S16 Q15, D5, D12
VREV64.32 Q4, Q4
VMLAL.S16 Q15, D11, D13
VSWP D8, D9
VSHR.S32 Q1, Q1, #7
VADD.I32 Q2, Q1, Q0
VLD4.16 {D12, D13, D14, D15}, [R4]!
VSHR.S32 Q3, Q3, #7
VUZP.16 D4, D5
VSHR.S32 Q4, Q4, #7
VSWP Q15, Q14
VSUB.I32 Q5, Q3, Q4
VST2.32 {Q14, Q15}, [R8]!
VUZP.16 D10, D11
VMULL.U16 Q15, D4, D12
VMLAL.U16 Q15, D10, D13
VLD1.32 D0, [R6]!
VMULL.U16 Q14, D4, D13
VMLSL.U16 Q14, D10, D12
VLD1.32 D1[0], [R6]!
VSHR.U32 Q15, Q15, #16
VLD1.32 D2[0], [R7], R11
VMLAL.S16 Q15, D5, D12
VLD1.32 D2[1], [R7], R11
VMLAL.S16 Q15, D11, D13
SUB R9, R6, #140
VLD1.32 D3[0], [R7], R11
SUB R5, R7, #116
VLD1.32 D6, [R9]!
VSHR.S32 Q14, Q14, #16
VSHR.S32 Q0, Q0, #7
VLD1.32 D7[0], [R9]!
VMLAL.S16 Q14, D5, D13
VLD1.32 D8[0], [R5], R11
VMLSL.S16 Q14, D11, D12
VSHR.S32 Q1, Q1, #7
VLD4.16 {D12, D13, D14, D15}, [R4]
VADD.I32 Q2, Q1, Q0
VLD1.32 D8[1], [R5], R11
VSHR.S32 Q3, Q3, #7
VSWP Q15, Q14
VLD1.32 D9[0], [R5], R11
VSHR.S32 Q4, Q4, #7
VST2.32 {Q14, Q15}, [R8]!
ADD R4, #24
VUZP.16 D4, D5
VSUB.I32 Q5, Q3, Q4
VUZP.16 D10, D11
VMULL.U16 Q15, D4, D12
VMLAL.U16 Q15, D10, D13
VMULL.U16 Q14, D4, D13
VMLSL.U16 Q14, D10, D12
VLD1.16 D0[0], [R4]!
VSHR.U32 Q15, Q15, #16
VSHR.S32 Q14, Q14, #16
VLD1.32 D2[0], [R7], R11
VMLAL.S16 Q15, D5, D12
SUB R5, R7, #124
VMLAL.S16 Q15, D11, D13
VLD1.32 D4[0], [R5]
VMLAL.S16 Q14, D5, D13
VMLSL.S16 Q14, D11, D12
VSHR.S32 D2, D2, #7
VST1.32 D30[0], [R8]!
VSHR.S32 D4, D4, #7
VSUB.I32 D2, D2, D4
VMOV D4, D2
VST1.32 D28[0], [R8]!
@ --- second stage: butterflies over the buffer at R1, combining the
@ front cursor (R6) with the mirrored back cursor (R7), using 16-bit
@ twiddles fetched from the tables at R3 ---
MOV R6, R1
ADD R7, R1, #124
VST1.32 D30[1], [R8]!
ADD R10, R3, #16
SUB R7, R7, #28
VST1.32 D28[1], [R8]!
MOV R5, #-16
MOV R9, #-4
VST1.32 D31[0], [R8]!
MOV R11, #16
VST1.32 D29[0], [R8]!
MOV R12, #4
VUZP.16 D4, D5
MOV R8, #6
VLD1.16 D1[0], [R4], R8
VMULL.U16 Q15, D4, D0
VUZP.16 D2, D3
VMULL.U16 Q14, D4, D1
VMLAL.U16 Q15, D2, D1
VLD2.32 {D10, D11}, [R6]
VMLSL.U16 Q14, D2, D0
ADD R4, R3, #4
MOV R8, #-32
VSHR.U32 Q15, Q15, #16
VSHR.S32 Q14, Q14, #16
VMLAL.S16 Q15, D5, D0
VMLAL.S16 Q15, D3, D1
VMLAL.S16 Q14, D5, D1
VMLSL.S16 Q14, D3, D0
VADD.I32 D14, D11, D28
VLD2.32 {Q2, Q3}, [R7]
VNEG.S32 D14, D14
VREV64.32 Q2, Q2
VSUB.I32 D12, D10, D30
VREV64.32 Q3, Q3
VADD.I32 D10, D10, D30
VSWP D4, D5
VADD.I32 D10, D10, D14
VSWP D6, D7
VSUB.I32 D11, D11, D28
VADD.I32 D11, D11, D12
VLD2.16 {D8, D9}, [R10], R5
VSHR.S32 D10, D10, #1
VREV64.16 D8, D8
VSHR.S32 D11, D11, #1
VUZP.32 D10, D11
VST1.32 D10, [R6]!
VLD2.32 {Q0, Q1}, [R6]
VADD.I32 Q7, Q0, Q2
VLD2.16 {D10, D11}, [R4], R11
VSUB.I32 Q6, Q0, Q2
VUZP.16 D12, D13
VADD.I32 Q8, Q1, Q3
VUZP.16 D16, D17
VSUB.I32 Q9, Q1, Q3
VMULL.U16 Q15, D12, D8
VMLAL.U16 Q15, D16, D10
VMULL.U16 Q14, D12, D10
VMLSL.U16 Q14, D16, D8
VSHR.S32 Q7, Q7, #1
VSHR.U32 Q15, Q15, #16
VSHR.S32 Q9, Q9, #1
VSHR.S32 Q14, Q14, #16
VMLAL.S16 Q15, D13, D8
VMLAL.S16 Q15, D17, D10
VMLAL.S16 Q14, D13, D10
VMLSL.S16 Q14, D17, D8
VSUB.I32 Q10, Q7, Q15
VLD2.16 {D8, D9}, [R10]
VADD.I32 Q13, Q7, Q15
VREV64.32 Q13, Q13
VSWP D26, D27
VADD.I32 Q11, Q9, Q14
VREV64.16 D8, D8
VSUB.I32 Q12, Q14, Q9
VREV64.32 Q12, Q12
VST2.32 {Q10, Q11}, [R6]!
VSWP D24, D25
VSWP Q12, Q13
VST2.32 {Q12, Q13}, [R7], R8
VLD2.32 {Q0, Q1}, [R6]
VLD2.32 {Q2, Q3}, [R7]
VREV64.32 Q2, Q2
VREV64.32 Q3, Q3
VSWP D4, D5
VSWP D6, D7
VSUB.I32 Q6, Q0, Q2
VADD.I32 Q7, Q0, Q2
VLD2.16 {D10, D11}, [R4], R11
VADD.I32 Q8, Q1, Q3
VUZP.16 D12, D13
VSUB.I32 Q9, Q1, Q3
VUZP.16 D16, D17
VMULL.U16 Q15, D12, D8
VMLAL.U16 Q15, D16, D10
VMULL.U16 Q14, D12, D10
VMLSL.U16 Q14, D16, D8
ADD R7, R7, #8
VSHR.U32 Q15, Q15, #16
VSHR.S32 Q7, Q7, #1
VSHR.S32 Q14, Q14, #16
VMLAL.S16 Q15, D13, D8
VMLAL.S16 Q15, D17, D10
VMLAL.S16 Q14, D13, D10
VMLSL.S16 Q14, D17, D8
VSHR.S32 Q9, Q9, #1
VSUB.I32 Q10, Q7, Q15
VSUB.I32 Q12, Q14, Q9
VADD.I32 Q11, Q9, Q14
VST1.32 D20[0], [R6]!
VADD.I32 Q13, Q7, Q15
VST1.32 D22[0], [R6]!
VST1.32 D20[1], [R6]!
VST1.32 D22[1], [R6]!
VST1.32 D21[0], [R6]!
VST1.32 D23[0], [R6]!
VREV64.32 Q12, Q12
VREV64.32 Q13, Q13
VSWP D24, D25
VSWP D26, D27
VST1.32 D26[1], [R7]!
VST1.32 D24[1], [R7]!
VST1.32 D27[0], [R7]!
VST1.32 D25[0], [R7]!
VST1.32 D27[1], [R7]!
VST1.32 D25[1], [R7]!
SUB R7, R7, #32
VLD2.32 {D0, D1}, [R6]
VLD2.32 {D2, D3}, [R7]
VSUB.I32 D12, D0, D2
VLD1.16 D8, [R10], R9
VADD.I32 D14, D0, D2
VADD.I32 D16, D1, D3
VLD1.16 D10, [R4], R12
VSUB.I32 D18, D1, D3
VUZP.16 D12, D13
MOV R4, R0
VUZP.16 D16, D17
VMULL.U16 Q15, D12, D8
VMLAL.U16 Q15, D16, D10
VMULL.U16 Q14, D12, D10
VMLSL.U16 Q14, D16, D8
VSHR.S32 D18, D18, #1
VSHR.U32 Q15, Q15, #16
VSHR.S32 Q14, Q14, #16
VMLAL.S16 Q15, D13, D8
VMLAL.S16 Q15, D17, D10
MOV R10, R1
VMLAL.S16 Q14, D13, D10
VMLSL.S16 Q14, D17, D8
VNEG.S32 Q15, Q15
VSHR.S32 D14, D14, #1
VADD.I32 Q13, Q7, Q15
VADD.I32 Q11, Q9, Q14
@ --- FFT core: radix-4 butterflies (r2 = 1 stage count?, r3 = 4 --
@ TODO confirm argument meanings) followed by the post-radix pass ---
LDR r0 , [sp , #104]
VST1.32 D26[0], [R6]!
MOV r2, #1
VST1.32 D22[0], [R6]!
MOV r3, #4
BL ixheaacd_radix4bfly
MOV r0, r4
MOV r1, r10
LDR r2 , [sp , #108]
MOV r3, #16
BL ixheaacd_postradixcompute4
@ --- final scalar reorder: copy pairs from the work buffer (r0) into
@ the output (r1) forward while mirroring the upper half backwards ---
MOV r0, r4
MOV r1, r10
LDMIA r0!, {r4, r5}
STR r4, [r1], #4
STR r5, [r1, #4]
ADD r2, r0, #64
ADD r3, r1, #116
MOV r6, #7
BACK3:
LDMIA r0!, {r4, r5}
STR r5, [r1], #8
STR r4, [r1], #8
LDMIA r2!, {r4, r5}
STR r5, [r3], #-8
STR r4, [r3], #-8
SUBS r6, r6, #1
BNE BACK3
LDMIA r0!, {r4, r5}
STR r5, [r1], #8
STR r4, [r1], #8
VPOP {D8 - D15}
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,522 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.extern ixheaacd_radix4bfly
.hidden ixheaacd_radix4bfly
.extern ixheaacd_postradixcompute2
.hidden ixheaacd_postradixcompute2
.extern ixheaacd_sbr_imdct_using_fft
.hidden ixheaacd_sbr_imdct_using_fft
.global ixheaacd_dec_DCT2_64_asm
@ ixheaacd_dec_DCT2_64_asm: 64-point DCT-II (ARMv7 + NEON).
@ AAPCS arguments r0-r3: presumably r0 = input, r1 = work buffer, with
@ further buffers passed in r2/r3 and on the stack -- TODO confirm
@ against the C prototype.
@ Phase 1 (FOR_LOOP): even/odd split -- VLD2 de-interleaves the input;
@ even samples are stored forward from r1, odd samples reversed from the
@ top of the same 256-byte buffer.
@ Phase 2: 32-point transform via ixheaacd_sbr_imdct_using_fft (four
@ copies of the stacked 4th argument are pushed as extra stack args and
@ popped immediately after the call).
@ Phase 3: NEON post-twiddle of the transform output, unrolled into
@ several passes (16-bit twiddles from the table at r2, 16x16->32
@ multiply-accumulate split into unsigned-low / signed-high halves).
@ Phase 4: round each 32-bit value to 16 bits (<<4, +0x8000, >>16) and
@ scatter the halfwords through four output cursors (R1/R3/R4/R5),
@ two of them running backwards.
ixheaacd_dec_DCT2_64_asm:
STMFD sp!, {r0-r3, r4-r12, r14}
ADD R2, R1, #252
MOV R3, #32
MOV R4, #-4
ADD R2, R2, #4
FOR_LOOP:
VLD2.32 {Q0, Q1}, [R0]!
SUBS R3, R3, #4
VST1.32 {Q0}, [R1]!
SUB R2, R2, #16
VREV64.32 Q1, Q1
VSWP D2, D3
VST1.32 {Q1}, [R2]
BGT FOR_LOOP
@ Phase 2: arguments reloaded from the saved r0-r3 on the stack.
LDR r0, [sp, #8]
MOV r1, #32
LDR r2, [sp, #4]
LDR r3, [sp]
LDR r4, [sp, #12]
STR r4, [sp, #-4]!
STR r4, [sp, #-4]!
STR r4, [sp, #-4]!
STR r4, [sp, #-4]!
BL ixheaacd_sbr_imdct_using_fft
ADD sp, sp, #16
@ Phase 3: post-twiddle. R0 walks the buffer forward, R5 walks the
@ mirrored back half; R3/R4 walk the twiddle table forward/backward.
LDR r0, [sp]
LDR r2, [sp, #56]
VPUSH {D8 - D15}
ADD R5, R0, #252
VLD1.32 D0, [R0]
ADD R3, R2, #2
VSHL.S32 D0, D0, #1
VST1.32 D0, [R0]!
SUB R5, R5, #28
VLD2.32 {Q0, Q1}, [R0]!
VLD2.32 {Q2, Q3}, [R5]!
VREV64.32 Q2, Q2
VSWP D4, D5
MOV R10, #-8
VREV64.32 Q3, Q3
ADD R4, R2, #30
VSWP D6, D7
SUB R4, R4, #6
VLD1.16 D8, [R3]!
VSUB.I32 Q11, Q3, Q1
VLD1.16 D10, [R4], R10
VADD.I32 Q10, Q3, Q1
VREV64.16 D10, D10
VSUB.I32 Q9, Q0, Q2
VUZP.16 D20, D21
VADD.I32 Q8, Q0, Q2
VUZP.16 D18, D19
VMULL.U16 Q15, D20, D8
VMLSL.U16 Q15, D18, D10
VMULL.U16 Q14, D18, D8
VMLAL.U16 Q14, D20, D10
SUB R11, R0, #32
VSHR.S32 Q15, Q15, #16
VSHR.U32 Q14, Q14, #16
SUB R12, R5, #32
VMLAL.S16 Q15, D21, D8
VMLSL.S16 Q15, D19, D10
VLD2.32 {Q0, Q1}, [R0]!
SUB R5, R5, #64
VMLAL.S16 Q14, D19, D8
VLD2.32 {Q2, Q3}, [R5]!
VMLAL.S16 Q14, D21, D10
VREV64.32 Q2, Q2
VSHL.S32 Q15, Q15, #1
VSWP D4, D5
VSHL.S32 Q14, Q14, #1
VREV64.32 Q3, Q3
VADD.I32 Q13, Q8, Q15
VSWP D6, D7
VADD.I32 Q12, Q11, Q14
VLD1.16 D8, [R3]!
VSUB.I32 Q7, Q14, Q11
VLD1.16 D10, [R4], R10
VSUB.I32 Q6, Q8, Q15
VREV64.32 Q7, Q7
VREV64.32 Q6, Q6
VSWP D14, D15
VSWP D12, D13
VREV64.16 D10, D10
VSUB.I32 Q11, Q3, Q1
VSWP Q13, Q12
VADD.I32 Q10, Q3, Q1
VST2.32 {Q12, Q13}, [R11]!
VSUB.I32 Q9, Q0, Q2
VADD.I32 Q8, Q0, Q2
VST2.32 {Q6, Q7}, [R12]
SUB R11, R0, #32
VUZP.16 D20, D21
SUB R12, R5, #32
VUZP.16 D18, D19
SUB R5, R5, #64
VMULL.U16 Q15, D20, D8
VLD2.32 {Q0, Q1}, [R0]!
VMLSL.U16 Q15, D18, D10
VLD2.32 {Q2, Q3}, [R5]!
VMULL.U16 Q14, D18, D8
VREV64.32 Q2, Q2
VMLAL.U16 Q14, D20, D10
VSWP D4, D5
VSHR.S32 Q15, Q15, #16
VREV64.32 Q3, Q3
VMLAL.S16 Q15, D21, D8
VMLSL.S16 Q15, D19, D10
VSWP D6, D7
VSHR.U32 Q14, Q14, #16
VMLAL.S16 Q14, D19, D8
VLD1.16 D8, [R3]!
VMLAL.S16 Q14, D21, D10
VSHL.S32 Q15, Q15, #1
VLD1.16 D10, [R4], R10
VSHL.S32 Q14, Q14, #1
VREV64.16 D10, D10
VADD.I32 Q13, Q8, Q15
VADD.I32 Q12, Q11, Q14
VSUB.I32 Q7, Q14, Q11
VSUB.I32 Q6, Q8, Q15
VREV64.32 Q7, Q7
VSUB.I32 Q11, Q3, Q1
VREV64.32 Q6, Q6
VADD.I32 Q10, Q3, Q1
VSWP D14, D15
VSUB.I32 Q9, Q0, Q2
VSWP D12, D13
VADD.I32 Q8, Q0, Q2
VSWP Q13, Q12
VUZP.16 D20, D21
VUZP.16 D18, D19
VMULL.U16 Q15, D20, D8
VMLSL.U16 Q15, D18, D10
VST2.32 {Q12, Q13}, [R11]!
VMULL.U16 Q14, D18, D8
VMLAL.U16 Q14, D20, D10
VST2.32 {Q6, Q7}, [R12]
SUB R11, R0, #32
VLD2.32 {Q0, Q1}, [R0]!
SUB R12, R5, #32
SUB R5, R5, #64
VSHR.S32 Q15, Q15, #16
VLD2.32 {Q2, Q3}, [R5]!
VMLAL.S16 Q15, D21, D8
VREV64.32 Q2, Q2
VMLSL.S16 Q15, D19, D10
VSWP D4, D5
VSHR.U32 Q14, Q14, #16
VREV64.32 Q3, Q3
VMLAL.S16 Q14, D19, D8
VSWP D6, D7
VMLAL.S16 Q14, D21, D10
VSHL.S32 Q15, Q15, #1
VLD1.16 D8, [R3]!
VSHL.S32 Q14, Q14, #1
VADD.I32 Q13, Q8, Q15
VLD1.16 D10, [R4], R10
VADD.I32 Q12, Q11, Q14
VREV64.16 D10, D10
VSUB.I32 Q7, Q14, Q11
VSUB.I32 Q6, Q8, Q15
VREV64.32 Q7, Q7
VREV64.32 Q6, Q6
VSWP D14, D15
VSWP D12, D13
VSWP Q13, Q12
VSUB.I32 Q11, Q3, Q1
VST2.32 {Q12, Q13}, [R11]!
VADD.I32 Q10, Q3, Q1
VST2.32 {Q6, Q7}, [R12]
SUB R11, R0, #32
VSUB.I32 Q9, Q0, Q2
VADD.I32 Q8, Q0, Q2
VUZP.16 D20, D21
SUB R12, R5, #32
VUZP.16 D18, D19
SUB R5, R5, #64
VMULL.U16 Q15, D20, D8
VMLSL.U16 Q15, D18, D10
VMULL.U16 Q14, D18, D8
VMLAL.U16 Q14, D20, D10
VSHR.S32 Q15, Q15, #16
VMLAL.S16 Q15, D21, D8
VMLSL.S16 Q15, D19, D10
VSHR.U32 Q14, Q14, #16
VMLAL.S16 Q14, D19, D8
VMLAL.S16 Q14, D21, D10
VSHL.S32 Q15, Q15, #1
VSHL.S32 Q14, Q14, #1
VADD.I32 Q13, Q8, Q15
VADD.I32 Q12, Q11, Q14
VSUB.I32 Q7, Q14, Q11
VSUB.I32 Q6, Q8, Q15
VREV64.32 Q7, Q7
VREV64.32 Q6, Q6
VSWP D14, D15
VSWP D12, D13
VSWP Q13, Q12
VST2.32 {Q12, Q13}, [R11]!
VST2.32 {Q6, Q7}, [R12]
VPOP {D8 - D15}
@ Phase 4: pop the 4 saved argument words, reload the halfword output
@ pointer and twiddle table from the remaining stack frame, and round
@ each result to 16 bits with the 0x8000 rounding constant in Q15.
LDMFD sp!, {r0-r2, r3}
LDR R1, [SP, #48]
LDR R2, [SP, #44]
ADD R3, R1, #126
VLD1.32 D0[0], [R0, :32]!
SUB R4, R1, #2
ADD R5, R1, #130
VLD1.32 D1[0], [R0, :32]!
ADD R7, R2, #4
MOV R6, #0x8000
VDUP.32 Q15, R6
VADD.I32 D2, D0, D1
VSHR.S32 D2, D2, #1
VSHL.S32 D2, D2, #4
VADD.I32 Q2, Q1, Q15
VSHR.S32 Q2, Q2, #16
VSUB.I32 D6, D0, D1
VST1.16 D4[0], [R1]!
MOV R8, #28
MOV R9, #-2
VLD2.32 {Q0, Q1}, [R0]!
SUB R4, R4, #6
SUB R3, R3, #6
VLD2.16 {D4, D5}, [R7]!
VUZP.16 D0, D1
VUZP.16 D2, D3
VMULL.U16 Q14, D0, D4
VMLSL.U16 Q14, D2, D5
VSHR.S32 Q14, Q14, #16
VMLAL.S16 Q14, D1, D4
VMLSL.S16 Q14, D3, D5
VMULL.U16 Q13, D0, D5
VMLAL.U16 Q13, D2, D4
VSHR.U32 Q13, Q13, #16
VMLAL.S16 Q13, D1, D5
VMLAL.S16 Q13, D3, D4
VSHL.S32 Q12, Q14, #4
VLD2.32 {Q0, Q1}, [R0]!
VADD.I32 Q12, Q12, Q15
VSHR.S32 Q12, Q12, #16
VUZP.16 D24, D25
VSHL.S32 Q11, Q13, #4
VLD2.16 {D4, D5}, [R7]!
VADD.I32 Q11, Q11, Q15
VSHR.S32 Q11, Q11, #16
VUZP.16 D22, D23
VQNEG.S16 D20, D22
VUZP.16 D0, D1
VUZP.16 D2, D3
SUB R8, R8, #8
LOOP_2:
VMULL.U16 Q14, D0, D4
VST1.16 D24, [R1]!
VMLSL.U16 Q14, D2, D5
VREV64.16 D24, D24
VMULL.U16 Q13, D0, D5
VMLAL.U16 Q13, D2, D4
VST1.16 D24, [R4]
SUB R4, R4, #8
VREV64.16 D22, D22
VSHR.S32 Q14, Q14, #16
VSHR.U32 Q13, Q13, #16
VST1.16 D22, [R3]
VMLAL.S16 Q14, D1, D4
VMLSL.S16 Q14, D3, D5
VST1.16 D20, [R5]!
VMLAL.S16 Q13, D1, D5
VMLAL.S16 Q13, D3, D4
VSHL.S32 Q12, Q14, #4
SUB R3, R3, #8
VLD2.32 {Q0, Q1}, [R0]!
VSHL.S32 Q11, Q13, #4
VADD.I32 Q12, Q12, Q15
VLD2.16 {D4, D5}, [R7]!
VADD.I32 Q11, Q11, Q15
VUZP.16 D0, D1
VSHR.S32 Q12, Q12, #16
VUZP.16 D24, D25
VSHR.S32 Q11, Q11, #16
VUZP.16 D22, D23
SUBS R8, R8, #4
VUZP.16 D2, D3
VQNEG.S16 D20, D22
BGT LOOP_2
@ Epilogue of the loop: last full pass, then a partial (3-lane and
@ 1-lane) tail written with per-lane stores.
VMULL.U16 Q14, D0, D4
VST1.16 D24, [R1]!
VMLSL.U16 Q14, D2, D5
VREV64.16 D24, D24
VMULL.U16 Q13, D0, D5
VMLAL.U16 Q13, D2, D4
VST1.16 D24, [R4]
VSHR.S32 Q14, Q14, #16
SUB R4, R4, #8
VST1.16 D20, [R5]!
VMLAL.S16 Q14, D1, D4
VMLSL.S16 Q14, D3, D5
VREV64.16 D22, D22
VSHR.U32 Q13, Q13, #16
VST1.16 D22, [R3]
SUB R3, R3, #8
VMLAL.S16 Q13, D1, D5
VSHL.S32 Q12, Q14, #4
VMLAL.S16 Q13, D3, D4
VADD.I32 Q12, Q12, Q15
VSHL.S32 Q11, Q13, #4
VSHR.S32 Q12, Q12, #16
VUZP.16 D24, D25
VADD.I32 Q11, Q11, Q15
VST1.16 D24, [R1]!
VSHR.S32 Q11, Q11, #16
VREV64.16 D24, D24
VUZP.16 D22, D23
VST1.16 D24, [R4]
VQNEG.S16 D20, D22
SUB R4, R4, #2
VREV64.16 D22, D22
VST1.16 D22, [R3]
SUB R3, R3, #2
VST1.16 D20, [R5]!
VLD2.32 {Q0, Q1}, [R0]!
VLD2.16 {Q2}, [R7]
ADD R7, R7, #12
VUZP.16 D0, D1
VUZP.16 D2, D3
VMULL.U16 Q14, D0, D4
VMLSL.U16 Q14, D2, D5
VSHR.S32 Q14, Q14, #16
VMLAL.S16 Q14, D1, D4
VMLSL.S16 Q14, D3, D5
VMULL.U16 Q13, D0, D5
VMLAL.U16 Q13, D2, D4
VSHR.U32 Q13, Q13, #16
VMLAL.S16 Q13, D1, D5
VMLAL.S16 Q13, D3, D4
VSHL.S32 Q12, Q14, #4
VADD.I32 Q12, Q12, Q15
VSHR.S32 Q12, Q12, #16
VUZP.16 D24, D25
VSHL.S32 Q11, Q13, #4
VADD.I32 Q11, Q11, Q15
VSHR.S32 Q11, Q11, #16
VUZP.16 D22, D23
VQNEG.S16 D20, D22
VST1.16 D24[0], [R1]!
VST1.16 D24[1], [R1]!
VST1.16 D24[2], [R1]!
VST1.16 D24[0], [R4], R9
VST1.16 D24[1], [R4], R9
VST1.16 D24[2], [R4], R9
VST1.16 D22[0], [R3], R9
VST1.16 D22[1], [R3], R9
VST1.16 D22[2], [R3], R9
VST1.16 D20[0], [R5]!
VST1.16 D20[1], [R5]!
VST1.16 D20[2], [R5]!
VUZP.16 D6, D7
VLD1.16 D0, [R7]!
VMULL.U16 Q1, D0, D6
VSHR.S32 Q1, Q1, #16
VMLAL.S16 Q1, D0, D7
VSHL.S32 Q1, Q1, #4
VADD.I32 Q1, Q1, Q15
VSHR.S32 Q1, Q1, #16
VST1.16 D2[0], [R1]!
VST1.16 D2[0], [R4], R9
LDMFD sp!, {r4-r12, r15}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,67 @@
.text
.p2align 2
.global ia_eld_decoder_sbr_pre_twiddle
@ ia_eld_decoder_sbr_pre_twiddle: AAC-ELD SBR pre-twiddle (ARMv7).
@ r0 = pXre, r1 = pXim, r2 = pTwiddles (each 32-bit entry packs the
@ 16-bit cosine in the low half and the sine in the high half).
@ Rotates each complex sample in place:
@   re' = (Xre*cos + Xim*sin) << 1
@   im' = (Xim*cos - Xre*sin) << 1
@ (SMULWB/SMULWT are 32x16 fractional multiplies; the <<1 restores the
@ Q-format after each multiply.)
@ 62 software-pipelined loop iterations plus one unrolled final
@ iteration (EPILOUGE) = 63 samples processed in total; the unroll
@ avoids the loop body's one-past-the-end prefetch loads.
ia_eld_decoder_sbr_pre_twiddle:
STMFD sp!, {r4-r12, r14}
LDR r4, [r0, #0] @Xre = *pXre
MOV r3, #62 @Loop count
LDR r5, [r1, #0] @Xim = *pXim
LOOP:
LDR r6, [r2], #4 @Load and increment pointer *pTwiddles++ Lower - cosine , higher - sine
SUBS r3, r3, #1 @Decrement loop count by 1
SMULWB r8, r4, r6 @mult32x16in32(Xre, cosine)
LSL r8, r8, #1 @Left shift the multiplied value by 1
SMULWT r10, r5, r6 @mult32x16in32( Xim , sine)
ADD r12, r8, r10, LSL #1 @mac32x16in32_shl( mult32x16in32_shl(Xre, cosine) , mult32x16in32_shl( Xim , sine))@
SMULWT r7, r4, r6 @mult32x16in32(Xre, sine)
LDR r4, [r0, #4] @Load next iteration value Xre = *pXre
SMULWB r9, r5, r6 @mult32x16in32(Xim, cosine)
STR r12, [r0], #4 @Store and increment pointer *pXre++ = re
LSL r9, r9, #1 @Left shift the multiplied value by 1
LDR r5, [r1, #4] @Load next iteration value Xim = *pXim
SUB r14, r9, r7, LSL #1 @sub32(mult32x16in32_shl(Xim, cosine) , mult32x16in32_shl(Xre, sine))
STR r14, [r1], #4 @Store and increment pointer *pXim++ = im
BNE LOOP @Check r3 equals 0 and continue
EPILOUGE:
@ Final (63rd) sample: same rotation without the next-sample prefetch.
LDR r6, [r2], #4
SMULWB r8, r4, r6
LSL r8, r8, #1
SMULWT r10, r5, r6
ADD r12, r8, r10, LSL #1
SMULWB r9, r5, r6
LSL r9, r9, #1
SMULWT r7, r4, r6
SUB r14, r9, r7, LSL #1
STR r12, [r0], #4
STR r14, [r1], #4
END_LOOP:
LDMFD sp!, {r4-r12, pc}

View file

@ -0,0 +1,158 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_enery_calc_per_subband_armv7
@ ixheaacd_enery_calc_per_subband_armv7: per-subband energy calculation
@ (ARMv7). [sic: "enery" is the exported symbol name and must stay.]
@ Arguments r0-r3 plus five stacked words (offsets #0x28..#0x38 after
@ the 10-register push); exact C-prototype meanings not visible here --
@ presumably band range (r0/r1, r2/r3), a scale-factor table pointer,
@ output pointer (R7), an LP/HQ mode flag (R8) and the sample buffer
@ (R0) -- TODO confirm against the C prototype.
@ For each subband column (stride 0x100 bytes = 64 WORD32s per row):
@   1. LOOP1_CALC_MAX: OR together the absolute values (EOR with the
@      sign via ASR #31) to bound the magnitude of the column.
@   2. CALC_NORM: derive a normalizing shift from CLZ of that bound,
@      then accumulate the sum of products of the (shifted) low
@      halfwords via SMLABB -- positive and negative shift paths.
@   3. CONVERT_TO_MANT_EXP: renormalize the accumulator with CLZ,
@      scale the mantissa by the table value in r9 (SMULBB, >>15,
@      with a guard against the 0x8000 overflow case), and store the
@      {mantissa, exponent} halfword pair via two STRHs.
@   A zero-energy column stores a single 0 word (STORE_ZEROES).
@ LP mode uses a <<8 row base (real-only); HQ mode uses <<9 and doubles
@ the inner count -- presumably interleaved re/im data. TODO confirm.
ixheaacd_enery_calc_per_subband_armv7:
STMFD sp!, {r4-r12, r14}
LDR r10, [sp, #0x34]
MOV R4, R2
MOV R5, R3
MOV R2, R0
MOV R3, R1
SUB R12, R3, R2
LDR r10, [r10, #0]
ADD r10, r10, r12, LSL #1
LDRSH r9, [r10, #0x20]
LDR R1, [sp, #0x28]
MOV R1, R1, LSL #1
SUBS R5, R5, R4
LDR R0, [sp, #0x38]
LDR R7, [sp, #0x2C]
LDR R8, [sp, #0x30]
BLE ENDCALC
MOVS R8, R8
BEQ HQ_PART
ADD R0, R0, R4, LSL #2
ADD R0, R0, R2, LSL #8
SUB R2, R3, R2
MOV R10, #20
B LP_SBR_LOOP
HQ_PART:
ADD R0, R0, R4, LSL #2
ADD R0, R0, R2, LSL #9
SUB R2, R3, R2
MOV R2, R2, LSL #1
MOV R10, #21
SUB R1, R1, #1
LP_SBR_LOOP:
MOV R6, #0
MOV R8, R0
MOVS R11, R2
BLE STORE_ZEROES
MOV R6, #1
LOOP1_CALC_MAX:
LDR R4, [R8], #0x100
LDR R12, [R8], #0x100
EOR R4, R4, R4, ASR #31
ORR R6, R6, R4
EOR R12, R12, R12, ASR #31
SUBS R11, R11, #2
ORRGE R6, R6, R12
BGT LOOP1_CALC_MAX
CALC_NORM:
CLZ R6, R6
RSBS R14, R6, R10
MOV R6, #0
MOV R8, R0
MOV R11, R2
BLE NEG_SHIFT
LOOP2_APPLY_POS_SHIFT:
LDR R4, [R8], #0x100
LDR R12, [R8], #0x100
SUBS R11, R11, #2
MOV R4, R4, ASR R14
SMLABB R6, R4, R4, R6
MOV R12, R12, ASR R14
SMLABB R6, R12, R12, R6
BGT LOOP2_APPLY_POS_SHIFT
B CONVERT_TO_MANT_EXP
NEG_SHIFT:
RSB R12, R14, #0
LOOP2_APPLY_NEG_SHIFT:
LDR R4, [R8], #0x100
LDR R3, [R8], #0x100
SUBS R11, R11, #2
MOV R4, R4, LSL R12
SMLABB R6, R4, R4, R6
MOV R3, R3, LSL R12
SMLABB R6, R3, R3, R6
BGT LOOP2_APPLY_NEG_SHIFT
CONVERT_TO_MANT_EXP:
SUB R14, R14, #23
ADD R0, R0, #4
MOVS R6, R6
BEQ STORE_ZEROES
CLZ R12, R6
RSB R12, R12, #17
MOV R4, R6, ASR R12
SMULBB R11, R4, R9
ADD R12, R12, R14, LSL#1
MOV R11, R11, ASR #15
CMP R11, #0x00008000
MVNEQ R11, R11
STRH R11, [R7], #2
ADD R11, R1, R12
STRH R11, [R7], #2
SUBS R5, R5, #1
BGT LP_SBR_LOOP
B ENDCALC
STORE_ZEROES:
STR R6, [R7], #4
SUBS R5, R5, #1
BGT LP_SBR_LOOP
ENDCALC:
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,172 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_esbr_cos_sin_mod_loop1
@ ixheaacd_esbr_cos_sin_mod_loop1: first cos/sin modulation loop of the
@ eSBR cos_sin_mod stage (ARMv7 + NEON).
@ r0 = subband array (front cursor), r1 = M (the mirrored cursors are
@ formed as base + 8*M), r2 = interleaved cos/sin table, r3 = output.
@ Runs M/4 iterations (r6 = r1 >> 2); each iteration performs four
@ complex butterflies ("parts"). Each part:
@   - loads a cos/sin pair (d0) plus its lane-reversed copy (d1),
@   - loads subband values from the front (r0) and the mirrored back
@     cursor (r4, walked backwards), including the +252/+256-byte
@     partner rows,
@   - forms the four 32x32->64-bit cross products (VMULL.S32),
@   - combines them with a plain 64-bit add and a saturating subtract
@     (VQSUB.S64), narrows back to 32 bits (VSHRN #32),
@   - scatter-stores into the output (r3/r5), pairing each result with
@     the slot 248/256 bytes away.
@ Parts 1/3 write through r3 (forward), parts 2/4 through r5 (backward).
ixheaacd_esbr_cos_sin_mod_loop1:
STMFD sp!, {r4-r12, r14}
VPUSH {D8-D11}
@generating load addresses
ADD r4, r0, r1, lsl #3 @psubband1
SUB r4, r4, #4
ADD r5, r3, r1, lsl #3 @psubband1_t
SUB r5, r5, #8
MOV r6, r1, ASR #2
LOOP1:
@first part
vld1.32 {d0} , [r2]!
vrev64.32 d1, d0
vld1.32 {d2[0]}, [r0]!
ADD r7, r0, #252
vld1.32 {d2[1]}, [r7]
vld1.32 {d3[0]}, [r4]
ADD r7, r4, #256
vld1.32 {d3[1]}, [r7]
SUB r4, r4, #4
VMULL.S32 q2, d0, d2 @qsub 2nd
VMULL.S32 q3, d0, d3 @add 2nd
VMULL.S32 q4, d1, d2 @add 1st
VMULL.S32 q5, d1, d3 @qsub 1st
vadd.I64 q0, q4, q3
VQSUB.S64 Q1, Q5, Q2
VSHRN.I64 D0, Q0, #32
VSHRN.I64 D2, Q1, #32
VMOV.32 D3, D0
VST2.32 {D0[0], D2[0]}, [R3]!
ADD r7, r3, #248
VST2.32 {D2[1], D3[1]}, [R7]
@second part
vld1.32 {d0} , [r2]!
vrev64.32 d1, d0
vld1.32 {d2[0]}, [r0]!
ADD R7, R0, #252
vld1.32 {d2[1]}, [r7]
vld1.32 {d3[0]}, [r4]
ADD R7, R4, #256
vld1.32 {d3[1]}, [r7]
SUB r4, r4, #4
VMULL.S32 q2, d0, d2 @add 2nd
VMULL.S32 q3, d0, d3 @sub 2nd
VMULL.S32 q4, d1, d2 @sub 1st
VMULL.S32 q5, d1, d3 @add 1st
VADD.I64 Q0, Q5, Q2
VQSUB.S64 Q1, Q4, Q3
VSHRN.I64 D0, Q0, #32
VSHRN.I64 D2, Q1, #32
VMOV.32 D3, D0
VST2.32 {D0[0], D2[0]}, [R5]
ADD R7, R5, #256
VST2.32 {D2[1], D3[1]}, [R7]
SUB r5, r5, #8
@Third part
vld1.32 {d0} , [r2]!
vrev64.32 d1, d0
vld1.32 {d2[0]}, [r0]!
ADD r7, r0, #252
vld1.32 {d2[1]}, [r7]
vld1.32 {d3[0]}, [r4]
ADD r7, r4, #256
vld1.32 {d3[1]}, [r7]
SUB r4, r4, #4
VMULL.S32 q2, d0, d2 @qsub 2nd
VMULL.S32 q3, d0, d3 @add 2nd
VMULL.S32 q4, d1, d2 @add 1st
VMULL.S32 q5, d1, d3 @qsub 1st
vadd.I64 q0, q4, q3
VQSUB.S64 Q1, Q5, Q2
VSHRN.I64 D0, Q0, #32
VSHRN.I64 D2, Q1, #32
VMOV.32 D3, D0
VST2.32 {D0[0], D2[0]}, [R3]!
ADD r7, r3, #248
VST2.32 {D2[1], D3[1]}, [R7]
@Fourth part
vld1.32 {d0} , [r2]!
vrev64.32 d1, d0
vld1.32 {d2[0]}, [r0]!
ADD R7, R0, #252
vld1.32 {d2[1]}, [r7]
vld1.32 {d3[0]}, [r4]
ADD R7, R4, #256
vld1.32 {d3[1]}, [r7]
SUB r4, r4, #4
VMULL.S32 q2, d0, d2 @add 2nd
VMULL.S32 q3, d0, d3 @sub 2nd
VMULL.S32 q4, d1, d2 @sub 1st
VMULL.S32 q5, d1, d3 @add 1st
VADD.I64 Q0, Q5, Q2
VQSUB.S64 Q1, Q4, Q3
VSHRN.I64 D0, Q0, #32
VSHRN.I64 D2, Q1, #32
VMOV.32 D3, D0
VST2.32 {D0[0], D2[0]}, [R5]
ADD R7, R5, #256
SUBS R6, R6, #1
VST2.32 {D2[1], D3[1]}, [R7]
SUB r5, r5, #8
BGT LOOP1
VPOP {D8-D11}
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,180 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_esbr_cos_sin_mod_loop2
@ ixheaacd_esbr_cos_sin_mod_loop2: second cos/sin modulation loop of the
@ eSBR cos_sin_mod stage (ARMv7 + NEON).
@ r0 = subband array, r1 = cos/sin table, r2 = M (per the inlined C
@ comments: psubband1 = &subband[2*M - 1], M_2 = M >> 1).
@ Prologue: halves the first real/imag pair in each half of the buffer
@ (*psubband >> 1, negating the mirrored partner), then performs the
@ first butterfly. LOOP1 then runs M/2 - 1 iterations, each doing two
@ butterflies: 32x32->64 cross products (VMULL.S32), combined with a
@ 64-bit add plus saturating subtracts in both orders (VQSUB.S64),
@ narrowed by 32 bits (VSHRN) and stored through four cursors --
@ r0/r10 walking forward, r3/r11 walking backward (R8 = -4), with
@ a saturating negate (VQNEG) on one of the results.
ixheaacd_esbr_cos_sin_mod_loop2:
STMFD sp!, {r4-r12, r14}
VPUSH {D8-D15}
@generating load addresses
ADD R3, R0, R2, LSL #3 @psubband1 = &subband[2 * M - 1];
SUB R3, R3, #4
ADD R10, R0, #256
ADD R11, R10, R2, LSL #3
SUB R11, R11, #4
MOV R8, #-4
LDR R6, [R0]
MOV R4, R2, ASR #1 @M_2 = ixheaacd_shr32(M, 1);
SUB R4, R4, #1
ASR R6, R6, #1 @*psubband = *psubband >> 1;
VLD1.32 {D2[0]}, [R3]
STR R6, [R0], #4 @psubband++;
LDR R7, [R0]
ASR R7, R7, #1
RSB R6, R7, #0
STR R6, [R3], #-4
VLD1.32 {D3[0]}, [R3] @ im = *psubband1;
VLD2.32 {D0[0], D1[0]}, [R1]!
VDUP.32 D0, D0[0]
VDUP.32 D1, D1[0]
VLD1.32 {D2[1]}, [R11] @re = *psubband12;
LDR R6, [R10]
ASR R7, R6, #1
MOV R9, #0
QSUB R7, R9, R7
STR R7, [R11], #-4
LDR R6, [R10, #4]
ASR R6, R6, #1
STR R6, [R10], #4
VLD1.32 {D3[1]}, [R11]
VMULL.S32 q2, d0, d2 @qsub 2nd
VMULL.S32 q3, d0, d3 @add 2nd
VMULL.S32 q4, d1, d2 @add 1st
VMULL.S32 q5, d1, d3 @qsub 1st
vadd.I64 q6, q4, q3
VQSUB.S64 Q7, Q5, Q2
VQSUB.S64 Q8, Q2, Q5
VSHRN.I64 D12, Q6, #32
VSHRN.I64 D14, Q7, #32
VSHRN.I64 D16, Q8, #32
VST1.32 {D12[0]}, [R3], R8
VST1.32 {D14[0]}, [R0]!
VQNEG.S32 D12, D12
VST1.32 {D12[1]}, [R10]!
VST1.32 {D16[1]}, [R11], R8
LOOP1:
VLD1.32 {D2}, [R0]
VLD1.32 {D3}, [R10]
LDR R5, [R3] @RE2
LDR R6, [R11] @RE3
VTRN.32 D2, D3
VMULL.S32 q2, d0, d2 @qsub 2nd
VMULL.S32 q3, d0, d3 @add 2nd
VMULL.S32 q4, d1, d2 @add 1st
VMULL.S32 q5, d1, d3 @qsub 1st
vadd.I64 q6, q4, q3
VQSUB.S64 Q7, Q2, Q5
VQSUB.S64 Q8, Q5, Q2
VSHRN.I64 D12, Q6, #32
VSHRN.I64 D14, Q7, #32
VSHRN.I64 D16, Q8, #32
VST1.32 {D12[0]}, [R0]!
VST1.32 {D14[0]}, [R3], R8
VQNEG.S32 D12, D12
VST1.32 {D12[1]}, [R11], R8
VST1.32 {D16[1]}, [R10]!
@ second part
VLD2.32 {D0[0], D1[0]}, [R1]!
VDUP.32 D0, D0[0]
VDUP.32 D1, D1[0]
VMOV D3, R5, R6
VLD1.32 {D2[0]}, [R3]
VLD1.32 {D2[1]}, [R11]
VMULL.S32 q2, d0, d2 @qsub 2nd
VMULL.S32 q3, d0, d3 @add 2nd
VMULL.S32 q4, d1, d2 @add 1st
VMULL.S32 q5, d1, d3 @qsub 1st
vadd.I64 q6, q2, q5
VQSUB.S64 Q7, Q4, Q3
VQSUB.S64 Q8, Q3, Q4
VSHRN.I64 D12, Q6, #32
VSHRN.I64 D14, Q7, #32
VSHRN.I64 D16, Q8, #32
VST1.32 {D12[0]}, [R3], R8
VST1.32 {D14[0]}, [R0]!
VQNEG.S32 D12, D12
subs r4, r4, #1
VST1.32 {D12[1]}, [R10]!
VST1.32 {D16[1]}, [R11], R8
BGT LOOP1
VPOP {D8-D15}
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,111 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.extern ixheaacd_esbr_cos_sin_mod
.hidden ixheaacd_esbr_cos_sin_mod
.global ixheaacd_esbr_fwd_modulation
@ ixheaacd_esbr_fwd_modulation: eSBR forward modulation (ARMv7 + NEON).
@ r0 = input buffer, r1/r2 = two output buffers (fed to the cos/sin
@ modulation as real/imag halves -- presumably; confirm against the C
@ code), r3 = structure holding the length at [r3] and, later, table
@ offsets/counts at #0x2A, #0x2C, #0x5C; a fifth argument is read from
@ the caller's stack.
@ LOOP1 (pre-fold): for each group of 8, combine x[i] >> 4 with the
@ mirrored, reversed tail of the buffer; the saturated difference
@ (VQSUB) goes to r1's buffer and the sum (VADD) to r2's buffer.
@ Then calls ixheaacd_esbr_cos_sin_mod on the folded halves (table
@ pointers at fixed offsets 0x41FC and 0xB8 inside the 5th argument's
@ structure).
@ LOOP2 (post-twiddle): for the bands between the two halfword counts
@ at [r3,#0x2A]/[r3,#0x2C], multiply the r1/r2 values by the complex
@ factors from the array at [r3,#0x5C] (32x32->64 VMULL, VSHRN #31)
@ and write the results back in place.
ixheaacd_esbr_fwd_modulation:
STMFD sp!, {r4-r12, lr}
VPUSH {D8 - D15}
LDR R4, [R3]
ADD R5, R0, R4, LSL #3
MOV R6, R1
MOV R7, R2
LOOP1:
SUB R5, R5, #32
VLD1.32 {D0, D1, D2, D3}, [R0]!
VLD1.32 {D4, D5, D6, D7}, [R5]
VSHR.S32 Q0, Q0, #4
VSHR.S32 Q1, Q1, #4
VSHR.S32 Q2, Q2, #4
VSHR.S32 Q3, Q3, #4
vswp d4, d7
vswp d5, d6
vrev64.32 q2, q2
vrev64.32 q3, q3
VQSUB.S32 Q4, Q0, Q2
VQSUB.S32 Q5, Q1, Q3
VADD.S32 Q6, Q0, Q2
VADD.S32 Q7, Q1, Q3
SUBS R4, R4, #8
VST1.32 {D8, D9, D10, D11}, [R6]!
VST1.32 {D12, D13, D14, D15}, [R7]!
BGT LOOP1
STMFD sp!, {r0-r3, lr}
LDR R4, [SP, #124]
MOV R0, R1
MOV R1, R3
ldr R5, =0x41FC
ADD R2, R4, R5
ADD R3, R4, #0xB8
BL ixheaacd_esbr_cos_sin_mod
LDMFD sp!, {r0-r3, r14}
LDR R0, [R3, #0x5C]
LDRSH R4, [R3, #0x2C]
LDRSH R5, [R3, #0x2A]
SUB R4, R4, R5
LOOP2:
VLD2.32 {D0, D1}, [R0]!
VLD1.32 {D2}, [R1]
VLD1.32 {D3}, [R2]
VMULL.S32 q2, d0, d2
VMULL.S32 q3, d0, d3
VMULL.S32 q4, d1, d2
VMULL.S32 q5, d1, d3
VADD.I64 Q0, Q2, Q5
VQSUB.S64 Q1, Q3, Q4
VSHRN.I64 D0, Q0, #31
VSHRN.I64 D2, Q1, #31
SUBS R4, R4, #2
VST1.32 {D0}, [R1]!
VST1.32 {D2}, [R2]!
BGT LOOP2
VPOP {D8-D15}
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,409 @@
@VOID ixheaacd_esbr_qmfsyn64_winadd(
@WORD32 *tmp1,
@WORD32 *tmp2,
@WORD32 *inp1,
@WORD32 *sample_buffer,
@WORD32 ch_fac)
@R0->Word32 *tmp1
@R1->Word32 *tmp2
@R2->Word32 *inp1
@R3->Word32 *sample_buffer
@stack (5th argument) -> ch_fac, loaded into R5 from [SP, #104] after the register pushes
.text
.p2align 2
.global ixheaacd_esbr_qmfsyn64_winadd
ixheaacd_esbr_qmfsyn64_winadd: @ PROC
STMFD sp!, {R4-R12, R14}
VPUSH {D8- D15}
LDR R5, [SP, #104]
MOV R7, #0
VLD1.32 {D0, D1}, [R0]!
MOV R12, R2
VDUP.32 Q15, R7
VLD1.32 {D2, D3}, [R2]!
MOV R10, R0
MOV R11, R2
ADD R0, R0, #1008
ADD R2, R2, #496
MOV R6, #64
MOV R6, R6, LSL #2
ADD R12, R12, R6
MOV R7, #256
MOV R9, R7, LSL #1
ADD R1, R1, R9
MOV R6, #64
MOV R7, #256
MOV R9, R7, LSL #1 @(256*2)
MOV R7, #512
MOV R8, R7, LSL #1 @(512*2)
MOV R5, R5, LSL #2
VMOV Q13, Q15
VMOV Q14, Q15
VMLAL.S32 Q13, D0, D2
VMLAL.S32 Q14, D1, D3
VLD1.32 {D4, D5}, [R0], R8
VLD1.32 {D6, D7}, [R2], R9
VMLAL.S32 Q13, D6, D4
VMLAL.S32 Q14, D7, D5
VLD1.32 {D8, D9}, [R0], R8
VLD1.32 {D10, D11}, [R2], R9
VMLAL.S32 Q13, D10, D8
VMLAL.S32 Q14, D11, D9
VLD1.32 {D12, D13}, [R0], R8
VLD1.32 {D14, D15}, [R2], R9
VMLAL.S32 Q13, D12, D14
VMLAL.S32 Q14, D13, D15
VLD1.32 {D16, D17}, [R0], R8
VLD1.32 {D18, D19}, [R2], R9
VMLAL.S32 Q13, D16, D18
VMLAL.S32 Q14, D17, D19
MOV R0, R10
MOV R2, R11
VLD1.32 {D0, D1}, [R1]!
MOV R10, R1
VLD1.32 {D2, D3}, [R12]!
ADD R1, R1, #1008
MOV R11, R12
VMLAL.S32 Q13, D0, D2
VMLAL.S32 Q14, D1, D3
VLD1.32 {D4, D5}, [R1], R8
ADD R12, R12, #496
VLD1.32 {D6, D7}, [R12], R9
VMLAL.S32 Q13, D6, D4
VMLAL.S32 Q14, D7, D5
VLD1.32 {D8, D9}, [R1], R8
VLD1.32 {D10, D11}, [R12], R9
VMLAL.S32 Q13, D10, D8
VMLAL.S32 Q14, D11, D9
VLD1.32 {D12, D13}, [R1], R8
VLD1.32 {D14, D15}, [R12], R9
VMLAL.S32 Q13, D12, D14
VMLAL.S32 Q14, D13, D15
VLD1.32 {D16, D17}, [R1], R8
VLD1.32 {D18, D19}, [R12], R9
VMLAL.S32 Q13, D16, D18
VMLAL.S32 Q14, D17, D19
VSHRN.S64 D26 , Q13, #31
VST1.32 D26[0], [R3], R5
VST1.32 D26[1], [R3], R5
VSHRN.S64 D27 , Q14, #31
VST1.32 D27[0], [R3], R5
VST1.32 D27[1], [R3], R5
SUB R6, R6, #8
LOOP_1:
VLD1.32 {D0, D1}, [R0]!
MOV R12, R11
MOV R1, R10
VLD1.32 {D2, D3}, [R2]!
MOV R10, R0
ADD R0, R0, #1008
MOV R11, R2
ADD R2, R2, #496
VMOV Q13, Q15
VMOV Q14, Q15
VMLAL.S32 Q13, D0, D2
VMLAL.S32 Q14, D1, D3
VLD1.32 {D4, D5}, [R0], R8
VLD1.32 {D6, D7}, [R2], R9
VMLAL.S32 Q13, D6, D4
VMLAL.S32 Q14, D7, D5
VLD1.32 {D8, D9}, [R0], R8
VLD1.32 {D10, D11}, [R2], R9
VMLAL.S32 Q13, D10, D8
VMLAL.S32 Q14, D11, D9
VLD1.32 {D12, D13}, [R0], R8
VLD1.32 {D14, D15}, [R2], R9
VMLAL.S32 Q13, D12, D14
VMLAL.S32 Q14, D13, D15
VLD1.32 {D16, D17}, [R0], R8
VLD1.32 {D18, D19}, [R2], R9
VMLAL.S32 Q13, D16, D18
VMLAL.S32 Q14, D17, D19
MOV R0, R10
MOV R2, R11
VLD1.32 {D0, D1}, [R1]!
MOV R10, R1
VLD1.32 {D2, D3}, [R12]!
ADD R1, R1, #1008
MOV R11, R12
VMLAL.S32 Q13, D0, D2
VMLAL.S32 Q14, D1, D3
VLD1.32 {D4, D5}, [R1], R8
ADD R12, R12, #496
VLD1.32 {D6, D7}, [R12], R9
VMLAL.S32 Q13, D6, D4
VMLAL.S32 Q14, D7, D5
VLD1.32 {D8, D9}, [R1], R8
VLD1.32 {D10, D11}, [R12], R9
VMLAL.S32 Q13, D10, D8
VMLAL.S32 Q14, D11, D9
VLD1.32 {D12, D13}, [R1], R8
VLD1.32 {D14, D15}, [R12], R9
VMLAL.S32 Q13, D12, D14
VMLAL.S32 Q14, D13, D15
VLD1.32 {D16, D17}, [R1], R8
VLD1.32 {D18, D19}, [R12], R9
VMLAL.S32 Q13, D16, D18
VMLAL.S32 Q14, D17, D19
VSHRN.S64 D26 , Q13, #31
VST1.32 D26[0], [R3], R5
VST1.32 D26[1], [R3], R5
VSHRN.S64 D27 , Q14, #31
VST1.32 D27[0], [R3], R5
VST1.32 D27[1], [R3], R5
@@@
VLD1.32 {D0, D1}, [R0]!
MOV R12, R11
MOV R1, R10
VLD1.32 {D2, D3}, [R2]!
MOV R10, R0
VMOV Q13, Q15
VMLAL.S32 Q13, D0, D2
VMOV Q14, Q15
VMLAL.S32 Q14, D1, D3
ADD R0, R0, #1008
MOV R11, R2
VLD1.32 {D4, D5}, [R0], R8
ADD R2, R2, #496
VLD1.32 {D6, D7}, [R2], R9
VMLAL.S32 Q13, D6, D4
VMLAL.S32 Q14, D7, D5
VLD1.32 {D8, D9}, [R0], R8
VLD1.32 {D10, D11}, [R2], R9
VMLAL.S32 Q13, D8, D10
VMLAL.S32 Q14, D9, D11
VLD1.32 {D12, D13}, [R0], R8
VLD1.32 {D14, D15}, [R2], R9
VMLAL.S32 Q13, D12, D14
VMLAL.S32 Q14, D13, D15
VLD1.32 {D16, D17}, [R0], R8
VLD1.32 {D18, D19}, [R2], R9
VMLAL.S32 Q13, D16, D18
VMLAL.S32 Q14, D17, D19
MOV R0, R10
MOV R2, R11
VLD1.32 {D0, D1}, [R1]!
MOV R10, R1
VLD1.32 {D2, D3}, [R12]!
ADD R1, R1, #1008
VMLAL.S32 Q13, D0, D2
VMLAL.S32 Q14, D1, D3
MOV R11, R12
VLD1.32 {D4, D5}, [R1], R8
ADD R12, R12, #496
VLD1.32 {D6, D7}, [R12], R9
VMLAL.S32 Q13, D6, D4
VMLAL.S32 Q14, D7, D5
VLD1.32 {D8, D9}, [R1], R8
VLD1.32 {D10, D11}, [R12], R9
VMLAL.S32 Q13, D8, D10
VMLAL.S32 Q14, D9, D11
VLD1.32 {D12, D13}, [R1], R8
VLD1.32 {D14, D15}, [R12], R9
VMLAL.S32 Q13, D12, D14
VMLAL.S32 Q14, D13, D15
VLD1.32 {D16, D17}, [R1], R8
VLD1.32 {D18, D19}, [R12], R9
VMLAL.S32 Q13, D16, D18
VMLAL.S32 Q14, D17, D19
VSHRN.S64 D26 , Q13, #31
VST1.32 D26[0], [R3], R5
VST1.32 D26[1], [R3], R5
VSHRN.S64 D27 , Q14, #31
VST1.32 D27[0], [R3], R5
VST1.32 D27[1], [R3], R5
SUBS R6, R6, #8 @1
BGT LOOP_1
VLD1.32 {D0, D1}, [R0]!
MOV R12, R11
MOV R1, R10
VLD1.32 {D2, D3}, [R2]!
MOV R10, R0
VMOV Q13, Q15
VMLAL.S32 Q13, D0, D2
VMOV Q14, Q15
VMLAL.S32 Q14, D1, D3
ADD R0, R0, #1008
MOV R11, R2
VLD1.32 {D4, D5}, [R0], R8
ADD R2, R2, #496
VLD1.32 {D6, D7}, [R2], R9
VMLAL.S32 Q13, D6, D4
VMLAL.S32 Q14, D7, D5
VLD1.32 {D8, D9}, [R0], R8
VLD1.32 {D10, D11}, [R2], R9
VMLAL.S32 Q13, D8, D10
VMLAL.S32 Q14, D9, D11
VLD1.32 {D12, D13}, [R0], R8
VLD1.32 {D14, D15}, [R2], R9
VMLAL.S32 Q13, D12, D14
VMLAL.S32 Q14, D13, D15
VLD1.32 {D16, D17}, [R0], R8
VLD1.32 {D18, D19}, [R2], R9
VMLAL.S32 Q13, D16, D18
VMLAL.S32 Q14, D17, D19
MOV R0, R10
MOV R2, R11
VLD1.32 {D0, D1}, [R1]!
MOV R10, R1
VLD1.32 {D2, D3}, [R12]!
ADD R1, R1, #1008
VMLAL.S32 Q13, D0, D2
VMLAL.S32 Q14, D1, D3
MOV R11, R12
VLD1.32 {D4, D5}, [R1], R8
ADD R12, R12, #496
VLD1.32 {D6, D7}, [R12], R9
VMLAL.S32 Q13, D6, D4
VMLAL.S32 Q14, D7, D5
VLD1.32 {D8, D9}, [R1], R8
VLD1.32 {D10, D11}, [R12], R9
VMLAL.S32 Q13, D8, D10
VMLAL.S32 Q14, D9, D11
VLD1.32 {D12, D13}, [R1], R8
VLD1.32 {D14, D15}, [R12], R9
VMLAL.S32 Q13, D12, D14
VMLAL.S32 Q14, D13, D15
VLD1.32 {D16, D17}, [R1], R8
VLD1.32 {D18, D19}, [R12], R9
VMLAL.S32 Q13, D16, D18
VMLAL.S32 Q14, D17, D19
VSHRN.S64 D26 , Q13, #31
VST1.32 D26[0], [R3], R5
VST1.32 D26[1], [R3], R5
VSHRN.S64 D27, Q14, #31
VST1.32 D27[0], [R3], R5
VST1.32 D27[1], [R3], R5
VPOP {D8 - D15}
LDMFD sp!, {R4-R12, R15}
@ ENDP

View file

@ -0,0 +1,154 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
@ ixheaacd_esbr_radix4bfly
@ One in-place radix-4 butterfly pass of the eSBR complex FFT.
@ AAPCS arguments (roles inferred from the access pattern below —
@ NOTE(review): confirm against the C reference ixheaacd_esbr_radix4bfly):
@   r0 = twiddle-factor table, interleaved 32-bit words; the first 16 bytes
@        are skipped before use and the pointer is rewound each outer pass
@   r1 = in/out data buffer of interleaved 32-bit real/imag words
@   r2 = outer (block) iteration count
@   r3 = butterflies per block (also determines the element stride)
@ Products use the high 32 bits of 32x32 SMULL/SMLAL pairs, doubled with
@ "LSL #1" — i.e. Q31-style fractional multiplies.
@ Uses 16 bytes of stack scratch for loop counters and the pointer stride.
.global ixheaacd_esbr_radix4bfly
ixheaacd_esbr_radix4bfly:
STMFD sp!, {r4-r12, r14}
SUB sp, sp, #16
MOV r6, #6
MUL r7, r6, r3
MOV r4, r3
STR r7, [sp]                   @ [sp]     = 6*r3, word stride used to advance r0/r1/r2 per outer pass
MOV r3, r3, lsl #1
STR r2, [sp, #8]               @ [sp,#8]  = outer loop counter
STR r4, [sp, #12]              @ [sp,#12] = reload value for the inner counter
STR r4, [sp, #4]               @ [sp,#4]  = inner loop counter
ADD r2, r1, r3, lsl #2         @ r2 = second working pointer into the data block
ADD r0, r0, #16                @ skip first 16 bytes of the twiddle table
RADIX4_OUTLOOP:
RADIX4_INLOOP:
@ Load the four real parts of one butterfly (spaced by the stride in r3).
LDR r6, [r1]
LDR r7, [r2]
LDR r8, [r2, r3, lsl #2]
LDR r9, [r2, r3, lsl #3]
ADD r10, r6, r8
SUB r11, r6, r8
ADD r12, r7, r9
SUB r14, r7, r9
ADD r6, r10, r12
SUB r7, r10, r12
STR r6, [r1], #4               @ butterfly output 0 (real), no twiddle needed
@ Same combine for the imaginary parts.
LDR r8, [r1]
LDR r6, [r2, #4]!
LDR r9, [r2, r3, lsl #2]!
LDR r10, [r2, r3, lsl #2]!
ADD r12, r8, r9
SUB r8, r8, r9
ADD r9, r6, r10
SUB r6, r6, r10
ADD r10, r12, r9
STR r10, [r1], #4              @ butterfly output 0 (imag)
SUB r12, r12, r9
ADD r9, r11, r6
SUB r10, r11, r6
ADD r11, r8, r14
@ Twiddle rotations: complex multiply via SMULL/SMLAL, RSB negates the
@ sin term to get the conjugate combination where needed.
LDR r5, [r0], #4
LDR r4, [r0], #-12
SUB r6, r8, r14
RSB r5, r5, #0
SMULL r14, r8, r10, r5
SMLAL r14, r8, r11, r4
RSB r5, r5, #0
MOV r8, r8, lsl #1             @ take high word of the 64-bit product, doubled (Q31)
STR r8, [r2], #-4
SMULL r14, r8, r10, r4
SMLAL r14, r8, r11, r5
LDR r11, [r0], #4
LDR r4, [r0], #-12
MOV r8, r8, lsl #1
STR r8, [r2], -r3, lsl #2
SMULL r10, r8, r7, r4
SMLAL r10, r8, r12, r11
LDR r14, [r0], #4
MOV r5, r8, lsl #1
RSB r11, r11, #0
SMULL r10, r8, r7, r11
SMLAL r10, r8, r12, r4
LDR r4, [r0], #36              @ final twiddle load advances r0 to the next butterfly's factors
STR r5, [r2], #4
MOV r7, r8, lsl #1
RSB r14, r14, #0
SMULL r11, r12, r9, r14
SMLAL r11, r12, r6, r4
RSB r14, r14, #0
STR r7, [r2], -r3, lsl #2
MOV r12, r12, lsl #1
SMULL r10, r7, r9, r4
SMLAL r10, r7, r6, r14
STR r12, [r2], #-4
MOV r7, r7, lsl #1
STR r7, [r2], #8
LDR r4, [sp, #4]
SUBS r4, r4, #1
STR r4, [sp, #4]
BGT RADIX4_INLOOP
@ End of block: rewind the twiddle pointer and advance the data pointers
@ by the saved word stride, then reload the inner counter.
LDR r8, [sp]
LDR r4, [sp, #12]
LDR r6, [sp, #8]
SUB r0, r0, r8, lsl #2
ADD r1, r1, r8, lsl #2
ADD r2, r2, r8, lsl #2
STR r4, [sp, #4]
SUBS r6, r6, #1
STR r6, [sp, #8]
BGT RADIX4_OUTLOOP
ADD sp, sp, #16
LDMFD sp!, {r4-r12, r15}       @ restore and return (r15 <- saved r14)

View file

@ -0,0 +1,113 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
@ ixheaacd_expsubbandsamples_armv7
@ Computes the headroom (spare sign bits) over a range of subband samples:
@ ORs together the one's-complement magnitude (x ^ (x >> 31)) of every
@ sample visited, then returns CLZ(accumulator) - 1.
@ The accumulator r12 is seeded with 1 so CLZ never sees 0; the result is
@ therefore at most 30, and is returned immediately if the row range is
@ empty or the band range is zero.
@ AAPCS arguments (roles inferred from the access pattern —
@ NOTE(review): confirm against the C prototype of this function):
@   r0 = array of row pointers (real part); the only buffer used on the
@        low-power path
@   r1 = array of row pointers (imag part); used on the HQ path only
@   r2 = start band, r3 = stop band (r3 - r2 samples scanned per row)
@   [sp,#0x24] = start row, [sp,#0x28] = stop row
@   [sp,#0x2c] = mode flag: non-zero -> LP path, zero -> HQ path
.global ixheaacd_expsubbandsamples_armv7
ixheaacd_expsubbandsamples_armv7:
STMFD sp!, {r4-r12}
LDR r7, [sp, #0x24]            @ start row
LDR r10, [sp, #0x28]           @ stop row
SUB r11, r3, r2                @ samples per row
MOV r12, #1                    @ magnitude accumulator, seeded non-zero
CMP r7, r10
BGE EXIT                       @ empty row range
MOVS r3, r11
BEQ EXIT                       @ empty band range
LDR r4, [sp, #0x2c]
CMP r4, #0
BEQ HQ_OUTER_LOOP
@ --- LP path: scan real rows only, two samples per iteration ---
SUB r10, r10, r7
ADD r0, r0, r7, LSL #2
LDR r1, [r0], #4               @ first row pointer
OUTERLOOP:
MOV r3, r11
ADD r5, r1, r2, LSL #2         @ &row[start_band]
INLOOP:
LDR r4, [r5], #4
SUBS r3, r3, #2
LDRGE r8, [r5], #4
EOR r1 , r4 , r4, asr #31      @ one's-complement |x|
ORR r12, r12, r1
EORGE r1 , r8 , r8, asr #31    @ second sample only if one remained
ORRGE r12, r12, r1
BGT INLOOP
SUBS r10, r10, #1
LDR r1, [r0], #4               @ next row pointer
BGT OUTERLOOP
B EXIT
@ --- HQ path: scan real and imag rows in lockstep ---
HQ_OUTER_LOOP:
LDR r6, [r0, r7, LSL #2]       @ real row pointer
LDR r5, [r1, r7, LSL #2]       @ imag row pointer
ADD r6, r6, r2, LSL #2
ADD r5, r5, r2, LSL #2
MOV r4, r11
HQ_IN_LOOP:
LDR r8, [r6], #4
LDR r9, [r5], #4
SUBS r4, r4, #2
EOR r3 , r8 , r8, asr #31
ORR r12, r12, r3
EOR r3 , r9 , r9, asr #31
ORR r12, r12, r3
LDRGE r8, [r6], #4
LDRGE r9, [r5], #4
EORGE r3 , r8 , r8, asr #31
ORRGE r12, r12, r3
EORGE r3 , r9 , r9, asr #31
ORRGE r12, r12, r3
BGT HQ_IN_LOOP
INLOEN:
ADD r7, r7, #1
CMP r7, r10
BLT HQ_OUTER_LOOP
EXIT:
CLZ r0, r12                    @ leading zeros of the OR of all magnitudes
SUB r0, r0, #1                 @ exclude the sign bit
LDMFD sp!, {r4-r12}
BX lr

View file

@ -0,0 +1,49 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
@ ixheaacd_fix_div_armv7
@ Signed fixed-point division: returns the quotient of r0 / r1.
@ Method: the quotient sign is sign(r0) XOR sign(r1) (kept in r12); both
@ magnitudes are pre-shifted right by one, then 15 restoring
@ shift-subtract steps build a 15-bit positive quotient in r0, which is
@ negated at the end if the signs differed.
@ Early exit: if r0 >> 1 is zero the quotient is 0 — the BEQ below
@ consumes the flags set by "MOVS r2, r0, ASR #1" (the intervening
@ RSBMI and MOV do not set flags).
@ NOTE(review): the divisor is assumed non-zero and |r0| < |r1| for a
@ fractional result — confirm against the callers.
.global ixheaacd_fix_div_armv7
ixheaacd_fix_div_armv7:
EOR r12, r0, r1                @ sign of the quotient in bit 31
MOVS r3, r1, ASR #1            @ r3 = |r1| / 2
RSBMI r3, r3, #0
MOVS r2, r0, ASR #1            @ r2 = |r0| / 2, flags reflect r0>>1
RSBMI r2, r2, #0
MOV r0, #0                     @ quotient accumulator
BEQ L2                         @ dividend (>>1) was zero -> quotient 0
MOV r1, #0xf                   @ 15 restoring-division iterations
L1:
MOV r2, r2, LSL #1             @ bring down the next remainder bit
CMP r2, r3
MOV r0, r0, LSL #1             @ shift a 0 into the quotient...
ADDCS r0, r0, #1               @ ...turn it into 1 if remainder >= divisor
SUBCS r2, r2, r3               @ and subtract (restoring step)
SUBS r1, r1, #1
BGT L1
L2:
CMP r12, #0
RSBLT r0, r0, #0               @ apply the sign
BX lr

View file

@ -0,0 +1,860 @@
@ DSP_fft32x16_dit
@ NEON decimation-in-time FFT on 32-bit complex data with 16-bit twiddle
@ factors. The transform length in r1 selects the first stage: radix-8
@ for 512/128(/32 by fall-through), radix-4 for 256/64; the remaining
@ r8 = nstages_4 stages are radix-4 with twiddles (OUTER_LOOP_R4).
@ Input is gathered through a per-length bit/digit-reverse table (one of
@ the three stack arguments below); output goes to ptr_y (r3).
@ The first stage also applies the scaling shifts (#3 for radix-8, #2
@ for radix-4); later stages halve data before the Q15 twiddle multiply.
@ NOTE(review): exact fixed-point scaling contract inferred from the
@ shifts visible below — confirm against the C reference of this kernel.
.text
.p2align 2
.global DSP_fft32x16_dit
DSP_fft32x16_dit:
STMFD sp!, {r4-r12, r14}
VPUSH {D8-D15}
@**************Variables Vs Registers*************************
@ r0 = *ptr_w
@ r1 = npoints
@ r2 = ptr_x and
@ r3 = ptr_y
@ r4 = pbit_rev_1024 and pdigRevTable
@ r5 = pbit_rev_512 and p_data1
@ r6 = pbit_rev_128 and p_data2
@ r7 = pbit_rev_32 and p_data3
@ r8 = power and nstages_4
@ r9 = stage_1_count
@ r10 = first_stage (8 or 4)
@ r11 = p_data4
@ r12 = bit reverse value
@ LDR r4,[sp,#0x68]
LDR r5, [sp, #0x68]
LDR r6, [sp, #0x68+4]
LDR r7, [sp, #0x68+8]
@ These conditions can be optimised to lesser number
@************************************************************************************
@COND_1 CMP r1, #0x400 @1024
@ BNE COND_2
@ @MOV r10, #4 @ because radix 8 first stage is by default
@ MOV r8, #4
@ B RADIX_4_FIRST_START
@line 59 "../../algo/aacdec/src/neon_asm/fft32x16ch_neon.s"
COND_2: CMP r1, #0x200 @512
BNE COND_3
@MOV r10, #8
MOV r8, #3
MOV r4, r5
B RADIX_8_FIRST_START
COND_3: CMP r1, #0x100
BNE COND_4
@MOV r10, #4
MOV r8, #3
MOV r4, r5
B RADIX_4_FIRST_START
COND_4: CMP r1, #0x80 @128
BNE COND_5
@MOV r10, #8
MOV r8, #2
MOV r4, r6
B RADIX_8_FIRST_START
COND_5: CMP r1, #0x40
BNE COND_6
@MOV r10, #4
MOV r8, #2
MOV r4, r6
B RADIX_4_FIRST_START
COND_6:
@MOV r10, #8
MOV r8, #1
MOV r4, r7
@**********************************************************************************
@CMP r10,#4
@BEQ RADIX_4_FIRST_START
RADIX_8_FIRST_START:
LSR r9 , r1, #5 @ LOOP count for first stage
LSL r1, r1, #1
RADIX_8_FIRST_LOOP:
MOV r5 , r2
MOV r6 , r2
MOV r7 , r2
MOV r11 , r2
@*************** Register mapping to data ****************************************
@ a_data0_r=q0
@ a_data0_i=q1
@ a_data2_r=q2
@ a_data2_i=q3
@ a_data4_r=q4
@ a_data4_i=q5
@ a_data6_r=q6
@ a_data6_i=q7
@ b_data0_r=q8
@ b_data0_i=q9
@ b_data2_r=q10
@ b_data2_i=q11
@ b_data4_r=q12
@ b_data4_i=q13
@ b_data6_r=q14
@ b_data6_i=q15
@*********************************************************************************
@ Gather 4 digit-reversed butterflies (one per NEON lane) from ptr_x.
LDRB r12, [r4, #0]
ADD r5, r5, r12, LSL #3
VLD2.32 {d0[0], d2[0]}, [r5] , r1
ADD r5, r5, r1
VLD2.32 {d8[0], d10[0]}, [r5] , r1
SUB r5, r5, r1, LSL #1
VLD2.32 {d4[0], d6[0]}, [r5] , r1
ADD r5, r5, r1
VLD2.32 {d12[0], d14[0]}, [r5], r1
SUB r5, r5, r1, LSL #2
LDRB r12, [r4, #1]
ADD r6, r6, r12, LSL #3
VLD2.32 {d0[1], d2[1]}, [r6] , r1
ADD r6, r6, r1
VLD2.32 {d8[1], d10[1]}, [r6] , r1
SUB r6, r6, r1, LSL #1
VLD2.32 {d4[1], d6[1]}, [r6] , r1
ADD r6, r6, r1
VLD2.32 {d12[1], d14[1]}, [r6], r1
SUB r6, r6, r1, LSL #2
LDRB r12, [r4, #2]
ADD r7, r7, r12 , LSL #3
VLD2.32 {d1[0], d3[0]}, [r7] , r1
ADD r7, r7, r1
VLD2.32 {d9[0], d11[0]}, [r7] , r1
SUB r7, r7, r1, LSL #1
LDRB r12, [r4, #3]
ADD r11, r11, r12 , LSL #3
VLD2.32 {d1[1], d3[1]}, [r11] , r1
ADD r11, r11, r1
VLD2.32 {d9[1], d11[1]}, [r11] , r1
SUB r11, r11, r1, LSL #1
@VHADD.S32 q8, q0, q4 @b_data0_r=vhaddq_s32(a_data0_r_i.val[0],a_data4_r_i.val[0])@
VADD.I32 q8, q0, q4 @b_data0_r=vhaddq_s32(a_data0_r_i.val[0],a_data4_r_i.val[0])@
VLD2.32 {d5[0], d7[0]}, [r7] , r1
ADD r7, r7, r1
@VHSUB.S32 q9, q0, q4 @b_data4_r=vhsubq_s32(a_data0_r_i.val[0],a_data4_r_i.val[0])@
VSUB.I32 q9, q0, q4 @b_data4_r=vhsubq_s32(a_data0_r_i.val[0],a_data4_r_i.val[0])@
VLD2.32 {d13[0], d15[0]}, [r7], r1
SUB r7, r7, r1, LSL #2
@VHADD.S32 q0, q1, q5 @b_data0_i=vhaddq_s32(a_data0_r_i.val[1],a_data4_r_i.val[1])@
VADD.I32 q0, q1, q5 @b_data0_i=vhaddq_s32(a_data0_r_i.val[1],a_data4_r_i.val[1])@
VLD2.32 {d5[1], d7[1]}, [r11] , r1
ADD r11, r11, r1
@VHSUB.S32 q4, q1, q5 @b_data4_i=vhsubq_s32(a_data0_r_i.val[1],a_data4_r_i.val[1])@
VSUB.I32 q4, q1, q5 @b_data4_i=vhsubq_s32(a_data0_r_i.val[1],a_data4_r_i.val[1])@
VLD2.32 {d13[1], d15[1]}, [r11], r1
SUB r11, r11, r1, LSL #2
ADD r4, r4, #4
ADD r5, r5, r1, LSR #1
ADD r6, r6, r1, LSR #1
ADD r7, r7, r1, LSR #1
ADD r11, r11, r1, LSR #1
@VHADD.S32 q1, q2, q6 @b_data2_r=vhaddq_s32(a_data2_r_i.val[0],a_data6_r_i.val[0])@
VADD.I32 q1, q2, q6 @b_data2_r=vhaddq_s32(a_data2_r_i.val[0],a_data6_r_i.val[0])@
VLD2.32 {d28[0], d30[0]}, [r5] , r1 @a_data1_r_i=vld2q_lane_s32(__transfersize(2) p_data1,a_data1_r_i,0)@
@VHSUB.S32 q5, q2, q6 @b_data6_r=vhsubq_s32(a_data2_r_i.val[0],a_data6_r_i.val[0])@
VSUB.I32 q5, q2, q6 @b_data6_r=vhsubq_s32(a_data2_r_i.val[0],a_data6_r_i.val[0])@
VLD2.32 {d20[0], d22[0]}, [r5] , r1 @a_data3_r_i=vld2q_lane_s32(__transfersize(2) p_data1,a_data3_r_i,0)
@VHADD.S32 q2, q3, q7 @b_data2_i=vhaddq_s32(a_data2_r_i.val[1],a_data6_r_i.val[1])@
VADD.I32 q2, q3, q7 @b_data2_i=vhaddq_s32(a_data2_r_i.val[1],a_data6_r_i.val[1])@
VLD2.32 {d24[0], d26[0]}, [r5] , r1 @a_data5_r_i=vld2q_lane_s32(__transfersize(2) p_data1,a_data5_r_i,0)
@VHSUB.S32 q6, q3, q7 @b_data6_i=vhsubq_s32(a_data2_r_i.val[1],a_data6_r_i.val[1])@
VSUB.I32 q6, q3, q7 @b_data6_i=vhsubq_s32(a_data2_r_i.val[1],a_data6_r_i.val[1])@
VLD2.32 {d28[1], d30[1]}, [r6] , r1
VADD.S32 q3, q9, q6 @c_data4_r=vaddq_s32(b_data4_r,b_data6_i)@
VLD2.32 {d20[1], d22[1]}, [r6] , r1
VSUB.S32 q7, q9, q6 @c_data6_r=vsubq_s32(b_data4_r,b_data6_i)@
VLD2.32 {d24[1], d26[1]}, [r6] , r1
VSUB.S32 q6, q4, q5 @c_data4_i=vsubq_s32(b_data4_i,b_data6_r)@
VLD2.32 {d29[0], d31[0]}, [r7] , r1
VADD.S32 q9, q4, q5 @c_data6_i=vaddq_s32(b_data4_i,b_data6_r)@
VLD2.32 {d21[0], d23[0]}, [r7] , r1
VADD.S32 q4, q8, q1 @c_data0_r=vaddq_s32(b_data0_r,b_data2_r)@
VLD2.32 {d25[0], d27[0]}, [r7] , r1
VSUB.S32 q5, q8, q1 @c_data2_r=vsubq_s32(b_data0_r,b_data2_r)@
VLD2.32 {d29[1], d31[1]}, [r11] , r1
VADD.S32 q8, q0, q2 @c_data0_i=vaddq_s32(b_data0_i,b_data2_i)@
VLD2.32 {d21[1], d23[1]}, [r11] , r1
VSUB.S32 q0, q0, q2 @c_data2_i=vsubq_s32(b_data0_i,b_data2_i)@
VLD2.32 {d25[1], d27[1]}, [r11] , r1
VPUSH {q3} @ VPUSH(c_data4_r, c_data6_r)
VPUSH {q7}
VLD2.32 {d2[0], d4[0]}, [r5], r1 @a_data7_r_i=vld2q_lane_s32(__transfersize(2) p_data1,a_data7_r_i,0)
@VHADD.S32 q7, q14, q12 @b_data1_r=vhaddq_s32(a_data1_r,a_data5_r)@
VADD.I32 q7, q14, q12 @b_data1_r=vhaddq_s32(a_data1_r,a_data5_r)@
VLD2.32 {d2[1], d4[1]}, [r6] , r1
@VHSUB.S32 q3, q14, q12 @b_data5_r=vhsubq_s32(a_data1_r,a_data5_r)@
VSUB.I32 q3, q14, q12 @b_data5_r=vhsubq_s32(a_data1_r,a_data5_r)@
VLD2.32 {d3[0], d5[0]}, [r7] , r1
@VHADD.S32 q14, q15, q13 @b_data1_i=vhaddq_s32(a_data1_i,a_data5_i)@
VADD.I32 q14, q15, q13 @b_data1_i=vhaddq_s32(a_data1_i,a_data5_i)@
VLD2.32 {d3[1], d5[1]}, [r11] , r1
@VHSUB.S32 q12, q15, q13 @b_data5_i=vhsubq_s32(a_data1_i,a_data5_i)@
VSUB.I32 q12, q15, q13 @b_data5_i=vhsubq_s32(a_data1_i,a_data5_i)@
@VHADD.S32 q15, q10,q1 @b_data3_r=vhaddq_s32(a_data3_r,a_data7_r)@
@VHSUB.S32 q13, q10,q1 @b_data7_r=vhsubq_s32(a_data3_r,a_data7_r)@
@VHADD.S32 q10, q11, q2 @b_data3_i=vhaddq_s32(a_data3_i,a_data7_i)@
@VHSUB.S32 q1, q11, q2 @b_data7_i=vhsubq_s32(a_data3_i,a_data7_i)@
VADD.I32 q15, q10, q1 @b_data3_r=vhaddq_s32(a_data3_r,a_data7_r)@
VSUB.I32 q13, q10, q1 @b_data7_r=vhsubq_s32(a_data3_r,a_data7_r)@
VADD.I32 q10, q11, q2 @b_data3_i=vhaddq_s32(a_data3_i,a_data7_i)@
VSUB.I32 q1, q11, q2 @b_data7_i=vhsubq_s32(a_data3_i,a_data7_i)@
VADD.S32 q11, q7, q15 @c_data1_r=vaddq_s32(b_data1_r,b_data3_r)@
VSUB.S32 q2, q7, q15 @c_data3_r=vsubq_s32(b_data1_r,b_data3_r)@
VADD.S32 q7, q14, q10 @c_data1_i=vaddq_s32(b_data1_i,b_data3_i)@
VSUB.S32 q15, q14, q10 @c_data3_i=vsubq_s32(b_data1_i,b_data3_i)@
VADD.S32 q14, q3, q12 @c_data5_r=vaddq_s32(b_data5_r,b_data5_i)@
VSUB.S32 q10, q3, q12 @c_data5_i=vsubq_s32(b_data5_r,b_data5_i)@
VADD.S32 q3, q13, q1 @c_data7_r=vaddq_s32(b_data7_r,b_data7_i)@
VSUB.S32 q12, q13, q1 @c_data7_i=vsubq_s32(b_data7_r,b_data7_i)@
VADD.S32 q1 , q14, q12 @b_data5_r=vaddq_s32(c_data7_i,c_data5_r)@
VSUB.S32 q13, q14, q12 @b_data7_i=vsubq_s32(c_data5_r,c_data7_i)@
VSUB.S32 q12, q3, q10 @b_data5_i=vsubq_s32(c_data7_r,c_data5_i)@
VUZP.16 d2, d3 @ D0 = b_data5_r_low, D1= b_data5_r_high
VADD.S32 q14, q3, q10 @b_data7_r=vaddq_s32(c_data5_i,c_data7_r)@
VUZP.16 d26, d27
VADD.S32 q3, q4, q11 @b_data0_r=vaddq_s32(c_data0_r,c_data1_r)@
VUZP.16 d24, d25
VSUB.S32 q10, q4, q11 @b_data1_r=vsubq_s32(c_data0_r,c_data1_r)@
VUZP.16 d28, d29
VADD.S32 q4, q8, q7 @b_data0_i=vaddq_s32(c_data0_i,c_data1_i)@
LDR r14, = 0x5a82 @ 23170 ~= round(2^15/sqrt(2)): Q15 twiddle for the odd radix-8 outputs
VSUB.S32 q11, q8, q7 @b_data1_i=vsubq_s32(c_data0_i,c_data1_i)@
VADD.S32 q8, q5, q15 @b_data2_r=vaddq_s32(c_data2_r,c_data3_i)@
VSUB.S32 q7, q5, q15 @b_data3_r=vsubq_s32(c_data2_r,c_data3_i)@
VSUB.S32 q5, q0, q2 @b_data2_i=vsubq_s32(c_data2_i,c_data3_r)@
VADD.S32 q15, q0, q2 @b_data3_i=vaddq_s32(c_data2_i,c_data3_r)@
VPOP {q0}
VPOP {q2}
VPUSH {q3-q4}
VPUSH {q10}
@********************************************************************
@ b_data5_r = q1 free regs = q3,q4,q5,q7,q8,q10,q11
@ b_data5_i = q12
@ b_data7_r = q14
@ b_data7_i = q13
@ c_data4_r = q2
@ c_data4_i = q6
@ c_data6_r = q0
@ c_data6_i = q9
@********************************************************************
VDUP.16 d20, r14
VMULL.u16 q4, d26, d20
VMULL.u16 q3, d28, d20
VPUSH {q7-q8}
VPUSH {q5}
VSHR.S32 q4, q4, #15
VSHR.S32 q3, q3, #15
VQDMLAL.S16 q4, d27, d20
VQDMLAL.S16 q3, d29, d20
VPUSH {q11}
VMULL.u16 q13, d24, d20
VMULL.u16 q14, d2, d20
VADD.S32 q5, q2, q4 @q5=b_data7_i
VSUB.S32 q7, q2, q4 @q7=b_data4_r
VADD.S32 q8, q6, q3 @q10 = b_data4_i
VSUB.S32 q6, q6, q3 @q11 = b_data7_r
VSHR.S32 q13, q13, #15
VSHR.S32 q14, q14, #15
VQDMLAL.S16 q13, d25, d20
VQDMLAL.S16 q14, d3, d20
VPOP {q1}
VPOP {q10}
VADD.S32 q2, q0, q13 @q2 = b_data5_i
VSUB.S32 q4, q0, q13 @q4 = b_data6_r
VADD.S32 q11, q9, q14 @q6 = b_data6_i
VSUB.S32 q3, q9, q14 @q8 = b_data5_r
VPOP {q14}
VPOP {q9}
VPOP {q0}
VPOP {q12, q13}
@**************regs maping************
@b_data0_r = q12
@b_data0_i = q13
@b_data1_r = q0
@b_data1_i = q1
@b_data2_r = q9
@b_data2_i = q10
@b_data3_r = q14
@b_data3_i = q15
@b_data4_r = q7
@b_data4_i = q8
@b_data5_r = q3
@b_data5_i = q2
@b_data6_r = q4
@b_data6_i = q11
@b_data7_r = q6
@b_data7_i = q5
@******************************************
@shifts added (as dual simd instrn)
VTRN.32 q12, q5
@line 455 "../../algo/aacdec/src/neon_asm/fft32x16ch_neon.s"
VSHL.S32 q12, q12, #3 @ch
VTRN.32 q9, q2
VSHL.S32 q5, q5, #3 @ch
VSHL.S32 q9, q9, #3 @ch
VTRN.32 q0, q7
VSHL.S32 q2, q2, #3 @ch
VSHL.S32 q0, q0, #3 @ch
VTRN.32 q14, q4
VSHL.S32 q7, q7, #3 @ch
VSHL.S32 q14, q14, #3 @ch
VTRN.32 q13, q6
VSHL.S32 q4, q4, #3 @ch
VSHL.S32 q13, q13, #3 @ch
VTRN.32 q10, q3
VSHL.S32 q6, q6, #3 @ch
VSHL.S32 q10, q10, #3 @ch
VTRN.32 q1, q8
VSHL.S32 q3, q3, #3 @ch
VSHL.S32 q1, q1, #3 @ch
VTRN.32 q15, q11
VSHL.S32 q8, q8, #3 @ch
VSHL.S32 q15, q15, #3 @ch
VSWP d18, d25
VSHL.S32 q11, q11, #3 @ch
VSWP d4, d11
VSWP d1, d28
VSWP d15, d8
VSWP d20, d27
VSWP d6, d13
VSWP d30, d3
VSWP d22, d17
VST2.32 {q12, q13}, [r3]!
VST2.32 {q0, q1}, [r3]!
VST2.32 {q5, q6}, [r3]!
VST2.32 {q7, q8}, [r3]!
VMOV q5, q11
VST2.32 {q9, q10}, [r3]!
VST2.32 {q14, q15}, [r3]!
VST2.32 {q2, q3}, [r3]!
VST2.32 {q4, q5}, [r3]!
SUBS r9, r9, #1
BNE RADIX_8_FIRST_LOOP
LSR r1, r1, #1
SUB r3, r1, LSL #3 @ two-operand form: r3 -= r1 << 3, rewinds ptr_y to the start
MOV r5, #8
MOV r4, #32
LSR r6, r1, #5
B RADIX_4_FIRST_ENDS
RADIX_8_FIRST_ENDS:
@************************************RADIX 4 FIRST STAGE**********************************
RADIX_4_FIRST_START:
LSR r9 , r1, #4 @ LOOP count for first stage
LSL r1, r1, #1
RADIX_4_LOOP:
MOV r5 , r2
MOV r6 , r2
MOV r7 , r2
MOV r11 , r2
@*************** Register mapping to data ****************************************
@ a_data0_r=q0
@ a_data0_i=q1
@ a_data1_r=q2
@ a_data1_i=q3
@ a_data2_r=q4
@ a_data2_i=q5
@ a_data3_r=q6
@ a_data4_i=q7
@*********************************************************************************
LDRB r12, [r4, #0]
ADD r5, r5, r12, LSL #3
VLD2.32 {d0[0], d2[0]}, [r5] , r1
ADD r5, r5, r1
VLD2.32 {d8[0], d10[0]}, [r5] , r1
SUB r5, r5, r1, LSL #1
VLD2.32 {d4[0], d6[0]}, [r5] , r1
ADD r5, r5, r1
VLD2.32 {d12[0], d14[0]}, [r5], r1
LDRB r12, [r4, #1]
ADD r6, r6, r12, LSL #3
VLD2.32 {d0[1], d2[1]}, [r6] , r1
ADD r6, r6, r1
VLD2.32 {d8[1], d10[1]}, [r6] , r1
SUB r6, r6, r1, LSL #1
VLD2.32 {d4[1], d6[1]}, [r6] , r1
ADD r6, r6, r1
VLD2.32 {d12[1], d14[1]}, [r6], r1
LDRB r12, [r4, #2]
ADD r7, r7, r12, LSL #3
VLD2.32 {d1[0], d3[0]}, [r7] , r1
ADD r7, r7, r1
VLD2.32 {d9[0], d11[0]}, [r7] , r1
LDRB r12, [r4, #3]
ADD r11, r11, r12 , LSL #3
VLD2.32 {d1[1], d3[1]}, [r11] , r1
ADD r11, r11, r1
VLD2.32 {d9[1], d11[1]}, [r11] , r1
SUB r7, r7, r1, LSL #1
VADD.S32 q8, q0, q4 @b_data0_r=vaddq_s32(a_data0_r,a_data2_r)@
VLD2.32 {d5[0], d7[0]}, [r7] , r1
ADD r7, r7, r1
VADD.S32 q9, q1, q5 @b_data0_i=vaddq_s32(a_data0_i,a_data2_i)@
VLD2.32 {d13[0], d15[0]}, [r7], r1
SUB r11, r11, r1, LSL #1
VSUB.S32 q10, q0, q4 @b_data2_r=vsubq_s32(a_data0_r,a_data2_r)@
VLD2.32 {d5[1], d7[1]}, [r11] , r1
ADD r11, r11, r1
VSUB.S32 q11, q1, q5 @b_data2_i=vsubq_s32(a_data0_i,a_data2_i)@
VLD2.32 {d13[1], d15[1]}, [r11], r1
ADD r4, r4, #4
VADD.S32 q12, q2, q6 @b_data1_r=vaddq_s32(a_data1_r,a_data3_r)@
VADD.S32 q13, q3, q7 @b_data1_i=vaddq_s32(a_data1_i,a_data3_i)@
VSUB.S32 q14, q2, q6 @b_data3_r=vsubq_s32(a_data1_r,a_data3_r)@
VSUB.S32 q15, q3, q7 @b_data3_i=vsubq_s32(a_data1_i,a_data3_i)@
VADD.S32 q0, q8, q12 @a_data0_r=vaddq_s32(b_data0_r,b_data1_r)@
VADD.S32 q1, q9, q13 @a_data0_i=vaddq_s32(b_data0_i,b_data1_i)@
VSUB.S32 q2, q8, q12 @a_data1_r=vsubq_s32(b_data0_r,b_data1_r)@
VSUB.S32 q3, q9, q13 @a_data1_i=vsubq_s32(b_data0_i,b_data1_i)@
VADD.S32 q4, q10, q15 @a_data2_r=vaddq_s32(b_data2_r,b_data3_i)@
VSUB.S32 q5, q11, q14 @a_data2_i=vsubq_s32(b_data2_i,b_data3_r)@
VADD.S32 q7, q11, q14 @a_data3_r=vaddq_s32(b_data2_i,b_data3_r)@
VSUB.S32 q6, q10, q15 @a_data3_i=vsubq_s32(b_data2_r,b_data3_i)@
@shifts added
VTRN.32 q0, q4
VSHL.S32 q0, q0, #2 @ch
VTRN.32 q2, q6
VSHL.S32 q4, q4, #2 @ch
VSHL.S32 q2, q2, #2 @ch
VTRN.32 q1, q5 @ch
VSHL.S32 q6, q6, #2 @ch
VSHL.S32 q1, q1, #2 @ch
VTRN.32 q3, q7 @ch
VSHL.S32 q5, q5, #2 @ch
VSHL.S32 q3, q3, #2 @ch
VSWP d4, d1
VSHL.S32 q7, q7, #2 @ch
VSWP d12, d9
@VTRN.32 q1, q5
@VTRN.32 q3, q7
VSWP d6, d3
VSWP d14, d11
VST2.32 {q0, q1}, [r3]!
VST2.32 {q4, q5}, [r3]!
VST2.32 {q2, q3}, [r3]!
VST2.32 {q6, q7}, [r3]!
SUBS r9, r9, #1
BNE RADIX_4_LOOP
LSR r1, r1, #1
SUB r3, r1, LSL #3 @ two-operand form: r3 -= r1 << 3, rewinds ptr_y to the start
MOV r5, #4
MOV r4, #64
LSR r6, r1, #4
RADIX_4_FIRST_ENDS:
@********************************END OF RADIX 4 FIRST STAGE*******************************
@*************** register assignment after first radix 8 stage****************************
@ r1 = npoints
@ r0 = *ptr_w
@ r3 = *ptr_y
@ r8 = nstages_4
@ free regs r2, r4,r5,r6,r7,r9,r10,r11,r12
@ r2 = j
@ r4 = node_spacing
@ r5 = del
@ r6 = in_loop_count
@ r7 = middle_loop_count (del*node_spacing)
@ r9 = p_twiddle_factors
@ r10= p_twiddle_factors and inner loop counter
@ r11=
@ r12=
@ r14= *data
PUSH {r3}
LSR r5, r5, #2
OUTER_LOOP_R4:
LDR r14, [sp]
@MOV r14,r3
@LSR r7,r5,#0 @,#2
MOV r7, r5
MOV r2, #0
MOV r9, r0
LSL r12 , r5, #5
MIDDLE_LOOP_R4:
@ Load the three Q15 cos/sin twiddle pairs for 4 butterflies (one lane each).
VLD2.16 {d0[0], d1[0]}, [r9], r2 @cos_1 = d0 , sin_1=d1
VLD2.16 {d2[0], d3[0]}, [r9], r2 @cos_2 = d2 , sin_2=d3
ADD r11, r2, r4, LSL #2
VLD2.16 {d4[0], d5[0]}, [r9] @cos_3 = d4 , sin_3=d5
ADD r10, r0, r11
VLD2.16 {d0[1], d1[1]}, [r10], r11
VLD2.16 {d2[1], d3[1]}, [r10], r11
ADD r2, r11, r4, LSL #2
VLD2.16 {d4[1], d5[1]}, [r10]
ADD r9, r0, r2
VLD2.16 {d0[2], d1[2]}, [r9], r2
VLD2.16 {d2[2], d3[2]}, [r9], r2
ADD r11, r2, r4, LSL #2
VLD2.16 {d4[2], d5[2]}, [r9]
ADD r10, r0, r11
VLD2.16 {d0[3], d1[3]}, [r10], r11
VLD2.16 {d2[3], d3[3]}, [r10], r11
ADD r2, r11, r4, LSL #2
VLD2.16 {d4[3], d5[3]}, [r10]
ADD r9, r0, r2
MOV r10, r6
INNER_LOOP_R4:
VLD2.32 {q3, q4}, [r14], r12
VSHR.S32 q3, q3, #1
VLD4.16 {q5, q6}, [r14], r12 @a_data1_r_l=d10 , a_data1_r_h=d11, a_data1_i_l=d12, a_data1_i_h=d13
VSHR.S32 q4, q4, #1
VSHR.U16 d10, d10, #1 @a_data1.val[0]= vreinterpret_s16_u16(vshr_n_u16(vreinterpret_u16_s16(a_data1.val[0]), 1))@
VLD4.16 {q7, q8}, [r14], r12
VSHR.U16 d12, d12, #1 @a_data1.val[2]= vreinterpret_s16_u16(vshr_n_u16(vreinterpret_u16_s16(a_data1.val[2]), 1))@
VMULL.S16 q11, d10, d0 @prod_1r=vmull_s16(a_data1.val[0], cos_1)@
VMLSL.S16 q11, d12, d1 @prod_1r=vmlsl_s16(prod_1r, a_data1.val[2], sin_1)@
VLD4.16 {q9, q10}, [r14], r12
VMULL.S16 q12, d10, d1 @prod_1i=vmull_s16(a_data1.val[0], sin_1)@
VMLAL.S16 q12, d12, d0 @prod_1i=vmlal_s16(prod_1i, a_data1.val[2], cos_1)@
VSHR.U16 d14, d14, #1 @a_data2.val[0]=vreinterpret_s16_u16(vshr_n_u16(vreinterpret_u16_s16(a_data2.val[0]), 1))@
VSHR.U16 d16, d16, #1 @a_data2.val[2]=vreinterpret_s16_u16(vshr_n_u16(vreinterpret_u16_s16(a_data2.val[2]), 1))@
SUB r14, r14, r12, LSL #2
VSHR.U16 d18, d18, #1 @a_data3.val[0]= vreinterpret_s16_u16(vshr_n_u16(vreinterpret_u16_s16(a_data3.val[0]), 1))@
VSHR.U16 d20, d20, #1 @a_data3.val[2]= vreinterpret_s16_u16(vshr_n_u16(vreinterpret_u16_s16(a_data3.val[2]), 1))@
VMULL.S16 q13, d14, d2 @prod_2r=vmull_s16(a_data2.val[0], cos_2)@
VMLSL.S16 q13, d16, d3 @prod_2r=vmlsl_s16(prod_2r, a_data2.val[2], sin_2)@
VSHR.S32 q11, q11, #15 @a_data1_r=vshrq_n_s32(prod_1r,15)@
VMULL.S16 q14, d14, d3 @prod_2i=vmull_s16(a_data2.val[0], sin_2)@
VMLAL.S16 q14, d16, d2 @prod_2i=vmlal_s16(prod_2i, a_data2.val[2], cos_2)@
VMULL.S16 q15, d18, d4 @prod_3r=vmull_s16(a_data3.val[0], cos_3)@
VMLSL.S16 q15, d20, d5 @prod_3r=vmlsl_s16(prod_3r, a_data3.val[2], sin_3)@
VMLAL.S16 q11, d11, d0 @a_data1_r=vmlal_s16(a_data1_r, a_data1.val[1], cos_1)@
VMLSL.S16 q11, d13, d1 @a_data1_r=vmlsl_s16(a_data1_r, a_data1.val[3], sin_1)@
VSHR.S32 q12, q12, #15 @a_data1_i=vshrq_n_s32(prod_1i,15)@
VSHR.S32 q13, q13, #15 @a_data2_r=vshrq_n_s32(prod_2r,15)@
VSHR.S32 q14, q14, #15 @a_data2_i=vshrq_n_s32(prod_2i,15)@
VSHR.S32 q15, q15, #15 @a_data3_r=vshrq_n_s32(prod_3r,15)@
VMLAL.S16 q12, d11, d1 @a_data1_i=vmlal_s16(a_data1_i, a_data1.val[1], sin_1)@
VMLAL.S16 q12, d13, d0 @a_data1_i=vmlal_s16(a_data1_i, a_data1.val[3], cos_1)@
VMULL.S16 q5, d18, d5 @prod_3i=vmull_s16(a_data3.val[0], sin_3)@
VMLAL.S16 q5, d20, d4 @prod_3i=vmlal_s16(prod_3i, a_data3.val[2], cos_3)@
VMLAL.S16 q13, d15, d2 @a_data2_r=vmlal_s16(a_data2_r, a_data2.val[1], cos_2)@
VMLSL.S16 q13, d17, d3 @a_data2_r=vmlsl_s16(a_data2_r, a_data2.val[3], sin_2)@
VMLAL.S16 q14, d15, d3 @a_data2_i=vmlal_s16(a_data2_i, a_data2.val[1], sin_2)@
VMLAL.S16 q14, d17, d2 @a_data2_i=vmlal_s16(a_data2_i, a_data2.val[3], cos_2)@
VMLAL.S16 q15, d19, d4 @a_data3_r=vmlal_s16(a_data3_r, a_data3.val[1], cos_3)@
VMLSL.S16 q15, d21, d5 @a_data3_r=vmlsl_s16(a_data3_r, a_data3.val[3], sin_3)@
VSHR.S32 q5, q5, #15 @a_data3_i=vshrq_n_s32(prod_3i,15)@
VMLAL.S16 q5, d19, d5 @a_data3_i=vmlal_s16(a_data3_i, a_data3.val[1], sin_3)@
VMLAL.S16 q5, d21, d4 @a_data3_i=vmlal_s16(a_data3_i, a_data3.val[3], cos_3)@
@**********if condition******************
@ Last middle-loop pass: patch lane 0 with scalar reloads (halved) of the
@ three butterfly inputs, bypassing the twiddle multiply for that lane.
CMP r7, r5
BNE BYPASS_IF
ADD r14, r14, r12
LDR r3, [r14], r12
ASR r3, r3, #1
VMOV.32 d22[0], r3
LDR r3, [r14], r12
ASR r3, r3, #1
VMOV.32 d26[0], r3
LDR r3, [r14]
ASR r3, r3, #1
VMOV.32 d30[0], r3
SUB r14, r14, r12, LSL #1
ADD r14, r14, #4
LDR r3, [r14], r12
ASR r3, r3, #1
VMOV.32 d24[0], r3
LDR r3, [r14], r12
ASR r3, r3, #1
VMOV.32 d28[0], r3
LDR r3, [r14], r12
ASR r3, r3, #1
VMOV.32 d10[0], r3
SUB r14, r14, #4
SUB r14, r14, r12, LSL #2
@****************************************
BYPASS_IF:
VADD.S32 q6, q3, q13 @b_data0_r=vaddq_s32(a_data0_r,a_data2_r)@
VADD.S32 q7, q4, q14 @b_data0_i=vaddq_s32(a_data0_i,a_data2_i)@
VSUB.S32 q3, q3, q13 @b_data2_r=vsubq_s32(a_data0_r,a_data2_r)@
VSUB.S32 q4, q4, q14 @b_data2_i=vsubq_s32(a_data0_i,a_data2_i)@
VADD.S32 q8, q11, q15 @b_data1_r=vaddq_s32(a_data1_r,a_data3_r)@
VADD.S32 q9, q12, q5 @b_data1_i=vaddq_s32(a_data1_i,a_data3_i)@
VSUB.S32 q15, q11, q15 @b_data3_r=vsubq_s32(a_data1_r,a_data3_r)@
VSUB.S32 q14, q12, q5 @b_data3_i=vsubq_s32(a_data1_i,a_data3_i)@
@line 882 "../../algo/aacdec/src/neon_asm/fft32x16ch_neon.s"
VADD.S32 q10, q6, q8 @c_data0_r=vaddq_s32(b_data0_r,b_data1_r)@
VADD.S32 q11, q7, q9 @c_data0_i=vaddq_s32(b_data0_i,b_data1_i)@
VADD.S32 q12, q3, q14 @c_data2_r=vaddq_s32(b_data2_r,b_data3_i)@
VSUB.S32 q13, q4, q15 @c_data2_i=vsubq_s32(b_data2_i,b_data3_r)@
VSUB.S32 q6, q6, q8 @c_data1_r=vsubq_s32(b_data0_r,b_data1_r)@
VST2.32 {q10, q11}, [r14], r12 @ storing (c_data0_r,c_data0_i)
VSUB.S32 q7, q7, q9 @c_data1_i=vsubq_s32(b_data0_i,b_data1_i)@
VSUB.S32 q8, q3, q14 @c_data3_i=vsubq_s32(b_data2_r,b_data3_i)@
VST2.32 {q12, q13}, [r14], r12 @ storing (c_data2_r,c_data2_i)
VADD.S32 q9, q4, q15 @c_data3_r=vaddq_s32(b_data2_i,b_data3_r)@
VST2.32 {q6, q7}, [r14], r12 @ storing (c_data1_r,c_data1_i)
VST2.32 {q8, q9}, [r14], r12 @ storing (c_data3_i,c_data3_r)
SUBS r10, r10, #1
BNE INNER_LOOP_R4
SUB r14, r14, r1, LSL #3
ADD r14, r14, #32
SUBS r7, r7, #1
BNE MIDDLE_LOOP_R4
@ Next radix-4 stage: quarter the node spacing and inner count, quadruple del.
LSR r4, r4, #2
LSL r5, r5, #2
LSR r6, r6, #2
SUBS r8, r8, #1
BNE OUTER_LOOP_R4
END_LOOPS:
POP {r3}
VPOP {D8-D15}
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,373 @@
.text
.p2align 2
.global ixheaacd_fft32x32_ld2_armv7
ixheaacd_fft32x32_ld2_armv7:
STMFD sp!, {r4-r12, r14}
@DIT Radix-4 FFT First Stage
@First Butterfly
MOV r0, r2
MOV r1, r3
LDR r2, [r0] @x_0 = x[0 ]
LDR r3, [r0, #32] @x_2 = x[8 ]
LDR r4, [r0, #64] @x_4 = x[16]
LDR r5, [r0, #96] @x_6 = x[24]
ADD r6, r2, r4 @xh0_0 = x_0 + x_4
SUB r7, r2, r4 @xl0_0 = x_0 - x_4
ADD r8, r3, r5 @xh0_1 = x_2 + x_6
SUB r9, r3, r5 @xl0_1 = x_2 - x_6
LDR r2, [r0, #4] @x_1 = x[0 +1]
LDR r3, [r0, #36] @x_3 = x[8 +1]
LDR r4, [r0, #68] @x_5 = x[16+1]
LDR r5, [r0, #100] @x_7 = x[24+1]
ADD r10, r2, r4 @xh1_0 = x_1 + x_5
SUB r11, r2, r4 @xl1_0 = x_1 - x_5
ADD r12, r3, r5 @xh1_1 = x_3 + x_7
SUB r14, r3, r5 @xl1_1 = x_3 - x_7
ADD r2, r6, r8 @n00 = xh0_0 + xh0_1
ADD r3, r7, r14 @n10 = xl0_0 + xl1_1
SUB r4, r6, r8 @n20 = xh0_0 - xh0_1
SUB r5, r7, r14 @n30 = xl0_0 - xl1_1
STR r2, [r0] @x[0 ] = n00
STR r3, [r0, #32] @x[8 ] = n10
STR r4, [r0, #64] @x[16] = n20
STR r5, [r0, #96] @x[24] = n30
ADD r2, r10, r12 @n01 = xh1_0 + xh1_1
SUB r3, r11, r9 @n11 = xl1_0 - xl0_1
SUB r4, r10, r12 @n21 = xh1_0 - xh1_1
ADD r5, r11, r9 @n31 = xl1_0 + xl0_1
STR r2, [r0, #4] @x[1 ] = n01
STR r3, [r0, #36] @x[8+1 ] = n11
STR r4, [r0, #68] @x[16+1] = n21
STR r5, [r0, #100] @x[24+1] = n31
@Second Butterfly
LDR r2, [r0, #8] @x_0 = x[2 ]
LDR r3, [r0, #40] @x_2 = x[10]
LDR r4, [r0, #72] @x_4 = x[18]
LDR r5, [r0, #104] @x_6 = x[26]
ADD r6, r2, r4 @xh0_0 = x_0 + x_4
SUB r7, r2, r4 @xl0_0 = x_0 - x_4
ADD r8, r3, r5 @xh0_1 = x_2 + x_6
SUB r9, r3, r5 @xl0_1 = x_2 - x_6
LDR r2, [r0, #12] @x_1 = x[2 +1]
LDR r3, [r0, #44] @x_3 = x[10+1]
LDR r4, [r0, #76] @x_5 = x[18+1]
LDR r5, [r0, #108] @x_7 = x[26+1]
ADD r10, r2, r4 @xh1_0 = x_1 + x_5
SUB r11, r2, r4 @xl1_0 = x_1 - x_5
ADD r12, r3, r5 @xh1_1 = x_3 + x_7
SUB r14, r3, r5 @xl1_1 = x_3 - x_7
ADD r2, r6, r8 @n00 = xh0_0 + xh0_1
ADD r3, r7, r14 @n10 = xl0_0 + xl1_1
SUB r4, r6, r8 @n20 = xh0_0 - xh0_1
SUB r5, r7, r14 @n30 = xl0_0 - xl1_1
STR r2, [r0, #8] @x[2 ] = n00
STR r3, [r0, #40] @x[10] = n10
STR r4, [r0, #72] @x[18] = n20
STR r5, [r0, #104] @x[26] = n30
ADD r2, r10, r12 @n01 = xh1_0 + xh1_1
SUB r3, r11, r9 @n11 = xl1_0 - xl0_1
SUB r4, r10, r12 @n21 = xh1_0 - xh1_1
ADD r5, r11, r9 @n31 = xl1_0 + xl0_1
STR r2, [r0, #12] @x[2 +1] = n01
STR r3, [r0, #44] @x[10+1] = n11
STR r4, [r0, #76] @x[18+1] = n21
STR r5, [r0, #108] @x[26+1] = n31
@Third Butterfly
LDR r2, [r0, #16] @x_0 = x[4 ]
LDR r3, [r0, #48] @x_2 = x[12]
LDR r4, [r0, #80] @x_4 = x[20]
LDR r5, [r0, #112] @x_6 = x[28]
ADD r6, r2, r4 @xh0_0 = x_0 + x_4
SUB r7, r2, r4 @xl0_0 = x_0 - x_4
ADD r8, r3, r5 @xh0_1 = x_2 + x_6
SUB r9, r3, r5 @xl0_1 = x_2 - x_6
LDR r2, [r0, #20] @x_1 = x[4 +1]
LDR r3, [r0, #52] @x_3 = x[12+1]
LDR r4, [r0, #84] @x_5 = x[20+1]
LDR r5, [r0, #116] @x_7 = x[28+1]
ADD r10, r2, r4 @xh1_0 = x_1 + x_5
SUB r11, r2, r4 @xl1_0 = x_1 - x_5
ADD r12, r3, r5 @xh1_1 = x_3 + x_7
SUB r14, r3, r5 @xl1_1 = x_3 - x_7
ADD r2, r6, r8 @n00 = xh0_0 + xh0_1
ADD r3, r7, r14 @n10 = xl0_0 + xl1_1
SUB r4, r6, r8 @n20 = xh0_0 - xh0_1
SUB r5, r7, r14 @n30 = xl0_0 - xl1_1
STR r2, [r0, #16] @x[4 ] = n00
STR r3, [r0, #48] @x[12] = n10
STR r4, [r0, #80] @x[20] = n20
STR r5, [r0, #112] @x[28] = n30
ADD r2, r10, r12 @n01 = xh1_0 + xh1_1
SUB r3, r11, r9 @n11 = xl1_0 - xl0_1
SUB r4, r10, r12 @n21 = xh1_0 - xh1_1
ADD r5, r11, r9 @n31 = xl1_0 + xl0_1
STR r2, [r0, #20] @x[4 +1] = n01
STR r3, [r0, #52] @x[12+1] = n11
STR r4, [r0, #84] @x[20+1] = n21
STR r5, [r0, #116] @x[28+1] = n31
@Fourth Butterfly
LDR r2, [r0, #24] @x_0 = x[6 ]
LDR r3, [r0, #56] @x_2 = x[14]
LDR r4, [r0, #88] @x_4 = x[22]
LDR r5, [r0, #120] @x_6 = x[30]
ADD r6, r2, r4 @xh0_0 = x_0 + x_4
SUB r7, r2, r4 @xl0_0 = x_0 - x_4
ADD r8, r3, r5 @xh0_1 = x_2 + x_6
SUB r9, r3, r5 @xl0_1 = x_2 - x_6
LDR r2, [r0, #28] @x_1 = x[6 +1]
LDR r3, [r0, #60] @x_3 = x[14+1]
LDR r4, [r0, #92] @x_5 = x[22+1]
LDR r5, [r0, #124] @x_7 = x[30+1]
ADD r10, r2, r4 @xh1_0 = x_1 + x_5
SUB r11, r2, r4 @xl1_0 = x_1 - x_5
ADD r12, r3, r5 @xh1_1 = x_3 + x_7
SUB r14, r3, r5 @xl1_1 = x_3 - x_7
ADD r2, r6, r8 @n00 = xh0_0 + xh0_1
ADD r3, r7, r14 @n10 = xl0_0 + xl1_1
SUB r4, r6, r8 @n20 = xh0_0 - xh0_1
SUB r5, r7, r14 @n30 = xl0_0 - xl1_1
STR r2, [r0, #24] @x[6 ] = n00
STR r3, [r0, #56] @x[14] = n10
STR r4, [r0, #88] @x[22] = n20
STR r5, [r0, #120] @x[30] = n30
ADD r2, r10, r12 @n01 = xh1_0 + xh1_1
SUB r3, r11, r9 @n11 = xl1_0 - xl0_1
SUB r4, r10, r12 @n21 = xh1_0 - xh1_1
ADD r5, r11, r9 @n31 = xl1_0 + xl0_1
STR r2, [r0, #28] @x[6 +1] = n01
STR r3, [r0, #60] @x[14+1] = n11
STR r4, [r0, #92] @x[22+1] = n21
STR r5, [r0, #124] @x[30+1] = n31
@DIT Radix-4 FFT Second Stage
@First Butterfly
LDR r2, [r0] @inp_0qr = x[0]
LDR r3, [r0, #8] @inp_1qr = x[2]
LDR r4, [r0, #16] @inp_2qr = x[4]
LDR r5, [r0, #24] @inp_3qr = x[6]
ADD r6, r2, r4 @sum_0qr = mul_0qr + mul_2qr
SUB r7, r2, r4 @sum_1qr = mul_0qr - mul_2qr
ADD r8, r3, r5 @sum_2qr = mul_1qr + mul_3qr
SUB r9, r3, r5 @sum_3qr = mul_1qr - mul_3qr
LDR r2, [r0, #4] @inp_0qi = x[1]
LDR r3, [r0, #12] @inp_1qi = x[3]
LDR r4, [r0, #20] @inp_2qi = x[5]
LDR r5, [r0, #28] @inp_3qi = x[7]
ADD r10, r2, r4 @sum_0qi = mul_0qi + mul_2qi
SUB r11, r2, r4 @sum_1qi = mul_0qi - mul_2qi
ADD r12, r3, r5 @sum_2qi = mul_1qi + mul_3qi
SUB r14, r3, r5 @sum_3qi = mul_1qi - mul_3qi
ADD r2, r6, r8 @sum_0qr + sum_2qr
ADD r3, r7, r14 @sum_1qr + sum_3qi
SUB r4, r6, r8 @sum_0qr - sum_2qr
SUB r5, r7, r14 @sum_1qr - sum_3qi
STR r2, [r1] @y[0 ] = sum_0qr + sum_2qr
STR r3, [r1, #32] @y[8 ] = sum_1qr + sum_3qi
STR r4, [r1, #64] @y[16] = sum_0qr - sum_2qr
STR r5, [r1, #96] @y[24] = sum_1qr - sum_3qi
ADD r2, r10, r12 @sum_0qi + sum_2qi
SUB r3, r11, r9 @sum_1qi - sum_3qr
SUB r4, r10, r12 @sum_0qi - sum_2qi
ADD r5, r11, r9 @sum_1qi + sum_3qr
STR r2, [r1, #4] @y[0 +1] = sum_0qi + sum_2qi
STR r3, [r1, #36] @y[8 +1] = sum_1qi - sum_3qr
STR r4, [r1, #68] @y[16+1] = sum_0qi - sum_2qi
STR r5, [r1, #100] @y[24+1] = sum_1qi + sum_3qr
@Load twiddle factors
LDR r11, =2310960706 @0x89BE7642
LDR r12, =3473158396 @0xCF0430FC
LDR r14, =2776455811 @0xA57D5A83
@Second Butterfly
LDR r2, [r0, #32] @mul_0qr = inp_0qr = x[8]
LDR r3, [r0, #36] @mul_0qi = inp_1qr = x[9]
LDR r5, [r0, #40] @inp_1qr = x[10]
LDR r6, [r0, #44] @inp_1qi = x[11]
SMULWB r4, r5, r11 @mul_1qr = mpy_16_32_ns( 0x7642 , inp_1qr)
SMLAWB r4, r6, r12, r4 @mul_1qr -= mpy_16_32_ns(-0x30FC , inp_1qi)
SMULWT r5, r5, r12 @mul_1qi = mpy_16_32_ns(-0x30FC , inp_1qr)
LDR r7, [r0, #48] @inp_2qr = x[12]
LDR r8, [r0, #52] @inp_2qi = x[13]
@Moved for delay slot
SMLAWB r5, r6, r11, r5 @mul_1qi += mpy_16_32_ns( 0x7642 , inp_1qi)
ADD r6, r7, r8 @(inp_2qr + inp_2qi)
SMULWB r6, r6, r14 @mul_2qr = mpy_16_32_ns(0x5A83 , (inp_2qr + inp_2qi))
SUB r7, r8, r7 @(-inp_2qr + inp_2qi)
SMULWB r7, r7, r14 @mul_2qi = mpy_16_32_ns(0x5A83 , (-inp_2qr + inp_2qi))
LDR r9 , [r0, #56] @inp_3qr = x[14]
LDR r10, [r0, #60] @inp_3qi = x[15]
SMULWB r8, r9 , r12 @mul_3qr = mpy_16_32_ns( 0x30FC , inp_3qr)
SMLAWB r8, r10, r11, r8 @mul_3qr -= mpy_16_32_ns(-0x7642 , inp_3qi)@
SMULWT r9, r9 , r11 @mul_3qi = mpy_16_32_ns(-0x7642 , inp_3qr)
SMLAWB r9, r10, r12, r9 @mul_3qi += mpy_16_32_ns( 0x30FC , inp_3qi)
ADD r10, r2, r6, lsl #1 @sum_0qr = mul_0qr + (mul_2qr << 1)
SUB r2 , r2, r6, lsl #1 @sum_1qr = mul_0qr - (mul_2qr << 1)
ADD r6 , r4, r8 @sum_2qr = mul_1qr + mul_3qr
SUB r4 , r4, r8 @sum_3qr = mul_1qr - mul_3qr
ADD r8 , r3, r7, lsl #1 @sum_0qi = mul_0qi + (mul_2qi << 1)
SUB r3 , r3, r7, lsl #1 @sum_1qi = mul_0qi - (mul_2qi << 1)
ADD r7 , r5, r9 @sum_2qi = mul_1qi + mul_3qi
SUB r5 , r5, r9 @sum_3qi = mul_1qi - mul_3qi
ADD r9 , r10, r6, lsl #1 @sum_0qr + (sum_2qr << 1)
SUB r10, r10, r6, lsl #1 @sum_0qr - (sum_2qr << 1)
ADD r6 , r2 , r5, lsl #1 @sum_1qr + (sum_3qi << 1)
SUB r2 , r2 , r5, lsl #1 @sum_1qr - (sum_3qi << 1)
STR r9 , [r1, #8] @y[2 ] = sum_0qr + (sum_2qr << 1)
STR r10, [r1, #72] @y[18] = sum_0qr - (sum_2qr << 1)
STR r6 , [r1, #40] @y[10] = sum_1qr + (sum_3qi << 1)
STR r2 , [r1, #104] @y[26] = sum_1qr - (sum_3qi << 1)
ADD r5 , r8 , r7, lsl #1 @sum_0qi + (sum_2qi << 1)
SUB r8 , r8 , r7, lsl #1 @sum_0qi - (sum_2qi << 1)
SUB r7 , r3 , r4, lsl #1 @sum_1qi - (sum_3qr << 1)
ADD r3 , r3 , r4, lsl #1 @sum_1qi + (sum_3qr << 1)
STR r5 , [r1, #12] @y[2 +1] = sum_0qi + (sum_2qi << 1)
STR r8 , [r1, #76] @y[18+1] = sum_0qi - (sum_2qi << 1)
STR r7 , [r1, #44] @y[10+1] = sum_1qi - (sum_3qr << 1)
STR r3 , [r1, #108] @y[26+1] = sum_1qi + (sum_3qr << 1)
@Third Butterfly
LDR r2, [r0, #64] @mul_0qr = inp_0qr = x[16]
LDR r5, [r0, #72] @inp_1qr = x[18]
LDR r6, [r0, #76] @inp_1qi = x[19]
@Moved for delay slot
LDR r3, [r0, #68] @mul_0qi = inp_1qr = x[17]
ADD r4, r5, r6 @(inp_1qr + inp_1qi)
SMULWB r4, r4, r14 @mul_1qr = mpy_16_32_ns(0x5A83 , (inp_1qr + inp_1qi))
SUB r5, r6, r5 @(-inp_1qr + inp_1qi)
SMULWB r5, r5, r14 @mul_1qi = mpy_16_32_ns(0x5A83 , (-inp_1qr + inp_1qi))
LDR r6, [r0, #84] @mul_2qr = inp_2qi = x[21]
LDR r9 , [r0, #88] @inp_3qr = x[22]
LDR r10, [r0, #92] @inp_3qi = x[23]
@Moved for delay slot
LDR r7, [r0, #80] @mul_2qi = inp_2qr = x[20]
SUB r8 , r10, r9 @(-inp_3qr + inp_3qi)
SMULWB r8 , r8 , r14 @mul_3qr = mpy_16_32_ns( 0x5A83 , (-inp_3qr + inp_3qi))
ADD r9 , r9 , r10 @(inp_3qr + inp_3qi)
SMULWT r9 , r9 , r14 @mul_3qi = mpy_16_32_ns(-0x5A83 , (inp_3qr + inp_3qi))
ADD r10, r2, r6 @sum_0qr = mul_0qr + mul_2qr
SUB r2 , r2, r6 @sum_1qr = mul_0qr - mul_2qr
ADD r6 , r4, r8 @sum_2qr = mul_1qr + mul_3qr
SUB r4 , r4, r8 @sum_3qr = mul_1qr - mul_3qr
SUB r8 , r3, r7 @sum_0qi = mul_0qi - mul_2qi
ADD r3 , r3, r7 @sum_1qi = mul_0qi + mul_2qi
ADD r7 , r5, r9 @sum_2qi = mul_1qi + mul_3qi
SUB r5 , r5, r9 @sum_3qi = mul_1qi - mul_3qi
ADD r9 , r10, r6, lsl #1 @sum_0qr + (sum_2qr << 1)
SUB r10, r10, r6, lsl #1 @sum_0qr - (sum_2qr << 1)
ADD r6 , r2 , r5, lsl #1 @sum_1qr + (sum_3qi << 1)
SUB r2 , r2 , r5, lsl #1 @sum_1qr - (sum_3qi << 1)
STR r9 , [r1, #16] @y[4 ] = sum_0qr + (sum_2qr << 1)
STR r10, [r1, #80] @y[20] = sum_0qr - (sum_2qr << 1)
STR r6 , [r1, #48] @y[12] = sum_1qr + (sum_3qi << 1)
STR r2 , [r1, #112] @y[28] = sum_1qr - (sum_3qi << 1)
ADD r5, r8, r7, lsl #1 @sum_0qi + (sum_2qi << 1)
SUB r8, r8, r7, lsl #1 @sum_0qi - (sum_2qi << 1)
SUB r7, r3, r4, lsl #1 @sum_1qi - (sum_3qr << 1)
ADD r3, r3, r4, lsl #1 @sum_1qi + (sum_3qr << 1)
STR r5 , [r1, #20] @y[4 +1] = sum_0qi + (sum_2qi << 1)
STR r8 , [r1, #84] @y[20+1] = sum_0qi - (sum_2qi << 1)
STR r7 , [r1, #52] @y[12+1] = sum_1qi - (sum_3qr << 1)
STR r3 , [r1, #116] @y[28+1] = sum_1qi + (sum_3qr << 1)
@Fourth Butterfly
LDR r2, [r0, #96] @mul_0qr = inp_0qr = x[24]
LDR r3, [r0, #100] @mul_0qi = inp_1qr = x[25]
LDR r5, [r0, #104] @inp_1qr = x[26]
LDR r6, [r0, #108] @inp_1qi = x[27]
SMULWB r4, r5, r12 @mul_1qr = mpy_16_32_ns( 0x30FC , inp_1qr)
SMLAWB r4, r6, r11, r4 @mul_1qr -= mpy_16_32_ns(-0x7642 , inp_1qi)
SMULWT r5, r5, r11 @mul_1qi = mpy_16_32_ns(-0x7642 , inp_1qr)
LDR r7, [r0, #112] @inp_2qr = x[28]
LDR r8, [r0, #116] @inp_2qi = x[29]
@Moved for delay slot
SMLAWB r5, r6, r12, r5 @mul_1qi += mpy_16_32_ns( 0x30FC , inp_1qi)
SUB r6, r8, r7 @(-inp_2qr + inp_2qi)
SMULWB r6, r6, r14 @mul_2qr = mpy_16_32_ns( 0x5A83 , (-inp_2qr + inp_2qi))
ADD r7, r8, r7 @(inp_2qr + inp_2qi)
SMULWT r7, r7, r14 @mul_2qi = mpy_16_32_ns(-0x5A83 , (inp_2qr + inp_2qi))
LDR r9 , [r0, #120] @inp_3qr = x[30]
LDR r10, [r0, #124] @inp_3qi = x[31]
SMULWT r8, r9 , r11 @mul_3qr = mpy_16_32_ns(-0x7642 , inp_3qr)
SMLAWT r8, r10, r12, r8 @mul_3qr -= mpy_16_32_ns( 0x30FC , inp_3qi)@
SMULWB r9, r9 , r12 @mul_3qi = mpy_16_32_ns( 0x30FC , inp_3qr)
SMLAWT r9, r10, r11, r9 @mul_3qi += mpy_16_32_ns(-0x7642 , inp_3qi)
ADD r10, r2, r6, lsl #1 @sum_0qr = mul_0qr + (mul_2qr << 1)
SUB r2 , r2, r6, lsl #1 @sum_1qr = mul_0qr - (mul_2qr << 1)
ADD r6 , r4, r8 @sum_2qr = mul_1qr + mul_3qr
SUB r4 , r4, r8 @sum_3qr = mul_1qr - mul_3qr
ADD r8 , r3, r7, lsl #1 @sum_0qi = mul_0qi + (mul_2qi << 1)
SUB r3 , r3, r7, lsl #1 @sum_1qi = mul_0qi - (mul_2qi << 1)
ADD r7 , r5, r9 @sum_2qi = mul_1qi + mul_3qi
SUB r5 , r5, r9 @sum_3qi = mul_1qi - mul_3qi
ADD r9 , r10, r6, lsl #1 @sum_0qr + (sum_2qr << 1)
SUB r10, r10, r6, lsl #1 @sum_0qr - (sum_2qr << 1)
ADD r6 , r2 , r5, lsl #1 @sum_1qr + (sum_3qi << 1)
SUB r2 , r2 , r5, lsl #1 @sum_1qr - (sum_3qi << 1)
STR r9 , [r1, #24] @y[6 ] = sum_0qr + (sum_2qr << 1)
STR r10, [r1, #88] @y[22] = sum_0qr - (sum_2qr << 1)
STR r6 , [r1, #56] @y[14] = sum_1qr + (sum_3qi << 1)
STR r2 , [r1, #120] @y[30] = sum_1qr - (sum_3qi << 1)
ADD r5 , r8 , r7, lsl #1 @sum_0qi + (sum_2qi << 1)
SUB r8 , r8 , r7, lsl #1 @sum_0qi - (sum_2qi << 1)
SUB r7 , r3 , r4, lsl #1 @sum_1qi - (sum_3qr << 1)
ADD r3 , r3 , r4, lsl #1 @sum_1qi + (sum_3qr << 1)
STR r5 , [r1, #28] @y[6 +1] = sum_0qi + (sum_2qi << 1)
STR r8 , [r1, #92] @y[22+1] = sum_0qi - (sum_2qi << 1)
STR r7 , [r1, #60] @y[14+1] = sum_1qi - (sum_3qr << 1)
STR r3 , [r1, #124] @y[30+1] = sum_1qi + (sum_3qr << 1)
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,516 @@
@ -----------------------------------------------------------------------------
@ ixheaacd_fft_15_ld_armv7 -- fixed-point 15-point complex FFT, ARMv7.
@
@ Structure (fully unrolled; the LOOP_* labels are markers only, nothing
@ branches back to them):
@   Stage 1: three radix-5 butterflies over complex inputs read at a stride
@            of 384 bytes (96 WORD32s), writing 10 WORD32s each into the
@            scratch buffer addressed by lr.
@   Stage 2: five radix-3 butterflies over the scratch buffer, with results
@            scattered to the output through a byte index table (each index
@            is scaled by 8 = one complex WORD32 pair).
@
@ Register use at entry (from the code below):
@   r0 - input buffer
@   r1 - output base; pushed at entry and popped into r11 before stage 2
@   r2 - stage-1 scratch buffer (kept in lr as "fft3out")
@   r3 - digit-reverse byte table; pushed at entry and popped into r10
@ Fixed-point products use SMULWB/SMULWT (32x16 -> top 32 bits) against the
@ packed constants C5x_VAL / SINMU_VAL.
@ -----------------------------------------------------------------------------
.equ C53_VAL , -11904
.equ SINMU_VAL , 28378
.equ C51_52VAL , 0x79BC9D84
.equ C54_55VAL , 0x478EB000
.equ FFTOP_OFFSET , -1536
.equ FFTOP_OFFSET1 , 256
.text
.p2align 2
.global ixheaacd_fft_15_ld_armv7
ixheaacd_fft_15_ld_armv7:
STMFD r13!, {r4 - r12, r14} @
@ Save output base (r1) and digit-reverse table (r3); LDMFD before stage 2
@ restores them into r11 and r10 respectively.
STR r1 , [r13, #-4]! @
STR r3 , [r13, #-4]! @
MOV lr, r2 @ lr - fft3out
MOV r12, #384 @ input stride in bytes between complex samples
@ ---- Stage 1, radix-5 butterfly 1 of 3 ----
LOOP_FFT5:
LDRD r2, [r0] @ r2 = buf1a[0] and r3 = buf1a[1]
ADD r0, r0, r12
LDRD r4, [r0] @ r4 = buf1a[2] and r5 = buf1a[3]
ADD r0, r0, r12
LDRD r6, [r0] @ r6 = buf1a[4] and r7 = buf1a[5]
ADD r0, r0, r12
LDRD r8, [r0] @ r8 = buf1a[6] and r9 = buf1a[7]
ADD r0, r0, r12
LDRD r10, [r0] @ r10 = buf1a[8] and r11 = buf1a[9]
@ Real part of the radix-5 kernel.
ADD r1, r4, r10 @ r1 = buf1a[2] + buf1a[8]
SUB r4, r4, r10 @ r4 = buf1a[2] - buf1a[8]@
LDR r10, = C54_55VAL
ADD r12, r6, r8 @ r3 = buf1a[4] + buf1a[6]
SUB r8, r6, r8 @ r2 = buf1a[4] - buf1a[6]
SUB r6, r1, r12 @ (r1 - r3)
SMULWT r6, r6, r10 @ t = mult32x16in32_shl((r1 - r3), C54)
ADD r1, r1, r12 @ r1 = r1 + r3@
ADD r2, r2, r1 @ temp1 = inp[0] + r1@
SMULWB r1, r1, r10 @ mult32_shl(r1, C55)
ADD r1, r2, r1, lsl #2 @ r1 = temp1 + ((mult32_shl(r1, C55)) << 1)@
LDR r10, = C51_52VAL @
STR r2, [lr], #4 @ *buf2++ = temp1@
SUB r12, r1, r6, LSL #1 @ r3 = r1 - t@
ADD r1, r1, r6, LSL #1 @ r1 = r1 + t@
ADD r2, r4, r8 @ (r4 + r2)
SMULWT r2, r2, r10 @ t = mult32_shl((r4 + r2), C51)@
@LSL r2, r2, #1
MOV r2, r2, LSL #1
SMULWB r4, r4, r10 @ mult32_shl(r4, C52)
LDR r10, = C53_VAL
ADD r4, r2, r4, LSL #2 @ r4 = t + (mult32_shl(r4, C52) << 1)@
SMULWB r8, r8, r10 @ mult32_shl(r2, C53)
ADD r2, r2, r8, LSL #1 @ r2 = t + mult32_shl(r2, C53)@
@ Imaginary part of the radix-5 kernel (same shape as the real part).
ADD r6, r5, r11 @ s1 = buf1a[3] + buf1a[9]
SUB r8, r5, r11 @ s4 = buf1a[3] - buf1a[9]
LDR r10, = C54_55VAL
ADD r5, r7, r9 @ s3 = buf1a[5] + buf1a[7]@
SUB r7, r7, r9 @ s2 = buf1a[5] + buf1a[7]@
SUB r9, r6, r5 @ (s1 - s3)
SMULWT r9, r9, r10 @ t = mult32x16in32_shl((s1 - s3), C54)
ADD r6, r6, r5 @ s1 = s1 + s3@
ADD r3, r3, r6 @ temp2 = buf1a[1] + s1
SMULWB r6, r6, r10 @ mult32_shl(s1, C55)
ADD r6, r3, r6, lsl #2 @ s1 = temp1 + ((mult32_shl(s1, C55)) << 1)@
LDR r10, = C51_52VAL @
STR r3, [lr], #4 @ *buf2++ = temp2@
SUB r5, r6, r9, LSL #1 @ s3 = s1 - t@
ADD r6, r6, r9, LSL #1 @ s1 = s1 + t@
SUB r0, r0, #896 @ r0 -inp[160]
ADD r11, r7, r8 @ (s4 + s2)
SMULWT r11, r11, r10 @ t = mult32_shl((s4 + s2), C51)@
@LSL r11, r11, #1 @
MOV r11, r11, LSL #1
SMULWB r8, r8, r10 @ mult32_shl(s4, C52)
LDR r10, = C53_VAL
ADD r8, r11, r8, LSL #2 @ s4 = t + (mult32_shl(s4, C52) << 1)@
SMULWB r7, r7, r10 @ mult32_shl(s2, C53)
ADD r7, r11, r7, LSL #1 @ s2 = t + mult32_shl(s2, C53)@
@ Combine real/imag partials into the 8 remaining butterfly outputs.
ADD r3, r1, r7 @ buf2[2] = r1 + s2
SUB r9, r6, r2 @ buf2[3] = s1 - r2
SUB r10, r12, r8 @ buf2[4] = r3 - s4
ADD r11, r5, r4 @ buf2[5] = s3 + r4
ADD r12, r12, r8 @ buf2[6] = r3 + s4
SUB r4, r5, r4 @ buf2[7] = s3 - r4
SUB r5, r1, r7 @ buf2[8] = r1 - s2
ADD r6, r6, r2 @ buf2[9] = s1 + r2
STMIA lr!, {r3, r9-r12} @
MOV r12, #384 @
LDR r1, = FFTOP_OFFSET @
STMIA lr!, {r4-r6} @
@ ---- Stage 1, radix-5 butterfly 2 of 3 (same kernel, new input offsets) ----
LDRD r2, [r0] @ r2 = buf1a[0] and r3 = buf1a[1]
ADD r0, r0, r12
LDRD r4, [r0] @ r4 = buf1a[2] and r5 = buf1a[3]
ADD r0, r0, r12
LDRD r6, [r0] @ r6 = buf1a[4] and r7 = buf1a[5]
ADD r0, r0, r12
LDRD r8, [r0] @ r8 = buf1a[6] and r9 = buf1a[7]
ADD r0, r0, r1
LDRD r10, [r0] @ r10 = buf1a[8] and r11 = buf1a[9]
ADD r0, r0, #1024 @ r0 -inp[320]
ADD r1, r4, r10 @ r1 = buf1a[2] + buf1a[8]
SUB r4, r4, r10 @ r4 = buf1a[2] - buf1a[8]@
LDR r10, = C54_55VAL
ADD r12, r6, r8 @ r3 = buf1a[4] + buf1a[6]
SUB r8, r6, r8 @ r2 = buf1a[4] - buf1a[6]
SUB r6, r1, r12 @ (r1 - r3)
SMULWT r6, r6, r10 @ t = mult32x16in32_shl((r1 - r3), C54)
ADD r1, r1, r12 @ r1 = r1 + r3@
ADD r2, r2, r1 @ temp1 = inp[0] + r1@
SMULWB r1, r1, r10 @ mult32_shl(r1, C55)
ADD r1, r2, r1, lsl #2 @ r1 = temp1 + ((mult32_shl(r1, C55)) << 1)@
LDR r10, = C51_52VAL @
STR r2, [lr], #4 @ *buf2++ = temp1@
SUB r12, r1, r6, LSL #1 @ r3 = r1 - t@
ADD r1, r1, r6, LSL #1 @ r1 = r1 + t@
ADD r2, r4, r8 @ (r4 + r2)
SMULWT r2, r2, r10 @ t = mult32_shl((r4 + r2), C51)@
@LSL r2, r2, #1
MOV r2, r2, LSL #1
SMULWB r4, r4, r10 @ mult32_shl(r4, C52)
LDR r10, = C53_VAL
ADD r4, r2, r4, LSL #2 @ r4 = t + (mult32_shl(r4, C52) << 1)@
SMULWB r8, r8, r10 @ mult32_shl(r2, C53)
ADD r2, r2, r8, LSL #1 @ r2 = t + mult32_shl(r2, C53)@
ADD r6, r5, r11 @ s1 = buf1a[3] + buf1a[9]
SUB r8, r5, r11 @ s4 = buf1a[3] - buf1a[9]
LDR r10, = C54_55VAL
ADD r5, r7, r9 @ s3 = buf1a[5] + buf1a[7]@
SUB r7, r7, r9 @ s2 = buf1a[5] + buf1a[7]@
SUB r9, r6, r5 @ (s1 - s3)
SMULWT r9, r9, r10 @ t = mult32x16in32_shl((s1 - s3), C54)
ADD r6, r6, r5 @ s1 = s1 + s3@
ADD r3, r3, r6 @ temp2 = buf1a[1] + s1
SMULWB r6, r6, r10 @ mult32_shl(s1, C55)
ADD r6, r3, r6, lsl #2 @ s1 = temp1 + ((mult32_shl(s1, C55)) << 1)@
LDR r10, = C51_52VAL @
STR r3, [lr], #4 @ *buf2++ = temp2@
SUB r5, r6, r9, LSL #1 @ s3 = s1 - t@
ADD r6, r6, r9, LSL #1 @ s1 = s1 + t@
ADD r11, r7, r8 @ (s4 + s2)
SMULWT r11, r11, r10 @ t = mult32_shl((s4 + s2), C51)@
@LSL r11, r11, #1
MOV r11, r11, LSL #1
SMULWB r8, r8, r10 @mult32_shl(s4, C52)
LDR r10, = C53_VAL
ADD r8, r11, r8, LSL #2 @s4 = t + (mult32_shl(s4, C52) << 1)@
SMULWB r7, r7, r10 @mult32_shl(s2, C53)
ADD r7, r11, r7, LSL #1 @s2 = t + mult32_shl(s2, C53)@
ADD r3, r1, r7 @buf2[2] = r1 + s2
SUB r9, r6, r2 @buf2[3] = s1 - r2
SUB r10, r12, r8 @buf2[4] = r3 - s4
ADD r11, r5, r4 @buf2[5] = s3 + r4
ADD r12, r12, r8 @buf2[6] = r3 + s4
SUB r4, r5, r4 @buf2[7] = s3 - r4
SUB r5, r1, r7 @buf2[8] = r1 - s2
ADD r6, r6, r2 @buf2[9] = s1 + r2
LDR r1, = FFTOP_OFFSET @
STMIA lr!, {r3, r9-r12}
MOV r12, #384 @
STMIA lr!, {r4-r6} @
@ ---- Stage 1, radix-5 butterfly 3 of 3 ----
LDRD r2, [r0] @ r2 = buf1a[0] and r3 = buf1a[1]
ADD r0, r0, r12
LDRD r4, [r0] @ r4 = buf1a[2] and r5 = buf1a[3]
ADD r0, r0, r1
LDRD r6, [r0] @ r6 = buf1a[4] and r7 = buf1a[5]
ADD r0, r0, r12
LDRD r8, [r0] @ r8 = buf1a[6] and r9 = buf1a[7]
ADD r0, r0, r12
LDRD r10, [r0] @ r10 = buf1a[8] and r11 = buf1a[9]
ADD r0, r0, r12
ADD r1, r4, r10 @ r1 = buf1a[2] + buf1a[8]
SUB r4, r4, r10 @ r4 = buf1a[2] - buf1a[8]@
LDR r10, = C54_55VAL
ADD r12, r6, r8 @ r3 = buf1a[4] + buf1a[6]
SUB r8, r6, r8 @ r2 = buf1a[4] - buf1a[6]
SUB r6, r1, r12 @ (r1 - r3)
SMULWT r6, r6, r10 @ t = mult32x16in32_shl((r1 - r3), C54)
ADD r1, r1, r12 @ r1 = r1 + r3@
ADD r2, r2, r1 @ temp1 = inp[0] + r1@
SMULWB r1, r1, r10 @ mult32_shl(r1, C55)
ADD r1, r2, r1, lsl #2 @ r1 = temp1 + ((mult32_shl(r1, C55)) << 1)@
LDR r10, = C51_52VAL @
STR r2, [lr], #4 @ *buf2++ = temp1@
SUB r12, r1, r6, LSL #1 @ r3 = r1 - t@
ADD r1, r1, r6, LSL #1 @ r1 = r1 + t@
ADD r2, r4, r8 @ (r4 + r2)
SMULWT r2, r2, r10 @ t = mult32_shl((r4 + r2), C51)@
@LSL r2, r2, #1
MOV r2, r2, LSL #1
SMULWB r4, r4, r10 @ mult32_shl(r4, C52)
LDR r10, = C53_VAL
ADD r4, r2, r4, LSL #2 @ r4 = t + (mult32_shl(r4, C52) << 1)@
SMULWB r8, r8, r10 @ mult32_shl(r2, C53)
ADD r2, r2, r8, LSL #1 @ r2 = t + mult32_shl(r2, C53)@
ADD r6, r5, r11 @ s1 = buf1a[3] + buf1a[9]
SUB r8, r5, r11 @ s4 = buf1a[3] - buf1a[9]
LDR r10, = C54_55VAL
ADD r5, r7, r9 @ s3 = buf1a[5] + buf1a[7]@
SUB r7, r7, r9 @ s2 = buf1a[5] + buf1a[7]@
SUB r9, r6, r5 @ (s1 - s3)
SMULWT r9, r9, r10 @ t = mult32x16in32_shl((s1 - s3), C54)
ADD r6, r6, r5 @ s1 = s1 + s3@
ADD r3, r3, r6 @ temp2 = buf1a[1] + s1
SMULWB r6, r6, r10 @ mult32_shl(s1, C55)
ADD r6, r3, r6, lsl #2 @ s1 = temp1 + ((mult32_shl(s1, C55)) << 1)@
LDR r10, = C51_52VAL @
STR r3, [lr], #4 @ *buf2++ = temp2@
SUB r5, r6, r9, LSL #1 @ s3 = s1 - t@
ADD r6, r6, r9, LSL #1 @ s1 = s1 + t@
ADD r11, r7, r8 @ (s4 + s2)
SMULWT r11, r11, r10 @ t = mult32_shl((s4 + s2), C51)@
@LSL r11, r11, #1 @
MOV r11, r11, LSL #1
SMULWB r8, r8, r10 @mult32_shl(s4, C52)
LDR r10, = C53_VAL
ADD r8, r11, r8, LSL #2 @s4 = t + (mult32_shl(s4, C52) << 1)@
SMULWB r7, r7, r10 @mult32_shl(s2, C53)
ADD r7, r11, r7, LSL #1 @s2 = t + mult32_shl(s2, C53)@
ADD r3, r1, r7 @buf2[2] = r1 + s2
SUB r9, r6, r2 @buf2[3] = s1 - r2
SUB r10, r12, r8 @buf2[4] = r3 - s4
ADD r11, r5, r4 @buf2[5] = s3 + r4
ADD r12, r12, r8 @buf2[6] = r3 + s4
SUB r4, r5, r4 @buf2[7] = s3 - r4
SUB r5, r1, r7 @buf2[8] = r1 - s2
ADD r6, r6, r2 @buf2[9] = s1 + r2
STMIA lr!, {r3, r9-r12}
STMIA lr!, {r4-r6} @
@ ---- Stage 2 setup: rewind lr to the start of the 30-word scratch buffer,
@ load the radix-3 twiddle, and restore r10 = index table, r11 = output base
@ (the two values pushed at entry, in reverse push order). ----
SUB lr, lr, #120 @
LDR r12, = SINMU_VAL @
LDMFD r13!, {r10, r11} @
@ ---- Stage 2, radix-3 butterfly 1 of 5 ----
LOOP_FFT3:
LDRD r0, [lr] @ r0 = fft3outptr[0] and r1 = fft3outptr[1]
LDRD r2, [lr, #40] @ r2 = fft3outptr[10] and r3 = fft3outptr[11]
LDRD r4, [lr, #80] @ r4 = fft3outptr[20] and r5 = fft3outptr[21]
ADD lr, lr, #8 @
ADD r6, r0, r2 @ X01r = add32(buf1[0], buf1[2])
ADD r7, r1, r3 @ X01i = add32(buf1[1], buf1[3])
ADD r8, r2, r4 @ add_r = add32(buf1[2], buf1[4])
ADD r9, r3, r5 @ add_i = add32(buf1[3], buf1[5])
SUB r2, r2, r4 @ sub_r = sub32(buf1[2], buf1[4])@
SUB r3, r3, r5 @ sub_i = sub32(buf1[3], buf1[5])@
@ASR r8, r8, #1 @ p1 = add_r >> 1@
MOV r8, r8, ASR #1
@ASR r9, r9, #1 @ p4 = add_i >> 1@
MOV r9, r9, ASR #1
SMULWB r3, r3, r12 @ p2 = mult32x16in32_shl(sub_i, sinmu)@
SMULWB r2, r2, r12 @ p3 = mult32x16in32_shl(sub_r, sinmu)@
SUB r0, r0, r8 @ temp = sub32(buf1a[0], p1)@
ADD r8, r1, r2, LSL #1 @ temp1 = add32(buf1a[1], p3)@
SUB r2, r1, r2, LSL #1 @ temp2 = sub32(buf1a[1], p3)@
ADD r4, r6, r4 @ add32(X01r, buf1a[4])@
ADD r5, r7, r5 @ add32(X01i, buf1a[5])@
ADD r6, r0, r3, LSL #1 @ add32(temp, p2)@
SUB r7, r2, r9 @ sub32(temp2, p4)@
SUB r9, r8, r9 @ sub32(temp1, p4)@
SUB r8, r0, r3, LSL #1 @ sub32(temp, p2)@
@ NOTE(review): r3 is reloaded by the next LDRD before being read -- this
@ MOV looks dead; confirm before removing.
MOV r3, r11 @
@ Scatter the three complex results through the byte index table: each
@ index selects a complex slot (index * 8 bytes) from the output base r11.
LDRB r0, [r10], #1 @
LDRB r1, [r10], #1 @
LDRB r2, [r10], #1 @
ADD r0, r11, r0, lsl #3 @
ADD r1, r11, r1, lsl #3 @
ADD r2, r11, r2, lsl #3 @
STRD r4, [r0] @
STRD r6, [r1] @
STRD r8, [r2] @
@ ---- Stage 2, radix-3 butterfly 2 of 5 ----
LDRD r0, [lr] @ r0 = fft3outptr[0] and r1 = fft3outptr[1]
LDRD r2, [lr, #40] @ r2 = fft3outptr[10] and r3 = fft3outptr[11]
LDRD r4, [lr, #80] @ r4 = fft3outptr[20] and r5 = fft3outptr[21]
ADD lr, lr, #8 @
ADD r6, r0, r2 @ X01r = add32(buf1[0], buf1[2])
ADD r7, r1, r3 @ X01i = add32(buf1[1], buf1[3])
ADD r8, r2, r4 @ add_r = add32(buf1[2], buf1[4])
ADD r9, r3, r5 @ add_i = add32(buf1[3], buf1[5])
SUB r2, r2, r4 @ sub_r = sub32(buf1[2], buf1[4])@
SUB r3, r3, r5 @ sub_i = sub32(buf1[3], buf1[5])@
@ASR r8, r8, #1 @ p1 = add_r >> 1@
MOV r8, r8, ASR #1
@ASR r9, r9, #1 @ p4 = add_i >> 1@
MOV r9, r9, ASR #1
SMULWB r3, r3, r12 @ p2 = mult32x16in32_shl(sub_i, sinmu)@
SMULWB r2, r2, r12 @ p3 = mult32x16in32_shl(sub_r, sinmu)@
SUB r0, r0, r8 @ temp = sub32(buf1a[0], p1)@
ADD r8, r1, r2, LSL #1 @ temp1 = add32(buf1a[1], p3)@
SUB r2, r1, r2, LSL #1 @ temp2 = sub32(buf1a[1], p3)@
ADD r4, r6, r4 @ add32(X01r, buf1a[4])@
ADD r5, r7, r5 @ add32(X01i, buf1a[5])@
ADD r6, r0, r3, LSL #1 @ add32(temp, p2)@
SUB r7, r2, r9 @ sub32(temp2, p4)@
SUB r9, r8, r9 @ sub32(temp1, p4)@
SUB r8, r0, r3, LSL #1 @ sub32(temp, p2)@
LDRB r0, [r10], #1 @
LDRB r1, [r10], #1 @
LDRB r2, [r10], #1 @
ADD r0, r11, r0, lsl #3 @
ADD r1, r11, r1, lsl #3 @
ADD r2, r11, r2, lsl #3 @
STRD r4, [r0] @
STRD r6, [r1] @
STRD r8, [r2] @
@ ---- Stage 2, radix-3 butterfly 3 of 5 ----
LDRD r0, [lr] @ r0 = fft3outptr[0] and r1 = fft3outptr[1]
LDRD r2, [lr, #40] @ r2 = fft3outptr[10] and r3 = fft3outptr[11]
LDRD r4, [lr, #80] @ r4 = fft3outptr[20] and r5 = fft3outptr[21]
ADD lr, lr, #8 @
ADD r6, r0, r2 @ X01r = add32(buf1[0], buf1[2])
ADD r7, r1, r3 @ X01i = add32(buf1[1], buf1[3])
ADD r8, r2, r4 @ add_r = add32(buf1[2], buf1[4])
ADD r9, r3, r5 @ add_i = add32(buf1[3], buf1[5])
SUB r2, r2, r4 @ sub_r = sub32(buf1[2], buf1[4])@
SUB r3, r3, r5 @ sub_i = sub32(buf1[3], buf1[5])@
@ASR r8, r8, #1 @ p1 = add_r >> 1@
MOV r8, r8, ASR #1
@ASR r9, r9, #1 @ p4 = add_i >> 1@
MOV r9, r9, ASR #1
SMULWB r3, r3, r12 @ p2 = mult32x16in32_shl(sub_i, sinmu)@
SMULWB r2, r2, r12 @ p3 = mult32x16in32_shl(sub_r, sinmu)@
SUB r0, r0, r8 @ temp = sub32(buf1a[0], p1)@
ADD r8, r1, r2, LSL #1 @ temp1 = add32(buf1a[1], p3)@
SUB r2, r1, r2, LSL #1 @ temp2 = sub32(buf1a[1], p3)@
ADD r4, r6, r4 @ add32(X01r, buf1a[4])@
ADD r5, r7, r5 @ add32(X01i, buf1a[5])@
ADD r6, r0, r3, LSL #1 @ add32(temp, p2)@
SUB r7, r2, r9 @ sub32(temp2, p4)@
SUB r9, r8, r9 @ sub32(temp1, p4)@
SUB r8, r0, r3, LSL #1 @ sub32(temp, p2)@
LDRB r0, [r10], #1 @
LDRB r1, [r10], #1 @
LDRB r2, [r10], #1 @
ADD r0, r11, r0, lsl #3 @
ADD r1, r11, r1, lsl #3 @
ADD r2, r11, r2, lsl #3 @
STRD r4, [r0] @
STRD r6, [r1] @
STRD r8, [r2] @
@ ---- Stage 2, radix-3 butterfly 4 of 5 ----
LDRD r0, [lr] @ r0 = fft3outptr[0] and r1 = fft3outptr[1]
LDRD r2, [lr, #40] @ r2 = fft3outptr[10] and r3 = fft3outptr[11]
LDRD r4, [lr, #80] @ r4 = fft3outptr[20] and r5 = fft3outptr[21]
ADD lr, lr, #8 @
ADD r6, r0, r2 @ X01r = add32(buf1[0], buf1[2])
ADD r7, r1, r3 @ X01i = add32(buf1[1], buf1[3])
ADD r8, r2, r4 @ add_r = add32(buf1[2], buf1[4])
ADD r9, r3, r5 @ add_i = add32(buf1[3], buf1[5])
SUB r2, r2, r4 @ sub_r = sub32(buf1[2], buf1[4])@
SUB r3, r3, r5 @ sub_i = sub32(buf1[3], buf1[5])@
@ASR r8, r8, #1 @ p1 = add_r >> 1@
MOV r8, r8, ASR #1
@ASR r9, r9, #1 @ p4 = add_i >> 1@
MOV r9, r9, ASR #1
SMULWB r3, r3, r12 @ p2 = mult32x16in32_shl(sub_i, sinmu)@
SMULWB r2, r2, r12 @ p3 = mult32x16in32_shl(sub_r, sinmu)@
SUB r0, r0, r8 @ temp = sub32(buf1a[0], p1)@
ADD r8, r1, r2, LSL #1 @ temp1 = add32(buf1a[1], p3)@
SUB r2, r1, r2, LSL #1 @ temp2 = sub32(buf1a[1], p3)@
ADD r4, r6, r4 @ add32(X01r, buf1a[4])@
ADD r5, r7, r5 @ add32(X01i, buf1a[5])@
ADD r6, r0, r3, LSL #1 @ add32(temp, p2)@
SUB r7, r2, r9 @ sub32(temp2, p4)@
SUB r9, r8, r9 @ sub32(temp1, p4)@
SUB r8, r0, r3, LSL #1 @ sub32(temp, p2)@
LDRB r0, [r10], #1 @
LDRB r1, [r10], #1 @
LDRB r2, [r10], #1 @
ADD r0, r11, r0, lsl #3 @
ADD r1, r11, r1, lsl #3 @
ADD r2, r11, r2, lsl #3 @
STRD r4, [r0] @
STRD r6, [r1] @
STRD r8, [r2] @
@ ---- Stage 2, radix-3 butterfly 5 of 5 (last: no need to advance lr) ----
LDRD r0, [lr] @ r0 = fft3outptr[0] and r1 = fft3outptr[1]
LDRD r2, [lr, #40] @ r2 = fft3outptr[10] and r3 = fft3outptr[11]
LDRD r4, [lr, #80] @ r4 = fft3outptr[20] and r5 = fft3outptr[21]
ADD r6, r0, r2 @ X01r = add32(buf1[0], buf1[2])
ADD r7, r1, r3 @ X01i = add32(buf1[1], buf1[3])
ADD r8, r2, r4 @ add_r = add32(buf1[2], buf1[4])
ADD r9, r3, r5 @ add_i = add32(buf1[3], buf1[5])
SUB r2, r2, r4 @ sub_r = sub32(buf1[2], buf1[4])@
SUB r3, r3, r5 @ sub_i = sub32(buf1[3], buf1[5])@
@ASR r8, r8, #1 @ p1 = add_r >> 1@
MOV r8, r8, ASR #1
@ASR r9, r9, #1 @ p4 = add_i >> 1@
MOV r9, r9, ASR #1
SMULWB r3, r3, r12 @ p2 = mult32x16in32_shl(sub_i, sinmu)@
SMULWB r2, r2, r12 @ p3 = mult32x16in32_shl(sub_r, sinmu)@
SUB r0, r0, r8 @ temp = sub32(buf1a[0], p1)@
ADD r8, r1, r2, LSL #1 @ temp1 = add32(buf1a[1], p3)@
SUB r2, r1, r2, LSL #1 @ temp2 = sub32(buf1a[1], p3)@
ADD r4, r6, r4 @ add32(X01r, buf1a[4])@
ADD r5, r7, r5 @ add32(X01i, buf1a[5])@
ADD r6, r0, r3, LSL #1 @ add32(temp, p2)@
SUB r7, r2, r9 @ sub32(temp2, p4)@
SUB r9, r8, r9 @ sub32(temp1, p4)@
SUB r8, r0, r3, LSL #1 @ sub32(temp, p2)@
LDRB r0, [r10], #1 @
LDRB r1, [r10], #1 @
LDRB r2, [r10], #1 @
ADD r0, r11, r0, lsl #3 @
ADD r1, r11, r1, lsl #3 @
ADD r2, r11, r2, lsl #3 @
STRD r4, [r0] @
STRD r6, [r1] @
STRD r8, [r2] @
@ Restore callee-saved registers and return (r14 was saved, popped into pc).
LDMFD r13!, {r4 - r12, r15}

View file

@ -0,0 +1,89 @@
#include <stdlib.h>
#include <stdio.h>
#include <ixheaacd_type_def.h>
#include "ixheaacd_interface.h"
#include "ixheaacd_constants.h"
#include <ixheaacd_basic_ops32.h>
#include "ixheaacd_function_selector.h"
extern const WORD32 ixheaacd_twiddle_table_fft_32x32[514];
extern const WORD8 ixheaacd_mps_dig_rev[16];
/* In-place complex FFT/IFFT wrapper for power-of-2 lengths (up to 512
 * complex points -- the interleaved scratch buffers hold 1024 WORD32s).
 *
 * xr, xi   : real / imaginary input samples; overwritten with the result.
 * nlength  : number of complex points (must be a power of 2).
 * fft_mode : -1 selects the forward FFT kernel, otherwise the IFFT kernel.
 * preshift : in/out; updated to (prescale applied here) - (*preshift).
 *
 * Changes vs. original: removed the unused dig_rev_shift / n_stages
 * locals, hoisted the not_power_4 shift adjustment that was duplicated in
 * both branches, and folded the even/odd shift formula into one expression.
 */
VOID ixheaacd_complex_fft_p2_armv7(WORD32 *xr, WORD32 *xi, WORD32 nlength,
                                   WORD32 fft_mode, WORD32 *preshift) {
  WORD32 i;
  WORD32 not_power_4;
  WORD32 npts, shift;
  WORD32 ptr_x[1024];
  WORD32 y[1024];
  WORD32 npoints = nlength;
  WORD32 n = 0;
  WORD32 *ptr_y = y;

  /* Lengths that are a power of 2 but not a power of 4 need one extra
     scaling step after the kernel; remember the parity of log2(npoints). */
  not_power_4 = (30 - ixheaacd_norm32(npoints)) & 1;

  /* Guard-bit calculation: n = log2(npoints). */
  npts = npoints;
  while (npts >> 1) {
    n++;
    npts = npts >> 1;
  }

  /* Originally (n + 4) / 2 for even n and (n + 3) / 2 for odd n; with
     integer division both reduce to (n + 4) / 2 (for odd n, n + 3 is even,
     so the extra +1 truncates away). */
  shift = (n + 4) / 2;

  /* Pre-scale and interleave the input into (re, im) pairs.  Division is
     kept (rather than >>) so negative samples round toward zero exactly as
     before. */
  for (i = 0; i < nlength; i++) {
    ptr_x[2 * i] = (xr[i] / (1 << (shift)));
    ptr_x[2 * i + 1] = (xi[i] / (1 << (shift)));
  }

  if (fft_mode == -1) {
    ixheaacd_complex_fft_p2_asm(ixheaacd_twiddle_table_fft_32x32, nlength,
                                ptr_x, ptr_y);
  } else {
    ixheaacd_complex_ifft_p2_asm(ixheaacd_twiddle_table_fft_32x32, nlength,
                                 ptr_x, ptr_y);
  }
  /* Common to both directions (was duplicated in each branch). */
  if (not_power_4) shift += 1;

  /* De-interleave the kernel output back into the caller's buffers. */
  for (i = 0; i < nlength; i++) {
    xr[i] = y[2 * i];
    xi[i] = y[2 * i + 1];
  }

  *preshift = shift - *preshift;
  return;
}
/* MPS complex FFT wrapper around the ARMv7 assembly kernel.
 *
 * ptr_x   : interleaved complex input, consumed by the assembly kernel.
 * fin_re  : destination for real parts.
 * fin_im  : destination for imaginary parts.
 * nlength : number of complex points (kernel uses the 64-point digit-reverse
 *           table ixheaacd_mps_dig_rev).
 *
 * Change vs. original: removed the n_stages / npoints computation whose
 * result was never used.
 */
VOID ixheaacd_mps_complex_fft_64_armv7(WORD32 *ptr_x, WORD32 *fin_re,
                                       WORD32 *fin_im, WORD32 nlength) {
  WORD32 i;
  WORD32 y[128];
  WORD32 *ptr_y = y;
  const WORD32 *ptr_w = ixheaacd_twiddle_table_fft_32x32; /* 32-bit twiddles */

  ixheaacd_mps_complex_fft_64_asm(ptr_w, nlength, ptr_x, ptr_y,
                                  ixheaacd_mps_dig_rev);

  /* De-interleave: even index -> real, odd -> imaginary.  NOTE(review): the
     outputs are written at stride 2 (fin_re[0], fin_re[2], ...), leaving the
     odd slots untouched; this matches the original code exactly -- confirm
     callers expect this sparse layout before changing it to fin_re[i / 2]. */
  for (i = 0; i < 2 * nlength; i += 2) {
    fin_re[i] = y[i];
    fin_im[i] = y[i + 1];
  }
  return;
}

View file

@ -0,0 +1,185 @@
/******************************************************************************
* *
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#include <stdio.h>
#include <string.h>
#include "ixheaacd_sbr_common.h"
#include <ixheaacd_type_def.h>
#include "ixheaacd_constants.h"
#include <ixheaacd_basic_ops32.h>
#include <ixheaacd_basic_ops16.h>
#include <ixheaacd_basic_ops40.h>
#include "ixheaacd_basic_ops.h"
#include <ixheaacd_basic_op.h>
#include "ixheaacd_intrinsics.h"
#include "ixheaacd_common_rom.h"
#include "ixheaacd_sbrdecsettings.h"
#include "ixheaacd_bitbuffer.h"
#include "ixheaacd_defines.h"
#include "ixheaacd_pns.h"
#include <ixheaacd_aac_rom.h>
#include "ixheaacd_aac_imdct.h"
#include "ixheaacd_pulsedata.h"
#include "ixheaacd_drc_data_struct.h"
#include "ixheaacd_channelinfo.h"
#include "ixheaacd_drc_dec.h"
#include "ixheaacd_sbrdecoder.h"
#include "ixheaacd_tns.h"
#include "ixheaacd_sbr_scale.h"
#include "ixheaacd_lpp_tran.h"
#include "ixheaacd_env_extr_part.h"
#include <ixheaacd_sbr_rom.h>
#include "ixheaacd_block.h"
#include "ixheaacd_hybrid.h"
#include "ixheaacd_ps_dec.h"
#include "ixheaacd_env_extr.h"
#include "ixheaacd_basic_funcs.h"
#include "ixheaacd_env_calc.h"
/* ARMv7 function-selector table: each global function pointer is bound at
 * load time to either an ARMv7 assembly implementation (*_armv7) or the
 * portable C fallback (*_dec).  The rest of the decoder calls through these
 * pointers, so this translation unit decides which implementation runs. */

/* Fixed-point division helper. */
WORD32 (*ixheaacd_fix_div)(WORD32, WORD32) = &ixheaacd_fix_div_armv7;

/* SBR LPP transposer: covariance matrix computation. */
VOID(*ixheaacd_covariance_matrix_calc)
(WORD32 *, ixheaacd_lpp_trans_cov_matrix *,
 WORD32) = &ixheaacd_covariance_matrix_calc_armv7;
VOID(*ixheaacd_covariance_matrix_calc_2)
(ixheaacd_lpp_trans_cov_matrix *, WORD32 *, WORD32,
 WORD16) = &ixheaacd_covariance_matrix_calc_2_armv7;

/* IMDCT overlap-add variants (C fallbacks selected here). */
VOID(*ixheaacd_over_lap_add1)
(WORD32 *, WORD32 *, WORD16 *, const WORD16 *, WORD16, WORD16,
 WORD16) = &ixheaacd_over_lap_add1_dec;
VOID(*ixheaacd_over_lap_add2)
(WORD32 *, WORD32 *, WORD32 *, const WORD16 *, WORD16, WORD16,
 WORD16) = &ixheaacd_over_lap_add2_dec;

/* Parametric stereo: decorrelation and rotation. */
VOID(*ixheaacd_decorr_filter2)
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *p_buf_left_real, WORD32 *p_buf_left_imag,
 WORD32 *p_buf_right_real, WORD32 *p_buf_right_imag,
 ia_ps_tables_struct *ps_tables_ptr,
 WORD16 *transient_ratio) = &ixheaacd_decorr_filter2_armv7;
VOID(*ixheaacd_decorr_filter1)
(ia_ps_dec_struct *ptr_ps_dec, ia_ps_tables_struct *ps_tables_ptr,
 WORD16 *transient_ratio) = &ixheaacd_decorr_filter1_armv7;
WORD32(*ixheaacd_divide16_pos)
(WORD32 op1, WORD32 op2) = &ixheaacd_divide16_pos_armv7;
VOID(*ixheaacd_decorrelation)
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *p_buf_left_real, WORD32 *p_buf_left_imag,
 WORD32 *p_buf_right_real, WORD32 *p_buf_right_imag,
 ia_ps_tables_struct *ps_tables_ptr) = &ixheaacd_decorrelation_armv7;
VOID(*ixheaacd_apply_rot)
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *p_qmf_left_re, WORD32 *p_qmf_left_im,
 WORD32 *p_qmf_right_re, WORD32 *p_qmf_right_im,
 ia_sbr_tables_struct *sbr_tables_ptr,
 const WORD16 *ptr_res) = &ixheaacd_apply_rot_armv7;

/* SBR envelope calculation helpers. */
VOID(*ixheaacd_conv_ergtoamplitudelp)
(WORD32 bands, WORD16 noise_e, WORD16 *nrg_sine, WORD16 *nrg_gain,
 WORD16 *noise_level_mant,
 WORD16 *sqrt_table) = &ixheaacd_conv_ergtoamplitudelp_armv7;
VOID(*ixheaacd_conv_ergtoamplitude)
(WORD32 bands, WORD16 noise_e, WORD16 *nrg_sine, WORD16 *nrg_gain,
 WORD16 *noise_level_mant,
 WORD16 *sqrt_table) = &ixheaacd_conv_ergtoamplitude_armv7;
VOID(*ixheaacd_adjust_scale)
(WORD32 **re, WORD32 **im, WORD32 sub_band_start, WORD32 sub_band_end,
 WORD32 start_pos, WORD32 next_pos, WORD32 shift,
 FLAG low_pow_flag) = &ixheaacd_adjust_scale_armv7;
/* NOTE(review): the pointer name carries a doubled "ixheaacd_" prefix while
 * the target function does not.  Renaming would change external linkage
 * (other translation units reference this symbol), so it is only flagged. */
WORD16(*ixheaacd_ixheaacd_expsubbandsamples)
(WORD32 **re, WORD32 **im, WORD32 sub_band_start, WORD32 sub_band_end,
 WORD32 start_pos, WORD32 next_pos,
 FLAG low_pow_flag) = &ixheaacd_expsubbandsamples_armv7;
VOID(*ixheaacd_enery_calc_per_subband)
(WORD32 start_pos, WORD32 next_pos, WORD32 sub_band_start, WORD32 sub_band_end,
 WORD32 frame_exp, WORD16 *nrg_est_mant, FLAG low_pow_flag,
 ia_sbr_tables_struct *ptr_sbr_tables,
 WORD32 *ptr_qmf_matrix) = &ixheaacd_enery_calc_per_subband_dec;
VOID(*ixheaacd_harm_idx_zerotwolp)
(WORD32 *ptr_real_buf, WORD16 *ptr_gain_buf, WORD32 scale_change,
 WORD16 *ptr_sine_level_buf, const WORD32 *ptr_rand_ph,
 WORD16 *noise_level_mant, WORD32 num_sub_bands, FLAG noise_absc_flag,
 WORD32 harm_index) = &ixheaacd_harm_idx_zerotwolp_armv7;

/* Temporal noise shaping (TNS) filters. */
VOID(*ixheaacd_tns_ar_filter_fixed)
(WORD32 *spectrum, WORD32 size, WORD32 inc, WORD32 *lpc, WORD32 order,
 WORD32 shift_value, WORD scale_spec) = &ixheaacd_tns_ar_filter_fixed_armv7;
VOID(*ixheaacd_tns_ar_filter)
(WORD32 *spectrum, WORD32 size, WORD32 inc, WORD16 *lpc, WORD32 order,
 WORD32 shift_value, WORD scale_spec,
 WORD32 *ptr_filter_state) = &ixheaacd_tns_ar_filter_armv7;
VOID(*ixheaacd_tns_parcor_lpc_convert)
(WORD16 *parcor, WORD16 *lpc, WORD16 *scale,
 WORD order) = &ixheaacd_tns_parcor_lpc_convert_armv7;

/* IMDCT / spectral post-processing. */
WORD32(*ixheaacd_calc_max_spectral_line)
(WORD32 *ptr_tmp, WORD32 size) = &ixheaacd_calc_max_spectral_line_dec;
VOID(*ixheaacd_post_twiddle)
(WORD32 out_ptr[], WORD32 spec_data[],
 ia_aac_dec_imdct_tables_struct *ptr_imdct_tables,
 WORD npoints) = &ixheaacd_post_twiddle_dec;
VOID(*ixheaacd_post_twid_overlap_add)
(WORD16 pcm_out[], WORD32 spec_data[],
 ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints,
 WORD32 *ptr_overlap_buf, WORD16 q_shift, const WORD16 *window,
 WORD16 ch_fac) = &ixheaacd_post_twid_overlap_add_dec;
VOID(*ixheaacd_neg_shift_spec)
(WORD32 *coef, WORD16 *out, WORD16 q_shift,
 WORD16 ch_fac) = &ixheaacd_neg_shift_spec_dec;
VOID(*ixheaacd_spec_to_overlapbuf)
(WORD32 *ptr_overlap_buf, WORD32 *ptr_spec_coeff, WORD32 q_shift,
 WORD32 size) = &ixheaacd_spec_to_overlapbuf_armv7;
VOID(*ixheaacd_overlap_buf_out)
(WORD16 *out_samples, WORD32 *ptr_overlap_buf, WORD32 size,
 const WORD16 ch_fac) = &ixheaacd_overlap_buf_out_armv7;
VOID(*ixheaacd_overlap_out_copy)
(WORD16 *out_samples, WORD32 *ptr_overlap_buf, WORD32 *ptr_overlap_buf1,
 const WORD16 ch_fac) = &ixheaacd_overlap_out_copy_armv7;
VOID(*ixheaacd_pretwiddle_compute)
(WORD32 *spec_data1, WORD32 *spec_data2, WORD32 *out_ptr,
 ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints4,
 WORD32 neg_expo) = &ixheaacd_pretwiddle_compute_dec;
VOID(*ixheaacd_imdct_using_fft)
(ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD32 npoints,
 WORD32 *ptr_x, WORD32 *ptr_y) = &ixheaacd_imdct_using_fft_dec;

View file

@ -0,0 +1,249 @@
/******************************************************************************
* *
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#include <stdio.h>
#include <string.h>
#include "ixheaacd_sbr_common.h"
#include <ixheaacd_type_def.h>
#include "ixheaacd_constants.h"
#include <ixheaacd_basic_ops32.h>
#include <ixheaacd_basic_ops16.h>
#include <ixheaacd_basic_ops40.h>
#include "ixheaacd_basic_ops.h"
#include <ixheaacd_basic_op.h>
#include "ixheaacd_intrinsics.h"
#include "ixheaacd_common_rom.h"
#include "ixheaacd_sbrdecsettings.h"
#include "ixheaacd_bitbuffer.h"
#include "ixheaacd_defines.h"
#include "ixheaacd_pns.h"
#include <ixheaacd_aac_rom.h>
#include "ixheaacd_aac_imdct.h"
#include "ixheaacd_pulsedata.h"
#include "ixheaacd_drc_data_struct.h"
#include "ixheaacd_lt_predict.h"
#include "ixheaacd_channelinfo.h"
#include "ixheaacd_drc_dec.h"
#include "ixheaacd_sbrdecoder.h"
#include "ixheaacd_tns.h"
#include "ixheaacd_sbr_scale.h"
#include "ixheaacd_lpp_tran.h"
#include "ixheaacd_env_extr_part.h"
#include <ixheaacd_sbr_rom.h>
#include "ixheaacd_block.h"
#include "ixheaacd_hybrid.h"
#include "ixheaacd_ps_dec.h"
#include "ixheaacd_env_extr.h"
#include "ixheaacd_basic_funcs.h"
#include "ixheaacd_env_calc.h"
#include "ixheaacd_dsp_fft32x32s.h"
#include "ixheaacd_interface.h"
/* ARMv7 build-variant dispatch table: every decoder kernel below is bound to
 * its hand-written ARMv7/NEON assembly implementation (suffix "_armv7",
 * "_arm" for one scalar helper).  Pointer names and signatures are internal
 * ABI; callers throughout the decoder invoke these indirectly. */

/* Fixed-point division helper. */
WORD32 (*ixheaacd_fix_div)(WORD32, WORD32) = &ixheaacd_fix_div_armv7;
/* SBR LPP-transposer covariance matrix computation (two variants). */
VOID(*ixheaacd_covariance_matrix_calc)
(WORD32 *, ixheaacd_lpp_trans_cov_matrix *,
WORD32) = &ixheaacd_covariance_matrix_calc_armv7;
VOID(*ixheaacd_covariance_matrix_calc_2)
(ixheaacd_lpp_trans_cov_matrix *, WORD32 *, WORD32,
WORD16) = &ixheaacd_covariance_matrix_calc_2_armv7;
/* Windowed overlap-add helpers (16-bit and 32-bit output variants). */
VOID(*ixheaacd_over_lap_add1)
(WORD32 *, WORD32 *, WORD16 *, const WORD16 *, WORD16, WORD16,
WORD16) = &ixheaacd_over_lap_add1_armv7;
VOID(*ixheaacd_over_lap_add2)
(WORD32 *, WORD32 *, WORD32 *, const WORD16 *, WORD16, WORD16,
WORD16) = &ixheaacd_over_lap_add2_armv7;
/* Parametric-stereo decorrelation filters. */
VOID(*ixheaacd_decorr_filter2)
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *p_buf_left_real, WORD32 *p_buf_left_imag,
WORD32 *p_buf_right_real, WORD32 *p_buf_right_imag,
ia_ps_tables_struct *ps_tables_ptr,
WORD16 *transient_ratio) = &ixheaacd_decorr_filter2_armv7;
VOID(*ixheaacd_decorr_filter1)
(ia_ps_dec_struct *ptr_ps_dec, ia_ps_tables_struct *ps_tables_ptr,
WORD16 *transient_ratio) = &ixheaacd_decorr_filter1_armv7;
/* Positive 16-bit division helper. */
WORD32(*ixheaacd_divide16_pos)
(WORD32 op1, WORD32 op2) = &ixheaacd_divide16_pos_armv7;
/* Parametric-stereo decorrelation and stereo rotation. */
VOID(*ixheaacd_decorrelation)
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *p_buf_left_real, WORD32 *p_buf_left_imag,
WORD32 *p_buf_right_real, WORD32 *p_buf_right_imag,
ia_ps_tables_struct *ps_tables_ptr) = &ixheaacd_decorrelation_armv7;
VOID(*ixheaacd_apply_rot)
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *qmf_left_real, WORD32 *qmf_left_imag,
WORD32 *qmf_right_real, WORD32 *qmf_right_imag,
ia_sbr_tables_struct *sbr_tables_ptr,
const WORD16 *ptr_resol) = &ixheaacd_apply_rot_armv7;
/* SBR energy-to-amplitude conversion (low-power and high-quality modes). */
VOID(*ixheaacd_conv_ergtoamplitudelp)
(WORD32 bands, WORD16 noise_e, WORD16 *nrg_sine, WORD16 *nrg_gain,
WORD16 *noise_level_mant,
WORD16 *sqrt_table) = &ixheaacd_conv_ergtoamplitudelp_armv7;
VOID(*ixheaacd_conv_ergtoamplitude)
(WORD32 bands, WORD16 noise_e, WORD16 *nrg_sine, WORD16 *nrg_gain,
WORD16 *noise_level_mant,
WORD16 *sqrt_table) = &ixheaacd_conv_ergtoamplitude_armv7;
/* Common-exponent rescaling of QMF subband samples. */
VOID(*ixheaacd_adjust_scale)
(WORD32 **re, WORD32 **im, WORD32 sub_band_start, WORD32 sub_band_end,
WORD32 start_pos, WORD32 next_pos, WORD32 shift,
FLAG low_pow_flag) = &ixheaacd_adjust_scale_armv7;
/* NOTE(review): doubled "ixheaacd_ixheaacd_" prefix is the shipped symbol
 * name; renaming would break every call site -- keep as is. */
WORD16(*ixheaacd_ixheaacd_expsubbandsamples)
(WORD32 **re, WORD32 **im, WORD32 sub_band_start, WORD32 sub_band_end,
WORD32 start_pos, WORD32 next_pos,
FLAG low_pow_flag) = &ixheaacd_expsubbandsamples_armv7;
/* SBR energy estimate per QMF subband ("enery" sic -- public symbol). */
VOID(*ixheaacd_enery_calc_per_subband)
(WORD32 start_pos, WORD32 next_pos, WORD32 sub_band_start, WORD32 sub_band_end,
WORD32 frame_exp, WORD16 *nrg_est_mant, FLAG low_pow_flag,
ia_sbr_tables_struct *ptr_sbr_tables,
WORD32 *ptr_qmf_matrix) = &ixheaacd_enery_calc_per_subband_armv7;
/* SBR sine/noise addition for harmonic index 0/2 (low-power mode). */
VOID(*ixheaacd_harm_idx_zerotwolp)
(WORD32 *ptr_real_buf, WORD16 *ptr_gain_buf, WORD32 scale_change,
WORD16 *ptr_sine_level_buf, const WORD32 *ptr_rand_ph,
WORD16 *noise_level_mant, WORD32 num_sub_bands, FLAG noise_absc_flag,
WORD32 harm_index) = &ixheaacd_harm_idx_zerotwolp_armv7;
/* TNS all-pole filtering (32-bit and 16-bit coefficient variants). */
VOID(*ixheaacd_tns_ar_filter_fixed)
(WORD32 *spectrum, WORD32 size, WORD32 inc, WORD32 *lpc, WORD32 order,
WORD32 shift_value, WORD scale_spec) = &ixheaacd_tns_ar_filter_fixed_armv7;
VOID(*ixheaacd_tns_ar_filter)
(WORD32 *spectrum, WORD32 size, WORD32 inc, WORD16 *lpc, WORD32 order,
WORD32 shift_value, WORD scale_spec,
WORD32 *ptr_filter_state) = &ixheaacd_tns_ar_filter_armv7;
/* PARCOR-to-LPC conversion for TNS. */
VOID(*ixheaacd_tns_parcor_lpc_convert)
(WORD16 *parcor, WORD16 *lpc, WORD16 *scale,
WORD order) = &ixheaacd_tns_parcor_lpc_convert_armv7;
/* Magnitude scan used to pick a headroom shift before the IMDCT. */
WORD32(*ixheaacd_calc_max_spectral_line)
(WORD32 *ptr_tmp, WORD32 size) = &ixheaacd_calc_max_spectral_line_armv7;
/* IMDCT pre/post twiddles and overlap-add helpers. */
VOID(*ixheaacd_post_twiddle)
(WORD32 out_ptr[], WORD32 spec_data[],
ia_aac_dec_imdct_tables_struct *ptr_imdct_tables,
WORD npoints) = &ixheaacd_post_twiddle_armv7;
VOID(*ixheaacd_post_twid_overlap_add)
(WORD16 pcm_out[], WORD32 spec_data[],
ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints,
WORD32 *ptr_overlap_buf, WORD16 q_shift, const WORD16 *window,
WORD16 ch_fac) = &ixheaacd_post_twid_overlap_add_armv7;
VOID(*ixheaacd_neg_shift_spec)
(WORD32 *coef, WORD16 *out, WORD16 q_shift,
WORD16 ch_fac) = &ixheaacd_neg_shift_spec_armv7;
VOID(*ixheaacd_spec_to_overlapbuf)
(WORD32 *ptr_overlap_buf, WORD32 *ptr_spec_coeff, WORD32 q_shift,
WORD32 size) = &ixheaacd_spec_to_overlapbuf_armv7;
VOID(*ixheaacd_overlap_buf_out)
(WORD16 *out_samples, WORD32 *ptr_overlap_buf, WORD32 size,
const WORD16 ch_fac) = &ixheaacd_overlap_buf_out_armv7;
VOID(*ixheaacd_overlap_out_copy)
(WORD16 *out_samples, WORD32 *ptr_overlap_buf, WORD32 *ptr_overlap_buf1,
const WORD16 ch_fac) = &ixheaacd_overlap_out_copy_armv7;
VOID(*ixheaacd_pretwiddle_compute)
(WORD32 *spec_data1, WORD32 *spec_data2, WORD32 *out_ptr,
ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints4,
WORD32 neg_expo) = &ixheaacd_pretwiddle_compute_armv7;
/* Core FFT used inside the IMDCT. */
VOID(*ixheaacd_imdct_using_fft)
(ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD32 npoints,
WORD32 *ptr_x, WORD32 *ptr_y) = &ixheaacd_imdct_using_fft_armv7;
/* Power-of-two complex FFT (forward/inverse selected by fft_mode). */
VOID(*ixheaacd_complex_fft_p2)
(WORD32 *xr, WORD32 *xi, WORD32 nlength, WORD32 fft_mode,
WORD32 *preshift) = &ixheaacd_complex_fft_p2_armv7;
/* MPS 64-point complex FFT and synthesis twiddle/output helpers. */
VOID(*ixheaacd_mps_complex_fft_64)
(WORD32 *ptr_x, WORD32 *fin_re, WORD32 *fin_im,
WORD32 nlength) = &ixheaacd_mps_complex_fft_64_armv7;
VOID(*ixheaacd_mps_synt_pre_twiddle)
(WORD32 *ptr_in, WORD32 *table_re, WORD32 *table_im,
WORD32 resolution) = &ixheaacd_mps_synt_pre_twiddle_armv7;
VOID(*ixheaacd_mps_synt_post_twiddle)
(WORD32 *ptr_in, WORD32 *table_re, WORD32 *table_im,
WORD32 resolution) = &ixheaacd_mps_synt_post_twiddle_armv7;
VOID(*ixheaacd_calc_pre_twid)
(WORD32 *ptr_x, WORD32 *r_ptr, WORD32 *i_ptr, WORD32 nlength,
const WORD32 *cos_ptr, const WORD32 *sin_ptr) = &ixheaacd_calc_pre_twid_armv7;
VOID(*ixheaacd_calc_post_twid)
(WORD32 *ptr_x, WORD32 *r_ptr, WORD32 *i_ptr, WORD32 nlength,
const WORD32 *cos_ptr, const WORD32 *sin_ptr) = &ixheaacd_calc_post_twid_armv7;
VOID(*ixheaacd_mps_synt_post_fft_twiddle)
(WORD32 resolution, WORD32 *fin_re, WORD32 *fin_im, WORD32 *table_re,
WORD32 *table_im, WORD32 *state) = &ixheaacd_mps_synt_post_fft_twiddle_armv7;
VOID(*ixheaacd_mps_synt_out_calc)
(WORD32 resolution, WORD32 *out, WORD32 *state,
const WORD32 *filter_coeff) = &ixheaacd_mps_synt_out_calc_armv7;
/* Low-delay decoder FFT helpers. */
VOID(*ixheaacd_fft_15_ld)
(WORD32 *inp, WORD32 *op, WORD32 *fft3out,
UWORD8 *re_arr_tab_sml_240_ptr) = &ixheaacd_fft_15_ld_armv7;
/* NOTE(review): initializer uses the "ia_" prefix, unlike its siblings --
 * presumably the assembly symbol's historical name; confirm it resolves. */
VOID(*ixheaacd_aac_ld_dec_rearrange)
(WORD32 *ip, WORD32 *op, WORD32 mdct_len_2,
UWORD8 *re_arr_tab) = &ia_aac_ld_dec_rearrange_armv7;
/* NOTE(review): bound to ixheaacd_imdct_using_fft_armv7, not a dedicated
 * "_fft32x32_ld_armv7" symbol -- looks like deliberate reuse; confirm. */
VOID (*ixheaacd_fft32x32_ld)
(ia_aac_dec_imdct_tables_struct *imdct_tables_ptr, WORD32 npoints,
WORD32 *ptr_x, WORD32 *ptr_y) = &ixheaacd_imdct_using_fft_armv7;
VOID (*ixheaacd_fft32x32_ld2)
(ia_aac_dec_imdct_tables_struct *imdct_tables_ptr, WORD32 npoints,
WORD32 *ptr_x, WORD32 *ptr_y) = &ixheaacd_fft32x32_ld2_armv7;
/* Saturating increment of a negative exponent (scalar ARM helper). */
WORD16 (*ixheaacd_neg_expo_inc)(WORD16 neg_expo) = &ixheaacd_neg_expo_inc_arm;
/* 8-point inverse DIT FFT. */
VOID (*ixheaacd_inv_dit_fft_8pt)
(WORD32 *x, WORD32 *real, WORD32 *imag) = &ixheaacd_inv_dit_fft_8pt_armv7;
/* Applies scale factors to inverse-quantized spectral lines. */
VOID (*ixheaacd_scale_factor_process)
(WORD32 *x_invquant, WORD16 *scale_fact, WORD no_band, WORD8 *width,
WORD32 *scale_tables_ptr, WORD32 total_channels, WORD32 object_type,
WORD32 aac_sf_data_resil_flag) = &ixheaacd_scale_factor_process_armv7;

View file

@ -0,0 +1,102 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.extern ixheaacd_cos_sin_mod
.hidden ixheaacd_cos_sin_mod
.global ixheaacd_fwd_modulation
@ ixheaacd_fwd_modulation -- QMF forward modulation.
@ r0 = input time samples (64 WORD32), r1 = real-part work/output buffer,
@ r2 = imag/sum output buffer, r3 = 4th arg, [sp] = 5th arg (table base).
@ NOTE(review): exact argument roles inferred from usage below -- confirm
@ against the C prototype of ixheaacd_fwd_modulation.
ixheaacd_fwd_modulation:
STMFD sp!, {r3-r9, r12, lr}
MOV r5, r2
LDR r2, [sp, #0x24]
MOV lr, r1
MOV r4, r1
MOV r1, #0x1f
MOV r7, r5
@ r8 walks backwards from input[63] (0xfc = 63 * 4 bytes).
ADD r8, r0, #0xfc
MOV r6, r3
@ Fold 64 inputs into 32 (diff, sum) pairs: both operands are pre-scaled
@ by >>4 for headroom; the difference uses saturating QSUB.
LOOP1:
LDR r3, [r0], #4
LDR r12, [r8], #-4
MOV r3, r3, ASR #4
MOV r12, r12, ASR #4
QSUB r9, r3, r12
ADD r3, r3, r12
STR r9, [lr], #4
SUBS r1, r1, #1
STR r3, [r7], #4
BPL LOOP1
@ Call ixheaacd_cos_sin_mod(diff_buf, r6, table+3468, table+3464):
@ 0xd8 << 4 + 8 = 3464 bytes into the 5th-arg table.
MOV r1, r6
MOV r0, r4
MOV r3, #0xd8
LSL r3, r3, #4
ADD r3, r3, #8
ADD r3, r2, r3
ADD r2, r3, #4
BL ixheaacd_cos_sin_mod
@ r2 = count from two WORD16 struct fields; early return when <= 0
@ (UAL spelling LDMFDLE replaces the pre-UAL LDMLEFD kept below).
LDRSH r1, [r6, #0x2c]
LDRSH r2, [r6, #0x2a]
LDR r0, [r6, #0x18]
SUBS r2, r1, r2
@ LDMLEFD sp!, {r3-r9, r12, pc}
LDMFDLE sp!, {r3-r9, r12, pc}
@ Complex post-modulation: multiply each (r4, r5) pair by a packed
@ cos/sin twiddle word from [r6 + 0x18] using 32x16 SMULW products.
LOOP2:
LDR r1, [r0], #4
LDR r12, [r5, #0]
LDR r3, [r4, #0]
SMULWT r6, r12, r1
SMULWB lr, r3, r1
SMULWB r12, r12, r1
SMULWT r1, r3, r1
ADD lr, lr, r6
QSUB r1, r12, r1
MOV r3, lr, LSL #1
MOV r1, r1, LSL #1
STR r3, [r4], #4
SUBS r2, r2, #1
STR r1, [r5], #4
BGT LOOP2
LDMFD sp!, {r3-r9, r12, pc}

View file

@ -0,0 +1,109 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_harm_idx_zerotwolp_armv7
@ ixheaacd_harm_idx_zerotwolp_armv7 -- SBR low-power sine/noise injection
@ for harmonic index 0 or 2 (C prototype: ptr_real_buf, ptr_gain_buf,
@ scale_change, ptr_sine_level_buf, ptr_rand_ph, noise_level_mant,
@ num_sub_bands, noise_absc_flag, harm_index).
@ r0 = ptr_real_buf, r1 = ptr_gain_buf (mant/exp WORD16 pairs),
@ r2 = scale_change, r3 = ptr_sine_level_buf.
ixheaacd_harm_idx_zerotwolp_armv7:
STMFD sp!, {r4-r12}
@ r5 = scale_change - 1 (bias applied to each stored gain exponent).
SUB r5, r2, #1
MOV r2, #-1
@ Stack args after the 36-byte register push:
@ r6 = harm_index, r12 = noise_absc_flag, r9 = ptr_rand_ph,
@ r10 = noise_level_mant, r4 = num_sub_bands.
LDR r6, [sp, #52]
LDR r12, [sp, #48]
ADD r10, sp, #36
LDR r4, [sp, #44]
LDMIA r10, {r9, r10}
CMP r4, #0
BLE EXIT
CMP r12, #0
BNE NO_NOISE
@ Noise-enabled path: scale each line by its gain (SMULWB + exponent
@ shift); where the sine level is zero, add random-phase noise instead.
LOOP1:
LDR r12, [r0, #0]
LDRSH r7, [r1], #2
LDRSH r8, [r1], #2
ADD r2, r2, #1
SMULWB r7, r12, r7
SUBS r8, r8, r5
LDRH r12, [r3], #4
RSBLE r8, r8, #0
MOVLE r8, r7, ASR r8
MOVGT r8, r7, LSL r8
@ Sine level for this band (taken from packed WORD16, every 4 bytes).
MOVS r12, r12, LSL #16
BEQ NEXT
@ harm_index == 0 adds the sine, otherwise (2) subtracts -- saturating.
CMP r6, #0
QADDEQ r8, r8, r12
QSUBNE r8, r8, r12
SUBS r4, r4, #1
B STORE
NEXT:
@ No sine on this band: mix in rand_ph[band] * noise_level_mant[band].
LDR r7, [r9, r2, LSL #2]
ADD r12, r10, r2, LSL #2
LDRSH r12, [r12, #0]
SUBS r4, r4, #1
SMULTB r7, r7, r12
ADD r8, r8, r7, LSL #1
STORE:
STR r8, [r0], #4
BGT LOOP1
B EXIT
@ Noise-absent path: always scale and add/subtract the sine component.
NO_NOISE:
LOOP2:
LDR r12, [r0, #0]
LDRSH r7, [r1], #2
LDRSH r9, [r1], #2
LDRH r10, [r3], #4
SMULWB r7, r12, r7
SUBS r9, r9, r5
RSBMI r9, r9, #0
MOVMI r12, r7, ASR r9
MOVPL r12, r7, LSL r9
MOV r7, r10, LSL #16
CMP r6, #0
QADDEQ r12, r12, r7
QSUBNE r12, r12, r7
SUBS r4, r4, #1
STR r12, [r0], #4
BGT LOOP2
EXIT:
LDMFD sp!, {r4-r12}
BX lr

View file

@ -0,0 +1,825 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_imdct_using_fft_armv7
@ ixheaacd_imdct_using_fft_armv7 -- NEON complex FFT used by the IMDCT.
@ r0 = ia_aac_dec_imdct_tables_struct *, r1 = npoints (64..1024, power of
@ two), r2 = ptr_x (interleaved re/im input), r3 = ptr_y (output/work).
@ Structure: one digit-reversed first stage (radix 8 or radix 4 depending
@ on npoints), then r8 iterative radix-4 stages with twiddle multiplies.
ixheaacd_imdct_using_fft_armv7:
STMFD sp!, {r4-r12, lr}
vpush {d8-d15}
@ LDR r4, [sp, #0x68]
@ LDR r5, [sp, #0x68+4]
@ LDR r6, [sp, #0x68+8]
@ LDR r7, [sp, #0x68+12]
@ r4..r7 = digit-reverse tables embedded in the ROM struct at fixed byte
@ offsets -- NOTE(review): 11600/11856/11920/11936 must track the layout
@ of ia_aac_dec_imdct_tables_struct; confirm on any struct change.
LDR r8, =11600
ADD r4, r0, r8
LDR r8, =11856
ADD r5, r0, r8
LDR r8, =11920
ADD r6, r0, r8
LDR r8, =11936
ADD r7, r0, r8
@ Dispatch on npoints: select first-stage radix, remaining radix-4 stage
@ count (r8) and the matching digit-reverse table (r4).
COND_1: CMP r1, #0x400
BNE COND_2
MOV r8, #4
B RADIX_4_FIRST_START
COND_2: CMP r1, #0x200
BNE COND_3
MOV r8, #3
MOV r4, r5
B RADIX_8_FIRST_START
COND_3: CMP r1, #0x100
BNE COND_4
MOV r8, #3
MOV r4, r5
B RADIX_4_FIRST_START
COND_4: CMP r1, #0x80
BNE COND_5
MOV r8, #2
MOV r4, r6
B RADIX_8_FIRST_START
COND_5: CMP r1, #0x40
BNE COND_6
MOV r8, #2
MOV r4, r6
B RADIX_4_FIRST_START
COND_6:
MOV r8, #1
MOV r4, r7
@ First radix-8 stage: npoints/32 iterations (r9); r1 doubled to the
@ byte stride of one complex element pair.  Inputs are gathered through
@ the digit-reverse table with de-interleaving VLD2 loads.
RADIX_8_FIRST_START:
LSR r9 , r1, #5
LSL r1, r1, #1
RADIX_8_FIRST_LOOP:
MOV r5 , r2
MOV r6 , r2
MOV r7 , r2
MOV r11 , r2
LDRB r12, [r4, #0]
ADD r5, r5, r12, LSL #3
VLD2.32 {d0[0], d2[0]}, [r5] , r1
ADD r5, r5, r1
VLD2.32 {d8[0], d10[0]}, [r5] , r1
SUB r5, r5, r1, LSL #1
VLD2.32 {d4[0], d6[0]}, [r5] , r1
ADD r5, r5, r1
VLD2.32 {d12[0], d14[0]}, [r5], r1
SUB r5, r5, r1, LSL #2
LDRB r12, [r4, #1]
ADD r6, r6, r12, LSL #3
VLD2.32 {d0[1], d2[1]}, [r6] , r1
ADD r6, r6, r1
VLD2.32 {d8[1], d10[1]}, [r6] , r1
SUB r6, r6, r1, LSL #1
VLD2.32 {d4[1], d6[1]}, [r6] , r1
ADD r6, r6, r1
VLD2.32 {d12[1], d14[1]}, [r6], r1
SUB r6, r6, r1, LSL #2
LDRB r12, [r4, #2]
ADD r7, r7, r12 , LSL #3
VLD2.32 {d1[0], d3[0]}, [r7] , r1
ADD r7, r7, r1
VLD2.32 {d9[0], d11[0]}, [r7] , r1
SUB r7, r7, r1, LSL #1
LDRB r12, [r4, #3]
ADD r11, r11, r12 , LSL #3
VLD2.32 {d1[1], d3[1]}, [r11] , r1
ADD r11, r11, r1
VLD2.32 {d9[1], d11[1]}, [r11] , r1
SUB r11, r11, r1, LSL #1
@ Radix-8 butterflies, interleaved with the remaining loads to hide
@ load latency.
VADD.I32 q8, q0, q4
VLD2.32 {d5[0], d7[0]}, [r7] , r1
ADD r7, r7, r1
VSUB.I32 q9, q0, q4
VLD2.32 {d13[0], d15[0]}, [r7], r1
SUB r7, r7, r1, LSL #2
VADD.I32 q0, q1, q5
VLD2.32 {d5[1], d7[1]}, [r11] , r1
ADD r11, r11, r1
VSUB.I32 q4, q1, q5
VLD2.32 {d13[1], d15[1]}, [r11], r1
SUB r11, r11, r1, LSL #2
ADD r4, r4, #4
ADD r5, r5, r1, LSR #1
ADD r6, r6, r1, LSR #1
ADD r7, r7, r1, LSR #1
ADD r11, r11, r1, LSR #1
VADD.I32 q1, q2, q6
VLD2.32 {d28[0], d30[0]}, [r5] , r1
VSUB.I32 q5, q2, q6
VLD2.32 {d20[0], d22[0]}, [r5] , r1
VADD.I32 q2, q3, q7
VLD2.32 {d24[0], d26[0]}, [r5] , r1
VSUB.I32 q6, q3, q7
VLD2.32 {d28[1], d30[1]}, [r6] , r1
VADD.S32 q3, q9, q6
VLD2.32 {d20[1], d22[1]}, [r6] , r1
VSUB.S32 q7, q9, q6
VLD2.32 {d24[1], d26[1]}, [r6] , r1
VSUB.S32 q6, q4, q5
VLD2.32 {d29[0], d31[0]}, [r7] , r1
VADD.S32 q9, q4, q5
VLD2.32 {d21[0], d23[0]}, [r7] , r1
VADD.S32 q4, q8, q1
VLD2.32 {d25[0], d27[0]}, [r7] , r1
VSUB.S32 q5, q8, q1
VLD2.32 {d29[1], d31[1]}, [r11] , r1
VADD.S32 q8, q0, q2
VLD2.32 {d21[1], d23[1]}, [r11] , r1
VSUB.S32 q0, q0, q2
VLD2.32 {d25[1], d27[1]}, [r11] , r1
@ Register pressure exceeds q0-q15: intermediates are spilled to the
@ stack with VPUSH/VPOP around the +/-sqrt(1/2) twiddle multiplies.
VPUSH {q3}
VPUSH {q7}
VLD2.32 {d2[0], d4[0]}, [r5], r1
VADD.I32 q7, q14, q12
VLD2.32 {d2[1], d4[1]}, [r6] , r1
VSUB.I32 q3, q14, q12
VLD2.32 {d3[0], d5[0]}, [r7] , r1
VADD.I32 q14, q15, q13
VLD2.32 {d3[1], d5[1]}, [r11] , r1
VSUB.I32 q12, q15, q13
VADD.I32 q15, q10, q1
VSUB.I32 q13, q10, q1
VADD.I32 q10, q11, q2
VSUB.I32 q1, q11, q2
VADD.S32 q11, q7, q15
VSUB.S32 q2, q7, q15
VADD.S32 q7, q14, q10
VSUB.S32 q15, q14, q10
VADD.S32 q14, q3, q12
VSUB.S32 q10, q3, q12
VADD.S32 q3, q13, q1
VSUB.S32 q12, q13, q1
VADD.S32 q1 , q14, q12
VSUB.S32 q13, q14, q12
VSUB.S32 q12, q3, q10
VUZP.16 d2, d3
VADD.S32 q14, q3, q10
VUZP.16 d26, d27
VADD.S32 q3, q4, q11
VUZP.16 d24, d25
VSUB.S32 q10, q4, q11
VUZP.16 d28, d29
VADD.S32 q4, q8, q7
@ 0x5a82 = round(sqrt(1/2) * 2^15): Q15 twiddle for the odd radix-8 legs.
LDR r14, =0x5a82
VSUB.S32 q11, q8, q7
VADD.S32 q8, q5, q15
VSUB.S32 q7, q5, q15
VSUB.S32 q5, q0, q2
VADD.S32 q15, q0, q2
VPOP {q0}
VPOP {q2}
VPUSH {q3-q4}
VPUSH {q10}
VDUP.16 d20, r14
VMULL.u16 q4, d26, d20
VMULL.u16 q3, d28, d20
VPUSH {q7-q8}
VPUSH {q5}
VSHR.S32 q4, q4, #15
VSHR.S32 q3, q3, #15
VQDMLAL.S16 q4, d27, d20
VQDMLAL.S16 q3, d29, d20
VPUSH {q11}
VMULL.u16 q13, d24, d20
VMULL.u16 q14, d2, d20
VADD.S32 q5, q2, q4
VSUB.S32 q7, q2, q4
VADD.S32 q8, q6, q3
VSUB.S32 q6, q6, q3
VSHR.S32 q13, q13, #15
VSHR.S32 q14, q14, #15
VQDMLAL.S16 q13, d25, d20
VQDMLAL.S16 q14, d3, d20
VPOP {q1}
VPOP {q10}
VADD.S32 q2, q0, q13
VSUB.S32 q4, q0, q13
VADD.S32 q11, q9, q14
VSUB.S32 q3, q9, q14
VPOP {q14}
VPOP {q9}
VPOP {q0}
VPOP {q12, q13}
@ Transpose results into output order (VTRN/VSWP) with a <<3 post-scale,
@ then store interleaved with VST2.
VTRN.32 q12, q5
VSHL.S32 q12, q12, #3
VTRN.32 q9, q2
VSHL.S32 q5, q5, #3
VSHL.S32 q9, q9, #3
VTRN.32 q0, q7
VSHL.S32 q2, q2, #3
VSHL.S32 q0, q0, #3
VTRN.32 q14, q4
VSHL.S32 q7, q7, #3
VSHL.S32 q14, q14, #3
VTRN.32 q13, q6
VSHL.S32 q4, q4, #3
VSHL.S32 q13, q13, #3
VTRN.32 q10, q3
VSHL.S32 q6, q6, #3
VSHL.S32 q10, q10, #3
VTRN.32 q1, q8
VSHL.S32 q3, q3, #3
VSHL.S32 q1, q1, #3
VTRN.32 q15, q11
VSHL.S32 q8, q8, #3
VSHL.S32 q15, q15, #3
VSWP d18, d25
VSHL.S32 q11, q11, #3
VSWP d4, d11
VSWP d1, d28
VSWP d15, d8
VSWP d20, d27
VSWP d6, d13
VSWP d30, d3
VSWP d22, d17
VST2.32 {q12, q13}, [r3]!
VST2.32 {q0, q1}, [r3]!
VST2.32 {q5, q6}, [r3]!
VST2.32 {q7, q8}, [r3]!
VMOV q5, q11
VST2.32 {q9, q10}, [r3]!
VST2.32 {q14, q15}, [r3]!
VST2.32 {q2, q3}, [r3]!
VST2.32 {q4, q5}, [r3]!
SUBS r9, r9, #1
BNE RADIX_8_FIRST_LOOP
@ Rewind r3 to the start of the output (SUB r3, r1, LSL #3 is the
@ two-operand form: r3 -= r1 << 3) and set up the shared stage loop:
@ r5 = groups, r4 = twiddle step, r6 = inner count.
LSR r1, r1, #1
SUB r3, r1, LSL #3
MOV r5, #8
MOV r4, #32
LSR r6, r1, #5
B RADIX_4_FIRST_ENDS
RADIX_8_FIRST_ENDS:
@ First radix-4 stage variant: npoints/16 iterations (r9).
RADIX_4_FIRST_START:
LSR r9 , r1, #4
LSL r1, r1, #1
RADIX_4_LOOP:
MOV r5 , r2
MOV r6 , r2
MOV r7 , r2
MOV r11 , r2
LDRB r12, [r4, #0]
ADD r5, r5, r12, LSL #3
VLD2.32 {d0[0], d2[0]}, [r5] , r1
ADD r5, r5, r1
VLD2.32 {d8[0], d10[0]}, [r5] , r1
SUB r5, r5, r1, LSL #1
VLD2.32 {d4[0], d6[0]}, [r5] , r1
ADD r5, r5, r1
VLD2.32 {d12[0], d14[0]}, [r5], r1
LDRB r12, [r4, #1]
ADD r6, r6, r12, LSL #3
VLD2.32 {d0[1], d2[1]}, [r6] , r1
ADD r6, r6, r1
VLD2.32 {d8[1], d10[1]}, [r6] , r1
SUB r6, r6, r1, LSL #1
VLD2.32 {d4[1], d6[1]}, [r6] , r1
ADD r6, r6, r1
VLD2.32 {d12[1], d14[1]}, [r6], r1
LDRB r12, [r4, #2]
ADD r7, r7, r12, LSL #3
VLD2.32 {d1[0], d3[0]}, [r7] , r1
ADD r7, r7, r1
VLD2.32 {d9[0], d11[0]}, [r7] , r1
LDRB r12, [r4, #3]
ADD r11, r11, r12 , LSL #3
VLD2.32 {d1[1], d3[1]}, [r11] , r1
ADD r11, r11, r1
VLD2.32 {d9[1], d11[1]}, [r11] , r1
SUB r7, r7, r1, LSL #1
@ Radix-4 butterflies (no twiddles in the first stage), <<2 post-scale.
VADD.S32 q8, q0, q4
VLD2.32 {d5[0], d7[0]}, [r7] , r1
ADD r7, r7, r1
VADD.S32 q9, q1, q5
VLD2.32 {d13[0], d15[0]}, [r7], r1
SUB r11, r11, r1, LSL #1
VSUB.S32 q10, q0, q4
VLD2.32 {d5[1], d7[1]}, [r11] , r1
ADD r11, r11, r1
VSUB.S32 q11, q1, q5
VLD2.32 {d13[1], d15[1]}, [r11], r1
ADD r4, r4, #4
VADD.S32 q12, q2, q6
VADD.S32 q13, q3, q7
VSUB.S32 q14, q2, q6
VSUB.S32 q15, q3, q7
VADD.S32 q0, q8, q12
VADD.S32 q1, q9, q13
VSUB.S32 q2, q8, q12
VSUB.S32 q3, q9, q13
VADD.S32 q4, q10, q15
VSUB.S32 q5, q11, q14
VADD.S32 q7, q11, q14
VSUB.S32 q6, q10, q15
VTRN.32 q0, q4
VSHL.S32 q0, q0, #2
VTRN.32 q2, q6
VSHL.S32 q4, q4, #2
VSHL.S32 q2, q2, #2
VTRN.32 q1, q5
VSHL.S32 q6, q6, #2
VSHL.S32 q1, q1, #2
VTRN.32 q3, q7
VSHL.S32 q5, q5, #2
VSHL.S32 q3, q3, #2
VSWP d4, d1
VSHL.S32 q7, q7, #2
VSWP d12, d9
VSWP d6, d3
VSWP d14, d11
VST2.32 {q0, q1}, [r3]!
VST2.32 {q4, q5}, [r3]!
VST2.32 {q2, q3}, [r3]!
VST2.32 {q6, q7}, [r3]!
SUBS r9, r9, #1
BNE RADIX_4_LOOP
@ Rewind r3 (r3 -= r1 << 3) and set stage-loop parameters.
LSR r1, r1, #1
SUB r3, r1, LSL #3
MOV r5, #4
MOV r4, #64
LSR r6, r1, #4
RADIX_4_FIRST_ENDS:
@ Iterative radix-4 stages: r0 now points at the twiddle tables inside
@ the ROM struct (byte offset 8528 -- NOTE(review): must track the
@ struct layout).  [sp] keeps the data base pointer across stages.
PUSH {r3}
LSR r5, r5, #2
LDR r14, =8528
ADD r0, r0, r14
OUTER_LOOP_R4:
LDR r14, [sp]
MOV r7, r5
MOV r2, #0
MOV r9, r0
LSL r12 , r5, #5
@ Load the three twiddle factors (w1, w2, w3) for this butterfly group
@ as packed 16-bit hi/lo lanes.
MIDDLE_LOOP_R4:
VLD2.16 {d0[0], d1[0]}, [r9], r2
VLD2.16 {d2[0], d3[0]}, [r9], r2
ADD r11, r2, r4, LSL #2
VLD2.16 {d4[0], d5[0]}, [r9]
ADD r10, r0, r11
VLD2.16 {d0[1], d1[1]}, [r10], r11
VLD2.16 {d2[1], d3[1]}, [r10], r11
ADD r2, r11, r4, LSL #2
VLD2.16 {d4[1], d5[1]}, [r10]
ADD r9, r0, r2
VLD2.16 {d0[2], d1[2]}, [r9], r2
VLD2.16 {d2[2], d3[2]}, [r9], r2
ADD r11, r2, r4, LSL #2
VLD2.16 {d4[2], d5[2]}, [r9]
ADD r10, r0, r11
VLD2.16 {d0[3], d1[3]}, [r10], r11
VLD2.16 {d2[3], d3[3]}, [r10], r11
ADD r2, r11, r4, LSL #2
VLD2.16 {d4[3], d5[3]}, [r10]
ADD r9, r0, r2
MOV r10, r6
@ Twiddle-multiply x1..x3 in split 16-bit halves (VMULL/VMLAL/VMLSL +
@ >>15) to keep 32-bit precision, then combine as a radix-4 butterfly.
INNER_LOOP_R4:
VLD2.32 {q3, q4}, [r14], r12
VSHR.S32 q3, q3, #1
VLD4.16 {q5, q6}, [r14], r12
VSHR.S32 q4, q4, #1
VSHR.U16 d10, d10, #1
VLD4.16 {q7, q8}, [r14], r12
VSHR.U16 d12, d12, #1
VMULL.S16 q11, d10, d0
VMLSL.S16 q11, d12, d1
VLD4.16 {q9, q10}, [r14], r12
VMULL.S16 q12, d10, d1
VMLAL.S16 q12, d12, d0
VSHR.U16 d14, d14, #1
VSHR.U16 d16, d16, #1
SUB r14, r14, r12, LSL #2
VSHR.U16 d18, d18, #1
VSHR.U16 d20, d20, #1
VMULL.S16 q13, d14, d2
VMLSL.S16 q13, d16, d3
VSHR.S32 q11, q11, #15
VMULL.S16 q14, d14, d3
VMLAL.S16 q14, d16, d2
VMULL.S16 q15, d18, d4
VMLSL.S16 q15, d20, d5
VMLAL.S16 q11, d11, d0
VMLSL.S16 q11, d13, d1
VSHR.S32 q12, q12, #15
VSHR.S32 q13, q13, #15
VSHR.S32 q14, q14, #15
VSHR.S32 q15, q15, #15
VMLAL.S16 q12, d11, d1
VMLAL.S16 q12, d13, d0
VMULL.S16 q5, d18, d5
VMLAL.S16 q5, d20, d4
VMLAL.S16 q13, d15, d2
VMLSL.S16 q13, d17, d3
VMLAL.S16 q14, d15, d3
VMLAL.S16 q14, d17, d2
VMLAL.S16 q15, d19, d4
VMLSL.S16 q15, d21, d5
VSHR.S32 q5, q5, #15
VMLAL.S16 q5, d19, d5
VMLAL.S16 q5, d21, d4
@ First column of the stage (r7 == r5): the halved-precision lanes of
@ x1..x3 are reloaded as exact >>1 scalar values to avoid error there.
CMP r7, r5
BNE BYPASS_IF
ADD r14, r14, r12
LDR r3, [r14], r12
ASR r3, r3, #1
VMOV.32 d22[0], r3
LDR r3, [r14], r12
ASR r3, r3, #1
VMOV.32 d26[0], r3
LDR r3, [r14]
ASR r3, r3, #1
VMOV.32 d30[0], r3
SUB r14, r14, r12, LSL #1
ADD r14, r14, #4
LDR r3, [r14], r12
ASR r3, r3, #1
VMOV.32 d24[0], r3
LDR r3, [r14], r12
ASR r3, r3, #1
VMOV.32 d28[0], r3
LDR r3, [r14], r12
ASR r3, r3, #1
VMOV.32 d10[0], r3
SUB r14, r14, #4
SUB r14, r14, r12, LSL #2
BYPASS_IF:
@ Radix-4 combine of x0 (q3/q4) with the twiddled x1..x3, stored back
@ in place with VST2.
VADD.S32 q6, q3, q13
VADD.S32 q7, q4, q14
VSUB.S32 q3, q3, q13
VSUB.S32 q4, q4, q14
VADD.S32 q8, q11, q15
VADD.S32 q9, q12, q5
VSUB.S32 q15, q11, q15
VSUB.S32 q14, q12, q5
VADD.S32 q10, q6, q8
VADD.S32 q11, q7, q9
VADD.S32 q12, q3, q14
VSUB.S32 q13, q4, q15
VSUB.S32 q6, q6, q8
VST2.32 {q10, q11}, [r14], r12
VSUB.S32 q7, q7, q9
VSUB.S32 q8, q3, q14
VST2.32 {q12, q13}, [r14], r12
VADD.S32 q9, q4, q15
VST2.32 {q6, q7}, [r14], r12
VST2.32 {q8, q9}, [r14], r12
SUBS r10, r10, #1
BNE INNER_LOOP_R4
SUB r14, r14, r1, LSL #3
ADD r14, r14, #32
SUBS r7, r7, #1
BNE MIDDLE_LOOP_R4
@ Next stage: 4x more groups, 4x fewer inner iterations and twiddle step.
LSR r4, r4, #2
LSL r5, r5, #2
LSR r6, r6, #2
SUBS r8, r8, #1
BNE OUTER_LOOP_R4
END_LOOPS:
POP {r3}
vpop {d8-d15}
LDMFD sp!, {r4-r12, pc}

View file

@ -0,0 +1,163 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_inv_dit_fft_8pt_armv7
@ ixheaacd_inv_dit_fft_8pt_armv7 -- 8-point inverse decimation-in-time FFT.
@ r0 = x (interleaved re/im WORD32 pairs, 16 words), r1 = real out,
@ r2 = imag out (6 words each are written: offsets 0..0x14).
@ All butterflies use saturating QADD/QSUB; the diagonal twiddles use
@ 0x5a82 = round(sqrt(1/2) * 2^15) via SMULWB with a <<1 renormalize.
ixheaacd_inv_dit_fft_8pt_armv7:
STMFD sp!, {r4-r12, lr}
@ Even-sample butterflies: x[0], x[4], x[2], x[6] (re at 8-byte stride).
LDR r3, [r0, #0]
LDR r4, [r0, #0x20]
LDR r5, [r0, #0x24]
QADD r12, r3, r4
LDR r6, [r0, #0x30]
QSUB r8, r3, r4
LDR r3, [r0, #4]
LDR r9, [r0, #0x34]
QADD r4, r3, r5
@ 0x14-byte scratch frame for intermediates that outlive the registers.
SUB sp, sp, #0x14
QSUB r5, r3, r5
LDR lr, [r0, #0x10]
LDR r3, [r0, #0x14]
QADD r10, lr, r6
QSUB r6, lr, r6
QADD r7, r3, r9
QSUB r9, r3, r9
QADD r3, r12, r10
QSUB lr, r12, r10
QADD r12, r4, r7
QSUB r7, r4, r7
QSUB r4, r8, r9
STR r7, [sp, #8]
QADD r7, r8, r9
QADD r8, r5, r6
STR r7, [sp, #0xc]
QSUB r5, r5, r6
STMIA sp, {r8, lr}
STR r5, [sp, #0x10]
@ Odd-sample butterflies: x[1], x[5], x[3], x[7].
LDR r5, [r0, #8]
LDR lr, [r0, #0x28]
LDR r9, [r0, #0x2c]
QADD r7, r5, lr
LDR r11, [r0, #0x38]
LDR r6, [r0, #0xc]
QSUB r5, r5, lr
LDR lr, [r0, #0x18]
QADD r8, r6, r9
QSUB r6, r6, r9
QADD r10, lr, r11
QSUB r9, lr, r11
LDR r11, [r0, #0x1c]
LDR r0, [r0, #0x3c]
MOV lr, r11
QADD r11, r11, r0
QSUB r0, lr, r0
QADD lr, r7, r10
QSUB r10, r7, r10
QADD r7, r8, r11
QSUB r11, r8, r11
QSUB r8, r5, r0
QADD r5, r5, r0
QADD r0, r6, r9
QSUB r6, r6, r9
@ Final combine: even +/- odd terms, written straight to out[0..5].
QADD r9, r3, lr
QSUB r3, r3, lr
STR r9, [r1, #0]
QADD r9, r12, r7
LDR lr, [sp, #4]
STR r9, [r2, #0]
QSUB r9, r12, r7
QSUB r12, lr, r11
QADD r11, lr, r11
LDR lr, [sp, #8]
STR r11, [r1, #0x10]
QADD r7, lr, r10
QSUB r10, lr, r10
@ sqrt(1/2) in Q15 for the 45/135-degree twiddled outputs.
LDR r11, =0x00005a82
STR r10, [r2, #0x10]
QSUB r10, r8, r0
QADD r0, r8, r0
SMULWB r10, r10, r11
SMULWB r0, r0, r11
MOV r10, r10, LSL #1
QADD r8, r4, r10
LDR lr, [sp, #0]
STR r8, [r1, #4]
MOV r0, r0, LSL #1
QADD r8, lr, r0
QSUB r4, r4, r10
STR r8, [r2, #4]
QSUB r0, lr, r0
QADD r12, r12, r4
QADD r0, r7, r0
STR r12, [r1, #8]
STR r0, [r2, #8]
QADD r0, r5, r6
LDR r7, [sp, #0xc]
SMULWB r0, r0, r11
QSUB r12, r5, r6
MOV r0, r0, LSL #1
SMULWB r12, r12, r11
LDR r5, [sp, #0x10]
QSUB r4, r7, r0
MOV r12, r12, LSL #1
QADD r10, r5, r12
QADD r3, r3, r4
QADD lr, r9, r10
QADD r0, r7, r0
QSUB r10, r5, r12
STR r3, [r1, #0xc]
STR lr, [r2, #0xc]
STR r0, [r1, #0x14]
STR r10, [r2, #0x14]
ADD sp, sp, #0x14
LDMFD sp!, {r4-r12, pc}

View file

@ -0,0 +1,113 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_overlap_out_copy_armv7
.global ixheaacd_spec_to_overlapbuf_armv7
.global ixheaacd_overlap_buf_out_armv7
@ ixheaacd_overlap_buf_out_armv7(r0 = WORD16 *out_samples,
@   r1 = WORD32 *ptr_overlap_buf, r2 = size, r3 = ch_fac)
@ Emits 'size' samples from the overlap buffer, interleaved with channel
@ stride ch_fac (r3 << 1 = byte stride for WORD16 output).  QADD16 with
@ itself saturate-doubles each halfword; only the low halfword is stored
@ -- assumes each WORD32 entry fits in 16 bits (NOTE(review): confirm).
ixheaacd_overlap_buf_out_armv7:
STMFD sp!, {r4-r5}
MOV r3, r3, lsl #1
@ Two samples per iteration; size assumed even.
OUTSAMPLE_LOOP:
LDR r4, [r1], #4
LDR r5, [r1], #4
SUBS r2, r2, #2
QADD16 r4, r4, r4
QADD16 r5, r5, r5
STRH r4, [r0], r3
STRH r5, [r0], r3
BGT OUTSAMPLE_LOOP
LDMFD sp!, {r4-r5}
BX lr
@ ixheaacd_overlap_out_copy_armv7(r0 = WORD16 *out_samples,
@   r1 = WORD32 *ptr_overlap_buf, r2 = WORD32 *ptr_overlap_buf1,
@   r3 = ch_fac)
@ Fixed 64 samples (32 iterations x 2): emits ptr_overlap_buf as
@ saturate-doubled 16-bit output (see QADD16 note above the sibling
@ routine) while refilling ptr_overlap_buf from ptr_overlap_buf1.
ixheaacd_overlap_out_copy_armv7:
STMFD sp!, {r4-r9, r14}
MOV r9, #32
MOV r8, r1
MOV r3, r3, LSL #1
OUT_OVERLAP_LOOP:
LDR r4, [r1], #4
LDR r5, [r1], #4
SUBS r9, r9, #1
QADD16 r4, r4, r4
QADD16 r5, r5, r5
LDR r6, [r2], #4
LDR r7, [r2], #4
STRH r4, [r0], r3
STRH r5, [r0], r3
STR r6, [r8], #4
STR r7, [r8], #4
BGT OUT_OVERLAP_LOOP
LDMFD sp!, {r4-r9, r15}
@ ixheaacd_spec_to_overlapbuf_armv7(r0 = WORD32 *ptr_overlap_buf,
@   r1 = WORD32 *ptr_spec_coeff, r2 = q_shift, r3 = size)
@ Rounds and arithmetic-shifts spectral coefficients into the overlap
@ buffer: shift amount r2 = (16 - q_shift) & 0xFF, rounding constant
@ r14 = 1 << (shift - 1), added with saturation before the ASR.
ixheaacd_spec_to_overlapbuf_armv7:
STMFD sp!, {r4-r10, r14}
MOV r6, #1
RSB r2, r2, #16
AND r2, r2, #0xFF
SUB r7, r2, #1
LSL r14, r6, r7
@ Two coefficients per iteration (size assumed even).
MOV r3, r3, ASR #1
OVERLAP_LOOP1:
LDMIA r1!, {r4-r5}
SUBS r3, r3, #1
QADD r4, r4, r14
QADD r5, r5, r14
MOV r4, r4, ASR r2
MOV r5, r5, ASR r2
STR r4, [r0], #4
STR r5, [r0], #4
BGT OVERLAP_LOOP1
LDMFD sp!, {r4-r10, pc}

View file

@ -0,0 +1,694 @@
.text
.p2align 2
.global ixheaacd_mps_complex_fft_64_asm
ixheaacd_mps_complex_fft_64_asm:
@LDR r4,[sp]
STMFD sp!, {r0-r12, lr}
LDR r4, [sp, #0x38]
SUB sp, sp, #0x28
@ LDR r4,[sp,#0x30]
LDR r0, [sp, #0x2c]
@LDR r12,[sp,#0x5c+4]
EOR r0, r0, r0, ASR #31
CLZ r0, r0
SUB r12, r0, #16 @dig_rev_shift = norm32(npoints) + 1 -16@
SUB r0, r0, #1
RSB r0, r0, #0x1e
AND r1, r0, #1
STR r1, [sp, #0x14]
MOV r1, r0, ASR #1
LDR r0, [sp, #0x2c] @npoints
STR r1, [sp, #-4]!
MOV lr, r0, LSL #1 @(npoints >>1) * 4
MOV r0, #0
MOV r12, r4
FIRST_STAGE_R4:
LDRB r10, [r12, r0, LSR #2]
ADD r1, r2, r10, LSL #2
LDRD r4, [r1] @r4=x0r, r5=x0i
ADD r1, r1, lr
LDRD r8, [r1] @r8=x1r, r9=x1i
ADD r1, r1, lr
LDRD r6, [r1] @r6=x2r, r7=x2i
ADD r1, r1, lr
LDRD r10, [r1] @r10=x3r, r11=x3i
ADD r0, r0, #4
CMP r0, lr, ASR #1
ADD r4, r4, r6 @x0r = x0r + x2r@
ADD r5, r5, r7 @x0i = x0i + x2i@
SUB r6, r4, r6, lsl#1 @x2r = x0r - (x2r << 1)@
SUB r7, r5, r7, lsl#1 @x2i = x0i - (x2i << 1)@
ADD r8, r8, r10 @x1r = x1r + x3r@
ADD r9, r9, r11 @x1i = x1i + x3i@
SUB r1, r8, r10, lsl#1 @x3r = x1r - (x3r << 1)@
SUB r11, r9, r11, lsl#1 @x3i = x1i - (x3i << 1)@
ADD r4, r4, r8 @x0r = x0r + x1r@
ADD r5, r5, r9 @x0i = x0i + x1i@
SUB r8, r4, r8, lsl#1 @x1r = x0r - (x1r << 1)@
SUB r9, r5, r9, lsl#1 @x1i = x0i - (x1i << 1)
ADD r6, r6, r11 @x2r = x2r + x3i@
SUB r7, r7, r1 @x2i = x2i - x3r@
SUB r10, r6, r11, lsl#1 @x3i = x2r - (x3i << 1)@
ADD r11, r7, r1, lsl#1 @x3r = x2i + (x3r << 1)@
STMIA r3!, {r4-r11}
BLT FIRST_STAGE_R4
LDR r1, [sp], #4
LDR r0, [sp, #0x2c]
MOV r12, #0x40 @nodespacing = 64@
STR r12, [sp, #0x1c]
LDR r12, [sp, #0x2c]
SUB r3, r3, r0, LSL #3
SUBS r1, r1, #1
STR r3, [sp, #0x34]
MOV r4, r12, ASR #4
MOV r0, #4
STR r4, [sp, #0x18]
STR r1, [sp, #0x20]
BLE EXIT
OUTER_LOOP:
LDR r1, [sp, #0x28]
LDR r12, [sp, #0x34] @WORD32 *data = ptr_y@
STR r1, [sp, #0x10]
LDR r1, [sp, #0x18]
MOV r0, r0, LSL #3 @(del<<1) * 4
LOOP_TRIVIAL_TWIDDLE:
LDRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0
LDRD r6, [r12] @r6=x1r, r7=x1i
ADD r12, r12, r0
LDRD r8, [r12] @r8=x2r, r9=x2i
ADD r12, r12, r0
LDRD r10, [r12] @r10=x3r, r11=x3i
@MOV r4,r4,ASR #1
@MOV r5,r5,ASR #1
@MOV r6,r6,ASR #1
@MOV r7,r7,ASR #1
@MOV r8,r8,ASR #1
@MOV r9,r9,ASR #1
@MOV r10,r10,ASR #1
@MOV r11,r11,ASR #1
ADD r4, r4, r8 @x0r = x0r + x2r@
ADD r5, r5, r9 @x0i = x0i + x2i@
SUB r8, r4, r8, lsl #1 @x2r = x0r - (x2r << 1)@
SUB r9, r5, r9, lsl #1 @x2i = x0i - (x2i << 1)@
ADD r6, r6, r10 @x1r = x1r + x3r@
ADD r7, r7, r11 @x1i = x1i + x3i@
SUB r2, r6, r10, lsl #1 @x3r = x1r - (x3r << 1)@
SUB r11, r7, r11, lsl #1 @x3i = x1i - (x3i << 1)@
ADD r4, r4, r6 @x0r = x0r + x1r@
ADD r5, r5, r7 @x0i = x0i + x1i@
@MOV r4,r4,ASR #1
@MOV r5,r5,ASR #1
SUB r6, r4, r6, lsl #1 @x1r = x0r - (x1r << 1)@
SUB r7, r5, r7, lsl #1 @x1i = x0i - (x1i << 1)
ADD r8, r8, r11 @x2r = x2r + x3i@
SUB r9, r9, r2 @x2i = x2i - x3r@
SUB r10, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
ADD r11, r9, r2, lsl#1 @x3r = x2i + (x3r << 1)
STRD r10, [r12] @r10=x3r, r11=x3i
SUB r12, r12, r0
STRD r6, [r12] @r6=x1r, r7=x1i
SUB r12, r12, r0
STRD r8, [r12] @r8=x2r, r9=x2i
SUB r12, r12, r0
STRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0, lsl #2
SUBS r1, r1, #1
BNE LOOP_TRIVIAL_TWIDDLE
MOV r0, r0, ASR #3
LDR r4, [sp, #0x1c]
LDR r3, [sp, #0x34]
MUL r1, r0, r4
ADD r12, r3, #8
STR r1, [sp, #0x24]
MOV r3, r1, ASR #2
ADD r3, r3, r1, ASR #3
SUB r3, r3, r1, ASR #4
ADD r3, r3, r1, ASR #5
SUB r3, r3, r1, ASR #6
ADD r3, r3, r1, ASR #7
SUB r3, r3, r1, ASR #8
STR r3, [sp, #-4]!
SECOND_LOOP:
LDR r3, [sp, #0x10+4]
LDR r14, [sp, #0x18+4]
MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4]
STR r1, [sp, #-4]
STR r2, [sp, #-8]
STR r5, [sp, #-12]
STR r6, [sp, #-16]
STR r7, [sp, #-20]
STR r8, [sp, #-24]
RADIX4_BFLY:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1
LDR r1, [sp, #-4]
LDR r2, [sp, #-8]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
LSR r3, r3, #31
ORR r6, r3, r6, LSL#1
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r7, r3, r7, LSL#1
ADD r7, r7, r6
SUB r6, r4, r5 @
LDR r1, [sp, #-12]
LDR r2, [sp, #-16]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
LSR r3, r3, #31
ORR r8, r3, r8, LSL#1
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r9, r3, r9, LSL#1
ADD r9, r9, r8
SUB r8, r4, r5 @
LDR r1, [sp, #-20]
LDR r2, [sp, #-24]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
LSR r3, r3, #31
ORR r10, r3, r10, LSL#1
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
LSR r3, r3, #31
ORR r11, r3, r11, LSL#1
ADD r11, r11, r10
SUB r10, r4, r5 @
@SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4]
ADD r4, r8, r4 @x0r = x0r + x2r@
ADD r5, r9, r5 @x0i = x0i + x2i@
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
ADD r6, r6, r10 @x1r = x1r + x3r@
ADD r7, r7, r11 @x1i = x1i + x3i@
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
ADD r4, r4, r6 @x0r = x0r + x1r@
ADD r5, r5, r7 @x0i = x0i + x1i@
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
STRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0
ADD r8, r8, r11 @x2r = x2r + x3i@
SUB r9, r9, r10 @x2i = x2i - x3r@
SUB r4, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
ADD r5, r9, r10, lsl#1 @x3r = x2i + (x3r << 1)
STRD r8, [r12] @r8=x2r, r9=x2i
ADD r12, r12, r0
STRD r6, [r12] @r6=x1r, r7=x1i
ADD r12, r12, r0
STRD r4, [r12] @r10=x3r, r11=x3i
ADD r12, r12, r0
BNE RADIX4_BFLY
MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4]
LDR r4, [sp, #8+4]
SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4]
ADD r12, r1, #8
LDR r7, [sp, #0]
ADD r4, r4, r6
CMP r4, r7
BLE SECOND_LOOP
SECOND_LOOP_2:
LDR r3, [sp, #0x10+4]
LDR r14, [sp, #0x18+4]
MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
SUB r3, r3, #2048 @ 512 *4
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4]
STR r1, [sp, #-4]
STR r2, [sp, #-8]
STR r5, [sp, #-12]
STR r6, [sp, #-16]
STR r7, [sp, #-20]
STR r8, [sp, #-24]
RADIX4_BFLY_2:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1
LDR r1, [sp, #-4]
LDR r2, [sp, #-8]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
LSR r3, r3, #31
ORR r6, r3, r6, LSL#1
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r7, r3, r7, LSL#1
ADD r7, r7, r6
SUB r6, r4, r5 @
LDR r1, [sp, #-12]
LDR r2, [sp, #-16]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
LSR r3, r3, #31
ORR r8, r3, r8, LSL#1
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r9, r3, r9, LSL#1
ADD r9, r9, r8
SUB r8, r4, r5 @
LDR r1, [sp, #-20]
LDR r2, [sp, #-24]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
LSR r3, r3, #31
ORR r10, r3, r10, LSL#1
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
LSR r3, r3, #31
ORR r11, r3, r11, LSL#1
ADD r10, r11, r10
SUB r11, r5, r4 @
@SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4]
ADD r4, r8, r4 @x0r = x0r + x2r@
ADD r5, r9, r5 @x0i = x0i + x2i@
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
ADD r6, r6, r10 @x1r = x1r + x3r@
ADD r7, r7, r11 @x1i = x1i + x3i@
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
ADD r4, r4, r6 @x0r = x0r + x1r@
ADD r5, r5, r7 @x0i = x0i + x1i@
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
STRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0
ADD r8, r8, r11 @x2r = x2r + x3i@
SUB r9, r9, r10 @x2i = x2i - x3r@
SUB r4, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
ADD r5, r9, r10, lsl#1 @x3r = x2i + (x3r << 1)
STRD r8, [r12] @r8=x2r, r9=x2i
ADD r12, r12, r0
STRD r6, [r12] @r6=x1r, r7=x1i
ADD r12, r12, r0
STRD r4, [r12] @r10=x3r, r11=x3i
ADD r12, r12, r0
BNE RADIX4_BFLY_2
MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4]
LDR r4, [sp, #8+4]
SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4]
ADD r12, r1, #8
LDR r7, [sp, #0x24+4]
ADD r4, r4, r6
CMP r4, r7, ASR #1
BLE SECOND_LOOP_2
LDR r7, [sp, #0]
CMP r4, r7, LSL #1
BGT SECOND_LOOP_4
SECOND_LOOP_3:
LDR r3, [sp, #0x10+4]
LDR r14, [sp, #0x18+4]
MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
SUB r3, r3, #2048 @ 512 *4
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4]
STR r1, [sp, #-4]
STR r2, [sp, #-8]
STR r5, [sp, #-12]
STR r6, [sp, #-16]
STR r7, [sp, #-20]
STR r8, [sp, #-24]
RADIX4_BFLY_3:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1
LDR r1, [sp, #-4]
LDR r2, [sp, #-8]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
LSR r3, r3, #31
ORR r6, r3, r6, LSL#1
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r7, r3, r7, LSL#1
ADD r7, r7, r6
SUB r6, r4, r5 @
LDR r1, [sp, #-12]
LDR r2, [sp, #-16]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
LSR r3, r3, #31
ORR r8, r3, r8, LSL#1
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r9, r3, r9, LSL#1
ADD r8, r9, r8
SUB r9, r5, r4 @
LDR r1, [sp, #-20]
LDR r2, [sp, #-24]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
LSR r3, r3, #31
ORR r10, r3, r10, LSL#1
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
LSR r3, r3, #31
ORR r11, r3, r11, LSL#1
ADD r10, r11, r10
SUB r11, r5, r4 @
@SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4]
ADD r4, r8, r4 @x0r = x0r + x2r@
ADD r5, r9, r5 @x0i = x0i + x2i@
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
ADD r6, r6, r10 @x1r = x1r + x3r@
ADD r7, r7, r11 @x1i = x1i + x3i@
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
ADD r4, r4, r6 @x0r = x0r + x1r@
ADD r5, r5, r7 @x0i = x0i + x1i@
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
STRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0
ADD r8, r8, r11 @x2r = x2r + x3i@
SUB r9, r9, r10 @x2i = x2i - x3r@
SUB r4, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
ADD r5, r9, r10, lsl#1 @x3r = x2i + (x3r << 1)
STRD r8, [r12] @r8=x2r, r9=x2i
ADD r12, r12, r0
STRD r6, [r12] @r6=x1r, r7=x1i
ADD r12, r12, r0
STRD r4, [r12] @r10=x3r, r11=x3i
ADD r12, r12, r0
BNE RADIX4_BFLY_3
MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4]
LDR r4, [sp, #8+4]
SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4]
ADD r12, r1, #8
LDR r7, [sp, #0]
ADD r4, r4, r6
CMP r4, r7, LSL #1
BLE SECOND_LOOP_3
SECOND_LOOP_4:
LDR r3, [sp, #0x10+4]
LDR r14, [sp, #0x18+4]
MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
SUB r3, r3, #2048 @ 512 *4
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
SUB r3, r3, #2048 @ 512 *4
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4]
STR r1, [sp, #-4]
STR r2, [sp, #-8]
STR r5, [sp, #-12]
STR r6, [sp, #-16]
STR r7, [sp, #-20]
STR r8, [sp, #-24]
RADIX4_BFLY_4:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1
LDR r1, [sp, #-4]
LDR r2, [sp, #-8]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
LSR r3, r3, #31
ORR r6, r3, r6, LSL#1
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r7, r3, r7, LSL#1
ADD r7, r7, r6
SUB r6, r4, r5 @
LDR r1, [sp, #-12]
LDR r2, [sp, #-16]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
LSR r3, r3, #31
ORR r8, r3, r8, LSL#1
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
LSR r3, r3, #31
ORR r9, r3, r9, LSL#1
ADD r8, r9, r8
SUB r9, r5, r4 @
LDR r1, [sp, #-20]
LDR r2, [sp, #-24]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31
ORR r4, r3, r4, LSL#1
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
LSR r3, r3, #31
ORR r10, r3, r10, LSL#1
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
LSR r3, r3, #31
ORR r5, r3, r5, LSL#1
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
LSR r3, r3, #31
ORR r11, r3, r11, LSL#1
ADD r11, r11, r10
SUB r10, r5, r4 @
@SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4]
ADD r4, r8, r4 @x0r = x0r + x2r@
ADD r5, r9, r5 @x0i = x0i + x2i@
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
ADD r6, r6, r10 @x1r = x1r + x3r@
SUB r7, r7, r11 @x1i = x1i - x3i@
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
ADD r11, r7, r11, lsl#1 @x3i = x1i + (x3i << 1)@
ADD r4, r4, r6 @x0r = x0r + x1r@
ADD r5, r5, r7 @x0i = x0i + x1i@
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
STRD r4, [r12] @r4=x0r, r5=x0i
ADD r12, r12, r0
ADD r8, r8, r11 @x2r = x2r + x3i@
SUB r9, r9, r10 @x2i = x2i - x3r@
SUB r4, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
ADD r5, r9, r10, lsl#1 @x3r = x2i + (x3r << 1)
STRD r8, [r12] @r8=x2r, r9=x2i
ADD r12, r12, r0
STRD r6, [r12] @r6=x1r, r7=x1i
ADD r12, r12, r0
STRD r4, [r12] @r10=x3r, r11=x3i
ADD r12, r12, r0
BNE RADIX4_BFLY_4
MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4]
LDR r4, [sp, #8+4]
SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4]
ADD r12, r1, #8
LDR r7, [sp, #0x24+4]
ADD r4, r4, r6
CMP r4, r7
BLT SECOND_LOOP_4
ADD sp, sp, #4
LDR r1, [sp, #0x1c]
MOV r0, r0, LSL #2
MOV r1, r1, ASR #2
STR r1, [sp, #0x1c]
LDR r1, [sp, #0x18]
MOV r1, r1, ASR #2
STR r1, [sp, #0x18]
LDR r1, [sp, #0x20]
SUBS r1, r1, #1
STR r1, [sp, #0x20]
BGT OUTER_LOOP
LDR r1, [sp, #0x14]
CMP r1, #0
BEQ EXIT
LDR r12, [sp, #0x1c]
LDR r1, [sp, #0x28]
CMP r12, #0
LDRNE r12, [sp, #0x1c]
MOVEQ r4, #1
MOVNE r4, r12, LSL #1
MOVS r3, r0
BEQ EXIT
MOV r3, r3, ASR #1
LDR r5, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4
STR r1, [sp, #-4]
EXIT:
ADD sp, sp, #0x38
LDMFD sp!, {r4-r12, pc}

View file

@ -0,0 +1,55 @@
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_mps_synt_out_calc_armv7
ixheaacd_mps_synt_out_calc_armv7:
STMFD sp!, {r4-r12, r14}
VPUSH {D8-D15}
MOV R6, #3
MUL R7, R0, R6
ADD R4, R1, R0, LSL #2
ADD R5, R2, R7, LSL #2
MOV R6, #5
LOOP1:
MOV R8, R0
LOOP2:
VLD2.32 {D4, D5}, [R3]!
VLD1.32 {D0, D1}, [R2]!
VLD1.32 {D2, D3}, [R5]!
VLD2.32 {D6, D7}, [R3]!
VMULL.S32 Q4, D0, D4
VMULL.S32 Q5, D1, D6
VMULL.S32 Q6, D2, D5
VMULL.S32 Q7, D3, D7
VSHRN.S64 D8, Q4, #31
VSHRN.S64 D9, Q5, #31
VSHRN.S64 D12, Q6, #31
VSHRN.S64 D13, Q7, #31
SUBS R8, R8, #4
VST1.32 {D8, D9}, [R1]!
VST1.32 {D12, D13}, [R4]!
BGT LOOP2
SUBS R6, R6, #1
ADD R1, R1, R0, LSL #2
ADD R4, R4, R0, LSL #2
ADD R2, R2, R7, LSL #2
ADD R5, R5, R7, LSL #2
BGT LOOP1
VPOP {D8-D15}
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,65 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_mps_synt_post_fft_twiddle_armv7

@ MPS synthesis post-FFT twiddle multiply (ARMv7 NEON).
@ Stack frame: STMFD saves 10 core regs (40 bytes) and VPUSH saves
@ D8-D15 (64 bytes), so the 5th/6th C arguments live at [SP,#104]
@ and [SP,#108].
@ NOTE(review): argument roles inferred from register usage only:
@   R0        = length parameter; loop count is 2*R0 (4 per iteration)
@   R1,R2     = two 32-bit input vectors
@   R3,[SP,#104] = two 32-bit coefficient vectors
@   [SP,#108] = output base; results are written BACKWARDS starting at
@               out + 8*R0 - 16 bytes, stepping -16, with each 4-word
@               group reversed via VREV64.32 — presumably producing a
@               mirrored spectrum; confirm against the C reference.
@ Per element: ((a*c) >> 31) computed as VMULL.S32 + VSHRN #31, then
@ pairs combined with saturating adds (VQADD.S32).
ixheaacd_mps_synt_post_fft_twiddle_armv7:
    STMFD sp!, {r4-r12, r14}
    VPUSH {D8-D15}
    LDR R4, [SP, #104]          @ 5th arg: second coefficient vector
    LDR R5, [SP, #108]          @ 6th arg: output buffer
    ADD R6, R5, R0, LSL #3      @ end of output region = out + 8*R0
    LSL R7, R0, #1              @ loop counter = 2*R0 elements
    MOV R8, #-16                @ backward store stride
    ADD R6, R6, R8              @ point at last 16-byte group
LOOP1:
    VLD1.32 {D0, D1}, [R1]!
    VLD1.32 {D2, D3}, [R2]!
    VLD1.32 {D4, D5}, [R3]!
    VLD1.32 {D6, D7}, [R4]!
    VMULL.S32 Q4, D0, D4
    VMULL.S32 Q5, D2, D6
    VMULL.S32 Q6, D1, D5
    VMULL.S32 Q7, D3, D7
    VSHRN.S64 D8, Q4, #31       @ fractional products, back to 32 bits
    VSHRN.S64 D10, Q5, #31
    VSHRN.S64 D12, Q6, #31
    VSHRN.S64 D14, Q7, #31
    VQADD.S32 D1, D8, D10       @ saturating combine of the two products
    VQADD.S32 D0, D12, D14
    VREV64.32 D1, D1            @ reverse word order within each pair
    VREV64.32 D0, D0
    SUBS R7, R7, #4
    VST1.32 {D0, D1}, [R6], R8  @ store group, walk output backwards
    BGT LOOP1
    VPOP {D8-D15}
    LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,60 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_mps_synt_post_twiddle_armv7

@ MPS synthesis post-twiddle: in-place complex multiply of the
@ interleaved (re,im) buffer at R0 by the twiddles in R1/R2 (ARMv7 NEON).
@ NOTE(review): roles inferred from register usage only:
@   R0 = in/out buffer, interleaved complex 32-bit (VLD2/VST2)
@   R1 = twiddle table A (interleaved pairs)
@   R2 = twiddle table B (interleaved pairs)
@   R3 = element count; loop processes 4 interleaved pairs per pass
@        over 2*R3 counts (assumed multiple of 4 — TODO confirm)
@ Products use VMULL.S32 + VSHRN #31 ((a*b)>>31); the imaginary input
@ is negated first (VNEG.S32 D12), and the cross terms are combined
@ with saturating adds — the usual complex-rotation shape, but confirm
@ sign convention against the C reference.
ixheaacd_mps_synt_post_twiddle_armv7:
    STMFD sp!, {r4-r12, r14}
    VPUSH {D8-D15}
    lsl R3, R3, #1              @ counter = 2*R3
LOOP1:
    VLD2.32 {D13, D14}, [R2]!   @ table B: D13 = even lanes, D14 = odd
    VLD2.32 {D15, D16}, [R1]!   @ table A: D15 = even lanes
    VLD2.32 {D2, D3}, [R0]      @ buffer: D2 = real, D3 = imag (no writeback yet)
    VNEG.S32 D12, D2            @ -real
    VMULL.S32 Q2, D13, D12
    VMULL.S32 Q3, D13, D3
    VMULL.S32 Q4, D15, D2
    VMULL.S32 Q5, D15, D3
    VSHRN.I64 D4, Q2, #31       @ fractional products
    VSHRN.I64 D6, Q3, #31
    VSHRN.I64 D8, Q4, #31
    VSHRN.I64 D10, Q5, #31
    VQADD.S32 D0, D8, D6        @ new component 0 (saturating)
    VQADD.S32 D1, D4, D10       @ new component 1 (saturating)
    SUBS R3, R3, #4
    VST2.32 {D0, D1} , [R0]!    @ write back in place, re-interleaved
    BGT LOOP1
    VPOP {D8-D15}
    LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,60 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_mps_synt_pre_twiddle_armv7

@ MPS synthesis pre-twiddle: in-place complex multiply of the buffer at
@ R0 by the twiddles in R1/R2 (ARMv7 NEON). Same arithmetic shape as
@ ixheaacd_mps_synt_post_twiddle_armv7, except the twiddle tables are
@ read linearly (VLD1, one D register each) rather than de-interleaved.
@ NOTE(review): roles inferred from register usage only:
@   R0 = in/out buffer, interleaved complex 32-bit (VLD2/VST2)
@   R1, R2 = 32-bit twiddle tables, read sequentially
@   R3 = element count; loop runs 2*R3 counts, 4 per iteration
@        (assumed multiple of 4 — TODO confirm)
@ Per element: (a*b)>>31 via VMULL.S32 + VSHRN #31, real part negated
@ first, cross terms combined with saturating adds.
ixheaacd_mps_synt_pre_twiddle_armv7:
    STMFD sp!, {r4-r12, r14}
    VPUSH {D8-D15}
    lsl R3, R3, #1              @ counter = 2*R3
LOOP1:
    VLD1.32 {D0}, [R2]!         @ 2 twiddles from table B
    VLD1.32 {D1}, [R1]!         @ 2 twiddles from table A
    VLD2.32 {D2, D3}, [R0]      @ buffer: D2 = real, D3 = imag (no writeback yet)
    VNEG.S32 D12, D2            @ -real
    VMULL.S32 Q2, D0, D12
    VMULL.S32 Q3, D0, D3
    VMULL.S32 Q4, D1, D2
    VMULL.S32 Q5, D1, D3
    VSHRN.I64 D4, Q2, #31       @ fractional products
    VSHRN.I64 D6, Q3, #31
    VSHRN.I64 D8, Q4, #31
    VSHRN.I64 D10, Q5, #31
    VQADD.S32 D0, D8, D6        @ new component 0 (saturating)
    VQADD.S32 D1, D4, D10       @ new component 1 (saturating)
    SUBS R3, R3, #4
    VST2.32 {D0, D1} , [R0]!    @ write back in place
    BGT LOOP1
    VPOP {D8-D15}
    LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,110 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_neg_shift_spec_armv7

@ Negate-shift-round a fixed 448-sample 32-bit spectrum into 16-bit
@ output (ARMv7 NEON).
@ NOTE(review): roles inferred from register usage only:
@   R0 = 32-bit input spectrum of 448 words, read BACKWARDS from the end
@   R1 = 16-bit output, written forwards with a stride of 2*R3 bytes
@        (R3 is presumably an interleave/channel count — confirm)
@   R2 = left-shift amount applied with saturation (VQSHL.S32 by Q1)
@ Per element: saturating negate (VQNEG), saturating shift left by R2,
@ add 0x8000 rounding bias (saturating), arithmetic shift right 16 —
@ i.e. round-to-16-bit with saturation. VREV64.32 + VUZP.16 reorder the
@ reversed input groups and extract the high halfwords for storing.
@ The loop is software-pipelined: two 8-sample groups are in flight,
@ with a drain block after LOOP_1 finishing the final group.
ixheaacd_neg_shift_spec_armv7:
    STMFD sp!, {R4-R12, R14}
    VPUSH {D8 - D15}
    MOV R5, #448                @ fixed element count
    SUB R6, R5, #1
    MOV R6, R6, LSL #2
    ADD R6, R6, R0              @ R6 -> last input word
    MOV R8, #-16                @ backward load stride
    SUB R6, R6, #12             @ align to last 4-word group
    MOV R7, R3, LSL #1          @ output stride in bytes = 2*R3
    VDUP.32 Q1, R2              @ shift amount in every lane
    MOV R4, #0x8000
    VDUP.32 Q2, R4              @ rounding bias for the >>16
    VLD1.32 {D0, D1}, [R6], R8  @ prologue: first (last-in-memory) group
    VQNEG.S32 Q0, Q0            @ saturating negate
    VLD1.32 {D6, D7}, [R6], R8  @ prefetch next group
    VQSHL.S32 Q15, Q0, Q1       @ saturating shift left by R2
    VQADD.S32 Q14, Q15, Q2      @ + 0x8000, saturating
    VSHR.S32 Q13, Q14, #16      @ round down to 16-bit range
    VREV64.32 Q13, Q13          @ undo reversed load order
    SUB R5, R5, #8
    VUZP.16 D27, D26            @ gather the 16-bit results into D27
    VQNEG.S32 Q3, Q3
LOOP_1:
    @ stores of the previous group interleaved with arithmetic of the next
    VST1.16 D27[0], [R1], R7
    VQSHL.S32 Q12, Q3, Q1
    VLD1.32 {D0, D1}, [R6], R8
    VST1.16 D27[1], [R1], R7
    VQADD.S32 Q11, Q12, Q2
    VST1.16 D27[2], [R1], R7
    VQNEG.S32 Q0, Q0
    VST1.16 D27[3], [R1], R7
    VSHR.S32 Q10, Q11, #16
    VREV64.32 Q10, Q10
    SUBS R5, R5, #8
    VUZP.16 D21, D20
    VQSHL.S32 Q15, Q0, Q1
    VST1.16 D21[0], [R1], R7
    VLD1.32 {D6, D7}, [R6], R8
    VQADD.S32 Q14, Q15, Q2
    VST1.16 D21[1], [R1], R7
    VSHR.S32 Q13, Q14, #16
    VST1.16 D21[2], [R1], R7
    VREV64.32 Q13, Q13
    VST1.16 D21[3], [R1], R7
    VUZP.16 D27, D26
    VQNEG.S32 Q3, Q3
    BGT LOOP_1
    @ epilogue: drain the two groups still in flight
    VST1.16 D27[0], [R1], R7
    VQSHL.S32 Q12, Q3, Q1
    VST1.16 D27[1], [R1], R7
    VST1.16 D27[2], [R1], R7
    VQADD.S32 Q11, Q12, Q2
    VST1.16 D27[3], [R1], R7
    VSHR.S32 Q10, Q11, #16
    VREV64.32 Q10, Q10
    VUZP.16 D21, D20
    VST1.16 D21[0], [R1], R7
    VST1.16 D21[1], [R1], R7
    VST1.16 D21[2], [R1], R7
    VST1.16 D21[3], [R1], R7
    VPOP {D8 - D15}
    LDMFD sp!, {R4-R12, R15}
.end

View file

@ -0,0 +1,297 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_over_lap_add1_armv7

@ MDCT overlap-add, variant 1 (ARMv7 NEON).
@ Stack frame: 10 core regs (40 bytes) + D8-D15 (64 bytes) = 104, so the
@ 5th/6th/7th C arguments are at [SP,#104/108/112].
@ NOTE(review): argument roles inferred from register usage only —
@ confirm against the C prototype:
@   R0 = 32-bit coefficient/spectrum buffer, read BACKWARDS from index
@        2*R5-1 (stride -16 bytes), negated with saturation (VQNEG)
@   R1 = 32-bit buffer read forwards (second operand of the 32x32 part)
@   R2 = 16-bit output base; two output streams are written with
@        element stride [SP,#112] halfwords: one BACKWARDS from
@        (R5-1)*[SP,#112], one FORWARDS from R5*[SP,#112]
@   R3 = 16-bit window table, read backwards interleaved (VLD2.16)
@   [SP,#104] = saturating shift amount (VQSHL.S32 by Q11)
@   [SP,#108] = R5, number of output samples per stream
@   [SP,#112] = R6, output stride in halfwords
@ Core arithmetic per sample: a 32x16 multiply of the (negated,
@ halved-via-VUZP) coefficient with the window (VMULL.U16 low part,
@ VSHR #16, VMLAL.S16 high part), shifted by Q11; minus a saturating
@ 32x32 product of R1[] with the widened window (VMULL.S32 + VQMOVN)
@ biased by -0x2000 (Q10); result scaled (VQSHL #2) and narrowed to
@ 16 bits (VSHR #16 + VUZP.16).
@ Heavily software-pipelined: the prologue computes the first two
@ 4-sample groups, LOOP_1 retires two groups per iteration while
@ computing the next two, and the epilogue after BGT drains the rest.
ixheaacd_over_lap_add1_armv7:
    STMFD sp!, {R4-R12, R14}
    VPUSH {d8 - d15}
    LDR R4, [SP, #104]          @ shift amount
    LDR R5, [SP, #108]          @ sample count
    LDR R6, [SP, #112]          @ output stride (halfwords)
    MOV R10, R5, LSL #1
    SUB R11, R10, #1
    MOV R10, R11, LSL #2
    ADD R10, R0, R10            @ R10 -> last coefficient word
    SUB R10, R10, #12           @ back up to last 4-word group
    MOV R8, R11, LSL #1
    ADD R8, R8, R3              @ R8 -> end of window table
    SUB R8, R8, #14
    MOV R12, #0
    VDUP.S16 D12, R12           @ D12 = 0 (used to widen via VADDL.S16)
    MOV R12, #-16               @ backward stride for R10/R8
    VDUP.16 Q11, R4             @ shift amount in every lane
    VLD1.32 {D6, D7}, [R10], R12
    MOV R7, #0x2000
    VREV64.32 Q3, Q3            @ undo reversed load order
    RSB R7, R7, #0              @ R7 = -0x2000
    VQNEG.S32 Q0, Q3            @ saturating negate of coefficients
    VDUP.32 Q10, R7             @ bias = -0x2000 per lane
    VUZP.16 D1, D0              @ split negated coeffs into hi/lo halves
    SUB R11, R5, #1
    VUZP.16 D7, D6
    SMULBB R11, R11, R6
    MOV R11, R11, LSL #1
    VLD2.16 {D2, D3}, [R8], R12 @ window pair (de-interleaved)
    ADD R11, R11, R2            @ R11 -> output stream 1 (walks backwards via -R4)
    VREV64.16 Q1, Q1
    MOV R4, R6, LSL #1
    RSB R4, R4, #0              @ R4 = -(stride in bytes) for stream 1
    MOV R9, R6, LSL #1          @ R9 = +(stride in bytes) for stream 2
    SMULBB R6, R5, R6
    MOV R6, R6, LSL #1
    ADD R6, R6, R2              @ R6 -> output stream 2
    @ ---- prologue: compute first two 4-sample groups (Q13 and Q9) ----
    VMULL.U16 Q15, D7, D2
    VLD1.32 {D4, D5}, [R1]!
    VSHR.U32 Q15, Q15, #16
    VMLAL.S16 Q15, D6, D2       @ 32x16 product, high+low parts
    VQSHL.S32 Q15, Q15, Q11
    VADDL.S16 Q7, D3, D12       @ widen window to 32 bits
    VMULL.S32 Q13, D4, D14
    VQMOVN.S64 D28, Q13
    VMULL.S32 Q13, D5, D15
    VQMOVN.S64 D29, Q13         @ saturated 32x32 product
    VQADD.S32 Q14, Q14, Q10     @ apply -0x2000 bias
    VQSUB.S32 Q13, Q15, Q14
    VQSHL.S32 Q13, Q13, #2
    VSHR.S32 Q13, Q13, #16      @ down to 16-bit result
    VUZP.16 D26, D27
    VMULL.U16 Q12, D1, D3
    VSHR.U32 Q12, Q12, #16
    VMLAL.S16 Q12, D0, D3
    VQSHL.S32 Q12, Q12, Q11
    VLD1.32 {D6, D7}, [R10], R12
    VADDL.S16 Q7, D2, D12
    VMULL.S32 Q0, D14, D4
    VQMOVN.S64 D16, Q0
    VMULL.S32 Q0, D15, D5
    VQMOVN.S64 D17, Q0
    VREV64.32 Q3, Q3
    VQADD.S32 Q8, Q8, Q10
    VQNEG.S32 Q0, Q3
    VUZP.16 D1, D0
    VQSUB.S32 Q9, Q12, Q8
    VUZP.16 D7, D6
    VQSHL.S32 Q9, Q9, #2
    VLD2.16 {D2, D3}, [R8], R12
    VSHR.S32 Q9, Q9, #16
    VREV64.16 Q1, Q1
    VUZP.16 D18, D19
    VLD1.32 {D4, D5}, [R1]!
    SUB R5, R5, #8
LOOP_1:
    @ store previous results (D26 to stream 1, D18 to stream 2) while
    @ computing the next two groups
    VST1.16 D26[0], [R11], R4
    VMULL.U16 Q15, D7, D2
    VST1.16 D26[1], [R11], R4
    VMULL.U16 Q12, D1, D3
    VST1.16 D26[2], [R11], R4
    VSHR.U32 Q15, Q15, #16
    VST1.16 D26[3], [R11], R4
    VSHR.U32 Q12, Q12, #16
    VST1.16 D18[0], [R6], R9
    VMLAL.S16 Q15, D6, D2
    VST1.16 D18[1], [R6], R9
    VMLAL.S16 Q12, D0, D3
    VST1.16 D18[2], [R6], R9
    VQSHL.S32 Q15, Q15, Q11
    VST1.16 D18[3], [R6], R9
    VQSHL.S32 Q12, Q12, Q11
    VLD1.32 {D6, D7}, [R10], R12
    VADDL.S16 Q7, D3, D12
    VMULL.S32 Q8, D4, D14
    VQMOVN.S64 D28, Q8
    VMULL.S32 Q8, D5, D15
    VQMOVN.S64 D29, Q8
    VREV64.32 Q3, Q3
    VADDL.S16 Q7, D2, D12
    VMULL.S32 Q0, D4, D14
    VQMOVN.S64 D16, Q0
    VMULL.S32 Q0, D5, D15
    VQMOVN.S64 D17, Q0
    VLD2.16 {D2, D3}, [R8], R12
    VQNEG.S32 Q0, Q3
    VLD1.32 {D4, D5}, [R1]!
    VQADD.S32 Q14, Q14, Q10
    VUZP.16 D1, D0
    VQADD.S32 Q8, Q8, Q10
    VUZP.16 D7, D6
    VQSUB.S32 Q13, Q15, Q14
    VREV64.16 Q1, Q1
    VQSUB.S32 Q9, Q12, Q8
    VQSHL.S32 Q13, Q13, #2
    VQSHL.S32 Q9, Q9, #2
    VMULL.U16 Q15, D7, D2
    VSHR.S32 Q13, Q13, #16
    VUZP.16 D26, D27
    VSHR.S32 Q9, Q9, #16
    VST1.16 D26[0], [R11], R4
    VMULL.U16 Q12, D1, D3
    VUZP.16 D18, D19
    VSHR.U32 Q15, Q15, #16
    VST1.16 D26[1], [R11], R4
    VMLAL.S16 Q15, D6, D2 @MLA
    VST1.16 D26[2], [R11], R4
    VSHR.U32 Q12, Q12, #16
    VST1.16 D26[3], [R11], R4
    VMLAL.S16 Q12, D0, D3 @MLA
    VST1.16 D18[0], [R6], R9
    VQSHL.S32 Q15, Q15, Q11
    VST1.16 D18[1], [R6], R9
    VQSHL.S32 Q12, Q12, Q11
    VST1.16 D18[2], [R6], R9
    VADDL.S16 Q7, D3, D12
    VMULL.S32 Q8, D4, D14
    VQMOVN.S64 D28, Q8
    VMULL.S32 Q8, D5, D15
    VQMOVN.S64 D29, Q8
    VST1.16 D18[3], [R6], R9
    VADDL.S16 Q7, D2, D12
    VMULL.S32 Q0, D4, D14
    VQMOVN.S64 D16, Q0
    VMULL.S32 Q0, D5, D15
    VQMOVN.S64 D17, Q0
    VLD1.32 {D6, D7}, [R10], R12
    VQADD.S32 Q14, Q14, Q10
    VREV64.32 Q3, Q3
    VQNEG.S32 Q0, Q3
    VUZP.16 D1, D0
    VQSUB.S32 Q13, Q15, Q14
    VUZP.16 D7, D6
    VQADD.S32 Q8, Q8, Q10
    VLD2.16 {D2, D3}, [R8], R12
    VQSUB.S32 Q9, Q12, Q8
    VREV64.16 Q1, Q1
    VQSHL.S32 Q13, Q13, #2
    VLD1.32 {D4, D5}, [R1]!
    VQSHL.S32 Q9, Q9, #2
    VSHR.S32 Q13, Q13, #16
    SUBS R5, R5, #8
    VSHR.S32 Q9, Q9, #16
    VUZP.16 D26, D27
    VUZP.16 D18, D19
    BGT LOOP_1
    @ ---- epilogue: retire the groups still in flight ----
    VST1.16 D26[0], [R11], R4
    VMULL.U16 Q15, D7, D2
    VST1.16 D26[1], [R11], R4
    VMULL.U16 Q12, D1, D3
    VST1.16 D26[2], [R11], R4
    VSHR.U32 Q15, Q15, #16
    VST1.16 D26[3], [R11], R4
    VSHR.U32 Q12, Q12, #16
    VST1.16 D18[0], [R6], R9
    VMLAL.S16 Q15, D6, D2
    VST1.16 D18[1], [R6], R9
    VMLAL.S16 Q12, D0, D3
    VST1.16 D18[2], [R6], R9
    VQSHL.S32 Q15, Q15, Q11
    VST1.16 D18[3], [R6], R9
    VQSHL.S32 Q12, Q12, Q11
    VADDL.S16 Q7, D3, D12
    VMULL.S32 Q8, D4, D14
    VQMOVN.S64 D28, Q8
    VMULL.S32 Q8, D5, D15
    VQMOVN.S64 D29, Q8
    VADDL.S16 Q7, D2, D12
    VMULL.S32 Q13, D4, D14
    VQMOVN.S64 D16, Q13
    VMULL.S32 Q13, D5, D15
    VQMOVN.S64 D17, Q13
    VQADD.S32 Q14, Q14, Q10
    VQADD.S32 Q8, Q8, Q10
    VQSUB.S32 Q13, Q15, Q14
    VQSUB.S32 Q9, Q12, Q8
    VQSHL.S32 Q13, Q13, #2
    VQSHL.S32 Q9, Q9, #2
    VSHR.S32 Q13, Q13, #16
    VSHR.S32 Q9, Q9, #16
    VUZP.16 D26, D27
    VUZP.16 D18, D19
    VST1.16 D26[0], [R11], R4
    VST1.16 D26[1], [R11], R4
    VST1.16 D26[2], [R11], R4
    VST1.16 D26[3], [R11], R4
    VST1.16 D18[0], [R6], R9
    VST1.16 D18[1], [R6], R9
    VST1.16 D18[2], [R6], R9
    VST1.16 D18[3], [R6], R9
    VPOP {d8 - d15}
    LDMFD sp!, {R4-R12, R15}

View file

@ -0,0 +1,268 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_over_lap_add2_armv7

@ MDCT overlap-add, variant 2 (ARMv7 NEON), producing 32-bit output.
@ Stack frame: 10 core regs (40) + D8-D15 (64) = 104 bytes, so the
@ 5th/6th/7th C arguments are at [SP,#104/108/112].
@ NOTE(review): argument roles inferred from register usage only —
@ confirm against the C prototype:
@   R0 = 16-bit buffer (VLD2.16), read forwards in phase 1 and
@        backwards (from index 4*R5-2, stride -16) in phase 2
@   R1 = second 16-bit buffer; forwards in phase 2, backwards (from
@        index R5-1) in phase 1
@   R2 = 32-bit output base for phase 1, stride R6 words; phase 2
@        writes from output + 4*R5*R6 + 4*R5 bytes onward
@   R3 = 16-bit window table, read forwards interleaved
@   [SP,#104] = shift-control: clamped to <=31, turned into a rounding
@               constant Q10 = 1<<(shift-1) and a right-shift Q11
@               (negated, applied via saturating VQSHL.S32)
@   [SP,#108] = R5, samples per phase;  [SP,#112] = R6, output stride
@ Per sample (both phases): a 16/32-bit windowed combination built from
@ VMULL.U16 low parts, >>16, then signed VMLAL/VMLSL high parts —
@ i.e. 32x16 multiplies of buffer values with window coefficients —
@ rounded by VQADD with Q10 and scaled by VQSHL with Q11.
@ Phase 2 additionally reverses and saturating-negates the R0 stream
@ (VREV/VQNEG).  Both phases are software-pipelined (prologue, a loop
@ retiring one group while computing the next, and a drain block).
ixheaacd_over_lap_add2_armv7:
    STMFD sp!, {R4-R12, R14}
    VPUSH {d8 - d15}
    LDR R4, [SP, #104]          @ shift control
    LDR R5, [SP, #108]          @ sample count
    LDR R6, [SP, #112]          @ output stride (words)
    RSB R4, R4, #15             @ shift = 15 - q, clamped to 31 below
    CMP R4, #31
    MOVGT R4, #31
    SUB R9, R4, #1
    MOV R8, #1
    MOV R8, R8, LSL R9          @ rounding constant = 1 << (shift-1)
    RSB R4, R4, #0              @ negative -> VQSHL acts as right shift
    VDUP.32 Q11, R4
    VDUP.32 Q10, R8
    MOV R8, R5                  @ phase-1 loop counter
    SUB R12, R5, #1
    MOV R9, R5, LSL #2
    MOV R12, R12, LSL #2
    ADD R10, R0, R9             @ R10 -> R0 + 4*R5 (phase-1 forward stream)
    ADD R7, R1, R12             @ R7 -> R1 + 4*(R5-1) (backward stream)
    VLD2.16 {D0, D1}, [R10]!
    MOV R11, R6, LSL #2         @ output stride in bytes
    SUB R7, R7, #12
    ADD R4, R4, #1
    MOV R12, #-16               @ backward stride
    VLD2.16 {D6, D7}, [R7], R12
    MOV R4, #0x8000             @ (overwritten below — dead value)
    VREV64.16 D4, D6
    VREV64.16 D5, D7
    MOV R4, R3                  @ keep window base for phase 2
    MOV R9, R2                  @ keep output base for phase 2
    VLD2.16 {D2, D3}, [R3]!
    @ ---- phase 1 prologue: first 4-sample group ----
    VMULL.U16 Q13, D0, D2
    VMLSL.U16 Q13, D4, D3
    VLD2.16 {D8, D9}, [R10]!
    VSHR.S32 Q13, Q13, #16
    VLD2.16 {D10, D11}, [R3]!
    VMLAL.S16 Q13, D1, D2
    VMLSL.S16 Q13, D5, D3
    VLD2.16 {D14, D15}, [R7], R12
    VREV64.16 Q6, Q7
    VQADD.S32 Q12, Q13, Q10     @ add rounding constant
    VQSHL.S32 Q12, Q12, Q11     @ shift down (saturating)
    SUB R8, R8, #8
LOOP_1:
    @ store group Q12 while computing the next two
    VLD2.16 {D0, D1}, [R10]!
    VMULL.U16 Q9, D8, D10
    VLD2.16 {D2, D3}, [R3]!
    VMLSL.U16 Q9, D12, D11
    VLD2.16 {D6, D7}, [R7], R12
    VMULL.U16 Q13, D0, D2
    VREV64.16 D4, D6
    VMLSL.U16 Q13, D4, D3
    VREV64.16 D5, D7
    VSHR.S32 Q9, Q9, #16
    VST1.32 {D24[0]}, [R2], R11
    VMLAL.S16 Q9, D9, D10
    VST1.32 {D24[1]}, [R2], R11
    VSHR.S32 Q13, Q13, #16
    VST1.32 {D25[0]}, [R2], R11
    VMLAL.S16 Q13, D1, D2
    VST1.32 {D25[1]}, [R2], R11
    VMLSL.S16 Q9, D13, D11
    VMLSL.S16 Q13, D5, D3
    VLD2.16 {D8, D9}, [R10]!
    VLD2.16 {D10, D11}, [R3]!
    VLD2.16 {D14, D15}, [R7], R12
    VQADD.S32 Q8, Q9, Q10
    VREV64.16 Q6, Q7
    VQADD.S32 Q12, Q13, Q10
    VQSHL.S32 Q8, Q8, Q11
    VST1.32 D16[0], [R2], R11
    VQSHL.S32 Q12, Q12, Q11
    SUBS R8, R8, #8
    VST1.32 D16[1], [R2], R11
    VST1.32 D17[0], [R2], R11
    VST1.32 D17[1], [R2], R11
    BGT LOOP_1
    @ ---- phase 1 drain ----
    VST1.32 D24[0], [R2], R11
    VMULL.U16 Q9, D8, D10
    VMLSL.U16 Q9, D12, D11
    VST1.32 D24[1], [R2], R11
    VST1.32 D25[0], [R2], R11
    VSHR.S32 Q9, Q9, #16
    VST1.32 D25[1], [R2], R11
    VMLAL.S16 Q9, D9, D10
    VMLSL.S16 Q9, D13, D11
    @ ---- phase 2 setup: backward/negated stream, new output region ----
    MOV R12, #12
    SMULBB R7, R5, R6
    MOV R10, R5, LSL #1
    VQADD.S32 Q8, Q9, Q10
    VQSHL.S32 Q8, Q8, Q11
    VST1.32 D16[0], [R2], R11
    MOV R7, R7, LSL #2
    VST1.32 D16[1], [R2], R11
    ADD R7, R7, R9              @ R7 -> phase-2 output
    VST1.32 D17[0], [R2], R11
    VST1.32 D17[1], [R2], R11
    SUB R11, R10, #1
    MOV R10, R11, LSL #2
    ADD R10, R0, R10
    MOV R11, R11, LSL #1
    SUB R10, R10, R12           @ R10 -> last group of R0 stream
    MOV R8, R6, LSL #2          @ phase-2 output stride in bytes
    MOV R12, #-16
    ADD R11, R11, R4
    VLD1.32 {D6, D7}, [R10], R12
    SUB R11, R11, #14           @ R11 -> end of window table
    VREV64.32 D0, D6
    VREV64.32 D1, D7
    VQNEG.S32 D0, D0            @ saturating negate of reversed stream
    VQNEG.S32 D1, D1
    VUZP.16 D1, D0
    VLD2.16 {D2, D3}, [R11], R12
    VREV64.16 D2, D2
    VREV64.16 D3, D3
    VLD2.16 {D4, D5}, [R1]!
    VMULL.U16 Q13, D1, D3
    VMLSL.U16 Q13, D4, D2
    VSHR.S32 Q13, Q13, #16
    VMLAL.S16 Q13, D0, D3
    VMLSL.S16 Q13, D5, D2
    @VQSHL.S32 Q12,Q13,Q11
    @VQADD.S32 Q12,Q12,Q10
    @VSHR.S32 Q12,Q12,#16
    VQADD.S32 Q12, Q13, Q10
    VQSHL.S32 Q12, Q12, Q11
    VUZP.16 D24, D25
    VLD1.32 {D14, D15}, [R10], R12
    VMULL.U16 Q13, D1, D3
    VMLSL.U16 Q13, D4, D2
    VREV64.32 Q4, Q7
    VQNEG.S32 Q4, Q4
    VLD2.16 {D10, D11}, [R11], R12
    VSHR.S32 Q13, Q13, #16
    VLD2.16 {D12, D13}, [R1]!
    VMLAL.S16 Q13, D0, D3
    VMLSL.S16 Q13, D5, D2
    VUZP.16 D9, D8
    VREV64.16 Q5, Q5
    VQADD.S32 Q12, Q13, Q10
    SUB R5, R5, #8
    VQSHL.S32 Q12, Q12, Q11
LOOP_2:
    @ store group Q12 while computing the next two
    VLD1.32 {D6, D7}, [R10], R12
    VMULL.U16 Q9, D9, D11
    VREV64.32 Q0, Q3
    VQNEG.S32 Q0, Q0
    VUZP.16 D1, D0
    VLD2.16 {D2, D3}, [R11], R12
    VREV64.16 Q1, Q1
    VLD2.16 {D4, D5}, [R1]!
    VMLSL.U16 Q9, D12, D10
    VST1.32 D24[0], [R7], R8
    VMULL.U16 Q13, D1, D3
    VST1.32 D24[1], [R7], R8
    VSHR.S32 Q9, Q9, #16
    VST1.32 D25[0], [R7], R8
    VMLSL.U16 Q13, D4, D2
    VST1.32 D25[1], [R7], R8
    VMLAL.S16 Q9, D8, D11
    VLD1.32 {D14, D15}, [R10], R12
    VSHR.S32 Q13, Q13, #16
    VMLSL.S16 Q9, D13, D10
    VLD2.16 {D10, D11}, [R11], R12
    VMLAL.S16 Q13, D0, D3
    VMLSL.S16 Q13, D5, D2
    VREV64.32 Q4, Q7
    VLD2.16 {D12, D13}, [R1]!
    VQNEG.S32 Q4, Q4
    VREV64.16 Q5, Q5
    VQADD.S32 Q8, Q9, Q10
    VUZP.16 D9, D8
    VQADD.S32 Q12, Q13, Q10
    VQSHL.S32 Q8, Q8, Q11
    SUBS R5, R5, #8
    VST1.32 D16[0], [R7], R8
    VQSHL.S32 Q12, Q12, Q11
    VST1.32 D16[1], [R7], R8
    VST1.32 D17[0], [R7], R8
    VST1.32 D17[1], [R7], R8
    BGT LOOP_2
    @ ---- phase 2 drain ----
    VST1.32 D24[0], [R7], R8
    VMULL.U16 Q9, D9, D11
    VMLSL.U16 Q9, D12, D10
    VST1.32 D24[1], [R7], R8
    VST1.32 D25[0], [R7], R8
    VSHR.S32 Q9, Q9, #16
    VST1.32 D25[1], [R7], R8
    VMLAL.S16 Q9, D8, D11
    VMLSL.S16 Q9, D13, D10
    VQADD.S32 Q8, Q9, Q10
    VQSHL.S32 Q8, Q8, Q11
    VST1.32 D16[0], [R7], R8
    VST1.32 D16[1], [R7], R8
    VST1.32 D17[0], [R7], R8
    VST1.32 D17[1], [R7], R8
    VPOP {d8 - d15}
    LDMFD sp!, {R4-R12, R15}

View file

@ -0,0 +1,144 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_postradixcompute2
@ Post-processing radix-2 recombination stage of a fixed-point FFT.
@ r0 = output buffer base, r1 = first input half, r2 = table of per-group
@ output byte offsets (read one word per group), r3 = length in bytes of
@ half the input (also used to derive the inner-loop trip count).
@ NOTE(review): exact output layout is encoded in the store post-increments
@ (#4 / #32-4 / #(32+(32<<1))-4) — interleaves 4 result words across rows
@ 32 bytes apart; confirm against the matching C reference before changing.
ixheaacd_postradixcompute2:
STMFD sp!, {r4-r12, r14}
@ 20 bytes of locals: [sp]=outer pass counter, [sp,#4]=saved inner count,
@ [sp,#16]=saved output base pointer.
SUB sp, sp, #20
STR r0, [sp, #16]
ADD r4, r1, r3, lsl #1
@ inner trip count = r3 >> 4 (each iteration consumes 2 x 8 words)
MOV r3, r3, asr #4
STR r3, [sp, #4]
MOV r5, #1
STR r5, [sp]
POSTRADIX2_START:
@ --- first input half: load 8 words, form 4 add/sub butterfly pairs ---
LDR r14, [r2]
LDMIA r1!, {r5-r12}
ADD r0, r0, r14
ADD r14, r5, r7
SUB r5, r5, r7
ADD r7, r9, r11
SUB r9, r9, r11
ADD r11, r6, r8
SUB r6, r6, r8
ADD r8, r10, r12
SUB r10, r10, r12
@ scatter the 8 results; post-increments walk the interleaved output rows
STR r14, [r0], #4
STR r11, [r0], #32-4
STR r7, [r0], #4
STR r8, [r0], #(32+(32<<1))-4
STR r5, [r0], #4
STR r6, [r0], #32-4
STR r9, [r0], #4
STR r10, [r0], #0
@ --- second input half (r4 stream): same butterflies, output shifted by 8 ---
LDR r0, [sp, #16]
LDR r14, [r2], #4
LDMIA r4!, {r5-r12}
ADD r0, r0, r14
ADD r0, r0, #8
ADD r14, r5, r7
SUB r5, r5, r7
ADD r7, r9, r11
SUB r9, r9, r11
ADD r11, r6, r8
SUB r6, r6, r8
ADD r8, r10, r12
SUB r10, r10, r12
STR r14, [r0], #4
STR r11, [r0], #32-4
STR r7, [r0], #4
STR r8, [r0], #(32+(32<<1))-4
STR r5, [r0], #4
STR r6, [r0], #32-4
STR r9, [r0], #4
STR r10, [r0], #0
SUBS r3, r3, #1
LDR r0, [sp, #16]
BGT POSTRADIX2_START
@ inner loop done: reload counters, advance both input streams past the
@ half already consumed by the other stream, and run the second outer pass
LDR r0, [sp, #16]
LDR r3, [sp, #4]
LDR r6, [sp]
ADD r1, r1, r3, lsl #5
ADD r4, r4, r3, lsl #5
SUBS r6, r6, #1
STR r6, [sp]
BPL POSTRADIX2_START
ADD sp, sp, #20
@ restore and return (r15/pc loaded directly from the stacked lr)
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,138 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_postradixcompute4
@ Post-processing radix-4 recombination stage of a fixed-point FFT.
@ r0 = output buffer, r1 = first input half, r3 = byte offset used to derive
@ the second input half (r4 = r1 + 2*r3).  Runs exactly two outer passes
@ (r3 is overwritten with the constant 2 below).
@ NOTE(review): output layout is encoded in the store post-increments
@ (#4 / #14<<1 == 28) and the fix-up SUBs (#92, #100-8) — verify against the
@ C reference before touching any of these constants.
ixheaacd_postradixcompute4:
STMFD sp!, {r4-r12, r14}
ADD r4, r1, r3, lsl #1
MOV r3, #2
POSTRADIX4_START:
@ --- first input stream: 8 words -> two layers of radix-4 add/sub pairs ---
LDMIA r1!, {r5-r12}
ADD r14, r5, r9
SUB r5, r5, r9
ADD r9, r6, r10
SUB r6, r6, r10
ADD r10, r7, r11
SUB r7, r7, r11
ADD r11, r8, r12
SUB r8, r8, r12
@ second combine layer
ADD r12, r14, r10
SUB r14, r14, r10
ADD r10, r9, r11
SUB r9, r9, r11
ADD r11, r5, r8
SUB r5, r5, r8
ADD r8, r6, r7
SUB r6, r6, r7
@ scatter: pairs 4 bytes apart, groups 28 (#14<<1) bytes apart
STR r12, [r0], #4
STR r10, [r0], #14<<1
STR r11, [r0], #4
STR r6 , [r0], #14<<1
STR r14, [r0], #4
STR r9 , [r0], #14<<1
STR r5, [r0], #4
STR r8, [r0], #0
@ --- second input stream (r4): rewind output by 92 bytes, repeat ---
LDMIA r4!, {r5-r12}
SUB r0, r0, #92
ADD r14, r5, r9
SUB r5, r5, r9
ADD r9, r6, r10
SUB r6, r6, r10
ADD r10, r7, r11
SUB r7, r7, r11
ADD r11, r8, r12
SUB r8, r8, r12
ADD r12, r14, r10
SUB r14, r14, r10
ADD r10, r9, r11
SUB r9, r9, r11
ADD r11, r5, r8
SUB r5, r5, r8
ADD r8, r6, r7
SUB r6, r6, r7
STR r12, [r0], #4
STR r10, [r0], #14<<1
STR r11, [r0], #4
STR r6, [r0], #14<<1
STR r14, [r0], #4
STR r9, [r0], #14<<1
STR r5, [r0], #4
STR r8, [r0], #0
@ advance both input streams by 32 bytes, step output back for next pass
ADD r1, r1, #1 << 5
ADD r4, r4, #1 << 5
SUB r0, r0, #100-8
SUBS r3, r3, #1
BGT POSTRADIX4_START
@ restore and return (pc loaded from stacked lr)
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,545 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_post_twiddle_armv7
@ Post-twiddle (complex multiply by twiddle factors) after the IMDCT FFT,
@ NEON-accelerated with a software-pipelined core loop.
@ R0 = output (written forward), R1 = input, R2 = twiddle table
@ (entered at byte offset 7500), R3 = length.  A mirrored output pointer
@ (R7) is written backward from the end of the buffer.
@ Fixed-point complex multiply idiom used throughout: split 32-bit operands
@ into unsigned low / signed high 16-bit halves, VMULL.U16 on the low half,
@ >>16, then VMLAL.S16 the high half — i.e. a 32x16 fractional multiply.
@ NOTE(review): the two R3-dependent constant sets (50/-50/stride 4 vs
@ 0x192/0xfe6e/stride 32) select per-transform-size scaling; confirm the
@ values against the C reference before changing.
ixheaacd_post_twiddle_armv7:
STMFD sp!, {R4-R12}
VPUSH {d8 - d15}
LDR R4, [sp, #100]
ARM_PROLOGUE:
@ choose constants/stride by transform size (R3 >= 0x400 or not)
CMP R3, #0x400
LDR R6, =7500
ADD R2, R2, R6
BLT NEXT
MOV R4, #50
MOV R5, #-50
MOV R6, #4
VDUP.16 D10, R4
B NEXT1
NEXT:
LDR R4, =0x192
LDR R5, =0xfe6e
MOV R6, #32
VDUP.16 D10, R4
NEXT1:
@ scalar ARM handling of the very first complex sample (SMULW* = 32x16
@ fractional multiplies), writing one value forward (R0) and one backward (R7)
LDR R7, [R1], #4
LDR R8, [R1], #4
LDR R9, [R2]
ADD R2, R2, R6
SMULWT R11, R8, R9
SMULWB R10, R8, R9
SMULWT R12, R7, R9
SMLAWB R8, R7, R9, R11
SUB R10, R10, R12
MVN R8, R8
ADD R8, R8, #1
SMLAWB R9, R10, R5, R8
SMLAWB R11, R8, R4, R10
LSL R7, R3, #2
ADD R7, R0, R7
SUB R7, R7, #4
STR R11, [R7], #-4
STR R9, [R0], #4
@ R5 = backward input pointer (end of input - 40); R3 becomes the NEON
@ iteration count ((len-1)>>4); R8 = -32 backward stride
LSL R5, R3, #2
ADD R5, R1, R5
SUB R5, R5, #40
SUB R3, R3, #1
ASR R3, R3, #4
SUB R7, R7, #28
MOV R8, #-32
NEON_PROLOGUE:
@ prime the software pipeline: first batch of 8 complex values from both
@ ends of the input plus 4 interleaved twiddle pairs
VLD4.16 {D0, D1, D2, D3}, [R5], R8
VLD4.16 {D4, D5, D6, D7}, [R1]!
VLD2.16 {D8[0], D9[0]}, [R2], R6
VLD2.16 {D8[1], D9[1]}, [R2], R6
VLD2.16 {D8[2], D9[2]}, [R2], R6
VLD2.16 {D8[3], D9[3]}, [R2], R6
VREV64.16 Q6, Q4
VMULL.U16 Q15, D2, D13
VMULL.U16 Q14, D0, D13
VMULL.U16 Q13, D2, D12
VMULL.U16 Q12, D0, D12
VSHR.U32 Q15, Q15, #16
VSHR.U32 Q14, Q14, #16
VSHR.U32 Q13, Q13, #16
VSHR.U32 Q12, Q12, #16
VMLAL.S16 Q15, D3, D13
VMLAL.S16 Q14, D1, D13
VMLAL.S16 Q13, D3, D12
VMLAL.S16 Q12, D1, D12
VMULL.U16 Q11, D6, D9
VMULL.U16 Q10, D4, D9
VADD.I32 Q14, Q14, Q13
VSUB.I32 Q15, Q15, Q12
VNEG.S32 Q14, Q14
VMULL.U16 Q9, D6, D8
VMULL.U16 Q8, D4, D8
VMOV Q13, Q15
VSHR.U32 Q11, Q11, #16
VMOV Q12, Q14
VSHR.U32 Q10, Q10, #16
VUZP.16 D26, D27
VSHR.U32 Q9, Q9, #16
VUZP.16 D24, D25
VSHR.U32 Q8, Q8, #16
VMLAL.S16 Q11, D7, D9
VMLAL.S16 Q10, D5, D9
VMLAL.S16 Q9, D7, D8
VMLAL.S16 Q8, D5, D8
VLD2.16 {D8[0], D9[0]}, [R2], R6
VMULL.U16 Q0, D26, D10
VLD2.16 {D8[1], D9[1]}, [R2], R6
VMULL.U16 Q1, D24, D10
VLD2.16 {D8[2], D9[2]}, [R2], R6
VADD.I32 Q11, Q11, Q8
VLD2.16 {D8[3], D9[3]}, [R2], R6
VSUB.I32 Q10, Q9, Q10
VREV64.16 Q6, Q4
VNEG.S32 Q11, Q11
VMOV Q9, Q11
VSHR.U32 Q0, Q0, #16
VMOV Q8, Q10
VSHR.U32 Q1, Q1, #16
VUZP.16 D18, D19
VMLAL.S16 Q0, D27, D10
VUZP.16 D16, D17
VMLAL.S16 Q1, D25, D10
VMULL.U16 Q2, D18, D10
VMULL.U16 Q3, D16, D10
VNEG.S32 Q0, Q0
VADD.I32 Q7, Q15, Q1
VADD.I32 Q13, Q14, Q0
VREV64.32 Q7, Q7
VSHR.U32 Q2, Q2, #16
VSWP D14, D15
VSHR.U32 Q3, Q3, #16
VMLAL.S16 Q2, D19, D10
VLD4.16 {D0, D1, D2, D3}, [R5], R8
VMLAL.S16 Q3, D17, D10
SUB R3, R3, #2
VADD.I32 Q12, Q10, Q2
VREV64.32 Q12, Q12
VNEG.S32 Q8, Q3
VLD4.16 {D4, D5, D6, D7}, [R1]!
VSWP D24, D25
VADD.I32 Q8, Q11, Q8
CORE_LOOP:
@ steady state: stores of iteration N-1 interleaved with loads/math of
@ iteration N (same computation as the prologue above)
VMULL.U16 Q15, D2, D13
VST2.32 {Q12, Q13}, [R7], R8
VMULL.U16 Q14, D0, D13
VMULL.U16 Q13, D2, D12
VST2.32 {Q7, Q8}, [R0]!
VMULL.U16 Q12, D0, D12
VSHR.U32 Q15, Q15, #16
VSHR.U32 Q14, Q14, #16
VSHR.U32 Q13, Q13, #16
VSHR.U32 Q12, Q12, #16
VMLAL.S16 Q15, D3, D13
VMLAL.S16 Q14, D1, D13
VMLAL.S16 Q13, D3, D12
VMLAL.S16 Q12, D1, D12
VMULL.U16 Q11, D6, D9
VMULL.U16 Q10, D4, D9
VADD.I32 Q14, Q14, Q13
VSUB.I32 Q15, Q15, Q12
VNEG.S32 Q14, Q14
VMULL.U16 Q9, D6, D8
VMULL.U16 Q8, D4, D8
VMOV Q13, Q15
VSHR.U32 Q11, Q11, #16
VMOV Q12, Q14
VSHR.U32 Q10, Q10, #16
VUZP.16 D26, D27
VSHR.U32 Q9, Q9, #16
VUZP.16 D24, D25
VSHR.U32 Q8, Q8, #16
VMLAL.S16 Q11, D7, D9
VMLAL.S16 Q10, D5, D9
VMLAL.S16 Q9, D7, D8
VMLAL.S16 Q8, D5, D8
VLD2.16 {D8[0], D9[0]}, [R2], R6
VMULL.U16 Q0, D26, D10
VLD2.16 {D8[1], D9[1]}, [R2], R6
VMULL.U16 Q1, D24, D10
VLD2.16 {D8[2], D9[2]}, [R2], R6
VADD.I32 Q11, Q11, Q8
VLD2.16 {D8[3], D9[3]}, [R2], R6
VSUB.I32 Q10, Q9, Q10
VREV64.16 Q6, Q4
VNEG.S32 Q11, Q11
VMOV Q9, Q11
VSHR.U32 Q0, Q0, #16
VMOV Q8, Q10
VSHR.U32 Q1, Q1, #16
VUZP.16 D18, D19
VMLAL.S16 Q0, D27, D10
VUZP.16 D16, D17
VMLAL.S16 Q1, D25, D10
VMULL.U16 Q2, D18, D10
VMULL.U16 Q3, D16, D10
VNEG.S32 Q0, Q0
VADD.I32 Q7, Q15, Q1
VADD.I32 Q13, Q14, Q0
VREV64.32 Q7, Q7
VSHR.U32 Q2, Q2, #16
VSWP D14, D15
VSHR.U32 Q3, Q3, #16
VMLAL.S16 Q2, D19, D10
VLD4.16 {D0, D1, D2, D3}, [R5], R8
VMLAL.S16 Q3, D17, D10
VADD.I32 Q12, Q10, Q2
VREV64.32 Q12, Q12
VNEG.S32 Q8, Q3
VLD4.16 {D4, D5, D6, D7}, [R1]!
VSWP D24, D25
VADD.I32 Q8, Q11, Q8
SUBS R3, R3, #1
BNE CORE_LOOP
NEON_EPILOGUE:
@ drain the pipeline: finish the in-flight iteration without new loads
VMULL.U16 Q15, D2, D13
VST2.32 {Q12, Q13}, [R7], R8
VMULL.U16 Q14, D0, D13
VMULL.U16 Q13, D2, D12
VST2.32 {Q7, Q8}, [R0]!
VMULL.U16 Q12, D0, D12
VSHR.U32 Q15, Q15, #16
VSHR.U32 Q14, Q14, #16
VSHR.U32 Q13, Q13, #16
VSHR.U32 Q12, Q12, #16
VMLAL.S16 Q15, D3, D13
VMLAL.S16 Q14, D1, D13
VMLAL.S16 Q13, D3, D12
VMLAL.S16 Q12, D1, D12
VMULL.U16 Q11, D6, D9
VMULL.U16 Q10, D4, D9
VADD.I32 Q14, Q14, Q13
VSUB.I32 Q15, Q15, Q12
VNEG.S32 Q14, Q14
VMULL.U16 Q9, D6, D8
VMULL.U16 Q8, D4, D8
VMOV Q13, Q15
VSHR.U32 Q11, Q11, #16
VMOV Q12, Q14
VSHR.U32 Q10, Q10, #16
VUZP.16 D26, D27
VSHR.U32 Q9, Q9, #16
VUZP.16 D24, D25
VSHR.U32 Q8, Q8, #16
VMLAL.S16 Q11, D7, D9
VMLAL.S16 Q10, D5, D9
VMLAL.S16 Q9, D7, D8
VMLAL.S16 Q8, D5, D8
VMULL.U16 Q0, D26, D10
VMULL.U16 Q1, D24, D10
VADD.I32 Q11, Q11, Q8
VSUB.I32 Q10, Q9, Q10
VNEG.S32 Q11, Q11
VMOV Q9, Q11
VSHR.U32 Q0, Q0, #16
VMOV Q8, Q10
VSHR.U32 Q1, Q1, #16
VUZP.16 D18, D19
VMLAL.S16 Q0, D27, D10
VUZP.16 D16, D17
VMLAL.S16 Q1, D25, D10
VMULL.U16 Q2, D18, D10
VMULL.U16 Q3, D16, D10
VNEG.S32 Q0, Q0
VADD.I32 Q7, Q15, Q1
VADD.I32 Q13, Q14, Q0
VREV64.32 Q7, Q7
VSHR.U32 Q2, Q2, #16
VSWP D14, D15
VSHR.U32 Q3, Q3, #16
VMLAL.S16 Q2, D19, D10
VMLAL.S16 Q3, D17, D10
VADD.I32 Q12, Q10, Q2
VREV64.32 Q12, Q12
VNEG.S32 Q8, Q3
VSWP D24, D25
VADD.I32 Q8, Q11, Q8
VST2.32 {Q7, Q8}, [R0]!
VST2.32 {Q12, Q13}, [R7], R8
@ residue: the last partial group (fewer than 8 complex values); lanes that
@ fall outside the buffer are zero-filled before the same twiddle math
VLD4.16 {D0, D1, D2, D3}, [R5], R8
VMOV.S32 D5, #0x00000000
VMOV.S32 D7, #0x00000000
VLD2.32 {D4, D6}, [R1]!
VLD2.32 {D5[0], D7[0]}, [R1]
VLD2.16 {D8[0], D9[0]}, [R2], R6
VLD2.16 {D8[1], D9[1]}, [R2], R6
VLD2.16 {D8[2], D9[2]}, [R2], R6
VLD2.16 {D8[3], D9[3]}, [R2], R6
VREV64.16 Q6, Q4
VUZP.16 D4, D5
VUZP.16 D6, D7
VMULL.U16 Q15, D2, D13
VMULL.U16 Q14, D0, D13
VMULL.U16 Q13, D2, D12
VMULL.U16 Q12, D0, D12
VSHR.U32 Q15, Q15, #16
VSHR.U32 Q14, Q14, #16
VSHR.U32 Q13, Q13, #16
VSHR.U32 Q12, Q12, #16
VMLAL.S16 Q15, D3, D13
VMLAL.S16 Q14, D1, D13
VMLAL.S16 Q13, D3, D12
VMLAL.S16 Q12, D1, D12
VMULL.U16 Q11, D6, D9
VMULL.U16 Q10, D4, D9
VADD.I32 Q14, Q14, Q13
VSUB.I32 Q15, Q15, Q12
VNEG.S32 Q14, Q14
VMULL.U16 Q9, D6, D8
VMULL.U16 Q8, D4, D8
VMOV Q13, Q15
VSHR.U32 Q11, Q11, #16
VMOV Q12, Q14
VSHR.U32 Q10, Q10, #16
VUZP.16 D26, D27
VSHR.U32 Q9, Q9, #16
VUZP.16 D24, D25
VSHR.U32 Q8, Q8, #16
VMLAL.S16 Q11, D7, D9
VMLAL.S16 Q10, D5, D9
VMLAL.S16 Q9, D7, D8
VMLAL.S16 Q8, D5, D8
VMULL.U16 Q0, D26, D10
VMULL.U16 Q1, D24, D10
VADD.I32 Q11, Q11, Q8
VSUB.I32 Q10, Q9, Q10
VNEG.S32 Q11, Q11
VMOV Q9, Q11
VSHR.U32 Q0, Q0, #16
VMOV Q8, Q10
VSHR.U32 Q1, Q1, #16
VUZP.16 D18, D19
VMLAL.S16 Q0, D27, D10
VUZP.16 D16, D17
VMLAL.S16 Q1, D25, D10
VMULL.U16 Q2, D18, D10
VMULL.U16 Q3, D16, D10
VNEG.S32 Q0, Q0
VADD.I32 Q7, Q15, Q1
VADD.I32 Q13, Q14, Q0
VREV64.32 Q7, Q7
VSHR.U32 Q2, Q2, #16
VSWP D14, D15
VSHR.U32 Q3, Q3, #16
VMLAL.S16 Q2, D19, D10
VMLAL.S16 Q3, D17, D10
VADD.I32 Q12, Q10, Q2
VREV64.32 Q12, Q12
VNEG.S32 Q8, Q3
VSWP D24, D25
VADD.I32 Q8, Q11, Q8
@ partial stores of the residue group (lane-wise, forward then backward)
VST2.32 {D14, D16}, [R0]!
VST2.32 {D15[0], D17[0]}, [R0]!
VST1.32 D15[1], [R0]
ADD R7, R7, #4
VST1.32 D26[0], [R7]!
VST2.32 {D24[1], D26[1]}, [R7]!
VST2.32 {D25, D27}, [R7]
VPOP {d8 - d15}
LDMFD sp!, {R4-R12}
BX LR

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,388 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_pretwiddle_compute_armv7
@ Pre-twiddle (complex multiply by cosine/sine table entries) feeding the
@ IMDCT FFT, NEON-accelerated and software-pipelined.
@ R0 = forward input, R1 = backward input, R2 = output (forward),
@ R3 = twiddle table (entered at byte offset 7500),
@ [sp,#100] = element count, [sp,#104] = shift applied to every result
@ (negated below; positive -> VSHL left shift, negative -> arithmetic right).
@ A mirrored output pointer (R7) writes the second half backward.
@ Fixed-point complex multiply idiom: VMULL.U16 on the unsigned low 16 bits,
@ >>16, then VMLAL.S16 the signed high 16 bits (32x16 fractional multiply).
ixheaacd_pretwiddle_compute_armv7:
STMFD sp!, {R4-R12}
VPUSH {d8 - d15}
LDR R8, =7500
ADD R3, R3, R8
LDR R4, [sp, #100]
LDR R5, [sp, #104]
@ R7 = end of output (R2 + 16*R4 - 4), written backward
LSL R7, R4, #4
ADD R7, R2, R7
SUB R7, R7, #4
@ R5 = -shift
MVN R5, R5
ADD R5, R5, #1
ARM_PROLOGUE:
@ scalar handling of the first complex sample with SMULW* fractional
@ multiplies, then apply the shift in whichever direction R5 indicates
LDR R8, [R3], #4
LDR R9, [R0], #4
SMULWB R12, R9, R8
LDR R10, [R1], #-4
SMULWT R11, R9, R8
SMLAWT R9, R10, R8, R12
SMULWB R6, R10, R8
MVN R9, R9
ADD R9, R9, #1
SUB R11, R11, R6
CMP R5, #0
BGT NEXT
MVN R8, R5
ADD R8, R8, #1
ASR R11, R11, R8
ASR R9, R9, R8
B NEXT1
NEXT:
LSL R11, R11, R5
LSL R9, R9, R5
NEXT1:
STR R9, [R2], #4
STR R11, [R2], #4
@ table stride: 4 bytes for the 256-point case, else 32 (skipping 28)
CMP R4, #0x100
BNE NXT
MOV R6, #4
B NXT1
NXT:
MOV R6, #32
ADD R3, R3, #28
NXT1:
@ NEON iteration count = (count-1)>>2
SUB R4, R4, #1
ASR R4, R4, #2
SUB R7, R7, #28
NEON_PROLOGUE:
@ prime the pipeline: load 8 forward + 8 backward 16-bit pairs and 4
@ twiddle pairs; Q7 holds the vector shift amount for VSHL.S32
MOV R8, #-32
VDUP.32 Q7, R5
SUB R1, R1, #28
VLD2.16 {D8[0], D9[0]}, [R3], R6
VLD2.16 {D8[1], D9[1]}, [R3], R6
VLD2.16 {D8[2], D9[2]}, [R3], R6
VLD2.16 {D8[3], D9[3]}, [R3], R6
VREV64.16 Q5, Q4
VLD4.16 {D0, D1, D2, D3}, [R0]!
VLD4.16 {D4, D5, D6, D7}, [R1], R8
VREV64.16 Q0, Q0
VREV64.16 Q2, Q2
VMULL.U16 Q15, D2, D9
VMULL.U16 Q14, D4, D9
VMULL.U16 Q13, D2, D8
VMULL.U16 Q12, D4, D8
VSHR.U32 Q15, Q15, #16
VSHR.U32 Q14, Q14, #16
VSHR.U32 Q13, Q13, #16
VSHR.U32 Q12, Q12, #16
VMLAL.S16 Q15, D3, D9
VMLAL.S16 Q14, D5, D9
VMLAL.S16 Q13, D3, D8
VMLAL.S16 Q12, D5, D8
VADD.I32 Q14, Q13, Q14
VNEG.S32 Q14, Q14
VSUB.I32 Q15, Q15, Q12
VMULL.U16 Q11, D0, D11
VMULL.U16 Q10, D6, D11
VMULL.U16 Q9, D0, D10
VMULL.U16 Q8, D6, D10
VSHR.U32 Q11, Q11, #16
VSHR.U32 Q10, Q10, #16
VSHR.U32 Q9, Q9, #16
VSHR.U32 Q8, Q8, #16
VMLAL.S16 Q11, D1, D11
VLD2.16 {D8[0], D9[0]}, [R3], R6
VMLAL.S16 Q10, D7, D11
VLD2.16 {D8[1], D9[1]}, [R3], R6
VMLAL.S16 Q9, D1, D10
VLD2.16 {D8[2], D9[2]}, [R3], R6
VMLAL.S16 Q8, D7, D10
VLD2.16 {D8[3], D9[3]}, [R3], R6
VADD.I32 Q10, Q10, Q9
VNEG.S32 Q10, Q10
VREV64.16 Q5, Q4
VSUB.I32 Q11, Q8, Q11
VLD4.16 {D0, D1, D2, D3}, [R0]!
VSHL.S32 Q10, Q10, Q7
VLD4.16 {D4, D5, D6, D7}, [R1], R8
VREV64.16 Q0, Q0
VSHL.S32 Q11, Q11, Q7
VREV64.16 Q2, Q2
VSHL.S32 Q9, Q15, Q7
VSHL.S32 Q8, Q14, Q7
SUB R4, R4, #2
CORE_LOOP:
@ steady state: stores of the previous group interleaved with the next
@ group's loads and multiplies (same math as the prologue)
VMULL.U16 Q15, D2, D9
VST2.32 {Q8, Q9}, [R2]!
VMULL.U16 Q14, D4, D9
VMULL.U16 Q13, D2, D8
VST2.32 {Q10, Q11}, [R7], R8
VMULL.U16 Q12, D4, D8
VSHR.U32 Q15, Q15, #16
VSHR.U32 Q14, Q14, #16
VSHR.U32 Q13, Q13, #16
VSHR.U32 Q12, Q12, #16
VMLAL.S16 Q15, D3, D9
VMLAL.S16 Q14, D5, D9
VMLAL.S16 Q13, D3, D8
VMLAL.S16 Q12, D5, D8
VADD.I32 Q14, Q13, Q14
VNEG.S32 Q14, Q14
VSUB.I32 Q15, Q15, Q12
VMULL.U16 Q11, D0, D11
VLD2.16 {D8[0], D9[0]}, [R3], R6
VMULL.U16 Q10, D6, D11
VMULL.U16 Q9, D0, D10
VLD2.16 {D8[1], D9[1]}, [R3], R6
VMULL.U16 Q8, D6, D10
VSHR.U32 Q11, Q11, #16
VLD2.16 {D8[2], D9[2]}, [R3], R6
VSHR.U32 Q10, Q10, #16
VSHR.U32 Q9, Q9, #16
VLD2.16 {D8[3], D9[3]}, [R3], R6
VSHR.U32 Q8, Q8, #16
VMLAL.S16 Q11, D1, D11
VMLAL.S16 Q10, D7, D11
VMLAL.S16 Q9, D1, D10
VMLAL.S16 Q8, D7, D10
VLD4.16 {D0, D1, D2, D3}, [R0]!
VADD.I32 Q10, Q10, Q9
VNEG.S32 Q10, Q10
VREV64.16 Q5, Q4
VSUB.I32 Q11, Q8, Q11
VLD4.16 {D4, D5, D6, D7}, [R1], R8
VSHL.S32 Q10, Q10, Q7
VSHL.S32 Q11, Q11, Q7
VREV64.16 Q0, Q0
VSHL.S32 Q9, Q15, Q7
VREV64.16 Q2, Q2
VSHL.S32 Q8, Q14, Q7
SUBS R4, R4, #1
BNE CORE_LOOP
NEON_EPILOGUE:
@ drain: finish the in-flight group without issuing new loads
VMULL.U16 Q15, D2, D9
VST2.32 {Q8, Q9}, [R2]!
VMULL.U16 Q14, D4, D9
VMULL.U16 Q13, D2, D8
VST2.32 {Q10, Q11}, [R7], R8
VMULL.U16 Q12, D4, D8
VSHR.U32 Q15, Q15, #16
VSHR.U32 Q14, Q14, #16
VSHR.U32 Q13, Q13, #16
VSHR.U32 Q12, Q12, #16
VMLAL.S16 Q15, D3, D9
VMLAL.S16 Q14, D5, D9
VMLAL.S16 Q13, D3, D8
VMLAL.S16 Q12, D5, D8
VADD.I32 Q14, Q13, Q14
VNEG.S32 Q14, Q14
VSUB.I32 Q15, Q15, Q12
VMULL.U16 Q11, D0, D11
VMULL.U16 Q10, D6, D11
VMULL.U16 Q9, D0, D10
VMULL.U16 Q8, D6, D10
VSHR.U32 Q11, Q11, #16
VSHR.U32 Q10, Q10, #16
VSHR.U32 Q9, Q9, #16
VSHR.U32 Q8, Q8, #16
VMLAL.S16 Q11, D1, D11
VMLAL.S16 Q10, D7, D11
VMLAL.S16 Q9, D1, D10
VMLAL.S16 Q8, D7, D10
VADD.I32 Q10, Q10, Q9
VNEG.S32 Q10, Q10
VSUB.I32 Q11, Q8, Q11
VSHL.S32 Q10, Q10, Q7
VSHL.S32 Q11, Q11, Q7
VSHL.S32 Q9, Q15, Q7
VSHL.S32 Q8, Q14, Q7
VST2.32 {Q8, Q9}, [R2]!
VST2.32 {Q10, Q11}, [R7], R8
RESIDUE_NEON:
@ final partial group: zero-fill the lanes beyond the buffer ends, run the
@ same complex multiply, and store lane-wise
MOV R10, #-16
VMOV.S32 D3, #0x00000000
VMOV.S32 D4, #0x00000000
VLD2.32 {D0, D2}, [R0]!
VLD2.32 {D1[0], D3[0]}, [R0]!
VLD1.32 D1[1], [R0]
VUZP.16 D0, D1
VUZP.16 D2, D3
ADD R1, R1, #4
VLD1.32 D6[0], [R1]!
VLD2.32 {D4[1], D6[1]}, [R1]!
VLD2.32 {D5, D7}, [R1]!
VUZP.16 D4, D5
VUZP.16 D6, D7
VREV64.16 Q0, Q0
VREV64.16 Q2, Q2
VLD2.16 {D8[0], D9[0]}, [R3], R6
VLD2.16 {D8[1], D9[1]}, [R3], R6
VLD2.16 {D8[2], D9[2]}, [R3], R6
VLD2.16 {D8[3], D9[3]}, [R3], R6
VREV64.16 Q5, Q4
VMULL.U16 Q15, D2, D9
VMULL.U16 Q14, D4, D9
VMULL.U16 Q13, D2, D8
VMULL.U16 Q12, D4, D8
VSHR.U32 Q15, Q15, #16
VSHR.U32 Q14, Q14, #16
VSHR.U32 Q13, Q13, #16
VSHR.U32 Q12, Q12, #16
VMLAL.S16 Q15, D3, D9
VMLAL.S16 Q14, D5, D9
VMLAL.S16 Q13, D3, D8
VMLAL.S16 Q12, D5, D8
VADD.I32 Q14, Q13, Q14
VNEG.S32 Q14, Q14
VSUB.I32 Q15, Q15, Q12
VMULL.U16 Q11, D0, D11
VMULL.U16 Q10, D6, D11
VMULL.U16 Q9, D0, D10
VMULL.U16 Q8, D6, D10
VSHR.U32 Q11, Q11, #16
VSHR.U32 Q10, Q10, #16
VSHR.U32 Q9, Q9, #16
VSHR.U32 Q8, Q8, #16
VMLAL.S16 Q11, D1, D11
VMLAL.S16 Q10, D7, D11
VMLAL.S16 Q9, D1, D10
VMLAL.S16 Q8, D7, D10
VADD.I32 Q10, Q10, Q9
VNEG.S32 Q10, Q10
VSUB.I32 Q11, Q8, Q11
VSHL.S32 Q10, Q10, Q7
VSHL.S32 Q11, Q11, Q7
VSHL.S32 Q9, Q15, Q7
VSHL.S32 Q8, Q14, Q7
VST2.32 {Q10, Q11}, [R7]
VST2.32 {D16, D18}, [R2]!
VST2.32 {D17[0], D19[0]}, [R2]!
VPOP {d8 - d15}
LDMFD sp!, {R4-R12}
BX LR

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,149 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_radix4bfly
@ Radix-4 FFT butterfly stage with twiddle multiplication.
@ r0 = twiddle table (note: walked with mixed -4/+20 post-increments,
@ 6 bytes of table consumed per butterfly via r7 = 6*r3),
@ r1 = data (first leg, read/written in place), r3 = butterflies per group.
@ r2 on entry is the outer group count (saved to [sp,#8]).
@ SMULW*/SMLAW* perform 32x16 fractional multiplies; results are doubled
@ (lsl #1) to restore Q-format after the implicit >>16.
ixheaacd_radix4bfly:
STMFD sp!, {r4-r12, r14}
@ locals: [sp]=6*r3 (twiddle bytes per group), [sp,#8]=outer count,
@ [sp,#12]=saved inner count
SUB sp, sp, #16
MOV r6, #6
MUL r7, r6, r3
MOV r4, r3
STR r7, [sp]
MOV r3, r3, lsl #1
STR r2, [sp, #8]
STR r4, [sp, #12]
@ r2 = second data leg, one (2*r3)<<2-byte stride above r1
ADD r2, r1, r3, lsl #2
ADD r0, r0, #8
RADIX4_OUTLOOP:
RADIX4_INLOOP:
@ real parts of the four legs: two layers of add/sub combines
LDR r6, [r1]
LDR r7, [r2]
LDR r8, [r2, r3, lsl #2]
LDR r9, [r2, r3, lsl #3]
ADD r10, r6, r8
SUB r11, r6, r8
ADD r12, r7, r9
SUB r14, r7, r9
ADD r6, r10, r12
SUB r7, r10, r12
STR r6, [r1], #4
@ imaginary parts (offset #4), pointers pre-indexed with writeback
LDR r8, [r1]
LDR r6, [r2, #4]!
LDR r9, [r2, r3, lsl #2]!
LDR r10, [r2, r3, lsl #2]!
ADD r12, r8, r9
SUB r8, r8, r9
ADD r9, r6, r10
SUB r6, r6, r10
ADD r10, r12, r9
STR r10, [r1], #4
SUB r12, r12, r9
@ cross combines for the +/-j legs
ADD r9, r11, r6
SUB r10, r11, r6
ADD r11, r8, r14
LDR r5, [r0], #-4
SUB r6, r8, r14
@ twiddle multiplies (complex rotate) for legs 1..3; each result doubled
SMULWB r14, r10, r5
SMULWT r8, r11, r5
SUBS r4, r4, #1
SUB r8, r8, r14
MOV r8, r8, lsl #1
STR r8, [r2], #-4
SMULWT r14, r10, r5
SMLAWB r8, r11, r5, r14
LDR r11, [r0], #-4
MOV r8, r8, lsl #1
STR r8, [r2], -r3, lsl #2
SMULWT r10, r7, r11
SMLAWB r8, r12, r11, r10
LDR r14, [r0], #20
MOV r5, r8, lsl #1
SMULWB r10, r7, r11
SMULWT r8, r12, r11
STR r5, [r2], #4
SUB r7, r8, r10
MOV r7, r7, lsl #1
SMULWB r11, r9, r14
SMULWT r12, r6, r14
STR r7, [r2], -r3, lsl #2
SUB r12, r12, r11
MOV r12, r12, lsl #1
SMULWT r10, r9, r14
SMLAWB r7, r6, r14, r10
STR r12, [r2], #-4
MOV r7, r7, lsl #1
STR r7, [r2], #8
BNE RADIX4_INLOOP
@ end of group: rewind twiddle pointer, advance both data legs, next group
LDR r8, [sp]
LDR r4, [sp, #12]
LDR r6, [sp, #8]
SUB r0, r0, r8, lsl #1
ADD r1, r1, r8, lsl #2
ADD r2, r2, r8, lsl #2
SUBS r6, r6, #1
STR r6, [sp, #8]
BNE RADIX4_OUTLOOP
ADD sp, sp, #16
@ restore and return (pc from stacked lr)
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,205 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_adjust_scale_armv7
@ Rescale spectral data in place by a signed shift amount.
@ R0 = array of channel/window buffer pointers, R1 = second array of buffer
@ pointers (used only when [sp,#48] flag is zero),
@ R2 = start index, R3 = end index (count = R3-R2),
@ [sp,#36]/[sp,#40] = first/last buffer index, [sp,#44] = shift
@ (positive -> left shift, negative -> arithmetic right shift, clamped to
@ +/-31), [sp,#48] = mode flag (non-zero: single-buffer path).
@ Returns immediately if shift==0 or either range is empty.
ixheaacd_adjust_scale_armv7:
STMFD SP!, {R4-R11, R14}
LDR R4, [SP, #44]
LDR R5, [SP, #36]
LDR R6, [SP, #40]
@ early-outs: zero shift, empty element range, empty buffer range
MOVS R4, R4
BEQ ENDRESSCALE
SUBS R3, R3, R2
BLE ENDRESSCALE
SUBS R6, R6, R5
BLE ENDRESSCALE
@ R9 walks the pointer array starting at entry R5; clamp shift to +/-31
ADD R9, R0, R5, LSL#2
LDR R10, [R9], #4
CMP R4, #31
MOVGT R4, #31
CMP R4, #-31
MOVLT R4, #-31
LDR R8, [SP, #48]
MOVS R8, R8
BEQ ELIF1
MOVS R4, R4
BLE ELIF2_1
LOOP1:
@ flag!=0, shift>0: left-shift each element of one buffer, 2 per iteration
ADD R10, R10, R2, LSL #2
MOV R7, R3
INNLOOP1:
LDR R11, [R10]
SUBS R7, R7 , #2
LDRGE R5, [R10, #4]
MOV R11, R11, LSL R4
STR R11, [R10], #4
MOVGE R5, R5, LSL R4
STRGE R5, [R10], #4
BGT INNLOOP1
LDR R10, [R9], #4
SUBS R6, R6, #1
BGT LOOP1
B ENDRESSCALE
ELIF2_1:
@ flag!=0, shift<0: same loop with arithmetic right shift by -R4
RSB R4, R4, #0
LOOP2:
ADD R10, R10, R2, LSL #2
MOV R7, R3
INNLOOP2:
LDR R11, [R10]
SUBS R7, R7 , #2
LDRGE R5, [R10, #4]
MOV R11, R11, ASR R4
STR R11, [R10], #4
MOVGE R5, R5, ASR R4
STRGE R5, [R10], #4
BGT INNLOOP2
LDR R10, [R9], #4
SUBS R6, R6, #1
BGT LOOP2
B ENDRESSCALE
ELIF1:
@ flag==0: scale two parallel buffer sets (R0[] and R1[]) together
ADD R5, R1, R5, LSL#2
MOVS R4, R4
BLE ELIF2_2
LOOP3:
@ shift>0: unrolled x2 over the even count, odd element handled after
LDR R8, [R5], #4
ADD R10, R10, R2, LSL #2
ADD R8, R8, R2, LSL #2
BICS R7, R3, #1
BEQ COUNTODD1
INNLOOP3:
LDR R11, [R10]
LDR R1, [R8]
MOV R11, R11, LSL R4
MOV R1, R1, LSL R4
STR R11, [R10], #4
STR R1, [R8], #4
LDR R11, [R10]
LDR R1, [R8]
MOV R11, R11, LSL R4
MOV R1, R1, LSL R4
STR R11, [R10], #4
STR R1, [R8], #4
SUBS R7, R7 , #2
BGT INNLOOP3
COUNTODD1:
@ if the count was odd, scale the one remaining element pair
BIC R7, R3, #1
CMP R7, R3
BEQ INNLOOP3END
LDR R11, [R10]
LDR R1, [R8]
MOV R11, R11, LSL R4
MOV R1, R1, LSL R4
STR R11, [R10], #4
STR R1, [R8], #4
INNLOOP3END:
LDR R10, [R9], #4
SUBS R6, R6, #1
BGT LOOP3
B ENDRESSCALE
ELIF2_2:
@ flag==0, shift<0: same dual-buffer loop with arithmetic right shift
RSB R4, R4, #0
LOOP4:
LDR R8, [R5], #4
ADD R10, R10, R2, LSL #2
ADD R8, R8, R2, LSL #2
BICS R7, R3, #1
BEQ COUNTODD2
INNLOOP4:
LDR R11, [R10]
LDR R1, [R8]
MOV R11, R11, ASR R4
MOV R1, R1, ASR R4
STR R11, [R10], #4
STR R1, [R8], #4
LDR R11, [R10]
LDR R1, [R8]
MOV R11, R11, ASR R4
MOV R1, R1, ASR R4
STR R11, [R10], #4
STR R1, [R8], #4
SUBS R7, R7 , #2
BGT INNLOOP4
COUNTODD2:
BIC R7, R3, #1
CMP R7, R3
BEQ INNLOOP4END
LDR R11, [R10]
LDR R1, [R8]
MOV R11, R11, ASR R4
MOV R1, R1, ASR R4
STR R11, [R10], #4
STR R1, [R8], #4
INNLOOP4END:
LDR R10, [R9], #4
SUBS R6, R6, #1
BGT LOOP4
ENDRESSCALE:
@ restore and return (pc from stacked lr)
LDMFD sp!, {r4-r11, r15}

View file

@ -0,0 +1,855 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
.global ixheaacd_sbr_imdct_using_fft
@ Mixed-radix complex FFT used by the SBR IMDCT (NEON).
@ First stage is radix-8 or radix-4 depending on the transform size in r1
@ (0x10 -> radix-4 first, otherwise radix-8 path), using a byte table (r7,
@ via r4) of digit-reversed input indices; r2 = input, r3 = stage output.
@ Subsequent stages are radix-4 passes (OUTER/MIDDLE/INNER_LOOP_R4) with
@ 16-bit twiddles loaded lane-wise from r0.
@ 0x5a82 below is the Q15 value of 1/sqrt(2) used for the odd radix-8
@ rotations (VQDMLAL doubles the product to keep Q-format).
ixheaacd_sbr_imdct_using_fft:
STMFD sp!, {r4-r12, lr}
VPUSH {D8 - D15}
LDR r5, [sp, #0x68]
LDR r6, [sp, #0x68+4]
LDR r7, [sp, #0x68+8]
COND_6: CMP r1, #0x10
BNE COND_7
MOV r8, #1
MOV r4, r7
B RADIX_4_FIRST_START
COND_7: CMP r1, #0x20
MOV r8, #1
MOV r4, r7
RADIX_8_FIRST_START:
@ r9 = number of radix-8 groups (len/32); r1 doubled to a byte stride
LSR r9 , r1, #5
LSL r1, r1, #1
RADIX_8_FIRST_LOOP:
@ gather 8 complex inputs per lane, addressed through the reorder table r4
MOV r5 , r2
MOV r6 , r2
MOV r7 , r2
MOV r11 , r2
LDRB r12, [r4, #0]
ADD r5, r5, r12, LSL #3
VLD2.32 {d0[0], d2[0]}, [r5] , r1
ADD r5, r5, r1
VLD2.32 {d8[0], d10[0]}, [r5] , r1
SUB r5, r5, r1, LSL #1
VLD2.32 {d4[0], d6[0]}, [r5] , r1
ADD r5, r5, r1
VLD2.32 {d12[0], d14[0]}, [r5], r1
SUB r5, r5, r1, LSL #2
LDRB r12, [r4, #1]
ADD r6, r6, r12, LSL #3
VLD2.32 {d0[1], d2[1]}, [r6] , r1
ADD r6, r6, r1
VLD2.32 {d8[1], d10[1]}, [r6] , r1
SUB r6, r6, r1, LSL #1
VLD2.32 {d4[1], d6[1]}, [r6] , r1
ADD r6, r6, r1
VLD2.32 {d12[1], d14[1]}, [r6], r1
SUB r6, r6, r1, LSL #2
LDRB r12, [r4, #2]
ADD r7, r7, r12 , LSL #3
VLD2.32 {d1[0], d3[0]}, [r7] , r1
ADD r7, r7, r1
VLD2.32 {d9[0], d11[0]}, [r7] , r1
SUB r7, r7, r1, LSL #1
LDRB r12, [r4, #3]
ADD r11, r11, r12 , LSL #3
VLD2.32 {d1[1], d3[1]}, [r11] , r1
ADD r11, r11, r1
VLD2.32 {d9[1], d11[1]}, [r11] , r1
SUB r11, r11, r1, LSL #1
@ first radix-2 layer of the radix-8 butterfly (interleaved with loads)
VADD.I32 q8, q0, q4
VLD2.32 {d5[0], d7[0]}, [r7] , r1
ADD r7, r7, r1
VSUB.I32 q9, q0, q4
VLD2.32 {d13[0], d15[0]}, [r7], r1
SUB r7, r7, r1, LSL #2
VADD.I32 q0, q1, q5
VLD2.32 {d5[1], d7[1]}, [r11] , r1
ADD r11, r11, r1
VSUB.I32 q4, q1, q5
VLD2.32 {d13[1], d15[1]}, [r11], r1
SUB r11, r11, r1, LSL #2
ADD r4, r4, #4
ADD r5, r5, r1, LSR #1
ADD r6, r6, r1, LSR #1
ADD r7, r7, r1, LSR #1
ADD r11, r11, r1, LSR #1
VADD.I32 q1, q2, q6
VLD2.32 {d28[0], d30[0]}, [r5] , r1
VSUB.I32 q5, q2, q6
VLD2.32 {d20[0], d22[0]}, [r5] , r1
VADD.I32 q2, q3, q7
VLD2.32 {d24[0], d26[0]}, [r5] , r1
VSUB.I32 q6, q3, q7
VLD2.32 {d28[1], d30[1]}, [r6] , r1
VADD.S32 q3, q9, q6
VLD2.32 {d20[1], d22[1]}, [r6] , r1
VSUB.S32 q7, q9, q6
VLD2.32 {d24[1], d26[1]}, [r6] , r1
VSUB.S32 q6, q4, q5
VLD2.32 {d29[0], d31[0]}, [r7] , r1
VADD.S32 q9, q4, q5
VLD2.32 {d21[0], d23[0]}, [r7] , r1
VADD.S32 q4, q8, q1
VLD2.32 {d25[0], d27[0]}, [r7] , r1
VSUB.S32 q5, q8, q1
VLD2.32 {d29[1], d31[1]}, [r11] , r1
VADD.S32 q8, q0, q2
VLD2.32 {d21[1], d23[1]}, [r11] , r1
VSUB.S32 q0, q0, q2
VLD2.32 {d25[1], d27[1]}, [r11] , r1
@ spill intermediates to the stack to free registers for the odd half
VPUSH {q3}
VPUSH {q7}
VLD2.32 {d2[0], d4[0]}, [r5], r1
VADD.I32 q7, q14, q12
VLD2.32 {d2[1], d4[1]}, [r6] , r1
VSUB.I32 q3, q14, q12
VLD2.32 {d3[0], d5[0]}, [r7] , r1
VADD.I32 q14, q15, q13
VLD2.32 {d3[1], d5[1]}, [r11] , r1
VSUB.I32 q12, q15, q13
VADD.I32 q15, q10, q1
VSUB.I32 q13, q10, q1
VADD.I32 q10, q11, q2
VSUB.I32 q1, q11, q2
VADD.S32 q11, q7, q15
VSUB.S32 q2, q7, q15
VADD.S32 q7, q14, q10
VSUB.S32 q15, q14, q10
VADD.S32 q14, q3, q12
VSUB.S32 q10, q3, q12
VADD.S32 q3, q13, q1
VSUB.S32 q12, q13, q1
VADD.S32 q1 , q14, q12
VSUB.S32 q13, q14, q12
VSUB.S32 q12, q3, q10
VUZP.16 d2, d3
VADD.S32 q14, q3, q10
VUZP.16 d26, d27
VADD.S32 q3, q4, q11
VUZP.16 d24, d25
VSUB.S32 q10, q4, q11
VUZP.16 d28, d29
VADD.S32 q4, q8, q7
@ 0x5a82 = Q15 1/sqrt(2) for the 45-degree rotations
LDR r14, =0x5a82
VSUB.S32 q11, q8, q7
VADD.S32 q8, q5, q15
VSUB.S32 q7, q5, q15
VSUB.S32 q5, q0, q2
VADD.S32 q15, q0, q2
VPOP {q0}
VPOP {q2}
VPUSH {q3-q4}
VPUSH {q10}
VDUP.16 d20, r14
@ 32x16 fractional multiply by 1/sqrt(2): VMULL.u16 low half, >>15, then
@ VQDMLAL.S16 adds the doubled signed-high product
VMULL.u16 q4, d26, d20
VMULL.u16 q3, d28, d20
VPUSH {q7-q8}
VPUSH {q5}
VSHR.S32 q4, q4, #15
VSHR.S32 q3, q3, #15
VQDMLAL.S16 q4, d27, d20
VQDMLAL.S16 q3, d29, d20
VPUSH {q11}
VMULL.u16 q13, d24, d20
VMULL.u16 q14, d2, d20
VADD.S32 q5, q2, q4
VSUB.S32 q7, q2, q4
VADD.S32 q8, q6, q3
VSUB.S32 q6, q6, q3
VSHR.S32 q13, q13, #15
VSHR.S32 q14, q14, #15
VQDMLAL.S16 q13, d25, d20
VQDMLAL.S16 q14, d3, d20
VPOP {q1}
VPOP {q10}
VADD.S32 q2, q0, q13
VSUB.S32 q4, q0, q13
VADD.S32 q11, q9, q14
VSUB.S32 q3, q9, q14
VPOP {q14}
VPOP {q9}
VPOP {q0}
VPOP {q12, q13}
@ transpose lanes back to natural order and scale results by 2 (VSHL #1)
VTRN.32 q12, q5
VSHL.S32 q12, q12, #1
VTRN.32 q9, q2
VSHL.S32 q5, q5, #1
VSHL.S32 q9, q9, #1
VTRN.32 q0, q7
VSHL.S32 q2, q2, #1
VSHL.S32 q0, q0, #1
VTRN.32 q14, q4
VSHL.S32 q7, q7, #1
VSHL.S32 q14, q14, #1
VTRN.32 q13, q6
VSHL.S32 q4, q4, #1
VSHL.S32 q13, q13, #1
VTRN.32 q10, q3
VSHL.S32 q6, q6, #1
VSHL.S32 q10, q10, #1
VTRN.32 q1, q8
VSHL.S32 q3, q3, #1
VSHL.S32 q1, q1, #1
VTRN.32 q15, q11
VSHL.S32 q8, q8, #1
VSHL.S32 q15, q15, #1
VSWP d18, d25
VSHL.S32 q11, q11, #1
VSWP d4, d11
VSWP d1, d28
VSWP d15, d8
VSWP d20, d27
VSWP d6, d13
VSWP d30, d3
VSWP d22, d17
VST2.32 {q12, q13}, [r3]!
VST2.32 {q0, q1}, [r3]!
VST2.32 {q5, q6}, [r3]!
VST2.32 {q7, q8}, [r3]!
VMOV q5, q11
VST2.32 {q9, q10}, [r3]!
VST2.32 {q14, q15}, [r3]!
VST2.32 {q2, q3}, [r3]!
VST2.32 {q4, q5}, [r3]!
SUBS r9, r9, #1
BNE RADIX_8_FIRST_LOOP
@ set up stage parameters for the following radix-4 passes
LSR r1, r1, #1
SUB r3, r1, LSL #3
MOV r5, #8
MOV r4, #32
LSR r6, r1, #5
B RADIX_4_FIRST_ENDS
RADIX_8_FIRST_ENDS:
RADIX_4_FIRST_START:
@ radix-4 first stage (len == 0x10 path); same reorder-table gather,
@ single butterfly layer
LSR r9 , r1, #4
LSL r1, r1, #1
RADIX_4_LOOP:
MOV r5 , r2
MOV r6 , r2
MOV r7 , r2
MOV r11 , r2
LDRB r12, [r4, #0]
ADD r5, r5, r12, LSL #3
VLD2.32 {d0[0], d2[0]}, [r5] , r1
ADD r5, r5, r1
VLD2.32 {d8[0], d10[0]}, [r5] , r1
SUB r5, r5, r1, LSL #1
VLD2.32 {d4[0], d6[0]}, [r5] , r1
ADD r5, r5, r1
VLD2.32 {d12[0], d14[0]}, [r5], r1
LDRB r12, [r4, #1]
ADD r6, r6, r12, LSL #3
VLD2.32 {d0[1], d2[1]}, [r6] , r1
ADD r6, r6, r1
VLD2.32 {d8[1], d10[1]}, [r6] , r1
SUB r6, r6, r1, LSL #1
VLD2.32 {d4[1], d6[1]}, [r6] , r1
ADD r6, r6, r1
VLD2.32 {d12[1], d14[1]}, [r6], r1
LDRB r12, [r4, #2]
ADD r7, r7, r12, LSL #3
VLD2.32 {d1[0], d3[0]}, [r7] , r1
ADD r7, r7, r1
VLD2.32 {d9[0], d11[0]}, [r7] , r1
LDRB r12, [r4, #3]
ADD r11, r11, r12 , LSL #3
VLD2.32 {d1[1], d3[1]}, [r11] , r1
ADD r11, r11, r1
VLD2.32 {d9[1], d11[1]}, [r11] , r1
SUB r7, r7, r1, LSL #1
VADD.S32 q8, q0, q4
VLD2.32 {d5[0], d7[0]}, [r7] , r1
ADD r7, r7, r1
VADD.S32 q9, q1, q5
VLD2.32 {d13[0], d15[0]}, [r7], r1
SUB r11, r11, r1, LSL #1
VSUB.S32 q10, q0, q4
VLD2.32 {d5[1], d7[1]}, [r11] , r1
ADD r11, r11, r1
VSUB.S32 q11, q1, q5
VLD2.32 {d13[1], d15[1]}, [r11], r1
ADD r4, r4, #4
VADD.S32 q12, q2, q6
VADD.S32 q13, q3, q7
VSUB.S32 q14, q2, q6
VSUB.S32 q15, q3, q7
VADD.S32 q0, q8, q12
VADD.S32 q1, q9, q13
VSUB.S32 q2, q8, q12
VSUB.S32 q3, q9, q13
VADD.S32 q4, q10, q15
VSUB.S32 q5, q11, q14
VADD.S32 q7, q11, q14
VSUB.S32 q6, q10, q15
@ transpose, double, and store interleaved
VTRN.32 q0, q4
VSHL.S32 q0, q0, #1
VTRN.32 q2, q6
VSHL.S32 q4, q4, #1
VSHL.S32 q2, q2, #1
VTRN.32 q1, q5
VSHL.S32 q6, q6, #1
VSHL.S32 q1, q1, #1
VTRN.32 q3, q7
VSHL.S32 q5, q5, #1
VSHL.S32 q3, q3, #1
VSWP d4, d1
VSHL.S32 q7, q7, #1
VSWP d12, d9
VSWP d6, d3
VSWP d14, d11
VST2.32 {q0, q1}, [r3]!
VST2.32 {q4, q5}, [r3]!
VST2.32 {q2, q3}, [r3]!
VST2.32 {q6, q7}, [r3]!
SUBS r9, r9, #1
BNE RADIX_4_LOOP
LSR r1, r1, #1
SUB r3, r1, LSL #3
MOV r5, #4
MOV r4, #64
LSR r6, r1, #4
RADIX_4_FIRST_ENDS:
@ remaining radix-4 stages: r5 = twiddle-set count, r6 = butterflies per
@ set, r4 = twiddle stride; all halve/quadruple per OUTER pass
PUSH {r3}
LSR r5, r5, #2
OUTER_LOOP_R4:
LDR r14, [sp]
MOV r7, r5
MOV r2, #0
MOV r9, r0
LSL r12 , r5, #5
MIDDLE_LOOP_R4:
@ load the three twiddles (W, W^2, W^3) for this butterfly column,
@ lane-wise across four columns
VLD2.16 {d0[0], d1[0]}, [r9], r2
VLD2.16 {d2[0], d3[0]}, [r9], r2
ADD r11, r2, r4, LSL #2
VLD2.16 {d4[0], d5[0]}, [r9]
ADD r10, r0, r11
VLD2.16 {d0[1], d1[1]}, [r10], r11
VLD2.16 {d2[1], d3[1]}, [r10], r11
ADD r2, r11, r4, LSL #2
VLD2.16 {d4[1], d5[1]}, [r10]
ADD r9, r0, r2
VLD2.16 {d0[2], d1[2]}, [r9], r2
VLD2.16 {d2[2], d3[2]}, [r9], r2
ADD r11, r2, r4, LSL #2
VLD2.16 {d4[2], d5[2]}, [r9]
ADD r10, r0, r11
VLD2.16 {d0[3], d1[3]}, [r10], r11
VLD2.16 {d2[3], d3[3]}, [r10], r11
ADD r2, r11, r4, LSL #2
VLD2.16 {d4[3], d5[3]}, [r10]
ADD r9, r0, r2
MOV r10, r6
INNER_LOOP_R4:
@ in-place radix-4 butterfly: leg0 loaded as 32-bit, legs 1..3 as split
@ 16-bit halves for the fractional twiddle multiply (>>1 pre-scale
@ guards against overflow)
VLD2.32 {q3, q4}, [r14], r12
VSHR.S32 q3, q3, #1
VLD4.16 {q5, q6}, [r14], r12
VSHR.S32 q4, q4, #1
VSHR.U16 d10, d10, #1
VLD4.16 {q7, q8}, [r14], r12
VSHR.U16 d12, d12, #1
VMULL.S16 q11, d10, d0
VMLSL.S16 q11, d12, d1
VLD4.16 {q9, q10}, [r14], r12
VMULL.S16 q12, d10, d1
VMLAL.S16 q12, d12, d0
VSHR.U16 d14, d14, #1
VSHR.U16 d16, d16, #1
SUB r14, r14, r12, LSL #2
VSHR.U16 d18, d18, #1
VSHR.U16 d20, d20, #1
VMULL.S16 q13, d14, d2
VMLSL.S16 q13, d16, d3
VSHR.S32 q11, q11, #15
VMULL.S16 q14, d14, d3
VMLAL.S16 q14, d16, d2
VMULL.S16 q15, d18, d4
VMLSL.S16 q15, d20, d5
VMLAL.S16 q11, d11, d0
VMLSL.S16 q11, d13, d1
VSHR.S32 q12, q12, #15
VSHR.S32 q13, q13, #15
VSHR.S32 q14, q14, #15
VSHR.S32 q15, q15, #15
VMLAL.S16 q12, d11, d1
VMLAL.S16 q12, d13, d0
VMULL.S16 q5, d18, d5
VMLAL.S16 q5, d20, d4
VMLAL.S16 q13, d15, d2
VMLSL.S16 q13, d17, d3
VMLAL.S16 q14, d15, d3
VMLAL.S16 q14, d17, d2
VMLAL.S16 q15, d19, d4
VMLSL.S16 q15, d21, d5
VSHR.S32 q5, q5, #15
VMLAL.S16 q5, d19, d5
VMLAL.S16 q5, d21, d4
CMP r7, r5
BNE BYPASS_IF
@ first twiddle column only (W == 1): patch lane 0 of each product with
@ the raw (halved) input values instead of the multiplied ones
ADD r14, r14, r12
LDR r3, [r14], r12
ASR r3, r3, #1
VMOV.S32 d22[0], r3
LDR r3, [r14], r12
ASR r3, r3, #1
VMOV.S32 d26[0], r3
LDR r3, [r14]
ASR r3, r3, #1
VMOV.S32 d30[0], r3
SUB r14, r14, r12, LSL #1
ADD r14, r14, #4
LDR r3, [r14], r12
ASR r3, r3, #1
VMOV.S32 d24[0], r3
LDR r3, [r14], r12
ASR r3, r3, #1
VMOV.S32 d28[0], r3
LDR r3, [r14], r12
ASR r3, r3, #1
VMOV.S32 d10[0], r3
SUB r14, r14, #4
SUB r14, r14, r12, LSL #2
BYPASS_IF:
@ combine the four (twiddled) legs and store back in place
VADD.S32 q6, q3, q13
VADD.S32 q7, q4, q14
VSUB.S32 q3, q3, q13
VSUB.S32 q4, q4, q14
VADD.S32 q8, q11, q15
VADD.S32 q9, q12, q5
VSUB.S32 q15, q11, q15
VSUB.S32 q14, q12, q5
VADD.S32 q10, q6, q8
VADD.S32 q11, q7, q9
VADD.S32 q12, q3, q14
VSUB.S32 q13, q4, q15
VSUB.S32 q6, q6, q8
VST2.32 {q10, q11}, [r14], r12
VSUB.S32 q7, q7, q9
VSUB.S32 q8, q3, q14
VST2.32 {q12, q13}, [r14], r12
VADD.S32 q9, q4, q15
VST2.32 {q6, q7}, [r14], r12
VST2.32 {q8, q9}, [r14], r12
SUBS r10, r10, #1
BNE INNER_LOOP_R4
SUB r14, r14, r1, LSL #3
ADD r14, r14, #32
SUBS r7, r7, #1
BNE MIDDLE_LOOP_R4
@ next stage: quarter the twiddle stride, quadruple sets, quarter count
LSR r4, r4, #2
LSL r5, r5, #2
LSR r6, r6, #2
SUBS r8, r8, #1
BNE OUTER_LOOP_R4
END_LOOPS:
POP {r3}
VPOP {D8 - D15}
LDMFD sp!, {r4-r12, pc}

View file

@ -0,0 +1,265 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
@ ixheaacd_sbr_qmfanal32_winadds
@ SBR QMF-analysis (32-band) windowing kernel, ARMv7 NEON.
@ r0-r3: four 16-bit pointers (two sample streams and two interleaved
@ coefficient tables, judging by the VLD1/VLD2 pairing - confirm against
@ the generic C implementation's prototype).
@ Stack arguments sit after the callee-save area pushed below
@ (10 core regs = 40 bytes, D8-D15 = 64 bytes, total 104 bytes):
@   [SP, #104] : 32-bit accumulator/output buffer (-> R4/R11)
@   [SP, #108] : filter-state buffer, filled backwards from offset +64
@   [SP, #112] : 16-bit time-domain input
@   [SP, #116] : input stride in samples (doubled into a byte stride)
.global ixheaacd_sbr_qmfanal32_winadds
ixheaacd_sbr_qmfanal32_winadds:
STMFD sp!, {R4-R12, R14}
VPUSH {D8 - D15}
LDR R5, [SP, #108]
LDR R6, [SP, #112]
LDR R7, [SP, #116]
MOV R9, R7, LSL #1  @ byte stride between input samples
ADD r5, r5, #64
MOV r10, #3
@ Copy 32 strided input samples (4 iterations x 8) into the filter-state
@ buffer in reverse order (descending STRH with pre-decrement).
LOOP:
LDRSH r4 , [R6], r9
LDRSH r8 , [R6], r9
LDRSH r11 , [R6], r9
LDRSH r12 , [R6], r9
STRH r4 , [r5 , #-2]!
STRH r8 , [r5 , #-2]!
STRH r11 , [r5 , #-2]!
STRH r12 , [r5 , #-2]!
LDRSH r4 , [R6], r9
LDRSH r8 , [R6], r9
LDRSH r11 , [R6], r9
LDRSH r12 , [R6], r9
STRH r4 , [r5 , #-2]!
STRH r8 , [r5 , #-2]!
STRH r11 , [r5 , #-2]!
STRH r12 , [r5 , #-2]!
SUBS r10, r10, #1
BPL LOOP
@ Software-pipelined 5-tap multiply-accumulate: the prologue below fills
@ the first two Q15 accumulators, LOOP_1 overlaps loads with MACs.
LDR R4, [SP, #104]
MOV R5, #8  @ iteration counter for LOOP_1
VLD1.16 D0, [R0]!
MOV R6, #64
MOV R6, R6, LSL #1  @ R6 = 128-byte row stride
VLD2.16 {D1, D2}, [R2]!
MOV R7, #244  @ NOTE(review): appears unused afterwards (same in _eld)
MOV R9, R0
ADD R0, R0, #120
MOV R11, R4
VLD1.16 D2, [R0], R6
ADD R11, R11, #128  @ second output half, 128 bytes in
MOV R10, R2
ADD R2, R2, #240
VMULL.S16 Q15, D0, D1
VLD2.16 {D3, D4}, [R2]!
ADD R2, R2, #240
VLD1.16 D4, [R0], R6
VMLAL.S16 Q15, D2, D3
VLD2.16 {D5, D6}, [R2]!
ADD R2, R2, #240
VLD1.16 D6, [R0], R6
VMLAL.S16 Q15, D4, D5
VLD2.16 {D7, D8}, [R2]!
ADD R2, R2, #240
VLD1.16 D8, [R0], R6
VMLAL.S16 Q15, D6, D7
MOV R0, R9
VLD2.16 {D9, D10}, [R2]!
ADD R2, R2, #240
VLD1.16 D10, [R1]!
VMLAL.S16 Q15, D8, D9
MOV R9, R1
VLD2.16 {D11, D12}, [R3]!
ADD R1, R1, #120
MOV R2, R10
VLD1.16 D12, [R1], R6
MOV R10, R3
ADD R3, R3, #240
VLD2.16 {D13, D14}, [R3]!
ADD R3, R3, #240
VLD2.16 {D15, D16}, [R3]!
VLD1.16 D14, [R1], R6
ADD R3, R3, #240
VLD1.16 D16, [R1], R6
SUB R5, R5, #1  @ one iteration consumed by the prologue
VLD2.16 {D17, D18}, [R3]!
ADD R3, R3, #240
VLD1.16 D18, [R1], R6
MOV R1, R9
VLD2.16 {D19, D20}, [R3]!
ADD R3, R3, #240
MOV R3, R10
@ Steady state: store the previous two accumulators (R4/R11 streams)
@ while computing the next two.
LOOP_1:
VLD1.16 D0, [R0]!
MOV R9, R0
VLD2.16 {D1, D2}, [R2]!
ADD R0, R0, #120
MOV R10, R2
VST1.32 {Q15}, [R4]!
ADD R2, R2, #240
VMULL.S16 Q15, D10, D11
VLD1.16 D2, [R0], R6
VMLAL.S16 Q15, D12, D13
VMLAL.S16 Q15, D14, D15
VLD2.16 {D3, D4}, [R2]!
VMLAL.S16 Q15, D16, D17
VMLAL.S16 Q15, D18, D19
VLD1.16 D4, [R0], R6
ADD R2, R2, #240
VST1.32 {Q15}, [R11]!
VMULL.S16 Q15, D0, D1
VLD2.16 {D5, D6}, [R2]!
VMLAL.S16 Q15, D2, D3
ADD R2, R2, #240
VLD1.16 D6, [R0], R6
VMLAL.S16 Q15, D4, D5
VLD2.16 {D7, D8}, [R2]!
ADD R2, R2, #240
VLD1.16 D8, [R0], R6
VMLAL.S16 Q15, D6, D7
MOV R0, R9
VLD2.16 {D9, D10}, [R2]!
ADD R2, R2, #240
VLD1.16 D10, [R1]!
MOV R2, R10
MOV R9, R1
VLD2.16 {D11, D12}, [R3]!
ADD R1, R1, #120
VMLAL.S16 Q15, D8, D9
VLD1.16 D12, [R1], R6
MOV R10, R3
ADD R3, R3, #240
VLD2.16 {D13, D14}, [R3]!
ADD R3, R3, #240
VLD1.16 D14, [R1], R6
VLD2.16 {D15, D16}, [R3]!
ADD R3, R3, #240
VLD1.16 D16, [R1], R6
VLD2.16 {D17, D18}, [R3]!
ADD R3, R3, #240
VLD1.16 D18, [R1], R6
SUBS R5, R5, #1
MOV R1, R9
VLD2.16 {D19, D20}, [R3]!
ADD R3, R3, #240
MOV R3, R10
BGT LOOP_1
@ Epilogue: flush the two in-flight accumulators.
VST1.32 {Q15}, [R4]!
VMULL.S16 Q15, D10, D11
VMLAL.S16 Q15, D12, D13
VMLAL.S16 Q15, D14, D15
VMLAL.S16 Q15, D16, D17
VMLAL.S16 Q15, D18, D19
VST1.32 {Q15}, [R11]!
VPOP {D8 - D15}
LDMFD sp!, {R4-R12, R15}

View file

@ -0,0 +1,245 @@
.text
.p2align 2
@ ixheaacd_sbr_qmfanal32_winadds_eld
@ AAC-ELD variant of the SBR QMF-analysis windowing kernel.  Same overall
@ structure as ixheaacd_sbr_qmfanal32_winadds but the coefficient tables
@ are read with plain VLD1 (not de-interleaving VLD2) and the inter-row
@ step is 112 bytes instead of 240.
@ Stack arguments sit after the 40-byte STMFD callee-save area:
@   [SP, #40] : winAdd output, [SP, #44] : filterStates,
@   [SP, #48] : timeIn,        [SP, #52] : stride.
@ NOTE(review): unlike the non-ELD sibling, this routine clobbers
@ D8-D20 without a VPUSH of callee-saved D8-D15 - verify against the
@ AAPCS expectations of the callers.
.global ixheaacd_sbr_qmfanal32_winadds_eld
ixheaacd_sbr_qmfanal32_winadds_eld:
STMFD sp!, {R4-R12, R14}
LDR R5, [SP, #44] @filterStates
LDR R6, [SP, #48] @timeIn
LDR R7, [SP, #52] @stride
MOV R9, R7, LSL #1  @ byte stride between input samples
ADD r5, r5, #64
MOV r10, #3
@ Copy 32 strided input samples (4 iterations x 8) into the filter-state
@ buffer in reverse order.
LOOP:
LDRSH r4 , [R6], r9
LDRSH r8 , [R6], r9
LDRSH r11 , [R6], r9
LDRSH r12 , [R6], r9
STRH r4 , [r5 , #-2]!
STRH r8 , [r5 , #-2]!
STRH r11 , [r5 , #-2]!
STRH r12 , [r5 , #-2]!
LDRSH r4 , [R6], r9
LDRSH r8 , [R6], r9
LDRSH r11 , [R6], r9
LDRSH r12 , [R6], r9
STRH r4 , [r5 , #-2]!
STRH r8 , [r5 , #-2]!
STRH r11 , [r5 , #-2]!
STRH r12 , [r5 , #-2]!
SUBS r10, r10, #1
BPL LOOP
LDR R4, [SP, #40] @winAdd
MOV R5, #8
VLD1.16 D0, [R0]! @tmpQ1[n + 0] load and incremented R0 by 8
MOV R6, #64
MOV R6, R6, LSL #1 @ R6 = 128-byte row stride
VLD1.16 {D1, D2}, [R2]! @ tmpQmf_c1[2*(n + 0)] load and incremented
MOV R7, #244 @ NOT USED further
MOV R9, R0
ADD R0, R0, #120 @ incrementing R0 by 120 + 8 = 128
MOV R11, R4 @ Mov winAdd to R11
VLD1.16 D2, [R0], R6 @ tmpQ1[n + 64] load and incremented by R6
ADD R11, R11, #128 @ increment winAdd by 128
MOV R10, R2 @
ADD R2, R2, #112 @ This should be 240 --> 112
VMULL.S16 Q15, D0, D1
VLD1.16 {D3, D4}, [R2]! @ tmpQmf_c1[2*(n + 64)] load and incremented
ADD R2, R2, #112 @ This should be 112
VLD1.16 D4, [R0], R6 @ tmpQ1[n + 128] load and incremented by R6
VMLAL.S16 Q15, D2, D3
VLD1.16 {D5, D6}, [R2]! @ tmpQmf_c1[2*(n + 128)] load and incremented
SUB R10, R10, #8
ADD R2, R2, #112 @ This should be 112
VLD1.16 D6, [R0], R6 @ tmpQ1[n + 192] load and incremented by R6
VMLAL.S16 Q15, D4, D5
VLD1.16 {D7, D8}, [R2]! @ tmpQmf_c1[2*(n + 192)] load and incremented
ADD R2, R2, #112 @ This should be 112
VLD1.16 D8, [R0], R6 @ tmpQ1[n + 256] load and incremented by R6
VMLAL.S16 Q15, D6, D7
MOV R0, R9
VLD1.16 {D9, D10}, [R2]! @ tmpQmf_c1[2*(n + 256)] load and incremented
ADD R2, R2, #112 @ This should be 112
VLD1.16 D10, [R1]! @ tmpQ2[n + 0] load and incremented
VMLAL.S16 Q15, D8, D9
MOV R9, R1
VLD1.16 {D11, D12}, [R3]! @ tmpQmf_c2[2*(n + 0)] load and incremented
ADD R1, R1, #120 @ incrementing R1 by 120 + 8 = 128
MOV R2, R10 @
VLD1.16 D12, [R1], R6 @ tmpQ2[n + 64] load and incremented by R6
MOV R10, R3
ADD R3, R3, #112 @ This should be 112
VLD1.16 {D13, D14}, [R3]! @ tmpQmf_c2[2*(n + 64)] load and incremented
ADD R3, R3, #112 @ This should be 112
VLD1.16 {D15, D16}, [R3]! @ tmpQmf_c2[2*(n + 128)] load and incremented
SUB R10, R10, #8
VLD1.16 D14, [R1], R6
ADD R3, R3, #112 @ This should be 112
VLD1.16 D16, [R1], R6
SUB R5, R5, #1  @ one iteration consumed by the prologue
VLD1.16 {D17, D18}, [R3]! @ tmpQmf_c2[2*(n + 192)] load and incremented
ADD R3, R3, #112 @ This should be 112
VLD1.16 D18, [R1], R6
MOV R1, R9
VLD1.16 {D19, D20}, [R3]! @ tmpQmf_c2[2*(n + 256)] load and incremented
ADD R3, R3, #112 @ This should be 112
MOV R3, R10
@ Steady state: store previous accumulators while computing the next two.
LOOP_1:
VLD1.16 D0, [R0]!
MOV R9, R0
VLD1.16 {D1, D2}, [R2]!
ADD R0, R0, #120
MOV R10, R2
VST1.32 {Q15}, [R4]!
ADD R2, R2, #112 @ This should be 112
VMULL.S16 Q15, D10, D11
VLD1.16 D2, [R0], R6
VMLAL.S16 Q15, D12, D13
VMLAL.S16 Q15, D14, D15
VLD1.16 {D3, D4}, [R2]!
VMLAL.S16 Q15, D16, D17
VMLAL.S16 Q15, D18, D19
VLD1.16 D4, [R0], R6
ADD R2, R2, #112 @ This should be 112
VST1.32 {Q15}, [R11]!
SUB R10, R10, #8
VMULL.S16 Q15, D0, D1
VLD1.16 {D5, D6}, [R2]!
VMLAL.S16 Q15, D2, D3
ADD R2, R2, #112 @ This should be 112
VLD1.16 D6, [R0], R6
VMLAL.S16 Q15, D4, D5
VLD1.16 {D7, D8}, [R2]!
ADD R2, R2, #112 @ This should be 112
VLD1.16 D8, [R0], R6
VMLAL.S16 Q15, D6, D7
MOV R0, R9
VLD1.16 {D9, D10}, [R2]!
ADD R2, R2, #112 @ This should be 112
VLD1.16 D10, [R1]!
MOV R2, R10
MOV R9, R1
VLD1.16 {D11, D12}, [R3]!
ADD R1, R1, #120
VMLAL.S16 Q15, D8, D9
VLD1.16 D12, [R1], R6
MOV R10, R3
ADD R3, R3, #112 @ This should be 112
VLD1.16 {D13, D14}, [R3]!
ADD R3, R3, #112 @ This should be 112
VLD1.16 D14, [R1], R6
SUB R10, R10, #8
VLD1.16 {D15, D16}, [R3]!
ADD R3, R3, #112 @ This should be 112
VLD1.16 D16, [R1], R6
VLD1.16 {D17, D18}, [R3]!
ADD R3, R3, #112 @ This should be 112
VLD1.16 D18, [R1], R6
SUBS R5, R5, #1
MOV R1, R9
VLD1.16 {D19, D20}, [R3]!
ADD R3, R3, #112 @ This should be 112
MOV R3, R10
BGT LOOP_1
@ Epilogue: flush the two in-flight accumulators.
VST1.32 {Q15}, [R4]!
VMULL.S16 Q15, D10, D11
VMLAL.S16 Q15, D12, D13
VMLAL.S16 Q15, D14, D15
VMLAL.S16 Q15, D16, D17
VMLAL.S16 Q15, D18, D19
VST1.32 {Q15}, [R11]!
LDMFD sp!, {R4-R12, R15}

View file

@ -0,0 +1,379 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
@ ixheaacd_sbr_qmfsyn64_winadd
@ SBR QMF-synthesis (64-band) window-and-add kernel, ARMv7 NEON.
@ r0-r2: 16-bit sample/coefficient pointers (two state streams at r0/r1
@ plus window tables at r2/r12 - confirm against the C reference).
@ r3: 16-bit output buffer, written 4 lanes at a time with stride R5.
@ Stack arguments (after 40-byte STMFD + 64-byte VPUSH = 104 bytes):
@   [SP, #104] : per-call saturating shift amount, broadcast into Q11
@   [SP, #108] : output stride in samples (doubled into a byte stride)
@ Q10 holds the rounding constant 0x8000 pre-shifted right by the same
@ amount (VSHL by negated shift), so each accumulator starts rounded.
.global ixheaacd_sbr_qmfsyn64_winadd
ixheaacd_sbr_qmfsyn64_winadd:
STMFD sp!, {R4-R12, R14}
VPUSH {D8- D15}
LDR R4, [SP, #104]
LDR R5, [SP, #108]
MOV R7, #0x8000
VLD1.16 D0, [R0]!
MOV R12, R2
VDUP.32 Q15, R7  @ Q15 = 0x8000 rounding constant
VLD1.16 D1, [R2]!
VDUP.32 Q11, R4  @ Q11 = shift for the final VQSHL
MOV R10, R0
MOV R11, R2
ADD R0, R0, #504
ADD R2, R2, #248
VNEG.S32 Q14, Q11
VSHL.S32 Q10, Q15, Q14  @ Q10 = 0x8000 >> shift (pre-scaled rounding)
MOV R6, #64
MOV R6, R6, LSL #1
ADD R12, R12, R6
MOV R7, #128
MOV R9, R7, LSL #1
ADD R1, R1, R9
MOV R6, #16  @ loop counter (decremented by 2 per LOOP_1 pass)
MOV R7, #128
MOV R9, R7, LSL #1  @ R9 = 256-byte coefficient stride
MOV R7, #256
MOV R8, R7, LSL #1  @ R8 = 512-byte sample stride
MOV R5, R5, LSL #1  @ output stride in bytes
@ Prologue: compute the first rounded 5-tap accumulator pair in Q13.
VLD1.16 D2, [R0], R8
VMOV Q13, Q10
VMLAL.S16 Q13, D0, D1
VLD1.16 D3, [R2], R9
VLD1.16 D4, [R0], R8
VMLAL.S16 Q13, D2, D3
VLD1.16 D5, [R2], R9
VLD1.16 D6, [R0], R8
VMLAL.S16 Q13, D5, D4
VLD1.16 D7, [R2], R9
VLD1.16 D8, [R0], R8
VMLAL.S16 Q13, D7, D6
VLD1.16 D9, [R2], R9
MOV R0, R10
MOV R2, R11
VLD1.16 D10, [R1]!
VMLAL.S16 Q13, D9, D8
MOV R10, R1
VLD1.16 D11, [R12]!
ADD R1, R1, #504
MOV R11, R12
VLD1.16 D12, [R1], R8
ADD R12, R12, #248
VMLAL.S16 Q13, D10, D11
VLD1.16 D13, [R12], R9
VLD1.16 D14, [R1], R8
VMLAL.S16 Q13, D12, D13
VLD1.16 D15, [R12], R9
VLD1.16 D16, [R1], R8
VMLAL.S16 Q13, D15, D14
VLD1.16 D17, [R12], R9
VLD1.16 D18, [R1], R8
VMLAL.S16 Q13, D17, D16
VLD1.16 D19, [R12], R9
VMLAL.S16 Q13, D19, D18
VLD1.16 D0, [R0]!
MOV R12, R11
MOV R1, R10
VLD1.16 D1, [R2]!
MOV R10, R0
VQSHL.S32 Q13, Q13, Q11  @ saturating scale of the accumulators
ADD R0, R0, #504
MOV R11, R2
VLD1.16 D2, [R0], R8
ADD R2, R2, #248
VSHR.S32 Q14, Q13, #16  @ take the high 16 bits
VLD1.16 D3, [R2], R9
VUZP.16 D28, D29  @ pack the four 16-bit results into D28
VMOV Q13, Q10
VLD1.16 D4, [R0], R8
VLD1.16 D5, [R2], R9
VLD1.16 D6, [R0], R8
VLD1.16 D7, [R2], R9
VLD1.16 D8, [R0], R8
VLD1.16 D9, [R2], R9
MOV R0, R10
MOV R2, R11
VLD1.16 D10, [R1]!
MOV R10, R1
VLD1.16 D11, [R12]!
ADD R1, R1, #504
MOV R11, R12
VLD1.16 D12, [R1], R8
ADD R12, R12, #248
VLD1.16 D13, [R12], R9
VLD1.16 D14, [R1], R8
VLD1.16 D15, [R12], R9
VLD1.16 D16, [R1], R8
VLD1.16 D17, [R12], R9
VLD1.16 D18, [R1], R8
SUB R6, R6, #2  @ prologue consumed one double-iteration
VLD1.16 D19, [R12], R9
MOV R1, R10
MOV R12, R11
@ Steady state: each pass finishes two accumulator groups, stores the
@ packed 16-bit lanes of the previous group with stride R5, and keeps
@ the load pipeline full.
LOOP_1:
VMLAL.S16 Q13, D0, D1
VST1.16 D28[0], [R3], R5
VMLAL.S16 Q13, D2, D3
VLD1.16 D0, [R0]!
VMLAL.S16 Q13, D5, D4
VMLAL.S16 Q13, D7, D6
VST1.16 D28[1], [R3], R5
MOV R10, R0
VLD1.16 D1, [R2]!
ADD R0, R0, #504
VMLAL.S16 Q13, D9, D8
VST1.16 D28[2], [R3], R5
VMLAL.S16 Q13, D10, D11
VST1.16 D28[3], [R3], R5
MOV R11, R2
VLD1.16 D2, [R0], R8
ADD R2, R2, #248
VMLAL.S16 Q13, D12, D13
VLD1.16 D3, [R2], R9
VMLAL.S16 Q13, D15, D14
VMLAL.S16 Q13, D17, D16
VLD1.16 D4, [R0], R8
VMLAL.S16 Q13, D19, D18
VLD1.16 D5, [R2], R9
VLD1.16 D6, [R0], R8
VQSHL.S32 Q13, Q13, Q11
VSHR.S32 Q14, Q13, #16
VLD1.16 D7, [R2], R9
VMOV Q13, Q10
VUZP.16 D28, D29
VMLAL.S16 Q13, D0, D1
VMLAL.S16 Q13, D2, D3
VLD1.16 D8, [R0], R8
VMLAL.S16 Q13, D5, D4
VMLAL.S16 Q13, D7, D6
VLD1.16 D9, [R2], R9
VLD1.16 D10, [R1]!
VMLAL.S16 Q13, D9, D8
MOV R2, R11
VLD1.16 D11, [R12]!
MOV R0, R10
MOV R10, R1
ADD R1, R1, #504
MOV R11, R12
VLD1.16 D12, [R1], R8
ADD R12, R12, #248
VLD1.16 D13, [R12], R9
VMLAL.S16 Q13, D10, D11
VLD1.16 D14, [R1], R8
VMLAL.S16 Q13, D12, D13
VLD1.16 D15, [R12], R9
VLD1.16 D16, [R1], R8
VMLAL.S16 Q13, D15, D14
VLD1.16 D17, [R12], R9
VLD1.16 D18, [R1], R8
VMLAL.S16 Q13, D17, D16
VLD1.16 D19, [R12], R9
MOV R1, R10
VMLAL.S16 Q13, D19, D18
VST1.16 D28[0], [R3], R5
MOV R12, R11
VLD1.16 D0, [R0]!
VLD1.16 D1, [R2]!
VQSHL.S32 Q13, Q13, Q11
VST1.16 D28[1], [R3], R5
MOV R10, R0
VST1.16 D28[2], [R3], R5
ADD R0, R0, #504
VST1.16 D28[3], [R3], R5
MOV R11, R2
VSHR.S32 Q14, Q13, #16
VLD1.16 D2, [R0], R8
ADD R2, R2, #248
VLD1.16 D3, [R2], R9
VLD1.16 D4, [R0], R8
VLD1.16 D5, [R2], R9
VLD1.16 D6, [R0], R8
VLD1.16 D7, [R2], R9
VLD1.16 D8, [R0], R8
VLD1.16 D9, [R2], R9
VUZP.16 D28, D29
VMOV Q13, Q10
MOV R0, R10
VLD1.16 D10, [R1]!
MOV R2, R11
MOV R10, R1
VLD1.16 D11, [R12]!
ADD R1, R1, #504
MOV R11, R12
VLD1.16 D12, [R1], R8
ADD R12, R12, #248
VLD1.16 D13, [R12], R9
VLD1.16 D14, [R1], R8
VLD1.16 D15, [R12], R9
VLD1.16 D16, [R1], R8
VLD1.16 D17, [R12], R9
SUBS R6, R6, #2
VLD1.16 D18, [R1], R8
MOV R1, R10
VLD1.16 D19, [R12], R9
MOV R12, R11
BGT LOOP_1
@ Epilogue: finish and store the last accumulator group.
VMLAL.S16 Q13, D0, D1
VST1.16 D28[0], [R3], R5
VMLAL.S16 Q13, D2, D3
VMLAL.S16 Q13, D5, D4
VST1.16 D28[1], [R3], R5
VMLAL.S16 Q13, D7, D6
VMLAL.S16 Q13, D9, D8
VST1.16 D28[2], [R3], R5
VMLAL.S16 Q13, D10, D11
VMLAL.S16 Q13, D12, D13
VST1.16 D28[3], [R3], R5
VMLAL.S16 Q13, D15, D14
VMLAL.S16 Q13, D17, D16
VMLAL.S16 Q13, D19, D18
VQSHL.S32 Q13, Q13, Q11
VSHR.S32 Q14, Q13, #16
VUZP.16 D28, D29
VST1.16 D28[0], [R3], R5
VST1.16 D28[1], [R3], R5
VST1.16 D28[2], [R3], R5
VST1.16 D28[3], [R3], R5
VPOP {D8 - D15}
LDMFD sp!, {R4-R12, R15}

View file

@ -0,0 +1,105 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.code 32
.eabi_attribute 24, 1 @Tag_ABI_align_needed
.eabi_attribute 25, 1 @Tag_ABI_align_preserved
.text
.p2align 2
@ ixheaacd_shiftrountine
@ Applies a signed shift r3 to two 32-bit buffers in lock-step:
@   r0, r1 : the two buffers (updated in place, two words per pass)
@   r2     : element count
@   r3     : shift; negative -> arithmetic right shift by -r3 (clamped
@            to 31), non-negative -> saturating left shift by r3.
@ Returns early (MI-conditional pop) when fewer than two elements remain.
.global ixheaacd_shiftrountine
ixheaacd_shiftrountine:
CMP r3, #0
STMFD sp!, {r4-r7, r12}
MOV r12, #0x1f
BGE SROUTINE_L1
@ Negative shift: plain arithmetic right shift path.
RSB r3, r3, #0
CMP r3, r12
MOVGT r3, r12  @ clamp shift amount to 31
SUBS r2, r2, #2
@ LDMMIFD sp!, {r4-r7, r12}
LDMFDMI sp!, {r4-r7, r12}
BXMI lr
SROUTINE_L2:
LDR r12, [r0, #0]
LDR r4, [r1, #0]
MOV r12, r12, ASR r3
MOV r4, r4, ASR r3
STR r12, [r0], #4
STR r4, [r1], #4
LDR r12, [r0, #0]
LDR r4, [r1, #0]
MOV r12, r12, ASR r3
MOV r4, r4, ASR r3
SUBS r2, r2, #2
STR r12, [r0], #4
STR r4, [r1], #4
BPL SROUTINE_L2
LDMFD sp!, {r4-r7, r12}
BX lr
@ Non-negative shift: saturating left shift.  r2 = 31 - shift; a value
@ whose top (32 - shift) bits are not all equal would overflow, so it
@ is clamped to 0x80000000 / 0x7FFFFFFF by sign.
SROUTINE_L1:
SUBS r4, r2, #2
RSB r2, r3, #0x1f
@ LDMMIFD sp!, {r4-r7, r12}
LDMFDMI sp!, {r4-r7, r12}
BXMI lr
SROUTINE_L3:
LDR r12, [r0, #0]
LDR r5, [r1, #0]
MOVS r7, r12, ASR r2  @ r7 == 0 or -1 iff the shift cannot overflow
CMNLT r7, #1
MOVLT r6, #0x80000000
MVNGT r6, #0x80000000
MOVEQ r6, r12, LSL r3
MOVS r7, r5, ASR r2
CMNLT r7, #1
MOVLT r12, #0x80000000
MVNGT r12, #0x80000000
MOVEQ r12, r5, LSL r3
STR r6, [r0], #4
STR r12, [r1], #4
LDR r12, [r0, #0]
LDR r5, [r1, #0]
MOVS r7, r12, ASR r2
CMNLT r7, #1
MOVLT r6, #0x80000000
MVNGT r6, #0x80000000
MOVEQ r6, r12, LSL r3
MOVS r7, r5, ASR r2
CMNLT r7, #1
MOVLT r12, #0x80000000
MVNGT r12, #0x80000000
MOVEQ r12, r5, LSL r3
SUBS r4, r4, #2
STR r6, [r0], #4
STR r12, [r1], #4
BPL SROUTINE_L3
LDMFD sp!, {r4-r7, r12}
BX lr

View file

@ -0,0 +1,92 @@
.text
.p2align 2
@ ixheaacd_shiftrountine_with_rnd_eld
@ AAC-ELD variant: butterfly (sum/difference with sign flips) of the
@ qmfReal (r0) / qmfImag (r1) buffers, saturating left shift by 9,
@ round with +0x8000, then >>16 and store as 16-bit into r2 and the
@ mirrored half at r12 = r2 + 2*r3.  r3 is the element count.
@ Overflow is detected by checking bits above position 22.
.global ixheaacd_shiftrountine_with_rnd_eld
ixheaacd_shiftrountine_with_rnd_eld:
STMFD sp!, {r4-r12, r14}
MOV r4, #0x1f  @ NOTE(review): appears unused - r4 is overwritten below
ADD r12, r2, r3, LSL #1
MOV r9, #0x8000  @ rounding constant
SUBS r3, r3, #1
BMI S_WITH_R_L6
S_WITH_R_L5:
LDR r5, [r1, r3, LSL #2] @i2 = qmfImag[j]
LDR r7, [r0, r3, LSL #2] @r2 = qmfReal[j]
LDR r14, [r0], #4 @r1 = *qmfReal
LDR r10, [r1], #4 @i1 = *qmfImag
ADD r6, r5, r7 @*qmfImag++ = add32(i2, r2)
MVN r6, r6 @negate32(add32(i2, r2))
ADD r6, r6 , #1
@SUB r5,r5,r7 @qmfReal[j] = sub32(i2, r2)
SUB r5, r7, r5 @qmfReal[j] = sub32(r2, i2)
ADD r7, r10, r14 @qmfImag[j] = add32(i1, r1)
MVN r7, r7 @negate32(add32(i1, r1))
ADD r7, r7 , #1
@SUB r4,r10,r14 @*qmfReal++ = sub32(i1, r1)
SUB r4, r14, r10 @*qmfReal++ = sub32(r1, i1)
@STR r7,[r1,r3,LSL #2]
@STR r5,[r0,r3,LSL #2]
@STR r6,[r1],#4
@STR r4,[r0],#4
@LDRD r4,[r0],#8 @DEBUG
@LDRD r6,[r1],#8
MOVS r10, r4, ASR #0x16 @Right shift by 22 to check the overflow ( is not AAC_ELD right shifted by 21)
CMNLT r10, #1 @Check r4 is overflow or not
MOVLT r4, #0x80000000 @saturate value if r4 is overflowed
MVNGT r4, #0x80000000
MOVEQ r4, r4, LSL #9 @shift by 9(hardcoded value) if not AAC_ELD left shifted by 10
MOVS r10, r5, ASR #0x16
QADD r4, r4, r9  @ round
CMNLT r10, #1
MOV r4, r4, ASR #16
MOVLT r5, #0x80000000
MVNGT r5, #0x80000000
MOVEQ r5, r5, LSL #9
MOV r14, r3, lsl #1  @ byte offset of the mirrored output slot
MOVS r10, r6, ASR #0x16
QADD r5, r5, r9
CMNLT r10, #1
MOV r5, r5, ASR #16
MOVLT r6, #0x80000000
@STRH r5,[r2],#2
STRH r5, [r2, r14]
MVNGT r6, #0x80000000
MOVEQ r6, r6, LSL #9
MOVS r10, r7, ASR #0x16
QADD r6, r6, r9
CMNLT r10, #1
MOV r6, r6, ASR #16
MOVLT r7, #0x80000000
MVNGT r7, #0x80000000
MOVEQ r7, r7, LSL #9
QADD r7, r7, r9
STRH r4, [r2], #2
MOV r7, r7, ASR #16
@STRH r7,[r12],#2
STRH r7, [r12, r14]
SUBS r3, r3, #2
STRH r6, [r12], #2
BGE S_WITH_R_L5
S_WITH_R_L6:
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,111 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.code 32
.eabi_attribute 24, 1 @Tag_ABI_align_needed
.eabi_attribute 25, 1 @Tag_ABI_align_preserved
.text
.p2align 2
@ ixheaacd_shiftrountine_with_rnd
@ Non-ELD twin of ixheaacd_shiftrountine_with_rnd_eld: butterfly of
@ qmfReal (r0) / qmfImag (r1), saturating left shift by 10 (overflow
@ checked above bit 21), round with +0x8000, >>16, and store 16-bit
@ results into r2 and the mirrored half at r12 = r2 + 2*r3.
@ r3 is the element count.
.global ixheaacd_shiftrountine_with_rnd
ixheaacd_shiftrountine_with_rnd:
STMFD sp!, {r4-r12, r14}
MOV r4, #0x1f  @ NOTE(review): appears unused - r4 is overwritten below
ADD r12, r2, r3, LSL #1
MOV r9, #0x8000  @ rounding constant
SUBS r3, r3, #1
BMI S_WITH_R_L6
S_WITH_R_L5:
LDR r5, [r1, r3, LSL #2]  @ qmfImag[j]
LDR r7, [r0, r3, LSL #2]  @ qmfReal[j]
LDR r14, [r0], #4  @ *qmfReal
LDR r10, [r1], #4  @ *qmfImag
ADD r6, r5, r7
SUB r5, r5, r7
ADD r7, r10, r14
SUB r4, r10, r14
MOVS r10, r4, ASR #0x15  @ overflow check above bit 21
CMNLT r10, #1
MOVLT r4, #0x80000000  @ saturate on overflow
MVNGT r4, #0x80000000
MOVEQ r4, r4, LSL #10
MOVS r10, r5, ASR #0x15
QADD r4, r4, r9  @ round
CMNLT r10, #1
MOV r4, r4, ASR #16
MOVLT r5, #0x80000000
MVNGT r5, #0x80000000
MOVEQ r5, r5, LSL #10
MOV r14, r3, lsl #1  @ byte offset of the mirrored output slot
MOVS r10, r6, ASR #0x15
QADD r5, r5, r9
CMNLT r10, #1
MOV r5, r5, ASR #16
MOVLT r6, #0x80000000
STRH r5, [r2, r14]
MVNGT r6, #0x80000000
MOVEQ r6, r6, LSL #10
MOVS r10, r7, ASR #0x15
QADD r6, r6, r9
CMNLT r10, #1
MOV r6, r6, ASR #16
MOVLT r7, #0x80000000
MVNGT r7, #0x80000000
MOVEQ r7, r7, LSL #10
QADD r7, r7, r9
STRH r4, [r2], #2
MOV r7, r7, ASR #16
STRH r7, [r12, r14]
SUBS r3, r3, #2
STRH r6, [r12], #2
BGE S_WITH_R_L5
S_WITH_R_L6:
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,75 @@
.code 32
.eabi_attribute 24, 1 @Tag_ABI_align_needed
.eabi_attribute 25, 1 @Tag_ABI_align_preserved
.text
.p2align 2
@ ixheaacd_shiftrountine_with_rnd_hq
@ High-quality variant: same butterfly as _with_rnd but keeps full
@ 32-bit outputs - saturating left shift by 6 (overflow checked above
@ bit 25), no rounding/truncation to 16 bits.  Results go to r2 and the
@ mirrored half at r12 = r2 + 4*r3; r3 is the element count.
.global ixheaacd_shiftrountine_with_rnd_hq
ixheaacd_shiftrountine_with_rnd_hq:
STMFD sp!, {r4-r12, r14}
ADD r12, r2, r3, LSL #2
MOV r9, #0x8000  @ NOTE(review): loaded but never used in this variant
SUBS r3, r3, #1
BMI S_WITH_R_L6
S_WITH_R_L5:
LDR r5, [r1, r3, LSL #2]  @ qmfImag[j]
LDR r7, [r0, r3, LSL #2]  @ qmfReal[j]
LDR r14, [r0], #4  @ *qmfReal
LDR r10, [r1], #4  @ *qmfImag
ADD r6, r5, r7
SUB r5, r5, r7
ADD r7, r10, r14
SUB r4, r10, r14
MOVS r10, r4, ASR #0x19  @ overflow check above bit 25
CMNLT r10, #1
MOVLT r4, #0x80000000  @ saturate on overflow
MVNGT r4, #0x80000000
MOVEQ r4, r4, LSL #6
MOVS r10, r5, ASR #0x19
CMNLT r10, #1
MOVLT r5, #0x80000000
MVNGT r5, #0x80000000
MOVEQ r5, r5, LSL #6
MOV r14, r3, lsl #2  @ byte offset of the mirrored output slot
MOVS r10, r6, ASR #0x19
CMNLT r10, #1
MOVLT r6, #0x80000000
STR r5, [r2, r14]
MVNGT r6, #0x80000000
MOVEQ r6, r6, LSL #6
MOVS r10, r7, ASR #0x19
CMNLT r10, #1
MOVLT r7, #0x80000000
MVNGT r7, #0x80000000
MOVEQ r7, r7, LSL #6
STR r4, [r2], #4
STR r7, [r12, r14]
SUBS r3, r3, #2
STR r6, [r12], #4
BGE S_WITH_R_L5
S_WITH_R_L6:
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,272 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
@ ixheaacd_tns_ar_filter_armv7
@ TNS all-pole (AR) filter over a 32-bit spectrum buffer using 16-bit
@ LPC coefficients and SMLAWB/SMLAWT multiply-accumulates.
@   r0 : spectrum pointer (filtered in place)
@   r1 : number of samples (saved to the local slot at [sp])
@   r2 : direction flag - 1 filters forward (+4 per sample), anything
@        else filters backward (-4 per sample)
@   r3 : 16-bit coefficient array; padded with zero up to the next
@        multiple of 4 taps so the unrolled inner loops are safe
@ Stack arguments (after the 40-byte STMFD and 4-byte local):
@   [sp, #44] : filter order
@   [sp, #48] : state scale shift (value 1 selects the SHIFT_1 path)
@   [sp, #52] : per-sample headroom shift (applied as lsl/asr pairs)
@   [sp, #56] : state buffer base; writes run downward from +4096
.global ixheaacd_tns_ar_filter_armv7
ixheaacd_tns_ar_filter_armv7:
STMFD r13! , {r4 - r12, r14}
SUB sp, sp, #4  @ local slot for the sample count
LDR r4, [sp, #44]
LDR r6, [sp, #48]
STR r1, [sp]
LDR r12, [sp, #56]
ANDS r5, r4, #3
ADD r12, r12, #4096  @ state write pointer (descending)
BEQ FILTER_LOOP
@ Order not a multiple of 4: zero-pad the coefficient tail and round
@ the order up so the 4-way unrolled loops read valid data.
MOV r8, #0
ADD r14, r3, r4, LSL #1
RSBS r7, r5, #3
BEQ ORDER_LOOPEND
ORDER_LOOP:
STRH r8, [r14, #2]!
SUBS r7, r7, #1
BGT ORDER_LOOP
ORDER_LOOPEND:
STRH r8, [r14, #2]
BIC r4, r4, #3
ADD r4, r4, #4
FILTER_LOOP:
LDR r1, [sp, #52]
CMP r2, #1
MOV r7, r4
BNE NEG_INC
@ --- Forward direction -------------------------------------------------
@ Warm-up: first sample passes through (no history yet); its scaled
@ copy seeds the state buffer.
LDR r8 , [r0]
SUBS r7 , r7 , #1
MOV r8, r8, lsl r1
MOV r9, r8, asr r1
MOV r8 , r8 , lsl r6
STR r8 , [r12], #-4
STR r9, [r0], #4
BEQ FILTER_LOOP2
@ Ramp-up: for the first `order` samples only the available history
@ (order - r7 taps) is accumulated.
FILTER_LOOP1:
LDR r8 , [r0]
SUB r5 , r4 , r7
MOV r5 , r5 , lsl #1
MOV r11 , #0
ADD r14, r12, r5, lsl #1
INNER_LOOP1:
LDRSH r9 , [r3 , r5]
LDR r10 , [r14], #-4
SUBS r5 , r5 , #2
SMLAWB r11 , r10, r9, r11
BGT INNER_LOOP1
MOV r8, r8, lsl r1
SUB r8 , r8 , r11, lsl #1  @ y = x - sum(coef * state)
MOV r9, r8, asr r1
STR r9 , [r0], #4
SUBS r7 , r7 , #1
MOV r8 , r8 , lsl r6
STR r8 , [r12], #-4
BGT FILTER_LOOP1
FILTER_LOOP2:
LDR r1, [sp]
SUBS r7 , r1 , r4  @ remaining samples after the ramp-up
BLE EXIT
LDR r1, [sp, #52]
CMP r6, #1
BEQ SHIFT_1
@ Steady state, generic shift: full-order inner loop unrolled 4x,
@ alternating SMLAWB/SMLAWT on packed coefficient halfwords.
OUTER_LOOP2:
LDR r8 , [r0]
MOV r5 , r4 , lsl #1
MOV r11 , #0
LDR r9 , [r3 , r5]
ADD r14 , r12, r5, lsl #1
SUB r5 , r5 , #4
INNER_LOOP2:
LDR r10 , [r14], #-4
LDR r2 , [r14] , #-4
SMLAWB r11, r10 , r9, r11
LDR r9 , [r3 , r5]
SUB r5 , r5 , #4
SMLAWT r11, r2 , r9, r11
LDR r10 , [r14] , #-4
LDR r2 , [r14] , #-4
SMLAWB r11, r10 , r9, r11
LDR r9 , [r3 , r5]
SUBS r5 , r5 , #4
SMLAWT r11, r2 , r9, r11
BGT INNER_LOOP2
MOV r8, r8, lsl r1
SUB r8 , r8 , r11, lsl #1
MOV r9, r8, asr r1
STR r9 , [r0], #4
MOV r2 , r8 , lsl r6
STR r2 , [r12], #-4
SUBS r7 , r7 , #1
BGT OUTER_LOOP2
B EXIT
@ Steady state, state-shift == 1: pointer-walking variant of the loop
@ above (coefficients read via post-decrement instead of indexed).
SHIFT_1:
MOV r6, r3
OUTER_LOOP2_SHIFT_1:
ADD r3, r6, r4 , lsl #1
LDR r9 , [r3 ], #-4
LDR r8 , [r0]
ADD r14 , r12, r4, lsl #2
MOV r5 , r4
MOV r11 , #0
INNER_LOOP2_SHIFT_1:
LDR r10 , [r14] , #-4
LDR r2 , [r14] , #-4
SMLAWB r11 , r10 , r9, r11
LDR r9 , [r3] , #-4
LDR r10 , [r14] , #-4
SMLAWT r11, r2 , r9, r11
LDR r2 , [r14] , #-4
SMLAWB r11, r10 , r9, r11
LDR r9 , [r3 ], #-4
SUBS r5 , r5 , #4
SMLAWT r11, r2 , r9, r11
BGT INNER_LOOP2_SHIFT_1
MOV r8, r8, lsl r1
SUB r8 , r8 , r11, lsl #1
MOV r9, r8, asr r1
STR r9 , [r0], #4
MOV r2 , r8 , lsl #1
STR r2 , [r12], #-4
SUBS r7 , r7 , #1
BGT OUTER_LOOP2_SHIFT_1
B EXIT
@ --- Backward direction (r2 != 1): identical maths, pointer walks
@ downward through the spectrum. ---------------------------------------
NEG_INC:
LDR r8 , [r0]
SUBS r7 , r7 , #1
MOV r8, r8, lsl r1
MOV r9, r8, asr r1
MOV r8 , r8 , lsl r6
STR r8 , [r12], #-4
STR r9, [r0], #-4
BEQ FILTER_LOOP2_NEG
FILTER_LOOP1_NEG:
LDR r8 , [r0]
SUB r5 , r4 , r7
MOV r5 , r5 , lsl #1
MOV r11 , #0
ADD r14, r12, r5, lsl #1
INNER_LOOP1_NEG:
LDRSH r9 , [r3 , r5]
LDR r10 , [r14], #-4
SUBS r5 , r5 , #2
SMLAWB r11 , r10, r9, r11
BGT INNER_LOOP1_NEG
MOV r8, r8, lsl r1
SUB r8 , r8 , r11, lsl #1
MOV r9, r8, asr r1
STR r9 , [r0], #-4
SUBS r7 , r7 , #1
MOV r8 , r8 , lsl r6
STR r8 , [r12], #-4
BGT FILTER_LOOP1_NEG
FILTER_LOOP2_NEG:
LDR r1, [sp]
SUBS r7 , r1 , r4
BLE EXIT
LDR r1, [sp, #52]
OUTER_LOOP2_NEG:
LDR r8 , [r0]
MOV r5 , r4 , lsl #1
MOV r11 , #0
LDR r9 , [r3 , r5]
ADD r14 , r12, r5, lsl #1
SUB r5 , r5 , #4
INNER_LOOP2_NEG:
LDR r10 , [r14], #-4
LDR r2 , [r14] , #-4
SMLAWB r11, r10 , r9, r11
LDR r9 , [r3 , r5]
SUB r5 , r5 , #4
SMLAWT r11, r2 , r9, r11
LDR r10 , [r14] , #-4
LDR r2 , [r14] , #-4
SMLAWB r11, r10 , r9, r11
LDR r9 , [r3 , r5]
SUBS r5 , r5 , #4
SMLAWT r11, r2 , r9, r11
BGT INNER_LOOP2_NEG
MOV r8, r8, lsl r1
SUB r8 , r8 , r11, lsl #1
MOV r9, r8, asr r1
STR r9 , [r0], #-4
MOV r2 , r8 , lsl r6
STR r2 , [r12], #-4
SUBS r7 , r7 , #1
BGT OUTER_LOOP2_NEG
EXIT:
ADD sp, sp , #4
LDMFD r13!, {r4 - r12, r15}

View file

@ -0,0 +1,122 @@
@/******************************************************************************
@ *
@ * Copyright (C) 2018 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
.text
.p2align 2
@ ixheaacd_tns_parcor_lpc_convert_armv7
@ Converts TNS reflection (parcor) coefficients (r0) into LPC
@ coefficients (written through r1), order r3, using Q-format
@ fixed-point recursion with a 128-byte on-stack workspace.
@ R8 is a down-scaling shift: the whole conversion is retried with the
@ initial value 0x7FFFFFFF >> R8 scaled further down (R8++) whenever an
@ overflow is detected during the recursion (R6 flag), so the result
@ always fits.  R2 (3rd argument) is pushed with the callee saves and
@ reloaded at the end from [SP, #128] to receive the final shift count.
.global ixheaacd_tns_parcor_lpc_convert_armv7
ixheaacd_tns_parcor_lpc_convert_armv7:
STMFD SP!, {R2, R4-R12, R14}
SUB SP, SP, #128  @ workspace: two 64-byte halves, zeroed below
MOV R4, SP
MOV R8, #0  @ current rescale shift
MOV R5, #0x8000  @ rounding constant
OUTLOOP:
@ Clear the workspace (16 word pairs) and restart the recursion.
MOV R6, #0  @ overflow flag for this attempt
MOV R7, #16
LOOP1:
STR R6, [R4], #4
STR R6, [R4, #60]
SUBS R7, R7, #1
BGT LOOP1
SUB R4, R4, #64
MOV R9, #0x7FFFFFFF
MOV R10, R9, ASR R8  @ starting value, pre-scaled by the retry shift
MOV R7, R3
@ Per-coefficient recursion: forward pass (LOOP2_1) then backward
@ update pass (LOOP2_2), with saturation via QADD and explicit
@ overflow detection feeding R6.
LOOP2:
MOV R11, R10
LDRSH R2, [R4], #2
LDRSH R14, [R0], #2
MOV R12, R3
LOOP2_1:
SMULBB R2, R2, R14
QADD R14, R10, R5
CMP R2, #0x40000000  @ 0x4000*0x4000 special case of Q15 multiply
MOV R14, R14, ASR #16
MOVNE R2, R2, LSL #1
MOVEQ R2, #0x7FFFFFFF
QADD R10, R10, R2
STRH R14, [R4, #62]
MOVS R2, R10
RSBSMI R2, R2, #0  @ |R10|
MOVMI R2, #0x7FFFFFFF
CMP R2, #0x7FFFFFFF
MOVEQ R6, #1  @ overflow detected -> force a retry with bigger shift
SUBS R12, R12, #1
@ LDRGTSH R2, [R4], #2
@ LDRGTSH R14, [R0], #2
LDRSHGT R2, [R4], #2
LDRSHGT R14, [R0], #2
BGT LOOP2_1
LDRSH R2, [R4, #62]
MOV R12, R3
LOOP2_2:
LDRSH R14, [R0, #-2]!
LDRSH R9, [R4, #-2]!
SMULBB R2, R2, R14
MOV R9, R9, LSL #16
CMP R2, #0x40000000
MOVNE R2, R2, LSL #1
MOVEQ R2, #0x7FFFFFFF
QADD R9, R9, R2
LDRSH R2, [R4, #62]
QADD R14, R9, R5
MOVS R9, R9
MOV R14, R14, ASR #16
STRH R14, [R4, #2]
@ RSBMIS R9, R9, #0
RSBSMI R9, R9, #0
MOVMI R9, #0x7FFFFFFF
CMP R9, #0x7FFFFFFF
MOVEQ R6, #1
SUBS R12, R12, #1
BGT LOOP2_2
QADD R11, R11, R5
QADD R2, R10, R5
MOV R11, R11, ASR #16
MOV R2, R2, ASR #16
STRH R11, [R4]
STRH R2, [R1], #2  @ emit one rounded LPC coefficient
MOV R10, #0
SUBS R7, R7, #1
BGE LOOP2
SUB R1, R1, R3, LSL #1
SUB R1, R1, #2  @ rewind the output pointer for the retry/exit
SUBS R10, R6, #1
ADDEQ R8, R8, #1  @ overflow: bump the shift and redo everything
BEQ OUTLOOP
LDR R2, [SP, #128]  @ saved 3rd argument (pushed in the STMFD above)
ADD SP, SP, #132  @ pop workspace + the saved R2 slot
STRH R8, [R2]  @ report the shift that was finally used
LDMFD sp!, {r4-r12, r15}

View file

@ -0,0 +1,166 @@
//.include "ihevc_neon_macros.s"
// Callee-save helpers: push/pop the registers this routine clobbers.
// (X8-X17 are caller-saved under AAPCS64; saving them is conservative.)
.macro push_v_regs
stp X8, X9, [sp, #-16]!
stp X10, X11, [sp, #-16]!
stp X12, X13, [sp, #-16]!
stp X14, X15, [sp, #-16]!
stp X16, X17, [sp, #-16]!
stp x19, x20, [sp, #-16]!
stp x21, x22, [sp, #-16]!
stp X29, X30, [sp, #-16]!
.endm
.macro pop_v_regs
ldp X29, X30, [sp], #16
ldp x21, x22, [sp], #16
ldp x19, x20, [sp], #16
ldp X16, X17, [sp], #16
ldp X14, X15, [sp], #16
ldp X12, X13, [sp], #16
ldp X10, X11, [sp], #16
ldp X8, X9, [sp], #16
.endm
.text
.p2align 2
// ixheaacd_scale_factor_process_armv8
// Scales the inverse-quantized spectrum band-by-band:
//   x0 : x_invquant (32-bit spectrum, scaled in place)
//   x1 : scale-factor array (16-bit, one per band)
//   x2 : number of bands
//   x3 : per-band width/offset table (bytes)
//   x4 : 4-entry scale table indexed by (scale_factor & 3)
//   x5 : selects the shift bias: > 2 uses 0x22 (34), else 0x25 (37)
//   x6, x7 : saved into x21/x22 - NOTE(review): not used afterwards
//            in this routine; confirm against the C prototype.
.global ixheaacd_scale_factor_process_armv8
ixheaacd_scale_factor_process_armv8:
push_v_regs
MOV x9, x4
MOV x21, x6
MOV x22, x7
CMP x2, #0 // Tbands
BGT lbl17
pop_v_regs
ret
lbl17:
MOV x10, #0
CMP x5, #2
BGT ADD_34
MOV x11, #0x25
B TBANDS_LOOP
ADD_34:
MOV x11, #0x22
// MOV x11, #0x25 // temp=37
TBANDS_LOOP:
LDRSH x5, [x1], #2 // scale_factor = *Scfactor++;
LDRB w4, [x3], #1 //Offset [1]
sxtw x4, w4
CMP x5, #0x18 //if(scale_factor < 24)
BGE SCALE_FACTOR_GE_12 //
CMP x4, #0
BLE OFFSET_ZERO
// scale_factor < 24: zero the whole band (x10 == 0).
SCALE_FACTOR_LT_12:
STR x10, [x0], #8
STR x10, [x0], #8
SUBS x4, x4, #4
BGT SCALE_FACTOR_LT_12
B OFFSET_ZERO
SCALE_FACTOR_GE_12:
SUBS x6, x11, x5, ASR #2 // 37-(scale_factor >> 2)
AND x5, x5, #3 // scale_factor & 0x0003
//ADD x5,x9,x5,LSL #1 ; scale_table_ptr[(scale_factor & 0x0003)];
LDR w5, [x9, x5, LSL #2] // scale_short = scale_table_ptr[(scale_factor & 0x0003)];
sxtw x5, w5
AND w17, w5, #0x0000FFFF
sxth w17, w17 //16-bit value stored as 32-bit,so SMULWB can still be used
BLE SHIFT_LE_ZERO // if shift less than or equal to zero
SUB x14, x6, #1 //dont do that extra LSL #1 in SMULWB
// shift > 0: multiply by the table entry, then arithmetic right shift.
SHIFT_POSITIVE: //loop over sfbWidth a multiple of 4
LDP w6, w7 , [x0, #0] // temp1 = *x_invquant
LDP w19, w20, [x0, #8]
//SMULWB x6, x6, x5 // buffex1 = mult32x16in32(temp1, scale_short);
SMULL x6, w6, w17
SMULL x7, w7, w17
SMULL x19, w19, w17
SMULL x20, w20, w17
ASR x6, x6, #16
ASR x7, x7 , #16
ASR x19, x19 , #16
ASR x20, x20 , #16
ASR x6, x6, x14 // buffex1 = shx32(buffex1, shift);
ASR x7, x7, x14
ASR x19, x19, x14
ASR x20, x20, x14
stp w6, w7, [x0], #8
stp w19, w20, [x0], #8
SUBS x4, x4, #4
BGT SHIFT_POSITIVE
B OFFSET_ZERO
SHIFT_LE_ZERO:
//RSBS x14, x6, #0 //-shift
NEGS x14, x6
BGT SHIFT_NEGTIVE1
// shift == 0: multiply and renormalize by one left shift.
SHIFT_ZERO: //loop over sfbWidth a multiple of 4
LDP w6, w7, [x0, #0] // temp1 = *x_invquant;
//SMULWB x6, x6, x5 // buffex1 = mult32x16in32(temp1, scale_short);
SMULL x6, w6, w17
SMULL x7, w7, w17
ASR x6, x6, #16
ASR x7, x7, #16
LSL x6, x6, #1
LSL x7, x7, #1
STP w6, w7, [x0], #8 // *x_invquant++ = buffex1;
SUBS x4, x4, #2
BGT SHIFT_ZERO
B OFFSET_ZERO
SHIFT_NEGTIVE1:
SUB x14, x14, #1
// shift < 0: pre-shift the sample left, multiply, then renormalize.
SHIFT_NEGTIVE: //;loop over sfbWidth a multiple of 4
LDP w6, w7, [x0, #0]
LSL w6, w6, w14 // buffex1 = shl32(buffex1, shift-1);
LSL w7, w7, w14 // buffex1 = shl32(buffex1, shift-1);
//SMULWB x6, x6, x5 // buffex1 = mult32x16in32(temp1, scale_short);
SMULL x6, w6, w17
SMULL x7, w7, w17
ASR x6, x6, #16
ASR x7, x7, #16
LSL x6, x6, #2 // shl for fixmul_32x16b and shl32(buffer,1)
LSL x7, x7, #2 // shl for fixmul_32x16b and shl32(buffer,1)
STP w6, w7, [x0], #8 // *x_invquant++ = buffex1;
SUBS x4, x4, #2
BGT SHIFT_NEGTIVE
OFFSET_ZERO:
SUBS x2, x2, #1
BGT TBANDS_LOOP
pop_v_regs
ret

View file

@ -0,0 +1,99 @@
/******************************************************************************
* *
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#ifndef IXHEAACD_BASIC_OP_H
#define IXHEAACD_BASIC_OP_H
/* Simple arithmetic helpers: operands are expected pre-widened by the
 * caller; the macros only parenthesize to keep expansion safe. */
#define add_d(a, b) ((a) + (b))
#define sub_d(a, b) ((a) - (b))
/* Reciprocal cube root, cbrt(1/a).  The argument is parenthesized so a
 * compound expression such as `x + y` is divided as a whole; without the
 * parentheses `cbrt(1.0f / x + y)` would be computed instead. */
#define ixheaacd_cbrt_calc(a) cbrt(1.0f / (a))
/*
static PLATFORM_INLINE WORD32 mult32x16in32_dual(WORD32 a , WORD16 b)
{
WORD32 result;
WORD32 msb;
UWORD32 lsb;
lsb=a&0x0000FFFF ;
msb=(a>>16);
result=(UWORD32)(lsb*(UWORD32)b);
result= msb*(WORD32)b+(result>>16);
return(result);
}
*/
// Dual multiply-subtract: returns bits 47..16 of (a*c1 - b*c2), combining
// the two 32x16 products before truncation so no intermediate precision
// is lost.
static PLATFORM_INLINE WORD32 msu32x16in32_dual(WORD32 a, WORD16 c1, WORD32 b,
                                                WORD16 c2) {
  WORD32 result;
  WORD32 temp_result;
  UWORD32 a_lsb;
  WORD32 a_msb;
  UWORD32 b_lsb;
  WORD32 b_msb;
  // Split each 32-bit operand into an unsigned low half and a signed
  // (arithmetically shifted) high half.
  a_lsb = a & 65535;
  a_msb = a >> 16;
  b_lsb = b & 65535;
  b_msb = b >> 16;
  // Difference of the low-half partial products; kept in a signed word so
  // the >> 16 below is an arithmetic shift.
  temp_result = ((UWORD32)a_lsb * (UWORD32)c1);
  temp_result = temp_result - (WORD32)((UWORD32)b_lsb * (UWORD32)c2);
  temp_result = ((WORD32)temp_result) >> 16;
  // Add the high-half partial products, which are already aligned to bit 16.
  result = temp_result + ((a_msb * (WORD32)c1) - (b_msb * (WORD32)c2));
  return (result);
}
// Dual multiply-accumulate: returns bits 47..16 of (a*c1 + b*c2), the
// additive counterpart of msu32x16in32_dual above.
static PLATFORM_INLINE WORD32 mac32x16in32_dual(WORD32 a, WORD16 c1, WORD32 b,
                                                WORD16 c2) {
  WORD32 result;
  WORD32 temp_result;
  UWORD32 a_lsb;
  WORD32 a_msb;
  UWORD32 b_lsb;
  WORD32 b_msb;
  // Split operands into unsigned low halves and signed high halves.
  a_lsb = a & 65535;
  a_msb = a >> 16;
  b_lsb = b & 65535;
  b_msb = b >> 16;
  // Sum of the low-half partial products, truncated to bit 16.
  temp_result = (UWORD32)a_lsb * (UWORD32)c1;
  temp_result = temp_result + (UWORD32)b_lsb * (UWORD32)c2;
  temp_result = ((UWORD32)temp_result) >> 16;
  // Add the high-half partial products (aligned to bit 16).
  result = temp_result + ((a_msb * (WORD32)c1)) + ((b_msb * (WORD32)c2));
  return (result);
}
/*
static PLATFORM_INLINE WORD64 mac32x32in64_dual(WORD32 a, WORD32 b,WORD64 c,
WORD32 d)
{
WORD64 result;
WORD64 temp_result;
temp_result = (WORD64)a * (WORD64)b;
result = c + (temp_result);
return (result);
}
*/
#endif

View file

@ -0,0 +1,397 @@
/******************************************************************************
* *
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#ifndef IXHEAACD_BASIC_OPS16_H
#define IXHEAACD_BASIC_OPS16_H
// Saturate a 32-bit value to the signed 16-bit range [-32768, 32767].
static PLATFORM_INLINE WORD16 ixheaacd_sat16(WORD32 op1) {
  if (op1 > 0X00007fffL) return MAX_16;
  if (op1 < (WORD32)0xffff8000L) return (WORD16)(-32768);
  return (WORD16)(op1);
}
// Wrapping 16-bit addition (overflow wraps modulo 2^16).
static PLATFORM_INLINE WORD16 ixheaacd_add16(WORD16 op1, WORD16 op2) {
  return (WORD16)(op1 + op2);
}

// Saturating 16-bit addition: the exact 32-bit sum is clamped to 16 bits.
static PLATFORM_INLINE WORD16 ixheaacd_add16_sat(WORD16 op1, WORD16 op2) {
  WORD32 wide_sum = (WORD32)op1 + (WORD32)op2;
  return ixheaacd_sat16(wide_sum);
}

// Wrapping 16-bit subtraction (overflow wraps modulo 2^16).
static PLATFORM_INLINE WORD16 ixheaacd_sub16(WORD16 op1, WORD16 op2) {
  return (WORD16)(op1 - op2);
}

// Saturating 16-bit subtraction: the exact 32-bit difference is clamped
// to 16 bits.
static PLATFORM_INLINE WORD16 ixheaacd_sub16_sat(WORD16 op1, WORD16 op2) {
  WORD32 wide_diff = (WORD32)op1 - op2;
  return ixheaacd_sat16(wide_diff);
}
// Fractional 16x16 multiply: returns bits 31..16 of the 32-bit product.
static PLATFORM_INLINE WORD16 ixheaacd_mult16(WORD16 op1, WORD16 op2) {
  WORD32 wide_prod = (WORD32)op1 * (WORD32)op2;
  return (WORD16)(wide_prod >> 16);
}

// Fractional 16x16 multiply: returns bits 30..15 of the 32-bit product
// (one redundant sign bit removed).
static PLATFORM_INLINE WORD16 ixheaacd_mult16_shl(WORD16 op1, WORD16 op2) {
  WORD32 wide_prod = (WORD32)op1 * (WORD32)op2;
  return (WORD16)(wide_prod >> 15);
}

// As ixheaacd_mult16_shl but the result is saturated to 16 bits
// (only -32768 * -32768 can exceed the range).
static PLATFORM_INLINE WORD16 ixheaacd_mult16_shl_sat(WORD16 op1, WORD16 op2) {
  WORD32 wide_prod = ((WORD32)op1 * (WORD32)op2) >> 15;
  return ixheaacd_sat16(wide_prod);
}
// Left-shift a 16-bit value; the result wraps to 16 bits.
static PLATFORM_INLINE WORD16 ixheaacd_shl16(WORD16 op1, WORD16 shift) {
  return (WORD16)(op1 << shift);
}

// Left-shift a 16-bit value with saturation; shift counts above 15 are
// clamped to 15 before shifting.
static PLATFORM_INLINE WORD16 ixheaacd_shl16_sat(WORD16 op1, WORD16 shift) {
  WORD32 widened;
  if (shift > 15) {
    shift = 15;
  }
  widened = (WORD32)(op1 << shift);
  return ixheaacd_sat16(widened);
}

// Arithmetic right-shift of a 16-bit value.
static PLATFORM_INLINE WORD16 ixheaacd_shr16(WORD16 op1, WORD16 shift) {
  return (WORD16)(op1 >> shift);
}
// Shift left when 'shift' is positive, arithmetic right otherwise
// (no saturation).
static PLATFORM_INLINE WORD16 shl16_dir(WORD16 op1, WORD16 shift) {
  return (shift > 0) ? ixheaacd_shl16(op1, shift)
                     : ixheaacd_shr16(op1, (WORD16)(-shift));
}

// Shift right when 'shift' is non-negative, left otherwise (no saturation).
static PLATFORM_INLINE WORD16 shr16_dir(WORD16 op1, WORD16 shift) {
  return (shift < 0) ? ixheaacd_shl16(op1, (WORD16)(-shift))
                     : ixheaacd_shr16(op1, shift);
}

// Shift left with saturation when 'shift' is positive, arithmetic right
// otherwise.
static PLATFORM_INLINE WORD16 shl16_dir_sat(WORD16 op1, WORD16 shift) {
  return (shift > 0) ? ixheaacd_shl16_sat(op1, shift)
                     : ixheaacd_shr16(op1, (WORD16)(-shift));
}

// Shift left with saturation when 'shift' is negative, arithmetic right
// otherwise.
static PLATFORM_INLINE WORD16 ixheaacd_shr16_dir_sat(WORD16 op1, WORD16 shift) {
  return (shift < 0) ? ixheaacd_shl16_sat(op1, (WORD16)(-shift))
                     : ixheaacd_shr16(op1, shift);
}
// finds a value which normalizes the input to 16 bit
// Returns the number of redundant sign bits, i.e. how far op1 can be
// shifted left before bit 14 becomes significant.
// Returns 0 for op1 == 0 and 15 for op1 == -1.
static PLATFORM_INLINE WORD16 norm16(WORD16 op1) {
  WORD16 var_out;
  if (0 == op1) {
    var_out = 0;
  } else {
    if ((WORD16)0xffff == op1) {
      // -1 handled separately: the fold below would map it to 0 and the
      // loop would never terminate.
      var_out = 15;
    } else {
      if (op1 < 0) {
        // Fold negatives to their one's complement so the counting loop
        // only has to handle positive patterns.
        op1 = (WORD16)(~op1);
      }
      // Count shifts until bit 14 is set.
      for (var_out = 0; op1 < 0x4000; var_out++) {
        op1 <<= 1;
      }
    }
  }
  return (var_out);
}
// finds no. of significant bits excluding sign bit
// value 15 returned for zero
static PLATFORM_INLINE WORD16 bin_expo16(WORD16 op1) {
  WORD16 var_out;
  var_out = ((WORD16)(15 - norm16(op1)));
  return (var_out);
}
// Absolute value of a 16-bit number (wraps for -32768).
static PLATFORM_INLINE WORD16 ixheaacd_abs16(WORD16 op1) {
  return (op1 < 0) ? (WORD16)(-op1) : op1;
}

// Absolute value of a 16-bit number; -32768 saturates to MAX_16.
static PLATFORM_INLINE WORD16 ixheaacd_abs16_sat(WORD16 op1) {
  if (-32768 == op1) {
    return MAX_16;
  }
  return (op1 < 0) ? (WORD16)(-op1) : op1;
}

// Negate a 16-bit number; -32768 saturates to MAX_16.
static PLATFORM_INLINE WORD16 ixheaacd_negate16(WORD16 op1) {
  return (-32768 == op1) ? MAX_16 : (WORD16)(-op1);
}

// Smaller of two 16-bit values.
static PLATFORM_INLINE WORD16 ixheaacd_min16(WORD16 op1, WORD16 op2) {
  return (op2 < op1) ? op2 : op1;
}

// Larger of two 16-bit values.
static PLATFORM_INLINE WORD16 ixheaacd_max16(WORD16 op1, WORD16 op2) {
  return (op2 > op1) ? op2 : op1;
}
/*****************************************************************************/
/* */
/* function name : div16 */
/* */
/* description : divides 2 16 bit variables and returns the quotient */
/* the q-format of the result is modified */
/* ( op1/op2 to 14 bits precision) */
/* */
/* inputs : WORD16 op1, WORD16 op2, WORD16 *q_format */
/* */
/* globals : none */
/* */
/*  processing : non-restoration type algorithm(shift & subtract)  */
/* */
/* outputs : WORD16 *q_format */
/* */
/* returns : WORD16 var_out */
/* */
/* issues : none */
/* */
/* revision history : */
/* */
/* DD MM YYYY author changes */
/* 11 11 2003 preethi modified(bug fixes) */
/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
/* */
/*****************************************************************************/
// divides 2 16 bit variables and returns the quotient
// The result is a 15-bit mantissa and *q_format receives its Q factor,
// so op1/op2 == quotient * 2^-(*q_format).  Division by zero returns the
// (sign-adjusted) numerator with *q_format = 0.
// Cleanup: removed dead stores — mantissa_nr/mantissa_dr were assigned
// from the pre-sign-fixed operands and then unconditionally overwritten,
// and 'quotient' was zeroed twice.
static PLATFORM_INLINE WORD16 div16(WORD16 op1, WORD16 op2, WORD16 *q_format) {
  WORD32 quotient = 0;
  UWORD16 mantissa_nr, mantissa_dr;
  WORD16 sign = 0;
  LOOPIDX i;
  WORD16 q_nr, q_dr;
  /* Work on magnitudes; remember the sign of the quotient. */
  if (op1 < 0 && op2 != 0) {
    op1 = -op1;
    sign = (WORD16)(sign ^ -1);
  }
  if (op2 < 0) {
    op2 = -op2;
    sign = (WORD16)(sign ^ -1);
  }
  if (op2 == 0) {
    *q_format = 0;
    return (op1);
  }
  /* Normalize both operands to maximize precision. */
  q_nr = norm16(op1);
  mantissa_nr = (UWORD16)op1 << (q_nr);
  q_dr = norm16(op2);
  mantissa_dr = (UWORD16)op2 << (q_dr);
  *q_format = (WORD16)(14 + q_nr - q_dr);
  /* Non-restoring shift-and-subtract division, 15 result bits. */
  for (i = 0; i < 15; i++) {
    quotient = quotient << 1;
    if (mantissa_nr >= mantissa_dr) {
      mantissa_nr = mantissa_nr - mantissa_dr;
      quotient += 1;
    }
    mantissa_nr = (UWORD32)mantissa_nr << 1;
  }
  if (sign < 0) {
    quotient = -quotient;
  }
  return (WORD16)quotient;
}
// mult16 (bits 31..16 of the product) plus wrapping 16-bit accumulate.
static PLATFORM_INLINE WORD16 mac16(WORD16 c, WORD16 op1, WORD16 op2) {
  return ixheaacd_add16(c, ixheaacd_mult16(op1, op2));
}

// mult16 (bits 31..16 of the product) plus saturating 16-bit accumulate.
static PLATFORM_INLINE WORD16 mac16_sat(WORD16 c, WORD16 op1, WORD16 op2) {
  return ixheaacd_add16_sat(c, ixheaacd_mult16(op1, op2));
}

// mult16_shl (bits 30..15 of the product) plus wrapping 16-bit accumulate.
static PLATFORM_INLINE WORD16 mac16_shl(WORD16 c, WORD16 op1, WORD16 op2) {
  return ixheaacd_add16(c, ixheaacd_mult16_shl(op1, op2));
}

// Bits 30..15 of the product are accumulated in 32 bits, then the total
// is saturated to 16 bits.
static PLATFORM_INLINE WORD16 mac16_shl_sat(WORD16 c, WORD16 op1, WORD16 op2) {
  WORD32 wide_acc = (((WORD32)op1 * (WORD32)op2) >> 15) + c;
  return ixheaacd_sat16(wide_acc);
}
// rounds a 32 bit variable to a 16 bit variable with saturation
// Adds 0.5 LSB (0x8000) using the saturating 32-bit add from the 32-bit
// basic-ops header, then keeps the high half.
static PLATFORM_INLINE WORD16 ixheaacd_round16(WORD32 op1) {
  WORD16 var_out;
  var_out = (WORD16)(ixheaacd_add32_sat(op1, 0x8000) >> 16);
  return (var_out);
}
#endif

View file

@ -0,0 +1,598 @@
/******************************************************************************
* *
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#ifndef IXHEAACD_BASIC_OPS32_H
#define IXHEAACD_BASIC_OPS32_H
#if 0
//returns the minima of 2 32 bit variables
static PLATFORM_INLINE WORD32 ixheaacd_min32(WORD32 a, WORD32 b)
{
WORD32 min_val;
asm (
"CMP %w[a], %w[b]\n\t"
"CSEL %w[min_val], %w[b], %w[a], GT\n"
: [min_val] "=r" (min_val), [a] "+r" (a)
: [b] "r" (b)
: "cc"
);
return (min_val);
}
//returns the maxima of 2 32 bit variables
static PLATFORM_INLINE WORD32 ixheaacd_max32(WORD32 a, WORD32 b)
{
WORD32 max_val;
asm (
"CMP %w[a], %w[b]\n"
"CSEL %w[max_val], %w[b], %w[a], LT\n"
: [max_val] "=r" (max_val), [a] "+r" (a)
: [b] "r" (b)
: "cc"
);
return (max_val);
}
#else
// Smaller of two 32-bit values.
static PLATFORM_INLINE WORD32 ixheaacd_min32(WORD32 a, WORD32 b) {
  return (b < a) ? b : a;
}
// Larger of two 32-bit values.
static PLATFORM_INLINE WORD32 ixheaacd_max32(WORD32 a, WORD32 b) {
  return (b > a) ? b : a;
}
#endif
// shifts a 32-bit value left by specified bits
// Only the low 8 bits of the count are used; counts 32..255 yield zero.
static PLATFORM_INLINE WORD32 ixheaacd_shl32(WORD32 a, WORD b) {
  WORD32 out_val;
  b = ((UWORD32)(b << 24) >> 24); /* keep only the low 8 bits of the count */
  if (b > 31)
    out_val = 0;
  else
    out_val = (WORD32)a << b;
  return out_val;
}
// shifts a 32-bit value right (arithmetic) by specified bits
// Only the low 8 bits of the count are used; counts >= 31 collapse the
// value to its sign (-1 for negative inputs, 0 otherwise).
static PLATFORM_INLINE WORD32 ixheaacd_shr32(WORD32 a, WORD b) {
  WORD32 out_val;
  b = ((UWORD32)(b << 24) >> 24); /* keep only the low 8 bits of the count */
  if (b >= 31) {
    if (a < 0)
      out_val = -1;
    else
      out_val = 0;
  } else {
    out_val = (WORD32)a >> b;
  }
  return out_val;
}
// shifts a 32-bit value left by specified bits and saturates it to 32 bits
// The shift is performed one bit at a time so the first step that would
// overflow can clamp to MAX_32/MIN_32 instead.
static PLATFORM_INLINE WORD32 ixheaacd_shl32_sat(WORD32 a, WORD b) {
  WORD32 out_val = a;
  for (; b > 0; b--) {
    // A value outside [0xc0000000, 0x3fffffff] would lose a data bit on
    // the next left shift — saturate toward its sign.
    if (a > (WORD32)0X3fffffffL) {
      out_val = MAX_32;
      break;
    } else if (a < (WORD32)0xc0000000L) {
      out_val = MIN_32;
      break;
    }
    a = ixheaacd_shl32(a, 1);
    out_val = a;
  }
  return (out_val);
}
// Left shift for non-negative counts, arithmetic right shift for
// negative counts (no saturation).
static PLATFORM_INLINE WORD32 ixheaacd_shl32_dir(WORD32 a, WORD b) {
  return (b < 0) ? ixheaacd_shr32(a, -b) : ixheaacd_shl32(a, b);
}
// As ixheaacd_shl32_dir, but the left shift saturates.
static PLATFORM_INLINE WORD32 ixheaacd_shl32_dir_sat(WORD32 a, WORD b) {
  return (b < 0) ? ixheaacd_shr32(a, -b) : ixheaacd_shl32_sat(a, b);
}
// Right shift for non-negative counts, left shift for negative counts
// (no saturation).
static PLATFORM_INLINE WORD32 ixheaacd_shr32_dir(WORD32 a, WORD b) {
  return (b < 0) ? ixheaacd_shl32(a, -b) : ixheaacd_shr32(a, b);
}
// Right shift for non-negative counts, saturating left shift for
// negative counts.
static PLATFORM_INLINE WORD32 shr32_dir_sat(WORD32 a, WORD b) {
  return (b < 0) ? ixheaacd_shl32_sat(a, -b) : ixheaacd_shr32(a, b);
}
// 16x16 -> full 32-bit signed product.
static PLATFORM_INLINE WORD32 ixheaacd_mult16x16in32(WORD16 a, WORD16 b) {
  return (WORD32)a * (WORD32)b;
}
// multiplies two 32 bit numbers considering their last
// 16 bits and returns their 32-bit result
// Rewritten in portable C: the previous AArch64 inline asm modified the
// %[a] and %[b] registers while declaring them as *input* operands, which
// violates GCC extended-asm constraint rules (inputs must not be written)
// and is undefined behavior.  Sign-extending the low 16 bits of each
// operand and multiplying matches the original SXTH/SXTH/MUL sequence.
static PLATFORM_INLINE WORD32 mult16x16in32_32(WORD32 a, WORD32 b) {
  WORD32 product;
  product = (WORD32)(WORD16)a * (WORD32)(WORD16)b;
  return product;
}
// 16x16 product with one redundant sign bit removed (<< 1).
static PLATFORM_INLINE WORD32 ixheaacd_mult16x16in32_shl(WORD16 a, WORD16 b) {
  return ixheaacd_shl32(ixheaacd_mult16x16in32(a, b), 1);
}
// As above with saturation: only -32768 * -32768 (product 0x40000000)
// needs clamping to MAX_32.
static PLATFORM_INLINE WORD32 ixheaacd_mult16x16in32_shl_sat(WORD16 a,
                                                             WORD16 b) {
  WORD32 wide_prod = (WORD32)a * (WORD32)b;
  if (wide_prod == (WORD32)0x40000000L) {
    return MAX_32;
  }
  return ixheaacd_shl32(wide_prod, 1);
}
// Wrapping 32-bit addition.
static PLATFORM_INLINE WORD32 ixheaacd_add32(WORD32 a, WORD32 b) {
  return a + b;
}
// Wrapping 32-bit subtraction.
static PLATFORM_INLINE WORD32 ixheaacd_sub32(WORD32 a, WORD32 b) {
  return a - b;
}
// adds 2 32 bit variables with saturation
// NOTE(review): the wrapping add relies on signed overflow wrapping,
// which is undefined behavior in ISO C — confirm the build uses -fwrapv
// or an equivalent guarantee.
static PLATFORM_INLINE WORD32 ixheaacd_add32_sat(WORD32 a, WORD32 b) {
  WORD32 sum;
  sum = ixheaacd_add32(a, b);
  // Overflow is only possible when both operands share a sign; it is
  // detected by the sum's sign flipping away from the operands' sign,
  // and clamps toward that sign.
  if ((((WORD32)a ^ (WORD32)b) & (WORD32)MIN_32) == 0) {
    if (((WORD32)sum ^ (WORD32)a) & (WORD32)MIN_32) {
      sum = (a < 0) ? MIN_32 : MAX_32;
    }
  }
  return sum;
}
// subtract 2 32 bit variables with saturation
static PLATFORM_INLINE WORD32 ixheaacd_sub32_sat(WORD32 a, WORD32 b) {
  WORD32 diff;
  diff = ixheaacd_sub32(a, b);
  // Overflow is only possible when the operands differ in sign; clamp
  // toward a's sign when the difference's sign flipped away from it.
  if ((((WORD32)a ^ (WORD32)b) & (WORD32)MIN_32) != 0) {
    if (((WORD32)diff ^ (WORD32)a) & (WORD32)MIN_32) {
      diff = (a < 0L) ? MIN_32 : MAX_32;
    }
  }
  return (diff);
}
// returns number of redundant sign bits in a 32-bit value.
// Returns 31 for 0 and for -1 (the original comment claimed zero for a
// zero input, but the code below returns 31).
static PLATFORM_INLINE WORD ixheaacd_norm32(WORD32 a) {
#if 1
  WORD norm_val;
  if (a == 0) {
    norm_val = 31;
  } else {
    if (a == (WORD32)0xffffffffL) {
      // -1 handled separately: folding it below would give 0 and the
      // counting loop would never terminate.
      norm_val = 31;
    } else {
      if (a < 0) {
        // Fold negatives to their one's complement so the loop only has
        // to handle positive bit patterns.
        a = ~a;
      }
      // Count shifts until bit 30 is set.
      for (norm_val = 0; a < (WORD32)0x40000000L; norm_val++) {
        a <<= 1;
      }
    }
  }
#else
  // AArch64 variant: CLZ of (a XOR sign-spread(a)) minus the sign bit.
  WORD32 norm_val, temp;
  asm("ASR %w[temp], %w[a], #31\n"
      "EOR %w[norm_val], %w[a], %w[temp]\n"
      "CLZ %w[norm_val], %w[norm_val]\n"
      "SUB %w[norm_val], %w[norm_val], #1\n"
      : [norm_val] "=r"(norm_val), [temp] "+r"(temp)
      : [a] "r"(a)
      : "cc");
#endif
  return norm_val;
}
// Redundant sign bits of a value assumed non-negative: CLZ - 1.
// (AArch64 CLZ of zero is 32, so a zero input yields 31.)
static PLATFORM_INLINE WORD ixheaacd_pnorm32(WORD32 a) {
  WORD32 norm_val;
  asm("CLZ %w[norm_val], %w[a]\n"
      "SUB %w[norm_val], %w[norm_val], #1\n"
      : [norm_val] "=r"(norm_val)
      : [a] "r"(a));
  return norm_val;
}
// Position of the most significant data bit: 31 - norm32(a).
// For negative numbers the leading ones are ignored like leading zeros.
static PLATFORM_INLINE WORD bin_expo32(WORD32 a) {
  return 31 - ixheaacd_norm32(a);
}
// returns the absolute value of 32-bit number
// AArch64: conditional select between a and -a (MIN_32 wraps to itself).
static PLATFORM_INLINE WORD32 ixheaacd_abs32(WORD32 a) {
  WORD32 abs_val;
  asm("CMP %w[a], #0\n"
      "NEG %w[abs_val], %w[a]\n"
      "CSEL %w[abs_val], %w[abs_val], %w[a], LT\n"
      : [abs_val] "=r"(abs_val), [a] "+r"(a)
      :
      : "cc");
  return (abs_val);
}
// returns the absolute value of 32-bit number
// One's-complement fold (a XOR sign-spread(a)): |a| - 1 for negatives,
// which is sufficient where only the normalization count matters.
// NOTE(review): 'temp' is listed "+r" but never initialized before the
// asm reads it as an input — it is written first by ASR, so this works,
// but a plain "=r" output would be cleaner; confirm before changing.
static PLATFORM_INLINE WORD32 ixheaacd_abs32_nrm(WORD32 a) {
  WORD32 abs_val, temp;
  asm("ASR %w[temp], %w[a], #31\n"
      "EOR %w[abs_val], %w[a], %w[temp]\n"
      : [abs_val] "=r"(abs_val), [temp] "+r"(temp)
      : [a] "r"(a)
      : "cc");
  return abs_val;
}
#if 0
//returns the absolute value of 32-bit number with saturation
static PLATFORM_INLINE WORD32 ixheaacd_abs32_sat(WORD32 a)
{
WORD32 abs_val,temp;
asm (
"ADDS %w[abs_val], %w[a], #0\n"
"NEG %w[temp], %w[abs_val]\n"
"CSEL %w[abs_val], %w[temp], %w[a], MI\n"
"CMP %w[abs_val], #0\n"
"MOV %w[temp], #2147483647\n"
"CSEL %w[abs_val], %w[temp], %w[abs_val], LT\n"
: [abs_val] "=r" (abs_val), [temp] "+r" (temp)
: [a] "r" (a)
: "cc"
);
return abs_val;
}
//returns the negated value of 32-bit number
static PLATFORM_INLINE WORD32 ixheaacd_negate32(WORD32 a)
{
WORD32 neg_val;
asm (
"NEG %w[neg_val], %w[a]\n"
: [neg_val] "=r" (neg_val)
: [a] "r" (a)
);
return neg_val;
}
//returns the negated value of 32-bit number with saturation
static PLATFORM_INLINE WORD32 ixheaacd_negate32_sat(WORD32 a)
{
WORD32 neg_val,temp;
asm (
"NEGS %w[neg_val], %w[a]\n"
"MOV %w[temp], #0x7FFFFFFF\n"
"CSEL %w[neg_val], %w[temp], %w[neg_val], VS\n"
: [neg_val] "=r" (neg_val), [temp] "+r" (temp)
: [a] "r" (a)
:"cc"
);
return neg_val;
}
#else
// Absolute value with saturation: |MIN_32| clamps to MAX_32.
static PLATFORM_INLINE WORD32 ixheaacd_abs32_sat(WORD32 a) {
  WORD32 abs_val;
  abs_val = a;
  if (a == MIN_32) {
    abs_val = MAX_32;
  } else if (a < 0) {
    abs_val = -a;
  }
  return abs_val;
}
// Wrapping negation (callers must avoid a == MIN_32 themselves).
static PLATFORM_INLINE WORD32 ixheaacd_negate32(WORD32 a) {
  WORD32 neg_val;
  neg_val = -a;
  return neg_val;
}
// Saturating negation: -MIN_32 clamps to MAX_32.
// Fixed to test for MIN_32 *before* negating; the previous code computed
// -a first, which is signed-overflow undefined behavior when a == MIN_32.
static PLATFORM_INLINE WORD32 ixheaacd_negate32_sat(WORD32 a) {
  WORD32 neg_val;
  if (a == MIN_32) {
    neg_val = MAX_32;
  } else {
    neg_val = -a;
  }
  return neg_val;
}
#endif
// divides 2 32 bit variables and returns the quotient
// *q_format receives the Q factor so a/b == quotient * 2^-(*q_format).
// Division by zero returns the (sign-adjusted) numerator with
// *q_format = 0.
// Cleanup: removed dead stores — mantissa_nr/mantissa_dr were assigned
// from the pre-sign-fixed operands and then unconditionally overwritten,
// and 'quotient' was zeroed twice.
static PLATFORM_INLINE WORD32 div32(WORD32 a, WORD32 b, WORD *q_format) {
  WORD32 quotient = 0;
  UWORD32 mantissa_nr, mantissa_dr;
  WORD16 sign = 0;
  LOOPINDEX i;
  WORD q_nr, q_dr;
  /* Work on magnitudes; remember the sign of the quotient. */
  if ((a < 0) && (0 != b)) {
    a = -a;
    sign = (WORD16)(sign ^ -1);
  }
  if (b < 0) {
    b = -b;
    sign = (WORD16)(sign ^ -1);
  }
  if (0 == b) {
    *q_format = 0;
    return (a);
  }
  /* Normalize both operands to maximize precision. */
  q_nr = ixheaacd_norm32(a);
  mantissa_nr = (UWORD32)a << (q_nr);
  q_dr = ixheaacd_norm32(b);
  mantissa_dr = (UWORD32)b << (q_dr);
  *q_format = (WORD)(30 + q_nr - q_dr);
  /* Non-restoring shift-and-subtract division, 31 result bits. */
  for (i = 0; i < 31; i++) {
    quotient = quotient << 1;
    if (mantissa_nr >= mantissa_dr) {
      mantissa_nr = mantissa_nr - mantissa_dr;
      quotient += 1;
    }
    mantissa_nr = (UWORD32)mantissa_nr << 1;
  }
  if (sign < 0) {
    quotient = -quotient;
  }
  return quotient;
}
// a + (full 32-bit product of b and c), wrapping.
static PLATFORM_INLINE WORD32 ixheaacd_mac16x16in32(WORD32 a, WORD16 b,
                                                    WORD16 c) {
  return ixheaacd_add32(a, ixheaacd_mult16x16in32(b, c));
}
// a + (low 16 bits of b times high 16 bits of c), wrapping.
static PLATFORM_INLINE WORD32 mac16x16hin32(WORD32 a, WORD32 b, WORD32 c) {
  WORD32 cross_prod = ixheaacd_mult16x16in32((WORD16)b, (WORD16)(c >> 16));
  return ixheaacd_add32(a, cross_prod);
}
// a + (product << 1), wrapping.
static PLATFORM_INLINE WORD32 ixheaacd_mac16x16in32_shl(WORD32 a, WORD16 b,
                                                        WORD16 c) {
  return ixheaacd_add32(a, ixheaacd_mult16x16in32_shl(b, c));
}
// a + (product << 1), with both the multiply and the add saturating.
static PLATFORM_INLINE WORD32 ixheaacd_mac16x16in32_shl_sat(WORD32 a, WORD16 b,
                                                            WORD16 c) {
  return ixheaacd_add32_sat(a, ixheaacd_mult16x16in32_shl_sat(b, c));
}
// a - (full 32-bit product of b and c), wrapping.
static PLATFORM_INLINE WORD32 msu16x16in32(WORD32 a, WORD16 b, WORD16 c) {
  return ixheaacd_sub32(a, ixheaacd_mult16x16in32(b, c));
}
// a - (product << 1), wrapping.
static PLATFORM_INLINE WORD32 msu16x16in32_shl(WORD32 a, WORD16 b, WORD16 c) {
  return ixheaacd_sub32(a, ixheaacd_mult16x16in32_shl(b, c));
}
// a - (product << 1), with both the multiply and the subtract saturating.
static PLATFORM_INLINE WORD32 msu16x16in32_shl_sat(WORD32 a, WORD16 b,
                                                   WORD16 c) {
  return ixheaacd_sub32_sat(a, ixheaacd_mult16x16in32_shl_sat(b, c));
}
// Halve both operands before adding so the sum cannot overflow.
static PLATFORM_INLINE WORD32 add32_shr(WORD32 a, WORD32 b) {
  return ixheaacd_add32(ixheaacd_shr32(a, 1), ixheaacd_shr32(b, 1));
}
// Halve both operands before subtracting so the difference cannot
// overflow.
static PLATFORM_INLINE WORD32 sub32_shr(WORD32 a, WORD32 b) {
  return ixheaacd_sub32(ixheaacd_shr32(a, 1), ixheaacd_shr32(b, 1));
}
#endif

View file

@ -0,0 +1,439 @@
/******************************************************************************
* *
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#ifndef IXHEAACD_BASIC_OPS40_H
#define IXHEAACD_BASIC_OPS40_H
#define hi64(a) ((WORD32)((a & (WORD64)0xFFFFFFFF00000000) >> 32))
#define lo64(a) ((UWORD32)(a))
// normalize input to 32 bits, return denormalizing info
// If *in already fits in 32 bits it is shifted up until normalized and
// the (non-negative) shift count is returned; otherwise it is shifted
// down into 32 bits and the negated shift count is returned.
// Zero input returns 31 and leaves *in unchanged.
static PLATFORM_INLINE WORD16 norm40(WORD40 *in) {
  WORD16 expo;
  WORD32 tempo;
  if (0 == (*in)) return 31;
  // Value within signed 32-bit range: normalize upward.
  if (((*in) <= 0x7fffffff) && ((WORD40)(*in) >= (WORD40)0xFFFFFFFF80000000)) {
    tempo = (WORD32)(*in);
    expo = ixheaacd_norm32(tempo);
    *in = tempo << expo;
    return (expo);
  }
  // Value wider than 32 bits: measure the overhang via the high part
  // (>> 31) and shift down by that amount.
  tempo = (WORD32)((*in) >> 31);
  expo = 31 - (ixheaacd_norm32(tempo));
  *in = (*in) >> expo;
  return (-expo);
}
// (a + b) >> 1 computed in 40-bit arithmetic so the sum cannot overflow.
static PLATFORM_INLINE WORD32 add32_shr40(WORD32 a, WORD32 b) {
  WORD40 wide_sum = (WORD40)a + (WORD40)b;
  return (WORD32)(wide_sum >> 1);
}
// (a - b) >> 1 computed in 40-bit arithmetic so the difference cannot
// overflow.
static PLATFORM_INLINE WORD32 sub32_shr40(WORD32 a, WORD32 b) {
  WORD40 wide_diff = (WORD40)a - (WORD40)b;
  return (WORD32)(wide_diff >> 1);
}
// 32x16 fractional multiply: bits 46..15 of the 48-bit product.
static PLATFORM_INLINE WORD32 ixheaacd_mult32x16in32_shl(WORD32 a, WORD16 b) {
  WORD64 wide_prod = (WORD64)a * (WORD64)b;
  return ((WORD32)(wide_prod >> 16)) << 1;
}
// 32x16 multiply using the high half of b: bits 46..15 of the product.
static PLATFORM_INLINE WORD32 mult32x16hin32_shl(WORD32 a, WORD32 b) {
  WORD64 wide_prod = (WORD64)a * (WORD64)(b >> 16);
  return ((WORD32)(wide_prod >> 16)) << 1;
}
// 32x16 fractional multiply: bits 47..16 of the 48-bit product.
static PLATFORM_INLINE WORD32 ixheaacd_mult32x16in32(WORD32 a, WORD16 b) {
  WORD64 wide_prod = (WORD64)a * (WORD64)b;
  return (WORD32)(wide_prod >> 16);
}
// As ixheaacd_mult32x16in32_shl but saturates the single overflowing
// input pair (0x80000000 * 0x8000).
static PLATFORM_INLINE WORD32 ixheaacd_mult32x16in32_shl_sat(WORD32 a,
                                                             WORD16 b) {
  if (a == (WORD32)0x80000000 && b == (WORD16)0x8000) {
    return (WORD32)0x7fffffff;
  }
  return ixheaacd_mult32x16in32_shl(a, b);
}
#if 0
//multiply WORD32 with WORD32 return bits 62 to 31
static PLATFORM_INLINE WORD32 ixheaacd_mult32_shl(WORD32 a, WORD32 b)
{
WORD64 result;
asm (
"SMULL %[result], %w[a], %w[b]\n"
"ASR %[result], %[result], #32\n"
: [result] "=r" (result)
: [a] "r" (a), [b] "r" (b)
);
return ((WORD32)(result << 1));
}
//multiply WORD32 with WORD32 return bits 63 to 32
static PLATFORM_INLINE WORD32 ixheaacd_mult32(WORD32 a, WORD32 b)
{
WORD64 result;
asm (
"SMULL %[result], %w[a], %w[b]\n"
"ASR %[result], %[result], #32\n"
: [result] "=r" (result)
: [a] "r" (a), [b] "r" (b)
);
return ((WORD32)result);
}
#else
// 32x32 fractional multiply: bits 62..31 of the 64-bit product.
static PLATFORM_INLINE WORD32 ixheaacd_mult32_shl(WORD32 a, WORD32 b) {
  WORD64 wide_prod = (WORD64)a * (WORD64)b;
  return ((WORD32)(wide_prod >> 32)) << 1;
}
// 32x32 fractional multiply: bits 63..32 of the 64-bit product.
static PLATFORM_INLINE WORD32 ixheaacd_mult32(WORD32 a, WORD32 b) {
  WORD64 wide_prod = (WORD64)a * (WORD64)b;
  return (WORD32)(wide_prod >> 32);
}
#endif
// As ixheaacd_mult32_shl but saturates the single overflowing input pair
// (0x80000000 * 0x80000000).
static PLATFORM_INLINE WORD32 ixheaacd_mult32_shl_sat(WORD32 a, WORD32 b) {
  if (a == (WORD32)0x80000000 && b == (WORD32)0x80000000) {
    return 0x7fffffff;
  }
  return ixheaacd_mult32_shl(a, b);
}
// a + bits 47..16 of b*c, wrapping.
static PLATFORM_INLINE WORD32 ixheaacd_mac32x16in32(WORD32 a, WORD32 b,
                                                    WORD16 c) {
  return a + ixheaacd_mult32x16in32(b, c);
}
// a + bits 46..15 of b*c, wrapping.
static PLATFORM_INLINE WORD32 ixheaacd_mac32x16in32_shl(WORD32 a, WORD32 b,
                                                        WORD16 c) {
  return a + ixheaacd_mult32x16in32_shl(b, c);
}
// a + bits 46..15 of b*c, with multiply and add both saturating.
static PLATFORM_INLINE WORD32 mac32x16in32_shl_sat(WORD32 a, WORD32 b,
                                                   WORD16 c) {
  return (ixheaacd_add32_sat(a, ixheaacd_mult32x16in32_shl_sat(b, c)));
}
// a + bits 63..32 of b*c, wrapping.
static PLATFORM_INLINE WORD32 ixheaacd_mac32(WORD32 a, WORD32 b, WORD32 c) {
  return a + ixheaacd_mult32(b, c);
}
// a + bits 62..31 of b*c, wrapping.
static PLATFORM_INLINE WORD32 mac32_shl(WORD32 a, WORD32 b, WORD32 c) {
  return a + ixheaacd_mult32_shl(b, c);
}
// a + bits 62..31 of b*c, with multiply and add both saturating.
static PLATFORM_INLINE WORD32 mac32_shl_sat(WORD32 a, WORD32 b, WORD32 c) {
  return (ixheaacd_add32_sat(a, ixheaacd_mult32_shl_sat(b, c)));
}
// a - bits 47..16 of b*c, wrapping.
static PLATFORM_INLINE WORD32 msu32x16in32(WORD32 a, WORD32 b, WORD16 c) {
  return a - ixheaacd_mult32x16in32(b, c);
}
// a - bits 46..15 of b*c, wrapping.
static PLATFORM_INLINE WORD32 msu32x16in32_shl(WORD32 a, WORD32 b, WORD16 c) {
  return a - ixheaacd_mult32x16in32_shl(b, c);
}
// a - bits 46..15 of b*c, with multiply and subtract both saturating.
static PLATFORM_INLINE WORD32 msu32x16in32_shl_sat(WORD32 a, WORD32 b,
                                                   WORD16 c) {
  return (ixheaacd_sub32_sat(a, ixheaacd_mult32x16in32_shl_sat(b, c)));
}
// a - bits 63..32 of b*c, wrapping.
static PLATFORM_INLINE WORD32 msu32(WORD32 a, WORD32 b, WORD32 c) {
  return a - ixheaacd_mult32(b, c);
}
// a - bits 62..31 of b*c, wrapping.
static PLATFORM_INLINE WORD32 msu32_shl(WORD32 a, WORD32 b, WORD32 c) {
  return a - ixheaacd_mult32_shl(b, c);
}
// a - bits 62..31 of b*c, with multiply and subtract both saturating.
static PLATFORM_INLINE WORD32 msu32_shl_sat(WORD32 a, WORD32 b, WORD32 c) {
  return (ixheaacd_sub32_sat(a, ixheaacd_mult32_shl_sat(b, c)));
}
// Dot product of a 32-bit and a 16-bit array accumulated in 40 bits;
// returns the normalized 32-bit result, with the shift in *q_val.
static PLATFORM_INLINE WORD32 mac3216_arr40(WORD32 *x, WORD16 *y,
                                            LOOPINDEX length, WORD16 *q_val) {
  LOOPINDEX idx;
  WORD40 acc = 0;
  for (idx = 0; idx < length; idx++) {
    acc += (WORD40)(ixheaacd_mult32x16in32(x[idx], y[idx]));
  }
  *q_val = norm40(&acc);
  return (WORD32)acc;
}
// Dot product of two 32-bit arrays accumulated in 40 bits; returns the
// normalized 32-bit result, with the shift in *q_val.
static PLATFORM_INLINE WORD32 mac32_arr40(WORD32 *x, WORD32 *y,
                                          LOOPINDEX length, WORD16 *q_val) {
  LOOPINDEX idx;
  WORD40 acc = 0;
  for (idx = 0; idx < length; idx++) {
    acc += (WORD40)(ixheaacd_mult32(x[idx], y[idx]));
  }
  *q_val = norm40(&acc);
  return ((WORD32)acc);
}
// Dot product of two 16-bit arrays accumulated in 40 bits; returns the
// normalized 32-bit result, with the shift in *q_val.
static PLATFORM_INLINE WORD32 mac16_arr40(WORD16 *x, WORD16 *y,
                                          LOOPINDEX length, WORD16 *q_val) {
  LOOPINDEX idx;
  WORD40 acc = 0;
  for (idx = 0; idx < length; idx++) {
    acc += (WORD40)((WORD32)x[idx] * (WORD32)y[idx]);
  }
  *q_val = norm40(&acc);
  return ((WORD32)acc);
}
// Sum of a 32-bit array accumulated in 40 bits; returns the normalized
// 32-bit result, with the shift in *q_val.
static PLATFORM_INLINE WORD32 add32_arr40(WORD32 *in_arr, LOOPINDEX length,
                                          WORD16 *q_val) {
  LOOPINDEX idx;
  WORD40 acc = 0;
  for (idx = 0; idx < length; idx++) {
    acc += (WORD40)in_arr[idx];
  }
  *q_val = norm40(&acc);
  return ((WORD32)acc);
}
// Multiply WORD32 by WORD32 and return the full 64-bit product.
#if 0
// Disabled hand-written variant using AArch64 inline assembly (SMULL).
// NOTE(review): kept under "#if 0" upstream; only the portable C version
// in the #else branch is compiled.
//multiply WORD32 with WORD32 return WORD64
static PLATFORM_INLINE WORD64 ixheaacd_mult32x32in64(WORD32 a, WORD32 b)
{
  WORD64 result;
  asm (
      "SMULL %[result], %w[a], %w[b]\n"
      : [result] "=r" (result)
      : [a] "r" (a), [b] "r" (b)
      );
  return (result);
}
#else
// Portable C implementation: widen both operands, multiply in 64 bits.
static PLATFORM_INLINE WORD64 ixheaacd_mult32x32in64(WORD32 a, WORD32 b) {
  WORD64 result;
  result = (WORD64)a * (WORD64)b;
  return (result);
}
#endif
// multiply WORD32 with WORD32 and accumulate the 64 bit result
// Multiply WORD32 by WORD32 and add the full 64-bit product into the
// running 64-bit accumulator; returns the updated accumulator.
static PLATFORM_INLINE WORD64 ixheaacd_mac32x32in64(WORD64 sum, WORD32 a,
                                                    WORD32 b) {
  return sum + (WORD64)a * (WORD64)b;
}
// Seven-tap dot product of WORD32 a[] with WORD16 b[] in 64-bit precision.
// NOTE(review): the incoming `sum` argument is OVERWRITTEN by the first
// product (accumulation restarts here), unlike ixheaacd_mac32x32in64_n
// which adds into `sum`. This matches the original code; confirm the
// intent with upstream before changing.
static PLATFORM_INLINE WORD64 ixheaacd_mac32x32in64_7(WORD64 sum,
                                                      const WORD32 *a,
                                                      const WORD16 *b) {
  WORD32 tap;

  sum = (WORD64)a[0] * (WORD64)b[0];
  for (tap = 1; tap < 7; tap++) {
    sum += (WORD64)a[tap] * (WORD64)b[tap];
  }
  return sum;
}
// n-tap dot product of WORD32 a[] with WORD16 b[], accumulated into the
// 64-bit running sum; returns the updated accumulator.
// NOTE(review): element 0 is accumulated unconditionally (even if n == 0),
// exactly as in the original — callers presumably guarantee n >= 1.
static PLATFORM_INLINE WORD64 ixheaacd_mac32x32in64_n(WORD64 sum,
                                                      const WORD32 *a,
                                                      const WORD16 *b,
                                                      WORD32 n) {
  WORD32 idx;

  sum += (WORD64)a[0] * (WORD64)b[0];
  for (idx = 1; idx < n; idx++) {
    sum += (WORD64)a[idx] * (WORD64)b[idx];
  }
  return sum;
}
// Full-precision multiply: widen both WORD32 operands and return the
// exact 64-bit product.
static PLATFORM_INLINE WORD64 ixheaacd_mult64(WORD32 a, WORD32 b) {
  return (WORD64)a * (WORD64)b;
}
// 64-bit addition without saturation (wraps on overflow).
static PLATFORM_INLINE WORD64 ixheaacd_add64(WORD64 a, WORD64 b) {
  return a + b;
}
// 64-bit subtraction without saturation (wraps on overflow).
static PLATFORM_INLINE WORD64 ixheaacd_sub64(WORD64 a, WORD64 b) {
  return a - b;
}
// 64-bit saturating subtraction: a - b clamped to [MIN_64, MAX_64].
static PLATFORM_INLINE WORD64 ixheaacd_sub64_sat(WORD64 a, WORD64 b) {
  WORD64 result = ixheaacd_sub64(a, b);

  // Overflow is only possible when the operands have opposite signs; it
  // has occurred when the result's sign differs from the minuend's.
  if ((((WORD64)a ^ (WORD64)b) & (WORD64)MIN_64) != 0 &&
      (((WORD64)result ^ (WORD64)a) & (WORD64)MIN_64) != 0) {
    result = (a < 0L) ? MIN_64 : MAX_64;
  }

  return result;
}
// Multiply two WORD32 values in 64-bit precision and return the product
// arithmetically shifted right by `shift` bits, truncated to WORD32.
static PLATFORM_INLINE WORD32 ixheaacd_mul32_sh(WORD32 a, WORD32 b,
                                                WORD8 shift) {
  WORD64 wide_product = (WORD64)a * (WORD64)b;
  return (WORD32)(wide_product >> shift);
}
#endif

View file

@ -0,0 +1,82 @@
///******************************************************************************
// *
// * Copyright (C) 2018 The Android Open Source Project
// *
// * Licensed under the Apache License, Version 2.0 (the "License");
// * you may not use this file except in compliance with the License.
// * You may obtain a copy of the License at:
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// *
// *****************************************************************************
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
.text
.global ixheaacd_calc_max_spectral_line_armv8

// WORD32 ixheaacd_calc_max_spectral_line_armv8(WORD32 *ptr_tmp, WORD32 size)
//   x0 = pointer to spectral data, w1 = element count.
// ORs together the absolute values of all elements (8 at a time with NEON,
// remainder scalar) and returns CLZ(|accumulated|) - 1, i.e. the number of
// redundant sign bits (headroom) across the whole buffer.
// NOTE(review): the SIMD loop body runs at least once even when size < 8,
// so the buffer appears to need a minimum of 8 readable elements — confirm
// with callers.
ixheaacd_calc_max_spectral_line_armv8:
    LSR W4, W1, #3                  // w4 = size / 8 (SIMD iterations)
    LSL W6, W4, #3                  // w6 = elements covered by SIMD loop
    MOV w11, #0x00000000
    MOV V3.S[0], w11                // clear the vector OR-accumulator v3
    MOV V3.S[1], w11
    MOV V3.S[2], w11
    MOV V3.S[3], w11
LOOP_1:
    // Load 8 words, take absolute values, OR into the accumulator.
    LD1 {V0.4S}, [X0], #16
    LD1 {V1.4S}, [X0], #16
    ABS V0.4S, V0.4S
    ABS V1.4S, V1.4S
    SUBS W4, W4, #1
    ORR V3.16B, V0.16B, V3.16B
    ORR V3.16B, V1.16B, V3.16B
    BGT LOOP_1
    SUBS W7, W1, W6                 // w7 = leftover elements (size % 8)
    // Horizontal OR of the four accumulator lanes into w4.
    MOV W4, V3.S[0]
    MOV W1, V3.S[1]
    MOV W2, V3.S[2]
    ORR W4, W4, W1
    MOV W3, V3.S[3]
    ORR W4, W4, W2
    ORR W4, W4, W3
    BEQ END_FUNC                    // no remainder -> done
LOOP_2:
    // Scalar tail: OR in |element| one word at a time.
    LDR W2, [X0], #4
    CMP W2, #0
    CNEG W2, W2, LE                 // w2 = |w2|
    ORR W4, W4, W2
    SUBS W7, W7, #1
    BGT LOOP_2
END_FUNC:
    MOV W0, W4
    CMP W0, #0
    CNEG W0, W0, LE                 // |accumulator| (defensive; already >= 0)
    CLZ W0, W0
    SUB W0, W0, #1                  // headroom = leading zeros - 1 (sign bit)
    RET

View file

@ -0,0 +1,231 @@
// Prologue/epilogue helpers: spill and restore the callee-saved SIMD
// registers (q8-q15) plus a batch of general-purpose registers used as
// scratch by the routines in this file, including the frame/link pair
// x29/x30.
// NOTE(review): x18 is the platform-reserved register under AAPCS64;
// saving/restoring it here is unusual — confirm it is safe on all targets.
.macro push_v_regs
    stp q8, q9, [sp, #-32]!
    stp q10, q11, [sp, #-32]!
    stp q12, q13, [sp, #-32]!
    stp q14, q15, [sp, #-32]!
    stp X8, X9, [sp, #-16]!
    stp X10, X11, [sp, #-16]!
    stp X12, X13, [sp, #-16]!
    stp X14, X15, [sp, #-16]!
    stp X16, X17, [sp, #-16]!
    stp X18, X19, [sp, #-16]!
    stp X20, X21, [sp, #-16]!
    stp X29, X30, [sp, #-16]!
.endm
// Restores exactly the registers pushed by push_v_regs, in reverse order.
.macro pop_v_regs
    ldp X29, X30, [sp], #16
    ldp X20, X21, [sp], #16
    ldp X18, X19, [sp], #16
    ldp X16, X17, [sp], #16
    ldp X14, X15, [sp], #16
    ldp X12, X13, [sp], #16
    ldp X10, X11, [sp], #16
    ldp X8, X9, [sp], #16
    ldp q14, q15, [sp], #32
    ldp q12, q13, [sp], #32
    ldp q10, q11, [sp], #32
    ldp q8, q9, [sp], #32
.endm
.text
.p2align 2
.global ixheaacd_cos_sin_mod_loop1

// ixheaacd_cos_sin_mod_loop1 — QMF cos/sin modulation kernel (AArch64 NEON
// port of the ARMv7 original; the VPUSH/STMFD comments below are remnants
// of that port).
//   x0 = subband pointer, x1 = M (band count), x2 = 16-bit twiddle pointer,
//   x3 = output/destination pointer — presumed from the load/store pattern;
//   confirm against the C prototype.
// Each of the four unrolled parts loads a cos/sin twiddle pair, forms the
// four 32x32 cross products (>> 16), and stores interleaved sum/difference
// results forward via x3 and backward via x5.
ixheaacd_cos_sin_mod_loop1:
    // STMFD sp!, {x4-x12, x14}
    push_v_regs
    //stp x19, x20,[sp,#-16]!
    //VPUSH {D8-D11}
    //generating load addresses
    ADD x4, x0, x1, lsl #3 //psubband1
    SUB x4, x4, #4
    ADD x5, x3, x1, lsl #3 //psubband1_t
    SUB x5, x5, #8
    ASR x6, x1, #2                  // x6 = M/4 = loop count (4 parts/iter)
    LDR w19, =0
    DUP V0.8h, w19                  // clear v0 before lane loads
LOOP1:
    //first part
    ld1 {v0.h}[0] , [x2]            // load cos twiddle (16-bit)
    ADD x2, x2, #2
    ld1 {v0.h}[2] , [x2]            // load sin twiddle (16-bit)
    ADD x2, x2, #2
    rev64 v1.2s, v0.2s              // swapped pair for the cross terms
    ld1 {v2.s}[0], [x0]
    ADD x0, x0, #4
    ADD x7, x0, #252
    ld1 {v2.s}[1], [x7]
    ld1 {v3.s}[0], [x4]
    ADD x7, x4, #256
    ld1 {v3.s}[1], [x7]
    SUB x4, x4, #4
    sMULL v4.2d, v0.2s, v2.2s //qsub 2nd
    sshr v4.2d, v4.2d, #16
    sMULL v6.2d, v0.2s, v3.2s //add 2nd
    sshr v6.2d, v6.2d, #16
    sMULL v8.2d, v1.2s, v2.2s //add 1st
    sshr v8.2d, v8.2d, #16
    sMULL v10.2d, v1.2s, v3.2s //qsub 1st
    sshr v10.2d, v10.2d, #16
    add v0.4s, v8.4s , v6.4s
    SQSUB v2.4s, v10.4s , v4.4s
    //shrn v0.2s, v0.2d,#32
    //shrn v2.2s, v2.2d,#32
    mov v3.16b, v0.16b
    mov v1.16b, v2.16b
    ST2 {v0.s, v1.s}[0], [x3]
    ADD x3, x3, #8
    ADD x7, x3, #248
    ST2 {v2.s, v3.s}[2], [x7]
    LDR w19, =0
    DUP V0.8h, w19
    //second part
    ld1 {v0.h}[0] , [x2]
    ADD x2, x2, #2
    ld1 {v0.h}[2] , [x2]
    ADD x2, x2, #2
    rev64 v1.2s, v0.2s
    ld1 {v2.s}[0], [x0]
    ADD x0, x0, #4
    ADD x7, x0, #252
    ld1 {v2.s}[1], [x7]
    ld1 {v3.s}[0], [x4]
    ADD x7, x4, #256
    ld1 {v3.s}[1], [x7]
    SUB x4, x4, #4
    sMULL v4.2d, v0.2s, v2.2s //qsub 2nd
    sshr v4.2d, v4.2d, #16
    sMULL v6.2d, v0.2s, v3.2s //add 2nd
    sshr v6.2d, v6.2d, #16
    sMULL v8.2d, v1.2s, v2.2s //add 1st
    sshr v8.2d, v8.2d, #16
    sMULL v10.2d, v1.2s, v3.2s //qsub 1st
    sshr v10.2d, v10.2d, #16
    // Second/fourth parts swap the add/qsub combination vs. parts 1/3.
    ADD v0.4s, v10.4s , v4.4s
    SQSUB v2.4s, v8.4s , v6.4s
    //shrn v0.2s, v0.2d,#32
    //shrn v2.2s, v2.2d,#32
    mov v3.16b, v0.16b
    mov v1.16b, v2.16b
    ST2 {v0.s, v1.s}[0], [x5]
    ADD x7, x5, #256
    ST2 {v2.s, v3.s}[2], [x7]
    SUB x5, x5, #8
    LDR w19, =0
    DUP V0.8h, w19
    //Third part
    ld1 {v0.h}[0] , [x2]
    ADD x2, x2, #2
    ld1 {v0.h}[2] , [x2]
    ADD x2, x2, #2
    rev64 v1.2s, v0.2s
    ld1 {v2.s}[0], [x0], #4
    ADD x7, x0, #252
    ld1 {v2.s}[1], [x7]
    ld1 {v3.s}[0], [x4]
    ADD x7, x4, #256
    ld1 {v3.s}[1], [x7]
    SUB x4, x4, #4
    sMULL v4.2d, v0.2s, v2.2s //qsub 2nd
    sshr v4.2d, v4.2d, #16
    sMULL v6.2d, v0.2s, v3.2s //add 2nd
    sshr v6.2d, v6.2d, #16
    sMULL v8.2d, v1.2s, v2.2s //add 1st
    sshr v8.2d, v8.2d, #16
    sMULL v10.2d, v1.2s, v3.2s //qsub 1st
    sshr v10.2d, v10.2d, #16
    add v0.4s, v8.4s , v6.4s
    SQSUB v2.4s, v10.4s , v4.4s
    //shrn v0.2s, v0.2d,#32
    //shrn v2.2s, v2.2d,#32
    mov v3.16b, v0.16b
    mov v1.16b, v2.16b
    ST2 {v0.s, v1.s}[0], [x3]
    ADD x3, x3, #8
    ADD x7, x3, #248
    ST2 {v2.s, v3.s}[2], [x7]
    LDR w19, =0
    DUP V0.8h, w19
    //Fourth part
    ld1 {v0.h}[0] , [x2]
    ADD x2, x2, #2
    ld1 {v0.h}[2] , [x2]
    ADD x2, x2, #2
    rev64 v1.2s, v0.2s
    ld1 {v2.s}[0], [x0]
    ADD x0, x0, #4
    ADD x7, x0, #252
    ld1 {v2.s}[1], [x7]
    ld1 {v3.s}[0], [x4]
    ADD x7, x4, #256
    ld1 {v3.s}[1], [x7]
    SUB x4, x4, #4
    sMULL v4.2d, v0.2s, v2.2s //qsub 2nd
    sshr v4.2d, v4.2d, #16
    sMULL v6.2d, v0.2s, v3.2s //add 2nd
    sshr v6.2d, v6.2d, #16
    sMULL v8.2d, v1.2s, v2.2s //add 1st
    sshr v8.2d, v8.2d, #16
    sMULL v10.2d, v1.2s, v3.2s //qsub 1st
    sshr v10.2d, v10.2d, #16
    ADD v0.4s, v10.4s , v4.4s
    SQSUB v2.4s, v8.4s , v6.4s
    //shrn v0.2s, v0.2d,#32
    //shrn v2.2s, v2.2d,#32
    mov v3.16b, v0.16b
    mov v1.16b, v2.16b
    ST2 {v0.s, v1.s}[0], [x5]
    ADD x7, x5, #256
    SUBS x6, x6, #1                 // decrement early to hide branch latency
    ST2 {v2.s, v3.s}[2], [x7]
    SUB x5, x5, #8
    LDR w19, =0
    DUP V0.8h, w19
    BGT LOOP1
    //VPOP {D8-D11}
    // LDMFD sp!, {x4-x12, x15}
    //ldp x19, x20,[sp],#16
    pop_v_regs
    ret

View file

@ -0,0 +1,213 @@
// Register spill/restore macros for ixheaacd_cos_sin_mod_loop2 — identical
// to the pair used by loop1: callee-saved SIMD q8-q15 plus the scratch
// general-purpose registers x8-x21 and the frame/link pair x29/x30.
// NOTE(review): x18 is the AAPCS64 platform register; confirm saving it
// here is intended on all supported OS targets.
.macro push_v_regs
    stp q8, q9, [sp, #-32]!
    stp q10, q11, [sp, #-32]!
    stp q12, q13, [sp, #-32]!
    stp q14, q15, [sp, #-32]!
    stp X8, X9, [sp, #-16]!
    stp X10, X11, [sp, #-16]!
    stp X12, X13, [sp, #-16]!
    stp X14, X15, [sp, #-16]!
    stp X16, X17, [sp, #-16]!
    stp X18, X19, [sp, #-16]!
    stp X20, X21, [sp, #-16]!
    stp X29, X30, [sp, #-16]!
.endm
// Restores exactly the registers pushed by push_v_regs, in reverse order.
.macro pop_v_regs
    ldp X29, X30, [sp], #16
    ldp X20, X21, [sp], #16
    ldp X18, X19, [sp], #16
    ldp X16, X17, [sp], #16
    ldp X14, X15, [sp], #16
    ldp X12, X13, [sp], #16
    ldp X10, X11, [sp], #16
    ldp X8, X9, [sp], #16
    ldp q14, q15, [sp], #32
    ldp q12, q13, [sp], #32
    ldp q10, q11, [sp], #32
    ldp q8, q9, [sp], #32
.endm
.text
.p2align 2
.global ixheaacd_cos_sin_mod_loop2

// ixheaacd_cos_sin_mod_loop2 — second QMF cos/sin modulation kernel
// (AArch64 NEON port of the ARMv7 original; commented VPUSH/STMFD/QSUB
// lines are remnants of that port).
//   x0 = subband pointer, x1 = 16-bit twiddle pointer, x2 = M (band count)
//   — presumed from the address arithmetic below; confirm against the C
//   prototype. A scalar prologue handles the first real/imag pair, then
//   the main loop processes two interleaved parts per iteration.
ixheaacd_cos_sin_mod_loop2:
    // STMFD sp!, {x4-x12, x14}
    push_v_regs
    //stp x19, x20,[sp,#-16]!
    //VPUSH {D8-D15}
    //generating load addresses
    ADD x3, x0, x2, LSL #3 //psubband1 = &subband[2 * M - 1];
    SUB x3, x3, #4
    ADD x10, x0, #256
    ADD x11, x10, x2, LSL #3
    SUB x11, x11, #4
    MOV x8, #-4                     // post-decrement step for backward stores
    LDR w19, =0
    DUP V0.4s, w19
    DUP V1.4s, w19
    LDR w6, [x0]
    sxtw x6, w6
    ASR x4, x2, #1 //M_2 = ixheaacd_shx32(M, 1);
    SUB x4, x4, #1                  // x4 = M/2 - 1 = main loop count
    ASR x6, x6, #1 //*psubband = *psubband >> 1;
    LD1 {v2.s}[0], [x3]
    STR w6, [x0], #4 //psubband++;
    sxtw x6, w6
    LDR w7, [x0]
    sxtw x7, w7
    ASR x7, x7, #1
    sub x20, x7, #0
    neg x6, x20                     // x6 = -(value >> 1)
    STR w6, [x3], #-4
    sxtw x6, w6
    LD1 {v3.s}[0], [x3] // im = *psubband1;
    LD2 {v0.h, v1.h}[0], [x1], #4   // load cos/sin twiddle pair
    sxtl v0.4s, v0.4h
    sxtl v1.4s, v1.4h
    dup v0.2s, v0.s[0]
    dup v1.2s, v1.s[0]
    LD1 {v2.s}[1], [x11] //re = *psubband12;
    // LDR w6, [x10]
    // sxtw x6,w6
    // ASR x7, x6, #1
    // MOV x9, #0
    // QSUB x7, x9, x7
    LD1 {v4.s}[0], [x10]
    SSHR v4.2s, v4.2s, #1
    MOV x9, #0
    DUP v6.2s, w9
    SQSUB v4.2s, v6.2s, v4.2s       // saturating negate of (value >> 1)
    ST1 {v4.s}[0], [x11]
    // str X7, [X11]
    SUB x11, x11, #4
    // sxtw x7,w7
    LDR w6, [x10, #4]
    sxtw x6, w6
    ASR x6, x6, #1
    STR w6, [x10], #4
    sxtw x6, w6
    LD1 {v3.s}[1], [x11]
    sMULL v4.2d, v0.2s, v2.2s //qsub 2nd
    sshr v4.2d, v4.2d, #16
    sMULL v6.2d, v0.2s, v3.2s //add 2nd
    sshr v6.2d, v6.2d, #16
    sMULL v8.2d, v1.2s, v2.2s //add 1st
    sshr v8.2d, v8.2d, #16
    sMULL v10.2d, v1.2s, v3.2s //qsub 1st
    sshr v10.2d, v10.2d, #16
    add v12.2d, v8.2d , v6.2d
    SQSUB v14.2d, v10.2d , v4.2d
    SQSUB v16.2d, v4.2d , v10.2d
    //shrn v12.2s, v12.2d,#32
    //shrn v14.2s, v14.2d,#32
    //shrn v16.2s, v16.2d,#32
    ST1 {v12.s}[0], [x3], x8
    ST1 {v14.s}[0], [x0], #4
    SQNEG v12.4s, v12.4s
    ST1 {v12.s}[2], [x10], #4
    ST1 {v16.s}[2], [x11], x8
LOOP1:
    LD1 {v2.2s}, [x0]
    LD1 {v3.2s}, [x10]
    LDR w5, [x3] //RE2
    sxtw x5, w5
    LDR w6, [x11] //RE3
    sxtw x6, w6
    //VTRN.32 D2, D3
    TRN1 v4.2s, v2.2s, v3.2s        // AArch64 replacement for VTRN.32
    TRN2 v3.2s, v2.2s, v3.2s
    MOV v2.8b, v4.8b
    sMULL v4.2d, v0.2s, v2.2s //qsub 2nd
    sshr v4.2d, v4.2d, #16
    sMULL v6.2d, v0.2s, v3.2s //add 2nd
    sshr v6.2d, v6.2d, #16
    sMULL v8.2d, v1.2s, v2.2s //add 1st
    sshr v8.2d, v8.2d, #16
    sMULL v10.2d, v1.2s, v3.2s //qsub 1st
    sshr v10.2d, v10.2d, #16
    add v12.2d, v8.2d , v6.2d
    SQSUB v14.2d, v4.2d , v10.2d
    SQSUB v16.2d, v10.2d , v4.2d
    //shrn v12.2s, v12.2d,#32
    //shrn v14.2s, v14.2d,#32
    //shrn v16.2s, v16.2d,#32
    ST1 {v12.s}[0], [x0], #4
    ST1 {v14.s}[0], [x3], x8
    SQNEG v12.4s, v12.4s
    ST1 {v12.s}[2], [x11], x8
    ST1 {v16.s}[2], [x10], #4
    LDR w19, =0
    DUP V0.4s, w19
    DUP V1.4s, w19
    // second part
    LD2 {v0.h, v1.h}[0], [x1], #4   // next twiddle pair
    sxtl v0.4s, v0.4h
    sxtl v1.4s, v1.4h
    dup v0.2s, v0.s[0]
    dup v1.2s, v1.s[0]
    mov v3.s[0], w5
    mov v3.s[1], w6
    LD1 {v2.s}[0], [x3]
    LD1 {v2.s}[1], [x11]
    sMULL v4.2d, v0.2s, v2.2s //qsub 2nd
    sshr v4.2d, v4.2d, #16
    sMULL v6.2d, v0.2s, v3.2s //add 2nd
    sshr v6.2d, v6.2d, #16
    sMULL v8.2d, v1.2s, v2.2s //add 1st
    sshr v8.2d, v8.2d, #16
    sMULL v10.2d, v1.2s, v3.2s //qsub 1st
    sshr v10.2d, v10.2d, #16
    add v12.2d, v4.2d , v10.2d
    SQSUB v14.2d, v8.2d , v6.2d
    SQSUB v16.2d, v6.2d , v8.2d
    //shrn v12.2s, v12.2d,#32
    //shrn v14.2s, v14.2d,#32
    //shrn v16.2s, v16.2d,#32
    ST1 {v12.s}[0], [x3], x8
    ST1 {v14.s}[0], [x0], #4
    SQNEG v12.4s, v12.4s
    subs x4, x4, #1
    ST1 {v12.s}[2], [x10], #4
    ST1 {v16.s}[2], [x11], x8
    BGT LOOP1
    //VPOP {D8-D15}
    // LDMFD sp!, {x4-x12, x15}
    //ldp x19, x20,[sp],#16
    pop_v_regs
    ret

View file

@ -0,0 +1,555 @@
// Register spill/restore macros for ixheaacd_fft32x32_ld2_armv8. This
// routine uses no SIMD registers, so only general-purpose scratch
// registers and the frame/link pair are saved.
// NOTE(review): the pair "X22, X24" skips x23 while the body below never
// touches x22-x24 — presumably copied from a sibling file; confirm x23
// does not need preserving.
.macro push_v_regs
    stp X8, X9, [sp, #-16]!
    stp X10, X11, [sp, #-16]!
    stp X12, X13, [sp, #-16]!
    stp X14, X15, [sp, #-16]!
    stp X16, X17, [sp, #-16]!
    stp X18, X19, [sp, #-16]!
    stp X20, X21, [sp, #-16]!
    stp X22, X24, [sp, #-16]!
    stp X29, X30, [sp, #-16]!
.endm
// Restores exactly the registers pushed by push_v_regs, in reverse order.
.macro pop_v_regs
    ldp X29, X30, [sp], #16
    ldp X22, X24, [sp], #16
    ldp X20, X21, [sp], #16
    ldp X18, X19, [sp], #16
    ldp X16, X17, [sp], #16
    ldp X14, X15, [sp], #16
    ldp X12, X13, [sp], #16
    ldp X10, X11, [sp], #16
    ldp X8, X9, [sp], #16
.endm
.text
.p2align 2
.global ixheaacd_fft32x32_ld2_armv8

// ixheaacd_fft32x32_ld2_armv8 — fully unrolled 8-point (two radix-4
// stages) DIT FFT on interleaved 32-bit real/imag data. On entry x2 is
// the input/work buffer and x3 the output buffer (moved into x0/x1
// immediately). Stage 1 runs in place on x0; stage 2 writes to x1.
// Twiddle multiplies emulate the ARMv7 SMULWB/SMULWT idiom with
// SMULL + ASR #16 against the 16-bit halves of 0x7642, 0x30FC, 0x5A83
// (cos/sin of pi/8, 3pi/8, pi/4 in Q15) and their negatives.
ixheaacd_fft32x32_ld2_armv8:
    // STMFD sp!, {x4-x12,x14}
    push_v_regs
    stp x19, x20, [sp, #-16]!
    //DIT Radix-4 FFT First Stage
    //First Butterfly
    MOV x0, x2
    MOV x1, x3
    LDR w2, [x0] //x_0 = x[0 ]
    sxtw x2, w2
    LDR w3, [x0, #32] //x_2 = x[8 ]
    sxtw x3, w3
    LDR w4, [x0, #64] //x_4 = x[16]
    sxtw x4, w4
    LDR w5, [x0, #96] //x_6 = x[24]
    sxtw x5, w5
    ADD w6, w2, w4 //xh0_0 = x_0 + x_4
    SUB w7, w2, w4 //xl0_0 = x_0 - x_4
    ADD w8, w3, w5 //xh0_1 = x_2 + x_6
    SUB w9, w3, w5 //xl0_1 = x_2 - x_6
    LDR w2, [x0, #4] //x_1 = x[0 +1]
    sxtw x2, w2
    LDR w3, [x0, #36] //x_3 = x[8 +1]
    sxtw x3, w3
    LDR w4, [x0, #68] //x_5 = x[16+1]
    sxtw x4, w4
    LDR w5, [x0, #100] //x_7 = x[24+1]
    sxtw x5, w5
    ADD w10, w2, w4 //xh1_0 = x_1 + x_5
    SUB w11, w2, w4 //xl1_0 = x_1 - x_5
    ADD w12, w3, w5 //xh1_1 = x_3 + x_7
    SUB w14, w3, w5 //xl1_1 = x_3 - x_7
    ADD w2, w6, w8 //n00 = xh0_0 + xh0_1
    ADD w3, w7, w14 //n10 = xl0_0 + xl1_1
    SUB w4, w6, w8 //n20 = xh0_0 - xh0_1
    SUB w5, w7, w14 //n30 = xl0_0 - xl1_1
    STR w2, [x0] //x[0 ] = n00
    STR w3, [x0, #32] //x[8 ] = n10
    STR w4, [x0, #64] //x[16] = n20
    STR w5, [x0, #96] //x[24] = n30
    ADD w2, w10, w12 //n01 = xh1_0 + xh1_1
    SUB w3, w11, w9 //n11 = xl1_0 - xl0_1
    SUB w4, w10, w12 //n21 = xh1_0 - xh1_1
    ADD w5, w11, w9 //n31 = xl1_0 + xl0_1
    STR w2, [x0, #4] //x[1 ] = n01
    STR w3, [x0, #36] //x[8+1 ] = n11
    STR w4, [x0, #68] //x[16+1] = n21
    STR w5, [x0, #100] //x[24+1] = n31
    //Second Butterfly
    LDR w2, [x0, #8] //x_0 = x[2 ]
    sxtw x2, w2
    LDR w3, [x0, #40] //x_2 = x[10]
    sxtw x3, w3
    LDR w4, [x0, #72] //x_4 = x[18]
    sxtw x4, w4
    LDR w5, [x0, #104] //x_6 = x[26]
    sxtw x5, w5
    ADD w6, w2, w4 //xh0_0 = x_0 + x_4
    SUB w7, w2, w4 //xl0_0 = x_0 - x_4
    ADD w8, w3, w5 //xh0_1 = x_2 + x_6
    SUB w9, w3, w5 //xl0_1 = x_2 - x_6
    LDR w2, [x0, #12] //x_1 = x[2 +1]
    sxtw x2, w2
    LDR w3, [x0, #44] //x_3 = x[10+1]
    sxtw x3, w3
    LDR w4, [x0, #76] //x_5 = x[18+1]
    sxtw x4, w4
    LDR w5, [x0, #108] //x_7 = x[26+1]
    sxtw x5, w5
    ADD w10, w2, w4 //xh1_0 = x_1 + x_5
    SUB w11, w2, w4 //xl1_0 = x_1 - x_5
    ADD w12, w3, w5 //xh1_1 = x_3 + x_7
    SUB w14, w3, w5 //xl1_1 = x_3 - x_7
    ADD w2, w6, w8 //n00 = xh0_0 + xh0_1
    ADD w3, w7, w14 //n10 = xl0_0 + xl1_1
    SUB w4, w6, w8 //n20 = xh0_0 - xh0_1
    SUB w5, w7, w14 //n30 = xl0_0 - xl1_1
    STR w2, [x0, #8] //x[2 ] = n00
    STR w3, [x0, #40] //x[10] = n10
    STR w4, [x0, #72] //x[18] = n20
    STR w5, [x0, #104] //x[26] = n30
    ADD w2, w10, w12 //n01 = xh1_0 + xh1_1
    SUB w3, w11, w9 //n11 = xl1_0 - xl0_1
    SUB w4, w10, w12 //n21 = xh1_0 - xh1_1
    ADD w5, w11, w9 //n31 = xl1_0 + xl0_1
    STR w2, [x0, #12] //x[2 +1] = n01
    STR w3, [x0, #44] //x[10+1] = n11
    STR w4, [x0, #76] //x[18+1] = n21
    STR w5, [x0, #108] //x[26+1] = n31
    //Third Butterfly
    LDR w2, [x0, #16] //x_0 = x[4 ]
    sxtw x2, w2
    LDR w3, [x0, #48] //x_2 = x[12]
    sxtw x3, w3
    LDR w4, [x0, #80] //x_4 = x[20]
    sxtw x4, w4
    LDR w5, [x0, #112] //x_6 = x[28]
    sxtw x5, w5
    ADD w6, w2, w4 //xh0_0 = x_0 + x_4
    SUB w7, w2, w4 //xl0_0 = x_0 - x_4
    ADD w8, w3, w5 //xh0_1 = x_2 + x_6
    SUB w9, w3, w5 //xl0_1 = x_2 - x_6
    LDR w2, [x0, #20] //x_1 = x[4 +1]
    sxtw x2, w2
    LDR w3, [x0, #52] //x_3 = x[12+1]
    sxtw x3, w3
    LDR w4, [x0, #84] //x_5 = x[20+1]
    sxtw x4, w4
    LDR w5, [x0, #116] //x_7 = x[28+1]
    sxtw x5, w5
    ADD w10, w2, w4 //xh1_0 = x_1 + x_5
    SUB w11, w2, w4 //xl1_0 = x_1 - x_5
    ADD w12, w3, w5 //xh1_1 = x_3 + x_7
    SUB w14, w3, w5 //xl1_1 = x_3 - x_7
    ADD w2, w6, w8 //n00 = xh0_0 + xh0_1
    ADD w3, w7, w14 //n10 = xl0_0 + xl1_1
    SUB w4, w6, w8 //n20 = xh0_0 - xh0_1
    SUB w5, w7, w14 //n30 = xl0_0 - xl1_1
    STR w2, [x0, #16] //x[4 ] = n00
    STR w3, [x0, #48] //x[12] = n10
    STR w4, [x0, #80] //x[20] = n20
    STR w5, [x0, #112] //x[28] = n30
    ADD w2, w10, w12 //n01 = xh1_0 + xh1_1
    SUB w3, w11, w9 //n11 = xl1_0 - xl0_1
    SUB w4, w10, w12 //n21 = xh1_0 - xh1_1
    ADD w5, w11, w9 //n31 = xl1_0 + xl0_1
    STR w2, [x0, #20] //x[4 +1] = n01
    STR w3, [x0, #52] //x[12+1] = n11
    STR w4, [x0, #84] //x[20+1] = n21
    STR w5, [x0, #116] //x[28+1] = n31
    //Fourth Butterfly
    LDR w2, [x0, #24] //x_0 = x[6 ]
    sxtw x2, w2
    LDR w3, [x0, #56] //x_2 = x[14]
    sxtw x3, w3
    LDR w4, [x0, #88] //x_4 = x[22]
    sxtw x4, w4
    LDR w5, [x0, #120] //x_6 = x[30]
    sxtw x5, w5
    ADD w6, w2, w4 //xh0_0 = x_0 + x_4
    SUB w7, w2, w4 //xl0_0 = x_0 - x_4
    ADD w8, w3, w5 //xh0_1 = x_2 + x_6
    SUB w9, w3, w5 //xl0_1 = x_2 - x_6
    LDR w2, [x0, #28] //x_1 = x[6 +1]
    sxtw x2, w2
    LDR w3, [x0, #60] //x_3 = x[14+1]
    sxtw x3, w3
    LDR w4, [x0, #92] //x_5 = x[22+1]
    sxtw x4, w4
    LDR w5, [x0, #124] //x_7 = x[30+1]
    sxtw x5, w5
    ADD w10, w2, w4 //xh1_0 = x_1 + x_5
    SUB w11, w2, w4 //xl1_0 = x_1 - x_5
    ADD w12, w3, w5 //xh1_1 = x_3 + x_7
    SUB w14, w3, w5 //xl1_1 = x_3 - x_7
    ADD w2, w6, w8 //n00 = xh0_0 + xh0_1
    ADD w3, w7, w14 //n10 = xl0_0 + xl1_1
    SUB w4, w6, w8 //n20 = xh0_0 - xh0_1
    SUB w5, w7, w14 //n30 = xl0_0 - xl1_1
    STR w2, [x0, #24] //x[6 ] = n00
    STR w3, [x0, #56] //x[14] = n10
    STR w4, [x0, #88] //x[22] = n20
    STR w5, [x0, #120] //x[30] = n30
    ADD w2, w10, w12 //n01 = xh1_0 + xh1_1
    SUB w3, w11, w9 //n11 = xl1_0 - xl0_1
    SUB w4, w10, w12 //n21 = xh1_0 - xh1_1
    ADD w5, w11, w9 //n31 = xl1_0 + xl0_1
    STR w2, [x0, #28] //x[6 +1] = n01
    STR w3, [x0, #60] //x[14+1] = n11
    STR w4, [x0, #92] //x[22+1] = n21
    STR w5, [x0, #124] //x[30+1] = n31
    //DIT Radix-4 FFT Second Stage
    //First Butterfly
    LDR w2, [x0] //inp_0qr = x[0]
    sxtw x2, w2
    LDR w3, [x0, #8] //inp_1qr = x[2]
    sxtw x3, w3
    LDR w4, [x0, #16] //inp_2qr = x[4]
    sxtw x4, w4
    LDR w5, [x0, #24] //inp_3qr = x[6]
    sxtw x5, w5
    ADD w6, w2, w4 //sum_0qr = mul_0qr + mul_2qr
    SUB w7, w2, w4 //sum_1qr = mul_0qr - mul_2qr
    ADD w8, w3, w5 //sum_2qr = mul_1qr + mul_3qr
    SUB w9, w3, w5 //sum_3qr = mul_1qr - mul_3qr
    LDR w2, [x0, #4] //inp_0qi = x[1]
    sxtw x2, w2
    LDR w3, [x0, #12] //inp_1qi = x[3]
    sxtw x3, w3
    LDR w4, [x0, #20] //inp_2qi = x[5]
    sxtw x4, w4
    LDR w5, [x0, #28] //inp_3qi = x[7]
    sxtw x5, w5
    ADD w10, w2, w4 //sum_0qi = mul_0qi + mul_2qi
    SUB w11, w2, w4 //sum_1qi = mul_0qi - mul_2qi
    ADD w12, w3, w5 //sum_2qi = mul_1qi + mul_3qi
    SUB w14, w3, w5 //sum_3qi = mul_1qi - mul_3qi
    ADD w2, w6, w8 //sum_0qr + sum_2qr
    ADD w3, w7, w14 //sum_1qr + sum_3qi
    SUB w4, w6, w8 //sum_0qr - sum_2qr
    SUB w5, w7, w14 //sum_1qr - sum_3qi
    STR w2, [x1] //y[0 ] = sum_0qr + sum_2qr
    STR w3, [x1, #32] //y[8 ] = sum_1qr + sum_3qi
    STR w4, [x1, #64] //y[16] = sum_0qr - sum_2qr
    STR w5, [x1, #96] //y[24] = sum_1qr - sum_3qi
    ADD w2, w10, w12 //sum_0qi + sum_2qi
    SUB w3, w11, w9 //sum_1qi - sum_3qr
    SUB w4, w10, w12 //sum_0qi - sum_2qi
    ADD w5, w11, w9 //sum_1qi + sum_3qr
    STR w2, [x1, #4] //y[0 +1] = sum_0qi + sum_2qi
    STR w3, [x1, #36] //y[8 +1] = sum_1qi - sum_3qr
    STR w4, [x1, #68] //y[16+1] = sum_0qi - sum_2qi
    STR w5, [x1, #100] //y[24+1] = sum_1qi + sum_3qr
    //Load twiddle factors
    // Q15 halves of the original packed ARMv7 constants (see commented
    // 32-bit values): w11/w21 = 0x7642/0x89BE, w12/w22 = 0x30FC/0xCF04,
    // w14/w24 = 0x5A83/0xA57D (each pair is value / its negation).
    // LDR w11, =2310960706 //0x89BE7642
    LDR w11, =0x7642
    sxth w11, w11
    LDR w21, =0x89BE
    sxth w21, w21
    // LDR w12, =3473158396 //0xCF0430FC
    LDR w12, =0x30FC
    sxth w12, w12
    LDR w22, =0xCF04
    sxth w22, w22
    // LDR w14, =2776455811 //0xA57D5A83
    LDR w14, =0x5A83
    sxth w14, w14
    LDR w24, =0xA57D
    sxth w24, w24
    //Second Butterfly
    LDR w2, [x0, #32] //mul_0qr = inp_0qr = x[8]
    sxtw x2, w2
    LDR w3, [x0, #36] //mul_0qi = inp_1qr = x[9]
    sxtw x3, w3
    LDR w5, [x0, #40] //inp_1qr = x[10]
    sxtw x5, w5
    LDR w6, [x0, #44] //inp_1qi = x[11]
    sxtw x6, w6
    SMULL x4, w5, w11
    ASR x4, x4, #16
    // SMULWB x4, x5, x11 //mul_1qr = mpy_16_32_ns( 0x7642 , inp_1qr)
    SMULL x20, w6, w12
    ASR x20, x20, #16
    ADD w4, w4, w20
    // SMLAWB x4, x6, x12, x4 //mul_1qr -= mpy_16_32_ns(-0x30FC , inp_1qi)
    SMULL x5, w5, w22
    ASR x5, x5, #16
    // SMULWT x5, x5, x12 //mul_1qi = mpy_16_32_ns(-0x30FC , inp_1qr)
    LDR w7, [x0, #48] //inp_2qr = x[12]
    sxtw x7, w7
    LDR w8, [x0, #52] //inp_2qi = x[13]
    sxtw x8, w8
    //Moved for delay slot
    SMULL x20, w6, w11
    ASR x20, x20, #16
    ADD w5, w5, w20
    // SMLAWB x5, x6, x11, x5 //mul_1qi += mpy_16_32_ns( 0x7642 , inp_1qi)
    ADD w6, w7, w8 //(inp_2qr + inp_2qi)
    SMULL x6, w6, w14
    ASR x6, x6, #16
    // SMULWB x6, x6, x14 //mul_2qr = mpy_16_32_ns(0x5A83 , (inp_2qr + inp_2qi))
    SUB w7, w8, w7 //(-inp_2qr + inp_2qi)
    SMULL x7, w7, w14
    ASR x7, x7, #16
    // SMULWB x7, x7, x14 //mul_2qi = mpy_16_32_ns(0x5A83 , (-inp_2qr + inp_2qi))
    // NOTE(review): 64-bit LDR x9 loads two words; only the low word is
    // used via the following sxtw, so the net effect matches LDR w9.
    LDR x9 , [x0, #56] //inp_3qr = x[14]
    sxtw x9, w9
    LDR w10, [x0, #60] //inp_3qi = x[15]
    sxtw x10, w10
    SMULL x8, w9, w12
    ASR x8, x8, #16
    // SMULWB x8, x9 , x12 //mul_3qr = mpy_16_32_ns( 0x30FC , inp_3qr)
    SMULL x20, w10, w11
    ASR x20, x20, #16
    ADD w8, w8, w20
    // SMLAWB x8, x10, x11, x8 //mul_3qr -= mpy_16_32_ns(-0x7642 , inp_3qi)//
    SMULL x9, w9 , w21
    ASR x9, x9, #16
    // SMULWT x9, x9 , x11 //mul_3qi = mpy_16_32_ns(-0x7642 , inp_3qr)
    SMULL x20, w10, w12
    ASR x20, x20, #16
    ADD w9, w9, w20
    // SMLAWB x9, x10, x12, x9 //mul_3qi += mpy_16_32_ns( 0x30FC , inp_3qi)
    ADD w10, w2, w6, lsl #1 //sum_0qr = mul_0qr + (mul_2qr << 1)
    SUB w2 , w2, w6, lsl #1 //sum_1qr = mul_0qr - (mul_2qr << 1)
    ADD w6 , w4, w8 //sum_2qr = mul_1qr + mul_3qr
    SUB w4 , w4, w8 //sum_3qr = mul_1qr - mul_3qr
    ADD w8 , w3, w7, lsl #1 //sum_0qi = mul_0qi + (mul_2qi << 1)
    SUB w3 , w3, w7, lsl #1 //sum_1qi = mul_0qi - (mul_2qi << 1)
    ADD w7 , w5, w9 //sum_2qi = mul_1qi + mul_3qi
    SUB w5 , w5, w9 //sum_3qi = mul_1qi - mul_3qi
    ADD w9 , w10, w6, lsl #1 //sum_0qr + (sum_2qr << 1)
    SUB w10, w10, w6, lsl #1 //sum_0qr - (sum_2qr << 1)
    ADD w6 , w2 , w5, lsl #1 //sum_1qr + (sum_3qi << 1)
    SUB w2 , w2 , w5, lsl #1 //sum_1qr - (sum_3qi << 1)
    STR w9 , [x1, #8] //y[2 ] = sum_0qr + (sum_2qr << 1)
    STR w10, [x1, #72] //y[18] = sum_0qr - (sum_2qr << 1)
    STR w6 , [x1, #40] //y[10] = sum_1qr + (sum_3qi << 1)
    STR w2 , [x1, #104] //y[26] = sum_1qr - (sum_3qi << 1)
    ADD w5 , w8 , w7, lsl #1 //sum_0qi + (sum_2qi << 1)
    SUB w8 , w8 , w7, lsl #1 //sum_0qi - (sum_2qi << 1)
    SUB w7 , w3 , w4, lsl #1 //sum_1qi - (sum_3qr << 1)
    ADD w3 , w3 , w4, lsl #1 //sum_1qi + (sum_3qr << 1)
    STR w5 , [x1, #12] //y[2 +1] = sum_0qi + (sum_2qi << 1)
    STR w8 , [x1, #76] //y[18+1] = sum_0qi - (sum_2qi << 1)
    STR w7 , [x1, #44] //y[10+1] = sum_1qi - (sum_3qr << 1)
    STR w3 , [x1, #108] //y[26+1] = sum_1qi + (sum_3qr << 1)
    //Third Butterfly
    LDR w2, [x0, #64] //mul_0qr = inp_0qr = x[16]
    sxtw x2, w2
    LDR w5, [x0, #72] //inp_1qr = x[18]
    sxtw x5, w5
    LDR w6, [x0, #76] //inp_1qi = x[19]
    sxtw x6, w6
    //Moved for delay slot
    LDR w3, [x0, #68] //mul_0qi = inp_1qr = x[17]
    sxtw x3, w3
    ADD w4, w5, w6 //(inp_1qr + inp_1qi)
    SMULL x4, w4, w14
    ASR x4, x4, #16
    // SMULWB x4, x4, x14 //mul_1qr = mpy_16_32_ns(0x5A83 , (inp_1qr + inp_1qi))
    SUB w5, w6, w5 //(-inp_1qr + inp_1qi)
    SMULL x5, w5, w14
    ASR x5, x5, #16
    // SMULWB x5, x5, x14 //mul_1qi = mpy_16_32_ns(0x5A83 , (-inp_1qr + inp_1qi))
    LDR w6, [x0, #84] //mul_2qr = inp_2qi = x[21]
    sxtw x6, w6
    // NOTE(review): 64-bit LDR x9 again; only the low word is kept by sxtw.
    LDR x9 , [x0, #88] //inp_3qr = x[22]
    sxtw x9, w9
    LDR w10, [x0, #92] //inp_3qi = x[23]
    sxtw x10, w10
    //Moved for delay slot
    LDR w7, [x0, #80] //mul_2qi = inp_2qr = x[20]
    sxtw x7, w7
    SUB w8 , w10, w9 //(-inp_3qr + inp_3qi)
    SMULL x8, w8, w14
    ASR x8, x8, #16
    // SMULWB x8 , x8 , x14 //mul_3qr = mpy_16_32_ns( 0x5A83 , (-inp_3qr + inp_3qi))
    ADD w9 , w9 , w10 //(inp_3qr + inp_3qi)
    SMULL x9, w9, w24
    ASR x9, x9, #16
    // SMULWT x9 , x9 , x14 //mul_3qi = mpy_16_32_ns(-0x5A83 , (inp_3qr + inp_3qi))
    ADD w10, w2, w6 //sum_0qr = mul_0qr + mul_2qr
    SUB w2 , w2, w6 //sum_1qr = mul_0qr - mul_2qr
    ADD w6 , w4, w8 //sum_2qr = mul_1qr + mul_3qr
    SUB w4 , w4, w8 //sum_3qr = mul_1qr - mul_3qr
    SUB w8 , w3, w7 //sum_0qi = mul_0qi - mul_2qi
    ADD w3 , w3, w7 //sum_1qi = mul_0qi + mul_2qi
    ADD w7 , w5, w9 //sum_2qi = mul_1qi + mul_3qi
    SUB w5 , w5, w9 //sum_3qi = mul_1qi - mul_3qi
    ADD w9 , w10, w6, lsl #1 //sum_0qr + (sum_2qr << 1)
    SUB w10, w10, w6, lsl #1 //sum_0qr - (sum_2qr << 1)
    ADD w6 , w2 , w5, lsl #1 //sum_1qr + (sum_3qi << 1)
    SUB w2 , w2 , w5, lsl #1 //sum_1qr - (sum_3qi << 1)
    STR w9 , [x1, #16] //y[4 ] = sum_0qr + (sum_2qr << 1)
    STR w10, [x1, #80] //y[20] = sum_0qr - (sum_2qr << 1)
    STR w6 , [x1, #48] //y[12] = sum_1qr + (sum_3qi << 1)
    STR w2 , [x1, #112] //y[28] = sum_1qr - (sum_3qi << 1)
    ADD w5, w8, w7, lsl #1 //sum_0qi + (sum_2qi << 1)
    SUB w8, w8, w7, lsl #1 //sum_0qi - (sum_2qi << 1)
    SUB w7, w3, w4, lsl #1 //sum_1qi - (sum_3qr << 1)
    ADD w3, w3, w4, lsl #1 //sum_1qi + (sum_3qr << 1)
    STR w5 , [x1, #20] //y[4 +1] = sum_0qi + (sum_2qi << 1)
    STR w8 , [x1, #84] //y[20+1] = sum_0qi - (sum_2qi << 1)
    STR w7 , [x1, #52] //y[12+1] = sum_1qi - (sum_3qr << 1)
    STR w3 , [x1, #116] //y[28+1] = sum_1qi + (sum_3qr << 1)
    //Fourth Butterfly
    LDR w2, [x0, #96] //mul_0qr = inp_0qr = x[24]
    sxtw x2, w2
    LDR w3, [x0, #100] //mul_0qi = inp_1qr = x[25]
    sxtw x3, w3
    LDR w5, [x0, #104] //inp_1qr = x[26]
    sxtw x5, w5
    LDR w6, [x0, #108] //inp_1qi = x[27]
    sxtw x6, w6
    SMULL x4, w5, w12
    ASR x4, x4, #16
    // SMULWB x4, x5, x12 //mul_1qr = mpy_16_32_ns( 0x30FC , inp_1qr)
    SMULL x20, w6, w11
    ASR x20, x20, #16
    ADD w4, w4, w20
    // SMLAWB x4, x6, x11, x4 //mul_1qr -= mpy_16_32_ns(-0x7642 , inp_1qi)
    SMULL x5, w5, w21
    ASR x5, x5, #16
    // SMULWT x5, x5, x11 //mul_1qi = mpy_16_32_ns(-0x7642 , inp_1qr)
    LDR w7, [x0, #112] //inp_2qr = x[28]
    sxtw x7, w7
    LDR w8, [x0, #116] //inp_2qi = x[29]
    sxtw x8, w8
    //Moved for delay slot
    SMULL x20, w6, w12
    ASR x20, x20, #16
    ADD w5, w5, w20
    // SMLAWB x5, x6, x12, x5 //mul_1qi += mpy_16_32_ns( 0x30FC , inp_1qi)
    SUB w6, w8, w7 //(-inp_2qr + inp_2qi)
    SMULL x6, w6, w14
    ASR x6, x6, #16
    // SMULWB x6, x6, x14 //mul_2qr = mpy_16_32_ns( 0x5A83 , (-inp_2qr + inp_2qi))
    ADD w7, w8, w7 //(inp_2qr + inp_2qi)
    SMULL x7, w7, w24
    ASR x7, x7, #16
    // SMULWT x7, x7, x14 //mul_2qi = mpy_16_32_ns(-0x5A83 , (inp_2qr + inp_2qi))
    LDR w9 , [x0, #120] //inp_3qr = x[30]
    sxtw x9, w9
    LDR w10, [x0, #124] //inp_3qi = x[31]
    sxtw x10, w10
    SMULL x8, w9, w21
    ASR x8, x8, #16
    // SMULWT x8, x9 , x11 //mul_3qr = mpy_16_32_ns(-0x7642 , inp_3qr)
    SMULL x20, w10, w22
    ASR x20, x20, #16
    ADD w8, w8, w20
    // SMLAWT x8, x10, x12, x8 //mul_3qr -= mpy_16_32_ns( 0x30FC , inp_3qi)//
    SMULL x9, w9, w12
    ASR x9, x9, #16
    // SMULWB x9, x9 , x12 //mul_3qi = mpy_16_32_ns( 0x30FC , inp_3qr)
    SMULL x20, w10, w21
    ASR x20, x20, #16
    ADD w9, w9, w20
    // SMLAWT x9, x10, x11, x9 //mul_3qi += mpy_16_32_ns(-0x7642 , inp_3qi)
    ADD w10, w2, w6, lsl #1 //sum_0qr = mul_0qr + (mul_2qr << 1)
    SUB w2 , w2, w6, lsl #1 //sum_1qr = mul_0qr - (mul_2qr << 1)
    ADD w6 , w4, w8 //sum_2qr = mul_1qr + mul_3qr
    SUB w4 , w4, w8 //sum_3qr = mul_1qr - mul_3qr
    ADD w8 , w3, w7, lsl #1 //sum_0qi = mul_0qi + (mul_2qi << 1)
    SUB w3 , w3, w7, lsl #1 //sum_1qi = mul_0qi - (mul_2qi << 1)
    ADD w7 , w5, w9 //sum_2qi = mul_1qi + mul_3qi
    SUB w5 , w5, w9 //sum_3qi = mul_1qi - mul_3qi
    ADD w9 , w10, w6, lsl #1 //sum_0qr + (sum_2qr << 1)
    SUB w10, w10, w6, lsl #1 //sum_0qr - (sum_2qr << 1)
    ADD w6 , w2 , w5, lsl #1 //sum_1qr + (sum_3qi << 1)
    SUB w2 , w2 , w5, lsl #1 //sum_1qr - (sum_3qi << 1)
    STR w9 , [x1, #24] //y[6 ] = sum_0qr + (sum_2qr << 1)
    STR w10, [x1, #88] //y[22] = sum_0qr - (sum_2qr << 1)
    STR w6 , [x1, #56] //y[14] = sum_1qr + (sum_3qi << 1)
    STR w2 , [x1, #120] //y[30] = sum_1qr - (sum_3qi << 1)
    ADD w5 , w8 , w7, lsl #1 //sum_0qi + (sum_2qi << 1)
    SUB w8 , w8 , w7, lsl #1 //sum_0qi - (sum_2qi << 1)
    SUB w7 , w3 , w4, lsl #1 //sum_1qi - (sum_3qr << 1)
    ADD w3 , w3 , w4, lsl #1 //sum_1qi + (sum_3qr << 1)
    STR w5 , [x1, #28] //y[6 +1] = sum_0qi + (sum_2qi << 1)
    STR w8 , [x1, #92] //y[22+1] = sum_0qi - (sum_2qi << 1)
    STR w7 , [x1, #60] //y[14+1] = sum_1qi - (sum_3qr << 1)
    STR w3 , [x1, #124] //y[30+1] = sum_1qi + (sum_3qr << 1)
    // LDMFD sp!, {x4-x12,x15}
    ldp x19, x20, [sp], #16
    pop_v_regs
    ret

View file

@ -0,0 +1,248 @@
/******************************************************************************
* *
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#include <stdio.h>
#include <string.h>
#include "ixheaacd_sbr_common.h"
#include <ixheaacd_type_def.h>
#include "ixheaacd_constants.h"
#include <ixheaacd_basic_ops32.h>
#include <ixheaacd_basic_ops16.h>
#include <ixheaacd_basic_ops40.h>
#include "ixheaacd_basic_ops.h"
#include <ixheaacd_basic_op.h>
#include "ixheaacd_intrinsics.h"
#include "ixheaacd_common_rom.h"
#include "ixheaacd_sbrdecsettings.h"
#include "ixheaacd_bitbuffer.h"
#include "ixheaacd_defines.h"
#include "ixheaacd_pns.h"
#include <ixheaacd_aac_rom.h>
#include "ixheaacd_aac_imdct.h"
#include "ixheaacd_pulsedata.h"
#include "ixheaacd_drc_data_struct.h"
#include "ixheaacd_lt_predict.h"
#include "ixheaacd_channelinfo.h"
#include "ixheaacd_drc_dec.h"
#include "ixheaacd_sbrdecoder.h"
#include "ixheaacd_tns.h"
#include "ixheaacd_sbr_scale.h"
#include "ixheaacd_lpp_tran.h"
#include "ixheaacd_env_extr_part.h"
#include <ixheaacd_sbr_rom.h>
#include "ixheaacd_block.h"
#include "ixheaacd_hybrid.h"
#include "ixheaacd_ps_dec.h"
#include "ixheaacd_env_extr.h"
#include "ixheaacd_basic_funcs.h"
#include "ixheaacd_env_calc.h"
#include "ixheaacd_dsp_fft32x32s.h"
#include "ixheaacd_interface.h"
/* Global function-pointer dispatch table for the ARMv8 build: each pointer
 * is bound at load time to either a generic C implementation ("_dec"
 * suffix) or an AArch64 assembly implementation ("_armv8" suffix). The
 * decoder calls through these pointers so platform-specific builds can
 * swap implementations without touching call sites. */

/* Fixed-point division helper (generic C). */
WORD32 (*ixheaacd_fix_div)(WORD32, WORD32) = &ixheaacd_fix_div_dec;

/* SBR LPP-transposer covariance-matrix computation (generic C). */
VOID(*ixheaacd_covariance_matrix_calc)
(WORD32 *, ixheaacd_lpp_trans_cov_matrix *,
 WORD32) = &ixheaacd_covariance_matrix_calc_dec;

VOID(*ixheaacd_covariance_matrix_calc_2)
(ixheaacd_lpp_trans_cov_matrix *, WORD32 *, WORD32,
 WORD16) = &ixheaacd_covariance_matrix_calc_2_dec;

/* IMDCT overlap-add — bound to the ARMv8 assembly variants. */
VOID(*ixheaacd_over_lap_add1)
(WORD32 *, WORD32 *, WORD16 *, const WORD16 *, WORD16, WORD16,
 WORD16) = &ixheaacd_over_lap_add1_armv8;

VOID(*ixheaacd_over_lap_add2)
(WORD32 *, WORD32 *, WORD32 *, const WORD16 *, WORD16, WORD16,
 WORD16) = &ixheaacd_over_lap_add2_armv8;

/* Parametric-stereo decorrelation filters (generic C). */
VOID(*ixheaacd_decorr_filter2)
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *p_buf_left_real, WORD32 *p_buf_left_imag,
 WORD32 *p_buf_right_real, WORD32 *p_buf_right_imag,
 ia_ps_tables_struct *ps_tables_ptr,
 WORD16 *transient_ratio) = &ixheaacd_decorr_filter2_dec;

VOID(*ixheaacd_decorr_filter1)
(ia_ps_dec_struct *ptr_ps_dec, ia_ps_tables_struct *ps_tables_ptr,
 WORD16 *transient_ratio) = &ixheaacd_decorr_filter1_dec;

WORD32(*ixheaacd_divide16_pos)
(WORD32 op1, WORD32 op2) = &ixheaacd_divide16_pos_dec;

VOID(*ixheaacd_decorrelation)
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *p_buf_left_real, WORD32 *p_buf_left_imag,
 WORD32 *p_buf_right_real, WORD32 *p_buf_right_imag,
 ia_ps_tables_struct *ps_tables_ptr) = &ixheaacd_decorrelation_dec;

/* Parametric-stereo rotation (generic C). */
VOID(*ixheaacd_apply_rot)
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *p_qmf_left_re, WORD32 *p_qmf_left_im,
 WORD32 *p_qmf_right_re, WORD32 *p_qmf_right_im,
 ia_sbr_tables_struct *sbr_tables_ptr,
 const WORD16 *ptr_res) = &ixheaacd_apply_rot_dec;

/* SBR envelope-calculation helpers (generic C). */
VOID(*ixheaacd_conv_ergtoamplitudelp)
(WORD32 bands, WORD16 noise_e, WORD16 *nrg_sine, WORD16 *nrg_gain,
 WORD16 *noise_level_mant,
 WORD16 *sqrt_table) = &ixheaacd_conv_ergtoamplitudelp_dec;

VOID(*ixheaacd_conv_ergtoamplitude)
(WORD32 bands, WORD16 noise_e, WORD16 *nrg_sine, WORD16 *nrg_gain,
 WORD16 *noise_level_mant,
 WORD16 *sqrt_table) = &ixheaacd_conv_ergtoamplitude_dec;

VOID(*ixheaacd_adjust_scale)
(WORD32 **re, WORD32 **im, WORD32 sub_band_start, WORD32 sub_band_end,
 WORD32 start_pos, WORD32 next_pos, WORD32 shift,
 FLAG low_pow_flag) = &ixheaacd_adjust_scale_dec;

/* NOTE(review): the doubled "ixheaacd_ixheaacd_" prefix looks like an
 * upstream naming slip, but the symbol is part of the exported interface
 * and is kept as-is. */
WORD16(*ixheaacd_ixheaacd_expsubbandsamples)
(WORD32 **re, WORD32 **im, WORD32 sub_band_start, WORD32 sub_band_end,
 WORD32 start_pos, WORD32 next_pos,
 FLAG low_pow_flag) = &ixheaacd_expsubbandsamples_dec;

VOID(*ixheaacd_enery_calc_per_subband)
(WORD32 start_pos, WORD32 next_pos, WORD32 sub_band_start, WORD32 sub_band_end,
 WORD32 frame_exp, WORD16 *nrg_est_mant, FLAG low_pow_flag,
 ia_sbr_tables_struct *ptr_sbr_tables,
 WORD32 *ptr_qmf_matrix) = &ixheaacd_enery_calc_per_subband_dec;

VOID(*ixheaacd_harm_idx_zerotwolp)
(WORD32 *ptr_real_buf, WORD16 *ptr_gain_buf, WORD32 scale_change,
 WORD16 *ptr_sine_level_buf, const WORD32 *ptr_rand_ph,
 WORD16 *noise_level_mant, WORD32 num_sub_bands, FLAG noise_absc_flag,
 WORD32 harm_index) = &ixheaacd_harm_idx_zerotwolp_dec;

/* TNS filtering — the fixed-point all-pole filter uses the ARMv8 assembly
 * implementation; the WORD16-coefficient variant stays in C. */
VOID(*ixheaacd_tns_ar_filter_fixed)
(WORD32 *spectrum, WORD32 size, WORD32 inc, WORD32 *lpc, WORD32 order,
 WORD32 shift_value, WORD scale_spec) = &ixheaacd_tns_ar_filter_fixed_armv8;

VOID(*ixheaacd_tns_ar_filter)
(WORD32 *spectrum, WORD32 size, WORD32 inc, WORD16 *lpc, WORD32 order,
 WORD32 shift_value, WORD scale_spec,
 WORD32 *ptr_filter_state) = &ixheaacd_tns_ar_filter_dec;

VOID(*ixheaacd_tns_parcor_lpc_convert)
(WORD16 *parcor, WORD16 *lpc, WORD16 *scale,
 WORD order) = &ixheaacd_tns_parcor_lpc_convert_dec;

/* Spectral-headroom scan — bound to the ARMv8 assembly implementation. */
WORD32(*ixheaacd_calc_max_spectral_line)
(WORD32 *ptr_tmp, WORD32 size) = &ixheaacd_calc_max_spectral_line_armv8;
VOID(*ixheaacd_post_twiddle)
(WORD32 out_ptr[], WORD32 spec_data[],
ia_aac_dec_imdct_tables_struct *ptr_imdct_tables,
WORD npoints) = &ixheaacd_post_twiddle_armv8;
VOID(*ixheaacd_post_twid_overlap_add)
(WORD16 pcm_out[], WORD32 spec_data[],
ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints,
WORD32 *ptr_overlap_buf, WORD16 q_shift, const WORD16 *window,
WORD16 ch_fac) = &ixheaacd_post_twid_overlap_add_armv8;
VOID(*ixheaacd_neg_shift_spec)
(WORD32 *coef, WORD16 *out, WORD16 q_shift,
WORD16 ch_fac) = &ixheaacd_neg_shift_spec_armv8;
VOID(*ixheaacd_spec_to_overlapbuf)
(WORD32 *ptr_overlap_buf, WORD32 *ptr_spec_coeff, WORD32 q_shift,
WORD32 size) = &ixheaacd_spec_to_overlapbuf_dec;
VOID(*ixheaacd_overlap_buf_out)
(WORD16 *out_samples, WORD32 *ptr_overlap_buf, WORD32 size,
const WORD16 ch_fac) = &ixheaacd_overlap_buf_out_dec;
VOID(*ixheaacd_overlap_out_copy)
(WORD16 *out_samples, WORD32 *ptr_overlap_buf, WORD32 *ptr_overlap_buf1,
const WORD16 ch_fac) = &ixheaacd_overlap_out_copy_dec;
VOID(*ixheaacd_pretwiddle_compute)
(WORD32 *spec_data1, WORD32 *spec_data2, WORD32 *out_ptr,
ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints4,
WORD32 neg_expo) = &ixheaacd_pretwiddle_compute_armv8;
VOID(*ixheaacd_imdct_using_fft)
(ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD32 npoints,
WORD32 *ptr_x, WORD32 *ptr_y) = &ixheaacd_imdct_using_fft_armv8;
VOID(*ixheaacd_complex_fft_p2)
(WORD32 *xr, WORD32 *xi, WORD32 nlength, WORD32 fft_mode,
WORD32 *preshift) = &ixheaacd_complex_fft_p2_dec;
VOID(*ixheaacd_mps_complex_fft_64)
(WORD32 *ptr_x, WORD32 *fin_re, WORD32 *fin_im,
WORD32 nlength) = &ixheaacd_mps_complex_fft_64_dec;
VOID(*ixheaacd_mps_synt_pre_twiddle)
(WORD32 *ptr_in, WORD32 *table_re, WORD32 *table_im,
WORD32 resolution) = &ixheaacd_mps_synt_pre_twiddle_dec;
VOID(*ixheaacd_mps_synt_post_twiddle)
(WORD32 *ptr_in, WORD32 *table_re, WORD32 *table_im,
WORD32 resolution) = &ixheaacd_mps_synt_post_twiddle_dec;
VOID(*ixheaacd_calc_pre_twid)
(WORD32 *ptr_x, WORD32 *r_ptr, WORD32 *i_ptr, WORD32 nlength,
const WORD32 *cos_ptr, const WORD32 *sin_ptr) = &ixheaacd_calc_pre_twid_dec;
VOID(*ixheaacd_calc_post_twid)
(WORD32 *ptr_x, WORD32 *r_ptr, WORD32 *i_ptr, WORD32 nlength,
const WORD32 *cos_ptr, const WORD32 *sin_ptr) = &ixheaacd_calc_post_twid_dec;
VOID(*ixheaacd_mps_synt_post_fft_twiddle)
(WORD32 resolution, WORD32 *fin_re, WORD32 *fin_im, WORD32 *table_re,
WORD32 *table_im, WORD32 *state) = &ixheaacd_mps_synt_post_fft_twiddle_dec;
VOID(*ixheaacd_mps_synt_out_calc)
(WORD32 resolution, WORD32 *out, WORD32 *state,
const WORD32 *filter_coeff) = &ixheaacd_mps_synt_out_calc_dec;
VOID(*ixheaacd_fft_15_ld)
(WORD32 *inp, WORD32 *op, WORD32 *fft3out,
UWORD8 *re_arr_tab_sml_240_ptr) = &ixheaacd_fft_15_ld_dec;
VOID(*ixheaacd_aac_ld_dec_rearrange)
(WORD32 *ip, WORD32 *op, WORD32 mdct_len_2,
UWORD8 *re_arr_tab) = &ixheaacd_rearrange_dec;
VOID (*ixheaacd_fft32x32_ld)
(ia_aac_dec_imdct_tables_struct *imdct_tables_ptr, WORD32 npoints,
WORD32 *ptr_x, WORD32 *ptr_y) = &ixheaacd_imdct_using_fft_armv8;
VOID (*ixheaacd_fft32x32_ld2)
(ia_aac_dec_imdct_tables_struct *imdct_tables_ptr, WORD32 npoints,
WORD32 *ptr_x, WORD32 *ptr_y) = &ixheaacd_fft32x32_ld2_armv8;
WORD16 (*ixheaacd_neg_expo_inc)(WORD16 neg_expo) = &ixheaacd_neg_expo_inc_arm;
VOID (*ixheaacd_inv_dit_fft_8pt)
(WORD32 *x, WORD32 *real, WORD32 *imag) = &ixheaacd_inv_dit_fft_8pt_armv8;
VOID (*ixheaacd_scale_factor_process)
(WORD32 *x_invquant, WORD16 *scale_fact, WORD no_band, WORD8 *width,
WORD32 *scale_tables_ptr, WORD32 total_channels, WORD32 object_type,
WORD32 aac_sf_data_resil_flag) = &ixheaacd_scale_factor_process_armv8;

View file

@ -0,0 +1,819 @@
///******************************************************************************
// *
// * Copyright (C) 2018 The Android Open Source Project
// *
// * Licensed under the Apache License, Version 2.0 (the "License");
// * you may not use this file except in compliance with the License.
// * You may obtain a copy of the License at:
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// *
// *****************************************************************************
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
// Callee-save helpers.  AAPCS64 requires preserving v8-v15 and
// x19-x28/x29/x30; this prologue additionally saves caller-saved
// x8-x17 (over-saving is harmless, just not required by the ABI).
.macro push_v_regs
stp q8, q9, [sp, #-32]!
stp q10, q11, [sp, #-32]!
stp q12, q13, [sp, #-32]!
stp q14, q15, [sp, #-32]!
stp X8, X9, [sp, #-16]!
stp X10, X11, [sp, #-16]!
stp X12, X13, [sp, #-16]!
stp X14, X15, [sp, #-16]!
stp X16, X17, [sp, #-16]!
stp X29, X30, [sp, #-16]!
.endm
.macro pop_v_regs
ldp X29, X30, [sp], #16
ldp X16, X17, [sp], #16
ldp X14, X15, [sp], #16
ldp X12, X13, [sp], #16
ldp X10, X11, [sp], #16
ldp X8, X9, [sp], #16
ldp q14, q15, [sp], #32
ldp q12, q13, [sp], #32
ldp q10, q11, [sp], #32
ldp q8, q9, [sp], #32
.endm
// Swap two operands through x16 (clobbers x16).
.macro swp reg1, reg2
MOv x16, \reg1
MOv \reg1, \reg2
MOv \reg2, x16
.endm
.text
.p2align 2
.global ixheaacd_imdct_using_fft_armv8
// VOID ixheaacd_imdct_using_fft_armv8(
//     ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, // x0
//     WORD32 npoints,                                   // x1
//     WORD32 *ptr_x,                                    // x2 (input, interleaved re/im)
//     WORD32 *ptr_y)                                    // x3 (output)
// Signature matches the C dispatch-table declarations for
// ixheaacd_imdct_using_fft / ixheaacd_fft32x32_ld.
// Mixed-radix DIT FFT: one radix-8 or radix-4 first pass over inputs
// gathered through a reorder table, then X8 radix-4 twiddle passes.
ixheaacd_imdct_using_fft_armv8:
push_v_regs
// X4..X7 = candidate input-reorder tables inside *ptr_imdct_tables.
// Byte offsets 11600/11856/11920/11936 presumably select the
// per-FFT-size tables in the ROM struct — TODO confirm against the
// ixheaacd_aac_rom layout.
LDR X29, =11600
ADD X4, X0, X29
LDR X29, =11856
ADD X5, X0, X29
LDR X29, =11920
ADD X6, X0, X29
LDR X29, =11936
ADD X7, X0, X29
// Dispatch on FFT length: pick reorder table (X4) and the number of
// radix-4 twiddle passes (X8).
COND_1: CMP X1, #0x400
BNE COND_2
MOv X8, #4
B RADIX_4_FIRST_START
COND_2: CMP X1, #0x200
BNE COND_3
MOv X8, #3
MOv X4, X5
B RADIX_8_FIRST_START
COND_3: CMP X1, #0x100
BNE COND_4
MOv X8, #3
MOv X4, X5
B RADIX_4_FIRST_START
COND_4: CMP X1, #0x80
BNE COND_5
MOv X8, #2
MOv X4, X6
B RADIX_8_FIRST_START
COND_5: CMP X1, #0x40
BNE COND_6
MOv X8, #2
MOv X4, X6
B RADIX_4_FIRST_START
COND_6:
MOv X8, #1
MOv X4, X7
// First stage, radix-8: per loop iteration, gather 8 complex inputs for
// each of 4 lanes via the reorder table (byte indices in [X4]), run the
// 8-point butterflies, scale by <<3 and store interleaved to ptr_y.
RADIX_8_FIRST_START:
LSR W9 , W1, #5
LSL W1, W1, #1
RADIX_8_FIRST_LOOP:
MOv X5 , X2
MOv X6 , X2
MOv X7 , X2
MOv X11 , X2
LDRB W12, [X4]
ADD X5, X5, X12, LSL #3
LD2 {v0.S, v1.S}[0], [X5], X1
ADD X5, X5, X1
LD2 {v4.S, v5.S}[0], [X5], X1
SUB X5, X5, X1, LSL #1
LD2 {v2.S, v3.S}[0], [X5], X1
ADD X5, X5, X1
LD2 {v6.S, v7.S}[0], [X5], X1
SUB X5, X5, X1, LSL #2
LDRB W12, [X4, #1]
ADD X6, X6, X12, LSL #3
LD2 {v0.S, v1.S}[1], [X6] , X1
ADD X6, X6, X1
LD2 {v4.S, v5.S}[1], [X6] , X1
SUB X6, X6, X1, LSL #1
LD2 {v2.S, v3.S}[1], [X6] , X1
ADD X6, X6, X1
LD2 {v6.S, v7.S}[1], [X6], X1
SUB X6, X6, X1, LSL #2
LDRB W12, [X4, #2]
ADD X7, X7, X12, LSL #3
LD2 {v0.S, v1.S}[2], [X7] , X1
ADD X7, X7, X1
LD2 {v4.S, v5.S}[2], [X7] , X1
SUB X7, X7, X1, LSL #1
LDRB W12, [X4, #3]
ADD X11, X11, X12, LSL #3
LD2 {v0.S, v1.S}[3], [X11] , X1
ADD X11, X11, X1
LD2 {v4.S, v5.S}[3], [X11] , X1
SUB X11, X11, X1, LSL #1
ADD v8.4S, v0.4S, v4.4S
LD2 {v2.S, v3.S}[2], [X7] , X1
ADD X7, X7, X1
SUB v9.4S, v0.4S, v4.4S
LD2 {v6.S, v7.S}[2], [X7], X1
SUB X7, X7, X1, LSL #2
ADD v0.4S, v1.4S, v5.4S
LD2 {v2.S, v3.S}[3], [X11] , X1
ADD X11, X11, X1
SUB v4.4S, v1.4S, v5.4S
LD2 {v6.S, v7.S}[3], [X11], X1
SUB X11, X11, X1, LSL #2
ADD X4, X4, #4
ADD X5, X5, X1, LSR #1
ADD X6, X6, X1, LSR #1
ADD X7, X7, X1, LSR #1
ADD X11, X11, X1, LSR #1
ADD v1.4S, v2.4S, v6.4S
LD2 {v14.S, v15.S}[0], [X5] , X1
SUB v5.4S, v2.4S, v6.4S
LD2 {v10.S, v11.S}[0], [X5] , X1
ADD v2.4S, v3.4S, v7.4S
LD2 {v12.S, v13.S}[0], [X5] , X1
SUB v6.4S, v3.4S, v7.4S
LD2 {v14.S, v15.S}[1], [X6] , X1
ADD v3.4S, v9.4S, v6.4S
LD2 {v10.S, v11.S}[1], [X6] , X1
SUB v7.4S, v9.4S, v6.4S
LD2 {v12.S, v13.S}[1], [X6] , X1
SUB v6.4S, v4.4S, v5.4S
LD2 {v14.S, v15.S}[2], [X7] , X1
ADD v9.4S, v4.4S, v5.4S
LD2 {v10.S, v11.S}[2], [X7] , X1
ADD v4.4S, v8.4S, v1.4S
LD2 {v12.S, v13.S}[2], [X7] , X1
SUB v5.4S, v8.4S, v1.4S
LD2 {v14.S, v15.S}[3], [X11] , X1
ADD v8.4S, v0.4S, v2.4S
LD2 {v10.S, v11.S}[3], [X11] , X1
SUB v0.4S, v0.4S, v2.4S
LD2 {v12.S, v13.S}[3], [X11] , X1
LD2 {v1.S, v2.S}[0], [X5], X1
ADD v17.4S, v14.4S, v12.4S
LD2 {v1.S, v2.S}[1], [X6] , X1
SUB v16.4S, v14.4S, v12.4S
LD2 {v1.S, v2.S}[2], [X7] , X1
ADD v14.4S, v15.4S, v13.4S
LD2 {v1.S, v2.S}[3], [X11] , X1
SUB v12.4S, v15.4S, v13.4S
ADD v15.4S, v10.4S, v1.4S
SUB v13.4S, v10.4S, v1.4S
ADD v10.4S, v11.4S, v2.4S
SUB v1.4S, v11.4S, v2.4S
ADD v11.4S, v17.4S, v15.4S
SUB v2.4S, v17.4S, v15.4S
ADD v17.4S, v14.4S, v10.4S
SUB v15.4S, v14.4S, v10.4S
ADD v14.4S, v16.4S, v12.4S
SUB v10.4S, v16.4S, v12.4S
ADD v16.4S, v13.4S, v1.4S
SUB v12.4S, v13.4S, v1.4S
ADD v1.4S , v14.4S, v12.4S
SUB v13.4S, v14.4S, v12.4S
SUB v12.4S, v16.4S, v10.4S
UZP1 v22.8H, v1.8H, v1.8H
UZP2 v23.8H, v1.8H, v1.8H
ADD v14.4S, v16.4S, v10.4S
UZP1 v26.8H, v13.8H, v13.8H
UZP2 v27.8H, v13.8H, v13.8H
ADD v16.4S, v4.4S, v11.4S
UZP1 v24.8H, v12.8H, v12.8H
UZP2 v25.8H, v12.8H, v12.8H
SUB v10.4S, v4.4S, v11.4S
UZP1 v28.8H, v14.8H, v14.8H
UZP2 v29.8H, v14.8H, v14.8H
ADD v4.4S, v8.4S, v17.4S
MOv W14, #0x5a82      // 0x5a82 = 1/sqrt(2) in Q15 (radix-8 twiddle)
SUB v11.4S, v8.4S, v17.4S
ADD v8.4S, v5.4S, v15.4S
SUB v17.4S, v5.4S, v15.4S
SUB v5.4S, v0.4S, v2.4S
ADD v15.4S, v0.4S, v2.4S
DUP v31.4H, W14
// Split-precision 32x16 multiply by 1/sqrt(2): UMULL on the low halves
// (from UZP1), then SQDMLAL on the high halves (from UZP2).
UMULL v19.4S, v26.4H, v31.4H
UMULL v18.4S, v28.4H, v31.4H
SSHR v19.4S, v19.4S, #15
SSHR v18.4S, v18.4S, #15
SQDMLAL v19.4S, v27.4H, v31.4H
SQDMLAL v18.4S, v29.4H, v31.4H
UMULL v13.4S, v24.4H, v31.4H
UMULL v14.4S, v22.4H, v31.4H
ADD v20.4S, v3.4S, v19.4S
SUB v21.4S, v3.4S, v19.4S
ADD v30.4S, v6.4S, v18.4S
SUB v6.4S, v6.4S, v18.4S
SSHR v13.4S, v13.4S, #15
SSHR v14.4S, v14.4S, #15
SQDMLAL v13.4S, v25.4H, v31.4H
SQDMLAL v14.4S, v23.4H, v31.4H
ADD v3.4S, v7.4S, v13.4S
SUB v19.4S, v7.4S, v13.4S
ADD v1.4S, v9.4S, v14.4S
SUB v18.4S, v9.4S, v14.4S
// Transpose lanes back into per-output order, scale by <<3, store.
swp v17.D[0], v8.D[0]
swp v17.D[1], v8.D[1]
swp v4.D[0], v16.D[0]
swp v4.D[1], v16.D[1]
TRN1 v12.4S, v4.4S, v20.4S
TRN2 v22.4S, v4.4S, v20.4S
SHL v12.4S, v12.4S, #3
TRN1 v9.4S, v17.4S, v3.4S
TRN2 v2.4S, v17.4S, v3.4S
SHL v22.4S, v22.4S, #3
SHL v9.4S, v9.4S, #3
TRN1 v24.4S, v10.4S, v21.4S
TRN2 v7.4S, v10.4S, v21.4S
SHL v2.4S, v2.4S, #3
SHL v24.4S, v24.4S, #3
TRN1 v13.4S, v16.4S, v6.4S
TRN2 v23.4S, v16.4S, v6.4S
SHL v7.4S, v7.4S, #3
SHL v13.4S, v13.4S, #3
TRN1 v10.4S, v5.4S, v18.4S
TRN2 v3.4S, v5.4S, v18.4S
SHL v23.4S, v23.4S, #3
SHL v10.4S, v10.4S, #3
TRN1 v26.4S, v8.4S, v19.4S
TRN2 v4.4S, v8.4S, v19.4S
SHL v3.4S, v3.4S, #3
SHL v26.4S, v26.4S, #3
TRN1 v25.4S, v11.4S, v30.4S
TRN2 v8.4S, v11.4S, v30.4S
SHL v4.4S, v4.4S, #3
SHL v25.4S, v25.4S, #3
TRN1 v27.4S, v15.4S, v1.4S
TRN2 v5.4S, v15.4S, v1.4S
SHL v8.4S, v8.4S, #3
SHL v27.4S, v27.4S, #3
swp v9.D[0], v12.D[1]
SHL v5.4S, v5.4S, #3
swp v2.D[0], v22.D[1]
swp v24.D[1], v26.D[0]
swp v7.D[1], v4.D[0]
swp v10.D[0], v13.D[1]
swp v3.D[0], v23.D[1]
swp v27.D[0], v25.D[1]
swp v5.D[0], v8.D[1]
MOv X15, #32
ST2 {v12.4S, v13.4S}, [X3], X15
ST2 {v24.4S, v25.4S}, [X3], X15
ST2 {v22.4S, v23.4S}, [X3], X15
ST2 {v7.4S, v8.4S}, [X3], X15
ST2 {v9.4S, v10.4S}, [X3], X15
ST2 {v26.4S, v27.4S}, [X3], X15
ST2 {v2.4S, v3.4S}, [X3], X15
ST2 {v4.4S, v5.4S}, [X3], X15
SUBS X9, X9, #1
BNE RADIX_8_FIRST_LOOP
// Set up loop controls for the later radix-4 passes after a radix-8
// first stage: X5 = groups, X4 = stride, X6 = butterflies per group.
LSR X1, X1, #1
LSL X15, X1, #3
SUB X3, X3, X15
MOv X5, #8
MOv X4, #32
LSR X15, X1, #5
MOv X6, X15
B RADIX_4_FIRST_ENDS
RADIX_8_FIRST_ENDS:
// First stage, radix-4: 4-point butterflies on reordered input with
// <<2 scaling, stored interleaved to ptr_y.
RADIX_4_FIRST_START:
LSR W9, W1, #4
LSL W1, W1, #1
RADIX_4_LOOP:
MOv X5 , X2
MOv X6 , X2
MOv X7 , X2
MOv X11 , X2
LDRB W12, [X4, #0]
ADD X5, X5, X12, LSL #3
LD2 {v0.S, v1.S}[0], [X5] , X1
ADD X5, X5, X1
LD2 {v8.S, v9.S}[0], [X5] , X1
SUB X5, X5, X1, LSL #1
LD2 {v4.S, v5.S}[0], [X5] , X1
ADD X5, X5, X1
LD2 {v12.S, v13.S}[0], [X5] , X1
LDRB W12, [X4, #1]
ADD X6, X6, X12, LSL #3
LD2 {v0.S, v1.S}[1], [X6] , X1
ADD X6, X6, X1
LD2 {v8.S, v9.S}[1], [X6] , X1
SUB X6, X6, X1, LSL #1
LD2 {v4.S, v5.S}[1], [X6] , X1
ADD X6, X6, X1
LD2 {v12.S, v13.S}[1], [X6] , X1
LDRB W12, [X4, #2]
ADD X7, X7, X12, LSL #3
LD2 {v0.S, v1.S}[2], [X7] , X1
ADD X7, X7, X1
LD2 {v8.S, v9.S}[2], [X7] , X1
LDRB W12, [X4, #3]
ADD X11, X11, X12 , LSL #3
LD2 {v0.S, v1.S}[3], [X11] , X1
ADD X11, X11, X1
LD2 {v8.S, v9.S}[3], [X11] , X1
SUB X7, X7, X1, LSL #1
ADD v16.4S, v0.4S, v8.4S
LD2 {v4.S, v5.S}[2], [X7] , X1
ADD X7, X7, X1
ADD v18.4S, v1.4S, v9.4S
LD2 {v12.S, v13.S}[2], [X7] , X1
SUB X11, X11, X1, LSL #1
SUB v20.4S, v0.4S, v8.4S
LD2 {v4.S, v5.S}[3], [X11] , X1
ADD X11, X11, X1
SUB v22.4S, v1.4S, v9.4S
LD2 {v12.S, v13.S}[3], [X11] , X1
ADD X4, X4, #4
ADD v24.4S, v4.4S, v12.4S
ADD v26.4S, v5.4S, v13.4S
SUB v28.4S, v4.4S, v12.4S
SUB v30.4S, v5.4S, v13.4S
ADD v17.4S, v16.4S, v24.4S
ADD v11.4S, v18.4S, v26.4S
SUB v19.4S, v16.4S, v24.4S
SUB v15.4S, v18.4S, v26.4S
ADD v8.4S, v20.4S, v30.4S
SUB v9.4S, v22.4S, v28.4S
ADD v13.4S, v22.4S, v28.4S
SUB v12.4S, v20.4S, v30.4S
TRN1 v0.4S, v17.4S, v8.4S
TRN2 v8.4S, v17.4S, v8.4S
SHL v0.4S, v0.4S, #2
TRN1 v4.4S, v19.4S, v12.4S
TRN2 v12.4S, v19.4S, v12.4S
SHL v8.4S, v8.4S, #2
SHL v4.4S, v4.4S, #2
TRN1 v1.4S, v11.4S, v9.4S
TRN2 v9.4S, v11.4S, v9.4S
SHL v12.4S, v12.4S, #2
SHL v1.4S, v1.4S, #2
TRN1 v5.4S, v15.4S, v13.4S
TRN2 v13.4S, v15.4S, v13.4S
SHL v9.4S, v9.4S, #2
SHL v5.4S, v5.4S, #2
swp v4.D[0], v0.D[1]
SHL v13.4S, v13.4S, #2
swp v12.D[0], v8.D[1]
swp v5.D[0], v1.D[1]
swp v13.D[0], v9.D[1]
MOv X15, #32
ST2 {v0.4S, v1.4S}, [X3], X15
ST2 {v8.4S, v9.4S}, [X3], X15
ST2 {v4.4S, v5.4S}, [X3], X15
ST2 {v12.4S, v13.4S}, [X3], X15
SUBS W9, W9, #1
BNE RADIX_4_LOOP
LSR X1, X1, #1
SUB X3, X3, X1, LSL #3
MOv X5, #4
MOv X4, #64
LSR X6, X1, #4
RADIX_4_FIRST_ENDS:
MOv x30, X3
LSR X5, X5, #2
// X0 += 8528: presumably the start of the twiddle-factor table in the
// ROM struct — TODO confirm against ixheaacd_aac_rom layout.
LDR X14, =8528
ADD X0, X0, X14
// Remaining radix-4 passes (X8 of them).  Per pass: MIDDLE_LOOP_R4
// loads three Q15 twiddle pairs per group, INNER_LOOP_R4 applies them
// (SMULL/SMLAL, >>15) and butterflies the data in place.
OUTER_LOOP_R4:
MOv X14, x30
MOv X7, X5
MOv X2, #0
MOv X9, X0
LSL X12, X5, #5
MIDDLE_LOOP_R4:
LD2 {v20.H, v21.H}[0], [X9], X2
LD2 {v22.H, v23.H}[0], [X9], X2
ADD X11, X2, X4, LSL #2
LD2 {v24.H, v25.H}[0], [X9]
ADD X10, X0, X11
LD2 {v20.H, v21.H}[1], [X10], X11
LD2 {v22.H, v23.H}[1], [X10], X11
ADD X2, X11, X4, LSL #2
LD2 {v24.H, v25.H}[1], [X10]
ADD X9, X0, X2
LD2 {v20.H, v21.H}[2], [X9], X2
LD2 {v22.H, v23.H}[2], [X9], X2
ADD X11, X2, X4, LSL #2
LD2 {v24.H, v25.H}[2], [X9]
ADD X10, X0, X11
LD2 {v20.H, v21.H}[3], [X10], X11
LD2 {v22.H, v23.H}[3], [X10], X11
ADD X2, X11, X4, LSL #2
LD2 {v24.H, v25.H}[3], [X10]
ADD X9, X0, X2
MOv X10, X6
INNER_LOOP_R4:
LD2 {v30.4S, v31.4S}, [X14], X12
SSHR v30.4S, v30.4S, #1
LD4 {v16.4H, v17.4H, v18.4H, v19.4H}, [X14], X12
SSHR v31.4S, v31.4S, #1
USHR v16.4H, v16.4H, #1
LD4 {v26.4H, v27.4H, v28.4H, v29.4H}, [X14], X12
USHR v18.4H, v18.4H, #1
SMULL v11.4S, v16.4H, v20.4H
SMLSL v11.4S, v18.4H, v21.4H
LD4 {v0.4H, v1.4H, v2.4H, v3.4H}, [X14], X12
SMULL v12.4S, v16.4H, v21.4H
SMLAL v12.4S, v18.4H, v20.4H
USHR v26.4H, v26.4H, #1
USHR v28.4H, v28.4H, #1
LSL x29, X12, #2
SUB X14, X14, X12, LSL #2
USHR v0.4H, v0.4H, #1
USHR v2.4H, v2.4H, #1
SMULL v13.4S, v26.4H, v22.4H
SMLSL v13.4S, v28.4H, v23.4H
SSHR v11.4S, v11.4S, #15
SMULL v14.4S, v26.4H, v23.4H
SMLAL v14.4S, v28.4H, v22.4H
SMULL v15.4S, v0.4H, v24.4H
SMLSL v15.4S, v2.4H, v25.4H
SMLAL v11.4S, v17.4H, v20.4H
SMLSL v11.4S, v19.4H, v21.4H
SSHR v12.4S, v12.4S, #15
SSHR v13.4S, v13.4S, #15
SSHR v14.4S, v14.4S, #15
SSHR v15.4S, v15.4S, #15
SMLAL v12.4S, v17.4H, v21.4H
SMLAL v12.4S, v19.4H, v20.4H
SMULL v5.4S, v0.4H, v25.4H
SMLAL v5.4S, v2.4H, v24.4H
SMLAL v13.4S, v27.4H, v22.4H
SMLSL v13.4S, v29.4H, v23.4H
SMLAL v14.4S, v27.4H, v23.4H
SMLAL v14.4S, v29.4H, v22.4H
SMLAL v15.4S, v1.4H, v24.4H
SMLSL v15.4S, v3.4H, v25.4H
SSHR v5.4S, v5.4S, #15
SMLAL v5.4S, v1.4H, v25.4H
SMLAL v5.4S, v3.4H, v24.4H
// On the first middle-loop iteration (X7 == X5) the twiddle is
// trivial: overwrite lane 0 of the products with the raw values
// halved, reloaded through scalar loads.
SUBS x17, X7, X5
BNE BYPASS_IF
ADD X14, X14, X12
LDR W3, [X14]
ADD X14, X14, X12
ASR W3, W3, #1
MOv v11.S[0], W3
LDR W3, [X14]
ADD X14, X14, X12
ASR W3, W3, #1
MOv v13.S[0], W3
LDR W3, [X14]
ASR W3, W3, #1
MOv v15.S[0], W3
SUB X14, X14, X12, LSL #1
ADD X14, X14, #4
LDR W3, [X14]
ADD X14, X14, X12
ASR W3, W3, #1
MOv v12.S[0], W3
LDR W3, [X14]
ADD X14, X14, X12
ASR W3, W3, #1
MOv v14.S[0], W3
LDR W3, [X14]
ADD X14, X14, X12
ASR W3, W3, #1
MOv v5.S[0], W3
SUB X14, X14, #4
SUB X14, X14, x29
BYPASS_IF:
// Radix-4 butterfly on the (halved) operands, stored back in place.
ADD v6.4S, v30.4S, v13.4S
ADD v7.4S, v31.4S, v14.4S
SUB v30.4S, v30.4S, v13.4S
SUB v31.4S, v31.4S, v14.4S
ADD v8.4S, v11.4S, v15.4S
ADD v9.4S, v12.4S, v5.4S
SUB v15.4S, v11.4S, v15.4S
SUB v14.4S, v12.4S, v5.4S
ADD v10.4S, v6.4S, v8.4S
ADD v11.4S, v7.4S, v9.4S
ADD v12.4S, v30.4S, v14.4S
SUB v13.4S, v31.4S, v15.4S
SUB v6.4S, v6.4S, v8.4S
ST2 {v10.4S, v11.4S}, [X14], X12
SUB v7.4S, v7.4S, v9.4S
SUB v8.4S, v30.4S, v14.4S
ST2 {v12.4S, v13.4S}, [X14], X12
ADD v9.4S, v31.4S, v15.4S
ST2 {v6.4S, v7.4S}, [X14], X12
ST2 {v8.4S, v9.4S}, [X14], X12
SUBS X10, X10, #1
BNE INNER_LOOP_R4
SUB X14, X14, X1, LSL #3
ADD X14, X14, #32
SUBS X7, X7, #1
BNE MIDDLE_LOOP_R4
// Next pass: stride/4, groups*4, butterflies/4.
LSR X4, X4, #2
LSL X5, X5, #2
LSR X6, X6, #2
SUBS X8, X8, #1
BNE OUTER_LOOP_R4
END_LOOPS:
pop_v_regs
RET

View file

@ -0,0 +1,174 @@
//VOID ixheaacd_inv_dit_fft_8pt(WORD32 *y,
// WORD32 *real,
// WORD32 *imag)
// AArch64 implementation: x0 = y (interleaved re/im WORD32 input),
// x1 = real (output), x2 = imag (output).
// 8-point inverse decimation-in-time FFT; the existing // comments give
// the NEON-intrinsic equivalent of each step.
// Save/restore the callee-saved SIMD registers q8-q15 (AAPCS64).
.macro push_v_regs
stp q8, q9, [sp, #-32]!
stp q10, q11, [sp, #-32]!
stp q12, q13, [sp, #-32]!
stp q14, q15, [sp, #-32]!
.endm
.macro pop_v_regs
ldp q14, q15, [sp], #32
ldp q12, q13, [sp], #32
ldp q10, q11, [sp], #32
ldp q8, q9, [sp], #32
.endm
.text
.global ixheaacd_inv_dit_fft_8pt_armv8
ixheaacd_inv_dit_fft_8pt_armv8:
push_v_regs
LDR w3, =0x5A820000   // 0x5a82 << 16 ~= 1/sqrt(2) in Q31 (truncated)
DUP v0.2s, w3
MOV x5, #8            // input stride: one complex (re,im) pair = 8 bytes
ADD x6, x0, #4        // x6 walks the imaginary halves
//LD2 {v1.2s,v2.2s},[x0],x5
//LD2 {v3.2s,v4.2s},[x0],x5
//LD2 {v5.2s,v6.2s},[x0],x5
//LD2 {v7.2s,v8.2s},[x0],x5
// Deinterleave the 8 complex inputs into v1..v8 (two lanes each).
LD1 {v1.s}[0], [x0], x5
LD1 {v2.s}[0], [x6], x5
LD1 {v1.s}[1], [x0], x5
LD1 {v2.s}[1], [x6], x5
LD1 {v3.s}[0], [x0], x5
LD1 {v4.s}[0], [x6], x5
LD1 {v3.s}[1], [x0], x5
LD1 {v4.s}[1], [x6], x5
LD1 {v5.s}[0], [x0], x5
LD1 {v6.s}[0], [x6], x5
LD1 {v5.s}[1], [x0], x5
LD1 {v6.s}[1], [x6], x5
LD1 {v7.s}[0], [x0], x5
LD1 {v8.s}[0], [x6], x5
LD1 {v7.s}[1], [x0], x5
LD1 {v8.s}[1], [x6], x5
//v1 - y0_2
//v2 - y1_3
//v3 - y4_6
//v4 - y5_7
//v5 - y8_10
//v6 - y9_11
//v7 - y12_14
//v8 - y13_15
// First butterfly stage (saturating adds/subs).
SQADD v9.2s, v1.2s, v5.2s //a00_v = vqadd_s32(y0_2,y8_10);
SQADD v10.2s, v2.2s, v6.2s //a20_v = vqadd_s32(y1_3,y9_11);
SQADD v11.2s, v3.2s, v7.2s //a10_v = vqadd_s32(y4_6,y12_14);
SQADD v12.2s, v4.2s, v8.2s //a30_v = vqadd_s32(y5_7,y13_15);
SQSUB v1.2s, v1.2s, v5.2s //a0_v = vqsub_s32(y0_2,y8_10);
SQSUB v5.2s, v2.2s, v6.2s //a3_v = vqsub_s32(y1_3,y9_11);
SQSUB v2.2s, v3.2s, v7.2s //a2_v = vqsub_s32(y4_6,y12_14);
SQSUB v6.2s, v4.2s, v8.2s //a1_v = vqsub_s32(y5_7,y13_15);
// Second butterfly stage.
SQADD v3.2s, v9.2s, v11.2s //x0_8 = vqadd_s32(a00_v,a10_v);
SQADD v7.2s, v10.2s, v12.2s //x1_9 = vqadd_s32(a20_v,a30_v);
SQSUB v4.2s, v9.2s, v11.2s //x4_12 = vqsub_s32(a00_v,a10_v);
SQSUB v8.2s, v10.2s, v12.2s //x5_13 = vqsub_s32(a20_v,a30_v);
SQADD v9.2s, v1.2s, v6.2s //x6_14 = vqadd_s32(a0_v,a1_v);
SQADD v11.2s, v5.2s, v2.2s //x3_11 = vqadd_s32(a3_v,a2_v);
SQSUB v10.2s, v1.2s, v6.2s //x2_10 = vqsub_s32(a0_v,a1_v);
SQSUB v13.2s, v5.2s, v2.2s //x7_15 = vqsub_s32(a3_v,a2_v);
UZP1 v1.2s, v3.2s, v7.2s //x0_1 = vuzp1_s32(x0_8,x1_9);
UZP2 v5.2s, v3.2s, v7.2s //x8_9 = vuzp2_s32(x0_8,x1_9);
UZP1 v6.2s, v4.2s, v8.2s //x4_5 = vuzp1_s32(x4_12,x5_13);
UZP2 v7.2s, v4.2s, v8.2s //x12_13 = vuzp2_s32(x4_12,x5_13);
REV64 v7.2s, v7.2s //x13_12 = vrev64_s32(x12_13);
SQADD v3.2s, v1.2s, v5.2s //real_imag0 = vqadd_s32(x0_1,x8_9);
SQSUB v8.2s, v1.2s, v5.2s //a00_10_v = vqsub_s32(x0_1,x8_9);
SQADD v12.2s, v6.2s, v7.2s //real_imag4 = vqadd_s32(x4_5,x13_12);
SQSUB v14.2s, v6.2s, v7.2s //a0_1_v = vqsub_s32(x4_5,x13_12);
// Swap the imaginary lanes of real_imag4 and a0_1_v through w4.
MOV w4, v12.s[1]
MOV v12.s[1], v14.s[1]
MOV v14.s[1], w4
UZP1 v6.2s, v10.2s, v11.2s //x2_3
SQSUB v1.2s, v10.2s, v11.2s //tempr = vqsub_s32(x2_10,x3_11)
SQADD v5.2s, v10.2s, v11.2s //tempi = vqadd_s32(x2_10,x3_11)
// Multiply (tempr,tempi) by 1/sqrt(2): 32x32 widening multiply,
// >>32, then <<1 to restore the Q31 scale.
SMULL v7.2d, v1.2s, v0.2s
SMULL v10.2d, v5.2s, v0.2s
SSHR v7.2d, v7.2d, #32 //tempr_q
SSHR v10.2d, v10.2d, #32 //tempi_q
SHL v7.4s, v7.4s, #1
SHL v10.4s, v10.4s, #1
MOV v1.s[0], v7.s[2]
MOV v1.s[1], v10.s[2] //vr_i
SQSUB v7.2s, v6.2s, v1.2s //a2_3_v = vqsub_s32(x2_3,vr_i);
SQADD v4.2s, v6.2s, v1.2s //real_imag1 = vqadd_s32(x2_3,vr_i);
SQADD v5.2s, v14.2s, v7.2s //real_imag2 = vqadd_s32(a0_1_v,a2_3_v);
UZP1 v1.2s, v9.2s, v13.2s //x6_7
SQADD v6.2s, v9.2s, v13.2s //tempr = vqadd_s32(x6_14,x7_15);
SQSUB v14.2s, v9.2s, v13.2s //tempi = vqsub_s32(x6_14,x7_15);
SMULL v9.2d, v6.2s, v0.2s
SMULL v13.2d, v14.2s, v0.2s
SSHR v9.2d, v9.2d, #32
SSHR v13.2d, v13.2d, #32
SHL v9.4s, v9.4s, #1
SHL v13.4s, v13.4s, #1
MOV v0.s[0], v9.s[2]
MOV v0.s[1], v13.s[2]
SQSUB v9.2s, v1.2s, v0.2s // a20_30_v
SQADD v13.2s, v1.2s, v0.2s //real_imag5
MOV w4, v9.s[1]
MOV v9.s[1], v13.s[1]
MOV v13.s[1], w4
SQADD v6.2s, v9.2s, v8.2s //real_imag3
// Store real parts (lane 0) to 'real' and imaginary parts (lane 1)
// to 'imag' for real_imag0..real_imag5.
ST1 {v3.s}[0], [x1], #4
ST1 {v4.s}[0], [x1], #4
ST1 {v5.s}[0], [x1], #4
ST1 {v6.s}[0], [x1], #4
ST1 {v12.s}[0], [x1], #4
ST1 {v13.s}[0], [x1], #4
ST1 {v3.s}[1], [x2], #4
ST1 {v4.s}[1], [x2], #4
ST1 {v5.s}[1], [x2], #4
ST1 {v6.s}[1], [x2], #4
ST1 {v12.s}[1], [x2], #4
ST1 {v13.s}[1], [x2], #4
//ST4 {v3.s,v4.s,v5.s,v6.s}[0],[x1],x5
//ST4 {v3.s,v4.s,v5.s,v6.s}[1],[x2],x5
//ST2 {v12.s,v13.s}[0],[x1]
//ST2 {v12.s,v13.s}[1],[x2]
pop_v_regs
ret

View file

@ -0,0 +1,123 @@
///******************************************************************************
// *
// * Copyright (C) 2018 The Android Open Source Project
// *
// * Licensed under the Apache License, Version 2.0 (the "License");
// * you may not use this file except in compliance with the License.
// * You may obtain a copy of the License at:
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// *
// *****************************************************************************
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
// Callee-save helpers (q8-q15 per AAPCS64; the GP pairs also cover
// caller-saved x8-x17 — over-saving is harmless).
.macro push_v_regs
stp q8, q9, [sp, #-32]!
stp q10, q11, [sp, #-32]!
stp q12, q13, [sp, #-32]!
stp q14, q15, [sp, #-32]!
stp X8, X9, [sp, #-16]!
stp X10, X11, [sp, #-16]!
stp X12, X13, [sp, #-16]!
stp X14, X15, [sp, #-16]!
stp X16, X17, [sp, #-16]!
stp X29, X30, [sp, #-16]!
.endm
.macro pop_v_regs
ldp X29, X30, [sp], #16
ldp X16, X17, [sp], #16
ldp X14, X15, [sp], #16
ldp X12, X13, [sp], #16
ldp X10, X11, [sp], #16
ldp X8, X9, [sp], #16
ldp q14, q15, [sp], #32
ldp q12, q13, [sp], #32
ldp q10, q11, [sp], #32
ldp q8, q9, [sp], #32
.endm
.text
.global ixheaacd_neg_shift_spec_armv8
// VOID ixheaacd_neg_shift_spec_armv8(WORD32 *coef,   // x0
//                                    WORD16 *out,    // x1
//                                    WORD16 q_shift, // x2
//                                    WORD16 ch_fac)  // x3
// (signature from the C dispatch-table declaration)
// Walks a fixed 448 32-bit coefficients BACKWARDS from coef[447],
// saturating-negates each, shifts by q_shift, rounds (+0x8000) and
// narrows to 16 bits, storing forward into 'out' with a stride of
// ch_fac 16-bit samples.
ixheaacd_neg_shift_spec_armv8:
push_v_regs
MOV X5, #448             // total coefficient count
SUB X6, X5, #1
LSL X6, X6, #2
ADD X6, X6, X0
MOV X8, #-16             // step back one 4-word vector per load
SUB X6, X6, #12          // X6 -> coef[444], start of the last group of 4
LSL X7, X3, #1           // output byte stride = ch_fac * sizeof(WORD16)
DUP V31.4S, W2           // per-lane shift amount (q_shift)
MOV W4, #0x8000          // rounding constant before >>16
DUP V30.4S, W4
LD1 {V0.4S}, [X6], X8
SQNEG V0.4S, V0.4S
LD1 {V6.4S}, [X6], X8
SQSHL V25.4S, V0.4S, V31.4S
SQADD V24.4S, V25.4S, V30.4S
SSHR V23.4S, V24.4S, #16
REV64 V23.4S, V23.4S     // REV64+UZP1 restore forward sample order
SUB X5, X5, #8
UZP1 V27.8H, V23.8H, V23.8H
SQNEG V29.4S, V6.4S
// Software-pipelined main loop: 8 samples per iteration, stores of the
// previous group interleaved with the next group's arithmetic.
LOOP_1:
ST1 {V27.H}[2], [X1], X7
SQSHL V22.4S, V29.4S, V31.4S
LD1 {V0.4S}, [X6], X8
ST1 {V27.H}[3], [X1], X7
SQADD V21.4S, V22.4S, V30.4S
ST1 {V27.H}[0], [X1], X7
SQNEG V0.4S, V0.4S
ST1 {V27.H}[1], [X1], X7
SSHR V20.4S, V21.4S, #16
REV64 V20.4S, V20.4S
SUBS X5, X5, #8
UZP1 V27.8H, V20.8H, V20.8H
SQSHL V25.4S, V0.4S, V31.4S
ST1 {V27.H}[2], [X1], X7
LD1 {V6.4S}, [X6], X8
SQADD V24.4S, V25.4S, V30.4S
ST1 {V27.H}[3], [X1], X7
SSHR V23.4S, V24.4S, #16
ST1 {V27.H}[0], [X1], X7
REV64 V23.4S, V23.4S
ST1 {V27.H}[1], [X1], X7
UZP1 V27.8H, V23.8H, V23.8H
SQNEG V29.4S, V6.4S
BGT LOOP_1
// Epilogue: flush the last two pipelined groups.
ST1 {V27.H}[2], [X1], X7
SQSHL V22.4S, V29.4S, V31.4S
ST1 {V27.H}[3], [X1], X7
ST1 {V27.H}[0], [X1], X7
SQADD V21.4S, V22.4S, V30.4S
ST1 {V27.H}[1], [X1], X7
SSHR V20.4S, V21.4S, #16
REV64 V20.4S, V20.4S
UZP1 V27.8H, V20.8H, V20.8H
ST1 {V27.H}[2], [X1], X7
ST1 {V27.H}[3], [X1], X7
ST1 {V27.H}[0], [X1], X7
ST1 {V27.H}[1], [X1], X7
pop_v_regs
RET

View file

@ -0,0 +1,333 @@
///******************************************************************************
// *
// * Copyright (C) 2018 The Android Open Source Project
// *
// * Licensed under the Apache License, Version 2.0 (the "License");
// * you may not use this file except in compliance with the License.
// * You may obtain a copy of the License at:
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// *
// *****************************************************************************
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
// Callee-save helpers (q8-q15 per AAPCS64; the GP pairs also cover
// caller-saved x8-x17 — over-saving is harmless).
.macro push_v_regs
stp q8, q9, [sp, #-32]!
stp q10, q11, [sp, #-32]!
stp q12, q13, [sp, #-32]!
stp q14, q15, [sp, #-32]!
stp X8, X9, [sp, #-16]!
stp X10, X11, [sp, #-16]!
stp X12, X13, [sp, #-16]!
stp X14, X15, [sp, #-16]!
stp X16, X17, [sp, #-16]!
stp X29, X30, [sp, #-16]!
.endm
.macro pop_v_regs
ldp X29, X30, [sp], #16
ldp X16, X17, [sp], #16
ldp X14, X15, [sp], #16
ldp X12, X13, [sp], #16
ldp X10, X11, [sp], #16
ldp X8, X9, [sp], #16
ldp q14, q15, [sp], #32
ldp q12, q13, [sp], #32
ldp q10, q11, [sp], #32
ldp q8, q9, [sp], #32
.endm
.text
.global ixheaacd_over_lap_add1_armv8
// VOID ixheaacd_over_lap_add1_armv8(WORD32 *,       // x0
//                                   WORD32 *,       // x1
//                                   WORD16 *,       // x2
//                                   const WORD16 *, // x3
//                                   WORD16,         // x4
//                                   WORD16,         // x5
//                                   WORD16)         // x6
// Parameter TYPES are from the C dispatch-table declaration; argument
// ROLES below are inferred from usage and should be confirmed against
// the portable C version (ixheaacd_over_lap_add1_dec):
//   x0: 32-bit coefficient buffer, read backwards from index 2*x5-1
//   x1: 32-bit buffer, read forwards
//   x2: 16-bit output base
//   x3: 16-bit window coefficients, read backwards (interleaved pairs)
//   x4: left-shift applied after the window multiply
//   x5: sample count (loop runs x5/8 iterations, 8 samples each)
//   x6: output stride in 16-bit samples (presumably channel factor)
// Performs the windowed overlap-add of IMDCT output into two
// interleaved 16-bit output streams.
ixheaacd_over_lap_add1_armv8:
push_v_regs
LSL X10, X5, #1
SUB X11, X10, #1
LSL X10, X11, #2
ADD X10, X0, X10
SUB X10, X10, #12      // X10 -> last group of four WORD32 in x0
LSL X8, X11, #1
ADD X8, X8, X3
SUB X8, X8, #14        // X8 -> last group of window WORD16 pairs
MOV X12, #-16          // both read backwards, 16 bytes per step
DUP V11.8H, W4         // per-lane post-multiply shift (x4)
LD1 {V3.4S}, [X10], X12
MOV W7, #0x2000
NEG W7, W7             // -0x2000: offset term added to the 32x32
                       // product (rounding bias, presumably)
SQNEG V0.4S, V3.4S
DUP V10.4S, W7
// UZP1/UZP2 split each 32-bit value into low/high 16-bit halves for
// the split-precision multiply; REV64 reverses the sample order.
UZP1 V31.8H, V0.8H, V0.8H
UZP2 V30.8H, V0.8H, V0.8H
REV64 V31.8h, V31.8h
REV64 V30.8h, V30.8h
SUB X11, X5, #1
UZP1 V7.8H, V3.8H, V3.8H
UZP2 V6.8H, V3.8H, V3.8H
REV64 V7.8H, V7.8H
REV64 V6.8H, V6.8H
// X11 = x2 + 2*(x6*(x5-1)): end of the first output stream, walked
// backwards with stride -2*x6.
MOV V16.S[0], W6
MOV V17.S[0], W11
SMULL V17.4S, V16.4H, V17.4H
MOV W11, V17.S[0]
LSL X11, X11, #1
LD2 {V2.4H, V3.4H}, [X8], X12
ADD X11, X11, X2
REV64 V2.4H, V2.4H
REV64 V3.4H, V3.4H
LSL X4, X6, #1
NEG X4, X4             // X4 = -2*x6: backward stride, first stream
LSL X9, X6, #1         // X9 = +2*x6: forward stride, second stream
// X6 = x2 + 2*(x5*x6): start of the second output stream.
MOV V16.S[0], W5
MOV V17.S[0], W6
SMULL V17.4S, V16.4H, V17.4H
MOV W6, V17.S[0]
LSL W6, W6, #1
ADD X6, X6, X2
// Prologue of the software-pipelined loop: first group of 8 samples.
// Pattern per group: split-precision 16x16 window multiply
// (UMULL low + SMLAL high), SQSHL by x4, minus the x1-buffer term
// (widened 32x32 product + rounding bias), <<2, >>16, narrow.
UMULL V15.4S, V7.4H, V2.4H
LD1 {V4.4S}, [X1], #16
USHR V15.4S, V15.4S, #16
SMLAL V15.4S, V6.4H, V2.4H
SQSHL V15.4S, V15.4S, V11.4S
SSHLL V27.4S, V3.4H, #0
SMULL V28.2D, V27.2S, V4.2S
SMULL2 V29.2D, V27.4S, V4.4S
SQXTN V28.2S, V28.2D
SQXTN2 V28.4S, V29.2D
MOV V14.16B, V28.16B
SQADD V14.4S, V14.4S, V10.4S
SQSUB V13.4S, V15.4S, V14.4S
SQSHL V13.4S, V13.4S, #2
SSHR V13.4S, V13.4S, #16
UZP1 V26.8H, V13.8H, V13.8H
UMULL V12.4S, V31.4H, V3.4H
USHR V12.4S, V12.4S, #16
SMLAL V12.4S, V30.4H, V3.4H
SQSHL V12.4S, V12.4S, V11.4S
LD1 {V3.4S}, [X10], X12
SSHLL V27.4S, V2.4H, #0
SMULL V28.2D, V27.2S, V4.2S
SMULL2 V29.2D, V27.4S, V4.4S
SQXTN V28.2S, V28.2D
SQXTN2 V28.4S, V29.2D
MOV V8.16B, V28.16B
SQADD V8.4S, V8.4S, V10.4S
SQNEG V0.4S, V3.4S
UZP1 V1.8H, V0.8H, V0.8H
UZP2 V0.8H, V0.8H, V0.8H
REV64 V1.8h, V1.8h
REV64 V0.8h, V0.8h
SQSUB V9.4S, V12.4S, V8.4S
UZP1 V7.8H, V3.8H, V3.8H
UZP2 V6.8H, V3.8H, V3.8H
REV64 V7.8h, V7.8h
REV64 V6.8h, V6.8h
SQSHL V9.4S, V9.4S, #2
LD2 {V2.4H, V3.4H}, [X8], X12
SSHR V9.4S, V9.4S, #16
REV64 V2.4H, V2.4H
REV64 V3.4H, V3.4H
UZP1 V18.8H, V9.8H, V9.8H
LD1 {V4.4S}, [X1], #16
SUB W5, W5, #8
// Main loop: stores of the previous two 4-sample groups interleaved
// with the arithmetic for the next two.
LOOP_1:
ST1 {V26.H}[0], [X11], X4
UMULL V15.4S, V7.4H, V2.4H
ST1 {V26.H}[1], [X11], X4
UMULL V12.4S, V1.4H, V3.4H
ST1 {V26.H}[2], [X11], X4
USHR V15.4S, V15.4S, #16
ST1 {V26.H}[3], [X11], X4
USHR V12.4S, V12.4S, #16
ST1 {V18.H}[0], [X6], X9
SMLAL V15.4S, V6.4H, V2.4H
ST1 {V18.H}[1], [X6], X9
SMLAL V12.4S, V0.4H, V3.4H
ST1 {V18.H}[2], [X6], X9
SQSHL V15.4S, V15.4S, V11.4S
ST1 {V18.H}[3], [X6], X9
SQSHL V12.4S, V12.4S, V11.4S
LD1 {V6.4S}, [X10], X12
SSHLL V27.4S, V3.4H, #0
SMULL V28.2D, V27.2S, V4.2S
SMULL2 V29.2D, V27.4S, V4.4S
SQXTN V28.2S, V28.2D
SQXTN2 V28.4S, V29.2D
MOV V14.16B, V28.16B
SSHLL V27.4S, V2.4H, #0
SMULL V28.2D, V27.2S, V4.2S
SMULL2 V29.2D, V27.4S, V4.4S
SQXTN V28.2S, V28.2D
SQXTN2 V28.4S, V29.2D
MOV V8.16B, V28.16B
LD2 {V2.4H, V3.4H}, [X8], X12
SQNEG V0.4S, V6.4S
LD1 {V4.4S}, [X1], #16
SQADD V14.4S, V14.4S, V10.4S
UZP1 V1.8H, V0.8H, V0.8H
UZP2 V0.8H, V0.8H, V0.8H
REV64 V1.8h, V1.8h
REV64 V0.8h, V0.8h
SQADD V8.4S, V8.4S, V10.4S
UZP1 V7.8H, V6.8H, V6.8H
UZP2 V6.8H, V6.8H, V6.8H
REV64 V7.8h, V7.8h
REV64 V6.8h, V6.8h
SQSUB V13.4S, V15.4S, V14.4S
REV64 V2.4H, V2.4H
REV64 V3.4H, V3.4H
SQSUB V9.4S, V12.4S, V8.4S
SQSHL V13.4S, V13.4S, #2
SQSHL V9.4S, V9.4S, #2
UMULL V15.4S, V7.4H, V2.4H
SSHR V13.4S, V13.4S, #16
UZP1 V26.8H, V13.8H, V13.8H
SSHR V9.4S, V9.4S, #16
ST1 {V26.H}[0], [X11], X4
UMULL V12.4S, V1.4H, V3.4H
UZP1 V18.8H, V9.8H, V9.8H
USHR V15.4S, V15.4S, #16
ST1 {V26.H}[1], [X11], X4
SMLAL V15.4S, V6.4H, V2.4H
ST1 {V26.H}[2], [X11], X4
USHR V12.4S, V12.4S, #16
ST1 {V26.H}[3], [X11], X4
SMLAL V12.4S, V0.4H, V3.4H
ST1 {V18.H}[0], [X6], X9
SQSHL V15.4S, V15.4S, V11.4S
ST1 {V18.H}[1], [X6], X9
SQSHL V12.4S, V12.4S, V11.4S
ST1 {V18.H}[2], [X6], X9
SSHLL V27.4S, V3.4H, #0
SMULL V28.2D, V27.2S, V4.2S
SMULL2 V29.2D, V27.4S, V4.4S
SQXTN V28.2S, V28.2D
SQXTN2 V28.4S, V29.2D
MOV V14.16B, V28.16B
ST1 {V18.H}[3], [X6], X9
SSHLL V27.4S, V2.4H, #0
SMULL V28.2D, V27.2S, V4.2S
SMULL2 V29.2D, V27.4S, V4.4S
SQXTN V28.2S, V28.2D
SQXTN2 V28.4S, V29.2D
MOV V8.16B, V28.16B
LD1 {V3.4S}, [X10], X12
SQADD V14.4S, V14.4S, V10.4S
SQNEG V0.4S, V3.4S
UZP1 V1.8H, V0.8H, V0.8H
UZP2 V0.8H, V0.8H, V0.8H
REV64 V1.8H, V1.8H
REV64 V0.8H, V0.8H
SQSUB V13.4S, V15.4S, V14.4S
UZP1 V7.8H, V3.8H, V3.8H
UZP2 V6.8H, V3.8H, V3.8H
REV64 V7.8H, V7.8H
REV64 V6.8H, V6.8H
SQADD V8.4S, V8.4S, V10.4S
LD2 {V2.4H, V3.4H}, [X8], X12
SQSUB V9.4S, V12.4S, V8.4S
REV64 V2.4H, V2.4H
REV64 V3.4H, V3.4H
SQSHL V13.4S, V13.4S, #2
LD1 {V4.4S}, [X1], #16
SQSHL V9.4S, V9.4S, #2
SSHR V13.4S, V13.4S, #16
SUBS X5, X5, #8
SSHR V9.4S, V9.4S, #16
UZP1 V26.8H, V13.8H, V13.8H
UZP1 V18.8H, V9.8H, V9.8H
BGT LOOP_1
// Epilogue: compute and store the final two groups.
ST1 {V26.H}[0], [X11], X4
UMULL V15.4S, V7.4H, V2.4H
ST1 {V26.H}[1], [X11], X4
UMULL V12.4s, V1.4H, V3.4H
ST1 {V26.H}[2], [X11], X4
USHR V15.4S, V15.4S, #16
ST1 {V26.H}[3], [X11], X4
USHR V12.4S, V12.4S, #16
ST1 {V18.H}[0], [X6], X9
SMLAL V15.4S, V6.4H, V2.4H
ST1 {V18.H}[1], [X6], X9
SMLAL V12.4S, V0.4H, V3.4H
ST1 {V18.H}[2], [X6], X9
SQSHL V15.4S, V15.4S, V11.4S
ST1 {V18.H}[3], [X6], X9
SQSHL V12.4S, V12.4S, V11.4S
SSHLL V27.4S, V3.4H, #0
SMULL V28.2D, V27.2S, V4.2S
SMULL2 V29.2D, V27.4S, V4.4S
SQXTN V28.2S, V28.2D
SQXTN2 V28.4S, V29.2D
MOV V14.16B, V28.16B
SSHLL V27.4S, V2.4H, #0
SMULL V28.2D, V27.2S, V4.2S
SMULL2 V29.2D, V27.4S, V4.4S
SQXTN V28.2S, V28.2D
SQXTN2 V28.4S, V29.2D
MOV V8.16B, V28.16B
SQADD V14.4S, V14.4S, V10.4S
SQADD V8.4S, V8.4S, V10.4S
SQSUB V13.4S, V15.4S, V14.4S
SQSUB V9.4S, V12.4S, V8.4S
SQSHL V13.4S, V13.4S, #2
SQSHL V9.4S, V9.4S, #2
SSHR V13.4S, V13.4S, #16
SSHR V9.4S, V9.4S, #16
UZP1 V26.8H, V13.8H, V13.8H
UZP1 V18.8H, V9.8H, V9.8H
ST1 {V26.H}[0], [X11], X4
ST1 {V26.H}[1], [X11], X4
ST1 {V26.H}[2], [X11], X4
ST1 {V26.H}[3], [X11], X4
ST1 {V18.H}[0], [X6], X9
ST1 {V18.H}[1], [X6], X9
ST1 {V18.H}[2], [X6], X9
ST1 {V18.H}[3], [X6], X9
pop_v_regs
RET

View file

@ -0,0 +1,305 @@
///******************************************************************************
// *
// * Copyright (C) 2018 The Android Open Source Project
// *
// * Licensed under the Apache License, Version 2.0 (the "License");
// * you may not use this file except in compliance with the License.
// * You may obtain a copy of the License at:
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// *
// *****************************************************************************
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
// Prologue helper: spill registers this routine clobbers.
// Saves the callee-saved SIMD registers q8-q15 in full 128-bit width
// (AAPCS64 only requires the low 64 bits d8-d15, so this is conservative),
// then x8-x17 and the frame/link pair x29/x30.
.macro push_v_regs
stp q8, q9, [sp, #-32]!
stp q10, q11, [sp, #-32]!
stp q12, q13, [sp, #-32]!
stp q14, q15, [sp, #-32]!
stp X8, X9, [sp, #-16]!
stp X10, X11, [sp, #-16]!
stp X12, X13, [sp, #-16]!
stp X14, X15, [sp, #-16]!
stp X16, X17, [sp, #-16]!
stp X29, X30, [sp, #-16]!
.endm
// Epilogue helper: restore everything in exact reverse order of push_v_regs.
.macro pop_v_regs
ldp X29, X30, [sp], #16
ldp X16, X17, [sp], #16
ldp X14, X15, [sp], #16
ldp X12, X13, [sp], #16
ldp X10, X11, [sp], #16
ldp X8, X9, [sp], #16
ldp q14, q15, [sp], #32
ldp q12, q13, [sp], #32
ldp q10, q11, [sp], #32
ldp q8, q9, [sp], #32
.endm
.text
.global ixheaacd_over_lap_add2_armv8
//==============================================================================
// ixheaacd_over_lap_add2_armv8
//
// AArch64 NEON overlap-add kernel for the xHE-AAC decoder. Two software-
// pipelined loops each process 8 samples per iteration:
//   LOOP_1 walks x0 forward (from x0 + 4*x5) and x1 backward, multiplies
//          16-bit coefficient pairs from x3 (LD2 .4H), rounds (SQADD with
//          v20) and shifts (SSHL by v21), storing 32-bit results to x2 with
//          stride x11 = x6*4.
//   LOOP_2 walks x0 backward (LD1 .4S with negative stride) with SQNEG /
//          UZP1 / UZP2 re-packing, and stores to x7 with stride x8 = x6*4.
// Register roles visible in this code:
//   x0, x1 : input buffers (16-bit interleaved data)
//   x2     : output base for LOOP_1 (x9 keeps a copy for LOOP_2 addressing)
//   x3     : coefficient/window table (x4 keeps a copy)
//   x4     : shift control; builds v21 (per-lane shift) and v20 (rounding
//            constant 1 << (-(x4+1)+? )) via the X13/X14 computation below
//   x5     : sample count (decremented by 8 per loop pass)
//   x6     : output stride in 32-bit elements
// NOTE(review): which operand is the windowed overlap state vs. the current
// frame is not visible here - confirm against the C prototype/caller.
//==============================================================================
ixheaacd_over_lap_add2_armv8:
push_v_regs
MOV X8, X5
SUB X12, X5, #1
LSL X9, X5, #2
LSL X12, X12, #2
ADD X10, X0, X9
ADD X7, X1, X12
ADD X4, X4, #1
LD2 {V0.4H, V1.4H}, [X10], #16
LSL X11, X6, #2
SUB X7, X7, #12
SUB X4, X4, #16
MOV X12, #-16
MOV X13, #1
// Build rounding constant: v20 = 1 << (-(x4+1)), paired with the signed
// (possibly negative => right) shift vector v21 used by SSHL below.
ADD X14, X4, #1
NEG X14, X14
DUP V21.4S, W4
LD2 {V6.4H, V7.4H}, [X7], X12
LSL X4, X13, X14
REV64 V4.4H, V6.4H
DUP V20.4S, W4
REV64 V5.4H, V7.4H
MOV X4, X3
MOV X9, X2
// Pipeline prologue: first 8-sample block computed before entering LOOP_1.
LD2 {V2.4H, V3.4H}, [X3], #16
UMULL V23.4S, V0.4H, V2.4H
UMLSL V23.4S, V4.4H, V3.4H
LD2 {V8.4H, V9.4H}, [X10], #16
SSHR V23.4S, V23.4S, #16
LD2 {V10.4H, V11.4H}, [X3], #16
SMLAL V23.4S, V1.4H, V2.4H
SMLSL V23.4S, V5.4H, V3.4H
LD2 {V14.4H, V15.4H}, [X7], X12
REV64 V12.4H, V14.4H
REV64 V13.4H, V15.4H
SQADD V22.4S, V23.4S, V20.4S
SSHL V22.4S, V22.4S, V21.4S
MOV V24.16B, V22.16B
SUB X8, X8, #8
// Main forward loop: loads for iteration i+1 are interleaved with the
// arithmetic and stores of iteration i (software pipelining).
LOOP_1:
LD2 {V0.4H, V1.4H}, [X10], #16
UMULL V19.4S, V8.4H, V10.4H
LD2 {V2.4H, V3.4H}, [X3], #16
UMLSL V19.4S, V12.4H, V11.4H
LD2 {V6.4H, V7.4H}, [X7], X12
UMULL V23.4S, V0.4H, V2.4H
REV64 V4.4H, V6.4H
UMLSL V23.4S, V4.4H, V3.4H
REV64 V5.4H, V7.4H
SSHR V19.4S, V19.4S, #16
ST1 {V24.S}[0], [X2], X11
SMLAL V19.4S, V9.4H, V10.4H
ST1 {V24.S}[1], [X2], X11
SSHR V23.4S, V23.4S, #16
ST1 {V24.S}[2], [X2], X11
SMLAL V23.4S, V1.4H, V2.4H
ST1 {V24.S}[3], [X2], X11
SMLSL V19.4S, V13.4H, V11.4H
SMLSL V23.4S, V5.4H, V3.4H
LD2 {V8.4H, V9.4H}, [X10], #16
LD2 {V10.4H, V11.4H}, [X3], #16
LD2 {V14.4H, V15.4H}, [X7], X12
SQADD V18.4S, V19.4S, V20.4S
REV64 V12.4H, V14.4H
REV64 V13.4H, V15.4H
SQADD V22.4S, V23.4S, V20.4S
SSHL V18.4S, V18.4S, V21.4S
MOV V16.16B, V18.16B
ST1 {V16.S}[0], [X2], X11
SSHL V22.4S, V22.4S, V21.4S
MOV V24.16B, V22.16B
SUBS X8, X8, #8
ST1 {V16.S}[1], [X2], X11
ST1 {V16.S}[2], [X2], X11
ST1 {V16.S}[3], [X2], X11
BGT LOOP_1
// LOOP_1 epilogue: drain the last pipelined block and compute the write
// base for the second phase (x7 = x2_base + 4 * (x5*x6), via SMULL).
ST1 {V24.S}[0], [X2], X11
UMULL V19.4S, V8.4H, V10.4H
UMLSL V19.4S, V12.4H, V11.4H
ST1 {V24.S}[1], [X2], X11
ST1 {V24.S}[2], [X2], X11
SSHR V19.4S, V19.4S, #16
ST1 {V24.S}[3], [X2], X11
SMLAL V19.4S, V9.4H, V10.4H
SMLSL V19.4S, V13.4H, V11.4H
MOV X12, #12
MOV V30.S[0], W5
MOV V31.S[0], W6
SMULL V29.4S, V30.4H, V31.4H
MOV W7, V29.S[0]
LSL W10, W5, #1
SQADD V18.4S, V19.4S, V20.4S
SSHL V18.4S, V18.4S, V21.4S
MOV V16.16B, V18.16B
ST1 {V16.S}[0], [X2], X11
LSL X7, X7, #2
ST1 {V16.S}[1], [X2], X11
ADD X7, X7, X9
ST1 {V16.S}[2], [X2], X11
ST1 {V16.S}[3], [X2], X11
// Second phase setup: x10 walks x0 backward (stride -16), x11 walks the
// coefficient table backward; inputs are negated (SQNEG) and re-packed
// with UZP1/UZP2 + REV64 to reverse the sample order within each vector.
SUB X11, X10, #1
LSL X10, X11, #2
ADD X10, X0, X10
LSL X11, X11, #1
SUB X10, X10, X12
LSL X8, X6, #2
MOV X12, #-16
ADD X11, X11, X4
LD1 {V6.4S}, [X10], X12
SUB X11, X11, #14
REV64 V0.4S, V6.4S
SQNEG V0.4S, V0.4S
UZP1 V1.8H, V0.8H, V0.8H
UZP2 V0.8H, V0.8H, V0.8H
REV64 V1.4S, V1.4S
REV64 V0.4S, V0.4S
LD2 {V2.4H, V3.4H}, [X11], X12
REV64 V2.4H, V2.4H
REV64 V3.4H, V3.4H
LD2 {V4.4H, V5.4H}, [X1], #16
UMULL V23.4S, V1.4H, V3.4H
UMLSL V23.4S, V4.4H, V2.4H
SSHR V23.4S, V23.4S, #16
SMLAL V23.4S, V0.4H, V3.4H
SMLSL V23.4S, V5.4H, V2.4H
SQADD V22.4S, V23.4S, V20.4S
SSHL V22.4S, V22.4S, V21.4S
MOV V24.16B, V22.16B
// Pipeline prologue for LOOP_2 (second 8-sample block prefetched).
LD1 {V14.4S}, [X10], X12
UMULL V23.4S, V1.4H, V3.4H
UMLSL V23.4S, V4.4H, V2.4H
REV64 V8.4S, V14.4S
SQNEG V8.4S, V8.4S
LD2 {V10.4H, V11.4H}, [X11], X12
SSHR V23.4S, V23.4S, #16
LD2 {V12.4H, V13.4H}, [X1], #16
SMLAL V23.4S, V0.4H, V3.4H
SMLSL V23.4S, V5.4H, V2.4H
UZP1 V9.8H, V8.8H, V8.8H
UZP2 V8.8H, V8.8H, V8.8H
rev64 v9.4s, v9.4s
rev64 v8.4s, v8.4s
REV64 V10.4H, V10.4H
REV64 V11.4H, V11.4H
SQADD V22.4S, V23.4S, V20.4S
SUB X5, X5, #8
SSHL V22.4S, V22.4S, V21.4S
MOV V24.16B, V22.16B
// Main backward loop, same pipelining scheme as LOOP_1 but storing to x7
// with stride x8.
LOOP_2:
LD1 {V6.4S}, [X10], X12
UMULL V19.4S, V9.4H, V11.4H
REV64 V0.4S, V6.4S
SQNEG V0.4S, V0.4S
UZP1 V1.8H, V0.8H, V0.8H
UZP2 V0.8H, V0.8H, V0.8H
REV64 V1.4S, V1.4S
REV64 V0.4S, V0.4S
LD2 {V2.4H, V3.4H}, [X11], X12
REV64 V2.8H, V2.8H
REV64 V3.8H, V3.8H
LD2 {V4.4H, V5.4H}, [X1], #16
UMLSL V19.4S, V12.4H, V10.4H
ST1 {V24.S}[0], [X7], X8
UMULL V23.4S, V1.4H, V3.4H
ST1 {V24.S}[1], [X7], X8
SSHR V19.4S, V19.4S, #16
ST1 {V24.S}[2], [X7], X8
UMLSL V23.4S, V4.4H, V2.4H
ST1 {V24.S}[3], [X7], X8
SMLAL V19.4S, V8.4H, V11.4H
LD1 {V14.4S}, [X10], X12
SSHR V23.4S, V23.4S, #16
SMLSL V19.4S, V13.4H, V10.4H
LD2 {V10.4H, V11.4H}, [X11], X12
SMLAL V23.4S, V0.4H, V3.4H
SMLSL V23.4S, V5.4H, V2.4H
REV64 V8.4S, V14.4S
LD2 {V12.4H, V13.4H}, [X1], #16
SQNEG V8.4S, V8.4S
REV64 V11.4H, V11.4h
REV64 V10.4H, V10.4H
SQADD V18.4S, V19.4S, V20.4S
UZP1 V9.8H, V8.8H, V8.8H
UZP2 V8.8H, V8.8H, V8.8H
rev64 v9.4s, v9.4s
rev64 v8.4s, v8.4s
SQADD V22.4S, V23.4S, V20.4S
SSHL V18.4S, V18.4S, V21.4S
SUBS X5, X5, #8
MOV V16.16B, V18.16B
ST1 {V16.S}[0], [X7], X8
SSHL V22.4S, V22.4S, V21.4S
ST1 {V16.S}[1], [X7], X8
MOV V24.16B, V22.16B
ST1 {V16.S}[2], [X7], X8
ST1 {V16.S}[3], [X7], X8
BGT LOOP_2
// LOOP_2 epilogue: drain the final pipelined block, restore registers.
ST1 {V24.S}[0], [X7], X8
UMULL V19.4S, V9.4H, V11.4H
UMLSL V19.4S, V12.4H, V10.4H
ST1 {V24.S}[1], [X7], X8
ST1 {V24.S}[2], [X7], X8
SSHR V19.4S, V19.4S, #16
ST1 {V24.S}[3], [X7], X8
SMLAL V19.4S, V8.4H, V11.4H
SMLSL V19.4S, V13.4H, V10.4H
SQADD V18.4S, V19.4S, V20.4S
SSHL V18.4S, V18.4S, V21.4S
MOV V16.16B, V18.16B
ST1 {V16.S}[0], [X7], X8
ST1 {V16.S}[1], [X7], X8
ST1 {V16.S}[2], [X7], X8
ST1 {V16.S}[3], [X7], X8
pop_v_regs
RET

View file

@ -0,0 +1,713 @@
///******************************************************************************
// *
// * Copyright (C) 2018 The Android Open Source Project
// *
// * Licensed under the Apache License, Version 2.0 (the "License");
// * you may not use this file except in compliance with the License.
// * You may obtain a copy of the License at:
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// *
// *****************************************************************************
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
// Prologue helper: saves callee-saved SIMD registers q8-q15 (full 128-bit,
// although AAPCS64 only requires d8-d15) plus x21-x24, which this routine
// uses as scratch.
.macro push_v_regs
stp q8, q9, [sp, #-32]!
stp q10, q11, [sp, #-32]!
stp q12, q13, [sp, #-32]!
stp q14, q15, [sp, #-32]!
stp x21, x22, [sp, #-16]!
stp x23, x24, [sp, #-16]!
.endm
// Epilogue helper: restores in exact reverse order of push_v_regs.
.macro pop_v_regs
ldp x23, x24, [sp], #16
ldp x21, x22, [sp], #16
ldp q14, q15, [sp], #32
ldp q12, q13, [sp], #32
ldp q10, q11, [sp], #32
ldp q8, q9, [sp], #32
.endm
// Swap two registers through x16 (IP0, a caller-saved scratch register,
// so it needs no save/restore).
.macro swp reg1, reg2
MOV X16, \reg1
MOV \reg1, \reg2
MOV \reg2, x16
.endm
.text
.global ixheaacd_post_twiddle_armv8
//==============================================================================
// ixheaacd_post_twiddle_armv8
//
// AArch64 NEON post-twiddle kernel (applied after the inverse FFT in the
// MDCT path of the xHE-AAC decoder). Structure:
//   ARM_PROLOGUE  - scalar computation of the first output pair; selects
//                   per-size constants (w4/w5) and twiddle-table stride x6
//                   based on whether w3 (length) >= 0x400.
//   NEON_PROLOGUE - first two 8-sample vector blocks, filling the pipeline.
//   CORE_LOOP     - steady state: per iteration, complex multiply of LD4
//                   input pairs (x5 walks backward with stride x8 = -32,
//                   x1 forward) by twiddles loaded lane-wise from x2, with
//                   the v10 constant applied as a secondary scale;
//                   results stored via ST2 to x0 (forward) and x7 (backward).
//   NEON_EPILOGUE - drains the pipeline, then handles the residual samples
//                   with narrower (2s / lane) loads and stores.
// The twiddle table pointer x2 is first advanced by 7500 bytes (LDR =7500);
// that offset is a property of the ROM table layout defined elsewhere.
// NOTE(review): exact argument semantics (x0/x1 in-place buffers, x3 length
// in samples) inferred from usage - confirm against the C prototype.
//==============================================================================
ixheaacd_post_twiddle_armv8:
push_v_regs
ARM_PROLOGUE:
// Choose constants and table stride by transform size: large (>= 1024)
// uses stride 4, small uses stride 32 and different scale constants.
CMP w3, #0x400
LDR x21, =7500
ADD x2, x2, x21
BLT NEXT
MOV w4, #50
MOV w5, #-50
MOV x6, #4
dup v10.4h, w4
B NEXT1
NEXT:
MOV w4, #0x192
MOV w5, #0xfe6e
MOV x6, #32
dup v10.4h, w4
NEXT1:
// Scalar complex multiply for the first input pair: 32x32->64 multiplies
// (SMULL/ASR #32 keep the high word), producing one output at each end
// of the output buffer (x0 forward, x7 backward).
LDR w9, [x2]
LSL W22, W9, #16
AND W21, W9, #0xFFFF0000
LDR w7, [x1], #4
LDR w8, [x1], #4
ADD x2, x2, x6
SMULL X11, w8, w21
ASR X11, x11, #32
SMULL X10, w8, w22
ASR X10, x10, #32
SMULL X12, w7, w21
ASR X12, x12, #32
SMULL X23, w7, w22
ASR X23, x23, #32
ADD w8, w11, w23
SUB w10, w10, w12
MVN w8, w8
ADD w8, w8, #1
LSL w21, w5, #16
LSL w22, w4, #16
SMULL X23, w10, w21
ASR X23, x23, #32
ADD w9, w8, w23
SMULL X23, w8, w22
ASR X23, x23, #32
ADD w11, w10, w23
LSL x7, x3, #2
ADD x7, x0, x7
SUB x7, x7, #4
STR w11, [x7], #-4
STR w9, [x0], #4
// Set up vector-phase pointers: x5 walks x1's buffer backward (x8 = -32),
// loop count w3 = (len-1)/16 iterations of the 16-sample core.
LSL x5, x3, #2
ADD x5, x1, x5
SUB x5, x5, #40
SUB w3, w3, #1
ASR w3, w3, #4
SUB x7, x7, #28
MOV x8, #-32
NEON_PROLOGUE:
// Fill the software pipeline: compute the first two vector blocks before
// entering CORE_LOOP. Twiddles are gathered lane-by-lane from x2 (stride
// x6) and byte-reversed per 16-bit pair with rev64.
LD4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x5], x8
LD4 {v4.4h, v5.4h, v6.4h, v7.4h}, [x1], #32
LD2 {v8.h, v9.h}[0], [x2], x6
LD2 {v8.h, v9.h}[1], [x2], x6
LD2 {v8.h, v9.h}[2], [x2], x6
LD2 {v8.h, v9.h}[3], [x2], x6
rev64 v12.4h, v8.4h
rev64 v13.4h, v9.4h
uMULL v30.4s, v2.4h, v13.4h
uMULL v28.4s, v0.4h, v13.4h
uMULL v26.4s, v2.4h, v12.4h
uMULL v24.4s, v0.4h, v12.4h
ushR v30.4s, v30.4s, #16
ushR v28.4s, v28.4s, #16
ushR v26.4s, v26.4s, #16
ushR v24.4s, v24.4s, #16
sMLAL v30.4s, v3.4h, v13.4h
sMLAL v28.4s, v1.4h, v13.4h
sMLAL v26.4s, v3.4h, v12.4h
sMLAL v24.4s, v1.4h, v12.4h
uMULL v22.4s, v6.4h, v9.4h
uMULL v20.4s, v4.4h, v9.4h
ADD v28.4s, v28.4s , v26.4s
SUB v30.4s, v30.4s , v24.4s
NEG v28.4s, v28.4s
uMULL v18.4s, v6.4h, v8.4h
uMULL v16.4s, v4.4h, v8.4h
mov v31.8b, v30.8b
mov v27.D[0], v30.D[1]
ushR v22.4s, v22.4s, #16
mov v24.8b, v28.8b
mov v25.D[0], v28.D[1]
ushR v20.4s, v20.4s, #16
UZP1 v26.4h, v31.4h, v27.4h
UZP2 v27.4h, v31.4h, v27.4h
ushR v18.4s, v18.4s, #16
mov v31.8B , v24.8B
UZP1 v24.4h, v31.4h, v25.4h
UZP2 v25.4h, v31.4h, v25.4h
ushR v16.4s, v16.4s, #16
sMLAL v22.4s, v7.4h, v9.4h
sMLAL v20.4s, v5.4h, v9.4h
sMLAL v18.4s, v7.4h, v8.4h
sMLAL v16.4s, v5.4h, v8.4h
LD2 {v8.h, v9.h}[0], [x2], x6
uMULL v0.4s, v26.4h, v10.4h
LD2 {v8.h, v9.h}[1], [x2], x6
uMULL v2.4s, v24.4h, v10.4h
LD2 {v8.h, v9.h}[2], [x2], x6
ADD v22.4s, v22.4s , v16.4s
LD2 {v8.h, v9.h}[3], [x2], x6
SUB v20.4s, v18.4s , v20.4s
rev64 v12.4h, v8.4h
rev64 v13.4h, v9.4h
NEG v22.4s, v22.4s
mov v18.8b, v22.8b
mov v19.D[0], v22.D[1]
ushR v0.4s, v0.4s, #16
mov v16.16b, v20.16b
mov v17.D[0], v20.D[1]
ushR v2.4s, v2.4s, #16
MOV v31.8b, v18.8b
UZP1 v18.4h, v31.4h, v19.4h
UZP2 v19.4h, v31.4h, v19.4h
sMLAL v0.4s, v27.4h, v10.4h
MOV v31.8b, v16.8b
UZP1 v16.4h, v31.4h, v17.4h
UZP2 v17.4h, v31.4h, v17.4h
sMLAL v2.4s, v25.4h, v10.4h
uMULL v4.4s, v18.4h, v10.4h
uMULL v6.4s, v16.4h, v10.4h
NEG v0.4s, v0.4s
ADD v14.4s, v30.4s , v2.4s
ADD v26.4s, v28.4s , v0.4s
rev64 v14.4s, v14.4s
ushR v4.4s, v4.4s, #16
swp v14.D[0], v14.D[1]
ushR v6.4s, v6.4s, #16
sMLAL v4.4s, v19.4h, v10.4h
LD4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x5], x8
sMLAL v6.4s, v17.4h, v10.4h
SUB x3, x3, #2
ADD v24.4s, v20.4s , v4.4s
rev64 v24.4s, v24.4s
NEG v16.4s, v6.4s
LD4 {v4.4h, v5.4h, v6.4h, v7.4h}, [x1], #32
swp v24.D[0], v24.D[1]
ADD v16.4s, v22.4s , v16.4s
// Steady state: stores from the previous iteration are interleaved with
// loads and arithmetic of the current one.
CORE_LOOP:
uMULL v30.4s, v2.4h, v13.4h
MOV v25.16B, v24.16B
ST2 { v25.4s, v26.4s}, [x7], x8
uMULL v28.4s, v0.4h, v13.4h
uMULL v26.4s, v2.4h, v12.4h
MOV v15.16B, v14.16B
ST2 { v15.4s, v16.4s}, [x0], #32
uMULL v24.4s, v0.4h, v12.4h
ushR v30.4s, v30.4s, #16
ushR v28.4s, v28.4s, #16
ushR v26.4s, v26.4s, #16
ushR v24.4s, v24.4s, #16
sMLAL v30.4s, v3.4h, v13.4h
sMLAL v28.4s, v1.4h, v13.4h
sMLAL v26.4s, v3.4h, v12.4h
sMLAL v24.4s, v1.4h, v12.4h
uMULL v22.4s, v6.4h, v9.4h
uMULL v20.4s, v4.4h, v9.4h
ADD v28.4s, v28.4s , v26.4s
SUB v30.4s, v30.4s , v24.4s
NEG v28.4s, v28.4s
uMULL v18.4s, v6.4h, v8.4h
uMULL v16.4s, v4.4h, v8.4h
mov v26.8b, v30.8b
mov v27.D[0], v30.D[1]
ushR v22.4s, v22.4s, #16
mov v24.8b, v28.8b
mov v25.D[0], v28.D[1]
ushR v20.4s, v20.4s, #16
MOV v31.8b, v26.8b
UZP1 v26.4h, v31.4h, v27.4h
UZP2 v27.4h, v31.4h, v27.4h
ushR v18.4s, v18.4s, #16
MOV v31.8b, v24.8b
UZP1 v24.4h, v31.4h, v25.4h
UZP2 v25.4h, v31.4h, v25.4h
ushR v16.4s, v16.4s, #16
sMLAL v22.4s, v7.4h, v9.4h
sMLAL v20.4s, v5.4h, v9.4h
sMLAL v18.4s, v7.4h, v8.4h
sMLAL v16.4s, v5.4h, v8.4h
LD2 {v8.h, v9.h}[0], [x2], x6
uMULL v0.4s, v26.4h, v10.4h
LD2 {v8.h, v9.h}[1], [x2], x6
uMULL v2.4s, v24.4h, v10.4h
LD2 {v8.h, v9.h}[2], [x2], x6
ADD v22.4s, v22.4s , v16.4s
LD2 {v8.h, v9.h}[3], [x2], x6
SUB v20.4s, v18.4s , v20.4s
rev64 v12.4h, v8.4h
rev64 v13.4h, v9.4h
NEG v22.4s, v22.4s
mov v18.8b, v22.8b
mov v19.D[0], v22.D[1]
ushR v0.4s, v0.4s, #16
mov v16.8b, v20.8b
mov v17.D[0], v20.D[1]
ushR v2.4s, v2.4s, #16
MOV v31.8b, v18.8b
UZP1 v18.4h, v31.4h, v19.4h
UZP2 v19.4h, v31.4h, v19.4h
sMLAL v0.4s, v27.4h, v10.4h
MOV v31.8b, v16.8b
UZP1 v16.4h, v31.4h, v17.4h
UZP2 v17.4h, v31.4h, v17.4h
sMLAL v2.4s, v25.4h, v10.4h
uMULL v4.4s, v18.4h, v10.4h
uMULL v6.4s, v16.4h, v10.4h
NEG v0.4s, v0.4s
ADD v14.4s, v30.4s , v2.4s
ADD v26.4s, v28.4s , v0.4s
rev64 v14.4s, v14.4s
ushR v4.4s, v4.4s, #16
swp v14.D[0], v14.D[1]
ushR v6.4s, v6.4s, #16
sMLAL v4.4s, v19.4h, v10.4h
LD4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x5], x8
sMLAL v6.4s, v17.4h, v10.4h
ADD v24.4s, v20.4s , v4.4s
rev64 v24.4s, v24.4s
NEG v16.4s, v6.4s
LD4 {v4.4h, v5.4h, v6.4h, v7.4h}, [x1], x8

BNE CORE_LOOP

View file

@ -0,0 +1,148 @@
//.include "ihevc_neon_macros.s"
// Prologue helper: this routine only clobbers general-purpose scratch
// registers, so no SIMD registers are spilled - just x8-x15 and the
// frame/link pair x29/x30.
.macro push_v_regs
stp x8, x9, [sp, #-16]!
stp x10, X11, [sp, #-16]!
stp X12, X13, [sp, #-16]!
stp X14, X15, [sp, #-16]!
stp X29, X30, [sp, #-16]!
.endm
// Epilogue helper: restores in exact reverse order of push_v_regs.
.macro pop_v_regs
ldp X29, X30, [sp], #16
ldp X14, X15, [sp], #16
ldp X12, X13, [sp], #16
ldp X10, X11, [sp], #16
ldp X8, X9, [sp], #16
.endm
.text
.p2align 2
.global ixheaacd_postradixcompute4
//==============================================================================
// ixheaacd_postradixcompute4
//
// Final radix-4 butterfly stage, hard-coded for an FFT length of 16 (see
// the original porting comments below: x3 is always 16). Pure scalar
// integer code: each pass loads 8 complex int32 values from x1 (first
// half) and x4 = x1 + 2*16 (second half), forms the radix-4 sums and
// differences, and scatters the results into four output rows of the
// y buffer at x0. The immediate #14<<1 (= 28) store offsets encode the
// row-to-row stride of the 16-point output layout; the SUB #92 / #100-8
// adjustments rewind x0 between the two half-passes. Runs POSTRADIX4_START
// twice (x3 = 2 loop counter).
//==============================================================================
ixheaacd_postradixcompute4:
// STMFD sp!, {x4-x12, x14}
push_v_regs
//SUB sp, sp, #16
//HARD CODED for FFT Length of 16
// x3 is always 16
//SUB x4, x3, #2 ; y to y offset calculated
//MOV x4, #14
//STR x4, [sp, #8] ; (npoints / 2)*4bytes - 4bytes
//STR x0, [sp, #12] ; (3*(npoints/2))*4bytes - 4bytes
// x0 to x2 offset (npoints / 2)*4bytes
ADD x4, x1, x3, lsl #1 // x1 -> x0, x4 -> x2
MOV x3, #2
POSTRADIX4_START:
// First half: butterfly on x_0..x_7 from the lower input block.
// LDMIA x1!, {x5-x12} // x_0 :x_7
LDP w5, w6, [x1], #8 // x_0 :x_1
LDP w7, w8, [x1], #8 // x_2 :x_3
LDP w9, w10, [x1], #8 // x_4 :x_5
LDP w11, w12, [x1], #8 // x_6 :x_7
ADD w14, w5, w9 // xh0_0 = x_0 + x_4
SUB w5, w5, w9 // xl0_0 = x_0 - x_4
ADD w9, w6, w10 // xh1_0 = x_1 + x_5
SUB w6, w6, w10 // xl1_0 = x_1 - x_5
ADD w10, w7, w11 // xh0_1 = x_2 + x_6
SUB w7, w7, w11 // xl0_1 = x_2 - x_6
ADD w11, w8, w12 // xh1_1 = x_3 + x_7
SUB w8, w8, w12 // xl1_1 = x_3 - x_7
ADD w12, w14, w10 // n00 = xh0_0 + xh0_1
SUB w14, w14, w10 // n20 = xh0_0 - xh0_1
ADD w10, w9, w11 // n01 = xh1_0 + xh1_1
SUB w9, w9, w11 // n21 = xh1_0 - xh1_1
ADD w11, w5, w8 // n10 = xl0_0 + xl1_1
SUB w5, w5, w8 // n30 = xl0_0 - xl1_1
ADD w8, w6, w7 // n31 = xl1_0 + xl0_1
SUB w6, w6, w7 // n11 = xl1_0 - xl0_1
// Scatter into the four output rows; #14<<1 (=28) skips to the next row.
STR w12, [x0], #4 // y0[h2] = n00, x7 -> y0[h2 + 1]
STR w10, [x0], #14<<1 // y0[h2 + 1] = n01, x7 -> y1[h2]
STR w11, [x0], #4 // y1[h2] = n10, x7 -> y1[h2 + 1]
STR w6 , [x0], #14<<1 // y1[h2 + 1] = n11, x7 -> y2[h2]
STR w14, [x0], #4 // y2[h2] = n20, x7 -> y2[h2 + 1]
STR w9 , [x0], #14<<1 // y2[h2 + 1] = n21, x7 -> y3[h2]
STR w5, [x0], #4 // y3[h2] = n30, x7 -> y3[h2 + 1]
STR w8, [x0], #0 // y3[h2 + 1] = n31, x7 -> y0[h2+2]
// Second half: identical butterfly on x_8..x_f from the upper block (x4).
// LDMIA x4!, {x5-x12} // x_0 :x_7
LDP w5, w6, [x4], #8 // x_8 :x_8
LDP w7, w8, [x4], #8 // x_a :x_b
LDP w9, w10, [x4], #8 // x_c :x_d
LDP w11, w12, [x4], #8 // x_e :x_f
SUB x0, x0, #92 // #4*3 + #14<<1 * 3 - 8
ADD w14, w5, w9
SUB w5, w5, w9
ADD w9, w6, w10
SUB w6, w6, w10
ADD w10, w7, w11
SUB w7, w7, w11
ADD w11, w8, w12
SUB w8, w8, w12
ADD w12, w14, w10
SUB w14, w14, w10
ADD w10, w9, w11
SUB w9, w9, w11
ADD w11, w5, w8
SUB w5, w5, w8
ADD w8, w6, w7
SUB w6, w6, w7
STR w12, [x0], #4
STR w10, [x0], #14<<1
STR w11, [x0], #4
STR w6, [x0], #14<<1
STR w14, [x0], #4
STR w9, [x0], #14<<1
STR w5, [x0], #4
STR w8, [x0], #0
// Advance input pointers by half the point count and rewind the output
// pointer for the next pass.
ADD x1, x1, #1 << 5 // x0 += (Word32) npoints >> 1
ADD x4, x4, #1 << 5 // x2 += (Word32) npoints >> 1
SUB x0, x0, #100-8
SUBS w3, w3, #1
BGT POSTRADIX4_START
// LDMFD sp!, {x4-x12, x15}
pop_v_regs
ret

View file

@ -0,0 +1,512 @@
///******************************************************************************
// *
// * Copyright (C) 2018 The Android Open Source Project
// *
// * Licensed under the Apache License, Version 2.0 (the "License");
// * you may not use this file except in compliance with the License.
// * You may obtain a copy of the License at:
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// *
// *****************************************************************************
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
// Prologue helper: saves callee-saved SIMD registers q8-q15 (full 128-bit,
// although AAPCS64 only requires d8-d15) plus the general-purpose scratch
// registers this routine uses (x8-x13, x16-x17, x20-x23).
.macro push_v_regs
stp q8, q9, [sp, #-32]!
stp q10, q11, [sp, #-32]!
stp q12, q13, [sp, #-32]!
stp q14, q15, [sp, #-32]!
stp X8, X9, [sp, #-16]!
stp X10, X11, [sp, #-16]!
stp X12, X13, [sp, #-16]!
stp X22, X23, [sp, #-16]!
stp X16, X17, [sp, #-16]!
stp X20, X21, [sp, #-16]!
.endm
// Epilogue helper: restores in exact reverse order of push_v_regs.
.macro pop_v_regs
ldp X20, X21, [sp], #16
ldp X16, X17, [sp], #16
ldp X22, X23, [sp], #16
ldp X12, X13, [sp], #16
ldp X10, X11, [sp], #16
ldp X8, X9, [sp], #16
ldp q14, q15, [sp], #32
ldp q12, q13, [sp], #32
ldp q10, q11, [sp], #32
ldp q8, q9, [sp], #32
.endm
// Swap two registers through x16 (saved above, so safe to clobber here).
.macro swp reg1, reg2
MOV X16, \reg1
MOV \reg1, \reg2
MOV \reg2, x16
.endm
.text
.global ixheaacd_pretwiddle_compute_armv8
//==============================================================================
// ixheaacd_pretwiddle_compute_armv8
//
// AArch64 NEON pre-twiddle kernel (applied before the inverse FFT in the
// MDCT path of the xHE-AAC decoder). Structure:
//   ARM_PROLOGUE  - scalar complex multiply of the first sample pair,
//                   scaled left or right by |x5| (sign-tested branch),
//                   written to x2; chooses twiddle stride x6 (4 vs 32)
//                   based on whether x4 == 0x100.
//   NEON_PROLOGUE - fills the software pipeline with the first two
//                   8-sample blocks; twiddles gathered lane-wise from x3.
//   CORE_LOOP     - steady state: LD4 de-interleaved loads from x0
//                   (forward) and x1 (backward, stride x8 = -32), 16x16
//                   multiply/accumulate with the twiddle pair v8/v9 and its
//                   rev64 mirror v10/v11, shift by the v14 = dup(x5) vector
//                   (SSHL: negative => right shift), ST2 stores to x2
//                   (forward) and x7 (backward).
//   NEON_EPILOGUE / RESIDUE_NEON - drains the pipeline and handles the
//                   trailing samples with narrower loads/stores.
// The twiddle table pointer x3 is first advanced by 7500 bytes (LDR =7500);
// that offset is a property of the ROM table layout defined elsewhere.
// NOTE(review): x4 appears to be the complex pair count and x5 the shift
// exponent - confirm against the C prototype.
//==============================================================================
ixheaacd_pretwiddle_compute_armv8:
push_v_regs
LSL x7, x4, #4
ADD x7, x2, x7
SUB x7, x7, #4
LDR x22, =7500
ADD x3, x3, x22
// x5 = -x5 (two's complement negate); its sign selects shift direction.
MVN w5, w5
ADD w5, w5, #1
ARM_PROLOGUE:
// Scalar complex multiply of the first input pair by the first twiddle
// word; 32x32->64 SMULL/ASR #32 keeps the high half of each product.
LDRH w21, [x3]
LDRH w22, [x3, #2]
LSL w22, w22, #16
LSL w21, w21, #16
LDR w8, [x3], #4
LDR w9, [x0], #4
SMULL X12, w9, w21
ASR X12, x12, #32
LDR w10, [x1], #-4
SMULL X11, w9, w22
ASR X11, x11, #32
SMULL X23, w10, w22
ASR X23, x23, #32
ADD w9, w12, w23
SMULL X6, w10, w21
ASR X6, x6, #32
MVN w9, w9
ADD w9, w9, #1
SUB w11, w11, w6
// Apply the scale: right-shift when w5 <= 0, left-shift otherwise.
CMP w5, #0
BGT NEXT
MVN w8, w5
ADD w8, w8, #1
ASR w11, w11, w8
ASR w9, w9, w8
B NEXT1
NEXT:
LSL w11, w11, w5
LSL w9, w9, w5
NEXT1:
STR w9, [x2], #4
STR w11, [x2], #4
// Twiddle stride selection: 0x100 pairs => dense table (stride 4),
// otherwise sparse table (stride 32) with a 28-byte skip applied once.
CMP X4, #0x100
BNE NXT
MOV X6, #4
B NXT1
NXT:
MOV X6, #32
ADD X3, X3, #28
NXT1:
// Loop count: x4 = (x4-1)/4 vector iterations of 4 complex pairs each.
SUB X4, X4, #1
ASR X4, X4, #2
SUB x7, x7, #28
NEON_PROLOGUE:
// Fill the pipeline: first two 8-sample blocks computed before CORE_LOOP.
MOV x8, #-32
dup v14.4s, w5
SUB X1, X1, #28
LD2 {v8.h, v9.h}[0], [x3], x6
LD2 {v8.h, v9.h}[1], [x3], x6
LD2 {v8.h, v9.h}[2], [x3], x6
LD2 {v8.h, v9.h}[3], [x3], x6
rev64 v10.4h, v8.4h
rev64 v11.4h, v9.4h
LD4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x0], #32
LD4 {v4.4h, v5.4h, v6.4h, v7.4h}, [x1], x8
rev64 v0.4h, v0.4h
rev64 v1.4h, v1.4h
rev64 v4.4h, v4.4h
rev64 v5.4h, v5.4h
uMULL v30.4s, v2.4h, v9.4h
uMULL v28.4s, v4.4h, v9.4h
uMULL v26.4s, v2.4h, v8.4h
uMULL v24.4s, v4.4h, v8.4h
ushR v30.4s, v30.4s, #16
ushR v28.4s, v28.4s, #16
ushR v26.4s, v26.4s, #16
ushR v24.4s, v24.4s, #16
sMLAL v30.4s, v3.4h, v9.4h
sMLAL v28.4s, v5.4h, v9.4h
sMLAL v26.4s, v3.4h, v8.4h
sMLAL v24.4s, v5.4h, v8.4h
ADD v28.4s, v26.4s , v28.4s
NEG v28.4s, v28.4s
SUB v30.4s, v30.4s , v24.4s
uMULL v22.4s, v0.4h, v11.4h
uMULL v20.4s, v6.4h, v11.4h
uMULL v18.4s, v0.4h, v10.4h
uMULL v16.4s, v6.4h, v10.4h
ushR v22.4s, v22.4s, #16
ushR v20.4s, v20.4s, #16
ushR v18.4s, v18.4s, #16
ushR v16.4s, v16.4s, #16
sMLAL v22.4s, v1.4h, v11.4h
LD2 {v8.h, v9.h}[0], [x3], x6
sMLAL v20.4s, v7.4h, v11.4h
LD2 {v8.h, v9.h}[1], [x3], x6
sMLAL v18.4s, v1.4h, v10.4h
LD2 {v8.h, v9.h}[2], [x3], x6
sMLAL v16.4s, v7.4h, v10.4h
LD2 {v8.h, v9.h}[3], [x3], x6
ADD v20.4s, v20.4s , v18.4s
NEG v20.4s, v20.4s
rev64 v10.4h, v8.4h
rev64 v11.4h, v9.4h
SUB v22.4s, v16.4s , v22.4s
LD4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x0], #32
sshL v20.4s, v20.4s, v14.4s
LD4 {v4.4h, v5.4h, v6.4h, v7.4h}, [x1], x8
rev64 v0.4h, v0.4h
rev64 v1.4h, v1.4h
sshL v22.4s, v22.4s, v14.4s
rev64 v4.4h, v4.4h
rev64 v5.4h, v5.4h
sshL v18.4s, v30.4s, v14.4s
sshL v16.4s, v28.4s, v14.4s
SUB X4, X4, #2
// Steady state: stores of iteration i interleaved with loads and
// arithmetic of iteration i+1.
CORE_LOOP:
uMULL v30.4s, v2.4h, v9.4h
MOV v17.16B, v18.16B
ST2 { v16.4s, v17.4s}, [x2]
ADD x2, x2, #32
uMULL v28.4s, v4.4h, v9.4h
uMULL v26.4s, v2.4h, v8.4h
MOV v21.16B, v22.16B
ST2 { v20.4s, v21.4s}, [x7], x8
uMULL v24.4s, v4.4h, v8.4h
ushR v30.4s, v30.4s, #16
ushR v28.4s, v28.4s, #16
ushR v26.4s, v26.4s, #16
ushR v24.4s, v24.4s, #16
sMLAL v30.4s, v3.4h, v9.4h
sMLAL v28.4s, v5.4h, v9.4h
sMLAL v26.4s, v3.4h, v8.4h
sMLAL v24.4s, v5.4h, v8.4h
ADD v28.4s, v26.4s , v28.4s
NEG v28.4s, v28.4s
SUB v30.4s, v30.4s , v24.4s
uMULL v22.4s, v0.4h, v11.4h
LD2 {v8.h, v9.h}[0], [x3], x6
uMULL v20.4s, v6.4h, v11.4h
uMULL v18.4s, v0.4h, v10.4h
LD2 {v8.h, v9.h}[1], [x3], x6
uMULL v16.4s, v6.4h, v10.4h
ushR v22.4s, v22.4s, #16
LD2 {v8.h, v9.h}[2], [x3], x6
ushR v20.4s, v20.4s, #16
ushR v18.4s, v18.4s, #16
LD2 {v8.h, v9.h}[3], [x3], x6
ushR v16.4s, v16.4s, #16
sMLAL v22.4s, v1.4h, v11.4h
sMLAL v20.4s, v7.4h, v11.4h
sMLAL v18.4s, v1.4h, v10.4h
sMLAL v16.4s, v7.4h, v10.4h
LD4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x0], #32
ADD v20.4s, v20.4s , v18.4s
NEG v20.4s, v20.4s
rev64 v10.4h, v8.4h
rev64 v11.4h, v9.4h
SUB v22.4s, v16.4s , v22.4s
LD4 {v4.4h, v5.4h, v6.4h, v7.4h}, [x1], x8
sshL v20.4s, v20.4s, v14.4s
sshL v22.4s, v22.4s, v14.4s
rev64 v0.4h, v0.4h
rev64 v1.4h, v1.4h
sshL v18.4s, v30.4s, v14.4s
rev64 v4.4h, v4.4h
rev64 v5.4h, v5.4h
sshL v16.4s, v28.4s, v14.4s
SUBS x4, x4, #1
BNE CORE_LOOP
// Drain the final pipelined block.
NEON_EPILOGUE:
uMULL v30.4s, v2.4h, v9.4h
MOV v17.16B, v18.16B
ST2 { v16.4s, v17.4s}, [x2]
ADD x2, x2, #32
uMULL v28.4s, v4.4h, v9.4h
uMULL v26.4s, v2.4h, v8.4h
MOV v21.16B, v22.16B
ST2 { v20.4s, v21.4s}, [x7], x8
uMULL v24.4s, v4.4h, v8.4h
ushR v30.4s, v30.4s, #16
ushR v28.4s, v28.4s, #16
ushR v26.4s, v26.4s, #16
ushR v24.4s, v24.4s, #16
sMLAL v30.4s, v3.4h, v9.4h
sMLAL v28.4s, v5.4h, v9.4h
sMLAL v26.4s, v3.4h, v8.4h
sMLAL v24.4s, v5.4h, v8.4h
ADD v28.4s, v26.4s , v28.4s
NEG v28.4s, v28.4s
SUB v30.4s, v30.4s , v24.4s
uMULL v22.4s, v0.4h, v11.4h
uMULL v20.4s, v6.4h, v11.4h
uMULL v18.4s, v0.4h, v10.4h
uMULL v16.4s, v6.4h, v10.4h
ushR v22.4s, v22.4s, #16
ushR v20.4s, v20.4s, #16
ushR v18.4s, v18.4s, #16
ushR v16.4s, v16.4s, #16
sMLAL v22.4s, v1.4h, v11.4h
sMLAL v20.4s, v7.4h, v11.4h
sMLAL v18.4s, v1.4h, v10.4h
sMLAL v16.4s, v7.4h, v10.4h
ADD v20.4s, v20.4s , v18.4s
NEG v20.4s, v20.4s
SUB v22.4s, v16.4s , v22.4s
sshL v20.4s, v20.4s, v14.4s
sshL v22.4s, v22.4s, v14.4s
sshL v18.4s, v30.4s, v14.4s
sshL v16.4s, v28.4s, v14.4s
MOV v17.16B, v18.16B
ST2 { v16.4s, v17.4s}, [x2]
ADD x2, x2, #32
MOV v21.16B, v22.16B
ST2 { v20.4s, v21.4s}, [x7], x8
// Trailing samples: assemble one last block from narrower loads (2s and
// single-lane), de-interleave with UZP1/UZP2, run the same complex
// multiply, and store with narrowing ST2/lane stores.
RESIDUE_NEON:
MOV x10, #-16
movi v3.2s, #0x00000000
movi v4.2s, #0x00000000
LD2 {v21.2s, v22.2s}, [x0], #16
MOV v0.8B, v21.8B
MOV v2.8B, v22.8B
LD1 {v1.s}[0], [x0], #4;
LD1 {v3.s}[0], [x0], #4;
LD1 {v1.s}[1], [x0]
MOV v21.8B, v0.8B
UZP1 v0.4h, v21.4h, v1.4h
UZP2 v1.4h, v21.4h, v1.4h
MOV v21.8B, v2.8B
UZP1 v2.4h, v21.4h, v3.4h
UZP2 v3.4h, v21.4h, v3.4h
ADD x1, x1, #4
LD1 {v6.s}[0], [x1], #4
LD1 {v4.s}[1], [x1], #4
LD1 {v6.s}[1], [x1], #4
LD2 {v21.2s, v22.2s}, [x1], #16
MOV v5.8B, v21.8B
MOV v7.8B, v22.8B
MOV v21.8B, v4.8B
UZP1 v4.4h, v21.4h, v5.4h
UZP2 v5.4h, v21.4h, v5.4h
MOV v21.8B, v6.8B
UZP1 v6.4h, v21.4h, v7.4h
UZP2 v7.4h, v21.4h, v7.4h
rev64 v0.4h, v0.4h
rev64 v1.4h, v1.4h
rev64 v4.4h, v4.4h
rev64 v5.4h, v5.4h
LD2 {v8.h, v9.h}[0], [x3], x6
LD2 {v8.h, v9.h}[1], [x3], x6
LD2 {v8.h, v9.h}[2], [x3], x6
LD2 {v8.h, v9.h}[3], [x3], x6
rev64 v10.4h, v8.4h
rev64 v11.4h, v9.4h
uMULL v30.4s, v2.4h, v9.4h
uMULL v28.4s, v4.4h, v9.4h
uMULL v26.4s, v2.4h, v8.4h
uMULL v24.4s, v4.4h, v8.4h
ushR v30.4s, v30.4s, #16
ushR v28.4s, v28.4s, #16
ushR v26.4s, v26.4s, #16
ushR v24.4s, v24.4s, #16
sMLAL v30.4s, v3.4h, v9.4h
sMLAL v28.4s, v5.4h, v9.4h
sMLAL v26.4s, v3.4h, v8.4h
sMLAL v24.4s, v5.4h, v8.4h
ADD v28.4s, v26.4s , v28.4s
NEG v28.4s, v28.4s
SUB v30.4s, v30.4s , v24.4s
uMULL v22.4s, v0.4h, v11.4h
uMULL v20.4s, v6.4h, v11.4h
uMULL v18.4s, v0.4h, v10.4h
uMULL v16.4s, v6.4h, v10.4h
ushR v22.4s, v22.4s, #16
ushR v20.4s, v20.4s, #16
ushR v18.4s, v18.4s, #16
ushR v16.4s, v16.4s, #16
sMLAL v22.4s, v1.4h, v11.4h
sMLAL v20.4s, v7.4h, v11.4h
sMLAL v18.4s, v1.4h, v10.4h
sMLAL v16.4s, v7.4h, v10.4h
ADD v20.4s, v20.4s , v18.4s
NEG v20.4s, v20.4s
SUB v22.4s, v16.4s , v22.4s
sshL v20.4s, v20.4s, v14.4s
sshL v22.4s, v22.4s, v14.4s
sshL v18.4s, v30.4s, v14.4s
sshL v16.4s, v28.4s, v14.4s
MOV v21.16B, v22.16B
ST2 { v20.4s, v21.4s}, [x7]
mov v17.16B, v18.16B
ST2 {v16.2s, v17.2s}, [x2]
ADD x2, x2, #16
ST2 {v16.s, v17.s}[2], [x2]
ADD x2, x2, #8
END1:
pop_v_regs
ret

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,777 @@
///******************************************************************************
// *
// * Copyright (C) 2018 The Android Open Source Project
// *
// * Licensed under the Apache License, Version 2.0 (the "License");
// * you may not use this file except in compliance with the License.
// * You may obtain a copy of the License at:
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// *
// *****************************************************************************
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
// Prologue helper: push callee-saved SIMD low halves (d8-d15, the
// callee-saved portion of v8-v15 under AAPCS64), the x8-x17 scratch
// registers (saved defensively; x16 is clobbered by the swp macro below),
// and the frame/link pair x29/x30. Must be mirrored exactly by pop_v_regs.
.macro push_v_regs
stp d8, d9, [sp, #-16]!
stp d10, d11, [sp, #-16]!
stp d12, d13, [sp, #-16]!
stp d14, d15, [sp, #-16]!
stp X8, X9, [sp, #-16]!
stp X10, X11, [sp, #-16]!
stp X12, X13, [sp, #-16]!
stp X14, X15, [sp, #-16]!
stp X16, X17, [sp, #-16]!
stp X29, X30, [sp, #-16]!
.endm
// Epilogue helper: restore everything pushed by push_v_regs, in exact
// reverse order (LIFO), leaving sp back at its entry value.
.macro pop_v_regs
ldp X29, X30, [sp], #16
ldp X16, X17, [sp], #16
ldp X14, X15, [sp], #16
ldp X12, X13, [sp], #16
ldp X10, X11, [sp], #16
ldp X8, X9, [sp], #16
ldp d14, d15, [sp], #16
ldp d12, d13, [sp], #16
ldp d10, d11, [sp], #16
ldp d8, d9, [sp], #16
.endm
// Swap the contents of two operands via x16 (which push_v_regs preserved).
// Callers below pass vector D-lane operands (e.g. V17.D[0]); the MOVs then
// assemble as UMOV/INS element moves. CLOBBERS x16.
.macro swp reg1, reg2
MOV x16, \reg1
MOV \reg1, \reg2
MOV \reg2, x16
.endm
//
// ixheaacd_sbr_imdct_using_fft (AArch64 NEON)
//
// Mixed-radix FFT kernel used by the SBR IMDCT: a radix-8 or radix-4 first
// stage over bit-reversed input, followed by iterated radix-4 passes with
// 16-bit twiddle factors.
//
// Register usage as observed in this routine:
//   x1 - transform length (compared against 16 and 32 at entry; doubled to
//        a byte stride for the interleaved re/im loads)
//   x2 - input buffer of interleaved 32-bit re/im pairs (LD2 loads)
//   x3 - output/work buffer (ST2 stores)
//   x0 - 16-bit twiddle table (read in MIDDLE_LOOP_R4)
//   x4/x7 - byte-sized bit-reversal index table (x7 copied into x4)
// NOTE(review): the roles above are inferred solely from the access
// patterns in this file — confirm against the C-side prototype.
.text
.p2align 2
.global ixheaacd_sbr_imdct_using_fft
ixheaacd_sbr_imdct_using_fft:
push_v_regs
// Dispatch on length: 16 -> radix-4 first stage, otherwise radix-8.
COND_6: cmp x1, #0x10
bne COND_7
MOV X8, #1
MOV X4, X7
B RADIX_4_FIRST_START
// NOTE(review): the flags set by this cmp are never tested — for any
// length != 16 control falls straight through into RADIX_8_FIRST_START.
// Confirm the 0x20 comparison is intentionally vestigial.
COND_7: cmp x1, #0x20
mov x8, #1
mov x4, x7
// Radix-8 first stage: W9 = number of groups (len/32, 4 butterflies per
// iteration); X1 doubled into the per-element byte stride.
RADIX_8_FIRST_START:
LSR W9 , W1, #5
LSL W1, W1, #1
// Each iteration gathers 4 bit-reversed input octets (indices from the
// byte table at x4) into SIMD lanes and performs one 8-point butterfly.
RADIX_8_FIRST_LOOP:
MOV X5 , X2
MOV X6 , X2
MOV X7 , X2
MOV X11 , X2
LDRB W12, [X4]
ADD X5, X5, X12, LSL #3
LD2 {V0.S, V1.S}[0], [X5], X1
ADD X5, X5, X1
LD2 {V4.S, V5.S}[0], [X5], X1
SUB X5, X5, X1, LSL #1
LD2 {V2.S, V3.S}[0], [X5], X1
ADD X5, X5, X1
LD2 {V6.S, V7.S}[0], [X5], X1
SUB X5, X5, X1, LSL #2
LDRB W12, [X4, #1]
ADD X6, X6, X12, LSL #3
LD2 {V0.S, V1.S}[1], [X6] , X1
ADD X6, X6, X1
LD2 {V4.S, V5.S}[1], [X6] , X1
SUB X6, X6, X1, LSL #1
LD2 {V2.S, V3.S}[1], [X6] , X1
ADD X6, X6, X1
LD2 {V6.S, V7.S}[1], [X6], X1
SUB X6, X6, X1, LSL #2
LDRB W12, [X4, #2]
ADD X7, X7, X12, LSL #3
LD2 {V0.S, V1.S}[2], [X7] , X1
ADD X7, X7, X1
LD2 {V4.S, V5.S}[2], [X7] , X1
SUB X7, X7, X1, LSL #1
LDRB W12, [X4, #3]
ADD X11, X11, X12, LSL #3
LD2 {V0.S, V1.S}[3], [X11] , X1
ADD X11, X11, X1
LD2 {V4.S, V5.S}[3], [X11] , X1
SUB X11, X11, X1, LSL #1
// Butterfly adds/subs are interleaved with the remaining lane loads to
// hide load latency (hand-scheduled; do not reorder).
ADD V8.4S, V0.4S, V4.4S
LD2 {V2.S, V3.S}[2], [X7] , X1
ADD X7, X7, X1
SUB V9.4S, V0.4S, V4.4S
LD2 {V6.S, V7.S}[2], [X7], X1
SUB X7, X7, X1, LSL #2
ADD V0.4S, V1.4S, V5.4S
LD2 {V2.S, V3.S}[3], [X11] , X1
ADD X11, X11, X1
SUB V4.4S, V1.4S, V5.4S
LD2 {V6.S, V7.S}[3], [X11], X1
SUB X11, X11, X1, LSL #2
ADD X4, X4, #4
ADD X5, X5, X1, LSR #1
ADD X6, X6, X1, LSR #1
ADD X7, X7, X1, LSR #1
ADD X11, X11, X1, LSR #1
ADD V1.4S, V2.4S, V6.4S
LD2 {V14.S, V15.S}[0], [X5] , X1
SUB V5.4S, V2.4S, V6.4S
LD2 {V10.S, V11.S}[0], [X5] , X1
ADD V2.4S, V3.4S, V7.4S
LD2 {V12.S, V13.S}[0], [X5] , X1
SUB V6.4S, V3.4S, V7.4S
LD2 {V14.S, V15.S}[1], [X6] , X1
ADD V3.4S, V9.4S, V6.4S
LD2 {V10.S, V11.S}[1], [X6] , X1
SUB V7.4S, V9.4S, V6.4S
LD2 {V12.S, V13.S}[1], [X6] , X1
SUB V6.4S, V4.4S, V5.4S
LD2 {V14.S, V15.S}[2], [X7] , X1
ADD V9.4S, V4.4S, V5.4S
LD2 {V10.S, V11.S}[2], [X7] , X1
ADD V4.4S, V8.4S, V1.4S
LD2 {V12.S, V13.S}[2], [X7] , X1
SUB V5.4S, V8.4S, V1.4S
LD2 {V14.S, V15.S}[3], [X11] , X1
ADD V8.4S, V0.4S, V2.4S
LD2 {V10.S, V11.S}[3], [X11] , X1
SUB V0.4S, V0.4S, V2.4S
LD2 {V12.S, V13.S}[3], [X11] , X1
LD2 {V1.S, V2.S}[0], [X5], X1
ADD V17.4S, V14.4S, V12.4S
LD2 {V1.S, V2.S}[1], [X6] , X1
SUB V16.4S, V14.4S, V12.4S
LD2 {V1.S, V2.S}[2], [X7] , X1
ADD V14.4S, V15.4S, V13.4S
LD2 {V1.S, V2.S}[3], [X11] , X1
SUB V12.4S, V15.4S, V13.4S
ADD V15.4S, V10.4S, V1.4S
SUB V13.4S, V10.4S, V1.4S
ADD V10.4S, V11.4S, V2.4S
SUB V1.4S, V11.4S, V2.4S
ADD V11.4S, V17.4S, V15.4S
SUB V2.4S, V17.4S, V15.4S
ADD V17.4S, V14.4S, V10.4S
SUB V15.4S, V14.4S, V10.4S
ADD V14.4S, V16.4S, V12.4S
SUB V10.4S, V16.4S, V12.4S
ADD V16.4S, V13.4S, V1.4S
SUB V12.4S, V13.4S, V1.4S
ADD V1.4S , V14.4S, V12.4S
SUB V13.4S, V14.4S, V12.4S
SUB V12.4S, V16.4S, V10.4S
// UZP1/UZP2 split each 32-bit word into its low/high 16-bit halves so a
// 32x16 multiply by the twiddle below can be composed from two 16-bit
// multiplies (UMULL on the low half, SQDMLAL on the high half).
UZP1 V22.8H, V1.8H, V1.8H
UZP2 V23.8H, V1.8H, V1.8H
ADD V14.4S, V16.4S, V10.4S
UZP1 V26.8H, V13.8H, V13.8H
UZP2 V27.8H, V13.8H, V13.8H
ADD V16.4S, V4.4S, V11.4S
UZP1 V24.8H, V12.8H, V12.8H
UZP2 V25.8H, V12.8H, V12.8H
SUB V10.4S, V4.4S, V11.4S
UZP1 V28.8H, V14.8H, V14.8H
UZP2 V29.8H, V14.8H, V14.8H
ADD V4.4S, V8.4S, V17.4S
// 0x5a82 = 23170 ~= (1/sqrt(2)) in Q15 — the odd-twiddle constant of the
// radix-8 butterfly.
MOV W14, #0x5a82
SUB V11.4S, V8.4S, V17.4S
ADD V8.4S, V5.4S, V15.4S
SUB V17.4S, V5.4S, V15.4S
SUB V5.4S, V0.4S, V2.4S
ADD V15.4S, V0.4S, V2.4S
DUP V31.4H, W14
UMULL V19.4S, V26.4H, V31.4H
UMULL V18.4S, V28.4H, V31.4H
SSHR V19.4S, V19.4S, #15
SSHR V18.4S, V18.4S, #15
SQDMLAL V19.4S, V27.4H, V31.4H
SQDMLAL V18.4S, V29.4H, V31.4H
UMULL V13.4S, V24.4H, V31.4H
UMULL V14.4S, V22.4H, V31.4H
ADD V20.4S, V3.4S, V19.4S
SUB V21.4S, V3.4S, V19.4S
ADD V30.4S, V6.4S, V18.4S
SUB V6.4S, V6.4S, V18.4S
SSHR V13.4S, V13.4S, #15
SSHR V14.4S, V14.4S, #15
SQDMLAL V13.4S, V25.4H, V31.4H
SQDMLAL V14.4S, V23.4H, V31.4H
ADD V3.4S, V7.4S, V13.4S
SUB V19.4S, V7.4S, V13.4S
ADD V1.4S, V9.4S, V14.4S
SUB V18.4S, V9.4S, V14.4S
// Transpose the 4x4 lane blocks back into per-output-index order
// (swp exchanges 64-bit halves; TRN1/TRN2 interleave 32-bit lanes),
// with a <<1 scale applied to every result before storing.
swp V17.D[0], V8.D[0]
swp V17.D[1], V8.D[1]
swp V4.D[0], V16.D[0]
swp V4.D[1], V16.D[1]
TRN1 V12.4S, V4.4S, V20.4S
TRN2 V22.4S, V4.4S, V20.4S
SHL V12.4S, V12.4S, #1
TRN1 V9.4S, V17.4S, V3.4S
TRN2 V2.4S, V17.4S, V3.4S
SHL V22.4S, V22.4S, #1
SHL V9.4S, V9.4S, #1
TRN1 V24.4S, V10.4S, V21.4S
TRN2 V7.4S, V10.4S, V21.4S
SHL V2.4S, V2.4S, #1
SHL V24.4S, V24.4S, #1
TRN1 V13.4S, V16.4S, V6.4S
TRN2 V23.4S, V16.4S, V6.4S
SHL V7.4S, V7.4S, #1
SHL V13.4S, V13.4S, #1
TRN1 V10.4S, V5.4S, V18.4S
TRN2 V3.4S, V5.4S, V18.4S
SHL V23.4S, V23.4S, #1
SHL V10.4S, V10.4S, #1
TRN1 V26.4S, V8.4S, V19.4S
TRN2 V4.4S, V8.4S, V19.4S
SHL V3.4S, V3.4S, #1
SHL V26.4S, V26.4S, #1
TRN1 V25.4S, V11.4S, V30.4S
TRN2 V8.4S, V11.4S, V30.4S
SHL V4.4S, V4.4S, #1
SHL V25.4S, V25.4S, #1
TRN1 V27.4S, V15.4S, V1.4S
TRN2 V5.4S, V15.4S, V1.4S
SHL V8.4S, V8.4S, #1
SHL V27.4S, V27.4S, #1
swp V9.D[0], V12.D[1]
SHL V5.4S, V5.4S, #1
swp V2.D[0], V22.D[1]
swp V24.D[1], V26.D[0]
swp V7.D[1], V4.D[0]
swp V10.D[0], V13.D[1]
swp V3.D[0], V23.D[1]
swp V27.D[0], V25.D[1]
swp V5.D[0], V8.D[1]
// Write the 8 butterfly outputs, 32 bytes (4 re/im pairs) apart.
MOV X15, #32
ST2 {V12.4S, V13.4S}, [X3], X15
ST2 {V24.4S, V25.4S}, [X3], X15
ST2 {V22.4S, V23.4S}, [X3], X15
ST2 {V7.4S, V8.4S}, [X3], X15
ST2 {V9.4S, V10.4S}, [X3], X15
ST2 {V26.4S, V27.4S}, [X3], X15
ST2 {V2.4S, V3.4S}, [X3], X15
ST2 {V4.4S, V5.4S}, [X3], X15
SUBS X9, X9, #1
BNE RADIX_8_FIRST_LOOP
// Rewind x3 to the buffer start and seed the later radix-4 pass
// parameters (x5 = group count seed, x4 = twiddle step, x6 = inner count).
LSR X1, X1, #1
LSL X15, X1, #3
SUB X3, X3, X15
MOV X5, #8
MOV X4, #32
LSR X15, X1, #5
MOV X6, X15
B RADIX_4_FIRST_ENDS
RADIX_8_FIRST_ENDS:
// Radix-4 first stage (length-16 path): W9 = len/16 groups.
RADIX_4_FIRST_START:
LSR W9, W1, #4
LSL W1, W1, #1
RADIX_4_LOOP:
MOV X5 , X2
MOV X6 , X2
MOV X7 , X2
MOV X11 , X2
LDRB W12, [X4, #0]
ADD X5, X5, X12, LSL #3
LD2 {V0.S, V1.S}[0], [X5] , X1
ADD X5, X5, X1
LD2 {V8.S, V9.S}[0], [X5] , X1
SUB X5, X5, X1, LSL #1
LD2 {V4.S, V5.S}[0], [X5] , X1
ADD X5, X5, X1
LD2 {V12.S, V13.S}[0], [X5] , X1
LDRB W12, [X4, #1]
ADD X6, X6, X12, LSL #3
LD2 {V0.S, V1.S}[1], [X6] , X1
ADD X6, X6, X1
LD2 {V8.S, V9.S}[1], [X6] , X1
SUB X6, X6, X1, LSL #1
LD2 {V4.S, V5.S}[1], [X6] , X1
ADD X6, X6, X1
LD2 {V12.S, V13.S}[1], [X6] , X1
LDRB W12, [X4, #2]
ADD X7, X7, X12, LSL #3
LD2 {V0.S, V1.S}[2], [X7] , X1
ADD X7, X7, X1
LD2 {V8.S, V9.S}[2], [X7] , X1
LDRB W12, [X4, #3]
ADD X11, X11, X12 , LSL #3
LD2 {V0.S, V1.S}[3], [X11] , X1
ADD X11, X11, X1
LD2 {V8.S, V9.S}[3], [X11] , X1
SUB X7, X7, X1, LSL #1
// 4-point butterfly interleaved with remaining lane loads.
ADD V16.4S, V0.4S, V8.4S
LD2 {V4.S, V5.S}[2], [X7] , X1
ADD X7, X7, X1
ADD V18.4S, V1.4S, V9.4S
LD2 {V12.S, V13.S}[2], [X7] , X1
SUB X11, X11, X1, LSL #1
SUB V20.4S, V0.4S, V8.4S
LD2 {V4.S, V5.S}[3], [X11] , X1
ADD X11, X11, X1
SUB V22.4S, V1.4S, V9.4S
LD2 {V12.S, V13.S}[3], [X11] , X1
ADD X4, X4, #4
ADD V24.4S, V4.4S, V12.4S
ADD V26.4S, V5.4S, V13.4S
SUB V28.4S, V4.4S, V12.4S
SUB V30.4S, V5.4S, V13.4S
ADD V17.4S, V16.4S, V24.4S
ADD V11.4S, V18.4S, V26.4S
SUB V19.4S, V16.4S, V24.4S
SUB V15.4S, V18.4S, V26.4S
ADD V8.4S, V20.4S, V30.4S
SUB V9.4S, V22.4S, V28.4S
ADD V13.4S, V22.4S, V28.4S
SUB V12.4S, V20.4S, V30.4S
// Transpose back to output order, scale by <<1, and store.
TRN1 V0.4S, V17.4S, V8.4S
TRN2 V8.4S, V17.4S, V8.4S
SHL V0.4S, V0.4S, #1
TRN1 V4.4S, V19.4S, V12.4S
TRN2 V12.4S, V19.4S, V12.4S
SHL V8.4S, V8.4S, #1
SHL V4.4S, V4.4S, #1
TRN1 V1.4S, V11.4S, V9.4S
TRN2 V9.4S, V11.4S, V9.4S
SHL V12.4S, V12.4S, #1
SHL V1.4S, V1.4S, #1
TRN1 V5.4S, V15.4S, V13.4S
TRN2 V13.4S, V15.4S, V13.4S
SHL V9.4S, V9.4S, #1
SHL V5.4S, V5.4S, #1
swp V4.D[0], V0.D[1]
SHL V13.4S, V13.4S, #1
swp V12.D[0], V8.D[1]
swp V5.D[0], V1.D[1]
swp V13.D[0], V9.D[1]
MOV X15, #32
ST2 {V0.4S, V1.4S}, [X3], X15
ST2 {V8.4S, V9.4S}, [X3], X15
ST2 {V4.4S, V5.4S}, [X3], X15
ST2 {V12.4S, V13.4S}, [X3], X15
SUBS W9, W9, #1
BNE RADIX_4_LOOP
LSR X1, X1, #1
SUB X3, X3, X1, LSL #3
MOV X5, #4
MOV X4, #64
LSR X6, X1, #4
// Iterated radix-4 stages: outer loop over stages (x8), middle loop over
// twiddle groups (x7), inner loop over butterflies in a group (x10).
RADIX_4_FIRST_ENDS:
MOV x30, X3
LSR X5, X5, #2
OUTER_LOOP_R4:
MOV X14, x30
MOV X7, X5
MOV X2, #0
MOV X9, X0
LSL X12, X5, #5
// Load the three 16-bit twiddle pairs for this group from the table at x0.
MIDDLE_LOOP_R4:
LD2 {V20.H, V21.H}[0], [X9], X2
LD2 {V22.H, V23.H}[0], [X9], X2
ADD X11, X2, X4, LSL #2
LD2 {V24.H, V25.H}[0], [X9]
ADD X10, X0, X11
LD2 {V20.H, V21.H}[1], [X10], X11
LD2 {V22.H, V23.H}[1], [X10], X11
ADD X2, X11, X4, LSL #2
LD2 {V24.H, V25.H}[1], [X10]
ADD X9, X0, X2
LD2 {V20.H, V21.H}[2], [X9], X2
LD2 {V22.H, V23.H}[2], [X9], X2
ADD X11, X2, X4, LSL #2
LD2 {V24.H, V25.H}[2], [X9]
ADD X10, X0, X11
LD2 {V20.H, V21.H}[3], [X10], X11
LD2 {V22.H, V23.H}[3], [X10], X11
ADD X2, X11, X4, LSL #2
LD2 {V24.H, V25.H}[3], [X10]
ADD X9, X0, X2
MOV X10, X6
// Complex multiply of the three upper butterfly legs by their twiddles,
// done as split 16-bit low/high-half multiplies accumulated at Q15.
INNER_LOOP_R4:
LD2 {V30.4S, V31.4S}, [X14], X12
SSHR V30.4S, V30.4S, #1
LD4 {V16.4H, V17.4H, V18.4H, V19.4H}, [X14], X12
SSHR V31.4S, V31.4S, #1
USHR V16.4H, V16.4H, #1
LD4 {V26.4H, V27.4H, V28.4H, V29.4H}, [X14], X12
USHR V18.4H, V18.4H, #1
SMULL V11.4S, V16.4H, V20.4H
SMLSL V11.4S, V18.4H, V21.4H
LD4 {V0.4H, V1.4H, V2.4H, V3.4H}, [X14], X12
SMULL V12.4S, V16.4H, V21.4H
SMLAL V12.4S, V18.4H, V20.4H
USHR V26.4H, V26.4H, #1
USHR V28.4H, V28.4H, #1
LSL x29, X12, #2
SUB X14, X14, X12, LSL #2
USHR V0.4H, V0.4H, #1
USHR V2.4H, V2.4H, #1
SMULL V13.4S, V26.4H, V22.4H
SMLSL V13.4S, V28.4H, V23.4H
SSHR V11.4S, V11.4S, #15
SMULL V14.4S, V26.4H, V23.4H
SMLAL V14.4S, V28.4H, V22.4H
SMULL V15.4S, V0.4H, V24.4H
SMLSL V15.4S, V2.4H, V25.4H
SMLAL V11.4S, V17.4H, V20.4H
SMLSL V11.4S, V19.4H, V21.4H
SSHR V12.4S, V12.4S, #15
SSHR V13.4S, V13.4S, #15
SSHR V14.4S, V14.4S, #15
SSHR V15.4S, V15.4S, #15
SMLAL V12.4S, V17.4H, V21.4H
SMLAL V12.4S, V19.4H, V20.4H
SMULL V5.4S, V0.4H, V25.4H
SMLAL V5.4S, V2.4H, V24.4H
SMLAL V13.4S, V27.4H, V22.4H
SMLSL V13.4S, V29.4H, V23.4H
SMLAL V14.4S, V27.4H, V23.4H
SMLAL V14.4S, V29.4H, V22.4H
SMLAL V15.4S, V1.4H, V24.4H
SMLSL V15.4S, V3.4H, V25.4H
SSHR V5.4S, V5.4S, #15
SMLAL V5.4S, V1.4H, V25.4H
SMLAL V5.4S, V3.4H, V24.4H
// First group of a stage (x7 == x5): lane 0 carries the trivial twiddle
// (W^0); reload those elements as plain >>1 values, bypassing the
// complex multiply just computed for that lane.
SUBS x17, X7, X5
BNE BYPASS_IF
ADD X14, X14, X12
LDR W3, [X14]
ADD X14, X14, X12
ASR W3, W3, #1
MOV V11.S[0], W3
LDR W3, [X14]
ADD X14, X14, X12
ASR W3, W3, #1
MOV V13.S[0], W3
LDR W3, [X14]
ASR W3, W3, #1
MOV V15.S[0], W3
SUB X14, X14, X12, LSL #1
ADD X14, X14, #4
LDR W3, [X14]
ADD X14, X14, X12
ASR W3, W3, #1
MOV V12.S[0], W3
LDR W3, [X14]
ADD X14, X14, X12
ASR W3, W3, #1
MOV V14.S[0], W3
LDR W3, [X14]
ADD X14, X14, X12
ASR W3, W3, #1
MOV V5.S[0], W3
SUB X14, X14, #4
SUB X14, X14, x29
// Recombine the four legs and store the butterfly outputs in place.
BYPASS_IF:
ADD V6.4S, V30.4S, V13.4S
ADD V7.4S, V31.4S, V14.4S
SUB V30.4S, V30.4S, V13.4S
SUB V31.4S, V31.4S, V14.4S
ADD V8.4S, V11.4S, V15.4S
ADD V9.4S, V12.4S, V5.4S
SUB V15.4S, V11.4S, V15.4S
SUB V14.4S, V12.4S, V5.4S
ADD V10.4S, V6.4S, V8.4S
ADD V11.4S, V7.4S, V9.4S
ADD V12.4S, V30.4S, V14.4S
SUB V13.4S, V31.4S, V15.4S
SUB V6.4S, V6.4S, V8.4S
ST2 {V10.4S, V11.4S}, [X14], X12
SUB V7.4S, V7.4S, V9.4S
SUB V8.4S, V30.4S, V14.4S
ST2 {V12.4S, V13.4S}, [X14], X12
ADD V9.4S, V31.4S, V15.4S
ST2 {V6.4S, V7.4S}, [X14], X12
ST2 {V8.4S, V9.4S}, [X14], X12
SUBS X10, X10, #1
BNE INNER_LOOP_R4
SUB X14, X14, X1, LSL #3
ADD X14, X14, #32
SUBS X7, X7, #1
BNE MIDDLE_LOOP_R4
// Next stage: twiddle step /4, groups x4, inner count /4.
LSR X4, X4, #2
LSL X5, X5, #2
LSR X6, X6, #2
SUBS X8, X8, #1
BNE OUTER_LOOP_R4
END_LOOPS:
pop_v_regs
RET

View file

@ -0,0 +1,341 @@
// Prologue helper for the QMF analysis routine below: save the
// callee-saved SIMD low halves (d8-d15, AAPCS64) plus x8-x17 and the
// frame/link pair. Mirrored exactly by pop_v_regs.
.macro push_v_regs
stp d8, d9, [sp, #-16]!
stp d10, d11, [sp, #-16]!
stp d12, d13, [sp, #-16]!
stp d14, d15, [sp, #-16]!
stp X8, X9, [sp, #-16]!
stp X10, X11, [sp, #-16]!
stp X12, X13, [sp, #-16]!
stp X14, X15, [sp, #-16]!
stp X16, X17, [sp, #-16]!
stp X29, X30, [sp, #-16]!
.endm
// Epilogue helper: restore in exact reverse order of push_v_regs.
.macro pop_v_regs
ldp X29, X30, [sp], #16
ldp X16, X17, [sp], #16
ldp X14, X15, [sp], #16
ldp X12, X13, [sp], #16
ldp X10, X11, [sp], #16
ldp X8, X9, [sp], #16
ldp d14, d15, [sp], #16
ldp d12, d13, [sp], #16
ldp d10, d11, [sp], #16
ldp d8, d9, [sp], #16
.endm
//
// ixheaacd_sbr_qmfanal32_winadds (AArch64 NEON)
//
// SBR 32-band QMF analysis "windowing + add" kernel. Two phases:
//   1) LOOP: scatter 32 strided 16-bit input samples (stride = x7
//      elements, per the retained "//stride" comment) into the filter-state
//      delay line at x5 in reversed order ("//filterStates", "//timeIn").
//   2) SKIP_LOOP/LOOP_1: five-tap MAC of state halfwords against two
//      window-coefficient streams (x0/x2 and x1/x3), accumulating 4x32-bit
//      sums that are stored to the two output areas x4 and x4+128.
// NOTE(review): parameter roles are taken from this file's own commented-
// out ARMv7 scaffolding; confirm against the C prototype.
.text
.p2align 2
.global ixheaacd_sbr_qmfanal32_winadds
ixheaacd_sbr_qmfanal32_winadds: // PROC
// STMFD sp!, {x4-x12, x14}
push_v_regs
stp x19, x20, [sp, #-16]!
//VPUSH {D8 - D15}
//LDR w5, [SP, #108] //filterStates
//sxtw x5,w5
//LDR w6, [SP, #112] //timeIn
//sxtw x6,w6
//LDR w7, [SP, #116] //stride
//sxtw x7,w7
LSL x9, x7, #1
MOV x20, x4
ADD x5, x5, #64
MOV w10, #3
//ADD x5, x5, #56
//MOV x10, #1
////SUB x6, x6, x9
//CMP x7, #1
//MOV x11, #-8
//BGT LOOP_SKIP_ODD
// Copy 8 strided samples per iteration, 4 iterations (w10 counts 3..0),
// writing backwards with pre-decrement so the state ends up reversed.
LOOP:
LDRSH w4 , [x6]
ADD x6, x6, x9
LDRSH w8 , [x6]
ADD x6, x6, x9
LDRSH w11 , [x6]
ADD x6, x6, x9
LDRSH w12 , [x6]
ADD x6, x6, x9
STRH w4 , [x5 , #-2]!
STRH w8 , [x5 , #-2]!
STRH w11 , [x5 , #-2]!
STRH w12 , [x5 , #-2]!
LDRSH w4 , [x6]
ADD x6, x6, x9
LDRSH w8 , [x6]
ADD x6, x6, x9
LDRSH w11 , [x6]
ADD x6, x6, x9
LDRSH w12 , [x6]
ADD x6, x6, x9
STRH w4 , [x5 , #-2]!
STRH w8 , [x5 , #-2]!
STRH w11 , [x5 , #-2]!
STRH w12 , [x5 , #-2]!
SUBS w10, w10, #1
BPL LOOP
//LOOP:
// LD1 {v0.4h} , [x6], #8
// LD1 {v1.4h} , [x6], #8
//
// REV64 v4.4h , v0.4h
// REV64 v5.4h , v1.4h
//
// ST1 {v4.4h} , [x5] , x11
// ST1 {v5.4h} , [x5] , x11
//
// LD1 {v2.4h} , [x6], #8
// LD1 {v3.4h} , [x6], #8
//
// REV64 v6.4h , v2.4h
// REV64 v7.4h , v3.4h
//
// ST1 {v6.4h} , [x5] , x11
// ST1 {v7.4h} , [x5] , x11
//
// SUBS x10, x10, #1
// BPL LOOP
// B SKIP_LOOP
//
//LOOP_SKIP_ODD:
// LD2 {v0.4h , v1.4h} , [x6], #16
// LD2 {v2.4h , v3.4h} , [x6], #16
//
// REV64 v1.4h , v0.4h
// REV64 v3.4h , v2.4h
//
// ST1 {v1.4h} , [x5], x11
// ST1 {v3.4h} , [x5], x11
//
// LD2 {v4.4h , v5.4h} , [x6], #16
// LD2 {v6.4h , v7.4h} , [x6], #16
//
//
// REV64 v5.4h , v4.4h
// REV64 v7.4h , v6.4h
//
// ST1 {v5.4h} , [x5], x11
// ST1 {v7.4h} , [x5], x11
//
// SUBS x10, x10, #1
// BPL LOOP_SKIP_ODD
// Phase 2 prologue: software-pipelined first iteration (loads and first
// accumulation happen before entering LOOP_1).
SKIP_LOOP:
//LDR w4, [SP, #104] //winAdd
// sxtw x4,w4
MOV x4, x20
MOV x5, #8
LD1 {v0.4h}, [x0], #8
MOV x6, #64
LSL x6, x6, #1
LD2 {v1.4h, v2.4h}, [x2], #16
// NOTE(review): x7 is not read again after this point — the #244 constant
// appears to be a dead leftover (window strides below use #240/#248).
MOV x7, #244
MOV x9, x0
ADD x0, x0, #120
MOV x11, x4
LD1 {v2.4h}, [x0], x6
ADD x11, x11, #128
MOV x10, x2
ADD x2, x2, #240
sMULL v30.4s, v0.4h, v1.4h
LD2 {v3.4h, v4.4h}, [x2], #16
ADD x2, x2, #240
LD1 {v4.4h}, [x0], x6
sMLAL v30.4s, v2.4h, v3.4h
LD2 {v5.4h, v6.4h}, [x2], #16
ADD x2, x2, #240
LD1 {v6.4h}, [x0], x6
sMLAL v30.4s, v4.4h, v5.4h
LD2 {v7.4h, v8.4h}, [x2], #16
ADD x2, x2, #240
LD1 {v8.4h}, [x0], x6
sMLAL v30.4s, v6.4h, v7.4h
MOV x0, x9
LD2 {v9.4h, v10.4h}, [x2], #16
ADD x2, x2, #240
LD1 {v10.4h}, [x1], #8
sMLAL v30.4s, v8.4h, v9.4h
MOV x9, x1
LD2 {v11.4h, v12.4h}, [x3], #16
ADD x1, x1, #120
MOV x2, x10
LD1 {v12.4h}, [x1], x6
MOV x10, x3
ADD x3, x3, #240
LD2 {v13.4h, v14.4h}, [x3], #16
ADD x3, x3, #240
LD2 {v15.4h, v16.4h}, [x3], #16
LD1 {v14.4h}, [x1], x6
ADD x3, x3, #240
LD1 {v16.4h}, [x1], x6
SUB x5, x5, #1
LD2 {v17.4h, v18.4h}, [x3], #16
ADD x3, x3, #240
LD1 {v18.4h}, [x1], x6
MOV x1, x9
LD2 {v19.4h, v20.4h}, [x3], #16
ADD x3, x3, #240
MOV x3, x10
// Steady state: each pass stores the previous accumulator (alternating
// between the x4 and x11 output streams) while computing the next
// five-tap sum; loads for iteration N+1 are interleaved throughout.
LOOP_1:
LD1 {v0.4h}, [x0], #8
MOV x9, x0
LD2 {v1.4h, v2.4h}, [x2], #16
ADD x0, x0, #120
MOV x10, x2
ST1 { v30.4s}, [x4], #16
ADD x2, x2, #240
sMULL v30.4s, v10.4h, v11.4h
LD1 {v2.4h}, [x0], x6
sMLAL v30.4s, v12.4h, v13.4h
sMLAL v30.4s, v14.4h, v15.4h
LD2 {v3.4h, v4.4h}, [x2], #16
sMLAL v30.4s, v16.4h, v17.4h
sMLAL v30.4s, v18.4h, v19.4h
LD1 {v4.4h}, [x0], x6
ADD x2, x2, #240
ST1 { v30.4s}, [x11], #16
sMULL v30.4s, v0.4h, v1.4h
LD2 {v5.4h, v6.4h}, [x2], #16
sMLAL v30.4s, v2.4h, v3.4h
ADD x2, x2, #240
LD1 {v6.4h}, [x0], x6
sMLAL v30.4s, v4.4h, v5.4h
LD2 {v7.4h, v8.4h}, [x2], #16
ADD x2, x2, #240
LD1 {v8.4h}, [x0], x6
sMLAL v30.4s, v6.4h, v7.4h
MOV x0, x9
LD2 {v9.4h, v10.4h}, [x2], #16
ADD x2, x2, #240
LD1 {v10.4h}, [x1], #8
MOV x2, x10
MOV x9, x1
LD2 {v11.4h, v12.4h}, [x3], #16
ADD x1, x1, #120
sMLAL v30.4s, v8.4h, v9.4h
LD1 {v12.4h}, [x1], x6
MOV x10, x3
ADD x3, x3, #240
LD2 {v13.4h, v14.4h}, [x3], #16
ADD x3, x3, #240
LD1 {v14.4h}, [x1], x6
LD2 {v15.4h, v16.4h}, [x3], #16
ADD x3, x3, #240
LD1 {v16.4h}, [x1], x6
LD2 {v17.4h, v18.4h}, [x3], #16
ADD x3, x3, #240
LD1 {v18.4h}, [x1], x6
SUBS x5, x5, #1
MOV x1, x9
LD2 {v19.4h, v20.4h}, [x3], #16
ADD x3, x3, #240
MOV x3, x10
BGT LOOP_1
// Drain the pipeline: store the last in-flight sums for both streams.
ST1 { v30.4s}, [x4], #16
sMULL v30.4s, v10.4h, v11.4h
sMLAL v30.4s, v12.4h, v13.4h
sMLAL v30.4s, v14.4h, v15.4h
sMLAL v30.4s, v16.4h, v17.4h
sMLAL v30.4s, v18.4h, v19.4h
ST1 { v30.4s}, [x11], #16
//VPOP {D8 - D15}
// LDMFD sp!, {x4-x12, x15}
ldp x19, x20, [sp], #16
pop_v_regs
ret
// ENDP

View file

@ -0,0 +1,403 @@
///******************************************************************************
// *
// * Copyright (C) 2018 The Android Open Source Project
// *
// * Licensed under the Apache License, Version 2.0 (the "License");
// * you may not use this file except in compliance with the License.
// * You may obtain a copy of the License at:
// *
// * http://www.apache.org/licenses/LICENSE-2.0
// *
// * Unless required by applicable law or agreed to in writing, software
// * distributed under the License is distributed on an "AS IS" BASIS,
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// * See the License for the specific language governing permissions and
// * limitations under the License.
// *
// *****************************************************************************
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
//*/
// Prologue helper for the QMF synthesis routine below: save the full q8-q15
// vector registers (covers the callee-saved v8-v15 under AAPCS64) plus the
// callee-saved x21-x24 pairs. Mirrored exactly by pop_v_regs.
.macro push_v_regs
stp q8, q9, [sp, #-32]!
stp q10, q11, [sp, #-32]!
stp q12, q13, [sp, #-32]!
stp q14, q15, [sp, #-32]!
stp x21, x22, [sp, #-16]!
stp x23, x24, [sp, #-16]!
.endm
// Epilogue helper: restore in exact reverse order of push_v_regs.
.macro pop_v_regs
ldp x23, x24, [sp], #16
ldp x21, x22, [sp], #16
ldp q14, q15, [sp], #32
ldp q12, q13, [sp], #32
ldp q10, q11, [sp], #32
ldp q8, q9, [sp], #32
.endm
// Swap two operands through x16. CLOBBERS x16 (caller-saved IP0, not
// preserved by push_v_regs in this file — callers must not rely on it).
.macro swp reg1, reg2
MOV X16, \reg1
MOV \reg1, \reg2
MOV \reg2, x16
.endm
//
// ixheaacd_sbr_qmfsyn64_winadd (AArch64 NEON)
//
// SBR 64-band QMF synthesis windowing kernel: ten-tap MAC of two sample
// streams (x0 and x1, with their window coefficients at x2 and x12=x2+128)
// into 32-bit accumulators seeded with a rounding bias, followed by a
// saturating shift (SQSHL by x4) and a >>16 narrowing before the 16-bit
// results are stored to x3 with an output stride of x5 halfwords.
// The loop is software-pipelined: stores of iteration N's results are
// interleaved with iteration N+1's loads and MACs.
// NOTE(review): register roles are inferred from this file's access
// patterns only — confirm against the C prototype.
.text
.global ixheaacd_sbr_qmfsyn64_winadd
ixheaacd_sbr_qmfsyn64_winadd:
push_v_regs
// v20 = rounding bias: 0x8000 >> x4, i.e. 0.5 ulp of the final >>16 after
// the SQSHL-by-x4 scaling below (v22 = x4 broadcast, v28 = -x4).
MOV w7, #0x8000
LD1 {v0.4h}, [x0], #8
MOV x12, x2
dup v30.4s, w7
LD1 {v1.4h}, [x2], #8
dup v22.4s, w4
MOV x10, x0
MOV x11, x2
ADD x0, x0, #504
ADD x2, x2, #248
NEG v28.4s, v22.4s
sshL v20.4s, v30.4s, v28.4s
// Strides: x8 = 512 bytes between sample taps, x9 = 256 bytes between
// coefficient taps; x6 = 16 iteration pairs; x5 = output stride in bytes.
MOV x6, #64
LSL x6, x6, #1
ADD x12, x12, x6
MOV x7, #128
LSL x9, x7, #1
ADD x1, x1, x9
MOV x6, #16
MOV x7, #128
LSL x9, x7, #1
MOV x7, #256
LSL x8, x7, #1
LSL x5, x5, #1
// Pipeline prologue: first ten-tap accumulation (five taps from each of
// the two stream/coefficient pairs) computed before entering LOOP_1.
LD1 {v2.4h}, [x0], x8
mov v26.16b, v20.16b
sMLAL v26.4s, v0.4h, v1.4h
LD1 {v3.4h}, [x2], x9
LD1 {v4.4h}, [x0], x8
sMLAL v26.4s, v2.4h, v3.4h
LD1 {v5.4h}, [x2], x9
LD1 {v6.4h}, [x0], x8
sMLAL v26.4s, v5.4h, v4.4h
LD1 {v7.4h}, [x2], x9
LD1 {v8.4h}, [x0], x8
sMLAL v26.4s, v7.4h, v6.4h
LD1 {v9.4h}, [x2], x9
MOV x0, x10
MOV x2, x11
LD1 {v10.4h}, [x1], #8
sMLAL v26.4s, v9.4h, v8.4h
MOV x10, x1
LD1 {v11.4h}, [x12], #8
ADD x1, x1, #504
MOV x11, x12
LD1 {v12.4h}, [x1], x8
ADD x12, x12, #248
sMLAL v26.4s, v10.4h, v11.4h
LD1 {v13.4h}, [x12], x9
LD1 {v14.4h}, [x1], x8
sMLAL v26.4s, v12.4h, v13.4h
LD1 {v15.4h}, [x12], x9
LD1 {v16.4h}, [x1], x8
sMLAL v26.4s, v15.4h, v14.4h
LD1 {v17.4h}, [x12], x9
LD1 {v18.4h}, [x1], x8
sMLAL v26.4s, v17.4h, v16.4h
LD1 {v19.4h}, [x12], x9
sMLAL v26.4s, v19.4h, v18.4h
LD1 {v0.4h}, [x0], #8
MOV x12, x11
MOV x1, x10
LD1 {v1.4h}, [x2], #8
MOV x10, x0
// Scale with saturation, shift down to the 16-bit result lanes.
sQshL v26.4s, v26.4s, v22.4s
ADD x0, x0, #504
MOV x11, x2
LD1 {v2.4h}, [x0], x8
ADD x2, x2, #248
sshR v28.4s, v26.4s, #16
LD1 {v3.4h}, [x2], x9
// UZP1 packs the four 16-bit results into v28.h[0..3] for the lane
// stores below. NOTE(review): v29 (UZP2, the discarded upper halves) is
// never read — appears vestigial.
UZP2 v29.8h, v28.8h, v28.8h
UZP1 v28.8h, v28.8h, v28.8h
mov v26.16b, v20.16b
LD1 {v4.4h}, [x0], x8
LD1 {v5.4h}, [x2], x9
LD1 {v6.4h}, [x0], x8
LD1 {v7.4h}, [x2], x9
LD1 {v8.4h}, [x0], x8
LD1 {v9.4h}, [x2], x9
MOV x0, x10
MOV x2, x11
LD1 {v10.4h}, [x1], #8
MOV x10, x1
LD1 {v11.4h}, [x12], #8
ADD x1, x1, #504
MOV x11, x12
LD1 {v12.4h}, [x1], x8
ADD x12, x12, #248
LD1 {v13.4h}, [x12], x9
LD1 {v14.4h}, [x1], x8
LD1 {v15.4h}, [x12], x9
LD1 {v16.4h}, [x1], x8
LD1 {v17.4h}, [x12], x9
SUB x6, x6, #2
LD1 {v18.4h}, [x1], x8
MOV x1, x10
LD1 {v19.4h}, [x12], x9
MOV x12, x11
// Steady state: two result groups per pass; stores of the packed v28
// lanes interleave with the next accumulations and loads.
LOOP_1:
sMLAL v26.4s, v0.4h, v1.4h
ST1 {v28.h}[0], [x3], x5
sMLAL v26.4s, v2.4h, v3.4h
LD1 {v0.4h}, [x0], #8
sMLAL v26.4s, v5.4h, v4.4h
sMLAL v26.4s, v7.4h, v6.4h
ST1 {v28.h}[1], [x3], x5
MOV x10, x0
LD1 {v1.4h}, [x2], #8
ADD x0, x0, #504
sMLAL v26.4s, v9.4h, v8.4h
ST1 {v28.h}[2], [x3], x5
sMLAL v26.4s, v10.4h, v11.4h
ST1 {v28.h}[3], [x3], x5
MOV x11, x2
LD1 {v2.4h}, [x0], x8
ADD x2, x2, #248
sMLAL v26.4s, v12.4h, v13.4h
LD1 {v3.4h}, [x2], x9
sMLAL v26.4s, v15.4h, v14.4h
sMLAL v26.4s, v17.4h, v16.4h
LD1 {v4.4h}, [x0], x8
sMLAL v26.4s, v19.4h, v18.4h
LD1 {v5.4h}, [x2], x9
LD1 {v6.4h}, [x0], x8
sQshL v26.4s, v26.4s, v22.4s
sshR v28.4s, v26.4s, #16
LD1 {v7.4h}, [x2], x9
mov v26.16b, v20.16b
UZP2 v29.8h, v28.8h, v28.8h
UZP1 v28.8h, v28.8h, v28.8h
sMLAL v26.4s, v0.4h, v1.4h
sMLAL v26.4s, v2.4h, v3.4h
LD1 {v8.4h}, [x0], x8
sMLAL v26.4s, v5.4h, v4.4h
sMLAL v26.4s, v7.4h, v6.4h
LD1 {v9.4h}, [x2], x9
LD1 {v10.4h}, [x1], #8
sMLAL v26.4s, v9.4h, v8.4h
MOV x2, x11
LD1 {v11.4h}, [x12], #8
MOV x0, x10
MOV x10, x1
ADD x1, x1, #504
MOV x11, x12
LD1 {v12.4h}, [x1], x8
ADD x12, x12, #248
LD1 {v13.4h}, [x12], x9
sMLAL v26.4s, v10.4h, v11.4h
LD1 {v14.4h}, [x1], x8
sMLAL v26.4s, v12.4h, v13.4h
LD1 {v15.4h}, [x12], x9
LD1 {v16.4h}, [x1], x8
sMLAL v26.4s, v15.4h, v14.4h
LD1 {v17.4h}, [x12], x9
LD1 {v18.4h}, [x1], x8
sMLAL v26.4s, v17.4h, v16.4h
LD1 {v19.4h}, [x12], x9
MOV x1, x10
sMLAL v26.4s, v19.4h, v18.4h
ST1 {v28.h}[0], [x3], x5
MOV x12, x11
LD1 {v0.4h}, [x0], #8
LD1 {v1.4h}, [x2], #8
sQshL v26.4s, v26.4s, v22.4s
ST1 {v28.h}[1], [x3], x5
MOV x10, x0
ST1 {v28.h}[2], [x3], x5
ADD x0, x0, #504
ST1 {v28.h}[3], [x3], x5
MOV x11, x2
sshR v28.4s, v26.4s, #16
LD1 {v2.4h}, [x0], x8
ADD x2, x2, #248
LD1 {v3.4h}, [x2], x9
LD1 {v4.4h}, [x0], x8
LD1 {v5.4h}, [x2], x9
LD1 {v6.4h}, [x0], x8
LD1 {v7.4h}, [x2], x9
LD1 {v8.4h}, [x0], x8
LD1 {v9.4h}, [x2], x9
UZP2 v29.8h, v28.8h, v28.8h
UZP1 v28.8h, v28.8h, v28.8h
mov v26.16b, v20.16b
MOV x0, x10
LD1 {v10.4h}, [x1], #8
MOV x2, x11
MOV x10, x1
LD1 {v11.4h}, [x12], #8
ADD x1, x1, #504
MOV x11, x12
LD1 {v12.4h}, [x1], x8
ADD x12, x12, #248
LD1 {v13.4h}, [x12], x9
LD1 {v14.4h}, [x1], x8
LD1 {v15.4h}, [x12], x9
LD1 {v16.4h}, [x1], x8
LD1 {v17.4h}, [x12], x9
SUBS x6, x6, #2
LD1 {v18.4h}, [x1], x8
MOV x1, x10
LD1 {v19.4h}, [x12], x9
MOV x12, x11
BGT LOOP_1
// Pipeline epilogue: finish and store the last in-flight result group.
sMLAL v26.4s, v0.4h, v1.4h
ST1 {v28.h}[0], [x3], x5
sMLAL v26.4s, v2.4h, v3.4h
sMLAL v26.4s, v5.4h, v4.4h
ST1 {v28.h}[1], [x3], x5
sMLAL v26.4s, v7.4h, v6.4h
sMLAL v26.4s, v9.4h, v8.4h
ST1 {v28.h}[2], [x3], x5
sMLAL v26.4s, v10.4h, v11.4h
sMLAL v26.4s, v12.4h, v13.4h
ST1 {v28.h}[3], [x3], x5
sMLAL v26.4s, v15.4h, v14.4h
sMLAL v26.4s, v17.4h, v16.4h
sMLAL v26.4s, v19.4h, v18.4h
sQshL v26.4s, v26.4s, v22.4s
sshR v28.4s, v26.4s, #16
UZP2 v29.8h, v28.8h, v28.8h
UZP1 v28.8h, v28.8h, v28.8h
ST1 {v28.h}[0], [x3], x5
ST1 {v28.h}[1], [x3], x5
ST1 {v28.h}[2], [x3], x5
ST1 {v28.h}[3], [x3], x5
pop_v_regs
ret

View file

@ -0,0 +1,73 @@
// Prologue helper: save the general-purpose registers used below plus the
// q0-q3 vector registers. Note the mixed pairing (X26 with X17) — it is
// matched exactly by pop_v_regs, so the restore is still correct.
.macro push_v_regs
stp X8, X9, [sp, #-16]!
stp X10, X11, [sp, #-16]!
stp X12, X13, [sp, #-16]!
stp X14, X15, [sp, #-16]!
stp X20, X21, [sp, #-16]!
stp X26, X17, [sp, #-16]!
stp X27, X28, [sp, #-16]!
stp q2, q3, [sp, #-32]!
stp q0, q1, [sp, #-32]!
.endm
// Epilogue helper: restore in exact reverse order of push_v_regs.
.macro pop_v_regs
ldp q0, q1, [sp], #32
ldp q2, q3, [sp], #32
ldp X27, X28, [sp], #16
ldp X26, X17, [sp], #16
ldp X20, X21, [sp], #16
ldp X14, X15, [sp], #16
ldp X12, X13, [sp], #16
ldp X10, X11, [sp], #16
ldp X8, X9, [sp], #16
.endm
//
// ixheaacd_shiftrountine_with_rnd (AArch64 NEON)
//
// Butterfly-combine the qmfReal (x0) and qmfImag (x1) 32-bit arrays from
// both ends toward the middle, then saturating-shift-left by 10 (MOVI v3),
// add a 0x8000 rounding bias (v0) with saturation, and store the high
// 16-bit halves of the four results to the two output halves at x2 and
// x12 = x2 + 2*length.  x3 = length; two index pairs are consumed per pass.
// NOTE(review): register roles inferred from this routine's own loads,
// stores and retained inline comments — confirm against the C prototype.
//
// Cleanup vs. the original: the loads of W26/W27/W28 (0, 0x80000000,
// 0x7fffffff) were removed — those registers were never read anywhere in
// this function; they were leftovers of a scalar saturation path that the
// SQSHL/SQADD instructions below replaced (see the retained
// "//QADD x4, x4, x9" comments).
.text
.p2align 2
.global ixheaacd_shiftrountine_with_rnd
ixheaacd_shiftrountine_with_rnd:
push_v_regs
ADD x12, x2, x3, LSL #1
MOV W9, #0x00008000
DUP V0.4s, w9
MOVI v3.4s, #10
SUBS W3, W3, #1
BMI S_WITH_R_L6
S_WITH_R_L5:
LDR w5, [x1, x3, LSL #2] //i2 = qmfImag[j]
LDR w7, [x0, x3, LSL #2] //x2 = qmfReal[j]
LDR w14, [x0], #4 //x1 = *qmfReal
LDR w10, [x1], #4 //i1 = *qmfImag
ADD w6, w5, w7 //*qmfImag++ = add32(i2, x2)
SUB w5, w5, w7 //qmfReal[j] = sub32(i2, x2)
ADD w7, w10, w14 //qmfImag[j] = add32(i1, x1)
SUB w4, w10, w14 //*qmfReal++ = sub32(i1, x1)
// Gather the four scalar results into v1, then saturate in one shot.
MOV v1.s[0], W4 //QADD x4, x4, x9
MOV v1.s[1], W5 //QADD x4, x4, x9
MOV v1.s[2], W6 //QADD x4, x4, x9
MOV v1.s[3], W7 //QADD x4, x4, x9
lsl w14, w3, #1
SQSHL v1.4s, v1.4s, v3.4s
ADD X17, X2, X14
SQADD v2.4s, v1.4s, v0.4s
// Store the rounded high halves: lanes 1/3 to the front half (x2),
// lanes 5/7 to the back half (x12), mirrored via the x14 offset.
ST1 {v2.h}[1], [x2], #2
ST1 {v2.h}[3], [X17]
ADD X17, X12, X14
ST1 {v2.h}[7], [x17] //STRH w7, [x12, x14]
ST1 {v2.h}[5], [x12], #2 //STRH w6, [x12], #2
SUBS x3, x3, #2
BGE S_WITH_R_L5
S_WITH_R_L6:
pop_v_regs
ret

View file

@ -0,0 +1,79 @@
// Prologue helper: save the general-purpose registers used below plus the
// q0-q3 vector registers. The mixed X26/X17 pairing is matched exactly by
// pop_v_regs, so the restore is still correct.
.macro push_v_regs
stp X8, X9, [sp, #-16]!
stp X10, X11, [sp, #-16]!
stp X12, X13, [sp, #-16]!
stp X14, X15, [sp, #-16]!
stp X20, X21, [sp, #-16]!
stp X26, X17, [sp, #-16]!
stp X27, X28, [sp, #-16]!
stp q2, q3, [sp, #-32]!
stp q0, q1, [sp, #-32]!
.endm
// Epilogue helper: restore in exact reverse order of push_v_regs.
.macro pop_v_regs
ldp q0, q1, [sp], #32
ldp q2, q3, [sp], #32
ldp X27, X28, [sp], #16
ldp X26, X17, [sp], #16
ldp X20, X21, [sp], #16
ldp X14, X15, [sp], #16
ldp X12, X13, [sp], #16
ldp X10, X11, [sp], #16
ldp X8, X9, [sp], #16
.endm
//
// ixheaacd_shiftrountine_with_rnd_eld (AArch64 NEON)
//
// ELD variant of ixheaacd_shiftrountine_with_rnd: same two-ended
// butterfly over qmfReal (x0) / qmfImag (x1), but the sum terms are
// negated (MVN + ADD #1, i.e. two's-complement), the difference operand
// order is reversed, and the saturating left shift is 9 (MOVI v3) instead
// of 10.  Results are rounded with a 0x8000 bias and the high 16-bit
// halves stored to x2 and x12 = x2 + 2*length; x3 = length.
// NOTE(review): register roles inferred from this routine's own loads,
// stores and retained inline comments — confirm against the C prototype.
//
// Cleanup vs. the original: the loads of W26/W27/W28 (0, 0x80000000,
// 0x7fffffff) were removed — those registers were never read anywhere in
// this function; they were leftovers of a scalar saturation path that the
// SQSHL/SQADD instructions below replaced (see the retained
// "//QADD x4, x4, x9" comments).
.text
.p2align 2
.global ixheaacd_shiftrountine_with_rnd_eld
ixheaacd_shiftrountine_with_rnd_eld:
push_v_regs
ADD x12, x2, x3, LSL #1
MOV W9, #0x00008000
DUP V0.4s, w9
MOVI v3.4s, #9
SUBS W3, W3, #1
BMI S_WITH_R_L6
S_WITH_R_L5:
LDR w5, [x1, x3, LSL #2] //i2 = qmfImag[j]
LDR w7, [x0, x3, LSL #2] //x2 = qmfReal[j]
LDR w14, [x0], #4 //x1 = *qmfReal
LDR w10, [x1], #4 //i1 = *qmfImag
ADD w6, w5, w7 //*qmfImag++ = add32(i2, x2)
// Negate the sum: MVN + ADD #1 computes -(i2 + x2).
MVN w6, w6
ADD w6, w6, #1
SUB w5, w7, w5 //qmfReal[j] = sub32(i2, x2)
ADD w7, w10, w14 //qmfImag[j] = add32(i1, x1)
MVN w7, w7
ADD w7, w7, #1
SUB w4, w14, w10 //*qmfReal++ = sub32(i1, x1)
// Gather the four scalar results into v1, then saturate in one shot.
MOV v1.s[0], W4 //QADD x4, x4, x9
MOV v1.s[1], W5 //QADD x4, x4, x9
MOV v1.s[2], W6 //QADD x4, x4, x9
MOV v1.s[3], W7 //QADD x4, x4, x9
lsl w14, w3, #1
SQSHL v1.4s, v1.4s, v3.4s
ADD X17, X2, X14
SQADD v2.4s, v1.4s, v0.4s
// Store the rounded high halves to the mirrored output positions.
ST1 {v2.h}[1], [x2], #2
ST1 {v2.h}[3], [X17]
ADD X17, X12, X14
ST1 {v2.h}[7], [x17] //STRH w7, [x12, x14]
ST1 {v2.h}[5], [x12], #2 //STRH w6, [x12], #2
SUBS x3, x3, #2
BGE S_WITH_R_L5
S_WITH_R_L6:
pop_v_regs
ret

View file

@ -0,0 +1,106 @@
/******************************************************************************
*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#ifndef IMPD_API_CMD_STANDARDS_H
#define IMPD_API_CMD_STANDARDS_H
/*****************************************************************************/
/* Ittiam standard API commands */
/*****************************************************************************/
/* Opcodes passed as i_cmd to the decoder API entry point; the i_idx
 * argument selects a sub-command from the index groups further below. */
/* Library identification and sizing queries. */
#define IA_API_CMD_GET_LIB_ID_STRINGS 0x0001
#define IA_API_CMD_GET_API_SIZE 0x0002
/* Initialization and configuration. */
#define IA_API_CMD_INIT 0x0003
#define IA_API_CMD_SET_CONFIG_PARAM 0x0004
#define IA_API_CMD_GET_CONFIG_PARAM 0x0005
/* Memory-table negotiation. */
#define IA_API_CMD_GET_MEMTABS_SIZE 0x0006
#define IA_API_CMD_SET_MEMTABS_PTR 0x0007
#define IA_API_CMD_GET_N_MEMTABS 0x0008
/* Frame processing and buffer bookkeeping. */
#define IA_API_CMD_EXECUTE 0x0009
#define IA_API_CMD_PUT_INPUT_QUERY 0x000A
#define IA_API_CMD_GET_CURIDX_INPUT_BUF 0x000B
#define IA_API_CMD_SET_INPUT_BYTES 0x000C
#define IA_API_CMD_GET_OUTPUT_BYTES 0x000D
#define IA_API_CMD_RESET 0x0010
/* Per-memory-record attribute queries/settings (indexed by i_idx). */
#define IA_API_CMD_GET_MEM_INFO_SIZE 0x0011
#define IA_API_CMD_GET_MEM_INFO_ALIGNMENT 0x0012
#define IA_API_CMD_GET_MEM_INFO_TYPE 0x0013
#define IA_API_CMD_GET_MEM_INFO_PLACEMENT 0x0014
#define IA_API_CMD_GET_MEM_INFO_PRIORITY 0x0015
#define IA_API_CMD_SET_MEM_PTR 0x0016
#define IA_API_CMD_SET_MEM_INFO_SIZE 0x0017
#define IA_API_CMD_SET_MEM_PLACEMENT 0x0018
/* ROM/coefficient table management. */
#define IA_API_CMD_GET_N_TABLES 0x0019
#define IA_API_CMD_GET_TABLE_INFO_SIZE 0x001A
#define IA_API_CMD_GET_TABLE_INFO_ALIGNMENT 0x001B
#define IA_API_CMD_GET_TABLE_INFO_PRIORITY 0x001C
#define IA_API_CMD_SET_TABLE_PTR 0x001D
#define IA_API_CMD_GET_TABLE_PTR 0x001E
/* End-of-stream signalling and input hand-off, per bitstream variant
 * (plain, and the IC/IG/IL/IN sub-streams). */
#define IA_API_CMD_INPUT_OVER 0x0020
#define IA_API_CMD_INPUT_OVER_BS 0x0021
#define IA_API_CMD_INPUT_OVER_IC_BS 0x0022
#define IA_API_CMD_INPUT_OVER_IG_BS 0x0023
#define IA_API_CMD_INPUT_OVER_IL_BS 0x0024
#define IA_API_CMD_INPUT_OVER_IN_BS 0x0025
#define IA_API_CMD_SET_INPUT_BYTES_BS 0x0026
#define IA_API_CMD_SET_INPUT_BYTES_IC_BS 0x0027
#define IA_API_CMD_SET_INPUT_BYTES_IG_BS 0x0028
#define IA_API_CMD_SET_INPUT_BYTES_IL_BS 0x0029
#define IA_API_CMD_SET_INPUT_BYTES_IN_BS 0x002A
/*****************************************************************************/
/* Ittiam standard API command indices */
/*****************************************************************************/
/* IA_API_CMD_GET_LIB_ID_STRINGS indices */
#define IA_CMD_TYPE_LIB_NAME 0x0100
#define IA_CMD_TYPE_LIB_VERSION 0x0200
#define IA_CMD_TYPE_API_VERSION 0x0300
/* IA_API_CMD_INIT indices */
#define IA_CMD_TYPE_INIT_API_PRE_CONFIG_PARAMS 0x0100
#define IA_CMD_TYPE_INIT_API_POST_CONFIG_PARAMS 0x0200
#define IA_CMD_TYPE_INIT_PROCESS 0x0300
#define IA_CMD_TYPE_INIT_DONE_QUERY 0x0400
#define IA_CMD_TYPE_INIT_CPY_BSF_BUFF 0x0201
#define IA_CMD_TYPE_INIT_CPY_IC_BSF_BUFF 0x0202
#define IA_CMD_TYPE_INIT_CPY_IL_BSF_BUFF 0x0203
#define IA_CMD_TYPE_INIT_CPY_IG_BSF_BUFF 0x0204
#define IA_CMD_TYPE_INIT_CPY_IN_BSF_BUFF 0x0205
#define IA_CMD_TYPE_INIT_CPY_BSF_BUFF_OVER_QUERY 0x0206
#define IA_CMD_TYPE_INIT_CPY_IC_BSF_BUFF_OVER_QUERY 0x0207
#define IA_CMD_TYPE_INIT_CPY_IL_BSF_BUFF_OVER_QUERY 0x0208
#define IA_CMD_TYPE_INIT_CPY_IG_BSF_BUFF_OVER_QUERY 0x0209
#define IA_CMD_TYPE_INIT_CPY_IN_BSF_BUFF_OVER_QUERY 0x020A
#define IA_CMD_TYPE_INIT_SET_BUFF_PTR 0x020B
/* IA_API_CMD_EXECUTE indices */
#define IA_CMD_TYPE_DO_EXECUTE 0x0100
#define IA_CMD_TYPE_DONE_QUERY 0x0200
#endif

View file

@ -0,0 +1,699 @@
/******************************************************************************
*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#include <string.h>
#include <stdlib.h>
#include "impd_type_def.h"
#include "impd_error_standards.h"
#include "impd_apicmd_standards.h"
#include "impd_memory_standards.h"
#include "impd_drc_bitbuffer.h"
#include "impd_drc_extr_delta_coded_info.h"
#include "impd_drc_common.h"
#include "impd_drc_struct.h"
#include "impd_drc_interface.h"
#include "impd_parametric_drc_dec.h"
#include "impd_drc_gain_dec.h"
#include "impd_drc_filter_bank.h"
#include "impd_drc_multi_band.h"
#include "impd_drc_process_audio.h"
#include "impd_drc_eq.h"
#include "impd_drc_gain_decoder.h"
#include "impd_drc_config_params.h"
#include "impd_drc_api_defs.h"
#include "impd_drc_definitions.h"
#include "impd_drc_hashdefines.h"
#include "impd_drc_peak_limiter.h"
#include "impd_drc_selection_process.h"
#include "impd_drc_api_struct_def.h"
#include "impd_drc_error_codes.h"
/* Forward declarations for the DRC decoder entry points used by
 * ia_drc_dec_api() below; the types come from the project headers included
 * above. Definitions live in this and sibling translation units. */
WORD32 impd_init_process_audio_main_qmf(ia_drc_api_struct *p_obj_drc);
WORD32 impd_init_process_audio_main_stft(ia_drc_api_struct *p_obj_drc);
WORD32 impd_init_process_audio_main_td_qmf(ia_drc_api_struct *p_obj_drc);
IA_ERRORCODE impd_drc_mem_api(ia_drc_api_struct *p_obj_drc,
WORD32 i_cmd, WORD32 i_idx, pVOID pv_value);
IA_ERRORCODE impd_drc_fill_mem_tables(ia_drc_api_struct *p_obj_drc);
VOID impd_drc_set_default_config_params(ia_drc_config_struct* ptr_config);
IA_ERRORCODE impd_drc_process_frame(ia_drc_api_struct *p_obj_drc);
IA_ERRORCODE impd_drc_init(ia_drc_api_struct *p_obj_drc);
IA_ERRORCODE impd_drc_set_default_config(ia_drc_api_struct *p_obj_drc);
IA_ERRORCODE impd_drc_set_struct_pointer(ia_drc_api_struct *p_obj_drc);
IA_ERRORCODE impd_process_time_domain(ia_drc_api_struct *p_obj_drc);
/* Presumably the number of coefficient tables reported via the
 * IA_API_CMD_GET_N_TABLES path — confirm against impd_drc_fill_mem_tables. */
#define NUM_DRC_TABLES 4
/* Scratch arena size in bytes (1024*256*64 = 16 MiB). Parenthesized so the
 * macro expands safely inside larger expressions (e.g. x / SCRATCH_MEM_SIZE);
 * the value is unchanged. */
#define SCRATCH_MEM_SIZE (1024*256*64)
/* ia_drc_dec_api
 *
 * Single entry point of the MPEG-D DRC decoder API. Dispatches on the
 * command code (and a command-specific sub-index) to query sizes, set
 * configuration parameters, append bitstream buffers and run frame
 * processing.
 *
 * p_ia_drc_dec_obj : decoder handle (points at an ia_drc_api_struct)
 * i_cmd            : IA_API_CMD_* command selector
 * i_idx            : IA_CMD_TYPE_* / IA_DRC_DEC_CONFIG_* sub-selector,
 *                    or a memory-table index for the *MEM* commands
 * pv_value         : command-dependent input/output payload
 *
 * Returns IA_NO_ERROR on success, a module error code for a rejected
 * parameter, or -1 for an unknown command/index.
 */
IA_ERRORCODE ia_drc_dec_api(pVOID p_ia_drc_dec_obj,
                            WORD32 i_cmd,
                            WORD32 i_idx,
                            pVOID pv_value)
{
  ia_drc_api_struct *p_obj_drc = p_ia_drc_dec_obj;
  IA_ERRORCODE error_code = IA_NO_ERROR;
  LOOPIDX i;
  /* Typed views of the opaque payload pointer. pui_value and pus_value
   * are both UWORD32 views; the two names only mirror the historic
   * calling convention of this API. */
  pUWORD32 pui_value = pv_value;
  pUWORD32 pus_value = pv_value;
  pWORD8 pb_value = pv_value;
  SIZE_T *ps_value = pv_value;

  /* Memory-table commands are served by a dedicated helper. */
  switch (i_cmd)
  {
    case IA_API_CMD_GET_MEM_INFO_SIZE:
    case IA_API_CMD_GET_MEM_INFO_ALIGNMENT:
    case IA_API_CMD_GET_MEM_INFO_TYPE:
    case IA_API_CMD_GET_MEM_INFO_PLACEMENT:
    case IA_API_CMD_GET_MEM_INFO_PRIORITY:
    case IA_API_CMD_SET_MEM_PTR:
    case IA_API_CMD_SET_MEM_PLACEMENT:
    {
      return impd_drc_mem_api(p_ia_drc_dec_obj, i_cmd, i_idx, pv_value);
    }
  }

  switch (i_cmd)
  {
    case IA_API_CMD_GET_LIB_ID_STRINGS:
    {
      switch (i_idx)
      {
        case IA_CMD_TYPE_LIB_NAME:
        {
          WORD8 lib_name[] = LIBNAME;
          /* Copy the library name including its NUL terminator.
           * (The original loop tested lib_name[i - 1], reading one
           * byte before the array on the first iteration - undefined
           * behavior.) */
          for (i = 0; i < IA_API_STR_LEN; i++)
          {
            pb_value[i] = lib_name[i];
            if (lib_name[i] == 0)
            {
              break;
            }
          }
          break;
        }
        case IA_CMD_TYPE_LIB_VERSION:
        {
          break;
        }
        case IA_CMD_TYPE_API_VERSION:
        {
          /* NOTE(review): no break here in the original, so an API
           * version query falls through and reports -1. Preserved;
           * confirm whether IA_APIVERSION should be copied out. */
        }
        /* fallthrough */
        default:
        {
          return -1;
        }
      }
      break;
    }
    case IA_API_CMD_GET_API_SIZE:
    {
      /* Monolithic allocation: API struct followed by the state struct
       * (plus alignment slack) and 8080 KiB of workspace carved out at
       * fixed offsets in IA_CMD_TYPE_INIT_API_POST_CONFIG_PARAMS. */
      *pui_value = sizeof(ia_drc_api_struct) +
                   (sizeof(ia_drc_state_struct) + 8) + 8080 * 1024;
      break;
    }
    case IA_API_CMD_INIT:
    {
      switch (i_idx)
      {
        case IA_CMD_TYPE_INIT_SET_BUFF_PTR:
        {
          p_obj_drc->p_state->persistant_ptr =
              p_obj_drc->pp_mem[IA_DRC_PERSIST_IDX];
          impd_drc_set_struct_pointer(p_obj_drc);
          break;
        }
        case IA_CMD_TYPE_INIT_API_PRE_CONFIG_PARAMS:
        {
          impd_drc_set_default_config(p_obj_drc);
          break;
        }
        case IA_CMD_TYPE_INIT_API_POST_CONFIG_PARAMS:
        {
          /* Place the state struct, the mem-info table and the buffer
           * pointer array at fixed offsets inside the API allocation. */
          p_obj_drc->p_state =
              (ia_drc_state_struct *)((SIZE_T)p_obj_drc + 8000 * 1024);
          p_obj_drc->p_mem_info =
              (ia_mem_info_struct *)((SIZE_T)p_obj_drc + 8002 * 1024);
          p_obj_drc->pp_mem = (pVOID)((SIZE_T)p_obj_drc + 8006 * 1024);
          impd_drc_fill_mem_tables(p_obj_drc);
          break;
        }
        case IA_CMD_TYPE_INIT_PROCESS:
        {
          IA_ERRORCODE Error = 0;
          /* The persistent buffer must have been installed first. */
          if (p_obj_drc->pp_mem[IA_DRC_PERSIST_IDX] == 0)
          {
            return (-1);
          }
          Error = impd_drc_init(p_obj_drc);
          if (Error)
          {
            return Error;
          }
          p_obj_drc->p_state->ui_init_done = 1;
          return Error;
        }
        case IA_CMD_TYPE_INIT_DONE_QUERY:
        {
          *pui_value = (p_obj_drc->p_state->ui_init_done == 1) ? 1 : 0;
          break;
        }
        case IA_CMD_TYPE_INIT_CPY_BSF_BUFF_OVER_QUERY:
        {
          *pui_value = p_obj_drc->str_bit_handler.cpy_over;
          break;
        }
        case IA_CMD_TYPE_INIT_CPY_IC_BSF_BUFF_OVER_QUERY:
        {
          *pui_value = p_obj_drc->str_bit_handler.cpy_over_ic;
          break;
        }
        case IA_CMD_TYPE_INIT_CPY_IL_BSF_BUFF_OVER_QUERY:
        {
          *pui_value = p_obj_drc->str_bit_handler.cpy_over_il;
          break;
        }
        case IA_CMD_TYPE_INIT_CPY_IN_BSF_BUFF_OVER_QUERY:
        {
          *pui_value = p_obj_drc->str_bit_handler.cpy_over_in;
          break;
        }
        case IA_CMD_TYPE_INIT_CPY_BSF_BUFF:
        {
          /* Append the current chunk from the shared input buffer
           * (pp_mem[2]) to the gain bitstream accumulator. */
          memcpy(p_obj_drc->str_bit_handler.it_bit_buf +
                     p_obj_drc->str_bit_handler.num_bytes_bs,
                 p_obj_drc->pp_mem[2],
                 p_obj_drc->str_bit_handler.num_byts_cur);
          p_obj_drc->str_bit_handler.num_bytes_bs +=
              p_obj_drc->str_bit_handler.num_byts_cur;
          break;
        }
        case IA_CMD_TYPE_INIT_CPY_IC_BSF_BUFF:
        {
          /* Append to the DRC-config bitstream accumulator. */
          memcpy(p_obj_drc->str_bit_handler.bitstream_drc_config +
                     p_obj_drc->str_bit_handler.num_bytes_bs_drc_config,
                 p_obj_drc->pp_mem[2],
                 p_obj_drc->str_bit_handler.num_byts_cur_ic);
          p_obj_drc->str_bit_handler.num_bytes_bs_drc_config +=
              p_obj_drc->str_bit_handler.num_byts_cur_ic;
          break;
        }
        case IA_CMD_TYPE_INIT_CPY_IL_BSF_BUFF:
        {
          /* Append to the loudness-info bitstream accumulator. */
          memcpy(p_obj_drc->str_bit_handler.bitstream_loudness_info +
                     p_obj_drc->str_bit_handler.num_bytes_bs_loudness_info,
                 p_obj_drc->pp_mem[2],
                 p_obj_drc->str_bit_handler.num_byts_cur_il);
          p_obj_drc->str_bit_handler.num_bytes_bs_loudness_info +=
              p_obj_drc->str_bit_handler.num_byts_cur_il;
          break;
        }
        case IA_CMD_TYPE_INIT_CPY_IN_BSF_BUFF:
        {
          /* Append to the uniDRC-interface bitstream accumulator. */
          memcpy(p_obj_drc->str_bit_handler.bitstream_unidrc_interface +
                     p_obj_drc->str_bit_handler.num_bytes_bs_unidrc_interface,
                 p_obj_drc->pp_mem[2],
                 p_obj_drc->str_bit_handler.num_byts_cur_in);
          p_obj_drc->str_bit_handler.num_bytes_bs_unidrc_interface +=
              p_obj_drc->str_bit_handler.num_byts_cur_in;
          break;
        }
        default:
        {
          return -1;
        }
      }
      break;
    }
    case IA_API_CMD_GET_CONFIG_PARAM:
    {
      switch (i_idx)
      {
        case IA_DRC_DEC_CONFIG_PARAM_SAMP_FREQ:
        {
          *pus_value = p_obj_drc->str_config.sampling_rate;
          break;
        }
        case IA_DRC_DEC_CONFIG_PARAM_NUM_CHANNELS:
        {
          *pus_value = p_obj_drc->str_config.num_ch_out;
          break;
        }
        case IA_DRC_DEC_CONFIG_PROC_OUT_PTR:
        {
          *ps_value = (SIZE_T)p_obj_drc->str_payload.pstr_drc_sel_proc_output;
          break;
        }
      }
      break;
    }
    case IA_API_CMD_SET_CONFIG_PARAM:
    {
      switch (i_idx)
      {
        case IA_DRC_DEC_CONFIG_PARAM_DEC_TYPE:
        {
          /* Validate before applying. The value is unsigned, so only
           * the upper bound can be violated (the original also tested
           * *pus_value < 0, which is always false, and only range-
           * checked after the configuration had been written). */
          if (*pus_value > 3)
          {
            return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_DECODE_TYPE;
          }
          if (*pus_value == 1)
          {
            p_obj_drc->str_config.dec_type = DEC_TYPE_TD_QMF64;
            p_obj_drc->str_config.sub_band_domain_mode = SUBBAND_DOMAIN_MODE_QMF64;
            p_obj_drc->str_config.sub_band_down_sampling_factor =
                AUDIO_CODEC_SUBBAND_DOWNSAMPLING_FACTOR_QMF64;
            p_obj_drc->str_config.sub_band_count = AUDIO_CODEC_SUBBAND_COUNT_QMF64;
          }
          else if (*pus_value == 2)
          {
            p_obj_drc->str_config.dec_type = DEC_TYPE_QMF64;
            p_obj_drc->str_config.sub_band_domain_mode = SUBBAND_DOMAIN_MODE_QMF64;
            p_obj_drc->str_config.sub_band_down_sampling_factor =
                AUDIO_CODEC_SUBBAND_DOWNSAMPLING_FACTOR_QMF64;
            p_obj_drc->str_config.sub_band_count = AUDIO_CODEC_SUBBAND_COUNT_QMF64;
          }
          else if (*pus_value == 3)
          {
            p_obj_drc->str_config.dec_type = DEC_TYPE_STFT256;
            p_obj_drc->str_config.sub_band_domain_mode = SUBBAND_DOMAIN_MODE_STFT256;
            p_obj_drc->str_config.sub_band_down_sampling_factor =
                AUDIO_CODEC_SUBBAND_DOWNSAMPLING_FACTOR_STFT256;
            p_obj_drc->str_config.sub_band_count = AUDIO_CODEC_SUBBAND_COUNT_STFT256;
          }
          else /* 0: plain time-domain processing */
          {
            p_obj_drc->str_config.dec_type = DEC_TYPE_TD;
            p_obj_drc->str_config.sub_band_domain_mode = SUBBAND_DOMAIN_MODE_OFF;
          }
          break;
        }
        case IA_DRC_DEC_CONFIG_PARAM_CTRL_PARAM:
        {
          if (*pus_value < 1 || *pus_value > 39)
          {
            return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_CTRL_PARAM_IDX;
          }
          p_obj_drc->str_config.control_parameter_index = *pus_value;
          break;
        }
        case IA_DRC_DEC_CONFIG_PARAM_PEAK_LIMITER:
        {
          /* Flag: 0 or 1 (unsigned, no lower-bound check needed). */
          if (*pus_value > 1)
          {
            return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_PEAK_LIM_FLAG;
          }
          p_obj_drc->str_config.peak_limiter = *pus_value;
          break;
        }
        case IA_DRC_DEC_CONFIG_PARAM_VER_MODE:
        {
          /* Accepted but currently not applied. */
          break;
        }
        case IA_DRC_DEC_CONFIG_PARAM_SAMP_FREQ:
        {
          if (*pus_value < 8000 || *pus_value > 96000)
          {
            return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_SAMP_FREQ;
          }
          p_obj_drc->str_config.sampling_rate = *pus_value;
          break;
        }
        case IA_DRC_DEC_CONFIG_PARAM_NUM_CHANNELS:
        {
          /* Validate first; the original stored the value before
           * range-checking it, leaving an invalid count behind. */
          if (*pus_value < 1 || *pus_value > MAX_CHANNEL_COUNT)
          {
            return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_NUM_OF_CHANNELS;
          }
          p_obj_drc->str_config.num_ch_in = *pus_value;
          break;
        }
        case IA_DRC_DEC_CONFIG_PARAM_PCM_WDSZ:
        {
          if ((*pus_value != 16) && (*pus_value != 32))
          {
            return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_PCM_SIZE;
          }
          p_obj_drc->str_config.pcm_size = *pus_value;
          break;
        }
        case IA_DRC_DEC_CONFIG_PARAM_BITS_FORMAT:
        {
          if ((*pus_value != 1) && (*pus_value != 0))
          {
            return -1;
          }
          p_obj_drc->str_config.bitstream_file_format = *pus_value;
          break;
        }
        case IA_DRC_DEC_CONFIG_PARAM_INT_PRESENT:
        {
          if ((*pus_value != 1) && (*pus_value != 0))
          {
            return -1;
          }
          p_obj_drc->str_config.interface_bitstream_present = *pus_value;
          break;
        }
        case IA_DRC_DEC_CONFIG_PARAM_DELAY_MODE:
        {
          if ((*pus_value != 1) && (*pus_value != 0))
          {
            return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_DELAY_MODE;
          }
          p_obj_drc->str_config.delay_mode = *pus_value;
          break;
        }
        case IA_DRC_DEC_CONFIG_PARAM_GAIN_DELAY:
        {
          if (*pus_value > MAX_SIGNAL_DELAY)
          {
            return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_GAIN_DELAY;
          }
          p_obj_drc->str_config.gain_delay_samples = *pus_value;
          break;
        }
        case IA_DRC_DEC_CONFIG_PARAM_AUDIO_DELAY:
        {
          /* Accepted but currently not applied. */
          break;
        }
        case IA_DRC_DEC_CONFIG_PARAM_CON_DELAY_MODE:
        {
          if (*pus_value > 1)
          {
            /* NOTE(review): the original returns the parameter index
             * itself here rather than an error code; preserved as-is -
             * confirm the intended error constant. */
            return IA_DRC_DEC_CONFIG_PARAM_CON_DELAY_MODE;
          }
          p_obj_drc->str_config.constant_delay_on = *pus_value;
          break;
        }
        case IA_DRC_DEC_CONFIG_PARAM_ABSO_DELAY_OFF:
        {
          p_obj_drc->str_config.absorb_delay_on = *pus_value;
          break;
        }
        case IA_DRC_DEC_CONFIG_PARAM_FRAME_SIZE:
        {
          if (*pus_value < 1 || *pus_value > 4096)
          {
            return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_FRAME_SIZE;
          }
          p_obj_drc->str_config.frame_size = *pus_value;
          break;
        }
        case IA_DRC_DEC_CONFIG_GAIN_STREAM_FLAG:
        {
          p_obj_drc->str_bit_handler.gain_stream_flag = *pus_value;
          break;
        }
        default:
        {
          return -1;
        }
      }
      break;
    }
    case IA_API_CMD_GET_MEMTABS_SIZE:
    {
      break;
    }
    case IA_API_CMD_SET_MEMTABS_PTR:
    {
      break;
    }
    case IA_API_CMD_GET_N_MEMTABS:
    {
      *pui_value = NUM_DRC_TABLES;
      break;
    }
    case IA_API_CMD_GET_N_TABLES:
    {
      break;
    }
    case IA_API_CMD_EXECUTE:
    {
      switch (i_idx)
      {
        case IA_CMD_TYPE_DO_EXECUTE:
        {
          /* Dispatch one frame to the processing path selected via
           * IA_DRC_DEC_CONFIG_PARAM_DEC_TYPE. */
          if (p_obj_drc->str_config.dec_type == DEC_TYPE_TD)
          {
            error_code = impd_process_time_domain(p_obj_drc);
          }
          else if (p_obj_drc->str_config.dec_type == DEC_TYPE_QMF64)
          {
            error_code = impd_init_process_audio_main_qmf(p_obj_drc);
          }
          else if (p_obj_drc->str_config.dec_type == DEC_TYPE_STFT256)
          {
            error_code = impd_init_process_audio_main_stft(p_obj_drc);
          }
          else if (p_obj_drc->str_config.dec_type == DEC_TYPE_TD_QMF64)
          {
            error_code = impd_init_process_audio_main_td_qmf(p_obj_drc);
          }
          break;
        }
        case IA_CMD_TYPE_DONE_QUERY:
        {
          *pui_value = p_obj_drc->p_state->ui_exe_done;
          break;
        }
        default:
        {
          return -1;
        }
      }
      break;
    }
    case IA_API_CMD_PUT_INPUT_QUERY:
    {
      /* This decoder can always accept more input. */
      *pui_value = 1;
      break;
    }
    case IA_API_CMD_GET_CURIDX_INPUT_BUF:
    {
      UWORD32 ui_in_buf_size =
          p_obj_drc->p_mem_info[IA_DRC_INPUT_IDX].ui_size;
      UWORD32 ui_in_bytes = p_obj_drc->p_state->ui_in_bytes;
      /* Clamp the reported fill level to the input buffer capacity. */
      *pui_value = ui_in_buf_size > ui_in_bytes ? ui_in_bytes : ui_in_buf_size;
      break;
    }
    case IA_API_CMD_SET_INPUT_BYTES:
    {
      p_obj_drc->p_state->ui_in_bytes = *pui_value;
      break;
    }
    case IA_API_CMD_GET_OUTPUT_BYTES:
    {
      *pui_value = p_obj_drc->p_state->ui_out_bytes;
      break;
    }
    case IA_API_CMD_INPUT_OVER:
    {
      p_obj_drc->p_state->ui_exe_done = 1;
      break;
    }
    case IA_API_CMD_INPUT_OVER_BS:
    {
      p_obj_drc->str_bit_handler.cpy_over = 1;
      break;
    }
    case IA_API_CMD_INPUT_OVER_IC_BS:
    {
      p_obj_drc->str_bit_handler.cpy_over_ic = 1;
      break;
    }
    case IA_API_CMD_INPUT_OVER_IL_BS:
    {
      p_obj_drc->str_bit_handler.cpy_over_il = 1;
      break;
    }
    case IA_API_CMD_INPUT_OVER_IN_BS:
    {
      p_obj_drc->str_bit_handler.cpy_over_in = 1;
      break;
    }
    case IA_API_CMD_SET_INPUT_BYTES_BS:
    {
      p_obj_drc->str_bit_handler.num_byts_cur = *pus_value;
      break;
    }
    case IA_API_CMD_SET_INPUT_BYTES_IC_BS:
    {
      p_obj_drc->str_bit_handler.num_byts_cur_ic = *pus_value;
      break;
    }
    case IA_API_CMD_SET_INPUT_BYTES_IL_BS:
    {
      p_obj_drc->str_bit_handler.num_byts_cur_il = *pus_value;
      break;
    }
    case IA_API_CMD_SET_INPUT_BYTES_IN_BS:
    {
      p_obj_drc->str_bit_handler.num_byts_cur_in = *pus_value;
      break;
    }
    default:
    {
      return -1;
    }
  }
  return error_code;
}
/* impd_drc_mem_api
 *
 * Handles the IA_API_CMD_*MEM* subset of the API: queries against the
 * memory-info tables and installation of caller-allocated buffers.
 *
 * p_obj_drc : decoder instance
 * i_cmd     : IA_API_CMD_GET_MEM_INFO_* / SET_MEM_* selector
 * i_idx     : index into the memory-info table
 * pv_value  : query result out-pointer, or the buffer to install
 *
 * Returns IA_NO_ERROR, or -1 for a null or misaligned buffer pointer.
 */
IA_ERRORCODE impd_drc_mem_api(ia_drc_api_struct *p_obj_drc,
                              WORD32 i_cmd, WORD32 i_idx, pVOID pv_value)
{
  pUWORD32 pui_value = pv_value;
  switch (i_cmd)
  {
    case IA_API_CMD_GET_MEM_INFO_SIZE:
    {
      *pui_value = p_obj_drc->p_mem_info[i_idx].ui_size;
      break;
    }
    case IA_API_CMD_GET_MEM_INFO_ALIGNMENT:
    {
      *pui_value = p_obj_drc->p_mem_info[i_idx].ui_alignment;
      break;
    }
    case IA_API_CMD_GET_MEM_INFO_TYPE:
    {
      *pui_value = p_obj_drc->p_mem_info[i_idx].ui_type;
      break;
    }
    case IA_API_CMD_GET_MEM_INFO_PLACEMENT:
    {
      /* Placement is a two-word attribute; copy both. */
      *pui_value = p_obj_drc->p_mem_info[i_idx].ui_placement[0];
      *(pui_value + 1) = p_obj_drc->p_mem_info[i_idx].ui_placement[1];
      break;
    }
    case IA_API_CMD_GET_MEM_INFO_PRIORITY:
    {
      *pui_value = p_obj_drc->p_mem_info[i_idx].ui_priority;
      break;
    }
    case IA_API_CMD_SET_MEM_PTR:
    {
      pWORD8 pbtemp;
      UWORD32 sz;
      if (pv_value == 0)
      {
        return (-1);
      }
      /* Caller-supplied buffers must honor the advertised alignment. */
      if (((SIZE_T)pv_value % p_obj_drc->p_mem_info[i_idx].ui_alignment) != 0)
      {
        return (-1);
      }
      p_obj_drc->pp_mem[i_idx] = pv_value;
      pbtemp = p_obj_drc->pp_mem[i_idx];
      sz = p_obj_drc->p_mem_info[i_idx].ui_size;
      memset(pbtemp, 0, sz);
      /* Missing in the original: execution fell through into
       * SET_MEM_PLACEMENT (harmless only because that case is empty). */
      break;
    }
    case IA_API_CMD_SET_MEM_PLACEMENT:
    {
      /* Placement hints are accepted but ignored. */
      break;
    }
  }
  return IA_NO_ERROR;
}
IA_ERRORCODE impd_drc_fill_mem_tables(ia_drc_api_struct *p_obj_drc)
{
ia_mem_info_struct *p_mem_info;
{
p_mem_info = &p_obj_drc->p_mem_info[IA_DRC_PERSIST_IDX];
p_mem_info->ui_size = 64*1024*1024;
p_mem_info->ui_alignment = 8;
p_mem_info->ui_type = IA_MEMTYPE_PERSIST;
p_mem_info->ui_placement[0] = 0;
p_mem_info->ui_placement[1] = 0;
p_mem_info->ui_priority = IA_MEMPRIORITY_ANYWHERE;
p_mem_info->ui_placed[0] = 0;
p_mem_info->ui_placed[1] = 0;
}
{
p_mem_info = &p_obj_drc->p_mem_info[IA_DRC_INPUT_IDX];
p_mem_info->ui_size = p_obj_drc->str_config.frame_size*(p_obj_drc->str_config.pcm_size>>3)*p_obj_drc->str_config.num_ch_in;
p_mem_info->ui_alignment = 4;
p_mem_info->ui_type = IA_MEMTYPE_INPUT;
p_mem_info->ui_placement[0] = 0;
p_mem_info->ui_placement[1] = 0;
p_mem_info->ui_priority = IA_MEMPRIORITY_ANYWHERE;
p_mem_info->ui_placed[0] = 0;
p_mem_info->ui_placed[1] = 0;
}
{
p_mem_info = &p_obj_drc->p_mem_info[IA_DRC_OUTPUT_IDX];
p_mem_info->ui_size = p_obj_drc->str_config.frame_size*(p_obj_drc->str_config.pcm_size>>3)*p_obj_drc->str_config.num_ch_in;
p_mem_info->ui_alignment = 4;
p_mem_info->ui_type = IA_MEMTYPE_OUTPUT;
p_mem_info->ui_placement[0] = 0;
p_mem_info->ui_placement[1] = 0;
p_mem_info->ui_priority = IA_MEMPRIORITY_ANYWHERE;
p_mem_info->ui_placed[0] = 0;
p_mem_info->ui_placed[1] = 0;
}
{
p_mem_info = &p_obj_drc->p_mem_info[IA_DRC_SCRATCH_IDX];
p_mem_info->ui_size = SCRATCH_MEM_SIZE;
p_mem_info->ui_alignment = 8;
p_mem_info->ui_type = IA_MEMTYPE_SCRATCH;
p_mem_info->ui_placement[0] = 0;
p_mem_info->ui_placement[1] = 0;
p_mem_info->ui_priority = IA_MEMPRIORITY_ANYWHERE;
p_mem_info->ui_placed[0] = 0;
p_mem_info->ui_placed[1] = 0;
}
return IA_NO_ERROR;
}

View file

@ -0,0 +1,51 @@
/******************************************************************************
*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#ifndef IMPD_API_DEFS_H
#define IMPD_API_DEFS_H
/*****************************************************************************/
/* Constant hash defines */
/*****************************************************************************/
/* A constant to let API copy small strings to buffers outside */
#define IA_API_STR_LEN 30
#define IA_APIVERSION_MAJOR 1
#define IA_APIVERSION_MINOR 10
/* last compatible version */
/* sometimes a new API version is just for a bugfix, or a added feature in */
/* this case it is better to use a newer version even though a library was */
/* made for an older version, library API can then be upgraded to newer API */
/* version after checking for compatibility or by adding features */
#define IA_LASTCOMP_APIVERSION_MAJOR 1
#define IA_LASTCOMP_APIVERSION_MINOR 10
#define IA_STR(str) #str
#define IA_MAKE_VERSION_STR(maj, min) IA_STR(maj) "." IA_STR(min)
#define IA_APIVERSION IA_MAKE_VERSION_STR(\
IA_APIVERSION_MAJOR, \
IA_APIVERSION_MINOR)
#define IA_LAST_COMP_APIVERSION IA_MAKE_VERSION_STR(\
IA_LASTCOMP_APIVERSION_MAJOR, \
IA_LASTCOMP_APIVERSION_MINOR)
#endif

View file

@ -0,0 +1,130 @@
/******************************************************************************
*
* Copyright (C) 2018 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*****************************************************************************
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
*/
#ifndef IMPD_DRC_API_STRUCT_DEF_H
#define IMPD_DRC_API_STRUCT_DEF_H
/****************************************************************************/
/* structure definitions */
/****************************************************************************/
/* DRC Configuration */
/* Decoder configuration: caller-set parameters (via
 * IA_API_CMD_SET_CONFIG_PARAM) plus derived sub-band and delay
 * bookkeeping shared across the processing modules. */
typedef struct ia_drc_config_struct
{
/* 0 or 1, set via IA_DRC_DEC_CONFIG_PARAM_BITS_FORMAT. */
WORD32 bitstream_file_format;
/* DEC_TYPE_TD / DEC_TYPE_TD_QMF64 / DEC_TYPE_QMF64 / DEC_TYPE_STFT256,
 * selected via IA_DRC_DEC_CONFIG_PARAM_DEC_TYPE. */
WORD32 dec_type;
/* SUBBAND_DOMAIN_MODE_*, derived from dec_type. */
WORD32 sub_band_domain_mode;
/* Input channel count, 1..MAX_CHANNEL_COUNT. */
WORD32 num_ch_in;
/* Output channel count, reported via IA_API_CMD_GET_CONFIG_PARAM. */
WORD32 num_ch_out;
/* 8000..96000 Hz. */
WORD32 sampling_rate;
/* 1..39, selects a preset of selection-process control parameters. */
WORD32 control_parameter_index;
/* 0 or 1 (IA_DRC_DEC_CONFIG_PARAM_DELAY_MODE). */
WORD32 delay_mode;
WORD32 absorb_delay_on;
/* 0..MAX_SIGNAL_DELAY samples. */
WORD32 gain_delay_samples;
/* NOTE(review): not set anywhere in the visible API code - confirm use. */
WORD32 subband_domain_io_flag;
/* Samples per frame, 1..4096. */
WORD32 frame_size;
/* Derived from dec_type (QMF64 / STFT256 codec constants). */
WORD32 sub_band_down_sampling_factor;
WORD32 sub_band_count;
/* 0 or 1: enable the peak limiter. */
WORD32 peak_limiter;
/* 0 or 1: a uniDrcInterface bitstream is supplied. */
WORD32 interface_bitstream_present;
/* PCM word size in bits: 16 or 32. */
WORD32 pcm_size;
/* Delay bookkeeping for the parametric-DRC and EQ stages; presumably
 * maintained by the gain decoder - confirm against those modules. */
WORD32 parametric_drc_delay_gain_dec_instance ;
WORD32 parametric_drc_delay;
WORD32 parametric_drc_delay_max;
WORD32 eq_delay_gain_dec_instance;
WORD32 eq_delay;
WORD32 eq_delay_max;
WORD32 delay_line_samples;
/* 0 or 1 (IA_DRC_DEC_CONFIG_PARAM_CON_DELAY_MODE). */
WORD32 constant_delay_on;
WORD32 audio_delay_samples;
}ia_drc_config_struct;
/* DRC bitstream handler: accumulators for the four bitstream kinds the
 * API accepts (gain stream, DRC config, loudness info, uniDRC
 * interface), each fed chunk-by-chunk via IA_CMD_TYPE_INIT_CPY_*. */
typedef struct bits_handler
{
/* Accumulated inConfig (DRC config) bitstream. */
UWORD8* bitstream_drc_config;
/* Accumulated inLoudness bitstream. */
UWORD8* bitstream_loudness_info ;
/* Accumulated uniDrcInterface bitstream. */
UWORD8* bitstream_unidrc_interface ;
/* Accumulated gain bitstream. */
UWORD8* it_bit_buf;
/* Bytes accumulated so far in each buffer above. */
WORD32 num_bytes_bs_drc_config;
WORD32 num_bytes_bs_loudness_info;
WORD32 num_bits_read_bs_unidrc_interface;
WORD32 num_bytes_bs_unidrc_interface;
/* Read cursors; presumably advanced by the bitstream parser - confirm. */
WORD32 num_bits_read_bs;
WORD32 num_bytes_read_bs;
WORD32 num_bytes_bs;
WORD32 num_bits_offset_bs;
WORD32 byte_index_bs;
/* Size in bytes of the chunk currently staged in pp_mem[2], one field
 * per stream kind (set via IA_API_CMD_SET_INPUT_BYTES_*_BS). */
WORD32 num_byts_cur;
WORD32 num_byts_cur_ic;
WORD32 num_byts_cur_il;
WORD32 num_byts_cur_in;
/* End-of-stream flags, one per stream kind (IA_API_CMD_INPUT_OVER_*). */
WORD32 cpy_over;
WORD32 cpy_over_ic;
WORD32 cpy_over_il;
WORD32 cpy_over_in;
/* Set via IA_DRC_DEC_CONFIG_GAIN_STREAM_FLAG. */
WORD32 gain_stream_flag;
}ia_drc_bits_handler_struct;
/* Aggregates the pointers to the sub-decoder state objects (bitstream
 * parser, gain decoder, selection process, config/loudness payloads,
 * filter banks and limiter). Presumably wired up by
 * impd_drc_set_struct_pointer - confirm against that function. */
typedef struct
{
ia_drc_bits_dec_struct *pstr_bitstream_dec;
/* Two gain-decoder instances; see eq/parametric delay fields in the
 * config struct - confirm which instance serves which stage. */
ia_drc_gain_dec_struct *pstr_gain_dec[2];
ia_drc_sel_pro_struct *pstr_selection_proc;
ia_drc_config *pstr_drc_config;
ia_drc_loudness_info_set_struct *pstr_loudness_info;
ia_drc_gain_struct *pstr_drc_gain;
ia_drc_interface_struct *pstr_drc_interface;
ia_drc_peak_limiter_struct *pstr_peak_limiter;
ia_drc_qmf_filt_struct *pstr_qmf_filter;
ia_drc_sel_proc_params_struct *pstr_drc_sel_proc_params;
/* Exposed to the caller via IA_DRC_DEC_CONFIG_PROC_OUT_PTR. */
ia_drc_sel_proc_output_struct *pstr_drc_sel_proc_output;
}ia_drc_payload_struct;
/* Runtime state of a decoder instance. */
typedef struct ia_drc_state_struct
{
/* Bytes produced by the last execute (IA_API_CMD_GET_OUTPUT_BYTES). */
UWORD32 ui_out_bytes;
/* Bytes staged in the input buffer (IA_API_CMD_SET_INPUT_BYTES). */
UWORD32 ui_in_bytes;
UWORD32 ui_ir_bytes;
UWORD32 total_num_out_samples;
UWORD32 frame_no;
UWORD32 out_size;
/* 1 once IA_CMD_TYPE_INIT_PROCESS has completed successfully. */
UWORD32 ui_init_done;
/* 1 once input is over (IA_API_CMD_INPUT_OVER) / execution finished. */
UWORD32 ui_exe_done;
UWORD32 ui_ir_used;
WORD32 delay_in_output;
WORD32 delay_adjust_samples;
/* Caller-provided persistent buffer (pp_mem[IA_DRC_PERSIST_IDX]). */
pVOID persistant_ptr;
}ia_drc_state_struct;
/* Top-level API object behind the opaque handle passed to
 * ia_drc_dec_api. The state struct, memory-info table and buffer
 * pointer array live at fixed offsets inside the same allocation
 * (see IA_CMD_TYPE_INIT_API_POST_CONFIG_PARAMS). */
typedef struct IA_PSM_API_Struct
{
ia_drc_state_struct *p_state;
ia_drc_config_struct str_config;
ia_drc_payload_struct str_payload;
ia_drc_bits_handler_struct str_bit_handler;
/* Table of NUM_DRC_TABLES entries describing required buffers. */
ia_mem_info_struct *p_mem_info;
/* Caller-installed buffers, indexed by IA_DRC_*_IDX. */
pVOID *pp_mem;
struct ia_bit_buf_struct str_bit_buf, *pstr_bit_buf;
} ia_drc_api_struct;
#endif

Some files were not shown because too many files have changed in this diff Show more