mirror of
https://github.com/ittiam-systems/libxaac.git
synced 2026-04-02 20:30:47 +07:00
Import xHE-AAC decoder from Ittiam
Clean copy of version XHEAAC_V1_36 as delivered 2018/4/13 Sole change is to map cr/lf line termination to unix lf. Added appropriate LICENSE, MODULE_LICENSE_APACHE2, and NOTICE files as part of folding into internal master. Bug: 77287124 Test: CTS audio/media Change-Id: I3c8d124033f967b29d6e384cce5c843ee17a7bb1
This commit is contained in:
parent
61cd144fc5
commit
48f5fd9660
372 changed files with 148215 additions and 0 deletions
293
Android.bp
Normal file
293
Android.bp
Normal file
|
|
@ -0,0 +1,293 @@
|
|||
cc_library_static {
|
||||
name: "libia_xaacdec",
|
||||
|
||||
vendor_available: true,
|
||||
vndk: {
|
||||
enabled: true,
|
||||
},
|
||||
|
||||
cflags: [
|
||||
"-O3"
|
||||
],
|
||||
|
||||
export_include_dirs: [
|
||||
"decoder"
|
||||
],
|
||||
|
||||
srcs: [
|
||||
"decoder/ixheaacd_aacdecoder.c",
|
||||
"decoder/ixheaacd_aacpluscheck.c",
|
||||
"decoder/ixheaacd_aac_imdct.c",
|
||||
"decoder/ixheaacd_aac_rom.c",
|
||||
"decoder/ixheaacd_aac_tns.c",
|
||||
"decoder/ixheaacd_acelp_bitparse.c",
|
||||
"decoder/ixheaacd_acelp_decode.c",
|
||||
"decoder/ixheaacd_acelp_mdct.c",
|
||||
"decoder/ixheaacd_acelp_tools.c",
|
||||
"decoder/ixheaacd_adts_crc_check.c",
|
||||
"decoder/ixheaacd_api.c",
|
||||
"decoder/ixheaacd_arith_dec.c",
|
||||
"decoder/ixheaacd_avq_dec.c",
|
||||
"decoder/ixheaacd_avq_rom.c",
|
||||
"decoder/ixheaacd_basic_ops.c",
|
||||
"decoder/ixheaacd_bitbuffer.c",
|
||||
"decoder/ixheaacd_block.c",
|
||||
"decoder/ixheaacd_channel.c",
|
||||
"decoder/ixheaacd_common_initfuncs.c",
|
||||
"decoder/ixheaacd_common_lpfuncs.c",
|
||||
"decoder/ixheaacd_common_rom.c",
|
||||
"decoder/ixheaacd_create.c",
|
||||
"decoder/ixheaacd_decode_main.c",
|
||||
"decoder/ixheaacd_dsp_fft32x32s.c",
|
||||
"decoder/ixheaacd_env_calc.c",
|
||||
"decoder/ixheaacd_env_dec.c",
|
||||
"decoder/ixheaacd_env_extr.c",
|
||||
"decoder/ixheaacd_esbr_envcal.c",
|
||||
"decoder/ixheaacd_esbr_polyphase.c",
|
||||
"decoder/ixheaacd_esbr_rom.c",
|
||||
"decoder/ixheaacd_esbr_fft.c",
|
||||
"decoder/ixheaacd_ext_ch_ele.c",
|
||||
"decoder/ixheaacd_fft.c",
|
||||
"decoder/ixheaacd_freq_sca.c",
|
||||
"decoder/ixheaacd_fwd_alias_cnx.c",
|
||||
"decoder/ixheaacd_hbe_trans.c",
|
||||
"decoder/ixheaacd_headerdecode.c",
|
||||
"decoder/ixheaacd_hufftables.c",
|
||||
"decoder/ixheaacd_huff_tools.c",
|
||||
"decoder/ixheaacd_hybrid.c",
|
||||
"decoder/ixheaacd_imdct.c",
|
||||
"decoder/ixheaacd_initfuncs.c",
|
||||
"decoder/ixheaacd_init_config.c",
|
||||
"decoder/ixheaacd_longblock.c",
|
||||
"decoder/ixheaacd_lpc.c",
|
||||
"decoder/ixheaacd_lpc_dec.c",
|
||||
"decoder/ixheaacd_lpfuncs.c",
|
||||
"decoder/ixheaacd_lpp_tran.c",
|
||||
"decoder/ixheaacd_lt_predict.c",
|
||||
"decoder/ixheaacd_mps_dec.c",
|
||||
"decoder/ixheaacd_mps_decorr.c",
|
||||
"decoder/ixheaacd_mps_hybrid_filt.c",
|
||||
"decoder/ixheaacd_mps_parse.c",
|
||||
"decoder/ixheaacd_mps_pre_mix.c",
|
||||
"decoder/ixheaacd_mps_rom.c",
|
||||
"decoder/ixheaacd_mps_smoothing.c",
|
||||
"decoder/ixheaacd_mps_temp_process.c",
|
||||
"decoder/ixheaacd_mps_temp_reshape.c",
|
||||
"decoder/ixheaacd_pns_js_thumb.c",
|
||||
"decoder/ixheaacd_pred_vec_block.c",
|
||||
"decoder/ixheaacd_process.c",
|
||||
"decoder/ixheaacd_ps_bitdec.c",
|
||||
"decoder/ixheaacd_ps_dec.c",
|
||||
"decoder/ixheaacd_pvc_rom.c",
|
||||
"decoder/ixheaacd_rom.c",
|
||||
"decoder/ixheaacd_sbrdecoder.c",
|
||||
"decoder/ixheaacd_sbrdec_initfuncs.c",
|
||||
"decoder/ixheaacd_sbrdec_lpfuncs.c",
|
||||
"decoder/ixheaacd_sbr_crc.c",
|
||||
"decoder/ixheaacd_sbr_dec.c",
|
||||
"decoder/ixheaacd_sbr_rom.c",
|
||||
"decoder/ixheaacd_spectrum_dec.c",
|
||||
"decoder/ixheaacd_stereo.c",
|
||||
"decoder/ixheaacd_tcx_fwd_alcnx.c",
|
||||
"decoder/ixheaacd_tcx_fwd_mdct.c",
|
||||
"decoder/ixheaacd_thumb_ps_dec.c",
|
||||
"decoder/ixheaacd_tns.c",
|
||||
"decoder/ixheaacd_basic_funcs.c",
|
||||
"decoder/ixheaacd_Windowing.c",
|
||||
"decoder/ixheaacd_latmdemux.c",
|
||||
"decoder/ixheaacd_multichannel.c",
|
||||
"decoder/ixheaacd_drc_freq_dec.c",
|
||||
"decoder/ixheaacd_mps_poly_filt.c",
|
||||
"decoder/ixheaacd_huff_code_reorder.c",
|
||||
"decoder/ixheaacd_rev_vlc.c",
|
||||
"decoder/drc_src/impd_drc_api.c",
|
||||
"decoder/drc_src/impd_drc_bitbuffer.c",
|
||||
"decoder/drc_src/impd_drc_dec.c",
|
||||
"decoder/drc_src/impd_drc_dynamic_payload.c",
|
||||
"decoder/drc_src/impd_drc_eq.c",
|
||||
"decoder/drc_src/impd_drc_extr_delta_coded_info.c",
|
||||
"decoder/drc_src/impd_drc_filter_bank.c",
|
||||
"decoder/drc_src/impd_drc_gain_dec.c",
|
||||
"decoder/drc_src/impd_drc_gain_decoder.c",
|
||||
"decoder/drc_src/impd_drc_host_params.c",
|
||||
"decoder/drc_src/impd_drc_init.c",
|
||||
"decoder/drc_src/impd_drc_interface_decoder.c",
|
||||
"decoder/drc_src/impd_drc_interface_parser.c",
|
||||
"decoder/drc_src/impd_drc_loudness_control.c",
|
||||
"decoder/drc_src/impd_drc_main_qmf_process.c",
|
||||
"decoder/drc_src/impd_drc_main_stft_process.c",
|
||||
"decoder/drc_src/impd_drc_main_td_process.c",
|
||||
"decoder/drc_src/impd_drc_main_td_qmf_process.c",
|
||||
"decoder/drc_src/impd_drc_multiband.c",
|
||||
"decoder/drc_src/impd_drc_parametric_dec.c",
|
||||
"decoder/drc_src/impd_drc_peak_limiter.c",
|
||||
"decoder/drc_src/impd_drc_process.c",
|
||||
"decoder/drc_src/impd_drc_rom.c",
|
||||
"decoder/drc_src/impd_drc_selection_process.c",
|
||||
"decoder/drc_src/impd_drc_selection_process_drcset_selection.c",
|
||||
"decoder/drc_src/impd_drc_selection_process_init.c",
|
||||
"decoder/drc_src/impd_drc_shape_filter.c",
|
||||
"decoder/drc_src/impd_drc_static_payload.c",
|
||||
],
|
||||
|
||||
sanitize: {
|
||||
misc_undefined: [
|
||||
"unsigned-integer-overflow",
|
||||
"signed-integer-overflow",
|
||||
],
|
||||
cfi: true,
|
||||
diag: {
|
||||
cfi: true,
|
||||
},
|
||||
},
|
||||
arch: {
|
||||
arm: {
|
||||
local_include_dirs: [
|
||||
"decoder/armv7",
|
||||
"decoder"
|
||||
],
|
||||
|
||||
srcs: [
|
||||
"decoder/armv7/ixheaacd_qmf_dec.c",
|
||||
"decoder/armv7/ixheaacd_fft_armv7.c",
|
||||
"decoder/armv7/ixheaacd_function_selector_armv7.c",
|
||||
|
||||
"decoder/armv7/ixheaacd_overlap_add1.s",
|
||||
"decoder/armv7/ixheaacd_overlap_add2.s",
|
||||
"decoder/armv7/ixheaacd_lap1.s",
|
||||
"decoder/armv7/ixheaacd_dec_DCT2_64_asm.s",
|
||||
"decoder/armv7/ixheaacd_apply_rot.s",
|
||||
"decoder/armv7/ixheaacd_autocorr_st2.s",
|
||||
"decoder/armv7/ixheaacd_auto_corr.s",
|
||||
"decoder/armv7/ixheaacd_calcmaxspectralline.s",
|
||||
"decoder/armv7/ixheaacd_conv_ergtoamplitude.s",
|
||||
"decoder/armv7/ixheaacd_conv_ergtoamplitudelp.s",
|
||||
"decoder/armv7/ixheaacd_cos_sin_mod.s",
|
||||
"decoder/armv7/ixheaacd_dct3_32.s",
|
||||
"decoder/armv7/ixheaacd_decorr_filter2.s",
|
||||
"decoder/armv7/ixheaacd_enery_calc_per_subband.s",
|
||||
"decoder/armv7/ixheaacd_expsubbandsamples.s",
|
||||
"decoder/armv7/ixheaacd_ffr_divide16.s",
|
||||
"decoder/armv7/ixheaacd_fwd_modulation.s",
|
||||
"decoder/armv7/ixheaacd_harm_idx_zerotwolp.s",
|
||||
"decoder/armv7/ixheaacd_imdct_using_fft.s",
|
||||
"decoder/armv7/ixheaacd_inv_dit_fft_8pt.s",
|
||||
"decoder/armv7/ixheaacd_no_lap1.s",
|
||||
"decoder/armv7/ixheaacd_post_radix_compute2.s",
|
||||
"decoder/armv7/ixheaacd_post_radix_compute4.s",
|
||||
"decoder/armv7/ixheaacd_post_twiddle.s",
|
||||
"decoder/armv7/ixheaacd_pre_twiddle_compute.s",
|
||||
"decoder/armv7/ixheaacd_post_twiddle_overlap.s",
|
||||
"decoder/armv7/ixheaacd_radix4_bfly.s",
|
||||
"decoder/armv7/ixheaacd_rescale_subbandsamples.s",
|
||||
"decoder/armv7/ixheaacd_sbr_imdct_using_fft.s",
|
||||
"decoder/armv7/ixheaacd_sbr_qmfanal32_winadds.s",
|
||||
"decoder/armv7/ixheaacd_sbr_qmfsyn64_winadd.s",
|
||||
"decoder/armv7/ixheaacd_shiftrountine.s",
|
||||
"decoder/armv7/ixheaacd_shiftrountine_with_round.s",
|
||||
"decoder/armv7/ixheaacd_tns_ar_filter_fixed_32x16.s",
|
||||
"decoder/armv7/ixheaacd_tns_parcor2lpc_32x16.s",
|
||||
"decoder/armv7/ixheaacd_esbr_radix4bfly.s",
|
||||
"decoder/armv7/ixheaacd_esbr_cos_sin_mod_loop1.s",
|
||||
"decoder/armv7/ixheaacd_esbr_qmfsyn64_winadd.s",
|
||||
"decoder/armv7/ixheaacd_complex_ifft_p2.s",
|
||||
"decoder/armv7/ixheaacd_complex_fft_p2.s",
|
||||
"decoder/armv7/ixheaacd_esbr_cos_sin_mod_loop2.s",
|
||||
"decoder/armv7/ixheaacd_shiftrountine_with_round_hq.s",
|
||||
"decoder/armv7/ixheaacd_mps_complex_fft_64_asm.s",
|
||||
"decoder/armv7/ixheaacd_esbr_fwd_modulation.s",
|
||||
"decoder/armv7/ixheaacd_mps_synt_pre_twiddle.s",
|
||||
"decoder/armv7/ixheaacd_mps_synt_post_twiddle.s",
|
||||
"decoder/armv7/ixheaacd_calc_pre_twid.s",
|
||||
"decoder/armv7/ixheaacd_calc_post_twid.s",
|
||||
"decoder/armv7/ixheaacd_mps_synt_out_calc.s",
|
||||
"decoder/armv7/ixheaacd_mps_synt_post_fft_twiddle.s",
|
||||
"decoder/armv7/ixheaacd_sbr_qmfanal32_winadds_eld.s",
|
||||
"decoder/armv7/ixheaacd_shiftrountine_with_rnd_eld.s",
|
||||
"decoder/armv7/ixheaacd_eld_decoder_sbr_pre_twiddle.s",
|
||||
"decoder/armv7/ixheaacd_fft_15_ld.s",
|
||||
"decoder/armv7/ixheaacd_aac_ld_dec_rearrange.s",
|
||||
"decoder/armv7/ixheaacd_fft32x32_ld2_armv7.s",
|
||||
"decoder/armv7/ixheaacd_apply_scale_fac.s"
|
||||
],
|
||||
|
||||
cflags: [
|
||||
"-mfloat-abi=softfp",
|
||||
"-mfpu=neon",
|
||||
"-mcpu=cortex-a8",
|
||||
],
|
||||
|
||||
armv7_a_neon: {
|
||||
srcs: [
|
||||
],
|
||||
cflags: [
|
||||
],
|
||||
},
|
||||
},
|
||||
|
||||
arm64: {
|
||||
cflags: [
|
||||
"-march=armv8-a",
|
||||
],
|
||||
local_include_dirs: [
|
||||
"decoder/armv8",
|
||||
"decoder"
|
||||
],
|
||||
|
||||
srcs: [
|
||||
|
||||
"decoder/armv8/ixheaacd_qmf_dec.c",
|
||||
"decoder/armv8/ixheaacd_function_selector_armv8.c",
|
||||
|
||||
"decoder/armv8/ixheaacd_calcmaxspectralline.s",
|
||||
"decoder/armv8/ixheaacd_sbr_imdct_using_fft.s",
|
||||
"decoder/armv8/ixheaacd_imdct_using_fft.s",
|
||||
"decoder/armv8/ixheaacd_no_lap1.s",
|
||||
"decoder/armv8/ixheaacd_post_twiddle.s",
|
||||
"decoder/armv8/ixheaacd_pre_twiddle.s",
|
||||
"decoder/armv8/ixheaacd_sbr_qmfsyn64_winadd.s",
|
||||
"decoder/armv8/ixheaacd_overlap_add1.s",
|
||||
"decoder/armv8/ixheaacd_overlap_add2.s",
|
||||
"decoder/armv8/ixheaacd_shiftrountine_with_round_eld.s",
|
||||
"decoder/armv8/ixheaacd_fft32x32_ld2_armv8.s",
|
||||
"decoder/armv8/ixheaacd_inv_dit_fft_8pt.s",
|
||||
"decoder/armv8/ixheaacd_shiftrountine_with_round.s",
|
||||
"decoder/armv8/ixheaacd_sbr_qmf_analysis32_neon.s",
|
||||
"decoder/armv8/ixheaacd_postradixcompute4.s",
|
||||
"decoder/armv8/ixheaacd_apply_scale_factors.s",
|
||||
"decoder/armv8/ixheaacd_cos_sin_mod_loop1.s",
|
||||
"decoder/armv8/ixheaacd_cos_sin_mod_loop2.s",
|
||||
],
|
||||
},
|
||||
|
||||
x86: {
|
||||
local_include_dirs: [
|
||||
"decoder"
|
||||
],
|
||||
|
||||
srcs: [
|
||||
"decoder/ixheaacd_qmf_dec.c",
|
||||
"decoder/x86/ixheaacd_function_selector_x86.c",
|
||||
],
|
||||
|
||||
cflags: [
|
||||
],
|
||||
},
|
||||
|
||||
x86_64: {
|
||||
local_include_dirs: [
|
||||
"decoder"
|
||||
],
|
||||
|
||||
srcs: [
|
||||
"decoder/ixheaacd_qmf_dec.c",
|
||||
"decoder/x86_64/ixheaacd_function_selector_x86_64.c",
|
||||
],
|
||||
|
||||
cflags: [
|
||||
],
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
subdirs = ["test"]
|
||||
203
LICENSE
Normal file
203
LICENSE
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
|
||||
Apache License
|
||||
Version 2.0, January 2004
|
||||
http://www.apache.org/licenses/
|
||||
|
||||
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction,
|
||||
and distribution as defined by Sections 1 through 9 of this document.
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by
|
||||
the copyright owner that is granting the License.
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all
|
||||
other entities that control, are controlled by, or are under common
|
||||
control with that entity. For the purposes of this definition,
|
||||
"control" means (i) the power, direct or indirect, to cause the
|
||||
direction or management of such entity, whether by contract or
|
||||
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
||||
outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity
|
||||
exercising permissions granted by this License.
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications,
|
||||
including but not limited to software source code, documentation
|
||||
source, and configuration files.
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical
|
||||
transformation or translation of a Source form, including but
|
||||
not limited to compiled object code, generated documentation,
|
||||
and conversions to other media types.
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or
|
||||
Object form, made available under the License, as indicated by a
|
||||
copyright notice that is included in or attached to the work
|
||||
(an example is provided in the Appendix below).
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object
|
||||
form, that is based on (or derived from) the Work and for which the
|
||||
editorial revisions, annotations, elaborations, or other modifications
|
||||
represent, as a whole, an original work of authorship. For the purposes
|
||||
of this License, Derivative Works shall not include works that remain
|
||||
separable from, or merely link (or bind by name) to the interfaces of,
|
||||
the Work and Derivative Works thereof.
|
||||
|
||||
"Contribution" shall mean any work of authorship, including
|
||||
the original version of the Work and any modifications or additions
|
||||
to that Work or Derivative Works thereof, that is intentionally
|
||||
submitted to Licensor for inclusion in the Work by the copyright owner
|
||||
or by an individual or Legal Entity authorized to submit on behalf of
|
||||
the copyright owner. For the purposes of this definition, "submitted"
|
||||
means any form of electronic, verbal, or written communication sent
|
||||
to the Licensor or its representatives, including but not limited to
|
||||
communication on electronic mailing lists, source code control systems,
|
||||
and issue tracking systems that are managed by, or on behalf of, the
|
||||
Licensor for the purpose of discussing and improving the Work, but
|
||||
excluding communication that is conspicuously marked or otherwise
|
||||
designated in writing by the copyright owner as "Not a Contribution."
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity
|
||||
on behalf of whom a Contribution has been received by Licensor and
|
||||
subsequently incorporated within the Work.
|
||||
|
||||
2. Grant of Copyright License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
copyright license to reproduce, prepare Derivative Works of,
|
||||
publicly display, publicly perform, sublicense, and distribute the
|
||||
Work and such Derivative Works in Source or Object form.
|
||||
|
||||
3. Grant of Patent License. Subject to the terms and conditions of
|
||||
this License, each Contributor hereby grants to You a perpetual,
|
||||
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
||||
(except as stated in this section) patent license to make, have made,
|
||||
use, offer to sell, sell, import, and otherwise transfer the Work,
|
||||
where such license applies only to those patent claims licensable
|
||||
by such Contributor that are necessarily infringed by their
|
||||
Contribution(s) alone or by combination of their Contribution(s)
|
||||
with the Work to which such Contribution(s) was submitted. If You
|
||||
institute patent litigation against any entity (including a
|
||||
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
||||
or a Contribution incorporated within the Work constitutes direct
|
||||
or contributory patent infringement, then any patent licenses
|
||||
granted to You under this License for that Work shall terminate
|
||||
as of the date such litigation is filed.
|
||||
|
||||
4. Redistribution. You may reproduce and distribute copies of the
|
||||
Work or Derivative Works thereof in any medium, with or without
|
||||
modifications, and in Source or Object form, provided that You
|
||||
meet the following conditions:
|
||||
|
||||
(a) You must give any other recipients of the Work or
|
||||
Derivative Works a copy of this License; and
|
||||
|
||||
(b) You must cause any modified files to carry prominent notices
|
||||
stating that You changed the files; and
|
||||
|
||||
(c) You must retain, in the Source form of any Derivative Works
|
||||
that You distribute, all copyright, patent, trademark, and
|
||||
attribution notices from the Source form of the Work,
|
||||
excluding those notices that do not pertain to any part of
|
||||
the Derivative Works; and
|
||||
|
||||
(d) If the Work includes a "NOTICE" text file as part of its
|
||||
distribution, then any Derivative Works that You distribute must
|
||||
include a readable copy of the attribution notices contained
|
||||
within such NOTICE file, excluding those notices that do not
|
||||
pertain to any part of the Derivative Works, in at least one
|
||||
of the following places: within a NOTICE text file distributed
|
||||
as part of the Derivative Works; within the Source form or
|
||||
documentation, if provided along with the Derivative Works; or,
|
||||
within a display generated by the Derivative Works, if and
|
||||
wherever such third-party notices normally appear. The contents
|
||||
of the NOTICE file are for informational purposes only and
|
||||
do not modify the License. You may add Your own attribution
|
||||
notices within Derivative Works that You distribute, alongside
|
||||
or as an addendum to the NOTICE text from the Work, provided
|
||||
that such additional attribution notices cannot be construed
|
||||
as modifying the License.
|
||||
|
||||
You may add Your own copyright statement to Your modifications and
|
||||
may provide additional or different license terms and conditions
|
||||
for use, reproduction, or distribution of Your modifications, or
|
||||
for any such Derivative Works as a whole, provided Your use,
|
||||
reproduction, and distribution of the Work otherwise complies with
|
||||
the conditions stated in this License.
|
||||
|
||||
5. Submission of Contributions. Unless You explicitly state otherwise,
|
||||
any Contribution intentionally submitted for inclusion in the Work
|
||||
by You to the Licensor shall be under the terms and conditions of
|
||||
this License, without any additional terms or conditions.
|
||||
Notwithstanding the above, nothing herein shall supersede or modify
|
||||
the terms of any separate license agreement you may have executed
|
||||
with Licensor regarding such Contributions.
|
||||
|
||||
6. Trademarks. This License does not grant permission to use the trade
|
||||
names, trademarks, service marks, or product names of the Licensor,
|
||||
except as required for reasonable and customary use in describing the
|
||||
origin of the Work and reproducing the content of the NOTICE file.
|
||||
|
||||
7. Disclaimer of Warranty. Unless required by applicable law or
|
||||
agreed to in writing, Licensor provides the Work (and each
|
||||
Contributor provides its Contributions) on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
||||
implied, including, without limitation, any warranties or conditions
|
||||
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
||||
PARTICULAR PURPOSE. You are solely responsible for determining the
|
||||
appropriateness of using or redistributing the Work and assume any
|
||||
risks associated with Your exercise of permissions under this License.
|
||||
|
||||
8. Limitation of Liability. In no event and under no legal theory,
|
||||
whether in tort (including negligence), contract, or otherwise,
|
||||
unless required by applicable law (such as deliberate and grossly
|
||||
negligent acts) or agreed to in writing, shall any Contributor be
|
||||
liable to You for damages, including any direct, indirect, special,
|
||||
incidental, or consequential damages of any character arising as a
|
||||
result of this License or out of the use or inability to use the
|
||||
Work (including but not limited to damages for loss of goodwill,
|
||||
work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses), even if such Contributor
|
||||
has been advised of the possibility of such damages.
|
||||
|
||||
9. Accepting Warranty or Additional Liability. While redistributing
|
||||
the Work or Derivative Works thereof, You may choose to offer,
|
||||
and charge a fee for, acceptance of support, warranty, indemnity,
|
||||
or other liability obligations and/or rights consistent with this
|
||||
License. However, in accepting such obligations, You may act only
|
||||
on Your own behalf and on Your sole responsibility, not on behalf
|
||||
of any other Contributor, and only if You agree to indemnify,
|
||||
defend, and hold each Contributor harmless for any liability
|
||||
incurred by, or claims asserted against, such Contributor by reason
|
||||
of your accepting any such warranty or additional liability.
|
||||
|
||||
END OF TERMS AND CONDITIONS
|
||||
|
||||
APPENDIX: How to apply the Apache License to your work.
|
||||
|
||||
To apply the Apache License to your work, attach the following
|
||||
boilerplate notice, with the fields enclosed by brackets "[]"
|
||||
replaced with your own identifying information. (Don't include
|
||||
the brackets!) The text should be enclosed in the appropriate
|
||||
comment syntax for the file format. We also recommend that a
|
||||
file or class name and description of purpose be included on the
|
||||
same "printed page" as the copyright notice for easier
|
||||
identification within third-party archives.
|
||||
|
||||
Copyright [yyyy] [name of copyright owner]
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
Create Short LinkX
|
||||
0
MODULE_LICENSE_APACHE2
Normal file
0
MODULE_LICENSE_APACHE2
Normal file
19
NOTICE
Normal file
19
NOTICE
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
/******************************************************************************
|
||||
* *
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
46
decoder/armv7/ia_xheaacd_mps_mulshift.s
Normal file
46
decoder/armv7/ia_xheaacd_mps_mulshift.s
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_mps_mulshift
|
||||
|
||||
ixheaacd_mps_mulshift:
|
||||
|
||||
STMFD sp!, {R4-R12}
|
||||
VPUSH {d8 - d15}
|
||||
LOOP:
|
||||
VLD1.32 {Q0, Q1}, [R0]! @LOADING values from R0
|
||||
VLD1.32 {Q2, Q3}, [R1]! @LOADING values from R1
|
||||
VQDMULL.S32 Q4, D0, D4
|
||||
VQDMULL.S32 Q5, D2, D6
|
||||
VQDMULL.S32 Q6, D1, D5
|
||||
VQDMULL.S32 Q7, D3, D7
|
||||
VUZP.32 Q4, Q6
|
||||
VUZP.32 Q5, Q7
|
||||
VST1.32 {Q6, Q7}, [R2]! @Storing values to R2
|
||||
SUBS R3, R3, #8
|
||||
BGT LOOP
|
||||
|
||||
VPOP {d8 - d15}
|
||||
LDMFD sp!, {R4-R12}
|
||||
BX LR
|
||||
|
||||
240
decoder/armv7/ia_xheaacd_mps_reoder_mulshift_acc.s
Normal file
240
decoder/armv7/ia_xheaacd_mps_reoder_mulshift_acc.s
Normal file
|
|
@ -0,0 +1,240 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_mps_mulshift_acc
|
||||
|
||||
ixheaacd_mps_mulshift_acc:
|
||||
|
||||
STMFD sp!, {R4-R12, R14}
|
||||
VPUSH {d8 - d15}
|
||||
LDR R4, [SP, #104] @Argument V_fix
|
||||
LDR R5, [SP, #108] @Argument 2*resolution
|
||||
ASR R6, R5, #1
|
||||
|
||||
MOV R7, R4
|
||||
MOV R11, #40
|
||||
MUL R11, R11, R5
|
||||
ADD R7, R7, R11
|
||||
LSL R8, R5, #2
|
||||
SUB R8, R7, R8
|
||||
MOV R10, #9
|
||||
MUL R9, R5, R10
|
||||
|
||||
COPYLOOP:
|
||||
SUB R8, R8, #32
|
||||
VLD1.32 {Q0, Q1}, [R8]
|
||||
SUB R7, R7, #32
|
||||
VST1.32 {Q0, Q1}, [R7]
|
||||
SUBS R9, R9, #8
|
||||
BGT COPYLOOP
|
||||
|
||||
LOOP:
|
||||
VMOV.I64 Q14, #0
|
||||
VMOV.I64 Q15, #0
|
||||
MOV R7, R6
|
||||
MOV R8, R0
|
||||
MOV R9, R1
|
||||
|
||||
LOOP1:
|
||||
VLD1.32 {Q0, Q1}, [R8]! @LOADING values from R0 Sr_fix
|
||||
VLD1.32 {Q2, Q3}, [R9]! @LOADING values from R1 Si_fix
|
||||
VLD1.32 {Q4, Q5}, [R2]! @LOADING values from R2 N.real_fix
|
||||
VLD1.32 {Q6, Q7}, [R3]! @LOADING values from R3 N.imag_fix
|
||||
|
||||
VMULL.S32 Q12, D0, D8
|
||||
VMULL.S32 Q10, D1, D9
|
||||
VMULL.S32 Q11, D3, D11
|
||||
VMULL.S32 Q13, D2, D10
|
||||
|
||||
VMULL.S32 Q0, D4, D12
|
||||
VMULL.S32 Q4, D5, D13
|
||||
VMULL.S32 Q5, D7, D15
|
||||
VMULL.S32 Q1, D6, D14
|
||||
|
||||
VSHR.S64 Q0, Q0, #31
|
||||
VSHR.S64 Q1, Q1, #31
|
||||
VSHR.S64 Q4, Q4, #31
|
||||
VSHR.S64 Q5, Q5, #31
|
||||
|
||||
VSHR.S64 Q12, Q12, #31
|
||||
VSHR.S64 Q13, Q13, #31
|
||||
VSHR.S64 Q10, Q10, #31
|
||||
VSHR.S64 Q11, Q11, #31
|
||||
|
||||
VSUB.I64 Q12, Q12, Q0
|
||||
VSUB.I64 Q13, Q13, Q1
|
||||
VSUB.I64 Q10, Q10, Q4
|
||||
VSUB.I64 Q11, Q11, Q5
|
||||
|
||||
VADD.I64 Q12, Q12, Q13
|
||||
VADD.I64 Q10, Q10, Q11
|
||||
VADD.I64 Q12, Q12, Q10
|
||||
VADD.I64 D24, D24, D25
|
||||
VADD.I64 D28, D28, D24
|
||||
SUBS R7, R7, #8
|
||||
BGT LOOP1
|
||||
|
||||
|
||||
MOV R7, R6
|
||||
MOV R8, R0
|
||||
MOV R9, R1
|
||||
|
||||
LOOP2:
|
||||
VLD1.32 {Q0, Q1}, [R8]! @LOADING values from R0 Sr_fix
|
||||
VLD1.32 {Q2, Q3}, [R9]! @LOADING values from R1 Si_fix
|
||||
VLD1.32 {Q4, Q5}, [R2]! @LOADING values from R2 N.real_fix
|
||||
VLD1.32 {Q6, Q7}, [R3]! @LOADING values from R3 N.imag_fix
|
||||
|
||||
VMULL.S32 Q12, D0, D8
|
||||
VMULL.S32 Q10, D1, D9
|
||||
VMULL.S32 Q11, D3, D11
|
||||
VMULL.S32 Q13, D2, D10
|
||||
|
||||
VMULL.S32 Q0, D4, D12
|
||||
VMULL.S32 Q4, D5, D13
|
||||
VMULL.S32 Q5, D7, D15
|
||||
VMULL.S32 Q1, D6, D14
|
||||
|
||||
VSHR.S64 Q12, Q12, #31
|
||||
VSHR.S64 Q13, Q13, #31
|
||||
VSHR.S64 Q10, Q10, #31
|
||||
VSHR.S64 Q11, Q11, #31
|
||||
|
||||
VSHR.S64 Q0, Q0, #31
|
||||
VSHR.S64 Q1, Q1, #31
|
||||
VSHR.S64 Q4, Q4, #31
|
||||
VSHR.S64 Q5, Q5, #31
|
||||
|
||||
VSUB.I64 Q12, Q12, Q0
|
||||
VSUB.I64 Q13, Q13, Q1
|
||||
VSUB.I64 Q10, Q10, Q4
|
||||
VSUB.I64 Q11, Q11, Q5
|
||||
|
||||
VADD.I64 Q12, Q12, Q13
|
||||
VADD.I64 Q10, Q10, Q11
|
||||
VADD.I64 Q12, Q12, Q10
|
||||
VADD.I64 D24, D24, D25
|
||||
VADD.I64 D29, D29, D24
|
||||
SUBS R7, R7, #8
|
||||
BGT LOOP2
|
||||
|
||||
MOV R7, R6
|
||||
MOV R8, R0
|
||||
MOV R9, R1
|
||||
|
||||
|
||||
LOOP3:
|
||||
VLD1.32 {Q0, Q1}, [R8]! @LOADING values from R0 Sr_fix
|
||||
VLD1.32 {Q2, Q3}, [R9]! @LOADING values from R1 Si_fix
|
||||
VLD1.32 {Q4, Q5}, [R2]! @LOADING values from R2 N.real_fix
|
||||
VLD1.32 {Q6, Q7}, [R3]! @LOADING values from R3 N.imag_fix
|
||||
|
||||
VMULL.S32 Q12, D0, D8
|
||||
VMULL.S32 Q10, D1, D9
|
||||
VMULL.S32 Q11, D3, D11
|
||||
VMULL.S32 Q13, D2, D10
|
||||
|
||||
VMULL.S32 Q0, D4, D12
|
||||
VMULL.S32 Q4, D5, D13
|
||||
VMULL.S32 Q5, D7, D15
|
||||
VMULL.S32 Q1, D6, D14
|
||||
|
||||
VSHR.S64 Q12, Q12, #31
|
||||
VSHR.S64 Q13, Q13, #31
|
||||
VSHR.S64 Q10, Q10, #31
|
||||
VSHR.S64 Q11, Q11, #31
|
||||
|
||||
VSHR.S64 Q0, Q0, #31
|
||||
VSHR.S64 Q1, Q1, #31
|
||||
VSHR.S64 Q4, Q4, #31
|
||||
VSHR.S64 Q5, Q5, #31
|
||||
|
||||
VSUB.I64 Q12, Q12, Q0
|
||||
VSUB.I64 Q13, Q13, Q1
|
||||
VSUB.I64 Q10, Q10, Q4
|
||||
VSUB.I64 Q11, Q11, Q5
|
||||
|
||||
VADD.I64 Q12, Q12, Q13
|
||||
VADD.I64 Q10, Q10, Q11
|
||||
VADD.I64 Q12, Q12, Q10
|
||||
VADD.I64 D24, D24, D25
|
||||
VADD.I64 D30, D30, D24
|
||||
SUBS R7, R7, #8
|
||||
BGT LOOP3
|
||||
|
||||
MOV R7, R6
|
||||
MOV R8, R0
|
||||
MOV R9, R1
|
||||
|
||||
|
||||
LOOP4:
|
||||
VLD1.32 {Q0, Q1}, [R8]! @LOADING values from R0 Sr_fix
|
||||
VLD1.32 {Q2, Q3}, [R9]! @LOADING values from R1 Si_fix
|
||||
VLD1.32 {Q4, Q5}, [R2]! @LOADING values from R2 N.real_fix
|
||||
VLD1.32 {Q6, Q7}, [R3]! @LOADING values from R3 N.imag_fix
|
||||
|
||||
VMULL.S32 Q12, D0, D8
|
||||
VMULL.S32 Q10, D1, D9
|
||||
VMULL.S32 Q11, D3, D11
|
||||
VMULL.S32 Q13, D2, D10
|
||||
|
||||
VMULL.S32 Q0, D4, D12
|
||||
VMULL.S32 Q4, D5, D13
|
||||
VMULL.S32 Q5, D7, D15
|
||||
VMULL.S32 Q1, D6, D14
|
||||
|
||||
VSHR.S64 Q12, Q12, #31
|
||||
VSHR.S64 Q13, Q13, #31
|
||||
VSHR.S64 Q10, Q10, #31
|
||||
VSHR.S64 Q11, Q11, #31
|
||||
|
||||
VSHR.S64 Q0, Q0, #31
|
||||
VSHR.S64 Q1, Q1, #31
|
||||
VSHR.S64 Q4, Q4, #31
|
||||
VSHR.S64 Q5, Q5, #31
|
||||
|
||||
VSUB.I64 Q12, Q12, Q0
|
||||
VSUB.I64 Q13, Q13, Q1
|
||||
VSUB.I64 Q10, Q10, Q4
|
||||
VSUB.I64 Q11, Q11, Q5
|
||||
|
||||
VADD.I64 Q12, Q12, Q13
|
||||
VADD.I64 Q10, Q10, Q11
|
||||
VADD.I64 Q12, Q12, Q10
|
||||
VADD.I64 D24, D24, D25
|
||||
VADD.I64 D31, D31, D24
|
||||
SUBS R7, R7, #8
|
||||
BGT LOOP4
|
||||
|
||||
VQMOVN.S64 D0, Q14
|
||||
VQMOVN.S64 D1, Q15
|
||||
|
||||
VST1.32 {Q0}, [R4]! @Storing values to R4
|
||||
|
||||
SUBS R5, R5, #4
|
||||
BGT LOOP
|
||||
|
||||
VPOP {d8 - d15}
|
||||
LDMFD sp!, {R4-R12, R14}
|
||||
BX LR
|
||||
|
||||
50
decoder/armv7/ixheaacd_aac_ld_dec_rearrange.s
Normal file
50
decoder/armv7/ixheaacd_aac_ld_dec_rearrange.s
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
.text
|
||||
.p2align 2
|
||||
.global ia_aac_ld_dec_rearrange_armv7
|
||||
|
||||
ia_aac_ld_dec_rearrange_armv7:
|
||||
STMFD r13!, {r4 - r12, r14}
|
||||
@ASR r2,r2,#3 @
|
||||
MOV R2, R2, ASR #3
|
||||
|
||||
|
||||
LOOP_REARRANGE:
|
||||
LDRB r4, [r3], #1 @ idx = mdct_tables_ptr->re_arr_tab[n]
|
||||
LDRB r5, [r3], #1 @ idx = mdct_tables_ptr->re_arr_tab[n]
|
||||
LDRB r6, [r3], #1 @ idx = mdct_tables_ptr->re_arr_tab[n]
|
||||
LDRB r7, [r3], #1 @ idx = mdct_tables_ptr->re_arr_tab[n]
|
||||
LDRB r8, [r3], #1 @ idx = mdct_tables_ptr->re_arr_tab[n]
|
||||
LDRB r9, [r3], #1 @ idx = mdct_tables_ptr->re_arr_tab[n]
|
||||
LDRB r10, [r3], #1 @ idx = mdct_tables_ptr->re_arr_tab[n]
|
||||
LDRB r11, [r3], #1 @ idx = mdct_tables_ptr->re_arr_tab[n]
|
||||
ADD r4, r0, r4, lsl #3
|
||||
ADD r5, r0, r5, lsl #3
|
||||
ADD r6, r0, r6, lsl #3
|
||||
ADD r7, r0, r7, lsl #3
|
||||
ADD r8, r0, r8, lsl #3
|
||||
ADD r9, r0, r9, lsl #3
|
||||
ADD r10, r0, r10, lsl #3
|
||||
ADD r11, r0, r11, lsl #3
|
||||
LDMIA r4, {r12, r14} @ r12 = inp[idx] and r14 = inp[idx+1]
|
||||
STMIA r1!, {r12, r14} @ *buf1++ = inp[idx] and *buf1++ = inp[idx+1]
|
||||
LDMIA r5, {r12, r14} @ r12 = inp[idx] and r14 = inp[idx+1]
|
||||
STMIA r1!, {r12, r14} @ *buf1++ = inp[idx] and *buf1++ = inp[idx+1]
|
||||
LDMIA r6, {r12, r14} @ r12 = inp[idx] and r14 = inp[idx+1]
|
||||
STMIA r1!, {r12, r14} @ *buf1++ = inp[idx] and *buf1++ = inp[idx+1]
|
||||
LDMIA r7, {r12, r14} @ r12 = inp[idx] and r14 = inp[idx+1]
|
||||
STMIA r1!, {r12, r14} @ *buf1++ = inp[idx] and *buf1++ = inp[idx+1]
|
||||
LDMIA r8, {r12, r14} @ r12 = inp[idx] and r14 = inp[idx+1]
|
||||
STMIA r1!, {r12, r14} @ *buf1++ = inp[idx] and *buf1++ = inp[idx+1]
|
||||
LDMIA r9, {r12, r14} @ r12 = inp[idx] and r14 = inp[idx+1]
|
||||
STMIA r1!, {r12, r14} @ *buf1++ = inp[idx] and *buf1++ = inp[idx+1]
|
||||
LDMIA r10, {r12, r14} @ r12 = inp[idx] and r14 = inp[idx+1]
|
||||
STMIA r1!, {r12, r14} @ *buf1++ = inp[idx] and *buf1++ = inp[idx+1]
|
||||
LDMIA r11, {r12, r14} @ r12 = inp[idx] and r14 = inp[idx+1]
|
||||
STMIA r1!, {r12, r14} @ *buf1++ = inp[idx] and *buf1++ = inp[idx+1]
|
||||
|
||||
SUBS r2, r2, #1
|
||||
BGT LOOP_REARRANGE
|
||||
|
||||
LDMFD r13!, {r4 - r12, r15}
|
||||
|
||||
|
||||
229
decoder/armv7/ixheaacd_apply_rot.s
Normal file
229
decoder/armv7/ixheaacd_apply_rot.s
Normal file
|
|
@ -0,0 +1,229 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_apply_rot_armv7
|
||||
|
||||
ixheaacd_apply_rot_armv7:
|
||||
STMFD SP!, {R4-R12, R14}
|
||||
LDR R5, =0x59e
|
||||
MOV R4, #22
|
||||
ADD R11, R0, R5
|
||||
LOOP1:
|
||||
LDRSH R5, [R11, #-98]
|
||||
LDRSH R6, [R11, #94]
|
||||
LDRSH R7, [R11, #-96]
|
||||
LDRSH R8, [R11, #96]
|
||||
ADD R9, R5, R6
|
||||
STRH R9, [R11, #-98]
|
||||
ADD R10, R7, R8
|
||||
STRH R10, [R11, #-96]
|
||||
|
||||
|
||||
LDRSH R5, [R11, #-2]
|
||||
LDRSH R6, [R11, #190]
|
||||
LDRSH R7, [R11]
|
||||
LDRSH R8, [R11, #192]
|
||||
ADD R9, R5, R6
|
||||
STRH R9, [R11, #-2]
|
||||
ADD R10, R7, R8
|
||||
STRH R10, [R11], #4
|
||||
|
||||
LDRSH R5, [R11, #-98]
|
||||
LDRSH R6, [R11, #94]
|
||||
LDRSH R7, [R11, #-96]
|
||||
LDRSH R8, [R11, #96]
|
||||
ADD R9, R5, R6
|
||||
STRH R9, [R11, #-98]
|
||||
ADD R10, R7, R8
|
||||
STRH R10, [R11, #-96]
|
||||
|
||||
SUBS R4, R4, #2
|
||||
|
||||
LDRSH R5, [R11, #-2]
|
||||
LDRSH R6, [R11, #190]
|
||||
LDRSH R7, [R11]
|
||||
LDRSH R8, [R11, #192]
|
||||
ADD R9, R5, R6
|
||||
STRH R9, [R11, #-2]
|
||||
ADD R10, R7, R8
|
||||
STRH R10, [R11], #4
|
||||
|
||||
BGT LOOP1
|
||||
|
||||
LDR R4, =0x53C
|
||||
LDR R12, [R0, #44]
|
||||
ADD R11, R0, R4
|
||||
MOV R4, #10
|
||||
|
||||
LOOP2:
|
||||
|
||||
LDR R5, [R12]
|
||||
LDR R7, [R11], #4
|
||||
LDR R6, [R12, #0x80]
|
||||
LDR R8, [R11, #92]
|
||||
|
||||
SMULWB R9, R5, R7
|
||||
SMULWB R10, R6, R8
|
||||
SMULWT R14, R5, R7
|
||||
|
||||
QADD R5, R9, R10
|
||||
SMULWT R6, R6, R8
|
||||
|
||||
MOV R5, R5, LSL #2
|
||||
QADD R14, R14, R6
|
||||
STR R5, [R12], #4
|
||||
MOV R14, R14, LSL #2
|
||||
STR R14, [R12, #0x7c]
|
||||
|
||||
LDR R5, [R12, #0x3c]
|
||||
LDR R6, [R12, #0xbc]
|
||||
|
||||
SMULWB R9, R5, R7
|
||||
SMULWB R10, R6, R8
|
||||
SMULWT R14, R5, R7
|
||||
|
||||
QADD R5, R9, R10
|
||||
|
||||
SMULWT R6, R6, R8
|
||||
|
||||
MOV R5, R5, LSL #2
|
||||
QADD R14, R14, R6
|
||||
STR R5, [R12, #0x3c]
|
||||
MOV R14, R14, LSL #2
|
||||
STR R14, [R12, #0xbc]
|
||||
|
||||
SUBS R4, R4, #1
|
||||
|
||||
BGT LOOP2
|
||||
|
||||
LDR R11, =0x6c2
|
||||
LDR R5, =0x564
|
||||
LDRSH R14, [R0, R11]
|
||||
ADD R11, R0, R5
|
||||
LDR R5, [SP, #44]
|
||||
SUB SP, SP, #512
|
||||
MOV R12, SP
|
||||
LDR R6, [R5, #12]
|
||||
MOV R4, #12
|
||||
ADD R6, R6, #0xb8
|
||||
|
||||
LOOP3:
|
||||
LDRSH R5, [R6], #2
|
||||
LDRSH R7, [R6, #-4]
|
||||
LDR R10, [R11, #96]
|
||||
LDR R9, [R11], #4
|
||||
CMP R14, R5
|
||||
SUB R8, R14, R7
|
||||
SUBGT R8, R5, R7
|
||||
ADD R5, R12, R7, LSL #3
|
||||
|
||||
LOOP3INN1:
|
||||
STR R10, [R5, #4]
|
||||
STR R9, [R5], #8
|
||||
SUBS R8, R8, #1
|
||||
BGT LOOP3INN1
|
||||
|
||||
SUBS R4, R4, #1
|
||||
BGT LOOP3
|
||||
|
||||
MOV R4, #3
|
||||
LDR R12, [R0, #44]
|
||||
LDR R9, [SP, #48+512]
|
||||
LDR R0, [SP, #40+512]
|
||||
STR R14, [SP, #-4]!
|
||||
|
||||
LOOP4:
|
||||
LDR R5, [R12], #4
|
||||
LDR R6, [R12, #0x3c]
|
||||
LDR R7, [R12, #0x7c]
|
||||
LDRSH R10, [R9], #2
|
||||
LDR R8, [R12, #0xbc]
|
||||
MOV R11, #5
|
||||
CMP R10, #6
|
||||
SUBLT R11, R10, #1
|
||||
|
||||
LOOP4INN1:
|
||||
LDR R10, [R12], #4
|
||||
LDR R14, [R12, #0x3C]
|
||||
QADD R5, R5, R10
|
||||
QADD R6, R6, R14
|
||||
LDR R10, [R12, #0x7C]
|
||||
LDR R14, [R12, #0xBC]
|
||||
QADD R7, R7, R10
|
||||
QADD R8, R8, R14
|
||||
SUBS R11, R11, #1
|
||||
BGT LOOP4INN1
|
||||
|
||||
STR R5, [R1], #4
|
||||
STR R6, [R2], #4
|
||||
STR R7, [R3], #4
|
||||
STR R8, [R0], #4
|
||||
SUBS R4, R4, #1
|
||||
BGT LOOP4
|
||||
|
||||
LDR R14, [SP]
|
||||
ADD R11, SP, #28
|
||||
SUB R4, R14, #3
|
||||
|
||||
LOOP5:
|
||||
LDR R5, [R1]
|
||||
LDR R7, [R11], #4
|
||||
LDR R6, [R3]
|
||||
LDR R8, [R11], #4
|
||||
|
||||
SMULWB R9, R5, R7
|
||||
SMULWB R10, R6, R8
|
||||
SMULWT R14, R5, R7
|
||||
|
||||
QADD R5, R9, R10
|
||||
SMULWT R6, R6, R8
|
||||
|
||||
MOV R5, R5, LSL #2
|
||||
QADD R14, R14, R6
|
||||
STR R5, [R1], #4
|
||||
MOV R14, R14, LSL #2
|
||||
STR R14, [R3], #4
|
||||
|
||||
SUBS R4, R4, #1
|
||||
|
||||
LDR R5, [R2]
|
||||
LDR R6, [R0]
|
||||
|
||||
SMULWB R9, R5, R7
|
||||
SMULWB R10, R6, R8
|
||||
SMULWT R14, R5, R7
|
||||
|
||||
QADD R5, R9, R10
|
||||
|
||||
SMULWT R6, R6, R8
|
||||
|
||||
MOV R5, R5, LSL #2
|
||||
QADD R14, R14, R6
|
||||
STR R5, [R2], #4
|
||||
MOV R14, R14, LSL #2
|
||||
STR R14, [R0], #4
|
||||
|
||||
BGT LOOP5
|
||||
ADD SP, SP, #516
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
147
decoder/armv7/ixheaacd_apply_scale_fac.s
Normal file
147
decoder/armv7/ixheaacd_apply_scale_fac.s
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_scale_factor_process_armv7
|
||||
|
||||
ixheaacd_scale_factor_process_armv7:
|
||||
|
||||
|
||||
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
LDR r9, [sp, #0x28]
|
||||
LDR r11, [sp, #0x2c]
|
||||
|
||||
CMP r2, #0
|
||||
|
||||
BLE END
|
||||
MOV r10, #0
|
||||
CMP r11, #2
|
||||
MOVLE r11, #0x25
|
||||
MOVGT r11, #0x22
|
||||
|
||||
TBANDS_LOOP:
|
||||
|
||||
LDRSH r5, [r1], #2
|
||||
LDRB r4, [r3], #1
|
||||
|
||||
LDR r6, [sp, #0x30]
|
||||
LDR r7, [sp, #0x34]
|
||||
|
||||
CMP r5, #0x18
|
||||
BGE SCALE_FACTOR_GE_12
|
||||
|
||||
CMP r4, #0
|
||||
BLE OFFSET_ZERO
|
||||
|
||||
SCALE_FACTOR_LT_12:
|
||||
|
||||
STR r10, [r0], #4
|
||||
STR r10, [r0], #4
|
||||
STR r10, [r0], #4
|
||||
STR r10, [r0], #4
|
||||
SUBS r4, r4, #4
|
||||
BGT SCALE_FACTOR_LT_12
|
||||
B OFFSET_ZERO
|
||||
|
||||
SCALE_FACTOR_GE_12:
|
||||
|
||||
SUBS r6, r11, r5, ASR #2
|
||||
|
||||
|
||||
AND r5, r5, #3
|
||||
|
||||
|
||||
LDR r5, [r9, r5, LSL #2]
|
||||
|
||||
BLE SHIFT_LE_ZERO
|
||||
|
||||
SUB r14, r6, #1
|
||||
|
||||
SHIFT_POSITIVE:
|
||||
LDRD r6, [r0, #0]
|
||||
|
||||
SMULWB r6, r6, r5
|
||||
SMULWB r7, r7, r5
|
||||
|
||||
MOV r6, r6, ASR r14
|
||||
MOV r7, r7, ASR r14
|
||||
|
||||
STRD r6, [r0], #8
|
||||
|
||||
LDRD r6, [r0, #0]
|
||||
|
||||
SMULWB r6, r6, r5
|
||||
SMULWB r7, r7, r5
|
||||
SUBS r4, r4, #4
|
||||
|
||||
MOV r6, r6, ASR r14
|
||||
MOV r7, r7, ASR r14
|
||||
|
||||
STRD r6, [r0], #8
|
||||
|
||||
BGT SHIFT_POSITIVE
|
||||
B OFFSET_ZERO
|
||||
SHIFT_LE_ZERO:
|
||||
|
||||
RSBS r14, r6, #0
|
||||
BGT SHIFT_NEGTIVE1
|
||||
|
||||
SHIFT_ZERO:
|
||||
LDRD r6, [r0, #0]
|
||||
|
||||
SMULWB r6, r6, r5
|
||||
SMULWB r7, r7, r5
|
||||
MOV r6, r6, LSL #1
|
||||
MOV r7, r7, LSL #1
|
||||
|
||||
STRD r6, [r0], #8
|
||||
|
||||
SUBS r4, r4, #2
|
||||
|
||||
BGT SHIFT_ZERO
|
||||
B OFFSET_ZERO
|
||||
|
||||
SHIFT_NEGTIVE1:
|
||||
SUB r14, r14, #1
|
||||
SHIFT_NEGTIVE:
|
||||
LDRD r6, [r0, #0]
|
||||
MOV r6, r6, LSL r14
|
||||
MOV r7, r7, LSL r14
|
||||
|
||||
SMULWB r6, r6, r5
|
||||
SMULWB r7, r7, r5
|
||||
MOV r6, r6, LSL #2
|
||||
MOV r7, r7, LSL #2
|
||||
|
||||
STRD r6, [r0], #8
|
||||
|
||||
SUBS r4, r4, #2
|
||||
|
||||
BGT SHIFT_NEGTIVE
|
||||
|
||||
OFFSET_ZERO:
|
||||
SUBS r2, r2, #1
|
||||
BGT TBANDS_LOOP
|
||||
END:
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
155
decoder/armv7/ixheaacd_auto_corr.s
Normal file
155
decoder/armv7/ixheaacd_auto_corr.s
Normal file
|
|
@ -0,0 +1,155 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_covariance_matrix_calc_armv7
|
||||
|
||||
|
||||
ixheaacd_covariance_matrix_calc_armv7:
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
MOVS r12, r2
|
||||
BLE EXIT
|
||||
AUTOCORR:
|
||||
MOV r12, r0
|
||||
|
||||
MOV r5, #9728
|
||||
LDR r4, [r12, r5]
|
||||
ADD r5, r5, #256
|
||||
LDR r3, [r12, r5]
|
||||
|
||||
LDR r6, [r12], #256
|
||||
LDR r5, [r12], #256
|
||||
|
||||
MOV r4, r4, ASR #3
|
||||
MOV r3, r3, ASR #3
|
||||
MOV r6, r6, ASR #3
|
||||
MOV r5, r5, ASR #3
|
||||
|
||||
SMULWT r11, r3, r4
|
||||
SMULWT r9, r5, r6
|
||||
|
||||
SMULWT r14, r4, r4
|
||||
SUB r11, r9, r11
|
||||
|
||||
|
||||
SMULWT r9, r6, r6
|
||||
|
||||
|
||||
MOV r3, #12
|
||||
|
||||
SUB r14, r9, r14
|
||||
|
||||
MOV r7, #0
|
||||
MOV r8, #0
|
||||
MOV r9, #0
|
||||
|
||||
AUTO_CORR_RIGHT:
|
||||
LDR r4, [r12], #256
|
||||
LDR r10, [r12], #256
|
||||
|
||||
MOV r4, r4, ASR #3
|
||||
SMLAWT r9, r5, r5, r9
|
||||
SMLAWT r7, r4, r5, r7
|
||||
SMLAWT r8, r4, r6, r8
|
||||
|
||||
MOV r6, r10, ASR #3
|
||||
SMLAWT r9, r4, r4, r9
|
||||
SMLAWT r8, r6, r5, r8
|
||||
|
||||
LDR r5, [r12], #256
|
||||
SMLAWT r7, r6, r4, r7
|
||||
|
||||
|
||||
MOV r5, r5, ASR #3
|
||||
SMLAWT r9, r6, r6, r9
|
||||
SMLAWT r7, r5, r6, r7
|
||||
SMLAWT r8, r5, r4, r8
|
||||
|
||||
SUBS r3, r3, #1
|
||||
BNE AUTO_CORR_RIGHT
|
||||
|
||||
LDR r4, [r12], #256
|
||||
MOV r4, r4, ASR #3
|
||||
SMLAWT r9, r5, r5, r9
|
||||
SMLAWT r7, r4, r5, r7
|
||||
SMLAWT r8, r4, r6, r8
|
||||
|
||||
LDR r6, [r12], #256
|
||||
MOV r6, r6, ASR #3
|
||||
SMLAWT r9, r4, r4, r9
|
||||
SMLAWT r7, r6, r4, r7
|
||||
SMLAWT r8, r6, r5, r8
|
||||
|
||||
CAL_AUTOCORR:
|
||||
|
||||
ADD r12, r7, r11
|
||||
ADD r14, r9, r14
|
||||
|
||||
EOR r5, r7, r7, ASR #31
|
||||
EOR r6, r8, r8, ASR #31
|
||||
|
||||
ORR r5, r6, r5
|
||||
EOR r6, r12, r12, ASR #31
|
||||
ORR r5, r6, r5
|
||||
|
||||
ORR r5, r9, r5
|
||||
ORR r5, r14, r5
|
||||
|
||||
CLZ r5, r5
|
||||
SUB r5, r5, #1
|
||||
|
||||
MOV r7, r7, LSL r5
|
||||
MOV r8, r8, LSL r5
|
||||
MOV r9, r9, LSL r5
|
||||
MOV r12, r12, LSL r5
|
||||
MOV r14, r14, LSL r5
|
||||
|
||||
|
||||
STR r9, [r1], #4
|
||||
STR r14, [r1], #4
|
||||
STR r7, [r1], #4
|
||||
|
||||
|
||||
SMULL r6, r5, r9, r14
|
||||
SMULL r6, r10, r12, r12
|
||||
|
||||
STR r8, [r1], #4
|
||||
STR r12, [r1], #4
|
||||
|
||||
QSUB r10, r5, r10
|
||||
ADD r0, r0, #4
|
||||
|
||||
ADD r1, r1, #12
|
||||
|
||||
STR r10, [r1], #4
|
||||
|
||||
|
||||
SUBS r2, r2, #1
|
||||
BNE AUTOCORR
|
||||
|
||||
|
||||
EXIT:
|
||||
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
403
decoder/armv7/ixheaacd_autocorr_st2.s
Normal file
403
decoder/armv7/ixheaacd_autocorr_st2.s
Normal file
|
|
@ -0,0 +1,403 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_covariance_matrix_calc_2_armv7
|
||||
ixheaacd_covariance_matrix_calc_2_armv7:
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
|
||||
AUTO_CORR_LOOP:
|
||||
|
||||
STR r0 , [sp, #-4]!
|
||||
STR r1 , [sp, #-4]!
|
||||
LDR r1 , [sp], #4
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LDR r4 , [r1, #-4*128]
|
||||
LDR r5 , [r1, #4*(64-128)]
|
||||
LDR r6 , [r1]
|
||||
LDR r7 , [r1, #4*64]
|
||||
|
||||
MOV r4, r4, ASR #3
|
||||
MOV r5, r5, ASR #3
|
||||
MOV r6, r6, ASR #3
|
||||
MOV r7, r7, ASR #3
|
||||
|
||||
|
||||
SMULWT r8 , r6 , r4
|
||||
SMULWT r9 , r7 , r4
|
||||
SMULWT r10, r6 , r5
|
||||
SMLAWT r8 , r7 , r5, r8
|
||||
SMULWT r11, r4 , r4
|
||||
SUB r9 , r9 , r10
|
||||
SMLAWT r11, r5 , r5, r11
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
MOV r10, r1
|
||||
ADD r12, r1, #64*4
|
||||
MOV r4 , r6
|
||||
MOV r5 , r7
|
||||
|
||||
SUB r14, r3 , #2
|
||||
MOVS r14, r14, LSR #1
|
||||
BEQ ENDLOOP2
|
||||
LOOP2:
|
||||
LDR r6 , [r10, #4*128]!
|
||||
LDR r7 , [r12, #4*128]!
|
||||
|
||||
MOV r6, r6, ASR #3
|
||||
MOV r7, r7, ASR #3
|
||||
|
||||
SMLAWT r8 , r6 , r4, r8
|
||||
SMLAWT r9 , r7 , r4, r9
|
||||
SMLAWT r8 , r7 , r5, r8
|
||||
SMULWT r0 , r6 , r5
|
||||
SMLAWT r11, r4 , r4, r11
|
||||
SUB r9 , r9 , r0
|
||||
SMLAWT r11, r5 , r5, r11
|
||||
|
||||
LDR r4 , [r10, #4*128]!
|
||||
LDR r5 , [r12, #4*128]!
|
||||
|
||||
MOV r4, r4, ASR #3
|
||||
MOV r5, r5, ASR #3
|
||||
|
||||
SUBS r14, r14, #1
|
||||
|
||||
SMLAWT r8 , r4 , r6, r8
|
||||
SMLAWT r9 , r5 , r6, r9
|
||||
SMLAWT r8 , r5 , r7, r8
|
||||
SMULWT r0 , r4 , r7
|
||||
SMLAWT r11, r6 , r6, r11
|
||||
SUB r9 , r9 , r0
|
||||
SMLAWT r11, r7 , r7, r11
|
||||
|
||||
BNE LOOP2
|
||||
|
||||
ANDS r0, r3, #0x01
|
||||
BEQ ENDLOOP2
|
||||
ODDLOOP:
|
||||
|
||||
LDR r6 , [r10, #4*128]!
|
||||
LDR r7 , [r12, #4*128]!
|
||||
|
||||
MOV r6, r6, ASR #3
|
||||
MOV r7, r7, ASR #3
|
||||
|
||||
SMLAWT r8 , r6 , r4, r8
|
||||
SMLAWT r9 , r7 , r4, r9
|
||||
SMLAWT r8 , r7 , r5, r8
|
||||
SMULWT r0 , r6 , r5
|
||||
SMLAWT r11, r4 , r4, r11
|
||||
SUB r9 , r9 , r0
|
||||
SMLAWT r11, r5 , r5, r11
|
||||
|
||||
|
||||
|
||||
ENDLOOP2:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
MOV r12, r11
|
||||
LDR r6 , [r1, #-8*128]
|
||||
LDR r7 , [r1, #4*64-8*128]
|
||||
|
||||
MOV r6, r6, ASR #3
|
||||
MOV r7, r7, ASR #3
|
||||
|
||||
SMLAWT r12, r6 , r6, r12
|
||||
|
||||
SUB r10, r3, #2
|
||||
SMLAWT r12, r7 , r7, r12
|
||||
|
||||
|
||||
|
||||
|
||||
MOV r0, r10, LSL #(2+7)
|
||||
ADD r0, r0, #0x100
|
||||
LDR r4 , [r1, r10, LSL #(2+7)]
|
||||
LDR r5 , [r1, r0]
|
||||
|
||||
MOV r4, r4, ASR #3
|
||||
MOV r5, r5, ASR #3
|
||||
|
||||
SMLAWT r11, r4, r4, r11
|
||||
LDR r0 , [sp], #4
|
||||
SMLAWT r11, r5, r5, r11
|
||||
|
||||
STR r12, [r0, #4]
|
||||
STR r11, [r0]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
MOV r11, r8
|
||||
LDR r12, [r1, #-4*128]
|
||||
LDR r14, [r1, #4*(64-128)]
|
||||
|
||||
MOV r12, r12, ASR #3
|
||||
MOV r14, r14, ASR #3
|
||||
|
||||
|
||||
SMLAWT r11, r12, r6, r11
|
||||
ADD r10, r10, #1
|
||||
|
||||
LDR r12, [r1, r10, LSL#(2+7)]
|
||||
SMLAWT r11, r14, r7, r11
|
||||
|
||||
MOV r14, r10, LSL #(2+7)
|
||||
ADD r14, r14, #0x100
|
||||
|
||||
|
||||
MOV r12, r12, ASR #3
|
||||
|
||||
LDR r14, [r1, r14]
|
||||
|
||||
SMLAWT r8 , r12, r4, r8
|
||||
|
||||
MOV r14, r14, ASR #3
|
||||
MOV r10, r9
|
||||
|
||||
SMLAWT r8 , r14, r5, r8
|
||||
STR r11, [r0, #16]
|
||||
STR r8 , [r0, #8]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
SMLAWT r9 , r14, r4 , r9
|
||||
SMULWT r8 , r12, r5
|
||||
LDR r14, [r1, #4*(64-128)]
|
||||
SUB r9 , r9 , r8
|
||||
|
||||
MOV r14, r14, ASR #3
|
||||
LDR r12, [r1, #-4*128]
|
||||
SMLAWT r10, r14, r6 , r10
|
||||
|
||||
MOV r12, r12, ASR #3
|
||||
SMULWT r8 , r12, r7
|
||||
STR r9 , [r0, #20]
|
||||
SUB r10, r10, r8
|
||||
STR r10, [r0, #28]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
STR r1 , [sp, #-4]!
|
||||
|
||||
STMFD sp!, {r0, r3}
|
||||
MOVS r0 , r3 , LSR #2
|
||||
|
||||
MOV r12, #0
|
||||
MOV r3 , #0
|
||||
LDR r5 , [r1, #-8*128]
|
||||
LDR r7 , [r1, #-4*128]
|
||||
LDR r9 , [r1, #4*(64-256)]
|
||||
LDR r11, [r1, #4*(64-128)]
|
||||
|
||||
|
||||
|
||||
MOV r5, r5, ASR #3
|
||||
MOV r7, r7, ASR #3
|
||||
MOV r9, r9, ASR #3
|
||||
MOV r11, r11, ASR #3
|
||||
|
||||
|
||||
BEQ ENDLOOP3
|
||||
LOOP3:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LDR r4 , [r1], #4*128
|
||||
LDR r8 , [r1, #4*(64-128)]
|
||||
|
||||
MOV r4, r4, ASR #3
|
||||
MOV r8, r8, ASR #3
|
||||
|
||||
SMLAWT r12, r4 , r5 , r12
|
||||
SMLAWT r12, r8 , r9 , r12
|
||||
SMULWT r14, r4 , r9
|
||||
SMLAWT r3 , r8 , r5 , r3
|
||||
|
||||
LDR r6 , [r1], #4*128
|
||||
SUB r3 , r3 , r14
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LDR r10, [r1, #4*(64-128)]
|
||||
|
||||
MOV r6, r6, ASR #3
|
||||
MOV r10, r10, ASR #3
|
||||
|
||||
SMLAWT r12, r6 , r7 , r12
|
||||
SMLAWT r12, r10, r11, r12
|
||||
SMULWT r14, r6 , r11
|
||||
SMLAWT r3 , r10, r7 , r3
|
||||
|
||||
LDR r5 , [r1], #4*128
|
||||
SUB r3 , r3 , r14
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LDR r9 , [r1, #4*(64-128)]
|
||||
|
||||
MOV r5, r5, ASR #3
|
||||
MOV r9, r9, ASR #3
|
||||
|
||||
SMLAWT r12, r5 , r4 , r12
|
||||
SMLAWT r12, r9 , r8 , r12
|
||||
SMULWT r14, r5 , r8
|
||||
SMLAWT r3 , r9 , r4 , r3
|
||||
|
||||
LDR r7 , [r1], #4*128
|
||||
SUB r3 , r3 , r14
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LDR r11, [r1, #4*(64-128)]
|
||||
|
||||
MOV r7, r7, ASR #3
|
||||
MOV r11, r11, ASR #3
|
||||
|
||||
SMLAWT r12, r7 , r6 , r12
|
||||
SMLAWT r12, r11, r10, r12
|
||||
SMULWT r14, r7 , r10
|
||||
SMLAWT r3 , r11, r6 , r3
|
||||
|
||||
SUBS r0 , r0 , #1
|
||||
SUB r3 , r3 , r14
|
||||
|
||||
BNE LOOP3
|
||||
ENDLOOP3:
|
||||
MOV r4 , r3
|
||||
LDMFD sp!, {r0, r3}
|
||||
|
||||
ANDS r5 , r3 , #3
|
||||
BEQ ENDLOOP4
|
||||
|
||||
LOOP4:
|
||||
LDR r6 , [r1, #-8*128]
|
||||
LDR r10, [r1, #4*(64-256)]
|
||||
|
||||
LDR r7 , [r1], #4*128
|
||||
LDR r11, [r1, #4*(64-128)]
|
||||
|
||||
|
||||
MOV r6, r6, ASR #3
|
||||
MOV r7, r7, ASR #3
|
||||
MOV r10, r10, ASR #3
|
||||
MOV r11, r11, ASR #3
|
||||
|
||||
|
||||
SMLAWT r12, r7 , r6 , r12
|
||||
SMLAWT r12, r11, r10, r12
|
||||
SMULWT r14, r7 , r10
|
||||
SMLAWT r4 , r11, r6 , r4
|
||||
|
||||
SUBSNE r5 , r5 , #1
|
||||
|
||||
SUB r4 , r4 , r14
|
||||
|
||||
BNE LOOP4
|
||||
ENDLOOP4:
|
||||
STR r12, [r0, #12]
|
||||
STR r4 , [r0, #24]
|
||||
LDR r1 , [sp], #4
|
||||
|
||||
SUBS R2, R2, #1
|
||||
|
||||
ADD r0, r0, #4*9
|
||||
|
||||
|
||||
ADD r1, r1, #4
|
||||
BGT AUTO_CORR_LOOP
|
||||
|
||||
END_OF_AUT0:
|
||||
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
79
decoder/armv7/ixheaacd_basic_op.h
Normal file
79
decoder/armv7/ixheaacd_basic_op.h
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
/******************************************************************************
|
||||
* *
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
#ifndef IXHEAACD_BASIC_OP_H
|
||||
#define IXHEAACD_BASIC_OP_H
|
||||
|
||||
#define add_d(a, b) ((a) + (b))
|
||||
#define sub_d(a, b) ((a) - (b))
|
||||
#define ixheaacd_cbrt_calc(a) cbrt(1.0f / a)
|
||||
|
||||
static PLATFORM_INLINE WORD32 msu32x16in32_dual(WORD32 a, WORD16 c1, WORD32 b,
|
||||
WORD16 c2) {
|
||||
WORD32 result;
|
||||
WORD32 temp_result;
|
||||
UWORD32 a_lsb;
|
||||
WORD32 a_msb;
|
||||
UWORD32 b_lsb;
|
||||
WORD32 b_msb;
|
||||
|
||||
a_lsb = a & 65535;
|
||||
a_msb = a >> 16;
|
||||
|
||||
b_lsb = b & 65535;
|
||||
b_msb = b >> 16;
|
||||
temp_result = ((UWORD32)a_lsb * (UWORD32)c1);
|
||||
temp_result = temp_result - (UWORD32)b_lsb * (UWORD32)c2;
|
||||
temp_result = ((WORD32)temp_result) >> 16;
|
||||
result = temp_result + ((a_msb * (WORD32)c1) - (b_msb * (WORD32)c2));
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
static PLATFORM_INLINE WORD32 mac32x16in32_dual(WORD32 a, WORD16 c1, WORD32 b,
|
||||
WORD16 c2) {
|
||||
WORD32 result;
|
||||
WORD32 temp_result;
|
||||
UWORD32 a_lsb;
|
||||
WORD32 a_msb;
|
||||
UWORD32 b_lsb;
|
||||
WORD32 b_msb;
|
||||
|
||||
a_lsb = a & 65535;
|
||||
a_msb = a >> 16;
|
||||
|
||||
b_lsb = b & 65535;
|
||||
b_msb = b >> 16;
|
||||
temp_result = (UWORD32)a_lsb * (UWORD32)c1;
|
||||
temp_result = temp_result + (UWORD32)b_lsb * (UWORD32)c2;
|
||||
temp_result = ((UWORD32)temp_result) >> 16;
|
||||
result = temp_result + ((a_msb * (WORD32)c1)) + ((b_msb * (WORD32)c2));
|
||||
return (result);
|
||||
}
|
||||
|
||||
static PLATFORM_INLINE WORD64 mac32x32in64_dual(WORD32 a, WORD32 b, WORD64 c) {
|
||||
WORD64 result;
|
||||
WORD64 temp_result;
|
||||
|
||||
temp_result = (WORD64)a * (WORD64)b;
|
||||
result = c + (temp_result);
|
||||
return (result);
|
||||
}
|
||||
|
||||
#endif
|
||||
403
decoder/armv7/ixheaacd_basic_ops16.h
Normal file
403
decoder/armv7/ixheaacd_basic_ops16.h
Normal file
|
|
@ -0,0 +1,403 @@
|
|||
/******************************************************************************
|
||||
* *
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
#ifndef IXHEAACD_BASIC_OPS16_H
|
||||
#define IXHEAACD_BASIC_OPS16_H
|
||||
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_sat16(WORD32 op1) {
|
||||
WORD32 var_out;
|
||||
__asm__(
|
||||
" MOV %0, %1 \n\t"
|
||||
" CMP %0, #0x8000 \n\t"
|
||||
" ITEE GE \n\t"
|
||||
" MVNGE %0, #0x8000 \n\t"
|
||||
" CMNLT %0, #0x00008000 \n\t"
|
||||
" MOVLT %0, #0x00008000 \n\t"
|
||||
: "=r"(var_out)
|
||||
: "r"(op1)
|
||||
: "cc");
|
||||
return ((WORD16)var_out);
|
||||
}
|
||||
|
||||
// add 2 16 bit variables and returns 16 bit result
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_add16(WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = ((WORD16)(op1 + op2));
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// add 2 16 bit variables and returns 16 bit result with saturation
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_add16_sat(WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
WORD32 sum;
|
||||
|
||||
sum = (WORD32)op1 + (WORD32)op2;
|
||||
var_out = ixheaacd_sat16(sum);
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// subtract 2 16 bit variables and returns 16 bit result
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_sub16(WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = ((WORD16)(op1 - op2));
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// subtract 2 16 bit variables and returns 16 bit result with saturation
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_sub16_sat(WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
WORD32 diff;
|
||||
|
||||
diff = (WORD32)op1 - op2;
|
||||
var_out = ixheaacd_sat16(diff);
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// multiply 2 16 bit variables and return 31 to 16 bits
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_mult16(WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = ((WORD16)(((WORD32)op1 * (WORD32)op2) >> 16));
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// multiply 2 16 bit variables and return 30 to 15 bits
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_mult16_shl(WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = ((WORD16)(((WORD32)op1 * (WORD32)op2) >> 15));
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// multiply 2 16 bit variables and return 30 to 15 bits with saturation
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_mult16_shl_sat(WORD16 op1, WORD16 op2) {
|
||||
WORD32 temp;
|
||||
__asm__(
|
||||
" SMULBB %0, %1, %2 \n\t"
|
||||
" MOV %0, %0, ASR #15 \n\t"
|
||||
" CMP %0, #0x00008000 \n\t"
|
||||
" IT EQ \n\t"
|
||||
" SUBEQ %0, %0, #1 \n\t"
|
||||
: "=r"(temp)
|
||||
: "r"(op1), "r"(op2)
|
||||
: "cc");
|
||||
return (temp);
|
||||
}
|
||||
|
||||
// shifts left a 16 bit variable by the shift value and returns a 16 bit result
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_shl16(WORD16 op1, WORD16 shift) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = (WORD16)(op1 << shift);
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// shifts left a 16 bit variable by the shift value and returns a 16 bit value
|
||||
// with saturation
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_shl16_sat(WORD16 op1, WORD16 shift) {
|
||||
WORD16 var_out;
|
||||
WORD32 temp;
|
||||
|
||||
if (shift > 15) {
|
||||
shift = 15;
|
||||
}
|
||||
temp = (WORD32)(op1 << shift);
|
||||
var_out = ixheaacd_sat16(temp);
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// shifts right a 16 bit variable by the shift value and returns a 16 bit value
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_shr16(WORD16 op1, WORD16 shift) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = ((WORD16)(op1 >> shift));
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// shifts left a 16 bit variable by the shift value
|
||||
// if the value is positive else shifts right and returns a 16 bit
|
||||
|
||||
static PLATFORM_INLINE WORD16 shl16_dir(WORD16 op1, WORD16 shift) {
|
||||
WORD16 var_out;
|
||||
if (shift > 0) {
|
||||
var_out = ixheaacd_shl16(op1, shift);
|
||||
} else {
|
||||
var_out = ixheaacd_shr16(op1, (WORD16)(-shift));
|
||||
}
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// shifts left a 16 bit variable by the shift value
|
||||
// if the value is negative else shifts right and returns a 16 bit
|
||||
static PLATFORM_INLINE WORD16 shr16_dir(WORD16 op1, WORD16 shift) {
|
||||
WORD16 var_out;
|
||||
|
||||
if (shift < 0) {
|
||||
var_out = ixheaacd_shl16(op1, (WORD16)(-shift));
|
||||
} else {
|
||||
var_out = ixheaacd_shr16(op1, shift);
|
||||
}
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// shifts left a 16 bit variable by the shift value
|
||||
// if the value is positive else shifts right and returns a 16 bit with
|
||||
// saturation
|
||||
static PLATFORM_INLINE WORD16 shl16_dir_sat(WORD16 op1, WORD16 shift) {
|
||||
WORD16 var_out;
|
||||
if (shift > 0) {
|
||||
var_out = ixheaacd_shl16_sat(op1, shift);
|
||||
} else {
|
||||
var_out = ixheaacd_shr16(op1, (WORD16)(-shift));
|
||||
}
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// shifts left a 16 bit variable by the shift value
|
||||
// if the value is negative else shifts right and returns a 16 bit
|
||||
// Directional shift: right by `shift` when non-negative, otherwise left by
// -shift with 16-bit saturation.
static PLATFORM_INLINE WORD16 ixheaacd_shr16_dir_sat(WORD16 op1, WORD16 shift) {
  return (shift < 0) ? ixheaacd_shl16_sat(op1, (WORD16)(-shift))
                     : ixheaacd_shr16(op1, shift);
}
|
||||
|
||||
// finds a value which normalizes the input to 16 bit
|
||||
|
||||
// Returns the number of redundant sign bits in a 16-bit value, i.e. how far
// var1 can be shifted left without losing information; returns 15 for zero
// (MOVEQ #16 followed by SUB #1). ARM inline assembly using conditional
// execution and CLZ.
static PLATFORM_INLINE WORD16 norm16(WORD16 var1) {
  WORD16 var_out;

  __asm__(
      // Place the 16-bit value in the top halfword; sets N/Z flags.
      "MOVS %1, %1, LSL #16 \n\t"
      // Negative input: invert so CLZ counts the redundant sign bits.
      "MVNMIS %1, %1 \n\t"
      "MOVEQ %0, #16 \n\t"
      " CLZGT %0, %1 \n\t"
      "SUB %0, %0, #1 \n\t"
      : "=r"(var_out)
      : "r"(var1)
      : "cc");

  return (var_out);
}
|
||||
|
||||
// finds no. of significant bits excluding sign bit
|
||||
// value 15 returned for zero
|
||||
// Number of significant bits of op1 excluding the sign bit
// (15 minus the redundant-sign-bit count from norm16).
static PLATFORM_INLINE WORD16 bin_expo16(WORD16 op1) {
  return (WORD16)(15 - norm16(op1));
}
|
||||
|
||||
// returns a 16 bit absolute value of a given signed 16 bit value
|
||||
// Absolute value of a signed 16-bit value (no saturation). ARM inline
// assembly: the value is shifted into the top halfword, conditionally
// negated when negative, and shifted back with sign extension.
static PLATFORM_INLINE WORD16 ixheaacd_abs16(WORD16 var1) {
  WORD16 var_out;
  __asm__(
      " MOVS %0, %1, LSL #16 \n\t"
      " RSBLTS %0 , %0, #0 \n\t"
      " MOV %0, %0, ASR #16 \n\t"
      : "=r"(var_out)
      : "r"(var1)
      : "cc");
  return (var_out);
}
|
||||
|
||||
// returns a 16 bit absolute value of a given signed 16 bit value with
|
||||
// saturation
|
||||
// Absolute value of a signed 16-bit value with saturation: the MOVMI clamps
// the -32768 case (whose negation still sets the sign flag) before the final
// arithmetic shift back to 16 bits.
static PLATFORM_INLINE WORD16 ixheaacd_abs16_sat(WORD16 var1) {
  WORD16 var_out;
  __asm__(
      " MOVS %0, %1, LSL #16 \n\t"
      " RSBLTS %0 , %0, #0 \n\t"
      " MOVMI %0, #0x7fffffff \n\t"
      " MOV %0, %0, ASR #16 \n\t"
      : "=r"(var_out)
      : "r"(var1)
      : "cc");
  return (var_out);
}
|
||||
|
||||
// returns a 16 bit negative value of a given signed 16 bit value.
|
||||
// Negates a 16-bit value, saturating the -32768 case to MAX_16.
static PLATFORM_INLINE WORD16 ixheaacd_negate16(WORD16 op1) {
  return (op1 == -32768) ? MAX_16 : (WORD16)(-op1);
}
|
||||
|
||||
// returns the minima of 2 16 bit variables
|
||||
// Returns the smaller of two 16-bit values.
static PLATFORM_INLINE WORD16 ixheaacd_min16(WORD16 op1, WORD16 op2) {
  if (op1 < op2) {
    return op1;
  }
  return op2;
}
|
||||
|
||||
// returns the maxima of 2 16 bit variables
|
||||
// Returns the larger of two 16-bit values.
static PLATFORM_INLINE WORD16 ixheaacd_max16(WORD16 op1, WORD16 op2) {
  if (op1 > op2) {
    return op1;
  }
  return op2;
}
|
||||
|
||||
/*****************************************************************************/
|
||||
/* */
|
||||
/* function name : div16 */
|
||||
/* */
|
||||
/* description : divides 2 16 bit variables and returns the quotient */
|
||||
/* the q-format of the result is modified */
|
||||
/* ( op1/op2 to 14 bits precision) */
|
||||
/* */
|
||||
/* inputs : WORD16 op1, WORD16 op2, WORD16 *q_format */
|
||||
/* */
|
||||
/* globals : none */
|
||||
/* */
|
||||
/* processing : non-restoration type algorithm(shift & substract) */
|
||||
/* */
|
||||
/* outputs : WORD16 *q_format */
|
||||
/* */
|
||||
/* returns : WORD16 var_out */
|
||||
/* */
|
||||
/* issues : none */
|
||||
/* */
|
||||
/* revision history : */
|
||||
/* */
|
||||
/* DD MM YYYY author changes */
|
||||
/* 11 11 2003 preethi modified(bug fixes) */
|
||||
/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
|
||||
/* */
|
||||
/*****************************************************************************/
|
||||
|
||||
// divides 2 16 bit variables and returns the quotient
|
||||
// Divides op1 by op2 using a 15-iteration non-restoring (shift-and-subtract)
// algorithm on normalized mantissas. Returns the 15-bit quotient magnitude
// (sign-corrected) and writes the result's Q-format to *q_format
// (14 + norm(op1) - norm(op2)). A zero divisor returns op1 unchanged with
// *q_format = 0.
static PLATFORM_INLINE WORD16 div16(WORD16 op1, WORD16 op2, WORD16 *q_format) {
  WORD32 quotient;
  UWORD16 mantissa_nr, mantissa_dr;
  WORD16 sign = 0;  // becomes -1 when exactly one operand is negative

  LOOPIDX i;
  WORD16 q_nr, q_dr;

  // NOTE: these initial assignments are overwritten by the normalized
  // mantissas below before first use.
  mantissa_nr = op1;
  mantissa_dr = op2;
  quotient = 0;

  // Fold out the signs; op1 is only negated when the division will actually
  // run (op2 != 0), so the zero-divisor early return sees the original op1.
  if (op1 < 0 && op2 != 0) {
    op1 = -op1;
    sign = (WORD16)(sign ^ -1);
  }

  if (op2 < 0) {
    op2 = -op2;
    sign = (WORD16)(sign ^ -1);
  }

  if (op2 == 0) {
    *q_format = 0;
    return (op1);
  }

  quotient = 0;

  // Normalize both operands so each mantissa has its MSB-1 set, and derive
  // the Q-format of the quotient from the two normalization shifts.
  q_nr = norm16(op1);
  mantissa_nr = (UWORD16)op1 << (q_nr);
  q_dr = norm16(op2);
  mantissa_dr = (UWORD16)op2 << (q_dr);
  *q_format = (WORD16)(14 + q_nr - q_dr);

  // 15 shift-and-subtract steps produce a 15-bit quotient.
  for (i = 0; i < 15; i++) {
    quotient = quotient << 1;

    if (mantissa_nr >= mantissa_dr) {
      mantissa_nr = mantissa_nr - mantissa_dr;
      quotient += 1;
    }

    mantissa_nr = (UWORD32)mantissa_nr << 1;
  }

  // Reapply the sign folded out above.
  if (sign < 0) {
    quotient = -quotient;
  }

  return (WORD16)quotient;
}
|
||||
|
||||
// multiply 2 16 bit variables, add 31 to 16 bits to acc
|
||||
// Multiply two 16-bit values and add the 16-bit product to c (no saturation).
static PLATFORM_INLINE WORD16 mac16(WORD16 c, WORD16 op1, WORD16 op2) {
  return ixheaacd_add16(c, ixheaacd_mult16(op1, op2));
}
|
||||
|
||||
// multiply 2 16 bit variables, add 31 to 16 bits to acc with saturation
|
||||
// Multiply two 16-bit values and add the 16-bit product to c with saturation
// on the add.
static PLATFORM_INLINE WORD16 mac16_sat(WORD16 c, WORD16 op1, WORD16 op2) {
  return ixheaacd_add16_sat(c, ixheaacd_mult16(op1, op2));
}
|
||||
|
||||
// multiply 2 16 bit variables, add 30 to 15 bits to acc
|
||||
// Fractional (shifted) multiply of two 16-bit values, product added to c
// without saturation.
static PLATFORM_INLINE WORD16 mac16_shl(WORD16 c, WORD16 op1, WORD16 op2) {
  return ixheaacd_add16(c, ixheaacd_mult16_shl(op1, op2));
}
|
||||
|
||||
// multiply 2 16 bit variables, add 30 to 15 bits to acc with saturation
|
||||
// Q15 multiply of op1 and op2 (32-bit intermediate, >> 15), accumulate c,
// then saturate to 16 bits.
static PLATFORM_INLINE WORD16 mac16_shl_sat(WORD16 c, WORD16 op1, WORD16 op2) {
  WORD32 acc = (((WORD32)op1 * (WORD32)op2) >> 15) + c;
  return ixheaacd_sat16(acc);
}
|
||||
|
||||
// rounds a 32 bit variable to a 16 bit variable with saturation
|
||||
// Rounds a 32-bit value to 16 bits with saturation: adds the rounding
// constant 0x8000 (clamping on signed overflow via the V flag) and keeps the
// high halfword.
static PLATFORM_INLINE WORD16 ixheaacd_round16(WORD32 op1) {
  WORD16 var_out;
  __asm__(
      " ADDS %0, %1, #0x8000 \n\t"
      " IT VS \n\t"
      " MVNVS %0, #0x80000000 \n\t"
      " MOV %0, %0, ASR #16 \n\t"
      : "=r"(var_out)
      : "r"(op1)
      : "cc");
  return (var_out);
}
|
||||
#endif
|
||||
516
decoder/armv7/ixheaacd_basic_ops32.h
Normal file
516
decoder/armv7/ixheaacd_basic_ops32.h
Normal file
|
|
@ -0,0 +1,516 @@
|
|||
/******************************************************************************
|
||||
* *
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
#ifndef IXHEAACD_BASIC_OPS32_H
|
||||
#define IXHEAACD_BASIC_OPS32_H
|
||||
|
||||
// returns the minima of 2 32 bit variables
|
||||
// Returns the smaller of two 32-bit values using ARM conditional moves
// (ITE block for Thumb-2 compatibility).
static PLATFORM_INLINE WORD32 ixheaacd_min32(WORD32 a, WORD32 b) {
  WORD32 min_val;

  __asm__ __volatile__(
      " CMP %1, %2 \n\t"
      " ITE GT \n\t"
      " MOVGT %0, %2 \n\t"
      " MOVLE %0, %1 \n\t"
      : "=r"(min_val)
      : "r"(a), "r"(b)
      : "cc");

  return min_val;
}
|
||||
|
||||
// returns the maxima of 2 32 bit variables
|
||||
// Returns the larger of two 32-bit values using ARM conditional moves.
// NOTE(review): unlike ixheaacd_min32 above, there is no ITE block before
// the conditional MOVs — valid in ARM mode, but would need IT in Thumb-2;
// confirm the build targets ARM mode or unified assembly handles it.
static PLATFORM_INLINE WORD32 ixheaacd_max32(WORD32 a, WORD32 b) {
  WORD32 max_val;
  __asm__ __volatile__(
      " CMP %1, %2 \n\t"
      " MOVLE %0, %2 \n\t"
      " MOVGT %0, %1 \n\t"
      : "=r"(max_val)
      : "r"(a), "r"(b)
      : "cc");

  return max_val;
}
|
||||
|
||||
// shifts a 32-bit value left by specified bits
|
||||
// Logical left shift of a 32-bit value by b bits (register-specified LSL).
// NOTE(review): for b >= 32 the result follows ARM register-shift semantics,
// not C semantics — confirm callers keep b in [0, 31].
static PLATFORM_INLINE WORD32 ixheaacd_shl32(WORD32 a, WORD b) {
  WORD32 out_val;
  __asm__(

      " MOV %0, %1, LSL %2 \n\t"
      : "=r"(out_val)
      : "r"(a), "r"(b));

  return (out_val);
}
|
||||
|
||||
// shifts a 32-bit value right by specified bits
|
||||
// Arithmetic right shift of a 32-bit value by b bits (register-specified ASR).
static PLATFORM_INLINE WORD32 ixheaacd_shr32(WORD32 a, WORD b) {
  WORD32 out_val;
  __asm__(

      " MOV %0, %1, ASR %2 \n\t"
      : "=r"(out_val)
      : "r"(a), "r"(b));

  return out_val;
}
|
||||
|
||||
// shifts a 32-bit value left by specified bits and saturates it to 32 bits
|
||||
// Left-shifts a 32-bit value by b bits, saturating to INT32_MIN/INT32_MAX
// when any significant bit would be shifted out. The probe shifts `a` right
// by (31 - b): all-ones (negative, CMN == -1) or all-zeros (EQ) means the
// shift is safe; any other pattern overflows and is clamped by sign.
static PLATFORM_INLINE WORD32 ixheaacd_shl32_sat(WORD32 a, WORD b) {
  WORD32 out_val = a;

  __asm__ __volatile__(
      " RSBS r3, %2, #31 \n\t"
      " MOVS r3, %1, ASR r3 \n\t"
      " ITT LT \n\t"
      " CMNLT r3, #1 \n\t"
      " MOVLT %0, #0x80000000 \n\t"
      " IT GT \n\t"
      " MOVGT %0, #0x7fffffff \n\t"
      " IT EQ \n\t"
      " MOVEQ %0, %1, LSL %2 \n\t"
      : "=r"(out_val)
      : "r"(a), "r"(b)
      : "cc", "r3");

  return (out_val);
}
|
||||
|
||||
// shifts a 32-bit value left by specified bits, shifts
|
||||
// it right if specified no. of bits is negative
|
||||
|
||||
// Directional shift without saturation: left by b when b > 0, arithmetic
// right by -b when b <= 0 (selected via the sign of the negated count).
static PLATFORM_INLINE WORD32 ixheaacd_shl32_dir(WORD32 a, WORD b) {
  WORD32 out_val = 0;

  __asm__ __volatile__(

      " RSBS r3, %2, #0 \n\t"
      " MOVMI %0, %1, LSL %2 \n\t"
      " MOVPL %0, %1, ASR r3 \n\t"
      : "=r"(out_val)
      : "r"(a), "r"((WORD)b)
      : "cc", "r3");
  return out_val;
}
|
||||
|
||||
// shifts a 32-bit value left by specificed bits with sat,
|
||||
// shifts it right if specified no. of bits is negative
|
||||
|
||||
// Directional shift: left with saturation when b >= 0, arithmetic right by
// -b when b is negative.
static PLATFORM_INLINE WORD32 ixheaacd_shl32_dir_sat(WORD32 a, WORD b) {
  return (b < 0) ? ixheaacd_shr32(a, -b) : ixheaacd_shl32_sat(a, b);
}
|
||||
|
||||
// shifts a 32-bit value right by specified bits, shifts
|
||||
// it left if specified no. of bits is negative
|
||||
// Directional shift without saturation: arithmetic right by b when b > 0,
// left by -b when b <= 0.
static PLATFORM_INLINE WORD32 ixheaacd_shr32_dir(WORD32 a, WORD b) {
  WORD32 out_val = 0;
  __asm__ __volatile__(
      " RSBS r3, %2, #0 \n\t"
      " IT MI \n\t"
      " MOVMI %0, %1, ASR %2 \n\t"
      " IT PL \n\t"
      " MOVPL %0, %1, LSL r3 \n\t"
      : "=r"(out_val)
      : "r"(a), "r"(b)
      : "cc", "r3");

  return out_val;
}
|
||||
|
||||
// shifts a 32-bit value right by specified bits, shifts
|
||||
// it left with sat if specified no. of bits is negative
|
||||
// Directional shift: arithmetic right when b >= 0, saturating left by -b
// when b is negative.
static PLATFORM_INLINE WORD32 shr32_dir_sat(WORD32 a, WORD b) {
  return (b < 0) ? ixheaacd_shl32_sat(a, -b) : ixheaacd_shr32(a, b);
}
|
||||
|
||||
// multiplies two 16 bit numbers and returns their 32-bit result
|
||||
// Full 32-bit product of two signed 16-bit values (SMULBB: bottom halfword
// of each operand).
static PLATFORM_INLINE WORD32 ixheaacd_mult16x16in32(WORD16 a, WORD16 b) {
  WORD32 product;
  __asm__(

      " SMULBB %0 , %1, %2 \n\t"
      : "=r"(product)
      : "r"(a), "r"(b));
  return product;
}
|
||||
|
||||
// multiplies two 16 bit numbers and returns their 32-bit
|
||||
// result after removing 1 redundant sign bit
|
||||
// 16x16 -> 32 product, left-shifted by 1 to drop the redundant sign bit
// (fractional Q15*Q15 -> Q31 multiply, no saturation).
static PLATFORM_INLINE WORD32 ixheaacd_mult16x16in32_shl(WORD16 a, WORD16 b) {
  WORD32 product;
  __asm__(

      " SMULBB %0 , %1, %2 \n\t"
      " MOV %0, %0, LSL #1 \n\t"
      : "=r"(product)
      : "r"(a), "r"(b));
  return product;
}
|
||||
|
||||
// multiplies two 16 bit numbers and returns their 32-bit
|
||||
// result after removing 1 redundant sign bit with saturation
|
||||
// 16x16 -> 32 product with the redundant sign bit removed via a saturating
// doubling (QADD of the product with itself clamps the -32768 * -32768 case).
static PLATFORM_INLINE WORD32 ixheaacd_mult16x16in32_shl_sat(WORD16 a,
                                                             WORD16 b) {
  WORD32 product;
  __asm__(

      " SMULBB %0 , %1, %2 \n\t"
      " QADD %0, %0, %0 \n\t"
      : "=r"(product)
      : "r"(a), "r"(b));
  return product;
}
|
||||
|
||||
// adds 2 32 bit variables
|
||||
// Adds two 32-bit values (wrapping, no saturation).
static PLATFORM_INLINE WORD32 ixheaacd_add32(WORD32 a, WORD32 b) {
  WORD32 sum;
  __asm__(

      " ADD %0 , %1, %2 \n\t"
      : "=r"(sum)
      : "r"(a), "r"(b));
  return (sum);
}
|
||||
|
||||
// subtract 2 32 bit variables
|
||||
// Subtracts b from a (wrapping, no saturation).
static PLATFORM_INLINE WORD32 ixheaacd_sub32(WORD32 a, WORD32 b) {
  WORD32 diff;
  __asm__(

      " SUB %0 , %1, %2 \n\t"
      : "=r"(diff)
      : "r"(a), "r"(b));
  return (diff);
}
|
||||
|
||||
// adds 2 32 bit variables with saturation
|
||||
// Adds two 32-bit values with saturation (ARM QADD).
static PLATFORM_INLINE WORD32 ixheaacd_add32_sat(WORD32 a, WORD32 b) {
  WORD32 sum;
  __asm__(

      " QADD %0 , %1, %2 \n\t"
      : "=r"(sum)
      : "r"(a), "r"(b));
  return (sum);
}
|
||||
|
||||
// subtract 2 32 bit variables with saturation (ARM QSUB)
static PLATFORM_INLINE WORD32 ixheaacd_sub32_sat(WORD32 a, WORD32 b) {
  WORD32 diff;
  __asm__(

      " QSUB %0 , %1, %2 \n\t"
      : "=r"(diff)
      : "r"(a), "r"(b));
  return (diff);
}
|
||||
|
||||
// returns number of redundant sign bits in a 32-bit value.
|
||||
// return zero for a value of zero
|
||||
// Number of redundant sign bits in a 32-bit value: XOR with the replicated
// sign bit turns sign bits into leading zeros, then CLZ - 1.
// NOTE(review): for a == 0 this yields CLZ(0) - 1 = 31, not zero as the
// original comment above claims — confirm which behavior callers expect.
static PLATFORM_INLINE WORD ixheaacd_norm32(WORD32 a) {
  WORD32 norm_val;
  __asm__(
      " eor %0 , %1, %1,asr #31 \n\t"
      " CLZ %0, %0 \n\t"
      " SUB %0, %0, #1 \n\t"
      : "=r"(norm_val)
      : "r"(a));
  return norm_val;
}
|
||||
|
||||
// Redundant-sign-bit count for non-negative values only: CLZ - 1 without the
// sign-fold step of ixheaacd_norm32.
static PLATFORM_INLINE WORD ixheaacd_pnorm32(WORD32 a) {
  WORD32 norm_val;
  __asm__(

      " CLZ %0, %1 \n\t"
      " SUB %0, %0, #1 \n\t"
      : "=r"(norm_val)
      : "r"(a));
  return norm_val;
}
|
||||
|
||||
// returns the position of the most significant bit for negative numbers.
|
||||
// ignores leading zeros to determine the position of most significant bit.
|
||||
// Position of the most significant non-redundant bit: 31 minus the
// redundant-sign-bit count.
static PLATFORM_INLINE WORD bin_expo32(WORD32 a) {
  return 31 - ixheaacd_norm32(a);
}
|
||||
|
||||
// returns the absolute value of 32-bit number
|
||||
// Absolute value of a 32-bit number. No saturation: a == INT32_MIN is not
// clamped (use ixheaacd_abs32_sat for that).
static PLATFORM_INLINE WORD32 ixheaacd_abs32(WORD32 a) {
  return (a < 0) ? -a : a;
}
|
||||
|
||||
// returns the absolute value of 32-bit number
|
||||
// Magnitude proxy for normalization: negatives use one's complement
// (~a == -a - 1), which avoids overflow at INT32_MIN.
static PLATFORM_INLINE WORD32 ixheaacd_abs32_nrm(WORD32 a) {
  return (a < 0) ? ~a : a;
}
|
||||
|
||||
// returns the absolute value of 32-bit number with saturation
|
||||
// Absolute value of a 32-bit number with saturation: if the negation of a
// negative input is still negative (only for INT32_MIN), clamp to INT32_MAX.
static PLATFORM_INLINE WORD32 ixheaacd_abs32_sat(WORD32 a) {
  WORD32 abs_val;
  __asm__ __volatile__(

      " MOVS %0 , %1 \n\t"
      " IT MI \n\t"
      " RSBSMI %0 , %1 , #0 \n\t"
      " IT MI \n\t"
      " MOVMI %0 , #0x7fffffff \n\t"
      : "=r"(abs_val)
      : "r"(a)
      : "cc");

  return abs_val;
}
|
||||
|
||||
// returns the negated value of 32-bit number
|
||||
// Negates a 32-bit value (RSB from zero; no saturation).
static PLATFORM_INLINE WORD32 ixheaacd_negate32(WORD32 a) {
  WORD32 neg_val;
  __asm__(" RSB %0, %1, #0 \n\t" : "=r"(neg_val) : "r"(a));
  return neg_val;
}
|
||||
|
||||
// returns the negated value of 32-bit number with saturation
|
||||
// Negates a 32-bit value with saturation: signed overflow (V flag set, only
// for INT32_MIN) is clamped to INT32_MAX.
static PLATFORM_INLINE WORD32 ixheaacd_negate32_sat(WORD32 a) {
  WORD32 neg_val;
  __asm__(
      " RSBS %0, %1, #0 \n\t"
      " IT VS \n\t"
      " MVNVS %0, #0x80000000 \n\t"
      : "=r"(neg_val)
      : "r"(a)
      : "cc");
  return neg_val;
}
|
||||
|
||||
// divides 2 32 bit variables and returns the quotient
|
||||
// Divides a by b using a 31-iteration non-restoring (shift-and-subtract)
// algorithm on normalized mantissas. Returns the sign-corrected quotient and
// writes the result's Q-format (30 + norm(a) - norm(b)) to *q_format. A zero
// divisor returns a unchanged with *q_format = 0.
static PLATFORM_INLINE WORD32 div32(WORD32 a, WORD32 b, WORD *q_format) {
  WORD32 quotient;
  UWORD32 mantissa_nr, mantissa_dr;
  WORD16 sign = 0;  // becomes -1 when exactly one operand is negative

  LOOPINDEX i;
  WORD q_nr, q_dr;

  // NOTE: these initial assignments are overwritten by the normalized
  // mantissas below before first use.
  mantissa_nr = a;
  mantissa_dr = b;
  quotient = 0;

  // Fold out the signs; a is only negated when the division will actually
  // run (b != 0), so the zero-divisor early return sees the original a.
  if ((a < 0) && (0 != b)) {
    a = -a;
    sign = (WORD16)(sign ^ -1);
  }

  if (b < 0) {
    b = -b;
    sign = (WORD16)(sign ^ -1);
  }

  if (0 == b) {
    *q_format = 0;
    return (a);
  }

  quotient = 0;

  // Normalize both operands and derive the Q-format of the quotient from the
  // two normalization shifts.
  q_nr = ixheaacd_norm32(a);
  mantissa_nr = (UWORD32)a << (q_nr);
  q_dr = ixheaacd_norm32(b);
  mantissa_dr = (UWORD32)b << (q_dr);
  *q_format = (WORD)(30 + q_nr - q_dr);

  // 31 shift-and-subtract steps produce a 31-bit quotient.
  for (i = 0; i < 31; i++) {
    quotient = quotient << 1;

    if (mantissa_nr >= mantissa_dr) {
      mantissa_nr = mantissa_nr - mantissa_dr;
      quotient += 1;
    }

    mantissa_nr = (UWORD32)mantissa_nr << 1;
  }

  // Reapply the sign folded out above.
  if (sign < 0) {
    quotient = -quotient;
  }

  return quotient;
}
|
||||
|
||||
// multiplies two 16 bit numbers and accumulates their result in a 32 bit
|
||||
// variable
|
||||
// acc = a + (b * c) using SMLABB (16x16 multiply-accumulate of the bottom
// halfwords, no saturation).
static PLATFORM_INLINE WORD32 ixheaacd_mac16x16in32(WORD32 a, WORD16 b,
                                                    WORD16 c) {
  WORD32 acc;
  __asm__(

      " SMLABB %0, %2, %3, %1 \n\t"
      : "=r"(acc)
      : "r"(a), "r"(b), "r"(c)

  );

  return acc;
}
|
||||
|
||||
// multiplies lower 16 bit of one data with upper 16 bit of
|
||||
// other and accumulates their result in a 32 bit variable
|
||||
// acc = a + low16(b) * high16(c), no saturation.
static PLATFORM_INLINE WORD32 mac16x16hin32(WORD32 a, WORD32 b, WORD32 c) {
  WORD32 prod = ixheaacd_mult16x16in32((WORD16)b, (WORD16)(c >> 16));
  return ixheaacd_add32(a, prod);
}
|
||||
|
||||
// multiplies two 16 bit numbers and accumulates their result in a 32 bit
|
||||
// variable
|
||||
// acc = a + fractional (shift-left-by-1) 16x16 product, no saturation.
static PLATFORM_INLINE WORD32 ixheaacd_mac16x16in32_shl(WORD32 a, WORD16 b,
                                                        WORD16 c) {
  return ixheaacd_add32(a, ixheaacd_mult16x16in32_shl(b, c));
}
|
||||
|
||||
// multiplies two 16 bit numbers and accumulates their
|
||||
// result in a 32 bit variable with saturation
|
||||
|
||||
// acc = a + fractional 16x16 product, both the product doubling and the add
// saturating.
static PLATFORM_INLINE WORD32 ixheaacd_mac16x16in32_shl_sat(WORD32 a, WORD16 b,
                                                            WORD16 c) {
  return ixheaacd_add32_sat(a, ixheaacd_mult16x16in32_shl_sat(b, c));
}
|
||||
|
||||
// multiplies two 16 bit numbers and subtracts their
|
||||
// result from a 32 bit variable
|
||||
// acc = a - (b * c), no saturation.
static PLATFORM_INLINE WORD32 msu16x16in32(WORD32 a, WORD16 b, WORD16 c) {
  return ixheaacd_sub32(a, ixheaacd_mult16x16in32(b, c));
}
|
||||
|
||||
// multiplies two 16 bit numbers and subtracts their
|
||||
// result from a 32 bit variable after removing a redundant sign bit in the
|
||||
// product
|
||||
// acc = a - fractional (shift-left-by-1) 16x16 product, no saturation.
static PLATFORM_INLINE WORD32 msu16x16in32_shl(WORD32 a, WORD16 b, WORD16 c) {
  return ixheaacd_sub32(a, ixheaacd_mult16x16in32_shl(b, c));
}
|
||||
|
||||
// multiplies two 16 bit numbers and subtracts their
|
||||
// result from a 32 bit variable with saturation
|
||||
// after removing a redundant sign bit in the product
|
||||
// acc = a - fractional 16x16 product, both the product doubling and the
// subtraction saturating.
static PLATFORM_INLINE WORD32 msu16x16in32_shl_sat(WORD32 a, WORD16 b,
                                                   WORD16 c) {
  return ixheaacd_sub32_sat(a, ixheaacd_mult16x16in32_shl_sat(b, c));
}
|
||||
|
||||
// adding two 32 bit numbers and taking care of overflow
|
||||
// by downshifting both numbers before addition
|
||||
// Overflow-avoiding add: both operands are halved before the addition.
static PLATFORM_INLINE WORD32 add32_shr(WORD32 a, WORD32 b) {
  return ixheaacd_add32(ixheaacd_shr32(a, 1), ixheaacd_shr32(b, 1));
}
|
||||
|
||||
// subtracting two 32 bit numbers and taking care of
|
||||
// overflow by downshifting both numbers before addition
|
||||
|
||||
// Overflow-avoiding subtract: both operands are halved before subtraction.
static PLATFORM_INLINE WORD32 sub32_shr(WORD32 a, WORD32 b) {
  return ixheaacd_sub32(ixheaacd_shr32(a, 1), ixheaacd_shr32(b, 1));
}
|
||||
#endif
|
||||
400
decoder/armv7/ixheaacd_basic_ops40.h
Normal file
400
decoder/armv7/ixheaacd_basic_ops40.h
Normal file
|
|
@ -0,0 +1,400 @@
|
|||
/******************************************************************************
|
||||
* *
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
#ifndef IXHEAACD_BASIC_OPS40_H
|
||||
#define IXHEAACD_BASIC_OPS40_H
|
||||
#define lo64(a) (((unsigned *)&a)[0]) /* low 32 bits of a long long */
|
||||
#define hi64(a) (((WORD32 *)&a)[1]) /* high 32 bits of a long long */
|
||||
|
||||
// normalize input to 32 bits, return denormalizing info
|
||||
// Normalizes a 40-bit accumulator in place to the 32-bit range and returns
// the denormalization exponent: positive when the value fit in 32 bits and
// was shifted left, negative when it exceeded 32 bits and was shifted right.
// Returns 31 for zero (value left untouched).
static PLATFORM_INLINE WORD16 norm40(WORD40 *in) {
  WORD16 expo;
  WORD32 tempo;

  if (0 == (*in)) return 31;

  // Value already fits in signed 32 bits: normalize upward.
  if (((*in) <= 0x7fffffff) && ((WORD40)(*in) >= (WORD40)0xFFFFFFFF80000000)) {
    tempo = (WORD32)(*in);
    expo = ixheaacd_norm32(tempo);
    *in = tempo << expo;

    return (expo);
  }

  // Value overflows 32 bits: measure the overflow in the bits above bit 31
  // and shift down until the value fits; report as a negative exponent.
  tempo = (WORD32)((*in) >> 31);
  expo = 31 - (ixheaacd_norm32(tempo));
  *in = (*in) >> expo;

  return (-expo);
}
|
||||
|
||||
// adds two numbers and right shifts by 1
|
||||
// (a + b) >> 1 computed in a 40-bit intermediate so the sum cannot overflow.
static PLATFORM_INLINE WORD32 add32_shr40(WORD32 a, WORD32 b) {
  WORD40 wide = (WORD40)a + (WORD40)b;
  return (WORD32)(wide >> 1);
}
|
||||
|
||||
// subtracts and right shifts by one
|
||||
// (a - b) >> 1 computed in a 40-bit intermediate so the difference cannot
// overflow.
static PLATFORM_INLINE WORD32 sub32_shr40(WORD32 a, WORD32 b) {
  WORD40 wide = (WORD40)a - (WORD40)b;
  return (WORD32)(wide >> 1);
}
|
||||
|
||||
// multiply WORD32 with WORD16 return bits 46 to 15
|
||||
// 32x16 fractional multiply (SMULWB: 48-bit product >> 16), doubled to drop
// the redundant sign bit — returns bits 46..15 of the full product.
static PLATFORM_INLINE WORD32 ixheaacd_mult32x16in32_shl(WORD32 a, WORD16 b) {
  WORD32 result;
  __asm__(

      " SMULWB %0, %1, %2 \n\t"
      " MOV %0, %0, LSL #1 \n\t"
      : "=r"(result)
      : "r"(a), "r"(b));
  return result;
}
|
||||
|
||||
// multiply WORD32 with higher 16 bits of second data and return bits 46 to 15
|
||||
// Multiply a by the upper 16 bits of b; return bits 46..15 of the product
// (>> 16, then doubled to drop the redundant sign bit).
static PLATFORM_INLINE WORD32 mult32x16hin32_shl(WORD32 a, WORD32 b) {
  WORD64 wide = (WORD64)a * (WORD64)(b >> 16);
  WORD32 narrowed = (WORD32)(wide >> 16);
  return (narrowed << 1);
}
|
||||
|
||||
// multiply WORD32 with WORD16 return bits 47 to 16
|
||||
// 32x16 multiply returning bits 47..16 of the product (SMULWB).
static PLATFORM_INLINE WORD32 ixheaacd_mult32x16in32(WORD32 a, WORD16 b) {
  WORD32 result;
  __asm__(

      " SMULWB %0, %1, %2 \n\t"
      : "=r"(result)
      : "r"(a), "r"(b));
  return (result);
}
|
||||
|
||||
// multiply WORD32 with WORD16 return bits 46 to 15 with saturation
|
||||
// 32x16 fractional multiply (bits 46..15) with the doubling performed by a
// saturating QADD, clamping the full-scale negative case.
static PLATFORM_INLINE WORD32 ixheaacd_mult32x16in32_shl_sat(WORD32 a,
                                                             WORD16 b) {
  WORD32 result;

  __asm__(

      " SMULWB %0, %1, %2 \n\t"
      " QADD %0, %0, %0 \n\t"
      : "=r"(result)
      : "r"(a), "r"(b));
  return (result);
}
|
||||
|
||||
// multiply WORD32 with WORD32 return bits 62 to 31
|
||||
// 32x32 multiply returning bits 62..31 of the 64-bit product (SMULL high
// word doubled to drop the redundant sign bit; no saturation).
static PLATFORM_INLINE WORD32 ixheaacd_mult32_shl(WORD32 a, WORD32 b) {
  WORD32 resultHi;

  __asm__(

      " smull r3, %0, %1, %2 \n\t"
      " MOV %0, %0, LSL #1 \n\t"
      : "=r"(resultHi)
      : "r"(a), "r"(b)
      : "cc", "r3");

  return resultHi;
}
|
||||
|
||||
// multiply WORD32 with WORD32 return bits 63 to 32
|
||||
// 32x32 multiply returning bits 63..32 of the 64-bit product (SMULL high
// word; low word discarded in r3).
static PLATFORM_INLINE WORD32 ixheaacd_mult32(WORD32 a, WORD32 b) {
  WORD32 resultHi;
  __asm__(

      " smull r3, %0, %1, %2 \n\t"
      : "=r"(resultHi)
      : "r"(a), "r"(b)
      : "r3");
  return resultHi;
}
|
||||
|
||||
// multiply WORD32 with WORD32 return bits 62 to 31 with saturation
|
||||
// 32x32 fractional multiply (bits 62..31) with saturation: only the
// full-scale-negative squared case can overflow, so it is special-cased.
static PLATFORM_INLINE WORD32 ixheaacd_mult32_shl_sat(WORD32 a, WORD32 b) {
  if ((a == (WORD32)0x80000000) && (b == (WORD32)0x80000000)) {
    return 0x7fffffff;
  }
  return ixheaacd_mult32_shl(a, b);
}
|
||||
|
||||
// multiply WORD32 with WORD16 add bits 47 to 16 to accumulator
|
||||
// acc = a + (bits 47..16 of b * c), no saturation.
static PLATFORM_INLINE WORD32 ixheaacd_mac32x16in32(WORD32 a, WORD32 b,
                                                    WORD16 c) {
  return a + ixheaacd_mult32x16in32(b, c);
}
|
||||
|
||||
// multiply WORD32 with WORD16 add bits 46 to 15 to accumulator
|
||||
// acc = a + (bits 46..15 of b * c), no saturation.
static PLATFORM_INLINE WORD32 ixheaacd_mac32x16in32_shl(WORD32 a, WORD32 b,
                                                        WORD16 c) {
  return a + ixheaacd_mult32x16in32_shl(b, c);
}
|
||||
|
||||
// multiply WORD32 with WORD16 add bits 46 to 15 to accumulator with saturation
|
||||
// acc = a + (bits 46..15 of b * c), fully saturating.
static PLATFORM_INLINE WORD32 mac32x16in32_shl_sat(WORD32 a, WORD32 b,
                                                   WORD16 c) {
  WORD32 prod = ixheaacd_mult32x16in32_shl_sat(b, c);
  return ixheaacd_add32_sat(a, prod);
}
|
||||
|
||||
// multiply WORD32 with WORD32 add bits 63 to 32 to accumulator
|
||||
// acc = a + (bits 63..32 of b * c), no saturation.
static PLATFORM_INLINE WORD32 ixheaacd_mac32(WORD32 a, WORD32 b, WORD32 c) {
  return a + ixheaacd_mult32(b, c);
}
|
||||
|
||||
// multiply WORD32 with WORD32 add bits 62 to 31 to accumulator
|
||||
// acc = a + (bits 62..31 of b * c), no saturation.
static PLATFORM_INLINE WORD32 mac32_shl(WORD32 a, WORD32 b, WORD32 c) {
  return a + ixheaacd_mult32_shl(b, c);
}
|
||||
|
||||
// multiply WORD32 with WORD32 add bits 62 to 31 to accumulator with saturation
|
||||
// acc = a + (bits 62..31 of b * c), fully saturating.
static PLATFORM_INLINE WORD32 mac32_shl_sat(WORD32 a, WORD32 b, WORD32 c) {
  WORD32 prod = ixheaacd_mult32_shl_sat(b, c);
  return ixheaacd_add32_sat(a, prod);
}
|
||||
|
||||
// multiply WORD32 with WORD16 sub bits 47 to 16 from accumulator
|
||||
// acc = a - (bits 47..16 of b * c), no saturation.
static PLATFORM_INLINE WORD32 msu32x16in32(WORD32 a, WORD32 b, WORD16 c) {
  return a - ixheaacd_mult32x16in32(b, c);
}
|
||||
|
||||
// multiply WORD32 with WORD16 sub bits 46 to 15 from accumulator
|
||||
// acc = a - (bits 46..15 of b * c), no saturation.
static PLATFORM_INLINE WORD32 msu32x16in32_shl(WORD32 a, WORD32 b, WORD16 c) {
  return a - ixheaacd_mult32x16in32_shl(b, c);
}
|
||||
|
||||
// multiply WORD32 with WORD16 sub bits 46 to 15 from accumulator with
|
||||
// saturation
|
||||
// acc = a - (bits 46..15 of b * c), fully saturating.
static PLATFORM_INLINE WORD32 msu32x16in32_shl_sat(WORD32 a, WORD32 b,
                                                   WORD16 c) {
  WORD32 prod = ixheaacd_mult32x16in32_shl_sat(b, c);
  return ixheaacd_sub32_sat(a, prod);
}
|
||||
|
||||
// multiply WORD32 with WORD32 sub bits 63 to 32 from accumulator
|
||||
// acc = a - (bits 63..32 of b * c), no saturation.
static PLATFORM_INLINE WORD32 msu32(WORD32 a, WORD32 b, WORD32 c) {
  return a - ixheaacd_mult32(b, c);
}
|
||||
|
||||
// multiply WORD32 with WORD32 sub bits 62 to 31 from accumulator
|
||||
// acc = a - (bits 62..31 of b * c), no saturation.
static PLATFORM_INLINE WORD32 msu32_shl(WORD32 a, WORD32 b, WORD32 c) {
  return a - ixheaacd_mult32_shl(b, c);
}
|
||||
|
||||
// multiply WORD32 with WORD32 sub bits 62 to 31 from accumulator with
|
||||
// saturation
|
||||
// acc = a - (bits 62..31 of b * c), fully saturating.
static PLATFORM_INLINE WORD32 msu32_shl_sat(WORD32 a, WORD32 b, WORD32 c) {
  WORD32 prod = ixheaacd_mult32_shl_sat(b, c);
  return ixheaacd_sub32_sat(a, prod);
}
|
||||
|
||||
// returns normalized 32 bit accumulated result
|
||||
// Dot product of WORD32 x[] with WORD16 y[] (each term keeps bits 47..16)
// accumulated in 40 bits, then normalized to 32 bits; *q_val receives the
// normalization exponent from norm40().
static PLATFORM_INLINE WORD32 mac3216_arr40(WORD32 *x, WORD16 *y,
                                            LOOPINDEX length, WORD16 *q_val) {
  WORD40 acc = 0;
  LOOPINDEX idx;

  for (idx = 0; idx < length; idx++) {
    acc += (WORD40)(ixheaacd_mult32x16in32(x[idx], y[idx]));
  }

  *q_val = norm40(&acc);
  return (WORD32)acc;
}
|
||||
|
||||
// returns normalized 32 bit accumulated result
|
||||
// Dot product of two WORD32 arrays (each term keeps bits 63..32) accumulated
// in 40 bits, then normalized to 32 bits; *q_val receives the exponent.
static PLATFORM_INLINE WORD32 mac32_arr40(WORD32 *x, WORD32 *y,
                                          LOOPINDEX length, WORD16 *q_val) {
  WORD40 acc = 0;
  LOOPINDEX idx;

  for (idx = 0; idx < length; idx++) {
    acc += (WORD40)(ixheaacd_mult32(x[idx], y[idx]));
  }

  *q_val = norm40(&acc);
  return ((WORD32)acc);
}
|
||||
|
||||
// returns normalized 32 bit accumulated result
|
||||
// Dot product of two WORD16 arrays (full 32-bit products) accumulated in
// 40 bits, then normalized to 32 bits; *q_val receives the exponent.
static PLATFORM_INLINE WORD32 mac16_arr40(WORD16 *x, WORD16 *y,
                                          LOOPINDEX length, WORD16 *q_val) {
  WORD40 acc = 0;
  LOOPINDEX idx;

  for (idx = 0; idx < length; idx++) {
    acc += (WORD40)((WORD32)x[idx] * (WORD32)y[idx]);
  }

  *q_val = norm40(&acc);
  return ((WORD32)acc);
}
|
||||
|
||||
// returns normalized 32 bit accumulated result
|
||||
// Sums a WORD32 array in a 40-bit accumulator, then normalizes to 32 bits;
// *q_val receives the normalization exponent.
static PLATFORM_INLINE WORD32 add32_arr40(WORD32 *in_arr, LOOPINDEX length,
                                          WORD16 *q_val) {
  WORD40 acc = 0;
  LOOPINDEX idx;

  for (idx = 0; idx < length; idx++) {
    acc += (WORD40)in_arr[idx];
  }

  *q_val = norm40(&acc);
  return ((WORD32)acc);
}
|
||||
|
||||
// multiply WORD32 with WORD32 return WORD64
|
||||
// Full 64-bit product of two 32-bit values.
static PLATFORM_INLINE WORD64 ixheaacd_mult32x32in64(WORD32 a, WORD32 b) {
  return (WORD64)a * (WORD64)b;
}
|
||||
|
||||
// multiply WORD32 with WORD32 and accumulate the 64 bit result
|
||||
// Adds the full 64-bit product of a and b to the running sum.
static PLATFORM_INLINE WORD64 ixheaacd_mac32x32in64(WORD64 sum, WORD32 a,
                                                    WORD32 b) {
  return sum + (WORD64)a * (WORD64)b;
}
|
||||
|
||||
// Seven-term dot product of a[0..6] (WORD32) with b[0..6] (WORD16) in
// 64-bit arithmetic.
// NOTE(review): despite the "mac" name and the `sum` parameter, the first
// statement assigns with `=` (not `+=`), so the incoming `sum` is discarded
// and the function returns only the 7-term product sum — contrast
// ixheaacd_mac32x32in64_n below, which does accumulate onto `sum`. Confirm
// callers rely on this overwrite before changing it.
static PLATFORM_INLINE WORD64 ixheaacd_mac32x32in64_7(WORD64 sum,
                                                      const WORD32 *a,
                                                      const WORD16 *b) {
  sum = (WORD64)a[0] * (WORD64)b[0];
  sum += (WORD64)a[1] * (WORD64)b[1];
  sum += (WORD64)a[2] * (WORD64)b[2];
  sum += (WORD64)a[3] * (WORD64)b[3];
  sum += (WORD64)a[4] * (WORD64)b[4];
  sum += (WORD64)a[5] * (WORD64)b[5];
  sum += (WORD64)a[6] * (WORD64)b[6];

  return (sum);
}
|
||||
|
||||
static PLATFORM_INLINE WORD64 ixheaacd_mac32x32in64_n(WORD64 sum,
|
||||
const WORD32 *a,
|
||||
const WORD16 *b,
|
||||
WORD32 n) {
|
||||
WORD32 k;
|
||||
|
||||
sum += (WORD64)a[0] * (WORD64)b[0];
|
||||
for (k = 1; k < n; k++) sum += (WORD64)a[k] * (WORD64)b[k];
|
||||
return (sum);
|
||||
}
|
||||
|
||||
static PLATFORM_INLINE WORD64 ixheaacd_mult64(WORD32 a, WORD32 b) {
|
||||
WORD64 result;
|
||||
result = (WORD64)a * (WORD64)b;
|
||||
return (result);
|
||||
}
|
||||
|
||||
static PLATFORM_INLINE WORD64 ixheaacd_add64(WORD64 a, WORD64 b) {
|
||||
WORD64 result;
|
||||
result = a + b;
|
||||
return (result);
|
||||
}
|
||||
|
||||
static PLATFORM_INLINE WORD64 ixheaacd_sub64(WORD64 a, WORD64 b) {
|
||||
WORD64 diff;
|
||||
|
||||
diff = (WORD64)a - (WORD64)b;
|
||||
|
||||
return diff;
|
||||
}
|
||||
|
||||
static PLATFORM_INLINE WORD64 ixheaacd_sub64_sat(WORD64 a, WORD64 b) {
|
||||
WORD64 diff;
|
||||
|
||||
diff = ixheaacd_sub64(a, b);
|
||||
|
||||
if ((((WORD64)a ^ (WORD64)b) & (WORD64)MIN_64) != 0) {
|
||||
if (((WORD64)diff ^ (WORD64)a) & (WORD64)MIN_64) {
|
||||
diff = (a < 0L) ? MIN_64 : MAX_64;
|
||||
}
|
||||
}
|
||||
|
||||
return (diff);
|
||||
}
|
||||
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_mul32_sh(WORD32 a, WORD32 b,
|
||||
WORD8 shift) {
|
||||
WORD32 result;
|
||||
WORD64 temp_result;
|
||||
|
||||
temp_result = (WORD64)a * (WORD64)b;
|
||||
result = (WORD32)(temp_result >> shift);
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
#endif
|
||||
109
decoder/armv7/ixheaacd_calc_post_twid.s
Normal file
109
decoder/armv7/ixheaacd_calc_post_twid.s
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http:@www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
|
||||
.global ixheaacd_calc_post_twid_armv7
|
||||
ixheaacd_calc_post_twid_armv7:
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
VPUSH {D8-D15}
|
||||
LDR R4, [SP, #104]
|
||||
LDR R5, [SP, #108]
|
||||
ADD R6, R0, R3, LSL #3
|
||||
SUB R6, R6, #4
|
||||
MOV R7, #-8
|
||||
MOV R8, #8
|
||||
|
||||
LOOP1:
|
||||
VLD1.32 {D0, D1}, [R4]!
|
||||
VLD1.32 {D2, D3}, [R5]!
|
||||
VLD1.32 {D4, D5}, [R1]!
|
||||
VLD1.32 {D6, D7}, [R2]!
|
||||
|
||||
VMULL.S32 Q4, D4, D0
|
||||
VMULL.S32 Q5, D6, D2
|
||||
VMULL.S32 Q6, D6, D0
|
||||
VMULL.S32 Q7, D4, D2
|
||||
VMULL.S32 Q8, D5, D1
|
||||
VMULL.S32 Q9, D7, D3
|
||||
VMULL.S32 Q10, D7, D1
|
||||
VMULL.S32 Q11, D5, D3
|
||||
|
||||
VSHRN.S64 D6, Q4, #32
|
||||
VSHRN.S64 D8, Q5, #32
|
||||
VSHRN.S64 D10, Q6, #32
|
||||
VSHRN.S64 D12, Q7, #32
|
||||
VSHRN.S64 D7, Q8, #32
|
||||
VSHRN.S64 D9, Q9, #32
|
||||
VSHRN.S64 D11, Q10, #32
|
||||
VSHRN.S64 D13, Q11, #32
|
||||
|
||||
VSUB.I32 D0, D6, D8
|
||||
VADD.I32 D1, D10, D12
|
||||
VSUB.I32 D2, D7, D9
|
||||
VADD.I32 D3, D11, D13
|
||||
|
||||
VNEG.S32 Q0, Q0
|
||||
VNEG.S32 Q1, Q1
|
||||
SUBS R3, R3, #4
|
||||
|
||||
VST1.32 {D0[0]}, [R0], R8
|
||||
VST1.32 {D1[0]}, [R6], R7
|
||||
VST1.32 {D0[1]}, [R0], R8
|
||||
VST1.32 {D1[1]}, [R6], R7
|
||||
|
||||
VST1.32 {D2[0]}, [R0], R8
|
||||
VST1.32 {D3[0]}, [R6], R7
|
||||
VST1.32 {D2[1]}, [R0], R8
|
||||
VST1.32 {D3[1]}, [R6], R7
|
||||
BGT LOOP1
|
||||
VPOP {D8-D15}
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
107
decoder/armv7/ixheaacd_calc_pre_twid.s
Normal file
107
decoder/armv7/ixheaacd_calc_pre_twid.s
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http:@www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
|
||||
.global ixheaacd_calc_pre_twid_armv7
|
||||
ixheaacd_calc_pre_twid_armv7:
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
VPUSH {D8-D15}
|
||||
LDR R4, [SP, #104]
|
||||
LDR R5, [SP, #108]
|
||||
ADD R6, R0, R3, LSL #3
|
||||
SUB R6, R6, #12
|
||||
MOV R7, #-16
|
||||
|
||||
LOOP1:
|
||||
VLD1.32 {D0, D1}, [R4]!
|
||||
VLD1.32 {D2, D3}, [R5]!
|
||||
VLD2.32 {D4, D5}, [R0]!
|
||||
VLD2.32 {D6, D7}, [R0]!
|
||||
VLD2.32 {D8, D9}, [R6], R7
|
||||
VLD2.32 {D10, D11}, [R6], R7
|
||||
|
||||
VREV64.32 D8, D8
|
||||
VREV64.32 D9, D10
|
||||
VNEG.S32 D5, D4
|
||||
VNEG.S32 D7, D6
|
||||
|
||||
VMULL.S32 Q6, D0, D5
|
||||
VMULL.S32 Q7, D2, D8
|
||||
VMULL.S32 Q8, D0, D8
|
||||
VMULL.S32 Q9, D2, D4
|
||||
VMULL.S32 Q10, D1, D7
|
||||
VMULL.S32 Q11, D9, D3
|
||||
VMULL.S32 Q12, D1, D9
|
||||
VMULL.S32 Q13, D3, D6
|
||||
|
||||
|
||||
VSHRN.S64 D12, Q6, #32
|
||||
VSHRN.S64 D14, Q7, #32
|
||||
VSHRN.S64 D16, Q8, #32
|
||||
VSHRN.S64 D18, Q9, #32
|
||||
VSHRN.S64 D20, Q10, #32
|
||||
VSHRN.S64 D22, Q11, #32
|
||||
VSHRN.S64 D24, Q12, #32
|
||||
VSHRN.S64 D26, Q13, #32
|
||||
|
||||
VSUB.I32 D0, D12, D14
|
||||
VSUB.I32 D2, D16, D18
|
||||
VSUB.I32 D1, D20, D22
|
||||
VSUB.I32 D3, D24, D26
|
||||
|
||||
SUBS R3, R3, #4
|
||||
VST1.32 {D0, D1}, [R1]!
|
||||
VST1.32 {D2, D3}, [R2]!
|
||||
|
||||
BGT LOOP1
|
||||
VPOP {D8-D15}
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
82
decoder/armv7/ixheaacd_calcmaxspectralline.s
Normal file
82
decoder/armv7/ixheaacd_calcmaxspectralline.s
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_calc_max_spectral_line_armv7
|
||||
|
||||
ixheaacd_calc_max_spectral_line_armv7:
|
||||
|
||||
STMFD sp!, {R4-R12, R14}
|
||||
MOV R4, R1, LSR #3
|
||||
MOV R6, R4, LSL #3
|
||||
VMOV.S32 D6, #0x00000000
|
||||
VMOV.S32 D7, #0x00000000
|
||||
|
||||
LOOP_1:
|
||||
VLD1.32 {D0, D1}, [R0]!
|
||||
|
||||
VLD1.32 {D2, D3}, [R0]!
|
||||
VABS.S32 Q0, Q0
|
||||
|
||||
|
||||
VABS.S32 Q1, Q1
|
||||
SUBS R4, R4, #1
|
||||
|
||||
VORR Q3, Q0, Q3
|
||||
|
||||
VORR Q3, Q1, Q3
|
||||
BGT LOOP_1
|
||||
|
||||
SUBS R7, R1, R6
|
||||
|
||||
VMOV.32 R4, D6[0]
|
||||
VMOV.32 R1, D6[1]
|
||||
VMOV.32 R2, D7[0]
|
||||
ORR R4, R4, R1
|
||||
VMOV.32 R3, D7[1]
|
||||
ORR R4, R4, R2
|
||||
|
||||
|
||||
ORR R4, R4, R3
|
||||
BEQ END_FUNC
|
||||
LOOP_2:
|
||||
|
||||
LDR R2, [R0], #4
|
||||
MOVS R2, R2
|
||||
RSBMI R2, R2, #0
|
||||
ORR R4, R4, R2
|
||||
SUBS R7, R7, #1
|
||||
BGT LOOP_2
|
||||
|
||||
END_FUNC:
|
||||
|
||||
MOVS R0, R4
|
||||
MVNMI R0, R0
|
||||
CLZ R0, R0
|
||||
SUB R0, R0, #1
|
||||
|
||||
LDMFD sp!, {R4-R12, R15}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
809
decoder/armv7/ixheaacd_complex_fft_p2.s
Normal file
809
decoder/armv7/ixheaacd_complex_fft_p2.s
Normal file
|
|
@ -0,0 +1,809 @@
|
|||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_complex_fft_p2_asm
|
||||
|
||||
ixheaacd_complex_fft_p2_asm:
|
||||
STMFD sp!, {r0-r12, lr}
|
||||
SUB sp, sp, #0x28
|
||||
LDR r0, [sp, #0x2c]
|
||||
@LDR r12,[sp,#0x5c+4]
|
||||
EOR r0, r0, r0, ASR #31
|
||||
CLZ r0, r0
|
||||
SUB r12, r0, #16 @dig_rev_shift = norm32(npoints) + 1 -16@
|
||||
SUB r0, r0, #1
|
||||
RSB r0, r0, #0x1e
|
||||
AND r1, r0, #1
|
||||
STR r1, [sp, #0x14]
|
||||
MOV r1, r0, ASR #1
|
||||
LDR r0, [sp, #0x2c] @npoints
|
||||
STR r1, [sp, #-4]!
|
||||
MOV lr, r0, LSL #1 @(npoints >>1) * 4
|
||||
MOV r0, #0
|
||||
|
||||
FIRST_STAGE_R4:
|
||||
LDR r4, =0x33333333
|
||||
LDR r5, =0x0F0F0F0F
|
||||
AND r6, r4, r0
|
||||
AND r7, r4, r0, LSR #2
|
||||
ORR r4, r7, r6, LSL #2
|
||||
AND r6, r5, r4
|
||||
AND r7, r5, r4, LSR #4
|
||||
ORR r4, r7, r6, LSL #4
|
||||
BIC r6, r4, #0x0000FF00
|
||||
BIC r7, r4, #0x00FF0000
|
||||
MOV r7, r7, LSR #8
|
||||
ORR r4, r7, r6, LSL #8
|
||||
LDR r5, [sp, #0x18]
|
||||
MOV r10, r4, LSR r12
|
||||
CMP r5, #0
|
||||
ADDNE r10, r10, #1
|
||||
BICNE r10, r10, #1
|
||||
|
||||
ADD r1, r2, r10, LSL #2
|
||||
LDRD r4, [r1] @r4=x0r, r5=x0i
|
||||
ADD r1, r1, lr
|
||||
LDRD r8, [r1] @r8=x1r, r9=x1i
|
||||
ADD r1, r1, lr
|
||||
LDRD r6, [r1] @r6=x2r, r7=x2i
|
||||
ADD r1, r1, lr
|
||||
LDRD r10, [r1] @r10=x3r, r11=x3i
|
||||
ADD r0, r0, #4
|
||||
CMP r0, lr, ASR #1
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x2r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x2i@
|
||||
SUB r6, r4, r6, lsl#1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r7, r5, r7, lsl#1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r8, r8, r10 @x1r = x1r + x3r@
|
||||
ADD r9, r9, r11 @x1i = x1i + x3i@
|
||||
SUB r1, r8, r10, lsl#1 @x3r = x1r - (x3r << 1)@
|
||||
SUB r11, r9, r11, lsl#1 @x3i = x1i - (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r8 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r9 @x0i = x0i + x1i@
|
||||
SUB r8, r4, r8, lsl#1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r9, r5, r9, lsl#1 @x1i = x0i - (x1i << 1)
|
||||
ADD r6, r6, r11 @x2r = x2r + x3i@
|
||||
SUB r7, r7, r1 @x2i = x2i - x3r@
|
||||
SUB r10, r6, r11, lsl#1 @x3i = x2r - (x3i << 1)@
|
||||
ADD r11, r7, r1, lsl#1 @x3r = x2i + (x3r << 1)@
|
||||
|
||||
STMIA r3!, {r4-r11}
|
||||
BLT FIRST_STAGE_R4
|
||||
LDR r1, [sp], #4
|
||||
LDR r0, [sp, #0x2c]
|
||||
MOV r12, #0x40 @nodespacing = 64@
|
||||
STR r12, [sp, #0x1c]
|
||||
LDR r12, [sp, #0x2c]
|
||||
SUB r3, r3, r0, LSL #3
|
||||
SUBS r1, r1, #1
|
||||
STR r3, [sp, #0x34]
|
||||
MOV r4, r12, ASR #4
|
||||
MOV r0, #4
|
||||
STR r4, [sp, #0x18]
|
||||
STR r1, [sp, #0x20]
|
||||
BLE RADIX2
|
||||
OUTER_LOOP:
|
||||
LDR r1, [sp, #0x28]
|
||||
LDR r12, [sp, #0x34] @WORD32 *data = ptr_y@
|
||||
STR r1, [sp, #0x10]
|
||||
LDR r1, [sp, #0x18]
|
||||
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
LOOP_TRIVIAL_TWIDDLE:
|
||||
LDRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0
|
||||
LDRD r6, [r12] @r6=x1r, r7=x1i
|
||||
ADD r12, r12, r0
|
||||
LDRD r8, [r12] @r8=x2r, r9=x2i
|
||||
ADD r12, r12, r0
|
||||
LDRD r10, [r12] @r10=x3r, r11=x3i
|
||||
|
||||
@MOV r4,r4,ASR #1
|
||||
@MOV r5,r5,ASR #1
|
||||
@MOV r6,r6,ASR #1
|
||||
@MOV r7,r7,ASR #1
|
||||
@MOV r8,r8,ASR #1
|
||||
@MOV r9,r9,ASR #1
|
||||
@MOV r10,r10,ASR #1
|
||||
@MOV r11,r11,ASR #1
|
||||
|
||||
ADD r4, r4, r8 @x0r = x0r + x2r@
|
||||
ADD r5, r5, r9 @x0i = x0i + x2i@
|
||||
SUB r8, r4, r8, lsl #1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r9, r5, r9, lsl #1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r6, r6, r10 @x1r = x1r + x3r@
|
||||
ADD r7, r7, r11 @x1i = x1i + x3i@
|
||||
SUB r2, r6, r10, lsl #1 @x3r = x1r - (x3r << 1)@
|
||||
SUB r11, r7, r11, lsl #1 @x3i = x1i - (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x1i@
|
||||
@MOV r4,r4,ASR #1
|
||||
@MOV r5,r5,ASR #1
|
||||
SUB r6, r4, r6, lsl #1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r7, r5, r7, lsl #1 @x1i = x0i - (x1i << 1)
|
||||
ADD r8, r8, r11 @x2r = x2r + x3i@
|
||||
SUB r9, r9, r2 @x2i = x2i - x3r@
|
||||
SUB r10, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
|
||||
ADD r11, r9, r2, lsl#1 @x3r = x2i + (x3r << 1)
|
||||
|
||||
STRD r10, [r12] @r10=x3r, r11=x3i
|
||||
SUB r12, r12, r0
|
||||
STRD r6, [r12] @r6=x1r, r7=x1i
|
||||
SUB r12, r12, r0
|
||||
STRD r8, [r12] @r8=x2r, r9=x2i
|
||||
SUB r12, r12, r0
|
||||
STRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0, lsl #2
|
||||
|
||||
SUBS r1, r1, #1
|
||||
BNE LOOP_TRIVIAL_TWIDDLE
|
||||
|
||||
MOV r0, r0, ASR #3
|
||||
LDR r4, [sp, #0x1c]
|
||||
LDR r3, [sp, #0x34]
|
||||
MUL r1, r0, r4
|
||||
ADD r12, r3, #8
|
||||
STR r1, [sp, #0x24]
|
||||
MOV r3, r1, ASR #2
|
||||
ADD r3, r3, r1, ASR #3
|
||||
SUB r3, r3, r1, ASR #4
|
||||
ADD r3, r3, r1, ASR #5
|
||||
SUB r3, r3, r1, ASR #6
|
||||
ADD r3, r3, r1, ASR #7
|
||||
SUB r3, r3, r1, ASR #8
|
||||
STR r3, [sp, #-4]!
|
||||
SECOND_LOOP:
|
||||
LDR r3, [sp, #0x10+4]
|
||||
LDR r14, [sp, #0x18+4]
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
|
||||
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
|
||||
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
|
||||
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
|
||||
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
|
||||
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
|
||||
|
||||
STR r4, [sp, #8+4]
|
||||
STR r1, [sp, #-4]
|
||||
STR r2, [sp, #-8]
|
||||
STR r5, [sp, #-12]
|
||||
STR r6, [sp, #-16]
|
||||
STR r7, [sp, #-20]
|
||||
STR r8, [sp, #-24]
|
||||
|
||||
RADIX4_BFLY:
|
||||
|
||||
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
|
||||
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
|
||||
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
|
||||
SUBS r14, r14, #1
|
||||
|
||||
LDR r1, [sp, #-4]
|
||||
LDR r2, [sp, #-8]
|
||||
|
||||
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r6, r3, r6, LSL#1
|
||||
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r7, r3, r7, LSL#1
|
||||
ADD r7, r7, r6
|
||||
SUB r6, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-12]
|
||||
LDR r2, [sp, #-16]
|
||||
|
||||
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r8, r3, r8, LSL#1
|
||||
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r9, r3, r9, LSL#1
|
||||
ADD r9, r9, r8
|
||||
SUB r8, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-20]
|
||||
LDR r2, [sp, #-24]
|
||||
|
||||
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r10, r3, r10, LSL#1
|
||||
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r11, r3, r11, LSL#1
|
||||
ADD r11, r11, r10
|
||||
SUB r10, r4, r5 @
|
||||
|
||||
@SUB r12,r12,r0,lsl #1
|
||||
@LDRD r4,[r12] @r4=x0r, r5=x0i
|
||||
LDR r4, [r12, -r0, lsl #1]! @
|
||||
LDR r5, [r12, #4]
|
||||
|
||||
|
||||
ADD r4, r8, r4 @x0r = x0r + x2r@
|
||||
ADD r5, r9, r5 @x0i = x0i + x2i@
|
||||
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r6, r6, r10 @x1r = x1r + x3r@
|
||||
ADD r7, r7, r11 @x1i = x1i + x3i@
|
||||
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
|
||||
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x1i@
|
||||
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
|
||||
STRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0
|
||||
|
||||
ADD r8, r8, r11 @x2r = x2r + x3i@
|
||||
SUB r9, r9, r10 @x2i = x2i - x3r@
|
||||
SUB r4, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
|
||||
ADD r5, r9, r10, lsl#1 @x3r = x2i + (x3r << 1)
|
||||
|
||||
STRD r8, [r12] @r8=x2r, r9=x2i
|
||||
ADD r12, r12, r0
|
||||
STRD r6, [r12] @r6=x1r, r7=x1i
|
||||
ADD r12, r12, r0
|
||||
STRD r4, [r12] @r10=x3r, r11=x3i
|
||||
ADD r12, r12, r0
|
||||
|
||||
BNE RADIX4_BFLY
|
||||
MOV r0, r0, ASR #3
|
||||
|
||||
LDR r1, [sp, #0x2c+4]
|
||||
LDR r4, [sp, #8+4]
|
||||
SUB r1, r12, r1, LSL #3
|
||||
LDR r6, [sp, #0x1c+4]
|
||||
ADD r12, r1, #8
|
||||
LDR r7, [sp, #0]
|
||||
ADD r4, r4, r6
|
||||
CMP r4, r7
|
||||
BLE SECOND_LOOP
|
||||
|
||||
SECOND_LOOP_2:
|
||||
LDR r3, [sp, #0x10+4]
|
||||
LDR r14, [sp, #0x18+4]
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
|
||||
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
|
||||
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
|
||||
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
|
||||
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
|
||||
SUB r3, r3, #2048 @ 512 *4
|
||||
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
|
||||
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
|
||||
|
||||
STR r4, [sp, #8+4]
|
||||
|
||||
STR r1, [sp, #-4]
|
||||
STR r2, [sp, #-8]
|
||||
STR r5, [sp, #-12]
|
||||
STR r6, [sp, #-16]
|
||||
STR r7, [sp, #-20]
|
||||
STR r8, [sp, #-24]
|
||||
|
||||
RADIX4_BFLY_2:
|
||||
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
|
||||
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
|
||||
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
|
||||
SUBS r14, r14, #1
|
||||
LDR r1, [sp, #-4]
|
||||
LDR r2, [sp, #-8]
|
||||
|
||||
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r6, r3, r6, LSL#1
|
||||
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r7, r3, r7, LSL#1
|
||||
ADD r7, r7, r6
|
||||
SUB r6, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-12]
|
||||
LDR r2, [sp, #-16]
|
||||
|
||||
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r8, r3, r8, LSL#1
|
||||
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r9, r3, r9, LSL#1
|
||||
ADD r9, r9, r8
|
||||
SUB r8, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-20]
|
||||
LDR r2, [sp, #-24]
|
||||
|
||||
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r10, r3, r10, LSL#1
|
||||
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r11, r3, r11, LSL#1
|
||||
ADD r10, r11, r10
|
||||
SUB r11, r5, r4 @
|
||||
|
||||
@SUB r12,r12,r0,lsl #1
|
||||
@LDRD r4,[r12] @r4=x0r, r5=x0i
|
||||
LDR r4, [r12, -r0, lsl #1]! @
|
||||
LDR r5, [r12, #4]
|
||||
|
||||
|
||||
ADD r4, r8, r4 @x0r = x0r + x2r@
|
||||
ADD r5, r9, r5 @x0i = x0i + x2i@
|
||||
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r6, r6, r10 @x1r = x1r + x3r@
|
||||
ADD r7, r7, r11 @x1i = x1i + x3i@
|
||||
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
|
||||
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x1i@
|
||||
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
|
||||
STRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0
|
||||
|
||||
ADD r8, r8, r11 @x2r = x2r + x3i@
|
||||
SUB r9, r9, r10 @x2i = x2i - x3r@
|
||||
SUB r4, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
|
||||
ADD r5, r9, r10, lsl#1 @x3r = x2i + (x3r << 1)
|
||||
|
||||
STRD r8, [r12] @r8=x2r, r9=x2i
|
||||
ADD r12, r12, r0
|
||||
STRD r6, [r12] @r6=x1r, r7=x1i
|
||||
ADD r12, r12, r0
|
||||
STRD r4, [r12] @r10=x3r, r11=x3i
|
||||
ADD r12, r12, r0
|
||||
|
||||
BNE RADIX4_BFLY_2
|
||||
MOV r0, r0, ASR #3
|
||||
|
||||
LDR r1, [sp, #0x2c+4]
|
||||
LDR r4, [sp, #8+4]
|
||||
SUB r1, r12, r1, LSL #3
|
||||
LDR r6, [sp, #0x1c+4]
|
||||
ADD r12, r1, #8
|
||||
LDR r7, [sp, #0x24+4]
|
||||
ADD r4, r4, r6
|
||||
CMP r4, r7, ASR #1
|
||||
BLE SECOND_LOOP_2
|
||||
LDR r7, [sp, #0]
|
||||
CMP r4, r7, LSL #1
|
||||
BGT SECOND_LOOP_4
|
||||
|
||||
SECOND_LOOP_3:
|
||||
LDR r3, [sp, #0x10+4]
|
||||
LDR r14, [sp, #0x18+4]
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
|
||||
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
|
||||
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
|
||||
SUB r3, r3, #2048 @ 512 *4
|
||||
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
|
||||
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
|
||||
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
|
||||
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
|
||||
|
||||
STR r4, [sp, #8+4]
|
||||
STR r1, [sp, #-4]
|
||||
STR r2, [sp, #-8]
|
||||
STR r5, [sp, #-12]
|
||||
STR r6, [sp, #-16]
|
||||
STR r7, [sp, #-20]
|
||||
STR r8, [sp, #-24]
|
||||
|
||||
|
||||
RADIX4_BFLY_3:
|
||||
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
|
||||
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
|
||||
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
|
||||
SUBS r14, r14, #1
|
||||
|
||||
LDR r1, [sp, #-4]
|
||||
LDR r2, [sp, #-8]
|
||||
|
||||
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r6, r3, r6, LSL#1
|
||||
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r7, r3, r7, LSL#1
|
||||
ADD r7, r7, r6
|
||||
SUB r6, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-12]
|
||||
LDR r2, [sp, #-16]
|
||||
|
||||
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r8, r3, r8, LSL#1
|
||||
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r9, r3, r9, LSL#1
|
||||
ADD r8, r9, r8
|
||||
SUB r9, r5, r4 @
|
||||
|
||||
LDR r1, [sp, #-20]
|
||||
LDR r2, [sp, #-24]
|
||||
|
||||
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r10, r3, r10, LSL#1
|
||||
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r11, r3, r11, LSL#1
|
||||
ADD r10, r11, r10
|
||||
SUB r11, r5, r4 @
|
||||
|
||||
@SUB r12,r12,r0,lsl #1
|
||||
@LDRD r4,[r12] @r4=x0r, r5=x0i
|
||||
LDR r4, [r12, -r0, lsl #1]! @
|
||||
LDR r5, [r12, #4]
|
||||
|
||||
|
||||
ADD r4, r8, r4 @x0r = x0r + x2r@
|
||||
ADD r5, r9, r5 @x0i = x0i + x2i@
|
||||
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r6, r6, r10 @x1r = x1r + x3r@
|
||||
ADD r7, r7, r11 @x1i = x1i + x3i@
|
||||
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
|
||||
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x1i@
|
||||
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
|
||||
STRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0
|
||||
|
||||
ADD r8, r8, r11 @x2r = x2r + x3i@
|
||||
SUB r9, r9, r10 @x2i = x2i - x3r@
|
||||
SUB r4, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
|
||||
ADD r5, r9, r10, lsl#1 @x3r = x2i + (x3r << 1)
|
||||
|
||||
STRD r8, [r12] @r8=x2r, r9=x2i
|
||||
ADD r12, r12, r0
|
||||
STRD r6, [r12] @r6=x1r, r7=x1i
|
||||
ADD r12, r12, r0
|
||||
STRD r4, [r12] @r10=x3r, r11=x3i
|
||||
ADD r12, r12, r0
|
||||
|
||||
BNE RADIX4_BFLY_3
|
||||
MOV r0, r0, ASR #3
|
||||
|
||||
LDR r1, [sp, #0x2c+4]
|
||||
LDR r4, [sp, #8+4]
|
||||
SUB r1, r12, r1, LSL #3
|
||||
LDR r6, [sp, #0x1c+4]
|
||||
ADD r12, r1, #8
|
||||
LDR r7, [sp, #0]
|
||||
ADD r4, r4, r6
|
||||
CMP r4, r7, LSL #1
|
||||
BLE SECOND_LOOP_3
|
||||
|
||||
SECOND_LOOP_4:
|
||||
LDR r3, [sp, #0x10+4]
|
||||
LDR r14, [sp, #0x18+4]
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
|
||||
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
|
||||
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
|
||||
SUB r3, r3, #2048 @ 512 *4
|
||||
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
|
||||
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
|
||||
SUB r3, r3, #2048 @ 512 *4
|
||||
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
|
||||
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
|
||||
|
||||
|
||||
STR r4, [sp, #8+4]
|
||||
STR r1, [sp, #-4]
|
||||
STR r2, [sp, #-8]
|
||||
STR r5, [sp, #-12]
|
||||
STR r6, [sp, #-16]
|
||||
STR r7, [sp, #-20]
|
||||
STR r8, [sp, #-24]
|
||||
|
||||
RADIX4_BFLY_4:
|
||||
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
|
||||
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
|
||||
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
|
||||
SUBS r14, r14, #1
|
||||
|
||||
LDR r1, [sp, #-4]
|
||||
LDR r2, [sp, #-8]
|
||||
|
||||
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r6, r3, r6, LSL#1
|
||||
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r7, r3, r7, LSL#1
|
||||
ADD r7, r7, r6
|
||||
SUB r6, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-12]
|
||||
LDR r2, [sp, #-16]
|
||||
|
||||
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r8, r3, r8, LSL#1
|
||||
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r9, r3, r9, LSL#1
|
||||
ADD r8, r9, r8
|
||||
SUB r9, r5, r4 @
|
||||
|
||||
LDR r1, [sp, #-20]
|
||||
LDR r2, [sp, #-24]
|
||||
|
||||
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r10, r3, r10, LSL#1
|
||||
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r11, r3, r11, LSL#1
|
||||
ADD r11, r11, r10
|
||||
SUB r10, r5, r4 @
|
||||
|
||||
@SUB r12,r12,r0,lsl #1
|
||||
@LDRD r4,[r12] @r4=x0r, r5=x0i
|
||||
LDR r4, [r12, -r0, lsl #1]! @
|
||||
LDR r5, [r12, #4]
|
||||
|
||||
|
||||
ADD r4, r8, r4 @x0r = x0r + x2r@
|
||||
ADD r5, r9, r5 @x0i = x0i + x2i@
|
||||
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r6, r6, r10 @x1r = x1r + x3r@
|
||||
SUB r7, r7, r11 @x1i = x1i - x3i@
|
||||
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
|
||||
ADD r11, r7, r11, lsl#1 @x3i = x1i + (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x1i@
|
||||
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
|
||||
STRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0
|
||||
|
||||
ADD r8, r8, r11 @x2r = x2r + x3i@
|
||||
SUB r9, r9, r10 @x2i = x2i - x3r@
|
||||
SUB r4, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
|
||||
ADD r5, r9, r10, lsl#1 @x3r = x2i + (x3r << 1)
|
||||
|
||||
STRD r8, [r12] @r8=x2r, r9=x2i
|
||||
ADD r12, r12, r0
|
||||
STRD r6, [r12] @r6=x1r, r7=x1i
|
||||
ADD r12, r12, r0
|
||||
STRD r4, [r12] @r10=x3r, r11=x3i
|
||||
ADD r12, r12, r0
|
||||
|
||||
BNE RADIX4_BFLY_4
|
||||
MOV r0, r0, ASR #3
|
||||
|
||||
LDR r1, [sp, #0x2c+4]
|
||||
LDR r4, [sp, #8+4]
|
||||
SUB r1, r12, r1, LSL #3
|
||||
LDR r6, [sp, #0x1c+4]
|
||||
ADD r12, r1, #8
|
||||
LDR r7, [sp, #0x24+4]
|
||||
ADD r4, r4, r6
|
||||
CMP r4, r7
|
||||
BLT SECOND_LOOP_4
|
||||
ADD sp, sp, #4
|
||||
|
||||
LDR r1, [sp, #0x1c]
|
||||
MOV r0, r0, LSL #2
|
||||
MOV r1, r1, ASR #2
|
||||
STR r1, [sp, #0x1c]
|
||||
LDR r1, [sp, #0x18]
|
||||
MOV r1, r1, ASR #2
|
||||
STR r1, [sp, #0x18]
|
||||
LDR r1, [sp, #0x20]
|
||||
SUBS r1, r1, #1
|
||||
STR r1, [sp, #0x20]
|
||||
BGT OUTER_LOOP
|
||||
|
||||
RADIX2:
|
||||
LDR r1, [sp, #0x14]
|
||||
CMP r1, #0
|
||||
BEQ EXIT
|
||||
LDR r12, [sp, #0x1c]
|
||||
LDR r1, [sp, #0x28]
|
||||
CMP r12, #0
|
||||
LDRNE r12, [sp, #0x1c]
|
||||
MOVEQ r4, #1
|
||||
MOVNE r4, r12, LSL #1
|
||||
MOVS r3, r0
|
||||
BEQ EXIT
|
||||
|
||||
MOV r3, r3, ASR #1
|
||||
LDR r5, [sp, #0x34]
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
STR r1, [sp, #-4]
|
||||
RADIX2_BFLY:
|
||||
LDR r1, [sp, #-4]
|
||||
LDRD r6, [r5] @r6 = x0r
|
||||
ADD r5, r5, r0
|
||||
LDRD r8, [r5] @r8 = x1r
|
||||
|
||||
LDR r2, [r1]
|
||||
SUBS r3, r3, #1
|
||||
|
||||
|
||||
SMULL r1, r11, r8, r2 @mult32x16hin32(x1r,W1h)
|
||||
LSR r1, r1, #31
|
||||
ORR r11, r1, r11, LSL#1
|
||||
SMULL r1, r10, r9, r2 @mult32x16hin32(x1i,W1h)
|
||||
LSR r1, r1, #31
|
||||
ORR r10, r1, r10, LSL#1
|
||||
|
||||
|
||||
LDR r1, [sp, #-4]
|
||||
LDR r2, [r1, #4]
|
||||
ADD r1, r1, r4, LSL #3
|
||||
STR r1, [sp, #-4]
|
||||
|
||||
SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l)
|
||||
LSR r1, r1, #31
|
||||
ORR r8, r1, r8, LSL#1
|
||||
SMULL r1, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r1, r1, #31
|
||||
ORR r9, r1, r9, LSL#1
|
||||
|
||||
SUB r8, r8, r10
|
||||
ADD r9, r9, r11
|
||||
|
||||
|
||||
ADD r10, r8, r6 @(x0r/2) + (x1r/2)
|
||||
ASR r10, r10, #1
|
||||
ADD r11, r9, r7 @(x0i/2) + (x1i/2)@
|
||||
ASR r11, r11, #1
|
||||
SUB r8, r6, r8 @(x0r/2) - (x1r/2)
|
||||
ASR r8, r8, #1
|
||||
SUB r9, r7, r9 @(x0i/2) - (x1i/2)@
|
||||
ASR r9, r9, #1
|
||||
|
||||
STRD r8, [r5]
|
||||
SUB r5, r5, r0
|
||||
STRD r10, [r5], #8
|
||||
|
||||
BNE RADIX2_BFLY
|
||||
|
||||
LDR r1, [sp, #0x28]
|
||||
MOV r3, r0, ASR #4
|
||||
STR r1, [sp, #-4]
|
||||
RADIX2_BFLY_2:
|
||||
LDR r1, [sp, #-4]
|
||||
LDRD r6, [r5] @r6 = x0r
|
||||
ADD r5, r5, r0
|
||||
LDRD r8, [r5] @r8 = x1r
|
||||
|
||||
LDR r2, [r1]
|
||||
SUBS r3, r3, #1
|
||||
|
||||
|
||||
|
||||
SMULL r1, r11, r8, r2 @mult32x16hin32(x1r,W1h)
|
||||
LSR r1, r1, #31
|
||||
ORR r11, r1, r11, LSL#1
|
||||
SMULL r1, r10, r9, r2 @mult32x16hin32(x1i,W1h)
|
||||
LSR r1, r1, #31
|
||||
ORR r10, r1, r10, LSL#1
|
||||
|
||||
|
||||
LDR r1, [sp, #-4]
|
||||
LDR r2, [r1, #4]
|
||||
ADD r1, r1, r4, LSL #3
|
||||
STR r1, [sp, #-4]
|
||||
|
||||
SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l)
|
||||
LSR r1, r1, #31
|
||||
ORR r8, r1, r8, LSL#1
|
||||
SMULL r1, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r1, r1, #31
|
||||
ORR r9, r1, r9, LSL#1
|
||||
|
||||
ADD r11, r11, r9
|
||||
SUB r9, r10, r8 @
|
||||
MOV r8, r11
|
||||
|
||||
ADD r10, r8, r6 @(x0r>>1) + (x1r)
|
||||
ASR r10, r10, #1
|
||||
ADD r11, r9, r7 @(x0i>>1) + (x1i)@
|
||||
ASR r11, r11, #1
|
||||
SUB r8, r6, r8 @(x0r>>1) - (x1r)
|
||||
ASR r8, r8, #1
|
||||
SUB r9, r7, r9 @(x0i>>1) - (x1i)@
|
||||
ASR r9, r9, #1
|
||||
|
||||
STRD r8, [r5]
|
||||
SUB r5, r5, r0
|
||||
STRD r10, [r5], #8
|
||||
|
||||
BNE RADIX2_BFLY_2
|
||||
|
||||
EXIT:
|
||||
ADD sp, sp, #0x38
|
||||
LDMFD sp!, {r4-r12, pc}
|
||||
|
||||
809
decoder/armv7/ixheaacd_complex_ifft_p2.s
Normal file
809
decoder/armv7/ixheaacd_complex_ifft_p2.s
Normal file
|
|
@ -0,0 +1,809 @@
|
|||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_complex_ifft_p2_asm
|
||||
|
||||
ixheaacd_complex_ifft_p2_asm:
|
||||
STMFD sp!, {r0-r12, lr}
|
||||
SUB sp, sp, #0x28
|
||||
LDR r0, [sp, #0x2c]
|
||||
@LDR r12,[sp,#0x5c+4]
|
||||
EOR r0, r0, r0, ASR #31
|
||||
CLZ r0, r0
|
||||
SUB r12, r0, #16 @dig_rev_shift = norm32(npoints) + 1 -16@
|
||||
SUB r0, r0, #1
|
||||
RSB r0, r0, #0x1e
|
||||
AND r1, r0, #1
|
||||
STR r1, [sp, #0x14]
|
||||
MOV r1, r0, ASR #1
|
||||
LDR r0, [sp, #0x2c] @npoints
|
||||
STR r1, [sp, #-4]!
|
||||
MOV lr, r0, LSL #1 @(npoints >>1) * 4
|
||||
MOV r0, #0
|
||||
|
||||
FIRST_STAGE_R4:
|
||||
LDR r4, =0x33333333
|
||||
LDR r5, =0x0F0F0F0F
|
||||
AND r6, r4, r0
|
||||
AND r7, r4, r0, LSR #2
|
||||
ORR r4, r7, r6, LSL #2
|
||||
AND r6, r5, r4
|
||||
AND r7, r5, r4, LSR #4
|
||||
ORR r4, r7, r6, LSL #4
|
||||
BIC r6, r4, #0x0000FF00
|
||||
BIC r7, r4, #0x00FF0000
|
||||
MOV r7, r7, LSR #8
|
||||
ORR r4, r7, r6, LSL #8
|
||||
LDR r5, [sp, #0x18]
|
||||
MOV r10, r4, LSR r12
|
||||
CMP r5, #0
|
||||
ADDNE r10, r10, #1
|
||||
BICNE r10, r10, #1
|
||||
|
||||
ADD r1, r2, r10, LSL #2
|
||||
LDRD r4, [r1] @r4=x0r, r5=x0i
|
||||
ADD r1, r1, lr
|
||||
LDRD r8, [r1] @r8=x1r, r9=x1i
|
||||
ADD r1, r1, lr
|
||||
LDRD r6, [r1] @r6=x2r, r7=x2i
|
||||
ADD r1, r1, lr
|
||||
LDRD r10, [r1] @r10=x3r, r11=x3i
|
||||
ADD r0, r0, #4
|
||||
CMP r0, lr, ASR #1
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x2r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x2i@
|
||||
SUB r6, r4, r6, lsl#1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r7, r5, r7, lsl#1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r8, r8, r10 @x1r = x1r + x3r@
|
||||
ADD r9, r9, r11 @x1i = x1i + x3i@
|
||||
SUB r1, r8, r10, lsl#1 @x3r = x1r - (x3r << 1)@
|
||||
SUB r11, r9, r11, lsl#1 @x3i = x1i - (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r8 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r9 @x0i = x0i + x1i@
|
||||
SUB r8, r4, r8, lsl#1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r9, r5, r9, lsl#1 @x1i = x0i - (x1i << 1)
|
||||
SUB r6, r6, r11 @x2r = x2r - x3i@
|
||||
ADD r7, r7, r1 @x2i = x2i + x3r@
|
||||
ADD r10, r6, r11, lsl#1 @x3i = x2r + (x3i << 1)@
|
||||
SUB r11, r7, r1, lsl#1 @x3r = x2i - (x3r << 1)@
|
||||
|
||||
STMIA r3!, {r4-r11}
|
||||
BLT FIRST_STAGE_R4
|
||||
LDR r1, [sp], #4
|
||||
LDR r0, [sp, #0x2c]
|
||||
MOV r12, #0x40 @nodespacing = 64@
|
||||
STR r12, [sp, #0x1c]
|
||||
LDR r12, [sp, #0x2c]
|
||||
SUB r3, r3, r0, LSL #3
|
||||
SUBS r1, r1, #1
|
||||
STR r3, [sp, #0x34]
|
||||
MOV r4, r12, ASR #4
|
||||
MOV r0, #4
|
||||
STR r4, [sp, #0x18]
|
||||
STR r1, [sp, #0x20]
|
||||
BLE RADIX2
|
||||
OUTER_LOOP:
|
||||
LDR r1, [sp, #0x28]
|
||||
LDR r12, [sp, #0x34] @WORD32 *data = ptr_y@
|
||||
STR r1, [sp, #0x10]
|
||||
LDR r1, [sp, #0x18]
|
||||
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
LOOP_TRIVIAL_TWIDDLE:
|
||||
LDRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0
|
||||
LDRD r6, [r12] @r6=x1r, r7=x1i
|
||||
ADD r12, r12, r0
|
||||
LDRD r8, [r12] @r8=x2r, r9=x2i
|
||||
ADD r12, r12, r0
|
||||
LDRD r10, [r12] @r10=x3r, r11=x3i
|
||||
|
||||
@MOV r4,r4,ASR #1
|
||||
@MOV r5,r5,ASR #1
|
||||
@MOV r6,r6,ASR #1
|
||||
@MOV r7,r7,ASR #1
|
||||
@MOV r8,r8,ASR #1
|
||||
@MOV r9,r9,ASR #1
|
||||
@MOV r10,r10,ASR #1
|
||||
@MOV r11,r11,ASR #1
|
||||
|
||||
ADD r4, r4, r8 @x0r = x0r + x2r@
|
||||
ADD r5, r5, r9 @x0i = x0i + x2i@
|
||||
SUB r8, r4, r8, lsl #1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r9, r5, r9, lsl #1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r6, r6, r10 @x1r = x1r + x3r@
|
||||
ADD r7, r7, r11 @x1i = x1i + x3i@
|
||||
SUB r2, r6, r10, lsl #1 @x3r = x1r - (x3r << 1)@
|
||||
SUB r11, r7, r11, lsl #1 @x3i = x1i - (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x1i@
|
||||
@MOV r4,r4,ASR #1
|
||||
@MOV r5,r5,ASR #1
|
||||
SUB r6, r4, r6, lsl #1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r7, r5, r7, lsl #1 @x1i = x0i - (x1i << 1)
|
||||
SUB r8, r8, r11 @x2r = x2r - x3i@
|
||||
ADD r9, r9, r2 @x2i = x2i + x3r@
|
||||
ADD r10, r8, r11, lsl#1 @x3i = x2r + (x3i << 1)@
|
||||
SUB r11, r9, r2, lsl#1 @x3r = x2i - (x3r << 1)
|
||||
|
||||
STRD r10, [r12] @r10=x3r, r11=x3i
|
||||
SUB r12, r12, r0
|
||||
STRD r6, [r12] @r6=x1r, r7=x1i
|
||||
SUB r12, r12, r0
|
||||
STRD r8, [r12] @r8=x2r, r9=x2i
|
||||
SUB r12, r12, r0
|
||||
STRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0, lsl #2
|
||||
|
||||
SUBS r1, r1, #1
|
||||
BNE LOOP_TRIVIAL_TWIDDLE
|
||||
|
||||
MOV r0, r0, ASR #3
|
||||
LDR r4, [sp, #0x1c]
|
||||
LDR r3, [sp, #0x34]
|
||||
MUL r1, r0, r4
|
||||
ADD r12, r3, #8
|
||||
STR r1, [sp, #0x24]
|
||||
MOV r3, r1, ASR #2
|
||||
ADD r3, r3, r1, ASR #3
|
||||
SUB r3, r3, r1, ASR #4
|
||||
ADD r3, r3, r1, ASR #5
|
||||
SUB r3, r3, r1, ASR #6
|
||||
ADD r3, r3, r1, ASR #7
|
||||
SUB r3, r3, r1, ASR #8
|
||||
STR r3, [sp, #-4]!
|
||||
SECOND_LOOP:
|
||||
LDR r3, [sp, #0x10+4]
|
||||
LDR r14, [sp, #0x18+4]
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
|
||||
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
|
||||
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
|
||||
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
|
||||
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
|
||||
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
|
||||
|
||||
STR r4, [sp, #8+4]
|
||||
STR r1, [sp, #-4]
|
||||
STR r2, [sp, #-8]
|
||||
STR r5, [sp, #-12]
|
||||
STR r6, [sp, #-16]
|
||||
STR r7, [sp, #-20]
|
||||
STR r8, [sp, #-24]
|
||||
|
||||
RADIX4_BFLY:
|
||||
|
||||
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
|
||||
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
|
||||
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
|
||||
SUBS r14, r14, #1
|
||||
|
||||
LDR r1, [sp, #-4]
|
||||
LDR r2, [sp, #-8]
|
||||
|
||||
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r6, r3, r6, LSL#1
|
||||
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r7, r3, r7, LSL#1
|
||||
SUB r7, r7, r6
|
||||
ADD r6, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-12]
|
||||
LDR r2, [sp, #-16]
|
||||
|
||||
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r8, r3, r8, LSL#1
|
||||
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r9, r3, r9, LSL#1
|
||||
SUB r9, r9, r8
|
||||
ADD r8, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-20]
|
||||
LDR r2, [sp, #-24]
|
||||
|
||||
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r10, r3, r10, LSL#1
|
||||
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r11, r3, r11, LSL#1
|
||||
SUB r11, r11, r10
|
||||
ADD r10, r4, r5 @
|
||||
|
||||
@SUB r12,r12,r0,lsl #1
|
||||
@LDRD r4,[r12] @r4=x0r, r5=x0i
|
||||
LDR r4, [r12, -r0, lsl #1]! @
|
||||
LDR r5, [r12, #4]
|
||||
|
||||
|
||||
ADD r4, r8, r4 @x0r = x0r + x2r@
|
||||
ADD r5, r9, r5 @x0i = x0i + x2i@
|
||||
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r6, r6, r10 @x1r = x1r + x3r@
|
||||
ADD r7, r7, r11 @x1i = x1i + x3i@
|
||||
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
|
||||
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x1i@
|
||||
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
|
||||
STRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0
|
||||
|
||||
SUB r8, r8, r11 @x2r = x2r - x3i@
|
||||
ADD r9, r9, r10 @x2i = x2i + x3r@
|
||||
ADD r4, r8, r11, lsl#1 @x3i = x2r + (x3i << 1)@
|
||||
SUB r5, r9, r10, lsl#1 @x3r = x2i - (x3r << 1)
|
||||
|
||||
STRD r8, [r12] @r8=x2r, r9=x2i
|
||||
ADD r12, r12, r0
|
||||
STRD r6, [r12] @r6=x1r, r7=x1i
|
||||
ADD r12, r12, r0
|
||||
STRD r4, [r12] @r10=x3r, r11=x3i
|
||||
ADD r12, r12, r0
|
||||
|
||||
BNE RADIX4_BFLY
|
||||
MOV r0, r0, ASR #3
|
||||
|
||||
LDR r1, [sp, #0x2c+4]
|
||||
LDR r4, [sp, #8+4]
|
||||
SUB r1, r12, r1, LSL #3
|
||||
LDR r6, [sp, #0x1c+4]
|
||||
ADD r12, r1, #8
|
||||
LDR r7, [sp, #0]
|
||||
ADD r4, r4, r6
|
||||
CMP r4, r7
|
||||
BLE SECOND_LOOP
|
||||
|
||||
SECOND_LOOP_2:
|
||||
LDR r3, [sp, #0x10+4]
|
||||
LDR r14, [sp, #0x18+4]
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
|
||||
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
|
||||
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
|
||||
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
|
||||
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
|
||||
SUB r3, r3, #2048 @ 512 *4
|
||||
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
|
||||
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
|
||||
|
||||
STR r4, [sp, #8+4]
|
||||
|
||||
STR r1, [sp, #-4]
|
||||
STR r2, [sp, #-8]
|
||||
STR r5, [sp, #-12]
|
||||
STR r6, [sp, #-16]
|
||||
STR r7, [sp, #-20]
|
||||
STR r8, [sp, #-24]
|
||||
|
||||
RADIX4_BFLY_2:
|
||||
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
|
||||
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
|
||||
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
|
||||
SUBS r14, r14, #1
|
||||
LDR r1, [sp, #-4]
|
||||
LDR r2, [sp, #-8]
|
||||
|
||||
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r6, r3, r6, LSL#1
|
||||
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r7, r3, r7, LSL#1
|
||||
SUB r7, r7, r6
|
||||
ADD r6, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-12]
|
||||
LDR r2, [sp, #-16]
|
||||
|
||||
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r8, r3, r8, LSL#1
|
||||
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r9, r3, r9, LSL#1
|
||||
SUB r9, r9, r8
|
||||
ADD r8, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-20]
|
||||
LDR r2, [sp, #-24]
|
||||
|
||||
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r10, r3, r10, LSL#1
|
||||
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r11, r3, r11, LSL#1
|
||||
SUB r10, r10, r11
|
||||
ADD r11, r5, r4 @
|
||||
|
||||
@SUB r12,r12,r0,lsl #1
|
||||
@LDRD r4,[r12] @r4=x0r, r5=x0i
|
||||
LDR r4, [r12, -r0, lsl #1]! @
|
||||
LDR r5, [r12, #4]
|
||||
|
||||
|
||||
ADD r4, r8, r4 @x0r = x0r + x2r@
|
||||
ADD r5, r9, r5 @x0i = x0i + x2i@
|
||||
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r6, r6, r10 @x1r = x1r + x3r@
|
||||
ADD r7, r7, r11 @x1i = x1i + x3i@
|
||||
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
|
||||
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x1i@
|
||||
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
|
||||
STRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0
|
||||
|
||||
SUB r8, r8, r11 @x2r = x2r - x3i@
|
||||
ADD r9, r9, r10 @x2i = x2i + x3r@
|
||||
ADD r4, r8, r11, lsl#1 @x3i = x2r + (x3i << 1)@
|
||||
SUB r5, r9, r10, lsl#1 @x3r = x2i - (x3r << 1)
|
||||
|
||||
STRD r8, [r12] @r8=x2r, r9=x2i
|
||||
ADD r12, r12, r0
|
||||
STRD r6, [r12] @r6=x1r, r7=x1i
|
||||
ADD r12, r12, r0
|
||||
STRD r4, [r12] @r10=x3r, r11=x3i
|
||||
ADD r12, r12, r0
|
||||
|
||||
BNE RADIX4_BFLY_2
|
||||
MOV r0, r0, ASR #3
|
||||
|
||||
LDR r1, [sp, #0x2c+4]
|
||||
LDR r4, [sp, #8+4]
|
||||
SUB r1, r12, r1, LSL #3
|
||||
LDR r6, [sp, #0x1c+4]
|
||||
ADD r12, r1, #8
|
||||
LDR r7, [sp, #0x24+4]
|
||||
ADD r4, r4, r6
|
||||
CMP r4, r7, ASR #1
|
||||
BLE SECOND_LOOP_2
|
||||
LDR r7, [sp, #0]
|
||||
CMP r4, r7, LSL #1
|
||||
BGT SECOND_LOOP_4
|
||||
|
||||
SECOND_LOOP_3:
|
||||
LDR r3, [sp, #0x10+4]
|
||||
LDR r14, [sp, #0x18+4]
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
|
||||
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
|
||||
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
|
||||
SUB r3, r3, #2048 @ 512 *4
|
||||
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
|
||||
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
|
||||
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
|
||||
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
|
||||
|
||||
STR r4, [sp, #8+4]
|
||||
STR r1, [sp, #-4]
|
||||
STR r2, [sp, #-8]
|
||||
STR r5, [sp, #-12]
|
||||
STR r6, [sp, #-16]
|
||||
STR r7, [sp, #-20]
|
||||
STR r8, [sp, #-24]
|
||||
|
||||
|
||||
RADIX4_BFLY_3:
|
||||
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
|
||||
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
|
||||
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
|
||||
SUBS r14, r14, #1
|
||||
|
||||
LDR r1, [sp, #-4]
|
||||
LDR r2, [sp, #-8]
|
||||
|
||||
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r6, r3, r6, LSL#1
|
||||
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r7, r3, r7, LSL#1
|
||||
SUB r7, r7, r6
|
||||
ADD r6, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-12]
|
||||
LDR r2, [sp, #-16]
|
||||
|
||||
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r8, r3, r8, LSL#1
|
||||
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r9, r3, r9, LSL#1
|
||||
SUB r8, r8, r9
|
||||
ADD r9, r5, r4 @
|
||||
|
||||
LDR r1, [sp, #-20]
|
||||
LDR r2, [sp, #-24]
|
||||
|
||||
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r10, r3, r10, LSL#1
|
||||
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r11, r3, r11, LSL#1
|
||||
SUB r10, r10, r11
|
||||
ADD r11, r5, r4 @
|
||||
|
||||
@SUB r12,r12,r0,lsl #1
|
||||
@LDRD r4,[r12] @r4=x0r, r5=x0i
|
||||
LDR r4, [r12, -r0, lsl #1]! @
|
||||
LDR r5, [r12, #4]
|
||||
|
||||
|
||||
ADD r4, r8, r4 @x0r = x0r + x2r@
|
||||
ADD r5, r9, r5 @x0i = x0i + x2i@
|
||||
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r6, r6, r10 @x1r = x1r + x3r@
|
||||
ADD r7, r7, r11 @x1i = x1i + x3i@
|
||||
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
|
||||
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x1i@
|
||||
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
|
||||
STRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0
|
||||
|
||||
SUB r8, r8, r11 @x2r = x2r - x3i@
|
||||
ADD r9, r9, r10 @x2i = x2i + x3r@
|
||||
ADD r4, r8, r11, lsl#1 @x3i = x2r + (x3i << 1)@
|
||||
SUB r5, r9, r10, lsl#1 @x3r = x2i - (x3r << 1)
|
||||
|
||||
STRD r8, [r12] @r8=x2r, r9=x2i
|
||||
ADD r12, r12, r0
|
||||
STRD r6, [r12] @r6=x1r, r7=x1i
|
||||
ADD r12, r12, r0
|
||||
STRD r4, [r12] @r10=x3r, r11=x3i
|
||||
ADD r12, r12, r0
|
||||
|
||||
BNE RADIX4_BFLY_3
|
||||
MOV r0, r0, ASR #3
|
||||
|
||||
LDR r1, [sp, #0x2c+4]
|
||||
LDR r4, [sp, #8+4]
|
||||
SUB r1, r12, r1, LSL #3
|
||||
LDR r6, [sp, #0x1c+4]
|
||||
ADD r12, r1, #8
|
||||
LDR r7, [sp, #0]
|
||||
ADD r4, r4, r6
|
||||
CMP r4, r7, LSL #1
|
||||
BLE SECOND_LOOP_3
|
||||
|
||||
SECOND_LOOP_4:
|
||||
LDR r3, [sp, #0x10+4]
|
||||
LDR r14, [sp, #0x18+4]
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
|
||||
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
|
||||
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
|
||||
SUB r3, r3, #2048 @ 512 *4
|
||||
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
|
||||
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
|
||||
SUB r3, r3, #2048 @ 512 *4
|
||||
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
|
||||
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
|
||||
|
||||
|
||||
STR r4, [sp, #8+4]
|
||||
STR r1, [sp, #-4]
|
||||
STR r2, [sp, #-8]
|
||||
STR r5, [sp, #-12]
|
||||
STR r6, [sp, #-16]
|
||||
STR r7, [sp, #-20]
|
||||
STR r8, [sp, #-24]
|
||||
|
||||
RADIX4_BFLY_4:
|
||||
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
|
||||
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
|
||||
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
|
||||
SUBS r14, r14, #1
|
||||
|
||||
LDR r1, [sp, #-4]
|
||||
LDR r2, [sp, #-8]
|
||||
|
||||
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r6, r3, r6, LSL#1
|
||||
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r7, r3, r7, LSL#1
|
||||
SUB r7, r7, r6
|
||||
ADD r6, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-12]
|
||||
LDR r2, [sp, #-16]
|
||||
|
||||
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r8, r3, r8, LSL#1
|
||||
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r9, r3, r9, LSL#1
|
||||
SUB r8, r8, r9
|
||||
ADD r9, r5, r4 @
|
||||
|
||||
LDR r1, [sp, #-20]
|
||||
LDR r2, [sp, #-24]
|
||||
|
||||
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r10, r3, r10, LSL#1
|
||||
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r11, r3, r11, LSL#1
|
||||
SUB r11, r11, r10
|
||||
ADD r10, r5, r4 @
|
||||
RSB r10, r10, #0
|
||||
|
||||
@SUB r12,r12,r0,lsl #1
|
||||
@LDRD r4,[r12] @r4=x0r, r5=x0i
|
||||
LDR r4, [r12, -r0, lsl #1]! @
|
||||
LDR r5, [r12, #4]
|
||||
|
||||
|
||||
ADD r4, r8, r4 @x0r = x0r + x2r@
|
||||
ADD r5, r9, r5 @x0i = x0i + x2i@
|
||||
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r6, r6, r10 @x1r = x1r + x3r@
|
||||
SUB r7, r7, r11 @x1i = x1i - x3i@
|
||||
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
|
||||
ADD r11, r7, r11, lsl#1 @x3i = x1i + (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x1i@
|
||||
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
|
||||
STRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0
|
||||
|
||||
SUB r8, r8, r11 @x2r = x2r - x3i@
|
||||
ADD r9, r9, r10 @x2i = x2i + x3r@
|
||||
ADD r4, r8, r11, lsl#1 @x3i = x2r + (x3i << 1)@
|
||||
SUB r5, r9, r10, lsl#1 @x3r = x2i - (x3r << 1)
|
||||
|
||||
STRD r8, [r12] @r8=x2r, r9=x2i
|
||||
ADD r12, r12, r0
|
||||
STRD r6, [r12] @r6=x1r, r7=x1i
|
||||
ADD r12, r12, r0
|
||||
STRD r4, [r12] @r10=x3r, r11=x3i
|
||||
ADD r12, r12, r0
|
||||
|
||||
BNE RADIX4_BFLY_4
|
||||
MOV r0, r0, ASR #3
|
||||
|
||||
LDR r1, [sp, #0x2c+4]
|
||||
LDR r4, [sp, #8+4]
|
||||
SUB r1, r12, r1, LSL #3
|
||||
LDR r6, [sp, #0x1c+4]
|
||||
ADD r12, r1, #8
|
||||
LDR r7, [sp, #0x24+4]
|
||||
ADD r4, r4, r6
|
||||
CMP r4, r7
|
||||
BLT SECOND_LOOP_4
|
||||
ADD sp, sp, #4
|
||||
|
||||
LDR r1, [sp, #0x1c]
|
||||
MOV r0, r0, LSL #2
|
||||
MOV r1, r1, ASR #2
|
||||
STR r1, [sp, #0x1c]
|
||||
LDR r1, [sp, #0x18]
|
||||
MOV r1, r1, ASR #2
|
||||
STR r1, [sp, #0x18]
|
||||
LDR r1, [sp, #0x20]
|
||||
SUBS r1, r1, #1
|
||||
STR r1, [sp, #0x20]
|
||||
BGT OUTER_LOOP
|
||||
|
||||
RADIX2:
|
||||
LDR r1, [sp, #0x14]
|
||||
CMP r1, #0
|
||||
BEQ EXIT
|
||||
LDR r12, [sp, #0x1c]
|
||||
LDR r1, [sp, #0x28]
|
||||
CMP r12, #0
|
||||
LDRNE r12, [sp, #0x1c]
|
||||
MOVEQ r4, #1
|
||||
MOVNE r4, r12, LSL #1
|
||||
MOVS r3, r0
|
||||
BEQ EXIT
|
||||
|
||||
MOV r3, r3, ASR #1
|
||||
LDR r5, [sp, #0x34]
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
STR r1, [sp, #-4]
|
||||
RADIX2_BFLY:
|
||||
LDR r1, [sp, #-4]
|
||||
LDRD r6, [r5] @r6 = x0r
|
||||
ADD r5, r5, r0
|
||||
LDRD r8, [r5] @r8 = x1r
|
||||
|
||||
LDR r2, [r1]
|
||||
SUBS r3, r3, #1
|
||||
|
||||
|
||||
SMULL r1, r11, r8, r2 @mult32x16hin32(x1r,W1h)
|
||||
LSR r1, r1, #31
|
||||
ORR r11, r1, r11, LSL#1
|
||||
SMULL r1, r10, r9, r2 @mult32x16hin32(x1i,W1h)
|
||||
LSR r1, r1, #31
|
||||
ORR r10, r1, r10, LSL#1
|
||||
|
||||
|
||||
LDR r1, [sp, #-4]
|
||||
LDR r2, [r1, #4]
|
||||
ADD r1, r1, r4, LSL #3
|
||||
STR r1, [sp, #-4]
|
||||
|
||||
SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l)
|
||||
LSR r1, r1, #31
|
||||
ORR r8, r1, r8, LSL#1
|
||||
SMULL r1, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r1, r1, #31
|
||||
ORR r9, r1, r9, LSL#1
|
||||
|
||||
ADD r8, r8, r10
|
||||
SUB r9, r9, r11
|
||||
|
||||
ASR r8, r8, #1
|
||||
ASR r6, r6, #1
|
||||
ASR r9, r9, #1
|
||||
ASR r7, r7, #1
|
||||
ADD r10, r8, r6 @(x0r/2) + (x1r/2)
|
||||
ADD r11, r9, r7 @(x0i/2) + (x1i/2)@
|
||||
SUB r8, r6, r8 @(x0r/2) - (x1r/2)
|
||||
SUB r9, r7, r9 @(x0i/2) - (x1i/2)@
|
||||
|
||||
STRD r8, [r5]
|
||||
SUB r5, r5, r0
|
||||
STRD r10, [r5], #8
|
||||
|
||||
BNE RADIX2_BFLY
|
||||
|
||||
LDR r1, [sp, #0x28]
|
||||
MOV r3, r0, ASR #4
|
||||
STR r1, [sp, #-4]
|
||||
RADIX2_BFLY_2:
|
||||
LDR r1, [sp, #-4]
|
||||
LDRD r6, [r5] @r6 = x0r
|
||||
ADD r5, r5, r0
|
||||
LDRD r8, [r5] @r8 = x1r
|
||||
|
||||
LDR r2, [r1]
|
||||
SUBS r3, r3, #1
|
||||
|
||||
|
||||
|
||||
SMULL r1, r11, r8, r2 @mult32x16hin32(x1r,W1h)
|
||||
LSR r1, r1, #31
|
||||
ORR r11, r1, r11, LSL#1
|
||||
SMULL r1, r10, r9, r2 @mult32x16hin32(x1i,W1h)
|
||||
LSR r1, r1, #31
|
||||
ORR r10, r1, r10, LSL#1
|
||||
|
||||
|
||||
LDR r1, [sp, #-4]
|
||||
LDR r2, [r1, #4]
|
||||
ADD r1, r1, r4, LSL #3
|
||||
STR r1, [sp, #-4]
|
||||
|
||||
SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l)
|
||||
LSR r1, r1, #31
|
||||
ORR r8, r1, r8, LSL#1
|
||||
SMULL r1, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r1, r1, #31
|
||||
ORR r9, r1, r9, LSL#1
|
||||
|
||||
SUB r11, r11, r9
|
||||
ADD r9, r10, r8 @
|
||||
MOV r8, r11
|
||||
|
||||
ASR r8, r8, #1
|
||||
ASR r6, r6, #1
|
||||
ASR r9, r9, #1
|
||||
ASR r7, r7, #1
|
||||
ADD r10, r8, r6 @(x0r>>1) + (x1r)
|
||||
ADD r11, r9, r7 @(x0i>>1) + (x1i)@
|
||||
SUB r8, r6, r8 @(x0r>>1) - (x1r)
|
||||
SUB r9, r7, r9 @(x0i>>1) - (x1i)@
|
||||
|
||||
STRD r8, [r5]
|
||||
SUB r5, r5, r0
|
||||
STRD r10, [r5], #8
|
||||
|
||||
BNE RADIX2_BFLY_2
|
||||
|
||||
EXIT:
|
||||
ADD sp, sp, #0x38
|
||||
LDMFD sp!, {r4-r12, pc}
|
||||
|
||||
132
decoder/armv7/ixheaacd_conv_ergtoamplitude.s
Normal file
132
decoder/armv7/ixheaacd_conv_ergtoamplitude.s
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_conv_ergtoamplitude_armv7
|
||||
ixheaacd_conv_ergtoamplitude_armv7:
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
|
||||
LDR R5, [SP, #44]
|
||||
LDR R4, [SP, #40]
|
||||
LDR R14, =0x1FF
|
||||
LDR R10, =0x5A82
|
||||
|
||||
LOOP1:
|
||||
|
||||
LDRSH R6, [R2], #2
|
||||
LDRSH R7, [R2], #2
|
||||
MOV R12, #0
|
||||
MOV R9, #-16
|
||||
MOVS R6, R6
|
||||
BLE ENDIF1_1
|
||||
CLZ R8, R6
|
||||
SUB R8, R8, #17
|
||||
SUB R7, R7, R8
|
||||
MOV R11, R6, LSL R8
|
||||
|
||||
MOV R11, R11, ASR #5
|
||||
ANDS R11, R11, R14
|
||||
|
||||
BIC R11, R11, #1
|
||||
LDRH R12, [R11, R5]
|
||||
|
||||
TST R7, #1
|
||||
ADDNE R7, R7, #3
|
||||
SMULWBNE R12, R12, R10
|
||||
|
||||
|
||||
MOV R9, R7, ASR #1
|
||||
ENDIF1_1:
|
||||
STRH R12, [R2, #-4]
|
||||
STRH R9, [R2, #-2]
|
||||
|
||||
|
||||
LDRSH R6, [R3], #2
|
||||
LDRSH R7, [R3], #2
|
||||
MOV R8, #0
|
||||
MOV R9, #-16
|
||||
MOVS R6, R6
|
||||
BLE ENDIF1_2
|
||||
CLZ R8, R6
|
||||
SUB R8, R8, #17
|
||||
SUB R7, R7, R8
|
||||
MOV R11, R6, LSL R8
|
||||
MOV R11, R11, ASR #5
|
||||
ANDS R11, R11, R14
|
||||
|
||||
BIC R11, R11, #1
|
||||
LDRH R8, [R11, R5]
|
||||
TST R7, #1
|
||||
ADDNE R7, R7, #3
|
||||
|
||||
SMULWBNE R8, R8, R10
|
||||
|
||||
|
||||
MOV R9, R7, ASR #1
|
||||
ENDIF1_2:
|
||||
STRH R8, [R3, #-4]
|
||||
STRH R9, [R3, #-2]
|
||||
|
||||
|
||||
|
||||
LDRSH R6, [R4], #2
|
||||
LDRSH R7, [R4], #2
|
||||
MOV R8, #0
|
||||
MOV R9, #-16
|
||||
MOVS R6, R6
|
||||
BLE ENDIF1_3
|
||||
CLZ R8, R6
|
||||
SUB R8, R8, #17
|
||||
SUB R7, R7, R8
|
||||
MOV R11, R6, LSL R8
|
||||
MOV R11, R11, ASR #5
|
||||
ANDS R11, R11, R14
|
||||
|
||||
BIC R11, R11, #1
|
||||
LDRH R8, [R11, R5]
|
||||
TST R7, #1
|
||||
ADDNE R7, R7, #3
|
||||
|
||||
SMULWBNE R8, R8, R10
|
||||
|
||||
|
||||
MOV R9, R7, ASR #1
|
||||
ENDIF1_3:
|
||||
STRH R9, [R4, #-2]
|
||||
|
||||
SUB R6, R1, R9
|
||||
SUBS R6, R6, #4
|
||||
|
||||
RSBLE R6, R6, #0
|
||||
MOVGT R8, R8, ASR R6
|
||||
MOVLE R8, R8, LSL R6
|
||||
STRH R8, [R4, #-4]
|
||||
|
||||
SUBS R0, R0, #1
|
||||
BGT LOOP1
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
148
decoder/armv7/ixheaacd_conv_ergtoamplitudelp.s
Normal file
148
decoder/armv7/ixheaacd_conv_ergtoamplitudelp.s
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_conv_ergtoamplitudelp_armv7
|
||||
ixheaacd_conv_ergtoamplitudelp_armv7:
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
|
||||
LDR R5, [SP, #44]
|
||||
LDR R4, [SP, #40]
|
||||
LDR R11, =0x5A82
|
||||
LDR R10, =0x1FF
|
||||
|
||||
LOOP1:
|
||||
|
||||
LDRSH R6, [R2, #0]
|
||||
MOV R12, #0
|
||||
MOV R14, #-16
|
||||
MOVS R6, R6
|
||||
BLE ENDIF1_1
|
||||
|
||||
LDRSH R7, [R2, #2]
|
||||
CLZ R8, R6
|
||||
SUB R8, R8, #17
|
||||
SUB R7, R7, R8
|
||||
MOV R6, R6, LSL R8
|
||||
MOV R6, R6, ASR #5
|
||||
AND R6, R6, R10
|
||||
|
||||
TST R7, #1
|
||||
BIC R6, R6, #1
|
||||
LDRH R12, [R6, R5]
|
||||
ADDNE R7, R7, #3
|
||||
MOV R14, R7, ASR #1
|
||||
SMULWBNE R12, R12, R11
|
||||
|
||||
ENDIF1_1:
|
||||
STRH R14, [R2, #2]
|
||||
|
||||
|
||||
LDRSH R6, [R3, #0]
|
||||
MOV R8, #0
|
||||
MOV R9, #-16
|
||||
|
||||
MOVS R6, R6
|
||||
BLE ENDIF1_2
|
||||
|
||||
LDRSH R7, [R3, #2]
|
||||
CLZ R8, R6
|
||||
SUB R8, R8, #17
|
||||
SUB R7, R7, R8
|
||||
MOV R6, R6, LSL R8
|
||||
MOV R6, R6, ASR #5
|
||||
AND R6, R6, R10
|
||||
|
||||
TST R7, #1
|
||||
BIC R6, R6, #1
|
||||
LDRH R8, [R6, R5]
|
||||
ADDNE R7, R7, #3
|
||||
MOV R9, R7, ASR #1
|
||||
SMULWBNE R8, R8, R11
|
||||
|
||||
ENDIF1_2:
|
||||
STRH R9, [R3, #2]
|
||||
STRH R8, [R3], #4
|
||||
|
||||
|
||||
|
||||
|
||||
LDRSH R6, [R4, #0]
|
||||
MOV R8, #0
|
||||
MOV R9, #-16
|
||||
MOVS R6, R6
|
||||
BLE ENDIF1_3
|
||||
|
||||
LDRSH R7, [R4, #2]
|
||||
CLZ R8, R6
|
||||
SUB R8, R8, #17
|
||||
SUB R7, R7, R8
|
||||
MOV R6, R6, LSL R8
|
||||
MOV R6, R6, ASR #5
|
||||
ANDS R6, R6, R10
|
||||
|
||||
TST R7, #1
|
||||
BIC R6, R6, #1
|
||||
LDRH R8, [R6, R5]
|
||||
ADDNE R7, R7, #3
|
||||
MOV R9, R7, ASR #1
|
||||
SMULWBNE R8, R8, R11
|
||||
|
||||
ENDIF1_3:
|
||||
STRH R9, [R4, #2]
|
||||
|
||||
SUB R6, R1, R9
|
||||
SUBS R6, R6, #4
|
||||
|
||||
RSBLE R6, R6, #0
|
||||
MOVGT R8, R8, ASR R6
|
||||
MOVLE R8, R8, LSL R6
|
||||
STRH R8, [R4], #4
|
||||
|
||||
|
||||
SUBS R6, R14, R1
|
||||
BLE ELSE1
|
||||
|
||||
CMP R6, #15
|
||||
MOVGT R6, #15
|
||||
MOV R12, R12, LSL R6
|
||||
CMP R12, #0x8000
|
||||
MVNGE R12, #0x8000
|
||||
CMNLT R12, #0x00008000
|
||||
MOVLT R12, #0x00008000
|
||||
STRH R12, [R2], #4
|
||||
SUBS R0, R0, #1
|
||||
BGT LOOP1
|
||||
|
||||
ELSE1:
|
||||
RSB R6, R6, #0
|
||||
MOV R12, R12, ASR R6
|
||||
STRH R12, [R2], #4
|
||||
SUBS R0, R0, #1
|
||||
BGT LOOP1
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
472
decoder/armv7/ixheaacd_cos_sin_mod.s
Normal file
472
decoder/armv7/ixheaacd_cos_sin_mod.s
Normal file
|
|
@ -0,0 +1,472 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.extern ixheaacd_radix4bfly
|
||||
.hidden ixheaacd_radix4bfly
|
||||
.extern ixheaacd_postradixcompute2
|
||||
.hidden ixheaacd_postradixcompute2
|
||||
.extern ixheaacd_postradixcompute4
|
||||
.hidden ixheaacd_postradixcompute4
|
||||
|
||||
|
||||
|
||||
|
||||
.extern ixheaacd_sbr_imdct_using_fft
|
||||
.hidden ixheaacd_sbr_imdct_using_fft
|
||||
|
||||
|
||||
.global ixheaacd_cos_sin_mod
|
||||
ixheaacd_cos_sin_mod:
|
||||
STMFD SP!, {R4-R12, R14}
|
||||
|
||||
LDR R5, [R1]
|
||||
MOV R7, R5, ASR #1
|
||||
LDR R4, [R1, #12]
|
||||
MOV R5, R7, ASR #2
|
||||
|
||||
MOV R8, R0
|
||||
MOV R6, R7, LSL #3
|
||||
|
||||
|
||||
SUB R10, SP, #516
|
||||
SUB SP, SP, #516
|
||||
|
||||
AND R12, R10, #7
|
||||
CMP R12, #0
|
||||
ADDNE R10, R10, #4
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
STMFD SP!, {R0-R3}
|
||||
|
||||
SUB R6, R6, #4
|
||||
ADD R9, R0, R6
|
||||
|
||||
LDR R2, [R4], #4
|
||||
LDR R1, [R9], #-4
|
||||
LDR R0, [R8], #4
|
||||
ADD R11, R10, R6
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LOOP1:
|
||||
|
||||
SUBS R5, R5, #1
|
||||
|
||||
SMULWT R12, R1, R2
|
||||
SMULWB R6, R0, R2
|
||||
SMULWT R14, R0, R2
|
||||
|
||||
|
||||
LDR R0, [R8, #0xFC]
|
||||
|
||||
QSUB R12, R12, R6
|
||||
|
||||
SMLAWB R14, R1, R2, R14
|
||||
|
||||
LDR R1, [R9, #0x104]
|
||||
|
||||
STR R12, [R10, #4]
|
||||
STR R14, [R10], #8
|
||||
|
||||
SMULWT R6, R0, R2
|
||||
SMULWB R12, R1, R2
|
||||
SMULWT R14, R1, R2
|
||||
|
||||
LDR R1, [R8], #4
|
||||
|
||||
QSUB R12, R12, R6
|
||||
|
||||
SMLAWB R14, R0, R2, R14
|
||||
|
||||
LDR R2, [R4], #4
|
||||
LDR R0, [R9], #-4
|
||||
|
||||
STR R12, [R10, #0xF8]
|
||||
STR R14, [R10, #0xFC]
|
||||
|
||||
SMULWT R3, R1, R2
|
||||
SMULWB R6, R0, R2
|
||||
SMULWT R12, R0, R2
|
||||
|
||||
LDR R0, [R9, #0x104]
|
||||
|
||||
QSUB R3, R3, R6
|
||||
|
||||
SMLAWB R12, R1, R2, R12
|
||||
|
||||
LDR R1, [R8, #0xFC]
|
||||
|
||||
STR R12, [R11, #-4]
|
||||
STR R3, [R11], #-8
|
||||
|
||||
SMULWT R6, R0, R2
|
||||
SMULWB R14, R1, R2
|
||||
SMULWT R12, R1, R2
|
||||
|
||||
LDR R1, [R9], #-4
|
||||
|
||||
QSUB R14, R14, R6
|
||||
|
||||
SMLAWB R3, R0, R2, R12
|
||||
|
||||
LDR R2, [R4], #4
|
||||
LDR R0, [R8], #4
|
||||
|
||||
STR R3, [R11, #0x108]
|
||||
STR R14, [R11, #0x104]
|
||||
|
||||
SMULWT R12, R1, R2
|
||||
SMULWB R6, R0, R2
|
||||
SMULWT R14, R0, R2
|
||||
|
||||
LDR R0, [R8, #0xFC]
|
||||
|
||||
QSUB R12, R12, R6
|
||||
|
||||
SMLAWB R14, R1, R2, R14
|
||||
|
||||
LDR R1, [R9, #0x104]
|
||||
|
||||
STR R12, [R10, #4]
|
||||
STR R14, [R10], #8
|
||||
|
||||
SMULWT R6, R0, R2
|
||||
SMULWB R12, R1, R2
|
||||
SMULWT R14, R1, R2
|
||||
|
||||
LDR R1, [R8], #4
|
||||
|
||||
QSUB R12, R12, R6
|
||||
|
||||
SMLAWB R14, R0, R2, R14
|
||||
|
||||
LDR R2, [R4], #4
|
||||
LDR R0, [R9], #-4
|
||||
STR R12, [R10, #0xF8]
|
||||
STR R14, [R10, #0xFC]
|
||||
|
||||
SMULWT R3, R1, R2
|
||||
SMULWB R6, R0, R2
|
||||
SMULWT R12, R0, R2
|
||||
|
||||
LDR R0, [R9, #0x104]
|
||||
|
||||
QSUB R3, R3, R6
|
||||
SMLAWB R12, R1, R2, R12
|
||||
|
||||
LDR R1, [R8, #0xFC]
|
||||
STR R3, [R11], #-4
|
||||
STR R12, [R11], #-4
|
||||
|
||||
SMULWT R6, R0, R2
|
||||
SMULWB R3, R1, R2
|
||||
SMULWT R12, R1, R2
|
||||
|
||||
|
||||
LDRGT R1, [R9], #-4
|
||||
|
||||
QSUB R3, R3, R6
|
||||
SMLAWB R12, R0, R2, R12
|
||||
|
||||
|
||||
LDRGT R2, [R4], #4
|
||||
LDRGT R0, [R8], #4
|
||||
|
||||
STR R3, [R11, #0x104]
|
||||
STR R12, [R11, #0x108]
|
||||
|
||||
|
||||
BGT LOOP1
|
||||
LDR R1, [SP, #4]
|
||||
LDR R5, [R1]
|
||||
LDR R4, [SP, #8]
|
||||
LDR R0, [SP, #8]
|
||||
ADD R1, SP, #16
|
||||
|
||||
|
||||
AND R2, R1, #7
|
||||
CMP R2, #0
|
||||
ADDNE R1, R1, #4
|
||||
|
||||
|
||||
CMP R5, #64
|
||||
LDR R5, [SP, #12]
|
||||
MOV R2, #1
|
||||
|
||||
BNE THIRTY2BAND
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
MOV R2, R1
|
||||
MOV R1, #32
|
||||
LDR R3, [SP]
|
||||
STR R5, [SP, #-4]!
|
||||
STR R5, [SP, #-4]!
|
||||
STR R5, [SP, #-4]!
|
||||
STR R5, [SP, #-4]!
|
||||
|
||||
BL ixheaacd_sbr_imdct_using_fft
|
||||
ADD SP, SP, #16
|
||||
|
||||
MOV R0, R4
|
||||
MOV R1, #32
|
||||
ADD R2, SP, #16
|
||||
|
||||
|
||||
AND R6, R2, #7
|
||||
CMP R6, #0
|
||||
ADDNE R2, R2, #4
|
||||
|
||||
|
||||
LDR R3, [SP]
|
||||
ADD R2, R2, #256
|
||||
ADD R3, R3, #256
|
||||
|
||||
STR R5, [SP, #-4]!
|
||||
STR R5, [SP, #-4]!
|
||||
STR R5, [SP, #-4]!
|
||||
STR R5, [SP, #-4]!
|
||||
|
||||
BL ixheaacd_sbr_imdct_using_fft
|
||||
|
||||
ADD SP, SP, #16
|
||||
|
||||
LDR R8, [SP]
|
||||
LDR R12, [SP, #4]
|
||||
MOV R3, #32
|
||||
LDR R6, [R8]
|
||||
LDR R11, [R8, #4]
|
||||
|
||||
ADD R9, R8, #252
|
||||
|
||||
|
||||
B LOOP2_PRO
|
||||
|
||||
THIRTY2BAND:
|
||||
|
||||
|
||||
|
||||
MOV R2, R1
|
||||
MOV R1, #16
|
||||
LDR R3, [SP]
|
||||
|
||||
STR R5, [SP, #-4]!
|
||||
STR R5, [SP, #-4]!
|
||||
STR R5, [SP, #-4]!
|
||||
STR R5, [SP, #-4]!
|
||||
|
||||
BL ixheaacd_sbr_imdct_using_fft
|
||||
ADD SP, SP, #16
|
||||
|
||||
MOV R0, R4
|
||||
MOV R1, #16
|
||||
ADD R2, SP, #16
|
||||
|
||||
|
||||
AND R6, R2, #7
|
||||
CMP R6, #0
|
||||
ADDNE R2, R2, #4
|
||||
|
||||
|
||||
LDR R3, [SP]
|
||||
ADD R2, R2, #256
|
||||
ADD R3, R3, #256
|
||||
|
||||
STR R5, [SP, #-4]!
|
||||
STR R5, [SP, #-4]!
|
||||
STR R5, [SP, #-4]!
|
||||
STR R5, [SP, #-4]!
|
||||
|
||||
BL ixheaacd_sbr_imdct_using_fft
|
||||
|
||||
ADD SP, SP, #16
|
||||
LDR R8, [SP]
|
||||
LDR R12, [SP, #4]
|
||||
LDR R6, [R8]
|
||||
LDR R11, [R8, #4]
|
||||
ADD R9, R8, #124
|
||||
|
||||
|
||||
|
||||
|
||||
LOOP2_PRO:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LDR R4, [R12, #20]
|
||||
MOV R6, R6, ASR #1
|
||||
STR R6, [R8], #4
|
||||
LDR R0, [R9]
|
||||
LDR R2, [R4], #4
|
||||
MOV R11, R11, ASR #1
|
||||
LDR R1, [R9, #-4]
|
||||
RSB R12, R11, #0
|
||||
STR R12, [R9], #-4
|
||||
|
||||
SMULWT R14, R1, R2
|
||||
SMULWB R6, R0, R2
|
||||
SMULWT R12, R0, R2
|
||||
|
||||
LDR R0, [R9, #260]
|
||||
QSUB R14, R14, R6
|
||||
SMLAWB R12, R1, R2, R12
|
||||
|
||||
LDR R6, [R8, #252]
|
||||
LDR R11, [R8, #256]
|
||||
STR R14, [R8], #4
|
||||
STR R12, [R9], #-4
|
||||
|
||||
MOV R6, R6, ASR #1
|
||||
MOV R11, R11, ASR #1
|
||||
LDR R1, [R9, #260]
|
||||
|
||||
RSB R6, R6, #0
|
||||
STR R6, [R9, #264]
|
||||
STR R11, [R8, #248]
|
||||
|
||||
SMULWT R12, R0, R2
|
||||
SMULWT R14, R1, R2
|
||||
SMULWB R6, R0, R2
|
||||
SMLAWB R12, R1, R2, R12
|
||||
|
||||
MOV R11, #0
|
||||
QSUB R14, R6, R14
|
||||
QSUB R12, R11, R12
|
||||
LDR R0, [R8, #4]
|
||||
LDR R1, [R8]
|
||||
STR R12, [R8, #252]
|
||||
STR R14, [R9, #260]
|
||||
|
||||
LDR R5, [SP, #4]
|
||||
LDR R5, [R5]
|
||||
MOV R5, R5, ASR #2
|
||||
SUB R5, R5, #2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LOOP2:
|
||||
SMULWB R12, R0, R2
|
||||
SMULWB R14, R1, R2
|
||||
SMULWT R6, R0, R2
|
||||
SMLAWT R12, R1, R2, R12
|
||||
|
||||
LDR R10, [R9]
|
||||
QSUB R14, R14, R6
|
||||
LDR R0, [R8, #260]
|
||||
LDR R1, [R8, #256]
|
||||
STR R12, [R8], #4
|
||||
STR R14, [R9], #-4
|
||||
|
||||
SMULWB R3, R0, R2
|
||||
SMULWT R6, R0, R2
|
||||
SMULWB R14, R1, R2
|
||||
SMLAWT R3, R1, R2, R3
|
||||
|
||||
LDR R7, [R9, #260]
|
||||
QSUB R6, R6, R14
|
||||
QSUB R3, R11, R3
|
||||
LDR R2, [R4], #4
|
||||
LDR R1, [R9]
|
||||
|
||||
STR R3, [R9, #260]
|
||||
STR R6, [R8, #252]
|
||||
|
||||
SMULWT R12, R10, R2
|
||||
SMULWT R14, R1, R2
|
||||
SMULWB R6, R10, R2
|
||||
SMLAWB R12, R1, R2, R12
|
||||
|
||||
LDR R1, [R9, #256]
|
||||
QSUB R14, R14, R6
|
||||
|
||||
STR R12, [R9], #-4
|
||||
STR R14, [R8], #4
|
||||
|
||||
SUBS R5, R5, #1
|
||||
|
||||
SMULWT R12, R7, R2
|
||||
SMULWT R14, R1, R2
|
||||
SMULWB R6, R7, R2
|
||||
SMLAWB R12, R1, R2, R12
|
||||
|
||||
LDRGE R0, [R8, #4]
|
||||
LDRGE R1, [R8]
|
||||
|
||||
QSUB R12, R11, R12
|
||||
QSUB R14, R6, R14
|
||||
|
||||
STR R12, [R8, #252]
|
||||
STR R14, [R9, #260]
|
||||
|
||||
BGE LOOP2
|
||||
ENDLOOP2:
|
||||
|
||||
ADD SP, SP, #532
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
506
decoder/armv7/ixheaacd_dct3_32.s
Normal file
506
decoder/armv7/ixheaacd_dct3_32.s
Normal file
|
|
@ -0,0 +1,506 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_dct3_32
|
||||
.extern ixheaacd_radix4bfly
|
||||
.hidden ixheaacd_radix4bfly
|
||||
.extern ixheaacd_postradixcompute4
|
||||
.hidden ixheaacd_postradixcompute4
|
||||
|
||||
ixheaacd_dct3_32:
|
||||
|
||||
|
||||
STMFD sp!, {R4-R12, R14}
|
||||
VPUSH {D8 - D15}
|
||||
ADD R6, R0, #196
|
||||
SUB R7, R6, #8
|
||||
ADD R10, R7, #4
|
||||
MOV R9, #0
|
||||
VDUP.32 D0, R9
|
||||
ADD R4, R2, #8
|
||||
MOV R8, R1
|
||||
|
||||
VLD1.32 D0[0], [R10]
|
||||
MOV R11, #-4
|
||||
|
||||
VSHR.S32 D0, D0, #7
|
||||
VLD4.16 {D12, D13, D14, D15}, [R4]!
|
||||
|
||||
MOV R12, #-16
|
||||
VST1.32 D0, [R8]!
|
||||
SUB R7, R7, #12
|
||||
|
||||
VLD1.32 {Q0}, [R6]!
|
||||
VLD1.32 {Q1}, [R7], R12
|
||||
SUB R9, R6, #144
|
||||
|
||||
VREV64.32 Q1, Q1
|
||||
SUB R5, R7, #112
|
||||
|
||||
VSWP D2, D3
|
||||
VSHR.S32 Q0, Q0, #7
|
||||
|
||||
VSHR.S32 Q1, Q1, #7
|
||||
VLD1.32 {Q3}, [R9]!
|
||||
VADD.I32 Q2, Q1, Q0
|
||||
|
||||
VUZP.16 D4, D5
|
||||
VSHR.S32 Q3, Q3, #7
|
||||
|
||||
|
||||
VLD1.32 {Q4}, [R5], R12
|
||||
VMULL.U16 Q15, D4, D12
|
||||
|
||||
VREV64.32 Q4, Q4
|
||||
VMULL.U16 Q14, D4, D13
|
||||
|
||||
VSWP D8, D9
|
||||
|
||||
VSHR.S32 Q4, Q4, #7
|
||||
VLD1.32 {Q0}, [R6]!
|
||||
VSUB.I32 Q5, Q3, Q4
|
||||
|
||||
VUZP.16 D10, D11
|
||||
|
||||
VMLAL.U16 Q15, D10, D13
|
||||
VLD1.32 {Q1}, [R7], R12
|
||||
VMLSL.U16 Q14, D10, D12
|
||||
|
||||
VREV64.32 Q1, Q1
|
||||
VSHR.S32 Q0, Q0, #7
|
||||
|
||||
VSWP D2, D3
|
||||
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VSHR.S32 Q1, Q1, #7
|
||||
|
||||
VMLAL.S16 Q15, D5, D12
|
||||
VMLAL.S16 Q15, D11, D13
|
||||
|
||||
|
||||
VSHR.S32 Q14, Q14, #16
|
||||
VMLAL.S16 Q14, D5, D13
|
||||
VADD.I32 Q2, Q1, Q0
|
||||
VMLSL.S16 Q14, D11, D12
|
||||
|
||||
VUZP.16 D4, D5
|
||||
SUB R9, R6, #144
|
||||
|
||||
VSWP Q15, Q14
|
||||
SUB R5, R7, #112
|
||||
|
||||
VST2.32 {Q14, Q15}, [R8]!
|
||||
|
||||
VLD1.32 {Q3}, [R9]!
|
||||
|
||||
VLD1.32 {Q4}, [R5], R12
|
||||
VSHR.S32 Q3, Q3, #7
|
||||
|
||||
VREV64.32 Q4, Q4
|
||||
|
||||
VSWP D8, D9
|
||||
|
||||
VSHR.S32 Q4, Q4, #7
|
||||
VLD4.16 {D12, D13, D14, D15}, [R4]!
|
||||
VSUB.I32 Q5, Q3, Q4
|
||||
|
||||
VUZP.16 D10, D11
|
||||
VMULL.U16 Q15, D4, D12
|
||||
VMLAL.U16 Q15, D10, D13
|
||||
VMULL.U16 Q14, D4, D13
|
||||
VLD1.32 {Q0}, [R6]!
|
||||
VMLSL.U16 Q14, D10, D12
|
||||
VLD1.32 {Q1}, [R7], R11
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
|
||||
VREV64.32 Q1, Q1
|
||||
VSHR.S32 Q14, Q14, #16
|
||||
|
||||
VSWP D2, D3
|
||||
VSHR.S32 Q0, Q0, #7
|
||||
|
||||
SUB R9, R6, #144
|
||||
|
||||
SUB R5, R7, #124
|
||||
VLD1.32 {Q3}, [R9]!
|
||||
VMLAL.S16 Q14, D5, D13
|
||||
|
||||
VMLSL.S16 Q14, D11, D12
|
||||
VLD1.32 {Q4}, [R5], R11
|
||||
VMLAL.S16 Q15, D5, D12
|
||||
|
||||
VREV64.32 Q4, Q4
|
||||
VMLAL.S16 Q15, D11, D13
|
||||
|
||||
VSWP D8, D9
|
||||
VSHR.S32 Q1, Q1, #7
|
||||
|
||||
VADD.I32 Q2, Q1, Q0
|
||||
VLD4.16 {D12, D13, D14, D15}, [R4]!
|
||||
VSHR.S32 Q3, Q3, #7
|
||||
|
||||
VUZP.16 D4, D5
|
||||
VSHR.S32 Q4, Q4, #7
|
||||
|
||||
VSWP Q15, Q14
|
||||
VSUB.I32 Q5, Q3, Q4
|
||||
|
||||
VST2.32 {Q14, Q15}, [R8]!
|
||||
|
||||
VUZP.16 D10, D11
|
||||
VMULL.U16 Q15, D4, D12
|
||||
|
||||
VMLAL.U16 Q15, D10, D13
|
||||
VLD1.32 D0, [R6]!
|
||||
VMULL.U16 Q14, D4, D13
|
||||
|
||||
VMLSL.U16 Q14, D10, D12
|
||||
VLD1.32 D1[0], [R6]!
|
||||
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VLD1.32 D2[0], [R7], R11
|
||||
|
||||
VMLAL.S16 Q15, D5, D12
|
||||
VLD1.32 D2[1], [R7], R11
|
||||
VMLAL.S16 Q15, D11, D13
|
||||
|
||||
SUB R9, R6, #140
|
||||
VLD1.32 D3[0], [R7], R11
|
||||
|
||||
SUB R5, R7, #116
|
||||
VLD1.32 D6, [R9]!
|
||||
VSHR.S32 Q14, Q14, #16
|
||||
|
||||
VSHR.S32 Q0, Q0, #7
|
||||
VLD1.32 D7[0], [R9]!
|
||||
VMLAL.S16 Q14, D5, D13
|
||||
|
||||
VLD1.32 D8[0], [R5], R11
|
||||
VMLSL.S16 Q14, D11, D12
|
||||
|
||||
VSHR.S32 Q1, Q1, #7
|
||||
VLD4.16 {D12, D13, D14, D15}, [R4]
|
||||
VADD.I32 Q2, Q1, Q0
|
||||
|
||||
VLD1.32 D8[1], [R5], R11
|
||||
VSHR.S32 Q3, Q3, #7
|
||||
|
||||
VSWP Q15, Q14
|
||||
|
||||
VLD1.32 D9[0], [R5], R11
|
||||
|
||||
VSHR.S32 Q4, Q4, #7
|
||||
VST2.32 {Q14, Q15}, [R8]!
|
||||
ADD R4, #24
|
||||
|
||||
VUZP.16 D4, D5
|
||||
VSUB.I32 Q5, Q3, Q4
|
||||
|
||||
VUZP.16 D10, D11
|
||||
VMULL.U16 Q15, D4, D12
|
||||
|
||||
VMLAL.U16 Q15, D10, D13
|
||||
VMULL.U16 Q14, D4, D13
|
||||
|
||||
VMLSL.U16 Q14, D10, D12
|
||||
VLD1.16 D0[0], [R4]!
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
|
||||
VSHR.S32 Q14, Q14, #16
|
||||
VLD1.32 D2[0], [R7], R11
|
||||
VMLAL.S16 Q15, D5, D12
|
||||
|
||||
SUB R5, R7, #124
|
||||
VMLAL.S16 Q15, D11, D13
|
||||
VLD1.32 D4[0], [R5]
|
||||
VMLAL.S16 Q14, D5, D13
|
||||
|
||||
VMLSL.S16 Q14, D11, D12
|
||||
|
||||
VSHR.S32 D2, D2, #7
|
||||
VST1.32 D30[0], [R8]!
|
||||
VSHR.S32 D4, D4, #7
|
||||
VSUB.I32 D2, D2, D4
|
||||
|
||||
VMOV D4, D2
|
||||
VST1.32 D28[0], [R8]!
|
||||
MOV R6, R1
|
||||
|
||||
ADD R7, R1, #124
|
||||
VST1.32 D30[1], [R8]!
|
||||
ADD R10, R3, #16
|
||||
|
||||
SUB R7, R7, #28
|
||||
VST1.32 D28[1], [R8]!
|
||||
MOV R5, #-16
|
||||
|
||||
MOV R9, #-4
|
||||
VST1.32 D31[0], [R8]!
|
||||
MOV R11, #16
|
||||
|
||||
VST1.32 D29[0], [R8]!
|
||||
MOV R12, #4
|
||||
|
||||
VUZP.16 D4, D5
|
||||
MOV R8, #6
|
||||
|
||||
VLD1.16 D1[0], [R4], R8
|
||||
VMULL.U16 Q15, D4, D0
|
||||
|
||||
|
||||
VUZP.16 D2, D3
|
||||
VMULL.U16 Q14, D4, D1
|
||||
|
||||
VMLAL.U16 Q15, D2, D1
|
||||
VLD2.32 {D10, D11}, [R6]
|
||||
VMLSL.U16 Q14, D2, D0
|
||||
|
||||
ADD R4, R3, #4
|
||||
MOV R8, #-32
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VSHR.S32 Q14, Q14, #16
|
||||
VMLAL.S16 Q15, D5, D0
|
||||
VMLAL.S16 Q15, D3, D1
|
||||
VMLAL.S16 Q14, D5, D1
|
||||
VMLSL.S16 Q14, D3, D0
|
||||
|
||||
VADD.I32 D14, D11, D28
|
||||
VLD2.32 {Q2, Q3}, [R7]
|
||||
VNEG.S32 D14, D14
|
||||
|
||||
VREV64.32 Q2, Q2
|
||||
VSUB.I32 D12, D10, D30
|
||||
|
||||
VREV64.32 Q3, Q3
|
||||
VADD.I32 D10, D10, D30
|
||||
|
||||
VSWP D4, D5
|
||||
VADD.I32 D10, D10, D14
|
||||
|
||||
VSWP D6, D7
|
||||
VSUB.I32 D11, D11, D28
|
||||
|
||||
VADD.I32 D11, D11, D12
|
||||
VLD2.16 {D8, D9}, [R10], R5
|
||||
VSHR.S32 D10, D10, #1
|
||||
|
||||
VREV64.16 D8, D8
|
||||
VSHR.S32 D11, D11, #1
|
||||
|
||||
VUZP.32 D10, D11
|
||||
|
||||
VST1.32 D10, [R6]!
|
||||
VLD2.32 {Q0, Q1}, [R6]
|
||||
|
||||
VADD.I32 Q7, Q0, Q2
|
||||
VLD2.16 {D10, D11}, [R4], R11
|
||||
VSUB.I32 Q6, Q0, Q2
|
||||
|
||||
VUZP.16 D12, D13
|
||||
VADD.I32 Q8, Q1, Q3
|
||||
|
||||
VUZP.16 D16, D17
|
||||
VSUB.I32 Q9, Q1, Q3
|
||||
|
||||
|
||||
VMULL.U16 Q15, D12, D8
|
||||
VMLAL.U16 Q15, D16, D10
|
||||
VMULL.U16 Q14, D12, D10
|
||||
VMLSL.U16 Q14, D16, D8
|
||||
VSHR.S32 Q7, Q7, #1
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VSHR.S32 Q9, Q9, #1
|
||||
VSHR.S32 Q14, Q14, #16
|
||||
VMLAL.S16 Q15, D13, D8
|
||||
VMLAL.S16 Q15, D17, D10
|
||||
VMLAL.S16 Q14, D13, D10
|
||||
VMLSL.S16 Q14, D17, D8
|
||||
|
||||
VSUB.I32 Q10, Q7, Q15
|
||||
VLD2.16 {D8, D9}, [R10]
|
||||
VADD.I32 Q13, Q7, Q15
|
||||
|
||||
VREV64.32 Q13, Q13
|
||||
VSWP D26, D27
|
||||
|
||||
VADD.I32 Q11, Q9, Q14
|
||||
VREV64.16 D8, D8
|
||||
|
||||
VSUB.I32 Q12, Q14, Q9
|
||||
|
||||
VREV64.32 Q12, Q12
|
||||
|
||||
VST2.32 {Q10, Q11}, [R6]!
|
||||
VSWP D24, D25
|
||||
VSWP Q12, Q13
|
||||
VST2.32 {Q12, Q13}, [R7], R8
|
||||
|
||||
VLD2.32 {Q0, Q1}, [R6]
|
||||
VLD2.32 {Q2, Q3}, [R7]
|
||||
|
||||
VREV64.32 Q2, Q2
|
||||
VREV64.32 Q3, Q3
|
||||
|
||||
VSWP D4, D5
|
||||
VSWP D6, D7
|
||||
|
||||
VSUB.I32 Q6, Q0, Q2
|
||||
VADD.I32 Q7, Q0, Q2
|
||||
VLD2.16 {D10, D11}, [R4], R11
|
||||
VADD.I32 Q8, Q1, Q3
|
||||
|
||||
VUZP.16 D12, D13
|
||||
VSUB.I32 Q9, Q1, Q3
|
||||
|
||||
|
||||
VUZP.16 D16, D17
|
||||
VMULL.U16 Q15, D12, D8
|
||||
|
||||
VMLAL.U16 Q15, D16, D10
|
||||
VMULL.U16 Q14, D12, D10
|
||||
VMLSL.U16 Q14, D16, D8
|
||||
ADD R7, R7, #8
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VSHR.S32 Q7, Q7, #1
|
||||
VSHR.S32 Q14, Q14, #16
|
||||
VMLAL.S16 Q15, D13, D8
|
||||
VMLAL.S16 Q15, D17, D10
|
||||
VMLAL.S16 Q14, D13, D10
|
||||
VMLSL.S16 Q14, D17, D8
|
||||
|
||||
VSHR.S32 Q9, Q9, #1
|
||||
VSUB.I32 Q10, Q7, Q15
|
||||
VSUB.I32 Q12, Q14, Q9
|
||||
|
||||
VADD.I32 Q11, Q9, Q14
|
||||
VST1.32 D20[0], [R6]!
|
||||
VADD.I32 Q13, Q7, Q15
|
||||
|
||||
|
||||
VST1.32 D22[0], [R6]!
|
||||
VST1.32 D20[1], [R6]!
|
||||
VST1.32 D22[1], [R6]!
|
||||
VST1.32 D21[0], [R6]!
|
||||
VST1.32 D23[0], [R6]!
|
||||
|
||||
VREV64.32 Q12, Q12
|
||||
|
||||
VREV64.32 Q13, Q13
|
||||
VSWP D24, D25
|
||||
VSWP D26, D27
|
||||
|
||||
|
||||
VST1.32 D26[1], [R7]!
|
||||
VST1.32 D24[1], [R7]!
|
||||
VST1.32 D27[0], [R7]!
|
||||
VST1.32 D25[0], [R7]!
|
||||
VST1.32 D27[1], [R7]!
|
||||
VST1.32 D25[1], [R7]!
|
||||
|
||||
SUB R7, R7, #32
|
||||
VLD2.32 {D0, D1}, [R6]
|
||||
VLD2.32 {D2, D3}, [R7]
|
||||
|
||||
VSUB.I32 D12, D0, D2
|
||||
VLD1.16 D8, [R10], R9
|
||||
VADD.I32 D14, D0, D2
|
||||
|
||||
VADD.I32 D16, D1, D3
|
||||
VLD1.16 D10, [R4], R12
|
||||
VSUB.I32 D18, D1, D3
|
||||
|
||||
VUZP.16 D12, D13
|
||||
MOV R4, R0
|
||||
|
||||
VUZP.16 D16, D17
|
||||
VMULL.U16 Q15, D12, D8
|
||||
VMLAL.U16 Q15, D16, D10
|
||||
VMULL.U16 Q14, D12, D10
|
||||
VMLSL.U16 Q14, D16, D8
|
||||
VSHR.S32 D18, D18, #1
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VSHR.S32 Q14, Q14, #16
|
||||
|
||||
VMLAL.S16 Q15, D13, D8
|
||||
VMLAL.S16 Q15, D17, D10
|
||||
|
||||
MOV R10, R1
|
||||
|
||||
VMLAL.S16 Q14, D13, D10
|
||||
VMLSL.S16 Q14, D17, D8
|
||||
VNEG.S32 Q15, Q15
|
||||
VSHR.S32 D14, D14, #1
|
||||
|
||||
VADD.I32 Q13, Q7, Q15
|
||||
|
||||
VADD.I32 Q11, Q9, Q14
|
||||
|
||||
LDR r0 , [sp , #104]
|
||||
VST1.32 D26[0], [R6]!
|
||||
MOV r2, #1
|
||||
|
||||
VST1.32 D22[0], [R6]!
|
||||
MOV r3, #4
|
||||
BL ixheaacd_radix4bfly
|
||||
|
||||
MOV r0, r4
|
||||
MOV r1, r10
|
||||
LDR r2 , [sp , #108]
|
||||
MOV r3, #16
|
||||
BL ixheaacd_postradixcompute4
|
||||
|
||||
MOV r0, r4
|
||||
MOV r1, r10
|
||||
LDMIA r0!, {r4, r5}
|
||||
STR r4, [r1], #4
|
||||
STR r5, [r1, #4]
|
||||
ADD r2, r0, #64
|
||||
ADD r3, r1, #116
|
||||
MOV r6, #7
|
||||
|
||||
BACK3:
|
||||
|
||||
LDMIA r0!, {r4, r5}
|
||||
STR r5, [r1], #8
|
||||
STR r4, [r1], #8
|
||||
|
||||
LDMIA r2!, {r4, r5}
|
||||
STR r5, [r3], #-8
|
||||
STR r4, [r3], #-8
|
||||
|
||||
SUBS r6, r6, #1
|
||||
BNE BACK3
|
||||
|
||||
LDMIA r0!, {r4, r5}
|
||||
STR r5, [r1], #8
|
||||
STR r4, [r1], #8
|
||||
|
||||
VPOP {D8 - D15}
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
522
decoder/armv7/ixheaacd_dec_DCT2_64_asm.s
Normal file
522
decoder/armv7/ixheaacd_dec_DCT2_64_asm.s
Normal file
|
|
@ -0,0 +1,522 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.extern ixheaacd_radix4bfly
|
||||
.hidden ixheaacd_radix4bfly
|
||||
.extern ixheaacd_postradixcompute2
|
||||
.hidden ixheaacd_postradixcompute2
|
||||
.extern ixheaacd_sbr_imdct_using_fft
|
||||
.hidden ixheaacd_sbr_imdct_using_fft
|
||||
|
||||
.global ixheaacd_dec_DCT2_64_asm
|
||||
|
||||
ixheaacd_dec_DCT2_64_asm:
|
||||
|
||||
STMFD sp!, {r0-r3, r4-r12, r14}
|
||||
ADD R2, R1, #252
|
||||
MOV R3, #32
|
||||
MOV R4, #-4
|
||||
|
||||
ADD R2, R2, #4
|
||||
|
||||
|
||||
FOR_LOOP:
|
||||
|
||||
VLD2.32 {Q0, Q1}, [R0]!
|
||||
SUBS R3, R3, #4
|
||||
|
||||
VST1.32 {Q0}, [R1]!
|
||||
SUB R2, R2, #16
|
||||
|
||||
VREV64.32 Q1, Q1
|
||||
VSWP D2, D3
|
||||
VST1.32 {Q1}, [R2]
|
||||
BGT FOR_LOOP
|
||||
|
||||
LDR r0, [sp, #8]
|
||||
MOV r1, #32
|
||||
LDR r2, [sp, #4]
|
||||
LDR r3, [sp]
|
||||
|
||||
LDR r4, [sp, #12]
|
||||
STR r4, [sp, #-4]!
|
||||
STR r4, [sp, #-4]!
|
||||
STR r4, [sp, #-4]!
|
||||
STR r4, [sp, #-4]!
|
||||
|
||||
BL ixheaacd_sbr_imdct_using_fft
|
||||
|
||||
ADD sp, sp, #16
|
||||
|
||||
LDR r0, [sp]
|
||||
|
||||
|
||||
LDR r2, [sp, #56]
|
||||
|
||||
VPUSH {D8 - D15}
|
||||
ADD R5, R0, #252
|
||||
VLD1.32 D0, [R0]
|
||||
ADD R3, R2, #2
|
||||
VSHL.S32 D0, D0, #1
|
||||
VST1.32 D0, [R0]!
|
||||
SUB R5, R5, #28
|
||||
|
||||
|
||||
VLD2.32 {Q0, Q1}, [R0]!
|
||||
|
||||
VLD2.32 {Q2, Q3}, [R5]!
|
||||
VREV64.32 Q2, Q2
|
||||
VSWP D4, D5
|
||||
MOV R10, #-8
|
||||
VREV64.32 Q3, Q3
|
||||
ADD R4, R2, #30
|
||||
VSWP D6, D7
|
||||
SUB R4, R4, #6
|
||||
VLD1.16 D8, [R3]!
|
||||
VSUB.I32 Q11, Q3, Q1
|
||||
VLD1.16 D10, [R4], R10
|
||||
VADD.I32 Q10, Q3, Q1
|
||||
VREV64.16 D10, D10
|
||||
VSUB.I32 Q9, Q0, Q2
|
||||
VUZP.16 D20, D21
|
||||
VADD.I32 Q8, Q0, Q2
|
||||
VUZP.16 D18, D19
|
||||
VMULL.U16 Q15, D20, D8
|
||||
VMLSL.U16 Q15, D18, D10
|
||||
VMULL.U16 Q14, D18, D8
|
||||
VMLAL.U16 Q14, D20, D10
|
||||
SUB R11, R0, #32
|
||||
VSHR.S32 Q15, Q15, #16
|
||||
VSHR.U32 Q14, Q14, #16
|
||||
SUB R12, R5, #32
|
||||
VMLAL.S16 Q15, D21, D8
|
||||
VMLSL.S16 Q15, D19, D10
|
||||
|
||||
VLD2.32 {Q0, Q1}, [R0]!
|
||||
SUB R5, R5, #64
|
||||
VMLAL.S16 Q14, D19, D8
|
||||
VLD2.32 {Q2, Q3}, [R5]!
|
||||
VMLAL.S16 Q14, D21, D10
|
||||
VREV64.32 Q2, Q2
|
||||
VSHL.S32 Q15, Q15, #1
|
||||
VSWP D4, D5
|
||||
VSHL.S32 Q14, Q14, #1
|
||||
|
||||
VREV64.32 Q3, Q3
|
||||
VADD.I32 Q13, Q8, Q15
|
||||
VSWP D6, D7
|
||||
VADD.I32 Q12, Q11, Q14
|
||||
|
||||
|
||||
VLD1.16 D8, [R3]!
|
||||
VSUB.I32 Q7, Q14, Q11
|
||||
VLD1.16 D10, [R4], R10
|
||||
|
||||
VSUB.I32 Q6, Q8, Q15
|
||||
VREV64.32 Q7, Q7
|
||||
VREV64.32 Q6, Q6
|
||||
VSWP D14, D15
|
||||
VSWP D12, D13
|
||||
VREV64.16 D10, D10
|
||||
|
||||
|
||||
VSUB.I32 Q11, Q3, Q1
|
||||
VSWP Q13, Q12
|
||||
VADD.I32 Q10, Q3, Q1
|
||||
VST2.32 {Q12, Q13}, [R11]!
|
||||
VSUB.I32 Q9, Q0, Q2
|
||||
|
||||
VADD.I32 Q8, Q0, Q2
|
||||
VST2.32 {Q6, Q7}, [R12]
|
||||
SUB R11, R0, #32
|
||||
|
||||
VUZP.16 D20, D21
|
||||
SUB R12, R5, #32
|
||||
|
||||
VUZP.16 D18, D19
|
||||
SUB R5, R5, #64
|
||||
|
||||
VMULL.U16 Q15, D20, D8
|
||||
VLD2.32 {Q0, Q1}, [R0]!
|
||||
VMLSL.U16 Q15, D18, D10
|
||||
VLD2.32 {Q2, Q3}, [R5]!
|
||||
VMULL.U16 Q14, D18, D8
|
||||
VREV64.32 Q2, Q2
|
||||
VMLAL.U16 Q14, D20, D10
|
||||
VSWP D4, D5
|
||||
VSHR.S32 Q15, Q15, #16
|
||||
VREV64.32 Q3, Q3
|
||||
VMLAL.S16 Q15, D21, D8
|
||||
VMLSL.S16 Q15, D19, D10
|
||||
VSWP D6, D7
|
||||
VSHR.U32 Q14, Q14, #16
|
||||
VMLAL.S16 Q14, D19, D8
|
||||
VLD1.16 D8, [R3]!
|
||||
VMLAL.S16 Q14, D21, D10
|
||||
VSHL.S32 Q15, Q15, #1
|
||||
VLD1.16 D10, [R4], R10
|
||||
|
||||
VSHL.S32 Q14, Q14, #1
|
||||
VREV64.16 D10, D10
|
||||
VADD.I32 Q13, Q8, Q15
|
||||
VADD.I32 Q12, Q11, Q14
|
||||
|
||||
|
||||
VSUB.I32 Q7, Q14, Q11
|
||||
VSUB.I32 Q6, Q8, Q15
|
||||
VREV64.32 Q7, Q7
|
||||
VSUB.I32 Q11, Q3, Q1
|
||||
VREV64.32 Q6, Q6
|
||||
VADD.I32 Q10, Q3, Q1
|
||||
VSWP D14, D15
|
||||
VSUB.I32 Q9, Q0, Q2
|
||||
VSWP D12, D13
|
||||
VADD.I32 Q8, Q0, Q2
|
||||
VSWP Q13, Q12
|
||||
|
||||
|
||||
VUZP.16 D20, D21
|
||||
VUZP.16 D18, D19
|
||||
VMULL.U16 Q15, D20, D8
|
||||
VMLSL.U16 Q15, D18, D10
|
||||
VST2.32 {Q12, Q13}, [R11]!
|
||||
VMULL.U16 Q14, D18, D8
|
||||
VMLAL.U16 Q14, D20, D10
|
||||
VST2.32 {Q6, Q7}, [R12]
|
||||
|
||||
SUB R11, R0, #32
|
||||
VLD2.32 {Q0, Q1}, [R0]!
|
||||
SUB R12, R5, #32
|
||||
SUB R5, R5, #64
|
||||
VSHR.S32 Q15, Q15, #16
|
||||
VLD2.32 {Q2, Q3}, [R5]!
|
||||
VMLAL.S16 Q15, D21, D8
|
||||
VREV64.32 Q2, Q2
|
||||
VMLSL.S16 Q15, D19, D10
|
||||
VSWP D4, D5
|
||||
VSHR.U32 Q14, Q14, #16
|
||||
|
||||
VREV64.32 Q3, Q3
|
||||
VMLAL.S16 Q14, D19, D8
|
||||
VSWP D6, D7
|
||||
VMLAL.S16 Q14, D21, D10
|
||||
VSHL.S32 Q15, Q15, #1
|
||||
|
||||
VLD1.16 D8, [R3]!
|
||||
VSHL.S32 Q14, Q14, #1
|
||||
|
||||
VADD.I32 Q13, Q8, Q15
|
||||
|
||||
VLD1.16 D10, [R4], R10
|
||||
|
||||
VADD.I32 Q12, Q11, Q14
|
||||
|
||||
|
||||
VREV64.16 D10, D10
|
||||
VSUB.I32 Q7, Q14, Q11
|
||||
|
||||
|
||||
VSUB.I32 Q6, Q8, Q15
|
||||
VREV64.32 Q7, Q7
|
||||
VREV64.32 Q6, Q6
|
||||
VSWP D14, D15
|
||||
VSWP D12, D13
|
||||
|
||||
VSWP Q13, Q12
|
||||
VSUB.I32 Q11, Q3, Q1
|
||||
VST2.32 {Q12, Q13}, [R11]!
|
||||
VADD.I32 Q10, Q3, Q1
|
||||
|
||||
VST2.32 {Q6, Q7}, [R12]
|
||||
|
||||
SUB R11, R0, #32
|
||||
|
||||
VSUB.I32 Q9, Q0, Q2
|
||||
VADD.I32 Q8, Q0, Q2
|
||||
VUZP.16 D20, D21
|
||||
SUB R12, R5, #32
|
||||
VUZP.16 D18, D19
|
||||
SUB R5, R5, #64
|
||||
|
||||
VMULL.U16 Q15, D20, D8
|
||||
VMLSL.U16 Q15, D18, D10
|
||||
VMULL.U16 Q14, D18, D8
|
||||
VMLAL.U16 Q14, D20, D10
|
||||
VSHR.S32 Q15, Q15, #16
|
||||
VMLAL.S16 Q15, D21, D8
|
||||
VMLSL.S16 Q15, D19, D10
|
||||
|
||||
VSHR.U32 Q14, Q14, #16
|
||||
VMLAL.S16 Q14, D19, D8
|
||||
VMLAL.S16 Q14, D21, D10
|
||||
|
||||
VSHL.S32 Q15, Q15, #1
|
||||
VSHL.S32 Q14, Q14, #1
|
||||
VADD.I32 Q13, Q8, Q15
|
||||
VADD.I32 Q12, Q11, Q14
|
||||
|
||||
VSUB.I32 Q7, Q14, Q11
|
||||
VSUB.I32 Q6, Q8, Q15
|
||||
VREV64.32 Q7, Q7
|
||||
VREV64.32 Q6, Q6
|
||||
VSWP D14, D15
|
||||
VSWP D12, D13
|
||||
|
||||
|
||||
VSWP Q13, Q12
|
||||
VST2.32 {Q12, Q13}, [R11]!
|
||||
|
||||
VST2.32 {Q6, Q7}, [R12]
|
||||
VPOP {D8 - D15}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LDMFD sp!, {r0-r2, r3}
|
||||
LDR R1, [SP, #48]
|
||||
LDR R2, [SP, #44]
|
||||
ADD R3, R1, #126
|
||||
VLD1.32 D0[0], [R0, :32]!
|
||||
SUB R4, R1, #2
|
||||
|
||||
ADD R5, R1, #130
|
||||
VLD1.32 D1[0], [R0, :32]!
|
||||
ADD R7, R2, #4
|
||||
|
||||
MOV R6, #0x8000
|
||||
VDUP.32 Q15, R6
|
||||
|
||||
VADD.I32 D2, D0, D1
|
||||
VSHR.S32 D2, D2, #1
|
||||
VSHL.S32 D2, D2, #4
|
||||
VADD.I32 Q2, Q1, Q15
|
||||
VSHR.S32 Q2, Q2, #16
|
||||
VSUB.I32 D6, D0, D1
|
||||
VST1.16 D4[0], [R1]!
|
||||
MOV R8, #28
|
||||
|
||||
MOV R9, #-2
|
||||
VLD2.32 {Q0, Q1}, [R0]!
|
||||
SUB R4, R4, #6
|
||||
|
||||
SUB R3, R3, #6
|
||||
VLD2.16 {D4, D5}, [R7]!
|
||||
|
||||
VUZP.16 D0, D1
|
||||
|
||||
VUZP.16 D2, D3
|
||||
VMULL.U16 Q14, D0, D4
|
||||
|
||||
VMLSL.U16 Q14, D2, D5
|
||||
VSHR.S32 Q14, Q14, #16
|
||||
VMLAL.S16 Q14, D1, D4
|
||||
VMLSL.S16 Q14, D3, D5
|
||||
|
||||
|
||||
VMULL.U16 Q13, D0, D5
|
||||
VMLAL.U16 Q13, D2, D4
|
||||
VSHR.U32 Q13, Q13, #16
|
||||
VMLAL.S16 Q13, D1, D5
|
||||
VMLAL.S16 Q13, D3, D4
|
||||
|
||||
VSHL.S32 Q12, Q14, #4
|
||||
VLD2.32 {Q0, Q1}, [R0]!
|
||||
VADD.I32 Q12, Q12, Q15
|
||||
|
||||
VSHR.S32 Q12, Q12, #16
|
||||
VUZP.16 D24, D25
|
||||
|
||||
|
||||
VSHL.S32 Q11, Q13, #4
|
||||
VLD2.16 {D4, D5}, [R7]!
|
||||
VADD.I32 Q11, Q11, Q15
|
||||
|
||||
VSHR.S32 Q11, Q11, #16
|
||||
VUZP.16 D22, D23
|
||||
|
||||
|
||||
VQNEG.S16 D20, D22
|
||||
|
||||
VUZP.16 D0, D1
|
||||
VUZP.16 D2, D3
|
||||
SUB R8, R8, #8
|
||||
|
||||
LOOP_2:
|
||||
|
||||
VMULL.U16 Q14, D0, D4
|
||||
VST1.16 D24, [R1]!
|
||||
VMLSL.U16 Q14, D2, D5
|
||||
|
||||
VREV64.16 D24, D24
|
||||
VMULL.U16 Q13, D0, D5
|
||||
|
||||
VMLAL.U16 Q13, D2, D4
|
||||
VST1.16 D24, [R4]
|
||||
SUB R4, R4, #8
|
||||
|
||||
VREV64.16 D22, D22
|
||||
VSHR.S32 Q14, Q14, #16
|
||||
VSHR.U32 Q13, Q13, #16
|
||||
VST1.16 D22, [R3]
|
||||
VMLAL.S16 Q14, D1, D4
|
||||
VMLSL.S16 Q14, D3, D5
|
||||
VST1.16 D20, [R5]!
|
||||
VMLAL.S16 Q13, D1, D5
|
||||
VMLAL.S16 Q13, D3, D4
|
||||
VSHL.S32 Q12, Q14, #4
|
||||
|
||||
SUB R3, R3, #8
|
||||
VLD2.32 {Q0, Q1}, [R0]!
|
||||
VSHL.S32 Q11, Q13, #4
|
||||
|
||||
VADD.I32 Q12, Q12, Q15
|
||||
VLD2.16 {D4, D5}, [R7]!
|
||||
|
||||
VADD.I32 Q11, Q11, Q15
|
||||
VUZP.16 D0, D1
|
||||
VSHR.S32 Q12, Q12, #16
|
||||
VUZP.16 D24, D25
|
||||
|
||||
|
||||
VSHR.S32 Q11, Q11, #16
|
||||
VUZP.16 D22, D23
|
||||
SUBS R8, R8, #4
|
||||
|
||||
VUZP.16 D2, D3
|
||||
VQNEG.S16 D20, D22
|
||||
|
||||
BGT LOOP_2
|
||||
|
||||
|
||||
VMULL.U16 Q14, D0, D4
|
||||
VST1.16 D24, [R1]!
|
||||
VMLSL.U16 Q14, D2, D5
|
||||
VREV64.16 D24, D24
|
||||
VMULL.U16 Q13, D0, D5
|
||||
VMLAL.U16 Q13, D2, D4
|
||||
VST1.16 D24, [R4]
|
||||
VSHR.S32 Q14, Q14, #16
|
||||
SUB R4, R4, #8
|
||||
VST1.16 D20, [R5]!
|
||||
|
||||
VMLAL.S16 Q14, D1, D4
|
||||
VMLSL.S16 Q14, D3, D5
|
||||
VREV64.16 D22, D22
|
||||
VSHR.U32 Q13, Q13, #16
|
||||
VST1.16 D22, [R3]
|
||||
SUB R3, R3, #8
|
||||
VMLAL.S16 Q13, D1, D5
|
||||
VSHL.S32 Q12, Q14, #4
|
||||
VMLAL.S16 Q13, D3, D4
|
||||
VADD.I32 Q12, Q12, Q15
|
||||
|
||||
VSHL.S32 Q11, Q13, #4
|
||||
|
||||
VSHR.S32 Q12, Q12, #16
|
||||
VUZP.16 D24, D25
|
||||
VADD.I32 Q11, Q11, Q15
|
||||
|
||||
VST1.16 D24, [R1]!
|
||||
VSHR.S32 Q11, Q11, #16
|
||||
VREV64.16 D24, D24
|
||||
VUZP.16 D22, D23
|
||||
|
||||
VST1.16 D24, [R4]
|
||||
VQNEG.S16 D20, D22
|
||||
|
||||
SUB R4, R4, #2
|
||||
VREV64.16 D22, D22
|
||||
VST1.16 D22, [R3]
|
||||
SUB R3, R3, #2
|
||||
|
||||
VST1.16 D20, [R5]!
|
||||
VLD2.32 {Q0, Q1}, [R0]!
|
||||
|
||||
VLD2.16 {Q2}, [R7]
|
||||
ADD R7, R7, #12
|
||||
VUZP.16 D0, D1
|
||||
VUZP.16 D2, D3
|
||||
|
||||
VMULL.U16 Q14, D0, D4
|
||||
VMLSL.U16 Q14, D2, D5
|
||||
VSHR.S32 Q14, Q14, #16
|
||||
VMLAL.S16 Q14, D1, D4
|
||||
VMLSL.S16 Q14, D3, D5
|
||||
|
||||
VMULL.U16 Q13, D0, D5
|
||||
VMLAL.U16 Q13, D2, D4
|
||||
VSHR.U32 Q13, Q13, #16
|
||||
VMLAL.S16 Q13, D1, D5
|
||||
VMLAL.S16 Q13, D3, D4
|
||||
|
||||
VSHL.S32 Q12, Q14, #4
|
||||
VADD.I32 Q12, Q12, Q15
|
||||
VSHR.S32 Q12, Q12, #16
|
||||
VUZP.16 D24, D25
|
||||
|
||||
VSHL.S32 Q11, Q13, #4
|
||||
VADD.I32 Q11, Q11, Q15
|
||||
VSHR.S32 Q11, Q11, #16
|
||||
VUZP.16 D22, D23
|
||||
|
||||
VQNEG.S16 D20, D22
|
||||
|
||||
VST1.16 D24[0], [R1]!
|
||||
VST1.16 D24[1], [R1]!
|
||||
VST1.16 D24[2], [R1]!
|
||||
|
||||
VST1.16 D24[0], [R4], R9
|
||||
VST1.16 D24[1], [R4], R9
|
||||
VST1.16 D24[2], [R4], R9
|
||||
|
||||
VST1.16 D22[0], [R3], R9
|
||||
VST1.16 D22[1], [R3], R9
|
||||
VST1.16 D22[2], [R3], R9
|
||||
|
||||
VST1.16 D20[0], [R5]!
|
||||
VST1.16 D20[1], [R5]!
|
||||
VST1.16 D20[2], [R5]!
|
||||
VUZP.16 D6, D7
|
||||
VLD1.16 D0, [R7]!
|
||||
VMULL.U16 Q1, D0, D6
|
||||
VSHR.S32 Q1, Q1, #16
|
||||
VMLAL.S16 Q1, D0, D7
|
||||
VSHL.S32 Q1, Q1, #4
|
||||
|
||||
VADD.I32 Q1, Q1, Q15
|
||||
VSHR.S32 Q1, Q1, #16
|
||||
|
||||
VST1.16 D2[0], [R1]!
|
||||
VST1.16 D2[0], [R4], R9
|
||||
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
1071
decoder/armv7/ixheaacd_decorr_filter2.s
Normal file
1071
decoder/armv7/ixheaacd_decorr_filter2.s
Normal file
File diff suppressed because it is too large
Load diff
67
decoder/armv7/ixheaacd_eld_decoder_sbr_pre_twiddle.s
Normal file
67
decoder/armv7/ixheaacd_eld_decoder_sbr_pre_twiddle.s
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
.text
|
||||
.p2align 2
|
||||
.global ia_eld_decoder_sbr_pre_twiddle
|
||||
|
||||
|
||||
ia_eld_decoder_sbr_pre_twiddle:
|
||||
|
||||
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
LDR r4, [r0, #0] @Xre = *pXre
|
||||
MOV r3, #62 @Loop count
|
||||
LDR r5, [r1, #0] @Xim = *pXim
|
||||
|
||||
LOOP:
|
||||
LDR r6, [r2], #4 @Load and increment pointer *pTwiddles++ Lower - cosine , higher - sine
|
||||
SUBS r3, r3, #1 @Decrement loop count by 1
|
||||
|
||||
SMULWB r8, r4, r6 @mult32x16in32(Xre, cosine)
|
||||
LSL r8, r8, #1 @Left shift the multiplied value by 1
|
||||
|
||||
SMULWT r10, r5, r6 @mult32x16in32( Xim , sine)
|
||||
|
||||
ADD r12, r8, r10, LSL #1 @mac32x16in32_shl( mult32x16in32_shl(Xre, cosine) , mult32x16in32_shl( Xim , sine))@
|
||||
|
||||
|
||||
SMULWT r7, r4, r6 @mult32x16in32(Xre, sine)
|
||||
LDR r4, [r0, #4] @Load next iteration value Xre = *pXre
|
||||
|
||||
SMULWB r9, r5, r6 @mult32x16in32(Xim, cosine)
|
||||
STR r12, [r0], #4 @Store and increment pointer *pXre++ = re
|
||||
|
||||
LSL r9, r9, #1 @Left shift the multiplied value by 1
|
||||
LDR r5, [r1, #4] @Load next iteration value Xim = *pXim
|
||||
|
||||
|
||||
SUB r14, r9, r7, LSL #1 @sub32(mult32x16in32_shl(Xim, cosine) , mult32x16in32_shl(Xre, sine))
|
||||
|
||||
STR r14, [r1], #4 @Store and increment pointer *pXim++ = im
|
||||
|
||||
BNE LOOP @Check r3 equals 0 and continue
|
||||
|
||||
EPILOUGE:
|
||||
|
||||
LDR r6, [r2], #4
|
||||
|
||||
SMULWB r8, r4, r6
|
||||
LSL r8, r8, #1
|
||||
|
||||
SMULWT r10, r5, r6
|
||||
|
||||
ADD r12, r8, r10, LSL #1
|
||||
|
||||
|
||||
SMULWB r9, r5, r6
|
||||
LSL r9, r9, #1
|
||||
|
||||
SMULWT r7, r4, r6
|
||||
|
||||
SUB r14, r9, r7, LSL #1
|
||||
|
||||
STR r12, [r0], #4
|
||||
STR r14, [r1], #4
|
||||
|
||||
END_LOOP:
|
||||
|
||||
LDMFD sp!, {r4-r12, pc}
|
||||
158
decoder/armv7/ixheaacd_enery_calc_per_subband.s
Normal file
158
decoder/armv7/ixheaacd_enery_calc_per_subband.s
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
.p2align 2
.global ixheaacd_enery_calc_per_subband_armv7

@ ixheaacd_enery_calc_per_subband_armv7
@ For each subband column (outer LP_SBR_LOOP), computes the sum of
@ squared samples after normalising for headroom, and stores either a
@ 16-bit mantissa + 16-bit exponent pair (CONVERT_TO_MANT_EXP) or a
@ zero word (STORE_ZEROES) through the output pointer in R7.
@ Two addressing variants are selected by the flag loaded into R8 from
@ the stack: the fall-through path uses column stride <<8 and shift
@ limit 20, HQ_PART uses stride <<9 (interleaved re/im), doubles the
@ per-column sample count and uses shift limit 21.
@ NOTE(review): the C-level argument mapping for the stack operands at
@ [sp,#0x28..0x38] is not visible here -- confirm against the caller /
@ C prototype before relying on specific parameter names.
@ Clobbers r4-r12/r14 (saved/restored on the stack).
ixheaacd_enery_calc_per_subband_armv7:
STMFD sp!, {r4-r12, r14}

LDR r10, [sp, #0x34]
MOV R4, R2
MOV R5, R3
MOV R2, R0
MOV R3, R1
SUB R12, R3, R2
LDR r10, [r10, #0]
ADD r10, r10, r12, LSL #1
LDRSH r9, [r10, #0x20] @ 16-bit scale factor used by SMULBB below
LDR R1, [sp, #0x28]
MOV R1, R1, LSL #1

SUBS R5, R5, R4 @ R5 = number of subband columns to process
LDR R0, [sp, #0x38]
LDR R7, [sp, #0x2C] @ R7 = output (mantissa/exponent) write pointer

LDR R8, [sp, #0x30]

BLE ENDCALC @ nothing to do if the column range is empty

MOVS R8, R8
BEQ HQ_PART

@ LP path: base = R0 + start*4 + offset*256, shift limit 20.
ADD R0, R0, R4, LSL #2
ADD R0, R0, R2, LSL #8
SUB R2, R3, R2
MOV R10, #20

B LP_SBR_LOOP

@ HQ path: base = R0 + start*4 + offset*512, doubled sample count
@ (interleaved data), shift limit 21, exponent bias reduced by 1.
HQ_PART:
ADD R0, R0, R4, LSL #2
ADD R0, R0, R2, LSL #9
SUB R2, R3, R2
MOV R2, R2, LSL #1
MOV R10, #21
SUB R1, R1, #1

@ Outer loop: one iteration per subband column.
LP_SBR_LOOP:

MOV R6, #0
MOV R8, R0
MOVS R11, R2
BLE STORE_ZEROES
MOV R6, #1 @ seed with 1 so CLZ below never sees 0

@ Pass 1: OR together |sample| (abs via EOR with sign-extension) over
@ the column, two samples per iteration, stride 0x100 bytes, to find
@ the available headroom.
LOOP1_CALC_MAX:
LDR R4, [R8], #0x100
LDR R12, [R8], #0x100
EOR R4, R4, R4, ASR #31
ORR R6, R6, R4
EOR R12, R12, R12, ASR #31
SUBS R11, R11, #2
ORRGE R6, R6, R12 @ fold in the 2nd sample only while in range
BGT LOOP1_CALC_MAX

CALC_NORM:

CLZ R6, R6
RSBS R14, R6, R10 @ R14 = shift_limit - headroom (may be negative)
MOV R6, #0
MOV R8, R0
MOV R11, R2
BLE NEG_SHIFT

@ Pass 2a: positive shift -- right-shift each sample by R14, then
@ accumulate the sum of squared low half-words (SMLABB).
LOOP2_APPLY_POS_SHIFT:
LDR R4, [R8], #0x100
LDR R12, [R8], #0x100
SUBS R11, R11, #2
MOV R4, R4, ASR R14
SMLABB R6, R4, R4, R6
MOV R12, R12, ASR R14
SMLABB R6, R12, R12, R6
BGT LOOP2_APPLY_POS_SHIFT

B CONVERT_TO_MANT_EXP

NEG_SHIFT:
RSB R12, R14, #0 @ R12 = -R14, apply as a left shift instead

@ Pass 2b: negative shift -- left-shift each sample, same accumulation.
LOOP2_APPLY_NEG_SHIFT:
LDR R4, [R8], #0x100
LDR R3, [R8], #0x100
SUBS R11, R11, #2
MOV R4, R4, LSL R12
SMLABB R6, R4, R4, R6
MOV R3, R3, LSL R12
SMLABB R6, R3, R3, R6
BGT LOOP2_APPLY_NEG_SHIFT

@ Convert the energy accumulator in R6 to mantissa/exponent and store.
CONVERT_TO_MANT_EXP:
SUB R14, R14, #23
ADD R0, R0, #4 @ advance column base pointer
MOVS R6, R6
BEQ STORE_ZEROES @ all-zero column -> store a zero word

CLZ R12, R6
RSB R12, R12, #17
MOV R4, R6, ASR R12 @ normalise energy to a 16-bit mantissa

SMULBB R11, R4, R9 @ scale mantissa by the 16-bit factor in R9
ADD R12, R12, R14, LSL#1 @ exponent: norm shift + 2*applied shift

MOV R11, R11, ASR #15
CMP R11, #0x00008000
MVNEQ R11, R11 @ clamp the 0x8000 overflow case
STRH R11, [R7], #2 @ store mantissa
ADD R11, R1, R12
STRH R11, [R7], #2 @ store biased exponent
SUBS R5, R5, #1
BGT LP_SBR_LOOP

B ENDCALC

STORE_ZEROES:
STR R6, [R7], #4 @ zero mantissa+exponent in one word
SUBS R5, R5, #1
BGT LP_SBR_LOOP

ENDCALC:

LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
172
decoder/armv7/ixheaacd_esbr_cos_sin_mod_loop1.s
Normal file
172
decoder/armv7/ixheaacd_esbr_cos_sin_mod_loop1.s
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
.p2align 2

.global ixheaacd_esbr_cos_sin_mod_loop1
@ ixheaacd_esbr_cos_sin_mod_loop1
@ Complex (cos,sin) modulation over a subband buffer, processed from
@ both ends toward the middle. Register usage as grounded by the code:
@   r0 = forward subband pointer, r4 = r0 + 8*r1 - 4 (backward pointer,
@        commented "psubband1"), r2 = twiddle stream (d0 = {cos,sin},
@        d1 = reversed pair), r3 = forward output, r5 = r3 + 8*r1 - 8
@        (backward output, "psubband1_t"), r6 = r1>>2 = loop count.
@ Each LOOP1 iteration handles four sample groups (parts 1..4); parts
@ 1/3 write through r3 (forward), parts 2/4 through r5 (backward).
@ Products are 32x32->64-bit (VMULL.S32), combined with a plain add
@ (vadd.I64) and a saturating subtract (VQSUB.S64), then narrowed by
@ taking the high 32 bits (VSHRN #32).
@ Clobbers r4-r12/r14 and d8-d11 (both saved/restored).
ixheaacd_esbr_cos_sin_mod_loop1:

STMFD sp!, {r4-r12, r14}
VPUSH {D8-D11}
@generating load addresses
ADD r4, r0, r1, lsl #3 @psubband1
SUB r4, r4, #4
ADD r5, r3, r1, lsl #3 @psubband1_t
SUB r5, r5, #8
MOV r6, r1, ASR #2 @ loop count = r1/4

LOOP1:
@first part: forward sample pair x backward sample pair, forward store
vld1.32 {d0} , [r2]!
vrev64.32 d1, d0
vld1.32 {d2[0]}, [r0]!
ADD r7, r0, #252
vld1.32 {d2[1]}, [r7]
vld1.32 {d3[0]}, [r4]
ADD r7, r4, #256
vld1.32 {d3[1]}, [r7]
SUB r4, r4, #4

VMULL.S32 q2, d0, d2 @qsub 2nd
VMULL.S32 q3, d0, d3 @add 2nd
VMULL.S32 q4, d1, d2 @add 1st
VMULL.S32 q5, d1, d3 @qsub 1st

vadd.I64 q0, q4, q3
VQSUB.S64 Q1, Q5, Q2

VSHRN.I64 D0, Q0, #32
VSHRN.I64 D2, Q1, #32
VMOV.32 D3, D0
VST2.32 {D0[0], D2[0]}, [R3]!
ADD r7, r3, #248
VST2.32 {D2[1], D3[1]}, [R7]

@second part: same structure, results stored backward through r5
vld1.32 {d0} , [r2]!
vrev64.32 d1, d0
vld1.32 {d2[0]}, [r0]!
ADD R7, R0, #252
vld1.32 {d2[1]}, [r7]
vld1.32 {d3[0]}, [r4]
ADD R7, R4, #256
vld1.32 {d3[1]}, [r7]
SUB r4, r4, #4

VMULL.S32 q2, d0, d2 @add 2nd
VMULL.S32 q3, d0, d3 @sub 2nd
VMULL.S32 q4, d1, d2 @sub 1st
VMULL.S32 q5, d1, d3 @add 1st

VADD.I64 Q0, Q5, Q2
VQSUB.S64 Q1, Q4, Q3

VSHRN.I64 D0, Q0, #32
VSHRN.I64 D2, Q1, #32
VMOV.32 D3, D0
VST2.32 {D0[0], D2[0]}, [R5]
ADD R7, R5, #256
VST2.32 {D2[1], D3[1]}, [R7]
SUB r5, r5, #8
@Third part: same math as part 1, next twiddle pair, forward store
vld1.32 {d0} , [r2]!
vrev64.32 d1, d0
vld1.32 {d2[0]}, [r0]!
ADD r7, r0, #252
vld1.32 {d2[1]}, [r7]
vld1.32 {d3[0]}, [r4]
ADD r7, r4, #256
vld1.32 {d3[1]}, [r7]
SUB r4, r4, #4

VMULL.S32 q2, d0, d2 @qsub 2nd
VMULL.S32 q3, d0, d3 @add 2nd
VMULL.S32 q4, d1, d2 @add 1st
VMULL.S32 q5, d1, d3 @qsub 1st

vadd.I64 q0, q4, q3
VQSUB.S64 Q1, Q5, Q2

VSHRN.I64 D0, Q0, #32
VSHRN.I64 D2, Q1, #32
VMOV.32 D3, D0
VST2.32 {D0[0], D2[0]}, [R3]!
ADD r7, r3, #248
VST2.32 {D2[1], D3[1]}, [R7]

@Fourth part: same math as part 2, backward store; also decrements
@the loop counter between the address computation and the store.
vld1.32 {d0} , [r2]!
vrev64.32 d1, d0
vld1.32 {d2[0]}, [r0]!
ADD R7, R0, #252
vld1.32 {d2[1]}, [r7]
vld1.32 {d3[0]}, [r4]
ADD R7, R4, #256
vld1.32 {d3[1]}, [r7]
SUB r4, r4, #4

VMULL.S32 q2, d0, d2 @add 2nd
VMULL.S32 q3, d0, d3 @sub 2nd
VMULL.S32 q4, d1, d2 @sub 1st
VMULL.S32 q5, d1, d3 @add 1st

VADD.I64 Q0, Q5, Q2
VQSUB.S64 Q1, Q4, Q3

VSHRN.I64 D0, Q0, #32
VSHRN.I64 D2, Q1, #32
VMOV.32 D3, D0
VST2.32 {D0[0], D2[0]}, [R5]
ADD R7, R5, #256
SUBS R6, R6, #1
VST2.32 {D2[1], D3[1]}, [R7]
SUB r5, r5, #8

BGT LOOP1
VPOP {D8-D11}
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
180
decoder/armv7/ixheaacd_esbr_cos_sin_mod_loop2.s
Normal file
180
decoder/armv7/ixheaacd_esbr_cos_sin_mod_loop2.s
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http:@www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
.p2align 2

.global ixheaacd_esbr_cos_sin_mod_loop2
@ ixheaacd_esbr_cos_sin_mod_loop2
@ Second (cos,sin) modulation pass over a subband buffer. From the
@ inline comments and address arithmetic: r0 = subband base, r2 = M,
@ r3 = &subband[2*M - 1] ("psubband1", walked backward via R8 = -4),
@ r10 = r0 + 256 (second half), r11 = its mirrored backward pointer,
@ r1 = twiddle stream (one {cos,sin} pair per VLD2, duplicated across
@ lanes), loop count r4 = M/2 - 1.
@ A prologue handles the boundary elements (halving / negating the
@ edge samples) before LOOP1 processes two sample groups per
@ iteration: 32x32->64-bit multiplies, add / saturating-subtract
@ combines, high-32 narrowing (VSHRN #32) and a saturating negate
@ (VQNEG) for the mirrored outputs.
@ Clobbers r4-r12/r14 and d8-d15 (both saved/restored).
ixheaacd_esbr_cos_sin_mod_loop2:

STMFD sp!, {r4-r12, r14}
VPUSH {D8-D15}
@generating load addresses
ADD R3, R0, R2, LSL #3 @psubband1 = &subband[2 * M - 1];
SUB R3, R3, #4
ADD R10, R0, #256
ADD R11, R10, R2, LSL #3
SUB R11, R11, #4
MOV R8, #-4 @ post-decrement stride for the backward pointers
LDR R6, [R0]
MOV R4, R2, ASR #1 @M_2 = ixheaacd_shr32(M, 1);
SUB R4, R4, #1

@ Prologue: halve / negate the boundary samples of both halves and
@ run the first modulation group outside the loop.
ASR R6, R6, #1 @*psubband = *psubband >> 1;
VLD1.32 {D2[0]}, [R3]

STR R6, [R0], #4 @psubband++;
LDR R7, [R0]
ASR R7, R7, #1
RSB R6, R7, #0
STR R6, [R3], #-4
VLD1.32 {D3[0]}, [R3] @ im = *psubband1;

VLD2.32 {D0[0], D1[0]}, [R1]!
VDUP.32 D0, D0[0]
VDUP.32 D1, D1[0]

VLD1.32 {D2[1]}, [R11] @re = *psubband12;

LDR R6, [R10]
ASR R7, R6, #1
MOV R9, #0
QSUB R7, R9, R7 @ saturating negate of the halved sample

STR R7, [R11], #-4

LDR R6, [R10, #4]
ASR R6, R6, #1
STR R6, [R10], #4

VLD1.32 {D3[1]}, [R11]

VMULL.S32 q2, d0, d2 @qsub 2nd
VMULL.S32 q3, d0, d3 @add 2nd
VMULL.S32 q4, d1, d2 @add 1st
VMULL.S32 q5, d1, d3 @qsub 1st

vadd.I64 q6, q4, q3
VQSUB.S64 Q7, Q5, Q2
VQSUB.S64 Q8, Q2, Q5

VSHRN.I64 D12, Q6, #32
VSHRN.I64 D14, Q7, #32
VSHRN.I64 D16, Q8, #32

VST1.32 {D12[0]}, [R3], R8

VST1.32 {D14[0]}, [R0]!

VQNEG.S32 D12, D12

VST1.32 {D12[1]}, [R10]!

VST1.32 {D16[1]}, [R11], R8

@ Main loop: two modulation groups per iteration, M/2 - 1 iterations.
LOOP1:
@ first group: reuses the twiddle pair already in d0/d1
VLD1.32 {D2}, [R0]
VLD1.32 {D3}, [R10]
LDR R5, [R3] @RE2
LDR R6, [R11] @RE3
VTRN.32 D2, D3

VMULL.S32 q2, d0, d2 @qsub 2nd
VMULL.S32 q3, d0, d3 @add 2nd
VMULL.S32 q4, d1, d2 @add 1st
VMULL.S32 q5, d1, d3 @qsub 1st

vadd.I64 q6, q4, q3
VQSUB.S64 Q7, Q2, Q5
VQSUB.S64 Q8, Q5, Q2

VSHRN.I64 D12, Q6, #32
VSHRN.I64 D14, Q7, #32
VSHRN.I64 D16, Q8, #32

VST1.32 {D12[0]}, [R0]!
VST1.32 {D14[0]}, [R3], R8
VQNEG.S32 D12, D12

VST1.32 {D12[1]}, [R11], R8
VST1.32 {D16[1]}, [R10]!

@ second part
VLD2.32 {D0[0], D1[0]}, [R1]!
VDUP.32 D0, D0[0]
VDUP.32 D1, D1[0]

VMOV D3, R5, R6 @ RE2/RE3 captured before the first-group stores
VLD1.32 {D2[0]}, [R3]
VLD1.32 {D2[1]}, [R11]

VMULL.S32 q2, d0, d2 @qsub 2nd
VMULL.S32 q3, d0, d3 @add 2nd
VMULL.S32 q4, d1, d2 @add 1st
VMULL.S32 q5, d1, d3 @qsub 1st

vadd.I64 q6, q2, q5
VQSUB.S64 Q7, Q4, Q3
VQSUB.S64 Q8, Q3, Q4

VSHRN.I64 D12, Q6, #32
VSHRN.I64 D14, Q7, #32
VSHRN.I64 D16, Q8, #32

VST1.32 {D12[0]}, [R3], R8
VST1.32 {D14[0]}, [R0]!

VQNEG.S32 D12, D12

subs r4, r4, #1
VST1.32 {D12[1]}, [R10]!
VST1.32 {D16[1]}, [R11], R8

BGT LOOP1
VPOP {D8-D15}
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
111
decoder/armv7/ixheaacd_esbr_fwd_modulation.s
Normal file
111
decoder/armv7/ixheaacd_esbr_fwd_modulation.s
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
.p2align 2
.extern ixheaacd_esbr_cos_sin_mod
.hidden ixheaacd_esbr_cos_sin_mod
.global ixheaacd_esbr_fwd_modulation
@ ixheaacd_esbr_fwd_modulation
@ Forward modulation in three stages:
@  1) LOOP1: folds the input (r0) against its mirrored tail (r5 walks
@     backward from r0 + 8*count), with all samples pre-scaled by >>4;
@     writes difference terms to r1 and sum terms to r2, 8 per pass.
@  2) Calls ixheaacd_esbr_cos_sin_mod on the two buffers, with two
@     table pointers built from the pointer at [sp,#124] (+0x41FC and
@     +0xB8).  NOTE(review): those offsets address fields of a
@     project struct not visible here -- confirm against the C side.
@  3) LOOP2: post-twiddles both buffers in-place using interleaved
@     (cos,sin) data loaded via r0 (from [R3,#0x5C]); 64-bit products
@     narrowed with #31 (one extra guard bit vs the #32 used in the
@     cos_sin_mod loops).
@ Clobbers r4-r12/r14 and d8-d15 (both saved/restored).
ixheaacd_esbr_fwd_modulation:

STMFD sp!, {r4-r12, lr}
VPUSH {D8 - D15}
LDR R4, [R3] @ R4 = element count (loop counter for both stages)
ADD R5, R0, R4, LSL #3 @ mirrored (backward) input pointer
MOV R6, R1
MOV R7, R2

LOOP1:
SUB R5, R5, #32
VLD1.32 {D0, D1, D2, D3}, [R0]!
VLD1.32 {D4, D5, D6, D7}, [R5]
VSHR.S32 Q0, Q0, #4
VSHR.S32 Q1, Q1, #4
VSHR.S32 Q2, Q2, #4
VSHR.S32 Q3, Q3, #4

@ reverse the 8 mirrored samples (swap + per-doubleword reverse)
vswp d4, d7
vswp d5, d6

vrev64.32 q2, q2
vrev64.32 q3, q3

VQSUB.S32 Q4, Q0, Q2 @ saturating differences -> first buffer
VQSUB.S32 Q5, Q1, Q3

VADD.S32 Q6, Q0, Q2 @ sums -> second buffer
VADD.S32 Q7, Q1, Q3

SUBS R4, R4, #8
VST1.32 {D8, D9, D10, D11}, [R6]!
VST1.32 {D12, D13, D14, D15}, [R7]!

BGT LOOP1
@ Stage 2: set up arguments and call the shared cos/sin modulation.
STMFD sp!, {r0-r3, lr}
LDR R4, [SP, #124]
MOV R0, R1
MOV R1, R3
ldr R5, =0x41FC
ADD R2, R4, R5
ADD R3, R4, #0xB8

BL ixheaacd_esbr_cos_sin_mod

LDMFD sp!, {r0-r3, r14}

@ Stage 3: post-twiddle both output buffers in-place.
LDR R0, [R3, #0x5C] @ interleaved (cos,sin) source
LDRSH R4, [R3, #0x2C]
LDRSH R5, [R3, #0x2A]

SUB R4, R4, R5 @ loop count = difference of the two half-words

LOOP2:
VLD2.32 {D0, D1}, [R0]!
VLD1.32 {D2}, [R1]
VLD1.32 {D3}, [R2]

VMULL.S32 q2, d0, d2
VMULL.S32 q3, d0, d3
VMULL.S32 q4, d1, d2
VMULL.S32 q5, d1, d3

VADD.I64 Q0, Q2, Q5
VQSUB.S64 Q1, Q3, Q4

VSHRN.I64 D0, Q0, #31
VSHRN.I64 D2, Q1, #31

SUBS R4, R4, #2
VST1.32 {D0}, [R1]!
VST1.32 {D2}, [R2]!

BGT LOOP2

VPOP {D8-D15}
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
409
decoder/armv7/ixheaacd_esbr_qmfsyn64_winadd.s
Normal file
409
decoder/armv7/ixheaacd_esbr_qmfsyn64_winadd.s
Normal file
|
|
@ -0,0 +1,409 @@
|
|||
@VOID ixheaacd_esbr_qmfsyn64_winadd(
@WORD32 *tmp1,
@WORD32 *tmp2,
@WORD32 *inp1,
@WORD32 *sample_buffer,
@WORD32 ch_fac)
@R0->Word32 *tmp1
@R1->Word32 *tmp2
@R2->Word32 *inp1
@R3->Word32 *sample_buffer
@R5->ch_fac
@
@ 64-band QMF synthesis windowing + overlap-add.  Each output sample
@ is a 5-tap dot product over tmp1 x inp1 (filter taps stepped by
@ R8 = 1024 bytes for tmp1 and R9 = 512 bytes for inp1) plus the
@ matching 5-tap dot product over tmp2 x the second inp1 half, all
@ accumulated in 64-bit lanes (VMLAL.S32) and narrowed with >>31
@ (VSHRN.S64 #31).  Results are stored to sample_buffer with a stride
@ of ch_fac words (R5 = ch_fac << 2 bytes).
@ Structure: one software-pipelined prologue group of 4 samples, then
@ LOOP_1 produces 8 samples per iteration (two 4-sample groups), and
@ a 4-sample epilogue.  R10/R11 carry the saved "next group" pointers
@ between groups; the #1008/#496 adds reposition R0/R2 (and R1/R12)
@ at the next tap row minus the 16 bytes already consumed.
@ Clobbers R4-R12/R14 and d8-d15 (both saved/restored).

.text
.p2align 2
.global ixheaacd_esbr_qmfsyn64_winadd

ixheaacd_esbr_qmfsyn64_winadd: @ PROC

STMFD sp!, {R4-R12, R14}
VPUSH {D8- D15}
LDR R5, [SP, #104] @ ch_fac (5th argument, after the pushes)

MOV R7, #0
VLD1.32 {D0, D1}, [R0]!
MOV R12, R2

VDUP.32 Q15, R7 @ Q15 = 0, used to reset the accumulators
VLD1.32 {D2, D3}, [R2]!

MOV R10, R0
MOV R11, R2
ADD R0, R0, #1008
ADD R2, R2, #496

MOV R6, #64 @ total output samples
MOV R6, R6, LSL #2
ADD R12, R12, R6 @ R12 = inp1 + 64 words (second half)
MOV R7, #256
MOV R9, R7, LSL #1
ADD R1, R1, R9 @ tmp2 advanced by 512 bytes
MOV R6, #64
MOV R7, #256
MOV R9, R7, LSL #1 @(256*2)
MOV R7, #512
MOV R8, R7, LSL #1 @(512*2)

MOV R5, R5, LSL #2 @ output stride in bytes = ch_fac * 4
VMOV Q13, Q15
VMOV Q14, Q15

@ Prologue group: 5 taps of tmp1 x inp1 ...
VMLAL.S32 Q13, D0, D2
VMLAL.S32 Q14, D1, D3

VLD1.32 {D4, D5}, [R0], R8
VLD1.32 {D6, D7}, [R2], R9

VMLAL.S32 Q13, D6, D4
VMLAL.S32 Q14, D7, D5

VLD1.32 {D8, D9}, [R0], R8
VLD1.32 {D10, D11}, [R2], R9

VMLAL.S32 Q13, D10, D8
VMLAL.S32 Q14, D11, D9

VLD1.32 {D12, D13}, [R0], R8
VLD1.32 {D14, D15}, [R2], R9

VMLAL.S32 Q13, D12, D14
VMLAL.S32 Q14, D13, D15

VLD1.32 {D16, D17}, [R0], R8
VLD1.32 {D18, D19}, [R2], R9

VMLAL.S32 Q13, D16, D18
VMLAL.S32 Q14, D17, D19

MOV R0, R10

@ ... plus 5 taps of tmp2 x inp1-second-half into the same accumulators
MOV R2, R11
VLD1.32 {D0, D1}, [R1]!
MOV R10, R1
VLD1.32 {D2, D3}, [R12]!
ADD R1, R1, #1008
MOV R11, R12

VMLAL.S32 Q13, D0, D2
VMLAL.S32 Q14, D1, D3
VLD1.32 {D4, D5}, [R1], R8
ADD R12, R12, #496

VLD1.32 {D6, D7}, [R12], R9

VMLAL.S32 Q13, D6, D4
VMLAL.S32 Q14, D7, D5

VLD1.32 {D8, D9}, [R1], R8
VLD1.32 {D10, D11}, [R12], R9

VMLAL.S32 Q13, D10, D8
VMLAL.S32 Q14, D11, D9

VLD1.32 {D12, D13}, [R1], R8
VLD1.32 {D14, D15}, [R12], R9

VMLAL.S32 Q13, D12, D14
VMLAL.S32 Q14, D13, D15

VLD1.32 {D16, D17}, [R1], R8
VLD1.32 {D18, D19}, [R12], R9

VMLAL.S32 Q13, D16, D18
VMLAL.S32 Q14, D17, D19

@ Narrow the four 64-bit accumulators and store 4 strided samples.
VSHRN.S64 D26 , Q13, #31

VST1.32 D26[0], [R3], R5
VST1.32 D26[1], [R3], R5

VSHRN.S64 D27 , Q14, #31

VST1.32 D27[0], [R3], R5
VST1.32 D27[1], [R3], R5

SUB R6, R6, #8
@ Main loop: two 4-sample groups per iteration (identical structure
@ to the prologue group above).
LOOP_1:

VLD1.32 {D0, D1}, [R0]!
MOV R12, R11
MOV R1, R10
VLD1.32 {D2, D3}, [R2]!
MOV R10, R0

ADD R0, R0, #1008

MOV R11, R2
ADD R2, R2, #496

VMOV Q13, Q15
VMOV Q14, Q15

VMLAL.S32 Q13, D0, D2
VMLAL.S32 Q14, D1, D3

VLD1.32 {D4, D5}, [R0], R8
VLD1.32 {D6, D7}, [R2], R9

VMLAL.S32 Q13, D6, D4
VMLAL.S32 Q14, D7, D5

VLD1.32 {D8, D9}, [R0], R8
VLD1.32 {D10, D11}, [R2], R9

VMLAL.S32 Q13, D10, D8
VMLAL.S32 Q14, D11, D9

VLD1.32 {D12, D13}, [R0], R8
VLD1.32 {D14, D15}, [R2], R9

VMLAL.S32 Q13, D12, D14
VMLAL.S32 Q14, D13, D15

VLD1.32 {D16, D17}, [R0], R8
VLD1.32 {D18, D19}, [R2], R9

VMLAL.S32 Q13, D16, D18
VMLAL.S32 Q14, D17, D19

MOV R0, R10

MOV R2, R11
VLD1.32 {D0, D1}, [R1]!
MOV R10, R1
VLD1.32 {D2, D3}, [R12]!
ADD R1, R1, #1008
MOV R11, R12

VMLAL.S32 Q13, D0, D2
VMLAL.S32 Q14, D1, D3
VLD1.32 {D4, D5}, [R1], R8
ADD R12, R12, #496

VLD1.32 {D6, D7}, [R12], R9

VMLAL.S32 Q13, D6, D4
VMLAL.S32 Q14, D7, D5

VLD1.32 {D8, D9}, [R1], R8
VLD1.32 {D10, D11}, [R12], R9

VMLAL.S32 Q13, D10, D8
VMLAL.S32 Q14, D11, D9

VLD1.32 {D12, D13}, [R1], R8
VLD1.32 {D14, D15}, [R12], R9

VMLAL.S32 Q13, D12, D14
VMLAL.S32 Q14, D13, D15

VLD1.32 {D16, D17}, [R1], R8
VLD1.32 {D18, D19}, [R12], R9

VMLAL.S32 Q13, D16, D18
VMLAL.S32 Q14, D17, D19

VSHRN.S64 D26 , Q13, #31

VST1.32 D26[0], [R3], R5
VST1.32 D26[1], [R3], R5

VSHRN.S64 D27 , Q14, #31

VST1.32 D27[0], [R3], R5
VST1.32 D27[1], [R3], R5
@@@ second 4-sample group of the loop iteration
VLD1.32 {D0, D1}, [R0]!
MOV R12, R11
MOV R1, R10
VLD1.32 {D2, D3}, [R2]!
MOV R10, R0

VMOV Q13, Q15
VMLAL.S32 Q13, D0, D2
VMOV Q14, Q15
VMLAL.S32 Q14, D1, D3

ADD R0, R0, #1008

MOV R11, R2
VLD1.32 {D4, D5}, [R0], R8
ADD R2, R2, #496

VLD1.32 {D6, D7}, [R2], R9

VMLAL.S32 Q13, D6, D4
VMLAL.S32 Q14, D7, D5

VLD1.32 {D8, D9}, [R0], R8
VLD1.32 {D10, D11}, [R2], R9

VMLAL.S32 Q13, D8, D10
VMLAL.S32 Q14, D9, D11

VLD1.32 {D12, D13}, [R0], R8
VLD1.32 {D14, D15}, [R2], R9

VMLAL.S32 Q13, D12, D14
VMLAL.S32 Q14, D13, D15

VLD1.32 {D16, D17}, [R0], R8
VLD1.32 {D18, D19}, [R2], R9

VMLAL.S32 Q13, D16, D18
VMLAL.S32 Q14, D17, D19

MOV R0, R10

MOV R2, R11
VLD1.32 {D0, D1}, [R1]!

MOV R10, R1
VLD1.32 {D2, D3}, [R12]!
ADD R1, R1, #1008

VMLAL.S32 Q13, D0, D2
VMLAL.S32 Q14, D1, D3

MOV R11, R12
VLD1.32 {D4, D5}, [R1], R8
ADD R12, R12, #496

VLD1.32 {D6, D7}, [R12], R9

VMLAL.S32 Q13, D6, D4
VMLAL.S32 Q14, D7, D5

VLD1.32 {D8, D9}, [R1], R8
VLD1.32 {D10, D11}, [R12], R9

VMLAL.S32 Q13, D8, D10
VMLAL.S32 Q14, D9, D11

VLD1.32 {D12, D13}, [R1], R8
VLD1.32 {D14, D15}, [R12], R9

VMLAL.S32 Q13, D12, D14
VMLAL.S32 Q14, D13, D15

VLD1.32 {D16, D17}, [R1], R8
VLD1.32 {D18, D19}, [R12], R9

VMLAL.S32 Q13, D16, D18
VMLAL.S32 Q14, D17, D19

VSHRN.S64 D26 , Q13, #31

VST1.32 D26[0], [R3], R5
VST1.32 D26[1], [R3], R5

VSHRN.S64 D27 , Q14, #31

VST1.32 D27[0], [R3], R5
VST1.32 D27[1], [R3], R5

SUBS R6, R6, #8 @1

BGT LOOP_1

@ Epilogue: final 4-sample group (same structure as the loop groups).
VLD1.32 {D0, D1}, [R0]!
MOV R12, R11
MOV R1, R10
VLD1.32 {D2, D3}, [R2]!
MOV R10, R0

VMOV Q13, Q15
VMLAL.S32 Q13, D0, D2
VMOV Q14, Q15
VMLAL.S32 Q14, D1, D3

ADD R0, R0, #1008

MOV R11, R2
VLD1.32 {D4, D5}, [R0], R8
ADD R2, R2, #496

VLD1.32 {D6, D7}, [R2], R9

VMLAL.S32 Q13, D6, D4
VMLAL.S32 Q14, D7, D5

VLD1.32 {D8, D9}, [R0], R8
VLD1.32 {D10, D11}, [R2], R9

VMLAL.S32 Q13, D8, D10
VMLAL.S32 Q14, D9, D11

VLD1.32 {D12, D13}, [R0], R8
VLD1.32 {D14, D15}, [R2], R9

VMLAL.S32 Q13, D12, D14
VMLAL.S32 Q14, D13, D15

VLD1.32 {D16, D17}, [R0], R8
VLD1.32 {D18, D19}, [R2], R9

VMLAL.S32 Q13, D16, D18
VMLAL.S32 Q14, D17, D19

MOV R0, R10

MOV R2, R11
VLD1.32 {D0, D1}, [R1]!

MOV R10, R1
VLD1.32 {D2, D3}, [R12]!
ADD R1, R1, #1008

VMLAL.S32 Q13, D0, D2
VMLAL.S32 Q14, D1, D3

MOV R11, R12
VLD1.32 {D4, D5}, [R1], R8
ADD R12, R12, #496

VLD1.32 {D6, D7}, [R12], R9

VMLAL.S32 Q13, D6, D4
VMLAL.S32 Q14, D7, D5

VLD1.32 {D8, D9}, [R1], R8
VLD1.32 {D10, D11}, [R12], R9

VMLAL.S32 Q13, D8, D10
VMLAL.S32 Q14, D9, D11

VLD1.32 {D12, D13}, [R1], R8
VLD1.32 {D14, D15}, [R12], R9

VMLAL.S32 Q13, D12, D14
VMLAL.S32 Q14, D13, D15

VLD1.32 {D16, D17}, [R1], R8
VLD1.32 {D18, D19}, [R12], R9

VMLAL.S32 Q13, D16, D18
VMLAL.S32 Q14, D17, D19

VSHRN.S64 D26 , Q13, #31

VST1.32 D26[0], [R3], R5
VST1.32 D26[1], [R3], R5

VSHRN.S64 D27, Q14, #31

VST1.32 D27[0], [R3], R5
VST1.32 D27[1], [R3], R5

VPOP {D8 - D15}
LDMFD sp!, {R4-R12, R15}
@ ENDP
|
||||
154
decoder/armv7/ixheaacd_esbr_radix4bfly.s
Normal file
154
decoder/armv7/ixheaacd_esbr_radix4bfly.s
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.global ixheaacd_esbr_radix4bfly
|
||||
|
||||
ixheaacd_esbr_radix4bfly:
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
|
||||
SUB sp, sp, #16
|
||||
|
||||
MOV r6, #6
|
||||
MUL r7, r6, r3
|
||||
MOV r4, r3
|
||||
STR r7, [sp]
|
||||
|
||||
|
||||
|
||||
MOV r3, r3, lsl #1
|
||||
|
||||
STR r2, [sp, #8]
|
||||
STR r4, [sp, #12]
|
||||
STR r4, [sp, #4]
|
||||
|
||||
ADD r2, r1, r3, lsl #2
|
||||
ADD r0, r0, #16
|
||||
|
||||
|
||||
RADIX4_OUTLOOP:
|
||||
RADIX4_INLOOP:
|
||||
|
||||
|
||||
LDR r6, [r1]
|
||||
LDR r7, [r2]
|
||||
LDR r8, [r2, r3, lsl #2]
|
||||
LDR r9, [r2, r3, lsl #3]
|
||||
|
||||
ADD r10, r6, r8
|
||||
SUB r11, r6, r8
|
||||
ADD r12, r7, r9
|
||||
SUB r14, r7, r9
|
||||
|
||||
ADD r6, r10, r12
|
||||
SUB r7, r10, r12
|
||||
STR r6, [r1], #4
|
||||
|
||||
LDR r8, [r1]
|
||||
LDR r6, [r2, #4]!
|
||||
LDR r9, [r2, r3, lsl #2]!
|
||||
LDR r10, [r2, r3, lsl #2]!
|
||||
|
||||
ADD r12, r8, r9
|
||||
SUB r8, r8, r9
|
||||
ADD r9, r6, r10
|
||||
SUB r6, r6, r10
|
||||
|
||||
ADD r10, r12, r9
|
||||
STR r10, [r1], #4
|
||||
SUB r12, r12, r9
|
||||
|
||||
ADD r9, r11, r6
|
||||
SUB r10, r11, r6
|
||||
ADD r11, r8, r14
|
||||
LDR r5, [r0], #4
|
||||
LDR r4, [r0], #-12
|
||||
SUB r6, r8, r14
|
||||
|
||||
RSB r5, r5, #0
|
||||
SMULL r14, r8, r10, r5
|
||||
SMLAL r14, r8, r11, r4
|
||||
RSB r5, r5, #0
|
||||
|
||||
MOV r8, r8, lsl #1
|
||||
STR r8, [r2], #-4
|
||||
|
||||
SMULL r14, r8, r10, r4
|
||||
SMLAL r14, r8, r11, r5
|
||||
LDR r11, [r0], #4
|
||||
LDR r4, [r0], #-12
|
||||
MOV r8, r8, lsl #1
|
||||
STR r8, [r2], -r3, lsl #2
|
||||
|
||||
SMULL r10, r8, r7, r4
|
||||
SMLAL r10, r8, r12, r11
|
||||
|
||||
LDR r14, [r0], #4
|
||||
|
||||
MOV r5, r8, lsl #1
|
||||
|
||||
RSB r11, r11, #0
|
||||
SMULL r10, r8, r7, r11
|
||||
SMLAL r10, r8, r12, r4
|
||||
LDR r4, [r0], #36
|
||||
STR r5, [r2], #4
|
||||
MOV r7, r8, lsl #1
|
||||
|
||||
RSB r14, r14, #0
|
||||
SMULL r11, r12, r9, r14
|
||||
SMLAL r11, r12, r6, r4
|
||||
RSB r14, r14, #0
|
||||
|
||||
STR r7, [r2], -r3, lsl #2
|
||||
MOV r12, r12, lsl #1
|
||||
|
||||
SMULL r10, r7, r9, r4
|
||||
SMLAL r10, r7, r6, r14
|
||||
|
||||
STR r12, [r2], #-4
|
||||
MOV r7, r7, lsl #1
|
||||
STR r7, [r2], #8
|
||||
|
||||
LDR r4, [sp, #4]
|
||||
SUBS r4, r4, #1
|
||||
STR r4, [sp, #4]
|
||||
BGT RADIX4_INLOOP
|
||||
|
||||
LDR r8, [sp]
|
||||
LDR r4, [sp, #12]
|
||||
|
||||
LDR r6, [sp, #8]
|
||||
|
||||
|
||||
SUB r0, r0, r8, lsl #2
|
||||
ADD r1, r1, r8, lsl #2
|
||||
ADD r2, r2, r8, lsl #2
|
||||
STR r4, [sp, #4]
|
||||
SUBS r6, r6, #1
|
||||
STR r6, [sp, #8]
|
||||
BGT RADIX4_OUTLOOP
|
||||
|
||||
|
||||
|
||||
ADD sp, sp, #16
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
113
decoder/armv7/ixheaacd_expsubbandsamples.s
Normal file
113
decoder/armv7/ixheaacd_expsubbandsamples.s
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_expsubbandsamples_armv7
|
||||
ixheaacd_expsubbandsamples_armv7:
|
||||
|
||||
STMFD sp!, {r4-r12}
|
||||
|
||||
LDR r7, [sp, #0x24]
|
||||
LDR r10, [sp, #0x28]
|
||||
SUB r11, r3, r2
|
||||
MOV r12, #1
|
||||
CMP r7, r10
|
||||
BGE EXIT
|
||||
|
||||
MOVS r3, r11
|
||||
BEQ EXIT
|
||||
|
||||
|
||||
LDR r4, [sp, #0x2c]
|
||||
CMP r4, #0
|
||||
BEQ HQ_OUTER_LOOP
|
||||
|
||||
|
||||
|
||||
SUB r10, r10, r7
|
||||
ADD r0, r0, r7, LSL #2
|
||||
LDR r1, [r0], #4
|
||||
|
||||
OUTERLOOP:
|
||||
MOV r3, r11
|
||||
ADD r5, r1, r2, LSL #2
|
||||
INLOOP:
|
||||
|
||||
LDR r4, [r5], #4
|
||||
SUBS r3, r3, #2
|
||||
LDRGE r8, [r5], #4
|
||||
EOR r1 , r4 , r4, asr #31
|
||||
ORR r12, r12, r1
|
||||
EORGE r1 , r8 , r8, asr #31
|
||||
ORRGE r12, r12, r1
|
||||
BGT INLOOP
|
||||
|
||||
SUBS r10, r10, #1
|
||||
LDR r1, [r0], #4
|
||||
BGT OUTERLOOP
|
||||
|
||||
|
||||
|
||||
B EXIT
|
||||
|
||||
|
||||
HQ_OUTER_LOOP:
|
||||
LDR r6, [r0, r7, LSL #2]
|
||||
LDR r5, [r1, r7, LSL #2]
|
||||
ADD r6, r6, r2, LSL #2
|
||||
ADD r5, r5, r2, LSL #2
|
||||
MOV r4, r11
|
||||
|
||||
HQ_IN_LOOP:
|
||||
LDR r8, [r6], #4
|
||||
LDR r9, [r5], #4
|
||||
SUBS r4, r4, #2
|
||||
|
||||
EOR r3 , r8 , r8, asr #31
|
||||
ORR r12, r12, r3
|
||||
|
||||
EOR r3 , r9 , r9, asr #31
|
||||
ORR r12, r12, r3
|
||||
|
||||
LDRGE r8, [r6], #4
|
||||
LDRGE r9, [r5], #4
|
||||
|
||||
EORGE r3 , r8 , r8, asr #31
|
||||
ORRGE r12, r12, r3
|
||||
|
||||
EORGE r3 , r9 , r9, asr #31
|
||||
ORRGE r12, r12, r3
|
||||
|
||||
BGT HQ_IN_LOOP
|
||||
|
||||
INLOEN:
|
||||
ADD r7, r7, #1
|
||||
CMP r7, r10
|
||||
BLT HQ_OUTER_LOOP
|
||||
|
||||
EXIT:
|
||||
|
||||
CLZ r0, r12
|
||||
SUB r0, r0, #1
|
||||
LDMFD sp!, {r4-r12}
|
||||
BX lr
|
||||
|
||||
49
decoder/armv7/ixheaacd_ffr_divide16.s
Normal file
49
decoder/armv7/ixheaacd_ffr_divide16.s
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_fix_div_armv7
|
||||
|
||||
ixheaacd_fix_div_armv7:
|
||||
EOR r12, r0, r1
|
||||
|
||||
MOVS r3, r1, ASR #1
|
||||
RSBMI r3, r3, #0
|
||||
|
||||
MOVS r2, r0, ASR #1
|
||||
RSBMI r2, r2, #0
|
||||
|
||||
MOV r0, #0
|
||||
BEQ L2
|
||||
MOV r1, #0xf
|
||||
L1:
|
||||
MOV r2, r2, LSL #1
|
||||
CMP r2, r3
|
||||
MOV r0, r0, LSL #1
|
||||
ADDCS r0, r0, #1
|
||||
SUBCS r2, r2, r3
|
||||
SUBS r1, r1, #1
|
||||
BGT L1
|
||||
L2:
|
||||
CMP r12, #0
|
||||
RSBLT r0, r0, #0
|
||||
BX lr
|
||||
860
decoder/armv7/ixheaacd_fft32x32_ld.s
Normal file
860
decoder/armv7/ixheaacd_fft32x32_ld.s
Normal file
|
|
@ -0,0 +1,860 @@
|
|||
.text
|
||||
.p2align 2
|
||||
.global DSP_fft32x16_dit
|
||||
|
||||
DSP_fft32x16_dit:
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
VPUSH {D8-D15}
|
||||
|
||||
@**************Variables Vs Registers*************************
|
||||
@ r0 = *ptr_w
|
||||
@ r1 = npoints
|
||||
@ r2 = ptr_x and
|
||||
@ r3 = ptr_y
|
||||
@ r4 = pbit_rev_1024 and pdigRevTable
|
||||
@ r5 = pbit_rev_512 and p_data1
|
||||
@ r6 = pbit_rev_128 and p_data2
|
||||
@ r7 = pbit_rev_32 and p_data3
|
||||
@ r8 = power and nstages_4
|
||||
@ r9 = stage_1_count
|
||||
@ r10 = first_stage (8 or 4)
|
||||
@ r11 = p_data4
|
||||
@ r12 = bit reverse value
|
||||
|
||||
|
||||
@ LDR r4,[sp,#0x68]
|
||||
LDR r5, [sp, #0x68]
|
||||
LDR r6, [sp, #0x68+4]
|
||||
LDR r7, [sp, #0x68+8]
|
||||
|
||||
|
||||
@ These conditions can be optimised to lesser number
|
||||
@************************************************************************************
|
||||
|
||||
@COND_1 CMP r1, #0x400 @1024
|
||||
@ BNE COND_2
|
||||
@ @MOV r10, #4 @ because radix 8 first stage is by default
|
||||
@ MOV r8, #4
|
||||
@ B RADIX_4_FIRST_START
|
||||
|
||||
@line 59 "../../algo/aacdec/src/neon_asm/fft32x16ch_neon.s"
|
||||
COND_2: CMP r1, #0x200 @512
|
||||
BNE COND_3
|
||||
@MOV r10, #8
|
||||
MOV r8, #3
|
||||
MOV r4, r5
|
||||
B RADIX_8_FIRST_START
|
||||
|
||||
COND_3: CMP r1, #0x100
|
||||
BNE COND_4
|
||||
@MOV r10, #4
|
||||
MOV r8, #3
|
||||
MOV r4, r5
|
||||
B RADIX_4_FIRST_START
|
||||
|
||||
COND_4: CMP r1, #0x80 @128
|
||||
BNE COND_5
|
||||
@MOV r10, #8
|
||||
MOV r8, #2
|
||||
MOV r4, r6
|
||||
B RADIX_8_FIRST_START
|
||||
|
||||
COND_5: CMP r1, #0x40
|
||||
BNE COND_6
|
||||
@MOV r10, #4
|
||||
MOV r8, #2
|
||||
MOV r4, r6
|
||||
B RADIX_4_FIRST_START
|
||||
COND_6:
|
||||
@MOV r10, #8
|
||||
MOV r8, #1
|
||||
MOV r4, r7
|
||||
@**********************************************************************************
|
||||
|
||||
|
||||
@CMP r10,#4
|
||||
@BEQ RADIX_4_FIRST_START
|
||||
|
||||
RADIX_8_FIRST_START:
|
||||
|
||||
|
||||
LSR r9 , r1, #5 @ LOOP count for first stage
|
||||
LSL r1, r1, #1
|
||||
|
||||
RADIX_8_FIRST_LOOP:
|
||||
|
||||
MOV r5 , r2
|
||||
MOV r6 , r2
|
||||
MOV r7 , r2
|
||||
MOV r11 , r2
|
||||
|
||||
@*************** Register mapping to data ****************************************
|
||||
@ a_data0_r=q0
|
||||
@ a_data0_i=q1
|
||||
@ a_data2_r=q2
|
||||
@ a_data2_i=q3
|
||||
@ a_data4_r=q4
|
||||
@ a_data4_i=q5
|
||||
@ a_data6_r=q6
|
||||
@ a_data6_i=q7
|
||||
|
||||
@ b_data0_r=q8
|
||||
@ b_data0_i=q9
|
||||
@ b_data2_r=q10
|
||||
@ b_data2_i=q11
|
||||
@ b_data4_r=q12
|
||||
@ b_data4_i=q13
|
||||
@ b_data6_r=q14
|
||||
@ b_data6_i=q15
|
||||
|
||||
@*********************************************************************************
|
||||
|
||||
|
||||
LDRB r12, [r4, #0]
|
||||
ADD r5, r5, r12, LSL #3
|
||||
VLD2.32 {d0[0], d2[0]}, [r5] , r1
|
||||
ADD r5, r5, r1
|
||||
VLD2.32 {d8[0], d10[0]}, [r5] , r1
|
||||
SUB r5, r5, r1, LSL #1
|
||||
VLD2.32 {d4[0], d6[0]}, [r5] , r1
|
||||
ADD r5, r5, r1
|
||||
VLD2.32 {d12[0], d14[0]}, [r5], r1
|
||||
SUB r5, r5, r1, LSL #2
|
||||
|
||||
LDRB r12, [r4, #1]
|
||||
ADD r6, r6, r12, LSL #3
|
||||
VLD2.32 {d0[1], d2[1]}, [r6] , r1
|
||||
ADD r6, r6, r1
|
||||
VLD2.32 {d8[1], d10[1]}, [r6] , r1
|
||||
SUB r6, r6, r1, LSL #1
|
||||
VLD2.32 {d4[1], d6[1]}, [r6] , r1
|
||||
ADD r6, r6, r1
|
||||
VLD2.32 {d12[1], d14[1]}, [r6], r1
|
||||
SUB r6, r6, r1, LSL #2
|
||||
|
||||
|
||||
LDRB r12, [r4, #2]
|
||||
ADD r7, r7, r12 , LSL #3
|
||||
VLD2.32 {d1[0], d3[0]}, [r7] , r1
|
||||
ADD r7, r7, r1
|
||||
VLD2.32 {d9[0], d11[0]}, [r7] , r1
|
||||
SUB r7, r7, r1, LSL #1
|
||||
|
||||
LDRB r12, [r4, #3]
|
||||
ADD r11, r11, r12 , LSL #3
|
||||
VLD2.32 {d1[1], d3[1]}, [r11] , r1
|
||||
ADD r11, r11, r1
|
||||
VLD2.32 {d9[1], d11[1]}, [r11] , r1
|
||||
SUB r11, r11, r1, LSL #1
|
||||
|
||||
|
||||
@VHADD.S32 q8, q0, q4 @b_data0_r=vhaddq_s32(a_data0_r_i.val[0],a_data4_r_i.val[0])@
|
||||
VADD.I32 q8, q0, q4 @b_data0_r=vhaddq_s32(a_data0_r_i.val[0],a_data4_r_i.val[0])@
|
||||
VLD2.32 {d5[0], d7[0]}, [r7] , r1
|
||||
ADD r7, r7, r1
|
||||
@VHSUB.S32 q9, q0, q4 @b_data4_r=vhsubq_s32(a_data0_r_i.val[0],a_data4_r_i.val[0])@
|
||||
VSUB.I32 q9, q0, q4 @b_data4_r=vhsubq_s32(a_data0_r_i.val[0],a_data4_r_i.val[0])@
|
||||
VLD2.32 {d13[0], d15[0]}, [r7], r1
|
||||
SUB r7, r7, r1, LSL #2
|
||||
|
||||
|
||||
|
||||
@VHADD.S32 q0, q1, q5 @b_data0_i=vhaddq_s32(a_data0_r_i.val[1],a_data4_r_i.val[1])@
|
||||
VADD.I32 q0, q1, q5 @b_data0_i=vhaddq_s32(a_data0_r_i.val[1],a_data4_r_i.val[1])@
|
||||
VLD2.32 {d5[1], d7[1]}, [r11] , r1
|
||||
ADD r11, r11, r1
|
||||
@VHSUB.S32 q4, q1, q5 @b_data4_i=vhsubq_s32(a_data0_r_i.val[1],a_data4_r_i.val[1])@
|
||||
VSUB.I32 q4, q1, q5 @b_data4_i=vhsubq_s32(a_data0_r_i.val[1],a_data4_r_i.val[1])@
|
||||
VLD2.32 {d13[1], d15[1]}, [r11], r1
|
||||
SUB r11, r11, r1, LSL #2
|
||||
|
||||
|
||||
|
||||
ADD r4, r4, #4
|
||||
|
||||
ADD r5, r5, r1, LSR #1
|
||||
ADD r6, r6, r1, LSR #1
|
||||
ADD r7, r7, r1, LSR #1
|
||||
ADD r11, r11, r1, LSR #1
|
||||
|
||||
@VHADD.S32 q1, q2, q6 @b_data2_r=vhaddq_s32(a_data2_r_i.val[0],a_data6_r_i.val[0])@
|
||||
VADD.I32 q1, q2, q6 @b_data2_r=vhaddq_s32(a_data2_r_i.val[0],a_data6_r_i.val[0])@
|
||||
VLD2.32 {d28[0], d30[0]}, [r5] , r1 @a_data1_r_i=vld2q_lane_s32(__transfersize(2) p_data1,a_data1_r_i,0)@
|
||||
|
||||
@VHSUB.S32 q5, q2, q6 @b_data6_r=vhsubq_s32(a_data2_r_i.val[0],a_data6_r_i.val[0])@
|
||||
VSUB.I32 q5, q2, q6 @b_data6_r=vhsubq_s32(a_data2_r_i.val[0],a_data6_r_i.val[0])@
|
||||
VLD2.32 {d20[0], d22[0]}, [r5] , r1 @a_data3_r_i=vld2q_lane_s32(__transfersize(2) p_data1,a_data3_r_i,0)
|
||||
|
||||
@VHADD.S32 q2, q3, q7 @b_data2_i=vhaddq_s32(a_data2_r_i.val[1],a_data6_r_i.val[1])@
|
||||
VADD.I32 q2, q3, q7 @b_data2_i=vhaddq_s32(a_data2_r_i.val[1],a_data6_r_i.val[1])@
|
||||
VLD2.32 {d24[0], d26[0]}, [r5] , r1 @a_data5_r_i=vld2q_lane_s32(__transfersize(2) p_data1,a_data5_r_i,0)
|
||||
|
||||
@VHSUB.S32 q6, q3, q7 @b_data6_i=vhsubq_s32(a_data2_r_i.val[1],a_data6_r_i.val[1])@
|
||||
VSUB.I32 q6, q3, q7 @b_data6_i=vhsubq_s32(a_data2_r_i.val[1],a_data6_r_i.val[1])@
|
||||
VLD2.32 {d28[1], d30[1]}, [r6] , r1
|
||||
|
||||
VADD.S32 q3, q9, q6 @c_data4_r=vaddq_s32(b_data4_r,b_data6_i)@
|
||||
VLD2.32 {d20[1], d22[1]}, [r6] , r1
|
||||
|
||||
VSUB.S32 q7, q9, q6 @c_data6_r=vsubq_s32(b_data4_r,b_data6_i)@
|
||||
VLD2.32 {d24[1], d26[1]}, [r6] , r1
|
||||
|
||||
VSUB.S32 q6, q4, q5 @c_data4_i=vsubq_s32(b_data4_i,b_data6_r)@
|
||||
VLD2.32 {d29[0], d31[0]}, [r7] , r1
|
||||
|
||||
VADD.S32 q9, q4, q5 @c_data6_i=vaddq_s32(b_data4_i,b_data6_r)@
|
||||
VLD2.32 {d21[0], d23[0]}, [r7] , r1
|
||||
|
||||
VADD.S32 q4, q8, q1 @c_data0_r=vaddq_s32(b_data0_r,b_data2_r)@
|
||||
VLD2.32 {d25[0], d27[0]}, [r7] , r1
|
||||
|
||||
VSUB.S32 q5, q8, q1 @c_data2_r=vsubq_s32(b_data0_r,b_data2_r)@
|
||||
VLD2.32 {d29[1], d31[1]}, [r11] , r1
|
||||
|
||||
VADD.S32 q8, q0, q2 @c_data0_i=vaddq_s32(b_data0_i,b_data2_i)@
|
||||
VLD2.32 {d21[1], d23[1]}, [r11] , r1
|
||||
|
||||
VSUB.S32 q0, q0, q2 @c_data2_i=vsubq_s32(b_data0_i,b_data2_i)@
|
||||
VLD2.32 {d25[1], d27[1]}, [r11] , r1
|
||||
|
||||
|
||||
VPUSH {q3} @ VPUSH(c_data4_r, c_data6_r)
|
||||
VPUSH {q7}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
VLD2.32 {d2[0], d4[0]}, [r5], r1 @a_data7_r_i=vld2q_lane_s32(__transfersize(2) p_data1,a_data7_r_i,0)
|
||||
@VHADD.S32 q7, q14, q12 @b_data1_r=vhaddq_s32(a_data1_r,a_data5_r)@
|
||||
VADD.I32 q7, q14, q12 @b_data1_r=vhaddq_s32(a_data1_r,a_data5_r)@
|
||||
|
||||
VLD2.32 {d2[1], d4[1]}, [r6] , r1
|
||||
@VHSUB.S32 q3, q14, q12 @b_data5_r=vhsubq_s32(a_data1_r,a_data5_r)@
|
||||
VSUB.I32 q3, q14, q12 @b_data5_r=vhsubq_s32(a_data1_r,a_data5_r)@
|
||||
|
||||
VLD2.32 {d3[0], d5[0]}, [r7] , r1
|
||||
@VHADD.S32 q14, q15, q13 @b_data1_i=vhaddq_s32(a_data1_i,a_data5_i)@
|
||||
VADD.I32 q14, q15, q13 @b_data1_i=vhaddq_s32(a_data1_i,a_data5_i)@
|
||||
|
||||
VLD2.32 {d3[1], d5[1]}, [r11] , r1
|
||||
@VHSUB.S32 q12, q15, q13 @b_data5_i=vhsubq_s32(a_data1_i,a_data5_i)@
|
||||
VSUB.I32 q12, q15, q13 @b_data5_i=vhsubq_s32(a_data1_i,a_data5_i)@
|
||||
|
||||
|
||||
|
||||
|
||||
@VHADD.S32 q15, q10,q1 @b_data3_r=vhaddq_s32(a_data3_r,a_data7_r)@
|
||||
@VHSUB.S32 q13, q10,q1 @b_data7_r=vhsubq_s32(a_data3_r,a_data7_r)@
|
||||
@VHADD.S32 q10, q11, q2 @b_data3_i=vhaddq_s32(a_data3_i,a_data7_i)@
|
||||
@VHSUB.S32 q1, q11, q2 @b_data7_i=vhsubq_s32(a_data3_i,a_data7_i)@
|
||||
|
||||
VADD.I32 q15, q10, q1 @b_data3_r=vhaddq_s32(a_data3_r,a_data7_r)@
|
||||
VSUB.I32 q13, q10, q1 @b_data7_r=vhsubq_s32(a_data3_r,a_data7_r)@
|
||||
VADD.I32 q10, q11, q2 @b_data3_i=vhaddq_s32(a_data3_i,a_data7_i)@
|
||||
VSUB.I32 q1, q11, q2 @b_data7_i=vhsubq_s32(a_data3_i,a_data7_i)@
|
||||
|
||||
|
||||
|
||||
VADD.S32 q11, q7, q15 @c_data1_r=vaddq_s32(b_data1_r,b_data3_r)@
|
||||
VSUB.S32 q2, q7, q15 @c_data3_r=vsubq_s32(b_data1_r,b_data3_r)@
|
||||
VADD.S32 q7, q14, q10 @c_data1_i=vaddq_s32(b_data1_i,b_data3_i)@
|
||||
VSUB.S32 q15, q14, q10 @c_data3_i=vsubq_s32(b_data1_i,b_data3_i)@
|
||||
|
||||
VADD.S32 q14, q3, q12 @c_data5_r=vaddq_s32(b_data5_r,b_data5_i)@
|
||||
VSUB.S32 q10, q3, q12 @c_data5_i=vsubq_s32(b_data5_r,b_data5_i)@
|
||||
VADD.S32 q3, q13, q1 @c_data7_r=vaddq_s32(b_data7_r,b_data7_i)@
|
||||
VSUB.S32 q12, q13, q1 @c_data7_i=vsubq_s32(b_data7_r,b_data7_i)@
|
||||
|
||||
VADD.S32 q1 , q14, q12 @b_data5_r=vaddq_s32(c_data7_i,c_data5_r)@
|
||||
VSUB.S32 q13, q14, q12 @b_data7_i=vsubq_s32(c_data5_r,c_data7_i)@
|
||||
VSUB.S32 q12, q3, q10 @b_data5_i=vsubq_s32(c_data7_r,c_data5_i)@
|
||||
|
||||
VUZP.16 d2, d3 @ D0 = b_data5_r_low, D1= b_data5_r_high
|
||||
VADD.S32 q14, q3, q10 @b_data7_r=vaddq_s32(c_data5_i,c_data7_r)@
|
||||
|
||||
VUZP.16 d26, d27
|
||||
VADD.S32 q3, q4, q11 @b_data0_r=vaddq_s32(c_data0_r,c_data1_r)@
|
||||
|
||||
VUZP.16 d24, d25
|
||||
VSUB.S32 q10, q4, q11 @b_data1_r=vsubq_s32(c_data0_r,c_data1_r)@
|
||||
|
||||
VUZP.16 d28, d29
|
||||
VADD.S32 q4, q8, q7 @b_data0_i=vaddq_s32(c_data0_i,c_data1_i)@
|
||||
|
||||
LDR r14, = 0x5a82
|
||||
|
||||
VSUB.S32 q11, q8, q7 @b_data1_i=vsubq_s32(c_data0_i,c_data1_i)@
|
||||
|
||||
VADD.S32 q8, q5, q15 @b_data2_r=vaddq_s32(c_data2_r,c_data3_i)@
|
||||
VSUB.S32 q7, q5, q15 @b_data3_r=vsubq_s32(c_data2_r,c_data3_i)@
|
||||
VSUB.S32 q5, q0, q2 @b_data2_i=vsubq_s32(c_data2_i,c_data3_r)@
|
||||
VADD.S32 q15, q0, q2 @b_data3_i=vaddq_s32(c_data2_i,c_data3_r)@
|
||||
|
||||
VPOP {q0}
|
||||
VPOP {q2}
|
||||
VPUSH {q3-q4}
|
||||
VPUSH {q10}
|
||||
|
||||
|
||||
|
||||
|
||||
@********************************************************************
|
||||
@ b_data5_r = q1 free regs = q3,q4,q5,q7,q8,q10,q11
|
||||
@ b_data5_i = q12
|
||||
@ b_data7_r = q14
|
||||
@ b_data7_i = q13
|
||||
|
||||
@ c_data4_r = q2
|
||||
@ c_data4_i = q6
|
||||
@ c_data6_r = q0
|
||||
@ c_data6_i = q9
|
||||
@********************************************************************
|
||||
|
||||
|
||||
VDUP.16 d20, r14
|
||||
|
||||
|
||||
VMULL.u16 q4, d26, d20
|
||||
VMULL.u16 q3, d28, d20
|
||||
|
||||
VPUSH {q7-q8}
|
||||
VPUSH {q5}
|
||||
|
||||
VSHR.S32 q4, q4, #15
|
||||
VSHR.S32 q3, q3, #15
|
||||
|
||||
VQDMLAL.S16 q4, d27, d20
|
||||
VQDMLAL.S16 q3, d29, d20
|
||||
|
||||
|
||||
VPUSH {q11}
|
||||
|
||||
VMULL.u16 q13, d24, d20
|
||||
VMULL.u16 q14, d2, d20
|
||||
|
||||
VADD.S32 q5, q2, q4 @q5=b_data7_i
|
||||
VSUB.S32 q7, q2, q4 @q7=b_data4_r
|
||||
|
||||
VADD.S32 q8, q6, q3 @q10 = b_data4_i
|
||||
VSUB.S32 q6, q6, q3 @q11 = b_data7_r
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
VSHR.S32 q13, q13, #15
|
||||
VSHR.S32 q14, q14, #15
|
||||
|
||||
VQDMLAL.S16 q13, d25, d20
|
||||
VQDMLAL.S16 q14, d3, d20
|
||||
|
||||
VPOP {q1}
|
||||
VPOP {q10}
|
||||
|
||||
VADD.S32 q2, q0, q13 @q2 = b_data5_i
|
||||
VSUB.S32 q4, q0, q13 @q4 = b_data6_r
|
||||
|
||||
VADD.S32 q11, q9, q14 @q6 = b_data6_i
|
||||
VSUB.S32 q3, q9, q14 @q8 = b_data5_r
|
||||
|
||||
|
||||
|
||||
|
||||
VPOP {q14}
|
||||
VPOP {q9}
|
||||
VPOP {q0}
|
||||
VPOP {q12, q13}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@**************regs maping************
|
||||
@b_data0_r = q12
|
||||
@b_data0_i = q13
|
||||
@b_data1_r = q0
|
||||
@b_data1_i = q1
|
||||
|
||||
@b_data2_r = q9
|
||||
@b_data2_i = q10
|
||||
@b_data3_r = q14
|
||||
@b_data3_i = q15
|
||||
|
||||
@b_data4_r = q7
|
||||
@b_data4_i = q8
|
||||
@b_data5_r = q3
|
||||
@b_data5_i = q2
|
||||
|
||||
@b_data6_r = q4
|
||||
@b_data6_i = q11
|
||||
@b_data7_r = q6
|
||||
@b_data7_i = q5
|
||||
@******************************************
|
||||
|
||||
@shifts added (as dual simd instrn)
|
||||
|
||||
VTRN.32 q12, q5
|
||||
@line 455 "../../algo/aacdec/src/neon_asm/fft32x16ch_neon.s"
|
||||
VSHL.S32 q12, q12, #3 @ch
|
||||
VTRN.32 q9, q2
|
||||
VSHL.S32 q5, q5, #3 @ch
|
||||
|
||||
VSHL.S32 q9, q9, #3 @ch
|
||||
VTRN.32 q0, q7
|
||||
VSHL.S32 q2, q2, #3 @ch
|
||||
|
||||
VSHL.S32 q0, q0, #3 @ch
|
||||
VTRN.32 q14, q4
|
||||
VSHL.S32 q7, q7, #3 @ch
|
||||
|
||||
VSHL.S32 q14, q14, #3 @ch
|
||||
VTRN.32 q13, q6
|
||||
VSHL.S32 q4, q4, #3 @ch
|
||||
|
||||
VSHL.S32 q13, q13, #3 @ch
|
||||
VTRN.32 q10, q3
|
||||
VSHL.S32 q6, q6, #3 @ch
|
||||
|
||||
VSHL.S32 q10, q10, #3 @ch
|
||||
VTRN.32 q1, q8
|
||||
VSHL.S32 q3, q3, #3 @ch
|
||||
|
||||
VSHL.S32 q1, q1, #3 @ch
|
||||
VTRN.32 q15, q11
|
||||
VSHL.S32 q8, q8, #3 @ch
|
||||
|
||||
VSHL.S32 q15, q15, #3 @ch
|
||||
VSWP d18, d25
|
||||
|
||||
VSHL.S32 q11, q11, #3 @ch
|
||||
VSWP d4, d11
|
||||
|
||||
VSWP d1, d28
|
||||
VSWP d15, d8
|
||||
|
||||
VSWP d20, d27
|
||||
VSWP d6, d13
|
||||
|
||||
VSWP d30, d3
|
||||
VSWP d22, d17
|
||||
|
||||
VST2.32 {q12, q13}, [r3]!
|
||||
VST2.32 {q0, q1}, [r3]!
|
||||
|
||||
VST2.32 {q5, q6}, [r3]!
|
||||
VST2.32 {q7, q8}, [r3]!
|
||||
|
||||
VMOV q5, q11
|
||||
|
||||
VST2.32 {q9, q10}, [r3]!
|
||||
VST2.32 {q14, q15}, [r3]!
|
||||
|
||||
VST2.32 {q2, q3}, [r3]!
|
||||
VST2.32 {q4, q5}, [r3]!
|
||||
|
||||
|
||||
SUBS r9, r9, #1
|
||||
BNE RADIX_8_FIRST_LOOP
|
||||
|
||||
LSR r1, r1, #1
|
||||
SUB r3, r1, LSL #3
|
||||
|
||||
MOV r5, #8
|
||||
MOV r4, #32
|
||||
LSR r6, r1, #5
|
||||
|
||||
B RADIX_4_FIRST_ENDS
|
||||
|
||||
RADIX_8_FIRST_ENDS:
|
||||
|
||||
|
||||
|
||||
|
||||
@************************************RADIX 4 FIRST STAGE**********************************
|
||||
|
||||
RADIX_4_FIRST_START:
|
||||
|
||||
|
||||
LSR r9 , r1, #4 @ LOOP count for first stage
|
||||
LSL r1, r1, #1
|
||||
|
||||
RADIX_4_LOOP:
|
||||
|
||||
MOV r5 , r2
|
||||
MOV r6 , r2
|
||||
MOV r7 , r2
|
||||
MOV r11 , r2
|
||||
|
||||
@*************** Register mapping to data ****************************************
|
||||
@ a_data0_r=q0
|
||||
@ a_data0_i=q1
|
||||
@ a_data1_r=q2
|
||||
@ a_data1_i=q3
|
||||
@ a_data2_r=q4
|
||||
@ a_data2_i=q5
|
||||
@ a_data3_r=q6
|
||||
@ a_data4_i=q7
|
||||
|
||||
|
||||
@*********************************************************************************
|
||||
|
||||
|
||||
LDRB r12, [r4, #0]
|
||||
ADD r5, r5, r12, LSL #3
|
||||
|
||||
VLD2.32 {d0[0], d2[0]}, [r5] , r1
|
||||
ADD r5, r5, r1
|
||||
VLD2.32 {d8[0], d10[0]}, [r5] , r1
|
||||
SUB r5, r5, r1, LSL #1
|
||||
VLD2.32 {d4[0], d6[0]}, [r5] , r1
|
||||
ADD r5, r5, r1
|
||||
VLD2.32 {d12[0], d14[0]}, [r5], r1
|
||||
|
||||
LDRB r12, [r4, #1]
|
||||
ADD r6, r6, r12, LSL #3
|
||||
|
||||
VLD2.32 {d0[1], d2[1]}, [r6] , r1
|
||||
ADD r6, r6, r1
|
||||
VLD2.32 {d8[1], d10[1]}, [r6] , r1
|
||||
SUB r6, r6, r1, LSL #1
|
||||
VLD2.32 {d4[1], d6[1]}, [r6] , r1
|
||||
ADD r6, r6, r1
|
||||
VLD2.32 {d12[1], d14[1]}, [r6], r1
|
||||
|
||||
|
||||
LDRB r12, [r4, #2]
|
||||
ADD r7, r7, r12, LSL #3
|
||||
|
||||
VLD2.32 {d1[0], d3[0]}, [r7] , r1
|
||||
ADD r7, r7, r1
|
||||
VLD2.32 {d9[0], d11[0]}, [r7] , r1
|
||||
|
||||
LDRB r12, [r4, #3]
|
||||
ADD r11, r11, r12 , LSL #3
|
||||
|
||||
VLD2.32 {d1[1], d3[1]}, [r11] , r1
|
||||
ADD r11, r11, r1
|
||||
VLD2.32 {d9[1], d11[1]}, [r11] , r1
|
||||
|
||||
|
||||
SUB r7, r7, r1, LSL #1
|
||||
VADD.S32 q8, q0, q4 @b_data0_r=vaddq_s32(a_data0_r,a_data2_r)@
|
||||
VLD2.32 {d5[0], d7[0]}, [r7] , r1
|
||||
ADD r7, r7, r1
|
||||
VADD.S32 q9, q1, q5 @b_data0_i=vaddq_s32(a_data0_i,a_data2_i)@
|
||||
VLD2.32 {d13[0], d15[0]}, [r7], r1
|
||||
|
||||
|
||||
|
||||
SUB r11, r11, r1, LSL #1
|
||||
VSUB.S32 q10, q0, q4 @b_data2_r=vsubq_s32(a_data0_r,a_data2_r)@
|
||||
VLD2.32 {d5[1], d7[1]}, [r11] , r1
|
||||
ADD r11, r11, r1
|
||||
VSUB.S32 q11, q1, q5 @b_data2_i=vsubq_s32(a_data0_i,a_data2_i)@
|
||||
VLD2.32 {d13[1], d15[1]}, [r11], r1
|
||||
|
||||
|
||||
ADD r4, r4, #4
|
||||
|
||||
VADD.S32 q12, q2, q6 @b_data1_r=vaddq_s32(a_data1_r,a_data3_r)@
|
||||
VADD.S32 q13, q3, q7 @b_data1_i=vaddq_s32(a_data1_i,a_data3_i)@
|
||||
VSUB.S32 q14, q2, q6 @b_data3_r=vsubq_s32(a_data1_r,a_data3_r)@
|
||||
VSUB.S32 q15, q3, q7 @b_data3_i=vsubq_s32(a_data1_i,a_data3_i)@
|
||||
|
||||
VADD.S32 q0, q8, q12 @a_data0_r=vaddq_s32(b_data0_r,b_data1_r)@
|
||||
VADD.S32 q1, q9, q13 @a_data0_i=vaddq_s32(b_data0_i,b_data1_i)@
|
||||
VSUB.S32 q2, q8, q12 @a_data1_r=vsubq_s32(b_data0_r,b_data1_r)@
|
||||
VSUB.S32 q3, q9, q13 @a_data1_i=vsubq_s32(b_data0_i,b_data1_i)@
|
||||
|
||||
VADD.S32 q4, q10, q15 @a_data2_r=vaddq_s32(b_data2_r,b_data3_i)@
|
||||
VSUB.S32 q5, q11, q14 @a_data2_i=vsubq_s32(b_data2_i,b_data3_r)@
|
||||
VADD.S32 q7, q11, q14 @a_data3_r=vaddq_s32(b_data2_i,b_data3_r)@
|
||||
VSUB.S32 q6, q10, q15 @a_data3_i=vsubq_s32(b_data2_r,b_data3_i)@
|
||||
|
||||
|
||||
@shifts added
|
||||
|
||||
VTRN.32 q0, q4
|
||||
|
||||
VSHL.S32 q0, q0, #2 @ch
|
||||
VTRN.32 q2, q6
|
||||
VSHL.S32 q4, q4, #2 @ch
|
||||
|
||||
VSHL.S32 q2, q2, #2 @ch
|
||||
VTRN.32 q1, q5 @ch
|
||||
VSHL.S32 q6, q6, #2 @ch
|
||||
|
||||
VSHL.S32 q1, q1, #2 @ch
|
||||
VTRN.32 q3, q7 @ch
|
||||
VSHL.S32 q5, q5, #2 @ch
|
||||
|
||||
VSHL.S32 q3, q3, #2 @ch
|
||||
VSWP d4, d1
|
||||
|
||||
VSHL.S32 q7, q7, #2 @ch
|
||||
VSWP d12, d9
|
||||
|
||||
@VTRN.32 q1, q5
|
||||
@VTRN.32 q3, q7
|
||||
VSWP d6, d3
|
||||
VSWP d14, d11
|
||||
|
||||
|
||||
VST2.32 {q0, q1}, [r3]!
|
||||
VST2.32 {q4, q5}, [r3]!
|
||||
|
||||
VST2.32 {q2, q3}, [r3]!
|
||||
VST2.32 {q6, q7}, [r3]!
|
||||
|
||||
|
||||
|
||||
SUBS r9, r9, #1
|
||||
BNE RADIX_4_LOOP
|
||||
|
||||
LSR r1, r1, #1
|
||||
SUB r3, r1, LSL #3
|
||||
MOV r5, #4
|
||||
MOV r4, #64
|
||||
LSR r6, r1, #4
|
||||
|
||||
|
||||
RADIX_4_FIRST_ENDS:
|
||||
|
||||
|
||||
|
||||
@********************************END OF RADIX 4 FIRST STAGE*******************************
|
||||
|
||||
@*************** register assignment after first radix 8 stage****************************
|
||||
@ r1 = npoints
|
||||
@ r0 = *ptr_w
|
||||
@ r3 = *ptr_y
|
||||
@ r8 = nstages_4
|
||||
@ free regs r2, r4,r5,r6,r7,r9,r10,r11,r12
|
||||
@ r2 = j
|
||||
@ r4 = node_spacing
|
||||
@ r5 = del
|
||||
@ r6 = in_loop_count
|
||||
@ r7 = middle_loop_count (del*node_spacing)
|
||||
@ r9 = p_twiddle_factors
|
||||
@ r10= p_twiddle_factors and inner loop counter
|
||||
@ r11=
|
||||
@ r12=
|
||||
@ r14= *data
|
||||
|
||||
PUSH {r3}
|
||||
|
||||
LSR r5, r5, #2
|
||||
|
||||
OUTER_LOOP_R4:
|
||||
|
||||
LDR r14, [sp]
|
||||
@MOV r14,r3
|
||||
@LSR r7,r5,#0 @,#2
|
||||
MOV r7, r5
|
||||
MOV r2, #0
|
||||
MOV r9, r0
|
||||
LSL r12 , r5, #5
|
||||
MIDDLE_LOOP_R4:
|
||||
|
||||
|
||||
VLD2.16 {d0[0], d1[0]}, [r9], r2 @cos_1 = d0 , sin_1=d1
|
||||
VLD2.16 {d2[0], d3[0]}, [r9], r2 @cos_2 = d2 , sin_2=d3
|
||||
ADD r11, r2, r4, LSL #2
|
||||
VLD2.16 {d4[0], d5[0]}, [r9] @cos_3 = d4 , sin_3=d5
|
||||
ADD r10, r0, r11
|
||||
|
||||
|
||||
VLD2.16 {d0[1], d1[1]}, [r10], r11
|
||||
VLD2.16 {d2[1], d3[1]}, [r10], r11
|
||||
ADD r2, r11, r4, LSL #2
|
||||
VLD2.16 {d4[1], d5[1]}, [r10]
|
||||
ADD r9, r0, r2
|
||||
|
||||
|
||||
VLD2.16 {d0[2], d1[2]}, [r9], r2
|
||||
VLD2.16 {d2[2], d3[2]}, [r9], r2
|
||||
ADD r11, r2, r4, LSL #2
|
||||
VLD2.16 {d4[2], d5[2]}, [r9]
|
||||
ADD r10, r0, r11
|
||||
|
||||
|
||||
|
||||
VLD2.16 {d0[3], d1[3]}, [r10], r11
|
||||
VLD2.16 {d2[3], d3[3]}, [r10], r11
|
||||
ADD r2, r11, r4, LSL #2
|
||||
VLD2.16 {d4[3], d5[3]}, [r10]
|
||||
ADD r9, r0, r2
|
||||
|
||||
MOV r10, r6
|
||||
|
||||
|
||||
|
||||
INNER_LOOP_R4:
|
||||
|
||||
VLD2.32 {q3, q4}, [r14], r12
|
||||
|
||||
VSHR.S32 q3, q3, #1
|
||||
VLD4.16 {q5, q6}, [r14], r12 @a_data1_r_l=d10 , a_data1_r_h=d11, a_data1_i_l=d12, a_data1_i_h=d13
|
||||
VSHR.S32 q4, q4, #1
|
||||
|
||||
VSHR.U16 d10, d10, #1 @a_data1.val[0]= vreinterpret_s16_u16(vshr_n_u16(vreinterpret_u16_s16(a_data1.val[0]), 1))@
|
||||
VLD4.16 {q7, q8}, [r14], r12
|
||||
VSHR.U16 d12, d12, #1 @a_data1.val[2]= vreinterpret_s16_u16(vshr_n_u16(vreinterpret_u16_s16(a_data1.val[2]), 1))@
|
||||
|
||||
VMULL.S16 q11, d10, d0 @prod_1r=vmull_s16(a_data1.val[0], cos_1)@
|
||||
VMLSL.S16 q11, d12, d1 @prod_1r=vmlsl_s16(prod_1r, a_data1.val[2], sin_1)@
|
||||
VLD4.16 {q9, q10}, [r14], r12
|
||||
VMULL.S16 q12, d10, d1 @prod_1i=vmull_s16(a_data1.val[0], sin_1)@
|
||||
VMLAL.S16 q12, d12, d0 @prod_1i=vmlal_s16(prod_1i, a_data1.val[2], cos_1)@
|
||||
|
||||
VSHR.U16 d14, d14, #1 @a_data2.val[0]=vreinterpret_s16_u16(vshr_n_u16(vreinterpret_u16_s16(a_data2.val[0]), 1))@
|
||||
VSHR.U16 d16, d16, #1 @a_data2.val[2]=vreinterpret_s16_u16(vshr_n_u16(vreinterpret_u16_s16(a_data2.val[2]), 1))@
|
||||
|
||||
SUB r14, r14, r12, LSL #2
|
||||
|
||||
VSHR.U16 d18, d18, #1 @a_data3.val[0]= vreinterpret_s16_u16(vshr_n_u16(vreinterpret_u16_s16(a_data3.val[0]), 1))@
|
||||
VSHR.U16 d20, d20, #1 @a_data3.val[2]= vreinterpret_s16_u16(vshr_n_u16(vreinterpret_u16_s16(a_data3.val[2]), 1))@
|
||||
|
||||
VMULL.S16 q13, d14, d2 @prod_2r=vmull_s16(a_data2.val[0], cos_2)@
|
||||
VMLSL.S16 q13, d16, d3 @prod_2r=vmlsl_s16(prod_2r, a_data2.val[2], sin_2)@
|
||||
|
||||
VSHR.S32 q11, q11, #15 @a_data1_r=vshrq_n_s32(prod_1r,15)@
|
||||
|
||||
VMULL.S16 q14, d14, d3 @prod_2i=vmull_s16(a_data2.val[0], sin_2)@
|
||||
VMLAL.S16 q14, d16, d2 @prod_2i=vmlal_s16(prod_2i, a_data2.val[2], cos_2)@
|
||||
|
||||
VMULL.S16 q15, d18, d4 @prod_3r=vmull_s16(a_data3.val[0], cos_3)@
|
||||
VMLSL.S16 q15, d20, d5 @prod_3r=vmlsl_s16(prod_3r, a_data3.val[2], sin_3)@
|
||||
|
||||
VMLAL.S16 q11, d11, d0 @a_data1_r=vmlal_s16(a_data1_r, a_data1.val[1], cos_1)@
|
||||
VMLSL.S16 q11, d13, d1 @a_data1_r=vmlsl_s16(a_data1_r, a_data1.val[3], sin_1)@
|
||||
|
||||
VSHR.S32 q12, q12, #15 @a_data1_i=vshrq_n_s32(prod_1i,15)@
|
||||
VSHR.S32 q13, q13, #15 @a_data2_r=vshrq_n_s32(prod_2r,15)@
|
||||
VSHR.S32 q14, q14, #15 @a_data2_i=vshrq_n_s32(prod_2i,15)@
|
||||
VSHR.S32 q15, q15, #15 @a_data3_r=vshrq_n_s32(prod_3r,15)@
|
||||
|
||||
|
||||
VMLAL.S16 q12, d11, d1 @a_data1_i=vmlal_s16(a_data1_i, a_data1.val[1], sin_1)@
|
||||
VMLAL.S16 q12, d13, d0 @a_data1_i=vmlal_s16(a_data1_i, a_data1.val[3], cos_1)@
|
||||
|
||||
|
||||
VMULL.S16 q5, d18, d5 @prod_3i=vmull_s16(a_data3.val[0], sin_3)@
|
||||
VMLAL.S16 q5, d20, d4 @prod_3i=vmlal_s16(prod_3i, a_data3.val[2], cos_3)@
|
||||
|
||||
|
||||
VMLAL.S16 q13, d15, d2 @a_data2_r=vmlal_s16(a_data2_r, a_data2.val[1], cos_2)@
|
||||
VMLSL.S16 q13, d17, d3 @a_data2_r=vmlsl_s16(a_data2_r, a_data2.val[3], sin_2)@
|
||||
|
||||
VMLAL.S16 q14, d15, d3 @a_data2_i=vmlal_s16(a_data2_i, a_data2.val[1], sin_2)@
|
||||
VMLAL.S16 q14, d17, d2 @a_data2_i=vmlal_s16(a_data2_i, a_data2.val[3], cos_2)@
|
||||
|
||||
|
||||
VMLAL.S16 q15, d19, d4 @a_data3_r=vmlal_s16(a_data3_r, a_data3.val[1], cos_3)@
|
||||
VMLSL.S16 q15, d21, d5 @a_data3_r=vmlsl_s16(a_data3_r, a_data3.val[3], sin_3)@
|
||||
|
||||
VSHR.S32 q5, q5, #15 @a_data3_i=vshrq_n_s32(prod_3i,15)@
|
||||
|
||||
VMLAL.S16 q5, d19, d5 @a_data3_i=vmlal_s16(a_data3_i, a_data3.val[1], sin_3)@
|
||||
VMLAL.S16 q5, d21, d4 @a_data3_i=vmlal_s16(a_data3_i, a_data3.val[3], cos_3)@
|
||||
|
||||
@**********if condition******************
|
||||
|
||||
CMP r7, r5
|
||||
BNE BYPASS_IF
|
||||
|
||||
ADD r14, r14, r12
|
||||
|
||||
LDR r3, [r14], r12
|
||||
ASR r3, r3, #1
|
||||
VMOV.32 d22[0], r3
|
||||
|
||||
LDR r3, [r14], r12
|
||||
ASR r3, r3, #1
|
||||
VMOV.32 d26[0], r3
|
||||
|
||||
LDR r3, [r14]
|
||||
ASR r3, r3, #1
|
||||
VMOV.32 d30[0], r3
|
||||
|
||||
SUB r14, r14, r12, LSL #1
|
||||
ADD r14, r14, #4
|
||||
|
||||
LDR r3, [r14], r12
|
||||
ASR r3, r3, #1
|
||||
VMOV.32 d24[0], r3
|
||||
|
||||
LDR r3, [r14], r12
|
||||
ASR r3, r3, #1
|
||||
VMOV.32 d28[0], r3
|
||||
|
||||
LDR r3, [r14], r12
|
||||
ASR r3, r3, #1
|
||||
VMOV.32 d10[0], r3
|
||||
|
||||
SUB r14, r14, #4
|
||||
|
||||
SUB r14, r14, r12, LSL #2
|
||||
@****************************************
|
||||
BYPASS_IF:
|
||||
|
||||
VADD.S32 q6, q3, q13 @b_data0_r=vaddq_s32(a_data0_r,a_data2_r)@
|
||||
VADD.S32 q7, q4, q14 @b_data0_i=vaddq_s32(a_data0_i,a_data2_i)@
|
||||
VSUB.S32 q3, q3, q13 @b_data2_r=vsubq_s32(a_data0_r,a_data2_r)@
|
||||
VSUB.S32 q4, q4, q14 @b_data2_i=vsubq_s32(a_data0_i,a_data2_i)@
|
||||
VADD.S32 q8, q11, q15 @b_data1_r=vaddq_s32(a_data1_r,a_data3_r)@
|
||||
VADD.S32 q9, q12, q5 @b_data1_i=vaddq_s32(a_data1_i,a_data3_i)@
|
||||
|
||||
VSUB.S32 q15, q11, q15 @b_data3_r=vsubq_s32(a_data1_r,a_data3_r)@
|
||||
VSUB.S32 q14, q12, q5 @b_data3_i=vsubq_s32(a_data1_i,a_data3_i)@
|
||||
|
||||
@line 882 "../../algo/aacdec/src/neon_asm/fft32x16ch_neon.s"
|
||||
VADD.S32 q10, q6, q8 @c_data0_r=vaddq_s32(b_data0_r,b_data1_r)@
|
||||
VADD.S32 q11, q7, q9 @c_data0_i=vaddq_s32(b_data0_i,b_data1_i)@
|
||||
VADD.S32 q12, q3, q14 @c_data2_r=vaddq_s32(b_data2_r,b_data3_i)@
|
||||
VSUB.S32 q13, q4, q15 @c_data2_i=vsubq_s32(b_data2_i,b_data3_r)@
|
||||
|
||||
VSUB.S32 q6, q6, q8 @c_data1_r=vsubq_s32(b_data0_r,b_data1_r)@
|
||||
VST2.32 {q10, q11}, [r14], r12 @ storing (c_data0_r,c_data0_i)
|
||||
VSUB.S32 q7, q7, q9 @c_data1_i=vsubq_s32(b_data0_i,b_data1_i)@
|
||||
|
||||
VSUB.S32 q8, q3, q14 @c_data3_i=vsubq_s32(b_data2_r,b_data3_i)@
|
||||
VST2.32 {q12, q13}, [r14], r12 @ storing (c_data2_r,c_data2_i)
|
||||
VADD.S32 q9, q4, q15 @c_data3_r=vaddq_s32(b_data2_i,b_data3_r)@
|
||||
|
||||
|
||||
VST2.32 {q6, q7}, [r14], r12 @ storing (c_data1_r,c_data1_i)
|
||||
VST2.32 {q8, q9}, [r14], r12 @ storing (c_data3_i,c_data3_r)
|
||||
|
||||
|
||||
|
||||
|
||||
SUBS r10, r10, #1
|
||||
BNE INNER_LOOP_R4
|
||||
|
||||
SUB r14, r14, r1, LSL #3
|
||||
ADD r14, r14, #32
|
||||
|
||||
SUBS r7, r7, #1
|
||||
BNE MIDDLE_LOOP_R4
|
||||
|
||||
|
||||
|
||||
|
||||
LSR r4, r4, #2
|
||||
LSL r5, r5, #2
|
||||
LSR r6, r6, #2
|
||||
SUBS r8, r8, #1
|
||||
BNE OUTER_LOOP_R4
|
||||
END_LOOPS:
|
||||
POP {r3}
|
||||
VPOP {D8-D15}
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
373
decoder/armv7/ixheaacd_fft32x32_ld2_armv7.s
Normal file
373
decoder/armv7/ixheaacd_fft32x32_ld2_armv7.s
Normal file
|
|
@ -0,0 +1,373 @@
|
|||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_fft32x32_ld2_armv7
|
||||
|
||||
ixheaacd_fft32x32_ld2_armv7:
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
|
||||
@DIT Radix-4 FFT First Stage
|
||||
@First Butterfly
|
||||
MOV r0, r2
|
||||
MOV r1, r3
|
||||
LDR r2, [r0] @x_0 = x[0 ]
|
||||
LDR r3, [r0, #32] @x_2 = x[8 ]
|
||||
LDR r4, [r0, #64] @x_4 = x[16]
|
||||
LDR r5, [r0, #96] @x_6 = x[24]
|
||||
ADD r6, r2, r4 @xh0_0 = x_0 + x_4
|
||||
SUB r7, r2, r4 @xl0_0 = x_0 - x_4
|
||||
ADD r8, r3, r5 @xh0_1 = x_2 + x_6
|
||||
SUB r9, r3, r5 @xl0_1 = x_2 - x_6
|
||||
|
||||
LDR r2, [r0, #4] @x_1 = x[0 +1]
|
||||
LDR r3, [r0, #36] @x_3 = x[8 +1]
|
||||
LDR r4, [r0, #68] @x_5 = x[16+1]
|
||||
LDR r5, [r0, #100] @x_7 = x[24+1]
|
||||
ADD r10, r2, r4 @xh1_0 = x_1 + x_5
|
||||
SUB r11, r2, r4 @xl1_0 = x_1 - x_5
|
||||
ADD r12, r3, r5 @xh1_1 = x_3 + x_7
|
||||
SUB r14, r3, r5 @xl1_1 = x_3 - x_7
|
||||
|
||||
ADD r2, r6, r8 @n00 = xh0_0 + xh0_1
|
||||
ADD r3, r7, r14 @n10 = xl0_0 + xl1_1
|
||||
SUB r4, r6, r8 @n20 = xh0_0 - xh0_1
|
||||
SUB r5, r7, r14 @n30 = xl0_0 - xl1_1
|
||||
STR r2, [r0] @x[0 ] = n00
|
||||
STR r3, [r0, #32] @x[8 ] = n10
|
||||
STR r4, [r0, #64] @x[16] = n20
|
||||
STR r5, [r0, #96] @x[24] = n30
|
||||
|
||||
ADD r2, r10, r12 @n01 = xh1_0 + xh1_1
|
||||
SUB r3, r11, r9 @n11 = xl1_0 - xl0_1
|
||||
SUB r4, r10, r12 @n21 = xh1_0 - xh1_1
|
||||
ADD r5, r11, r9 @n31 = xl1_0 + xl0_1
|
||||
STR r2, [r0, #4] @x[1 ] = n01
|
||||
STR r3, [r0, #36] @x[8+1 ] = n11
|
||||
STR r4, [r0, #68] @x[16+1] = n21
|
||||
STR r5, [r0, #100] @x[24+1] = n31
|
||||
|
||||
@Second Butterfly
|
||||
LDR r2, [r0, #8] @x_0 = x[2 ]
|
||||
LDR r3, [r0, #40] @x_2 = x[10]
|
||||
LDR r4, [r0, #72] @x_4 = x[18]
|
||||
LDR r5, [r0, #104] @x_6 = x[26]
|
||||
ADD r6, r2, r4 @xh0_0 = x_0 + x_4
|
||||
SUB r7, r2, r4 @xl0_0 = x_0 - x_4
|
||||
ADD r8, r3, r5 @xh0_1 = x_2 + x_6
|
||||
SUB r9, r3, r5 @xl0_1 = x_2 - x_6
|
||||
|
||||
LDR r2, [r0, #12] @x_1 = x[2 +1]
|
||||
LDR r3, [r0, #44] @x_3 = x[10+1]
|
||||
LDR r4, [r0, #76] @x_5 = x[18+1]
|
||||
LDR r5, [r0, #108] @x_7 = x[26+1]
|
||||
ADD r10, r2, r4 @xh1_0 = x_1 + x_5
|
||||
SUB r11, r2, r4 @xl1_0 = x_1 - x_5
|
||||
ADD r12, r3, r5 @xh1_1 = x_3 + x_7
|
||||
SUB r14, r3, r5 @xl1_1 = x_3 - x_7
|
||||
|
||||
ADD r2, r6, r8 @n00 = xh0_0 + xh0_1
|
||||
ADD r3, r7, r14 @n10 = xl0_0 + xl1_1
|
||||
SUB r4, r6, r8 @n20 = xh0_0 - xh0_1
|
||||
SUB r5, r7, r14 @n30 = xl0_0 - xl1_1
|
||||
STR r2, [r0, #8] @x[2 ] = n00
|
||||
STR r3, [r0, #40] @x[10] = n10
|
||||
STR r4, [r0, #72] @x[18] = n20
|
||||
STR r5, [r0, #104] @x[26] = n30
|
||||
|
||||
ADD r2, r10, r12 @n01 = xh1_0 + xh1_1
|
||||
SUB r3, r11, r9 @n11 = xl1_0 - xl0_1
|
||||
SUB r4, r10, r12 @n21 = xh1_0 - xh1_1
|
||||
ADD r5, r11, r9 @n31 = xl1_0 + xl0_1
|
||||
STR r2, [r0, #12] @x[2 +1] = n01
|
||||
STR r3, [r0, #44] @x[10+1] = n11
|
||||
STR r4, [r0, #76] @x[18+1] = n21
|
||||
STR r5, [r0, #108] @x[26+1] = n31
|
||||
|
||||
@Third Butterfly
|
||||
LDR r2, [r0, #16] @x_0 = x[4 ]
|
||||
LDR r3, [r0, #48] @x_2 = x[12]
|
||||
LDR r4, [r0, #80] @x_4 = x[20]
|
||||
LDR r5, [r0, #112] @x_6 = x[28]
|
||||
ADD r6, r2, r4 @xh0_0 = x_0 + x_4
|
||||
SUB r7, r2, r4 @xl0_0 = x_0 - x_4
|
||||
ADD r8, r3, r5 @xh0_1 = x_2 + x_6
|
||||
SUB r9, r3, r5 @xl0_1 = x_2 - x_6
|
||||
|
||||
LDR r2, [r0, #20] @x_1 = x[4 +1]
|
||||
LDR r3, [r0, #52] @x_3 = x[12+1]
|
||||
LDR r4, [r0, #84] @x_5 = x[20+1]
|
||||
LDR r5, [r0, #116] @x_7 = x[28+1]
|
||||
ADD r10, r2, r4 @xh1_0 = x_1 + x_5
|
||||
SUB r11, r2, r4 @xl1_0 = x_1 - x_5
|
||||
ADD r12, r3, r5 @xh1_1 = x_3 + x_7
|
||||
SUB r14, r3, r5 @xl1_1 = x_3 - x_7
|
||||
|
||||
ADD r2, r6, r8 @n00 = xh0_0 + xh0_1
|
||||
ADD r3, r7, r14 @n10 = xl0_0 + xl1_1
|
||||
SUB r4, r6, r8 @n20 = xh0_0 - xh0_1
|
||||
SUB r5, r7, r14 @n30 = xl0_0 - xl1_1
|
||||
STR r2, [r0, #16] @x[4 ] = n00
|
||||
STR r3, [r0, #48] @x[12] = n10
|
||||
STR r4, [r0, #80] @x[20] = n20
|
||||
STR r5, [r0, #112] @x[28] = n30
|
||||
|
||||
ADD r2, r10, r12 @n01 = xh1_0 + xh1_1
|
||||
SUB r3, r11, r9 @n11 = xl1_0 - xl0_1
|
||||
SUB r4, r10, r12 @n21 = xh1_0 - xh1_1
|
||||
ADD r5, r11, r9 @n31 = xl1_0 + xl0_1
|
||||
STR r2, [r0, #20] @x[4 +1] = n01
|
||||
STR r3, [r0, #52] @x[12+1] = n11
|
||||
STR r4, [r0, #84] @x[20+1] = n21
|
||||
STR r5, [r0, #116] @x[28+1] = n31
|
||||
|
||||
@Fourth Butterfly
|
||||
LDR r2, [r0, #24] @x_0 = x[6 ]
|
||||
LDR r3, [r0, #56] @x_2 = x[14]
|
||||
LDR r4, [r0, #88] @x_4 = x[22]
|
||||
LDR r5, [r0, #120] @x_6 = x[30]
|
||||
ADD r6, r2, r4 @xh0_0 = x_0 + x_4
|
||||
SUB r7, r2, r4 @xl0_0 = x_0 - x_4
|
||||
ADD r8, r3, r5 @xh0_1 = x_2 + x_6
|
||||
SUB r9, r3, r5 @xl0_1 = x_2 - x_6
|
||||
|
||||
LDR r2, [r0, #28] @x_1 = x[6 +1]
|
||||
LDR r3, [r0, #60] @x_3 = x[14+1]
|
||||
LDR r4, [r0, #92] @x_5 = x[22+1]
|
||||
LDR r5, [r0, #124] @x_7 = x[30+1]
|
||||
ADD r10, r2, r4 @xh1_0 = x_1 + x_5
|
||||
SUB r11, r2, r4 @xl1_0 = x_1 - x_5
|
||||
ADD r12, r3, r5 @xh1_1 = x_3 + x_7
|
||||
SUB r14, r3, r5 @xl1_1 = x_3 - x_7
|
||||
|
||||
ADD r2, r6, r8 @n00 = xh0_0 + xh0_1
|
||||
ADD r3, r7, r14 @n10 = xl0_0 + xl1_1
|
||||
SUB r4, r6, r8 @n20 = xh0_0 - xh0_1
|
||||
SUB r5, r7, r14 @n30 = xl0_0 - xl1_1
|
||||
STR r2, [r0, #24] @x[6 ] = n00
|
||||
STR r3, [r0, #56] @x[14] = n10
|
||||
STR r4, [r0, #88] @x[22] = n20
|
||||
STR r5, [r0, #120] @x[30] = n30
|
||||
|
||||
ADD r2, r10, r12 @n01 = xh1_0 + xh1_1
|
||||
SUB r3, r11, r9 @n11 = xl1_0 - xl0_1
|
||||
SUB r4, r10, r12 @n21 = xh1_0 - xh1_1
|
||||
ADD r5, r11, r9 @n31 = xl1_0 + xl0_1
|
||||
STR r2, [r0, #28] @x[6 +1] = n01
|
||||
STR r3, [r0, #60] @x[14+1] = n11
|
||||
STR r4, [r0, #92] @x[22+1] = n21
|
||||
STR r5, [r0, #124] @x[30+1] = n31
|
||||
|
||||
|
||||
@DIT Radix-4 FFT Second Stage
|
||||
@First Butterfly
|
||||
LDR r2, [r0] @inp_0qr = x[0]
|
||||
LDR r3, [r0, #8] @inp_1qr = x[2]
|
||||
LDR r4, [r0, #16] @inp_2qr = x[4]
|
||||
LDR r5, [r0, #24] @inp_3qr = x[6]
|
||||
ADD r6, r2, r4 @sum_0qr = mul_0qr + mul_2qr
|
||||
SUB r7, r2, r4 @sum_1qr = mul_0qr - mul_2qr
|
||||
ADD r8, r3, r5 @sum_2qr = mul_1qr + mul_3qr
|
||||
SUB r9, r3, r5 @sum_3qr = mul_1qr - mul_3qr
|
||||
|
||||
LDR r2, [r0, #4] @inp_0qi = x[1]
|
||||
LDR r3, [r0, #12] @inp_1qi = x[3]
|
||||
LDR r4, [r0, #20] @inp_2qi = x[5]
|
||||
LDR r5, [r0, #28] @inp_3qi = x[7]
|
||||
ADD r10, r2, r4 @sum_0qi = mul_0qi + mul_2qi
|
||||
SUB r11, r2, r4 @sum_1qi = mul_0qi - mul_2qi
|
||||
ADD r12, r3, r5 @sum_2qi = mul_1qi + mul_3qi
|
||||
SUB r14, r3, r5 @sum_3qi = mul_1qi - mul_3qi
|
||||
|
||||
ADD r2, r6, r8 @sum_0qr + sum_2qr
|
||||
ADD r3, r7, r14 @sum_1qr + sum_3qi
|
||||
SUB r4, r6, r8 @sum_0qr - sum_2qr
|
||||
SUB r5, r7, r14 @sum_1qr - sum_3qi
|
||||
STR r2, [r1] @y[0 ] = sum_0qr + sum_2qr
|
||||
STR r3, [r1, #32] @y[8 ] = sum_1qr + sum_3qi
|
||||
STR r4, [r1, #64] @y[16] = sum_0qr - sum_2qr
|
||||
STR r5, [r1, #96] @y[24] = sum_1qr - sum_3qi
|
||||
|
||||
ADD r2, r10, r12 @sum_0qi + sum_2qi
|
||||
SUB r3, r11, r9 @sum_1qi - sum_3qr
|
||||
SUB r4, r10, r12 @sum_0qi - sum_2qi
|
||||
ADD r5, r11, r9 @sum_1qi + sum_3qr
|
||||
STR r2, [r1, #4] @y[0 +1] = sum_0qi + sum_2qi
|
||||
STR r3, [r1, #36] @y[8 +1] = sum_1qi - sum_3qr
|
||||
STR r4, [r1, #68] @y[16+1] = sum_0qi - sum_2qi
|
||||
STR r5, [r1, #100] @y[24+1] = sum_1qi + sum_3qr
|
||||
|
||||
|
||||
@Load twiddle factors
|
||||
LDR r11, =2310960706 @0x89BE7642
|
||||
LDR r12, =3473158396 @0xCF0430FC
|
||||
LDR r14, =2776455811 @0xA57D5A83
|
||||
|
||||
@Second Butterfly
|
||||
LDR r2, [r0, #32] @mul_0qr = inp_0qr = x[8]
|
||||
LDR r3, [r0, #36] @mul_0qi = inp_1qr = x[9]
|
||||
|
||||
LDR r5, [r0, #40] @inp_1qr = x[10]
|
||||
LDR r6, [r0, #44] @inp_1qi = x[11]
|
||||
SMULWB r4, r5, r11 @mul_1qr = mpy_16_32_ns( 0x7642 , inp_1qr)
|
||||
SMLAWB r4, r6, r12, r4 @mul_1qr -= mpy_16_32_ns(-0x30FC , inp_1qi)
|
||||
SMULWT r5, r5, r12 @mul_1qi = mpy_16_32_ns(-0x30FC , inp_1qr)
|
||||
|
||||
LDR r7, [r0, #48] @inp_2qr = x[12]
|
||||
LDR r8, [r0, #52] @inp_2qi = x[13]
|
||||
|
||||
@Moved for delay slot
|
||||
SMLAWB r5, r6, r11, r5 @mul_1qi += mpy_16_32_ns( 0x7642 , inp_1qi)
|
||||
|
||||
ADD r6, r7, r8 @(inp_2qr + inp_2qi)
|
||||
SMULWB r6, r6, r14 @mul_2qr = mpy_16_32_ns(0x5A83 , (inp_2qr + inp_2qi))
|
||||
SUB r7, r8, r7 @(-inp_2qr + inp_2qi)
|
||||
SMULWB r7, r7, r14 @mul_2qi = mpy_16_32_ns(0x5A83 , (-inp_2qr + inp_2qi))
|
||||
|
||||
LDR r9 , [r0, #56] @inp_3qr = x[14]
|
||||
LDR r10, [r0, #60] @inp_3qi = x[15]
|
||||
SMULWB r8, r9 , r12 @mul_3qr = mpy_16_32_ns( 0x30FC , inp_3qr)
|
||||
SMLAWB r8, r10, r11, r8 @mul_3qr -= mpy_16_32_ns(-0x7642 , inp_3qi)@
|
||||
SMULWT r9, r9 , r11 @mul_3qi = mpy_16_32_ns(-0x7642 , inp_3qr)
|
||||
SMLAWB r9, r10, r12, r9 @mul_3qi += mpy_16_32_ns( 0x30FC , inp_3qi)
|
||||
|
||||
ADD r10, r2, r6, lsl #1 @sum_0qr = mul_0qr + (mul_2qr << 1)
|
||||
SUB r2 , r2, r6, lsl #1 @sum_1qr = mul_0qr - (mul_2qr << 1)
|
||||
ADD r6 , r4, r8 @sum_2qr = mul_1qr + mul_3qr
|
||||
SUB r4 , r4, r8 @sum_3qr = mul_1qr - mul_3qr
|
||||
|
||||
ADD r8 , r3, r7, lsl #1 @sum_0qi = mul_0qi + (mul_2qi << 1)
|
||||
SUB r3 , r3, r7, lsl #1 @sum_1qi = mul_0qi - (mul_2qi << 1)
|
||||
ADD r7 , r5, r9 @sum_2qi = mul_1qi + mul_3qi
|
||||
SUB r5 , r5, r9 @sum_3qi = mul_1qi - mul_3qi
|
||||
|
||||
ADD r9 , r10, r6, lsl #1 @sum_0qr + (sum_2qr << 1)
|
||||
SUB r10, r10, r6, lsl #1 @sum_0qr - (sum_2qr << 1)
|
||||
ADD r6 , r2 , r5, lsl #1 @sum_1qr + (sum_3qi << 1)
|
||||
SUB r2 , r2 , r5, lsl #1 @sum_1qr - (sum_3qi << 1)
|
||||
STR r9 , [r1, #8] @y[2 ] = sum_0qr + (sum_2qr << 1)
|
||||
STR r10, [r1, #72] @y[18] = sum_0qr - (sum_2qr << 1)
|
||||
STR r6 , [r1, #40] @y[10] = sum_1qr + (sum_3qi << 1)
|
||||
STR r2 , [r1, #104] @y[26] = sum_1qr - (sum_3qi << 1)
|
||||
|
||||
ADD r5 , r8 , r7, lsl #1 @sum_0qi + (sum_2qi << 1)
|
||||
SUB r8 , r8 , r7, lsl #1 @sum_0qi - (sum_2qi << 1)
|
||||
SUB r7 , r3 , r4, lsl #1 @sum_1qi - (sum_3qr << 1)
|
||||
ADD r3 , r3 , r4, lsl #1 @sum_1qi + (sum_3qr << 1)
|
||||
STR r5 , [r1, #12] @y[2 +1] = sum_0qi + (sum_2qi << 1)
|
||||
STR r8 , [r1, #76] @y[18+1] = sum_0qi - (sum_2qi << 1)
|
||||
STR r7 , [r1, #44] @y[10+1] = sum_1qi - (sum_3qr << 1)
|
||||
STR r3 , [r1, #108] @y[26+1] = sum_1qi + (sum_3qr << 1)
|
||||
|
||||
@Third Butterfly
|
||||
LDR r2, [r0, #64] @mul_0qr = inp_0qr = x[16]
|
||||
|
||||
LDR r5, [r0, #72] @inp_1qr = x[18]
|
||||
LDR r6, [r0, #76] @inp_1qi = x[19]
|
||||
|
||||
@Moved for delay slot
|
||||
LDR r3, [r0, #68] @mul_0qi = inp_1qr = x[17]
|
||||
|
||||
ADD r4, r5, r6 @(inp_1qr + inp_1qi)
|
||||
SMULWB r4, r4, r14 @mul_1qr = mpy_16_32_ns(0x5A83 , (inp_1qr + inp_1qi))
|
||||
SUB r5, r6, r5 @(-inp_1qr + inp_1qi)
|
||||
SMULWB r5, r5, r14 @mul_1qi = mpy_16_32_ns(0x5A83 , (-inp_1qr + inp_1qi))
|
||||
|
||||
LDR r6, [r0, #84] @mul_2qr = inp_2qi = x[21]
|
||||
|
||||
LDR r9 , [r0, #88] @inp_3qr = x[22]
|
||||
LDR r10, [r0, #92] @inp_3qi = x[23]
|
||||
|
||||
@Moved for delay slot
|
||||
LDR r7, [r0, #80] @mul_2qi = inp_2qr = x[20]
|
||||
|
||||
SUB r8 , r10, r9 @(-inp_3qr + inp_3qi)
|
||||
SMULWB r8 , r8 , r14 @mul_3qr = mpy_16_32_ns( 0x5A83 , (-inp_3qr + inp_3qi))
|
||||
ADD r9 , r9 , r10 @(inp_3qr + inp_3qi)
|
||||
SMULWT r9 , r9 , r14 @mul_3qi = mpy_16_32_ns(-0x5A83 , (inp_3qr + inp_3qi))
|
||||
|
||||
ADD r10, r2, r6 @sum_0qr = mul_0qr + mul_2qr
|
||||
SUB r2 , r2, r6 @sum_1qr = mul_0qr - mul_2qr
|
||||
ADD r6 , r4, r8 @sum_2qr = mul_1qr + mul_3qr
|
||||
SUB r4 , r4, r8 @sum_3qr = mul_1qr - mul_3qr
|
||||
|
||||
SUB r8 , r3, r7 @sum_0qi = mul_0qi - mul_2qi
|
||||
ADD r3 , r3, r7 @sum_1qi = mul_0qi + mul_2qi
|
||||
ADD r7 , r5, r9 @sum_2qi = mul_1qi + mul_3qi
|
||||
SUB r5 , r5, r9 @sum_3qi = mul_1qi - mul_3qi
|
||||
|
||||
ADD r9 , r10, r6, lsl #1 @sum_0qr + (sum_2qr << 1)
|
||||
SUB r10, r10, r6, lsl #1 @sum_0qr - (sum_2qr << 1)
|
||||
ADD r6 , r2 , r5, lsl #1 @sum_1qr + (sum_3qi << 1)
|
||||
SUB r2 , r2 , r5, lsl #1 @sum_1qr - (sum_3qi << 1)
|
||||
STR r9 , [r1, #16] @y[4 ] = sum_0qr + (sum_2qr << 1)
|
||||
STR r10, [r1, #80] @y[20] = sum_0qr - (sum_2qr << 1)
|
||||
STR r6 , [r1, #48] @y[12] = sum_1qr + (sum_3qi << 1)
|
||||
STR r2 , [r1, #112] @y[28] = sum_1qr - (sum_3qi << 1)
|
||||
|
||||
ADD r5, r8, r7, lsl #1 @sum_0qi + (sum_2qi << 1)
|
||||
SUB r8, r8, r7, lsl #1 @sum_0qi - (sum_2qi << 1)
|
||||
SUB r7, r3, r4, lsl #1 @sum_1qi - (sum_3qr << 1)
|
||||
ADD r3, r3, r4, lsl #1 @sum_1qi + (sum_3qr << 1)
|
||||
STR r5 , [r1, #20] @y[4 +1] = sum_0qi + (sum_2qi << 1)
|
||||
STR r8 , [r1, #84] @y[20+1] = sum_0qi - (sum_2qi << 1)
|
||||
STR r7 , [r1, #52] @y[12+1] = sum_1qi - (sum_3qr << 1)
|
||||
STR r3 , [r1, #116] @y[28+1] = sum_1qi + (sum_3qr << 1)
|
||||
|
||||
@Fourth Butterfly
|
||||
LDR r2, [r0, #96] @mul_0qr = inp_0qr = x[24]
|
||||
LDR r3, [r0, #100] @mul_0qi = inp_1qr = x[25]
|
||||
|
||||
LDR r5, [r0, #104] @inp_1qr = x[26]
|
||||
LDR r6, [r0, #108] @inp_1qi = x[27]
|
||||
SMULWB r4, r5, r12 @mul_1qr = mpy_16_32_ns( 0x30FC , inp_1qr)
|
||||
SMLAWB r4, r6, r11, r4 @mul_1qr -= mpy_16_32_ns(-0x7642 , inp_1qi)
|
||||
SMULWT r5, r5, r11 @mul_1qi = mpy_16_32_ns(-0x7642 , inp_1qr)
|
||||
|
||||
LDR r7, [r0, #112] @inp_2qr = x[28]
|
||||
LDR r8, [r0, #116] @inp_2qi = x[29]
|
||||
|
||||
@Moved for delay slot
|
||||
SMLAWB r5, r6, r12, r5 @mul_1qi += mpy_16_32_ns( 0x30FC , inp_1qi)
|
||||
|
||||
SUB r6, r8, r7 @(-inp_2qr + inp_2qi)
|
||||
SMULWB r6, r6, r14 @mul_2qr = mpy_16_32_ns( 0x5A83 , (-inp_2qr + inp_2qi))
|
||||
ADD r7, r8, r7 @(inp_2qr + inp_2qi)
|
||||
SMULWT r7, r7, r14 @mul_2qi = mpy_16_32_ns(-0x5A83 , (inp_2qr + inp_2qi))
|
||||
|
||||
LDR r9 , [r0, #120] @inp_3qr = x[30]
|
||||
LDR r10, [r0, #124] @inp_3qi = x[31]
|
||||
SMULWT r8, r9 , r11 @mul_3qr = mpy_16_32_ns(-0x7642 , inp_3qr)
|
||||
SMLAWT r8, r10, r12, r8 @mul_3qr -= mpy_16_32_ns( 0x30FC , inp_3qi)@
|
||||
SMULWB r9, r9 , r12 @mul_3qi = mpy_16_32_ns( 0x30FC , inp_3qr)
|
||||
SMLAWT r9, r10, r11, r9 @mul_3qi += mpy_16_32_ns(-0x7642 , inp_3qi)
|
||||
|
||||
ADD r10, r2, r6, lsl #1 @sum_0qr = mul_0qr + (mul_2qr << 1)
|
||||
SUB r2 , r2, r6, lsl #1 @sum_1qr = mul_0qr - (mul_2qr << 1)
|
||||
ADD r6 , r4, r8 @sum_2qr = mul_1qr + mul_3qr
|
||||
SUB r4 , r4, r8 @sum_3qr = mul_1qr - mul_3qr
|
||||
|
||||
ADD r8 , r3, r7, lsl #1 @sum_0qi = mul_0qi + (mul_2qi << 1)
|
||||
SUB r3 , r3, r7, lsl #1 @sum_1qi = mul_0qi - (mul_2qi << 1)
|
||||
ADD r7 , r5, r9 @sum_2qi = mul_1qi + mul_3qi
|
||||
SUB r5 , r5, r9 @sum_3qi = mul_1qi - mul_3qi
|
||||
|
||||
ADD r9 , r10, r6, lsl #1 @sum_0qr + (sum_2qr << 1)
|
||||
SUB r10, r10, r6, lsl #1 @sum_0qr - (sum_2qr << 1)
|
||||
ADD r6 , r2 , r5, lsl #1 @sum_1qr + (sum_3qi << 1)
|
||||
SUB r2 , r2 , r5, lsl #1 @sum_1qr - (sum_3qi << 1)
|
||||
STR r9 , [r1, #24] @y[6 ] = sum_0qr + (sum_2qr << 1)
|
||||
STR r10, [r1, #88] @y[22] = sum_0qr - (sum_2qr << 1)
|
||||
STR r6 , [r1, #56] @y[14] = sum_1qr + (sum_3qi << 1)
|
||||
STR r2 , [r1, #120] @y[30] = sum_1qr - (sum_3qi << 1)
|
||||
|
||||
ADD r5 , r8 , r7, lsl #1 @sum_0qi + (sum_2qi << 1)
|
||||
SUB r8 , r8 , r7, lsl #1 @sum_0qi - (sum_2qi << 1)
|
||||
SUB r7 , r3 , r4, lsl #1 @sum_1qi - (sum_3qr << 1)
|
||||
ADD r3 , r3 , r4, lsl #1 @sum_1qi + (sum_3qr << 1)
|
||||
STR r5 , [r1, #28] @y[6 +1] = sum_0qi + (sum_2qi << 1)
|
||||
STR r8 , [r1, #92] @y[22+1] = sum_0qi - (sum_2qi << 1)
|
||||
STR r7 , [r1, #60] @y[14+1] = sum_1qi - (sum_3qr << 1)
|
||||
STR r3 , [r1, #124] @y[30+1] = sum_1qi + (sum_3qr << 1)
|
||||
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
516
decoder/armv7/ixheaacd_fft_15_ld.s
Normal file
516
decoder/armv7/ixheaacd_fft_15_ld.s
Normal file
|
|
@ -0,0 +1,516 @@
|
|||
|
||||
|
||||
.equ C53_VAL , -11904
|
||||
.equ SINMU_VAL , 28378
|
||||
.equ C51_52VAL , 0x79BC9D84
|
||||
.equ C54_55VAL , 0x478EB000
|
||||
.equ FFTOP_OFFSET , -1536
|
||||
.equ FFTOP_OFFSET1 , 256
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_fft_15_ld_armv7
|
||||
|
||||
ixheaacd_fft_15_ld_armv7:
|
||||
|
||||
STMFD r13!, {r4 - r12, r14} @
|
||||
STR r1 , [r13, #-4]! @
|
||||
STR r3 , [r13, #-4]! @
|
||||
MOV lr, r2 @ lr - fft3out
|
||||
MOV r12, #384 @
|
||||
|
||||
|
||||
LOOP_FFT5:
|
||||
LDRD r2, [r0] @ r2 = buf1a[0] and r3 = buf1a[1]
|
||||
ADD r0, r0, r12
|
||||
LDRD r4, [r0] @ r4 = buf1a[2] and r5 = buf1a[3]
|
||||
ADD r0, r0, r12
|
||||
LDRD r6, [r0] @ r6 = buf1a[4] and r7 = buf1a[5]
|
||||
ADD r0, r0, r12
|
||||
LDRD r8, [r0] @ r8 = buf1a[6] and r9 = buf1a[7]
|
||||
ADD r0, r0, r12
|
||||
LDRD r10, [r0] @ r10 = buf1a[8] and r11 = buf1a[9]
|
||||
|
||||
|
||||
ADD r1, r4, r10 @ r1 = buf1a[2] + buf1a[8]
|
||||
SUB r4, r4, r10 @ r4 = buf1a[2] - buf1a[8]@
|
||||
LDR r10, = C54_55VAL
|
||||
ADD r12, r6, r8 @ r3 = buf1a[4] + buf1a[6]
|
||||
SUB r8, r6, r8 @ r2 = buf1a[4] - buf1a[6]
|
||||
|
||||
SUB r6, r1, r12 @ (r1 - r3)
|
||||
SMULWT r6, r6, r10 @ t = mult32x16in32_shl((r1 - r3), C54)
|
||||
ADD r1, r1, r12 @ r1 = r1 + r3@
|
||||
ADD r2, r2, r1 @ temp1 = inp[0] + r1@
|
||||
SMULWB r1, r1, r10 @ mult32_shl(r1, C55)
|
||||
ADD r1, r2, r1, lsl #2 @ r1 = temp1 + ((mult32_shl(r1, C55)) << 1)@
|
||||
LDR r10, = C51_52VAL @
|
||||
STR r2, [lr], #4 @ *buf2++ = temp1@
|
||||
|
||||
SUB r12, r1, r6, LSL #1 @ r3 = r1 - t@
|
||||
ADD r1, r1, r6, LSL #1 @ r1 = r1 + t@
|
||||
|
||||
ADD r2, r4, r8 @ (r4 + r2)
|
||||
SMULWT r2, r2, r10 @ t = mult32_shl((r4 + r2), C51)@
|
||||
|
||||
@LSL r2, r2, #1
|
||||
MOV r2, r2, LSL #1
|
||||
|
||||
SMULWB r4, r4, r10 @ mult32_shl(r4, C52)
|
||||
LDR r10, = C53_VAL
|
||||
ADD r4, r2, r4, LSL #2 @ r4 = t + (mult32_shl(r4, C52) << 1)@
|
||||
|
||||
SMULWB r8, r8, r10 @ mult32_shl(r2, C53)
|
||||
ADD r2, r2, r8, LSL #1 @ r2 = t + mult32_shl(r2, C53)@
|
||||
|
||||
ADD r6, r5, r11 @ s1 = buf1a[3] + buf1a[9]
|
||||
SUB r8, r5, r11 @ s4 = buf1a[3] - buf1a[9]
|
||||
LDR r10, = C54_55VAL
|
||||
ADD r5, r7, r9 @ s3 = buf1a[5] + buf1a[7]@
|
||||
SUB r7, r7, r9 @ s2 = buf1a[5] + buf1a[7]@
|
||||
|
||||
|
||||
SUB r9, r6, r5 @ (s1 - s3)
|
||||
SMULWT r9, r9, r10 @ t = mult32x16in32_shl((s1 - s3), C54)
|
||||
ADD r6, r6, r5 @ s1 = s1 + s3@
|
||||
ADD r3, r3, r6 @ temp2 = buf1a[1] + s1
|
||||
SMULWB r6, r6, r10 @ mult32_shl(s1, C55)
|
||||
ADD r6, r3, r6, lsl #2 @ s1 = temp1 + ((mult32_shl(s1, C55)) << 1)@
|
||||
LDR r10, = C51_52VAL @
|
||||
STR r3, [lr], #4 @ *buf2++ = temp2@
|
||||
|
||||
SUB r5, r6, r9, LSL #1 @ s3 = s1 - t@
|
||||
ADD r6, r6, r9, LSL #1 @ s1 = s1 + t@
|
||||
SUB r0, r0, #896 @ r0 -inp[160]
|
||||
|
||||
ADD r11, r7, r8 @ (s4 + s2)
|
||||
SMULWT r11, r11, r10 @ t = mult32_shl((s4 + s2), C51)@
|
||||
@LSL r11, r11, #1 @
|
||||
MOV r11, r11, LSL #1
|
||||
|
||||
|
||||
SMULWB r8, r8, r10 @ mult32_shl(s4, C52)
|
||||
LDR r10, = C53_VAL
|
||||
ADD r8, r11, r8, LSL #2 @ s4 = t + (mult32_shl(s4, C52) << 1)@
|
||||
|
||||
SMULWB r7, r7, r10 @ mult32_shl(s2, C53)
|
||||
ADD r7, r11, r7, LSL #1 @ s2 = t + mult32_shl(s2, C53)@
|
||||
|
||||
|
||||
ADD r3, r1, r7 @ buf2[2] = r1 + s2
|
||||
SUB r9, r6, r2 @ buf2[3] = s1 - r2
|
||||
SUB r10, r12, r8 @ buf2[4] = r3 - s4
|
||||
ADD r11, r5, r4 @ buf2[5] = s3 + r4
|
||||
ADD r12, r12, r8 @ buf2[6] = r3 + s4
|
||||
SUB r4, r5, r4 @ buf2[7] = s3 - r4
|
||||
SUB r5, r1, r7 @ buf2[8] = r1 - s2
|
||||
ADD r6, r6, r2 @ buf2[9] = s1 + r2
|
||||
STMIA lr!, {r3, r9-r12} @
|
||||
|
||||
MOV r12, #384 @
|
||||
LDR r1, = FFTOP_OFFSET @
|
||||
|
||||
STMIA lr!, {r4-r6} @
|
||||
|
||||
|
||||
LDRD r2, [r0] @ r2 = buf1a[0] and r3 = buf1a[1]
|
||||
ADD r0, r0, r12
|
||||
LDRD r4, [r0] @ r4 = buf1a[2] and r5 = buf1a[3]
|
||||
ADD r0, r0, r12
|
||||
LDRD r6, [r0] @ r6 = buf1a[4] and r7 = buf1a[5]
|
||||
ADD r0, r0, r12
|
||||
LDRD r8, [r0] @ r8 = buf1a[6] and r9 = buf1a[7]
|
||||
ADD r0, r0, r1
|
||||
LDRD r10, [r0] @ r10 = buf1a[8] and r11 = buf1a[9]
|
||||
ADD r0, r0, #1024 @ r0 -inp[320]
|
||||
|
||||
ADD r1, r4, r10 @ r1 = buf1a[2] + buf1a[8]
|
||||
SUB r4, r4, r10 @ r4 = buf1a[2] - buf1a[8]@
|
||||
LDR r10, = C54_55VAL
|
||||
ADD r12, r6, r8 @ r3 = buf1a[4] + buf1a[6]
|
||||
SUB r8, r6, r8 @ r2 = buf1a[4] - buf1a[6]
|
||||
|
||||
SUB r6, r1, r12 @ (r1 - r3)
|
||||
SMULWT r6, r6, r10 @ t = mult32x16in32_shl((r1 - r3), C54)
|
||||
ADD r1, r1, r12 @ r1 = r1 + r3@
|
||||
ADD r2, r2, r1 @ temp1 = inp[0] + r1@
|
||||
SMULWB r1, r1, r10 @ mult32_shl(r1, C55)
|
||||
ADD r1, r2, r1, lsl #2 @ r1 = temp1 + ((mult32_shl(r1, C55)) << 1)@
|
||||
LDR r10, = C51_52VAL @
|
||||
STR r2, [lr], #4 @ *buf2++ = temp1@
|
||||
|
||||
SUB r12, r1, r6, LSL #1 @ r3 = r1 - t@
|
||||
ADD r1, r1, r6, LSL #1 @ r1 = r1 + t@
|
||||
|
||||
ADD r2, r4, r8 @ (r4 + r2)
|
||||
SMULWT r2, r2, r10 @ t = mult32_shl((r4 + r2), C51)@
|
||||
@LSL r2, r2, #1
|
||||
MOV r2, r2, LSL #1
|
||||
|
||||
|
||||
SMULWB r4, r4, r10 @ mult32_shl(r4, C52)
|
||||
LDR r10, = C53_VAL
|
||||
ADD r4, r2, r4, LSL #2 @ r4 = t + (mult32_shl(r4, C52) << 1)@
|
||||
|
||||
SMULWB r8, r8, r10 @ mult32_shl(r2, C53)
|
||||
ADD r2, r2, r8, LSL #1 @ r2 = t + mult32_shl(r2, C53)@
|
||||
|
||||
ADD r6, r5, r11 @ s1 = buf1a[3] + buf1a[9]
|
||||
SUB r8, r5, r11 @ s4 = buf1a[3] - buf1a[9]
|
||||
LDR r10, = C54_55VAL
|
||||
ADD r5, r7, r9 @ s3 = buf1a[5] + buf1a[7]@
|
||||
SUB r7, r7, r9 @ s2 = buf1a[5] + buf1a[7]@
|
||||
|
||||
|
||||
SUB r9, r6, r5 @ (s1 - s3)
|
||||
SMULWT r9, r9, r10 @ t = mult32x16in32_shl((s1 - s3), C54)
|
||||
ADD r6, r6, r5 @ s1 = s1 + s3@
|
||||
ADD r3, r3, r6 @ temp2 = buf1a[1] + s1
|
||||
SMULWB r6, r6, r10 @ mult32_shl(s1, C55)
|
||||
ADD r6, r3, r6, lsl #2 @ s1 = temp1 + ((mult32_shl(s1, C55)) << 1)@
|
||||
LDR r10, = C51_52VAL @
|
||||
STR r3, [lr], #4 @ *buf2++ = temp2@
|
||||
|
||||
|
||||
SUB r5, r6, r9, LSL #1 @ s3 = s1 - t@
|
||||
ADD r6, r6, r9, LSL #1 @ s1 = s1 + t@
|
||||
|
||||
ADD r11, r7, r8 @ (s4 + s2)
|
||||
SMULWT r11, r11, r10 @ t = mult32_shl((s4 + s2), C51)@
|
||||
@LSL r11, r11, #1
|
||||
MOV r11, r11, LSL #1
|
||||
|
||||
SMULWB r8, r8, r10 @mult32_shl(s4, C52)
|
||||
LDR r10, = C53_VAL
|
||||
ADD r8, r11, r8, LSL #2 @s4 = t + (mult32_shl(s4, C52) << 1)@
|
||||
|
||||
SMULWB r7, r7, r10 @mult32_shl(s2, C53)
|
||||
ADD r7, r11, r7, LSL #1 @s2 = t + mult32_shl(s2, C53)@
|
||||
|
||||
ADD r3, r1, r7 @buf2[2] = r1 + s2
|
||||
SUB r9, r6, r2 @buf2[3] = s1 - r2
|
||||
SUB r10, r12, r8 @buf2[4] = r3 - s4
|
||||
ADD r11, r5, r4 @buf2[5] = s3 + r4
|
||||
ADD r12, r12, r8 @buf2[6] = r3 + s4
|
||||
SUB r4, r5, r4 @buf2[7] = s3 - r4
|
||||
SUB r5, r1, r7 @buf2[8] = r1 - s2
|
||||
ADD r6, r6, r2 @buf2[9] = s1 + r2
|
||||
LDR r1, = FFTOP_OFFSET @
|
||||
|
||||
STMIA lr!, {r3, r9-r12}
|
||||
MOV r12, #384 @
|
||||
STMIA lr!, {r4-r6} @
|
||||
|
||||
LDRD r2, [r0] @ r2 = buf1a[0] and r3 = buf1a[1]
|
||||
ADD r0, r0, r12
|
||||
LDRD r4, [r0] @ r4 = buf1a[2] and r5 = buf1a[3]
|
||||
ADD r0, r0, r1
|
||||
|
||||
LDRD r6, [r0] @ r6 = buf1a[4] and r7 = buf1a[5]
|
||||
ADD r0, r0, r12
|
||||
LDRD r8, [r0] @ r8 = buf1a[6] and r9 = buf1a[7]
|
||||
ADD r0, r0, r12
|
||||
LDRD r10, [r0] @ r10 = buf1a[8] and r11 = buf1a[9]
|
||||
ADD r0, r0, r12
|
||||
|
||||
ADD r1, r4, r10 @ r1 = buf1a[2] + buf1a[8]
|
||||
SUB r4, r4, r10 @ r4 = buf1a[2] - buf1a[8]@
|
||||
LDR r10, = C54_55VAL
|
||||
ADD r12, r6, r8 @ r3 = buf1a[4] + buf1a[6]
|
||||
SUB r8, r6, r8 @ r2 = buf1a[4] - buf1a[6]
|
||||
|
||||
SUB r6, r1, r12 @ (r1 - r3)
|
||||
SMULWT r6, r6, r10 @ t = mult32x16in32_shl((r1 - r3), C54)
|
||||
ADD r1, r1, r12 @ r1 = r1 + r3@
|
||||
ADD r2, r2, r1 @ temp1 = inp[0] + r1@
|
||||
SMULWB r1, r1, r10 @ mult32_shl(r1, C55)
|
||||
ADD r1, r2, r1, lsl #2 @ r1 = temp1 + ((mult32_shl(r1, C55)) << 1)@
|
||||
LDR r10, = C51_52VAL @
|
||||
STR r2, [lr], #4 @ *buf2++ = temp1@
|
||||
|
||||
SUB r12, r1, r6, LSL #1 @ r3 = r1 - t@
|
||||
ADD r1, r1, r6, LSL #1 @ r1 = r1 + t@
|
||||
|
||||
ADD r2, r4, r8 @ (r4 + r2)
|
||||
SMULWT r2, r2, r10 @ t = mult32_shl((r4 + r2), C51)@
|
||||
@LSL r2, r2, #1
|
||||
MOV r2, r2, LSL #1
|
||||
|
||||
SMULWB r4, r4, r10 @ mult32_shl(r4, C52)
|
||||
LDR r10, = C53_VAL
|
||||
ADD r4, r2, r4, LSL #2 @ r4 = t + (mult32_shl(r4, C52) << 1)@
|
||||
|
||||
SMULWB r8, r8, r10 @ mult32_shl(r2, C53)
|
||||
ADD r2, r2, r8, LSL #1 @ r2 = t + mult32_shl(r2, C53)@
|
||||
|
||||
ADD r6, r5, r11 @ s1 = buf1a[3] + buf1a[9]
|
||||
SUB r8, r5, r11 @ s4 = buf1a[3] - buf1a[9]
|
||||
LDR r10, = C54_55VAL
|
||||
ADD r5, r7, r9 @ s3 = buf1a[5] + buf1a[7]@
|
||||
SUB r7, r7, r9 @ s2 = buf1a[5] + buf1a[7]@
|
||||
|
||||
SUB r9, r6, r5 @ (s1 - s3)
|
||||
SMULWT r9, r9, r10 @ t = mult32x16in32_shl((s1 - s3), C54)
|
||||
ADD r6, r6, r5 @ s1 = s1 + s3@
|
||||
ADD r3, r3, r6 @ temp2 = buf1a[1] + s1
|
||||
SMULWB r6, r6, r10 @ mult32_shl(s1, C55)
|
||||
ADD r6, r3, r6, lsl #2 @ s1 = temp1 + ((mult32_shl(s1, C55)) << 1)@
|
||||
LDR r10, = C51_52VAL @
|
||||
STR r3, [lr], #4 @ *buf2++ = temp2@
|
||||
|
||||
SUB r5, r6, r9, LSL #1 @ s3 = s1 - t@
|
||||
ADD r6, r6, r9, LSL #1 @ s1 = s1 + t@
|
||||
|
||||
ADD r11, r7, r8 @ (s4 + s2)
|
||||
SMULWT r11, r11, r10 @ t = mult32_shl((s4 + s2), C51)@
|
||||
@LSL r11, r11, #1 @
|
||||
MOV r11, r11, LSL #1
|
||||
|
||||
SMULWB r8, r8, r10 @mult32_shl(s4, C52)
|
||||
LDR r10, = C53_VAL
|
||||
ADD r8, r11, r8, LSL #2 @s4 = t + (mult32_shl(s4, C52) << 1)@
|
||||
|
||||
|
||||
SMULWB r7, r7, r10 @mult32_shl(s2, C53)
|
||||
ADD r7, r11, r7, LSL #1 @s2 = t + mult32_shl(s2, C53)@
|
||||
|
||||
ADD r3, r1, r7 @buf2[2] = r1 + s2
|
||||
SUB r9, r6, r2 @buf2[3] = s1 - r2
|
||||
SUB r10, r12, r8 @buf2[4] = r3 - s4
|
||||
ADD r11, r5, r4 @buf2[5] = s3 + r4
|
||||
ADD r12, r12, r8 @buf2[6] = r3 + s4
|
||||
SUB r4, r5, r4 @buf2[7] = s3 - r4
|
||||
SUB r5, r1, r7 @buf2[8] = r1 - s2
|
||||
ADD r6, r6, r2 @buf2[9] = s1 + r2
|
||||
|
||||
STMIA lr!, {r3, r9-r12}
|
||||
STMIA lr!, {r4-r6} @
|
||||
|
||||
SUB lr, lr, #120 @
|
||||
LDR r12, = SINMU_VAL @
|
||||
LDMFD r13!, {r10, r11} @
|
||||
|
||||
|
||||
LOOP_FFT3:
|
||||
LDRD r0, [lr] @ r0 = fft3outptr[0] and r1 = fft3outptr[1]
|
||||
LDRD r2, [lr, #40] @ r2 = fft3outptr[10] and r3 = fft3outptr[11]
|
||||
LDRD r4, [lr, #80] @ r4 = fft3outptr[20] and r5 = fft3outptr[21]
|
||||
ADD lr, lr, #8 @
|
||||
|
||||
ADD r6, r0, r2 @ X01r = add32(buf1[0], buf1[2])
|
||||
ADD r7, r1, r3 @ X01i = add32(buf1[1], buf1[3])
|
||||
|
||||
ADD r8, r2, r4 @ add_r = add32(buf1[2], buf1[4])
|
||||
ADD r9, r3, r5 @ add_i = add32(buf1[3], buf1[5])
|
||||
|
||||
SUB r2, r2, r4 @ sub_r = sub32(buf1[2], buf1[4])@
|
||||
SUB r3, r3, r5 @ sub_i = sub32(buf1[3], buf1[5])@
|
||||
|
||||
@ASR r8, r8, #1 @ p1 = add_r >> 1@
|
||||
MOV r8, r8, ASR #1
|
||||
|
||||
@ASR r9, r9, #1 @ p4 = add_i >> 1@
|
||||
MOV r9, r9, ASR #1
|
||||
|
||||
SMULWB r3, r3, r12 @ p2 = mult32x16in32_shl(sub_i, sinmu)@
|
||||
SMULWB r2, r2, r12 @ p3 = mult32x16in32_shl(sub_r, sinmu)@
|
||||
|
||||
SUB r0, r0, r8 @ temp = sub32(buf1a[0], p1)@
|
||||
ADD r8, r1, r2, LSL #1 @ temp1 = add32(buf1a[1], p3)@
|
||||
SUB r2, r1, r2, LSL #1 @ temp2 = sub32(buf1a[1], p3)@
|
||||
|
||||
ADD r4, r6, r4 @ add32(X01r, buf1a[4])@
|
||||
ADD r5, r7, r5 @ add32(X01i, buf1a[5])@
|
||||
ADD r6, r0, r3, LSL #1 @ add32(temp, p2)@
|
||||
SUB r7, r2, r9 @ sub32(temp2, p4)@
|
||||
SUB r9, r8, r9 @ sub32(temp1, p4)@
|
||||
SUB r8, r0, r3, LSL #1 @ sub32(temp, p2)@
|
||||
|
||||
MOV r3, r11 @
|
||||
LDRB r0, [r10], #1 @
|
||||
LDRB r1, [r10], #1 @
|
||||
LDRB r2, [r10], #1 @
|
||||
ADD r0, r11, r0, lsl #3 @
|
||||
ADD r1, r11, r1, lsl #3 @
|
||||
ADD r2, r11, r2, lsl #3 @
|
||||
STRD r4, [r0] @
|
||||
STRD r6, [r1] @
|
||||
STRD r8, [r2] @
|
||||
|
||||
LDRD r0, [lr] @ r0 = fft3outptr[0] and r1 = fft3outptr[1]
|
||||
LDRD r2, [lr, #40] @ r2 = fft3outptr[10] and r3 = fft3outptr[11]
|
||||
LDRD r4, [lr, #80] @ r4 = fft3outptr[20] and r5 = fft3outptr[21]
|
||||
ADD lr, lr, #8 @
|
||||
|
||||
|
||||
ADD r6, r0, r2 @ X01r = add32(buf1[0], buf1[2])
|
||||
ADD r7, r1, r3 @ X01i = add32(buf1[1], buf1[3])
|
||||
|
||||
ADD r8, r2, r4 @ add_r = add32(buf1[2], buf1[4])
|
||||
ADD r9, r3, r5 @ add_i = add32(buf1[3], buf1[5])
|
||||
|
||||
SUB r2, r2, r4 @ sub_r = sub32(buf1[2], buf1[4])@
|
||||
SUB r3, r3, r5 @ sub_i = sub32(buf1[3], buf1[5])@
|
||||
|
||||
@ASR r8, r8, #1 @ p1 = add_r >> 1@
|
||||
MOV r8, r8, ASR #1
|
||||
@ASR r9, r9, #1 @ p4 = add_i >> 1@
|
||||
MOV r9, r9, ASR #1
|
||||
|
||||
SMULWB r3, r3, r12 @ p2 = mult32x16in32_shl(sub_i, sinmu)@
|
||||
SMULWB r2, r2, r12 @ p3 = mult32x16in32_shl(sub_r, sinmu)@
|
||||
|
||||
SUB r0, r0, r8 @ temp = sub32(buf1a[0], p1)@
|
||||
ADD r8, r1, r2, LSL #1 @ temp1 = add32(buf1a[1], p3)@
|
||||
SUB r2, r1, r2, LSL #1 @ temp2 = sub32(buf1a[1], p3)@
|
||||
|
||||
ADD r4, r6, r4 @ add32(X01r, buf1a[4])@
|
||||
ADD r5, r7, r5 @ add32(X01i, buf1a[5])@
|
||||
ADD r6, r0, r3, LSL #1 @ add32(temp, p2)@
|
||||
SUB r7, r2, r9 @ sub32(temp2, p4)@
|
||||
SUB r9, r8, r9 @ sub32(temp1, p4)@
|
||||
SUB r8, r0, r3, LSL #1 @ sub32(temp, p2)@
|
||||
|
||||
LDRB r0, [r10], #1 @
|
||||
LDRB r1, [r10], #1 @
|
||||
LDRB r2, [r10], #1 @
|
||||
ADD r0, r11, r0, lsl #3 @
|
||||
ADD r1, r11, r1, lsl #3 @
|
||||
ADD r2, r11, r2, lsl #3 @
|
||||
STRD r4, [r0] @
|
||||
STRD r6, [r1] @
|
||||
STRD r8, [r2] @
|
||||
|
||||
LDRD r0, [lr] @ r0 = fft3outptr[0] and r1 = fft3outptr[1]
|
||||
LDRD r2, [lr, #40] @ r2 = fft3outptr[10] and r3 = fft3outptr[11]
|
||||
LDRD r4, [lr, #80] @ r4 = fft3outptr[20] and r5 = fft3outptr[21]
|
||||
ADD lr, lr, #8 @
|
||||
|
||||
|
||||
ADD r6, r0, r2 @ X01r = add32(buf1[0], buf1[2])
|
||||
ADD r7, r1, r3 @ X01i = add32(buf1[1], buf1[3])
|
||||
|
||||
ADD r8, r2, r4 @ add_r = add32(buf1[2], buf1[4])
|
||||
ADD r9, r3, r5 @ add_i = add32(buf1[3], buf1[5])
|
||||
|
||||
SUB r2, r2, r4 @ sub_r = sub32(buf1[2], buf1[4])@
|
||||
SUB r3, r3, r5 @ sub_i = sub32(buf1[3], buf1[5])@
|
||||
|
||||
|
||||
@ASR r8, r8, #1 @ p1 = add_r >> 1@
|
||||
MOV r8, r8, ASR #1
|
||||
@ASR r9, r9, #1 @ p4 = add_i >> 1@
|
||||
MOV r9, r9, ASR #1
|
||||
|
||||
SMULWB r3, r3, r12 @ p2 = mult32x16in32_shl(sub_i, sinmu)@
|
||||
SMULWB r2, r2, r12 @ p3 = mult32x16in32_shl(sub_r, sinmu)@
|
||||
|
||||
SUB r0, r0, r8 @ temp = sub32(buf1a[0], p1)@
|
||||
ADD r8, r1, r2, LSL #1 @ temp1 = add32(buf1a[1], p3)@
|
||||
SUB r2, r1, r2, LSL #1 @ temp2 = sub32(buf1a[1], p3)@
|
||||
|
||||
ADD r4, r6, r4 @ add32(X01r, buf1a[4])@
|
||||
ADD r5, r7, r5 @ add32(X01i, buf1a[5])@
|
||||
ADD r6, r0, r3, LSL #1 @ add32(temp, p2)@
|
||||
SUB r7, r2, r9 @ sub32(temp2, p4)@
|
||||
SUB r9, r8, r9 @ sub32(temp1, p4)@
|
||||
SUB r8, r0, r3, LSL #1 @ sub32(temp, p2)@
|
||||
|
||||
LDRB r0, [r10], #1 @
|
||||
LDRB r1, [r10], #1 @
|
||||
LDRB r2, [r10], #1 @
|
||||
ADD r0, r11, r0, lsl #3 @
|
||||
ADD r1, r11, r1, lsl #3 @
|
||||
ADD r2, r11, r2, lsl #3 @
|
||||
STRD r4, [r0] @
|
||||
STRD r6, [r1] @
|
||||
STRD r8, [r2] @
|
||||
|
||||
LDRD r0, [lr] @ r0 = fft3outptr[0] and r1 = fft3outptr[1]
|
||||
LDRD r2, [lr, #40] @ r2 = fft3outptr[10] and r3 = fft3outptr[11]
|
||||
LDRD r4, [lr, #80] @ r4 = fft3outptr[20] and r5 = fft3outptr[21]
|
||||
ADD lr, lr, #8 @
|
||||
|
||||
ADD r6, r0, r2 @ X01r = add32(buf1[0], buf1[2])
|
||||
ADD r7, r1, r3 @ X01i = add32(buf1[1], buf1[3])
|
||||
|
||||
ADD r8, r2, r4 @ add_r = add32(buf1[2], buf1[4])
|
||||
ADD r9, r3, r5 @ add_i = add32(buf1[3], buf1[5])
|
||||
|
||||
SUB r2, r2, r4 @ sub_r = sub32(buf1[2], buf1[4])@
|
||||
SUB r3, r3, r5 @ sub_i = sub32(buf1[3], buf1[5])@
|
||||
|
||||
@ASR r8, r8, #1 @ p1 = add_r >> 1@
|
||||
MOV r8, r8, ASR #1
|
||||
@ASR r9, r9, #1 @ p4 = add_i >> 1@
|
||||
MOV r9, r9, ASR #1
|
||||
|
||||
SMULWB r3, r3, r12 @ p2 = mult32x16in32_shl(sub_i, sinmu)@
|
||||
SMULWB r2, r2, r12 @ p3 = mult32x16in32_shl(sub_r, sinmu)@
|
||||
|
||||
SUB r0, r0, r8 @ temp = sub32(buf1a[0], p1)@
|
||||
ADD r8, r1, r2, LSL #1 @ temp1 = add32(buf1a[1], p3)@
|
||||
SUB r2, r1, r2, LSL #1 @ temp2 = sub32(buf1a[1], p3)@
|
||||
|
||||
ADD r4, r6, r4 @ add32(X01r, buf1a[4])@
|
||||
ADD r5, r7, r5 @ add32(X01i, buf1a[5])@
|
||||
ADD r6, r0, r3, LSL #1 @ add32(temp, p2)@
|
||||
SUB r7, r2, r9 @ sub32(temp2, p4)@
|
||||
SUB r9, r8, r9 @ sub32(temp1, p4)@
|
||||
SUB r8, r0, r3, LSL #1 @ sub32(temp, p2)@
|
||||
|
||||
LDRB r0, [r10], #1 @
|
||||
LDRB r1, [r10], #1 @
|
||||
LDRB r2, [r10], #1 @
|
||||
ADD r0, r11, r0, lsl #3 @
|
||||
ADD r1, r11, r1, lsl #3 @
|
||||
ADD r2, r11, r2, lsl #3 @
|
||||
STRD r4, [r0] @
|
||||
STRD r6, [r1] @
|
||||
STRD r8, [r2] @
|
||||
|
||||
LDRD r0, [lr] @ r0 = fft3outptr[0] and r1 = fft3outptr[1]
|
||||
LDRD r2, [lr, #40] @ r2 = fft3outptr[10] and r3 = fft3outptr[11]
|
||||
LDRD r4, [lr, #80] @ r4 = fft3outptr[20] and r5 = fft3outptr[21]
|
||||
|
||||
ADD r6, r0, r2 @ X01r = add32(buf1[0], buf1[2])
|
||||
ADD r7, r1, r3 @ X01i = add32(buf1[1], buf1[3])
|
||||
|
||||
ADD r8, r2, r4 @ add_r = add32(buf1[2], buf1[4])
|
||||
ADD r9, r3, r5 @ add_i = add32(buf1[3], buf1[5])
|
||||
|
||||
SUB r2, r2, r4 @ sub_r = sub32(buf1[2], buf1[4])@
|
||||
SUB r3, r3, r5 @ sub_i = sub32(buf1[3], buf1[5])@
|
||||
|
||||
@ASR r8, r8, #1 @ p1 = add_r >> 1@
|
||||
MOV r8, r8, ASR #1
|
||||
@ASR r9, r9, #1 @ p4 = add_i >> 1@
|
||||
MOV r9, r9, ASR #1
|
||||
|
||||
SMULWB r3, r3, r12 @ p2 = mult32x16in32_shl(sub_i, sinmu)@
|
||||
SMULWB r2, r2, r12 @ p3 = mult32x16in32_shl(sub_r, sinmu)@
|
||||
|
||||
SUB r0, r0, r8 @ temp = sub32(buf1a[0], p1)@
|
||||
ADD r8, r1, r2, LSL #1 @ temp1 = add32(buf1a[1], p3)@
|
||||
SUB r2, r1, r2, LSL #1 @ temp2 = sub32(buf1a[1], p3)@
|
||||
|
||||
ADD r4, r6, r4 @ add32(X01r, buf1a[4])@
|
||||
ADD r5, r7, r5 @ add32(X01i, buf1a[5])@
|
||||
ADD r6, r0, r3, LSL #1 @ add32(temp, p2)@
|
||||
SUB r7, r2, r9 @ sub32(temp2, p4)@
|
||||
SUB r9, r8, r9 @ sub32(temp1, p4)@
|
||||
SUB r8, r0, r3, LSL #1 @ sub32(temp, p2)@
|
||||
|
||||
LDRB r0, [r10], #1 @
|
||||
LDRB r1, [r10], #1 @
|
||||
LDRB r2, [r10], #1 @
|
||||
ADD r0, r11, r0, lsl #3 @
|
||||
ADD r1, r11, r1, lsl #3 @
|
||||
ADD r2, r11, r2, lsl #3 @
|
||||
STRD r4, [r0] @
|
||||
STRD r6, [r1] @
|
||||
STRD r8, [r2] @
|
||||
|
||||
LDMFD r13!, {r4 - r12, r15}
|
||||
|
||||
|
||||
89
decoder/armv7/ixheaacd_fft_armv7.c
Normal file
89
decoder/armv7/ixheaacd_fft_armv7.c
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include <ixheaacd_type_def.h>
|
||||
#include "ixheaacd_interface.h"
|
||||
#include "ixheaacd_constants.h"
|
||||
#include <ixheaacd_basic_ops32.h>
|
||||
#include "ixheaacd_function_selector.h"
|
||||
|
||||
extern const WORD32 ixheaacd_twiddle_table_fft_32x32[514];
|
||||
extern const WORD8 ixheaacd_mps_dig_rev[16];
|
||||
|
||||
/* Power-of-two complex FFT/IFFT wrapper around the ARMv7 assembly kernels.
 *
 * Interleaves and pre-scales the input so the fixed-point kernel cannot
 * overflow, dispatches to the forward or inverse assembly transform, then
 * de-interleaves the result back into xr/xi.
 *
 * xr, xi    : real/imaginary parts; overwritten with the transform output.
 * nlength   : transform size (power of two; working buffers hold up to
 *             1024 interleaved words, i.e. 512 complex points).
 * fft_mode  : -1 selects the forward FFT, any other value the inverse FFT.
 * preshift  : in/out scale bookkeeping; on return holds
 *             (shift applied here) - (incoming *preshift).
 */
VOID ixheaacd_complex_fft_p2_armv7(WORD32 *xr, WORD32 *xi, WORD32 nlength,
                                   WORD32 fft_mode, WORD32 *preshift) {
  WORD32 idx;
  WORD32 stage_pairs;
  WORD32 odd_stage_cnt; /* nonzero when log2(nlength) is odd */
  WORD32 len_left, shift;
  WORD32 dig_rev_shift; /* computed for parity with other variants; unused */
  WORD32 interleaved_in[1024];
  WORD32 interleaved_out[1024];
  WORD32 guard_bits = 0;

  dig_rev_shift = ixheaacd_norm32(nlength) + 1 - 16;
  stage_pairs = 30 - ixheaacd_norm32(nlength); /* log2(nlength) for 2^m */
  odd_stage_cnt = stage_pairs & 1;
  stage_pairs = stage_pairs >> 1;

  /* Guard-bit count: guard_bits = log2(nlength). */
  len_left = nlength;
  while (len_left >> 1) {
    guard_bits++;
    len_left = len_left >> 1;
  }

  shift = (guard_bits % 2 == 0) ? ((guard_bits + 4) / 2)
                                : ((guard_bits + 3) / 2);

  /* Interleave re/im and pre-scale.  Division (not '>>') keeps
   * round-toward-zero behaviour for negative samples. */
  for (idx = 0; idx < nlength; idx++) {
    interleaved_in[2 * idx] = (xr[idx] / (1 << (shift)));
    interleaved_in[2 * idx + 1] = (xi[idx] / (1 << (shift)));
  }

  if (fft_mode == -1) {
    ixheaacd_complex_fft_p2_asm(ixheaacd_twiddle_table_fft_32x32, nlength,
                                interleaved_in, interleaved_out);
  } else {
    ixheaacd_complex_ifft_p2_asm(ixheaacd_twiddle_table_fft_32x32, nlength,
                                 interleaved_in, interleaved_out);
  }
  /* An odd stage count means one extra radix-2 pass: one more scale bit. */
  if (odd_stage_cnt) shift += 1;

  /* De-interleave the kernel output back into the caller's buffers. */
  for (idx = 0; idx < nlength; idx++) {
    xr[idx] = interleaved_out[2 * idx];
    xi[idx] = interleaved_out[2 * idx + 1];
  }

  *preshift = shift - *preshift;
  return;
}
|
||||
|
||||
/* 64-point complex FFT wrapper used by the MPS path.
 *
 * Runs the assembly kernel with the 32-bit twiddle table and the MPS
 * digit-reversal table, then copies the interleaved result into the
 * caller's re/im buffers.  NOTE: outputs are written at even indices of
 * fin_re/fin_im (stride-2), exactly as in the reference implementation.
 */
VOID ixheaacd_mps_complex_fft_64_armv7(WORD32 *ptr_x, WORD32 *fin_re,
                                       WORD32 *fin_im, WORD32 nlength) {
  WORD32 k;
  WORD32 stage_cnt;
  WORD32 work_buf[128];
  const WORD32 *twiddles;

  stage_cnt = 30 - ixheaacd_norm32(nlength); /* log2(nlength) for 2^m */
  stage_cnt = stage_cnt >> 1;                /* kept for parity; unused below */

  twiddles = ixheaacd_twiddle_table_fft_32x32; /* 32-bit twiddle table */

  ixheaacd_mps_complex_fft_64_asm(twiddles, nlength, ptr_x, work_buf,
                                  ixheaacd_mps_dig_rev);

  /* Stride-2 de-interleave: pair k lands at index 2*k of each output. */
  for (k = 0; k < nlength; k++) {
    fin_re[2 * k] = work_buf[2 * k];
    fin_im[2 * k] = work_buf[2 * k + 1];
  }

  return;
}
|
||||
185
decoder/armv7/ixheaacd_function_selector_arm_non_neon.c
Normal file
185
decoder/armv7/ixheaacd_function_selector_arm_non_neon.c
Normal file
|
|
@ -0,0 +1,185 @@
|
|||
/******************************************************************************
|
||||
* *
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "ixheaacd_sbr_common.h"
|
||||
#include <ixheaacd_type_def.h>
|
||||
|
||||
#include "ixheaacd_constants.h"
|
||||
#include <ixheaacd_basic_ops32.h>
|
||||
#include <ixheaacd_basic_ops16.h>
|
||||
#include <ixheaacd_basic_ops40.h>
|
||||
#include "ixheaacd_basic_ops.h"
|
||||
|
||||
#include <ixheaacd_basic_op.h>
|
||||
#include "ixheaacd_intrinsics.h"
|
||||
#include "ixheaacd_common_rom.h"
|
||||
#include "ixheaacd_sbrdecsettings.h"
|
||||
#include "ixheaacd_bitbuffer.h"
|
||||
#include "ixheaacd_defines.h"
|
||||
|
||||
#include "ixheaacd_pns.h"
|
||||
|
||||
#include <ixheaacd_aac_rom.h>
|
||||
#include "ixheaacd_aac_imdct.h"
|
||||
#include "ixheaacd_pulsedata.h"
|
||||
|
||||
#include "ixheaacd_drc_data_struct.h"
|
||||
#include "ixheaacd_channelinfo.h"
|
||||
#include "ixheaacd_drc_dec.h"
|
||||
|
||||
#include "ixheaacd_sbrdecoder.h"
|
||||
#include "ixheaacd_tns.h"
|
||||
#include "ixheaacd_sbr_scale.h"
|
||||
#include "ixheaacd_lpp_tran.h"
|
||||
#include "ixheaacd_env_extr_part.h"
|
||||
#include <ixheaacd_sbr_rom.h>
|
||||
#include "ixheaacd_block.h"
|
||||
#include "ixheaacd_hybrid.h"
|
||||
#include "ixheaacd_ps_dec.h"
|
||||
#include "ixheaacd_env_extr.h"
|
||||
#include "ixheaacd_basic_funcs.h"
|
||||
#include "ixheaacd_env_calc.h"
|
||||
|
||||
/* Function-pointer dispatch table for ARMv7 builds WITHOUT NEON
 * (file: ixheaacd_function_selector_arm_non_neon.c).
 * Each module-level pointer binds a decoder hot-spot either to a
 * hand-written ARMv7 assembly kernel (suffix `_armv7`) or, where no
 * non-NEON assembly exists, to the generic C implementation
 * (suffix `_dec`).  The core decoder always calls through these
 * pointers, so the same code serves every CPU variant. */

WORD32 (*ixheaacd_fix_div)(WORD32, WORD32) = &ixheaacd_fix_div_armv7;

VOID(*ixheaacd_covariance_matrix_calc)
(WORD32 *, ixheaacd_lpp_trans_cov_matrix *,
 WORD32) = &ixheaacd_covariance_matrix_calc_armv7;

VOID(*ixheaacd_covariance_matrix_calc_2)
(ixheaacd_lpp_trans_cov_matrix *, WORD32 *, WORD32,
 WORD16) = &ixheaacd_covariance_matrix_calc_2_armv7;

/* Overlap-add: generic C here; the NEON build binds `_armv7` instead. */
VOID(*ixheaacd_over_lap_add1)
(WORD32 *, WORD32 *, WORD16 *, const WORD16 *, WORD16, WORD16,
 WORD16) = &ixheaacd_over_lap_add1_dec;

VOID(*ixheaacd_over_lap_add2)
(WORD32 *, WORD32 *, WORD32 *, const WORD16 *, WORD16, WORD16,
 WORD16) = &ixheaacd_over_lap_add2_dec;

VOID(*ixheaacd_decorr_filter2)
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *p_buf_left_real, WORD32 *p_buf_left_imag,
 WORD32 *p_buf_right_real, WORD32 *p_buf_right_imag,
 ia_ps_tables_struct *ps_tables_ptr,
 WORD16 *transient_ratio) = &ixheaacd_decorr_filter2_armv7;

VOID(*ixheaacd_decorr_filter1)
(ia_ps_dec_struct *ptr_ps_dec, ia_ps_tables_struct *ps_tables_ptr,
 WORD16 *transient_ratio) = &ixheaacd_decorr_filter1_armv7;

WORD32(*ixheaacd_divide16_pos)
(WORD32 op1, WORD32 op2) = &ixheaacd_divide16_pos_armv7;

VOID(*ixheaacd_decorrelation)
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *p_buf_left_real, WORD32 *p_buf_left_imag,
 WORD32 *p_buf_right_real, WORD32 *p_buf_right_imag,
 ia_ps_tables_struct *ps_tables_ptr) = &ixheaacd_decorrelation_armv7;

VOID(*ixheaacd_apply_rot)
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *p_qmf_left_re, WORD32 *p_qmf_left_im,
 WORD32 *p_qmf_right_re, WORD32 *p_qmf_right_im,
 ia_sbr_tables_struct *sbr_tables_ptr,
 const WORD16 *ptr_res) = &ixheaacd_apply_rot_armv7;

VOID(*ixheaacd_conv_ergtoamplitudelp)
(WORD32 bands, WORD16 noise_e, WORD16 *nrg_sine, WORD16 *nrg_gain,
 WORD16 *noise_level_mant,
 WORD16 *sqrt_table) = &ixheaacd_conv_ergtoamplitudelp_armv7;

VOID(*ixheaacd_conv_ergtoamplitude)
(WORD32 bands, WORD16 noise_e, WORD16 *nrg_sine, WORD16 *nrg_gain,
 WORD16 *noise_level_mant,
 WORD16 *sqrt_table) = &ixheaacd_conv_ergtoamplitude_armv7;

VOID(*ixheaacd_adjust_scale)
(WORD32 **re, WORD32 **im, WORD32 sub_band_start, WORD32 sub_band_end,
 WORD32 start_pos, WORD32 next_pos, WORD32 shift,
 FLAG low_pow_flag) = &ixheaacd_adjust_scale_armv7;

WORD16(*ixheaacd_ixheaacd_expsubbandsamples)
(WORD32 **re, WORD32 **im, WORD32 sub_band_start, WORD32 sub_band_end,
 WORD32 start_pos, WORD32 next_pos,
 FLAG low_pow_flag) = &ixheaacd_expsubbandsamples_armv7;

/* Generic C fallback on the non-NEON build. */
VOID(*ixheaacd_enery_calc_per_subband)
(WORD32 start_pos, WORD32 next_pos, WORD32 sub_band_start, WORD32 sub_band_end,
 WORD32 frame_exp, WORD16 *nrg_est_mant, FLAG low_pow_flag,
 ia_sbr_tables_struct *ptr_sbr_tables,
 WORD32 *ptr_qmf_matrix) = &ixheaacd_enery_calc_per_subband_dec;

VOID(*ixheaacd_harm_idx_zerotwolp)
(WORD32 *ptr_real_buf, WORD16 *ptr_gain_buf, WORD32 scale_change,
 WORD16 *ptr_sine_level_buf, const WORD32 *ptr_rand_ph,
 WORD16 *noise_level_mant, WORD32 num_sub_bands, FLAG noise_absc_flag,
 WORD32 harm_index) = &ixheaacd_harm_idx_zerotwolp_armv7;

VOID(*ixheaacd_tns_ar_filter_fixed)
(WORD32 *spectrum, WORD32 size, WORD32 inc, WORD32 *lpc, WORD32 order,
 WORD32 shift_value, WORD scale_spec) = &ixheaacd_tns_ar_filter_fixed_armv7;

VOID(*ixheaacd_tns_ar_filter)
(WORD32 *spectrum, WORD32 size, WORD32 inc, WORD16 *lpc, WORD32 order,
 WORD32 shift_value, WORD scale_spec,
 WORD32 *ptr_filter_state) = &ixheaacd_tns_ar_filter_armv7;

VOID(*ixheaacd_tns_parcor_lpc_convert)
(WORD16 *parcor, WORD16 *lpc, WORD16 *scale,
 WORD order) = &ixheaacd_tns_parcor_lpc_convert_armv7;

/* Generic C fallbacks for the IMDCT/twiddle path on the non-NEON build. */
WORD32(*ixheaacd_calc_max_spectral_line)
(WORD32 *ptr_tmp, WORD32 size) = &ixheaacd_calc_max_spectral_line_dec;

VOID(*ixheaacd_post_twiddle)
(WORD32 out_ptr[], WORD32 spec_data[],
 ia_aac_dec_imdct_tables_struct *ptr_imdct_tables,
 WORD npoints) = &ixheaacd_post_twiddle_dec;

VOID(*ixheaacd_post_twid_overlap_add)
(WORD16 pcm_out[], WORD32 spec_data[],
 ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints,
 WORD32 *ptr_overlap_buf, WORD16 q_shift, const WORD16 *window,
 WORD16 ch_fac) = &ixheaacd_post_twid_overlap_add_dec;

VOID(*ixheaacd_neg_shift_spec)
(WORD32 *coef, WORD16 *out, WORD16 q_shift,
 WORD16 ch_fac) = &ixheaacd_neg_shift_spec_dec;

VOID(*ixheaacd_spec_to_overlapbuf)
(WORD32 *ptr_overlap_buf, WORD32 *ptr_spec_coeff, WORD32 q_shift,
 WORD32 size) = &ixheaacd_spec_to_overlapbuf_armv7;

VOID(*ixheaacd_overlap_buf_out)
(WORD16 *out_samples, WORD32 *ptr_overlap_buf, WORD32 size,
 const WORD16 ch_fac) = &ixheaacd_overlap_buf_out_armv7;

VOID(*ixheaacd_overlap_out_copy)
(WORD16 *out_samples, WORD32 *ptr_overlap_buf, WORD32 *ptr_overlap_buf1,
 const WORD16 ch_fac) = &ixheaacd_overlap_out_copy_armv7;

VOID(*ixheaacd_pretwiddle_compute)
(WORD32 *spec_data1, WORD32 *spec_data2, WORD32 *out_ptr,
 ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints4,
 WORD32 neg_expo) = &ixheaacd_pretwiddle_compute_dec;

VOID(*ixheaacd_imdct_using_fft)
(ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD32 npoints,
 WORD32 *ptr_x, WORD32 *ptr_y) = &ixheaacd_imdct_using_fft_dec;
||||
249
decoder/armv7/ixheaacd_function_selector_armv7.c
Normal file
249
decoder/armv7/ixheaacd_function_selector_armv7.c
Normal file
|
|
@ -0,0 +1,249 @@
|
|||
/******************************************************************************
|
||||
* *
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "ixheaacd_sbr_common.h"
|
||||
#include <ixheaacd_type_def.h>
|
||||
|
||||
#include "ixheaacd_constants.h"
|
||||
#include <ixheaacd_basic_ops32.h>
|
||||
#include <ixheaacd_basic_ops16.h>
|
||||
#include <ixheaacd_basic_ops40.h>
|
||||
#include "ixheaacd_basic_ops.h"
|
||||
|
||||
#include <ixheaacd_basic_op.h>
|
||||
#include "ixheaacd_intrinsics.h"
|
||||
#include "ixheaacd_common_rom.h"
|
||||
#include "ixheaacd_sbrdecsettings.h"
|
||||
#include "ixheaacd_bitbuffer.h"
|
||||
#include "ixheaacd_defines.h"
|
||||
|
||||
#include "ixheaacd_pns.h"
|
||||
|
||||
#include <ixheaacd_aac_rom.h>
|
||||
#include "ixheaacd_aac_imdct.h"
|
||||
#include "ixheaacd_pulsedata.h"
|
||||
|
||||
#include "ixheaacd_drc_data_struct.h"
|
||||
|
||||
#include "ixheaacd_lt_predict.h"
|
||||
|
||||
#include "ixheaacd_channelinfo.h"
|
||||
#include "ixheaacd_drc_dec.h"
|
||||
|
||||
#include "ixheaacd_sbrdecoder.h"
|
||||
#include "ixheaacd_tns.h"
|
||||
#include "ixheaacd_sbr_scale.h"
|
||||
#include "ixheaacd_lpp_tran.h"
|
||||
#include "ixheaacd_env_extr_part.h"
|
||||
#include <ixheaacd_sbr_rom.h>
|
||||
#include "ixheaacd_block.h"
|
||||
#include "ixheaacd_hybrid.h"
|
||||
#include "ixheaacd_ps_dec.h"
|
||||
#include "ixheaacd_env_extr.h"
|
||||
|
||||
#include "ixheaacd_basic_funcs.h"
|
||||
#include "ixheaacd_env_calc.h"
|
||||
#include "ixheaacd_dsp_fft32x32s.h"
|
||||
|
||||
#include "ixheaacd_interface.h"
|
||||
|
||||
/* Function-pointer dispatch table for full ARMv7 builds
 * (file: ixheaacd_function_selector_armv7.c).
 * Every decoder hot-spot is bound to its ARMv7 assembly kernel
 * (suffix `_armv7`, one `_arm` variant).  The core decoder calls
 * through these pointers, so the same code serves every CPU variant. */

WORD32 (*ixheaacd_fix_div)(WORD32, WORD32) = &ixheaacd_fix_div_armv7;

VOID(*ixheaacd_covariance_matrix_calc)
(WORD32 *, ixheaacd_lpp_trans_cov_matrix *,
 WORD32) = &ixheaacd_covariance_matrix_calc_armv7;

VOID(*ixheaacd_covariance_matrix_calc_2)
(ixheaacd_lpp_trans_cov_matrix *, WORD32 *, WORD32,
 WORD16) = &ixheaacd_covariance_matrix_calc_2_armv7;

/* Assembly overlap-add (the non-NEON build binds `_dec` here instead). */
VOID(*ixheaacd_over_lap_add1)
(WORD32 *, WORD32 *, WORD16 *, const WORD16 *, WORD16, WORD16,
 WORD16) = &ixheaacd_over_lap_add1_armv7;

VOID(*ixheaacd_over_lap_add2)
(WORD32 *, WORD32 *, WORD32 *, const WORD16 *, WORD16, WORD16,
 WORD16) = &ixheaacd_over_lap_add2_armv7;

VOID(*ixheaacd_decorr_filter2)
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *p_buf_left_real, WORD32 *p_buf_left_imag,
 WORD32 *p_buf_right_real, WORD32 *p_buf_right_imag,
 ia_ps_tables_struct *ps_tables_ptr,
 WORD16 *transient_ratio) = &ixheaacd_decorr_filter2_armv7;

VOID(*ixheaacd_decorr_filter1)
(ia_ps_dec_struct *ptr_ps_dec, ia_ps_tables_struct *ps_tables_ptr,
 WORD16 *transient_ratio) = &ixheaacd_decorr_filter1_armv7;

WORD32(*ixheaacd_divide16_pos)
(WORD32 op1, WORD32 op2) = &ixheaacd_divide16_pos_armv7;

VOID(*ixheaacd_decorrelation)
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *p_buf_left_real, WORD32 *p_buf_left_imag,
 WORD32 *p_buf_right_real, WORD32 *p_buf_right_imag,
 ia_ps_tables_struct *ps_tables_ptr) = &ixheaacd_decorrelation_armv7;

VOID(*ixheaacd_apply_rot)
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *qmf_left_real, WORD32 *qmf_left_imag,
 WORD32 *qmf_right_real, WORD32 *qmf_right_imag,
 ia_sbr_tables_struct *sbr_tables_ptr,
 const WORD16 *ptr_resol) = &ixheaacd_apply_rot_armv7;

VOID(*ixheaacd_conv_ergtoamplitudelp)
(WORD32 bands, WORD16 noise_e, WORD16 *nrg_sine, WORD16 *nrg_gain,
 WORD16 *noise_level_mant,
 WORD16 *sqrt_table) = &ixheaacd_conv_ergtoamplitudelp_armv7;

VOID(*ixheaacd_conv_ergtoamplitude)
(WORD32 bands, WORD16 noise_e, WORD16 *nrg_sine, WORD16 *nrg_gain,
 WORD16 *noise_level_mant,
 WORD16 *sqrt_table) = &ixheaacd_conv_ergtoamplitude_armv7;

VOID(*ixheaacd_adjust_scale)
(WORD32 **re, WORD32 **im, WORD32 sub_band_start, WORD32 sub_band_end,
 WORD32 start_pos, WORD32 next_pos, WORD32 shift,
 FLAG low_pow_flag) = &ixheaacd_adjust_scale_armv7;

WORD16(*ixheaacd_ixheaacd_expsubbandsamples)
(WORD32 **re, WORD32 **im, WORD32 sub_band_start, WORD32 sub_band_end,
 WORD32 start_pos, WORD32 next_pos,
 FLAG low_pow_flag) = &ixheaacd_expsubbandsamples_armv7;

VOID(*ixheaacd_enery_calc_per_subband)
(WORD32 start_pos, WORD32 next_pos, WORD32 sub_band_start, WORD32 sub_band_end,
 WORD32 frame_exp, WORD16 *nrg_est_mant, FLAG low_pow_flag,
 ia_sbr_tables_struct *ptr_sbr_tables,
 WORD32 *ptr_qmf_matrix) = &ixheaacd_enery_calc_per_subband_armv7;

VOID(*ixheaacd_harm_idx_zerotwolp)
(WORD32 *ptr_real_buf, WORD16 *ptr_gain_buf, WORD32 scale_change,
 WORD16 *ptr_sine_level_buf, const WORD32 *ptr_rand_ph,
 WORD16 *noise_level_mant, WORD32 num_sub_bands, FLAG noise_absc_flag,
 WORD32 harm_index) = &ixheaacd_harm_idx_zerotwolp_armv7;

VOID(*ixheaacd_tns_ar_filter_fixed)
(WORD32 *spectrum, WORD32 size, WORD32 inc, WORD32 *lpc, WORD32 order,
 WORD32 shift_value, WORD scale_spec) = &ixheaacd_tns_ar_filter_fixed_armv7;

VOID(*ixheaacd_tns_ar_filter)
(WORD32 *spectrum, WORD32 size, WORD32 inc, WORD16 *lpc, WORD32 order,
 WORD32 shift_value, WORD scale_spec,
 WORD32 *ptr_filter_state) = &ixheaacd_tns_ar_filter_armv7;

VOID(*ixheaacd_tns_parcor_lpc_convert)
(WORD16 *parcor, WORD16 *lpc, WORD16 *scale,
 WORD order) = &ixheaacd_tns_parcor_lpc_convert_armv7;

WORD32(*ixheaacd_calc_max_spectral_line)
(WORD32 *ptr_tmp, WORD32 size) = &ixheaacd_calc_max_spectral_line_armv7;

VOID(*ixheaacd_post_twiddle)
(WORD32 out_ptr[], WORD32 spec_data[],
 ia_aac_dec_imdct_tables_struct *ptr_imdct_tables,
 WORD npoints) = &ixheaacd_post_twiddle_armv7;

VOID(*ixheaacd_post_twid_overlap_add)
(WORD16 pcm_out[], WORD32 spec_data[],
 ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints,
 WORD32 *ptr_overlap_buf, WORD16 q_shift, const WORD16 *window,
 WORD16 ch_fac) = &ixheaacd_post_twid_overlap_add_armv7;

VOID(*ixheaacd_neg_shift_spec)
(WORD32 *coef, WORD16 *out, WORD16 q_shift,
 WORD16 ch_fac) = &ixheaacd_neg_shift_spec_armv7;

VOID(*ixheaacd_spec_to_overlapbuf)
(WORD32 *ptr_overlap_buf, WORD32 *ptr_spec_coeff, WORD32 q_shift,
 WORD32 size) = &ixheaacd_spec_to_overlapbuf_armv7;

VOID(*ixheaacd_overlap_buf_out)
(WORD16 *out_samples, WORD32 *ptr_overlap_buf, WORD32 size,
 const WORD16 ch_fac) = &ixheaacd_overlap_buf_out_armv7;

VOID(*ixheaacd_overlap_out_copy)
(WORD16 *out_samples, WORD32 *ptr_overlap_buf, WORD32 *ptr_overlap_buf1,
 const WORD16 ch_fac) = &ixheaacd_overlap_out_copy_armv7;

VOID(*ixheaacd_pretwiddle_compute)
(WORD32 *spec_data1, WORD32 *spec_data2, WORD32 *out_ptr,
 ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints4,
 WORD32 neg_expo) = &ixheaacd_pretwiddle_compute_armv7;

VOID(*ixheaacd_imdct_using_fft)
(ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD32 npoints,
 WORD32 *ptr_x, WORD32 *ptr_y) = &ixheaacd_imdct_using_fft_armv7;

/* FFT entry points defined only for the full ARMv7 build. */
VOID(*ixheaacd_complex_fft_p2)
(WORD32 *xr, WORD32 *xi, WORD32 nlength, WORD32 fft_mode,
 WORD32 *preshift) = &ixheaacd_complex_fft_p2_armv7;

VOID(*ixheaacd_mps_complex_fft_64)
(WORD32 *ptr_x, WORD32 *fin_re, WORD32 *fin_im,
 WORD32 nlength) = &ixheaacd_mps_complex_fft_64_armv7;

VOID(*ixheaacd_mps_synt_pre_twiddle)
(WORD32 *ptr_in, WORD32 *table_re, WORD32 *table_im,
 WORD32 resolution) = &ixheaacd_mps_synt_pre_twiddle_armv7;

VOID(*ixheaacd_mps_synt_post_twiddle)
(WORD32 *ptr_in, WORD32 *table_re, WORD32 *table_im,
 WORD32 resolution) = &ixheaacd_mps_synt_post_twiddle_armv7;

VOID(*ixheaacd_calc_pre_twid)
(WORD32 *ptr_x, WORD32 *r_ptr, WORD32 *i_ptr, WORD32 nlength,
 const WORD32 *cos_ptr, const WORD32 *sin_ptr) = &ixheaacd_calc_pre_twid_armv7;

VOID(*ixheaacd_calc_post_twid)
(WORD32 *ptr_x, WORD32 *r_ptr, WORD32 *i_ptr, WORD32 nlength,
 const WORD32 *cos_ptr, const WORD32 *sin_ptr) = &ixheaacd_calc_post_twid_armv7;

VOID(*ixheaacd_mps_synt_post_fft_twiddle)
(WORD32 resolution, WORD32 *fin_re, WORD32 *fin_im, WORD32 *table_re,
 WORD32 *table_im, WORD32 *state) = &ixheaacd_mps_synt_post_fft_twiddle_armv7;

VOID(*ixheaacd_mps_synt_out_calc)
(WORD32 resolution, WORD32 *out, WORD32 *state,
 const WORD32 *filter_coeff) = &ixheaacd_mps_synt_out_calc_armv7;

VOID(*ixheaacd_fft_15_ld)
(WORD32 *inp, WORD32 *op, WORD32 *fft3out,
 UWORD8 *re_arr_tab_sml_240_ptr) = &ixheaacd_fft_15_ld_armv7;

VOID(*ixheaacd_aac_ld_dec_rearrange)
(WORD32 *ip, WORD32 *op, WORD32 mdct_len_2,
 UWORD8 *re_arr_tab) = &ia_aac_ld_dec_rearrange_armv7;

/* NOTE(review): ixheaacd_fft32x32_ld aliases the IMDCT FFT kernel
 * (ixheaacd_imdct_using_fft_armv7) rather than a dedicated routine —
 * appears intentional; confirm against other platform selector files. */
VOID (*ixheaacd_fft32x32_ld)
(ia_aac_dec_imdct_tables_struct *imdct_tables_ptr, WORD32 npoints,
 WORD32 *ptr_x, WORD32 *ptr_y) = &ixheaacd_imdct_using_fft_armv7;

VOID (*ixheaacd_fft32x32_ld2)
(ia_aac_dec_imdct_tables_struct *imdct_tables_ptr, WORD32 npoints,
 WORD32 *ptr_x, WORD32 *ptr_y) = &ixheaacd_fft32x32_ld2_armv7;

WORD16 (*ixheaacd_neg_expo_inc)(WORD16 neg_expo) = &ixheaacd_neg_expo_inc_arm;

VOID (*ixheaacd_inv_dit_fft_8pt)
(WORD32 *x, WORD32 *real, WORD32 *imag) = &ixheaacd_inv_dit_fft_8pt_armv7;

VOID (*ixheaacd_scale_factor_process)
(WORD32 *x_invquant, WORD16 *scale_fact, WORD no_band, WORD8 *width,
 WORD32 *scale_tables_ptr, WORD32 total_channels, WORD32 object_type,
 WORD32 aac_sf_data_resil_flag) = &ixheaacd_scale_factor_process_armv7;
|
||||
102
decoder/armv7/ixheaacd_fwd_modulation.s
Normal file
102
decoder/armv7/ixheaacd_fwd_modulation.s
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.extern ixheaacd_cos_sin_mod
|
||||
.hidden ixheaacd_cos_sin_mod
|
||||
.global ixheaacd_fwd_modulation
|
||||
@ Forward modulation (ARMv7, GNU as syntax).
@ Stage 1: butterfly over a 64-word input block (sum/difference of mirrored
@          pairs, pre-scaled by >>4), producing two 32-word half-buffers.
@ Stage 2: call ixheaacd_cos_sin_mod on the first half.
@ Stage 3: per-band complex rotation of both halves using 16-bit cos/sin
@          factors packed in 32-bit words (SMULW* fractional multiplies).
@ Register use on entry (AAPCS): r0 = input buffer, r1 = first output
@ buffer, r2 = second output buffer, r3 = context struct; one further
@ argument is taken from the stack at [sp, #0x24] (9 registers saved
@ below = 36 bytes, so that slot is the first stack parameter).
ixheaacd_fwd_modulation:

    STMFD sp!, {r3-r9, r12, lr}
    MOV r5, r2                       @ r5 = second output buffer
    LDR r2, [sp, #0x24]              @ r2 = 5th argument (from caller stack)
    MOV lr, r1                       @ lr = write ptr, first half
    MOV r4, r1                       @ r4 = saved base of first half
    MOV r1, #0x1f                    @ loop counter: 32 iterations (31..0, BPL)
    MOV r7, r5                       @ r7 = write ptr, second half
    ADD r8, r0, #0xfc                @ r8 = &input[63] (reverse read pointer)
    MOV r6, r3                       @ r6 = context struct
@ Butterfly: pair input[i] (forward) with input[63-i] (backward).
LOOP1:
    LDR r3, [r0], #4                 @ a = *fwd++
    LDR r12, [r8], #-4               @ b = *bwd--

    MOV r3, r3, ASR #4               @ a >>= 4 (headroom)
    MOV r12, r12, ASR #4             @ b >>= 4

    QSUB r9, r3, r12                 @ saturating difference a - b
    ADD r3, r3, r12                  @ sum a + b

    STR r9, [lr], #4                 @ first half <- difference
    SUBS r1, r1, #1
    STR r3, [r7], #4                 @ second half <- sum

    BPL LOOP1

    MOV r1, r6                       @ arg1 = context struct
    MOV r0, r4                       @ arg0 = first-half buffer

@ Build table pointers: r3 = base + 0xd8*16 + 8 = base + 3464,
@ r2 = r3 + 4 (two interleaved table entries inside the 5th argument).
    MOV r3, #0xd8
    LSL r3, r3, #4
    ADD r3, r3, #8

    ADD r3, r2, r3

    ADD r2, r3, #4

    BL ixheaacd_cos_sin_mod

@ Band count = halfword[0x2c] - halfword[0x2a] of the context struct;
@ r0 = pointer loaded from context offset 0x18 (cos/sin factor stream).
    LDRSH r1, [r6, #0x2c]
    LDRSH r2, [r6, #0x2a]
    LDR r0, [r6, #0x18]
    SUBS r2, r1, r2

    @ LDMLEFD sp!, {r3-r9, r12, pc}
    LDMFDLE sp!, {r3-r9, r12, pc}    @ early return when band count <= 0
@ Complex rotation: each iteration reads one packed cos/sin word and
@ rotates the (first half, second half) pair in place.
LOOP2:
    LDR r1, [r0], #4                 @ packed 16-bit factors (top/bottom)
    LDR r12, [r5, #0]                @ x = second-half sample
    LDR r3, [r4, #0]                 @ y = first-half sample

    SMULWT r6, r12, r1               @ x * top16(factors)  (Q31*Q15 -> Q31>>16)
    SMULWB lr, r3, r1                @ y * bot16(factors)

    SMULWB r12, r12, r1              @ x * bot16(factors)
    SMULWT r1, r3, r1                @ y * top16(factors)

    ADD lr, lr, r6                   @ real part accumulate
    QSUB r1, r12, r1                 @ imag part, saturating subtract
    MOV r3, lr, LSL #1               @ restore Q-format (<<1)
    MOV r1, r1, LSL #1
    STR r3, [r4], #4                 @ write rotated first-half sample
    SUBS r2, r2, #1
    STR r1, [r5], #4                 @ write rotated second-half sample
    BGT LOOP2

    LDMFD sp!, {r3-r9, r12, pc}      @ restore and return (pc <- saved lr)
||||
109
decoder/armv7/ixheaacd_harm_idx_zerotwolp.s
Normal file
109
decoder/armv7/ixheaacd_harm_idx_zerotwolp.s
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_harm_idx_zerotwolp_armv7
|
||||
@ ixheaacd_harm_idx_zerotwolp_armv7 — SBR low-power sine/noise addition.
@ C prototype (from the function-selector table in this commit):
@   void f(WORD32 *ptr_real_buf, WORD16 *ptr_gain_buf, WORD32 scale_change,
@          WORD16 *ptr_sine_level_buf, const WORD32 *ptr_rand_ph,
@          WORD16 *noise_level_mant, WORD32 num_sub_bands,
@          FLAG noise_absc_flag, WORD32 harm_index)
@ AAPCS: r0..r3 carry the first four args; after saving 9 registers
@ (36 bytes) the stack args sit at [sp,#36]=ptr_rand_ph,
@ [sp,#40]=noise_level_mant, [sp,#44]=num_sub_bands,
@ [sp,#48]=noise_absc_flag, [sp,#52]=harm_index.
@ Per band: scale the sample by its gain (with a shift relative to
@ scale_change-1), then either add/subtract a sine level (sign chosen by
@ harm_index) or, when no sine level is present and noise is not absent,
@ mix in pseudo-random noise.
ixheaacd_harm_idx_zerotwolp_armv7:
    STMFD sp!, {r4-r12}
    SUB r5, r2, #1                   @ r5 = scale_change - 1
    MOV r2, #-1                      @ r2 = band index, pre-decremented
    LDR r6, [sp, #52]                @ r6 = harm_index
    LDR r12, [sp, #48]               @ r12 = noise_absc_flag
    ADD r10, sp, #36
    LDR r4, [sp, #44]                @ r4 = num_sub_bands (loop counter)
    LDMIA r10, {r9, r10}             @ r9 = ptr_rand_ph, r10 = noise_level_mant
    CMP r4, #0
    BLE EXIT                         @ nothing to do for <= 0 bands
    CMP r12, #0
    BNE NO_NOISE                     @ noise absent: take the no-noise loop

@ --- Loop with noise mixing (noise_absc_flag == 0) ---
LOOP1:
    LDR r12, [r0, #0]                @ sample = ptr_real_buf[band]
    LDRSH r7, [r1], #2               @ gain mantissa
    LDRSH r8, [r1], #2               @ gain exponent

    ADD r2, r2, #1                   @ band index++
    SMULWB r7, r12, r7               @ sample * gain_mant (Q-mult, >>16)
    SUBS r8, r8, r5                  @ net shift = exp - (scale_change-1)

    LDRH r12, [r3], #4               @ sine level (every other WORD16)
    RSBLE r8, r8, #0                 @ shift <= 0: negate,
    MOVLE r8, r7, ASR r8             @   arithmetic shift right
    MOVGT r8, r7, LSL r8             @ shift > 0: shift left

    MOVS r12, r12, LSL #16           @ sine level to high half; sets Z
    BEQ NEXT                         @ zero sine level -> add noise instead

    CMP r6, #0                       @ harm_index selects the sine sign
    QADDEQ r8, r8, r12               @ harm_index == 0: saturating add
    QSUBNE r8, r8, r12               @ otherwise: saturating subtract
    SUBS r4, r4, #1
    B STORE

NEXT:
@ No sine on this band: mix random-phase noise instead.
    LDR r7, [r9, r2, LSL #2]         @ rand = ptr_rand_ph[band]
    ADD r12, r10, r2, LSL #2
    LDRSH r12, [r12, #0]             @ noise mantissa (stride 4 over WORD16s)
    SUBS r4, r4, #1                  @ flags for BGT below (SMULTB/ADD keep them)
    SMULTB r7, r7, r12               @ top16(rand) * noise_mant
    ADD r8, r8, r7, LSL #1

STORE:
    STR r8, [r0], #4                 @ write back scaled (+sine/noise) sample
    BGT LOOP1
    B EXIT

@ --- Loop without noise mixing (noise_absc_flag != 0) ---
NO_NOISE:

LOOP2:
    LDR r12, [r0, #0]                @ sample = ptr_real_buf[band]
    LDRSH r7, [r1], #2               @ gain mantissa
    LDRSH r9, [r1], #2               @ gain exponent
    LDRH r10, [r3], #4               @ sine level

    SMULWB r7, r12, r7               @ sample * gain_mant
    SUBS r9, r9, r5                  @ net shift
    RSBMI r9, r9, #0                 @ negative: negate,

    MOVMI r12, r7, ASR r9            @   shift right
    MOVPL r12, r7, LSL r9            @ non-negative: shift left

    MOV r7, r10, LSL #16             @ sine level to high half

    CMP r6, #0                       @ harm_index selects the sine sign
    QADDEQ r12, r12, r7
    QSUBNE r12, r12, r7

    SUBS r4, r4, #1
    STR r12, [r0], #4
    BGT LOOP2

EXIT:
    LDMFD sp!, {r4-r12}
    BX lr
|
||||
|
||||
|
||||
825
decoder/armv7/ixheaacd_imdct_using_fft.s
Normal file
825
decoder/armv7/ixheaacd_imdct_using_fft.s
Normal file
|
|
@ -0,0 +1,825 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
|
||||
.global ixheaacd_imdct_using_fft_armv7
|
||||
ixheaacd_imdct_using_fft_armv7:
|
||||
|
||||
STMFD sp!, {r4-r12, lr}
|
||||
vpush {d8-d15}
|
||||
|
||||
@ LDR r4, [sp, #0x68]
|
||||
@ LDR r5, [sp, #0x68+4]
|
||||
@ LDR r6, [sp, #0x68+8]
|
||||
@ LDR r7, [sp, #0x68+12]
|
||||
|
||||
LDR r8, =11600
|
||||
ADD r4, r0, r8
|
||||
LDR r8, =11856
|
||||
ADD r5, r0, r8
|
||||
LDR r8, =11920
|
||||
ADD r6, r0, r8
|
||||
LDR r8, =11936
|
||||
ADD r7, r0, r8
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
COND_1: CMP r1, #0x400
|
||||
BNE COND_2
|
||||
|
||||
MOV r8, #4
|
||||
B RADIX_4_FIRST_START
|
||||
|
||||
|
||||
COND_2: CMP r1, #0x200
|
||||
BNE COND_3
|
||||
|
||||
MOV r8, #3
|
||||
MOV r4, r5
|
||||
B RADIX_8_FIRST_START
|
||||
|
||||
COND_3: CMP r1, #0x100
|
||||
BNE COND_4
|
||||
|
||||
MOV r8, #3
|
||||
MOV r4, r5
|
||||
B RADIX_4_FIRST_START
|
||||
|
||||
COND_4: CMP r1, #0x80
|
||||
BNE COND_5
|
||||
|
||||
MOV r8, #2
|
||||
MOV r4, r6
|
||||
B RADIX_8_FIRST_START
|
||||
|
||||
COND_5: CMP r1, #0x40
|
||||
BNE COND_6
|
||||
|
||||
MOV r8, #2
|
||||
MOV r4, r6
|
||||
B RADIX_4_FIRST_START
|
||||
COND_6:
|
||||
|
||||
MOV r8, #1
|
||||
MOV r4, r7
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
RADIX_8_FIRST_START:
|
||||
|
||||
|
||||
LSR r9 , r1, #5
|
||||
LSL r1, r1, #1
|
||||
|
||||
RADIX_8_FIRST_LOOP:
|
||||
|
||||
MOV r5 , r2
|
||||
MOV r6 , r2
|
||||
MOV r7 , r2
|
||||
MOV r11 , r2
|
||||
|
||||
LDRB r12, [r4, #0]
|
||||
ADD r5, r5, r12, LSL #3
|
||||
VLD2.32 {d0[0], d2[0]}, [r5] , r1
|
||||
ADD r5, r5, r1
|
||||
VLD2.32 {d8[0], d10[0]}, [r5] , r1
|
||||
SUB r5, r5, r1, LSL #1
|
||||
VLD2.32 {d4[0], d6[0]}, [r5] , r1
|
||||
ADD r5, r5, r1
|
||||
VLD2.32 {d12[0], d14[0]}, [r5], r1
|
||||
SUB r5, r5, r1, LSL #2
|
||||
|
||||
LDRB r12, [r4, #1]
|
||||
ADD r6, r6, r12, LSL #3
|
||||
VLD2.32 {d0[1], d2[1]}, [r6] , r1
|
||||
ADD r6, r6, r1
|
||||
VLD2.32 {d8[1], d10[1]}, [r6] , r1
|
||||
SUB r6, r6, r1, LSL #1
|
||||
VLD2.32 {d4[1], d6[1]}, [r6] , r1
|
||||
ADD r6, r6, r1
|
||||
VLD2.32 {d12[1], d14[1]}, [r6], r1
|
||||
SUB r6, r6, r1, LSL #2
|
||||
|
||||
|
||||
LDRB r12, [r4, #2]
|
||||
ADD r7, r7, r12 , LSL #3
|
||||
VLD2.32 {d1[0], d3[0]}, [r7] , r1
|
||||
ADD r7, r7, r1
|
||||
VLD2.32 {d9[0], d11[0]}, [r7] , r1
|
||||
SUB r7, r7, r1, LSL #1
|
||||
|
||||
LDRB r12, [r4, #3]
|
||||
ADD r11, r11, r12 , LSL #3
|
||||
VLD2.32 {d1[1], d3[1]}, [r11] , r1
|
||||
ADD r11, r11, r1
|
||||
VLD2.32 {d9[1], d11[1]}, [r11] , r1
|
||||
SUB r11, r11, r1, LSL #1
|
||||
|
||||
|
||||
|
||||
VADD.I32 q8, q0, q4
|
||||
VLD2.32 {d5[0], d7[0]}, [r7] , r1
|
||||
ADD r7, r7, r1
|
||||
|
||||
VSUB.I32 q9, q0, q4
|
||||
VLD2.32 {d13[0], d15[0]}, [r7], r1
|
||||
SUB r7, r7, r1, LSL #2
|
||||
|
||||
|
||||
|
||||
|
||||
VADD.I32 q0, q1, q5
|
||||
VLD2.32 {d5[1], d7[1]}, [r11] , r1
|
||||
ADD r11, r11, r1
|
||||
|
||||
VSUB.I32 q4, q1, q5
|
||||
VLD2.32 {d13[1], d15[1]}, [r11], r1
|
||||
SUB r11, r11, r1, LSL #2
|
||||
|
||||
|
||||
|
||||
ADD r4, r4, #4
|
||||
|
||||
ADD r5, r5, r1, LSR #1
|
||||
ADD r6, r6, r1, LSR #1
|
||||
ADD r7, r7, r1, LSR #1
|
||||
ADD r11, r11, r1, LSR #1
|
||||
|
||||
|
||||
VADD.I32 q1, q2, q6
|
||||
VLD2.32 {d28[0], d30[0]}, [r5] , r1
|
||||
|
||||
|
||||
VSUB.I32 q5, q2, q6
|
||||
VLD2.32 {d20[0], d22[0]}, [r5] , r1
|
||||
|
||||
|
||||
VADD.I32 q2, q3, q7
|
||||
VLD2.32 {d24[0], d26[0]}, [r5] , r1
|
||||
|
||||
|
||||
VSUB.I32 q6, q3, q7
|
||||
VLD2.32 {d28[1], d30[1]}, [r6] , r1
|
||||
|
||||
VADD.S32 q3, q9, q6
|
||||
VLD2.32 {d20[1], d22[1]}, [r6] , r1
|
||||
|
||||
VSUB.S32 q7, q9, q6
|
||||
VLD2.32 {d24[1], d26[1]}, [r6] , r1
|
||||
|
||||
VSUB.S32 q6, q4, q5
|
||||
VLD2.32 {d29[0], d31[0]}, [r7] , r1
|
||||
|
||||
VADD.S32 q9, q4, q5
|
||||
VLD2.32 {d21[0], d23[0]}, [r7] , r1
|
||||
|
||||
VADD.S32 q4, q8, q1
|
||||
VLD2.32 {d25[0], d27[0]}, [r7] , r1
|
||||
|
||||
VSUB.S32 q5, q8, q1
|
||||
VLD2.32 {d29[1], d31[1]}, [r11] , r1
|
||||
|
||||
VADD.S32 q8, q0, q2
|
||||
VLD2.32 {d21[1], d23[1]}, [r11] , r1
|
||||
|
||||
VSUB.S32 q0, q0, q2
|
||||
VLD2.32 {d25[1], d27[1]}, [r11] , r1
|
||||
|
||||
|
||||
VPUSH {q3}
|
||||
VPUSH {q7}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
VLD2.32 {d2[0], d4[0]}, [r5], r1
|
||||
|
||||
VADD.I32 q7, q14, q12
|
||||
|
||||
VLD2.32 {d2[1], d4[1]}, [r6] , r1
|
||||
|
||||
VSUB.I32 q3, q14, q12
|
||||
|
||||
VLD2.32 {d3[0], d5[0]}, [r7] , r1
|
||||
|
||||
VADD.I32 q14, q15, q13
|
||||
|
||||
VLD2.32 {d3[1], d5[1]}, [r11] , r1
|
||||
|
||||
VSUB.I32 q12, q15, q13
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
VADD.I32 q15, q10, q1
|
||||
VSUB.I32 q13, q10, q1
|
||||
VADD.I32 q10, q11, q2
|
||||
VSUB.I32 q1, q11, q2
|
||||
|
||||
|
||||
|
||||
VADD.S32 q11, q7, q15
|
||||
VSUB.S32 q2, q7, q15
|
||||
VADD.S32 q7, q14, q10
|
||||
VSUB.S32 q15, q14, q10
|
||||
|
||||
VADD.S32 q14, q3, q12
|
||||
VSUB.S32 q10, q3, q12
|
||||
VADD.S32 q3, q13, q1
|
||||
VSUB.S32 q12, q13, q1
|
||||
|
||||
VADD.S32 q1 , q14, q12
|
||||
VSUB.S32 q13, q14, q12
|
||||
VSUB.S32 q12, q3, q10
|
||||
|
||||
VUZP.16 d2, d3
|
||||
VADD.S32 q14, q3, q10
|
||||
|
||||
VUZP.16 d26, d27
|
||||
VADD.S32 q3, q4, q11
|
||||
|
||||
VUZP.16 d24, d25
|
||||
VSUB.S32 q10, q4, q11
|
||||
|
||||
VUZP.16 d28, d29
|
||||
VADD.S32 q4, q8, q7
|
||||
|
||||
LDR r14, =0x5a82
|
||||
|
||||
VSUB.S32 q11, q8, q7
|
||||
|
||||
VADD.S32 q8, q5, q15
|
||||
VSUB.S32 q7, q5, q15
|
||||
VSUB.S32 q5, q0, q2
|
||||
VADD.S32 q15, q0, q2
|
||||
|
||||
VPOP {q0}
|
||||
VPOP {q2}
|
||||
VPUSH {q3-q4}
|
||||
VPUSH {q10}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
VDUP.16 d20, r14
|
||||
|
||||
|
||||
VMULL.u16 q4, d26, d20
|
||||
VMULL.u16 q3, d28, d20
|
||||
|
||||
VPUSH {q7-q8}
|
||||
VPUSH {q5}
|
||||
|
||||
VSHR.S32 q4, q4, #15
|
||||
VSHR.S32 q3, q3, #15
|
||||
|
||||
VQDMLAL.S16 q4, d27, d20
|
||||
VQDMLAL.S16 q3, d29, d20
|
||||
|
||||
|
||||
VPUSH {q11}
|
||||
|
||||
VMULL.u16 q13, d24, d20
|
||||
VMULL.u16 q14, d2, d20
|
||||
|
||||
VADD.S32 q5, q2, q4
|
||||
VSUB.S32 q7, q2, q4
|
||||
|
||||
VADD.S32 q8, q6, q3
|
||||
VSUB.S32 q6, q6, q3
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
VSHR.S32 q13, q13, #15
|
||||
VSHR.S32 q14, q14, #15
|
||||
|
||||
VQDMLAL.S16 q13, d25, d20
|
||||
VQDMLAL.S16 q14, d3, d20
|
||||
|
||||
VPOP {q1}
|
||||
VPOP {q10}
|
||||
|
||||
VADD.S32 q2, q0, q13
|
||||
VSUB.S32 q4, q0, q13
|
||||
|
||||
VADD.S32 q11, q9, q14
|
||||
VSUB.S32 q3, q9, q14
|
||||
|
||||
|
||||
|
||||
|
||||
VPOP {q14}
|
||||
VPOP {q9}
|
||||
VPOP {q0}
|
||||
VPOP {q12, q13}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
VTRN.32 q12, q5
|
||||
VSHL.S32 q12, q12, #3
|
||||
VTRN.32 q9, q2
|
||||
VSHL.S32 q5, q5, #3
|
||||
|
||||
VSHL.S32 q9, q9, #3
|
||||
VTRN.32 q0, q7
|
||||
VSHL.S32 q2, q2, #3
|
||||
|
||||
VSHL.S32 q0, q0, #3
|
||||
VTRN.32 q14, q4
|
||||
VSHL.S32 q7, q7, #3
|
||||
|
||||
VSHL.S32 q14, q14, #3
|
||||
VTRN.32 q13, q6
|
||||
VSHL.S32 q4, q4, #3
|
||||
|
||||
VSHL.S32 q13, q13, #3
|
||||
VTRN.32 q10, q3
|
||||
VSHL.S32 q6, q6, #3
|
||||
|
||||
VSHL.S32 q10, q10, #3
|
||||
VTRN.32 q1, q8
|
||||
VSHL.S32 q3, q3, #3
|
||||
|
||||
VSHL.S32 q1, q1, #3
|
||||
VTRN.32 q15, q11
|
||||
VSHL.S32 q8, q8, #3
|
||||
|
||||
VSHL.S32 q15, q15, #3
|
||||
VSWP d18, d25
|
||||
|
||||
VSHL.S32 q11, q11, #3
|
||||
VSWP d4, d11
|
||||
|
||||
VSWP d1, d28
|
||||
VSWP d15, d8
|
||||
|
||||
VSWP d20, d27
|
||||
VSWP d6, d13
|
||||
|
||||
VSWP d30, d3
|
||||
VSWP d22, d17
|
||||
|
||||
VST2.32 {q12, q13}, [r3]!
|
||||
VST2.32 {q0, q1}, [r3]!
|
||||
|
||||
VST2.32 {q5, q6}, [r3]!
|
||||
VST2.32 {q7, q8}, [r3]!
|
||||
|
||||
VMOV q5, q11
|
||||
|
||||
VST2.32 {q9, q10}, [r3]!
|
||||
VST2.32 {q14, q15}, [r3]!
|
||||
|
||||
VST2.32 {q2, q3}, [r3]!
|
||||
VST2.32 {q4, q5}, [r3]!
|
||||
|
||||
|
||||
SUBS r9, r9, #1
|
||||
BNE RADIX_8_FIRST_LOOP
|
||||
|
||||
LSR r1, r1, #1
|
||||
SUB r3, r1, LSL #3
|
||||
|
||||
MOV r5, #8
|
||||
MOV r4, #32
|
||||
LSR r6, r1, #5
|
||||
|
||||
B RADIX_4_FIRST_ENDS
|
||||
|
||||
RADIX_8_FIRST_ENDS:
|
||||
|
||||
|
||||
RADIX_4_FIRST_START:
|
||||
|
||||
|
||||
LSR r9 , r1, #4
|
||||
LSL r1, r1, #1
|
||||
|
||||
RADIX_4_LOOP:
|
||||
|
||||
MOV r5 , r2
|
||||
MOV r6 , r2
|
||||
MOV r7 , r2
|
||||
MOV r11 , r2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LDRB r12, [r4, #0]
|
||||
ADD r5, r5, r12, LSL #3
|
||||
|
||||
VLD2.32 {d0[0], d2[0]}, [r5] , r1
|
||||
ADD r5, r5, r1
|
||||
VLD2.32 {d8[0], d10[0]}, [r5] , r1
|
||||
SUB r5, r5, r1, LSL #1
|
||||
VLD2.32 {d4[0], d6[0]}, [r5] , r1
|
||||
ADD r5, r5, r1
|
||||
VLD2.32 {d12[0], d14[0]}, [r5], r1
|
||||
|
||||
LDRB r12, [r4, #1]
|
||||
ADD r6, r6, r12, LSL #3
|
||||
|
||||
VLD2.32 {d0[1], d2[1]}, [r6] , r1
|
||||
ADD r6, r6, r1
|
||||
VLD2.32 {d8[1], d10[1]}, [r6] , r1
|
||||
SUB r6, r6, r1, LSL #1
|
||||
VLD2.32 {d4[1], d6[1]}, [r6] , r1
|
||||
ADD r6, r6, r1
|
||||
VLD2.32 {d12[1], d14[1]}, [r6], r1
|
||||
|
||||
|
||||
LDRB r12, [r4, #2]
|
||||
ADD r7, r7, r12, LSL #3
|
||||
|
||||
VLD2.32 {d1[0], d3[0]}, [r7] , r1
|
||||
ADD r7, r7, r1
|
||||
VLD2.32 {d9[0], d11[0]}, [r7] , r1
|
||||
|
||||
LDRB r12, [r4, #3]
|
||||
ADD r11, r11, r12 , LSL #3
|
||||
|
||||
VLD2.32 {d1[1], d3[1]}, [r11] , r1
|
||||
ADD r11, r11, r1
|
||||
VLD2.32 {d9[1], d11[1]}, [r11] , r1
|
||||
|
||||
|
||||
SUB r7, r7, r1, LSL #1
|
||||
VADD.S32 q8, q0, q4
|
||||
VLD2.32 {d5[0], d7[0]}, [r7] , r1
|
||||
ADD r7, r7, r1
|
||||
VADD.S32 q9, q1, q5
|
||||
VLD2.32 {d13[0], d15[0]}, [r7], r1
|
||||
|
||||
|
||||
|
||||
SUB r11, r11, r1, LSL #1
|
||||
VSUB.S32 q10, q0, q4
|
||||
VLD2.32 {d5[1], d7[1]}, [r11] , r1
|
||||
ADD r11, r11, r1
|
||||
VSUB.S32 q11, q1, q5
|
||||
VLD2.32 {d13[1], d15[1]}, [r11], r1
|
||||
|
||||
|
||||
ADD r4, r4, #4
|
||||
|
||||
VADD.S32 q12, q2, q6
|
||||
VADD.S32 q13, q3, q7
|
||||
VSUB.S32 q14, q2, q6
|
||||
VSUB.S32 q15, q3, q7
|
||||
|
||||
VADD.S32 q0, q8, q12
|
||||
VADD.S32 q1, q9, q13
|
||||
VSUB.S32 q2, q8, q12
|
||||
VSUB.S32 q3, q9, q13
|
||||
|
||||
VADD.S32 q4, q10, q15
|
||||
VSUB.S32 q5, q11, q14
|
||||
VADD.S32 q7, q11, q14
|
||||
VSUB.S32 q6, q10, q15
|
||||
|
||||
|
||||
|
||||
|
||||
VTRN.32 q0, q4
|
||||
|
||||
VSHL.S32 q0, q0, #2
|
||||
VTRN.32 q2, q6
|
||||
VSHL.S32 q4, q4, #2
|
||||
|
||||
VSHL.S32 q2, q2, #2
|
||||
VTRN.32 q1, q5
|
||||
VSHL.S32 q6, q6, #2
|
||||
|
||||
VSHL.S32 q1, q1, #2
|
||||
VTRN.32 q3, q7
|
||||
VSHL.S32 q5, q5, #2
|
||||
|
||||
VSHL.S32 q3, q3, #2
|
||||
VSWP d4, d1
|
||||
|
||||
VSHL.S32 q7, q7, #2
|
||||
VSWP d12, d9
|
||||
|
||||
|
||||
|
||||
VSWP d6, d3
|
||||
VSWP d14, d11
|
||||
|
||||
|
||||
VST2.32 {q0, q1}, [r3]!
|
||||
VST2.32 {q4, q5}, [r3]!
|
||||
|
||||
VST2.32 {q2, q3}, [r3]!
|
||||
VST2.32 {q6, q7}, [r3]!
|
||||
|
||||
|
||||
|
||||
SUBS r9, r9, #1
|
||||
BNE RADIX_4_LOOP
|
||||
|
||||
LSR r1, r1, #1
|
||||
SUB r3, r1, LSL #3
|
||||
MOV r5, #4
|
||||
MOV r4, #64
|
||||
LSR r6, r1, #4
|
||||
|
||||
|
||||
RADIX_4_FIRST_ENDS:
|
||||
|
||||
PUSH {r3}
|
||||
LSR r5, r5, #2
|
||||
LDR r14, =8528
|
||||
ADD r0, r0, r14
|
||||
|
||||
OUTER_LOOP_R4:
|
||||
|
||||
|
||||
LDR r14, [sp]
|
||||
|
||||
MOV r7, r5
|
||||
MOV r2, #0
|
||||
MOV r9, r0
|
||||
LSL r12 , r5, #5
|
||||
MIDDLE_LOOP_R4:
|
||||
|
||||
|
||||
VLD2.16 {d0[0], d1[0]}, [r9], r2
|
||||
VLD2.16 {d2[0], d3[0]}, [r9], r2
|
||||
ADD r11, r2, r4, LSL #2
|
||||
VLD2.16 {d4[0], d5[0]}, [r9]
|
||||
ADD r10, r0, r11
|
||||
|
||||
|
||||
VLD2.16 {d0[1], d1[1]}, [r10], r11
|
||||
VLD2.16 {d2[1], d3[1]}, [r10], r11
|
||||
ADD r2, r11, r4, LSL #2
|
||||
VLD2.16 {d4[1], d5[1]}, [r10]
|
||||
ADD r9, r0, r2
|
||||
|
||||
|
||||
VLD2.16 {d0[2], d1[2]}, [r9], r2
|
||||
VLD2.16 {d2[2], d3[2]}, [r9], r2
|
||||
ADD r11, r2, r4, LSL #2
|
||||
VLD2.16 {d4[2], d5[2]}, [r9]
|
||||
ADD r10, r0, r11
|
||||
|
||||
|
||||
|
||||
VLD2.16 {d0[3], d1[3]}, [r10], r11
|
||||
VLD2.16 {d2[3], d3[3]}, [r10], r11
|
||||
ADD r2, r11, r4, LSL #2
|
||||
VLD2.16 {d4[3], d5[3]}, [r10]
|
||||
ADD r9, r0, r2
|
||||
|
||||
MOV r10, r6
|
||||
|
||||
|
||||
|
||||
INNER_LOOP_R4:
|
||||
|
||||
VLD2.32 {q3, q4}, [r14], r12
|
||||
|
||||
VSHR.S32 q3, q3, #1
|
||||
VLD4.16 {q5, q6}, [r14], r12
|
||||
VSHR.S32 q4, q4, #1
|
||||
|
||||
VSHR.U16 d10, d10, #1
|
||||
VLD4.16 {q7, q8}, [r14], r12
|
||||
VSHR.U16 d12, d12, #1
|
||||
|
||||
VMULL.S16 q11, d10, d0
|
||||
VMLSL.S16 q11, d12, d1
|
||||
VLD4.16 {q9, q10}, [r14], r12
|
||||
VMULL.S16 q12, d10, d1
|
||||
VMLAL.S16 q12, d12, d0
|
||||
|
||||
VSHR.U16 d14, d14, #1
|
||||
VSHR.U16 d16, d16, #1
|
||||
|
||||
SUB r14, r14, r12, LSL #2
|
||||
|
||||
VSHR.U16 d18, d18, #1
|
||||
VSHR.U16 d20, d20, #1
|
||||
|
||||
VMULL.S16 q13, d14, d2
|
||||
VMLSL.S16 q13, d16, d3
|
||||
|
||||
VSHR.S32 q11, q11, #15
|
||||
|
||||
VMULL.S16 q14, d14, d3
|
||||
VMLAL.S16 q14, d16, d2
|
||||
|
||||
VMULL.S16 q15, d18, d4
|
||||
VMLSL.S16 q15, d20, d5
|
||||
|
||||
VMLAL.S16 q11, d11, d0
|
||||
VMLSL.S16 q11, d13, d1
|
||||
|
||||
VSHR.S32 q12, q12, #15
|
||||
VSHR.S32 q13, q13, #15
|
||||
VSHR.S32 q14, q14, #15
|
||||
VSHR.S32 q15, q15, #15
|
||||
|
||||
|
||||
VMLAL.S16 q12, d11, d1
|
||||
VMLAL.S16 q12, d13, d0
|
||||
|
||||
|
||||
VMULL.S16 q5, d18, d5
|
||||
VMLAL.S16 q5, d20, d4
|
||||
|
||||
|
||||
VMLAL.S16 q13, d15, d2
|
||||
VMLSL.S16 q13, d17, d3
|
||||
|
||||
VMLAL.S16 q14, d15, d3
|
||||
VMLAL.S16 q14, d17, d2
|
||||
|
||||
|
||||
VMLAL.S16 q15, d19, d4
|
||||
VMLSL.S16 q15, d21, d5
|
||||
|
||||
VSHR.S32 q5, q5, #15
|
||||
|
||||
VMLAL.S16 q5, d19, d5
|
||||
VMLAL.S16 q5, d21, d4
|
||||
|
||||
|
||||
|
||||
CMP r7, r5
|
||||
BNE BYPASS_IF
|
||||
|
||||
ADD r14, r14, r12
|
||||
|
||||
LDR r3, [r14], r12
|
||||
ASR r3, r3, #1
|
||||
VMOV.32 d22[0], r3
|
||||
|
||||
LDR r3, [r14], r12
|
||||
ASR r3, r3, #1
|
||||
VMOV.32 d26[0], r3
|
||||
|
||||
LDR r3, [r14]
|
||||
ASR r3, r3, #1
|
||||
VMOV.32 d30[0], r3
|
||||
|
||||
SUB r14, r14, r12, LSL #1
|
||||
ADD r14, r14, #4
|
||||
|
||||
LDR r3, [r14], r12
|
||||
ASR r3, r3, #1
|
||||
VMOV.32 d24[0], r3
|
||||
|
||||
LDR r3, [r14], r12
|
||||
ASR r3, r3, #1
|
||||
VMOV.32 d28[0], r3
|
||||
|
||||
LDR r3, [r14], r12
|
||||
ASR r3, r3, #1
|
||||
VMOV.32 d10[0], r3
|
||||
|
||||
SUB r14, r14, #4
|
||||
|
||||
SUB r14, r14, r12, LSL #2
|
||||
|
||||
BYPASS_IF:
|
||||
|
||||
VADD.S32 q6, q3, q13
|
||||
VADD.S32 q7, q4, q14
|
||||
VSUB.S32 q3, q3, q13
|
||||
VSUB.S32 q4, q4, q14
|
||||
VADD.S32 q8, q11, q15
|
||||
VADD.S32 q9, q12, q5
|
||||
|
||||
VSUB.S32 q15, q11, q15
|
||||
VSUB.S32 q14, q12, q5
|
||||
|
||||
VADD.S32 q10, q6, q8
|
||||
VADD.S32 q11, q7, q9
|
||||
VADD.S32 q12, q3, q14
|
||||
VSUB.S32 q13, q4, q15
|
||||
|
||||
VSUB.S32 q6, q6, q8
|
||||
VST2.32 {q10, q11}, [r14], r12
|
||||
VSUB.S32 q7, q7, q9
|
||||
|
||||
VSUB.S32 q8, q3, q14
|
||||
VST2.32 {q12, q13}, [r14], r12
|
||||
VADD.S32 q9, q4, q15
|
||||
|
||||
|
||||
VST2.32 {q6, q7}, [r14], r12
|
||||
VST2.32 {q8, q9}, [r14], r12
|
||||
|
||||
|
||||
|
||||
|
||||
SUBS r10, r10, #1
|
||||
BNE INNER_LOOP_R4
|
||||
|
||||
SUB r14, r14, r1, LSL #3
|
||||
ADD r14, r14, #32
|
||||
|
||||
SUBS r7, r7, #1
|
||||
BNE MIDDLE_LOOP_R4
|
||||
|
||||
|
||||
|
||||
|
||||
LSR r4, r4, #2
|
||||
LSL r5, r5, #2
|
||||
LSR r6, r6, #2
|
||||
SUBS r8, r8, #1
|
||||
BNE OUTER_LOOP_R4
|
||||
END_LOOPS:
|
||||
POP {r3}
|
||||
vpop {d8-d15}
|
||||
LDMFD sp!, {r4-r12, pc}
|
||||
|
||||
163
decoder/armv7/ixheaacd_inv_dit_fft_8pt.s
Normal file
163
decoder/armv7/ixheaacd_inv_dit_fft_8pt.s
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_inv_dit_fft_8pt_armv7
|
||||
|
||||
ixheaacd_inv_dit_fft_8pt_armv7:
|
||||
|
||||
|
||||
STMFD sp!, {r4-r12, lr}
|
||||
LDR r3, [r0, #0]
|
||||
LDR r4, [r0, #0x20]
|
||||
LDR r5, [r0, #0x24]
|
||||
QADD r12, r3, r4
|
||||
LDR r6, [r0, #0x30]
|
||||
QSUB r8, r3, r4
|
||||
LDR r3, [r0, #4]
|
||||
LDR r9, [r0, #0x34]
|
||||
QADD r4, r3, r5
|
||||
SUB sp, sp, #0x14
|
||||
QSUB r5, r3, r5
|
||||
LDR lr, [r0, #0x10]
|
||||
LDR r3, [r0, #0x14]
|
||||
QADD r10, lr, r6
|
||||
QSUB r6, lr, r6
|
||||
QADD r7, r3, r9
|
||||
QSUB r9, r3, r9
|
||||
|
||||
|
||||
QADD r3, r12, r10
|
||||
QSUB lr, r12, r10
|
||||
QADD r12, r4, r7
|
||||
QSUB r7, r4, r7
|
||||
QSUB r4, r8, r9
|
||||
|
||||
STR r7, [sp, #8]
|
||||
QADD r7, r8, r9
|
||||
QADD r8, r5, r6
|
||||
STR r7, [sp, #0xc]
|
||||
QSUB r5, r5, r6
|
||||
|
||||
STMIA sp, {r8, lr}
|
||||
STR r5, [sp, #0x10]
|
||||
|
||||
|
||||
|
||||
LDR r5, [r0, #8]
|
||||
LDR lr, [r0, #0x28]
|
||||
LDR r9, [r0, #0x2c]
|
||||
QADD r7, r5, lr
|
||||
LDR r11, [r0, #0x38]
|
||||
LDR r6, [r0, #0xc]
|
||||
QSUB r5, r5, lr
|
||||
LDR lr, [r0, #0x18]
|
||||
QADD r8, r6, r9
|
||||
QSUB r6, r6, r9
|
||||
|
||||
|
||||
|
||||
QADD r10, lr, r11
|
||||
QSUB r9, lr, r11
|
||||
LDR r11, [r0, #0x1c]
|
||||
LDR r0, [r0, #0x3c]
|
||||
|
||||
MOV lr, r11
|
||||
QADD r11, r11, r0
|
||||
QSUB r0, lr, r0
|
||||
|
||||
|
||||
QADD lr, r7, r10
|
||||
QSUB r10, r7, r10
|
||||
QADD r7, r8, r11
|
||||
QSUB r11, r8, r11
|
||||
|
||||
QSUB r8, r5, r0
|
||||
QADD r5, r5, r0
|
||||
QADD r0, r6, r9
|
||||
QSUB r6, r6, r9
|
||||
|
||||
|
||||
QADD r9, r3, lr
|
||||
QSUB r3, r3, lr
|
||||
STR r9, [r1, #0]
|
||||
|
||||
QADD r9, r12, r7
|
||||
LDR lr, [sp, #4]
|
||||
STR r9, [r2, #0]
|
||||
QSUB r9, r12, r7
|
||||
|
||||
|
||||
QSUB r12, lr, r11
|
||||
QADD r11, lr, r11
|
||||
LDR lr, [sp, #8]
|
||||
STR r11, [r1, #0x10]
|
||||
QADD r7, lr, r10
|
||||
QSUB r10, lr, r10
|
||||
|
||||
LDR r11, =0x00005a82
|
||||
STR r10, [r2, #0x10]
|
||||
|
||||
QSUB r10, r8, r0
|
||||
QADD r0, r8, r0
|
||||
SMULWB r10, r10, r11
|
||||
SMULWB r0, r0, r11
|
||||
MOV r10, r10, LSL #1
|
||||
|
||||
QADD r8, r4, r10
|
||||
LDR lr, [sp, #0]
|
||||
|
||||
STR r8, [r1, #4]
|
||||
MOV r0, r0, LSL #1
|
||||
QADD r8, lr, r0
|
||||
|
||||
QSUB r4, r4, r10
|
||||
STR r8, [r2, #4]
|
||||
QSUB r0, lr, r0
|
||||
|
||||
QADD r12, r12, r4
|
||||
QADD r0, r7, r0
|
||||
STR r12, [r1, #8]
|
||||
STR r0, [r2, #8]
|
||||
|
||||
QADD r0, r5, r6
|
||||
LDR r7, [sp, #0xc]
|
||||
SMULWB r0, r0, r11
|
||||
|
||||
QSUB r12, r5, r6
|
||||
MOV r0, r0, LSL #1
|
||||
SMULWB r12, r12, r11
|
||||
LDR r5, [sp, #0x10]
|
||||
QSUB r4, r7, r0
|
||||
MOV r12, r12, LSL #1
|
||||
QADD r10, r5, r12
|
||||
QADD r3, r3, r4
|
||||
QADD lr, r9, r10
|
||||
QADD r0, r7, r0
|
||||
QSUB r10, r5, r12
|
||||
STR r3, [r1, #0xc]
|
||||
STR lr, [r2, #0xc]
|
||||
STR r0, [r1, #0x14]
|
||||
STR r10, [r2, #0x14]
|
||||
ADD sp, sp, #0x14
|
||||
LDMFD sp!, {r4-r12, pc}
|
||||
|
||||
113
decoder/armv7/ixheaacd_lap1.s
Normal file
113
decoder/armv7/ixheaacd_lap1.s
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
|
||||
.global ixheaacd_overlap_out_copy_armv7
|
||||
.global ixheaacd_spec_to_overlapbuf_armv7
|
||||
.global ixheaacd_overlap_buf_out_armv7
|
||||
|
||||
ixheaacd_overlap_buf_out_armv7:
|
||||
|
||||
STMFD sp!, {r4-r5}
|
||||
MOV r3, r3, lsl #1
|
||||
|
||||
OUTSAMPLE_LOOP:
|
||||
|
||||
LDR r4, [r1], #4
|
||||
LDR r5, [r1], #4
|
||||
SUBS r2, r2, #2
|
||||
|
||||
QADD16 r4, r4, r4
|
||||
QADD16 r5, r5, r5
|
||||
|
||||
STRH r4, [r0], r3
|
||||
STRH r5, [r0], r3
|
||||
|
||||
BGT OUTSAMPLE_LOOP
|
||||
|
||||
LDMFD sp!, {r4-r5}
|
||||
BX lr
|
||||
|
||||
|
||||
ixheaacd_overlap_out_copy_armv7:
|
||||
|
||||
STMFD sp!, {r4-r9, r14}
|
||||
MOV r9, #32
|
||||
MOV r8, r1
|
||||
MOV r3, r3, LSL #1
|
||||
|
||||
|
||||
OUT_OVERLAP_LOOP:
|
||||
LDR r4, [r1], #4
|
||||
LDR r5, [r1], #4
|
||||
SUBS r9, r9, #1
|
||||
|
||||
QADD16 r4, r4, r4
|
||||
QADD16 r5, r5, r5
|
||||
|
||||
LDR r6, [r2], #4
|
||||
LDR r7, [r2], #4
|
||||
|
||||
STRH r4, [r0], r3
|
||||
STRH r5, [r0], r3
|
||||
|
||||
STR r6, [r8], #4
|
||||
STR r7, [r8], #4
|
||||
|
||||
BGT OUT_OVERLAP_LOOP
|
||||
|
||||
LDMFD sp!, {r4-r9, r15}
|
||||
|
||||
|
||||
|
||||
|
||||
ixheaacd_spec_to_overlapbuf_armv7:
|
||||
|
||||
STMFD sp!, {r4-r10, r14}
|
||||
|
||||
MOV r6, #1
|
||||
RSB r2, r2, #16
|
||||
AND r2, r2, #0xFF
|
||||
SUB r7, r2, #1
|
||||
LSL r14, r6, r7
|
||||
MOV r3, r3, ASR #1
|
||||
|
||||
OVERLAP_LOOP1:
|
||||
|
||||
|
||||
LDMIA r1!, {r4-r5}
|
||||
SUBS r3, r3, #1
|
||||
|
||||
QADD r4, r4, r14
|
||||
QADD r5, r5, r14
|
||||
|
||||
MOV r4, r4, ASR r2
|
||||
MOV r5, r5, ASR r2
|
||||
|
||||
STR r4, [r0], #4
|
||||
STR r5, [r0], #4
|
||||
|
||||
|
||||
BGT OVERLAP_LOOP1
|
||||
LDMFD sp!, {r4-r10, pc}
|
||||
|
||||
694
decoder/armv7/ixheaacd_mps_complex_fft_64_asm.s
Normal file
694
decoder/armv7/ixheaacd_mps_complex_fft_64_asm.s
Normal file
|
|
@ -0,0 +1,694 @@
|
|||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_mps_complex_fft_64_asm
|
||||
|
||||
ixheaacd_mps_complex_fft_64_asm:
|
||||
@LDR r4,[sp]
|
||||
STMFD sp!, {r0-r12, lr}
|
||||
LDR r4, [sp, #0x38]
|
||||
SUB sp, sp, #0x28
|
||||
@ LDR r4,[sp,#0x30]
|
||||
LDR r0, [sp, #0x2c]
|
||||
@LDR r12,[sp,#0x5c+4]
|
||||
EOR r0, r0, r0, ASR #31
|
||||
CLZ r0, r0
|
||||
SUB r12, r0, #16 @dig_rev_shift = norm32(npoints) + 1 -16@
|
||||
SUB r0, r0, #1
|
||||
RSB r0, r0, #0x1e
|
||||
AND r1, r0, #1
|
||||
STR r1, [sp, #0x14]
|
||||
MOV r1, r0, ASR #1
|
||||
LDR r0, [sp, #0x2c] @npoints
|
||||
STR r1, [sp, #-4]!
|
||||
MOV lr, r0, LSL #1 @(npoints >>1) * 4
|
||||
MOV r0, #0
|
||||
MOV r12, r4
|
||||
FIRST_STAGE_R4:
|
||||
LDRB r10, [r12, r0, LSR #2]
|
||||
|
||||
|
||||
ADD r1, r2, r10, LSL #2
|
||||
LDRD r4, [r1] @r4=x0r, r5=x0i
|
||||
ADD r1, r1, lr
|
||||
LDRD r8, [r1] @r8=x1r, r9=x1i
|
||||
ADD r1, r1, lr
|
||||
LDRD r6, [r1] @r6=x2r, r7=x2i
|
||||
ADD r1, r1, lr
|
||||
LDRD r10, [r1] @r10=x3r, r11=x3i
|
||||
ADD r0, r0, #4
|
||||
CMP r0, lr, ASR #1
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x2r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x2i@
|
||||
SUB r6, r4, r6, lsl#1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r7, r5, r7, lsl#1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r8, r8, r10 @x1r = x1r + x3r@
|
||||
ADD r9, r9, r11 @x1i = x1i + x3i@
|
||||
SUB r1, r8, r10, lsl#1 @x3r = x1r - (x3r << 1)@
|
||||
SUB r11, r9, r11, lsl#1 @x3i = x1i - (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r8 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r9 @x0i = x0i + x1i@
|
||||
SUB r8, r4, r8, lsl#1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r9, r5, r9, lsl#1 @x1i = x0i - (x1i << 1)
|
||||
ADD r6, r6, r11 @x2r = x2r + x3i@
|
||||
SUB r7, r7, r1 @x2i = x2i - x3r@
|
||||
SUB r10, r6, r11, lsl#1 @x3i = x2r - (x3i << 1)@
|
||||
ADD r11, r7, r1, lsl#1 @x3r = x2i + (x3r << 1)@
|
||||
|
||||
STMIA r3!, {r4-r11}
|
||||
BLT FIRST_STAGE_R4
|
||||
LDR r1, [sp], #4
|
||||
LDR r0, [sp, #0x2c]
|
||||
MOV r12, #0x40 @nodespacing = 64@
|
||||
STR r12, [sp, #0x1c]
|
||||
LDR r12, [sp, #0x2c]
|
||||
SUB r3, r3, r0, LSL #3
|
||||
SUBS r1, r1, #1
|
||||
STR r3, [sp, #0x34]
|
||||
MOV r4, r12, ASR #4
|
||||
MOV r0, #4
|
||||
STR r4, [sp, #0x18]
|
||||
STR r1, [sp, #0x20]
|
||||
BLE EXIT
|
||||
OUTER_LOOP:
|
||||
LDR r1, [sp, #0x28]
|
||||
LDR r12, [sp, #0x34] @WORD32 *data = ptr_y@
|
||||
STR r1, [sp, #0x10]
|
||||
LDR r1, [sp, #0x18]
|
||||
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
LOOP_TRIVIAL_TWIDDLE:
|
||||
LDRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0
|
||||
LDRD r6, [r12] @r6=x1r, r7=x1i
|
||||
ADD r12, r12, r0
|
||||
LDRD r8, [r12] @r8=x2r, r9=x2i
|
||||
ADD r12, r12, r0
|
||||
LDRD r10, [r12] @r10=x3r, r11=x3i
|
||||
|
||||
@MOV r4,r4,ASR #1
|
||||
@MOV r5,r5,ASR #1
|
||||
@MOV r6,r6,ASR #1
|
||||
@MOV r7,r7,ASR #1
|
||||
@MOV r8,r8,ASR #1
|
||||
@MOV r9,r9,ASR #1
|
||||
@MOV r10,r10,ASR #1
|
||||
@MOV r11,r11,ASR #1
|
||||
|
||||
ADD r4, r4, r8 @x0r = x0r + x2r@
|
||||
ADD r5, r5, r9 @x0i = x0i + x2i@
|
||||
SUB r8, r4, r8, lsl #1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r9, r5, r9, lsl #1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r6, r6, r10 @x1r = x1r + x3r@
|
||||
ADD r7, r7, r11 @x1i = x1i + x3i@
|
||||
SUB r2, r6, r10, lsl #1 @x3r = x1r - (x3r << 1)@
|
||||
SUB r11, r7, r11, lsl #1 @x3i = x1i - (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x1i@
|
||||
@MOV r4,r4,ASR #1
|
||||
@MOV r5,r5,ASR #1
|
||||
SUB r6, r4, r6, lsl #1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r7, r5, r7, lsl #1 @x1i = x0i - (x1i << 1)
|
||||
ADD r8, r8, r11 @x2r = x2r + x3i@
|
||||
SUB r9, r9, r2 @x2i = x2i - x3r@
|
||||
SUB r10, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
|
||||
ADD r11, r9, r2, lsl#1 @x3r = x2i + (x3r << 1)
|
||||
|
||||
STRD r10, [r12] @r10=x3r, r11=x3i
|
||||
SUB r12, r12, r0
|
||||
STRD r6, [r12] @r6=x1r, r7=x1i
|
||||
SUB r12, r12, r0
|
||||
STRD r8, [r12] @r8=x2r, r9=x2i
|
||||
SUB r12, r12, r0
|
||||
STRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0, lsl #2
|
||||
|
||||
SUBS r1, r1, #1
|
||||
BNE LOOP_TRIVIAL_TWIDDLE
|
||||
|
||||
MOV r0, r0, ASR #3
|
||||
LDR r4, [sp, #0x1c]
|
||||
LDR r3, [sp, #0x34]
|
||||
MUL r1, r0, r4
|
||||
ADD r12, r3, #8
|
||||
STR r1, [sp, #0x24]
|
||||
MOV r3, r1, ASR #2
|
||||
ADD r3, r3, r1, ASR #3
|
||||
SUB r3, r3, r1, ASR #4
|
||||
ADD r3, r3, r1, ASR #5
|
||||
SUB r3, r3, r1, ASR #6
|
||||
ADD r3, r3, r1, ASR #7
|
||||
SUB r3, r3, r1, ASR #8
|
||||
STR r3, [sp, #-4]!
|
||||
SECOND_LOOP:
|
||||
LDR r3, [sp, #0x10+4]
|
||||
LDR r14, [sp, #0x18+4]
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
|
||||
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
|
||||
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
|
||||
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
|
||||
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
|
||||
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
|
||||
|
||||
STR r4, [sp, #8+4]
|
||||
STR r1, [sp, #-4]
|
||||
STR r2, [sp, #-8]
|
||||
STR r5, [sp, #-12]
|
||||
STR r6, [sp, #-16]
|
||||
STR r7, [sp, #-20]
|
||||
STR r8, [sp, #-24]
|
||||
|
||||
RADIX4_BFLY:
|
||||
|
||||
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
|
||||
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
|
||||
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
|
||||
SUBS r14, r14, #1
|
||||
|
||||
LDR r1, [sp, #-4]
|
||||
LDR r2, [sp, #-8]
|
||||
|
||||
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r6, r3, r6, LSL#1
|
||||
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r7, r3, r7, LSL#1
|
||||
ADD r7, r7, r6
|
||||
SUB r6, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-12]
|
||||
LDR r2, [sp, #-16]
|
||||
|
||||
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r8, r3, r8, LSL#1
|
||||
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r9, r3, r9, LSL#1
|
||||
ADD r9, r9, r8
|
||||
SUB r8, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-20]
|
||||
LDR r2, [sp, #-24]
|
||||
|
||||
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r10, r3, r10, LSL#1
|
||||
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r11, r3, r11, LSL#1
|
||||
ADD r11, r11, r10
|
||||
SUB r10, r4, r5 @
|
||||
|
||||
@SUB r12,r12,r0,lsl #1
|
||||
@LDRD r4,[r12] @r4=x0r, r5=x0i
|
||||
LDR r4, [r12, -r0, lsl #1]! @
|
||||
LDR r5, [r12, #4]
|
||||
|
||||
|
||||
ADD r4, r8, r4 @x0r = x0r + x2r@
|
||||
ADD r5, r9, r5 @x0i = x0i + x2i@
|
||||
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r6, r6, r10 @x1r = x1r + x3r@
|
||||
ADD r7, r7, r11 @x1i = x1i + x3i@
|
||||
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
|
||||
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x1i@
|
||||
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
|
||||
STRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0
|
||||
|
||||
ADD r8, r8, r11 @x2r = x2r + x3i@
|
||||
SUB r9, r9, r10 @x2i = x2i - x3r@
|
||||
SUB r4, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
|
||||
ADD r5, r9, r10, lsl#1 @x3r = x2i + (x3r << 1)
|
||||
|
||||
STRD r8, [r12] @r8=x2r, r9=x2i
|
||||
ADD r12, r12, r0
|
||||
STRD r6, [r12] @r6=x1r, r7=x1i
|
||||
ADD r12, r12, r0
|
||||
STRD r4, [r12] @r10=x3r, r11=x3i
|
||||
ADD r12, r12, r0
|
||||
|
||||
BNE RADIX4_BFLY
|
||||
MOV r0, r0, ASR #3
|
||||
|
||||
LDR r1, [sp, #0x2c+4]
|
||||
LDR r4, [sp, #8+4]
|
||||
SUB r1, r12, r1, LSL #3
|
||||
LDR r6, [sp, #0x1c+4]
|
||||
ADD r12, r1, #8
|
||||
LDR r7, [sp, #0]
|
||||
ADD r4, r4, r6
|
||||
CMP r4, r7
|
||||
BLE SECOND_LOOP
|
||||
|
||||
SECOND_LOOP_2:
|
||||
LDR r3, [sp, #0x10+4]
|
||||
LDR r14, [sp, #0x18+4]
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
|
||||
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
|
||||
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
|
||||
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
|
||||
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
|
||||
SUB r3, r3, #2048 @ 512 *4
|
||||
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
|
||||
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
|
||||
|
||||
STR r4, [sp, #8+4]
|
||||
|
||||
STR r1, [sp, #-4]
|
||||
STR r2, [sp, #-8]
|
||||
STR r5, [sp, #-12]
|
||||
STR r6, [sp, #-16]
|
||||
STR r7, [sp, #-20]
|
||||
STR r8, [sp, #-24]
|
||||
|
||||
RADIX4_BFLY_2:
|
||||
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
|
||||
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
|
||||
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
|
||||
SUBS r14, r14, #1
|
||||
LDR r1, [sp, #-4]
|
||||
LDR r2, [sp, #-8]
|
||||
|
||||
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r6, r3, r6, LSL#1
|
||||
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r7, r3, r7, LSL#1
|
||||
ADD r7, r7, r6
|
||||
SUB r6, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-12]
|
||||
LDR r2, [sp, #-16]
|
||||
|
||||
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r8, r3, r8, LSL#1
|
||||
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r9, r3, r9, LSL#1
|
||||
ADD r9, r9, r8
|
||||
SUB r8, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-20]
|
||||
LDR r2, [sp, #-24]
|
||||
|
||||
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r10, r3, r10, LSL#1
|
||||
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r11, r3, r11, LSL#1
|
||||
ADD r10, r11, r10
|
||||
SUB r11, r5, r4 @
|
||||
|
||||
@SUB r12,r12,r0,lsl #1
|
||||
@LDRD r4,[r12] @r4=x0r, r5=x0i
|
||||
LDR r4, [r12, -r0, lsl #1]! @
|
||||
LDR r5, [r12, #4]
|
||||
|
||||
|
||||
ADD r4, r8, r4 @x0r = x0r + x2r@
|
||||
ADD r5, r9, r5 @x0i = x0i + x2i@
|
||||
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r6, r6, r10 @x1r = x1r + x3r@
|
||||
ADD r7, r7, r11 @x1i = x1i + x3i@
|
||||
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
|
||||
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x1i@
|
||||
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
|
||||
STRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0
|
||||
|
||||
ADD r8, r8, r11 @x2r = x2r + x3i@
|
||||
SUB r9, r9, r10 @x2i = x2i - x3r@
|
||||
SUB r4, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
|
||||
ADD r5, r9, r10, lsl#1 @x3r = x2i + (x3r << 1)
|
||||
|
||||
STRD r8, [r12] @r8=x2r, r9=x2i
|
||||
ADD r12, r12, r0
|
||||
STRD r6, [r12] @r6=x1r, r7=x1i
|
||||
ADD r12, r12, r0
|
||||
STRD r4, [r12] @r10=x3r, r11=x3i
|
||||
ADD r12, r12, r0
|
||||
|
||||
BNE RADIX4_BFLY_2
|
||||
MOV r0, r0, ASR #3
|
||||
|
||||
LDR r1, [sp, #0x2c+4]
|
||||
LDR r4, [sp, #8+4]
|
||||
SUB r1, r12, r1, LSL #3
|
||||
LDR r6, [sp, #0x1c+4]
|
||||
ADD r12, r1, #8
|
||||
LDR r7, [sp, #0x24+4]
|
||||
ADD r4, r4, r6
|
||||
CMP r4, r7, ASR #1
|
||||
BLE SECOND_LOOP_2
|
||||
LDR r7, [sp, #0]
|
||||
CMP r4, r7, LSL #1
|
||||
BGT SECOND_LOOP_4
|
||||
|
||||
SECOND_LOOP_3:
|
||||
LDR r3, [sp, #0x10+4]
|
||||
LDR r14, [sp, #0x18+4]
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
|
||||
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
|
||||
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
|
||||
SUB r3, r3, #2048 @ 512 *4
|
||||
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
|
||||
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
|
||||
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
|
||||
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
|
||||
|
||||
STR r4, [sp, #8+4]
|
||||
STR r1, [sp, #-4]
|
||||
STR r2, [sp, #-8]
|
||||
STR r5, [sp, #-12]
|
||||
STR r6, [sp, #-16]
|
||||
STR r7, [sp, #-20]
|
||||
STR r8, [sp, #-24]
|
||||
|
||||
|
||||
RADIX4_BFLY_3:
|
||||
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
|
||||
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
|
||||
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
|
||||
SUBS r14, r14, #1
|
||||
|
||||
LDR r1, [sp, #-4]
|
||||
LDR r2, [sp, #-8]
|
||||
|
||||
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r6, r3, r6, LSL#1
|
||||
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r7, r3, r7, LSL#1
|
||||
ADD r7, r7, r6
|
||||
SUB r6, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-12]
|
||||
LDR r2, [sp, #-16]
|
||||
|
||||
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r8, r3, r8, LSL#1
|
||||
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r9, r3, r9, LSL#1
|
||||
ADD r8, r9, r8
|
||||
SUB r9, r5, r4 @
|
||||
|
||||
LDR r1, [sp, #-20]
|
||||
LDR r2, [sp, #-24]
|
||||
|
||||
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r10, r3, r10, LSL#1
|
||||
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r11, r3, r11, LSL#1
|
||||
ADD r10, r11, r10
|
||||
SUB r11, r5, r4 @
|
||||
|
||||
@SUB r12,r12,r0,lsl #1
|
||||
@LDRD r4,[r12] @r4=x0r, r5=x0i
|
||||
LDR r4, [r12, -r0, lsl #1]! @
|
||||
LDR r5, [r12, #4]
|
||||
|
||||
|
||||
ADD r4, r8, r4 @x0r = x0r + x2r@
|
||||
ADD r5, r9, r5 @x0i = x0i + x2i@
|
||||
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r6, r6, r10 @x1r = x1r + x3r@
|
||||
ADD r7, r7, r11 @x1i = x1i + x3i@
|
||||
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
|
||||
SUB r11, r7, r11, lsl#1 @x3i = x1i - (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x1i@
|
||||
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
|
||||
STRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0
|
||||
|
||||
ADD r8, r8, r11 @x2r = x2r + x3i@
|
||||
SUB r9, r9, r10 @x2i = x2i - x3r@
|
||||
SUB r4, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
|
||||
ADD r5, r9, r10, lsl#1 @x3r = x2i + (x3r << 1)
|
||||
|
||||
STRD r8, [r12] @r8=x2r, r9=x2i
|
||||
ADD r12, r12, r0
|
||||
STRD r6, [r12] @r6=x1r, r7=x1i
|
||||
ADD r12, r12, r0
|
||||
STRD r4, [r12] @r10=x3r, r11=x3i
|
||||
ADD r12, r12, r0
|
||||
|
||||
BNE RADIX4_BFLY_3
|
||||
MOV r0, r0, ASR #3
|
||||
|
||||
LDR r1, [sp, #0x2c+4]
|
||||
LDR r4, [sp, #8+4]
|
||||
SUB r1, r12, r1, LSL #3
|
||||
LDR r6, [sp, #0x1c+4]
|
||||
ADD r12, r1, #8
|
||||
LDR r7, [sp, #0]
|
||||
ADD r4, r4, r6
|
||||
CMP r4, r7, LSL #1
|
||||
BLE SECOND_LOOP_3
|
||||
|
||||
SECOND_LOOP_4:
|
||||
LDR r3, [sp, #0x10+4]
|
||||
LDR r14, [sp, #0x18+4]
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
|
||||
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
|
||||
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@
|
||||
SUB r3, r3, #2048 @ 512 *4
|
||||
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
|
||||
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@
|
||||
SUB r3, r3, #2048 @ 512 *4
|
||||
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
|
||||
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
|
||||
|
||||
|
||||
STR r4, [sp, #8+4]
|
||||
STR r1, [sp, #-4]
|
||||
STR r2, [sp, #-8]
|
||||
STR r5, [sp, #-12]
|
||||
STR r6, [sp, #-16]
|
||||
STR r7, [sp, #-20]
|
||||
STR r8, [sp, #-24]
|
||||
|
||||
RADIX4_BFLY_4:
|
||||
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
|
||||
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
|
||||
LDRD r10, [r12, r0] @r10=x3r, r11=x3i
|
||||
SUBS r14, r14, #1
|
||||
|
||||
LDR r1, [sp, #-4]
|
||||
LDR r2, [sp, #-8]
|
||||
|
||||
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r6, r3, r6, LSL#1
|
||||
SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r7, r3, r7, LSL#1
|
||||
ADD r7, r7, r6
|
||||
SUB r6, r4, r5 @
|
||||
|
||||
LDR r1, [sp, #-12]
|
||||
LDR r2, [sp, #-16]
|
||||
|
||||
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r8, r3, r8, LSL#1
|
||||
SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheacd_mult32(x1r,w1h) ,x1i,w1l)
|
||||
LSR r3, r3, #31
|
||||
ORR r9, r3, r9, LSL#1
|
||||
ADD r8, r9, r8
|
||||
SUB r9, r5, r4 @
|
||||
|
||||
LDR r1, [sp, #-20]
|
||||
LDR r2, [sp, #-24]
|
||||
|
||||
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r4, r3, r4, LSL#1
|
||||
SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r10, r3, r10, LSL#1
|
||||
SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
|
||||
LSR r3, r3, #31
|
||||
ORR r5, r3, r5, LSL#1
|
||||
SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheacd_mult32(x3r,w3h) ,x3i,w3l)
|
||||
LSR r3, r3, #31
|
||||
ORR r11, r3, r11, LSL#1
|
||||
ADD r11, r11, r10
|
||||
SUB r10, r5, r4 @
|
||||
|
||||
@SUB r12,r12,r0,lsl #1
|
||||
@LDRD r4,[r12] @r4=x0r, r5=x0i
|
||||
LDR r4, [r12, -r0, lsl #1]! @
|
||||
LDR r5, [r12, #4]
|
||||
|
||||
|
||||
ADD r4, r8, r4 @x0r = x0r + x2r@
|
||||
ADD r5, r9, r5 @x0i = x0i + x2i@
|
||||
SUB r8, r4, r8, lsl#1 @x2r = x0r - (x2r << 1)@
|
||||
SUB r9, r5, r9, lsl#1 @x2i = x0i - (x2i << 1)@
|
||||
ADD r6, r6, r10 @x1r = x1r + x3r@
|
||||
SUB r7, r7, r11 @x1i = x1i - x3i@
|
||||
SUB r10, r6, r10, lsl#1 @x3r = x1r - (x3r << 1)@
|
||||
ADD r11, r7, r11, lsl#1 @x3i = x1i + (x3i << 1)@
|
||||
|
||||
ADD r4, r4, r6 @x0r = x0r + x1r@
|
||||
ADD r5, r5, r7 @x0i = x0i + x1i@
|
||||
SUB r6, r4, r6, lsl#1 @x1r = x0r - (x1r << 1)@
|
||||
SUB r7, r5, r7, lsl#1 @x1i = x0i - (x1i << 1)
|
||||
STRD r4, [r12] @r4=x0r, r5=x0i
|
||||
ADD r12, r12, r0
|
||||
ADD r8, r8, r11 @x2r = x2r + x3i@
|
||||
SUB r9, r9, r10 @x2i = x2i - x3r@
|
||||
SUB r4, r8, r11, lsl#1 @x3i = x2r - (x3i << 1)@
|
||||
ADD r5, r9, r10, lsl#1 @x3r = x2i + (x3r << 1)
|
||||
|
||||
STRD r8, [r12] @r8=x2r, r9=x2i
|
||||
ADD r12, r12, r0
|
||||
STRD r6, [r12] @r6=x1r, r7=x1i
|
||||
ADD r12, r12, r0
|
||||
STRD r4, [r12] @r10=x3r, r11=x3i
|
||||
ADD r12, r12, r0
|
||||
|
||||
BNE RADIX4_BFLY_4
|
||||
MOV r0, r0, ASR #3
|
||||
|
||||
LDR r1, [sp, #0x2c+4]
|
||||
LDR r4, [sp, #8+4]
|
||||
SUB r1, r12, r1, LSL #3
|
||||
LDR r6, [sp, #0x1c+4]
|
||||
ADD r12, r1, #8
|
||||
LDR r7, [sp, #0x24+4]
|
||||
ADD r4, r4, r6
|
||||
CMP r4, r7
|
||||
BLT SECOND_LOOP_4
|
||||
ADD sp, sp, #4
|
||||
|
||||
LDR r1, [sp, #0x1c]
|
||||
MOV r0, r0, LSL #2
|
||||
MOV r1, r1, ASR #2
|
||||
STR r1, [sp, #0x1c]
|
||||
LDR r1, [sp, #0x18]
|
||||
MOV r1, r1, ASR #2
|
||||
STR r1, [sp, #0x18]
|
||||
LDR r1, [sp, #0x20]
|
||||
SUBS r1, r1, #1
|
||||
STR r1, [sp, #0x20]
|
||||
BGT OUTER_LOOP
|
||||
|
||||
LDR r1, [sp, #0x14]
|
||||
CMP r1, #0
|
||||
BEQ EXIT
|
||||
LDR r12, [sp, #0x1c]
|
||||
LDR r1, [sp, #0x28]
|
||||
CMP r12, #0
|
||||
LDRNE r12, [sp, #0x1c]
|
||||
MOVEQ r4, #1
|
||||
MOVNE r4, r12, LSL #1
|
||||
MOVS r3, r0
|
||||
BEQ EXIT
|
||||
|
||||
MOV r3, r3, ASR #1
|
||||
LDR r5, [sp, #0x34]
|
||||
MOV r0, r0, LSL #3 @(del<<1) * 4
|
||||
STR r1, [sp, #-4]
|
||||
|
||||
EXIT:
|
||||
ADD sp, sp, #0x38
|
||||
LDMFD sp!, {r4-r12, pc}
|
||||
|
||||
55
decoder/armv7/ixheaacd_mps_synt_out_calc.s
Normal file
55
decoder/armv7/ixheaacd_mps_synt_out_calc.s
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
|
||||
.global ixheaacd_mps_synt_out_calc_armv7
|
||||
ixheaacd_mps_synt_out_calc_armv7:
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
VPUSH {D8-D15}
|
||||
MOV R6, #3
|
||||
MUL R7, R0, R6
|
||||
ADD R4, R1, R0, LSL #2
|
||||
ADD R5, R2, R7, LSL #2
|
||||
MOV R6, #5
|
||||
LOOP1:
|
||||
MOV R8, R0
|
||||
LOOP2:
|
||||
VLD2.32 {D4, D5}, [R3]!
|
||||
VLD1.32 {D0, D1}, [R2]!
|
||||
VLD1.32 {D2, D3}, [R5]!
|
||||
VLD2.32 {D6, D7}, [R3]!
|
||||
|
||||
|
||||
VMULL.S32 Q4, D0, D4
|
||||
VMULL.S32 Q5, D1, D6
|
||||
VMULL.S32 Q6, D2, D5
|
||||
VMULL.S32 Q7, D3, D7
|
||||
VSHRN.S64 D8, Q4, #31
|
||||
VSHRN.S64 D9, Q5, #31
|
||||
VSHRN.S64 D12, Q6, #31
|
||||
VSHRN.S64 D13, Q7, #31
|
||||
|
||||
|
||||
SUBS R8, R8, #4
|
||||
VST1.32 {D8, D9}, [R1]!
|
||||
VST1.32 {D12, D13}, [R4]!
|
||||
BGT LOOP2
|
||||
SUBS R6, R6, #1
|
||||
ADD R1, R1, R0, LSL #2
|
||||
ADD R4, R4, R0, LSL #2
|
||||
ADD R2, R2, R7, LSL #2
|
||||
ADD R5, R5, R7, LSL #2
|
||||
BGT LOOP1
|
||||
VPOP {D8-D15}
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
|
||||
|
||||
65
decoder/armv7/ixheaacd_mps_synt_post_fft_twiddle.s
Normal file
65
decoder/armv7/ixheaacd_mps_synt_post_fft_twiddle.s
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http:@www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
|
||||
.global ixheaacd_mps_synt_post_fft_twiddle_armv7
|
||||
ixheaacd_mps_synt_post_fft_twiddle_armv7:
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
VPUSH {D8-D15}
|
||||
LDR R4, [SP, #104]
|
||||
LDR R5, [SP, #108]
|
||||
|
||||
ADD R6, R5, R0, LSL #3
|
||||
LSL R7, R0, #1
|
||||
MOV R8, #-16
|
||||
ADD R6, R6, R8
|
||||
LOOP1:
|
||||
VLD1.32 {D0, D1}, [R1]!
|
||||
VLD1.32 {D2, D3}, [R2]!
|
||||
VLD1.32 {D4, D5}, [R3]!
|
||||
VLD1.32 {D6, D7}, [R4]!
|
||||
|
||||
VMULL.S32 Q4, D0, D4
|
||||
VMULL.S32 Q5, D2, D6
|
||||
VMULL.S32 Q6, D1, D5
|
||||
VMULL.S32 Q7, D3, D7
|
||||
|
||||
VSHRN.S64 D8, Q4, #31
|
||||
VSHRN.S64 D10, Q5, #31
|
||||
VSHRN.S64 D12, Q6, #31
|
||||
VSHRN.S64 D14, Q7, #31
|
||||
|
||||
VQADD.S32 D1, D8, D10
|
||||
VQADD.S32 D0, D12, D14
|
||||
|
||||
VREV64.32 D1, D1
|
||||
VREV64.32 D0, D0
|
||||
|
||||
|
||||
SUBS R7, R7, #4
|
||||
VST1.32 {D0, D1}, [R6], R8
|
||||
|
||||
BGT LOOP1
|
||||
VPOP {D8-D15}
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
60
decoder/armv7/ixheaacd_mps_synt_post_twiddle.s
Normal file
60
decoder/armv7/ixheaacd_mps_synt_post_twiddle.s
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_mps_synt_post_twiddle_armv7
|
||||
|
||||
|
||||
ixheaacd_mps_synt_post_twiddle_armv7:
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
VPUSH {D8-D15}
|
||||
lsl R3, R3, #1
|
||||
LOOP1:
|
||||
VLD2.32 {D13, D14}, [R2]!
|
||||
VLD2.32 {D15, D16}, [R1]!
|
||||
VLD2.32 {D2, D3}, [R0]
|
||||
VNEG.S32 D12, D2
|
||||
|
||||
VMULL.S32 Q2, D13, D12
|
||||
VMULL.S32 Q3, D13, D3
|
||||
VMULL.S32 Q4, D15, D2
|
||||
VMULL.S32 Q5, D15, D3
|
||||
|
||||
VSHRN.I64 D4, Q2, #31
|
||||
VSHRN.I64 D6, Q3, #31
|
||||
VSHRN.I64 D8, Q4, #31
|
||||
VSHRN.I64 D10, Q5, #31
|
||||
|
||||
VQADD.S32 D0, D8, D6
|
||||
VQADD.S32 D1, D4, D10
|
||||
|
||||
SUBS R3, R3, #4
|
||||
VST2.32 {D0, D1} , [R0]!
|
||||
BGT LOOP1
|
||||
|
||||
VPOP {D8-D15}
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
|
||||
|
||||
60
decoder/armv7/ixheaacd_mps_synt_pre_twiddle.s
Normal file
60
decoder/armv7/ixheaacd_mps_synt_pre_twiddle.s
Normal file
|
|
@ -0,0 +1,60 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_mps_synt_pre_twiddle_armv7
|
||||
|
||||
|
||||
ixheaacd_mps_synt_pre_twiddle_armv7:
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
VPUSH {D8-D15}
|
||||
lsl R3, R3, #1
|
||||
LOOP1:
|
||||
VLD1.32 {D0}, [R2]!
|
||||
VLD1.32 {D1}, [R1]!
|
||||
VLD2.32 {D2, D3}, [R0]
|
||||
VNEG.S32 D12, D2
|
||||
|
||||
VMULL.S32 Q2, D0, D12
|
||||
VMULL.S32 Q3, D0, D3
|
||||
VMULL.S32 Q4, D1, D2
|
||||
VMULL.S32 Q5, D1, D3
|
||||
|
||||
VSHRN.I64 D4, Q2, #31
|
||||
VSHRN.I64 D6, Q3, #31
|
||||
VSHRN.I64 D8, Q4, #31
|
||||
VSHRN.I64 D10, Q5, #31
|
||||
|
||||
VQADD.S32 D0, D8, D6
|
||||
VQADD.S32 D1, D4, D10
|
||||
|
||||
SUBS R3, R3, #4
|
||||
VST2.32 {D0, D1} , [R0]!
|
||||
BGT LOOP1
|
||||
|
||||
VPOP {D8-D15}
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
|
||||
|
||||
110
decoder/armv7/ixheaacd_no_lap1.s
Normal file
110
decoder/armv7/ixheaacd_no_lap1.s
Normal file
|
|
@ -0,0 +1,110 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_neg_shift_spec_armv7
|
||||
|
||||
ixheaacd_neg_shift_spec_armv7:
|
||||
STMFD sp!, {R4-R12, R14}
|
||||
VPUSH {D8 - D15}
|
||||
MOV R5, #448
|
||||
SUB R6, R5, #1
|
||||
MOV R6, R6, LSL #2
|
||||
ADD R6, R6, R0
|
||||
MOV R8, #-16
|
||||
SUB R6, R6, #12
|
||||
MOV R7, R3, LSL #1
|
||||
VDUP.32 Q1, R2
|
||||
MOV R4, #0x8000
|
||||
VDUP.32 Q2, R4
|
||||
|
||||
VLD1.32 {D0, D1}, [R6], R8
|
||||
VQNEG.S32 Q0, Q0
|
||||
|
||||
|
||||
VLD1.32 {D6, D7}, [R6], R8
|
||||
VQSHL.S32 Q15, Q0, Q1
|
||||
VQADD.S32 Q14, Q15, Q2
|
||||
VSHR.S32 Q13, Q14, #16
|
||||
VREV64.32 Q13, Q13
|
||||
SUB R5, R5, #8
|
||||
|
||||
VUZP.16 D27, D26
|
||||
VQNEG.S32 Q3, Q3
|
||||
|
||||
|
||||
LOOP_1:
|
||||
|
||||
|
||||
VST1.16 D27[0], [R1], R7
|
||||
VQSHL.S32 Q12, Q3, Q1
|
||||
VLD1.32 {D0, D1}, [R6], R8
|
||||
VST1.16 D27[1], [R1], R7
|
||||
VQADD.S32 Q11, Q12, Q2
|
||||
VST1.16 D27[2], [R1], R7
|
||||
VQNEG.S32 Q0, Q0
|
||||
VST1.16 D27[3], [R1], R7
|
||||
VSHR.S32 Q10, Q11, #16
|
||||
VREV64.32 Q10, Q10
|
||||
SUBS R5, R5, #8
|
||||
|
||||
|
||||
VUZP.16 D21, D20
|
||||
VQSHL.S32 Q15, Q0, Q1
|
||||
VST1.16 D21[0], [R1], R7
|
||||
VLD1.32 {D6, D7}, [R6], R8
|
||||
VQADD.S32 Q14, Q15, Q2
|
||||
VST1.16 D21[1], [R1], R7
|
||||
VSHR.S32 Q13, Q14, #16
|
||||
VST1.16 D21[2], [R1], R7
|
||||
VREV64.32 Q13, Q13
|
||||
VST1.16 D21[3], [R1], R7
|
||||
|
||||
|
||||
VUZP.16 D27, D26
|
||||
VQNEG.S32 Q3, Q3
|
||||
|
||||
|
||||
|
||||
BGT LOOP_1
|
||||
|
||||
VST1.16 D27[0], [R1], R7
|
||||
VQSHL.S32 Q12, Q3, Q1
|
||||
VST1.16 D27[1], [R1], R7
|
||||
VST1.16 D27[2], [R1], R7
|
||||
VQADD.S32 Q11, Q12, Q2
|
||||
VST1.16 D27[3], [R1], R7
|
||||
VSHR.S32 Q10, Q11, #16
|
||||
|
||||
|
||||
VREV64.32 Q10, Q10
|
||||
|
||||
VUZP.16 D21, D20
|
||||
|
||||
VST1.16 D21[0], [R1], R7
|
||||
VST1.16 D21[1], [R1], R7
|
||||
VST1.16 D21[2], [R1], R7
|
||||
VST1.16 D21[3], [R1], R7
|
||||
VPOP {D8 - D15}
|
||||
LDMFD sp!, {R4-R12, R15}
|
||||
.end
|
||||
|
||||
297
decoder/armv7/ixheaacd_overlap_add1.s
Normal file
297
decoder/armv7/ixheaacd_overlap_add1.s
Normal file
|
|
@ -0,0 +1,297 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
|
||||
|
||||
.global ixheaacd_over_lap_add1_armv7
|
||||
ixheaacd_over_lap_add1_armv7:
|
||||
|
||||
STMFD sp!, {R4-R12, R14}
|
||||
VPUSH {d8 - d15}
|
||||
|
||||
LDR R4, [SP, #104]
|
||||
LDR R5, [SP, #108]
|
||||
LDR R6, [SP, #112]
|
||||
MOV R10, R5, LSL #1
|
||||
SUB R11, R10, #1
|
||||
MOV R10, R11, LSL #2
|
||||
ADD R10, R0, R10
|
||||
SUB R10, R10, #12
|
||||
MOV R8, R11, LSL #1
|
||||
ADD R8, R8, R3
|
||||
SUB R8, R8, #14
|
||||
MOV R12, #0
|
||||
VDUP.S16 D12, R12
|
||||
MOV R12, #-16
|
||||
VDUP.16 Q11, R4
|
||||
VLD1.32 {D6, D7}, [R10], R12
|
||||
MOV R7, #0x2000
|
||||
VREV64.32 Q3, Q3
|
||||
RSB R7, R7, #0
|
||||
VQNEG.S32 Q0, Q3
|
||||
VDUP.32 Q10, R7
|
||||
VUZP.16 D1, D0
|
||||
SUB R11, R5, #1
|
||||
VUZP.16 D7, D6
|
||||
SMULBB R11, R11, R6
|
||||
MOV R11, R11, LSL #1
|
||||
VLD2.16 {D2, D3}, [R8], R12
|
||||
ADD R11, R11, R2
|
||||
VREV64.16 Q1, Q1
|
||||
MOV R4, R6, LSL #1
|
||||
RSB R4, R4, #0
|
||||
MOV R9, R6, LSL #1
|
||||
SMULBB R6, R5, R6
|
||||
MOV R6, R6, LSL #1
|
||||
ADD R6, R6, R2
|
||||
|
||||
|
||||
|
||||
VMULL.U16 Q15, D7, D2
|
||||
VLD1.32 {D4, D5}, [R1]!
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
|
||||
VMLAL.S16 Q15, D6, D2
|
||||
VQSHL.S32 Q15, Q15, Q11
|
||||
|
||||
|
||||
VADDL.S16 Q7, D3, D12
|
||||
|
||||
VMULL.S32 Q13, D4, D14
|
||||
VQMOVN.S64 D28, Q13
|
||||
VMULL.S32 Q13, D5, D15
|
||||
VQMOVN.S64 D29, Q13
|
||||
|
||||
VQADD.S32 Q14, Q14, Q10
|
||||
VQSUB.S32 Q13, Q15, Q14
|
||||
VQSHL.S32 Q13, Q13, #2
|
||||
VSHR.S32 Q13, Q13, #16
|
||||
VUZP.16 D26, D27
|
||||
|
||||
|
||||
VMULL.U16 Q12, D1, D3
|
||||
VSHR.U32 Q12, Q12, #16
|
||||
VMLAL.S16 Q12, D0, D3
|
||||
VQSHL.S32 Q12, Q12, Q11
|
||||
VLD1.32 {D6, D7}, [R10], R12
|
||||
|
||||
|
||||
VADDL.S16 Q7, D2, D12
|
||||
|
||||
VMULL.S32 Q0, D14, D4
|
||||
VQMOVN.S64 D16, Q0
|
||||
VMULL.S32 Q0, D15, D5
|
||||
VQMOVN.S64 D17, Q0
|
||||
|
||||
VREV64.32 Q3, Q3
|
||||
VQADD.S32 Q8, Q8, Q10
|
||||
VQNEG.S32 Q0, Q3
|
||||
VUZP.16 D1, D0
|
||||
VQSUB.S32 Q9, Q12, Q8
|
||||
VUZP.16 D7, D6
|
||||
VQSHL.S32 Q9, Q9, #2
|
||||
VLD2.16 {D2, D3}, [R8], R12
|
||||
VSHR.S32 Q9, Q9, #16
|
||||
VREV64.16 Q1, Q1
|
||||
VUZP.16 D18, D19
|
||||
|
||||
VLD1.32 {D4, D5}, [R1]!
|
||||
SUB R5, R5, #8
|
||||
|
||||
|
||||
LOOP_1:
|
||||
|
||||
VST1.16 D26[0], [R11], R4
|
||||
VMULL.U16 Q15, D7, D2
|
||||
VST1.16 D26[1], [R11], R4
|
||||
VMULL.U16 Q12, D1, D3
|
||||
VST1.16 D26[2], [R11], R4
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VST1.16 D26[3], [R11], R4
|
||||
VSHR.U32 Q12, Q12, #16
|
||||
VST1.16 D18[0], [R6], R9
|
||||
VMLAL.S16 Q15, D6, D2
|
||||
VST1.16 D18[1], [R6], R9
|
||||
VMLAL.S16 Q12, D0, D3
|
||||
VST1.16 D18[2], [R6], R9
|
||||
VQSHL.S32 Q15, Q15, Q11
|
||||
VST1.16 D18[3], [R6], R9
|
||||
VQSHL.S32 Q12, Q12, Q11
|
||||
VLD1.32 {D6, D7}, [R10], R12
|
||||
|
||||
|
||||
VADDL.S16 Q7, D3, D12
|
||||
|
||||
VMULL.S32 Q8, D4, D14
|
||||
VQMOVN.S64 D28, Q8
|
||||
VMULL.S32 Q8, D5, D15
|
||||
VQMOVN.S64 D29, Q8
|
||||
VREV64.32 Q3, Q3
|
||||
|
||||
|
||||
|
||||
VADDL.S16 Q7, D2, D12
|
||||
|
||||
VMULL.S32 Q0, D4, D14
|
||||
VQMOVN.S64 D16, Q0
|
||||
VMULL.S32 Q0, D5, D15
|
||||
VQMOVN.S64 D17, Q0
|
||||
|
||||
VLD2.16 {D2, D3}, [R8], R12
|
||||
VQNEG.S32 Q0, Q3
|
||||
VLD1.32 {D4, D5}, [R1]!
|
||||
VQADD.S32 Q14, Q14, Q10
|
||||
VUZP.16 D1, D0
|
||||
VQADD.S32 Q8, Q8, Q10
|
||||
VUZP.16 D7, D6
|
||||
VQSUB.S32 Q13, Q15, Q14
|
||||
VREV64.16 Q1, Q1
|
||||
VQSUB.S32 Q9, Q12, Q8
|
||||
VQSHL.S32 Q13, Q13, #2
|
||||
VQSHL.S32 Q9, Q9, #2
|
||||
VMULL.U16 Q15, D7, D2
|
||||
VSHR.S32 Q13, Q13, #16
|
||||
VUZP.16 D26, D27
|
||||
VSHR.S32 Q9, Q9, #16
|
||||
VST1.16 D26[0], [R11], R4
|
||||
VMULL.U16 Q12, D1, D3
|
||||
VUZP.16 D18, D19
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VST1.16 D26[1], [R11], R4
|
||||
VMLAL.S16 Q15, D6, D2 @MLA
|
||||
VST1.16 D26[2], [R11], R4
|
||||
VSHR.U32 Q12, Q12, #16
|
||||
VST1.16 D26[3], [R11], R4
|
||||
VMLAL.S16 Q12, D0, D3 @MLA
|
||||
VST1.16 D18[0], [R6], R9
|
||||
VQSHL.S32 Q15, Q15, Q11
|
||||
VST1.16 D18[1], [R6], R9
|
||||
VQSHL.S32 Q12, Q12, Q11
|
||||
VST1.16 D18[2], [R6], R9
|
||||
|
||||
|
||||
VADDL.S16 Q7, D3, D12
|
||||
|
||||
VMULL.S32 Q8, D4, D14
|
||||
VQMOVN.S64 D28, Q8
|
||||
VMULL.S32 Q8, D5, D15
|
||||
VQMOVN.S64 D29, Q8
|
||||
|
||||
VST1.16 D18[3], [R6], R9
|
||||
|
||||
|
||||
VADDL.S16 Q7, D2, D12
|
||||
|
||||
VMULL.S32 Q0, D4, D14
|
||||
VQMOVN.S64 D16, Q0
|
||||
VMULL.S32 Q0, D5, D15
|
||||
VQMOVN.S64 D17, Q0
|
||||
|
||||
VLD1.32 {D6, D7}, [R10], R12
|
||||
VQADD.S32 Q14, Q14, Q10
|
||||
VREV64.32 Q3, Q3
|
||||
VQNEG.S32 Q0, Q3
|
||||
VUZP.16 D1, D0
|
||||
VQSUB.S32 Q13, Q15, Q14
|
||||
VUZP.16 D7, D6
|
||||
VQADD.S32 Q8, Q8, Q10
|
||||
VLD2.16 {D2, D3}, [R8], R12
|
||||
VQSUB.S32 Q9, Q12, Q8
|
||||
VREV64.16 Q1, Q1
|
||||
VQSHL.S32 Q13, Q13, #2
|
||||
VLD1.32 {D4, D5}, [R1]!
|
||||
VQSHL.S32 Q9, Q9, #2
|
||||
VSHR.S32 Q13, Q13, #16
|
||||
SUBS R5, R5, #8
|
||||
VSHR.S32 Q9, Q9, #16
|
||||
VUZP.16 D26, D27
|
||||
VUZP.16 D18, D19
|
||||
|
||||
|
||||
|
||||
BGT LOOP_1
|
||||
|
||||
VST1.16 D26[0], [R11], R4
|
||||
VMULL.U16 Q15, D7, D2
|
||||
VST1.16 D26[1], [R11], R4
|
||||
VMULL.U16 Q12, D1, D3
|
||||
VST1.16 D26[2], [R11], R4
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VST1.16 D26[3], [R11], R4
|
||||
VSHR.U32 Q12, Q12, #16
|
||||
|
||||
VST1.16 D18[0], [R6], R9
|
||||
VMLAL.S16 Q15, D6, D2
|
||||
VST1.16 D18[1], [R6], R9
|
||||
VMLAL.S16 Q12, D0, D3
|
||||
VST1.16 D18[2], [R6], R9
|
||||
VQSHL.S32 Q15, Q15, Q11
|
||||
VST1.16 D18[3], [R6], R9
|
||||
VQSHL.S32 Q12, Q12, Q11
|
||||
|
||||
|
||||
|
||||
|
||||
VADDL.S16 Q7, D3, D12
|
||||
|
||||
VMULL.S32 Q8, D4, D14
|
||||
VQMOVN.S64 D28, Q8
|
||||
VMULL.S32 Q8, D5, D15
|
||||
VQMOVN.S64 D29, Q8
|
||||
|
||||
|
||||
|
||||
VADDL.S16 Q7, D2, D12
|
||||
|
||||
VMULL.S32 Q13, D4, D14
|
||||
VQMOVN.S64 D16, Q13
|
||||
VMULL.S32 Q13, D5, D15
|
||||
VQMOVN.S64 D17, Q13
|
||||
|
||||
VQADD.S32 Q14, Q14, Q10
|
||||
VQADD.S32 Q8, Q8, Q10
|
||||
VQSUB.S32 Q13, Q15, Q14
|
||||
VQSUB.S32 Q9, Q12, Q8
|
||||
VQSHL.S32 Q13, Q13, #2
|
||||
VQSHL.S32 Q9, Q9, #2
|
||||
VSHR.S32 Q13, Q13, #16
|
||||
VSHR.S32 Q9, Q9, #16
|
||||
VUZP.16 D26, D27
|
||||
|
||||
VUZP.16 D18, D19
|
||||
|
||||
|
||||
|
||||
VST1.16 D26[0], [R11], R4
|
||||
VST1.16 D26[1], [R11], R4
|
||||
VST1.16 D26[2], [R11], R4
|
||||
VST1.16 D26[3], [R11], R4
|
||||
|
||||
VST1.16 D18[0], [R6], R9
|
||||
VST1.16 D18[1], [R6], R9
|
||||
VST1.16 D18[2], [R6], R9
|
||||
VST1.16 D18[3], [R6], R9
|
||||
|
||||
VPOP {d8 - d15}
|
||||
LDMFD sp!, {R4-R12, R15}
|
||||
|
||||
268
decoder/armv7/ixheaacd_overlap_add2.s
Normal file
268
decoder/armv7/ixheaacd_overlap_add2.s
Normal file
|
|
@ -0,0 +1,268 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_over_lap_add2_armv7
|
||||
|
||||
ixheaacd_over_lap_add2_armv7:
|
||||
|
||||
STMFD sp!, {R4-R12, R14}
|
||||
VPUSH {d8 - d15}
|
||||
|
||||
LDR R4, [SP, #104]
|
||||
LDR R5, [SP, #108]
|
||||
LDR R6, [SP, #112]
|
||||
RSB R4, R4, #15
|
||||
CMP R4, #31
|
||||
MOVGT R4, #31
|
||||
SUB R9, R4, #1
|
||||
MOV R8, #1
|
||||
MOV R8, R8, LSL R9
|
||||
RSB R4, R4, #0
|
||||
VDUP.32 Q11, R4
|
||||
VDUP.32 Q10, R8
|
||||
MOV R8, R5
|
||||
SUB R12, R5, #1
|
||||
MOV R9, R5, LSL #2
|
||||
MOV R12, R12, LSL #2
|
||||
ADD R10, R0, R9
|
||||
ADD R7, R1, R12
|
||||
VLD2.16 {D0, D1}, [R10]!
|
||||
MOV R11, R6, LSL #2
|
||||
SUB R7, R7, #12
|
||||
ADD R4, R4, #1
|
||||
MOV R12, #-16
|
||||
VLD2.16 {D6, D7}, [R7], R12
|
||||
MOV R4, #0x8000
|
||||
VREV64.16 D4, D6
|
||||
VREV64.16 D5, D7
|
||||
MOV R4, R3
|
||||
|
||||
MOV R9, R2
|
||||
VLD2.16 {D2, D3}, [R3]!
|
||||
|
||||
VMULL.U16 Q13, D0, D2
|
||||
VMLSL.U16 Q13, D4, D3
|
||||
VLD2.16 {D8, D9}, [R10]!
|
||||
VSHR.S32 Q13, Q13, #16
|
||||
VLD2.16 {D10, D11}, [R3]!
|
||||
VMLAL.S16 Q13, D1, D2
|
||||
VMLSL.S16 Q13, D5, D3
|
||||
VLD2.16 {D14, D15}, [R7], R12
|
||||
VREV64.16 Q6, Q7
|
||||
VQADD.S32 Q12, Q13, Q10
|
||||
VQSHL.S32 Q12, Q12, Q11
|
||||
SUB R8, R8, #8
|
||||
|
||||
|
||||
LOOP_1:
|
||||
|
||||
VLD2.16 {D0, D1}, [R10]!
|
||||
VMULL.U16 Q9, D8, D10
|
||||
VLD2.16 {D2, D3}, [R3]!
|
||||
VMLSL.U16 Q9, D12, D11
|
||||
VLD2.16 {D6, D7}, [R7], R12
|
||||
VMULL.U16 Q13, D0, D2
|
||||
VREV64.16 D4, D6
|
||||
VMLSL.U16 Q13, D4, D3
|
||||
VREV64.16 D5, D7
|
||||
VSHR.S32 Q9, Q9, #16
|
||||
VST1.32 {D24[0]}, [R2], R11
|
||||
VMLAL.S16 Q9, D9, D10
|
||||
VST1.32 {D24[1]}, [R2], R11
|
||||
VSHR.S32 Q13, Q13, #16
|
||||
VST1.32 {D25[0]}, [R2], R11
|
||||
VMLAL.S16 Q13, D1, D2
|
||||
|
||||
VST1.32 {D25[1]}, [R2], R11
|
||||
VMLSL.S16 Q9, D13, D11
|
||||
VMLSL.S16 Q13, D5, D3
|
||||
|
||||
VLD2.16 {D8, D9}, [R10]!
|
||||
VLD2.16 {D10, D11}, [R3]!
|
||||
|
||||
|
||||
VLD2.16 {D14, D15}, [R7], R12
|
||||
VQADD.S32 Q8, Q9, Q10
|
||||
VREV64.16 Q6, Q7
|
||||
VQADD.S32 Q12, Q13, Q10
|
||||
VQSHL.S32 Q8, Q8, Q11
|
||||
VST1.32 D16[0], [R2], R11
|
||||
VQSHL.S32 Q12, Q12, Q11
|
||||
|
||||
|
||||
SUBS R8, R8, #8
|
||||
|
||||
VST1.32 D16[1], [R2], R11
|
||||
VST1.32 D17[0], [R2], R11
|
||||
VST1.32 D17[1], [R2], R11
|
||||
|
||||
|
||||
BGT LOOP_1
|
||||
|
||||
|
||||
VST1.32 D24[0], [R2], R11
|
||||
VMULL.U16 Q9, D8, D10
|
||||
VMLSL.U16 Q9, D12, D11
|
||||
VST1.32 D24[1], [R2], R11
|
||||
VST1.32 D25[0], [R2], R11
|
||||
VSHR.S32 Q9, Q9, #16
|
||||
VST1.32 D25[1], [R2], R11
|
||||
VMLAL.S16 Q9, D9, D10
|
||||
VMLSL.S16 Q9, D13, D11
|
||||
MOV R12, #12
|
||||
SMULBB R7, R5, R6
|
||||
MOV R10, R5, LSL #1
|
||||
VQADD.S32 Q8, Q9, Q10
|
||||
VQSHL.S32 Q8, Q8, Q11
|
||||
|
||||
VST1.32 D16[0], [R2], R11
|
||||
MOV R7, R7, LSL #2
|
||||
|
||||
VST1.32 D16[1], [R2], R11
|
||||
ADD R7, R7, R9
|
||||
|
||||
VST1.32 D17[0], [R2], R11
|
||||
VST1.32 D17[1], [R2], R11
|
||||
|
||||
SUB R11, R10, #1
|
||||
MOV R10, R11, LSL #2
|
||||
ADD R10, R0, R10
|
||||
MOV R11, R11, LSL #1
|
||||
SUB R10, R10, R12
|
||||
MOV R8, R6, LSL #2
|
||||
MOV R12, #-16
|
||||
ADD R11, R11, R4
|
||||
|
||||
VLD1.32 {D6, D7}, [R10], R12
|
||||
SUB R11, R11, #14
|
||||
|
||||
|
||||
VREV64.32 D0, D6
|
||||
VREV64.32 D1, D7
|
||||
VQNEG.S32 D0, D0
|
||||
VQNEG.S32 D1, D1
|
||||
VUZP.16 D1, D0
|
||||
VLD2.16 {D2, D3}, [R11], R12
|
||||
VREV64.16 D2, D2
|
||||
VREV64.16 D3, D3
|
||||
|
||||
VLD2.16 {D4, D5}, [R1]!
|
||||
|
||||
VMULL.U16 Q13, D1, D3
|
||||
VMLSL.U16 Q13, D4, D2
|
||||
VSHR.S32 Q13, Q13, #16
|
||||
VMLAL.S16 Q13, D0, D3
|
||||
VMLSL.S16 Q13, D5, D2
|
||||
@VQSHL.S32 Q12,Q13,Q11
|
||||
@VQADD.S32 Q12,Q12,Q10
|
||||
@VSHR.S32 Q12,Q12,#16
|
||||
VQADD.S32 Q12, Q13, Q10
|
||||
VQSHL.S32 Q12, Q12, Q11
|
||||
VUZP.16 D24, D25
|
||||
|
||||
|
||||
VLD1.32 {D14, D15}, [R10], R12
|
||||
VMULL.U16 Q13, D1, D3
|
||||
VMLSL.U16 Q13, D4, D2
|
||||
VREV64.32 Q4, Q7
|
||||
VQNEG.S32 Q4, Q4
|
||||
VLD2.16 {D10, D11}, [R11], R12
|
||||
VSHR.S32 Q13, Q13, #16
|
||||
VLD2.16 {D12, D13}, [R1]!
|
||||
VMLAL.S16 Q13, D0, D3
|
||||
VMLSL.S16 Q13, D5, D2
|
||||
VUZP.16 D9, D8
|
||||
VREV64.16 Q5, Q5
|
||||
VQADD.S32 Q12, Q13, Q10
|
||||
SUB R5, R5, #8
|
||||
VQSHL.S32 Q12, Q12, Q11
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LOOP_2:
|
||||
|
||||
|
||||
VLD1.32 {D6, D7}, [R10], R12
|
||||
VMULL.U16 Q9, D9, D11
|
||||
VREV64.32 Q0, Q3
|
||||
VQNEG.S32 Q0, Q0
|
||||
VUZP.16 D1, D0
|
||||
VLD2.16 {D2, D3}, [R11], R12
|
||||
VREV64.16 Q1, Q1
|
||||
|
||||
VLD2.16 {D4, D5}, [R1]!
|
||||
VMLSL.U16 Q9, D12, D10
|
||||
VST1.32 D24[0], [R7], R8
|
||||
VMULL.U16 Q13, D1, D3
|
||||
VST1.32 D24[1], [R7], R8
|
||||
VSHR.S32 Q9, Q9, #16
|
||||
VST1.32 D25[0], [R7], R8
|
||||
VMLSL.U16 Q13, D4, D2
|
||||
VST1.32 D25[1], [R7], R8
|
||||
VMLAL.S16 Q9, D8, D11
|
||||
VLD1.32 {D14, D15}, [R10], R12
|
||||
VSHR.S32 Q13, Q13, #16
|
||||
VMLSL.S16 Q9, D13, D10
|
||||
VLD2.16 {D10, D11}, [R11], R12
|
||||
VMLAL.S16 Q13, D0, D3
|
||||
VMLSL.S16 Q13, D5, D2
|
||||
VREV64.32 Q4, Q7
|
||||
VLD2.16 {D12, D13}, [R1]!
|
||||
VQNEG.S32 Q4, Q4
|
||||
VREV64.16 Q5, Q5
|
||||
VQADD.S32 Q8, Q9, Q10
|
||||
VUZP.16 D9, D8
|
||||
VQADD.S32 Q12, Q13, Q10
|
||||
VQSHL.S32 Q8, Q8, Q11
|
||||
SUBS R5, R5, #8
|
||||
VST1.32 D16[0], [R7], R8
|
||||
VQSHL.S32 Q12, Q12, Q11
|
||||
VST1.32 D16[1], [R7], R8
|
||||
|
||||
VST1.32 D17[0], [R7], R8
|
||||
VST1.32 D17[1], [R7], R8
|
||||
|
||||
BGT LOOP_2
|
||||
|
||||
VST1.32 D24[0], [R7], R8
|
||||
VMULL.U16 Q9, D9, D11
|
||||
VMLSL.U16 Q9, D12, D10
|
||||
VST1.32 D24[1], [R7], R8
|
||||
VST1.32 D25[0], [R7], R8
|
||||
VSHR.S32 Q9, Q9, #16
|
||||
VST1.32 D25[1], [R7], R8
|
||||
|
||||
VMLAL.S16 Q9, D8, D11
|
||||
VMLSL.S16 Q9, D13, D10
|
||||
VQADD.S32 Q8, Q9, Q10
|
||||
VQSHL.S32 Q8, Q8, Q11
|
||||
|
||||
VST1.32 D16[0], [R7], R8
|
||||
VST1.32 D16[1], [R7], R8
|
||||
VST1.32 D17[0], [R7], R8
|
||||
VST1.32 D17[1], [R7], R8
|
||||
|
||||
VPOP {d8 - d15}
|
||||
LDMFD sp!, {R4-R12, R15}
|
||||
144
decoder/armv7/ixheaacd_post_radix_compute2.s
Normal file
144
decoder/armv7/ixheaacd_post_radix_compute2.s
Normal file
|
|
@ -0,0 +1,144 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_postradixcompute2
|
||||
|
||||
|
||||
ixheaacd_postradixcompute2:
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
|
||||
SUB sp, sp, #20
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
STR r0, [sp, #16]
|
||||
|
||||
|
||||
ADD r4, r1, r3, lsl #1
|
||||
MOV r3, r3, asr #4
|
||||
STR r3, [sp, #4]
|
||||
MOV r5, #1
|
||||
STR r5, [sp]
|
||||
|
||||
POSTRADIX2_START:
|
||||
|
||||
LDR r14, [r2]
|
||||
|
||||
LDMIA r1!, {r5-r12}
|
||||
ADD r0, r0, r14
|
||||
|
||||
ADD r14, r5, r7
|
||||
SUB r5, r5, r7
|
||||
|
||||
ADD r7, r9, r11
|
||||
SUB r9, r9, r11
|
||||
|
||||
ADD r11, r6, r8
|
||||
SUB r6, r6, r8
|
||||
|
||||
ADD r8, r10, r12
|
||||
SUB r10, r10, r12
|
||||
|
||||
STR r14, [r0], #4
|
||||
STR r11, [r0], #32-4
|
||||
|
||||
STR r7, [r0], #4
|
||||
STR r8, [r0], #(32+(32<<1))-4
|
||||
|
||||
STR r5, [r0], #4
|
||||
STR r6, [r0], #32-4
|
||||
|
||||
STR r9, [r0], #4
|
||||
STR r10, [r0], #0
|
||||
|
||||
LDR r0, [sp, #16]
|
||||
|
||||
LDR r14, [r2], #4
|
||||
|
||||
LDMIA r4!, {r5-r12}
|
||||
|
||||
ADD r0, r0, r14
|
||||
|
||||
|
||||
ADD r0, r0, #8
|
||||
|
||||
ADD r14, r5, r7
|
||||
SUB r5, r5, r7
|
||||
|
||||
ADD r7, r9, r11
|
||||
SUB r9, r9, r11
|
||||
|
||||
ADD r11, r6, r8
|
||||
SUB r6, r6, r8
|
||||
|
||||
ADD r8, r10, r12
|
||||
SUB r10, r10, r12
|
||||
|
||||
STR r14, [r0], #4
|
||||
STR r11, [r0], #32-4
|
||||
|
||||
STR r7, [r0], #4
|
||||
STR r8, [r0], #(32+(32<<1))-4
|
||||
|
||||
STR r5, [r0], #4
|
||||
STR r6, [r0], #32-4
|
||||
|
||||
|
||||
STR r9, [r0], #4
|
||||
STR r10, [r0], #0
|
||||
|
||||
SUBS r3, r3, #1
|
||||
|
||||
|
||||
LDR r0, [sp, #16]
|
||||
BGT POSTRADIX2_START
|
||||
|
||||
LDR r0, [sp, #16]
|
||||
|
||||
LDR r3, [sp, #4]
|
||||
LDR r6, [sp]
|
||||
|
||||
ADD r1, r1, r3, lsl #5
|
||||
ADD r4, r4, r3, lsl #5
|
||||
|
||||
SUBS r6, r6, #1
|
||||
STR r6, [sp]
|
||||
|
||||
|
||||
BPL POSTRADIX2_START
|
||||
|
||||
|
||||
ADD sp, sp, #20
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
138
decoder/armv7/ixheaacd_post_radix_compute4.s
Normal file
138
decoder/armv7/ixheaacd_post_radix_compute4.s
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_postradixcompute4
|
||||
|
||||
|
||||
ixheaacd_postradixcompute4:
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
ADD r4, r1, r3, lsl #1
|
||||
MOV r3, #2
|
||||
|
||||
|
||||
POSTRADIX4_START:
|
||||
|
||||
LDMIA r1!, {r5-r12}
|
||||
|
||||
ADD r14, r5, r9
|
||||
SUB r5, r5, r9
|
||||
|
||||
ADD r9, r6, r10
|
||||
SUB r6, r6, r10
|
||||
|
||||
ADD r10, r7, r11
|
||||
SUB r7, r7, r11
|
||||
|
||||
ADD r11, r8, r12
|
||||
SUB r8, r8, r12
|
||||
|
||||
ADD r12, r14, r10
|
||||
SUB r14, r14, r10
|
||||
|
||||
ADD r10, r9, r11
|
||||
SUB r9, r9, r11
|
||||
|
||||
ADD r11, r5, r8
|
||||
SUB r5, r5, r8
|
||||
|
||||
ADD r8, r6, r7
|
||||
SUB r6, r6, r7
|
||||
|
||||
|
||||
STR r12, [r0], #4
|
||||
STR r10, [r0], #14<<1
|
||||
|
||||
STR r11, [r0], #4
|
||||
STR r6 , [r0], #14<<1
|
||||
|
||||
STR r14, [r0], #4
|
||||
STR r9 , [r0], #14<<1
|
||||
|
||||
STR r5, [r0], #4
|
||||
STR r8, [r0], #0
|
||||
|
||||
LDMIA r4!, {r5-r12}
|
||||
SUB r0, r0, #92
|
||||
|
||||
|
||||
ADD r14, r5, r9
|
||||
SUB r5, r5, r9
|
||||
|
||||
ADD r9, r6, r10
|
||||
SUB r6, r6, r10
|
||||
|
||||
ADD r10, r7, r11
|
||||
SUB r7, r7, r11
|
||||
|
||||
ADD r11, r8, r12
|
||||
SUB r8, r8, r12
|
||||
|
||||
ADD r12, r14, r10
|
||||
SUB r14, r14, r10
|
||||
|
||||
ADD r10, r9, r11
|
||||
SUB r9, r9, r11
|
||||
|
||||
ADD r11, r5, r8
|
||||
SUB r5, r5, r8
|
||||
|
||||
ADD r8, r6, r7
|
||||
SUB r6, r6, r7
|
||||
|
||||
STR r12, [r0], #4
|
||||
STR r10, [r0], #14<<1
|
||||
|
||||
STR r11, [r0], #4
|
||||
STR r6, [r0], #14<<1
|
||||
|
||||
STR r14, [r0], #4
|
||||
STR r9, [r0], #14<<1
|
||||
|
||||
|
||||
STR r5, [r0], #4
|
||||
STR r8, [r0], #0
|
||||
|
||||
ADD r1, r1, #1 << 5
|
||||
ADD r4, r4, #1 << 5
|
||||
SUB r0, r0, #100-8
|
||||
|
||||
SUBS r3, r3, #1
|
||||
|
||||
BGT POSTRADIX4_START
|
||||
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
545
decoder/armv7/ixheaacd_post_twiddle.s
Normal file
545
decoder/armv7/ixheaacd_post_twiddle.s
Normal file
|
|
@ -0,0 +1,545 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_post_twiddle_armv7
|
||||
|
||||
ixheaacd_post_twiddle_armv7:
|
||||
|
||||
STMFD sp!, {R4-R12}
|
||||
VPUSH {d8 - d15}
|
||||
LDR R4, [sp, #100]
|
||||
|
||||
ARM_PROLOGUE:
|
||||
|
||||
CMP R3, #0x400
|
||||
LDR R6, =7500
|
||||
ADD R2, R2, R6
|
||||
BLT NEXT
|
||||
MOV R4, #50
|
||||
MOV R5, #-50
|
||||
MOV R6, #4
|
||||
VDUP.16 D10, R4
|
||||
|
||||
B NEXT1
|
||||
|
||||
NEXT:
|
||||
LDR R4, =0x192
|
||||
LDR R5, =0xfe6e
|
||||
MOV R6, #32
|
||||
VDUP.16 D10, R4
|
||||
|
||||
NEXT1:
|
||||
LDR R7, [R1], #4
|
||||
LDR R8, [R1], #4
|
||||
LDR R9, [R2]
|
||||
ADD R2, R2, R6
|
||||
|
||||
SMULWT R11, R8, R9
|
||||
SMULWB R10, R8, R9
|
||||
SMULWT R12, R7, R9
|
||||
SMLAWB R8, R7, R9, R11
|
||||
|
||||
SUB R10, R10, R12
|
||||
|
||||
MVN R8, R8
|
||||
ADD R8, R8, #1
|
||||
|
||||
SMLAWB R9, R10, R5, R8
|
||||
SMLAWB R11, R8, R4, R10
|
||||
|
||||
LSL R7, R3, #2
|
||||
ADD R7, R0, R7
|
||||
SUB R7, R7, #4
|
||||
|
||||
STR R11, [R7], #-4
|
||||
STR R9, [R0], #4
|
||||
|
||||
LSL R5, R3, #2
|
||||
ADD R5, R1, R5
|
||||
SUB R5, R5, #40
|
||||
|
||||
SUB R3, R3, #1
|
||||
ASR R3, R3, #4
|
||||
|
||||
|
||||
SUB R7, R7, #28
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
MOV R8, #-32
|
||||
|
||||
NEON_PROLOGUE:
|
||||
|
||||
VLD4.16 {D0, D1, D2, D3}, [R5], R8
|
||||
VLD4.16 {D4, D5, D6, D7}, [R1]!
|
||||
|
||||
VLD2.16 {D8[0], D9[0]}, [R2], R6
|
||||
VLD2.16 {D8[1], D9[1]}, [R2], R6
|
||||
VLD2.16 {D8[2], D9[2]}, [R2], R6
|
||||
VLD2.16 {D8[3], D9[3]}, [R2], R6
|
||||
|
||||
VREV64.16 Q6, Q4
|
||||
|
||||
VMULL.U16 Q15, D2, D13
|
||||
VMULL.U16 Q14, D0, D13
|
||||
VMULL.U16 Q13, D2, D12
|
||||
VMULL.U16 Q12, D0, D12
|
||||
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VSHR.U32 Q14, Q14, #16
|
||||
VSHR.U32 Q13, Q13, #16
|
||||
VSHR.U32 Q12, Q12, #16
|
||||
|
||||
VMLAL.S16 Q15, D3, D13
|
||||
VMLAL.S16 Q14, D1, D13
|
||||
VMLAL.S16 Q13, D3, D12
|
||||
VMLAL.S16 Q12, D1, D12
|
||||
|
||||
VMULL.U16 Q11, D6, D9
|
||||
VMULL.U16 Q10, D4, D9
|
||||
|
||||
|
||||
VADD.I32 Q14, Q14, Q13
|
||||
VSUB.I32 Q15, Q15, Q12
|
||||
VNEG.S32 Q14, Q14
|
||||
|
||||
VMULL.U16 Q9, D6, D8
|
||||
VMULL.U16 Q8, D4, D8
|
||||
|
||||
VMOV Q13, Q15
|
||||
VSHR.U32 Q11, Q11, #16
|
||||
|
||||
VMOV Q12, Q14
|
||||
VSHR.U32 Q10, Q10, #16
|
||||
|
||||
VUZP.16 D26, D27
|
||||
VSHR.U32 Q9, Q9, #16
|
||||
|
||||
VUZP.16 D24, D25
|
||||
VSHR.U32 Q8, Q8, #16
|
||||
|
||||
|
||||
VMLAL.S16 Q11, D7, D9
|
||||
VMLAL.S16 Q10, D5, D9
|
||||
VMLAL.S16 Q9, D7, D8
|
||||
VMLAL.S16 Q8, D5, D8
|
||||
|
||||
VLD2.16 {D8[0], D9[0]}, [R2], R6
|
||||
VMULL.U16 Q0, D26, D10
|
||||
|
||||
VLD2.16 {D8[1], D9[1]}, [R2], R6
|
||||
VMULL.U16 Q1, D24, D10
|
||||
|
||||
VLD2.16 {D8[2], D9[2]}, [R2], R6
|
||||
VADD.I32 Q11, Q11, Q8
|
||||
|
||||
VLD2.16 {D8[3], D9[3]}, [R2], R6
|
||||
VSUB.I32 Q10, Q9, Q10
|
||||
|
||||
VREV64.16 Q6, Q4
|
||||
VNEG.S32 Q11, Q11
|
||||
|
||||
|
||||
VMOV Q9, Q11
|
||||
VSHR.U32 Q0, Q0, #16
|
||||
|
||||
VMOV Q8, Q10
|
||||
VSHR.U32 Q1, Q1, #16
|
||||
|
||||
VUZP.16 D18, D19
|
||||
VMLAL.S16 Q0, D27, D10
|
||||
|
||||
VUZP.16 D16, D17
|
||||
VMLAL.S16 Q1, D25, D10
|
||||
|
||||
VMULL.U16 Q2, D18, D10
|
||||
VMULL.U16 Q3, D16, D10
|
||||
|
||||
VNEG.S32 Q0, Q0
|
||||
VADD.I32 Q7, Q15, Q1
|
||||
VADD.I32 Q13, Q14, Q0
|
||||
|
||||
VREV64.32 Q7, Q7
|
||||
VSHR.U32 Q2, Q2, #16
|
||||
|
||||
VSWP D14, D15
|
||||
VSHR.U32 Q3, Q3, #16
|
||||
|
||||
VMLAL.S16 Q2, D19, D10
|
||||
VLD4.16 {D0, D1, D2, D3}, [R5], R8
|
||||
VMLAL.S16 Q3, D17, D10
|
||||
SUB R3, R3, #2
|
||||
|
||||
VADD.I32 Q12, Q10, Q2
|
||||
|
||||
VREV64.32 Q12, Q12
|
||||
VNEG.S32 Q8, Q3
|
||||
|
||||
VLD4.16 {D4, D5, D6, D7}, [R1]!
|
||||
|
||||
VSWP D24, D25
|
||||
VADD.I32 Q8, Q11, Q8
|
||||
|
||||
|
||||
|
||||
|
||||
CORE_LOOP:
|
||||
VMULL.U16 Q15, D2, D13
|
||||
VST2.32 {Q12, Q13}, [R7], R8
|
||||
VMULL.U16 Q14, D0, D13
|
||||
|
||||
VMULL.U16 Q13, D2, D12
|
||||
VST2.32 {Q7, Q8}, [R0]!
|
||||
VMULL.U16 Q12, D0, D12
|
||||
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VSHR.U32 Q14, Q14, #16
|
||||
VSHR.U32 Q13, Q13, #16
|
||||
VSHR.U32 Q12, Q12, #16
|
||||
|
||||
VMLAL.S16 Q15, D3, D13
|
||||
VMLAL.S16 Q14, D1, D13
|
||||
VMLAL.S16 Q13, D3, D12
|
||||
VMLAL.S16 Q12, D1, D12
|
||||
|
||||
VMULL.U16 Q11, D6, D9
|
||||
VMULL.U16 Q10, D4, D9
|
||||
|
||||
|
||||
VADD.I32 Q14, Q14, Q13
|
||||
VSUB.I32 Q15, Q15, Q12
|
||||
VNEG.S32 Q14, Q14
|
||||
|
||||
VMULL.U16 Q9, D6, D8
|
||||
VMULL.U16 Q8, D4, D8
|
||||
|
||||
|
||||
VMOV Q13, Q15
|
||||
VSHR.U32 Q11, Q11, #16
|
||||
|
||||
VMOV Q12, Q14
|
||||
VSHR.U32 Q10, Q10, #16
|
||||
|
||||
VUZP.16 D26, D27
|
||||
VSHR.U32 Q9, Q9, #16
|
||||
|
||||
VUZP.16 D24, D25
|
||||
VSHR.U32 Q8, Q8, #16
|
||||
|
||||
|
||||
VMLAL.S16 Q11, D7, D9
|
||||
VMLAL.S16 Q10, D5, D9
|
||||
VMLAL.S16 Q9, D7, D8
|
||||
VMLAL.S16 Q8, D5, D8
|
||||
|
||||
VLD2.16 {D8[0], D9[0]}, [R2], R6
|
||||
VMULL.U16 Q0, D26, D10
|
||||
|
||||
VLD2.16 {D8[1], D9[1]}, [R2], R6
|
||||
VMULL.U16 Q1, D24, D10
|
||||
|
||||
VLD2.16 {D8[2], D9[2]}, [R2], R6
|
||||
VADD.I32 Q11, Q11, Q8
|
||||
|
||||
VLD2.16 {D8[3], D9[3]}, [R2], R6
|
||||
VSUB.I32 Q10, Q9, Q10
|
||||
|
||||
VREV64.16 Q6, Q4
|
||||
VNEG.S32 Q11, Q11
|
||||
|
||||
|
||||
VMOV Q9, Q11
|
||||
VSHR.U32 Q0, Q0, #16
|
||||
|
||||
VMOV Q8, Q10
|
||||
VSHR.U32 Q1, Q1, #16
|
||||
|
||||
VUZP.16 D18, D19
|
||||
VMLAL.S16 Q0, D27, D10
|
||||
|
||||
VUZP.16 D16, D17
|
||||
VMLAL.S16 Q1, D25, D10
|
||||
|
||||
VMULL.U16 Q2, D18, D10
|
||||
VMULL.U16 Q3, D16, D10
|
||||
|
||||
VNEG.S32 Q0, Q0
|
||||
VADD.I32 Q7, Q15, Q1
|
||||
VADD.I32 Q13, Q14, Q0
|
||||
|
||||
VREV64.32 Q7, Q7
|
||||
VSHR.U32 Q2, Q2, #16
|
||||
|
||||
VSWP D14, D15
|
||||
VSHR.U32 Q3, Q3, #16
|
||||
|
||||
VMLAL.S16 Q2, D19, D10
|
||||
VLD4.16 {D0, D1, D2, D3}, [R5], R8
|
||||
VMLAL.S16 Q3, D17, D10
|
||||
|
||||
VADD.I32 Q12, Q10, Q2
|
||||
VREV64.32 Q12, Q12
|
||||
VNEG.S32 Q8, Q3
|
||||
|
||||
VLD4.16 {D4, D5, D6, D7}, [R1]!
|
||||
VSWP D24, D25
|
||||
VADD.I32 Q8, Q11, Q8
|
||||
|
||||
SUBS R3, R3, #1
|
||||
BNE CORE_LOOP
|
||||
|
||||
|
||||
|
||||
|
||||
NEON_EPILOGUE:
|
||||
VMULL.U16 Q15, D2, D13
|
||||
VST2.32 {Q12, Q13}, [R7], R8
|
||||
VMULL.U16 Q14, D0, D13
|
||||
|
||||
VMULL.U16 Q13, D2, D12
|
||||
VST2.32 {Q7, Q8}, [R0]!
|
||||
VMULL.U16 Q12, D0, D12
|
||||
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VSHR.U32 Q14, Q14, #16
|
||||
VSHR.U32 Q13, Q13, #16
|
||||
VSHR.U32 Q12, Q12, #16
|
||||
|
||||
VMLAL.S16 Q15, D3, D13
|
||||
VMLAL.S16 Q14, D1, D13
|
||||
VMLAL.S16 Q13, D3, D12
|
||||
VMLAL.S16 Q12, D1, D12
|
||||
|
||||
VMULL.U16 Q11, D6, D9
|
||||
VMULL.U16 Q10, D4, D9
|
||||
|
||||
|
||||
VADD.I32 Q14, Q14, Q13
|
||||
VSUB.I32 Q15, Q15, Q12
|
||||
VNEG.S32 Q14, Q14
|
||||
|
||||
VMULL.U16 Q9, D6, D8
|
||||
VMULL.U16 Q8, D4, D8
|
||||
|
||||
|
||||
VMOV Q13, Q15
|
||||
VSHR.U32 Q11, Q11, #16
|
||||
|
||||
VMOV Q12, Q14
|
||||
VSHR.U32 Q10, Q10, #16
|
||||
|
||||
VUZP.16 D26, D27
|
||||
VSHR.U32 Q9, Q9, #16
|
||||
|
||||
VUZP.16 D24, D25
|
||||
VSHR.U32 Q8, Q8, #16
|
||||
|
||||
|
||||
VMLAL.S16 Q11, D7, D9
|
||||
VMLAL.S16 Q10, D5, D9
|
||||
VMLAL.S16 Q9, D7, D8
|
||||
VMLAL.S16 Q8, D5, D8
|
||||
|
||||
VMULL.U16 Q0, D26, D10
|
||||
VMULL.U16 Q1, D24, D10
|
||||
|
||||
VADD.I32 Q11, Q11, Q8
|
||||
VSUB.I32 Q10, Q9, Q10
|
||||
VNEG.S32 Q11, Q11
|
||||
|
||||
|
||||
VMOV Q9, Q11
|
||||
VSHR.U32 Q0, Q0, #16
|
||||
|
||||
VMOV Q8, Q10
|
||||
VSHR.U32 Q1, Q1, #16
|
||||
|
||||
VUZP.16 D18, D19
|
||||
VMLAL.S16 Q0, D27, D10
|
||||
|
||||
VUZP.16 D16, D17
|
||||
VMLAL.S16 Q1, D25, D10
|
||||
|
||||
VMULL.U16 Q2, D18, D10
|
||||
VMULL.U16 Q3, D16, D10
|
||||
|
||||
VNEG.S32 Q0, Q0
|
||||
VADD.I32 Q7, Q15, Q1
|
||||
VADD.I32 Q13, Q14, Q0
|
||||
|
||||
VREV64.32 Q7, Q7
|
||||
VSHR.U32 Q2, Q2, #16
|
||||
|
||||
VSWP D14, D15
|
||||
VSHR.U32 Q3, Q3, #16
|
||||
|
||||
VMLAL.S16 Q2, D19, D10
|
||||
VMLAL.S16 Q3, D17, D10
|
||||
|
||||
VADD.I32 Q12, Q10, Q2
|
||||
|
||||
VREV64.32 Q12, Q12
|
||||
VNEG.S32 Q8, Q3
|
||||
|
||||
VSWP D24, D25
|
||||
VADD.I32 Q8, Q11, Q8
|
||||
|
||||
|
||||
VST2.32 {Q7, Q8}, [R0]!
|
||||
VST2.32 {Q12, Q13}, [R7], R8
|
||||
|
||||
|
||||
|
||||
VLD4.16 {D0, D1, D2, D3}, [R5], R8
|
||||
|
||||
VMOV.S32 D5, #0x00000000
|
||||
VMOV.S32 D7, #0x00000000
|
||||
|
||||
VLD2.32 {D4, D6}, [R1]!
|
||||
VLD2.32 {D5[0], D7[0]}, [R1]
|
||||
|
||||
VLD2.16 {D8[0], D9[0]}, [R2], R6
|
||||
VLD2.16 {D8[1], D9[1]}, [R2], R6
|
||||
VLD2.16 {D8[2], D9[2]}, [R2], R6
|
||||
VLD2.16 {D8[3], D9[3]}, [R2], R6
|
||||
|
||||
VREV64.16 Q6, Q4
|
||||
|
||||
VUZP.16 D4, D5
|
||||
VUZP.16 D6, D7
|
||||
|
||||
VMULL.U16 Q15, D2, D13
|
||||
VMULL.U16 Q14, D0, D13
|
||||
|
||||
VMULL.U16 Q13, D2, D12
|
||||
VMULL.U16 Q12, D0, D12
|
||||
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VSHR.U32 Q14, Q14, #16
|
||||
VSHR.U32 Q13, Q13, #16
|
||||
VSHR.U32 Q12, Q12, #16
|
||||
|
||||
VMLAL.S16 Q15, D3, D13
|
||||
VMLAL.S16 Q14, D1, D13
|
||||
VMLAL.S16 Q13, D3, D12
|
||||
VMLAL.S16 Q12, D1, D12
|
||||
|
||||
VMULL.U16 Q11, D6, D9
|
||||
VMULL.U16 Q10, D4, D9
|
||||
|
||||
|
||||
VADD.I32 Q14, Q14, Q13
|
||||
VSUB.I32 Q15, Q15, Q12
|
||||
VNEG.S32 Q14, Q14
|
||||
|
||||
VMULL.U16 Q9, D6, D8
|
||||
VMULL.U16 Q8, D4, D8
|
||||
|
||||
|
||||
VMOV Q13, Q15
|
||||
VSHR.U32 Q11, Q11, #16
|
||||
|
||||
VMOV Q12, Q14
|
||||
VSHR.U32 Q10, Q10, #16
|
||||
|
||||
VUZP.16 D26, D27
|
||||
VSHR.U32 Q9, Q9, #16
|
||||
|
||||
VUZP.16 D24, D25
|
||||
VSHR.U32 Q8, Q8, #16
|
||||
|
||||
|
||||
VMLAL.S16 Q11, D7, D9
|
||||
VMLAL.S16 Q10, D5, D9
|
||||
VMLAL.S16 Q9, D7, D8
|
||||
VMLAL.S16 Q8, D5, D8
|
||||
|
||||
|
||||
VMULL.U16 Q0, D26, D10
|
||||
VMULL.U16 Q1, D24, D10
|
||||
|
||||
|
||||
VADD.I32 Q11, Q11, Q8
|
||||
VSUB.I32 Q10, Q9, Q10
|
||||
VNEG.S32 Q11, Q11
|
||||
|
||||
|
||||
VMOV Q9, Q11
|
||||
VSHR.U32 Q0, Q0, #16
|
||||
|
||||
VMOV Q8, Q10
|
||||
VSHR.U32 Q1, Q1, #16
|
||||
|
||||
VUZP.16 D18, D19
|
||||
VMLAL.S16 Q0, D27, D10
|
||||
|
||||
VUZP.16 D16, D17
|
||||
VMLAL.S16 Q1, D25, D10
|
||||
|
||||
VMULL.U16 Q2, D18, D10
|
||||
VMULL.U16 Q3, D16, D10
|
||||
|
||||
VNEG.S32 Q0, Q0
|
||||
VADD.I32 Q7, Q15, Q1
|
||||
VADD.I32 Q13, Q14, Q0
|
||||
|
||||
VREV64.32 Q7, Q7
|
||||
VSHR.U32 Q2, Q2, #16
|
||||
|
||||
VSWP D14, D15
|
||||
VSHR.U32 Q3, Q3, #16
|
||||
|
||||
VMLAL.S16 Q2, D19, D10
|
||||
|
||||
VMLAL.S16 Q3, D17, D10
|
||||
|
||||
VADD.I32 Q12, Q10, Q2
|
||||
|
||||
VREV64.32 Q12, Q12
|
||||
VNEG.S32 Q8, Q3
|
||||
|
||||
VSWP D24, D25
|
||||
VADD.I32 Q8, Q11, Q8
|
||||
|
||||
VST2.32 {D14, D16}, [R0]!
|
||||
VST2.32 {D15[0], D17[0]}, [R0]!
|
||||
VST1.32 D15[1], [R0]
|
||||
|
||||
ADD R7, R7, #4
|
||||
|
||||
VST1.32 D26[0], [R7]!
|
||||
VST2.32 {D24[1], D26[1]}, [R7]!
|
||||
VST2.32 {D25, D27}, [R7]
|
||||
|
||||
VPOP {d8 - d15}
|
||||
LDMFD sp!, {R4-R12}
|
||||
BX LR
|
||||
1277
decoder/armv7/ixheaacd_post_twiddle_overlap.s
Normal file
1277
decoder/armv7/ixheaacd_post_twiddle_overlap.s
Normal file
File diff suppressed because it is too large
Load diff
388
decoder/armv7/ixheaacd_pre_twiddle_compute.s
Normal file
388
decoder/armv7/ixheaacd_pre_twiddle_compute.s
Normal file
|
|
@ -0,0 +1,388 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_pretwiddle_compute_armv7
|
||||
|
||||
ixheaacd_pretwiddle_compute_armv7:
|
||||
|
||||
STMFD sp!, {R4-R12}
|
||||
VPUSH {d8 - d15}
|
||||
|
||||
LDR R8, =7500
|
||||
ADD R3, R3, R8
|
||||
LDR R4, [sp, #100]
|
||||
LDR R5, [sp, #104]
|
||||
|
||||
LSL R7, R4, #4
|
||||
ADD R7, R2, R7
|
||||
SUB R7, R7, #4
|
||||
|
||||
MVN R5, R5
|
||||
ADD R5, R5, #1
|
||||
|
||||
ARM_PROLOGUE:
|
||||
LDR R8, [R3], #4
|
||||
LDR R9, [R0], #4
|
||||
|
||||
SMULWB R12, R9, R8
|
||||
LDR R10, [R1], #-4
|
||||
SMULWT R11, R9, R8
|
||||
SMLAWT R9, R10, R8, R12
|
||||
SMULWB R6, R10, R8
|
||||
|
||||
MVN R9, R9
|
||||
ADD R9, R9, #1
|
||||
|
||||
SUB R11, R11, R6
|
||||
|
||||
CMP R5, #0
|
||||
BGT NEXT
|
||||
MVN R8, R5
|
||||
ADD R8, R8, #1
|
||||
ASR R11, R11, R8
|
||||
ASR R9, R9, R8
|
||||
B NEXT1
|
||||
|
||||
NEXT:
|
||||
LSL R11, R11, R5
|
||||
LSL R9, R9, R5
|
||||
|
||||
|
||||
|
||||
NEXT1:
|
||||
STR R9, [R2], #4
|
||||
STR R11, [R2], #4
|
||||
|
||||
CMP R4, #0x100
|
||||
BNE NXT
|
||||
MOV R6, #4
|
||||
B NXT1
|
||||
NXT:
|
||||
MOV R6, #32
|
||||
ADD R3, R3, #28
|
||||
|
||||
NXT1:
|
||||
SUB R4, R4, #1
|
||||
ASR R4, R4, #2
|
||||
SUB R7, R7, #28
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
NEON_PROLOGUE:
|
||||
|
||||
MOV R8, #-32
|
||||
VDUP.32 Q7, R5
|
||||
SUB R1, R1, #28
|
||||
|
||||
VLD2.16 {D8[0], D9[0]}, [R3], R6
|
||||
VLD2.16 {D8[1], D9[1]}, [R3], R6
|
||||
VLD2.16 {D8[2], D9[2]}, [R3], R6
|
||||
VLD2.16 {D8[3], D9[3]}, [R3], R6
|
||||
|
||||
VREV64.16 Q5, Q4
|
||||
|
||||
VLD4.16 {D0, D1, D2, D3}, [R0]!
|
||||
VLD4.16 {D4, D5, D6, D7}, [R1], R8
|
||||
|
||||
VREV64.16 Q0, Q0
|
||||
VREV64.16 Q2, Q2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
VMULL.U16 Q15, D2, D9
|
||||
VMULL.U16 Q14, D4, D9
|
||||
VMULL.U16 Q13, D2, D8
|
||||
VMULL.U16 Q12, D4, D8
|
||||
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VSHR.U32 Q14, Q14, #16
|
||||
VSHR.U32 Q13, Q13, #16
|
||||
VSHR.U32 Q12, Q12, #16
|
||||
|
||||
VMLAL.S16 Q15, D3, D9
|
||||
VMLAL.S16 Q14, D5, D9
|
||||
VMLAL.S16 Q13, D3, D8
|
||||
VMLAL.S16 Q12, D5, D8
|
||||
|
||||
VADD.I32 Q14, Q13, Q14
|
||||
VNEG.S32 Q14, Q14
|
||||
VSUB.I32 Q15, Q15, Q12
|
||||
|
||||
VMULL.U16 Q11, D0, D11
|
||||
VMULL.U16 Q10, D6, D11
|
||||
VMULL.U16 Q9, D0, D10
|
||||
VMULL.U16 Q8, D6, D10
|
||||
|
||||
VSHR.U32 Q11, Q11, #16
|
||||
VSHR.U32 Q10, Q10, #16
|
||||
VSHR.U32 Q9, Q9, #16
|
||||
VSHR.U32 Q8, Q8, #16
|
||||
|
||||
VMLAL.S16 Q11, D1, D11
|
||||
VLD2.16 {D8[0], D9[0]}, [R3], R6
|
||||
|
||||
VMLAL.S16 Q10, D7, D11
|
||||
VLD2.16 {D8[1], D9[1]}, [R3], R6
|
||||
|
||||
VMLAL.S16 Q9, D1, D10
|
||||
VLD2.16 {D8[2], D9[2]}, [R3], R6
|
||||
|
||||
VMLAL.S16 Q8, D7, D10
|
||||
VLD2.16 {D8[3], D9[3]}, [R3], R6
|
||||
|
||||
VADD.I32 Q10, Q10, Q9
|
||||
|
||||
VNEG.S32 Q10, Q10
|
||||
VREV64.16 Q5, Q4
|
||||
|
||||
VSUB.I32 Q11, Q8, Q11
|
||||
VLD4.16 {D0, D1, D2, D3}, [R0]!
|
||||
|
||||
|
||||
|
||||
VSHL.S32 Q10, Q10, Q7
|
||||
VLD4.16 {D4, D5, D6, D7}, [R1], R8
|
||||
|
||||
VREV64.16 Q0, Q0
|
||||
VSHL.S32 Q11, Q11, Q7
|
||||
|
||||
VREV64.16 Q2, Q2
|
||||
VSHL.S32 Q9, Q15, Q7
|
||||
VSHL.S32 Q8, Q14, Q7
|
||||
|
||||
|
||||
|
||||
SUB R4, R4, #2
|
||||
|
||||
CORE_LOOP:
|
||||
VMULL.U16 Q15, D2, D9
|
||||
VST2.32 {Q8, Q9}, [R2]!
|
||||
VMULL.U16 Q14, D4, D9
|
||||
|
||||
VMULL.U16 Q13, D2, D8
|
||||
VST2.32 {Q10, Q11}, [R7], R8
|
||||
VMULL.U16 Q12, D4, D8
|
||||
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VSHR.U32 Q14, Q14, #16
|
||||
VSHR.U32 Q13, Q13, #16
|
||||
VSHR.U32 Q12, Q12, #16
|
||||
|
||||
VMLAL.S16 Q15, D3, D9
|
||||
VMLAL.S16 Q14, D5, D9
|
||||
VMLAL.S16 Q13, D3, D8
|
||||
VMLAL.S16 Q12, D5, D8
|
||||
|
||||
VADD.I32 Q14, Q13, Q14
|
||||
VNEG.S32 Q14, Q14
|
||||
VSUB.I32 Q15, Q15, Q12
|
||||
|
||||
VMULL.U16 Q11, D0, D11
|
||||
VLD2.16 {D8[0], D9[0]}, [R3], R6
|
||||
VMULL.U16 Q10, D6, D11
|
||||
|
||||
VMULL.U16 Q9, D0, D10
|
||||
VLD2.16 {D8[1], D9[1]}, [R3], R6
|
||||
VMULL.U16 Q8, D6, D10
|
||||
|
||||
VSHR.U32 Q11, Q11, #16
|
||||
VLD2.16 {D8[2], D9[2]}, [R3], R6
|
||||
VSHR.U32 Q10, Q10, #16
|
||||
|
||||
VSHR.U32 Q9, Q9, #16
|
||||
VLD2.16 {D8[3], D9[3]}, [R3], R6
|
||||
VSHR.U32 Q8, Q8, #16
|
||||
|
||||
VMLAL.S16 Q11, D1, D11
|
||||
VMLAL.S16 Q10, D7, D11
|
||||
VMLAL.S16 Q9, D1, D10
|
||||
VMLAL.S16 Q8, D7, D10
|
||||
|
||||
VLD4.16 {D0, D1, D2, D3}, [R0]!
|
||||
VADD.I32 Q10, Q10, Q9
|
||||
|
||||
VNEG.S32 Q10, Q10
|
||||
VREV64.16 Q5, Q4
|
||||
|
||||
VSUB.I32 Q11, Q8, Q11
|
||||
VLD4.16 {D4, D5, D6, D7}, [R1], R8
|
||||
VSHL.S32 Q10, Q10, Q7
|
||||
VSHL.S32 Q11, Q11, Q7
|
||||
|
||||
VREV64.16 Q0, Q0
|
||||
VSHL.S32 Q9, Q15, Q7
|
||||
|
||||
VREV64.16 Q2, Q2
|
||||
VSHL.S32 Q8, Q14, Q7
|
||||
|
||||
SUBS R4, R4, #1
|
||||
BNE CORE_LOOP
|
||||
|
||||
NEON_EPILOGUE:
|
||||
VMULL.U16 Q15, D2, D9
|
||||
VST2.32 {Q8, Q9}, [R2]!
|
||||
VMULL.U16 Q14, D4, D9
|
||||
|
||||
VMULL.U16 Q13, D2, D8
|
||||
VST2.32 {Q10, Q11}, [R7], R8
|
||||
VMULL.U16 Q12, D4, D8
|
||||
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VSHR.U32 Q14, Q14, #16
|
||||
VSHR.U32 Q13, Q13, #16
|
||||
VSHR.U32 Q12, Q12, #16
|
||||
|
||||
VMLAL.S16 Q15, D3, D9
|
||||
VMLAL.S16 Q14, D5, D9
|
||||
VMLAL.S16 Q13, D3, D8
|
||||
VMLAL.S16 Q12, D5, D8
|
||||
|
||||
VADD.I32 Q14, Q13, Q14
|
||||
VNEG.S32 Q14, Q14
|
||||
VSUB.I32 Q15, Q15, Q12
|
||||
|
||||
VMULL.U16 Q11, D0, D11
|
||||
VMULL.U16 Q10, D6, D11
|
||||
VMULL.U16 Q9, D0, D10
|
||||
VMULL.U16 Q8, D6, D10
|
||||
|
||||
VSHR.U32 Q11, Q11, #16
|
||||
VSHR.U32 Q10, Q10, #16
|
||||
VSHR.U32 Q9, Q9, #16
|
||||
VSHR.U32 Q8, Q8, #16
|
||||
|
||||
VMLAL.S16 Q11, D1, D11
|
||||
VMLAL.S16 Q10, D7, D11
|
||||
VMLAL.S16 Q9, D1, D10
|
||||
VMLAL.S16 Q8, D7, D10
|
||||
|
||||
VADD.I32 Q10, Q10, Q9
|
||||
VNEG.S32 Q10, Q10
|
||||
VSUB.I32 Q11, Q8, Q11
|
||||
|
||||
|
||||
VSHL.S32 Q10, Q10, Q7
|
||||
VSHL.S32 Q11, Q11, Q7
|
||||
VSHL.S32 Q9, Q15, Q7
|
||||
VSHL.S32 Q8, Q14, Q7
|
||||
|
||||
VST2.32 {Q8, Q9}, [R2]!
|
||||
VST2.32 {Q10, Q11}, [R7], R8
|
||||
|
||||
|
||||
RESIDUE_NEON:
|
||||
MOV R10, #-16
|
||||
VMOV.S32 D3, #0x00000000
|
||||
VMOV.S32 D4, #0x00000000
|
||||
|
||||
VLD2.32 {D0, D2}, [R0]!
|
||||
VLD2.32 {D1[0], D3[0]}, [R0]!
|
||||
VLD1.32 D1[1], [R0]
|
||||
|
||||
VUZP.16 D0, D1
|
||||
VUZP.16 D2, D3
|
||||
|
||||
ADD R1, R1, #4
|
||||
|
||||
VLD1.32 D6[0], [R1]!
|
||||
VLD2.32 {D4[1], D6[1]}, [R1]!
|
||||
VLD2.32 {D5, D7}, [R1]!
|
||||
|
||||
VUZP.16 D4, D5
|
||||
VUZP.16 D6, D7
|
||||
|
||||
VREV64.16 Q0, Q0
|
||||
VREV64.16 Q2, Q2
|
||||
|
||||
VLD2.16 {D8[0], D9[0]}, [R3], R6
|
||||
VLD2.16 {D8[1], D9[1]}, [R3], R6
|
||||
VLD2.16 {D8[2], D9[2]}, [R3], R6
|
||||
VLD2.16 {D8[3], D9[3]}, [R3], R6
|
||||
|
||||
VREV64.16 Q5, Q4
|
||||
|
||||
|
||||
VMULL.U16 Q15, D2, D9
|
||||
VMULL.U16 Q14, D4, D9
|
||||
VMULL.U16 Q13, D2, D8
|
||||
VMULL.U16 Q12, D4, D8
|
||||
|
||||
VSHR.U32 Q15, Q15, #16
|
||||
VSHR.U32 Q14, Q14, #16
|
||||
VSHR.U32 Q13, Q13, #16
|
||||
VSHR.U32 Q12, Q12, #16
|
||||
|
||||
VMLAL.S16 Q15, D3, D9
|
||||
VMLAL.S16 Q14, D5, D9
|
||||
VMLAL.S16 Q13, D3, D8
|
||||
VMLAL.S16 Q12, D5, D8
|
||||
|
||||
VADD.I32 Q14, Q13, Q14
|
||||
VNEG.S32 Q14, Q14
|
||||
VSUB.I32 Q15, Q15, Q12
|
||||
|
||||
VMULL.U16 Q11, D0, D11
|
||||
VMULL.U16 Q10, D6, D11
|
||||
VMULL.U16 Q9, D0, D10
|
||||
VMULL.U16 Q8, D6, D10
|
||||
|
||||
VSHR.U32 Q11, Q11, #16
|
||||
VSHR.U32 Q10, Q10, #16
|
||||
VSHR.U32 Q9, Q9, #16
|
||||
VSHR.U32 Q8, Q8, #16
|
||||
|
||||
VMLAL.S16 Q11, D1, D11
|
||||
VMLAL.S16 Q10, D7, D11
|
||||
VMLAL.S16 Q9, D1, D10
|
||||
VMLAL.S16 Q8, D7, D10
|
||||
|
||||
VADD.I32 Q10, Q10, Q9
|
||||
VNEG.S32 Q10, Q10
|
||||
VSUB.I32 Q11, Q8, Q11
|
||||
|
||||
|
||||
VSHL.S32 Q10, Q10, Q7
|
||||
VSHL.S32 Q11, Q11, Q7
|
||||
VSHL.S32 Q9, Q15, Q7
|
||||
VSHL.S32 Q8, Q14, Q7
|
||||
|
||||
VST2.32 {Q10, Q11}, [R7]
|
||||
VST2.32 {D16, D18}, [R2]!
|
||||
VST2.32 {D17[0], D19[0]}, [R2]!
|
||||
|
||||
VPOP {d8 - d15}
|
||||
LDMFD sp!, {R4-R12}
|
||||
BX LR
|
||||
|
||||
1282
decoder/armv7/ixheaacd_qmf_dec.c
Normal file
1282
decoder/armv7/ixheaacd_qmf_dec.c
Normal file
File diff suppressed because it is too large
Load diff
149
decoder/armv7/ixheaacd_radix4_bfly.s
Normal file
149
decoder/armv7/ixheaacd_radix4_bfly.s
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_radix4bfly
|
||||
|
||||
ixheaacd_radix4bfly:
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
|
||||
SUB sp, sp, #16
|
||||
|
||||
MOV r6, #6
|
||||
MUL r7, r6, r3
|
||||
MOV r4, r3
|
||||
STR r7, [sp]
|
||||
|
||||
|
||||
|
||||
MOV r3, r3, lsl #1
|
||||
|
||||
STR r2, [sp, #8]
|
||||
STR r4, [sp, #12]
|
||||
|
||||
|
||||
ADD r2, r1, r3, lsl #2
|
||||
ADD r0, r0, #8
|
||||
|
||||
|
||||
RADIX4_OUTLOOP:
|
||||
RADIX4_INLOOP:
|
||||
|
||||
|
||||
LDR r6, [r1]
|
||||
LDR r7, [r2]
|
||||
LDR r8, [r2, r3, lsl #2]
|
||||
LDR r9, [r2, r3, lsl #3]
|
||||
|
||||
ADD r10, r6, r8
|
||||
SUB r11, r6, r8
|
||||
ADD r12, r7, r9
|
||||
SUB r14, r7, r9
|
||||
|
||||
ADD r6, r10, r12
|
||||
SUB r7, r10, r12
|
||||
STR r6, [r1], #4
|
||||
|
||||
LDR r8, [r1]
|
||||
LDR r6, [r2, #4]!
|
||||
LDR r9, [r2, r3, lsl #2]!
|
||||
LDR r10, [r2, r3, lsl #2]!
|
||||
|
||||
ADD r12, r8, r9
|
||||
SUB r8, r8, r9
|
||||
ADD r9, r6, r10
|
||||
SUB r6, r6, r10
|
||||
|
||||
ADD r10, r12, r9
|
||||
STR r10, [r1], #4
|
||||
SUB r12, r12, r9
|
||||
|
||||
ADD r9, r11, r6
|
||||
SUB r10, r11, r6
|
||||
ADD r11, r8, r14
|
||||
LDR r5, [r0], #-4
|
||||
SUB r6, r8, r14
|
||||
|
||||
SMULWB r14, r10, r5
|
||||
SMULWT r8, r11, r5
|
||||
|
||||
SUBS r4, r4, #1
|
||||
SUB r8, r8, r14
|
||||
MOV r8, r8, lsl #1
|
||||
STR r8, [r2], #-4
|
||||
|
||||
SMULWT r14, r10, r5
|
||||
SMLAWB r8, r11, r5, r14
|
||||
LDR r11, [r0], #-4
|
||||
MOV r8, r8, lsl #1
|
||||
STR r8, [r2], -r3, lsl #2
|
||||
|
||||
SMULWT r10, r7, r11
|
||||
SMLAWB r8, r12, r11, r10
|
||||
|
||||
LDR r14, [r0], #20
|
||||
MOV r5, r8, lsl #1
|
||||
|
||||
SMULWB r10, r7, r11
|
||||
SMULWT r8, r12, r11
|
||||
|
||||
STR r5, [r2], #4
|
||||
SUB r7, r8, r10
|
||||
MOV r7, r7, lsl #1
|
||||
|
||||
SMULWB r11, r9, r14
|
||||
SMULWT r12, r6, r14
|
||||
|
||||
STR r7, [r2], -r3, lsl #2
|
||||
SUB r12, r12, r11
|
||||
MOV r12, r12, lsl #1
|
||||
|
||||
SMULWT r10, r9, r14
|
||||
SMLAWB r7, r6, r14, r10
|
||||
|
||||
STR r12, [r2], #-4
|
||||
MOV r7, r7, lsl #1
|
||||
STR r7, [r2], #8
|
||||
|
||||
|
||||
BNE RADIX4_INLOOP
|
||||
|
||||
LDR r8, [sp]
|
||||
LDR r4, [sp, #12]
|
||||
LDR r6, [sp, #8]
|
||||
|
||||
|
||||
SUB r0, r0, r8, lsl #1
|
||||
ADD r1, r1, r8, lsl #2
|
||||
ADD r2, r2, r8, lsl #2
|
||||
|
||||
SUBS r6, r6, #1
|
||||
STR r6, [sp, #8]
|
||||
BNE RADIX4_OUTLOOP
|
||||
|
||||
|
||||
|
||||
ADD sp, sp, #16
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
205
decoder/armv7/ixheaacd_rescale_subbandsamples.s
Normal file
205
decoder/armv7/ixheaacd_rescale_subbandsamples.s
Normal file
|
|
@ -0,0 +1,205 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_adjust_scale_armv7
|
||||
ixheaacd_adjust_scale_armv7:
|
||||
STMFD SP!, {R4-R11, R14}
|
||||
LDR R4, [SP, #44]
|
||||
LDR R5, [SP, #36]
|
||||
LDR R6, [SP, #40]
|
||||
MOVS R4, R4
|
||||
BEQ ENDRESSCALE
|
||||
SUBS R3, R3, R2
|
||||
BLE ENDRESSCALE
|
||||
SUBS R6, R6, R5
|
||||
BLE ENDRESSCALE
|
||||
|
||||
ADD R9, R0, R5, LSL#2
|
||||
LDR R10, [R9], #4
|
||||
|
||||
CMP R4, #31
|
||||
MOVGT R4, #31
|
||||
CMP R4, #-31
|
||||
MOVLT R4, #-31
|
||||
|
||||
|
||||
LDR R8, [SP, #48]
|
||||
MOVS R8, R8
|
||||
BEQ ELIF1
|
||||
|
||||
|
||||
MOVS R4, R4
|
||||
BLE ELIF2_1
|
||||
|
||||
LOOP1:
|
||||
ADD R10, R10, R2, LSL #2
|
||||
MOV R7, R3
|
||||
|
||||
INNLOOP1:
|
||||
LDR R11, [R10]
|
||||
SUBS R7, R7 , #2
|
||||
LDRGE R5, [R10, #4]
|
||||
|
||||
MOV R11, R11, LSL R4
|
||||
STR R11, [R10], #4
|
||||
|
||||
MOVGE R5, R5, LSL R4
|
||||
STRGE R5, [R10], #4
|
||||
|
||||
BGT INNLOOP1
|
||||
|
||||
LDR R10, [R9], #4
|
||||
SUBS R6, R6, #1
|
||||
BGT LOOP1
|
||||
|
||||
B ENDRESSCALE
|
||||
|
||||
ELIF2_1:
|
||||
RSB R4, R4, #0
|
||||
|
||||
LOOP2:
|
||||
ADD R10, R10, R2, LSL #2
|
||||
MOV R7, R3
|
||||
INNLOOP2:
|
||||
LDR R11, [R10]
|
||||
SUBS R7, R7 , #2
|
||||
LDRGE R5, [R10, #4]
|
||||
|
||||
MOV R11, R11, ASR R4
|
||||
STR R11, [R10], #4
|
||||
|
||||
MOVGE R5, R5, ASR R4
|
||||
STRGE R5, [R10], #4
|
||||
|
||||
BGT INNLOOP2
|
||||
|
||||
LDR R10, [R9], #4
|
||||
SUBS R6, R6, #1
|
||||
BGT LOOP2
|
||||
|
||||
|
||||
|
||||
|
||||
B ENDRESSCALE
|
||||
|
||||
ELIF1:
|
||||
ADD R5, R1, R5, LSL#2
|
||||
|
||||
MOVS R4, R4
|
||||
BLE ELIF2_2
|
||||
LOOP3:
|
||||
LDR R8, [R5], #4
|
||||
ADD R10, R10, R2, LSL #2
|
||||
ADD R8, R8, R2, LSL #2
|
||||
BICS R7, R3, #1
|
||||
BEQ COUNTODD1
|
||||
INNLOOP3:
|
||||
LDR R11, [R10]
|
||||
LDR R1, [R8]
|
||||
MOV R11, R11, LSL R4
|
||||
MOV R1, R1, LSL R4
|
||||
STR R11, [R10], #4
|
||||
STR R1, [R8], #4
|
||||
|
||||
LDR R11, [R10]
|
||||
LDR R1, [R8]
|
||||
MOV R11, R11, LSL R4
|
||||
MOV R1, R1, LSL R4
|
||||
STR R11, [R10], #4
|
||||
STR R1, [R8], #4
|
||||
|
||||
SUBS R7, R7 , #2
|
||||
BGT INNLOOP3
|
||||
COUNTODD1:
|
||||
BIC R7, R3, #1
|
||||
CMP R7, R3
|
||||
BEQ INNLOOP3END
|
||||
|
||||
LDR R11, [R10]
|
||||
LDR R1, [R8]
|
||||
MOV R11, R11, LSL R4
|
||||
MOV R1, R1, LSL R4
|
||||
STR R11, [R10], #4
|
||||
STR R1, [R8], #4
|
||||
|
||||
|
||||
|
||||
INNLOOP3END:
|
||||
|
||||
|
||||
LDR R10, [R9], #4
|
||||
SUBS R6, R6, #1
|
||||
BGT LOOP3
|
||||
B ENDRESSCALE
|
||||
|
||||
ELIF2_2:
|
||||
RSB R4, R4, #0
|
||||
|
||||
LOOP4:
|
||||
LDR R8, [R5], #4
|
||||
ADD R10, R10, R2, LSL #2
|
||||
ADD R8, R8, R2, LSL #2
|
||||
BICS R7, R3, #1
|
||||
BEQ COUNTODD2
|
||||
INNLOOP4:
|
||||
LDR R11, [R10]
|
||||
LDR R1, [R8]
|
||||
MOV R11, R11, ASR R4
|
||||
MOV R1, R1, ASR R4
|
||||
STR R11, [R10], #4
|
||||
STR R1, [R8], #4
|
||||
|
||||
LDR R11, [R10]
|
||||
LDR R1, [R8]
|
||||
MOV R11, R11, ASR R4
|
||||
MOV R1, R1, ASR R4
|
||||
STR R11, [R10], #4
|
||||
STR R1, [R8], #4
|
||||
|
||||
|
||||
SUBS R7, R7 , #2
|
||||
BGT INNLOOP4
|
||||
COUNTODD2:
|
||||
BIC R7, R3, #1
|
||||
CMP R7, R3
|
||||
BEQ INNLOOP4END
|
||||
|
||||
LDR R11, [R10]
|
||||
LDR R1, [R8]
|
||||
MOV R11, R11, ASR R4
|
||||
MOV R1, R1, ASR R4
|
||||
STR R11, [R10], #4
|
||||
STR R1, [R8], #4
|
||||
|
||||
|
||||
INNLOOP4END:
|
||||
LDR R10, [R9], #4
|
||||
SUBS R6, R6, #1
|
||||
BGT LOOP4
|
||||
|
||||
|
||||
ENDRESSCALE:
|
||||
LDMFD sp!, {r4-r11, r15}
|
||||
|
||||
|
||||
|
||||
855
decoder/armv7/ixheaacd_sbr_imdct_using_fft.s
Normal file
855
decoder/armv7/ixheaacd_sbr_imdct_using_fft.s
Normal file
|
|
@ -0,0 +1,855 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
|
||||
.global ixheaacd_sbr_imdct_using_fft
|
||||
ixheaacd_sbr_imdct_using_fft:
|
||||
|
||||
STMFD sp!, {r4-r12, lr}
|
||||
VPUSH {D8 - D15}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LDR r5, [sp, #0x68]
|
||||
LDR r6, [sp, #0x68+4]
|
||||
LDR r7, [sp, #0x68+8]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
COND_6: CMP r1, #0x10
|
||||
BNE COND_7
|
||||
MOV r8, #1
|
||||
MOV r4, r7
|
||||
B RADIX_4_FIRST_START
|
||||
|
||||
COND_7: CMP r1, #0x20
|
||||
|
||||
MOV r8, #1
|
||||
MOV r4, r7
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
RADIX_8_FIRST_START:
|
||||
|
||||
|
||||
LSR r9 , r1, #5
|
||||
LSL r1, r1, #1
|
||||
|
||||
RADIX_8_FIRST_LOOP:
|
||||
|
||||
MOV r5 , r2
|
||||
MOV r6 , r2
|
||||
MOV r7 , r2
|
||||
MOV r11 , r2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LDRB r12, [r4, #0]
|
||||
ADD r5, r5, r12, LSL #3
|
||||
VLD2.32 {d0[0], d2[0]}, [r5] , r1
|
||||
ADD r5, r5, r1
|
||||
VLD2.32 {d8[0], d10[0]}, [r5] , r1
|
||||
SUB r5, r5, r1, LSL #1
|
||||
VLD2.32 {d4[0], d6[0]}, [r5] , r1
|
||||
ADD r5, r5, r1
|
||||
VLD2.32 {d12[0], d14[0]}, [r5], r1
|
||||
SUB r5, r5, r1, LSL #2
|
||||
|
||||
LDRB r12, [r4, #1]
|
||||
ADD r6, r6, r12, LSL #3
|
||||
VLD2.32 {d0[1], d2[1]}, [r6] , r1
|
||||
ADD r6, r6, r1
|
||||
VLD2.32 {d8[1], d10[1]}, [r6] , r1
|
||||
SUB r6, r6, r1, LSL #1
|
||||
VLD2.32 {d4[1], d6[1]}, [r6] , r1
|
||||
ADD r6, r6, r1
|
||||
VLD2.32 {d12[1], d14[1]}, [r6], r1
|
||||
SUB r6, r6, r1, LSL #2
|
||||
|
||||
|
||||
LDRB r12, [r4, #2]
|
||||
ADD r7, r7, r12 , LSL #3
|
||||
VLD2.32 {d1[0], d3[0]}, [r7] , r1
|
||||
ADD r7, r7, r1
|
||||
VLD2.32 {d9[0], d11[0]}, [r7] , r1
|
||||
SUB r7, r7, r1, LSL #1
|
||||
|
||||
LDRB r12, [r4, #3]
|
||||
ADD r11, r11, r12 , LSL #3
|
||||
VLD2.32 {d1[1], d3[1]}, [r11] , r1
|
||||
ADD r11, r11, r1
|
||||
VLD2.32 {d9[1], d11[1]}, [r11] , r1
|
||||
SUB r11, r11, r1, LSL #1
|
||||
|
||||
|
||||
|
||||
VADD.I32 q8, q0, q4
|
||||
VLD2.32 {d5[0], d7[0]}, [r7] , r1
|
||||
ADD r7, r7, r1
|
||||
|
||||
VSUB.I32 q9, q0, q4
|
||||
VLD2.32 {d13[0], d15[0]}, [r7], r1
|
||||
SUB r7, r7, r1, LSL #2
|
||||
|
||||
|
||||
|
||||
|
||||
VADD.I32 q0, q1, q5
|
||||
VLD2.32 {d5[1], d7[1]}, [r11] , r1
|
||||
ADD r11, r11, r1
|
||||
|
||||
VSUB.I32 q4, q1, q5
|
||||
VLD2.32 {d13[1], d15[1]}, [r11], r1
|
||||
SUB r11, r11, r1, LSL #2
|
||||
|
||||
|
||||
|
||||
ADD r4, r4, #4
|
||||
|
||||
ADD r5, r5, r1, LSR #1
|
||||
ADD r6, r6, r1, LSR #1
|
||||
ADD r7, r7, r1, LSR #1
|
||||
ADD r11, r11, r1, LSR #1
|
||||
|
||||
|
||||
VADD.I32 q1, q2, q6
|
||||
VLD2.32 {d28[0], d30[0]}, [r5] , r1
|
||||
|
||||
|
||||
VSUB.I32 q5, q2, q6
|
||||
VLD2.32 {d20[0], d22[0]}, [r5] , r1
|
||||
|
||||
|
||||
VADD.I32 q2, q3, q7
|
||||
VLD2.32 {d24[0], d26[0]}, [r5] , r1
|
||||
|
||||
|
||||
VSUB.I32 q6, q3, q7
|
||||
VLD2.32 {d28[1], d30[1]}, [r6] , r1
|
||||
|
||||
VADD.S32 q3, q9, q6
|
||||
VLD2.32 {d20[1], d22[1]}, [r6] , r1
|
||||
|
||||
VSUB.S32 q7, q9, q6
|
||||
VLD2.32 {d24[1], d26[1]}, [r6] , r1
|
||||
|
||||
VSUB.S32 q6, q4, q5
|
||||
VLD2.32 {d29[0], d31[0]}, [r7] , r1
|
||||
|
||||
VADD.S32 q9, q4, q5
|
||||
VLD2.32 {d21[0], d23[0]}, [r7] , r1
|
||||
|
||||
VADD.S32 q4, q8, q1
|
||||
VLD2.32 {d25[0], d27[0]}, [r7] , r1
|
||||
|
||||
VSUB.S32 q5, q8, q1
|
||||
VLD2.32 {d29[1], d31[1]}, [r11] , r1
|
||||
|
||||
VADD.S32 q8, q0, q2
|
||||
VLD2.32 {d21[1], d23[1]}, [r11] , r1
|
||||
|
||||
VSUB.S32 q0, q0, q2
|
||||
VLD2.32 {d25[1], d27[1]}, [r11] , r1
|
||||
|
||||
|
||||
VPUSH {q3}
|
||||
VPUSH {q7}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
VLD2.32 {d2[0], d4[0]}, [r5], r1
|
||||
|
||||
VADD.I32 q7, q14, q12
|
||||
|
||||
VLD2.32 {d2[1], d4[1]}, [r6] , r1
|
||||
|
||||
VSUB.I32 q3, q14, q12
|
||||
|
||||
VLD2.32 {d3[0], d5[0]}, [r7] , r1
|
||||
|
||||
VADD.I32 q14, q15, q13
|
||||
|
||||
VLD2.32 {d3[1], d5[1]}, [r11] , r1
|
||||
|
||||
VSUB.I32 q12, q15, q13
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
VADD.I32 q15, q10, q1
|
||||
VSUB.I32 q13, q10, q1
|
||||
VADD.I32 q10, q11, q2
|
||||
VSUB.I32 q1, q11, q2
|
||||
|
||||
|
||||
|
||||
VADD.S32 q11, q7, q15
|
||||
VSUB.S32 q2, q7, q15
|
||||
VADD.S32 q7, q14, q10
|
||||
VSUB.S32 q15, q14, q10
|
||||
|
||||
VADD.S32 q14, q3, q12
|
||||
VSUB.S32 q10, q3, q12
|
||||
VADD.S32 q3, q13, q1
|
||||
VSUB.S32 q12, q13, q1
|
||||
|
||||
VADD.S32 q1 , q14, q12
|
||||
VSUB.S32 q13, q14, q12
|
||||
VSUB.S32 q12, q3, q10
|
||||
|
||||
VUZP.16 d2, d3
|
||||
VADD.S32 q14, q3, q10
|
||||
|
||||
VUZP.16 d26, d27
|
||||
VADD.S32 q3, q4, q11
|
||||
|
||||
VUZP.16 d24, d25
|
||||
VSUB.S32 q10, q4, q11
|
||||
|
||||
VUZP.16 d28, d29
|
||||
VADD.S32 q4, q8, q7
|
||||
|
||||
LDR r14, =0x5a82
|
||||
|
||||
VSUB.S32 q11, q8, q7
|
||||
|
||||
VADD.S32 q8, q5, q15
|
||||
VSUB.S32 q7, q5, q15
|
||||
VSUB.S32 q5, q0, q2
|
||||
VADD.S32 q15, q0, q2
|
||||
|
||||
VPOP {q0}
|
||||
VPOP {q2}
|
||||
VPUSH {q3-q4}
|
||||
VPUSH {q10}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
VDUP.16 d20, r14
|
||||
|
||||
|
||||
VMULL.u16 q4, d26, d20
|
||||
VMULL.u16 q3, d28, d20
|
||||
|
||||
VPUSH {q7-q8}
|
||||
VPUSH {q5}
|
||||
|
||||
VSHR.S32 q4, q4, #15
|
||||
VSHR.S32 q3, q3, #15
|
||||
|
||||
VQDMLAL.S16 q4, d27, d20
|
||||
VQDMLAL.S16 q3, d29, d20
|
||||
|
||||
|
||||
VPUSH {q11}
|
||||
|
||||
VMULL.u16 q13, d24, d20
|
||||
VMULL.u16 q14, d2, d20
|
||||
|
||||
VADD.S32 q5, q2, q4
|
||||
VSUB.S32 q7, q2, q4
|
||||
|
||||
VADD.S32 q8, q6, q3
|
||||
VSUB.S32 q6, q6, q3
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
VSHR.S32 q13, q13, #15
|
||||
VSHR.S32 q14, q14, #15
|
||||
|
||||
VQDMLAL.S16 q13, d25, d20
|
||||
VQDMLAL.S16 q14, d3, d20
|
||||
|
||||
VPOP {q1}
|
||||
VPOP {q10}
|
||||
|
||||
VADD.S32 q2, q0, q13
|
||||
VSUB.S32 q4, q0, q13
|
||||
|
||||
VADD.S32 q11, q9, q14
|
||||
VSUB.S32 q3, q9, q14
|
||||
|
||||
|
||||
|
||||
|
||||
VPOP {q14}
|
||||
VPOP {q9}
|
||||
VPOP {q0}
|
||||
VPOP {q12, q13}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
VTRN.32 q12, q5
|
||||
|
||||
VSHL.S32 q12, q12, #1
|
||||
VTRN.32 q9, q2
|
||||
VSHL.S32 q5, q5, #1
|
||||
|
||||
VSHL.S32 q9, q9, #1
|
||||
VTRN.32 q0, q7
|
||||
VSHL.S32 q2, q2, #1
|
||||
|
||||
VSHL.S32 q0, q0, #1
|
||||
VTRN.32 q14, q4
|
||||
VSHL.S32 q7, q7, #1
|
||||
|
||||
VSHL.S32 q14, q14, #1
|
||||
VTRN.32 q13, q6
|
||||
VSHL.S32 q4, q4, #1
|
||||
|
||||
VSHL.S32 q13, q13, #1
|
||||
VTRN.32 q10, q3
|
||||
VSHL.S32 q6, q6, #1
|
||||
|
||||
VSHL.S32 q10, q10, #1
|
||||
VTRN.32 q1, q8
|
||||
VSHL.S32 q3, q3, #1
|
||||
|
||||
VSHL.S32 q1, q1, #1
|
||||
VTRN.32 q15, q11
|
||||
VSHL.S32 q8, q8, #1
|
||||
|
||||
VSHL.S32 q15, q15, #1
|
||||
VSWP d18, d25
|
||||
|
||||
VSHL.S32 q11, q11, #1
|
||||
VSWP d4, d11
|
||||
|
||||
VSWP d1, d28
|
||||
VSWP d15, d8
|
||||
|
||||
VSWP d20, d27
|
||||
VSWP d6, d13
|
||||
|
||||
VSWP d30, d3
|
||||
VSWP d22, d17
|
||||
|
||||
VST2.32 {q12, q13}, [r3]!
|
||||
VST2.32 {q0, q1}, [r3]!
|
||||
|
||||
VST2.32 {q5, q6}, [r3]!
|
||||
VST2.32 {q7, q8}, [r3]!
|
||||
|
||||
VMOV q5, q11
|
||||
|
||||
VST2.32 {q9, q10}, [r3]!
|
||||
VST2.32 {q14, q15}, [r3]!
|
||||
|
||||
VST2.32 {q2, q3}, [r3]!
|
||||
VST2.32 {q4, q5}, [r3]!
|
||||
|
||||
|
||||
SUBS r9, r9, #1
|
||||
BNE RADIX_8_FIRST_LOOP
|
||||
|
||||
LSR r1, r1, #1
|
||||
SUB r3, r1, LSL #3
|
||||
|
||||
MOV r5, #8
|
||||
MOV r4, #32
|
||||
LSR r6, r1, #5
|
||||
|
||||
B RADIX_4_FIRST_ENDS
|
||||
|
||||
RADIX_8_FIRST_ENDS:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
RADIX_4_FIRST_START:
|
||||
|
||||
|
||||
LSR r9 , r1, #4
|
||||
LSL r1, r1, #1
|
||||
|
||||
RADIX_4_LOOP:
|
||||
|
||||
MOV r5 , r2
|
||||
MOV r6 , r2
|
||||
MOV r7 , r2
|
||||
MOV r11 , r2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LDRB r12, [r4, #0]
|
||||
ADD r5, r5, r12, LSL #3
|
||||
|
||||
VLD2.32 {d0[0], d2[0]}, [r5] , r1
|
||||
ADD r5, r5, r1
|
||||
VLD2.32 {d8[0], d10[0]}, [r5] , r1
|
||||
SUB r5, r5, r1, LSL #1
|
||||
VLD2.32 {d4[0], d6[0]}, [r5] , r1
|
||||
ADD r5, r5, r1
|
||||
VLD2.32 {d12[0], d14[0]}, [r5], r1
|
||||
|
||||
LDRB r12, [r4, #1]
|
||||
ADD r6, r6, r12, LSL #3
|
||||
|
||||
VLD2.32 {d0[1], d2[1]}, [r6] , r1
|
||||
ADD r6, r6, r1
|
||||
VLD2.32 {d8[1], d10[1]}, [r6] , r1
|
||||
SUB r6, r6, r1, LSL #1
|
||||
VLD2.32 {d4[1], d6[1]}, [r6] , r1
|
||||
ADD r6, r6, r1
|
||||
VLD2.32 {d12[1], d14[1]}, [r6], r1
|
||||
|
||||
|
||||
LDRB r12, [r4, #2]
|
||||
ADD r7, r7, r12, LSL #3
|
||||
|
||||
VLD2.32 {d1[0], d3[0]}, [r7] , r1
|
||||
ADD r7, r7, r1
|
||||
VLD2.32 {d9[0], d11[0]}, [r7] , r1
|
||||
|
||||
LDRB r12, [r4, #3]
|
||||
ADD r11, r11, r12 , LSL #3
|
||||
|
||||
VLD2.32 {d1[1], d3[1]}, [r11] , r1
|
||||
ADD r11, r11, r1
|
||||
VLD2.32 {d9[1], d11[1]}, [r11] , r1
|
||||
|
||||
|
||||
SUB r7, r7, r1, LSL #1
|
||||
VADD.S32 q8, q0, q4
|
||||
VLD2.32 {d5[0], d7[0]}, [r7] , r1
|
||||
ADD r7, r7, r1
|
||||
VADD.S32 q9, q1, q5
|
||||
VLD2.32 {d13[0], d15[0]}, [r7], r1
|
||||
|
||||
|
||||
|
||||
SUB r11, r11, r1, LSL #1
|
||||
VSUB.S32 q10, q0, q4
|
||||
VLD2.32 {d5[1], d7[1]}, [r11] , r1
|
||||
ADD r11, r11, r1
|
||||
VSUB.S32 q11, q1, q5
|
||||
VLD2.32 {d13[1], d15[1]}, [r11], r1
|
||||
|
||||
|
||||
ADD r4, r4, #4
|
||||
|
||||
VADD.S32 q12, q2, q6
|
||||
VADD.S32 q13, q3, q7
|
||||
VSUB.S32 q14, q2, q6
|
||||
VSUB.S32 q15, q3, q7
|
||||
|
||||
VADD.S32 q0, q8, q12
|
||||
VADD.S32 q1, q9, q13
|
||||
VSUB.S32 q2, q8, q12
|
||||
VSUB.S32 q3, q9, q13
|
||||
|
||||
VADD.S32 q4, q10, q15
|
||||
VSUB.S32 q5, q11, q14
|
||||
VADD.S32 q7, q11, q14
|
||||
VSUB.S32 q6, q10, q15
|
||||
|
||||
|
||||
|
||||
|
||||
VTRN.32 q0, q4
|
||||
|
||||
VSHL.S32 q0, q0, #1
|
||||
VTRN.32 q2, q6
|
||||
VSHL.S32 q4, q4, #1
|
||||
|
||||
VSHL.S32 q2, q2, #1
|
||||
VTRN.32 q1, q5
|
||||
VSHL.S32 q6, q6, #1
|
||||
|
||||
VSHL.S32 q1, q1, #1
|
||||
VTRN.32 q3, q7
|
||||
VSHL.S32 q5, q5, #1
|
||||
|
||||
VSHL.S32 q3, q3, #1
|
||||
VSWP d4, d1
|
||||
|
||||
VSHL.S32 q7, q7, #1
|
||||
VSWP d12, d9
|
||||
|
||||
|
||||
|
||||
VSWP d6, d3
|
||||
VSWP d14, d11
|
||||
|
||||
|
||||
VST2.32 {q0, q1}, [r3]!
|
||||
VST2.32 {q4, q5}, [r3]!
|
||||
|
||||
VST2.32 {q2, q3}, [r3]!
|
||||
VST2.32 {q6, q7}, [r3]!
|
||||
|
||||
|
||||
|
||||
SUBS r9, r9, #1
|
||||
BNE RADIX_4_LOOP
|
||||
|
||||
LSR r1, r1, #1
|
||||
SUB r3, r1, LSL #3
|
||||
MOV r5, #4
|
||||
MOV r4, #64
|
||||
LSR r6, r1, #4
|
||||
|
||||
|
||||
RADIX_4_FIRST_ENDS:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
PUSH {r3}
|
||||
|
||||
LSR r5, r5, #2
|
||||
|
||||
OUTER_LOOP_R4:
|
||||
|
||||
LDR r14, [sp]
|
||||
|
||||
|
||||
MOV r7, r5
|
||||
MOV r2, #0
|
||||
MOV r9, r0
|
||||
LSL r12 , r5, #5
|
||||
MIDDLE_LOOP_R4:
|
||||
|
||||
|
||||
VLD2.16 {d0[0], d1[0]}, [r9], r2
|
||||
VLD2.16 {d2[0], d3[0]}, [r9], r2
|
||||
ADD r11, r2, r4, LSL #2
|
||||
VLD2.16 {d4[0], d5[0]}, [r9]
|
||||
ADD r10, r0, r11
|
||||
|
||||
|
||||
VLD2.16 {d0[1], d1[1]}, [r10], r11
|
||||
VLD2.16 {d2[1], d3[1]}, [r10], r11
|
||||
ADD r2, r11, r4, LSL #2
|
||||
VLD2.16 {d4[1], d5[1]}, [r10]
|
||||
ADD r9, r0, r2
|
||||
|
||||
|
||||
VLD2.16 {d0[2], d1[2]}, [r9], r2
|
||||
VLD2.16 {d2[2], d3[2]}, [r9], r2
|
||||
ADD r11, r2, r4, LSL #2
|
||||
VLD2.16 {d4[2], d5[2]}, [r9]
|
||||
ADD r10, r0, r11
|
||||
|
||||
|
||||
|
||||
VLD2.16 {d0[3], d1[3]}, [r10], r11
|
||||
VLD2.16 {d2[3], d3[3]}, [r10], r11
|
||||
ADD r2, r11, r4, LSL #2
|
||||
VLD2.16 {d4[3], d5[3]}, [r10]
|
||||
ADD r9, r0, r2
|
||||
|
||||
MOV r10, r6
|
||||
|
||||
|
||||
|
||||
INNER_LOOP_R4:
|
||||
|
||||
VLD2.32 {q3, q4}, [r14], r12
|
||||
|
||||
VSHR.S32 q3, q3, #1
|
||||
VLD4.16 {q5, q6}, [r14], r12
|
||||
VSHR.S32 q4, q4, #1
|
||||
|
||||
VSHR.U16 d10, d10, #1
|
||||
VLD4.16 {q7, q8}, [r14], r12
|
||||
VSHR.U16 d12, d12, #1
|
||||
|
||||
VMULL.S16 q11, d10, d0
|
||||
VMLSL.S16 q11, d12, d1
|
||||
VLD4.16 {q9, q10}, [r14], r12
|
||||
VMULL.S16 q12, d10, d1
|
||||
VMLAL.S16 q12, d12, d0
|
||||
|
||||
VSHR.U16 d14, d14, #1
|
||||
VSHR.U16 d16, d16, #1
|
||||
|
||||
SUB r14, r14, r12, LSL #2
|
||||
|
||||
VSHR.U16 d18, d18, #1
|
||||
VSHR.U16 d20, d20, #1
|
||||
|
||||
VMULL.S16 q13, d14, d2
|
||||
VMLSL.S16 q13, d16, d3
|
||||
|
||||
VSHR.S32 q11, q11, #15
|
||||
|
||||
VMULL.S16 q14, d14, d3
|
||||
VMLAL.S16 q14, d16, d2
|
||||
|
||||
VMULL.S16 q15, d18, d4
|
||||
VMLSL.S16 q15, d20, d5
|
||||
|
||||
VMLAL.S16 q11, d11, d0
|
||||
VMLSL.S16 q11, d13, d1
|
||||
|
||||
VSHR.S32 q12, q12, #15
|
||||
VSHR.S32 q13, q13, #15
|
||||
VSHR.S32 q14, q14, #15
|
||||
VSHR.S32 q15, q15, #15
|
||||
|
||||
|
||||
VMLAL.S16 q12, d11, d1
|
||||
VMLAL.S16 q12, d13, d0
|
||||
|
||||
|
||||
VMULL.S16 q5, d18, d5
|
||||
VMLAL.S16 q5, d20, d4
|
||||
|
||||
|
||||
VMLAL.S16 q13, d15, d2
|
||||
VMLSL.S16 q13, d17, d3
|
||||
|
||||
VMLAL.S16 q14, d15, d3
|
||||
VMLAL.S16 q14, d17, d2
|
||||
|
||||
|
||||
VMLAL.S16 q15, d19, d4
|
||||
VMLSL.S16 q15, d21, d5
|
||||
|
||||
VSHR.S32 q5, q5, #15
|
||||
|
||||
VMLAL.S16 q5, d19, d5
|
||||
VMLAL.S16 q5, d21, d4
|
||||
|
||||
|
||||
|
||||
CMP r7, r5
|
||||
BNE BYPASS_IF
|
||||
|
||||
ADD r14, r14, r12
|
||||
|
||||
LDR r3, [r14], r12
|
||||
ASR r3, r3, #1
|
||||
VMOV.S32 d22[0], r3
|
||||
|
||||
LDR r3, [r14], r12
|
||||
ASR r3, r3, #1
|
||||
VMOV.S32 d26[0], r3
|
||||
|
||||
LDR r3, [r14]
|
||||
ASR r3, r3, #1
|
||||
VMOV.S32 d30[0], r3
|
||||
|
||||
SUB r14, r14, r12, LSL #1
|
||||
ADD r14, r14, #4
|
||||
|
||||
LDR r3, [r14], r12
|
||||
ASR r3, r3, #1
|
||||
VMOV.S32 d24[0], r3
|
||||
|
||||
LDR r3, [r14], r12
|
||||
ASR r3, r3, #1
|
||||
VMOV.S32 d28[0], r3
|
||||
|
||||
LDR r3, [r14], r12
|
||||
ASR r3, r3, #1
|
||||
VMOV.S32 d10[0], r3
|
||||
|
||||
SUB r14, r14, #4
|
||||
|
||||
SUB r14, r14, r12, LSL #2
|
||||
|
||||
BYPASS_IF:
|
||||
|
||||
VADD.S32 q6, q3, q13
|
||||
VADD.S32 q7, q4, q14
|
||||
VSUB.S32 q3, q3, q13
|
||||
VSUB.S32 q4, q4, q14
|
||||
VADD.S32 q8, q11, q15
|
||||
VADD.S32 q9, q12, q5
|
||||
|
||||
VSUB.S32 q15, q11, q15
|
||||
VSUB.S32 q14, q12, q5
|
||||
|
||||
|
||||
VADD.S32 q10, q6, q8
|
||||
VADD.S32 q11, q7, q9
|
||||
VADD.S32 q12, q3, q14
|
||||
VSUB.S32 q13, q4, q15
|
||||
|
||||
VSUB.S32 q6, q6, q8
|
||||
VST2.32 {q10, q11}, [r14], r12
|
||||
VSUB.S32 q7, q7, q9
|
||||
|
||||
VSUB.S32 q8, q3, q14
|
||||
VST2.32 {q12, q13}, [r14], r12
|
||||
VADD.S32 q9, q4, q15
|
||||
|
||||
|
||||
VST2.32 {q6, q7}, [r14], r12
|
||||
VST2.32 {q8, q9}, [r14], r12
|
||||
|
||||
|
||||
|
||||
|
||||
SUBS r10, r10, #1
|
||||
BNE INNER_LOOP_R4
|
||||
|
||||
SUB r14, r14, r1, LSL #3
|
||||
ADD r14, r14, #32
|
||||
|
||||
SUBS r7, r7, #1
|
||||
BNE MIDDLE_LOOP_R4
|
||||
|
||||
|
||||
|
||||
|
||||
LSR r4, r4, #2
|
||||
LSL r5, r5, #2
|
||||
LSR r6, r6, #2
|
||||
SUBS r8, r8, #1
|
||||
BNE OUTER_LOOP_R4
|
||||
END_LOOPS:
|
||||
POP {r3}
|
||||
VPOP {D8 - D15}
|
||||
LDMFD sp!, {r4-r12, pc}
|
||||
|
||||
265
decoder/armv7/ixheaacd_sbr_qmfanal32_winadds.s
Normal file
265
decoder/armv7/ixheaacd_sbr_qmfanal32_winadds.s
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_sbr_qmfanal32_winadds
|
||||
|
||||
ixheaacd_sbr_qmfanal32_winadds:
|
||||
|
||||
STMFD sp!, {R4-R12, R14}
|
||||
VPUSH {D8 - D15}
|
||||
LDR R5, [SP, #108]
|
||||
LDR R6, [SP, #112]
|
||||
LDR R7, [SP, #116]
|
||||
|
||||
MOV R9, R7, LSL #1
|
||||
|
||||
ADD r5, r5, #64
|
||||
MOV r10, #3
|
||||
|
||||
LOOP:
|
||||
LDRSH r4 , [R6], r9
|
||||
LDRSH r8 , [R6], r9
|
||||
LDRSH r11 , [R6], r9
|
||||
LDRSH r12 , [R6], r9
|
||||
|
||||
|
||||
STRH r4 , [r5 , #-2]!
|
||||
STRH r8 , [r5 , #-2]!
|
||||
STRH r11 , [r5 , #-2]!
|
||||
STRH r12 , [r5 , #-2]!
|
||||
|
||||
LDRSH r4 , [R6], r9
|
||||
LDRSH r8 , [R6], r9
|
||||
LDRSH r11 , [R6], r9
|
||||
LDRSH r12 , [R6], r9
|
||||
|
||||
|
||||
STRH r4 , [r5 , #-2]!
|
||||
STRH r8 , [r5 , #-2]!
|
||||
STRH r11 , [r5 , #-2]!
|
||||
STRH r12 , [r5 , #-2]!
|
||||
|
||||
|
||||
SUBS r10, r10, #1
|
||||
|
||||
BPL LOOP
|
||||
|
||||
LDR R4, [SP, #104]
|
||||
|
||||
MOV R5, #8
|
||||
VLD1.16 D0, [R0]!
|
||||
MOV R6, #64
|
||||
|
||||
MOV R6, R6, LSL #1
|
||||
VLD2.16 {D1, D2}, [R2]!
|
||||
MOV R7, #244
|
||||
|
||||
MOV R9, R0
|
||||
ADD R0, R0, #120
|
||||
|
||||
MOV R11, R4
|
||||
VLD1.16 D2, [R0], R6
|
||||
ADD R11, R11, #128
|
||||
|
||||
|
||||
|
||||
|
||||
MOV R10, R2
|
||||
ADD R2, R2, #240
|
||||
|
||||
VMULL.S16 Q15, D0, D1
|
||||
VLD2.16 {D3, D4}, [R2]!
|
||||
ADD R2, R2, #240
|
||||
|
||||
|
||||
VLD1.16 D4, [R0], R6
|
||||
VMLAL.S16 Q15, D2, D3
|
||||
|
||||
VLD2.16 {D5, D6}, [R2]!
|
||||
|
||||
|
||||
ADD R2, R2, #240
|
||||
VLD1.16 D6, [R0], R6
|
||||
VMLAL.S16 Q15, D4, D5
|
||||
|
||||
VLD2.16 {D7, D8}, [R2]!
|
||||
|
||||
|
||||
ADD R2, R2, #240
|
||||
VLD1.16 D8, [R0], R6
|
||||
VMLAL.S16 Q15, D6, D7
|
||||
|
||||
MOV R0, R9
|
||||
VLD2.16 {D9, D10}, [R2]!
|
||||
|
||||
|
||||
ADD R2, R2, #240
|
||||
VLD1.16 D10, [R1]!
|
||||
VMLAL.S16 Q15, D8, D9
|
||||
|
||||
|
||||
|
||||
MOV R9, R1
|
||||
VLD2.16 {D11, D12}, [R3]!
|
||||
ADD R1, R1, #120
|
||||
|
||||
|
||||
MOV R2, R10
|
||||
VLD1.16 D12, [R1], R6
|
||||
MOV R10, R3
|
||||
|
||||
ADD R3, R3, #240
|
||||
VLD2.16 {D13, D14}, [R3]!
|
||||
ADD R3, R3, #240
|
||||
|
||||
|
||||
VLD2.16 {D15, D16}, [R3]!
|
||||
|
||||
VLD1.16 D14, [R1], R6
|
||||
ADD R3, R3, #240
|
||||
|
||||
|
||||
|
||||
VLD1.16 D16, [R1], R6
|
||||
SUB R5, R5, #1
|
||||
|
||||
VLD2.16 {D17, D18}, [R3]!
|
||||
|
||||
|
||||
ADD R3, R3, #240
|
||||
VLD1.16 D18, [R1], R6
|
||||
|
||||
MOV R1, R9
|
||||
VLD2.16 {D19, D20}, [R3]!
|
||||
|
||||
ADD R3, R3, #240
|
||||
|
||||
MOV R3, R10
|
||||
|
||||
|
||||
LOOP_1:
|
||||
|
||||
|
||||
VLD1.16 D0, [R0]!
|
||||
|
||||
MOV R9, R0
|
||||
VLD2.16 {D1, D2}, [R2]!
|
||||
ADD R0, R0, #120
|
||||
|
||||
MOV R10, R2
|
||||
VST1.32 {Q15}, [R4]!
|
||||
ADD R2, R2, #240
|
||||
|
||||
|
||||
VMULL.S16 Q15, D10, D11
|
||||
VLD1.16 D2, [R0], R6
|
||||
VMLAL.S16 Q15, D12, D13
|
||||
|
||||
VMLAL.S16 Q15, D14, D15
|
||||
VLD2.16 {D3, D4}, [R2]!
|
||||
VMLAL.S16 Q15, D16, D17
|
||||
|
||||
VMLAL.S16 Q15, D18, D19
|
||||
VLD1.16 D4, [R0], R6
|
||||
ADD R2, R2, #240
|
||||
|
||||
VST1.32 {Q15}, [R11]!
|
||||
|
||||
|
||||
VMULL.S16 Q15, D0, D1
|
||||
VLD2.16 {D5, D6}, [R2]!
|
||||
VMLAL.S16 Q15, D2, D3
|
||||
|
||||
|
||||
|
||||
ADD R2, R2, #240
|
||||
VLD1.16 D6, [R0], R6
|
||||
VMLAL.S16 Q15, D4, D5
|
||||
|
||||
VLD2.16 {D7, D8}, [R2]!
|
||||
|
||||
|
||||
ADD R2, R2, #240
|
||||
VLD1.16 D8, [R0], R6
|
||||
VMLAL.S16 Q15, D6, D7
|
||||
|
||||
MOV R0, R9
|
||||
VLD2.16 {D9, D10}, [R2]!
|
||||
|
||||
|
||||
|
||||
ADD R2, R2, #240
|
||||
VLD1.16 D10, [R1]!
|
||||
MOV R2, R10
|
||||
|
||||
MOV R9, R1
|
||||
VLD2.16 {D11, D12}, [R3]!
|
||||
ADD R1, R1, #120
|
||||
|
||||
|
||||
VMLAL.S16 Q15, D8, D9
|
||||
VLD1.16 D12, [R1], R6
|
||||
MOV R10, R3
|
||||
|
||||
|
||||
ADD R3, R3, #240
|
||||
VLD2.16 {D13, D14}, [R3]!
|
||||
ADD R3, R3, #240
|
||||
|
||||
|
||||
|
||||
VLD1.16 D14, [R1], R6
|
||||
VLD2.16 {D15, D16}, [R3]!
|
||||
ADD R3, R3, #240
|
||||
|
||||
|
||||
VLD1.16 D16, [R1], R6
|
||||
VLD2.16 {D17, D18}, [R3]!
|
||||
ADD R3, R3, #240
|
||||
|
||||
|
||||
VLD1.16 D18, [R1], R6
|
||||
SUBS R5, R5, #1
|
||||
|
||||
MOV R1, R9
|
||||
VLD2.16 {D19, D20}, [R3]!
|
||||
|
||||
ADD R3, R3, #240
|
||||
|
||||
MOV R3, R10
|
||||
|
||||
BGT LOOP_1
|
||||
|
||||
VST1.32 {Q15}, [R4]!
|
||||
VMULL.S16 Q15, D10, D11
|
||||
VMLAL.S16 Q15, D12, D13
|
||||
|
||||
VMLAL.S16 Q15, D14, D15
|
||||
VMLAL.S16 Q15, D16, D17
|
||||
VMLAL.S16 Q15, D18, D19
|
||||
|
||||
VST1.32 {Q15}, [R11]!
|
||||
|
||||
VPOP {D8 - D15}
|
||||
LDMFD sp!, {R4-R12, R15}
|
||||
|
||||
245
decoder/armv7/ixheaacd_sbr_qmfanal32_winadds_eld.s
Normal file
245
decoder/armv7/ixheaacd_sbr_qmfanal32_winadds_eld.s
Normal file
|
|
@ -0,0 +1,245 @@
|
|||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_sbr_qmfanal32_winadds_eld
|
||||
|
||||
ixheaacd_sbr_qmfanal32_winadds_eld:
|
||||
|
||||
STMFD sp!, {R4-R12, R14}
|
||||
LDR R5, [SP, #44] @filterStates
|
||||
LDR R6, [SP, #48] @timeIn
|
||||
LDR R7, [SP, #52] @stride
|
||||
|
||||
MOV R9, R7, LSL #1
|
||||
|
||||
ADD r5, r5, #64
|
||||
MOV r10, #3
|
||||
|
||||
LOOP:
|
||||
LDRSH r4 , [R6], r9
|
||||
LDRSH r8 , [R6], r9
|
||||
LDRSH r11 , [R6], r9
|
||||
LDRSH r12 , [R6], r9
|
||||
|
||||
|
||||
STRH r4 , [r5 , #-2]!
|
||||
STRH r8 , [r5 , #-2]!
|
||||
STRH r11 , [r5 , #-2]!
|
||||
STRH r12 , [r5 , #-2]!
|
||||
|
||||
LDRSH r4 , [R6], r9
|
||||
LDRSH r8 , [R6], r9
|
||||
LDRSH r11 , [R6], r9
|
||||
LDRSH r12 , [R6], r9
|
||||
|
||||
|
||||
STRH r4 , [r5 , #-2]!
|
||||
STRH r8 , [r5 , #-2]!
|
||||
STRH r11 , [r5 , #-2]!
|
||||
STRH r12 , [r5 , #-2]!
|
||||
|
||||
|
||||
SUBS r10, r10, #1
|
||||
|
||||
BPL LOOP
|
||||
|
||||
LDR R4, [SP, #40] @winAdd
|
||||
|
||||
MOV R5, #8
|
||||
VLD1.16 D0, [R0]! @tmpQ1[n + 0] load and incremented R0 by 8
|
||||
|
||||
MOV R6, #64
|
||||
MOV R6, R6, LSL #1 @
|
||||
VLD1.16 {D1, D2}, [R2]! @ tmpQmf_c1[2*(n + 0)] load and incremented
|
||||
|
||||
MOV R7, #244 @ NOT USED further
|
||||
|
||||
MOV R9, R0
|
||||
ADD R0, R0, #120 @ incrementing R0 by 120 + 8 = 128
|
||||
|
||||
MOV R11, R4 @ Mov winAdd to R11
|
||||
VLD1.16 D2, [R0], R6 @ tmpQ1[n + 64] load and incremented by R6
|
||||
ADD R11, R11, #128 @ increment winAdd by 128
|
||||
|
||||
|
||||
MOV R10, R2 @
|
||||
ADD R2, R2, #112 @ This should be 240 --> 112
|
||||
|
||||
VMULL.S16 Q15, D0, D1
|
||||
VLD1.16 {D3, D4}, [R2]! @ tmpQmf_c1[2*(n + 64)] load and incremented
|
||||
ADD R2, R2, #112 @ This should be 112
|
||||
|
||||
|
||||
VLD1.16 D4, [R0], R6 @ tmpQ1[n + 128] load and incremented by R6
|
||||
VMLAL.S16 Q15, D2, D3
|
||||
|
||||
VLD1.16 {D5, D6}, [R2]! @ tmpQmf_c1[2*(n + 128)] load and incremented
|
||||
SUB R10, R10, #8
|
||||
|
||||
|
||||
ADD R2, R2, #112 @ This should be 112
|
||||
VLD1.16 D6, [R0], R6 @ tmpQ1[n + 192] load and incremented by R6
|
||||
VMLAL.S16 Q15, D4, D5
|
||||
|
||||
VLD1.16 {D7, D8}, [R2]! @ tmpQmf_c1[2*(n + 192)] load and incremented
|
||||
|
||||
|
||||
ADD R2, R2, #112 @ This should be 112
|
||||
VLD1.16 D8, [R0], R6 @ tmpQ1[n + 256] load and incremented by R6
|
||||
VMLAL.S16 Q15, D6, D7
|
||||
|
||||
MOV R0, R9
|
||||
VLD1.16 {D9, D10}, [R2]! @ tmpQmf_c1[2*(n + 256)] load and incremented
|
||||
|
||||
|
||||
ADD R2, R2, #112 @ This should be 112
|
||||
VLD1.16 D10, [R1]! @ tmpQ2[n + 0] load and incremented
|
||||
VMLAL.S16 Q15, D8, D9
|
||||
|
||||
|
||||
|
||||
MOV R9, R1
|
||||
VLD1.16 {D11, D12}, [R3]! @ tmpQmf_c2[2*(n + 0)] load and incremented
|
||||
ADD R1, R1, #120 @ incrementing R1 by 120 + 8 = 128
|
||||
|
||||
|
||||
MOV R2, R10 @
|
||||
VLD1.16 D12, [R1], R6 @ tmpQ2[n + 64] load and incremented by R6
|
||||
MOV R10, R3
|
||||
|
||||
ADD R3, R3, #112 @ This sholud be 112
|
||||
VLD1.16 {D13, D14}, [R3]! @ tmpQmf_c2[2*(n + 64)] load and incremented
|
||||
ADD R3, R3, #112 @ This sholud be 112
|
||||
|
||||
|
||||
VLD1.16 {D15, D16}, [R3]! @ tmpQmf_c2[2*(n + 128)] load and incremented
|
||||
|
||||
SUB R10, R10, #8
|
||||
|
||||
VLD1.16 D14, [R1], R6
|
||||
ADD R3, R3, #112 @ This should be 112
|
||||
|
||||
|
||||
|
||||
VLD1.16 D16, [R1], R6
|
||||
SUB R5, R5, #1
|
||||
|
||||
VLD1.16 {D17, D18}, [R3]! @ tmpQmf_c2[2*(n + 192)] load and incremented
|
||||
|
||||
|
||||
ADD R3, R3, #112 @ This should be 112
|
||||
VLD1.16 D18, [R1], R6
|
||||
|
||||
MOV R1, R9
|
||||
VLD1.16 {D19, D20}, [R3]! @ tmpQmf_c2[2*(n + 256)] load and incremented
|
||||
|
||||
ADD R3, R3, #112 @ This should be 112
|
||||
|
||||
MOV R3, R10
|
||||
|
||||
|
||||
LOOP_1:
|
||||
|
||||
|
||||
VLD1.16 D0, [R0]!
|
||||
|
||||
MOV R9, R0
|
||||
VLD1.16 {D1, D2}, [R2]!
|
||||
ADD R0, R0, #120
|
||||
|
||||
MOV R10, R2
|
||||
VST1.32 {Q15}, [R4]!
|
||||
ADD R2, R2, #112 @ This should be 112
|
||||
|
||||
|
||||
VMULL.S16 Q15, D10, D11
|
||||
VLD1.16 D2, [R0], R6
|
||||
VMLAL.S16 Q15, D12, D13
|
||||
|
||||
VMLAL.S16 Q15, D14, D15
|
||||
VLD1.16 {D3, D4}, [R2]!
|
||||
VMLAL.S16 Q15, D16, D17
|
||||
|
||||
VMLAL.S16 Q15, D18, D19
|
||||
VLD1.16 D4, [R0], R6
|
||||
ADD R2, R2, #112 @ This should be 112
|
||||
|
||||
VST1.32 {Q15}, [R11]!
|
||||
SUB R10, R10, #8
|
||||
|
||||
|
||||
VMULL.S16 Q15, D0, D1
|
||||
VLD1.16 {D5, D6}, [R2]!
|
||||
VMLAL.S16 Q15, D2, D3
|
||||
|
||||
|
||||
|
||||
ADD R2, R2, #112 @ This should be 112
|
||||
VLD1.16 D6, [R0], R6
|
||||
VMLAL.S16 Q15, D4, D5
|
||||
|
||||
VLD1.16 {D7, D8}, [R2]!
|
||||
|
||||
|
||||
ADD R2, R2, #112 @ This should be 112
|
||||
VLD1.16 D8, [R0], R6
|
||||
VMLAL.S16 Q15, D6, D7
|
||||
|
||||
MOV R0, R9
|
||||
VLD1.16 {D9, D10}, [R2]!
|
||||
|
||||
|
||||
|
||||
ADD R2, R2, #112 @ This should be 112
|
||||
VLD1.16 D10, [R1]!
|
||||
MOV R2, R10
|
||||
|
||||
MOV R9, R1
|
||||
VLD1.16 {D11, D12}, [R3]!
|
||||
ADD R1, R1, #120
|
||||
|
||||
|
||||
VMLAL.S16 Q15, D8, D9
|
||||
VLD1.16 D12, [R1], R6
|
||||
MOV R10, R3
|
||||
|
||||
|
||||
ADD R3, R3, #112 @ This should be 112
|
||||
VLD1.16 {D13, D14}, [R3]!
|
||||
ADD R3, R3, #112 @ This should be 112
|
||||
|
||||
|
||||
|
||||
VLD1.16 D14, [R1], R6
|
||||
SUB R10, R10, #8
|
||||
VLD1.16 {D15, D16}, [R3]!
|
||||
ADD R3, R3, #112 @ This should be 112
|
||||
|
||||
|
||||
VLD1.16 D16, [R1], R6
|
||||
VLD1.16 {D17, D18}, [R3]!
|
||||
ADD R3, R3, #112 @ This should be 112
|
||||
|
||||
|
||||
VLD1.16 D18, [R1], R6
|
||||
SUBS R5, R5, #1
|
||||
|
||||
MOV R1, R9
|
||||
VLD1.16 {D19, D20}, [R3]!
|
||||
|
||||
ADD R3, R3, #112 @ This should be 112
|
||||
|
||||
MOV R3, R10
|
||||
|
||||
BGT LOOP_1
|
||||
|
||||
VST1.32 {Q15}, [R4]!
|
||||
VMULL.S16 Q15, D10, D11
|
||||
VMLAL.S16 Q15, D12, D13
|
||||
|
||||
VMLAL.S16 Q15, D14, D15
|
||||
VMLAL.S16 Q15, D16, D17
|
||||
VMLAL.S16 Q15, D18, D19
|
||||
|
||||
VST1.32 {Q15}, [R11]!
|
||||
|
||||
LDMFD sp!, {R4-R12, R15}
|
||||
379
decoder/armv7/ixheaacd_sbr_qmfsyn64_winadd.s
Normal file
379
decoder/armv7/ixheaacd_sbr_qmfsyn64_winadd.s
Normal file
|
|
@ -0,0 +1,379 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_sbr_qmfsyn64_winadd
|
||||
|
||||
ixheaacd_sbr_qmfsyn64_winadd:
|
||||
|
||||
STMFD sp!, {R4-R12, R14}
|
||||
VPUSH {D8- D15}
|
||||
LDR R4, [SP, #104]
|
||||
LDR R5, [SP, #108]
|
||||
|
||||
MOV R7, #0x8000
|
||||
VLD1.16 D0, [R0]!
|
||||
MOV R12, R2
|
||||
|
||||
VDUP.32 Q15, R7
|
||||
VLD1.16 D1, [R2]!
|
||||
VDUP.32 Q11, R4
|
||||
|
||||
MOV R10, R0
|
||||
MOV R11, R2
|
||||
ADD R0, R0, #504
|
||||
ADD R2, R2, #248
|
||||
|
||||
VNEG.S32 Q14, Q11
|
||||
VSHL.S32 Q10, Q15, Q14
|
||||
MOV R6, #64
|
||||
MOV R6, R6, LSL #1
|
||||
ADD R12, R12, R6
|
||||
MOV R7, #128
|
||||
MOV R9, R7, LSL #1
|
||||
ADD R1, R1, R9
|
||||
MOV R6, #16
|
||||
MOV R7, #128
|
||||
MOV R9, R7, LSL #1
|
||||
MOV R7, #256
|
||||
MOV R8, R7, LSL #1
|
||||
|
||||
MOV R5, R5, LSL #1
|
||||
VLD1.16 D2, [R0], R8
|
||||
VMOV Q13, Q10
|
||||
|
||||
|
||||
VMLAL.S16 Q13, D0, D1
|
||||
VLD1.16 D3, [R2], R9
|
||||
|
||||
VLD1.16 D4, [R0], R8
|
||||
VMLAL.S16 Q13, D2, D3
|
||||
|
||||
VLD1.16 D5, [R2], R9
|
||||
|
||||
VLD1.16 D6, [R0], R8
|
||||
VMLAL.S16 Q13, D5, D4
|
||||
|
||||
VLD1.16 D7, [R2], R9
|
||||
|
||||
VLD1.16 D8, [R0], R8
|
||||
VMLAL.S16 Q13, D7, D6
|
||||
|
||||
VLD1.16 D9, [R2], R9
|
||||
MOV R0, R10
|
||||
|
||||
|
||||
MOV R2, R11
|
||||
VLD1.16 D10, [R1]!
|
||||
VMLAL.S16 Q13, D9, D8
|
||||
|
||||
MOV R10, R1
|
||||
VLD1.16 D11, [R12]!
|
||||
ADD R1, R1, #504
|
||||
|
||||
|
||||
|
||||
MOV R11, R12
|
||||
VLD1.16 D12, [R1], R8
|
||||
ADD R12, R12, #248
|
||||
|
||||
VMLAL.S16 Q13, D10, D11
|
||||
VLD1.16 D13, [R12], R9
|
||||
|
||||
VLD1.16 D14, [R1], R8
|
||||
VMLAL.S16 Q13, D12, D13
|
||||
|
||||
VLD1.16 D15, [R12], R9
|
||||
|
||||
VLD1.16 D16, [R1], R8
|
||||
VMLAL.S16 Q13, D15, D14
|
||||
|
||||
VLD1.16 D17, [R12], R9
|
||||
|
||||
VLD1.16 D18, [R1], R8
|
||||
VMLAL.S16 Q13, D17, D16
|
||||
|
||||
VLD1.16 D19, [R12], R9
|
||||
|
||||
VMLAL.S16 Q13, D19, D18
|
||||
VLD1.16 D0, [R0]!
|
||||
MOV R12, R11
|
||||
|
||||
MOV R1, R10
|
||||
VLD1.16 D1, [R2]!
|
||||
MOV R10, R0
|
||||
|
||||
VQSHL.S32 Q13, Q13, Q11
|
||||
|
||||
ADD R0, R0, #504
|
||||
|
||||
MOV R11, R2
|
||||
VLD1.16 D2, [R0], R8
|
||||
ADD R2, R2, #248
|
||||
|
||||
VSHR.S32 Q14, Q13, #16
|
||||
VLD1.16 D3, [R2], R9
|
||||
|
||||
|
||||
VUZP.16 D28, D29
|
||||
VMOV Q13, Q10
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
VLD1.16 D4, [R0], R8
|
||||
VLD1.16 D5, [R2], R9
|
||||
|
||||
VLD1.16 D6, [R0], R8
|
||||
VLD1.16 D7, [R2], R9
|
||||
|
||||
VLD1.16 D8, [R0], R8
|
||||
VLD1.16 D9, [R2], R9
|
||||
MOV R0, R10
|
||||
|
||||
|
||||
MOV R2, R11
|
||||
VLD1.16 D10, [R1]!
|
||||
|
||||
MOV R10, R1
|
||||
VLD1.16 D11, [R12]!
|
||||
ADD R1, R1, #504
|
||||
|
||||
|
||||
MOV R11, R12
|
||||
VLD1.16 D12, [R1], R8
|
||||
ADD R12, R12, #248
|
||||
|
||||
|
||||
VLD1.16 D13, [R12], R9
|
||||
|
||||
VLD1.16 D14, [R1], R8
|
||||
VLD1.16 D15, [R12], R9
|
||||
|
||||
VLD1.16 D16, [R1], R8
|
||||
VLD1.16 D17, [R12], R9
|
||||
|
||||
VLD1.16 D18, [R1], R8
|
||||
SUB R6, R6, #2
|
||||
VLD1.16 D19, [R12], R9
|
||||
MOV R1, R10
|
||||
|
||||
MOV R12, R11
|
||||
|
||||
LOOP_1:
|
||||
|
||||
VMLAL.S16 Q13, D0, D1
|
||||
VST1.16 D28[0], [R3], R5
|
||||
|
||||
VMLAL.S16 Q13, D2, D3
|
||||
VLD1.16 D0, [R0]!
|
||||
VMLAL.S16 Q13, D5, D4
|
||||
|
||||
VMLAL.S16 Q13, D7, D6
|
||||
VST1.16 D28[1], [R3], R5
|
||||
|
||||
|
||||
MOV R10, R0
|
||||
VLD1.16 D1, [R2]!
|
||||
ADD R0, R0, #504
|
||||
|
||||
VMLAL.S16 Q13, D9, D8
|
||||
VST1.16 D28[2], [R3], R5
|
||||
|
||||
VMLAL.S16 Q13, D10, D11
|
||||
VST1.16 D28[3], [R3], R5
|
||||
|
||||
MOV R11, R2
|
||||
VLD1.16 D2, [R0], R8
|
||||
ADD R2, R2, #248
|
||||
|
||||
VMLAL.S16 Q13, D12, D13
|
||||
VLD1.16 D3, [R2], R9
|
||||
VMLAL.S16 Q13, D15, D14
|
||||
|
||||
VMLAL.S16 Q13, D17, D16
|
||||
VLD1.16 D4, [R0], R8
|
||||
VMLAL.S16 Q13, D19, D18
|
||||
|
||||
VLD1.16 D5, [R2], R9
|
||||
|
||||
VLD1.16 D6, [R0], R8
|
||||
VQSHL.S32 Q13, Q13, Q11
|
||||
|
||||
VSHR.S32 Q14, Q13, #16
|
||||
VLD1.16 D7, [R2], R9
|
||||
VMOV Q13, Q10
|
||||
|
||||
|
||||
VUZP.16 D28, D29
|
||||
VMLAL.S16 Q13, D0, D1
|
||||
|
||||
VMLAL.S16 Q13, D2, D3
|
||||
VLD1.16 D8, [R0], R8
|
||||
VMLAL.S16 Q13, D5, D4
|
||||
|
||||
VMLAL.S16 Q13, D7, D6
|
||||
VLD1.16 D9, [R2], R9
|
||||
|
||||
|
||||
VLD1.16 D10, [R1]!
|
||||
VMLAL.S16 Q13, D9, D8
|
||||
|
||||
MOV R2, R11
|
||||
VLD1.16 D11, [R12]!
|
||||
MOV R0, R10
|
||||
|
||||
MOV R10, R1
|
||||
|
||||
ADD R1, R1, #504
|
||||
|
||||
MOV R11, R12
|
||||
VLD1.16 D12, [R1], R8
|
||||
ADD R12, R12, #248
|
||||
|
||||
VLD1.16 D13, [R12], R9
|
||||
VMLAL.S16 Q13, D10, D11
|
||||
|
||||
VLD1.16 D14, [R1], R8
|
||||
VMLAL.S16 Q13, D12, D13
|
||||
|
||||
VLD1.16 D15, [R12], R9
|
||||
|
||||
VLD1.16 D16, [R1], R8
|
||||
VMLAL.S16 Q13, D15, D14
|
||||
|
||||
VLD1.16 D17, [R12], R9
|
||||
|
||||
VLD1.16 D18, [R1], R8
|
||||
VMLAL.S16 Q13, D17, D16
|
||||
|
||||
VLD1.16 D19, [R12], R9
|
||||
MOV R1, R10
|
||||
|
||||
VMLAL.S16 Q13, D19, D18
|
||||
VST1.16 D28[0], [R3], R5
|
||||
|
||||
MOV R12, R11
|
||||
VLD1.16 D0, [R0]!
|
||||
|
||||
VLD1.16 D1, [R2]!
|
||||
VQSHL.S32 Q13, Q13, Q11
|
||||
|
||||
|
||||
VST1.16 D28[1], [R3], R5
|
||||
MOV R10, R0
|
||||
|
||||
VST1.16 D28[2], [R3], R5
|
||||
ADD R0, R0, #504
|
||||
|
||||
VST1.16 D28[3], [R3], R5
|
||||
MOV R11, R2
|
||||
|
||||
VSHR.S32 Q14, Q13, #16
|
||||
VLD1.16 D2, [R0], R8
|
||||
ADD R2, R2, #248
|
||||
|
||||
VLD1.16 D3, [R2], R9
|
||||
VLD1.16 D4, [R0], R8
|
||||
VLD1.16 D5, [R2], R9
|
||||
VLD1.16 D6, [R0], R8
|
||||
VLD1.16 D7, [R2], R9
|
||||
VLD1.16 D8, [R0], R8
|
||||
VLD1.16 D9, [R2], R9
|
||||
|
||||
VUZP.16 D28, D29
|
||||
VMOV Q13, Q10
|
||||
|
||||
|
||||
|
||||
|
||||
MOV R0, R10
|
||||
VLD1.16 D10, [R1]!
|
||||
MOV R2, R11
|
||||
|
||||
MOV R10, R1
|
||||
VLD1.16 D11, [R12]!
|
||||
ADD R1, R1, #504
|
||||
|
||||
|
||||
MOV R11, R12
|
||||
VLD1.16 D12, [R1], R8
|
||||
ADD R12, R12, #248
|
||||
|
||||
|
||||
VLD1.16 D13, [R12], R9
|
||||
|
||||
VLD1.16 D14, [R1], R8
|
||||
VLD1.16 D15, [R12], R9
|
||||
|
||||
VLD1.16 D16, [R1], R8
|
||||
VLD1.16 D17, [R12], R9
|
||||
|
||||
SUBS R6, R6, #2
|
||||
VLD1.16 D18, [R1], R8
|
||||
|
||||
MOV R1, R10
|
||||
VLD1.16 D19, [R12], R9
|
||||
|
||||
MOV R12, R11
|
||||
|
||||
|
||||
BGT LOOP_1
|
||||
|
||||
VMLAL.S16 Q13, D0, D1
|
||||
VST1.16 D28[0], [R3], R5
|
||||
VMLAL.S16 Q13, D2, D3
|
||||
|
||||
VMLAL.S16 Q13, D5, D4
|
||||
VST1.16 D28[1], [R3], R5
|
||||
VMLAL.S16 Q13, D7, D6
|
||||
|
||||
VMLAL.S16 Q13, D9, D8
|
||||
VST1.16 D28[2], [R3], R5
|
||||
VMLAL.S16 Q13, D10, D11
|
||||
|
||||
VMLAL.S16 Q13, D12, D13
|
||||
VST1.16 D28[3], [R3], R5
|
||||
VMLAL.S16 Q13, D15, D14
|
||||
|
||||
|
||||
|
||||
VMLAL.S16 Q13, D17, D16
|
||||
|
||||
VMLAL.S16 Q13, D19, D18
|
||||
|
||||
VQSHL.S32 Q13, Q13, Q11
|
||||
|
||||
VSHR.S32 Q14, Q13, #16
|
||||
|
||||
VUZP.16 D28, D29
|
||||
|
||||
|
||||
VST1.16 D28[0], [R3], R5
|
||||
VST1.16 D28[1], [R3], R5
|
||||
VST1.16 D28[2], [R3], R5
|
||||
VST1.16 D28[3], [R3], R5
|
||||
|
||||
VPOP {D8 - D15}
|
||||
LDMFD sp!, {R4-R12, R15}
|
||||
|
||||
105
decoder/armv7/ixheaacd_shiftrountine.s
Normal file
105
decoder/armv7/ixheaacd_shiftrountine.s
Normal file
|
|
@ -0,0 +1,105 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.code 32
|
||||
.eabi_attribute 24, 1 @Tag_ABI_align_needed
|
||||
.eabi_attribute 25, 1 @Tag_ABI_align_preserved
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_shiftrountine
|
||||
ixheaacd_shiftrountine:
|
||||
CMP r3, #0
|
||||
STMFD sp!, {r4-r7, r12}
|
||||
MOV r12, #0x1f
|
||||
BGE SROUTINE_L1
|
||||
RSB r3, r3, #0
|
||||
CMP r3, r12
|
||||
MOVGT r3, r12
|
||||
SUBS r2, r2, #2
|
||||
@ LDMMIFD sp!, {r4-r7, r12}
|
||||
LDMFDMI sp!, {r4-r7, r12}
|
||||
BXMI lr
|
||||
SROUTINE_L2:
|
||||
LDR r12, [r0, #0]
|
||||
LDR r4, [r1, #0]
|
||||
MOV r12, r12, ASR r3
|
||||
MOV r4, r4, ASR r3
|
||||
STR r12, [r0], #4
|
||||
STR r4, [r1], #4
|
||||
|
||||
LDR r12, [r0, #0]
|
||||
LDR r4, [r1, #0]
|
||||
MOV r12, r12, ASR r3
|
||||
MOV r4, r4, ASR r3
|
||||
SUBS r2, r2, #2
|
||||
STR r12, [r0], #4
|
||||
STR r4, [r1], #4
|
||||
|
||||
BPL SROUTINE_L2
|
||||
LDMFD sp!, {r4-r7, r12}
|
||||
BX lr
|
||||
SROUTINE_L1:
|
||||
SUBS r4, r2, #2
|
||||
RSB r2, r3, #0x1f
|
||||
@ LDMMIFD sp!, {r4-r7, r12}
|
||||
LDMFDMI sp!, {r4-r7, r12}
|
||||
BXMI lr
|
||||
SROUTINE_L3:
|
||||
LDR r12, [r0, #0]
|
||||
LDR r5, [r1, #0]
|
||||
|
||||
MOVS r7, r12, ASR r2
|
||||
CMNLT r7, #1
|
||||
MOVLT r6, #0x80000000
|
||||
MVNGT r6, #0x80000000
|
||||
MOVEQ r6, r12, LSL r3
|
||||
|
||||
MOVS r7, r5, ASR r2
|
||||
CMNLT r7, #1
|
||||
MOVLT r12, #0x80000000
|
||||
MVNGT r12, #0x80000000
|
||||
MOVEQ r12, r5, LSL r3
|
||||
STR r6, [r0], #4
|
||||
STR r12, [r1], #4
|
||||
|
||||
LDR r12, [r0, #0]
|
||||
LDR r5, [r1, #0]
|
||||
|
||||
MOVS r7, r12, ASR r2
|
||||
CMNLT r7, #1
|
||||
MOVLT r6, #0x80000000
|
||||
MVNGT r6, #0x80000000
|
||||
MOVEQ r6, r12, LSL r3
|
||||
|
||||
MOVS r7, r5, ASR r2
|
||||
CMNLT r7, #1
|
||||
MOVLT r12, #0x80000000
|
||||
MVNGT r12, #0x80000000
|
||||
MOVEQ r12, r5, LSL r3
|
||||
SUBS r4, r4, #2
|
||||
STR r6, [r0], #4
|
||||
STR r12, [r1], #4
|
||||
|
||||
BPL SROUTINE_L3
|
||||
LDMFD sp!, {r4-r7, r12}
|
||||
BX lr
|
||||
|
||||
|
||||
92
decoder/armv7/ixheaacd_shiftrountine_with_rnd_eld.s
Normal file
92
decoder/armv7/ixheaacd_shiftrountine_with_rnd_eld.s
Normal file
|
|
@ -0,0 +1,92 @@
|
|||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_shiftrountine_with_rnd_eld
|
||||
|
||||
ixheaacd_shiftrountine_with_rnd_eld:
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
MOV r4, #0x1f
|
||||
ADD r12, r2, r3, LSL #1
|
||||
MOV r9, #0x8000
|
||||
SUBS r3, r3, #1
|
||||
BMI S_WITH_R_L6
|
||||
|
||||
S_WITH_R_L5:
|
||||
LDR r5, [r1, r3, LSL #2] @i2 = qmfImag[j]
|
||||
LDR r7, [r0, r3, LSL #2] @r2 = qmfReal[j]
|
||||
LDR r14, [r0], #4 @r1 = *qmfReal
|
||||
LDR r10, [r1], #4 @i1 = *qmfImag
|
||||
|
||||
ADD r6, r5, r7 @*qmfImag++ = add32(i2, r2)
|
||||
MVN r6, r6 @negate32(add32(i2, r2))
|
||||
ADD r6, r6 , #1
|
||||
|
||||
@SUB r5,r5,r7 @qmfReal[j] = sub32(i2, r2)
|
||||
SUB r5, r7, r5 @qmfReal[j] = sub32(r2, i2)
|
||||
|
||||
ADD r7, r10, r14 @qmfImag[j] = add32(i1, r1)
|
||||
MVN r7, r7 @negate32(add32(i1, r1))
|
||||
ADD r7, r7 , #1
|
||||
|
||||
@SUB r4,r10,r14 @*qmfReal++ = sub32(i1, r1)
|
||||
SUB r4, r14, r10 @*qmfReal++ = sub32(r1, i1)
|
||||
|
||||
@STR r7,[r1,r3,LSL #2]
|
||||
@STR r5,[r0,r3,LSL #2]
|
||||
@STR r6,[r1],#4
|
||||
@STR r4,[r0],#4
|
||||
|
||||
|
||||
|
||||
@LDRD r4,[r0],#8 @DEBUG
|
||||
|
||||
@LDRD r6,[r1],#8
|
||||
MOVS r10, r4, ASR #0x16 @Right shift by 22 to check the overflow ( is not AAC_ELD right shifted by 21)
|
||||
CMNLT r10, #1 @Check r4 is overflow or not
|
||||
|
||||
MOVLT r4, #0x80000000 @saturate value if r4 is overflowed
|
||||
MVNGT r4, #0x80000000
|
||||
MOVEQ r4, r4, LSL #9 @shift by 9(hardcoded value) if not AAC_ELD left shifted by 10
|
||||
|
||||
MOVS r10, r5, ASR #0x16
|
||||
QADD r4, r4, r9
|
||||
CMNLT r10, #1
|
||||
MOV r4, r4, ASR #16
|
||||
MOVLT r5, #0x80000000
|
||||
MVNGT r5, #0x80000000
|
||||
MOVEQ r5, r5, LSL #9
|
||||
MOV r14, r3, lsl #1
|
||||
|
||||
|
||||
MOVS r10, r6, ASR #0x16
|
||||
QADD r5, r5, r9
|
||||
CMNLT r10, #1
|
||||
MOV r5, r5, ASR #16
|
||||
MOVLT r6, #0x80000000
|
||||
@STRH r5,[r2],#2
|
||||
STRH r5, [r2, r14]
|
||||
MVNGT r6, #0x80000000
|
||||
MOVEQ r6, r6, LSL #9
|
||||
|
||||
MOVS r10, r7, ASR #0x16
|
||||
QADD r6, r6, r9
|
||||
CMNLT r10, #1
|
||||
MOV r6, r6, ASR #16
|
||||
MOVLT r7, #0x80000000
|
||||
MVNGT r7, #0x80000000
|
||||
MOVEQ r7, r7, LSL #9
|
||||
|
||||
QADD r7, r7, r9
|
||||
STRH r4, [r2], #2
|
||||
|
||||
MOV r7, r7, ASR #16
|
||||
|
||||
@STRH r7,[r12],#2
|
||||
STRH r7, [r12, r14]
|
||||
SUBS r3, r3, #2
|
||||
STRH r6, [r12], #2
|
||||
BGE S_WITH_R_L5
|
||||
S_WITH_R_L6:
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
|
||||
111
decoder/armv7/ixheaacd_shiftrountine_with_round.s
Normal file
111
decoder/armv7/ixheaacd_shiftrountine_with_round.s
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.code 32
|
||||
.eabi_attribute 24, 1 @Tag_ABI_align_needed
|
||||
.eabi_attribute 25, 1 @Tag_ABI_align_preserved
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_shiftrountine_with_rnd
|
||||
ixheaacd_shiftrountine_with_rnd:
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
MOV r4, #0x1f
|
||||
ADD r12, r2, r3, LSL #1
|
||||
MOV r9, #0x8000
|
||||
SUBS r3, r3, #1
|
||||
BMI S_WITH_R_L6
|
||||
|
||||
S_WITH_R_L5:
|
||||
LDR r5, [r1, r3, LSL #2]
|
||||
LDR r7, [r0, r3, LSL #2]
|
||||
LDR r14, [r0], #4
|
||||
LDR r10, [r1], #4
|
||||
|
||||
ADD r6, r5, r7
|
||||
SUB r5, r5, r7
|
||||
ADD r7, r10, r14
|
||||
SUB r4, r10, r14
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
MOVS r10, r4, ASR #0x15
|
||||
CMNLT r10, #1
|
||||
|
||||
MOVLT r4, #0x80000000
|
||||
MVNGT r4, #0x80000000
|
||||
MOVEQ r4, r4, LSL #10
|
||||
|
||||
MOVS r10, r5, ASR #0x15
|
||||
QADD r4, r4, r9
|
||||
CMNLT r10, #1
|
||||
MOV r4, r4, ASR #16
|
||||
MOVLT r5, #0x80000000
|
||||
MVNGT r5, #0x80000000
|
||||
MOVEQ r5, r5, LSL #10
|
||||
MOV r14, r3, lsl #1
|
||||
|
||||
|
||||
MOVS r10, r6, ASR #0x15
|
||||
QADD r5, r5, r9
|
||||
CMNLT r10, #1
|
||||
MOV r5, r5, ASR #16
|
||||
MOVLT r6, #0x80000000
|
||||
|
||||
STRH r5, [r2, r14]
|
||||
MVNGT r6, #0x80000000
|
||||
MOVEQ r6, r6, LSL #10
|
||||
|
||||
MOVS r10, r7, ASR #0x15
|
||||
QADD r6, r6, r9
|
||||
CMNLT r10, #1
|
||||
MOV r6, r6, ASR #16
|
||||
MOVLT r7, #0x80000000
|
||||
MVNGT r7, #0x80000000
|
||||
MOVEQ r7, r7, LSL #10
|
||||
|
||||
QADD r7, r7, r9
|
||||
STRH r4, [r2], #2
|
||||
|
||||
MOV r7, r7, ASR #16
|
||||
|
||||
|
||||
STRH r7, [r12, r14]
|
||||
SUBS r3, r3, #2
|
||||
STRH r6, [r12], #2
|
||||
BGE S_WITH_R_L5
|
||||
S_WITH_R_L6:
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
75
decoder/armv7/ixheaacd_shiftrountine_with_round_hq.s
Normal file
75
decoder/armv7/ixheaacd_shiftrountine_with_round_hq.s
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
.code 32
|
||||
.eabi_attribute 24, 1 @Tag_ABI_align_needed
|
||||
.eabi_attribute 25, 1 @Tag_ABI_align_preserved
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_shiftrountine_with_rnd_hq
|
||||
ixheaacd_shiftrountine_with_rnd_hq:
|
||||
|
||||
STMFD sp!, {r4-r12, r14}
|
||||
ADD r12, r2, r3, LSL #2
|
||||
MOV r9, #0x8000
|
||||
SUBS r3, r3, #1
|
||||
BMI S_WITH_R_L6
|
||||
|
||||
S_WITH_R_L5:
|
||||
LDR r5, [r1, r3, LSL #2]
|
||||
LDR r7, [r0, r3, LSL #2]
|
||||
LDR r14, [r0], #4
|
||||
LDR r10, [r1], #4
|
||||
|
||||
ADD r6, r5, r7
|
||||
SUB r5, r5, r7
|
||||
ADD r7, r10, r14
|
||||
SUB r4, r10, r14
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
MOVS r10, r4, ASR #0x19
|
||||
CMNLT r10, #1
|
||||
|
||||
MOVLT r4, #0x80000000
|
||||
MVNGT r4, #0x80000000
|
||||
MOVEQ r4, r4, LSL #6
|
||||
|
||||
MOVS r10, r5, ASR #0x19
|
||||
CMNLT r10, #1
|
||||
MOVLT r5, #0x80000000
|
||||
MVNGT r5, #0x80000000
|
||||
MOVEQ r5, r5, LSL #6
|
||||
MOV r14, r3, lsl #2
|
||||
|
||||
|
||||
MOVS r10, r6, ASR #0x19
|
||||
CMNLT r10, #1
|
||||
MOVLT r6, #0x80000000
|
||||
|
||||
STR r5, [r2, r14]
|
||||
MVNGT r6, #0x80000000
|
||||
MOVEQ r6, r6, LSL #6
|
||||
|
||||
MOVS r10, r7, ASR #0x19
|
||||
CMNLT r10, #1
|
||||
|
||||
MOVLT r7, #0x80000000
|
||||
MVNGT r7, #0x80000000
|
||||
MOVEQ r7, r7, LSL #6
|
||||
|
||||
STR r4, [r2], #4
|
||||
|
||||
|
||||
|
||||
STR r7, [r12, r14]
|
||||
SUBS r3, r3, #2
|
||||
STR r6, [r12], #4
|
||||
BGE S_WITH_R_L5
|
||||
S_WITH_R_L6:
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
272
decoder/armv7/ixheaacd_tns_ar_filter_fixed_32x16.s
Normal file
272
decoder/armv7/ixheaacd_tns_ar_filter_fixed_32x16.s
Normal file
|
|
@ -0,0 +1,272 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_tns_ar_filter_armv7
|
||||
|
||||
ixheaacd_tns_ar_filter_armv7:
|
||||
|
||||
STMFD r13! , {r4 - r12, r14}
|
||||
SUB sp, sp, #4
|
||||
LDR r4, [sp, #44]
|
||||
LDR r6, [sp, #48]
|
||||
STR r1, [sp]
|
||||
LDR r12, [sp, #56]
|
||||
ANDS r5, r4, #3
|
||||
ADD r12, r12, #4096
|
||||
BEQ FILTER_LOOP
|
||||
|
||||
|
||||
|
||||
MOV r8, #0
|
||||
ADD r14, r3, r4, LSL #1
|
||||
RSBS r7, r5, #3
|
||||
BEQ ORDER_LOOPEND
|
||||
ORDER_LOOP:
|
||||
STRH r8, [r14, #2]!
|
||||
SUBS r7, r7, #1
|
||||
BGT ORDER_LOOP
|
||||
ORDER_LOOPEND:
|
||||
STRH r8, [r14, #2]
|
||||
BIC r4, r4, #3
|
||||
ADD r4, r4, #4
|
||||
|
||||
|
||||
FILTER_LOOP:
|
||||
LDR r1, [sp, #52]
|
||||
|
||||
|
||||
|
||||
|
||||
CMP r2, #1
|
||||
MOV r7, r4
|
||||
BNE NEG_INC
|
||||
|
||||
LDR r8 , [r0]
|
||||
SUBS r7 , r7 , #1
|
||||
MOV r8, r8, lsl r1
|
||||
MOV r9, r8, asr r1
|
||||
MOV r8 , r8 , lsl r6
|
||||
STR r8 , [r12], #-4
|
||||
STR r9, [r0], #4
|
||||
|
||||
BEQ FILTER_LOOP2
|
||||
FILTER_LOOP1:
|
||||
LDR r8 , [r0]
|
||||
SUB r5 , r4 , r7
|
||||
MOV r5 , r5 , lsl #1
|
||||
MOV r11 , #0
|
||||
ADD r14, r12, r5, lsl #1
|
||||
INNER_LOOP1:
|
||||
LDRSH r9 , [r3 , r5]
|
||||
LDR r10 , [r14], #-4
|
||||
SUBS r5 , r5 , #2
|
||||
SMLAWB r11 , r10, r9, r11
|
||||
BGT INNER_LOOP1
|
||||
|
||||
MOV r8, r8, lsl r1
|
||||
SUB r8 , r8 , r11, lsl #1
|
||||
MOV r9, r8, asr r1
|
||||
STR r9 , [r0], #4
|
||||
SUBS r7 , r7 , #1
|
||||
MOV r8 , r8 , lsl r6
|
||||
STR r8 , [r12], #-4
|
||||
BGT FILTER_LOOP1
|
||||
|
||||
FILTER_LOOP2:
|
||||
LDR r1, [sp]
|
||||
SUBS r7 , r1 , r4
|
||||
BLE EXIT
|
||||
|
||||
LDR r1, [sp, #52]
|
||||
|
||||
|
||||
|
||||
|
||||
CMP r6, #1
|
||||
BEQ SHIFT_1
|
||||
|
||||
OUTER_LOOP2:
|
||||
LDR r8 , [r0]
|
||||
MOV r5 , r4 , lsl #1
|
||||
MOV r11 , #0
|
||||
LDR r9 , [r3 , r5]
|
||||
ADD r14 , r12, r5, lsl #1
|
||||
SUB r5 , r5 , #4
|
||||
INNER_LOOP2:
|
||||
LDR r10 , [r14], #-4
|
||||
LDR r2 , [r14] , #-4
|
||||
|
||||
SMLAWB r11, r10 , r9, r11
|
||||
LDR r9 , [r3 , r5]
|
||||
SUB r5 , r5 , #4
|
||||
|
||||
SMLAWT r11, r2 , r9, r11
|
||||
LDR r10 , [r14] , #-4
|
||||
LDR r2 , [r14] , #-4
|
||||
|
||||
SMLAWB r11, r10 , r9, r11
|
||||
LDR r9 , [r3 , r5]
|
||||
SUBS r5 , r5 , #4
|
||||
|
||||
SMLAWT r11, r2 , r9, r11
|
||||
|
||||
BGT INNER_LOOP2
|
||||
|
||||
MOV r8, r8, lsl r1
|
||||
SUB r8 , r8 , r11, lsl #1
|
||||
MOV r9, r8, asr r1
|
||||
STR r9 , [r0], #4
|
||||
MOV r2 , r8 , lsl r6
|
||||
STR r2 , [r12], #-4
|
||||
SUBS r7 , r7 , #1
|
||||
BGT OUTER_LOOP2
|
||||
B EXIT
|
||||
|
||||
SHIFT_1:
|
||||
MOV r6, r3
|
||||
|
||||
OUTER_LOOP2_SHIFT_1:
|
||||
ADD r3, r6, r4 , lsl #1
|
||||
LDR r9 , [r3 ], #-4
|
||||
|
||||
LDR r8 , [r0]
|
||||
ADD r14 , r12, r4, lsl #2
|
||||
MOV r5 , r4
|
||||
MOV r11 , #0
|
||||
|
||||
INNER_LOOP2_SHIFT_1:
|
||||
LDR r10 , [r14] , #-4
|
||||
LDR r2 , [r14] , #-4
|
||||
SMLAWB r11 , r10 , r9, r11
|
||||
|
||||
LDR r9 , [r3] , #-4
|
||||
LDR r10 , [r14] , #-4
|
||||
SMLAWT r11, r2 , r9, r11
|
||||
|
||||
LDR r2 , [r14] , #-4
|
||||
SMLAWB r11, r10 , r9, r11
|
||||
LDR r9 , [r3 ], #-4
|
||||
|
||||
SUBS r5 , r5 , #4
|
||||
SMLAWT r11, r2 , r9, r11
|
||||
|
||||
BGT INNER_LOOP2_SHIFT_1
|
||||
|
||||
MOV r8, r8, lsl r1
|
||||
SUB r8 , r8 , r11, lsl #1
|
||||
MOV r9, r8, asr r1
|
||||
STR r9 , [r0], #4
|
||||
MOV r2 , r8 , lsl #1
|
||||
STR r2 , [r12], #-4
|
||||
SUBS r7 , r7 , #1
|
||||
|
||||
|
||||
BGT OUTER_LOOP2_SHIFT_1
|
||||
|
||||
|
||||
B EXIT
|
||||
|
||||
|
||||
NEG_INC:
|
||||
|
||||
LDR r8 , [r0]
|
||||
SUBS r7 , r7 , #1
|
||||
MOV r8, r8, lsl r1
|
||||
MOV r9, r8, asr r1
|
||||
MOV r8 , r8 , lsl r6
|
||||
STR r8 , [r12], #-4
|
||||
STR r9, [r0], #-4
|
||||
|
||||
BEQ FILTER_LOOP2_NEG
|
||||
FILTER_LOOP1_NEG:
|
||||
LDR r8 , [r0]
|
||||
SUB r5 , r4 , r7
|
||||
MOV r5 , r5 , lsl #1
|
||||
MOV r11 , #0
|
||||
ADD r14, r12, r5, lsl #1
|
||||
INNER_LOOP1_NEG:
|
||||
LDRSH r9 , [r3 , r5]
|
||||
LDR r10 , [r14], #-4
|
||||
SUBS r5 , r5 , #2
|
||||
SMLAWB r11 , r10, r9, r11
|
||||
BGT INNER_LOOP1_NEG
|
||||
|
||||
MOV r8, r8, lsl r1
|
||||
SUB r8 , r8 , r11, lsl #1
|
||||
MOV r9, r8, asr r1
|
||||
STR r9 , [r0], #-4
|
||||
SUBS r7 , r7 , #1
|
||||
MOV r8 , r8 , lsl r6
|
||||
STR r8 , [r12], #-4
|
||||
BGT FILTER_LOOP1_NEG
|
||||
|
||||
FILTER_LOOP2_NEG:
|
||||
LDR r1, [sp]
|
||||
SUBS r7 , r1 , r4
|
||||
BLE EXIT
|
||||
|
||||
LDR r1, [sp, #52]
|
||||
|
||||
|
||||
|
||||
|
||||
OUTER_LOOP2_NEG:
|
||||
LDR r8 , [r0]
|
||||
MOV r5 , r4 , lsl #1
|
||||
MOV r11 , #0
|
||||
LDR r9 , [r3 , r5]
|
||||
ADD r14 , r12, r5, lsl #1
|
||||
SUB r5 , r5 , #4
|
||||
INNER_LOOP2_NEG:
|
||||
LDR r10 , [r14], #-4
|
||||
LDR r2 , [r14] , #-4
|
||||
|
||||
SMLAWB r11, r10 , r9, r11
|
||||
LDR r9 , [r3 , r5]
|
||||
SUB r5 , r5 , #4
|
||||
|
||||
SMLAWT r11, r2 , r9, r11
|
||||
LDR r10 , [r14] , #-4
|
||||
LDR r2 , [r14] , #-4
|
||||
|
||||
SMLAWB r11, r10 , r9, r11
|
||||
LDR r9 , [r3 , r5]
|
||||
SUBS r5 , r5 , #4
|
||||
|
||||
SMLAWT r11, r2 , r9, r11
|
||||
|
||||
BGT INNER_LOOP2_NEG
|
||||
|
||||
MOV r8, r8, lsl r1
|
||||
SUB r8 , r8 , r11, lsl #1
|
||||
MOV r9, r8, asr r1
|
||||
STR r9 , [r0], #-4
|
||||
MOV r2 , r8 , lsl r6
|
||||
STR r2 , [r12], #-4
|
||||
SUBS r7 , r7 , #1
|
||||
BGT OUTER_LOOP2_NEG
|
||||
|
||||
EXIT:
|
||||
ADD sp, sp , #4
|
||||
LDMFD r13!, {r4 - r12, r15}
|
||||
|
||||
122
decoder/armv7/ixheaacd_tns_parcor2lpc_32x16.s
Normal file
122
decoder/armv7/ixheaacd_tns_parcor2lpc_32x16.s
Normal file
|
|
@ -0,0 +1,122 @@
|
|||
@/******************************************************************************
|
||||
@ *
|
||||
@ * Copyright (C) 2018 The Android Open Source Project
|
||||
@ *
|
||||
@ * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@ * you may not use this file except in compliance with the License.
|
||||
@ * You may obtain a copy of the License at:
|
||||
@ *
|
||||
@ * http://www.apache.org/licenses/LICENSE-2.0
|
||||
@ *
|
||||
@ * Unless required by applicable law or agreed to in writing, software
|
||||
@ * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@ * See the License for the specific language governing permissions and
|
||||
@ * limitations under the License.
|
||||
@ *
|
||||
@ *****************************************************************************
|
||||
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
@*/
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_tns_parcor_lpc_convert_armv7
|
||||
ixheaacd_tns_parcor_lpc_convert_armv7:
|
||||
STMFD SP!, {R2, R4-R12, R14}
|
||||
SUB SP, SP, #128
|
||||
MOV R4, SP
|
||||
MOV R8, #0
|
||||
MOV R5, #0x8000
|
||||
|
||||
OUTLOOP:
|
||||
MOV R6, #0
|
||||
MOV R7, #16
|
||||
LOOP1:
|
||||
STR R6, [R4], #4
|
||||
STR R6, [R4, #60]
|
||||
SUBS R7, R7, #1
|
||||
BGT LOOP1
|
||||
|
||||
SUB R4, R4, #64
|
||||
MOV R9, #0x7FFFFFFF
|
||||
MOV R10, R9, ASR R8
|
||||
|
||||
|
||||
MOV R7, R3
|
||||
LOOP2:
|
||||
MOV R11, R10
|
||||
LDRSH R2, [R4], #2
|
||||
LDRSH R14, [R0], #2
|
||||
MOV R12, R3
|
||||
|
||||
LOOP2_1:
|
||||
SMULBB R2, R2, R14
|
||||
QADD R14, R10, R5
|
||||
CMP R2, #0x40000000
|
||||
MOV R14, R14, ASR #16
|
||||
MOVNE R2, R2, LSL #1
|
||||
MOVEQ R2, #0x7FFFFFFF
|
||||
QADD R10, R10, R2
|
||||
STRH R14, [R4, #62]
|
||||
MOVS R2, R10
|
||||
RSBSMI R2, R2, #0
|
||||
MOVMI R2, #0x7FFFFFFF
|
||||
CMP R2, #0x7FFFFFFF
|
||||
MOVEQ R6, #1
|
||||
SUBS R12, R12, #1
|
||||
@ LDRGTSH R2, [R4], #2
|
||||
@ LDRGTSH R14, [R0], #2
|
||||
LDRSHGT R2, [R4], #2
|
||||
LDRSHGT R14, [R0], #2
|
||||
BGT LOOP2_1
|
||||
|
||||
LDRSH R2, [R4, #62]
|
||||
MOV R12, R3
|
||||
LOOP2_2:
|
||||
LDRSH R14, [R0, #-2]!
|
||||
LDRSH R9, [R4, #-2]!
|
||||
SMULBB R2, R2, R14
|
||||
MOV R9, R9, LSL #16
|
||||
CMP R2, #0x40000000
|
||||
MOVNE R2, R2, LSL #1
|
||||
MOVEQ R2, #0x7FFFFFFF
|
||||
QADD R9, R9, R2
|
||||
LDRSH R2, [R4, #62]
|
||||
QADD R14, R9, R5
|
||||
MOVS R9, R9
|
||||
MOV R14, R14, ASR #16
|
||||
STRH R14, [R4, #2]
|
||||
@ RSBMIS R9, R9, #0
|
||||
RSBSMI R9, R9, #0
|
||||
MOVMI R9, #0x7FFFFFFF
|
||||
CMP R9, #0x7FFFFFFF
|
||||
MOVEQ R6, #1
|
||||
SUBS R12, R12, #1
|
||||
BGT LOOP2_2
|
||||
|
||||
QADD R11, R11, R5
|
||||
QADD R2, R10, R5
|
||||
MOV R11, R11, ASR #16
|
||||
MOV R2, R2, ASR #16
|
||||
STRH R11, [R4]
|
||||
STRH R2, [R1], #2
|
||||
MOV R10, #0
|
||||
|
||||
SUBS R7, R7, #1
|
||||
BGE LOOP2
|
||||
|
||||
SUB R1, R1, R3, LSL #1
|
||||
SUB R1, R1, #2
|
||||
SUBS R10, R6, #1
|
||||
ADDEQ R8, R8, #1
|
||||
BEQ OUTLOOP
|
||||
|
||||
LDR R2, [SP, #128]
|
||||
ADD SP, SP, #132
|
||||
STRH R8, [R2]
|
||||
LDMFD sp!, {r4-r12, r15}
|
||||
|
||||
|
||||
|
||||
|
||||
166
decoder/armv8/ixheaacd_apply_scale_factors.s
Normal file
166
decoder/armv8/ixheaacd_apply_scale_factors.s
Normal file
|
|
@ -0,0 +1,166 @@
|
|||
//.include "ihevc_neon_macros.s"
|
||||
.macro push_v_regs
|
||||
stp X8, X9, [sp, #-16]!
|
||||
stp X10, X11, [sp, #-16]!
|
||||
stp X12, X13, [sp, #-16]!
|
||||
stp X14, X15, [sp, #-16]!
|
||||
stp X16, X17, [sp, #-16]!
|
||||
stp x19, x20, [sp, #-16]!
|
||||
stp x21, x22, [sp, #-16]!
|
||||
stp X29, X30, [sp, #-16]!
|
||||
.endm
|
||||
.macro pop_v_regs
|
||||
ldp X29, X30, [sp], #16
|
||||
ldp x21, x22, [sp], #16
|
||||
ldp x19, x20, [sp], #16
|
||||
ldp X16, X17, [sp], #16
|
||||
ldp X14, X15, [sp], #16
|
||||
ldp X12, X13, [sp], #16
|
||||
ldp X10, X11, [sp], #16
|
||||
ldp X8, X9, [sp], #16
|
||||
.endm
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_scale_factor_process_armv8
|
||||
|
||||
ixheaacd_scale_factor_process_armv8:
|
||||
|
||||
push_v_regs
|
||||
|
||||
MOV x9, x4
|
||||
|
||||
MOV x21, x6
|
||||
MOV x22, x7
|
||||
CMP x2, #0 // Tbands
|
||||
|
||||
BGT lbl17
|
||||
|
||||
pop_v_regs
|
||||
ret
|
||||
lbl17:
|
||||
MOV x10, #0
|
||||
CMP x5, #2
|
||||
BGT ADD_34
|
||||
MOV x11, #0x25
|
||||
B TBANDS_LOOP
|
||||
ADD_34:
|
||||
MOV x11, #0x22
|
||||
// MOV x11, #0x25 // temp=37
|
||||
|
||||
TBANDS_LOOP:
|
||||
LDRSH x5, [x1], #2 // scale_factor = *Scfactor++;
|
||||
LDRB w4, [x3], #1 //Offset [1]
|
||||
sxtw x4, w4
|
||||
|
||||
|
||||
CMP x5, #0x18 //if(scale_factor < 24)
|
||||
BGE SCALE_FACTOR_GE_12 //
|
||||
|
||||
CMP x4, #0
|
||||
BLE OFFSET_ZERO
|
||||
|
||||
SCALE_FACTOR_LT_12:
|
||||
|
||||
STR x10, [x0], #8
|
||||
STR x10, [x0], #8
|
||||
SUBS x4, x4, #4
|
||||
BGT SCALE_FACTOR_LT_12
|
||||
B OFFSET_ZERO
|
||||
|
||||
SCALE_FACTOR_GE_12:
|
||||
|
||||
SUBS x6, x11, x5, ASR #2 // 37-(scale_factor >> 2)
|
||||
AND x5, x5, #3 // scale_factor & 0x0003
|
||||
|
||||
//ADD x5,x9,x5,LSL #1 ; scale_table_ptr[(scale_factor & 0x0003)];
|
||||
LDR w5, [x9, x5, LSL #2] // scale_short = scale_table_ptr[(scale_factor & 0x0003)];
|
||||
sxtw x5, w5
|
||||
AND w17, w5, #0x0000FFFF
|
||||
sxth w17, w17 //16-bit value stored as 32-bit,so SMULWB can still be used
|
||||
BLE SHIFT_LE_ZERO // if shift less than or equal to zero
|
||||
|
||||
SUB x14, x6, #1 //dont do that extra LSL #1 in SMULWB
|
||||
|
||||
SHIFT_POSITIVE: //loop over sfbWidth a multiple of 4
|
||||
LDP w6, w7 , [x0, #0] // temp1 = *x_invquant
|
||||
LDP w19, w20, [x0, #8]
|
||||
|
||||
//SMULWB x6, x6, x5 // buffex1 = mult32x16in32(temp1, scale_short);
|
||||
SMULL x6, w6, w17
|
||||
SMULL x7, w7, w17
|
||||
SMULL x19, w19, w17
|
||||
SMULL x20, w20, w17
|
||||
|
||||
ASR x6, x6, #16
|
||||
ASR x7, x7 , #16
|
||||
ASR x19, x19 , #16
|
||||
ASR x20, x20 , #16
|
||||
|
||||
ASR x6, x6, x14 // buffex1 = shx32(buffex1, shift);
|
||||
ASR x7, x7, x14
|
||||
ASR x19, x19, x14
|
||||
ASR x20, x20, x14
|
||||
|
||||
stp w6, w7, [x0], #8
|
||||
stp w19, w20, [x0], #8
|
||||
|
||||
SUBS x4, x4, #4
|
||||
|
||||
BGT SHIFT_POSITIVE
|
||||
B OFFSET_ZERO
|
||||
SHIFT_LE_ZERO:
|
||||
|
||||
//RSBS x14, x6, #0 //-shift
|
||||
NEGS x14, x6
|
||||
BGT SHIFT_NEGTIVE1
|
||||
|
||||
SHIFT_ZERO: //loop over sfbWidth a multiple of 4
|
||||
LDP w6, w7, [x0, #0] // temp1 = *x_invquant;
|
||||
|
||||
//SMULWB x6, x6, x5 // buffex1 = mult32x16in32(temp1, scale_short);
|
||||
SMULL x6, w6, w17
|
||||
SMULL x7, w7, w17
|
||||
|
||||
ASR x6, x6, #16
|
||||
ASR x7, x7, #16
|
||||
|
||||
LSL x6, x6, #1
|
||||
LSL x7, x7, #1
|
||||
|
||||
STP w6, w7, [x0], #8 // *x_invquant++ = buffex1;
|
||||
|
||||
SUBS x4, x4, #2
|
||||
|
||||
BGT SHIFT_ZERO
|
||||
B OFFSET_ZERO
|
||||
|
||||
SHIFT_NEGTIVE1:
|
||||
SUB x14, x14, #1
|
||||
SHIFT_NEGTIVE: //;loop over sfbWidth a multiple of 4
|
||||
|
||||
LDP w6, w7, [x0, #0]
|
||||
LSL w6, w6, w14 // buffex1 = shl32(buffex1, shift-1);
|
||||
LSL w7, w7, w14 // buffex1 = shl32(buffex1, shift-1);
|
||||
|
||||
//SMULWB x6, x6, x5 // buffex1 = mult32x16in32(temp1, scale_short);
|
||||
SMULL x6, w6, w17
|
||||
SMULL x7, w7, w17
|
||||
ASR x6, x6, #16
|
||||
ASR x7, x7, #16
|
||||
|
||||
LSL x6, x6, #2 // shl for fixmul_32x16b and shl32(buffer,1)
|
||||
LSL x7, x7, #2 // shl for fixmul_32x16b and shl32(buffer,1)
|
||||
|
||||
STP w6, w7, [x0], #8 // *x_invquant++ = buffex1;
|
||||
|
||||
SUBS x4, x4, #2
|
||||
|
||||
BGT SHIFT_NEGTIVE
|
||||
|
||||
OFFSET_ZERO:
|
||||
SUBS x2, x2, #1
|
||||
BGT TBANDS_LOOP
|
||||
|
||||
pop_v_regs
|
||||
ret
|
||||
99
decoder/armv8/ixheaacd_basic_op.h
Normal file
99
decoder/armv8/ixheaacd_basic_op.h
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
/******************************************************************************
|
||||
* *
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
#ifndef IXHEAACD_BASIC_OP_H
|
||||
#define IXHEAACD_BASIC_OP_H
|
||||
|
||||
#define add_d(a, b) ((a) + (b))
|
||||
#define sub_d(a, b) ((a) - (b))
|
||||
#define ixheaacd_cbrt_calc(a) cbrt(1.0f / a)
|
||||
|
||||
/*
|
||||
static PLATFORM_INLINE WORD32 mult32x16in32_dual(WORD32 a , WORD16 b)
|
||||
{
|
||||
WORD32 result;
|
||||
WORD32 msb;
|
||||
UWORD32 lsb;
|
||||
|
||||
lsb=a&0x0000FFFF ;
|
||||
msb=(a>>16);
|
||||
|
||||
result=(UWORD32)(lsb*(UWORD32)b);
|
||||
result= msb*(WORD32)b+(result>>16);
|
||||
return(result);
|
||||
}
|
||||
*/
|
||||
|
||||
static PLATFORM_INLINE WORD32 msu32x16in32_dual(WORD32 a, WORD16 c1, WORD32 b,
|
||||
WORD16 c2) {
|
||||
WORD32 result;
|
||||
WORD32 temp_result;
|
||||
UWORD32 a_lsb;
|
||||
WORD32 a_msb;
|
||||
UWORD32 b_lsb;
|
||||
WORD32 b_msb;
|
||||
|
||||
a_lsb = a & 65535;
|
||||
a_msb = a >> 16;
|
||||
|
||||
b_lsb = b & 65535;
|
||||
b_msb = b >> 16;
|
||||
temp_result = ((UWORD32)a_lsb * (UWORD32)c1);
|
||||
temp_result = temp_result - (WORD32)((UWORD32)b_lsb * (UWORD32)c2);
|
||||
temp_result = ((WORD32)temp_result) >> 16;
|
||||
result = temp_result + ((a_msb * (WORD32)c1) - (b_msb * (WORD32)c2));
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
static PLATFORM_INLINE WORD32 mac32x16in32_dual(WORD32 a, WORD16 c1, WORD32 b,
|
||||
WORD16 c2) {
|
||||
WORD32 result;
|
||||
WORD32 temp_result;
|
||||
UWORD32 a_lsb;
|
||||
WORD32 a_msb;
|
||||
UWORD32 b_lsb;
|
||||
WORD32 b_msb;
|
||||
|
||||
a_lsb = a & 65535;
|
||||
a_msb = a >> 16;
|
||||
|
||||
b_lsb = b & 65535;
|
||||
b_msb = b >> 16;
|
||||
temp_result = (UWORD32)a_lsb * (UWORD32)c1;
|
||||
temp_result = temp_result + (UWORD32)b_lsb * (UWORD32)c2;
|
||||
temp_result = ((UWORD32)temp_result) >> 16;
|
||||
result = temp_result + ((a_msb * (WORD32)c1)) + ((b_msb * (WORD32)c2));
|
||||
return (result);
|
||||
}
|
||||
|
||||
/*
|
||||
static PLATFORM_INLINE WORD64 mac32x32in64_dual(WORD32 a, WORD32 b,WORD64 c,
|
||||
WORD32 d)
|
||||
{
|
||||
WORD64 result;
|
||||
WORD64 temp_result;
|
||||
|
||||
temp_result = (WORD64)a * (WORD64)b;
|
||||
result = c + (temp_result);
|
||||
return (result);
|
||||
}
|
||||
*/
|
||||
|
||||
#endif
|
||||
397
decoder/armv8/ixheaacd_basic_ops16.h
Normal file
397
decoder/armv8/ixheaacd_basic_ops16.h
Normal file
|
|
@ -0,0 +1,397 @@
|
|||
/******************************************************************************
|
||||
* *
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
#ifndef IXHEAACD_BASIC_OPS16_H
|
||||
#define IXHEAACD_BASIC_OPS16_H
|
||||
|
||||
// limits the 32 bit input to the range of a 16 bit word
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_sat16(WORD32 op1) {
|
||||
WORD16 var_out;
|
||||
|
||||
if (op1 > 0X00007fffL) {
|
||||
var_out = MAX_16;
|
||||
} else if (op1 < (WORD32)0xffff8000L) {
|
||||
var_out = (WORD16)(-32768);
|
||||
} else {
|
||||
var_out = (WORD16)(op1);
|
||||
}
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// add 2 16 bit variables and returns 16 bit result
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_add16(WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = ((WORD16)(op1 + op2));
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// add 2 16 bit variables and returns 16 bit result with saturation
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_add16_sat(WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
WORD32 sum;
|
||||
|
||||
sum = (WORD32)op1 + (WORD32)op2;
|
||||
var_out = ixheaacd_sat16(sum);
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// subtract 2 16 bit variables and returns 16 bit result
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_sub16(WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = ((WORD16)(op1 - op2));
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// subtract 2 16 bit variables and returns 16 bit result with saturation
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_sub16_sat(WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
WORD32 diff;
|
||||
|
||||
diff = (WORD32)op1 - op2;
|
||||
var_out = ixheaacd_sat16(diff);
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// multiply 2 16 bit variables and return 31 to 16 bits
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_mult16(WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = ((WORD16)(((WORD32)op1 * (WORD32)op2) >> 16));
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// multiply 2 16 bit variables and return 30 to 15 bits
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_mult16_shl(WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = ((WORD16)(((WORD32)op1 * (WORD32)op2) >> 15));
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// multiply 2 16 bit variables and return 30 to 15 bits with saturation
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_mult16_shl_sat(WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
WORD32 temp;
|
||||
|
||||
temp = ((WORD32)(((WORD32)op1 * (WORD32)op2) >> 15));
|
||||
var_out = ixheaacd_sat16(temp);
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// shifts left a 16 bit variable by the shift value and returns a 16 bit result
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_shl16(WORD16 op1, WORD16 shift) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = (WORD16)(op1 << shift);
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// shifts left a 16 bit variable by the shift value and returns a 16 bit value
|
||||
// with saturation
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_shl16_sat(WORD16 op1, WORD16 shift) {
|
||||
WORD16 var_out;
|
||||
WORD32 temp;
|
||||
|
||||
if (shift > 15) {
|
||||
shift = 15;
|
||||
}
|
||||
temp = (WORD32)(op1 << shift);
|
||||
var_out = ixheaacd_sat16(temp);
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// shifts right a 16 bit variable by the shift value and returns a 16 bit value
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_shr16(WORD16 op1, WORD16 shift) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = ((WORD16)(op1 >> shift));
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// shifts left a 16 bit variable by the shift value if the
|
||||
// value is positive else shifts right and returns a 16 bit result
|
||||
static PLATFORM_INLINE WORD16 shl16_dir(WORD16 op1, WORD16 shift) {
|
||||
WORD16 var_out;
|
||||
if (shift > 0) {
|
||||
var_out = ixheaacd_shl16(op1, shift);
|
||||
} else {
|
||||
var_out = ixheaacd_shr16(op1, (WORD16)(-shift));
|
||||
}
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// shifts left a 16 bit variable by the shift value if the
|
||||
// value is negative else shifts right and returns a 16 bit result
|
||||
static PLATFORM_INLINE WORD16 shr16_dir(WORD16 op1, WORD16 shift) {
|
||||
WORD16 var_out;
|
||||
|
||||
if (shift < 0) {
|
||||
var_out = ixheaacd_shl16(op1, (WORD16)(-shift));
|
||||
} else {
|
||||
var_out = ixheaacd_shr16(op1, shift);
|
||||
}
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// shifts left a 16 bit variable by the shift value
|
||||
// if the value is positive else shifts right and returns a 16 bit with
|
||||
// saturation
|
||||
static PLATFORM_INLINE WORD16 shl16_dir_sat(WORD16 op1, WORD16 shift) {
|
||||
WORD16 var_out;
|
||||
if (shift > 0) {
|
||||
var_out = ixheaacd_shl16_sat(op1, shift);
|
||||
} else {
|
||||
var_out = ixheaacd_shr16(op1, (WORD16)(-shift));
|
||||
}
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// shifts left a 16 bit variable by the shift value if the
|
||||
// value is negative else shifts right and returns a 16 bit
|
||||
// result with saturation
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_shr16_dir_sat(WORD16 op1, WORD16 shift) {
|
||||
WORD16 var_out;
|
||||
|
||||
if (shift < 0) {
|
||||
var_out = ixheaacd_shl16_sat(op1, (WORD16)(-shift));
|
||||
} else {
|
||||
var_out = ixheaacd_shr16(op1, shift);
|
||||
}
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// finds a value which normalizes the input to 16 bit
|
||||
// return zero for value zero
|
||||
static PLATFORM_INLINE WORD16 norm16(WORD16 op1) {
|
||||
WORD16 var_out;
|
||||
|
||||
if (0 == op1) {
|
||||
var_out = 0;
|
||||
} else {
|
||||
if ((WORD16)0xffff == op1) {
|
||||
var_out = 15;
|
||||
} else {
|
||||
if (op1 < 0) {
|
||||
op1 = (WORD16)(~op1);
|
||||
}
|
||||
for (var_out = 0; op1 < 0x4000; var_out++) {
|
||||
op1 <<= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// finds no. of significant bits excluding sign bit
|
||||
// value 15 returned for zero
|
||||
static PLATFORM_INLINE WORD16 bin_expo16(WORD16 op1) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = ((WORD16)(15 - norm16(op1)));
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// returns a 16 bit absolute value of a given signed 16 bit value
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_abs16(WORD16 op1) {
|
||||
WORD16 var_out;
|
||||
|
||||
if (op1 < 0) {
|
||||
var_out = (WORD16)(-op1);
|
||||
} else {
|
||||
var_out = op1;
|
||||
}
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// returns a 16 bit absolute value of a given signed 16 bit value with
|
||||
// saturation
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_abs16_sat(WORD16 op1) {
|
||||
WORD16 var_out;
|
||||
|
||||
if (-32768 == op1) {
|
||||
var_out = MAX_16;
|
||||
} else {
|
||||
if (op1 < 0) {
|
||||
var_out = (WORD16)(-op1);
|
||||
} else {
|
||||
var_out = op1;
|
||||
}
|
||||
}
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// returns a 16 bit negative value of a given signed 16 bit value.
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_negate16(WORD16 op1) {
|
||||
WORD16 var_out;
|
||||
|
||||
if (-32768 == op1) {
|
||||
var_out = MAX_16;
|
||||
} else {
|
||||
var_out = (WORD16)(-op1);
|
||||
}
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// returns the minima of 2 16 bit variables
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_min16(WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = op1 < op2 ? op1 : op2;
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// returns the maxima of 2 16 bit variables
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_max16(WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = op1 > op2 ? op1 : op2;
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
/*****************************************************************************/
|
||||
/* */
|
||||
/* function name : div16 */
|
||||
/* */
|
||||
/* description : divides 2 16 bit variables and returns the quotient */
|
||||
/* the q-format of the result is modified */
|
||||
/* ( op1/op2 to 14 bits precision) */
|
||||
/* */
|
||||
/* inputs : WORD16 op1, WORD16 op2, WORD16 *q_format */
|
||||
/* */
|
||||
/* globals : none */
|
||||
/* */
|
||||
/* processing : non-restoration type algorithm(shift & substract) */
|
||||
/* */
|
||||
/* outputs : WORD16 *q_format */
|
||||
/* */
|
||||
/* returns : WORD16 var_out */
|
||||
/* */
|
||||
/* issues : none */
|
||||
/* */
|
||||
/* revision history : */
|
||||
/* */
|
||||
/* DD MM YYYY author changes */
|
||||
/* 11 11 2003 preethi modified(bug fixes) */
|
||||
/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
|
||||
/* */
|
||||
/*****************************************************************************/
|
||||
|
||||
// divides 2 16 bit variables and returns the quotient
|
||||
static PLATFORM_INLINE WORD16 div16(WORD16 op1, WORD16 op2, WORD16 *q_format) {
|
||||
WORD32 quotient;
|
||||
UWORD16 mantissa_nr, mantissa_dr;
|
||||
WORD16 sign = 0;
|
||||
|
||||
LOOPIDX i;
|
||||
WORD16 q_nr, q_dr;
|
||||
|
||||
mantissa_nr = op1;
|
||||
mantissa_dr = op2;
|
||||
quotient = 0;
|
||||
|
||||
if (op1 < 0 && op2 != 0) {
|
||||
op1 = -op1;
|
||||
sign = (WORD16)(sign ^ -1);
|
||||
}
|
||||
|
||||
if (op2 < 0) {
|
||||
op2 = -op2;
|
||||
sign = (WORD16)(sign ^ -1);
|
||||
}
|
||||
|
||||
if (op2 == 0) {
|
||||
*q_format = 0;
|
||||
return (op1);
|
||||
}
|
||||
|
||||
quotient = 0;
|
||||
|
||||
q_nr = norm16(op1);
|
||||
mantissa_nr = (UWORD16)op1 << (q_nr);
|
||||
q_dr = norm16(op2);
|
||||
mantissa_dr = (UWORD16)op2 << (q_dr);
|
||||
*q_format = (WORD16)(14 + q_nr - q_dr);
|
||||
|
||||
for (i = 0; i < 15; i++) {
|
||||
quotient = quotient << 1;
|
||||
|
||||
if (mantissa_nr >= mantissa_dr) {
|
||||
mantissa_nr = mantissa_nr - mantissa_dr;
|
||||
quotient += 1;
|
||||
}
|
||||
|
||||
mantissa_nr = (UWORD32)mantissa_nr << 1;
|
||||
}
|
||||
|
||||
if (sign < 0) {
|
||||
quotient = -quotient;
|
||||
}
|
||||
|
||||
return (WORD16)quotient;
|
||||
}
|
||||
|
||||
// multiply 2 16 bit variables, add 31 to 16 bits to acc
|
||||
static PLATFORM_INLINE WORD16 mac16(WORD16 c, WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = ixheaacd_mult16(op1, op2);
|
||||
var_out = ixheaacd_add16(c, var_out);
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// multiply 2 16 bit variables, add 31 to 16 bits to acc with saturation
|
||||
static PLATFORM_INLINE WORD16 mac16_sat(WORD16 c, WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = ixheaacd_mult16(op1, op2);
|
||||
var_out = ixheaacd_add16_sat(c, var_out);
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// multiply 2 16 bit variables, add 30 to 15 bits to acc
|
||||
static PLATFORM_INLINE WORD16 mac16_shl(WORD16 c, WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = ixheaacd_mult16_shl(op1, op2);
|
||||
var_out = ixheaacd_add16(c, var_out);
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// multiply 2 16 bit variables, add 30 to 15 bits to acc with saturation
|
||||
static PLATFORM_INLINE WORD16 mac16_shl_sat(WORD16 c, WORD16 op1, WORD16 op2) {
|
||||
WORD16 var_out;
|
||||
WORD32 temp;
|
||||
|
||||
temp = ((WORD32)op1 * (WORD32)op2) >> 15;
|
||||
temp += c;
|
||||
var_out = ixheaacd_sat16(temp);
|
||||
return (var_out);
|
||||
}
|
||||
|
||||
// rounds a 32 bit variable to a 16 bit variable with saturation
|
||||
static PLATFORM_INLINE WORD16 ixheaacd_round16(WORD32 op1) {
|
||||
WORD16 var_out;
|
||||
|
||||
var_out = (WORD16)(ixheaacd_add32_sat(op1, 0x8000) >> 16);
|
||||
return (var_out);
|
||||
}
|
||||
#endif
|
||||
598
decoder/armv8/ixheaacd_basic_ops32.h
Normal file
598
decoder/armv8/ixheaacd_basic_ops32.h
Normal file
|
|
@ -0,0 +1,598 @@
|
|||
/******************************************************************************
|
||||
* *
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
#ifndef IXHEAACD_BASIC_OPS32_H
|
||||
#define IXHEAACD_BASIC_OPS32_H
|
||||
|
||||
#if 0
|
||||
//returns the minima of 2 32 bit variables
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_min32(WORD32 a, WORD32 b)
|
||||
{
|
||||
WORD32 min_val;
|
||||
asm (
|
||||
"CMP %w[a], %w[b]\n\t"
|
||||
"CSEL %w[min_val], %w[b], %w[a], GT\n"
|
||||
: [min_val] "=r" (min_val), [a] "+r" (a)
|
||||
: [b] "r" (b)
|
||||
: "cc"
|
||||
);
|
||||
return (min_val);
|
||||
}
|
||||
|
||||
//returns the maxima of 2 32 bit variables
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_max32(WORD32 a, WORD32 b)
|
||||
{
|
||||
WORD32 max_val;
|
||||
asm (
|
||||
"CMP %w[a], %w[b]\n"
|
||||
"CSEL %w[max_val], %w[b], %w[a], LT\n"
|
||||
: [max_val] "=r" (max_val), [a] "+r" (a)
|
||||
: [b] "r" (b)
|
||||
: "cc"
|
||||
);
|
||||
return (max_val);
|
||||
}
|
||||
#else
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_min32(WORD32 a, WORD32 b) {
|
||||
WORD32 min_val;
|
||||
|
||||
min_val = (a < b) ? a : b;
|
||||
|
||||
return min_val;
|
||||
}
|
||||
|
||||
// returns the maxima of 2 32 bit variables
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_max32(WORD32 a, WORD32 b) {
|
||||
WORD32 max_val;
|
||||
|
||||
max_val = (a > b) ? a : b;
|
||||
|
||||
return max_val;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// shifts a 32-bit value left by specificed bits
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_shl32(WORD32 a, WORD b) {
|
||||
WORD32 out_val;
|
||||
|
||||
b = ((UWORD32)(b << 24) >> 24); /* Mod 8 */
|
||||
if (b > 31)
|
||||
out_val = 0;
|
||||
else
|
||||
out_val = (WORD32)a << b;
|
||||
|
||||
return out_val;
|
||||
}
|
||||
|
||||
// shifts a 32-bit value right by specificed bits
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_shr32(WORD32 a, WORD b) {
|
||||
WORD32 out_val;
|
||||
|
||||
b = ((UWORD32)(b << 24) >> 24); /* Mod 8 */
|
||||
if (b >= 31) {
|
||||
if (a < 0)
|
||||
out_val = -1;
|
||||
else
|
||||
out_val = 0;
|
||||
} else {
|
||||
out_val = (WORD32)a >> b;
|
||||
}
|
||||
|
||||
return out_val;
|
||||
}
|
||||
|
||||
// shifts a 32-bit value left by specificed bits and saturates it to 32 bits
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_shl32_sat(WORD32 a, WORD b) {
|
||||
WORD32 out_val = a;
|
||||
for (; b > 0; b--) {
|
||||
if (a > (WORD32)0X3fffffffL) {
|
||||
out_val = MAX_32;
|
||||
break;
|
||||
} else if (a < (WORD32)0xc0000000L) {
|
||||
out_val = MIN_32;
|
||||
break;
|
||||
}
|
||||
|
||||
a = ixheaacd_shl32(a, 1);
|
||||
out_val = a;
|
||||
}
|
||||
return (out_val);
|
||||
}
|
||||
|
||||
// shifts a 32-bit value left by specificed bits, shifts
|
||||
// it right if specified no. of bits is negative
|
||||
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_shl32_dir(WORD32 a, WORD b) {
|
||||
WORD32 out_val;
|
||||
|
||||
if (b < 0) {
|
||||
out_val = ixheaacd_shr32(a, -b);
|
||||
} else {
|
||||
out_val = ixheaacd_shl32(a, b);
|
||||
}
|
||||
|
||||
return out_val;
|
||||
}
|
||||
|
||||
// shifts a 32-bit value left by specificed bits with sat,
|
||||
// shifts it right if specified no. of bits is negative
|
||||
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_shl32_dir_sat(WORD32 a, WORD b) {
|
||||
WORD32 out_val;
|
||||
|
||||
if (b < 0) {
|
||||
out_val = ixheaacd_shr32(a, -b);
|
||||
} else {
|
||||
out_val = ixheaacd_shl32_sat(a, b);
|
||||
}
|
||||
|
||||
return out_val;
|
||||
}
|
||||
|
||||
// shifts a 32-bit value right by specificed bits, shifts
|
||||
// it left if specified no. of bits is negative
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_shr32_dir(WORD32 a, WORD b) {
|
||||
WORD32 out_val;
|
||||
|
||||
if (b < 0) {
|
||||
out_val = ixheaacd_shl32(a, -b);
|
||||
} else {
|
||||
out_val = ixheaacd_shr32(a, b);
|
||||
}
|
||||
|
||||
return out_val;
|
||||
}
|
||||
|
||||
// shifts a 32-bit value right by specificed bits, shifts
|
||||
// it left with sat if specified no. of bits is negative
|
||||
static PLATFORM_INLINE WORD32 shr32_dir_sat(WORD32 a, WORD b) {
|
||||
WORD32 out_val;
|
||||
|
||||
if (b < 0) {
|
||||
out_val = ixheaacd_shl32_sat(a, -b);
|
||||
} else {
|
||||
out_val = ixheaacd_shr32(a, b);
|
||||
}
|
||||
|
||||
return out_val;
|
||||
}
|
||||
|
||||
// multiplies two 16 bit numbers and returns their 32-bit result
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_mult16x16in32(WORD16 a, WORD16 b) {
|
||||
WORD32 product;
|
||||
|
||||
product = (WORD32)a * (WORD32)b;
|
||||
|
||||
return product;
|
||||
}
|
||||
|
||||
// multiplies two 32 bit numbers considering their last
|
||||
// 16 bits and returns their 32-bit result
|
||||
static PLATFORM_INLINE WORD32 mult16x16in32_32(WORD32 a, WORD32 b) {
|
||||
WORD32 product;
|
||||
asm("AND %w[a], %w[a], #0x0000FFFF\n"
|
||||
"SXTH %w[a], %w[a]\n"
|
||||
"AND %w[b], %w[b], #0x0000FFFF\n"
|
||||
"SXTH %w[b], %w[b]\n"
|
||||
"MUL %w[product], %w[a], %w[b]\n"
|
||||
: [product] "=r"(product)
|
||||
: [b] "r"(b), [a] "r"(a));
|
||||
return product;
|
||||
}
|
||||
|
||||
// multiplies two 16 bit numbers and returns their 32-bit
|
||||
// result after removing 1 redundant sign bit
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_mult16x16in32_shl(WORD16 a, WORD16 b) {
|
||||
WORD32 product;
|
||||
|
||||
product = ixheaacd_shl32(ixheaacd_mult16x16in32(a, b), 1);
|
||||
|
||||
return product;
|
||||
}
|
||||
|
||||
// multiplies two 16 bit numbers and returns their 32-bit
|
||||
// result after removing 1 redundant sign bit with saturation
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_mult16x16in32_shl_sat(WORD16 a,
|
||||
WORD16 b) {
|
||||
WORD32 product;
|
||||
product = (WORD32)a * (WORD32)b;
|
||||
if (product != (WORD32)0x40000000L) {
|
||||
product = ixheaacd_shl32(product, 1);
|
||||
} else {
|
||||
product = MAX_32;
|
||||
}
|
||||
return product;
|
||||
}
|
||||
|
||||
// adds 2 32 bit variables
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_add32(WORD32 a, WORD32 b) {
|
||||
WORD32 sum;
|
||||
|
||||
sum = (WORD32)a + (WORD32)b;
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
// subtract 2 32 bit variables
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_sub32(WORD32 a, WORD32 b) {
|
||||
WORD32 diff;
|
||||
|
||||
diff = (WORD32)a - (WORD32)b;
|
||||
|
||||
return diff;
|
||||
}
|
||||
|
||||
// adds 2 32 bit variables with saturation
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_add32_sat(WORD32 a, WORD32 b) {
|
||||
WORD32 sum;
|
||||
|
||||
sum = ixheaacd_add32(a, b);
|
||||
|
||||
if ((((WORD32)a ^ (WORD32)b) & (WORD32)MIN_32) == 0) {
|
||||
if (((WORD32)sum ^ (WORD32)a) & (WORD32)MIN_32) {
|
||||
sum = (a < 0) ? MIN_32 : MAX_32;
|
||||
}
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
// subtract 2 32 bit variables
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_sub32_sat(WORD32 a, WORD32 b) {
|
||||
WORD32 diff;
|
||||
|
||||
diff = ixheaacd_sub32(a, b);
|
||||
|
||||
if ((((WORD32)a ^ (WORD32)b) & (WORD32)MIN_32) != 0) {
|
||||
if (((WORD32)diff ^ (WORD32)a) & (WORD32)MIN_32) {
|
||||
diff = (a < 0L) ? MIN_32 : MAX_32;
|
||||
}
|
||||
}
|
||||
|
||||
return (diff);
|
||||
}
|
||||
|
||||
// returns number of redundant sign bits in a 32-bit value.
|
||||
// return zero for a value of zero
|
||||
static PLATFORM_INLINE WORD ixheaacd_norm32(WORD32 a) {
|
||||
#if 1
|
||||
WORD norm_val;
|
||||
|
||||
if (a == 0) {
|
||||
norm_val = 31;
|
||||
} else {
|
||||
if (a == (WORD32)0xffffffffL) {
|
||||
norm_val = 31;
|
||||
} else {
|
||||
if (a < 0) {
|
||||
a = ~a;
|
||||
}
|
||||
for (norm_val = 0; a < (WORD32)0x40000000L; norm_val++) {
|
||||
a <<= 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
WORD32 norm_val, temp;
|
||||
asm("ASR %w[temp], %w[a], #31\n"
|
||||
"EOR %w[norm_val], %w[a], %w[temp]\n"
|
||||
"CLZ %w[norm_val], %w[norm_val]\n"
|
||||
"SUB %w[norm_val], %w[norm_val], #1\n"
|
||||
: [norm_val] "=r"(norm_val), [temp] "+r"(temp)
|
||||
: [a] "r"(a)
|
||||
: "cc");
|
||||
#endif
|
||||
return norm_val;
|
||||
}
|
||||
|
||||
// Redundant-sign-bit count for a *positive* 32-bit value: CLZ(a) - 1.
// NOTE(review): result is meaningless for a <= 0 (CLZ of a negative is 0,
// CLZ of 0 is 32) — callers are presumed to pass strictly positive input.
static PLATFORM_INLINE WORD ixheaacd_pnorm32(WORD32 a) {
  WORD32 norm_val;
  // AArch64: count leading zeros, then subtract one for the sign bit.
  asm("CLZ %w[norm_val], %w[a]\n"
      "SUB %w[norm_val], %w[norm_val], #1\n"
      : [norm_val] "=r"(norm_val)
      : [a] "r"(a));
  return norm_val;
}
|
||||
|
||||
// returns the position of the most significant bit for negative numbers.
|
||||
// ignores leading zeros to determine the position of most significant bit.
|
||||
static PLATFORM_INLINE WORD bin_expo32(WORD32 a) {
|
||||
WORD bin_expo_val;
|
||||
|
||||
bin_expo_val = 31 - ixheaacd_norm32(a);
|
||||
|
||||
return bin_expo_val;
|
||||
}
|
||||
|
||||
// Absolute value of a 32-bit number (AArch64 inline asm, no saturation).
// NOTE(review): for a == MIN_32 the NEG wraps, so the result is MIN_32;
// use ixheaacd_abs32_sat when saturation is required.
static PLATFORM_INLINE WORD32 ixheaacd_abs32(WORD32 a) {
  WORD32 abs_val;
  // abs_val = (a < 0) ? -a : a, via conditional select on the sign flags.
  asm("CMP %w[a], #0\n"
      "NEG %w[abs_val], %w[a]\n"
      "CSEL %w[abs_val], %w[abs_val], %w[a], LT\n"
      : [abs_val] "=r"(abs_val), [a] "+r"(a)
      :
      : "cc");
  return (abs_val);
}
|
||||
|
||||
// One's-complement "absolute value": a XOR (a >> 31).
// For a >= 0 this returns a; for a < 0 it returns ~a == -a - 1, which has
// the same redundant-sign-bit count as |a| — suitable as a norm input.
static PLATFORM_INLINE WORD32 ixheaacd_abs32_nrm(WORD32 a) {
  WORD32 abs_val, temp;
  // temp = sign mask (0 or 0xFFFFFFFF); abs_val = a ^ temp.
  asm("ASR %w[temp], %w[a], #31\n"
      "EOR %w[abs_val], %w[a], %w[temp]\n"
      : [abs_val] "=r"(abs_val), [temp] "+r"(temp)
      : [a] "r"(a)
      : "cc");
  return abs_val;
}
|
||||
|
||||
#if 0
|
||||
//returns the absolute value of 32-bit number with saturation
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_abs32_sat(WORD32 a)
|
||||
{
|
||||
WORD32 abs_val,temp;
|
||||
asm (
|
||||
"ADDS %w[abs_val], %w[a], #0\n"
|
||||
"NEG %w[temp], %w[abs_val]\n"
|
||||
"CSEL %w[abs_val], %w[temp], %w[a], MI\n"
|
||||
"CMP %w[abs_val], #0\n"
|
||||
"MOV %w[temp], #2147483647\n"
|
||||
"CSEL %w[abs_val], %w[temp], %w[abs_val], LT\n"
|
||||
: [abs_val] "=r" (abs_val), [temp] "+r" (temp)
|
||||
: [a] "r" (a)
|
||||
: "cc"
|
||||
);
|
||||
return abs_val;
|
||||
}
|
||||
|
||||
//returns the negated value of 32-bit number
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_negate32(WORD32 a)
|
||||
{
|
||||
WORD32 neg_val;
|
||||
asm (
|
||||
"NEG %w[neg_val], %w[a]\n"
|
||||
: [neg_val] "=r" (neg_val)
|
||||
: [a] "r" (a)
|
||||
);
|
||||
return neg_val;
|
||||
}
|
||||
|
||||
//returns the negated value of 32-bit number with saturation
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_negate32_sat(WORD32 a)
|
||||
{
|
||||
WORD32 neg_val,temp;
|
||||
asm (
|
||||
"NEGS %w[neg_val], %w[a]\n"
|
||||
"MOV %w[temp], #0x7FFFFFFF\n"
|
||||
"CSEL %w[neg_val], %w[temp], %w[neg_val], VS\n"
|
||||
: [neg_val] "=r" (neg_val), [temp] "+r" (temp)
|
||||
: [a] "r" (a)
|
||||
:"cc"
|
||||
);
|
||||
return neg_val;
|
||||
}
|
||||
#else
|
||||
|
||||
// Absolute value of a 32-bit number with saturation:
// |MIN_32| is clamped to MAX_32.
static PLATFORM_INLINE WORD32 ixheaacd_abs32_sat(WORD32 a) {
  if (a >= 0) {
    return a;
  }
  return (a == MIN_32) ? MAX_32 : -a;
}
|
||||
|
||||
// Negation of a 32-bit number (no saturation; -MIN_32 wraps).
static PLATFORM_INLINE WORD32 ixheaacd_negate32(WORD32 a) {
  return -a;
}
|
||||
// Negation of a 32-bit number with saturation: -MIN_32 yields MAX_32.
static PLATFORM_INLINE WORD32 ixheaacd_negate32_sat(WORD32 a) {
  return (a == MIN_32) ? MAX_32 : -a;
}
|
||||
#endif
|
||||
|
||||
// divides 2 32 bit variables and returns the quotient
|
||||
static PLATFORM_INLINE WORD32 div32(WORD32 a, WORD32 b, WORD *q_format) {
|
||||
WORD32 quotient;
|
||||
UWORD32 mantissa_nr, mantissa_dr;
|
||||
WORD16 sign = 0;
|
||||
|
||||
LOOPINDEX i;
|
||||
WORD q_nr, q_dr;
|
||||
|
||||
mantissa_nr = a;
|
||||
mantissa_dr = b;
|
||||
quotient = 0;
|
||||
|
||||
if ((a < 0) && (0 != b)) {
|
||||
a = -a;
|
||||
sign = (WORD16)(sign ^ -1);
|
||||
}
|
||||
|
||||
if (b < 0) {
|
||||
b = -b;
|
||||
sign = (WORD16)(sign ^ -1);
|
||||
}
|
||||
|
||||
if (0 == b) {
|
||||
*q_format = 0;
|
||||
return (a);
|
||||
}
|
||||
|
||||
quotient = 0;
|
||||
|
||||
q_nr = ixheaacd_norm32(a);
|
||||
mantissa_nr = (UWORD32)a << (q_nr);
|
||||
q_dr = ixheaacd_norm32(b);
|
||||
mantissa_dr = (UWORD32)b << (q_dr);
|
||||
*q_format = (WORD)(30 + q_nr - q_dr);
|
||||
|
||||
for (i = 0; i < 31; i++) {
|
||||
quotient = quotient << 1;
|
||||
|
||||
if (mantissa_nr >= mantissa_dr) {
|
||||
mantissa_nr = mantissa_nr - mantissa_dr;
|
||||
quotient += 1;
|
||||
}
|
||||
|
||||
mantissa_nr = (UWORD32)mantissa_nr << 1;
|
||||
}
|
||||
|
||||
if (sign < 0) {
|
||||
quotient = -quotient;
|
||||
}
|
||||
|
||||
return quotient;
|
||||
}
|
||||
|
||||
// multiplies two 16 bit numbers and accumulates their result in a 32 bit
|
||||
// variable
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_mac16x16in32(WORD32 a, WORD16 b,
|
||||
WORD16 c) {
|
||||
WORD32 acc;
|
||||
|
||||
acc = ixheaacd_mult16x16in32(b, c);
|
||||
|
||||
acc = ixheaacd_add32(a, acc);
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
// multiplies lower 16 bit of one data with upper 16 bit of
|
||||
// other and accumulates their result in a 32 bit variable
|
||||
static PLATFORM_INLINE WORD32 mac16x16hin32(WORD32 a, WORD32 b, WORD32 c) {
|
||||
WORD32 acc;
|
||||
|
||||
acc = ixheaacd_mult16x16in32((WORD16)b, (WORD16)(c >> 16));
|
||||
|
||||
acc = ixheaacd_add32(a, acc);
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
// multiplies two 16 bit numbers and accumulates their result in a 32 bit
|
||||
// variable
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_mac16x16in32_shl(WORD32 a, WORD16 b,
|
||||
WORD16 c) {
|
||||
WORD32 acc;
|
||||
|
||||
acc = ixheaacd_mult16x16in32_shl(b, c);
|
||||
|
||||
acc = ixheaacd_add32(a, acc);
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
// multiplies two 16 bit numbers and accumulates their
|
||||
// result in a 32 bit variable with saturation
|
||||
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_mac16x16in32_shl_sat(WORD32 a, WORD16 b,
|
||||
WORD16 c) {
|
||||
WORD32 acc;
|
||||
|
||||
acc = ixheaacd_mult16x16in32_shl_sat(b, c);
|
||||
|
||||
acc = ixheaacd_add32_sat(a, acc);
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
// multiplies two 16 bit numbers and subtracts their
|
||||
// result from a 32 bit variable
|
||||
static PLATFORM_INLINE WORD32 msu16x16in32(WORD32 a, WORD16 b, WORD16 c) {
|
||||
WORD32 acc;
|
||||
|
||||
acc = ixheaacd_mult16x16in32(b, c);
|
||||
|
||||
acc = ixheaacd_sub32(a, acc);
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
// multiplies two 16 bit numbers and subtracts their
|
||||
// result from a 32 bit variable after removing a redundant sign bit in the
|
||||
// product
|
||||
static PLATFORM_INLINE WORD32 msu16x16in32_shl(WORD32 a, WORD16 b, WORD16 c) {
|
||||
WORD32 acc;
|
||||
|
||||
acc = ixheaacd_mult16x16in32_shl(b, c);
|
||||
|
||||
acc = ixheaacd_sub32(a, acc);
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
// multiplies two 16 bit numbers and subtracts their
|
||||
// result from a 32 bit variable with saturation
|
||||
// after removing a redundant sign bit in the product
|
||||
static PLATFORM_INLINE WORD32 msu16x16in32_shl_sat(WORD32 a, WORD16 b,
|
||||
WORD16 c) {
|
||||
WORD32 acc;
|
||||
|
||||
acc = ixheaacd_mult16x16in32_shl_sat(b, c);
|
||||
|
||||
acc = ixheaacd_sub32_sat(a, acc);
|
||||
|
||||
return acc;
|
||||
}
|
||||
|
||||
// adding two 32 bit numbers and taking care of overflow
|
||||
// by downshifting both numbers before addition
|
||||
static PLATFORM_INLINE WORD32 add32_shr(WORD32 a, WORD32 b) {
|
||||
WORD32 sum;
|
||||
|
||||
a = ixheaacd_shr32(a, 1);
|
||||
b = ixheaacd_shr32(b, 1);
|
||||
|
||||
sum = ixheaacd_add32(a, b);
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
// subtracting two 32 bit numbers and taking care of
|
||||
// overflow by downshifting both numbers before addition
|
||||
|
||||
static PLATFORM_INLINE WORD32 sub32_shr(WORD32 a, WORD32 b) {
|
||||
WORD32 diff;
|
||||
|
||||
a = ixheaacd_shr32(a, 1);
|
||||
b = ixheaacd_shr32(b, 1);
|
||||
|
||||
diff = ixheaacd_sub32(a, b);
|
||||
|
||||
return diff;
|
||||
}
|
||||
#endif
|
||||
439
decoder/armv8/ixheaacd_basic_ops40.h
Normal file
439
decoder/armv8/ixheaacd_basic_ops40.h
Normal file
|
|
@ -0,0 +1,439 @@
|
|||
/******************************************************************************
|
||||
* *
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
#ifndef IXHEAACD_BASIC_OPS40_H
|
||||
#define IXHEAACD_BASIC_OPS40_H
|
||||
|
||||
// Upper / lower 32 bits of a 64-bit value. The parameter is now fully
// parenthesized so compound expressions (e.g. hi64(x + y)) expand
// correctly; the original `a &` binding broke for such arguments.
#define hi64(a) ((WORD32)(((a) & (WORD64)0xFFFFFFFF00000000) >> 32))
#define lo64(a) ((UWORD32)(a))
|
||||
|
||||
// normalize input to 32 bits, return denormalizing info
|
||||
static PLATFORM_INLINE WORD16 norm40(WORD40 *in) {
|
||||
WORD16 expo;
|
||||
WORD32 tempo;
|
||||
|
||||
if (0 == (*in)) return 31;
|
||||
|
||||
if (((*in) <= 0x7fffffff) && ((WORD40)(*in) >= (WORD40)0xFFFFFFFF80000000)) {
|
||||
tempo = (WORD32)(*in);
|
||||
expo = ixheaacd_norm32(tempo);
|
||||
*in = tempo << expo;
|
||||
|
||||
return (expo);
|
||||
}
|
||||
|
||||
tempo = (WORD32)((*in) >> 31);
|
||||
expo = 31 - (ixheaacd_norm32(tempo));
|
||||
*in = (*in) >> expo;
|
||||
|
||||
return (-expo);
|
||||
}
|
||||
|
||||
// adds two numbers and right shifts by 1
|
||||
static PLATFORM_INLINE WORD32 add32_shr40(WORD32 a, WORD32 b) {
|
||||
WORD40 sum;
|
||||
|
||||
sum = (WORD40)a + (WORD40)b;
|
||||
sum = sum >> 1;
|
||||
|
||||
return ((WORD32)sum);
|
||||
}
|
||||
|
||||
// subtracts and right shifts by one
|
||||
static PLATFORM_INLINE WORD32 sub32_shr40(WORD32 a, WORD32 b) {
|
||||
WORD40 sum;
|
||||
|
||||
sum = (WORD40)a - (WORD40)b;
|
||||
sum = sum >> 1;
|
||||
|
||||
return ((WORD32)sum);
|
||||
}
|
||||
|
||||
// multiply WORD32 with WORD16 return bits 46 to 15
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_mult32x16in32_shl(WORD32 a, WORD16 b) {
|
||||
WORD32 result;
|
||||
WORD64 temp_result;
|
||||
|
||||
temp_result = (WORD64)a * (WORD64)b;
|
||||
|
||||
result = (WORD32)(temp_result >> 16);
|
||||
|
||||
return (result << 1);
|
||||
}
|
||||
|
||||
// multiply WORD32 with higher 16 bits of second data and return bits 46 to 15
|
||||
static PLATFORM_INLINE WORD32 mult32x16hin32_shl(WORD32 a, WORD32 b) {
|
||||
WORD32 product;
|
||||
WORD64 temp_product;
|
||||
|
||||
temp_product = (WORD64)a * (WORD64)(b >> 16);
|
||||
product = (WORD32)(temp_product >> 16);
|
||||
|
||||
return (product << 1);
|
||||
}
|
||||
|
||||
// multiply WORD32 with WORD16 return bits 47 to 16
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_mult32x16in32(WORD32 a, WORD16 b) {
|
||||
WORD32 result;
|
||||
WORD64 temp_result;
|
||||
|
||||
temp_result = (WORD64)a * (WORD64)b;
|
||||
|
||||
result = (WORD32)(temp_result >> 16);
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
// multiply WORD32 with WORD16 return bits 46 to 15 with saturation
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_mult32x16in32_shl_sat(WORD32 a,
|
||||
WORD16 b) {
|
||||
WORD32 result;
|
||||
|
||||
if (a == (WORD32)0x80000000 && b == (WORD16)0x8000) {
|
||||
result = (WORD32)0x7fffffff;
|
||||
} else {
|
||||
result = ixheaacd_mult32x16in32_shl(a, b);
|
||||
}
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
#if 0
|
||||
//multiply WORD32 with WORD32 return bits 62 to 31
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_mult32_shl(WORD32 a, WORD32 b)
|
||||
{
|
||||
|
||||
WORD64 result;
|
||||
asm (
|
||||
"SMULL %[result], %w[a], %w[b]\n"
|
||||
"ASR %[result], %[result], #32\n"
|
||||
: [result] "=r" (result)
|
||||
: [a] "r" (a), [b] "r" (b)
|
||||
);
|
||||
return ((WORD32)(result << 1));
|
||||
}
|
||||
|
||||
//multiply WORD32 with WORD32 return bits 63 to 32
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_mult32(WORD32 a, WORD32 b)
|
||||
{
|
||||
|
||||
WORD64 result;
|
||||
asm (
|
||||
"SMULL %[result], %w[a], %w[b]\n"
|
||||
"ASR %[result], %[result], #32\n"
|
||||
: [result] "=r" (result)
|
||||
: [a] "r" (a), [b] "r" (b)
|
||||
);
|
||||
return ((WORD32)result);
|
||||
}
|
||||
#else
|
||||
// Multiply two 32-bit values and return bits 62..31 of the 64-bit
// product (high word doubled to drop the redundant sign bit).
static PLATFORM_INLINE WORD32 ixheaacd_mult32_shl(WORD32 a, WORD32 b) {
  WORD32 high = (WORD32)(((WORD64)a * (WORD64)b) >> 32);
  return (high << 1);
}
|
||||
|
||||
// Multiply two 32-bit values and return bits 63..32 (the high word) of
// the 64-bit product.
static PLATFORM_INLINE WORD32 ixheaacd_mult32(WORD32 a, WORD32 b) {
  return (WORD32)(((WORD64)a * (WORD64)b) >> 32);
}
|
||||
#endif
|
||||
|
||||
// multiply WORD32 with WORD32 return bits 62 to 31 with saturation
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_mult32_shl_sat(WORD32 a, WORD32 b) {
|
||||
WORD32 result;
|
||||
|
||||
if (a == (WORD32)0x80000000 && b == (WORD32)0x80000000) {
|
||||
result = 0x7fffffff;
|
||||
} else {
|
||||
result = ixheaacd_mult32_shl(a, b);
|
||||
}
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
// multiply WORD32 with WORD16 add bits 47 to 16 to accumulator
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_mac32x16in32(WORD32 a, WORD32 b,
|
||||
WORD16 c) {
|
||||
WORD32 result;
|
||||
|
||||
result = a + ixheaacd_mult32x16in32(b, c);
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
// multiply WORD32 with WORD16 add bits 46 to 15 to accumulator
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_mac32x16in32_shl(WORD32 a, WORD32 b,
|
||||
WORD16 c) {
|
||||
WORD32 result;
|
||||
|
||||
result = a + ixheaacd_mult32x16in32_shl(b, c);
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
// multiply WORD32 with WORD16 add bits 46 to 15 to accumulator with saturation
|
||||
static PLATFORM_INLINE WORD32 mac32x16in32_shl_sat(WORD32 a, WORD32 b,
|
||||
WORD16 c) {
|
||||
return (ixheaacd_add32_sat(a, ixheaacd_mult32x16in32_shl_sat(b, c)));
|
||||
}
|
||||
|
||||
// multiply WORD32 with WORD32 add bits 63 to 32 to accumulator
|
||||
static PLATFORM_INLINE WORD32 ixheaacd_mac32(WORD32 a, WORD32 b, WORD32 c) {
|
||||
WORD32 result;
|
||||
|
||||
result = a + ixheaacd_mult32(b, c);
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
// multiply WORD32 with WORD32 add bits 62 to 31 to accumulator
|
||||
static PLATFORM_INLINE WORD32 mac32_shl(WORD32 a, WORD32 b, WORD32 c) {
|
||||
WORD32 result;
|
||||
|
||||
result = a + ixheaacd_mult32_shl(b, c);
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
// multiply WORD32 with WORD32 add bits 62 to 31 to accumulator with saturation
|
||||
static PLATFORM_INLINE WORD32 mac32_shl_sat(WORD32 a, WORD32 b, WORD32 c) {
|
||||
return (ixheaacd_add32_sat(a, ixheaacd_mult32_shl_sat(b, c)));
|
||||
}
|
||||
|
||||
// multiply WORD32 with WORD16 sub bits 47 to 16 from accumulator
|
||||
static PLATFORM_INLINE WORD32 msu32x16in32(WORD32 a, WORD32 b, WORD16 c) {
|
||||
WORD32 result;
|
||||
|
||||
result = a - ixheaacd_mult32x16in32(b, c);
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
// multiply WORD32 with WORD16 sub bits 46 to 15 from accumulator
|
||||
static PLATFORM_INLINE WORD32 msu32x16in32_shl(WORD32 a, WORD32 b, WORD16 c) {
|
||||
WORD32 result;
|
||||
|
||||
result = a - ixheaacd_mult32x16in32_shl(b, c);
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
// multiply WORD32 with WORD16 sub bits 46 to 15 from accumulator with
|
||||
// saturation
|
||||
static PLATFORM_INLINE WORD32 msu32x16in32_shl_sat(WORD32 a, WORD32 b,
|
||||
WORD16 c) {
|
||||
return (ixheaacd_sub32_sat(a, ixheaacd_mult32x16in32_shl_sat(b, c)));
|
||||
}
|
||||
|
||||
// multiply WORD32 with WORD32 sub bits 63 to 32 from accumulator
|
||||
static PLATFORM_INLINE WORD32 msu32(WORD32 a, WORD32 b, WORD32 c) {
|
||||
WORD32 result;
|
||||
|
||||
result = a - ixheaacd_mult32(b, c);
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
// multiply WORD32 with WORD32 sub bits 62 to 31 from accumulator
|
||||
static PLATFORM_INLINE WORD32 msu32_shl(WORD32 a, WORD32 b, WORD32 c) {
|
||||
WORD32 result;
|
||||
|
||||
result = a - ixheaacd_mult32_shl(b, c);
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
// multiply WORD32 with WORD32 sub bits 62 to 31 from accumulator with
|
||||
// saturation
|
||||
static PLATFORM_INLINE WORD32 msu32_shl_sat(WORD32 a, WORD32 b, WORD32 c) {
|
||||
return (ixheaacd_sub32_sat(a, ixheaacd_mult32_shl_sat(b, c)));
|
||||
}
|
||||
|
||||
// returns normalized 32 bit accumulated result
|
||||
static PLATFORM_INLINE WORD32 mac3216_arr40(WORD32 *x, WORD16 *y,
|
||||
LOOPINDEX length, WORD16 *q_val) {
|
||||
LOOPINDEX i;
|
||||
WORD40 sum = 0;
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
sum += (WORD40)(ixheaacd_mult32x16in32(x[i], y[i]));
|
||||
}
|
||||
|
||||
*q_val = norm40(&sum);
|
||||
|
||||
return (WORD32)sum;
|
||||
}
|
||||
|
||||
// returns normalized 32 bit accumulated result
|
||||
static PLATFORM_INLINE WORD32 mac32_arr40(WORD32 *x, WORD32 *y,
|
||||
LOOPINDEX length, WORD16 *q_val) {
|
||||
LOOPINDEX i;
|
||||
WORD40 sum = 0;
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
sum += (WORD40)(ixheaacd_mult32(x[i], y[i]));
|
||||
}
|
||||
|
||||
*q_val = norm40(&sum);
|
||||
|
||||
return ((WORD32)sum);
|
||||
}
|
||||
|
||||
// returns normalized 32 bit accumulated result
|
||||
static PLATFORM_INLINE WORD32 mac16_arr40(WORD16 *x, WORD16 *y,
|
||||
LOOPINDEX length, WORD16 *q_val) {
|
||||
LOOPINDEX i;
|
||||
WORD40 sum = 0;
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
sum += (WORD40)((WORD32)x[i] * (WORD32)y[i]);
|
||||
}
|
||||
|
||||
*q_val = norm40(&sum);
|
||||
|
||||
return ((WORD32)sum);
|
||||
}
|
||||
|
||||
// returns normalized 32 bit accumulated result
|
||||
static PLATFORM_INLINE WORD32 add32_arr40(WORD32 *in_arr, LOOPINDEX length,
|
||||
WORD16 *q_val) {
|
||||
LOOPINDEX i;
|
||||
WORD40 sum = 0;
|
||||
|
||||
for (i = 0; i < length; i++) {
|
||||
sum += (WORD40)in_arr[i];
|
||||
}
|
||||
|
||||
*q_val = norm40(&sum);
|
||||
|
||||
return ((WORD32)sum);
|
||||
}
|
||||
|
||||
#if 0
|
||||
//multiply WORD32 with WORD32 return WORD64
|
||||
static PLATFORM_INLINE WORD64 ixheaacd_mult32x32in64(WORD32 a, WORD32 b)
|
||||
{
|
||||
WORD64 result;
|
||||
|
||||
asm (
|
||||
"SMULL %[result], %w[a], %w[b]\n"
|
||||
: [result] "=r" (result)
|
||||
: [a] "r" (a), [b] "r" (b)
|
||||
);
|
||||
return (result);
|
||||
}
|
||||
#else
|
||||
// Full 64-bit product of two 32-bit values.
static PLATFORM_INLINE WORD64 ixheaacd_mult32x32in64(WORD32 a, WORD32 b) {
  return (WORD64)a * (WORD64)b;
}
|
||||
#endif
|
||||
|
||||
// multiply WORD32 with WORD32 and accumulate the 64 bit result
|
||||
static PLATFORM_INLINE WORD64 ixheaacd_mac32x32in64(WORD64 sum, WORD32 a,
|
||||
WORD32 b) {
|
||||
sum += (WORD64)a * (WORD64)b;
|
||||
|
||||
return (sum);
|
||||
}
|
||||
|
||||
// 7-element dot product of a WORD32 array with a WORD16 array in 64 bits.
// NOTE(review): the first statement uses '=' rather than '+=', so the
// incoming `sum` argument is discarded and only the 7-term product sum is
// returned — confirm against callers before treating this as a bug.
static PLATFORM_INLINE WORD64 ixheaacd_mac32x32in64_7(WORD64 sum,
                                                      const WORD32 *a,
                                                      const WORD16 *b) {
  sum = (WORD64)a[0] * (WORD64)b[0];
  sum += (WORD64)a[1] * (WORD64)b[1];
  sum += (WORD64)a[2] * (WORD64)b[2];
  sum += (WORD64)a[3] * (WORD64)b[3];
  sum += (WORD64)a[4] * (WORD64)b[4];
  sum += (WORD64)a[5] * (WORD64)b[5];
  sum += (WORD64)a[6] * (WORD64)b[6];

  return (sum);
}
|
||||
|
||||
// n-element dot product of a WORD32 array with a WORD16 array,
// accumulated into the incoming 64-bit `sum`.
// NOTE(review): a[0] * b[0] is added unconditionally before the loop, so
// element 0 is read even when n <= 0 — callers must guarantee n >= 1.
static PLATFORM_INLINE WORD64 ixheaacd_mac32x32in64_n(WORD64 sum,
                                                      const WORD32 *a,
                                                      const WORD16 *b,
                                                      WORD32 n) {
  WORD32 k;

  sum += (WORD64)a[0] * (WORD64)b[0];
  for (k = 1; k < n; k++) sum += (WORD64)a[k] * (WORD64)b[k];
  return (sum);
}
|
||||
|
||||
// Full 64-bit product of two 32-bit values.
static PLATFORM_INLINE WORD64 ixheaacd_mult64(WORD32 a, WORD32 b) {
  return (WORD64)a * (WORD64)b;
}
|
||||
|
||||
// Wrapping 64-bit addition.
static PLATFORM_INLINE WORD64 ixheaacd_add64(WORD64 a, WORD64 b) {
  return a + b;
}
|
||||
|
||||
// Wrapping 64-bit subtraction.
static PLATFORM_INLINE WORD64 ixheaacd_sub64(WORD64 a, WORD64 b) {
  return (WORD64)a - (WORD64)b;
}
|
||||
|
||||
// Saturating 64-bit subtraction: on overflow the result is clamped to
// MIN_64 / MAX_64 according to the sign of `a`.
static PLATFORM_INLINE WORD64 ixheaacd_sub64_sat(WORD64 a, WORD64 b) {
  WORD64 result = ixheaacd_sub64(a, b);

  // Overflow only possible when operand signs differ; detected by the
  // result's sign flipping away from that of `a`.
  if (((((WORD64)a ^ (WORD64)b) & (WORD64)MIN_64) != 0) &&
      ((((WORD64)result ^ (WORD64)a) & (WORD64)MIN_64) != 0)) {
    result = (a < 0L) ? MIN_64 : MAX_64;
  }

  return (result);
}
|
||||
|
||||
// Multiply two 32-bit values and return the 64-bit product arithmetically
// right-shifted by `shift` bits, truncated to 32 bits.
static PLATFORM_INLINE WORD32 ixheaacd_mul32_sh(WORD32 a, WORD32 b,
                                                WORD8 shift) {
  WORD64 product = (WORD64)a * (WORD64)b;
  return (WORD32)(product >> shift);
}
|
||||
|
||||
#endif
|
||||
82
decoder/armv8/ixheaacd_calcmaxspectralline.s
Normal file
82
decoder/armv8/ixheaacd_calcmaxspectralline.s
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
///******************************************************************************
|
||||
// *
|
||||
// * Copyright (C) 2018 The Android Open Source Project
|
||||
// *
|
||||
// * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// * you may not use this file except in compliance with the License.
|
||||
// * You may obtain a copy of the License at:
|
||||
// *
|
||||
// * http://www.apache.org/licenses/LICENSE-2.0
|
||||
// *
|
||||
// * Unless required by applicable law or agreed to in writing, software
|
||||
// * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// * See the License for the specific language governing permissions and
|
||||
// * limitations under the License.
|
||||
// *
|
||||
// *****************************************************************************
|
||||
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
//*/
|
||||
|
||||
|
||||
// AArch64 NEON routine: ORs together the absolute values of W1 spectral
// lines at X0 (8 at a time, remainder scalar) and returns CLZ(acc) - 1,
// i.e. the redundant-sign-bit count of the largest magnitude.
// NOTE(review): register use inferred from the code below; ABI args are
// x0 = spectrum pointer, w1 = line count — confirm against the C caller.
        .text
        .global ixheaacd_calc_max_spectral_line_armv8
ixheaacd_calc_max_spectral_line_armv8:

        LSR W4, W1, #3              // W4 = count / 8 (vector iterations)
        LSL W6, W4, #3              // W6 = count rounded down to 8
        MOV w11, #0x00000000
        MOV V3.S[0], w11            // V3 = running OR accumulator, cleared
        MOV V3.S[1], w11
        MOV V3.S[2], w11
        MOV V3.S[3], w11

LOOP_1:                             // 8 lines per iteration
        LD1 {V0.4S}, [X0], #16
        LD1 {V1.4S}, [X0], #16

        ABS V0.4S, V0.4S
        ABS V1.4S, V1.4S

        SUBS W4, W4, #1

        ORR V3.16B, V0.16B, V3.16B  // fold |x| into the accumulator
        ORR V3.16B, V1.16B, V3.16B

        BGT LOOP_1

        SUBS W7, W1, W6             // W7 = remaining (count % 8) lines

        MOV W4, V3.S[0]             // horizontal OR of the four lanes
        MOV W1, V3.S[1]
        MOV W2, V3.S[2]
        ORR W4, W4, W1
        MOV W3, V3.S[3]
        ORR W4, W4, W2
        ORR W4, W4, W3
        BEQ END_FUNC
LOOP_2:                             // scalar tail for the remainder

        LDR W2, [X0], #4

        CMP W2, #0

        CNEG W2, W2, LE             // W2 = |W2|
        ORR W4, W4, W2
        SUBS W7, W7, #1
        BGT LOOP_2

END_FUNC:

        MOV W0, W4
        CMP W0, #0

        CNEG W0, W0, LE             // defensive |acc| before CLZ
        CLZ W0, W0
        SUB W0, W0, #1              // redundant sign bits = CLZ - 1

        RET
|
||||
|
||||
|
||||
|
||||
|
||||
231
decoder/armv8/ixheaacd_cos_sin_mod_loop1.s
Normal file
231
decoder/armv8/ixheaacd_cos_sin_mod_loop1.s
Normal file
|
|
@ -0,0 +1,231 @@
|
|||
// Prologue/epilogue macros: save/restore the callee-saved NEON registers
// q8-q15 and the GP registers x8-x21, x29, x30 on the stack.
// NOTE(review): x8-x18 are caller-saved under AAPCS64 — saving them is
// harmless but more than the ABI requires.
.macro push_v_regs
        stp q8, q9, [sp, #-32]!
        stp q10, q11, [sp, #-32]!
        stp q12, q13, [sp, #-32]!
        stp q14, q15, [sp, #-32]!
        stp X8, X9, [sp, #-16]!
        stp X10, X11, [sp, #-16]!
        stp X12, X13, [sp, #-16]!
        stp X14, X15, [sp, #-16]!
        stp X16, X17, [sp, #-16]!
        stp X18, X19, [sp, #-16]!
        stp X20, X21, [sp, #-16]!
        stp X29, X30, [sp, #-16]!
.endm

// Restores in exact reverse order of push_v_regs.
.macro pop_v_regs
        ldp X29, X30, [sp], #16
        ldp X20, X21, [sp], #16
        ldp X18, X19, [sp], #16
        ldp X16, X17, [sp], #16
        ldp X14, X15, [sp], #16
        ldp X12, X13, [sp], #16
        ldp X10, X11, [sp], #16
        ldp X8, X9, [sp], #16
        ldp q14, q15, [sp], #32
        ldp q12, q13, [sp], #32
        ldp q10, q11, [sp], #32
        ldp q8, q9, [sp], #32
.endm
|
||||
|
||||
// AArch64 NEON cos/sin modulation kernel (QMF pre/post twiddle).
// Four nearly identical sections per iteration, each: load a cos/sin
// 16-bit twiddle pair from x2, load subband pairs from x0 (ascending) and
// x4 (descending mirror), form the four 32x32 cross products >> 16, then
// store sum / saturated-difference pairs to x3 (parts 1,3) or x5
// (parts 2,4). Runs x1/4 iterations.
// NOTE(review): register roles inferred from the address arithmetic —
// x0 = subband, x1 = M, x2 = twiddle table, x3 = output — confirm against
// the C prototype of ixheaacd_cos_sin_mod_loop1.
        .text
        .p2align 2

        .global ixheaacd_cos_sin_mod_loop1
ixheaacd_cos_sin_mod_loop1:

        // STMFD sp!, {x4-x12, x14}
        push_v_regs
        //stp x19, x20,[sp,#-16]!
        //VPUSH {D8-D11}
        //generating load addresses
        ADD x4, x0, x1, lsl #3      //psubband1
        SUB x4, x4, #4
        ADD x5, x3, x1, lsl #3      //psubband1_t
        SUB x5, x5, #8
        ASR x6, x1, #2              // loop count = M / 4

        LDR w19, =0
        DUP V0.8h, w19              // clear v0 before partial lane loads
LOOP1:
        //first part
        ld1 {v0.h}[0] , [x2]
        ADD x2, x2, #2
        ld1 {v0.h}[2] , [x2]
        ADD x2, x2, #2
        rev64 v1.2s, v0.2s          // swapped twiddle pair
        ld1 {v2.s}[0], [x0]
        ADD x0, x0, #4
        ADD x7, x0, #252
        ld1 {v2.s}[1], [x7]
        ld1 {v3.s}[0], [x4]
        ADD x7, x4, #256
        ld1 {v3.s}[1], [x7]
        SUB x4, x4, #4

        sMULL v4.2d, v0.2s, v2.2s   //qsub 2nd
        sshr v4.2d, v4.2d, #16
        sMULL v6.2d, v0.2s, v3.2s   //add 2nd
        sshr v6.2d, v6.2d, #16
        sMULL v8.2d, v1.2s, v2.2s   //add 1st
        sshr v8.2d, v8.2d, #16
        sMULL v10.2d, v1.2s, v3.2s  //qsub 1st
        sshr v10.2d, v10.2d, #16

        add v0.4s, v8.4s , v6.4s
        SQSUB v2.4s, v10.4s , v4.4s

        //shrn v0.2s, v0.2d,#32
        //shrn v2.2s, v2.2d,#32
        mov v3.16b, v0.16b
        mov v1.16b, v2.16b
        ST2 {v0.s, v1.s}[0], [x3]
        ADD x3, x3, #8
        ADD x7, x3, #248
        ST2 {v2.s, v3.s}[2], [x7]
        LDR w19, =0
        DUP V0.8h, w19
        //second part
        ld1 {v0.h}[0] , [x2]
        ADD x2, x2, #2
        ld1 {v0.h}[2] , [x2]
        ADD x2, x2, #2
        rev64 v1.2s, v0.2s
        ld1 {v2.s}[0], [x0]
        ADD x0, x0, #4
        ADD x7, x0, #252
        ld1 {v2.s}[1], [x7]
        ld1 {v3.s}[0], [x4]
        ADD x7, x4, #256
        ld1 {v3.s}[1], [x7]
        SUB x4, x4, #4

        sMULL v4.2d, v0.2s, v2.2s   //qsub 2nd
        sshr v4.2d, v4.2d, #16
        sMULL v6.2d, v0.2s, v3.2s   //add 2nd
        sshr v6.2d, v6.2d, #16
        sMULL v8.2d, v1.2s, v2.2s   //add 1st
        sshr v8.2d, v8.2d, #16
        sMULL v10.2d, v1.2s, v3.2s  //qsub 1st
        sshr v10.2d, v10.2d, #16

        ADD v0.4s, v10.4s , v4.4s
        SQSUB v2.4s, v8.4s , v6.4s

        //shrn v0.2s, v0.2d,#32
        //shrn v2.2s, v2.2d,#32
        mov v3.16b, v0.16b
        mov v1.16b, v2.16b
        ST2 {v0.s, v1.s}[0], [x5]
        ADD x7, x5, #256
        ST2 {v2.s, v3.s}[2], [x7]
        SUB x5, x5, #8
        LDR w19, =0
        DUP V0.8h, w19
        //Third part
        ld1 {v0.h}[0] , [x2]
        ADD x2, x2, #2
        ld1 {v0.h}[2] , [x2]
        ADD x2, x2, #2
        rev64 v1.2s, v0.2s
        ld1 {v2.s}[0], [x0], #4
        ADD x7, x0, #252
        ld1 {v2.s}[1], [x7]
        ld1 {v3.s}[0], [x4]
        ADD x7, x4, #256
        ld1 {v3.s}[1], [x7]
        SUB x4, x4, #4

        sMULL v4.2d, v0.2s, v2.2s   //qsub 2nd
        sshr v4.2d, v4.2d, #16
        sMULL v6.2d, v0.2s, v3.2s   //add 2nd
        sshr v6.2d, v6.2d, #16
        sMULL v8.2d, v1.2s, v2.2s   //add 1st
        sshr v8.2d, v8.2d, #16
        sMULL v10.2d, v1.2s, v3.2s  //qsub 1st
        sshr v10.2d, v10.2d, #16

        add v0.4s, v8.4s , v6.4s
        SQSUB v2.4s, v10.4s , v4.4s

        //shrn v0.2s, v0.2d,#32
        //shrn v2.2s, v2.2d,#32
        mov v3.16b, v0.16b
        mov v1.16b, v2.16b
        ST2 {v0.s, v1.s}[0], [x3]
        ADD x3, x3, #8
        ADD x7, x3, #248
        ST2 {v2.s, v3.s}[2], [x7]
        LDR w19, =0
        DUP V0.8h, w19
        //Fourth part
        ld1 {v0.h}[0] , [x2]
        ADD x2, x2, #2
        ld1 {v0.h}[2] , [x2]
        ADD x2, x2, #2
        rev64 v1.2s, v0.2s
        ld1 {v2.s}[0], [x0]
        ADD x0, x0, #4
        ADD x7, x0, #252
        ld1 {v2.s}[1], [x7]
        ld1 {v3.s}[0], [x4]
        ADD x7, x4, #256
        ld1 {v3.s}[1], [x7]
        SUB x4, x4, #4

        sMULL v4.2d, v0.2s, v2.2s   //qsub 2nd
        sshr v4.2d, v4.2d, #16
        sMULL v6.2d, v0.2s, v3.2s   //add 2nd
        sshr v6.2d, v6.2d, #16
        sMULL v8.2d, v1.2s, v2.2s   //add 1st
        sshr v8.2d, v8.2d, #16
        sMULL v10.2d, v1.2s, v3.2s  //qsub 1st
        sshr v10.2d, v10.2d, #16


        ADD v0.4s, v10.4s , v4.4s
        SQSUB v2.4s, v8.4s , v6.4s

        //shrn v0.2s, v0.2d,#32
        //shrn v2.2s, v2.2d,#32
        mov v3.16b, v0.16b
        mov v1.16b, v2.16b
        ST2 {v0.s, v1.s}[0], [x5]
        ADD x7, x5, #256
        SUBS x6, x6, #1
        ST2 {v2.s, v3.s}[2], [x7]
        SUB x5, x5, #8
        LDR w19, =0
        DUP V0.8h, w19
        BGT LOOP1
        //VPOP {D8-D11}
        // LDMFD sp!, {x4-x12, x15}
        //ldp x19, x20,[sp],#16
        pop_v_regs
        ret
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
213
decoder/armv8/ixheaacd_cos_sin_mod_loop2.s
Normal file
213
decoder/armv8/ixheaacd_cos_sin_mod_loop2.s
Normal file
|
|
@ -0,0 +1,213 @@
|
|||
.macro push_v_regs
|
||||
stp q8, q9, [sp, #-32]!
|
||||
stp q10, q11, [sp, #-32]!
|
||||
stp q12, q13, [sp, #-32]!
|
||||
stp q14, q15, [sp, #-32]!
|
||||
stp X8, X9, [sp, #-16]!
|
||||
stp X10, X11, [sp, #-16]!
|
||||
stp X12, X13, [sp, #-16]!
|
||||
stp X14, X15, [sp, #-16]!
|
||||
stp X16, X17, [sp, #-16]!
|
||||
stp X18, X19, [sp, #-16]!
|
||||
stp X20, X21, [sp, #-16]!
|
||||
stp X29, X30, [sp, #-16]!
|
||||
.endm
|
||||
|
||||
.macro pop_v_regs
|
||||
ldp X29, X30, [sp], #16
|
||||
ldp X20, X21, [sp], #16
|
||||
ldp X18, X19, [sp], #16
|
||||
ldp X16, X17, [sp], #16
|
||||
ldp X14, X15, [sp], #16
|
||||
ldp X12, X13, [sp], #16
|
||||
ldp X10, X11, [sp], #16
|
||||
ldp X8, X9, [sp], #16
|
||||
ldp q14, q15, [sp], #32
|
||||
ldp q12, q13, [sp], #32
|
||||
ldp q10, q11, [sp], #32
|
||||
ldp q8, q9, [sp], #32
|
||||
.endm
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
|
||||
.global ixheaacd_cos_sin_mod_loop2
|
||||
ixheaacd_cos_sin_mod_loop2:
|
||||
|
||||
// STMFD sp!, {x4-x12, x14}
|
||||
push_v_regs
|
||||
//stp x19, x20,[sp,#-16]!
|
||||
//VPUSH {D8-D15}
|
||||
//generating load addresses
|
||||
ADD x3, x0, x2, LSL #3 //psubband1 = &subband[2 * M - 1];
|
||||
SUB x3, x3, #4
|
||||
ADD x10, x0, #256
|
||||
ADD x11, x10, x2, LSL #3
|
||||
SUB x11, x11, #4
|
||||
MOV x8, #-4
|
||||
LDR w19, =0
|
||||
DUP V0.4s, w19
|
||||
DUP V1.4s, w19
|
||||
|
||||
LDR w6, [x0]
|
||||
sxtw x6, w6
|
||||
ASR x4, x2, #1 //M_2 = ixheaacd_shx32(M, 1);
|
||||
SUB x4, x4, #1
|
||||
|
||||
ASR x6, x6, #1 //*psubband = *psubband >> 1;
|
||||
LD1 {v2.s}[0], [x3]
|
||||
|
||||
STR w6, [x0], #4 //psubband++;
|
||||
sxtw x6, w6
|
||||
LDR w7, [x0]
|
||||
sxtw x7, w7
|
||||
ASR x7, x7, #1
|
||||
sub x20, x7, #0
|
||||
neg x6, x20
|
||||
STR w6, [x3], #-4
|
||||
sxtw x6, w6
|
||||
LD1 {v3.s}[0], [x3] // im = *psubband1;
|
||||
|
||||
LD2 {v0.h, v1.h}[0], [x1], #4
|
||||
sxtl v0.4s, v0.4h
|
||||
sxtl v1.4s, v1.4h
|
||||
dup v0.2s, v0.s[0]
|
||||
dup v1.2s, v1.s[0]
|
||||
|
||||
LD1 {v2.s}[1], [x11] //re = *psubband12;
|
||||
|
||||
// LDR w6, [x10]
|
||||
// sxtw x6,w6
|
||||
// ASR x7, x6, #1
|
||||
// MOV x9, #0
|
||||
// QSUB x7, x9, x7
|
||||
LD1 {v4.s}[0], [x10]
|
||||
SSHR v4.2s, v4.2s, #1
|
||||
MOV x9, #0
|
||||
DUP v6.2s, w9
|
||||
SQSUB v4.2s, v6.2s, v4.2s
|
||||
|
||||
ST1 {v4.s}[0], [x11]
|
||||
// str X7, [X11]
|
||||
SUB x11, x11, #4
|
||||
// sxtw x7,w7
|
||||
|
||||
LDR w6, [x10, #4]
|
||||
sxtw x6, w6
|
||||
ASR x6, x6, #1
|
||||
STR w6, [x10], #4
|
||||
sxtw x6, w6
|
||||
|
||||
LD1 {v3.s}[1], [x11]
|
||||
|
||||
sMULL v4.2d, v0.2s, v2.2s //qsub 2nd
|
||||
sshr v4.2d, v4.2d, #16
|
||||
sMULL v6.2d, v0.2s, v3.2s //add 2nd
|
||||
sshr v6.2d, v6.2d, #16
|
||||
sMULL v8.2d, v1.2s, v2.2s //add 1st
|
||||
sshr v8.2d, v8.2d, #16
|
||||
sMULL v10.2d, v1.2s, v3.2s //qsub 1st
|
||||
sshr v10.2d, v10.2d, #16
|
||||
|
||||
add v12.2d, v8.2d , v6.2d
|
||||
SQSUB v14.2d, v10.2d , v4.2d
|
||||
SQSUB v16.2d, v4.2d , v10.2d
|
||||
|
||||
//shrn v12.2s, v12.2d,#32
|
||||
//shrn v14.2s, v14.2d,#32
|
||||
//shrn v16.2s, v16.2d,#32
|
||||
|
||||
ST1 {v12.s}[0], [x3], x8
|
||||
|
||||
ST1 {v14.s}[0], [x0], #4
|
||||
|
||||
SQNEG v12.4s, v12.4s
|
||||
|
||||
|
||||
ST1 {v12.s}[2], [x10], #4
|
||||
|
||||
ST1 {v16.s}[2], [x11], x8
|
||||
|
||||
LOOP1:
|
||||
LD1 {v2.2s}, [x0]
|
||||
LD1 {v3.2s}, [x10]
|
||||
LDR w5, [x3] //RE2
|
||||
sxtw x5, w5
|
||||
LDR w6, [x11] //RE3
|
||||
sxtw x6, w6
|
||||
//VTRN.32 D2, D3
|
||||
TRN1 v4.2s, v2.2s, v3.2s
|
||||
TRN2 v3.2s, v2.2s, v3.2s
|
||||
MOV v2.8b, v4.8b
|
||||
|
||||
sMULL v4.2d, v0.2s, v2.2s //qsub 2nd
|
||||
sshr v4.2d, v4.2d, #16
|
||||
sMULL v6.2d, v0.2s, v3.2s //add 2nd
|
||||
sshr v6.2d, v6.2d, #16
|
||||
sMULL v8.2d, v1.2s, v2.2s //add 1st
|
||||
sshr v8.2d, v8.2d, #16
|
||||
sMULL v10.2d, v1.2s, v3.2s //qsub 1st
|
||||
sshr v10.2d, v10.2d, #16
|
||||
|
||||
add v12.2d, v8.2d , v6.2d
|
||||
SQSUB v14.2d, v4.2d , v10.2d
|
||||
SQSUB v16.2d, v10.2d , v4.2d
|
||||
|
||||
//shrn v12.2s, v12.2d,#32
|
||||
//shrn v14.2s, v14.2d,#32
|
||||
//shrn v16.2s, v16.2d,#32
|
||||
|
||||
ST1 {v12.s}[0], [x0], #4
|
||||
ST1 {v14.s}[0], [x3], x8
|
||||
SQNEG v12.4s, v12.4s
|
||||
|
||||
ST1 {v12.s}[2], [x11], x8
|
||||
ST1 {v16.s}[2], [x10], #4
|
||||
|
||||
LDR w19, =0
|
||||
DUP V0.4s, w19
|
||||
DUP V1.4s, w19
|
||||
// second part
|
||||
LD2 {v0.h, v1.h}[0], [x1], #4
|
||||
sxtl v0.4s, v0.4h
|
||||
sxtl v1.4s, v1.4h
|
||||
dup v0.2s, v0.s[0]
|
||||
dup v1.2s, v1.s[0]
|
||||
|
||||
mov v3.s[0], w5
|
||||
mov v3.s[1], w6
|
||||
LD1 {v2.s}[0], [x3]
|
||||
LD1 {v2.s}[1], [x11]
|
||||
|
||||
sMULL v4.2d, v0.2s, v2.2s //qsub 2nd
|
||||
sshr v4.2d, v4.2d, #16
|
||||
sMULL v6.2d, v0.2s, v3.2s //add 2nd
|
||||
sshr v6.2d, v6.2d, #16
|
||||
sMULL v8.2d, v1.2s, v2.2s //add 1st
|
||||
sshr v8.2d, v8.2d, #16
|
||||
sMULL v10.2d, v1.2s, v3.2s //qsub 1st
|
||||
sshr v10.2d, v10.2d, #16
|
||||
|
||||
add v12.2d, v4.2d , v10.2d
|
||||
SQSUB v14.2d, v8.2d , v6.2d
|
||||
SQSUB v16.2d, v6.2d , v8.2d
|
||||
|
||||
//shrn v12.2s, v12.2d,#32
|
||||
//shrn v14.2s, v14.2d,#32
|
||||
//shrn v16.2s, v16.2d,#32
|
||||
|
||||
ST1 {v12.s}[0], [x3], x8
|
||||
ST1 {v14.s}[0], [x0], #4
|
||||
|
||||
SQNEG v12.4s, v12.4s
|
||||
|
||||
subs x4, x4, #1
|
||||
ST1 {v12.s}[2], [x10], #4
|
||||
ST1 {v16.s}[2], [x11], x8
|
||||
|
||||
BGT LOOP1
|
||||
//VPOP {D8-D15}
|
||||
// LDMFD sp!, {x4-x12, x15}
|
||||
//ldp x19, x20,[sp],#16
|
||||
pop_v_regs
|
||||
ret
|
||||
555
decoder/armv8/ixheaacd_fft32x32_ld2_armv8.s
Normal file
555
decoder/armv8/ixheaacd_fft32x32_ld2_armv8.s
Normal file
|
|
@ -0,0 +1,555 @@
|
|||
.macro push_v_regs
|
||||
stp X8, X9, [sp, #-16]!
|
||||
stp X10, X11, [sp, #-16]!
|
||||
stp X12, X13, [sp, #-16]!
|
||||
stp X14, X15, [sp, #-16]!
|
||||
stp X16, X17, [sp, #-16]!
|
||||
stp X18, X19, [sp, #-16]!
|
||||
stp X20, X21, [sp, #-16]!
|
||||
stp X22, X24, [sp, #-16]!
|
||||
stp X29, X30, [sp, #-16]!
|
||||
.endm
|
||||
|
||||
.macro pop_v_regs
|
||||
ldp X29, X30, [sp], #16
|
||||
ldp X22, X24, [sp], #16
|
||||
ldp X20, X21, [sp], #16
|
||||
ldp X18, X19, [sp], #16
|
||||
ldp X16, X17, [sp], #16
|
||||
ldp X14, X15, [sp], #16
|
||||
ldp X12, X13, [sp], #16
|
||||
ldp X10, X11, [sp], #16
|
||||
ldp X8, X9, [sp], #16
|
||||
.endm
|
||||
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_fft32x32_ld2_armv8
|
||||
|
||||
ixheaacd_fft32x32_ld2_armv8:
|
||||
|
||||
// STMFD sp!, {x4-x12,x14}
|
||||
push_v_regs
|
||||
stp x19, x20, [sp, #-16]!
|
||||
|
||||
//DIT Radix-4 FFT First Stage
|
||||
//First Butterfly
|
||||
MOV x0, x2
|
||||
MOV x1, x3
|
||||
LDR w2, [x0] //x_0 = x[0 ]
|
||||
sxtw x2, w2
|
||||
LDR w3, [x0, #32] //x_2 = x[8 ]
|
||||
sxtw x3, w3
|
||||
LDR w4, [x0, #64] //x_4 = x[16]
|
||||
sxtw x4, w4
|
||||
LDR w5, [x0, #96] //x_6 = x[24]
|
||||
sxtw x5, w5
|
||||
ADD w6, w2, w4 //xh0_0 = x_0 + x_4
|
||||
SUB w7, w2, w4 //xl0_0 = x_0 - x_4
|
||||
ADD w8, w3, w5 //xh0_1 = x_2 + x_6
|
||||
SUB w9, w3, w5 //xl0_1 = x_2 - x_6
|
||||
|
||||
LDR w2, [x0, #4] //x_1 = x[0 +1]
|
||||
sxtw x2, w2
|
||||
LDR w3, [x0, #36] //x_3 = x[8 +1]
|
||||
sxtw x3, w3
|
||||
LDR w4, [x0, #68] //x_5 = x[16+1]
|
||||
sxtw x4, w4
|
||||
LDR w5, [x0, #100] //x_7 = x[24+1]
|
||||
sxtw x5, w5
|
||||
ADD w10, w2, w4 //xh1_0 = x_1 + x_5
|
||||
SUB w11, w2, w4 //xl1_0 = x_1 - x_5
|
||||
ADD w12, w3, w5 //xh1_1 = x_3 + x_7
|
||||
SUB w14, w3, w5 //xl1_1 = x_3 - x_7
|
||||
|
||||
ADD w2, w6, w8 //n00 = xh0_0 + xh0_1
|
||||
ADD w3, w7, w14 //n10 = xl0_0 + xl1_1
|
||||
SUB w4, w6, w8 //n20 = xh0_0 - xh0_1
|
||||
SUB w5, w7, w14 //n30 = xl0_0 - xl1_1
|
||||
STR w2, [x0] //x[0 ] = n00
|
||||
STR w3, [x0, #32] //x[8 ] = n10
|
||||
STR w4, [x0, #64] //x[16] = n20
|
||||
STR w5, [x0, #96] //x[24] = n30
|
||||
|
||||
ADD w2, w10, w12 //n01 = xh1_0 + xh1_1
|
||||
SUB w3, w11, w9 //n11 = xl1_0 - xl0_1
|
||||
SUB w4, w10, w12 //n21 = xh1_0 - xh1_1
|
||||
ADD w5, w11, w9 //n31 = xl1_0 + xl0_1
|
||||
STR w2, [x0, #4] //x[1 ] = n01
|
||||
STR w3, [x0, #36] //x[8+1 ] = n11
|
||||
STR w4, [x0, #68] //x[16+1] = n21
|
||||
STR w5, [x0, #100] //x[24+1] = n31
|
||||
|
||||
//Second Butterfly
|
||||
LDR w2, [x0, #8] //x_0 = x[2 ]
|
||||
sxtw x2, w2
|
||||
LDR w3, [x0, #40] //x_2 = x[10]
|
||||
sxtw x3, w3
|
||||
LDR w4, [x0, #72] //x_4 = x[18]
|
||||
sxtw x4, w4
|
||||
LDR w5, [x0, #104] //x_6 = x[26]
|
||||
sxtw x5, w5
|
||||
ADD w6, w2, w4 //xh0_0 = x_0 + x_4
|
||||
SUB w7, w2, w4 //xl0_0 = x_0 - x_4
|
||||
ADD w8, w3, w5 //xh0_1 = x_2 + x_6
|
||||
SUB w9, w3, w5 //xl0_1 = x_2 - x_6
|
||||
|
||||
LDR w2, [x0, #12] //x_1 = x[2 +1]
|
||||
sxtw x2, w2
|
||||
LDR w3, [x0, #44] //x_3 = x[10+1]
|
||||
sxtw x3, w3
|
||||
LDR w4, [x0, #76] //x_5 = x[18+1]
|
||||
sxtw x4, w4
|
||||
LDR w5, [x0, #108] //x_7 = x[26+1]
|
||||
sxtw x5, w5
|
||||
ADD w10, w2, w4 //xh1_0 = x_1 + x_5
|
||||
SUB w11, w2, w4 //xl1_0 = x_1 - x_5
|
||||
ADD w12, w3, w5 //xh1_1 = x_3 + x_7
|
||||
SUB w14, w3, w5 //xl1_1 = x_3 - x_7
|
||||
|
||||
ADD w2, w6, w8 //n00 = xh0_0 + xh0_1
|
||||
ADD w3, w7, w14 //n10 = xl0_0 + xl1_1
|
||||
SUB w4, w6, w8 //n20 = xh0_0 - xh0_1
|
||||
SUB w5, w7, w14 //n30 = xl0_0 - xl1_1
|
||||
STR w2, [x0, #8] //x[2 ] = n00
|
||||
STR w3, [x0, #40] //x[10] = n10
|
||||
STR w4, [x0, #72] //x[18] = n20
|
||||
STR w5, [x0, #104] //x[26] = n30
|
||||
|
||||
ADD w2, w10, w12 //n01 = xh1_0 + xh1_1
|
||||
SUB w3, w11, w9 //n11 = xl1_0 - xl0_1
|
||||
SUB w4, w10, w12 //n21 = xh1_0 - xh1_1
|
||||
ADD w5, w11, w9 //n31 = xl1_0 + xl0_1
|
||||
STR w2, [x0, #12] //x[2 +1] = n01
|
||||
STR w3, [x0, #44] //x[10+1] = n11
|
||||
STR w4, [x0, #76] //x[18+1] = n21
|
||||
STR w5, [x0, #108] //x[26+1] = n31
|
||||
|
||||
//Third Butterfly
|
||||
LDR w2, [x0, #16] //x_0 = x[4 ]
|
||||
sxtw x2, w2
|
||||
LDR w3, [x0, #48] //x_2 = x[12]
|
||||
sxtw x3, w3
|
||||
LDR w4, [x0, #80] //x_4 = x[20]
|
||||
sxtw x4, w4
|
||||
LDR w5, [x0, #112] //x_6 = x[28]
|
||||
sxtw x5, w5
|
||||
ADD w6, w2, w4 //xh0_0 = x_0 + x_4
|
||||
SUB w7, w2, w4 //xl0_0 = x_0 - x_4
|
||||
ADD w8, w3, w5 //xh0_1 = x_2 + x_6
|
||||
SUB w9, w3, w5 //xl0_1 = x_2 - x_6
|
||||
|
||||
LDR w2, [x0, #20] //x_1 = x[4 +1]
|
||||
sxtw x2, w2
|
||||
LDR w3, [x0, #52] //x_3 = x[12+1]
|
||||
sxtw x3, w3
|
||||
LDR w4, [x0, #84] //x_5 = x[20+1]
|
||||
sxtw x4, w4
|
||||
LDR w5, [x0, #116] //x_7 = x[28+1]
|
||||
sxtw x5, w5
|
||||
ADD w10, w2, w4 //xh1_0 = x_1 + x_5
|
||||
SUB w11, w2, w4 //xl1_0 = x_1 - x_5
|
||||
ADD w12, w3, w5 //xh1_1 = x_3 + x_7
|
||||
SUB w14, w3, w5 //xl1_1 = x_3 - x_7
|
||||
|
||||
ADD w2, w6, w8 //n00 = xh0_0 + xh0_1
|
||||
ADD w3, w7, w14 //n10 = xl0_0 + xl1_1
|
||||
SUB w4, w6, w8 //n20 = xh0_0 - xh0_1
|
||||
SUB w5, w7, w14 //n30 = xl0_0 - xl1_1
|
||||
STR w2, [x0, #16] //x[4 ] = n00
|
||||
STR w3, [x0, #48] //x[12] = n10
|
||||
STR w4, [x0, #80] //x[20] = n20
|
||||
STR w5, [x0, #112] //x[28] = n30
|
||||
|
||||
ADD w2, w10, w12 //n01 = xh1_0 + xh1_1
|
||||
SUB w3, w11, w9 //n11 = xl1_0 - xl0_1
|
||||
SUB w4, w10, w12 //n21 = xh1_0 - xh1_1
|
||||
ADD w5, w11, w9 //n31 = xl1_0 + xl0_1
|
||||
STR w2, [x0, #20] //x[4 +1] = n01
|
||||
STR w3, [x0, #52] //x[12+1] = n11
|
||||
STR w4, [x0, #84] //x[20+1] = n21
|
||||
STR w5, [x0, #116] //x[28+1] = n31
|
||||
|
||||
//Fourth Butterfly
|
||||
LDR w2, [x0, #24] //x_0 = x[6 ]
|
||||
sxtw x2, w2
|
||||
LDR w3, [x0, #56] //x_2 = x[14]
|
||||
sxtw x3, w3
|
||||
LDR w4, [x0, #88] //x_4 = x[22]
|
||||
sxtw x4, w4
|
||||
LDR w5, [x0, #120] //x_6 = x[30]
|
||||
sxtw x5, w5
|
||||
ADD w6, w2, w4 //xh0_0 = x_0 + x_4
|
||||
SUB w7, w2, w4 //xl0_0 = x_0 - x_4
|
||||
ADD w8, w3, w5 //xh0_1 = x_2 + x_6
|
||||
SUB w9, w3, w5 //xl0_1 = x_2 - x_6
|
||||
|
||||
LDR w2, [x0, #28] //x_1 = x[6 +1]
|
||||
sxtw x2, w2
|
||||
LDR w3, [x0, #60] //x_3 = x[14+1]
|
||||
sxtw x3, w3
|
||||
LDR w4, [x0, #92] //x_5 = x[22+1]
|
||||
sxtw x4, w4
|
||||
LDR w5, [x0, #124] //x_7 = x[30+1]
|
||||
sxtw x5, w5
|
||||
ADD w10, w2, w4 //xh1_0 = x_1 + x_5
|
||||
SUB w11, w2, w4 //xl1_0 = x_1 - x_5
|
||||
ADD w12, w3, w5 //xh1_1 = x_3 + x_7
|
||||
SUB w14, w3, w5 //xl1_1 = x_3 - x_7
|
||||
|
||||
ADD w2, w6, w8 //n00 = xh0_0 + xh0_1
|
||||
ADD w3, w7, w14 //n10 = xl0_0 + xl1_1
|
||||
SUB w4, w6, w8 //n20 = xh0_0 - xh0_1
|
||||
SUB w5, w7, w14 //n30 = xl0_0 - xl1_1
|
||||
STR w2, [x0, #24] //x[6 ] = n00
|
||||
STR w3, [x0, #56] //x[14] = n10
|
||||
STR w4, [x0, #88] //x[22] = n20
|
||||
STR w5, [x0, #120] //x[30] = n30
|
||||
|
||||
ADD w2, w10, w12 //n01 = xh1_0 + xh1_1
|
||||
SUB w3, w11, w9 //n11 = xl1_0 - xl0_1
|
||||
SUB w4, w10, w12 //n21 = xh1_0 - xh1_1
|
||||
ADD w5, w11, w9 //n31 = xl1_0 + xl0_1
|
||||
STR w2, [x0, #28] //x[6 +1] = n01
|
||||
STR w3, [x0, #60] //x[14+1] = n11
|
||||
STR w4, [x0, #92] //x[22+1] = n21
|
||||
STR w5, [x0, #124] //x[30+1] = n31
|
||||
|
||||
|
||||
//DIT Radix-4 FFT Second Stage
|
||||
//First Butterfly
|
||||
LDR w2, [x0] //inp_0qr = x[0]
|
||||
sxtw x2, w2
|
||||
LDR w3, [x0, #8] //inp_1qr = x[2]
|
||||
sxtw x3, w3
|
||||
LDR w4, [x0, #16] //inp_2qr = x[4]
|
||||
sxtw x4, w4
|
||||
LDR w5, [x0, #24] //inp_3qr = x[6]
|
||||
sxtw x5, w5
|
||||
ADD w6, w2, w4 //sum_0qr = mul_0qr + mul_2qr
|
||||
SUB w7, w2, w4 //sum_1qr = mul_0qr - mul_2qr
|
||||
ADD w8, w3, w5 //sum_2qr = mul_1qr + mul_3qr
|
||||
SUB w9, w3, w5 //sum_3qr = mul_1qr - mul_3qr
|
||||
|
||||
LDR w2, [x0, #4] //inp_0qi = x[1]
|
||||
sxtw x2, w2
|
||||
LDR w3, [x0, #12] //inp_1qi = x[3]
|
||||
sxtw x3, w3
|
||||
LDR w4, [x0, #20] //inp_2qi = x[5]
|
||||
sxtw x4, w4
|
||||
LDR w5, [x0, #28] //inp_3qi = x[7]
|
||||
sxtw x5, w5
|
||||
ADD w10, w2, w4 //sum_0qi = mul_0qi + mul_2qi
|
||||
SUB w11, w2, w4 //sum_1qi = mul_0qi - mul_2qi
|
||||
ADD w12, w3, w5 //sum_2qi = mul_1qi + mul_3qi
|
||||
SUB w14, w3, w5 //sum_3qi = mul_1qi - mul_3qi
|
||||
|
||||
ADD w2, w6, w8 //sum_0qr + sum_2qr
|
||||
ADD w3, w7, w14 //sum_1qr + sum_3qi
|
||||
SUB w4, w6, w8 //sum_0qr - sum_2qr
|
||||
SUB w5, w7, w14 //sum_1qr - sum_3qi
|
||||
STR w2, [x1] //y[0 ] = sum_0qr + sum_2qr
|
||||
STR w3, [x1, #32] //y[8 ] = sum_1qr + sum_3qi
|
||||
STR w4, [x1, #64] //y[16] = sum_0qr - sum_2qr
|
||||
STR w5, [x1, #96] //y[24] = sum_1qr - sum_3qi
|
||||
|
||||
ADD w2, w10, w12 //sum_0qi + sum_2qi
|
||||
SUB w3, w11, w9 //sum_1qi - sum_3qr
|
||||
SUB w4, w10, w12 //sum_0qi - sum_2qi
|
||||
ADD w5, w11, w9 //sum_1qi + sum_3qr
|
||||
STR w2, [x1, #4] //y[0 +1] = sum_0qi + sum_2qi
|
||||
STR w3, [x1, #36] //y[8 +1] = sum_1qi - sum_3qr
|
||||
STR w4, [x1, #68] //y[16+1] = sum_0qi - sum_2qi
|
||||
STR w5, [x1, #100] //y[24+1] = sum_1qi + sum_3qr
|
||||
|
||||
|
||||
//Load twiddle factors
|
||||
// LDR w11, =2310960706 //0x89BE7642
|
||||
LDR w11, =0x7642
|
||||
sxth w11, w11
|
||||
LDR w21, =0x89BE
|
||||
sxth w21, w21
|
||||
// LDR w12, =3473158396 //0xCF0430FC
|
||||
LDR w12, =0x30FC
|
||||
sxth w12, w12
|
||||
LDR w22, =0xCF04
|
||||
sxth w22, w22
|
||||
// LDR w14, =2776455811 //0xA57D5A83
|
||||
LDR w14, =0x5A83
|
||||
sxth w14, w14
|
||||
LDR w24, =0xA57D
|
||||
sxth w24, w24
|
||||
|
||||
//Second Butterfly
|
||||
LDR w2, [x0, #32] //mul_0qr = inp_0qr = x[8]
|
||||
sxtw x2, w2
|
||||
LDR w3, [x0, #36] //mul_0qi = inp_1qr = x[9]
|
||||
sxtw x3, w3
|
||||
|
||||
LDR w5, [x0, #40] //inp_1qr = x[10]
|
||||
sxtw x5, w5
|
||||
LDR w6, [x0, #44] //inp_1qi = x[11]
|
||||
sxtw x6, w6
|
||||
|
||||
SMULL x4, w5, w11
|
||||
ASR x4, x4, #16
|
||||
// SMULWB x4, x5, x11 //mul_1qr = mpy_16_32_ns( 0x7642 , inp_1qr)
|
||||
|
||||
SMULL x20, w6, w12
|
||||
ASR x20, x20, #16
|
||||
ADD w4, w4, w20
|
||||
// SMLAWB x4, x6, x12, x4 //mul_1qr -= mpy_16_32_ns(-0x30FC , inp_1qi)
|
||||
|
||||
SMULL x5, w5, w22
|
||||
ASR x5, x5, #16
|
||||
// SMULWT x5, x5, x12 //mul_1qi = mpy_16_32_ns(-0x30FC , inp_1qr)
|
||||
|
||||
LDR w7, [x0, #48] //inp_2qr = x[12]
|
||||
sxtw x7, w7
|
||||
LDR w8, [x0, #52] //inp_2qi = x[13]
|
||||
sxtw x8, w8
|
||||
|
||||
//Moved for delay slot
|
||||
SMULL x20, w6, w11
|
||||
ASR x20, x20, #16
|
||||
ADD w5, w5, w20
|
||||
// SMLAWB x5, x6, x11, x5 //mul_1qi += mpy_16_32_ns( 0x7642 , inp_1qi)
|
||||
|
||||
ADD w6, w7, w8 //(inp_2qr + inp_2qi)
|
||||
|
||||
SMULL x6, w6, w14
|
||||
ASR x6, x6, #16
|
||||
// SMULWB x6, x6, x14 //mul_2qr = mpy_16_32_ns(0x5A83 , (inp_2qr + inp_2qi))
|
||||
|
||||
SUB w7, w8, w7 //(-inp_2qr + inp_2qi)
|
||||
|
||||
SMULL x7, w7, w14
|
||||
ASR x7, x7, #16
|
||||
// SMULWB x7, x7, x14 //mul_2qi = mpy_16_32_ns(0x5A83 , (-inp_2qr + inp_2qi))
|
||||
|
||||
LDR x9 , [x0, #56] //inp_3qr = x[14]
|
||||
sxtw x9, w9
|
||||
LDR w10, [x0, #60] //inp_3qi = x[15]
|
||||
sxtw x10, w10
|
||||
|
||||
SMULL x8, w9, w12
|
||||
ASR x8, x8, #16
|
||||
// SMULWB x8, x9 , x12 //mul_3qr = mpy_16_32_ns( 0x30FC , inp_3qr)
|
||||
|
||||
SMULL x20, w10, w11
|
||||
ASR x20, x20, #16
|
||||
ADD w8, w8, w20
|
||||
// SMLAWB x8, x10, x11, x8 //mul_3qr -= mpy_16_32_ns(-0x7642 , inp_3qi)//
|
||||
|
||||
SMULL x9, w9 , w21
|
||||
ASR x9, x9, #16
|
||||
// SMULWT x9, x9 , x11 //mul_3qi = mpy_16_32_ns(-0x7642 , inp_3qr)
|
||||
|
||||
SMULL x20, w10, w12
|
||||
ASR x20, x20, #16
|
||||
ADD w9, w9, w20
|
||||
// SMLAWB x9, x10, x12, x9 //mul_3qi += mpy_16_32_ns( 0x30FC , inp_3qi)
|
||||
|
||||
ADD w10, w2, w6, lsl #1 //sum_0qr = mul_0qr + (mul_2qr << 1)
|
||||
SUB w2 , w2, w6, lsl #1 //sum_1qr = mul_0qr - (mul_2qr << 1)
|
||||
ADD w6 , w4, w8 //sum_2qr = mul_1qr + mul_3qr
|
||||
SUB w4 , w4, w8 //sum_3qr = mul_1qr - mul_3qr
|
||||
|
||||
ADD w8 , w3, w7, lsl #1 //sum_0qi = mul_0qi + (mul_2qi << 1)
|
||||
SUB w3 , w3, w7, lsl #1 //sum_1qi = mul_0qi - (mul_2qi << 1)
|
||||
ADD w7 , w5, w9 //sum_2qi = mul_1qi + mul_3qi
|
||||
SUB w5 , w5, w9 //sum_3qi = mul_1qi - mul_3qi
|
||||
|
||||
ADD w9 , w10, w6, lsl #1 //sum_0qr + (sum_2qr << 1)
|
||||
SUB w10, w10, w6, lsl #1 //sum_0qr - (sum_2qr << 1)
|
||||
ADD w6 , w2 , w5, lsl #1 //sum_1qr + (sum_3qi << 1)
|
||||
SUB w2 , w2 , w5, lsl #1 //sum_1qr - (sum_3qi << 1)
|
||||
STR w9 , [x1, #8] //y[2 ] = sum_0qr + (sum_2qr << 1)
|
||||
STR w10, [x1, #72] //y[18] = sum_0qr - (sum_2qr << 1)
|
||||
STR w6 , [x1, #40] //y[10] = sum_1qr + (sum_3qi << 1)
|
||||
STR w2 , [x1, #104] //y[26] = sum_1qr - (sum_3qi << 1)
|
||||
|
||||
ADD w5 , w8 , w7, lsl #1 //sum_0qi + (sum_2qi << 1)
|
||||
SUB w8 , w8 , w7, lsl #1 //sum_0qi - (sum_2qi << 1)
|
||||
SUB w7 , w3 , w4, lsl #1 //sum_1qi - (sum_3qr << 1)
|
||||
ADD w3 , w3 , w4, lsl #1 //sum_1qi + (sum_3qr << 1)
|
||||
STR w5 , [x1, #12] //y[2 +1] = sum_0qi + (sum_2qi << 1)
|
||||
STR w8 , [x1, #76] //y[18+1] = sum_0qi - (sum_2qi << 1)
|
||||
STR w7 , [x1, #44] //y[10+1] = sum_1qi - (sum_3qr << 1)
|
||||
STR w3 , [x1, #108] //y[26+1] = sum_1qi + (sum_3qr << 1)
|
||||
|
||||
//Third Butterfly
|
||||
LDR w2, [x0, #64] //mul_0qr = inp_0qr = x[16]
|
||||
sxtw x2, w2
|
||||
LDR w5, [x0, #72] //inp_1qr = x[18]
|
||||
sxtw x5, w5
|
||||
LDR w6, [x0, #76] //inp_1qi = x[19]
|
||||
sxtw x6, w6
|
||||
//Moved for delay slot
|
||||
LDR w3, [x0, #68] //mul_0qi = inp_1qr = x[17]
|
||||
sxtw x3, w3
|
||||
|
||||
ADD w4, w5, w6 //(inp_1qr + inp_1qi)
|
||||
|
||||
SMULL x4, w4, w14
|
||||
ASR x4, x4, #16
|
||||
// SMULWB x4, x4, x14 //mul_1qr = mpy_16_32_ns(0x5A83 , (inp_1qr + inp_1qi))
|
||||
SUB w5, w6, w5 //(-inp_1qr + inp_1qi)
|
||||
|
||||
SMULL x5, w5, w14
|
||||
ASR x5, x5, #16
|
||||
// SMULWB x5, x5, x14 //mul_1qi = mpy_16_32_ns(0x5A83 , (-inp_1qr + inp_1qi))
|
||||
|
||||
LDR w6, [x0, #84] //mul_2qr = inp_2qi = x[21]
|
||||
sxtw x6, w6
|
||||
LDR x9 , [x0, #88] //inp_3qr = x[22]
|
||||
sxtw x9, w9
|
||||
LDR w10, [x0, #92] //inp_3qi = x[23]
|
||||
sxtw x10, w10
|
||||
//Moved for delay slot
|
||||
LDR w7, [x0, #80] //mul_2qi = inp_2qr = x[20]
|
||||
sxtw x7, w7
|
||||
|
||||
SUB w8 , w10, w9 //(-inp_3qr + inp_3qi)
|
||||
|
||||
SMULL x8, w8, w14
|
||||
ASR x8, x8, #16
|
||||
// SMULWB x8 , x8 , x14 //mul_3qr = mpy_16_32_ns( 0x5A83 , (-inp_3qr + inp_3qi))
|
||||
|
||||
ADD w9 , w9 , w10 //(inp_3qr + inp_3qi)
|
||||
|
||||
SMULL x9, w9, w24
|
||||
ASR x9, x9, #16
|
||||
// SMULWT x9 , x9 , x14 //mul_3qi = mpy_16_32_ns(-0x5A83 , (inp_3qr + inp_3qi))
|
||||
|
||||
ADD w10, w2, w6 //sum_0qr = mul_0qr + mul_2qr
|
||||
SUB w2 , w2, w6 //sum_1qr = mul_0qr - mul_2qr
|
||||
ADD w6 , w4, w8 //sum_2qr = mul_1qr + mul_3qr
|
||||
SUB w4 , w4, w8 //sum_3qr = mul_1qr - mul_3qr
|
||||
|
||||
SUB w8 , w3, w7 //sum_0qi = mul_0qi - mul_2qi
|
||||
ADD w3 , w3, w7 //sum_1qi = mul_0qi + mul_2qi
|
||||
ADD w7 , w5, w9 //sum_2qi = mul_1qi + mul_3qi
|
||||
SUB w5 , w5, w9 //sum_3qi = mul_1qi - mul_3qi
|
||||
|
||||
ADD w9 , w10, w6, lsl #1 //sum_0qr + (sum_2qr << 1)
|
||||
SUB w10, w10, w6, lsl #1 //sum_0qr - (sum_2qr << 1)
|
||||
ADD w6 , w2 , w5, lsl #1 //sum_1qr + (sum_3qi << 1)
|
||||
SUB w2 , w2 , w5, lsl #1 //sum_1qr - (sum_3qi << 1)
|
||||
STR w9 , [x1, #16] //y[4 ] = sum_0qr + (sum_2qr << 1)
|
||||
STR w10, [x1, #80] //y[20] = sum_0qr - (sum_2qr << 1)
|
||||
STR w6 , [x1, #48] //y[12] = sum_1qr + (sum_3qi << 1)
|
||||
STR w2 , [x1, #112] //y[28] = sum_1qr - (sum_3qi << 1)
|
||||
|
||||
ADD w5, w8, w7, lsl #1 //sum_0qi + (sum_2qi << 1)
|
||||
SUB w8, w8, w7, lsl #1 //sum_0qi - (sum_2qi << 1)
|
||||
SUB w7, w3, w4, lsl #1 //sum_1qi - (sum_3qr << 1)
|
||||
ADD w3, w3, w4, lsl #1 //sum_1qi + (sum_3qr << 1)
|
||||
STR w5 , [x1, #20] //y[4 +1] = sum_0qi + (sum_2qi << 1)
|
||||
STR w8 , [x1, #84] //y[20+1] = sum_0qi - (sum_2qi << 1)
|
||||
STR w7 , [x1, #52] //y[12+1] = sum_1qi - (sum_3qr << 1)
|
||||
STR w3 , [x1, #116] //y[28+1] = sum_1qi + (sum_3qr << 1)
|
||||
|
||||
//Fourth Butterfly
|
||||
LDR w2, [x0, #96] //mul_0qr = inp_0qr = x[24]
|
||||
sxtw x2, w2
|
||||
LDR w3, [x0, #100] //mul_0qi = inp_1qr = x[25]
|
||||
sxtw x3, w3
|
||||
|
||||
LDR w5, [x0, #104] //inp_1qr = x[26]
|
||||
sxtw x5, w5
|
||||
LDR w6, [x0, #108] //inp_1qi = x[27]
|
||||
sxtw x6, w6
|
||||
|
||||
SMULL x4, w5, w12
|
||||
ASR x4, x4, #16
|
||||
// SMULWB x4, x5, x12 //mul_1qr = mpy_16_32_ns( 0x30FC , inp_1qr)
|
||||
|
||||
SMULL x20, w6, w11
|
||||
ASR x20, x20, #16
|
||||
ADD w4, w4, w20
|
||||
// SMLAWB x4, x6, x11, x4 //mul_1qr -= mpy_16_32_ns(-0x7642 , inp_1qi)
|
||||
|
||||
SMULL x5, w5, w21
|
||||
ASR x5, x5, #16
|
||||
// SMULWT x5, x5, x11 //mul_1qi = mpy_16_32_ns(-0x7642 , inp_1qr)
|
||||
|
||||
LDR w7, [x0, #112] //inp_2qr = x[28]
|
||||
sxtw x7, w7
|
||||
LDR w8, [x0, #116] //inp_2qi = x[29]
|
||||
sxtw x8, w8
|
||||
|
||||
//Moved for delay slot
|
||||
SMULL x20, w6, w12
|
||||
ASR x20, x20, #16
|
||||
ADD w5, w5, w20
|
||||
// SMLAWB x5, x6, x12, x5 //mul_1qi += mpy_16_32_ns( 0x30FC , inp_1qi)
|
||||
|
||||
SUB w6, w8, w7 //(-inp_2qr + inp_2qi)
|
||||
|
||||
SMULL x6, w6, w14
|
||||
ASR x6, x6, #16
|
||||
// SMULWB x6, x6, x14 //mul_2qr = mpy_16_32_ns( 0x5A83 , (-inp_2qr + inp_2qi))
|
||||
ADD w7, w8, w7 //(inp_2qr + inp_2qi)
|
||||
|
||||
SMULL x7, w7, w24
|
||||
ASR x7, x7, #16
|
||||
// SMULWT x7, x7, x14 //mul_2qi = mpy_16_32_ns(-0x5A83 , (inp_2qr + inp_2qi))
|
||||
|
||||
LDR w9 , [x0, #120] //inp_3qr = x[30]
|
||||
sxtw x9, w9
|
||||
LDR w10, [x0, #124] //inp_3qi = x[31]
|
||||
sxtw x10, w10
|
||||
|
||||
SMULL x8, w9, w21
|
||||
ASR x8, x8, #16
|
||||
// SMULWT x8, x9 , x11 //mul_3qr = mpy_16_32_ns(-0x7642 , inp_3qr)
|
||||
|
||||
SMULL x20, w10, w22
|
||||
ASR x20, x20, #16
|
||||
ADD w8, w8, w20
|
||||
// SMLAWT x8, x10, x12, x8 //mul_3qr -= mpy_16_32_ns( 0x30FC , inp_3qi)//
|
||||
|
||||
SMULL x9, w9, w12
|
||||
ASR x9, x9, #16
|
||||
// SMULWB x9, x9 , x12 //mul_3qi = mpy_16_32_ns( 0x30FC , inp_3qr)
|
||||
|
||||
SMULL x20, w10, w21
|
||||
ASR x20, x20, #16
|
||||
ADD w9, w9, w20
|
||||
// SMLAWT x9, x10, x11, x9 //mul_3qi += mpy_16_32_ns(-0x7642 , inp_3qi)
|
||||
|
||||
ADD w10, w2, w6, lsl #1 //sum_0qr = mul_0qr + (mul_2qr << 1)
|
||||
SUB w2 , w2, w6, lsl #1 //sum_1qr = mul_0qr - (mul_2qr << 1)
|
||||
ADD w6 , w4, w8 //sum_2qr = mul_1qr + mul_3qr
|
||||
SUB w4 , w4, w8 //sum_3qr = mul_1qr - mul_3qr
|
||||
|
||||
ADD w8 , w3, w7, lsl #1 //sum_0qi = mul_0qi + (mul_2qi << 1)
|
||||
SUB w3 , w3, w7, lsl #1 //sum_1qi = mul_0qi - (mul_2qi << 1)
|
||||
ADD w7 , w5, w9 //sum_2qi = mul_1qi + mul_3qi
|
||||
SUB w5 , w5, w9 //sum_3qi = mul_1qi - mul_3qi
|
||||
|
||||
ADD w9 , w10, w6, lsl #1 //sum_0qr + (sum_2qr << 1)
|
||||
SUB w10, w10, w6, lsl #1 //sum_0qr - (sum_2qr << 1)
|
||||
ADD w6 , w2 , w5, lsl #1 //sum_1qr + (sum_3qi << 1)
|
||||
SUB w2 , w2 , w5, lsl #1 //sum_1qr - (sum_3qi << 1)
|
||||
STR w9 , [x1, #24] //y[6 ] = sum_0qr + (sum_2qr << 1)
|
||||
STR w10, [x1, #88] //y[22] = sum_0qr - (sum_2qr << 1)
|
||||
STR w6 , [x1, #56] //y[14] = sum_1qr + (sum_3qi << 1)
|
||||
STR w2 , [x1, #120] //y[30] = sum_1qr - (sum_3qi << 1)
|
||||
|
||||
ADD w5 , w8 , w7, lsl #1 //sum_0qi + (sum_2qi << 1)
|
||||
SUB w8 , w8 , w7, lsl #1 //sum_0qi - (sum_2qi << 1)
|
||||
SUB w7 , w3 , w4, lsl #1 //sum_1qi - (sum_3qr << 1)
|
||||
ADD w3 , w3 , w4, lsl #1 //sum_1qi + (sum_3qr << 1)
|
||||
STR w5 , [x1, #28] //y[6 +1] = sum_0qi + (sum_2qi << 1)
|
||||
STR w8 , [x1, #92] //y[22+1] = sum_0qi - (sum_2qi << 1)
|
||||
STR w7 , [x1, #60] //y[14+1] = sum_1qi - (sum_3qr << 1)
|
||||
STR w3 , [x1, #124] //y[30+1] = sum_1qi + (sum_3qr << 1)
|
||||
|
||||
// LDMFD sp!, {x4-x12,x15}
|
||||
ldp x19, x20, [sp], #16
|
||||
pop_v_regs
|
||||
ret
|
||||
|
||||
248
decoder/armv8/ixheaacd_function_selector_armv8.c
Normal file
248
decoder/armv8/ixheaacd_function_selector_armv8.c
Normal file
|
|
@ -0,0 +1,248 @@
|
|||
/******************************************************************************
|
||||
* *
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "ixheaacd_sbr_common.h"
|
||||
#include <ixheaacd_type_def.h>
|
||||
|
||||
#include "ixheaacd_constants.h"
|
||||
#include <ixheaacd_basic_ops32.h>
|
||||
#include <ixheaacd_basic_ops16.h>
|
||||
#include <ixheaacd_basic_ops40.h>
|
||||
#include "ixheaacd_basic_ops.h"
|
||||
|
||||
#include <ixheaacd_basic_op.h>
|
||||
#include "ixheaacd_intrinsics.h"
|
||||
#include "ixheaacd_common_rom.h"
|
||||
#include "ixheaacd_sbrdecsettings.h"
|
||||
#include "ixheaacd_bitbuffer.h"
|
||||
#include "ixheaacd_defines.h"
|
||||
|
||||
#include "ixheaacd_pns.h"
|
||||
|
||||
#include <ixheaacd_aac_rom.h>
|
||||
#include "ixheaacd_aac_imdct.h"
|
||||
#include "ixheaacd_pulsedata.h"
|
||||
|
||||
#include "ixheaacd_drc_data_struct.h"
|
||||
|
||||
#include "ixheaacd_lt_predict.h"
|
||||
|
||||
#include "ixheaacd_channelinfo.h"
|
||||
#include "ixheaacd_drc_dec.h"
|
||||
|
||||
#include "ixheaacd_sbrdecoder.h"
|
||||
#include "ixheaacd_tns.h"
|
||||
#include "ixheaacd_sbr_scale.h"
|
||||
#include "ixheaacd_lpp_tran.h"
|
||||
#include "ixheaacd_env_extr_part.h"
|
||||
#include <ixheaacd_sbr_rom.h>
|
||||
#include "ixheaacd_block.h"
|
||||
#include "ixheaacd_hybrid.h"
|
||||
#include "ixheaacd_ps_dec.h"
|
||||
#include "ixheaacd_env_extr.h"
|
||||
#include "ixheaacd_basic_funcs.h"
|
||||
#include "ixheaacd_env_calc.h"
|
||||
#include "ixheaacd_dsp_fft32x32s.h"
|
||||
#include "ixheaacd_interface.h"
|
||||
|
||||
WORD32 (*ixheaacd_fix_div)(WORD32, WORD32) = &ixheaacd_fix_div_dec;
|
||||
|
||||
VOID(*ixheaacd_covariance_matrix_calc)
|
||||
(WORD32 *, ixheaacd_lpp_trans_cov_matrix *,
|
||||
WORD32) = &ixheaacd_covariance_matrix_calc_dec;
|
||||
|
||||
VOID(*ixheaacd_covariance_matrix_calc_2)
|
||||
(ixheaacd_lpp_trans_cov_matrix *, WORD32 *, WORD32,
|
||||
WORD16) = &ixheaacd_covariance_matrix_calc_2_dec;
|
||||
|
||||
VOID(*ixheaacd_over_lap_add1)
|
||||
(WORD32 *, WORD32 *, WORD16 *, const WORD16 *, WORD16, WORD16,
|
||||
WORD16) = &ixheaacd_over_lap_add1_armv8;
|
||||
|
||||
VOID(*ixheaacd_over_lap_add2)
|
||||
(WORD32 *, WORD32 *, WORD32 *, const WORD16 *, WORD16, WORD16,
|
||||
WORD16) = &ixheaacd_over_lap_add2_armv8;
|
||||
|
||||
VOID(*ixheaacd_decorr_filter2)
|
||||
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *p_buf_left_real, WORD32 *p_buf_left_imag,
|
||||
WORD32 *p_buf_right_real, WORD32 *p_buf_right_imag,
|
||||
ia_ps_tables_struct *ps_tables_ptr,
|
||||
WORD16 *transient_ratio) = &ixheaacd_decorr_filter2_dec;
|
||||
|
||||
VOID(*ixheaacd_decorr_filter1)
|
||||
(ia_ps_dec_struct *ptr_ps_dec, ia_ps_tables_struct *ps_tables_ptr,
|
||||
WORD16 *transient_ratio) = &ixheaacd_decorr_filter1_dec;
|
||||
|
||||
WORD32(*ixheaacd_divide16_pos)
|
||||
(WORD32 op1, WORD32 op2) = &ixheaacd_divide16_pos_dec;
|
||||
|
||||
VOID(*ixheaacd_decorrelation)
|
||||
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *p_buf_left_real, WORD32 *p_buf_left_imag,
|
||||
WORD32 *p_buf_right_real, WORD32 *p_buf_right_imag,
|
||||
ia_ps_tables_struct *ps_tables_ptr) = &ixheaacd_decorrelation_dec;
|
||||
|
||||
VOID(*ixheaacd_apply_rot)
|
||||
(ia_ps_dec_struct *ptr_ps_dec, WORD32 *p_qmf_left_re, WORD32 *p_qmf_left_im,
|
||||
WORD32 *p_qmf_right_re, WORD32 *p_qmf_right_im,
|
||||
ia_sbr_tables_struct *sbr_tables_ptr,
|
||||
const WORD16 *ptr_res) = &ixheaacd_apply_rot_dec;
|
||||
|
||||
VOID(*ixheaacd_conv_ergtoamplitudelp)
|
||||
(WORD32 bands, WORD16 noise_e, WORD16 *nrg_sine, WORD16 *nrg_gain,
|
||||
WORD16 *noise_level_mant,
|
||||
WORD16 *sqrt_table) = &ixheaacd_conv_ergtoamplitudelp_dec;
|
||||
|
||||
VOID(*ixheaacd_conv_ergtoamplitude)
|
||||
(WORD32 bands, WORD16 noise_e, WORD16 *nrg_sine, WORD16 *nrg_gain,
|
||||
WORD16 *noise_level_mant,
|
||||
WORD16 *sqrt_table) = &ixheaacd_conv_ergtoamplitude_dec;
|
||||
|
||||
VOID(*ixheaacd_adjust_scale)
|
||||
(WORD32 **re, WORD32 **im, WORD32 sub_band_start, WORD32 sub_band_end,
|
||||
WORD32 start_pos, WORD32 next_pos, WORD32 shift,
|
||||
FLAG low_pow_flag) = &ixheaacd_adjust_scale_dec;
|
||||
|
||||
WORD16(*ixheaacd_ixheaacd_expsubbandsamples)
|
||||
(WORD32 **re, WORD32 **im, WORD32 sub_band_start, WORD32 sub_band_end,
|
||||
WORD32 start_pos, WORD32 next_pos,
|
||||
FLAG low_pow_flag) = &ixheaacd_expsubbandsamples_dec;
|
||||
|
||||
VOID(*ixheaacd_enery_calc_per_subband)
|
||||
(WORD32 start_pos, WORD32 next_pos, WORD32 sub_band_start, WORD32 sub_band_end,
|
||||
WORD32 frame_exp, WORD16 *nrg_est_mant, FLAG low_pow_flag,
|
||||
ia_sbr_tables_struct *ptr_sbr_tables,
|
||||
WORD32 *ptr_qmf_matrix) = &ixheaacd_enery_calc_per_subband_dec;
|
||||
|
||||
VOID(*ixheaacd_harm_idx_zerotwolp)
|
||||
(WORD32 *ptr_real_buf, WORD16 *ptr_gain_buf, WORD32 scale_change,
|
||||
WORD16 *ptr_sine_level_buf, const WORD32 *ptr_rand_ph,
|
||||
WORD16 *noise_level_mant, WORD32 num_sub_bands, FLAG noise_absc_flag,
|
||||
WORD32 harm_index) = &ixheaacd_harm_idx_zerotwolp_dec;
|
||||
|
||||
VOID(*ixheaacd_tns_ar_filter_fixed)
|
||||
(WORD32 *spectrum, WORD32 size, WORD32 inc, WORD32 *lpc, WORD32 order,
|
||||
WORD32 shift_value, WORD scale_spec) = &ixheaacd_tns_ar_filter_fixed_armv8;
|
||||
|
||||
VOID(*ixheaacd_tns_ar_filter)
|
||||
(WORD32 *spectrum, WORD32 size, WORD32 inc, WORD16 *lpc, WORD32 order,
|
||||
WORD32 shift_value, WORD scale_spec,
|
||||
WORD32 *ptr_filter_state) = &ixheaacd_tns_ar_filter_dec;
|
||||
|
||||
VOID(*ixheaacd_tns_parcor_lpc_convert)
|
||||
(WORD16 *parcor, WORD16 *lpc, WORD16 *scale,
|
||||
WORD order) = &ixheaacd_tns_parcor_lpc_convert_dec;
|
||||
|
||||
WORD32(*ixheaacd_calc_max_spectral_line)
|
||||
(WORD32 *ptr_tmp, WORD32 size) = &ixheaacd_calc_max_spectral_line_armv8;
|
||||
|
||||
VOID(*ixheaacd_post_twiddle)
|
||||
(WORD32 out_ptr[], WORD32 spec_data[],
|
||||
ia_aac_dec_imdct_tables_struct *ptr_imdct_tables,
|
||||
WORD npoints) = &ixheaacd_post_twiddle_armv8;
|
||||
|
||||
VOID(*ixheaacd_post_twid_overlap_add)
|
||||
(WORD16 pcm_out[], WORD32 spec_data[],
|
||||
ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints,
|
||||
WORD32 *ptr_overlap_buf, WORD16 q_shift, const WORD16 *window,
|
||||
WORD16 ch_fac) = &ixheaacd_post_twid_overlap_add_armv8;
|
||||
|
||||
VOID(*ixheaacd_neg_shift_spec)
|
||||
(WORD32 *coef, WORD16 *out, WORD16 q_shift,
|
||||
WORD16 ch_fac) = &ixheaacd_neg_shift_spec_armv8;
|
||||
|
||||
VOID(*ixheaacd_spec_to_overlapbuf)
|
||||
(WORD32 *ptr_overlap_buf, WORD32 *ptr_spec_coeff, WORD32 q_shift,
|
||||
WORD32 size) = &ixheaacd_spec_to_overlapbuf_dec;
|
||||
|
||||
VOID(*ixheaacd_overlap_buf_out)
|
||||
(WORD16 *out_samples, WORD32 *ptr_overlap_buf, WORD32 size,
|
||||
const WORD16 ch_fac) = &ixheaacd_overlap_buf_out_dec;
|
||||
|
||||
VOID(*ixheaacd_overlap_out_copy)
|
||||
(WORD16 *out_samples, WORD32 *ptr_overlap_buf, WORD32 *ptr_overlap_buf1,
|
||||
const WORD16 ch_fac) = &ixheaacd_overlap_out_copy_dec;
|
||||
|
||||
VOID(*ixheaacd_pretwiddle_compute)
|
||||
(WORD32 *spec_data1, WORD32 *spec_data2, WORD32 *out_ptr,
|
||||
ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints4,
|
||||
WORD32 neg_expo) = &ixheaacd_pretwiddle_compute_armv8;
|
||||
|
||||
VOID(*ixheaacd_imdct_using_fft)
|
||||
(ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD32 npoints,
|
||||
WORD32 *ptr_x, WORD32 *ptr_y) = &ixheaacd_imdct_using_fft_armv8;
|
||||
|
||||
VOID(*ixheaacd_complex_fft_p2)
|
||||
(WORD32 *xr, WORD32 *xi, WORD32 nlength, WORD32 fft_mode,
|
||||
WORD32 *preshift) = &ixheaacd_complex_fft_p2_dec;
|
||||
|
||||
VOID(*ixheaacd_mps_complex_fft_64)
|
||||
(WORD32 *ptr_x, WORD32 *fin_re, WORD32 *fin_im,
|
||||
WORD32 nlength) = &ixheaacd_mps_complex_fft_64_dec;
|
||||
|
||||
VOID(*ixheaacd_mps_synt_pre_twiddle)
|
||||
(WORD32 *ptr_in, WORD32 *table_re, WORD32 *table_im,
|
||||
WORD32 resolution) = &ixheaacd_mps_synt_pre_twiddle_dec;
|
||||
|
||||
VOID(*ixheaacd_mps_synt_post_twiddle)
|
||||
(WORD32 *ptr_in, WORD32 *table_re, WORD32 *table_im,
|
||||
WORD32 resolution) = &ixheaacd_mps_synt_post_twiddle_dec;
|
||||
|
||||
VOID(*ixheaacd_calc_pre_twid)
|
||||
(WORD32 *ptr_x, WORD32 *r_ptr, WORD32 *i_ptr, WORD32 nlength,
|
||||
const WORD32 *cos_ptr, const WORD32 *sin_ptr) = &ixheaacd_calc_pre_twid_dec;
|
||||
|
||||
VOID(*ixheaacd_calc_post_twid)
|
||||
(WORD32 *ptr_x, WORD32 *r_ptr, WORD32 *i_ptr, WORD32 nlength,
|
||||
const WORD32 *cos_ptr, const WORD32 *sin_ptr) = &ixheaacd_calc_post_twid_dec;
|
||||
|
||||
VOID(*ixheaacd_mps_synt_post_fft_twiddle)
|
||||
(WORD32 resolution, WORD32 *fin_re, WORD32 *fin_im, WORD32 *table_re,
|
||||
WORD32 *table_im, WORD32 *state) = &ixheaacd_mps_synt_post_fft_twiddle_dec;
|
||||
|
||||
VOID(*ixheaacd_mps_synt_out_calc)
|
||||
(WORD32 resolution, WORD32 *out, WORD32 *state,
|
||||
const WORD32 *filter_coeff) = &ixheaacd_mps_synt_out_calc_dec;
|
||||
|
||||
VOID(*ixheaacd_fft_15_ld)
|
||||
(WORD32 *inp, WORD32 *op, WORD32 *fft3out,
|
||||
UWORD8 *re_arr_tab_sml_240_ptr) = &ixheaacd_fft_15_ld_dec;
|
||||
|
||||
VOID(*ixheaacd_aac_ld_dec_rearrange)
|
||||
(WORD32 *ip, WORD32 *op, WORD32 mdct_len_2,
|
||||
UWORD8 *re_arr_tab) = &ixheaacd_rearrange_dec;
|
||||
|
||||
VOID (*ixheaacd_fft32x32_ld)
|
||||
(ia_aac_dec_imdct_tables_struct *imdct_tables_ptr, WORD32 npoints,
|
||||
WORD32 *ptr_x, WORD32 *ptr_y) = &ixheaacd_imdct_using_fft_armv8;
|
||||
|
||||
VOID (*ixheaacd_fft32x32_ld2)
|
||||
(ia_aac_dec_imdct_tables_struct *imdct_tables_ptr, WORD32 npoints,
|
||||
WORD32 *ptr_x, WORD32 *ptr_y) = &ixheaacd_fft32x32_ld2_armv8;
|
||||
|
||||
WORD16 (*ixheaacd_neg_expo_inc)(WORD16 neg_expo) = &ixheaacd_neg_expo_inc_arm;
|
||||
|
||||
VOID (*ixheaacd_inv_dit_fft_8pt)
|
||||
(WORD32 *x, WORD32 *real, WORD32 *imag) = &ixheaacd_inv_dit_fft_8pt_armv8;
|
||||
|
||||
VOID (*ixheaacd_scale_factor_process)
|
||||
(WORD32 *x_invquant, WORD16 *scale_fact, WORD no_band, WORD8 *width,
|
||||
WORD32 *scale_tables_ptr, WORD32 total_channels, WORD32 object_type,
|
||||
WORD32 aac_sf_data_resil_flag) = &ixheaacd_scale_factor_process_armv8;
|
||||
819
decoder/armv8/ixheaacd_imdct_using_fft.s
Normal file
819
decoder/armv8/ixheaacd_imdct_using_fft.s
Normal file
|
|
@ -0,0 +1,819 @@
|
|||
///******************************************************************************
|
||||
// *
|
||||
// * Copyright (C) 2018 The Android Open Source Project
|
||||
// *
|
||||
// * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// * you may not use this file except in compliance with the License.
|
||||
// * You may obtain a copy of the License at:
|
||||
// *
|
||||
// * http://www.apache.org/licenses/LICENSE-2.0
|
||||
// *
|
||||
// * Unless required by applicable law or agreed to in writing, software
|
||||
// * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// * See the License for the specific language governing permissions and
|
||||
// * limitations under the License.
|
||||
// *
|
||||
// *****************************************************************************
|
||||
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
//*/
|
||||
|
||||
|
||||
.macro push_v_regs
|
||||
stp q8, q9, [sp, #-32]!
|
||||
stp q10, q11, [sp, #-32]!
|
||||
stp q12, q13, [sp, #-32]!
|
||||
stp q14, q15, [sp, #-32]!
|
||||
stp X8, X9, [sp, #-16]!
|
||||
stp X10, X11, [sp, #-16]!
|
||||
stp X12, X13, [sp, #-16]!
|
||||
stp X14, X15, [sp, #-16]!
|
||||
stp X16, X17, [sp, #-16]!
|
||||
stp X29, X30, [sp, #-16]!
|
||||
.endm
|
||||
.macro pop_v_regs
|
||||
ldp X29, X30, [sp], #16
|
||||
ldp X16, X17, [sp], #16
|
||||
ldp X14, X15, [sp], #16
|
||||
ldp X12, X13, [sp], #16
|
||||
ldp X10, X11, [sp], #16
|
||||
ldp X8, X9, [sp], #16
|
||||
ldp q14, q15, [sp], #32
|
||||
ldp q12, q13, [sp], #32
|
||||
ldp q10, q11, [sp], #32
|
||||
ldp q8, q9, [sp], #32
|
||||
.endm
|
||||
|
||||
.macro swp reg1, reg2
|
||||
MOv x16, \reg1
|
||||
MOv \reg1, \reg2
|
||||
MOv \reg2, x16
|
||||
.endm
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_imdct_using_fft_armv8
|
||||
ixheaacd_imdct_using_fft_armv8:
|
||||
push_v_regs
|
||||
|
||||
LDR X29, =11600
|
||||
ADD X4, X0, X29
|
||||
LDR X29, =11856
|
||||
ADD X5, X0, X29
|
||||
LDR X29, =11920
|
||||
ADD X6, X0, X29
|
||||
LDR X29, =11936
|
||||
ADD X7, X0, X29
|
||||
|
||||
COND_1: CMP X1, #0x400
|
||||
BNE COND_2
|
||||
MOv X8, #4
|
||||
B RADIX_4_FIRST_START
|
||||
|
||||
|
||||
COND_2: CMP X1, #0x200
|
||||
BNE COND_3
|
||||
MOv X8, #3
|
||||
MOv X4, X5
|
||||
B RADIX_8_FIRST_START
|
||||
|
||||
COND_3: CMP X1, #0x100
|
||||
BNE COND_4
|
||||
MOv X8, #3
|
||||
MOv X4, X5
|
||||
B RADIX_4_FIRST_START
|
||||
|
||||
COND_4: CMP X1, #0x80
|
||||
BNE COND_5
|
||||
MOv X8, #2
|
||||
MOv X4, X6
|
||||
B RADIX_8_FIRST_START
|
||||
|
||||
COND_5: CMP X1, #0x40
|
||||
BNE COND_6
|
||||
MOv X8, #2
|
||||
MOv X4, X6
|
||||
B RADIX_4_FIRST_START
|
||||
COND_6:
|
||||
MOv X8, #1
|
||||
MOv X4, X7
|
||||
|
||||
|
||||
|
||||
RADIX_8_FIRST_START:
|
||||
LSR W9 , W1, #5
|
||||
LSL W1, W1, #1
|
||||
|
||||
RADIX_8_FIRST_LOOP:
|
||||
|
||||
MOv X5 , X2
|
||||
MOv X6 , X2
|
||||
MOv X7 , X2
|
||||
MOv X11 , X2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LDRB W12, [X4]
|
||||
ADD X5, X5, X12, LSL #3
|
||||
LD2 {v0.S, v1.S}[0], [X5], X1
|
||||
ADD X5, X5, X1
|
||||
LD2 {v4.S, v5.S}[0], [X5], X1
|
||||
SUB X5, X5, X1, LSL #1
|
||||
LD2 {v2.S, v3.S}[0], [X5], X1
|
||||
ADD X5, X5, X1
|
||||
LD2 {v6.S, v7.S}[0], [X5], X1
|
||||
SUB X5, X5, X1, LSL #2
|
||||
|
||||
LDRB W12, [X4, #1]
|
||||
ADD X6, X6, X12, LSL #3
|
||||
LD2 {v0.S, v1.S}[1], [X6] , X1
|
||||
ADD X6, X6, X1
|
||||
LD2 {v4.S, v5.S}[1], [X6] , X1
|
||||
SUB X6, X6, X1, LSL #1
|
||||
LD2 {v2.S, v3.S}[1], [X6] , X1
|
||||
ADD X6, X6, X1
|
||||
LD2 {v6.S, v7.S}[1], [X6], X1
|
||||
SUB X6, X6, X1, LSL #2
|
||||
|
||||
|
||||
LDRB W12, [X4, #2]
|
||||
ADD X7, X7, X12, LSL #3
|
||||
LD2 {v0.S, v1.S}[2], [X7] , X1
|
||||
ADD X7, X7, X1
|
||||
LD2 {v4.S, v5.S}[2], [X7] , X1
|
||||
SUB X7, X7, X1, LSL #1
|
||||
|
||||
LDRB W12, [X4, #3]
|
||||
ADD X11, X11, X12, LSL #3
|
||||
LD2 {v0.S, v1.S}[3], [X11] , X1
|
||||
ADD X11, X11, X1
|
||||
LD2 {v4.S, v5.S}[3], [X11] , X1
|
||||
SUB X11, X11, X1, LSL #1
|
||||
|
||||
|
||||
ADD v8.4S, v0.4S, v4.4S
|
||||
LD2 {v2.S, v3.S}[2], [X7] , X1
|
||||
ADD X7, X7, X1
|
||||
|
||||
|
||||
SUB v9.4S, v0.4S, v4.4S
|
||||
LD2 {v6.S, v7.S}[2], [X7], X1
|
||||
SUB X7, X7, X1, LSL #2
|
||||
|
||||
|
||||
ADD v0.4S, v1.4S, v5.4S
|
||||
LD2 {v2.S, v3.S}[3], [X11] , X1
|
||||
ADD X11, X11, X1
|
||||
|
||||
SUB v4.4S, v1.4S, v5.4S
|
||||
LD2 {v6.S, v7.S}[3], [X11], X1
|
||||
SUB X11, X11, X1, LSL #2
|
||||
|
||||
ADD X4, X4, #4
|
||||
|
||||
ADD X5, X5, X1, LSR #1
|
||||
ADD X6, X6, X1, LSR #1
|
||||
ADD X7, X7, X1, LSR #1
|
||||
ADD X11, X11, X1, LSR #1
|
||||
|
||||
|
||||
ADD v1.4S, v2.4S, v6.4S
|
||||
LD2 {v14.S, v15.S}[0], [X5] , X1
|
||||
|
||||
|
||||
SUB v5.4S, v2.4S, v6.4S
|
||||
LD2 {v10.S, v11.S}[0], [X5] , X1
|
||||
|
||||
|
||||
ADD v2.4S, v3.4S, v7.4S
|
||||
LD2 {v12.S, v13.S}[0], [X5] , X1
|
||||
|
||||
|
||||
SUB v6.4S, v3.4S, v7.4S
|
||||
LD2 {v14.S, v15.S}[1], [X6] , X1
|
||||
|
||||
ADD v3.4S, v9.4S, v6.4S
|
||||
LD2 {v10.S, v11.S}[1], [X6] , X1
|
||||
|
||||
SUB v7.4S, v9.4S, v6.4S
|
||||
LD2 {v12.S, v13.S}[1], [X6] , X1
|
||||
|
||||
SUB v6.4S, v4.4S, v5.4S
|
||||
LD2 {v14.S, v15.S}[2], [X7] , X1
|
||||
|
||||
ADD v9.4S, v4.4S, v5.4S
|
||||
LD2 {v10.S, v11.S}[2], [X7] , X1
|
||||
|
||||
ADD v4.4S, v8.4S, v1.4S
|
||||
LD2 {v12.S, v13.S}[2], [X7] , X1
|
||||
|
||||
SUB v5.4S, v8.4S, v1.4S
|
||||
LD2 {v14.S, v15.S}[3], [X11] , X1
|
||||
|
||||
ADD v8.4S, v0.4S, v2.4S
|
||||
LD2 {v10.S, v11.S}[3], [X11] , X1
|
||||
|
||||
SUB v0.4S, v0.4S, v2.4S
|
||||
LD2 {v12.S, v13.S}[3], [X11] , X1
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LD2 {v1.S, v2.S}[0], [X5], X1
|
||||
|
||||
ADD v17.4S, v14.4S, v12.4S
|
||||
|
||||
LD2 {v1.S, v2.S}[1], [X6] , X1
|
||||
|
||||
SUB v16.4S, v14.4S, v12.4S
|
||||
|
||||
LD2 {v1.S, v2.S}[2], [X7] , X1
|
||||
|
||||
ADD v14.4S, v15.4S, v13.4S
|
||||
|
||||
LD2 {v1.S, v2.S}[3], [X11] , X1
|
||||
|
||||
SUB v12.4S, v15.4S, v13.4S
|
||||
|
||||
ADD v15.4S, v10.4S, v1.4S
|
||||
SUB v13.4S, v10.4S, v1.4S
|
||||
ADD v10.4S, v11.4S, v2.4S
|
||||
SUB v1.4S, v11.4S, v2.4S
|
||||
|
||||
ADD v11.4S, v17.4S, v15.4S
|
||||
SUB v2.4S, v17.4S, v15.4S
|
||||
ADD v17.4S, v14.4S, v10.4S
|
||||
SUB v15.4S, v14.4S, v10.4S
|
||||
|
||||
ADD v14.4S, v16.4S, v12.4S
|
||||
SUB v10.4S, v16.4S, v12.4S
|
||||
ADD v16.4S, v13.4S, v1.4S
|
||||
SUB v12.4S, v13.4S, v1.4S
|
||||
|
||||
ADD v1.4S , v14.4S, v12.4S
|
||||
SUB v13.4S, v14.4S, v12.4S
|
||||
SUB v12.4S, v16.4S, v10.4S
|
||||
|
||||
|
||||
UZP1 v22.8H, v1.8H, v1.8H
|
||||
UZP2 v23.8H, v1.8H, v1.8H
|
||||
ADD v14.4S, v16.4S, v10.4S
|
||||
|
||||
UZP1 v26.8H, v13.8H, v13.8H
|
||||
UZP2 v27.8H, v13.8H, v13.8H
|
||||
ADD v16.4S, v4.4S, v11.4S
|
||||
|
||||
UZP1 v24.8H, v12.8H, v12.8H
|
||||
UZP2 v25.8H, v12.8H, v12.8H
|
||||
SUB v10.4S, v4.4S, v11.4S
|
||||
|
||||
UZP1 v28.8H, v14.8H, v14.8H
|
||||
UZP2 v29.8H, v14.8H, v14.8H
|
||||
ADD v4.4S, v8.4S, v17.4S
|
||||
|
||||
MOv W14, #0x5a82
|
||||
|
||||
SUB v11.4S, v8.4S, v17.4S
|
||||
|
||||
ADD v8.4S, v5.4S, v15.4S
|
||||
SUB v17.4S, v5.4S, v15.4S
|
||||
SUB v5.4S, v0.4S, v2.4S
|
||||
ADD v15.4S, v0.4S, v2.4S
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
DUP v31.4H, W14
|
||||
|
||||
UMULL v19.4S, v26.4H, v31.4H
|
||||
UMULL v18.4S, v28.4H, v31.4H
|
||||
SSHR v19.4S, v19.4S, #15
|
||||
SSHR v18.4S, v18.4S, #15
|
||||
|
||||
|
||||
SQDMLAL v19.4S, v27.4H, v31.4H
|
||||
SQDMLAL v18.4S, v29.4H, v31.4H
|
||||
|
||||
|
||||
UMULL v13.4S, v24.4H, v31.4H
|
||||
UMULL v14.4S, v22.4H, v31.4H
|
||||
|
||||
ADD v20.4S, v3.4S, v19.4S
|
||||
SUB v21.4S, v3.4S, v19.4S
|
||||
ADD v30.4S, v6.4S, v18.4S
|
||||
SUB v6.4S, v6.4S, v18.4S
|
||||
|
||||
SSHR v13.4S, v13.4S, #15
|
||||
SSHR v14.4S, v14.4S, #15
|
||||
|
||||
SQDMLAL v13.4S, v25.4H, v31.4H
|
||||
SQDMLAL v14.4S, v23.4H, v31.4H
|
||||
|
||||
|
||||
|
||||
|
||||
ADD v3.4S, v7.4S, v13.4S
|
||||
SUB v19.4S, v7.4S, v13.4S
|
||||
ADD v1.4S, v9.4S, v14.4S
|
||||
SUB v18.4S, v9.4S, v14.4S
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
swp v17.D[0], v8.D[0]
|
||||
swp v17.D[1], v8.D[1]
|
||||
swp v4.D[0], v16.D[0]
|
||||
swp v4.D[1], v16.D[1]
|
||||
|
||||
TRN1 v12.4S, v4.4S, v20.4S
|
||||
TRN2 v22.4S, v4.4S, v20.4S
|
||||
|
||||
SHL v12.4S, v12.4S, #3
|
||||
TRN1 v9.4S, v17.4S, v3.4S
|
||||
TRN2 v2.4S, v17.4S, v3.4S
|
||||
SHL v22.4S, v22.4S, #3
|
||||
|
||||
SHL v9.4S, v9.4S, #3
|
||||
TRN1 v24.4S, v10.4S, v21.4S
|
||||
TRN2 v7.4S, v10.4S, v21.4S
|
||||
SHL v2.4S, v2.4S, #3
|
||||
|
||||
SHL v24.4S, v24.4S, #3
|
||||
TRN1 v13.4S, v16.4S, v6.4S
|
||||
TRN2 v23.4S, v16.4S, v6.4S
|
||||
SHL v7.4S, v7.4S, #3
|
||||
|
||||
SHL v13.4S, v13.4S, #3
|
||||
TRN1 v10.4S, v5.4S, v18.4S
|
||||
TRN2 v3.4S, v5.4S, v18.4S
|
||||
SHL v23.4S, v23.4S, #3
|
||||
|
||||
SHL v10.4S, v10.4S, #3
|
||||
TRN1 v26.4S, v8.4S, v19.4S
|
||||
TRN2 v4.4S, v8.4S, v19.4S
|
||||
SHL v3.4S, v3.4S, #3
|
||||
|
||||
SHL v26.4S, v26.4S, #3
|
||||
TRN1 v25.4S, v11.4S, v30.4S
|
||||
TRN2 v8.4S, v11.4S, v30.4S
|
||||
SHL v4.4S, v4.4S, #3
|
||||
|
||||
SHL v25.4S, v25.4S, #3
|
||||
TRN1 v27.4S, v15.4S, v1.4S
|
||||
TRN2 v5.4S, v15.4S, v1.4S
|
||||
SHL v8.4S, v8.4S, #3
|
||||
|
||||
SHL v27.4S, v27.4S, #3
|
||||
swp v9.D[0], v12.D[1]
|
||||
SHL v5.4S, v5.4S, #3
|
||||
swp v2.D[0], v22.D[1]
|
||||
|
||||
swp v24.D[1], v26.D[0]
|
||||
swp v7.D[1], v4.D[0]
|
||||
swp v10.D[0], v13.D[1]
|
||||
swp v3.D[0], v23.D[1]
|
||||
swp v27.D[0], v25.D[1]
|
||||
swp v5.D[0], v8.D[1]
|
||||
|
||||
MOv X15, #32
|
||||
ST2 {v12.4S, v13.4S}, [X3], X15
|
||||
ST2 {v24.4S, v25.4S}, [X3], X15
|
||||
ST2 {v22.4S, v23.4S}, [X3], X15
|
||||
ST2 {v7.4S, v8.4S}, [X3], X15
|
||||
ST2 {v9.4S, v10.4S}, [X3], X15
|
||||
ST2 {v26.4S, v27.4S}, [X3], X15
|
||||
ST2 {v2.4S, v3.4S}, [X3], X15
|
||||
ST2 {v4.4S, v5.4S}, [X3], X15
|
||||
|
||||
|
||||
SUBS X9, X9, #1
|
||||
BNE RADIX_8_FIRST_LOOP
|
||||
|
||||
LSR X1, X1, #1
|
||||
LSL X15, X1, #3
|
||||
SUB X3, X3, X15
|
||||
|
||||
MOv X5, #8
|
||||
MOv X4, #32
|
||||
LSR X15, X1, #5
|
||||
MOv X6, X15
|
||||
B RADIX_4_FIRST_ENDS
|
||||
RADIX_8_FIRST_ENDS:
|
||||
|
||||
RADIX_4_FIRST_START:
|
||||
|
||||
LSR W9, W1, #4
|
||||
LSL W1, W1, #1
|
||||
RADIX_4_LOOP:
|
||||
|
||||
MOv X5 , X2
|
||||
MOv X6 , X2
|
||||
MOv X7 , X2
|
||||
MOv X11 , X2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LDRB W12, [X4, #0]
|
||||
ADD X5, X5, X12, LSL #3
|
||||
|
||||
LD2 {v0.S, v1.S}[0], [X5] , X1
|
||||
ADD X5, X5, X1
|
||||
LD2 {v8.S, v9.S}[0], [X5] , X1
|
||||
SUB X5, X5, X1, LSL #1
|
||||
LD2 {v4.S, v5.S}[0], [X5] , X1
|
||||
ADD X5, X5, X1
|
||||
LD2 {v12.S, v13.S}[0], [X5] , X1
|
||||
|
||||
LDRB W12, [X4, #1]
|
||||
ADD X6, X6, X12, LSL #3
|
||||
LD2 {v0.S, v1.S}[1], [X6] , X1
|
||||
ADD X6, X6, X1
|
||||
LD2 {v8.S, v9.S}[1], [X6] , X1
|
||||
SUB X6, X6, X1, LSL #1
|
||||
LD2 {v4.S, v5.S}[1], [X6] , X1
|
||||
ADD X6, X6, X1
|
||||
LD2 {v12.S, v13.S}[1], [X6] , X1
|
||||
|
||||
LDRB W12, [X4, #2]
|
||||
ADD X7, X7, X12, LSL #3
|
||||
|
||||
LD2 {v0.S, v1.S}[2], [X7] , X1
|
||||
ADD X7, X7, X1
|
||||
LD2 {v8.S, v9.S}[2], [X7] , X1
|
||||
|
||||
|
||||
LDRB W12, [X4, #3]
|
||||
ADD X11, X11, X12 , LSL #3
|
||||
|
||||
|
||||
LD2 {v0.S, v1.S}[3], [X11] , X1
|
||||
ADD X11, X11, X1
|
||||
LD2 {v8.S, v9.S}[3], [X11] , X1
|
||||
|
||||
SUB X7, X7, X1, LSL #1
|
||||
ADD v16.4S, v0.4S, v8.4S
|
||||
LD2 {v4.S, v5.S}[2], [X7] , X1
|
||||
ADD X7, X7, X1
|
||||
ADD v18.4S, v1.4S, v9.4S
|
||||
LD2 {v12.S, v13.S}[2], [X7] , X1
|
||||
|
||||
SUB X11, X11, X1, LSL #1
|
||||
SUB v20.4S, v0.4S, v8.4S
|
||||
LD2 {v4.S, v5.S}[3], [X11] , X1
|
||||
ADD X11, X11, X1
|
||||
SUB v22.4S, v1.4S, v9.4S
|
||||
LD2 {v12.S, v13.S}[3], [X11] , X1
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
ADD X4, X4, #4
|
||||
|
||||
ADD v24.4S, v4.4S, v12.4S
|
||||
ADD v26.4S, v5.4S, v13.4S
|
||||
SUB v28.4S, v4.4S, v12.4S
|
||||
SUB v30.4S, v5.4S, v13.4S
|
||||
|
||||
ADD v17.4S, v16.4S, v24.4S
|
||||
ADD v11.4S, v18.4S, v26.4S
|
||||
SUB v19.4S, v16.4S, v24.4S
|
||||
SUB v15.4S, v18.4S, v26.4S
|
||||
|
||||
ADD v8.4S, v20.4S, v30.4S
|
||||
SUB v9.4S, v22.4S, v28.4S
|
||||
ADD v13.4S, v22.4S, v28.4S
|
||||
SUB v12.4S, v20.4S, v30.4S
|
||||
|
||||
|
||||
|
||||
|
||||
TRN1 v0.4S, v17.4S, v8.4S
|
||||
TRN2 v8.4S, v17.4S, v8.4S
|
||||
|
||||
SHL v0.4S, v0.4S, #2
|
||||
TRN1 v4.4S, v19.4S, v12.4S
|
||||
TRN2 v12.4S, v19.4S, v12.4S
|
||||
SHL v8.4S, v8.4S, #2
|
||||
|
||||
SHL v4.4S, v4.4S, #2
|
||||
TRN1 v1.4S, v11.4S, v9.4S
|
||||
TRN2 v9.4S, v11.4S, v9.4S
|
||||
SHL v12.4S, v12.4S, #2
|
||||
|
||||
SHL v1.4S, v1.4S, #2
|
||||
TRN1 v5.4S, v15.4S, v13.4S
|
||||
TRN2 v13.4S, v15.4S, v13.4S
|
||||
SHL v9.4S, v9.4S, #2
|
||||
|
||||
SHL v5.4S, v5.4S, #2
|
||||
swp v4.D[0], v0.D[1]
|
||||
SHL v13.4S, v13.4S, #2
|
||||
|
||||
swp v12.D[0], v8.D[1]
|
||||
swp v5.D[0], v1.D[1]
|
||||
swp v13.D[0], v9.D[1]
|
||||
|
||||
MOv X15, #32
|
||||
ST2 {v0.4S, v1.4S}, [X3], X15
|
||||
ST2 {v8.4S, v9.4S}, [X3], X15
|
||||
ST2 {v4.4S, v5.4S}, [X3], X15
|
||||
ST2 {v12.4S, v13.4S}, [X3], X15
|
||||
|
||||
|
||||
SUBS W9, W9, #1
|
||||
BNE RADIX_4_LOOP
|
||||
|
||||
LSR X1, X1, #1
|
||||
SUB X3, X3, X1, LSL #3
|
||||
MOv X5, #4
|
||||
MOv X4, #64
|
||||
LSR X6, X1, #4
|
||||
|
||||
|
||||
RADIX_4_FIRST_ENDS:
|
||||
|
||||
MOv x30, X3
|
||||
LSR X5, X5, #2
|
||||
|
||||
LDR X14, =8528
|
||||
ADD X0, X0, X14
|
||||
|
||||
OUTER_LOOP_R4:
|
||||
|
||||
MOv X14, x30
|
||||
|
||||
MOv X7, X5
|
||||
MOv X2, #0
|
||||
MOv X9, X0
|
||||
LSL X12, X5, #5
|
||||
MIDDLE_LOOP_R4:
|
||||
|
||||
LD2 {v20.H, v21.H}[0], [X9], X2
|
||||
LD2 {v22.H, v23.H}[0], [X9], X2
|
||||
ADD X11, X2, X4, LSL #2
|
||||
LD2 {v24.H, v25.H}[0], [X9]
|
||||
ADD X10, X0, X11
|
||||
|
||||
LD2 {v20.H, v21.H}[1], [X10], X11
|
||||
LD2 {v22.H, v23.H}[1], [X10], X11
|
||||
ADD X2, X11, X4, LSL #2
|
||||
LD2 {v24.H, v25.H}[1], [X10]
|
||||
ADD X9, X0, X2
|
||||
|
||||
LD2 {v20.H, v21.H}[2], [X9], X2
|
||||
LD2 {v22.H, v23.H}[2], [X9], X2
|
||||
ADD X11, X2, X4, LSL #2
|
||||
LD2 {v24.H, v25.H}[2], [X9]
|
||||
ADD X10, X0, X11
|
||||
|
||||
LD2 {v20.H, v21.H}[3], [X10], X11
|
||||
LD2 {v22.H, v23.H}[3], [X10], X11
|
||||
ADD X2, X11, X4, LSL #2
|
||||
LD2 {v24.H, v25.H}[3], [X10]
|
||||
ADD X9, X0, X2
|
||||
|
||||
MOv X10, X6
|
||||
INNER_LOOP_R4:
|
||||
|
||||
LD2 {v30.4S, v31.4S}, [X14], X12
|
||||
SSHR v30.4S, v30.4S, #1
|
||||
LD4 {v16.4H, v17.4H, v18.4H, v19.4H}, [X14], X12
|
||||
SSHR v31.4S, v31.4S, #1
|
||||
|
||||
USHR v16.4H, v16.4H, #1
|
||||
LD4 {v26.4H, v27.4H, v28.4H, v29.4H}, [X14], X12
|
||||
USHR v18.4H, v18.4H, #1
|
||||
|
||||
SMULL v11.4S, v16.4H, v20.4H
|
||||
SMLSL v11.4S, v18.4H, v21.4H
|
||||
|
||||
LD4 {v0.4H, v1.4H, v2.4H, v3.4H}, [X14], X12
|
||||
SMULL v12.4S, v16.4H, v21.4H
|
||||
SMLAL v12.4S, v18.4H, v20.4H
|
||||
|
||||
USHR v26.4H, v26.4H, #1
|
||||
USHR v28.4H, v28.4H, #1
|
||||
|
||||
LSL x29, X12, #2
|
||||
SUB X14, X14, X12, LSL #2
|
||||
|
||||
USHR v0.4H, v0.4H, #1
|
||||
USHR v2.4H, v2.4H, #1
|
||||
|
||||
SMULL v13.4S, v26.4H, v22.4H
|
||||
SMLSL v13.4S, v28.4H, v23.4H
|
||||
|
||||
SSHR v11.4S, v11.4S, #15
|
||||
|
||||
SMULL v14.4S, v26.4H, v23.4H
|
||||
SMLAL v14.4S, v28.4H, v22.4H
|
||||
|
||||
SMULL v15.4S, v0.4H, v24.4H
|
||||
SMLSL v15.4S, v2.4H, v25.4H
|
||||
|
||||
SMLAL v11.4S, v17.4H, v20.4H
|
||||
SMLSL v11.4S, v19.4H, v21.4H
|
||||
|
||||
SSHR v12.4S, v12.4S, #15
|
||||
SSHR v13.4S, v13.4S, #15
|
||||
SSHR v14.4S, v14.4S, #15
|
||||
SSHR v15.4S, v15.4S, #15
|
||||
|
||||
SMLAL v12.4S, v17.4H, v21.4H
|
||||
SMLAL v12.4S, v19.4H, v20.4H
|
||||
|
||||
SMULL v5.4S, v0.4H, v25.4H
|
||||
SMLAL v5.4S, v2.4H, v24.4H
|
||||
|
||||
SMLAL v13.4S, v27.4H, v22.4H
|
||||
SMLSL v13.4S, v29.4H, v23.4H
|
||||
|
||||
SMLAL v14.4S, v27.4H, v23.4H
|
||||
SMLAL v14.4S, v29.4H, v22.4H
|
||||
|
||||
SMLAL v15.4S, v1.4H, v24.4H
|
||||
SMLSL v15.4S, v3.4H, v25.4H
|
||||
|
||||
SSHR v5.4S, v5.4S, #15
|
||||
|
||||
SMLAL v5.4S, v1.4H, v25.4H
|
||||
SMLAL v5.4S, v3.4H, v24.4H
|
||||
|
||||
|
||||
|
||||
SUBS x17, X7, X5
|
||||
BNE BYPASS_IF
|
||||
|
||||
ADD X14, X14, X12
|
||||
|
||||
LDR W3, [X14]
|
||||
ADD X14, X14, X12
|
||||
ASR W3, W3, #1
|
||||
|
||||
MOv v11.S[0], W3
|
||||
|
||||
LDR W3, [X14]
|
||||
ADD X14, X14, X12
|
||||
ASR W3, W3, #1
|
||||
MOv v13.S[0], W3
|
||||
|
||||
LDR W3, [X14]
|
||||
ASR W3, W3, #1
|
||||
MOv v15.S[0], W3
|
||||
|
||||
SUB X14, X14, X12, LSL #1
|
||||
ADD X14, X14, #4
|
||||
|
||||
LDR W3, [X14]
|
||||
ADD X14, X14, X12
|
||||
ASR W3, W3, #1
|
||||
MOv v12.S[0], W3
|
||||
|
||||
LDR W3, [X14]
|
||||
ADD X14, X14, X12
|
||||
ASR W3, W3, #1
|
||||
MOv v14.S[0], W3
|
||||
|
||||
LDR W3, [X14]
|
||||
ADD X14, X14, X12
|
||||
ASR W3, W3, #1
|
||||
MOv v5.S[0], W3
|
||||
|
||||
SUB X14, X14, #4
|
||||
|
||||
SUB X14, X14, x29
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
BYPASS_IF:
|
||||
|
||||
ADD v6.4S, v30.4S, v13.4S
|
||||
ADD v7.4S, v31.4S, v14.4S
|
||||
SUB v30.4S, v30.4S, v13.4S
|
||||
SUB v31.4S, v31.4S, v14.4S
|
||||
ADD v8.4S, v11.4S, v15.4S
|
||||
ADD v9.4S, v12.4S, v5.4S
|
||||
|
||||
SUB v15.4S, v11.4S, v15.4S
|
||||
SUB v14.4S, v12.4S, v5.4S
|
||||
|
||||
|
||||
ADD v10.4S, v6.4S, v8.4S
|
||||
ADD v11.4S, v7.4S, v9.4S
|
||||
ADD v12.4S, v30.4S, v14.4S
|
||||
SUB v13.4S, v31.4S, v15.4S
|
||||
|
||||
SUB v6.4S, v6.4S, v8.4S
|
||||
ST2 {v10.4S, v11.4S}, [X14], X12
|
||||
SUB v7.4S, v7.4S, v9.4S
|
||||
|
||||
SUB v8.4S, v30.4S, v14.4S
|
||||
ST2 {v12.4S, v13.4S}, [X14], X12
|
||||
ADD v9.4S, v31.4S, v15.4S
|
||||
|
||||
ST2 {v6.4S, v7.4S}, [X14], X12
|
||||
ST2 {v8.4S, v9.4S}, [X14], X12
|
||||
SUBS X10, X10, #1
|
||||
BNE INNER_LOOP_R4
|
||||
|
||||
SUB X14, X14, X1, LSL #3
|
||||
ADD X14, X14, #32
|
||||
|
||||
SUBS X7, X7, #1
|
||||
BNE MIDDLE_LOOP_R4
|
||||
|
||||
|
||||
|
||||
|
||||
LSR X4, X4, #2
|
||||
LSL X5, X5, #2
|
||||
LSR X6, X6, #2
|
||||
SUBS X8, X8, #1
|
||||
BNE OUTER_LOOP_R4
|
||||
END_LOOPS:
|
||||
pop_v_regs
|
||||
RET
|
||||
|
||||
|
||||
|
||||
174
decoder/armv8/ixheaacd_inv_dit_fft_8pt.s
Normal file
174
decoder/armv8/ixheaacd_inv_dit_fft_8pt.s
Normal file
|
|
@ -0,0 +1,174 @@
|
|||
//VOID ixheaacd_inv_dit_fft_8pt(WORD32 *y,
|
||||
// WORD32 *real,
|
||||
// WORD32 *imag)
|
||||
|
||||
.macro push_v_regs
|
||||
stp q8, q9, [sp, #-32]!
|
||||
stp q10, q11, [sp, #-32]!
|
||||
stp q12, q13, [sp, #-32]!
|
||||
stp q14, q15, [sp, #-32]!
|
||||
.endm
|
||||
.macro pop_v_regs
|
||||
ldp q14, q15, [sp], #32
|
||||
ldp q12, q13, [sp], #32
|
||||
ldp q10, q11, [sp], #32
|
||||
ldp q8, q9, [sp], #32
|
||||
.endm
|
||||
|
||||
|
||||
.text
|
||||
.global ixheaacd_inv_dit_fft_8pt_armv8
|
||||
ixheaacd_inv_dit_fft_8pt_armv8:
|
||||
push_v_regs
|
||||
LDR w3, =0x5A820000
|
||||
DUP v0.2s, w3
|
||||
MOV x5, #8
|
||||
ADD x6, x0, #4
|
||||
|
||||
//LD2 {v1.2s,v2.2s},[x0],x5
|
||||
//LD2 {v3.2s,v4.2s},[x0],x5
|
||||
//LD2 {v5.2s,v6.2s},[x0],x5
|
||||
//LD2 {v7.2s,v8.2s},[x0],x5
|
||||
|
||||
LD1 {v1.s}[0], [x0], x5
|
||||
LD1 {v2.s}[0], [x6], x5
|
||||
LD1 {v1.s}[1], [x0], x5
|
||||
LD1 {v2.s}[1], [x6], x5
|
||||
LD1 {v3.s}[0], [x0], x5
|
||||
LD1 {v4.s}[0], [x6], x5
|
||||
LD1 {v3.s}[1], [x0], x5
|
||||
LD1 {v4.s}[1], [x6], x5
|
||||
LD1 {v5.s}[0], [x0], x5
|
||||
LD1 {v6.s}[0], [x6], x5
|
||||
LD1 {v5.s}[1], [x0], x5
|
||||
LD1 {v6.s}[1], [x6], x5
|
||||
LD1 {v7.s}[0], [x0], x5
|
||||
LD1 {v8.s}[0], [x6], x5
|
||||
LD1 {v7.s}[1], [x0], x5
|
||||
LD1 {v8.s}[1], [x6], x5
|
||||
|
||||
//v1 - y0_2
|
||||
//v2 - y1_3
|
||||
//v3 - y4_6
|
||||
//v4 - y5_7
|
||||
//v5 - y8_10
|
||||
//v6 - y9_11
|
||||
//v7 - y12_14
|
||||
//v8 - y13_15
|
||||
|
||||
SQADD v9.2s, v1.2s, v5.2s //a00_v = vqadd_s32(y0_2,y8_10);
|
||||
SQADD v10.2s, v2.2s, v6.2s //a20_v = vqadd_s32(y1_3,y9_11);
|
||||
SQADD v11.2s, v3.2s, v7.2s //a10_v = vqadd_s32(y4_6,y12_14);
|
||||
SQADD v12.2s, v4.2s, v8.2s //a30_v = vqadd_s32(y5_7,y13_15);
|
||||
|
||||
SQSUB v1.2s, v1.2s, v5.2s //a0_v = vqsub_s32(y0_2,y8_10);
|
||||
SQSUB v5.2s, v2.2s, v6.2s //a3_v = vqsub_s32(y1_3,y9_11);
|
||||
SQSUB v2.2s, v3.2s, v7.2s //a2_v = vqsub_s32(y4_6,y12_14);
|
||||
SQSUB v6.2s, v4.2s, v8.2s //a1_v = vqsub_s32(y5_7,y13_15);
|
||||
|
||||
SQADD v3.2s, v9.2s, v11.2s //x0_8 = vqadd_s32(a00_v,a10_v);
|
||||
SQADD v7.2s, v10.2s, v12.2s //x1_9 = vqadd_s32(a20_v,a30_v);
|
||||
|
||||
SQSUB v4.2s, v9.2s, v11.2s //x4_12 = vqsub_s32(a00_v,a10_v);
|
||||
SQSUB v8.2s, v10.2s, v12.2s //x5_13 = vqsub_s32(a20_v,a30_v);
|
||||
|
||||
SQADD v9.2s, v1.2s, v6.2s //x6_14 = vqadd_s32(a0_v,a1_v);
|
||||
SQADD v11.2s, v5.2s, v2.2s //x3_11 = vqadd_s32(a3_v,a2_v);
|
||||
SQSUB v10.2s, v1.2s, v6.2s //x2_10 = vqsub_s32(a0_v,a1_v);
|
||||
SQSUB v13.2s, v5.2s, v2.2s //x7_15 = vqsub_s32(a3_v,a2_v);
|
||||
|
||||
UZP1 v1.2s, v3.2s, v7.2s //x0_1 = vuzp1_s32(x0_8,x1_9);
|
||||
UZP2 v5.2s, v3.2s, v7.2s //x8_9 = vuzp2_s32(x0_8,x1_9);
|
||||
|
||||
UZP1 v6.2s, v4.2s, v8.2s //x4_5 = vuzp1_s32(x4_12,x5_13);
|
||||
UZP2 v7.2s, v4.2s, v8.2s //x12_13 = vuzp2_s32(x4_12,x5_13);
|
||||
REV64 v7.2s, v7.2s //x13_12 = vrev64_s32(x12_13);
|
||||
|
||||
SQADD v3.2s, v1.2s, v5.2s //real_imag0 = vqadd_s32(x0_1,x8_9);
|
||||
SQSUB v8.2s, v1.2s, v5.2s //a00_10_v = vqsub_s32(x0_1,x8_9);
|
||||
|
||||
SQADD v12.2s, v6.2s, v7.2s //real_imag4 = vqadd_s32(x4_5,x13_12);
|
||||
SQSUB v14.2s, v6.2s, v7.2s //a0_1_v = vqsub_s32(x4_5,x13_12);
|
||||
|
||||
|
||||
MOV w4, v12.s[1]
|
||||
MOV v12.s[1], v14.s[1]
|
||||
MOV v14.s[1], w4
|
||||
|
||||
UZP1 v6.2s, v10.2s, v11.2s //x2_3
|
||||
|
||||
SQSUB v1.2s, v10.2s, v11.2s //tempr = vqsub_s32(x2_10,x3_11)
|
||||
SQADD v5.2s, v10.2s, v11.2s //tempi = vqadd_s32(x2_10,x3_11)
|
||||
|
||||
SMULL v7.2d, v1.2s, v0.2s
|
||||
SMULL v10.2d, v5.2s, v0.2s
|
||||
|
||||
SSHR v7.2d, v7.2d, #32 //tempr_q
|
||||
SSHR v10.2d, v10.2d, #32 //tempi_q
|
||||
|
||||
SHL v7.4s, v7.4s, #1
|
||||
SHL v10.4s, v10.4s, #1
|
||||
|
||||
|
||||
|
||||
MOV v1.s[0], v7.s[2]
|
||||
MOV v1.s[1], v10.s[2] //vr_i
|
||||
|
||||
SQSUB v7.2s, v6.2s, v1.2s //a2_3_v = vqsub_s32(x2_3,vr_i);
|
||||
SQADD v4.2s, v6.2s, v1.2s //real_imag1 = vqadd_s32(x2_3,vr_i);
|
||||
SQADD v5.2s, v14.2s, v7.2s //real_imag2 = vqadd_s32(a0_1_v,a2_3_v);
|
||||
|
||||
UZP1 v1.2s, v9.2s, v13.2s //x6_7
|
||||
SQADD v6.2s, v9.2s, v13.2s //tempr = vqadd_s32(x6_14,x7_15);
|
||||
SQSUB v14.2s, v9.2s, v13.2s //tempi = vqsub_s32(x6_14,x7_15);
|
||||
|
||||
SMULL v9.2d, v6.2s, v0.2s
|
||||
SMULL v13.2d, v14.2s, v0.2s
|
||||
|
||||
SSHR v9.2d, v9.2d, #32
|
||||
SSHR v13.2d, v13.2d, #32
|
||||
|
||||
SHL v9.4s, v9.4s, #1
|
||||
SHL v13.4s, v13.4s, #1
|
||||
|
||||
|
||||
|
||||
MOV v0.s[0], v9.s[2]
|
||||
MOV v0.s[1], v13.s[2]
|
||||
|
||||
SQSUB v9.2s, v1.2s, v0.2s // a20_30_v
|
||||
SQADD v13.2s, v1.2s, v0.2s //real_imag5
|
||||
|
||||
|
||||
MOV w4, v9.s[1]
|
||||
MOV v9.s[1], v13.s[1]
|
||||
MOV v13.s[1], w4
|
||||
|
||||
SQADD v6.2s, v9.2s, v8.2s //real_imag3
|
||||
|
||||
ST1 {v3.s}[0], [x1], #4
|
||||
ST1 {v4.s}[0], [x1], #4
|
||||
ST1 {v5.s}[0], [x1], #4
|
||||
ST1 {v6.s}[0], [x1], #4
|
||||
ST1 {v12.s}[0], [x1], #4
|
||||
ST1 {v13.s}[0], [x1], #4
|
||||
|
||||
ST1 {v3.s}[1], [x2], #4
|
||||
ST1 {v4.s}[1], [x2], #4
|
||||
ST1 {v5.s}[1], [x2], #4
|
||||
ST1 {v6.s}[1], [x2], #4
|
||||
ST1 {v12.s}[1], [x2], #4
|
||||
ST1 {v13.s}[1], [x2], #4
|
||||
//ST4 {v3.s,v4.s,v5.s,v6.s}[0],[x1],x5
|
||||
//ST4 {v3.s,v4.s,v5.s,v6.s}[1],[x2],x5
|
||||
//ST2 {v12.s,v13.s}[0],[x1]
|
||||
//ST2 {v12.s,v13.s}[1],[x2]
|
||||
pop_v_regs
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
123
decoder/armv8/ixheaacd_no_lap1.s
Normal file
123
decoder/armv8/ixheaacd_no_lap1.s
Normal file
|
|
@ -0,0 +1,123 @@
|
|||
///******************************************************************************
|
||||
// *
|
||||
// * Copyright (C) 2018 The Android Open Source Project
|
||||
// *
|
||||
// * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// * you may not use this file except in compliance with the License.
|
||||
// * You may obtain a copy of the License at:
|
||||
// *
|
||||
// * http://www.apache.org/licenses/LICENSE-2.0
|
||||
// *
|
||||
// * Unless required by applicable law or agreed to in writing, software
|
||||
// * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// * See the License for the specific language governing permissions and
|
||||
// * limitations under the License.
|
||||
// *
|
||||
// *****************************************************************************
|
||||
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
//*/
|
||||
|
||||
|
||||
.macro push_v_regs
|
||||
stp q8, q9, [sp, #-32]!
|
||||
stp q10, q11, [sp, #-32]!
|
||||
stp q12, q13, [sp, #-32]!
|
||||
stp q14, q15, [sp, #-32]!
|
||||
stp X8, X9, [sp, #-16]!
|
||||
stp X10, X11, [sp, #-16]!
|
||||
stp X12, X13, [sp, #-16]!
|
||||
stp X14, X15, [sp, #-16]!
|
||||
stp X16, X17, [sp, #-16]!
|
||||
stp X29, X30, [sp, #-16]!
|
||||
.endm
|
||||
.macro pop_v_regs
|
||||
ldp X29, X30, [sp], #16
|
||||
ldp X16, X17, [sp], #16
|
||||
ldp X14, X15, [sp], #16
|
||||
ldp X12, X13, [sp], #16
|
||||
ldp X10, X11, [sp], #16
|
||||
ldp X8, X9, [sp], #16
|
||||
ldp q14, q15, [sp], #32
|
||||
ldp q12, q13, [sp], #32
|
||||
ldp q10, q11, [sp], #32
|
||||
ldp q8, q9, [sp], #32
|
||||
.endm
|
||||
.text
|
||||
.global ixheaacd_neg_shift_spec_armv8
|
||||
ixheaacd_neg_shift_spec_armv8:
|
||||
push_v_regs
|
||||
MOV X5, #448
|
||||
SUB X6, X5, #1
|
||||
LSL X6, X6, #2
|
||||
ADD X6, X6, X0
|
||||
MOV X8, #-16
|
||||
SUB X6, X6, #12
|
||||
LSL X7, X3, #1
|
||||
DUP V31.4S, W2
|
||||
MOV W4, #0x8000
|
||||
DUP V30.4S, W4
|
||||
|
||||
LD1 {V0.4S}, [X6], X8
|
||||
SQNEG V0.4S, V0.4S
|
||||
|
||||
LD1 {V6.4S}, [X6], X8
|
||||
SQSHL V25.4S, V0.4S, V31.4S
|
||||
SQADD V24.4S, V25.4S, V30.4S
|
||||
SSHR V23.4S, V24.4S, #16
|
||||
REV64 V23.4S, V23.4S
|
||||
SUB X5, X5, #8
|
||||
|
||||
UZP1 V27.8H, V23.8H, V23.8H
|
||||
SQNEG V29.4S, V6.4S
|
||||
|
||||
LOOP_1:
|
||||
|
||||
ST1 {V27.H}[2], [X1], X7
|
||||
SQSHL V22.4S, V29.4S, V31.4S
|
||||
LD1 {V0.4S}, [X6], X8
|
||||
ST1 {V27.H}[3], [X1], X7
|
||||
SQADD V21.4S, V22.4S, V30.4S
|
||||
ST1 {V27.H}[0], [X1], X7
|
||||
SQNEG V0.4S, V0.4S
|
||||
ST1 {V27.H}[1], [X1], X7
|
||||
SSHR V20.4S, V21.4S, #16
|
||||
REV64 V20.4S, V20.4S
|
||||
SUBS X5, X5, #8
|
||||
|
||||
|
||||
UZP1 V27.8H, V20.8H, V20.8H
|
||||
SQSHL V25.4S, V0.4S, V31.4S
|
||||
ST1 {V27.H}[2], [X1], X7
|
||||
LD1 {V6.4S}, [X6], X8
|
||||
SQADD V24.4S, V25.4S, V30.4S
|
||||
ST1 {V27.H}[3], [X1], X7
|
||||
SSHR V23.4S, V24.4S, #16
|
||||
ST1 {V27.H}[0], [X1], X7
|
||||
REV64 V23.4S, V23.4S
|
||||
ST1 {V27.H}[1], [X1], X7
|
||||
|
||||
|
||||
UZP1 V27.8H, V23.8H, V23.8H
|
||||
SQNEG V29.4S, V6.4S
|
||||
|
||||
BGT LOOP_1
|
||||
|
||||
ST1 {V27.H}[2], [X1], X7
|
||||
SQSHL V22.4S, V29.4S, V31.4S
|
||||
ST1 {V27.H}[3], [X1], X7
|
||||
ST1 {V27.H}[0], [X1], X7
|
||||
SQADD V21.4S, V22.4S, V30.4S
|
||||
ST1 {V27.H}[1], [X1], X7
|
||||
SSHR V20.4S, V21.4S, #16
|
||||
|
||||
REV64 V20.4S, V20.4S
|
||||
|
||||
UZP1 V27.8H, V20.8H, V20.8H
|
||||
|
||||
ST1 {V27.H}[2], [X1], X7
|
||||
ST1 {V27.H}[3], [X1], X7
|
||||
ST1 {V27.H}[0], [X1], X7
|
||||
ST1 {V27.H}[1], [X1], X7
|
||||
pop_v_regs
|
||||
RET
|
||||
333
decoder/armv8/ixheaacd_overlap_add1.s
Normal file
333
decoder/armv8/ixheaacd_overlap_add1.s
Normal file
|
|
@ -0,0 +1,333 @@
|
|||
///******************************************************************************
|
||||
// *
|
||||
// * Copyright (C) 2018 The Android Open Source Project
|
||||
// *
|
||||
// * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// * you may not use this file except in compliance with the License.
|
||||
// * You may obtain a copy of the License at:
|
||||
// *
|
||||
// * http://www.apache.org/licenses/LICENSE-2.0
|
||||
// *
|
||||
// * Unless required by applicable law or agreed to in writing, software
|
||||
// * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// * See the License for the specific language governing permissions and
|
||||
// * limitations under the License.
|
||||
// *
|
||||
// *****************************************************************************
|
||||
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
//*/
|
||||
|
||||
.macro push_v_regs
|
||||
stp q8, q9, [sp, #-32]!
|
||||
stp q10, q11, [sp, #-32]!
|
||||
stp q12, q13, [sp, #-32]!
|
||||
stp q14, q15, [sp, #-32]!
|
||||
stp X8, X9, [sp, #-16]!
|
||||
stp X10, X11, [sp, #-16]!
|
||||
stp X12, X13, [sp, #-16]!
|
||||
stp X14, X15, [sp, #-16]!
|
||||
stp X16, X17, [sp, #-16]!
|
||||
stp X29, X30, [sp, #-16]!
|
||||
.endm
|
||||
.macro pop_v_regs
|
||||
ldp X29, X30, [sp], #16
|
||||
ldp X16, X17, [sp], #16
|
||||
ldp X14, X15, [sp], #16
|
||||
ldp X12, X13, [sp], #16
|
||||
ldp X10, X11, [sp], #16
|
||||
ldp X8, X9, [sp], #16
|
||||
ldp q14, q15, [sp], #32
|
||||
ldp q12, q13, [sp], #32
|
||||
ldp q10, q11, [sp], #32
|
||||
ldp q8, q9, [sp], #32
|
||||
.endm
|
||||
|
||||
.text
|
||||
.global ixheaacd_over_lap_add1_armv8
|
||||
ixheaacd_over_lap_add1_armv8:
|
||||
push_v_regs
|
||||
LSL X10, X5, #1
|
||||
SUB X11, X10, #1
|
||||
LSL X10, X11, #2
|
||||
ADD X10, X0, X10
|
||||
SUB X10, X10, #12
|
||||
LSL X8, X11, #1
|
||||
ADD X8, X8, X3
|
||||
SUB X8, X8, #14
|
||||
MOV X12, #-16
|
||||
DUP V11.8H, W4
|
||||
LD1 {V3.4S}, [X10], X12
|
||||
MOV W7, #0x2000
|
||||
|
||||
NEG W7, W7
|
||||
SQNEG V0.4S, V3.4S
|
||||
DUP V10.4S, W7
|
||||
UZP1 V31.8H, V0.8H, V0.8H
|
||||
UZP2 V30.8H, V0.8H, V0.8H
|
||||
REV64 V31.8h, V31.8h
|
||||
REV64 V30.8h, V30.8h
|
||||
SUB X11, X5, #1
|
||||
UZP1 V7.8H, V3.8H, V3.8H
|
||||
UZP2 V6.8H, V3.8H, V3.8H
|
||||
REV64 V7.8H, V7.8H
|
||||
REV64 V6.8H, V6.8H
|
||||
MOV V16.S[0], W6
|
||||
MOV V17.S[0], W11
|
||||
SMULL V17.4S, V16.4H, V17.4H
|
||||
MOV W11, V17.S[0]
|
||||
LSL X11, X11, #1
|
||||
|
||||
LD2 {V2.4H, V3.4H}, [X8], X12
|
||||
ADD X11, X11, X2
|
||||
REV64 V2.4H, V2.4H
|
||||
REV64 V3.4H, V3.4H
|
||||
LSL X4, X6, #1
|
||||
NEG X4, X4
|
||||
LSL X9, X6, #1
|
||||
MOV V16.S[0], W5
|
||||
MOV V17.S[0], W6
|
||||
SMULL V17.4S, V16.4H, V17.4H
|
||||
MOV W6, V17.S[0]
|
||||
LSL W6, W6, #1
|
||||
ADD X6, X6, X2
|
||||
|
||||
UMULL V15.4S, V7.4H, V2.4H
|
||||
LD1 {V4.4S}, [X1], #16
|
||||
USHR V15.4S, V15.4S, #16
|
||||
|
||||
SMLAL V15.4S, V6.4H, V2.4H
|
||||
SQSHL V15.4S, V15.4S, V11.4S
|
||||
SSHLL V27.4S, V3.4H, #0
|
||||
SMULL V28.2D, V27.2S, V4.2S
|
||||
SMULL2 V29.2D, V27.4S, V4.4S
|
||||
SQXTN V28.2S, V28.2D
|
||||
SQXTN2 V28.4S, V29.2D
|
||||
MOV V14.16B, V28.16B
|
||||
|
||||
SQADD V14.4S, V14.4S, V10.4S
|
||||
SQSUB V13.4S, V15.4S, V14.4S
|
||||
SQSHL V13.4S, V13.4S, #2
|
||||
SSHR V13.4S, V13.4S, #16
|
||||
UZP1 V26.8H, V13.8H, V13.8H
|
||||
|
||||
UMULL V12.4S, V31.4H, V3.4H
|
||||
USHR V12.4S, V12.4S, #16
|
||||
SMLAL V12.4S, V30.4H, V3.4H
|
||||
SQSHL V12.4S, V12.4S, V11.4S
|
||||
LD1 {V3.4S}, [X10], X12
|
||||
|
||||
SSHLL V27.4S, V2.4H, #0
|
||||
SMULL V28.2D, V27.2S, V4.2S
|
||||
SMULL2 V29.2D, V27.4S, V4.4S
|
||||
SQXTN V28.2S, V28.2D
|
||||
SQXTN2 V28.4S, V29.2D
|
||||
MOV V8.16B, V28.16B
|
||||
|
||||
SQADD V8.4S, V8.4S, V10.4S
|
||||
|
||||
SQNEG V0.4S, V3.4S
|
||||
UZP1 V1.8H, V0.8H, V0.8H
|
||||
UZP2 V0.8H, V0.8H, V0.8H
|
||||
REV64 V1.8h, V1.8h
|
||||
REV64 V0.8h, V0.8h
|
||||
SQSUB V9.4S, V12.4S, V8.4S
|
||||
UZP1 V7.8H, V3.8H, V3.8H
|
||||
UZP2 V6.8H, V3.8H, V3.8H
|
||||
REV64 V7.8h, V7.8h
|
||||
REV64 V6.8h, V6.8h
|
||||
SQSHL V9.4S, V9.4S, #2
|
||||
LD2 {V2.4H, V3.4H}, [X8], X12
|
||||
SSHR V9.4S, V9.4S, #16
|
||||
REV64 V2.4H, V2.4H
|
||||
REV64 V3.4H, V3.4H
|
||||
UZP1 V18.8H, V9.8H, V9.8H
|
||||
|
||||
LD1 {V4.4S}, [X1], #16
|
||||
SUB W5, W5, #8
|
||||
|
||||
|
||||
LOOP_1:
|
||||
|
||||
ST1 {V26.H}[0], [X11], X4
|
||||
UMULL V15.4S, V7.4H, V2.4H
|
||||
ST1 {V26.H}[1], [X11], X4
|
||||
UMULL V12.4S, V1.4H, V3.4H
|
||||
ST1 {V26.H}[2], [X11], X4
|
||||
USHR V15.4S, V15.4S, #16
|
||||
ST1 {V26.H}[3], [X11], X4
|
||||
USHR V12.4S, V12.4S, #16
|
||||
ST1 {V18.H}[0], [X6], X9
|
||||
SMLAL V15.4S, V6.4H, V2.4H
|
||||
ST1 {V18.H}[1], [X6], X9
|
||||
SMLAL V12.4S, V0.4H, V3.4H
|
||||
ST1 {V18.H}[2], [X6], X9
|
||||
SQSHL V15.4S, V15.4S, V11.4S
|
||||
ST1 {V18.H}[3], [X6], X9
|
||||
SQSHL V12.4S, V12.4S, V11.4S
|
||||
LD1 {V6.4S}, [X10], X12
|
||||
|
||||
SSHLL V27.4S, V3.4H, #0
|
||||
SMULL V28.2D, V27.2S, V4.2S
|
||||
SMULL2 V29.2D, V27.4S, V4.4S
|
||||
SQXTN V28.2S, V28.2D
|
||||
SQXTN2 V28.4S, V29.2D
|
||||
MOV V14.16B, V28.16B
|
||||
|
||||
SSHLL V27.4S, V2.4H, #0
|
||||
SMULL V28.2D, V27.2S, V4.2S
|
||||
SMULL2 V29.2D, V27.4S, V4.4S
|
||||
SQXTN V28.2S, V28.2D
|
||||
SQXTN2 V28.4S, V29.2D
|
||||
MOV V8.16B, V28.16B
|
||||
|
||||
LD2 {V2.4H, V3.4H}, [X8], X12
|
||||
|
||||
SQNEG V0.4S, V6.4S
|
||||
|
||||
LD1 {V4.4S}, [X1], #16
|
||||
|
||||
SQADD V14.4S, V14.4S, V10.4S
|
||||
UZP1 V1.8H, V0.8H, V0.8H
|
||||
UZP2 V0.8H, V0.8H, V0.8H
|
||||
REV64 V1.8h, V1.8h
|
||||
REV64 V0.8h, V0.8h
|
||||
SQADD V8.4S, V8.4S, V10.4S
|
||||
UZP1 V7.8H, V6.8H, V6.8H
|
||||
UZP2 V6.8H, V6.8H, V6.8H
|
||||
REV64 V7.8h, V7.8h
|
||||
REV64 V6.8h, V6.8h
|
||||
SQSUB V13.4S, V15.4S, V14.4S
|
||||
REV64 V2.4H, V2.4H
|
||||
REV64 V3.4H, V3.4H
|
||||
SQSUB V9.4S, V12.4S, V8.4S
|
||||
SQSHL V13.4S, V13.4S, #2
|
||||
SQSHL V9.4S, V9.4S, #2
|
||||
UMULL V15.4S, V7.4H, V2.4H
|
||||
SSHR V13.4S, V13.4S, #16
|
||||
UZP1 V26.8H, V13.8H, V13.8H
|
||||
SSHR V9.4S, V9.4S, #16
|
||||
ST1 {V26.H}[0], [X11], X4
|
||||
UMULL V12.4S, V1.4H, V3.4H
|
||||
UZP1 V18.8H, V9.8H, V9.8H
|
||||
USHR V15.4S, V15.4S, #16
|
||||
ST1 {V26.H}[1], [X11], X4
|
||||
SMLAL V15.4S, V6.4H, V2.4H
|
||||
ST1 {V26.H}[2], [X11], X4
|
||||
USHR V12.4S, V12.4S, #16
|
||||
ST1 {V26.H}[3], [X11], X4
|
||||
SMLAL V12.4S, V0.4H, V3.4H
|
||||
ST1 {V18.H}[0], [X6], X9
|
||||
SQSHL V15.4S, V15.4S, V11.4S
|
||||
ST1 {V18.H}[1], [X6], X9
|
||||
SQSHL V12.4S, V12.4S, V11.4S
|
||||
ST1 {V18.H}[2], [X6], X9
|
||||
|
||||
SSHLL V27.4S, V3.4H, #0
|
||||
SMULL V28.2D, V27.2S, V4.2S
|
||||
SMULL2 V29.2D, V27.4S, V4.4S
|
||||
SQXTN V28.2S, V28.2D
|
||||
SQXTN2 V28.4S, V29.2D
|
||||
MOV V14.16B, V28.16B
|
||||
|
||||
ST1 {V18.H}[3], [X6], X9
|
||||
|
||||
|
||||
SSHLL V27.4S, V2.4H, #0
|
||||
SMULL V28.2D, V27.2S, V4.2S
|
||||
SMULL2 V29.2D, V27.4S, V4.4S
|
||||
SQXTN V28.2S, V28.2D
|
||||
SQXTN2 V28.4S, V29.2D
|
||||
MOV V8.16B, V28.16B
|
||||
|
||||
LD1 {V3.4S}, [X10], X12
|
||||
SQADD V14.4S, V14.4S, V10.4S
|
||||
|
||||
SQNEG V0.4S, V3.4S
|
||||
UZP1 V1.8H, V0.8H, V0.8H
|
||||
UZP2 V0.8H, V0.8H, V0.8H
|
||||
REV64 V1.8H, V1.8H
|
||||
REV64 V0.8H, V0.8H
|
||||
SQSUB V13.4S, V15.4S, V14.4S
|
||||
UZP1 V7.8H, V3.8H, V3.8H
|
||||
UZP2 V6.8H, V3.8H, V3.8H
|
||||
REV64 V7.8H, V7.8H
|
||||
REV64 V6.8H, V6.8H
|
||||
SQADD V8.4S, V8.4S, V10.4S
|
||||
LD2 {V2.4H, V3.4H}, [X8], X12
|
||||
SQSUB V9.4S, V12.4S, V8.4S
|
||||
REV64 V2.4H, V2.4H
|
||||
REV64 V3.4H, V3.4H
|
||||
SQSHL V13.4S, V13.4S, #2
|
||||
LD1 {V4.4S}, [X1], #16
|
||||
|
||||
SQSHL V9.4S, V9.4S, #2
|
||||
SSHR V13.4S, V13.4S, #16
|
||||
SUBS X5, X5, #8
|
||||
SSHR V9.4S, V9.4S, #16
|
||||
UZP1 V26.8H, V13.8H, V13.8H
|
||||
UZP1 V18.8H, V9.8H, V9.8H
|
||||
|
||||
BGT LOOP_1
|
||||
|
||||
ST1 {V26.H}[0], [X11], X4
|
||||
UMULL V15.4S, V7.4H, V2.4H
|
||||
ST1 {V26.H}[1], [X11], X4
|
||||
UMULL V12.4s, V1.4H, V3.4H
|
||||
ST1 {V26.H}[2], [X11], X4
|
||||
USHR V15.4S, V15.4S, #16
|
||||
ST1 {V26.H}[3], [X11], X4
|
||||
USHR V12.4S, V12.4S, #16
|
||||
|
||||
ST1 {V18.H}[0], [X6], X9
|
||||
SMLAL V15.4S, V6.4H, V2.4H
|
||||
ST1 {V18.H}[1], [X6], X9
|
||||
SMLAL V12.4S, V0.4H, V3.4H
|
||||
ST1 {V18.H}[2], [X6], X9
|
||||
SQSHL V15.4S, V15.4S, V11.4S
|
||||
ST1 {V18.H}[3], [X6], X9
|
||||
SQSHL V12.4S, V12.4S, V11.4S
|
||||
|
||||
|
||||
SSHLL V27.4S, V3.4H, #0
|
||||
SMULL V28.2D, V27.2S, V4.2S
|
||||
SMULL2 V29.2D, V27.4S, V4.4S
|
||||
SQXTN V28.2S, V28.2D
|
||||
SQXTN2 V28.4S, V29.2D
|
||||
MOV V14.16B, V28.16B
|
||||
|
||||
SSHLL V27.4S, V2.4H, #0
|
||||
SMULL V28.2D, V27.2S, V4.2S
|
||||
SMULL2 V29.2D, V27.4S, V4.4S
|
||||
SQXTN V28.2S, V28.2D
|
||||
SQXTN2 V28.4S, V29.2D
|
||||
MOV V8.16B, V28.16B
|
||||
|
||||
SQADD V14.4S, V14.4S, V10.4S
|
||||
SQADD V8.4S, V8.4S, V10.4S
|
||||
SQSUB V13.4S, V15.4S, V14.4S
|
||||
SQSUB V9.4S, V12.4S, V8.4S
|
||||
SQSHL V13.4S, V13.4S, #2
|
||||
SQSHL V9.4S, V9.4S, #2
|
||||
SSHR V13.4S, V13.4S, #16
|
||||
SSHR V9.4S, V9.4S, #16
|
||||
UZP1 V26.8H, V13.8H, V13.8H
|
||||
|
||||
UZP1 V18.8H, V9.8H, V9.8H
|
||||
|
||||
|
||||
ST1 {V26.H}[0], [X11], X4
|
||||
ST1 {V26.H}[1], [X11], X4
|
||||
ST1 {V26.H}[2], [X11], X4
|
||||
ST1 {V26.H}[3], [X11], X4
|
||||
|
||||
ST1 {V18.H}[0], [X6], X9
|
||||
ST1 {V18.H}[1], [X6], X9
|
||||
ST1 {V18.H}[2], [X6], X9
|
||||
ST1 {V18.H}[3], [X6], X9
|
||||
pop_v_regs
|
||||
RET
|
||||
|
||||
|
||||
|
||||
|
||||
305
decoder/armv8/ixheaacd_overlap_add2.s
Normal file
305
decoder/armv8/ixheaacd_overlap_add2.s
Normal file
|
|
@ -0,0 +1,305 @@
|
|||
///******************************************************************************
|
||||
// *
|
||||
// * Copyright (C) 2018 The Android Open Source Project
|
||||
// *
|
||||
// * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// * you may not use this file except in compliance with the License.
|
||||
// * You may obtain a copy of the License at:
|
||||
// *
|
||||
// * http://www.apache.org/licenses/LICENSE-2.0
|
||||
// *
|
||||
// * Unless required by applicable law or agreed to in writing, software
|
||||
// * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// * See the License for the specific language governing permissions and
|
||||
// * limitations under the License.
|
||||
// *
|
||||
// *****************************************************************************
|
||||
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
//*/
|
||||
|
||||
|
||||
.macro push_v_regs
|
||||
stp q8, q9, [sp, #-32]!
|
||||
stp q10, q11, [sp, #-32]!
|
||||
stp q12, q13, [sp, #-32]!
|
||||
stp q14, q15, [sp, #-32]!
|
||||
stp X8, X9, [sp, #-16]!
|
||||
stp X10, X11, [sp, #-16]!
|
||||
stp X12, X13, [sp, #-16]!
|
||||
stp X14, X15, [sp, #-16]!
|
||||
stp X16, X17, [sp, #-16]!
|
||||
stp X29, X30, [sp, #-16]!
|
||||
.endm
|
||||
.macro pop_v_regs
|
||||
ldp X29, X30, [sp], #16
|
||||
ldp X16, X17, [sp], #16
|
||||
ldp X14, X15, [sp], #16
|
||||
ldp X12, X13, [sp], #16
|
||||
ldp X10, X11, [sp], #16
|
||||
ldp X8, X9, [sp], #16
|
||||
ldp q14, q15, [sp], #32
|
||||
ldp q12, q13, [sp], #32
|
||||
ldp q10, q11, [sp], #32
|
||||
ldp q8, q9, [sp], #32
|
||||
.endm
|
||||
.text
|
||||
.global ixheaacd_over_lap_add2_armv8
|
||||
|
||||
|
||||
ixheaacd_over_lap_add2_armv8:
|
||||
push_v_regs
|
||||
MOV X8, X5
|
||||
SUB X12, X5, #1
|
||||
LSL X9, X5, #2
|
||||
LSL X12, X12, #2
|
||||
ADD X10, X0, X9
|
||||
ADD X7, X1, X12
|
||||
ADD X4, X4, #1
|
||||
LD2 {V0.4H, V1.4H}, [X10], #16
|
||||
LSL X11, X6, #2
|
||||
SUB X7, X7, #12
|
||||
SUB X4, X4, #16
|
||||
MOV X12, #-16
|
||||
MOV X13, #1
|
||||
ADD X14, X4, #1
|
||||
NEG X14, X14
|
||||
DUP V21.4S, W4
|
||||
LD2 {V6.4H, V7.4H}, [X7], X12
|
||||
LSL X4, X13, X14
|
||||
REV64 V4.4H, V6.4H
|
||||
DUP V20.4S, W4
|
||||
REV64 V5.4H, V7.4H
|
||||
MOV X4, X3
|
||||
|
||||
MOV X9, X2
|
||||
LD2 {V2.4H, V3.4H}, [X3], #16
|
||||
|
||||
UMULL V23.4S, V0.4H, V2.4H
|
||||
UMLSL V23.4S, V4.4H, V3.4H
|
||||
LD2 {V8.4H, V9.4H}, [X10], #16
|
||||
SSHR V23.4S, V23.4S, #16
|
||||
LD2 {V10.4H, V11.4H}, [X3], #16
|
||||
SMLAL V23.4S, V1.4H, V2.4H
|
||||
SMLSL V23.4S, V5.4H, V3.4H
|
||||
LD2 {V14.4H, V15.4H}, [X7], X12
|
||||
REV64 V12.4H, V14.4H
|
||||
REV64 V13.4H, V15.4H
|
||||
SQADD V22.4S, V23.4S, V20.4S
|
||||
SSHL V22.4S, V22.4S, V21.4S
|
||||
MOV V24.16B, V22.16B
|
||||
SUB X8, X8, #8
|
||||
|
||||
LOOP_1:
|
||||
|
||||
LD2 {V0.4H, V1.4H}, [X10], #16
|
||||
UMULL V19.4S, V8.4H, V10.4H
|
||||
LD2 {V2.4H, V3.4H}, [X3], #16
|
||||
UMLSL V19.4S, V12.4H, V11.4H
|
||||
LD2 {V6.4H, V7.4H}, [X7], X12
|
||||
UMULL V23.4S, V0.4H, V2.4H
|
||||
REV64 V4.4H, V6.4H
|
||||
UMLSL V23.4S, V4.4H, V3.4H
|
||||
REV64 V5.4H, V7.4H
|
||||
SSHR V19.4S, V19.4S, #16
|
||||
ST1 {V24.S}[0], [X2], X11
|
||||
SMLAL V19.4S, V9.4H, V10.4H
|
||||
ST1 {V24.S}[1], [X2], X11
|
||||
SSHR V23.4S, V23.4S, #16
|
||||
ST1 {V24.S}[2], [X2], X11
|
||||
SMLAL V23.4S, V1.4H, V2.4H
|
||||
|
||||
ST1 {V24.S}[3], [X2], X11
|
||||
SMLSL V19.4S, V13.4H, V11.4H
|
||||
SMLSL V23.4S, V5.4H, V3.4H
|
||||
|
||||
LD2 {V8.4H, V9.4H}, [X10], #16
|
||||
LD2 {V10.4H, V11.4H}, [X3], #16
|
||||
|
||||
|
||||
LD2 {V14.4H, V15.4H}, [X7], X12
|
||||
SQADD V18.4S, V19.4S, V20.4S
|
||||
REV64 V12.4H, V14.4H
|
||||
REV64 V13.4H, V15.4H
|
||||
SQADD V22.4S, V23.4S, V20.4S
|
||||
SSHL V18.4S, V18.4S, V21.4S
|
||||
MOV V16.16B, V18.16B
|
||||
ST1 {V16.S}[0], [X2], X11
|
||||
SSHL V22.4S, V22.4S, V21.4S
|
||||
|
||||
|
||||
MOV V24.16B, V22.16B
|
||||
SUBS X8, X8, #8
|
||||
|
||||
ST1 {V16.S}[1], [X2], X11
|
||||
ST1 {V16.S}[2], [X2], X11
|
||||
ST1 {V16.S}[3], [X2], X11
|
||||
|
||||
|
||||
BGT LOOP_1
|
||||
|
||||
|
||||
ST1 {V24.S}[0], [X2], X11
|
||||
UMULL V19.4S, V8.4H, V10.4H
|
||||
UMLSL V19.4S, V12.4H, V11.4H
|
||||
ST1 {V24.S}[1], [X2], X11
|
||||
ST1 {V24.S}[2], [X2], X11
|
||||
SSHR V19.4S, V19.4S, #16
|
||||
ST1 {V24.S}[3], [X2], X11
|
||||
SMLAL V19.4S, V9.4H, V10.4H
|
||||
SMLSL V19.4S, V13.4H, V11.4H
|
||||
MOV X12, #12
|
||||
MOV V30.S[0], W5
|
||||
MOV V31.S[0], W6
|
||||
SMULL V29.4S, V30.4H, V31.4H
|
||||
MOV W7, V29.S[0]
|
||||
|
||||
LSL W10, W5, #1
|
||||
SQADD V18.4S, V19.4S, V20.4S
|
||||
SSHL V18.4S, V18.4S, V21.4S
|
||||
MOV V16.16B, V18.16B
|
||||
|
||||
ST1 {V16.S}[0], [X2], X11
|
||||
LSL X7, X7, #2
|
||||
|
||||
ST1 {V16.S}[1], [X2], X11
|
||||
ADD X7, X7, X9
|
||||
|
||||
ST1 {V16.S}[2], [X2], X11
|
||||
ST1 {V16.S}[3], [X2], X11
|
||||
|
||||
SUB X11, X10, #1
|
||||
LSL X10, X11, #2
|
||||
ADD X10, X0, X10
|
||||
LSL X11, X11, #1
|
||||
SUB X10, X10, X12
|
||||
LSL X8, X6, #2
|
||||
MOV X12, #-16
|
||||
ADD X11, X11, X4
|
||||
|
||||
LD1 {V6.4S}, [X10], X12
|
||||
SUB X11, X11, #14
|
||||
|
||||
|
||||
REV64 V0.4S, V6.4S
|
||||
SQNEG V0.4S, V0.4S
|
||||
|
||||
|
||||
UZP1 V1.8H, V0.8H, V0.8H
|
||||
UZP2 V0.8H, V0.8H, V0.8H
|
||||
REV64 V1.4S, V1.4S
|
||||
REV64 V0.4S, V0.4S
|
||||
LD2 {V2.4H, V3.4H}, [X11], X12
|
||||
REV64 V2.4H, V2.4H
|
||||
REV64 V3.4H, V3.4H
|
||||
|
||||
LD2 {V4.4H, V5.4H}, [X1], #16
|
||||
|
||||
UMULL V23.4S, V1.4H, V3.4H
|
||||
UMLSL V23.4S, V4.4H, V2.4H
|
||||
SSHR V23.4S, V23.4S, #16
|
||||
SMLAL V23.4S, V0.4H, V3.4H
|
||||
SMLSL V23.4S, V5.4H, V2.4H
|
||||
SQADD V22.4S, V23.4S, V20.4S
|
||||
SSHL V22.4S, V22.4S, V21.4S
|
||||
MOV V24.16B, V22.16B
|
||||
|
||||
|
||||
LD1 {V14.4S}, [X10], X12
|
||||
UMULL V23.4S, V1.4H, V3.4H
|
||||
UMLSL V23.4S, V4.4H, V2.4H
|
||||
REV64 V8.4S, V14.4S
|
||||
SQNEG V8.4S, V8.4S
|
||||
LD2 {V10.4H, V11.4H}, [X11], X12
|
||||
SSHR V23.4S, V23.4S, #16
|
||||
LD2 {V12.4H, V13.4H}, [X1], #16
|
||||
SMLAL V23.4S, V0.4H, V3.4H
|
||||
SMLSL V23.4S, V5.4H, V2.4H
|
||||
UZP1 V9.8H, V8.8H, V8.8H
|
||||
UZP2 V8.8H, V8.8H, V8.8H
|
||||
rev64 v9.4s, v9.4s
|
||||
rev64 v8.4s, v8.4s
|
||||
REV64 V10.4H, V10.4H
|
||||
REV64 V11.4H, V11.4H
|
||||
SQADD V22.4S, V23.4S, V20.4S
|
||||
SUB X5, X5, #8
|
||||
SSHL V22.4S, V22.4S, V21.4S
|
||||
MOV V24.16B, V22.16B
|
||||
|
||||
|
||||
LOOP_2:
|
||||
|
||||
|
||||
LD1 {V6.4S}, [X10], X12
|
||||
UMULL V19.4S, V9.4H, V11.4H
|
||||
REV64 V0.4S, V6.4S
|
||||
SQNEG V0.4S, V0.4S
|
||||
UZP1 V1.8H, V0.8H, V0.8H
|
||||
UZP2 V0.8H, V0.8H, V0.8H
|
||||
REV64 V1.4S, V1.4S
|
||||
REV64 V0.4S, V0.4S
|
||||
LD2 {V2.4H, V3.4H}, [X11], X12
|
||||
REV64 V2.8H, V2.8H
|
||||
REV64 V3.8H, V3.8H
|
||||
|
||||
LD2 {V4.4H, V5.4H}, [X1], #16
|
||||
UMLSL V19.4S, V12.4H, V10.4H
|
||||
ST1 {V24.S}[0], [X7], X8
|
||||
UMULL V23.4S, V1.4H, V3.4H
|
||||
ST1 {V24.S}[1], [X7], X8
|
||||
SSHR V19.4S, V19.4S, #16
|
||||
ST1 {V24.S}[2], [X7], X8
|
||||
UMLSL V23.4S, V4.4H, V2.4H
|
||||
ST1 {V24.S}[3], [X7], X8
|
||||
SMLAL V19.4S, V8.4H, V11.4H
|
||||
LD1 {V14.4S}, [X10], X12
|
||||
SSHR V23.4S, V23.4S, #16
|
||||
SMLSL V19.4S, V13.4H, V10.4H
|
||||
LD2 {V10.4H, V11.4H}, [X11], X12
|
||||
SMLAL V23.4S, V0.4H, V3.4H
|
||||
SMLSL V23.4S, V5.4H, V2.4H
|
||||
REV64 V8.4S, V14.4S
|
||||
LD2 {V12.4H, V13.4H}, [X1], #16
|
||||
SQNEG V8.4S, V8.4S
|
||||
REV64 V11.4H, V11.4h
|
||||
REV64 V10.4H, V10.4H
|
||||
SQADD V18.4S, V19.4S, V20.4S
|
||||
UZP1 V9.8H, V8.8H, V8.8H
|
||||
UZP2 V8.8H, V8.8H, V8.8H
|
||||
rev64 v9.4s, v9.4s
|
||||
rev64 v8.4s, v8.4s
|
||||
SQADD V22.4S, V23.4S, V20.4S
|
||||
SSHL V18.4S, V18.4S, V21.4S
|
||||
SUBS X5, X5, #8
|
||||
MOV V16.16B, V18.16B
|
||||
ST1 {V16.S}[0], [X7], X8
|
||||
SSHL V22.4S, V22.4S, V21.4S
|
||||
ST1 {V16.S}[1], [X7], X8
|
||||
MOV V24.16B, V22.16B
|
||||
|
||||
ST1 {V16.S}[2], [X7], X8
|
||||
ST1 {V16.S}[3], [X7], X8
|
||||
|
||||
BGT LOOP_2
|
||||
|
||||
ST1 {V24.S}[0], [X7], X8
|
||||
UMULL V19.4S, V9.4H, V11.4H
|
||||
UMLSL V19.4S, V12.4H, V10.4H
|
||||
ST1 {V24.S}[1], [X7], X8
|
||||
ST1 {V24.S}[2], [X7], X8
|
||||
SSHR V19.4S, V19.4S, #16
|
||||
ST1 {V24.S}[3], [X7], X8
|
||||
|
||||
SMLAL V19.4S, V8.4H, V11.4H
|
||||
SMLSL V19.4S, V13.4H, V10.4H
|
||||
SQADD V18.4S, V19.4S, V20.4S
|
||||
SSHL V18.4S, V18.4S, V21.4S
|
||||
MOV V16.16B, V18.16B
|
||||
|
||||
ST1 {V16.S}[0], [X7], X8
|
||||
ST1 {V16.S}[1], [X7], X8
|
||||
ST1 {V16.S}[2], [X7], X8
|
||||
ST1 {V16.S}[3], [X7], X8
|
||||
|
||||
pop_v_regs
|
||||
RET
|
||||
713
decoder/armv8/ixheaacd_post_twiddle.s
Normal file
713
decoder/armv8/ixheaacd_post_twiddle.s
Normal file
|
|
@ -0,0 +1,713 @@
|
|||
///******************************************************************************
|
||||
// *
|
||||
// * Copyright (C) 2018 The Android Open Source Project
|
||||
// *
|
||||
// * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// * you may not use this file except in compliance with the License.
|
||||
// * You may obtain a copy of the License at:
|
||||
// *
|
||||
// * http://www.apache.org/licenses/LICENSE-2.0
|
||||
// *
|
||||
// * Unless required by applicable law or agreed to in writing, software
|
||||
// * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// * See the License for the specific language governing permissions and
|
||||
// * limitations under the License.
|
||||
// *
|
||||
// *****************************************************************************
|
||||
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
//*/
|
||||
|
||||
|
||||
.macro push_v_regs
|
||||
stp q8, q9, [sp, #-32]!
|
||||
stp q10, q11, [sp, #-32]!
|
||||
stp q12, q13, [sp, #-32]!
|
||||
stp q14, q15, [sp, #-32]!
|
||||
stp x21, x22, [sp, #-16]!
|
||||
stp x23, x24, [sp, #-16]!
|
||||
.endm
|
||||
.macro pop_v_regs
|
||||
ldp x23, x24, [sp], #16
|
||||
ldp x21, x22, [sp], #16
|
||||
ldp q14, q15, [sp], #32
|
||||
ldp q12, q13, [sp], #32
|
||||
ldp q10, q11, [sp], #32
|
||||
ldp q8, q9, [sp], #32
|
||||
.endm
|
||||
.macro swp reg1, reg2
|
||||
MOV X16, \reg1
|
||||
MOV \reg1, \reg2
|
||||
MOV \reg2, x16
|
||||
.endm
|
||||
.text
|
||||
.global ixheaacd_post_twiddle_armv8
|
||||
ixheaacd_post_twiddle_armv8:
|
||||
|
||||
|
||||
push_v_regs
|
||||
|
||||
ARM_PROLOGUE:
|
||||
CMP w3, #0x400
|
||||
LDR x21, =7500
|
||||
ADD x2, x2, x21
|
||||
BLT NEXT
|
||||
MOV w4, #50
|
||||
MOV w5, #-50
|
||||
MOV x6, #4
|
||||
dup v10.4h, w4
|
||||
B NEXT1
|
||||
|
||||
NEXT:
|
||||
MOV w4, #0x192
|
||||
MOV w5, #0xfe6e
|
||||
MOV x6, #32
|
||||
dup v10.4h, w4
|
||||
|
||||
NEXT1:
|
||||
LDR w9, [x2]
|
||||
LSL W22, W9, #16
|
||||
AND W21, W9, #0xFFFF0000
|
||||
|
||||
LDR w7, [x1], #4
|
||||
LDR w8, [x1], #4
|
||||
|
||||
ADD x2, x2, x6
|
||||
|
||||
|
||||
SMULL X11, w8, w21
|
||||
ASR X11, x11, #32
|
||||
SMULL X10, w8, w22
|
||||
ASR X10, x10, #32
|
||||
SMULL X12, w7, w21
|
||||
ASR X12, x12, #32
|
||||
SMULL X23, w7, w22
|
||||
ASR X23, x23, #32
|
||||
ADD w8, w11, w23
|
||||
|
||||
|
||||
SUB w10, w10, w12
|
||||
|
||||
MVN w8, w8
|
||||
ADD w8, w8, #1
|
||||
|
||||
|
||||
|
||||
LSL w21, w5, #16
|
||||
LSL w22, w4, #16
|
||||
SMULL X23, w10, w21
|
||||
ASR X23, x23, #32
|
||||
ADD w9, w8, w23
|
||||
SMULL X23, w8, w22
|
||||
ASR X23, x23, #32
|
||||
ADD w11, w10, w23
|
||||
|
||||
LSL x7, x3, #2
|
||||
ADD x7, x0, x7
|
||||
SUB x7, x7, #4
|
||||
|
||||
STR w11, [x7], #-4
|
||||
|
||||
STR w9, [x0], #4
|
||||
|
||||
LSL x5, x3, #2
|
||||
ADD x5, x1, x5
|
||||
SUB x5, x5, #40
|
||||
|
||||
|
||||
SUB w3, w3, #1
|
||||
ASR w3, w3, #4
|
||||
|
||||
|
||||
SUB x7, x7, #28
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
MOV x8, #-32
|
||||
|
||||
NEON_PROLOGUE:
|
||||
|
||||
LD4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x5], x8
|
||||
|
||||
LD4 {v4.4h, v5.4h, v6.4h, v7.4h}, [x1], #32
|
||||
LD2 {v8.h, v9.h}[0], [x2], x6
|
||||
LD2 {v8.h, v9.h}[1], [x2], x6
|
||||
LD2 {v8.h, v9.h}[2], [x2], x6
|
||||
LD2 {v8.h, v9.h}[3], [x2], x6
|
||||
|
||||
rev64 v12.4h, v8.4h
|
||||
rev64 v13.4h, v9.4h
|
||||
|
||||
uMULL v30.4s, v2.4h, v13.4h
|
||||
uMULL v28.4s, v0.4h, v13.4h
|
||||
uMULL v26.4s, v2.4h, v12.4h
|
||||
uMULL v24.4s, v0.4h, v12.4h
|
||||
|
||||
ushR v30.4s, v30.4s, #16
|
||||
ushR v28.4s, v28.4s, #16
|
||||
ushR v26.4s, v26.4s, #16
|
||||
ushR v24.4s, v24.4s, #16
|
||||
|
||||
sMLAL v30.4s, v3.4h, v13.4h
|
||||
sMLAL v28.4s, v1.4h, v13.4h
|
||||
sMLAL v26.4s, v3.4h, v12.4h
|
||||
sMLAL v24.4s, v1.4h, v12.4h
|
||||
|
||||
uMULL v22.4s, v6.4h, v9.4h
|
||||
uMULL v20.4s, v4.4h, v9.4h
|
||||
|
||||
ADD v28.4s, v28.4s , v26.4s
|
||||
SUB v30.4s, v30.4s , v24.4s
|
||||
NEG v28.4s, v28.4s
|
||||
|
||||
uMULL v18.4s, v6.4h, v8.4h
|
||||
uMULL v16.4s, v4.4h, v8.4h
|
||||
|
||||
mov v31.8b, v30.8b
|
||||
mov v27.D[0], v30.D[1]
|
||||
ushR v22.4s, v22.4s, #16
|
||||
|
||||
mov v24.8b, v28.8b
|
||||
mov v25.D[0], v28.D[1]
|
||||
ushR v20.4s, v20.4s, #16
|
||||
|
||||
|
||||
UZP1 v26.4h, v31.4h, v27.4h
|
||||
UZP2 v27.4h, v31.4h, v27.4h
|
||||
ushR v18.4s, v18.4s, #16
|
||||
|
||||
|
||||
mov v31.8B , v24.8B
|
||||
UZP1 v24.4h, v31.4h, v25.4h
|
||||
UZP2 v25.4h, v31.4h, v25.4h
|
||||
ushR v16.4s, v16.4s, #16
|
||||
|
||||
|
||||
sMLAL v22.4s, v7.4h, v9.4h
|
||||
sMLAL v20.4s, v5.4h, v9.4h
|
||||
sMLAL v18.4s, v7.4h, v8.4h
|
||||
sMLAL v16.4s, v5.4h, v8.4h
|
||||
|
||||
LD2 {v8.h, v9.h}[0], [x2], x6
|
||||
uMULL v0.4s, v26.4h, v10.4h
|
||||
|
||||
LD2 {v8.h, v9.h}[1], [x2], x6
|
||||
uMULL v2.4s, v24.4h, v10.4h
|
||||
|
||||
|
||||
LD2 {v8.h, v9.h}[2], [x2], x6
|
||||
ADD v22.4s, v22.4s , v16.4s
|
||||
|
||||
LD2 {v8.h, v9.h}[3], [x2], x6
|
||||
SUB v20.4s, v18.4s , v20.4s
|
||||
|
||||
rev64 v12.4h, v8.4h
|
||||
rev64 v13.4h, v9.4h
|
||||
NEG v22.4s, v22.4s
|
||||
|
||||
|
||||
mov v18.8b, v22.8b
|
||||
mov v19.D[0], v22.D[1]
|
||||
ushR v0.4s, v0.4s, #16
|
||||
|
||||
mov v16.16b, v20.16b
|
||||
mov v17.D[0], v20.D[1]
|
||||
ushR v2.4s, v2.4s, #16
|
||||
|
||||
|
||||
MOV v31.8b, v18.8b
|
||||
UZP1 v18.4h, v31.4h, v19.4h
|
||||
UZP2 v19.4h, v31.4h, v19.4h
|
||||
sMLAL v0.4s, v27.4h, v10.4h
|
||||
|
||||
|
||||
MOV v31.8b, v16.8b
|
||||
UZP1 v16.4h, v31.4h, v17.4h
|
||||
UZP2 v17.4h, v31.4h, v17.4h
|
||||
sMLAL v2.4s, v25.4h, v10.4h
|
||||
|
||||
uMULL v4.4s, v18.4h, v10.4h
|
||||
uMULL v6.4s, v16.4h, v10.4h
|
||||
|
||||
NEG v0.4s, v0.4s
|
||||
ADD v14.4s, v30.4s , v2.4s
|
||||
ADD v26.4s, v28.4s , v0.4s
|
||||
|
||||
rev64 v14.4s, v14.4s
|
||||
ushR v4.4s, v4.4s, #16
|
||||
|
||||
swp v14.D[0], v14.D[1]
|
||||
ushR v6.4s, v6.4s, #16
|
||||
|
||||
sMLAL v4.4s, v19.4h, v10.4h
|
||||
LD4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x5], x8
|
||||
sMLAL v6.4s, v17.4h, v10.4h
|
||||
|
||||
|
||||
SUB x3, x3, #2
|
||||
|
||||
ADD v24.4s, v20.4s , v4.4s
|
||||
|
||||
rev64 v24.4s, v24.4s
|
||||
NEG v16.4s, v6.4s
|
||||
|
||||
LD4 {v4.4h, v5.4h, v6.4h, v7.4h}, [x1], #32
|
||||
|
||||
swp v24.D[0], v24.D[1]
|
||||
ADD v16.4s, v22.4s , v16.4s
|
||||
|
||||
|
||||
|
||||
CORE_LOOP:
|
||||
uMULL v30.4s, v2.4h, v13.4h
|
||||
MOV v25.16B, v24.16B
|
||||
ST2 { v25.4s, v26.4s}, [x7], x8
|
||||
uMULL v28.4s, v0.4h, v13.4h
|
||||
|
||||
uMULL v26.4s, v2.4h, v12.4h
|
||||
MOV v15.16B, v14.16B
|
||||
ST2 { v15.4s, v16.4s}, [x0], #32
|
||||
uMULL v24.4s, v0.4h, v12.4h
|
||||
|
||||
ushR v30.4s, v30.4s, #16
|
||||
ushR v28.4s, v28.4s, #16
|
||||
ushR v26.4s, v26.4s, #16
|
||||
ushR v24.4s, v24.4s, #16
|
||||
|
||||
sMLAL v30.4s, v3.4h, v13.4h
|
||||
sMLAL v28.4s, v1.4h, v13.4h
|
||||
sMLAL v26.4s, v3.4h, v12.4h
|
||||
sMLAL v24.4s, v1.4h, v12.4h
|
||||
|
||||
uMULL v22.4s, v6.4h, v9.4h
|
||||
uMULL v20.4s, v4.4h, v9.4h
|
||||
|
||||
|
||||
ADD v28.4s, v28.4s , v26.4s
|
||||
SUB v30.4s, v30.4s , v24.4s
|
||||
NEG v28.4s, v28.4s
|
||||
|
||||
uMULL v18.4s, v6.4h, v8.4h
|
||||
uMULL v16.4s, v4.4h, v8.4h
|
||||
|
||||
|
||||
mov v26.8b, v30.8b
|
||||
mov v27.D[0], v30.D[1]
|
||||
ushR v22.4s, v22.4s, #16
|
||||
|
||||
|
||||
mov v24.8b, v28.8b
|
||||
mov v25.D[0], v28.D[1]
|
||||
ushR v20.4s, v20.4s, #16
|
||||
|
||||
|
||||
MOV v31.8b, v26.8b
|
||||
UZP1 v26.4h, v31.4h, v27.4h
|
||||
UZP2 v27.4h, v31.4h, v27.4h
|
||||
ushR v18.4s, v18.4s, #16
|
||||
|
||||
|
||||
MOV v31.8b, v24.8b
|
||||
UZP1 v24.4h, v31.4h, v25.4h
|
||||
UZP2 v25.4h, v31.4h, v25.4h
|
||||
ushR v16.4s, v16.4s, #16
|
||||
|
||||
|
||||
sMLAL v22.4s, v7.4h, v9.4h
|
||||
sMLAL v20.4s, v5.4h, v9.4h
|
||||
sMLAL v18.4s, v7.4h, v8.4h
|
||||
sMLAL v16.4s, v5.4h, v8.4h
|
||||
|
||||
LD2 {v8.h, v9.h}[0], [x2], x6
|
||||
uMULL v0.4s, v26.4h, v10.4h
|
||||
|
||||
LD2 {v8.h, v9.h}[1], [x2], x6
|
||||
uMULL v2.4s, v24.4h, v10.4h
|
||||
|
||||
LD2 {v8.h, v9.h}[2], [x2], x6
|
||||
ADD v22.4s, v22.4s , v16.4s
|
||||
|
||||
LD2 {v8.h, v9.h}[3], [x2], x6
|
||||
SUB v20.4s, v18.4s , v20.4s
|
||||
|
||||
rev64 v12.4h, v8.4h
|
||||
rev64 v13.4h, v9.4h
|
||||
NEG v22.4s, v22.4s
|
||||
|
||||
mov v18.8b, v22.8b
|
||||
mov v19.D[0], v22.D[1]
|
||||
ushR v0.4s, v0.4s, #16
|
||||
|
||||
mov v16.8b, v20.8b
|
||||
mov v17.D[0], v20.D[1]
|
||||
ushR v2.4s, v2.4s, #16
|
||||
|
||||
|
||||
MOV v31.8b, v18.8b
|
||||
UZP1 v18.4h, v31.4h, v19.4h
|
||||
UZP2 v19.4h, v31.4h, v19.4h
|
||||
sMLAL v0.4s, v27.4h, v10.4h
|
||||
|
||||
|
||||
MOV v31.8b, v16.8b
|
||||
UZP1 v16.4h, v31.4h, v17.4h
|
||||
UZP2 v17.4h, v31.4h, v17.4h
|
||||
sMLAL v2.4s, v25.4h, v10.4h
|
||||
|
||||
uMULL v4.4s, v18.4h, v10.4h
|
||||
uMULL v6.4s, v16.4h, v10.4h
|
||||
|
||||
NEG v0.4s, v0.4s
|
||||
ADD v14.4s, v30.4s , v2.4s
|
||||
ADD v26.4s, v28.4s , v0.4s
|
||||
|
||||
rev64 v14.4s, v14.4s
|
||||
ushR v4.4s, v4.4s, #16
|
||||
|
||||
swp v14.D[0], v14.D[1]
|
||||
ushR v6.4s, v6.4s, #16
|
||||
|
||||
sMLAL v4.4s, v19.4h, v10.4h
|
||||
LD4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x5], x8
|
||||
sMLAL v6.4s, v17.4h, v10.4h
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
ADD v24.4s, v20.4s , v4.4s
|
||||
|
||||
rev64 v24.4s, v24.4s
|
||||
NEG v16.4s, v6.4s
|
||||
|
||||
LD4 {v4.4h, v5.4h, v6.4h, v7.4h}, [x1], #32
|
||||
|
||||
swp v24.D[0], v24.D[1]
|
||||
ADD v16.4s, v22.4s , v16.4s
|
||||
|
||||
SUBS x3, x3, #1
|
||||
|
||||
BNE CORE_LOOP
|
||||
|
||||
|
||||
|
||||
|
||||
NEON_EPILOGUE:
|
||||
uMULL v30.4s, v2.4h, v13.4h
|
||||
MOV v25.16B, v24.16B
|
||||
ST2 { v25.4s, v26.4s}, [x7], x8
|
||||
uMULL v28.4s, v0.4h, v13.4h
|
||||
|
||||
uMULL v26.4s, v2.4h, v12.4h
|
||||
MOV v15.16B, v14.16B
|
||||
ST2 { v15.4s, v16.4s}, [x0], #32
|
||||
uMULL v24.4s, v0.4h, v12.4h
|
||||
|
||||
|
||||
|
||||
ushR v30.4s, v30.4s, #16
|
||||
ushR v28.4s, v28.4s, #16
|
||||
ushR v26.4s, v26.4s, #16
|
||||
ushR v24.4s, v24.4s, #16
|
||||
|
||||
sMLAL v30.4s, v3.4h, v13.4h
|
||||
sMLAL v28.4s, v1.4h, v13.4h
|
||||
sMLAL v26.4s, v3.4h, v12.4h
|
||||
sMLAL v24.4s, v1.4h, v12.4h
|
||||
|
||||
|
||||
uMULL v22.4s, v6.4h, v9.4h
|
||||
uMULL v20.4s, v4.4h, v9.4h
|
||||
|
||||
|
||||
ADD v28.4s, v28.4s , v26.4s
|
||||
SUB v30.4s, v30.4s , v24.4s
|
||||
NEG v28.4s, v28.4s
|
||||
|
||||
uMULL v18.4s, v6.4h, v8.4h
|
||||
uMULL v16.4s, v4.4h, v8.4h
|
||||
|
||||
|
||||
mov v26.8b, v30.8b
|
||||
mov v27.D[0], v30.D[1]
|
||||
ushR v22.4s, v22.4s, #16
|
||||
|
||||
mov v24.16b, v28.16b
|
||||
mov v25.D[0], v28.D[1]
|
||||
ushR v20.4s, v20.4s, #16
|
||||
|
||||
|
||||
mov v31.8b, v26.8b
|
||||
UZP1 v26.4h, v31.4h, v27.4h
|
||||
UZP2 v27.4h, v31.4h, v27.4h
|
||||
ushR v18.4s, v18.4s, #16
|
||||
|
||||
|
||||
mov v31.8b, v24.8b
|
||||
UZP1 v24.4h, v31.4h, v25.4h
|
||||
UZP2 v25.4h, v31.4h, v25.4h
|
||||
ushR v16.4s, v16.4s, #16
|
||||
|
||||
|
||||
sMLAL v22.4s, v7.4h, v9.4h
|
||||
sMLAL v20.4s, v5.4h, v9.4h
|
||||
sMLAL v18.4s, v7.4h, v8.4h
|
||||
sMLAL v16.4s, v5.4h, v8.4h
|
||||
|
||||
|
||||
uMULL v0.4s, v26.4h, v10.4h
|
||||
|
||||
|
||||
uMULL v2.4s, v24.4h, v10.4h
|
||||
|
||||
|
||||
ADD v22.4s, v22.4s , v16.4s
|
||||
|
||||
|
||||
SUB v20.4s, v18.4s , v20.4s
|
||||
|
||||
|
||||
NEG v22.4s, v22.4s
|
||||
|
||||
|
||||
mov v18.16b, v22.16b
|
||||
ushR v0.4s, v0.4s, #16
|
||||
|
||||
mov v16.16b, v20.16b
|
||||
ushR v2.4s, v2.4s, #16
|
||||
|
||||
|
||||
mov v31.16b, v18.16b
|
||||
mov v19.d[0], v31.d[1]
|
||||
UZP1 v18.4h, v31.4h, v19.4h
|
||||
UZP2 v19.4h, v31.4h, v19.4h
|
||||
sMLAL v0.4s, v27.4h, v10.4h
|
||||
|
||||
|
||||
mov v31.16b, v16.16b
|
||||
mov v17.d[0], v31.d[1]
|
||||
UZP1 v16.4h, v31.4h, v17.4h
|
||||
UZP2 v17.4h, v31.4h, v17.4h
|
||||
sMLAL v2.4s, v25.4h, v10.4h
|
||||
|
||||
uMULL v4.4s, v18.4h, v10.4h
|
||||
uMULL v6.4s, v16.4h, v10.4h
|
||||
|
||||
NEG v0.4s, v0.4s
|
||||
ADD v14.4s, v30.4s , v2.4s
|
||||
ADD v26.4s, v28.4s , v0.4s
|
||||
|
||||
rev64 v14.4s, v14.4s
|
||||
ushR v4.4s, v4.4s, #16
|
||||
|
||||
swp v14.D[0], v14.D[1]
|
||||
ushR v6.4s, v6.4s, #16
|
||||
|
||||
sMLAL v4.4s, v19.4h, v10.4h
|
||||
|
||||
sMLAL v6.4s, v17.4h, v10.4h
|
||||
|
||||
|
||||
|
||||
|
||||
ADD v24.4s, v20.4s , v4.4s
|
||||
|
||||
rev64 v24.4s, v24.4s
|
||||
NEG v16.4s, v6.4s
|
||||
|
||||
|
||||
|
||||
swp v24.D[0], v24.D[1]
|
||||
ADD v16.4s, v22.4s , v16.4s
|
||||
|
||||
MOV v25.16B, v24.16B
|
||||
MOV v15.16B, v14.16B
|
||||
ST2 { v15.4s, v16.4s}, [x0], #32
|
||||
ST2 { v25.4s, v26.4s}, [x7], x8
|
||||
|
||||
|
||||
|
||||
|
||||
LD4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x5], x8
|
||||
|
||||
movi v6.2s, #0x00000000
|
||||
movi v7.2s, #0x00000000
|
||||
|
||||
LD2 {v4.2s, v5.2s}, [x1], #16
|
||||
LD2 {v6.s, v7.s}[0], [x1]
|
||||
|
||||
LD2 {v8.h, v9.h}[0], [x2], x6
|
||||
LD2 {v8.h, v9.h}[1], [x2], x6
|
||||
LD2 {v8.h, v9.h}[2], [x2], x6
|
||||
LD2 {v8.h, v9.h}[3], [x2], x6
|
||||
|
||||
rev64 v12.8h, v8.8h
|
||||
rev64 v13.8h, v9.8h
|
||||
swp v5.D[0], v6.D[0]
|
||||
|
||||
|
||||
MOV v30.8B, V4.8B
|
||||
UZP1 v4.4h, v30.4h, v5.4h
|
||||
UZP2 v5.4h, v30.4h, v5.4h
|
||||
MOV v30.8B, V6.8B
|
||||
UZP1 v6.4h, v30.4h, v7.4h
|
||||
UZP2 v7.4h, v30.4h, v7.4h
|
||||
uMULL v30.4s, v2.4h, v13.4h
|
||||
uMULL v28.4s, v0.4h, v13.4h
|
||||
|
||||
uMULL v26.4s, v2.4h, v12.4h
|
||||
uMULL v24.4s, v0.4h, v12.4h
|
||||
|
||||
ushR v30.4s, v30.4s, #16
|
||||
ushR v28.4s, v28.4s, #16
|
||||
ushR v26.4s, v26.4s, #16
|
||||
ushR v24.4s, v24.4s, #16
|
||||
|
||||
sMLAL v30.4s, v3.4h, v13.4h
|
||||
sMLAL v28.4s, v1.4h, v13.4h
|
||||
sMLAL v26.4s, v3.4h, v12.4h
|
||||
sMLAL v24.4s, v1.4h, v12.4h
|
||||
|
||||
uMULL v22.4s, v6.4h, v9.4h
|
||||
uMULL v20.4s, v4.4h, v9.4h
|
||||
|
||||
|
||||
ADD v28.4s, v28.4s , v26.4s
|
||||
SUB v30.4s, v30.4s , v24.4s
|
||||
NEG v28.4s, v28.4s
|
||||
|
||||
uMULL v18.4s, v6.4h, v8.4h
|
||||
uMULL v16.4s, v4.4h, v8.4h
|
||||
|
||||
mov v26.8b, v30.8b
|
||||
mov v27.D[0], v30.D[1]
|
||||
ushR v22.4s, v22.4s, #16
|
||||
|
||||
mov v24.16b, v28.16b
|
||||
mov v25.D[0], v28.D[1]
|
||||
ushR v20.4s, v20.4s, #16
|
||||
|
||||
|
||||
MOV v31.8B, V26.8B
|
||||
UZP1 v26.4h, v31.4h, v27.4h
|
||||
UZP2 v27.4h, v31.4h, v27.4h
|
||||
ushr v18.4s, v18.4s, #16
|
||||
|
||||
MOV v31.8B, V24.8B
|
||||
UZP1 v24.4h, v31.4h, v25.4h
|
||||
UZP2 v25.4h, v31.4h, v25.4h
|
||||
ushR v16.4s, v16.4s, #16
|
||||
|
||||
sMLAL v22.4s, v7.4h, v9.4h
|
||||
sMLAL v20.4s, v5.4h, v9.4h
|
||||
sMLAL v18.4s, v7.4h, v8.4h
|
||||
sMLAL v16.4s, v5.4h, v8.4h
|
||||
|
||||
|
||||
uMULL v0.4s, v26.4h, v10.4h
|
||||
|
||||
|
||||
uMULL v2.4s, v24.4h, v10.4h
|
||||
|
||||
ADD v22.4s, v22.4s , v16.4s
|
||||
|
||||
|
||||
SUB v20.4s, v18.4s , v20.4s
|
||||
|
||||
|
||||
NEG v22.4s, v22.4s
|
||||
|
||||
|
||||
mov v18.8B, v22.8B
|
||||
mov v19.D[0], v22.D[1]
|
||||
ushR v0.4s, v0.4s, #16
|
||||
|
||||
mov v16.16b, v20.16b
|
||||
mov v17.D[0], v20.D[1]
|
||||
ushR v2.4s, v2.4s, #16
|
||||
|
||||
|
||||
MOV v31.8B, V18.8B
|
||||
UZP1 v18.4h, v31.4h, v19.4h
|
||||
UZP2 v19.4h, v31.4h, v19.4h
|
||||
sMLAL v0.4s, v27.4h, v10.4h
|
||||
|
||||
|
||||
MOV v31.8B, V16.8B
|
||||
UZP1 v16.4h, v31.4h, v17.4h
|
||||
UZP2 v17.4h, v31.4h, v17.4h
|
||||
sMLAL v2.4s, v25.4h, v10.4h
|
||||
|
||||
uMULL v4.4s, v18.4h, v10.4h
|
||||
uMULL v6.4s, v16.4h, v10.4h
|
||||
|
||||
NEG v0.4s, v0.4s
|
||||
ADD v14.4s, v30.4s , v2.4s
|
||||
ADD v26.4s, v28.4s , v0.4s
|
||||
|
||||
rev64 v14.4s, v14.4s
|
||||
ushR v4.4s, v4.4s, #16
|
||||
|
||||
swp v14.D[0], v14.D[1]
|
||||
ushR v6.4s, v6.4s, #16
|
||||
|
||||
sMLAL v4.4s, v19.4h, v10.4h
|
||||
|
||||
sMLAL v6.4s, v17.4h, v10.4h
|
||||
|
||||
|
||||
|
||||
|
||||
ADD v24.4s, v20.4s , v4.4s
|
||||
|
||||
rev64 v24.4s, v24.4s
|
||||
NEG v16.4s, v6.4s
|
||||
|
||||
swp v24.D[0], v24.D[1]
|
||||
ADD v16.4s, v22.4s , v16.4s
|
||||
|
||||
|
||||
MOV v15.16B, v14.16B
|
||||
ST2 {v15.2s, v16.2s}, [x0], #16
|
||||
|
||||
ST2 {v15.s, v16.s}[2], [x0], #8
|
||||
|
||||
ST1 {v15.s}[3], [x0]
|
||||
|
||||
ADD x7, x7, #4
|
||||
|
||||
ST1 {v26.s}[0], [x7], #4
|
||||
MOV v25.16B, v24.16B
|
||||
ST2 {v25.s, v26.s}[1], [x7], #8
|
||||
MOV v27.D[0], V26.d[1]
|
||||
mov v26.d[0], v25.d[1]
|
||||
ST2 {v26.2s, v27.2s}, [x7]
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
pop_v_regs
|
||||
ret
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
148
decoder/armv8/ixheaacd_postradixcompute4.s
Normal file
148
decoder/armv8/ixheaacd_postradixcompute4.s
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
//.include "ihevc_neon_macros.s"
|
||||
.macro push_v_regs
|
||||
stp x8, x9, [sp, #-16]!
|
||||
stp x10, X11, [sp, #-16]!
|
||||
stp X12, X13, [sp, #-16]!
|
||||
stp X14, X15, [sp, #-16]!
|
||||
stp X29, X30, [sp, #-16]!
|
||||
.endm
|
||||
.macro pop_v_regs
|
||||
ldp X29, X30, [sp], #16
|
||||
ldp X14, X15, [sp], #16
|
||||
ldp X12, X13, [sp], #16
|
||||
ldp X10, X11, [sp], #16
|
||||
ldp X8, X9, [sp], #16
|
||||
.endm
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_postradixcompute4
|
||||
|
||||
|
||||
ixheaacd_postradixcompute4:
|
||||
|
||||
// STMFD sp!, {x4-x12, x14}
|
||||
push_v_regs
|
||||
//SUB sp, sp, #16
|
||||
|
||||
//HARD CODED for FFT Length of 16
|
||||
// x3 is always 16
|
||||
|
||||
|
||||
//SUB x4, x3, #2 ; y to y offset calculated
|
||||
//MOV x4, #14
|
||||
//STR x4, [sp, #8] ; (npoints / 2)*4bytes - 4bytes
|
||||
|
||||
//STR x0, [sp, #12] ; (3*(npoints/2))*4bytes - 4bytes
|
||||
// x0 to x2 offset (npoints / 2)*4bytes
|
||||
ADD x4, x1, x3, lsl #1 // x1 -> x0, x4 -> x2
|
||||
MOV x3, #2
|
||||
|
||||
|
||||
POSTRADIX4_START:
|
||||
|
||||
// LDMIA x1!, {x5-x12} // x_0 :x_7
|
||||
|
||||
LDP w5, w6, [x1], #8 // x_0 :x_1
|
||||
LDP w7, w8, [x1], #8 // x_2 :x_3
|
||||
LDP w9, w10, [x1], #8 // x_4 :x_5
|
||||
LDP w11, w12, [x1], #8 // x_6 :x_7
|
||||
|
||||
ADD w14, w5, w9 // xh0_0 = x_0 + x_4
|
||||
SUB w5, w5, w9 // xl0_0 = x_0 - x_4
|
||||
|
||||
ADD w9, w6, w10 // xh1_0 = x_1 + x_5
|
||||
SUB w6, w6, w10 // xl1_0 = x_1 - x_5
|
||||
|
||||
ADD w10, w7, w11 // xh0_1 = x_2 + x_6
|
||||
SUB w7, w7, w11 // xl0_1 = x_2 - x_6
|
||||
|
||||
ADD w11, w8, w12 // xh1_1 = x_3 + x_7
|
||||
SUB w8, w8, w12 // xl1_1 = x_3 - x_7
|
||||
|
||||
ADD w12, w14, w10 // n00 = xh0_0 + xh0_1
|
||||
SUB w14, w14, w10 // n20 = xh0_0 - xh0_1
|
||||
|
||||
ADD w10, w9, w11 // n01 = xh1_0 + xh1_1
|
||||
SUB w9, w9, w11 // n21 = xh1_0 - xh1_1
|
||||
|
||||
ADD w11, w5, w8 // n10 = xl0_0 + xl1_1
|
||||
SUB w5, w5, w8 // n30 = xl0_0 - xl1_1
|
||||
|
||||
ADD w8, w6, w7 // n31 = xl1_0 + xl0_1
|
||||
SUB w6, w6, w7 // n11 = xl1_0 - xl0_1
|
||||
|
||||
|
||||
STR w12, [x0], #4 // y0[h2] = n00, x7 -> y0[h2 + 1]
|
||||
|
||||
STR w10, [x0], #14<<1 // y0[h2 + 1] = n01, x7 -> y1[h2]
|
||||
|
||||
STR w11, [x0], #4 // y1[h2] = n10, x7 -> y1[h2 + 1]
|
||||
STR w6 , [x0], #14<<1 // y1[h2 + 1] = n11, x7 -> y2[h2]
|
||||
|
||||
STR w14, [x0], #4 // y2[h2] = n20, x7 -> y2[h2 + 1]
|
||||
STR w9 , [x0], #14<<1 // y2[h2 + 1] = n21, x7 -> y3[h2]
|
||||
|
||||
STR w5, [x0], #4 // y3[h2] = n30, x7 -> y3[h2 + 1]
|
||||
STR w8, [x0], #0 // y3[h2 + 1] = n31, x7 -> y0[h2+2]
|
||||
|
||||
// LDMIA x4!, {x5-x12} // x_0 :x_7
|
||||
|
||||
LDP w5, w6, [x4], #8 // x_8 :x_8
|
||||
LDP w7, w8, [x4], #8 // x_a :x_b
|
||||
LDP w9, w10, [x4], #8 // x_c :x_d
|
||||
LDP w11, w12, [x4], #8 // x_e :x_f
|
||||
|
||||
SUB x0, x0, #92 // #4*3 + #14<<1 * 3 - 8
|
||||
|
||||
|
||||
ADD w14, w5, w9
|
||||
SUB w5, w5, w9
|
||||
|
||||
ADD w9, w6, w10
|
||||
SUB w6, w6, w10
|
||||
|
||||
ADD w10, w7, w11
|
||||
SUB w7, w7, w11
|
||||
|
||||
ADD w11, w8, w12
|
||||
SUB w8, w8, w12
|
||||
|
||||
ADD w12, w14, w10
|
||||
SUB w14, w14, w10
|
||||
|
||||
ADD w10, w9, w11
|
||||
SUB w9, w9, w11
|
||||
|
||||
ADD w11, w5, w8
|
||||
SUB w5, w5, w8
|
||||
|
||||
ADD w8, w6, w7
|
||||
SUB w6, w6, w7
|
||||
|
||||
STR w12, [x0], #4
|
||||
STR w10, [x0], #14<<1
|
||||
|
||||
STR w11, [x0], #4
|
||||
STR w6, [x0], #14<<1
|
||||
|
||||
STR w14, [x0], #4
|
||||
STR w9, [x0], #14<<1
|
||||
|
||||
|
||||
STR w5, [x0], #4
|
||||
STR w8, [x0], #0
|
||||
|
||||
ADD x1, x1, #1 << 5 // x0 += (Word32) npoints >> 1
|
||||
ADD x4, x4, #1 << 5 // x2 += (Word32) npoints >> 1
|
||||
SUB x0, x0, #100-8
|
||||
|
||||
SUBS w3, w3, #1
|
||||
|
||||
BGT POSTRADIX4_START
|
||||
|
||||
// LDMFD sp!, {x4-x12, x15}
|
||||
pop_v_regs
|
||||
ret
|
||||
|
||||
|
||||
512
decoder/armv8/ixheaacd_pre_twiddle.s
Normal file
512
decoder/armv8/ixheaacd_pre_twiddle.s
Normal file
|
|
@ -0,0 +1,512 @@
|
|||
///******************************************************************************
|
||||
// *
|
||||
// * Copyright (C) 2018 The Android Open Source Project
|
||||
// *
|
||||
// * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// * you may not use this file except in compliance with the License.
|
||||
// * You may obtain a copy of the License at:
|
||||
// *
|
||||
// * http://www.apache.org/licenses/LICENSE-2.0
|
||||
// *
|
||||
// * Unless required by applicable law or agreed to in writing, software
|
||||
// * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// * See the License for the specific language governing permissions and
|
||||
// * limitations under the License.
|
||||
// *
|
||||
// *****************************************************************************
|
||||
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
//*/
|
||||
|
||||
|
||||
.macro push_v_regs
|
||||
stp q8, q9, [sp, #-32]!
|
||||
stp q10, q11, [sp, #-32]!
|
||||
stp q12, q13, [sp, #-32]!
|
||||
stp q14, q15, [sp, #-32]!
|
||||
stp X8, X9, [sp, #-16]!
|
||||
stp X10, X11, [sp, #-16]!
|
||||
stp X12, X13, [sp, #-16]!
|
||||
stp X22, X23, [sp, #-16]!
|
||||
stp X16, X17, [sp, #-16]!
|
||||
stp X20, X21, [sp, #-16]!
|
||||
.endm
|
||||
.macro pop_v_regs
|
||||
ldp X20, X21, [sp], #16
|
||||
ldp X16, X17, [sp], #16
|
||||
ldp X22, X23, [sp], #16
|
||||
ldp X12, X13, [sp], #16
|
||||
ldp X10, X11, [sp], #16
|
||||
ldp X8, X9, [sp], #16
|
||||
ldp q14, q15, [sp], #32
|
||||
ldp q12, q13, [sp], #32
|
||||
ldp q10, q11, [sp], #32
|
||||
ldp q8, q9, [sp], #32
|
||||
.endm
|
||||
|
||||
.macro swp reg1, reg2
|
||||
MOV X16, \reg1
|
||||
MOV \reg1, \reg2
|
||||
MOV \reg2, x16
|
||||
.endm
|
||||
.text
|
||||
.global ixheaacd_pretwiddle_compute_armv8
|
||||
|
||||
ixheaacd_pretwiddle_compute_armv8:
|
||||
|
||||
push_v_regs
|
||||
|
||||
LSL x7, x4, #4
|
||||
ADD x7, x2, x7
|
||||
SUB x7, x7, #4
|
||||
LDR x22, =7500
|
||||
ADD x3, x3, x22
|
||||
MVN w5, w5
|
||||
ADD w5, w5, #1
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
ARM_PROLOGUE:
|
||||
LDRH w21, [x3]
|
||||
LDRH w22, [x3, #2]
|
||||
LSL w22, w22, #16
|
||||
LSL w21, w21, #16
|
||||
|
||||
LDR w8, [x3], #4
|
||||
LDR w9, [x0], #4
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
SMULL X12, w9, w21
|
||||
ASR X12, x12, #32
|
||||
LDR w10, [x1], #-4
|
||||
SMULL X11, w9, w22
|
||||
ASR X11, x11, #32
|
||||
SMULL X23, w10, w22
|
||||
ASR X23, x23, #32
|
||||
ADD w9, w12, w23
|
||||
SMULL X6, w10, w21
|
||||
ASR X6, x6, #32
|
||||
|
||||
|
||||
MVN w9, w9
|
||||
ADD w9, w9, #1
|
||||
SUB w11, w11, w6
|
||||
CMP w5, #0
|
||||
BGT NEXT
|
||||
MVN w8, w5
|
||||
ADD w8, w8, #1
|
||||
ASR w11, w11, w8
|
||||
ASR w9, w9, w8
|
||||
B NEXT1
|
||||
|
||||
NEXT:
|
||||
LSL w11, w11, w5
|
||||
LSL w9, w9, w5
|
||||
|
||||
|
||||
|
||||
NEXT1:
|
||||
STR w9, [x2], #4
|
||||
STR w11, [x2], #4
|
||||
|
||||
CMP X4, #0x100
|
||||
BNE NXT
|
||||
MOV X6, #4
|
||||
B NXT1
|
||||
NXT:
|
||||
MOV X6, #32
|
||||
ADD X3, X3, #28
|
||||
|
||||
NXT1:
|
||||
SUB X4, X4, #1
|
||||
ASR X4, X4, #2
|
||||
SUB x7, x7, #28
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
NEON_PROLOGUE:
|
||||
|
||||
MOV x8, #-32
|
||||
|
||||
dup v14.4s, w5
|
||||
|
||||
SUB X1, X1, #28
|
||||
|
||||
LD2 {v8.h, v9.h}[0], [x3], x6
|
||||
LD2 {v8.h, v9.h}[1], [x3], x6
|
||||
LD2 {v8.h, v9.h}[2], [x3], x6
|
||||
LD2 {v8.h, v9.h}[3], [x3], x6
|
||||
|
||||
rev64 v10.4h, v8.4h
|
||||
rev64 v11.4h, v9.4h
|
||||
|
||||
LD4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x0], #32
|
||||
|
||||
LD4 {v4.4h, v5.4h, v6.4h, v7.4h}, [x1], x8
|
||||
|
||||
rev64 v0.4h, v0.4h
|
||||
rev64 v1.4h, v1.4h
|
||||
rev64 v4.4h, v4.4h
|
||||
rev64 v5.4h, v5.4h
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
uMULL v30.4s, v2.4h, v9.4h
|
||||
uMULL v28.4s, v4.4h, v9.4h
|
||||
uMULL v26.4s, v2.4h, v8.4h
|
||||
uMULL v24.4s, v4.4h, v8.4h
|
||||
|
||||
ushR v30.4s, v30.4s, #16
|
||||
ushR v28.4s, v28.4s, #16
|
||||
ushR v26.4s, v26.4s, #16
|
||||
ushR v24.4s, v24.4s, #16
|
||||
|
||||
sMLAL v30.4s, v3.4h, v9.4h
|
||||
sMLAL v28.4s, v5.4h, v9.4h
|
||||
sMLAL v26.4s, v3.4h, v8.4h
|
||||
sMLAL v24.4s, v5.4h, v8.4h
|
||||
|
||||
ADD v28.4s, v26.4s , v28.4s
|
||||
NEG v28.4s, v28.4s
|
||||
SUB v30.4s, v30.4s , v24.4s
|
||||
|
||||
uMULL v22.4s, v0.4h, v11.4h
|
||||
uMULL v20.4s, v6.4h, v11.4h
|
||||
uMULL v18.4s, v0.4h, v10.4h
|
||||
uMULL v16.4s, v6.4h, v10.4h
|
||||
|
||||
ushR v22.4s, v22.4s, #16
|
||||
ushR v20.4s, v20.4s, #16
|
||||
ushR v18.4s, v18.4s, #16
|
||||
ushR v16.4s, v16.4s, #16
|
||||
|
||||
sMLAL v22.4s, v1.4h, v11.4h
|
||||
LD2 {v8.h, v9.h}[0], [x3], x6
|
||||
|
||||
sMLAL v20.4s, v7.4h, v11.4h
|
||||
LD2 {v8.h, v9.h}[1], [x3], x6
|
||||
|
||||
sMLAL v18.4s, v1.4h, v10.4h
|
||||
LD2 {v8.h, v9.h}[2], [x3], x6
|
||||
|
||||
sMLAL v16.4s, v7.4h, v10.4h
|
||||
LD2 {v8.h, v9.h}[3], [x3], x6
|
||||
|
||||
ADD v20.4s, v20.4s , v18.4s
|
||||
|
||||
NEG v20.4s, v20.4s
|
||||
rev64 v10.4h, v8.4h
|
||||
rev64 v11.4h, v9.4h
|
||||
SUB v22.4s, v16.4s , v22.4s
|
||||
LD4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x0], #32
|
||||
|
||||
|
||||
|
||||
sshL v20.4s, v20.4s, v14.4s
|
||||
LD4 {v4.4h, v5.4h, v6.4h, v7.4h}, [x1], x8
|
||||
|
||||
rev64 v0.4h, v0.4h
|
||||
rev64 v1.4h, v1.4h
|
||||
sshL v22.4s, v22.4s, v14.4s
|
||||
|
||||
rev64 v4.4h, v4.4h
|
||||
rev64 v5.4h, v5.4h
|
||||
sshL v18.4s, v30.4s, v14.4s
|
||||
|
||||
|
||||
sshL v16.4s, v28.4s, v14.4s
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
SUB X4, X4, #2
|
||||
|
||||
CORE_LOOP:
|
||||
uMULL v30.4s, v2.4h, v9.4h
|
||||
MOV v17.16B, v18.16B
|
||||
ST2 { v16.4s, v17.4s}, [x2]
|
||||
ADD x2, x2, #32
|
||||
uMULL v28.4s, v4.4h, v9.4h
|
||||
|
||||
uMULL v26.4s, v2.4h, v8.4h
|
||||
MOV v21.16B, v22.16B
|
||||
ST2 { v20.4s, v21.4s}, [x7], x8
|
||||
uMULL v24.4s, v4.4h, v8.4h
|
||||
|
||||
ushR v30.4s, v30.4s, #16
|
||||
ushR v28.4s, v28.4s, #16
|
||||
ushR v26.4s, v26.4s, #16
|
||||
ushR v24.4s, v24.4s, #16
|
||||
|
||||
sMLAL v30.4s, v3.4h, v9.4h
|
||||
sMLAL v28.4s, v5.4h, v9.4h
|
||||
sMLAL v26.4s, v3.4h, v8.4h
|
||||
sMLAL v24.4s, v5.4h, v8.4h
|
||||
|
||||
ADD v28.4s, v26.4s , v28.4s
|
||||
NEG v28.4s, v28.4s
|
||||
SUB v30.4s, v30.4s , v24.4s
|
||||
|
||||
uMULL v22.4s, v0.4h, v11.4h
|
||||
LD2 {v8.h, v9.h}[0], [x3], x6
|
||||
uMULL v20.4s, v6.4h, v11.4h
|
||||
|
||||
uMULL v18.4s, v0.4h, v10.4h
|
||||
LD2 {v8.h, v9.h}[1], [x3], x6
|
||||
uMULL v16.4s, v6.4h, v10.4h
|
||||
|
||||
ushR v22.4s, v22.4s, #16
|
||||
LD2 {v8.h, v9.h}[2], [x3], x6
|
||||
ushR v20.4s, v20.4s, #16
|
||||
|
||||
|
||||
ushR v18.4s, v18.4s, #16
|
||||
LD2 {v8.h, v9.h}[3], [x3], x6
|
||||
ushR v16.4s, v16.4s, #16
|
||||
|
||||
sMLAL v22.4s, v1.4h, v11.4h
|
||||
|
||||
sMLAL v20.4s, v7.4h, v11.4h
|
||||
|
||||
|
||||
sMLAL v18.4s, v1.4h, v10.4h
|
||||
|
||||
|
||||
sMLAL v16.4s, v7.4h, v10.4h
|
||||
LD4 {v0.4h, v1.4h, v2.4h, v3.4h}, [x0], #32
|
||||
ADD v20.4s, v20.4s , v18.4s
|
||||
|
||||
NEG v20.4s, v20.4s
|
||||
rev64 v10.4h, v8.4h
|
||||
rev64 v11.4h, v9.4h
|
||||
|
||||
SUB v22.4s, v16.4s , v22.4s
|
||||
LD4 {v4.4h, v5.4h, v6.4h, v7.4h}, [x1], x8
|
||||
sshL v20.4s, v20.4s, v14.4s
|
||||
|
||||
|
||||
sshL v22.4s, v22.4s, v14.4s
|
||||
|
||||
rev64 v0.4h, v0.4h
|
||||
rev64 v1.4h, v1.4h
|
||||
sshL v18.4s, v30.4s, v14.4s
|
||||
|
||||
rev64 v4.4h, v4.4h
|
||||
rev64 v5.4h, v5.4h
|
||||
sshL v16.4s, v28.4s, v14.4s
|
||||
|
||||
|
||||
SUBS x4, x4, #1
|
||||
BNE CORE_LOOP
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
NEON_EPILOGUE:
|
||||
uMULL v30.4s, v2.4h, v9.4h
|
||||
MOV v17.16B, v18.16B
|
||||
ST2 { v16.4s, v17.4s}, [x2]
|
||||
ADD x2, x2, #32
|
||||
uMULL v28.4s, v4.4h, v9.4h
|
||||
|
||||
uMULL v26.4s, v2.4h, v8.4h
|
||||
MOV v21.16B, v22.16B
|
||||
|
||||
ST2 { v20.4s, v21.4s}, [x7], x8
|
||||
uMULL v24.4s, v4.4h, v8.4h
|
||||
|
||||
ushR v30.4s, v30.4s, #16
|
||||
ushR v28.4s, v28.4s, #16
|
||||
ushR v26.4s, v26.4s, #16
|
||||
ushR v24.4s, v24.4s, #16
|
||||
|
||||
sMLAL v30.4s, v3.4h, v9.4h
|
||||
sMLAL v28.4s, v5.4h, v9.4h
|
||||
sMLAL v26.4s, v3.4h, v8.4h
|
||||
sMLAL v24.4s, v5.4h, v8.4h
|
||||
|
||||
ADD v28.4s, v26.4s , v28.4s
|
||||
NEG v28.4s, v28.4s
|
||||
SUB v30.4s, v30.4s , v24.4s
|
||||
|
||||
uMULL v22.4s, v0.4h, v11.4h
|
||||
uMULL v20.4s, v6.4h, v11.4h
|
||||
uMULL v18.4s, v0.4h, v10.4h
|
||||
uMULL v16.4s, v6.4h, v10.4h
|
||||
|
||||
ushR v22.4s, v22.4s, #16
|
||||
ushR v20.4s, v20.4s, #16
|
||||
ushR v18.4s, v18.4s, #16
|
||||
ushR v16.4s, v16.4s, #16
|
||||
|
||||
sMLAL v22.4s, v1.4h, v11.4h
|
||||
sMLAL v20.4s, v7.4h, v11.4h
|
||||
sMLAL v18.4s, v1.4h, v10.4h
|
||||
sMLAL v16.4s, v7.4h, v10.4h
|
||||
|
||||
ADD v20.4s, v20.4s , v18.4s
|
||||
NEG v20.4s, v20.4s
|
||||
SUB v22.4s, v16.4s , v22.4s
|
||||
|
||||
|
||||
sshL v20.4s, v20.4s, v14.4s
|
||||
sshL v22.4s, v22.4s, v14.4s
|
||||
sshL v18.4s, v30.4s, v14.4s
|
||||
sshL v16.4s, v28.4s, v14.4s
|
||||
MOV v17.16B, v18.16B
|
||||
ST2 { v16.4s, v17.4s}, [x2]
|
||||
ADD x2, x2, #32
|
||||
MOV v21.16B, v22.16B
|
||||
ST2 { v20.4s, v21.4s}, [x7], x8
|
||||
|
||||
|
||||
RESIDUE_NEON:
|
||||
MOV x10, #-16
|
||||
movi v3.2s, #0x00000000
|
||||
movi v4.2s, #0x00000000
|
||||
|
||||
LD2 {v21.2s, v22.2s}, [x0], #16
|
||||
MOV v0.8B, v21.8B
|
||||
MOV v2.8B, v22.8B
|
||||
|
||||
LD1 {v1.s}[0], [x0], #4;
|
||||
LD1 {v3.s}[0], [x0], #4;
|
||||
LD1 {v1.s}[1], [x0]
|
||||
MOV v21.8B, v0.8B
|
||||
|
||||
UZP1 v0.4h, v21.4h, v1.4h
|
||||
UZP2 v1.4h, v21.4h, v1.4h
|
||||
MOV v21.8B, v2.8B
|
||||
UZP1 v2.4h, v21.4h, v3.4h
|
||||
UZP2 v3.4h, v21.4h, v3.4h
|
||||
|
||||
ADD x1, x1, #4
|
||||
|
||||
LD1 {v6.s}[0], [x1], #4
|
||||
LD1 {v4.s}[1], [x1], #4
|
||||
LD1 {v6.s}[1], [x1], #4
|
||||
|
||||
|
||||
LD2 {v21.2s, v22.2s}, [x1], #16
|
||||
MOV v5.8B, v21.8B
|
||||
MOV v7.8B, v22.8B
|
||||
|
||||
|
||||
MOV v21.8B, v4.8B
|
||||
UZP1 v4.4h, v21.4h, v5.4h
|
||||
UZP2 v5.4h, v21.4h, v5.4h
|
||||
MOV v21.8B, v6.8B
|
||||
UZP1 v6.4h, v21.4h, v7.4h
|
||||
UZP2 v7.4h, v21.4h, v7.4h
|
||||
rev64 v0.4h, v0.4h
|
||||
rev64 v1.4h, v1.4h
|
||||
rev64 v4.4h, v4.4h
|
||||
rev64 v5.4h, v5.4h
|
||||
|
||||
LD2 {v8.h, v9.h}[0], [x3], x6
|
||||
LD2 {v8.h, v9.h}[1], [x3], x6
|
||||
LD2 {v8.h, v9.h}[2], [x3], x6
|
||||
LD2 {v8.h, v9.h}[3], [x3], x6
|
||||
|
||||
rev64 v10.4h, v8.4h
|
||||
rev64 v11.4h, v9.4h
|
||||
|
||||
|
||||
|
||||
uMULL v30.4s, v2.4h, v9.4h
|
||||
uMULL v28.4s, v4.4h, v9.4h
|
||||
uMULL v26.4s, v2.4h, v8.4h
|
||||
uMULL v24.4s, v4.4h, v8.4h
|
||||
|
||||
ushR v30.4s, v30.4s, #16
|
||||
ushR v28.4s, v28.4s, #16
|
||||
ushR v26.4s, v26.4s, #16
|
||||
ushR v24.4s, v24.4s, #16
|
||||
|
||||
sMLAL v30.4s, v3.4h, v9.4h
|
||||
sMLAL v28.4s, v5.4h, v9.4h
|
||||
sMLAL v26.4s, v3.4h, v8.4h
|
||||
sMLAL v24.4s, v5.4h, v8.4h
|
||||
|
||||
ADD v28.4s, v26.4s , v28.4s
|
||||
NEG v28.4s, v28.4s
|
||||
SUB v30.4s, v30.4s , v24.4s
|
||||
|
||||
uMULL v22.4s, v0.4h, v11.4h
|
||||
uMULL v20.4s, v6.4h, v11.4h
|
||||
uMULL v18.4s, v0.4h, v10.4h
|
||||
uMULL v16.4s, v6.4h, v10.4h
|
||||
|
||||
ushR v22.4s, v22.4s, #16
|
||||
ushR v20.4s, v20.4s, #16
|
||||
ushR v18.4s, v18.4s, #16
|
||||
ushR v16.4s, v16.4s, #16
|
||||
|
||||
sMLAL v22.4s, v1.4h, v11.4h
|
||||
sMLAL v20.4s, v7.4h, v11.4h
|
||||
sMLAL v18.4s, v1.4h, v10.4h
|
||||
sMLAL v16.4s, v7.4h, v10.4h
|
||||
|
||||
ADD v20.4s, v20.4s , v18.4s
|
||||
NEG v20.4s, v20.4s
|
||||
SUB v22.4s, v16.4s , v22.4s
|
||||
|
||||
|
||||
|
||||
sshL v20.4s, v20.4s, v14.4s
|
||||
sshL v22.4s, v22.4s, v14.4s
|
||||
sshL v18.4s, v30.4s, v14.4s
|
||||
sshL v16.4s, v28.4s, v14.4s
|
||||
MOV v21.16B, v22.16B
|
||||
ST2 { v20.4s, v21.4s}, [x7]
|
||||
mov v17.16B, v18.16B
|
||||
ST2 {v16.2s, v17.2s}, [x2]
|
||||
ADD x2, x2, #16
|
||||
|
||||
ST2 {v16.s, v17.s}[2], [x2]
|
||||
ADD x2, x2, #8
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
END1:
|
||||
pop_v_regs
|
||||
ret
|
||||
|
||||
|
||||
|
||||
2415
decoder/armv8/ixheaacd_qmf_dec.c
Normal file
2415
decoder/armv8/ixheaacd_qmf_dec.c
Normal file
File diff suppressed because it is too large
Load diff
777
decoder/armv8/ixheaacd_sbr_imdct_using_fft.s
Normal file
777
decoder/armv8/ixheaacd_sbr_imdct_using_fft.s
Normal file
|
|
@ -0,0 +1,777 @@
|
|||
///******************************************************************************
|
||||
// *
|
||||
// * Copyright (C) 2018 The Android Open Source Project
|
||||
// *
|
||||
// * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// * you may not use this file except in compliance with the License.
|
||||
// * You may obtain a copy of the License at:
|
||||
// *
|
||||
// * http://www.apache.org/licenses/LICENSE-2.0
|
||||
// *
|
||||
// * Unless required by applicable law or agreed to in writing, software
|
||||
// * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// * See the License for the specific language governing permissions and
|
||||
// * limitations under the License.
|
||||
// *
|
||||
// *****************************************************************************
|
||||
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
//*/
|
||||
|
||||
|
||||
.macro push_v_regs
|
||||
stp d8, d9, [sp, #-16]!
|
||||
stp d10, d11, [sp, #-16]!
|
||||
stp d12, d13, [sp, #-16]!
|
||||
stp d14, d15, [sp, #-16]!
|
||||
stp X8, X9, [sp, #-16]!
|
||||
stp X10, X11, [sp, #-16]!
|
||||
stp X12, X13, [sp, #-16]!
|
||||
stp X14, X15, [sp, #-16]!
|
||||
stp X16, X17, [sp, #-16]!
|
||||
stp X29, X30, [sp, #-16]!
|
||||
.endm
|
||||
.macro pop_v_regs
|
||||
ldp X29, X30, [sp], #16
|
||||
ldp X16, X17, [sp], #16
|
||||
ldp X14, X15, [sp], #16
|
||||
ldp X12, X13, [sp], #16
|
||||
ldp X10, X11, [sp], #16
|
||||
ldp X8, X9, [sp], #16
|
||||
ldp d14, d15, [sp], #16
|
||||
ldp d12, d13, [sp], #16
|
||||
ldp d10, d11, [sp], #16
|
||||
ldp d8, d9, [sp], #16
|
||||
.endm
|
||||
|
||||
.macro swp reg1, reg2
|
||||
MOV x16, \reg1
|
||||
MOV \reg1, \reg2
|
||||
MOV \reg2, x16
|
||||
.endm
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_sbr_imdct_using_fft
|
||||
ixheaacd_sbr_imdct_using_fft:
|
||||
push_v_regs
|
||||
|
||||
|
||||
COND_6: cmp x1, #0x10
|
||||
bne COND_7
|
||||
MOV X8, #1
|
||||
MOV X4, X7
|
||||
B RADIX_4_FIRST_START
|
||||
|
||||
COND_7: cmp x1, #0x20
|
||||
|
||||
mov x8, #1
|
||||
mov x4, x7
|
||||
|
||||
|
||||
RADIX_8_FIRST_START:
|
||||
|
||||
LSR W9 , W1, #5
|
||||
LSL W1, W1, #1
|
||||
|
||||
RADIX_8_FIRST_LOOP:
|
||||
|
||||
MOV X5 , X2
|
||||
MOV X6 , X2
|
||||
MOV X7 , X2
|
||||
MOV X11 , X2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LDRB W12, [X4]
|
||||
ADD X5, X5, X12, LSL #3
|
||||
LD2 {V0.S, V1.S}[0], [X5], X1
|
||||
ADD X5, X5, X1
|
||||
LD2 {V4.S, V5.S}[0], [X5], X1
|
||||
SUB X5, X5, X1, LSL #1
|
||||
LD2 {V2.S, V3.S}[0], [X5], X1
|
||||
ADD X5, X5, X1
|
||||
LD2 {V6.S, V7.S}[0], [X5], X1
|
||||
SUB X5, X5, X1, LSL #2
|
||||
|
||||
LDRB W12, [X4, #1]
|
||||
ADD X6, X6, X12, LSL #3
|
||||
LD2 {V0.S, V1.S}[1], [X6] , X1
|
||||
ADD X6, X6, X1
|
||||
LD2 {V4.S, V5.S}[1], [X6] , X1
|
||||
SUB X6, X6, X1, LSL #1
|
||||
LD2 {V2.S, V3.S}[1], [X6] , X1
|
||||
ADD X6, X6, X1
|
||||
LD2 {V6.S, V7.S}[1], [X6], X1
|
||||
SUB X6, X6, X1, LSL #2
|
||||
|
||||
|
||||
LDRB W12, [X4, #2]
|
||||
ADD X7, X7, X12, LSL #3
|
||||
LD2 {V0.S, V1.S}[2], [X7] , X1
|
||||
ADD X7, X7, X1
|
||||
LD2 {V4.S, V5.S}[2], [X7] , X1
|
||||
SUB X7, X7, X1, LSL #1
|
||||
|
||||
LDRB W12, [X4, #3]
|
||||
ADD X11, X11, X12, LSL #3
|
||||
LD2 {V0.S, V1.S}[3], [X11] , X1
|
||||
ADD X11, X11, X1
|
||||
LD2 {V4.S, V5.S}[3], [X11] , X1
|
||||
SUB X11, X11, X1, LSL #1
|
||||
|
||||
|
||||
ADD V8.4S, V0.4S, V4.4S
|
||||
LD2 {V2.S, V3.S}[2], [X7] , X1
|
||||
ADD X7, X7, X1
|
||||
|
||||
|
||||
SUB V9.4S, V0.4S, V4.4S
|
||||
LD2 {V6.S, V7.S}[2], [X7], X1
|
||||
SUB X7, X7, X1, LSL #2
|
||||
|
||||
|
||||
ADD V0.4S, V1.4S, V5.4S
|
||||
LD2 {V2.S, V3.S}[3], [X11] , X1
|
||||
ADD X11, X11, X1
|
||||
|
||||
SUB V4.4S, V1.4S, V5.4S
|
||||
LD2 {V6.S, V7.S}[3], [X11], X1
|
||||
SUB X11, X11, X1, LSL #2
|
||||
|
||||
ADD X4, X4, #4
|
||||
|
||||
ADD X5, X5, X1, LSR #1
|
||||
ADD X6, X6, X1, LSR #1
|
||||
ADD X7, X7, X1, LSR #1
|
||||
ADD X11, X11, X1, LSR #1
|
||||
|
||||
|
||||
ADD V1.4S, V2.4S, V6.4S
|
||||
LD2 {V14.S, V15.S}[0], [X5] , X1
|
||||
|
||||
|
||||
SUB V5.4S, V2.4S, V6.4S
|
||||
LD2 {V10.S, V11.S}[0], [X5] , X1
|
||||
|
||||
|
||||
ADD V2.4S, V3.4S, V7.4S
|
||||
LD2 {V12.S, V13.S}[0], [X5] , X1
|
||||
|
||||
|
||||
SUB V6.4S, V3.4S, V7.4S
|
||||
LD2 {V14.S, V15.S}[1], [X6] , X1
|
||||
|
||||
ADD V3.4S, V9.4S, V6.4S
|
||||
LD2 {V10.S, V11.S}[1], [X6] , X1
|
||||
|
||||
SUB V7.4S, V9.4S, V6.4S
|
||||
LD2 {V12.S, V13.S}[1], [X6] , X1
|
||||
|
||||
SUB V6.4S, V4.4S, V5.4S
|
||||
LD2 {V14.S, V15.S}[2], [X7] , X1
|
||||
|
||||
ADD V9.4S, V4.4S, V5.4S
|
||||
LD2 {V10.S, V11.S}[2], [X7] , X1
|
||||
|
||||
ADD V4.4S, V8.4S, V1.4S
|
||||
LD2 {V12.S, V13.S}[2], [X7] , X1
|
||||
|
||||
SUB V5.4S, V8.4S, V1.4S
|
||||
LD2 {V14.S, V15.S}[3], [X11] , X1
|
||||
|
||||
ADD V8.4S, V0.4S, V2.4S
|
||||
LD2 {V10.S, V11.S}[3], [X11] , X1
|
||||
|
||||
SUB V0.4S, V0.4S, V2.4S
|
||||
LD2 {V12.S, V13.S}[3], [X11] , X1
|
||||
|
||||
|
||||
LD2 {V1.S, V2.S}[0], [X5], X1
|
||||
|
||||
ADD V17.4S, V14.4S, V12.4S
|
||||
|
||||
LD2 {V1.S, V2.S}[1], [X6] , X1
|
||||
|
||||
SUB V16.4S, V14.4S, V12.4S
|
||||
|
||||
LD2 {V1.S, V2.S}[2], [X7] , X1
|
||||
|
||||
ADD V14.4S, V15.4S, V13.4S
|
||||
|
||||
LD2 {V1.S, V2.S}[3], [X11] , X1
|
||||
|
||||
SUB V12.4S, V15.4S, V13.4S
|
||||
|
||||
ADD V15.4S, V10.4S, V1.4S
|
||||
SUB V13.4S, V10.4S, V1.4S
|
||||
ADD V10.4S, V11.4S, V2.4S
|
||||
SUB V1.4S, V11.4S, V2.4S
|
||||
|
||||
ADD V11.4S, V17.4S, V15.4S
|
||||
SUB V2.4S, V17.4S, V15.4S
|
||||
ADD V17.4S, V14.4S, V10.4S
|
||||
SUB V15.4S, V14.4S, V10.4S
|
||||
|
||||
ADD V14.4S, V16.4S, V12.4S
|
||||
SUB V10.4S, V16.4S, V12.4S
|
||||
ADD V16.4S, V13.4S, V1.4S
|
||||
SUB V12.4S, V13.4S, V1.4S
|
||||
|
||||
ADD V1.4S , V14.4S, V12.4S
|
||||
SUB V13.4S, V14.4S, V12.4S
|
||||
SUB V12.4S, V16.4S, V10.4S
|
||||
|
||||
UZP1 V22.8H, V1.8H, V1.8H
|
||||
UZP2 V23.8H, V1.8H, V1.8H
|
||||
ADD V14.4S, V16.4S, V10.4S
|
||||
|
||||
UZP1 V26.8H, V13.8H, V13.8H
|
||||
UZP2 V27.8H, V13.8H, V13.8H
|
||||
ADD V16.4S, V4.4S, V11.4S
|
||||
|
||||
UZP1 V24.8H, V12.8H, V12.8H
|
||||
UZP2 V25.8H, V12.8H, V12.8H
|
||||
SUB V10.4S, V4.4S, V11.4S
|
||||
|
||||
UZP1 V28.8H, V14.8H, V14.8H
|
||||
UZP2 V29.8H, V14.8H, V14.8H
|
||||
ADD V4.4S, V8.4S, V17.4S
|
||||
|
||||
MOV W14, #0x5a82
|
||||
|
||||
SUB V11.4S, V8.4S, V17.4S
|
||||
|
||||
ADD V8.4S, V5.4S, V15.4S
|
||||
SUB V17.4S, V5.4S, V15.4S
|
||||
SUB V5.4S, V0.4S, V2.4S
|
||||
ADD V15.4S, V0.4S, V2.4S
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
DUP V31.4H, W14
|
||||
|
||||
UMULL V19.4S, V26.4H, V31.4H
|
||||
UMULL V18.4S, V28.4H, V31.4H
|
||||
SSHR V19.4S, V19.4S, #15
|
||||
SSHR V18.4S, V18.4S, #15
|
||||
|
||||
SQDMLAL V19.4S, V27.4H, V31.4H
|
||||
SQDMLAL V18.4S, V29.4H, V31.4H
|
||||
|
||||
UMULL V13.4S, V24.4H, V31.4H
|
||||
UMULL V14.4S, V22.4H, V31.4H
|
||||
|
||||
ADD V20.4S, V3.4S, V19.4S
|
||||
SUB V21.4S, V3.4S, V19.4S
|
||||
ADD V30.4S, V6.4S, V18.4S
|
||||
SUB V6.4S, V6.4S, V18.4S
|
||||
|
||||
SSHR V13.4S, V13.4S, #15
|
||||
SSHR V14.4S, V14.4S, #15
|
||||
|
||||
SQDMLAL V13.4S, V25.4H, V31.4H
|
||||
SQDMLAL V14.4S, V23.4H, V31.4H
|
||||
|
||||
ADD V3.4S, V7.4S, V13.4S
|
||||
SUB V19.4S, V7.4S, V13.4S
|
||||
ADD V1.4S, V9.4S, V14.4S
|
||||
SUB V18.4S, V9.4S, V14.4S
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
swp V17.D[0], V8.D[0]
|
||||
swp V17.D[1], V8.D[1]
|
||||
swp V4.D[0], V16.D[0]
|
||||
swp V4.D[1], V16.D[1]
|
||||
|
||||
TRN1 V12.4S, V4.4S, V20.4S
|
||||
TRN2 V22.4S, V4.4S, V20.4S
|
||||
|
||||
SHL V12.4S, V12.4S, #1
|
||||
TRN1 V9.4S, V17.4S, V3.4S
|
||||
TRN2 V2.4S, V17.4S, V3.4S
|
||||
SHL V22.4S, V22.4S, #1
|
||||
|
||||
SHL V9.4S, V9.4S, #1
|
||||
TRN1 V24.4S, V10.4S, V21.4S
|
||||
TRN2 V7.4S, V10.4S, V21.4S
|
||||
SHL V2.4S, V2.4S, #1
|
||||
|
||||
SHL V24.4S, V24.4S, #1
|
||||
TRN1 V13.4S, V16.4S, V6.4S
|
||||
TRN2 V23.4S, V16.4S, V6.4S
|
||||
SHL V7.4S, V7.4S, #1
|
||||
|
||||
SHL V13.4S, V13.4S, #1
|
||||
TRN1 V10.4S, V5.4S, V18.4S
|
||||
TRN2 V3.4S, V5.4S, V18.4S
|
||||
SHL V23.4S, V23.4S, #1
|
||||
|
||||
SHL V10.4S, V10.4S, #1
|
||||
TRN1 V26.4S, V8.4S, V19.4S
|
||||
TRN2 V4.4S, V8.4S, V19.4S
|
||||
SHL V3.4S, V3.4S, #1
|
||||
|
||||
SHL V26.4S, V26.4S, #1
|
||||
TRN1 V25.4S, V11.4S, V30.4S
|
||||
TRN2 V8.4S, V11.4S, V30.4S
|
||||
SHL V4.4S, V4.4S, #1
|
||||
|
||||
SHL V25.4S, V25.4S, #1
|
||||
TRN1 V27.4S, V15.4S, V1.4S
|
||||
TRN2 V5.4S, V15.4S, V1.4S
|
||||
SHL V8.4S, V8.4S, #1
|
||||
|
||||
SHL V27.4S, V27.4S, #1
|
||||
swp V9.D[0], V12.D[1]
|
||||
SHL V5.4S, V5.4S, #1
|
||||
swp V2.D[0], V22.D[1]
|
||||
|
||||
swp V24.D[1], V26.D[0]
|
||||
swp V7.D[1], V4.D[0]
|
||||
swp V10.D[0], V13.D[1]
|
||||
swp V3.D[0], V23.D[1]
|
||||
swp V27.D[0], V25.D[1]
|
||||
swp V5.D[0], V8.D[1]
|
||||
|
||||
|
||||
MOV X15, #32
|
||||
ST2 {V12.4S, V13.4S}, [X3], X15
|
||||
ST2 {V24.4S, V25.4S}, [X3], X15
|
||||
ST2 {V22.4S, V23.4S}, [X3], X15
|
||||
ST2 {V7.4S, V8.4S}, [X3], X15
|
||||
ST2 {V9.4S, V10.4S}, [X3], X15
|
||||
ST2 {V26.4S, V27.4S}, [X3], X15
|
||||
ST2 {V2.4S, V3.4S}, [X3], X15
|
||||
ST2 {V4.4S, V5.4S}, [X3], X15
|
||||
|
||||
|
||||
SUBS X9, X9, #1
|
||||
BNE RADIX_8_FIRST_LOOP
|
||||
|
||||
LSR X1, X1, #1
|
||||
LSL X15, X1, #3
|
||||
SUB X3, X3, X15
|
||||
|
||||
MOV X5, #8
|
||||
MOV X4, #32
|
||||
LSR X15, X1, #5
|
||||
MOV X6, X15
|
||||
B RADIX_4_FIRST_ENDS
|
||||
|
||||
RADIX_8_FIRST_ENDS:
|
||||
|
||||
|
||||
|
||||
RADIX_4_FIRST_START:
|
||||
|
||||
|
||||
LSR W9, W1, #4
|
||||
LSL W1, W1, #1
|
||||
|
||||
RADIX_4_LOOP:
|
||||
|
||||
MOV X5 , X2
|
||||
MOV X6 , X2
|
||||
MOV X7 , X2
|
||||
MOV X11 , X2
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
LDRB W12, [X4, #0]
|
||||
ADD X5, X5, X12, LSL #3
|
||||
|
||||
LD2 {V0.S, V1.S}[0], [X5] , X1
|
||||
ADD X5, X5, X1
|
||||
LD2 {V8.S, V9.S}[0], [X5] , X1
|
||||
SUB X5, X5, X1, LSL #1
|
||||
LD2 {V4.S, V5.S}[0], [X5] , X1
|
||||
ADD X5, X5, X1
|
||||
LD2 {V12.S, V13.S}[0], [X5] , X1
|
||||
|
||||
LDRB W12, [X4, #1]
|
||||
ADD X6, X6, X12, LSL #3
|
||||
LD2 {V0.S, V1.S}[1], [X6] , X1
|
||||
ADD X6, X6, X1
|
||||
LD2 {V8.S, V9.S}[1], [X6] , X1
|
||||
SUB X6, X6, X1, LSL #1
|
||||
LD2 {V4.S, V5.S}[1], [X6] , X1
|
||||
ADD X6, X6, X1
|
||||
LD2 {V12.S, V13.S}[1], [X6] , X1
|
||||
|
||||
LDRB W12, [X4, #2]
|
||||
ADD X7, X7, X12, LSL #3
|
||||
|
||||
LD2 {V0.S, V1.S}[2], [X7] , X1
|
||||
ADD X7, X7, X1
|
||||
LD2 {V8.S, V9.S}[2], [X7] , X1
|
||||
|
||||
|
||||
LDRB W12, [X4, #3]
|
||||
ADD X11, X11, X12 , LSL #3
|
||||
|
||||
|
||||
LD2 {V0.S, V1.S}[3], [X11] , X1
|
||||
ADD X11, X11, X1
|
||||
LD2 {V8.S, V9.S}[3], [X11] , X1
|
||||
|
||||
SUB X7, X7, X1, LSL #1
|
||||
ADD V16.4S, V0.4S, V8.4S
|
||||
LD2 {V4.S, V5.S}[2], [X7] , X1
|
||||
ADD X7, X7, X1
|
||||
ADD V18.4S, V1.4S, V9.4S
|
||||
LD2 {V12.S, V13.S}[2], [X7] , X1
|
||||
|
||||
SUB X11, X11, X1, LSL #1
|
||||
SUB V20.4S, V0.4S, V8.4S
|
||||
LD2 {V4.S, V5.S}[3], [X11] , X1
|
||||
ADD X11, X11, X1
|
||||
SUB V22.4S, V1.4S, V9.4S
|
||||
LD2 {V12.S, V13.S}[3], [X11] , X1
|
||||
|
||||
ADD X4, X4, #4
|
||||
|
||||
ADD V24.4S, V4.4S, V12.4S
|
||||
ADD V26.4S, V5.4S, V13.4S
|
||||
SUB V28.4S, V4.4S, V12.4S
|
||||
SUB V30.4S, V5.4S, V13.4S
|
||||
|
||||
ADD V17.4S, V16.4S, V24.4S
|
||||
ADD V11.4S, V18.4S, V26.4S
|
||||
SUB V19.4S, V16.4S, V24.4S
|
||||
SUB V15.4S, V18.4S, V26.4S
|
||||
|
||||
ADD V8.4S, V20.4S, V30.4S
|
||||
SUB V9.4S, V22.4S, V28.4S
|
||||
ADD V13.4S, V22.4S, V28.4S
|
||||
SUB V12.4S, V20.4S, V30.4S
|
||||
|
||||
|
||||
|
||||
|
||||
TRN1 V0.4S, V17.4S, V8.4S
|
||||
TRN2 V8.4S, V17.4S, V8.4S
|
||||
|
||||
SHL V0.4S, V0.4S, #1
|
||||
TRN1 V4.4S, V19.4S, V12.4S
|
||||
TRN2 V12.4S, V19.4S, V12.4S
|
||||
SHL V8.4S, V8.4S, #1
|
||||
|
||||
SHL V4.4S, V4.4S, #1
|
||||
TRN1 V1.4S, V11.4S, V9.4S
|
||||
TRN2 V9.4S, V11.4S, V9.4S
|
||||
SHL V12.4S, V12.4S, #1
|
||||
|
||||
SHL V1.4S, V1.4S, #1
|
||||
TRN1 V5.4S, V15.4S, V13.4S
|
||||
TRN2 V13.4S, V15.4S, V13.4S
|
||||
SHL V9.4S, V9.4S, #1
|
||||
|
||||
SHL V5.4S, V5.4S, #1
|
||||
swp V4.D[0], V0.D[1]
|
||||
SHL V13.4S, V13.4S, #1
|
||||
|
||||
swp V12.D[0], V8.D[1]
|
||||
|
||||
|
||||
swp V5.D[0], V1.D[1]
|
||||
swp V13.D[0], V9.D[1]
|
||||
|
||||
MOV X15, #32
|
||||
ST2 {V0.4S, V1.4S}, [X3], X15
|
||||
ST2 {V8.4S, V9.4S}, [X3], X15
|
||||
ST2 {V4.4S, V5.4S}, [X3], X15
|
||||
ST2 {V12.4S, V13.4S}, [X3], X15
|
||||
|
||||
|
||||
SUBS W9, W9, #1
|
||||
BNE RADIX_4_LOOP
|
||||
|
||||
LSR X1, X1, #1
|
||||
SUB X3, X3, X1, LSL #3
|
||||
MOV X5, #4
|
||||
MOV X4, #64
|
||||
LSR X6, X1, #4
|
||||
|
||||
|
||||
RADIX_4_FIRST_ENDS:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
MOV x30, X3
|
||||
LSR X5, X5, #2
|
||||
|
||||
OUTER_LOOP_R4:
|
||||
|
||||
|
||||
MOV X14, x30
|
||||
|
||||
MOV X7, X5
|
||||
MOV X2, #0
|
||||
MOV X9, X0
|
||||
LSL X12, X5, #5
|
||||
MIDDLE_LOOP_R4:
|
||||
|
||||
|
||||
LD2 {V20.H, V21.H}[0], [X9], X2
|
||||
LD2 {V22.H, V23.H}[0], [X9], X2
|
||||
ADD X11, X2, X4, LSL #2
|
||||
LD2 {V24.H, V25.H}[0], [X9]
|
||||
ADD X10, X0, X11
|
||||
|
||||
LD2 {V20.H, V21.H}[1], [X10], X11
|
||||
LD2 {V22.H, V23.H}[1], [X10], X11
|
||||
ADD X2, X11, X4, LSL #2
|
||||
LD2 {V24.H, V25.H}[1], [X10]
|
||||
ADD X9, X0, X2
|
||||
|
||||
LD2 {V20.H, V21.H}[2], [X9], X2
|
||||
LD2 {V22.H, V23.H}[2], [X9], X2
|
||||
ADD X11, X2, X4, LSL #2
|
||||
LD2 {V24.H, V25.H}[2], [X9]
|
||||
ADD X10, X0, X11
|
||||
|
||||
LD2 {V20.H, V21.H}[3], [X10], X11
|
||||
LD2 {V22.H, V23.H}[3], [X10], X11
|
||||
ADD X2, X11, X4, LSL #2
|
||||
LD2 {V24.H, V25.H}[3], [X10]
|
||||
ADD X9, X0, X2
|
||||
|
||||
MOV X10, X6
|
||||
INNER_LOOP_R4:
|
||||
|
||||
LD2 {V30.4S, V31.4S}, [X14], X12
|
||||
SSHR V30.4S, V30.4S, #1
|
||||
LD4 {V16.4H, V17.4H, V18.4H, V19.4H}, [X14], X12
|
||||
SSHR V31.4S, V31.4S, #1
|
||||
|
||||
USHR V16.4H, V16.4H, #1
|
||||
LD4 {V26.4H, V27.4H, V28.4H, V29.4H}, [X14], X12
|
||||
USHR V18.4H, V18.4H, #1
|
||||
|
||||
SMULL V11.4S, V16.4H, V20.4H
|
||||
SMLSL V11.4S, V18.4H, V21.4H
|
||||
LD4 {V0.4H, V1.4H, V2.4H, V3.4H}, [X14], X12
|
||||
SMULL V12.4S, V16.4H, V21.4H
|
||||
SMLAL V12.4S, V18.4H, V20.4H
|
||||
|
||||
USHR V26.4H, V26.4H, #1
|
||||
USHR V28.4H, V28.4H, #1
|
||||
|
||||
LSL x29, X12, #2
|
||||
SUB X14, X14, X12, LSL #2
|
||||
|
||||
USHR V0.4H, V0.4H, #1
|
||||
USHR V2.4H, V2.4H, #1
|
||||
|
||||
SMULL V13.4S, V26.4H, V22.4H
|
||||
SMLSL V13.4S, V28.4H, V23.4H
|
||||
|
||||
SSHR V11.4S, V11.4S, #15
|
||||
|
||||
SMULL V14.4S, V26.4H, V23.4H
|
||||
SMLAL V14.4S, V28.4H, V22.4H
|
||||
|
||||
SMULL V15.4S, V0.4H, V24.4H
|
||||
SMLSL V15.4S, V2.4H, V25.4H
|
||||
|
||||
SMLAL V11.4S, V17.4H, V20.4H
|
||||
SMLSL V11.4S, V19.4H, V21.4H
|
||||
|
||||
SSHR V12.4S, V12.4S, #15
|
||||
SSHR V13.4S, V13.4S, #15
|
||||
SSHR V14.4S, V14.4S, #15
|
||||
SSHR V15.4S, V15.4S, #15
|
||||
|
||||
SMLAL V12.4S, V17.4H, V21.4H
|
||||
SMLAL V12.4S, V19.4H, V20.4H
|
||||
|
||||
SMULL V5.4S, V0.4H, V25.4H
|
||||
SMLAL V5.4S, V2.4H, V24.4H
|
||||
|
||||
SMLAL V13.4S, V27.4H, V22.4H
|
||||
SMLSL V13.4S, V29.4H, V23.4H
|
||||
|
||||
SMLAL V14.4S, V27.4H, V23.4H
|
||||
SMLAL V14.4S, V29.4H, V22.4H
|
||||
|
||||
SMLAL V15.4S, V1.4H, V24.4H
|
||||
SMLSL V15.4S, V3.4H, V25.4H
|
||||
|
||||
SSHR V5.4S, V5.4S, #15
|
||||
|
||||
SMLAL V5.4S, V1.4H, V25.4H
|
||||
SMLAL V5.4S, V3.4H, V24.4H
|
||||
|
||||
|
||||
|
||||
SUBS x17, X7, X5
|
||||
BNE BYPASS_IF
|
||||
|
||||
ADD X14, X14, X12
|
||||
|
||||
LDR W3, [X14]
|
||||
ADD X14, X14, X12
|
||||
ASR W3, W3, #1
|
||||
MOV V11.S[0], W3
|
||||
|
||||
LDR W3, [X14]
|
||||
ADD X14, X14, X12
|
||||
ASR W3, W3, #1
|
||||
MOV V13.S[0], W3
|
||||
|
||||
LDR W3, [X14]
|
||||
ASR W3, W3, #1
|
||||
MOV V15.S[0], W3
|
||||
|
||||
SUB X14, X14, X12, LSL #1
|
||||
ADD X14, X14, #4
|
||||
|
||||
LDR W3, [X14]
|
||||
ADD X14, X14, X12
|
||||
ASR W3, W3, #1
|
||||
MOV V12.S[0], W3
|
||||
|
||||
LDR W3, [X14]
|
||||
ADD X14, X14, X12
|
||||
ASR W3, W3, #1
|
||||
MOV V14.S[0], W3
|
||||
|
||||
LDR W3, [X14]
|
||||
ADD X14, X14, X12
|
||||
ASR W3, W3, #1
|
||||
MOV V5.S[0], W3
|
||||
|
||||
SUB X14, X14, #4
|
||||
|
||||
SUB X14, X14, x29
|
||||
|
||||
BYPASS_IF:
|
||||
|
||||
ADD V6.4S, V30.4S, V13.4S
|
||||
ADD V7.4S, V31.4S, V14.4S
|
||||
SUB V30.4S, V30.4S, V13.4S
|
||||
SUB V31.4S, V31.4S, V14.4S
|
||||
ADD V8.4S, V11.4S, V15.4S
|
||||
ADD V9.4S, V12.4S, V5.4S
|
||||
|
||||
SUB V15.4S, V11.4S, V15.4S
|
||||
SUB V14.4S, V12.4S, V5.4S
|
||||
|
||||
|
||||
ADD V10.4S, V6.4S, V8.4S
|
||||
ADD V11.4S, V7.4S, V9.4S
|
||||
ADD V12.4S, V30.4S, V14.4S
|
||||
SUB V13.4S, V31.4S, V15.4S
|
||||
|
||||
SUB V6.4S, V6.4S, V8.4S
|
||||
ST2 {V10.4S, V11.4S}, [X14], X12
|
||||
SUB V7.4S, V7.4S, V9.4S
|
||||
|
||||
SUB V8.4S, V30.4S, V14.4S
|
||||
ST2 {V12.4S, V13.4S}, [X14], X12
|
||||
ADD V9.4S, V31.4S, V15.4S
|
||||
|
||||
ST2 {V6.4S, V7.4S}, [X14], X12
|
||||
ST2 {V8.4S, V9.4S}, [X14], X12
|
||||
SUBS X10, X10, #1
|
||||
BNE INNER_LOOP_R4
|
||||
|
||||
SUB X14, X14, X1, LSL #3
|
||||
ADD X14, X14, #32
|
||||
|
||||
SUBS X7, X7, #1
|
||||
BNE MIDDLE_LOOP_R4
|
||||
|
||||
LSR X4, X4, #2
|
||||
LSL X5, X5, #2
|
||||
LSR X6, X6, #2
|
||||
SUBS X8, X8, #1
|
||||
BNE OUTER_LOOP_R4
|
||||
END_LOOPS:
|
||||
pop_v_regs
|
||||
RET
|
||||
341
decoder/armv8/ixheaacd_sbr_qmf_analysis32_neon.s
Normal file
341
decoder/armv8/ixheaacd_sbr_qmf_analysis32_neon.s
Normal file
|
|
@ -0,0 +1,341 @@
|
|||
.macro push_v_regs
|
||||
stp d8, d9, [sp, #-16]!
|
||||
stp d10, d11, [sp, #-16]!
|
||||
stp d12, d13, [sp, #-16]!
|
||||
stp d14, d15, [sp, #-16]!
|
||||
stp X8, X9, [sp, #-16]!
|
||||
stp X10, X11, [sp, #-16]!
|
||||
stp X12, X13, [sp, #-16]!
|
||||
stp X14, X15, [sp, #-16]!
|
||||
stp X16, X17, [sp, #-16]!
|
||||
stp X29, X30, [sp, #-16]!
|
||||
.endm
|
||||
.macro pop_v_regs
|
||||
ldp X29, X30, [sp], #16
|
||||
ldp X16, X17, [sp], #16
|
||||
ldp X14, X15, [sp], #16
|
||||
ldp X12, X13, [sp], #16
|
||||
ldp X10, X11, [sp], #16
|
||||
ldp X8, X9, [sp], #16
|
||||
ldp d14, d15, [sp], #16
|
||||
ldp d12, d13, [sp], #16
|
||||
ldp d10, d11, [sp], #16
|
||||
ldp d8, d9, [sp], #16
|
||||
.endm
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_sbr_qmfanal32_winadds
|
||||
|
||||
ixheaacd_sbr_qmfanal32_winadds: // PROC
|
||||
|
||||
// STMFD sp!, {x4-x12, x14}
|
||||
push_v_regs
|
||||
stp x19, x20, [sp, #-16]!
|
||||
//VPUSH {D8 - D15}
|
||||
//LDR w5, [SP, #108] //filterStates
|
||||
//sxtw x5,w5
|
||||
//LDR w6, [SP, #112] //timeIn
|
||||
//sxtw x6,w6
|
||||
//LDR w7, [SP, #116] //stride
|
||||
//sxtw x7,w7
|
||||
|
||||
LSL x9, x7, #1
|
||||
|
||||
|
||||
MOV x20, x4
|
||||
ADD x5, x5, #64
|
||||
MOV w10, #3
|
||||
|
||||
//ADD x5, x5, #56
|
||||
//MOV x10, #1
|
||||
////SUB x6, x6, x9
|
||||
//CMP x7, #1
|
||||
//MOV x11, #-8
|
||||
//BGT LOOP_SKIP_ODD
|
||||
|
||||
LOOP:
|
||||
LDRSH w4 , [x6]
|
||||
ADD x6, x6, x9
|
||||
LDRSH w8 , [x6]
|
||||
ADD x6, x6, x9
|
||||
LDRSH w11 , [x6]
|
||||
ADD x6, x6, x9
|
||||
LDRSH w12 , [x6]
|
||||
ADD x6, x6, x9
|
||||
|
||||
STRH w4 , [x5 , #-2]!
|
||||
STRH w8 , [x5 , #-2]!
|
||||
STRH w11 , [x5 , #-2]!
|
||||
STRH w12 , [x5 , #-2]!
|
||||
|
||||
LDRSH w4 , [x6]
|
||||
ADD x6, x6, x9
|
||||
LDRSH w8 , [x6]
|
||||
ADD x6, x6, x9
|
||||
LDRSH w11 , [x6]
|
||||
ADD x6, x6, x9
|
||||
LDRSH w12 , [x6]
|
||||
ADD x6, x6, x9
|
||||
|
||||
STRH w4 , [x5 , #-2]!
|
||||
STRH w8 , [x5 , #-2]!
|
||||
STRH w11 , [x5 , #-2]!
|
||||
STRH w12 , [x5 , #-2]!
|
||||
SUBS w10, w10, #1
|
||||
|
||||
BPL LOOP
|
||||
|
||||
|
||||
//LOOP:
|
||||
// LD1 {v0.4h} , [x6], #8
|
||||
// LD1 {v1.4h} , [x6], #8
|
||||
//
|
||||
// REV64 v4.4h , v0.4h
|
||||
// REV64 v5.4h , v1.4h
|
||||
//
|
||||
// ST1 {v4.4h} , [x5] , x11
|
||||
// ST1 {v5.4h} , [x5] , x11
|
||||
//
|
||||
// LD1 {v2.4h} , [x6], #8
|
||||
// LD1 {v3.4h} , [x6], #8
|
||||
//
|
||||
// REV64 v6.4h , v2.4h
|
||||
// REV64 v7.4h , v3.4h
|
||||
//
|
||||
// ST1 {v6.4h} , [x5] , x11
|
||||
// ST1 {v7.4h} , [x5] , x11
|
||||
//
|
||||
// SUBS x10, x10, #1
|
||||
// BPL LOOP
|
||||
// B SKIP_LOOP
|
||||
//
|
||||
//LOOP_SKIP_ODD:
|
||||
// LD2 {v0.4h , v1.4h} , [x6], #16
|
||||
// LD2 {v2.4h , v3.4h} , [x6], #16
|
||||
//
|
||||
// REV64 v1.4h , v0.4h
|
||||
// REV64 v3.4h , v2.4h
|
||||
//
|
||||
// ST1 {v1.4h} , [x5], x11
|
||||
// ST1 {v3.4h} , [x5], x11
|
||||
//
|
||||
// LD2 {v4.4h , v5.4h} , [x6], #16
|
||||
// LD2 {v6.4h , v7.4h} , [x6], #16
|
||||
//
|
||||
//
|
||||
// REV64 v5.4h , v4.4h
|
||||
// REV64 v7.4h , v6.4h
|
||||
//
|
||||
// ST1 {v5.4h} , [x5], x11
|
||||
// ST1 {v7.4h} , [x5], x11
|
||||
//
|
||||
// SUBS x10, x10, #1
|
||||
// BPL LOOP_SKIP_ODD
|
||||
|
||||
SKIP_LOOP:
|
||||
|
||||
//LDR w4, [SP, #104] //winAdd
|
||||
// sxtw x4,w4
|
||||
|
||||
MOV x4, x20
|
||||
MOV x5, #8
|
||||
LD1 {v0.4h}, [x0], #8
|
||||
MOV x6, #64
|
||||
|
||||
LSL x6, x6, #1
|
||||
LD2 {v1.4h, v2.4h}, [x2], #16
|
||||
MOV x7, #244
|
||||
|
||||
MOV x9, x0
|
||||
ADD x0, x0, #120
|
||||
|
||||
MOV x11, x4
|
||||
LD1 {v2.4h}, [x0], x6
|
||||
ADD x11, x11, #128
|
||||
|
||||
|
||||
|
||||
|
||||
MOV x10, x2
|
||||
ADD x2, x2, #240
|
||||
|
||||
sMULL v30.4s, v0.4h, v1.4h
|
||||
LD2 {v3.4h, v4.4h}, [x2], #16
|
||||
ADD x2, x2, #240
|
||||
|
||||
|
||||
LD1 {v4.4h}, [x0], x6
|
||||
sMLAL v30.4s, v2.4h, v3.4h
|
||||
|
||||
LD2 {v5.4h, v6.4h}, [x2], #16
|
||||
|
||||
|
||||
ADD x2, x2, #240
|
||||
LD1 {v6.4h}, [x0], x6
|
||||
sMLAL v30.4s, v4.4h, v5.4h
|
||||
|
||||
LD2 {v7.4h, v8.4h}, [x2], #16
|
||||
|
||||
|
||||
ADD x2, x2, #240
|
||||
LD1 {v8.4h}, [x0], x6
|
||||
sMLAL v30.4s, v6.4h, v7.4h
|
||||
|
||||
MOV x0, x9
|
||||
LD2 {v9.4h, v10.4h}, [x2], #16
|
||||
|
||||
|
||||
ADD x2, x2, #240
|
||||
LD1 {v10.4h}, [x1], #8
|
||||
sMLAL v30.4s, v8.4h, v9.4h
|
||||
|
||||
|
||||
|
||||
MOV x9, x1
|
||||
LD2 {v11.4h, v12.4h}, [x3], #16
|
||||
ADD x1, x1, #120
|
||||
|
||||
|
||||
MOV x2, x10
|
||||
LD1 {v12.4h}, [x1], x6
|
||||
MOV x10, x3
|
||||
|
||||
ADD x3, x3, #240
|
||||
LD2 {v13.4h, v14.4h}, [x3], #16
|
||||
ADD x3, x3, #240
|
||||
|
||||
|
||||
LD2 {v15.4h, v16.4h}, [x3], #16
|
||||
|
||||
LD1 {v14.4h}, [x1], x6
|
||||
ADD x3, x3, #240
|
||||
|
||||
|
||||
|
||||
LD1 {v16.4h}, [x1], x6
|
||||
SUB x5, x5, #1
|
||||
|
||||
LD2 {v17.4h, v18.4h}, [x3], #16
|
||||
|
||||
|
||||
ADD x3, x3, #240
|
||||
LD1 {v18.4h}, [x1], x6
|
||||
|
||||
MOV x1, x9
|
||||
LD2 {v19.4h, v20.4h}, [x3], #16
|
||||
|
||||
ADD x3, x3, #240
|
||||
|
||||
MOV x3, x10
|
||||
|
||||
|
||||
LOOP_1:
|
||||
|
||||
|
||||
LD1 {v0.4h}, [x0], #8
|
||||
|
||||
MOV x9, x0
|
||||
LD2 {v1.4h, v2.4h}, [x2], #16
|
||||
ADD x0, x0, #120
|
||||
|
||||
MOV x10, x2
|
||||
ST1 { v30.4s}, [x4], #16
|
||||
ADD x2, x2, #240
|
||||
|
||||
|
||||
sMULL v30.4s, v10.4h, v11.4h
|
||||
LD1 {v2.4h}, [x0], x6
|
||||
sMLAL v30.4s, v12.4h, v13.4h
|
||||
|
||||
sMLAL v30.4s, v14.4h, v15.4h
|
||||
LD2 {v3.4h, v4.4h}, [x2], #16
|
||||
sMLAL v30.4s, v16.4h, v17.4h
|
||||
|
||||
sMLAL v30.4s, v18.4h, v19.4h
|
||||
LD1 {v4.4h}, [x0], x6
|
||||
ADD x2, x2, #240
|
||||
|
||||
ST1 { v30.4s}, [x11], #16
|
||||
|
||||
|
||||
sMULL v30.4s, v0.4h, v1.4h
|
||||
LD2 {v5.4h, v6.4h}, [x2], #16
|
||||
sMLAL v30.4s, v2.4h, v3.4h
|
||||
|
||||
|
||||
|
||||
ADD x2, x2, #240
|
||||
LD1 {v6.4h}, [x0], x6
|
||||
sMLAL v30.4s, v4.4h, v5.4h
|
||||
|
||||
LD2 {v7.4h, v8.4h}, [x2], #16
|
||||
|
||||
|
||||
ADD x2, x2, #240
|
||||
LD1 {v8.4h}, [x0], x6
|
||||
sMLAL v30.4s, v6.4h, v7.4h
|
||||
|
||||
MOV x0, x9
|
||||
LD2 {v9.4h, v10.4h}, [x2], #16
|
||||
|
||||
|
||||
|
||||
ADD x2, x2, #240
|
||||
LD1 {v10.4h}, [x1], #8
|
||||
MOV x2, x10
|
||||
|
||||
MOV x9, x1
|
||||
LD2 {v11.4h, v12.4h}, [x3], #16
|
||||
ADD x1, x1, #120
|
||||
|
||||
|
||||
sMLAL v30.4s, v8.4h, v9.4h
|
||||
LD1 {v12.4h}, [x1], x6
|
||||
MOV x10, x3
|
||||
|
||||
|
||||
ADD x3, x3, #240
|
||||
LD2 {v13.4h, v14.4h}, [x3], #16
|
||||
ADD x3, x3, #240
|
||||
|
||||
|
||||
|
||||
LD1 {v14.4h}, [x1], x6
|
||||
LD2 {v15.4h, v16.4h}, [x3], #16
|
||||
ADD x3, x3, #240
|
||||
|
||||
|
||||
LD1 {v16.4h}, [x1], x6
|
||||
LD2 {v17.4h, v18.4h}, [x3], #16
|
||||
ADD x3, x3, #240
|
||||
|
||||
|
||||
LD1 {v18.4h}, [x1], x6
|
||||
SUBS x5, x5, #1
|
||||
|
||||
MOV x1, x9
|
||||
LD2 {v19.4h, v20.4h}, [x3], #16
|
||||
|
||||
ADD x3, x3, #240
|
||||
|
||||
MOV x3, x10
|
||||
|
||||
BGT LOOP_1
|
||||
|
||||
ST1 { v30.4s}, [x4], #16
|
||||
sMULL v30.4s, v10.4h, v11.4h
|
||||
sMLAL v30.4s, v12.4h, v13.4h
|
||||
|
||||
sMLAL v30.4s, v14.4h, v15.4h
|
||||
sMLAL v30.4s, v16.4h, v17.4h
|
||||
sMLAL v30.4s, v18.4h, v19.4h
|
||||
|
||||
ST1 { v30.4s}, [x11], #16
|
||||
|
||||
//VPOP {D8 - D15}
|
||||
// LDMFD sp!, {x4-x12, x15}
|
||||
ldp x19, x20, [sp], #16
|
||||
pop_v_regs
|
||||
ret
|
||||
// ENDP
|
||||
403
decoder/armv8/ixheaacd_sbr_qmfsyn64_winadd.s
Normal file
403
decoder/armv8/ixheaacd_sbr_qmfsyn64_winadd.s
Normal file
|
|
@ -0,0 +1,403 @@
|
|||
///******************************************************************************
|
||||
// *
|
||||
// * Copyright (C) 2018 The Android Open Source Project
|
||||
// *
|
||||
// * Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// * you may not use this file except in compliance with the License.
|
||||
// * You may obtain a copy of the License at:
|
||||
// *
|
||||
// * http://www.apache.org/licenses/LICENSE-2.0
|
||||
// *
|
||||
// * Unless required by applicable law or agreed to in writing, software
|
||||
// * distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// * See the License for the specific language governing permissions and
|
||||
// * limitations under the License.
|
||||
// *
|
||||
// *****************************************************************************
|
||||
// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
//*/
|
||||
|
||||
|
||||
.macro push_v_regs
|
||||
stp q8, q9, [sp, #-32]!
|
||||
stp q10, q11, [sp, #-32]!
|
||||
stp q12, q13, [sp, #-32]!
|
||||
stp q14, q15, [sp, #-32]!
|
||||
stp x21, x22, [sp, #-16]!
|
||||
stp x23, x24, [sp, #-16]!
|
||||
.endm
|
||||
.macro pop_v_regs
|
||||
ldp x23, x24, [sp], #16
|
||||
ldp x21, x22, [sp], #16
|
||||
ldp q14, q15, [sp], #32
|
||||
ldp q12, q13, [sp], #32
|
||||
ldp q10, q11, [sp], #32
|
||||
ldp q8, q9, [sp], #32
|
||||
.endm
|
||||
|
||||
.macro swp reg1, reg2
|
||||
MOV X16, \reg1
|
||||
MOV \reg1, \reg2
|
||||
MOV \reg2, x16
|
||||
.endm
|
||||
.text
|
||||
.global ixheaacd_sbr_qmfsyn64_winadd
|
||||
|
||||
ixheaacd_sbr_qmfsyn64_winadd:
|
||||
|
||||
push_v_regs
|
||||
|
||||
|
||||
|
||||
MOV w7, #0x8000
|
||||
LD1 {v0.4h}, [x0], #8
|
||||
MOV x12, x2
|
||||
|
||||
dup v30.4s, w7
|
||||
LD1 {v1.4h}, [x2], #8
|
||||
dup v22.4s, w4
|
||||
|
||||
MOV x10, x0
|
||||
MOV x11, x2
|
||||
ADD x0, x0, #504
|
||||
ADD x2, x2, #248
|
||||
|
||||
NEG v28.4s, v22.4s
|
||||
sshL v20.4s, v30.4s, v28.4s
|
||||
MOV x6, #64
|
||||
LSL x6, x6, #1
|
||||
ADD x12, x12, x6
|
||||
MOV x7, #128
|
||||
LSL x9, x7, #1
|
||||
ADD x1, x1, x9
|
||||
MOV x6, #16
|
||||
MOV x7, #128
|
||||
LSL x9, x7, #1
|
||||
MOV x7, #256
|
||||
LSL x8, x7, #1
|
||||
|
||||
LSL x5, x5, #1
|
||||
LD1 {v2.4h}, [x0], x8
|
||||
mov v26.16b, v20.16b
|
||||
|
||||
|
||||
sMLAL v26.4s, v0.4h, v1.4h
|
||||
LD1 {v3.4h}, [x2], x9
|
||||
|
||||
LD1 {v4.4h}, [x0], x8
|
||||
sMLAL v26.4s, v2.4h, v3.4h
|
||||
|
||||
LD1 {v5.4h}, [x2], x9
|
||||
|
||||
LD1 {v6.4h}, [x0], x8
|
||||
sMLAL v26.4s, v5.4h, v4.4h
|
||||
|
||||
LD1 {v7.4h}, [x2], x9
|
||||
|
||||
LD1 {v8.4h}, [x0], x8
|
||||
sMLAL v26.4s, v7.4h, v6.4h
|
||||
|
||||
LD1 {v9.4h}, [x2], x9
|
||||
MOV x0, x10
|
||||
|
||||
|
||||
MOV x2, x11
|
||||
LD1 {v10.4h}, [x1], #8
|
||||
sMLAL v26.4s, v9.4h, v8.4h
|
||||
|
||||
MOV x10, x1
|
||||
LD1 {v11.4h}, [x12], #8
|
||||
ADD x1, x1, #504
|
||||
|
||||
|
||||
|
||||
MOV x11, x12
|
||||
LD1 {v12.4h}, [x1], x8
|
||||
ADD x12, x12, #248
|
||||
|
||||
sMLAL v26.4s, v10.4h, v11.4h
|
||||
LD1 {v13.4h}, [x12], x9
|
||||
|
||||
LD1 {v14.4h}, [x1], x8
|
||||
sMLAL v26.4s, v12.4h, v13.4h
|
||||
|
||||
LD1 {v15.4h}, [x12], x9
|
||||
|
||||
LD1 {v16.4h}, [x1], x8
|
||||
sMLAL v26.4s, v15.4h, v14.4h
|
||||
|
||||
LD1 {v17.4h}, [x12], x9
|
||||
|
||||
LD1 {v18.4h}, [x1], x8
|
||||
sMLAL v26.4s, v17.4h, v16.4h
|
||||
|
||||
LD1 {v19.4h}, [x12], x9
|
||||
|
||||
sMLAL v26.4s, v19.4h, v18.4h
|
||||
LD1 {v0.4h}, [x0], #8
|
||||
MOV x12, x11
|
||||
|
||||
MOV x1, x10
|
||||
LD1 {v1.4h}, [x2], #8
|
||||
MOV x10, x0
|
||||
|
||||
sQshL v26.4s, v26.4s, v22.4s
|
||||
|
||||
ADD x0, x0, #504
|
||||
|
||||
MOV x11, x2
|
||||
LD1 {v2.4h}, [x0], x8
|
||||
ADD x2, x2, #248
|
||||
|
||||
sshR v28.4s, v26.4s, #16
|
||||
LD1 {v3.4h}, [x2], x9
|
||||
|
||||
|
||||
UZP2 v29.8h, v28.8h, v28.8h
|
||||
UZP1 v28.8h, v28.8h, v28.8h
|
||||
mov v26.16b, v20.16b
|
||||
|
||||
|
||||
|
||||
|
||||
LD1 {v4.4h}, [x0], x8
|
||||
LD1 {v5.4h}, [x2], x9
|
||||
|
||||
LD1 {v6.4h}, [x0], x8
|
||||
LD1 {v7.4h}, [x2], x9
|
||||
|
||||
LD1 {v8.4h}, [x0], x8
|
||||
LD1 {v9.4h}, [x2], x9
|
||||
MOV x0, x10
|
||||
|
||||
|
||||
MOV x2, x11
|
||||
LD1 {v10.4h}, [x1], #8
|
||||
|
||||
MOV x10, x1
|
||||
LD1 {v11.4h}, [x12], #8
|
||||
ADD x1, x1, #504
|
||||
|
||||
|
||||
MOV x11, x12
|
||||
LD1 {v12.4h}, [x1], x8
|
||||
ADD x12, x12, #248
|
||||
|
||||
|
||||
LD1 {v13.4h}, [x12], x9
|
||||
|
||||
LD1 {v14.4h}, [x1], x8
|
||||
LD1 {v15.4h}, [x12], x9
|
||||
|
||||
LD1 {v16.4h}, [x1], x8
|
||||
LD1 {v17.4h}, [x12], x9
|
||||
|
||||
LD1 {v18.4h}, [x1], x8
|
||||
SUB x6, x6, #2
|
||||
LD1 {v19.4h}, [x12], x9
|
||||
MOV x1, x10
|
||||
|
||||
MOV x12, x11
|
||||
|
||||
LOOP_1:
|
||||
|
||||
sMLAL v26.4s, v0.4h, v1.4h
|
||||
ST1 {v28.h}[0], [x3], x5
|
||||
|
||||
sMLAL v26.4s, v2.4h, v3.4h
|
||||
LD1 {v0.4h}, [x0], #8
|
||||
sMLAL v26.4s, v5.4h, v4.4h
|
||||
|
||||
sMLAL v26.4s, v7.4h, v6.4h
|
||||
ST1 {v28.h}[1], [x3], x5
|
||||
|
||||
|
||||
MOV x10, x0
|
||||
LD1 {v1.4h}, [x2], #8
|
||||
ADD x0, x0, #504
|
||||
|
||||
sMLAL v26.4s, v9.4h, v8.4h
|
||||
ST1 {v28.h}[2], [x3], x5
|
||||
|
||||
sMLAL v26.4s, v10.4h, v11.4h
|
||||
ST1 {v28.h}[3], [x3], x5
|
||||
|
||||
MOV x11, x2
|
||||
LD1 {v2.4h}, [x0], x8
|
||||
ADD x2, x2, #248
|
||||
|
||||
sMLAL v26.4s, v12.4h, v13.4h
|
||||
LD1 {v3.4h}, [x2], x9
|
||||
sMLAL v26.4s, v15.4h, v14.4h
|
||||
|
||||
sMLAL v26.4s, v17.4h, v16.4h
|
||||
LD1 {v4.4h}, [x0], x8
|
||||
sMLAL v26.4s, v19.4h, v18.4h
|
||||
|
||||
LD1 {v5.4h}, [x2], x9
|
||||
|
||||
LD1 {v6.4h}, [x0], x8
|
||||
sQshL v26.4s, v26.4s, v22.4s
|
||||
|
||||
sshR v28.4s, v26.4s, #16
|
||||
LD1 {v7.4h}, [x2], x9
|
||||
mov v26.16b, v20.16b
|
||||
|
||||
|
||||
UZP2 v29.8h, v28.8h, v28.8h
|
||||
UZP1 v28.8h, v28.8h, v28.8h
|
||||
sMLAL v26.4s, v0.4h, v1.4h
|
||||
|
||||
sMLAL v26.4s, v2.4h, v3.4h
|
||||
LD1 {v8.4h}, [x0], x8
|
||||
sMLAL v26.4s, v5.4h, v4.4h
|
||||
|
||||
sMLAL v26.4s, v7.4h, v6.4h
|
||||
LD1 {v9.4h}, [x2], x9
|
||||
|
||||
|
||||
LD1 {v10.4h}, [x1], #8
|
||||
sMLAL v26.4s, v9.4h, v8.4h
|
||||
|
||||
MOV x2, x11
|
||||
LD1 {v11.4h}, [x12], #8
|
||||
MOV x0, x10
|
||||
|
||||
MOV x10, x1
|
||||
|
||||
ADD x1, x1, #504
|
||||
|
||||
MOV x11, x12
|
||||
LD1 {v12.4h}, [x1], x8
|
||||
ADD x12, x12, #248
|
||||
|
||||
LD1 {v13.4h}, [x12], x9
|
||||
sMLAL v26.4s, v10.4h, v11.4h
|
||||
|
||||
LD1 {v14.4h}, [x1], x8
|
||||
sMLAL v26.4s, v12.4h, v13.4h
|
||||
|
||||
LD1 {v15.4h}, [x12], x9
|
||||
|
||||
LD1 {v16.4h}, [x1], x8
|
||||
sMLAL v26.4s, v15.4h, v14.4h
|
||||
|
||||
LD1 {v17.4h}, [x12], x9
|
||||
|
||||
LD1 {v18.4h}, [x1], x8
|
||||
sMLAL v26.4s, v17.4h, v16.4h
|
||||
|
||||
LD1 {v19.4h}, [x12], x9
|
||||
MOV x1, x10
|
||||
|
||||
sMLAL v26.4s, v19.4h, v18.4h
|
||||
ST1 {v28.h}[0], [x3], x5
|
||||
|
||||
MOV x12, x11
|
||||
LD1 {v0.4h}, [x0], #8
|
||||
|
||||
LD1 {v1.4h}, [x2], #8
|
||||
sQshL v26.4s, v26.4s, v22.4s
|
||||
|
||||
|
||||
ST1 {v28.h}[1], [x3], x5
|
||||
MOV x10, x0
|
||||
|
||||
ST1 {v28.h}[2], [x3], x5
|
||||
ADD x0, x0, #504
|
||||
|
||||
ST1 {v28.h}[3], [x3], x5
|
||||
MOV x11, x2
|
||||
|
||||
sshR v28.4s, v26.4s, #16
|
||||
LD1 {v2.4h}, [x0], x8
|
||||
ADD x2, x2, #248
|
||||
|
||||
LD1 {v3.4h}, [x2], x9
|
||||
LD1 {v4.4h}, [x0], x8
|
||||
LD1 {v5.4h}, [x2], x9
|
||||
LD1 {v6.4h}, [x0], x8
|
||||
LD1 {v7.4h}, [x2], x9
|
||||
LD1 {v8.4h}, [x0], x8
|
||||
LD1 {v9.4h}, [x2], x9
|
||||
|
||||
UZP2 v29.8h, v28.8h, v28.8h
|
||||
UZP1 v28.8h, v28.8h, v28.8h
|
||||
mov v26.16b, v20.16b
|
||||
|
||||
|
||||
|
||||
|
||||
MOV x0, x10
|
||||
LD1 {v10.4h}, [x1], #8
|
||||
MOV x2, x11
|
||||
|
||||
MOV x10, x1
|
||||
LD1 {v11.4h}, [x12], #8
|
||||
ADD x1, x1, #504
|
||||
|
||||
|
||||
MOV x11, x12
|
||||
LD1 {v12.4h}, [x1], x8
|
||||
ADD x12, x12, #248
|
||||
|
||||
|
||||
LD1 {v13.4h}, [x12], x9
|
||||
|
||||
LD1 {v14.4h}, [x1], x8
|
||||
LD1 {v15.4h}, [x12], x9
|
||||
|
||||
LD1 {v16.4h}, [x1], x8
|
||||
LD1 {v17.4h}, [x12], x9
|
||||
|
||||
SUBS x6, x6, #2
|
||||
LD1 {v18.4h}, [x1], x8
|
||||
|
||||
MOV x1, x10
|
||||
LD1 {v19.4h}, [x12], x9
|
||||
|
||||
MOV x12, x11
|
||||
|
||||
|
||||
BGT LOOP_1
|
||||
|
||||
sMLAL v26.4s, v0.4h, v1.4h
|
||||
ST1 {v28.h}[0], [x3], x5
|
||||
sMLAL v26.4s, v2.4h, v3.4h
|
||||
|
||||
sMLAL v26.4s, v5.4h, v4.4h
|
||||
ST1 {v28.h}[1], [x3], x5
|
||||
sMLAL v26.4s, v7.4h, v6.4h
|
||||
|
||||
sMLAL v26.4s, v9.4h, v8.4h
|
||||
ST1 {v28.h}[2], [x3], x5
|
||||
sMLAL v26.4s, v10.4h, v11.4h
|
||||
|
||||
sMLAL v26.4s, v12.4h, v13.4h
|
||||
ST1 {v28.h}[3], [x3], x5
|
||||
sMLAL v26.4s, v15.4h, v14.4h
|
||||
|
||||
|
||||
|
||||
sMLAL v26.4s, v17.4h, v16.4h
|
||||
|
||||
sMLAL v26.4s, v19.4h, v18.4h
|
||||
|
||||
sQshL v26.4s, v26.4s, v22.4s
|
||||
|
||||
sshR v28.4s, v26.4s, #16
|
||||
|
||||
UZP2 v29.8h, v28.8h, v28.8h
|
||||
UZP1 v28.8h, v28.8h, v28.8h
|
||||
|
||||
|
||||
ST1 {v28.h}[0], [x3], x5
|
||||
ST1 {v28.h}[1], [x3], x5
|
||||
ST1 {v28.h}[2], [x3], x5
|
||||
ST1 {v28.h}[3], [x3], x5
|
||||
|
||||
|
||||
pop_v_regs
|
||||
ret
|
||||
|
||||
73
decoder/armv8/ixheaacd_shiftrountine_with_round.s
Normal file
73
decoder/armv8/ixheaacd_shiftrountine_with_round.s
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
.macro push_v_regs
|
||||
stp X8, X9, [sp, #-16]!
|
||||
stp X10, X11, [sp, #-16]!
|
||||
stp X12, X13, [sp, #-16]!
|
||||
stp X14, X15, [sp, #-16]!
|
||||
stp X20, X21, [sp, #-16]!
|
||||
stp X26, X17, [sp, #-16]!
|
||||
stp X27, X28, [sp, #-16]!
|
||||
stp q2, q3, [sp, #-32]!
|
||||
stp q0, q1, [sp, #-32]!
|
||||
.endm
|
||||
.macro pop_v_regs
|
||||
ldp q0, q1, [sp], #32
|
||||
ldp q2, q3, [sp], #32
|
||||
ldp X27, X28, [sp], #16
|
||||
ldp X26, X17, [sp], #16
|
||||
ldp X20, X21, [sp], #16
|
||||
ldp X14, X15, [sp], #16
|
||||
ldp X12, X13, [sp], #16
|
||||
ldp X10, X11, [sp], #16
|
||||
ldp X8, X9, [sp], #16
|
||||
.endm
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_shiftrountine_with_rnd
|
||||
ixheaacd_shiftrountine_with_rnd:
|
||||
push_v_regs
|
||||
|
||||
ADD x12, x2, x3, LSL #1
|
||||
MOV W9, #0x00008000
|
||||
DUP V0.4s, w9
|
||||
MOVI v3.4s, #10
|
||||
MOV W27, #0x80000000
|
||||
MOV W28, #0x7fffffff
|
||||
MOV W26, #0
|
||||
SUBS W3, W3, #1
|
||||
BMI S_WITH_R_L6
|
||||
|
||||
S_WITH_R_L5:
|
||||
LDR w5, [x1, x3, LSL #2] //i2 = qmfImag[j]
|
||||
LDR w7, [x0, x3, LSL #2] //x2 = qmfReal[j]
|
||||
LDR w14, [x0], #4 //x1 = *qmfReal
|
||||
LDR w10, [x1], #4 //i1 = *qmfImag
|
||||
|
||||
ADD w6, w5, w7 //*qmfImag++ = add32(i2, x2)
|
||||
SUB w5, w5, w7 //qmfReal[j] = sub32(i2, x2)
|
||||
ADD w7, w10, w14 //qmfImag[j] = add32(i1, x1)
|
||||
SUB w4, w10, w14 //*qmfReal++ = sub32(i1, x1)
|
||||
|
||||
MOV v1.s[0], W4 //QADD x4, x4, x9
|
||||
MOV v1.s[1], W5 //QADD x4, x4, x9
|
||||
MOV v1.s[2], W6 //QADD x4, x4, x9
|
||||
MOV v1.s[3], W7 //QADD x4, x4, x9
|
||||
lsl w14, w3, #1
|
||||
|
||||
SQSHL v1.4s, v1.4s, v3.4s
|
||||
ADD X17, X2, X14
|
||||
|
||||
SQADD v2.4s, v1.4s, v0.4s
|
||||
|
||||
ST1 {v2.h}[1], [x2], #2
|
||||
ST1 {v2.h}[3], [X17]
|
||||
ADD X17, X12, X14
|
||||
ST1 {v2.h}[7], [x17] //STRH w7, [x12, x14]
|
||||
ST1 {v2.h}[5], [x12], #2 //STRH w6, [x12], #2
|
||||
|
||||
SUBS x3, x3, #2
|
||||
|
||||
BGE S_WITH_R_L5
|
||||
S_WITH_R_L6:
|
||||
pop_v_regs
|
||||
ret
|
||||
79
decoder/armv8/ixheaacd_shiftrountine_with_round_eld.s
Normal file
79
decoder/armv8/ixheaacd_shiftrountine_with_round_eld.s
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
.macro push_v_regs
|
||||
stp X8, X9, [sp, #-16]!
|
||||
stp X10, X11, [sp, #-16]!
|
||||
stp X12, X13, [sp, #-16]!
|
||||
stp X14, X15, [sp, #-16]!
|
||||
stp X20, X21, [sp, #-16]!
|
||||
stp X26, X17, [sp, #-16]!
|
||||
stp X27, X28, [sp, #-16]!
|
||||
stp q2, q3, [sp, #-32]!
|
||||
stp q0, q1, [sp, #-32]!
|
||||
.endm
|
||||
.macro pop_v_regs
|
||||
ldp q0, q1, [sp], #32
|
||||
ldp q2, q3, [sp], #32
|
||||
ldp X27, X28, [sp], #16
|
||||
ldp X26, X17, [sp], #16
|
||||
ldp X20, X21, [sp], #16
|
||||
ldp X14, X15, [sp], #16
|
||||
ldp X12, X13, [sp], #16
|
||||
ldp X10, X11, [sp], #16
|
||||
ldp X8, X9, [sp], #16
|
||||
.endm
|
||||
|
||||
.text
|
||||
.p2align 2
|
||||
.global ixheaacd_shiftrountine_with_rnd_eld
|
||||
ixheaacd_shiftrountine_with_rnd_eld:
|
||||
push_v_regs
|
||||
|
||||
ADD x12, x2, x3, LSL #1
|
||||
MOV W9, #0x00008000
|
||||
DUP V0.4s, w9
|
||||
MOVI v3.4s, #9
|
||||
MOV W27, #0x80000000
|
||||
MOV W28, #0x7fffffff
|
||||
MOV W26, #0
|
||||
SUBS W3, W3, #1
|
||||
BMI S_WITH_R_L6
|
||||
|
||||
S_WITH_R_L5:
|
||||
LDR w5, [x1, x3, LSL #2] //i2 = qmfImag[j]
|
||||
LDR w7, [x0, x3, LSL #2] //x2 = qmfReal[j]
|
||||
LDR w14, [x0], #4 //x1 = *qmfReal
|
||||
LDR w10, [x1], #4 //i1 = *qmfImag
|
||||
|
||||
ADD w6, w5, w7 //*qmfImag++ = add32(i2, x2)
|
||||
MVN w6, w6
|
||||
ADD w6, w6, #1
|
||||
SUB w5, w7, w5 //qmfReal[j] = sub32(i2, x2)
|
||||
ADD w7, w10, w14 //qmfImag[j] = add32(i1, x1)
|
||||
MVN w7, w7
|
||||
ADD w7, w7, #1
|
||||
SUB w4, w14, w10 //*qmfReal++ = sub32(i1, x1)
|
||||
|
||||
|
||||
|
||||
MOV v1.s[0], W4 //QADD x4, x4, x9
|
||||
MOV v1.s[1], W5 //QADD x4, x4, x9
|
||||
MOV v1.s[2], W6 //QADD x4, x4, x9
|
||||
MOV v1.s[3], W7 //QADD x4, x4, x9
|
||||
lsl w14, w3, #1
|
||||
|
||||
SQSHL v1.4s, v1.4s, v3.4s
|
||||
ADD X17, X2, X14
|
||||
|
||||
SQADD v2.4s, v1.4s, v0.4s
|
||||
|
||||
ST1 {v2.h}[1], [x2], #2
|
||||
ST1 {v2.h}[3], [X17]
|
||||
ADD X17, X12, X14
|
||||
ST1 {v2.h}[7], [x17] //STRH w7, [x12, x14]
|
||||
ST1 {v2.h}[5], [x12], #2 //STRH w6, [x12], #2
|
||||
|
||||
SUBS x3, x3, #2
|
||||
|
||||
BGE S_WITH_R_L5
|
||||
S_WITH_R_L6:
|
||||
pop_v_regs
|
||||
ret
|
||||
106
decoder/drc_src/impd_apicmd_standards.h
Normal file
106
decoder/drc_src/impd_apicmd_standards.h
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
#ifndef IMPD_API_CMD_STANDARDS_H
|
||||
#define IMPD_API_CMD_STANDARDS_H
|
||||
|
||||
/*****************************************************************************/
|
||||
/* Ittiam standard API commands */
|
||||
/*****************************************************************************/
|
||||
#define IA_API_CMD_GET_LIB_ID_STRINGS 0x0001
|
||||
|
||||
#define IA_API_CMD_GET_API_SIZE 0x0002
|
||||
#define IA_API_CMD_INIT 0x0003
|
||||
|
||||
#define IA_API_CMD_SET_CONFIG_PARAM 0x0004
|
||||
#define IA_API_CMD_GET_CONFIG_PARAM 0x0005
|
||||
|
||||
#define IA_API_CMD_GET_MEMTABS_SIZE 0x0006
|
||||
#define IA_API_CMD_SET_MEMTABS_PTR 0x0007
|
||||
#define IA_API_CMD_GET_N_MEMTABS 0x0008
|
||||
|
||||
#define IA_API_CMD_EXECUTE 0x0009
|
||||
|
||||
#define IA_API_CMD_PUT_INPUT_QUERY 0x000A
|
||||
#define IA_API_CMD_GET_CURIDX_INPUT_BUF 0x000B
|
||||
#define IA_API_CMD_SET_INPUT_BYTES 0x000C
|
||||
#define IA_API_CMD_GET_OUTPUT_BYTES 0x000D
|
||||
#define IA_API_CMD_RESET 0x0010
|
||||
|
||||
#define IA_API_CMD_GET_MEM_INFO_SIZE 0x0011
|
||||
#define IA_API_CMD_GET_MEM_INFO_ALIGNMENT 0x0012
|
||||
#define IA_API_CMD_GET_MEM_INFO_TYPE 0x0013
|
||||
#define IA_API_CMD_GET_MEM_INFO_PLACEMENT 0x0014
|
||||
#define IA_API_CMD_GET_MEM_INFO_PRIORITY 0x0015
|
||||
#define IA_API_CMD_SET_MEM_PTR 0x0016
|
||||
#define IA_API_CMD_SET_MEM_INFO_SIZE 0x0017
|
||||
#define IA_API_CMD_SET_MEM_PLACEMENT 0x0018
|
||||
|
||||
#define IA_API_CMD_GET_N_TABLES 0x0019
|
||||
#define IA_API_CMD_GET_TABLE_INFO_SIZE 0x001A
|
||||
#define IA_API_CMD_GET_TABLE_INFO_ALIGNMENT 0x001B
|
||||
#define IA_API_CMD_GET_TABLE_INFO_PRIORITY 0x001C
|
||||
#define IA_API_CMD_SET_TABLE_PTR 0x001D
|
||||
#define IA_API_CMD_GET_TABLE_PTR 0x001E
|
||||
|
||||
#define IA_API_CMD_INPUT_OVER 0x0020
|
||||
#define IA_API_CMD_INPUT_OVER_BS 0x0021
|
||||
#define IA_API_CMD_INPUT_OVER_IC_BS 0x0022
|
||||
#define IA_API_CMD_INPUT_OVER_IG_BS 0x0023
|
||||
#define IA_API_CMD_INPUT_OVER_IL_BS 0x0024
|
||||
#define IA_API_CMD_INPUT_OVER_IN_BS 0x0025
|
||||
|
||||
#define IA_API_CMD_SET_INPUT_BYTES_BS 0x0026
|
||||
#define IA_API_CMD_SET_INPUT_BYTES_IC_BS 0x0027
|
||||
#define IA_API_CMD_SET_INPUT_BYTES_IG_BS 0x0028
|
||||
#define IA_API_CMD_SET_INPUT_BYTES_IL_BS 0x0029
|
||||
#define IA_API_CMD_SET_INPUT_BYTES_IN_BS 0x002A
|
||||
/*****************************************************************************/
|
||||
/* Ittiam standard API command indices */
|
||||
/*****************************************************************************/
|
||||
/* IA_API_CMD_GET_LIB_ID_STRINGS indices */
|
||||
#define IA_CMD_TYPE_LIB_NAME 0x0100
|
||||
#define IA_CMD_TYPE_LIB_VERSION 0x0200
|
||||
#define IA_CMD_TYPE_API_VERSION 0x0300
|
||||
|
||||
/* IA_API_CMD_INIT indices */
|
||||
#define IA_CMD_TYPE_INIT_API_PRE_CONFIG_PARAMS 0x0100
|
||||
#define IA_CMD_TYPE_INIT_API_POST_CONFIG_PARAMS 0x0200
|
||||
#define IA_CMD_TYPE_INIT_PROCESS 0x0300
|
||||
#define IA_CMD_TYPE_INIT_DONE_QUERY 0x0400
|
||||
|
||||
#define IA_CMD_TYPE_INIT_CPY_BSF_BUFF 0x0201
|
||||
#define IA_CMD_TYPE_INIT_CPY_IC_BSF_BUFF 0x0202
|
||||
#define IA_CMD_TYPE_INIT_CPY_IL_BSF_BUFF 0x0203
|
||||
#define IA_CMD_TYPE_INIT_CPY_IG_BSF_BUFF 0x0204
|
||||
#define IA_CMD_TYPE_INIT_CPY_IN_BSF_BUFF 0x0205
|
||||
|
||||
#define IA_CMD_TYPE_INIT_CPY_BSF_BUFF_OVER_QUERY 0x0206
|
||||
#define IA_CMD_TYPE_INIT_CPY_IC_BSF_BUFF_OVER_QUERY 0x0207
|
||||
#define IA_CMD_TYPE_INIT_CPY_IL_BSF_BUFF_OVER_QUERY 0x0208
|
||||
#define IA_CMD_TYPE_INIT_CPY_IG_BSF_BUFF_OVER_QUERY 0x0209
|
||||
#define IA_CMD_TYPE_INIT_CPY_IN_BSF_BUFF_OVER_QUERY 0x020A
|
||||
#define IA_CMD_TYPE_INIT_SET_BUFF_PTR 0x020B
|
||||
|
||||
/* IA_API_CMD_EXECUTE indices */
|
||||
#define IA_CMD_TYPE_DO_EXECUTE 0x0100
|
||||
#define IA_CMD_TYPE_DONE_QUERY 0x0200
|
||||
|
||||
#endif
|
||||
699
decoder/drc_src/impd_drc_api.c
Normal file
699
decoder/drc_src/impd_drc_api.c
Normal file
|
|
@ -0,0 +1,699 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "impd_type_def.h"
|
||||
#include "impd_error_standards.h"
|
||||
#include "impd_apicmd_standards.h"
|
||||
#include "impd_memory_standards.h"
|
||||
|
||||
|
||||
#include "impd_drc_bitbuffer.h"
|
||||
#include "impd_drc_extr_delta_coded_info.h"
|
||||
#include "impd_drc_common.h"
|
||||
#include "impd_drc_struct.h"
|
||||
#include "impd_drc_interface.h"
|
||||
#include "impd_parametric_drc_dec.h"
|
||||
#include "impd_drc_gain_dec.h"
|
||||
#include "impd_drc_filter_bank.h"
|
||||
#include "impd_drc_multi_band.h"
|
||||
#include "impd_drc_process_audio.h"
|
||||
#include "impd_drc_eq.h"
|
||||
#include "impd_drc_gain_decoder.h"
|
||||
#include "impd_drc_config_params.h"
|
||||
#include "impd_drc_api_defs.h"
|
||||
#include "impd_drc_definitions.h"
|
||||
#include "impd_drc_hashdefines.h"
|
||||
#include "impd_drc_peak_limiter.h"
|
||||
|
||||
#include "impd_drc_selection_process.h"
|
||||
#include "impd_drc_api_struct_def.h"
|
||||
#include "impd_drc_error_codes.h"
|
||||
|
||||
WORD32 impd_init_process_audio_main_qmf(ia_drc_api_struct *p_obj_drc);
|
||||
WORD32 impd_init_process_audio_main_stft(ia_drc_api_struct *p_obj_drc);
|
||||
WORD32 impd_init_process_audio_main_td_qmf(ia_drc_api_struct *p_obj_drc);
|
||||
|
||||
IA_ERRORCODE impd_drc_mem_api(ia_drc_api_struct *p_obj_drc,
|
||||
WORD32 i_cmd, WORD32 i_idx, pVOID pv_value);
|
||||
|
||||
IA_ERRORCODE impd_drc_fill_mem_tables(ia_drc_api_struct *p_obj_drc);
|
||||
|
||||
|
||||
|
||||
VOID impd_drc_set_default_config_params(ia_drc_config_struct* ptr_config);
|
||||
|
||||
|
||||
IA_ERRORCODE impd_drc_process_frame(ia_drc_api_struct *p_obj_drc);
|
||||
IA_ERRORCODE impd_drc_init(ia_drc_api_struct *p_obj_drc);
|
||||
IA_ERRORCODE impd_drc_set_default_config(ia_drc_api_struct *p_obj_drc);
|
||||
IA_ERRORCODE impd_drc_set_struct_pointer(ia_drc_api_struct *p_obj_drc);
|
||||
IA_ERRORCODE impd_process_time_domain(ia_drc_api_struct *p_obj_drc);
|
||||
|
||||
#define NUM_DRC_TABLES 4
|
||||
#define SCRATCH_MEM_SIZE 1024*256*64
|
||||
|
||||
|
||||
|
||||
IA_ERRORCODE ia_drc_dec_api(pVOID p_ia_drc_dec_obj,
|
||||
WORD32 i_cmd,
|
||||
WORD32 i_idx,
|
||||
pVOID pv_value)
|
||||
{
|
||||
ia_drc_api_struct *p_obj_drc = p_ia_drc_dec_obj;
|
||||
IA_ERRORCODE error_code=IA_NO_ERROR;
|
||||
LOOPIDX i;
|
||||
|
||||
pUWORD32 pui_value = pv_value;
|
||||
pUWORD32 pus_value = pv_value;
|
||||
pWORD8 pb_value = pv_value;
|
||||
SIZE_T *ps_value = pv_value;
|
||||
|
||||
switch(i_cmd)
|
||||
{
|
||||
case IA_API_CMD_GET_MEM_INFO_SIZE:
|
||||
case IA_API_CMD_GET_MEM_INFO_ALIGNMENT:
|
||||
case IA_API_CMD_GET_MEM_INFO_TYPE:
|
||||
case IA_API_CMD_GET_MEM_INFO_PLACEMENT:
|
||||
case IA_API_CMD_GET_MEM_INFO_PRIORITY:
|
||||
case IA_API_CMD_SET_MEM_PTR:
|
||||
case IA_API_CMD_SET_MEM_PLACEMENT:
|
||||
{
|
||||
return impd_drc_mem_api(p_ia_drc_dec_obj, i_cmd, i_idx, pv_value);
|
||||
}
|
||||
};
|
||||
|
||||
switch(i_cmd)
|
||||
{
|
||||
case IA_API_CMD_GET_LIB_ID_STRINGS:
|
||||
{
|
||||
switch(i_idx)
|
||||
{
|
||||
case IA_CMD_TYPE_LIB_NAME:
|
||||
{
|
||||
WORD8 lib_name[] = LIBNAME;
|
||||
for(i = 0; i < IA_API_STR_LEN && lib_name[i -1] != 0; i++)
|
||||
{
|
||||
pb_value[i] = lib_name[i];
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IA_CMD_TYPE_LIB_VERSION:
|
||||
{
|
||||
break;
|
||||
}
|
||||
|
||||
case IA_CMD_TYPE_API_VERSION:
|
||||
{
|
||||
|
||||
}
|
||||
default:
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_GET_API_SIZE:
|
||||
{
|
||||
*pui_value = sizeof(ia_drc_api_struct)+(sizeof(ia_drc_state_struct)+8)+8080*1024;
|
||||
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_INIT:
|
||||
{
|
||||
switch(i_idx)
|
||||
{
|
||||
case IA_CMD_TYPE_INIT_SET_BUFF_PTR:
|
||||
{
|
||||
p_obj_drc->p_state->persistant_ptr=p_obj_drc->pp_mem[IA_DRC_PERSIST_IDX];
|
||||
impd_drc_set_struct_pointer(p_obj_drc);
|
||||
|
||||
break;
|
||||
}
|
||||
case IA_CMD_TYPE_INIT_API_PRE_CONFIG_PARAMS:
|
||||
{
|
||||
impd_drc_set_default_config(p_obj_drc);
|
||||
break;
|
||||
}
|
||||
case IA_CMD_TYPE_INIT_API_POST_CONFIG_PARAMS:
|
||||
{
|
||||
p_obj_drc->p_state=(ia_drc_state_struct *)((SIZE_T)p_obj_drc+8000*1024);
|
||||
p_obj_drc->p_mem_info=(ia_mem_info_struct *)((SIZE_T)p_obj_drc+8002*1024);
|
||||
p_obj_drc->pp_mem=(pVOID)((SIZE_T)p_obj_drc+8006*1024);
|
||||
impd_drc_fill_mem_tables(p_obj_drc);
|
||||
break;
|
||||
}
|
||||
case IA_CMD_TYPE_INIT_PROCESS:
|
||||
{
|
||||
IA_ERRORCODE Error=0;
|
||||
|
||||
if(p_obj_drc->pp_mem[IA_DRC_PERSIST_IDX] == 0)
|
||||
{
|
||||
return(-1);
|
||||
}
|
||||
|
||||
|
||||
Error = impd_drc_init(p_obj_drc);
|
||||
if(Error)
|
||||
return Error;
|
||||
p_obj_drc->p_state->ui_init_done = 1;
|
||||
return Error;
|
||||
break;
|
||||
}
|
||||
case IA_CMD_TYPE_INIT_DONE_QUERY:
|
||||
{
|
||||
|
||||
if(p_obj_drc->p_state->ui_init_done == 1)
|
||||
{
|
||||
*pui_value = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
*pui_value = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case IA_CMD_TYPE_INIT_CPY_BSF_BUFF_OVER_QUERY:
|
||||
{
|
||||
*pui_value = p_obj_drc->str_bit_handler.cpy_over;
|
||||
break;
|
||||
}
|
||||
case IA_CMD_TYPE_INIT_CPY_IC_BSF_BUFF_OVER_QUERY:
|
||||
{
|
||||
*pui_value = p_obj_drc->str_bit_handler.cpy_over_ic;
|
||||
break;
|
||||
}
|
||||
|
||||
case IA_CMD_TYPE_INIT_CPY_IL_BSF_BUFF_OVER_QUERY:
|
||||
{
|
||||
*pui_value = p_obj_drc->str_bit_handler.cpy_over_il;
|
||||
break;
|
||||
}
|
||||
case IA_CMD_TYPE_INIT_CPY_IN_BSF_BUFF_OVER_QUERY:
|
||||
{
|
||||
*pui_value = p_obj_drc->str_bit_handler.cpy_over_in;
|
||||
break;
|
||||
}
|
||||
case IA_CMD_TYPE_INIT_CPY_BSF_BUFF:
|
||||
{
|
||||
memcpy(p_obj_drc->str_bit_handler.it_bit_buf+p_obj_drc->str_bit_handler.num_bytes_bs, p_obj_drc->pp_mem[2],p_obj_drc->str_bit_handler.num_byts_cur);
|
||||
p_obj_drc->str_bit_handler.num_bytes_bs=p_obj_drc->str_bit_handler.num_bytes_bs+p_obj_drc->str_bit_handler.num_byts_cur;
|
||||
break;
|
||||
|
||||
}
|
||||
case IA_CMD_TYPE_INIT_CPY_IC_BSF_BUFF:
|
||||
{
|
||||
memcpy(p_obj_drc->str_bit_handler.bitstream_drc_config+p_obj_drc->str_bit_handler.num_bytes_bs_drc_config, p_obj_drc->pp_mem[2],p_obj_drc->str_bit_handler.num_byts_cur_ic);
|
||||
p_obj_drc->str_bit_handler.num_bytes_bs_drc_config=p_obj_drc->str_bit_handler.num_bytes_bs_drc_config+p_obj_drc->str_bit_handler.num_byts_cur_ic;
|
||||
break;
|
||||
|
||||
}
|
||||
case IA_CMD_TYPE_INIT_CPY_IL_BSF_BUFF:
|
||||
{
|
||||
memcpy(p_obj_drc->str_bit_handler.bitstream_loudness_info+p_obj_drc->str_bit_handler.num_bytes_bs_loudness_info, p_obj_drc->pp_mem[2],p_obj_drc->str_bit_handler.num_byts_cur_il);
|
||||
p_obj_drc->str_bit_handler.num_bytes_bs_loudness_info=p_obj_drc->str_bit_handler.num_bytes_bs_loudness_info+p_obj_drc->str_bit_handler.num_byts_cur_il;
|
||||
break;
|
||||
|
||||
}
|
||||
case IA_CMD_TYPE_INIT_CPY_IN_BSF_BUFF:
|
||||
{
|
||||
memcpy(p_obj_drc->str_bit_handler.bitstream_unidrc_interface+p_obj_drc->str_bit_handler.num_bytes_bs_unidrc_interface, p_obj_drc->pp_mem[2],p_obj_drc->str_bit_handler.num_byts_cur_in);
|
||||
p_obj_drc->str_bit_handler.num_bytes_bs_unidrc_interface=p_obj_drc->str_bit_handler.num_bytes_bs_unidrc_interface+p_obj_drc->str_bit_handler.num_byts_cur_in;
|
||||
break;
|
||||
|
||||
}
|
||||
default:
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_GET_CONFIG_PARAM:
|
||||
{
|
||||
switch(i_idx)
|
||||
{
|
||||
|
||||
case IA_DRC_DEC_CONFIG_PARAM_SAMP_FREQ:
|
||||
{
|
||||
|
||||
*pus_value=p_obj_drc->str_config.sampling_rate;
|
||||
break;
|
||||
}
|
||||
|
||||
case IA_DRC_DEC_CONFIG_PARAM_NUM_CHANNELS:
|
||||
{
|
||||
|
||||
*pus_value=p_obj_drc->str_config.num_ch_out;
|
||||
break;
|
||||
}
|
||||
case IA_DRC_DEC_CONFIG_PROC_OUT_PTR:
|
||||
{
|
||||
|
||||
*ps_value=(SIZE_T)p_obj_drc->str_payload.pstr_drc_sel_proc_output;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_SET_CONFIG_PARAM:
|
||||
{
|
||||
switch(i_idx)
|
||||
{
|
||||
case IA_DRC_DEC_CONFIG_PARAM_DEC_TYPE:
|
||||
{
|
||||
if(*pus_value==1){
|
||||
p_obj_drc->str_config.dec_type = DEC_TYPE_TD_QMF64;
|
||||
p_obj_drc->str_config.sub_band_domain_mode = SUBBAND_DOMAIN_MODE_QMF64;
|
||||
p_obj_drc->str_config.sub_band_down_sampling_factor = AUDIO_CODEC_SUBBAND_DOWNSAMPLING_FACTOR_QMF64;
|
||||
p_obj_drc->str_config.sub_band_count = AUDIO_CODEC_SUBBAND_COUNT_QMF64;
|
||||
}
|
||||
else if(*pus_value==2){
|
||||
p_obj_drc->str_config.dec_type = DEC_TYPE_QMF64;
|
||||
p_obj_drc->str_config.sub_band_domain_mode = SUBBAND_DOMAIN_MODE_QMF64;
|
||||
p_obj_drc->str_config.sub_band_down_sampling_factor = AUDIO_CODEC_SUBBAND_DOWNSAMPLING_FACTOR_QMF64;
|
||||
p_obj_drc->str_config.sub_band_count = AUDIO_CODEC_SUBBAND_COUNT_QMF64;
|
||||
}
|
||||
else if(*pus_value==3){
|
||||
p_obj_drc->str_config.dec_type = DEC_TYPE_STFT256;
|
||||
p_obj_drc->str_config.sub_band_domain_mode = SUBBAND_DOMAIN_MODE_STFT256;
|
||||
p_obj_drc->str_config.sub_band_down_sampling_factor = AUDIO_CODEC_SUBBAND_DOWNSAMPLING_FACTOR_STFT256;
|
||||
p_obj_drc->str_config.sub_band_count = AUDIO_CODEC_SUBBAND_COUNT_STFT256;
|
||||
}
|
||||
else{
|
||||
p_obj_drc->str_config.dec_type = DEC_TYPE_TD;
|
||||
p_obj_drc->str_config.sub_band_domain_mode = SUBBAND_DOMAIN_MODE_OFF;
|
||||
}
|
||||
|
||||
if(*pus_value<0 || *pus_value >3)
|
||||
{
|
||||
return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_DECODE_TYPE;
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IA_DRC_DEC_CONFIG_PARAM_CTRL_PARAM:
|
||||
{
|
||||
if(*pus_value<1 || *pus_value>39)
|
||||
{
|
||||
return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_CTRL_PARAM_IDX;
|
||||
}
|
||||
p_obj_drc->str_config.control_parameter_index = *pus_value;
|
||||
break;
|
||||
}
|
||||
case IA_DRC_DEC_CONFIG_PARAM_PEAK_LIMITER:
|
||||
{
|
||||
if(*pus_value<0 || *pus_value>1)
|
||||
{
|
||||
return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_PEAK_LIM_FLAG;
|
||||
}
|
||||
p_obj_drc->str_config.peak_limiter = *pus_value;
|
||||
break;
|
||||
}
|
||||
|
||||
case IA_DRC_DEC_CONFIG_PARAM_VER_MODE:
|
||||
{
|
||||
break;
|
||||
}
|
||||
case IA_DRC_DEC_CONFIG_PARAM_SAMP_FREQ:
|
||||
{
|
||||
if(*pus_value<8000 || *pus_value>96000)
|
||||
{
|
||||
return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_SAMP_FREQ;
|
||||
}
|
||||
p_obj_drc->str_config.sampling_rate = *pus_value;
|
||||
break;
|
||||
}
|
||||
case IA_DRC_DEC_CONFIG_PARAM_NUM_CHANNELS:
|
||||
{
|
||||
p_obj_drc->str_config.num_ch_in = *pus_value;
|
||||
if(*pus_value < 1 || *pus_value > MAX_CHANNEL_COUNT)
|
||||
{
|
||||
return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_NUM_OF_CHANNELS;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case IA_DRC_DEC_CONFIG_PARAM_PCM_WDSZ:
|
||||
{
|
||||
if((*pus_value!=16) && (*pus_value!=32))
|
||||
{
|
||||
return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_PCM_SIZE;
|
||||
}
|
||||
|
||||
p_obj_drc->str_config.pcm_size = *pus_value;
|
||||
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case IA_DRC_DEC_CONFIG_PARAM_BITS_FORMAT:
|
||||
{
|
||||
if((*pus_value!=1)&&(*pus_value!=0))
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
p_obj_drc->str_config.bitstream_file_format = *pus_value;
|
||||
|
||||
break;
|
||||
}
|
||||
case IA_DRC_DEC_CONFIG_PARAM_INT_PRESENT:
|
||||
{
|
||||
if((*pus_value!=1)&&(*pus_value!=0))
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
p_obj_drc->str_config.interface_bitstream_present = *pus_value;
|
||||
|
||||
break;
|
||||
}
|
||||
case IA_DRC_DEC_CONFIG_PARAM_DELAY_MODE:
|
||||
{
|
||||
if((*pus_value!=1)&&(*pus_value!=0))
|
||||
{
|
||||
return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_DELAY_MODE;
|
||||
}
|
||||
p_obj_drc->str_config.delay_mode = *pus_value;
|
||||
|
||||
break;
|
||||
}
|
||||
case IA_DRC_DEC_CONFIG_PARAM_GAIN_DELAY:
|
||||
{
|
||||
if ((*pus_value > MAX_SIGNAL_DELAY) || (*pus_value < 0))
|
||||
{
|
||||
return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_GAIN_DELAY;
|
||||
}
|
||||
|
||||
p_obj_drc->str_config.gain_delay_samples = *pus_value;
|
||||
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
/*Sujith: introduce error*/
|
||||
case IA_DRC_DEC_CONFIG_PARAM_AUDIO_DELAY:
|
||||
{
|
||||
break;
|
||||
}
|
||||
case IA_DRC_DEC_CONFIG_PARAM_CON_DELAY_MODE:
|
||||
{
|
||||
if(*pus_value<0 || *pus_value>1)
|
||||
{
|
||||
return IA_DRC_DEC_CONFIG_PARAM_CON_DELAY_MODE;
|
||||
}
|
||||
p_obj_drc->str_config.constant_delay_on = *pus_value;
|
||||
|
||||
break;
|
||||
}
|
||||
case IA_DRC_DEC_CONFIG_PARAM_ABSO_DELAY_OFF:
|
||||
{
|
||||
p_obj_drc->str_config.absorb_delay_on = *pus_value;
|
||||
|
||||
break;
|
||||
}
|
||||
case IA_DRC_DEC_CONFIG_PARAM_FRAME_SIZE:
|
||||
{
|
||||
if(*pus_value<1 || *pus_value>4096)
|
||||
{
|
||||
return IA_DRC_DEC_CONFIG_NON_FATAL_INVALID_FRAME_SIZE;
|
||||
}
|
||||
|
||||
p_obj_drc->str_config.frame_size = *pus_value;
|
||||
|
||||
|
||||
break;
|
||||
}
|
||||
case IA_DRC_DEC_CONFIG_GAIN_STREAM_FLAG:
|
||||
{
|
||||
p_obj_drc->str_bit_handler.gain_stream_flag = *pus_value;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_GET_MEMTABS_SIZE:
|
||||
{
|
||||
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_SET_MEMTABS_PTR:
|
||||
{
|
||||
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_GET_N_MEMTABS:
|
||||
{
|
||||
|
||||
*pui_value = NUM_DRC_TABLES;
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_GET_N_TABLES:
|
||||
{
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case IA_API_CMD_EXECUTE:
|
||||
{
|
||||
switch(i_idx)
|
||||
{
|
||||
case IA_CMD_TYPE_DO_EXECUTE:
|
||||
{
|
||||
if(p_obj_drc->str_config.dec_type==DEC_TYPE_TD){
|
||||
error_code=impd_process_time_domain(p_obj_drc);}
|
||||
else if(p_obj_drc->str_config.dec_type==DEC_TYPE_QMF64){
|
||||
error_code=impd_init_process_audio_main_qmf(p_obj_drc);
|
||||
}
|
||||
else if(p_obj_drc->str_config.dec_type==DEC_TYPE_STFT256){
|
||||
error_code=impd_init_process_audio_main_stft(p_obj_drc);
|
||||
}
|
||||
else if(p_obj_drc->str_config.dec_type==DEC_TYPE_TD_QMF64){
|
||||
error_code=impd_init_process_audio_main_td_qmf(p_obj_drc);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case IA_CMD_TYPE_DONE_QUERY:
|
||||
{
|
||||
|
||||
*pui_value = p_obj_drc->p_state->ui_exe_done;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_PUT_INPUT_QUERY:
|
||||
{
|
||||
*pui_value = 1;
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_GET_CURIDX_INPUT_BUF:
|
||||
{
|
||||
UWORD32 ui_in_buf_size =
|
||||
p_obj_drc->p_mem_info[IA_DRC_INPUT_IDX].ui_size;
|
||||
UWORD32 ui_in_bytes = p_obj_drc->p_state->ui_in_bytes;
|
||||
*pui_value = ui_in_buf_size > ui_in_bytes ? ui_in_bytes : ui_in_buf_size;
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_SET_INPUT_BYTES:
|
||||
{
|
||||
p_obj_drc->p_state->ui_in_bytes = *pui_value;
|
||||
break;
|
||||
}
|
||||
|
||||
case IA_API_CMD_GET_OUTPUT_BYTES:
|
||||
{
|
||||
*pui_value = p_obj_drc->p_state->ui_out_bytes;
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_INPUT_OVER:
|
||||
{
|
||||
p_obj_drc->p_state->ui_exe_done=1;
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_INPUT_OVER_BS:
|
||||
{
|
||||
p_obj_drc->str_bit_handler.cpy_over = 1;
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_INPUT_OVER_IC_BS:
|
||||
{
|
||||
p_obj_drc->str_bit_handler.cpy_over_ic = 1;
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_INPUT_OVER_IL_BS:
|
||||
{
|
||||
p_obj_drc->str_bit_handler.cpy_over_il = 1;
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_INPUT_OVER_IN_BS:
|
||||
{
|
||||
p_obj_drc->str_bit_handler.cpy_over_in = 1;
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_SET_INPUT_BYTES_BS:
|
||||
{
|
||||
p_obj_drc->str_bit_handler.num_byts_cur=*pus_value;
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_SET_INPUT_BYTES_IC_BS:
|
||||
{
|
||||
p_obj_drc->str_bit_handler.num_byts_cur_ic=*pus_value;
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_SET_INPUT_BYTES_IL_BS:
|
||||
{
|
||||
p_obj_drc->str_bit_handler.num_byts_cur_il=*pus_value;
|
||||
break;
|
||||
}
|
||||
case IA_API_CMD_SET_INPUT_BYTES_IN_BS:
|
||||
{
|
||||
p_obj_drc->str_bit_handler.num_byts_cur_in=*pus_value;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
return error_code;
|
||||
}
|
||||
|
||||
|
||||
/**
 * impd_drc_mem_api() - service memory-table queries and memory assignment.
 *
 * @p_obj_drc: decoder API object whose p_mem_info/pp_mem tables are accessed.
 * @i_cmd:     IA_API_CMD_GET_MEM_INFO_* query, IA_API_CMD_SET_MEM_PTR, or
 *             IA_API_CMD_SET_MEM_PLACEMENT.
 * @i_idx:     memory-table index (IA_DRC_PERSIST_IDX, IA_DRC_INPUT_IDX, ...).
 * @pv_value:  out-parameter for queries; the memory pointer for SET_MEM_PTR.
 *
 * Returns IA_NO_ERROR on success, -1 on a NULL or misaligned SET_MEM_PTR.
 *
 * Fix relative to the original: the SET_MEM_PTR case had no break and fell
 * through into SET_MEM_PLACEMENT (harmless only because that case is empty);
 * explicit breaks added.
 */
IA_ERRORCODE impd_drc_mem_api(ia_drc_api_struct *p_obj_drc, WORD32 i_cmd,
                              WORD32 i_idx, pVOID pv_value)
{
  pUWORD32 pui_value = pv_value;

  switch (i_cmd) {
    case IA_API_CMD_GET_MEM_INFO_SIZE: {
      *pui_value = p_obj_drc->p_mem_info[i_idx].ui_size;
      break;
    }
    case IA_API_CMD_GET_MEM_INFO_ALIGNMENT: {
      *pui_value = p_obj_drc->p_mem_info[i_idx].ui_alignment;
      break;
    }
    case IA_API_CMD_GET_MEM_INFO_TYPE: {
      *pui_value = p_obj_drc->p_mem_info[i_idx].ui_type;
      break;
    }
    case IA_API_CMD_GET_MEM_INFO_PLACEMENT: {
      /* Placement is a two-element array; both entries are reported. */
      *pui_value = p_obj_drc->p_mem_info[i_idx].ui_placement[0];
      *(pui_value + 1) = p_obj_drc->p_mem_info[i_idx].ui_placement[1];
      break;
    }
    case IA_API_CMD_GET_MEM_INFO_PRIORITY: {
      *pui_value = p_obj_drc->p_mem_info[i_idx].ui_priority;
      break;
    }
    case IA_API_CMD_SET_MEM_PTR: {
      pWORD8 pbtemp;
      UWORD32 sz;

      /* Reject NULL and misaligned buffers before storing the pointer. */
      if (pv_value == 0) {
        return (-1);
      }
      if (((SIZE_T)pv_value % p_obj_drc->p_mem_info[i_idx].ui_alignment) != 0) {
        return (-1);
      }
      p_obj_drc->pp_mem[i_idx] = pv_value;

      /* Zero the newly attached region. */
      pbtemp = p_obj_drc->pp_mem[i_idx];
      sz = p_obj_drc->p_mem_info[i_idx].ui_size;
      memset(pbtemp, 0, sz);
      break;
    }
    case IA_API_CMD_SET_MEM_PLACEMENT: {
      /* Placement assignment is not supported; accepted as a no-op. */
      break;
    }
  }
  return IA_NO_ERROR;
}
|
||||
|
||||
IA_ERRORCODE impd_drc_fill_mem_tables(ia_drc_api_struct *p_obj_drc)
|
||||
{
|
||||
ia_mem_info_struct *p_mem_info;
|
||||
{
|
||||
p_mem_info = &p_obj_drc->p_mem_info[IA_DRC_PERSIST_IDX];
|
||||
p_mem_info->ui_size = 64*1024*1024;
|
||||
p_mem_info->ui_alignment = 8;
|
||||
p_mem_info->ui_type = IA_MEMTYPE_PERSIST;
|
||||
p_mem_info->ui_placement[0] = 0;
|
||||
p_mem_info->ui_placement[1] = 0;
|
||||
p_mem_info->ui_priority = IA_MEMPRIORITY_ANYWHERE;
|
||||
p_mem_info->ui_placed[0] = 0;
|
||||
p_mem_info->ui_placed[1] = 0;
|
||||
}
|
||||
{
|
||||
p_mem_info = &p_obj_drc->p_mem_info[IA_DRC_INPUT_IDX];
|
||||
p_mem_info->ui_size = p_obj_drc->str_config.frame_size*(p_obj_drc->str_config.pcm_size>>3)*p_obj_drc->str_config.num_ch_in;
|
||||
p_mem_info->ui_alignment = 4;
|
||||
p_mem_info->ui_type = IA_MEMTYPE_INPUT;
|
||||
p_mem_info->ui_placement[0] = 0;
|
||||
p_mem_info->ui_placement[1] = 0;
|
||||
p_mem_info->ui_priority = IA_MEMPRIORITY_ANYWHERE;
|
||||
p_mem_info->ui_placed[0] = 0;
|
||||
p_mem_info->ui_placed[1] = 0;
|
||||
}
|
||||
{
|
||||
p_mem_info = &p_obj_drc->p_mem_info[IA_DRC_OUTPUT_IDX];
|
||||
p_mem_info->ui_size = p_obj_drc->str_config.frame_size*(p_obj_drc->str_config.pcm_size>>3)*p_obj_drc->str_config.num_ch_in;
|
||||
p_mem_info->ui_alignment = 4;
|
||||
p_mem_info->ui_type = IA_MEMTYPE_OUTPUT;
|
||||
p_mem_info->ui_placement[0] = 0;
|
||||
p_mem_info->ui_placement[1] = 0;
|
||||
p_mem_info->ui_priority = IA_MEMPRIORITY_ANYWHERE;
|
||||
p_mem_info->ui_placed[0] = 0;
|
||||
p_mem_info->ui_placed[1] = 0;
|
||||
}
|
||||
{
|
||||
p_mem_info = &p_obj_drc->p_mem_info[IA_DRC_SCRATCH_IDX];
|
||||
p_mem_info->ui_size = SCRATCH_MEM_SIZE;
|
||||
p_mem_info->ui_alignment = 8;
|
||||
p_mem_info->ui_type = IA_MEMTYPE_SCRATCH;
|
||||
p_mem_info->ui_placement[0] = 0;
|
||||
p_mem_info->ui_placement[1] = 0;
|
||||
p_mem_info->ui_priority = IA_MEMPRIORITY_ANYWHERE;
|
||||
p_mem_info->ui_placed[0] = 0;
|
||||
p_mem_info->ui_placed[1] = 0;
|
||||
}
|
||||
return IA_NO_ERROR;
|
||||
}
|
||||
51
decoder/drc_src/impd_drc_api_defs.h
Normal file
51
decoder/drc_src/impd_drc_api_defs.h
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
|
||||
|
||||
#ifndef IMPD_API_DEFS_H
#define IMPD_API_DEFS_H

/*****************************************************************************/
/* Constant hash defines                                                     */
/*****************************************************************************/
/* A constant to let API copy small strings to buffers outside */
/* (bounds the IA_CMD_TYPE_LIB_NAME copy in ia_drc_dec_api)    */
#define IA_API_STR_LEN 30
#define IA_APIVERSION_MAJOR 1
#define IA_APIVERSION_MINOR 10

/* last compatible version */
/* sometimes a new API version is just for a bugfix, or a added feature in  */
/* this case it is better to use a newer version even though a library was  */
/* made for an older version, library API can then be upgraded to newer API */
/* version after checking for compatibility or by adding features           */
#define IA_LASTCOMP_APIVERSION_MAJOR 1
#define IA_LASTCOMP_APIVERSION_MINOR 10

/* Stringize a token, then join major/minor as "MAJ.MIN". */
#define IA_STR(str) #str
#define IA_MAKE_VERSION_STR(maj, min) IA_STR(maj) "." IA_STR(min)
#define IA_APIVERSION IA_MAKE_VERSION_STR(\
                        IA_APIVERSION_MAJOR, \
                        IA_APIVERSION_MINOR)

#define IA_LAST_COMP_APIVERSION IA_MAKE_VERSION_STR(\
                                  IA_LASTCOMP_APIVERSION_MAJOR, \
                                  IA_LASTCOMP_APIVERSION_MINOR)

#endif
|
||||
130
decoder/drc_src/impd_drc_api_struct_def.h
Normal file
130
decoder/drc_src/impd_drc_api_struct_def.h
Normal file
|
|
@ -0,0 +1,130 @@
|
|||
/******************************************************************************
|
||||
*
|
||||
* Copyright (C) 2018 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at:
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*****************************************************************************
|
||||
* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
|
||||
*/
|
||||
#ifndef IMPD_DRC_API_STRUCT_DEF_H
|
||||
#define IMPD_DRC_API_STRUCT_DEF_H
|
||||
|
||||
/****************************************************************************/
|
||||
/* structure definitions */
|
||||
/****************************************************************************/
|
||||
/* DRC Configuration */
|
||||
/* Decoder-wide configuration, populated via IA_API_CMD_SET_CONFIG_PARAM
 * (see ia_drc_dec_api) and consumed during init and per-frame processing. */
typedef struct ia_drc_config_struct
{
  WORD32 bitstream_file_format;   /* set by ..._PARAM_BITS_FORMAT (0 or 1) */
  WORD32 dec_type;                /* DEC_TYPE_TD / _QMF64 / _STFT256 / _TD_QMF64 */
  WORD32 sub_band_domain_mode;    /* SUBBAND_DOMAIN_MODE_*, derived from dec_type */
  WORD32 num_ch_in;               /* input channel count, 1..MAX_CHANNEL_COUNT */
  WORD32 num_ch_out;              /* output channel count (reported via GET_CONFIG_PARAM) */
  WORD32 sampling_rate;           /* 8000..96000 Hz */
  WORD32 control_parameter_index; /* 1..39, set by ..._PARAM_CTRL_PARAM */
  WORD32 delay_mode;              /* 0 or 1, set by ..._PARAM_DELAY_MODE */
  WORD32 absorb_delay_on;         /* set by ..._PARAM_ABSO_DELAY_OFF */
  WORD32 gain_delay_samples;      /* 0..MAX_SIGNAL_DELAY */
  WORD32 subband_domain_io_flag;
  WORD32 frame_size;              /* samples per frame, 1..4096 */
  WORD32 sub_band_down_sampling_factor; /* derived from dec_type */
  WORD32 sub_band_count;                /* derived from dec_type */
  WORD32 peak_limiter;            /* 0/1 flag, set by ..._PARAM_PEAK_LIMITER */
  WORD32 interface_bitstream_present;   /* 0/1, set by ..._PARAM_INT_PRESENT */
  WORD32 pcm_size;                /* PCM word size in bits: 16 or 32 */
  WORD32 parametric_drc_delay_gain_dec_instance;
  WORD32 parametric_drc_delay;
  WORD32 parametric_drc_delay_max;
  WORD32 eq_delay_gain_dec_instance;
  WORD32 eq_delay;
  WORD32 eq_delay_max;
  WORD32 delay_line_samples;
  WORD32 constant_delay_on;       /* 0/1, set by ..._PARAM_CON_DELAY_MODE */
  WORD32 audio_delay_samples;

}ia_drc_config_struct;
|
||||
|
||||
/* DRC bitsteam handler */
|
||||
/* DRC bitsteam handler: accumulation buffers and byte counters for the four
 * bitstream feeds (gain stream, DRC config, loudness info, uniDRC interface).
 * Chunks are appended via the IA_CMD_TYPE_INIT_CPY_*_BSF_BUFF commands and
 * the cpy_over* flags are raised by the IA_API_CMD_INPUT_OVER_*_BS commands. */
typedef struct bits_handler
{
  UWORD8* bitstream_drc_config;       /* DRC-config bitstream buffer (IC feed) */
  UWORD8* bitstream_loudness_info ;   /* loudness-info bitstream buffer (IL feed) */
  UWORD8* bitstream_unidrc_interface ;/* uniDRC-interface bitstream buffer (IN feed) */
  UWORD8* it_bit_buf;                 /* main gain bitstream buffer */
  WORD32 num_bytes_bs_drc_config;     /* bytes accumulated in the IC buffer */
  WORD32 num_bytes_bs_loudness_info;  /* bytes accumulated in the IL buffer */
  WORD32 num_bits_read_bs_unidrc_interface;
  WORD32 num_bytes_bs_unidrc_interface; /* bytes accumulated in the IN buffer */
  WORD32 num_bits_read_bs;
  WORD32 num_bytes_read_bs;
  WORD32 num_bytes_bs;                /* bytes accumulated in the main buffer */
  WORD32 num_bits_offset_bs;
  WORD32 byte_index_bs;
  /* Bytes in the chunk currently staged in pp_mem[2], one per feed;
     set by IA_API_CMD_SET_INPUT_BYTES[_IC/_IL/_IN]_BS. */
  WORD32 num_byts_cur;
  WORD32 num_byts_cur_ic;
  WORD32 num_byts_cur_il;
  WORD32 num_byts_cur_in;
  /* End-of-stream flags, one per feed; queried through
     IA_CMD_TYPE_INIT_CPY_*_BSF_BUFF_OVER_QUERY. */
  WORD32 cpy_over;
  WORD32 cpy_over_ic;
  WORD32 cpy_over_il;
  WORD32 cpy_over_in;
  WORD32 gain_stream_flag;            /* set by IA_DRC_DEC_CONFIG_GAIN_STREAM_FLAG */
}ia_drc_bits_handler_struct;
|
||||
|
||||
/* Pointers to the decoded-payload sub-module states; wired up by
 * impd_drc_set_struct_pointer during IA_CMD_TYPE_INIT_SET_BUFF_PTR.
 * NOTE(review): two gain-decoder instances are kept — presumably one per
 * processing stage; confirm against impd_drc_set_struct_pointer. */
typedef struct
{
  ia_drc_bits_dec_struct *pstr_bitstream_dec;   /* bitstream parser state */
  ia_drc_gain_dec_struct *pstr_gain_dec[2];     /* gain decoder instances */
  ia_drc_sel_pro_struct *pstr_selection_proc;   /* DRC set selection process */
  ia_drc_config *pstr_drc_config;               /* parsed uniDrcConfig() */
  ia_drc_loudness_info_set_struct *pstr_loudness_info; /* parsed loudnessInfoSet() */
  ia_drc_gain_struct *pstr_drc_gain;            /* parsed per-frame gain data */
  ia_drc_interface_struct *pstr_drc_interface;  /* parsed uniDrcInterface() */

  ia_drc_peak_limiter_struct *pstr_peak_limiter;
  ia_drc_qmf_filt_struct *pstr_qmf_filter;
  ia_drc_sel_proc_params_struct *pstr_drc_sel_proc_params;
  /* exposed to the caller via IA_DRC_DEC_CONFIG_PROC_OUT_PTR */
  ia_drc_sel_proc_output_struct *pstr_drc_sel_proc_output;

}ia_drc_payload_struct;
|
||||
|
||||
/* Run-time execution state, carved out of the API allocation at a fixed
 * offset during IA_CMD_TYPE_INIT_API_POST_CONFIG_PARAMS. */
typedef struct ia_drc_state_struct
{
  UWORD32 ui_out_bytes;          /* bytes produced this frame (GET_OUTPUT_BYTES) */
  UWORD32 ui_in_bytes;           /* bytes consumed this frame (SET_INPUT_BYTES) */
  UWORD32 ui_ir_bytes;
  UWORD32 total_num_out_samples;
  UWORD32 frame_no;
  UWORD32 out_size;
  UWORD32 ui_init_done;          /* 1 after successful IA_CMD_TYPE_INIT_PROCESS */
  UWORD32 ui_exe_done;           /* 1 after IA_API_CMD_INPUT_OVER */
  UWORD32 ui_ir_used;
  WORD32 delay_in_output;
  WORD32 delay_adjust_samples;
  pVOID persistant_ptr;          /* cached pp_mem[IA_DRC_PERSIST_IDX] */
}ia_drc_state_struct;
|
||||
|
||||
/* Top-level DRC decoder object handed to ia_drc_dec_api. The state struct,
 * memory-info table and memory-pointer table live inside the same single
 * allocation at fixed offsets (see IA_CMD_TYPE_INIT_API_POST_CONFIG_PARAMS). */
typedef struct IA_PSM_API_Struct
{
  ia_drc_state_struct *p_state;           /* run-time state (offset 8000 KiB) */
  ia_drc_config_struct str_config;        /* user-set configuration */
  ia_drc_payload_struct str_payload;      /* decoded-payload sub-module pointers */
  ia_drc_bits_handler_struct str_bit_handler; /* bitstream feed buffers/counters */
  ia_mem_info_struct *p_mem_info;         /* memory tables (offset 8002 KiB) */
  pVOID *pp_mem;                          /* attached memory ptrs (offset 8006 KiB) */
  struct ia_bit_buf_struct str_bit_buf, *pstr_bit_buf;

} ia_drc_api_struct;
|
||||
|
||||
#endif
|
||||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue