DO NOT MERGE - Merge pie-platform-release (PPRL.190505.001) into master.

Bug: 132622481
Change-Id: I1c7eddee82c6753f702c230196f2c69a250524fd
This commit is contained in:
Xin Li 2019-05-15 16:55:32 -07:00
commit eade97648d
6 changed files with 408 additions and 409 deletions

View file

@ -28,8 +28,6 @@ ixheaacd_covariance_matrix_calc_2_armv7:
AUTO_CORR_LOOP: AUTO_CORR_LOOP:
STR r0 , [sp, #-4]! STR r0 , [sp, #-4]!
STR r1 , [sp, #-4]!
LDR r1 , [sp], #4

View file

@ -4,19 +4,18 @@
ixheaacd_complex_fft_p2_asm: ixheaacd_complex_fft_p2_asm:
STMFD sp!, {r0-r12, lr} STMFD sp!, {r0-r12, lr}
SUB sp, sp, #0x28 SUB sp, sp, #0x44
LDR r0, [sp, #0x2c] LDR r0, [sp, #0x48]
@LDR r12,[sp,#0x5c+4]
EOR r0, r0, r0, ASR #31 EOR r0, r0, r0, ASR #31
CLZ r0, r0 CLZ r0, r0
SUB r12, r0, #16 @dig_rev_shift = norm32(npoints) + 1 -16@ SUB r12, r0, #16 @dig_rev_shift = norm32(npoints) + 1 -16@
SUB r0, r0, #1 SUB r0, r0, #1
RSB r0, r0, #0x1e RSB r0, r0, #0x1e
AND r1, r0, #1 AND r1, r0, #1
STR r1, [sp, #0x14] STR r1, [sp, #0x30]
MOV r1, r0, ASR #1 MOV r1, r0, ASR #1
LDR r0, [sp, #0x2c] @npoints LDR r0, [sp, #0x48] @npoints
STR r1, [sp, #-4]! STR r1, [sp, #0x18]
MOV lr, r0, LSL #1 @(npoints >>1) * 4 MOV lr, r0, LSL #1 @(npoints >>1) * 4
MOV r0, #0 MOV r0, #0
@ -33,7 +32,7 @@ FIRST_STAGE_R4:
BIC r7, r4, #0x00FF0000 BIC r7, r4, #0x00FF0000
MOV r7, r7, LSR #8 MOV r7, r7, LSR #8
ORR r4, r7, r6, LSL #8 ORR r4, r7, r6, LSL #8
LDR r5, [sp, #0x18] LDR r5, [sp, #0x30]
MOV r10, r4, LSR r12 MOV r10, r4, LSR r12
CMP r5, #0 CMP r5, #0
ADDNE r10, r10, #1 ADDNE r10, r10, #1
@ -70,24 +69,24 @@ FIRST_STAGE_R4:
STMIA r3!, {r4-r11} STMIA r3!, {r4-r11}
BLT FIRST_STAGE_R4 BLT FIRST_STAGE_R4
LDR r1, [sp], #4 LDR r1, [sp, #0x18]
LDR r0, [sp, #0x2c] LDR r0, [sp, #0x48]
MOV r12, #0x40 @nodespacing = 64@ MOV r12, #0x40 @nodespacing = 64@
STR r12, [sp, #0x1c] STR r12, [sp, #0x38]
LDR r12, [sp, #0x2c] LDR r12, [sp, #0x48]
SUB r3, r3, r0, LSL #3 SUB r3, r3, r0, LSL #3
SUBS r1, r1, #1 SUBS r1, r1, #1
STR r3, [sp, #0x34] STR r3, [sp, #0x50]
MOV r4, r12, ASR #4 MOV r4, r12, ASR #4
MOV r0, #4 MOV r0, #4
STR r4, [sp, #0x18] STR r4, [sp, #0x34]
STR r1, [sp, #0x20] STR r1, [sp, #0x3c]
BLE RADIX2 BLE RADIX2
OUTER_LOOP: OUTER_LOOP:
LDR r1, [sp, #0x28] LDR r1, [sp, #0x44]
LDR r12, [sp, #0x34] @WORD32 *data = ptr_y@ LDR r12, [sp, #0x50] @WORD32 *data = ptr_y@
STR r1, [sp, #0x10] STR r1, [sp, #0x2c]
LDR r1, [sp, #0x18] LDR r1, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
LOOP_TRIVIAL_TWIDDLE: LOOP_TRIVIAL_TWIDDLE:
@ -141,11 +140,11 @@ LOOP_TRIVIAL_TWIDDLE:
BNE LOOP_TRIVIAL_TWIDDLE BNE LOOP_TRIVIAL_TWIDDLE
MOV r0, r0, ASR #3 MOV r0, r0, ASR #3
LDR r4, [sp, #0x1c] LDR r4, [sp, #0x38]
LDR r3, [sp, #0x34] LDR r3, [sp, #0x50]
MUL r1, r0, r4 MUL r1, r0, r4
ADD r12, r3, #8 ADD r12, r3, #8
STR r1, [sp, #0x24] STR r1, [sp, #0x40]
MOV r3, r1, ASR #2 MOV r3, r1, ASR #2
ADD r3, r3, r1, ASR #3 ADD r3, r3, r1, ASR #3
SUB r3, r3, r1, ASR #4 SUB r3, r3, r1, ASR #4
@ -153,25 +152,25 @@ LOOP_TRIVIAL_TWIDDLE:
SUB r3, r3, r1, ASR #6 SUB r3, r3, r1, ASR #6
ADD r3, r3, r1, ASR #7 ADD r3, r3, r1, ASR #7
SUB r3, r3, r1, ASR #8 SUB r3, r3, r1, ASR #8
STR r3, [sp, #-4]! STR r3, [sp, #0x18]
SECOND_LOOP: SECOND_LOOP:
LDR r3, [sp, #0x10+4] LDR r3, [sp, #0x2c]
LDR r14, [sp, #0x18+4] LDR r14, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@ LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@ LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@ LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@ LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@ LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4] STR r4, [sp, #0x24]
STR r1, [sp, #-4] STR r1, [sp, #0x14]
STR r2, [sp, #-8] STR r2, [sp, #0x10]
STR r5, [sp, #-12] STR r5, [sp, #0x0c]
STR r6, [sp, #-16] STR r6, [sp, #0x08]
STR r7, [sp, #-20] STR r7, [sp, #0x04]
STR r8, [sp, #-24] STR r8, [sp]
RADIX4_BFLY: RADIX4_BFLY:
@ -180,8 +179,8 @@ RADIX4_BFLY:
LDRD r10, [r12, r0] @r10=x3r, r11=x3i LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1 SUBS r14, r14, #1
LDR r1, [sp, #-4] LDR r1, [sp, #0x14]
LDR r2, [sp, #-8] LDR r2, [sp, #0x10]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l) SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -198,8 +197,8 @@ RADIX4_BFLY:
ADD r7, r7, r6 ADD r7, r7, r6
SUB r6, r4, r5 @ SUB r6, r4, r5 @
LDR r1, [sp, #-12] LDR r1, [sp, #0x0c]
LDR r2, [sp, #-16] LDR r2, [sp, #0x08]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l) SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -216,8 +215,8 @@ RADIX4_BFLY:
ADD r9, r9, r8 ADD r9, r9, r8
SUB r8, r4, r5 @ SUB r8, r4, r5 @
LDR r1, [sp, #-20] LDR r1, [sp, #0x04]
LDR r2, [sp, #-24] LDR r2, [sp]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l) SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -237,7 +236,7 @@ RADIX4_BFLY:
@SUB r12,r12,r0,lsl #1 @SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i @LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @ LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4] LDR r5, [r12, #0x04]
ADD r4, r8, r4 @x0r = x0r + x2r@ ADD r4, r8, r4 @x0r = x0r + x2r@
@ -271,45 +270,45 @@ RADIX4_BFLY:
BNE RADIX4_BFLY BNE RADIX4_BFLY
MOV r0, r0, ASR #3 MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4] LDR r1, [sp, #0x48]
LDR r4, [sp, #8+4] LDR r4, [sp, #0x24]
SUB r1, r12, r1, LSL #3 SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4] LDR r6, [sp, #0x38]
ADD r12, r1, #8 ADD r12, r1, #8
LDR r7, [sp, #0] LDR r7, [sp, #0x18]
ADD r4, r4, r6 ADD r4, r4, r6
CMP r4, r7 CMP r4, r7
BLE SECOND_LOOP BLE SECOND_LOOP
SECOND_LOOP_2: SECOND_LOOP_2:
LDR r3, [sp, #0x10+4] LDR r3, [sp, #0x2c]
LDR r14, [sp, #0x18+4] LDR r14, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@ LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@ LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@ LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@ LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
SUB r3, r3, #2048 @ 512 *4 SUB r3, r3, #2048 @ 512 *4
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@ LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4] STR r4, [sp, #0x24]
STR r1, [sp, #-4] STR r1, [sp, #0x14]
STR r2, [sp, #-8] STR r2, [sp, #0x10]
STR r5, [sp, #-12] STR r5, [sp, #0x0c]
STR r6, [sp, #-16] STR r6, [sp, #0x08]
STR r7, [sp, #-20] STR r7, [sp, #0x04]
STR r8, [sp, #-24] STR r8, [sp]
RADIX4_BFLY_2: RADIX4_BFLY_2:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
LDRD r10, [r12, r0] @r10=x3r, r11=x3i LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1 SUBS r14, r14, #1
LDR r1, [sp, #-4] LDR r1, [sp, #0x14]
LDR r2, [sp, #-8] LDR r2, [sp, #0x10]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l) SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -326,8 +325,8 @@ RADIX4_BFLY_2:
ADD r7, r7, r6 ADD r7, r7, r6
SUB r6, r4, r5 @ SUB r6, r4, r5 @
LDR r1, [sp, #-12] LDR r1, [sp, #0x0c]
LDR r2, [sp, #-16] LDR r2, [sp, #0x08]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l) SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -344,8 +343,8 @@ RADIX4_BFLY_2:
ADD r9, r9, r8 ADD r9, r9, r8
SUB r8, r4, r5 @ SUB r8, r4, r5 @
LDR r1, [sp, #-20] LDR r1, [sp, #0x04]
LDR r2, [sp, #-24] LDR r2, [sp]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l) SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -365,7 +364,7 @@ RADIX4_BFLY_2:
@SUB r12,r12,r0,lsl #1 @SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i @LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @ LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4] LDR r5, [r12, #0x04]
ADD r4, r8, r4 @x0r = x0r + x2r@ ADD r4, r8, r4 @x0r = x0r + x2r@
@ -399,39 +398,39 @@ RADIX4_BFLY_2:
BNE RADIX4_BFLY_2 BNE RADIX4_BFLY_2
MOV r0, r0, ASR #3 MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4] LDR r1, [sp, #0x48]
LDR r4, [sp, #8+4] LDR r4, [sp, #0x24]
SUB r1, r12, r1, LSL #3 SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4] LDR r6, [sp, #0x38]
ADD r12, r1, #8 ADD r12, r1, #8
LDR r7, [sp, #0x24+4] LDR r7, [sp, #0x40]
ADD r4, r4, r6 ADD r4, r4, r6
CMP r4, r7, ASR #1 CMP r4, r7, ASR #1
BLE SECOND_LOOP_2 BLE SECOND_LOOP_2
LDR r7, [sp, #0] LDR r7, [sp, #0x18]
CMP r4, r7, LSL #1 CMP r4, r7, LSL #1
BGT SECOND_LOOP_4 BGT SECOND_LOOP_4
SECOND_LOOP_3: SECOND_LOOP_3:
LDR r3, [sp, #0x10+4] LDR r3, [sp, #0x2c]
LDR r14, [sp, #0x18+4] LDR r14, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@ LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@ LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
SUB r3, r3, #2048 @ 512 *4 SUB r3, r3, #2048 @ 512 *4
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@ LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@ LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@ LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4] STR r4, [sp, #0x24]
STR r1, [sp, #-4] STR r1, [sp, #0x14]
STR r2, [sp, #-8] STR r2, [sp, #0x10]
STR r5, [sp, #-12] STR r5, [sp, #0x0c]
STR r6, [sp, #-16] STR r6, [sp, #0x08]
STR r7, [sp, #-20] STR r7, [sp, #0x04]
STR r8, [sp, #-24] STR r8, [sp]
RADIX4_BFLY_3: RADIX4_BFLY_3:
@ -440,8 +439,8 @@ RADIX4_BFLY_3:
LDRD r10, [r12, r0] @r10=x3r, r11=x3i LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1 SUBS r14, r14, #1
LDR r1, [sp, #-4] LDR r1, [sp, #0x14]
LDR r2, [sp, #-8] LDR r2, [sp, #0x10]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l) SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -458,8 +457,8 @@ RADIX4_BFLY_3:
ADD r7, r7, r6 ADD r7, r7, r6
SUB r6, r4, r5 @ SUB r6, r4, r5 @
LDR r1, [sp, #-12] LDR r1, [sp, #0x0c]
LDR r2, [sp, #-16] LDR r2, [sp, #0x08]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l) SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -476,8 +475,8 @@ RADIX4_BFLY_3:
ADD r8, r9, r8 ADD r8, r9, r8
SUB r9, r5, r4 @ SUB r9, r5, r4 @
LDR r1, [sp, #-20] LDR r1, [sp, #0x04]
LDR r2, [sp, #-24] LDR r2, [sp]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l) SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -497,7 +496,7 @@ RADIX4_BFLY_3:
@SUB r12,r12,r0,lsl #1 @SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i @LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @ LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4] LDR r5, [r12, #0x04]
ADD r4, r8, r4 @x0r = x0r + x2r@ ADD r4, r8, r4 @x0r = x0r + x2r@
@ -531,38 +530,38 @@ RADIX4_BFLY_3:
BNE RADIX4_BFLY_3 BNE RADIX4_BFLY_3
MOV r0, r0, ASR #3 MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4] LDR r1, [sp, #0x48]
LDR r4, [sp, #8+4] LDR r4, [sp, #0x24]
SUB r1, r12, r1, LSL #3 SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4] LDR r6, [sp, #0x38]
ADD r12, r1, #8 ADD r12, r1, #8
LDR r7, [sp, #0] LDR r7, [sp, #0x18]
ADD r4, r4, r6 ADD r4, r4, r6
CMP r4, r7, LSL #1 CMP r4, r7, LSL #1
BLE SECOND_LOOP_3 BLE SECOND_LOOP_3
SECOND_LOOP_4: SECOND_LOOP_4:
LDR r3, [sp, #0x10+4] LDR r3, [sp, #0x2c]
LDR r14, [sp, #0x18+4] LDR r14, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@ LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@ LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
SUB r3, r3, #2048 @ 512 *4 SUB r3, r3, #2048 @ 512 *4
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@ LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@ LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
SUB r3, r3, #2048 @ 512 *4 SUB r3, r3, #2048 @ 512 *4
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@ LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4] STR r4, [sp, #0x24]
STR r1, [sp, #-4] STR r1, [sp, #0x14]
STR r2, [sp, #-8] STR r2, [sp, #0x10]
STR r5, [sp, #-12] STR r5, [sp, #0x0c]
STR r6, [sp, #-16] STR r6, [sp, #0x08]
STR r7, [sp, #-20] STR r7, [sp, #0x04]
STR r8, [sp, #-24] STR r8, [sp]
RADIX4_BFLY_4: RADIX4_BFLY_4:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
@ -570,8 +569,8 @@ RADIX4_BFLY_4:
LDRD r10, [r12, r0] @r10=x3r, r11=x3i LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1 SUBS r14, r14, #1
LDR r1, [sp, #-4] LDR r1, [sp, #0x14]
LDR r2, [sp, #-8] LDR r2, [sp, #0x10]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l) SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -588,8 +587,8 @@ RADIX4_BFLY_4:
ADD r7, r7, r6 ADD r7, r7, r6
SUB r6, r4, r5 @ SUB r6, r4, r5 @
LDR r1, [sp, #-12] LDR r1, [sp, #0x0c]
LDR r2, [sp, #-16] LDR r2, [sp, #0x08]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l) SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -606,8 +605,8 @@ RADIX4_BFLY_4:
ADD r8, r9, r8 ADD r8, r9, r8
SUB r9, r5, r4 @ SUB r9, r5, r4 @
LDR r1, [sp, #-20] LDR r1, [sp, #0x04]
LDR r2, [sp, #-24] LDR r2, [sp]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l) SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -627,7 +626,7 @@ RADIX4_BFLY_4:
@SUB r12,r12,r0,lsl #1 @SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i @LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @ LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4] LDR r5, [r12, #0x04]
ADD r4, r8, r4 @x0r = x0r + x2r@ ADD r4, r8, r4 @x0r = x0r + x2r@
@ -661,48 +660,46 @@ RADIX4_BFLY_4:
BNE RADIX4_BFLY_4 BNE RADIX4_BFLY_4
MOV r0, r0, ASR #3 MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4] LDR r1, [sp, #0x48]
LDR r4, [sp, #8+4] LDR r4, [sp, #0x24]
SUB r1, r12, r1, LSL #3 SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4] LDR r6, [sp, #0x38]
ADD r12, r1, #8 ADD r12, r1, #8
LDR r7, [sp, #0x24+4] LDR r7, [sp, #0x40]
ADD r4, r4, r6 ADD r4, r4, r6
CMP r4, r7 CMP r4, r7
BLT SECOND_LOOP_4 BLT SECOND_LOOP_4
ADD sp, sp, #4
LDR r1, [sp, #0x1c] LDR r1, [sp, #0x38]
MOV r0, r0, LSL #2 MOV r0, r0, LSL #2
MOV r1, r1, ASR #2 MOV r1, r1, ASR #2
STR r1, [sp, #0x1c] STR r1, [sp, #0x38]
LDR r1, [sp, #0x18] LDR r1, [sp, #0x34]
MOV r1, r1, ASR #2 MOV r1, r1, ASR #2
STR r1, [sp, #0x18] STR r1, [sp, #0x34]
LDR r1, [sp, #0x20] LDR r1, [sp, #0x3c]
SUBS r1, r1, #1 SUBS r1, r1, #1
STR r1, [sp, #0x20] STR r1, [sp, #0x3c]
BGT OUTER_LOOP BGT OUTER_LOOP
RADIX2: RADIX2:
LDR r1, [sp, #0x14] LDR r1, [sp, #0x30]
CMP r1, #0 CMP r1, #0
BEQ EXIT BEQ EXIT
LDR r12, [sp, #0x1c] LDR r12, [sp, #0x38]
LDR r1, [sp, #0x28] LDR r1, [sp, #0x44]
CMP r12, #0 CMP r12, #0
LDRNE r12, [sp, #0x1c]
MOVEQ r4, #1 MOVEQ r4, #1
MOVNE r4, r12, LSL #1 MOVNE r4, r12, LSL #1
MOVS r3, r0 MOVS r3, r0
BEQ EXIT BEQ EXIT
MOV r3, r3, ASR #1 MOV r3, r3, ASR #1
LDR r5, [sp, #0x34] LDR r5, [sp, #0x50]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
STR r1, [sp, #-4] STR r1, [sp, #0x18]
RADIX2_BFLY: RADIX2_BFLY:
LDR r1, [sp, #-4] LDR r1, [sp, #0x18]
LDRD r6, [r5] @r6 = x0r LDRD r6, [r5] @r6 = x0r
ADD r5, r5, r0 ADD r5, r5, r0
LDRD r8, [r5] @r8 = x1r LDRD r8, [r5] @r8 = x1r
@ -719,10 +716,10 @@ RADIX2_BFLY:
ORR r10, r1, r10, LSL#1 ORR r10, r1, r10, LSL#1
LDR r1, [sp, #-4] LDR r1, [sp, #0x18]
LDR r2, [r1, #4] LDR r2, [r1, #0x04]
ADD r1, r1, r4, LSL #3 ADD r1, r1, r4, LSL #3
STR r1, [sp, #-4] STR r1, [sp, #0x18]
SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l) SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l)
LSR r1, r1, #31 LSR r1, r1, #31
@ -750,11 +747,11 @@ RADIX2_BFLY:
BNE RADIX2_BFLY BNE RADIX2_BFLY
LDR r1, [sp, #0x28] LDR r1, [sp, #0x44]
MOV r3, r0, ASR #4 MOV r3, r0, ASR #4
STR r1, [sp, #-4] STR r1, [sp, #0x18]
RADIX2_BFLY_2: RADIX2_BFLY_2:
LDR r1, [sp, #-4] LDR r1, [sp, #0x18]
LDRD r6, [r5] @r6 = x0r LDRD r6, [r5] @r6 = x0r
ADD r5, r5, r0 ADD r5, r5, r0
LDRD r8, [r5] @r8 = x1r LDRD r8, [r5] @r8 = x1r
@ -772,10 +769,10 @@ RADIX2_BFLY_2:
ORR r10, r1, r10, LSL#1 ORR r10, r1, r10, LSL#1
LDR r1, [sp, #-4] LDR r1, [sp, #0x18]
LDR r2, [r1, #4] LDR r2, [r1, #0x04]
ADD r1, r1, r4, LSL #3 ADD r1, r1, r4, LSL #3
STR r1, [sp, #-4] STR r1, [sp, #0x18]
SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l) SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l)
LSR r1, r1, #31 LSR r1, r1, #31
@ -804,6 +801,6 @@ RADIX2_BFLY_2:
BNE RADIX2_BFLY_2 BNE RADIX2_BFLY_2
EXIT: EXIT:
ADD sp, sp, #0x38 ADD sp, sp, #0x54
LDMFD sp!, {r4-r12, pc} LDMFD sp!, {r4-r12, pc}

View file

@ -4,19 +4,18 @@
ixheaacd_complex_ifft_p2_asm: ixheaacd_complex_ifft_p2_asm:
STMFD sp!, {r0-r12, lr} STMFD sp!, {r0-r12, lr}
SUB sp, sp, #0x28 SUB sp, sp, #0x44
LDR r0, [sp, #0x2c] LDR r0, [sp, #0x48]
@LDR r12,[sp,#0x5c+4]
EOR r0, r0, r0, ASR #31 EOR r0, r0, r0, ASR #31
CLZ r0, r0 CLZ r0, r0
SUB r12, r0, #16 @dig_rev_shift = norm32(npoints) + 1 -16@ SUB r12, r0, #16 @dig_rev_shift = norm32(npoints) + 1 -16@
SUB r0, r0, #1 SUB r0, r0, #1
RSB r0, r0, #0x1e RSB r0, r0, #0x1e
AND r1, r0, #1 AND r1, r0, #1
STR r1, [sp, #0x14] STR r1, [sp, #0x30]
MOV r1, r0, ASR #1 MOV r1, r0, ASR #1
LDR r0, [sp, #0x2c] @npoints LDR r0, [sp, #0x48] @npoints
STR r1, [sp, #-4]! STR r1, [sp, #0x18]
MOV lr, r0, LSL #1 @(npoints >>1) * 4 MOV lr, r0, LSL #1 @(npoints >>1) * 4
MOV r0, #0 MOV r0, #0
@ -33,7 +32,7 @@ FIRST_STAGE_R4:
BIC r7, r4, #0x00FF0000 BIC r7, r4, #0x00FF0000
MOV r7, r7, LSR #8 MOV r7, r7, LSR #8
ORR r4, r7, r6, LSL #8 ORR r4, r7, r6, LSL #8
LDR r5, [sp, #0x18] LDR r5, [sp, #0x30]
MOV r10, r4, LSR r12 MOV r10, r4, LSR r12
CMP r5, #0 CMP r5, #0
ADDNE r10, r10, #1 ADDNE r10, r10, #1
@ -70,24 +69,24 @@ FIRST_STAGE_R4:
STMIA r3!, {r4-r11} STMIA r3!, {r4-r11}
BLT FIRST_STAGE_R4 BLT FIRST_STAGE_R4
LDR r1, [sp], #4 LDR r1, [sp, #0x18]
LDR r0, [sp, #0x2c] LDR r0, [sp, #0x48]
MOV r12, #0x40 @nodespacing = 64@ MOV r12, #0x40 @nodespacing = 64@
STR r12, [sp, #0x1c] STR r12, [sp, #0x38]
LDR r12, [sp, #0x2c] LDR r12, [sp, #0x48]
SUB r3, r3, r0, LSL #3 SUB r3, r3, r0, LSL #3
SUBS r1, r1, #1 SUBS r1, r1, #1
STR r3, [sp, #0x34] STR r3, [sp, #0x50]
MOV r4, r12, ASR #4 MOV r4, r12, ASR #4
MOV r0, #4 MOV r0, #4
STR r4, [sp, #0x18] STR r4, [sp, #0x34]
STR r1, [sp, #0x20] STR r1, [sp, #0x3c]
BLE RADIX2 BLE RADIX2
OUTER_LOOP: OUTER_LOOP:
LDR r1, [sp, #0x28] LDR r1, [sp, #0x44]
LDR r12, [sp, #0x34] @WORD32 *data = ptr_y@ LDR r12, [sp, #0x50] @WORD32 *data = ptr_y@
STR r1, [sp, #0x10] STR r1, [sp, #0x2c]
LDR r1, [sp, #0x18] LDR r1, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
LOOP_TRIVIAL_TWIDDLE: LOOP_TRIVIAL_TWIDDLE:
@ -141,11 +140,11 @@ LOOP_TRIVIAL_TWIDDLE:
BNE LOOP_TRIVIAL_TWIDDLE BNE LOOP_TRIVIAL_TWIDDLE
MOV r0, r0, ASR #3 MOV r0, r0, ASR #3
LDR r4, [sp, #0x1c] LDR r4, [sp, #0x38]
LDR r3, [sp, #0x34] LDR r3, [sp, #0x50]
MUL r1, r0, r4 MUL r1, r0, r4
ADD r12, r3, #8 ADD r12, r3, #8
STR r1, [sp, #0x24] STR r1, [sp, #0x40]
MOV r3, r1, ASR #2 MOV r3, r1, ASR #2
ADD r3, r3, r1, ASR #3 ADD r3, r3, r1, ASR #3
SUB r3, r3, r1, ASR #4 SUB r3, r3, r1, ASR #4
@ -153,25 +152,25 @@ LOOP_TRIVIAL_TWIDDLE:
SUB r3, r3, r1, ASR #6 SUB r3, r3, r1, ASR #6
ADD r3, r3, r1, ASR #7 ADD r3, r3, r1, ASR #7
SUB r3, r3, r1, ASR #8 SUB r3, r3, r1, ASR #8
STR r3, [sp, #-4]! STR r3, [sp, #0x18]
SECOND_LOOP: SECOND_LOOP:
LDR r3, [sp, #0x10+4] LDR r3, [sp, #0x2c]
LDR r14, [sp, #0x18+4] LDR r14, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@ LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@ LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@ LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@ LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@ LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4] STR r4, [sp, #0x24]
STR r1, [sp, #-4] STR r1, [sp, #0x14]
STR r2, [sp, #-8] STR r2, [sp, #0x10]
STR r5, [sp, #-12] STR r5, [sp, #0x0c]
STR r6, [sp, #-16] STR r6, [sp, #0x08]
STR r7, [sp, #-20] STR r7, [sp, #0x04]
STR r8, [sp, #-24] STR r8, [sp]
RADIX4_BFLY: RADIX4_BFLY:
@ -180,8 +179,8 @@ RADIX4_BFLY:
LDRD r10, [r12, r0] @r10=x3r, r11=x3i LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1 SUBS r14, r14, #1
LDR r1, [sp, #-4] LDR r1, [sp, #0x14]
LDR r2, [sp, #-8] LDR r2, [sp, #0x10]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l) SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -198,8 +197,8 @@ RADIX4_BFLY:
SUB r7, r7, r6 SUB r7, r7, r6
ADD r6, r4, r5 @ ADD r6, r4, r5 @
LDR r1, [sp, #-12] LDR r1, [sp, #0x0c]
LDR r2, [sp, #-16] LDR r2, [sp, #0x08]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l) SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -216,8 +215,8 @@ RADIX4_BFLY:
SUB r9, r9, r8 SUB r9, r9, r8
ADD r8, r4, r5 @ ADD r8, r4, r5 @
LDR r1, [sp, #-20] LDR r1, [sp, #0x04]
LDR r2, [sp, #-24] LDR r2, [sp]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l) SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -237,7 +236,7 @@ RADIX4_BFLY:
@SUB r12,r12,r0,lsl #1 @SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i @LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @ LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4] LDR r5, [r12, #0x04]
ADD r4, r8, r4 @x0r = x0r + x2r@ ADD r4, r8, r4 @x0r = x0r + x2r@
@ -271,45 +270,45 @@ RADIX4_BFLY:
BNE RADIX4_BFLY BNE RADIX4_BFLY
MOV r0, r0, ASR #3 MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4] LDR r1, [sp, #0x48]
LDR r4, [sp, #8+4] LDR r4, [sp, #0x24]
SUB r1, r12, r1, LSL #3 SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4] LDR r6, [sp, #0x38]
ADD r12, r1, #8 ADD r12, r1, #8
LDR r7, [sp, #0] LDR r7, [sp, #0x18]
ADD r4, r4, r6 ADD r4, r4, r6
CMP r4, r7 CMP r4, r7
BLE SECOND_LOOP BLE SECOND_LOOP
SECOND_LOOP_2: SECOND_LOOP_2:
LDR r3, [sp, #0x10+4] LDR r3, [sp, #0x2c]
LDR r14, [sp, #0x18+4] LDR r14, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@ LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@ LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@ LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@ LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
SUB r3, r3, #2048 @ 512 *4 SUB r3, r3, #2048 @ 512 *4
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@ LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4] STR r4, [sp, #0x24]
STR r1, [sp, #-4] STR r1, [sp, #0x14]
STR r2, [sp, #-8] STR r2, [sp, #0x10]
STR r5, [sp, #-12] STR r5, [sp, #0x0c]
STR r6, [sp, #-16] STR r6, [sp, #0x08]
STR r7, [sp, #-20] STR r7, [sp, #0x04]
STR r8, [sp, #-24] STR r8, [sp]
RADIX4_BFLY_2: RADIX4_BFLY_2:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
LDRD r10, [r12, r0] @r10=x3r, r11=x3i LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1 SUBS r14, r14, #1
LDR r1, [sp, #-4] LDR r1, [sp, #0x14]
LDR r2, [sp, #-8] LDR r2, [sp, #0x10]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l) SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -326,8 +325,8 @@ RADIX4_BFLY_2:
SUB r7, r7, r6 SUB r7, r7, r6
ADD r6, r4, r5 @ ADD r6, r4, r5 @
LDR r1, [sp, #-12] LDR r1, [sp, #0x0c]
LDR r2, [sp, #-16] LDR r2, [sp, #0x08]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l) SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -344,8 +343,8 @@ RADIX4_BFLY_2:
SUB r9, r9, r8 SUB r9, r9, r8
ADD r8, r4, r5 @ ADD r8, r4, r5 @
LDR r1, [sp, #-20] LDR r1, [sp, #0x04]
LDR r2, [sp, #-24] LDR r2, [sp]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l) SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -365,7 +364,7 @@ RADIX4_BFLY_2:
@SUB r12,r12,r0,lsl #1 @SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i @LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @ LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4] LDR r5, [r12, #0x04]
ADD r4, r8, r4 @x0r = x0r + x2r@ ADD r4, r8, r4 @x0r = x0r + x2r@
@ -399,39 +398,39 @@ RADIX4_BFLY_2:
BNE RADIX4_BFLY_2 BNE RADIX4_BFLY_2
MOV r0, r0, ASR #3 MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4] LDR r1, [sp, #0x48]
LDR r4, [sp, #8+4] LDR r4, [sp, #0x24]
SUB r1, r12, r1, LSL #3 SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4] LDR r6, [sp, #0x38]
ADD r12, r1, #8 ADD r12, r1, #8
LDR r7, [sp, #0x24+4] LDR r7, [sp, #0x40]
ADD r4, r4, r6 ADD r4, r4, r6
CMP r4, r7, ASR #1 CMP r4, r7, ASR #1
BLE SECOND_LOOP_2 BLE SECOND_LOOP_2
LDR r7, [sp, #0] LDR r7, [sp, #0x18]
CMP r4, r7, LSL #1 CMP r4, r7, LSL #1
BGT SECOND_LOOP_4 BGT SECOND_LOOP_4
SECOND_LOOP_3: SECOND_LOOP_3:
LDR r3, [sp, #0x10+4] LDR r3, [sp, #0x2c]
LDR r14, [sp, #0x18+4] LDR r14, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@ LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@ LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
SUB r3, r3, #2048 @ 512 *4 SUB r3, r3, #2048 @ 512 *4
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@ LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@ LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@ LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4] STR r4, [sp, #0x24]
STR r1, [sp, #-4] STR r1, [sp, #0x14]
STR r2, [sp, #-8] STR r2, [sp, #0x10]
STR r5, [sp, #-12] STR r5, [sp, #0x0c]
STR r6, [sp, #-16] STR r6, [sp, #0x08]
STR r7, [sp, #-20] STR r7, [sp, #0x04]
STR r8, [sp, #-24] STR r8, [sp]
RADIX4_BFLY_3: RADIX4_BFLY_3:
@ -440,8 +439,8 @@ RADIX4_BFLY_3:
LDRD r10, [r12, r0] @r10=x3r, r11=x3i LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1 SUBS r14, r14, #1
LDR r1, [sp, #-4] LDR r1, [sp, #0x14]
LDR r2, [sp, #-8] LDR r2, [sp, #0x10]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l) SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -458,8 +457,8 @@ RADIX4_BFLY_3:
SUB r7, r7, r6 SUB r7, r7, r6
ADD r6, r4, r5 @ ADD r6, r4, r5 @
LDR r1, [sp, #-12] LDR r1, [sp, #0x0c]
LDR r2, [sp, #-16] LDR r2, [sp, #0x08]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l) SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -476,8 +475,8 @@ RADIX4_BFLY_3:
SUB r8, r8, r9 SUB r8, r8, r9
ADD r9, r5, r4 @ ADD r9, r5, r4 @
LDR r1, [sp, #-20] LDR r1, [sp, #0x04]
LDR r2, [sp, #-24] LDR r2, [sp]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l) SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -497,7 +496,7 @@ RADIX4_BFLY_3:
@SUB r12,r12,r0,lsl #1 @SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i @LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @ LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4] LDR r5, [r12, #0x04]
ADD r4, r8, r4 @x0r = x0r + x2r@ ADD r4, r8, r4 @x0r = x0r + x2r@
@ -531,38 +530,38 @@ RADIX4_BFLY_3:
BNE RADIX4_BFLY_3 BNE RADIX4_BFLY_3
MOV r0, r0, ASR #3 MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4] LDR r1, [sp, #0x48]
LDR r4, [sp, #8+4] LDR r4, [sp, #0x24]
SUB r1, r12, r1, LSL #3 SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4] LDR r6, [sp, #0x38]
ADD r12, r1, #8 ADD r12, r1, #8
LDR r7, [sp, #0] LDR r7, [sp, #0x18]
ADD r4, r4, r6 ADD r4, r4, r6
CMP r4, r7, LSL #1 CMP r4, r7, LSL #1
BLE SECOND_LOOP_3 BLE SECOND_LOOP_3
SECOND_LOOP_4: SECOND_LOOP_4:
LDR r3, [sp, #0x10+4] LDR r3, [sp, #0x2c]
LDR r14, [sp, #0x18+4] LDR r14, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@ LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@ LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
SUB r3, r3, #2048 @ 512 *4 SUB r3, r3, #2048 @ 512 *4
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@ LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@ LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
SUB r3, r3, #2048 @ 512 *4 SUB r3, r3, #2048 @ 512 *4
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@ LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4] STR r4, [sp, #0x24]
STR r1, [sp, #-4] STR r1, [sp, #0x14]
STR r2, [sp, #-8] STR r2, [sp, #0x10]
STR r5, [sp, #-12] STR r5, [sp, #0x0c]
STR r6, [sp, #-16] STR r6, [sp, #0x08]
STR r7, [sp, #-20] STR r7, [sp, #0x04]
STR r8, [sp, #-24] STR r8, [sp]
RADIX4_BFLY_4: RADIX4_BFLY_4:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
@ -570,8 +569,8 @@ RADIX4_BFLY_4:
LDRD r10, [r12, r0] @r10=x3r, r11=x3i LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1 SUBS r14, r14, #1
LDR r1, [sp, #-4] LDR r1, [sp, #0x14]
LDR r2, [sp, #-8] LDR r2, [sp, #0x10]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l) SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -588,8 +587,8 @@ RADIX4_BFLY_4:
SUB r7, r7, r6 SUB r7, r7, r6
ADD r6, r4, r5 @ ADD r6, r4, r5 @
LDR r1, [sp, #-12] LDR r1, [sp, #0x0c]
LDR r2, [sp, #-16] LDR r2, [sp, #0x08]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l) SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -606,8 +605,8 @@ RADIX4_BFLY_4:
SUB r8, r8, r9 SUB r8, r8, r9
ADD r9, r5, r4 @ ADD r9, r5, r4 @
LDR r1, [sp, #-20] LDR r1, [sp, #0x04]
LDR r2, [sp, #-24] LDR r2, [sp]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l) SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -628,7 +627,7 @@ RADIX4_BFLY_4:
@SUB r12,r12,r0,lsl #1 @SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i @LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @ LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4] LDR r5, [r12, #0x04]
ADD r4, r8, r4 @x0r = x0r + x2r@ ADD r4, r8, r4 @x0r = x0r + x2r@
@ -662,48 +661,46 @@ RADIX4_BFLY_4:
BNE RADIX4_BFLY_4 BNE RADIX4_BFLY_4
MOV r0, r0, ASR #3 MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4] LDR r1, [sp, #0x48]
LDR r4, [sp, #8+4] LDR r4, [sp, #0x24]
SUB r1, r12, r1, LSL #3 SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4] LDR r6, [sp, #0x38]
ADD r12, r1, #8 ADD r12, r1, #8
LDR r7, [sp, #0x24+4] LDR r7, [sp, #0x40]
ADD r4, r4, r6 ADD r4, r4, r6
CMP r4, r7 CMP r4, r7
BLT SECOND_LOOP_4 BLT SECOND_LOOP_4
ADD sp, sp, #4
LDR r1, [sp, #0x1c] LDR r1, [sp, #0x38]
MOV r0, r0, LSL #2 MOV r0, r0, LSL #2
MOV r1, r1, ASR #2 MOV r1, r1, ASR #2
STR r1, [sp, #0x1c] STR r1, [sp, #0x38]
LDR r1, [sp, #0x18] LDR r1, [sp, #0x34]
MOV r1, r1, ASR #2 MOV r1, r1, ASR #2
STR r1, [sp, #0x18] STR r1, [sp, #0x34]
LDR r1, [sp, #0x20] LDR r1, [sp, #0x3c]
SUBS r1, r1, #1 SUBS r1, r1, #1
STR r1, [sp, #0x20] STR r1, [sp, #0x3c]
BGT OUTER_LOOP BGT OUTER_LOOP
RADIX2: RADIX2:
LDR r1, [sp, #0x14] LDR r1, [sp, #0x30]
CMP r1, #0 CMP r1, #0
BEQ EXIT BEQ EXIT
LDR r12, [sp, #0x1c] LDR r12, [sp, #0x38]
LDR r1, [sp, #0x28] LDR r1, [sp, #0x44]
CMP r12, #0 CMP r12, #0
LDRNE r12, [sp, #0x1c]
MOVEQ r4, #1 MOVEQ r4, #1
MOVNE r4, r12, LSL #1 MOVNE r4, r12, LSL #1
MOVS r3, r0 MOVS r3, r0
BEQ EXIT BEQ EXIT
MOV r3, r3, ASR #1 MOV r3, r3, ASR #1
LDR r5, [sp, #0x34] LDR r5, [sp, #0x50]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
STR r1, [sp, #-4] STR r1, [sp, #0x18]
RADIX2_BFLY: RADIX2_BFLY:
LDR r1, [sp, #-4] LDR r1, [sp, #0x18]
LDRD r6, [r5] @r6 = x0r LDRD r6, [r5] @r6 = x0r
ADD r5, r5, r0 ADD r5, r5, r0
LDRD r8, [r5] @r8 = x1r LDRD r8, [r5] @r8 = x1r
@ -720,10 +717,10 @@ RADIX2_BFLY:
ORR r10, r1, r10, LSL#1 ORR r10, r1, r10, LSL#1
LDR r1, [sp, #-4] LDR r1, [sp, #0x18]
LDR r2, [r1, #4] LDR r2, [r1, #0x04]
ADD r1, r1, r4, LSL #3 ADD r1, r1, r4, LSL #3
STR r1, [sp, #-4] STR r1, [sp, #0x18]
SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l) SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l)
LSR r1, r1, #31 LSR r1, r1, #31
@ -750,11 +747,11 @@ RADIX2_BFLY:
BNE RADIX2_BFLY BNE RADIX2_BFLY
LDR r1, [sp, #0x28] LDR r1, [sp, #0x44]
MOV r3, r0, ASR #4 MOV r3, r0, ASR #4
STR r1, [sp, #-4] STR r1, [sp, #0x18]
RADIX2_BFLY_2: RADIX2_BFLY_2:
LDR r1, [sp, #-4] LDR r1, [sp, #0x18]
LDRD r6, [r5] @r6 = x0r LDRD r6, [r5] @r6 = x0r
ADD r5, r5, r0 ADD r5, r5, r0
LDRD r8, [r5] @r8 = x1r LDRD r8, [r5] @r8 = x1r
@ -772,10 +769,10 @@ RADIX2_BFLY_2:
ORR r10, r1, r10, LSL#1 ORR r10, r1, r10, LSL#1
LDR r1, [sp, #-4] LDR r1, [sp, #0x18]
LDR r2, [r1, #4] LDR r2, [r1, #0x04]
ADD r1, r1, r4, LSL #3 ADD r1, r1, r4, LSL #3
STR r1, [sp, #-4] STR r1, [sp, #0x18]
SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l) SMULL r1, r8, r8, r2 @ixheaacd_mult32(x1r,w1l)
LSR r1, r1, #31 LSR r1, r1, #31
@ -804,6 +801,6 @@ RADIX2_BFLY_2:
BNE RADIX2_BFLY_2 BNE RADIX2_BFLY_2
EXIT: EXIT:
ADD sp, sp, #0x38 ADD sp, sp, #0x54
LDMFD sp!, {r4-r12, pc} LDMFD sp!, {r4-r12, pc}

View file

@ -6,20 +6,18 @@ ixheaacd_mps_complex_fft_64_asm:
@LDR r4,[sp] @LDR r4,[sp]
STMFD sp!, {r0-r12, lr} STMFD sp!, {r0-r12, lr}
LDR r4, [sp, #0x38] LDR r4, [sp, #0x38]
SUB sp, sp, #0x28 SUB sp, sp, #0x44
@ LDR r4,[sp,#0x30] LDR r0, [sp, #0x48]
LDR r0, [sp, #0x2c]
@LDR r12,[sp,#0x5c+4]
EOR r0, r0, r0, ASR #31 EOR r0, r0, r0, ASR #31
CLZ r0, r0 CLZ r0, r0
SUB r12, r0, #16 @dig_rev_shift = norm32(npoints) + 1 -16@ SUB r12, r0, #16 @dig_rev_shift = norm32(npoints) + 1 -16@
SUB r0, r0, #1 SUB r0, r0, #1
RSB r0, r0, #0x1e RSB r0, r0, #0x1e
AND r1, r0, #1 AND r1, r0, #1
STR r1, [sp, #0x14] STR r1, [sp, #0x30]
MOV r1, r0, ASR #1 MOV r1, r0, ASR #1
LDR r0, [sp, #0x2c] @npoints LDR r0, [sp, #0x48] @npoints
STR r1, [sp, #-4]! STR r1, [sp, #0x18]
MOV lr, r0, LSL #1 @(npoints >>1) * 4 MOV lr, r0, LSL #1 @(npoints >>1) * 4
MOV r0, #0 MOV r0, #0
MOV r12, r4 MOV r12, r4
@ -58,24 +56,24 @@ FIRST_STAGE_R4:
STMIA r3!, {r4-r11} STMIA r3!, {r4-r11}
BLT FIRST_STAGE_R4 BLT FIRST_STAGE_R4
LDR r1, [sp], #4 LDR r1, [sp, #0x18]
LDR r0, [sp, #0x2c] LDR r0, [sp, #0x48]
MOV r12, #0x40 @nodespacing = 64@ MOV r12, #0x40 @nodespacing = 64@
STR r12, [sp, #0x1c] STR r12, [sp, #0x38]
LDR r12, [sp, #0x2c] LDR r12, [sp, #0x48]
SUB r3, r3, r0, LSL #3 SUB r3, r3, r0, LSL #3
SUBS r1, r1, #1 SUBS r1, r1, #1
STR r3, [sp, #0x34] STR r3, [sp, #0x50]
MOV r4, r12, ASR #4 MOV r4, r12, ASR #4
MOV r0, #4 MOV r0, #4
STR r4, [sp, #0x18] STR r4, [sp, #0x34]
STR r1, [sp, #0x20] STR r1, [sp, #0x3c]
BLE EXIT BLE EXIT
OUTER_LOOP: OUTER_LOOP:
LDR r1, [sp, #0x28] LDR r1, [sp, #0x44]
LDR r12, [sp, #0x34] @WORD32 *data = ptr_y@ LDR r12, [sp, #0x50] @WORD32 *data = ptr_y@
STR r1, [sp, #0x10] STR r1, [sp, #0x2c]
LDR r1, [sp, #0x18] LDR r1, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
LOOP_TRIVIAL_TWIDDLE: LOOP_TRIVIAL_TWIDDLE:
@ -129,11 +127,11 @@ LOOP_TRIVIAL_TWIDDLE:
BNE LOOP_TRIVIAL_TWIDDLE BNE LOOP_TRIVIAL_TWIDDLE
MOV r0, r0, ASR #3 MOV r0, r0, ASR #3
LDR r4, [sp, #0x1c] LDR r4, [sp, #0x38]
LDR r3, [sp, #0x34] LDR r3, [sp, #0x50]
MUL r1, r0, r4 MUL r1, r0, r4
ADD r12, r3, #8 ADD r12, r3, #8
STR r1, [sp, #0x24] STR r1, [sp, #0x40]
MOV r3, r1, ASR #2 MOV r3, r1, ASR #2
ADD r3, r3, r1, ASR #3 ADD r3, r3, r1, ASR #3
SUB r3, r3, r1, ASR #4 SUB r3, r3, r1, ASR #4
@ -141,25 +139,25 @@ LOOP_TRIVIAL_TWIDDLE:
SUB r3, r3, r1, ASR #6 SUB r3, r3, r1, ASR #6
ADD r3, r3, r1, ASR #7 ADD r3, r3, r1, ASR #7
SUB r3, r3, r1, ASR #8 SUB r3, r3, r1, ASR #8
STR r3, [sp, #-4]! STR r3, [sp, #0x18]
SECOND_LOOP: SECOND_LOOP:
LDR r3, [sp, #0x10+4] LDR r3, [sp, #0x2c]
LDR r14, [sp, #0x18+4] LDR r14, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@ LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@ LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@ LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@ LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@ LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4] STR r4, [sp, #0x24]
STR r1, [sp, #-4] STR r1, [sp, #0x14]
STR r2, [sp, #-8] STR r2, [sp, #0x10]
STR r5, [sp, #-12] STR r5, [sp, #0x0c]
STR r6, [sp, #-16] STR r6, [sp, #0x08]
STR r7, [sp, #-20] STR r7, [sp, #0x04]
STR r8, [sp, #-24] STR r8, [sp]
RADIX4_BFLY: RADIX4_BFLY:
@ -168,8 +166,8 @@ RADIX4_BFLY:
LDRD r10, [r12, r0] @r10=x3r, r11=x3i LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1 SUBS r14, r14, #1
LDR r1, [sp, #-4] LDR r1, [sp, #0x14]
LDR r2, [sp, #-8] LDR r2, [sp, #0x10]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l) SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -186,8 +184,8 @@ RADIX4_BFLY:
ADD r7, r7, r6 ADD r7, r7, r6
SUB r6, r4, r5 @ SUB r6, r4, r5 @
LDR r1, [sp, #-12] LDR r1, [sp, #0x0c]
LDR r2, [sp, #-16] LDR r2, [sp, #0x08]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l) SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -204,8 +202,8 @@ RADIX4_BFLY:
ADD r9, r9, r8 ADD r9, r9, r8
SUB r8, r4, r5 @ SUB r8, r4, r5 @
LDR r1, [sp, #-20] LDR r1, [sp, #0x04]
LDR r2, [sp, #-24] LDR r2, [sp]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l) SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -225,7 +223,7 @@ RADIX4_BFLY:
@SUB r12,r12,r0,lsl #1 @SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i @LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @ LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4] LDR r5, [r12, #0x04]
ADD r4, r8, r4 @x0r = x0r + x2r@ ADD r4, r8, r4 @x0r = x0r + x2r@
@ -259,45 +257,45 @@ RADIX4_BFLY:
BNE RADIX4_BFLY BNE RADIX4_BFLY
MOV r0, r0, ASR #3 MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4] LDR r1, [sp, #0x48]
LDR r4, [sp, #8+4] LDR r4, [sp, #0x24]
SUB r1, r12, r1, LSL #3 SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4] LDR r6, [sp, #0x38]
ADD r12, r1, #8 ADD r12, r1, #8
LDR r7, [sp, #0] LDR r7, [sp, #0x18]
ADD r4, r4, r6 ADD r4, r4, r6
CMP r4, r7 CMP r4, r7
BLE SECOND_LOOP BLE SECOND_LOOP
SECOND_LOOP_2: SECOND_LOOP_2:
LDR r3, [sp, #0x10+4] LDR r3, [sp, #0x2c]
LDR r14, [sp, #0x18+4] LDR r14, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@ LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@ LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@ LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@ LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
SUB r3, r3, #2048 @ 512 *4 SUB r3, r3, #2048 @ 512 *4
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@ LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4] STR r4, [sp, #0x24]
STR r1, [sp, #-4] STR r1, [sp, #0x14]
STR r2, [sp, #-8] STR r2, [sp, #0x10]
STR r5, [sp, #-12] STR r5, [sp, #0x0c]
STR r6, [sp, #-16] STR r6, [sp, #0x08]
STR r7, [sp, #-20] STR r7, [sp, #0x04]
STR r8, [sp, #-24] STR r8, [sp]
RADIX4_BFLY_2: RADIX4_BFLY_2:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
LDRD r8, [r12, r0]! @r8=x2r, r9=x2i LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
LDRD r10, [r12, r0] @r10=x3r, r11=x3i LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1 SUBS r14, r14, #1
LDR r1, [sp, #-4] LDR r1, [sp, #0x14]
LDR r2, [sp, #-8] LDR r2, [sp, #0x10]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l) SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -314,8 +312,8 @@ RADIX4_BFLY_2:
ADD r7, r7, r6 ADD r7, r7, r6
SUB r6, r4, r5 @ SUB r6, r4, r5 @
LDR r1, [sp, #-12] LDR r1, [sp, #0x0c]
LDR r2, [sp, #-16] LDR r2, [sp, #0x08]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l) SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -332,8 +330,8 @@ RADIX4_BFLY_2:
ADD r9, r9, r8 ADD r9, r9, r8
SUB r8, r4, r5 @ SUB r8, r4, r5 @
LDR r1, [sp, #-20] LDR r1, [sp, #0x04]
LDR r2, [sp, #-24] LDR r2, [sp]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l) SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -353,7 +351,7 @@ RADIX4_BFLY_2:
@SUB r12,r12,r0,lsl #1 @SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i @LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @ LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4] LDR r5, [r12, #0x04]
ADD r4, r8, r4 @x0r = x0r + x2r@ ADD r4, r8, r4 @x0r = x0r + x2r@
@ -387,39 +385,39 @@ RADIX4_BFLY_2:
BNE RADIX4_BFLY_2 BNE RADIX4_BFLY_2
MOV r0, r0, ASR #3 MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4] LDR r1, [sp, #0x48]
LDR r4, [sp, #8+4] LDR r4, [sp, #0x24]
SUB r1, r12, r1, LSL #3 SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4] LDR r6, [sp, #0x38]
ADD r12, r1, #8 ADD r12, r1, #8
LDR r7, [sp, #0x24+4] LDR r7, [sp, #0x40]
ADD r4, r4, r6 ADD r4, r4, r6
CMP r4, r7, ASR #1 CMP r4, r7, ASR #1
BLE SECOND_LOOP_2 BLE SECOND_LOOP_2
LDR r7, [sp, #0] LDR r7, [sp, #0x18]
CMP r4, r7, LSL #1 CMP r4, r7, LSL #1
BGT SECOND_LOOP_4 BGT SECOND_LOOP_4
SECOND_LOOP_3: SECOND_LOOP_3:
LDR r3, [sp, #0x10+4] LDR r3, [sp, #0x2c]
LDR r14, [sp, #0x18+4] LDR r14, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@ LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@ LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
SUB r3, r3, #2048 @ 512 *4 SUB r3, r3, #2048 @ 512 *4
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@ LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@ LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@ LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4] STR r4, [sp, #0x24]
STR r1, [sp, #-4] STR r1, [sp, #0x14]
STR r2, [sp, #-8] STR r2, [sp, #0x10]
STR r5, [sp, #-12] STR r5, [sp, #0x0c]
STR r6, [sp, #-16] STR r6, [sp, #0x08]
STR r7, [sp, #-20] STR r7, [sp, #0x04]
STR r8, [sp, #-24] STR r8, [sp]
RADIX4_BFLY_3: RADIX4_BFLY_3:
@ -428,8 +426,8 @@ RADIX4_BFLY_3:
LDRD r10, [r12, r0] @r10=x3r, r11=x3i LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1 SUBS r14, r14, #1
LDR r1, [sp, #-4] LDR r1, [sp, #0x14]
LDR r2, [sp, #-8] LDR r2, [sp, #0x10]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l) SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -446,8 +444,8 @@ RADIX4_BFLY_3:
ADD r7, r7, r6 ADD r7, r7, r6
SUB r6, r4, r5 @ SUB r6, r4, r5 @
LDR r1, [sp, #-12] LDR r1, [sp, #0x0c]
LDR r2, [sp, #-16] LDR r2, [sp, #0x08]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l) SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -464,8 +462,8 @@ RADIX4_BFLY_3:
ADD r8, r9, r8 ADD r8, r9, r8
SUB r9, r5, r4 @ SUB r9, r5, r4 @
LDR r1, [sp, #-20] LDR r1, [sp, #0x04]
LDR r2, [sp, #-24] LDR r2, [sp]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l) SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -485,7 +483,7 @@ RADIX4_BFLY_3:
@SUB r12,r12,r0,lsl #1 @SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i @LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @ LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4] LDR r5, [r12, #0x04]
ADD r4, r8, r4 @x0r = x0r + x2r@ ADD r4, r8, r4 @x0r = x0r + x2r@
@ -519,38 +517,38 @@ RADIX4_BFLY_3:
BNE RADIX4_BFLY_3 BNE RADIX4_BFLY_3
MOV r0, r0, ASR #3 MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4] LDR r1, [sp, #0x48]
LDR r4, [sp, #8+4] LDR r4, [sp, #0x24]
SUB r1, r12, r1, LSL #3 SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4] LDR r6, [sp, #0x38]
ADD r12, r1, #8 ADD r12, r1, #8
LDR r7, [sp, #0] LDR r7, [sp, #0x18]
ADD r4, r4, r6 ADD r4, r4, r6
CMP r4, r7, LSL #1 CMP r4, r7, LSL #1
BLE SECOND_LOOP_3 BLE SECOND_LOOP_3
SECOND_LOOP_4: SECOND_LOOP_4:
LDR r3, [sp, #0x10+4] LDR r3, [sp, #0x2c]
LDR r14, [sp, #0x18+4] LDR r14, [sp, #0x34]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@ LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
LDR r2, [r3, #4] @w1l = *(twiddles + 2*j + 1)@ LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
SUB r3, r3, #2048 @ 512 *4 SUB r3, r3, #2048 @ 512 *4
LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@ LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
LDR r6, [r3, #4] @w2l = *(twiddles + 2*(j<<1) + 1)@ LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
SUB r3, r3, #2048 @ 512 *4 SUB r3, r3, #2048 @ 512 *4
LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@ LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
LDR r8, [r3, #4] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@ LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@
STR r4, [sp, #8+4] STR r4, [sp, #0x24]
STR r1, [sp, #-4] STR r1, [sp, #0x14]
STR r2, [sp, #-8] STR r2, [sp, #0x10]
STR r5, [sp, #-12] STR r5, [sp, #0x0c]
STR r6, [sp, #-16] STR r6, [sp, #0x08]
STR r7, [sp, #-20] STR r7, [sp, #0x04]
STR r8, [sp, #-24] STR r8, [sp]
RADIX4_BFLY_4: RADIX4_BFLY_4:
LDRD r6, [r12, r0]! @r6=x1r, r7=x1i LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
@ -558,8 +556,8 @@ RADIX4_BFLY_4:
LDRD r10, [r12, r0] @r10=x3r, r11=x3i LDRD r10, [r12, r0] @r10=x3r, r11=x3i
SUBS r14, r14, #1 SUBS r14, r14, #1
LDR r1, [sp, #-4] LDR r1, [sp, #0x14]
LDR r2, [sp, #-8] LDR r2, [sp, #0x10]
SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l) SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -576,8 +574,8 @@ RADIX4_BFLY_4:
ADD r7, r7, r6 ADD r7, r7, r6
SUB r6, r4, r5 @ SUB r6, r4, r5 @
LDR r1, [sp, #-12] LDR r1, [sp, #0x0c]
LDR r2, [sp, #-16] LDR r2, [sp, #0x08]
SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l) SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -594,8 +592,8 @@ RADIX4_BFLY_4:
ADD r8, r9, r8 ADD r8, r9, r8
SUB r9, r5, r4 @ SUB r9, r5, r4 @
LDR r1, [sp, #-20] LDR r1, [sp, #0x04]
LDR r2, [sp, #-24] LDR r2, [sp]
SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l) SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
LSR r3, r3, #31 LSR r3, r3, #31
@ -615,7 +613,7 @@ RADIX4_BFLY_4:
@SUB r12,r12,r0,lsl #1 @SUB r12,r12,r0,lsl #1
@LDRD r4,[r12] @r4=x0r, r5=x0i @LDRD r4,[r12] @r4=x0r, r5=x0i
LDR r4, [r12, -r0, lsl #1]! @ LDR r4, [r12, -r0, lsl #1]! @
LDR r5, [r12, #4] LDR r5, [r12, #0x04]
ADD r4, r8, r4 @x0r = x0r + x2r@ ADD r4, r8, r4 @x0r = x0r + x2r@
@ -648,47 +646,45 @@ RADIX4_BFLY_4:
BNE RADIX4_BFLY_4 BNE RADIX4_BFLY_4
MOV r0, r0, ASR #3 MOV r0, r0, ASR #3
LDR r1, [sp, #0x2c+4] LDR r1, [sp, #0x48]
LDR r4, [sp, #8+4] LDR r4, [sp, #0x24]
SUB r1, r12, r1, LSL #3 SUB r1, r12, r1, LSL #3
LDR r6, [sp, #0x1c+4] LDR r6, [sp, #0x38]
ADD r12, r1, #8 ADD r12, r1, #8
LDR r7, [sp, #0x24+4] LDR r7, [sp, #0x40]
ADD r4, r4, r6 ADD r4, r4, r6
CMP r4, r7 CMP r4, r7
BLT SECOND_LOOP_4 BLT SECOND_LOOP_4
ADD sp, sp, #4
LDR r1, [sp, #0x1c] LDR r1, [sp, #0x38]
MOV r0, r0, LSL #2 MOV r0, r0, LSL #2
MOV r1, r1, ASR #2 MOV r1, r1, ASR #2
STR r1, [sp, #0x1c] STR r1, [sp, #0x38]
LDR r1, [sp, #0x18] LDR r1, [sp, #0x34]
MOV r1, r1, ASR #2 MOV r1, r1, ASR #2
STR r1, [sp, #0x18] STR r1, [sp, #0x34]
LDR r1, [sp, #0x20] LDR r1, [sp, #0x3c]
SUBS r1, r1, #1 SUBS r1, r1, #1
STR r1, [sp, #0x20] STR r1, [sp, #0x3c]
BGT OUTER_LOOP BGT OUTER_LOOP
LDR r1, [sp, #0x14] LDR r1, [sp, #0x30]
CMP r1, #0 CMP r1, #0
BEQ EXIT BEQ EXIT
LDR r12, [sp, #0x1c] LDR r12, [sp, #0x38]
LDR r1, [sp, #0x28] LDR r1, [sp, #0x44]
CMP r12, #0 CMP r12, #0
LDRNE r12, [sp, #0x1c]
MOVEQ r4, #1 MOVEQ r4, #1
MOVNE r4, r12, LSL #1 MOVNE r4, r12, LSL #1
MOVS r3, r0 MOVS r3, r0
BEQ EXIT BEQ EXIT
MOV r3, r3, ASR #1 MOV r3, r3, ASR #1
LDR r5, [sp, #0x34] LDR r5, [sp, #0x50]
MOV r0, r0, LSL #3 @(del<<1) * 4 MOV r0, r0, LSL #3 @(del<<1) * 4
STR r1, [sp, #-4] STR r1, [sp, #0x18]
EXIT: EXIT:
ADD sp, sp, #0x38 ADD sp, sp, #0x54
LDMFD sp!, {r4-r12, pc} LDMFD sp!, {r4-r12, pc}

View file

@ -646,6 +646,8 @@ WORD32 impd_parse_filt_block(ia_bit_buf_struct* it_bit_buff,
if (it_bit_buff->error) return it_bit_buff->error; if (it_bit_buff->error) return it_bit_buff->error;
str_filter_element->filt_ele_idx = (temp & 0x7E) >> 1; str_filter_element->filt_ele_idx = (temp & 0x7E) >> 1;
if (str_filter_element->filt_ele_idx >= FILTER_ELEMENT_COUNT_MAX)
return (UNEXPECTED_ERROR);
str_filter_element->filt_ele_gain_flag = temp & 1; str_filter_element->filt_ele_gain_flag = temp & 1;
; ;
@ -1029,10 +1031,17 @@ WORD32 impd_parser_td_filter_cascade(
str_filter_block_refs->filter_block_count = str_filter_block_refs->filter_block_count =
impd_read_bits_buf(it_bit_buff, 4); impd_read_bits_buf(it_bit_buff, 4);
if (it_bit_buff->error) return it_bit_buff->error; if (it_bit_buff->error) return it_bit_buff->error;
if (str_filter_block_refs->filter_block_count > EQ_FILTER_BLOCK_COUNT_MAX) {
return (UNEXPECTED_ERROR);
}
for (ii = 0; ii < str_filter_block_refs->filter_block_count; ii++) { for (ii = 0; ii < str_filter_block_refs->filter_block_count; ii++) {
str_filter_block_refs->filter_block_index[ii] = str_filter_block_refs->filter_block_index[ii] =
impd_read_bits_buf(it_bit_buff, 7); impd_read_bits_buf(it_bit_buff, 7);
if (it_bit_buff->error) return it_bit_buff->error; if (it_bit_buff->error) return it_bit_buff->error;
if (str_filter_block_refs->filter_block_index[ii] >=
FILTER_BLOCK_COUNT_MAX)
return (UNEXPECTED_ERROR);
} }
str_filter_block_refs++; str_filter_block_refs++;
} }

View file

@ -1084,6 +1084,8 @@ WORD32 ixheaacd_mps_header_decode(ia_mps_dec_state_struct *self) {
} }
} }
if (self->num_bands_ipd > MAX_PARAMETER_BANDS) return -1;
self->dir_sig_count = 1; self->dir_sig_count = 1;
self->decor_sig_count = 1; self->decor_sig_count = 1;