summary | refs | log | tree | commit | diff
path: root/ac3dec/imdct512_kni.S
diff options
context:
space:
mode:
author: Klaus Schmidinger <kls (at) cadsoft (dot) de> 2002-02-24 18:00:00 +0100
committer: Klaus Schmidinger <kls (at) cadsoft (dot) de> 2002-02-24 18:00:00 +0100
commit: fb8e7fa302ef8a73feb6958b0cb32cc54f76e677 (patch)
tree: 74cf9dd5cd1892aaf1c05db32796eb356d7bc860 /ac3dec/imdct512_kni.S
parent: a1da0e5c5de55009716e2c327dda16c61c1dae83 (diff)
download: vdr-patch-lnbsharing-fb8e7fa302ef8a73feb6958b0cb32cc54f76e677.tar.gz
vdr-patch-lnbsharing-fb8e7fa302ef8a73feb6958b0cb32cc54f76e677.tar.bz2
Version 1.0.0pre1 vdr-1.0.0pre1
- Added scanning for EPG data for another 4 days on channels that support this (thanks to Oleg Assovski). - Removed '#define VFAT 1' from recording.c (was a leftover from testing). - Fixed the "Low disk space!" message (thanks to Sergei Haller). - Added the TPID to Hessen-3 in 'channels.conf' (thanks to Sergei Haller). - Fixed a crash when replaying with DEBUG_OSD=1 (thanks to Stefan Huelswitt). - Implemented the "First day" parameter for repeating timers. See FORMATS for information about the enhanced 'timers.conf' file format, and MANUAL for a description of the new item in the "Edit Timer" menu and the enhanced functionality of the "Blue" button in the "Timers" menu. - When deleting a recording that is currently still being recorded, the related timer will now automatically be terminated. If this is a repeating timer, it will receive a "First day" setting that skips the timer for this day. - Fixed closing all unused file descriptors when opening a pipe (thanks to Werner Fink). - Instant recordings now take the EPG data from the point in time at 5 minutes from the start time of the recording. In order for this to work the 'active' parameter of a timer now uses the second bit to indicate that this is an "instant" recording (see FORMATS for details). - Fixed the SVDRP GRAB command in case the video device can't be opened (thanks to Adrian Stabiszewski). - At startup the data written into 'epg.data' is now read into the EPG data structures. In order for this to work, the 'E' record has been extended to (optionally) contain the 'table ID' (see FORMATS for details). - The new SVDRP command PUTE can be used to put EPG data into the EPG list. See FORMATS for details about the required data format. - Taking the German umlauts 'as is' when compiled with VFAT. - The new Setup parameter RecordDolbyDigital can be used to generally turn off recording the Dolby Digital audio channels in case you want to save disk space or don't have the equipment to replay Dolby Digital audio. 
- Reading the 'setup.conf' file no longer terminates in case of an error, but rather attempts to read the rest of the file. - Removed DVD support from the core VDR source, since the current version from Andreas Schultz is already much further developed (DVD menu navigation) and the concept of "additional players" in VDR is going to change in version 1.1.0, where a new "plugin" interface shall allow the easy implementation of new players without having to patch the core VDR source. Until then, Andreas has agreed to provide his DVD support as a completely external patch. - The contents of the distribution archive now contains the directory name with the current version number, as in 'vdr-1.0.0pre1/...' in order to avoid inadvertently overwriting an existing VDR directory with a new version. - Added a missing error message in SVDRP command LSTC in case the given channel can't be found.
Diffstat (limited to 'ac3dec/imdct512_kni.S')
-rw-r--r-- ac3dec/imdct512_kni.S 548
1 files changed, 0 insertions, 548 deletions
diff --git a/ac3dec/imdct512_kni.S b/ac3dec/imdct512_kni.S
deleted file mode 100644
index 10b8de6..0000000
--- a/ac3dec/imdct512_kni.S
+++ /dev/null
@@ -1,548 +0,0 @@
-/*
- * imdct512_kni.S
- *
- * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - October 2000
- *
- *
- * imdct512_kni.S is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * imdct512_kni.S is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with GNU Make; see the file COPYING. If not, write to
- * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- */
-
-#ifdef __i386__
-
-.text
- .align 4
-.global imdct512_pre_ifft_twiddle_kni
- .type imdct512_pre_ifft_twiddle_kni, @function
-imdct512_pre_ifft_twiddle_kni:
- /* Stack args: 8(%ebp)=pmt, 12(%ebp)=buf, 16(%ebp)=data, 20(%ebp)=xcos_sin_sse. */
- pushl %ebp
- movl %esp, %ebp
- addl $-4, %esp /* local variable, loop counter */
- /* Every general register used below is saved here and restored before ret. */
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- pushl %edi
- pushl %esi
- /* Load the four pointer arguments and set the loop counter to 64 passes. */
- movl 8(%ebp), %eax /* pmt */
- movl 12(%ebp), %ebx /* buf */
- movl 16(%ebp), %ecx /* data */
- movl 20(%ebp), %edx /* xcos_sin_sse */
- movl $64, -4(%ebp)
- /* Each pass consumes two 4-byte pmt entries and stores 16 bytes (4 floats) to buf. */
- /* movaps is used on xcos_sin_sse and buf, so both must be 16-byte aligned. */
-.loop:
- movl (%eax), %esi /* esi = first index j read from pmt */
- movl 4(%eax), %edi /* edi = second index (called j+1 in the lane comments) */
- movss (%ecx, %esi, 8), %xmm1 /* data[2j] (scale 8 = two floats per index) */
- movss (%ecx, %edi, 8), %xmm3 /* data[2(j+1)] */
- /* Double the indices so that scale 8 steps over 16-byte table entries. */
- shll $1, %esi
- shll $1, %edi
-
- movaps (%edx, %esi, 8), %xmm0; /* -c_j | -s_j | -s_j | c_j */
- movaps (%edx, %edi, 8), %xmm2; /* -c_j+1 | -s_j+1 | -s_j+1 | c_j+1 */
- /* Negate the doubled indices: 1020 + 4*(-2j) is the byte offset of data[255-2j]. */
- negl %esi
- negl %edi
-
- movss 1020(%ecx, %esi, 4), %xmm4 /* data[255-2j] */
- addl $8, %eax /* advance pmt past the two entries just used */
- movss 1020(%ecx, %edi, 4), %xmm5 /* data[255-2(j+1)] */
-
- shufps $0, %xmm1, %xmm4 /* 2j | 2j | 255-2j | 255-2j */
- shufps $0, %xmm3, %xmm5 /* 2(j+1) | 2(j+1) | 255-2(j+1) | 255-2(j+1) */
- mulps %xmm4, %xmm0 /* lane-wise data value * table entry */
- mulps %xmm5, %xmm2
- movhlps %xmm0, %xmm1 /* low half of xmm1 = high half of xmm0 */
- movhlps %xmm2, %xmm3
- addl $16, %ebx /* advance buf; the store below uses -16(%ebx) */
- addps %xmm1, %xmm0 /* low two lanes = high-half products + low-half products */
- addps %xmm3, %xmm2
- movlhps %xmm2, %xmm0 /* xmm0 = result pair for edi (high) | result pair for esi (low) */
- movaps %xmm0, -16(%ebx) /* store both result pairs to buf */
- decl -4(%ebp) /* count down the stack-resident loop counter */
- jnz .loop
-
- popl %esi
- popl %edi
- popl %edx
- popl %ecx
- popl %ebx
- popl %eax
-
- addl $4, %esp /* drop the loop-counter local */
- popl %ebp
-
- ret
- .p2align 4,0 /* pad to a 16-byte boundary using zero as the fill value */
-
-.global imdct512_post_ifft_twiddle_kni
- .type imdct512_post_ifft_twiddle_kni, @function
-imdct512_post_ifft_twiddle_kni:
- /* Stack args: 8(%ebp)=buf, 12(%ebp)=xcos_sin_sse; buf is rewritten in place. */
- pushl %ebp
- movl %esp, %ebp
- /* Save the three general registers used below; restored before leave. */
- pushl %eax
- pushl %ebx
- pushl %ecx
-
- movl 8(%ebp), %eax /* buf[] */
- movl 12(%ebp), %ebx /* xcos_sin_sse[] */
- movl $32, %ecx /* loop counter */
- /* Each pass handles 32 bytes of buf (four re/im pairs) and 64 bytes of table; movaps needs both 16-byte aligned. */
-.loop1:
- movaps (%eax), %xmm0 /* im1 | re1 | im0 | re0 */
-
- movaps (%ebx), %xmm2 /* -c | -s | -s | c */
- movhlps %xmm0, %xmm1 /* im1 | re1 */
- movaps 16(%ebx), %xmm3 /* -c1 | -s1 | -s1 | c1 */
-
- shufps $0x50, %xmm0, %xmm0 /* im0 | im0 | re0 | re0 */
- shufps $0x50, %xmm1, %xmm1 /* im1 | im1 | re1 | re1 */
-
- movaps 16(%eax), %xmm4 /* im3 | re3 | im2 | re2 */
-
- shufps $0x27, %xmm2, %xmm2 /* c | -s | -s | -c */
- movhlps %xmm4, %xmm5 /* im3 | re3 */
- shufps $0x27, %xmm3, %xmm3 /* c1 | -s1 | -s1 | -c1 */
-
- movaps 32(%ebx), %xmm6 /* -c2 | -s2 | -s2 | c2 */
- movaps 48(%ebx), %xmm7 /* -c3 | -s3 | -s3 | c3 */
-
- shufps $0x50, %xmm4, %xmm4 /* im2 | im2 | re2 | re2 */
- shufps $0x50, %xmm5, %xmm5 /* im3 | im3 | re3 | re3 */
-
- mulps %xmm2, %xmm0 /* im0*c | -im0*s | -re0*s | -re0*c */
- mulps %xmm3, %xmm1 /* same products for pair 1 */
-
- shufps $0x27, %xmm6, %xmm6 /* c2 | -s2 | -s2 | -c2 */
- shufps $0x27, %xmm7, %xmm7 /* c3 | -s3 | -s3 | -c3 */
-
- movhlps %xmm0, %xmm2 /* bring the two im products down beside the re products */
- movhlps %xmm1, %xmm3
-
- mulps %xmm6, %xmm4 /* same products for pair 2 */
- mulps %xmm7, %xmm5 /* same products for pair 3 */
-
- addps %xmm2, %xmm0 /* lane 1: im0*c - re0*s, lane 0: -re0*c - im0*s */
- addps %xmm3, %xmm1
-
- movhlps %xmm4, %xmm6
- movhlps %xmm5, %xmm7
-
- addps %xmm6, %xmm4
- addps %xmm7, %xmm5
-
- movlhps %xmm1, %xmm0 /* pack results: pair 1 (high) | pair 0 (low) */
- movlhps %xmm5, %xmm4 /* pack results: pair 3 (high) | pair 2 (low) */
-
- movaps %xmm0, (%eax) /* overwrite pairs 0-1 in buf */
- movaps %xmm4, 16(%eax) /* overwrite pairs 2-3 in buf */
- addl $64, %ebx /* next four 16-byte table entries */
- addl $32, %eax /* next four re/im pairs */
- decl %ecx
- jnz .loop1
-
- popl %ecx
- popl %ebx
- popl %eax
-
- leave /* restore %esp and %ebp */
- ret
- .p2align 4,0 /* pad to a 16-byte boundary using zero as the fill value */
-
-.global imdct512_window_delay_kni
- .type imdct512_window_delay_kni, @function
-imdct512_window_delay_kni:
- /* Stack args: 8(%ebp)=buf, 12(%ebp)=data, 16(%ebp)=window, 20(%ebp)=delay. */
- pushl %ebp
- movl %esp, %ebp
- /* Save the general registers used below; popped in reverse order at the end. */
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- pushl %esi
- pushl %edi
- /* ebx (delay) and edx (window) are loaded once and keep advancing through loops 1 and 2. */
- movl 20(%ebp), %ebx /* delay */
- movl 16(%ebp), %edx /* window */
-
- movl 8(%ebp), %eax /* buf */
- movl $16, %ecx /* loop count */
- leal 516(%eax), %esi /* buf[64].im */
- leal 504(%eax), %edi /* buf[63].re */
- movl 12(%ebp), %eax /* data (eax no longer holds buf) */
-.first_128_samples:
- /* Loop 1: data = window * (im from buf[64] upward, -re from buf[63] downward) + delay; 8 floats per pass. */
- movss (%esi), %xmm0
- movss 8(%esi), %xmm2
- movss (%edi), %xmm1
- movss -8(%edi), %xmm3
-
- movlhps %xmm2, %xmm0 /* 0.0 | im1 | 0.0 | im0 */
- movlhps %xmm3, %xmm1 /* 0.0 | re1 | 0.0 | re0 */
-
- movaps (%edx), %xmm4 /* w3 | w2 | w1 | w0 */
- movaps (%ebx), %xmm5 /* d3 | d2 | d1 | d0 */
- shufps $0xb1, %xmm1, %xmm1 /* re1 | 0.0 | re0 | 0.0 */
-
- movss 16(%esi), %xmm6 /* im2 */
- movss 24(%esi), %xmm7 /* im3 */
- subps %xmm1, %xmm0 /* -re1 | im1 | -re0 | im0 */
- movss -16(%edi), %xmm2 /* re2 */
- movss -24(%edi), %xmm3 /* re3 */
- mulps %xmm4, %xmm0 /* apply window */
- movlhps %xmm7, %xmm6 /* 0.0 | im3 | 0.0 | im2 */
- movlhps %xmm3, %xmm2 /* 0.0 | re3 | 0.0 | re2 */
- addps %xmm5, %xmm0 /* add delay */
- shufps $0xb1, %xmm2, %xmm2 /* re3 | 0.0 | re2 | 0.0 */
- movaps 16(%edx), %xmm4 /* w7 | w6 | w5 | w4 */
- movaps 16(%ebx), %xmm5 /* d7 | d6 | d5 | d4 */
- subps %xmm2, %xmm6 /* -re3 | im3 | -re2 | im2 */
- addl $32, %edx
- movaps %xmm0, (%eax) /* store first four output floats to data */
- addl $32, %ebx
- mulps %xmm4, %xmm6
- addl $32, %esi
- addl $32, %eax
- addps %xmm5, %xmm6
- addl $-32, %edi
- movaps %xmm6, -16(%eax) /* store second four output floats (eax already advanced) */
- decl %ecx
- jnz .first_128_samples
- /* Loop 2 setup: only esi, edi and ecx are reloaded; eax, ebx and edx continue from loop 1. */
- movl 8(%ebp), %esi /* buf[0].re */
- leal 1020(%esi), %edi /* buf[127].im */
- movl $16, %ecx /* loop count */
-.second_128_samples:
- /* Loop 2: data = window * (re from buf[0] upward, -im from buf[127] downward) + delay. */
- movss (%esi), %xmm0 /* buf[i].re */
- movss 8(%esi), %xmm2 /* re1 */
- movss (%edi), %xmm1 /* buf[127-i].im */
- movss -8(%edi), %xmm3 /* im1 */
-
- movlhps %xmm2, %xmm0 /* 0.0 | re1 | 0.0 | re0 */
- movlhps %xmm3, %xmm1 /* 0.0 | im1 | 0.0 | im0 */
-
- movaps (%edx), %xmm4 /* w3 | w2 | w1 | w0 */
- movaps (%ebx), %xmm5 /* d3 | d2 | d1 | d0 */
-
- shufps $0xb1, %xmm1, %xmm1 /* im1 | 0.0 | im0 | 0.0 */
- movss 16(%esi), %xmm6 /* re2 */
- movss 24(%esi), %xmm7 /* re3 */
- movss -16(%edi), %xmm2 /* im2 */
- movss -24(%edi), %xmm3 /* im3 */
- subps %xmm1, %xmm0 /* -im1 | re1 | -im0 | re0 */
- movlhps %xmm7, %xmm6 /* 0.0 | re3 | 0.0 | re2 */
- movlhps %xmm3, %xmm2 /* 0.0 | im3 | 0.0 | im2 */
- mulps %xmm4, %xmm0 /* apply window */
- shufps $0xb1, %xmm2, %xmm2 /* im3 | 0.0 | im2 | 0.0 */
- movaps 16(%edx), %xmm4 /* w7 | w6 | w5 | w4 */
- addl $32, %esi
- subps %xmm2, %xmm6 /* -im3 | re3 | -im2 | re2 */
- addps %xmm5, %xmm0 /* add delay */
- mulps %xmm4, %xmm6
- addl $-32, %edi
- movaps 16(%ebx), %xmm5 /* d7 | d6 | d5 | d4 */
- movaps %xmm0, (%eax) /* store first four output floats to data */
- addps %xmm5, %xmm6
- addl $32, %edx
- addl $32, %eax
- addl $32, %ebx
- movaps %xmm6, -16(%eax) /* store second four output floats (eax already advanced) */
- decl %ecx
- jnz .second_128_samples
- /* Loops 3 and 4 write delay; edx is not reloaded, so window is read backwards from where loop 2 stopped. */
- movl 8(%ebp), %eax
- leal 512(%eax), %esi /* buf[64].re */
- leal 508(%eax), %edi /* buf[63].im */
- movl $16, %ecx /* loop count */
- movl 20(%ebp), %eax /* delay (now the output pointer) */
-.first_128_delay:
- /* Loop 3: delay = window * (re from buf[64] upward, -im from buf[63] downward); nothing is added. */
- movss (%esi), %xmm0
- movss 8(%esi), %xmm2
- movss (%edi), %xmm1
- movss -8(%edi), %xmm3
-
- movlhps %xmm2, %xmm0 /* 0.0 | re1 | 0.0 | re0 */
- movlhps %xmm3, %xmm1 /* 0.0 | im1 | 0.0 | im0 */
-
- movaps -16(%edx), %xmm4 /* w3 | w2 | w1 | w0 (the 16 bytes just below edx) */
- shufps $0xb1, %xmm1, %xmm1 /* im1 | 0.0 | im0 | 0.0 */
- movss 16(%esi), %xmm6 /* re2 */
- movss 24(%esi), %xmm7 /* re3 */
- movss -16(%edi), %xmm2 /* im2 */
- movss -24(%edi), %xmm3 /* im3 */
- subps %xmm1, %xmm0 /* -im1 | re1 | -im0 | re0 */
- addl $-32, %edx /* step window back by eight floats */
- movlhps %xmm7, %xmm6 /* 0.0 | re3 | 0.0 | re2 */
- movlhps %xmm3, %xmm2 /* 0.0 | im3 | 0.0 | im2 */
- mulps %xmm4, %xmm0
- movaps (%edx), %xmm5 /* w7 | w6 | w5 | w4 (16 bytes below the previous load) */
- shufps $0xb1, %xmm2, %xmm2 /* im3 | 0.0 | im2 | 0.0 */
- movaps %xmm0, (%eax) /* store first four floats to delay */
- addl $32, %esi
- subps %xmm2, %xmm6 /* -im3 | re3 | -im2 | re2 */
- addl $-32, %edi
- mulps %xmm5, %xmm6
- addl $32, %eax
- movaps %xmm6, -16(%eax) /* store second four floats (eax already advanced) */
- decl %ecx
- jnz .first_128_delay
- /* Loop 4 setup: eax (delay output) and edx (window) continue from loop 3. */
- movl 8(%ebp), %ebx /* buf */
- leal 4(%ebx), %esi /* buf[0].im */
- leal 1016(%ebx), %edi /* buf[127].re */
- movl $16, %ecx /* loop count */
-.second_128_delay:
- /* Loop 4: delay = window * (-im from buf[0] upward, re from buf[127] downward). */
- movss (%esi), %xmm0
- movss 8(%esi), %xmm2
- movss (%edi), %xmm1
- movss -8(%edi), %xmm3
-
- movlhps %xmm2, %xmm0 /* 0.0 | im1 | 0.0 | im0 */
- movlhps %xmm3, %xmm1 /* 0.0 | re1 | 0.0 | re0 */
-
- movaps -16(%edx), %xmm4 /* w3 | w2 | w1 | w0 (the 16 bytes just below edx) */
- shufps $0xb1, %xmm1, %xmm1 /* re1 | 0.0 | re0 | 0.0 */
- movss 16(%esi), %xmm6 /* im2 */
- movss 24(%esi), %xmm7 /* im3 */
- movss -16(%edi), %xmm2 /* re2 */
- movss -24(%edi), %xmm3 /* re3 */
- subps %xmm0, %xmm1 /* re1 | -im1 | re0 | -im0 */
- addl $-32, %edx /* step window back by eight floats */
- movlhps %xmm7, %xmm6 /* 0.0 | im3 | 0.0 | im2 */
- movlhps %xmm3, %xmm2 /* 0.0 | re3 | 0.0 | re2 */
- mulps %xmm4, %xmm1
- movaps (%edx), %xmm5 /* w7 | w6 | w5 | w4 (16 bytes below the previous load) */
- shufps $0xb1, %xmm2, %xmm2 /* re3 | 0.0 | re2 | 0.0 */
- movaps %xmm1, (%eax) /* store first four floats to delay */
- addl $32, %esi
- subps %xmm6, %xmm2 /* re3 | -im3 | re2 | -im2 */
- addl $-32, %edi
- mulps %xmm5, %xmm2
- addl $32, %eax
- movaps %xmm2, -16(%eax) /* store second four floats (eax already advanced) */
- decl %ecx
- jnz .second_128_delay
-
- popl %edi
- popl %esi
- popl %edx
- popl %ecx
- popl %ebx
- popl %eax
-
- leave /* restore %esp and %ebp */
- ret
- .p2align 4,0 /* pad to a 16-byte boundary using zero as the fill value */
-
-.global imdct512_window_delay_nol_kni
- .type imdct512_window_delay_nol_kni, @function
-imdct512_window_delay_nol_kni:
- /* Stack args: 8(%ebp)=buf, 12(%ebp)=data, 16(%ebp)=window, 20(%ebp)=delay (only written, never read). */
- pushl %ebp
- movl %esp, %ebp
- /* Save the general registers used below; popped in reverse order at the end. */
- pushl %eax
- pushl %ebx
- pushl %ecx
- pushl %edx
- pushl %esi
- pushl %edi
- /* The delay loads and adds are commented out throughout: data gets window * samples only. */
- /* movl 20(%ebp), %ebx delay */
- movl 16(%ebp), %edx /* window */
-
- movl 8(%ebp), %eax /* buf */
- movl $16, %ecx /* loop count */
- leal 516(%eax), %esi /* buf[64].im */
- leal 504(%eax), %edi /* buf[63].re */
- movl 12(%ebp), %eax /* data (eax no longer holds buf) */
-.first_128_sample:
- /* Loop 1: data = window * (im from buf[64] upward, -re from buf[63] downward); 8 floats per pass. */
- movss (%esi), %xmm0
- movss 8(%esi), %xmm2
- movss (%edi), %xmm1
- movss -8(%edi), %xmm3
-
- movlhps %xmm2, %xmm0 /* 0.0 | im1 | 0.0 | im0 */
- movlhps %xmm3, %xmm1 /* 0.0 | re1 | 0.0 | re0 */
-
- movaps (%edx), %xmm4 /* w3 | w2 | w1 | w0 */
- /* movaps (%ebx), %xmm5 d3 | d2 | d1 | d0 */
- shufps $0xb1, %xmm1, %xmm1 /* re1 | 0.0 | re0 | 0.0 */
-
- movss 16(%esi), %xmm6 /* im2 */
- movss 24(%esi), %xmm7 /* im3 */
- subps %xmm1, %xmm0 /* -re1 | im1 | -re0 | im0 */
- movss -16(%edi), %xmm2 /* re2 */
- movss -24(%edi), %xmm3 /* re3 */
- mulps %xmm4, %xmm0 /* apply window */
- movlhps %xmm7, %xmm6 /* 0.0 | im3 | 0.0 | im2 */
- movlhps %xmm3, %xmm2 /* 0.0 | re3 | 0.0 | re2 */
- /* addps %xmm5, %xmm0 */
- shufps $0xb1, %xmm2, %xmm2 /* re3 | 0.0 | re2 | 0.0 */
- movaps 16(%edx), %xmm4 /* w7 | w6 | w5 | w4 */
- /* movaps 16(%ebx), %xmm5 d7 | d6 | d5 | d4 */
- subps %xmm2, %xmm6 /* -re3 | im3 | -re2 | im2 */
- addl $32, %edx
- movaps %xmm0, (%eax) /* store first four output floats to data */
- /* addl $32, %ebx */
- mulps %xmm4, %xmm6
- addl $32, %esi
- addl $32, %eax
- /* addps %xmm5, %xmm6 */
- addl $-32, %edi
- movaps %xmm6, -16(%eax) /* store second four output floats (eax already advanced) */
- decl %ecx
- jnz .first_128_sample
- /* Loop 2 setup: only esi, edi and ecx are reloaded; eax and edx continue from loop 1. */
- movl 8(%ebp), %esi /* buf[0].re */
- leal 1020(%esi), %edi /* buf[127].im */
- movl $16, %ecx /* loop count */
-.second_128_sample:
- /* Loop 2: data = window * (re from buf[0] upward, -im from buf[127] downward). */
- movss (%esi), %xmm0 /* buf[i].re */
- movss 8(%esi), %xmm2 /* re1 */
- movss (%edi), %xmm1 /* buf[127-i].im */
- movss -8(%edi), %xmm3 /* im1 */
-
- movlhps %xmm2, %xmm0 /* 0.0 | re1 | 0.0 | re0 */
- movlhps %xmm3, %xmm1 /* 0.0 | im1 | 0.0 | im0 */
-
- movaps (%edx), %xmm4 /* w3 | w2 | w1 | w0 */
- /* movaps (%ebx), %xmm5 d3 | d2 | d1 | d0 */
-
- shufps $0xb1, %xmm1, %xmm1 /* im1 | 0.0 | im0 | 0.0 */
- movss 16(%esi), %xmm6 /* re2 */
- movss 24(%esi), %xmm7 /* re3 */
- movss -16(%edi), %xmm2 /* im2 */
- movss -24(%edi), %xmm3 /* im3 */
- subps %xmm1, %xmm0 /* -im1 | re1 | -im0 | re0 */
- movlhps %xmm7, %xmm6 /* 0.0 | re3 | 0.0 | re2 */
- movlhps %xmm3, %xmm2 /* 0.0 | im3 | 0.0 | im2 */
- mulps %xmm4, %xmm0 /* apply window */
- shufps $0xb1, %xmm2, %xmm2 /* im3 | 0.0 | im2 | 0.0 */
- movaps 16(%edx), %xmm4 /* w7 | w6 | w5 | w4 */
- addl $32, %esi
- subps %xmm2, %xmm6 /* -im3 | re3 | -im2 | re2 */
- /* addps %xmm5, %xmm0 */
- mulps %xmm4, %xmm6
- addl $-32, %edi
- /* movaps 16(%ebx), %xmm5 d7 | d6 | d5 | d4 */
- movaps %xmm0, (%eax) /* store first four output floats to data */
- /* addps %xmm5, %xmm6 */
- addl $32, %edx
- addl $32, %eax
- /* addl $32, %ebx */
- movaps %xmm6, -16(%eax) /* store second four output floats (eax already advanced) */
- decl %ecx
- jnz .second_128_sample
- /* Loops 3 and 4 write delay; edx is not reloaded, so window is read backwards from where loop 2 stopped. */
- movl 8(%ebp), %eax
- leal 512(%eax), %esi /* buf[64].re */
- leal 508(%eax), %edi /* buf[63].im */
- movl $16, %ecx /* loop count */
- movl 20(%ebp), %eax /* delay (the output pointer for loops 3 and 4) */
-.first_128_delays:
- /* Loop 3: delay = window * (re from buf[64] upward, -im from buf[63] downward). */
- movss (%esi), %xmm0
- movss 8(%esi), %xmm2
- movss (%edi), %xmm1
- movss -8(%edi), %xmm3
-
- movlhps %xmm2, %xmm0 /* 0.0 | re1 | 0.0 | re0 */
- movlhps %xmm3, %xmm1 /* 0.0 | im1 | 0.0 | im0 */
-
- movaps -16(%edx), %xmm4 /* w3 | w2 | w1 | w0 (the 16 bytes just below edx) */
- shufps $0xb1, %xmm1, %xmm1 /* im1 | 0.0 | im0 | 0.0 */
- movss 16(%esi), %xmm6 /* re2 */
- movss 24(%esi), %xmm7 /* re3 */
- movss -16(%edi), %xmm2 /* im2 */
- movss -24(%edi), %xmm3 /* im3 */
- subps %xmm1, %xmm0 /* -im1 | re1 | -im0 | re0 */
- addl $-32, %edx /* step window back by eight floats */
- movlhps %xmm7, %xmm6 /* 0.0 | re3 | 0.0 | re2 */
- movlhps %xmm3, %xmm2 /* 0.0 | im3 | 0.0 | im2 */
- mulps %xmm4, %xmm0
- movaps (%edx), %xmm5 /* w7 | w6 | w5 | w4 (16 bytes below the previous load) */
- shufps $0xb1, %xmm2, %xmm2 /* im3 | 0.0 | im2 | 0.0 */
- movaps %xmm0, (%eax) /* store first four floats to delay */
- addl $32, %esi
- subps %xmm2, %xmm6 /* -im3 | re3 | -im2 | re2 */
- addl $-32, %edi
- mulps %xmm5, %xmm6
- addl $32, %eax
- movaps %xmm6, -16(%eax) /* store second four floats (eax already advanced) */
- decl %ecx
- jnz .first_128_delays
- /* Loop 4 setup: eax (delay output) and edx (window) continue from loop 3. */
- movl 8(%ebp), %ebx /* buf */
- leal 4(%ebx), %esi /* buf[0].im */
- leal 1016(%ebx), %edi /* buf[127].re */
- movl $16, %ecx /* loop count */
-.second_128_delays:
- /* Loop 4: delay = window * (-im from buf[0] upward, re from buf[127] downward). */
- movss (%esi), %xmm0
- movss 8(%esi), %xmm2
- movss (%edi), %xmm1
- movss -8(%edi), %xmm3
-
- movlhps %xmm2, %xmm0 /* 0.0 | im1 | 0.0 | im0 */
- movlhps %xmm3, %xmm1 /* 0.0 | re1 | 0.0 | re0 */
-
- movaps -16(%edx), %xmm4 /* w3 | w2 | w1 | w0 (the 16 bytes just below edx) */
- shufps $0xb1, %xmm1, %xmm1 /* re1 | 0.0 | re0 | 0.0 */
- movss 16(%esi), %xmm6 /* im2 */
- movss 24(%esi), %xmm7 /* im3 */
- movss -16(%edi), %xmm2 /* re2 */
- movss -24(%edi), %xmm3 /* re3 */
- subps %xmm0, %xmm1 /* re1 | -im1 | re0 | -im0 */
- addl $-32, %edx /* step window back by eight floats */
- movlhps %xmm7, %xmm6 /* 0.0 | im3 | 0.0 | im2 */
- movlhps %xmm3, %xmm2 /* 0.0 | re3 | 0.0 | re2 */
- mulps %xmm4, %xmm1
- movaps (%edx), %xmm5 /* w7 | w6 | w5 | w4 (16 bytes below the previous load) */
- shufps $0xb1, %xmm2, %xmm2 /* re3 | 0.0 | re2 | 0.0 */
- movaps %xmm1, (%eax) /* store first four floats to delay */
- addl $32, %esi
- subps %xmm6, %xmm2 /* re3 | -im3 | re2 | -im2 */
- addl $-32, %edi
- mulps %xmm5, %xmm2
- addl $32, %eax
- movaps %xmm2, -16(%eax) /* store second four floats (eax already advanced) */
- decl %ecx
- jnz .second_128_delays
-
- popl %edi
- popl %esi
- popl %edx
- popl %ecx
- popl %ebx
- popl %eax
-
- leave /* restore %esp and %ebp */
- ret
- .p2align 4,0 /* pad to a 16-byte boundary using zero as the fill value */
-#endif