From efea0f64d08052b0189d962101e1a3634d4adfc8 Mon Sep 17 00:00:00 2001 From: Klaus Schmidinger Date: Sun, 12 Aug 2001 18:00:00 +0200 Subject: Version 0.91 - Fixed displaying colored button texts that are too long. - Suppressing replay progress display when replaying a DVD. - Updated channels.conf.cable (thanks to Uwe Scheffler). - Updated French OSD texts (thanks to Jean-Claude Repetto). - Improved AC3 decoding when replaying DVDs (thanks to Matjaz Thaler). - Fixed handling DVB card indexes when using only one card in a multi-card system. - Changed the 'Eject DVD' button text to a simple 'Eject' (the German text was too long...). - Made the font file generation more stable (thanks to Artur Skawina). - Changed the default value for the "DiSEqC" setup parameter to "off". - The new command line option '-E' can be used to define where the EPG data shall be written to. This is especially useful if VDR runs in a system that turns off the video disk when it is not used, and therefore needs to write the EPG file to a ramdisk (or turn off writing it alltogether). See 'vdr --help' for details. - Making sure the disk is up and running before starting recording (this is important for systems that turn off the video disk when it is not used). - Added the "Jump" function in replay mode (thanks to Stefan Huelswitt). See the description of the "Red" key in MANUAL under "Replay Control" for details. - Fixed displaying editing marks when toggling a mark in "pause" mode. - If there is no free DVB device to record, the log message will now be given only once. - Made I/O more robust by handling EINTR (thanks to Werner Fink). --- ac3dec/srfft_kni.S | 289 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 289 insertions(+) create mode 100644 ac3dec/srfft_kni.S (limited to 'ac3dec/srfft_kni.S') diff --git a/ac3dec/srfft_kni.S b/ac3dec/srfft_kni.S new file mode 100644 index 0000000..a42a41b --- /dev/null +++ b/ac3dec/srfft_kni.S @@ -0,0 +1,289 @@ +/* + * srfft_kni.S + * + * Copyright (C) Yuqing Deng - October 2000 + * + * + * srfft_kni.S is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * srfft_kni.S is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifdef __i386__ + +.section .rodata + .align 16 +hsqrt2: .float 0f0.707106781188 + .float 0f0.707106781188 + .float 0f-0.707106781188 + .float 0f-0.707106781188 +C_1: .float 0f-1.0 + .float 0f1.0 + .float 0f-1.0 + .float 0f1.0 + +.text + .align 4 +.global fft_4_kni + .type fft_4_kni, @function +fft_4_kni: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax /* complex_t * */ + + movaps (%eax), %xmm0 /* x[1] | x[0] */ + movaps 16(%eax), %xmm2 /* x[3] | x[2] */ + movaps %xmm0, %xmm1 /* x[1] | x[0] */ + addps %xmm2, %xmm0 /* x[1] + x[3] | x[0] + x[2] */ + subps %xmm2, %xmm1 /* x[1] - x[3] | x[0] - x[2] */ + xorps %xmm6, %xmm6 + movhlps %xmm1, %xmm4 /* x[1] - x[3] */ + movhlps %xmm0, %xmm3 /* x[1] + x[3] */ + subss %xmm4, %xmm6 /* -(x[1] - x[3]).re */ + movlhps %xmm1, %xmm0 /* x[0] - x[2] | x[0] + x[2] */ + movss %xmm6, %xmm4 /* (x[1] - x[3]).im | (x[3]-x[1]).re */ + movaps %xmm0, %xmm2 /* x[0] - x[2] | x[0] + x[2] */ + shufps $0x14, %xmm4, %xmm3 /* -i*(x[2] - x[3] | x[2] + x[3] */ + addps %xmm3, %xmm0 + subps %xmm3, %xmm2 + movaps %xmm0, (%eax) + movaps %xmm2, 16(%eax) + + leave + ret + + + .align 4 +.global fft_8_kni + .type fft_8_kni, @function +fft_8_kni: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax /* complext_t */ + + pushl %ebx + movlps (%eax), %xmm0 /* x[0] */ + movlps 32(%eax), %xmm1 /* x[4] */ + movhps 16(%eax), %xmm0 /* x[2] | x[0] */ + movhps 48(%eax), %xmm1 /* x[6] | x[4] */ + movaps %xmm0, %xmm2 /* x[2] | x[0] */ + xorps %xmm3, %xmm3 + addps %xmm1, %xmm0 /* x[2] + x[6] | x[0] + x[4] */ + subps %xmm1, %xmm2 /* x[2] - x[6] | x[0] - x[4] */ + movhlps %xmm0, %xmm5 /* x[2] + x[6] */ + movhlps %xmm2, %xmm4 + movlhps %xmm2, %xmm0 /* x[0] - x[4] | x[0] + x[4] */ + subss %xmm4, %xmm3 /* -(x[2]-x[6]).re */ + movaps %xmm0, %xmm7 /* x[0] - x[4] | x[0] + x[4] */ + movss %xmm3, %xmm4 /* (x[2]-x[6]).im | -(x[2]-x[6]).re */ + movlps 8(%eax), %xmm1 /* x[1] */ + shufps $0x14, %xmm4, %xmm5 /* -i*(x[2] - x[6]) | x[2] + x[6] */ + + addps %xmm5, %xmm0 /* yt */ + subps %xmm5, %xmm7 /* yb */ + + movhps 24(%eax), %xmm1 /* x[3] | x[1] */ + movl $hsqrt2, %ebx + movlps 40(%eax), %xmm2 /* x[5] */ + movhps 56(%eax), %xmm2 /* /x[7] | x[5] */ + movaps %xmm1, %xmm3 /* x[3] | x[1] */ + addps %xmm2, %xmm1 /* x[3] + x[7] | x[1] + x[5] */ + subps %xmm2, %xmm3 /* x[3] - x[7] | x[1] - x[5] */ + movaps (%ebx), %xmm4 /* -1/sqrt2 | -1/sqrt2 | 1/sqrt2 | 1/sqrt2 */ + movaps %xmm3, %xmm6 /* x[3] - x[7] | x[1] - x[5] */ + mulps %xmm4, %xmm3 + shufps $0xc8, %xmm4, %xmm4 /* -1/sqrt2 | 1/sqrt2 | -1/sqrt2 | 1/sqrt2 */ + shufps $0xb1, %xmm6, %xmm6 + mulps %xmm4, %xmm6 + addps %xmm3, %xmm6 /* (-1-i)/sqrt2 * (x[3]-x[7]) | (1-i)/sqrt2 * (x[1] - x[5] */ + movhlps %xmm1, %xmm5 /* x[3] + x[7] */ + movlhps %xmm6, %xmm1 /* (1+i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */ + shufps $0xe4, %xmm6, %xmm5 /* (-1-i)/sqrt2 * (x[3]-x[7]) | x[3]+x[7] */ + movaps %xmm1, %xmm3 /* (1-i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */ + movl $C_1, %ebx + addps %xmm5, %xmm1 /* u */ + subps %xmm5, %xmm3 /* v */ + movaps %xmm0, %xmm2 /* yb */ + movaps %xmm7, %xmm4 /* yt */ + movaps (%ebx), %xmm5 + mulps %xmm5, %xmm3 + addps %xmm1, %xmm0 /* yt + u */ + subps %xmm1, %xmm2 /* yt - u */ + shufps $0xb1, %xmm3, %xmm3 /* -i * v */ + movaps %xmm0, (%eax) + movaps %xmm2, 32(%eax) + addps %xmm3, %xmm4 /* yb - i*v */ + subps %xmm3, %xmm7 /* yb + i*v */ + movaps %xmm4, 16(%eax) + movaps %xmm7, 48(%eax) + + popl %ebx + leave + ret + + .align 4 +.global fft_asmb_kni + .type fft_asmb, @function +fft_asmb_kni: + pushl %ebp + movl %esp, %ebp + + subl $4, %esp + + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + pushl %esi + pushl %edi + + movl 8(%ebp), %ecx /* k */ + movl 12(%ebp), %eax /* x */ + movl %ecx, -4(%ebp) /* k */ + movl 16(%ebp), %ebx /* wT */ + movl 20(%ebp), %edx /* d */ + movl 24(%ebp), %esi /* d3 */ + shll $4, %ecx /* 16k */ + addl $8, %edx + leal (%eax, %ecx, 2), %edi + addl $8, %esi + + /* TRANSZERO and TRANS */ + movaps (%eax), %xmm0 /* x[1] | x[0] */ + movaps (%ebx), %xmm1 /* wT[1] | wT[0] */ + movaps (%ebx, %ecx), %xmm2 /* wB[1] | wB[0] */ + movlps (%edx), %xmm3 /* d */ + movlps (%esi), %xmm4 /* d3 */ + movhlps %xmm1, %xmm5 /* wT[1] */ + movhlps %xmm2, %xmm6 /* wB[1] */ + shufps $0x50, %xmm3, %xmm3 /* d[1].im | d[1].im | d[1].re | d[1].re */ + shufps $0x50, %xmm4, %xmm4 /* d3[1].im | d3[1].im | d3[i].re | d3[i].re */ + movlhps %xmm5, %xmm5 /* wT[1] | wT[1] */ + movlhps %xmm6, %xmm6 /* wB[1] | wB[1] */ + mulps %xmm3, %xmm5 + mulps %xmm4, %xmm6 + movhlps %xmm5, %xmm7 /* wT[1].im * d[1].im | wT[1].re * d[1].im */ + movlhps %xmm6, %xmm5 /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wT[1].im * d[1].re | wT[1].re * d[1].re */ + shufps $0xb1, %xmm6, %xmm7 /* wB[1].re * d3[1].im | wB[i].im * d3[1].im | wT[1].re * d[1].im | wT[1].im * d[1].im */ + movl $C_1, %edi + movaps (%edi), %xmm4 + mulps %xmm4, %xmm7 + addps %xmm7, %xmm5 /* wB[1] * d3[1] | wT[1] * d[1] */ + movlhps %xmm5, %xmm1 /* d[1] * wT[1] | wT[0] */ + shufps $0xe4, %xmm5, %xmm2 /* d3[1] * wB[1] | wB[0] */ + movaps %xmm1, %xmm3 /* d[1] * wT[1] | wT[0] */ + leal (%eax, %ecx, 2), %edi + addps %xmm2, %xmm1 /* u */ + subps %xmm2, %xmm3 /* v */ + mulps %xmm4, %xmm3 + movaps (%eax, %ecx), %xmm5 /* xk[1] | xk[0] */ + shufps $0xb1, %xmm3, %xmm3 /* -i * v */ + movaps %xmm0, %xmm2 /* x[1] | x[0] */ + movaps %xmm5, %xmm6 /* xk[1] | xk[0] */ + addps %xmm1, %xmm0 + subps %xmm1, %xmm2 + addps %xmm3, %xmm5 + subps %xmm3, %xmm6 + movaps %xmm0, (%eax) + movaps %xmm2, (%edi) + movaps %xmm5, (%eax, %ecx) + movaps %xmm6, (%edi, %ecx) + addl $16, %eax + addl $16, %ebx + addl $8, %edx + addl $8, %esi + decl -4(%ebp) + +.loop: + movaps (%ebx), %xmm0 /* wT[1] | wT[0] */ + movaps (%edx), %xmm1 /* d[1] | d[0] */ + + movaps (%ebx, %ecx), %xmm4 /* wB[1] | wB[0] */ + movaps (%esi), %xmm5 /* d3[1] | d3[0] */ + + movhlps %xmm0, %xmm2 /* wT[1] */ + movhlps %xmm1, %xmm3 /* d[1] */ + + movhlps %xmm4, %xmm6 /* wB[1] */ + movhlps %xmm5, %xmm7 /* d3[1] */ + + shufps $0x50, %xmm1, %xmm1 /* d[0].im | d[0].im | d[0].re | d[0].re */ + shufps $0x50, %xmm3, %xmm3 /* d[1].im | d[1].im | d[1].re | d[1].re */ + + movlhps %xmm0, %xmm0 /* wT[0] | wT[0] */ + shufps $0x50, %xmm5, %xmm5 /* d3[0].im | d3[0].im | d3[0].re | d3[0].re */ + movlhps %xmm2, %xmm2 /* wT[1] | wT[1] */ + shufps $0x50, %xmm7, %xmm7 /* d3[1].im | d3[1].im | d3[1].re | d3[1].re */ + + mulps %xmm1, %xmm0 /* d[0].im * wT[0].im | d[0].im * wT[0].re | d[0].re * wT[0].im | d[0].re * wT[0].re */ + mulps %xmm3, %xmm2 /* d[1].im * wT[1].im | d[1].im * wT[1].re | d[1].re * wT[1].im | d[1].re * wT[1].re */ + movlhps %xmm4, %xmm4 /* wB[0] | wB[0] */ + movlhps %xmm6, %xmm6 /* wB[1] | wB[1] */ + + movhlps %xmm0, %xmm1 /* d[0].im * wT[0].im | d[0].im * wT[0].re */ + movlhps %xmm2, %xmm0 /* d[1].re * wT[1].im | d[1].re * wT[1].re | d[0].re * wT[0].im | d[0].re * wT[0].re */ + mulps %xmm5, %xmm4 /* wB[0].im * d3[0].im | wB[0].re * d3[0].im | wB[0].im * d3[0].re | wB[0].re * d3[0].re */ + mulps %xmm7, %xmm6 /* wB[1].im * d3[1].im | wB[1].re * d3[1].im | wB[1].im * d3[1].re | wB[1].re * d3[1].re */ + shufps $0xb1, %xmm2, %xmm1 /* d[1].im * wT[1].re | d[1].im * wT[1].im | d[0].im * wT[0].re | d[0].im * wT[0].im */ + movl $C_1, %edi + movaps (%edi), %xmm3 /* 1.0 | -1.0 | 1.0 | -1.0 */ + + movhlps %xmm4, %xmm5 /* wB[0].im * d3[0].im | wB[0].re * d3[0].im */ + mulps %xmm3, %xmm1 /* d[1].im * wT[1].re | -d[1].im * wT[1].im | d[0].im * wT[0].re | -d[0].im * wT[0].im */ + movlhps %xmm6, %xmm4 /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wB[0].im * d3[0].re | wB[0].im * d3[0].re */ + addps %xmm1, %xmm0 /* wT[1] * d[1] | wT[0] * d[0] */ + + shufps $0xb1, %xmm6, %xmm5 /* wB[1].re * d3[1].im | wB[1].im * d3[1].im | wB[0].re * d3[0].im | wB[0].im * d3[0].im */ + mulps %xmm3, %xmm5 /* wB[1].re * d3[1].im | -wB[1].im * d3[1].im | wB[0].re * d3[0].im | -wB[0].im * d3[0].im */ + addps %xmm5, %xmm4 /* wB[1] * d3[1] | wB[0] * d3[0] */ + + movaps %xmm0, %xmm1 /* wT[1] * d[1] | wT[0] * d[0] */ + addps %xmm4, %xmm0 /* u */ + subps %xmm4, %xmm1 /* v */ + movaps (%eax), %xmm6 /* x[1] | x[0] */ + leal (%eax, %ecx, 2), %edi + mulps %xmm3, %xmm1 + addl $16, %ebx + addl $16, %esi + shufps $0xb1, %xmm1, %xmm1 /* -i * v */ + movaps (%eax, %ecx), %xmm7 /* xk[1] | xk[0] */ + movaps %xmm6, %xmm2 + movaps %xmm7, %xmm4 + addps %xmm0, %xmm6 + subps %xmm0, %xmm2 + movaps %xmm6, (%eax) + movaps %xmm2, (%edi) + addps %xmm1, %xmm7 + subps %xmm1, %xmm4 + addl $16, %edx + movaps %xmm7, (%eax, %ecx) + movaps %xmm4, (%edi, %ecx) + + addl $16, %eax + decl -4(%ebp) + jnz .loop + +.end: + popl %edi + popl %esi + popl %edx + popl %ecx + popl %ebx + popl %eax + + addl $4, %esp + + leave + ret +#endif -- cgit v1.2.3