summary | refs | log | tree | commit | diff
path: root/src/libfaad/filtbank.c
diff options
context:
space:
mode:
author: Mike Melanson <mike@multimedia.cx> 2005-10-29 23:57:06 +0000
committer: Mike Melanson <mike@multimedia.cx> 2005-10-29 23:57:06 +0000
commit: 03ac29c63fd3d5019c67b3662669b1c443896f0b (patch)
tree: 00db769b2943fce16b9967591652d2c0724be168 /src/libfaad/filtbank.c
parent: c7976c4d0d8d02fa18bc9fd82bafe99e333e2a53 (diff)
download: xine-lib-03ac29c63fd3d5019c67b3662669b1c443896f0b.tar.gz
download: xine-lib-03ac29c63fd3d5019c67b3662669b1c443896f0b.tar.bz2
update libfaad2 to CVS snapshot 2004-09-15
CVS patchset: 7777 CVS date: 2005/10/29 23:57:06
Diffstat (limited to 'src/libfaad/filtbank.c')
-rw-r--r-- src/libfaad/filtbank.c 397
1 files changed, 39 insertions, 358 deletions
diff --git a/src/libfaad/filtbank.c b/src/libfaad/filtbank.c
index 3fc2cf456..2919b5be3 100644
--- a/src/libfaad/filtbank.c
+++ b/src/libfaad/filtbank.c
@@ -22,7 +22,7 @@
** Commercial non-GPL licensing of this software is possible.
** For more info contact Ahead Software through Mpeg4AAClicense@nero.com.
**
-** $Id: filtbank.c,v 1.9 2004/12/03 01:15:30 tmattern Exp $
+** $Id: filtbank.c,v 1.10 2005/10/29 23:57:06 tmmm Exp $
**/
#include "common.h"
@@ -87,15 +87,6 @@ fb_info *filter_bank_init(uint16_t frame_len)
}
#endif
-#ifdef USE_SSE
- if (cpu_has_sse())
- {
- fb->if_func = ifilter_bank_sse;
- } else {
- fb->if_func = ifilter_bank;
- }
-#endif
-
return fb;
}
@@ -140,30 +131,6 @@ static INLINE void imdct_long(fb_info *fb, real_t *in_data, real_t *out_data, ui
#endif
}
-#ifdef USE_SSE
-static INLINE void imdct_long_sse(fb_info *fb, real_t *in_data, real_t *out_data, uint16_t len)
-{
-#ifdef LD_DEC
- mdct_info *mdct = NULL;
-
- switch (len)
- {
- case 2048:
- case 1920:
- mdct = fb->mdct2048;
- break;
- case 1024:
- case 960:
- mdct = fb->mdct1024;
- break;
- }
-
- faad_imdct_sse(mdct, in_data, out_data);
-#else
- faad_imdct_sse(fb->mdct2048, in_data, out_data);
-#endif
-}
-#endif
#ifdef LTP_DEC
static INLINE void mdct(fb_info *fb, real_t *in_data, real_t *out_data, uint16_t len)
@@ -215,6 +182,7 @@ void ifilter_bank(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
int64_t count = faad_get_ts();
#endif
+ /* select windows of current frame and previous frame (Sine or KBD) */
#ifdef LD_DEC
if (object_type == LD)
{
@@ -230,11 +198,24 @@ void ifilter_bank(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
}
#endif
+#if 0
+ for (i = 0; i < 1024; i++)
+ {
+ printf("%d\n", freq_in[i]);
+ }
+#endif
+
+#if 0
+ printf("%d %d\n", window_sequence, window_shape);
+#endif
switch (window_sequence)
{
case ONLY_LONG_SEQUENCE:
+ /* perform iMDCT */
imdct_long(fb, freq_in, transf_buf, 2*nlong);
+
+ /* add second half output of previous frame to windowed output of current frame */
for (i = 0; i < nlong; i+=4)
{
time_out[i] = overlap[i] + MUL_F(transf_buf[i],window_long_prev[i]);
@@ -242,6 +223,8 @@ void ifilter_bank(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
time_out[i+2] = overlap[i+2] + MUL_F(transf_buf[i+2],window_long_prev[i+2]);
time_out[i+3] = overlap[i+3] + MUL_F(transf_buf[i+3],window_long_prev[i+3]);
}
+
+ /* window the second half and save as overlap for next frame */
for (i = 0; i < nlong; i+=4)
{
overlap[i] = MUL_F(transf_buf[nlong+i],window_long[nlong-1-i]);
@@ -252,7 +235,10 @@ void ifilter_bank(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
break;
case LONG_START_SEQUENCE:
+ /* perform iMDCT */
imdct_long(fb, freq_in, transf_buf, 2*nlong);
+
+ /* add second half output of previous frame to windowed output of current frame */
for (i = 0; i < nlong; i+=4)
{
time_out[i] = overlap[i] + MUL_F(transf_buf[i],window_long_prev[i]);
@@ -260,6 +246,9 @@ void ifilter_bank(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
time_out[i+2] = overlap[i+2] + MUL_F(transf_buf[i+2],window_long_prev[i+2]);
time_out[i+3] = overlap[i+3] + MUL_F(transf_buf[i+3],window_long_prev[i+3]);
}
+
+ /* window the second half and save as overlap for next frame */
+ /* construct second half window using padding with 1's and 0's */
for (i = 0; i < nflat_ls; i++)
overlap[i] = transf_buf[nlong+i];
for (i = 0; i < nshort; i++)
@@ -269,6 +258,7 @@ void ifilter_bank(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
break;
case EIGHT_SHORT_SEQUENCE:
+ /* perform iMDCT for each short block */
faad_imdct(fb->mdct256, freq_in+0*nshort, transf_buf+2*nshort*0);
faad_imdct(fb->mdct256, freq_in+1*nshort, transf_buf+2*nshort*1);
faad_imdct(fb->mdct256, freq_in+2*nshort, transf_buf+2*nshort*2);
@@ -277,6 +267,8 @@ void ifilter_bank(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
faad_imdct(fb->mdct256, freq_in+5*nshort, transf_buf+2*nshort*5);
faad_imdct(fb->mdct256, freq_in+6*nshort, transf_buf+2*nshort*6);
faad_imdct(fb->mdct256, freq_in+7*nshort, transf_buf+2*nshort*7);
+
+ /* add second half output of previous frame to windowed output of current frame */
for (i = 0; i < nflat_ls; i++)
time_out[i] = overlap[i];
for(i = 0; i < nshort; i++)
@@ -288,6 +280,8 @@ void ifilter_bank(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
if (i < trans)
time_out[nflat_ls+4*nshort+i] = overlap[nflat_ls+nshort*4+i] + MUL_F(transf_buf[nshort*7+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*8+i],window_short[i]);
}
+
+ /* window the second half and save as overlap for next frame */
for(i = 0; i < nshort; i++)
{
if (i >= trans)
@@ -302,352 +296,39 @@ void ifilter_bank(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
break;
case LONG_STOP_SEQUENCE:
+ /* perform iMDCT */
imdct_long(fb, freq_in, transf_buf, 2*nlong);
+
+ /* add second half output of previous frame to windowed output of current frame */
+ /* construct first half window using padding with 1's and 0's */
for (i = 0; i < nflat_ls; i++)
time_out[i] = overlap[i];
for (i = 0; i < nshort; i++)
time_out[nflat_ls+i] = overlap[nflat_ls+i] + MUL_F(transf_buf[nflat_ls+i],window_short_prev[i]);
for (i = 0; i < nflat_ls; i++)
time_out[nflat_ls+nshort+i] = overlap[nflat_ls+nshort+i] + transf_buf[nflat_ls+nshort+i];
+
+ /* window the second half and save as overlap for next frame */
for (i = 0; i < nlong; i++)
overlap[i] = MUL_F(transf_buf[nlong+i],window_long[nlong-1-i]);
break;
}
-#ifdef PROFILE
- count = faad_get_ts() - count;
- fb->cycles += count;
-#endif
-}
-
-#ifdef USE_SSE
-void ifilter_bank_sse(fb_info *fb, uint8_t window_sequence, uint8_t window_shape,
- uint8_t window_shape_prev, real_t *freq_in,
- real_t *time_out, uint8_t object_type, uint16_t frame_len)
-{
- int16_t i;
- ALIGN real_t transf_buf[2*1024] = {0};
-
- const real_t *window_long = NULL;
- const real_t *window_long_prev = NULL;
- const real_t *window_short = NULL;
- const real_t *window_short_prev = NULL;
-
- uint16_t nlong = frame_len;
- uint16_t nshort = frame_len/8;
- uint16_t trans = nshort/2;
-
- uint16_t nflat_ls = (nlong-nshort)/2;
-
-#ifdef PROFILE
- int64_t count = faad_get_ts();
-#endif
-
-#ifdef LD_DEC
- if (object_type == LD)
+#if 0
+ for (i = 0; i < 1024; i++)
{
- window_long = fb->ld_window[window_shape];
- window_long_prev = fb->ld_window[window_shape_prev];
- } else {
-#endif
- window_long = fb->long_window[window_shape];
- window_long_prev = fb->long_window[window_shape_prev];
- window_short = fb->short_window[window_shape];
- window_short_prev = fb->short_window[window_shape_prev];
-#ifdef LD_DEC
+ printf("%d\n", time_out[i]);
+ //printf("0x%.8X\n", time_out[i]);
}
#endif
- switch (window_sequence)
- {
- case ONLY_LONG_SEQUENCE:
- imdct_long_sse(fb, freq_in, transf_buf, 2*nlong);
- for (i = 0; i < nlong; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
-
- m1 = _mm_load_ps(&transf_buf[i]);
- m2 = _mm_load_ps(&window_long_prev[i]);
- m6 = _mm_load_ps(&window_long[nlong-4-i]);
- m3 = _mm_load_ps(&time_out[nlong+i]);
- m5 = _mm_load_ps(&transf_buf[nlong+i]);
-
- m4 = _mm_mul_ps(m1, m2);
- m7 = _mm_shuffle_ps(m6, m6, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_add_ps(m4, m3);
- m8 = _mm_mul_ps(m5, m7);
-
- _mm_store_ps(&time_out[i], m4);
- _mm_store_ps(&time_out[nlong+i], m8);
- }
- break;
-
- case LONG_START_SEQUENCE:
- imdct_long_sse(fb, freq_in, transf_buf, 2*nlong);
- for (i = 0; i < nlong; i+=4)
- {
- __m128 m1 = _mm_load_ps(&transf_buf[i]);
- __m128 m2 = _mm_load_ps(&window_long_prev[i]);
- __m128 m3 = _mm_load_ps(&time_out[nlong+i]);
-
- __m128 m4 = _mm_mul_ps(m1, m2);
- m4 = _mm_add_ps(m4, m3);
-
- _mm_store_ps(&time_out[i], m4);
- }
- for (i = 0; i < nflat_ls; i+=4)
- {
- __m128 m1 = _mm_load_ps(&transf_buf[nlong+i]);
- _mm_store_ps(&time_out[nlong+i], m1);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1 = _mm_load_ps(&transf_buf[nlong+nflat_ls+i]);
- __m128 m2 = _mm_load_ps(&window_short[nshort-4-i]);
- __m128 m3, m4;
-
- m3 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m3);
-
- _mm_store_ps(&time_out[nlong+nflat_ls+i], m4);
- }
- for (i = 0; i < nflat_ls; i+=4)
- {
- __m128 m1 = _mm_setzero_ps();
- _mm_store_ps(&time_out[nlong+nflat_ls+nshort+i], m1);
- }
- break;
-
- case EIGHT_SHORT_SEQUENCE:
- faad_imdct_sse(fb->mdct256, &freq_in[0*nshort], &transf_buf[2*nshort*0]);
- faad_imdct_sse(fb->mdct256, &freq_in[1*nshort], &transf_buf[2*nshort*1]);
- faad_imdct_sse(fb->mdct256, &freq_in[2*nshort], &transf_buf[2*nshort*2]);
- faad_imdct_sse(fb->mdct256, &freq_in[3*nshort], &transf_buf[2*nshort*3]);
- faad_imdct_sse(fb->mdct256, &freq_in[4*nshort], &transf_buf[2*nshort*4]);
- faad_imdct_sse(fb->mdct256, &freq_in[5*nshort], &transf_buf[2*nshort*5]);
- faad_imdct_sse(fb->mdct256, &freq_in[6*nshort], &transf_buf[2*nshort*6]);
- faad_imdct_sse(fb->mdct256, &freq_in[7*nshort], &transf_buf[2*nshort*7]);
- for (i = 0; i < nflat_ls; i+=4)
- {
- __m128 m1 = _mm_load_ps(&time_out[nlong+i]);
- _mm_store_ps(&time_out[i], m1);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1 = _mm_load_ps(&transf_buf[nshort*0+i]);
- __m128 m2 = _mm_load_ps(&window_short_prev[i]);
- __m128 m3 = _mm_load_ps(&time_out[nlong+nflat_ls+i]);
-
- __m128 m4 = _mm_mul_ps(m1, m2);
- m4 = _mm_add_ps(m4, m3);
-
- _mm_store_ps(&time_out[nflat_ls+i], m4);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
- m1 = _mm_load_ps(&transf_buf[nshort*1+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
- m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*1+i]);
- m6 = _mm_load_ps(&transf_buf[nshort*2+i]);
- m7 = _mm_load_ps(&window_short[i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m5);
- m8 = _mm_mul_ps(m6, m7);
- m4 = _mm_add_ps(m4, m3);
- m4 = _mm_add_ps(m4, m8);
-
- _mm_store_ps(&time_out[nflat_ls+1*nshort+i], m4);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
- m1 = _mm_load_ps(&transf_buf[nshort*3+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
- m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*2+i]);
- m6 = _mm_load_ps(&transf_buf[nshort*4+i]);
- m7 = _mm_load_ps(&window_short[i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m5);
- m8 = _mm_mul_ps(m6, m7);
- m4 = _mm_add_ps(m4, m3);
- m4 = _mm_add_ps(m4, m8);
-
- _mm_store_ps(&time_out[nflat_ls+2*nshort+i], m4);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
- m1 = _mm_load_ps(&transf_buf[nshort*5+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
- m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*3+i]);
- m6 = _mm_load_ps(&transf_buf[nshort*6+i]);
- m7 = _mm_load_ps(&window_short[i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m5);
- m8 = _mm_mul_ps(m6, m7);
- m4 = _mm_add_ps(m4, m3);
- m4 = _mm_add_ps(m4, m8);
-
- _mm_store_ps(&time_out[nflat_ls+3*nshort+i], m4);
- }
- for(i = 0; i < trans; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
- m1 = _mm_load_ps(&transf_buf[nshort*7+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
- m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*4+i]);
- m6 = _mm_load_ps(&transf_buf[nshort*8+i]);
- m7 = _mm_load_ps(&window_short[i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m5);
- m8 = _mm_mul_ps(m6, m7);
- m4 = _mm_add_ps(m4, m3);
- m4 = _mm_add_ps(m4, m8);
-
- _mm_store_ps(&time_out[nflat_ls+4*nshort+i], m4);
- }
- for (i = trans; i < nshort; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
- m1 = _mm_load_ps(&transf_buf[nshort*7+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
- m6 = _mm_load_ps(&transf_buf[nshort*8+i]);
- m7 = _mm_load_ps(&window_short[i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m5);
- m8 = _mm_mul_ps(m6, m7);
- m3 = _mm_add_ps(m4, m8);
-
- _mm_store_ps(&time_out[nflat_ls+4*nshort+i], m3);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
- m1 = _mm_load_ps(&transf_buf[nshort*9+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
- m6 = _mm_load_ps(&transf_buf[nshort*10+i]);
- m7 = _mm_load_ps(&window_short[i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m5);
- m8 = _mm_mul_ps(m6, m7);
- m3 = _mm_add_ps(m4, m8);
-
- _mm_store_ps(&time_out[nflat_ls+5*nshort+i], m3);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
- m1 = _mm_load_ps(&transf_buf[nshort*11+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
- m6 = _mm_load_ps(&transf_buf[nshort*12+i]);
- m7 = _mm_load_ps(&window_short[i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m5);
- m8 = _mm_mul_ps(m6, m7);
- m3 = _mm_add_ps(m4, m8);
-
- _mm_store_ps(&time_out[nflat_ls+6*nshort+i], m3);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1, m2, m3, m4, m5, m6, m7, m8;
- m1 = _mm_load_ps(&transf_buf[nshort*13+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
- m6 = _mm_load_ps(&transf_buf[nshort*14+i]);
- m7 = _mm_load_ps(&window_short[i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m5);
- m8 = _mm_mul_ps(m6, m7);
- m3 = _mm_add_ps(m4, m8);
-
- _mm_store_ps(&time_out[nflat_ls+7*nshort+i], m3);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1, m2, m3, m5;
- m1 = _mm_load_ps(&transf_buf[nshort*15+i]);
- m2 = _mm_load_ps(&window_short[nshort-4-i]);
-
- m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m3 = _mm_mul_ps(m1, m5);
-
- _mm_store_ps(&time_out[nflat_ls+8*nshort+i], m3);
- }
- for (i = 0; i < nflat_ls; i+=4)
- {
- __m128 m1 = _mm_setzero_ps();
- _mm_store_ps(&time_out[nlong+nflat_ls+nshort+i], m1);
- }
- break;
-
- case LONG_STOP_SEQUENCE:
- imdct_long_sse(fb, freq_in, transf_buf, 2*nlong);
- for (i = 0; i < nflat_ls; i+=4)
- {
- __m128 m1 = _mm_load_ps(&time_out[nlong+i]);
- _mm_store_ps(&time_out[i], m1);
- }
- for (i = 0; i < nshort; i+=4)
- {
- __m128 m1 = _mm_load_ps(&transf_buf[nflat_ls+i]);
- __m128 m2 = _mm_load_ps(&window_short_prev[i]);
- __m128 m3 = _mm_load_ps(&time_out[nlong+nflat_ls+i]);
-
- __m128 m4 = _mm_mul_ps(m1, m2);
- m4 = _mm_add_ps(m4, m3);
-
- _mm_store_ps(&time_out[nflat_ls+i], m4);
- }
- for (i = 0; i < nflat_ls; i+=4)
- {
- __m128 m1 = _mm_load_ps(&transf_buf[nflat_ls+nshort+i]);
- __m128 m2 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort+i]);
-
- __m128 m3 = _mm_add_ps(m1, m2);
-
- _mm_store_ps(&time_out[nflat_ls+nshort+i], m3);
- }
- for (i = 0; i < nlong; i+=4)
- {
- __m128 m1 = _mm_load_ps(&transf_buf[nlong+i]);
- __m128 m2 = _mm_load_ps(&window_long[nlong-4-i]);
- __m128 m3, m4;
-
- m3 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3));
-
- m4 = _mm_mul_ps(m1, m3);
-
- _mm_store_ps(&time_out[nlong+i], m4);
- }
- break;
- }
#ifdef PROFILE
count = faad_get_ts() - count;
fb->cycles += count;
#endif
}
-#endif
+
#ifdef LTP_DEC
/* only works for LTP -> no overlapping, no short blocks */