diff options
author | Miguel Freitas <miguelfreitas@users.sourceforge.net> | 2002-12-16 18:59:50 +0000 |
---|---|---|
committer | Miguel Freitas <miguelfreitas@users.sourceforge.net> | 2002-12-16 18:59:50 +0000 |
commit | 94ef6649dd5f4e95337af00dcede2337ea7cfb49 (patch) | |
tree | 07d679ce92b4e4517815abc42394480eebf44904 /src/libfaad/cfft.c | |
parent | 48f4c5809db11a6df4a5e7285d5e60a2ed924e2a (diff) | |
download | xine-lib-94ef6649dd5f4e95337af00dcede2337ea7cfb49.tar.gz xine-lib-94ef6649dd5f4e95337af00dcede2337ea7cfb49.tar.bz2 |
updated libfaad
CVS patchset: 3560
CVS date: 2002/12/16 18:59:50
Diffstat (limited to 'src/libfaad/cfft.c')
-rw-r--r-- | src/libfaad/cfft.c | 728 |
1 files changed, 303 insertions, 425 deletions
diff --git a/src/libfaad/cfft.c b/src/libfaad/cfft.c index 70aa084a1..567e8ce34 100644 --- a/src/libfaad/cfft.c +++ b/src/libfaad/cfft.c @@ -16,465 +16,324 @@ ** along with this program; if not, write to the Free Software ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. ** -** $Id: cfft.c,v 1.2 2002/10/22 04:48:41 storri Exp $ +** $Id: cfft.c,v 1.3 2002/12/16 18:59:54 miguelfreitas Exp $ **/ /* * Algorithmically based on Fortran-77 FFTPACK * by Paul N. Swarztrauber(Version 4, 1985). + * + * Does even sized fft only */ /* isign is +1 for backward and -1 for forward transforms */ - #include "common.h" +#include "structs.h" + #include <stdlib.h> +#ifdef _WIN32_WCE +#define assert(x) +#else +#include <assert.h> +#endif #include "cfft.h" +#include "cfft_tab.h" /*---------------------------------------------------------------------- - passf2, passf3, passf4, passf5, passf. Complex FFT passes fwd and bwd. + passf2, passf3, passf4, passf5. Complex FFT passes fwd and bwd. ----------------------------------------------------------------------*/ -static void passf2(uint16_t ido, uint16_t l1, real_t *cc, real_t *ch, - real_t *wa1, int8_t isign) +static void passf2(uint16_t ido, uint16_t l1, complex_t *cc, complex_t *ch, + complex_t *wa, int8_t isign) { uint16_t i, k, ah, ac; - real_t ti2, tr2; - if (ido <= 2) + if (ido == 1) { for (k = 0; k < l1; k++) { - ah = k*ido; - ac = 2*k*ido; - ch[ah] = cc[ac] + cc[ac+ido]; - ch[ah+ido*l1] = cc[ac] - cc[ac+ido]; - ch[ah+1] = cc[ac+1] + cc[ac+ido+1]; - ch[ah+ido*l1+1] = cc[ac+1] - cc[ac+ido+1]; + ah = 2*k; + ac = 4*k; + + RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]); + IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]); + RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]); + IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]); } } else { for (k = 0; k < l1; k++) { - for(i = 0; i < ido-1; i += 2) + ah = k*ido; + ac = 2*k*ido; + + for (i = 0; i < ido; i++) { - ah = i + k*ido; - ac = i + 2*k*ido; - ch[ah] = cc[ac] + cc[ac+ido]; - tr2 = cc[ac] - cc[ac+ido]; - ch[ah+1] = cc[ac+1] + cc[ac+1+ido]; - ti2 = cc[ac+1] - cc[ac+1+ido]; - ch[ah+l1*ido+1] = wa1[i]*ti2 + isign*wa1[i+1]*tr2; - ch[ah+l1*ido] = wa1[i]*tr2 - isign*wa1[i+1]*ti2; - } - } - } -} + complex_t t2; + RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+ido]); + IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+ido]); -static void passf3(uint16_t ido, uint16_t l1, real_t *cc, real_t *ch, - real_t *wa1, real_t *wa2, int8_t isign) -{ - static real_t taur = -0.5; - static real_t taui = 0.866025403784439; - uint16_t i, k, ac, ah; - real_t ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2; + RE(t2) = RE(cc[ac]) - RE(cc[ac+ido]); + IM(t2) = IM(cc[ac]) - IM(cc[ac+ido]); - if (ido == 2) - { - for (k = 1; k <= l1; k++) - { - ac = (3*k-2) * ido; - tr2 = cc[ac] + cc[ac+ido]; - cr2 = cc[ac-ido] + taur*tr2; - ah = (k-1) * ido; - ch[ah] = cc[ac-ido] + tr2; - - ti2 = cc[ac+1] + cc[ac+ido+1]; - ci2 = cc[ac-ido+1] + taur*ti2; - ch[ah+1] = cc[ac-ido+1] + ti2; - - cr3 = isign * taui * (cc[ac] - cc[ac+ido]); - ci3 = isign * taui * (cc[ac+1] - cc[ac+ido+1]); - ch[ah+l1*ido] = cr2 - ci3; - ch[ah+2*l1*ido] = cr2 + ci3; - ch[ah+l1*ido+1] = ci2 + cr3; - ch[ah+2*l1*ido+1] = ci2 - cr3; - } - } else { - for (k = 1; k <= l1; k++) - { - for (i = 0; i < ido-1; i += 2) - { - ac = i + (3*k-2) * ido; - tr2 = cc[ac] + cc[ac+ido]; - cr2 = cc[ac-ido] + taur*tr2; - ah = i + (k-1) * ido; - ch[ah] = cc[ac-ido] + tr2; - ti2 = cc[ac+1] + cc[ac+ido+1]; - ci2 = cc[ac-ido+1] + taur*ti2; - ch[ah+1] = cc[ac-ido+1] + ti2; - cr3 = isign * taui * (cc[ac] - cc[ac+ido]); - ci3 = isign * taui * (cc[ac+1] - cc[ac+ido+1]); - dr2 = cr2 - ci3; - dr3 = cr2 + ci3; - di2 = ci2 + cr3; - di3 = ci2 - cr3; - ch[ah+l1*ido+1] = wa1[i]*di2 + isign*wa1[i+1]*dr2; - ch[ah+l1*ido] = wa1[i]*dr2 - isign*wa1[i+1]*di2; - ch[ah+2*l1*ido+1] = wa2[i]*di3 + isign*wa2[i+1]*dr3; - ch[ah+2*l1*ido] = wa2[i]*dr3 - isign*wa2[i+1]*di3; + RE(ch[ah+l1*ido]) = MUL_R_C(RE(t2),RE(wa[i])) - MUL_R_C(IM(t2),IM(wa[i]))*isign; + IM(ch[ah+l1*ido]) = MUL_R_C(IM(t2),RE(wa[i])) + MUL_R_C(RE(t2),IM(wa[i]))*isign; + ah++; + ac++; } } } } -static void passf4(uint16_t ido, uint16_t l1, real_t *cc, real_t *ch, - real_t *wa1, real_t *wa2, real_t *wa3, int8_t isign) +static void passf3(uint16_t ido, uint16_t l1, complex_t *cc, complex_t *ch, + complex_t *wa1, complex_t *wa2, int8_t isign) { + static real_t taur = COEF_CONST(-0.5); + static real_t taui = COEF_CONST(0.866025403784439); uint16_t i, k, ac, ah; - real_t ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, - tr3, tr4; + complex_t c2, c3, d2, d3, t2; - if (ido == 2) + if (ido == 1) { for (k = 0; k < l1; k++) { - ac = 4*k*ido + 1; - ti1 = cc[ac] - cc[ac+2*ido]; - ti2 = cc[ac] + cc[ac+2*ido]; - tr4 = cc[ac+3*ido] - cc[ac+ido]; - ti3 = cc[ac+ido] + cc[ac+3*ido]; - tr1 = cc[ac-1] - cc[ac+2*ido-1]; - tr2 = cc[ac-1] + cc[ac+2*ido-1]; - ti4 = cc[ac+ido-1] - cc[ac+3*ido-1]; - tr3 = cc[ac+ido-1] + cc[ac+3*ido-1]; - ah = k*ido; - ch[ah] = tr2 + tr3; - ch[ah+2*l1*ido] = tr2 - tr3; - ch[ah+1] = ti2 + ti3; - ch[ah+2*l1*ido+1] = ti2 - ti3; - ch[ah+l1*ido] = tr1 + isign*tr4; - ch[ah+3*l1*ido] = tr1 - isign*tr4; - ch[ah+l1*ido+1] = ti1 + isign*ti4; - ch[ah+3*l1*ido+1] = ti1 - isign*ti4; + ac = 3*k+1; + ah = k; + + RE(t2) = RE(cc[ac]) + RE(cc[ac+1]); + IM(t2) = IM(cc[ac]) + IM(cc[ac+1]); + RE(c2) = RE(cc[ac-1]) + MUL_R_C(RE(t2),taur); + IM(c2) = IM(cc[ac-1]) + MUL_R_C(IM(t2),taur); + + RE(ch[ah]) = RE(cc[ac-1]) + RE(t2); + IM(ch[ah]) = IM(cc[ac-1]) + IM(t2); + + RE(c3) = MUL_R_C((RE(cc[ac]) - RE(cc[ac+1])), taui)*isign; + IM(c3) = MUL_R_C((IM(cc[ac]) - IM(cc[ac+1])), taui)*isign; + + RE(ch[ah+l1]) = RE(c2) - IM(c3); + IM(ch[ah+l1]) = IM(c2) + RE(c3); + RE(ch[ah+2*l1]) = RE(c2) + IM(c3); + IM(ch[ah+2*l1]) = IM(c2) - RE(c3); } } else { for (k = 0; k < l1; k++) { - for (i = 0; i < ido-1; i += 2) + for (i = 0; i < ido; i++) { - ac = i + 1 + 4*k*ido; - ti1 = cc[ac] - cc[ac+2*ido]; - ti2 = cc[ac] + cc[ac+2*ido]; - ti3 = cc[ac+ido] + cc[ac+3*ido]; - tr4 = cc[ac+3*ido] - cc[ac+ido]; - tr1 = cc[ac-1] - cc[ac+2*ido-1]; - tr2 = cc[ac-1] + cc[ac+2*ido-1]; - ti4 = cc[ac+ido-1] - cc[ac+3*ido-1]; - tr3 = cc[ac+ido-1] + cc[ac+3*ido-1]; - ah = i + k*ido; - ch[ah] = tr2 + tr3; - cr3 = tr2 - tr3; - ch[ah+1] = ti2 + ti3; - ci3 = ti2 - ti3; - cr2 = tr1 + isign*tr4; - cr4 = tr1 - isign*tr4; - ci2 = ti1 + isign*ti4; - ci4 = ti1 - isign*ti4; - ch[ah+l1*ido] = wa1[i]*cr2 - isign*wa1[i+1]*ci2; - ch[ah+l1*ido+1] = wa1[i]*ci2 + isign*wa1[i+1]*cr2; - ch[ah+2*l1*ido] = wa2[i]*cr3 - isign*wa2[i+1]*ci3; - ch[ah+2*l1*ido+1] = wa2[i]*ci3 + isign*wa2[i+1]*cr3; - ch[ah+3*l1*ido] = wa3[i]*cr4 - isign*wa3[i+1]*ci4; - ch[ah+3*l1*ido+1] = wa3[i]*ci4 + isign*wa3[i+1]*cr4; - } - } - } -} + ac = i + (3*k+1)*ido; + ah = i + k * ido; + RE(t2) = RE(cc[ac]) + RE(cc[ac+ido]); + RE(c2) = RE(cc[ac-ido]) + MUL_R_C(RE(t2),taur); + IM(t2) = IM(cc[ac]) + IM(cc[ac+ido]); + IM(c2) = IM(cc[ac-ido]) + MUL_R_C(IM(t2),taur); -static void passf5(uint16_t ido, uint16_t l1, real_t *cc, real_t *ch, - real_t *wa1, real_t *wa2, real_t *wa3, real_t *wa4, - int8_t isign) -{ - static real_t tr11 = 0.309016994374947; - static real_t ti11 = 0.951056516295154; - static real_t tr12 = -0.809016994374947; - static real_t ti12 = 0.587785252292473; - uint16_t i, k, ac, ah; - real_t ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4, - ti2, ti3, ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5; + RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2); + IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2); - if (ido == 2) - { - for (k = 1; k <= l1; ++k) - { - ac = (5*k-4) * ido + 1; - ti5 = cc[ac] - cc[ac+3*ido]; - ti2 = cc[ac] + cc[ac+3*ido]; - ti4 = cc[ac+ido] - cc[ac+2*ido]; - ti3 = cc[ac+ido] + cc[ac+2*ido]; - tr5 = cc[ac-1] - cc[ac+3*ido-1]; - tr2 = cc[ac-1] + cc[ac+3*ido-1]; - tr4 = cc[ac+ido-1] - cc[ac+2*ido-1]; - tr3 = cc[ac+ido-1] + cc[ac+2*ido-1]; - ah = (k-1) * ido; - ch[ah] = cc[ac-ido-1] + tr2 + tr3; - ch[ah+1] = cc[ac-ido] + ti2 + ti3; - cr2 = cc[ac-ido-1] + tr11*tr2 + tr12*tr3; - ci2 = cc[ac-ido] + tr11*ti2 + tr12*ti3; - cr3 = cc[ac-ido-1] + tr12*tr2 + tr11*tr3; - ci3 = cc[ac-ido] + tr12*ti2 + tr11*ti3; - cr5 = isign * (ti11*tr5 + ti12*tr4); - ci5 = isign * (ti11*ti5 + ti12*ti4); - cr4 = isign * (ti12*tr5 - ti11*tr4); - ci4 = isign * (ti12*ti5 - ti11*ti4); - ch[ah+l1*ido] = cr2 - ci5; - ch[ah+4*l1*ido] = cr2 + ci5; - ch[ah+l1*ido+1] = ci2 + cr5; - ch[ah+2*l1*ido+1]=ci3 + cr4; - ch[ah+2*l1*ido] = cr3 - ci4; - ch[ah+3*l1*ido] = cr3 + ci4; - ch[ah+3*l1*ido+1] = ci3 - cr4; - ch[ah+4*l1*ido+1] = ci2 - cr5; - } - } else { - for (k = 1; k <= l1; k++) - { - for (i = 0; i < ido-1; i += 2) - { - ac = i + 1 + (k*5-4) * ido; - ti5 = cc[ac] - cc[ac+3*ido]; - ti2 = cc[ac] + cc[ac+3*ido]; - ti4 = cc[ac+ido] - cc[ac+2*ido]; - ti3 = cc[ac+ido] + cc[ac+2*ido]; - tr5 = cc[ac-1] - cc[ac+3*ido-1]; - tr2 = cc[ac-1] + cc[ac+3*ido-1]; - tr4 = cc[ac+ido-1] - cc[ac+2*ido-1]; - tr3 = cc[ac+ido-1] + cc[ac+2*ido-1]; - ah = i + (k-1) * ido; - ch[ah] = cc[ac-ido-1] + tr2 + tr3; - ch[ah+1] = cc[ac-ido] + ti2 + ti3; - cr2 = cc[ac-ido-1] + tr11*tr2 + tr12*tr3; - ci2 = cc[ac-ido] + tr11*ti2 + tr12*ti3; - cr3 = cc[ac-ido-1] + tr12*tr2 + tr11*tr3; - ci3 = cc[ac-ido] + tr12*ti2 + tr11*ti3; - cr5 = isign * (ti11*tr5 + ti12*tr4); - ci5 = isign * (ti11*ti5 + ti12*ti4); - cr4 = isign * (ti12*tr5 - ti11*tr4); - ci4 = isign * (ti12*ti5 - ti11*ti4); - dr3 = cr3 - ci4; - dr4 = cr3 + ci4; - di3 = ci3 + cr4; - di4 = ci3 - cr4; - dr5 = cr2 + ci5; - dr2 = cr2 - ci5; - di5 = ci2 - cr5; - di2 = ci2 + cr5; - ch[ah+l1*ido] = wa1[i]*dr2 - isign*wa1[i+1]*di2; - ch[ah+l1*ido+1] = wa1[i]*di2 + isign*wa1[i+1]*dr2; - ch[ah+2*l1*ido] = wa2[i]*dr3 - isign*wa2[i+1]*di3; - ch[ah+2*l1*ido+1] = wa2[i]*di3 + isign*wa2[i+1]*dr3; - ch[ah+3*l1*ido] = wa3[i]*dr4 - isign*wa3[i+1]*di4; - ch[ah+3*l1*ido+1] = wa3[i]*di4 + isign*wa3[i+1]*dr4; - ch[ah+4*l1*ido] = wa4[i]*dr5 - isign*wa4[i+1]*di5; - ch[ah+4*l1*ido+1] = wa4[i]*di5 + isign*wa4[i+1]*dr5; + RE(c3) = MUL_R_C((RE(cc[ac]) - RE(cc[ac+ido])), taui)*isign; + IM(c3) = MUL_R_C((IM(cc[ac]) - IM(cc[ac+ido])), taui)*isign; + + RE(d2) = RE(c2) - IM(c3); + IM(d3) = IM(c2) - RE(c3); + RE(d3) = RE(c2) + IM(c3); + IM(d2) = IM(c2) + RE(c3); + + RE(ch[ah+l1*ido]) = MUL_R_C(RE(d2),RE(wa1[i])) - MUL_R_C(IM(d2),IM(wa1[i]))*isign; + IM(ch[ah+l1*ido]) = MUL_R_C(IM(d2),RE(wa1[i])) + MUL_R_C(RE(d2),IM(wa1[i]))*isign; + RE(ch[ah+l1*2*ido]) = MUL_R_C(RE(d3),RE(wa2[i])) - MUL_R_C(IM(d3),IM(wa2[i]))*isign; + IM(ch[ah+l1*2*ido]) = MUL_R_C(IM(d3),RE(wa2[i])) + MUL_R_C(RE(d3),IM(wa2[i]))*isign; } } } } -static void passf(uint16_t *nac, uint16_t ido, uint16_t ip, uint16_t l1, - uint16_t idl1, real_t *cc, real_t *ch, real_t *wa, - int8_t isign) +static void passf4(uint16_t ido, uint16_t l1, complex_t *cc, complex_t *ch, + complex_t *wa1, complex_t *wa2, complex_t *wa3, int8_t isign) { - uint16_t idij, idlj, idot, ipph, i, j, k, l, jc, lc, ik, nt, idj, idl; - uint16_t inc, idp; - real_t wai, war; - - idot = ido / 2; - nt = ip*idl1; - ipph = (ip+1) / 2; - idp = ip*ido; + uint16_t i, k, ac, ah; + complex_t c2, c3, c4, t1, t2, t3, t4; - if (ido >= l1) + if (ido == 1) { - for (j = 1; j < ipph; j++) - { - jc = ip - j; - - for (k = 0; k < l1; k++) - { - for (i = 0; i < ido; i++) - { - ch[i+(k+j*l1)*ido] = cc[i+(j+k*ip)*ido] + cc[i+(jc+k*ip)*ido]; - ch[i+(k+jc*l1)*ido] = cc[i+(j+k*ip)*ido] - cc[i+(jc+k*ip)*ido]; - } - } - } - for (k = 0; k < l1; k++) { - for (i = 0; i < ido; i++) - ch[i+k*ido] = cc[i+k*ip*ido]; + ac = 4*k; + ah = k; + + RE(t2) = RE(cc[ac]) + RE(cc[ac+2]); + IM(t2) = IM(cc[ac]) + IM(cc[ac+2]); + RE(t3) = RE(cc[ac+1]) + RE(cc[ac+3]); + IM(t3) = IM(cc[ac+1]) + IM(cc[ac+3]); + RE(t1) = RE(cc[ac]) - RE(cc[ac+2]); + IM(t1) = IM(cc[ac]) - IM(cc[ac+2]); + RE(t4) = IM(cc[ac+3]) - IM(cc[ac+1]); + IM(t4) = RE(cc[ac+1]) - RE(cc[ac+3]); + + RE(ch[ah]) = RE(t2) + RE(t3); + IM(ch[ah]) = IM(t2) + IM(t3); + RE(ch[ah+l1]) = RE(t1) + RE(t4)*isign; + IM(ch[ah+l1]) = IM(t1) + IM(t4)*isign; + RE(ch[ah+2*l1]) = RE(t2) - RE(t3); + IM(ch[ah+2*l1]) = IM(t2) - IM(t3); + RE(ch[ah+3*l1]) = RE(t1) - RE(t4)*isign; + IM(ch[ah+3*l1]) = IM(t1) - IM(t4)*isign; } } else { - for (j = 1; j < ipph; j++) + for (k = 0; k < l1; k++) { - jc = ip - j; - for (i = 0; i < ido; i++) { - for (k = 0; k < l1; k++) - { - ch[i+(k+j*l1)*ido] = cc[i+(j+k*ip)*ido] + cc[i+(jc+k*ip)*ido]; - ch[i+(k+jc*l1)*ido] = cc[i+(j+k*ip)*ido] - cc[i+(jc+k*ip)*ido]; - } - } - } - - for (i = 0; i < ido; i++) - { - for (k = 0; k < l1; k++) - ch[i+k*ido] = cc[i+k*ip*ido]; - } - } - - idl = 2 - ido; - inc = 0; - - for (l = 1; l < ipph; l++) - { - lc = ip - l; - idl += ido; - - for (ik = 0; ik < idl1; ik++) - { - cc[ik+l*idl1] = ch[ik] + wa[idl-2]*ch[ik+idl1]; - cc[ik+lc*idl1] = isign*wa[idl-1]*ch[ik+(ip-1)*idl1]; - } - - idlj = idl; - inc += ido; - - for (j = 2; j < ipph; j++) - { - jc = ip - j; - idlj += inc; - - if (idlj > idp) - idlj -= idp; - - war = wa[idlj-2]; - wai = wa[idlj-1]; + ac = i + 4*k*ido; + ah = i + k*ido; - for (ik = 0; ik < idl1; ik++) - { - cc[ik+l*idl1] += war*ch[ik+j*idl1]; - cc[ik+lc*idl1] += isign*wai*ch[ik+jc*idl1]; + RE(t2) = RE(cc[ac]) + RE(cc[ac+2*ido]); + IM(t2) = IM(cc[ac]) + IM(cc[ac+2*ido]); + RE(t3) = RE(cc[ac+ido]) + RE(cc[ac+3*ido]); + IM(t3) = IM(cc[ac+ido]) + IM(cc[ac+3*ido]); + RE(t1) = RE(cc[ac]) - RE(cc[ac+2*ido]); + IM(t1) = IM(cc[ac]) - IM(cc[ac+2*ido]); + RE(t4) = IM(cc[ac+3*ido]) - IM(cc[ac+ido]); + IM(t4) = RE(cc[ac+ido]) - RE(cc[ac+3*ido]); + + RE(ch[ah]) = RE(t2) + RE(t3); + IM(ch[ah]) = IM(t2) + IM(t3); + + RE(c2) = RE(t1) + RE(t4)*isign; + IM(c2) = IM(t1) + IM(t4)*isign; + RE(c3) = RE(t2) - RE(t3); + IM(c3) = IM(t2) - IM(t3); + RE(c4) = RE(t1) - RE(t4)*isign; + IM(c4) = IM(t1) - IM(t4)*isign; + + RE(ch[ah+l1*ido]) = MUL_R_C(RE(c2),RE(wa1[i])) - MUL_R_C(IM(c2),IM(wa1[i]))*isign; + IM(ch[ah+l1*ido]) = MUL_R_C(IM(c2),RE(wa1[i])) + MUL_R_C(RE(c2),IM(wa1[i]))*isign; + RE(ch[ah+2*l1*ido]) = MUL_R_C(RE(c3),RE(wa2[i])) - MUL_R_C(IM(c3),IM(wa2[i]))*isign; + IM(ch[ah+2*l1*ido]) = MUL_R_C(IM(c3),RE(wa2[i])) + MUL_R_C(RE(c3),IM(wa2[i]))*isign; + RE(ch[ah+3*l1*ido]) = MUL_R_C(RE(c4),RE(wa3[i])) - MUL_R_C(IM(c4),IM(wa3[i]))*isign; + IM(ch[ah+3*l1*ido]) = MUL_R_C(IM(c4),RE(wa3[i])) + MUL_R_C(RE(c4),IM(wa3[i]))*isign; } } } +} - for (j = 1; j < ipph; j++) - { - for (ik = 0; ik < idl1; ik++) - ch[ik] += ch[ik+j*idl1]; - } - - for (j = 1; j < ipph; j++) - { - jc = ip - j; - - for (ik = 1; ik < idl1; ik += 2) - { - ch[ik-1+j*idl1] = cc[ik-1+j*idl1] - cc[ik+jc*idl1]; - ch[ik-1+jc*idl1] = cc[ik-1+j*idl1] + cc[ik+jc*idl1]; - ch[ik+j*idl1] = cc[ik+j*idl1] + cc[ik-1+jc*idl1]; - ch[ik+jc*idl1] = cc[ik+j*idl1] - cc[ik-1+jc*idl1]; - } - } - - *nac = 1; - - if (ido == 2) - return; - - *nac = 0; - for (ik = 0; ik < idl1; ik++) - cc[ik] = ch[ik]; +static void passf5(uint16_t ido, uint16_t l1, complex_t *cc, complex_t *ch, + complex_t *wa1, complex_t *wa2, complex_t *wa3, complex_t *wa4, + int8_t isign) +{ + static real_t tr11 = COEF_CONST(0.309016994374947); + static real_t ti11 = COEF_CONST(0.951056516295154); + static real_t tr12 = COEF_CONST(-0.809016994374947); + static real_t ti12 = COEF_CONST(0.587785252292473); + uint16_t i, k, ac, ah; + complex_t c2, c3, c4, c5, d3, d4, d5, d2, t2, t3, t4, t5; - for (j = 1; j < ip; j++) + if (ido == 1) { for (k = 0; k < l1; k++) { - cc[(k+j*l1)*ido+0] = ch[(k+j*l1)*ido+0]; - cc[(k+j*l1)*ido+1] = ch[(k+j*l1)*ido+1]; - } - } - - if (idot <= l1) - { - idij = 0; - - for (j = 1; j < ip; j++) - { - idij += 2; - - for (i = 3; i < ido; i += 2) - { - idij += 2; - - for (k = 0; k < l1; k++) - { - cc[i-1+(k+j*l1)*ido] = wa[idij-2] * ch[i-1+(k+j*l1)*ido] - - isign * wa[idij-1] * ch[i+(k+j*l1)*ido]; - cc[i+(k+j*l1)*ido] = wa[idij-2] * ch[i+(k+j*l1)*ido] + - isign * wa[idij-1] * ch[i-1+(k+j*l1)*ido]; - } - } + ac = 5*k + 1; + ah = k; + + RE(t2) = RE(cc[ac]) + RE(cc[ac+3]); + IM(t2) = IM(cc[ac]) + IM(cc[ac+3]); + RE(t3) = RE(cc[ac+1]) + RE(cc[ac+2]); + IM(t3) = IM(cc[ac+1]) + IM(cc[ac+2]); + RE(t4) = RE(cc[ac+1]) - RE(cc[ac+2]); + IM(t4) = IM(cc[ac+1]) - IM(cc[ac+2]); + RE(t5) = RE(cc[ac]) - RE(cc[ac+3]); + IM(t5) = IM(cc[ac]) - IM(cc[ac+3]); + + RE(ch[ah]) = RE(cc[ac-1]) + RE(t2) + RE(t3); + IM(ch[ah]) = IM(cc[ac-1]) + IM(t2) + IM(t3); + + RE(c2) = RE(cc[ac-1]) + MUL_R_C(RE(t2),tr11) + MUL_R_C(RE(t3),tr12); + IM(c2) = IM(cc[ac-1]) + MUL_R_C(IM(t2),tr11) + MUL_R_C(IM(t3),tr12); + RE(c3) = RE(cc[ac-1]) + MUL_R_C(RE(t2),tr12) + MUL_R_C(RE(t3),tr11); + IM(c3) = IM(cc[ac-1]) + MUL_R_C(IM(t2),tr12) + MUL_R_C(IM(t3),tr11); + RE(c4) = (MUL_R_C(RE(t5),ti12)*isign - MUL_R_C(RE(t4),ti11)); + IM(c4) = (MUL_R_C(IM(t5),ti12)*isign - MUL_R_C(IM(t4),ti11)); + RE(c5) = (MUL_R_C(RE(t5),ti11)*isign + MUL_R_C(RE(t4),ti12)); + IM(c5) = (MUL_R_C(IM(t5),ti11)*isign + MUL_R_C(IM(t4),ti12)); + + RE(ch[ah+l1]) = RE(c2) - IM(c5); + IM(ch[ah+l1]) = IM(c2) + RE(c5); + RE(ch[ah+2*l1]) = RE(c3) - IM(c4); + IM(ch[ah+2*l1]) = IM(c3) + RE(c4); + RE(ch[ah+3*l1]) = RE(c3) + IM(c4); + IM(ch[ah+3*l1]) = IM(c3) - RE(c4); + RE(ch[ah+4*l1]) = RE(c2) + IM(c5); + IM(ch[ah+4*l1]) = IM(c2) - RE(c5); } } else { - idj = 2 - ido; - - for (j = 1; j < ip; j++) + for (k = 0; k < l1; k++) { - idj += ido; - - for (k = 0; k < l1; k++) + for (i = 0; i < ido; i++) { - idij = idj; - - for (i = 3; i < ido; i += 2) - { - idij += 2; - cc[i-1+(k+j*l1)*ido] = wa[idij-2] * ch[i-1+(k+j*l1)*ido] - - isign * wa[idij-1] * ch[i+(k+j*l1)*ido]; - cc[i+(k+j*l1)*ido] = wa[idij-2] * ch[i+(k+j*l1)*ido] + - isign * wa[idij-1] * ch[i-1+(k+j*l1)*ido]; - } + ac = i + (k*5 + 1) * ido; + ah = i + k * ido; + + RE(t2) = RE(cc[ac]) + RE(cc[ac+3*ido]); + IM(t2) = IM(cc[ac]) + IM(cc[ac+3*ido]); + RE(t3) = RE(cc[ac+ido]) + RE(cc[ac+2*ido]); + IM(t3) = IM(cc[ac+ido]) + IM(cc[ac+2*ido]); + RE(t4) = RE(cc[ac+ido]) - RE(cc[ac+2*ido]); + IM(t4) = IM(cc[ac+ido]) - IM(cc[ac+2*ido]); + RE(t5) = RE(cc[ac]) - RE(cc[ac+3*ido]); + IM(t5) = IM(cc[ac]) - IM(cc[ac+3*ido]); + + RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2) + RE(t3); + IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2) + IM(t3); + + RE(c2) = RE(cc[ac-ido]) + MUL_R_C(RE(t2),tr11) + MUL_R_C(RE(t3),tr12); + IM(c2) = IM(cc[ac-ido]) + MUL_R_C(IM(t2),tr11) + MUL_R_C(IM(t3),tr12); + RE(c3) = RE(cc[ac-ido]) + MUL_R_C(RE(t2),tr12) + MUL_R_C(RE(t3),tr11); + IM(c3) = IM(cc[ac-ido]) + MUL_R_C(IM(t2),tr12) + MUL_R_C(IM(t3),tr11); + RE(c4) = (MUL_R_C(RE(t5),ti12)*isign - MUL_R_C(RE(t4),ti11)); + IM(c4) = (MUL_R_C(IM(t5),ti12)*isign - MUL_R_C(IM(t4),ti11)); + RE(c5) = (MUL_R_C(RE(t5),ti11)*isign + MUL_R_C(RE(t4),ti12)); + IM(c5) = (MUL_R_C(IM(t5),ti11)*isign + MUL_R_C(IM(t4),ti12)); + + IM(d2) = IM(c2) + RE(c5); + IM(d3) = IM(c3) + RE(c4); + RE(d4) = RE(c3) + IM(c4); + RE(d5) = RE(c2) + IM(c5); + RE(d2) = RE(c2) - IM(c5); + IM(d5) = IM(c2) - RE(c5); + RE(d3) = RE(c3) - IM(c4); + IM(d4) = IM(c3) - RE(c4); + + RE(ch[ah+l1*ido]) = MUL_R_C(RE(d2),RE(wa1[i])) - MUL_R_C(IM(d2),IM(wa1[i]))*isign; + IM(ch[ah+l1*ido]) = MUL_R_C(IM(d2),RE(wa1[i])) + MUL_R_C(RE(d2),IM(wa1[i]))*isign; + RE(ch[ah+2*l1*ido]) = MUL_R_C(RE(d3),RE(wa2[i])) - MUL_R_C(IM(d3),IM(wa2[i]))*isign; + IM(ch[ah+2*l1*ido]) = MUL_R_C(IM(d3),RE(wa2[i])) + MUL_R_C(RE(d3),IM(wa2[i]))*isign; + RE(ch[ah+3*l1*ido]) = MUL_R_C(RE(d4),RE(wa3[i])) - MUL_R_C(IM(d4),IM(wa3[i]))*isign; + IM(ch[ah+3*l1*ido]) = MUL_R_C(IM(d4),RE(wa3[i])) + MUL_R_C(RE(d4),IM(wa3[i]))*isign; + RE(ch[ah+4*l1*ido]) = MUL_R_C(RE(d5),RE(wa4[i])) - MUL_R_C(IM(d5),IM(wa4[i]))*isign; + IM(ch[ah+4*l1*ido]) = MUL_R_C(IM(d5),RE(wa4[i])) + MUL_R_C(RE(d5),IM(wa4[i]))*isign; } } } } - /*---------------------------------------------------------------------- cfftf1, cfftf, cfftb, cffti1, cffti. Complex FFTs. ----------------------------------------------------------------------*/ -INLINE void cfftf1(uint16_t n, real_t *c, real_t *ch, real_t *wa, - uint16_t *ifac, int8_t isign) +INLINE void cfftf1(uint16_t n, complex_t *c, complex_t *ch, + uint16_t *ifac, complex_t *wa, int8_t isign) { - uint16_t idot, i; + uint16_t i; uint16_t k1, l1, l2; - uint16_t na, nf, ip, iw, ix2, ix3, ix4, nac, ido, idl1; + uint16_t na, nf, ip, iw, ix2, ix3, ix4, ido, idl1; nf = ifac[1]; na = 0; @@ -486,92 +345,89 @@ INLINE void cfftf1(uint16_t n, real_t *c, real_t *ch, real_t *wa, ip = ifac[k1]; l2 = ip*l1; ido = n / l2; - idot = ido+ido; - idl1 = idot*l1; + idl1 = ido*l1; switch (ip) { case 2: if (na == 0) - passf2(idot, l1, c, ch, &wa[iw], isign); + passf2(ido, l1, c, ch, &wa[iw], isign); else - passf2(idot, l1, ch, c, &wa[iw], isign); + passf2(ido, l1, ch, c, &wa[iw], isign); na = 1 - na; break; case 3: - ix2 = iw + idot; + ix2 = iw + ido; if (na == 0) - passf3(idot, l1, c, ch, &wa[iw], &wa[ix2], isign); + passf3(ido, l1, c, ch, &wa[iw], &wa[ix2], isign); else - passf3(idot, l1, ch, c, &wa[iw], &wa[ix2], isign); + passf3(ido, l1, ch, c, &wa[iw], &wa[ix2], isign); na = 1 - na; break; case 4: - ix2 = iw + idot; - ix3 = ix2 + idot; + ix2 = iw + ido; + ix3 = ix2 + ido; if (na == 0) - passf4(idot, l1, c, ch, &wa[iw], &wa[ix2], &wa[ix3], isign); + passf4(ido, l1, c, ch, &wa[iw], &wa[ix2], &wa[ix3], isign); else - passf4(idot, l1, ch, c, &wa[iw], &wa[ix2], &wa[ix3], isign); + passf4(ido, l1, ch, c, &wa[iw], &wa[ix2], &wa[ix3], isign); na = 1 - na; break; case 5: - ix2 = iw + idot; - ix3 = ix2 + idot; - ix4 = ix3 + idot; + ix2 = iw + ido; + ix3 = ix2 + ido; + ix4 = ix3 + ido; if (na == 0) - passf5(idot, l1, c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); + passf5(ido, l1, c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); else - passf5(idot, l1, ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); + passf5(ido, l1, ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); na = 1 - na; break; - default: - if (na == 0) - passf(&nac, idot, ip, l1, idl1, c, ch, &wa[iw], isign); - else - passf(&nac, idot, ip, l1, idl1, ch, c, &wa[iw], isign); - - if (nac != 0) - na = 1 - na; - break; } l1 = l2; - iw += (ip-1) * idot; + iw += (ip-1) * ido; } if (na == 0) return; - for (i = 0; i < 2*n; i++) - c[i] = ch[i]; + for (i = 0; i < n; i++) + { + RE(c[i]) = RE(ch[i]); + IM(c[i]) = IM(ch[i]); + } } -void cfftf(cfft_info cfft, real_t *c) +void cfftf(cfft_info *cfft, complex_t *c) { - cfftf1(cfft.n, c, cfft.work, cfft.tab, cfft.ifac, -1); + cfftf1(cfft->n, c, cfft->work, cfft->ifac, cfft->tab, -1); } -void cfftb(cfft_info cfft, real_t *c) +void cfftb(cfft_info *cfft, complex_t *c) { - cfftf1(cfft.n, c, cfft.work, cfft.tab, cfft.ifac, +1); + cfftf1(cfft->n, c, cfft->work, cfft->ifac, cfft->tab, +1); } -static void cffti1(uint16_t n, real_t *wa, uint16_t *ifac) +static void cffti1(uint16_t n, complex_t *wa, uint16_t *ifac) { static uint16_t ntryh[4] = {3, 4, 2, 5}; +#ifndef FIXED_POINT real_t arg, argh, argld, fi; - uint16_t idot, ntry, i, j; - uint16_t i1, k1, l1, l2, ib; - uint16_t ld, ii, nf, ip, nl, nq, nr; uint16_t ido, ipm; + uint16_t i1, k1, l1, l2; + uint16_t ld, ii, ip; +#endif + uint16_t ntry, i, j; + uint16_t ib; + uint16_t nf, nl, nq, nr; nl = n; nf = 0; @@ -610,8 +466,10 @@ startloop: ifac[0] = n; ifac[1] = nf; - argh = 2*M_PI / (real_t)n; - i = 1; + +#ifndef FIXED_POINT + argh = 2.0*M_PI / (real_t)n; + i = 0; l1 = 1; for (k1 = 1; k1 <= nf; k1++) @@ -620,53 +478,73 @@ startloop: ld = 0; l2 = l1*ip; ido = n / l2; - idot = ido + ido + 2; ipm = ip - 1; - for (j = 1; j <= ipm; j++) + for (j = 0; j < ipm; j++) { i1 = i; - wa[i-1] = 1; - wa[i] = 0; + RE(wa[i]) = 1.0; + IM(wa[i]) = 0.0; ld += l1; fi = 0; argld = ld*argh; - for (ii = 4; ii <= idot; ii += 2) + for (ii = 0; ii < ido; ii++) { - i += 2; - fi += 1; - arg = fi*argld; - wa[i-1] = cos(arg); - wa[i] = sin(arg); + i++; + fi++; + arg = fi * argld; + RE(wa[i]) = cos(arg); + IM(wa[i]) = sin(arg); } if (ip > 5) { - wa[i1-1] = wa[i-1]; - wa[i1] = wa[i]; + RE(wa[i1]) = RE(wa[i]); + IM(wa[i1]) = IM(wa[i]); } } l1 = l2; } +#endif } -cfft_info cffti(uint16_t n) +cfft_info *cffti(uint16_t n) { - cfft_info cfft; + cfft_info *cfft = (cfft_info*)malloc(sizeof(cfft_info)); - cfft.n = n; - cfft.work = malloc(2*n*sizeof(real_t)); - cfft.tab = malloc(2*n*sizeof(real_t)); + cfft->n = n; + cfft->work = (complex_t*)malloc(n*sizeof(complex_t)); - cffti1(n, cfft.tab, cfft.ifac); +#ifndef FIXED_POINT + cfft->tab = (complex_t*)malloc(n*sizeof(complex_t)); + + cffti1(n, cfft->tab, cfft->ifac); +#else + cffti1(n, NULL, cfft->ifac); + + switch (n) + { + case 60: cfft->tab = cfft_tab_60; break; + case 64: cfft->tab = cfft_tab_64; break; + case 480: cfft->tab = cfft_tab_480; break; + case 512: cfft->tab = cfft_tab_512; break; +#ifdef LD_DEC + case 240: cfft->tab = cfft_tab_240; break; + case 256: cfft->tab = cfft_tab_256; break; +#endif + } +#endif return cfft; } -void cfftu(cfft_info cfft) +void cfftu(cfft_info *cfft) { - if (cfft.work) free(cfft.work); - if (cfft.tab) free(cfft.tab); -} + if (cfft->work) free(cfft->work); +#ifndef FIXED_POINT + if (cfft->tab) free(cfft->tab); +#endif + if (cfft) free(cfft); +}
\ No newline at end of file |