diff options
47 files changed, 4450 insertions, 1723 deletions
diff --git a/ac3dec/Makefile b/ac3dec/Makefile index 4599a912..8a4791f2 100644 --- a/ac3dec/Makefile +++ b/ac3dec/Makefile @@ -1,10 +1,16 @@ # # Makefile for 'ac3dec' # -# $Id: Makefile 1.1 2001/08/02 13:18:08 kls Exp $ +# $Id: Makefile 1.2 2001/08/09 11:41:39 kls Exp $ + +#OBJS = coeff.o decode.o exponent.o rematrix.o bit_allocate.o crc.o dither.o\ +# imdct.o sanity_check.o bitstream.o debug.o downmix.o parse.o stats.o\ +# downmix_c.o imdct.o + +OBJS = bit_allocate.o bitstream.o coeff.o cpu_accel.o crc.o debug.o decode.o\ + dither.o downmix.o downmix_c.o exponent.o imdct.o imdct_c.o imdct_kni.o\ + parse.o rematrix.o sanity_check.o srfft.o srfft_kni_c.o stats.o -OBJS = coeff.o decode.o exponent.o rematrix.o bit_allocate.o crc.o dither.o\ - imdct.o sanity_check.o bitstream.o debug.o downmix.o parse.o stats.o DEFINES += -DDOLBY_SURROUND @@ -16,7 +22,7 @@ libac3.a: $(OBJS) # Implicit rules: %.o: %.c - gcc -g -O2 -Wall -m486 -c $(DEFINES) $< + gcc -g -O2 -Wall -m486 -I./ -c $(DEFINES) $< clean: rm -f *~ libac3.a $(OBJS) diff --git a/ac3dec/ac3.h b/ac3dec/ac3.h index 4919fc50..d325f3bb 100644 --- a/ac3dec/ac3.h +++ b/ac3dec/ac3.h @@ -22,39 +22,37 @@ * */ -#ifndef AARONS_TYPES -#define AARONS_TYPES -typedef unsigned long long uint_64; -typedef unsigned int uint_32; -typedef unsigned short uint_16; -typedef unsigned char uint_8; - -typedef signed long long sint_64; -typedef signed int sint_32; -typedef signed short sint_16; -typedef signed char sint_8; +#define AC3_BUFFER_SIZE (6*1024*16) + +#ifndef __AC3_H__ +#define __AC3_H__ + +#ifdef __OMS__ +#include <oms/plugin/output_audio.h> +#else +//#include "audio_out.h" #endif -#define AC3_DOLBY_SURR_ENABLE 0x1 -#define AC3_3DNOW_ENABLE 0x2 -#define AC3_MMX_ENABLE 0x4 -#define AC3_ALTIVEC_ENABLE 0x8 - -typedef struct ac3_config_s -{ - //Bit flags that enable various things - uint_32 flags; - //Callback that points the decoder to new stream data - void (*fill_buffer_callback)(uint_8 **, uint_8 **); - //Number of discrete channels in final output (for downmixing) - uint_16 num_output_ch; - //Which channel of a dual mono stream to select - uint_16 dual_mono_ch_sel; -} ac3_config_t; +#include <inttypes.h> -void ac3_init(ac3_config_t *); -uint_32 ac3_decode_data(uint_8 *data_start,uint_8 *data_end, int ac3reset, int *input_pointer, int *output_pointer, char *ac3_data); +#define AC3_DOLBY_SURR_ENABLE (1<<0) +#define AC3_ALTIVEC_ENABLE (1<<1) +typedef struct ac3_config_s { + // Bit flags that enable various things + uint32_t flags; + // Number of discrete channels in final output (for downmixing) + uint16_t num_output_ch; + // Which channel of a dual mono stream to select + uint16_t dual_mono_ch_sel; +} ac3_config_t; +void ac3dec_init (void); +#ifdef __OMS__ +size_t ac3dec_decode_data (plugin_output_audio_t *output, uint8_t *data_start, uint8_t *data_end); +#else +size_t ac3dec_decode_data (uint8_t *data_start ,uint8_t *data_end, int ac3reset, int *input_pointer, int *output_pointer, char *ac3_data); +#endif +#endif diff --git a/ac3dec/ac3_internal.h b/ac3dec/ac3_internal.h index 235189ed..259396fd 100644 --- a/ac3dec/ac3_internal.h +++ b/ac3dec/ac3_internal.h @@ -25,6 +25,8 @@ #define inline #endif +#include <setjmp.h> + /* Exponent strategy constants */ #define EXP_REUSE (0) #define EXP_D15 (1) @@ -43,7 +45,8 @@ typedef float stream_samples_t[6][256]; /* global config structure */ extern ac3_config_t ac3_config; /* global error flag */ -extern uint_32 error_flag; +extern jmp_buf error_jmp_mark; +#define HANDLE_ERROR() longjmp (error_jmp_mark, -1) /* Everything you wanted to know about band structure */ /* @@ -72,273 +75,279 @@ extern uint_32 error_flag; typedef struct syncinfo_s { - uint_32 magic; + uint32_t magic; /* Sync word == 0x0B77 */ - uint_16 syncword; + uint16_t syncword; /* crc for the first 5/8 of the sync block */ - /* uint_16 crc1; */ + /* uint16_t crc1; */ /* Stream Sampling Rate (kHz) 0 = 48, 1 = 44.1, 2 = 32, 3 = reserved */ - uint_16 fscod; + uint16_t fscod; /* Frame size code */ - uint_16 frmsizecod; + uint16_t frmsizecod; /* Information not in the AC-3 bitstream, but derived */ /* Frame size in 16 bit words */ - uint_16 frame_size; + uint16_t frame_size; /* Bit rate in kilobits */ - uint_16 bit_rate; + uint16_t bit_rate; /* sampling rate in hertz */ - uint_32 sampling_rate; + uint32_t sampling_rate; } syncinfo_t; typedef struct bsi_s { - uint_32 magic; + uint32_t magic; /* Bit stream identification == 0x8 */ - uint_16 bsid; + uint16_t bsid; /* Bit stream mode */ - uint_16 bsmod; + uint16_t bsmod; /* Audio coding mode */ - uint_16 acmod; + uint16_t acmod; /* If we're using the centre channel then */ /* centre mix level */ - uint_16 cmixlev; + uint16_t cmixlev; /* If we're using the surround channel then */ /* surround mix level */ - uint_16 surmixlev; + uint16_t surmixlev; /* If we're in 2/0 mode then */ /* Dolby surround mix level - NOT USED - */ - uint_16 dsurmod; + uint16_t dsurmod; /* Low frequency effects on */ - uint_16 lfeon; + uint16_t lfeon; /* Dialogue Normalization level */ - uint_16 dialnorm; + uint16_t dialnorm; /* Compression exists */ - uint_16 compre; + uint16_t compre; /* Compression level */ - uint_16 compr; + uint16_t compr; /* Language code exists */ - uint_16 langcode; + uint16_t langcode; /* Language code */ - uint_16 langcod; + uint16_t langcod; /* Audio production info exists*/ - uint_16 audprodie; - uint_16 mixlevel; - uint_16 roomtyp; + uint16_t audprodie; + uint16_t mixlevel; + uint16_t roomtyp; /* If we're in dual mono mode (acmod == 0) then extra stuff */ - uint_16 dialnorm2; - uint_16 compr2e; - uint_16 compr2; - uint_16 langcod2e; - uint_16 langcod2; - uint_16 audprodi2e; - uint_16 mixlevel2; - uint_16 roomtyp2; + uint16_t dialnorm2; + uint16_t compr2e; + uint16_t compr2; + uint16_t langcod2e; + uint16_t langcod2; + uint16_t audprodi2e; + uint16_t mixlevel2; + uint16_t roomtyp2; /* Copyright bit */ - uint_16 copyrightb; + uint16_t copyrightb; /* Original bit */ - uint_16 origbs; + uint16_t origbs; /* Timecode 1 exists */ - uint_16 timecod1e; + uint16_t timecod1e; /* Timecode 1 */ - uint_16 timecod1; + uint16_t timecod1; /* Timecode 2 exists */ - uint_16 timecod2e; + uint16_t timecod2e; /* Timecode 2 */ - uint_16 timecod2; + uint16_t timecod2; /* Additional bit stream info exists */ - uint_16 addbsie; + uint16_t addbsie; /* Additional bit stream length - 1 (in bytes) */ - uint_16 addbsil; + uint16_t addbsil; /* Additional bit stream information (max 64 bytes) */ - uint_8 addbsi[64]; + uint8_t addbsi[64]; /* Information not in the AC-3 bitstream, but derived */ /* Number of channels (excluding LFE) * Derived from acmod */ - uint_16 nfchans; + uint16_t nfchans; } bsi_t; /* more pain */ typedef struct audblk_s { - uint_32 magic1; + uint32_t magic1; /* block switch bit indexed by channel num */ - uint_16 blksw[5]; + uint16_t blksw[5]; /* dither enable bit indexed by channel num */ - uint_16 dithflag[5]; + uint16_t dithflag[5]; /* dynamic range gain exists */ - uint_16 dynrnge; + uint16_t dynrnge; /* dynamic range gain */ - uint_16 dynrng; + uint16_t dynrng; /* if acmod==0 then */ /* dynamic range 2 gain exists */ - uint_16 dynrng2e; + uint16_t dynrng2e; /* dynamic range 2 gain */ - uint_16 dynrng2; + uint16_t dynrng2; /* coupling strategy exists */ - uint_16 cplstre; + uint16_t cplstre; /* coupling in use */ - uint_16 cplinu; + uint16_t cplinu; /* channel coupled */ - uint_16 chincpl[5]; + uint16_t chincpl[5]; /* if acmod==2 then */ /* Phase flags in use */ - uint_16 phsflginu; + uint16_t phsflginu; /* coupling begin frequency code */ - uint_16 cplbegf; + uint16_t cplbegf; /* coupling end frequency code */ - uint_16 cplendf; + uint16_t cplendf; /* coupling band structure bits */ - uint_16 cplbndstrc[18]; + uint16_t cplbndstrc[18]; /* Do coupling co-ords exist for this channel? */ - uint_16 cplcoe[5]; + uint16_t cplcoe[5]; /* Master coupling co-ordinate */ - uint_16 mstrcplco[5]; + uint16_t mstrcplco[5]; /* Per coupling band coupling co-ordinates */ - uint_16 cplcoexp[5][18]; - uint_16 cplcomant[5][18]; + uint16_t cplcoexp[5][18]; + uint16_t cplcomant[5][18]; /* Phase flags for dual mono */ - uint_16 phsflg[18]; + uint16_t phsflg[18]; /* Is there a rematrixing strategy */ - uint_16 rematstr; + uint16_t rematstr; /* Rematrixing bits */ - uint_16 rematflg[4]; + uint16_t rematflg[4]; /* Coupling exponent strategy */ - uint_16 cplexpstr; + uint16_t cplexpstr; /* Exponent strategy for full bandwidth channels */ - uint_16 chexpstr[5]; + uint16_t chexpstr[5]; /* Exponent strategy for lfe channel */ - uint_16 lfeexpstr; + uint16_t lfeexpstr; /* Channel bandwidth for independent channels */ - uint_16 chbwcod[5]; + uint16_t chbwcod[5]; /* The absolute coupling exponent */ - uint_16 cplabsexp; + uint16_t cplabsexp; /* Coupling channel exponents (D15 mode gives 18 * 12 /3 encoded exponents */ - uint_16 cplexps[18 * 12 / 3]; + uint16_t cplexps[18 * 12 / 3]; /* Sanity checking constant */ - uint_32 magic2; + uint32_t magic2; /* fbw channel exponents */ - uint_16 exps[5][252 / 3]; + uint16_t exps[5][252 / 3]; /* channel gain range */ - uint_16 gainrng[5]; + uint16_t gainrng[5]; /* low frequency exponents */ - uint_16 lfeexps[3]; + uint16_t lfeexps[3]; /* Bit allocation info */ - uint_16 baie; + uint16_t baie; /* Slow decay code */ - uint_16 sdcycod; + uint16_t sdcycod; /* Fast decay code */ - uint_16 fdcycod; + uint16_t fdcycod; /* Slow gain code */ - uint_16 sgaincod; + uint16_t sgaincod; /* dB per bit code */ - uint_16 dbpbcod; + uint16_t dbpbcod; /* masking floor code */ - uint_16 floorcod; + uint16_t floorcod; /* SNR offset info */ - uint_16 snroffste; + uint16_t snroffste; /* coarse SNR offset */ - uint_16 csnroffst; + uint16_t csnroffst; /* coupling fine SNR offset */ - uint_16 cplfsnroffst; + uint16_t cplfsnroffst; /* coupling fast gain code */ - uint_16 cplfgaincod; + uint16_t cplfgaincod; /* fbw fine SNR offset */ - uint_16 fsnroffst[5]; + uint16_t fsnroffst[5]; /* fbw fast gain code */ - uint_16 fgaincod[5]; + uint16_t fgaincod[5]; /* lfe fine SNR offset */ - uint_16 lfefsnroffst; + uint16_t lfefsnroffst; /* lfe fast gain code */ - uint_16 lfefgaincod; + uint16_t lfefgaincod; /* Coupling leak info */ - uint_16 cplleake; + uint16_t cplleake; /* coupling fast leak initialization */ - uint_16 cplfleak; + uint16_t cplfleak; /* coupling slow leak initialization */ - uint_16 cplsleak; + uint16_t cplsleak; /* delta bit allocation info */ - uint_16 deltbaie; + uint16_t deltbaie; /* coupling delta bit allocation exists */ - uint_16 cpldeltbae; + uint16_t cpldeltbae; /* fbw delta bit allocation exists */ - uint_16 deltbae[5]; + uint16_t deltbae[5]; /* number of cpl delta bit segments */ - uint_16 cpldeltnseg; + uint16_t cpldeltnseg; /* coupling delta bit allocation offset */ - uint_16 cpldeltoffst[8]; + uint16_t cpldeltoffst[8]; /* coupling delta bit allocation length */ - uint_16 cpldeltlen[8]; + uint16_t cpldeltlen[8]; /* coupling delta bit allocation length */ - uint_16 cpldeltba[8]; + uint16_t cpldeltba[8]; /* number of delta bit segments */ - uint_16 deltnseg[5]; + uint16_t deltnseg[5]; /* fbw delta bit allocation offset */ - uint_16 deltoffst[5][8]; + uint16_t deltoffst[5][8]; /* fbw delta bit allocation length */ - uint_16 deltlen[5][8]; + uint16_t deltlen[5][8]; /* fbw delta bit allocation length */ - uint_16 deltba[5][8]; + uint16_t deltba[5][8]; /* skip length exists */ - uint_16 skiple; + uint16_t skiple; /* skip length */ - uint_16 skipl; + uint16_t skipl; //Removed Feb 2000 -ah + //added Jul 2000 ++dent /* channel mantissas */ - //uint_16 chmant[5][256]; + uint16_t chmant[5][256]; /* coupling mantissas */ - uint_16 cplmant[256]; +// uint16_t cplmant[256]; + + //Added Jun 2000 -MaXX + /* coupling floats */ + float cpl_flt[ 256 ]; //Removed Feb 2000 -ah + //added Jul 2000 ++dent /* coupling mantissas */ - //uint_16 lfemant[7]; + uint16_t lfemant[7]; /* -- Information not in the bitstream, but derived thereof -- */ /* Number of coupling sub-bands */ - uint_16 ncplsubnd; + uint16_t ncplsubnd; /* Number of combined coupling sub-bands * Derived from ncplsubnd and cplbndstrc */ - uint_16 ncplbnd; + uint16_t ncplbnd; /* Number of exponent groups by channel * Derived from strmant, endmant */ - uint_16 nchgrps[5]; + uint16_t nchgrps[5]; /* Number of coupling exponent groups * Derived from cplbegf, cplendf, cplexpstr */ - uint_16 ncplgrps; + uint16_t ncplgrps; /* End mantissa numbers of fbw channels */ - uint_16 endmant[5]; + uint16_t endmant[5]; /* Start and end mantissa numbers for the coupling channel */ - uint_16 cplstrtmant; - uint_16 cplendmant; + uint16_t cplstrtmant; + uint16_t cplendmant; /* Decoded exponent info */ - uint_16 fbw_exp[5][256]; - uint_16 cpl_exp[256]; - uint_16 lfe_exp[7]; + uint16_t fbw_exp[5][256]; + uint16_t cpl_exp[256]; + uint16_t lfe_exp[7]; /* Bit allocation pointer results */ - uint_16 fbw_bap[5][256]; - uint_16 cpl_bap[256]; - uint_16 lfe_bap[7]; + uint16_t fbw_bap[5][256]; + uint16_t cpl_bap[256]; + uint16_t lfe_bap[7]; - uint_32 magic3; + uint32_t magic3; } audblk_t; diff --git a/ac3dec/bit_allocate.c b/ac3dec/bit_allocate.c index 053e09cd..00874340 100644 --- a/ac3dec/bit_allocate.c +++ b/ac3dec/bit_allocate.c @@ -28,46 +28,46 @@ -static inline sint_16 logadd(sint_16 a,sint_16 b); -static sint_16 calc_lowcomp(sint_16 a,sint_16 b0,sint_16 b1,sint_16 bin); -static inline uint_16 min(sint_16 a,sint_16 b); -static inline uint_16 max(sint_16 a,sint_16 b); -static void ba_compute_psd(sint_16 start, sint_16 end, sint_16 exps[], - sint_16 psd[], sint_16 bndpsd[]); - -static void ba_compute_excitation(sint_16 start, sint_16 end,sint_16 fgain, - sint_16 fastleak, sint_16 slowleak, sint_16 is_lfe, sint_16 bndpsd[], - sint_16 excite[]); -static void ba_compute_mask(sint_16 start, sint_16 end, uint_16 fscod, - uint_16 deltbae, uint_16 deltnseg, uint_16 deltoffst[], uint_16 deltba[], - uint_16 deltlen[], sint_16 excite[], sint_16 mask[]); -static void ba_compute_bap(sint_16 start, sint_16 end, sint_16 snroffset, - sint_16 psd[], sint_16 mask[], sint_16 bap[]); +static inline int16_t logadd(int16_t a,int16_t b); +static int16_t calc_lowcomp(int16_t a,int16_t b0,int16_t b1,int16_t bin); +static inline uint16_t min(int16_t a,int16_t b); +static inline uint16_t max(int16_t a,int16_t b); +static void ba_compute_psd(int16_t start, int16_t end, int16_t exps[], + int16_t psd[], int16_t bndpsd[]); + +static void ba_compute_excitation(int16_t start, int16_t end,int16_t fgain, + int16_t fastleak, int16_t slowleak, int16_t is_lfe, int16_t bndpsd[], + int16_t excite[]); +static void ba_compute_mask(int16_t start, int16_t end, uint16_t fscod, + uint16_t deltbae, uint16_t deltnseg, uint16_t deltoffst[], uint16_t deltba[], + uint16_t deltlen[], int16_t excite[], int16_t mask[]); +static void ba_compute_bap(int16_t start, int16_t end, int16_t snroffset, + int16_t psd[], int16_t mask[], int16_t bap[]); /* Misc LUTs for bit allocation process */ -static sint_16 slowdec[] = { 0x0f, 0x11, 0x13, 0x15 }; -static sint_16 fastdec[] = { 0x3f, 0x53, 0x67, 0x7b }; -static sint_16 slowgain[] = { 0x540, 0x4d8, 0x478, 0x410 }; -static sint_16 dbpbtab[] = { 0x000, 0x700, 0x900, 0xb00 }; +static int16_t slowdec[] = { 0x0f, 0x11, 0x13, 0x15 }; +static int16_t fastdec[] = { 0x3f, 0x53, 0x67, 0x7b }; +static int16_t slowgain[] = { 0x540, 0x4d8, 0x478, 0x410 }; +static int16_t dbpbtab[] = { 0x000, 0x700, 0x900, 0xb00 }; -static uint_16 floortab[] = { 0x2f0, 0x2b0, 0x270, 0x230, 0x1f0, 0x170, 0x0f0, 0xf800 }; -static sint_16 fastgain[] = { 0x080, 0x100, 0x180, 0x200, 0x280, 0x300, 0x380, 0x400 }; +static uint16_t floortab[] = { 0x2f0, 0x2b0, 0x270, 0x230, 0x1f0, 0x170, 0x0f0, 0xf800 }; +static int16_t fastgain[] = { 0x080, 0x100, 0x180, 0x200, 0x280, 0x300, 0x380, 0x400 }; -static sint_16 bndtab[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, +static int16_t bndtab[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 34, 37, 40, 43, 46, 49, 55, 61, 67, 73, 79, 85, 97, 109, 121, 133, 157, 181, 205, 229 }; -static sint_16 bndsz[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, +static int16_t bndsz[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 12, 12, 12, 12, 24, 24, 24, 24, 24 }; -static sint_16 masktab[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, +static int16_t masktab[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, 28, 29, 29, 29, 30, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34, 34, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 37, 37, 37, @@ -85,7 +85,7 @@ static sint_16 masktab[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 0, 0, 0 }; -static sint_16 latab[] = { 0x0040, 0x003f, 0x003e, 0x003d, 0x003c, 0x003b, 0x003a, 0x0039, +static int16_t latab[] = { 0x0040, 0x003f, 0x003e, 0x003d, 0x003c, 0x003b, 0x003a, 0x0039, 0x0038, 0x0037, 0x0036, 0x0035, 0x0034, 0x0034, 0x0033, 0x0032, 0x0031, 0x0030, 0x002f, 0x002f, 0x002e, 0x002d, 0x002c, 0x002c, 0x002b, 0x002a, 0x0029, 0x0029, 0x0028, 0x0027, 0x0026, 0x0026, @@ -119,7 +119,7 @@ static sint_16 latab[] = { 0x0040, 0x003f, 0x003e, 0x003d, 0x003c, 0x003b, 0x003 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000}; -static sint_16 hth[][50] = {{ 0x04d0, 0x04d0, 0x0440, 0x0400, 0x03e0, 0x03c0, 0x03b0, 0x03b0, +static int16_t hth[][50] = {{ 0x04d0, 0x04d0, 0x0440, 0x0400, 0x03e0, 0x03c0, 0x03b0, 0x03b0, 0x03a0, 0x03a0, 0x03a0, 0x03a0, 0x03a0, 0x0390, 0x0390, 0x0390, 0x0380, 0x0380, 0x0370, 0x0370, 0x0360, 0x0360, 0x0350, 0x0350, 0x0340, 0x0340, 0x0330, 0x0320, 0x0310, 0x0300, 0x02f0, 0x02f0, @@ -144,38 +144,50 @@ static sint_16 hth[][50] = {{ 0x04d0, 0x04d0, 0x0440, 0x0400, 0x03e0, 0x03c0, 0x 0x0450, 0x04e0 }}; -static sint_16 baptab[] = { 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, +static int16_t baptab[] = { 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15 }; -static sint_16 sdecay; -static sint_16 fdecay; -static sint_16 sgain; -static sint_16 dbknee; -static sint_16 floor; -static sint_16 psd[256]; -static sint_16 bndpsd[256]; -static sint_16 excite[256]; -static sint_16 mask[256]; - -static inline uint_16 -max(sint_16 a,sint_16 b) +static int16_t sdecay; +static int16_t fdecay; +static int16_t sgain; +static int16_t dbknee; +static int16_t floor; +static int16_t psd[256]; +static int16_t bndpsd[256]; +static int16_t excite[256]; +static int16_t mask[256]; + + +/** + * + **/ + +static inline uint16_t max(int16_t a,int16_t b) { return (a > b ? a : b); } + + +/** + * + **/ -static inline uint_16 -min(sint_16 a,sint_16 b) +static inline uint16_t min(int16_t a,int16_t b) { return (a < b ? a : b); } -static inline sint_16 -logadd(sint_16 a,sint_16 b) + +/** + * + **/ + +static inline int16_t logadd(int16_t a,int16_t b) { - sint_16 c; - sint_16 address; + int16_t c; + int16_t address; c = a - b; address = min((abs(c) >> 1), 255); @@ -187,15 +199,19 @@ logadd(sint_16 a,sint_16 b) } -void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk) +/** + * + **/ + +void bit_allocate(uint16_t fscod, bsi_t *bsi, audblk_t *audblk) { - uint_16 i; - sint_16 fgain; - sint_16 snroffset; - sint_16 start; - sint_16 end; - sint_16 fastleak; - sint_16 slowleak; + uint16_t i; + int16_t fgain; + int16_t snroffset; + int16_t start; + int16_t end; + int16_t fastleak; + int16_t slowleak; /* Only perform bit_allocation if the exponents have changed or we * have new sideband information */ @@ -217,11 +233,10 @@ void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk) if(!audblk->csnroffst && !audblk->fsnroffst[0] && !audblk->fsnroffst[1] && !audblk->fsnroffst[2] && !audblk->fsnroffst[3] && !audblk->fsnroffst[4] && - !audblk->cplfsnroffst && !audblk->lfefsnroffst) - { - memset(audblk->fbw_bap,0,sizeof(uint_16) * 256 * 5); - memset(audblk->cpl_bap,0,sizeof(uint_16) * 256); - memset(audblk->lfe_bap,0,sizeof(uint_16) * 7); + !audblk->cplfsnroffst && !audblk->lfefsnroffst) { + memset(audblk->fbw_bap,0,sizeof(uint16_t) * 256 * 5); + memset(audblk->cpl_bap,0,sizeof(uint16_t) * 256); + memset(audblk->lfe_bap,0,sizeof(uint16_t) * 7); return; } @@ -245,8 +260,7 @@ void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk) ba_compute_bap(start, end, snroffset, psd, mask, audblk->fbw_bap[i]); } - if(audblk->cplinu) - { + if(audblk->cplinu) { start = audblk->cplstrtmant; end = audblk->cplendmant; fgain = fastgain[audblk->cplfgaincod]; @@ -264,8 +278,7 @@ void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk) ba_compute_bap(start, end, snroffset, psd, mask, audblk->cpl_bap); } - if(bsi->lfeon) - { + if(bsi->lfeon) { start = 0; end = 7; fgain = fastgain[audblk->lfefgaincod]; @@ -285,15 +298,18 @@ void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk) } -static void ba_compute_psd(sint_16 start, sint_16 end, sint_16 exps[], - sint_16 psd[], sint_16 bndpsd[]) +/** + * + **/ + +static void ba_compute_psd(int16_t start, int16_t end, int16_t exps[], + int16_t psd[], int16_t bndpsd[]) { int bin,i,j,k; - sint_16 lastbin = 0; + int16_t lastbin = 0; /* Map the exponents into dBs */ - for (bin=start; bin<end; bin++) - { + for (bin=start; bin<end; bin++) { psd[bin] = (3072 - (exps[bin] << 7)); } @@ -301,14 +317,12 @@ static void ba_compute_psd(sint_16 start, sint_16 end, sint_16 exps[], j = start; k = masktab[start]; - do - { + do { lastbin = min(bndtab[k] + bndsz[k], end); bndpsd[k] = psd[j]; j++; - for (i = j; i < lastbin; i++) - { + for (i = j; i < lastbin; i++) { bndpsd[k] = logadd(bndpsd[k], psd[j]); j++; } @@ -317,49 +331,49 @@ static void ba_compute_psd(sint_16 start, sint_16 end, sint_16 exps[], } while (end > lastbin); } -static void ba_compute_excitation(sint_16 start, sint_16 end,sint_16 fgain, - sint_16 fastleak, sint_16 slowleak, sint_16 is_lfe, sint_16 bndpsd[], - sint_16 excite[]) + +/** + * + **/ + +static void ba_compute_excitation(int16_t start, int16_t end,int16_t fgain, + int16_t fastleak, int16_t slowleak, int16_t is_lfe, int16_t bndpsd[], + int16_t excite[]) { int bin; - sint_16 bndstrt; - sint_16 bndend; - sint_16 lowcomp = 0; - sint_16 begin = 0; + int16_t bndstrt; + int16_t bndend; + int16_t lowcomp = 0; + int16_t begin = 0; /* Compute excitation function */ bndstrt = masktab[start]; bndend = masktab[end - 1] + 1; - if (bndstrt == 0) /* For fbw and lfe channels */ - { + if (bndstrt == 0) { /* For fbw and lfe channels */ lowcomp = calc_lowcomp(lowcomp, bndpsd[0], bndpsd[1], 0); excite[0] = bndpsd[0] - fgain - lowcomp; lowcomp = calc_lowcomp(lowcomp, bndpsd[1], bndpsd[2], 1); excite[1] = bndpsd[1] - fgain - lowcomp; begin = 7 ; - /* Note: Do not call calc_lowcomp() for the last band of the lfe channel, (bin = 6) */ - for (bin = 2; bin < 7; bin++) - { +// Note: Do not call calc_lowcomp() for the last band of the lfe channel,(bin=6) + for (bin = 2; bin < 7; bin++) { if (!(is_lfe && (bin == 6))) lowcomp = calc_lowcomp(lowcomp, bndpsd[bin], bndpsd[bin+1], bin); fastleak = bndpsd[bin] - fgain; slowleak = bndpsd[bin] - sgain; excite[bin] = fastleak - lowcomp; - if (!(is_lfe && (bin == 6))) - { - if (bndpsd[bin] <= bndpsd[bin+1]) - { + if (!(is_lfe && (bin == 6))) { + if (bndpsd[bin] <= bndpsd[bin+1]) { begin = bin + 1 ; break; } } } - for (bin = begin; bin < min(bndend, 22); bin++) - { + for (bin = begin; bin < min(bndend, 22); bin++) { if (!(is_lfe && (bin == 6))) lowcomp = calc_lowcomp(lowcomp, bndpsd[bin], bndpsd[bin+1], bin); fastleak -= fdecay ; @@ -371,12 +385,9 @@ static void ba_compute_excitation(sint_16 start, sint_16 end,sint_16 fgain, begin = 22; } else /* For coupling channel */ - { begin = bndstrt; - } - for (bin = begin; bin < bndend; bin++) - { + for (bin = begin; bin < bndend; bin++) { fastleak -= fdecay; fastleak = max(fastleak, bndpsd[bin] - fgain); slowleak -= sdecay; @@ -385,49 +396,47 @@ static void ba_compute_excitation(sint_16 start, sint_16 end,sint_16 fgain, } } -static void ba_compute_mask(sint_16 start, sint_16 end, uint_16 fscod, - uint_16 deltbae, uint_16 deltnseg, uint_16 deltoffst[], uint_16 deltba[], - uint_16 deltlen[], sint_16 excite[], sint_16 mask[]) + +/** + * + **/ + +static void ba_compute_mask(int16_t start, int16_t end, uint16_t fscod, + uint16_t deltbae, uint16_t deltnseg, uint16_t deltoffst[], uint16_t deltba[], + uint16_t deltlen[], int16_t excite[], int16_t mask[]) { int bin,k; - sint_16 bndstrt; - sint_16 bndend; - sint_16 delta; + int16_t bndstrt; + int16_t bndend; + int16_t delta; bndstrt = masktab[start]; bndend = masktab[end - 1] + 1; /* Compute the masking curve */ - for (bin = bndstrt; bin < bndend; bin++) - { - if (bndpsd[bin] < dbknee) - { + for (bin = bndstrt; bin < bndend; bin++) { + if (bndpsd[bin] < dbknee) { excite[bin] += ((dbknee - bndpsd[bin]) >> 2); } mask[bin] = max(excite[bin], hth[fscod][bin]); } /* Perform delta bit modulation if necessary */ - if ((deltbae == DELTA_BIT_REUSE) || (deltbae == DELTA_BIT_NEW)) - { - sint_16 band = 0; - sint_16 seg = 0; + if ((deltbae == DELTA_BIT_REUSE) || (deltbae == DELTA_BIT_NEW)) { + int16_t band = 0; + int16_t seg = 0; - for (seg = 0; seg < deltnseg+1; seg++) - { + for (seg = 0; seg < deltnseg+1; seg++) { band += deltoffst[seg]; - if (deltba[seg] >= 4) - { + + if (deltba[seg] >= 4) { delta = (deltba[seg] - 3) << 7; - } - else - { + } else { delta = (deltba[seg] - 4) << 7; } - for (k = 0; k < deltlen[seg]; k++) - { + for (k = 0; k < deltlen[seg]; k++) { mask[band] += delta; band++; } @@ -435,19 +444,23 @@ static void ba_compute_mask(sint_16 start, sint_16 end, uint_16 fscod, } } -static void ba_compute_bap(sint_16 start, sint_16 end, sint_16 snroffset, - sint_16 psd[], sint_16 mask[], sint_16 bap[]) + +/** + * + **/ + +static void ba_compute_bap(int16_t start, int16_t end, int16_t snroffset, + int16_t psd[], int16_t mask[], int16_t bap[]) { int i,j,k; - sint_16 lastbin = 0; - sint_16 address = 0; + int16_t lastbin = 0; + int16_t address = 0; /* Compute the bit allocation pointer for each bin */ i = start; j = masktab[start]; - do - { + do { lastbin = min(bndtab[j] + bndsz[j], end); mask[j] -= snroffset; mask[j] -= floor; @@ -457,8 +470,7 @@ static void ba_compute_bap(sint_16 start, sint_16 end, sint_16 snroffset, mask[j] &= 0x1fe0; mask[j] += floor; - for (k = i; k < lastbin; k++) - { + for (k = i; k < lastbin; k++) { address = (psd[i] - mask[j]) >> 5; address = min(63, max(0, address)); bap[i] = baptab[address]; @@ -468,25 +480,25 @@ static void ba_compute_bap(sint_16 start, sint_16 end, sint_16 snroffset, } while (end > lastbin); } -static sint_16 -calc_lowcomp(sint_16 a,sint_16 b0,sint_16 b1,sint_16 bin) + +/** + * + **/ + +static int16_t calc_lowcomp (int16_t a, int16_t b0, int16_t b1, int16_t bin) { - if (bin < 7) - { + if (bin < 7) { if ((b0 + 256) == b1) a = 384; else if (b0 > b1) a = max(0, a - 64); - } - else if (bin < 20) - { + } else if (bin < 20) { if ((b0 + 256) == b1) a = 320; else if (b0 > b1) a = max(0, a - 64) ; - } - else + } else a = max(0, a - 128); return(a); diff --git a/ac3dec/bit_allocate.h b/ac3dec/bit_allocate.h index e48b0b2b..a6a3c770 100644 --- a/ac3dec/bit_allocate.h +++ b/ac3dec/bit_allocate.h @@ -21,4 +21,4 @@ * */ -void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk); +void bit_allocate(uint16_t fscod, bsi_t *bsi, audblk_t *audblk); diff --git a/ac3dec/bitstream.c b/ac3dec/bitstream.c index 296d5ee3..f48d2408 100644 --- a/ac3dec/bitstream.c +++ b/ac3dec/bitstream.c @@ -23,54 +23,57 @@ #include <stdlib.h> #include <stdio.h> +#include <inttypes.h> + +#include <bswap.h> +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif #include "ac3.h" #include "ac3_internal.h" #include "bitstream.h" -uint_8 *buffer_start = 0; -uint_32 bits_left = 0; -uint_32 current_word; +uint32_t bits_left = 0; +uint64_t current_word; +uint64_t *buffer_start = 0; + + +static inline uint64_t getdword (void) +{ + return be2me_64 (*buffer_start++); +} + -static inline void -bitstream_fill_current() +static inline void bitstream_fill_current (void) { - current_word = *((uint_32*)buffer_start)++; - current_word = swab32(current_word); + //current_word = bswap_64 (*buffer_start++); + current_word = getdword (); } -// -// The fast paths for _get is in the -// bitstream.h header file so it can be inlined. -// -// The "bottom half" of this routine is suffixed _bh -// -// -ah -// - -uint_32 -bitstream_get_bh(uint_32 num_bits) + +uint32_t bitstream_get_bh (uint32_t num_bits) { - uint_32 result; + uint32_t result; num_bits -= bits_left; - result = (current_word << (32 - bits_left)) >> (32 - bits_left); + result = (current_word << (64 - bits_left)) >> (64 - bits_left); bitstream_fill_current(); if(num_bits != 0) - result = (result << num_bits) | (current_word >> (32 - num_bits)); + result = (result << num_bits) | (current_word >> (64 - num_bits)); - bits_left = 32 - num_bits; + bits_left = 64 - num_bits; return result; } -void -bitstream_init(uint_8 *start) + +void bitstream_init (uint8_t *start) { //initialize the start of the buffer - buffer_start = start; + buffer_start = (uint64_t *) start; bits_left = 0; } diff --git a/ac3dec/bitstream.h b/ac3dec/bitstream.h index 33519306..f34726a7 100644 --- a/ac3dec/bitstream.h +++ b/ac3dec/bitstream.h @@ -21,56 +21,24 @@ * */ -//My new and improved vego-matic endian swapping routine -//(stolen from the kernel) -#ifdef WORDS_BIGENDIAN -# define swab32(x) (x) +#include <inttypes.h> -#else +extern uint32_t bits_left; +extern uint64_t current_word; -# if defined (__i386__) +void bitstream_init(uint8_t *start); +inline uint32_t bitstream_get_bh(uint32_t num_bits); -# define swab32(x) __i386_swab32(x) - static inline const uint_32 __i386_swab32(uint_32 x) - { - __asm__("bswap %0" : "=r" (x) : "0" (x)); - return x; - } - -# else - -# define swab32(x)\ -((((uint_8*)&x)[0] << 24) | (((uint_8*)&x)[1] << 16) | \ - (((uint_8*)&x)[2] << 8) | (((uint_8*)&x)[3])) - -# endif -#endif - -extern uint_32 bits_left; -extern uint_32 current_word; - -void bitstream_init(uint_8 *start); - -uint_8 bitstream_get_byte(void); - -uint_8 *bitstream_get_buffer_start(void); -void bitstream_buffer_frame(uint_32 frame_size); - -uint_32 bitstream_get_bh(uint_32 num_bits); - -static inline uint_32 -bitstream_get(uint_32 num_bits) +static inline uint32_t bitstream_get (uint32_t num_bits) { - uint_32 result; - - if(num_bits < bits_left) - { - result = (current_word << (32 - bits_left)) >> (32 - num_bits); + uint32_t result; + + if (num_bits < bits_left) { + result = (current_word << (64 - bits_left)) >> (64 - num_bits); bits_left -= num_bits; return result; } - return bitstream_get_bh(num_bits); + return bitstream_get_bh (num_bits); } - diff --git a/ac3dec/bswap.h b/ac3dec/bswap.h new file mode 100644 index 00000000..73d398aa --- /dev/null +++ b/ac3dec/bswap.h @@ -0,0 +1,80 @@ +/***** +* +* This file is part of the OMS program. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2, or (at your option) +* any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; see the file COPYING. If not, write to +* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. +* +*****/ + +#ifndef __BSWAP_H__ +#define __BSWAP_H__ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#ifdef HAVE_BYTESWAP_H +#include <byteswap.h> +#else + +#include <inttypes.h> + +#ifdef WORDS_BIGENDIAN +// FIXME these need to actually swap ;) +#define bswap_16(x) (x) +#define bswap_32(x) (x) +#define bswap_64(x) (x) +#else +// This is wrong, 'cannot take address of ...' +#define bswap_16(x) ((((uint8_t*)&x)[2] << 8) \ + | (((uint8_t*)&x)[3])) + +// code from bits/byteswap.h (C) 1997, 1998 Free Software Foundation, Inc. +#define bswap_32(x) \ + ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \ + (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24)) + +#define bswap_64(x) \ + (__extension__ \ + ({ union { __extension__ unsigned long long int __ll; \ + unsigned long int __l[2]; } __w, __r; \ + __w.__ll = (x); \ + __r.__l[0] = bswap_32 (__w.__l[1]); \ + __r.__l[1] = bswap_32 (__w.__l[0]); \ + __r.__ll; })) +#endif + +#endif + +// be2me ... BigEndian to MachineEndian +// le2me ... LittleEndian to MachineEndian + +#ifdef WORDS_BIGENDIAN +#define be2me_16(x) (x) +#define be2me_32(x) (x) +#define be2me_64(x) (x) +#define le2me_16(x) bswap_16(x) +#define le2me_32(x) bswap_32(x) +#define le2me_64(x) bswap_64(x) +#else +#define be2me_16(x) bswap_16(x) +#define be2me_32(x) bswap_32(x) +#define be2me_64(x) bswap_64(x) +#define le2me_16(x) (x) +#define le2me_32(x) (x) +#define le2me_64(x) (x) +#endif + +#endif diff --git a/ac3dec/cmplx.h b/ac3dec/cmplx.h new file mode 100644 index 00000000..9f74721e --- /dev/null +++ b/ac3dec/cmplx.h @@ -0,0 +1,29 @@ +/***** +* +* This file is part of the OMS program. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2, or (at your option) +* any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; see the file COPYING. If not, write to +* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. +* +*****/ + +#ifndef __COMPLEX_H__ +#define __COMPLEX_H__ + +typedef struct complex { + float re; + float im; +} complex_t; + +#endif diff --git a/ac3dec/coeff.c b/ac3dec/coeff.c index b9f03ff6..7d5579ca 100644 --- a/ac3dec/coeff.c +++ b/ac3dec/coeff.c @@ -28,51 +28,183 @@ #include "ac3_internal.h" -#include "decode.h" #include "bitstream.h" #include "dither.h" #include "coeff.h" + // //Lookup tables of 0.15 two's complement quantization values // -static const uint_16 q_1[3] = +#define Q0 ((-2 << 15) / 3.0) +#define Q1 (0) +#define Q2 ((2 << 15) / 3.0) + +static const float q_1_0[ 32 ] = { - ( -2 << 15)/3, 0,( 2 << 15)/3 -}; + Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0, + Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1, + Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2, + 0,0,0,0,0 +}; -static const uint_16 q_2[5] = +static const float q_1_1[ 32 ] = { - ( -4 << 15)/5,( -2 << 15)/5, 0, - ( 2 << 15)/5,( 4 << 15)/5 -}; + Q0,Q0,Q0,Q1,Q1,Q1,Q2,Q2,Q2, + Q0,Q0,Q0,Q1,Q1,Q1,Q2,Q2,Q2, + Q0,Q0,Q0,Q1,Q1,Q1,Q2,Q2,Q2, + 0,0,0,0,0 +}; -static const uint_16 q_3[7] = +static const float q_1_2[ 32 ] = +{ + Q0,Q1,Q2,Q0,Q1,Q2,Q0,Q1,Q2, + Q0,Q1,Q2,Q0,Q1,Q2,Q0,Q1,Q2, + Q0,Q1,Q2,Q0,Q1,Q2,Q0,Q1,Q2, + 0,0,0,0,0 +}; + +#undef Q0 +#undef Q1 +#undef Q2 + +#define Q0 ((-4 << 15) / 5.0) +#define Q1 ((-2 << 15) / 5.0) +#define Q2 (0) +#define Q3 ((2 << 15) / 5.0) +#define Q4 ((4 << 15) / 5.0) + +static const float q_2_0[ 128 ] = +{ + Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0, + Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0, + Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1, + Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1, + Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2, + Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2, + Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3, + Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3, + Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4, + Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4, + 0,0,0 +}; + +static const float q_2_1[ 128 ] = +{ + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1, + Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3, + Q4,Q4,Q4,Q4,Q4,Q0,Q0,Q0,Q0,Q0, + Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2, + Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1, + Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3, + Q4,Q4,Q4,Q4,Q4,Q0,Q0,Q0,Q0,Q0, + Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2, + Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1, + Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3, + Q4,Q4,Q4,Q4,Q4,0,0,0 + }; + +static const float q_2_2[ 128 ] = + { + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,0,0,0 +}; + +#undef Q0 +#undef Q1 +#undef Q2 +#undef Q3 +#undef Q4 + +static const float q_3[7] = { - ( -6 << 15)/7,( -4 << 15)/7,( -2 << 15)/7, 0, - ( 2 << 15)/7,( 4 << 15)/7,( 6 << 15)/7 + (-6 << 15)/7.0, (-4 << 15)/7.0, (-2 << 15)/7.0, 0.0, + ( 2 << 15)/7.0, ( 4 << 15)/7.0, ( 6 << 15)/7.0 }; -static const uint_16 q_4[11] = +#define Q0 ((-10 << 15) / 11.0) +#define Q1 ((-8 << 15) / 11.0) +#define Q2 ((-6 << 15) / 11.0) +#define Q3 ((-4 << 15) / 11.0) +#define Q4 ((-2 << 15) / 11.0) +#define Q5 (0) +#define Q6 ((2 << 15) / 11.0) +#define Q7 ((4 << 15) / 11.0) +#define Q8 ((6 << 15) / 11.0) +#define Q9 ((8 << 15) / 11.0) +#define QA ((10 << 15) / 11.0) + +static const float q_4_0[ 128 ] = { - (-10 << 15)/11,(-8 << 15)/11,(-6 << 15)/11, ( -4 << 15)/11,(-2 << 15)/11, 0, - ( 2 << 15)/11,( 4 << 15)/11,( 6 << 15)/11, ( 8 << 15)/11,(10 << 15)/11 + Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, + Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, + Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, + Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, + Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, + Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, + Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, + Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, + Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, + Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, + QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, + 0, 0, 0, 0, 0, 0, 0 + }; + +static const float q_4_1[ 128 ] = +{ + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + 0, 0, 0, 0, 0, 0, 0 }; -static const uint_16 q_5[15] = +#undef Q0 +#undef Q1 +#undef Q2 +#undef Q3 +#undef Q4 +#undef Q5 +#undef Q6 +#undef Q7 +#undef Q8 +#undef Q9 +#undef QA + +static const float q_5[15] = { - (-14 << 15)/15,(-12 << 15)/15,(-10 << 15)/15, - ( -8 << 15)/15,( -6 << 15)/15,( -4 << 15)/15, - ( -2 << 15)/15, 0 ,( 2 << 15)/15, - ( 4 << 15)/15,( 6 << 15)/15,( 8 << 15)/15, - ( 10 << 15)/15,( 12 << 15)/15,( 14 << 15)/15 -}; + (-14 << 15)/15.0,(-12 << 15)/15.0,(-10 << 15)/15.0, + ( -8 << 15)/15.0,( -6 << 15)/15.0,( -4 << 15)/15.0, + ( -2 << 15)/15.0, 0.0 ,( 2 << 15)/15.0, + ( 4 << 15)/15.0,( 6 << 15)/15.0,( 8 << 15)/15.0, + ( 10 << 15)/15.0,( 12 << 15)/15.0,( 14 << 15)/15.0 +}; // // Scale factors for convert_to_float // -static const uint_32 u32_scale_factors[25] = +static const uint32_t u32_scale_factors[25] = { 0x38000000, //2 ^ -(0 + 15) 0x37800000, //2 ^ -(1 + 15) @@ -104,230 +236,179 @@ static const uint_32 u32_scale_factors[25] = static float *scale_factor = (float*)u32_scale_factors; //These store the persistent state of the packed mantissas -static uint_16 m_1[3]; -static uint_16 m_2[3]; -static uint_16 m_4[2]; -static uint_16 m_1_pointer; -static uint_16 m_2_pointer; -static uint_16 m_4_pointer; +static float q_1[2]; +static float q_2[2]; +static float q_4[1]; +static int32_t q_1_pointer; +static int32_t q_2_pointer; +static int32_t q_4_pointer; +static float __inline__ +coeff_get_float(uint16_t bap, uint16_t dithflag, uint16_t exp); //Conversion from bap to number of bits in the mantissas //zeros account for cases 0,1,2,4 which are special cased -static uint_16 qnttztab[16] = { 0, 0, 0, 3, 0 , 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16}; - -static void coeff_reset(void); -static sint_16 coeff_get_mantissa(uint_16 bap, uint_16 dithflag); -static void coeff_uncouple_ch(float samples[],bsi_t *bsi,audblk_t *audblk,uint_32 ch); - -// -// Convert a 0.15 fixed point number into IEEE single -// precision floating point and scale by 2^-exp -// -static inline float -convert_to_float(uint_16 exp, sint_16 mantissa) +static uint16_t qnttztab[16] = { - float x; - - //the scale by 2^-15 is built into the scale factor table - x = mantissa * scale_factor[exp]; + 0, 0, 0, 3, + 0, 4, 5, 6, + 7, 8, 9, 10, + 11, 12, 14, 16 +}; - return x; -} +static void coeff_reset(void); +static float coeff_get_float(uint16_t bap, uint16_t dithflag, uint16_t exp); +static void coeff_uncouple_ch(float samples[],bsi_t *bsi,audblk_t *audblk,uint32_t ch); -void -coeff_unpack(bsi_t *bsi, audblk_t *audblk, stream_samples_t samples) +void coeff_unpack(bsi_t *bsi, audblk_t *audblk, stream_samples_t samples) { - uint_16 i,j; - uint_32 done_cpl = 0; - sint_16 mantissa; + uint16_t i,j; + uint32_t done_cpl = 0; coeff_reset(); - for(i=0; i< bsi->nfchans; i++) - { + for(i=0; i< bsi->nfchans; i++) { for(j=0; j < audblk->endmant[i]; j++) - { - mantissa = coeff_get_mantissa(audblk->fbw_bap[i][j],audblk->dithflag[i]); - samples[i][j] = convert_to_float(audblk->fbw_exp[i][j],mantissa); - } + samples[i][j] = coeff_get_float(audblk->fbw_bap[i][j], audblk->dithflag[i], audblk->fbw_exp[i][j]); - if(audblk->cplinu && audblk->chincpl[i] && !(done_cpl)) - { + if(audblk->cplinu && audblk->chincpl[i] && !(done_cpl)) { // ncplmant is equal to 12 * ncplsubnd - // Don't dither coupling channel until channel separation so that - // interchannel noise is uncorrelated + // Don't dither coupling channel until channel + // separation so that interchannel noise is uncorrelated for(j=audblk->cplstrtmant; j < audblk->cplendmant; j++) - audblk->cplmant[j] = coeff_get_mantissa(audblk->cpl_bap[j],0); + audblk->cpl_flt[j] = coeff_get_float(audblk->cpl_bap[j],0, audblk->cpl_exp[j]); done_cpl = 1; } } //uncouple the channel if necessary - if(audblk->cplinu) - { - for(i=0; i< bsi->nfchans; i++) - { + if(audblk->cplinu) { + for(i=0; i< bsi->nfchans; i++) { if(audblk->chincpl[i]) coeff_uncouple_ch(samples[i],bsi,audblk,i); } } - if(bsi->lfeon) - { + if(bsi->lfeon) { // There are always 7 mantissas for lfe, no dither for lfe for(j=0; j < 7 ; j++) - { - mantissa = coeff_get_mantissa(audblk->lfe_bap[j],0); - samples[5][j] = convert_to_float(audblk->lfe_exp[j],mantissa); - } + samples[5][j] = coeff_get_float(audblk->lfe_bap[j], 0, audblk->lfe_exp[j]); } } -// -//Fetch a mantissa from the bitstream -// -//The mantissa returned is a signed 0.15 fixed point number -// -static sint_16 -coeff_get_mantissa(uint_16 bap, uint_16 dithflag) + +/** + * Fetch a float from the bitstream + **/ + +static float inline coeff_get_float (uint16_t bap, uint16_t dithflag, uint16_t exp) { - uint_16 mantissa; - uint_16 group_code; + uint16_t dummy = 0; //If the bap is 0-5 then we have special cases to take care of - switch(bap) - { + switch(bap) { case 0: if(dithflag) - mantissa = dither_gen(); - else - mantissa = 0; - break; + return (dither_gen() * scale_factor[exp]); + + return 0.0; case 1: - if(m_1_pointer > 2) - { - group_code = bitstream_get(5); - - if(group_code > 26) - goto error; - - m_1[0] = group_code / 9; - m_1[1] = (group_code % 9) / 3; - m_1[2] = (group_code % 9) % 3; - m_1_pointer = 0; - } - mantissa = m_1[m_1_pointer++]; - mantissa = q_1[mantissa]; - break; + if (q_1_pointer >= 0) + return(q_1[q_1_pointer--] * scale_factor[exp]); + + if ((dummy = bitstream_get (5)) > 26) + goto error; + + q_1[1] = q_1_1[dummy]; + q_1[0] = q_1_2[dummy]; + q_1_pointer = 1; + + return (q_1_0[dummy] * scale_factor[exp]); + case 2: + if(q_2_pointer >= 0) + return (q_2[q_2_pointer--] * scale_factor[exp]); - if(m_2_pointer > 2) - { - group_code = bitstream_get(7); + if ((dummy = bitstream_get (7)) > 124) + goto error; - if(group_code > 124) - goto error; + q_2[1] = q_2_1[dummy]; + q_2[0] = q_2_2[dummy]; + q_2_pointer = 1; - m_2[0] = group_code / 25; - m_2[1] = (group_code % 25) / 5 ; - m_2[2] = (group_code % 25) % 5 ; - m_2_pointer = 0; - } - mantissa = m_2[m_2_pointer++]; - mantissa = q_2[mantissa]; - break; + return (q_2_0[dummy] * scale_factor[exp]); case 3: - mantissa = bitstream_get(3); - - if(mantissa > 6) + if ((dummy = bitstream_get (3)) > 6) goto error; - mantissa = q_3[mantissa]; - break; + return (q_3[dummy] * scale_factor[exp]); case 4: - if(m_4_pointer > 1) - { - group_code = bitstream_get(7); + if(q_4_pointer >= 0) + return (q_4[q_4_pointer--] * scale_factor[exp]); - if(group_code > 120) - goto error; + if ((dummy = bitstream_get (7)) > 120) + goto error; - m_4[0] = group_code / 11; - m_4[1] = group_code % 11; - m_4_pointer = 0; - } - mantissa = m_4[m_4_pointer++]; - mantissa = q_4[mantissa]; - break; + q_4[0] = q_4_1[dummy]; + q_4_pointer = 0; - case 5: - mantissa = bitstream_get(4); + return (q_4_0[dummy] * scale_factor[exp]); - if(mantissa > 14) + case 5: + if ((dummy = bitstream_get (4)) > 14) goto error; - mantissa = q_5[mantissa]; - break; + return (q_5[dummy] * scale_factor[exp]); default: - mantissa = bitstream_get(qnttztab[bap]); - mantissa <<= 16 - qnttztab[bap]; + dummy = bitstream_get(qnttztab[bap]); + dummy <<= 16 - qnttztab[bap]; + return ((int16_t)dummy * scale_factor[exp]); } - return mantissa; - - - error: - if(!error_flag) - fprintf(stderr,"** Invalid mantissa - skipping frame **\n"); - error_flag = 1; - - return 0; +#ifdef DEBUG + fprintf(stderr,"** Invalid mantissa - skipping frame **\n"); +#endif + HANDLE_ERROR(); } -// -// Reset the mantissa state -// -static void -coeff_reset(void) + +/** + * Reset the mantissa state + **/ + +static void coeff_reset(void) { - m_1[2] = m_1[1] = m_1[0] = 0; - m_2[2] = m_2[1] = m_2[0] = 0; - m_4[1] = m_4[0] = 0; - m_1_pointer = m_2_pointer = m_4_pointer = 3; + q_1_pointer = q_2_pointer = q_4_pointer = -1; } -// -// Uncouple the coupling channel into a fbw channel -// -static void -coeff_uncouple_ch(float samples[],bsi_t *bsi,audblk_t *audblk,uint_32 ch) + +/** + * Uncouple the coupling channel into a fbw channel + **/ + +static void coeff_uncouple_ch (float samples[],bsi_t *bsi,audblk_t *audblk,uint32_t ch) { - uint_32 bnd = 0; - uint_32 sub_bnd = 0; - uint_32 i,j; + uint32_t bnd = 0; + uint32_t sub_bnd = 0; + uint32_t i,j; float cpl_coord = 1.0; - uint_32 cpl_exp_tmp; - uint_32 cpl_mant_tmp; - sint_16 mantissa; - + uint32_t cpl_exp_tmp; + uint32_t cpl_mant_tmp; - for(i=audblk->cplstrtmant;i<audblk->cplendmant;) - { - if(!audblk->cplbndstrc[sub_bnd++]) - { + for (i=audblk->cplstrtmant;i<audblk->cplendmant;) { + if (!audblk->cplbndstrc[sub_bnd++]) { cpl_exp_tmp = audblk->cplcoexp[ch][bnd] + 3 * audblk->mstrcplco[ch]; - if(audblk->cplcoexp[ch][bnd] == 15) + if (audblk->cplcoexp[ch][bnd] == 15) cpl_mant_tmp = (audblk->cplcomant[ch][bnd]) << 11; else cpl_mant_tmp = ((0x10) | audblk->cplcomant[ch][bnd]) << 10; - cpl_coord = convert_to_float(cpl_exp_tmp,cpl_mant_tmp) * 8.0f; + cpl_coord = (cpl_mant_tmp * scale_factor[cpl_exp_tmp]) * 8.0f; //Invert the phase for the right channel if necessary if(bsi->acmod == 0x2 && audblk->phsflginu && ch == 1 && audblk->phsflg[bnd]) @@ -336,16 +417,13 @@ coeff_uncouple_ch(float samples[],bsi_t *bsi,audblk_t *audblk,uint_32 ch) bnd++; } - for(j=0;j < 12; j++) - { - //Get new dither values for each channel if necessary, so - //the channels are uncorrelated - if(audblk->dithflag[ch] && audblk->cpl_bap[i] == 0) - mantissa = dither_gen(); + for(j=0;j < 12; j++) { + // Get new dither values for each channel if necessary, + // so the channels are uncorrelated + if(audblk->dithflag[ch] && !audblk->cpl_bap[i]) + samples[i] = cpl_coord * (dither_gen() * scale_factor[audblk->cpl_exp[i]]); else - mantissa = audblk->cplmant[i]; - - samples[i] = cpl_coord * convert_to_float(audblk->cpl_exp[i],mantissa); + samples[i] = cpl_coord * audblk->cpl_flt[i]; i++; } diff --git a/ac3dec/cpu_accel.c b/ac3dec/cpu_accel.c new file mode 100644 index 00000000..e01db83f --- /dev/null +++ b/ac3dec/cpu_accel.c @@ -0,0 +1,129 @@ +/***** +* +* This file is part of the OMS program. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2, or (at your option) +* any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; see the file COPYING. If not, write to +* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. +* +*****/ + +#include <inttypes.h> +#include "mm_accel.h" + +//#ifdef __i386__ +#if 0 + +#ifdef __PIC__ +#define cpuid(op, eax, ebx, ecx, edx) \ + __asm__ __volatile__ ("pushl %%ebx\n\t" \ + "cpuid\n\t" \ + "movl %%ebx, %%esi\n\t" \ + "popl %%ebx" \ + : "=a" (eax), \ + "=S" (ebx), \ + "=c" (ecx), \ + "=d" (edx) \ + : "a" (op) \ + : "cc") +#else +#define cpuid(op, eax, ebx, ecx, edx) \ + __asm__("cpuid" \ + : "=a" (eax), \ + "=b" (ebx), \ + "=c" (ecx), \ + "=d" (edx) \ + : "a" (op) \ + : "cc") +#endif + +static inline int has_cpuid () +{ + return 1; +/* + uint32_t eax, ebx; + + asm ("pushfl\n\t" + "popl %0\n\t" + "movl %0,%1\n\t" + "xorl $0x200000,%0\n\t" + "pushl %0\n\t" + "popfl\n\t" + "pushfl\n\t" + "popl %0" + : "=a" (eax), + "=b" (ebx) + : + : "cc"); + + return (eax != ebx); +*/ +} + + +static uint32_t x86_accel (void) +{ + uint32_t eax, ebx, ecx, edx; + int AMD; + uint32_t caps; + + if (!has_cpuid ()) // no cpuid + return 0; + + cpuid (0x00000000, eax, ebx, ecx, edx); + if (!eax) // vendor string only + return 0; + + AMD = (ebx == 0x68747541) && (ecx == 0x444d4163) && (edx == 0x69746e65); + + cpuid (0x00000001, eax, ebx, ecx, edx); + if (! (edx & 0x00800000)) // no MMX + return 0; + + caps = OMS_ACCEL_X86_MMX; + if (edx & 0x02000000) // SSE - identical to AMD MMX extensions + caps = OMS_ACCEL_X86_MMX | OMS_ACCEL_X86_MMXEXT; + + cpuid (0x80000000, eax, ebx, ecx, edx); + if (eax < 0x80000001) // no extended capabilities + return caps; + + cpuid (0x80000001, eax, ebx, ecx, edx); + + if (edx & 0x80000000) + caps |= OMS_ACCEL_X86_3DNOW; + + if (AMD && (edx & 0x00400000)) // AMD MMX extensions + caps |= OMS_ACCEL_X86_MMXEXT; + + return caps; +} +#endif + +uint32_t mm_accel (void) +{ +//#ifdef __i386__ +#if 0 + static int got_accel = 0; + static uint32_t accel; + + if (!got_accel) { + got_accel = 1; + accel = x86_accel (); + } + + return accel; +#else + return 0; +#endif +} diff --git a/ac3dec/crc.c b/ac3dec/crc.c index 3210ce7f..2d5ef806 100644 --- a/ac3dec/crc.c +++ b/ac3dec/crc.c @@ -30,7 +30,7 @@ #include "crc.h" -static const uint_16 crc_lut[256] = +static const uint16_t crc_lut[256] = { 0x0000,0x8005,0x800f,0x000a,0x801b,0x001e,0x0014,0x8011, 0x8033,0x0036,0x003c,0x8039,0x0028,0x802d,0x8027,0x0022, @@ -66,31 +66,31 @@ static const uint_16 crc_lut[256] = 0x8213,0x0216,0x021c,0x8219,0x0208,0x820d,0x8207,0x0202 }; -static uint_16 state; +static uint16_t state; -void -crc_init(void) + +void crc_init(void) { state = 0; } -inline void crc_process_byte(uint_8 data) +inline void crc_process_byte (uint8_t data) { state = crc_lut[data ^ (state>>8)] ^ (state<<8); } -void -crc_process_frame(uint_8 *data,uint_32 num_bytes) + +void crc_process_frame (uint8_t *data,uint32_t num_bytes) { - uint_32 i; + uint32_t i; - for(i=0;i<num_bytes;i++) - crc_process_byte(data[i]); + for(i=0; i<num_bytes; i++) + crc_process_byte (data[i]); } -int -crc_validate(void) + +int crc_validate(void) { - return(state == 0); + return (state == 0); } diff --git a/ac3dec/crc.h b/ac3dec/crc.h index 07d57b15..16489656 100644 --- a/ac3dec/crc.h +++ b/ac3dec/crc.h @@ -23,5 +23,5 @@ int crc_validate(void); void crc_init(void); -void crc_process_byte(uint_8 data); -void crc_process_frame(uint_8 *data,uint_32 num_bytes); +void crc_process_byte(uint8_t data); +void crc_process_frame(uint8_t *data,uint32_t num_bytes); diff --git a/ac3dec/debug.c b/ac3dec/debug.c index b7d6a3b0..b84511d7 100644 --- a/ac3dec/debug.c +++ b/ac3dec/debug.c @@ -31,18 +31,8 @@ static int debug_level = -1; // We could potentially have multiple levels of debug info int debug_is_on(void) { - char *env_var; - - if(debug_level < 0) - { - env_var = getenv("AC3_DEBUG"); - - if (env_var) - { - debug_level = 1; - } - else - debug_level = 0; + if(debug_level < 0) { + debug_level = getenv ("AC3_DEBUG") ? 1 : 0; } return debug_level; @@ -52,7 +42,7 @@ int debug_is_on(void) #ifndef __GNUC__ void dprintf(char fmt[],...) { - int foo = 0; + return; } #endif diff --git a/ac3dec/debug.h b/ac3dec/debug.h index f45cb5b1..691d4032 100644 --- a/ac3dec/debug.h +++ b/ac3dec/debug.h @@ -22,16 +22,20 @@ * */ -int debug_is_on(void); +int debug_is_on (void); #ifdef __GNUC__ -#define dprintf(format,args...)\ +#ifdef DEBUG +#define dprintf(args...)\ {\ if (debug_is_on())\ {\ - fprintf(stderr,format,## args);\ + fprintf(stderr, args);\ }\ } #else +#define dprintf(args...) { }; +#endif +#else void dprintf(char fmt[],...); #endif diff --git a/ac3dec/decode.c b/ac3dec/decode.c index 5e7f034b..fb852055 100644 --- a/ac3dec/decode.c +++ b/ac3dec/decode.c @@ -3,9 +3,6 @@ * * Copyright (C) Aaron Holtzman - May 1999 * - * Added support for DVB-s PCI card by: - * Matjaz Thaler <matjaz.thaler@rd.iskraemeco.si> - November 2000 - * * This file is part of ac3dec, a free Dolby AC-3 stream decoder. * * ac3dec is free software; you can redistribute it and/or modify @@ -22,6 +19,17 @@ * along with GNU Make; see the file COPYING. If not, write to * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. * + *------------------------------------------------------------ + * + * Thomas Mirlacher <dent@cosy.sbg.ac.at> + * added OMS support + * 11 Jan 2001 + * Thomas Mirlacher <dent@cosy.sbg.ac.at> + * faster error response using jmp functions + * + * 9 Aug 2001 + * Matjaz Thaler <matjaz.thaler@rd.iskraemeco.si> + * Added support for DVB-s PCI card * */ @@ -34,14 +42,17 @@ #include <errno.h> #include <string.h> #include <sys/time.h> -#include <unistd.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <fcntl.h> + +#ifdef __OMS__ +#include <oms/oms.h> +#include <oms/plugin/output_audio.h> +#endif #include "ac3.h" #include "ac3_internal.h" #include "bitstream.h" +#include "downmix.h" +#include "srfft.h" #include "imdct.h" #include "exponent.h" #include "coeff.h" @@ -51,96 +62,91 @@ #include "stats.h" #include "rematrix.h" #include "sanity_check.h" -#include "downmix.h" #include "debug.h" - -#define AC3_BUFFER_SIZE (6*1024*16) - +#ifndef __OMS__ +//#include "audio_out.h" +#endif //our global config structure ac3_config_t ac3_config; -uint_32 error_flag = 0; static audblk_t audblk; static bsi_t bsi; static syncinfo_t syncinfo; -static uint_32 frame_count = 0; -//static uint_32 is_output_initialized = 0; +#ifndef __OMS__ +static uint32_t done_banner; +#endif +static uint32_t is_output_initialized = 0; //the floating point samples for one audblk static stream_samples_t samples; //the integer samples for the entire frame (with enough space for 2 ch out) //if this size change, be sure to change the size when muting -static sint_16 s16_samples[2 * 6 * 256]; - +static int16_t s16_samples[2 * 6 * 256] __attribute__ ((aligned(16))); +// downmix stuff +static float cmixlev_lut[4] = { 0.707, 0.595, 0.500, 0.707 }; +static float smixlev_lut[4] = { 0.707, 0.500, 0.0 , 0.500 }; +static dm_par_t dm_par; + //Storage for the syncframe -#define SYNC_BUFFER_MAX_SIZE 4096 -static uint_8 sync_buffer[SYNC_BUFFER_MAX_SIZE]; -static uint_32 sync_buffer_size = 0;; +#define BUFFER_MAX_SIZE 4096 +static uint8_t buffer[BUFFER_MAX_SIZE]; +static uint32_t buffer_size = 0;; +// for error handling +jmp_buf error_jmp_mark; -uint_32 -decode_sync_buffer_syncframe(syncinfo_t *syncinfo, uint_8 **start,uint_8 *end) +static uint32_t decode_buffer_syncframe (syncinfo_t *syncinfo, uint8_t **start, uint8_t *end) { - uint_8 *cur = *start; - uint_16 syncword = syncinfo->syncword; - uint_32 ret = 0; + uint8_t *cur = *start; + uint16_t syncword = syncinfo->syncword; + uint32_t ret = 0; - // // Find an ac3 sync frame. - // -resync: - - while(syncword != 0x0b77) - { - if(cur >= end) + while (syncword != 0x0b77) { + if (cur >= end) goto done; syncword = (syncword << 8) + *cur++; } //need the next 3 bytes to decide how big the frame is - while(sync_buffer_size < 3) - { + while (buffer_size < 3) { if(cur >= end) goto done; - sync_buffer[sync_buffer_size++] = *cur++; + buffer[buffer_size++] = *cur++; } - parse_syncinfo(syncinfo,sync_buffer); - stats_print_syncinfo(syncinfo); + parse_syncinfo (syncinfo,buffer); + stats_print_syncinfo (syncinfo); - while(sync_buffer_size < syncinfo->frame_size * 2 - 2) - { + while (buffer_size < syncinfo->frame_size * 2 - 2) { if(cur >= end) goto done; - sync_buffer[sync_buffer_size++] = *cur++; + buffer[buffer_size++] = *cur++; } - + +#if 0 // Check the crc over the entire frame crc_init(); - crc_process_frame(sync_buffer,syncinfo->frame_size * 2 - 2); + crc_process_frame (buffer, syncinfo->frame_size * 2 - 2); - if(!crc_validate()) - { + if (!crc_validate()) { fprintf(stderr,"** CRC failed - skipping frame **\n"); - syncword = 0xffff; - sync_buffer_size = 0; - goto resync; + goto done; } - - // +#endif + //if we got to this point, we found a valid ac3 frame to decode - // - bitstream_init(sync_buffer); + bitstream_init (buffer); //get rid of the syncinfo struct as we already parsed it - bitstream_get(24); + bitstream_get (24); //reset the syncword for next time syncword = 0xffff; - sync_buffer_size = 0; + buffer_size = 0; ret = 1; done: @@ -149,100 +155,131 @@ done: return ret; } -void -decode_mute(void) + +void inline decode_mute (void) { - fprintf(stderr,"muting frame\n"); //mute the frame - memset(s16_samples,0,sizeof(sint_16) * 256 * 2 * 6); - error_flag = 0; + memset (s16_samples, 0, sizeof(int16_t) * 256 * 2 * 6); } -void -ac3_init(ac3_config_t *config) +void ac3dec_init (void) { - memcpy(&ac3_config,config,sizeof(ac3_config_t)); - - imdct_init(); - sanity_check_init(&syncinfo,&bsi,&audblk); - - // ac3_output = *foo; +// FIXME - don't do that statically here + ac3_config.num_output_ch = 2; + ac3_config.flags = 0; + + imdct_init (); + downmix_init (); + memset (&syncinfo, 0, sizeof (syncinfo)); + memset (&bsi, 0, sizeof (bsi)); + memset (&audblk, 0, sizeof (audblk)); + sanity_check_init (&syncinfo,&bsi,&audblk); } -uint_32 ac3_decode_data(uint_8 *data_start,uint_8 *data_end, int ac3reset, int *input_pointer, int *output_pointer, char *ac3_data) +#ifdef __OMS__ +size_t ac3dec_decode_data (plugin_output_audio_t *output, uint8_t *data_start, uint8_t *data_end) +#else +size_t ac3dec_decode_data (uint8_t *data_start ,uint8_t *data_end, int ac3reset, int *input_pointer, int *output_pointer, char *ac3_data) +#endif { - uint_32 i; + uint32_t i; int datasize; char *data; + if(ac3reset != 0){ syncinfo.syncword = 0xffff; - sync_buffer_size = 0; + buffer_size = 0; + } + + if (setjmp (error_jmp_mark) < 0) { + ac3dec_init (); + return 0; } - - while(decode_sync_buffer_syncframe(&syncinfo,&data_start,data_end)) - { - dprintf("(decode) begin frame %d\n",frame_count++); - if(error_flag) - { - decode_mute(); - continue; + while (decode_buffer_syncframe (&syncinfo, &data_start, data_end)) { + parse_bsi (&bsi); + +#ifndef __OMS__ + if(!done_banner) { + // stats_print_banner(&syncinfo,&bsi); + done_banner = 1; } +#endif + + // compute downmix parameters + // downmix to tow channels for now + dm_par.clev = 0.0; dm_par.slev = 0.0; dm_par.unit = 1.0; + if (bsi.acmod & 0x1) // have center + dm_par.clev = cmixlev_lut[bsi.cmixlev]; + + if (bsi.acmod & 0x4) // have surround channels + dm_par.slev = smixlev_lut[bsi.surmixlev]; - parse_bsi(&bsi); + dm_par.unit /= 1.0 + dm_par.clev + dm_par.slev; + dm_par.clev *= dm_par.unit; + dm_par.slev *= dm_par.unit; - for(i=0; i < 6; i++) - { + for(i=0; i < 6; i++) { //Initialize freq/time sample storage - memset(samples,0,sizeof(float) * 256 * (bsi.nfchans + bsi.lfeon)); + memset (samples, 0, sizeof(float) * 256 * (bsi.nfchans + bsi.lfeon)); // Extract most of the audblk info from the bitstream // (minus the mantissas - parse_audblk(&bsi,&audblk); + parse_audblk (&bsi,&audblk); // Take the differential exponent data and turn it into // absolute exponents - exponent_unpack(&bsi,&audblk); - if(error_flag) - goto error; + exponent_unpack (&bsi,&audblk); // Figure out how many bits per mantissa - bit_allocate(syncinfo.fscod,&bsi,&audblk); + bit_allocate (syncinfo.fscod,&bsi,&audblk); // Extract the mantissas from the stream and // generate floating point frequency coefficients - coeff_unpack(&bsi,&audblk,samples); - if(error_flag) - goto error; + coeff_unpack (&bsi,&audblk,samples); - if(bsi.acmod == 0x2) - rematrix(&audblk,samples); + if (bsi.acmod == 0x2) + rematrix (&audblk,samples); // Convert the frequency samples into time samples - imdct(&bsi,&audblk,samples); + imdct (&bsi,&audblk,samples, &s16_samples[i * 2 * 256], &dm_par); // Downmix into the requested number of channels - // and convert floating point to sint_16 - downmix(&bsi,samples,&s16_samples[i * 2 * 256]); + // and convert floating point to int16_t + // downmix(&bsi,samples,&s16_samples[i * 2 * 256]); - sanity_check(&syncinfo,&bsi,&audblk); - if(error_flag) - goto error; + if (sanity_check(&syncinfo,&bsi,&audblk) < 0) { + HANDLE_ERROR (); + return 0; + } continue; } - - parse_auxdata(&syncinfo); - /* - if(!is_output_initialized) - { - ac3_output.open(16,syncinfo.sampling_rate,2); + if (!is_output_initialized) { +#ifdef __OMS__ + plugin_output_audio_attr_t attr; +#ifdef __sun__ + attr.format = 16; +#else + attr.format = AFMT_S16_NE; +#endif + attr.speed = syncinfo.sampling_rate; + attr.channels = 2; + + // output->setup (&attr); +#else + // ao_functions->open (16, syncinfo.sampling_rate, 2); +#endif is_output_initialized = 1; } - */ + +#ifdef __OMS__ + output->write (s16_samples, 256 * 6 * 2 * 2); +#else + // ao_functions->play(s16_samples, 256 * 6 * 2); data = (char *)s16_samples; datasize = 0; while(datasize < 6144){ @@ -257,13 +294,11 @@ uint_32 ac3_decode_data(uint_8 *data_start,uint_8 *data_end, int ac3reset, int * } } +#endif - //write(1, s16_samples, 256 * 6 * 2* 2); - //ac3_output.play(s16_samples, 256 * 6 * 2); -error: - ; - //find a new frame } + decode_mute (); + return 0; } diff --git a/ac3dec/dither.c b/ac3dec/dither.c index 31e74f62..079c04d6 100644 --- a/ac3dec/dither.c +++ b/ac3dec/dither.c @@ -31,7 +31,7 @@ #include "dither.h" -const uint_16 dither_lut[256] = +const uint16_t dither_lut[256] = { 0x0000, 0xa011, 0xe033, 0x4022, 0x6077, 0xc066, 0x8044, 0x2055, 0xc0ee, 0x60ff, 0x20dd, 0x80cc, 0xa099, 0x0088, 0x40aa, 0xe0bb, @@ -67,7 +67,7 @@ const uint_16 dither_lut[256] = 0x8bf4, 0x2be5, 0x6bc7, 0xcbd6, 0xeb83, 0x4b92, 0x0bb0, 0xaba1 }; -uint_16 lfsr_state = 1; +uint16_t lfsr_state = 1; // // see dither_gen (inline-able) in dither.h @@ -89,18 +89,17 @@ uint_16 lfsr_state = 1; * */ -uint_16 dither_gen(void) +uint16_t dither_gen(void) { int i; - uint_32 state; + uint32_t state; //explicitly bring the state into a local var as gcc > 3.0? //doesn't know how to optimize out the stores state = lfsr_state; //Generate eight pseudo random bits - for(i=0;i<8;i++) - { + for(i=0;i<8;i++) { state <<= 1; if(state & 0x10000) @@ -109,7 +108,7 @@ uint_16 dither_gen(void) lfsr_state = state; - return (((((sint_32)state<<8)>>8) * (sint_32) (0.707106 * 256.0))>>16); + return (((((int32_t)state<<8)>>8) * (int32_t) (0.707106 * 256.0))>>16); } #endif diff --git a/ac3dec/dither.h b/ac3dec/dither.h index 6d68e1b5..abb9f518 100644 --- a/ac3dec/dither.h +++ b/ac3dec/dither.h @@ -22,16 +22,16 @@ */ -extern uint_16 lfsr_state; -extern const uint_16 dither_lut[256]; +extern uint16_t lfsr_state; +extern const uint16_t dither_lut[256]; -static inline uint_16 dither_gen(void) +static inline uint16_t dither_gen(void) { - sint_16 state; + int16_t state; state = dither_lut[lfsr_state >> 8] ^ (lfsr_state << 8); - lfsr_state = (uint_16) state; + lfsr_state = (uint16_t) state; - return ((state * (sint_32) (0.707106 * 256.0))>>8); + return ((state * (int32_t) (0.707106 * 256.0))>>8); } diff --git a/ac3dec/downmix.c b/ac3dec/downmix.c index 94bc51a8..8eac3e21 100644 --- a/ac3dec/downmix.c +++ b/ac3dec/downmix.c @@ -1,10 +1,7 @@ /* - * - * downmix.c + * imdct.c * - * Copyright (C) Aaron Holtzman - Sept 1999 - * - * Originally based on code by Yuqing Deng. + * Copyright (C) Aaron Holtzman - May 1999 * * This file is part of ac3dec, a free Dolby AC-3 stream decoder. * @@ -28,401 +25,65 @@ #include <stdlib.h> #include <stdio.h> #include <math.h> +#include <mm_accel.h> + #include "ac3.h" #include "ac3_internal.h" - -#include "decode.h" -#include "downmix.h" #include "debug.h" +#include "downmix.h" +#include "downmix_c.h" +#include "downmix_i386.h" +#ifdef HAVE_KNI +#include "downmix_kni.h" +#endif +void (*downmix_3f_2r_to_2ch)(float *samples, dm_par_t * dm_par); +void (*downmix_3f_1r_to_2ch)(float *samples, dm_par_t * dm_par); +void (*downmix_2f_2r_to_2ch)(float *samples, dm_par_t * dm_par); +void (*downmix_2f_1r_to_2ch)(float *samples, dm_par_t * dm_par); +void (*downmix_3f_0r_to_2ch)(float *samples, dm_par_t * dm_par); +void (*stream_sample_2ch_to_s16)(int16_t *s16_samples, float *left, float *right); +void (*stream_sample_1ch_to_s16)(int16_t *s16_samples, float *center); -//Pre-scaled downmix coefficients -static float cmixlev_lut[4] = { 0.2928, 0.2468, 0.2071, 0.2468 }; -static float smixlev_lut[4] = { 0.2928, 0.2071, 0.0 , 0.2071 }; -static void -downmix_3f_2r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples) +void downmix_init() { - uint_32 j; - float right_tmp; - float left_tmp; - float clev,slev; - float *centre = 0, *left = 0, *right = 0, *left_sur = 0, *right_sur = 0; - - left = samples[0]; - centre = samples[1]; - right = samples[2]; - left_sur = samples[3]; - right_sur = samples[4]; - - clev = cmixlev_lut[bsi->cmixlev]; - slev = smixlev_lut[bsi->surmixlev]; - -#if defined DOLBY_SURROUND - for (j = 0; j < 256; j++) +#ifdef __i386__ +#ifdef HAVE_KNI + uint32_t accel = mm_accel (); + +// other dowmixing should go here too + if (accel & MM_ACCEL_X86_MMXEXT) { + dprintf("Using SSE for downmix\n"); + downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_kni; + downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_kni; + downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_kni; + downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_kni; + downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_kni; + stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_kni; + stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_kni; + } else if (accel & MM_ACCEL_X86_3DNOW) { + } else +#endif +#endif { - right_tmp = *left_sur++ + *right_sur++; - left_tmp = 1.4142f * *left++ + *centre - right_tmp; - right_tmp += 1.4142f * *right++ + *centre++; - - s16_samples[j * 2 ] = (sint_16) (left_tmp * 16000.0f); - s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 16000.0f); - } + downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_c; + downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_c; + downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_c; + downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_c; + downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_c; +#ifdef __i386__ +#if 1 + stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_c; + stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_c; #else - for (j = 0; j < 256; j++) - { - left_tmp = 0.4142f * *left++ + clev * *centre + slev * *left_sur++; - right_tmp= 0.4142f * *right++ + clev * *centre++ + slev * *right_sur++; - - s16_samples[j * 2 ] = (sint_16) (left_tmp * 32767.0f); - s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 32767.0f); - } + stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_i386; + stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_i386; +#endif +#else + stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_c; + stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_c; #endif -} - -static void -downmix_2f_2r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples) -{ - uint_32 j; - float right_tmp; - float left_tmp; - float slev; - float *left = 0, *right = 0, *left_sur = 0, *right_sur = 0; - - left = samples[0]; - right = samples[1]; - left_sur = samples[2]; - right_sur = samples[3]; - - slev = smixlev_lut[bsi->surmixlev]; - - for (j = 0; j < 256; j++) - { - left_tmp = 0.4142f * *left++ + slev * *left_sur++; - right_tmp= 0.4142f * *right++ + slev * *right_sur++; - - s16_samples[j * 2 ] = (sint_16) (left_tmp * 32767.0f); - s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 32767.0f); - } -} - -static void -downmix_3f_1r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples) -{ - uint_32 j; - float right_tmp; - float left_tmp; - float clev,slev; - float *centre = 0, *left = 0, *right = 0, *sur = 0; - - left = samples[0]; - centre = samples[1]; - right = samples[2]; - //Mono surround - sur = samples[3]; - - clev = cmixlev_lut[bsi->cmixlev]; - slev = smixlev_lut[bsi->surmixlev]; - - for (j = 0; j < 256; j++) - { - left_tmp = 0.4142f * *left++ + clev * *centre++ + slev * *sur; - right_tmp= 0.4142f * *right++ + clev * *centre + slev * *sur++; - - s16_samples[j * 2 ] = (sint_16) (left_tmp * 32767.0f); - s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 32767.0f); - } -} - - -static void -downmix_2f_1r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples) -{ - uint_32 j; - float right_tmp; - float left_tmp; - float slev; - float *left = 0, *right = 0, *sur = 0; - - left = samples[0]; - right = samples[1]; - //Mono surround - sur = samples[2]; - - slev = smixlev_lut[bsi->surmixlev]; - - for (j = 0; j < 256; j++) - { - left_tmp = 0.4142f * *left++ + slev * *sur; - right_tmp= 0.4142f * *right++ + slev * *sur++; - - s16_samples[j * 2 ] = (sint_16) (left_tmp * 32767.0f); - s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 32767.0f); - } -} - -static void -downmix_3f_0r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples) -{ - uint_32 j; - float right_tmp; - float left_tmp; - float clev; - float *centre = 0, *left = 0, *right = 0; - - left = samples[0]; - centre = samples[1]; - right = samples[2]; - - clev = cmixlev_lut[bsi->cmixlev]; - - for (j = 0; j < 256; j++) - { - left_tmp = 0.4142f * *left++ + clev * *centre; - right_tmp= 0.4142f * *right++ + clev * *centre++; - - s16_samples[j * 2 ] = (sint_16) (left_tmp * 32767.0f); - s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 32767.0f); - } -} - -static void -downmix_2f_0r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples) -{ - uint_32 j; - float *left = 0, *right = 0; - - left = samples[0]; - right = samples[1]; - - for (j = 0; j < 256; j++) - { - s16_samples[j * 2 ] = (sint_16) (*left++ * 32767.0f); - s16_samples[j * 2 + 1] = (sint_16) (*right++ * 32767.0f); - } -} - -static void -downmix_1f_0r_to_2ch(float *centre,sint_16 *s16_samples) -{ - uint_32 j; - float tmp; - - //Mono program! - - for (j = 0; j < 256; j++) - { - tmp = 32767.0f * 0.7071f * *centre++; - - s16_samples[j * 2 ] = s16_samples[j * 2 + 1] = (sint_16) tmp; - } -} - -// -// Downmix into 2 or 4 channels (4 ch isn't in quite yet) -// -// The downmix function names have the following format -// -// downmix_Xf_Yr_to_[2|4]ch[_dolby] -// -// where X = number of front channels -// Y = number of rear channels -// [2|4] = number of output channels -// [_dolby] = with or without dolby surround mix -// - -void downmix(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples) -{ - if(bsi->acmod > 7) - dprintf("(downmix) invalid acmod number\n"); - - // - //There are two main cases, with or without Dolby Surround - // - if(ac3_config.flags & AC3_DOLBY_SURR_ENABLE) - { - fprintf(stderr,"Dolby Surround Mixes not currently enabled\n"); - exit(1); - } - - //Non-Dolby surround downmixes - switch(bsi->acmod) - { - // 3/2 - case 7: - downmix_3f_2r_to_2ch(bsi,samples,s16_samples); - break; - - // 2/2 - case 6: - downmix_2f_2r_to_2ch(bsi,samples,s16_samples); - break; - - // 3/1 - case 5: - downmix_3f_1r_to_2ch(bsi,samples,s16_samples); - break; - - // 2/1 - case 4: - downmix_2f_1r_to_2ch(bsi,samples,s16_samples); - break; - - // 3/0 - case 3: - downmix_3f_0r_to_2ch(bsi,samples,s16_samples); - break; - - case 2: - downmix_2f_0r_to_2ch(bsi,samples,s16_samples); - break; - - // 1/0 - case 1: - downmix_1f_0r_to_2ch(samples[0],s16_samples); - break; - - // 1+1 - case 0: - downmix_1f_0r_to_2ch(samples[ac3_config.dual_mono_ch_sel],s16_samples); - break; } } - - - -#if 0 - - //the dolby mixes lay here for the time being - switch(bsi->acmod) - { - // 3/2 - case 7: - left = samples[0]; - centre = samples[1]; - right = samples[2]; - left_sur = samples[3]; - right_sur = samples[4]; - - for (j = 0; j < 256; j++) - { - right_tmp = 0.2265f * *left_sur++ + 0.2265f * *right_sur++; - left_tmp = -1 * right_tmp; - right_tmp += 0.3204f * *right++ + 0.2265f * *centre; - left_tmp += 0.3204f * *left++ + 0.2265f * *centre++; - - samples[1][j] = right_tmp; - samples[0][j] = left_tmp; - } - - break; - - // 2/2 - case 6: - left = samples[0]; - right = samples[1]; - left_sur = samples[2]; - right_sur = samples[3]; - - for (j = 0; j < 256; j++) - { - right_tmp = 0.2265f * *left_sur++ + 0.2265f * *right_sur++; - left_tmp = -1 * right_tmp; - right_tmp += 0.3204f * *right++; - left_tmp += 0.3204f * *left++ ; - - samples[1][j] = right_tmp; - samples[0][j] = left_tmp; - } - break; - - // 3/1 - case 5: - left = samples[0]; - centre = samples[1]; - right = samples[2]; - //Mono surround - right_sur = samples[3]; - - for (j = 0; j < 256; j++) - { - right_tmp = 0.2265f * *right_sur++; - left_tmp = -1 * right_tmp; - right_tmp += 0.3204f * *right++ + 0.2265f * *centre; - left_tmp += 0.3204f * *left++ + 0.2265f * *centre++; - - samples[1][j] = right_tmp; - samples[0][j] = left_tmp; - } - break; - - // 2/1 - case 4: - left = samples[0]; - right = samples[1]; - //Mono surround - right_sur = samples[2]; - - for (j = 0; j < 256; j++) - { - right_tmp = 0.2265f * *right_sur++; - left_tmp = -1 * right_tmp; - right_tmp += 0.3204f * *right++; - left_tmp += 0.3204f * *left++; - - samples[1][j] = right_tmp; - samples[0][j] = left_tmp; - } - break; - - // 3/0 - case 3: - left = samples[0]; - centre = samples[1]; - right = samples[2]; - - for (j = 0; j < 256; j++) - { - right_tmp = 0.3204f * *right++ + 0.2265f * *centre; - left_tmp = 0.3204f * *left++ + 0.2265f * *centre++; - - samples[1][j] = right_tmp; - samples[0][j] = left_tmp; - } - break; - - // 2/0 - case 2: - //Do nothing! - break; - - // 1/0 - case 1: - //Mono program! - right = samples[0]; - - for (j = 0; j < 256; j++) - { - right_tmp = 0.7071f * *right++; - - samples[1][j] = right_tmp; - samples[0][j] = right_tmp; - } - - break; - - // 1+1 - case 0: - //Dual mono, output selected by user - right = samples[ac3_config.dual_mono_ch_sel]; - - for (j = 0; j < 256; j++) - { - right_tmp = 0.7071f * *right++; - - samples[1][j] = right_tmp; - samples[0][j] = right_tmp; - } - break; -#endif diff --git a/ac3dec/downmix.h b/ac3dec/downmix.h index 7537c622..dcfe1e12 100644 --- a/ac3dec/downmix.h +++ b/ac3dec/downmix.h @@ -25,4 +25,10 @@ * */ -void downmix(bsi_t* bsi, stream_samples_t stream_samples,sint_16 *s16_samples); +typedef struct dm_par_s { + float unit; + float clev; + float slev; +} dm_par_t; + +void downmix_init(); diff --git a/ac3dec/downmix_c.c b/ac3dec/downmix_c.c new file mode 100644 index 00000000..875d785e --- /dev/null +++ b/ac3dec/downmix_c.c @@ -0,0 +1,161 @@ +/* + * downmix_c.c + * + * Copyright (C) Aaron Holtzman - May 1999 + * + * This file is part of ac3dec, a free Dolby AC-3 stream decoder. + * + * ac3dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * ac3dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + */ + +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include "ac3.h" +#include "ac3_internal.h" + +#include "debug.h" +#include "downmix.h" +#include "downmix_c.h" + + +void downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par) +{ + int i; + float *left, *right, *center, *left_sur, *right_sur; + float left_tmp, right_tmp; + + left = samples; + right = samples + 256 * 2; + center = samples + 256; + left_sur = samples + 256 * 3; + right_sur = samples + 256 * 4; + + for (i=0; i < 256; i++) { +#if defined DOLBY_SURROUND + left_tmp = dm_par->unit * *left + dm_par->clev * *center - dm_par->slev * (*left_sur + *right_sur); + right_tmp= dm_par->unit * *right++ + dm_par->clev * *center + dm_par->slev * (*left_sur++ + *right_sur++); +#else + left_tmp = dm_par->unit * *left + dm_par->clev * *center + dm_par->slev * *left_sur++; + right_tmp= dm_par->unit * *right++ + dm_par->clev * *center + dm_par->slev * *right_sur++; +#endif + *left++ = left_tmp; + *center++ = right_tmp; + + } +} + + +void downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par) +{ + int i; + float *left, *right, *left_sur, *right_sur; + float left_tmp, right_tmp; + + left = &samples[0]; + right = &samples[256]; + left_sur = &samples[512]; + right_sur = &samples[768]; + + for (i = 0; i < 256; i++) { + left_tmp = dm_par->unit * *left + dm_par->slev * *left_sur++; + right_tmp= dm_par->unit * *right + dm_par->slev * *right_sur++; + *left++ = left_tmp; + *right++ = right_tmp; + } +} + + +void downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par) +{ + int i; + float *left, *right, *center, *right_sur; + float left_tmp, right_tmp; + + left = &samples[0]; + right = &samples[512]; + center = &samples[256]; + right_sur = &samples[768]; + + for (i = 0; i < 256; i++) { + left_tmp = dm_par->unit * *left + dm_par->clev * *center - dm_par->slev * *right_sur; + right_tmp= dm_par->unit * *right++ + dm_par->clev * *center + dm_par->slev * *right_sur++; + *left++ = left_tmp; + *center++ = right_tmp; + } +} + + +void downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par) +{ + int i; + float *left, *right, *right_sur; + float left_tmp, right_tmp; + + left = &samples[0]; + right = &samples[256]; + right_sur = &samples[512]; + + for (i = 0; i < 256; i++) { + left_tmp = dm_par->unit * *left - dm_par->slev * *right_sur; + right_tmp= dm_par->unit * *right + dm_par->slev * *right_sur++; + *left++ = left_tmp; + *right++ = right_tmp; + } +} + + +void downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par) +{ + int i; + float *left, *right, *center; + float left_tmp, right_tmp; + + left = &samples[0]; + center = &samples[256]; + right = &samples[512]; + + for (i = 0; i < 256; i++) { + left_tmp = dm_par->unit * *left + dm_par->clev * *center; + right_tmp= dm_par->unit * *right++ + dm_par->clev * *center; + *left++ = left_tmp; + *center++ = right_tmp; + } +} + + +void stream_sample_2ch_to_s16_c (int16_t *s16_samples, float *left, float *right) +{ + int i; + + for (i=0; i < 256; i++) { + *s16_samples++ = (int16_t) *left++; + *s16_samples++ = (int16_t) *right++; + } +} + + +void stream_sample_1ch_to_s16_c (int16_t *s16_samples, float *center) +{ + int i; + float tmp; + + for (i=0; i < 256; i++) { + *s16_samples++ = tmp = (int16_t) (0.7071f * *center++); + *s16_samples++ = tmp; + } +} diff --git a/ac3dec/downmix_c.h b/ac3dec/downmix_c.h new file mode 100644 index 00000000..3643f65e --- /dev/null +++ b/ac3dec/downmix_c.h @@ -0,0 +1,32 @@ +/***** +* +* This file is part of the OMS program. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2, or (at your option) +* any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; see the file COPYING. If not, write to +* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. +* +*****/ + +#ifndef __DOWNMIX_C_H__ +#define __DOWNMIX_C_H__ + +void downmix_3f_2r_to_2ch_c(float *samples, dm_par_t * dm_par); +void downmix_3f_1r_to_2ch_c(float *samples, dm_par_t * dm_par); +void downmix_2f_2r_to_2ch_c(float *samples, dm_par_t * dm_par); +void downmix_2f_1r_to_2ch_c(float *samples, dm_par_t * dm_par); +void downmix_3f_0r_to_2ch_c(float *samples, dm_par_t * dm_par); +void stream_sample_2ch_to_s16_c(int16_t *s16_samples, float *left, float *right); +void stream_sample_1ch_to_s16_c(int16_t *s16_samples, float *center); + +#endif diff --git a/ac3dec/downmix_i386.S b/ac3dec/downmix_i386.S new file mode 100644 index 00000000..20f2e8c3 --- /dev/null +++ b/ac3dec/downmix_i386.S @@ -0,0 +1,92 @@ +/* This is basicly gcc generated. + * Only the floating point rounding mode loads and saves + * are removed in the stream_sample_to_s16 functions. + */ + +#ifdef __i386__ + + .file "downmix.c" + .version "01.01" +gcc2_compiled.: +.text + .align 4 +.globl stream_sample_2ch_to_s16_i386 + .type stream_sample_2ch_to_s16_i386,@function +stream_sample_2ch_to_s16_i386: + pushl %ebp + movl %esp,%ebp + subl $28,%esp + pushl %edi + pushl %esi + pushl %ebx + movl 8(%ebp),%edx + movl 12(%ebp),%ebx + movl 16(%ebp),%ecx + movl $255,%esi + .p2align 4,,7 +.L373: + flds (%ebx) + fistpl -8(%ebp) + movl -8(%ebp),%eax + movw %ax,(%edx) + addl $2,%edx + addl $4,%ebx + flds (%ecx) + fistpl -8(%ebp) + movl -8(%ebp),%eax + movw %ax,(%edx) + addl $4,%ecx + addl $2,%edx + decl %esi + jns .L373 + popl %ebx + popl %esi + popl %edi + leave + ret +.Lfe6: + .size stream_sample_2ch_to_s16_i386,.Lfe6-stream_sample_2ch_to_s16_i386 +.section .rodata + .align 4 +.LC46: + .long 0x3f350481 +.text + .align 4 +.globl stream_sample_1ch_to_s16_i386 + .type stream_sample_1ch_to_s16_i386,@function +stream_sample_1ch_to_s16_i386: + pushl %ebp + movl %esp,%ebp + subl $16,%esp + pushl %esi + pushl %ebx + movl 8(%ebp),%edx + movl 12(%ebp),%ecx + flds .LC46 + movl $255,%ebx + .p2align 4,,7 +.L379: + flds (%ecx) + fmul %st(1),%st + fistpl -8(%ebp) + movl -8(%ebp),%eax + movw %ax,-2(%ebp) + addl $4,%ecx + flds -2(%ebp) + fistpl -8(%ebp) + movl -8(%ebp),%eax + movw %ax,(%edx) + addl $2,%edx + movw %ax,(%edx) + addl $2,%edx + decl %ebx + jns .L379 + fstp %st(0) + popl %ebx + popl %esi + leave + ret +.Lfe7: + .size stream_sample_1ch_to_s16_i386,.Lfe7-stream_sample_1ch_to_s16_i386 + .ident "GCC: (GNU) 2.95.3 19991030 (prerelease)" +#endif diff --git a/ac3dec/downmix_i386.h b/ac3dec/downmix_i386.h new file mode 100644 index 00000000..e34d7d6c --- /dev/null +++ b/ac3dec/downmix_i386.h @@ -0,0 +1,27 @@ +/***** +* +* This file is part of the OMS program. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2, or (at your option) +* any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; see the file COPYING. If not, write to +* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. +* +*****/ + +#ifndef __DOWNMIX_I386_H__ +#define __DOWNMIX_I386_H__ + +void stream_sample_2ch_to_s16_i386(int16_t *s16_samples, float *left, float *right); +void stream_sample_1ch_to_s16_i386(int16_t *s16_samples, float *center); + +#endif diff --git a/ac3dec/downmix_kni.S b/ac3dec/downmix_kni.S new file mode 100644 index 00000000..7df8c060 --- /dev/null +++ b/ac3dec/downmix_kni.S @@ -0,0 +1,396 @@ +/* + * downmix_kni.S + * + * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - October 2000 + * + * + * downmix_kni.S is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * downmix_kni.S is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifdef __i386__ + +.section .rodata + .align 4 +sqrt2: .float 0f0.7071068 + .p2align 5,0, + + .section .text + + .align 4 + .global downmix_3f_2r_to_2ch_kni + .type downmix_3f_2r_to_2ch_kni, @function + +downmix_3f_2r_to_2ch_kni: + pushl %ebp + movl %esp, %ebp + + pushl %eax + pushl %ebx + pushl %ecx + + movl 8(%ebp), %eax /* samples[] */ + movl 12(%ebp), %ebx /* &dm_par */ + movl $64, %ecx /* loop counter */ + + movss (%ebx), %xmm5 /* unit */ + shufps $0, %xmm5, %xmm5 /* unit | unit | unit | unit */ + + movss 4(%ebx), %xmm6 /* clev */ + shufps $0, %xmm6, %xmm6 /* clev | clev | clev | clev */ + + movss 8(%ebx), %xmm7 /* slev */ + shufps $0, %xmm7, %xmm7 /* slev | slev | slev | slev */ + +.loop: + movaps (%eax), %xmm0 /* left */ + movaps 2048(%eax), %xmm1 /* right */ + movaps 1024(%eax), %xmm2 /* center */ + mulps %xmm5, %xmm0 + mulps %xmm5, %xmm1 + + mulps %xmm6, %xmm2 + movaps 3072(%eax), %xmm3 /* leftsur */ + movaps 4096(%eax), %xmm4 /* rithgsur */ + addps %xmm2, %xmm0 + addps %xmm2, %xmm1 + + mulps %xmm7, %xmm3 + mulps %xmm7, %xmm4 + addps %xmm3, %xmm0 + addps %xmm4, %xmm1 + + movaps %xmm0, (%eax) + movaps %xmm1, 1024(%eax) + + addl $16, %eax + decl %ecx + jnz .loop + + popl %ecx + popl %ebx + popl %eax + + leave + ret + .p2align 4,,7 + + .global downmix_2f_2r_to_2ch_kni + .type downmix_2f_2r_to_2ch_kni, @function + +downmix_2f_2r_to_2ch_kni: + pushl %ebp + movl %esp, %ebp + + pushl %eax + pushl %ebx + pushl %ecx + + movl 8(%ebp), %eax /* samples[] */ + movl 12(%ebp), %ebx /* &dm_par */ + movl $64, %ecx /* loop counter */ + + movss (%ebx), %xmm5 /* unit */ + shufps $0, %xmm5, %xmm5 /* unit | unit | unit | unit */ + + movss 8(%ebx), %xmm7 /* slev */ + shufps $0, %xmm7, %xmm7 /* slev | slev | slev | slev */ + +.loop3: + movaps (%eax), %xmm0 /* left */ + movaps 1024(%eax), %xmm1 /* right */ + movaps 2048(%eax), %xmm3 /* leftsur */ + mulps %xmm5, %xmm0 + mulps %xmm5, %xmm1 + + movaps 3072(%eax), %xmm4 /* rightsur */ + + mulps %xmm7, %xmm3 + mulps %xmm7, %xmm4 + addps %xmm3, %xmm0 + addps %xmm4, %xmm1 + + movaps %xmm0, (%eax) + movaps %xmm1, 1024(%eax) + + addl $16, %eax + decl %ecx + jnz .loop3 + + popl %ecx + popl %ebx + popl %eax + + leave + ret + .p2align 4,,7 + + .global downmix_3f_1r_to_2ch_kni + .type downmix_3f_1r_to_2ch_kni, @function + +downmix_3f_1r_to_2ch_kni: + pushl %ebp + movl %esp, %ebp + + pushl %eax + pushl %ebx + pushl %ecx + + movl 8(%ebp), %eax /* samples[] */ + movl 12(%ebp), %ebx /* &dm_par */ + movl $64, %ecx /* loop counter */ + + movss (%ebx), %xmm5 /* unit */ + shufps $0, %xmm5, %xmm5 /* unit | unit | unit | unit */ + + movss 4(%ebx), %xmm6 /* clev */ + shufps $0, %xmm6, %xmm6 /* clev | clev | clev | clev */ + + movss 8(%ebx), %xmm7 /* slev */ + shufps $0, %xmm7, %xmm7 /* slev | slev | slev | slev */ + +.loop4: + movaps (%eax), %xmm0 /* left */ + movaps 2048(%eax), %xmm1 /* right */ + movaps 1024(%eax), %xmm2 /* center */ + mulps %xmm5, %xmm0 + mulps %xmm5, %xmm1 + + mulps %xmm6, %xmm2 + movaps 3072(%eax), %xmm3 /* sur */ + + addps %xmm2, %xmm0 + mulps %xmm7, %xmm3 + + addps %xmm2, %xmm1 + + subps %xmm3, %xmm0 + addps %xmm3, %xmm1 + + movaps %xmm0, (%eax) + movaps %xmm1, 1024(%eax) + + addl $16, %eax + decl %ecx + jnz .loop4 + + popl %ecx + popl %ebx + popl %eax + + leave + ret + .p2align 4,,7 + + .global downmix_2f_1r_to_2ch_kni + .type downmix_2f_1r_to_2ch_kni, @function + +downmix_2f_1r_to_2ch_kni: + pushl %ebp + movl %esp, %ebp + + pushl %eax + pushl %ebx + pushl %ecx + + movl 8(%ebp), %eax /* samples[] */ + movl 12(%ebp), %ebx /* &dm_par */ + movl $64, %ecx /* loop counter */ + + movss (%ebx), %xmm5 /* unit */ + shufps $0, %xmm5, %xmm5 /* unit | unit | unit | unit */ + + movss 8(%ebx), %xmm7 /* slev */ + shufps $0, %xmm7, %xmm7 /* slev | slev | slev | slev */ + +.loop5: + movaps (%eax), %xmm0 /* left */ + movaps 1024(%eax), %xmm1 /* right */ + + mulps %xmm5, %xmm0 + mulps %xmm5, %xmm1 + + movaps 2048(%eax), %xmm3 /* sur */ + + mulps %xmm7, %xmm3 + + subps %xmm3, %xmm0 + addps %xmm3, %xmm1 + + movaps %xmm0, (%eax) + movaps %xmm1, 1024(%eax) + + addl $16, %eax + decl %ecx + jnz .loop5 + + popl %ecx + popl %ebx + popl %eax + + leave + ret + .p2align 4,,7 + + .global downmix_3f_0r_to_2ch_kni + .type downmix_3f_0r_to_2ch_kni, @function + +downmix_3f_0r_to_2ch_kni: + pushl %ebp + movl %esp, %ebp + + pushl %eax + pushl %ebx + pushl %ecx + + movl 8(%ebp), %eax /* samples[] */ + movl 12(%ebp), %ebx /* &dm_par */ + movl $64, %ecx /* loop counter */ + + movss (%ebx), %xmm5 /* unit */ + shufps $0, %xmm5, %xmm5 /* unit | unit | unit | unit */ + + movss 4(%ebx), %xmm6 /* clev */ + shufps $0, %xmm6, %xmm6 /* clev | clev | clev | clev */ + + +.loop6: + movaps (%eax), %xmm0 /* left */ + movaps 2048(%eax), %xmm1 /* right */ + movaps 1024(%eax), %xmm2 /* center */ + mulps %xmm5, %xmm0 + mulps %xmm5, %xmm1 + + mulps %xmm6, %xmm2 + + addps %xmm2, %xmm0 + + addps %xmm2, %xmm1 + + movaps %xmm0, (%eax) + movaps %xmm1, 1024(%eax) + + addl $16, %eax + decl %ecx + jnz .loop6 + + popl %ecx + popl %ebx + popl %eax + + leave + ret + .p2align 4,,7 + + .global stream_sample_2ch_to_s16_kni + .type stream_sample_2ch_to_s16_kni, @function + +stream_sample_2ch_to_s16_kni: + pushl %ebp + movl %esp, %ebp + + pushl %eax + pushl %ebx + pushl %edx + pushl %ecx + + movl 8(%ebp), %eax /* s16_samples */ + movl 12(%ebp), %ebx /* left */ + movl 16(%ebp), %edx /* right */ + movl $64, %ecx + +.loop1: + movaps (%ebx), %xmm0 /* l3 | l2 | l1 | l0 */ + movaps (%edx), %xmm1 /* r3 | r2 | r1 | r0 */ + movhlps %xmm0, %xmm2 /* l3 | l2 */ + movhlps %xmm1, %xmm3 /* r3 | r2 */ + unpcklps %xmm1, %xmm0 /* r1 | l1 | r0 | l0 */ + unpcklps %xmm3, %xmm2 /* r3 | l3 | r2 | l2 */ + + cvtps2pi %xmm0, %mm0 /* r0 l0 --> mm0, int_32 */ + movhlps %xmm0, %xmm0 + cvtps2pi %xmm0, %mm1 /* r1 l1 --> mm1, int_32 */ + + cvtps2pi %xmm2, %mm2 /* r2 l2 --> mm2, int_32 */ + movhlps %xmm2, %xmm2 + cvtps2pi %xmm2, %mm3 /* r3 l3 --> mm3, int_32 */ + packssdw %mm1, %mm0 /* r1 l1 r0 l0 --> mm0, int_16 */ + packssdw %mm3, %mm2 /* r3 l3 r2 l2 --> mm2, int_16 */ + + movq %mm0, (%eax) + movq %mm2, 8(%eax) + addl $16, %eax + addl $16, %ebx + addl $16, %edx + + decl %ecx + jnz .loop1 + + popl %ecx + popl %edx + popl %ebx + popl %eax + + emms + + leave + ret + .p2align 4,,7 + + .global stream_sample_1ch_to_s16_kni + .type stream_sample_1ch_to_s16_kni, @function + +stream_sample_1ch_to_s16_kni: + pushl %ebp + movl %esp, %ebp + + pushl %eax + pushl %ebx + pushl %ecx + + movl $sqrt2, %eax + movss (%eax), %xmm7 + movl 8(%ebp), %eax /* s16_samples */ + movl 12(%ebp), %ebx /* left */ + shufps $0, %xmm7, %xmm7 + movl $64, %ecx + +.loop2: + movaps (%ebx), %xmm0 /* c3 | c2 | c1 | c0 */ + mulps %xmm7, %xmm0 + movhlps %xmm0, %xmm2 /* c3 | c2 */ + + cvtps2pi %xmm0, %mm0 /* c1 c0 --> mm0, int_32 */ + cvtps2pi %xmm2, %mm1 /* c3 c2 --> mm1, int_32 */ + + packssdw %mm0, %mm0 /* c1 c1 c0 c0 --> mm0, int_16 */ + packssdw %mm1, %mm1 /* c3 c3 c2 c2 --> mm1, int_16 */ + + movq %mm0, (%eax) + movq %mm1, 8(%eax) + addl $16, %eax + addl $16, %ebx + + decl %ecx + jnz .loop2 + + popl %ecx + popl %ebx + popl %eax + + emms + leave + ret +#endif diff --git a/ac3dec/downmix_kni.h b/ac3dec/downmix_kni.h new file mode 100644 index 00000000..323f2a75 --- /dev/null +++ b/ac3dec/downmix_kni.h @@ -0,0 +1,32 @@ +/***** +* +* This file is part of the OMS program. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2, or (at your option) +* any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; see the file COPYING. If not, write to +* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. +* +*****/ + +#ifndef __DOWNMIX_KNI_H__ +#define __DOWNMIX_KNI_H__ + +void downmix_3f_2r_to_2ch_kni(float *samples, dm_par_t * dm_par); +void downmix_3f_1r_to_2ch_kni(float *samples, dm_par_t * dm_par); +void downmix_2f_2r_to_2ch_kni(float *samples, dm_par_t * dm_par); +void downmix_2f_1r_to_2ch_kni(float *samples, dm_par_t * dm_par); +void downmix_3f_0r_to_2ch_kni(float *samples, dm_par_t * dm_par); +void stream_sample_2ch_to_s16_kni(int16_t *s16_samples, float *left, float *right); +void stream_sample_1ch_to_s16_kni(int16_t *s16_samples, float *center); + +#endif diff --git a/ac3dec/exponent.c b/ac3dec/exponent.c index 98111a5b..16fca465 100644 --- a/ac3dec/exponent.c +++ b/ac3dec/exponent.c @@ -28,41 +28,43 @@ #include "ac3_internal.h" -#include "decode.h" #include "exponent.h" -static void exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp, - uint_16 exps[], uint_16 *dest); +static inline void exp_unpack_ch(uint16_t type,uint16_t expstr,uint16_t ngrps,uint16_t initial_exp, uint16_t exps[], uint16_t *dest); -void -exponent_unpack( bsi_t *bsi, audblk_t *audblk) + +/** + * + **/ + +void exponent_unpack( bsi_t *bsi, audblk_t *audblk) { - uint_16 i; + uint16_t i; for(i=0; i< bsi->nfchans; i++) - exp_unpack_ch(UNPACK_FBW, audblk->chexpstr[i], audblk->nchgrps[i], audblk->exps[i][0], - &audblk->exps[i][1], audblk->fbw_exp[i]); + exp_unpack_ch(UNPACK_FBW, audblk->chexpstr[i], audblk->nchgrps[i], audblk->exps[i][0], &audblk->exps[i][1], audblk->fbw_exp[i]); if(audblk->cplinu) - exp_unpack_ch(UNPACK_CPL, audblk->cplexpstr, audblk->ncplgrps, audblk->cplabsexp << 1, - audblk->cplexps, &audblk->cpl_exp[audblk->cplstrtmant]); + exp_unpack_ch(UNPACK_CPL, audblk->cplexpstr, audblk->ncplgrps, audblk->cplabsexp << 1, audblk->cplexps, &audblk->cpl_exp[audblk->cplstrtmant]); if(bsi->lfeon) - exp_unpack_ch(UNPACK_LFE, audblk->lfeexpstr, 2, audblk->lfeexps[0], - &audblk->lfeexps[1], audblk->lfe_exp); + exp_unpack_ch(UNPACK_LFE, audblk->lfeexpstr, 2, audblk->lfeexps[0], &audblk->lfeexps[1], audblk->lfe_exp); } -static void -exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp, - uint_16 exps[], uint_16 *dest) +/** + * + **/ + +static inline void exp_unpack_ch(uint16_t type,uint16_t expstr,uint16_t ngrps,uint16_t initial_exp, + uint16_t exps[], uint16_t *dest) { - uint_16 i,j; - sint_16 exp_acc; - sint_16 exp_1,exp_2,exp_3; + uint16_t i,j; + int16_t exp_acc; + int16_t exp_1,exp_2,exp_3; - if(expstr == EXP_REUSE) + if (expstr == EXP_REUSE) return; /* Handle the initial absolute exponent */ @@ -75,8 +77,7 @@ exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp, dest[j++] = exp_acc; /* Loop through the groups and fill the dest array appropriately */ - for(i=0; i< ngrps; i++) - { + for(i=0; i< ngrps; i++) { if(exps[i] > 124) goto error; @@ -86,8 +87,7 @@ exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp, exp_acc += (exp_1 - 2); - switch(expstr) - { + switch(expstr) { case EXP_D45: dest[j++] = exp_acc; dest[j++] = exp_acc; @@ -99,8 +99,7 @@ exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp, exp_acc += (exp_2 - 2); - switch(expstr) - { + switch(expstr) { case EXP_D45: dest[j++] = exp_acc; dest[j++] = exp_acc; @@ -112,8 +111,7 @@ exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp, exp_acc += (exp_3 - 2); - switch(expstr) - { + switch(expstr) { case EXP_D45: dest[j++] = exp_acc; dest[j++] = exp_acc; @@ -125,11 +123,10 @@ exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp, } return; - - goto error; error: - if(!error_flag) - fprintf(stderr,"** Invalid exponent - skipping frame **\n"); - error_flag = 1; +#ifdef DEBUG + fprintf (stderr,"** Invalid exponent - skipping frame **\n"); +#endif + HANDLE_ERROR (); } diff --git a/ac3dec/imdct.c b/ac3dec/imdct.c index ae2794ed..c09ef413 100644 --- a/ac3dec/imdct.c +++ b/ac3dec/imdct.c @@ -28,75 +28,48 @@ #include "ac3.h" #include "ac3_internal.h" - -#include "decode.h" +#include "downmix.h" #include "imdct.h" +#include "imdct_c.h" +#ifdef HAVE_KNI +#include "imdct_kni.h" +#endif +#include "srfft.h" -void imdct_do_256(float data[],float delay[]); -void imdct_do_512(float data[],float delay[]); -typedef struct complex_s -{ - float real; - float imag; -} complex_t; +extern void (*downmix_3f_2r_to_2ch)(float *samples, dm_par_t * dm_par); +extern void (*downmix_3f_1r_to_2ch)(float *samples, dm_par_t * dm_par); +extern void (*downmix_2f_2r_to_2ch)(float *samples, dm_par_t * dm_par); +extern void (*downmix_2f_1r_to_2ch)(float *samples, dm_par_t * dm_par); +extern void (*downmix_3f_0r_to_2ch)(float *samples, dm_par_t * dm_par); + +extern void (*stream_sample_2ch_to_s16)(int16_t *s16_samples, float *left, float *right); +extern void (*stream_sample_1ch_to_s16)(int16_t *s16_samples, float *center); + +void (*fft_64p) (complex_t *); +void (*imdct_do_512) (float data[],float delay[]); +void (*imdct_do_512_nol) (float data[], float delay[]); + +void imdct_do_256 (float data[],float delay[]); #define N 512 +/* static complex_t buf[128]; */ +//static complex_t buf[128] __attribute__((aligned(16))); +complex_t buf[128] __attribute__((aligned(16))); -/* 128 point bit-reverse LUT */ -static uint_8 bit_reverse_512[128] = { - 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70, - 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78, - 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74, - 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c, - 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72, - 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a, - 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76, - 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e, - 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71, - 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79, - 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75, - 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d, - 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73, - 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b, - 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77, - 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f}; - -static uint_8 bit_reverse_256[64] = { - 0x00, 0x20, 0x10, 0x30, 0x08, 0x28, 0x18, 0x38, - 0x04, 0x24, 0x14, 0x34, 0x0c, 0x2c, 0x1c, 0x3c, - 0x02, 0x22, 0x12, 0x32, 0x0a, 0x2a, 0x1a, 0x3a, - 0x06, 0x26, 0x16, 0x36, 0x0e, 0x2e, 0x1e, 0x3e, - 0x01, 0x21, 0x11, 0x31, 0x09, 0x29, 0x19, 0x39, - 0x05, 0x25, 0x15, 0x35, 0x0d, 0x2d, 0x1d, 0x3d, - 0x03, 0x23, 0x13, 0x33, 0x0b, 0x2b, 0x1b, 0x3b, - 0x07, 0x27, 0x17, 0x37, 0x0f, 0x2f, 0x1f, 0x3f}; - -static complex_t buf[128]; - -/* Twiddle factor LUT */ -static complex_t *w[7]; -static complex_t w_1[1]; -static complex_t w_2[2]; -static complex_t w_4[4]; -static complex_t w_8[8]; -static complex_t w_16[16]; -static complex_t w_32[32]; -static complex_t w_64[64]; +/* Delay buffer for time domain interleaving */ +static float delay[6][256]; +static float delay1[6][256]; /* Twiddle factors for IMDCT */ -static float xcos1[128]; -static float xsin1[128]; static float xcos2[64]; static float xsin2[64]; -/* Delay buffer for time domain interleaving */ -static float delay[6][256]; - /* Windowing function for Modified DCT - Thank you acroread */ -static float window[] = { +//static float window[] = { +float window[] = { 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130, 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443, 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061, @@ -128,146 +101,116 @@ static float window[] = { 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993, 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999, 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, - 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000 }; - + 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000 +}; -static inline void swap_cmplx(complex_t *a, complex_t *b) +//static const int pm128[128] = +const int pm128[128] = { - complex_t tmp; - - tmp = *a; - *a = *b; - *b = tmp; -} - - - -static inline complex_t cmplx_mult(complex_t a, complex_t b) + 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120, + 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124, + 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122, + 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126, + 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121, + 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125, + 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123, + 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127 +}; + +static const int pm64[64] = { - complex_t ret; - - ret.real = a.real * b.real - a.imag * b.imag; - ret.imag = a.real * b.imag + a.imag * b.real; - - return ret; -} - -void imdct_init(void) -{ - int i,k; - complex_t angle_step; - complex_t current_angle; - - /* Twiddle factors to turn IFFT into IMDCT */ - for( i=0; i < 128; i++) - { - xcos1[i] = -cos(2.0f * M_PI * (8*i+1)/(8*N)) ; - xsin1[i] = -sin(2.0f * M_PI * (8*i+1)/(8*N)) ; - } - - /* More twiddle factors to turn IFFT into IMDCT */ - for( i=0; i < 64; i++) - { - xcos2[i] = -cos(2.0f * M_PI * (8*i+1)/(4*N)) ; - xsin2[i] = -sin(2.0f * M_PI * (8*i+1)/(4*N)) ; - } - - /* Canonical twiddle factors for FFT */ - w[0] = w_1; - w[1] = w_2; - w[2] = w_4; - w[3] = w_8; - w[4] = w_16; - w[5] = w_32; - w[6] = w_64; - - for( i = 0; i < 7; i++) - { - angle_step.real = cos(-2.0 * M_PI / (1 << (i+1))); - angle_step.imag = sin(-2.0 * M_PI / (1 << (i+1))); - - current_angle.real = 1.0; - current_angle.imag = 0.0; - - for (k = 0; k < 1 << i; k++) - { - w[i][k] = current_angle; - current_angle = cmplx_mult(current_angle,angle_step); - } + 0, 8, 16, 24, 32, 40, 48, 56, + 4, 20, 36, 52, 12, 28, 44, 60, + 2, 10, 18, 26, 34, 42, 50, 58, + 6, 14, 22, 30, 38, 46, 54, 62, + 1, 9, 17, 25, 33, 41, 49, 57, + 5, 21, 37, 53, 13, 29, 45, 61, + 3, 11, 19, 27, 35, 43, 51, 59, + 7, 23, 39, 55, 15, 31, 47, 63 +}; + + +void imdct_init (void) + { + int i; + float scale = 255.99609372; + +#ifdef __i386__ +#ifdef HAVE_KNI + if (!imdct_init_kni ()); + else +#endif +#endif + if (!imdct_init_c ()); + + // More twiddle factors to turn IFFT into IMDCT */ + for (i=0; i < 64; i++) { + xcos2[i] = cos(2.0f * M_PI * (8*i+1)/(4*N)) * scale; + xsin2[i] = sin(2.0f * M_PI * (8*i+1)/(4*N)) * scale; } } -void -imdct_do_512(float data[],float delay[]) + +void imdct_do_256 (float data[],float delay[]) { - int i,k; - int p,q; - int m; - int two_m; - int two_m_plus_one; + int i, j, k; + int p, q; float tmp_a_i; float tmp_a_r; - float tmp_b_i; - float tmp_b_r; float *data_ptr; float *delay_ptr; float *window_ptr; - - // - // 512 IMDCT with source and dest data in 'data' - // - - // Pre IFFT complex multiply plus IFFT cmplx conjugate and bit reverse - // permutation - for( i=0; i < 128; i++) - { - k = bit_reverse_512[i]; - - /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */ - buf[k].real = (data[256-2*i-1] * xcos1[i]) - (data[2*i] * xsin1[i]); - buf[k].imag = -1.0 * ((data[2*i] * xcos1[i]) + (data[256-2*i-1] * xsin1[i])); - } - // FFT Merge - for (m=0; m < 7; m++) - { - if(m) - two_m = (1 << m); - else - two_m = 1; - - two_m_plus_one = (1 << (m+1)); - - for(k = 0; k < two_m; k++) - { - for(i = 0; i < 128; i += two_m_plus_one) - { - p = k + i; - q = p + two_m; - tmp_a_r = buf[p].real; - tmp_a_i = buf[p].imag; - tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag; - tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag; - buf[p].real = tmp_a_r + tmp_b_r; - buf[p].imag = tmp_a_i + tmp_b_i; - buf[q].real = tmp_a_r - tmp_b_r; - buf[q].imag = tmp_a_i - tmp_b_i; + complex_t *buf1, *buf2; - } - } + buf1 = &buf[0]; + buf2 = &buf[64]; + +// Pre IFFT complex multiply plus IFFT complex conjugate + for (k=0; k<64; k++) { + /* X1[k] = X[2*k] */ + /* X2[k] = X[2*k+1] */ + + j = pm64[k]; + p = 2 * (128-2*j-1); + q = 2 * (2 * j); + + /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */ + buf1[k].re = data[p] * xcos2[j] - data[q] * xsin2[j]; + buf1[k].im = -1.0f * (data[q] * xcos2[j] + data[p] * xsin2[j]); + /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */ + buf2[k].re = data[p + 1] * xcos2[j] - data[q + 1] * xsin2[j]; + buf2[k].im = -1.0f * ( data[q + 1] * xcos2[j] + data[p + 1] * xsin2[j]); } - /* Post IFFT complex multiply plus IFFT complex conjugate*/ - for( i=0; i < 128; i++) - { - /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ - tmp_a_r = buf[i].real; - tmp_a_i = buf[i].imag; - //Note that I flipped the signs on the imaginary ops to do the complex conj - buf[i].real =(tmp_a_r * xcos1[i]) + (tmp_a_i * xsin1[i]); - buf[i].imag =(tmp_a_r * xsin1[i]) - (tmp_a_i * xcos1[i]); + fft_64p(&buf1[0]); + fft_64p(&buf2[0]); + +#ifdef DEBUG + //DEBUG FFT +#if 0 + printf ("Post FFT, buf1\n"); + for (i=0; i < 64; i++) + printf("%d %f %f\n", i, buf_1[i].re, buf_1[i].im); + printf ("Post FFT, buf2\n"); + for (i=0; i < 64; i++) + printf("%d %f %f\n", i, buf_2[i].re, buf_2[i].im); +#endif +#endif + + + // Post IFFT complex multiply + for( i=0; i < 64; i++) { + tmp_a_r = buf1[i].re; + tmp_a_i = -buf1[i].im; + buf1[i].re =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]); + buf1[i].im =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]); + tmp_a_r = buf2[i].re; + tmp_a_i = -buf2[i].im; + buf2[i].re =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]); + buf2[i].im =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]); } data_ptr = data; @@ -275,164 +218,122 @@ imdct_do_512(float data[],float delay[]) window_ptr = window; /* Window and convert to real valued signal */ - for(i=0; i< 64; i++) - { - *data_ptr++ = 2.0f * (-buf[64+i].imag * *window_ptr++ + *delay_ptr++); - *data_ptr++ = 2.0f * ( buf[64-i-1].real * *window_ptr++ + *delay_ptr++); + for(i=0; i< 64; i++) { + *data_ptr++ = -buf1[i].im * *window_ptr++ + *delay_ptr++; + *data_ptr++ = buf1[64-i-1].re * *window_ptr++ + *delay_ptr++; } - for(i=0; i< 64; i++) - { - *data_ptr++ = 2.0f * (-buf[i].real * *window_ptr++ + *delay_ptr++); - *data_ptr++ = 2.0f * ( buf[128-i-1].imag * *window_ptr++ + *delay_ptr++); + for(i=0; i< 64; i++) { + *data_ptr++ = -buf1[i].re * *window_ptr++ + *delay_ptr++; + *data_ptr++ = buf1[64-i-1].im * *window_ptr++ + *delay_ptr++; } - /* The trailing edge of the window goes into the delay line */ delay_ptr = delay; - for(i=0; i< 64; i++) - { - *delay_ptr++ = -buf[64+i].real * *--window_ptr; - *delay_ptr++ = buf[64-i-1].imag * *--window_ptr; + for(i=0; i< 64; i++) { + *delay_ptr++ = -buf2[i].re * *--window_ptr; + *delay_ptr++ = buf2[64-i-1].im * *--window_ptr; } - for(i=0; i<64; i++) - { - *delay_ptr++ = buf[i].imag * *--window_ptr; - *delay_ptr++ = -buf[128-i-1].real * *--window_ptr; + for(i=0; i< 64; i++) { + *delay_ptr++ = buf2[i].im * *--window_ptr; + *delay_ptr++ = -buf2[64-i-1].re * *--window_ptr; } } -void -imdct_do_256(float data[],float delay[]) + +/** + * + **/ + +void imdct_do_256_nol (float data[], float delay[]) { - int i,k; - int p,q; - int m; - int two_m; - int two_m_plus_one; + int i, j, k; + int p, q; float tmp_a_i; float tmp_a_r; - float tmp_b_i; - float tmp_b_r; float *data_ptr; float *delay_ptr; float *window_ptr; - complex_t *buf_1, *buf_2; - - buf_1 = &buf[0]; - buf_2 = &buf[64]; - - // Pre IFFT complex multiply plus IFFT cmplx conjugate and bit reverse - // permutation - for(i=0; i<64; i++) - { - /* X1[i] = X[2*i] */ - /* X2[i] = X[2*i+1] */ - - k = bit_reverse_256[i]; - - p = 2 * (128-2*i-1); - q = 2 * (2 * i); - - /* Z1[i] = (X1[128-2*i-1] + j * X1[2*i]) * (xcos2[i] + j * xsin2[i]); */ - buf_1[k].real = data[p] * xcos2[i] - data[q] * xsin2[i]; - buf_1[k].imag = -1.0f * (data[q] * xcos2[i] + data[p] * xsin2[i]); - /* Z2[i] = (X2[128-2*i-1] + j * X2[2*i]) * (xcos2[i] + j * xsin2[i]); */ - buf_2[k].real = data[p + 1] * xcos2[i] - data[q + 1] * xsin2[i]; - buf_2[k].imag = -1.0f * ( data[q + 1] * xcos2[i] + data[p + 1] * xsin2[i]); - } - - // FFT Merge - for (m=0; m < 6; m++) - { - two_m = (1 << m); - two_m_plus_one = (1 << (m+1)); - - if(m) - two_m = (1 << m); - else - two_m = 1; - - for(k = 0; k < two_m; k++) - { - for(i = 0; i < 64; i += two_m_plus_one) - { - p = k + i; - q = p + two_m; - //Do block 1 - tmp_a_r = buf_1[p].real; - tmp_a_i = buf_1[p].imag; - tmp_b_r = buf_1[q].real * w[m][k].real - buf_1[q].imag * w[m][k].imag; - tmp_b_i = buf_1[q].imag * w[m][k].real + buf_1[q].real * w[m][k].imag; - buf_1[p].real = tmp_a_r + tmp_b_r; - buf_1[p].imag = tmp_a_i + tmp_b_i; - buf_1[q].real = tmp_a_r - tmp_b_r; - buf_1[q].imag = tmp_a_i - tmp_b_i; - - //Do block 2 - tmp_a_r = buf_2[p].real; - tmp_a_i = buf_2[p].imag; - tmp_b_r = buf_2[q].real * w[m][k].real - buf_2[q].imag * w[m][k].imag; - tmp_b_i = buf_2[q].imag * w[m][k].real + buf_2[q].real * w[m][k].imag; - buf_2[p].real = tmp_a_r + tmp_b_r; - buf_2[p].imag = tmp_a_i + tmp_b_i; - buf_2[q].real = tmp_a_r - tmp_b_r; - buf_2[q].imag = tmp_a_i - tmp_b_i; - - } - } - } - - // Post IFFT complex multiply - for( i=0; i < 64; i++) - { - //Note that I flipped the signs on the imaginary ops to do the complex conj - - /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ - tmp_a_r = buf_1[i].real; - tmp_a_i = buf_1[i].imag; - buf_1[i].real =(tmp_a_r * xcos2[i]) + (tmp_a_i * xsin2[i]); - buf_1[i].imag =(tmp_a_r * xsin2[i]) - (tmp_a_i * xcos2[i]); - /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */ - tmp_a_r = buf_2[i].real; - tmp_a_i = buf_2[i].imag; - buf_2[i].real =(tmp_a_r * xcos2[i]) + (tmp_a_i * xsin2[i]); - buf_2[i].imag =(tmp_a_r * xsin2[i]) - (tmp_a_i * xcos2[i]); - } - - data_ptr = data; - delay_ptr = delay; - window_ptr = window; - - /* Window and convert to real valued signal */ - for(i=0; i< 64; i++) - { - *data_ptr++ = 2.0f * (-buf_1[i].imag * *window_ptr++ + *delay_ptr++); - *data_ptr++ = 2.0f * ( buf_1[64-i-1].real * *window_ptr++ + *delay_ptr++); - } - - for(i=0; i< 64; i++) - { - *data_ptr++ = 2.0f * (-buf_1[i].real * *window_ptr++ + *delay_ptr++); - *data_ptr++ = 2.0f * ( buf_1[64-i-1].imag * *window_ptr++ + *delay_ptr++); - } - - delay_ptr = delay; - - for(i=0; i< 64; i++) - { - *delay_ptr++ = -buf_2[i].real * *--window_ptr; - *delay_ptr++ = buf_2[64-i-1].imag * *--window_ptr; - } - - for(i=0; i< 64; i++) - { - *delay_ptr++ = buf_2[i].imag * *--window_ptr; - *delay_ptr++ = -buf_2[64-i-1].real * *--window_ptr; + complex_t *buf1, *buf2; + + buf1 = &buf[0]; + buf2 = &buf[64]; + + /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ + for(k=0; k<64; k++) { + /* X1[k] = X[2*k] */ + /* X2[k] = X[2*k+1] */ + j = pm64[k]; + p = 2 * (128-2*j-1); + q = 2 * (2 * j); + + /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */ + buf1[k].re = data[p] * xcos2[j] - data[q] * xsin2[j]; + buf1[k].im = -1.0f * (data[q] * xcos2[j] + data[p] * xsin2[j]); + /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */ + buf2[k].re = data[p + 1] * xcos2[j] - data[q + 1] * xsin2[j]; + buf2[k].im = -1.0f * ( data[q + 1] * xcos2[j] + data[p + 1] * xsin2[j]); + } + + + fft_64p(&buf1[0]); + fft_64p(&buf2[0]); + +#ifdef DEBUG + //DEBUG FFT +#if 0 + printf("Post FFT, buf1\n"); + for (i=0; i < 64; i++) + printf("%d %f %f\n", i, buf_1[i].re, buf_1[i].im); + printf("Post FFT, buf2\n"); + for (i=0; i < 64; i++) + printf("%d %f %f\n", i, buf_2[i].re, buf_2[i].im); +#endif +#endif + + /* Post IFFT complex multiply */ + for( i=0; i < 64; i++) { + /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ + tmp_a_r = buf1[i].re; + tmp_a_i = -buf1[i].im; + buf1[i].re =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]); + buf1[i].im =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]); + /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */ + tmp_a_r = buf2[i].re; + tmp_a_i = -buf2[i].im; + buf2[i].re =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]); + buf2[i].im =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]); + } + + data_ptr = data; + delay_ptr = delay; + window_ptr = window; + + /* Window and convert to real valued signal, no overlap */ + for(i=0; i< 64; i++) { + *data_ptr++ = -buf1[i].im * *window_ptr++; + *data_ptr++ = buf1[64-i-1].re * *window_ptr++; + } + + for(i=0; i< 64; i++) { + *data_ptr++ = -buf1[i].re * *window_ptr++ + *delay_ptr++; + *data_ptr++ = buf1[64-i-1].im * *window_ptr++ + *delay_ptr++; + } + + delay_ptr = delay; + + for(i=0; i< 64; i++) { + *delay_ptr++ = -buf2[i].re * *--window_ptr; + *delay_ptr++ = buf2[64-i-1].im * *--window_ptr; + } + + for(i=0; i< 64; i++) { + *delay_ptr++ = buf2[i].im * *--window_ptr; + *delay_ptr++ = -buf2[64-i-1].re * *--window_ptr; } } @@ -440,29 +341,190 @@ imdct_do_256(float data[],float delay[]) ///#include <sys/time.h> //FIXME remove -void -imdct(bsi_t *bsi,audblk_t *audblk, stream_samples_t samples) { +void imdct (bsi_t *bsi,audblk_t *audblk, stream_samples_t samples, int16_t *s16_samples, dm_par_t* dm_par) +{ int i; + int doable = 0; + float *center=NULL, *left, *right, *left_sur, *right_sur; + float *delay_left, *delay_right; + float *delay1_left, *delay1_right, *delay1_center, *delay1_sr, *delay1_sl; + float right_tmp, left_tmp; + void (*do_imdct)(float data[], float deley[]); + + // test if dm in frequency is doable + if (!(doable = audblk->blksw[0])) + do_imdct = imdct_do_512; + else + do_imdct = imdct_do_256; + + // downmix in the frequency domain if all the channels + // use the same imdct + for (i=0; i < bsi->nfchans; i++) { + if (doable != audblk->blksw[i]) { + do_imdct = NULL; + break; + } + } - //handy timing code - //struct timeval start,end; + if (do_imdct) { + //dowmix first and imdct + switch(bsi->acmod) { + case 7: // 3/2 + downmix_3f_2r_to_2ch (samples[0], dm_par); + break; + case 6: // 2/2 + downmix_2f_2r_to_2ch (samples[0], dm_par); + break; + case 5: // 3/1 + downmix_3f_1r_to_2ch (samples[0], dm_par); + break; + case 4: // 2/1 + downmix_2f_1r_to_2ch (samples[0], dm_par); + break; + case 3: // 3/0 + downmix_3f_0r_to_2ch (samples[0], dm_par); + break; + case 2: + break; + default: // 1/0 + if (bsi->acmod == 1) + center = samples[0]; + else if (bsi->acmod == 0) + center = samples[ac3_config.dual_mono_ch_sel]; + do_imdct(center, delay[0]); // no downmix + + stream_sample_1ch_to_s16 (s16_samples, center); + + return; + //goto done; + break; + } - //gettimeofday(&start,0); - - for(i=0; i<bsi->nfchans;i++) - { - if(audblk->blksw[i]) - imdct_do_256(samples[i],delay[i]); - else - imdct_do_512(samples[i],delay[i]); + do_imdct (samples[0], delay[0]); + do_imdct (samples[1], delay[1]); + stream_sample_2ch_to_s16(s16_samples, samples[0], samples[1]); + + } else { //imdct and then dowmix + // delay and samples should be saved and mixed + //fprintf(stderr, "time domain downmix\n"); + for (i=0; i<bsi->nfchans; i++) { + if (audblk->blksw[i]) + imdct_do_256_nol (samples[i],delay1[i]); + else + imdct_do_512_nol (samples[i],delay1[i]); + } + + // mix the sample, overlap + switch(bsi->acmod) { + case 7: // 3/2 + left = samples[0]; + center = samples[1]; + right = samples[2]; + left_sur = samples[3]; + right_sur = samples[4]; + delay_left = delay[0]; + delay_right = delay[1]; + delay1_left = delay1[0]; + delay1_center = delay1[1]; + delay1_right = delay1[2]; + delay1_sl = delay1[3]; + delay1_sr = delay1[4]; + + for (i = 0; i < 256; i++) { + left_tmp = dm_par->unit * *left++ + dm_par->clev * *center + dm_par->slev * *left_sur++; + right_tmp= dm_par->unit * *right++ + dm_par->clev * *center++ + dm_par->slev * *right_sur++; + *s16_samples++ = (int16_t)(left_tmp + *delay_left); + *s16_samples++ = (int16_t)(right_tmp + *delay_right); + *delay_left++ = dm_par->unit * *delay1_left++ + dm_par->clev * *delay1_center + dm_par->slev * *delay1_sl++; + *delay_right++ = dm_par->unit * *delay1_right++ + dm_par->clev * *center++ + dm_par->slev * *delay1_sr++; + } + break; + case 6: // 2/2 + left = samples[0]; + right = samples[1]; + left_sur = samples[2]; + right_sur = samples[3]; + delay_left = delay[0]; + delay_right = delay[1]; + delay1_left = delay1[0]; + delay1_right = delay1[1]; + delay1_sl = delay1[2]; + delay1_sr = delay1[3]; + + for (i = 0; i < 256; i++) { + left_tmp = dm_par->unit * *left++ + dm_par->slev * *left_sur++; + right_tmp= dm_par->unit * *right++ + dm_par->slev * *right_sur++; + *s16_samples++ = (int16_t)(left_tmp + *delay_left); + *s16_samples++ = (int16_t)(right_tmp + *delay_right); + *delay_left++ = dm_par->unit * *delay1_left++ + dm_par->slev * *delay1_sl++; + *delay_right++ = dm_par->unit * *delay1_right++ + dm_par->slev * *delay1_sr++; + } + break; + case 5: // 3/1 + left = samples[0]; + center = samples[1]; + right = samples[2]; + right_sur = samples[3]; + delay_left = delay[0]; + delay_right = delay[1]; + delay1_left = delay1[0]; + delay1_center = delay1[1]; + delay1_right = delay1[2]; + delay1_sl = delay1[3]; + + for (i = 0; i < 256; i++) { + left_tmp = dm_par->unit * *left++ + dm_par->clev * *center - dm_par->slev * *right_sur; + right_tmp= dm_par->unit * *right++ + dm_par->clev * *center++ + dm_par->slev * *right_sur++; + *s16_samples++ = (int16_t)(left_tmp + *delay_left); + *s16_samples++ = (int16_t)(right_tmp + *delay_right); + *delay_left++ = dm_par->unit * *delay1_left++ + dm_par->clev * *delay1_center + dm_par->slev * *delay1_sl; + *delay_right++ = dm_par->unit * *delay1_right++ + dm_par->clev * *center++ + dm_par->slev * *delay1_sl++; + } + break; + case 4: // 2/1 + left = samples[0]; + right = samples[1]; + right_sur = samples[2]; + delay_left = delay[0]; + delay_right = delay[1]; + delay1_left = delay1[0]; + delay1_right = delay1[1]; + delay1_sl = delay1[2]; + + for (i = 0; i < 256; i++) { + left_tmp = dm_par->unit * *left++ - dm_par->slev * *right_sur; + right_tmp= dm_par->unit * *right++ + dm_par->slev * *right_sur++; + *s16_samples++ = (int16_t)(left_tmp + *delay_left); + *s16_samples++ = (int16_t)(right_tmp + *delay_right); + *delay_left++ = dm_par->unit * *delay1_left++ + dm_par->slev * *delay1_sl; + *delay_right++ = dm_par->unit * *delay1_right++ + dm_par->slev * *delay1_sl++; + } + break; + case 3: // 3/0 + left = samples[0]; + center = samples[1]; + right = samples[2]; + delay_left = delay[0]; + delay_right = delay[1]; + delay1_left = delay1[0]; + delay1_center = delay1[1]; + delay1_right = delay1[2]; + + for (i = 0; i < 256; i++) { + left_tmp = dm_par->unit * *left++ + dm_par->clev * *center; + right_tmp= dm_par->unit * *right++ + dm_par->clev * *center++; + *s16_samples++ = (int16_t)(left_tmp + *delay_left); + *s16_samples++ = (int16_t)(right_tmp + *delay_right); + *delay_left++ = dm_par->unit * *delay1_left++ + dm_par->clev * *delay1_center; + *delay_right++ = dm_par->unit * *delay1_right++ + dm_par->clev * *center++; + } + break; + case 2: // copy to output + for (i = 0; i < 256; i++) { + *s16_samples++ = (int16_t)samples[0][i]; + *s16_samples++ = (int16_t)samples[1][i]; + } + break; + } } - //gettimeofday(&end,0); - //printf("imdct %ld us\n",(end.tv_sec - start.tv_sec) * 1000000 + - //end.tv_usec - start.tv_usec); - - //XXX We don't bother with the IMDCT for the LFE as it's currently - //unused. - //if (bsi->lfeon) - // imdct_do_512(coeffs->lfe,samples->channel[5],delay[5]); - // } diff --git a/ac3dec/imdct.h b/ac3dec/imdct.h index 750aa879..8f10af00 100644 --- a/ac3dec/imdct.h +++ b/ac3dec/imdct.h @@ -22,5 +22,5 @@ * */ -void imdct(bsi_t *bsi,audblk_t *audblk, stream_samples_t samples); +void imdct (bsi_t *bsi,audblk_t *audblk, stream_samples_t samples, int16_t *s16_samples, dm_par_t *dm_par); void imdct_init(void); diff --git a/ac3dec/imdct512_kni.S b/ac3dec/imdct512_kni.S new file mode 100644 index 00000000..10b8de6f --- /dev/null +++ b/ac3dec/imdct512_kni.S @@ -0,0 +1,548 @@ +/* + * imdct512_kni.S + * + * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - October 2000 + * + * + * imdct512_kni.S is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * imdct512_kni.S is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifdef __i386__ + +.text + .align 4 +.global imdct512_pre_ifft_twiddle_kni + .type imdct512_pre_ifft_twiddle_kni, @function +imdct512_pre_ifft_twiddle_kni: + + pushl %ebp + movl %esp, %ebp + addl $-4, %esp /* local variable, loop counter */ + + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + pushl %edi + pushl %esi + + movl 8(%ebp), %eax /* pmt */ + movl 12(%ebp), %ebx /* buf */ + movl 16(%ebp), %ecx /* data */ + movl 20(%ebp), %edx /* xcos_sin_sse */ + movl $64, -4(%ebp) + + +.loop: + movl (%eax), %esi + movl 4(%eax), %edi + movss (%ecx, %esi, 8), %xmm1 /* 2j */ + movss (%ecx, %edi, 8), %xmm3 /* 2(j+1) */ + + shll $1, %esi + shll $1, %edi + + movaps (%edx, %esi, 8), %xmm0; /* -c_j | -s_j | -s_j | c_j */ + movaps (%edx, %edi, 8), %xmm2; /* -c_j+1 | -s_j+1 | -s_j+1 | c_j+1 */ + + negl %esi + negl %edi + + movss 1020(%ecx, %esi, 4), %xmm4 /* 255-2j */ + addl $8, %eax + movss 1020(%ecx, %edi, 4), %xmm5 /* 255-2(j+1) */ + + shufps $0, %xmm1, %xmm4 /* 2j | 2j | 255-2j | 255-2j */ + shufps $0, %xmm3, %xmm5 /* 2(j+1) | 2(j+1) | 255-2(j+1) | 255-2(j+1) */ + mulps %xmm4, %xmm0 + mulps %xmm5, %xmm2 + movhlps %xmm0, %xmm1 + movhlps %xmm2, %xmm3 + addl $16, %ebx + addps %xmm1, %xmm0 + addps %xmm3, %xmm2 + movlhps %xmm2, %xmm0 + movaps %xmm0, -16(%ebx) + decl -4(%ebp) + jnz .loop + + popl %esi + popl %edi + popl %edx + popl %ecx + popl %ebx + popl %eax + + addl $4, %esp + popl %ebp + + ret + .p2align 4,0 + +.global imdct512_post_ifft_twiddle_kni + .type imdct512_post_ifft_twiddle_kni, @function +imdct512_post_ifft_twiddle_kni: + + pushl %ebp + movl %esp, %ebp + + pushl %eax + pushl %ebx + pushl %ecx + + movl 8(%ebp), %eax /* buf[] */ + movl 12(%ebp), %ebx /* xcos_sin_sse[] */ + movl $32, %ecx /* loop counter */ + +.loop1: + movaps (%eax), %xmm0 /* im1 | re1 | im0 | re0 */ + + movaps (%ebx), %xmm2 /* -c | -s | -s | c */ + movhlps %xmm0, %xmm1 /* im1 | re1 */ + movaps 16(%ebx), %xmm3 /* -c1 | -s1 | -s1 | c1 */ + + shufps $0x50, %xmm0, %xmm0 /* im0 | im0 | re0 | re0 */ + shufps $0x50, %xmm1, %xmm1 /* im1 | im1 | re1 | re1 */ + + movaps 16(%eax), %xmm4 /* im3 | re3 | im2 | re2 */ + + shufps $0x27, %xmm2, %xmm2 /* c | -s | -s | -c */ + movhlps %xmm4, %xmm5 /* im3 | re3 */ + shufps $0x27, %xmm3, %xmm3 /* c1 | -s1 | -s1 | -c1 */ + + movaps 32(%ebx), %xmm6 /* -c2 | -s2 | -s2 | c2 */ + movaps 48(%ebx), %xmm7 /* -c3 | -s3 | -s3 | c3 */ + + shufps $0x50, %xmm4, %xmm4 /* im2 | im2 | re2 | re2 */ + shufps $0x50, %xmm5, %xmm5 /* im3 | im3 | re3 | re3 */ + + mulps %xmm2, %xmm0 + mulps %xmm3, %xmm1 + + shufps $0x27, %xmm6, %xmm6 /* c2 | -s2 | -s2 | -c2 */ + shufps $0x27, %xmm7, %xmm7 /* c3 | -s3 | -s3 | -c3 */ + + movhlps %xmm0, %xmm2 + movhlps %xmm1, %xmm3 + + mulps %xmm6, %xmm4 + mulps %xmm7, %xmm5 + + addps %xmm2, %xmm0 + addps %xmm3, %xmm1 + + movhlps %xmm4, %xmm6 + movhlps %xmm5, %xmm7 + + addps %xmm6, %xmm4 + addps %xmm7, %xmm5 + + movlhps %xmm1, %xmm0 + movlhps %xmm5, %xmm4 + + movaps %xmm0, (%eax) + movaps %xmm4, 16(%eax) + addl $64, %ebx + addl $32, %eax + decl %ecx + jnz .loop1 + + popl %ecx + popl %ebx + popl %eax + + leave + ret + .p2align 4,0 + +.global imdct512_window_delay_kni + .type imdct512_window_delay_kni, @function +imdct512_window_delay_kni: + + pushl %ebp + movl %esp, %ebp + + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + pushl %esi + pushl %edi + + movl 20(%ebp), %ebx /* delay */ + movl 16(%ebp), %edx /* window */ + + movl 8(%ebp), %eax /* buf */ + movl $16, %ecx /* loop count */ + leal 516(%eax), %esi /* buf[64].im */ + leal 504(%eax), %edi /* buf[63].re */ + movl 12(%ebp), %eax /* data */ +.first_128_samples: + + movss (%esi), %xmm0 + movss 8(%esi), %xmm2 + movss (%edi), %xmm1 + movss -8(%edi), %xmm3 + + movlhps %xmm2, %xmm0 /* 0.0 | im1 | 0.0 | im0 */ + movlhps %xmm3, %xmm1 /* 0.0 | re1 | 0.0 | re0 */ + + movaps (%edx), %xmm4 /* w3 | w2 | w1 | w0 */ + movaps (%ebx), %xmm5 /* d3 | d2 | d1 | d0 */ + shufps $0xb1, %xmm1, %xmm1 /* re1 | 0.0 | re0 | 0.0 */ + + movss 16(%esi), %xmm6 /* im2 */ + movss 24(%esi), %xmm7 /* im3 */ + subps %xmm1, %xmm0 /* -re1 | im1 | -re0 | im0 */ + movss -16(%edi), %xmm2 /* re2 */ + movss -24(%edi), %xmm3 /* re3 */ + mulps %xmm4, %xmm0 + movlhps %xmm7, %xmm6 /* 0.0 | im3 | 0.0 | im2 */ + movlhps %xmm3, %xmm2 /* 0.0 | re3 | 0.0 | re2 */ + addps %xmm5, %xmm0 + shufps $0xb1, %xmm2, %xmm2 /* re3 | 0.0 | re2 | 0.0 */ + movaps 16(%edx), %xmm4 /* w7 | w6 | w5 | w4 */ + movaps 16(%ebx), %xmm5 /* d7 | d6 | d5 | d4 */ + subps %xmm2, %xmm6 /* -re3 | im3 | -re2 | im2 */ + addl $32, %edx + movaps %xmm0, (%eax) + addl $32, %ebx + mulps %xmm4, %xmm6 + addl $32, %esi + addl $32, %eax + addps %xmm5, %xmm6 + addl $-32, %edi + movaps %xmm6, -16(%eax) + decl %ecx + jnz .first_128_samples + + movl 8(%ebp), %esi /* buf[0].re */ + leal 1020(%esi), %edi /* buf[127].im */ + movl $16, %ecx /* loop count */ +.second_128_samples: + + movss (%esi), %xmm0 /* buf[i].re */ + movss 8(%esi), %xmm2 /* re1 */ + movss (%edi), %xmm1 /* buf[127-i].im */ + movss -8(%edi), %xmm3 /* im1 */ + + movlhps %xmm2, %xmm0 /* 0.0 | re1 | 0.0 | re0 */ + movlhps %xmm3, %xmm1 /* 0.0 | im1 | 0.0 | im1 */ + + movaps (%edx), %xmm4 /* w3 | w2 | w1 | w0 */ + movaps (%ebx), %xmm5 /* d3 | d2 | d1 | d0 */ + + shufps $0xb1, %xmm1, %xmm1 /* im1 | 0.0 | im0 | 0.0 */ + movss 16(%esi), %xmm6 /* re2 */ + movss 24(%esi), %xmm7 /* re3 */ + movss -16(%edi), %xmm2 /* im2 */ + movss -24(%edi), %xmm3 /* im3 */ + subps %xmm1, %xmm0 /* -im1 | re1 | -im0 | re0 */ + movlhps %xmm7, %xmm6 /* 0.0 | re3 | 0.0 | re2 */ + movlhps %xmm3, %xmm2 /* 0.0 | im3 | 0.0 | im2 */ + mulps %xmm4, %xmm0 + shufps $0xb1, %xmm2, %xmm2 /* im3 | 0.0 | im2 | 0.0 */ + movaps 16(%edx), %xmm4 /* w7 | w6 | w5 | w4 */ + addl $32, %esi + subps %xmm2, %xmm6 /* -im3 | re3 | -im2 | re2 */ + addps %xmm5, %xmm0 + mulps %xmm4, %xmm6 + addl $-32, %edi + movaps 16(%ebx), %xmm5 /* d7 | d6 | d5 | d4 */ + movaps %xmm0, (%eax) + addps %xmm5, %xmm6 + addl $32, %edx + addl $32, %eax + addl $32, %ebx + movaps %xmm6, -16(%eax) + decl %ecx + jnz .second_128_samples + + movl 8(%ebp), %eax + leal 512(%eax), %esi /* buf[64].re */ + leal 508(%eax), %edi /* buf[63].im */ + movl $16, %ecx /* loop count */ + movl 20(%ebp), %eax /* delay */ +.first_128_delay: + + movss (%esi), %xmm0 + movss 8(%esi), %xmm2 + movss (%edi), %xmm1 + movss -8(%edi), %xmm3 + + movlhps %xmm2, %xmm0 /* 0.0 | re1 | 0.0 | re0 */ + movlhps %xmm3, %xmm1 /* 0.0 | im1 | 0.0 | im0 */ + + movaps -16(%edx), %xmm4 /* w3 | w2 | w1 | w0 */ + shufps $0xb1, %xmm1, %xmm1 /* im1 | 0.0 | im0 | 0.0 */ + movss 16(%esi), %xmm6 /* re2 */ + movss 24(%esi), %xmm7 /* re3 */ + movss -16(%edi), %xmm2 /* im2 */ + movss -24(%edi), %xmm3 /* im3 */ + subps %xmm1, %xmm0 /* -im1 | re1 | -im0 | re0 */ + addl $-32, %edx + movlhps %xmm7, %xmm6 /* 0.0 | re3 | 0.0 | re2 */ + movlhps %xmm3, %xmm2 /* 0.0 | im3 | 0.0 | im2 */ + mulps %xmm4, %xmm0 + movaps (%edx), %xmm5 /* w7 | w6 | w5 | w4 */ + shufps $0xb1, %xmm2, %xmm2 /* im3 | 0.0 | im2 | 0.0 */ + movaps %xmm0, (%eax) + addl $32, %esi + subps %xmm2, %xmm6 /* -im3 | re3 | -im2 | re2 */ + addl $-32, %edi + mulps %xmm5, %xmm6 + addl $32, %eax + movaps %xmm6, -16(%eax) + decl %ecx + jnz .first_128_delay + + movl 8(%ebp), %ebx + leal 4(%ebx), %esi /* buf[0].im */ + leal 1016(%ebx), %edi /* buf[127].re */ + movl $16, %ecx /* loop count */ +.second_128_delay: + + movss (%esi), %xmm0 + movss 8(%esi), %xmm2 + movss (%edi), %xmm1 + movss -8(%edi), %xmm3 + + movlhps %xmm2, %xmm0 /* 0.0 | im1 | 0.0 | im0 */ + movlhps %xmm3, %xmm1 /* 0.0 | re1 | 0.0 | re0 */ + + movaps -16(%edx), %xmm4 /* w3 | w2 | w1 | w0 */ + shufps $0xb1, %xmm1, %xmm1 /* re1 | 0.0 | re0 | 0.0 */ + movss 16(%esi), %xmm6 /* im2 */ + movss 24(%esi), %xmm7 /* im3 */ + movss -16(%edi), %xmm2 /* re2 */ + movss -24(%edi), %xmm3 /* re3 */ + subps %xmm0, %xmm1 /* re1 | -im1 | re0 | -im0 */ + addl $-32, %edx + movlhps %xmm7, %xmm6 /* 0.0 | im3 | 0.0 | im2 */ + movlhps %xmm3, %xmm2 /* 0.0 | re3 | 0.0 | re2 */ + mulps %xmm4, %xmm1 + movaps (%edx), %xmm5 /* w7 | w6 | w5 | w4 */ + shufps $0xb1, %xmm2, %xmm2 /* re3 | 0.0 | re2 | 0.0 */ + movaps %xmm1, (%eax) + addl $32, %esi + subps %xmm6, %xmm2 /* re | -im3 | re | -im2 */ + addl $-32, %edi + mulps %xmm5, %xmm2 + addl $32, %eax + movaps %xmm2, -16(%eax) + decl %ecx + jnz .second_128_delay + + popl %edi + popl %esi + popl %edx + popl %ecx + popl %ebx + popl %eax + + leave + ret + .p2align 4,0 + +.global imdct512_window_delay_nol_kni + .type imdct512_window_delay_nol_kni, @function +imdct512_window_delay_nol_kni: + + pushl %ebp + movl %esp, %ebp + + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + pushl %esi + pushl %edi + + /* movl 20(%ebp), %ebx delay */ + movl 16(%ebp), %edx /* window */ + + movl 8(%ebp), %eax /* buf */ + movl $16, %ecx /* loop count */ + leal 516(%eax), %esi /* buf[64].im */ + leal 504(%eax), %edi /* buf[63].re */ + movl 12(%ebp), %eax /* data */ +.first_128_sample: + + movss (%esi), %xmm0 + movss 8(%esi), %xmm2 + movss (%edi), %xmm1 + movss -8(%edi), %xmm3 + + movlhps %xmm2, %xmm0 /* 0.0 | im1 | 0.0 | im0 */ + movlhps %xmm3, %xmm1 /* 0.0 | re1 | 0.0 | re0 */ + + movaps (%edx), %xmm4 /* w3 | w2 | w1 | w0 */ + /* movaps (%ebx), %xmm5 d3 | d2 | d1 | d0 */ + shufps $0xb1, %xmm1, %xmm1 /* re1 | 0.0 | re0 | 0.0 */ + + movss 16(%esi), %xmm6 /* im2 */ + movss 24(%esi), %xmm7 /* im3 */ + subps %xmm1, %xmm0 /* -re1 | im1 | -re0 | im0 */ + movss -16(%edi), %xmm2 /* re2 */ + movss -24(%edi), %xmm3 /* re3 */ + mulps %xmm4, %xmm0 + movlhps %xmm7, %xmm6 /* 0.0 | im3 | 0.0 | im2 */ + movlhps %xmm3, %xmm2 /* 0.0 | re3 | 0.0 | re2 */ + /* addps %xmm5, %xmm0 */ + shufps $0xb1, %xmm2, %xmm2 /* re3 | 0.0 | re2 | 0.0 */ + movaps 16(%edx), %xmm4 /* w7 | w6 | w5 | w4 */ + /* movaps 16(%ebx), %xmm5 d7 | d6 | d5 | d4 */ + subps %xmm2, %xmm6 /* -re3 | im3 | -re2 | im2 */ + addl $32, %edx + movaps %xmm0, (%eax) + /* addl $32, %ebx */ + mulps %xmm4, %xmm6 + addl $32, %esi + addl $32, %eax + /* addps %xmm5, %xmm6 */ + addl $-32, %edi + movaps %xmm6, -16(%eax) + decl %ecx + jnz .first_128_sample + + movl 8(%ebp), %esi /* buf[0].re */ + leal 1020(%esi), %edi /* buf[127].im */ + movl $16, %ecx /* loop count */ +.second_128_sample: + + movss (%esi), %xmm0 /* buf[i].re */ + movss 8(%esi), %xmm2 /* re1 */ + movss (%edi), %xmm1 /* buf[127-i].im */ + movss -8(%edi), %xmm3 /* im1 */ + + movlhps %xmm2, %xmm0 /* 0.0 | re1 | 0.0 | re0 */ + movlhps %xmm3, %xmm1 /* 0.0 | im1 | 0.0 | im1 */ + + movaps (%edx), %xmm4 /* w3 | w2 | w1 | w0 */ + /* movaps (%ebx), %xmm5 d3 | d2 | d1 | d0 */ + + shufps $0xb1, %xmm1, %xmm1 /* im1 | 0.0 | im0 | 0.0 */ + movss 16(%esi), %xmm6 /* re2 */ + movss 24(%esi), %xmm7 /* re3 */ + movss -16(%edi), %xmm2 /* im2 */ + movss -24(%edi), %xmm3 /* im3 */ + subps %xmm1, %xmm0 /* -im1 | re1 | -im0 | re0 */ + movlhps %xmm7, %xmm6 /* 0.0 | re3 | 0.0 | re2 */ + movlhps %xmm3, %xmm2 /* 0.0 | im3 | 0.0 | im2 */ + mulps %xmm4, %xmm0 + shufps $0xb1, %xmm2, %xmm2 /* im3 | 0.0 | im2 | 0.0 */ + movaps 16(%edx), %xmm4 /* w7 | w6 | w5 | w4 */ + addl $32, %esi + subps %xmm2, %xmm6 /* -im3 | re3 | -im2 | re2 */ + /* addps %xmm5, %xmm0 */ + mulps %xmm4, %xmm6 + addl $-32, %edi + /* movaps 16(%ebx), %xmm5 d7 | d6 | d5 | d4 */ + movaps %xmm0, (%eax) + /* addps %xmm5, %xmm6 */ + addl $32, %edx + addl $32, %eax + /* addl $32, %ebx */ + movaps %xmm6, -16(%eax) + decl %ecx + jnz .second_128_sample + + movl 8(%ebp), %eax + leal 512(%eax), %esi /* buf[64].re */ + leal 508(%eax), %edi /* buf[63].im */ + movl $16, %ecx /* loop count */ + movl 20(%ebp), %eax /* delay */ +.first_128_delays: + + movss (%esi), %xmm0 + movss 8(%esi), %xmm2 + movss (%edi), %xmm1 + movss -8(%edi), %xmm3 + + movlhps %xmm2, %xmm0 /* 0.0 | re1 | 0.0 | re0 */ + movlhps %xmm3, %xmm1 /* 0.0 | im1 | 0.0 | im0 */ + + movaps -16(%edx), %xmm4 /* w3 | w2 | w1 | w0 */ + shufps $0xb1, %xmm1, %xmm1 /* im1 | 0.0 | im0 | 0.0 */ + movss 16(%esi), %xmm6 /* re2 */ + movss 24(%esi), %xmm7 /* re3 */ + movss -16(%edi), %xmm2 /* im2 */ + movss -24(%edi), %xmm3 /* im3 */ + subps %xmm1, %xmm0 /* -im1 | re1 | -im0 | re0 */ + addl $-32, %edx + movlhps %xmm7, %xmm6 /* 0.0 | re3 | 0.0 | re2 */ + movlhps %xmm3, %xmm2 /* 0.0 | im3 | 0.0 | im2 */ + mulps %xmm4, %xmm0 + movaps (%edx), %xmm5 /* w7 | w6 | w5 | w4 */ + shufps $0xb1, %xmm2, %xmm2 /* im3 | 0.0 | im2 | 0.0 */ + movaps %xmm0, (%eax) + addl $32, %esi + subps %xmm2, %xmm6 /* -im3 | re3 | -im2 | re2 */ + addl $-32, %edi + mulps %xmm5, %xmm6 + addl $32, %eax + movaps %xmm6, -16(%eax) + decl %ecx + jnz .first_128_delays + + movl 8(%ebp), %ebx + leal 4(%ebx), %esi /* buf[0].im */ + leal 1016(%ebx), %edi /* buf[127].re */ + movl $16, %ecx /* loop count */ +.second_128_delays: + + movss (%esi), %xmm0 + movss 8(%esi), %xmm2 + movss (%edi), %xmm1 + movss -8(%edi), %xmm3 + + movlhps %xmm2, %xmm0 /* 0.0 | im1 | 0.0 | im0 */ + movlhps %xmm3, %xmm1 /* 0.0 | re1 | 0.0 | re0 */ + + movaps -16(%edx), %xmm4 /* w3 | w2 | w1 | w0 */ + shufps $0xb1, %xmm1, %xmm1 /* re1 | 0.0 | re0 | 0.0 */ + movss 16(%esi), %xmm6 /* im2 */ + movss 24(%esi), %xmm7 /* im3 */ + movss -16(%edi), %xmm2 /* re2 */ + movss -24(%edi), %xmm3 /* re3 */ + subps %xmm0, %xmm1 /* re1 | -im1 | re0 | -im0 */ + addl $-32, %edx + movlhps %xmm7, %xmm6 /* 0.0 | im3 | 0.0 | im2 */ + movlhps %xmm3, %xmm2 /* 0.0 | re3 | 0.0 | re2 */ + mulps %xmm4, %xmm1 + movaps (%edx), %xmm5 /* w7 | w6 | w5 | w4 */ + shufps $0xb1, %xmm2, %xmm2 /* re3 | 0.0 | re2 | 0.0 */ + movaps %xmm1, (%eax) + addl $32, %esi + subps %xmm6, %xmm2 /* re | -im3 | re | -im2 */ + addl $-32, %edi + mulps %xmm5, %xmm2 + addl $32, %eax + movaps %xmm2, -16(%eax) + decl %ecx + jnz .second_128_delays + + popl %edi + popl %esi + popl %edx + popl %ecx + popl %ebx + popl %eax + + leave + ret + .p2align 4,0 +#endif diff --git a/ac3dec/imdct_c.c b/ac3dec/imdct_c.c new file mode 100644 index 00000000..1f2bfe84 --- /dev/null +++ b/ac3dec/imdct_c.c @@ -0,0 +1,218 @@ +/* + * imdct.c + * + * Copyright (C) Aaron Holtzman - May 1999 + * + * This file is part of ac3dec, a free Dolby AC-3 stream decoder. + * + * ac3dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * ac3dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + */ + +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include "ac3.h" +#include "ac3_internal.h" + +#include "downmix.h" +#include "imdct_c.h" +#include "srfft.h" + + +#define N 512 + +extern void (*imdct_do_512) (float data[],float delay[]); +extern void (*imdct_do_512_nol) (float data[], float delay[]); +extern void (*fft_64p) (complex_t *); + +extern const int pm128[]; +extern float window[]; +extern complex_t buf[128]; + +extern void fft_64p_c (complex_t *); +extern void fft_128p_c (complex_t *); + +static void imdct_do_512_c (float data[],float delay[]); +static void imdct_do_512_nol_c (float data[], float delay[]); + +/* Twiddle factors for IMDCT */ +static float xcos1[128] __attribute__((aligned(16))); +static float xsin1[128] __attribute__((aligned(16))); + + +int imdct_init_c (void) +{ + int i; + float scale = 255.99609372; + + imdct_do_512 = imdct_do_512_c; + imdct_do_512_nol = imdct_do_512_nol_c; + fft_64p = fft_64p_c; + + /* Twiddle factors to turn IFFT into IMDCT */ + + for (i=0; i < 128; i++) { + xcos1[i] = cos(2.0f * M_PI * (8*i+1)/(8*N)) * scale; + xsin1[i] = sin(2.0f * M_PI * (8*i+1)/(8*N)) * scale; + } + + return 0; +} + + +static void imdct_do_512_c (float data[], float delay[]) +{ + int i, j; + float tmp_a_r, tmp_a_i; + float *data_ptr; + float *delay_ptr; + float *window_ptr; + +// 512 IMDCT with source and dest data in 'data' +// Pre IFFT complex multiply plus IFFT complex conjugate + + for( i=0; i < 128; i++) { + j = pm128[i]; + //a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]); + //c = data[2*j] * xcos1[j]; + //b = data[256-2*j-1] * xsin1[j]; + //buf1[i].re = a - b + c; + //buf1[i].im = b + c; + buf[i].re = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]); + buf[i].im = -1.0 * (data[2*j] * xcos1[j] + data[256-2*j-1] * xsin1[j]); + } + + fft_128p_c (&buf[0]); + +// Post IFFT complex multiply plus IFFT complex conjugate + for (i=0; i < 128; i++) { + tmp_a_r = buf[i].re; + tmp_a_i = buf[i].im; + //a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]); + //b = tmp_a_r * xsin1[j]; + //c = tmp_a_i * xcos1[j]; + //buf[j].re = a - b + c; + //buf[j].im = b + c; + buf[i].re =(tmp_a_r * xcos1[i]) + (tmp_a_i * xsin1[i]); + buf[i].im =(tmp_a_r * xsin1[i]) - (tmp_a_i * xcos1[i]); + } + + data_ptr = data; + delay_ptr = delay; + window_ptr = window; + +// Window and convert to real valued signal + for (i=0; i< 64; i++) { + *data_ptr++ = -buf[64+i].im * *window_ptr++ + *delay_ptr++; + *data_ptr++ = buf[64-i-1].re * *window_ptr++ + *delay_ptr++; + } + + for(i=0; i< 64; i++) { + *data_ptr++ = -buf[i].re * *window_ptr++ + *delay_ptr++; + *data_ptr++ = buf[128-i-1].im * *window_ptr++ + *delay_ptr++; + } + +// The trailing edge of the window goes into the delay line + delay_ptr = delay; + + for(i=0; i< 64; i++) { + *delay_ptr++ = -buf[64+i].re * *--window_ptr; + *delay_ptr++ = buf[64-i-1].im * *--window_ptr; + } + + for(i=0; i<64; i++) { + *delay_ptr++ = buf[i].im * *--window_ptr; + *delay_ptr++ = -buf[128-i-1].re * *--window_ptr; + } +} + + +static void imdct_do_512_nol_c (float data[], float delay[]) +{ + int i, j; + + float tmp_a_i; + float tmp_a_r; + + float *data_ptr; + float *delay_ptr; + float *window_ptr; + + // + // 512 IMDCT with source and dest data in 'data' + // + + // Pre IFFT complex multiply plus IFFT cmplx conjugate + + for( i=0; i < 128; i++) { + /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) */ + j = pm128[i]; + //a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]); + //c = data[2*j] * xcos1[j]; + //b = data[256-2*j-1] * xsin1[j]; + //buf1[i].re = a - b + c; + + //buf1[i].im = b + c; + buf[i].re = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]); + buf[i].im = -1.0 * (data[2*j] * xcos1[j] + data[256-2*j-1] * xsin1[j]); + } + + fft_128p_c (&buf[0]); + + /* Post IFFT complex multiply plus IFFT complex conjugate*/ + for (i=0; i < 128; i++) { + /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ + /* int j1 = i; */ + tmp_a_r = buf[i].re; + tmp_a_i = buf[i].im; + //a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]); + //b = tmp_a_r * xsin1[j]; + //c = tmp_a_i * xcos1[j]; + //buf[j].re = a - b + c; + //buf[j].im = b + c; + buf[i].re =(tmp_a_r * xcos1[i]) + (tmp_a_i * xsin1[i]); + buf[i].im =(tmp_a_r * xsin1[i]) - (tmp_a_i * xcos1[i]); + } + + data_ptr = data; + delay_ptr = delay; + window_ptr = window; + + /* Window and convert to real valued signal, no overlap here*/ + for (i=0; i< 64; i++) { + *data_ptr++ = -buf[64+i].im * *window_ptr++; + *data_ptr++ = buf[64-i-1].re * *window_ptr++; + } + + for(i=0; i< 64; i++) { + *data_ptr++ = -buf[i].re * *window_ptr++; + *data_ptr++ = buf[128-i-1].im * *window_ptr++; + } + + /* The trailing edge of the window goes into the delay line */ + delay_ptr = delay; + + for(i=0; i< 64; i++) { + *delay_ptr++ = -buf[64+i].re * *--window_ptr; + *delay_ptr++ = buf[64-i-1].im * *--window_ptr; + } + + for(i=0; i<64; i++) { + *delay_ptr++ = buf[i].im * *--window_ptr; + *delay_ptr++ = -buf[128-i-1].re * *--window_ptr; + } +} diff --git a/ac3dec/imdct_c.h b/ac3dec/imdct_c.h new file mode 100644 index 00000000..d2c31b54 --- /dev/null +++ b/ac3dec/imdct_c.h @@ -0,0 +1,36 @@ +/***** +* +* This file is part of the OMS program. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2, or (at your option) +* any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; see the file COPYING. If not, write to +* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. +* +*****/ + +#ifndef __IMDCT_C_H__ +#define __IMDCT_C_H__ + +#include "cmplx.h" + +int imdct_init_c (void); + +void fft_128p_c (complex_t *); +void fft_64p_c (complex_t *); + +void imdct512_pre_ifft_twiddle_c (const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse); +void imdct512_post_ifft_twiddle_c (complex_t *buf, float *xcos_sin_sse); +void imdct512_window_delay_c (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt); +void imdct512_window_delay_nol_c (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt); + +#endif diff --git a/ac3dec/imdct_kni.c b/ac3dec/imdct_kni.c new file mode 100644 index 00000000..b44547a9 --- /dev/null +++ b/ac3dec/imdct_kni.c @@ -0,0 +1,103 @@ +/* + * imdct_kni.c + * + * Copyright (C) Aaron Holtzman - May 1999 + * + * This file is part of ac3dec, a free Dolby AC-3 stream decoder. + * + * ac3dec is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * ac3dec is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + * + */ + +#ifdef __i386__ + +#include <stdlib.h> +#include <stdio.h> +#include <math.h> +#include <mm_accel.h> +#include "ac3.h" +#include "ac3_internal.h" + +#include "downmix.h" +#include "imdct_kni.h" +#include "srfft.h" + +#define N 512 + +/* Delay buffer for time domain interleaving */ +static float xcos_sin_sse[128 * 4] __attribute__((aligned(16))); + +extern void (*imdct_do_512) (float data[],float delay[]); +extern void (*imdct_do_512_nol) (float data[], float delay[]); +extern void (*fft_64p) (complex_t *); + +extern const int pm128[]; +extern float window[]; +extern complex_t buf[128]; + +extern void fft_64p_kni (complex_t *); +extern void fft_128p_kni (complex_t *); + +static void imdct_do_512_kni (float data[], float delay[]); +static void imdct_do_512_nol_kni (float data[], float delay[]); + + +int imdct_init_kni (void) +{ + uint32_t accel = mm_accel (); + + if (accel & MM_ACCEL_X86_MMXEXT) { + int i; + float scale = 255.99609372; + + fprintf (stderr, "Using SSE for IMDCT\n"); + imdct_do_512 = imdct_do_512_kni; + imdct_do_512_nol = imdct_do_512_nol_kni; + fft_64p = fft_64p_kni; + + for (i=0; i < 128; i++) { + float xcos_i = cos(2.0f * M_PI * (8*i+1)/(8*N)) * scale; + float xsin_i = sin(2.0f * M_PI * (8*i+1)/(8*N)) * scale; + xcos_sin_sse[i * 4] = xcos_i; + xcos_sin_sse[i * 4 + 1] = -xsin_i; + xcos_sin_sse[i * 4 + 2] = -xsin_i; + xcos_sin_sse[i * 4 + 3] = -xcos_i; + } + + return 0; + } else + return -1; +} + + +static void imdct_do_512_kni (float data[], float delay[]) +{ + imdct512_pre_ifft_twiddle_kni (pm128, buf, data, xcos_sin_sse); + fft_128p_kni (buf); + imdct512_post_ifft_twiddle_kni (buf, xcos_sin_sse); + imdct512_window_delay_kni (buf, data, window, delay); +} + + +static void imdct_do_512_nol_kni (float data[], float delay[]) +{ + imdct512_pre_ifft_twiddle_kni (pm128, buf, data, xcos_sin_sse); + fft_128p_kni (buf); + imdct512_post_ifft_twiddle_kni (buf, xcos_sin_sse); + imdct512_window_delay_nol_kni (buf, data, window, delay); +} + +#endif diff --git a/ac3dec/imdct_kni.h b/ac3dec/imdct_kni.h new file mode 100644 index 00000000..2193c075 --- /dev/null +++ b/ac3dec/imdct_kni.h @@ -0,0 +1,36 @@ +/***** +* +* This file is part of the OMS program. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2, or (at your option) +* any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; see the file COPYING. If not, write to +* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. +* +*****/ + +#ifndef __IMDCT_KNI_H__ +#define __IMDCT_KNI_H__ + +#include "cmplx.h" + +int imdct_init_kni (void); + +void fft_128p_kni(complex_t *); +void fft_64p_kni(complex_t *); + +void imdct512_pre_ifft_twiddle_kni(const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse); +void imdct512_post_ifft_twiddle_kni(complex_t *buf, float *xcos_sin_sse); +void imdct512_window_delay_kni(complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt); +void imdct512_window_delay_nol_kni(complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt); + +#endif diff --git a/ac3dec/mm_accel.h b/ac3dec/mm_accel.h new file mode 100644 index 00000000..82dc5837 --- /dev/null +++ b/ac3dec/mm_accel.h @@ -0,0 +1,36 @@ +/***** +* +* This file is part of the OMS program. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2, or (at your option) +* any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; see the file COPYING. If not, write to +* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. +* +*****/ + +#ifndef __MM_ACCEL_H__ +#define __MM_ACCEL_H__ + +#include <inttypes.h> + +// generic accelerations +#define MM_ACCEL_MLIB 0x00000001 + +// x86 accelerations +#define MM_ACCEL_X86_MMX 0x80000000 +#define MM_ACCEL_X86_3DNOW 0x40000000 +#define MM_ACCEL_X86_MMXEXT 0x20000000 + +uint32_t mm_accel (void); + +#endif diff --git a/ac3dec/parse.c b/ac3dec/parse.c index 3560bc5a..49ea57c6 100644 --- a/ac3dec/parse.c +++ b/ac3dec/parse.c @@ -31,15 +31,16 @@ #include "bitstream.h" #include "stats.h" #include "debug.h" +#include "crc.h" #include "parse.h" /* Misc LUT */ -static const uint_16 nfchans[8] = {2,1,2,3,3,4,4,5}; +static const uint16_t nfchans[8] = {2,1,2,3,3,4,4,5}; struct frmsize_s { - uint_16 bit_rate; - uint_16 frm_size[3]; + uint16_t bit_rate; + uint16_t frm_size[3]; }; static const struct frmsize_s frmsizecod_tbl[64] = @@ -85,8 +86,7 @@ static const struct frmsize_s frmsizecod_tbl[64] = }; /* Parse a syncinfo structure, minus the sync word */ -void -parse_syncinfo(syncinfo_t *syncinfo,uint_8 *data) +void parse_syncinfo(syncinfo_t *syncinfo, uint8_t *data) { // // We need to read in the entire syncinfo struct (0x0b77 + 24 bits) @@ -96,10 +96,8 @@ parse_syncinfo(syncinfo_t *syncinfo,uint_8 *data) // Get the sampling rate syncinfo->fscod = (data[2] >> 6) & 0x3; - if(syncinfo->fscod == 3) - { + if(syncinfo->fscod == 3) { //invalid sampling rate code - error_flag = 1; return; } else if(syncinfo->fscod == 2) @@ -119,15 +117,13 @@ parse_syncinfo(syncinfo_t *syncinfo,uint_8 *data) } -/* + +/** * This routine fills a bsi struct from the AC3 stream - */ + **/ -void -parse_bsi(bsi_t *bsi) +void parse_bsi(bsi_t *bsi) { - uint_32 i; - /* Check the AC-3 version number */ bsi->bsid = bitstream_get(5); @@ -159,25 +155,19 @@ parse_bsi(bsi_t *bsi) bsi->dialnorm = bitstream_get(5); /* Does compression gain exist? */ - bsi->compre = bitstream_get(1); - if (bsi->compre) - { + if ((bsi->compre = bitstream_get(1))) { /* Get compression gain */ bsi->compr = bitstream_get(8); } /* Does language code exist? */ - bsi->langcode = bitstream_get(1); - if (bsi->langcode) - { + if ((bsi->langcode = bitstream_get(1))) { /* Get langauge code */ bsi->langcod = bitstream_get(8); } /* Does audio production info exist? */ - bsi->audprodie = bitstream_get(1); - if (bsi->audprodie) - { + if ((bsi->audprodie = bitstream_get(1))) { /* Get mix level */ bsi->mixlevel = bitstream_get(5); @@ -186,31 +176,24 @@ parse_bsi(bsi_t *bsi) } /* If we're in dual mono mode then get some extra info */ - if (bsi->acmod ==0) - { + if (!bsi->acmod) { /* Get the dialogue normalization level two */ bsi->dialnorm2 = bitstream_get(5); /* Does compression gain two exist? */ - bsi->compr2e = bitstream_get(1); - if (bsi->compr2e) - { + if ((bsi->compr2e = bitstream_get(1))) { /* Get compression gain two */ bsi->compr2 = bitstream_get(8); } /* Does language code two exist? */ - bsi->langcod2e = bitstream_get(1); - if (bsi->langcod2e) - { + if ((bsi->langcod2e = bitstream_get(1))) { /* Get langauge code two */ bsi->langcod2 = bitstream_get(8); } /* Does audio production info two exist? */ - bsi->audprodi2e = bitstream_get(1); - if (bsi->audprodi2e) - { + if ((bsi->audprodi2e = bitstream_get(1))) { /* Get mix level two */ bsi->mixlevel2 = bitstream_get(5); @@ -226,22 +209,17 @@ parse_bsi(bsi_t *bsi) bsi->origbs = bitstream_get(1); /* Does timecode one exist? */ - bsi->timecod1e = bitstream_get(1); - - if(bsi->timecod1e) + if ((bsi->timecod1e = bitstream_get(1))) bsi->timecod1 = bitstream_get(14); /* Does timecode two exist? */ - bsi->timecod2e = bitstream_get(1); - - if(bsi->timecod2e) + if ((bsi->timecod2e = bitstream_get(1))) bsi->timecod2 = bitstream_get(14); /* Does addition info exist? */ - bsi->addbsie = bitstream_get(1); + if ((bsi->addbsie = bitstream_get(1))) { + uint32_t i; - if(bsi->addbsie) - { /* Get how much info is there */ bsi->addbsil = bitstream_get(6); @@ -253,52 +231,41 @@ parse_bsi(bsi_t *bsi) stats_print_bsi(bsi); } + /* More pain inducing parsing */ -void -parse_audblk(bsi_t *bsi,audblk_t *audblk) +void parse_audblk(bsi_t *bsi,audblk_t *audblk) { int i,j; - for (i=0;i < bsi->nfchans; i++) - { + for (i=0; i < bsi->nfchans; i++) { /* Is this channel an interleaved 256 + 256 block ? */ audblk->blksw[i] = bitstream_get(1); } - for (i=0;i < bsi->nfchans; i++) - { + for (i=0;i < bsi->nfchans; i++) { /* Should we dither this channel? */ audblk->dithflag[i] = bitstream_get(1); } /* Does dynamic range control exist? */ - audblk->dynrnge = bitstream_get(1); - if (audblk->dynrnge) - { + if ((audblk->dynrnge = bitstream_get(1))) { /* Get dynamic range info */ audblk->dynrng = bitstream_get(8); } /* If we're in dual mono mode then get the second channel DR info */ - if (bsi->acmod == 0) - { + if (bsi->acmod == 0) { /* Does dynamic range control two exist? */ - audblk->dynrng2e = bitstream_get(1); - if (audblk->dynrng2e) - { + if ((audblk->dynrng2e = bitstream_get(1))) { /* Get dynamic range info */ audblk->dynrng2 = bitstream_get(8); } } /* Does coupling strategy exist? */ - audblk->cplstre = bitstream_get(1); - if (audblk->cplstre) - { + if ((audblk->cplstre = bitstream_get(1))) { /* Is coupling turned on? */ - audblk->cplinu = bitstream_get(1); - if(audblk->cplinu) - { + if ((audblk->cplinu = bitstream_get(1))) { for(i=0;i < bsi->nfchans; i++) audblk->chincpl[i] = bitstream_get(1); if(bsi->acmod == 0x2) @@ -315,30 +282,23 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk) * band */ audblk->ncplbnd = audblk->ncplsubnd; - for(i=1; i< audblk->ncplsubnd; i++) - { + for(i=1; i< audblk->ncplsubnd; i++) { audblk->cplbndstrc[i] = bitstream_get(1); audblk->ncplbnd -= audblk->cplbndstrc[i]; } } } - if(audblk->cplinu) - { + if(audblk->cplinu) { /* Loop through all the channels and get their coupling co-ords */ - for(i=0;i < bsi->nfchans;i++) - { + for(i=0;i < bsi->nfchans;i++) { if(!audblk->chincpl[i]) continue; /* Is there new coupling co-ordinate info? */ - audblk->cplcoe[i] = bitstream_get(1); - - if(audblk->cplcoe[i]) - { + if ((audblk->cplcoe[i] = bitstream_get(1))) { audblk->mstrcplco[i] = bitstream_get(2); - for(j=0;j < audblk->ncplbnd; j++) - { + for(j=0;j < audblk->ncplbnd; j++) { audblk->cplcoexp[i][j] = bitstream_get(4); audblk->cplcomant[i][j] = bitstream_get(4); } @@ -347,8 +307,7 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk) /* If we're in dual mono mode, there's going to be some phase info */ if( (bsi->acmod == 0x2) && audblk->phsflginu && - (audblk->cplcoe[0] || audblk->cplcoe[1])) - { + (audblk->cplcoe[0] || audblk->cplcoe[1])) { for(j=0;j < audblk->ncplbnd; j++) audblk->phsflg[j] = bitstream_get(1); @@ -356,23 +315,17 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk) } /* If we're in dual mono mode, there may be a rematrix strategy */ - if(bsi->acmod == 0x2) - { - audblk->rematstr = bitstream_get(1); - if(audblk->rematstr) - { - if (audblk->cplinu == 0) - { + if(bsi->acmod == 0x2) { + if ((audblk->rematstr = bitstream_get(1))) { + if (!audblk->cplinu) { for(i = 0; i < 4; i++) audblk->rematflg[i] = bitstream_get(1); } - if((audblk->cplbegf > 2) && audblk->cplinu) - { + if((audblk->cplbegf > 2) && audblk->cplinu) { for(i = 0; i < 4; i++) audblk->rematflg[i] = bitstream_get(1); } - if((audblk->cplbegf <= 2) && audblk->cplinu) - { + if((audblk->cplbegf <= 2) && audblk->cplinu) { for(i = 0; i < 3; i++) audblk->rematflg[i] = bitstream_get(1); } @@ -383,8 +336,7 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk) } } - if (audblk->cplinu) - { + if (audblk->cplinu) { /* Get the coupling channel exponent strategy */ audblk->cplexpstr = bitstream_get(2); audblk->ncplgrps = (audblk->cplendmant - audblk->cplstrtmant) / @@ -399,18 +351,13 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk) audblk->lfeexpstr = bitstream_get(1); /* Determine the bandwidths of all the fbw channels */ - for(i = 0; i < bsi->nfchans; i++) - { - uint_16 grp_size; - - if(audblk->chexpstr[i] != EXP_REUSE) - { - if (audblk->cplinu && audblk->chincpl[i]) - { + for(i = 0; i < bsi->nfchans; i++) { + uint16_t grp_size; + + if(audblk->chexpstr[i] != EXP_REUSE) { + if (audblk->cplinu && audblk->chincpl[i]) { audblk->endmant[i] = audblk->cplstrtmant; - } - else - { + } else { audblk->chbwcod[i] = bitstream_get(6); audblk->endmant[i] = ((audblk->chbwcod[i] + 12) * 3) + 37; } @@ -422,18 +369,15 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk) } /* Get the coupling exponents if they exist */ - if(audblk->cplinu && (audblk->cplexpstr != EXP_REUSE)) - { + if(audblk->cplinu && (audblk->cplexpstr != EXP_REUSE)) { audblk->cplabsexp = bitstream_get(4); for(i=0;i< audblk->ncplgrps;i++) audblk->cplexps[i] = bitstream_get(7); } /* Get the fwb channel exponents */ - for(i=0;i < bsi->nfchans; i++) - { - if(audblk->chexpstr[i] != EXP_REUSE) - { + for(i=0;i < bsi->nfchans; i++) { + if(audblk->chexpstr[i] != EXP_REUSE) { audblk->exps[i][0] = bitstream_get(4); for(j=1;j<=audblk->nchgrps[i];j++) audblk->exps[i][j] = bitstream_get(7); @@ -442,8 +386,7 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk) } /* Get the lfe channel exponents */ - if(bsi->lfeon && (audblk->lfeexpstr != EXP_REUSE)) - { + if(bsi->lfeon && (audblk->lfeexpstr != EXP_REUSE)) { audblk->lfeexps[0] = bitstream_get(4); audblk->lfeexps[1] = bitstream_get(7); audblk->lfeexps[2] = bitstream_get(7); @@ -452,8 +395,7 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk) /* Get the parametric bit allocation parameters */ audblk->baie = bitstream_get(1); - if(audblk->baie) - { + if(audblk->baie) { audblk->sdcycod = bitstream_get(2); audblk->fdcycod = bitstream_get(2); audblk->sgaincod = bitstream_get(2); @@ -464,23 +406,19 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk) /* Get the SNR off set info if it exists */ audblk->snroffste = bitstream_get(1); - if(audblk->snroffste) - { + if(audblk->snroffste) { audblk->csnroffst = bitstream_get(6); - if(audblk->cplinu) - { + if(audblk->cplinu) { audblk->cplfsnroffst = bitstream_get(4); audblk->cplfgaincod = bitstream_get(3); } - for(i = 0;i < bsi->nfchans; i++) - { + for(i = 0;i < bsi->nfchans; i++) { audblk->fsnroffst[i] = bitstream_get(4); audblk->fgaincod[i] = bitstream_get(3); } - if(bsi->lfeon) - { + if(bsi->lfeon) { audblk->lfefsnroffst = bitstream_get(4); audblk->lfefgaincod = bitstream_get(3); @@ -488,12 +426,10 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk) } /* Get coupling leakage info if it exists */ - if(audblk->cplinu) - { + if(audblk->cplinu) { audblk->cplleake = bitstream_get(1); - if(audblk->cplleake) - { + if(audblk->cplleake) { audblk->cplfleak = bitstream_get(3); audblk->cplsleak = bitstream_get(3); } @@ -502,32 +438,26 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk) /* Get the delta bit alloaction info */ audblk->deltbaie = bitstream_get(1); - if(audblk->deltbaie) - { + if(audblk->deltbaie) { if(audblk->cplinu) audblk->cpldeltbae = bitstream_get(2); for(i = 0;i < bsi->nfchans; i++) audblk->deltbae[i] = bitstream_get(2); - if (audblk->cplinu && (audblk->cpldeltbae == DELTA_BIT_NEW)) - { + if (audblk->cplinu && (audblk->cpldeltbae == DELTA_BIT_NEW)) { audblk->cpldeltnseg = bitstream_get(3); - for(i = 0;i < audblk->cpldeltnseg + 1; i++) - { + for(i = 0;i < audblk->cpldeltnseg + 1; i++) { audblk->cpldeltoffst[i] = bitstream_get(5); audblk->cpldeltlen[i] = bitstream_get(4); audblk->cpldeltba[i] = bitstream_get(3); } } - for(i = 0;i < bsi->nfchans; i++) - { - if (audblk->deltbae[i] == DELTA_BIT_NEW) - { + for(i = 0;i < bsi->nfchans; i++) { + if (audblk->deltbae[i] == DELTA_BIT_NEW) { audblk->deltnseg[i] = bitstream_get(3); - for(j = 0; j < audblk->deltnseg[i] + 1; j++) - { + for(j = 0; j < audblk->deltnseg[i] + 1; j++) { audblk->deltoffst[i][j] = bitstream_get(5); audblk->deltlen[i][j] = bitstream_get(4); audblk->deltba[i][j] = bitstream_get(3); @@ -537,61 +467,15 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk) } /* Check to see if there's any dummy info to get */ - if((audblk->skiple = bitstream_get(1))) - { - uint_16 skip_data; + if((audblk->skiple = bitstream_get(1))) { + uint16_t skip_data; audblk->skipl = bitstream_get(9); - //XXX remove - //fprintf(stderr,"(parse) skipping %d bytes\n",audblk->skipl); - for(i = 0; i < audblk->skipl ; i++) - { + for (i = 0; i < audblk->skipl; i++) { skip_data = bitstream_get(8); - //XXX remove - //fprintf(stderr,"skipped data %2x\n",skip_data); - //if(skip_data != 0) - //{ - //dprintf("(parse) Invalid skipped data %2x\n",skip_data); - //exit(1); - //} } } stats_print_audblk(bsi,audblk); } - -void -parse_auxdata(syncinfo_t *syncinfo) -{ - //FIXME keep this now that we don't really need it? -#if 0 - int i; - int skip_length; - uint_16 crc; - uint_16 auxdatae; - - skip_length = (syncinfo->frame_size * 16) - bitstream_get_total_bits() - 17 - 1; - - //XXX remove - //dprintf("(auxdata) skipping %d auxbits\n",skip_length); - - for(i=0; i < skip_length; i++) - //printf("Skipped bit %i\n",(uint_16)bitstream_get(1)); - bitstream_get(1); - - //get the auxdata exists bit - auxdatae = bitstream_get(1); - - //XXX remove - //dprintf("auxdatae = %i\n",auxdatae); - - //Skip the CRC reserved bit - bitstream_get(1); - - //Get the crc - crc = bitstream_get(16); -#endif -} - - diff --git a/ac3dec/parse.h b/ac3dec/parse.h index cdaee66f..6264fea1 100644 --- a/ac3dec/parse.h +++ b/ac3dec/parse.h @@ -21,8 +21,6 @@ * */ -void parse_syncinfo(syncinfo_t *syncinfo,uint_8 *data); +void parse_syncinfo(syncinfo_t *syncinfo,uint8_t *data); void parse_audblk(bsi_t *bsi,audblk_t *audblk); void parse_bsi(bsi_t *bsi); -void parse_auxdata(syncinfo_t *syncinfo); - diff --git a/ac3dec/rematrix.c b/ac3dec/rematrix.c index caa70943..f1df19b1 100644 --- a/ac3dec/rematrix.c +++ b/ac3dec/rematrix.c @@ -28,52 +28,60 @@ #include "ac3_internal.h" -#include "decode.h" #include "rematrix.h" + struct rematrix_band_s { - uint_32 start; - uint_32 end; + uint32_t start; + uint32_t end; +} rematrix_band[] = { + {13, 24}, + {25, 36}, + {37, 60}, + {61, 252} }; -struct rematrix_band_s rematrix_band[] = { {13,24}, {25,36}, {37 ,60}, {61,252}}; -static inline uint_32 min(uint_32 a,uint_32 b); +/** + * + **/ -static inline uint_32 -min(uint_32 a,uint_32 b) +inline uint32_t min (uint32_t a, uint32_t b) { - return (a < b ? a : b); + return (a < b) ? a : b; } -/* This routine simply does stereo rematixing for the 2 channel - * stereo mode */ -void rematrix(audblk_t *audblk, stream_samples_t samples) + +/** + * This routine simply does stereo remartixing for the 2 channel + * stereo mode + **/ + +void rematrix (audblk_t *audblk, stream_samples_t samples) { - uint_32 num_bands; - uint_32 start; - uint_32 end; - uint_32 i,j; - float left,right; + uint32_t num_bands; + uint32_t start; + uint32_t end; + int i,j; - if(!audblk->cplinu || audblk->cplbegf > 2) + if (!audblk->cplinu || audblk->cplbegf > 2) num_bands = 4; else if (audblk->cplbegf > 0) num_bands = 3; else num_bands = 2; - for(i=0;i < num_bands; i++) - { - if(!audblk->rematflg[i]) + for (i=0; i < num_bands; i++) { + if (!audblk->rematflg[i]) continue; start = rematrix_band[i].start; - end = min(rematrix_band[i].end ,12 * audblk->cplbegf + 36); + end = min (rematrix_band[i].end ,12 * audblk->cplbegf + 36); - for(j=start;j < end; j++) - { + for (j=start;j < end; j++) { + float left,right; + left = samples[0][j] + samples[1][j]; right = samples[0][j] - samples[1][j]; samples[0][j] = left; diff --git a/ac3dec/sanity_check.c b/ac3dec/sanity_check.c index 461f20ea..d475de14 100644 --- a/ac3dec/sanity_check.c +++ b/ac3dec/sanity_check.c @@ -28,8 +28,11 @@ #include "sanity_check.h" -void -sanity_check_init(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk) +/** + * + **/ + +void sanity_check_init(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk) { syncinfo->magic = AC3_MAGIC_NUMBER; bsi->magic = AC3_MAGIC_NUMBER; @@ -38,94 +41,84 @@ sanity_check_init(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk) audblk->magic3 = AC3_MAGIC_NUMBER; } -void -sanity_check(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk) + +/** + * + **/ + +int sanity_check(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk) { int i; - if(syncinfo->magic != AC3_MAGIC_NUMBER) - { + if(syncinfo->magic != AC3_MAGIC_NUMBER) { fprintf(stderr,"\n** Sanity check failed -- syncinfo magic number **"); - error_flag = 1; + return -1; } - if(bsi->magic != AC3_MAGIC_NUMBER) - { + if(bsi->magic != AC3_MAGIC_NUMBER) { fprintf(stderr,"\n** Sanity check failed -- bsi magic number **"); - error_flag = 1; + return -1; } - if(audblk->magic1 != AC3_MAGIC_NUMBER) - { + if(audblk->magic1 != AC3_MAGIC_NUMBER) { fprintf(stderr,"\n** Sanity check failed -- audblk magic number 1 **"); - error_flag = 1; + return -1; } - if(audblk->magic2 != AC3_MAGIC_NUMBER) - { + if(audblk->magic2 != AC3_MAGIC_NUMBER) { fprintf(stderr,"\n** Sanity check failed -- audblk magic number 2 **"); - error_flag = 1; + return -1; } - if(audblk->magic3 != AC3_MAGIC_NUMBER) - { + if(audblk->magic3 != AC3_MAGIC_NUMBER) { fprintf(stderr,"\n** Sanity check failed -- audblk magic number 3 **"); - error_flag = 1; + return -1; } - for(i = 0;i < 5 ; i++) - { + for(i = 0;i < 5 ; i++) { if (audblk->fbw_exp[i][255] !=0 || audblk->fbw_exp[i][254] !=0 || - audblk->fbw_exp[i][253] !=0) - { + audblk->fbw_exp[i][253] !=0) { fprintf(stderr,"\n** Sanity check failed -- fbw_exp out of bounds **"); - error_flag = 1; + return -1; } if (audblk->fbw_bap[i][255] !=0 || audblk->fbw_bap[i][254] !=0 || - audblk->fbw_bap[i][253] !=0) - { + audblk->fbw_bap[i][253] !=0) { fprintf(stderr,"\n** Sanity check failed -- fbw_bap out of bounds **"); - error_flag = 1; + return -1; } } if (audblk->cpl_exp[255] !=0 || audblk->cpl_exp[254] !=0 || - audblk->cpl_exp[253] !=0) - { + audblk->cpl_exp[253] !=0) { fprintf(stderr,"\n** Sanity check failed -- cpl_exp out of bounds **"); - error_flag = 1; + return -1; } if (audblk->cpl_bap[255] !=0 || audblk->cpl_bap[254] !=0 || - audblk->cpl_bap[253] !=0) - { + audblk->cpl_bap[253] !=0) { fprintf(stderr,"\n** Sanity check failed -- cpl_bap out of bounds **"); - error_flag = 1; + return -1; } - if (audblk->cplmant[255] !=0 || audblk->cplmant[254] !=0 || - audblk->cplmant[253] !=0) - { + if (audblk->cpl_flt[255] !=0 || audblk->cpl_flt[254] !=0 || + audblk->cpl_flt[253] !=0) { fprintf(stderr,"\n** Sanity check failed -- cpl_mant out of bounds **"); - error_flag = 1; + return -1; } - if ((audblk->cplinu == 1) && (audblk->cplbegf > (audblk->cplendf+2))) - { + if ((audblk->cplinu == 1) && (audblk->cplbegf > (audblk->cplendf+2))) { fprintf(stderr,"\n** Sanity check failed -- cpl params inconsistent **"); - error_flag = 1; + return -1; } - for(i=0; i < bsi->nfchans; i++) - { - if((audblk->chincpl[i] == 0) && (audblk->chbwcod[i] > 60)) - { + for(i=0; i < bsi->nfchans; i++) { + if((audblk->chincpl[i] == 0) && (audblk->chbwcod[i] > 60)) { fprintf(stderr,"\n** Sanity check failed -- chbwcod too big **"); - error_flag = 1; + return -1; } } - return; + return 0; } diff --git a/ac3dec/sanity_check.h b/ac3dec/sanity_check.h index c4ca63a2..ead9399c 100644 --- a/ac3dec/sanity_check.h +++ b/ac3dec/sanity_check.h @@ -23,5 +23,5 @@ #define AC3_MAGIC_NUMBER 0xdeadbeef -void sanity_check_init(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk); -void sanity_check(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk); +void sanity_check_init (syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk); +int sanity_check (syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk); diff --git a/ac3dec/srfft.c b/ac3dec/srfft.c new file mode 100644 index 00000000..4cbb6295 --- /dev/null +++ b/ac3dec/srfft.c @@ -0,0 +1,305 @@ +/* + * srfft.c + * + * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000 + * + * 64 and 128 point split radix fft for ac3dec + * + * The algorithm is desribed in the book: + * "Computational Frameworks of the Fast Fourier Transform". + * + * The ideas and the the organization of code borrowed from djbfft written by + * D. J. Bernstein <djb@cr.py.to>. djbff can be found at + * http://cr.yp.to/djbfft.html. + * + * srfft.c is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * srfft.c is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#include <stdio.h> + +#include "srfft.h" +#include "srfftp.h" + +void fft_8 (complex_t *x); + +void fft_4(complex_t *x) +{ + /* delta_p = 1 here */ + /* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4} + */ + + register float yt_r, yt_i, yb_r, yb_i, u_r, u_i, vi_r, vi_i; + + yt_r = x[0].re; + yb_r = yt_r - x[2].re; + yt_r += x[2].re; + + u_r = x[1].re; + vi_i = x[3].re - u_r; + u_r += x[3].re; + + u_i = x[1].im; + vi_r = u_i - x[3].im; + u_i += x[3].im; + + yt_i = yt_r; + yt_i += u_r; + x[0].re = yt_i; + yt_r -= u_r; + x[2].re = yt_r; + yt_i = yb_r; + yt_i += vi_r; + x[1].re = yt_i; + yb_r -= vi_r; + x[3].re = yb_r; + + yt_i = x[0].im; + yb_i = yt_i - x[2].im; + yt_i += x[2].im; + + yt_r = yt_i; + yt_r += u_i; + x[0].im = yt_r; + yt_i -= u_i; + x[2].im = yt_i; + yt_r = yb_i; + yt_r += vi_i; + x[1].im = yt_r; + yb_i -= vi_i; + x[3].im = yb_i; +} + + +void fft_8 (complex_t *x) +{ + /* delta_p = diag{1, sqrt(i)} here */ + /* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8} + */ + register float wT1_r, wT1_i, wB1_r, wB1_i, wT2_r, wT2_i, wB2_r, wB2_i; + + wT1_r = x[1].re; + wT1_i = x[1].im; + wB1_r = x[3].re; + wB1_i = x[3].im; + + x[1] = x[2]; + x[2] = x[4]; + x[3] = x[6]; + fft_4(&x[0]); + + + /* x[0] x[4] */ + wT2_r = x[5].re; + wT2_r += x[7].re; + wT2_r += wT1_r; + wT2_r += wB1_r; + wT2_i = wT2_r; + wT2_r += x[0].re; + wT2_i = x[0].re - wT2_i; + x[0].re = wT2_r; + x[4].re = wT2_i; + + wT2_i = x[5].im; + wT2_i += x[7].im; + wT2_i += wT1_i; + wT2_i += wB1_i; + wT2_r = wT2_i; + wT2_r += x[0].im; + wT2_i = x[0].im - wT2_i; + x[0].im = wT2_r; + x[4].im = wT2_i; + + /* x[2] x[6] */ + wT2_r = x[5].im; + wT2_r -= x[7].im; + wT2_r += wT1_i; + wT2_r -= wB1_i; + wT2_i = wT2_r; + wT2_r += x[2].re; + wT2_i = x[2].re - wT2_i; + x[2].re = wT2_r; + x[6].re = wT2_i; + + wT2_i = x[5].re; + wT2_i -= x[7].re; + wT2_i += wT1_r; + wT2_i -= wB1_r; + wT2_r = wT2_i; + wT2_r += x[2].im; + wT2_i = x[2].im - wT2_i; + x[2].im = wT2_i; + x[6].im = wT2_r; + + + /* x[1] x[5] */ + wT2_r = wT1_r; + wT2_r += wB1_i; + wT2_r -= x[5].re; + wT2_r -= x[7].im; + wT2_i = wT1_i; + wT2_i -= wB1_r; + wT2_i -= x[5].im; + wT2_i += x[7].re; + + wB2_r = wT2_r; + wB2_r += wT2_i; + wT2_i -= wT2_r; + wB2_r *= HSQRT2; + wT2_i *= HSQRT2; + wT2_r = wB2_r; + wB2_r += x[1].re; + wT2_r = x[1].re - wT2_r; + + wB2_i = x[5].re; + x[1].re = wB2_r; + x[5].re = wT2_r; + + wT2_r = wT2_i; + wT2_r += x[1].im; + wT2_i = x[1].im - wT2_i; + wB2_r = x[5].im; + x[1].im = wT2_r; + x[5].im = wT2_i; + + /* x[3] x[7] */ + wT1_r -= wB1_i; + wT1_i += wB1_r; + wB1_r = wB2_i - x[7].im; + wB1_i = wB2_r + x[7].re; + wT1_r -= wB1_r; + wT1_i -= wB1_i; + wB1_r = wT1_r + wT1_i; + wB1_r *= HSQRT2; + wT1_i -= wT1_r; + wT1_i *= HSQRT2; + wB2_r = x[3].re; + wB2_i = wB2_r + wT1_i; + wB2_r -= wT1_i; + x[3].re = wB2_i; + x[7].re = wB2_r; + wB2_i = x[3].im; + wB2_r = wB2_i + wB1_r; + wB2_i -= wB1_r; + x[3].im = wB2_i; + x[7].im = wB2_r; +} + + +void fft_asmb(int k, complex_t *x, complex_t *wTB, + const complex_t *d, const complex_t *d_3) +{ + register complex_t *x2k, *x3k, *x4k, *wB; + register float a_r, a_i, a1_r, a1_i, u_r, u_i, v_r, v_i; + + x2k = x + 2 * k; + x3k = x2k + 2 * k; + x4k = x3k + 2 * k; + wB = wTB + 2 * k; + + TRANSZERO(x[0],x2k[0],x3k[0],x4k[0]); + TRANS(x[1],x2k[1],x3k[1],x4k[1],wTB[1],wB[1],d[1],d_3[1]); + + --k; + for(;;) { + TRANS(x[2],x2k[2],x3k[2],x4k[2],wTB[2],wB[2],d[2],d_3[2]); + TRANS(x[3],x2k[3],x3k[3],x4k[3],wTB[3],wB[3],d[3],d_3[3]); + if (!--k) break; + x += 2; + x2k += 2; + x3k += 2; + x4k += 2; + d += 2; + d_3 += 2; + wTB += 2; + wB += 2; + } + +} + +void fft_asmb16(complex_t *x, complex_t *wTB) +{ + register float a_r, a_i, a1_r, a1_i, u_r, u_i, v_r, v_i; + int k = 2; + + /* transform x[0], x[8], x[4], x[12] */ + TRANSZERO(x[0],x[4],x[8],x[12]); + + /* transform x[1], x[9], x[5], x[13] */ + TRANS(x[1],x[5],x[9],x[13],wTB[1],wTB[5],delta16[1],delta16_3[1]); + + /* transform x[2], x[10], x[6], x[14] */ + TRANSHALF_16(x[2],x[6],x[10],x[14]); + + /* transform x[3], x[11], x[7], x[15] */ + TRANS(x[3],x[7],x[11],x[15],wTB[3],wTB[7],delta16[3],delta16_3[3]); + +} + + +void fft_64p_c (complex_t *a) +{ + fft_8(&a[0]); fft_4(&a[8]); fft_4(&a[12]); + fft_asmb16(&a[0], &a[8]); + + fft_8(&a[16]), fft_8(&a[24]); + fft_asmb(4, &a[0], &a[16],&delta32[0], &delta32_3[0]); + + fft_8(&a[32]); fft_4(&a[40]); fft_4(&a[44]); + fft_asmb16(&a[32], &a[40]); + + fft_8(&a[48]); fft_4(&a[56]); fft_4(&a[60]); + fft_asmb16(&a[48], &a[56]); + + fft_asmb(8, &a[0], &a[32],&delta64[0], &delta64_3[0]); +} + + +void fft_128p_c (complex_t *a) +{ + fft_8(&a[0]); fft_4(&a[8]); fft_4(&a[12]); + fft_asmb16(&a[0], &a[8]); + + fft_8(&a[16]), fft_8(&a[24]); + fft_asmb(4, &a[0], &a[16],&delta32[0], &delta32_3[0]); + + fft_8(&a[32]); fft_4(&a[40]); fft_4(&a[44]); + fft_asmb16(&a[32], &a[40]); + + fft_8(&a[48]); fft_4(&a[56]); fft_4(&a[60]); + fft_asmb16(&a[48], &a[56]); + + fft_asmb(8, &a[0], &a[32],&delta64[0], &delta64_3[0]); + + fft_8(&a[64]); fft_4(&a[72]); fft_4(&a[76]); + /* fft_16(&a[64]); */ + fft_asmb16(&a[64], &a[72]); + + fft_8(&a[80]); fft_8(&a[88]); + + /* fft_32(&a[64]); */ + fft_asmb(4, &a[64], &a[80],&delta32[0], &delta32_3[0]); + + fft_8(&a[96]); fft_4(&a[104]), fft_4(&a[108]); + /* fft_16(&a[96]); */ + fft_asmb16(&a[96], &a[104]); + + fft_8(&a[112]), fft_8(&a[120]); + /* fft_32(&a[96]); */ + fft_asmb(4, &a[96], &a[112], &delta32[0], &delta32_3[0]); + + /* fft_128(&a[0]); */ + fft_asmb(16, &a[0], &a[64], &delta128[0], &delta128_3[0]); +} diff --git a/ac3dec/srfft.h b/ac3dec/srfft.h new file mode 100644 index 00000000..7916092a --- /dev/null +++ b/ac3dec/srfft.h @@ -0,0 +1,39 @@ +/* + * srfft.h + * + * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000 + * + * 64 and 128 point split radix fft for ac3dec + * + * The algorithm is desribed in the book: + * "Computational Frameworks of the Fast Fourier Transform". + * + * The ideas and the the organization of code borrowed from djbfft written by + * D. J. Bernstein <djb@cr.py.to>. djbff can be found at + * http://cr.yp.to/djbfft.html. + * + * srfft.h is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * srfft.h is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef SRFFT_H__ +#define SRFFT_H__ + +#include "cmplx.h" + +void fft_64p_c (complex_t *x); +void fft_128p_c (complex_t *x); + +#endif /* SRFFT_H__ */ diff --git a/ac3dec/srfft_kni.S b/ac3dec/srfft_kni.S new file mode 100644 index 00000000..a42a41b1 --- /dev/null +++ b/ac3dec/srfft_kni.S @@ -0,0 +1,289 @@ +/* + * srfft_kni.S + * + * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - October 2000 + * + * + * srfft_kni.S is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * srfft_kni.S is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifdef __i386__ + +.section .rodata + .align 16 +hsqrt2: .float 0f0.707106781188 + .float 0f0.707106781188 + .float 0f-0.707106781188 + .float 0f-0.707106781188 +C_1: .float 0f-1.0 + .float 0f1.0 + .float 0f-1.0 + .float 0f1.0 + +.text + .align 4 +.global fft_4_kni + .type fft_4_kni, @function +fft_4_kni: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax /* complex_t * */ + + movaps (%eax), %xmm0 /* x[1] | x[0] */ + movaps 16(%eax), %xmm2 /* x[3] | x[2] */ + movaps %xmm0, %xmm1 /* x[1] | x[0] */ + addps %xmm2, %xmm0 /* x[1] + x[3] | x[0] + x[2] */ + subps %xmm2, %xmm1 /* x[1] - x[3] | x[0] - x[2] */ + xorps %xmm6, %xmm6 + movhlps %xmm1, %xmm4 /* x[1] - x[3] */ + movhlps %xmm0, %xmm3 /* x[1] + x[3] */ + subss %xmm4, %xmm6 /* -(x[1] - x[3]).re */ + movlhps %xmm1, %xmm0 /* x[0] - x[2] | x[0] + x[2] */ + movss %xmm6, %xmm4 /* (x[1] - x[3]).im | (x[3]-x[1]).re */ + movaps %xmm0, %xmm2 /* x[0] - x[2] | x[0] + x[2] */ + shufps $0x14, %xmm4, %xmm3 /* -i*(x[2] - x[3] | x[2] + x[3] */ + addps %xmm3, %xmm0 + subps %xmm3, %xmm2 + movaps %xmm0, (%eax) + movaps %xmm2, 16(%eax) + + leave + ret + + + .align 4 +.global fft_8_kni + .type fft_8_kni, @function +fft_8_kni: + pushl %ebp + movl %esp, %ebp + movl 8(%ebp), %eax /* complext_t */ + + pushl %ebx + movlps (%eax), %xmm0 /* x[0] */ + movlps 32(%eax), %xmm1 /* x[4] */ + movhps 16(%eax), %xmm0 /* x[2] | x[0] */ + movhps 48(%eax), %xmm1 /* x[6] | x[4] */ + movaps %xmm0, %xmm2 /* x[2] | x[0] */ + xorps %xmm3, %xmm3 + addps %xmm1, %xmm0 /* x[2] + x[6] | x[0] + x[4] */ + subps %xmm1, %xmm2 /* x[2] - x[6] | x[0] - x[4] */ + movhlps %xmm0, %xmm5 /* x[2] + x[6] */ + movhlps %xmm2, %xmm4 + movlhps %xmm2, %xmm0 /* x[0] - x[4] | x[0] + x[4] */ + subss %xmm4, %xmm3 /* -(x[2]-x[6]).re */ + movaps %xmm0, %xmm7 /* x[0] - x[4] | x[0] + x[4] */ + movss %xmm3, %xmm4 /* (x[2]-x[6]).im | -(x[2]-x[6]).re */ + movlps 8(%eax), %xmm1 /* x[1] */ + shufps $0x14, %xmm4, %xmm5 /* -i*(x[2] - x[6]) | x[2] + x[6] */ + + addps %xmm5, %xmm0 /* yt */ + subps %xmm5, %xmm7 /* yb */ + + movhps 24(%eax), %xmm1 /* x[3] | x[1] */ + movl $hsqrt2, %ebx + movlps 40(%eax), %xmm2 /* x[5] */ + movhps 56(%eax), %xmm2 /* /x[7] | x[5] */ + movaps %xmm1, %xmm3 /* x[3] | x[1] */ + addps %xmm2, %xmm1 /* x[3] + x[7] | x[1] + x[5] */ + subps %xmm2, %xmm3 /* x[3] - x[7] | x[1] - x[5] */ + movaps (%ebx), %xmm4 /* -1/sqrt2 | -1/sqrt2 | 1/sqrt2 | 1/sqrt2 */ + movaps %xmm3, %xmm6 /* x[3] - x[7] | x[1] - x[5] */ + mulps %xmm4, %xmm3 + shufps $0xc8, %xmm4, %xmm4 /* -1/sqrt2 | 1/sqrt2 | -1/sqrt2 | 1/sqrt2 */ + shufps $0xb1, %xmm6, %xmm6 + mulps %xmm4, %xmm6 + addps %xmm3, %xmm6 /* (-1-i)/sqrt2 * (x[3]-x[7]) | (1-i)/sqrt2 * (x[1] - x[5] */ + movhlps %xmm1, %xmm5 /* x[3] + x[7] */ + movlhps %xmm6, %xmm1 /* (1+i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */ + shufps $0xe4, %xmm6, %xmm5 /* (-1-i)/sqrt2 * (x[3]-x[7]) | x[3]+x[7] */ + movaps %xmm1, %xmm3 /* (1-i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */ + movl $C_1, %ebx + addps %xmm5, %xmm1 /* u */ + subps %xmm5, %xmm3 /* v */ + movaps %xmm0, %xmm2 /* yb */ + movaps %xmm7, %xmm4 /* yt */ + movaps (%ebx), %xmm5 + mulps %xmm5, %xmm3 + addps %xmm1, %xmm0 /* yt + u */ + subps %xmm1, %xmm2 /* yt - u */ + shufps $0xb1, %xmm3, %xmm3 /* -i * v */ + movaps %xmm0, (%eax) + movaps %xmm2, 32(%eax) + addps %xmm3, %xmm4 /* yb - i*v */ + subps %xmm3, %xmm7 /* yb + i*v */ + movaps %xmm4, 16(%eax) + movaps %xmm7, 48(%eax) + + popl %ebx + leave + ret + + .align 4 +.global fft_asmb_kni + .type fft_asmb, @function +fft_asmb_kni: + pushl %ebp + movl %esp, %ebp + + subl $4, %esp + + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + pushl %esi + pushl %edi + + movl 8(%ebp), %ecx /* k */ + movl 12(%ebp), %eax /* x */ + movl %ecx, -4(%ebp) /* k */ + movl 16(%ebp), %ebx /* wT */ + movl 20(%ebp), %edx /* d */ + movl 24(%ebp), %esi /* d3 */ + shll $4, %ecx /* 16k */ + addl $8, %edx + leal (%eax, %ecx, 2), %edi + addl $8, %esi + + /* TRANSZERO and TRANS */ + movaps (%eax), %xmm0 /* x[1] | x[0] */ + movaps (%ebx), %xmm1 /* wT[1] | wT[0] */ + movaps (%ebx, %ecx), %xmm2 /* wB[1] | wB[0] */ + movlps (%edx), %xmm3 /* d */ + movlps (%esi), %xmm4 /* d3 */ + movhlps %xmm1, %xmm5 /* wT[1] */ + movhlps %xmm2, %xmm6 /* wB[1] */ + shufps $0x50, %xmm3, %xmm3 /* d[1].im | d[1].im | d[1].re | d[1].re */ + shufps $0x50, %xmm4, %xmm4 /* d3[1].im | d3[1].im | d3[i].re | d3[i].re */ + movlhps %xmm5, %xmm5 /* wT[1] | wT[1] */ + movlhps %xmm6, %xmm6 /* wB[1] | wB[1] */ + mulps %xmm3, %xmm5 + mulps %xmm4, %xmm6 + movhlps %xmm5, %xmm7 /* wT[1].im * d[1].im | wT[1].re * d[1].im */ + movlhps %xmm6, %xmm5 /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wT[1].im * d[1].re | wT[1].re * d[1].re */ + shufps $0xb1, %xmm6, %xmm7 /* wB[1].re * d3[1].im | wB[i].im * d3[1].im | wT[1].re * d[1].im | wT[1].im * d[1].im */ + movl $C_1, %edi + movaps (%edi), %xmm4 + mulps %xmm4, %xmm7 + addps %xmm7, %xmm5 /* wB[1] * d3[1] | wT[1] * d[1] */ + movlhps %xmm5, %xmm1 /* d[1] * wT[1] | wT[0] */ + shufps $0xe4, %xmm5, %xmm2 /* d3[1] * wB[1] | wB[0] */ + movaps %xmm1, %xmm3 /* d[1] * wT[1] | wT[0] */ + leal (%eax, %ecx, 2), %edi + addps %xmm2, %xmm1 /* u */ + subps %xmm2, %xmm3 /* v */ + mulps %xmm4, %xmm3 + movaps (%eax, %ecx), %xmm5 /* xk[1] | xk[0] */ + shufps $0xb1, %xmm3, %xmm3 /* -i * v */ + movaps %xmm0, %xmm2 /* x[1] | x[0] */ + movaps %xmm5, %xmm6 /* xk[1] | xk[0] */ + addps %xmm1, %xmm0 + subps %xmm1, %xmm2 + addps %xmm3, %xmm5 + subps %xmm3, %xmm6 + movaps %xmm0, (%eax) + movaps %xmm2, (%edi) + movaps %xmm5, (%eax, %ecx) + movaps %xmm6, (%edi, %ecx) + addl $16, %eax + addl $16, %ebx + addl $8, %edx + addl $8, %esi + decl -4(%ebp) + +.loop: + movaps (%ebx), %xmm0 /* wT[1] | wT[0] */ + movaps (%edx), %xmm1 /* d[1] | d[0] */ + + movaps (%ebx, %ecx), %xmm4 /* wB[1] | wB[0] */ + movaps (%esi), %xmm5 /* d3[1] | d3[0] */ + + movhlps %xmm0, %xmm2 /* wT[1] */ + movhlps %xmm1, %xmm3 /* d[1] */ + + movhlps %xmm4, %xmm6 /* wB[1] */ + movhlps %xmm5, %xmm7 /* d3[1] */ + + shufps $0x50, %xmm1, %xmm1 /* d[0].im | d[0].im | d[0].re | d[0].re */ + shufps $0x50, %xmm3, %xmm3 /* d[1].im | d[1].im | d[1].re | d[1].re */ + + movlhps %xmm0, %xmm0 /* wT[0] | wT[0] */ + shufps $0x50, %xmm5, %xmm5 /* d3[0].im | d3[0].im | d3[0].re | d3[0].re */ + movlhps %xmm2, %xmm2 /* wT[1] | wT[1] */ + shufps $0x50, %xmm7, %xmm7 /* d3[1].im | d3[1].im | d3[1].re | d3[1].re */ + + mulps %xmm1, %xmm0 /* d[0].im * wT[0].im | d[0].im * wT[0].re | d[0].re * wT[0].im | d[0].re * wT[0].re */ + mulps %xmm3, %xmm2 /* d[1].im * wT[1].im | d[1].im * wT[1].re | d[1].re * wT[1].im | d[1].re * wT[1].re */ + movlhps %xmm4, %xmm4 /* wB[0] | wB[0] */ + movlhps %xmm6, %xmm6 /* wB[1] | wB[1] */ + + movhlps %xmm0, %xmm1 /* d[0].im * wT[0].im | d[0].im * wT[0].re */ + movlhps %xmm2, %xmm0 /* d[1].re * wT[1].im | d[1].re * wT[1].re | d[0].re * wT[0].im | d[0].re * wT[0].re */ + mulps %xmm5, %xmm4 /* wB[0].im * d3[0].im | wB[0].re * d3[0].im | wB[0].im * d3[0].re | wB[0].re * d3[0].re */ + mulps %xmm7, %xmm6 /* wB[1].im * d3[1].im | wB[1].re * d3[1].im | wB[1].im * d3[1].re | wB[1].re * d3[1].re */ + shufps $0xb1, %xmm2, %xmm1 /* d[1].im * wT[1].re | d[1].im * wT[1].im | d[0].im * wT[0].re | d[0].im * wT[0].im */ + movl $C_1, %edi + movaps (%edi), %xmm3 /* 1.0 | -1.0 | 1.0 | -1.0 */ + + movhlps %xmm4, %xmm5 /* wB[0].im * d3[0].im | wB[0].re * d3[0].im */ + mulps %xmm3, %xmm1 /* d[1].im * wT[1].re | -d[1].im * wT[1].im | d[0].im * wT[0].re | -d[0].im * wT[0].im */ + movlhps %xmm6, %xmm4 /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wB[0].im * d3[0].re | wB[0].im * d3[0].re */ + addps %xmm1, %xmm0 /* wT[1] * d[1] | wT[0] * d[0] */ + + shufps $0xb1, %xmm6, %xmm5 /* wB[1].re * d3[1].im | wB[1].im * d3[1].im | wB[0].re * d3[0].im | wB[0].im * d3[0].im */ + mulps %xmm3, %xmm5 /* wB[1].re * d3[1].im | -wB[1].im * d3[1].im | wB[0].re * d3[0].im | -wB[0].im * d3[0].im */ + addps %xmm5, %xmm4 /* wB[1] * d3[1] | wB[0] * d3[0] */ + + movaps %xmm0, %xmm1 /* wT[1] * d[1] | wT[0] * d[0] */ + addps %xmm4, %xmm0 /* u */ + subps %xmm4, %xmm1 /* v */ + movaps (%eax), %xmm6 /* x[1] | x[0] */ + leal (%eax, %ecx, 2), %edi + mulps %xmm3, %xmm1 + addl $16, %ebx + addl $16, %esi + shufps $0xb1, %xmm1, %xmm1 /* -i * v */ + movaps (%eax, %ecx), %xmm7 /* xk[1] | xk[0] */ + movaps %xmm6, %xmm2 + movaps %xmm7, %xmm4 + addps %xmm0, %xmm6 + subps %xmm0, %xmm2 + movaps %xmm6, (%eax) + movaps %xmm2, (%edi) + addps %xmm1, %xmm7 + subps %xmm1, %xmm4 + addl $16, %edx + movaps %xmm7, (%eax, %ecx) + movaps %xmm4, (%edi, %ecx) + + addl $16, %eax + decl -4(%ebp) + jnz .loop + +.end: + popl %edi + popl %esi + popl %edx + popl %ecx + popl %ebx + popl %eax + + addl $4, %esp + + leave + ret +#endif diff --git a/ac3dec/srfft_kni.h b/ac3dec/srfft_kni.h new file mode 100644 index 00000000..6dc468e3 --- /dev/null +++ b/ac3dec/srfft_kni.h @@ -0,0 +1,30 @@ +/***** +* +* This file is part of the OMS program. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2, or (at your option) +* any later version. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* You should have received a copy of the GNU General Public License +* along with this program; see the file COPYING. If not, write to +* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. +* +*****/ + +#ifndef __SRFF_KNI__ +#define __SRFF_KNI__ + +#include "cmplx.h" + +void fft_4_kni (complex_t *a); +void fft_8_kni (complex_t *a); +void fft_asmb_kni (int, complex_t*, complex_t *, complex_t *, complex_t*); + +#endif diff --git a/ac3dec/srfft_kni_c.c b/ac3dec/srfft_kni_c.c new file mode 100644 index 00000000..d461110f --- /dev/null +++ b/ac3dec/srfft_kni_c.c @@ -0,0 +1,93 @@ +/* + * srfft_kni.c + * + * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000 + * + * 64 and 128 point split radix fft for ac3dec + * + * The algorithm is desribed in the book: + * "Computational Frameworks of the Fast Fourier Transform". + * + * The ideas and the the organization of code borrowed from djbfft written by + * D. J. Bernstein <djb@cr.py.to>. djbff can be found at + * http://cr.yp.to/djbfft.html. + * + * srfft.c is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * srfft.c is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifdef __i386__ + +#include <stdio.h> + +#include "srfft_kni.h" +#include "srfftp.h" + +void fft_64p_kni(complex_t *a) +{ + fft_8_kni(&a[0]); fft_4_kni(&a[8]); fft_4_kni(&a[12]); + fft_asmb_kni(2, &a[0], &a[8], &delta16[0], &delta16_3[0]); + + fft_8_kni(&a[16]), fft_8_kni(&a[24]); + fft_asmb_kni(4, &a[0], &a[16],&delta32[0], &delta32_3[0]); + + fft_8_kni(&a[32]); fft_4_kni(&a[40]); fft_4_kni(&a[44]); + fft_asmb_kni(2, &a[32], &a[40], &delta16[0], &delta16_3[0]); + + fft_8_kni(&a[48]); fft_4_kni(&a[56]); fft_4_kni(&a[60]); + fft_asmb_kni(2, &a[48], &a[56], &delta16[0], &delta16_3[0]); + + fft_asmb_kni(8, &a[0], &a[32],&delta64[0], &delta64_3[0]); +} + + +void fft_128p_kni(complex_t *a) +{ + fft_8_kni(&a[0]); fft_4_kni(&a[8]); fft_4_kni(&a[12]); + fft_asmb_kni(2, &a[0], &a[8], &delta16[0], &delta16_3[0]); + + fft_8_kni(&a[16]), fft_8_kni(&a[24]); + fft_asmb_kni(4, &a[0], &a[16],&delta32[0], &delta32_3[0]); + + fft_8_kni(&a[32]); fft_4_kni(&a[40]); fft_4_kni(&a[44]); + fft_asmb_kni(2, &a[32], &a[40], &delta16[0], &delta16_3[0]); + + fft_8_kni(&a[48]); fft_4_kni(&a[56]); fft_4_kni(&a[60]); + fft_asmb_kni(2, &a[48], &a[56], &delta16[0], &delta16_3[0]); + + fft_asmb_kni(8, &a[0], &a[32],&delta64[0], &delta64_3[0]); + + fft_8_kni(&a[64]); fft_4_kni(&a[72]); fft_4_kni(&a[76]); + /* fft_16(&a[64]); */ + fft_asmb_kni(2, &a[64], &a[72], &delta16[0], &delta16_3[0]); + + fft_8_kni(&a[80]); fft_8_kni(&a[88]); + + /* fft_32(&a[64]); */ + fft_asmb_kni(4, &a[64], &a[80],&delta32[0], &delta32_3[0]); + + fft_8_kni(&a[96]); fft_4_kni(&a[104]), fft_4_kni(&a[108]); + /* fft_16(&a[96]); */ + fft_asmb_kni(2, &a[96], &a[104], &delta16[0], &delta16_3[0]); + + fft_8_kni(&a[112]), fft_8_kni(&a[120]); + /* fft_32(&a[96]); */ + fft_asmb_kni(4, &a[96], &a[112], &delta32[0], &delta32_3[0]); + + /* fft_128(&a[0]); */ + fft_asmb_kni(16, &a[0], &a[64], &delta128[0], &delta128_3[0]); +} + +#endif diff --git a/ac3dec/srfftp.h b/ac3dec/srfftp.h new file mode 100644 index 00000000..6f447153 --- /dev/null +++ b/ac3dec/srfftp.h @@ -0,0 +1,305 @@ + +/* + * srfftp.h + * + * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000 + * + * 64 and 128 point split radix fft for ac3dec + * + * The algorithm is desribed in the book: + * "Computational Frameworks of the Fast Fourier Transform". + * + * The ideas and the the organization of code borrowed from djbfft written by + * D. J. Bernstein <djb@cr.py.to>. djbff can be found at + * http://cr.yp.to/djbfft.html. + * + * srfftp.h is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * srfftp.h is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU Make; see the file COPYING. If not, write to + * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + * + */ + +#ifndef SRFFTP_H__ +#define SRFFTP_H__ + +#include "cmplx.h" + +static complex_t delta16[4] = + { {1.00000000000000, 0.00000000000000}, + {0.92387953251129, -0.38268343236509}, + {0.70710678118655, -0.70710678118655}, + {0.38268343236509, -0.92387953251129}}; + +static complex_t delta16_3[4] = + { {1.00000000000000, 0.00000000000000}, + {0.38268343236509, -0.92387953251129}, + {-0.70710678118655, -0.70710678118655}, + {-0.92387953251129, 0.38268343236509}}; + +static complex_t delta32[8] = + { {1.00000000000000, 0.00000000000000}, + {0.98078528040323, -0.19509032201613}, + {0.92387953251129, -0.38268343236509}, + {0.83146961230255, -0.55557023301960}, + {0.70710678118655, -0.70710678118655}, + {0.55557023301960, -0.83146961230255}, + {0.38268343236509, -0.92387953251129}, + {0.19509032201613, -0.98078528040323}}; + +static complex_t delta32_3[8] = + { {1.00000000000000, 0.00000000000000}, + {0.83146961230255, -0.55557023301960}, + {0.38268343236509, -0.92387953251129}, + {-0.19509032201613, -0.98078528040323}, + {-0.70710678118655, -0.70710678118655}, + {-0.98078528040323, -0.19509032201613}, + {-0.92387953251129, 0.38268343236509}, + {-0.55557023301960, 0.83146961230255}}; + +static complex_t delta64[16] = + { {1.00000000000000, 0.00000000000000}, + {0.99518472667220, -0.09801714032956}, + {0.98078528040323, -0.19509032201613}, + {0.95694033573221, -0.29028467725446}, + {0.92387953251129, -0.38268343236509}, + {0.88192126434836, -0.47139673682600}, + {0.83146961230255, -0.55557023301960}, + {0.77301045336274, -0.63439328416365}, + {0.70710678118655, -0.70710678118655}, + {0.63439328416365, -0.77301045336274}, + {0.55557023301960, -0.83146961230255}, + {0.47139673682600, -0.88192126434835}, + {0.38268343236509, -0.92387953251129}, + {0.29028467725446, -0.95694033573221}, + {0.19509032201613, -0.98078528040323}, + {0.09801714032956, -0.99518472667220}}; + +static complex_t delta64_3[16] = + { {1.00000000000000, 0.00000000000000}, + {0.95694033573221, -0.29028467725446}, + {0.83146961230255, -0.55557023301960}, + {0.63439328416365, -0.77301045336274}, + {0.38268343236509, -0.92387953251129}, + {0.09801714032956, -0.99518472667220}, + {-0.19509032201613, -0.98078528040323}, + {-0.47139673682600, -0.88192126434836}, + {-0.70710678118655, -0.70710678118655}, + {-0.88192126434835, -0.47139673682600}, + {-0.98078528040323, -0.19509032201613}, + {-0.99518472667220, 0.09801714032956}, + {-0.92387953251129, 0.38268343236509}, + {-0.77301045336274, 0.63439328416365}, + {-0.55557023301960, 0.83146961230255}, + {-0.29028467725446, 0.95694033573221}}; + +static complex_t delta128[32] = + { {1.00000000000000, 0.00000000000000}, + {0.99879545620517, -0.04906767432742}, + {0.99518472667220, -0.09801714032956}, + {0.98917650996478, -0.14673047445536}, + {0.98078528040323, -0.19509032201613}, + {0.97003125319454, -0.24298017990326}, + {0.95694033573221, -0.29028467725446}, + {0.94154406518302, -0.33688985339222}, + {0.92387953251129, -0.38268343236509}, + {0.90398929312344, -0.42755509343028}, + {0.88192126434836, -0.47139673682600}, + {0.85772861000027, -0.51410274419322}, + {0.83146961230255, -0.55557023301960}, + {0.80320753148064, -0.59569930449243}, + {0.77301045336274, -0.63439328416365}, + {0.74095112535496, -0.67155895484702}, + {0.70710678118655, -0.70710678118655}, + {0.67155895484702, -0.74095112535496}, + {0.63439328416365, -0.77301045336274}, + {0.59569930449243, -0.80320753148064}, + {0.55557023301960, -0.83146961230255}, + {0.51410274419322, -0.85772861000027}, + {0.47139673682600, -0.88192126434835}, + {0.42755509343028, -0.90398929312344}, + {0.38268343236509, -0.92387953251129}, + {0.33688985339222, -0.94154406518302}, + {0.29028467725446, -0.95694033573221}, + {0.24298017990326, -0.97003125319454}, + {0.19509032201613, -0.98078528040323}, + {0.14673047445536, -0.98917650996478}, + {0.09801714032956, -0.99518472667220}, + {0.04906767432742, -0.99879545620517}}; + +static complex_t delta128_3[32] = + { {1.00000000000000, 0.00000000000000}, + {0.98917650996478, -0.14673047445536}, + {0.95694033573221, -0.29028467725446}, + {0.90398929312344, -0.42755509343028}, + {0.83146961230255, -0.55557023301960}, + {0.74095112535496, -0.67155895484702}, + {0.63439328416365, -0.77301045336274}, + {0.51410274419322, -0.85772861000027}, + {0.38268343236509, -0.92387953251129}, + {0.24298017990326, -0.97003125319454}, + {0.09801714032956, -0.99518472667220}, + {-0.04906767432742, -0.99879545620517}, + {-0.19509032201613, -0.98078528040323}, + {-0.33688985339222, -0.94154406518302}, + {-0.47139673682600, -0.88192126434836}, + {-0.59569930449243, -0.80320753148065}, + {-0.70710678118655, -0.70710678118655}, + {-0.80320753148065, -0.59569930449243}, + {-0.88192126434835, -0.47139673682600}, + {-0.94154406518302, -0.33688985339222}, + {-0.98078528040323, -0.19509032201613}, + {-0.99879545620517, -0.04906767432742}, + {-0.99518472667220, 0.09801714032956}, + {-0.97003125319454, 0.24298017990326}, + {-0.92387953251129, 0.38268343236509}, + {-0.85772861000027, 0.51410274419322}, + {-0.77301045336274, 0.63439328416365}, + {-0.67155895484702, 0.74095112535496}, + {-0.55557023301960, 0.83146961230255}, + {-0.42755509343028, 0.90398929312344}, + {-0.29028467725446, 0.95694033573221}, + {-0.14673047445536, 0.98917650996478}}; + +#define HSQRT2 0.707106781188; + +#define TRANSZERO(A0,A4,A8,A12) { \ + u_r = wTB[0].re; \ + v_i = u_r - wTB[k*2].re; \ + u_r += wTB[k*2].re; \ + u_i = wTB[0].im; \ + v_r = wTB[k*2].im - u_i; \ + u_i += wTB[k*2].im; \ + a_r = A0.re; \ + a_i = A0.im; \ + a1_r = a_r; \ + a1_r += u_r; \ + A0.re = a1_r; \ + a_r -= u_r; \ + A8.re = a_r; \ + a1_i = a_i; \ + a1_i += u_i; \ + A0.im = a1_i; \ + a_i -= u_i; \ + A8.im = a_i; \ + a1_r = A4.re; \ + a1_i = A4.im; \ + a_r = a1_r; \ + a_r -= v_r; \ + A4.re = a_r; \ + a1_r += v_r; \ + A12.re = a1_r; \ + a_i = a1_i; \ + a_i -= v_i; \ + A4.im = a_i; \ + a1_i += v_i; \ + A12.im = a1_i; \ + } + +#define TRANSHALF_16(A2,A6,A10,A14) {\ + u_r = wTB[2].re; \ + a_r = u_r; \ + u_i = wTB[2].im; \ + u_r += u_i; \ + u_i -= a_r; \ + a_r = wTB[6].re; \ + a1_r = a_r; \ + a_i = wTB[6].im; \ + a_r = a_i - a_r; \ + a_i += a1_r; \ + v_i = u_r - a_r; \ + u_r += a_r; \ + v_r = u_i + a_i; \ + u_i -= a_i; \ + v_i *= HSQRT2; \ + v_r *= HSQRT2; \ + u_r *= HSQRT2; \ + u_i *= HSQRT2; \ + a_r = A2.re; \ + a_i = A2.im; \ + a1_r = a_r; \ + a1_r += u_r; \ + A2.re = a1_r; \ + a_r -= u_r; \ + A10.re = a_r; \ + a1_i = a_i; \ + a1_i += u_i; \ + A2.im = a1_i; \ + a_i -= u_i; \ + A10.im = a_i; \ + a1_r = A6.re; \ + a1_i = A6.im; \ + a_r = a1_r; \ + a1_r += v_r; \ + A6.re = a1_r; \ + a_r -= v_r; \ + A14.re = a_r; \ + a_i = a1_i; \ + a1_i -= v_i; \ + A6.im = a1_i; \ + a_i += v_i; \ + A14.im = a_i; \ + } + +#define TRANS(A1,A5,A9,A13,WT,WB,D,D3) { \ + u_r = WT.re; \ + a_r = u_r; \ + a_r *= D.im; \ + u_r *= D.re; \ + a_i = WT.im; \ + a1_i = a_i; \ + a1_i *= D.re; \ + a_i *= D.im; \ + u_r -= a_i; \ + u_i = a_r; \ + u_i += a1_i; \ + a_r = WB.re; \ + a1_r = a_r; \ + a1_r *= D3.re; \ + a_r *= D3.im; \ + a_i = WB.im; \ + a1_i = a_i; \ + a_i *= D3.re; \ + a1_i *= D3.im; \ + a1_r -= a1_i; \ + a_r += a_i; \ + v_i = u_r - a1_r; \ + u_r += a1_r; \ + v_r = a_r - u_i; \ + u_i += a_r; \ + a_r = A1.re; \ + a_i = A1.im; \ + a1_r = a_r; \ + a1_r += u_r; \ + A1.re = a1_r; \ + a_r -= u_r; \ + A9.re = a_r; \ + a1_i = a_i; \ + a1_i += u_i; \ + A1.im = a1_i; \ + a_i -= u_i; \ + A9.im = a_i; \ + a1_r = A5.re; \ + a1_i = A5.im; \ + a_r = a1_r; \ + a1_r -= v_r; \ + A5.re = a1_r; \ + a_r += v_r; \ + A13.re = a_r; \ + a_i = a1_i; \ + a1_i -= v_i; \ + A5.im = a1_i; \ + a_i += v_i; \ + A13.im = a_i; \ + } + +#endif diff --git a/ac3dec/stats.c b/ac3dec/stats.c index cfeb453f..4bdad09a 100644 --- a/ac3dec/stats.c +++ b/ac3dec/stats.c @@ -28,16 +28,16 @@ #include "ac3_internal.h" -#include "decode.h" #include "stats.h" #include "debug.h" - +#if !defined (__GNUC__) || defined (DEBUG) static const char *service_ids[8] = { "CM","ME","VI","HI", "D", "C","E", "VO" }; +#endif struct mixlev_s { @@ -77,9 +77,10 @@ static const char *language[128] = "Bengali", "Belorussian", "Bambora", "Azerbijani", "Assamese", "Armenian", "Arabic", "Amharic" }; + void stats_print_banner(syncinfo_t *syncinfo,bsi_t *bsi) { - fprintf(stdout,"ac3dec-0.6.2-cvs (C) 2000 Aaron Holtzman (aholtzma@ess.engr.uvic.ca)\n"); + // fprintf(stdout,PACKAGE"-"VERSION" (C) 2000 Aaron Holtzman (aholtzma@ess.engr.uvic.ca)\n"); fprintf(stdout,"%d.%d Mode ",bsi->nfchans,bsi->lfeon); fprintf(stdout,"%2.1f KHz",syncinfo->sampling_rate * 1e-3); @@ -87,8 +88,7 @@ void stats_print_banner(syncinfo_t *syncinfo,bsi_t *bsi) if (bsi->langcode && (bsi->langcod < 128)) fprintf(stdout,"%s ", language[bsi->langcod]); - switch(bsi->bsmod) - { + switch(bsi->bsmod) { case 0: fprintf(stdout,"Complete Main Audio Service"); break; @@ -116,12 +116,12 @@ void stats_print_banner(syncinfo_t *syncinfo,bsi_t *bsi) fprintf(stdout,"\n"); } -void stats_print_syncinfo(syncinfo_t *syncinfo) + +void stats_print_syncinfo (syncinfo_t *syncinfo) { dprintf("(syncinfo) "); - switch (syncinfo->fscod) - { + switch (syncinfo->fscod) { case 2: dprintf("32 KHz "); break; @@ -140,9 +140,9 @@ void stats_print_syncinfo(syncinfo_t *syncinfo) syncinfo->frame_size); } + -void stats_print_bsi(bsi_t *bsi) -{ +void stats_print_bsi(bsi_t *bsi) { dprintf("(bsi) "); dprintf("%s",service_ids[bsi->bsmod]); dprintf(" %d.%d Mode ",bsi->nfchans,bsi->lfeon); @@ -154,11 +154,11 @@ void stats_print_bsi(bsi_t *bsi) } + char *exp_strat_tbl[4] = {"R ","D15 ","D25 ","D45 "}; -void stats_print_audblk(bsi_t *bsi,audblk_t *audblk) -{ - uint_32 i; +void stats_print_audblk(bsi_t *bsi,audblk_t *audblk) { + uint32_t i; dprintf("(audblk) "); dprintf("%s ",audblk->cplinu ? "cpl on " : "cpl off"); |