summaryrefslogtreecommitdiff
path: root/ac3dec
diff options
context:
space:
mode:
authorKlaus Schmidinger <kls (at) cadsoft (dot) de>2001-08-12 18:00:00 +0200
committerKlaus Schmidinger <kls (at) cadsoft (dot) de>2001-08-12 18:00:00 +0200
commitefea0f64d08052b0189d962101e1a3634d4adfc8 (patch)
treea3feff7ccf6874da549b25ea3b671d15a65d0e3d /ac3dec
parentf1d1c9849c8e27cccb46cf9c0d0ccb59da3f91f9 (diff)
downloadvdr-patch-lnbsharing-efea0f64d08052b0189d962101e1a3634d4adfc8.tar.gz
vdr-patch-lnbsharing-efea0f64d08052b0189d962101e1a3634d4adfc8.tar.bz2
Version 0.91vdr-0.91
- Fixed displaying colored button texts that are too long. - Suppressing replay progress display when replaying a DVD. - Updated channels.conf.cable (thanks to Uwe Scheffler). - Updated French OSD texts (thanks to Jean-Claude Repetto). - Improved AC3 decoding when replaying DVDs (thanks to Matjaz Thaler). - Fixed handling DVB card indexes when using only one card in a multi-card system. - Changed the 'Eject DVD' button text to a simple 'Eject' (the German text was too long...). - Made the font file generation more stable (thanks to Artur Skawina). - Changed the default value for the "DiSEqC" setup parameter to "off". - The new command line option '-E' can be used to define where the EPG data shall be written to. This is especially useful if VDR runs in a system that turns off the video disk when it is not used, and therefore needs to write the EPG file to a ramdisk (or turn off writing it alltogether). See 'vdr --help' for details. - Making sure the disk is up and running before starting recording (this is important for systems that turn off the video disk when it is not used). - Added the "Jump" function in replay mode (thanks to Stefan Huelswitt). See the description of the "Red" key in MANUAL under "Replay Control" for details. - Fixed displaying editing marks when toggling a mark in "pause" mode. - If there is no free DVB device to record, the log message will now be given only once. - Made I/O more robust by handling EINTR (thanks to Werner Fink).
Diffstat (limited to 'ac3dec')
-rw-r--r--ac3dec/Makefile14
-rw-r--r--ac3dec/ac3.h56
-rw-r--r--ac3dec/ac3_internal.h251
-rw-r--r--ac3dec/bit_allocate.c278
-rw-r--r--ac3dec/bit_allocate.h2
-rw-r--r--ac3dec/bitstream.c53
-rw-r--r--ac3dec/bitstream.h54
-rw-r--r--ac3dec/bswap.h80
-rw-r--r--ac3dec/cmplx.h29
-rw-r--r--ac3dec/coeff.c446
-rw-r--r--ac3dec/cpu_accel.c129
-rw-r--r--ac3dec/crc.c26
-rw-r--r--ac3dec/crc.h4
-rw-r--r--ac3dec/debug.c16
-rw-r--r--ac3dec/debug.h10
-rw-r--r--ac3dec/decode.c251
-rw-r--r--ac3dec/dither.c13
-rw-r--r--ac3dec/dither.h12
-rw-r--r--ac3dec/downmix.c441
-rw-r--r--ac3dec/downmix.h8
-rw-r--r--ac3dec/downmix_c.c161
-rw-r--r--ac3dec/downmix_c.h32
-rw-r--r--ac3dec/downmix_i386.S92
-rw-r--r--ac3dec/downmix_i386.h27
-rw-r--r--ac3dec/downmix_kni.S396
-rw-r--r--ac3dec/downmix_kni.h32
-rw-r--r--ac3dec/exponent.c61
-rw-r--r--ac3dec/imdct.c734
-rw-r--r--ac3dec/imdct.h2
-rw-r--r--ac3dec/imdct512_kni.S548
-rw-r--r--ac3dec/imdct_c.c218
-rw-r--r--ac3dec/imdct_c.h36
-rw-r--r--ac3dec/imdct_kni.c103
-rw-r--r--ac3dec/imdct_kni.h36
-rw-r--r--ac3dec/mm_accel.h36
-rw-r--r--ac3dec/parse.c252
-rw-r--r--ac3dec/parse.h4
-rw-r--r--ac3dec/rematrix.c54
-rw-r--r--ac3dec/sanity_check.c85
-rw-r--r--ac3dec/sanity_check.h4
-rw-r--r--ac3dec/srfft.c305
-rw-r--r--ac3dec/srfft.h39
-rw-r--r--ac3dec/srfft_kni.S289
-rw-r--r--ac3dec/srfft_kni.h30
-rw-r--r--ac3dec/srfft_kni_c.c93
-rw-r--r--ac3dec/srfftp.h305
-rw-r--r--ac3dec/stats.c26
47 files changed, 4450 insertions, 1723 deletions
diff --git a/ac3dec/Makefile b/ac3dec/Makefile
index a0547e9..7ab5ad0 100644
--- a/ac3dec/Makefile
+++ b/ac3dec/Makefile
@@ -1,10 +1,16 @@
#
# Makefile for 'ac3dec'
#
-# $Id: Makefile 1.1 2001/08/03 12:58:06 kls Exp $
+# $Id: Makefile 1.2 2001/08/10 12:44:07 kls Exp $
+
+#OBJS = coeff.o decode.o exponent.o rematrix.o bit_allocate.o crc.o dither.o\
+# imdct.o sanity_check.o bitstream.o debug.o downmix.o parse.o stats.o\
+# downmix_c.o imdct.o
+
+OBJS = bit_allocate.o bitstream.o coeff.o cpu_accel.o crc.o debug.o decode.o\
+ dither.o downmix.o downmix_c.o exponent.o imdct.o imdct_c.o imdct_kni.o\
+ parse.o rematrix.o sanity_check.o srfft.o srfft_kni_c.o stats.o
-OBJS = coeff.o decode.o exponent.o rematrix.o bit_allocate.o crc.o dither.o\
- imdct.o sanity_check.o bitstream.o debug.o downmix.o parse.o stats.o
DEFINES += -DDOLBY_SURROUND
@@ -16,7 +22,7 @@ libac3.a: $(OBJS)
# Implicit rules:
%.o: %.c
- gcc -g -O2 -Wall -m486 -c $(DEFINES) $<
+ gcc -g -O2 -Wall -m486 -I./ -c $(DEFINES) $<
clean:
rm -f *~ libac3.a $(OBJS)
diff --git a/ac3dec/ac3.h b/ac3dec/ac3.h
index 4919fc5..d325f3b 100644
--- a/ac3dec/ac3.h
+++ b/ac3dec/ac3.h
@@ -22,39 +22,37 @@
*
*/
-#ifndef AARONS_TYPES
-#define AARONS_TYPES
-typedef unsigned long long uint_64;
-typedef unsigned int uint_32;
-typedef unsigned short uint_16;
-typedef unsigned char uint_8;
-
-typedef signed long long sint_64;
-typedef signed int sint_32;
-typedef signed short sint_16;
-typedef signed char sint_8;
+#define AC3_BUFFER_SIZE (6*1024*16)
+
+#ifndef __AC3_H__
+#define __AC3_H__
+
+#ifdef __OMS__
+#include <oms/plugin/output_audio.h>
+#else
+//#include "audio_out.h"
#endif
-#define AC3_DOLBY_SURR_ENABLE 0x1
-#define AC3_3DNOW_ENABLE 0x2
-#define AC3_MMX_ENABLE 0x4
-#define AC3_ALTIVEC_ENABLE 0x8
-
-typedef struct ac3_config_s
-{
- //Bit flags that enable various things
- uint_32 flags;
- //Callback that points the decoder to new stream data
- void (*fill_buffer_callback)(uint_8 **, uint_8 **);
- //Number of discrete channels in final output (for downmixing)
- uint_16 num_output_ch;
- //Which channel of a dual mono stream to select
- uint_16 dual_mono_ch_sel;
-} ac3_config_t;
+#include <inttypes.h>
-void ac3_init(ac3_config_t *);
-uint_32 ac3_decode_data(uint_8 *data_start,uint_8 *data_end, int ac3reset, int *input_pointer, int *output_pointer, char *ac3_data);
+#define AC3_DOLBY_SURR_ENABLE (1<<0)
+#define AC3_ALTIVEC_ENABLE (1<<1)
+typedef struct ac3_config_s {
+ // Bit flags that enable various things
+ uint32_t flags;
+ // Number of discrete channels in final output (for downmixing)
+ uint16_t num_output_ch;
+ // Which channel of a dual mono stream to select
+ uint16_t dual_mono_ch_sel;
+} ac3_config_t;
+void ac3dec_init (void);
+#ifdef __OMS__
+size_t ac3dec_decode_data (plugin_output_audio_t *output, uint8_t *data_start, uint8_t *data_end);
+#else
+size_t ac3dec_decode_data (uint8_t *data_start ,uint8_t *data_end, int ac3reset, int *input_pointer, int *output_pointer, char *ac3_data);
+#endif
+#endif
diff --git a/ac3dec/ac3_internal.h b/ac3dec/ac3_internal.h
index 235189e..259396f 100644
--- a/ac3dec/ac3_internal.h
+++ b/ac3dec/ac3_internal.h
@@ -25,6 +25,8 @@
#define inline
#endif
+#include <setjmp.h>
+
/* Exponent strategy constants */
#define EXP_REUSE (0)
#define EXP_D15 (1)
@@ -43,7 +45,8 @@ typedef float stream_samples_t[6][256];
/* global config structure */
extern ac3_config_t ac3_config;
/* global error flag */
-extern uint_32 error_flag;
+extern jmp_buf error_jmp_mark;
+#define HANDLE_ERROR() longjmp (error_jmp_mark, -1)
/* Everything you wanted to know about band structure */
/*
@@ -72,273 +75,279 @@ extern uint_32 error_flag;
typedef struct syncinfo_s
{
- uint_32 magic;
+ uint32_t magic;
/* Sync word == 0x0B77 */
- uint_16 syncword;
+ uint16_t syncword;
/* crc for the first 5/8 of the sync block */
- /* uint_16 crc1; */
+ /* uint16_t crc1; */
/* Stream Sampling Rate (kHz) 0 = 48, 1 = 44.1, 2 = 32, 3 = reserved */
- uint_16 fscod;
+ uint16_t fscod;
/* Frame size code */
- uint_16 frmsizecod;
+ uint16_t frmsizecod;
/* Information not in the AC-3 bitstream, but derived */
/* Frame size in 16 bit words */
- uint_16 frame_size;
+ uint16_t frame_size;
/* Bit rate in kilobits */
- uint_16 bit_rate;
+ uint16_t bit_rate;
/* sampling rate in hertz */
- uint_32 sampling_rate;
+ uint32_t sampling_rate;
} syncinfo_t;
typedef struct bsi_s
{
- uint_32 magic;
+ uint32_t magic;
/* Bit stream identification == 0x8 */
- uint_16 bsid;
+ uint16_t bsid;
/* Bit stream mode */
- uint_16 bsmod;
+ uint16_t bsmod;
/* Audio coding mode */
- uint_16 acmod;
+ uint16_t acmod;
/* If we're using the centre channel then */
/* centre mix level */
- uint_16 cmixlev;
+ uint16_t cmixlev;
/* If we're using the surround channel then */
/* surround mix level */
- uint_16 surmixlev;
+ uint16_t surmixlev;
/* If we're in 2/0 mode then */
/* Dolby surround mix level - NOT USED - */
- uint_16 dsurmod;
+ uint16_t dsurmod;
/* Low frequency effects on */
- uint_16 lfeon;
+ uint16_t lfeon;
/* Dialogue Normalization level */
- uint_16 dialnorm;
+ uint16_t dialnorm;
/* Compression exists */
- uint_16 compre;
+ uint16_t compre;
/* Compression level */
- uint_16 compr;
+ uint16_t compr;
/* Language code exists */
- uint_16 langcode;
+ uint16_t langcode;
/* Language code */
- uint_16 langcod;
+ uint16_t langcod;
/* Audio production info exists*/
- uint_16 audprodie;
- uint_16 mixlevel;
- uint_16 roomtyp;
+ uint16_t audprodie;
+ uint16_t mixlevel;
+ uint16_t roomtyp;
/* If we're in dual mono mode (acmod == 0) then extra stuff */
- uint_16 dialnorm2;
- uint_16 compr2e;
- uint_16 compr2;
- uint_16 langcod2e;
- uint_16 langcod2;
- uint_16 audprodi2e;
- uint_16 mixlevel2;
- uint_16 roomtyp2;
+ uint16_t dialnorm2;
+ uint16_t compr2e;
+ uint16_t compr2;
+ uint16_t langcod2e;
+ uint16_t langcod2;
+ uint16_t audprodi2e;
+ uint16_t mixlevel2;
+ uint16_t roomtyp2;
/* Copyright bit */
- uint_16 copyrightb;
+ uint16_t copyrightb;
/* Original bit */
- uint_16 origbs;
+ uint16_t origbs;
/* Timecode 1 exists */
- uint_16 timecod1e;
+ uint16_t timecod1e;
/* Timecode 1 */
- uint_16 timecod1;
+ uint16_t timecod1;
/* Timecode 2 exists */
- uint_16 timecod2e;
+ uint16_t timecod2e;
/* Timecode 2 */
- uint_16 timecod2;
+ uint16_t timecod2;
/* Additional bit stream info exists */
- uint_16 addbsie;
+ uint16_t addbsie;
/* Additional bit stream length - 1 (in bytes) */
- uint_16 addbsil;
+ uint16_t addbsil;
/* Additional bit stream information (max 64 bytes) */
- uint_8 addbsi[64];
+ uint8_t addbsi[64];
/* Information not in the AC-3 bitstream, but derived */
/* Number of channels (excluding LFE)
* Derived from acmod */
- uint_16 nfchans;
+ uint16_t nfchans;
} bsi_t;
/* more pain */
typedef struct audblk_s
{
- uint_32 magic1;
+ uint32_t magic1;
/* block switch bit indexed by channel num */
- uint_16 blksw[5];
+ uint16_t blksw[5];
/* dither enable bit indexed by channel num */
- uint_16 dithflag[5];
+ uint16_t dithflag[5];
/* dynamic range gain exists */
- uint_16 dynrnge;
+ uint16_t dynrnge;
/* dynamic range gain */
- uint_16 dynrng;
+ uint16_t dynrng;
/* if acmod==0 then */
/* dynamic range 2 gain exists */
- uint_16 dynrng2e;
+ uint16_t dynrng2e;
/* dynamic range 2 gain */
- uint_16 dynrng2;
+ uint16_t dynrng2;
/* coupling strategy exists */
- uint_16 cplstre;
+ uint16_t cplstre;
/* coupling in use */
- uint_16 cplinu;
+ uint16_t cplinu;
/* channel coupled */
- uint_16 chincpl[5];
+ uint16_t chincpl[5];
/* if acmod==2 then */
/* Phase flags in use */
- uint_16 phsflginu;
+ uint16_t phsflginu;
/* coupling begin frequency code */
- uint_16 cplbegf;
+ uint16_t cplbegf;
/* coupling end frequency code */
- uint_16 cplendf;
+ uint16_t cplendf;
/* coupling band structure bits */
- uint_16 cplbndstrc[18];
+ uint16_t cplbndstrc[18];
/* Do coupling co-ords exist for this channel? */
- uint_16 cplcoe[5];
+ uint16_t cplcoe[5];
/* Master coupling co-ordinate */
- uint_16 mstrcplco[5];
+ uint16_t mstrcplco[5];
/* Per coupling band coupling co-ordinates */
- uint_16 cplcoexp[5][18];
- uint_16 cplcomant[5][18];
+ uint16_t cplcoexp[5][18];
+ uint16_t cplcomant[5][18];
/* Phase flags for dual mono */
- uint_16 phsflg[18];
+ uint16_t phsflg[18];
/* Is there a rematrixing strategy */
- uint_16 rematstr;
+ uint16_t rematstr;
/* Rematrixing bits */
- uint_16 rematflg[4];
+ uint16_t rematflg[4];
/* Coupling exponent strategy */
- uint_16 cplexpstr;
+ uint16_t cplexpstr;
/* Exponent strategy for full bandwidth channels */
- uint_16 chexpstr[5];
+ uint16_t chexpstr[5];
/* Exponent strategy for lfe channel */
- uint_16 lfeexpstr;
+ uint16_t lfeexpstr;
/* Channel bandwidth for independent channels */
- uint_16 chbwcod[5];
+ uint16_t chbwcod[5];
/* The absolute coupling exponent */
- uint_16 cplabsexp;
+ uint16_t cplabsexp;
/* Coupling channel exponents (D15 mode gives 18 * 12 /3 encoded exponents */
- uint_16 cplexps[18 * 12 / 3];
+ uint16_t cplexps[18 * 12 / 3];
/* Sanity checking constant */
- uint_32 magic2;
+ uint32_t magic2;
/* fbw channel exponents */
- uint_16 exps[5][252 / 3];
+ uint16_t exps[5][252 / 3];
/* channel gain range */
- uint_16 gainrng[5];
+ uint16_t gainrng[5];
/* low frequency exponents */
- uint_16 lfeexps[3];
+ uint16_t lfeexps[3];
/* Bit allocation info */
- uint_16 baie;
+ uint16_t baie;
/* Slow decay code */
- uint_16 sdcycod;
+ uint16_t sdcycod;
/* Fast decay code */
- uint_16 fdcycod;
+ uint16_t fdcycod;
/* Slow gain code */
- uint_16 sgaincod;
+ uint16_t sgaincod;
/* dB per bit code */
- uint_16 dbpbcod;
+ uint16_t dbpbcod;
/* masking floor code */
- uint_16 floorcod;
+ uint16_t floorcod;
/* SNR offset info */
- uint_16 snroffste;
+ uint16_t snroffste;
/* coarse SNR offset */
- uint_16 csnroffst;
+ uint16_t csnroffst;
/* coupling fine SNR offset */
- uint_16 cplfsnroffst;
+ uint16_t cplfsnroffst;
/* coupling fast gain code */
- uint_16 cplfgaincod;
+ uint16_t cplfgaincod;
/* fbw fine SNR offset */
- uint_16 fsnroffst[5];
+ uint16_t fsnroffst[5];
/* fbw fast gain code */
- uint_16 fgaincod[5];
+ uint16_t fgaincod[5];
/* lfe fine SNR offset */
- uint_16 lfefsnroffst;
+ uint16_t lfefsnroffst;
/* lfe fast gain code */
- uint_16 lfefgaincod;
+ uint16_t lfefgaincod;
/* Coupling leak info */
- uint_16 cplleake;
+ uint16_t cplleake;
/* coupling fast leak initialization */
- uint_16 cplfleak;
+ uint16_t cplfleak;
/* coupling slow leak initialization */
- uint_16 cplsleak;
+ uint16_t cplsleak;
/* delta bit allocation info */
- uint_16 deltbaie;
+ uint16_t deltbaie;
/* coupling delta bit allocation exists */
- uint_16 cpldeltbae;
+ uint16_t cpldeltbae;
/* fbw delta bit allocation exists */
- uint_16 deltbae[5];
+ uint16_t deltbae[5];
/* number of cpl delta bit segments */
- uint_16 cpldeltnseg;
+ uint16_t cpldeltnseg;
/* coupling delta bit allocation offset */
- uint_16 cpldeltoffst[8];
+ uint16_t cpldeltoffst[8];
/* coupling delta bit allocation length */
- uint_16 cpldeltlen[8];
+ uint16_t cpldeltlen[8];
/* coupling delta bit allocation length */
- uint_16 cpldeltba[8];
+ uint16_t cpldeltba[8];
/* number of delta bit segments */
- uint_16 deltnseg[5];
+ uint16_t deltnseg[5];
/* fbw delta bit allocation offset */
- uint_16 deltoffst[5][8];
+ uint16_t deltoffst[5][8];
/* fbw delta bit allocation length */
- uint_16 deltlen[5][8];
+ uint16_t deltlen[5][8];
/* fbw delta bit allocation length */
- uint_16 deltba[5][8];
+ uint16_t deltba[5][8];
/* skip length exists */
- uint_16 skiple;
+ uint16_t skiple;
/* skip length */
- uint_16 skipl;
+ uint16_t skipl;
//Removed Feb 2000 -ah
+ //added Jul 2000 ++dent
/* channel mantissas */
- //uint_16 chmant[5][256];
+ uint16_t chmant[5][256];
/* coupling mantissas */
- uint_16 cplmant[256];
+// uint16_t cplmant[256];
+
+ //Added Jun 2000 -MaXX
+ /* coupling floats */
+ float cpl_flt[ 256 ];
//Removed Feb 2000 -ah
+ //added Jul 2000 ++dent
/* coupling mantissas */
- //uint_16 lfemant[7];
+ uint16_t lfemant[7];
/* -- Information not in the bitstream, but derived thereof -- */
/* Number of coupling sub-bands */
- uint_16 ncplsubnd;
+ uint16_t ncplsubnd;
/* Number of combined coupling sub-bands
* Derived from ncplsubnd and cplbndstrc */
- uint_16 ncplbnd;
+ uint16_t ncplbnd;
/* Number of exponent groups by channel
* Derived from strmant, endmant */
- uint_16 nchgrps[5];
+ uint16_t nchgrps[5];
/* Number of coupling exponent groups
* Derived from cplbegf, cplendf, cplexpstr */
- uint_16 ncplgrps;
+ uint16_t ncplgrps;
/* End mantissa numbers of fbw channels */
- uint_16 endmant[5];
+ uint16_t endmant[5];
/* Start and end mantissa numbers for the coupling channel */
- uint_16 cplstrtmant;
- uint_16 cplendmant;
+ uint16_t cplstrtmant;
+ uint16_t cplendmant;
/* Decoded exponent info */
- uint_16 fbw_exp[5][256];
- uint_16 cpl_exp[256];
- uint_16 lfe_exp[7];
+ uint16_t fbw_exp[5][256];
+ uint16_t cpl_exp[256];
+ uint16_t lfe_exp[7];
/* Bit allocation pointer results */
- uint_16 fbw_bap[5][256];
- uint_16 cpl_bap[256];
- uint_16 lfe_bap[7];
+ uint16_t fbw_bap[5][256];
+ uint16_t cpl_bap[256];
+ uint16_t lfe_bap[7];
- uint_32 magic3;
+ uint32_t magic3;
} audblk_t;
diff --git a/ac3dec/bit_allocate.c b/ac3dec/bit_allocate.c
index 053e09c..0087434 100644
--- a/ac3dec/bit_allocate.c
+++ b/ac3dec/bit_allocate.c
@@ -28,46 +28,46 @@
-static inline sint_16 logadd(sint_16 a,sint_16 b);
-static sint_16 calc_lowcomp(sint_16 a,sint_16 b0,sint_16 b1,sint_16 bin);
-static inline uint_16 min(sint_16 a,sint_16 b);
-static inline uint_16 max(sint_16 a,sint_16 b);
-static void ba_compute_psd(sint_16 start, sint_16 end, sint_16 exps[],
- sint_16 psd[], sint_16 bndpsd[]);
-
-static void ba_compute_excitation(sint_16 start, sint_16 end,sint_16 fgain,
- sint_16 fastleak, sint_16 slowleak, sint_16 is_lfe, sint_16 bndpsd[],
- sint_16 excite[]);
-static void ba_compute_mask(sint_16 start, sint_16 end, uint_16 fscod,
- uint_16 deltbae, uint_16 deltnseg, uint_16 deltoffst[], uint_16 deltba[],
- uint_16 deltlen[], sint_16 excite[], sint_16 mask[]);
-static void ba_compute_bap(sint_16 start, sint_16 end, sint_16 snroffset,
- sint_16 psd[], sint_16 mask[], sint_16 bap[]);
+static inline int16_t logadd(int16_t a,int16_t b);
+static int16_t calc_lowcomp(int16_t a,int16_t b0,int16_t b1,int16_t bin);
+static inline uint16_t min(int16_t a,int16_t b);
+static inline uint16_t max(int16_t a,int16_t b);
+static void ba_compute_psd(int16_t start, int16_t end, int16_t exps[],
+ int16_t psd[], int16_t bndpsd[]);
+
+static void ba_compute_excitation(int16_t start, int16_t end,int16_t fgain,
+ int16_t fastleak, int16_t slowleak, int16_t is_lfe, int16_t bndpsd[],
+ int16_t excite[]);
+static void ba_compute_mask(int16_t start, int16_t end, uint16_t fscod,
+ uint16_t deltbae, uint16_t deltnseg, uint16_t deltoffst[], uint16_t deltba[],
+ uint16_t deltlen[], int16_t excite[], int16_t mask[]);
+static void ba_compute_bap(int16_t start, int16_t end, int16_t snroffset,
+ int16_t psd[], int16_t mask[], int16_t bap[]);
/* Misc LUTs for bit allocation process */
-static sint_16 slowdec[] = { 0x0f, 0x11, 0x13, 0x15 };
-static sint_16 fastdec[] = { 0x3f, 0x53, 0x67, 0x7b };
-static sint_16 slowgain[] = { 0x540, 0x4d8, 0x478, 0x410 };
-static sint_16 dbpbtab[] = { 0x000, 0x700, 0x900, 0xb00 };
+static int16_t slowdec[] = { 0x0f, 0x11, 0x13, 0x15 };
+static int16_t fastdec[] = { 0x3f, 0x53, 0x67, 0x7b };
+static int16_t slowgain[] = { 0x540, 0x4d8, 0x478, 0x410 };
+static int16_t dbpbtab[] = { 0x000, 0x700, 0x900, 0xb00 };
-static uint_16 floortab[] = { 0x2f0, 0x2b0, 0x270, 0x230, 0x1f0, 0x170, 0x0f0, 0xf800 };
-static sint_16 fastgain[] = { 0x080, 0x100, 0x180, 0x200, 0x280, 0x300, 0x380, 0x400 };
+static uint16_t floortab[] = { 0x2f0, 0x2b0, 0x270, 0x230, 0x1f0, 0x170, 0x0f0, 0xf800 };
+static int16_t fastgain[] = { 0x080, 0x100, 0x180, 0x200, 0x280, 0x300, 0x380, 0x400 };
-static sint_16 bndtab[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
+static int16_t bndtab[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
20, 21, 22, 23, 24, 25, 26, 27, 28, 31,
34, 37, 40, 43, 46, 49, 55, 61, 67, 73,
79, 85, 97, 109, 121, 133, 157, 181, 205, 229 };
-static sint_16 bndsz[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+static int16_t bndsz[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 3, 3,
3, 3, 3, 3, 3, 6, 6, 6, 6, 6,
6, 12, 12, 12, 12, 24, 24, 24, 24, 24 };
-static sint_16 masktab[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+static int16_t masktab[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 28, 28, 29,
29, 29, 30, 30, 30, 31, 31, 31, 32, 32, 32, 33, 33, 33, 34, 34,
34, 35, 35, 35, 35, 35, 35, 36, 36, 36, 36, 36, 36, 37, 37, 37,
@@ -85,7 +85,7 @@ static sint_16 masktab[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 0, 0, 0 };
-static sint_16 latab[] = { 0x0040, 0x003f, 0x003e, 0x003d, 0x003c, 0x003b, 0x003a, 0x0039,
+static int16_t latab[] = { 0x0040, 0x003f, 0x003e, 0x003d, 0x003c, 0x003b, 0x003a, 0x0039,
0x0038, 0x0037, 0x0036, 0x0035, 0x0034, 0x0034, 0x0033, 0x0032,
0x0031, 0x0030, 0x002f, 0x002f, 0x002e, 0x002d, 0x002c, 0x002c,
0x002b, 0x002a, 0x0029, 0x0029, 0x0028, 0x0027, 0x0026, 0x0026,
@@ -119,7 +119,7 @@ static sint_16 latab[] = { 0x0040, 0x003f, 0x003e, 0x003d, 0x003c, 0x003b, 0x003
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000};
-static sint_16 hth[][50] = {{ 0x04d0, 0x04d0, 0x0440, 0x0400, 0x03e0, 0x03c0, 0x03b0, 0x03b0,
+static int16_t hth[][50] = {{ 0x04d0, 0x04d0, 0x0440, 0x0400, 0x03e0, 0x03c0, 0x03b0, 0x03b0,
0x03a0, 0x03a0, 0x03a0, 0x03a0, 0x03a0, 0x0390, 0x0390, 0x0390,
0x0380, 0x0380, 0x0370, 0x0370, 0x0360, 0x0360, 0x0350, 0x0350,
0x0340, 0x0340, 0x0330, 0x0320, 0x0310, 0x0300, 0x02f0, 0x02f0,
@@ -144,38 +144,50 @@ static sint_16 hth[][50] = {{ 0x04d0, 0x04d0, 0x0440, 0x0400, 0x03e0, 0x03c0, 0x
0x0450, 0x04e0 }};
-static sint_16 baptab[] = { 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6,
+static int16_t baptab[] = { 0, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 5, 5, 6,
6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 10,
10, 10, 10, 11, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 13, 14,
14, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 15, 15, 15, 15 };
-static sint_16 sdecay;
-static sint_16 fdecay;
-static sint_16 sgain;
-static sint_16 dbknee;
-static sint_16 floor;
-static sint_16 psd[256];
-static sint_16 bndpsd[256];
-static sint_16 excite[256];
-static sint_16 mask[256];
-
-static inline uint_16
-max(sint_16 a,sint_16 b)
+static int16_t sdecay;
+static int16_t fdecay;
+static int16_t sgain;
+static int16_t dbknee;
+static int16_t floor;
+static int16_t psd[256];
+static int16_t bndpsd[256];
+static int16_t excite[256];
+static int16_t mask[256];
+
+
+/**
+ *
+ **/
+
+static inline uint16_t max(int16_t a,int16_t b)
{
return (a > b ? a : b);
}
+
+
+/**
+ *
+ **/
-static inline uint_16
-min(sint_16 a,sint_16 b)
+static inline uint16_t min(int16_t a,int16_t b)
{
return (a < b ? a : b);
}
-static inline sint_16
-logadd(sint_16 a,sint_16 b)
+
+/**
+ *
+ **/
+
+static inline int16_t logadd(int16_t a,int16_t b)
{
- sint_16 c;
- sint_16 address;
+ int16_t c;
+ int16_t address;
c = a - b;
address = min((abs(c) >> 1), 255);
@@ -187,15 +199,19 @@ logadd(sint_16 a,sint_16 b)
}
-void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk)
+/**
+ *
+ **/
+
+void bit_allocate(uint16_t fscod, bsi_t *bsi, audblk_t *audblk)
{
- uint_16 i;
- sint_16 fgain;
- sint_16 snroffset;
- sint_16 start;
- sint_16 end;
- sint_16 fastleak;
- sint_16 slowleak;
+ uint16_t i;
+ int16_t fgain;
+ int16_t snroffset;
+ int16_t start;
+ int16_t end;
+ int16_t fastleak;
+ int16_t slowleak;
/* Only perform bit_allocation if the exponents have changed or we
* have new sideband information */
@@ -217,11 +233,10 @@ void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk)
if(!audblk->csnroffst && !audblk->fsnroffst[0] &&
!audblk->fsnroffst[1] && !audblk->fsnroffst[2] &&
!audblk->fsnroffst[3] && !audblk->fsnroffst[4] &&
- !audblk->cplfsnroffst && !audblk->lfefsnroffst)
- {
- memset(audblk->fbw_bap,0,sizeof(uint_16) * 256 * 5);
- memset(audblk->cpl_bap,0,sizeof(uint_16) * 256);
- memset(audblk->lfe_bap,0,sizeof(uint_16) * 7);
+ !audblk->cplfsnroffst && !audblk->lfefsnroffst) {
+ memset(audblk->fbw_bap,0,sizeof(uint16_t) * 256 * 5);
+ memset(audblk->cpl_bap,0,sizeof(uint16_t) * 256);
+ memset(audblk->lfe_bap,0,sizeof(uint16_t) * 7);
return;
}
@@ -245,8 +260,7 @@ void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk)
ba_compute_bap(start, end, snroffset, psd, mask, audblk->fbw_bap[i]);
}
- if(audblk->cplinu)
- {
+ if(audblk->cplinu) {
start = audblk->cplstrtmant;
end = audblk->cplendmant;
fgain = fastgain[audblk->cplfgaincod];
@@ -264,8 +278,7 @@ void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk)
ba_compute_bap(start, end, snroffset, psd, mask, audblk->cpl_bap);
}
- if(bsi->lfeon)
- {
+ if(bsi->lfeon) {
start = 0;
end = 7;
fgain = fastgain[audblk->lfefgaincod];
@@ -285,15 +298,18 @@ void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk)
}
-static void ba_compute_psd(sint_16 start, sint_16 end, sint_16 exps[],
- sint_16 psd[], sint_16 bndpsd[])
+/**
+ *
+ **/
+
+static void ba_compute_psd(int16_t start, int16_t end, int16_t exps[],
+ int16_t psd[], int16_t bndpsd[])
{
int bin,i,j,k;
- sint_16 lastbin = 0;
+ int16_t lastbin = 0;
/* Map the exponents into dBs */
- for (bin=start; bin<end; bin++)
- {
+ for (bin=start; bin<end; bin++) {
psd[bin] = (3072 - (exps[bin] << 7));
}
@@ -301,14 +317,12 @@ static void ba_compute_psd(sint_16 start, sint_16 end, sint_16 exps[],
j = start;
k = masktab[start];
- do
- {
+ do {
lastbin = min(bndtab[k] + bndsz[k], end);
bndpsd[k] = psd[j];
j++;
- for (i = j; i < lastbin; i++)
- {
+ for (i = j; i < lastbin; i++) {
bndpsd[k] = logadd(bndpsd[k], psd[j]);
j++;
}
@@ -317,49 +331,49 @@ static void ba_compute_psd(sint_16 start, sint_16 end, sint_16 exps[],
} while (end > lastbin);
}
-static void ba_compute_excitation(sint_16 start, sint_16 end,sint_16 fgain,
- sint_16 fastleak, sint_16 slowleak, sint_16 is_lfe, sint_16 bndpsd[],
- sint_16 excite[])
+
+/**
+ *
+ **/
+
+static void ba_compute_excitation(int16_t start, int16_t end,int16_t fgain,
+ int16_t fastleak, int16_t slowleak, int16_t is_lfe, int16_t bndpsd[],
+ int16_t excite[])
{
int bin;
- sint_16 bndstrt;
- sint_16 bndend;
- sint_16 lowcomp = 0;
- sint_16 begin = 0;
+ int16_t bndstrt;
+ int16_t bndend;
+ int16_t lowcomp = 0;
+ int16_t begin = 0;
/* Compute excitation function */
bndstrt = masktab[start];
bndend = masktab[end - 1] + 1;
- if (bndstrt == 0) /* For fbw and lfe channels */
- {
+ if (bndstrt == 0) { /* For fbw and lfe channels */
lowcomp = calc_lowcomp(lowcomp, bndpsd[0], bndpsd[1], 0);
excite[0] = bndpsd[0] - fgain - lowcomp;
lowcomp = calc_lowcomp(lowcomp, bndpsd[1], bndpsd[2], 1);
excite[1] = bndpsd[1] - fgain - lowcomp;
begin = 7 ;
- /* Note: Do not call calc_lowcomp() for the last band of the lfe channel, (bin = 6) */
- for (bin = 2; bin < 7; bin++)
- {
+// Note: Do not call calc_lowcomp() for the last band of the lfe channel,(bin=6)
+ for (bin = 2; bin < 7; bin++) {
if (!(is_lfe && (bin == 6)))
lowcomp = calc_lowcomp(lowcomp, bndpsd[bin], bndpsd[bin+1], bin);
fastleak = bndpsd[bin] - fgain;
slowleak = bndpsd[bin] - sgain;
excite[bin] = fastleak - lowcomp;
- if (!(is_lfe && (bin == 6)))
- {
- if (bndpsd[bin] <= bndpsd[bin+1])
- {
+ if (!(is_lfe && (bin == 6))) {
+ if (bndpsd[bin] <= bndpsd[bin+1]) {
begin = bin + 1 ;
break;
}
}
}
- for (bin = begin; bin < min(bndend, 22); bin++)
- {
+ for (bin = begin; bin < min(bndend, 22); bin++) {
if (!(is_lfe && (bin == 6)))
lowcomp = calc_lowcomp(lowcomp, bndpsd[bin], bndpsd[bin+1], bin);
fastleak -= fdecay ;
@@ -371,12 +385,9 @@ static void ba_compute_excitation(sint_16 start, sint_16 end,sint_16 fgain,
begin = 22;
}
else /* For coupling channel */
- {
begin = bndstrt;
- }
- for (bin = begin; bin < bndend; bin++)
- {
+ for (bin = begin; bin < bndend; bin++) {
fastleak -= fdecay;
fastleak = max(fastleak, bndpsd[bin] - fgain);
slowleak -= sdecay;
@@ -385,49 +396,47 @@ static void ba_compute_excitation(sint_16 start, sint_16 end,sint_16 fgain,
}
}
-static void ba_compute_mask(sint_16 start, sint_16 end, uint_16 fscod,
- uint_16 deltbae, uint_16 deltnseg, uint_16 deltoffst[], uint_16 deltba[],
- uint_16 deltlen[], sint_16 excite[], sint_16 mask[])
+
+/**
+ *
+ **/
+
+static void ba_compute_mask(int16_t start, int16_t end, uint16_t fscod,
+ uint16_t deltbae, uint16_t deltnseg, uint16_t deltoffst[], uint16_t deltba[],
+ uint16_t deltlen[], int16_t excite[], int16_t mask[])
{
int bin,k;
- sint_16 bndstrt;
- sint_16 bndend;
- sint_16 delta;
+ int16_t bndstrt;
+ int16_t bndend;
+ int16_t delta;
bndstrt = masktab[start];
bndend = masktab[end - 1] + 1;
/* Compute the masking curve */
- for (bin = bndstrt; bin < bndend; bin++)
- {
- if (bndpsd[bin] < dbknee)
- {
+ for (bin = bndstrt; bin < bndend; bin++) {
+ if (bndpsd[bin] < dbknee) {
excite[bin] += ((dbknee - bndpsd[bin]) >> 2);
}
mask[bin] = max(excite[bin], hth[fscod][bin]);
}
/* Perform delta bit modulation if necessary */
- if ((deltbae == DELTA_BIT_REUSE) || (deltbae == DELTA_BIT_NEW))
- {
- sint_16 band = 0;
- sint_16 seg = 0;
+ if ((deltbae == DELTA_BIT_REUSE) || (deltbae == DELTA_BIT_NEW)) {
+ int16_t band = 0;
+ int16_t seg = 0;
- for (seg = 0; seg < deltnseg+1; seg++)
- {
+ for (seg = 0; seg < deltnseg+1; seg++) {
band += deltoffst[seg];
- if (deltba[seg] >= 4)
- {
+
+ if (deltba[seg] >= 4) {
delta = (deltba[seg] - 3) << 7;
- }
- else
- {
+ } else {
delta = (deltba[seg] - 4) << 7;
}
- for (k = 0; k < deltlen[seg]; k++)
- {
+ for (k = 0; k < deltlen[seg]; k++) {
mask[band] += delta;
band++;
}
@@ -435,19 +444,23 @@ static void ba_compute_mask(sint_16 start, sint_16 end, uint_16 fscod,
}
}
-static void ba_compute_bap(sint_16 start, sint_16 end, sint_16 snroffset,
- sint_16 psd[], sint_16 mask[], sint_16 bap[])
+
+/**
+ *
+ **/
+
+static void ba_compute_bap(int16_t start, int16_t end, int16_t snroffset,
+ int16_t psd[], int16_t mask[], int16_t bap[])
{
int i,j,k;
- sint_16 lastbin = 0;
- sint_16 address = 0;
+ int16_t lastbin = 0;
+ int16_t address = 0;
/* Compute the bit allocation pointer for each bin */
i = start;
j = masktab[start];
- do
- {
+ do {
lastbin = min(bndtab[j] + bndsz[j], end);
mask[j] -= snroffset;
mask[j] -= floor;
@@ -457,8 +470,7 @@ static void ba_compute_bap(sint_16 start, sint_16 end, sint_16 snroffset,
mask[j] &= 0x1fe0;
mask[j] += floor;
- for (k = i; k < lastbin; k++)
- {
+ for (k = i; k < lastbin; k++) {
address = (psd[i] - mask[j]) >> 5;
address = min(63, max(0, address));
bap[i] = baptab[address];
@@ -468,25 +480,25 @@ static void ba_compute_bap(sint_16 start, sint_16 end, sint_16 snroffset,
} while (end > lastbin);
}
-static sint_16
-calc_lowcomp(sint_16 a,sint_16 b0,sint_16 b1,sint_16 bin)
+
+/**
+ *
+ **/
+
+static int16_t calc_lowcomp (int16_t a, int16_t b0, int16_t b1, int16_t bin)
{
- if (bin < 7)
- {
+ if (bin < 7) {
if ((b0 + 256) == b1)
a = 384;
else if (b0 > b1)
a = max(0, a - 64);
- }
- else if (bin < 20)
- {
+ } else if (bin < 20) {
if ((b0 + 256) == b1)
a = 320;
else if (b0 > b1)
a = max(0, a - 64) ;
- }
- else
+ } else
a = max(0, a - 128);
return(a);
diff --git a/ac3dec/bit_allocate.h b/ac3dec/bit_allocate.h
index e48b0b2..a6a3c77 100644
--- a/ac3dec/bit_allocate.h
+++ b/ac3dec/bit_allocate.h
@@ -21,4 +21,4 @@
*
*/
-void bit_allocate(uint_16 fscod, bsi_t *bsi, audblk_t *audblk);
+void bit_allocate(uint16_t fscod, bsi_t *bsi, audblk_t *audblk);
diff --git a/ac3dec/bitstream.c b/ac3dec/bitstream.c
index 296d5ee..f48d240 100644
--- a/ac3dec/bitstream.c
+++ b/ac3dec/bitstream.c
@@ -23,54 +23,57 @@
#include <stdlib.h>
#include <stdio.h>
+#include <inttypes.h>
+
+#include <bswap.h>
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
#include "ac3.h"
#include "ac3_internal.h"
#include "bitstream.h"
-uint_8 *buffer_start = 0;
-uint_32 bits_left = 0;
-uint_32 current_word;
+uint32_t bits_left = 0;
+uint64_t current_word;
+uint64_t *buffer_start = 0;
+
+
+static inline uint64_t getdword (void)
+{
+ return be2me_64 (*buffer_start++);
+}
+
-static inline void
-bitstream_fill_current()
+static inline void bitstream_fill_current (void)
{
- current_word = *((uint_32*)buffer_start)++;
- current_word = swab32(current_word);
+ //current_word = bswap_64 (*buffer_start++);
+ current_word = getdword ();
}
-//
-// The fast paths for _get is in the
-// bitstream.h header file so it can be inlined.
-//
-// The "bottom half" of this routine is suffixed _bh
-//
-// -ah
-//
-
-uint_32
-bitstream_get_bh(uint_32 num_bits)
+
+uint32_t bitstream_get_bh (uint32_t num_bits)
{
- uint_32 result;
+ uint32_t result;
num_bits -= bits_left;
- result = (current_word << (32 - bits_left)) >> (32 - bits_left);
+ result = (current_word << (64 - bits_left)) >> (64 - bits_left);
bitstream_fill_current();
if(num_bits != 0)
- result = (result << num_bits) | (current_word >> (32 - num_bits));
+ result = (result << num_bits) | (current_word >> (64 - num_bits));
- bits_left = 32 - num_bits;
+ bits_left = 64 - num_bits;
return result;
}
-void
-bitstream_init(uint_8 *start)
+
+void bitstream_init (uint8_t *start)
{
//initialize the start of the buffer
- buffer_start = start;
+ buffer_start = (uint64_t *) start;
bits_left = 0;
}
diff --git a/ac3dec/bitstream.h b/ac3dec/bitstream.h
index 3351930..f34726a 100644
--- a/ac3dec/bitstream.h
+++ b/ac3dec/bitstream.h
@@ -21,56 +21,24 @@
*
*/
-//My new and improved vego-matic endian swapping routine
-//(stolen from the kernel)
-#ifdef WORDS_BIGENDIAN
-# define swab32(x) (x)
+#include <inttypes.h>
-#else
+extern uint32_t bits_left;
+extern uint64_t current_word;
-# if defined (__i386__)
+void bitstream_init(uint8_t *start);
+inline uint32_t bitstream_get_bh(uint32_t num_bits);
-# define swab32(x) __i386_swab32(x)
- static inline const uint_32 __i386_swab32(uint_32 x)
- {
- __asm__("bswap %0" : "=r" (x) : "0" (x));
- return x;
- }
-
-# else
-
-# define swab32(x)\
-((((uint_8*)&x)[0] << 24) | (((uint_8*)&x)[1] << 16) | \
- (((uint_8*)&x)[2] << 8) | (((uint_8*)&x)[3]))
-
-# endif
-#endif
-
-extern uint_32 bits_left;
-extern uint_32 current_word;
-
-void bitstream_init(uint_8 *start);
-
-uint_8 bitstream_get_byte(void);
-
-uint_8 *bitstream_get_buffer_start(void);
-void bitstream_buffer_frame(uint_32 frame_size);
-
-uint_32 bitstream_get_bh(uint_32 num_bits);
-
-static inline uint_32
-bitstream_get(uint_32 num_bits)
+static inline uint32_t bitstream_get (uint32_t num_bits)
{
- uint_32 result;
-
- if(num_bits < bits_left)
- {
- result = (current_word << (32 - bits_left)) >> (32 - num_bits);
+ uint32_t result;
+
+ if (num_bits < bits_left) {
+ result = (current_word << (64 - bits_left)) >> (64 - num_bits);
bits_left -= num_bits;
return result;
}
- return bitstream_get_bh(num_bits);
+ return bitstream_get_bh (num_bits);
}
-
diff --git a/ac3dec/bswap.h b/ac3dec/bswap.h
new file mode 100644
index 0000000..73d398a
--- /dev/null
+++ b/ac3dec/bswap.h
@@ -0,0 +1,80 @@
+/*****
+*
+* This file is part of the OMS program.
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2, or (at your option)
+* any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; see the file COPYING. If not, write to
+* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+*****/
+
+#ifndef __BSWAP_H__
+#define __BSWAP_H__
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef HAVE_BYTESWAP_H
+#include <byteswap.h>
+#else
+
+#include <inttypes.h>
+
+#ifdef WORDS_BIGENDIAN
+// FIXME these need to actually swap ;)
+#define bswap_16(x) (x)
+#define bswap_32(x) (x)
+#define bswap_64(x) (x)
+#else
+// This is wrong, 'cannot take address of ...'
+#define bswap_16(x) ((((uint8_t*)&x)[2] << 8) \
+ | (((uint8_t*)&x)[3]))
+
+// code from bits/byteswap.h (C) 1997, 1998 Free Software Foundation, Inc.
+#define bswap_32(x) \
+ ((((x) & 0xff000000) >> 24) | (((x) & 0x00ff0000) >> 8) | \
+ (((x) & 0x0000ff00) << 8) | (((x) & 0x000000ff) << 24))
+
+#define bswap_64(x) \
+ (__extension__ \
+ ({ union { __extension__ unsigned long long int __ll; \
+ unsigned long int __l[2]; } __w, __r; \
+ __w.__ll = (x); \
+ __r.__l[0] = bswap_32 (__w.__l[1]); \
+ __r.__l[1] = bswap_32 (__w.__l[0]); \
+ __r.__ll; }))
+#endif
+
+#endif
+
+// be2me ... BigEndian to MachineEndian
+// le2me ... LittleEndian to MachineEndian
+
+#ifdef WORDS_BIGENDIAN
+#define be2me_16(x) (x)
+#define be2me_32(x) (x)
+#define be2me_64(x) (x)
+#define le2me_16(x) bswap_16(x)
+#define le2me_32(x) bswap_32(x)
+#define le2me_64(x) bswap_64(x)
+#else
+#define be2me_16(x) bswap_16(x)
+#define be2me_32(x) bswap_32(x)
+#define be2me_64(x) bswap_64(x)
+#define le2me_16(x) (x)
+#define le2me_32(x) (x)
+#define le2me_64(x) (x)
+#endif
+
+#endif
diff --git a/ac3dec/cmplx.h b/ac3dec/cmplx.h
new file mode 100644
index 0000000..9f74721
--- /dev/null
+++ b/ac3dec/cmplx.h
@@ -0,0 +1,29 @@
+/*****
+*
+* This file is part of the OMS program.
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2, or (at your option)
+* any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; see the file COPYING. If not, write to
+* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+*****/
+
+#ifndef __COMPLEX_H__
+#define __COMPLEX_H__
+
+typedef struct complex {
+ float re;
+ float im;
+} complex_t;
+
+#endif
diff --git a/ac3dec/coeff.c b/ac3dec/coeff.c
index b9f03ff..7d5579c 100644
--- a/ac3dec/coeff.c
+++ b/ac3dec/coeff.c
@@ -28,51 +28,183 @@
#include "ac3_internal.h"
-#include "decode.h"
#include "bitstream.h"
#include "dither.h"
#include "coeff.h"
+
//
//Lookup tables of 0.15 two's complement quantization values
//
-static const uint_16 q_1[3] =
+#define Q0 ((-2 << 15) / 3.0)
+#define Q1 (0)
+#define Q2 ((2 << 15) / 3.0)
+
+static const float q_1_0[ 32 ] =
{
- ( -2 << 15)/3, 0,( 2 << 15)/3
-};
+ Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,
+ Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,
+ Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,
+ 0,0,0,0,0
+};
-static const uint_16 q_2[5] =
+static const float q_1_1[ 32 ] =
{
- ( -4 << 15)/5,( -2 << 15)/5, 0,
- ( 2 << 15)/5,( 4 << 15)/5
-};
+ Q0,Q0,Q0,Q1,Q1,Q1,Q2,Q2,Q2,
+ Q0,Q0,Q0,Q1,Q1,Q1,Q2,Q2,Q2,
+ Q0,Q0,Q0,Q1,Q1,Q1,Q2,Q2,Q2,
+ 0,0,0,0,0
+};
-static const uint_16 q_3[7] =
+static const float q_1_2[ 32 ] =
+{
+ Q0,Q1,Q2,Q0,Q1,Q2,Q0,Q1,Q2,
+ Q0,Q1,Q2,Q0,Q1,Q2,Q0,Q1,Q2,
+ Q0,Q1,Q2,Q0,Q1,Q2,Q0,Q1,Q2,
+ 0,0,0,0,0
+};
+
+#undef Q0
+#undef Q1
+#undef Q2
+
+#define Q0 ((-4 << 15) / 5.0)
+#define Q1 ((-2 << 15) / 5.0)
+#define Q2 (0)
+#define Q3 ((2 << 15) / 5.0)
+#define Q4 ((4 << 15) / 5.0)
+
+static const float q_2_0[ 128 ] =
+{
+ Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,
+ Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,
+ Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,
+ Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,
+ Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,
+ Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,
+ Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,
+ Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,
+ Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,
+ Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,
+ 0,0,0
+};
+
+static const float q_2_1[ 128 ] =
+{
+ Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,
+ Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,
+ Q4,Q4,Q4,Q4,Q4,Q0,Q0,Q0,Q0,Q0,
+ Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,
+ Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+ Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,
+ Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,
+ Q4,Q4,Q4,Q4,Q4,Q0,Q0,Q0,Q0,Q0,
+ Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,
+ Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+ Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,
+ Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,
+ Q4,Q4,Q4,Q4,Q4,0,0,0
+ };
+
+static const float q_2_2[ 128 ] =
+ {
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ Q0,Q1,Q2,Q3,Q4,0,0,0
+};
+
+#undef Q0
+#undef Q1
+#undef Q2
+#undef Q3
+#undef Q4
+
+static const float q_3[7] =
{
- ( -6 << 15)/7,( -4 << 15)/7,( -2 << 15)/7, 0,
- ( 2 << 15)/7,( 4 << 15)/7,( 6 << 15)/7
+ (-6 << 15)/7.0, (-4 << 15)/7.0, (-2 << 15)/7.0, 0.0,
+ ( 2 << 15)/7.0, ( 4 << 15)/7.0, ( 6 << 15)/7.0
};
-static const uint_16 q_4[11] =
+#define Q0 ((-10 << 15) / 11.0)
+#define Q1 ((-8 << 15) / 11.0)
+#define Q2 ((-6 << 15) / 11.0)
+#define Q3 ((-4 << 15) / 11.0)
+#define Q4 ((-2 << 15) / 11.0)
+#define Q5 (0)
+#define Q6 ((2 << 15) / 11.0)
+#define Q7 ((4 << 15) / 11.0)
+#define Q8 ((6 << 15) / 11.0)
+#define Q9 ((8 << 15) / 11.0)
+#define QA ((10 << 15) / 11.0)
+
+static const float q_4_0[ 128 ] =
{
- (-10 << 15)/11,(-8 << 15)/11,(-6 << 15)/11, ( -4 << 15)/11,(-2 << 15)/11, 0,
- ( 2 << 15)/11,( 4 << 15)/11,( 6 << 15)/11, ( 8 << 15)/11,(10 << 15)/11
+ Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0,
+ Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1,
+ Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2,
+ Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3,
+ Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4,
+ Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5,
+ Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6,
+ Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7,
+ Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8,
+ Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9,
+ QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, QA,
+ 0, 0, 0, 0, 0, 0, 0
+ };
+
+static const float q_4_1[ 128 ] =
+{
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ 0, 0, 0, 0, 0, 0, 0
};
-static const uint_16 q_5[15] =
+#undef Q0
+#undef Q1
+#undef Q2
+#undef Q3
+#undef Q4
+#undef Q5
+#undef Q6
+#undef Q7
+#undef Q8
+#undef Q9
+#undef QA
+
+static const float q_5[15] =
{
- (-14 << 15)/15,(-12 << 15)/15,(-10 << 15)/15,
- ( -8 << 15)/15,( -6 << 15)/15,( -4 << 15)/15,
- ( -2 << 15)/15, 0 ,( 2 << 15)/15,
- ( 4 << 15)/15,( 6 << 15)/15,( 8 << 15)/15,
- ( 10 << 15)/15,( 12 << 15)/15,( 14 << 15)/15
-};
+ (-14 << 15)/15.0,(-12 << 15)/15.0,(-10 << 15)/15.0,
+ ( -8 << 15)/15.0,( -6 << 15)/15.0,( -4 << 15)/15.0,
+ ( -2 << 15)/15.0, 0.0 ,( 2 << 15)/15.0,
+ ( 4 << 15)/15.0,( 6 << 15)/15.0,( 8 << 15)/15.0,
+ ( 10 << 15)/15.0,( 12 << 15)/15.0,( 14 << 15)/15.0
+};
//
// Scale factors for convert_to_float
//
-static const uint_32 u32_scale_factors[25] =
+static const uint32_t u32_scale_factors[25] =
{
0x38000000, //2 ^ -(0 + 15)
0x37800000, //2 ^ -(1 + 15)
@@ -104,230 +236,179 @@ static const uint_32 u32_scale_factors[25] =
static float *scale_factor = (float*)u32_scale_factors;
//These store the persistent state of the packed mantissas
-static uint_16 m_1[3];
-static uint_16 m_2[3];
-static uint_16 m_4[2];
-static uint_16 m_1_pointer;
-static uint_16 m_2_pointer;
-static uint_16 m_4_pointer;
+static float q_1[2];
+static float q_2[2];
+static float q_4[1];
+static int32_t q_1_pointer;
+static int32_t q_2_pointer;
+static int32_t q_4_pointer;
+static float __inline__
+coeff_get_float(uint16_t bap, uint16_t dithflag, uint16_t exp);
//Conversion from bap to number of bits in the mantissas
//zeros account for cases 0,1,2,4 which are special cased
-static uint_16 qnttztab[16] = { 0, 0, 0, 3, 0 , 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16};
-
-static void coeff_reset(void);
-static sint_16 coeff_get_mantissa(uint_16 bap, uint_16 dithflag);
-static void coeff_uncouple_ch(float samples[],bsi_t *bsi,audblk_t *audblk,uint_32 ch);
-
-//
-// Convert a 0.15 fixed point number into IEEE single
-// precision floating point and scale by 2^-exp
-//
-static inline float
-convert_to_float(uint_16 exp, sint_16 mantissa)
+static uint16_t qnttztab[16] =
{
- float x;
-
- //the scale by 2^-15 is built into the scale factor table
- x = mantissa * scale_factor[exp];
+ 0, 0, 0, 3,
+ 0, 4, 5, 6,
+ 7, 8, 9, 10,
+ 11, 12, 14, 16
+};
- return x;
-}
+static void coeff_reset(void);
+static float coeff_get_float(uint16_t bap, uint16_t dithflag, uint16_t exp);
+static void coeff_uncouple_ch(float samples[],bsi_t *bsi,audblk_t *audblk,uint32_t ch);
-void
-coeff_unpack(bsi_t *bsi, audblk_t *audblk, stream_samples_t samples)
+void coeff_unpack(bsi_t *bsi, audblk_t *audblk, stream_samples_t samples)
{
- uint_16 i,j;
- uint_32 done_cpl = 0;
- sint_16 mantissa;
+ uint16_t i,j;
+ uint32_t done_cpl = 0;
coeff_reset();
- for(i=0; i< bsi->nfchans; i++)
- {
+ for(i=0; i< bsi->nfchans; i++) {
for(j=0; j < audblk->endmant[i]; j++)
- {
- mantissa = coeff_get_mantissa(audblk->fbw_bap[i][j],audblk->dithflag[i]);
- samples[i][j] = convert_to_float(audblk->fbw_exp[i][j],mantissa);
- }
+ samples[i][j] = coeff_get_float(audblk->fbw_bap[i][j], audblk->dithflag[i], audblk->fbw_exp[i][j]);
- if(audblk->cplinu && audblk->chincpl[i] && !(done_cpl))
- {
+ if(audblk->cplinu && audblk->chincpl[i] && !(done_cpl)) {
// ncplmant is equal to 12 * ncplsubnd
- // Don't dither coupling channel until channel separation so that
- // interchannel noise is uncorrelated
+ // Don't dither coupling channel until channel
+ // separation so that interchannel noise is uncorrelated
for(j=audblk->cplstrtmant; j < audblk->cplendmant; j++)
- audblk->cplmant[j] = coeff_get_mantissa(audblk->cpl_bap[j],0);
+ audblk->cpl_flt[j] = coeff_get_float(audblk->cpl_bap[j],0, audblk->cpl_exp[j]);
done_cpl = 1;
}
}
//uncouple the channel if necessary
- if(audblk->cplinu)
- {
- for(i=0; i< bsi->nfchans; i++)
- {
+ if(audblk->cplinu) {
+ for(i=0; i< bsi->nfchans; i++) {
if(audblk->chincpl[i])
coeff_uncouple_ch(samples[i],bsi,audblk,i);
}
}
- if(bsi->lfeon)
- {
+ if(bsi->lfeon) {
// There are always 7 mantissas for lfe, no dither for lfe
for(j=0; j < 7 ; j++)
- {
- mantissa = coeff_get_mantissa(audblk->lfe_bap[j],0);
- samples[5][j] = convert_to_float(audblk->lfe_exp[j],mantissa);
- }
+ samples[5][j] = coeff_get_float(audblk->lfe_bap[j], 0, audblk->lfe_exp[j]);
}
}
-//
-//Fetch a mantissa from the bitstream
-//
-//The mantissa returned is a signed 0.15 fixed point number
-//
-static sint_16
-coeff_get_mantissa(uint_16 bap, uint_16 dithflag)
+
+/**
+ * Fetch a float from the bitstream
+ **/
+
+static float inline coeff_get_float (uint16_t bap, uint16_t dithflag, uint16_t exp)
{
- uint_16 mantissa;
- uint_16 group_code;
+ uint16_t dummy = 0;
//If the bap is 0-5 then we have special cases to take care of
- switch(bap)
- {
+ switch(bap) {
case 0:
if(dithflag)
- mantissa = dither_gen();
- else
- mantissa = 0;
- break;
+ return (dither_gen() * scale_factor[exp]);
+
+ return 0.0;
case 1:
- if(m_1_pointer > 2)
- {
- group_code = bitstream_get(5);
-
- if(group_code > 26)
- goto error;
-
- m_1[0] = group_code / 9;
- m_1[1] = (group_code % 9) / 3;
- m_1[2] = (group_code % 9) % 3;
- m_1_pointer = 0;
- }
- mantissa = m_1[m_1_pointer++];
- mantissa = q_1[mantissa];
- break;
+ if (q_1_pointer >= 0)
+ return(q_1[q_1_pointer--] * scale_factor[exp]);
+
+ if ((dummy = bitstream_get (5)) > 26)
+ goto error;
+
+ q_1[1] = q_1_1[dummy];
+ q_1[0] = q_1_2[dummy];
+ q_1_pointer = 1;
+
+ return (q_1_0[dummy] * scale_factor[exp]);
+
case 2:
+ if(q_2_pointer >= 0)
+ return (q_2[q_2_pointer--] * scale_factor[exp]);
- if(m_2_pointer > 2)
- {
- group_code = bitstream_get(7);
+ if ((dummy = bitstream_get (7)) > 124)
+ goto error;
- if(group_code > 124)
- goto error;
+ q_2[1] = q_2_1[dummy];
+ q_2[0] = q_2_2[dummy];
+ q_2_pointer = 1;
- m_2[0] = group_code / 25;
- m_2[1] = (group_code % 25) / 5 ;
- m_2[2] = (group_code % 25) % 5 ;
- m_2_pointer = 0;
- }
- mantissa = m_2[m_2_pointer++];
- mantissa = q_2[mantissa];
- break;
+ return (q_2_0[dummy] * scale_factor[exp]);
case 3:
- mantissa = bitstream_get(3);
-
- if(mantissa > 6)
+ if ((dummy = bitstream_get (3)) > 6)
goto error;
- mantissa = q_3[mantissa];
- break;
+ return (q_3[dummy] * scale_factor[exp]);
case 4:
- if(m_4_pointer > 1)
- {
- group_code = bitstream_get(7);
+ if(q_4_pointer >= 0)
+ return (q_4[q_4_pointer--] * scale_factor[exp]);
- if(group_code > 120)
- goto error;
+ if ((dummy = bitstream_get (7)) > 120)
+ goto error;
- m_4[0] = group_code / 11;
- m_4[1] = group_code % 11;
- m_4_pointer = 0;
- }
- mantissa = m_4[m_4_pointer++];
- mantissa = q_4[mantissa];
- break;
+ q_4[0] = q_4_1[dummy];
+ q_4_pointer = 0;
- case 5:
- mantissa = bitstream_get(4);
+ return (q_4_0[dummy] * scale_factor[exp]);
- if(mantissa > 14)
+ case 5:
+ if ((dummy = bitstream_get (4)) > 14)
goto error;
- mantissa = q_5[mantissa];
- break;
+ return (q_5[dummy] * scale_factor[exp]);
default:
- mantissa = bitstream_get(qnttztab[bap]);
- mantissa <<= 16 - qnttztab[bap];
+ dummy = bitstream_get(qnttztab[bap]);
+ dummy <<= 16 - qnttztab[bap];
+ return ((int16_t)dummy * scale_factor[exp]);
}
- return mantissa;
-
-
-
error:
- if(!error_flag)
- fprintf(stderr,"** Invalid mantissa - skipping frame **\n");
- error_flag = 1;
-
- return 0;
+#ifdef DEBUG
+ fprintf(stderr,"** Invalid mantissa - skipping frame **\n");
+#endif
+ HANDLE_ERROR();
}
-//
-// Reset the mantissa state
-//
-static void
-coeff_reset(void)
+
+/**
+ * Reset the mantissa state
+ **/
+
+static void coeff_reset(void)
{
- m_1[2] = m_1[1] = m_1[0] = 0;
- m_2[2] = m_2[1] = m_2[0] = 0;
- m_4[1] = m_4[0] = 0;
- m_1_pointer = m_2_pointer = m_4_pointer = 3;
+ q_1_pointer = q_2_pointer = q_4_pointer = -1;
}
-//
-// Uncouple the coupling channel into a fbw channel
-//
-static void
-coeff_uncouple_ch(float samples[],bsi_t *bsi,audblk_t *audblk,uint_32 ch)
+
+/**
+ * Uncouple the coupling channel into a fbw channel
+ **/
+
+static void coeff_uncouple_ch (float samples[],bsi_t *bsi,audblk_t *audblk,uint32_t ch)
{
- uint_32 bnd = 0;
- uint_32 sub_bnd = 0;
- uint_32 i,j;
+ uint32_t bnd = 0;
+ uint32_t sub_bnd = 0;
+ uint32_t i,j;
float cpl_coord = 1.0;
- uint_32 cpl_exp_tmp;
- uint_32 cpl_mant_tmp;
- sint_16 mantissa;
-
+ uint32_t cpl_exp_tmp;
+ uint32_t cpl_mant_tmp;
- for(i=audblk->cplstrtmant;i<audblk->cplendmant;)
- {
- if(!audblk->cplbndstrc[sub_bnd++])
- {
+ for (i=audblk->cplstrtmant;i<audblk->cplendmant;) {
+ if (!audblk->cplbndstrc[sub_bnd++]) {
cpl_exp_tmp = audblk->cplcoexp[ch][bnd] + 3 * audblk->mstrcplco[ch];
- if(audblk->cplcoexp[ch][bnd] == 15)
+ if (audblk->cplcoexp[ch][bnd] == 15)
cpl_mant_tmp = (audblk->cplcomant[ch][bnd]) << 11;
else
cpl_mant_tmp = ((0x10) | audblk->cplcomant[ch][bnd]) << 10;
- cpl_coord = convert_to_float(cpl_exp_tmp,cpl_mant_tmp) * 8.0f;
+ cpl_coord = (cpl_mant_tmp * scale_factor[cpl_exp_tmp]) * 8.0f;
//Invert the phase for the right channel if necessary
if(bsi->acmod == 0x2 && audblk->phsflginu && ch == 1 && audblk->phsflg[bnd])
@@ -336,16 +417,13 @@ coeff_uncouple_ch(float samples[],bsi_t *bsi,audblk_t *audblk,uint_32 ch)
bnd++;
}
- for(j=0;j < 12; j++)
- {
- //Get new dither values for each channel if necessary, so
- //the channels are uncorrelated
- if(audblk->dithflag[ch] && audblk->cpl_bap[i] == 0)
- mantissa = dither_gen();
+ for(j=0;j < 12; j++) {
+ // Get new dither values for each channel if necessary,
+ // so the channels are uncorrelated
+ if(audblk->dithflag[ch] && !audblk->cpl_bap[i])
+ samples[i] = cpl_coord * (dither_gen() * scale_factor[audblk->cpl_exp[i]]);
else
- mantissa = audblk->cplmant[i];
-
- samples[i] = cpl_coord * convert_to_float(audblk->cpl_exp[i],mantissa);
+ samples[i] = cpl_coord * audblk->cpl_flt[i];
i++;
}
diff --git a/ac3dec/cpu_accel.c b/ac3dec/cpu_accel.c
new file mode 100644
index 0000000..e01db83
--- /dev/null
+++ b/ac3dec/cpu_accel.c
@@ -0,0 +1,129 @@
+/*****
+*
+* This file is part of the OMS program.
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2, or (at your option)
+* any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; see the file COPYING. If not, write to
+* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+*****/
+
+#include <inttypes.h>
+#include "mm_accel.h"
+
+//#ifdef __i386__
+#if 0
+
+#ifdef __PIC__
+#define cpuid(op, eax, ebx, ecx, edx) \
+ __asm__ __volatile__ ("pushl %%ebx\n\t" \
+ "cpuid\n\t" \
+ "movl %%ebx, %%esi\n\t" \
+ "popl %%ebx" \
+ : "=a" (eax), \
+ "=S" (ebx), \
+ "=c" (ecx), \
+ "=d" (edx) \
+ : "a" (op) \
+ : "cc")
+#else
+#define cpuid(op, eax, ebx, ecx, edx) \
+ __asm__("cpuid" \
+ : "=a" (eax), \
+ "=b" (ebx), \
+ "=c" (ecx), \
+ "=d" (edx) \
+ : "a" (op) \
+ : "cc")
+#endif
+
+static inline int has_cpuid ()
+{
+ return 1;
+/*
+ uint32_t eax, ebx;
+
+ asm ("pushfl\n\t"
+ "popl %0\n\t"
+ "movl %0,%1\n\t"
+ "xorl $0x200000,%0\n\t"
+ "pushl %0\n\t"
+ "popfl\n\t"
+ "pushfl\n\t"
+ "popl %0"
+ : "=a" (eax),
+ "=b" (ebx)
+ :
+ : "cc");
+
+ return (eax != ebx);
+*/
+}
+
+
+static uint32_t x86_accel (void)
+{
+ uint32_t eax, ebx, ecx, edx;
+ int AMD;
+ uint32_t caps;
+
+ if (!has_cpuid ()) // no cpuid
+ return 0;
+
+ cpuid (0x00000000, eax, ebx, ecx, edx);
+ if (!eax) // vendor string only
+ return 0;
+
+ AMD = (ebx == 0x68747541) && (ecx == 0x444d4163) && (edx == 0x69746e65);
+
+ cpuid (0x00000001, eax, ebx, ecx, edx);
+ if (! (edx & 0x00800000)) // no MMX
+ return 0;
+
+ caps = OMS_ACCEL_X86_MMX;
+ if (edx & 0x02000000) // SSE - identical to AMD MMX extensions
+ caps = OMS_ACCEL_X86_MMX | OMS_ACCEL_X86_MMXEXT;
+
+ cpuid (0x80000000, eax, ebx, ecx, edx);
+ if (eax < 0x80000001) // no extended capabilities
+ return caps;
+
+ cpuid (0x80000001, eax, ebx, ecx, edx);
+
+ if (edx & 0x80000000)
+ caps |= OMS_ACCEL_X86_3DNOW;
+
+ if (AMD && (edx & 0x00400000)) // AMD MMX extensions
+ caps |= OMS_ACCEL_X86_MMXEXT;
+
+ return caps;
+}
+#endif
+
+uint32_t mm_accel (void)
+{
+//#ifdef __i386__
+#if 0
+ static int got_accel = 0;
+ static uint32_t accel;
+
+ if (!got_accel) {
+ got_accel = 1;
+ accel = x86_accel ();
+ }
+
+ return accel;
+#else
+ return 0;
+#endif
+}
diff --git a/ac3dec/crc.c b/ac3dec/crc.c
index 3210ce7..2d5ef80 100644
--- a/ac3dec/crc.c
+++ b/ac3dec/crc.c
@@ -30,7 +30,7 @@
#include "crc.h"
-static const uint_16 crc_lut[256] =
+static const uint16_t crc_lut[256] =
{
0x0000,0x8005,0x800f,0x000a,0x801b,0x001e,0x0014,0x8011,
0x8033,0x0036,0x003c,0x8039,0x0028,0x802d,0x8027,0x0022,
@@ -66,31 +66,31 @@ static const uint_16 crc_lut[256] =
0x8213,0x0216,0x021c,0x8219,0x0208,0x820d,0x8207,0x0202
};
-static uint_16 state;
+static uint16_t state;
-void
-crc_init(void)
+
+void crc_init(void)
{
state = 0;
}
-inline void crc_process_byte(uint_8 data)
+inline void crc_process_byte (uint8_t data)
{
state = crc_lut[data ^ (state>>8)] ^ (state<<8);
}
-void
-crc_process_frame(uint_8 *data,uint_32 num_bytes)
+
+void crc_process_frame (uint8_t *data,uint32_t num_bytes)
{
- uint_32 i;
+ uint32_t i;
- for(i=0;i<num_bytes;i++)
- crc_process_byte(data[i]);
+ for(i=0; i<num_bytes; i++)
+ crc_process_byte (data[i]);
}
-int
-crc_validate(void)
+
+int crc_validate(void)
{
- return(state == 0);
+ return (state == 0);
}
diff --git a/ac3dec/crc.h b/ac3dec/crc.h
index 07d57b1..1648965 100644
--- a/ac3dec/crc.h
+++ b/ac3dec/crc.h
@@ -23,5 +23,5 @@
int crc_validate(void);
void crc_init(void);
-void crc_process_byte(uint_8 data);
-void crc_process_frame(uint_8 *data,uint_32 num_bytes);
+void crc_process_byte(uint8_t data);
+void crc_process_frame(uint8_t *data,uint32_t num_bytes);
diff --git a/ac3dec/debug.c b/ac3dec/debug.c
index b7d6a3b..b84511d 100644
--- a/ac3dec/debug.c
+++ b/ac3dec/debug.c
@@ -31,18 +31,8 @@ static int debug_level = -1;
// We could potentially have multiple levels of debug info
int debug_is_on(void)
{
- char *env_var;
-
- if(debug_level < 0)
- {
- env_var = getenv("AC3_DEBUG");
-
- if (env_var)
- {
- debug_level = 1;
- }
- else
- debug_level = 0;
+ if(debug_level < 0) {
+ debug_level = getenv ("AC3_DEBUG") ? 1 : 0;
}
return debug_level;
@@ -52,7 +42,7 @@ int debug_is_on(void)
#ifndef __GNUC__
void dprintf(char fmt[],...)
{
- int foo = 0;
+ return;
}
#endif
diff --git a/ac3dec/debug.h b/ac3dec/debug.h
index f45cb5b..691d403 100644
--- a/ac3dec/debug.h
+++ b/ac3dec/debug.h
@@ -22,16 +22,20 @@
*
*/
-int debug_is_on(void);
+int debug_is_on (void);
#ifdef __GNUC__
-#define dprintf(format,args...)\
+#ifdef DEBUG
+#define dprintf(args...)\
{\
if (debug_is_on())\
{\
- fprintf(stderr,format,## args);\
+ fprintf(stderr, args);\
}\
}
#else
+#define dprintf(args...) { };
+#endif
+#else
void dprintf(char fmt[],...);
#endif
diff --git a/ac3dec/decode.c b/ac3dec/decode.c
index 5e7f034..fb85205 100644
--- a/ac3dec/decode.c
+++ b/ac3dec/decode.c
@@ -3,9 +3,6 @@
*
* Copyright (C) Aaron Holtzman - May 1999
*
- * Added support for DVB-s PCI card by:
- * Matjaz Thaler <matjaz.thaler@rd.iskraemeco.si> - November 2000
- *
* This file is part of ac3dec, a free Dolby AC-3 stream decoder.
*
* ac3dec is free software; you can redistribute it and/or modify
@@ -22,6 +19,17 @@
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
*
+ *------------------------------------------------------------
+ *
+ * Thomas Mirlacher <dent@cosy.sbg.ac.at>
+ * added OMS support
+ * 11 Jan 2001
+ * Thomas Mirlacher <dent@cosy.sbg.ac.at>
+ * faster error response using jmp functions
+ *
+ * 9 Aug 2001
+ * Matjaz Thaler <matjaz.thaler@rd.iskraemeco.si>
+ * Added support for DVB-s PCI card
*
*/
@@ -34,14 +42,17 @@
#include <errno.h>
#include <string.h>
#include <sys/time.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
+
+#ifdef __OMS__
+#include <oms/oms.h>
+#include <oms/plugin/output_audio.h>
+#endif
#include "ac3.h"
#include "ac3_internal.h"
#include "bitstream.h"
+#include "downmix.h"
+#include "srfft.h"
#include "imdct.h"
#include "exponent.h"
#include "coeff.h"
@@ -51,96 +62,91 @@
#include "stats.h"
#include "rematrix.h"
#include "sanity_check.h"
-#include "downmix.h"
#include "debug.h"
-
-#define AC3_BUFFER_SIZE (6*1024*16)
-
+#ifndef __OMS__
+//#include "audio_out.h"
+#endif
//our global config structure
ac3_config_t ac3_config;
-uint_32 error_flag = 0;
static audblk_t audblk;
static bsi_t bsi;
static syncinfo_t syncinfo;
-static uint_32 frame_count = 0;
-//static uint_32 is_output_initialized = 0;
+#ifndef __OMS__
+static uint32_t done_banner;
+#endif
+static uint32_t is_output_initialized = 0;
//the floating point samples for one audblk
static stream_samples_t samples;
//the integer samples for the entire frame (with enough space for 2 ch out)
//if this size change, be sure to change the size when muting
-static sint_16 s16_samples[2 * 6 * 256];
-
+static int16_t s16_samples[2 * 6 * 256] __attribute__ ((aligned(16)));
+// downmix stuff
+static float cmixlev_lut[4] = { 0.707, 0.595, 0.500, 0.707 };
+static float smixlev_lut[4] = { 0.707, 0.500, 0.0 , 0.500 };
+static dm_par_t dm_par;
+
//Storage for the syncframe
-#define SYNC_BUFFER_MAX_SIZE 4096
-static uint_8 sync_buffer[SYNC_BUFFER_MAX_SIZE];
-static uint_32 sync_buffer_size = 0;;
+#define BUFFER_MAX_SIZE 4096
+static uint8_t buffer[BUFFER_MAX_SIZE];
+static uint32_t buffer_size = 0;;
+// for error handling
+jmp_buf error_jmp_mark;
-uint_32
-decode_sync_buffer_syncframe(syncinfo_t *syncinfo, uint_8 **start,uint_8 *end)
+static uint32_t decode_buffer_syncframe (syncinfo_t *syncinfo, uint8_t **start, uint8_t *end)
{
- uint_8 *cur = *start;
- uint_16 syncword = syncinfo->syncword;
- uint_32 ret = 0;
+ uint8_t *cur = *start;
+ uint16_t syncword = syncinfo->syncword;
+ uint32_t ret = 0;
- //
// Find an ac3 sync frame.
- //
-resync:
-
- while(syncword != 0x0b77)
- {
- if(cur >= end)
+ while (syncword != 0x0b77) {
+ if (cur >= end)
goto done;
syncword = (syncword << 8) + *cur++;
}
//need the next 3 bytes to decide how big the frame is
- while(sync_buffer_size < 3)
- {
+ while (buffer_size < 3) {
if(cur >= end)
goto done;
- sync_buffer[sync_buffer_size++] = *cur++;
+ buffer[buffer_size++] = *cur++;
}
- parse_syncinfo(syncinfo,sync_buffer);
- stats_print_syncinfo(syncinfo);
+ parse_syncinfo (syncinfo,buffer);
+ stats_print_syncinfo (syncinfo);
- while(sync_buffer_size < syncinfo->frame_size * 2 - 2)
- {
+ while (buffer_size < syncinfo->frame_size * 2 - 2) {
if(cur >= end)
goto done;
- sync_buffer[sync_buffer_size++] = *cur++;
+ buffer[buffer_size++] = *cur++;
}
-
+
+#if 0
// Check the crc over the entire frame
crc_init();
- crc_process_frame(sync_buffer,syncinfo->frame_size * 2 - 2);
+ crc_process_frame (buffer, syncinfo->frame_size * 2 - 2);
- if(!crc_validate())
- {
+ if (!crc_validate()) {
fprintf(stderr,"** CRC failed - skipping frame **\n");
- syncword = 0xffff;
- sync_buffer_size = 0;
- goto resync;
+ goto done;
}
-
- //
+#endif
+
//if we got to this point, we found a valid ac3 frame to decode
- //
- bitstream_init(sync_buffer);
+ bitstream_init (buffer);
//get rid of the syncinfo struct as we already parsed it
- bitstream_get(24);
+ bitstream_get (24);
//reset the syncword for next time
syncword = 0xffff;
- sync_buffer_size = 0;
+ buffer_size = 0;
ret = 1;
done:
@@ -149,100 +155,131 @@ done:
return ret;
}
-void
-decode_mute(void)
+
+void inline decode_mute (void)
{
- fprintf(stderr,"muting frame\n");
//mute the frame
- memset(s16_samples,0,sizeof(sint_16) * 256 * 2 * 6);
- error_flag = 0;
+ memset (s16_samples, 0, sizeof(int16_t) * 256 * 2 * 6);
}
-void
-ac3_init(ac3_config_t *config)
+void ac3dec_init (void)
{
- memcpy(&ac3_config,config,sizeof(ac3_config_t));
-
- imdct_init();
- sanity_check_init(&syncinfo,&bsi,&audblk);
-
- // ac3_output = *foo;
+// FIXME - don't do that statically here
+ ac3_config.num_output_ch = 2;
+ ac3_config.flags = 0;
+
+ imdct_init ();
+ downmix_init ();
+ memset (&syncinfo, 0, sizeof (syncinfo));
+ memset (&bsi, 0, sizeof (bsi));
+ memset (&audblk, 0, sizeof (audblk));
+ sanity_check_init (&syncinfo,&bsi,&audblk);
}
-uint_32 ac3_decode_data(uint_8 *data_start,uint_8 *data_end, int ac3reset, int *input_pointer, int *output_pointer, char *ac3_data)
+#ifdef __OMS__
+size_t ac3dec_decode_data (plugin_output_audio_t *output, uint8_t *data_start, uint8_t *data_end)
+#else
+size_t ac3dec_decode_data (uint8_t *data_start ,uint8_t *data_end, int ac3reset, int *input_pointer, int *output_pointer, char *ac3_data)
+#endif
{
- uint_32 i;
+ uint32_t i;
int datasize;
char *data;
+
if(ac3reset != 0){
syncinfo.syncword = 0xffff;
- sync_buffer_size = 0;
+ buffer_size = 0;
+ }
+
+ if (setjmp (error_jmp_mark) < 0) {
+ ac3dec_init ();
+ return 0;
}
-
- while(decode_sync_buffer_syncframe(&syncinfo,&data_start,data_end))
- {
- dprintf("(decode) begin frame %d\n",frame_count++);
- if(error_flag)
- {
- decode_mute();
- continue;
+ while (decode_buffer_syncframe (&syncinfo, &data_start, data_end)) {
+ parse_bsi (&bsi);
+
+#ifndef __OMS__
+ if(!done_banner) {
+ // stats_print_banner(&syncinfo,&bsi);
+ done_banner = 1;
}
+#endif
+
+ // compute downmix parameters
+ // downmix to tow channels for now
+ dm_par.clev = 0.0; dm_par.slev = 0.0; dm_par.unit = 1.0;
+ if (bsi.acmod & 0x1) // have center
+ dm_par.clev = cmixlev_lut[bsi.cmixlev];
+
+ if (bsi.acmod & 0x4) // have surround channels
+ dm_par.slev = smixlev_lut[bsi.surmixlev];
- parse_bsi(&bsi);
+ dm_par.unit /= 1.0 + dm_par.clev + dm_par.slev;
+ dm_par.clev *= dm_par.unit;
+ dm_par.slev *= dm_par.unit;
- for(i=0; i < 6; i++)
- {
+ for(i=0; i < 6; i++) {
//Initialize freq/time sample storage
- memset(samples,0,sizeof(float) * 256 * (bsi.nfchans + bsi.lfeon));
+ memset (samples, 0, sizeof(float) * 256 * (bsi.nfchans + bsi.lfeon));
// Extract most of the audblk info from the bitstream
// (minus the mantissas
- parse_audblk(&bsi,&audblk);
+ parse_audblk (&bsi,&audblk);
// Take the differential exponent data and turn it into
// absolute exponents
- exponent_unpack(&bsi,&audblk);
- if(error_flag)
- goto error;
+ exponent_unpack (&bsi,&audblk);
// Figure out how many bits per mantissa
- bit_allocate(syncinfo.fscod,&bsi,&audblk);
+ bit_allocate (syncinfo.fscod,&bsi,&audblk);
// Extract the mantissas from the stream and
// generate floating point frequency coefficients
- coeff_unpack(&bsi,&audblk,samples);
- if(error_flag)
- goto error;
+ coeff_unpack (&bsi,&audblk,samples);
- if(bsi.acmod == 0x2)
- rematrix(&audblk,samples);
+ if (bsi.acmod == 0x2)
+ rematrix (&audblk,samples);
// Convert the frequency samples into time samples
- imdct(&bsi,&audblk,samples);
+ imdct (&bsi,&audblk,samples, &s16_samples[i * 2 * 256], &dm_par);
// Downmix into the requested number of channels
- // and convert floating point to sint_16
- downmix(&bsi,samples,&s16_samples[i * 2 * 256]);
+ // and convert floating point to int16_t
+ // downmix(&bsi,samples,&s16_samples[i * 2 * 256]);
- sanity_check(&syncinfo,&bsi,&audblk);
- if(error_flag)
- goto error;
+ if (sanity_check(&syncinfo,&bsi,&audblk) < 0) {
+ HANDLE_ERROR ();
+ return 0;
+ }
continue;
}
-
- parse_auxdata(&syncinfo);
- /*
- if(!is_output_initialized)
- {
- ac3_output.open(16,syncinfo.sampling_rate,2);
+ if (!is_output_initialized) {
+#ifdef __OMS__
+ plugin_output_audio_attr_t attr;
+#ifdef __sun__
+ attr.format = 16;
+#else
+ attr.format = AFMT_S16_NE;
+#endif
+ attr.speed = syncinfo.sampling_rate;
+ attr.channels = 2;
+
+ // output->setup (&attr);
+#else
+ // ao_functions->open (16, syncinfo.sampling_rate, 2);
+#endif
is_output_initialized = 1;
}
- */
+
+#ifdef __OMS__
+ output->write (s16_samples, 256 * 6 * 2 * 2);
+#else
+ // ao_functions->play(s16_samples, 256 * 6 * 2);
data = (char *)s16_samples;
datasize = 0;
while(datasize < 6144){
@@ -257,13 +294,11 @@ uint_32 ac3_decode_data(uint_8 *data_start,uint_8 *data_end, int ac3reset, int *
}
}
+#endif
- //write(1, s16_samples, 256 * 6 * 2* 2);
- //ac3_output.play(s16_samples, 256 * 6 * 2);
-error:
- ;
- //find a new frame
}
+ decode_mute ();
+
return 0;
}
diff --git a/ac3dec/dither.c b/ac3dec/dither.c
index 31e74f6..079c04d 100644
--- a/ac3dec/dither.c
+++ b/ac3dec/dither.c
@@ -31,7 +31,7 @@
#include "dither.h"
-const uint_16 dither_lut[256] =
+const uint16_t dither_lut[256] =
{
0x0000, 0xa011, 0xe033, 0x4022, 0x6077, 0xc066, 0x8044, 0x2055,
0xc0ee, 0x60ff, 0x20dd, 0x80cc, 0xa099, 0x0088, 0x40aa, 0xe0bb,
@@ -67,7 +67,7 @@ const uint_16 dither_lut[256] =
0x8bf4, 0x2be5, 0x6bc7, 0xcbd6, 0xeb83, 0x4b92, 0x0bb0, 0xaba1
};
-uint_16 lfsr_state = 1;
+uint16_t lfsr_state = 1;
//
// see dither_gen (inline-able) in dither.h
@@ -89,18 +89,17 @@ uint_16 lfsr_state = 1;
*
*/
-uint_16 dither_gen(void)
+uint16_t dither_gen(void)
{
int i;
- uint_32 state;
+ uint32_t state;
//explicitly bring the state into a local var as gcc > 3.0?
//doesn't know how to optimize out the stores
state = lfsr_state;
//Generate eight pseudo random bits
- for(i=0;i<8;i++)
- {
+ for(i=0;i<8;i++) {
state <<= 1;
if(state & 0x10000)
@@ -109,7 +108,7 @@ uint_16 dither_gen(void)
lfsr_state = state;
- return (((((sint_32)state<<8)>>8) * (sint_32) (0.707106 * 256.0))>>16);
+ return (((((int32_t)state<<8)>>8) * (int32_t) (0.707106 * 256.0))>>16);
}
#endif
diff --git a/ac3dec/dither.h b/ac3dec/dither.h
index 6d68e1b..abb9f51 100644
--- a/ac3dec/dither.h
+++ b/ac3dec/dither.h
@@ -22,16 +22,16 @@
*/
-extern uint_16 lfsr_state;
-extern const uint_16 dither_lut[256];
+extern uint16_t lfsr_state;
+extern const uint16_t dither_lut[256];
-static inline uint_16 dither_gen(void)
+static inline uint16_t dither_gen(void)
{
- sint_16 state;
+ int16_t state;
state = dither_lut[lfsr_state >> 8] ^ (lfsr_state << 8);
- lfsr_state = (uint_16) state;
+ lfsr_state = (uint16_t) state;
- return ((state * (sint_32) (0.707106 * 256.0))>>8);
+ return ((state * (int32_t) (0.707106 * 256.0))>>8);
}
diff --git a/ac3dec/downmix.c b/ac3dec/downmix.c
index 94bc51a..8eac3e2 100644
--- a/ac3dec/downmix.c
+++ b/ac3dec/downmix.c
@@ -1,10 +1,7 @@
/*
- *
- * downmix.c
+ * imdct.c
*
- * Copyright (C) Aaron Holtzman - Sept 1999
- *
- * Originally based on code by Yuqing Deng.
+ * Copyright (C) Aaron Holtzman - May 1999
*
* This file is part of ac3dec, a free Dolby AC-3 stream decoder.
*
@@ -28,401 +25,65 @@
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
+#include <mm_accel.h>
+
#include "ac3.h"
#include "ac3_internal.h"
-
-#include "decode.h"
-#include "downmix.h"
#include "debug.h"
+#include "downmix.h"
+#include "downmix_c.h"
+#include "downmix_i386.h"
+#ifdef HAVE_KNI
+#include "downmix_kni.h"
+#endif
+void (*downmix_3f_2r_to_2ch)(float *samples, dm_par_t * dm_par);
+void (*downmix_3f_1r_to_2ch)(float *samples, dm_par_t * dm_par);
+void (*downmix_2f_2r_to_2ch)(float *samples, dm_par_t * dm_par);
+void (*downmix_2f_1r_to_2ch)(float *samples, dm_par_t * dm_par);
+void (*downmix_3f_0r_to_2ch)(float *samples, dm_par_t * dm_par);
+void (*stream_sample_2ch_to_s16)(int16_t *s16_samples, float *left, float *right);
+void (*stream_sample_1ch_to_s16)(int16_t *s16_samples, float *center);
-//Pre-scaled downmix coefficients
-static float cmixlev_lut[4] = { 0.2928, 0.2468, 0.2071, 0.2468 };
-static float smixlev_lut[4] = { 0.2928, 0.2071, 0.0 , 0.2071 };
-static void
-downmix_3f_2r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples)
+void downmix_init()
{
- uint_32 j;
- float right_tmp;
- float left_tmp;
- float clev,slev;
- float *centre = 0, *left = 0, *right = 0, *left_sur = 0, *right_sur = 0;
-
- left = samples[0];
- centre = samples[1];
- right = samples[2];
- left_sur = samples[3];
- right_sur = samples[4];
-
- clev = cmixlev_lut[bsi->cmixlev];
- slev = smixlev_lut[bsi->surmixlev];
-
-#if defined DOLBY_SURROUND
- for (j = 0; j < 256; j++)
+#ifdef __i386__
+#ifdef HAVE_KNI
+ uint32_t accel = mm_accel ();
+
+// other dowmixing should go here too
+ if (accel & MM_ACCEL_X86_MMXEXT) {
+ dprintf("Using SSE for downmix\n");
+ downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_kni;
+ downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_kni;
+ downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_kni;
+ downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_kni;
+ downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_kni;
+ stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_kni;
+ stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_kni;
+ } else if (accel & MM_ACCEL_X86_3DNOW) {
+ } else
+#endif
+#endif
{
- right_tmp = *left_sur++ + *right_sur++;
- left_tmp = 1.4142f * *left++ + *centre - right_tmp;
- right_tmp += 1.4142f * *right++ + *centre++;
-
- s16_samples[j * 2 ] = (sint_16) (left_tmp * 16000.0f);
- s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 16000.0f);
- }
+ downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_c;
+ downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_c;
+ downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_c;
+ downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_c;
+ downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_c;
+#ifdef __i386__
+#if 1
+ stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_c;
+ stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_c;
#else
- for (j = 0; j < 256; j++)
- {
- left_tmp = 0.4142f * *left++ + clev * *centre + slev * *left_sur++;
- right_tmp= 0.4142f * *right++ + clev * *centre++ + slev * *right_sur++;
-
- s16_samples[j * 2 ] = (sint_16) (left_tmp * 32767.0f);
- s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 32767.0f);
- }
+ stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_i386;
+ stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_i386;
+#endif
+#else
+ stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_c;
+ stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_c;
#endif
-}
-
-static void
-downmix_2f_2r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples)
-{
- uint_32 j;
- float right_tmp;
- float left_tmp;
- float slev;
- float *left = 0, *right = 0, *left_sur = 0, *right_sur = 0;
-
- left = samples[0];
- right = samples[1];
- left_sur = samples[2];
- right_sur = samples[3];
-
- slev = smixlev_lut[bsi->surmixlev];
-
- for (j = 0; j < 256; j++)
- {
- left_tmp = 0.4142f * *left++ + slev * *left_sur++;
- right_tmp= 0.4142f * *right++ + slev * *right_sur++;
-
- s16_samples[j * 2 ] = (sint_16) (left_tmp * 32767.0f);
- s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 32767.0f);
- }
-}
-
-static void
-downmix_3f_1r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples)
-{
- uint_32 j;
- float right_tmp;
- float left_tmp;
- float clev,slev;
- float *centre = 0, *left = 0, *right = 0, *sur = 0;
-
- left = samples[0];
- centre = samples[1];
- right = samples[2];
- //Mono surround
- sur = samples[3];
-
- clev = cmixlev_lut[bsi->cmixlev];
- slev = smixlev_lut[bsi->surmixlev];
-
- for (j = 0; j < 256; j++)
- {
- left_tmp = 0.4142f * *left++ + clev * *centre++ + slev * *sur;
- right_tmp= 0.4142f * *right++ + clev * *centre + slev * *sur++;
-
- s16_samples[j * 2 ] = (sint_16) (left_tmp * 32767.0f);
- s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 32767.0f);
- }
-}
-
-
-static void
-downmix_2f_1r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples)
-{
- uint_32 j;
- float right_tmp;
- float left_tmp;
- float slev;
- float *left = 0, *right = 0, *sur = 0;
-
- left = samples[0];
- right = samples[1];
- //Mono surround
- sur = samples[2];
-
- slev = smixlev_lut[bsi->surmixlev];
-
- for (j = 0; j < 256; j++)
- {
- left_tmp = 0.4142f * *left++ + slev * *sur;
- right_tmp= 0.4142f * *right++ + slev * *sur++;
-
- s16_samples[j * 2 ] = (sint_16) (left_tmp * 32767.0f);
- s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 32767.0f);
- }
-}
-
-static void
-downmix_3f_0r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples)
-{
- uint_32 j;
- float right_tmp;
- float left_tmp;
- float clev;
- float *centre = 0, *left = 0, *right = 0;
-
- left = samples[0];
- centre = samples[1];
- right = samples[2];
-
- clev = cmixlev_lut[bsi->cmixlev];
-
- for (j = 0; j < 256; j++)
- {
- left_tmp = 0.4142f * *left++ + clev * *centre;
- right_tmp= 0.4142f * *right++ + clev * *centre++;
-
- s16_samples[j * 2 ] = (sint_16) (left_tmp * 32767.0f);
- s16_samples[j * 2 + 1] = (sint_16) (right_tmp * 32767.0f);
- }
-}
-
-static void
-downmix_2f_0r_to_2ch(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples)
-{
- uint_32 j;
- float *left = 0, *right = 0;
-
- left = samples[0];
- right = samples[1];
-
- for (j = 0; j < 256; j++)
- {
- s16_samples[j * 2 ] = (sint_16) (*left++ * 32767.0f);
- s16_samples[j * 2 + 1] = (sint_16) (*right++ * 32767.0f);
- }
-}
-
-static void
-downmix_1f_0r_to_2ch(float *centre,sint_16 *s16_samples)
-{
- uint_32 j;
- float tmp;
-
- //Mono program!
-
- for (j = 0; j < 256; j++)
- {
- tmp = 32767.0f * 0.7071f * *centre++;
-
- s16_samples[j * 2 ] = s16_samples[j * 2 + 1] = (sint_16) tmp;
- }
-}
-
-//
-// Downmix into 2 or 4 channels (4 ch isn't in quite yet)
-//
-// The downmix function names have the following format
-//
-// downmix_Xf_Yr_to_[2|4]ch[_dolby]
-//
-// where X = number of front channels
-// Y = number of rear channels
-// [2|4] = number of output channels
-// [_dolby] = with or without dolby surround mix
-//
-
-void downmix(bsi_t* bsi, stream_samples_t samples,sint_16 *s16_samples)
-{
- if(bsi->acmod > 7)
- dprintf("(downmix) invalid acmod number\n");
-
- //
- //There are two main cases, with or without Dolby Surround
- //
- if(ac3_config.flags & AC3_DOLBY_SURR_ENABLE)
- {
- fprintf(stderr,"Dolby Surround Mixes not currently enabled\n");
- exit(1);
- }
-
- //Non-Dolby surround downmixes
- switch(bsi->acmod)
- {
- // 3/2
- case 7:
- downmix_3f_2r_to_2ch(bsi,samples,s16_samples);
- break;
-
- // 2/2
- case 6:
- downmix_2f_2r_to_2ch(bsi,samples,s16_samples);
- break;
-
- // 3/1
- case 5:
- downmix_3f_1r_to_2ch(bsi,samples,s16_samples);
- break;
-
- // 2/1
- case 4:
- downmix_2f_1r_to_2ch(bsi,samples,s16_samples);
- break;
-
- // 3/0
- case 3:
- downmix_3f_0r_to_2ch(bsi,samples,s16_samples);
- break;
-
- case 2:
- downmix_2f_0r_to_2ch(bsi,samples,s16_samples);
- break;
-
- // 1/0
- case 1:
- downmix_1f_0r_to_2ch(samples[0],s16_samples);
- break;
-
- // 1+1
- case 0:
- downmix_1f_0r_to_2ch(samples[ac3_config.dual_mono_ch_sel],s16_samples);
- break;
}
}
-
-
-
-#if 0
-
- //the dolby mixes lay here for the time being
- switch(bsi->acmod)
- {
- // 3/2
- case 7:
- left = samples[0];
- centre = samples[1];
- right = samples[2];
- left_sur = samples[3];
- right_sur = samples[4];
-
- for (j = 0; j < 256; j++)
- {
- right_tmp = 0.2265f * *left_sur++ + 0.2265f * *right_sur++;
- left_tmp = -1 * right_tmp;
- right_tmp += 0.3204f * *right++ + 0.2265f * *centre;
- left_tmp += 0.3204f * *left++ + 0.2265f * *centre++;
-
- samples[1][j] = right_tmp;
- samples[0][j] = left_tmp;
- }
-
- break;
-
- // 2/2
- case 6:
- left = samples[0];
- right = samples[1];
- left_sur = samples[2];
- right_sur = samples[3];
-
- for (j = 0; j < 256; j++)
- {
- right_tmp = 0.2265f * *left_sur++ + 0.2265f * *right_sur++;
- left_tmp = -1 * right_tmp;
- right_tmp += 0.3204f * *right++;
- left_tmp += 0.3204f * *left++ ;
-
- samples[1][j] = right_tmp;
- samples[0][j] = left_tmp;
- }
- break;
-
- // 3/1
- case 5:
- left = samples[0];
- centre = samples[1];
- right = samples[2];
- //Mono surround
- right_sur = samples[3];
-
- for (j = 0; j < 256; j++)
- {
- right_tmp = 0.2265f * *right_sur++;
- left_tmp = -1 * right_tmp;
- right_tmp += 0.3204f * *right++ + 0.2265f * *centre;
- left_tmp += 0.3204f * *left++ + 0.2265f * *centre++;
-
- samples[1][j] = right_tmp;
- samples[0][j] = left_tmp;
- }
- break;
-
- // 2/1
- case 4:
- left = samples[0];
- right = samples[1];
- //Mono surround
- right_sur = samples[2];
-
- for (j = 0; j < 256; j++)
- {
- right_tmp = 0.2265f * *right_sur++;
- left_tmp = -1 * right_tmp;
- right_tmp += 0.3204f * *right++;
- left_tmp += 0.3204f * *left++;
-
- samples[1][j] = right_tmp;
- samples[0][j] = left_tmp;
- }
- break;
-
- // 3/0
- case 3:
- left = samples[0];
- centre = samples[1];
- right = samples[2];
-
- for (j = 0; j < 256; j++)
- {
- right_tmp = 0.3204f * *right++ + 0.2265f * *centre;
- left_tmp = 0.3204f * *left++ + 0.2265f * *centre++;
-
- samples[1][j] = right_tmp;
- samples[0][j] = left_tmp;
- }
- break;
-
- // 2/0
- case 2:
- //Do nothing!
- break;
-
- // 1/0
- case 1:
- //Mono program!
- right = samples[0];
-
- for (j = 0; j < 256; j++)
- {
- right_tmp = 0.7071f * *right++;
-
- samples[1][j] = right_tmp;
- samples[0][j] = right_tmp;
- }
-
- break;
-
- // 1+1
- case 0:
- //Dual mono, output selected by user
- right = samples[ac3_config.dual_mono_ch_sel];
-
- for (j = 0; j < 256; j++)
- {
- right_tmp = 0.7071f * *right++;
-
- samples[1][j] = right_tmp;
- samples[0][j] = right_tmp;
- }
- break;
-#endif
diff --git a/ac3dec/downmix.h b/ac3dec/downmix.h
index 7537c62..dcfe1e1 100644
--- a/ac3dec/downmix.h
+++ b/ac3dec/downmix.h
@@ -25,4 +25,10 @@
*
*/
-void downmix(bsi_t* bsi, stream_samples_t stream_samples,sint_16 *s16_samples);
+typedef struct dm_par_s {
+ float unit;
+ float clev;
+ float slev;
+} dm_par_t;
+
+void downmix_init();
diff --git a/ac3dec/downmix_c.c b/ac3dec/downmix_c.c
new file mode 100644
index 0000000..875d785
--- /dev/null
+++ b/ac3dec/downmix_c.c
@@ -0,0 +1,161 @@
+/*
+ * downmix_c.c
+ *
+ * Copyright (C) Aaron Holtzman - May 1999
+ *
+ * This file is part of ac3dec, a free Dolby AC-3 stream decoder.
+ *
+ * ac3dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * ac3dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include "ac3.h"
+#include "ac3_internal.h"
+
+#include "debug.h"
+#include "downmix.h"
+#include "downmix_c.h"
+
+
+void downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
+{
+ int i;
+ float *left, *right, *center, *left_sur, *right_sur;
+ float left_tmp, right_tmp;
+
+ left = samples;
+ right = samples + 256 * 2;
+ center = samples + 256;
+ left_sur = samples + 256 * 3;
+ right_sur = samples + 256 * 4;
+
+ for (i=0; i < 256; i++) {
+#if defined DOLBY_SURROUND
+ left_tmp = dm_par->unit * *left + dm_par->clev * *center - dm_par->slev * (*left_sur + *right_sur);
+ right_tmp= dm_par->unit * *right++ + dm_par->clev * *center + dm_par->slev * (*left_sur++ + *right_sur++);
+#else
+ left_tmp = dm_par->unit * *left + dm_par->clev * *center + dm_par->slev * *left_sur++;
+ right_tmp= dm_par->unit * *right++ + dm_par->clev * *center + dm_par->slev * *right_sur++;
+#endif
+ *left++ = left_tmp;
+ *center++ = right_tmp;
+
+ }
+}
+
+
+void downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
+{
+ int i;
+ float *left, *right, *left_sur, *right_sur;
+ float left_tmp, right_tmp;
+
+ left = &samples[0];
+ right = &samples[256];
+ left_sur = &samples[512];
+ right_sur = &samples[768];
+
+ for (i = 0; i < 256; i++) {
+ left_tmp = dm_par->unit * *left + dm_par->slev * *left_sur++;
+ right_tmp= dm_par->unit * *right + dm_par->slev * *right_sur++;
+ *left++ = left_tmp;
+ *right++ = right_tmp;
+ }
+}
+
+
+void downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
+{
+ int i;
+ float *left, *right, *center, *right_sur;
+ float left_tmp, right_tmp;
+
+ left = &samples[0];
+ right = &samples[512];
+ center = &samples[256];
+ right_sur = &samples[768];
+
+ for (i = 0; i < 256; i++) {
+ left_tmp = dm_par->unit * *left + dm_par->clev * *center - dm_par->slev * *right_sur;
+ right_tmp= dm_par->unit * *right++ + dm_par->clev * *center + dm_par->slev * *right_sur++;
+ *left++ = left_tmp;
+ *center++ = right_tmp;
+ }
+}
+
+
+void downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
+{
+ int i;
+ float *left, *right, *right_sur;
+ float left_tmp, right_tmp;
+
+ left = &samples[0];
+ right = &samples[256];
+ right_sur = &samples[512];
+
+ for (i = 0; i < 256; i++) {
+ left_tmp = dm_par->unit * *left - dm_par->slev * *right_sur;
+ right_tmp= dm_par->unit * *right + dm_par->slev * *right_sur++;
+ *left++ = left_tmp;
+ *right++ = right_tmp;
+ }
+}
+
+
+void downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par)
+{
+ int i;
+ float *left, *right, *center;
+ float left_tmp, right_tmp;
+
+ left = &samples[0];
+ center = &samples[256];
+ right = &samples[512];
+
+ for (i = 0; i < 256; i++) {
+ left_tmp = dm_par->unit * *left + dm_par->clev * *center;
+ right_tmp= dm_par->unit * *right++ + dm_par->clev * *center;
+ *left++ = left_tmp;
+ *center++ = right_tmp;
+ }
+}
+
+
+void stream_sample_2ch_to_s16_c (int16_t *s16_samples, float *left, float *right)
+{
+ int i;
+
+ for (i=0; i < 256; i++) {
+ *s16_samples++ = (int16_t) *left++;
+ *s16_samples++ = (int16_t) *right++;
+ }
+}
+
+
+void stream_sample_1ch_to_s16_c (int16_t *s16_samples, float *center)
+{
+ int i;
+ float tmp;
+
+ for (i=0; i < 256; i++) {
+ *s16_samples++ = tmp = (int16_t) (0.7071f * *center++);
+ *s16_samples++ = tmp;
+ }
+}
diff --git a/ac3dec/downmix_c.h b/ac3dec/downmix_c.h
new file mode 100644
index 0000000..3643f65
--- /dev/null
+++ b/ac3dec/downmix_c.h
@@ -0,0 +1,32 @@
+/*****
+*
+* This file is part of the OMS program.
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2, or (at your option)
+* any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; see the file COPYING. If not, write to
+* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+*****/
+
+#ifndef __DOWNMIX_C_H__
+#define __DOWNMIX_C_H__
+
+void downmix_3f_2r_to_2ch_c(float *samples, dm_par_t * dm_par);
+void downmix_3f_1r_to_2ch_c(float *samples, dm_par_t * dm_par);
+void downmix_2f_2r_to_2ch_c(float *samples, dm_par_t * dm_par);
+void downmix_2f_1r_to_2ch_c(float *samples, dm_par_t * dm_par);
+void downmix_3f_0r_to_2ch_c(float *samples, dm_par_t * dm_par);
+void stream_sample_2ch_to_s16_c(int16_t *s16_samples, float *left, float *right);
+void stream_sample_1ch_to_s16_c(int16_t *s16_samples, float *center);
+
+#endif
diff --git a/ac3dec/downmix_i386.S b/ac3dec/downmix_i386.S
new file mode 100644
index 0000000..20f2e8c
--- /dev/null
+++ b/ac3dec/downmix_i386.S
@@ -0,0 +1,92 @@
+/* This is basicly gcc generated.
+ * Only the floating point rounding mode loads and saves
+ * are removed in the stream_sample_to_s16 functions.
+ */
+
+#ifdef __i386__
+
+ .file "downmix.c"
+ .version "01.01"
+gcc2_compiled.:
+.text
+ .align 4
+.globl stream_sample_2ch_to_s16_i386
+ .type stream_sample_2ch_to_s16_i386,@function
+stream_sample_2ch_to_s16_i386:
+ pushl %ebp
+ movl %esp,%ebp
+ subl $28,%esp
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ movl 8(%ebp),%edx
+ movl 12(%ebp),%ebx
+ movl 16(%ebp),%ecx
+ movl $255,%esi
+ .p2align 4,,7
+.L373:
+ flds (%ebx)
+ fistpl -8(%ebp)
+ movl -8(%ebp),%eax
+ movw %ax,(%edx)
+ addl $2,%edx
+ addl $4,%ebx
+ flds (%ecx)
+ fistpl -8(%ebp)
+ movl -8(%ebp),%eax
+ movw %ax,(%edx)
+ addl $4,%ecx
+ addl $2,%edx
+ decl %esi
+ jns .L373
+ popl %ebx
+ popl %esi
+ popl %edi
+ leave
+ ret
+.Lfe6:
+ .size stream_sample_2ch_to_s16_i386,.Lfe6-stream_sample_2ch_to_s16_i386
+.section .rodata
+ .align 4
+.LC46:
+ .long 0x3f350481
+.text
+ .align 4
+.globl stream_sample_1ch_to_s16_i386
+ .type stream_sample_1ch_to_s16_i386,@function
+stream_sample_1ch_to_s16_i386:
+ pushl %ebp
+ movl %esp,%ebp
+ subl $16,%esp
+ pushl %esi
+ pushl %ebx
+ movl 8(%ebp),%edx
+ movl 12(%ebp),%ecx
+ flds .LC46
+ movl $255,%ebx
+ .p2align 4,,7
+.L379:
+ flds (%ecx)
+ fmul %st(1),%st
+ fistpl -8(%ebp)
+ movl -8(%ebp),%eax
+ movw %ax,-2(%ebp)
+ addl $4,%ecx
+ flds -2(%ebp)
+ fistpl -8(%ebp)
+ movl -8(%ebp),%eax
+ movw %ax,(%edx)
+ addl $2,%edx
+ movw %ax,(%edx)
+ addl $2,%edx
+ decl %ebx
+ jns .L379
+ fstp %st(0)
+ popl %ebx
+ popl %esi
+ leave
+ ret
+.Lfe7:
+ .size stream_sample_1ch_to_s16_i386,.Lfe7-stream_sample_1ch_to_s16_i386
+ .ident "GCC: (GNU) 2.95.3 19991030 (prerelease)"
+#endif
diff --git a/ac3dec/downmix_i386.h b/ac3dec/downmix_i386.h
new file mode 100644
index 0000000..e34d7d6
--- /dev/null
+++ b/ac3dec/downmix_i386.h
@@ -0,0 +1,27 @@
+/*****
+*
+* This file is part of the OMS program.
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2, or (at your option)
+* any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; see the file COPYING. If not, write to
+* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+*****/
+
+#ifndef __DOWNMIX_I386_H__
+#define __DOWNMIX_I386_H__
+
+void stream_sample_2ch_to_s16_i386(int16_t *s16_samples, float *left, float *right);
+void stream_sample_1ch_to_s16_i386(int16_t *s16_samples, float *center);
+
+#endif
diff --git a/ac3dec/downmix_kni.S b/ac3dec/downmix_kni.S
new file mode 100644
index 0000000..7df8c06
--- /dev/null
+++ b/ac3dec/downmix_kni.S
@@ -0,0 +1,396 @@
+/*
+ * downmix_kni.S
+ *
+ * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - October 2000
+ *
+ *
+ * downmix_kni.S is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * downmix_kni.S is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifdef __i386__
+
+.section .rodata
+ .align 4
+sqrt2: .float 0f0.7071068
+ .p2align 5,0,
+
+ .section .text
+
+ .align 4
+ .global downmix_3f_2r_to_2ch_kni
+ .type downmix_3f_2r_to_2ch_kni, @function
+
+downmix_3f_2r_to_2ch_kni:
+ pushl %ebp
+ movl %esp, %ebp
+
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+
+ movl 8(%ebp), %eax /* samples[] */
+ movl 12(%ebp), %ebx /* &dm_par */
+ movl $64, %ecx /* loop counter */
+
+ movss (%ebx), %xmm5 /* unit */
+ shufps $0, %xmm5, %xmm5 /* unit | unit | unit | unit */
+
+ movss 4(%ebx), %xmm6 /* clev */
+ shufps $0, %xmm6, %xmm6 /* clev | clev | clev | clev */
+
+ movss 8(%ebx), %xmm7 /* slev */
+ shufps $0, %xmm7, %xmm7 /* slev | slev | slev | slev */
+
+.loop:
+ movaps (%eax), %xmm0 /* left */
+ movaps 2048(%eax), %xmm1 /* right */
+ movaps 1024(%eax), %xmm2 /* center */
+ mulps %xmm5, %xmm0
+ mulps %xmm5, %xmm1
+
+ mulps %xmm6, %xmm2
+ movaps 3072(%eax), %xmm3 /* leftsur */
+ movaps 4096(%eax), %xmm4 /* rithgsur */
+ addps %xmm2, %xmm0
+ addps %xmm2, %xmm1
+
+ mulps %xmm7, %xmm3
+ mulps %xmm7, %xmm4
+ addps %xmm3, %xmm0
+ addps %xmm4, %xmm1
+
+ movaps %xmm0, (%eax)
+ movaps %xmm1, 1024(%eax)
+
+ addl $16, %eax
+ decl %ecx
+ jnz .loop
+
+ popl %ecx
+ popl %ebx
+ popl %eax
+
+ leave
+ ret
+ .p2align 4,,7
+
+ .global downmix_2f_2r_to_2ch_kni
+ .type downmix_2f_2r_to_2ch_kni, @function
+
+downmix_2f_2r_to_2ch_kni:
+ pushl %ebp
+ movl %esp, %ebp
+
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+
+ movl 8(%ebp), %eax /* samples[] */
+ movl 12(%ebp), %ebx /* &dm_par */
+ movl $64, %ecx /* loop counter */
+
+ movss (%ebx), %xmm5 /* unit */
+ shufps $0, %xmm5, %xmm5 /* unit | unit | unit | unit */
+
+ movss 8(%ebx), %xmm7 /* slev */
+ shufps $0, %xmm7, %xmm7 /* slev | slev | slev | slev */
+
+.loop3:
+ movaps (%eax), %xmm0 /* left */
+ movaps 1024(%eax), %xmm1 /* right */
+ movaps 2048(%eax), %xmm3 /* leftsur */
+ mulps %xmm5, %xmm0
+ mulps %xmm5, %xmm1
+
+ movaps 3072(%eax), %xmm4 /* rightsur */
+
+ mulps %xmm7, %xmm3
+ mulps %xmm7, %xmm4
+ addps %xmm3, %xmm0
+ addps %xmm4, %xmm1
+
+ movaps %xmm0, (%eax)
+ movaps %xmm1, 1024(%eax)
+
+ addl $16, %eax
+ decl %ecx
+ jnz .loop3
+
+ popl %ecx
+ popl %ebx
+ popl %eax
+
+ leave
+ ret
+ .p2align 4,,7
+
+ .global downmix_3f_1r_to_2ch_kni
+ .type downmix_3f_1r_to_2ch_kni, @function
+
+downmix_3f_1r_to_2ch_kni:
+ pushl %ebp
+ movl %esp, %ebp
+
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+
+ movl 8(%ebp), %eax /* samples[] */
+ movl 12(%ebp), %ebx /* &dm_par */
+ movl $64, %ecx /* loop counter */
+
+ movss (%ebx), %xmm5 /* unit */
+ shufps $0, %xmm5, %xmm5 /* unit | unit | unit | unit */
+
+ movss 4(%ebx), %xmm6 /* clev */
+ shufps $0, %xmm6, %xmm6 /* clev | clev | clev | clev */
+
+ movss 8(%ebx), %xmm7 /* slev */
+ shufps $0, %xmm7, %xmm7 /* slev | slev | slev | slev */
+
+.loop4:
+ movaps (%eax), %xmm0 /* left */
+ movaps 2048(%eax), %xmm1 /* right */
+ movaps 1024(%eax), %xmm2 /* center */
+ mulps %xmm5, %xmm0
+ mulps %xmm5, %xmm1
+
+ mulps %xmm6, %xmm2
+ movaps 3072(%eax), %xmm3 /* sur */
+
+ addps %xmm2, %xmm0
+ mulps %xmm7, %xmm3
+
+ addps %xmm2, %xmm1
+
+ subps %xmm3, %xmm0
+ addps %xmm3, %xmm1
+
+ movaps %xmm0, (%eax)
+ movaps %xmm1, 1024(%eax)
+
+ addl $16, %eax
+ decl %ecx
+ jnz .loop4
+
+ popl %ecx
+ popl %ebx
+ popl %eax
+
+ leave
+ ret
+ .p2align 4,,7
+
+ .global downmix_2f_1r_to_2ch_kni
+ .type downmix_2f_1r_to_2ch_kni, @function
+
+downmix_2f_1r_to_2ch_kni:
+ pushl %ebp
+ movl %esp, %ebp
+
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+
+ movl 8(%ebp), %eax /* samples[] */
+ movl 12(%ebp), %ebx /* &dm_par */
+ movl $64, %ecx /* loop counter */
+
+ movss (%ebx), %xmm5 /* unit */
+ shufps $0, %xmm5, %xmm5 /* unit | unit | unit | unit */
+
+ movss 8(%ebx), %xmm7 /* slev */
+ shufps $0, %xmm7, %xmm7 /* slev | slev | slev | slev */
+
+.loop5:
+ movaps (%eax), %xmm0 /* left */
+ movaps 1024(%eax), %xmm1 /* right */
+
+ mulps %xmm5, %xmm0
+ mulps %xmm5, %xmm1
+
+ movaps 2048(%eax), %xmm3 /* sur */
+
+ mulps %xmm7, %xmm3
+
+ subps %xmm3, %xmm0
+ addps %xmm3, %xmm1
+
+ movaps %xmm0, (%eax)
+ movaps %xmm1, 1024(%eax)
+
+ addl $16, %eax
+ decl %ecx
+ jnz .loop5
+
+ popl %ecx
+ popl %ebx
+ popl %eax
+
+ leave
+ ret
+ .p2align 4,,7
+
+ .global downmix_3f_0r_to_2ch_kni
+ .type downmix_3f_0r_to_2ch_kni, @function
+
+downmix_3f_0r_to_2ch_kni:
+ pushl %ebp
+ movl %esp, %ebp
+
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+
+ movl 8(%ebp), %eax /* samples[] */
+ movl 12(%ebp), %ebx /* &dm_par */
+ movl $64, %ecx /* loop counter */
+
+ movss (%ebx), %xmm5 /* unit */
+ shufps $0, %xmm5, %xmm5 /* unit | unit | unit | unit */
+
+ movss 4(%ebx), %xmm6 /* clev */
+ shufps $0, %xmm6, %xmm6 /* clev | clev | clev | clev */
+
+
+.loop6:
+ movaps (%eax), %xmm0 /* left */
+ movaps 2048(%eax), %xmm1 /* right */
+ movaps 1024(%eax), %xmm2 /* center */
+ mulps %xmm5, %xmm0
+ mulps %xmm5, %xmm1
+
+ mulps %xmm6, %xmm2
+
+ addps %xmm2, %xmm0
+
+ addps %xmm2, %xmm1
+
+ movaps %xmm0, (%eax)
+ movaps %xmm1, 1024(%eax)
+
+ addl $16, %eax
+ decl %ecx
+ jnz .loop6
+
+ popl %ecx
+ popl %ebx
+ popl %eax
+
+ leave
+ ret
+ .p2align 4,,7
+
+ .global stream_sample_2ch_to_s16_kni
+ .type stream_sample_2ch_to_s16_kni, @function
+
+stream_sample_2ch_to_s16_kni:
+ pushl %ebp
+ movl %esp, %ebp
+
+ pushl %eax
+ pushl %ebx
+ pushl %edx
+ pushl %ecx
+
+ movl 8(%ebp), %eax /* s16_samples */
+ movl 12(%ebp), %ebx /* left */
+ movl 16(%ebp), %edx /* right */
+ movl $64, %ecx
+
+.loop1:
+ movaps (%ebx), %xmm0 /* l3 | l2 | l1 | l0 */
+ movaps (%edx), %xmm1 /* r3 | r2 | r1 | r0 */
+ movhlps %xmm0, %xmm2 /* l3 | l2 */
+ movhlps %xmm1, %xmm3 /* r3 | r2 */
+ unpcklps %xmm1, %xmm0 /* r1 | l1 | r0 | l0 */
+ unpcklps %xmm3, %xmm2 /* r3 | l3 | r2 | l2 */
+
+ cvtps2pi %xmm0, %mm0 /* r0 l0 --> mm0, int_32 */
+ movhlps %xmm0, %xmm0
+ cvtps2pi %xmm0, %mm1 /* r1 l1 --> mm1, int_32 */
+
+ cvtps2pi %xmm2, %mm2 /* r2 l2 --> mm2, int_32 */
+ movhlps %xmm2, %xmm2
+ cvtps2pi %xmm2, %mm3 /* r3 l3 --> mm3, int_32 */
+ packssdw %mm1, %mm0 /* r1 l1 r0 l0 --> mm0, int_16 */
+ packssdw %mm3, %mm2 /* r3 l3 r2 l2 --> mm2, int_16 */
+
+ movq %mm0, (%eax)
+ movq %mm2, 8(%eax)
+ addl $16, %eax
+ addl $16, %ebx
+ addl $16, %edx
+
+ decl %ecx
+ jnz .loop1
+
+ popl %ecx
+ popl %edx
+ popl %ebx
+ popl %eax
+
+ emms
+
+ leave
+ ret
+ .p2align 4,,7
+
+ .global stream_sample_1ch_to_s16_kni
+ .type stream_sample_1ch_to_s16_kni, @function
+
+stream_sample_1ch_to_s16_kni:
+ pushl %ebp
+ movl %esp, %ebp
+
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+
+ movl $sqrt2, %eax
+ movss (%eax), %xmm7
+ movl 8(%ebp), %eax /* s16_samples */
+ movl 12(%ebp), %ebx /* left */
+ shufps $0, %xmm7, %xmm7
+ movl $64, %ecx
+
+.loop2:
+ movaps (%ebx), %xmm0 /* c3 | c2 | c1 | c0 */
+ mulps %xmm7, %xmm0
+ movhlps %xmm0, %xmm2 /* c3 | c2 */
+
+ cvtps2pi %xmm0, %mm0 /* c1 c0 --> mm0, int_32 */
+ cvtps2pi %xmm2, %mm1 /* c3 c2 --> mm1, int_32 */
+
+ packssdw %mm0, %mm0 /* c1 c1 c0 c0 --> mm0, int_16 */
+ packssdw %mm1, %mm1 /* c3 c3 c2 c2 --> mm1, int_16 */
+
+ movq %mm0, (%eax)
+ movq %mm1, 8(%eax)
+ addl $16, %eax
+ addl $16, %ebx
+
+ decl %ecx
+ jnz .loop2
+
+ popl %ecx
+ popl %ebx
+ popl %eax
+
+ emms
+ leave
+ ret
+#endif
diff --git a/ac3dec/downmix_kni.h b/ac3dec/downmix_kni.h
new file mode 100644
index 0000000..323f2a7
--- /dev/null
+++ b/ac3dec/downmix_kni.h
@@ -0,0 +1,32 @@
+/*****
+*
+* This file is part of the OMS program.
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2, or (at your option)
+* any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; see the file COPYING. If not, write to
+* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+*****/
+
+#ifndef __DOWNMIX_KNI_H__
+#define __DOWNMIX_KNI_H__
+
+void downmix_3f_2r_to_2ch_kni(float *samples, dm_par_t * dm_par);
+void downmix_3f_1r_to_2ch_kni(float *samples, dm_par_t * dm_par);
+void downmix_2f_2r_to_2ch_kni(float *samples, dm_par_t * dm_par);
+void downmix_2f_1r_to_2ch_kni(float *samples, dm_par_t * dm_par);
+void downmix_3f_0r_to_2ch_kni(float *samples, dm_par_t * dm_par);
+void stream_sample_2ch_to_s16_kni(int16_t *s16_samples, float *left, float *right);
+void stream_sample_1ch_to_s16_kni(int16_t *s16_samples, float *center);
+
+#endif
diff --git a/ac3dec/exponent.c b/ac3dec/exponent.c
index 98111a5..16fca46 100644
--- a/ac3dec/exponent.c
+++ b/ac3dec/exponent.c
@@ -28,41 +28,43 @@
#include "ac3_internal.h"
-#include "decode.h"
#include "exponent.h"
-static void exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp,
- uint_16 exps[], uint_16 *dest);
+static inline void exp_unpack_ch(uint16_t type,uint16_t expstr,uint16_t ngrps,uint16_t initial_exp, uint16_t exps[], uint16_t *dest);
-void
-exponent_unpack( bsi_t *bsi, audblk_t *audblk)
+
+/**
+ *
+ **/
+
+void exponent_unpack( bsi_t *bsi, audblk_t *audblk)
{
- uint_16 i;
+ uint16_t i;
for(i=0; i< bsi->nfchans; i++)
- exp_unpack_ch(UNPACK_FBW, audblk->chexpstr[i], audblk->nchgrps[i], audblk->exps[i][0],
- &audblk->exps[i][1], audblk->fbw_exp[i]);
+ exp_unpack_ch(UNPACK_FBW, audblk->chexpstr[i], audblk->nchgrps[i], audblk->exps[i][0], &audblk->exps[i][1], audblk->fbw_exp[i]);
if(audblk->cplinu)
- exp_unpack_ch(UNPACK_CPL, audblk->cplexpstr, audblk->ncplgrps, audblk->cplabsexp << 1,
- audblk->cplexps, &audblk->cpl_exp[audblk->cplstrtmant]);
+ exp_unpack_ch(UNPACK_CPL, audblk->cplexpstr, audblk->ncplgrps, audblk->cplabsexp << 1, audblk->cplexps, &audblk->cpl_exp[audblk->cplstrtmant]);
if(bsi->lfeon)
- exp_unpack_ch(UNPACK_LFE, audblk->lfeexpstr, 2, audblk->lfeexps[0],
- &audblk->lfeexps[1], audblk->lfe_exp);
+ exp_unpack_ch(UNPACK_LFE, audblk->lfeexpstr, 2, audblk->lfeexps[0], &audblk->lfeexps[1], audblk->lfe_exp);
}
-static void
-exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp,
- uint_16 exps[], uint_16 *dest)
+/**
+ *
+ **/
+
+static inline void exp_unpack_ch(uint16_t type,uint16_t expstr,uint16_t ngrps,uint16_t initial_exp,
+ uint16_t exps[], uint16_t *dest)
{
- uint_16 i,j;
- sint_16 exp_acc;
- sint_16 exp_1,exp_2,exp_3;
+ uint16_t i,j;
+ int16_t exp_acc;
+ int16_t exp_1,exp_2,exp_3;
- if(expstr == EXP_REUSE)
+ if (expstr == EXP_REUSE)
return;
/* Handle the initial absolute exponent */
@@ -75,8 +77,7 @@ exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp,
dest[j++] = exp_acc;
/* Loop through the groups and fill the dest array appropriately */
- for(i=0; i< ngrps; i++)
- {
+ for(i=0; i< ngrps; i++) {
if(exps[i] > 124)
goto error;
@@ -86,8 +87,7 @@ exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp,
exp_acc += (exp_1 - 2);
- switch(expstr)
- {
+ switch(expstr) {
case EXP_D45:
dest[j++] = exp_acc;
dest[j++] = exp_acc;
@@ -99,8 +99,7 @@ exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp,
exp_acc += (exp_2 - 2);
- switch(expstr)
- {
+ switch(expstr) {
case EXP_D45:
dest[j++] = exp_acc;
dest[j++] = exp_acc;
@@ -112,8 +111,7 @@ exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp,
exp_acc += (exp_3 - 2);
- switch(expstr)
- {
+ switch(expstr) {
case EXP_D45:
dest[j++] = exp_acc;
dest[j++] = exp_acc;
@@ -125,11 +123,10 @@ exp_unpack_ch(uint_16 type,uint_16 expstr,uint_16 ngrps,uint_16 initial_exp,
}
return;
-
- goto error;
error:
- if(!error_flag)
- fprintf(stderr,"** Invalid exponent - skipping frame **\n");
- error_flag = 1;
+#ifdef DEBUG
+ fprintf (stderr,"** Invalid exponent - skipping frame **\n");
+#endif
+ HANDLE_ERROR ();
}
diff --git a/ac3dec/imdct.c b/ac3dec/imdct.c
index ae2794e..c09ef41 100644
--- a/ac3dec/imdct.c
+++ b/ac3dec/imdct.c
@@ -28,75 +28,48 @@
#include "ac3.h"
#include "ac3_internal.h"
-
-#include "decode.h"
+#include "downmix.h"
#include "imdct.h"
+#include "imdct_c.h"
+#ifdef HAVE_KNI
+#include "imdct_kni.h"
+#endif
+#include "srfft.h"
-void imdct_do_256(float data[],float delay[]);
-void imdct_do_512(float data[],float delay[]);
-typedef struct complex_s
-{
- float real;
- float imag;
-} complex_t;
+extern void (*downmix_3f_2r_to_2ch)(float *samples, dm_par_t * dm_par);
+extern void (*downmix_3f_1r_to_2ch)(float *samples, dm_par_t * dm_par);
+extern void (*downmix_2f_2r_to_2ch)(float *samples, dm_par_t * dm_par);
+extern void (*downmix_2f_1r_to_2ch)(float *samples, dm_par_t * dm_par);
+extern void (*downmix_3f_0r_to_2ch)(float *samples, dm_par_t * dm_par);
+
+extern void (*stream_sample_2ch_to_s16)(int16_t *s16_samples, float *left, float *right);
+extern void (*stream_sample_1ch_to_s16)(int16_t *s16_samples, float *center);
+
+void (*fft_64p) (complex_t *);
+void (*imdct_do_512) (float data[],float delay[]);
+void (*imdct_do_512_nol) (float data[], float delay[]);
+
+void imdct_do_256 (float data[],float delay[]);
#define N 512
+/* static complex_t buf[128]; */
+//static complex_t buf[128] __attribute__((aligned(16)));
+complex_t buf[128] __attribute__((aligned(16)));
-/* 128 point bit-reverse LUT */
-static uint_8 bit_reverse_512[128] = {
- 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70,
- 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78,
- 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74,
- 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c,
- 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72,
- 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a,
- 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76,
- 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e,
- 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71,
- 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79,
- 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75,
- 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d,
- 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73,
- 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b,
- 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77,
- 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f};
-
-static uint_8 bit_reverse_256[64] = {
- 0x00, 0x20, 0x10, 0x30, 0x08, 0x28, 0x18, 0x38,
- 0x04, 0x24, 0x14, 0x34, 0x0c, 0x2c, 0x1c, 0x3c,
- 0x02, 0x22, 0x12, 0x32, 0x0a, 0x2a, 0x1a, 0x3a,
- 0x06, 0x26, 0x16, 0x36, 0x0e, 0x2e, 0x1e, 0x3e,
- 0x01, 0x21, 0x11, 0x31, 0x09, 0x29, 0x19, 0x39,
- 0x05, 0x25, 0x15, 0x35, 0x0d, 0x2d, 0x1d, 0x3d,
- 0x03, 0x23, 0x13, 0x33, 0x0b, 0x2b, 0x1b, 0x3b,
- 0x07, 0x27, 0x17, 0x37, 0x0f, 0x2f, 0x1f, 0x3f};
-
-static complex_t buf[128];
-
-/* Twiddle factor LUT */
-static complex_t *w[7];
-static complex_t w_1[1];
-static complex_t w_2[2];
-static complex_t w_4[4];
-static complex_t w_8[8];
-static complex_t w_16[16];
-static complex_t w_32[32];
-static complex_t w_64[64];
+/* Delay buffer for time domain interleaving */
+static float delay[6][256];
+static float delay1[6][256];
/* Twiddle factors for IMDCT */
-static float xcos1[128];
-static float xsin1[128];
static float xcos2[64];
static float xsin2[64];
-/* Delay buffer for time domain interleaving */
-static float delay[6][256];
-
/* Windowing function for Modified DCT - Thank you acroread */
-static float window[] = {
+//static float window[] = {
+float window[] = {
0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
@@ -128,146 +101,116 @@ static float window[] = {
0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
- 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000 };
-
+ 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000
+};
-static inline void swap_cmplx(complex_t *a, complex_t *b)
+//static const int pm128[128] =
+const int pm128[128] =
{
- complex_t tmp;
-
- tmp = *a;
- *a = *b;
- *b = tmp;
-}
-
-
-
-static inline complex_t cmplx_mult(complex_t a, complex_t b)
+ 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120,
+ 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124,
+ 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122,
+ 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126,
+ 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121,
+ 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125,
+ 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123,
+ 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127
+};
+
+static const int pm64[64] =
{
- complex_t ret;
-
- ret.real = a.real * b.real - a.imag * b.imag;
- ret.imag = a.real * b.imag + a.imag * b.real;
-
- return ret;
-}
-
-void imdct_init(void)
-{
- int i,k;
- complex_t angle_step;
- complex_t current_angle;
-
- /* Twiddle factors to turn IFFT into IMDCT */
- for( i=0; i < 128; i++)
- {
- xcos1[i] = -cos(2.0f * M_PI * (8*i+1)/(8*N)) ;
- xsin1[i] = -sin(2.0f * M_PI * (8*i+1)/(8*N)) ;
- }
-
- /* More twiddle factors to turn IFFT into IMDCT */
- for( i=0; i < 64; i++)
- {
- xcos2[i] = -cos(2.0f * M_PI * (8*i+1)/(4*N)) ;
- xsin2[i] = -sin(2.0f * M_PI * (8*i+1)/(4*N)) ;
- }
-
- /* Canonical twiddle factors for FFT */
- w[0] = w_1;
- w[1] = w_2;
- w[2] = w_4;
- w[3] = w_8;
- w[4] = w_16;
- w[5] = w_32;
- w[6] = w_64;
-
- for( i = 0; i < 7; i++)
- {
- angle_step.real = cos(-2.0 * M_PI / (1 << (i+1)));
- angle_step.imag = sin(-2.0 * M_PI / (1 << (i+1)));
-
- current_angle.real = 1.0;
- current_angle.imag = 0.0;
-
- for (k = 0; k < 1 << i; k++)
- {
- w[i][k] = current_angle;
- current_angle = cmplx_mult(current_angle,angle_step);
- }
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 4, 20, 36, 52, 12, 28, 44, 60,
+ 2, 10, 18, 26, 34, 42, 50, 58,
+ 6, 14, 22, 30, 38, 46, 54, 62,
+ 1, 9, 17, 25, 33, 41, 49, 57,
+ 5, 21, 37, 53, 13, 29, 45, 61,
+ 3, 11, 19, 27, 35, 43, 51, 59,
+ 7, 23, 39, 55, 15, 31, 47, 63
+};
+
+
+void imdct_init (void)
+ {
+ int i;
+ float scale = 255.99609372;
+
+#ifdef __i386__
+#ifdef HAVE_KNI
+ if (!imdct_init_kni ());
+ else
+#endif
+#endif
+ if (!imdct_init_c ());
+
+ // More twiddle factors to turn IFFT into IMDCT */
+ for (i=0; i < 64; i++) {
+ xcos2[i] = cos(2.0f * M_PI * (8*i+1)/(4*N)) * scale;
+ xsin2[i] = sin(2.0f * M_PI * (8*i+1)/(4*N)) * scale;
}
}
-void
-imdct_do_512(float data[],float delay[])
+
+void imdct_do_256 (float data[],float delay[])
{
- int i,k;
- int p,q;
- int m;
- int two_m;
- int two_m_plus_one;
+ int i, j, k;
+ int p, q;
float tmp_a_i;
float tmp_a_r;
- float tmp_b_i;
- float tmp_b_r;
float *data_ptr;
float *delay_ptr;
float *window_ptr;
-
- //
- // 512 IMDCT with source and dest data in 'data'
- //
-
- // Pre IFFT complex multiply plus IFFT cmplx conjugate and bit reverse
- // permutation
- for( i=0; i < 128; i++)
- {
- k = bit_reverse_512[i];
-
- /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */
- buf[k].real = (data[256-2*i-1] * xcos1[i]) - (data[2*i] * xsin1[i]);
- buf[k].imag = -1.0 * ((data[2*i] * xcos1[i]) + (data[256-2*i-1] * xsin1[i]));
- }
- // FFT Merge
- for (m=0; m < 7; m++)
- {
- if(m)
- two_m = (1 << m);
- else
- two_m = 1;
-
- two_m_plus_one = (1 << (m+1));
-
- for(k = 0; k < two_m; k++)
- {
- for(i = 0; i < 128; i += two_m_plus_one)
- {
- p = k + i;
- q = p + two_m;
- tmp_a_r = buf[p].real;
- tmp_a_i = buf[p].imag;
- tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag;
- tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag;
- buf[p].real = tmp_a_r + tmp_b_r;
- buf[p].imag = tmp_a_i + tmp_b_i;
- buf[q].real = tmp_a_r - tmp_b_r;
- buf[q].imag = tmp_a_i - tmp_b_i;
+ complex_t *buf1, *buf2;
- }
- }
+ buf1 = &buf[0];
+ buf2 = &buf[64];
+
+// Pre IFFT complex multiply plus IFFT complex conjugate
+ for (k=0; k<64; k++) {
+ /* X1[k] = X[2*k] */
+ /* X2[k] = X[2*k+1] */
+
+ j = pm64[k];
+ p = 2 * (128-2*j-1);
+ q = 2 * (2 * j);
+
+ /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
+ buf1[k].re = data[p] * xcos2[j] - data[q] * xsin2[j];
+ buf1[k].im = -1.0f * (data[q] * xcos2[j] + data[p] * xsin2[j]);
+ /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
+ buf2[k].re = data[p + 1] * xcos2[j] - data[q + 1] * xsin2[j];
+ buf2[k].im = -1.0f * ( data[q + 1] * xcos2[j] + data[p + 1] * xsin2[j]);
}
- /* Post IFFT complex multiply plus IFFT complex conjugate*/
- for( i=0; i < 128; i++)
- {
- /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
- tmp_a_r = buf[i].real;
- tmp_a_i = buf[i].imag;
- //Note that I flipped the signs on the imaginary ops to do the complex conj
- buf[i].real =(tmp_a_r * xcos1[i]) + (tmp_a_i * xsin1[i]);
- buf[i].imag =(tmp_a_r * xsin1[i]) - (tmp_a_i * xcos1[i]);
+ fft_64p(&buf1[0]);
+ fft_64p(&buf2[0]);
+
+#ifdef DEBUG
+ //DEBUG FFT
+#if 0
+ printf ("Post FFT, buf1\n");
+ for (i=0; i < 64; i++)
+ printf("%d %f %f\n", i, buf_1[i].re, buf_1[i].im);
+ printf ("Post FFT, buf2\n");
+ for (i=0; i < 64; i++)
+ printf("%d %f %f\n", i, buf_2[i].re, buf_2[i].im);
+#endif
+#endif
+
+
+ // Post IFFT complex multiply
+ for( i=0; i < 64; i++) {
+ tmp_a_r = buf1[i].re;
+ tmp_a_i = -buf1[i].im;
+ buf1[i].re =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]);
+ buf1[i].im =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]);
+ tmp_a_r = buf2[i].re;
+ tmp_a_i = -buf2[i].im;
+ buf2[i].re =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]);
+ buf2[i].im =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]);
}
data_ptr = data;
@@ -275,164 +218,122 @@ imdct_do_512(float data[],float delay[])
window_ptr = window;
/* Window and convert to real valued signal */
- for(i=0; i< 64; i++)
- {
- *data_ptr++ = 2.0f * (-buf[64+i].imag * *window_ptr++ + *delay_ptr++);
- *data_ptr++ = 2.0f * ( buf[64-i-1].real * *window_ptr++ + *delay_ptr++);
+ for(i=0; i< 64; i++) {
+ *data_ptr++ = -buf1[i].im * *window_ptr++ + *delay_ptr++;
+ *data_ptr++ = buf1[64-i-1].re * *window_ptr++ + *delay_ptr++;
}
- for(i=0; i< 64; i++)
- {
- *data_ptr++ = 2.0f * (-buf[i].real * *window_ptr++ + *delay_ptr++);
- *data_ptr++ = 2.0f * ( buf[128-i-1].imag * *window_ptr++ + *delay_ptr++);
+ for(i=0; i< 64; i++) {
+ *data_ptr++ = -buf1[i].re * *window_ptr++ + *delay_ptr++;
+ *data_ptr++ = buf1[64-i-1].im * *window_ptr++ + *delay_ptr++;
}
- /* The trailing edge of the window goes into the delay line */
delay_ptr = delay;
- for(i=0; i< 64; i++)
- {
- *delay_ptr++ = -buf[64+i].real * *--window_ptr;
- *delay_ptr++ = buf[64-i-1].imag * *--window_ptr;
+ for(i=0; i< 64; i++) {
+ *delay_ptr++ = -buf2[i].re * *--window_ptr;
+ *delay_ptr++ = buf2[64-i-1].im * *--window_ptr;
}
- for(i=0; i<64; i++)
- {
- *delay_ptr++ = buf[i].imag * *--window_ptr;
- *delay_ptr++ = -buf[128-i-1].real * *--window_ptr;
+ for(i=0; i< 64; i++) {
+ *delay_ptr++ = buf2[i].im * *--window_ptr;
+ *delay_ptr++ = -buf2[64-i-1].re * *--window_ptr;
}
}
-void
-imdct_do_256(float data[],float delay[])
+
+/**
+ *
+ **/
+
+void imdct_do_256_nol (float data[], float delay[])
{
- int i,k;
- int p,q;
- int m;
- int two_m;
- int two_m_plus_one;
+ int i, j, k;
+ int p, q;
float tmp_a_i;
float tmp_a_r;
- float tmp_b_i;
- float tmp_b_r;
float *data_ptr;
float *delay_ptr;
float *window_ptr;
- complex_t *buf_1, *buf_2;
-
- buf_1 = &buf[0];
- buf_2 = &buf[64];
-
- // Pre IFFT complex multiply plus IFFT cmplx conjugate and bit reverse
- // permutation
- for(i=0; i<64; i++)
- {
- /* X1[i] = X[2*i] */
- /* X2[i] = X[2*i+1] */
-
- k = bit_reverse_256[i];
-
- p = 2 * (128-2*i-1);
- q = 2 * (2 * i);
-
- /* Z1[i] = (X1[128-2*i-1] + j * X1[2*i]) * (xcos2[i] + j * xsin2[i]); */
- buf_1[k].real = data[p] * xcos2[i] - data[q] * xsin2[i];
- buf_1[k].imag = -1.0f * (data[q] * xcos2[i] + data[p] * xsin2[i]);
- /* Z2[i] = (X2[128-2*i-1] + j * X2[2*i]) * (xcos2[i] + j * xsin2[i]); */
- buf_2[k].real = data[p + 1] * xcos2[i] - data[q + 1] * xsin2[i];
- buf_2[k].imag = -1.0f * ( data[q + 1] * xcos2[i] + data[p + 1] * xsin2[i]);
- }
-
- // FFT Merge
- for (m=0; m < 6; m++)
- {
- two_m = (1 << m);
- two_m_plus_one = (1 << (m+1));
-
- if(m)
- two_m = (1 << m);
- else
- two_m = 1;
-
- for(k = 0; k < two_m; k++)
- {
- for(i = 0; i < 64; i += two_m_plus_one)
- {
- p = k + i;
- q = p + two_m;
- //Do block 1
- tmp_a_r = buf_1[p].real;
- tmp_a_i = buf_1[p].imag;
- tmp_b_r = buf_1[q].real * w[m][k].real - buf_1[q].imag * w[m][k].imag;
- tmp_b_i = buf_1[q].imag * w[m][k].real + buf_1[q].real * w[m][k].imag;
- buf_1[p].real = tmp_a_r + tmp_b_r;
- buf_1[p].imag = tmp_a_i + tmp_b_i;
- buf_1[q].real = tmp_a_r - tmp_b_r;
- buf_1[q].imag = tmp_a_i - tmp_b_i;
-
- //Do block 2
- tmp_a_r = buf_2[p].real;
- tmp_a_i = buf_2[p].imag;
- tmp_b_r = buf_2[q].real * w[m][k].real - buf_2[q].imag * w[m][k].imag;
- tmp_b_i = buf_2[q].imag * w[m][k].real + buf_2[q].real * w[m][k].imag;
- buf_2[p].real = tmp_a_r + tmp_b_r;
- buf_2[p].imag = tmp_a_i + tmp_b_i;
- buf_2[q].real = tmp_a_r - tmp_b_r;
- buf_2[q].imag = tmp_a_i - tmp_b_i;
-
- }
- }
- }
-
- // Post IFFT complex multiply
- for( i=0; i < 64; i++)
- {
- //Note that I flipped the signs on the imaginary ops to do the complex conj
-
- /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
- tmp_a_r = buf_1[i].real;
- tmp_a_i = buf_1[i].imag;
- buf_1[i].real =(tmp_a_r * xcos2[i]) + (tmp_a_i * xsin2[i]);
- buf_1[i].imag =(tmp_a_r * xsin2[i]) - (tmp_a_i * xcos2[i]);
- /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */
- tmp_a_r = buf_2[i].real;
- tmp_a_i = buf_2[i].imag;
- buf_2[i].real =(tmp_a_r * xcos2[i]) + (tmp_a_i * xsin2[i]);
- buf_2[i].imag =(tmp_a_r * xsin2[i]) - (tmp_a_i * xcos2[i]);
- }
-
- data_ptr = data;
- delay_ptr = delay;
- window_ptr = window;
-
- /* Window and convert to real valued signal */
- for(i=0; i< 64; i++)
- {
- *data_ptr++ = 2.0f * (-buf_1[i].imag * *window_ptr++ + *delay_ptr++);
- *data_ptr++ = 2.0f * ( buf_1[64-i-1].real * *window_ptr++ + *delay_ptr++);
- }
-
- for(i=0; i< 64; i++)
- {
- *data_ptr++ = 2.0f * (-buf_1[i].real * *window_ptr++ + *delay_ptr++);
- *data_ptr++ = 2.0f * ( buf_1[64-i-1].imag * *window_ptr++ + *delay_ptr++);
- }
-
- delay_ptr = delay;
-
- for(i=0; i< 64; i++)
- {
- *delay_ptr++ = -buf_2[i].real * *--window_ptr;
- *delay_ptr++ = buf_2[64-i-1].imag * *--window_ptr;
- }
-
- for(i=0; i< 64; i++)
- {
- *delay_ptr++ = buf_2[i].imag * *--window_ptr;
- *delay_ptr++ = -buf_2[64-i-1].real * *--window_ptr;
+ complex_t *buf1, *buf2;
+
+ buf1 = &buf[0];
+ buf2 = &buf[64];
+
+ /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
+ for(k=0; k<64; k++) {
+ /* X1[k] = X[2*k] */
+ /* X2[k] = X[2*k+1] */
+ j = pm64[k];
+ p = 2 * (128-2*j-1);
+ q = 2 * (2 * j);
+
+ /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
+ buf1[k].re = data[p] * xcos2[j] - data[q] * xsin2[j];
+ buf1[k].im = -1.0f * (data[q] * xcos2[j] + data[p] * xsin2[j]);
+ /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
+ buf2[k].re = data[p + 1] * xcos2[j] - data[q + 1] * xsin2[j];
+ buf2[k].im = -1.0f * ( data[q + 1] * xcos2[j] + data[p + 1] * xsin2[j]);
+ }
+
+
+ fft_64p(&buf1[0]);
+ fft_64p(&buf2[0]);
+
+#ifdef DEBUG
+ //DEBUG FFT
+#if 0
+ printf("Post FFT, buf1\n");
+ for (i=0; i < 64; i++)
+ printf("%d %f %f\n", i, buf_1[i].re, buf_1[i].im);
+ printf("Post FFT, buf2\n");
+ for (i=0; i < 64; i++)
+ printf("%d %f %f\n", i, buf_2[i].re, buf_2[i].im);
+#endif
+#endif
+
+ /* Post IFFT complex multiply */
+ for( i=0; i < 64; i++) {
+ /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
+ tmp_a_r = buf1[i].re;
+ tmp_a_i = -buf1[i].im;
+ buf1[i].re =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]);
+ buf1[i].im =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]);
+ /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */
+ tmp_a_r = buf2[i].re;
+ tmp_a_i = -buf2[i].im;
+ buf2[i].re =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]);
+ buf2[i].im =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]);
+ }
+
+ data_ptr = data;
+ delay_ptr = delay;
+ window_ptr = window;
+
+ /* Window and convert to real valued signal, no overlap */
+ for(i=0; i< 64; i++) {
+ *data_ptr++ = -buf1[i].im * *window_ptr++;
+ *data_ptr++ = buf1[64-i-1].re * *window_ptr++;
+ }
+
+ for(i=0; i< 64; i++) {
+ *data_ptr++ = -buf1[i].re * *window_ptr++ + *delay_ptr++;
+ *data_ptr++ = buf1[64-i-1].im * *window_ptr++ + *delay_ptr++;
+ }
+
+ delay_ptr = delay;
+
+ for(i=0; i< 64; i++) {
+ *delay_ptr++ = -buf2[i].re * *--window_ptr;
+ *delay_ptr++ = buf2[64-i-1].im * *--window_ptr;
+ }
+
+ for(i=0; i< 64; i++) {
+ *delay_ptr++ = buf2[i].im * *--window_ptr;
+ *delay_ptr++ = -buf2[64-i-1].re * *--window_ptr;
}
}
@@ -440,29 +341,190 @@ imdct_do_256(float data[],float delay[])
///#include <sys/time.h>
//FIXME remove
-void
-imdct(bsi_t *bsi,audblk_t *audblk, stream_samples_t samples) {
+void imdct (bsi_t *bsi,audblk_t *audblk, stream_samples_t samples, int16_t *s16_samples, dm_par_t* dm_par)
+{
int i;
+ int doable = 0;
+ float *center=NULL, *left, *right, *left_sur, *right_sur;
+ float *delay_left, *delay_right;
+ float *delay1_left, *delay1_right, *delay1_center, *delay1_sr, *delay1_sl;
+ float right_tmp, left_tmp;
+ void (*do_imdct)(float data[], float deley[]);
+
+ // test if dm in frequency is doable
+ if (!(doable = audblk->blksw[0]))
+ do_imdct = imdct_do_512;
+ else
+ do_imdct = imdct_do_256;
+
+ // downmix in the frequency domain if all the channels
+ // use the same imdct
+ for (i=0; i < bsi->nfchans; i++) {
+ if (doable != audblk->blksw[i]) {
+ do_imdct = NULL;
+ break;
+ }
+ }
- //handy timing code
- //struct timeval start,end;
+ if (do_imdct) {
+ //dowmix first and imdct
+ switch(bsi->acmod) {
+ case 7: // 3/2
+ downmix_3f_2r_to_2ch (samples[0], dm_par);
+ break;
+ case 6: // 2/2
+ downmix_2f_2r_to_2ch (samples[0], dm_par);
+ break;
+ case 5: // 3/1
+ downmix_3f_1r_to_2ch (samples[0], dm_par);
+ break;
+ case 4: // 2/1
+ downmix_2f_1r_to_2ch (samples[0], dm_par);
+ break;
+ case 3: // 3/0
+ downmix_3f_0r_to_2ch (samples[0], dm_par);
+ break;
+ case 2:
+ break;
+ default: // 1/0
+ if (bsi->acmod == 1)
+ center = samples[0];
+ else if (bsi->acmod == 0)
+ center = samples[ac3_config.dual_mono_ch_sel];
+ do_imdct(center, delay[0]); // no downmix
+
+ stream_sample_1ch_to_s16 (s16_samples, center);
+
+ return;
+ //goto done;
+ break;
+ }
- //gettimeofday(&start,0);
-
- for(i=0; i<bsi->nfchans;i++)
- {
- if(audblk->blksw[i])
- imdct_do_256(samples[i],delay[i]);
- else
- imdct_do_512(samples[i],delay[i]);
+ do_imdct (samples[0], delay[0]);
+ do_imdct (samples[1], delay[1]);
+ stream_sample_2ch_to_s16(s16_samples, samples[0], samples[1]);
+
+ } else { //imdct and then dowmix
+ // delay and samples should be saved and mixed
+ //fprintf(stderr, "time domain downmix\n");
+ for (i=0; i<bsi->nfchans; i++) {
+ if (audblk->blksw[i])
+ imdct_do_256_nol (samples[i],delay1[i]);
+ else
+ imdct_do_512_nol (samples[i],delay1[i]);
+ }
+
+ // mix the sample, overlap
+ switch(bsi->acmod) {
+ case 7: // 3/2
+ left = samples[0];
+ center = samples[1];
+ right = samples[2];
+ left_sur = samples[3];
+ right_sur = samples[4];
+ delay_left = delay[0];
+ delay_right = delay[1];
+ delay1_left = delay1[0];
+ delay1_center = delay1[1];
+ delay1_right = delay1[2];
+ delay1_sl = delay1[3];
+ delay1_sr = delay1[4];
+
+ for (i = 0; i < 256; i++) {
+ left_tmp = dm_par->unit * *left++ + dm_par->clev * *center + dm_par->slev * *left_sur++;
+ right_tmp= dm_par->unit * *right++ + dm_par->clev * *center++ + dm_par->slev * *right_sur++;
+ *s16_samples++ = (int16_t)(left_tmp + *delay_left);
+ *s16_samples++ = (int16_t)(right_tmp + *delay_right);
+ *delay_left++ = dm_par->unit * *delay1_left++ + dm_par->clev * *delay1_center + dm_par->slev * *delay1_sl++;
+ *delay_right++ = dm_par->unit * *delay1_right++ + dm_par->clev * *center++ + dm_par->slev * *delay1_sr++;
+ }
+ break;
+ case 6: // 2/2
+ left = samples[0];
+ right = samples[1];
+ left_sur = samples[2];
+ right_sur = samples[3];
+ delay_left = delay[0];
+ delay_right = delay[1];
+ delay1_left = delay1[0];
+ delay1_right = delay1[1];
+ delay1_sl = delay1[2];
+ delay1_sr = delay1[3];
+
+ for (i = 0; i < 256; i++) {
+ left_tmp = dm_par->unit * *left++ + dm_par->slev * *left_sur++;
+ right_tmp= dm_par->unit * *right++ + dm_par->slev * *right_sur++;
+ *s16_samples++ = (int16_t)(left_tmp + *delay_left);
+ *s16_samples++ = (int16_t)(right_tmp + *delay_right);
+ *delay_left++ = dm_par->unit * *delay1_left++ + dm_par->slev * *delay1_sl++;
+ *delay_right++ = dm_par->unit * *delay1_right++ + dm_par->slev * *delay1_sr++;
+ }
+ break;
+ case 5: // 3/1
+ left = samples[0];
+ center = samples[1];
+ right = samples[2];
+ right_sur = samples[3];
+ delay_left = delay[0];
+ delay_right = delay[1];
+ delay1_left = delay1[0];
+ delay1_center = delay1[1];
+ delay1_right = delay1[2];
+ delay1_sl = delay1[3];
+
+ for (i = 0; i < 256; i++) {
+ left_tmp = dm_par->unit * *left++ + dm_par->clev * *center - dm_par->slev * *right_sur;
+ right_tmp= dm_par->unit * *right++ + dm_par->clev * *center++ + dm_par->slev * *right_sur++;
+ *s16_samples++ = (int16_t)(left_tmp + *delay_left);
+ *s16_samples++ = (int16_t)(right_tmp + *delay_right);
+ *delay_left++ = dm_par->unit * *delay1_left++ + dm_par->clev * *delay1_center + dm_par->slev * *delay1_sl;
+ *delay_right++ = dm_par->unit * *delay1_right++ + dm_par->clev * *center++ + dm_par->slev * *delay1_sl++;
+ }
+ break;
+ case 4: // 2/1
+ left = samples[0];
+ right = samples[1];
+ right_sur = samples[2];
+ delay_left = delay[0];
+ delay_right = delay[1];
+ delay1_left = delay1[0];
+ delay1_right = delay1[1];
+ delay1_sl = delay1[2];
+
+ for (i = 0; i < 256; i++) {
+ left_tmp = dm_par->unit * *left++ - dm_par->slev * *right_sur;
+ right_tmp= dm_par->unit * *right++ + dm_par->slev * *right_sur++;
+ *s16_samples++ = (int16_t)(left_tmp + *delay_left);
+ *s16_samples++ = (int16_t)(right_tmp + *delay_right);
+ *delay_left++ = dm_par->unit * *delay1_left++ + dm_par->slev * *delay1_sl;
+ *delay_right++ = dm_par->unit * *delay1_right++ + dm_par->slev * *delay1_sl++;
+ }
+ break;
+ case 3: // 3/0
+ left = samples[0];
+ center = samples[1];
+ right = samples[2];
+ delay_left = delay[0];
+ delay_right = delay[1];
+ delay1_left = delay1[0];
+ delay1_center = delay1[1];
+ delay1_right = delay1[2];
+
+ for (i = 0; i < 256; i++) {
+ left_tmp = dm_par->unit * *left++ + dm_par->clev * *center;
+ right_tmp= dm_par->unit * *right++ + dm_par->clev * *center++;
+ *s16_samples++ = (int16_t)(left_tmp + *delay_left);
+ *s16_samples++ = (int16_t)(right_tmp + *delay_right);
+ *delay_left++ = dm_par->unit * *delay1_left++ + dm_par->clev * *delay1_center;
+ *delay_right++ = dm_par->unit * *delay1_right++ + dm_par->clev * *center++;
+ }
+ break;
+ case 2: // copy to output
+ for (i = 0; i < 256; i++) {
+ *s16_samples++ = (int16_t)samples[0][i];
+ *s16_samples++ = (int16_t)samples[1][i];
+ }
+ break;
+ }
}
- //gettimeofday(&end,0);
- //printf("imdct %ld us\n",(end.tv_sec - start.tv_sec) * 1000000 +
- //end.tv_usec - start.tv_usec);
-
- //XXX We don't bother with the IMDCT for the LFE as it's currently
- //unused.
- //if (bsi->lfeon)
- // imdct_do_512(coeffs->lfe,samples->channel[5],delay[5]);
- //
}
diff --git a/ac3dec/imdct.h b/ac3dec/imdct.h
index 750aa87..8f10af0 100644
--- a/ac3dec/imdct.h
+++ b/ac3dec/imdct.h
@@ -22,5 +22,5 @@
*
*/
-void imdct(bsi_t *bsi,audblk_t *audblk, stream_samples_t samples);
+void imdct (bsi_t *bsi,audblk_t *audblk, stream_samples_t samples, int16_t *s16_samples, dm_par_t *dm_par);
void imdct_init(void);
diff --git a/ac3dec/imdct512_kni.S b/ac3dec/imdct512_kni.S
new file mode 100644
index 0000000..10b8de6
--- /dev/null
+++ b/ac3dec/imdct512_kni.S
@@ -0,0 +1,548 @@
+/*
+ * imdct512_kni.S
+ *
+ * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - October 2000
+ *
+ *
+ * imdct512_kni.S is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * imdct512_kni.S is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifdef __i386__
+
+.text
+ .align 4
+.global imdct512_pre_ifft_twiddle_kni
+ .type imdct512_pre_ifft_twiddle_kni, @function
+imdct512_pre_ifft_twiddle_kni:
+
+ pushl %ebp
+ movl %esp, %ebp
+ addl $-4, %esp /* local variable, loop counter */
+
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ pushl %edi
+ pushl %esi
+
+ movl 8(%ebp), %eax /* pmt */
+ movl 12(%ebp), %ebx /* buf */
+ movl 16(%ebp), %ecx /* data */
+ movl 20(%ebp), %edx /* xcos_sin_sse */
+ movl $64, -4(%ebp)
+
+
+.loop:
+ movl (%eax), %esi
+ movl 4(%eax), %edi
+ movss (%ecx, %esi, 8), %xmm1 /* 2j */
+ movss (%ecx, %edi, 8), %xmm3 /* 2(j+1) */
+
+ shll $1, %esi
+ shll $1, %edi
+
+ movaps (%edx, %esi, 8), %xmm0; /* -c_j | -s_j | -s_j | c_j */
+ movaps (%edx, %edi, 8), %xmm2; /* -c_j+1 | -s_j+1 | -s_j+1 | c_j+1 */
+
+ negl %esi
+ negl %edi
+
+ movss 1020(%ecx, %esi, 4), %xmm4 /* 255-2j */
+ addl $8, %eax
+ movss 1020(%ecx, %edi, 4), %xmm5 /* 255-2(j+1) */
+
+ shufps $0, %xmm1, %xmm4 /* 2j | 2j | 255-2j | 255-2j */
+ shufps $0, %xmm3, %xmm5 /* 2(j+1) | 2(j+1) | 255-2(j+1) | 255-2(j+1) */
+ mulps %xmm4, %xmm0
+ mulps %xmm5, %xmm2
+ movhlps %xmm0, %xmm1
+ movhlps %xmm2, %xmm3
+ addl $16, %ebx
+ addps %xmm1, %xmm0
+ addps %xmm3, %xmm2
+ movlhps %xmm2, %xmm0
+ movaps %xmm0, -16(%ebx)
+ decl -4(%ebp)
+ jnz .loop
+
+ popl %esi
+ popl %edi
+ popl %edx
+ popl %ecx
+ popl %ebx
+ popl %eax
+
+ addl $4, %esp
+ popl %ebp
+
+ ret
+ .p2align 4,0
+
+.global imdct512_post_ifft_twiddle_kni
+ .type imdct512_post_ifft_twiddle_kni, @function
+imdct512_post_ifft_twiddle_kni:
+
+ pushl %ebp
+ movl %esp, %ebp
+
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+
+ movl 8(%ebp), %eax /* buf[] */
+ movl 12(%ebp), %ebx /* xcos_sin_sse[] */
+ movl $32, %ecx /* loop counter */
+
+.loop1:
+ movaps (%eax), %xmm0 /* im1 | re1 | im0 | re0 */
+
+ movaps (%ebx), %xmm2 /* -c | -s | -s | c */
+ movhlps %xmm0, %xmm1 /* im1 | re1 */
+ movaps 16(%ebx), %xmm3 /* -c1 | -s1 | -s1 | c1 */
+
+ shufps $0x50, %xmm0, %xmm0 /* im0 | im0 | re0 | re0 */
+ shufps $0x50, %xmm1, %xmm1 /* im1 | im1 | re1 | re1 */
+
+ movaps 16(%eax), %xmm4 /* im3 | re3 | im2 | re2 */
+
+ shufps $0x27, %xmm2, %xmm2 /* c | -s | -s | -c */
+ movhlps %xmm4, %xmm5 /* im3 | re3 */
+ shufps $0x27, %xmm3, %xmm3 /* c1 | -s1 | -s1 | -c1 */
+
+ movaps 32(%ebx), %xmm6 /* -c2 | -s2 | -s2 | c2 */
+ movaps 48(%ebx), %xmm7 /* -c3 | -s3 | -s3 | c3 */
+
+ shufps $0x50, %xmm4, %xmm4 /* im2 | im2 | re2 | re2 */
+ shufps $0x50, %xmm5, %xmm5 /* im3 | im3 | re3 | re3 */
+
+ mulps %xmm2, %xmm0
+ mulps %xmm3, %xmm1
+
+ shufps $0x27, %xmm6, %xmm6 /* c2 | -s2 | -s2 | -c2 */
+ shufps $0x27, %xmm7, %xmm7 /* c3 | -s3 | -s3 | -c3 */
+
+ movhlps %xmm0, %xmm2
+ movhlps %xmm1, %xmm3
+
+ mulps %xmm6, %xmm4
+ mulps %xmm7, %xmm5
+
+ addps %xmm2, %xmm0
+ addps %xmm3, %xmm1
+
+ movhlps %xmm4, %xmm6
+ movhlps %xmm5, %xmm7
+
+ addps %xmm6, %xmm4
+ addps %xmm7, %xmm5
+
+ movlhps %xmm1, %xmm0
+ movlhps %xmm5, %xmm4
+
+ movaps %xmm0, (%eax)
+ movaps %xmm4, 16(%eax)
+ addl $64, %ebx
+ addl $32, %eax
+ decl %ecx
+ jnz .loop1
+
+ popl %ecx
+ popl %ebx
+ popl %eax
+
+ leave
+ ret
+ .p2align 4,0
+
+.global imdct512_window_delay_kni
+ .type imdct512_window_delay_kni, @function
+imdct512_window_delay_kni:
+
+ pushl %ebp
+ movl %esp, %ebp
+
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ pushl %esi
+ pushl %edi
+
+ movl 20(%ebp), %ebx /* delay */
+ movl 16(%ebp), %edx /* window */
+
+ movl 8(%ebp), %eax /* buf */
+ movl $16, %ecx /* loop count */
+ leal 516(%eax), %esi /* buf[64].im */
+ leal 504(%eax), %edi /* buf[63].re */
+ movl 12(%ebp), %eax /* data */
+.first_128_samples:
+
+ movss (%esi), %xmm0
+ movss 8(%esi), %xmm2
+ movss (%edi), %xmm1
+ movss -8(%edi), %xmm3
+
+ movlhps %xmm2, %xmm0 /* 0.0 | im1 | 0.0 | im0 */
+ movlhps %xmm3, %xmm1 /* 0.0 | re1 | 0.0 | re0 */
+
+ movaps (%edx), %xmm4 /* w3 | w2 | w1 | w0 */
+ movaps (%ebx), %xmm5 /* d3 | d2 | d1 | d0 */
+ shufps $0xb1, %xmm1, %xmm1 /* re1 | 0.0 | re0 | 0.0 */
+
+ movss 16(%esi), %xmm6 /* im2 */
+ movss 24(%esi), %xmm7 /* im3 */
+ subps %xmm1, %xmm0 /* -re1 | im1 | -re0 | im0 */
+ movss -16(%edi), %xmm2 /* re2 */
+ movss -24(%edi), %xmm3 /* re3 */
+ mulps %xmm4, %xmm0
+ movlhps %xmm7, %xmm6 /* 0.0 | im3 | 0.0 | im2 */
+ movlhps %xmm3, %xmm2 /* 0.0 | re3 | 0.0 | re2 */
+ addps %xmm5, %xmm0
+ shufps $0xb1, %xmm2, %xmm2 /* re3 | 0.0 | re2 | 0.0 */
+ movaps 16(%edx), %xmm4 /* w7 | w6 | w5 | w4 */
+ movaps 16(%ebx), %xmm5 /* d7 | d6 | d5 | d4 */
+ subps %xmm2, %xmm6 /* -re3 | im3 | -re2 | im2 */
+ addl $32, %edx
+ movaps %xmm0, (%eax)
+ addl $32, %ebx
+ mulps %xmm4, %xmm6
+ addl $32, %esi
+ addl $32, %eax
+ addps %xmm5, %xmm6
+ addl $-32, %edi
+ movaps %xmm6, -16(%eax)
+ decl %ecx
+ jnz .first_128_samples
+
+ movl 8(%ebp), %esi /* buf[0].re */
+ leal 1020(%esi), %edi /* buf[127].im */
+ movl $16, %ecx /* loop count */
+.second_128_samples:
+
+ movss (%esi), %xmm0 /* buf[i].re */
+ movss 8(%esi), %xmm2 /* re1 */
+ movss (%edi), %xmm1 /* buf[127-i].im */
+ movss -8(%edi), %xmm3 /* im1 */
+
+ movlhps %xmm2, %xmm0 /* 0.0 | re1 | 0.0 | re0 */
+ movlhps %xmm3, %xmm1 /* 0.0 | im1 | 0.0 | im1 */
+
+ movaps (%edx), %xmm4 /* w3 | w2 | w1 | w0 */
+ movaps (%ebx), %xmm5 /* d3 | d2 | d1 | d0 */
+
+ shufps $0xb1, %xmm1, %xmm1 /* im1 | 0.0 | im0 | 0.0 */
+ movss 16(%esi), %xmm6 /* re2 */
+ movss 24(%esi), %xmm7 /* re3 */
+ movss -16(%edi), %xmm2 /* im2 */
+ movss -24(%edi), %xmm3 /* im3 */
+ subps %xmm1, %xmm0 /* -im1 | re1 | -im0 | re0 */
+ movlhps %xmm7, %xmm6 /* 0.0 | re3 | 0.0 | re2 */
+ movlhps %xmm3, %xmm2 /* 0.0 | im3 | 0.0 | im2 */
+ mulps %xmm4, %xmm0
+ shufps $0xb1, %xmm2, %xmm2 /* im3 | 0.0 | im2 | 0.0 */
+ movaps 16(%edx), %xmm4 /* w7 | w6 | w5 | w4 */
+ addl $32, %esi
+ subps %xmm2, %xmm6 /* -im3 | re3 | -im2 | re2 */
+ addps %xmm5, %xmm0
+ mulps %xmm4, %xmm6
+ addl $-32, %edi
+ movaps 16(%ebx), %xmm5 /* d7 | d6 | d5 | d4 */
+ movaps %xmm0, (%eax)
+ addps %xmm5, %xmm6
+ addl $32, %edx
+ addl $32, %eax
+ addl $32, %ebx
+ movaps %xmm6, -16(%eax)
+ decl %ecx
+ jnz .second_128_samples
+
+ movl 8(%ebp), %eax
+ leal 512(%eax), %esi /* buf[64].re */
+ leal 508(%eax), %edi /* buf[63].im */
+ movl $16, %ecx /* loop count */
+ movl 20(%ebp), %eax /* delay */
+.first_128_delay:
+
+ movss (%esi), %xmm0
+ movss 8(%esi), %xmm2
+ movss (%edi), %xmm1
+ movss -8(%edi), %xmm3
+
+ movlhps %xmm2, %xmm0 /* 0.0 | re1 | 0.0 | re0 */
+ movlhps %xmm3, %xmm1 /* 0.0 | im1 | 0.0 | im0 */
+
+ movaps -16(%edx), %xmm4 /* w3 | w2 | w1 | w0 */
+ shufps $0xb1, %xmm1, %xmm1 /* im1 | 0.0 | im0 | 0.0 */
+ movss 16(%esi), %xmm6 /* re2 */
+ movss 24(%esi), %xmm7 /* re3 */
+ movss -16(%edi), %xmm2 /* im2 */
+ movss -24(%edi), %xmm3 /* im3 */
+ subps %xmm1, %xmm0 /* -im1 | re1 | -im0 | re0 */
+ addl $-32, %edx
+ movlhps %xmm7, %xmm6 /* 0.0 | re3 | 0.0 | re2 */
+ movlhps %xmm3, %xmm2 /* 0.0 | im3 | 0.0 | im2 */
+ mulps %xmm4, %xmm0
+ movaps (%edx), %xmm5 /* w7 | w6 | w5 | w4 */
+ shufps $0xb1, %xmm2, %xmm2 /* im3 | 0.0 | im2 | 0.0 */
+ movaps %xmm0, (%eax)
+ addl $32, %esi
+ subps %xmm2, %xmm6 /* -im3 | re3 | -im2 | re2 */
+ addl $-32, %edi
+ mulps %xmm5, %xmm6
+ addl $32, %eax
+ movaps %xmm6, -16(%eax)
+ decl %ecx
+ jnz .first_128_delay
+
+ movl 8(%ebp), %ebx
+ leal 4(%ebx), %esi /* buf[0].im */
+ leal 1016(%ebx), %edi /* buf[127].re */
+ movl $16, %ecx /* loop count */
+.second_128_delay:
+
+ movss (%esi), %xmm0
+ movss 8(%esi), %xmm2
+ movss (%edi), %xmm1
+ movss -8(%edi), %xmm3
+
+ movlhps %xmm2, %xmm0 /* 0.0 | im1 | 0.0 | im0 */
+ movlhps %xmm3, %xmm1 /* 0.0 | re1 | 0.0 | re0 */
+
+ movaps -16(%edx), %xmm4 /* w3 | w2 | w1 | w0 */
+ shufps $0xb1, %xmm1, %xmm1 /* re1 | 0.0 | re0 | 0.0 */
+ movss 16(%esi), %xmm6 /* im2 */
+ movss 24(%esi), %xmm7 /* im3 */
+ movss -16(%edi), %xmm2 /* re2 */
+ movss -24(%edi), %xmm3 /* re3 */
+ subps %xmm0, %xmm1 /* re1 | -im1 | re0 | -im0 */
+ addl $-32, %edx
+ movlhps %xmm7, %xmm6 /* 0.0 | im3 | 0.0 | im2 */
+ movlhps %xmm3, %xmm2 /* 0.0 | re3 | 0.0 | re2 */
+ mulps %xmm4, %xmm1
+ movaps (%edx), %xmm5 /* w7 | w6 | w5 | w4 */
+ shufps $0xb1, %xmm2, %xmm2 /* re3 | 0.0 | re2 | 0.0 */
+ movaps %xmm1, (%eax)
+ addl $32, %esi
+ subps %xmm6, %xmm2 /* re | -im3 | re | -im2 */
+ addl $-32, %edi
+ mulps %xmm5, %xmm2
+ addl $32, %eax
+ movaps %xmm2, -16(%eax)
+ decl %ecx
+ jnz .second_128_delay
+
+ popl %edi
+ popl %esi
+ popl %edx
+ popl %ecx
+ popl %ebx
+ popl %eax
+
+ leave
+ ret
+ .p2align 4,0
+
+.global imdct512_window_delay_nol_kni
+ .type imdct512_window_delay_nol_kni, @function
+imdct512_window_delay_nol_kni:
+
+ pushl %ebp
+ movl %esp, %ebp
+
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ pushl %esi
+ pushl %edi
+
+ /* movl 20(%ebp), %ebx delay */
+ movl 16(%ebp), %edx /* window */
+
+ movl 8(%ebp), %eax /* buf */
+ movl $16, %ecx /* loop count */
+ leal 516(%eax), %esi /* buf[64].im */
+ leal 504(%eax), %edi /* buf[63].re */
+ movl 12(%ebp), %eax /* data */
+.first_128_sample:
+
+ movss (%esi), %xmm0
+ movss 8(%esi), %xmm2
+ movss (%edi), %xmm1
+ movss -8(%edi), %xmm3
+
+ movlhps %xmm2, %xmm0 /* 0.0 | im1 | 0.0 | im0 */
+ movlhps %xmm3, %xmm1 /* 0.0 | re1 | 0.0 | re0 */
+
+ movaps (%edx), %xmm4 /* w3 | w2 | w1 | w0 */
+ /* movaps (%ebx), %xmm5 d3 | d2 | d1 | d0 */
+ shufps $0xb1, %xmm1, %xmm1 /* re1 | 0.0 | re0 | 0.0 */
+
+ movss 16(%esi), %xmm6 /* im2 */
+ movss 24(%esi), %xmm7 /* im3 */
+ subps %xmm1, %xmm0 /* -re1 | im1 | -re0 | im0 */
+ movss -16(%edi), %xmm2 /* re2 */
+ movss -24(%edi), %xmm3 /* re3 */
+ mulps %xmm4, %xmm0
+ movlhps %xmm7, %xmm6 /* 0.0 | im3 | 0.0 | im2 */
+ movlhps %xmm3, %xmm2 /* 0.0 | re3 | 0.0 | re2 */
+ /* addps %xmm5, %xmm0 */
+ shufps $0xb1, %xmm2, %xmm2 /* re3 | 0.0 | re2 | 0.0 */
+ movaps 16(%edx), %xmm4 /* w7 | w6 | w5 | w4 */
+ /* movaps 16(%ebx), %xmm5 d7 | d6 | d5 | d4 */
+ subps %xmm2, %xmm6 /* -re3 | im3 | -re2 | im2 */
+ addl $32, %edx
+ movaps %xmm0, (%eax)
+ /* addl $32, %ebx */
+ mulps %xmm4, %xmm6
+ addl $32, %esi
+ addl $32, %eax
+ /* addps %xmm5, %xmm6 */
+ addl $-32, %edi
+ movaps %xmm6, -16(%eax)
+ decl %ecx
+ jnz .first_128_sample
+
+ movl 8(%ebp), %esi /* buf[0].re */
+ leal 1020(%esi), %edi /* buf[127].im */
+ movl $16, %ecx /* loop count */
+.second_128_sample:
+
+ movss (%esi), %xmm0 /* buf[i].re */
+ movss 8(%esi), %xmm2 /* re1 */
+ movss (%edi), %xmm1 /* buf[127-i].im */
+ movss -8(%edi), %xmm3 /* im1 */
+
+ movlhps %xmm2, %xmm0 /* 0.0 | re1 | 0.0 | re0 */
+ movlhps %xmm3, %xmm1 /* 0.0 | im1 | 0.0 | im1 */
+
+ movaps (%edx), %xmm4 /* w3 | w2 | w1 | w0 */
+ /* movaps (%ebx), %xmm5 d3 | d2 | d1 | d0 */
+
+ shufps $0xb1, %xmm1, %xmm1 /* im1 | 0.0 | im0 | 0.0 */
+ movss 16(%esi), %xmm6 /* re2 */
+ movss 24(%esi), %xmm7 /* re3 */
+ movss -16(%edi), %xmm2 /* im2 */
+ movss -24(%edi), %xmm3 /* im3 */
+ subps %xmm1, %xmm0 /* -im1 | re1 | -im0 | re0 */
+ movlhps %xmm7, %xmm6 /* 0.0 | re3 | 0.0 | re2 */
+ movlhps %xmm3, %xmm2 /* 0.0 | im3 | 0.0 | im2 */
+ mulps %xmm4, %xmm0
+ shufps $0xb1, %xmm2, %xmm2 /* im3 | 0.0 | im2 | 0.0 */
+ movaps 16(%edx), %xmm4 /* w7 | w6 | w5 | w4 */
+ addl $32, %esi
+ subps %xmm2, %xmm6 /* -im3 | re3 | -im2 | re2 */
+ /* addps %xmm5, %xmm0 */
+ mulps %xmm4, %xmm6
+ addl $-32, %edi
+ /* movaps 16(%ebx), %xmm5 d7 | d6 | d5 | d4 */
+ movaps %xmm0, (%eax)
+ /* addps %xmm5, %xmm6 */
+ addl $32, %edx
+ addl $32, %eax
+ /* addl $32, %ebx */
+ movaps %xmm6, -16(%eax)
+ decl %ecx
+ jnz .second_128_sample
+
+ movl 8(%ebp), %eax
+ leal 512(%eax), %esi /* buf[64].re */
+ leal 508(%eax), %edi /* buf[63].im */
+ movl $16, %ecx /* loop count */
+ movl 20(%ebp), %eax /* delay */
+.first_128_delays:
+
+ movss (%esi), %xmm0
+ movss 8(%esi), %xmm2
+ movss (%edi), %xmm1
+ movss -8(%edi), %xmm3
+
+ movlhps %xmm2, %xmm0 /* 0.0 | re1 | 0.0 | re0 */
+ movlhps %xmm3, %xmm1 /* 0.0 | im1 | 0.0 | im0 */
+
+ movaps -16(%edx), %xmm4 /* w3 | w2 | w1 | w0 */
+ shufps $0xb1, %xmm1, %xmm1 /* im1 | 0.0 | im0 | 0.0 */
+ movss 16(%esi), %xmm6 /* re2 */
+ movss 24(%esi), %xmm7 /* re3 */
+ movss -16(%edi), %xmm2 /* im2 */
+ movss -24(%edi), %xmm3 /* im3 */
+ subps %xmm1, %xmm0 /* -im1 | re1 | -im0 | re0 */
+ addl $-32, %edx
+ movlhps %xmm7, %xmm6 /* 0.0 | re3 | 0.0 | re2 */
+ movlhps %xmm3, %xmm2 /* 0.0 | im3 | 0.0 | im2 */
+ mulps %xmm4, %xmm0
+ movaps (%edx), %xmm5 /* w7 | w6 | w5 | w4 */
+ shufps $0xb1, %xmm2, %xmm2 /* im3 | 0.0 | im2 | 0.0 */
+ movaps %xmm0, (%eax)
+ addl $32, %esi
+ subps %xmm2, %xmm6 /* -im3 | re3 | -im2 | re2 */
+ addl $-32, %edi
+ mulps %xmm5, %xmm6
+ addl $32, %eax
+ movaps %xmm6, -16(%eax)
+ decl %ecx
+ jnz .first_128_delays
+
+ movl 8(%ebp), %ebx
+ leal 4(%ebx), %esi /* buf[0].im */
+ leal 1016(%ebx), %edi /* buf[127].re */
+ movl $16, %ecx /* loop count */
+.second_128_delays:
+
+ movss (%esi), %xmm0
+ movss 8(%esi), %xmm2
+ movss (%edi), %xmm1
+ movss -8(%edi), %xmm3
+
+ movlhps %xmm2, %xmm0 /* 0.0 | im1 | 0.0 | im0 */
+ movlhps %xmm3, %xmm1 /* 0.0 | re1 | 0.0 | re0 */
+
+ movaps -16(%edx), %xmm4 /* w3 | w2 | w1 | w0 */
+ shufps $0xb1, %xmm1, %xmm1 /* re1 | 0.0 | re0 | 0.0 */
+ movss 16(%esi), %xmm6 /* im2 */
+ movss 24(%esi), %xmm7 /* im3 */
+ movss -16(%edi), %xmm2 /* re2 */
+ movss -24(%edi), %xmm3 /* re3 */
+ subps %xmm0, %xmm1 /* re1 | -im1 | re0 | -im0 */
+ addl $-32, %edx
+ movlhps %xmm7, %xmm6 /* 0.0 | im3 | 0.0 | im2 */
+ movlhps %xmm3, %xmm2 /* 0.0 | re3 | 0.0 | re2 */
+ mulps %xmm4, %xmm1
+ movaps (%edx), %xmm5 /* w7 | w6 | w5 | w4 */
+ shufps $0xb1, %xmm2, %xmm2 /* re3 | 0.0 | re2 | 0.0 */
+ movaps %xmm1, (%eax)
+ addl $32, %esi
+ subps %xmm6, %xmm2 /* re | -im3 | re | -im2 */
+ addl $-32, %edi
+ mulps %xmm5, %xmm2
+ addl $32, %eax
+ movaps %xmm2, -16(%eax)
+ decl %ecx
+ jnz .second_128_delays
+
+ popl %edi
+ popl %esi
+ popl %edx
+ popl %ecx
+ popl %ebx
+ popl %eax
+
+ leave
+ ret
+ .p2align 4,0
+#endif
diff --git a/ac3dec/imdct_c.c b/ac3dec/imdct_c.c
new file mode 100644
index 0000000..1f2bfe8
--- /dev/null
+++ b/ac3dec/imdct_c.c
@@ -0,0 +1,218 @@
+/*
+ * imdct.c
+ *
+ * Copyright (C) Aaron Holtzman - May 1999
+ *
+ * This file is part of ac3dec, a free Dolby AC-3 stream decoder.
+ *
+ * ac3dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * ac3dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include "ac3.h"
+#include "ac3_internal.h"
+
+#include "downmix.h"
+#include "imdct_c.h"
+#include "srfft.h"
+
+
+#define N 512
+
+extern void (*imdct_do_512) (float data[],float delay[]);
+extern void (*imdct_do_512_nol) (float data[], float delay[]);
+extern void (*fft_64p) (complex_t *);
+
+extern const int pm128[];
+extern float window[];
+extern complex_t buf[128];
+
+extern void fft_64p_c (complex_t *);
+extern void fft_128p_c (complex_t *);
+
+static void imdct_do_512_c (float data[],float delay[]);
+static void imdct_do_512_nol_c (float data[], float delay[]);
+
+/* Twiddle factors for IMDCT */
+static float xcos1[128] __attribute__((aligned(16)));
+static float xsin1[128] __attribute__((aligned(16)));
+
+
+int imdct_init_c (void)
+{
+ int i;
+ float scale = 255.99609372;
+
+ imdct_do_512 = imdct_do_512_c;
+ imdct_do_512_nol = imdct_do_512_nol_c;
+ fft_64p = fft_64p_c;
+
+ /* Twiddle factors to turn IFFT into IMDCT */
+
+ for (i=0; i < 128; i++) {
+ xcos1[i] = cos(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
+ xsin1[i] = sin(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
+ }
+
+ return 0;
+}
+
+
+static void imdct_do_512_c (float data[], float delay[])
+{
+ int i, j;
+ float tmp_a_r, tmp_a_i;
+ float *data_ptr;
+ float *delay_ptr;
+ float *window_ptr;
+
+// 512 IMDCT with source and dest data in 'data'
+// Pre IFFT complex multiply plus IFFT complex conjugate
+
+ for( i=0; i < 128; i++) {
+ j = pm128[i];
+ //a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]);
+ //c = data[2*j] * xcos1[j];
+ //b = data[256-2*j-1] * xsin1[j];
+ //buf1[i].re = a - b + c;
+ //buf1[i].im = b + c;
+ buf[i].re = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]);
+ buf[i].im = -1.0 * (data[2*j] * xcos1[j] + data[256-2*j-1] * xsin1[j]);
+ }
+
+ fft_128p_c (&buf[0]);
+
+// Post IFFT complex multiply plus IFFT complex conjugate
+ for (i=0; i < 128; i++) {
+ tmp_a_r = buf[i].re;
+ tmp_a_i = buf[i].im;
+ //a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]);
+ //b = tmp_a_r * xsin1[j];
+ //c = tmp_a_i * xcos1[j];
+ //buf[j].re = a - b + c;
+ //buf[j].im = b + c;
+ buf[i].re =(tmp_a_r * xcos1[i]) + (tmp_a_i * xsin1[i]);
+ buf[i].im =(tmp_a_r * xsin1[i]) - (tmp_a_i * xcos1[i]);
+ }
+
+ data_ptr = data;
+ delay_ptr = delay;
+ window_ptr = window;
+
+// Window and convert to real valued signal
+ for (i=0; i< 64; i++) {
+ *data_ptr++ = -buf[64+i].im * *window_ptr++ + *delay_ptr++;
+ *data_ptr++ = buf[64-i-1].re * *window_ptr++ + *delay_ptr++;
+ }
+
+ for(i=0; i< 64; i++) {
+ *data_ptr++ = -buf[i].re * *window_ptr++ + *delay_ptr++;
+ *data_ptr++ = buf[128-i-1].im * *window_ptr++ + *delay_ptr++;
+ }
+
+// The trailing edge of the window goes into the delay line
+ delay_ptr = delay;
+
+ for(i=0; i< 64; i++) {
+ *delay_ptr++ = -buf[64+i].re * *--window_ptr;
+ *delay_ptr++ = buf[64-i-1].im * *--window_ptr;
+ }
+
+ for(i=0; i<64; i++) {
+ *delay_ptr++ = buf[i].im * *--window_ptr;
+ *delay_ptr++ = -buf[128-i-1].re * *--window_ptr;
+ }
+}
+
+
+static void imdct_do_512_nol_c (float data[], float delay[])
+{
+ int i, j;
+
+ float tmp_a_i;
+ float tmp_a_r;
+
+ float *data_ptr;
+ float *delay_ptr;
+ float *window_ptr;
+
+ //
+ // 512 IMDCT with source and dest data in 'data'
+ //
+
+ // Pre IFFT complex multiply plus IFFT cmplx conjugate
+
+ for( i=0; i < 128; i++) {
+ /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) */
+ j = pm128[i];
+ //a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]);
+ //c = data[2*j] * xcos1[j];
+ //b = data[256-2*j-1] * xsin1[j];
+ //buf1[i].re = a - b + c;
+
+ //buf1[i].im = b + c;
+ buf[i].re = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]);
+ buf[i].im = -1.0 * (data[2*j] * xcos1[j] + data[256-2*j-1] * xsin1[j]);
+ }
+
+ fft_128p_c (&buf[0]);
+
+ /* Post IFFT complex multiply plus IFFT complex conjugate*/
+ for (i=0; i < 128; i++) {
+ /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
+ /* int j1 = i; */
+ tmp_a_r = buf[i].re;
+ tmp_a_i = buf[i].im;
+ //a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]);
+ //b = tmp_a_r * xsin1[j];
+ //c = tmp_a_i * xcos1[j];
+ //buf[j].re = a - b + c;
+ //buf[j].im = b + c;
+ buf[i].re =(tmp_a_r * xcos1[i]) + (tmp_a_i * xsin1[i]);
+ buf[i].im =(tmp_a_r * xsin1[i]) - (tmp_a_i * xcos1[i]);
+ }
+
+ data_ptr = data;
+ delay_ptr = delay;
+ window_ptr = window;
+
+ /* Window and convert to real valued signal, no overlap here*/
+ for (i=0; i< 64; i++) {
+ *data_ptr++ = -buf[64+i].im * *window_ptr++;
+ *data_ptr++ = buf[64-i-1].re * *window_ptr++;
+ }
+
+ for(i=0; i< 64; i++) {
+ *data_ptr++ = -buf[i].re * *window_ptr++;
+ *data_ptr++ = buf[128-i-1].im * *window_ptr++;
+ }
+
+ /* The trailing edge of the window goes into the delay line */
+ delay_ptr = delay;
+
+ for(i=0; i< 64; i++) {
+ *delay_ptr++ = -buf[64+i].re * *--window_ptr;
+ *delay_ptr++ = buf[64-i-1].im * *--window_ptr;
+ }
+
+ for(i=0; i<64; i++) {
+ *delay_ptr++ = buf[i].im * *--window_ptr;
+ *delay_ptr++ = -buf[128-i-1].re * *--window_ptr;
+ }
+}
diff --git a/ac3dec/imdct_c.h b/ac3dec/imdct_c.h
new file mode 100644
index 0000000..d2c31b5
--- /dev/null
+++ b/ac3dec/imdct_c.h
@@ -0,0 +1,36 @@
+/*****
+*
+* This file is part of the OMS program.
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2, or (at your option)
+* any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; see the file COPYING. If not, write to
+* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+*****/
+
+#ifndef __IMDCT_C_H__
+#define __IMDCT_C_H__
+
+#include "cmplx.h"
+
+int imdct_init_c (void);
+
+void fft_128p_c (complex_t *);
+void fft_64p_c (complex_t *);
+
+void imdct512_pre_ifft_twiddle_c (const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse);
+void imdct512_post_ifft_twiddle_c (complex_t *buf, float *xcos_sin_sse);
+void imdct512_window_delay_c (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt);
+void imdct512_window_delay_nol_c (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt);
+
+#endif
diff --git a/ac3dec/imdct_kni.c b/ac3dec/imdct_kni.c
new file mode 100644
index 0000000..b44547a
--- /dev/null
+++ b/ac3dec/imdct_kni.c
@@ -0,0 +1,103 @@
+/*
+ * imdct_kni.c
+ *
+ * Copyright (C) Aaron Holtzman - May 1999
+ *
+ * This file is part of ac3dec, a free Dolby AC-3 stream decoder.
+ *
+ * ac3dec is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * ac3dec is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *
+ */
+
+#ifdef __i386__
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+#include <mm_accel.h>
+#include "ac3.h"
+#include "ac3_internal.h"
+
+#include "downmix.h"
+#include "imdct_kni.h"
+#include "srfft.h"
+
+#define N 512
+
+/* Delay buffer for time domain interleaving */
+static float xcos_sin_sse[128 * 4] __attribute__((aligned(16)));
+
+extern void (*imdct_do_512) (float data[],float delay[]);
+extern void (*imdct_do_512_nol) (float data[], float delay[]);
+extern void (*fft_64p) (complex_t *);
+
+extern const int pm128[];
+extern float window[];
+extern complex_t buf[128];
+
+extern void fft_64p_kni (complex_t *);
+extern void fft_128p_kni (complex_t *);
+
+static void imdct_do_512_kni (float data[], float delay[]);
+static void imdct_do_512_nol_kni (float data[], float delay[]);
+
+
+int imdct_init_kni (void)
+{
+ uint32_t accel = mm_accel ();
+
+ if (accel & MM_ACCEL_X86_MMXEXT) {
+ int i;
+ float scale = 255.99609372;
+
+ fprintf (stderr, "Using SSE for IMDCT\n");
+ imdct_do_512 = imdct_do_512_kni;
+ imdct_do_512_nol = imdct_do_512_nol_kni;
+ fft_64p = fft_64p_kni;
+
+ for (i=0; i < 128; i++) {
+ float xcos_i = cos(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
+ float xsin_i = sin(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
+ xcos_sin_sse[i * 4] = xcos_i;
+ xcos_sin_sse[i * 4 + 1] = -xsin_i;
+ xcos_sin_sse[i * 4 + 2] = -xsin_i;
+ xcos_sin_sse[i * 4 + 3] = -xcos_i;
+ }
+
+ return 0;
+ } else
+ return -1;
+}
+
+
+static void imdct_do_512_kni (float data[], float delay[])
+{
+ imdct512_pre_ifft_twiddle_kni (pm128, buf, data, xcos_sin_sse);
+ fft_128p_kni (buf);
+ imdct512_post_ifft_twiddle_kni (buf, xcos_sin_sse);
+ imdct512_window_delay_kni (buf, data, window, delay);
+}
+
+
+static void imdct_do_512_nol_kni (float data[], float delay[])
+{
+ imdct512_pre_ifft_twiddle_kni (pm128, buf, data, xcos_sin_sse);
+ fft_128p_kni (buf);
+ imdct512_post_ifft_twiddle_kni (buf, xcos_sin_sse);
+ imdct512_window_delay_nol_kni (buf, data, window, delay);
+}
+
+#endif
diff --git a/ac3dec/imdct_kni.h b/ac3dec/imdct_kni.h
new file mode 100644
index 0000000..2193c07
--- /dev/null
+++ b/ac3dec/imdct_kni.h
@@ -0,0 +1,36 @@
+/*****
+*
+* This file is part of the OMS program.
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2, or (at your option)
+* any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; see the file COPYING. If not, write to
+* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+*****/
+
+#ifndef __IMDCT_KNI_H__
+#define __IMDCT_KNI_H__
+
+#include "cmplx.h"
+
+int imdct_init_kni (void);
+
+void fft_128p_kni(complex_t *);
+void fft_64p_kni(complex_t *);
+
+void imdct512_pre_ifft_twiddle_kni(const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse);
+void imdct512_post_ifft_twiddle_kni(complex_t *buf, float *xcos_sin_sse);
+void imdct512_window_delay_kni(complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt);
+void imdct512_window_delay_nol_kni(complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt);
+
+#endif
diff --git a/ac3dec/mm_accel.h b/ac3dec/mm_accel.h
new file mode 100644
index 0000000..82dc583
--- /dev/null
+++ b/ac3dec/mm_accel.h
@@ -0,0 +1,36 @@
+/*****
+*
+* This file is part of the OMS program.
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2, or (at your option)
+* any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; see the file COPYING. If not, write to
+* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+*****/
+
+#ifndef __MM_ACCEL_H__
+#define __MM_ACCEL_H__
+
+#include <inttypes.h>
+
+// generic accelerations
+#define MM_ACCEL_MLIB 0x00000001
+
+// x86 accelerations
+#define MM_ACCEL_X86_MMX 0x80000000
+#define MM_ACCEL_X86_3DNOW 0x40000000
+#define MM_ACCEL_X86_MMXEXT 0x20000000
+
+uint32_t mm_accel (void);
+
+#endif
diff --git a/ac3dec/parse.c b/ac3dec/parse.c
index 3560bc5..49ea57c 100644
--- a/ac3dec/parse.c
+++ b/ac3dec/parse.c
@@ -31,15 +31,16 @@
#include "bitstream.h"
#include "stats.h"
#include "debug.h"
+#include "crc.h"
#include "parse.h"
/* Misc LUT */
-static const uint_16 nfchans[8] = {2,1,2,3,3,4,4,5};
+static const uint16_t nfchans[8] = {2,1,2,3,3,4,4,5};
struct frmsize_s
{
- uint_16 bit_rate;
- uint_16 frm_size[3];
+ uint16_t bit_rate;
+ uint16_t frm_size[3];
};
static const struct frmsize_s frmsizecod_tbl[64] =
@@ -85,8 +86,7 @@ static const struct frmsize_s frmsizecod_tbl[64] =
};
/* Parse a syncinfo structure, minus the sync word */
-void
-parse_syncinfo(syncinfo_t *syncinfo,uint_8 *data)
+void parse_syncinfo(syncinfo_t *syncinfo, uint8_t *data)
{
//
// We need to read in the entire syncinfo struct (0x0b77 + 24 bits)
@@ -96,10 +96,8 @@ parse_syncinfo(syncinfo_t *syncinfo,uint_8 *data)
// Get the sampling rate
syncinfo->fscod = (data[2] >> 6) & 0x3;
- if(syncinfo->fscod == 3)
- {
+ if(syncinfo->fscod == 3) {
//invalid sampling rate code
- error_flag = 1;
return;
}
else if(syncinfo->fscod == 2)
@@ -119,15 +117,13 @@ parse_syncinfo(syncinfo_t *syncinfo,uint_8 *data)
}
-/*
+
+/**
* This routine fills a bsi struct from the AC3 stream
- */
+ **/
-void
-parse_bsi(bsi_t *bsi)
+void parse_bsi(bsi_t *bsi)
{
- uint_32 i;
-
/* Check the AC-3 version number */
bsi->bsid = bitstream_get(5);
@@ -159,25 +155,19 @@ parse_bsi(bsi_t *bsi)
bsi->dialnorm = bitstream_get(5);
/* Does compression gain exist? */
- bsi->compre = bitstream_get(1);
- if (bsi->compre)
- {
+ if ((bsi->compre = bitstream_get(1))) {
/* Get compression gain */
bsi->compr = bitstream_get(8);
}
/* Does language code exist? */
- bsi->langcode = bitstream_get(1);
- if (bsi->langcode)
- {
+ if ((bsi->langcode = bitstream_get(1))) {
/* Get langauge code */
bsi->langcod = bitstream_get(8);
}
/* Does audio production info exist? */
- bsi->audprodie = bitstream_get(1);
- if (bsi->audprodie)
- {
+ if ((bsi->audprodie = bitstream_get(1))) {
/* Get mix level */
bsi->mixlevel = bitstream_get(5);
@@ -186,31 +176,24 @@ parse_bsi(bsi_t *bsi)
}
/* If we're in dual mono mode then get some extra info */
- if (bsi->acmod ==0)
- {
+ if (!bsi->acmod) {
/* Get the dialogue normalization level two */
bsi->dialnorm2 = bitstream_get(5);
/* Does compression gain two exist? */
- bsi->compr2e = bitstream_get(1);
- if (bsi->compr2e)
- {
+ if ((bsi->compr2e = bitstream_get(1))) {
/* Get compression gain two */
bsi->compr2 = bitstream_get(8);
}
/* Does language code two exist? */
- bsi->langcod2e = bitstream_get(1);
- if (bsi->langcod2e)
- {
+ if ((bsi->langcod2e = bitstream_get(1))) {
/* Get langauge code two */
bsi->langcod2 = bitstream_get(8);
}
/* Does audio production info two exist? */
- bsi->audprodi2e = bitstream_get(1);
- if (bsi->audprodi2e)
- {
+ if ((bsi->audprodi2e = bitstream_get(1))) {
/* Get mix level two */
bsi->mixlevel2 = bitstream_get(5);
@@ -226,22 +209,17 @@ parse_bsi(bsi_t *bsi)
bsi->origbs = bitstream_get(1);
/* Does timecode one exist? */
- bsi->timecod1e = bitstream_get(1);
-
- if(bsi->timecod1e)
+ if ((bsi->timecod1e = bitstream_get(1)))
bsi->timecod1 = bitstream_get(14);
/* Does timecode two exist? */
- bsi->timecod2e = bitstream_get(1);
-
- if(bsi->timecod2e)
+ if ((bsi->timecod2e = bitstream_get(1)))
bsi->timecod2 = bitstream_get(14);
/* Does addition info exist? */
- bsi->addbsie = bitstream_get(1);
+ if ((bsi->addbsie = bitstream_get(1))) {
+ uint32_t i;
- if(bsi->addbsie)
- {
/* Get how much info is there */
bsi->addbsil = bitstream_get(6);
@@ -253,52 +231,41 @@ parse_bsi(bsi_t *bsi)
stats_print_bsi(bsi);
}
+
/* More pain inducing parsing */
-void
-parse_audblk(bsi_t *bsi,audblk_t *audblk)
+void parse_audblk(bsi_t *bsi,audblk_t *audblk)
{
int i,j;
- for (i=0;i < bsi->nfchans; i++)
- {
+ for (i=0; i < bsi->nfchans; i++) {
/* Is this channel an interleaved 256 + 256 block ? */
audblk->blksw[i] = bitstream_get(1);
}
- for (i=0;i < bsi->nfchans; i++)
- {
+ for (i=0;i < bsi->nfchans; i++) {
/* Should we dither this channel? */
audblk->dithflag[i] = bitstream_get(1);
}
/* Does dynamic range control exist? */
- audblk->dynrnge = bitstream_get(1);
- if (audblk->dynrnge)
- {
+ if ((audblk->dynrnge = bitstream_get(1))) {
/* Get dynamic range info */
audblk->dynrng = bitstream_get(8);
}
/* If we're in dual mono mode then get the second channel DR info */
- if (bsi->acmod == 0)
- {
+ if (bsi->acmod == 0) {
/* Does dynamic range control two exist? */
- audblk->dynrng2e = bitstream_get(1);
- if (audblk->dynrng2e)
- {
+ if ((audblk->dynrng2e = bitstream_get(1))) {
/* Get dynamic range info */
audblk->dynrng2 = bitstream_get(8);
}
}
/* Does coupling strategy exist? */
- audblk->cplstre = bitstream_get(1);
- if (audblk->cplstre)
- {
+ if ((audblk->cplstre = bitstream_get(1))) {
/* Is coupling turned on? */
- audblk->cplinu = bitstream_get(1);
- if(audblk->cplinu)
- {
+ if ((audblk->cplinu = bitstream_get(1))) {
for(i=0;i < bsi->nfchans; i++)
audblk->chincpl[i] = bitstream_get(1);
if(bsi->acmod == 0x2)
@@ -315,30 +282,23 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
* band */
audblk->ncplbnd = audblk->ncplsubnd;
- for(i=1; i< audblk->ncplsubnd; i++)
- {
+ for(i=1; i< audblk->ncplsubnd; i++) {
audblk->cplbndstrc[i] = bitstream_get(1);
audblk->ncplbnd -= audblk->cplbndstrc[i];
}
}
}
- if(audblk->cplinu)
- {
+ if(audblk->cplinu) {
/* Loop through all the channels and get their coupling co-ords */
- for(i=0;i < bsi->nfchans;i++)
- {
+ for(i=0;i < bsi->nfchans;i++) {
if(!audblk->chincpl[i])
continue;
/* Is there new coupling co-ordinate info? */
- audblk->cplcoe[i] = bitstream_get(1);
-
- if(audblk->cplcoe[i])
- {
+ if ((audblk->cplcoe[i] = bitstream_get(1))) {
audblk->mstrcplco[i] = bitstream_get(2);
- for(j=0;j < audblk->ncplbnd; j++)
- {
+ for(j=0;j < audblk->ncplbnd; j++) {
audblk->cplcoexp[i][j] = bitstream_get(4);
audblk->cplcomant[i][j] = bitstream_get(4);
}
@@ -347,8 +307,7 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
/* If we're in dual mono mode, there's going to be some phase info */
if( (bsi->acmod == 0x2) && audblk->phsflginu &&
- (audblk->cplcoe[0] || audblk->cplcoe[1]))
- {
+ (audblk->cplcoe[0] || audblk->cplcoe[1])) {
for(j=0;j < audblk->ncplbnd; j++)
audblk->phsflg[j] = bitstream_get(1);
@@ -356,23 +315,17 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
}
/* If we're in dual mono mode, there may be a rematrix strategy */
- if(bsi->acmod == 0x2)
- {
- audblk->rematstr = bitstream_get(1);
- if(audblk->rematstr)
- {
- if (audblk->cplinu == 0)
- {
+ if(bsi->acmod == 0x2) {
+ if ((audblk->rematstr = bitstream_get(1))) {
+ if (!audblk->cplinu) {
for(i = 0; i < 4; i++)
audblk->rematflg[i] = bitstream_get(1);
}
- if((audblk->cplbegf > 2) && audblk->cplinu)
- {
+ if((audblk->cplbegf > 2) && audblk->cplinu) {
for(i = 0; i < 4; i++)
audblk->rematflg[i] = bitstream_get(1);
}
- if((audblk->cplbegf <= 2) && audblk->cplinu)
- {
+ if((audblk->cplbegf <= 2) && audblk->cplinu) {
for(i = 0; i < 3; i++)
audblk->rematflg[i] = bitstream_get(1);
}
@@ -383,8 +336,7 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
}
}
- if (audblk->cplinu)
- {
+ if (audblk->cplinu) {
/* Get the coupling channel exponent strategy */
audblk->cplexpstr = bitstream_get(2);
audblk->ncplgrps = (audblk->cplendmant - audblk->cplstrtmant) /
@@ -399,18 +351,13 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
audblk->lfeexpstr = bitstream_get(1);
/* Determine the bandwidths of all the fbw channels */
- for(i = 0; i < bsi->nfchans; i++)
- {
- uint_16 grp_size;
-
- if(audblk->chexpstr[i] != EXP_REUSE)
- {
- if (audblk->cplinu && audblk->chincpl[i])
- {
+ for(i = 0; i < bsi->nfchans; i++) {
+ uint16_t grp_size;
+
+ if(audblk->chexpstr[i] != EXP_REUSE) {
+ if (audblk->cplinu && audblk->chincpl[i]) {
audblk->endmant[i] = audblk->cplstrtmant;
- }
- else
- {
+ } else {
audblk->chbwcod[i] = bitstream_get(6);
audblk->endmant[i] = ((audblk->chbwcod[i] + 12) * 3) + 37;
}
@@ -422,18 +369,15 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
}
/* Get the coupling exponents if they exist */
- if(audblk->cplinu && (audblk->cplexpstr != EXP_REUSE))
- {
+ if(audblk->cplinu && (audblk->cplexpstr != EXP_REUSE)) {
audblk->cplabsexp = bitstream_get(4);
for(i=0;i< audblk->ncplgrps;i++)
audblk->cplexps[i] = bitstream_get(7);
}
/* Get the fwb channel exponents */
- for(i=0;i < bsi->nfchans; i++)
- {
- if(audblk->chexpstr[i] != EXP_REUSE)
- {
+ for(i=0;i < bsi->nfchans; i++) {
+ if(audblk->chexpstr[i] != EXP_REUSE) {
audblk->exps[i][0] = bitstream_get(4);
for(j=1;j<=audblk->nchgrps[i];j++)
audblk->exps[i][j] = bitstream_get(7);
@@ -442,8 +386,7 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
}
/* Get the lfe channel exponents */
- if(bsi->lfeon && (audblk->lfeexpstr != EXP_REUSE))
- {
+ if(bsi->lfeon && (audblk->lfeexpstr != EXP_REUSE)) {
audblk->lfeexps[0] = bitstream_get(4);
audblk->lfeexps[1] = bitstream_get(7);
audblk->lfeexps[2] = bitstream_get(7);
@@ -452,8 +395,7 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
/* Get the parametric bit allocation parameters */
audblk->baie = bitstream_get(1);
- if(audblk->baie)
- {
+ if(audblk->baie) {
audblk->sdcycod = bitstream_get(2);
audblk->fdcycod = bitstream_get(2);
audblk->sgaincod = bitstream_get(2);
@@ -464,23 +406,19 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
/* Get the SNR off set info if it exists */
audblk->snroffste = bitstream_get(1);
- if(audblk->snroffste)
- {
+ if(audblk->snroffste) {
audblk->csnroffst = bitstream_get(6);
- if(audblk->cplinu)
- {
+ if(audblk->cplinu) {
audblk->cplfsnroffst = bitstream_get(4);
audblk->cplfgaincod = bitstream_get(3);
}
- for(i = 0;i < bsi->nfchans; i++)
- {
+ for(i = 0;i < bsi->nfchans; i++) {
audblk->fsnroffst[i] = bitstream_get(4);
audblk->fgaincod[i] = bitstream_get(3);
}
- if(bsi->lfeon)
- {
+ if(bsi->lfeon) {
audblk->lfefsnroffst = bitstream_get(4);
audblk->lfefgaincod = bitstream_get(3);
@@ -488,12 +426,10 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
}
/* Get coupling leakage info if it exists */
- if(audblk->cplinu)
- {
+ if(audblk->cplinu) {
audblk->cplleake = bitstream_get(1);
- if(audblk->cplleake)
- {
+ if(audblk->cplleake) {
audblk->cplfleak = bitstream_get(3);
audblk->cplsleak = bitstream_get(3);
}
@@ -502,32 +438,26 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
/* Get the delta bit alloaction info */
audblk->deltbaie = bitstream_get(1);
- if(audblk->deltbaie)
- {
+ if(audblk->deltbaie) {
if(audblk->cplinu)
audblk->cpldeltbae = bitstream_get(2);
for(i = 0;i < bsi->nfchans; i++)
audblk->deltbae[i] = bitstream_get(2);
- if (audblk->cplinu && (audblk->cpldeltbae == DELTA_BIT_NEW))
- {
+ if (audblk->cplinu && (audblk->cpldeltbae == DELTA_BIT_NEW)) {
audblk->cpldeltnseg = bitstream_get(3);
- for(i = 0;i < audblk->cpldeltnseg + 1; i++)
- {
+ for(i = 0;i < audblk->cpldeltnseg + 1; i++) {
audblk->cpldeltoffst[i] = bitstream_get(5);
audblk->cpldeltlen[i] = bitstream_get(4);
audblk->cpldeltba[i] = bitstream_get(3);
}
}
- for(i = 0;i < bsi->nfchans; i++)
- {
- if (audblk->deltbae[i] == DELTA_BIT_NEW)
- {
+ for(i = 0;i < bsi->nfchans; i++) {
+ if (audblk->deltbae[i] == DELTA_BIT_NEW) {
audblk->deltnseg[i] = bitstream_get(3);
- for(j = 0; j < audblk->deltnseg[i] + 1; j++)
- {
+ for(j = 0; j < audblk->deltnseg[i] + 1; j++) {
audblk->deltoffst[i][j] = bitstream_get(5);
audblk->deltlen[i][j] = bitstream_get(4);
audblk->deltba[i][j] = bitstream_get(3);
@@ -537,61 +467,15 @@ parse_audblk(bsi_t *bsi,audblk_t *audblk)
}
/* Check to see if there's any dummy info to get */
- if((audblk->skiple = bitstream_get(1)))
- {
- uint_16 skip_data;
+ if((audblk->skiple = bitstream_get(1))) {
+ uint16_t skip_data;
audblk->skipl = bitstream_get(9);
- //XXX remove
- //fprintf(stderr,"(parse) skipping %d bytes\n",audblk->skipl);
- for(i = 0; i < audblk->skipl ; i++)
- {
+ for (i = 0; i < audblk->skipl; i++) {
skip_data = bitstream_get(8);
- //XXX remove
- //fprintf(stderr,"skipped data %2x\n",skip_data);
- //if(skip_data != 0)
- //{
- //dprintf("(parse) Invalid skipped data %2x\n",skip_data);
- //exit(1);
- //}
}
}
stats_print_audblk(bsi,audblk);
}
-
-void
-parse_auxdata(syncinfo_t *syncinfo)
-{
- //FIXME keep this now that we don't really need it?
-#if 0
- int i;
- int skip_length;
- uint_16 crc;
- uint_16 auxdatae;
-
- skip_length = (syncinfo->frame_size * 16) - bitstream_get_total_bits() - 17 - 1;
-
- //XXX remove
- //dprintf("(auxdata) skipping %d auxbits\n",skip_length);
-
- for(i=0; i < skip_length; i++)
- //printf("Skipped bit %i\n",(uint_16)bitstream_get(1));
- bitstream_get(1);
-
- //get the auxdata exists bit
- auxdatae = bitstream_get(1);
-
- //XXX remove
- //dprintf("auxdatae = %i\n",auxdatae);
-
- //Skip the CRC reserved bit
- bitstream_get(1);
-
- //Get the crc
- crc = bitstream_get(16);
-#endif
-}
-
-
diff --git a/ac3dec/parse.h b/ac3dec/parse.h
index cdaee66..6264fea 100644
--- a/ac3dec/parse.h
+++ b/ac3dec/parse.h
@@ -21,8 +21,6 @@
*
*/
-void parse_syncinfo(syncinfo_t *syncinfo,uint_8 *data);
+void parse_syncinfo(syncinfo_t *syncinfo,uint8_t *data);
void parse_audblk(bsi_t *bsi,audblk_t *audblk);
void parse_bsi(bsi_t *bsi);
-void parse_auxdata(syncinfo_t *syncinfo);
-
diff --git a/ac3dec/rematrix.c b/ac3dec/rematrix.c
index caa7094..f1df19b 100644
--- a/ac3dec/rematrix.c
+++ b/ac3dec/rematrix.c
@@ -28,52 +28,60 @@
#include "ac3_internal.h"
-#include "decode.h"
#include "rematrix.h"
+
struct rematrix_band_s
{
- uint_32 start;
- uint_32 end;
+ uint32_t start;
+ uint32_t end;
+} rematrix_band[] = {
+ {13, 24},
+ {25, 36},
+ {37, 60},
+ {61, 252}
};
-struct rematrix_band_s rematrix_band[] = { {13,24}, {25,36}, {37 ,60}, {61,252}};
-static inline uint_32 min(uint_32 a,uint_32 b);
+/**
+ *
+ **/
-static inline uint_32
-min(uint_32 a,uint_32 b)
+inline uint32_t min (uint32_t a, uint32_t b)
{
- return (a < b ? a : b);
+ return (a < b) ? a : b;
}
-/* This routine simply does stereo rematixing for the 2 channel
- * stereo mode */
-void rematrix(audblk_t *audblk, stream_samples_t samples)
+
+/**
+ * This routine simply does stereo remartixing for the 2 channel
+ * stereo mode
+ **/
+
+void rematrix (audblk_t *audblk, stream_samples_t samples)
{
- uint_32 num_bands;
- uint_32 start;
- uint_32 end;
- uint_32 i,j;
- float left,right;
+ uint32_t num_bands;
+ uint32_t start;
+ uint32_t end;
+ int i,j;
- if(!audblk->cplinu || audblk->cplbegf > 2)
+ if (!audblk->cplinu || audblk->cplbegf > 2)
num_bands = 4;
else if (audblk->cplbegf > 0)
num_bands = 3;
else
num_bands = 2;
- for(i=0;i < num_bands; i++)
- {
- if(!audblk->rematflg[i])
+ for (i=0; i < num_bands; i++) {
+ if (!audblk->rematflg[i])
continue;
start = rematrix_band[i].start;
- end = min(rematrix_band[i].end ,12 * audblk->cplbegf + 36);
+ end = min (rematrix_band[i].end ,12 * audblk->cplbegf + 36);
- for(j=start;j < end; j++)
- {
+ for (j=start;j < end; j++) {
+ float left,right;
+
left = samples[0][j] + samples[1][j];
right = samples[0][j] - samples[1][j];
samples[0][j] = left;
diff --git a/ac3dec/sanity_check.c b/ac3dec/sanity_check.c
index 461f20e..d475de1 100644
--- a/ac3dec/sanity_check.c
+++ b/ac3dec/sanity_check.c
@@ -28,8 +28,11 @@
#include "sanity_check.h"
-void
-sanity_check_init(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk)
+/**
+ *
+ **/
+
+void sanity_check_init(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk)
{
syncinfo->magic = AC3_MAGIC_NUMBER;
bsi->magic = AC3_MAGIC_NUMBER;
@@ -38,94 +41,84 @@ sanity_check_init(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk)
audblk->magic3 = AC3_MAGIC_NUMBER;
}
-void
-sanity_check(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk)
+
+/**
+ *
+ **/
+
+int sanity_check(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk)
{
int i;
- if(syncinfo->magic != AC3_MAGIC_NUMBER)
- {
+ if(syncinfo->magic != AC3_MAGIC_NUMBER) {
fprintf(stderr,"\n** Sanity check failed -- syncinfo magic number **");
- error_flag = 1;
+ return -1;
}
- if(bsi->magic != AC3_MAGIC_NUMBER)
- {
+ if(bsi->magic != AC3_MAGIC_NUMBER) {
fprintf(stderr,"\n** Sanity check failed -- bsi magic number **");
- error_flag = 1;
+ return -1;
}
- if(audblk->magic1 != AC3_MAGIC_NUMBER)
- {
+ if(audblk->magic1 != AC3_MAGIC_NUMBER) {
fprintf(stderr,"\n** Sanity check failed -- audblk magic number 1 **");
- error_flag = 1;
+ return -1;
}
- if(audblk->magic2 != AC3_MAGIC_NUMBER)
- {
+ if(audblk->magic2 != AC3_MAGIC_NUMBER) {
fprintf(stderr,"\n** Sanity check failed -- audblk magic number 2 **");
- error_flag = 1;
+ return -1;
}
- if(audblk->magic3 != AC3_MAGIC_NUMBER)
- {
+ if(audblk->magic3 != AC3_MAGIC_NUMBER) {
fprintf(stderr,"\n** Sanity check failed -- audblk magic number 3 **");
- error_flag = 1;
+ return -1;
}
- for(i = 0;i < 5 ; i++)
- {
+ for(i = 0;i < 5 ; i++) {
if (audblk->fbw_exp[i][255] !=0 || audblk->fbw_exp[i][254] !=0 ||
- audblk->fbw_exp[i][253] !=0)
- {
+ audblk->fbw_exp[i][253] !=0) {
fprintf(stderr,"\n** Sanity check failed -- fbw_exp out of bounds **");
- error_flag = 1;
+ return -1;
}
if (audblk->fbw_bap[i][255] !=0 || audblk->fbw_bap[i][254] !=0 ||
- audblk->fbw_bap[i][253] !=0)
- {
+ audblk->fbw_bap[i][253] !=0) {
fprintf(stderr,"\n** Sanity check failed -- fbw_bap out of bounds **");
- error_flag = 1;
+ return -1;
}
}
if (audblk->cpl_exp[255] !=0 || audblk->cpl_exp[254] !=0 ||
- audblk->cpl_exp[253] !=0)
- {
+ audblk->cpl_exp[253] !=0) {
fprintf(stderr,"\n** Sanity check failed -- cpl_exp out of bounds **");
- error_flag = 1;
+ return -1;
}
if (audblk->cpl_bap[255] !=0 || audblk->cpl_bap[254] !=0 ||
- audblk->cpl_bap[253] !=0)
- {
+ audblk->cpl_bap[253] !=0) {
fprintf(stderr,"\n** Sanity check failed -- cpl_bap out of bounds **");
- error_flag = 1;
+ return -1;
}
- if (audblk->cplmant[255] !=0 || audblk->cplmant[254] !=0 ||
- audblk->cplmant[253] !=0)
- {
+ if (audblk->cpl_flt[255] !=0 || audblk->cpl_flt[254] !=0 ||
+ audblk->cpl_flt[253] !=0) {
fprintf(stderr,"\n** Sanity check failed -- cpl_mant out of bounds **");
- error_flag = 1;
+ return -1;
}
- if ((audblk->cplinu == 1) && (audblk->cplbegf > (audblk->cplendf+2)))
- {
+ if ((audblk->cplinu == 1) && (audblk->cplbegf > (audblk->cplendf+2))) {
fprintf(stderr,"\n** Sanity check failed -- cpl params inconsistent **");
- error_flag = 1;
+ return -1;
}
- for(i=0; i < bsi->nfchans; i++)
- {
- if((audblk->chincpl[i] == 0) && (audblk->chbwcod[i] > 60))
- {
+ for(i=0; i < bsi->nfchans; i++) {
+ if((audblk->chincpl[i] == 0) && (audblk->chbwcod[i] > 60)) {
fprintf(stderr,"\n** Sanity check failed -- chbwcod too big **");
- error_flag = 1;
+ return -1;
}
}
- return;
+ return 0;
}
diff --git a/ac3dec/sanity_check.h b/ac3dec/sanity_check.h
index c4ca63a..ead9399 100644
--- a/ac3dec/sanity_check.h
+++ b/ac3dec/sanity_check.h
@@ -23,5 +23,5 @@
#define AC3_MAGIC_NUMBER 0xdeadbeef
-void sanity_check_init(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk);
-void sanity_check(syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk);
+void sanity_check_init (syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk);
+int sanity_check (syncinfo_t *syncinfo, bsi_t *bsi, audblk_t *audblk);
diff --git a/ac3dec/srfft.c b/ac3dec/srfft.c
new file mode 100644
index 0000000..4cbb629
--- /dev/null
+++ b/ac3dec/srfft.c
@@ -0,0 +1,305 @@
+/*
+ * srfft.c
+ *
+ * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000
+ *
+ * 64 and 128 point split radix fft for ac3dec
+ *
+ * The algorithm is desribed in the book:
+ * "Computational Frameworks of the Fast Fourier Transform".
+ *
+ * The ideas and the the organization of code borrowed from djbfft written by
+ * D. J. Bernstein <djb@cr.py.to>. djbff can be found at
+ * http://cr.yp.to/djbfft.html.
+ *
+ * srfft.c is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * srfft.c is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#include <stdio.h>
+
+#include "srfft.h"
+#include "srfftp.h"
+
+void fft_8 (complex_t *x);
+
+void fft_4(complex_t *x)
+{
+ /* delta_p = 1 here */
+ /* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4}
+ */
+
+ register float yt_r, yt_i, yb_r, yb_i, u_r, u_i, vi_r, vi_i;
+
+ yt_r = x[0].re;
+ yb_r = yt_r - x[2].re;
+ yt_r += x[2].re;
+
+ u_r = x[1].re;
+ vi_i = x[3].re - u_r;
+ u_r += x[3].re;
+
+ u_i = x[1].im;
+ vi_r = u_i - x[3].im;
+ u_i += x[3].im;
+
+ yt_i = yt_r;
+ yt_i += u_r;
+ x[0].re = yt_i;
+ yt_r -= u_r;
+ x[2].re = yt_r;
+ yt_i = yb_r;
+ yt_i += vi_r;
+ x[1].re = yt_i;
+ yb_r -= vi_r;
+ x[3].re = yb_r;
+
+ yt_i = x[0].im;
+ yb_i = yt_i - x[2].im;
+ yt_i += x[2].im;
+
+ yt_r = yt_i;
+ yt_r += u_i;
+ x[0].im = yt_r;
+ yt_i -= u_i;
+ x[2].im = yt_i;
+ yt_r = yb_i;
+ yt_r += vi_i;
+ x[1].im = yt_r;
+ yb_i -= vi_i;
+ x[3].im = yb_i;
+}
+
+
+void fft_8 (complex_t *x)
+{
+ /* delta_p = diag{1, sqrt(i)} here */
+ /* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8}
+ */
+ register float wT1_r, wT1_i, wB1_r, wB1_i, wT2_r, wT2_i, wB2_r, wB2_i;
+
+ wT1_r = x[1].re;
+ wT1_i = x[1].im;
+ wB1_r = x[3].re;
+ wB1_i = x[3].im;
+
+ x[1] = x[2];
+ x[2] = x[4];
+ x[3] = x[6];
+ fft_4(&x[0]);
+
+
+ /* x[0] x[4] */
+ wT2_r = x[5].re;
+ wT2_r += x[7].re;
+ wT2_r += wT1_r;
+ wT2_r += wB1_r;
+ wT2_i = wT2_r;
+ wT2_r += x[0].re;
+ wT2_i = x[0].re - wT2_i;
+ x[0].re = wT2_r;
+ x[4].re = wT2_i;
+
+ wT2_i = x[5].im;
+ wT2_i += x[7].im;
+ wT2_i += wT1_i;
+ wT2_i += wB1_i;
+ wT2_r = wT2_i;
+ wT2_r += x[0].im;
+ wT2_i = x[0].im - wT2_i;
+ x[0].im = wT2_r;
+ x[4].im = wT2_i;
+
+ /* x[2] x[6] */
+ wT2_r = x[5].im;
+ wT2_r -= x[7].im;
+ wT2_r += wT1_i;
+ wT2_r -= wB1_i;
+ wT2_i = wT2_r;
+ wT2_r += x[2].re;
+ wT2_i = x[2].re - wT2_i;
+ x[2].re = wT2_r;
+ x[6].re = wT2_i;
+
+ wT2_i = x[5].re;
+ wT2_i -= x[7].re;
+ wT2_i += wT1_r;
+ wT2_i -= wB1_r;
+ wT2_r = wT2_i;
+ wT2_r += x[2].im;
+ wT2_i = x[2].im - wT2_i;
+ x[2].im = wT2_i;
+ x[6].im = wT2_r;
+
+
+ /* x[1] x[5] */
+ wT2_r = wT1_r;
+ wT2_r += wB1_i;
+ wT2_r -= x[5].re;
+ wT2_r -= x[7].im;
+ wT2_i = wT1_i;
+ wT2_i -= wB1_r;
+ wT2_i -= x[5].im;
+ wT2_i += x[7].re;
+
+ wB2_r = wT2_r;
+ wB2_r += wT2_i;
+ wT2_i -= wT2_r;
+ wB2_r *= HSQRT2;
+ wT2_i *= HSQRT2;
+ wT2_r = wB2_r;
+ wB2_r += x[1].re;
+ wT2_r = x[1].re - wT2_r;
+
+ wB2_i = x[5].re;
+ x[1].re = wB2_r;
+ x[5].re = wT2_r;
+
+ wT2_r = wT2_i;
+ wT2_r += x[1].im;
+ wT2_i = x[1].im - wT2_i;
+ wB2_r = x[5].im;
+ x[1].im = wT2_r;
+ x[5].im = wT2_i;
+
+ /* x[3] x[7] */
+ wT1_r -= wB1_i;
+ wT1_i += wB1_r;
+ wB1_r = wB2_i - x[7].im;
+ wB1_i = wB2_r + x[7].re;
+ wT1_r -= wB1_r;
+ wT1_i -= wB1_i;
+ wB1_r = wT1_r + wT1_i;
+ wB1_r *= HSQRT2;
+ wT1_i -= wT1_r;
+ wT1_i *= HSQRT2;
+ wB2_r = x[3].re;
+ wB2_i = wB2_r + wT1_i;
+ wB2_r -= wT1_i;
+ x[3].re = wB2_i;
+ x[7].re = wB2_r;
+ wB2_i = x[3].im;
+ wB2_r = wB2_i + wB1_r;
+ wB2_i -= wB1_r;
+ x[3].im = wB2_i;
+ x[7].im = wB2_r;
+}
+
+
+void fft_asmb(int k, complex_t *x, complex_t *wTB,
+ const complex_t *d, const complex_t *d_3)
+{
+ register complex_t *x2k, *x3k, *x4k, *wB;
+ register float a_r, a_i, a1_r, a1_i, u_r, u_i, v_r, v_i;
+
+ x2k = x + 2 * k;
+ x3k = x2k + 2 * k;
+ x4k = x3k + 2 * k;
+ wB = wTB + 2 * k;
+
+ TRANSZERO(x[0],x2k[0],x3k[0],x4k[0]);
+ TRANS(x[1],x2k[1],x3k[1],x4k[1],wTB[1],wB[1],d[1],d_3[1]);
+
+ --k;
+ for(;;) {
+ TRANS(x[2],x2k[2],x3k[2],x4k[2],wTB[2],wB[2],d[2],d_3[2]);
+ TRANS(x[3],x2k[3],x3k[3],x4k[3],wTB[3],wB[3],d[3],d_3[3]);
+ if (!--k) break;
+ x += 2;
+ x2k += 2;
+ x3k += 2;
+ x4k += 2;
+ d += 2;
+ d_3 += 2;
+ wTB += 2;
+ wB += 2;
+ }
+
+}
+
+void fft_asmb16(complex_t *x, complex_t *wTB)
+{
+ register float a_r, a_i, a1_r, a1_i, u_r, u_i, v_r, v_i;
+ int k = 2;
+
+ /* transform x[0], x[8], x[4], x[12] */
+ TRANSZERO(x[0],x[4],x[8],x[12]);
+
+ /* transform x[1], x[9], x[5], x[13] */
+ TRANS(x[1],x[5],x[9],x[13],wTB[1],wTB[5],delta16[1],delta16_3[1]);
+
+ /* transform x[2], x[10], x[6], x[14] */
+ TRANSHALF_16(x[2],x[6],x[10],x[14]);
+
+ /* transform x[3], x[11], x[7], x[15] */
+ TRANS(x[3],x[7],x[11],x[15],wTB[3],wTB[7],delta16[3],delta16_3[3]);
+
+}
+
+
+void fft_64p_c (complex_t *a)
+{
+ fft_8(&a[0]); fft_4(&a[8]); fft_4(&a[12]);
+ fft_asmb16(&a[0], &a[8]);
+
+ fft_8(&a[16]), fft_8(&a[24]);
+ fft_asmb(4, &a[0], &a[16],&delta32[0], &delta32_3[0]);
+
+ fft_8(&a[32]); fft_4(&a[40]); fft_4(&a[44]);
+ fft_asmb16(&a[32], &a[40]);
+
+ fft_8(&a[48]); fft_4(&a[56]); fft_4(&a[60]);
+ fft_asmb16(&a[48], &a[56]);
+
+ fft_asmb(8, &a[0], &a[32],&delta64[0], &delta64_3[0]);
+}
+
+
+void fft_128p_c (complex_t *a)
+{
+ fft_8(&a[0]); fft_4(&a[8]); fft_4(&a[12]);
+ fft_asmb16(&a[0], &a[8]);
+
+ fft_8(&a[16]), fft_8(&a[24]);
+ fft_asmb(4, &a[0], &a[16],&delta32[0], &delta32_3[0]);
+
+ fft_8(&a[32]); fft_4(&a[40]); fft_4(&a[44]);
+ fft_asmb16(&a[32], &a[40]);
+
+ fft_8(&a[48]); fft_4(&a[56]); fft_4(&a[60]);
+ fft_asmb16(&a[48], &a[56]);
+
+ fft_asmb(8, &a[0], &a[32],&delta64[0], &delta64_3[0]);
+
+ fft_8(&a[64]); fft_4(&a[72]); fft_4(&a[76]);
+ /* fft_16(&a[64]); */
+ fft_asmb16(&a[64], &a[72]);
+
+ fft_8(&a[80]); fft_8(&a[88]);
+
+ /* fft_32(&a[64]); */
+ fft_asmb(4, &a[64], &a[80],&delta32[0], &delta32_3[0]);
+
+ fft_8(&a[96]); fft_4(&a[104]), fft_4(&a[108]);
+ /* fft_16(&a[96]); */
+ fft_asmb16(&a[96], &a[104]);
+
+ fft_8(&a[112]), fft_8(&a[120]);
+ /* fft_32(&a[96]); */
+ fft_asmb(4, &a[96], &a[112], &delta32[0], &delta32_3[0]);
+
+ /* fft_128(&a[0]); */
+ fft_asmb(16, &a[0], &a[64], &delta128[0], &delta128_3[0]);
+}
diff --git a/ac3dec/srfft.h b/ac3dec/srfft.h
new file mode 100644
index 0000000..7916092
--- /dev/null
+++ b/ac3dec/srfft.h
@@ -0,0 +1,39 @@
+/*
+ * srfft.h
+ *
+ * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000
+ *
+ * 64 and 128 point split radix fft for ac3dec
+ *
+ * The algorithm is desribed in the book:
+ * "Computational Frameworks of the Fast Fourier Transform".
+ *
+ * The ideas and the the organization of code borrowed from djbfft written by
+ * D. J. Bernstein <djb@cr.py.to>. djbff can be found at
+ * http://cr.yp.to/djbfft.html.
+ *
+ * srfft.h is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * srfft.h is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef SRFFT_H__
+#define SRFFT_H__
+
+#include "cmplx.h"
+
+void fft_64p_c (complex_t *x);
+void fft_128p_c (complex_t *x);
+
+#endif /* SRFFT_H__ */
diff --git a/ac3dec/srfft_kni.S b/ac3dec/srfft_kni.S
new file mode 100644
index 0000000..a42a41b
--- /dev/null
+++ b/ac3dec/srfft_kni.S
@@ -0,0 +1,289 @@
+/*
+ * srfft_kni.S
+ *
+ * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - October 2000
+ *
+ *
+ * srfft_kni.S is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * srfft_kni.S is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifdef __i386__
+
+.section .rodata
+ .align 16
+hsqrt2: .float 0f0.707106781188
+ .float 0f0.707106781188
+ .float 0f-0.707106781188
+ .float 0f-0.707106781188
+C_1: .float 0f-1.0
+ .float 0f1.0
+ .float 0f-1.0
+ .float 0f1.0
+
+.text
+ .align 4
+.global fft_4_kni
+ .type fft_4_kni, @function
+fft_4_kni:
+ pushl %ebp
+ movl %esp, %ebp
+ movl 8(%ebp), %eax /* complex_t * */
+
+ movaps (%eax), %xmm0 /* x[1] | x[0] */
+ movaps 16(%eax), %xmm2 /* x[3] | x[2] */
+ movaps %xmm0, %xmm1 /* x[1] | x[0] */
+ addps %xmm2, %xmm0 /* x[1] + x[3] | x[0] + x[2] */
+ subps %xmm2, %xmm1 /* x[1] - x[3] | x[0] - x[2] */
+ xorps %xmm6, %xmm6
+ movhlps %xmm1, %xmm4 /* x[1] - x[3] */
+ movhlps %xmm0, %xmm3 /* x[1] + x[3] */
+ subss %xmm4, %xmm6 /* -(x[1] - x[3]).re */
+ movlhps %xmm1, %xmm0 /* x[0] - x[2] | x[0] + x[2] */
+ movss %xmm6, %xmm4 /* (x[1] - x[3]).im | (x[3]-x[1]).re */
+ movaps %xmm0, %xmm2 /* x[0] - x[2] | x[0] + x[2] */
+ shufps $0x14, %xmm4, %xmm3 /* -i*(x[2] - x[3] | x[2] + x[3] */
+ addps %xmm3, %xmm0
+ subps %xmm3, %xmm2
+ movaps %xmm0, (%eax)
+ movaps %xmm2, 16(%eax)
+
+ leave
+ ret
+
+
+ .align 4
+.global fft_8_kni
+ .type fft_8_kni, @function
+fft_8_kni:
+ pushl %ebp
+ movl %esp, %ebp
+ movl 8(%ebp), %eax /* complext_t */
+
+ pushl %ebx
+ movlps (%eax), %xmm0 /* x[0] */
+ movlps 32(%eax), %xmm1 /* x[4] */
+ movhps 16(%eax), %xmm0 /* x[2] | x[0] */
+ movhps 48(%eax), %xmm1 /* x[6] | x[4] */
+ movaps %xmm0, %xmm2 /* x[2] | x[0] */
+ xorps %xmm3, %xmm3
+ addps %xmm1, %xmm0 /* x[2] + x[6] | x[0] + x[4] */
+ subps %xmm1, %xmm2 /* x[2] - x[6] | x[0] - x[4] */
+ movhlps %xmm0, %xmm5 /* x[2] + x[6] */
+ movhlps %xmm2, %xmm4
+ movlhps %xmm2, %xmm0 /* x[0] - x[4] | x[0] + x[4] */
+ subss %xmm4, %xmm3 /* -(x[2]-x[6]).re */
+ movaps %xmm0, %xmm7 /* x[0] - x[4] | x[0] + x[4] */
+ movss %xmm3, %xmm4 /* (x[2]-x[6]).im | -(x[2]-x[6]).re */
+ movlps 8(%eax), %xmm1 /* x[1] */
+ shufps $0x14, %xmm4, %xmm5 /* -i*(x[2] - x[6]) | x[2] + x[6] */
+
+ addps %xmm5, %xmm0 /* yt */
+ subps %xmm5, %xmm7 /* yb */
+
+ movhps 24(%eax), %xmm1 /* x[3] | x[1] */
+ movl $hsqrt2, %ebx
+ movlps 40(%eax), %xmm2 /* x[5] */
+ movhps 56(%eax), %xmm2 /* /x[7] | x[5] */
+ movaps %xmm1, %xmm3 /* x[3] | x[1] */
+ addps %xmm2, %xmm1 /* x[3] + x[7] | x[1] + x[5] */
+ subps %xmm2, %xmm3 /* x[3] - x[7] | x[1] - x[5] */
+ movaps (%ebx), %xmm4 /* -1/sqrt2 | -1/sqrt2 | 1/sqrt2 | 1/sqrt2 */
+ movaps %xmm3, %xmm6 /* x[3] - x[7] | x[1] - x[5] */
+ mulps %xmm4, %xmm3
+ shufps $0xc8, %xmm4, %xmm4 /* -1/sqrt2 | 1/sqrt2 | -1/sqrt2 | 1/sqrt2 */
+ shufps $0xb1, %xmm6, %xmm6
+ mulps %xmm4, %xmm6
+ addps %xmm3, %xmm6 /* (-1-i)/sqrt2 * (x[3]-x[7]) | (1-i)/sqrt2 * (x[1] - x[5] */
+ movhlps %xmm1, %xmm5 /* x[3] + x[7] */
+ movlhps %xmm6, %xmm1 /* (1+i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
+ shufps $0xe4, %xmm6, %xmm5 /* (-1-i)/sqrt2 * (x[3]-x[7]) | x[3]+x[7] */
+ movaps %xmm1, %xmm3 /* (1-i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
+ movl $C_1, %ebx
+ addps %xmm5, %xmm1 /* u */
+ subps %xmm5, %xmm3 /* v */
+ movaps %xmm0, %xmm2 /* yb */
+ movaps %xmm7, %xmm4 /* yt */
+ movaps (%ebx), %xmm5
+ mulps %xmm5, %xmm3
+ addps %xmm1, %xmm0 /* yt + u */
+ subps %xmm1, %xmm2 /* yt - u */
+ shufps $0xb1, %xmm3, %xmm3 /* -i * v */
+ movaps %xmm0, (%eax)
+ movaps %xmm2, 32(%eax)
+ addps %xmm3, %xmm4 /* yb - i*v */
+ subps %xmm3, %xmm7 /* yb + i*v */
+ movaps %xmm4, 16(%eax)
+ movaps %xmm7, 48(%eax)
+
+ popl %ebx
+ leave
+ ret
+
+ .align 4
+.global fft_asmb_kni
+ .type fft_asmb, @function
+fft_asmb_kni:
+ pushl %ebp
+ movl %esp, %ebp
+
+ subl $4, %esp
+
+ pushl %eax
+ pushl %ebx
+ pushl %ecx
+ pushl %edx
+ pushl %esi
+ pushl %edi
+
+ movl 8(%ebp), %ecx /* k */
+ movl 12(%ebp), %eax /* x */
+ movl %ecx, -4(%ebp) /* k */
+ movl 16(%ebp), %ebx /* wT */
+ movl 20(%ebp), %edx /* d */
+ movl 24(%ebp), %esi /* d3 */
+ shll $4, %ecx /* 16k */
+ addl $8, %edx
+ leal (%eax, %ecx, 2), %edi
+ addl $8, %esi
+
+ /* TRANSZERO and TRANS */
+ movaps (%eax), %xmm0 /* x[1] | x[0] */
+ movaps (%ebx), %xmm1 /* wT[1] | wT[0] */
+ movaps (%ebx, %ecx), %xmm2 /* wB[1] | wB[0] */
+ movlps (%edx), %xmm3 /* d */
+ movlps (%esi), %xmm4 /* d3 */
+ movhlps %xmm1, %xmm5 /* wT[1] */
+ movhlps %xmm2, %xmm6 /* wB[1] */
+ shufps $0x50, %xmm3, %xmm3 /* d[1].im | d[1].im | d[1].re | d[1].re */
+ shufps $0x50, %xmm4, %xmm4 /* d3[1].im | d3[1].im | d3[i].re | d3[i].re */
+ movlhps %xmm5, %xmm5 /* wT[1] | wT[1] */
+ movlhps %xmm6, %xmm6 /* wB[1] | wB[1] */
+ mulps %xmm3, %xmm5
+ mulps %xmm4, %xmm6
+ movhlps %xmm5, %xmm7 /* wT[1].im * d[1].im | wT[1].re * d[1].im */
+ movlhps %xmm6, %xmm5 /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wT[1].im * d[1].re | wT[1].re * d[1].re */
+ shufps $0xb1, %xmm6, %xmm7 /* wB[1].re * d3[1].im | wB[i].im * d3[1].im | wT[1].re * d[1].im | wT[1].im * d[1].im */
+ movl $C_1, %edi
+ movaps (%edi), %xmm4
+ mulps %xmm4, %xmm7
+ addps %xmm7, %xmm5 /* wB[1] * d3[1] | wT[1] * d[1] */
+ movlhps %xmm5, %xmm1 /* d[1] * wT[1] | wT[0] */
+ shufps $0xe4, %xmm5, %xmm2 /* d3[1] * wB[1] | wB[0] */
+ movaps %xmm1, %xmm3 /* d[1] * wT[1] | wT[0] */
+ leal (%eax, %ecx, 2), %edi
+ addps %xmm2, %xmm1 /* u */
+ subps %xmm2, %xmm3 /* v */
+ mulps %xmm4, %xmm3
+ movaps (%eax, %ecx), %xmm5 /* xk[1] | xk[0] */
+ shufps $0xb1, %xmm3, %xmm3 /* -i * v */
+ movaps %xmm0, %xmm2 /* x[1] | x[0] */
+ movaps %xmm5, %xmm6 /* xk[1] | xk[0] */
+ addps %xmm1, %xmm0
+ subps %xmm1, %xmm2
+ addps %xmm3, %xmm5
+ subps %xmm3, %xmm6
+ movaps %xmm0, (%eax)
+ movaps %xmm2, (%edi)
+ movaps %xmm5, (%eax, %ecx)
+ movaps %xmm6, (%edi, %ecx)
+ addl $16, %eax
+ addl $16, %ebx
+ addl $8, %edx
+ addl $8, %esi
+ decl -4(%ebp)
+
+.loop:
+ movaps (%ebx), %xmm0 /* wT[1] | wT[0] */
+ movaps (%edx), %xmm1 /* d[1] | d[0] */
+
+ movaps (%ebx, %ecx), %xmm4 /* wB[1] | wB[0] */
+ movaps (%esi), %xmm5 /* d3[1] | d3[0] */
+
+ movhlps %xmm0, %xmm2 /* wT[1] */
+ movhlps %xmm1, %xmm3 /* d[1] */
+
+ movhlps %xmm4, %xmm6 /* wB[1] */
+ movhlps %xmm5, %xmm7 /* d3[1] */
+
+ shufps $0x50, %xmm1, %xmm1 /* d[0].im | d[0].im | d[0].re | d[0].re */
+ shufps $0x50, %xmm3, %xmm3 /* d[1].im | d[1].im | d[1].re | d[1].re */
+
+ movlhps %xmm0, %xmm0 /* wT[0] | wT[0] */
+ shufps $0x50, %xmm5, %xmm5 /* d3[0].im | d3[0].im | d3[0].re | d3[0].re */
+ movlhps %xmm2, %xmm2 /* wT[1] | wT[1] */
+ shufps $0x50, %xmm7, %xmm7 /* d3[1].im | d3[1].im | d3[1].re | d3[1].re */
+
+ mulps %xmm1, %xmm0 /* d[0].im * wT[0].im | d[0].im * wT[0].re | d[0].re * wT[0].im | d[0].re * wT[0].re */
+ mulps %xmm3, %xmm2 /* d[1].im * wT[1].im | d[1].im * wT[1].re | d[1].re * wT[1].im | d[1].re * wT[1].re */
+ movlhps %xmm4, %xmm4 /* wB[0] | wB[0] */
+ movlhps %xmm6, %xmm6 /* wB[1] | wB[1] */
+
+ movhlps %xmm0, %xmm1 /* d[0].im * wT[0].im | d[0].im * wT[0].re */
+ movlhps %xmm2, %xmm0 /* d[1].re * wT[1].im | d[1].re * wT[1].re | d[0].re * wT[0].im | d[0].re * wT[0].re */
+ mulps %xmm5, %xmm4 /* wB[0].im * d3[0].im | wB[0].re * d3[0].im | wB[0].im * d3[0].re | wB[0].re * d3[0].re */
+ mulps %xmm7, %xmm6 /* wB[1].im * d3[1].im | wB[1].re * d3[1].im | wB[1].im * d3[1].re | wB[1].re * d3[1].re */
+ shufps $0xb1, %xmm2, %xmm1 /* d[1].im * wT[1].re | d[1].im * wT[1].im | d[0].im * wT[0].re | d[0].im * wT[0].im */
+ movl $C_1, %edi
+ movaps (%edi), %xmm3 /* 1.0 | -1.0 | 1.0 | -1.0 */
+
+ movhlps %xmm4, %xmm5 /* wB[0].im * d3[0].im | wB[0].re * d3[0].im */
+ mulps %xmm3, %xmm1 /* d[1].im * wT[1].re | -d[1].im * wT[1].im | d[0].im * wT[0].re | -d[0].im * wT[0].im */
+ movlhps %xmm6, %xmm4 /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wB[0].im * d3[0].re | wB[0].im * d3[0].re */
+ addps %xmm1, %xmm0 /* wT[1] * d[1] | wT[0] * d[0] */
+
+ shufps $0xb1, %xmm6, %xmm5 /* wB[1].re * d3[1].im | wB[1].im * d3[1].im | wB[0].re * d3[0].im | wB[0].im * d3[0].im */
+ mulps %xmm3, %xmm5 /* wB[1].re * d3[1].im | -wB[1].im * d3[1].im | wB[0].re * d3[0].im | -wB[0].im * d3[0].im */
+ addps %xmm5, %xmm4 /* wB[1] * d3[1] | wB[0] * d3[0] */
+
+ movaps %xmm0, %xmm1 /* wT[1] * d[1] | wT[0] * d[0] */
+ addps %xmm4, %xmm0 /* u */
+ subps %xmm4, %xmm1 /* v */
+ movaps (%eax), %xmm6 /* x[1] | x[0] */
+ leal (%eax, %ecx, 2), %edi
+ mulps %xmm3, %xmm1
+ addl $16, %ebx
+ addl $16, %esi
+ shufps $0xb1, %xmm1, %xmm1 /* -i * v */
+ movaps (%eax, %ecx), %xmm7 /* xk[1] | xk[0] */
+ movaps %xmm6, %xmm2
+ movaps %xmm7, %xmm4
+ addps %xmm0, %xmm6
+ subps %xmm0, %xmm2
+ movaps %xmm6, (%eax)
+ movaps %xmm2, (%edi)
+ addps %xmm1, %xmm7
+ subps %xmm1, %xmm4
+ addl $16, %edx
+ movaps %xmm7, (%eax, %ecx)
+ movaps %xmm4, (%edi, %ecx)
+
+ addl $16, %eax
+ decl -4(%ebp)
+ jnz .loop
+
+.end:
+ popl %edi
+ popl %esi
+ popl %edx
+ popl %ecx
+ popl %ebx
+ popl %eax
+
+ addl $4, %esp
+
+ leave
+ ret
+#endif
diff --git a/ac3dec/srfft_kni.h b/ac3dec/srfft_kni.h
new file mode 100644
index 0000000..6dc468e
--- /dev/null
+++ b/ac3dec/srfft_kni.h
@@ -0,0 +1,30 @@
+/*****
+*
+* This file is part of the OMS program.
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2, or (at your option)
+* any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License
+* along with this program; see the file COPYING. If not, write to
+* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+*
+*****/
+
+#ifndef __SRFF_KNI__
+#define __SRFF_KNI__
+
+#include "cmplx.h"
+
+void fft_4_kni (complex_t *a);
+void fft_8_kni (complex_t *a);
+void fft_asmb_kni (int, complex_t*, complex_t *, complex_t *, complex_t*);
+
+#endif
diff --git a/ac3dec/srfft_kni_c.c b/ac3dec/srfft_kni_c.c
new file mode 100644
index 0000000..d461110
--- /dev/null
+++ b/ac3dec/srfft_kni_c.c
@@ -0,0 +1,93 @@
+/*
+ * srfft_kni.c
+ *
+ * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000
+ *
+ * 64 and 128 point split radix fft for ac3dec
+ *
+ * The algorithm is desribed in the book:
+ * "Computational Frameworks of the Fast Fourier Transform".
+ *
+ * The ideas and the the organization of code borrowed from djbfft written by
+ * D. J. Bernstein <djb@cr.py.to>. djbff can be found at
+ * http://cr.yp.to/djbfft.html.
+ *
+ * srfft.c is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * srfft.c is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifdef __i386__
+
+#include <stdio.h>
+
+#include "srfft_kni.h"
+#include "srfftp.h"
+
+void fft_64p_kni(complex_t *a)
+{
+ fft_8_kni(&a[0]); fft_4_kni(&a[8]); fft_4_kni(&a[12]);
+ fft_asmb_kni(2, &a[0], &a[8], &delta16[0], &delta16_3[0]);
+
+ fft_8_kni(&a[16]), fft_8_kni(&a[24]);
+ fft_asmb_kni(4, &a[0], &a[16],&delta32[0], &delta32_3[0]);
+
+ fft_8_kni(&a[32]); fft_4_kni(&a[40]); fft_4_kni(&a[44]);
+ fft_asmb_kni(2, &a[32], &a[40], &delta16[0], &delta16_3[0]);
+
+ fft_8_kni(&a[48]); fft_4_kni(&a[56]); fft_4_kni(&a[60]);
+ fft_asmb_kni(2, &a[48], &a[56], &delta16[0], &delta16_3[0]);
+
+ fft_asmb_kni(8, &a[0], &a[32],&delta64[0], &delta64_3[0]);
+}
+
+
+void fft_128p_kni(complex_t *a)
+{
+ fft_8_kni(&a[0]); fft_4_kni(&a[8]); fft_4_kni(&a[12]);
+ fft_asmb_kni(2, &a[0], &a[8], &delta16[0], &delta16_3[0]);
+
+ fft_8_kni(&a[16]), fft_8_kni(&a[24]);
+ fft_asmb_kni(4, &a[0], &a[16],&delta32[0], &delta32_3[0]);
+
+ fft_8_kni(&a[32]); fft_4_kni(&a[40]); fft_4_kni(&a[44]);
+ fft_asmb_kni(2, &a[32], &a[40], &delta16[0], &delta16_3[0]);
+
+ fft_8_kni(&a[48]); fft_4_kni(&a[56]); fft_4_kni(&a[60]);
+ fft_asmb_kni(2, &a[48], &a[56], &delta16[0], &delta16_3[0]);
+
+ fft_asmb_kni(8, &a[0], &a[32],&delta64[0], &delta64_3[0]);
+
+ fft_8_kni(&a[64]); fft_4_kni(&a[72]); fft_4_kni(&a[76]);
+ /* fft_16(&a[64]); */
+ fft_asmb_kni(2, &a[64], &a[72], &delta16[0], &delta16_3[0]);
+
+ fft_8_kni(&a[80]); fft_8_kni(&a[88]);
+
+ /* fft_32(&a[64]); */
+ fft_asmb_kni(4, &a[64], &a[80],&delta32[0], &delta32_3[0]);
+
+ fft_8_kni(&a[96]); fft_4_kni(&a[104]), fft_4_kni(&a[108]);
+ /* fft_16(&a[96]); */
+ fft_asmb_kni(2, &a[96], &a[104], &delta16[0], &delta16_3[0]);
+
+ fft_8_kni(&a[112]), fft_8_kni(&a[120]);
+ /* fft_32(&a[96]); */
+ fft_asmb_kni(4, &a[96], &a[112], &delta32[0], &delta32_3[0]);
+
+ /* fft_128(&a[0]); */
+ fft_asmb_kni(16, &a[0], &a[64], &delta128[0], &delta128_3[0]);
+}
+
+#endif
diff --git a/ac3dec/srfftp.h b/ac3dec/srfftp.h
new file mode 100644
index 0000000..6f44715
--- /dev/null
+++ b/ac3dec/srfftp.h
@@ -0,0 +1,305 @@
+
+/*
+ * srfftp.h
+ *
+ * Copyright (C) Yuqing Deng <Yuqing_Deng@brown.edu> - April 2000
+ *
+ * 64 and 128 point split radix fft for ac3dec
+ *
+ * The algorithm is desribed in the book:
+ * "Computational Frameworks of the Fast Fourier Transform".
+ *
+ * The ideas and the the organization of code borrowed from djbfft written by
+ * D. J. Bernstein <djb@cr.py.to>. djbff can be found at
+ * http://cr.yp.to/djbfft.html.
+ *
+ * srfftp.h is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ *
+ * srfftp.h is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GNU Make; see the file COPYING. If not, write to
+ * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ */
+
+#ifndef SRFFTP_H__
+#define SRFFTP_H__
+
+#include "cmplx.h"
+
+static complex_t delta16[4] =
+ { {1.00000000000000, 0.00000000000000},
+ {0.92387953251129, -0.38268343236509},
+ {0.70710678118655, -0.70710678118655},
+ {0.38268343236509, -0.92387953251129}};
+
+static complex_t delta16_3[4] =
+ { {1.00000000000000, 0.00000000000000},
+ {0.38268343236509, -0.92387953251129},
+ {-0.70710678118655, -0.70710678118655},
+ {-0.92387953251129, 0.38268343236509}};
+
+static complex_t delta32[8] =
+ { {1.00000000000000, 0.00000000000000},
+ {0.98078528040323, -0.19509032201613},
+ {0.92387953251129, -0.38268343236509},
+ {0.83146961230255, -0.55557023301960},
+ {0.70710678118655, -0.70710678118655},
+ {0.55557023301960, -0.83146961230255},
+ {0.38268343236509, -0.92387953251129},
+ {0.19509032201613, -0.98078528040323}};
+
+static complex_t delta32_3[8] =
+ { {1.00000000000000, 0.00000000000000},
+ {0.83146961230255, -0.55557023301960},
+ {0.38268343236509, -0.92387953251129},
+ {-0.19509032201613, -0.98078528040323},
+ {-0.70710678118655, -0.70710678118655},
+ {-0.98078528040323, -0.19509032201613},
+ {-0.92387953251129, 0.38268343236509},
+ {-0.55557023301960, 0.83146961230255}};
+
+static complex_t delta64[16] =
+ { {1.00000000000000, 0.00000000000000},
+ {0.99518472667220, -0.09801714032956},
+ {0.98078528040323, -0.19509032201613},
+ {0.95694033573221, -0.29028467725446},
+ {0.92387953251129, -0.38268343236509},
+ {0.88192126434836, -0.47139673682600},
+ {0.83146961230255, -0.55557023301960},
+ {0.77301045336274, -0.63439328416365},
+ {0.70710678118655, -0.70710678118655},
+ {0.63439328416365, -0.77301045336274},
+ {0.55557023301960, -0.83146961230255},
+ {0.47139673682600, -0.88192126434835},
+ {0.38268343236509, -0.92387953251129},
+ {0.29028467725446, -0.95694033573221},
+ {0.19509032201613, -0.98078528040323},
+ {0.09801714032956, -0.99518472667220}};
+
+static complex_t delta64_3[16] =
+ { {1.00000000000000, 0.00000000000000},
+ {0.95694033573221, -0.29028467725446},
+ {0.83146961230255, -0.55557023301960},
+ {0.63439328416365, -0.77301045336274},
+ {0.38268343236509, -0.92387953251129},
+ {0.09801714032956, -0.99518472667220},
+ {-0.19509032201613, -0.98078528040323},
+ {-0.47139673682600, -0.88192126434836},
+ {-0.70710678118655, -0.70710678118655},
+ {-0.88192126434835, -0.47139673682600},
+ {-0.98078528040323, -0.19509032201613},
+ {-0.99518472667220, 0.09801714032956},
+ {-0.92387953251129, 0.38268343236509},
+ {-0.77301045336274, 0.63439328416365},
+ {-0.55557023301960, 0.83146961230255},
+ {-0.29028467725446, 0.95694033573221}};
+
+static complex_t delta128[32] =
+ { {1.00000000000000, 0.00000000000000},
+ {0.99879545620517, -0.04906767432742},
+ {0.99518472667220, -0.09801714032956},
+ {0.98917650996478, -0.14673047445536},
+ {0.98078528040323, -0.19509032201613},
+ {0.97003125319454, -0.24298017990326},
+ {0.95694033573221, -0.29028467725446},
+ {0.94154406518302, -0.33688985339222},
+ {0.92387953251129, -0.38268343236509},
+ {0.90398929312344, -0.42755509343028},
+ {0.88192126434836, -0.47139673682600},
+ {0.85772861000027, -0.51410274419322},
+ {0.83146961230255, -0.55557023301960},
+ {0.80320753148064, -0.59569930449243},
+ {0.77301045336274, -0.63439328416365},
+ {0.74095112535496, -0.67155895484702},
+ {0.70710678118655, -0.70710678118655},
+ {0.67155895484702, -0.74095112535496},
+ {0.63439328416365, -0.77301045336274},
+ {0.59569930449243, -0.80320753148064},
+ {0.55557023301960, -0.83146961230255},
+ {0.51410274419322, -0.85772861000027},
+ {0.47139673682600, -0.88192126434835},
+ {0.42755509343028, -0.90398929312344},
+ {0.38268343236509, -0.92387953251129},
+ {0.33688985339222, -0.94154406518302},
+ {0.29028467725446, -0.95694033573221},
+ {0.24298017990326, -0.97003125319454},
+ {0.19509032201613, -0.98078528040323},
+ {0.14673047445536, -0.98917650996478},
+ {0.09801714032956, -0.99518472667220},
+ {0.04906767432742, -0.99879545620517}};
+
+static complex_t delta128_3[32] =
+ { {1.00000000000000, 0.00000000000000},
+ {0.98917650996478, -0.14673047445536},
+ {0.95694033573221, -0.29028467725446},
+ {0.90398929312344, -0.42755509343028},
+ {0.83146961230255, -0.55557023301960},
+ {0.74095112535496, -0.67155895484702},
+ {0.63439328416365, -0.77301045336274},
+ {0.51410274419322, -0.85772861000027},
+ {0.38268343236509, -0.92387953251129},
+ {0.24298017990326, -0.97003125319454},
+ {0.09801714032956, -0.99518472667220},
+ {-0.04906767432742, -0.99879545620517},
+ {-0.19509032201613, -0.98078528040323},
+ {-0.33688985339222, -0.94154406518302},
+ {-0.47139673682600, -0.88192126434836},
+ {-0.59569930449243, -0.80320753148065},
+ {-0.70710678118655, -0.70710678118655},
+ {-0.80320753148065, -0.59569930449243},
+ {-0.88192126434835, -0.47139673682600},
+ {-0.94154406518302, -0.33688985339222},
+ {-0.98078528040323, -0.19509032201613},
+ {-0.99879545620517, -0.04906767432742},
+ {-0.99518472667220, 0.09801714032956},
+ {-0.97003125319454, 0.24298017990326},
+ {-0.92387953251129, 0.38268343236509},
+ {-0.85772861000027, 0.51410274419322},
+ {-0.77301045336274, 0.63439328416365},
+ {-0.67155895484702, 0.74095112535496},
+ {-0.55557023301960, 0.83146961230255},
+ {-0.42755509343028, 0.90398929312344},
+ {-0.29028467725446, 0.95694033573221},
+ {-0.14673047445536, 0.98917650996478}};
+
+#define HSQRT2 0.707106781188;
+
+#define TRANSZERO(A0,A4,A8,A12) { \
+ u_r = wTB[0].re; \
+ v_i = u_r - wTB[k*2].re; \
+ u_r += wTB[k*2].re; \
+ u_i = wTB[0].im; \
+ v_r = wTB[k*2].im - u_i; \
+ u_i += wTB[k*2].im; \
+ a_r = A0.re; \
+ a_i = A0.im; \
+ a1_r = a_r; \
+ a1_r += u_r; \
+ A0.re = a1_r; \
+ a_r -= u_r; \
+ A8.re = a_r; \
+ a1_i = a_i; \
+ a1_i += u_i; \
+ A0.im = a1_i; \
+ a_i -= u_i; \
+ A8.im = a_i; \
+ a1_r = A4.re; \
+ a1_i = A4.im; \
+ a_r = a1_r; \
+ a_r -= v_r; \
+ A4.re = a_r; \
+ a1_r += v_r; \
+ A12.re = a1_r; \
+ a_i = a1_i; \
+ a_i -= v_i; \
+ A4.im = a_i; \
+ a1_i += v_i; \
+ A12.im = a1_i; \
+ }
+
+#define TRANSHALF_16(A2,A6,A10,A14) {\
+ u_r = wTB[2].re; \
+ a_r = u_r; \
+ u_i = wTB[2].im; \
+ u_r += u_i; \
+ u_i -= a_r; \
+ a_r = wTB[6].re; \
+ a1_r = a_r; \
+ a_i = wTB[6].im; \
+ a_r = a_i - a_r; \
+ a_i += a1_r; \
+ v_i = u_r - a_r; \
+ u_r += a_r; \
+ v_r = u_i + a_i; \
+ u_i -= a_i; \
+ v_i *= HSQRT2; \
+ v_r *= HSQRT2; \
+ u_r *= HSQRT2; \
+ u_i *= HSQRT2; \
+ a_r = A2.re; \
+ a_i = A2.im; \
+ a1_r = a_r; \
+ a1_r += u_r; \
+ A2.re = a1_r; \
+ a_r -= u_r; \
+ A10.re = a_r; \
+ a1_i = a_i; \
+ a1_i += u_i; \
+ A2.im = a1_i; \
+ a_i -= u_i; \
+ A10.im = a_i; \
+ a1_r = A6.re; \
+ a1_i = A6.im; \
+ a_r = a1_r; \
+ a1_r += v_r; \
+ A6.re = a1_r; \
+ a_r -= v_r; \
+ A14.re = a_r; \
+ a_i = a1_i; \
+ a1_i -= v_i; \
+ A6.im = a1_i; \
+ a_i += v_i; \
+ A14.im = a_i; \
+ }
+
+#define TRANS(A1,A5,A9,A13,WT,WB,D,D3) { \
+ u_r = WT.re; \
+ a_r = u_r; \
+ a_r *= D.im; \
+ u_r *= D.re; \
+ a_i = WT.im; \
+ a1_i = a_i; \
+ a1_i *= D.re; \
+ a_i *= D.im; \
+ u_r -= a_i; \
+ u_i = a_r; \
+ u_i += a1_i; \
+ a_r = WB.re; \
+ a1_r = a_r; \
+ a1_r *= D3.re; \
+ a_r *= D3.im; \
+ a_i = WB.im; \
+ a1_i = a_i; \
+ a_i *= D3.re; \
+ a1_i *= D3.im; \
+ a1_r -= a1_i; \
+ a_r += a_i; \
+ v_i = u_r - a1_r; \
+ u_r += a1_r; \
+ v_r = a_r - u_i; \
+ u_i += a_r; \
+ a_r = A1.re; \
+ a_i = A1.im; \
+ a1_r = a_r; \
+ a1_r += u_r; \
+ A1.re = a1_r; \
+ a_r -= u_r; \
+ A9.re = a_r; \
+ a1_i = a_i; \
+ a1_i += u_i; \
+ A1.im = a1_i; \
+ a_i -= u_i; \
+ A9.im = a_i; \
+ a1_r = A5.re; \
+ a1_i = A5.im; \
+ a_r = a1_r; \
+ a1_r -= v_r; \
+ A5.re = a1_r; \
+ a_r += v_r; \
+ A13.re = a_r; \
+ a_i = a1_i; \
+ a1_i -= v_i; \
+ A5.im = a1_i; \
+ a_i += v_i; \
+ A13.im = a_i; \
+ }
+
+#endif
diff --git a/ac3dec/stats.c b/ac3dec/stats.c
index cfeb453..4bdad09 100644
--- a/ac3dec/stats.c
+++ b/ac3dec/stats.c
@@ -28,16 +28,16 @@
#include "ac3_internal.h"
-#include "decode.h"
#include "stats.h"
#include "debug.h"
-
+#if !defined (__GNUC__) || defined (DEBUG)
static const char *service_ids[8] =
{
"CM","ME","VI","HI",
"D", "C","E", "VO"
};
+#endif
struct mixlev_s
{
@@ -77,9 +77,10 @@ static const char *language[128] =
"Bengali", "Belorussian", "Bambora", "Azerbijani", "Assamese", "Armenian", "Arabic", "Amharic"
};
+
void stats_print_banner(syncinfo_t *syncinfo,bsi_t *bsi)
{
- fprintf(stdout,"ac3dec-0.6.2-cvs (C) 2000 Aaron Holtzman (aholtzma@ess.engr.uvic.ca)\n");
+ // fprintf(stdout,PACKAGE"-"VERSION" (C) 2000 Aaron Holtzman (aholtzma@ess.engr.uvic.ca)\n");
fprintf(stdout,"%d.%d Mode ",bsi->nfchans,bsi->lfeon);
fprintf(stdout,"%2.1f KHz",syncinfo->sampling_rate * 1e-3);
@@ -87,8 +88,7 @@ void stats_print_banner(syncinfo_t *syncinfo,bsi_t *bsi)
if (bsi->langcode && (bsi->langcod < 128))
fprintf(stdout,"%s ", language[bsi->langcod]);
- switch(bsi->bsmod)
- {
+ switch(bsi->bsmod) {
case 0:
fprintf(stdout,"Complete Main Audio Service");
break;
@@ -116,12 +116,12 @@ void stats_print_banner(syncinfo_t *syncinfo,bsi_t *bsi)
fprintf(stdout,"\n");
}
-void stats_print_syncinfo(syncinfo_t *syncinfo)
+
+void stats_print_syncinfo (syncinfo_t *syncinfo)
{
dprintf("(syncinfo) ");
- switch (syncinfo->fscod)
- {
+ switch (syncinfo->fscod) {
case 2:
dprintf("32 KHz ");
break;
@@ -140,9 +140,9 @@ void stats_print_syncinfo(syncinfo_t *syncinfo)
syncinfo->frame_size);
}
+
-void stats_print_bsi(bsi_t *bsi)
-{
+void stats_print_bsi(bsi_t *bsi) {
dprintf("(bsi) ");
dprintf("%s",service_ids[bsi->bsmod]);
dprintf(" %d.%d Mode ",bsi->nfchans,bsi->lfeon);
@@ -154,11 +154,11 @@ void stats_print_bsi(bsi_t *bsi)
}
+
char *exp_strat_tbl[4] = {"R ","D15 ","D25 ","D45 "};
-void stats_print_audblk(bsi_t *bsi,audblk_t *audblk)
-{
- uint_32 i;
+void stats_print_audblk(bsi_t *bsi,audblk_t *audblk) {
+ uint32_t i;
dprintf("(audblk) ");
dprintf("%s ",audblk->cplinu ? "cpl on " : "cpl off");