diff options
Diffstat (limited to 'src/libffmpeg/libavcodec/dsputil.h')
-rw-r--r-- | src/libffmpeg/libavcodec/dsputil.h | 134 |
1 files changed, 126 insertions, 8 deletions
diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h index df7830564..de3c1d564 100644 --- a/src/libffmpeg/libavcodec/dsputil.h +++ b/src/libffmpeg/libavcodec/dsputil.h @@ -3,18 +3,20 @@ * Copyright (c) 2000, 2001, 2002 Fabrice Bellard. * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> * - * This library is free software; you can redistribute it and/or + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software + * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ @@ -61,6 +63,10 @@ void ff_h264_idct_dc_add_c(uint8_t *dst, DCTELEM *block, int stride); void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block); void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block); +void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1, + const float *src2, int src3, int blocksize, int step); +void ff_float_to_int16_c(int16_t *dst, const float *src, int len); + /* encoding scans */ extern const uint8_t ff_alternate_horizontal_scan[64]; extern const uint8_t ff_alternate_vertical_scan[64]; @@ -71,8 +77,8 @@ extern const uint8_t ff_zigzag248_direct[64]; #define MAX_NEG_CROP 1024 /* temporary */ -extern uint32_t squareTbl[512]; -extern uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; +extern uint32_t ff_squareTbl[512]; +extern uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP]; /* VP3 DSP functions */ void ff_vp3_idct_c(DCTELEM *block/* align 16*/); @@ -269,11 +275,16 @@ typedef struct DSPContext { * h264 Chram MC */ h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; + /* This is really one func used in VC-1 decoding */ + h264_chroma_mc_func put_no_rnd_h264_chroma_pixels_tab[3]; h264_chroma_mc_func avg_h264_chroma_pixels_tab[3]; qpel_mc_func put_h264_qpel_pixels_tab[4][16]; qpel_mc_func avg_h264_qpel_pixels_tab[4][16]; + qpel_mc_func put_2tap_qpel_pixels_tab[4][16]; + qpel_mc_func avg_2tap_qpel_pixels_tab[4][16]; + h264_weight_func weight_h264_pixels_tab[10]; h264_biweight_func biweight_h264_pixels_tab[10]; @@ -304,12 +315,27 @@ typedef struct DSPContext { void (*h264_h_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta); void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta); + // h264_loop_filter_strength: simd only. the C version is inlined in h264.c + void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], + int bidir, int edges, int step, int mask_mv0, int mask_mv1); void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); void (*h261_loop_filter)(uint8_t *src, int stride); + /* assume len is a multiple of 4, and arrays are 16-byte aligned */ + void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); + /* assume len is a multiple of 8, and arrays are 16-byte aligned */ + void (*vector_fmul)(float *dst, const float *src, int len); + void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len); + /* assume len is a multiple of 8, and src arrays are 16-byte aligned */ + void (*vector_fmul_add_add)(float *dst, const float *src0, const float *src1, const float *src2, int src3, int len, int step); + + /* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767] + * simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */ + void (*float_to_int16)(int16_t *dst, const float *src, int len); + /* (I)DCT */ void (*fdct)(DCTELEM *block/* align 16*/); void (*fdct248)(DCTELEM *block/* align 16*/); @@ -374,8 +400,8 @@ typedef struct DSPContext { void (*vc1_inv_trans_8x4)(DCTELEM *b, int n); void (*vc1_inv_trans_4x8)(DCTELEM *b, int n); void (*vc1_inv_trans_4x4)(DCTELEM *b, int n); - void (*vc1_v_overlap)(uint8_t* src, int stride, int rnd); - void (*vc1_h_overlap)(uint8_t* src, int stride, int rnd); + void (*vc1_v_overlap)(uint8_t* src, int stride); + void (*vc1_h_overlap)(uint8_t* src, int stride); /* put 8x8 block with bicubic interpolation and quarterpel precision * last argument is actually round value instead of height */ @@ -553,6 +579,13 @@ void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx); void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx); +#elif defined(ARCH_BFIN) + +#define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (8))) +#define STRIDE_ALIGN 8 + +void dsputil_init_bfin(DSPContext* c, AVCodecContext *avctx); + #else #define DECLARE_ALIGNED_8(t,v) t v __attribute__ ((aligned (8))) @@ -595,6 +628,8 @@ void get_psnr(uint8_t *orig_image[3], uint8_t *coded_image[3], FFTSample type */ typedef float FFTSample; +struct MDCTContext; + typedef struct FFTComplex { FFTSample re, im; } FFTComplex; @@ -606,6 +641,8 @@ typedef struct FFTContext { FFTComplex *exptab; FFTComplex *exptab1; /* only used by SSE code */ void (*fft_calc)(struct FFTContext *s, FFTComplex *z); + void (*imdct_calc)(struct MDCTContext *s, FFTSample *output, + const FFTSample *input, FFTSample *tmp); } FFTContext; int ff_fft_init(FFTContext *s, int nbits, int inverse); @@ -636,6 +673,10 @@ typedef struct MDCTContext { int ff_mdct_init(MDCTContext *s, int nbits, int inverse); void ff_imdct_calc(MDCTContext *s, FFTSample *output, const FFTSample *input, FFTSample *tmp); +void ff_imdct_calc_3dn2(MDCTContext *s, FFTSample *output, + const FFTSample *input, FFTSample *tmp); +void ff_imdct_calc_sse(MDCTContext *s, FFTSample *output, + const FFTSample *input, FFTSample *tmp); void ff_mdct_calc(MDCTContext *s, FFTSample *out, const FFTSample *input, FFTSample *tmp); void ff_mdct_end(MDCTContext *s); @@ -660,4 +701,81 @@ static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int st return score;\ } + +static inline void copy_block2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) +{ + int i; + for(i=0; i<h; i++) + { + ST16(dst , LD16(src )); + dst+=dstStride; + src+=srcStride; + } +} + +static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) +{ + int i; + for(i=0; i<h; i++) + { + ST32(dst , LD32(src )); + dst+=dstStride; + src+=srcStride; + } +} + +static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) +{ + int i; + for(i=0; i<h; i++) + { + ST32(dst , LD32(src )); + ST32(dst+4 , LD32(src+4 )); + dst+=dstStride; + src+=srcStride; + } +} + +static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) +{ + int i; + for(i=0; i<h; i++) + { + ST32(dst , LD32(src )); + ST32(dst+4 , LD32(src+4 )); + dst[8]= src[8]; + dst+=dstStride; + src+=srcStride; + } +} + +static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) +{ + int i; + for(i=0; i<h; i++) + { + ST32(dst , LD32(src )); + ST32(dst+4 , LD32(src+4 )); + ST32(dst+8 , LD32(src+8 )); + ST32(dst+12, LD32(src+12)); + dst+=dstStride; + src+=srcStride; + } +} + +static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) +{ + int i; + for(i=0; i<h; i++) + { + ST32(dst , LD32(src )); + ST32(dst+4 , LD32(src+4 )); + ST32(dst+8 , LD32(src+8 )); + ST32(dst+12, LD32(src+12)); + dst[16]= src[16]; + dst+=dstStride; + src+=srcStride; + } +} + #endif |