diff options
author | Mike Melanson <mike@multimedia.cx> | 2004-02-01 05:31:16 +0000 |
---|---|---|
committer | Mike Melanson <mike@multimedia.cx> | 2004-02-01 05:31:16 +0000 |
commit | 61d793ef13ac2ef8f9c2b41b71430f21fac80337 (patch) | |
tree | 69a1b20b01993b4d61fe1c8c2be33dd2e362a40c /src/libffmpeg/libavcodec/alpha | |
parent | f707774ac5d48c02c6a36327304d88629b0e38f9 (diff) | |
download | xine-lib-61d793ef13ac2ef8f9c2b41b71430f21fac80337.tar.gz xine-lib-61d793ef13ac2ef8f9c2b41b71430f21fac80337.tar.bz2 |
sync to ffmpeg build 4699
CVS patchset: 6090
CVS date: 2004/02/01 05:31:16
Diffstat (limited to 'src/libffmpeg/libavcodec/alpha')
-rw-r--r-- | src/libffmpeg/libavcodec/alpha/dsputil_alpha.c | 26 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/alpha/motion_est_alpha.c | 12 | ||||
-rw-r--r-- | src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c | 77 |
3 files changed, 79 insertions, 36 deletions
diff --git a/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c b/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c index 82ff7db66..496f46120 100644 --- a/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c +++ b/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c @@ -39,11 +39,11 @@ void get_pixels_mvi(DCTELEM *restrict block, const uint8_t *restrict pixels, int line_size); void diff_pixels_mvi(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride); -int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); +int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); int pix_abs16x16_mvi_asm(uint8_t *pix1, uint8_t *pix2, int line_size); -int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); -int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); -int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size); +int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); +int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); +int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h); #if 0 /* These functions were the base for the optimized assembler routines, @@ -290,11 +290,6 @@ static int sad16x16_mvi(void *s, uint8_t *a, uint8_t *b, int stride) return pix_abs16x16_mvi_asm(a, b, stride); } -static int sad8x8_mvi(void *s, uint8_t *a, uint8_t *b, int stride) -{ - return pix_abs8x8_mvi(a, b, stride); -} - void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) { c->put_pixels_tab[0][0] = put_pixels16_axp_asm; @@ -347,12 +342,13 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) c->get_pixels = get_pixels_mvi; c->diff_pixels = diff_pixels_mvi; c->sad[0] = sad16x16_mvi; - c->sad[1] = sad8x8_mvi; - c->pix_abs8x8 = pix_abs8x8_mvi; - c->pix_abs16x16 = pix_abs16x16_mvi_asm; - c->pix_abs16x16_x2 = pix_abs16x16_x2_mvi; - c->pix_abs16x16_y2 = pix_abs16x16_y2_mvi; - c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mvi; + c->sad[1] = pix_abs8x8_mvi; +// c->pix_abs[0][0] = pix_abs16x16_mvi_asm; //FIXME function arguments for the asm must be fixed + c->pix_abs[0][0] = sad16x16_mvi; + c->pix_abs[1][0] = pix_abs8x8_mvi; + c->pix_abs[0][1] = pix_abs16x16_x2_mvi; + c->pix_abs[0][2] = pix_abs16x16_y2_mvi; + c->pix_abs[0][3] = pix_abs16x16_xy2_mvi; } put_pixels_clamped_axp_p = c->put_pixels_clamped; diff --git a/src/libffmpeg/libavcodec/alpha/motion_est_alpha.c b/src/libffmpeg/libavcodec/alpha/motion_est_alpha.c index 804e1d2b6..8b8a0a25c 100644 --- a/src/libffmpeg/libavcodec/alpha/motion_est_alpha.c +++ b/src/libffmpeg/libavcodec/alpha/motion_est_alpha.c @@ -84,10 +84,9 @@ static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) return r1 + r2; } -int pix_abs8x8_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int result = 0; - int h = 8; if ((size_t) pix2 & 0x7) { /* works only when pix2 is actually unaligned */ @@ -160,10 +159,9 @@ int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) } #endif -int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int result = 0; - int h = 16; uint64_t disalign = (size_t) pix2 & 0x7; switch (disalign) { @@ -234,10 +232,9 @@ int pix_abs16x16_x2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) return result; } -int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int result = 0; - int h = 16; if ((size_t) pix2 & 0x7) { uint64_t t, p2_l, p2_r; @@ -288,10 +285,9 @@ int pix_abs16x16_y2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) return result; } -int pix_abs16x16_xy2_mvi(uint8_t *pix1, uint8_t *pix2, int line_size) +int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) { int result = 0; - int h = 16; uint64_t p1_l, p1_r; uint64_t p2_l, p2_r, p2_x; diff --git a/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c b/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c index 6b720373c..f64fb7472 100644 --- a/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c +++ b/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c @@ -21,7 +21,7 @@ #include "../dsputil.h" #include "../mpegvideo.h" -static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block, +static void dct_unquantize_h263_intra_axp(MpegEncContext *s, DCTELEM *block, int n, int qscale) { int i, n_coeffs; @@ -35,19 +35,15 @@ static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block, /* This mask kills spill from negative subwords to the next subword. */ correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */ - if (s->mb_intra) { - if (!s->h263_aic) { - if (n < 4) - block0 = block[0] * s->y_dc_scale; - else - block0 = block[0] * s->c_dc_scale; - } else { - qadd = 0; - } - n_coeffs = 63; // does not always use zigzag table + if (!s->h263_aic) { + if (n < 4) + block0 = block[0] * s->y_dc_scale; + else + block0 = block[0] * s->c_dc_scale; } else { - n_coeffs = s->intra_scantable.raster_end[s->block_last_index[n]]; + qadd = 0; } + n_coeffs = 63; // does not always use zigzag table for(i = 0; i <= n_coeffs; block += 4, i += 4) { uint64_t levels, negmask, zeros, add; @@ -90,7 +86,62 @@ static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block, orig_block[0] = block0; } +static void dct_unquantize_h263_inter_axp(MpegEncContext *s, DCTELEM *block, + int n, int qscale) +{ + int i, n_coeffs; + uint64_t qmul, qadd; + uint64_t correction; + DCTELEM *orig_block = block; + DCTELEM block0; + + qadd = WORD_VEC((qscale - 1) | 1); + qmul = qscale << 1; + /* This mask kills spill from negative subwords to the next subword. */ + correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */ + + n_coeffs = s->intra_scantable.raster_end[s->block_last_index[n]]; + + for(i = 0; i <= n_coeffs; block += 4, i += 4) { + uint64_t levels, negmask, zeros, add; + + levels = ldq(block); + if (levels == 0) + continue; + +#ifdef __alpha_max__ + /* I don't think the speed difference justifies runtime + detection. */ + negmask = maxsw4(levels, -1); /* negative -> ffff (-1) */ + negmask = minsw4(negmask, 0); /* positive -> 0000 (0) */ +#else + negmask = cmpbge(WORD_VEC(0x7fff), levels); + negmask &= (negmask >> 1) | (1 << 7); + negmask = zap(-1, negmask); +#endif + + zeros = cmpbge(0, levels); + zeros &= zeros >> 1; + /* zeros |= zeros << 1 is not needed since qadd <= 255, so + zapping the lower byte suffices. */ + + levels *= qmul; + levels -= correction & (negmask << 16); + + /* Negate qadd for negative levels. */ + add = qadd ^ negmask; + add += WORD_VEC(0x0001) & negmask; + /* Set qadd to 0 for levels == 0. */ + add = zap(add, zeros); + + levels += add; + + stq(levels, block); + } +} + void MPV_common_init_axp(MpegEncContext *s) { - s->dct_unquantize_h263 = dct_unquantize_h263_axp; + s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_axp; + s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_axp; } |