Diffstat (limited to 'src/libffmpeg/libavcodec/alpha')
-rw-r--r--   src/libffmpeg/libavcodec/alpha/dsputil_alpha.c      | 246
-rw-r--r--   src/libffmpeg/libavcodec/alpha/dsputil_alpha_asm.S   | 306
-rw-r--r--   src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c     | 110
-rw-r--r--   src/libffmpeg/libavcodec/alpha/regdef.h              |  45
4 files changed, 552 insertions, 155 deletions
diff --git a/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c b/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c
index 5e1aa2093..9a3fb1eac 100644
--- a/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c
+++ b/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c
@@ -22,6 +22,8 @@ void simple_idct_axp(DCTELEM *block);

+void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
+                        int line_size, int h);
 void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
                                 int line_size);
 void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
@@ -103,145 +105,183 @@ void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
 }
 #endif

-/* Average 8 unsigned bytes in parallel: (b1 + b2) >> 1
-   Since the immediate result could be greater than 255, we do the
-   shift first. The result is too low by one if the bytes were both
-   odd, so we need to add (l1 & l2) & BYTE_VEC(0x01). */
-static inline UINT64 avg2_no_rnd(UINT64 l1, UINT64 l2)
-{
-    UINT64 correction = (l1 & l2) & BYTE_VEC(0x01);
-    l1 = (l1 & ~BYTE_VEC(0x01)) >> 1;
-    l2 = (l2 & ~BYTE_VEC(0x01)) >> 1;
-    return l1 + l2 + correction;
+static void clear_blocks_axp(DCTELEM *blocks) {
+    uint64_t *p = (uint64_t *) blocks;
+    int n = sizeof(DCTELEM) * 6 * 64;
+
+    do {
+        p[0] = 0;
+        p[1] = 0;
+        p[2] = 0;
+        p[3] = 0;
+        p[4] = 0;
+        p[5] = 0;
+        p[6] = 0;
+        p[7] = 0;
+        p += 8;
+        n -= 8 * 8;
+    } while (n);
 }

-/* Average 8 bytes with rounding: (b1 + b2 + 1) >> 1
-   The '1' only has an effect when one byte is even and the other odd,
-   i. e. we also need to add (l1 ^ l2) & BYTE_VEC(0x01).
-   Incidentally, that is equivalent to (l1 | l2) & BYTE_VEC(0x01). */
-static inline UINT64 avg2(UINT64 l1, UINT64 l2)
+static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
 {
-    UINT64 correction = (l1 | l2) & BYTE_VEC(0x01);
-    l1 = (l1 & ~BYTE_VEC(0x01)) >> 1;
-    l2 = (l2 & ~BYTE_VEC(0x01)) >> 1;
-    return l1 + l2 + correction;
+    return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
 }

-static inline UINT64 avg4(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4)
+static inline uint64_t avg2(uint64_t a, uint64_t b)
 {
-    UINT64 r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
-              + ((l2 & ~BYTE_VEC(0x03)) >> 2)
-              + ((l3 & ~BYTE_VEC(0x03)) >> 2)
-              + ((l4 & ~BYTE_VEC(0x03)) >> 2);
-    UINT64 r2 = ((  (l1 & BYTE_VEC(0x03))
-                  + (l2 & BYTE_VEC(0x03))
-                  + (l3 & BYTE_VEC(0x03))
-                  + (l4 & BYTE_VEC(0x03))
-                  + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
-    return r1 + r2;
+    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
 }

-static inline UINT64 avg4_no_rnd(UINT64 l1, UINT64 l2, UINT64 l3, UINT64 l4)
+#if 0
+/* The XY2 routines basically utilize this scheme, but reuse parts in
+   each iteration. */
+static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4)
 {
-    UINT64 r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
-              + ((l2 & ~BYTE_VEC(0x03)) >> 2)
-              + ((l3 & ~BYTE_VEC(0x03)) >> 2)
-              + ((l4 & ~BYTE_VEC(0x03)) >> 2);
-    UINT64 r2 = ((  (l1 & BYTE_VEC(0x03))
-                  + (l2 & BYTE_VEC(0x03))
-                  + (l3 & BYTE_VEC(0x03))
-                  + (l4 & BYTE_VEC(0x03))
-                  + BYTE_VEC(0x01)) >> 2) & BYTE_VEC(0x03);
+    uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l2 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l3 & ~BYTE_VEC(0x03)) >> 2)
+                + ((l4 & ~BYTE_VEC(0x03)) >> 2);
+    uint64_t r2 = ((  (l1 & BYTE_VEC(0x03))
+                    + (l2 & BYTE_VEC(0x03))
+                    + (l3 & BYTE_VEC(0x03))
+                    + (l4 & BYTE_VEC(0x03))
+                    + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03);
     return r1 + r2;
 }
+#endif

-#define PIXOPNAME(suffix) put ## suffix
-#define BTYPE UINT8
-#define AVG2 avg2
-#define AVG4 avg4
-#define STORE(l, b) stq(l, b)
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
-#undef AVG2
-#undef AVG4
-#undef STORE
+#define OP(LOAD, STORE)                 \
+    do {                                \
+        STORE(LOAD(pixels), block);     \
+        pixels += line_size;            \
+        block += line_size;             \
+    } while (--h)

-#define PIXOPNAME(suffix) put_no_rnd ## suffix
-#define BTYPE UINT8
-#define AVG2 avg2_no_rnd
-#define AVG4 avg4_no_rnd
-#define STORE(l, b) stq(l, b)
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
-#undef AVG2
-#undef AVG4
-#undef STORE
+#define OP_X2(LOAD, STORE)                                      \
+    do {                                                        \
+        uint64_t pix1, pix2;                                    \
+                                                                \
+        pix1 = LOAD(pixels);                                    \
+        pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);        \
+        STORE(AVG2(pix1, pix2), block);                         \
+        pixels += line_size;                                    \
+        block += line_size;                                     \
+    } while (--h)

-/* The following functions are untested. */
-#if 0
+#define OP_Y2(LOAD, STORE)                              \
+    do {                                                \
+        uint64_t pix = LOAD(pixels);                    \
+        do {                                            \
+            uint64_t next_pix;                          \
+                                                        \
+            pixels += line_size;                        \
+            next_pix = LOAD(pixels);                    \
+            STORE(AVG2(pix, next_pix), block);          \
+            block += line_size;                         \
+            pix = next_pix;                             \
+        } while (--h);                                  \
+    } while (0)
+
+#define OP_XY2(LOAD, STORE)                                                 \
+    do {                                                                    \
+        uint64_t pix1 = LOAD(pixels);                                       \
+        uint64_t pix2 = pix1 >> 8 | ((uint64_t) pixels[8] << 56);           \
+        uint64_t pix_l = (pix1 & BYTE_VEC(0x03))                            \
+                       + (pix2 & BYTE_VEC(0x03));                           \
+        uint64_t pix_h = ((pix1 & ~BYTE_VEC(0x03)) >> 2)                    \
+                       + ((pix2 & ~BYTE_VEC(0x03)) >> 2);                   \
+                                                                            \
+        do {                                                                \
+            uint64_t npix1, npix2;                                          \
+            uint64_t npix_l, npix_h;                                        \
+            uint64_t avg;                                                   \
+                                                                            \
+            pixels += line_size;                                            \
+            npix1 = LOAD(pixels);                                           \
+            npix2 = npix1 >> 8 | ((uint64_t) pixels[8] << 56);              \
+            npix_l = (npix1 & BYTE_VEC(0x03))                               \
+                   + (npix2 & BYTE_VEC(0x03));                              \
+            npix_h = ((npix1 & ~BYTE_VEC(0x03)) >> 2)                       \
+                   + ((npix2 & ~BYTE_VEC(0x03)) >> 2);                      \
+            avg = (((pix_l + npix_l + AVG4_ROUNDER) >> 2) & BYTE_VEC(0x03)) \
+                + pix_h + npix_h;                                           \
+            STORE(avg, block);                                              \
+                                                                            \
+            block += line_size;                                             \
+            pix_l = npix_l;                                                 \
+            pix_h = npix_h;                                                 \
+        } while (--h);                                                      \
+    } while (0)
+
+#define MAKE_OP(OPNAME, SUFF, OPKIND, STORE)                            \
+static void OPNAME ## _pixels ## SUFF ## _axp                           \
+        (uint8_t *restrict block, const uint8_t *restrict pixels,       \
+         int line_size, int h)                                          \
+{                                                                       \
+    if ((size_t) pixels & 0x7) {                                        \
+        OPKIND(uldq, STORE);                                            \
+    } else {                                                            \
+        OPKIND(ldq, STORE);                                             \
+    }                                                                   \
+}

-#define PIXOPNAME(suffix) avg ## suffix
-#define BTYPE UINT8
+#define PIXOP(OPNAME, STORE)                    \
+    MAKE_OP(OPNAME, ,     OP,     STORE)        \
+    MAKE_OP(OPNAME, _x2,  OP_X2,  STORE)        \
+    MAKE_OP(OPNAME, _y2,  OP_Y2,  STORE)        \
+    MAKE_OP(OPNAME, _xy2, OP_XY2, STORE)
+
+/* Rounding primitives. */
 #define AVG2 avg2
 #define AVG4 avg4
+#define AVG4_ROUNDER BYTE_VEC(0x02)
+#define STORE(l, b) stq(l, b)
+PIXOP(put, STORE);
+
+#undef STORE
 #define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
+PIXOP(avg, STORE);
+
+/* Not rounding primitives. */
 #undef AVG2
 #undef AVG4
+#undef AVG4_ROUNDER
 #undef STORE
-
-#define PIXOPNAME(suffix) avg_no_rnd ## suffix
-#define BTYPE UINT8
 #define AVG2 avg2_no_rnd
 #define AVG4 avg4_no_rnd
-#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
-#undef AVG2
-#undef AVG4
-#undef STORE
+#define AVG4_ROUNDER BYTE_VEC(0x01)
+#define STORE(l, b) stq(l, b)
+PIXOP(put_no_rnd, STORE);

-#define PIXOPNAME(suffix) sub ## suffix
-#define BTYPE DCTELEM
-#define AVG2 avg2
-#define AVG4 avg4
-#define STORE(l, block) do {                    \
-    UINT64 xxx = l;                             \
-    (block)[0] -= (xxx >> 0) & 0xff;            \
-    (block)[1] -= (xxx >> 8) & 0xff;            \
-    (block)[2] -= (xxx >> 16) & 0xff;           \
-    (block)[3] -= (xxx >> 24) & 0xff;           \
-    (block)[4] -= (xxx >> 32) & 0xff;           \
-    (block)[5] -= (xxx >> 40) & 0xff;           \
-    (block)[6] -= (xxx >> 48) & 0xff;           \
-    (block)[7] -= (xxx >> 56) & 0xff;           \
-} while (0)
-#include "pixops.h"
-#undef PIXOPNAME
-#undef BTYPE
-#undef AVG2
-#undef AVG4
 #undef STORE
-
-#endif
+#define STORE(l, b) stq(AVG2(l, ldq(b)), b);
+PIXOP(avg_no_rnd, STORE);

 void dsputil_init_alpha(void)
 {
-    put_pixels_tab[0] = put_pixels_axp;
+    put_pixels_tab[0] = put_pixels_axp_asm;
     put_pixels_tab[1] = put_pixels_x2_axp;
     put_pixels_tab[2] = put_pixels_y2_axp;
     put_pixels_tab[3] = put_pixels_xy2_axp;

-    put_no_rnd_pixels_tab[0] = put_pixels_axp;
+    put_no_rnd_pixels_tab[0] = put_pixels_axp_asm;
     put_no_rnd_pixels_tab[1] = put_no_rnd_pixels_x2_axp;
     put_no_rnd_pixels_tab[2] = put_no_rnd_pixels_y2_axp;
     put_no_rnd_pixels_tab[3] = put_no_rnd_pixels_xy2_axp;

+    avg_pixels_tab[0] = avg_pixels_axp;
+    avg_pixels_tab[1] = avg_pixels_x2_axp;
+    avg_pixels_tab[2] = avg_pixels_y2_axp;
+    avg_pixels_tab[3] = avg_pixels_xy2_axp;
+
+    avg_no_rnd_pixels_tab[0] = avg_no_rnd_pixels_axp;
+    avg_no_rnd_pixels_tab[1] = avg_no_rnd_pixels_x2_axp;
+    avg_no_rnd_pixels_tab[2] = avg_no_rnd_pixels_y2_axp;
+    avg_no_rnd_pixels_tab[3] = avg_no_rnd_pixels_xy2_axp;
+
+    clear_blocks = clear_blocks_axp;
+
     /* amask clears all bits that correspond to present features. */
     if (amask(AMASK_MVI) == 0) {
         put_pixels_clamped = put_pixels_clamped_mvi_asm;
diff --git a/src/libffmpeg/libavcodec/alpha/dsputil_alpha_asm.S b/src/libffmpeg/libavcodec/alpha/dsputil_alpha_asm.S
new file mode 100644
index 000000000..5349e443c
--- /dev/null
+++ b/src/libffmpeg/libavcodec/alpha/dsputil_alpha_asm.S
@@ -0,0 +1,306 @@
+/*
+ * Alpha optimized DSP utils
+ * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * These functions are scheduled for pca56. They should work
+ * reasonably on ev6, though.
+ */
+
+#include "regdef.h"
+#ifdef HAVE_AV_CONFIG_H
+#include "config.h"
+#endif
+
+/* Some nicer register names. */
+#define ta t10
+#define tb t11
+#define tc t12
+#define td AT
+/* Danger: these overlap with the argument list and the return value */
+#define te a5
+#define tf a4
+#define tg a3
+#define th v0
+
+        .set noat
+        .set noreorder
+        .arch pca56
+        .text
+
+/************************************************************************
+ * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
+ *                         int line_size, int h)
+ */
+        .align 6
+        .globl put_pixels_axp_asm
+        .ent put_pixels_axp_asm
+put_pixels_axp_asm:
+        .frame sp, 0, ra
+        .prologue 0
+
+#ifdef HAVE_GPROF
+        lda     AT, _mcount
+        jsr     AT, (AT), _mcount
+#endif
+
+        and     a1, 7, t0
+        beq     t0, $aligned
+
+        .align 4
+$unaligned:
+        ldq_u   t0, 0(a1)
+        ldq_u   t1, 8(a1)
+        addq    a1, a2, a1
+        nop
+
+        ldq_u   t2, 0(a1)
+        ldq_u   t3, 8(a1)
+        addq    a1, a2, a1
+        nop
+
+        ldq_u   t4, 0(a1)
+        ldq_u   t5, 8(a1)
+        addq    a1, a2, a1
+        nop
+
+        ldq_u   t6, 0(a1)
+        ldq_u   t7, 8(a1)
+        extql   t0, a1, t0
+        addq    a1, a2, a1
+
+        extqh   t1, a1, t1
+        addq    a0, a2, t8
+        extql   t2, a1, t2
+        addq    t8, a2, t9
+
+        extqh   t3, a1, t3
+        addq    t9, a2, ta
+        extql   t4, a1, t4
+        or      t0, t1, t0
+
+        extqh   t5, a1, t5
+        or      t2, t3, t2
+        extql   t6, a1, t6
+        or      t4, t5, t4
+
+        extqh   t7, a1, t7
+        or      t6, t7, t6
+        stq     t0, 0(a0)
+        stq     t2, 0(t8)
+
+        stq     t4, 0(t9)
+        subq    a3, 4, a3
+        stq     t6, 0(ta)
+        addq    ta, a2, a0
+
+        bne     a3, $unaligned
+        ret
+
+        .align 4
+$aligned:
+        ldq     t0, 0(a1)
+        addq    a1, a2, a1
+        ldq     t1, 0(a1)
+        addq    a1, a2, a1
+
+        ldq     t2, 0(a1)
+        addq    a1, a2, a1
+        ldq     t3, 0(a1)
+        addq    a1, a2, a1
+
+        ldq     t4, 0(a1)
+        addq    a1, a2, a1
+        ldq     t5, 0(a1)
+        addq    a1, a2, a1
+
+        ldq     t6, 0(a1)
+        addq    a1, a2, a1
+        ldq     t7, 0(a1)
+        addq    a1, a2, a1
+
+        addq    a0, a2, t8
+        stq     t0, 0(a0)
+        addq    t8, a2, t9
+        stq     t1, 0(t8)
+
+        addq    t9, a2, ta
+        stq     t2, 0(t9)
+        addq    ta, a2, tb
+        stq     t3, 0(ta)
+
+        addq    tb, a2, tc
+        stq     t4, 0(tb)
+        addq    tc, a2, td
+        stq     t5, 0(tc)
+
+        addq    td, a2, te
+        stq     t6, 0(td)
+        addq    te, a2, a0
+        stq     t7, 0(te)
+
+        subq    a3, 8, a3
+        bne     a3, $aligned
+
+        ret
+        .end put_pixels_axp_asm
+
+/************************************************************************
+ * void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
+ *                                 int line_size)
+ */
+        .align 6
+        .globl put_pixels_clamped_mvi_asm
+        .ent put_pixels_clamped_mvi_asm
+put_pixels_clamped_mvi_asm:
+        .frame sp, 0, ra
+        .prologue 0
+
+#ifdef HAVE_GPROF
+        lda     AT, _mcount
+        jsr     AT, (AT), _mcount
+#endif
+
+        lda     t8, -1
+        lda     t9, 8           # loop counter
+        zap     t8, 0xaa, t8    # 00ff00ff00ff00ff
+
+        .align 4
+1:      ldq     t0, 0(a0)
+        ldq     t1, 8(a0)
+        ldq     t2, 16(a0)
+        ldq     t3, 24(a0)
+
+        maxsw4  t0, zero, t0
+        subq    t9, 2, t9
+        maxsw4  t1, zero, t1
+        lda     a0, 32(a0)
+
+        maxsw4  t2, zero, t2
+        addq    a1, a2, ta
+        maxsw4  t3, zero, t3
+        minsw4  t0, t8, t0
+
+        minsw4  t1, t8, t1
+        minsw4  t2, t8, t2
+        minsw4  t3, t8, t3
+        pkwb    t0, t0
+
+        pkwb    t1, t1
+        pkwb    t2, t2
+        pkwb    t3, t3
+        stl     t0, 0(a1)
+
+        stl     t1, 4(a1)
+        addq    ta, a2, a1
+        stl     t2, 0(ta)
+        stl     t3, 4(ta)
+
+        bne     t9, 1b
+        ret
+        .end put_pixels_clamped_mvi_asm
+
+/************************************************************************
+ * void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
+ *                                 int line_size)
+ */
+        .align 6
+        .globl add_pixels_clamped_mvi_asm
+        .ent add_pixels_clamped_mvi_asm
+add_pixels_clamped_mvi_asm:
+        .frame sp, 0, ra
+        .prologue 0
+
+#ifdef HAVE_GPROF
+        lda     AT, _mcount
+        jsr     AT, (AT), _mcount
+#endif
+
+        lda     t1, -1
+        lda     th, 8
+        zap     t1, 0x33, tg
+        nop
+
+        srl     tg, 1, t0
+        xor     tg, t0, tg      # 0x8000800080008000
+        zap     t1, 0xaa, tf    # 0x00ff00ff00ff00ff
+
+        .align 4
+1:      ldl     t1, 0(a1)       # pix0 (try to hit cache line soon)
+        ldl     t4, 4(a1)       # pix1
+        addq    a1, a2, te      # pixels += line_size
+        ldq     t0, 0(a0)       # shorts0
+
+        ldl     t7, 0(te)       # pix2 (try to hit cache line soon)
+        ldl     ta, 4(te)       # pix3
+        ldq     t3, 8(a0)       # shorts1
+        ldq     t6, 16(a0)      # shorts2
+
+        ldq     t9, 24(a0)      # shorts3
+        unpkbw  t1, t1          # 0 0 (quarter/op no.)
+        and     t0, tg, t2      # 0 1
+        unpkbw  t4, t4          # 1 0
+
+        bic     t0, tg, t0      # 0 2
+        unpkbw  t7, t7          # 2 0
+        and     t3, tg, t5      # 1 1
+        addq    t0, t1, t0      # 0 3
+
+        xor     t0, t2, t0      # 0 4
+        unpkbw  ta, ta          # 3 0
+        and     t6, tg, t8      # 2 1
+        maxsw4  t0, zero, t0    # 0 5
+
+        bic     t3, tg, t3      # 1 2
+        bic     t6, tg, t6      # 2 2
+        minsw4  t0, tf, t0      # 0 6
+        addq    t3, t4, t3      # 1 3
+
+        pkwb    t0, t0          # 0 7
+        xor     t3, t5, t3      # 1 4
+        maxsw4  t3, zero, t3    # 1 5
+        addq    t6, t7, t6      # 2 3
+
+        xor     t6, t8, t6      # 2 4
+        and     t9, tg, tb      # 3 1
+        minsw4  t3, tf, t3      # 1 6
+        bic     t9, tg, t9      # 3 2
+
+        maxsw4  t6, zero, t6    # 2 5
+        addq    t9, ta, t9      # 3 3
+        stl     t0, 0(a1)       # 0 8
+        minsw4  t6, tf, t6      # 2 6
+
+        xor     t9, tb, t9      # 3 4
+        maxsw4  t9, zero, t9    # 3 5
+        lda     a0, 32(a0)      # block += 16;
+        pkwb    t3, t3          # 1 7
+
+        minsw4  t9, tf, t9      # 3 6
+        subq    th, 2, th
+        pkwb    t6, t6          # 2 7
+        pkwb    t9, t9          # 3 7
+
+        stl     t3, 4(a1)       # 1 8
+        addq    te, a2, a1      # pixels += line_size
+        stl     t6, 0(te)       # 2 8
+        stl     t9, 4(te)       # 3 8
+
+        bne     th, 1b
+        ret
+        .end add_pixels_clamped_mvi_asm
diff --git a/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c b/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c
index eb1997eee..0be327079 100644
--- a/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c
+++ b/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c
@@ -23,69 +23,75 @@ extern UINT8 zigzag_end[64];

-static void dct_unquantize_h263_axp(MpegEncContext *s,
-                                    DCTELEM *block, int n, int qscale)
+static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block,
+                                    int n, int qscale)
 {
-    int i, level;
-    UINT64 qmul, qadd;
+    int i, n_coeffs;
+    uint64_t qmul, qadd;
+    uint64_t correction;
+    DCTELEM *orig_block = block;
+    DCTELEM block0;

-    ASM_ACCEPT_MVI;
-
     if (s->mb_intra) {
-        if (n < 4)
-            block[0] = block[0] * s->y_dc_scale;
-        else
-            block[0] = block[0] * s->c_dc_scale;
-        /* Catch up to aligned point. */
-        qmul = s->qscale << 1;
-        qadd = (s->qscale - 1) | 1;
-        for (i = 1; i < 4; ++i) {
-            level = block[i];
-            if (level) {
-                if (level < 0) {
-                    level = level * qmul - qadd;
-                } else {
-                    level = level * qmul + qadd;
-                }
-                block[i] = level;
-            }
-        }
-        block += 4;
-        i = 60 / 4;
+        if (!s->h263_aic) {
+            if (n < 4)
+                block0 = block[0] * s->y_dc_scale;
+            else
+                block0 = block[0] * s->c_dc_scale;
+        }
+        n_coeffs = 64; // does not always use zigzag table
     } else {
-        i = zigzag_end[s->block_last_index[n]] / 4;
+        n_coeffs = zigzag_end[s->block_last_index[n]];
     }

-    qmul = s->qscale << 1;
+
+    qmul = qscale << 1;
     qadd = WORD_VEC((qscale - 1) | 1);
-    do {
-        UINT64 levels, negmask, zeromask, corr;
-        levels = ldq(block);
-        if (levels == 0)
-            continue;
-        zeromask = cmpbge(0, levels);
-        zeromask &= zeromask >> 1;
-        /* Negate all negative words. */
-        negmask = maxsw4(levels, WORD_VEC(0xffff)); /* negative -> ffff (-1) */
-        negmask = minsw4(negmask, 0);               /* positive -> 0000 (0) */
-        corr = negmask & WORD_VEC(0x0001); /* twos-complement correction */
-        levels ^= negmask;
-        levels += corr;
+    /* This mask kills spill from negative subwords to the next subword. */
+    correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */
+
+    for(i = 0; i < n_coeffs; block += 4, i += 4) {
+        uint64_t levels, negmask, zeros, add;
+
+        levels = ldq(block);
+        if (levels == 0)
+            continue;
+
+#ifdef __alpha_max__
+        /* I don't think the speed difference justifies runtime
+           detection. */
+        ASM_ACCEPT_MVI;
+        negmask = maxsw4(levels, -1); /* negative -> ffff (-1) */
+        negmask = minsw4(negmask, 0); /* positive -> 0000 (0) */
+#else
+        negmask = cmpbge(WORD_VEC(0x7fff), levels);
+        negmask &= (negmask >> 1) | (1 << 7);
+        negmask = zap(-1, negmask);
+#endif
+
+        zeros = cmpbge(0, levels);
+        zeros &= zeros >> 1;
+        /* zeros |= zeros << 1 is not needed since qadd <= 255, so
+           zapping the lower byte suffices. */

-        levels = levels * qmul;
-        levels += zap(qadd, zeromask);
+        levels *= qmul;
+        levels -= correction & (negmask << 16);

-        /* Re-negate negative words. */
-        levels -= corr;
-        levels ^= negmask;
+        /* Negate qadd for negative levels. */
+        add = qadd ^ negmask;
+        add += WORD_VEC(0x0001) & negmask;
+        /* Set qadd to 0 for levels == 0. */
+        add = zap(add, zeros);

-        stq(levels, block);
-    } while (block += 4, --i);
+        levels += add;
+
+        stq(levels, block);
+    }
+
+    if (s->mb_intra && !s->h263_aic)
+        orig_block[0] = block0;
 }

 void MPV_common_init_axp(MpegEncContext *s)
 {
-    if (amask(AMASK_MVI) == 0) {
-        if (s->out_format == FMT_H263)
-            s->dct_unquantize = dct_unquantize_h263_axp;
-    }
+    s->dct_unquantize_h263 = dct_unquantize_h263_axp;
 }
diff --git a/src/libffmpeg/libavcodec/alpha/regdef.h b/src/libffmpeg/libavcodec/alpha/regdef.h
new file mode 100644
index 000000000..7e7fc06b2
--- /dev/null
+++ b/src/libffmpeg/libavcodec/alpha/regdef.h
@@ -0,0 +1,45 @@
+/* Some BSDs don't seem to have regdef.h... sigh */
+#ifndef alpha_regdef_h
+#define alpha_regdef_h
+
+#define v0      $0      /* function return value */
+
+#define t0      $1      /* temporary registers (caller-saved) */
+#define t1      $2
+#define t2      $3
+#define t3      $4
+#define t4      $5
+#define t5      $6
+#define t6      $7
+#define t7      $8
+
+#define s0      $9      /* saved-registers (callee-saved registers) */
+#define s1      $10
+#define s2      $11
+#define s3      $12
+#define s4      $13
+#define s5      $14
+#define s6      $15
+#define fp      s6      /* frame-pointer (s6 in frame-less procedures) */
+
+#define a0      $16     /* argument registers (caller-saved) */
+#define a1      $17
+#define a2      $18
+#define a3      $19
+#define a4      $20
+#define a5      $21
+
+#define t8      $22     /* more temps (caller-saved) */
+#define t9      $23
+#define t10     $24
+#define t11     $25
+#define ra      $26     /* return address register */
+#define t12     $27
+
+#define pv      t12     /* procedure-variable register */
+#define AT      $at     /* assembler temporary */
+#define gp      $29     /* global pointer */
+#define sp      $30     /* stack pointer */
+#define zero    $31     /* reads as zero, writes are noops */
+
+#endif /* alpha_regdef_h */
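A note on the byte-parallel averaging this diff introduces in dsputil_alpha.c: the new avg2() and avg2_no_rnd() compute eight 8-bit averages inside one 64-bit register with plain integer operations, so they run on any Alpha, not only MVI-capable parts. The sketch below is not part of the commit; it only checks the two identities against a per-byte reference on a host compiler. The BYTE_VEC definition written out here as 0x01-replication is assumed to match the macro used by dsputil_alpha.c, and rand64()/main() are illustrative test scaffolding.

```c
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

#define BYTE_VEC(x) ((x) * 0x0101010101010101ULL)

/* Truncating byte-wise average, as in the new avg2_no_rnd():
   floor((a + b) / 2) in each of the eight bytes. */
static uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
{
    return (a & b) + (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
}

/* Rounding byte-wise average, as in the new avg2():
   ceil((a + b) / 2) in each of the eight bytes. */
static uint64_t avg2(uint64_t a, uint64_t b)
{
    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
}

/* Assemble 64 bits from several rand() calls; coverage is coarse,
   but enough for a sanity check. */
static uint64_t rand64(void)
{
    uint64_t r = 0;
    for (int i = 0; i < 4; i++)
        r = (r << 16) ^ (uint64_t) (rand() & 0xffff);
    return r;
}

int main(void)
{
    for (int iter = 0; iter < 100000; iter++) {
        uint64_t a = rand64(), b = rand64();
        uint64_t rnd = avg2(a, b), no_rnd = avg2_no_rnd(a, b);

        for (int i = 0; i < 8; i++) {
            unsigned x = (a >> (8 * i)) & 0xff;
            unsigned y = (b >> (8 * i)) & 0xff;
            assert(((rnd    >> (8 * i)) & 0xff) == (x + y + 1) / 2);
            assert(((no_rnd >> (8 * i)) & 0xff) == (x + y) / 2);
        }
    }
    return 0;
}
```

Masking the XOR with BYTE_VEC(0xfe) before the shift keeps each byte's low bit from leaking into its neighbour, so no cross-byte carry or borrow can occur. Compared with the removed mask-shift-and-correct versions, each average needs fewer operations and no separate correction term, which helps keep the OP_X2/OP_Y2/OP_XY2 macros short.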