diff options
Diffstat (limited to 'src')
21 files changed, 1077 insertions, 144 deletions
diff --git a/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c b/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c index 401dfd787..706462a59 100644 --- a/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c +++ b/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c @@ -21,11 +21,15 @@ #include "../dsputil.h" void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, - int line_size, int h); + int line_size, int h); void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, - int line_size); + int line_size); void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, - int line_size); + int line_size); +void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels, + int line_size); +void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels, + int line_size); void get_pixels_mvi(DCTELEM *restrict block, const uint8_t *restrict pixels, int line_size); @@ -147,14 +151,14 @@ static inline uint64_t avg2(uint64_t a, uint64_t b) static inline uint64_t avg4(uint64_t l1, uint64_t l2, uint64_t l3, uint64_t l4) { uint64_t r1 = ((l1 & ~BYTE_VEC(0x03)) >> 2) - + ((l2 & ~BYTE_VEC(0x03)) >> 2) - + ((l3 & ~BYTE_VEC(0x03)) >> 2) - + ((l4 & ~BYTE_VEC(0x03)) >> 2); + + ((l2 & ~BYTE_VEC(0x03)) >> 2) + + ((l3 & ~BYTE_VEC(0x03)) >> 2) + + ((l4 & ~BYTE_VEC(0x03)) >> 2); uint64_t r2 = (( (l1 & BYTE_VEC(0x03)) - + (l2 & BYTE_VEC(0x03)) - + (l3 & BYTE_VEC(0x03)) - + (l4 & BYTE_VEC(0x03)) - + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03); + + (l2 & BYTE_VEC(0x03)) + + (l3 & BYTE_VEC(0x03)) + + (l4 & BYTE_VEC(0x03)) + + BYTE_VEC(0x02)) >> 2) & BYTE_VEC(0x03); return r1 + r2; } #endif @@ -338,4 +342,7 @@ void dsputil_init_alpha(DSPContext* c, unsigned mask) c->pix_abs16x16_y2 = pix_abs16x16_y2_mvi; c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mvi; } + + put_pixels_clamped_axp_p = c->put_pixels_clamped; + add_pixels_clamped_axp_p = c->add_pixels_clamped; } diff --git a/src/libffmpeg/libavcodec/alpha/simple_idct_alpha.c b/src/libffmpeg/libavcodec/alpha/simple_idct_alpha.c new file mode 100644 index 000000000..5ce017740 --- /dev/null +++ b/src/libffmpeg/libavcodec/alpha/simple_idct_alpha.c @@ -0,0 +1,311 @@ +/* + * Simple IDCT (Alpha optimized) + * + * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * based upon some outcommented c code from mpeg2dec (idct_mmx.c + * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) + * + * Alpha optimiziations by Måns Rullgård <mru@users.sourceforge.net> + * and Falk Hueffner <falk@debian.org> + */ + +#include "asm.h" +#include "../dsputil.h" + +extern void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels, + int line_size); +extern void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels, + int line_size); + +// cos(i * M_PI / 16) * sqrt(2) * (1 << 14) +// W4 is actually exactly 16384, but using 16383 works around +// accumulating rounding errors for some encoders +#define W1 ((int_fast32_t) 22725) +#define W2 ((int_fast32_t) 21407) +#define W3 ((int_fast32_t) 19266) +#define W4 ((int_fast32_t) 16383) +#define W5 ((int_fast32_t) 12873) +#define W6 ((int_fast32_t) 8867) +#define W7 ((int_fast32_t) 4520) +#define ROW_SHIFT 11 +#define COL_SHIFT 20 + +/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise */ +static inline int idct_row(DCTELEM *row) +{ + int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3, t; + uint64_t l, r; + l = ldq(row); + r = ldq(row + 4); + + if (l == 0 && r == 0) + return 0; + + a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1)); + + if (((l & ~0xffffUL) | r) == 0) { + a0 >>= ROW_SHIFT; + a0 = (uint16_t) a0; + a0 |= a0 << 16; + a0 |= a0 << 32; + + stq(a0, row); + stq(a0, row + 4); + return 1; + } + + a1 = a0; + a2 = a0; + a3 = a0; + + t = extwl(l, 4); /* row[2] */ + if (t != 0) { + t = sextw(t); + a0 += W2 * t; + a1 += W6 * t; + a2 -= W6 * t; + a3 -= W2 * t; + } + + t = extwl(r, 0); /* row[4] */ + if (t != 0) { + t = sextw(t); + a0 += W4 * t; + a1 -= W4 * t; + a2 -= W4 * t; + a3 += W4 * t; + } + + t = extwl(r, 4); /* row[6] */ + if (t != 0) { + t = sextw(t); + a0 += W6 * t; + a1 -= W2 * t; + a2 += W2 * t; + a3 -= W6 * t; + } + + t = extwl(l, 2); /* row[1] */ + if (t != 0) { + t = sextw(t); + b0 = W1 * t; + b1 = W3 * t; + b2 = W5 * t; + b3 = W7 * t; + } else { + b0 = 0; + b1 = 0; + b2 = 0; + b3 = 0; + } + + t = extwl(l, 6); /* row[3] */ + if (t) { + t = sextw(t); + b0 += W3 * t; + b1 -= W7 * t; + b2 -= W1 * t; + b3 -= W5 * t; + } + + + t = extwl(r, 2); /* row[5] */ + if (t) { + t = sextw(t); + b0 += W5 * t; + b1 -= W1 * t; + b2 += W7 * t; + b3 += W3 * t; + } + + t = extwl(r, 6); /* row[7] */ + if (t) { + t = sextw(t); + b0 += W7 * t; + b1 -= W5 * t; + b2 += W3 * t; + b3 -= W1 * t; + } + + row[0] = (a0 + b0) >> ROW_SHIFT; + row[1] = (a1 + b1) >> ROW_SHIFT; + row[2] = (a2 + b2) >> ROW_SHIFT; + row[3] = (a3 + b3) >> ROW_SHIFT; + row[4] = (a3 - b3) >> ROW_SHIFT; + row[5] = (a2 - b2) >> ROW_SHIFT; + row[6] = (a1 - b1) >> ROW_SHIFT; + row[7] = (a0 - b0) >> ROW_SHIFT; + + return 2; +} + +static inline void idct_col(DCTELEM *col) +{ + int_fast32_t a0, a1, a2, a3, b0, b1, b2, b3; + + col[0] += (1 << (COL_SHIFT - 1)) / W4; + + a0 = W4 * col[8 * 0]; + a1 = W4 * col[8 * 0]; + a2 = W4 * col[8 * 0]; + a3 = W4 * col[8 * 0]; + + if (col[8 * 2]) { + a0 += W2 * col[8 * 2]; + a1 += W6 * col[8 * 2]; + a2 -= W6 * col[8 * 2]; + a3 -= W2 * col[8 * 2]; + } + + if (col[8 * 4]) { + a0 += W4 * col[8 * 4]; + a1 -= W4 * col[8 * 4]; + a2 -= W4 * col[8 * 4]; + a3 += W4 * col[8 * 4]; + } + + if (col[8 * 6]) { + a0 += W6 * col[8 * 6]; + a1 -= W2 * col[8 * 6]; + a2 += W2 * col[8 * 6]; + a3 -= W6 * col[8 * 6]; + } + + if (col[8 * 1]) { + b0 = W1 * col[8 * 1]; + b1 = W3 * col[8 * 1]; + b2 = W5 * col[8 * 1]; + b3 = W7 * col[8 * 1]; + } else { + b0 = 0; + b1 = 0; + b2 = 0; + b3 = 0; + } + + if (col[8 * 3]) { + b0 += W3 * col[8 * 3]; + b1 -= W7 * col[8 * 3]; + b2 -= W1 * col[8 * 3]; + b3 -= W5 * col[8 * 3]; + } + + if (col[8 * 5]) { + b0 += W5 * col[8 * 5]; + b1 -= W1 * col[8 * 5]; + b2 += W7 * col[8 * 5]; + b3 += W3 * col[8 * 5]; + } + + if (col[8 * 7]) { + b0 += W7 * col[8 * 7]; + b1 -= W5 * col[8 * 7]; + b2 += W3 * col[8 * 7]; + b3 -= W1 * col[8 * 7]; + } + + col[8 * 0] = (a0 + b0) >> COL_SHIFT; + col[8 * 7] = (a0 - b0) >> COL_SHIFT; + col[8 * 1] = (a1 + b1) >> COL_SHIFT; + col[8 * 6] = (a1 - b1) >> COL_SHIFT; + col[8 * 2] = (a2 + b2) >> COL_SHIFT; + col[8 * 5] = (a2 - b2) >> COL_SHIFT; + col[8 * 3] = (a3 + b3) >> COL_SHIFT; + col[8 * 4] = (a3 - b3) >> COL_SHIFT; +} + +/* If all rows but the first one are zero after row transformation, + all rows will be identical after column transformation. */ +static inline void idct_col2(DCTELEM *col) +{ + int i; + uint64_t l, r; + uint64_t *lcol = (uint64_t *) col; + + for (i = 0; i < 8; ++i) { + int_fast32_t a0 = col[0] + (1 << (COL_SHIFT - 1)) / W4; + + a0 *= W4; + col[0] = a0 >> COL_SHIFT; + ++col; + } + + l = lcol[0]; + r = lcol[1]; + lcol[ 2] = l; lcol[ 3] = r; + lcol[ 4] = l; lcol[ 5] = r; + lcol[ 6] = l; lcol[ 7] = r; + lcol[ 8] = l; lcol[ 9] = r; + lcol[10] = l; lcol[11] = r; + lcol[12] = l; lcol[13] = r; + lcol[14] = l; lcol[15] = r; +} + +void simple_idct_axp(DCTELEM *block) +{ + + int i; + int rowsZero = 1; /* all rows except row 0 zero */ + int rowsConstant = 1; /* all rows consist of a constant value */ + + for (i = 0; i < 8; i++) { + int sparseness = idct_row(block + 8 * i); + + if (i > 0 && sparseness > 0) + rowsZero = 0; + if (sparseness == 2) + rowsConstant = 0; + } + + if (rowsZero) { + idct_col2(block); + } else if (rowsConstant) { + uint64_t *lblock = (uint64_t *) block; + + idct_col(block); + for (i = 0; i < 8; i += 2) { + uint64_t v = (uint16_t) block[i * 8]; + uint64_t w = (uint16_t) block[i * 8 + 8]; + + v |= v << 16; + w |= w << 16; + v |= v << 32; + w |= w << 32; + lblock[0] = v; + lblock[1] = v; + lblock[2] = w; + lblock[3] = w; + lblock += 4; + } + } else { + for (i = 0; i < 8; i++) + idct_col(block + i); + } +} + +void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block) +{ + simple_idct_axp(block); + put_pixels_clamped_axp_p(block, dest, line_size); +} + +void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block) +{ + simple_idct_axp(block); + add_pixels_clamped_axp_p(block, dest, line_size); +} diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c b/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c index 66358b38d..e459b3391 100644 --- a/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c +++ b/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c @@ -21,7 +21,7 @@ extern void j_rev_dct_ARM(DCTELEM *data); -void dsputil_init_armv4l(void) +void dsputil_init_armv4l(DSPContext* c, unsigned mask) { // ff_idct = j_rev_dct_ARM; } diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h index f88184d50..963e5f100 100644 --- a/src/libffmpeg/libavcodec/avcodec.h +++ b/src/libffmpeg/libavcodec/avcodec.h @@ -5,8 +5,8 @@ #define LIBAVCODEC_VERSION_INT 0x000406 #define LIBAVCODEC_VERSION "0.4.6" -#define LIBAVCODEC_BUILD 4639 -#define LIBAVCODEC_BUILD_STR "4639" +#define LIBAVCODEC_BUILD 4640 +#define LIBAVCODEC_BUILD_STR "4640" enum CodecID { CODEC_ID_NONE, @@ -18,6 +18,7 @@ enum CodecID { CODEC_ID_VORBIS, CODEC_ID_AC3, CODEC_ID_MJPEG, + CODEC_ID_MJPEGB, CODEC_ID_MPEG4, CODEC_ID_RAWVIDEO, CODEC_ID_MSMPEG4V1, @@ -145,6 +146,7 @@ static const int Motion_Est_QTab[] = { ME_ZERO, ME_PHODS, ME_LOG, #define CODEC_FLAG_NORMALIZE_AQP 0x00020000 /* normalize adaptive quantization */ #define CODEC_FLAG_INTERLACED_DCT 0x00040000 /* use interlaced dct */ #define CODEC_FLAG_LOW_DELAY 0x00080000 /* force low delay / will fail on b frames */ +#define CODEC_FLAG_ALT_SCAN 0x00100000 /* use alternate scan */ /* codec capabilities */ @@ -222,8 +224,7 @@ typedef struct AVCodecContext { int width, height; /** - * encoding: set by user. 0 if not known - * decoding: set by lavc. 0 if not known + * Obsolete, will be removed */ int aspect_ratio_info; #define FF_ASPECT_SQUARE 1 @@ -646,9 +647,7 @@ typedef struct AVCodecContext { float rc_initial_cplx; /** - * custom aspect ratio, used if aspect_info==FF_ASPECT_EXTENDED - * encoding: set by user. - * decoding: set by lavc. + * Obsolete, will be removed */ int aspected_width; int aspected_height; @@ -795,6 +794,13 @@ typedef struct AVCodecContext { #define FF_PRED_LEFT 0 #define FF_PRED_PLANE 1 #define FF_PRED_MEDIAN 2 + + /** + * aspect ratio. (0 if unknown) + * encoding: set by user. + * decoding: set by lavc. + */ + float aspect_ratio; } AVCodecContext; typedef struct AVCodec { @@ -853,11 +859,13 @@ extern AVCodec dvaudio_decoder; extern AVCodec wmav1_decoder; extern AVCodec wmav2_decoder; extern AVCodec mjpeg_decoder; +extern AVCodec mjpegb_decoder; extern AVCodec mp2_decoder; extern AVCodec mp3_decoder; extern AVCodec mace3_decoder; extern AVCodec mace6_decoder; extern AVCodec huffyuv_decoder; +extern AVCodec oggvorbis_decoder; /* pcm codecs */ #define PCM_CODEC(id, name) \ diff --git a/src/libffmpeg/libavcodec/common.c b/src/libffmpeg/libavcodec/common.c index 2344dc6be..40ba49811 100644 --- a/src/libffmpeg/libavcodec/common.c +++ b/src/libffmpeg/libavcodec/common.c @@ -326,3 +326,29 @@ int ff_gcd(int a, int b){ if(b) return ff_gcd(b, a%b); else return a; } + +void ff_float2fraction(int *nom_arg, int *denom_arg, double f, int max){ + double best_diff=1E10, diff; + int best_denom=1, best_nom=1; + int nom, denom, gcd; + + //brute force here, perhaps we should try continued fractions if we need large max ... + for(denom=1; denom<=max; denom++){ + nom= (int)(f*denom + 0.5); + if(nom<=0 || nom>max) continue; + + diff= ABS( f - (double)nom / (double)denom ); + if(diff < best_diff){ + best_diff= diff; + best_nom= nom; + best_denom= denom; + } + } + + gcd= ff_gcd(best_nom, best_denom); + best_nom /= gcd; + best_denom /= gcd; + + *nom_arg= best_nom; + *denom_arg= best_denom; +} diff --git a/src/libffmpeg/libavcodec/common.h b/src/libffmpeg/libavcodec/common.h index 348fb7d3f..9dfd7dcda 100644 --- a/src/libffmpeg/libavcodec/common.h +++ b/src/libffmpeg/libavcodec/common.h @@ -256,6 +256,10 @@ typedef struct RL_VLC_ELEM { uint8_t run; } RL_VLC_ELEM; +#ifdef ARCH_SPARC64 +#define UNALIGNED_STORES_ARE_BAD +#endif + /* used to avoid missaligned exceptions on some archs (alpha, ...) */ #ifdef ARCH_X86 # define unaligned32(a) (*(UINT32*)(a)) @@ -302,6 +306,14 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value) } else { bit_buf<<=bit_left; bit_buf |= value >> (n - bit_left); +#ifdef UNALIGNED_STORES_ARE_BAD + if (3 & (int) s->buf_ptr) { + s->buf_ptr[0] = bit_buf >> 24; + s->buf_ptr[1] = bit_buf >> 16; + s->buf_ptr[2] = bit_buf >> 8; + s->buf_ptr[3] = bit_buf ; + } else +#endif *(UINT32 *)s->buf_ptr = be2me_32(bit_buf); //printf("bitbuf = %08x\n", bit_buf); s->buf_ptr+=4; @@ -850,6 +862,8 @@ static inline int ff_get_fourcc(const char *s){ return (s[0]) + (s[1]<<8) + (s[2]<<16) + (s[3]<<24); } +void ff_float2fraction(int *nom_arg, int *denom_arg, double f, int max); + #ifdef ARCH_X86 #define MASK_ABS(mask, level)\ diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c index 5952d9246..9039d2625 100644 --- a/src/libffmpeg/libavcodec/dsputil.c +++ b/src/libffmpeg/libavcodec/dsputil.c @@ -144,6 +144,31 @@ static int pix_norm1_c(UINT8 * pix, int line_size) } +static int pix_norm_c(UINT8 * pix1, UINT8 * pix2, int line_size) +{ + int s, i, j; + UINT32 *sq = squareTbl + 256; + + s = 0; + for (i = 0; i < 16; i++) { + for (j = 0; j < 16; j += 8) { + s += sq[pix1[0] - pix2[0]]; + s += sq[pix1[1] - pix2[1]]; + s += sq[pix1[2] - pix2[2]]; + s += sq[pix1[3] - pix2[3]]; + s += sq[pix1[4] - pix2[4]]; + s += sq[pix1[5] - pix2[5]]; + s += sq[pix1[6] - pix2[6]]; + s += sq[pix1[7] - pix2[7]]; + pix1 += 8; + pix2 += 8; + } + pix1 += line_size - 16; + pix2 += line_size - 16; + } + return s; +} + static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size) { int i; @@ -1322,7 +1347,7 @@ void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, INT16 temp[64]; if(last<=0) return; - if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms + //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms for(i=0; i<=last; i++){ const int j= scantable[i]; @@ -1404,6 +1429,7 @@ void dsputil_init(DSPContext* c, unsigned mask) c->clear_blocks = clear_blocks_c; c->pix_sum = pix_sum_c; c->pix_norm1 = pix_norm1_c; + c->pix_norm = pix_norm_c; /* TODO [0] 16 [1] 8 */ c->pix_abs16x16 = pix_abs16x16_c; diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h index 43734df85..d52b0419c 100644 --- a/src/libffmpeg/libavcodec/dsputil.h +++ b/src/libffmpeg/libavcodec/dsputil.h @@ -103,6 +103,7 @@ typedef struct DSPContext { void (*clear_blocks)(DCTELEM *blocks/*align 16*/); int (*pix_sum)(UINT8 * pix, int line_size); int (*pix_norm1)(UINT8 * pix, int line_size); + int (*pix_norm)(UINT8 * pix1, UINT8 * pix2, int line_size); /* maybe create an array for 16/8 functions */ op_pixels_func put_pixels_tab[2][4]; @@ -198,6 +199,10 @@ void dsputil_init_alpha(DSPContext* c, unsigned mask); #elif defined(ARCH_POWERPC) +#define MM_ALTIVEC 0x0001 /* standard AltiVec */ + +extern int mm_flags; + #define __align8 __attribute__ ((aligned (16))) void dsputil_init_ppc(DSPContext* c, unsigned mask); diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c index b2c37ccb5..0083ca280 100644 --- a/src/libffmpeg/libavcodec/h263.c +++ b/src/libffmpeg/libavcodec/h263.c @@ -120,6 +120,27 @@ int h263_get_picture_format(int width, int height) return format; } +static void float_aspect_to_info(MpegEncContext * s, float aspect){ + int i; + + aspect*= s->height/(double)s->width; +//printf("%f\n", aspect); + + if(aspect==0) aspect= 1.0; + + ff_float2fraction(&s->aspected_width, &s->aspected_height, aspect, 255); + +//printf("%d %d\n", s->aspected_width, s->aspected_height); + for(i=1; i<6; i++){ + if(s->aspected_width == pixel_aspect[i][0] && s->aspected_height== pixel_aspect[i][1]){ + s->aspect_ratio_info=i; + return; + } + } + + s->aspect_ratio_info= FF_ASPECT_EXTENDED; +} + void h263_encode_picture_header(MpegEncContext * s, int picture_number) { int format; @@ -196,11 +217,9 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number) if (format == 7) { /* Custom Picture Format (CPFMT) */ - - if (s->aspect_ratio_info) - put_bits(&s->pb,4,s->aspect_ratio_info); - else - put_bits(&s->pb,4,2); /* Aspect ratio: CIF 12:11 (4:3) picture */ + float_aspect_to_info(s, s->avctx->aspect_ratio); + + put_bits(&s->pb,4,s->aspect_ratio_info); put_bits(&s->pb,9,(s->width >> 2) - 1); put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */ put_bits(&s->pb,9,(s->height >> 2)); @@ -1508,10 +1527,10 @@ static void mpeg4_encode_vol_header(MpegEncContext * s) put_bits(&s->pb, 1, 1); /* is obj layer id= yes */ put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */ put_bits(&s->pb, 3, 1); /* is obj layer priority */ - if(s->aspect_ratio_info) - put_bits(&s->pb, 4, s->aspect_ratio_info);/* aspect ratio info */ - else - put_bits(&s->pb, 4, 1); /* aspect ratio info= sqare pixel */ + + float_aspect_to_info(s, s->avctx->aspect_ratio); + + put_bits(&s->pb, 4, s->aspect_ratio_info);/* aspect ratio info */ if (s->aspect_ratio_info == FF_ASPECT_EXTENDED) { put_bits(&s->pb, 8, s->aspected_width); @@ -3273,14 +3292,14 @@ end: return SLICE_END; } }else{ - if(get_bits_count(&s->gb) + 7 >= s->gb.size*8){ - int v= show_bits(&s->gb, 8) >> (((get_bits_count(&s->gb)-1)&7)+1); - if(v==0) - return SLICE_END; - }else{ - if(show_bits(&s->gb, 16)==0) - return SLICE_END; + int v= show_bits(&s->gb, 16); + + if(get_bits_count(&s->gb) + 16 > s->gb.size*8){ + v>>= get_bits_count(&s->gb) + 16 - s->gb.size*8; } + + if(v==0) + return SLICE_END; } return SLICE_OK; @@ -3815,6 +3834,9 @@ int h263_decode_picture_header(MpegEncContext *s) /* aspected dimensions */ s->aspected_width = get_bits(&s->gb, 8); s->aspected_height = get_bits(&s->gb, 8); + }else{ + s->aspected_width = pixel_aspect[s->aspect_ratio_info][0]; + s->aspected_height= pixel_aspect[s->aspect_ratio_info][1]; } } else { width = h263_format[format][0]; @@ -4080,6 +4102,9 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){ if(s->aspect_ratio_info == FF_ASPECT_EXTENDED){ s->aspected_width = get_bits(gb, 8); // par_width s->aspected_height = get_bits(gb, 8); // par_height + }else{ + s->aspected_width = pixel_aspect[s->aspect_ratio_info][0]; + s->aspected_height= pixel_aspect[s->aspect_ratio_info][1]; } if ((s->vol_control_parameters=get_bits1(gb))) { /* vol control parameter */ diff --git a/src/libffmpeg/libavcodec/h263dec.c b/src/libffmpeg/libavcodec/h263dec.c index 69e161766..0c88cd758 100644 --- a/src/libffmpeg/libavcodec/h263dec.c +++ b/src/libffmpeg/libavcodec/h263dec.c @@ -348,6 +348,8 @@ static int h263_decode_frame(AVCodecContext *avctx, MpegEncContext *s = avctx->priv_data; int ret,i; AVPicture *pict = data; + float new_aspect; + #ifdef PRINT_FRAME_TIME uint64_t time= rdtsc(); #endif @@ -356,9 +358,6 @@ uint64_t time= rdtsc(); printf("bytes=%x %x %x %x\n", buf[0], buf[1], buf[2], buf[3]); #endif - s->hurry_up= avctx->hurry_up; - s->error_resilience= avctx->error_resilience; - s->flags= avctx->flags; *data_size = 0; @@ -495,10 +494,13 @@ retry: /* and other parameters. So then we could init the picture */ /* FIXME: By the way H263 decoder is evolving it should have */ /* an H263EncContext */ + if(s->aspected_height) + new_aspect= s->aspected_width*s->width / (float)(s->height*s->aspected_height); + else + new_aspect=0; + if ( s->width != avctx->width || s->height != avctx->height - || avctx->aspect_ratio_info != s->aspect_ratio_info - || avctx->aspected_width != s->aspected_width - || avctx->aspected_height != s->aspected_height) { + || ABS(new_aspect - avctx->aspect_ratio) > 0.001) { /* H.263 could change picture size any time */ MPV_common_end(s); s->context_initialized=0; @@ -506,12 +508,7 @@ retry: if (!s->context_initialized) { avctx->width = s->width; avctx->height = s->height; - avctx->aspect_ratio_info= s->aspect_ratio_info; - if (s->aspect_ratio_info == FF_ASPECT_EXTENDED) - { - avctx->aspected_width = s->aspected_width; - avctx->aspected_height = s->aspected_height; - } + avctx->aspect_ratio= new_aspect; goto retry; } @@ -532,9 +529,9 @@ retry: /* skip b frames if we dont have reference frames */ if(s->num_available_buffers<2 && s->pict_type==B_TYPE) return get_consumed_bytes(s, buf_size); /* skip b frames if we are in a hurry */ - if(s->hurry_up && s->pict_type==B_TYPE) return get_consumed_bytes(s, buf_size); + if(avctx->hurry_up && s->pict_type==B_TYPE) return get_consumed_bytes(s, buf_size); /* skip everything if we are in a hurry>=5 */ - if(s->hurry_up>=5) return get_consumed_bytes(s, buf_size); + if(avctx->hurry_up>=5) return get_consumed_bytes(s, buf_size); if(s->next_p_frame_damaged){ if(s->pict_type==B_TYPE) diff --git a/src/libffmpeg/libavcodec/imgresample.c b/src/libffmpeg/libavcodec/imgresample.c index 1197f858b..b1cfab973 100644 --- a/src/libffmpeg/libavcodec/imgresample.c +++ b/src/libffmpeg/libavcodec/imgresample.c @@ -22,7 +22,7 @@ #ifdef USE_FASTMEMCPY #include "fastmemcpy.h" #endif - +extern int mm_flags; #define NB_COMPONENTS 3 @@ -264,6 +264,133 @@ static void v_resample4_mmx(UINT8 *dst, int dst_width, UINT8 *src, int wrap, } #endif +#ifdef HAVE_ALTIVEC +typedef union { + vector unsigned char v; + unsigned char c[16]; +} vec_uc_t; + +typedef union { + vector signed short v; + signed short s[8]; +} vec_ss_t; + +void v_resample16_altivec(UINT8 *dst, int dst_width, UINT8 *src, int wrap, + INT16 *filter) +{ + int sum, i; + uint8_t *s; + vector unsigned char *tv, tmp, dstv, zero; + vec_ss_t srchv[4], srclv[4], fv[4]; + vector signed short zeros, sumhv, sumlv; + s = src; + + for(i=0;i<4;i++) + { + /* + The vec_madds later on does an implicit >>15 on the result. + Since FILTER_BITS is 8, and we have 15 bits of magnitude in + a signed short, we have just enough bits to pre-shift our + filter constants <<7 to compensate for vec_madds. + */ + fv[i].s[0] = filter[i] << (15-FILTER_BITS); + fv[i].v = vec_splat(fv[i].v, 0); + } + + zero = vec_splat_u8(0); + zeros = vec_splat_s16(0); + + + /* + When we're resampling, we'd ideally like both our input buffers, + and output buffers to be 16-byte aligned, so we can do both aligned + reads and writes. Sadly we can't always have this at the moment, so + we opt for aligned writes, as unaligned writes have a huge overhead. + To do this, do enough scalar resamples to get dst 16-byte aligned. + */ + i = (-(int)dst) & 0xf; + while(i>0) { + sum = s[0 * wrap] * filter[0] + + s[1 * wrap] * filter[1] + + s[2 * wrap] * filter[2] + + s[3 * wrap] * filter[3]; + sum = sum >> FILTER_BITS; + if (sum<0) sum = 0; else if (sum>255) sum=255; + dst[0] = sum; + dst++; + s++; + dst_width--; + i--; + } + + /* Do our altivec resampling on 16 pixels at once. */ + while(dst_width>=16) { + /* + Read 16 (potentially unaligned) bytes from each of + 4 lines into 4 vectors, and split them into shorts. + Interleave the multipy/accumulate for the resample + filter with the loads to hide the 3 cycle latency + the vec_madds have. + */ + tv = (vector unsigned char *) &s[0 * wrap]; + tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap])); + srchv[0].v = (vector signed short) vec_mergeh(zero, tmp); + srclv[0].v = (vector signed short) vec_mergel(zero, tmp); + sumhv = vec_madds(srchv[0].v, fv[0].v, zeros); + sumlv = vec_madds(srclv[0].v, fv[0].v, zeros); + + tv = (vector unsigned char *) &s[1 * wrap]; + tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap])); + srchv[1].v = (vector signed short) vec_mergeh(zero, tmp); + srclv[1].v = (vector signed short) vec_mergel(zero, tmp); + sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv); + sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv); + + tv = (vector unsigned char *) &s[2 * wrap]; + tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap])); + srchv[2].v = (vector signed short) vec_mergeh(zero, tmp); + srclv[2].v = (vector signed short) vec_mergel(zero, tmp); + sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv); + sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv); + + tv = (vector unsigned char *) &s[3 * wrap]; + tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap])); + srchv[3].v = (vector signed short) vec_mergeh(zero, tmp); + srclv[3].v = (vector signed short) vec_mergel(zero, tmp); + sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv); + sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv); + + /* + Pack the results into our destination vector, + and do an aligned write of that back to memory. + */ + dstv = vec_packsu(sumhv, sumlv) ; + vec_st(dstv, 0, (vector unsigned char *) dst); + + dst+=16; + s+=16; + dst_width-=16; + } + + /* + If there are any leftover pixels, resample them + with the slow scalar method. + */ + while(dst_width>0) { + sum = s[0 * wrap] * filter[0] + + s[1 * wrap] * filter[1] + + s[2 * wrap] * filter[2] + + s[3 * wrap] * filter[3]; + sum = sum >> FILTER_BITS; + if (sum<0) sum = 0; else if (sum>255) sum=255; + dst[0] = sum; + dst++; + s++; + dst_width--; + } +} +#endif + /* slow version to handle limit cases. Does not need optimisation */ static void h_resample_slow(UINT8 *dst, int dst_width, UINT8 *src, int src_width, int src_start, int src_incr, INT16 *filters) @@ -384,6 +511,13 @@ static void component_resample(ImgReSampleContext *s, &s->v_filters[phase_y][0]); else #endif +#ifdef HAVE_ALTIVEC + if ((mm_flags & MM_ALTIVEC) && NB_TAPS == 4 && FILTER_BITS == 8) + v_resample16_altivec(output, owidth, + s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, + &s->v_filters[phase_y][0]); + else +#endif v_resample(output, owidth, s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, &s->v_filters[phase_y][0]); diff --git a/src/libffmpeg/libavcodec/mjpeg.c b/src/libffmpeg/libavcodec/mjpeg.c index 8b167a157..899aa06e5 100644 --- a/src/libffmpeg/libavcodec/mjpeg.c +++ b/src/libffmpeg/libavcodec/mjpeg.c @@ -17,7 +17,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * * Support for external huffman table, various fixes (AVID workaround), - * aspecting and new decode_frame mechanism + * aspecting, new decode_frame mechanism and apple mjpeg-b support * by Alex Beregszaszi <alex@naxine.org> */ //#define DEBUG @@ -1061,7 +1061,7 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s) h_count[0] = 1; v_count[0] = 1; } - + for(mb_y = 0; mb_y < mb_height; mb_y++) { for(mb_x = 0; mb_x < mb_width; mb_x++) { for(i=0;i<nb_components;i++) { @@ -1098,8 +1098,8 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s) } } /* (< 1350) buggy workaround for Spectralfan.mov, should be fixed */ - - if ((s->restart_interval < 1350) && !--s->restart_count) { + if (s->restart_interval && (s->restart_interval < 1350) && + !--s->restart_count) { align_get_bits(&s->gb); skip_bits(&s->gb, 16); /* skip RSTn */ for (j=0; j<nb_components; j++) /* reset dc */ @@ -1373,10 +1373,6 @@ static int mjpeg_decode_frame(AVCodecContext *avctx, { UINT8 x = *(src++); -#if 0 - if (x == 0xff && *src == 0xff) - break; -#endif *(dst++) = x; if (x == 0xff) { @@ -1510,12 +1506,135 @@ not_the_end: } } the_end: - dprintf("mjpeg decode frame unused %d bytes\n", buf_end - buf_ptr); // return buf_end - buf_ptr; return buf_ptr - buf; } +static int mjpegb_decode_frame(AVCodecContext *avctx, + void *data, int *data_size, + UINT8 *buf, int buf_size) +{ + MJpegDecodeContext *s = avctx->priv_data; + UINT8 *buf_end, *buf_ptr; + int i; + AVPicture *picture = data; + GetBitContext hgb; /* for the header */ + uint32_t dqt_offs, dht_offs, sof_offs, sos_offs, second_field_offs; + uint32_t field_size; + + *data_size = 0; + + /* no supplementary picture */ + if (buf_size == 0) + return 0; + + buf_ptr = buf; + buf_end = buf + buf_size; + +read_header: + /* reset on every SOI */ + s->restart_interval = 0; + + init_get_bits(&hgb, buf_ptr, /*buf_size*/buf_end - buf_ptr); + + skip_bits(&hgb, 32); /* reserved zeros */ + + if (get_bits(&hgb, 32) != be2me_32(ff_get_fourcc("mjpg"))) + { + dprintf("not mjpeg-b (bad fourcc)\n"); + return 0; + } + + field_size = get_bits(&hgb, 32); /* field size */ + dprintf("field size: 0x%x\n", field_size); + skip_bits(&hgb, 32); /* padded field size */ + second_field_offs = get_bits(&hgb, 32); + dprintf("second field offs: 0x%x\n", second_field_offs); + if (second_field_offs) + s->interlaced = 1; + + dqt_offs = get_bits(&hgb, 32); + dprintf("dqt offs: 0x%x\n", dqt_offs); + if (dqt_offs) + { + init_get_bits(&s->gb, buf+dqt_offs, buf_end - (buf+dqt_offs)); + s->start_code = DQT; + mjpeg_decode_dqt(s); + } + + dht_offs = get_bits(&hgb, 32); + dprintf("dht offs: 0x%x\n", dht_offs); + if (dht_offs) + { + init_get_bits(&s->gb, buf+dht_offs, buf_end - (buf+dht_offs)); + s->start_code = DHT; + mjpeg_decode_dht(s); + } + + sof_offs = get_bits(&hgb, 32); + dprintf("sof offs: 0x%x\n", sof_offs); + if (sof_offs) + { + init_get_bits(&s->gb, buf+sof_offs, buf_end - (buf+sof_offs)); + s->start_code = SOF0; + mjpeg_decode_sof0(s); + } + + sos_offs = get_bits(&hgb, 32); + dprintf("sos offs: 0x%x\n", sos_offs); + if (sos_offs) + { +// init_get_bits(&s->gb, buf+sos_offs, buf_end - (buf+sos_offs)); + init_get_bits(&s->gb, buf+sos_offs, field_size); + s->start_code = SOS; + mjpeg_decode_sos(s); + } + + skip_bits(&hgb, 32); /* start of data offset */ + + if (s->interlaced) { + s->bottom_field ^= 1; + /* if not bottom field, do not output image yet */ + if (s->bottom_field && second_field_offs) + { + buf_ptr = buf + second_field_offs; + second_field_offs = 0; + goto read_header; + } + } + + for(i=0;i<3;i++) { + picture->data[i] = s->current_picture[i]; + picture->linesize[i] = (s->interlaced) ? + s->linesize[i] >> 1 : s->linesize[i]; + } + *data_size = sizeof(AVPicture); + avctx->height = s->height; + if (s->interlaced) + avctx->height *= 2; + avctx->width = s->width; + /* XXX: not complete test ! */ + switch((s->h_count[0] << 4) | s->v_count[0]) { + case 0x11: + avctx->pix_fmt = PIX_FMT_YUV444P; + break; + case 0x21: + avctx->pix_fmt = PIX_FMT_YUV422P; + break; + default: + case 0x22: + avctx->pix_fmt = PIX_FMT_YUV420P; + break; + } + /* dummy quality */ + /* XXX: infer it with matrix */ + avctx->quality = 3; + + return buf_ptr - buf; +} + + static int mjpeg_decode_end(AVCodecContext *avctx) { MJpegDecodeContext *s = avctx->priv_data; @@ -1543,3 +1662,16 @@ AVCodec mjpeg_decoder = { 0, NULL }; + +AVCodec mjpegb_decoder = { + "mjpegb", + CODEC_TYPE_VIDEO, + CODEC_ID_MJPEGB, + sizeof(MJpegDecodeContext), + mjpeg_decode_init, + NULL, + mjpeg_decode_end, + mjpegb_decode_frame, + 0, + NULL +}; diff --git a/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c b/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c index dc2e5554b..e75f858a2 100644 --- a/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c +++ b/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c @@ -25,6 +25,7 @@ #include <mlib_sys.h> #include <mlib_video.h> + /* copy block, width 16 pixel, height 8/16 */ static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref, @@ -196,53 +197,25 @@ static void avg_pixels8_xy2_mlib (uint8_t * dest, const uint8_t * ref, mlib_VideoInterpAveXY_U8_U8_8x8 (dest, (uint8_t *)ref, stride, stride); } -static void put_pixels_clamped_mlib(const DCTELEM *block, UINT8 *pixels, int line_size) -{ - int i; - uint8_t *p = pixels; - - for (i=0; i<8; i++) { - memset(p, 0, 8); - p += line_size; - } - - mlib_VideoAddBlock_U8_S16(pixels, (mlib_s16 *)block, line_size); - - /*int i; - UINT8 *cm = cropTbl + MAX_NEG_CROP; - - for(i=0;i<8;i++) { - pixels[0] = cm[block[0]]; - pixels[1] = cm[block[1]]; - pixels[2] = cm[block[2]]; - pixels[3] = cm[block[3]]; - pixels[4] = cm[block[4]]; - pixels[5] = cm[block[5]]; - pixels[6] = cm[block[6]]; - pixels[7] = cm[block[7]]; - - pixels += line_size; - block += 8; - }*/ -} static void add_pixels_clamped_mlib(const DCTELEM *block, UINT8 *pixels, int line_size) { mlib_VideoAddBlock_U8_S16(pixels, (mlib_s16 *)block, line_size); } + /* XXX: those functions should be suppressed ASAP when all IDCTs are converted */ static void ff_idct_put_mlib(UINT8 *dest, int line_size, DCTELEM *data) { mlib_VideoIDCT8x8_S16_S16 (data, data); - put_pixels_clamped_mlib(data, dest, line_size); + put_pixels_clamped(data, dest, line_size); } static void ff_idct_add_mlib(UINT8 *dest, int line_size, DCTELEM *data) { mlib_VideoIDCT8x8_S16_S16 (data, data); - add_pixels_clamped_mlib(data, dest, line_size); + add_pixels_clamped(data, dest, line_size); } static void ff_fdct_mlib(DCTELEM *data) diff --git a/src/libffmpeg/libavcodec/motion_est.c b/src/libffmpeg/libavcodec/motion_est.c index 95e67f66a..b1a3c61f7 100644 --- a/src/libffmpeg/libavcodec/motion_est.c +++ b/src/libffmpeg/libavcodec/motion_est.c @@ -63,31 +63,6 @@ static int pix_dev(UINT8 * pix, int line_size, int mean) return s; } -static int pix_norm(UINT8 * pix1, UINT8 * pix2, int line_size) -{ - int s, i, j; - UINT32 *sq = squareTbl + 256; - - s = 0; - for (i = 0; i < 16; i++) { - for (j = 0; j < 16; j += 8) { - s += sq[pix1[0] - pix2[0]]; - s += sq[pix1[1] - pix2[1]]; - s += sq[pix1[2] - pix2[2]]; - s += sq[pix1[3] - pix2[3]]; - s += sq[pix1[4] - pix2[4]]; - s += sq[pix1[5] - pix2[5]]; - s += sq[pix1[6] - pix2[6]]; - s += sq[pix1[7] - pix2[7]]; - pix1 += 8; - pix2 += 8; - } - pix1 += line_size - 16; - pix2 += line_size - 16; - } - return s; -} - static inline void no_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr) { @@ -1137,7 +1112,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s, varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; // FIXME: MMX OPTIMIZE - vard = (pix_norm(pix, ppix, s->linesize)+128)>>8; + vard = (s->dsp.pix_norm(pix, ppix, s->linesize)+128)>>8; //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); s->mb_var [s->mb_width * mb_y + mb_x] = varc; diff --git a/src/libffmpeg/libavcodec/mpeg12.c b/src/libffmpeg/libavcodec/mpeg12.c index 5a87f4287..c55259974 100644 --- a/src/libffmpeg/libavcodec/mpeg12.c +++ b/src/libffmpeg/libavcodec/mpeg12.c @@ -1402,6 +1402,10 @@ static int mpeg1_decode_picture(AVCodecContext *avctx, ref = get_bits(&s->gb, 10); /* temporal ref */ s->pict_type = get_bits(&s->gb, 3); dprintf("pict_type=%d number=%d\n", s->pict_type, s->picture_number); + + avctx->pict_type= s->pict_type; + avctx->key_frame= s->pict_type == I_TYPE; + skip_bits(&s->gb, 16); if (s->pict_type == P_TYPE || s->pict_type == B_TYPE) { s->full_pel[0] = get_bits1(&s->gb); @@ -1911,7 +1915,7 @@ static int mpeg_decode_frame(AVCodecContext *avctx, break; default: if (start_code >= SLICE_MIN_START_CODE && - start_code <= SLICE_MAX_START_CODE) { + start_code <= SLICE_MAX_START_CODE && avctx->hurry_up<5) { ret = mpeg_decode_slice(avctx, picture, start_code, s->buffer, input_size); if (ret == DECODE_SLICE_EOP) { @@ -1941,7 +1945,7 @@ static int mpeg_decode_frame(AVCodecContext *avctx, *data_size = sizeof(AVPicture); goto the_end; }else if(ret<0){ - printf("Error while decoding slice\n"); + fprintf(stderr,"Error while decoding slice\n"); if(ret==DECODE_SLICE_FATAL_ERROR) return -1; } } diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c index f1a94ccbe..b3565d3cc 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.c +++ b/src/libffmpeg/libavcodec/mpegvideo.c @@ -228,7 +228,7 @@ int DCT_common_init(MpegEncContext *s) MPV_common_init_mmi(s); #endif #ifdef ARCH_ARMV4L - MPV_common_init_armv4l(); + MPV_common_init_armv4l(s); #endif #ifdef ARCH_POWERPC MPV_common_init_ppc(s); @@ -557,12 +557,6 @@ int MPV_encode_init(AVCodecContext *avctx) s->qcompress= avctx->qcompress; s->qblur= avctx->qblur; s->avctx = avctx; - s->aspect_ratio_info= avctx->aspect_ratio_info; - if (avctx->aspect_ratio_info == FF_ASPECT_EXTENDED) - { - s->aspected_width = avctx->aspected_width; - s->aspected_height = avctx->aspected_height; - } s->flags= avctx->flags; s->max_b_frames= avctx->max_b_frames; s->b_frame_strategy= avctx->b_frame_strategy; @@ -825,6 +819,9 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx) s->mb_skiped = 0; avctx->mbskip_table= s->mbskip_table; + s->hurry_up= s->avctx->hurry_up; + s->error_resilience= avctx->error_resilience; + if(avctx->flags&CODEC_FLAG_DR1){ if(avctx->get_buffer_callback(avctx, s->width, s->height, s->pict_type) < 0){ fprintf(stderr, "get_buffer() failed\n"); @@ -3080,7 +3077,8 @@ static int dct_quantize_c(MpegEncContext *s, *overflow= s->max_qcoeff < max; //overflow might have happend /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */ - ff_block_permute(block, s->idct_permutation, scantable, last_non_zero); + if (s->idct_permutation_type != FF_NO_IDCT_PERM) + ff_block_permute(block, s->idct_permutation, scantable, last_non_zero); return last_non_zero; } diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h index 9b344ee7a..73092b9c1 100644 --- a/src/libffmpeg/libavcodec/mpegvideo.h +++ b/src/libffmpeg/libavcodec/mpegvideo.h @@ -551,6 +551,9 @@ void MPV_common_init_mlib(MpegEncContext *s); #ifdef HAVE_MMI void MPV_common_init_mmi(MpegEncContext *s); #endif +#ifdef ARCH_ARMV4L +void MPV_common_init_armv4l(MpegEncContext *s); +#endif #ifdef ARCH_POWERPC void MPV_common_init_ppc(MpegEncContext *s); #endif diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c index 8a50ccb90..ed34a2d92 100644 --- a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c +++ b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c @@ -24,6 +24,211 @@ #include <sys/sysctl.h> #endif +int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) +{ + int s, i; + vector unsigned char *tv, zero; + vector unsigned char pix1v, pix2v, pix2iv, avgv, t5; + vector unsigned int sad; + vector signed int sumdiffs; + + s = 0; + zero = vec_splat_u8(0); + sad = vec_splat_u32(0); + for(i=0;i<16;i++) { + /* + Read unaligned pixels into our vectors. The vectors are as follows: + pix1v: pix1[0]-pix1[15] + pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16] + */ + tv = (vector unsigned char *) pix1; + pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1)); + + tv = (vector unsigned char *) &pix2[0]; + pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0])); + + tv = (vector unsigned char *) &pix2[1]; + pix2iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[1])); + + /* Calculate the average vector */ + avgv = vec_avg(pix2v, pix2iv); + + /* Calculate a sum of abs differences vector */ + t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv)); + + /* Add each 4 pixel group together and put 4 results into sad */ + sad = vec_sum4s(t5, sad); + + pix1 += line_size; + pix2 += line_size; + } + /* Sum up the four partial sums, and put the result into s */ + sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); + sumdiffs = vec_splat(sumdiffs, 3); + vec_ste(sumdiffs, 0, &s); + + return s; +} + +int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) +{ + int s, i; + vector unsigned char *tv, zero; + vector unsigned char pix1v, pix2v, pix3v, avgv, t5; + vector unsigned int sad; + vector signed int sumdiffs; + uint8_t *pix3 = pix2 + line_size; + + s = 0; + zero = vec_splat_u8(0); + sad = vec_splat_u32(0); + + /* + Due to the fact that pix3 = pix2 + line_size, the pix3 of one + iteration becomes pix2 in the next iteration. We can use this + fact to avoid a potentially expensive unaligned read, each + time around the loop. + Read unaligned pixels into our vectors. The vectors are as follows: + pix2v: pix2[0]-pix2[15] + Split the pixel vectors into shorts + */ + tv = (vector unsigned char *) &pix2[0]; + pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0])); + + for(i=0;i<16;i++) { + /* + Read unaligned pixels into our vectors. The vectors are as follows: + pix1v: pix1[0]-pix1[15] + pix3v: pix3[0]-pix3[15] + */ + tv = (vector unsigned char *) pix1; + pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1)); + + tv = (vector unsigned char *) &pix3[0]; + pix3v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[0])); + + /* Calculate the average vector */ + avgv = vec_avg(pix2v, pix3v); + + /* Calculate a sum of abs differences vector */ + t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv)); + + /* Add each 4 pixel group together and put 4 results into sad */ + sad = vec_sum4s(t5, sad); + + pix1 += line_size; + pix2v = pix3v; + pix3 += line_size; + + } + + /* Sum up the four partial sums, and put the result into s */ + sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); + sumdiffs = vec_splat(sumdiffs, 3); + vec_ste(sumdiffs, 0, &s); + return s; +} + +int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) +{ + int s, i; + uint8_t *pix3 = pix2 + line_size; + vector unsigned char *tv, avgv, t5, zero; + vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv; + vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv; + vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv; + vector unsigned short avghv, avglv, two; + vector unsigned short t1, t2, t3, t4; + vector unsigned int sad; + vector signed int sumdiffs; + + zero = vec_splat_u8(0); + two = vec_splat_u16(2); + sad = vec_splat_u32(0); + + s = 0; + + /* + Due to the fact that pix3 = pix2 + line_size, the pix3 of one + iteration becomes pix2 in the next iteration. We can use this + fact to avoid a potentially expensive unaligned read, as well + as some splitting, and vector addition each time around the loop. + Read unaligned pixels into our vectors. The vectors are as follows: + pix2v: pix2[0]-pix2[15] pix2iv: pix2[1]-pix2[16] + Split the pixel vectors into shorts + */ + tv = (vector unsigned char *) &pix2[0]; + pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0])); + + tv = (vector unsigned char *) &pix2[1]; + pix2iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[1])); + + pix2hv = (vector unsigned short) vec_mergeh(zero, pix2v); + pix2lv = (vector unsigned short) vec_mergel(zero, pix2v); + pix2ihv = (vector unsigned short) vec_mergeh(zero, pix2iv); + pix2ilv = (vector unsigned short) vec_mergel(zero, pix2iv); + t1 = vec_add(pix2hv, pix2ihv); + t2 = vec_add(pix2lv, pix2ilv); + + for(i=0;i<16;i++) { + /* + Read unaligned pixels into our vectors. The vectors are as follows: + pix1v: pix1[0]-pix1[15] + pix3v: pix3[0]-pix3[15] pix3iv: pix3[1]-pix3[16] + */ + tv = (vector unsigned char *) pix1; + pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1)); + + tv = (vector unsigned char *) &pix3[0]; + pix3v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[0])); + + tv = (vector unsigned char *) &pix3[1]; + pix3iv = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix3[1])); + + /* + Note that Altivec does have vec_avg, but this works on vector pairs + and rounds up. We could do avg(avg(a,b),avg(c,d)), but the rounding + would mean that, for example, avg(3,0,0,1) = 2, when it should be 1. + Instead, we have to split the pixel vectors into vectors of shorts, + and do the averaging by hand. + */ + + /* Split the pixel vectors into shorts */ + pix3hv = (vector unsigned short) vec_mergeh(zero, pix3v); + pix3lv = (vector unsigned short) vec_mergel(zero, pix3v); + pix3ihv = (vector unsigned short) vec_mergeh(zero, pix3iv); + pix3ilv = (vector unsigned short) vec_mergel(zero, pix3iv); + + /* Do the averaging on them */ + t3 = vec_add(pix3hv, pix3ihv); + t4 = vec_add(pix3lv, pix3ilv); + + avghv = vec_sr(vec_add(vec_add(t1, t3), two), two); + avglv = vec_sr(vec_add(vec_add(t2, t4), two), two); + + /* Pack the shorts back into a result */ + avgv = vec_pack(avghv, avglv); + + /* Calculate a sum of abs differences vector */ + t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv)); + + /* Add each 4 pixel group together and put 4 results into sad */ + sad = vec_sum4s(t5, sad); + + pix1 += line_size; + pix3 += line_size; + /* Transfer the calculated values for pix3 into pix2 */ + t1 = t3; + t2 = t4; + } + /* Sum up the four partial sums, and put the result into s */ + sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); + sumdiffs = vec_splat(sumdiffs, 3); + vec_ste(sumdiffs, 0, &s); + + return s; +} + int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) { int i, s; @@ -108,6 +313,78 @@ int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) return s; } +int pix_norm1_altivec(uint8_t *pix, int line_size) +{ + int s, i; + vector unsigned char *tv, zero; + vector unsigned char pixv; + vector unsigned int sv; + vector signed int sum; + + zero = vec_splat_u8(0); + sv = vec_splat_u32(0); + + s = 0; + for (i = 0; i < 16; i++) { + /* Read in the potentially unaligned pixels */ + tv = (vector unsigned char *) pix; + pixv = vec_perm(tv[0], tv[1], vec_lvsl(0, pix)); + + /* Square the values, and add them to our sum */ + sv = vec_msum(pixv, pixv, sv); + + pix += line_size; + } + /* Sum up the four partial sums, and put the result into s */ + sum = vec_sums((vector signed int) sv, (vector signed int) zero); + sum = vec_splat(sum, 3); + vec_ste(sum, 0, &s); + + return s; +} + + +int pix_norm_altivec(uint8_t *pix1, uint8_t *pix2, int line_size) +{ + int s, i; + vector unsigned char *tv, zero; + vector unsigned char pix1v, pix2v, t5; + vector unsigned int sv; + vector signed int sum; + + zero = vec_splat_u8(0); + sv = vec_splat_u32(0); + s = 0; + for (i = 0; i < 16; i++) { + /* Read in the potentially unaligned pixels */ + tv = (vector unsigned char *) pix1; + pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1)); + + tv = (vector unsigned char *) pix2; + pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix2)); + + /* + Since we want to use unsigned chars, we can take advantage + of the fact that abs(a-b)^2 = (a-b)^2. + */ + + /* Calculate a sum of abs differences vector */ + t5 = vec_sub(vec_max(pix1v, pix2v), vec_min(pix1v, pix2v)); + + /* Square the values and add them to our sum */ + sv = vec_msum(t5, t5, sv); + + pix1 += line_size; + pix2 += line_size; + } + /* Sum up the four partial sums, and put the result into s */ + sum = vec_sums((vector signed int) sv, (vector signed int) zero); + sum = vec_splat(sum, 3); + vec_ste(sum, 0, &s); + return s; +} + + int pix_sum_altivec(UINT8 * pix, int line_size) { diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h index bdf8f5ffe..94fe3a023 100644 --- a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h +++ b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h @@ -16,9 +16,14 @@ * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - + +extern int pix_abs16x16_x2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); +extern int pix_abs16x16_y2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); +extern int pix_abs16x16_xy2_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); extern int pix_abs16x16_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); extern int pix_abs8x8_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); +extern int pix_norm1_altivec(uint8_t *pix, int line_size); +extern int pix_norm_altivec(uint8_t *pix1, uint8_t *pix2, int line_size); extern int pix_sum_altivec(UINT8 * pix, int line_size); extern void diff_pixels_altivec(DCTELEM* block, const UINT8* s1, const UINT8* s2, int stride); extern void get_pixels_altivec(DCTELEM* block, const UINT8 * pixels, int line_size); diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c index 4be3abb28..ffe3ce063 100644 --- a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c +++ b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c @@ -23,6 +23,8 @@ #include "dsputil_altivec.h" #endif +int mm_flags = 0; + void dsputil_init_ppc(DSPContext* c, unsigned mask) { // Common optimisations whether Altivec or not @@ -31,9 +33,16 @@ void dsputil_init_ppc(DSPContext* c, unsigned mask) #if HAVE_ALTIVEC if (has_altivec()) { + mm_flags |= MM_ALTIVEC; + // Altivec specific optimisations - c->pix_abs16x16 = pix_abs16x16_altivec; + c->pix_abs16x16_x2 = pix_abs16x16_x2_altivec; + c->pix_abs16x16_y2 = pix_abs16x16_y2_altivec; + c->pix_abs16x16_xy2 = pix_abs16x16_xy2_altivec; + c->pix_abs16x16 = pix_abs16x16_altivec; c->pix_abs8x8 = pix_abs8x8_altivec; + c->pix_norm1 = pix_norm1_altivec; + c->pix_norm = pix_norm_altivec; c->pix_sum = pix_sum_altivec; c->diff_pixels = diff_pixels_altivec; c->get_pixels = get_pixels_altivec; diff --git a/src/libffmpeg/xine_decoder.c b/src/libffmpeg/xine_decoder.c index 9ddbf2a7b..9b2b5c836 100644 --- a/src/libffmpeg/xine_decoder.c +++ b/src/libffmpeg/xine_decoder.c @@ -17,7 +17,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: xine_decoder.c,v 1.72 2002/11/20 11:57:43 mroi Exp $ + * $Id: xine_decoder.c,v 1.73 2002/12/02 22:00:14 miguelfreitas Exp $ * * xine decoder plugin using ffmpeg * @@ -38,6 +38,7 @@ #include "buffer.h" #include "metronom.h" #include "xineutils.h" +#include "math.h" #include "libavcodec/avcodec.h" #include "libavcodec/dsputil.h" @@ -50,7 +51,6 @@ typedef struct { video_decoder_class_t decoder_class; - int illegal_vlc; } ff_video_class_t; typedef struct ff_decoder_s { @@ -124,6 +124,7 @@ static void init_video_codec (ff_video_decoder_t *this, AVCodec *codec) { this->context = avcodec_alloc_context(); this->context->width = this->bih.biWidth; this->context->height = this->bih.biHeight; + this->context->fourcc = this->stream->stream_info[XINE_STREAM_INFO_VIDEO_FOURCC]; if (avcodec_open (this->context, codec) < 0) { printf ("ffmpeg: couldn't open decoder\n"); @@ -134,14 +135,6 @@ static void init_video_codec (ff_video_decoder_t *this, AVCodec *codec) { this->decoder_ok = 1; this->stream->video_out->open (this->stream->video_out, this->stream); - /* needed to play streams generated by MS ISO MPEG4 codec. - Michael Niedermayer explained: - M$ "ISO MPEG4" uses illegal vlc code combinations, a ISO MPEG4 compliant - decoder which support error resilience should handle them like errors. - */ - if (this->class->illegal_vlc) - this->context->error_resilience=-1; - if (this->buf) free (this->buf); @@ -309,6 +302,7 @@ static void ff_decode_data (video_decoder_t *this_gen, buf_element_t *buf) { if (buf->decoder_flags & BUF_FLAG_HEADER) { AVCodec *codec = NULL; + xine_bmiheader *bih; int codec_type; #ifdef LOG @@ -317,7 +311,8 @@ static void ff_decode_data (video_decoder_t *this_gen, buf_element_t *buf) { /* init package containing bih */ - memcpy ( &this->bih, buf->content, sizeof (xine_bmiheader)); + bih = (xine_bmiheader *)buf->content; + memcpy ( &this->bih, bih, sizeof (xine_bmiheader)); this->video_step = buf->decoder_info[1]; this->stream->stream_info[XINE_STREAM_INFO_VIDEO_WIDTH] = this->bih.biWidth; @@ -405,6 +400,14 @@ static void ff_decode_data (video_decoder_t *this_gen, buf_element_t *buf) { } init_video_codec (this, codec); + + if( bih->biSize > sizeof(xine_bmiheader) ) { + this->context->extradata_size = bih->biSize - sizeof(xine_bmiheader); + this->context->extradata = malloc(this->context->extradata_size); + memcpy( this->context->extradata, + (uint8_t *)bih + sizeof(xine_bmiheader), + this->context->extradata_size ); + } } else if (this->decoder_ok) { @@ -634,6 +637,7 @@ void avcodec_register_all(void) register_avcodec(&dvvideo_decoder); // register_avcodec(&dvaudio_decoder); register_avcodec(&mjpeg_decoder); + register_avcodec(&mjpegb_decoder); register_avcodec(&wmav1_decoder); register_avcodec(&wmav2_decoder); @@ -653,6 +657,9 @@ static void ff_dispose (video_decoder_t *this_gen) { this->decoder_ok = 0; } + if(this->context && this->context->extradata) + free(this->context->extradata); + if( this->context ) free( this->context ); @@ -730,10 +737,6 @@ static void *init_video_plugin (xine_t *xine, void *data) { this->decoder_class.get_description = ff_video_get_description; this->decoder_class.dispose = ff_video_dispose_class; - this->illegal_vlc = xine->config->register_bool (xine->config, "codec.ffmpeg_illegal_vlc", 1, - _("allow illegal vlc codes in mpeg4 streams"), NULL, - 10, NULL, NULL); - pthread_once( &once_control, init_once_routine ); return this; @@ -784,6 +787,7 @@ static void ff_audio_decode_data (audio_decoder_t *this_gen, buf_element_t *buf) this->context->block_align = audio_header->nBlockAlign; this->context->bit_rate = audio_header->nAvgBytesPerSec * 8; this->context->codec_id = codec->id; + this->context->fourcc = this->stream->stream_info[XINE_STREAM_INFO_AUDIO_FOURCC]; if( audio_header->cbSize > 0 ) { this->context->extradata = malloc(audio_header->cbSize); this->context->extradata_size = audio_header->cbSize; @@ -981,7 +985,7 @@ static uint32_t supported_video_types[] = { BUF_VIDEO_MSMPEG4_V2, BUF_VIDEO_MSMPEG4_V3, BUF_VIDEO_WMV7, - /*BUF_VIDEO_WMV8,*/ + /*BUF_VIDEO_WMV8,*/ BUF_VIDEO_MPEG4, BUF_VIDEO_XVID, BUF_VIDEO_DIVX5, |