Diffstat (limited to 'contrib/ffmpeg/libavcodec/imgresample.c')
-rw-r--r--  contrib/ffmpeg/libavcodec/imgresample.c | 148
1 file changed, 13 insertions(+), 135 deletions(-)
diff --git a/contrib/ffmpeg/libavcodec/imgresample.c b/contrib/ffmpeg/libavcodec/imgresample.c
index 2722d5acf..c481e9df3 100644
--- a/contrib/ffmpeg/libavcodec/imgresample.c
+++ b/contrib/ffmpeg/libavcodec/imgresample.c
@@ -28,8 +28,8 @@
 #include "swscale.h"
 #include "dsputil.h"
 
-#ifdef USE_FASTMEMCPY
-#include "libvo/fastmemcpy.h"
+#ifdef HAVE_ALTIVEC
+#include "ppc/imgresample_altivec.h"
 #endif
 
 #define NB_COMPONENTS 3
@@ -48,6 +48,7 @@
 #define LINE_BUF_HEIGHT (NB_TAPS * 4)
 
 struct SwsContext {
+    AVClass *av_class;
     struct ImgReSampleContext *resampling_ctx;
     enum PixelFormat src_pix_fmt, dst_pix_fmt;
 };
@@ -282,136 +283,9 @@ static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
     }
     emms();
 }
-#endif
-
-#ifdef HAVE_ALTIVEC
-typedef union {
-    vector unsigned char v;
-    unsigned char c[16];
-} vec_uc_t;
-
-typedef union {
-    vector signed short v;
-    signed short s[8];
-} vec_ss_t;
-
-void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
-                          int wrap, int16_t *filter)
-{
-    int sum, i;
-    const uint8_t *s;
-    vector unsigned char *tv, tmp, dstv, zero;
-    vec_ss_t srchv[4], srclv[4], fv[4];
-    vector signed short zeros, sumhv, sumlv;
-    s = src;
-
-    for(i=0;i<4;i++)
-    {
-        /*
-           The vec_madds later on does an implicit >>15 on the result.
-           Since FILTER_BITS is 8, and we have 15 bits of magnitude in
-           a signed short, we have just enough bits to pre-shift our
-           filter constants <<7 to compensate for vec_madds.
-        */
-        fv[i].s[0] = filter[i] << (15-FILTER_BITS);
-        fv[i].v = vec_splat(fv[i].v, 0);
-    }
-
-    zero = vec_splat_u8(0);
-    zeros = vec_splat_s16(0);
-
+#endif /* HAVE_MMX */
 
-    /*
-       When we're resampling, we'd ideally like both our input buffers,
-       and output buffers to be 16-byte aligned, so we can do both aligned
-       reads and writes. Sadly we can't always have this at the moment, so
-       we opt for aligned writes, as unaligned writes have a huge overhead.
-       To do this, do enough scalar resamples to get dst 16-byte aligned.
-    */
-    i = (-(int)dst) & 0xf;
-    while(i>0) {
-        sum = s[0 * wrap] * filter[0] +
-              s[1 * wrap] * filter[1] +
-              s[2 * wrap] * filter[2] +
-              s[3 * wrap] * filter[3];
-        sum = sum >> FILTER_BITS;
-        if (sum<0) sum = 0; else if (sum>255) sum=255;
-        dst[0] = sum;
-        dst++;
-        s++;
-        dst_width--;
-        i--;
-    }
-
-    /* Do our altivec resampling on 16 pixels at once. */
-    while(dst_width>=16) {
-        /*
-           Read 16 (potentially unaligned) bytes from each of
-           4 lines into 4 vectors, and split them into shorts.
-           Interleave the multipy/accumulate for the resample
-           filter with the loads to hide the 3 cycle latency
-           the vec_madds have.
-        */
-        tv = (vector unsigned char *) &s[0 * wrap];
-        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[i * wrap]));
-        srchv[0].v = (vector signed short) vec_mergeh(zero, tmp);
-        srclv[0].v = (vector signed short) vec_mergel(zero, tmp);
-        sumhv = vec_madds(srchv[0].v, fv[0].v, zeros);
-        sumlv = vec_madds(srclv[0].v, fv[0].v, zeros);
-
-        tv = (vector unsigned char *) &s[1 * wrap];
-        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[1 * wrap]));
-        srchv[1].v = (vector signed short) vec_mergeh(zero, tmp);
-        srclv[1].v = (vector signed short) vec_mergel(zero, tmp);
-        sumhv = vec_madds(srchv[1].v, fv[1].v, sumhv);
-        sumlv = vec_madds(srclv[1].v, fv[1].v, sumlv);
-
-        tv = (vector unsigned char *) &s[2 * wrap];
-        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[2 * wrap]));
-        srchv[2].v = (vector signed short) vec_mergeh(zero, tmp);
-        srclv[2].v = (vector signed short) vec_mergel(zero, tmp);
-        sumhv = vec_madds(srchv[2].v, fv[2].v, sumhv);
-        sumlv = vec_madds(srclv[2].v, fv[2].v, sumlv);
-
-        tv = (vector unsigned char *) &s[3 * wrap];
-        tmp = vec_perm(tv[0], tv[1], vec_lvsl(0, &s[3 * wrap]));
-        srchv[3].v = (vector signed short) vec_mergeh(zero, tmp);
-        srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
-        sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
-        sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
-
-        /*
-           Pack the results into our destination vector,
-           and do an aligned write of that back to memory.
-        */
-        dstv = vec_packsu(sumhv, sumlv) ;
-        vec_st(dstv, 0, (vector unsigned char *) dst);
-
-        dst+=16;
-        s+=16;
-        dst_width-=16;
-    }
-
-    /*
-       If there are any leftover pixels, resample them
-       with the slow scalar method.
-    */
-    while(dst_width>0) {
-        sum = s[0 * wrap] * filter[0] +
-              s[1 * wrap] * filter[1] +
-              s[2 * wrap] * filter[2] +
-              s[3 * wrap] * filter[3];
-        sum = sum >> FILTER_BITS;
-        if (sum<0) sum = 0; else if (sum>255) sum=255;
-        dst[0] = sum;
-        dst++;
-        s++;
-        dst_width--;
-    }
-}
-#endif
-
-/* slow version to handle limit cases. Does not need optimisation */
+/* slow version to handle limit cases. Does not need optimization */
 static void h_resample_slow(uint8_t *dst, int dst_width,
                             const uint8_t *src, int src_width,
                             int src_start, int src_incr, int16_t *filters)
@@ -517,7 +391,7 @@ static void component_resample(ImgReSampleContext *s,
             h_resample(new_line, owidth,
                        src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                        &s->h_filters[0][0]);
-            /* handle ring buffer wraping */
+            /* handle ring buffer wrapping */
            if (ring_y >= LINE_BUF_HEIGHT) {
                 memcpy(s->line_buf + (ring_y - LINE_BUF_HEIGHT) * owidth,
                        new_line, owidth);
@@ -646,7 +520,9 @@ struct SwsContext *sws_getContext(int srcW, int srcH, int srcFormat,
     struct SwsContext *ctx;
 
     ctx = av_malloc(sizeof(struct SwsContext));
-    if (ctx == NULL) {
+    if (ctx)
+        ctx->av_class = av_mallocz(sizeof(AVClass));
+    if (!ctx || !ctx->av_class) {
         av_log(NULL, AV_LOG_ERROR, "Cannot allocate a resampling context!\n");
 
         return NULL;
@@ -680,6 +556,7 @@ void sws_freeContext(struct SwsContext *ctx)
     } else {
         av_free(ctx->resampling_ctx);
     }
 
+    av_free(ctx->av_class);
     av_free(ctx);
 }
 
@@ -815,6 +692,7 @@ the_end:
 #ifdef TEST
 
 #include <stdio.h>
+#undef exit
 
 /* input */
 #define XSIZE 256
@@ -942,8 +820,8 @@ int main(int argc, char **argv)
             exit(1);
         }
         av_log(NULL, AV_LOG_INFO, "MMX OK\n");
-#endif
+#endif /* HAVE_MMX */
 
     return 0;
 }
-#endif
+#endif /* TEST */
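
The AltiVec routine removed above (now provided by ppc/imgresample_altivec.h) is a 4-tap vertical polyphase filter. The following standalone sketch is hypothetical — the reference function and the small test driver are not part of the commit — but the arithmetic comes straight from the scalar prologue/epilogue loops in the removed code: a FILTER_BITS fixed-point sum, a shift, and a clamp to 0..255. It also checks the pre-shift identity the removed comment relies on: vec_madds does an implicit >>15, so taps pre-shifted by 15 - FILTER_BITS reproduce the plain scalar (x * f) >> FILTER_BITS.

#include <assert.h>
#include <stdint.h>

#define FILTER_BITS 8   /* same fixed-point scale as imgresample.c */

/* Hypothetical scalar reference: one output line of the 4-tap vertical
 * filter, matching the prologue/epilogue loops of the removed
 * v_resample16_altivec(). wrap is the stride between source lines. */
static void v_resample4_ref(uint8_t *dst, int dst_width,
                            const uint8_t *src, int wrap,
                            const int16_t *filter)
{
    while (dst_width-- > 0) {
        int sum = src[0 * wrap] * filter[0] +
                  src[1 * wrap] * filter[1] +
                  src[2 * wrap] * filter[2] +
                  src[3 * wrap] * filter[3];
        sum >>= FILTER_BITS;            /* drop the fixed-point scale */
        if (sum < 0)   sum = 0;         /* clamp to the 8-bit pixel range */
        if (sum > 255) sum = 255;
        *dst++ = sum;
        src++;
    }
}

int main(void)
{
    /* Taps summing to 256 give unity gain at FILTER_BITS == 8. */
    const int16_t filter[4] = { 32, 96, 96, 32 };
    uint8_t src[4 * 8], dst[8];
    for (int i = 0; i < (int)sizeof(src); i++)
        src[i] = 100;                   /* flat gray input */
    v_resample4_ref(dst, 8, src, 8, filter);
    assert(dst[0] == 100);              /* unity gain preserves the value */

    /* Pre-shift identity used by the removed AltiVec code: vec_madds
     * shifts the product right by 15, so taps shifted left by
     * 15 - FILTER_BITS match the scalar (x * f) >> FILTER_BITS path. */
    int x = 200, f = 64;
    assert(((x * (f << (15 - FILTER_BITS))) >> 15) == ((x * f) >> FILTER_BITS));
    return 0;
}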
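
The sws_getContext()/sws_freeContext() hunks pair a second allocation, the AVClass, with the context and release it alongside. Below is a minimal sketch of that pattern with the FFmpeg types stubbed out so it compiles on its own; note that, unlike the error path in the hunk above, the sketch also frees a half-built context when the AVClass allocation fails (free(NULL) is a defined no-op, so no extra check is needed).

#include <stdlib.h>

/* Stand-ins for the FFmpeg types; just enough for the sketch. */
typedef struct AVClass { const char *class_name; } AVClass;
struct SwsContext {
    AVClass *av_class;
    /* resampling state elided */
};

/* Mirrors the allocation pattern from the sws_getContext() hunk:
 * allocate the context, then its AVClass, and fail if either is NULL. */
static struct SwsContext *alloc_context(void)
{
    struct SwsContext *ctx = malloc(sizeof(*ctx));
    if (ctx)
        ctx->av_class = calloc(1, sizeof(AVClass)); /* zeroed, like av_mallocz */
    if (!ctx || !ctx->av_class) {
        free(ctx);      /* safe even when ctx is NULL */
        return NULL;
    }
    return ctx;
}

/* Mirrors sws_freeContext(): release the AVClass before the context. */
static void free_context(struct SwsContext *ctx)
{
    if (!ctx)
        return;
    free(ctx->av_class);
    free(ctx);
}

int main(void)
{
    struct SwsContext *ctx = alloc_context();
    free_context(ctx);  /* handles both NULL and live contexts */
    return 0;
}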