diff options
author | Guenter Bartsch <guenter@users.sourceforge.net> | 2001-08-07 12:41:46 +0000 |
---|---|---|
committer | Guenter Bartsch <guenter@users.sourceforge.net> | 2001-08-07 12:41:46 +0000 |
commit | 7925b0a98d936c4b8ad0a5fee9ef08b3b2dafd5e (patch) | |
tree | 472f0d251a6071daafa7c1d6fc4cfe9e289af7f6 /src/libffmpeg/libavcodec/dsputil_mmx_avg.h | |
parent | c002c9aa9c541fa75bc8e9a5fb15722f70d853f9 (diff) | |
download | xine-lib-7925b0a98d936c4b8ad0a5fee9ef08b3b2dafd5e.tar.gz xine-lib-7925b0a98d936c4b8ad0a5fee9ef08b3b2dafd5e.tar.bz2 |
introducing the ffmpeg video decoder plugin for ms mpeg 4, opendivx and motion jpeg decoding
CVS patchset: 395
CVS date: 2001/08/07 12:41:46
Diffstat (limited to 'src/libffmpeg/libavcodec/dsputil_mmx_avg.h')
-rw-r--r-- | src/libffmpeg/libavcodec/dsputil_mmx_avg.h | 344 |
1 files changed, 344 insertions, 0 deletions
diff --git a/src/libffmpeg/libavcodec/dsputil_mmx_avg.h b/src/libffmpeg/libavcodec/dsputil_mmx_avg.h new file mode 100644 index 000000000..5cd640f71 --- /dev/null +++ b/src/libffmpeg/libavcodec/dsputil_mmx_avg.h @@ -0,0 +1,344 @@ +/* + * DSP utils : average functions are compiled twice for 3dnow/mmx2 + * Copyright (c) 2000, 2001 Gerard Lantau. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * MMX optimization by Nick Kurshev <nickols_k@mail.ru> + */ + +static void DEF(put_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + int dh, hh; + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + hh=h>>2; + dh=h&3; + while(hh--) { + __asm __volatile( + "movq %4, %%mm0\n\t" + "movq 1%4, %%mm1\n\t" + "movq %5, %%mm2\n\t" + "movq 1%5, %%mm3\n\t" + "movq %6, %%mm4\n\t" + "movq 1%6, %%mm5\n\t" + "movq %7, %%mm6\n\t" + "movq 1%7, %%mm7\n\t" + PAVGB" %%mm1, %%mm0\n\t" + PAVGB" %%mm3, %%mm2\n\t" + PAVGB" %%mm5, %%mm4\n\t" + PAVGB" %%mm7, %%mm6\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm2, %1\n\t" + "movq %%mm4, %2\n\t" + "movq %%mm6, %3\n\t" + :"=m"(*p), "=m"(*(p+line_size)), "=m"(*(p+line_size*2)), "=m"(*(p+line_size*3)) + :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)), "m"(*(pix+line_size*3)) + :"memory"); + pix += line_size*4; p += line_size*4; + } + while(dh--) { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq 1%1, %%mm1\n\t" + PAVGB" %%mm1, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix += line_size; p += line_size; + } +} + +static void DEF(put_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + int dh, hh; + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + + hh=h>>1; + dh=h&1; + while(hh--) { + __asm __volatile( + "movq %2, %%mm0\n\t" + "movq %3, %%mm1\n\t" + "movq %4, %%mm2\n\t" + PAVGB" %%mm1, %%mm0\n\t" + PAVGB" %%mm2, %%mm1\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm1, %1\n\t" + :"=m"(*p), "=m"(*(p+line_size)) + :"m"(*pix), "m"(*(pix+line_size)), + "m"(*(pix+line_size*2)) + :"memory"); + pix += line_size*2; + p += line_size*2; + } + if(dh) { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %2, %%mm1\n\t" + PAVGB" %%mm1, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix), + "m"(*(pix+line_size)) + :"memory"); + } +} + +static void DEF(avg_pixels)(UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + int dh, hh; + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + hh=h>>2; + dh=h&3; + while(hh--) { + __asm __volatile( + "movq %0, %%mm0\n\t" + "movq %4, %%mm1\n\t" + "movq %1, %%mm2\n\t" + "movq %5, %%mm3\n\t" + "movq %2, %%mm4\n\t" + "movq %6, %%mm5\n\t" + "movq %3, %%mm6\n\t" + "movq %7, %%mm7\n\t" + PAVGB" %%mm1, %%mm0\n\t" + PAVGB" %%mm3, %%mm2\n\t" + PAVGB" %%mm5, %%mm4\n\t" + PAVGB" %%mm7, %%mm6\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm2, %1\n\t" + "movq %%mm4, %2\n\t" + "movq %%mm6, %3\n\t" + :"=m"(*p), "=m"(*(p+line_size)), "=m"(*(p+line_size*2)), "=m"(*(p+line_size*3)) + :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)), "m"(*(pix+line_size*3)) + :"memory"); + pix += line_size*4; p += line_size*4; + } + while(dh--) { + __asm __volatile( + "movq %0, %%mm0\n\t" + "movq %1, %%mm1\n\t" + PAVGB" %%mm1, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix += line_size; p += line_size; + } +} + +static void DEF(avg_pixels_x2)( UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + int dh, hh; + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + hh=h>>1; + dh=h&1; + while(hh--) { + __asm __volatile( + "movq %2, %%mm2\n\t" + "movq 1%2, %%mm3\n\t" + "movq %3, %%mm4\n\t" + "movq 1%3, %%mm5\n\t" + "movq %0, %%mm0\n\t" + "movq %1, %%mm1\n\t" + PAVGB" %%mm3, %%mm2\n\t" + PAVGB" %%mm2, %%mm0\n\t" + PAVGB" %%mm5, %%mm4\n\t" + PAVGB" %%mm4, %%mm1\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm1, %1\n\t" + :"=m"(*p), "=m"(*(p+line_size)) + :"m"(*pix), "m"(*(pix+line_size)) + :"memory"); + pix += line_size*2; + p += line_size*2; + } + if(dh) { + __asm __volatile( + "movq %1, %%mm1\n\t" + "movq 1%1, %%mm2\n\t" + "movq %0, %%mm0\n\t" + PAVGB" %%mm2, %%mm1\n\t" + PAVGB" %%mm1, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + } +} + +static void DEF(avg_pixels_y2)( UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + int dh, hh; + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + hh=h>>1; + dh=h&1; + while(hh--) { + __asm __volatile( + "movq %2, %%mm2\n\t" + "movq %3, %%mm3\n\t" + "movq %3, %%mm4\n\t" + "movq %4, %%mm5\n\t" + "movq %0, %%mm0\n\t" + "movq %1, %%mm1\n\t" + PAVGB" %%mm3, %%mm2\n\t" + PAVGB" %%mm2, %%mm0\n\t" + PAVGB" %%mm5, %%mm4\n\t" + PAVGB" %%mm4, %%mm1\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm1, %1\n\t" + :"=m"(*p), "=m"(*(p+line_size)) + :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)) + :"memory"); + pix += line_size*2; + p += line_size*2; + } + if(dh) { + __asm __volatile( + "movq %1, %%mm1\n\t" + "movq %2, %%mm2\n\t" + "movq %0, %%mm0\n\t" + PAVGB" %%mm2, %%mm1\n\t" + PAVGB" %%mm1, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix), "m"(*(pix+line_size)) + :"memory"); + } +} + +static void DEF(avg_pixels_xy2)( UINT8 *block, const UINT8 *pixels, int line_size, int h) +{ + UINT8 *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7\n\t" + "movq %0, %%mm6\n\t" + ::"m"(mm_wtwo):"memory"); + do { + __asm __volatile( + "movq %1, %%mm0\n\t" + "movq %2, %%mm1\n\t" + "movq 1%1, %%mm4\n\t" + "movq 1%2, %%mm5\n\t" + "movq %%mm0, %%mm2\n\t" + "movq %%mm1, %%mm3\n\t" + "punpcklbw %%mm7, %%mm0\n\t" + "punpcklbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm1, %%mm0\n\t" + "paddusw %%mm3, %%mm2\n\t" + "movq %%mm4, %%mm1\n\t" + "movq %%mm5, %%mm3\n\t" + "punpcklbw %%mm7, %%mm4\n\t" + "punpcklbw %%mm7, %%mm5\n\t" + "punpckhbw %%mm7, %%mm1\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "paddusw %%mm5, %%mm4\n\t" + "paddusw %%mm3, %%mm1\n\t" + "paddusw %%mm6, %%mm4\n\t" + "paddusw %%mm6, %%mm1\n\t" + "paddusw %%mm4, %%mm0\n\t" + "paddusw %%mm1, %%mm2\n\t" + "psrlw $2, %%mm0\n\t" + "psrlw $2, %%mm2\n\t" + "packuswb %%mm2, %%mm0\n\t" + PAVGB" %0, %%mm0\n\t" + "movq %%mm0, %0\n\t" + :"=m"(*p) + :"m"(*pix), + "m"(*(pix+line_size)) + :"memory"); + pix += line_size; + p += line_size ; + } while(--h); +} + +static void DEF(sub_pixels_x2)( DCTELEM *block, const UINT8 *pixels, int line_size, int h) +{ + DCTELEM *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7":::"memory"); + do { + __asm __volatile( + "movq 1%1, %%mm2\n\t" + "movq %0, %%mm0\n\t" + PAVGB" %1, %%mm2\n\t" + "movq 8%0, %%mm1\n\t" + "movq %%mm2, %%mm3\n\t" + "punpcklbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "psubsw %%mm2, %%mm0\n\t" + "psubsw %%mm3, %%mm1\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm1, 8%0\n\t" + :"=m"(*p) + :"m"(*pix) + :"memory"); + pix += line_size; + p += 8; + } while (--h); +} + +static void DEF(sub_pixels_y2)( DCTELEM *block, const UINT8 *pixels, int line_size, int h) +{ + DCTELEM *p; + const UINT8 *pix; + p = block; + pix = pixels; + __asm __volatile( + "pxor %%mm7, %%mm7":::"memory"); + do { + __asm __volatile( + "movq %2, %%mm2\n\t" + "movq %0, %%mm0\n\t" + PAVGB" %1, %%mm2\n\t" + "movq 8%0, %%mm1\n\t" + "movq %%mm2, %%mm3\n\t" + "punpcklbw %%mm7, %%mm2\n\t" + "punpckhbw %%mm7, %%mm3\n\t" + "psubsw %%mm2, %%mm0\n\t" + "psubsw %%mm3, %%mm1\n\t" + "movq %%mm0, %0\n\t" + "movq %%mm1, 8%0\n\t" + :"=m"(*p) + :"m"(*pix), "m"(*(pix+line_size)) + :"memory"); + pix += line_size; + p += 8; + } while (--h); +} + |