diff options
-rw-r--r-- | src/video_out/yuv2rgb.c | 32 | ||||
-rw-r--r-- | src/xine-utils/Makefile.am | 2 | ||||
-rw-r--r-- | src/xine-utils/memcpy.c | 15 | ||||
-rw-r--r-- | src/xine-utils/ppcasm_string.S | 202 | ||||
-rw-r--r-- | src/xine-utils/ppcasm_string.h | 3 |
5 files changed, 236 insertions, 18 deletions
diff --git a/src/video_out/yuv2rgb.c b/src/video_out/yuv2rgb.c index e67743599..deb60d640 100644 --- a/src/video_out/yuv2rgb.c +++ b/src/video_out/yuv2rgb.c @@ -22,7 +22,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: yuv2rgb.c,v 1.30 2002/04/29 23:32:00 jcdutton Exp $ + * $Id: yuv2rgb.c,v 1.31 2002/05/28 12:44:02 siggi Exp $ */ #include "config.h" @@ -1384,7 +1384,7 @@ static void yuv2rgb_c_32 (yuv2rgb_t *this, uint8_t * _dst, while (--dst_height > 0 && dy < 32768) { - memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*4); + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*4); dy += this->step_dy; _dst += this->rgb_stride; @@ -1515,7 +1515,7 @@ static void yuv2rgb_c_24_rgb (yuv2rgb_t *this, uint8_t * _dst, while (--dst_height > 0 && dy < 32768) { - memcpy (_dst, _dst-this->rgb_stride, this->dest_width*3); + xine_fast_memcpy (_dst, _dst-this->rgb_stride, this->dest_width*3); dy += this->step_dy; _dst += this->rgb_stride; @@ -1646,7 +1646,7 @@ static void yuv2rgb_c_24_bgr (yuv2rgb_t *this, uint8_t * _dst, while (--dst_height > 0 && dy < 32768) { - memcpy (_dst, _dst-this->rgb_stride, this->dest_width*3); + xine_fast_memcpy (_dst, _dst-this->rgb_stride, this->dest_width*3); dy += this->step_dy; _dst += this->rgb_stride; @@ -1777,7 +1777,7 @@ static void yuv2rgb_c_16 (yuv2rgb_t *this, uint8_t * _dst, while (--dst_height > 0 && dy < 32768) { - memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); dy += this->step_dy; _dst += this->rgb_stride; @@ -1907,7 +1907,7 @@ static void yuv2rgb_c_8 (yuv2rgb_t *this, uint8_t * _dst, while (--dst_height > 0 && dy < 32768) { - memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); dy += this->step_dy; _dst += this->rgb_stride; @@ -2002,7 +2002,7 @@ static void yuv2rgb_c_gray (yuv2rgb_t *this, uint8_t * _dst, while (--dst_height > 0 && dy < 32768) { - memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); dy += this->step_dy; _dst += this->rgb_stride; @@ -2019,7 +2019,7 @@ static void yuv2rgb_c_gray (yuv2rgb_t *this, uint8_t * _dst, } } else { for (height = this->source_height; --height >= 0; ) { - memcpy(_dst, _py, this->dest_width); + xine_fast_memcpy(_dst, _py, this->dest_width); _dst += this->rgb_stride; _py += this->y_stride; } @@ -2082,7 +2082,7 @@ static void yuv2rgb_c_palette (yuv2rgb_t *this, uint8_t * _dst, while (--dst_height > 0 && dy < 32768) { - memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); dy += this->step_dy; _dst += this->rgb_stride; @@ -2557,7 +2557,7 @@ static void yuy22rgb_c_32 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p) while (--height > 0 && dy < 32768) { - memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*4); + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*4); dy += this->step_dy; _dst += this->rgb_stride; @@ -2635,7 +2635,7 @@ static void yuy22rgb_c_24_rgb (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p) while (--height > 0 && dy < 32768) { - memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*3); + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*3); dy += this->step_dy; _dst += this->rgb_stride; @@ -2713,7 +2713,7 @@ static void yuy22rgb_c_24_bgr (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p) while (--height > 0 && dy < 32768) { - memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*3); + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*3); dy += this->step_dy; _dst += this->rgb_stride; @@ -2787,7 +2787,7 @@ static void yuy22rgb_c_16 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p) while (--height > 0 && dy < 32768) { - memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2); dy += this->step_dy; _dst += this->rgb_stride; @@ -2861,7 +2861,7 @@ static void yuy22rgb_c_8 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p) while (--height > 0 && dy < 32768) { - memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); dy += this->step_dy; _dst += this->rgb_stride; @@ -2901,7 +2901,7 @@ static void yuy22rgb_c_gray (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p) while (--height > 0 && dy < 32768) { - memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); dy += this->step_dy; _dst += this->rgb_stride; @@ -2978,7 +2978,7 @@ static void yuy22rgb_c_palette (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p) while (--height > 0 && dy < 32768) { - memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); + xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width); dy += this->step_dy; _dst += this->rgb_stride; diff --git a/src/xine-utils/Makefile.am b/src/xine-utils/Makefile.am index 333e21e13..34546034f 100644 --- a/src/xine-utils/Makefile.am +++ b/src/xine-utils/Makefile.am @@ -4,7 +4,7 @@ lib_LTLIBRARIES = libxineutils.la -libxineutils_la_SOURCES = utils.c memcpy.c monitor.c cpu_accel.c xine_mutex.c xmllexer.c xmlparser.c +libxineutils_la_SOURCES = utils.c ppcasm_string.S memcpy.c monitor.c cpu_accel.c xine_mutex.c xmllexer.c xmlparser.c libxineutils_la_LDFLAGS = \ -version-info $(LT_CURRENT):$(LT_REVISION):$(LT_AGE) diff --git a/src/xine-utils/memcpy.c b/src/xine-utils/memcpy.c index 564a61cdd..a1e114b70 100644 --- a/src/xine-utils/memcpy.c +++ b/src/xine-utils/memcpy.c @@ -33,6 +33,11 @@ #include "config.h" #endif +#ifdef ARCH_PPC +#include "ppcasm_string.h" +#endif +#include <sys/times.h> + #include <stdlib.h> #include <string.h> #include "xine_internal.h" @@ -368,6 +373,10 @@ static struct { { "MMXEXT optimized memcpy()", mmx2_memcpy, 0, MM_MMXEXT }, { "SSE optimized memcpy()", sse_memcpy, 0, MM_MMXEXT|MM_SSE }, #endif /* ARCH_X86 */ +#ifdef ARCH_PPC + { "ppcasm_memcpy()", ppcasm_memcpy, 0, 0 }, + { "ppcasm_cacheable_memcpy()", ppcasm_cacheable_memcpy, 0, 0 }, +#endif /* ARCH_PPC */ { NULL, NULL, 0, 0 } }; @@ -383,7 +392,8 @@ static unsigned long long int rdtsc() { /* FIXME: implement an equivalent for using optimized memcpy on other architectures */ - return 0; +#warning "Using rdtsc() hack (via times(NULL))" + return times(NULL); } #endif @@ -415,6 +425,9 @@ void xine_probe_fast_memcpy(config_values_t *config) #ifdef ARCH_X86 "kernel", "mmx", "mmxext", "sse", #endif +#ifdef ARCH_PPC + "ppcasm_memcpy", "ppcasm_cacheable_memcpy", +#endif NULL}; config_flags = xine_mm_accel(); diff --git a/src/xine-utils/ppcasm_string.S b/src/xine-utils/ppcasm_string.S new file mode 100644 index 000000000..87457ef6e --- /dev/null +++ b/src/xine-utils/ppcasm_string.S @@ -0,0 +1,202 @@ +/* + * BK Id: SCCS/s.string.S 1.9 10/25/01 10:08:51 trini + */ +/* + * String handling functions for PowerPC. + * + * Copyright (C) 1996 Paul Mackerras. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "../../config.h" + +#if ARCH_PPC + +#warning Be forewarned - using PowerPC assembly + +#define __KERNEL__ +#define __ASSEMBLY__ + +#include "ppc_asm.tmpl" +#include <linux/config.h> +#include <asm/processor.h> +#include <asm/cache.h> +#include <asm/errno.h> + +#define COPY_16_BYTES \ + lwz r7,4(r4); \ + lwz r8,8(r4); \ + lwz r9,12(r4); \ + lwzu r10,16(r4); \ + stw r7,4(r6); \ + stw r8,8(r6); \ + stw r9,12(r6); \ + stwu r10,16(r6) + +#define COPY_16_BYTES_WITHEX(n) \ +8 ## n ## 0: \ + lwz r7,4(r4); \ +8 ## n ## 1: \ + lwz r8,8(r4); \ +8 ## n ## 2: \ + lwz r9,12(r4); \ +8 ## n ## 3: \ + lwzu r10,16(r4); \ +8 ## n ## 4: \ + stw r7,4(r6); \ +8 ## n ## 5: \ + stw r8,8(r6); \ +8 ## n ## 6: \ + stw r9,12(r6); \ +8 ## n ## 7: \ + stwu r10,16(r6) + +#define COPY_16_BYTES_EXCODE(n) \ +9 ## n ## 0: \ + addi r5,r5,-(16 * n); \ + b 104f; \ +9 ## n ## 1: \ + addi r5,r5,-(16 * n); \ + b 105f; \ +.section __ex_table,"a"; \ + .align 2; \ + .long 8 ## n ## 0b,9 ## n ## 0b; \ + .long 8 ## n ## 1b,9 ## n ## 0b; \ + .long 8 ## n ## 2b,9 ## n ## 0b; \ + .long 8 ## n ## 3b,9 ## n ## 0b; \ + .long 8 ## n ## 4b,9 ## n ## 1b; \ + .long 8 ## n ## 5b,9 ## n ## 1b; \ + .long 8 ## n ## 6b,9 ## n ## 1b; \ + .long 8 ## n ## 7b,9 ## n ## 1b; \ + .text + + .text + +CACHELINE_BYTES = L1_CACHE_LINE_SIZE +LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE +CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1) + + .global ppcasm_cacheable_memcpy +ppcasm_cacheable_memcpy: +#if 0 /* this part causes "error loading shared library: unexpected reloc type + 0x0b (???) */ + add r7,r3,r5 /* test if the src & dst overlap */ + add r8,r4,r5 + cmplw 0,r4,r7 + cmplw 1,r3,r8 + crand 0,0,4 /* cr0.lt &= cr1.lt */ + blt ppcasm_memcpy /* if regions overlap */ +#endif + addi r4,r4,-4 + addi r6,r3,-4 + neg r0,r3 + andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ + beq 58f + + cmplw 0,r5,r0 /* is this more than total to do? */ + blt 63f /* if not much to do */ + andi. r8,r0,3 /* get it word-aligned first */ + subf r5,r0,r5 + mtctr r8 + beq+ 61f +70: lbz r9,4(r4) /* do some bytes */ + stb r9,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 70b +61: srwi. r0,r0,2 + mtctr r0 + beq 58f +72: lwzu r9,4(r4) /* do some words */ + stwu r9,4(r6) + bdnz 72b + +58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ + clrlwi r5,r5,32-LG_CACHELINE_BYTES + li r11,4 + mtctr r0 + beq 63f +53: +#if !defined(CONFIG_8xx) + dcbz r11,r6 +#endif + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 32 + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 64 + COPY_16_BYTES + COPY_16_BYTES +#if L1_CACHE_LINE_SIZE >= 128 + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES + COPY_16_BYTES +#endif +#endif +#endif + bdnz 53b + +63: srwi. r0,r5,2 + mtctr r0 + beq 64f +30: lwzu r0,4(r4) + stwu r0,4(r6) + bdnz 30b + +64: andi. r0,r5,3 + mtctr r0 + beq+ 65f +40: lbz r0,4(r4) + stb r0,4(r6) + addi r4,r4,1 + addi r6,r6,1 + bdnz 40b +65: blr + + .globl ppcasm_memcpy +ppcasm_memcpy: + srwi. r7,r5,3 + addi r6,r3,-4 + addi r4,r4,-4 + beq 2f /* if less than 8 bytes to do */ + andi. r0,r6,3 /* get dest word aligned */ + mtctr r7 + bne 5f +1: lwz r7,4(r4) + lwzu r8,8(r4) + stw r7,4(r6) + stwu r8,8(r6) + bdnz 1b + andi. r5,r5,7 +2: cmplwi 0,r5,4 + blt 3f + lwzu r0,4(r4) + addi r5,r5,-4 + stwu r0,4(r6) +3: cmpwi 0,r5,0 + beqlr + mtctr r5 + addi r4,r4,3 + addi r6,r6,3 +4: lbzu r0,1(r4) + stbu r0,1(r6) + bdnz 4b + blr +5: subfic r0,r0,4 + mtctr r0 +6: lbz r7,4(r4) + addi r4,r4,1 + stb r7,4(r6) + addi r6,r6,1 + bdnz 6b + subf r5,r0,r5 + rlwinm. r7,r5,32-3,3,31 + beq 2b + mtctr r7 + b 1b + +#endif /* ARCH_PPC */ diff --git a/src/xine-utils/ppcasm_string.h b/src/xine-utils/ppcasm_string.h new file mode 100644 index 000000000..b4b3f9431 --- /dev/null +++ b/src/xine-utils/ppcasm_string.h @@ -0,0 +1,3 @@ +#include <stdlib.h> +static void *ppcasm_cacheable_memcpy(void *, const void *, size_t); +static void *ppcasm_memcpy(void *, const void *, size_t); |