diff options
author | Manfred Tremmel <manfredtremmel@users.sourceforge.net> | 2004-04-25 17:43:58 +0000 |
---|---|---|
committer | Manfred Tremmel <manfredtremmel@users.sourceforge.net> | 2004-04-25 17:43:58 +0000 |
commit | acb7dc0f256afc24e875a168da989ef25d86b7b7 (patch) | |
tree | facbe6040f87272a10af25f18bbcdd1f5babb75f | |
parent | 62ce4ec803f98fafe4b1b70a0ad27b3bf5751611 (diff) | |
download | xine-lib-acb7dc0f256afc24e875a168da989ef25d86b7b7.tar.gz xine-lib-acb7dc0f256afc24e875a168da989ef25d86b7b7.tar.bz2 |
added ppcasm_cacheable_memcpy fix from David Woodhouse <dwmw2@infradead.org>
CVS patchset: 6436
CVS date: 2004/04/25 17:43:58
-rw-r--r-- | src/xine-utils/cpu_accel.c | 19 | ||||
-rw-r--r-- | src/xine-utils/memcpy.c | 2 | ||||
-rw-r--r-- | src/xine-utils/ppcasm_string.S | 86 | ||||
-rw-r--r-- | src/xine-utils/ppcasm_string.h | 4 | ||||
-rw-r--r-- | src/xine-utils/xineutils.h | 67 |
5 files changed, 82 insertions, 96 deletions
diff --git a/src/xine-utils/cpu_accel.c b/src/xine-utils/cpu_accel.c index 33a168aea..962eb7456 100644 --- a/src/xine-utils/cpu_accel.c +++ b/src/xine-utils/cpu_accel.c @@ -41,7 +41,7 @@ static uint32_t arch_accel (void) { uint32_t caps; - /* No need to test for this on AMD64, we know what the + /* No need to test for this on AMD64, we know what the platform has. */ caps = MM_ACCEL_X86_MMX | MM_ACCEL_X86_SSE | MM_ACCEL_X86_MMXEXT | MM_ACCEL_X86_SSE2; @@ -112,8 +112,8 @@ static uint32_t arch_accel (void) caps |= MM_ACCEL_X86_SSE | MM_ACCEL_X86_MMXEXT; if (edx & 0x04000000) /* SSE2 */ - caps |= MM_ACCEL_X86_SSE2; - + caps |= MM_ACCEL_X86_SSE2; + cpuid (0x80000000, eax, ebx, ecx, edx); if (eax < 0x80000001) /* no extended capabilities */ return caps; @@ -157,10 +157,13 @@ static void sigill_handler (int sig) static uint32_t arch_accel (void) { + /* FIXME: Autodetect cache line size via AUX ELF vector or otherwise */ + uint32_t flags = MM_ACCEL_PPC_CACHE32; + signal (SIGILL, sigill_handler); if (sigsetjmp (jmpbuf, 1)) { - signal (SIGILL, SIG_DFL); - return 0; + signal (SIGILL, SIG_DFL); + return flags; } canjump = 1; @@ -171,7 +174,7 @@ static uint32_t arch_accel (void) : "r" (-1)); signal (SIGILL, SIG_DFL); - return MM_ACCEL_PPC_ALTIVEC; + return flags|MM_ACCEL_PPC_ALTIVEC; } #endif /* ARCH_PPC */ @@ -207,7 +210,7 @@ uint32_t xine_mm_accel (void) if( accel & MM_ACCEL_X86_SSE ) { void (*old_sigill_handler)(int); - old_sigill_handler = signal (SIGILL, sigill_handler); + old_sigill_handler = signal (SIGILL, sigill_handler); if (setjmp(sigill_return)) { lprintf ("OS doesn't support SSE instructions.\n"); @@ -220,7 +223,7 @@ uint32_t xine_mm_accel (void) } #endif /* _MSC_VER */ #endif /* ARCH_X86 || ARCH_X86_64 */ - + if(getenv("XINE_NO_ACCEL")) { accel = 0; } diff --git a/src/xine-utils/memcpy.c b/src/xine-utils/memcpy.c index 46bdc8b53..8353d6ba1 100644 --- a/src/xine-utils/memcpy.c +++ b/src/xine-utils/memcpy.c @@ -403,7 +403,7 @@ static struct { #endif /* ARCH_X86 */ 
#ifdef ARCH_PPC { "ppcasm_memcpy()", ppcasm_memcpy, 0, 0 }, - { "ppcasm_cacheable_memcpy()", ppcasm_cacheable_memcpy, 0, 0 }, + { "ppcasm_cacheable_memcpy()", ppcasm_cacheable_memcpy, 0, MM_ACCEL_PPC_CACHE32 }, #endif /* ARCH_PPC */ { NULL, NULL, 0, 0 } }; diff --git a/src/xine-utils/ppcasm_string.S b/src/xine-utils/ppcasm_string.S index ff4013e07..3091a9d68 100644 --- a/src/xine-utils/ppcasm_string.S +++ b/src/xine-utils/ppcasm_string.S @@ -16,16 +16,11 @@ #ifdef ARCH_PPC -#warning Be forewarned - using PowerPC assembly - -#define __KERNEL__ -#define __ASSEMBLY__ +//#warning Be forewarned - using PowerPC assembly #include "ppc_asm.tmpl" -#include <linux/config.h> -#include <asm/processor.h> -#include <asm/cache.h> -#include <asm/errno.h> +#define N_FUN 36 +#define N_SO 100 #define COPY_16_BYTES \ lwz r7,4(r4); \ @@ -37,60 +32,42 @@ stw r9,12(r6); \ stwu r10,16(r6) -#define COPY_16_BYTES_WITHEX(n) \ -8 ## n ## 0: \ - lwz r7,4(r4); \ -8 ## n ## 1: \ - lwz r8,8(r4); \ -8 ## n ## 2: \ - lwz r9,12(r4); \ -8 ## n ## 3: \ - lwzu r10,16(r4); \ -8 ## n ## 4: \ - stw r7,4(r6); \ -8 ## n ## 5: \ - stw r8,8(r6); \ -8 ## n ## 6: \ - stw r9,12(r6); \ -8 ## n ## 7: \ - stwu r10,16(r6) +#define __stringify_1(x) #x +#define __stringify(x) __stringify_1(x) -#define COPY_16_BYTES_EXCODE(n) \ -9 ## n ## 0: \ - addi r5,r5,-(16 * n); \ - b 104f; \ -9 ## n ## 1: \ - addi r5,r5,-(16 * n); \ - b 105f; \ -.section __ex_table,"a"; \ - .align 2; \ - .long 8 ## n ## 0b,9 ## n ## 0b; \ - .long 8 ## n ## 1b,9 ## n ## 0b; \ - .long 8 ## n ## 2b,9 ## n ## 0b; \ - .long 8 ## n ## 3b,9 ## n ## 0b; \ - .long 8 ## n ## 4b,9 ## n ## 1b; \ - .long 8 ## n ## 5b,9 ## n ## 1b; \ - .long 8 ## n ## 6b,9 ## n ## 1b; \ - .long 8 ## n ## 7b,9 ## n ## 1b; \ - .text +#define _GLOBFN(n)\ + .stabs __stringify(n:F-1),N_FUN,0,0,n;\ + .type n,@function; \ + .globl n;\ +n: + +#define _SIZE(n) \ + .size n, .-n .text + .stabs "src/xine-utils",N_SO,0,0,. + .stabs "ppcasm_string.S",N_SO,0,0,. 
+ +#warning FIXME: Get cache line sizes from /proc +#define L1_CACHE_LINE_SIZE 32 CACHELINE_BYTES = 32 LG_CACHELINE_BYTES = 5 CACHELINE_MASK = (32 -1) - - .global ppcasm_cacheable_memcpy -ppcasm_cacheable_memcpy: -#if 0 /* this part causes "error loading shared library: unexpected reloc type - 0x0b (???) */ +/* + * This version uses dcbz on the complete cache lines in the + * destination area to reduce memory traffic. This requires that + * the destination area is cacheable. + * We only use this version if the source and dest don't overlap. + * -- paulus. + */ +_GLOBFN(ppcasm_cacheable_memcpy) add r7,r3,r5 /* test if the src & dst overlap */ add r8,r4,r5 cmplw 0,r4,r7 cmplw 1,r3,r8 crand 0,0,4 /* cr0.lt &= cr1.lt */ - blt ppcasm_memcpy /* if regions overlap */ -#endif + blt 66f //ppcasm_memcpy /* if regions overlap */ addi r4,r4,-4 addi r6,r3,-4 neg r0,r3 @@ -157,9 +134,10 @@ ppcasm_cacheable_memcpy: bdnz 40b 65: blr - .globl ppcasm_memcpy -ppcasm_memcpy: - srwi. r7,r5,3 +_SIZE(ppcasm_cacheable_memcpy) + +_GLOBFN(ppcasm_memcpy) +66: srwi. r7,r5,3 addi r6,r3,-4 addi r4,r4,-4 beq 2f /* if less than 8 bytes to do */ @@ -199,4 +177,6 @@ ppcasm_memcpy: mtctr r7 b 1b +_SIZE(ppcasm_memcpy) + #endif /* ARCH_PPC */ diff --git a/src/xine-utils/ppcasm_string.h b/src/xine-utils/ppcasm_string.h index b4b3f9431..174162477 100644 --- a/src/xine-utils/ppcasm_string.h +++ b/src/xine-utils/ppcasm_string.h @@ -1,3 +1,3 @@ #include <stdlib.h> -static void *ppcasm_cacheable_memcpy(void *, const void *, size_t); -static void *ppcasm_memcpy(void *, const void *, size_t); +void *ppcasm_cacheable_memcpy(void *, const void *, size_t); +void *ppcasm_memcpy(void *, const void *, size_t); diff --git a/src/xine-utils/xineutils.h b/src/xine-utils/xineutils.h index 2e9c2edd8..c64e4f12a 100644 --- a/src/xine-utils/xineutils.h +++ b/src/xine-utils/xineutils.h @@ -1,23 +1,23 @@ /* * Copyright (C) 2000-2004 the xine project - * + * * This file is part of xine, a free video player. 
- * + * * xine is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * xine is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA * - * $Id: xineutils.h,v 1.84 2004/04/06 19:20:17 valtri Exp $ + * $Id: xineutils.h,v 1.85 2004/04/25 17:43:58 manfredtremmel Exp $ * */ #ifndef XINEUTILS_H @@ -54,7 +54,7 @@ extern "C" { #ifdef HAVE_CONFIG_H #include "config.h" #endif - + #include <stdio.h> #include <string.h> @@ -70,7 +70,7 @@ extern "C" { int xine_mutex_init (xine_mutex_t *mutex, const pthread_mutexattr_t *mutexattr, char *id); - + int xine_mutex_lock (xine_mutex_t *mutex, char *who); int xine_mutex_unlock (xine_mutex_t *mutex, char *who); int xine_mutex_destroy (xine_mutex_t *mutex); @@ -84,7 +84,7 @@ extern "C" { * long constant values MUST be suffixed by LL and unsigned long long * values by ULL, lest they be truncated by the compiler) */ - + /* generic accelerations */ #define MM_ACCEL_MLIB 0x00000001 @@ -92,10 +92,13 @@ extern "C" { #define MM_ACCEL_X86_MMX 0x80000000 #define MM_ACCEL_X86_3DNOW 0x40000000 #define MM_ACCEL_X86_MMXEXT 0x20000000 -#define MM_ACCEL_X86_SSE 0x10000000 -#define MM_ACCEL_X86_SSE2 0x08000000 -/* powerpc accelerations */ +#define MM_ACCEL_X86_SSE 0x10000000 +#define MM_ACCEL_X86_SSE2 0x08000000 + +/* powerpc accelerations and features */ #define MM_ACCEL_PPC_ALTIVEC 0x04000000 +#define MM_ACCEL_PPC_CACHE32 0x02000000 + /* x86 compat defines */ #define MM_MMX MM_ACCEL_X86_MMX 
#define MM_3DNOW MM_ACCEL_X86_3DNOW @@ -624,7 +627,7 @@ void *xine_xmalloc(size_t size) __attribute__ ((__malloc__)); */ void *xine_xmalloc_aligned(size_t alignment, size_t size, void **base); -/* +/* * Get user home directory. */ const char *xine_get_homedir(void); @@ -679,14 +682,14 @@ static inline char *_private_strpbrk(const char *s, const char *accept) { #else static inline char *_private_strsep(char **stringp, const char *delim) { char *begin, *end; - + begin = *stringp; if(begin == NULL) return NULL; - + if(delim[0] == '\0' || delim[1] == '\0') { char ch = delim[0]; - + if(ch == '\0') end = NULL; else { @@ -700,14 +703,14 @@ static inline char *_private_strsep(char **stringp, const char *delim) { } else end = xine_strpbrk(begin, delim); - + if(end) { *end++ = '\0'; *stringp = end; } else *stringp = NULL; - + return begin; } #define xine_strsep _private_strsep @@ -720,7 +723,7 @@ static inline char *_private_strsep(char **stringp, const char *delim) { static inline void _private_setenv(const char *name, const char *val, int _xx) { int len = strlen(name) + strlen(val) + 2; char *env = (char *)malloc(len); - + sprintf(env, "%s%c%s", name, '=', val); putenv(env); /*free(env); The string passed to putenv must not be freed*/ @@ -731,8 +734,8 @@ static inline void _private_setenv(const char *name, const char *val, int _xx) { /* * Color Conversion Utility Functions * The following data structures and functions facilitate the conversion - * of RGB images to packed YUV (YUY2) images. There are also functions to - * convert from YUV9 -> YV12. All of the meaty details are written in + * of RGB images to packed YUV (YUY2) images. There are also functions to + * convert from YUV9 -> YV12. All of the meaty details are written in * color.c. 
*/ @@ -763,16 +766,16 @@ extern void (*yuv411_to_yv12) unsigned char *v_src, int v_src_pitch, unsigned char *v_dest, int v_dest_pitch, int width, int height); extern void (*yv12_to_yuy2) - (unsigned char *y_src, int y_src_pitch, - unsigned char *u_src, int u_src_pitch, - unsigned char *v_src, int v_src_pitch, + (unsigned char *y_src, int y_src_pitch, + unsigned char *u_src, int u_src_pitch, + unsigned char *v_src, int v_src_pitch, unsigned char *yuy2_map, int yuy2_pitch, int width, int height, int progressive); extern void (*yuy2_to_yv12) (unsigned char *yuy2_map, int yuy2_pitch, - unsigned char *y_dst, int y_dst_pitch, - unsigned char *u_dst, int u_dst_pitch, - unsigned char *v_dst, int v_dst_pitch, + unsigned char *y_dst, int y_dst_pitch, + unsigned char *u_dst, int u_dst_pitch, + unsigned char *v_dst, int v_dst_pitch, int width, int height); #define SCALEFACTOR 65536 @@ -830,7 +833,7 @@ extern void yuy2_to_yuy2 (unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch, int width, int height); - + /* print a hexdump of the given data */ void xine_hexdump (const char *buf, int length); @@ -845,7 +848,7 @@ void xine_hexdump (const char *buf, int length); # define EXPECT_TRUE(x) __builtin_expect((x),1) # define EXPECT_FALSE(x) __builtin_expect((x),0) #endif - + #ifdef NDEBUG #define _x_assert(exp) \ do { \ @@ -938,7 +941,7 @@ void xine_hexdump (const char *buf, int length); printf( "%s", fmtargs ); \ } \ }while(0) -#else +#else #define llprintf(cat, ...) \ do{ \ if(cat){ \ @@ -964,7 +967,7 @@ void xine_hexdump (const char *buf, int length); xine_log(xine, XINE_LOG_TRACE, fmtargs); \ } \ } while(0) -#else +#else #define xprintf(xine, verbose, ...) \ do { \ if((xine) && (xine)->verbosity >= verbose){ \ @@ -1012,11 +1015,11 @@ void xine_hexdump (const char *buf, int length); typedef struct xine_node_s { struct xine_node_s *next, *prev; - + void *content; int priority; - + } xine_node_t; |