summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Manfred Tremmel <manfredtremmel@users.sourceforge.net> 2004-04-25 17:43:58 +0000
committer: Manfred Tremmel <manfredtremmel@users.sourceforge.net> 2004-04-25 17:43:58 +0000
commit: acb7dc0f256afc24e875a168da989ef25d86b7b7 (patch)
tree: facbe6040f87272a10af25f18bbcdd1f5babb75f
parent: 62ce4ec803f98fafe4b1b70a0ad27b3bf5751611 (diff)
download: xine-lib-acb7dc0f256afc24e875a168da989ef25d86b7b7.tar.gz
xine-lib-acb7dc0f256afc24e875a168da989ef25d86b7b7.tar.bz2
added ppcasm_cacheable_memcpy fix from David Woodhouse <dwmw2@infradead.org>
CVS patchset: 6436 CVS date: 2004/04/25 17:43:58
-rw-r--r-- src/xine-utils/cpu_accel.c 19
-rw-r--r-- src/xine-utils/memcpy.c 2
-rw-r--r-- src/xine-utils/ppcasm_string.S 86
-rw-r--r-- src/xine-utils/ppcasm_string.h 4
-rw-r--r-- src/xine-utils/xineutils.h 67
5 files changed, 82 insertions, 96 deletions
diff --git a/src/xine-utils/cpu_accel.c b/src/xine-utils/cpu_accel.c
index 33a168aea..962eb7456 100644
--- a/src/xine-utils/cpu_accel.c
+++ b/src/xine-utils/cpu_accel.c
@@ -41,7 +41,7 @@
static uint32_t arch_accel (void)
{
uint32_t caps;
- /* No need to test for this on AMD64, we know what the
+ /* No need to test for this on AMD64, we know what the
platform has. */
caps = MM_ACCEL_X86_MMX | MM_ACCEL_X86_SSE | MM_ACCEL_X86_MMXEXT | MM_ACCEL_X86_SSE2;
@@ -112,8 +112,8 @@ static uint32_t arch_accel (void)
caps |= MM_ACCEL_X86_SSE | MM_ACCEL_X86_MMXEXT;
if (edx & 0x04000000) /* SSE2 */
- caps |= MM_ACCEL_X86_SSE2;
-
+ caps |= MM_ACCEL_X86_SSE2;
+
cpuid (0x80000000, eax, ebx, ecx, edx);
if (eax < 0x80000001) /* no extended capabilities */
return caps;
@@ -157,10 +157,13 @@ static void sigill_handler (int sig)
static uint32_t arch_accel (void)
{
+ /* FIXME: Autodetect cache line size via AUX ELF vector or otherwise */
+ uint32_t flags = MM_ACCEL_PPC_CACHE32;
+
signal (SIGILL, sigill_handler);
if (sigsetjmp (jmpbuf, 1)) {
- signal (SIGILL, SIG_DFL);
- return 0;
+ signal (SIGILL, SIG_DFL);
+ return flags;
}
canjump = 1;
@@ -171,7 +174,7 @@ static uint32_t arch_accel (void)
: "r" (-1));
signal (SIGILL, SIG_DFL);
- return MM_ACCEL_PPC_ALTIVEC;
+ return flags|MM_ACCEL_PPC_ALTIVEC;
}
#endif /* ARCH_PPC */
@@ -207,7 +210,7 @@ uint32_t xine_mm_accel (void)
if( accel & MM_ACCEL_X86_SSE ) {
void (*old_sigill_handler)(int);
- old_sigill_handler = signal (SIGILL, sigill_handler);
+ old_sigill_handler = signal (SIGILL, sigill_handler);
if (setjmp(sigill_return)) {
lprintf ("OS doesn't support SSE instructions.\n");
@@ -220,7 +223,7 @@ uint32_t xine_mm_accel (void)
}
#endif /* _MSC_VER */
#endif /* ARCH_X86 || ARCH_X86_64 */
-
+
if(getenv("XINE_NO_ACCEL")) {
accel = 0;
}
diff --git a/src/xine-utils/memcpy.c b/src/xine-utils/memcpy.c
index 46bdc8b53..8353d6ba1 100644
--- a/src/xine-utils/memcpy.c
+++ b/src/xine-utils/memcpy.c
@@ -403,7 +403,7 @@ static struct {
#endif /* ARCH_X86 */
#ifdef ARCH_PPC
{ "ppcasm_memcpy()", ppcasm_memcpy, 0, 0 },
- { "ppcasm_cacheable_memcpy()", ppcasm_cacheable_memcpy, 0, 0 },
+ { "ppcasm_cacheable_memcpy()", ppcasm_cacheable_memcpy, 0, MM_ACCEL_PPC_CACHE32 },
#endif /* ARCH_PPC */
{ NULL, NULL, 0, 0 }
};
diff --git a/src/xine-utils/ppcasm_string.S b/src/xine-utils/ppcasm_string.S
index ff4013e07..3091a9d68 100644
--- a/src/xine-utils/ppcasm_string.S
+++ b/src/xine-utils/ppcasm_string.S
@@ -16,16 +16,11 @@
#ifdef ARCH_PPC
-#warning Be forewarned - using PowerPC assembly
-
-#define __KERNEL__
-#define __ASSEMBLY__
+//#warning Be forewarned - using PowerPC assembly
#include "ppc_asm.tmpl"
-#include <linux/config.h>
-#include <asm/processor.h>
-#include <asm/cache.h>
-#include <asm/errno.h>
+#define N_FUN 36
+#define N_SO 100
#define COPY_16_BYTES \
lwz r7,4(r4); \
@@ -37,60 +32,42 @@
stw r9,12(r6); \
stwu r10,16(r6)
-#define COPY_16_BYTES_WITHEX(n) \
-8 ## n ## 0: \
- lwz r7,4(r4); \
-8 ## n ## 1: \
- lwz r8,8(r4); \
-8 ## n ## 2: \
- lwz r9,12(r4); \
-8 ## n ## 3: \
- lwzu r10,16(r4); \
-8 ## n ## 4: \
- stw r7,4(r6); \
-8 ## n ## 5: \
- stw r8,8(r6); \
-8 ## n ## 6: \
- stw r9,12(r6); \
-8 ## n ## 7: \
- stwu r10,16(r6)
+#define __stringify_1(x) #x
+#define __stringify(x) __stringify_1(x)
-#define COPY_16_BYTES_EXCODE(n) \
-9 ## n ## 0: \
- addi r5,r5,-(16 * n); \
- b 104f; \
-9 ## n ## 1: \
- addi r5,r5,-(16 * n); \
- b 105f; \
-.section __ex_table,"a"; \
- .align 2; \
- .long 8 ## n ## 0b,9 ## n ## 0b; \
- .long 8 ## n ## 1b,9 ## n ## 0b; \
- .long 8 ## n ## 2b,9 ## n ## 0b; \
- .long 8 ## n ## 3b,9 ## n ## 0b; \
- .long 8 ## n ## 4b,9 ## n ## 1b; \
- .long 8 ## n ## 5b,9 ## n ## 1b; \
- .long 8 ## n ## 6b,9 ## n ## 1b; \
- .long 8 ## n ## 7b,9 ## n ## 1b; \
- .text
+#define _GLOBFN(n)\
+ .stabs __stringify(n:F-1),N_FUN,0,0,n;\
+ .type n,@function; \
+ .globl n;\
+n:
+
+#define _SIZE(n) \
+ .size n, .-n
.text
+ .stabs "src/xine-utils",N_SO,0,0,.
+ .stabs "ppcasm_string.S",N_SO,0,0,.
+
+#warning FIXME: Get cache line sizes from /proc
+#define L1_CACHE_LINE_SIZE 32
CACHELINE_BYTES = 32
LG_CACHELINE_BYTES = 5
CACHELINE_MASK = (32 -1)
-
- .global ppcasm_cacheable_memcpy
-ppcasm_cacheable_memcpy:
-#if 0 /* this part causes "error loading shared library: unexpected reloc type
- 0x0b (???) */
+/*
+ * This version uses dcbz on the complete cache lines in the
+ * destination area to reduce memory traffic. This requires that
+ * the destination area is cacheable.
+ * We only use this version if the source and dest don't overlap.
+ * -- paulus.
+ */
+_GLOBFN(ppcasm_cacheable_memcpy)
add r7,r3,r5 /* test if the src & dst overlap */
add r8,r4,r5
cmplw 0,r4,r7
cmplw 1,r3,r8
crand 0,0,4 /* cr0.lt &= cr1.lt */
- blt ppcasm_memcpy /* if regions overlap */
-#endif
+ blt 66f //ppcasm_memcpy /* if regions overlap */
addi r4,r4,-4
addi r6,r3,-4
neg r0,r3
@@ -157,9 +134,10 @@ ppcasm_cacheable_memcpy:
bdnz 40b
65: blr
- .globl ppcasm_memcpy
-ppcasm_memcpy:
- srwi. r7,r5,3
+_SIZE(ppcasm_cacheable_memcpy)
+
+_GLOBFN(ppcasm_memcpy)
+66: srwi. r7,r5,3
addi r6,r3,-4
addi r4,r4,-4
beq 2f /* if less than 8 bytes to do */
@@ -199,4 +177,6 @@ ppcasm_memcpy:
mtctr r7
b 1b
+_SIZE(ppcasm_memcpy)
+
#endif /* ARCH_PPC */
diff --git a/src/xine-utils/ppcasm_string.h b/src/xine-utils/ppcasm_string.h
index b4b3f9431..174162477 100644
--- a/src/xine-utils/ppcasm_string.h
+++ b/src/xine-utils/ppcasm_string.h
@@ -1,3 +1,3 @@
#include <stdlib.h>
-static void *ppcasm_cacheable_memcpy(void *, const void *, size_t);
-static void *ppcasm_memcpy(void *, const void *, size_t);
+void *ppcasm_cacheable_memcpy(void *, const void *, size_t);
+void *ppcasm_memcpy(void *, const void *, size_t);
diff --git a/src/xine-utils/xineutils.h b/src/xine-utils/xineutils.h
index 2e9c2edd8..c64e4f12a 100644
--- a/src/xine-utils/xineutils.h
+++ b/src/xine-utils/xineutils.h
@@ -1,23 +1,23 @@
/*
* Copyright (C) 2000-2004 the xine project
- *
+ *
* This file is part of xine, a free video player.
- *
+ *
* xine is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
- *
+ *
* xine is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
- *
+ *
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
*
- * $Id: xineutils.h,v 1.84 2004/04/06 19:20:17 valtri Exp $
+ * $Id: xineutils.h,v 1.85 2004/04/25 17:43:58 manfredtremmel Exp $
*
*/
#ifndef XINEUTILS_H
@@ -54,7 +54,7 @@ extern "C" {
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
-
+
#include <stdio.h>
#include <string.h>
@@ -70,7 +70,7 @@ extern "C" {
int xine_mutex_init (xine_mutex_t *mutex, const pthread_mutexattr_t *mutexattr,
char *id);
-
+
int xine_mutex_lock (xine_mutex_t *mutex, char *who);
int xine_mutex_unlock (xine_mutex_t *mutex, char *who);
int xine_mutex_destroy (xine_mutex_t *mutex);
@@ -84,7 +84,7 @@ extern "C" {
* long constant values MUST be suffixed by LL and unsigned long long
* values by ULL, lest they be truncated by the compiler)
*/
-
+
/* generic accelerations */
#define MM_ACCEL_MLIB 0x00000001
@@ -92,10 +92,13 @@ extern "C" {
#define MM_ACCEL_X86_MMX 0x80000000
#define MM_ACCEL_X86_3DNOW 0x40000000
#define MM_ACCEL_X86_MMXEXT 0x20000000
-#define MM_ACCEL_X86_SSE 0x10000000
-#define MM_ACCEL_X86_SSE2 0x08000000
-/* powerpc accelerations */
+#define MM_ACCEL_X86_SSE 0x10000000
+#define MM_ACCEL_X86_SSE2 0x08000000
+
+/* powerpc accelerations and features */
#define MM_ACCEL_PPC_ALTIVEC 0x04000000
+#define MM_ACCEL_PPC_CACHE32 0x02000000
+
/* x86 compat defines */
#define MM_MMX MM_ACCEL_X86_MMX
#define MM_3DNOW MM_ACCEL_X86_3DNOW
@@ -624,7 +627,7 @@ void *xine_xmalloc(size_t size) __attribute__ ((__malloc__));
*/
void *xine_xmalloc_aligned(size_t alignment, size_t size, void **base);
-/*
+/*
* Get user home directory.
*/
const char *xine_get_homedir(void);
@@ -679,14 +682,14 @@ static inline char *_private_strpbrk(const char *s, const char *accept) {
#else
static inline char *_private_strsep(char **stringp, const char *delim) {
char *begin, *end;
-
+
begin = *stringp;
if(begin == NULL)
return NULL;
-
+
if(delim[0] == '\0' || delim[1] == '\0') {
char ch = delim[0];
-
+
if(ch == '\0')
end = NULL;
else {
@@ -700,14 +703,14 @@ static inline char *_private_strsep(char **stringp, const char *delim) {
}
else
end = xine_strpbrk(begin, delim);
-
+
if(end) {
*end++ = '\0';
*stringp = end;
}
else
*stringp = NULL;
-
+
return begin;
}
#define xine_strsep _private_strsep
@@ -720,7 +723,7 @@ static inline char *_private_strsep(char **stringp, const char *delim) {
static inline void _private_setenv(const char *name, const char *val, int _xx) {
int len = strlen(name) + strlen(val) + 2;
char *env = (char *)malloc(len);
-
+
sprintf(env, "%s%c%s", name, '=', val);
putenv(env);
/*free(env); The string passed to putenv must not be freed*/
@@ -731,8 +734,8 @@ static inline void _private_setenv(const char *name, const char *val, int _xx) {
/*
* Color Conversion Utility Functions
* The following data structures and functions facilitate the conversion
- * of RGB images to packed YUV (YUY2) images. There are also functions to
- * convert from YUV9 -> YV12. All of the meaty details are written in
+ * of RGB images to packed YUV (YUY2) images. There are also functions to
+ * convert from YUV9 -> YV12. All of the meaty details are written in
* color.c.
*/
@@ -763,16 +766,16 @@ extern void (*yuv411_to_yv12)
unsigned char *v_src, int v_src_pitch, unsigned char *v_dest, int v_dest_pitch,
int width, int height);
extern void (*yv12_to_yuy2)
- (unsigned char *y_src, int y_src_pitch,
- unsigned char *u_src, int u_src_pitch,
- unsigned char *v_src, int v_src_pitch,
+ (unsigned char *y_src, int y_src_pitch,
+ unsigned char *u_src, int u_src_pitch,
+ unsigned char *v_src, int v_src_pitch,
unsigned char *yuy2_map, int yuy2_pitch,
int width, int height, int progressive);
extern void (*yuy2_to_yv12)
(unsigned char *yuy2_map, int yuy2_pitch,
- unsigned char *y_dst, int y_dst_pitch,
- unsigned char *u_dst, int u_dst_pitch,
- unsigned char *v_dst, int v_dst_pitch,
+ unsigned char *y_dst, int y_dst_pitch,
+ unsigned char *u_dst, int u_dst_pitch,
+ unsigned char *v_dst, int v_dst_pitch,
int width, int height);
#define SCALEFACTOR 65536
@@ -830,7 +833,7 @@ extern void yuy2_to_yuy2
(unsigned char *src, int src_pitch,
unsigned char *dst, int dst_pitch,
int width, int height);
-
+
/* print a hexdump of the given data */
void xine_hexdump (const char *buf, int length);
@@ -845,7 +848,7 @@ void xine_hexdump (const char *buf, int length);
# define EXPECT_TRUE(x) __builtin_expect((x),1)
# define EXPECT_FALSE(x) __builtin_expect((x),0)
#endif
-
+
#ifdef NDEBUG
#define _x_assert(exp) \
do { \
@@ -938,7 +941,7 @@ void xine_hexdump (const char *buf, int length);
printf( "%s", fmtargs ); \
} \
}while(0)
-#else
+#else
#define llprintf(cat, ...) \
do{ \
if(cat){ \
@@ -964,7 +967,7 @@ void xine_hexdump (const char *buf, int length);
xine_log(xine, XINE_LOG_TRACE, fmtargs); \
} \
} while(0)
-#else
+#else
#define xprintf(xine, verbose, ...) \
do { \
if((xine) && (xine)->verbosity >= verbose){ \
@@ -1012,11 +1015,11 @@ void xine_hexdump (const char *buf, int length);
typedef struct xine_node_s {
struct xine_node_s *next, *prev;
-
+
void *content;
int priority;
-
+
} xine_node_t;