summary | refs | log | tree | commit | diff
path: root/src/xine-utils/ppcasm_string.S
diff options
context:
space:
mode:
Diffstat (limited to 'src/xine-utils/ppcasm_string.S')
-rw-r--r-- src/xine-utils/ppcasm_string.S 86
1 files changed, 33 insertions, 53 deletions
diff --git a/src/xine-utils/ppcasm_string.S b/src/xine-utils/ppcasm_string.S
index ff4013e07..3091a9d68 100644
--- a/src/xine-utils/ppcasm_string.S
+++ b/src/xine-utils/ppcasm_string.S
@@ -16,16 +16,11 @@
#ifdef ARCH_PPC
-#warning Be forewarned - using PowerPC assembly
-
-#define __KERNEL__
-#define __ASSEMBLY__
+//#warning Be forewarned - using PowerPC assembly
#include "ppc_asm.tmpl"
-#include <linux/config.h>
-#include <asm/processor.h>
-#include <asm/cache.h>
-#include <asm/errno.h>
+#define N_FUN 36
+#define N_SO 100
#define COPY_16_BYTES \
lwz r7,4(r4); \
@@ -37,60 +32,42 @@
stw r9,12(r6); \
stwu r10,16(r6)
-#define COPY_16_BYTES_WITHEX(n) \
-8 ## n ## 0: \
- lwz r7,4(r4); \
-8 ## n ## 1: \
- lwz r8,8(r4); \
-8 ## n ## 2: \
- lwz r9,12(r4); \
-8 ## n ## 3: \
- lwzu r10,16(r4); \
-8 ## n ## 4: \
- stw r7,4(r6); \
-8 ## n ## 5: \
- stw r8,8(r6); \
-8 ## n ## 6: \
- stw r9,12(r6); \
-8 ## n ## 7: \
- stwu r10,16(r6)
+#define __stringify_1(x) #x
+#define __stringify(x) __stringify_1(x)
-#define COPY_16_BYTES_EXCODE(n) \
-9 ## n ## 0: \
- addi r5,r5,-(16 * n); \
- b 104f; \
-9 ## n ## 1: \
- addi r5,r5,-(16 * n); \
- b 105f; \
-.section __ex_table,"a"; \
- .align 2; \
- .long 8 ## n ## 0b,9 ## n ## 0b; \
- .long 8 ## n ## 1b,9 ## n ## 0b; \
- .long 8 ## n ## 2b,9 ## n ## 0b; \
- .long 8 ## n ## 3b,9 ## n ## 0b; \
- .long 8 ## n ## 4b,9 ## n ## 1b; \
- .long 8 ## n ## 5b,9 ## n ## 1b; \
- .long 8 ## n ## 6b,9 ## n ## 1b; \
- .long 8 ## n ## 7b,9 ## n ## 1b; \
- .text
+#define _GLOBFN(n)\
+ .stabs __stringify(n:F-1),N_FUN,0,0,n;\
+ .type n,@function; \
+ .globl n;\
+n:
+
+#define _SIZE(n) \
+ .size n, .-n
.text
+ .stabs "src/xine-utils",N_SO,0,0,.
+ .stabs "ppcasm_string.S",N_SO,0,0,.
+
+#warning FIXME: Get cache line sizes from /proc
+#define L1_CACHE_LINE_SIZE 32
CACHELINE_BYTES = 32
LG_CACHELINE_BYTES = 5
CACHELINE_MASK = (32 -1)
-
- .global ppcasm_cacheable_memcpy
-ppcasm_cacheable_memcpy:
-#if 0 /* this part causes "error loading shared library: unexpected reloc type
- 0x0b (???) */
+/*
+ * This version uses dcbz on the complete cache lines in the
+ * destination area to reduce memory traffic. This requires that
+ * the destination area is cacheable.
+ * We only use this version if the source and dest don't overlap.
+ * -- paulus.
+ */
+_GLOBFN(ppcasm_cacheable_memcpy)
add r7,r3,r5 /* test if the src & dst overlap */
add r8,r4,r5
cmplw 0,r4,r7
cmplw 1,r3,r8
crand 0,0,4 /* cr0.lt &= cr1.lt */
- blt ppcasm_memcpy /* if regions overlap */
-#endif
+ blt 66f //ppcasm_memcpy /* if regions overlap */
addi r4,r4,-4
addi r6,r3,-4
neg r0,r3
@@ -157,9 +134,10 @@ ppcasm_cacheable_memcpy:
bdnz 40b
65: blr
- .globl ppcasm_memcpy
-ppcasm_memcpy:
- srwi. r7,r5,3
+_SIZE(ppcasm_cacheable_memcpy)
+
+_GLOBFN(ppcasm_memcpy)
+66: srwi. r7,r5,3
addi r6,r3,-4
addi r4,r4,-4
beq 2f /* if less than 8 bytes to do */
@@ -199,4 +177,6 @@ ppcasm_memcpy:
mtctr r7
b 1b
+_SIZE(ppcasm_memcpy)
+
#endif /* ARCH_PPC */