summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiggi Langauf <siggi@users.sourceforge.net>2002-05-28 12:44:02 +0000
committerSiggi Langauf <siggi@users.sourceforge.net>2002-05-28 12:44:02 +0000
commit9402cd07dba3632fea3273795b3223350742fe5c (patch)
tree673ea756d80c80baca98934c1eeef963fc577dc2
parent4ed51e17c6c8e3157d1ebaac8c06d4e0e5b26aa0 (diff)
downloadxine-lib-9402cd07dba3632fea3273795b3223350742fe5c.tar.gz
xine-lib-9402cd07dba3632fea3273795b3223350742fe5c.tar.bz2
PowerPC Assembly Patch by Andrew Patrikalakis <anrp@irulethe.net>
This seems to be _always_ using xine_fast_memcpy() in yuv2rgb.c, which _may_ be a bad idea. CVS patchset: 1961 CVS date: 2002/05/28 12:44:02
-rw-r--r--src/video_out/yuv2rgb.c32
-rw-r--r--src/xine-utils/Makefile.am2
-rw-r--r--src/xine-utils/memcpy.c15
-rw-r--r--src/xine-utils/ppcasm_string.S202
-rw-r--r--src/xine-utils/ppcasm_string.h3
5 files changed, 236 insertions, 18 deletions
diff --git a/src/video_out/yuv2rgb.c b/src/video_out/yuv2rgb.c
index e67743599..deb60d640 100644
--- a/src/video_out/yuv2rgb.c
+++ b/src/video_out/yuv2rgb.c
@@ -22,7 +22,7 @@
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
- * $Id: yuv2rgb.c,v 1.30 2002/04/29 23:32:00 jcdutton Exp $
+ * $Id: yuv2rgb.c,v 1.31 2002/05/28 12:44:02 siggi Exp $
*/
#include "config.h"
@@ -1384,7 +1384,7 @@ static void yuv2rgb_c_32 (yuv2rgb_t *this, uint8_t * _dst,
while (--dst_height > 0 && dy < 32768) {
- memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*4);
+ xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*4);
dy += this->step_dy;
_dst += this->rgb_stride;
@@ -1515,7 +1515,7 @@ static void yuv2rgb_c_24_rgb (yuv2rgb_t *this, uint8_t * _dst,
while (--dst_height > 0 && dy < 32768) {
- memcpy (_dst, _dst-this->rgb_stride, this->dest_width*3);
+ xine_fast_memcpy (_dst, _dst-this->rgb_stride, this->dest_width*3);
dy += this->step_dy;
_dst += this->rgb_stride;
@@ -1646,7 +1646,7 @@ static void yuv2rgb_c_24_bgr (yuv2rgb_t *this, uint8_t * _dst,
while (--dst_height > 0 && dy < 32768) {
- memcpy (_dst, _dst-this->rgb_stride, this->dest_width*3);
+ xine_fast_memcpy (_dst, _dst-this->rgb_stride, this->dest_width*3);
dy += this->step_dy;
_dst += this->rgb_stride;
@@ -1777,7 +1777,7 @@ static void yuv2rgb_c_16 (yuv2rgb_t *this, uint8_t * _dst,
while (--dst_height > 0 && dy < 32768) {
- memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2);
+ xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2);
dy += this->step_dy;
_dst += this->rgb_stride;
@@ -1907,7 +1907,7 @@ static void yuv2rgb_c_8 (yuv2rgb_t *this, uint8_t * _dst,
while (--dst_height > 0 && dy < 32768) {
- memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
+ xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
dy += this->step_dy;
_dst += this->rgb_stride;
@@ -2002,7 +2002,7 @@ static void yuv2rgb_c_gray (yuv2rgb_t *this, uint8_t * _dst,
while (--dst_height > 0 && dy < 32768) {
- memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
+ xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
dy += this->step_dy;
_dst += this->rgb_stride;
@@ -2019,7 +2019,7 @@ static void yuv2rgb_c_gray (yuv2rgb_t *this, uint8_t * _dst,
}
} else {
for (height = this->source_height; --height >= 0; ) {
- memcpy(_dst, _py, this->dest_width);
+ xine_fast_memcpy(_dst, _py, this->dest_width);
_dst += this->rgb_stride;
_py += this->y_stride;
}
@@ -2082,7 +2082,7 @@ static void yuv2rgb_c_palette (yuv2rgb_t *this, uint8_t * _dst,
while (--dst_height > 0 && dy < 32768) {
- memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
+ xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
dy += this->step_dy;
_dst += this->rgb_stride;
@@ -2557,7 +2557,7 @@ static void yuy22rgb_c_32 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
while (--height > 0 && dy < 32768) {
- memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*4);
+ xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*4);
dy += this->step_dy;
_dst += this->rgb_stride;
@@ -2635,7 +2635,7 @@ static void yuy22rgb_c_24_rgb (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
while (--height > 0 && dy < 32768) {
- memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*3);
+ xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*3);
dy += this->step_dy;
_dst += this->rgb_stride;
@@ -2713,7 +2713,7 @@ static void yuy22rgb_c_24_bgr (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
while (--height > 0 && dy < 32768) {
- memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*3);
+ xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*3);
dy += this->step_dy;
_dst += this->rgb_stride;
@@ -2787,7 +2787,7 @@ static void yuy22rgb_c_16 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
while (--height > 0 && dy < 32768) {
- memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2);
+ xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width*2);
dy += this->step_dy;
_dst += this->rgb_stride;
@@ -2861,7 +2861,7 @@ static void yuy22rgb_c_8 (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
while (--height > 0 && dy < 32768) {
- memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
+ xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
dy += this->step_dy;
_dst += this->rgb_stride;
@@ -2901,7 +2901,7 @@ static void yuy22rgb_c_gray (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
while (--height > 0 && dy < 32768) {
- memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
+ xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
dy += this->step_dy;
_dst += this->rgb_stride;
@@ -2978,7 +2978,7 @@ static void yuy22rgb_c_palette (yuv2rgb_t *this, uint8_t * _dst, uint8_t * _p)
while (--height > 0 && dy < 32768) {
- memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
+ xine_fast_memcpy (_dst, (uint8_t*)_dst-this->rgb_stride, this->dest_width);
dy += this->step_dy;
_dst += this->rgb_stride;
diff --git a/src/xine-utils/Makefile.am b/src/xine-utils/Makefile.am
index 333e21e13..34546034f 100644
--- a/src/xine-utils/Makefile.am
+++ b/src/xine-utils/Makefile.am
@@ -4,7 +4,7 @@
lib_LTLIBRARIES = libxineutils.la
-libxineutils_la_SOURCES = utils.c memcpy.c monitor.c cpu_accel.c xine_mutex.c xmllexer.c xmlparser.c
+libxineutils_la_SOURCES = utils.c ppcasm_string.S memcpy.c monitor.c cpu_accel.c xine_mutex.c xmllexer.c xmlparser.c
libxineutils_la_LDFLAGS = \
-version-info $(LT_CURRENT):$(LT_REVISION):$(LT_AGE)
diff --git a/src/xine-utils/memcpy.c b/src/xine-utils/memcpy.c
index 564a61cdd..a1e114b70 100644
--- a/src/xine-utils/memcpy.c
+++ b/src/xine-utils/memcpy.c
@@ -33,6 +33,11 @@
#include "config.h"
#endif
+#ifdef ARCH_PPC
+#include "ppcasm_string.h"
+#endif
+#include <sys/times.h>
+
#include <stdlib.h>
#include <string.h>
#include "xine_internal.h"
@@ -368,6 +373,10 @@ static struct {
{ "MMXEXT optimized memcpy()", mmx2_memcpy, 0, MM_MMXEXT },
{ "SSE optimized memcpy()", sse_memcpy, 0, MM_MMXEXT|MM_SSE },
#endif /* ARCH_X86 */
+#ifdef ARCH_PPC
+ { "ppcasm_memcpy()", ppcasm_memcpy, 0, 0 },
+ { "ppcasm_cacheable_memcpy()", ppcasm_cacheable_memcpy, 0, 0 },
+#endif /* ARCH_PPC */
{ NULL, NULL, 0, 0 }
};
@@ -383,7 +392,8 @@ static unsigned long long int rdtsc()
{
/* FIXME: implement an equivalent for using optimized memcpy on other
architectures */
- return 0;
+#warning "Using rdtsc() hack (via times())"
+ struct tms unused; return times(&unused); /* tick count from times(); a real buffer is passed because POSIX does not define times(NULL) (only Linux accepts it) */
}
#endif
@@ -415,6 +425,9 @@ void xine_probe_fast_memcpy(config_values_t *config)
#ifdef ARCH_X86
"kernel", "mmx", "mmxext", "sse",
#endif
+#ifdef ARCH_PPC
+ "ppcasm_memcpy", "ppcasm_cacheable_memcpy",
+#endif
NULL};
config_flags = xine_mm_accel();
diff --git a/src/xine-utils/ppcasm_string.S b/src/xine-utils/ppcasm_string.S
new file mode 100644
index 000000000..87457ef6e
--- /dev/null
+++ b/src/xine-utils/ppcasm_string.S
@@ -0,0 +1,202 @@
+/*
+ * BK Id: SCCS/s.string.S 1.9 10/25/01 10:08:51 trini
+ */
+/*
+ * String handling functions for PowerPC.
+ *
+ * Copyright (C) 1996 Paul Mackerras.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "../../config.h"
+
+#ifdef ARCH_PPC /* same test memcpy.c uses around the ppcasm_* table entries; closed by the #endif at the end of this file */
+
+#warning Be forewarned - using PowerPC assembly
+
+#define __KERNEL__ /* NOTE(review): presumably required so the kernel headers below expose their definitions to user space - confirm */
+#define __ASSEMBLY__ /* NOTE(review): presumably tells the kernel headers to omit C-only declarations - confirm */
+
+#include "ppc_asm.tmpl"
+#include <linux/config.h>
+#include <asm/processor.h>
+#include <asm/cache.h> /* NOTE(review): L1_CACHE_LINE_SIZE and LG_L1_CACHE_LINE_SIZE used below are presumably defined here - confirm */
+#include <asm/errno.h>
+
+#define COPY_16_BYTES /* copy 16 bytes: the words at 4..16(r4) are stored to 4..16(r6) through r7-r10 */ \
+ lwz r7,4(r4); \
+ lwz r8,8(r4); \
+ lwz r9,12(r4); \
+ lwzu r10,16(r4); /* update form: r4 advances by 16 */ \
+ stw r7,4(r6); \
+ stw r8,8(r6); \
+ stw r9,12(r6); \
+ stwu r10,16(r6) /* update form: r6 advances by 16 */
+
+#define COPY_16_BYTES_WITHEX(n) /* same copy as COPY_16_BYTES with each instruction labelled 8<n>0..8<n>7; not invoked anywhere in this file */ \
+8 ## n ## 0: \
+ lwz r7,4(r4); \
+8 ## n ## 1: \
+ lwz r8,8(r4); \
+8 ## n ## 2: \
+ lwz r9,12(r4); \
+8 ## n ## 3: \
+ lwzu r10,16(r4); \
+8 ## n ## 4: \
+ stw r7,4(r6); \
+8 ## n ## 5: \
+ stw r8,8(r6); \
+8 ## n ## 6: \
+ stw r9,12(r6); \
+8 ## n ## 7: \
+ stwu r10,16(r6)
+
+#define COPY_16_BYTES_EXCODE(n) /* emits labels 9<n>0 and 9<n>1 plus __ex_table entries for the labels of COPY_16_BYTES_WITHEX(n); not invoked in this file, and 104/105 are not defined here */ \
+9 ## n ## 0: \
+ addi r5,r5,-(16 * n); /* r5 -= 16*n */ \
+ b 104f; \
+9 ## n ## 1: \
+ addi r5,r5,-(16 * n); /* r5 -= 16*n */ \
+ b 105f; \
+.section __ex_table,"a"; \
+ .align 2; \
+ .long 8 ## n ## 0b,9 ## n ## 0b; /* the four load labels are paired with 9<n>0 */ \
+ .long 8 ## n ## 1b,9 ## n ## 0b; \
+ .long 8 ## n ## 2b,9 ## n ## 0b; \
+ .long 8 ## n ## 3b,9 ## n ## 0b; \
+ .long 8 ## n ## 4b,9 ## n ## 1b; /* the four store labels are paired with 9<n>1 */ \
+ .long 8 ## n ## 5b,9 ## n ## 1b; \
+ .long 8 ## n ## 6b,9 ## n ## 1b; \
+ .long 8 ## n ## 7b,9 ## n ## 1b; \
+ .text
+
+ .text
+
+CACHELINE_BYTES = L1_CACHE_LINE_SIZE /* not referenced again in this file; the unrolling below tests L1_CACHE_LINE_SIZE directly */
+LG_CACHELINE_BYTES = LG_L1_CACHE_LINE_SIZE /* shift count used at label 58 to turn a byte count into a count of whole cache lines */
+CACHELINE_MASK = (L1_CACHE_LINE_SIZE-1) /* mask selecting the byte offset within a cache line */
+
+ .global ppcasm_cacheable_memcpy /* copies r5 bytes from the address in r4 to the address in r3, moving whole cache lines once the destination is line aligned; r3 is never written, so it still holds the destination at blr */
+ppcasm_cacheable_memcpy:
+#if 0 /* this part causes "error loading shared library: unexpected reloc type
+ 0x0b (???) */
+ add r7,r3,r5 /* test if the src & dst overlap */
+ add r8,r4,r5
+ cmplw 0,r4,r7
+ cmplw 1,r3,r8
+ crand 0,0,4 /* cr0.lt &= cr1.lt */
+ blt ppcasm_memcpy /* if regions overlap */
+#endif /* with the test compiled out, overlapping buffers are NOT diverted to ppcasm_memcpy */
+ addi r4,r4,-4 /* bias src by -4 so that 4(r4) addresses the next source byte */
+ addi r6,r3,-4 /* r6 = working dst pointer, carrying the same -4 bias */
+ neg r0,r3
+ andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */
+ beq 58f /* dst is already on a cache line boundary */
+
+ cmplw 0,r5,r0 /* is this more than total to do? */
+ blt 63f /* if not much to do */
+ andi. r8,r0,3 /* get it word-aligned first */
+ subf r5,r0,r5 /* r5 = bytes left once the alignment prefix is done */
+ mtctr r8
+ beq+ 61f /* CR0 still comes from the andi. above: no odd bytes to copy */
+70: lbz r9,4(r4) /* do some bytes */
+ stb r9,4(r6)
+ addi r4,r4,1
+ addi r6,r6,1
+ bdnz 70b
+61: srwi. r0,r0,2 /* whole words left in the alignment prefix */
+ mtctr r0
+ beq 58f
+72: lwzu r9,4(r4) /* do some words */
+ stwu r9,4(r6)
+ bdnz 72b
+
+58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */
+ clrlwi r5,r5,32-LG_CACHELINE_BYTES /* r5 = bytes left over after the complete lines */
+ li r11,4 /* offset that cancels the -4 bias of r6 for dcbz */
+ mtctr r0
+ beq 63f /* no complete cache line to copy */
+53:
+#if !defined(CONFIG_8xx)
+ dcbz r11,r6 /* zero the destination cache block at r6+4 before overwriting it; NOTE(review): presumably avoids fetching the line from memory and needs cacheable dst memory - confirm */
+#endif
+ COPY_16_BYTES /* one cache line per loop pass: 1, 2, 4 or 8 expansions depending on L1_CACHE_LINE_SIZE */
+#if L1_CACHE_LINE_SIZE >= 32
+ COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 64
+ COPY_16_BYTES
+ COPY_16_BYTES
+#if L1_CACHE_LINE_SIZE >= 128
+ COPY_16_BYTES
+ COPY_16_BYTES
+ COPY_16_BYTES
+ COPY_16_BYTES
+#endif
+#endif
+#endif
+ bdnz 53b
+
+63: srwi. r0,r5,2 /* remaining whole words */
+ mtctr r0
+ beq 64f
+30: lwzu r0,4(r4) /* r0 doubles as the data register; the count lives in CTR */
+ stwu r0,4(r6)
+ bdnz 30b
+
+64: andi. r0,r5,3 /* remaining bytes (0..3) */
+ mtctr r0
+ beq+ 65f
+40: lbz r0,4(r4)
+ stb r0,4(r6)
+ addi r4,r4,1
+ addi r6,r6,1
+ bdnz 40b
+65: blr
+
+ .globl ppcasm_memcpy /* copies r5 bytes from the address in r4 to the address in r3, two words per loop pass once the destination is word aligned; r3 is never written, so it still holds the destination at blr */
+ppcasm_memcpy:
+ srwi. r7,r5,3 /* r7 = number of 8-byte chunks */
+ addi r6,r3,-4 /* r6 = working dst pointer, biased by -4 */
+ addi r4,r4,-4 /* src biased by -4 so that 4(r4) is the next source byte */
+ beq 2f /* if less than 8 bytes to do */
+ andi. r0,r6,3 /* get dest word aligned */
+ mtctr r7
+ bne 5f /* dst is not word aligned: copy leading bytes at 5 first */
+1: lwz r7,4(r4) /* main loop: 8 bytes per pass */
+ lwzu r8,8(r4)
+ stw r7,4(r6)
+ stwu r8,8(r6)
+ bdnz 1b
+ andi. r5,r5,7 /* r5 = bytes left after the 8-byte chunks */
+2: cmplwi 0,r5,4
+ blt 3f
+ lwzu r0,4(r4) /* one more whole word */
+ addi r5,r5,-4
+ stwu r0,4(r6)
+3: cmpwi 0,r5,0
+ beqlr /* nothing left: return */
+ mtctr r5
+ addi r4,r4,3 /* change the bias from -4 to -1 for the lbzu/stbu 1(...) forms */
+ addi r6,r6,3
+4: lbzu r0,1(r4) /* trailing bytes */
+ stbu r0,1(r6)
+ bdnz 4b
+ blr
+5: subfic r0,r0,4 /* r0 = 4 - (dst & 3) = bytes needed to word-align dst */
+ mtctr r0
+6: lbz r7,4(r4)
+ addi r4,r4,1
+ stb r7,4(r6)
+ addi r6,r6,1
+ bdnz 6b
+ subf r5,r0,r5 /* r5 = bytes left after the alignment bytes */
+ rlwinm. r7,r5,32-3,3,31 /* r7 = r5 >> 3, sets CR0 */
+ beq 2b /* fewer than 8 bytes left */
+ mtctr r7
+ b 1b
+
+#endif /* ARCH_PPC */
diff --git a/src/xine-utils/ppcasm_string.h b/src/xine-utils/ppcasm_string.h
new file mode 100644
index 000000000..b4b3f9431
--- /dev/null
+++ b/src/xine-utils/ppcasm_string.h
@@ -0,0 +1,3 @@
+#include <stdlib.h> /* for size_t */
+void *ppcasm_cacheable_memcpy(void *dst, const void *src, size_t n); /* defined as a global symbol in ppcasm_string.S, so it needs external linkage (not static) */
+void *ppcasm_memcpy(void *dst, const void *src, size_t n); /* defined as a global symbol in ppcasm_string.S; same argument order as memcpy */