author     scop <scop>  2005-03-14 13:53:29 +0000
committer  scop <scop>  2005-03-14 13:53:29 +0000
commit     199be12e1e23d7d741e49ea3495cf9dc5f44bdbb (patch)
tree       e91213a087a59bd333745340c8ca2f732472c0cd /dxr3memcpy.c
parent     b7225753e91a5500a9624ce7de9365d9f8523106 (diff)
download   vdr-plugin-dxr3-199be12e1e23d7d741e49ea3495cf9dc5f44bdbb.tar.gz
           vdr-plugin-dxr3-199be12e1e23d7d741e49ea3495cf9dc5f44bdbb.tar.bz2
More GCC 3.4+ compilation fixes.
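The recurring fix in this patch: GCC 3.4 removed the old extension that let a cast expression act as an lvalue, so pointer bumps written as ((const unsigned char *)from)+=64; no longer compile and are rewritten as plain assignments. A minimal sketch of the before/after pattern (the advance() helper below is illustrative, not part of the plugin):

    #include <stddef.h>

    /* Sketch of the cast-as-lvalue fix applied throughout this diff.
     * The helper name is hypothetical; only the pattern matters. */
    static void advance(const void **from, void **to, size_t step)
    {
        /* Old form, rejected by GCC 3.4+:
         *   ((const unsigned char *)*from) += step;
         * New form assigns the arithmetic result back to the pointer: */
        *from = (const unsigned char *)*from + step;
        *to   = (unsigned char *)*to + step;
    }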
Diffstat (limited to 'dxr3memcpy.c')
-rw-r--r--  dxr3memcpy.c | 108
1 file changed, 49 insertions(+), 59 deletions(-)
diff --git a/dxr3memcpy.c b/dxr3memcpy.c
index 0d59e09..30c857d 100644
--- a/dxr3memcpy.c
+++ b/dxr3memcpy.c
@@ -34,20 +34,12 @@
 #include "dxr3log.h"
 #include "dxr3cpu.h"
 #include "dxr3memcpy.h"
-#include <sys/times.h>
-#include <limits.h>
-
-// ==================================
-//! our function pointer
 void *(* dxr3_memcpy)(void *to, const void *from, size_t len);
 
-#ifdef __i386__
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 
 // ==================================
 // for small memory blocks (<256 bytes) this version is faster
-#define small_memcpy(to,from,n) { register unsigned long int dummy; __asm__ __volatile__("rep; movsb":"=&D"(to), "=&S"(from), "=&c"(dummy) :"0" (to), "1" (from),"2" (n) : "memory"); }
-/*
-// -- dosn't compile with 2.95 gcc --
 #define small_memcpy(to,from,n)\
 {\
 register unsigned long int dummy;\
@@ -57,9 +49,9 @@ __asm__ __volatile__(\
 :"0" (to), "1" (from),"2" (n)\
 : "memory");\
 }
-*/
+
 // ==================================
-//! linux kernel __memcpy (from: /include/asm/string.h)
+// linux kernel __memcpy (from: /include/asm/string.h)
 static __inline__ void * __memcpy (
 void * to,
 const void * from,
@@ -94,6 +86,9 @@ int d0, d1, d2;
 
 #define MIN_LEN 0x40 /* 64-byte blocks */
 
+// Test for GCC > 3.2.0
+#if GCC_VERSION > 30200
+
 // ==================================
 /* SSE note: i tried to move 128 bytes a time instead of 64 but it
 didn't make any measureable difference. i'm using 64 for the sake of
@@ -146,9 +141,9 @@ static void * sse_memcpy(void * to, const void * from, size_t len)
 "movntps %%xmm1, 16(%1)\n"
 "movntps %%xmm2, 32(%1)\n"
 "movntps %%xmm3, 48(%1)\n"
-: : "r" (from), "r" (to) : "memory");
-((const unsigned char *)from)+=64;
-((unsigned char *)to)+=64;
+:: "r" (from), "r" (to) : "memory");
+from = ((const unsigned char *)from) + 64;
+to = ((unsigned char *)to) + 64;
 }
 else
 /*
@@ -169,15 +164,15 @@ static void * sse_memcpy(void * to, const void * from, size_t len)
 "movntps %%xmm1, 16(%1)\n"
 "movntps %%xmm2, 32(%1)\n"
 "movntps %%xmm3, 48(%1)\n"
-: : "r" (from), "r" (to) : "memory");
-((const unsigned char *)from)+=64;
-((unsigned char *)to)+=64;
+:: "r" (from), "r" (to) : "memory");
+from = ((const unsigned char *)from) + 64;
+to = ((unsigned char *)to) + 64;
 }
 /* since movntq is weakly-ordered, a "sfence"
  * is needed to become ordered again. */
-__asm__ __volatile__ ("sfence": : :"memory");
+__asm__ __volatile__ ("sfence":::"memory");
 /* enables to use FPU */
-__asm__ __volatile__ ("emms": : :"memory");
+__asm__ __volatile__ ("emms":::"memory");
 }
 /*
  * Now do the tail of the block
@@ -225,11 +220,11 @@ static void * mmx_memcpy(void * to, const void * from, size_t len)
 "movq %%mm5, 40(%1)\n"
 "movq %%mm6, 48(%1)\n"
 "movq %%mm7, 56(%1)\n"
-: : "r" (from), "r" (to) : "memory");
-((const unsigned char *)from)+=64;
-((unsigned char *)to)+=64;
+:: "r" (from), "r" (to) : "memory");
+from = ((const unsigned char *)from) + 64;
+to = ((unsigned char *)to) + 64;
 }
-__asm__ __volatile__ ("emms": : :"memory");
+__asm__ __volatile__ ("emms":::"memory");
 }
 /*
  * Now do the tail of the block
@@ -257,7 +252,7 @@ static void * mmx2_memcpy(void * to, const void * from, size_t len)
 " prefetchnta 224(%0)\n"
 " prefetchnta 256(%0)\n"
 " prefetchnta 288(%0)\n"
-: : "r" (from) );
+:: "r" (from) );
 
 if(len >= MIN_LEN)
 {
@@ -293,14 +288,14 @@ static void * mmx2_memcpy(void * to, const void * from, size_t len)
 "movntq %%mm5, 40(%1)\n"
 "movntq %%mm6, 48(%1)\n"
 "movntq %%mm7, 56(%1)\n"
-: : "r" (from), "r" (to) : "memory");
-((const unsigned char *)from)+=64;
-((unsigned char *)to)+=64;
+:: "r" (from), "r" (to) : "memory");
+from = ((const unsigned char *)from) + 64;
+to = ((unsigned char *)to) + 64;
 }
 /* since movntq is weakly-ordered, a "sfence"
  * is needed to become ordered again. */
-__asm__ __volatile__ ("sfence": : :"memory");
-__asm__ __volatile__ ("emms": : :"memory");
+__asm__ __volatile__ ("sfence":::"memory");
+__asm__ __volatile__ ("emms":::"memory");
 }
 /*
  * Now do the tail of the block
@@ -309,15 +304,17 @@ static void * mmx2_memcpy(void * to, const void * from, size_t len)
 return retval;
 }
 
+#endif /*GCC_VERSION > 30200*/
+
 // ==================================
 static void *linux_kernel_memcpy(void *to, const void *from, size_t len)
 {
 return __memcpy(to,from,len);
 }
-#endif /*__i386__*/
+#endif /*ARCH_X86/ARCH_X86_64*/
 
 // ==================================
-//! constr.
+// constr.
 cDxr3MemcpyBench::cDxr3MemcpyBench(uint32_t config_flags)
 {
 //
@@ -333,7 +330,7 @@ cDxr3MemcpyBench::cDxr3MemcpyBench(uint32_t config_flags)
 routine.cpu_require = 0;
 m_methods.push_back(routine);
 
-#ifdef __i386__
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 
 // linux_kernel_memcpy
 routine.name = "linux_kernel_memcpy()";
@@ -341,6 +338,9 @@ cDxr3MemcpyBench::cDxr3MemcpyBench(uint32_t config_flags)
 routine.cpu_require = 0;
 m_methods.push_back(routine);
 
+// Test for GCC > 3.2.0
+# if GCC_VERSION > 30200
+
 // MMX optimized memcpy()
 routine.name = "MMX optimized memcpy()";
 routine.function = mmx_memcpy;
@@ -353,7 +353,7 @@ cDxr3MemcpyBench::cDxr3MemcpyBench(uint32_t config_flags)
 routine.cpu_require = CC_MMXEXT;
 m_methods.push_back(routine);
 
-# ifndef __FreeBSD__
+#  ifndef __FreeBSD__
 
 // SSE optimized memcpy()
 routine.name = "SSE optimized memcpy()";
@@ -361,8 +361,9 @@ cDxr3MemcpyBench::cDxr3MemcpyBench(uint32_t config_flags)
 routine.cpu_require = CC_MMXEXT|CC_SSE;
 m_methods.push_back(routine);
 
-# endif /*__FreeBSD__*/
-#endif /*__i386__*/
+#  endif /*__FreeBSD__*/
+# endif /*GCC_VERSION > 30200*/
+#endif /*ARCH_X86/ARCH_X86_64*/
 
 //
@@ -394,13 +395,13 @@ cDxr3MemcpyBench::cDxr3MemcpyBench(uint32_t config_flags)
 }
 
 // count 100 runs of the memcpy function
-t = Rdtsc(config_flags);
+t = Rdtsc();
 for (j = 0; j < 50; j++)
 {
 m_methods[i].function(buf2,buf1,BUFSIZE);
 m_methods[i].function(buf1,buf2,BUFSIZE);
 }
-t = Rdtsc(config_flags) - t;
+t = Rdtsc() - t;
 
 m_methods[i].time = t;
@@ -422,27 +423,16 @@ cDxr3MemcpyBench::cDxr3MemcpyBench(uint32_t config_flags)
 }
 
 // ==================================
-//! needed for exact timing
-#ifdef __i386__
-unsigned long long int cDxr3MemcpyBench::Rdtsc(uint32_t config_flags)
-{
-// we need rdtsc support
-if (config_flags && CC_MMX)
-{
-unsigned long long int x;
-__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
-return x;
-}
-else
-{
-return times(NULL);
-}
-
-}
-#else
-unsigned long long int cDxr3MemcpyBench::Rdtsc(uint32_t config_flags)
+// needed for exact timing
+unsigned long long int cDxr3MemcpyBench::Rdtsc()
 {
-struct tms tp;
-return times(&tp);
+#ifdef ARCH_X86
+unsigned long long int x;
+__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
+return x;
+#else
+/* FIXME: implement an equivalent for using optimized memcpy on other
+   architectures */
+return 0;
+#endif
 }
-#endif
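A note on the new Rdtsc(): ".byte 0x0f, 0x31" emits the raw opcode of the x86 rdtsc instruction (spelled as bytes because very old assemblers did not know the mnemonic), and the "=A" constraint binds the 64-bit result to the EDX:EAX register pair. A self-contained sketch of the same read using the mnemonic (the read_tsc name is illustrative, not from the plugin):

    /* Reads the x86 time-stamp counter; valid on 32-bit x86 only,
     * where "=A" means the EDX:EAX pair. On x86-64 the two halves
     * would have to be read into separate outputs and combined. */
    static unsigned long long read_tsc(void)
    {
        unsigned long long x;
        __asm__ volatile ("rdtsc" : "=A" (x));
        return x;
    }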
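The new GCC_VERSION > 30200 guards compare against a single packed integer (3.2.0 -> 30200). The macro's definition is not part of this diff; the conventional form, shown here as an assumption, packs the version from GCC's built-in macros:

    /* Assumed definition; the real one lives elsewhere in the tree.
     * GCC 3.4.0 yields 30400, so GCC_VERSION > 30200 selects
     * compilers newer than 3.2.0. */
    #if defined(__GNUC__) && defined(__GNUC_MINOR__) && defined(__GNUC_PATCHLEVEL__)
    #  define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
    #else
    #  define GCC_VERSION 0
    #endif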