author | scop <scop> | 2005-05-17 20:26:00 +0000 |
---|---|---|
committer | scop <scop> | 2005-05-17 20:26:00 +0000 |
commit | 9baae216f02485ac6b0b1c4df122e2c75ac75b4a (patch) | |
tree | 46379fee8c98f8ad56d77df8aeab5eccff453cd5 /dxr3memcpy.c | |
parent | db76939b99be30bb77a14b38ace07a5774c30c96 (diff) | |
download | vdr-plugin-dxr3-9baae216f02485ac6b0b1c4df122e2c75ac75b4a.tar.gz vdr-plugin-dxr3-9baae216f02485ac6b0b1c4df122e2c75ac75b4a.tar.bz2 | |
Remove optimized memcpy stuff.
Diffstat (limited to 'dxr3memcpy.c')
-rw-r--r-- | dxr3memcpy.c | 455 |
1 file changed, 0 insertions, 455 deletions
diff --git a/dxr3memcpy.c b/dxr3memcpy.c
deleted file mode 100644
index 19f7fda..0000000
--- a/dxr3memcpy.c
+++ /dev/null
@@ -1,455 +0,0 @@
-/*
- * dxr3memcpy.c
- *
- * Copyright (C) 2004 Christian Gmeiner
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public License
- * as published by the Free Software Foundation; either version 2.1
- * of the License, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- *
- *
- * Orgianal from xine:
- *
- * Copyright (C) 2001-2003 the xine project
- *
- * This file is part of xine, a free video player.
- *
- * These are the MMX/MMX2/SSE optimized versions of memcpy
- *
- * This code was adapted from Linux Kernel sources by Nick Kurshev to
- * the mplayer program. (http://mplayer.sourceforge.net)
- *
- */
-
-#include "dxr3log.h"
-#include "dxr3cpu.h"
-#include "dxr3memcpy.h"
-#include <sys/times.h>
-#include <limits.h>
-
-
-// ==================================
-//! our function pointer
-void *(* dxr3_memcpy)(void *to, const void *from, size_t len);
-
-#if defined(__i386__) || defined(__x86_64__)
-// ==================================
-// for small memory blocks (<256 bytes) this version is faster
-#define small_memcpy(to,from,n) { register unsigned long int dummy; __asm__ __volatile__("rep; movsb":"=&D"(to), "=&S"(from), "=&c"(dummy) :"0" (to), "1" (from),"2" (n) : "memory"); }
-/*
-// -- doesn't compile with 2.95 gcc --
-#define small_memcpy(to,from,n)\
-{\
-register unsigned long int dummy;\
-__asm__ __volatile__(\
-    "rep; movsb"\
-    :"=&D"(to), "=&S"(from), "=&c"(dummy)\
-    :"0" (to), "1" (from),"2" (n)\
-    : "memory");\
-}
-*/
-// ==================================
-//! linux kernel __memcpy (from: /include/asm/string.h)
-static __inline__ void * __memcpy (void * to, const void * from, size_t n)
-{
-    int d0, d1, d2;
-
-    if (n < 4)
-    {
-        small_memcpy(to, from, n);
-    }
-    else
-        __asm__ __volatile__(
-            "rep ; movsl\n\t"
-            "testb $2,%b4\n\t"
-            "je 1f\n\t"
-            "movsw\n"
-            "1:\ttestb $1,%b4\n\t"
-            "je 2f\n\t"
-            "movsb\n"
-            "2:"
-            : "=&c" (d0), "=&D" (d1), "=&S" (d2)
-            :"0" (n/4), "q" (n),"1" ((long) to),"2" ((long) from)
-            : "memory");
-
-    return (to);
-}
-
-#define SSE_MMREG_SIZE 16
-#define MMX_MMREG_SIZE 8
-
-#define MMX1_MIN_LEN 0x800 /* 2K blocks */
-#define MIN_LEN 0x40 /* 64-byte blocks */
-
-
-// ==================================
-/* SSE note: i tried to move 128 bytes a time instead of 64 but it
-   didn't make any measureable difference. i'm using 64 for the sake of
-   simplicity. [MF] */
-static void * sse_memcpy(void * to, const void * from, size_t len)
-{
-    void *retval;
-    size_t i;
-    retval = to;
-
-    /* PREFETCH has effect even for MOVSB instruction ;) */
-    __asm__ __volatile__ (
-        " prefetchnta (%0)\n"
-        " prefetchnta 32(%0)\n"
-        " prefetchnta 64(%0)\n"
-        " prefetchnta 96(%0)\n"
-        " prefetchnta 128(%0)\n"
-        " prefetchnta 160(%0)\n"
-        " prefetchnta 192(%0)\n"
-        " prefetchnta 224(%0)\n"
-        " prefetchnta 256(%0)\n"
-        " prefetchnta 288(%0)\n"
-        : : "r" (from) );
-
-    if (len >= MIN_LEN)
-    {
-        register unsigned long int delta;
-        /* Align destinition to MMREG_SIZE -boundary */
-        delta = ((unsigned long int)to) & (SSE_MMREG_SIZE - 1);
-        if (delta)
-        {
-            delta = SSE_MMREG_SIZE - delta;
-            len -= delta;
-            small_memcpy(to, from, delta);
-        }
-        i = len >> 6; /* len/64 */
-        len&=63;
-        if (((unsigned long)from) & 15) /* if SRC is misaligned */
-        {
-            for( ; i > 0; i--)
-            {
-                __asm__ __volatile__ (
-                    "prefetchnta 320(%0)\n"
-                    "prefetchnta 352(%0)\n"
-                    "movups (%0), %%xmm0\n"
-                    "movups 16(%0), %%xmm1\n"
-                    "movups 32(%0), %%xmm2\n"
-                    "movups 48(%0), %%xmm3\n"
-                    "movntps %%xmm0, (%1)\n"
-                    "movntps %%xmm1, 16(%1)\n"
-                    "movntps %%xmm2, 32(%1)\n"
-                    "movntps %%xmm3, 48(%1)\n"
-                    : : "r" (from), "r" (to) : "memory");
-                from = ((const unsigned char *)from) + 64;
-                to = ((unsigned char *)to) + 64;
-            }
-        }
-        else
-        {
-            /*
-               Only if SRC is aligned on 16-byte boundary.
-               It allows to use movaps instead of movups, which required data
-               to be aligned or a general-protection exception (#GP) is generated.
-            */
-            for( ; i > 0; i--)
-            {
-                __asm__ __volatile__ (
-                    "prefetchnta 320(%0)\n"
-                    "prefetchnta 352(%0)\n"
-                    "movaps (%0), %%xmm0\n"
-                    "movaps 16(%0), %%xmm1\n"
-                    "movaps 32(%0), %%xmm2\n"
-                    "movaps 48(%0), %%xmm3\n"
-                    "movntps %%xmm0, (%1)\n"
-                    "movntps %%xmm1, 16(%1)\n"
-                    "movntps %%xmm2, 32(%1)\n"
-                    "movntps %%xmm3, 48(%1)\n"
-                    : : "r" (from), "r" (to) : "memory");
-                from = ((const unsigned char *)from) + 64;
-                to = ((unsigned char *)to) + 64;
-            }
-        }
-        /* since movntq is weakly-ordered, a "sfence"
-         * is needed to become ordered again. */
-        __asm__ __volatile__ ("sfence": : :"memory");
-        /* enables to use FPU */
-        __asm__ __volatile__ ("emms": : :"memory");
-    }
-    /*
-     * Now do the tail of the block
-     */
-    if(len) __memcpy(to, from, len);
-    return retval;
-}
-
-// ==================================
-static void * mmx_memcpy(void * to, const void * from, size_t len)
-{
-    void *retval;
-    size_t i;
-    retval = to;
-
-    if(len >= MMX1_MIN_LEN)
-    {
-        register unsigned long int delta;
-        /* Align destinition to MMREG_SIZE -boundary */
-        delta = ((unsigned long int)to) & (MMX_MMREG_SIZE - 1);
-        if (delta)
-        {
-            delta = MMX_MMREG_SIZE - delta;
-            len -= delta;
-            small_memcpy(to, from, delta);
-        }
-        i = len >> 6; /* len/64 */
-        len&=63;
-        for( ; i > 0; i--)
-        {
-            __asm__ __volatile__ (
-                "movq (%0), %%mm0\n"
-                "movq 8(%0), %%mm1\n"
-                "movq 16(%0), %%mm2\n"
-                "movq 24(%0), %%mm3\n"
-                "movq 32(%0), %%mm4\n"
-                "movq 40(%0), %%mm5\n"
-                "movq 48(%0), %%mm6\n"
-                "movq 56(%0), %%mm7\n"
-                "movq %%mm0, (%1)\n"
-                "movq %%mm1, 8(%1)\n"
-                "movq %%mm2, 16(%1)\n"
-                "movq %%mm3, 24(%1)\n"
-                "movq %%mm4, 32(%1)\n"
-                "movq %%mm5, 40(%1)\n"
-                "movq %%mm6, 48(%1)\n"
-                "movq %%mm7, 56(%1)\n"
-                : : "r" (from), "r" (to) : "memory");
-            from = ((const unsigned char *)from) + 64;
-            to = ((unsigned char *)to) + 64;
-        }
-        __asm__ __volatile__ ("emms": : :"memory");
-    }
-    /*
-     * Now do the tail of the block
-     */
-    if(len) __memcpy(to, from, len);
-    return retval;
-}
-
-// ==================================
-static void * mmx2_memcpy(void * to, const void * from, size_t len)
-{
-    void *retval;
-    size_t i;
-    retval = to;
-
-    /* PREFETCH has effect even for MOVSB instruction ;) */
-    __asm__ __volatile__ (
-        " prefetchnta (%0)\n"
-        " prefetchnta 32(%0)\n"
-        " prefetchnta 64(%0)\n"
-        " prefetchnta 96(%0)\n"
-        " prefetchnta 128(%0)\n"
-        " prefetchnta 160(%0)\n"
-        " prefetchnta 192(%0)\n"
-        " prefetchnta 224(%0)\n"
-        " prefetchnta 256(%0)\n"
-        " prefetchnta 288(%0)\n"
-        : : "r" (from) );
-
-    if (len >= MIN_LEN)
-    {
-        register unsigned long int delta;
-        /* Align destinition to MMREG_SIZE -boundary */
-        delta = ((unsigned long int)to) & (MMX_MMREG_SIZE - 1);
-        if (delta)
-        {
-            delta = MMX_MMREG_SIZE - delta;
-            len -= delta;
-            small_memcpy(to, from, delta);
-        }
-        i = len >> 6; /* len/64 */
-        len&=63;
-        for( ; i > 0; i--)
-        {
-            __asm__ __volatile__ (
-                "prefetchnta 320(%0)\n"
-                "prefetchnta 352(%0)\n"
-                "movq (%0), %%mm0\n"
-                "movq 8(%0), %%mm1\n"
-                "movq 16(%0), %%mm2\n"
-                "movq 24(%0), %%mm3\n"
-                "movq 32(%0), %%mm4\n"
-                "movq 40(%0), %%mm5\n"
-                "movq 48(%0), %%mm6\n"
-                "movq 56(%0), %%mm7\n"
-                "movntq %%mm0, (%1)\n"
-                "movntq %%mm1, 8(%1)\n"
-                "movntq %%mm2, 16(%1)\n"
-                "movntq %%mm3, 24(%1)\n"
-                "movntq %%mm4, 32(%1)\n"
-                "movntq %%mm5, 40(%1)\n"
-                "movntq %%mm6, 48(%1)\n"
-                "movntq %%mm7, 56(%1)\n"
-                : : "r" (from), "r" (to) : "memory");
-            from = ((const unsigned char *)from) + 64;
-            to = ((unsigned char *)to) + 64;
-        }
-        /* since movntq is weakly-ordered, a "sfence"
-         * is needed to become ordered again. */
-        __asm__ __volatile__ ("sfence": : :"memory");
-        __asm__ __volatile__ ("emms": : :"memory");
-    }
-    /*
-     * Now do the tail of the block
-     */
-    if(len) __memcpy(to, from, len);
-    return retval;
-}
-
-// ==================================
-static void *linux_kernel_memcpy(void *to, const void *from, size_t len)
-{
-    return __memcpy(to, from, len);
-}
-#endif /* __i386__ || __x86_64__ */
-
-
-// ==================================
-//! constr.
-cDxr3MemcpyBench::cDxr3MemcpyBench(uint32_t config_flags)
-{
-    //
-    // add all available memcpy routines
-    //
-
-    memcpy_routine routine;
-
-    // glibc memcpy
-    routine.name = "glibc memcpy()";
-    routine.function = memcpy;
-    routine.time = 0;
-    routine.cpu_require = 0;
-    m_methods.push_back(routine);
-
-#if defined(__i386__) || defined(__x86_64__)
-
-    // linux_kernel_memcpy
-    routine.name = "linux_kernel_memcpy()";
-    routine.function = linux_kernel_memcpy;
-    routine.cpu_require = 0;
-    m_methods.push_back(routine);
-
-    // MMX optimized memcpy()
-    routine.name = "MMX optimized memcpy()";
-    routine.function = mmx_memcpy;
-    routine.cpu_require = CC_MMX;
-    m_methods.push_back(routine);
-
-    // MMXEXT optimized memcpy()
-    routine.name = "MMXEXT optimized memcpy()";
-    routine.function = mmx2_memcpy;
-    routine.cpu_require = CC_MMXEXT;
-    m_methods.push_back(routine);
-
-#ifndef __FreeBSD__
-
-    // SSE optimized memcpy()
-    routine.name = "SSE optimized memcpy()";
-    routine.function = sse_memcpy;
-    routine.cpu_require = CC_MMXEXT|CC_SSE;
-    m_methods.push_back(routine);
-
-#endif /* not __FreeBSD__ */
-#endif /* __i386__ || __x86_64__ */
-
-    //
-    // run benchmarking
-    //
-
-    unsigned long long t = 0;
-    void *buf1, *buf2;
-    int j, best = -1;
-
-    if ((buf1 = malloc(BUFSIZE)) == NULL)
-        return;
-
-    if ((buf2 = malloc(BUFSIZE)) == NULL)
-    {
-        free(buf1);
-        return;
-    }
-
-    cLog::Instance() <<
-        "\nBenchmarking memcpy() methods (smaller is better):\n";
-    // make sure buffers are present on physical memory
-    memcpy(buf1, buf2, BUFSIZE);
-
-    for (size_t i = 0; i < m_methods.size(); i++)
-    {
-        if ((config_flags & m_methods[i].cpu_require) != m_methods[i].cpu_require)
-        {
-            continue;
-        }
-
-        // count 100 runs of the memcpy function
-        t = Rdtsc(config_flags);
-        for (j = 0; j < 50; j++)
-        {
-            m_methods[i].function(buf2, buf1, BUFSIZE);
-            m_methods[i].function(buf1, buf2, BUFSIZE);
-        }
-        t = Rdtsc(config_flags) - t;
-
-        m_methods[i].time = t;
-
-        cLog::Instance() << m_methods[i].name.c_str() << ": "
-            << (unsigned long long)t << "\n";
-
-        if (best == -1 || t < m_methods[best].time)
-        {
-            best = i;
-        }
-    }
-    cLog::Instance() << "\nBest one: "
-        << m_methods[best].name.c_str() << "\n\n";
-
-    dxr3_memcpy = m_methods[best].function;
-
-
-    // clear unused memory
-    free(buf1);
-    free(buf2);
-}
-
-// ==================================
-//! needed for exact timing
-unsigned long long int cDxr3MemcpyBench::Rdtsc(uint32_t config_flags)
-{
-#if defined(__i386__) || defined(__x86_64__)
-    // we need rdtsc support
-    if (config_flags && CC_MMX)
-    {
-        unsigned long long int x;
-        __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
-        return x;
-    }
-    else
-    {
-        return times(NULL);
-    }
-#else
-    struct tms tp;
-    return times(&tp);
-#endif /* __i386__ || __x86_64__ */
-}
-
-// Local variables:
-// mode: c++
-// c-file-style: "stroustrup"
-// c-file-offsets: ((inline-open . 0))
-// indent-tabs-mode: t
-// End: