diff options
Diffstat (limited to 'src/libmpeg2new/libmpeg2/cpu_accel.c')
-rw-r--r-- | src/libmpeg2new/libmpeg2/cpu_accel.c | 228 |
1 files changed, 151 insertions, 77 deletions
Review notes (free text ahead of the "diff --git" header; patch tools skip it):

  * arch_accel() now takes the caller's accel mask and returns it with extra
    capability bits OR-ed in.  Implied flags are always added; probing only
    runs when ACCEL_DETECT is compiled in and the mask requests it.  Early
    exits return the mask as-is where the old code returned 0.
  * PPC / SPARC probe guards: the submitted form
        accel & (FLAG | MPEG2_ACCEL_DETECT) == MPEG2_ACCEL_DETECT
    parses as accel & ((FLAG | DETECT) == DETECT), because == binds tighter
    than &.  The masked value is now parenthesised before the comparison, so
    the guard reads "detect requested and FLAG not already set".
  * x86 implied flags are now propagated strongest-first (SSE3 -> SSE2 ->
    MMXEXT -> MMX).  In the submitted order a request for SSE2 or SSE3 alone
    gained MMXEXT but never MMX.
  * Only '+' lines were touched; every hunk keeps its original line counts.

diff --git a/src/libmpeg2new/libmpeg2/cpu_accel.c b/src/libmpeg2new/libmpeg2/cpu_accel.c
index dac3cf83d..7846f1e88 100644
--- a/src/libmpeg2new/libmpeg2/cpu_accel.c
+++ b/src/libmpeg2new/libmpeg2/cpu_accel.c
@@ -1,6 +1,6 @@
 /*
  * cpu_accel.c
- * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 2000-2004 Michel Lespinasse <walken@zoy.org>
  * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
  *
  * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
@@ -26,16 +26,25 @@
 #include <inttypes.h>
 
 #include "../include/mpeg2.h"
+#include "../include/attributes.h"
+#include "mpeg2_internal.h"
 
-#define ACCEL_DETECT /* Force accel on */
-
-#ifdef ACCEL_DETECT
 #ifdef ARCH_X86
-static inline uint32_t arch_accel (void)
+static inline uint32_t arch_accel (uint32_t accel)
 {
-    uint32_t eax, ebx, ecx, edx;
-    int AMD;
-    uint32_t caps;
+    if (accel & (MPEG2_ACCEL_X86_SSE3))    /* strongest first, so each */
+        accel |= MPEG2_ACCEL_X86_SSE2;     /* implied flag chains down */
+
+    if (accel & (MPEG2_ACCEL_X86_SSE2 | MPEG2_ACCEL_X86_SSE3))
+        accel |= MPEG2_ACCEL_X86_MMXEXT;
+
+    if (accel & (MPEG2_ACCEL_X86_3DNOW | MPEG2_ACCEL_X86_MMXEXT))
+        accel |= MPEG2_ACCEL_X86_MMX;
+
+#ifdef ACCEL_DETECT
+    if (accel & MPEG2_ACCEL_DETECT) {
+        uint32_t eax, ebx, ecx, edx;
+        int AMD;
 
 #if !defined(PIC) && !defined(__PIC__)
 #define cpuid(op,eax,ebx,ecx,edx) \
@@ -60,55 +69,63 @@ static inline uint32_t arch_accel (void)
              : "cc")
 #endif
 
-    __asm__ ("pushf\n\t"
-             "pushf\n\t"
-             "pop %0\n\t"
-             "movl %0,%1\n\t"
-             "xorl $0x200000,%0\n\t"
-             "push %0\n\t"
-             "popf\n\t"
-             "pushf\n\t"
-             "pop %0\n\t"
-             "popf"
-             : "=r" (eax),
-             "=r" (ebx)
-             :
-             : "cc");
+        __asm__ ("pushf\n\t"
+                 "pushf\n\t"
+                 "pop %0\n\t"
+                 "movl %0,%1\n\t"
+                 "xorl $0x200000,%0\n\t"
+                 "push %0\n\t"
+                 "popf\n\t"
+                 "pushf\n\t"
+                 "pop %0\n\t"
+                 "popf"
+                 : "=r" (eax),
+                 "=r" (ebx)
+                 :
+                 : "cc");
 
-    if (eax == ebx) /* no cpuid */
-        return 0;
+        if (eax == ebx) /* no cpuid */
+            return accel;
 
-    cpuid (0x00000000, eax, ebx, ecx, edx);
-    if (!eax) /* vendor string only */
-        return 0;
+        cpuid (0x00000000, eax, ebx, ecx, edx);
+        if (!eax) /* vendor string only */
+            return accel;
 
-    AMD = (ebx == 0x68747541) && (ecx == 0x444d4163) && (edx == 0x69746e65);
+        AMD = (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65);
 
-    cpuid (0x00000001, eax, ebx, ecx, edx);
-    if (! (edx & 0x00800000)) /* no MMX */
-        return 0;
+        cpuid (0x00000001, eax, ebx, ecx, edx);
+        if (! (edx & 0x00800000)) /* no MMX */
+            return accel;
 
-    caps = MPEG2_ACCEL_X86_MMX;
-    if (edx & 0x02000000) /* SSE - identical to AMD MMX extensions */
-        caps = MPEG2_ACCEL_X86_MMX | MPEG2_ACCEL_X86_MMXEXT;
+        accel |= MPEG2_ACCEL_X86_MMX;
+        if (edx & 0x02000000) /* SSE - identical to AMD MMX extensions */
+            accel |= MPEG2_ACCEL_X86_MMXEXT;
 
-    cpuid (0x80000000, eax, ebx, ecx, edx);
-    if (eax < 0x80000001) /* no extended capabilities */
-        return caps;
+        if (edx & 0x04000000) /* SSE2 */
+            accel |= MPEG2_ACCEL_X86_SSE2;
+
+        if (ecx & 0x00000001) /* SSE3 */
+            accel |= MPEG2_ACCEL_X86_SSE3;
+
+        cpuid (0x80000000, eax, ebx, ecx, edx);
+        if (eax < 0x80000001) /* no extended capabilities */
+            return accel;
 
-    cpuid (0x80000001, eax, ebx, ecx, edx);
+        cpuid (0x80000001, eax, ebx, ecx, edx);
 
-    if (edx & 0x80000000)
-        caps |= MPEG2_ACCEL_X86_3DNOW;
+        if (edx & 0x80000000)
+            accel |= MPEG2_ACCEL_X86_3DNOW;
 
-    if (AMD && (edx & 0x00400000)) /* AMD MMX extensions */
-        caps |= MPEG2_ACCEL_X86_MMXEXT;
+        if (AMD && (edx & 0x00400000)) /* AMD MMX extensions */
+            accel |= MPEG2_ACCEL_X86_MMXEXT;
+    }
+#endif /* ACCEL_DETECT */
 
-    return caps;
+    return accel;
 }
 #endif /* ARCH_X86 */
 
-#ifdef ARCH_PPC
+#if defined(ACCEL_DETECT) && (defined(ARCH_PPC) || defined(ARCH_SPARC))
 #include <signal.h>
 #include <setjmp.h>
 
@@ -125,60 +142,117 @@ static RETSIGTYPE sigill_handler (int sig)
     canjump = 0;
     siglongjmp (jmpbuf, 1);
 }
+#endif /* ACCEL_DETECT && (ARCH_PPC || ARCH_SPARC) */
 
-static inline uint32_t arch_accel (void)
+#ifdef ARCH_PPC
+static inline uint32_t arch_accel (uint32_t accel)
 {
-    static RETSIGTYPE (* oldsig) (int);
+#ifdef ACCEL_DETECT
+    if ((accel & (MPEG2_ACCEL_PPC_ALTIVEC | MPEG2_ACCEL_DETECT)) ==
+        MPEG2_ACCEL_DETECT) {   /* probe only if AltiVec is not already set */
+        static RETSIGTYPE (* oldsig) (int);
 
-    oldsig = signal (SIGILL, sigill_handler);
-    if (sigsetjmp (jmpbuf, 1)) {
-        signal (SIGILL, oldsig);
-        return 0;
-    }
+        oldsig = signal (SIGILL, sigill_handler);
+        if (sigsetjmp (jmpbuf, 1)) {
+            signal (SIGILL, oldsig);
+            return accel;
+        }
 
-    canjump = 1;
+        canjump = 1;
 
 #ifdef HAVE_ALTIVEC_H /* gnu */
 #define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
 #else /* apple */
 #define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t"
 #endif
-    asm volatile ("mtspr 256, %0\n\t"
-                  VAND (0, 0, 0)
-                  :
-                  : "r" (-1));
+        asm volatile ("mtspr 256, %0\n\t"
+                      VAND (0, 0, 0)
+                      :
+                      : "r" (-1));
 
-    signal (SIGILL, oldsig);
-    return MPEG2_ACCEL_PPC_ALTIVEC;
+        canjump = 0;
+        accel |= MPEG2_ACCEL_PPC_ALTIVEC;
+
+        signal (SIGILL, oldsig);
+    }
+#endif /* ACCEL_DETECT */
+
+    return accel;
 }
 #endif /* ARCH_PPC */
 
-#ifdef ARCH_ALPHA
-static inline uint32_t arch_accel (void)
+#ifdef ARCH_SPARC
+static inline uint32_t arch_accel (uint32_t accel)
 {
-    uint64_t no_mvi;
+    if (accel & MPEG2_ACCEL_SPARC_VIS2)
+        accel |= MPEG2_ACCEL_SPARC_VIS;
+
+#ifdef ACCEL_DETECT
+    if ((accel & (MPEG2_ACCEL_SPARC_VIS2 | MPEG2_ACCEL_DETECT)) ==
+        MPEG2_ACCEL_DETECT) {   /* probe only if VIS2 is not already set */
+        static RETSIGTYPE (* oldsig) (int);
+
+        oldsig = signal (SIGILL, sigill_handler);
+        if (sigsetjmp (jmpbuf, 1)) {
+            signal (SIGILL, oldsig);
+            return accel;
+        }
+
+        canjump = 1;
+
+        /* pdist %f0, %f0, %f0 */
+        __asm__ __volatile__(".word\t0x81b007c0");
 
-    asm volatile ("amask %1, %0"
-                  : "=r" (no_mvi)
-                  : "rI" (256)); /* AMASK_MVI */
-    return no_mvi ? MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA |
-                                         MPEG2_ACCEL_ALPHA_MVI);
+        canjump = 0;
+        accel |= MPEG2_ACCEL_SPARC_VIS;
+
+        if (sigsetjmp (jmpbuf, 1)) {
+            signal (SIGILL, oldsig);
+            return accel;
+        }
+
+        canjump = 1;
+
+        /* edge8n %g0, %g0, %g0 */
+        __asm__ __volatile__(".word\t0x81b00020");
+
+        canjump = 0;
+        accel |= MPEG2_ACCEL_SPARC_VIS2;
+
+        signal (SIGILL, oldsig);
+    }
+#endif /* ACCEL_DETECT */
+
+    return accel;
 }
-#endif /* ARCH_ALPHA */
-#endif
+#endif /* ARCH_SPARC */
 
-uint32_t mpeg2_detect_accel (void)
+#ifdef ARCH_ALPHA
+static inline uint32_t arch_accel (uint32_t accel)
 {
-    uint32_t accel;
+    if (accel & MPEG2_ACCEL_ALPHA_MVI)
+        accel |= MPEG2_ACCEL_ALPHA;
 
-    accel = 0;
 #ifdef ACCEL_DETECT
-#ifdef LIBMPEG2_MLIB
-    accel = MPEG2_ACCEL_MLIB;
-#endif
-#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA)
-    accel |= arch_accel ();
-#endif
+    if (accel & MPEG2_ACCEL_DETECT) {
+        uint64_t no_mvi;
+
+        asm volatile ("amask %1, %0"
+                      : "=r" (no_mvi)
+                      : "rI" (256)); /* AMASK_MVI */
+        accel |= no_mvi ? MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA |
+                                               MPEG2_ACCEL_ALPHA_MVI);
+    }
+#endif /* ACCEL_DETECT */
+
+    return accel;
+}
+#endif /* ARCH_ALPHA */
+
+uint32_t mpeg2_detect_accel (uint32_t accel)
+{
+#if defined (ARCH_X86) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || defined (ARCH_SPARC)
+    accel = arch_accel (accel);
 #endif
     return accel;
 }