diff options
-rw-r--r-- | include/xine/xineutils.h | 5 | ||||
-rw-r--r-- | src/xine-utils/cpu_accel.c | 76 |
2 files changed, 66 insertions, 15 deletions
diff --git a/include/xine/xineutils.h b/include/xine/xineutils.h index 608384ad7..12f2256f9 100644 --- a/include/xine/xineutils.h +++ b/include/xine/xineutils.h @@ -92,6 +92,11 @@ extern "C" { #define MM_ACCEL_X86_MMXEXT 0x20000000 #define MM_ACCEL_X86_SSE 0x10000000 #define MM_ACCEL_X86_SSE2 0x08000000 +#define MM_ACCEL_X86_SSE3 0x04000000 +#define MM_ACCEL_X86_SSSE3 0x02000000 +#define MM_ACCEL_X86_SSE4 0x01000000 +#define MM_ACCEL_X86_SSE42 0x00800000 +#define MM_ACCEL_X86_AVX 0x00400000 /* powerpc accelerations and features */ #define MM_ACCEL_PPC_ALTIVEC 0x04000000 diff --git a/src/xine-utils/cpu_accel.c b/src/xine-utils/cpu_accel.c index adcc4edc0..43a702c90 100644 --- a/src/xine-utils/cpu_accel.c +++ b/src/xine-utils/cpu_accel.c @@ -47,7 +47,6 @@ #if defined(__i386__) || defined(__x86_64__) -#ifndef __x86_64__ #include <signal.h> #include <setjmp.h> @@ -56,11 +55,10 @@ static jmp_buf sigill_return; static void sigill_handler (int n) { longjmp(sigill_return, 1); } -#endif static uint32_t arch_accel (void) { - uint32_t caps; + uint32_t caps = 0; #if defined(__x86_64__) || \ ( defined(__SSE__) && defined(__SSE2__) && defined(__MMX__) ) @@ -71,14 +69,25 @@ static uint32_t arch_accel (void) | MM_ACCEL_X86_3DNOW # endif ; -#else +#endif #ifndef _MSC_VER + void (*old_sigill_handler)(int); uint32_t eax, ebx, ecx, edx; - int AMD; - caps = 0; -#ifndef __PIC__ +#if defined(__x86_64__) +#define cpuid(op,eax,ebx,ecx,edx) \ + __asm__ ("push %%rbx\n\t" \ + "cpuid\n\t" \ + "movl %%ebx,%1\n\t" \ + "pop %%rbx" \ + : "=a" (eax), \ + "=r" (ebx), \ + "=c" (ecx), \ + "=d" (edx) \ + : "a" (op) \ + : "cc") +#elif !defined(__PIC__) #define cpuid(op,eax,ebx,ecx,edx) \ __asm__ ("cpuid" \ : "=a" (eax), \ @@ -101,6 +110,7 @@ static uint32_t arch_accel (void) : "cc") #endif +#ifndef __x86_64__ __asm__ ("pushfl\n\t" "pushfl\n\t" "popl %0\n\t" @@ -127,9 +137,13 @@ static uint32_t arch_accel (void) return 0; } - AMD = (ebx == 0x68747541) && (ecx == 0x444d4163) && (edx == 0x69746e65); + int AMD = (ebx == 0x68747541) && (ecx == 0x444d4163) && (edx == 0x69746e65); + +#endif /* __x86_64__ */ cpuid (0x00000001, eax, ebx, ecx, edx); + +#ifndef __x86_64__ if (edx & 0x00800000) { /* MMX */ caps |= MM_ACCEL_X86_MMX; @@ -144,7 +158,42 @@ static uint32_t arch_accel (void) /* SSE2 */ caps |= MM_ACCEL_X86_SSE2; } +#endif /* __x86_64__ */ + + if (ecx & 0x00000001) { + caps |= MM_ACCEL_X86_SSE3; + } + if (ecx & 0x00000200) { + caps |= MM_ACCEL_X86_SSSE3; + } + if (ecx & 0x00080000) { + caps |= MM_ACCEL_X86_SSE4; + } + if (ecx & 0x00100000) { + caps |= MM_ACCEL_X86_SSE42; + } + + /* Check OXSAVE and AVX bits */ + if ((ecx & 0x18000000) == 0x18000000) { + /* test OS support for AVX */ + + old_sigill_handler = signal (SIGILL, sigill_handler); + + if (setjmp(sigill_return)) { + lprintf("OS doesn't support AVX instructions.\n"); + } else { + /* Get value of extended control register 0 */ + __asm__ (".byte 0x0f, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c" (0)); + if ((eax & 0x6) == 0x6) { + caps |= MM_ACCEL_X86_AVX; + } + } + + signal(SIGILL, old_sigill_handler); + } + +#ifndef __x86_64__ cpuid (0x80000000, eax, ebx, ecx, edx); if (eax >= 0x80000001) { cpuid (0x80000001, eax, ebx, ecx, edx); @@ -159,22 +208,19 @@ static uint32_t arch_accel (void) caps |= MM_ACCEL_X86_MMXEXT; } } -#else - caps = 0; +#endif /* __x86_64__ */ #endif /* _MSC_VER */ -#endif /* x86_64 or built-in options */ - #ifndef __x86_64__ /* test OS support for SSE */ if (caps & MM_ACCEL_X86_SSE) { - void (*old_sigill_handler)(int); - old_sigill_handler = signal (SIGILL, sigill_handler); if (setjmp(sigill_return)) { lprintf("OS doesn't support SSE instructions.\n"); - caps &= ~(MM_ACCEL_X86_SSE|MM_ACCEL_X86_SSE2); + caps &= ~(MM_ACCEL_X86_SSE|MM_ACCEL_X86_SSE2| + MM_ACCEL_X86_SSE3|MM_ACCEL_X86_SSSE3| + MM_ACCEL_X86_SSE4|MM_ACCEL_X86_SSE42); } else { __asm__ volatile ("xorps %xmm0, %xmm0"); } |