From f56650d12bcdc5f1052143d872add024b6982b90 Mon Sep 17 00:00:00 2001
From: Torsten Jager
Date: Tue, 11 Feb 2014 14:09:30 +0100
Subject: Unaligned integer read optimization attempt.

Example gcc -S -O2, _X_BE_32 old:
movzbl 1(%edx), %eax
movzbl 2(%edx), %ecx
sall $24, %eax
sall $16, %ecx
orl %ecx, %eax
movzbl 4(%edx), %ecx
movzbl 3(%edx), %edx
orl %ecx, %eax
sall $8, %edx
orl %edx, %eax
New:
movl 1(%edx), %eax
bswap %eax
---
 src/xine-engine/bswap.h | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/src/xine-engine/bswap.h b/src/xine-engine/bswap.h
index 018ac10f5..f1db98090 100644
--- a/src/xine-engine/bswap.h
+++ b/src/xine-engine/bswap.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2000-2009 the xine project
+ * Copyright (C) 2000-2014 the xine project
  *
  * This file is part of xine, a free video player.
  *
@@ -71,6 +71,25 @@
 ((uint64_t)(((uint8_t*)(x))[1]) << 8) | \
 ((uint64_t)((uint8_t*)(x))[0]))
 
+/* Tested with gcc 4.5 */
+#if defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ > 4))
+# undef _X_BE_32
+# undef _X_BE_64
+# undef _X_LE_32
+# undef _X_LE_64
+# ifdef WORDS_BIGENDIAN
+# define _X_LE_32(x) ({ int32_t tempi; __builtin_memcpy (&tempi, (x), 4); (uint32_t)(__builtin_bswap32 (tempi)); })
+# define _X_LE_64(x) ({ int64_t tempi; __builtin_memcpy (&tempi, (x), 8); (uint64_t)(__builtin_bswap64 (tempi)); })
+# define _X_BE_32(x) ({ uint32_t tempi; __builtin_memcpy (&tempi, (x), 4); tempi; })
+# define _X_BE_64(x) ({ uint64_t tempi; __builtin_memcpy (&tempi, (x), 8); tempi; })
+# else
+# define _X_BE_32(x) ({ int32_t tempi; __builtin_memcpy (&tempi, (x), 4); (uint32_t)(__builtin_bswap32 (tempi)); })
+# define _X_BE_64(x) ({ int64_t tempi; __builtin_memcpy (&tempi, (x), 8); (uint64_t)(__builtin_bswap64 (tempi)); })
+# define _X_LE_32(x) ({ uint32_t tempi; __builtin_memcpy (&tempi, (x), 4); tempi; })
+# define _X_LE_64(x) ({ uint64_t tempi; __builtin_memcpy (&tempi, (x), 8); tempi; })
+# endif
+#endif
+
 #ifdef WORDS_BIGENDIAN
 #define _X_ME_16(x) _X_BE_16(x)
 #define _X_ME_32(x) _X_BE_32(x)
-- 
cgit v1.2.3