From 4d4eeb689385c0a7173ff408f195d36c90409c83 Mon Sep 17 00:00:00 2001 From: proller Date: Thu, 21 Jun 2018 20:19:27 +0300 Subject: [PATCH] Use linux line endings for asm-bits.c asm-bits.h --- libcpuid/asm-bits.c | 1670 +++++++++++++++++++++---------------------- libcpuid/asm-bits.h | 138 ++-- 2 files changed, 904 insertions(+), 904 deletions(-) diff --git a/libcpuid/asm-bits.c b/libcpuid/asm-bits.c index e881668..bfabd40 100644 --- a/libcpuid/asm-bits.c +++ b/libcpuid/asm-bits.c @@ -1,836 +1,836 @@ -/* - * Copyright 2008 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "libcpuid.h" -#include "asm-bits.h" - -int cpuid_exists_by_eflags(void) -{ -#if defined(PLATFORM_X64) - return 1; /* CPUID is always present on the x86_64 */ -#elif defined(PLATFORM_X86) -# if defined(COMPILER_GCC) || defined(COMPILER_CLANG) - int result; - __asm __volatile( - " pushfl\n" - " pop %%eax\n" - " mov %%eax, %%ecx\n" - " xor $0x200000, %%eax\n" - " push %%eax\n" - " popfl\n" - " pushfl\n" - " pop %%eax\n" - " xor %%ecx, %%eax\n" - " mov %%eax, %0\n" - " push %%ecx\n" - " popfl\n" - : "=m"(result) - : :"eax", "ecx", "memory"); - return (result != 0); -# elif defined(COMPILER_MICROSOFT) - int result; - __asm { - pushfd - pop eax - mov ecx, eax - xor eax, 0x200000 - push eax - popfd - pushfd - pop eax - xor eax, ecx - mov result, eax - push ecx - popfd - }; - return (result != 0); -# else - return 0; -# endif /* COMPILER_MICROSOFT */ -#elif defined(PLATFORM_ARM) - return 0; -#else - return 0; -#endif /* PLATFORM_X86 */ -} - -#ifdef INLINE_ASM_SUPPORTED -/* - * with MSVC/AMD64, the exec_cpuid() and cpu_rdtsc() functions - * are implemented in separate .asm files. 
Otherwise, use inline assembly - */ -void exec_cpuid(uint32_t *regs) -{ -# if defined(COMPILER_GCC) || defined(COMPILER_CLANG) -# ifdef PLATFORM_X64 - __asm __volatile( - " mov %0, %%rdi\n" - - " push %%rbx\n" - " push %%rcx\n" - " push %%rdx\n" - - " mov (%%rdi), %%eax\n" - " mov 4(%%rdi), %%ebx\n" - " mov 8(%%rdi), %%ecx\n" - " mov 12(%%rdi), %%edx\n" - - " cpuid\n" - - " movl %%eax, (%%rdi)\n" - " movl %%ebx, 4(%%rdi)\n" - " movl %%ecx, 8(%%rdi)\n" - " movl %%edx, 12(%%rdi)\n" - " pop %%rdx\n" - " pop %%rcx\n" - " pop %%rbx\n" - : - :"m"(regs) - :"memory", "eax", "rdi" - ); -# elif defined(PLATFORM_X86) - __asm __volatile( - " mov %0, %%edi\n" - - " push %%ebx\n" - " push %%ecx\n" - " push %%edx\n" - - " mov (%%edi), %%eax\n" - " mov 4(%%edi), %%ebx\n" - " mov 8(%%edi), %%ecx\n" - " mov 12(%%edi), %%edx\n" - - " cpuid\n" - - " mov %%eax, (%%edi)\n" - " mov %%ebx, 4(%%edi)\n" - " mov %%ecx, 8(%%edi)\n" - " mov %%edx, 12(%%edi)\n" - " pop %%edx\n" - " pop %%ecx\n" - " pop %%ebx\n" - : - :"m"(regs) - :"memory", "eax", "edi" - ); -# elif defined(PLATFORM_ARM) -# endif /* COMPILER_GCC */ -#else -# ifdef COMPILER_MICROSOFT - __asm { - push ebx - push ecx - push edx - push edi - mov edi, regs - - mov eax, [edi] - mov ebx, [edi+4] - mov ecx, [edi+8] - mov edx, [edi+12] - - cpuid - - mov [edi], eax - mov [edi+4], ebx - mov [edi+8], ecx - mov [edi+12], edx - - pop edi - pop edx - pop ecx - pop ebx - } -# else -# error "Unsupported compiler" -# endif /* COMPILER_MICROSOFT */ -#endif -} -#endif /* INLINE_ASSEMBLY_SUPPORTED */ - -#ifdef INLINE_ASM_SUPPORTED -void cpu_rdtsc(uint64_t* result) -{ - uint32_t low_part, hi_part; -#if defined(COMPILER_GCC) || defined(COMPILER_CLANG) -#ifdef PLATFORM_ARM - low_part = 0; - hi_part = 0; -#else - __asm __volatile ( - " rdtsc\n" - " mov %%eax, %0\n" - " mov %%edx, %1\n" - :"=m"(low_part), "=m"(hi_part)::"memory", "eax", "edx" - ); -#endif -#else -# ifdef COMPILER_MICROSOFT - __asm { - rdtsc - mov low_part, eax - mov hi_part, edx - }; -# else -# error "Unsupported compiler" -# endif /* COMPILER_MICROSOFT */ -#endif /* COMPILER_GCC */ - *result = (uint64_t)low_part + (((uint64_t) hi_part) << 32); -} -#endif /* INLINE_ASM_SUPPORTED */ - -#ifdef INLINE_ASM_SUPPORTED -void busy_sse_loop(int cycles) -{ -# if defined(COMPILER_GCC) || defined(COMPILER_CLANG) -#ifndef __APPLE__ -# define XALIGN ".balign 16\n" -#else -# define XALIGN ".align 4\n" -#endif -#ifdef PLATFORM_ARM -#else - __asm __volatile ( - " xorps %%xmm0, %%xmm0\n" - " xorps %%xmm1, %%xmm1\n" - " xorps %%xmm2, %%xmm2\n" - " xorps %%xmm3, %%xmm3\n" - " xorps %%xmm4, %%xmm4\n" - " xorps %%xmm5, %%xmm5\n" - " xorps %%xmm6, %%xmm6\n" - " xorps %%xmm7, %%xmm7\n" - XALIGN - /* ".bsLoop:\n" */ - "1:\n" - // 0: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 1: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 2: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 3: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps 
%%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 4: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 5: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 6: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 7: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 8: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - // 9: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //10: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //11: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //12: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //13: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //14: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //15: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //16: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //17: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //18: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" 
- " addps %%xmm0, %%xmm7\n" - //19: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //20: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //21: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //22: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //23: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //24: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //25: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //26: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //27: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //28: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //29: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //30: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - //31: - " addps %%xmm1, %%xmm0\n" - " addps %%xmm2, %%xmm1\n" - " addps %%xmm3, %%xmm2\n" - " addps %%xmm4, %%xmm3\n" - " addps %%xmm5, %%xmm4\n" - " addps %%xmm6, %%xmm5\n" - " addps %%xmm7, %%xmm6\n" - " addps %%xmm0, %%xmm7\n" - - " dec %%eax\n" - /* "jnz .bsLoop\n" */ - " jnz 1b\n" - ::"a"(cycles) - ); -#endif -#else -# ifdef COMPILER_MICROSOFT - __asm { - mov eax, cycles - xorps xmm0, xmm0 - xorps xmm1, xmm1 - xorps xmm2, xmm2 - xorps xmm3, xmm3 - xorps xmm4, xmm4 - xorps xmm5, xmm5 - xorps xmm6, xmm6 - xorps xmm7, xmm7 - //-- - align 16 -bsLoop: - // 0: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 1: - addps xmm0, 
xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 2: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 3: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 4: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 5: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 6: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 7: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 8: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 9: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 10: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 11: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 12: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 13: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 14: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 15: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 16: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 17: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 18: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 19: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 20: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 21: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 22: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 23: - addps xmm0, xmm1 - addps xmm1, 
xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 24: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 25: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 26: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 27: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 28: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 29: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 30: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - // 31: - addps xmm0, xmm1 - addps xmm1, xmm2 - addps xmm2, xmm3 - addps xmm3, xmm4 - addps xmm4, xmm5 - addps xmm5, xmm6 - addps xmm6, xmm7 - addps xmm7, xmm0 - //---------------------- - dec eax - jnz bsLoop - } -# else -# error "Unsupported compiler" -# endif /* COMPILER_MICROSOFT */ -#endif /* COMPILER_GCC */ -} +/* + * Copyright 2008 Veselin Georgiev, + * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "libcpuid.h" +#include "asm-bits.h" + +int cpuid_exists_by_eflags(void) +{ +#if defined(PLATFORM_X64) + return 1; /* CPUID is always present on the x86_64 */ +#elif defined(PLATFORM_X86) +# if defined(COMPILER_GCC) || defined(COMPILER_CLANG) + int result; + __asm __volatile( + " pushfl\n" + " pop %%eax\n" + " mov %%eax, %%ecx\n" + " xor $0x200000, %%eax\n" + " push %%eax\n" + " popfl\n" + " pushfl\n" + " pop %%eax\n" + " xor %%ecx, %%eax\n" + " mov %%eax, %0\n" + " push %%ecx\n" + " popfl\n" + : "=m"(result) + : :"eax", "ecx", "memory"); + return (result != 0); +# elif defined(COMPILER_MICROSOFT) + int result; + __asm { + pushfd + pop eax + mov ecx, eax + xor eax, 0x200000 + push eax + popfd + pushfd + pop eax + xor eax, ecx + mov result, eax + push ecx + popfd + }; + return (result != 0); +# else + return 0; +# endif /* COMPILER_MICROSOFT */ +#elif defined(PLATFORM_ARM) + return 0; +#else + return 0; +#endif /* PLATFORM_X86 */ +} + +#ifdef INLINE_ASM_SUPPORTED +/* + * with MSVC/AMD64, the exec_cpuid() and cpu_rdtsc() functions + * are implemented in separate .asm files. Otherwise, use inline assembly + */ +void exec_cpuid(uint32_t *regs) +{ +# if defined(COMPILER_GCC) || defined(COMPILER_CLANG) +# ifdef PLATFORM_X64 + __asm __volatile( + " mov %0, %%rdi\n" + + " push %%rbx\n" + " push %%rcx\n" + " push %%rdx\n" + + " mov (%%rdi), %%eax\n" + " mov 4(%%rdi), %%ebx\n" + " mov 8(%%rdi), %%ecx\n" + " mov 12(%%rdi), %%edx\n" + + " cpuid\n" + + " movl %%eax, (%%rdi)\n" + " movl %%ebx, 4(%%rdi)\n" + " movl %%ecx, 8(%%rdi)\n" + " movl %%edx, 12(%%rdi)\n" + " pop %%rdx\n" + " pop %%rcx\n" + " pop %%rbx\n" + : + :"m"(regs) + :"memory", "eax", "rdi" + ); +# elif defined(PLATFORM_X86) + __asm __volatile( + " mov %0, %%edi\n" + + " push %%ebx\n" + " push %%ecx\n" + " push %%edx\n" + + " mov (%%edi), %%eax\n" + " mov 4(%%edi), %%ebx\n" + " mov 8(%%edi), %%ecx\n" + " mov 12(%%edi), %%edx\n" + + " cpuid\n" + + " mov %%eax, (%%edi)\n" + " mov %%ebx, 4(%%edi)\n" + " mov %%ecx, 8(%%edi)\n" + " mov %%edx, 12(%%edi)\n" + " pop %%edx\n" + " pop %%ecx\n" + " pop %%ebx\n" + : + :"m"(regs) + :"memory", "eax", "edi" + ); +# elif defined(PLATFORM_ARM) +# endif /* COMPILER_GCC */ +#else +# ifdef COMPILER_MICROSOFT + __asm { + push ebx + push ecx + push edx + push edi + mov edi, regs + + mov eax, [edi] + mov ebx, [edi+4] + mov ecx, [edi+8] + mov edx, [edi+12] + + cpuid + + mov [edi], eax + mov [edi+4], ebx + mov [edi+8], ecx + mov [edi+12], edx + + pop edi + pop edx + pop ecx + pop ebx + } +# else +# error "Unsupported compiler" +# endif /* COMPILER_MICROSOFT */ +#endif +} +#endif /* INLINE_ASSEMBLY_SUPPORTED */ + +#ifdef INLINE_ASM_SUPPORTED +void cpu_rdtsc(uint64_t* result) +{ + uint32_t low_part, hi_part; +#if defined(COMPILER_GCC) || defined(COMPILER_CLANG) +#ifdef PLATFORM_ARM + low_part = 0; + hi_part = 0; +#else + __asm __volatile ( + " rdtsc\n" + " mov %%eax, %0\n" + " mov %%edx, %1\n" + :"=m"(low_part), "=m"(hi_part)::"memory", "eax", "edx" + ); +#endif +#else +# ifdef COMPILER_MICROSOFT + __asm { + rdtsc + mov low_part, eax + mov hi_part, edx + }; +# else +# error "Unsupported compiler" +# endif /* COMPILER_MICROSOFT */ +#endif /* COMPILER_GCC */ + *result = (uint64_t)low_part + (((uint64_t) hi_part) << 32); +} +#endif /* INLINE_ASM_SUPPORTED */ + +#ifdef INLINE_ASM_SUPPORTED +void busy_sse_loop(int cycles) +{ +# if defined(COMPILER_GCC) || defined(COMPILER_CLANG) +#ifndef __APPLE__ +# define XALIGN ".balign 16\n" +#else +# define XALIGN ".align 4\n" +#endif +#ifdef PLATFORM_ARM +#else + 
__asm __volatile ( + " xorps %%xmm0, %%xmm0\n" + " xorps %%xmm1, %%xmm1\n" + " xorps %%xmm2, %%xmm2\n" + " xorps %%xmm3, %%xmm3\n" + " xorps %%xmm4, %%xmm4\n" + " xorps %%xmm5, %%xmm5\n" + " xorps %%xmm6, %%xmm6\n" + " xorps %%xmm7, %%xmm7\n" + XALIGN + /* ".bsLoop:\n" */ + "1:\n" + // 0: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 1: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 2: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 3: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 4: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 5: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 6: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 7: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 8: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + // 9: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //10: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //11: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //12: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //13: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //14: + " addps 
%%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //15: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //16: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //17: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //18: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //19: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //20: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //21: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //22: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //23: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //24: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //25: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //26: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //27: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //28: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //29: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" 
+ " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //30: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + //31: + " addps %%xmm1, %%xmm0\n" + " addps %%xmm2, %%xmm1\n" + " addps %%xmm3, %%xmm2\n" + " addps %%xmm4, %%xmm3\n" + " addps %%xmm5, %%xmm4\n" + " addps %%xmm6, %%xmm5\n" + " addps %%xmm7, %%xmm6\n" + " addps %%xmm0, %%xmm7\n" + + " dec %%eax\n" + /* "jnz .bsLoop\n" */ + " jnz 1b\n" + ::"a"(cycles) + ); +#endif +#else +# ifdef COMPILER_MICROSOFT + __asm { + mov eax, cycles + xorps xmm0, xmm0 + xorps xmm1, xmm1 + xorps xmm2, xmm2 + xorps xmm3, xmm3 + xorps xmm4, xmm4 + xorps xmm5, xmm5 + xorps xmm6, xmm6 + xorps xmm7, xmm7 + //-- + align 16 +bsLoop: + // 0: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 1: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 2: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 3: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 4: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 5: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 6: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 7: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 8: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 9: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 10: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 11: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 12: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 13: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 14: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 15: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 16: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps 
xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 17: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 18: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 19: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 20: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 21: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 22: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 23: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 24: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 25: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 26: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 27: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 28: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 29: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 30: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + // 31: + addps xmm0, xmm1 + addps xmm1, xmm2 + addps xmm2, xmm3 + addps xmm3, xmm4 + addps xmm4, xmm5 + addps xmm5, xmm6 + addps xmm6, xmm7 + addps xmm7, xmm0 + //---------------------- + dec eax + jnz bsLoop + } +# else +# error "Unsupported compiler" +# endif /* COMPILER_MICROSOFT */ +#endif /* COMPILER_GCC */ +} #endif /* INLINE_ASSEMBLY_SUPPORTED */ \ No newline at end of file diff --git a/libcpuid/asm-bits.h b/libcpuid/asm-bits.h index af8ed7a..b2fca8f 100644 --- a/libcpuid/asm-bits.h +++ b/libcpuid/asm-bits.h @@ -1,69 +1,69 @@ -/* - * Copyright 2008 Veselin Georgiev, - * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -#ifndef __ASM_BITS_H__ -#define __ASM_BITS_H__ -#include "libcpuid.h" - -/* Determine Compiler: */ -#if defined(_MSC_VER) -#if !defined(COMPILER_MICROSOFT) -# define COMPILER_MICROSOFT -#endif -#elif defined(__GNUC__) -#if !defined(COMPILER_GCC) -# define COMPILER_GCC -#endif -#elif defined(__clang__) -#if !defined(COMPILER_CLANG) -# define COMPILER_CLANG -#endif -#endif - -/* Determine Platform */ -#if defined(__x86_64__) || defined(_M_AMD64) -#if !defined(PLATFORM_X64) -# define PLATFORM_X64 -#endif -#elif defined(__i386__) || defined(_M_IX86) -#if !defined(PLATFORM_X86) -# define PLATFORM_X86 -#endif -#elif defined(__ARMEL__) -#if !defined(PLATFORM_ARM) -# define PLATFORM_ARM -#endif -#endif - -/* Under Windows/AMD64 with MSVC, inline assembly isn't supported */ -#if ((defined(COMPILER_GCC) || defined(COMPILER_CLANG))) && (defined(PLATFORM_X64) || defined(PLATFORM_X86) || defined(PLATFORM_ARM)) -# define INLINE_ASM_SUPPORTED -#endif - -int cpuid_exists_by_eflags(void); -void exec_cpuid(uint32_t *regs); -void busy_sse_loop(int cycles); - -#endif /* __ASM_BITS_H__ */ +/* + * Copyright 2008 Veselin Georgiev, + * anrieffNOSPAM @ mgail_DOT.com (convert to gmail) + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#ifndef __ASM_BITS_H__ +#define __ASM_BITS_H__ +#include "libcpuid.h" + +/* Determine Compiler: */ +#if defined(_MSC_VER) +#if !defined(COMPILER_MICROSOFT) +# define COMPILER_MICROSOFT +#endif +#elif defined(__GNUC__) +#if !defined(COMPILER_GCC) +# define COMPILER_GCC +#endif +#elif defined(__clang__) +#if !defined(COMPILER_CLANG) +# define COMPILER_CLANG +#endif +#endif + +/* Determine Platform */ +#if defined(__x86_64__) || defined(_M_AMD64) +#if !defined(PLATFORM_X64) +# define PLATFORM_X64 +#endif +#elif defined(__i386__) || defined(_M_IX86) +#if !defined(PLATFORM_X86) +# define PLATFORM_X86 +#endif +#elif defined(__ARMEL__) +#if !defined(PLATFORM_ARM) +# define PLATFORM_ARM +#endif +#endif + +/* Under Windows/AMD64 with MSVC, inline assembly isn't supported */ +#if ((defined(COMPILER_GCC) || defined(COMPILER_CLANG))) && (defined(PLATFORM_X64) || defined(PLATFORM_X86) || defined(PLATFORM_ARM)) +# define INLINE_ASM_SUPPORTED +#endif + +int cpuid_exists_by_eflags(void); +void exec_cpuid(uint32_t *regs); +void busy_sse_loop(int cycles); + +#endif /* __ASM_BITS_H__ */
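Note (not part of the patch, a hedged follow-up suggestion): converting asm-bits.c and asm-bits.h to LF fixes the files as they stand, but the endings can drift back on contributors' Windows checkouts unless the repository pins them. Assuming the project wants to enforce LF for these two sources going forward, a common companion change is a .gitattributes rule plus a one-time renormalization:

    # .gitattributes (hypothetical addition, not included in this patch)
    libcpuid/asm-bits.c text eol=lf
    libcpuid/asm-bits.h text eol=lf

    # one-time renormalization after adding the attributes (requires git >= 2.16)
    git add --renormalize libcpuid/asm-bits.c libcpuid/asm-bits.h
    git commit -m "Normalize line endings for asm-bits.c/asm-bits.h"

With the text attribute in place, Git stores the files with LF in the repository regardless of the platform the commit was made on, so a patch like this one should not be needed again.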