mirror of
https://github.com/anrieff/libcpuid
synced 2024-11-10 22:59:13 +00:00
Fixed issue #18: Win64 issues
- Move the INLINE_ASM_SUPPORTED guards outside the body of exec_cpuid(), as suggested by Genoil. - Copy the asm code of busy_sse_loop() to masm-x64.asm. Some fixup was required, because the Microsoft x64 calling convention treats xmm6 and xmm7 as callee-saved (non-volatile) registers, so the routine must save and restore them rather than clobber them. Confirmed that --clock-ic from cpuid_tool works with the resulting library.
This commit is contained in:
parent
2722a10738
commit
2834683640
2 changed files with 319 additions and 2 deletions
|
@ -75,13 +75,13 @@ int cpuid_exists_by_eflags(void)
|
||||||
#endif /* PLATFORM_X86 */
|
#endif /* PLATFORM_X86 */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef INLINE_ASM_SUPPORTED
|
||||||
/*
|
/*
|
||||||
* with MSVC/AMD64, the exec_cpuid() and cpu_rdtsc() functions
|
* with MSVC/AMD64, the exec_cpuid() and cpu_rdtsc() functions
|
||||||
* are implemented in separate .asm files. Otherwise, use inline assembly
|
* are implemented in separate .asm files. Otherwise, use inline assembly
|
||||||
*/
|
*/
|
||||||
void exec_cpuid(uint32_t *regs)
|
void exec_cpuid(uint32_t *regs)
|
||||||
{
|
{
|
||||||
#ifdef INLINE_ASM_SUPPORTED
|
|
||||||
#ifdef COMPILER_GCC
|
#ifdef COMPILER_GCC
|
||||||
# ifdef PLATFORM_X64
|
# ifdef PLATFORM_X64
|
||||||
__asm __volatile(
|
__asm __volatile(
|
||||||
|
@ -166,8 +166,8 @@ void exec_cpuid(uint32_t *regs)
|
||||||
# error "Unsupported compiler"
|
# error "Unsupported compiler"
|
||||||
# endif /* COMPILER_MICROSOFT */
|
# endif /* COMPILER_MICROSOFT */
|
||||||
#endif
|
#endif
|
||||||
#endif /* INLINE_ASSEMBLY_SUPPORTED */
|
|
||||||
}
|
}
|
||||||
|
#endif /* INLINE_ASSEMBLY_SUPPORTED */
|
||||||
|
|
||||||
#ifdef INLINE_ASM_SUPPORTED
|
#ifdef INLINE_ASM_SUPPORTED
|
||||||
void cpu_rdtsc(uint64_t* result)
|
void cpu_rdtsc(uint64_t* result)
|
||||||
|
|
|
@ -39,4 +39,321 @@ cpu_rdtsc Proc
|
||||||
ret
|
ret
|
||||||
cpu_rdtsc endp
|
cpu_rdtsc endp
|
||||||
|
|
||||||
|
; procedure busy_sse_loop
|
||||||
|
; Signature: void busy_sse_loop(int cycles)
|
||||||
|
busy_sse_loop Proc
|
||||||
|
; save xmm6 & xmm7 into the shadow area, as Visual C++ 2008
|
||||||
|
; expects that we don't touch them:
|
||||||
|
movups [rsp + 8], xmm6
|
||||||
|
movups [rsp + 24], xmm7
|
||||||
|
|
||||||
|
xorps xmm0, xmm0
|
||||||
|
xorps xmm1, xmm1
|
||||||
|
xorps xmm2, xmm2
|
||||||
|
xorps xmm3, xmm3
|
||||||
|
xorps xmm4, xmm4
|
||||||
|
xorps xmm5, xmm5
|
||||||
|
xorps xmm6, xmm6
|
||||||
|
xorps xmm7, xmm7
|
||||||
|
; --
|
||||||
|
align 16
|
||||||
|
bsLoop:
|
||||||
|
; 0:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 1:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 2:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 3:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 4:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 5:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 6:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 7:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 8:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 9:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 10:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 11:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 12:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 13:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 14:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 15:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 16:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 17:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 18:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 19:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 20:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 21:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 22:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 23:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 24:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 25:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 26:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 27:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 28:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 29:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 30:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; 31:
|
||||||
|
addps xmm0, xmm1
|
||||||
|
addps xmm1, xmm2
|
||||||
|
addps xmm2, xmm3
|
||||||
|
addps xmm3, xmm4
|
||||||
|
addps xmm4, xmm5
|
||||||
|
addps xmm5, xmm6
|
||||||
|
addps xmm6, xmm7
|
||||||
|
addps xmm7, xmm0
|
||||||
|
; ----------------------
|
||||||
|
dec ecx
|
||||||
|
jnz bsLoop
|
||||||
|
|
||||||
|
; restore xmm6 & xmm7:
|
||||||
|
movups xmm6, [rsp + 8]
|
||||||
|
movups xmm7, [rsp + 24]
|
||||||
|
ret
|
||||||
|
busy_sse_loop endp
|
||||||
|
|
||||||
END
|
END
|
||||||
|
|
Loading…
Reference in a new issue