1
0
Fork 0
mirror of https://github.com/anrieff/libcpuid synced 2024-12-16 16:35:45 +00:00

Fixed issue #18: Win64 issues

- move the INLINE_ASM_SUPPORTED guards outside the body of exec_cpuid, as
  suggested by Genoil;
- copy the asm code of busy_sse_loop to masm-x64.asm. Some fixup was
  required, because the microsoft calling convention doesn't expect
  xmm6 & xmm7 to be clobbered in functions.

Confirmed that --clock-ic from cpuid_tool works with the resulting library.
This commit is contained in:
Veselin Georgiev 2015-05-07 17:16:46 +02:00
parent 2722a10738
commit 2834683640
2 changed files with 319 additions and 2 deletions

View file

@ -75,13 +75,13 @@ int cpuid_exists_by_eflags(void)
#endif /* PLATFORM_X86 */
}
#ifdef INLINE_ASM_SUPPORTED
/*
* with MSVC/AMD64, the exec_cpuid() and cpu_rdtsc() functions
* are implemented in separate .asm files. Otherwise, use inline assembly
*/
void exec_cpuid(uint32_t *regs)
{
#ifdef INLINE_ASM_SUPPORTED
#ifdef COMPILER_GCC
# ifdef PLATFORM_X64
__asm __volatile(
@ -166,8 +166,8 @@ void exec_cpuid(uint32_t *regs)
# error "Unsupported compiler"
# endif /* COMPILER_MICROSOFT */
#endif
#endif /* INLINE_ASSEMBLY_SUPPORTED */
}
#endif /* INLINE_ASSEMBLY_SUPPORTED */
#ifdef INLINE_ASM_SUPPORTED
void cpu_rdtsc(uint64_t* result)

View file

@ -39,4 +39,321 @@ cpu_rdtsc Proc
ret
cpu_rdtsc endp
; procedure busy_sse_loop
; Signature: void busy_sse_loop(int cycles)
busy_sse_loop Proc
; save xmm6 & xmm7 into the shadow area, as Visual C++ 2008
; expects that we don't touch them:
movups [rsp + 8], xmm6
movups [rsp + 24], xmm7
xorps xmm0, xmm0
xorps xmm1, xmm1
xorps xmm2, xmm2
xorps xmm3, xmm3
xorps xmm4, xmm4
xorps xmm5, xmm5
xorps xmm6, xmm6
xorps xmm7, xmm7
; --
align 16
bsLoop:
; 0:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 1:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 2:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 3:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 4:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 5:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 6:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 7:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 8:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 9:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 10:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 11:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 12:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 13:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 14:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 15:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 16:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 17:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 18:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 19:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 20:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 21:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 22:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 23:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 24:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 25:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 26:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 27:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 28:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 29:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 30:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; 31:
addps xmm0, xmm1
addps xmm1, xmm2
addps xmm2, xmm3
addps xmm3, xmm4
addps xmm4, xmm5
addps xmm5, xmm6
addps xmm6, xmm7
addps xmm7, xmm0
; ----------------------
dec ecx
jnz bsLoop
; restore xmm6 & xmm7:
movups xmm6, [rsp + 8]
movups xmm7, [rsp + 24]
ret
busy_sse_loop endp
END