mirror of
https://github.com/anrieff/libcpuid
synced 2024-12-16 16:35:45 +00:00
Fix CPU speed detection for Bulldozer and related cores.
It seems that our SSE-based speed test runs at 1 IPC (instructions per clock) on all current CPUs, but at 1.4 IPC on Bulldozer, which makes the reported clock 40% too high on that core. Correct for that in the function.
This commit is contained in:
parent
f883e2b592
commit
f5ab18506f
2 changed files with 23 additions and 4 deletions
|
@ -678,6 +678,15 @@ int cpu_clock_measure(int millis, int quad_check);
|
|||
*
|
||||
* Recommended values - millis = 50, runs = 4. For more robustness,
|
||||
* increase the number of runs.
|
||||
*
|
||||
* NOTE: on Bulldozer and later CPUs, the busy-wait cycle runs at 1.4 IPC, thus
|
||||
* the results are skewed. This is corrected internally by dividing the resulting
|
||||
* value by 1.4.
|
||||
* However, this 1.4 IPC figure only holds if the thread is executed on a single CMT
|
||||
* module - if there are other threads competing for resources, the results are
|
||||
* unpredictable. Make sure you run cpu_clock_by_ic() on a CPU that is free from
|
||||
* competing threads, or if there are such threads, they shouldn't exceed the
|
||||
* number of modules. On a Bulldozer X8, that means 4 threads.
|
||||
*
|
||||
* @returns the CPU clock frequency in MHz (within some measurement error
|
||||
* margin). If SSE is not supported, the result is -1. If the input parameters
|
||||
|
|
|
@ -231,18 +231,28 @@ int cpu_clock_by_ic(int millis, int runs)
|
|||
int max_value = 0, cur_value, i, ri, cycles_inner, cycles_outer, c;
|
||||
struct cpu_id_t* id;
|
||||
uint64_t t0, t1, tl, hz;
|
||||
int sse_multiplier = 1;
|
||||
int multiplier_numerator = 1, multiplier_denom = 1;
|
||||
if (millis <= 0 || runs <= 0) return -2;
|
||||
id = get_cached_cpuid();
|
||||
// if there aren't SSE instructions - we can't run the test at all
|
||||
if (!id || !id->flags[CPU_FEATURE_SSE]) return -1;
|
||||
//
|
||||
if (id->sse_size < 128) {
|
||||
debugf(1, "SSE execution path is 64-bit\n");
|
||||
sse_multiplier = 2;
|
||||
// on a CPU with half SSE unit length, SSE instructions execute at 0.5 IPC;
|
||||
// the resulting value must be multiplied by 2:
|
||||
multiplier_numerator = 2;
|
||||
} else {
|
||||
debugf(1, "SSE execution path is 128-bit\n");
|
||||
}
|
||||
//
|
||||
// on a Bulldozer or later CPU, SSE instructions execute at 1.4 IPC, handle that as well:
|
||||
if (id->vendor == VENDOR_AMD && id->ext_family >= 21) {
|
||||
debugf(1, "cpu_clock_by_ic: Bulldozer (or later) detected, dividing result by 1.4\n");
|
||||
multiplier_numerator = 5;
|
||||
multiplier_denom = 7; // multiply by 5/7, to divide by 1.4
|
||||
}
|
||||
//
|
||||
tl = millis * 125; // (*1000 / 8)
|
||||
cycles_inner = 128;
|
||||
cycles_outer = 1;
|
||||
|
@ -267,8 +277,8 @@ int cpu_clock_by_ic(int millis, int runs)
|
|||
// cpu_Hz = cycles_inner * cycles_outer * 256 / (t1 - t0) * 1000000
|
||||
debugf(2, "c = %d, td = %llu\n", c, t1 - t0);
|
||||
hz = ((uint64_t) cycles_inner * (uint64_t) 256 + 12) *
|
||||
(uint64_t) cycles_outer * (uint64_t) sse_multiplier * (uint64_t) c * (uint64_t) 1000000
|
||||
/ (t1 - t0);
|
||||
(uint64_t) cycles_outer * (uint64_t) multiplier_numerator * (uint64_t) c * (uint64_t) 1000000
|
||||
/ ((t1 - t0) * (uint64_t) multiplier_denom);
|
||||
cur_value = (int) (hz / 1000000);
|
||||
if (cur_value > max_value) max_value = cur_value;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue