1
0
Fork 0
mirror of https://github.com/anrieff/libcpuid synced 2025-01-23 20:06:41 +00:00

Fix CPU speed detection for Bulldozer and related cores.

It seems that our SSE-based speed test runs at 1 IPC (instructions per clock)
on all other current CPUs, but at 1.4 IPC on Bulldozer, which makes the
reported clock 40% too high there. Correct that in cpu_clock_by_ic() by
scaling the result by 5/7 (i.e. dividing by 1.4).
This commit is contained in:
Veselin Georgiev 2014-07-23 21:29:34 +03:00
parent f883e2b592
commit f5ab18506f
2 changed files with 23 additions and 4 deletions

View file

@ -678,6 +678,15 @@ int cpu_clock_measure(int millis, int quad_check);
*
* Recommended values - millis = 50, runs = 4. For more robustness,
* increase the number of runs.
*
* NOTE: on Bulldozer and later CPUs, the busy-wait cycle runs at 1.4 IPC, so
* the raw measurement is 40% too high. This is corrected internally by dividing
* the resulting value by 1.4.
* However, the 1.4 IPC figure only holds when the measuring thread has a CMT
* module to itself - if other threads compete for the module's resources, the
* results are unpredictable. Make sure you run cpu_clock_by_ic() on a CPU that
* is free from competing threads, or, if there are such threads, that their
* number doesn't exceed the number of modules. On a Bulldozer X8, that means
* 4 threads.
*
* @returns the CPU clock frequency in MHz (within some measurement error
* margin). If SSE is not supported, the result is -1. If the input parameters

View file

@ -231,18 +231,28 @@ int cpu_clock_by_ic(int millis, int runs)
int max_value = 0, cur_value, i, ri, cycles_inner, cycles_outer, c;
struct cpu_id_t* id;
uint64_t t0, t1, tl, hz;
int sse_multiplier = 1;
int multiplier_numerator = 1, multiplier_denom = 1;
if (millis <= 0 || runs <= 0) return -2;
id = get_cached_cpuid();
// if there aren't SSE instructions - we can't run the test at all
if (!id || !id->flags[CPU_FEATURE_SSE]) return -1;
//
if (id->sse_size < 128) {
debugf(1, "SSE execution path is 64-bit\n");
sse_multiplier = 2;
// on a CPU with half SSE unit length, SSE instructions execute at 0.5 IPC;
// the resulting value must be multiplied by 2:
multiplier_numerator = 2;
} else {
debugf(1, "SSE execution path is 128-bit\n");
}
//
// on a Bulldozer or later CPU, SSE instructions execute at 1.4 IPC, handle that as well:
if (id->vendor == VENDOR_AMD && id->ext_family >= 21) {
debugf(1, "cpu_clock_by_ic: Bulldozer (or later) detected, dividing result by 1.4\n");
multiplier_numerator = 5;
multiplier_denom = 7; // multiply by 5/7, to divide by 1.4
}
//
tl = millis * 125; // (*1000 / 8)
cycles_inner = 128;
cycles_outer = 1;
@ -267,8 +277,8 @@ int cpu_clock_by_ic(int millis, int runs)
// cpu_Hz = cycles_inner * cycles_outer * 256 / (t1 - t0) * 1000000
debugf(2, "c = %d, td = %llu\n", c, t1 - t0);
hz = ((uint64_t) cycles_inner * (uint64_t) 256 + 12) *
(uint64_t) cycles_outer * (uint64_t) sse_multiplier * (uint64_t) c * (uint64_t) 1000000
/ (t1 - t0);
(uint64_t) cycles_outer * (uint64_t) multiplier_numerator * (uint64_t) c * (uint64_t) 1000000
/ ((t1 - t0) * (uint64_t) multiplier_denom);
cur_value = (int) (hz / 1000000);
if (cur_value > max_value) max_value = cur_value;
}