1
0
Fork 0
mirror of https://github.com/anrieff/libcpuid synced 2025-07-02 14:04:15 +00:00

Fix CPU speed detection for Bulldozer and related cores.

It seems that our SSE-based speed test runs at 1 IPC (instructions per clock)
on all current CPUs, but at 1.4 IPC on Bulldozer, which makes the measured
clock 40% too high there. Correct for that in cpu_clock_by_ic().
This commit is contained in:
Veselin Georgiev 2014-07-23 21:29:34 +03:00
parent f883e2b592
commit f5ab18506f
2 changed files with 23 additions and 4 deletions

View file

@ -678,6 +678,15 @@ int cpu_clock_measure(int millis, int quad_check);
* *
* Recommended values - millis = 50, runs = 4. For more robustness, * Recommended values - millis = 50, runs = 4. For more robustness,
* increase the number of runs. * increase the number of runs.
*
* NOTE: on Bulldozer and later CPUs, the busy-wait cycle runs at 1.4 IPC, thus
* the results are skewed. This is corrected internally by dividing the resulting
* value by 1.4.
* However, the 1.4 IPC rate only holds when the thread has a CMT module to
* itself - if other threads compete for the module's resources, the results are
* unpredictable. Make sure you run cpu_clock_by_ic() on a CPU that is free from
* competing threads, or, if there are such threads, that their number doesn't
* exceed the number of modules. On a Bulldozer X8, that means 4 threads.
* *
* @returns the CPU clock frequency in MHz (within some measurement error * @returns the CPU clock frequency in MHz (within some measurement error
* margin). If SSE is not supported, the result is -1. If the input parameters * margin). If SSE is not supported, the result is -1. If the input parameters

View file

@ -231,18 +231,28 @@ int cpu_clock_by_ic(int millis, int runs)
int max_value = 0, cur_value, i, ri, cycles_inner, cycles_outer, c; int max_value = 0, cur_value, i, ri, cycles_inner, cycles_outer, c;
struct cpu_id_t* id; struct cpu_id_t* id;
uint64_t t0, t1, tl, hz; uint64_t t0, t1, tl, hz;
int sse_multiplier = 1; int multiplier_numerator = 1, multiplier_denom = 1;
if (millis <= 0 || runs <= 0) return -2; if (millis <= 0 || runs <= 0) return -2;
id = get_cached_cpuid(); id = get_cached_cpuid();
// if there aren't SSE instructions - we can't run the test at all
if (!id || !id->flags[CPU_FEATURE_SSE]) return -1; if (!id || !id->flags[CPU_FEATURE_SSE]) return -1;
// //
if (id->sse_size < 128) { if (id->sse_size < 128) {
debugf(1, "SSE execution path is 64-bit\n"); debugf(1, "SSE execution path is 64-bit\n");
sse_multiplier = 2; // on a CPU with half SSE unit length, SSE instructions execute at 0.5 IPC;
// the resulting value must be multiplied by 2:
multiplier_numerator = 2;
} else { } else {
debugf(1, "SSE execution path is 128-bit\n"); debugf(1, "SSE execution path is 128-bit\n");
} }
// //
// on a Bulldozer or later CPU, SSE instructions execute at 1.4 IPC, handle that as well:
if (id->vendor == VENDOR_AMD && id->ext_family >= 21) {
debugf(1, "cpu_clock_by_ic: Bulldozer (or later) detected, dividing result by 1.4\n");
multiplier_numerator = 5;
multiplier_denom = 7; // multiply by 5/7, to divide by 1.4
}
//
tl = millis * 125; // (*1000 / 8) tl = millis * 125; // (*1000 / 8)
cycles_inner = 128; cycles_inner = 128;
cycles_outer = 1; cycles_outer = 1;
@ -267,8 +277,8 @@ int cpu_clock_by_ic(int millis, int runs)
// cpu_Hz = cycles_inner * cycles_outer * 256 / (t1 - t0) * 1000000 // cpu_Hz = cycles_inner * cycles_outer * 256 / (t1 - t0) * 1000000
debugf(2, "c = %d, td = %llu\n", c, t1 - t0); debugf(2, "c = %d, td = %llu\n", c, t1 - t0);
hz = ((uint64_t) cycles_inner * (uint64_t) 256 + 12) * hz = ((uint64_t) cycles_inner * (uint64_t) 256 + 12) *
(uint64_t) cycles_outer * (uint64_t) sse_multiplier * (uint64_t) c * (uint64_t) 1000000 (uint64_t) cycles_outer * (uint64_t) multiplier_numerator * (uint64_t) c * (uint64_t) 1000000
/ (t1 - t0); / ((t1 - t0) * (uint64_t) multiplier_denom);
cur_value = (int) (hz / 1000000); cur_value = (int) (hz / 1000000);
if (cur_value > max_value) max_value = cur_value; if (cur_value > max_value) max_value = cur_value;
} }