1
0
Fork 0
mirror of https://github.com/anrieff/libcpuid synced 2024-11-10 22:59:13 +00:00

Added SSE unit size detection, based on bit 0 of the AMD extended CPUID leaf 0x1a. Added a field in cpu_id_t to specify the SSE unit size. Also added a hints array, similar to the flags array, which will hold various detection-specific hints. The only hint currently present describes how the SSE unit size was inferred: either from an actual CPUID bit, or from the old CPU family/model guesswork (which fails for the AMD Brazos-based cores). Also added the features XOP, FMA4, TBM and F16C. Bumped the library version, because these changes break binary compatibility.

git-svn-id: https://svn.code.sf.net/p/libcpuid/code/HEAD/libcpuid@96 3b4be424-7ac5-41d7-8526-f4ddcb85d872
This commit is contained in:
Veselin Georgiev 2011-10-11 16:38:41 +00:00
parent 2f949b18d9
commit 3623c5639d
7 changed files with 78 additions and 22 deletions

View file

@ -1,4 +1,4 @@
AC_INIT([libcpuid CPU Identification library], [0.1.4], [libcpuid-devel@lists.sourceforge.net], [libcpuid]) AC_INIT([libcpuid CPU Identification library], [0.2.0], [libcpuid-devel@lists.sourceforge.net], [libcpuid])
AC_CONFIG_SRCDIR([libcpuid/libcpuid.h]) AC_CONFIG_SRCDIR([libcpuid/libcpuid.h])
AC_CONFIG_HEADERS([config.h]) AC_CONFIG_HEADERS([config.h])
AM_INIT_AUTOMAKE([-Wall dist-bzip2 dist-zip foreign]) AM_INIT_AUTOMAKE([-Wall dist-bzip2 dist-zip foreign])
@ -15,9 +15,10 @@ dnl 11:0:0 Version 0.1.1: four more fields to cpu_raw_data_t
dnl 11:0:1 Version 0.1.2: added cpu_msr* functions dnl 11:0:1 Version 0.1.2: added cpu_msr* functions
dnl 11:0:2 Version 0.1.3: added cpu_clock_by_ic() function dnl 11:0:2 Version 0.1.3: added cpu_clock_by_ic() function
dnl 11:0:3 Version 0.1.4: just an identification change, to reflect the detection table upgrades dnl 11:0:3 Version 0.1.4: just an identification change, to reflect the detection table upgrades
LIBCPUID_CURRENT=11 dnl 12:1:0 Version 0.2.0: two more fields to cpu_id_t
LIBCPUID_AGE=0 LIBCPUID_CURRENT=12
LIBCPUID_REVISION=3 LIBCPUID_AGE=1
LIBCPUID_REVISION=0
AC_SUBST([LIBCPUID_AGE]) AC_SUBST([LIBCPUID_AGE])
AC_SUBST([LIBCPUID_REVISION]) AC_SUBST([LIBCPUID_REVISION])
AC_SUBST([LIBCPUID_CURRENT]) AC_SUBST([LIBCPUID_CURRENT])

View file

@ -85,6 +85,7 @@ typedef enum {
NEED_CLOCK_RDTSC, NEED_CLOCK_RDTSC,
NEED_CLOCK_IC, NEED_CLOCK_IC,
NEED_RDMSR, NEED_RDMSR,
NEED_SSE_UNIT_SIZE,
} output_data_switch; } output_data_switch;
int need_input = 0, int need_input = 0,
@ -136,6 +137,7 @@ matchtable[] = {
{ NEED_CLOCK_RDTSC , "--clock-rdtsc" , 1}, { NEED_CLOCK_RDTSC , "--clock-rdtsc" , 1},
{ NEED_CLOCK_IC , "--clock-ic" , 1}, { NEED_CLOCK_IC , "--clock-ic" , 1},
{ NEED_RDMSR , "--rdmsr" , 0}, { NEED_RDMSR , "--rdmsr" , 0},
{ NEED_SSE_UNIT_SIZE, "--sse_size" , 1},
}; };
const int sz_match = (sizeof(matchtable) / sizeof(matchtable[0])); const int sz_match = (sizeof(matchtable) / sizeof(matchtable[0]));
@ -422,6 +424,12 @@ static void print_info(output_data_switch query, struct cpu_raw_data_t* raw,
} }
break; break;
} }
case NEED_SSE_UNIT_SIZE:
{
fprintf(fout, "%d (%s)\n", data->sse_size,
data->detection_hints[CPU_HINT_SSE_SIZE_AUTH] ? "authoritative" : "non-authoritative");
break;
}
default: default:
fprintf(fout, "How did you get here?!?\n"); fprintf(fout, "How did you get here?!?\n");
break; break;
@ -580,6 +588,7 @@ int main(int argc, char** argv)
fprintf(fout, " L1D line sz: %d bytes\n", data.l1_cacheline); fprintf(fout, " L1D line sz: %d bytes\n", data.l1_cacheline);
fprintf(fout, " L2 line sz : %d bytes\n", data.l2_cacheline); fprintf(fout, " L2 line sz : %d bytes\n", data.l2_cacheline);
fprintf(fout, " L3 line sz : %d bytes\n", data.l3_cacheline); fprintf(fout, " L3 line sz : %d bytes\n", data.l3_cacheline);
fprintf(fout, " SSE units : %d bits (%s)\n", data.sse_size, data.detection_hints[CPU_HINT_SSE_SIZE_AUTH] ? "authoritative" : "non-authoritative");
fprintf(fout, " code name : `%s'\n", data.cpu_codename); fprintf(fout, " code name : `%s'\n", data.cpu_codename);
fprintf(fout, " features :"); fprintf(fout, " features :");
/* /*

View file

@ -56,6 +56,7 @@ static void cpu_id_t_constructor(struct cpu_id_t* id)
id->l1_data_cache = id->l1_instruction_cache = id->l2_cache = id->l3_cache = -1; id->l1_data_cache = id->l1_instruction_cache = id->l2_cache = id->l3_cache = -1;
id->l1_assoc = id->l2_assoc = id->l3_assoc = -1; id->l1_assoc = id->l2_assoc = id->l3_assoc = -1;
id->l1_cacheline = id->l2_cacheline = id->l3_cacheline = -1; id->l1_cacheline = id->l2_cacheline = id->l3_cacheline = -1;
id->sse_size = -1;
} }
static int parse_token(const char* expected_token, const char *token, static int parse_token(const char* expected_token, const char *token,
@ -172,6 +173,7 @@ static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* da
{ 13, CPU_FEATURE_CX16 }, { 13, CPU_FEATURE_CX16 },
{ 19, CPU_FEATURE_SSE4_1 }, { 19, CPU_FEATURE_SSE4_1 },
{ 23, CPU_FEATURE_POPCNT }, { 23, CPU_FEATURE_POPCNT },
{ 29, CPU_FEATURE_F16C },
}; };
const struct feature_map_t matchtable_edx81[] = { const struct feature_map_t matchtable_edx81[] = {
{ 11, CPU_FEATURE_SYSCALL }, { 11, CPU_FEATURE_SYSCALL },
@ -188,6 +190,21 @@ static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* da
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data); match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data); match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data);
} }
if (data->flags[CPU_FEATURE_SSE]) {
/* apply guesswork to check if the SSE unit width is 128 bit */
switch (data->vendor) {
case VENDOR_AMD:
data->sse_size = (data->ext_family >= 16 && data->ext_family != 23) ? 128 : 64;
break;
case VENDOR_INTEL:
data->sse_size = (data->family == 6 && data->ext_model >= 15) ? 128 : 64;
break;
default:
break;
}
/* leave detection_hints[CPU_HINT_SSE_SIZE_AUTH] at 0; the advanced per-vendor detection routines
* will set it accordingly if they detect the needed bit */
}
} }
static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data) static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
@ -522,6 +539,12 @@ const char* cpu_feature_str(cpu_feature_t feature)
{ CPU_FEATURE_100MHZSTEPS, "100mhzsteps" }, { CPU_FEATURE_100MHZSTEPS, "100mhzsteps" },
{ CPU_FEATURE_HWPSTATE, "hwpstate" }, { CPU_FEATURE_HWPSTATE, "hwpstate" },
{ CPU_FEATURE_CONSTANT_TSC, "constant_tsc" }, { CPU_FEATURE_CONSTANT_TSC, "constant_tsc" },
{ CPU_FEATURE_XOP, "xop" },
{ CPU_FEATURE_FMA3, "fma3" },
{ CPU_FEATURE_FMA4, "fma4" },
{ CPU_FEATURE_TBM, "tbm" },
{ CPU_FEATURE_F16C, "f16c" },
}; };
unsigned i, n = COUNT_OF(matchtable); unsigned i, n = COUNT_OF(matchtable);
if (n != NUM_CPU_FEATURES) { if (n != NUM_CPU_FEATURES) {

View file

@ -29,7 +29,7 @@
* @File libcpuid.h * @File libcpuid.h
* @Author Veselin Georgiev * @Author Veselin Georgiev
* @Date Oct 2008 * @Date Oct 2008
* @Version 0.1.3 * @Version 0.2.0
* *
* Version history: * Version history:
* *
@ -38,8 +38,11 @@
* new processor topology enumeration required on Core i7 * new processor topology enumeration required on Core i7
* 0.1.2 (2009-09-26): Added support for MSR reading through self-extracting * 0.1.2 (2009-09-26): Added support for MSR reading through self-extracting
* kernel driver on Win32. * kernel driver on Win32.
* 0.1.3 (2010-04-20): Added support for greater more accurate CPU clock measurements * 0.1.3 (2010-04-20): Added support for greater more accurate CPU clock
* with cpu_clock_by_ic() * measurements with cpu_clock_by_ic()
* 0.2.0 (2011-10-11): Support for AMD Bulldozer CPUs, 128-bit SSE unit size
* checking. A backwards-incompatible change, since the
* sizeof cpu_id_t is now different.
*/ */
/** @mainpage A simple libcpuid introduction /** @mainpage A simple libcpuid introduction
@ -223,6 +226,15 @@ struct cpu_id_t {
* @endcode * @endcode
*/ */
char cpu_codename[64]; char cpu_codename[64];
/** SSE execution unit size (64 or 128; -1 if N/A) */
int32_t sse_size;
/**
* Contains miscellaneous detection information. Used to query the specifics of
* certain detected features. Indexed by the CPU_HINT_* values of the cpu_hint_t
* enum below. @see Hints
*/
uint8_t detection_hints[CPU_HINTS_MAX];
}; };
/** /**
@ -317,7 +329,7 @@ typedef enum {
CPU_FEATURE_3DNOWPREFETCH, /*!< PREFETCH/PREFETCHW support */ CPU_FEATURE_3DNOWPREFETCH, /*!< PREFETCH/PREFETCHW support */
CPU_FEATURE_OSVW, /*!< OS Visible Workaround (AMD) */ CPU_FEATURE_OSVW, /*!< OS Visible Workaround (AMD) */
CPU_FEATURE_IBS, /*!< Instruction-based sampling */ CPU_FEATURE_IBS, /*!< Instruction-based sampling */
CPU_FEATURE_SSE5, /*!< SSE 5 instructions supported */ CPU_FEATURE_SSE5, /*!< SSE 5 instructions supported (deprecated, will never be 1) */
CPU_FEATURE_SKINIT, /*!< SKINIT / STGI supported */ CPU_FEATURE_SKINIT, /*!< SKINIT / STGI supported */
CPU_FEATURE_WDT, /*!< Watchdog timer support */ CPU_FEATURE_WDT, /*!< Watchdog timer support */
CPU_FEATURE_TS, /*!< Temperature sensor */ CPU_FEATURE_TS, /*!< Temperature sensor */
@ -329,10 +341,26 @@ typedef enum {
CPU_FEATURE_100MHZSTEPS,/*!< 100 MHz multiplier control */ CPU_FEATURE_100MHZSTEPS,/*!< 100 MHz multiplier control */
CPU_FEATURE_HWPSTATE, /*!< Hardware P-state control */ CPU_FEATURE_HWPSTATE, /*!< Hardware P-state control */
CPU_FEATURE_CONSTANT_TSC, /*!< TSC ticks at constant rate */ CPU_FEATURE_CONSTANT_TSC, /*!< TSC ticks at constant rate */
CPU_FEATURE_XOP, /*!< The XOP instruction set (same as the old CPU_FEATURE_SSE5) */
CPU_FEATURE_FMA3, /*!< The FMA3 instruction set */
CPU_FEATURE_FMA4, /*!< The FMA4 instruction set */
CPU_FEATURE_TBM, /*!< Trailing bit manipulation instruction support */
CPU_FEATURE_F16C, /*!< 16-bit FP convert instruction support */
/* termination: */ /* termination: */
NUM_CPU_FEATURES, NUM_CPU_FEATURES,
} cpu_feature_t; } cpu_feature_t;
/**
* @brief CPU detection hints identifiers
*
* Usage: similar to the flags usage - use these values as indices into
* cpu_id_t::detection_hints; a non-zero entry means the hint applies.
*/
typedef enum {
CPU_HINT_SSE_SIZE_AUTH = 0, /*!< SSE unit size is authoritative (not only a Family/Model guesswork, but based on an actual CPUID bit) */
/* termination: number of defined hints; keep this entry last */
NUM_CPU_HINTS,
} cpu_hint_t;
/** /**
* @brief Describes common library error codes * @brief Describes common library error codes
*/ */

View file

@ -39,5 +39,6 @@
#define MAX_EXT_CPUID_LEVEL 32 #define MAX_EXT_CPUID_LEVEL 32
#define MAX_INTELFN4_LEVEL 4 #define MAX_INTELFN4_LEVEL 4
#define MAX_INTELFN11_LEVEL 4 #define MAX_INTELFN11_LEVEL 4
#define CPU_HINTS_MAX 16
#endif /* __LIBCPUID_CONSTANTS_H__ */ #endif /* __LIBCPUID_CONSTANTS_H__ */

View file

@ -226,18 +226,6 @@ int cpu_clock_measure(int millis, int quad_check)
return (results[bi] + results[bj] + _zero) / 2; return (results[bi] + results[bj] + _zero) / 2;
} }
/*
 * Guess, from the vendor and family/model fields only, whether the SSE
 * execution unit is 128 bits wide.
 *
 * Returns non-zero for AMD CPUs with ext_family >= 16 (except ext_family 23)
 * and for Intel CPUs with family 6 and ext_model >= 15; returns 0 in every
 * other case, including any other vendor.
 */
static int sse_is_128_bit(struct cpu_id_t* id)
{
switch (id->vendor) {
case VENDOR_AMD:
return (id->ext_family >= 16 && id->ext_family != 23);
case VENDOR_INTEL:
return (id->family == 6 && id->ext_model >= 15);
default:
/* unknown vendor: assume the unit is not 128-bit */
return 0;
}
}
int cpu_clock_by_ic(int millis, int runs) int cpu_clock_by_ic(int millis, int runs)
{ {
int max_value = 0, cur_value, i, ri, cycles_inner, cycles_outer, c; int max_value = 0, cur_value, i, ri, cycles_inner, cycles_outer, c;
@ -248,7 +236,7 @@ int cpu_clock_by_ic(int millis, int runs)
id = get_cached_cpuid(); id = get_cached_cpuid();
if (!id || !id->flags[CPU_FEATURE_SSE]) return -1; if (!id || !id->flags[CPU_FEATURE_SSE]) return -1;
// //
if (!sse_is_128_bit(id)) { if (id->sse_size < 128) {
debugf(1, "SSE execution path is 64-bit\n"); debugf(1, "SSE execution path is 64-bit\n");
sse_multiplier = 2; sse_multiplier = 2;
} else { } else {

View file

@ -259,9 +259,10 @@ static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
{ 8, CPU_FEATURE_3DNOWPREFETCH }, { 8, CPU_FEATURE_3DNOWPREFETCH },
{ 9, CPU_FEATURE_OSVW }, { 9, CPU_FEATURE_OSVW },
{ 10, CPU_FEATURE_IBS }, { 10, CPU_FEATURE_IBS },
{ 11, CPU_FEATURE_SSE5 }, { 11, CPU_FEATURE_XOP },
{ 12, CPU_FEATURE_SKINIT }, { 12, CPU_FEATURE_SKINIT },
{ 13, CPU_FEATURE_WDT }, { 13, CPU_FEATURE_WDT },
{ 16, CPU_FEATURE_FMA4 },
}; };
const struct feature_map_t matchtable_edx87[] = { const struct feature_map_t matchtable_edx87[] = {
{ 0, CPU_FEATURE_TS }, { 0, CPU_FEATURE_TS },
@ -280,6 +281,11 @@ static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
} }
if (raw->ext_cpuid[0][0] >= 7) if (raw->ext_cpuid[0][0] >= 7)
match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data); match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data);
if (raw->ext_cpuid[0][0] >= 0x1a) {
/* We have the extended info about SSE unit size */
data->detection_hints[CPU_HINT_SSE_SIZE_AUTH] = 1;
data->sse_size = (raw->ext_cpuid[0x1a][0] & 1) ? 128 : 64;
}
} }
static void decode_amd_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data) static void decode_amd_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data)