mirror of
https://github.com/anrieff/libcpuid
synced 2024-11-10 22:59:13 +00:00
Added SSE unit size detection, based on the AMD extended leaf 1a, bit 0. Added a field in cpu_id_t to specify SSE unit size. Also added a hints array, similar to the flags array, which will hold various detection-specific hints. The only currently present hint is about the way the SSE unit size is inferred - whether it is authoritative (based on an actual CPUID bit) or based on the old CPU family/model guesswork (which fails for the AMD Brazos-based cores). Also added the features XOP, FMA4, TBM and F16C. Changed the library version due to breaking binary compatibility.
git-svn-id: https://svn.code.sf.net/p/libcpuid/code/HEAD/libcpuid@96 3b4be424-7ac5-41d7-8526-f4ddcb85d872
This commit is contained in:
parent
2f949b18d9
commit
3623c5639d
7 changed files with 78 additions and 22 deletions
|
@ -1,4 +1,4 @@
|
||||||
AC_INIT([libcpuid CPU Identification library], [0.1.4], [libcpuid-devel@lists.sourceforge.net], [libcpuid])
|
AC_INIT([libcpuid CPU Identification library], [0.2.0], [libcpuid-devel@lists.sourceforge.net], [libcpuid])
|
||||||
AC_CONFIG_SRCDIR([libcpuid/libcpuid.h])
|
AC_CONFIG_SRCDIR([libcpuid/libcpuid.h])
|
||||||
AC_CONFIG_HEADERS([config.h])
|
AC_CONFIG_HEADERS([config.h])
|
||||||
AM_INIT_AUTOMAKE([-Wall dist-bzip2 dist-zip foreign])
|
AM_INIT_AUTOMAKE([-Wall dist-bzip2 dist-zip foreign])
|
||||||
|
@ -15,9 +15,10 @@ dnl 11:0:0 Version 0.1.1: four more fields to cpu_raw_data_t
|
||||||
dnl 11:0:1 Version 0.1.2: added cpu_msr* functions
|
dnl 11:0:1 Version 0.1.2: added cpu_msr* functions
|
||||||
dnl 11:0:2 Version 0.1.3: added cpu_clock_by_ic() function
|
dnl 11:0:2 Version 0.1.3: added cpu_clock_by_ic() function
|
||||||
dnl 11:0:3 Version 0.1.4: just an identification change, to reflect the detection table upgrades
|
dnl 11:0:3 Version 0.1.4: just an identification change, to reflect the detection table upgrades
|
||||||
LIBCPUID_CURRENT=11
|
dnl 12:1:0 Version 0.2.0: two more fields to cpu_id_t
|
||||||
LIBCPUID_AGE=0
|
LIBCPUID_CURRENT=12
|
||||||
LIBCPUID_REVISION=3
|
LIBCPUID_AGE=1
|
||||||
|
LIBCPUID_REVISION=0
|
||||||
AC_SUBST([LIBCPUID_AGE])
|
AC_SUBST([LIBCPUID_AGE])
|
||||||
AC_SUBST([LIBCPUID_REVISION])
|
AC_SUBST([LIBCPUID_REVISION])
|
||||||
AC_SUBST([LIBCPUID_CURRENT])
|
AC_SUBST([LIBCPUID_CURRENT])
|
||||||
|
|
|
@ -85,6 +85,7 @@ typedef enum {
|
||||||
NEED_CLOCK_RDTSC,
|
NEED_CLOCK_RDTSC,
|
||||||
NEED_CLOCK_IC,
|
NEED_CLOCK_IC,
|
||||||
NEED_RDMSR,
|
NEED_RDMSR,
|
||||||
|
NEED_SSE_UNIT_SIZE,
|
||||||
} output_data_switch;
|
} output_data_switch;
|
||||||
|
|
||||||
int need_input = 0,
|
int need_input = 0,
|
||||||
|
@ -136,6 +137,7 @@ matchtable[] = {
|
||||||
{ NEED_CLOCK_RDTSC , "--clock-rdtsc" , 1},
|
{ NEED_CLOCK_RDTSC , "--clock-rdtsc" , 1},
|
||||||
{ NEED_CLOCK_IC , "--clock-ic" , 1},
|
{ NEED_CLOCK_IC , "--clock-ic" , 1},
|
||||||
{ NEED_RDMSR , "--rdmsr" , 0},
|
{ NEED_RDMSR , "--rdmsr" , 0},
|
||||||
|
{ NEED_SSE_UNIT_SIZE, "--sse_size" , 1},
|
||||||
};
|
};
|
||||||
|
|
||||||
const int sz_match = (sizeof(matchtable) / sizeof(matchtable[0]));
|
const int sz_match = (sizeof(matchtable) / sizeof(matchtable[0]));
|
||||||
|
@ -422,6 +424,12 @@ static void print_info(output_data_switch query, struct cpu_raw_data_t* raw,
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case NEED_SSE_UNIT_SIZE:
|
||||||
|
{
|
||||||
|
fprintf(fout, "%d (%s)\n", data->sse_size,
|
||||||
|
data->detection_hints[CPU_HINT_SSE_SIZE_AUTH] ? "authoritative" : "non-authoritative");
|
||||||
|
break;
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
fprintf(fout, "How did you get here?!?\n");
|
fprintf(fout, "How did you get here?!?\n");
|
||||||
break;
|
break;
|
||||||
|
@ -580,6 +588,7 @@ int main(int argc, char** argv)
|
||||||
fprintf(fout, " L1D line sz: %d bytes\n", data.l1_cacheline);
|
fprintf(fout, " L1D line sz: %d bytes\n", data.l1_cacheline);
|
||||||
fprintf(fout, " L2 line sz : %d bytes\n", data.l2_cacheline);
|
fprintf(fout, " L2 line sz : %d bytes\n", data.l2_cacheline);
|
||||||
fprintf(fout, " L3 line sz : %d bytes\n", data.l3_cacheline);
|
fprintf(fout, " L3 line sz : %d bytes\n", data.l3_cacheline);
|
||||||
|
fprintf(fout, " SSE units : %d bits (%s)\n", data.sse_size, data.detection_hints[CPU_HINT_SSE_SIZE_AUTH] ? "authoritative" : "non-authoritative");
|
||||||
fprintf(fout, " code name : `%s'\n", data.cpu_codename);
|
fprintf(fout, " code name : `%s'\n", data.cpu_codename);
|
||||||
fprintf(fout, " features :");
|
fprintf(fout, " features :");
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -56,6 +56,7 @@ static void cpu_id_t_constructor(struct cpu_id_t* id)
|
||||||
id->l1_data_cache = id->l1_instruction_cache = id->l2_cache = id->l3_cache = -1;
|
id->l1_data_cache = id->l1_instruction_cache = id->l2_cache = id->l3_cache = -1;
|
||||||
id->l1_assoc = id->l2_assoc = id->l3_assoc = -1;
|
id->l1_assoc = id->l2_assoc = id->l3_assoc = -1;
|
||||||
id->l1_cacheline = id->l2_cacheline = id->l3_cacheline = -1;
|
id->l1_cacheline = id->l2_cacheline = id->l3_cacheline = -1;
|
||||||
|
id->sse_size = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int parse_token(const char* expected_token, const char *token,
|
static int parse_token(const char* expected_token, const char *token,
|
||||||
|
@ -172,6 +173,7 @@ static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* da
|
||||||
{ 13, CPU_FEATURE_CX16 },
|
{ 13, CPU_FEATURE_CX16 },
|
||||||
{ 19, CPU_FEATURE_SSE4_1 },
|
{ 19, CPU_FEATURE_SSE4_1 },
|
||||||
{ 23, CPU_FEATURE_POPCNT },
|
{ 23, CPU_FEATURE_POPCNT },
|
||||||
|
{ 29, CPU_FEATURE_F16C },
|
||||||
};
|
};
|
||||||
const struct feature_map_t matchtable_edx81[] = {
|
const struct feature_map_t matchtable_edx81[] = {
|
||||||
{ 11, CPU_FEATURE_SYSCALL },
|
{ 11, CPU_FEATURE_SYSCALL },
|
||||||
|
@ -188,6 +190,21 @@ static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* da
|
||||||
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
|
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
|
||||||
match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data);
|
match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data);
|
||||||
}
|
}
|
||||||
|
if (data->flags[CPU_FEATURE_SSE]) {
|
||||||
|
/* apply guesswork to check if the SSE unit width is 128 bit */
|
||||||
|
switch (data->vendor) {
|
||||||
|
case VENDOR_AMD:
|
||||||
|
data->sse_size = (data->ext_family >= 16 && data->ext_family != 23) ? 128 : 64;
|
||||||
|
break;
|
||||||
|
case VENDOR_INTEL:
|
||||||
|
data->sse_size = (data->family == 6 && data->ext_model >= 15) ? 128 : 64;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* leave the CPU_HINT_SSE_SIZE_AUTH hint 0; the advanced per-vendor detection routines
|
||||||
|
* will set it accordingly if they detect the needed bit */
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||||
|
@ -522,6 +539,12 @@ const char* cpu_feature_str(cpu_feature_t feature)
|
||||||
{ CPU_FEATURE_100MHZSTEPS, "100mhzsteps" },
|
{ CPU_FEATURE_100MHZSTEPS, "100mhzsteps" },
|
||||||
{ CPU_FEATURE_HWPSTATE, "hwpstate" },
|
{ CPU_FEATURE_HWPSTATE, "hwpstate" },
|
||||||
{ CPU_FEATURE_CONSTANT_TSC, "constant_tsc" },
|
{ CPU_FEATURE_CONSTANT_TSC, "constant_tsc" },
|
||||||
|
{ CPU_FEATURE_XOP, "xop" },
|
||||||
|
{ CPU_FEATURE_FMA3, "fma3" },
|
||||||
|
{ CPU_FEATURE_FMA4, "fma4" },
|
||||||
|
{ CPU_FEATURE_TBM, "tbm" },
|
||||||
|
{ CPU_FEATURE_F16C, "f16c" },
|
||||||
|
|
||||||
};
|
};
|
||||||
unsigned i, n = COUNT_OF(matchtable);
|
unsigned i, n = COUNT_OF(matchtable);
|
||||||
if (n != NUM_CPU_FEATURES) {
|
if (n != NUM_CPU_FEATURES) {
|
||||||
|
|
|
@ -29,7 +29,7 @@
|
||||||
* @File libcpuid.h
|
* @File libcpuid.h
|
||||||
* @Author Veselin Georgiev
|
* @Author Veselin Georgiev
|
||||||
* @Date Oct 2008
|
* @Date Oct 2008
|
||||||
* @Version 0.1.3
|
* @Version 0.2.0
|
||||||
*
|
*
|
||||||
* Version history:
|
* Version history:
|
||||||
*
|
*
|
||||||
|
@ -38,8 +38,11 @@
|
||||||
* new processor topology enumeration required on Core i7
|
* new processor topology enumeration required on Core i7
|
||||||
* 0.1.2 (2009-09-26): Added support for MSR reading through self-extracting
|
* 0.1.2 (2009-09-26): Added support for MSR reading through self-extracting
|
||||||
* kernel driver on Win32.
|
* kernel driver on Win32.
|
||||||
* 0.1.3 (2010-04-20): Added support for greater more accurate CPU clock measurements
|
* 0.1.3 (2010-04-20): Added support for more accurate CPU clock
|
||||||
* with cpu_clock_by_ic()
|
* measurements with cpu_clock_by_ic()
|
||||||
|
* 0.2.0 (2011-10-11): Support for AMD Bulldozer CPUs, 128-bit SSE unit size
|
||||||
|
* checking. A backwards-incompatible change, since the
|
||||||
|
* sizeof cpu_id_t is now different.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/** @mainpage A simple libcpuid introduction
|
/** @mainpage A simple libcpuid introduction
|
||||||
|
@ -223,6 +226,15 @@ struct cpu_id_t {
|
||||||
* @endcode
|
* @endcode
|
||||||
*/
|
*/
|
||||||
char cpu_codename[64];
|
char cpu_codename[64];
|
||||||
|
|
||||||
|
/** SSE execution unit size (64 or 128; -1 if N/A) */
|
||||||
|
int32_t sse_size;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Contains miscellaneous detection information. Used to test for specifics of
|
||||||
|
* certain detected features. See CPU_HINT_* macros below. @see Hints
|
||||||
|
*/
|
||||||
|
uint8_t detection_hints[CPU_HINTS_MAX];
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -317,7 +329,7 @@ typedef enum {
|
||||||
CPU_FEATURE_3DNOWPREFETCH, /*!< PREFETCH/PREFETCHW support */
|
CPU_FEATURE_3DNOWPREFETCH, /*!< PREFETCH/PREFETCHW support */
|
||||||
CPU_FEATURE_OSVW, /*!< OS Visible Workaround (AMD) */
|
CPU_FEATURE_OSVW, /*!< OS Visible Workaround (AMD) */
|
||||||
CPU_FEATURE_IBS, /*!< Instruction-based sampling */
|
CPU_FEATURE_IBS, /*!< Instruction-based sampling */
|
||||||
CPU_FEATURE_SSE5, /*!< SSE 5 instructions supported */
|
CPU_FEATURE_SSE5, /*!< SSE 5 instructions supported (deprecated, will never be 1) */
|
||||||
CPU_FEATURE_SKINIT, /*!< SKINIT / STGI supported */
|
CPU_FEATURE_SKINIT, /*!< SKINIT / STGI supported */
|
||||||
CPU_FEATURE_WDT, /*!< Watchdog timer support */
|
CPU_FEATURE_WDT, /*!< Watchdog timer support */
|
||||||
CPU_FEATURE_TS, /*!< Temperature sensor */
|
CPU_FEATURE_TS, /*!< Temperature sensor */
|
||||||
|
@ -329,10 +341,26 @@ typedef enum {
|
||||||
CPU_FEATURE_100MHZSTEPS,/*!< 100 MHz multiplier control */
|
CPU_FEATURE_100MHZSTEPS,/*!< 100 MHz multiplier control */
|
||||||
CPU_FEATURE_HWPSTATE, /*!< Hardware P-state control */
|
CPU_FEATURE_HWPSTATE, /*!< Hardware P-state control */
|
||||||
CPU_FEATURE_CONSTANT_TSC, /*!< TSC ticks at constant rate */
|
CPU_FEATURE_CONSTANT_TSC, /*!< TSC ticks at constant rate */
|
||||||
|
CPU_FEATURE_XOP, /*!< The XOP instruction set (same as the old CPU_FEATURE_SSE5) */
|
||||||
|
CPU_FEATURE_FMA3, /*!< The FMA3 instruction set */
|
||||||
|
CPU_FEATURE_FMA4, /*!< The FMA4 instruction set */
|
||||||
|
CPU_FEATURE_TBM, /*!< Trailing bit manipulation instruction support */
|
||||||
|
CPU_FEATURE_F16C, /*!< 16-bit FP convert instruction support */
|
||||||
/* termination: */
|
/* termination: */
|
||||||
NUM_CPU_FEATURES,
|
NUM_CPU_FEATURES,
|
||||||
} cpu_feature_t;
|
} cpu_feature_t;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @brief CPU detection hints identifiers
|
||||||
|
*
|
||||||
|
* Usage: similar to the flags usage
|
||||||
|
*/
|
||||||
|
typedef enum {
|
||||||
|
CPU_HINT_SSE_SIZE_AUTH = 0, /*!< SSE unit size is authoritative (not only a Family/Model guesswork, but based on an actual CPUID bit) */
|
||||||
|
/* termination */
|
||||||
|
NUM_CPU_HINTS,
|
||||||
|
} cpu_hint_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @brief Describes common library error codes
|
* @brief Describes common library error codes
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -39,5 +39,6 @@
|
||||||
#define MAX_EXT_CPUID_LEVEL 32
|
#define MAX_EXT_CPUID_LEVEL 32
|
||||||
#define MAX_INTELFN4_LEVEL 4
|
#define MAX_INTELFN4_LEVEL 4
|
||||||
#define MAX_INTELFN11_LEVEL 4
|
#define MAX_INTELFN11_LEVEL 4
|
||||||
|
#define CPU_HINTS_MAX 16
|
||||||
|
|
||||||
#endif /* __LIBCPUID_CONSTANTS_H__ */
|
#endif /* __LIBCPUID_CONSTANTS_H__ */
|
||||||
|
|
|
@ -226,18 +226,6 @@ int cpu_clock_measure(int millis, int quad_check)
|
||||||
return (results[bi] + results[bj] + _zero) / 2;
|
return (results[bi] + results[bj] + _zero) / 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int sse_is_128_bit(struct cpu_id_t* id)
|
|
||||||
{
|
|
||||||
switch (id->vendor) {
|
|
||||||
case VENDOR_AMD:
|
|
||||||
return (id->ext_family >= 16 && id->ext_family != 23);
|
|
||||||
case VENDOR_INTEL:
|
|
||||||
return (id->family == 6 && id->ext_model >= 15);
|
|
||||||
default:
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int cpu_clock_by_ic(int millis, int runs)
|
int cpu_clock_by_ic(int millis, int runs)
|
||||||
{
|
{
|
||||||
int max_value = 0, cur_value, i, ri, cycles_inner, cycles_outer, c;
|
int max_value = 0, cur_value, i, ri, cycles_inner, cycles_outer, c;
|
||||||
|
@ -248,7 +236,7 @@ int cpu_clock_by_ic(int millis, int runs)
|
||||||
id = get_cached_cpuid();
|
id = get_cached_cpuid();
|
||||||
if (!id || !id->flags[CPU_FEATURE_SSE]) return -1;
|
if (!id || !id->flags[CPU_FEATURE_SSE]) return -1;
|
||||||
//
|
//
|
||||||
if (!sse_is_128_bit(id)) {
|
if (id->sse_size < 128) {
|
||||||
debugf(1, "SSE execution path is 64-bit\n");
|
debugf(1, "SSE execution path is 64-bit\n");
|
||||||
sse_multiplier = 2;
|
sse_multiplier = 2;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -259,9 +259,10 @@ static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||||
{ 8, CPU_FEATURE_3DNOWPREFETCH },
|
{ 8, CPU_FEATURE_3DNOWPREFETCH },
|
||||||
{ 9, CPU_FEATURE_OSVW },
|
{ 9, CPU_FEATURE_OSVW },
|
||||||
{ 10, CPU_FEATURE_IBS },
|
{ 10, CPU_FEATURE_IBS },
|
||||||
{ 11, CPU_FEATURE_SSE5 },
|
{ 11, CPU_FEATURE_XOP },
|
||||||
{ 12, CPU_FEATURE_SKINIT },
|
{ 12, CPU_FEATURE_SKINIT },
|
||||||
{ 13, CPU_FEATURE_WDT },
|
{ 13, CPU_FEATURE_WDT },
|
||||||
|
{ 16, CPU_FEATURE_FMA4 },
|
||||||
};
|
};
|
||||||
const struct feature_map_t matchtable_edx87[] = {
|
const struct feature_map_t matchtable_edx87[] = {
|
||||||
{ 0, CPU_FEATURE_TS },
|
{ 0, CPU_FEATURE_TS },
|
||||||
|
@ -280,6 +281,11 @@ static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||||
}
|
}
|
||||||
if (raw->ext_cpuid[0][0] >= 7)
|
if (raw->ext_cpuid[0][0] >= 7)
|
||||||
match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data);
|
match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data);
|
||||||
|
if (raw->ext_cpuid[0][0] >= 0x1a) {
|
||||||
|
/* We have the extended info about SSE unit size */
|
||||||
|
data->detection_hints[CPU_HINT_SSE_SIZE_AUTH] = 1;
|
||||||
|
data->sse_size = (raw->ext_cpuid[0x1a][0] & 1) ? 128 : 64;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void decode_amd_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
static void decode_amd_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data)
|
||||||
|
|
Loading…
Reference in a new issue