From 3623c5639d9cc046eb5bc27e77d7d4a03779b58b Mon Sep 17 00:00:00 2001 From: Veselin Georgiev Date: Tue, 11 Oct 2011 16:38:41 +0000 Subject: [PATCH] Added SSE unit size detection, based on the AMD extended leaf 1a, bit 0. Added a field in cpu_id_t to specify SSE unit size. Also added a hints array, similar to the flags array, which will hold various detection-specific hints. The only currently present hint is about the way the SSE unit size is inferred - whether it is based on the old CPU family/model guesswork (which fails for the AMD Brazos-based cores). Also, added the features XOP, FMA4, TBM and F16C. Changed the library version due to breaking binary compatibility. git-svn-id: https://svn.code.sf.net/p/libcpuid/code/HEAD/libcpuid@96 3b4be424-7ac5-41d7-8526-f4ddcb85d872 --- configure.ac | 9 +++++---- cpuid_tool/cpuid_tool.c | 9 +++++++++ libcpuid/cpuid_main.c | 23 ++++++++++++++++++++++ libcpuid/libcpuid.h | 36 +++++++++++++++++++++++++++++++---- libcpuid/libcpuid_constants.h | 1 + libcpuid/rdtsc.c | 14 +------------- libcpuid/recog_amd.c | 8 +++++++- 7 files changed, 78 insertions(+), 22 deletions(-) diff --git a/configure.ac b/configure.ac index 9a44f7e..86e8abe 100644 --- a/configure.ac +++ b/configure.ac @@ -1,4 +1,4 @@ -AC_INIT([libcpuid CPU Identification library], [0.1.4], [libcpuid-devel@lists.sourceforge.net], [libcpuid]) +AC_INIT([libcpuid CPU Identification library], [0.2.0], [libcpuid-devel@lists.sourceforge.net], [libcpuid]) AC_CONFIG_SRCDIR([libcpuid/libcpuid.h]) AC_CONFIG_HEADERS([config.h]) AM_INIT_AUTOMAKE([-Wall dist-bzip2 dist-zip foreign]) @@ -15,9 +15,10 @@ dnl 11:0:0 Version 0.1.1: four more fields to cpu_raw_data_t dnl 11:0:1 Version 0.1.2: added cpu_msr* functions dnl 11:0:2 Version 0.1.3: added cpu_clock_by_ic() function dnl 11:0:3 Version 0.1.4: just an identification change, to reflect the detection table upgrades -LIBCPUID_CURRENT=11 -LIBCPUID_AGE=0 -LIBCPUID_REVISION=3 +dnl 12:1:0 Version 0.2.0: two more fields to cpu_id_t 
+LIBCPUID_CURRENT=12 +LIBCPUID_AGE=1 +LIBCPUID_REVISION=0 AC_SUBST([LIBCPUID_AGE]) AC_SUBST([LIBCPUID_REVISION]) AC_SUBST([LIBCPUID_CURRENT]) diff --git a/cpuid_tool/cpuid_tool.c b/cpuid_tool/cpuid_tool.c index 0af0425..0c440b7 100644 --- a/cpuid_tool/cpuid_tool.c +++ b/cpuid_tool/cpuid_tool.c @@ -85,6 +85,7 @@ typedef enum { NEED_CLOCK_RDTSC, NEED_CLOCK_IC, NEED_RDMSR, + NEED_SSE_UNIT_SIZE, } output_data_switch; int need_input = 0, @@ -136,6 +137,7 @@ matchtable[] = { { NEED_CLOCK_RDTSC , "--clock-rdtsc" , 1}, { NEED_CLOCK_IC , "--clock-ic" , 1}, { NEED_RDMSR , "--rdmsr" , 0}, + { NEED_SSE_UNIT_SIZE, "--sse_size" , 1}, }; const int sz_match = (sizeof(matchtable) / sizeof(matchtable[0])); @@ -422,6 +424,12 @@ static void print_info(output_data_switch query, struct cpu_raw_data_t* raw, } break; } + case NEED_SSE_UNIT_SIZE: + { + fprintf(fout, "%d (%s)\n", data->sse_size, + data->detection_hints[CPU_HINT_SSE_SIZE_AUTH] ? "authoritative" : "non-authoritative"); + break; + } default: fprintf(fout, "How did you get here?!?\n"); break; @@ -580,6 +588,7 @@ int main(int argc, char** argv) fprintf(fout, " L1D line sz: %d bytes\n", data.l1_cacheline); fprintf(fout, " L2 line sz : %d bytes\n", data.l2_cacheline); fprintf(fout, " L3 line sz : %d bytes\n", data.l3_cacheline); + fprintf(fout, " SSE units : %d bits (%s)\n", data.sse_size, data.detection_hints[CPU_HINT_SSE_SIZE_AUTH] ? 
"authoritative" : "non-authoritative"); fprintf(fout, " code name : `%s'\n", data.cpu_codename); fprintf(fout, " features :"); /* diff --git a/libcpuid/cpuid_main.c b/libcpuid/cpuid_main.c index 5ee2550..defa873 100644 --- a/libcpuid/cpuid_main.c +++ b/libcpuid/cpuid_main.c @@ -56,6 +56,7 @@ static void cpu_id_t_constructor(struct cpu_id_t* id) id->l1_data_cache = id->l1_instruction_cache = id->l2_cache = id->l3_cache = -1; id->l1_assoc = id->l2_assoc = id->l3_assoc = -1; id->l1_cacheline = id->l2_cacheline = id->l3_cacheline = -1; + id->sse_size = -1; } static int parse_token(const char* expected_token, const char *token, @@ -172,6 +173,7 @@ static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* da { 13, CPU_FEATURE_CX16 }, { 19, CPU_FEATURE_SSE4_1 }, { 23, CPU_FEATURE_POPCNT }, + { 29, CPU_FEATURE_F16C }, }; const struct feature_map_t matchtable_edx81[] = { { 11, CPU_FEATURE_SYSCALL }, @@ -188,6 +190,21 @@ static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* da match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data); match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data); } + if (data->flags[CPU_FEATURE_SSE]) { + /* apply guesswork to check if the SSE unit width is 128 bit */ + switch (data->vendor) { + case VENDOR_AMD: + data->sse_size = (data->ext_family >= 16 && data->ext_family != 23) ? 128 : 64; + break; + case VENDOR_INTEL: + data->sse_size = (data->family == 6 && data->ext_model >= 15) ? 
128 : 64; + break; + default: + break; + } + /* leave detection_hints[CPU_HINT_SSE_SIZE_AUTH] at 0; the advanced per-vendor detection routines + * will set it accordingly if they detect the needed bit */ + } } static int cpuid_basic_identify(struct cpu_raw_data_t* raw, struct cpu_id_t* data) @@ -522,6 +539,12 @@ const char* cpu_feature_str(cpu_feature_t feature) { CPU_FEATURE_100MHZSTEPS, "100mhzsteps" }, { CPU_FEATURE_HWPSTATE, "hwpstate" }, { CPU_FEATURE_CONSTANT_TSC, "constant_tsc" }, + { CPU_FEATURE_XOP, "xop" }, + { CPU_FEATURE_FMA3, "fma3" }, + { CPU_FEATURE_FMA4, "fma4" }, + { CPU_FEATURE_TBM, "tbm" }, + { CPU_FEATURE_F16C, "f16c" }, + }; unsigned i, n = COUNT_OF(matchtable); if (n != NUM_CPU_FEATURES) { diff --git a/libcpuid/libcpuid.h b/libcpuid/libcpuid.h index b8ec82c..bfadb6b 100644 --- a/libcpuid/libcpuid.h +++ b/libcpuid/libcpuid.h @@ -29,7 +29,7 @@ * @File libcpuid.h * @Author Veselin Georgiev * @Date Oct 2008 - * @Version 0.1.3 + * @Version 0.2.0 * * Version history: * @@ -38,8 +38,11 @@ * new processor topology enumeration required on Core i7 * 0.1.2 (2009-09-26): Added support for MSR reading through self-extracting * kernel driver on Win32. - * 0.1.3 (2010-04-20): Added support for greater more accurate CPU clock measurements - * with cpu_clock_by_ic() + * 0.1.3 (2010-04-20): Added support for more accurate CPU clock + * measurements with cpu_clock_by_ic() + * 0.2.0 (2011-10-11): Support for AMD Bulldozer CPUs, 128-bit SSE unit size + * checking. A backwards-incompatible change, since the + * sizeof cpu_id_t is now different. */ /** @mainpage A simple libcpuid introduction @@ -223,6 +226,15 @@ struct cpu_id_t { * @endcode */ char cpu_codename[64]; + + /** SSE execution unit size (64 or 128; -1 if N/A) */ + int32_t sse_size; + + /** + * Contains miscellaneous detection information. Used to test specifics of + * certain detected features. See the CPU_HINT_* enumerators (cpu_hint_t) below. 
@see Hints + */ + uint8_t detection_hints[CPU_HINTS_MAX]; }; /** @@ -317,7 +329,7 @@ typedef enum { CPU_FEATURE_3DNOWPREFETCH, /*!< PREFETCH/PREFETCHW support */ CPU_FEATURE_OSVW, /*!< OS Visible Workaround (AMD) */ CPU_FEATURE_IBS, /*!< Instruction-based sampling */ - CPU_FEATURE_SSE5, /*!< SSE 5 instructions supported */ + CPU_FEATURE_SSE5, /*!< SSE 5 instructions supported (deprecated, will never be 1) */ CPU_FEATURE_SKINIT, /*!< SKINIT / STGI supported */ CPU_FEATURE_WDT, /*!< Watchdog timer support */ CPU_FEATURE_TS, /*!< Temperature sensor */ @@ -329,10 +341,26 @@ typedef enum { CPU_FEATURE_100MHZSTEPS,/*!< 100 MHz multiplier control */ CPU_FEATURE_HWPSTATE, /*!< Hardware P-state control */ CPU_FEATURE_CONSTANT_TSC, /*!< TSC ticks at constant rate */ + CPU_FEATURE_XOP, /*!< The XOP instruction set (same as the old CPU_FEATURE_SSE5) */ + CPU_FEATURE_FMA3, /*!< The FMA3 instruction set */ + CPU_FEATURE_FMA4, /*!< The FMA4 instruction set */ + CPU_FEATURE_TBM, /*!< Trailing bit manipulation instruction support */ + CPU_FEATURE_F16C, /*!< 16-bit FP convert instruction support */ /* termination: */ NUM_CPU_FEATURES, } cpu_feature_t; +/** + * @brief CPU detection hints identifiers + * + * Usage: similar to the flags usage + */ +typedef enum { + CPU_HINT_SSE_SIZE_AUTH = 0, /*!< SSE unit size is authoritative (not only a Family/Model guesswork, but based on an actual CPUID bit) */ + /* termination */ + NUM_CPU_HINTS, +} cpu_hint_t; + /** * @brief Describes common library error codes */ diff --git a/libcpuid/libcpuid_constants.h b/libcpuid/libcpuid_constants.h index 981d5f9..8af4718 100644 --- a/libcpuid/libcpuid_constants.h +++ b/libcpuid/libcpuid_constants.h @@ -39,5 +39,6 @@ #define MAX_EXT_CPUID_LEVEL 32 #define MAX_INTELFN4_LEVEL 4 #define MAX_INTELFN11_LEVEL 4 +#define CPU_HINTS_MAX 16 #endif /* __LIBCPUID_CONSTANTS_H__ */ diff --git a/libcpuid/rdtsc.c b/libcpuid/rdtsc.c index 46eb26b..a202940 100644 --- a/libcpuid/rdtsc.c +++ b/libcpuid/rdtsc.c @@ -226,18 
+226,6 @@ int cpu_clock_measure(int millis, int quad_check) return (results[bi] + results[bj] + _zero) / 2; } -static int sse_is_128_bit(struct cpu_id_t* id) -{ - switch (id->vendor) { - case VENDOR_AMD: - return (id->ext_family >= 16 && id->ext_family != 23); - case VENDOR_INTEL: - return (id->family == 6 && id->ext_model >= 15); - default: - return 0; - } -} - int cpu_clock_by_ic(int millis, int runs) { int max_value = 0, cur_value, i, ri, cycles_inner, cycles_outer, c; @@ -248,7 +236,7 @@ int cpu_clock_by_ic(int millis, int runs) id = get_cached_cpuid(); if (!id || !id->flags[CPU_FEATURE_SSE]) return -1; // - if (!sse_is_128_bit(id)) { + if (id->sse_size < 128) { debugf(1, "SSE execution path is 64-bit\n"); sse_multiplier = 2; } else { diff --git a/libcpuid/recog_amd.c b/libcpuid/recog_amd.c index 77235cd..209906d 100644 --- a/libcpuid/recog_amd.c +++ b/libcpuid/recog_amd.c @@ -259,9 +259,10 @@ static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data) { 8, CPU_FEATURE_3DNOWPREFETCH }, { 9, CPU_FEATURE_OSVW }, { 10, CPU_FEATURE_IBS }, - { 11, CPU_FEATURE_SSE5 }, + { 11, CPU_FEATURE_XOP }, { 12, CPU_FEATURE_SKINIT }, { 13, CPU_FEATURE_WDT }, + { 16, CPU_FEATURE_FMA4 }, }; const struct feature_map_t matchtable_edx87[] = { { 0, CPU_FEATURE_TS }, @@ -280,6 +281,11 @@ static void load_amd_features(struct cpu_raw_data_t* raw, struct cpu_id_t* data) } if (raw->ext_cpuid[0][0] >= 7) match_features(matchtable_edx87, COUNT_OF(matchtable_edx87), raw->ext_cpuid[7][3], data); + if (raw->ext_cpuid[0][0] >= 0x1a) { + /* We have the extended info about SSE unit size */ + data->detection_hints[CPU_HINT_SSE_SIZE_AUTH] = 1; + data->sse_size = (raw->ext_cpuid[0x1a][0] & 1) ? 128 : 64; + } } static void decode_amd_cache_info(struct cpu_raw_data_t* raw, struct cpu_id_t* data)