From 046d2ca2ab1d48d149ca5dced008f6f77d473e31 Mon Sep 17 00:00:00 2001 From: Veselin Georgiev Date: Thu, 16 Apr 2015 20:54:37 +0300 Subject: [PATCH] Better support for AVX, AVX2, BMI1 and BMI2 instruction set detection. - Detect AVX and AVX2 on both Intel and AMD CPUs - Detect BMI1 and BMI2 instruction sets (BMI2 is only on Haswell, BMI1 is also present on Bulldozers). - Fix tests to reflect changes. --- libcpuid/cpuid_main.c | 11 +++++++++++ libcpuid/libcpuid.h | 2 ++ libcpuid/recog_intel.c | 7 ------- tests/amd/bulldozer/bulldozer-x4.test | 2 +- tests/amd/bulldozer/vishera-x4.test | 2 +- tests/intel/haswell/haswell-i3.test | 2 +- 6 files changed, 16 insertions(+), 10 deletions(-) diff --git a/libcpuid/cpuid_main.c b/libcpuid/cpuid_main.c index 99aea8e..0d7aed7 100644 --- a/libcpuid/cpuid_main.c +++ b/libcpuid/cpuid_main.c @@ -189,8 +189,14 @@ static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* da { 19, CPU_FEATURE_SSE4_1 }, { 21, CPU_FEATURE_X2APIC }, { 23, CPU_FEATURE_POPCNT }, + { 28, CPU_FEATURE_AVX }, { 29, CPU_FEATURE_F16C }, }; + const struct feature_map_t matchtable_ebx7[] = { + { 3, CPU_FEATURE_BMI1 }, + { 5, CPU_FEATURE_AVX2 }, + { 8, CPU_FEATURE_BMI2 }, + }; const struct feature_map_t matchtable_edx81[] = { { 11, CPU_FEATURE_SYSCALL }, { 27, CPU_FEATURE_RDTSCP }, @@ -206,6 +212,9 @@ static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* da match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data); match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data); } + if (raw->basic_cpuid[0][0] >= 7) { + match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data); + } if (raw->ext_cpuid[0][0] >= 0x80000001) { match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data); match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data); @@ -578,6 +587,8 @@ const char* cpu_feature_str(cpu_feature_t feature) { CPU_FEATURE_PFI, "pfi" }, { CPU_FEATURE_PA, "pa" }, { CPU_FEATURE_AVX2, "avx2" }, + { CPU_FEATURE_BMI1, "bmi1" }, + { CPU_FEATURE_BMI2, "bmi2" }, }; unsigned i, n = COUNT_OF(matchtable); if (n != NUM_CPU_FEATURES) { diff --git a/libcpuid/libcpuid.h b/libcpuid/libcpuid.h index b78b0d6..8d2280a 100644 --- a/libcpuid/libcpuid.h +++ b/libcpuid/libcpuid.h @@ -355,6 +355,8 @@ typedef enum { CPU_FEATURE_PFI, /*!< Processor Feedback Interface support */ CPU_FEATURE_PA, /*!< Processor accumulator */ CPU_FEATURE_AVX2, /*!< AVX2 instructions */ + CPU_FEATURE_BMI1, /*!< BMI1 instructions */ + CPU_FEATURE_BMI2, /*!< BMI2 instructions */ /* termination: */ NUM_CPU_FEATURES, } cpu_feature_t; diff --git a/libcpuid/recog_intel.c b/libcpuid/recog_intel.c index 625b277..0cbfead 100644 --- a/libcpuid/recog_intel.c +++ b/libcpuid/recog_intel.c @@ -359,12 +359,8 @@ static void load_intel_features(struct cpu_raw_data_t* raw, struct cpu_id_t* dat { 25, CPU_FEATURE_AES }, { 26, CPU_FEATURE_XSAVE }, { 27, CPU_FEATURE_OSXSAVE }, - { 28, CPU_FEATURE_AVX }, { 30, CPU_FEATURE_RDRAND }, }; - const struct feature_map_t matchtable_ebx7[] = { - { 5, CPU_FEATURE_AVX2 }, - }; const struct feature_map_t matchtable_edx81[] = { { 20, CPU_FEATURE_XD }, }; @@ -372,9 +368,6 @@ static void load_intel_features(struct cpu_raw_data_t* raw, struct cpu_id_t* dat match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data); match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data); } - if (raw->basic_cpuid[0][0] >= 7) { - match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data); - } if (raw->ext_cpuid[0][0] >= 1) { match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data); } diff --git a/tests/amd/bulldozer/bulldozer-x4.test b/tests/amd/bulldozer/bulldozer-x4.test index 085c1c9..893851e 100644 --- a/tests/amd/bulldozer/bulldozer-x4.test +++ b/tests/amd/bulldozer/bulldozer-x4.test @@ -90,4 +90,4 @@ intel_fn11[3]=00000000 00000000 00000000 00000000 64 128 (authoritative) Bulldozer X4 -fpu vme de pse tsc msr pae mce cx8 apic mtrr sep pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht pni monitor ssse3 cx16 sse4_1 syscall popcnt mmxext nx fxsr_opt rdtscp lm lahf_lm cmp_legacy svm abm misalignsse sse4a 3dnowprefetch osvw ibs skinit wdt ts ttp tm_amd 100mhzsteps hwpstate constant_tsc xop fma4 cpb +fpu vme de pse tsc msr pae mce cx8 apic mtrr sep pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht pni monitor ssse3 cx16 sse4_1 syscall popcnt avx mmxext nx fxsr_opt rdtscp lm lahf_lm cmp_legacy svm abm misalignsse sse4a 3dnowprefetch osvw ibs skinit wdt ts ttp tm_amd 100mhzsteps hwpstate constant_tsc xop fma4 cpb diff --git a/tests/amd/bulldozer/vishera-x4.test b/tests/amd/bulldozer/vishera-x4.test index 4292a07..9a0ea84 100644 --- a/tests/amd/bulldozer/vishera-x4.test +++ b/tests/amd/bulldozer/vishera-x4.test @@ -90,4 +90,4 @@ intel_fn11[3]=00000000 00000000 00000000 00000000 64 128 (authoritative) Vishera X4 -fpu vme de pse tsc msr pae mce cx8 apic mtrr sep pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht pni monitor ssse3 cx16 sse4_1 syscall popcnt mmxext nx fxsr_opt rdtscp lm lahf_lm cmp_legacy svm abm misalignsse sse4a 3dnowprefetch osvw ibs skinit wdt ts ttp tm_amd 100mhzsteps hwpstate constant_tsc xop fma3 fma4 f16c cpb aperfmperf +fpu vme de pse tsc msr pae mce cx8 apic mtrr sep pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht pni monitor ssse3 cx16 sse4_1 syscall popcnt avx mmxext nx fxsr_opt rdtscp lm lahf_lm cmp_legacy svm abm misalignsse sse4a 3dnowprefetch osvw ibs skinit wdt ts ttp tm_amd 100mhzsteps hwpstate constant_tsc xop fma3 fma4 f16c cpb aperfmperf bmi1 diff --git a/tests/intel/haswell/haswell-i3.test b/tests/intel/haswell/haswell-i3.test index de0c83d..ba14535 100644 --- a/tests/intel/haswell/haswell-i3.test +++ b/tests/intel/haswell/haswell-i3.test @@ -90,4 +90,4 @@ intel_fn11[3]=00000000 00000000 00000003 00000001 64 128 (non-authoritative) Haswell (Core i3) -fpu vme de pse tsc msr pae mce cx8 apic mtrr sep pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe pni pclmul dts64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm sse4_1 sse4_2 syscall xd movbe popcnt aes xsave osxsave avx rdtscp lm lahf_lm constant_tsc fma3 f16c rdrand avx2 +fpu vme de pse tsc msr pae mce cx8 apic mtrr sep pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe pni pclmul dts64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm sse4_1 sse4_2 syscall xd movbe popcnt aes xsave osxsave avx rdtscp lm lahf_lm constant_tsc fma3 f16c rdrand avx2 bmi1 bmi2