1
0
Fork 0
mirror of https://github.com/anrieff/libcpuid synced 2024-12-16 16:35:45 +00:00

Better support for AVX, AVX2, BMI1 and BMI2 instruction set detection.

- Detect AVX and AVX2 on both Intel and AMD CPUs
- Detect BMI1 and BMI2 instruction sets (BMI2 is only present on Haswell; BMI1
  is also present on newer Bulldozer-family AMD CPUs such as Vishera).
- Fix tests to reflect changes.
This commit is contained in:
Veselin Georgiev 2015-04-16 20:54:37 +03:00
parent 325904f498
commit 046d2ca2ab
6 changed files with 16 additions and 10 deletions

View file

@ -189,8 +189,14 @@ static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* da
{ 19, CPU_FEATURE_SSE4_1 }, { 19, CPU_FEATURE_SSE4_1 },
{ 21, CPU_FEATURE_X2APIC }, { 21, CPU_FEATURE_X2APIC },
{ 23, CPU_FEATURE_POPCNT }, { 23, CPU_FEATURE_POPCNT },
{ 28, CPU_FEATURE_AVX },
{ 29, CPU_FEATURE_F16C }, { 29, CPU_FEATURE_F16C },
}; };
const struct feature_map_t matchtable_ebx7[] = {
{ 3, CPU_FEATURE_BMI1 },
{ 5, CPU_FEATURE_AVX2 },
{ 8, CPU_FEATURE_BMI2 },
};
const struct feature_map_t matchtable_edx81[] = { const struct feature_map_t matchtable_edx81[] = {
{ 11, CPU_FEATURE_SYSCALL }, { 11, CPU_FEATURE_SYSCALL },
{ 27, CPU_FEATURE_RDTSCP }, { 27, CPU_FEATURE_RDTSCP },
@ -206,6 +212,9 @@ static void load_features_common(struct cpu_raw_data_t* raw, struct cpu_id_t* da
match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data); match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data);
match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data); match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data);
} }
if (raw->basic_cpuid[0][0] >= 7) {
match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data);
}
if (raw->ext_cpuid[0][0] >= 0x80000001) { if (raw->ext_cpuid[0][0] >= 0x80000001) {
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data); match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data); match_features(matchtable_ecx81, COUNT_OF(matchtable_ecx81), raw->ext_cpuid[1][2], data);
@ -578,6 +587,8 @@ const char* cpu_feature_str(cpu_feature_t feature)
{ CPU_FEATURE_PFI, "pfi" }, { CPU_FEATURE_PFI, "pfi" },
{ CPU_FEATURE_PA, "pa" }, { CPU_FEATURE_PA, "pa" },
{ CPU_FEATURE_AVX2, "avx2" }, { CPU_FEATURE_AVX2, "avx2" },
{ CPU_FEATURE_BMI1, "bmi1" },
{ CPU_FEATURE_BMI2, "bmi2" },
}; };
unsigned i, n = COUNT_OF(matchtable); unsigned i, n = COUNT_OF(matchtable);
if (n != NUM_CPU_FEATURES) { if (n != NUM_CPU_FEATURES) {

View file

@ -355,6 +355,8 @@ typedef enum {
CPU_FEATURE_PFI, /*!< Processor Feedback Interface support */ CPU_FEATURE_PFI, /*!< Processor Feedback Interface support */
CPU_FEATURE_PA, /*!< Processor accumulator */ CPU_FEATURE_PA, /*!< Processor accumulator */
CPU_FEATURE_AVX2, /*!< AVX2 instructions */ CPU_FEATURE_AVX2, /*!< AVX2 instructions */
CPU_FEATURE_BMI1, /*!< BMI1 instructions */
CPU_FEATURE_BMI2, /*!< BMI2 instructions */
/* termination: */ /* termination: */
NUM_CPU_FEATURES, NUM_CPU_FEATURES,
} cpu_feature_t; } cpu_feature_t;

View file

@ -359,12 +359,8 @@ static void load_intel_features(struct cpu_raw_data_t* raw, struct cpu_id_t* dat
{ 25, CPU_FEATURE_AES }, { 25, CPU_FEATURE_AES },
{ 26, CPU_FEATURE_XSAVE }, { 26, CPU_FEATURE_XSAVE },
{ 27, CPU_FEATURE_OSXSAVE }, { 27, CPU_FEATURE_OSXSAVE },
{ 28, CPU_FEATURE_AVX },
{ 30, CPU_FEATURE_RDRAND }, { 30, CPU_FEATURE_RDRAND },
}; };
const struct feature_map_t matchtable_ebx7[] = {
{ 5, CPU_FEATURE_AVX2 },
};
const struct feature_map_t matchtable_edx81[] = { const struct feature_map_t matchtable_edx81[] = {
{ 20, CPU_FEATURE_XD }, { 20, CPU_FEATURE_XD },
}; };
@ -372,9 +368,6 @@ static void load_intel_features(struct cpu_raw_data_t* raw, struct cpu_id_t* dat
match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data); match_features(matchtable_edx1, COUNT_OF(matchtable_edx1), raw->basic_cpuid[1][3], data);
match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data); match_features(matchtable_ecx1, COUNT_OF(matchtable_ecx1), raw->basic_cpuid[1][2], data);
} }
if (raw->basic_cpuid[0][0] >= 7) {
match_features(matchtable_ebx7, COUNT_OF(matchtable_ebx7), raw->basic_cpuid[7][1], data);
}
if (raw->ext_cpuid[0][0] >= 1) { if (raw->ext_cpuid[0][0] >= 1) {
match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data); match_features(matchtable_edx81, COUNT_OF(matchtable_edx81), raw->ext_cpuid[1][3], data);
} }

View file

@ -90,4 +90,4 @@ intel_fn11[3]=00000000 00000000 00000000 00000000
64 64
128 (authoritative) 128 (authoritative)
Bulldozer X4 Bulldozer X4
fpu vme de pse tsc msr pae mce cx8 apic mtrr sep pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht pni monitor ssse3 cx16 sse4_1 syscall popcnt mmxext nx fxsr_opt rdtscp lm lahf_lm cmp_legacy svm abm misalignsse sse4a 3dnowprefetch osvw ibs skinit wdt ts ttp tm_amd 100mhzsteps hwpstate constant_tsc xop fma4 cpb fpu vme de pse tsc msr pae mce cx8 apic mtrr sep pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht pni monitor ssse3 cx16 sse4_1 syscall popcnt avx mmxext nx fxsr_opt rdtscp lm lahf_lm cmp_legacy svm abm misalignsse sse4a 3dnowprefetch osvw ibs skinit wdt ts ttp tm_amd 100mhzsteps hwpstate constant_tsc xop fma4 cpb

View file

@ -90,4 +90,4 @@ intel_fn11[3]=00000000 00000000 00000000 00000000
64 64
128 (authoritative) 128 (authoritative)
Vishera X4 Vishera X4
fpu vme de pse tsc msr pae mce cx8 apic mtrr sep pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht pni monitor ssse3 cx16 sse4_1 syscall popcnt mmxext nx fxsr_opt rdtscp lm lahf_lm cmp_legacy svm abm misalignsse sse4a 3dnowprefetch osvw ibs skinit wdt ts ttp tm_amd 100mhzsteps hwpstate constant_tsc xop fma3 fma4 f16c cpb aperfmperf fpu vme de pse tsc msr pae mce cx8 apic mtrr sep pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht pni monitor ssse3 cx16 sse4_1 syscall popcnt avx mmxext nx fxsr_opt rdtscp lm lahf_lm cmp_legacy svm abm misalignsse sse4a 3dnowprefetch osvw ibs skinit wdt ts ttp tm_amd 100mhzsteps hwpstate constant_tsc xop fma3 fma4 f16c cpb aperfmperf bmi1

View file

@ -90,4 +90,4 @@ intel_fn11[3]=00000000 00000000 00000003 00000001
64 64
128 (non-authoritative) 128 (non-authoritative)
Haswell (Core i3) Haswell (Core i3)
fpu vme de pse tsc msr pae mce cx8 apic mtrr sep pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe pni pclmul dts64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm sse4_1 sse4_2 syscall xd movbe popcnt aes xsave osxsave avx rdtscp lm lahf_lm constant_tsc fma3 f16c rdrand avx2 fpu vme de pse tsc msr pae mce cx8 apic mtrr sep pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe pni pclmul dts64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm sse4_1 sse4_2 syscall xd movbe popcnt aes xsave osxsave avx rdtscp lm lahf_lm constant_tsc fma3 f16c rdrand avx2 bmi1 bmi2