Build fix for aarch64

Note that meson currently has a bug that prevents neon from ever
being detected on aarch64, so the generic crc32 implementation
is always chosen. This is not a big deal right now: it will
fix itself once the bug is fixed in meson.
This commit is contained in:
King_DuckZ 2022-06-03 20:41:00 +02:00
parent 730f4f45ef
commit 576452a8ac
2 changed files with 22 additions and 19 deletions

View file

@ -31,32 +31,36 @@ namespace wren::detail {
//King_DuckZ - adapted from //King_DuckZ - adapted from
//https://github.com/rurban/smhasher/blob/master/sse2neon.h //https://github.com/rurban/smhasher/blob/master/sse2neon.h
namespace { namespace {
[[gnu::always_inline]] [[gnu::always_inline]] [[gnu::const]]
std::uint32_t neon_crc32cb(std::uint32_t crc, std::uint8_t v) { inline std::uint32_t neon_crc32cb(std::uint32_t crc, std::uint8_t v) {
__asm__ __volatile__("crc32cb %w[c], %w[c], %w[v]\n\t" __asm__ __volatile__("crc32cb %w[c], %w[c], %w[v]\n\t"
: [c] "+r"(crc) : [c] "+r"(crc)
: [v] "r"(v)); : [v] "r"(v));
return crc;
} }
[[gnu::always_inline]] [[gnu::always_inline]] [[gnu::const]]
std::uint32_t neon_crc32ch (std::uint32_t crc, std::uint16_t v) { inline std::uint32_t neon_crc32ch (std::uint32_t crc, std::uint16_t v) {
__asm__ __volatile__("crc32ch %w[c], %w[c], %w[v]\n\t" __asm__ __volatile__("crc32ch %w[c], %w[c], %w[v]\n\t"
: [c] "+r"(crc) : [c] "+r"(crc)
: [v] "r"(v)); : [v] "r"(v));
return crc;
} }
[[gnu::always_inline]] [[gnu::always_inline]] [[gnu::const]]
std::uint32_t neon_crc32cw(std::uint32_t crc, std::uint32_t v) { inline std::uint32_t neon_crc32cw(std::uint32_t crc, std::uint32_t v) {
__asm__ __volatile__("crc32cw %w[c], %w[c], %w[v]\n\t" __asm__ __volatile__("crc32cw %w[c], %w[c], %w[v]\n\t"
: [c] "+r"(crc) : [c] "+r"(crc)
: [v] "r"(v)); : [v] "r"(v));
return crc;
} }
[[gnu::always_inline]] [[gnu::always_inline]] [[gnu::const]]
std::uint64_t neon_crc32cx (std::uint64_t crc, std::uint64_t v) { inline std::uint64_t neon_crc32cx (std::uint64_t crc, std::uint64_t v) {
__asm__ __volatile__("crc32cx %w[c], %w[c], %x[v]\n\t" __asm__ __volatile__("crc32cx %w[c], %w[c], %x[v]\n\t"
: [c] "+r"(crc) : [c] "+r"(crc)
: [v] "r"(v)); : [v] "r"(v));
return crc;
} }
} //unnamed namespace } //unnamed namespace

View file

@ -1,11 +1,20 @@
simd = import('unstable-simd') simd = import('unstable-simd')
compiler_opts = [] compiler_opts = []
if arch == 'amd64'
compiler_opts += ['-msse4.2']
elif arch == 'aarch64'
#gcc options here:
#https://gcc.gnu.org/onlinedocs/gcc-11.2.0/gcc/AArch64-Options.html
compiler_opts += ['-mcpu=generic+crc']
endif
cpp = meson.get_compiler('cpp') cpp = meson.get_compiler('cpp')
crc32_simd = simd.check('crc32_hw', crc32_simd = simd.check('crc32_hw',
sse42: 'crc32_sse42.cpp', sse42: 'crc32_sse42.cpp',
neon: 'crc32_neon.cpp', neon: 'crc32_neon.cpp',
compiler: cpp, compiler: cpp,
cpp_args: compiler_opts,
) )
crc32_objs = crc32_simd[0] crc32_objs = crc32_simd[0]
@ -16,21 +25,11 @@ project_config_file = configure_file(
configuration: crc32_config configuration: crc32_config
) )
if get_option('wrenpp_with_sse42')
if arch == 'amd64'
compiler_opts += ['-msse4.2']
elif arch == 'aarch64'
#gcc options here:
#https://gcc.gnu.org/onlinedocs/gcc-11.2.0/gcc/AArch64-Options.html
compiler_opts += ['-mcpu=generic+crc']
endif
endif
crc32 = static_library('crc32', crc32 = static_library('crc32',
'crc32.cpp', 'crc32.cpp',
include_directories: [public_incl], include_directories: [public_incl],
install: false, install: false,
cpp_args: compiler_opts + global_compiler_opts, cpp_args: global_compiler_opts,
link_with: crc32_objs, link_with: crc32_objs,
) )