crc32: return the CRC from the NEON helpers and pass ISA flags to simd.check()

src/crc32/crc32_neon.cpp
  Each neon_crc32c{b,h,w,x} helper executes a single AArch64 crc32c*
  instruction on its two arguments through inline asm. Before this patch the
  helpers had a non-void return type but no return statement; they now
  return the crc operand that the instruction updates. They are also declared
  inline and [[gnu::const]].

  Review revision: the asm statements lose their __volatile__ qualifier. The
  instruction only touches the register operands listed in the asm statement
  (no memory operand and no clobber is declared), so there is no side effect
  for volatile to protect, and keeping it would contradict [[gnu::const]] and
  stop the compiler from merging or hoisting repeated identical calls.

src/crc32/meson.build
  The instruction-set flag (-msse4.2 on amd64, -mcpu=generic+crc on aarch64)
  is now selected from arch before simd.check() and handed to it through
  cpp_args. The former block that appended the same flags only when the
  wrenpp_with_sse42 option was set is removed, and the crc32 static library
  is built with global_compiler_opts alone.

Note: the blob ids on the index lines are the ones recorded with the original
patch; they do not account for the review revision described above.

diff --git a/src/crc32/crc32_neon.cpp b/src/crc32/crc32_neon.cpp
index c0131a5..42158c4 100644
--- a/src/crc32/crc32_neon.cpp
+++ b/src/crc32/crc32_neon.cpp
@@ -31,32 +31,36 @@ namespace wren::detail {
 //King_DuckZ - adapted from
 //https://github.com/rurban/smhasher/blob/master/sse2neon.h
 namespace {
-	[[gnu::always_inline]]
-	std::uint32_t neon_crc32cb(std::uint32_t crc, std::uint8_t v) {
-		__asm__ __volatile__("crc32cb %w[c], %w[c], %w[v]\n\t"
+	[[gnu::always_inline]] [[gnu::const]]
+	inline std::uint32_t neon_crc32cb(std::uint32_t crc, std::uint8_t v) {
+		__asm__("crc32cb %w[c], %w[c], %w[v]\n\t"
 			: [c] "+r"(crc)
 			: [v] "r"(v));
+		return crc;
 	}
 
-	[[gnu::always_inline]]
-	std::uint32_t neon_crc32ch (std::uint32_t crc, std::uint16_t v) {
-		__asm__ __volatile__("crc32ch %w[c], %w[c], %w[v]\n\t"
+	[[gnu::always_inline]] [[gnu::const]]
+	inline std::uint32_t neon_crc32ch (std::uint32_t crc, std::uint16_t v) {
+		__asm__("crc32ch %w[c], %w[c], %w[v]\n\t"
 			: [c] "+r"(crc)
 			: [v] "r"(v));
+		return crc;
 	}
 
-	[[gnu::always_inline]]
-	std::uint32_t neon_crc32cw(std::uint32_t crc, std::uint32_t v) {
-		__asm__ __volatile__("crc32cw %w[c], %w[c], %w[v]\n\t"
+	[[gnu::always_inline]] [[gnu::const]]
+	inline std::uint32_t neon_crc32cw(std::uint32_t crc, std::uint32_t v) {
+		__asm__("crc32cw %w[c], %w[c], %w[v]\n\t"
 			: [c] "+r"(crc)
 			: [v] "r"(v));
+		return crc;
 	}
 
-	[[gnu::always_inline]]
-	std::uint64_t neon_crc32cx (std::uint64_t crc, std::uint64_t v) {
-		__asm__ __volatile__("crc32cx %w[c], %w[c], %x[v]\n\t"
+	[[gnu::always_inline]] [[gnu::const]]
+	inline std::uint64_t neon_crc32cx (std::uint64_t crc, std::uint64_t v) {
+		__asm__("crc32cx %w[c], %w[c], %x[v]\n\t"
 			: [c] "+r"(crc)
 			: [v] "r"(v));
+		return crc;
 	}
 
 } //unnamed namespace
diff --git a/src/crc32/meson.build b/src/crc32/meson.build
index 4ca156c..bc51009 100644
--- a/src/crc32/meson.build
+++ b/src/crc32/meson.build
@@ -1,11 +1,20 @@
 simd = import('unstable-simd')
+
 compiler_opts = []
+if arch == 'amd64'
+	compiler_opts += ['-msse4.2']
+elif arch == 'aarch64'
+	#gcc options here:
+	#https://gcc.gnu.org/onlinedocs/gcc-11.2.0/gcc/AArch64-Options.html
+	compiler_opts += ['-mcpu=generic+crc']
+endif
 
 cpp = meson.get_compiler('cpp')
 crc32_simd = simd.check('crc32_hw',
 	sse42: 'crc32_sse42.cpp',
 	neon: 'crc32_neon.cpp',
 	compiler: cpp,
+	cpp_args: compiler_opts,
 )
 crc32_objs = crc32_simd[0]
 
@@ -16,21 +25,11 @@ project_config_file = configure_file(
 	configuration: crc32_config
 )
 
-if get_option('wrenpp_with_sse42')
-	if arch == 'amd64'
-		compiler_opts += ['-msse4.2']
-	elif arch == 'aarch64'
-		#gcc options here:
-		#https://gcc.gnu.org/onlinedocs/gcc-11.2.0/gcc/AArch64-Options.html
-		compiler_opts += ['-mcpu=generic+crc']
-	endif
-endif
-
 crc32 = static_library('crc32',
 	'crc32.cpp',
 	include_directories: [public_incl],
 	install: false,
-	cpp_args: compiler_opts + global_compiler_opts,
+	cpp_args: global_compiler_opts,
 	link_with: crc32_objs,
 )
 