From f4b3600ee532c69f393908005806b4268270ae63 Mon Sep 17 00:00:00 2001 From: King_DuckZ Date: Fri, 3 Jun 2022 10:52:26 +0200 Subject: [PATCH] Possible build fix for aarch64 --- include/wrenpp/detail/module_and_name.hpp | 8 +- meson.build | 6 +- src/crc32.cpp | 12 ++ src/sse2neon.h | 141 ++++++++++++++++++++++ 4 files changed, 163 insertions(+), 4 deletions(-) create mode 100644 src/sse2neon.h diff --git a/include/wrenpp/detail/module_and_name.hpp b/include/wrenpp/detail/module_and_name.hpp index 4bf4341..9552be0 100644 --- a/include/wrenpp/detail/module_and_name.hpp +++ b/include/wrenpp/detail/module_and_name.hpp @@ -29,9 +29,9 @@ namespace wren { class ModuleAndName; namespace detail { - template + template struct ModuleAndNameStaticStorage { - static constexpr const auto value = S; + static constexpr dhandy::bt::string value = Str; }; [[gnu::const]] @@ -148,7 +148,9 @@ namespace wren { template consteval ModuleAndName make_module_and_name() noexcept { using dhandy::bt::string; - using StaticStorage = detail::ModuleAndNameStaticStorage; + using detail::ModuleAndNameStaticStorage; + constexpr string null_char{"\0"}; + using StaticStorage = ModuleAndNameStaticStorage; constexpr const char* data = StaticStorage::value.data(); constexpr std::uint16_t s1_len = static_cast(S1.size()); diff --git a/meson.build b/meson.build index e59c60c..29faf19 100644 --- a/meson.build +++ b/meson.build @@ -45,7 +45,11 @@ if get_option('wrenpp_with_name_guessing') compiler_opts += ['-DWRENPP_WITH_NAME_GUESSING'] endif if get_option('wrenpp_with_sse42') - compiler_opts += ['-msse4.2'] + if arch == 'amd64' + compiler_opts += ['-msse4.2'] + elif arch == 'aarch64' + compiler_opts += ['-mcpu=generic+crc'] + endif endif conf.set('POINTER_SIZE', ptr_size) diff --git a/src/crc32.cpp b/src/crc32.cpp index f8dde26..c892de8 100644 --- a/src/crc32.cpp +++ b/src/crc32.cpp @@ -19,7 +19,11 @@ #include "pvt_config.h" #if defined(WRENPP_WITH_SSE42) # if defined(__aarch64__) +extern "C" { # include 
"sse2neon.h" +# include +# include +} // extern C # else # include # include @@ -83,10 +87,18 @@ namespace wren { [[gnu::const]] bool has_hw_crc32() { #if defined(WRENPP_WITH_SSE42) +# if defined(__amd64__) || defined(_M_AMD64) //if (_may_i_use_cpu_feature(_FEATURE_SSE4_2)) { if (__builtin_cpu_supports("sse4.2")) return true; else +# elif defined(__aarch64__) + //see + //https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu + if (getauxval(AT_HWCAP) & HWCAP_CRC32) + return true; + else +# endif #endif return false; } diff --git a/src/sse2neon.h b/src/sse2neon.h new file mode 100644 index 0000000..9a4afb9 --- /dev/null +++ b/src/sse2neon.h @@ -0,0 +1,141 @@ +#ifndef SSE2NEON_H +#define SSE2NEON_H + +// This header file provides a simple API translation layer +// between SSE intrinsics to their corresponding Arm/Aarch64 NEON versions +// +// This header file does not yet translate all of the SSE intrinsics. +// +// Contributors to this work are: +// John W. Ratcliff +// Brandon Rowlett +// Ken Fast +// Eric van Beurden +// Alexander Potylitsin +// Hasindu Gamaarachchi +// Jim Huang +// Mark Cheng +// Malcolm James MacLeod +// Devin Hussey (easyaspi314) +// Sebastian Pop +// Developer Ecosystem Engineering +// Danila Kutenin +// François Turban (JishinMaster) +// Pei-Hsuan Hung +// Yang-Hao Yuan +// Syoyo Fujita +// Brecht Van Lommel + +/* + * sse2neon is freely redistributable under the MIT License. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +//King_DuckZ +//trimmed down version of +//https://github.com/rurban/smhasher/blob/master/sse2neon.h + +#if defined(__GNUC__) || defined(__clang__) +#pragma push_macro("FORCE_INLINE") +#pragma push_macro("ALIGN_STRUCT") +#ifndef FORCE_INLINE +#define FORCE_INLINE static inline __attribute__((always_inline)) +#endif +#else +#error "Macro name collisions may happen with unsupported compiler." +#ifdef FORCE_INLINE +#undef FORCE_INLINE +#endif +#define FORCE_INLINE static inline +#endif + +#include +#include + +// Starting with the initial value in crc, accumulates a CRC32 value for +// unsigned 8-bit integer v. 
// https://msdn.microsoft.com/en-us/library/bb514036(v=vs.100)
//
// When built for AArch64 with __ARM_FEATURE_CRC32 this is one `crc32cb`
// instruction. Otherwise the byte is folded in one bit at a time using the
// reflected polynomial 0x82f63b78, i.e. the CRC-32C (Castagnoli) variant.
// No initial or final inversion of the CRC is performed here.
FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t crc, uint8_t v)
{
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
    __asm__ __volatile__("crc32cb %w[c], %w[c], %w[v]\n\t"
                         : [c] "+r"(crc)
                         : [v] "r"(v));
    return crc;
#else
    uint32_t acc = crc ^ v;
    for (int step = 0; step < 8; ++step) {
        // Shift one bit out; if that bit was set, fold the polynomial back in.
        const uint32_t fold = (acc & 1) ? UINT32_C(0x82f63b78) : UINT32_C(0);
        acc = (acc >> 1) ^ fold;
    }
    return acc;
#endif
}

// Accumulates the unsigned 16-bit integer v into the running value crc and
// returns the updated value. The software path feeds the low byte first and
// the high byte second through _mm_crc32_u8.
// https://msdn.microsoft.com/en-us/library/bb531411(v=vs.100)
FORCE_INLINE uint32_t _mm_crc32_u16(uint32_t crc, uint16_t v)
{
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
    __asm__ __volatile__("crc32ch %w[c], %w[c], %w[v]\n\t"
                         : [c] "+r"(crc)
                         : [v] "r"(v));
    return crc;
#else
    const uint8_t low_byte = (uint8_t)(v & 0xff);
    const uint8_t high_byte = (uint8_t)((v >> 8) & 0xff);
    return _mm_crc32_u8(_mm_crc32_u8(crc, low_byte), high_byte);
#endif
}

// Accumulates the unsigned 32-bit integer v into the running value crc and
// returns the updated value. The software path feeds the low half-word first
// and the high half-word second through _mm_crc32_u16.
// https://msdn.microsoft.com/en-us/library/bb531394(v=vs.100)
FORCE_INLINE uint32_t _mm_crc32_u32(uint32_t crc, uint32_t v)
{
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
    __asm__ __volatile__("crc32cw %w[c], %w[c], %w[v]\n\t"
                         : [c] "+r"(crc)
                         : [v] "r"(v));
    return crc;
#else
    const uint16_t low_half = (uint16_t)(v & 0xffff);
    const uint16_t high_half = (uint16_t)((v >> 16) & 0xffff);
    return _mm_crc32_u16(_mm_crc32_u16(crc, low_half), high_half);
#endif
}

// Starting with the initial value in crc, accumulates a CRC32 value for
// unsigned 64-bit integer v.
+// https://msdn.microsoft.com/en-us/library/bb514033(v=vs.100) +FORCE_INLINE uint64_t _mm_crc32_u64(uint64_t crc, uint64_t v) +{ +#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) + __asm__ __volatile__("crc32cx %w[c], %w[c], %x[v]\n\t" + : [c] "+r"(crc) + : [v] "r"(v)); +#else + crc = _mm_crc32_u32((uint32_t)(crc), v & 0xffffffff); + crc = _mm_crc32_u32((uint32_t)(crc), (v >> 32) & 0xffffffff); +#endif + return crc; +} + +#endif