Possible build fix for aarch64

This commit is contained in:
King_DuckZ 2022-06-03 10:52:26 +02:00
parent b3ecb69ec0
commit f4b3600ee5
4 changed files with 163 additions and 4 deletions

View file

@ -29,9 +29,9 @@ namespace wren {
class ModuleAndName; class ModuleAndName;
namespace detail { namespace detail {
template <dhandy::bt::string S> template <dhandy::bt::string Str>
struct ModuleAndNameStaticStorage { struct ModuleAndNameStaticStorage {
static constexpr const auto value = S; static constexpr dhandy::bt::string value = Str;
}; };
[[gnu::const]] [[gnu::const]]
@ -148,7 +148,9 @@ namespace wren {
template <dhandy::bt::string S1, dhandy::bt::string S2> template <dhandy::bt::string S1, dhandy::bt::string S2>
consteval ModuleAndName make_module_and_name() noexcept { consteval ModuleAndName make_module_and_name() noexcept {
using dhandy::bt::string; using dhandy::bt::string;
using StaticStorage = detail::ModuleAndNameStaticStorage<S1 + string("\0") + S2>; using detail::ModuleAndNameStaticStorage;
constexpr string null_char{"\0"};
using StaticStorage = ModuleAndNameStaticStorage<S1 + null_char + S2>;
constexpr const char* data = StaticStorage::value.data(); constexpr const char* data = StaticStorage::value.data();
constexpr std::uint16_t s1_len = static_cast<std::uint16_t>(S1.size()); constexpr std::uint16_t s1_len = static_cast<std::uint16_t>(S1.size());

View file

@ -45,7 +45,11 @@ if get_option('wrenpp_with_name_guessing')
compiler_opts += ['-DWRENPP_WITH_NAME_GUESSING'] compiler_opts += ['-DWRENPP_WITH_NAME_GUESSING']
endif endif
if get_option('wrenpp_with_sse42') if get_option('wrenpp_with_sse42')
if arch == 'amd64'
compiler_opts += ['-msse4.2'] compiler_opts += ['-msse4.2']
elif arch == 'aarch64'
compiler_opts += ['-mcpu=generic+crc']
endif
endif endif
conf.set('POINTER_SIZE', ptr_size) conf.set('POINTER_SIZE', ptr_size)

View file

@ -19,7 +19,11 @@
#include "pvt_config.h" #include "pvt_config.h"
#if defined(WRENPP_WITH_SSE42) #if defined(WRENPP_WITH_SSE42)
# if defined(__aarch64__) # if defined(__aarch64__)
extern "C" {
# include "sse2neon.h" # include "sse2neon.h"
# include <sys/auxv.h>
# include <asm/hwcap.h>
} // extern C
# else # else
# include <smmintrin.h> # include <smmintrin.h>
# include <immintrin.h> # include <immintrin.h>
@ -83,10 +87,18 @@ namespace wren {
[[gnu::const]] [[gnu::const]]
bool has_hw_crc32() { bool has_hw_crc32() {
#if defined(WRENPP_WITH_SSE42) #if defined(WRENPP_WITH_SSE42)
# if defined(__amd64__) || defined(_M_AMD64)
//if (_may_i_use_cpu_feature(_FEATURE_SSE4_2)) { //if (_may_i_use_cpu_feature(_FEATURE_SSE4_2)) {
if (__builtin_cpu_supports("sse4.2")) if (__builtin_cpu_supports("sse4.2"))
return true; return true;
else else
# elif defined(__aarch64__)
//see
//https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
if (getauxval(AT_HWCAP) & HWCAP_CRC32)
return true;
else
# endif
#endif #endif
return false; return false;
} }

141
src/sse2neon.h Normal file
View file

@ -0,0 +1,141 @@
#ifndef SSE2NEON_H
#define SSE2NEON_H
// This header file provides a simple API translation layer
// from SSE intrinsics to their corresponding Arm/Aarch64 NEON versions
//
// This header file does not yet translate all of the SSE intrinsics.
//
// Contributors to this work are:
// John W. Ratcliff <jratcliffscarab@gmail.com>
// Brandon Rowlett <browlett@nvidia.com>
// Ken Fast <kfast@gdeb.com>
// Eric van Beurden <evanbeurden@nvidia.com>
// Alexander Potylitsin <apotylitsin@nvidia.com>
// Hasindu Gamaarachchi <hasindu2008@gmail.com>
// Jim Huang <jserv@biilabs.io>
// Mark Cheng <marktwtn@biilabs.io>
// Malcolm James MacLeod <malcolm@gulden.com>
// Devin Hussey (easyaspi314) <husseydevin@gmail.com>
// Sebastian Pop <spop@amazon.com>
// Developer Ecosystem Engineering <DeveloperEcosystemEngineering@apple.com>
// Danila Kutenin <danilak@google.com>
// François Turban (JishinMaster) <francois.turban@gmail.com>
// Pei-Hsuan Hung <afcidk@gmail.com>
// Yang-Hao Yuan <yanghau@biilabs.io>
// Syoyo Fujita <syoyo@lighttransport.com>
// Brecht Van Lommel <brecht@blender.org>
/*
* sse2neon is freely redistributable under the MIT License.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
// King_DuckZ:
// trimmed-down version of
// https://github.com/rurban/smhasher/blob/master/sse2neon.h
// that keeps only the _mm_crc32_* intrinsics.
#if defined(__GNUC__) || defined(__clang__)
// Save whatever FORCE_INLINE the including code may already have defined.
// The matching pop_macro at the bottom of this header restores it, so the
// helper macro does not leak into the translation unit that includes us.
#pragma push_macro("FORCE_INLINE")
#ifndef FORCE_INLINE
#define FORCE_INLINE static inline __attribute__((always_inline))
#endif
#else
#error "Macro name collisions may happen with unsupported compiler."
#ifdef FORCE_INLINE
#undef FORCE_INLINE
#endif
#define FORCE_INLINE static inline
#endif

#include <stdint.h>
#include <stdlib.h>

// NOTE: this header is plain C so that it can be included from both C and
// C++ code; keep it free of C++-only constructs.

// Starting with the initial value in crc, accumulates a CRC32 value for
// unsigned 8-bit integer v.
// https://msdn.microsoft.com/en-us/library/bb514036(v=vs.100)
//
// crc: running checksum; v: byte to fold in. Returns the updated checksum.
// When built for AArch64 with __ARM_FEATURE_CRC32 defined, a single crc32cb
// instruction is emitted; otherwise the checksum is computed in software.
FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t crc, uint8_t v)
{
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
    __asm__ __volatile__("crc32cb %w[c], %w[c], %w[v]\n\t"
                         : [c] "+r"(crc)
                         : [v] "r"(v));
#else
    // Bit-by-bit fallback: 0x82f63b78 is the bit-reflected CRC-32C
    // (Castagnoli) polynomial, the same one the crc32c* instructions use.
    crc ^= v;
    for (int bit = 0; bit < 8; bit++) {
        if (crc & 1)
            crc = (crc >> 1) ^ UINT32_C(0x82f63b78);
        else
            crc = (crc >> 1);
    }
#endif
    return crc;
}

// Starting with the initial value in crc, accumulates a CRC32 value for
// unsigned 16-bit integer v.
// https://msdn.microsoft.com/en-us/library/bb531411(v=vs.100)
//
// The software path feeds the low byte of v first, then the high byte.
FORCE_INLINE uint32_t _mm_crc32_u16(uint32_t crc, uint16_t v)
{
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
    __asm__ __volatile__("crc32ch %w[c], %w[c], %w[v]\n\t"
                         : [c] "+r"(crc)
                         : [v] "r"(v));
#else
    crc = _mm_crc32_u8(crc, v & 0xff);
    crc = _mm_crc32_u8(crc, (v >> 8) & 0xff);
#endif
    return crc;
}

// Starting with the initial value in crc, accumulates a CRC32 value for
// unsigned 32-bit integer v.
// https://msdn.microsoft.com/en-us/library/bb531394(v=vs.100)
//
// The software path feeds the low 16 bits of v first, then the high 16 bits.
FORCE_INLINE uint32_t _mm_crc32_u32(uint32_t crc, uint32_t v)
{
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
    __asm__ __volatile__("crc32cw %w[c], %w[c], %w[v]\n\t"
                         : [c] "+r"(crc)
                         : [v] "r"(v));
#else
    crc = _mm_crc32_u16(crc, v & 0xffff);
    crc = _mm_crc32_u16(crc, (v >> 16) & 0xffff);
#endif
    return crc;
}

// Starting with the initial value in crc, accumulates a CRC32 value for
// unsigned 64-bit integer v.
// https://msdn.microsoft.com/en-us/library/bb514033(v=vs.100)
//
// crc and the return value are 64 bits wide to match the SSE4.2 signature,
// but only the low 32 bits of crc take part in the computation.
FORCE_INLINE uint64_t _mm_crc32_u64(uint64_t crc, uint64_t v)
{
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
    __asm__ __volatile__("crc32cx %w[c], %w[c], %x[v]\n\t"
                         : [c] "+r"(crc)
                         : [v] "r"(v));
#else
    crc = _mm_crc32_u32((uint32_t)(crc), v & 0xffffffff);
    crc = _mm_crc32_u32((uint32_t)(crc), (v >> 32) & 0xffffffff);
#endif
    return crc;
}

// Restore the includer's FORCE_INLINE (or its absence), balancing the
// push_macro at the top of this header.
#if defined(__GNUC__) || defined(__clang__)
#pragma pop_macro("FORCE_INLINE")
#endif
#endif