Possible build fix for aarch64
This commit is contained in:
parent
b3ecb69ec0
commit
f4b3600ee5
4 changed files with 163 additions and 4 deletions
|
@ -29,9 +29,9 @@ namespace wren {
|
||||||
class ModuleAndName;
|
class ModuleAndName;
|
||||||
|
|
||||||
namespace detail {
|
namespace detail {
|
||||||
template <dhandy::bt::string S>
|
template <dhandy::bt::string Str>
|
||||||
struct ModuleAndNameStaticStorage {
|
struct ModuleAndNameStaticStorage {
|
||||||
static constexpr const auto value = S;
|
static constexpr dhandy::bt::string value = Str;
|
||||||
};
|
};
|
||||||
|
|
||||||
[[gnu::const]]
|
[[gnu::const]]
|
||||||
|
@ -148,7 +148,9 @@ namespace wren {
|
||||||
template <dhandy::bt::string S1, dhandy::bt::string S2>
|
template <dhandy::bt::string S1, dhandy::bt::string S2>
|
||||||
consteval ModuleAndName make_module_and_name() noexcept {
|
consteval ModuleAndName make_module_and_name() noexcept {
|
||||||
using dhandy::bt::string;
|
using dhandy::bt::string;
|
||||||
using StaticStorage = detail::ModuleAndNameStaticStorage<S1 + string("\0") + S2>;
|
using detail::ModuleAndNameStaticStorage;
|
||||||
|
constexpr string null_char{"\0"};
|
||||||
|
using StaticStorage = ModuleAndNameStaticStorage<S1 + null_char + S2>;
|
||||||
|
|
||||||
constexpr const char* data = StaticStorage::value.data();
|
constexpr const char* data = StaticStorage::value.data();
|
||||||
constexpr std::uint16_t s1_len = static_cast<std::uint16_t>(S1.size());
|
constexpr std::uint16_t s1_len = static_cast<std::uint16_t>(S1.size());
|
||||||
|
|
|
@ -45,7 +45,11 @@ if get_option('wrenpp_with_name_guessing')
|
||||||
compiler_opts += ['-DWRENPP_WITH_NAME_GUESSING']
|
compiler_opts += ['-DWRENPP_WITH_NAME_GUESSING']
|
||||||
endif
|
endif
|
||||||
if get_option('wrenpp_with_sse42')
|
if get_option('wrenpp_with_sse42')
|
||||||
compiler_opts += ['-msse4.2']
|
if arch == 'amd64'
|
||||||
|
compiler_opts += ['-msse4.2']
|
||||||
|
elif arch == 'aarch64'
|
||||||
|
compiler_opts += ['-mcpu=generic+crc']
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
conf.set('POINTER_SIZE', ptr_size)
|
conf.set('POINTER_SIZE', ptr_size)
|
||||||
|
|
|
@ -19,7 +19,11 @@
|
||||||
#include "pvt_config.h"
|
#include "pvt_config.h"
|
||||||
#if defined(WRENPP_WITH_SSE42)
|
#if defined(WRENPP_WITH_SSE42)
|
||||||
# if defined(__aarch64__)
|
# if defined(__aarch64__)
|
||||||
|
extern "C" {
|
||||||
# include "sse2neon.h"
|
# include "sse2neon.h"
|
||||||
|
# include <sys/auxv.h>
|
||||||
|
# include <asm/hwcap.h>
|
||||||
|
} // extern C
|
||||||
# else
|
# else
|
||||||
# include <smmintrin.h>
|
# include <smmintrin.h>
|
||||||
# include <immintrin.h>
|
# include <immintrin.h>
|
||||||
|
@ -83,10 +87,18 @@ namespace wren {
|
||||||
[[gnu::const]]
|
[[gnu::const]]
|
||||||
bool has_hw_crc32() {
|
bool has_hw_crc32() {
|
||||||
#if defined(WRENPP_WITH_SSE42)
|
#if defined(WRENPP_WITH_SSE42)
|
||||||
|
# if defined(__amd64__) || defined(_M_AMD64)
|
||||||
//if (_may_i_use_cpu_feature(_FEATURE_SSE4_2)) {
|
//if (_may_i_use_cpu_feature(_FEATURE_SSE4_2)) {
|
||||||
if (__builtin_cpu_supports("sse4.2"))
|
if (__builtin_cpu_supports("sse4.2"))
|
||||||
return true;
|
return true;
|
||||||
else
|
else
|
||||||
|
# elif defined(__aarch64__)
|
||||||
|
//see
|
||||||
|
//https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
|
||||||
|
if (getauxval(AT_HWCAP) & HWCAP_CRC32)
|
||||||
|
return true;
|
||||||
|
else
|
||||||
|
# endif
|
||||||
#endif
|
#endif
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
141
src/sse2neon.h
Normal file
141
src/sse2neon.h
Normal file
|
@ -0,0 +1,141 @@
|
||||||
|
#ifndef SSE2NEON_H
#define SSE2NEON_H

// This header file provides a simple API translation layer
// between SSE intrinsics to their corresponding Arm/Aarch64 NEON versions
//
// This header file does not yet translate all of the SSE intrinsics.
//
// Contributors to this work are:
// John W. Ratcliff <jratcliffscarab@gmail.com>
// Brandon Rowlett <browlett@nvidia.com>
// Ken Fast <kfast@gdeb.com>
// Eric van Beurden <evanbeurden@nvidia.com>
// Alexander Potylitsin <apotylitsin@nvidia.com>
// Hasindu Gamaarachchi <hasindu2008@gmail.com>
// Jim Huang <jserv@biilabs.io>
// Mark Cheng <marktwtn@biilabs.io>
// Malcolm James MacLeod <malcolm@gulden.com>
// Devin Hussey (easyaspi314) <husseydevin@gmail.com>
// Sebastian Pop <spop@amazon.com>
// Developer Ecosystem Engineering <DeveloperEcosystemEngineering@apple.com>
// Danila Kutenin <danilak@google.com>
// François Turban (JishinMaster) <francois.turban@gmail.com>
// Pei-Hsuan Hung <afcidk@gmail.com>
// Yang-Hao Yuan <yanghau@biilabs.io>
// Syoyo Fujita <syoyo@lighttransport.com>
// Brecht Van Lommel <brecht@blender.org>

/*
 * sse2neon is freely redistributable under the MIT License.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

//King_DuckZ
//trimmed down version of
//https://github.com/rurban/smhasher/blob/master/sse2neon.h

// Helper macro setup.
// On GCC/Clang the caller's FORCE_INLINE and ALIGN_STRUCT are saved with
// push_macro and restored by the matching pop_macro at the end of this header,
// so whatever is defined here stays private to this file.
#if defined(__GNUC__) || defined(__clang__)
#pragma push_macro("FORCE_INLINE")
#pragma push_macro("ALIGN_STRUCT")
#ifndef FORCE_INLINE
#define FORCE_INLINE static inline __attribute__((always_inline))
#endif
#else
#error "Macro name collisions may happen with unsupported compiler."
#ifdef FORCE_INLINE
#undef FORCE_INLINE
#endif
#define FORCE_INLINE static inline
#endif

#include <stdint.h>
#include <stdlib.h>

// Starting with the initial value in crc, accumulates a CRC32 value for
// unsigned 8-bit integer v.
// https://msdn.microsoft.com/en-us/library/bb514036(v=vs.100)
//
// Uses the crc32cb instruction when building for AArch64 with
// __ARM_FEATURE_CRC32 defined; otherwise falls back to a bit-by-bit loop over
// the reflected polynomial 0x82f63b78. Returns the updated CRC value.
FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t crc, uint8_t v)
{
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
    __asm__ __volatile__("crc32cb %w[c], %w[c], %w[v]\n\t"
                         : [c] "+r"(crc)
                         : [v] "r"(v));
#else
    crc ^= v;
    // One iteration per input bit, least significant bit first.
    for (int bit = 0; bit < 8; bit++) {
        if (crc & 1)
            crc = (crc >> 1) ^ UINT32_C(0x82f63b78);
        else
            crc = (crc >> 1);
    }
#endif
    return crc;
}

// Starting with the initial value in crc, accumulates a CRC32 value for
// unsigned 16-bit integer v.
// https://msdn.microsoft.com/en-us/library/bb531411(v=vs.100)
//
// Uses the crc32ch instruction when available; the portable path feeds the
// low byte of v to _mm_crc32_u8 first, then the high byte.
FORCE_INLINE uint32_t _mm_crc32_u16(uint32_t crc, uint16_t v)
{
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
    __asm__ __volatile__("crc32ch %w[c], %w[c], %w[v]\n\t"
                         : [c] "+r"(crc)
                         : [v] "r"(v));
#else
    crc = _mm_crc32_u8(crc, v & 0xff);
    crc = _mm_crc32_u8(crc, (v >> 8) & 0xff);
#endif
    return crc;
}

// Starting with the initial value in crc, accumulates a CRC32 value for
// unsigned 32-bit integer v.
// https://msdn.microsoft.com/en-us/library/bb531394(v=vs.100)
//
// Uses the crc32cw instruction when available; the portable path feeds the
// low 16 bits of v to _mm_crc32_u16 first, then the high 16 bits.
FORCE_INLINE uint32_t _mm_crc32_u32(uint32_t crc, uint32_t v)
{
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
    __asm__ __volatile__("crc32cw %w[c], %w[c], %w[v]\n\t"
                         : [c] "+r"(crc)
                         : [v] "r"(v));
#else
    crc = _mm_crc32_u16(crc, v & 0xffff);
    crc = _mm_crc32_u16(crc, (v >> 16) & 0xffff);
#endif
    return crc;
}

// Starting with the initial value in crc, accumulates a CRC32 value for
// unsigned 64-bit integer v.
// https://msdn.microsoft.com/en-us/library/bb514033(v=vs.100)
//
// Uses the crc32cx instruction when available. The portable path truncates the
// running value to 32 bits and feeds the low 32 bits of v to _mm_crc32_u32
// first, then the high 32 bits; the 32-bit result is widened back to uint64_t.
FORCE_INLINE uint64_t _mm_crc32_u64(uint64_t crc, uint64_t v)
{
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
    __asm__ __volatile__("crc32cx %w[c], %w[c], %x[v]\n\t"
                         : [c] "+r"(crc)
                         : [v] "r"(v));
#else
    crc = _mm_crc32_u32((uint32_t)(crc), v & 0xffffffff);
    crc = _mm_crc32_u32((uint32_t)(crc), (v >> 32) & 0xffffffff);
#endif
    return crc;
}

// Undo the push_macro pragmas issued near the top of this header so that
// FORCE_INLINE and ALIGN_STRUCT return to whatever state the including
// translation unit had before (defined or undefined).
#if defined(__GNUC__) || defined(__clang__)
#pragma pop_macro("ALIGN_STRUCT")
#pragma pop_macro("FORCE_INLINE")
#endif

#endif
|
Loading…
Reference in a new issue