Tidy up hardware crc32 implementations
This commit is contained in:
parent
9566553856
commit
730f4f45ef
10 changed files with 217 additions and 239 deletions
|
@ -69,7 +69,7 @@ namespace wren {
|
||||||
constexpr inline auto g_polynomial_table = PolynomialTable<Polynomial>::table;
|
constexpr inline auto g_polynomial_table = PolynomialTable<Polynomial>::table;
|
||||||
|
|
||||||
template <std::uint32_t Polynomial, std::uint32_t XorIn=0xffffffff, std::uint32_t XorOut=XorIn>
|
template <std::uint32_t Polynomial, std::uint32_t XorIn=0xffffffff, std::uint32_t XorOut=XorIn>
|
||||||
constexpr std::uint32_t crc32 (const char* data, std::size_t len, std::uint32_t crc) {
|
constexpr std::uint32_t crc32 (const char* data, std::size_t len, std::uint32_t crc) noexcept {
|
||||||
//static_assert(g_polynomial_table<Polynomial>[0b10000000] == Polynomial);
|
//static_assert(g_polynomial_table<Polynomial>[0b10000000] == Polynomial);
|
||||||
crc ^= XorIn;
|
crc ^= XorIn;
|
||||||
for (std::size_t z = 0; z < len; ++z) {
|
for (std::size_t z = 0; z < len; ++z) {
|
||||||
|
|
|
@ -48,7 +48,6 @@ endif
|
||||||
conf.set('POINTER_SIZE', ptr_size)
|
conf.set('POINTER_SIZE', ptr_size)
|
||||||
conf.set('FUNC_POINTER_SIZE', func_ptr_size)
|
conf.set('FUNC_POINTER_SIZE', func_ptr_size)
|
||||||
conf.set('WRENPP_NAME', meson.project_name())
|
conf.set('WRENPP_NAME', meson.project_name())
|
||||||
conf.set('WRENPP_WITH_SSE42', get_option('wrenpp_with_sse42'))
|
|
||||||
|
|
||||||
subdir('include')
|
subdir('include')
|
||||||
subdir('src')
|
subdir('src')
|
||||||
|
|
|
@ -3,4 +3,3 @@ option('build_examples', type: 'boolean', value: false)
|
||||||
option('wren_with_rand', type: 'boolean', value: false)
|
option('wren_with_rand', type: 'boolean', value: false)
|
||||||
option('wren_with_meta', type: 'boolean', value: false)
|
option('wren_with_meta', type: 'boolean', value: false)
|
||||||
option('wrenpp_with_name_guessing', type: 'boolean', value: true)
|
option('wrenpp_with_name_guessing', type: 'boolean', value: true)
|
||||||
option('wrenpp_with_sse42', type: 'boolean', value: true)
|
|
||||||
|
|
|
@ -16,103 +16,44 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "wrenpp/detail/crc32.hpp"
|
#include "wrenpp/detail/crc32.hpp"
|
||||||
#include "pvt_config.h"
|
#include "crc32_config.h"
|
||||||
#if defined(WRENPP_WITH_SSE42)
|
|
||||||
# if defined(__aarch64__)
|
|
||||||
extern "C" {
|
|
||||||
# include "sse2neon.h"
|
|
||||||
# include <sys/auxv.h>
|
|
||||||
# include <asm/hwcap.h>
|
|
||||||
} // extern C
|
|
||||||
# else
|
|
||||||
# include <smmintrin.h>
|
|
||||||
# include <immintrin.h>
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
#include <cassert>
|
|
||||||
|
|
||||||
#if defined(WRENPP_WITH_SSE42)
|
namespace wren::detail {
|
||||||
// Byte-boundary alignment issues
|
#if HAVE_NEON
|
||||||
# define CALC_CRC(op, crc, type, buf, len) \
|
[[gnu::const]] bool has_crc32_neon() noexcept;
|
||||||
do { \
|
[[gnu::pure]] std::uint32_t crc32c_neon(const char*, std::size_t, std::uint32_t) noexcept;
|
||||||
for (; (len) >= sizeof (type); (len) -= sizeof(type), buf += sizeof (type)) { \
|
|
||||||
(crc) = op((crc), *(type *) (buf)); \
|
|
||||||
} \
|
|
||||||
} while(0)
|
|
||||||
#endif
|
#endif
|
||||||
|
#if HAVE_SSE42
|
||||||
|
[[gnu::const]] bool has_crc32_sse42() noexcept;
|
||||||
|
[[gnu::pure]] std::uint32_t crc32c_sse42(const char*, std::size_t, std::uint32_t) noexcept;
|
||||||
|
#endif
|
||||||
|
} //namespace wren::detail
|
||||||
|
|
||||||
|
namespace wren::detail {
|
||||||
|
//Some useful polynomials:
|
||||||
|
// zlib: 0x04C11DB7
|
||||||
|
// castagnoli (intel): 0x1EDC6F41
|
||||||
|
|
||||||
namespace wren {
|
|
||||||
namespace {
|
namespace {
|
||||||
//Some useful polynomials:
|
|
||||||
// zlib: 0x04C11DB7
|
|
||||||
// castagnoli (intel): 0x1EDC6F41
|
|
||||||
|
|
||||||
/* Compute CRC-32C using the Intel hardware instruction. */
|
|
||||||
/* for better parallelization with bigger buffers see
|
|
||||||
http://www.drdobbs.com/parallel/fast-parallelized-crc-computation-using/229401411 */
|
|
||||||
std::uint32_t crc32c_hw(const char* input, std::size_t len, std::uint32_t crc)
|
|
||||||
{
|
|
||||||
#if defined(WRENPP_WITH_SSE42)
|
|
||||||
//see https://github.com/rurban/smhasher/blob/master/crc32_hw.c
|
|
||||||
constexpr std::size_t align_size = alignof(std::uint64_t);
|
|
||||||
constexpr std::size_t align_mask = align_size - 1;
|
|
||||||
|
|
||||||
// XOR the initial CRC with INT_MAX
|
|
||||||
//crc ^= 0xFFFFFFFF;
|
|
||||||
crc = ~crc;
|
|
||||||
|
|
||||||
// Align the input to the word boundary
|
|
||||||
for (; (len > 0) && (reinterpret_cast<std::uintptr_t>(input) & align_mask); len--, input++) {
|
|
||||||
crc = _mm_crc32_u8(crc, *input);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Blast off the CRC32 calculation
|
|
||||||
#if defined(__x86_64__) || defined(__aarch64__)
|
|
||||||
CALC_CRC(_mm_crc32_u64, crc, std::uint64_t, input, len);
|
|
||||||
#endif
|
|
||||||
CALC_CRC(_mm_crc32_u32, crc, std::uint32_t, input, len);
|
|
||||||
CALC_CRC(_mm_crc32_u16, crc, std::uint16_t, input, len);
|
|
||||||
CALC_CRC(_mm_crc32_u8, crc, std::uint8_t, input, len);
|
|
||||||
|
|
||||||
// Post-process the crc
|
|
||||||
return ~crc;
|
|
||||||
#else
|
|
||||||
static_cast<void>(input);
|
|
||||||
static_cast<void>(crc);
|
|
||||||
static_cast<void>(len);
|
|
||||||
assert(false); //not available in this build, this code should be unreachable
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
[[gnu::const]]
|
[[gnu::const]]
|
||||||
bool has_hw_crc32() {
|
auto best_crc32_function() {
|
||||||
#if defined(WRENPP_WITH_SSE42)
|
#if HAVE_NEON
|
||||||
# if defined(__amd64__) || defined(_M_AMD64)
|
if (has_crc32_neon())
|
||||||
//if (_may_i_use_cpu_feature(_FEATURE_SSE4_2)) {
|
return &crc32c_neon;
|
||||||
if (__builtin_cpu_supports("sse4.2"))
|
|
||||||
return true;
|
|
||||||
else
|
|
||||||
# elif defined(__aarch64__)
|
|
||||||
//see
|
|
||||||
//https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
|
|
||||||
if (getauxval(AT_HWCAP) & HWCAP_CRC32)
|
|
||||||
return true;
|
|
||||||
else
|
|
||||||
# endif
|
|
||||||
#endif
|
#endif
|
||||||
return false;
|
|
||||||
|
#if HAVE_SSE42
|
||||||
|
if (has_crc32_sse42())
|
||||||
|
return &crc32c_sse42;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return &crc32<detail::g_castagnoli_polynomial>;
|
||||||
}
|
}
|
||||||
} //unnamed namespace
|
} //unnamed namespace
|
||||||
|
|
||||||
namespace detail {
|
[[gnu::const]]
|
||||||
[[gnu::const]]
|
std::uint32_t runtime_crc32c (const char* data, std::size_t size, std::uint32_t crc) {
|
||||||
std::uint32_t runtime_crc32c (const char* data, std::size_t size, std::uint32_t crc) {
|
static const auto crc32c_implem = best_crc32_function();
|
||||||
static const auto crc32c_implem = (has_hw_crc32() ?
|
return (*crc32c_implem)(data, size, crc);
|
||||||
&crc32c_hw
|
}
|
||||||
:
|
} //namespace wren::detail
|
||||||
&detail::crc32<detail::g_castagnoli_polynomial>
|
|
||||||
);
|
|
||||||
return (*crc32c_implem)(data, size, crc);
|
|
||||||
}
|
|
||||||
} //namespace detail
|
|
||||||
} //namespace wren
|
|
||||||
|
|
100
src/crc32/crc32_neon.cpp
Normal file
100
src/crc32/crc32_neon.cpp
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
/* Copyright 2020-2022, Michele Santullo
|
||||||
|
* This file is part of wrenpp.
|
||||||
|
*
|
||||||
|
* Wrenpp is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Wrenpp is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with wrenpp. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/auxv.h>
|
||||||
|
#include <asm/hwcap.h>
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
|
// Byte-boundary alignment issues
|
||||||
|
#define CALC_CRC(op, crc, type, buf, len) \
|
||||||
|
do { \
|
||||||
|
for (; (len) >= sizeof (type); (len) -= sizeof(type), buf += sizeof (type)) { \
|
||||||
|
(crc) = op((crc), *(type *) (buf)); \
|
||||||
|
} \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
namespace wren::detail {
|
||||||
|
//King_DuckZ - adapted from
|
||||||
|
//https://github.com/rurban/smhasher/blob/master/sse2neon.h
|
||||||
|
namespace {
|
||||||
|
[[gnu::always_inline]]
|
||||||
|
std::uint32_t neon_crc32cb(std::uint32_t crc, std::uint8_t v) {
|
||||||
|
__asm__ __volatile__("crc32cb %w[c], %w[c], %w[v]\n\t"
|
||||||
|
: [c] "+r"(crc)
|
||||||
|
: [v] "r"(v));
|
||||||
|
}
|
||||||
|
|
||||||
|
[[gnu::always_inline]]
|
||||||
|
std::uint32_t neon_crc32ch (std::uint32_t crc, std::uint16_t v) {
|
||||||
|
__asm__ __volatile__("crc32ch %w[c], %w[c], %w[v]\n\t"
|
||||||
|
: [c] "+r"(crc)
|
||||||
|
: [v] "r"(v));
|
||||||
|
}
|
||||||
|
|
||||||
|
[[gnu::always_inline]]
|
||||||
|
std::uint32_t neon_crc32cw(std::uint32_t crc, std::uint32_t v) {
|
||||||
|
__asm__ __volatile__("crc32cw %w[c], %w[c], %w[v]\n\t"
|
||||||
|
: [c] "+r"(crc)
|
||||||
|
: [v] "r"(v));
|
||||||
|
}
|
||||||
|
|
||||||
|
[[gnu::always_inline]]
|
||||||
|
std::uint64_t neon_crc32cx (std::uint64_t crc, std::uint64_t v) {
|
||||||
|
__asm__ __volatile__("crc32cx %w[c], %w[c], %x[v]\n\t"
|
||||||
|
: [c] "+r"(crc)
|
||||||
|
: [v] "r"(v));
|
||||||
|
}
|
||||||
|
} //unnamed namespace
|
||||||
|
|
||||||
|
[[gnu::const]]
|
||||||
|
bool has_crc32_neon() noexcept {
|
||||||
|
//see
|
||||||
|
//https://community.arm.com/arm-community-blogs/b/operating-systems-blog/posts/runtime-detection-of-cpu-features-on-an-armv8-a-cpu
|
||||||
|
const bool has_hw_crc32 = (getauxval(AT_HWCAP) & HWCAP_CRC32 ? true : false);
|
||||||
|
return has_hw_crc32;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compute CRC-32C using the Intel hardware instruction. */
|
||||||
|
/* for better parallelization with bigger buffers see
|
||||||
|
http://www.drdobbs.com/parallel/fast-parallelized-crc-computation-using/229401411 */
|
||||||
|
[[gnu::pure]]
|
||||||
|
std::uint32_t crc32c_neon(const char* input, std::size_t len, std::uint32_t crc) noexcept {
|
||||||
|
//see https://github.com/rurban/smhasher/blob/master/crc32_hw.c
|
||||||
|
constexpr std::size_t align_size = alignof(std::uint64_t);
|
||||||
|
constexpr std::size_t align_mask = align_size - 1;
|
||||||
|
|
||||||
|
// XOR the initial CRC with INT_MAX
|
||||||
|
//crc ^= 0xFFFFFFFF;
|
||||||
|
crc = ~crc;
|
||||||
|
|
||||||
|
// Align the input to the word boundary
|
||||||
|
for (; (len > 0) && (reinterpret_cast<std::uintptr_t>(input) & align_mask); len--, input++) {
|
||||||
|
crc = neon_crc32cb(crc, *input);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Blast off the CRC32 calculation
|
||||||
|
#if defined(__x86_64__) || defined(__aarch64__)
|
||||||
|
CALC_CRC(neon_crc32cx, crc, std::uint64_t, input, len);
|
||||||
|
#endif
|
||||||
|
CALC_CRC(neon_crc32cw, crc, std::uint32_t, input, len);
|
||||||
|
CALC_CRC(neon_crc32ch, crc, std::uint16_t, input, len);
|
||||||
|
CALC_CRC(neon_crc32cb, crc, std::uint8_t, input, len);
|
||||||
|
|
||||||
|
// Post-process the crc
|
||||||
|
return ~crc;
|
||||||
|
}
|
||||||
|
} //namespace wren::detail
|
66
src/crc32/crc32_sse42.cpp
Normal file
66
src/crc32/crc32_sse42.cpp
Normal file
|
@ -0,0 +1,66 @@
|
||||||
|
/* Copyright 2020-2022, Michele Santullo
|
||||||
|
* This file is part of wrenpp.
|
||||||
|
*
|
||||||
|
* Wrenpp is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* Wrenpp is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with wrenpp. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <smmintrin.h>
|
||||||
|
#include <immintrin.h>
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
|
// Byte-boundary alignment issues
|
||||||
|
#define CALC_CRC(op, crc, type, buf, len) \
|
||||||
|
do { \
|
||||||
|
for (; (len) >= sizeof (type); (len) -= sizeof(type), buf += sizeof (type)) { \
|
||||||
|
(crc) = op((crc), *(type *) (buf)); \
|
||||||
|
} \
|
||||||
|
} while(0)
|
||||||
|
|
||||||
|
namespace wren::detail {
|
||||||
|
[[gnu::const]]
|
||||||
|
bool has_crc32_sse42() noexcept {
|
||||||
|
const bool has_hw_crc32 = (__builtin_cpu_supports("sse4.2") ? true : false);
|
||||||
|
return has_hw_crc32;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compute CRC-32C using the Intel hardware instruction. */
|
||||||
|
/* for better parallelization with bigger buffers see
|
||||||
|
http://www.drdobbs.com/parallel/fast-parallelized-crc-computation-using/229401411 */
|
||||||
|
[[gnu::pure]]
|
||||||
|
std::uint32_t crc32c_sse42(const char* input, std::size_t len, std::uint32_t crc) noexcept {
|
||||||
|
//see https://github.com/rurban/smhasher/blob/master/crc32_hw.c
|
||||||
|
constexpr std::size_t align_size = alignof(std::uint64_t);
|
||||||
|
constexpr std::size_t align_mask = align_size - 1;
|
||||||
|
|
||||||
|
// XOR the initial CRC with INT_MAX
|
||||||
|
//crc ^= 0xFFFFFFFF;
|
||||||
|
crc = ~crc;
|
||||||
|
|
||||||
|
// Align the input to the word boundary
|
||||||
|
for (; (len > 0) && (reinterpret_cast<std::uintptr_t>(input) & align_mask); len--, input++) {
|
||||||
|
crc = _mm_crc32_u8(crc, *input);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Blast off the CRC32 calculation
|
||||||
|
#if defined(__x86_64__) || defined(__aarch64__)
|
||||||
|
CALC_CRC(_mm_crc32_u64, crc, std::uint64_t, input, len);
|
||||||
|
#endif
|
||||||
|
CALC_CRC(_mm_crc32_u32, crc, std::uint32_t, input, len);
|
||||||
|
CALC_CRC(_mm_crc32_u16, crc, std::uint16_t, input, len);
|
||||||
|
CALC_CRC(_mm_crc32_u8, crc, std::uint8_t, input, len);
|
||||||
|
|
||||||
|
// Post-process the crc
|
||||||
|
return ~crc;
|
||||||
|
}
|
||||||
|
} //namespace wren::detail
|
|
@ -1,5 +1,21 @@
|
||||||
|
simd = import('unstable-simd')
|
||||||
compiler_opts = []
|
compiler_opts = []
|
||||||
|
|
||||||
|
cpp = meson.get_compiler('cpp')
|
||||||
|
crc32_simd = simd.check('crc32_hw',
|
||||||
|
sse42: 'crc32_sse42.cpp',
|
||||||
|
neon: 'crc32_neon.cpp',
|
||||||
|
compiler: cpp,
|
||||||
|
)
|
||||||
|
|
||||||
|
crc32_objs = crc32_simd[0]
|
||||||
|
crc32_config = crc32_simd[1]
|
||||||
|
|
||||||
|
project_config_file = configure_file(
|
||||||
|
output: 'crc32_config.h',
|
||||||
|
configuration: crc32_config
|
||||||
|
)
|
||||||
|
|
||||||
if get_option('wrenpp_with_sse42')
|
if get_option('wrenpp_with_sse42')
|
||||||
if arch == 'amd64'
|
if arch == 'amd64'
|
||||||
compiler_opts += ['-msse4.2']
|
compiler_opts += ['-msse4.2']
|
||||||
|
@ -12,9 +28,10 @@ endif
|
||||||
|
|
||||||
crc32 = static_library('crc32',
|
crc32 = static_library('crc32',
|
||||||
'crc32.cpp',
|
'crc32.cpp',
|
||||||
include_directories: [public_incl, src_incl],
|
include_directories: [public_incl],
|
||||||
install: false,
|
install: false,
|
||||||
cpp_args: compiler_opts + global_compiler_opts,
|
cpp_args: compiler_opts + global_compiler_opts,
|
||||||
|
link_with: crc32_objs,
|
||||||
)
|
)
|
||||||
|
|
||||||
crc32_dep = declare_dependency(
|
crc32_dep = declare_dependency(
|
||||||
|
|
|
@ -1,141 +0,0 @@
|
||||||
#ifndef SSE2NEON_H
|
|
||||||
#define SSE2NEON_H
|
|
||||||
|
|
||||||
// This header file provides a simple API translation layer
|
|
||||||
// between SSE intrinsics to their corresponding Arm/Aarch64 NEON versions
|
|
||||||
//
|
|
||||||
// This header file does not yet translate all of the SSE intrinsics.
|
|
||||||
//
|
|
||||||
// Contributors to this work are:
|
|
||||||
// John W. Ratcliff <jratcliffscarab@gmail.com>
|
|
||||||
// Brandon Rowlett <browlett@nvidia.com>
|
|
||||||
// Ken Fast <kfast@gdeb.com>
|
|
||||||
// Eric van Beurden <evanbeurden@nvidia.com>
|
|
||||||
// Alexander Potylitsin <apotylitsin@nvidia.com>
|
|
||||||
// Hasindu Gamaarachchi <hasindu2008@gmail.com>
|
|
||||||
// Jim Huang <jserv@biilabs.io>
|
|
||||||
// Mark Cheng <marktwtn@biilabs.io>
|
|
||||||
// Malcolm James MacLeod <malcolm@gulden.com>
|
|
||||||
// Devin Hussey (easyaspi314) <husseydevin@gmail.com>
|
|
||||||
// Sebastian Pop <spop@amazon.com>
|
|
||||||
// Developer Ecosystem Engineering <DeveloperEcosystemEngineering@apple.com>
|
|
||||||
// Danila Kutenin <danilak@google.com>
|
|
||||||
// François Turban (JishinMaster) <francois.turban@gmail.com>
|
|
||||||
// Pei-Hsuan Hung <afcidk@gmail.com>
|
|
||||||
// Yang-Hao Yuan <yanghau@biilabs.io>
|
|
||||||
// Syoyo Fujita <syoyo@lighttransport.com>
|
|
||||||
// Brecht Van Lommel <brecht@blender.org>
|
|
||||||
|
|
||||||
/*
|
|
||||||
* sse2neon is freely redistributable under the MIT License.
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
||||||
* of this software and associated documentation files (the "Software"), to deal
|
|
||||||
* in the Software without restriction, including without limitation the rights
|
|
||||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the Software is
|
|
||||||
* furnished to do so, subject to the following conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be included in
|
|
||||||
* all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
||||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
||||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
||||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
||||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
||||||
* SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
//King_DuckZ
|
|
||||||
//trimmed down version of
|
|
||||||
//https://github.com/rurban/smhasher/blob/master/sse2neon.h
|
|
||||||
|
|
||||||
#if defined(__GNUC__) || defined(__clang__)
|
|
||||||
#pragma push_macro("FORCE_INLINE")
|
|
||||||
#pragma push_macro("ALIGN_STRUCT")
|
|
||||||
#ifndef FORCE_INLINE
|
|
||||||
#define FORCE_INLINE static inline __attribute__((always_inline))
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#error "Macro name collisions may happen with unsupported compiler."
|
|
||||||
#ifdef FORCE_INLINE
|
|
||||||
#undef FORCE_INLINE
|
|
||||||
#endif
|
|
||||||
#define FORCE_INLINE static inline
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
#include <stdlib.h>
|
|
||||||
|
|
||||||
// Starting with the initial value in crc, accumulates a CRC32 value for
|
|
||||||
// unsigned 8-bit integer v.
|
|
||||||
// https://msdn.microsoft.com/en-us/library/bb514036(v=vs.100)
|
|
||||||
FORCE_INLINE uint32_t _mm_crc32_u8(uint32_t crc, uint8_t v)
|
|
||||||
{
|
|
||||||
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
|
|
||||||
__asm__ __volatile__("crc32cb %w[c], %w[c], %w[v]\n\t"
|
|
||||||
: [c] "+r"(crc)
|
|
||||||
: [v] "r"(v));
|
|
||||||
#else
|
|
||||||
crc ^= v;
|
|
||||||
for (int bit = 0; bit < 8; bit++) {
|
|
||||||
if (crc & 1)
|
|
||||||
crc = (crc >> 1) ^ UINT32_C(0x82f63b78);
|
|
||||||
else
|
|
||||||
crc = (crc >> 1);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return crc;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Starting with the initial value in crc, accumulates a CRC32 value for
|
|
||||||
// unsigned 16-bit integer v.
|
|
||||||
// https://msdn.microsoft.com/en-us/library/bb531411(v=vs.100)
|
|
||||||
FORCE_INLINE uint32_t _mm_crc32_u16(uint32_t crc, uint16_t v)
|
|
||||||
{
|
|
||||||
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
|
|
||||||
__asm__ __volatile__("crc32ch %w[c], %w[c], %w[v]\n\t"
|
|
||||||
: [c] "+r"(crc)
|
|
||||||
: [v] "r"(v));
|
|
||||||
#else
|
|
||||||
crc = _mm_crc32_u8(crc, v & 0xff);
|
|
||||||
crc = _mm_crc32_u8(crc, (v >> 8) & 0xff);
|
|
||||||
#endif
|
|
||||||
return crc;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Starting with the initial value in crc, accumulates a CRC32 value for
|
|
||||||
// unsigned 32-bit integer v.
|
|
||||||
// https://msdn.microsoft.com/en-us/library/bb531394(v=vs.100)
|
|
||||||
FORCE_INLINE uint32_t _mm_crc32_u32(uint32_t crc, uint32_t v)
|
|
||||||
{
|
|
||||||
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
|
|
||||||
__asm__ __volatile__("crc32cw %w[c], %w[c], %w[v]\n\t"
|
|
||||||
: [c] "+r"(crc)
|
|
||||||
: [v] "r"(v));
|
|
||||||
#else
|
|
||||||
crc = _mm_crc32_u16(crc, v & 0xffff);
|
|
||||||
crc = _mm_crc32_u16(crc, (v >> 16) & 0xffff);
|
|
||||||
#endif
|
|
||||||
return crc;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Starting with the initial value in crc, accumulates a CRC32 value for
|
|
||||||
// unsigned 64-bit integer v.
|
|
||||||
// https://msdn.microsoft.com/en-us/library/bb514033(v=vs.100)
|
|
||||||
FORCE_INLINE uint64_t _mm_crc32_u64(uint64_t crc, uint64_t v)
|
|
||||||
{
|
|
||||||
#if defined(__aarch64__) && defined(__ARM_FEATURE_CRC32)
|
|
||||||
__asm__ __volatile__("crc32cx %w[c], %w[c], %x[v]\n\t"
|
|
||||||
: [c] "+r"(crc)
|
|
||||||
: [v] "r"(v));
|
|
||||||
#else
|
|
||||||
crc = _mm_crc32_u32((uint32_t)(crc), v & 0xffffffff);
|
|
||||||
crc = _mm_crc32_u32((uint32_t)(crc), (v >> 32) & 0xffffffff);
|
|
||||||
#endif
|
|
||||||
return crc;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,4 +1,3 @@
|
||||||
src_incl = include_directories('.')
|
|
||||||
subdir('crc32')
|
subdir('crc32')
|
||||||
|
|
||||||
project_config_file = configure_file(
|
project_config_file = configure_file(
|
||||||
|
|
|
@ -27,5 +27,3 @@ static_assert(sizeof(void*) == ASM_PTR_SIZE, "Build system reports an unexpected
|
||||||
|
|
||||||
static_assert(sizeof(void(*)(int)) == ASM_FUNC_PTR_SIZE, "Build system reports an unexpected function pointer size, please ensure assembly code is correct");
|
static_assert(sizeof(void(*)(int)) == ASM_FUNC_PTR_SIZE, "Build system reports an unexpected function pointer size, please ensure assembly code is correct");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#mesondefine WRENPP_WITH_SSE42
|
|
||||||
|
|
Loading…
Reference in a new issue