Replace crc32 implementation

This is a properly constexpr version for modern c++. Also
added a fast runtime version of it.
This commit is contained in:
King_DuckZ 2022-05-23 04:19:54 +02:00
commit 6a47be250f
8 changed files with 199 additions and 152 deletions

92
src/crc32.cpp Normal file
View file

@ -0,0 +1,92 @@
/* Copyright 2020-2022, Michele Santullo
* This file is part of wrenpp.
*
* Wrenpp is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Wrenpp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with wrenpp. If not, see <http://www.gnu.org/licenses/>.
*/
#include "wrenpp/detail/crc32.hpp"
#include "pvt_config.h"
#if defined(WRENPP_WITH_SSE42)
# if defined(__aarch64__)
# include "sse2neon.h"
# else
# include <smmintrin.h>
# include <immintrin.h>
# endif
#endif
#include <cstring>
#if defined(WRENPP_WITH_SSE42)
// CALC_CRC(op, crc, type, buf, len)
// Folds `buf` into `crc` in chunks of sizeof(type) bytes by calling
// `op(crc, chunk)`, for as long as at least sizeof(type) bytes remain.
// `crc`, `buf` and `len` must be modifiable lvalues: they are updated in
// place, so a following invocation with a narrower `type` continues exactly
// where this one stopped.
// Each chunk is copied out with std::memcpy instead of being read through a
// casted pointer: that keeps the const qualifier of `buf` intact and makes
// the read independent of the buffer's alignment and effective type.
# define CALC_CRC(op, crc, type, buf, len) \
	do { \
		for (; (len) >= sizeof(type); (len) -= sizeof(type), (buf) += sizeof(type)) { \
			type calc_crc_chunk_; \
			std::memcpy(&calc_crc_chunk_, (buf), sizeof(type)); \
			(crc) = op((crc), calc_crc_chunk_); \
		} \
	} while(0)
#endif
namespace wren {
namespace {
//Some useful polynomials:
// zlib: 0x04C11DB7
// castagnoli (intel): 0x1EDC6F41
#if defined(WRENPP_WITH_SSE42)
/* CRC-32C of the `len` bytes starting at `input`, continuing from the running
   value `crc`, computed with the _mm_crc32_* intrinsics (taken from
   sse2neon.h on aarch64 builds, from the Intel headers otherwise). */
/* For better parallelization with bigger buffers see
   http://www.drdobbs.com/parallel/fast-parallelized-crc-computation-using/229401411 */
std::uint32_t crc32c_hw(const void *input, std::size_t len, std::uint32_t crc)
{
	//see https://github.com/rurban/smhasher/blob/master/crc32_hw.c
	constexpr std::size_t low_bits = alignof(std::uint64_t) - 1;
	const char* cursor = static_cast<const char*>(input);

	// Flip all 32 bits of the incoming value before feeding any data
	crc = ~crc;

	// Consume single bytes until the cursor sits on a uint64_t boundary
	// (or the input runs out first)
	while (len > 0 && (reinterpret_cast<std::uintptr_t>(cursor) & low_bits) != 0) {
		crc = _mm_crc32_u8(crc, *cursor);
		++cursor;
		--len;
	}

	// Widest chunks first; each narrower step only mops up what the
	// previous one could not cover. CALC_CRC advances cursor and len itself.
#if defined(__x86_64__) || defined(__aarch64__)
	CALC_CRC(_mm_crc32_u64, crc, std::uint64_t, cursor, len);
#endif
	CALC_CRC(_mm_crc32_u32, crc, std::uint32_t, cursor, len);
	CALC_CRC(_mm_crc32_u16, crc, std::uint16_t, cursor, len);
	CALC_CRC(_mm_crc32_u8, crc, std::uint8_t, cursor, len);

	// Final bit inversion of the accumulated value
	return ~crc;
}
#endif
} //unnamed namespace
/* Compute the CRC-32C of the `size` bytes starting at `data`, continuing from
 * the running value `crc`. In builds with WRENPP_WITH_SSE42 the
 * intrinsic-based crc32c_hw() is preferred; otherwise, or when the CPU lacks
 * SSE4.2 at run time, the generic detail::crc32 over the Castagnoli
 * polynomial is used.
 *
 * The function reads the memory behind `data`, so it is [[gnu::pure]] and not
 * [[gnu::const]]: GCC documents that a function which examines data through
 * a pointer argument must not be declared const, otherwise two calls with the
 * same pointer may be merged even if the buffer was modified in between.
 * NOTE(review): if the declaration in the header carries an attribute too,
 * keep it in sync with this one. */
[[gnu::pure]]
std::uint32_t runtime_crc32c (const char* data, std::size_t size, std::uint32_t crc) {
#if defined(WRENPP_WITH_SSE42) && defined(__aarch64__)
	// "sse4.2" is an x86 feature name and cannot be queried through
	// __builtin_cpu_supports on aarch64. The intrinsics come from sse2neon
	// there, which selects its implementation at compile time, so no run-time
	// probe is needed. NOTE(review): confirm against the sse2neon version in use.
	return crc32c_hw(data, size, crc);
#else
#	if defined(WRENPP_WITH_SSE42)
	//if (_may_i_use_cpu_feature(_FEATURE_SSE4_2)) {
	if (__builtin_cpu_supports("sse4.2")) {
		return crc32c_hw(data, size, crc);
	}
#	endif
	// Portable fallback
	return detail::crc32<detail::g_castagnoli_polynomial>(data, size, crc);
#endif
}
} //namespace wren

View file

@ -27,3 +27,5 @@ static_assert(sizeof(void*) == ASM_PTR_SIZE, "Build system reports an unexpected
static_assert(sizeof(void(*)(int)) == ASM_FUNC_PTR_SIZE, "Build system reports an unexpected function pointer size, please ensure assembly code is correct");
#endif
#mesondefine WRENPP_WITH_SSE42