From 6a47be250f438fcb976c0e01facd76c559c2980e Mon Sep 17 00:00:00 2001 From: King_DuckZ Date: Mon, 23 May 2022 04:19:54 +0200 Subject: [PATCH] Replace crc32 implementation This is a properly constexpr version for modern c++. Also added a fast runtime version of it. --- include/wrenpp/detail/StringCRC32.hpp | 148 -------------------------- include/wrenpp/detail/crc32.hpp | 95 +++++++++++++++++ include/wrenpp/detail/meson.build | 2 +- include/wrenpp/vm.hpp | 6 +- meson.build | 5 + meson_options.txt | 1 + src/crc32.cpp | 92 ++++++++++++++++ src/pvt_config.h.in | 2 + 8 files changed, 199 insertions(+), 152 deletions(-) delete mode 100644 include/wrenpp/detail/StringCRC32.hpp create mode 100644 include/wrenpp/detail/crc32.hpp create mode 100644 src/crc32.cpp diff --git a/include/wrenpp/detail/StringCRC32.hpp b/include/wrenpp/detail/StringCRC32.hpp deleted file mode 100644 index fb67598..0000000 --- a/include/wrenpp/detail/StringCRC32.hpp +++ /dev/null @@ -1,148 +0,0 @@ -#ifndef id42C91D2875AE4E56BA61051619B58C03 -#define id42C91D2875AE4E56BA61051619B58C03 - -#include -#include - -namespace duckcore { - class StringCRC32 { - struct FnvHashBase { - [[gnu::pure]] [[gnu::always_inline]] - static uint32_t Calculate ( const char parChar, uint32_t parCrc ); - }; - - //Update step: crc_32_tab[(crc ^ ch) & 0xff] ^ (crc >> 8) - template - struct FnvHash : private FnvHashBase { - [[gnu::always_inline]] - static uint32_t Calculate ( const char (&str)[N], uint32_t crc ) { - const uint32_t prevcrc = FnvHash::Calculate(str, crc); - return FnvHashBase::Calculate(str[I-1], prevcrc); - } - }; - template - struct FnvHash : private FnvHashBase { - [[gnu::always_inline]] - static uint32_t Calculate ( const char (&str)[N], uint32_t crc ) { - return FnvHashBase::Calculate(str[0], crc); - } - }; - - class ConstCharWrapper { - public: - ConstCharWrapper ( const char* parStr ) : str(parStr) { } - const char* const str; - }; - public: - - StringCRC32 ( ConstCharWrapper parStr ); - - template - 
[[gnu::always_inline]] - StringCRC32 ( const char (&str)[N] ) : - m_hash(~FnvHash::Calculate(str, 0xffffffff)) - { - } - - operator uint32_t ( void ) const { return m_hash; } - - private: - uint32_t m_hash; - - private: - //This is meant to represent a list of bits that are set to 1 - template - struct BitArray { - enum { value = N }; - typedef T Next; - }; - //Polynomial from zlib: {0,1,2,4,5,7,8,10,11,12,16,22,23,26} or 0xedb88320UL -// for (int poly = 0, n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++) -// poly |= (z_crc_t)1 << (31 - p[n]); - typedef BitArray<0, BitArray<1, BitArray<2, BitArray<4, BitArray<5, - BitArray<7, BitArray<8, BitArray<10, BitArray<11, BitArray<12, - BitArray<16, BitArray<22, BitArray<23, - BitArray<26, std::nullptr_t> > > > > > > > > > > > > > PolynomialBits; - template - struct MakePolynomial { - enum { value = (1 << (M - P::value)) bitor MakePolynomial::value }; - }; - template - struct MakePolynomial { - enum { value = 0 }; - }; - - //CRC32 algorithm from zlib: -// for (int n = 0; n < 256; n++) { -// c = n; -// for (k = 0; k < 8; k++) c = c & 1 ? poly ^ (c >> 1) : c >> 1; -// crc_table[n] = c; -// } - template (MakePolynomial::value)> - struct TableLookup { - static_assert(S > 1); - enum { - poly = P, - value = TableLookup::value, 1, poly>::value - }; - }; - template - struct TableLookup { - enum { - poly = P, - value = ((C & 1) == 1 ? 
poly ^ (static_cast(C) >> 1) : static_cast(C) >> 1) - }; - }; - }; - - inline uint32_t StringCRC32::FnvHashBase::Calculate (const char parChar, uint32_t parCrc) { - static const uint32_t crc_32_tab[256] = { /* CRC polynomial 0xedb88320 */ - 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, - 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, - 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, - 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, - 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, - 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, - 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, - 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, - 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, - 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, - 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, - 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, - 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, - 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, - 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, - 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, - 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, - 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, - 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, - 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, - 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, - 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, - 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, - 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 
0x0a00ae27, 0x7d079eb1, - 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, - 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, - 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, - 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, - 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, - 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, - 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, - 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, - 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, - 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, - 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, - 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, - 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, - 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, - 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, - 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, - 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, - 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, - 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d - }; - return crc_32_tab[(parCrc ^ parChar) & 0xff] ^ (parCrc >> 8); - } -} //namespace duckcore - -#endif diff --git a/include/wrenpp/detail/crc32.hpp b/include/wrenpp/detail/crc32.hpp new file mode 100644 index 0000000..8d38624 --- /dev/null +++ b/include/wrenpp/detail/crc32.hpp @@ -0,0 +1,95 @@ +/* Copyright 2020-2022, Michele Santullo + * This file is part of wrenpp. 
+ * + * Wrenpp is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Wrenpp is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with wrenpp. If not, see . + */ + +#pragma once + +#include +#include +#include +#include + +namespace wren { + namespace detail { + [[gnu::pure]] //pure, not const: the result depends on the bytes data points to + std::uint32_t runtime_crc32c (const char* data, std::size_t size, std::uint32_t crc); + + constexpr std::uint32_t g_castagnoli_polynomial = 0x1EDC6F41; + + constexpr std::uint8_t reverse (std::uint8_t b) { + //see https://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits + return ((b * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32; + } + + constexpr std::uint32_t reverse (std::uint32_t val) { + return (reverse(static_cast(val & 0xff)) << 24) | + (reverse(static_cast(val >> 8 & 0xff)) << 16) | + (reverse(static_cast(val >> 16 & 0xff)) << 8) | + reverse(static_cast(val >> 24 & 0xff)); + } + + consteval std::uint32_t calc_table_entry (std::uint32_t index, std::uint32_t polynomial) { + polynomial = reverse(polynomial); + auto c = static_cast(index); + for (std::uint_fast32_t k = 0; k < 8; ++k) { + c = (c >> 1) ^ ((c & 0x1u) ?
polynomial : 0); + } + return static_cast(c); + } + + template + struct PolynomialTableBase; + + template + struct PolynomialTableBase> { + static constexpr std::size_t table_size = sizeof...(Indices); + static constexpr std::uint32_t table[table_size] = {calc_table_entry(Indices, Polynomial)...}; + }; + + template + struct PolynomialTable : public PolynomialTableBase{})> { + }; + + template + constexpr inline auto g_polynomial_table = PolynomialTable::table; + + template + constexpr std::uint32_t crc32 (const char* data, std::size_t len, std::uint32_t crc) { + //static_assert(g_polynomial_table[0b10000000] == Polynomial); + crc ^= XorIn; + for (std::size_t z = 0; z < len; ++z) { + crc = g_polynomial_table[(crc ^ static_cast(data[z])) & 0xff] ^ (crc >> 8); + } + return crc ^ XorOut; + } + } //namespace detail + + [[gnu::pure]] + constexpr std::uint32_t crc32c (const char* data, std::size_t size) { + if (not std::is_constant_evaluated()) { + return detail::runtime_crc32c(data, size, 0); + } + else { + return detail::crc32(data, size, 0); + } + } + + template + [[gnu::pure]] + constexpr std::uint32_t crc32c (const char (&data)[N]) { + return crc32c(data, N); //hashes all N chars, including a string literal's terminating NUL + } +} //namespace wren diff --git a/include/wrenpp/detail/meson.build b/include/wrenpp/detail/meson.build index 2e0b131..5e7f344 100644 --- a/include/wrenpp/detail/meson.build +++ b/include/wrenpp/detail/meson.build @@ -1,12 +1,12 @@ include_files = [ 'construct_foreign_class.hpp', + 'crc32.hpp', 'error_type.hpp', 'guess_class_name.hpp', 'has_method.hpp', 'module_and_name.hpp', 'setters_getters.hpp', 'string_bt.hpp', - 'StringCRC32.hpp', 'strings_in_vector.hpp', 'wren_class_name_from_type.hpp', 'wren_types.hpp', diff --git a/include/wrenpp/vm.hpp b/include/wrenpp/vm.hpp index 70f4674..dbe6221 100644 --- a/include/wrenpp/vm.hpp +++ b/include/wrenpp/vm.hpp @@ -19,7 +19,7 @@ #include "detail/has_method.hpp" #include "detail/error_type.hpp" -#include "detail/StringCRC32.hpp" +#include "detail/crc32.hpp" #include
"detail/wren_types.hpp" #include "handle.hpp" #include @@ -154,8 +154,8 @@ namespace wren { template [[gnu::const]] - inline constexpr std::uint32_t type_id() { - return duckcore::StringCRC32(__PRETTY_FUNCTION__); + inline consteval std::uint32_t type_id() { + return crc32c(__PRETTY_FUNCTION__); } } //namespace detail diff --git a/meson.build b/meson.build index d1315e1..8bda0ca 100644 --- a/meson.build +++ b/meson.build @@ -44,10 +44,14 @@ compiler_opts = [] if get_option('wrenpp_with_name_guessing') compiler_opts += ['-DWRENPP_WITH_NAME_GUESSING'] endif +if get_option('wrenpp_with_sse42') and host_machine.cpu_family().startswith('x86') # -msse4.2 is an x86-only flag; other hosts (e.g. aarch64 with sse2neon) must not receive it + compiler_opts += ['-msse4.2'] +endif conf.set('POINTER_SIZE', ptr_size) conf.set('FUNC_POINTER_SIZE', func_ptr_size) conf.set('WRENPP_NAME', meson.project_name()) +conf.set('WRENPP_WITH_SSE42', get_option('wrenpp_with_sse42')) project_config_file = configure_file( input: 'src/pvt_config.h.in', @@ -69,6 +73,7 @@ wrenpp = library(meson.project_name(), 'src/callback_manager.cpp', 'src/class_manager.cpp', 'src/wren_class_name_from_type.cpp', + 'src/crc32.cpp', dependencies: [wren_dep], include_directories: public_incl, install: (not meson.is_subproject() or get_option('default_library')=='shared'), diff --git a/meson_options.txt b/meson_options.txt index 2a9a580..ee47212 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -3,3 +3,4 @@ option('build_examples', type: 'boolean', value: false) option('wren_with_rand', type: 'boolean', value: false) option('wren_with_meta', type: 'boolean', value: false) option('wrenpp_with_name_guessing', type: 'boolean', value: true) +option('wrenpp_with_sse42', type: 'boolean', value: true) diff --git a/src/crc32.cpp b/src/crc32.cpp new file mode 100644 index 0000000..535f20a --- /dev/null +++ b/src/crc32.cpp @@ -0,0 +1,92 @@ +/* Copyright 2020-2022, Michele Santullo + * This file is part of wrenpp.
+ * + * Wrenpp is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * Wrenpp is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with wrenpp. If not, see . + */ + +#include "wrenpp/detail/crc32.hpp" +#include "pvt_config.h" +#if defined(WRENPP_WITH_SSE42) +# if defined(__aarch64__) +# include "sse2neon.h" +# else +# include +# include +# endif +#endif + +#if defined(WRENPP_WITH_SSE42) + // Folds buf into crc in sizeof(type)-byte steps while len allows; the caller aligns buf first +# define CALC_CRC(op, crc, type, buf, len) \ + do { \ + for (; (len) >= sizeof (type); (len) -= sizeof(type), buf += sizeof (type)) { \ + (crc) = op((crc), *reinterpret_cast<const type*>(buf)); \ + } \ + } while(0) +#endif + +namespace wren { + namespace { + //Some useful polynomials: + // zlib: 0x04C11DB7 + // castagnoli (intel): 0x1EDC6F41 + +#if defined(WRENPP_WITH_SSE42) + /* Compute CRC-32C using the Intel hardware instruction.
*/ + /* for better parallelization with bigger buffers see + http://www.drdobbs.com/parallel/fast-parallelized-crc-computation-using/229401411 */ + std::uint32_t crc32c_hw(const void *input, std::size_t len, std::uint32_t crc) + { + //see https://github.com/rurban/smhasher/blob/master/crc32_hw.c + constexpr std::size_t align_size = alignof(std::uint64_t); + constexpr std::size_t align_mask = align_size - 1; + + const char* buf = static_cast(input); + + // Invert the initial CRC (XOR with 0xFFFFFFFF) + crc ^= 0xFFFFFFFF; + + // Align the input to the word boundary + for (; (len > 0) && (reinterpret_cast(buf) & align_mask); len--, buf++) { + crc = _mm_crc32_u8(crc, *buf); + } + + // Blast off the CRC32 calculation +#if defined(__x86_64__) || defined(__aarch64__) + CALC_CRC(_mm_crc32_u64, crc, std::uint64_t, buf, len); +#endif + CALC_CRC(_mm_crc32_u32, crc, std::uint32_t, buf, len); + CALC_CRC(_mm_crc32_u16, crc, std::uint16_t, buf, len); + CALC_CRC(_mm_crc32_u8, crc, std::uint8_t, buf, len); + + // Post-process the crc + return ~crc; + } +#endif + } //unnamed namespace + + [[gnu::pure]] //pure, not const: the result depends on the bytes data points to + std::uint32_t detail::runtime_crc32c (const char* data, std::size_t size, std::uint32_t crc) { +#if defined(WRENPP_WITH_SSE42) + //if (_may_i_use_cpu_feature(_FEATURE_SSE4_2)) { + if (__builtin_cpu_supports("sse4.2")) { + return crc32c_hw(data, size, crc); + } + else +#endif + { + return detail::crc32(data, size, crc); + } + } +} //namespace wren diff --git a/src/pvt_config.h.in b/src/pvt_config.h.in index 15e1d98..259dfa4 100644 --- a/src/pvt_config.h.in +++ b/src/pvt_config.h.in @@ -27,3 +27,5 @@ static_assert(sizeof(void*) == ASM_PTR_SIZE, "Build system reports an unexpected static_assert(sizeof(void(*)(int)) == ASM_FUNC_PTR_SIZE, "Build system reports an unexpected function pointer size, please ensure assembly code is correct"); #endif + +#mesondefine WRENPP_WITH_SSE42