mirror of
https://github.com/KingDuckZ/kamokan.git
synced 2025-02-17 09:35:49 +00:00
Store indices (1-based) in escape_bytes instead of just a flag.
For every to-be-escaped character in the source buffer, the corresponding byte in escape_bytes gets set to the index of that character in Needle, plus 1. expand() is similar to slow_copy(), except that it doesn't need to re-compare each character in the source buffer to check if it needs to be replaced. Each non-zero byte in parWhich (that's why indices are 1-based) is the index of the sequence that should be used to replace the original character (withs[curr - 1]).
This commit is contained in:
parent
6bee1af080
commit
e649e9a196
1 changed files with 20 additions and 7 deletions
|
@ -24,6 +24,7 @@
|
||||||
#if !defined(HTML_ESCAPE_WITH_HOUDINI)
|
#if !defined(HTML_ESCAPE_WITH_HOUDINI)
|
||||||
# include <algorithm>
|
# include <algorithm>
|
||||||
#endif
|
#endif
|
||||||
|
#include <climits>
|
||||||
|
|
||||||
namespace tawashi {
|
namespace tawashi {
|
||||||
namespace {
|
namespace {
|
||||||
|
@ -73,6 +74,20 @@ namespace tawashi {
|
||||||
parDest.push_back(parSource[z]);
|
parDest.push_back(parSource[z]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/// Appends sizeof(P) source characters to parDest, substituting the ones
/// flagged in parWhich with their replacement sequence.
///
/// parWhich is interpreted as sizeof(P) selector bytes, least-significant
/// byte first; selector byte i describes parSource[i]:
///  - 0 means "copy parSource[i] unchanged";
///  - k > 0 means "append the k-th entry of parWith instead" (1-based, so
///    that 0 stays available as the "no replacement" marker).
///
/// Needle is only used for its length here: it fixes how many replacement
/// sequences are expected.
///
/// Preconditions (not checked at runtime): parSource points to at least
/// sizeof(P) readable characters and every selector byte is
/// <= sizeof...(Needle).
///
/// NOTE(review): if the caller derives parWhich from a native-endian word
/// load of the source, "least-significant byte first" matches source order
/// only on little-endian targets - confirm for other platforms.
template <char... Needle, typename P, std::size_t... Sizes>
void expand (const char* parSource, std::string& parDest, P parWhich, const char (&...parWith)[Sizes]) {
	static_assert(sizeof...(Needle) + 1 <= 0xFF, "Too many search chars, their indices won't fit in a byte");
	static_assert(sizeof...(Sizes) == sizeof...(Needle), "Every search char needs exactly one replacement sequence");

	const std::array<const char*, sizeof...(Needle)> replacements {parWith...};
	//The last element of each array is dropped; presumably these are string
	//literals and that element is the null terminator.
	const std::array<uint32_t, sizeof...(Needle)> lengths {static_cast<uint32_t>(Sizes - 1)...};

	for (std::size_t byte_index = 0; byte_index < sizeof(P); ++byte_index) {
		const std::size_t selector = static_cast<std::size_t>((parWhich >> (byte_index * CHAR_BIT)) bitand 0xFF);
		if (selector)
			parDest.append(replacements[selector - 1], lengths[selector - 1]);
		else
			parDest.push_back(parSource[byte_index]);
	}
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if !defined(HTML_ESCAPE_WITH_HOUDINI)
|
#if !defined(HTML_ESCAPE_WITH_HOUDINI)
|
||||||
|
@ -142,16 +157,14 @@ namespace tawashi {
|
||||||
for (unsigned int z = pre_bytes; z < inp_size - post_bytes; z += sizeof(packs[0])) {
|
for (unsigned int z = pre_bytes; z < inp_size - post_bytes; z += sizeof(packs[0])) {
|
||||||
const uint32_t& val = *reinterpret_cast<const uint32_t*>(parStr.data() + z);
|
const uint32_t& val = *reinterpret_cast<const uint32_t*>(parStr.data() + z);
|
||||||
uint32_t escape_bytes = 0;
|
uint32_t escape_bytes = 0;
|
||||||
|
uint8_t char_index = 1; //indices are 1-based
|
||||||
for (uint32_t pack : packs) {
|
for (uint32_t pack : packs) {
|
||||||
const uint32_t t = val xor pack;
|
const uint32_t t = val xor pack;
|
||||||
escape_bytes = (t - c1) bitand compl t bitand c2;
|
const uint32_t placeholders = ((t - c1) bitand compl t bitand c2) >> 7;
|
||||||
if (escape_bytes)
|
assert((escape_bytes bitand (placeholders * 0xFF)) == 0);
|
||||||
break;
|
escape_bytes |= placeholders * char_index++;
|
||||||
}
|
}
|
||||||
if (escape_bytes)
|
expand<Needle...>(parStr.data() + z, retval, escape_bytes, parWith...);
|
||||||
slow_copy<Needle...>(parStr.data() + z, retval, sizeof(packs[0]), parWith...);
|
|
||||||
else
|
|
||||||
retval.append(parStr.data() + z, sizeof(packs[0]));
|
|
||||||
}
|
}
|
||||||
slow_copy<Needle...>(parStr.data() + inp_size - post_bytes, retval, post_bytes, parWith...);
|
slow_copy<Needle...>(parStr.data() + inp_size - post_bytes, retval, post_bytes, parWith...);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue