From e649e9a19644f9bd95c6431ff7fb809621ee2997 Mon Sep 17 00:00:00 2001 From: King_DuckZ Date: Mon, 19 Jun 2017 19:19:42 +0100 Subject: [PATCH] Store indices (1-based) in escape_bytes instead of just a flag. For every to-be-escaped character in the source buffer, the corresponding byte in escape_bytes gets set to the index of that character in Needle, plus 1. expand() is similar to slow_copy(), except that it doesn't need to re-compare each character in the source buffer to check whether it needs to be replaced. Each non-zero byte in parWhich (zero means the original character is copied unchanged, which is why indices are 1-based) is the 1-based index of the sequence that should be used to replace the original character (needles[curr_byte-1]). --- src/tawashi/escapist.cpp | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/tawashi/escapist.cpp b/src/tawashi/escapist.cpp index 0fddfa1..f72bd39 100644 --- a/src/tawashi/escapist.cpp +++ b/src/tawashi/escapist.cpp @@ -24,6 +24,7 @@ #if !defined(HTML_ESCAPE_WITH_HOUDINI) # include #endif +#include namespace tawashi { namespace { @@ -73,6 +74,20 @@ namespace tawashi { parDest.push_back(parSource[z]); } } + template + void expand (const char* parSource, std::string& parDest, P parWhich, const char (&...parWith)[Sizes]) { + static_assert(sizeof...(Needle) + 1 <= 0xFF, "Too many search chars, their indices won't fit in a byte"); + std::array withs {parWith...}; + std::array sizes {(static_cast(Sizes) - 1)...}; + + for (unsigned int z = 0; z < sizeof(P) * CHAR_BIT; z += CHAR_BIT) { + const auto curr = 0xFF bitand (parWhich >> z); + if (curr) + parDest.append(withs[curr - 1], sizes[curr - 1]); + else + parDest.push_back(parSource[z / CHAR_BIT]); + } + } #endif #if !defined(HTML_ESCAPE_WITH_HOUDINI) @@ -142,16 +157,14 @@ namespace tawashi { for (unsigned int z = pre_bytes; z < inp_size - post_bytes; z += sizeof(packs[0])) { const uint32_t& val = *reinterpret_cast(parStr.data() + z); uint32_t escape_bytes = 0; + uint8_t char_index = 1; //indices are 1-based 
for (uint32_t pack : packs) { const uint32_t t = val xor pack; - escape_bytes = (t - c1) bitand compl t bitand c2; - if (escape_bytes) - break; + const uint32_t placeholders = ((t - c1) bitand compl t bitand c2) >> 7; + assert((escape_bytes bitand (placeholders * 0xFF)) == 0); + escape_bytes |= placeholders * char_index++; } - if (escape_bytes) - slow_copy(parStr.data() + z, retval, sizeof(packs[0]), parWith...); - else - retval.append(parStr.data() + z, sizeof(packs[0])); + expand(parStr.data() + z, retval, escape_bytes, parWith...); } slow_copy(parStr.data() + inp_size - post_bytes, retval, post_bytes, parWith...);