mirror of
https://github.com/KingDuckZ/kamokan.git
synced 2024-11-23 00:33:44 +00:00
Fix calculations and add a test for the html escaping code.
This commit is contained in:
parent
3de5e3fd27
commit
6bee1af080
3 changed files with 76 additions and 10 deletions
|
@ -89,9 +89,13 @@ namespace tawashi {
|
||||||
};
|
};
|
||||||
|
|
||||||
//Calculate the new string's size
|
//Calculate the new string's size
|
||||||
const unsigned int pre_bytes = reinterpret_cast<uintptr_t>(parStr.data()) % alignof(decltype(packs[0]));
|
const std::size_t front_padding = (alignof(decltype(packs[0])) - reinterpret_cast<uintptr_t>(parStr.data()) % alignof(decltype(packs[0]))) % alignof(decltype(packs[0]));
|
||||||
const unsigned int in_size = static_cast<unsigned int>(parStr.size());
|
const unsigned int pre_bytes = std::min(front_padding, parStr.size());
|
||||||
unsigned int new_size = in_size;
|
assert(pre_bytes < alignof(decltype(packs[0])));
|
||||||
|
const unsigned int inp_size = static_cast<unsigned int>(parStr.size());
|
||||||
|
const unsigned int mid_bytes = (inp_size - pre_bytes) - (inp_size - pre_bytes) % alignof(decltype(packs[0]));
|
||||||
|
assert(0 == mid_bytes % alignof(decltype(packs[0])));
|
||||||
|
unsigned int new_size = inp_size;
|
||||||
unsigned int replace_count = 0;
|
unsigned int replace_count = 0;
|
||||||
for (unsigned int z = 0; z < pre_bytes; ++z) {
|
for (unsigned int z = 0; z < pre_bytes; ++z) {
|
||||||
const auto needle_index = find<Needle...>(parStr[z]);
|
const auto needle_index = find<Needle...>(parStr[z]);
|
||||||
|
@ -101,11 +105,15 @@ namespace tawashi {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(0 == (reinterpret_cast<uintptr_t>(parStr.data()) + pre_bytes) % alignof(decltype(packs[0])));
|
assert(0 == (reinterpret_cast<uintptr_t>(parStr.data()) + pre_bytes) % alignof(decltype(packs[0])) or 0 == mid_bytes);
|
||||||
const uint32_t c1 = 0x01010101UL;
|
const uint32_t c1 = 0x01010101UL;
|
||||||
const uint32_t c2 = 0x80808080UL;
|
const uint32_t c2 = 0x80808080UL;
|
||||||
const unsigned int post_bytes = (in_size - pre_bytes) % alignof(decltype(packs[0]));
|
assert(inp_size >= pre_bytes + mid_bytes);
|
||||||
for (unsigned int z = pre_bytes; z < in_size - post_bytes; z += sizeof(packs[0])) {
|
const unsigned int post_bytes = inp_size - pre_bytes - mid_bytes;
|
||||||
|
assert(post_bytes < alignof(decltype(packs[0])));
|
||||||
|
assert(post_bytes == (inp_size - pre_bytes) % alignof(decltype(packs[0])));
|
||||||
|
assert(inp_size == pre_bytes + mid_bytes + post_bytes);
|
||||||
|
for (unsigned int z = pre_bytes; z < inp_size - post_bytes; z += sizeof(packs[0])) {
|
||||||
const uint32_t& val = *reinterpret_cast<const uint32_t*>(parStr.data() + z);
|
const uint32_t& val = *reinterpret_cast<const uint32_t*>(parStr.data() + z);
|
||||||
for (unsigned int i = 0; i < sizeof...(Needle); ++i) {
|
for (unsigned int i = 0; i < sizeof...(Needle); ++i) {
|
||||||
const uint32_t t = val xor packs[i];
|
const uint32_t t = val xor packs[i];
|
||||||
|
@ -115,7 +123,7 @@ namespace tawashi {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (unsigned int z = in_size - post_bytes; z < in_size; ++z) {
|
for (unsigned int z = inp_size - post_bytes; z < inp_size; ++z) {
|
||||||
const auto needle_index = find<Needle...>(parStr[z]);
|
const auto needle_index = find<Needle...>(parStr[z]);
|
||||||
if (sizeof...(Needle) > needle_index) {
|
if (sizeof...(Needle) > needle_index) {
|
||||||
new_size += sizes[needle_index] - 1;
|
new_size += sizes[needle_index] - 1;
|
||||||
|
@ -128,10 +136,10 @@ namespace tawashi {
|
||||||
|
|
||||||
//Make the new string
|
//Make the new string
|
||||||
std::string retval;
|
std::string retval;
|
||||||
assert(new_size >= in_size);
|
assert(new_size >= inp_size);
|
||||||
retval.reserve(new_size);
|
retval.reserve(new_size);
|
||||||
slow_copy<Needle...>(parStr.data(), retval, pre_bytes, parWith...);
|
slow_copy<Needle...>(parStr.data(), retval, pre_bytes, parWith...);
|
||||||
for (unsigned int z = pre_bytes; z < in_size - post_bytes; z += sizeof(packs[0])) {
|
for (unsigned int z = pre_bytes; z < inp_size - post_bytes; z += sizeof(packs[0])) {
|
||||||
const uint32_t& val = *reinterpret_cast<const uint32_t*>(parStr.data() + z);
|
const uint32_t& val = *reinterpret_cast<const uint32_t*>(parStr.data() + z);
|
||||||
uint32_t escape_bytes = 0;
|
uint32_t escape_bytes = 0;
|
||||||
for (uint32_t pack : packs) {
|
for (uint32_t pack : packs) {
|
||||||
|
@ -145,7 +153,7 @@ namespace tawashi {
|
||||||
else
|
else
|
||||||
retval.append(parStr.data() + z, sizeof(packs[0]));
|
retval.append(parStr.data() + z, sizeof(packs[0]));
|
||||||
}
|
}
|
||||||
slow_copy<Needle...>(parStr.data() + in_size - post_bytes, retval, post_bytes, parWith...);
|
slow_copy<Needle...>(parStr.data() + inp_size - post_bytes, retval, post_bytes, parWith...);
|
||||||
|
|
||||||
assert(new_size == retval.size());
|
assert(new_size == retval.size());
|
||||||
return retval;
|
return retval;
|
||||||
|
|
|
@ -16,6 +16,7 @@ add_executable(${PROJECT_NAME}
|
||||||
../data/UTF-8-test.txt.c
|
../data/UTF-8-test.txt.c
|
||||||
test_invalid_utf8_get.cpp
|
test_invalid_utf8_get.cpp
|
||||||
test_mime_split.cpp
|
test_mime_split.cpp
|
||||||
|
test_html_escape.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
target_include_directories(${PROJECT_NAME}
|
target_include_directories(${PROJECT_NAME}
|
||||||
|
|
57
test/unit/test_html_escape.cpp
Normal file
57
test/unit/test_html_escape.cpp
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
/* Copyright 2017, Michele Santullo
|
||||||
|
* This file is part of "tawashi".
|
||||||
|
*
|
||||||
|
* "tawashi" is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* "tawashi" is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with "tawashi". If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "catch.hpp"
|
||||||
|
#include "escapist.hpp"
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
#include <boost/utility/string_view.hpp>
|
||||||
|
|
||||||
|
TEST_CASE ("Test html escaping", "[security][escape]") {
|
||||||
|
using boost::string_view;
|
||||||
|
using StrPair = std::pair<string_view, string_view>;
|
||||||
|
|
||||||
|
const std::vector<StrPair> test_data {
|
||||||
|
{"", ""},
|
||||||
|
{"a", "a"},
|
||||||
|
{"&", "&amp;"},
|
||||||
|
{">", "&gt;"},
|
||||||
|
{"<", "&lt;"},
|
||||||
|
{"/", "&#x2F;"},
|
||||||
|
{"\"", "&quot;"},
|
||||||
|
{"'", "&#x27;"},
|
||||||
|
{">a", "&gt;a"},
|
||||||
|
{"a>", "a&gt;"},
|
||||||
|
{"abcd", "abcd"},
|
||||||
|
{"abcdefgh", "abcdefgh"},
|
||||||
|
{"abcdefghi", "abcdefghi"},
|
||||||
|
{"abcdefgh&", "abcdefgh&amp;"},
|
||||||
|
{"ab&defghi", "ab&amp;defghi"},
|
||||||
|
{"<>&123''", "&lt;&gt;&amp;123&#x27;&#x27;"},
|
||||||
|
{"</body>", "&lt;&#x2F;body&gt;"},
|
||||||
|
{"&\"lol\"&", "&amp;&quot;lol&quot;&amp;"}
|
||||||
|
};
|
||||||
|
|
||||||
|
tawashi::Escapist esc;
|
||||||
|
for (const auto& p : test_data) {
|
||||||
|
const auto& in = p.first;
|
||||||
|
const auto& expected = p.second;
|
||||||
|
std::string out = esc.escape_html(in);
|
||||||
|
CHECK(out == expected);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue