diff --git a/src/tawashi/escapist.cpp b/src/tawashi/escapist.cpp index 6c6828d..0fddfa1 100644 --- a/src/tawashi/escapist.cpp +++ b/src/tawashi/escapist.cpp @@ -89,9 +89,13 @@ namespace tawashi { }; //Calculate the new string's size - const unsigned int pre_bytes = reinterpret_cast(parStr.data()) % alignof(decltype(packs[0])); - const unsigned int in_size = static_cast(parStr.size()); - unsigned int new_size = in_size; + const std::size_t front_padding = (alignof(decltype(packs[0])) - reinterpret_cast(parStr.data()) % alignof(decltype(packs[0]))) % alignof(decltype(packs[0])); + const unsigned int pre_bytes = std::min(front_padding, parStr.size()); + assert(pre_bytes < alignof(decltype(packs[0]))); + const unsigned int inp_size = static_cast(parStr.size()); + const unsigned int mid_bytes = (inp_size - pre_bytes) - (inp_size - pre_bytes) % alignof(decltype(packs[0])); + assert(0 == mid_bytes % alignof(decltype(packs[0]))); + unsigned int new_size = inp_size; unsigned int replace_count = 0; for (unsigned int z = 0; z < pre_bytes; ++z) { const auto needle_index = find(parStr[z]); @@ -101,11 +105,15 @@ namespace tawashi { } } - assert(0 == (reinterpret_cast(parStr.data()) + pre_bytes) % alignof(decltype(packs[0]))); + assert(0 == (reinterpret_cast(parStr.data()) + pre_bytes) % alignof(decltype(packs[0])) or 0 == mid_bytes); const uint32_t c1 = 0x01010101UL; const uint32_t c2 = 0x80808080UL; - const unsigned int post_bytes = (in_size - pre_bytes) % alignof(decltype(packs[0])); - for (unsigned int z = pre_bytes; z < in_size - post_bytes; z += sizeof(packs[0])) { + assert(inp_size >= pre_bytes + mid_bytes); + const unsigned int post_bytes = inp_size - pre_bytes - mid_bytes; + assert(post_bytes < alignof(decltype(packs[0]))); + assert(post_bytes == (inp_size - pre_bytes) % alignof(decltype(packs[0]))); + assert(inp_size == pre_bytes + mid_bytes + post_bytes); + for (unsigned int z = pre_bytes; z < inp_size - post_bytes; z += sizeof(packs[0])) { const uint32_t& val = 
*reinterpret_cast(parStr.data() + z); for (unsigned int i = 0; i < sizeof...(Needle); ++i) { const uint32_t t = val xor packs[i]; @@ -115,7 +123,7 @@ namespace tawashi { } } - for (unsigned int z = in_size - post_bytes; z < in_size; ++z) { + for (unsigned int z = inp_size - post_bytes; z < inp_size; ++z) { const auto needle_index = find(parStr[z]); if (sizeof...(Needle) > needle_index) { new_size += sizes[needle_index] - 1; @@ -128,10 +136,10 @@ namespace tawashi { //Make the new string std::string retval; - assert(new_size >= in_size); + assert(new_size >= inp_size); retval.reserve(new_size); slow_copy(parStr.data(), retval, pre_bytes, parWith...); - for (unsigned int z = pre_bytes; z < in_size - post_bytes; z += sizeof(packs[0])) { + for (unsigned int z = pre_bytes; z < inp_size - post_bytes; z += sizeof(packs[0])) { const uint32_t& val = *reinterpret_cast(parStr.data() + z); uint32_t escape_bytes = 0; for (uint32_t pack : packs) { @@ -145,7 +153,7 @@ namespace tawashi { else retval.append(parStr.data() + z, sizeof(packs[0])); } - slow_copy(parStr.data() + in_size - post_bytes, retval, post_bytes, parWith...); + slow_copy(parStr.data() + inp_size - post_bytes, retval, post_bytes, parWith...); assert(new_size == retval.size()); return retval; diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index e7668aa..fd547fc 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -16,6 +16,7 @@ add_executable(${PROJECT_NAME} ../data/UTF-8-test.txt.c test_invalid_utf8_get.cpp test_mime_split.cpp + test_html_escape.cpp ) target_include_directories(${PROJECT_NAME} diff --git a/test/unit/test_html_escape.cpp b/test/unit/test_html_escape.cpp new file mode 100644 index 0000000..27f54fc --- /dev/null +++ b/test/unit/test_html_escape.cpp @@ -0,0 +1,57 @@ +/* Copyright 2017, Michele Santullo + * This file is part of "tawashi". 
+ *
+ * "tawashi" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "tawashi" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "tawashi". If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "catch.hpp"
+#include "escapist.hpp"
+#include <boost/utility/string_view.hpp>
+#include <string>
+#include <utility>
+#include <vector>
+
+TEST_CASE ("Test html escaping", "[security][escape]") { //Table-driven check of Escapist::escape_html()
+	using boost::string_view;
+	using StrPair = std::pair<string_view, std::string>; //first: raw input, second: expected escaped output
+
+	const std::vector<StrPair> test_data { //NOTE(review): expected column rebuilt from entity-decoded text; confirm spellings against escape_html()
+		{"", ""},
+		{"a", "a"},
+		{"&", "&amp;"},
+		{">", "&gt;"},
+		{"<", "&lt;"},
+		{"/", "&#x2F;"}, //NOTE(review): hex numeric entity assumed for '/'
+		{"\"", "&quot;"},
+		{"'", "&#x27;"}, //NOTE(review): hex numeric entity assumed for '\''
+		{">a", "&gt;a"},
+		{"a>", "a&gt;"},
+		{"abcd", "abcd"}, //inputs of 4, 8 and 9 bytes with nothing to escape
+		{"abcdefgh", "abcdefgh"},
+		{"abcdefghi", "abcdefghi"},
+		{"abcdefgh&", "abcdefgh&amp;"}, //needle as the very last byte
+		{"ab&defghi", "ab&amp;defghi"}, //needle in the middle of the input
+		{"<>&123''", "&lt;&gt;&amp;123&#x27;&#x27;"},
+		{"</body>", "&lt;&#x2F;body&gt;"}, //NOTE(review): input inferred from the expected value
+		{"&\"lol\"&", "&amp;&quot;lol&quot;&amp;"}
+	};
+
+	tawashi::Escapist esc;
+	for (const auto& p : test_data) { //CHECK (not REQUIRE) so every failing pair gets reported
+		const auto& in = p.first;
+		const auto& expected = p.second;
+		std::string out = esc.escape_html(in);
+		CHECK(out == expected);
+	}
+}