Fix calculations and add a test for the HTML escaping code.

2025-10-02 15:00:02 +00:00 · 2017-06-19 18:08:34 +01:00 · 2017-06-19 18:08:34 +01:00 · 6bee1af080
commit 6bee1af080
parent 3de5e3fd27
3 changed files with 76 additions and 10 deletions
--- a/src/tawashi/escapist.cpp
+++ b/src/tawashi/escapist.cpp
@ -89,9 +89,13 @@ namespace tawashi {
 			};
 			//Calculate the new string's size
-			const unsigned int pre_bytes = reinterpret_cast<uintptr_t>(parStr.data()) % alignof(decltype(packs[0]));
+			const std::size_t front_padding = (alignof(decltype(packs[0])) - reinterpret_cast<uintptr_t>(parStr.data()) % alignof(decltype(packs[0]))) % alignof(decltype(packs[0]));
-			const unsigned int in_size = static_cast<unsigned int>(parStr.size());
+			const unsigned int pre_bytes = std::min(front_padding, parStr.size());
-			unsigned int new_size = in_size;
+			assert(pre_bytes < alignof(decltype(packs[0])));
 			const unsigned int inp_size = static_cast<unsigned int>(parStr.size());
 			const unsigned int mid_bytes = (inp_size - pre_bytes) - (inp_size - pre_bytes) % alignof(decltype(packs[0]));
 			assert(0 == mid_bytes % alignof(decltype(packs[0])));
 			unsigned int new_size = inp_size;
 			unsigned int replace_count = 0;
 			for (unsigned int z = 0; z < pre_bytes; ++z) {
 				const auto needle_index = find<Needle...>(parStr[z]);
@ -101,11 +105,15 @@ namespace tawashi {
 				}
 			}
-			assert(0 == (reinterpret_cast<uintptr_t>(parStr.data()) + pre_bytes) % alignof(decltype(packs[0])));
+			assert(0 == (reinterpret_cast<uintptr_t>(parStr.data()) + pre_bytes) % alignof(decltype(packs[0])) or 0 == mid_bytes);
 			const uint32_t c1 = 0x01010101UL;
 			const uint32_t c2 = 0x80808080UL;
-			const unsigned int post_bytes = (in_size - pre_bytes) % alignof(decltype(packs[0]));
+			assert(inp_size >= pre_bytes + mid_bytes);
-			for (unsigned int z = pre_bytes; z < in_size - post_bytes; z += sizeof(packs[0])) {
+			const unsigned int post_bytes = inp_size - pre_bytes - mid_bytes;
 			assert(post_bytes < alignof(decltype(packs[0])));
 			assert(post_bytes == (inp_size - pre_bytes) % alignof(decltype(packs[0])));
 			assert(inp_size == pre_bytes + mid_bytes + post_bytes);
 			for (unsigned int z = pre_bytes; z < inp_size - post_bytes; z += sizeof(packs[0])) {
 				const uint32_t& val = *reinterpret_cast<const uint32_t*>(parStr.data() + z);
 				for (unsigned int i = 0; i < sizeof...(Needle); ++i) {
 					const uint32_t t = val xor packs[i];
@ -115,7 +123,7 @@ namespace tawashi {
 				}
 			}
-			for (unsigned int z = in_size - post_bytes; z < in_size; ++z) {
+			for (unsigned int z = inp_size - post_bytes; z < inp_size; ++z) {
 				const auto needle_index = find<Needle...>(parStr[z]);
 				if (sizeof...(Needle) > needle_index) {
 					new_size += sizes[needle_index] - 1;
@ -128,10 +136,10 @@ namespace tawashi {
 			//Make the new string
 			std::string retval;
-			assert(new_size >= in_size);
+			assert(new_size >= inp_size);
 			retval.reserve(new_size);
 			slow_copy<Needle...>(parStr.data(), retval, pre_bytes, parWith...);
-			for (unsigned int z = pre_bytes; z < in_size - post_bytes; z += sizeof(packs[0])) {
+			for (unsigned int z = pre_bytes; z < inp_size - post_bytes; z += sizeof(packs[0])) {
 				const uint32_t& val = *reinterpret_cast<const uint32_t*>(parStr.data() + z);
 				uint32_t escape_bytes = 0;
 				for (uint32_t pack : packs) {
@ -145,7 +153,7 @@ namespace tawashi {
 				else
 					retval.append(parStr.data() + z, sizeof(packs[0]));
 			}
-			slow_copy<Needle...>(parStr.data() + in_size - post_bytes, retval, post_bytes, parWith...);
+			slow_copy<Needle...>(parStr.data() + inp_size - post_bytes, retval, post_bytes, parWith...);
 			assert(new_size == retval.size());
 			return retval;
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@ -16,6 +16,7 @@ add_executable(${PROJECT_NAME}
 	../data/UTF-8-test.txt.c
 	test_invalid_utf8_get.cpp
 	test_mime_split.cpp
 	test_html_escape.cpp
 )
 target_include_directories(${PROJECT_NAME}
--- a/test/unit/test_html_escape.cpp
+++ b/test/unit/test_html_escape.cpp
@ -0,0 +1,57 @@
 /* Copyright 2017, Michele Santullo
 * This file is part of "tawashi".
 *
 * "tawashi" is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * "tawashi" is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with "tawashi".  If not, see <http://www.gnu.org/licenses/>.
 */
 #include "catch.hpp"
 #include "escapist.hpp"
 #include <utility>
 #include <vector>
 #include <string>
 #include <boost/utility/string_view.hpp>
 // Table-driven test for tawashi::Escapist::escape_html: every input in
 // test_data is escaped and the result is compared with its expected output.
 TEST_CASE ("Test html escaping", "[security][escape]") {
 	using boost::string_view;
 	// first = raw input, second = expected escaped output
 	using StrPair = std::pair<string_view, string_view>;
 	const std::vector<StrPair> test_data {
 		// Empty input and a single character that is expected to pass through unchanged
 		{"", ""},
 		{"a", "a"},
 		// Single characters together with their expected replacements
 		{"&", "&amp;"},
 		{">", "&gt;"},
 		{"<", "&lt;"},
 		{"/", "&#x2F;"},
 		{"\"", "&quot;"},
 		{"'", "&#x27;"},
 		// A replaced character next to a plain one, leading and trailing
 		{">a", "&gt;a"},
 		{"a>", "a&gt;"},
 		// Plain strings of 4, 8 and 9 characters
 		// NOTE(review): lengths presumably chosen around the escaper's 4-byte word reads — confirm against escapist.cpp
 		{"abcd", "abcd"},
 		{"abcdefgh", "abcdefgh"},
 		{"abcdefghi", "abcdefghi"},
 		// 9-character strings with one replaced character, at the end and inside the string
 		{"abcdefgh&", "abcdefgh&amp;"},
 		{"ab&defghi", "ab&amp;defghi"},
 		// Several replaced characters mixed with plain text
 		{"<>&123''", "&lt;&gt;&amp;123&#x27;&#x27;"},
 		{"</body>", "&lt;&#x2F;body&gt;"},
 		{"&\"lol\"&", "&amp;&quot;lol&quot;&amp;"}
 	};
 	tawashi::Escapist esc;
 	for (const auto& p : test_data) {
 		const auto& in = p.first;
 		const auto& expected = p.second;
 		std::string out = esc.escape_html(in);
 		// Catch's CHECK records a failure and keeps going, so one mismatch
 		// does not hide the results for the remaining pairs.
 		CHECK(out == expected);
 	}
 }