1
0
Fork 0
mirror of https://github.com/KingDuckZ/kamokan.git synced 2025-01-13 19:56:40 +00:00

Add unit test for utf8 sanitization.

This commit is contained in:
King_DuckZ 2017-05-10 20:47:24 +01:00
parent 13e46ab1e6
commit d449781c40
7 changed files with 2125 additions and 1 deletions

View file

@ -0,0 +1,122 @@
# - Try to find Glib and its components (gio, gobject etc)
# Once done, this will define
#
# GLIB_FOUND - system has Glib
# GLIB_INCLUDE_DIRS - the Glib include directories
# GLIB_LIBRARIES - link these to use Glib
#
# Optionally, the COMPONENTS keyword can be passed to find_package()
# and Glib components can be looked for. Currently, the following
# components can be used, and they define the following variables if
# found:
#
# gio: GLIB_GIO_LIBRARIES
# gobject: GLIB_GOBJECT_LIBRARIES
# gmodule: GLIB_GMODULE_LIBRARIES
# gthread: GLIB_GTHREAD_LIBRARIES
#
# Note that the respective _INCLUDE_DIR variables are not set, since
# all headers are in the same directory as GLIB_INCLUDE_DIRS.
#
# Copyright (C) 2012 Raphael Kubo da Costa <rakuco@webkit.org>
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND ITS CONTRIBUTORS ``AS
# IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR ITS
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Ask pkg-config (when available) about glib-2.0. The resulting PC_GLIB_*
# variables are only used as search hints for the lookups below.
find_package(PkgConfig)
pkg_check_modules(PC_GLIB QUIET glib-2.0)

# Main glib library.
find_library(GLIB_LIBRARIES
    NAMES glib-2.0
    HINTS ${PC_GLIB_LIBDIR}
          ${PC_GLIB_LIBRARY_DIRS}
)

# Files in glib's main include path may include glibconfig.h, which,
# for some odd reason, is normally in $LIBDIR/glib-2.0/include.
get_filename_component(_GLIB_LIBRARY_DIR ${GLIB_LIBRARIES} PATH)

# The pkg-config hints must carry the PC_GLIB_ prefix that was passed to
# pkg_check_modules() above; un-prefixed PC_LIBDIR / PC_LIBRARY_DIRS are never
# set by anything in this module and would silently expand to nothing.
find_path(GLIBCONFIG_INCLUDE_DIR
    NAMES glibconfig.h
    HINTS ${PC_GLIB_LIBDIR} ${PC_GLIB_LIBRARY_DIRS} ${_GLIB_LIBRARY_DIR}
          ${PC_GLIB_INCLUDEDIR} ${PC_GLIB_INCLUDE_DIRS}
    PATH_SUFFIXES glib-2.0/include
)

# Directory holding glib.h itself.
find_path(GLIB_INCLUDE_DIR
    NAMES glib.h
    HINTS ${PC_GLIB_INCLUDEDIR}
          ${PC_GLIB_INCLUDE_DIRS}
    PATH_SUFFIXES glib-2.0
)

# Both directories are needed to compile against glib.
set(GLIB_INCLUDE_DIRS ${GLIB_INCLUDE_DIR} ${GLIBCONFIG_INCLUDE_DIR})
# Version detection: pull the MAJOR/MINOR/MICRO numbers out of the
# "#define GLIB_<part>_VERSION <n>" lines of glibconfig.h, when that header
# was found, and join them into GLIB_VERSION.
if (EXISTS "${GLIBCONFIG_INCLUDE_DIR}/glibconfig.h")
    file(READ "${GLIBCONFIG_INCLUDE_DIR}/glibconfig.h" GLIBCONFIG_H_CONTENTS)
    foreach (_glib_version_part MAJOR MINOR MICRO)
        string(REGEX MATCH "#define GLIB_${_glib_version_part}_VERSION ([0-9]+)" _dummy "${GLIBCONFIG_H_CONTENTS}")
        set(GLIB_VERSION_${_glib_version_part} "${CMAKE_MATCH_1}")
    endforeach ()
    set(GLIB_VERSION "${GLIB_VERSION_MAJOR}.${GLIB_VERSION_MINOR}.${GLIB_VERSION_MICRO}")
endif ()
# Additional Glib components. We only look for libraries, as not all of them
# have corresponding headers and all headers are installed alongside the main
# glib ones.
#
# Every library that is looked up for a requested component is appended to
# ADDITIONAL_REQUIRED_VARS so that it becomes mandatory in the final
# find_package_handle_standard_args() call.
#
# The component name is quoted in each comparison so that if() compares it as
# a plain string instead of possibly re-evaluating it as a variable name
# (see policy CMP0054).
foreach (_component ${GLIB_FIND_COMPONENTS})
    if ("${_component}" STREQUAL "gio")
        find_library(GLIB_GIO_LIBRARIES NAMES gio-2.0 HINTS ${_GLIB_LIBRARY_DIR})
        list(APPEND ADDITIONAL_REQUIRED_VARS GLIB_GIO_LIBRARIES)
    elseif ("${_component}" STREQUAL "gobject")
        find_library(GLIB_GOBJECT_LIBRARIES NAMES gobject-2.0 HINTS ${_GLIB_LIBRARY_DIR})
        list(APPEND ADDITIONAL_REQUIRED_VARS GLIB_GOBJECT_LIBRARIES)
    elseif ("${_component}" STREQUAL "gmodule")
        find_library(GLIB_GMODULE_LIBRARIES NAMES gmodule-2.0 HINTS ${_GLIB_LIBRARY_DIR})
        list(APPEND ADDITIONAL_REQUIRED_VARS GLIB_GMODULE_LIBRARIES)
    elseif ("${_component}" STREQUAL "gthread")
        find_library(GLIB_GTHREAD_LIBRARIES NAMES gthread-2.0 HINTS ${_GLIB_LIBRARY_DIR})
        list(APPEND ADDITIONAL_REQUIRED_VARS GLIB_GTHREAD_LIBRARIES)
    elseif ("${_component}" STREQUAL "gio-unix")
        # gio-unix is compiled as part of the gio library, but the include paths
        # are separate from the shared glib ones. Since this is currently only used
        # by WebKitGTK+ we don't go to extraordinary measures beyond pkg-config.
        # Note that nothing is added to ADDITIONAL_REQUIRED_VARS here, so this
        # component is never treated as required.
        pkg_check_modules(GIO_UNIX QUIET gio-unix-2.0)
    endif ()
endforeach ()
# Report the result: GLIB_FOUND is decided from the include directories, the
# main library and whatever component libraries were requested above; the
# detected GLIB_VERSION is checked against the version asked for by the caller.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(GLIB
    REQUIRED_VARS
        GLIB_INCLUDE_DIRS
        GLIB_LIBRARIES
        ${ADDITIONAL_REQUIRED_VARS}
    VERSION_VAR
        GLIB_VERSION
)

# Keep the lookup results out of the default (non-advanced) cache view.
mark_as_advanced(
    GLIB_LIBRARIES
    GLIB_INCLUDE_DIR
    GLIB_INCLUDE_DIRS
    GLIBCONFIG_INCLUDE_DIR
    GLIB_GIO_LIBRARIES
    GLIB_GIO_UNIX_LIBRARIES
    GLIB_GMODULE_LIBRARIES
    GLIB_GOBJECT_LIBRARIES
    GLIB_GTHREAD_LIBRARIES
)

View file

@ -53,6 +53,24 @@ namespace tawashi {
return std::string(buf->ptr, buf->size);
}
// Returns a URL-escaped copy of parURL, produced by houdini_escape_url().
// An empty input yields an empty string. When houdini reports 0 the input is
// returned as-is; otherwise the contents of the internal buffer are returned.
// NOTE(review): the shared m_gh_buf is not cleared in this function - confirm
// that houdini overwrites rather than appends, or that it is reset elsewhere.
std::string Escapist::escape_url (const boost::string_ref& parURL) const {
	if (parURL.empty())
		return std::string();

	assert(m_gh_buf);
	gh_buf* const out_buf = static_cast<gh_buf*>(m_gh_buf);
	const uint8_t* const source = reinterpret_cast<const uint8_t*>(parURL.data());

	const bool was_escaped = (0 != houdini_escape_url(out_buf, source, parURL.size()));
	if (was_escaped)
		return std::string(out_buf->ptr, out_buf->size);

	return std::string(parURL.data(), parURL.size());
}
std::string Escapist::escape_html (const boost::string_ref& parHtml) const {
if (parHtml.empty())
return std::string();

View file

@ -37,6 +37,7 @@ namespace tawashi {
~Escapist() noexcept;
// Counterpart of escape_url(). NOTE(review): the full definition is not
// visible from here - confirm the exact behaviour against the implementation.
std::string unescape_url (const boost::string_ref& parURL) const;
// Returns a URL-escaped copy of parURL. Empty input gives an empty string;
// input for which houdini_escape_url() reports 0 is returned unchanged.
std::string escape_url (const boost::string_ref& parURL) const;
// Returns an escaped copy of parHtml; empty input gives an empty string.
// NOTE(review): only the empty-input path is visible here - confirm the rest.
std::string escape_html (const boost::string_ref& parHtml) const;
private:

BIN
test/data/UTF-8-test.txt Normal file

Binary file not shown.

1902
test/data/UTF-8-test.txt.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,23 +1,32 @@
project(tawashi_unittest CXX)
# C is enabled next to C++ because the test executable also compiles a C
# source file (../data/UTF-8-test.txt.c).
project(tawashi_unittest CXX C)
# GLib's headers and library are added to the test target below.
find_package(GLIB 2.20 REQUIRED)
# Build the tests as C++14 and fail configuration if it is not supported.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Strict warnings (treated as errors) for the C sources of this target.
# Each configuration extends its own flag set: the release flags must be
# derived from CMAKE_C_FLAGS_RELEASE, not from the debug flags, otherwise
# release builds lose their own options and inherit the debug ones.
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Wall -Wpedantic -Wconversion -Werror")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Wall -Wpedantic -Wconversion -Werror")
# Unit test executable: the Catch-based test sources plus the C file that
# embeds the UTF-8 test data.
add_executable(${PROJECT_NAME}
    check.cpp
    fake_cgi_env.cpp
    test_ini_file.cpp
    test_settings_bag.cpp
    test_index_response.cpp
    test_invalid_utf8_post.cpp
    ../data/UTF-8-test.txt.c
)

# Headers needed by the tests only: Catch, generated headers and GLib.
target_include_directories(${PROJECT_NAME} PRIVATE
    ${TAWASHI_SOURCE_ROOT}/lib/Catch/single_include
    ${TAWASHI_GEN_INCLUDE_DIR}
    ${GLIB_INCLUDE_DIRS}
)

# Code under test, helper library and GLib.
target_link_libraries(${PROJECT_NAME} PRIVATE
    tawashi_implem
    duckhandy
    ${GLIB_LIBRARIES}
)
add_test(

View file

@ -0,0 +1,72 @@
/* Copyright 2017, Michele Santullo
* This file is part of "tawashi".
*
* "tawashi" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "tawashi" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "tawashi". If not, see <http://www.gnu.org/licenses/>.
*/
#include "catch.hpp"
#include "cgi_post.hpp"
#include "cgi_env.hpp"
#include "sanitized_utf8.hpp"
#include <vector>
#include <string>
#include <sstream>
#include <utility>
#include <iterator>
#include <fstream>
#include <ciso646>
#include <glib.h>
// Byte array with the test input and the number of bytes in it. Both symbols
// have C linkage and are defined outside this file - presumably in the
// generated test/data/UTF-8-test.txt.c (looks like `xxd -i` output; confirm).
extern "C" {
extern const unsigned char UTF_8_test_txt[];
extern const unsigned int UTF_8_test_txt_len;
} //extern C
// Sends the embedded test document through read_post() as the value of the
// "invalid_text" POST field and checks that the value handed back is valid
// UTF-8 according to GLib.
TEST_CASE ("Retrieve and sanitize an invalid utf-8 text", "[utf8][security]") {
	using tawashi::cgi::PostMapType;

	const std::string invalid_text_prefix("invalid_text=");
	const char* const raw_text = reinterpret_cast<const char*>(UTF_8_test_txt);

	// Request body: "invalid_text=<raw bytes>", built with a single allocation.
	std::string invalid_text;
	invalid_text.reserve(invalid_text_prefix.size() + UTF_8_test_txt_len);
	invalid_text.append(invalid_text_prefix);
	invalid_text.append(raw_text, UTF_8_test_txt_len);

	// CONTENT_LENGTH has to describe the whole body, field name included;
	// announcing only the payload length would leave the last bytes of the
	// test data outside the declared request body.
	const std::string content_length = std::string("CONTENT_LENGTH=") + std::to_string(invalid_text.size());

	std::istringstream iss;
	iss >> std::noskipws;
	iss.str(std::move(invalid_text));

	// Minimal null-terminated environment; content_length outlives env.
	const char* const fake_env[] = {
		content_length.c_str(),
		nullptr
	};
	tawashi::cgi::Env env(fake_env);

	const PostMapType& post_data = read_post(iss, env);
	// at() throws if the field is missing, which also fails the test.
	const auto& sanitized_text = post_data.at("invalid_text");
	CHECK(g_utf8_validate(sanitized_text.data(), sanitized_text.size(), nullptr));

	// Disabled: line-by-line comparison against a reference sanitization.
	// NOTE(review): libreoffice_UTF_8_test_txt is not declared in this file -
	// confirm whether the reference data is meant to be added before enabling.
	//std::istringstream iss_expected;
	//iss_expected >> std::noskipws;
	//iss_expected.str(std::string(reinterpret_cast<const char*>(libreoffice_UTF_8_test_txt), libreoffice_UTF_8_test_txt_len));
	//std::string expected_line;
	//std::istringstream iss_obtained;
	//iss_obtained >> std::noskipws;
	//iss_obtained.str(post_data.at("invalid_text"));
	//for (std::string line; std::getline(iss_obtained, line); ) {
	//	std::getline(iss_expected, expected_line);
	//	std::cout << '"' << line << "\n\"" << expected_line << "\"\n";
	//	REQUIRE(line == expected_line);
	//}
}