mirror of
https://github.com/KingDuckZ/kamokan.git
synced 2025-01-13 19:56:40 +00:00
Add unit test for utf8 sanitization.
This commit is contained in:
parent
13e46ab1e6
commit
d449781c40
7 changed files with 2125 additions and 1 deletions
122
cmake/modules/FindGLIB.cmake
Normal file
122
cmake/modules/FindGLIB.cmake
Normal file
|
@ -0,0 +1,122 @@
|
|||
# - Try to find Glib and its components (gio, gobject etc)
|
||||
# Once done, this will define
|
||||
#
|
||||
# GLIB_FOUND - system has Glib
|
||||
# GLIB_INCLUDE_DIRS - the Glib include directories
|
||||
# GLIB_LIBRARIES - link these to use Glib
|
||||
#
|
||||
# Optionally, the COMPONENTS keyword can be passed to find_package()
|
||||
# and Glib components can be looked for. Currently, the following
|
||||
# components can be used, and they define the following variables if
|
||||
# found:
|
||||
#
|
||||
# gio: GLIB_GIO_LIBRARIES
|
||||
# gobject: GLIB_GOBJECT_LIBRARIES
|
||||
# gmodule: GLIB_GMODULE_LIBRARIES
|
||||
# gthread: GLIB_GTHREAD_LIBRARIES
# gio-unix: no GLIB_ variable; only the GIO_UNIX_* variables from pkg-config
|
||||
#
|
||||
# Note that the respective _INCLUDE_DIR variables are not set, since
|
||||
# all headers are in the same directory as GLIB_INCLUDE_DIRS.
|
||||
#
|
||||
# Copyright (C) 2012 Raphael Kubo da Costa <rakuco@webkit.org>
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND ITS CONTRIBUTORS ``AS
|
||||
# IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
|
||||
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR ITS
|
||||
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
|
||||
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
||||
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
# ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
# Query pkg-config for glib-2.0. The resulting PC_GLIB_* variables are only
# used as search hints by the find_* calls in this module.
find_package(PkgConfig)
pkg_check_modules(PC_GLIB QUIET glib-2.0)

# Locate the core glib library, trying the pkg-config directories first.
find_library(GLIB_LIBRARIES
    NAMES glib-2.0
    HINTS
        ${PC_GLIB_LIBDIR}
        ${PC_GLIB_LIBRARY_DIRS}
)
|
||||
|
||||
# Files in glib's main include path may include glibconfig.h, which,
# for some odd reason, is normally in $LIBDIR/glib-2.0/include.
# The library directories reported by pkg-config (PC_GLIB_*, the prefix passed
# to pkg_check_modules) and the directory of the library found by find_library
# are therefore hinted before the pkg-config include directories.
get_filename_component(_GLIB_LIBRARY_DIR "${GLIB_LIBRARIES}" PATH)
find_path(GLIBCONFIG_INCLUDE_DIR
    NAMES glibconfig.h
    HINTS ${PC_GLIB_LIBDIR} ${PC_GLIB_LIBRARY_DIRS} ${_GLIB_LIBRARY_DIR}
          ${PC_GLIB_INCLUDEDIR} ${PC_GLIB_INCLUDE_DIRS}
    PATH_SUFFIXES glib-2.0/include
)
|
||||
|
||||
# Directory holding the main glib.h header.
find_path(GLIB_INCLUDE_DIR
    NAMES glib.h
    HINTS ${PC_GLIB_INCLUDEDIR} ${PC_GLIB_INCLUDE_DIRS}
    PATH_SUFFIXES glib-2.0
)

# Public result: the main header directory plus the one containing glibconfig.h.
set(GLIB_INCLUDE_DIRS
    ${GLIB_INCLUDE_DIR}
    ${GLIBCONFIG_INCLUDE_DIR}
)
|
||||
|
||||
# Version detection: pull the GLIB_{MAJOR,MINOR,MICRO}_VERSION defines out of
# glibconfig.h (when it was found) and assemble GLIB_VERSION from them.
set(_glibconfig_header "${GLIBCONFIG_INCLUDE_DIR}/glibconfig.h")
if (EXISTS "${_glibconfig_header}")
    file(READ "${_glibconfig_header}" GLIBCONFIG_H_CONTENTS)
    foreach (_glib_version_part MAJOR MINOR MICRO)
        # Sets GLIB_VERSION_MAJOR, GLIB_VERSION_MINOR and GLIB_VERSION_MICRO in turn.
        string(REGEX MATCH "#define GLIB_${_glib_version_part}_VERSION ([0-9]+)" _glib_version_match "${GLIBCONFIG_H_CONTENTS}")
        set(GLIB_VERSION_${_glib_version_part} "${CMAKE_MATCH_1}")
    endforeach ()
    set(GLIB_VERSION "${GLIB_VERSION_MAJOR}.${GLIB_VERSION_MINOR}.${GLIB_VERSION_MICRO}")
endif ()
|
||||
|
||||
# Additional Glib components. We only look for libraries, as not all of them
# have corresponding headers and all headers are installed alongside the main
# glib ones.
# Every library-backed component that is requested is appended to
# ADDITIONAL_REQUIRED_VARS so that it becomes mandatory for GLIB_FOUND.
# The component name is quoted in each comparison so that it is compared as a
# string rather than being dereferenced as a variable name.
foreach (_component ${GLIB_FIND_COMPONENTS})
    if ("${_component}" STREQUAL "gio")
        find_library(GLIB_GIO_LIBRARIES NAMES gio-2.0 HINTS ${_GLIB_LIBRARY_DIR})
        list(APPEND ADDITIONAL_REQUIRED_VARS GLIB_GIO_LIBRARIES)
    elseif ("${_component}" STREQUAL "gobject")
        find_library(GLIB_GOBJECT_LIBRARIES NAMES gobject-2.0 HINTS ${_GLIB_LIBRARY_DIR})
        list(APPEND ADDITIONAL_REQUIRED_VARS GLIB_GOBJECT_LIBRARIES)
    elseif ("${_component}" STREQUAL "gmodule")
        find_library(GLIB_GMODULE_LIBRARIES NAMES gmodule-2.0 HINTS ${_GLIB_LIBRARY_DIR})
        list(APPEND ADDITIONAL_REQUIRED_VARS GLIB_GMODULE_LIBRARIES)
    elseif ("${_component}" STREQUAL "gthread")
        find_library(GLIB_GTHREAD_LIBRARIES NAMES gthread-2.0 HINTS ${_GLIB_LIBRARY_DIR})
        list(APPEND ADDITIONAL_REQUIRED_VARS GLIB_GTHREAD_LIBRARIES)
    elseif ("${_component}" STREQUAL "gio-unix")
        # gio-unix is compiled as part of the gio library, but the include paths
        # are separate from the shared glib ones. Since this is currently only used
        # by WebKitGTK+ we don't go to extraordinary measures beyond pkg-config.
        # Note: nothing is added to ADDITIONAL_REQUIRED_VARS for this component.
        pkg_check_modules(GIO_UNIX QUIET gio-unix-2.0)
    endif ()
endforeach ()
|
||||
|
||||
# Report the result: GLIB_FOUND requires the include dirs, the core library and
# every component library collected in ADDITIONAL_REQUIRED_VARS; GLIB_VERSION
# is checked against the version requested in find_package().
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(GLIB
    REQUIRED_VARS
        GLIB_INCLUDE_DIRS
        GLIB_LIBRARIES
        ${ADDITIONAL_REQUIRED_VARS}
    VERSION_VAR GLIB_VERSION
)
|
||||
|
||||
# Hide this module's result variables from the default cache view in the GUIs.
mark_as_advanced(
    GLIBCONFIG_INCLUDE_DIR
    GLIB_GIO_LIBRARIES GLIB_GIO_UNIX_LIBRARIES
    GLIB_GMODULE_LIBRARIES GLIB_GOBJECT_LIBRARIES GLIB_GTHREAD_LIBRARIES
    GLIB_INCLUDE_DIR GLIB_INCLUDE_DIRS
    GLIB_LIBRARIES
)
|
|
@ -53,6 +53,24 @@ namespace tawashi {
|
|||
return std::string(buf->ptr, buf->size);
|
||||
}
|
||||
|
||||
// Returns an escaped copy of parURL produced by houdini_escape_url().
// An empty input yields an empty string. When houdini_escape_url() returns 0
// the input is returned as an unmodified copy; otherwise the contents of the
// shared gh_buf are returned.
// NOTE(review): the buffer is not reset in this function - confirm that it is
// cleared elsewhere between calls.
std::string Escapist::escape_url (const boost::string_ref& parURL) const {
	if (parURL.empty())
		return std::string();

	assert(m_gh_buf);
	gh_buf* const out_buf = static_cast<gh_buf*>(m_gh_buf);
	const uint8_t* const src = reinterpret_cast<const uint8_t*>(parURL.data());

	const int written = houdini_escape_url(out_buf, src, parURL.size());
	if (0 != written)
		return std::string(out_buf->ptr, out_buf->size);

	return std::string(parURL.data(), parURL.size());
}
|
||||
|
||||
std::string Escapist::escape_html (const boost::string_ref& parHtml) const {
|
||||
if (parHtml.empty())
|
||||
return std::string();
|
||||
|
|
|
@ -37,6 +37,7 @@ namespace tawashi {
|
|||
~Escapist() noexcept;
|
||||
|
||||
std::string unescape_url (const boost::string_ref& parURL) const;
|
||||
std::string escape_url (const boost::string_ref& parURL) const;
|
||||
std::string escape_html (const boost::string_ref& parHtml) const;
|
||||
|
||||
private:
|
||||
|
|
BIN
test/data/UTF-8-test.txt
Normal file
BIN
test/data/UTF-8-test.txt
Normal file
Binary file not shown.
1902
test/data/UTF-8-test.txt.c
Normal file
1902
test/data/UTF-8-test.txt.c
Normal file
File diff suppressed because it is too large
Load diff
|
@ -1,23 +1,32 @@
|
|||
project(tawashi_unittest CXX)
|
||||
project(tawashi_unittest CXX C)
|
||||
|
||||
find_package(GLIB 2.20 REQUIRED)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 14)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
# Strict warnings for the C sources, appended to each configuration's own
# flags (Release must extend CMAKE_C_FLAGS_RELEASE, not the Debug flags).
set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -Wall -Wpedantic -Wconversion -Werror")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Wall -Wpedantic -Wconversion -Werror")
|
||||
|
||||
add_executable(${PROJECT_NAME}
|
||||
check.cpp
|
||||
fake_cgi_env.cpp
|
||||
test_ini_file.cpp
|
||||
test_settings_bag.cpp
|
||||
test_index_response.cpp
|
||||
test_invalid_utf8_post.cpp
|
||||
../data/UTF-8-test.txt.c
|
||||
)
|
||||
|
||||
# Headers for the test binary: Catch, the generated headers and GLib.
target_include_directories(${PROJECT_NAME} PRIVATE
    ${TAWASHI_SOURCE_ROOT}/lib/Catch/single_include
    ${TAWASHI_GEN_INCLUDE_DIR}
    ${GLIB_INCLUDE_DIRS}
)

# Libraries linked into the test binary only; nothing is propagated.
target_link_libraries(${PROJECT_NAME} PRIVATE
    tawashi_implem
    duckhandy
    ${GLIB_LIBRARIES}
)
|
||||
|
||||
add_test(
|
||||
|
|
72
test/unit/test_invalid_utf8_post.cpp
Normal file
72
test/unit/test_invalid_utf8_post.cpp
Normal file
|
@ -0,0 +1,72 @@
|
|||
/* Copyright 2017, Michele Santullo
|
||||
* This file is part of "tawashi".
|
||||
*
|
||||
* "tawashi" is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* "tawashi" is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with "tawashi". If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "catch.hpp"
|
||||
#include "cgi_post.hpp"
|
||||
#include "cgi_env.hpp"
|
||||
#include "sanitized_utf8.hpp"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
#include <iterator>
|
||||
#include <fstream>
|
||||
#include <ciso646>
|
||||
#include <glib.h>
|
||||
|
||||
extern "C" {
|
||||
extern const unsigned char UTF_8_test_txt[];
|
||||
extern const unsigned int UTF_8_test_txt_len;
|
||||
} //extern C
|
||||
|
||||
// Feeds the embedded UTF-8 stress-test file to read_post() as the value of a
// POST field named "invalid_text" and checks that the value stored in the
// resulting map is valid UTF-8 according to GLib.
TEST_CASE ("Retrieve and sanitize invalid an invalid utf-8 text", "[utf8][security]") {
	using tawashi::cgi::PostMapType;

	const std::string field_name("invalid_text");

	// POST body: "invalid_text=" followed by the raw bytes of the test file.
	std::string post_body(field_name + "=");
	post_body.reserve(post_body.size() + UTF_8_test_txt_len);
	post_body.append(reinterpret_cast<const char*>(UTF_8_test_txt), UTF_8_test_txt_len);

	// CONTENT_LENGTH describes the whole request body (RFC 3875), so it has
	// to include the "invalid_text=" prefix and not just the payload.
	// Computed before post_body is moved into the stream.
	const std::string content_length = "CONTENT_LENGTH=" + std::to_string(post_body.size());

	std::istringstream iss;
	iss >> std::noskipws;
	iss.str(std::move(post_body));

	// Null-terminated environment block handed to the fake CGI environment.
	const char* const fake_env[] = {
		content_length.c_str(),
		nullptr
	};

	tawashi::cgi::Env env(fake_env);
	const PostMapType& post_data = read_post(iss, env);

	// Look the field up once; at() throws if the field is missing.
	const auto& sanitized = post_data.at(field_name);
	CHECK(g_utf8_validate(sanitized.data(), static_cast<gssize>(sanitized.size()), nullptr));

	// TODO: also compare the sanitized text line by line against a known-good
	// reference once such a reference is embedded next to UTF_8_test_txt.
}
|
Loading…
Reference in a new issue