diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b29817a..d07fb1b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -27,6 +27,7 @@ add_executable(${PROJECT_NAME} response_factory.cpp list_highlight_langs.cpp settings_bag.cpp + sanitized_utf8.cpp ) configure_file( diff --git a/src/cgi_environment_vars.cpp b/src/cgi_environment_vars.cpp index b1913e3..167982c 100644 --- a/src/cgi_environment_vars.cpp +++ b/src/cgi_environment_vars.cpp @@ -17,6 +17,7 @@ #include "cgi_environment_vars.hpp" #include "get_env.hpp" +#include namespace tawashi { std::vector cgi_environment_vars() { @@ -26,8 +27,8 @@ namespace tawashi { retlist.reserve(CGIVars::_size()); for (CGIVars var : CGIVars::_values()) { - auto value = get_env_as(var._to_string(), ""); - retlist.push_back(std::string(value.data(), value.size())); + auto value = get_env_as(var._to_string(), ""); + retlist.push_back(std::move(value)); } return retlist; } diff --git a/src/cgi_post.cpp b/src/cgi_post.cpp index d9ab676..c88d094 100644 --- a/src/cgi_post.cpp +++ b/src/cgi_post.cpp @@ -19,6 +19,7 @@ #include "cgi_env.hpp" #include "split_get_vars.hpp" #include "escapist.hpp" +#include "sanitized_utf8.hpp" #include #include #include @@ -48,6 +49,7 @@ namespace tawashi { input_len, std::back_inserter(original_data) ); + original_data = sanitized_utf8(original_data); Escapist houdini; for (auto& itm : split_env_vars(original_data)) { diff --git a/src/get_env.cpp b/src/get_env.cpp index 833ebbc..a9aff07 100644 --- a/src/get_env.cpp +++ b/src/get_env.cpp @@ -17,26 +17,24 @@ #include "get_env.hpp" #include "duckhandy/lexical_cast.hpp" +#include "sanitized_utf8.hpp" #include namespace tawashi { - boost::optional get_env (const char* parName) { + boost::optional get_env (const char* parName) { using boost::string_ref; using boost::make_optional; using boost::optional; const char* const raw_getvar = secure_getenv(parName); - return (raw_getvar ? 
make_optional(string_ref(raw_getvar)) : optional()); + if (raw_getvar) + return sanitized_utf8(boost::string_ref(raw_getvar)); + else + return optional(); } template <> std::string get_env_as (const char* parName, const std::string& parDefault) { - auto var = get_env(parName); - return (var ? std::string(var->data(), var->size()) : parDefault); - } - - template <> - boost::string_ref get_env_as (const char* parName, const boost::string_ref& parDefault) { auto var = get_env(parName); return (var ? *var : parDefault); } diff --git a/src/get_env.hpp b/src/get_env.hpp index 4493a15..46dbae7 100644 --- a/src/get_env.hpp +++ b/src/get_env.hpp @@ -27,7 +27,7 @@ #include namespace tawashi { - boost::optional get_env (const char* parName); + boost::optional get_env (const char* parName); template A get_env_as (const char* parName, const A& parDefault); @@ -35,7 +35,5 @@ namespace tawashi { template <> std::string get_env_as (const char* parName, const std::string& parDefault); template <> - boost::string_ref get_env_as (const char* parName, const boost::string_ref& parDefault); - template <> std::size_t get_env_as (const char* parName, const std::size_t& parDefault); } //namespace tawashi diff --git a/src/sanitized_utf8.cpp b/src/sanitized_utf8.cpp new file mode 100644 index 0000000..2454828 --- /dev/null +++ b/src/sanitized_utf8.cpp @@ -0,0 +1,29 @@ +/* Copyright 2017, Michele Santullo + * This file is part of "tawashi". + * + * "tawashi" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "tawashi" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with "tawashi". If not, see <http://www.gnu.org/licenses/>. + */ + +#include "sanitized_utf8.hpp" +#include "utf8.h" +#include + +namespace tawashi { + std::string sanitized_utf8 (const boost::string_ref& parStr) { + std::string sanitized; + sanitized.reserve(parStr.size()); + utf8::replace_invalid(parStr.begin(), parStr.end(), std::back_inserter(sanitized)); + return sanitized; + } +} //namespace tawashi diff --git a/src/sanitized_utf8.hpp b/src/sanitized_utf8.hpp new file mode 100644 index 0000000..4bdfeb9 --- /dev/null +++ b/src/sanitized_utf8.hpp @@ -0,0 +1,25 @@ +/* Copyright 2017, Michele Santullo + * This file is part of "tawashi". + * + * "tawashi" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "tawashi" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with "tawashi". If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include +#include + +namespace tawashi { + std::string sanitized_utf8 (const boost::string_ref& parStr); +} //namespace tawashi