From 2f0001475897c65e042c5a4c65788c26fa5cb10c Mon Sep 17 00:00:00 2001 From: King_DuckZ Date: Fri, 2 Jun 2017 09:23:35 +0100 Subject: [PATCH] Fail if CONTENT_TYPE is not application/x-www-form-urlencoded. As part of the partial improvement to the POST reading code I also added a max_post_size setting which defaults to 1 MiB. POST inputs longer than that size get truncated. This is separate to max_pastie_size, which is just the size of one of the values in the POST data. --- src/tawashi/main.cpp | 1 + src/tawashi_implem/cgi_env.cpp | 17 ++++++ src/tawashi_implem/cgi_env.hpp | 3 ++ src/tawashi_implem/cgi_post.cpp | 56 ++++++++++++++------ src/tawashi_implem/cgi_post.hpp | 7 +++ src/tawashi_implem/error_reasons.hpp | 4 +- src/tawashi_implem/error_response.cpp | 4 +- src/tawashi_implem/submit_paste_response.cpp | 16 ++++-- test/unit/test_invalid_utf8_post.cpp | 1 + 9 files changed, 87 insertions(+), 22 deletions(-) diff --git a/src/tawashi/main.cpp b/src/tawashi/main.cpp index a794b09..79ca02c 100644 --- a/src/tawashi/main.cpp +++ b/src/tawashi/main.cpp @@ -84,6 +84,7 @@ namespace { parSettings.add_default("resubmit_wait", "10"); parSettings.add_default("log_file", "-"); parSettings.add_default("highlight_css", "sh_darkness.css"); + parSettings.add_default("max_post_size", "1048576"); } void print_buildtime_info() { diff --git a/src/tawashi_implem/cgi_env.cpp b/src/tawashi_implem/cgi_env.cpp index a601219..0c39366 100644 --- a/src/tawashi_implem/cgi_env.cpp +++ b/src/tawashi_implem/cgi_env.cpp @@ -18,6 +18,7 @@ #include "cgi_env.hpp" #include "cgi_environment_vars.hpp" #include "duckhandy/lexical_cast.hpp" +#include "tawashi_exception.hpp" #include #include #include @@ -114,6 +115,18 @@ namespace cgi { m_skip_path_info(calculate_skip_path_length(m_cgi_env[CGIVars::PATH_INFO], parBasePath)), m_request_method_type(RequestMethodType::_from_string(m_cgi_env[CGIVars::REQUEST_METHOD].data())) { + { + const std::string& content_type = m_cgi_env.at(CGIVars::CONTENT_TYPE); + int parsed_chars; + bool parse_ok; + m_split_mime = string_to_mime(&content_type, parse_ok, parsed_chars); + if (not parse_ok) { + std::string err_msg = "Parsing failed at position " + + std::to_string(parsed_chars) + " for input \"" + + content_type + "\""; + throw TawashiException(ErrorReasons::InvalidContentType, boost::string_ref(err_msg)); + } + } } Env::~Env() noexcept = default; @@ -217,6 +230,10 @@ namespace cgi { return retval; } + const SplitMime& Env::content_type_split() const { + return m_split_mime; + } + std::ostream& Env::print_all (std::ostream& parStream, const char* parNewline) const { for (std::size_t z = 0; z < m_cgi_env.size(); ++z) { parStream << CGIVars::_from_integral(z) << diff --git a/src/tawashi_implem/cgi_env.hpp b/src/tawashi_implem/cgi_env.hpp index c41905c..f074ac0 100644 --- a/src/tawashi_implem/cgi_env.hpp +++ b/src/tawashi_implem/cgi_env.hpp @@ -22,6 +22,7 @@ #include "escapist.hpp" #include "kakoune/safe_ptr.hh" #include "request_method_type.hpp" +#include "mime_split.hpp" #include #include #include @@ -67,6 +68,7 @@ namespace tawashi { const std::string& server_software() const; GetMapType query_string_split() const a_pure; + const SplitMime& content_type_split() const a_pure; std::ostream& print_all (std::ostream& parStream, const char* parNewline) const; @@ -75,6 +77,7 @@ namespace tawashi { Escapist m_houdini; std::size_t m_skip_path_info; RequestMethodType m_request_method_type; + SplitMime m_split_mime; }; } //namespace cgi } //namespace tawashi diff --git a/src/tawashi_implem/cgi_post.cpp b/src/tawashi_implem/cgi_post.cpp index 4c9e583..5be1f89 100644 --- a/src/tawashi_implem/cgi_post.cpp +++ b/src/tawashi_implem/cgi_post.cpp @@ -28,11 +28,42 @@ #include namespace tawashi { + UnsupportedContentTypeException::UnsupportedContentTypeException (const boost::string_ref& parMessage) : + TawashiException(ErrorReasons::UnsupportedContentType, parMessage) + { + } + namespace cgi { namespace { + bool valid_content_type (const Env& parEnv) { + if (parEnv.content_type_split().type != "application" or + parEnv.content_type_split().subtype != + "x-www-form-urlencoded") { + return false; + } + return true; + } + + std::string read_n (std::istream& parSrc, std::size_t parSize) { + if (0 == parSize) + return std::string(); + + std::string original_data; + original_data.reserve(parSize); + std::copy_n( + std::istream_iterator(parSrc), + parSize, + std::back_inserter(original_data) + ); + return sanitized_utf8(original_data); + } } //unnamed namespace const PostMapType& read_post (std::istream& parSrc, const Env& parEnv) { + return read_post(parSrc, parEnv, parEnv.content_length()); + } + + const PostMapType& read_post (std::istream& parSrc, const Env& parEnv, std::size_t parMaxLen) { static bool already_read = false; static PostMapType map; static std::string original_data; @@ -41,22 +72,17 @@ namespace tawashi { assert(original_data.empty()); assert(map.empty()); - const auto input_len = parEnv.content_length(); - if (input_len > 0) { - original_data.reserve(input_len); - std::copy_n( - std::istream_iterator(parSrc), - input_len, - std::back_inserter(original_data) - ); - original_data = sanitized_utf8(original_data); + if (not valid_content_type(parEnv)) { + throw UnsupportedContentTypeException(parEnv.content_type()); + } - Escapist houdini; - for (auto& itm : split_env_vars(original_data)) { - std::string key(houdini.unescape_url(itm.first)); - std::string val(houdini.unescape_url(itm.second)); - map[std::move(key)] = std::move(val); - } + const auto input_len = std::min(parMaxLen, parEnv.content_length()); + original_data = read_n(parSrc, input_len); + Escapist houdini; + for (auto& itm : split_env_vars(original_data)) { + std::string key(houdini.unescape_url(itm.first)); + std::string val(houdini.unescape_url(itm.second)); + map[std::move(key)] = std::move(val); } already_read = true; diff --git a/src/tawashi_implem/cgi_post.hpp b/src/tawashi_implem/cgi_post.hpp index b445bc8..4b1d336 100644 --- a/src/tawashi_implem/cgi_post.hpp +++ b/src/tawashi_implem/cgi_post.hpp @@ -17,11 +17,17 @@ #pragma once +#include "tawashi_exception.hpp" #include #include #include +#include namespace tawashi { + class UnsupportedContentTypeException : public TawashiException { + public: + explicit UnsupportedContentTypeException (const boost::string_ref& parMessage); + }; namespace cgi { class Env; @@ -29,5 +35,6 @@ namespace tawashi { typedef boost::container::flat_map PostMapType; const PostMapType& read_post (std::istream& parSrc, const Env& parEnv); + const PostMapType& read_post (std::istream& parSrc, const Env& parEnv, std::size_t parMaxLen); } //namespace cgi } //namespace tawashi diff --git a/src/tawashi_implem/error_reasons.hpp b/src/tawashi_implem/error_reasons.hpp index acab9f3..2d3986f 100644 --- a/src/tawashi_implem/error_reasons.hpp +++ b/src/tawashi_implem/error_reasons.hpp @@ -27,6 +27,8 @@ namespace tawashi { UnkownReason, RedisDisconnected, MissingPostVariable, - PastieNotFound + PastieNotFound, + InvalidContentType, + UnsupportedContentType ) } //namespace tawashi diff --git a/src/tawashi_implem/error_response.cpp b/src/tawashi_implem/error_response.cpp index 292127c..0a1fe70 100644 --- a/src/tawashi_implem/error_response.cpp +++ b/src/tawashi_implem/error_response.cpp @@ -50,7 +50,9 @@ namespace tawashi { "An unknown error was raised.", "Unable to connect to Redis.", "Request is missing a POST variable.", - "Pastie not found." + "Pastie not found.", + "Invalid CONTENT_TYPE.", + "Unsupported CONTENT_TYPE." }; constexpr const auto lengths = string_lengths(err_descs); static_assert(err_descs.static_size == lengths.static_size, "Mismatching array sizes between strings and their lengths"); diff --git a/src/tawashi_implem/submit_paste_response.cpp b/src/tawashi_implem/submit_paste_response.cpp index b70d87c..69db5c5 100644 --- a/src/tawashi_implem/submit_paste_response.cpp +++ b/src/tawashi_implem/submit_paste_response.cpp @@ -84,7 +84,6 @@ namespace tawashi { } HttpHeader SubmitPasteResponse::on_process() { - auto post = cgi::read_post(std::cin, cgi_env()); boost::string_ref pastie; boost::string_ref lang; boost::string_ref duration; @@ -92,18 +91,25 @@ namespace tawashi { auto statuslog = spdlog::get("statuslog"); assert(statuslog); + const SettingsBag& settings = this->settings(); try { + auto post = cgi::read_post(std::cin, cgi_env(), settings.as("max_post_size")); pastie = get_value_from_post(post, make_string_ref(g_post_key)); + lang = get_value_from_post_log_failure(post, make_string_ref(g_language_key)); + duration = get_value_from_post_log_failure(post, make_string_ref(g_duration_key)); + } + catch (const UnsupportedContentTypeException& err) { + statuslog->info( + "Unsupported content type exception: \"{}\"", + err.what() + ); + return make_error_redirect(ErrorReasons::UnsupportedContentType); } catch (const TawashiException& e) { statuslog->error(e.what()); return make_error_redirect(e.reason()); } - lang = get_value_from_post_log_failure(post, make_string_ref(g_language_key)); - duration = get_value_from_post_log_failure(post, make_string_ref(g_duration_key)); - - const SettingsBag& settings = this->settings(); const auto max_sz = settings.as("max_pastie_size"); if (pastie.size() < settings.as("min_pastie_size")) { return make_error_redirect(ErrorReasons::PostLengthNotInRange); diff --git a/test/unit/test_invalid_utf8_post.cpp b/test/unit/test_invalid_utf8_post.cpp index df48a16..ff8be37 100644 --- a/test/unit/test_invalid_utf8_post.cpp +++ b/test/unit/test_invalid_utf8_post.cpp @@ -53,6 +53,7 @@ TEST_CASE ("Retrieve and sanitize invalid an invalid utf-8 text from POST data", content_length.c_str(), "PATH_INFO=/", "REQUEST_METHOD=GET", + "CONTENT_TYPE=application/x-www-form-urlencoded", nullptr };