diff --git a/src/tawashi_implem/CMakeLists.txt b/src/tawashi_implem/CMakeLists.txt index 074cd66..7b27dc8 100644 --- a/src/tawashi_implem/CMakeLists.txt +++ b/src/tawashi_implem/CMakeLists.txt @@ -29,6 +29,7 @@ add_library(${PROJECT_NAME} STATIC http_header.cpp quick_submit_paste_response.cpp ip_utils.cpp + mime_split.cpp ) target_include_directories(${PROJECT_NAME} diff --git a/src/tawashi_implem/mime_split.cpp b/src/tawashi_implem/mime_split.cpp new file mode 100644 index 0000000..ff96f73 --- /dev/null +++ b/src/tawashi_implem/mime_split.cpp @@ -0,0 +1,145 @@ +/* Copyright 2017, Michele Santullo + * This file is part of "tawashi". + * + * "tawashi" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "tawashi" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with "tawashi". If not, see <http://www.gnu.org/licenses/>. + */ + +#include "mime_split.hpp" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// The Internet Media Type [9] of the attached entity. The syntax is +// the same as the HTTP Content-Type header. +// +// CONTENT_TYPE = "" | media-type +// media-type = type "/" subtype *( ";" parameter) +// type = token +// subtype = token +// parameter = attribute "=" value +// attribute = token +// value = token | quoted-string +// +// The type, subtype and parameter attribute names are not +// case-sensitive. Parameter values may be case sensitive. 
Media +// types and their use in HTTP are described in section 3.6 of the +// HTTP/1.0 specification [3]. Example: +// +// application/x-www-form-urlencoded +// +// There is no default value for this variable. If and only if it is +// unset, then the script may attempt to determine the media type +// from the data received. If the type remains unknown, then +// application/octet-stream should be assumed. + +BOOST_FUSION_ADAPT_STRUCT( + tawashi::SplitMime, + (boost::string_ref, type) + (boost::string_ref, subtype) + (tawashi::MimeParametersMapType, parameters) +); + +namespace tawashi { + namespace { + template + struct MimeGrammar : boost::spirit::qi::grammar { + explicit MimeGrammar (const std::string* parString); + + boost::spirit::qi::rule content_type; + boost::spirit::qi::rule media_type; + boost::spirit::qi::rule type; + boost::spirit::qi::rule subtype; + boost::spirit::qi::rule parameter; + boost::spirit::qi::rule attribute; + boost::spirit::qi::rule value; + boost::spirit::qi::rule quoted_string; + boost::spirit::qi::rule token; + const std::string* m_master_string; + Iterator m_begin; + }; + + template + MimeGrammar::MimeGrammar (const std::string* parString) : + MimeGrammar::base_type(content_type), + m_master_string(parString), + m_begin(m_master_string->cbegin()) + { + using boost::spirit::ascii::space; + using boost::spirit::qi::char_; + using boost::spirit::qi::lit; + using boost::spirit::qi::raw; + using boost::spirit::qi::_val; + using boost::spirit::qi::lexeme; + using boost::string_ref; + using boost::spirit::_1; + using boost::phoenix::begin; + using boost::phoenix::size; + namespace px = boost::phoenix; + + content_type = -media_type; + media_type = type >> "/" >> subtype >> *(lit(";") >> parameter); + type = token.alias(); + subtype = token.alias(); + parameter = attribute >> "=" >> value; + attribute = token.alias(); + value = token | quoted_string; + + token = raw[+(char_ - ';' - '/' - '=')][_val = 
px::bind(&string_ref::substr, px::construct(px::ref(*m_master_string)), begin(_1) - px::ref(m_begin), size(_1))]; + quoted_string = raw[lexeme['"' >> +(char_ - '"') >> '"']][_val = px::bind(&string_ref::substr, px::construct(px::ref(*m_master_string)), begin(_1) - px::ref(m_begin), size(_1))]; + } + } //unnamed namespace + + SplitMime split_mime (const std::string* parMime, bool& parParseOk, int& parParsedCharCount) { + using boost::spirit::qi::blank; + using boost::spirit::qi::blank_type; + + MimeGrammar gramm(parMime); + SplitMime result; + + parParseOk = false; + parParsedCharCount = 0; + + std::string::const_iterator start_it = parMime->cbegin(); + const bool parse_ok = boost::spirit::qi::phrase_parse( + start_it, + parMime->cend(), + gramm, + blank, + result + ); + + parParseOk = parse_ok and (parMime->cend() == start_it); + parParsedCharCount = std::distance(parMime->cbegin(), start_it); + assert(parParsedCharCount >= 0); + return result; + } +} //namespace tawashi diff --git a/src/tawashi_implem/mime_split.hpp b/src/tawashi_implem/mime_split.hpp new file mode 100644 index 0000000..a767c9a --- /dev/null +++ b/src/tawashi_implem/mime_split.hpp @@ -0,0 +1,34 @@ +/* Copyright 2017, Michele Santullo + * This file is part of "tawashi". + * + * "tawashi" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "tawashi" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with "tawashi". If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#pragma once + +#include +#include +#include + +namespace tawashi { + typedef boost::container::flat_map MimeParametersMapType; + + struct SplitMime { + boost::string_ref type; + boost::string_ref subtype; + MimeParametersMapType parameters; + }; + + SplitMime split_mime (const std::string* parMime, bool& parParseOk, int& parParsedCharCount); +} //namespace tawashi diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index e2c2800..ec19c8c 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -15,6 +15,7 @@ add_executable(${PROJECT_NAME} test_invalid_utf8_post.cpp ../data/UTF-8-test.txt.c test_invalid_utf8_get.cpp + test_mime_split.cpp ) target_include_directories(${PROJECT_NAME} diff --git a/test/unit/test_ini_file.cpp b/test/unit/test_ini_file.cpp index 53a8463..7ce4ddf 100644 --- a/test/unit/test_ini_file.cpp +++ b/test/unit/test_ini_file.cpp @@ -21,7 +21,7 @@ #include #include -TEST_CASE ("Test parsing an ini text", "[ini]") { +TEST_CASE ("Test parsing an ini text", "[ini][parser]") { using tawashi::IniFile; //empty data diff --git a/test/unit/test_mime_split.cpp b/test/unit/test_mime_split.cpp new file mode 100644 index 0000000..bfb511a --- /dev/null +++ b/test/unit/test_mime_split.cpp @@ -0,0 +1,42 @@ +/* Copyright 2017, Michele Santullo + * This file is part of "tawashi". + * + * "tawashi" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "tawashi" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with "tawashi". If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "catch.hpp" +#include "mime_split.hpp" + +TEST_CASE ("Test the Mime-type splitter", "[mime][parser]") { + using tawashi::SplitMime; + using tawashi::split_mime; + + bool ok; + int parsed_count; + { + std::string test("application/x-javascript; charset=UTF-8"); + SplitMime split = split_mime(&test, ok, parsed_count); + REQUIRE(ok); + CHECK(test.size() == parsed_count); + CHECK(split.type == "application"); + CHECK(split.subtype == "x-javascript"); + REQUIRE(split.parameters.size() == 1); + CHECK(split.parameters.find("charset") != split.parameters.end()); + CHECK(split.parameters.at("charset") == "UTF-8"); + } + + { + std::string test("image/jpeg; filename=genome.jpeg; modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\""); + } +}