From 4ff6719077fc54d86886e02c2fd338570aa9839e Mon Sep 17 00:00:00 2001 From: King_DuckZ Date: Thu, 25 May 2017 19:09:11 +0100 Subject: [PATCH] Fix bug in the mime parser. The token rule was matching even when the quoted_string rule should have matched, which was preventing the " stripping from working. In fact quoted_string was never being used. Also improve formatting of long lines. --- src/tawashi_implem/mime_split.cpp | 24 ++++++++++++++++++------ test/unit/test_mime_split.cpp | 28 +++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/src/tawashi_implem/mime_split.cpp b/src/tawashi_implem/mime_split.cpp index ff96f73..c86a59f 100644 --- a/src/tawashi_implem/mime_split.cpp +++ b/src/tawashi_implem/mime_split.cpp @@ -31,11 +31,14 @@ #include #include #include +#include +#include #include #include #include -#include -#include +#include +#include +#include #include // The Internet Media Type [9 <#ref-9>] of the attached entity. The syntax is @@ -96,13 +99,12 @@ namespace tawashi { using boost::spirit::ascii::space; using boost::spirit::qi::char_; using boost::spirit::qi::lit; + using boost::spirit::qi::alnum; using boost::spirit::qi::raw; using boost::spirit::qi::_val; using boost::spirit::qi::lexeme; using boost::string_ref; using boost::spirit::_1; - using boost::phoenix::begin; - using boost::phoenix::size; namespace px = boost::phoenix; content_type = -media_type; @@ -113,8 +115,18 @@ namespace tawashi { attribute = token.alias(); value = token | quoted_string; - token = raw[+(char_ - ';' - '/' - '=')][_val = px::bind(&string_ref::substr, px::construct(px::ref(*m_master_string)), begin(_1) - px::ref(m_begin), size(_1))]; - quoted_string = raw[lexeme['"' >> +(char_ - '"') >> '"']][_val = px::bind(&string_ref::substr, px::construct(px::ref(*m_master_string)), begin(_1) - px::ref(m_begin), size(_1))]; + token = raw[+(alnum | char_("-_."))][_val = px::bind(&string_ref::substr, px::construct(px::ref(*m_master_string)), px::begin(_1) - 
px::ref(m_begin), px::size(_1))]; + quoted_string = raw[ + lexeme[ + lit('"') >> + *(char_ - '"') >> + '"' + ] + ][_val = px::bind( + &string_ref::substr, px::construct(px::ref(*m_master_string)), + px::begin(_1) + 1 - px::ref(m_begin), + px::size(_1) - 2 + )]; } } //unnamed namespace diff --git a/test/unit/test_mime_split.cpp b/test/unit/test_mime_split.cpp index bfb511a..c626d4c 100644 --- a/test/unit/test_mime_split.cpp +++ b/test/unit/test_mime_split.cpp @@ -18,6 +18,12 @@ #include "catch.hpp" #include "mime_split.hpp" +namespace { + std::string to_string (const boost::string_ref& parRef) { + return std::string(parRef.data(), parRef.size()); + } +} //unnamed namespace + TEST_CASE ("Test the Mime-type splitter", "[mime][parser]") { using tawashi::SplitMime; using tawashi::split_mime; @@ -26,17 +32,37 @@ TEST_CASE ("Test the Mime-type splitter", "[mime][parser]") { int parsed_count; { std::string test("application/x-javascript; charset=UTF-8"); + std::string curr_val; SplitMime split = split_mime(&test, ok, parsed_count); + REQUIRE(ok); CHECK(test.size() == parsed_count); CHECK(split.type == "application"); CHECK(split.subtype == "x-javascript"); REQUIRE(split.parameters.size() == 1); + CHECK(split.parameters.find("charset") != split.parameters.end()); - CHECK(split.parameters.at("charset") == "UTF-8"); + curr_val = to_string(split.parameters.at("charset")); + CHECK(curr_val == "UTF-8"); } { std::string test("image/jpeg; filename=genome.jpeg; modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\""); + std::string curr_val; + SplitMime split = split_mime(&test, ok, parsed_count); + + REQUIRE(ok); + CHECK(test.size() == parsed_count); + CHECK(split.type == "image"); + CHECK(split.subtype == "jpeg"); + REQUIRE(split.parameters.size() == 2); + + CHECK(split.parameters.find("filename") != split.parameters.end()); + curr_val = to_string(split.parameters.at("filename")); + CHECK(curr_val == "genome.jpeg"); + + CHECK(split.parameters.find("modification-date") != 
split.parameters.end()); + curr_val = to_string(split.parameters.at("modification-date")); + CHECK(curr_val == "Wed, 12 Feb 1997 16:29:51 -0500"); } }