From 4ff6719077fc54d86886e02c2fd338570aa9839e Mon Sep 17 00:00:00 2001
From: King_DuckZ <king_duckz@gmx.com>
Date: Thu, 25 May 2017 19:09:11 +0100
Subject: [PATCH] Fix bug in the mime parser.

The token rule was matching even when the quoted_string
rule should have matched, which prevented the surrounding
double quotes (") from being stripped. In fact quoted_string
was never being used.
Also improve formatting of long lines.
---
 src/tawashi_implem/mime_split.cpp | 24 ++++++++++++++++++------
 test/unit/test_mime_split.cpp     | 28 +++++++++++++++++++++++++++-
 2 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/src/tawashi_implem/mime_split.cpp b/src/tawashi_implem/mime_split.cpp
index ff96f73..c86a59f 100644
--- a/src/tawashi_implem/mime_split.cpp
+++ b/src/tawashi_implem/mime_split.cpp
@@ -31,11 +31,14 @@
 #include <boost/fusion/include/adapt_struct.hpp>
 #include <boost/spirit/include/qi_difference.hpp>
 #include <boost/fusion/include/std_pair.hpp>
+#include <boost/phoenix/function/lazy_prelude.hpp>
+#include <boost/phoenix/core.hpp>
 #include <boost/phoenix/object/construct.hpp>
 #include <boost/phoenix/stl/container.hpp>
 #include <boost/phoenix/bind/bind_member_function.hpp>
-#include <boost/phoenix/operator.hpp>
-#include <boost/functional/hash.hpp>
+#include <boost/phoenix/operator/arithmetic.hpp>
+#include <boost/phoenix/operator/self.hpp>
+#include <boost/phoenix/stl/algorithm/transformation.hpp>
 #include <cassert>
 
 //    The Internet Media Type [9 <#ref-9>] of the attached entity. The syntax is
@@ -96,13 +99,12 @@ namespace tawashi {
 			using boost::spirit::ascii::space;
 			using boost::spirit::qi::char_;
 			using boost::spirit::qi::lit;
+			using boost::spirit::qi::alnum;
 			using boost::spirit::qi::raw;
 			using boost::spirit::qi::_val;
 			using boost::spirit::qi::lexeme;
 			using boost::string_ref;
 			using boost::spirit::_1;
-			using boost::phoenix::begin;
-			using boost::phoenix::size;
 			namespace px = boost::phoenix;
 
 			content_type = -media_type;
@@ -113,8 +115,18 @@ namespace tawashi {
 			attribute = token.alias();
 			value = token | quoted_string;
 
-			token = raw[+(char_ - ';' - '/' - '=')][_val = px::bind(&string_ref::substr, px::construct<string_ref>(px::ref(*m_master_string)), begin(_1) - px::ref(m_begin), size(_1))];
-			quoted_string = raw[lexeme['"' >> +(char_ - '"') >> '"']][_val = px::bind(&string_ref::substr, px::construct<string_ref>(px::ref(*m_master_string)), begin(_1) - px::ref(m_begin), size(_1))];
+			token = raw[+(alnum | char_("-_."))][_val = px::bind(&string_ref::substr, px::construct<string_ref>(px::ref(*m_master_string)), px::begin(_1) - px::ref(m_begin), px::size(_1))];
+			quoted_string = raw[
+				lexeme[
+					lit('"') >>
+					*(char_ - '"') >>
+					'"'
+				]
+			][_val = px::bind(
+				&string_ref::substr, px::construct<string_ref>(px::ref(*m_master_string)),
+				px::begin(_1) + 1 - px::ref(m_begin),
+				px::size(_1) - 2
+			)];
 		}
 	} //unnamed namespace
 
diff --git a/test/unit/test_mime_split.cpp b/test/unit/test_mime_split.cpp
index bfb511a..c626d4c 100644
--- a/test/unit/test_mime_split.cpp
+++ b/test/unit/test_mime_split.cpp
@@ -18,6 +18,12 @@
 #include "catch.hpp"
 #include "mime_split.hpp"
 
+namespace {
+	std::string to_string (const boost::string_ref& parRef) { //file-local helper: returns an owning std::string copy of the characters parRef refers to
+		return std::string(parRef.data(), parRef.size()); //pointer+length constructor: copies exactly size() chars, no terminating null needed
+	}
+} //unnamed namespace
+
 TEST_CASE ("Test the Mime-type splitter", "[mime][parser]") {
 	using tawashi::SplitMime;
 	using tawashi::split_mime;
@@ -26,17 +32,37 @@ TEST_CASE ("Test the Mime-type splitter", "[mime][parser]") {
 	int parsed_count;
 	{
 		std::string test("application/x-javascript; charset=UTF-8");
+		std::string curr_val;
 		SplitMime split = split_mime(&test, ok, parsed_count);
+
 		REQUIRE(ok);
 		CHECK(test.size() == parsed_count);
 		CHECK(split.type == "application");
 		CHECK(split.subtype == "x-javascript");
 		REQUIRE(split.parameters.size() == 1);
+
 		CHECK(split.parameters.find("charset") != split.parameters.end());
-		CHECK(split.parameters.at("charset") == "UTF-8");
+		curr_val = to_string(split.parameters.at("charset"));
+		CHECK(curr_val == "UTF-8");
 	}
 
 	{
 		std::string test("image/jpeg; filename=genome.jpeg; modification-date=\"Wed, 12 Feb 1997 16:29:51 -0500\"");
+		std::string curr_val;
+		SplitMime split = split_mime(&test, ok, parsed_count);
+
+		REQUIRE(ok);
+		CHECK(test.size() == parsed_count);
+		CHECK(split.type == "image");
+		CHECK(split.subtype == "jpeg");
+		REQUIRE(split.parameters.size() == 2);
+
+		CHECK(split.parameters.find("filename") != split.parameters.end());
+		curr_val = to_string(split.parameters.at("filename"));
+		CHECK(curr_val == "genome.jpeg");
+
+		CHECK(split.parameters.find("modification-date") != split.parameters.end());
+		curr_val = to_string(split.parameters.at("modification-date"));
+		CHECK(curr_val == "Wed, 12 Feb 1997 16:29:51 -0500");
 	}
 }