2016-03-27 08:47:46 +00:00
|
|
|
/*=============================================================================
Copyright (c) 2011-2019 Bolero MURAKAMI
https://github.com/bolero-MURAKAMI/Sprout

Distributed under the Boost Software License, Version 1.0. (See accompanying
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
|
|
|
|
|
2016-03-28 16:35:57 +00:00
|
|
|
#include <stdexcept>
|
2016-03-27 08:47:46 +00:00
|
|
|
#include <sprout/workaround/std/cstddef.hpp>
|
|
|
|
#include <sprout/array.hpp>
|
|
|
|
#include <sprout/sub_array.hpp>
|
|
|
|
#include <sprout/range.hpp>
|
|
|
|
#include <sprout/iterator.hpp>
|
|
|
|
#include <sprout/memory.hpp>
|
|
|
|
#include <sprout/string.hpp>
|
|
|
|
#include <sprout/algorithm.hpp>
|
|
|
|
|
|
|
|
//
// csv_parser_settings
//
// Immutable bundle of the three special characters used by the CSV parser:
// the field delimiter, the quote character and the escape character.
// All members are constexpr so a settings object can be built and queried
// in constant expressions.
//
template<typename Char>
struct csv_parser_settings {
public:
	// Character type of the text being parsed.
	using value_type = Char;
private:
	value_type delim_;
	value_type quote_;
	value_type escape_;
public:
	// Builds settings whose escape character is the same as the quote
	// character. Defaults: delimiter ',' and quote '"'.
	constexpr csv_parser_settings(value_type delim = Char(','), value_type quote = Char('\"'))
		: delim_(delim), quote_(quote), escape_(quote)
	{}
	// Builds settings with an explicitly chosen escape character.
	constexpr csv_parser_settings(value_type delim, value_type quote, value_type escape)
		: delim_(delim), quote_(quote), escape_(escape)
	{}
	// Returns the field delimiter.
	constexpr value_type delimiter() const {
		return delim_;
	}
	// Returns the quote character.
	constexpr value_type quote() const {
		return quote_;
	}
	// Returns the escape character (equal to quote() when the two-argument
	// constructor was used).
	constexpr value_type escape() const {
		return escape_;
	}
};
|
|
|
|
|
|
|
|
template<std::size_t N, std::size_t L, typename String, typename ResultString = String>
|
2016-03-27 11:55:39 +00:00
|
|
|
constexpr sprout::array_sub_t<sprout::array_sub_t<ResultString, N>, L>
|
2016-03-27 08:47:46 +00:00
|
|
|
parse_csv(String const& src, csv_parser_settings<typename String::value_type> settings = csv_parser_settings<typename String::value_type>()) {
|
|
|
|
typedef typename String::value_type value_type;
|
2016-03-27 11:55:39 +00:00
|
|
|
sprout::array_sub_t<sprout::array_sub_t<ResultString, N>, L> result = {};
|
2016-03-27 08:47:46 +00:00
|
|
|
result.window(0, 1);
|
|
|
|
result.back().window(0, 0);
|
2016-03-28 16:35:57 +00:00
|
|
|
bool same_escape = settings.quote() == settings.escape();
|
2016-03-27 08:47:46 +00:00
|
|
|
auto delimiters = sprout::make_string(settings.delimiter(), value_type('\r'), value_type('\n'));
|
|
|
|
auto first = sprout::begin(src), last = sprout::end(src);
|
|
|
|
while (first != last) {
|
|
|
|
if (*first == settings.quote()) {
|
|
|
|
// quoted element
|
|
|
|
++first;
|
|
|
|
auto end_quote = sprout::find(first, last, settings.quote());
|
|
|
|
while (true) {
|
|
|
|
if (end_quote == last) {
|
2016-03-28 16:35:57 +00:00
|
|
|
throw std::runtime_error("end quote not found");
|
2016-03-27 08:47:46 +00:00
|
|
|
}
|
|
|
|
auto next = sprout::next(end_quote);
|
2016-03-28 16:35:57 +00:00
|
|
|
if (!same_escape && first != end_quote) {
|
|
|
|
auto prev = sprout::prev(end_quote);
|
|
|
|
if (*prev == settings.escape()) {
|
|
|
|
// escaped quote
|
|
|
|
end_quote = sprout::find(next, last, settings.quote());
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
2016-03-27 08:47:46 +00:00
|
|
|
if (next != last) {
|
2016-03-28 16:35:57 +00:00
|
|
|
if (*next == settings.quote()) {
|
2016-03-27 08:47:46 +00:00
|
|
|
// escaped quote
|
|
|
|
++next;
|
|
|
|
end_quote = sprout::find(next, last, settings.quote());
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!sprout::any_of_equal(delimiters.begin(), delimiters.end(), *next)) {
|
2016-03-28 16:35:57 +00:00
|
|
|
throw std::runtime_error("invalid quote");
|
2016-03-27 08:47:46 +00:00
|
|
|
}
|
|
|
|
if (*next == value_type('\r') && sprout::next(next) != last && *sprout::next(next) == value_type('\n')) {
|
|
|
|
++next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
result.back().push_back(
|
|
|
|
ResultString(sprout::addressof(*first), sprout::distance(first, end_quote))
|
|
|
|
);
|
|
|
|
first = next;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// no quoted element
|
|
|
|
auto next = sprout::find_first_of(first, last, delimiters.begin(), delimiters.end());
|
|
|
|
if (next != last) {
|
|
|
|
if (*next == value_type('\r') && sprout::next(next) != last && *sprout::next(next) == value_type('\n')) {
|
|
|
|
++next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
result.back().push_back(
|
|
|
|
ResultString(sprout::addressof(*first), sprout::distance(first, next))
|
|
|
|
);
|
|
|
|
first = next;
|
|
|
|
}
|
|
|
|
if (first != last) {
|
|
|
|
if ((*first == value_type('\r') || *first == value_type('\n')) && sprout::next(first) != last) {
|
|
|
|
result.offset(0, 1);
|
|
|
|
result.back().window(0, 0);
|
|
|
|
}
|
|
|
|
++first;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
#include <iostream>
|
|
|
|
#include <sprout/utility/string_view.hpp>
|
|
|
|
|
|
|
|
int main() {
|
|
|
|
using namespace sprout::udl;
|
2016-03-28 16:35:57 +00:00
|
|
|
constexpr auto settings = csv_parser_settings<char>(',', '\"', '\\');
|
2016-03-27 08:47:46 +00:00
|
|
|
constexpr auto src = R"(
|
|
|
|
(no quoted),a,b,c
|
|
|
|
(quoted),"d","e","f"
|
2016-03-28 16:35:57 +00:00
|
|
|
(escaped),"""g""","\"h\"","""i"""
|
2016-03-27 08:47:46 +00:00
|
|
|
(comma),"j,k","l,m","n,o"
|
|
|
|
(new line),"p
|
|
|
|
q","r
|
|
|
|
s","t
|
|
|
|
u"
|
|
|
|
)"_sv;
|
2016-03-28 16:35:57 +00:00
|
|
|
constexpr auto result = parse_csv<16, 16>(src, settings);
|
2016-03-27 08:47:46 +00:00
|
|
|
for (auto const& field : result) {
|
|
|
|
for (auto const& elem : field) {
|
|
|
|
std::cout
|
|
|
|
<< elem << std::endl
|
|
|
|
<< "----" << std::endl
|
|
|
|
;
|
|
|
|
}
|
|
|
|
std::cout
|
|
|
|
<< "----------------------------------------" << std::endl
|
|
|
|
;
|
|
|
|
}
|
|
|
|
}
|