mirror of
https://github.com/bolero-MURAKAMI/Sprout
synced 2024-11-12 21:09:01 +00:00
add csv-parser example
This commit is contained in:
parent
5bdb045730
commit
0f210dee5f
4 changed files with 190 additions and 34 deletions
137
example/csv/main.cpp
Normal file
137
example/csv/main.cpp
Normal file
|
@ -0,0 +1,137 @@
|
||||||
|
/*=============================================================================
|
||||||
|
Copyright (c) 2011-2016 Bolero MURAKAMI
|
||||||
|
https://github.com/bolero-MURAKAMI/Sprout
|
||||||
|
|
||||||
|
Distributed under the Boost Software License, Version 1.0. (See accompanying
|
||||||
|
file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
|
||||||
|
=============================================================================*/
|
||||||
|
|
||||||
|
#include <sprout/workaround/std/cstddef.hpp>
|
||||||
|
#include <sprout/array.hpp>
|
||||||
|
#include <sprout/sub_array.hpp>
|
||||||
|
#include <sprout/range.hpp>
|
||||||
|
#include <sprout/iterator.hpp>
|
||||||
|
#include <sprout/memory.hpp>
|
||||||
|
#include <sprout/string.hpp>
|
||||||
|
#include <sprout/algorithm.hpp>
|
||||||
|
|
||||||
|
template<typename T, std::size_t N>
|
||||||
|
using subbed_array = sprout::sub_array<sprout::array<T, N> >;
|
||||||
|
|
||||||
|
//
// csv_parser_settings
//
//  Holds the three special characters consulted by parse_csv:
//    delimiter() - separates elements within a row (default ',')
//    quote()     - opens and closes a quoted element (default '"')
//    escape()    - character compared against the one that follows a quote
//                  inside a quoted element (defaults to the quote character)
//
//  Char is the character type of the text being parsed.
//
template<typename Char>
struct csv_parser_settings {
public:
    typedef Char value_type;
private:
    value_type delim_;
    value_type quote_;
    value_type escape_;
public:
    // Zero-, one- and two-argument form: the escape character is set to the
    // same value as the quote character.
    constexpr csv_parser_settings(value_type delim = Char(','), value_type quote = Char('\"'))
        : delim_(delim), quote_(quote), escape_(quote)
    {}
    // Three-argument form: every character is given explicitly.
    constexpr csv_parser_settings(value_type delim, value_type quote, value_type escape)
        : delim_(delim), quote_(quote), escape_(escape)
    {}
    // Element separator.
    constexpr value_type delimiter() const {
        return delim_;
    }
    // Character that opens and closes a quoted element.
    constexpr value_type quote() const {
        return quote_;
    }
    // Escape character (equal to quote() unless set explicitly).
    constexpr value_type escape() const {
        return escape_;
    }
};
|
||||||
|
|
||||||
|
template<std::size_t N, std::size_t L, typename String, typename ResultString = String>
|
||||||
|
constexpr subbed_array<subbed_array<ResultString, N>, L>
|
||||||
|
parse_csv(String const& src, csv_parser_settings<typename String::value_type> settings = csv_parser_settings<typename String::value_type>()) {
|
||||||
|
typedef typename String::value_type value_type;
|
||||||
|
subbed_array<subbed_array<ResultString, N>, L> result = {};
|
||||||
|
result.window(0, 1);
|
||||||
|
result.back().window(0, 0);
|
||||||
|
auto delimiters = sprout::make_string(settings.delimiter(), value_type('\r'), value_type('\n'));
|
||||||
|
auto first = sprout::begin(src), last = sprout::end(src);
|
||||||
|
while (first != last) {
|
||||||
|
if (*first == settings.quote()) {
|
||||||
|
// quoted element
|
||||||
|
++first;
|
||||||
|
auto end_quote = sprout::find(first, last, settings.quote());
|
||||||
|
while (true) {
|
||||||
|
if (end_quote == last) {
|
||||||
|
throw 0;
|
||||||
|
}
|
||||||
|
auto next = sprout::next(end_quote);
|
||||||
|
if (next != last) {
|
||||||
|
if (*next == settings.escape()) {
|
||||||
|
// escaped quote
|
||||||
|
++next;
|
||||||
|
end_quote = sprout::find(next, last, settings.quote());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (!sprout::any_of_equal(delimiters.begin(), delimiters.end(), *next)) {
|
||||||
|
throw 0;
|
||||||
|
}
|
||||||
|
if (*next == value_type('\r') && sprout::next(next) != last && *sprout::next(next) == value_type('\n')) {
|
||||||
|
++next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result.back().push_back(
|
||||||
|
ResultString(sprout::addressof(*first), sprout::distance(first, end_quote))
|
||||||
|
);
|
||||||
|
first = next;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// no quoted element
|
||||||
|
auto next = sprout::find_first_of(first, last, delimiters.begin(), delimiters.end());
|
||||||
|
if (next != last) {
|
||||||
|
if (*next == value_type('\r') && sprout::next(next) != last && *sprout::next(next) == value_type('\n')) {
|
||||||
|
++next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result.back().push_back(
|
||||||
|
ResultString(sprout::addressof(*first), sprout::distance(first, next))
|
||||||
|
);
|
||||||
|
first = next;
|
||||||
|
}
|
||||||
|
if (first != last) {
|
||||||
|
if ((*first == value_type('\r') || *first == value_type('\n')) && sprout::next(first) != last) {
|
||||||
|
result.offset(0, 1);
|
||||||
|
result.back().window(0, 0);
|
||||||
|
}
|
||||||
|
++first;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <sprout/utility/string_view.hpp>
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
using namespace sprout::udl;
|
||||||
|
constexpr auto src = R"(
|
||||||
|
(no quoted),a,b,c
|
||||||
|
(quoted),"d","e","f"
|
||||||
|
(escaped),"""g""","""h""","""i"""
|
||||||
|
(comma),"j,k","l,m","n,o"
|
||||||
|
(new line),"p
|
||||||
|
q","r
|
||||||
|
s","t
|
||||||
|
u"
|
||||||
|
)"_sv;
|
||||||
|
constexpr auto result = parse_csv<16, 16>(src);
|
||||||
|
for (auto const& field : result) {
|
||||||
|
for (auto const& elem : field) {
|
||||||
|
std::cout
|
||||||
|
<< elem << std::endl
|
||||||
|
<< "----" << std::endl
|
||||||
|
;
|
||||||
|
}
|
||||||
|
std::cout
|
||||||
|
<< "----------------------------------------" << std::endl
|
||||||
|
;
|
||||||
|
}
|
||||||
|
}
|
|
@ -249,7 +249,7 @@ namespace sprout {
|
||||||
return sprout::ascii::detail::get_value(c) & sprout::ascii::detail::lower ? c - (0x61 - 0x41) : c; \
|
return sprout::ascii::detail::get_value(c) & sprout::ascii::detail::lower ? c - (0x61 - 0x41) : c; \
|
||||||
} \
|
} \
|
||||||
inline SPROUT_CONSTEXPR bool \
|
inline SPROUT_CONSTEXPR bool \
|
||||||
SPROUT_PP_CAT(is, SPROUT_PP_CAT(PREFIX, classified))(sprout::ctypes::mask_t m, CHAR_TYPE c) { \
|
SPROUT_PP_CAT(is, SPROUT_PP_CAT(PREFIX, classified))(sprout::ctypes::mask m, CHAR_TYPE c) { \
|
||||||
return (m | sprout::ctypes::alnum && (sprout::ascii::detail::get_value(c) & (sprout::ascii::detail::alpha | sprout::ascii::detail::digit))) \
|
return (m | sprout::ctypes::alnum && (sprout::ascii::detail::get_value(c) & (sprout::ascii::detail::alpha | sprout::ascii::detail::digit))) \
|
||||||
|| (m | sprout::ctypes::alpha && (sprout::ascii::detail::get_value(c) & sprout::ascii::detail::alpha)) \
|
|| (m | sprout::ctypes::alpha && (sprout::ascii::detail::get_value(c) & sprout::ascii::detail::alpha)) \
|
||||||
|| (m | sprout::ctypes::blank && (sprout::ascii::detail::get_value(c) & sprout::ascii::detail::blank)) \
|
|| (m | sprout::ctypes::blank && (sprout::ascii::detail::get_value(c) & sprout::ascii::detail::blank)) \
|
||||||
|
|
|
@ -384,9 +384,9 @@ namespace sprout {
|
||||||
typedef T argument_type;
|
typedef T argument_type;
|
||||||
typedef bool result_type;
|
typedef bool result_type;
|
||||||
private:
|
private:
|
||||||
sprout::ctypes::mask_t m_;
|
sprout::ctypes::mask m_;
|
||||||
public:
|
public:
|
||||||
explicit SPROUT_CONSTEXPR is_classified(sprout::ctypes::mask_t m)
|
explicit SPROUT_CONSTEXPR is_classified(sprout::ctypes::mask m)
|
||||||
: m_(m)
|
: m_(m)
|
||||||
{}
|
{}
|
||||||
SPROUT_CONSTEXPR bool
|
SPROUT_CONSTEXPR bool
|
||||||
|
@ -399,9 +399,9 @@ namespace sprout {
|
||||||
public:
|
public:
|
||||||
typedef bool result_type;
|
typedef bool result_type;
|
||||||
private:
|
private:
|
||||||
sprout::ctypes::mask_t m_;
|
sprout::ctypes::mask m_;
|
||||||
public:
|
public:
|
||||||
explicit SPROUT_CONSTEXPR is_classified(sprout::ctypes::mask_t m)
|
explicit SPROUT_CONSTEXPR is_classified(sprout::ctypes::mask m)
|
||||||
: m_(m)
|
: m_(m)
|
||||||
{}
|
{}
|
||||||
template<typename T>
|
template<typename T>
|
||||||
|
|
|
@ -10,40 +10,59 @@
|
||||||
|
|
||||||
#include <locale>
|
#include <locale>
|
||||||
#include <sprout/config.hpp>
|
#include <sprout/config.hpp>
|
||||||
|
#include <sprout/detail/predef.hpp>
|
||||||
|
|
||||||
namespace sprout {
|
namespace sprout {
|
||||||
namespace ctypes {
|
namespace ctypes {
|
||||||
//
|
//
|
||||||
// mask_t
|
// mask
|
||||||
//
|
//
|
||||||
typedef std::ctype_base::mask mask_t;
|
enum mask {
|
||||||
|
space = std::ctype_base::space,
|
||||||
//
|
print = std::ctype_base::print,
|
||||||
// space
|
cntrl = std::ctype_base::cntrl,
|
||||||
// print
|
upper = std::ctype_base::upper,
|
||||||
// cntrl
|
lower = std::ctype_base::lower,
|
||||||
// upper
|
alpha = std::ctype_base::alpha,
|
||||||
// lower
|
digit = std::ctype_base::digit,
|
||||||
// alpha
|
punct = std::ctype_base::punct,
|
||||||
// digit
|
xdigit = std::ctype_base::xdigit,
|
||||||
// punct
|
#if SPROUT_GCC_EARLIER(5, 0, 0)
|
||||||
// xdigit
|
blank = 0x0001,
|
||||||
// blank
|
#else // #if SPROUT_GCC_EARLIER(5, 0, 0)
|
||||||
// alnum
|
blank = std::ctype_base::blank,
|
||||||
// graph
|
#endif // #if SPROUT_GCC_EARLIER(5, 0, 0)
|
||||||
//
|
alnum = std::ctype_base::alnum,
|
||||||
SPROUT_STATIC_CONSTEXPR sprout::ctypes::mask_t space = std::ctype_base::space;
|
graph = std::ctype_base::graph
|
||||||
SPROUT_STATIC_CONSTEXPR sprout::ctypes::mask_t print = std::ctype_base::print;
|
};
|
||||||
SPROUT_STATIC_CONSTEXPR sprout::ctypes::mask_t cntrl = std::ctype_base::cntrl;
|
inline SPROUT_CONSTEXPR sprout::ctypes::mask
|
||||||
SPROUT_STATIC_CONSTEXPR sprout::ctypes::mask_t upper = std::ctype_base::upper;
|
operator&(sprout::ctypes::mask x, sprout::ctypes::mask y) {
|
||||||
SPROUT_STATIC_CONSTEXPR sprout::ctypes::mask_t lower = std::ctype_base::lower;
|
return sprout::ctypes::mask(static_cast<int>(x) & static_cast<int>(y));
|
||||||
SPROUT_STATIC_CONSTEXPR sprout::ctypes::mask_t alpha = std::ctype_base::alpha;
|
}
|
||||||
SPROUT_STATIC_CONSTEXPR sprout::ctypes::mask_t digit = std::ctype_base::digit;
|
inline SPROUT_CONSTEXPR sprout::ctypes::mask
|
||||||
SPROUT_STATIC_CONSTEXPR sprout::ctypes::mask_t punct = std::ctype_base::punct;
|
operator|(sprout::ctypes::mask x, sprout::ctypes::mask y) {
|
||||||
SPROUT_STATIC_CONSTEXPR sprout::ctypes::mask_t xdigit = std::ctype_base::xdigit;
|
return sprout::ctypes::mask(static_cast<int>(x) | static_cast<int>(y));
|
||||||
SPROUT_STATIC_CONSTEXPR sprout::ctypes::mask_t blank = /*std::ctype_base::blank*/1 << 9;
|
}
|
||||||
SPROUT_STATIC_CONSTEXPR sprout::ctypes::mask_t alnum = std::ctype_base::alnum;
|
inline SPROUT_CONSTEXPR sprout::ctypes::mask
|
||||||
SPROUT_STATIC_CONSTEXPR sprout::ctypes::mask_t graph = std::ctype_base::graph;
|
operator^(sprout::ctypes::mask x, sprout::ctypes::mask y) {
|
||||||
|
return sprout::ctypes::mask(static_cast<int>(x) ^ static_cast<int>(y));
|
||||||
|
}
|
||||||
|
inline SPROUT_CONSTEXPR sprout::ctypes::mask
|
||||||
|
operator~(sprout::ctypes::mask x) {
|
||||||
|
return sprout::ctypes::mask(~static_cast<int>(x));
|
||||||
|
}
|
||||||
|
inline SPROUT_CXX14_CONSTEXPR sprout::ctypes::mask&
|
||||||
|
operator&=(sprout::ctypes::mask& x, sprout::ctypes::mask y) {
|
||||||
|
return x = x & y;
|
||||||
|
}
|
||||||
|
inline SPROUT_CXX14_CONSTEXPR sprout::ctypes::mask&
|
||||||
|
operator|=(sprout::ctypes::mask& x, sprout::ctypes::mask y) {
|
||||||
|
return x = x | y;
|
||||||
|
}
|
||||||
|
inline SPROUT_CXX14_CONSTEXPR sprout::ctypes::mask&
|
||||||
|
operator^=(sprout::ctypes::mask& x, sprout::ctypes::mask y) {
|
||||||
|
return x = x ^ y;
|
||||||
|
}
|
||||||
} // namespace ctypes
|
} // namespace ctypes
|
||||||
} // namespace sprout
|
} // namespace sprout
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue