Implement parsing of scraplang.

This commit is contained in:
King_DuckZ 2015-10-01 01:32:27 +02:00
parent 7dfd1f4a38
commit dfd0ec343e
6 changed files with 228 additions and 0 deletions

View file

@ -35,6 +35,7 @@ add_executable(${PROJECT_NAME}
src/main.cpp
src/htmlretrieve.cpp
src/commandline.cpp
src/scraplang/scraplang.cpp
)
if (BUILD_SHARED_TIDY)

View file

@ -0,0 +1,57 @@
#ifndef id9919CCB09DDD429C8128632F13D370ED
#define id9919CCB09DDD429C8128632F13D370ED
#include "scraplang_element.hpp"
#include <boost/spirit/include/support_extended_variant.hpp>
#include <string>
#include <vector>
#include <map>
namespace duck {
struct ScrapNode;
namespace implem {
struct map;
struct array;
// A single value of the scraplang AST: either a nested map, a nested array,
// a string, an int or a double.
// map and array are only forward-declared at this point, which is why they
// are held through boost::recursive_wrapper.
struct element : boost::spirit::extended_variant<
	boost::recursive_wrapper<map>,
	boost::recursive_wrapper<array>,
	std::string,
	int,
	double
>
{
	element() = default;

	// Scalar alternatives.
	element (int parValue) : base_type(parValue) {}
	element (double parValue) : base_type(parValue) {}
	element (const std::string& parValue) : base_type(parValue) {}

	// Recursive alternatives.
	element (const array& parValue) : base_type(parValue) {}
	element (const map& parValue) : base_type(parValue) {}
};
// String-keyed collection of elements. Declared as a distinct struct (rather
// than a typedef) so that it can be forward-declared before element.
struct map : public std::map<std::string, element> {};
// Ordered collection of elements. Declared as a distinct struct (rather than
// a typedef) so that it can be forward-declared before element.
struct array : public std::vector<element> {};
// Sequence of ScrapNode values; one of the alternatives a ScrapNode can hold.
struct node_list {
// ScrapNode is only forward-declared here; it is defined after this struct.
std::vector<ScrapNode> nodes;
};
} //namespace implem
// Root type of the scraplang AST. A node holds exactly one of:
//  - an element_def,
//  - an implem::map,
//  - an implem::node_list (a sequence of further ScrapNode values).
struct ScrapNode : boost::spirit::extended_variant<
	element_def,
	implem::map,
	implem::node_list
>
{
	ScrapNode() = default;

	ScrapNode (const implem::node_list& parValue) : base_type(parValue) {}
	ScrapNode (const implem::map& parValue) : base_type(parValue) {}
	ScrapNode (const element_def& parValue) : base_type(parValue) {}
};
} //namespace duck
#endif

View file

@ -0,0 +1,88 @@
#include "scraplang.hpp"
#include "scrapast.hpp"
#include "scraplang_visit_xpath.hpp"
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/phoenix_stl.hpp>
#include <boost/spirit/include/phoenix_fusion.hpp>
#include <boost/fusion/adapted/struct.hpp>
#include <boost/fusion/adapted/std_pair.hpp>
#include <utility>
#include <boost/variant/apply_visitor.hpp>
namespace qi = boost::spirit::qi;
namespace sp = boost::spirit;
// Adapt duck::element_def as a Boost.Fusion sequence (name, xpath, type) so it
// can be used as the attribute of the xpath_definition rule below.
// The member order listed here matches the order in which that rule's
// sub-parsers (identifier, string, data_type) produce their attributes.
BOOST_FUSION_ADAPT_STRUCT(
duck::element_def,
(std::string, name)
(std::string, xpath)
(duck::ElementTypes, type)
)
// Adapt duck::implem::node_list as a single-member Boost.Fusion sequence so it
// can be used as the attribute of the grammar's `whole` rule.
BOOST_FUSION_ADAPT_STRUCT(
duck::implem::node_list,
(std::vector<duck::ScrapNode>, nodes)
)
namespace duck {
namespace {
// Qi symbol table translating the type keywords accepted after "as" into the
// corresponding ElementTypes value.
struct ElementTypeSymbol : qi::symbols<char, ElementTypes> {
	ElementTypeSymbol() {
		add("string", ElementType_String);
		add("integer", ElementType_Integer);
		add("boolean", ElementType_Boolean);
		add("null", ElementType_Null);
		add("double", ElementType_Double);
	}
};
// Qi grammar for scraplang. The input is skipped with ascii::space and the
// synthesized attribute is a ScrapNode.
//
// Informal shape of the accepted language:
//   whole            := xpath_definition* map?
//   xpath_definition := identifier '=' string "as" data_type
//   map              := '{' identifier '=' value (',' identifier '=' value)* '}'
//   array            := '[' (value (',' value)*)? ']'
//   value            := string | real | int | array | map | identifier
template <typename I>
struct ScrapGrammar : qi::grammar<I, ScrapNode(), sp::ascii::space_type> {
	ScrapGrammar() : ScrapGrammar::base_type(start) {
		using qi::lit;
		using qi::char_;
		using qi::lexeme;
		using qi::int_;
		using qi::eps;

		start = whole;

		// The leading eps is kept from the original: node_list is adapted as
		// a single-member Fusion sequence.
		whole = eps >> *xpath_definition >> -map;

		xpath_definition = identifier >> lit('=') >> string >> "as" >> data_type;

		// lexeme[] disables the skipper inside an identifier, so "foo bar" is
		// no longer accepted as the single identifier "foobar".
		// char_ sets are used instead of a bare '_' literal because a literal
		// exposes no attribute, which caused underscores to be dropped from
		// the resulting string.
		identifier = lexeme[char_("a-zA-Z_") >> *char_("a-zA-Z_0-9")];

		string %= lexeme['"' >> +(char_ - '"') >> '"'];

		map = lit('{') >> ((identifier >> lit('=') >> value) % lit(',')) >> lit('}');

		// An optional comma-separated list. The previous *(value % ',') also
		// accepted several lists back to back with no comma between them.
		array = lit('[') >> -(value % lit(',')) >> lit(']');

		// strict_double only matches numbers written as reals, so plain
		// integers fall through to int_ instead of always becoming doubles.
		value = string | strict_double | int_ | array | map | identifier;
	}

	qi::rule<I, ScrapNode(), sp::ascii::space_type> start;
	qi::rule<I, implem::node_list(), sp::ascii::space_type> whole;
	qi::rule<I, element_def(), sp::ascii::space_type> xpath_definition;
	qi::rule<I, std::string(), sp::ascii::space_type> identifier;
	qi::rule<I, std::string(), sp::ascii::space_type> string;
	qi::rule<I, implem::map(), sp::ascii::space_type> map;
	qi::rule<I, implem::array(), sp::ascii::space_type> array;
	qi::rule<I, implem::element(), sp::ascii::space_type> value;
	ElementTypeSymbol data_type;
	// Real-number parser that, unlike qi::double_, rejects plain integers.
	qi::real_parser<double, qi::strict_real_policies<double>> strict_double;
};
} //unnamed namespace
// Parses scraplang source text into an AST.
//
// parData: the scraplang source to parse.
// Returns: a newly allocated ScrapNode that the grammar has written into.
//
// NOTE(review): the boolean result of phrase_parse and the final position of
// the iterator are not inspected, so a failed or partial parse is not reported
// to the caller. Behaviour is left unchanged here because callers may rely on
// always receiving a non-null node.
std::unique_ptr<ScrapNode> parse_scraplang (const std::string& parData) {
	ScrapGrammar<std::string::const_iterator> gramm;
	std::unique_ptr<ScrapNode> retval(new ScrapNode);

	auto it_start = parData.cbegin();
	qi::phrase_parse(it_start, parData.cend(), gramm, sp::ascii::space, *retval);

	// Returning the local by name lets the compiler elide or implicitly move
	// it; an explicit std::move here would inhibit copy elision.
	return retval;
}
// Collects the element_def entries found in an AST.
//
// parAST: root node to visit with implem::XPathVisitor.
// Returns: the element_def values the visitor appended, in visiting order.
std::vector<element_def> get_xpath_definitions (const ScrapNode& parAST) {
	std::vector<element_def> retval;
	implem::XPathVisitor xpath_vis(&retval);
	boost::apply_visitor(xpath_vis, parAST);

	// Returning the local by name lets the compiler elide or implicitly move
	// it; an explicit std::move here would inhibit copy elision.
	return retval;
}
} //namespace duck

View file

@ -0,0 +1,16 @@
#ifndef idBE96C2D49C4C413888A79EAEB2B9C0FA
#define idBE96C2D49C4C413888A79EAEB2B9C0FA
#include <vector>
#include <string>
#include <memory>
namespace duck {
struct ScrapNode;
struct element_def;
std::unique_ptr<ScrapNode> parse_scraplang ( const std::string& parData );
std::vector<element_def> get_xpath_definitions ( const ScrapNode& parAST );
} //namespace duck
#endif

View file

@ -0,0 +1,22 @@
#ifndef id3875B5F868524EC3A1B83971D4A85777
#define id3875B5F868524EC3A1B83971D4A85777
#include <string>
namespace duck {
// Data types that an element definition can be declared as.
// The numeric values are spelled out; they are the same ones the compiler
// would assign implicitly.
enum ElementTypes {
	ElementType_String = 0,
	ElementType_Integer = 1,
	ElementType_Boolean = 2,
	ElementType_Null = 3,
	ElementType_Double = 4
};
// Plain aggregate describing one element definition: a name, an xpath string
// and the type the value is declared as.
struct element_def {
// Identifier the definition is bound to.
std::string name;
// XPath expression text.
std::string xpath;
// Declared data type. There is no default initializer, so this member is
// indeterminate in a default-initialized element_def.
ElementTypes type;
};
} //namespace duck
#endif

View file

@ -0,0 +1,44 @@
#ifndef id7648347E8EE84E65B69018880358C8DF
#define id7648347E8EE84E65B69018880358C8DF
#include "scrapast.hpp"
#include <vector>
namespace duck {
namespace implem {
// Visitor meant to be used with boost::apply_visitor on a ScrapNode.
// Every element_def it encounters is appended to the vector supplied at
// construction; node_list values are visited recursively and maps are
// ignored.
class XPathVisitor {
public:
	using result_type = void;

	// parElements is dereferenced on every visited element_def; the visitor
	// stores the pointer and does not own the vector.
	explicit XPathVisitor (std::vector<element_def>* parElements);

	void operator() (const node_list& parNodes);
	void operator() (const implem::map& parMap);
	void operator() (const element_def& parElem);

private:
	std::vector<element_def>* const m_elements;
};
// Remembers the output vector; nothing is written to it until a node is visited.
inline XPathVisitor::XPathVisitor (std::vector<element_def>* parElements) : m_elements(parElements) {}
// Appends a copy of the visited definition to the output vector.
inline void XPathVisitor::operator() (const element_def& parElem) {
	std::vector<element_def>& output = *m_elements;
	output.push_back(parElem);
}
// Maps contribute nothing to the output, so visiting one is a no-op.
inline void XPathVisitor::operator() (const implem::map&) {
}
// Visits each child node in order, applying this same visitor to it.
inline void XPathVisitor::operator() (const node_list& parNodes) {
	const auto last = parNodes.nodes.end();
	for (auto current = parNodes.nodes.begin(); current != last; ++current) {
		boost::apply_visitor(*this, *current);
	}
}
} //namespace implem
} //namespace duck
#endif