Allow nesting of structs.

This commit is contained in:
King_DuckZ 2018-01-16 10:23:15 +00:00
parent fcb25ed456
commit 3572803f66
3 changed files with 137 additions and 55 deletions

View file

@ -33,15 +33,108 @@ namespace duck { namespace sl {
#endif
namespace {
struct XPathEntry {
std::string value;
class DictBuilder : public boost::static_visitor<> {
// Record of one visited ApplyBlock. Holds non-owning pointers into the
// visited block (see DictBuilder::operator()(const ApplyBlock&)), not copies.
struct ApplyEntry {
// Address of the block's `source` member.
const SourceInfo* apply_to;
// Address of the block's `xpaths` member (assignments and nested structs).
const std::vector<StructItem>* content;
// Address of the block's `mustache_model` member.
const std::string* mustache_name;
};
// Maps a (possibly dot-qualified) entry name to the XPathElement it refers to.
// The mapped pointers are non-owning.
using XPathMap = std::map<std::string, const XPathElement*>;
class StructItemExtractor : public boost::static_visitor<> {
public:
DictBuilder (
StructItemExtractor (std::string&& parPrefix, XPathMap& parMap) :
m_prefix(std::move(parPrefix)),
m_map(parMap)
{
if (not m_prefix.empty()) {
m_prefix += ".";
}
}
// Leaf case: registers the element's address under prefix + name, replacing
// any entry previously stored under the same key.
void operator() (const XPathElement& parVal) {
    const std::string full_name = m_prefix + parVal.name;
    m_map[full_name] = &parVal;
}
// Nested struct case: visits every item of the block with a child extractor
// whose prefix is this extractor's prefix followed by the struct's name.
// The child writes into the same map.
void operator() (const StructBlock& parVal) {
    StructItemExtractor nested(m_prefix + parVal.name, m_map);
    for (const auto& child : parVal.xpaths) {
        boost::apply_visitor(nested, child);
    }
}
private:
XPathSink m_xpaths;
std::map<std::string, std::string> m_vars;
std::string m_prefix;
XPathMap& m_map;
};
// Visitor over the parsed script nodes. It gathers:
//  - the flattened name -> XPathElement map of every "from" block,
//  - one ApplyEntry per "apply" block,
//  - one MustacheEntry per mustache block name.
// All pointers it stores are non-owning and point into the visited nodes.
class DictBuilder : public boost::static_visitor<> {
public:
    // NOTE(review): parHtmlPool is accepted but neither stored nor used by the
    // code visible here - confirm whether that is intentional.
    explicit DictBuilder (HtmlPoolBaseSP parHtmlPool) :
        m_current_mustache_name(nullptr),
        m_current_mustache(nullptr)
    {
    }

    // Visits each node of the list, in order, with this same visitor.
    void operator() (const std::vector<ScrapNode>& parVal) {
        for (const auto& node : parVal) {
            boost::apply_visitor(*this, node);
        }
    }

    // Flattens the block's items (including nested structs) into the global
    // entries map, starting from an empty name prefix.
    void operator() (const FromBlock& parVal) {
#if defined(APPLY_VERBOSE)
        std::cout << parVal << '\n';
#endif
        StructItemExtractor extractor("", m_global_entries);
        for (const auto& item : parVal.xpaths) {
            boost::apply_visitor(extractor, item);
        }
    }

    // Appends an ApplyEntry holding the addresses of the block's source,
    // items and mustache model name.
    void operator() (const ApplyBlock& parVal) {
#if defined(APPLY_VERBOSE)
        std::cout << parVal << '\n';
#endif
        const ApplyEntry entry = {
            &parVal.source,
            &parVal.xpaths,
            &parVal.mustache_model
        };
        m_apply_entries.push_back(entry);
    }

    // Stores the block's content as the text of the entry named parVal.name.
    // When the name differs from the current one (or there is no current
    // entry yet) the entry for that name is reset to an empty MustacheEntry
    // and becomes the current one.
    void operator() (const MustacheBlock& parVal) {
#if defined(APPLY_VERBOSE)
        std::cout << "Mustache block \"" << parVal.name << "\"\n";
#endif
        const std::string* const active_name = m_current_mustache_name;
        const bool switch_entry = (nullptr == active_name) || (*active_name != parVal.name);
        if (switch_entry) {
            MustacheEntry& fresh = m_mustaches[parVal.name];
            fresh = MustacheEntry();
            // The key's address is taken from the map node itself, so it is
            // looked up again here.
            const auto it_entry = m_mustaches.find(parVal.name);
            m_current_mustache_name = &it_entry->first;
            m_current_mustache = &fresh;
        }
        m_current_mustache->text = parVal.content;
    }

    // Read-only view of the entries collected from "from" blocks.
    const XPathMap& global_entries() const {
        return m_global_entries;
    }

private:
    // Template text plus the mstch context associated with one mustache name.
    struct MustacheEntry {
        std::string text;
        mstch::map context;
    };

    XPathMap m_global_entries;
    std::vector<ApplyEntry> m_apply_entries;
    std::map<std::string, MustacheEntry> m_mustaches;
    // Both point into m_mustaches; null until the first mustache block.
    const std::string* m_current_mustache_name;
    MustacheEntry* m_current_mustache;
};
} //unnamed namespace
@ -52,5 +145,14 @@ namespace duck { namespace sl {
DictBuilder dict_builder(html_pool);
boost::apply_visitor(dict_builder, node);
std::cout << "-------------- visiting done ----------------\n";
for (auto& itm : dict_builder.global_entries()) {
std::cout << "item: \"" << itm.first << "\", \"" <<
itm.second->xpath << "\"\n";
}
return std::vector<std::string>();
}
}} //namespace duck::sl

View file

@ -41,12 +41,12 @@ BOOST_FUSION_ADAPT_STRUCT(
BOOST_FUSION_ADAPT_STRUCT(
duck::sl::FromBlock,
(duck::sl::SourceInfo, source)
(std::vector<duck::sl::XPathElement>, xpaths)
(std::vector<duck::sl::StructItem>, xpaths)
)
BOOST_FUSION_ADAPT_STRUCT(
duck::sl::StructBlock,
(std::string, name)
(std::vector<duck::sl::XPathElement>, xpaths)
(std::vector<duck::sl::StructItem>, xpaths)
)
BOOST_FUSION_ADAPT_STRUCT(
duck::sl::ApplyBlock,
@ -68,18 +68,6 @@ BOOST_FUSION_ADAPT_STRUCT(
namespace duck { namespace sl {
namespace {
// Qi symbol table that maps each type keyword to its ElementTypes value.
struct ElementTypeSymbol : qi::symbols<char, ElementTypes> {
    ElementTypeSymbol() {
        // One registration per keyword; equivalent to a single chained add.
        add("string", ElementType_String);
        add("integer", ElementType_Integer);
        add("boolean", ElementType_Boolean);
        add("null", ElementType_Null);
        add("double", ElementType_Double);
    }
};
template <typename I, typename Skipper>
class ScrapGrammar : public qi::grammar<I, std::vector<ScrapNode>(), Skipper> {
public:
@ -98,20 +86,24 @@ namespace duck { namespace sl {
using qi::no_skip;
start = *eol >> (from_block | apply_block | mustache_block) % +eol >> *eol >> eoi;
from_block = lit("from") >> source_info >> +eol >> (xpath_assignment % +eol) >> +eol >> "end";
from_block = lit("from") >> source_info >> +eol >> assignment_list >> +eol >> "end";
source_info = (url >> attr(SourceInfo::URL)) | (mustache_like_token >> attr(SourceInfo::Token));
url = -(+alpha >> string("://")) >> alpha >> *graph;
mustache_like_token = "{{" >> identifier >> "}}";
quoted_string %= lexeme['"' >> +(char_ - '"') >> '"'];
xpath_assignment = identifier >> -(lit("default") >> '(' >> quoted_string >> ')') >> "=" >> +graph;
xpath_assignment %= identifier >>
-(lit("default") >> '(' >> quoted_string >> ')') >> "=" >>
as_string[lexeme[+(graph | char_(" \t"))]];
identifier %= lexeme[(alpha | char_('_')) >> *(alnum | char_('_'))];
apply_block = lit("apply") >> mustache_like_token >> "to" >> source_info >> +eol >>
((xpath_assignment | struct_block) % +eol) >> +eol >> "end";
struct_block = "struct" >> identifier >> +eol >> (xpath_assignment % +eol) >> +eol >> "end";
assignment_list >> +eol >> "end";
struct_block = "struct" >> identifier >> +eol >> assignment_list >> +eol >> "end";
mustache_block %= as_string[lit("==") >> identifier] >> eol >>
as_string[no_skip[+(!lit("==end") >> char_)]] >> "==end";
assignment_list = (xpath_assignment | struct_block) % +eol;
}
private:
@ -129,6 +121,7 @@ namespace duck { namespace sl {
RuleType<ApplyBlock()> apply_block;
RuleType<StructBlock()> struct_block;
RuleType<MustacheBlock()> mustache_block;
RuleType<std::vector<StructItem>()> assignment_list;
};
} //unnamed namespace
@ -150,19 +143,4 @@ namespace duck { namespace sl {
}
return retval;
}
// std::vector<element_def> get_xpath_definitions (const ScrapNode& parAST) {
// std::vector<element_def> retval;
// implem::XPathVisitor xpath_vis(&retval);
// boost::apply_visitor(xpath_vis, parAST);
// return std::move(retval);
// }
//
// void print_results (std::ostream& parOut, const ScrapNode& parAST, const std::vector<element_def>& parOutcome, const ResultList& parResList) {
//#if !defined(NDEBUG)
// std::cout << "print_results()...\n";
//#endif
// implem::ResultPrinter printer(&parOut, &parOutcome, & parResList);
// boost::apply_visitor(printer, parAST);
// }
}} //namespace duck::sl

View file

@ -34,6 +34,23 @@ namespace duck { namespace sl {
std::string xpath;
};
struct StructBlock;
// One item inside a block: either a single XPathElement or a nested
// StructBlock. StructBlock is only forward-declared at this point and itself
// holds a vector of StructItem, hence the boost::recursive_wrapper.
struct StructItem : boost::spirit::extended_variant<
XPathElement,
boost::recursive_wrapper<StructBlock>
> {
// Constructors forward to the variant base, one per alternative.
StructItem() : base_type() {}
StructItem (const XPathElement& value) : base_type(value) {}
StructItem (const StructBlock& value) : base_type(value) {}
// Re-exposes the base class assignment operators.
using base_type::operator=;
};
// A named group of items. Each item is a StructItem, so a struct can contain
// plain XPathElements as well as further nested StructBlocks.
struct StructBlock {
// Name of the struct.
std::string name;
// Items contained in this struct, in the order they were stored.
std::vector<StructItem> xpaths;
};
struct SourceInfo {
enum Type { URL, Token };
@ -43,22 +60,7 @@ namespace duck { namespace sl {
struct FromBlock {
SourceInfo source;
std::vector<XPathElement> xpaths;
};
struct StructBlock {
std::string name;
std::vector<XPathElement> xpaths;
};
struct StructItem : boost::spirit::extended_variant<
XPathElement,
StructBlock
> {
StructItem() : base_type() {}
StructItem (const XPathElement& value) : base_type(value) {}
StructItem (const StructBlock& value) : base_type(value) {}
using base_type::operator=;
std::vector<StructItem> xpaths;
};
struct ApplyBlock {