Allow nesting of structs.
This commit is contained in:
parent
fcb25ed456
commit
3572803f66
3 changed files with 137 additions and 55 deletions
|
@ -33,15 +33,108 @@ namespace duck { namespace sl {
|
|||
#endif
|
||||
|
||||
namespace {
|
||||
struct XPathEntry {
|
||||
std::string value;
|
||||
class DictBuilder : public boost::static_visitor<> {
|
||||
struct ApplyEntry {
|
||||
const SourceInfo* apply_to;
|
||||
const std::vector<StructItem>* content;
|
||||
const std::string* mustache_name;
|
||||
};
|
||||
|
||||
typedef std::map<std::string, const XPathElement*> XPathMap;
|
||||
|
||||
// Visitor that flattens a (possibly nested) list of StructItem into an
// XPathMap.
//
// Each XPathElement is stored under its dot-separated path: the names of
// the enclosing StructBlocks, joined with '.', followed by the element's
// own name. The map stores the address of the visited element, not a
// copy, so the visited tree must outlive the map's contents. If two
// elements resolve to the same path, the one visited last wins.
class StructItemExtractor : public boost::static_visitor<> {
public:
	// parPrefix: path of the enclosing struct, or an empty string at top
	//            level. When non-empty a '.' separator is appended so
	//            that child names can be concatenated directly.
	// parMap:    destination map, held by reference.
	StructItemExtractor (std::string&& parPrefix, XPathMap& parMap) :
		m_prefix(std::move(parPrefix)),
		m_map(parMap)
	{
		if (not m_prefix.empty()) {
			m_prefix += ".";
		}
	}

	// Leaf: register the element under "<prefix><name>".
	void operator() (const XPathElement& parVal) {
		m_map[m_prefix + parVal.name] = &parVal;
	}

	// Nested struct: recurse with the struct's name appended to the prefix,
	// writing into the same map.
	void operator() (const StructBlock& parVal) {
		StructItemExtractor visitor(
			m_prefix + parVal.name,
			m_map
		);
		for (auto& itm : parVal.xpaths) {
			boost::apply_visitor(visitor, itm);
		}
	}

private:
	std::string m_prefix;
	XPathMap& m_map;
};
|
||||
|
||||
// AST visitor that gathers what is needed to build the mustache
// dictionaries:
//  - "from" blocks are flattened into a map of global entries,
//  - "apply" blocks are remembered as ApplyEntry records,
//  - mustache blocks have their template text stored by name.
// Pointers into the visited AST are kept, so the AST must outlive this
// object.
class DictBuilder : public boost::static_visitor<> {
public:
	// parHtmlPool is accepted but neither stored nor used by the code
	// shown here.
	explicit DictBuilder (HtmlPoolBaseSP parHtmlPool) :
		m_current_mustache_name(nullptr),
		m_current_mustache(nullptr)
	{
	}

	// Visits every node of a node list in order.
	void operator() (const std::vector<ScrapNode>& parVal) {
		for (const auto& node : parVal) {
			boost::apply_visitor(*this, node);
		}
	}

	// Flattens the items of a "from" block into the global entries map,
	// starting from an empty path prefix.
	void operator() (const FromBlock& parVal) {
#if defined(APPLY_VERBOSE)
		std::cout << parVal << '\n';
#endif
		for (const auto& item : parVal.xpaths) {
			StructItemExtractor flattener("", m_global_entries);
			boost::apply_visitor(flattener, item);
		}
	}

	// Records an "apply" block; only addresses of its members are kept.
	void operator() (const ApplyBlock& parVal) {
#if defined(APPLY_VERBOSE)
		std::cout << parVal << '\n';
#endif
		const ApplyEntry entry = {
			&parVal.source,
			&parVal.xpaths,
			&parVal.mustache_model
		};
		m_apply_entries.push_back(entry);
	}

	// Stores the text of a mustache block. Unless the block has the same
	// name as the one visited just before it, the entry for that name is
	// reset to a default MustacheEntry and becomes the current one.
	void operator() (const MustacheBlock& parVal) {
#if defined(APPLY_VERBOSE)
		std::cout << "Mustache block \"" << parVal.name << "\"\n";
#endif
		const bool same_as_current =
			m_current_mustache_name and *m_current_mustache_name == parVal.name;

		if (not same_as_current) {
			m_mustaches[parVal.name] = MustacheEntry();
			const auto found = m_mustaches.find(parVal.name);
			m_current_mustache_name = &found->first;
			m_current_mustache = &found->second;
		}

		m_current_mustache->text = parVal.content;
	}

	// Entries collected from "from" blocks, keyed by dotted path.
	const XPathMap& global_entries() const { return m_global_entries; }

private:
	struct MustacheEntry {
		std::string text;
		mstch::map context;
	};

	XPathMap m_global_entries;
	std::vector<ApplyEntry> m_apply_entries;
	std::map<std::string, MustacheEntry> m_mustaches;
	// Key and value of the most recently selected entry of m_mustaches;
	// both are null until the first mustache block is visited.
	const std::string* m_current_mustache_name;
	MustacheEntry* m_current_mustache;
};
|
||||
} //unnamed namespace
|
||||
|
||||
|
@ -52,5 +145,14 @@ namespace duck { namespace sl {
|
|||
DictBuilder dict_builder(html_pool);
|
||||
|
||||
boost::apply_visitor(dict_builder, node);
|
||||
|
||||
|
||||
std::cout << "-------------- visiting done ----------------\n";
|
||||
for (auto& itm : dict_builder.global_entries()) {
|
||||
std::cout << "item: \"" << itm.first << "\", \"" <<
|
||||
itm.second->xpath << "\"\n";
|
||||
}
|
||||
|
||||
return std::vector<std::string>();
|
||||
}
|
||||
}} //namespace duck::sl
|
||||
|
|
|
@ -41,12 +41,12 @@ BOOST_FUSION_ADAPT_STRUCT(
|
|||
// Fusion adaptation of FromBlock. Member order follows the attribute order
// of the from_block rule (source_info, then the assignment list). The
// xpaths member is listed once, with the StructItem element type that
// allows nested structs.
BOOST_FUSION_ADAPT_STRUCT(
	duck::sl::FromBlock,
	(duck::sl::SourceInfo, source)
	(std::vector<duck::sl::StructItem>, xpaths)
)
|
||||
// Fusion adaptation of StructBlock. Member order follows the attribute
// order of the struct_block rule (identifier, then the assignment list).
// The xpaths member is listed once, with the StructItem element type that
// allows nested structs.
BOOST_FUSION_ADAPT_STRUCT(
	duck::sl::StructBlock,
	(std::string, name)
	(std::vector<duck::sl::StructItem>, xpaths)
)
|
||||
BOOST_FUSION_ADAPT_STRUCT(
|
||||
duck::sl::ApplyBlock,
|
||||
|
@ -68,18 +68,6 @@ BOOST_FUSION_ADAPT_STRUCT(
|
|||
|
||||
namespace duck { namespace sl {
|
||||
namespace {
|
||||
// Spirit symbol table mapping the textual type keywords to their
// ElementTypes value.
struct ElementTypeSymbol : qi::symbols<char, ElementTypes> {
	ElementTypeSymbol() {
		add("string", ElementType_String);
		add("integer", ElementType_Integer);
		add("boolean", ElementType_Boolean);
		add("null", ElementType_Null);
		add("double", ElementType_Double);
	}
};
|
||||
|
||||
template <typename I, typename Skipper>
|
||||
class ScrapGrammar : public qi::grammar<I, std::vector<ScrapNode>(), Skipper> {
|
||||
public:
|
||||
|
@ -98,20 +86,24 @@ namespace duck { namespace sl {
|
|||
using qi::no_skip;
|
||||
|
||||
start = *eol >> (from_block | apply_block | mustache_block) % +eol >> *eol >> eoi;
|
||||
from_block = lit("from") >> source_info >> +eol >> (xpath_assignment % +eol) >> +eol >> "end";
|
||||
from_block = lit("from") >> source_info >> +eol >> assignment_list >> +eol >> "end";
|
||||
source_info = (url >> attr(SourceInfo::URL)) | (mustache_like_token >> attr(SourceInfo::Token));
|
||||
url = -(+alpha >> string("://")) >> alpha >> *graph;
|
||||
mustache_like_token = "{{" >> identifier >> "}}";
|
||||
quoted_string %= lexeme['"' >> +(char_ - '"') >> '"'];
|
||||
xpath_assignment = identifier >> -(lit("default") >> '(' >> quoted_string >> ')') >> "=" >> +graph;
|
||||
xpath_assignment %= identifier >>
|
||||
-(lit("default") >> '(' >> quoted_string >> ')') >> "=" >>
|
||||
as_string[lexeme[+(graph | char_(" \t"))]];
|
||||
identifier %= lexeme[(alpha | char_('_')) >> *(alnum | char_('_'))];
|
||||
|
||||
apply_block = lit("apply") >> mustache_like_token >> "to" >> source_info >> +eol >>
|
||||
((xpath_assignment | struct_block) % +eol) >> +eol >> "end";
|
||||
struct_block = "struct" >> identifier >> +eol >> (xpath_assignment % +eol) >> +eol >> "end";
|
||||
assignment_list >> +eol >> "end";
|
||||
struct_block = "struct" >> identifier >> +eol >> assignment_list >> +eol >> "end";
|
||||
|
||||
mustache_block %= as_string[lit("==") >> identifier] >> eol >>
|
||||
as_string[no_skip[+(!lit("==end") >> char_)]] >> "==end";
|
||||
|
||||
assignment_list = (xpath_assignment | struct_block) % +eol;
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -129,6 +121,7 @@ namespace duck { namespace sl {
|
|||
RuleType<ApplyBlock()> apply_block;
|
||||
RuleType<StructBlock()> struct_block;
|
||||
RuleType<MustacheBlock()> mustache_block;
|
||||
RuleType<std::vector<StructItem>()> assignment_list;
|
||||
};
|
||||
} //unnamed namespace
|
||||
|
||||
|
@ -150,19 +143,4 @@ namespace duck { namespace sl {
|
|||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
// std::vector<element_def> get_xpath_definitions (const ScrapNode& parAST) {
|
||||
// std::vector<element_def> retval;
|
||||
// implem::XPathVisitor xpath_vis(&retval);
|
||||
// boost::apply_visitor(xpath_vis, parAST);
|
||||
// return std::move(retval);
|
||||
// }
|
||||
//
|
||||
// void print_results (std::ostream& parOut, const ScrapNode& parAST, const std::vector<element_def>& parOutcome, const ResultList& parResList) {
|
||||
//#if !defined(NDEBUG)
|
||||
// std::cout << "print_results()...\n";
|
||||
//#endif
|
||||
// implem::ResultPrinter printer(&parOut, &parOutcome, & parResList);
|
||||
// boost::apply_visitor(printer, parAST);
|
||||
// }
|
||||
}} //namespace duck::sl
|
||||
|
|
|
@ -34,6 +34,23 @@ namespace duck { namespace sl {
|
|||
std::string xpath;
|
||||
};
|
||||
|
||||
struct StructBlock;
|
||||
|
||||
// One item of an assignment list: either a plain XPathElement or a nested
// StructBlock. StructBlock is only forward-declared at this point and
// itself holds a vector of StructItem, hence the recursive_wrapper.
struct StructItem : boost::spirit::extended_variant<
	XPathElement,
	boost::recursive_wrapper<StructBlock>
> {
	StructItem() : base_type() {}
	StructItem (const XPathElement& parVal) : base_type(parVal) {}
	StructItem (const StructBlock& parVal) : base_type(parVal) {}

	// Expose the variant's assignment operators.
	using base_type::operator=;
};
|
||||
|
||||
struct StructBlock {
|
||||
std::string name;
|
||||
std::vector<StructItem> xpaths;
|
||||
};
|
||||
|
||||
struct SourceInfo {
|
||||
enum Type { URL, Token };
|
||||
|
||||
|
@ -43,22 +60,7 @@ namespace duck { namespace sl {
|
|||
|
||||
struct FromBlock {
|
||||
SourceInfo source;
|
||||
std::vector<XPathElement> xpaths;
|
||||
};
|
||||
|
||||
struct StructBlock {
|
||||
std::string name;
|
||||
std::vector<XPathElement> xpaths;
|
||||
};
|
||||
|
||||
struct StructItem : boost::spirit::extended_variant<
|
||||
XPathElement,
|
||||
StructBlock
|
||||
> {
|
||||
StructItem() : base_type() {}
|
||||
StructItem (const XPathElement& value) : base_type(value) {}
|
||||
StructItem (const StructBlock& value) : base_type(value) {}
|
||||
using base_type::operator=;
|
||||
std::vector<StructItem> xpaths;
|
||||
};
|
||||
|
||||
struct ApplyBlock {
|
||||
|
|
Loading…
Reference in a new issue