Allow nesting of structs.
This commit is contained in:
parent
fcb25ed456
commit
3572803f66
3 changed files with 137 additions and 55 deletions
|
@ -33,15 +33,108 @@ namespace duck { namespace sl {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
struct XPathEntry {
|
struct ApplyEntry {
|
||||||
std::string value;
|
const SourceInfo* apply_to;
|
||||||
class DictBuilder : public boost::static_visitor<> {
|
const std::vector<StructItem>* content;
|
||||||
|
const std::string* mustache_name;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef std::map<std::string, const XPathElement*> XPathMap;
|
||||||
|
|
||||||
|
class StructItemExtractor : public boost::static_visitor<> {
|
||||||
public:
|
public:
|
||||||
DictBuilder (
|
StructItemExtractor (std::string&& parPrefix, XPathMap& parMap) :
|
||||||
|
m_prefix(std::move(parPrefix)),
|
||||||
|
m_map(parMap)
|
||||||
|
{
|
||||||
|
if (not m_prefix.empty()) {
|
||||||
|
m_prefix += ".";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator() (const XPathElement& parVal) {
|
||||||
|
m_map[m_prefix + parVal.name] = &parVal;
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator() (const StructBlock& parVal) {
|
||||||
|
StructItemExtractor visitor(
|
||||||
|
m_prefix + parVal.name,
|
||||||
|
m_map
|
||||||
|
);
|
||||||
|
for (auto& itm : parVal.xpaths) {
|
||||||
|
boost::apply_visitor(visitor, itm);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
XPathSink m_xpaths;
|
std::string m_prefix;
|
||||||
std::map<std::string, std::string> m_vars;
|
XPathMap& m_map;
|
||||||
|
};
|
||||||
|
|
||||||
|
class DictBuilder : public boost::static_visitor<> {
|
||||||
|
public:
|
||||||
|
explicit DictBuilder (HtmlPoolBaseSP parHtmlPool) :
|
||||||
|
m_current_mustache_name(nullptr),
|
||||||
|
m_current_mustache(nullptr)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator() (const std::vector<ScrapNode>& parVal) {
|
||||||
|
for (auto& val : parVal) {
|
||||||
|
boost::apply_visitor(*this, val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator() (const FromBlock& parVal) {
|
||||||
|
#if defined(APPLY_VERBOSE)
|
||||||
|
std::cout << parVal << '\n';
|
||||||
|
#endif
|
||||||
|
for (auto& itm : parVal.xpaths) {
|
||||||
|
StructItemExtractor extractor("", m_global_entries);
|
||||||
|
boost::apply_visitor(extractor, itm);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator() (const ApplyBlock& parVal) {
|
||||||
|
#if defined(APPLY_VERBOSE)
|
||||||
|
std::cout << parVal << '\n';
|
||||||
|
#endif
|
||||||
|
m_apply_entries.push_back(ApplyEntry {
|
||||||
|
&parVal.source,
|
||||||
|
&parVal.xpaths,
|
||||||
|
&parVal.mustache_model
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
void operator() (const MustacheBlock& parVal) {
|
||||||
|
#if defined(APPLY_VERBOSE)
|
||||||
|
std::cout << "Mustache block \"" << parVal.name << "\"\n";
|
||||||
|
#endif
|
||||||
|
const auto curr_name = m_current_mustache_name;
|
||||||
|
|
||||||
|
if (not curr_name or *curr_name != parVal.name) {
|
||||||
|
m_mustaches[parVal.name] = MustacheEntry();
|
||||||
|
auto it_found = m_mustaches.find(parVal.name);
|
||||||
|
m_current_mustache_name = &it_found->first;
|
||||||
|
m_current_mustache = &it_found->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
m_current_mustache->text = parVal.content;
|
||||||
|
}
|
||||||
|
|
||||||
|
const XPathMap& global_entries() const { return m_global_entries; }
|
||||||
|
|
||||||
|
private:
|
||||||
|
struct MustacheEntry {
|
||||||
|
std::string text;
|
||||||
|
mstch::map context;
|
||||||
|
};
|
||||||
|
|
||||||
|
XPathMap m_global_entries;
|
||||||
|
std::vector<ApplyEntry> m_apply_entries;
|
||||||
|
std::map<std::string, MustacheEntry> m_mustaches;
|
||||||
|
const std::string* m_current_mustache_name;
|
||||||
|
MustacheEntry* m_current_mustache;
|
||||||
};
|
};
|
||||||
} //unnamed namespace
|
} //unnamed namespace
|
||||||
|
|
||||||
|
@ -52,5 +145,14 @@ namespace duck { namespace sl {
|
||||||
DictBuilder dict_builder(html_pool);
|
DictBuilder dict_builder(html_pool);
|
||||||
|
|
||||||
boost::apply_visitor(dict_builder, node);
|
boost::apply_visitor(dict_builder, node);
|
||||||
|
|
||||||
|
|
||||||
|
std::cout << "-------------- visiting done ----------------\n";
|
||||||
|
for (auto& itm : dict_builder.global_entries()) {
|
||||||
|
std::cout << "item: \"" << itm.first << "\", \"" <<
|
||||||
|
itm.second->xpath << "\"\n";
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::vector<std::string>();
|
||||||
}
|
}
|
||||||
}} //namespace duck::sl
|
}} //namespace duck::sl
|
||||||
|
|
|
@ -41,12 +41,12 @@ BOOST_FUSION_ADAPT_STRUCT(
|
||||||
BOOST_FUSION_ADAPT_STRUCT(
|
BOOST_FUSION_ADAPT_STRUCT(
|
||||||
duck::sl::FromBlock,
|
duck::sl::FromBlock,
|
||||||
(duck::sl::SourceInfo, source)
|
(duck::sl::SourceInfo, source)
|
||||||
(std::vector<duck::sl::XPathElement>, xpaths)
|
(std::vector<duck::sl::StructItem>, xpaths)
|
||||||
)
|
)
|
||||||
BOOST_FUSION_ADAPT_STRUCT(
|
BOOST_FUSION_ADAPT_STRUCT(
|
||||||
duck::sl::StructBlock,
|
duck::sl::StructBlock,
|
||||||
(std::string, name)
|
(std::string, name)
|
||||||
(std::vector<duck::sl::XPathElement>, xpaths)
|
(std::vector<duck::sl::StructItem>, xpaths)
|
||||||
)
|
)
|
||||||
BOOST_FUSION_ADAPT_STRUCT(
|
BOOST_FUSION_ADAPT_STRUCT(
|
||||||
duck::sl::ApplyBlock,
|
duck::sl::ApplyBlock,
|
||||||
|
@ -68,18 +68,6 @@ BOOST_FUSION_ADAPT_STRUCT(
|
||||||
|
|
||||||
namespace duck { namespace sl {
|
namespace duck { namespace sl {
|
||||||
namespace {
|
namespace {
|
||||||
struct ElementTypeSymbol : qi::symbols<char, ElementTypes> {
|
|
||||||
ElementTypeSymbol() {
|
|
||||||
add
|
|
||||||
("string", ElementType_String)
|
|
||||||
("integer", ElementType_Integer)
|
|
||||||
("boolean", ElementType_Boolean)
|
|
||||||
("null", ElementType_Null)
|
|
||||||
("double", ElementType_Double)
|
|
||||||
;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
template <typename I, typename Skipper>
|
template <typename I, typename Skipper>
|
||||||
class ScrapGrammar : public qi::grammar<I, std::vector<ScrapNode>(), Skipper> {
|
class ScrapGrammar : public qi::grammar<I, std::vector<ScrapNode>(), Skipper> {
|
||||||
public:
|
public:
|
||||||
|
@ -98,20 +86,24 @@ namespace duck { namespace sl {
|
||||||
using qi::no_skip;
|
using qi::no_skip;
|
||||||
|
|
||||||
start = *eol >> (from_block | apply_block | mustache_block) % +eol >> *eol >> eoi;
|
start = *eol >> (from_block | apply_block | mustache_block) % +eol >> *eol >> eoi;
|
||||||
from_block = lit("from") >> source_info >> +eol >> (xpath_assignment % +eol) >> +eol >> "end";
|
from_block = lit("from") >> source_info >> +eol >> assignment_list >> +eol >> "end";
|
||||||
source_info = (url >> attr(SourceInfo::URL)) | (mustache_like_token >> attr(SourceInfo::Token));
|
source_info = (url >> attr(SourceInfo::URL)) | (mustache_like_token >> attr(SourceInfo::Token));
|
||||||
url = -(+alpha >> string("://")) >> alpha >> *graph;
|
url = -(+alpha >> string("://")) >> alpha >> *graph;
|
||||||
mustache_like_token = "{{" >> identifier >> "}}";
|
mustache_like_token = "{{" >> identifier >> "}}";
|
||||||
quoted_string %= lexeme['"' >> +(char_ - '"') >> '"'];
|
quoted_string %= lexeme['"' >> +(char_ - '"') >> '"'];
|
||||||
xpath_assignment = identifier >> -(lit("default") >> '(' >> quoted_string >> ')') >> "=" >> +graph;
|
xpath_assignment %= identifier >>
|
||||||
|
-(lit("default") >> '(' >> quoted_string >> ')') >> "=" >>
|
||||||
|
as_string[lexeme[+(graph | char_(" \t"))]];
|
||||||
identifier %= lexeme[(alpha | char_('_')) >> *(alnum | char_('_'))];
|
identifier %= lexeme[(alpha | char_('_')) >> *(alnum | char_('_'))];
|
||||||
|
|
||||||
apply_block = lit("apply") >> mustache_like_token >> "to" >> source_info >> +eol >>
|
apply_block = lit("apply") >> mustache_like_token >> "to" >> source_info >> +eol >>
|
||||||
((xpath_assignment | struct_block) % +eol) >> +eol >> "end";
|
assignment_list >> +eol >> "end";
|
||||||
struct_block = "struct" >> identifier >> +eol >> (xpath_assignment % +eol) >> +eol >> "end";
|
struct_block = "struct" >> identifier >> +eol >> assignment_list >> +eol >> "end";
|
||||||
|
|
||||||
mustache_block %= as_string[lit("==") >> identifier] >> eol >>
|
mustache_block %= as_string[lit("==") >> identifier] >> eol >>
|
||||||
as_string[no_skip[+(!lit("==end") >> char_)]] >> "==end";
|
as_string[no_skip[+(!lit("==end") >> char_)]] >> "==end";
|
||||||
|
|
||||||
|
assignment_list = (xpath_assignment | struct_block) % +eol;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -129,6 +121,7 @@ namespace duck { namespace sl {
|
||||||
RuleType<ApplyBlock()> apply_block;
|
RuleType<ApplyBlock()> apply_block;
|
||||||
RuleType<StructBlock()> struct_block;
|
RuleType<StructBlock()> struct_block;
|
||||||
RuleType<MustacheBlock()> mustache_block;
|
RuleType<MustacheBlock()> mustache_block;
|
||||||
|
RuleType<std::vector<StructItem>()> assignment_list;
|
||||||
};
|
};
|
||||||
} //unnamed namespace
|
} //unnamed namespace
|
||||||
|
|
||||||
|
@ -150,19 +143,4 @@ namespace duck { namespace sl {
|
||||||
}
|
}
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
// std::vector<element_def> get_xpath_definitions (const ScrapNode& parAST) {
|
|
||||||
// std::vector<element_def> retval;
|
|
||||||
// implem::XPathVisitor xpath_vis(&retval);
|
|
||||||
// boost::apply_visitor(xpath_vis, parAST);
|
|
||||||
// return std::move(retval);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// void print_results (std::ostream& parOut, const ScrapNode& parAST, const std::vector<element_def>& parOutcome, const ResultList& parResList) {
|
|
||||||
//#if !defined(NDEBUG)
|
|
||||||
// std::cout << "print_results()...\n";
|
|
||||||
//#endif
|
|
||||||
// implem::ResultPrinter printer(&parOut, &parOutcome, & parResList);
|
|
||||||
// boost::apply_visitor(printer, parAST);
|
|
||||||
// }
|
|
||||||
}} //namespace duck::sl
|
}} //namespace duck::sl
|
||||||
|
|
|
@ -34,6 +34,23 @@ namespace duck { namespace sl {
|
||||||
std::string xpath;
|
std::string xpath;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct StructBlock;
|
||||||
|
|
||||||
|
struct StructItem : boost::spirit::extended_variant<
|
||||||
|
XPathElement,
|
||||||
|
boost::recursive_wrapper<StructBlock>
|
||||||
|
> {
|
||||||
|
StructItem() : base_type() {}
|
||||||
|
StructItem (const XPathElement& value) : base_type(value) {}
|
||||||
|
StructItem (const StructBlock& value) : base_type(value) {}
|
||||||
|
using base_type::operator=;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct StructBlock {
|
||||||
|
std::string name;
|
||||||
|
std::vector<StructItem> xpaths;
|
||||||
|
};
|
||||||
|
|
||||||
struct SourceInfo {
|
struct SourceInfo {
|
||||||
enum Type { URL, Token };
|
enum Type { URL, Token };
|
||||||
|
|
||||||
|
@ -43,22 +60,7 @@ namespace duck { namespace sl {
|
||||||
|
|
||||||
struct FromBlock {
|
struct FromBlock {
|
||||||
SourceInfo source;
|
SourceInfo source;
|
||||||
std::vector<XPathElement> xpaths;
|
std::vector<StructItem> xpaths;
|
||||||
};
|
|
||||||
|
|
||||||
struct StructBlock {
|
|
||||||
std::string name;
|
|
||||||
std::vector<XPathElement> xpaths;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct StructItem : boost::spirit::extended_variant<
|
|
||||||
XPathElement,
|
|
||||||
StructBlock
|
|
||||||
> {
|
|
||||||
StructItem() : base_type() {}
|
|
||||||
StructItem (const XPathElement& value) : base_type(value) {}
|
|
||||||
StructItem (const StructBlock& value) : base_type(value) {}
|
|
||||||
using base_type::operator=;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ApplyBlock {
|
struct ApplyBlock {
|
||||||
|
|
Loading…
Add table
Reference in a new issue