Making ApplyBlocks work with {{variable}} sources.

This commit is contained in:
King_DuckZ 2018-02-15 10:29:05 +00:00
parent b028e8c492
commit 5d2c5863a5
4 changed files with 110 additions and 29 deletions

View file

@ -1,17 +1,17 @@
from http://sid-story.wikia.com/wiki/Album
pages = //blah/blah/text()
pages = //section/header/h2/a/@href
end
apply {{sidian_info_model}} to {{pages}}
struct Sidians
sidian_name default("n/a") = //table[@class="wikitable sortable"]/tr/td[4]/a/text()
activ_probability default("0") = //table[@class="wikitable sortable"]/tr/td[3]/text()
apply {{test_mustache}} to {{pages}}
struct paragraphs
paragraph = //section/header/h2/a/text()
end
something_else = /html/head/text()
end
==sidian_info_model
{{#Sidians}}
{{sidian_name}} {{activ_probability}}
{{/Sidians}}
==test_mustache
Paragraphs: {{#paragraphs}}
- {{paragraph}}
{{/paragraphs}}
kthx bye!
==end

View file

@ -24,6 +24,12 @@
#include <utility>
#include <iostream>
//#define HTML_ALWAYS_STDIN
#if !defined(NDEBUG) && defined(HTML_ALWAYS_STDIN)
# define HTML_ALWAYS_STDIN_ENABLED
#endif
namespace duck {
HtmlPool::HtmlPool (std::string&& agent_name) :
m_agent(std::move(agent_name))
@ -33,7 +39,13 @@ namespace duck {
auto HtmlPool::OnResourceLoad (ResourceObjectParameterType parRes) -> ResourceType* {
std::unique_ptr<std::string> html;
std::cout << "Fetching html from \"" << parRes << "\"\n";
#if defined(HTML_ALWAYS_STDIN_ENABLED)
if (true) {
#else
if (parRes == "-") {
#endif
html = std::make_unique<std::string>(read_all(std::cin));
}
else {

View file

@ -29,6 +29,7 @@
#include <map>
#include <boost/variant/apply_visitor.hpp>
#include <string_view>
#include <list>
namespace std {
} //namespace std
@ -52,13 +53,14 @@ namespace duck { namespace sl {
using MustacheEntryMap = std::map<std::string, MustacheEntry>;
struct EntryNode {
EntryNode (const EntryNode&) = default;
explicit EntryNode (const std::string& parName) :
explicit EntryNode (const std::string_view& parName) :
name(parName)
{
}
EntryNode (EntryNode&&) = default;
EntryNode (const EntryNode&) = default;
EntryNode& operator= (EntryNode&&) = default;
EntryNode& operator= (const EntryNode&) = default;
std::string_view name;
std::vector<EntryNode> structs;
@ -85,6 +87,7 @@ namespace duck { namespace sl {
class StructItemExtractor : public boost::static_visitor<> {
public:
StructItemExtractor() = delete;
explicit StructItemExtractor (EntryNode& parRoot) :
m_root(parRoot)
{
@ -145,7 +148,8 @@ namespace duck { namespace sl {
EntryNode("")
));
store_entry_subtree(parVal.xpaths, m_global_entries.back().second);
EntryNode& curr_node = m_global_entries.back().second;
store_entry_subtree(parVal.xpaths, curr_node);
}
void operator() (const ApplyBlock& parVal) {
@ -232,6 +236,39 @@ namespace duck { namespace sl {
const std::size_t m_expected_size;
};
const std::vector<std::string>& query_xpath_by_name (
const EntryNodeList& parNodes,
const std::string_view& parName,
XPathRunner& parRunner
) {
for (auto& curr_node : parNodes) {
assert(curr_node.first);
const SourceInfo& source = *curr_node.first;
const EntryNode& entry = curr_node.second;
assert(entry.name.empty());
auto it_found = std::find_if(
entry.xpaths.begin(),
entry.xpaths.end(),
[&parName](const auto& xpath_elem) {
return xpath_elem->name == parName;
}
);
if (it_found != entry.xpaths.end()) {
const XPathElement* const val = *it_found;
assert(val);
return parRunner.query(source.value, val->xpath);
}
}
static const std::vector<std::string> empty_retval;
std::cout << "query_xpath_by_name(parNodes, \"" << parName <<
"\", parRunner) -> nothing found" << std::endl;
assert(false); //throw?
return empty_retval;
}
std::size_t largest_array_size_in (mstch::map& parMap) {
typedef ItemCountingVisitor ITC;
using boost::apply_visitor;
@ -318,11 +355,14 @@ namespace duck { namespace sl {
assert(entry.first);
std::string_view src_url;
if (SourceInfo::URL == entry.first->type) {
switch (entry.first->type) {
case SourceInfo::URL:
src_url = entry.first->value;
}
else {
assert(false); //not implemented
break;
case SourceInfo::Token:
default:
assert(false); //not reached
}
mstch::map curr_entry_map = to_mustache_dict_recursive(entry.second, src_url, parRunner);
@ -332,6 +372,26 @@ namespace duck { namespace sl {
return retval;
}
void exec_apply_block (
const SourceInfo& parSourceInfo,
const EntryNode& parEntryNode,
const MustacheEntry& parMustache,
XPathRunner& parXPathRunner
) {
EntryNodeList entry_node {std::make_pair(&parSourceInfo, parEntryNode)};
mstch::map entry_ctx = to_mustache_map(entry_node, parXPathRunner);
for (auto& ctx : parMustache.context) {
entry_ctx[ctx.first] = ctx.second;
}
std::cout << "context size: " << entry_ctx.size() << '\n';
for (auto& ctx_itm : entry_ctx) {
std::cout << '\t' << ctx_itm.first << '\n';
}
std::cout << mstch::render(parMustache.text, entry_ctx) << std::endl;
}
} //unnamed namespace
std::vector<std::string> apply (
@ -351,21 +411,30 @@ namespace duck { namespace sl {
std::cout << "-------------- visiting done ----------------\n";
XPathRunner xpath_runner(html_pool);
mstch::map mustache_ctx = to_mustache_map(global_entries, xpath_runner);
for (auto& apply_entry : apply_entries) {
EntryNodeList entry_node {std::make_pair(apply_entry.apply_to, apply_entry.content)};
mstch::map entry_ctx = to_mustache_map(entry_node, xpath_runner);
std::string name(apply_entry.mustache_name);
const auto& mustache = mustaches.at(name);
if (SourceInfo::Token == apply_entry.apply_to->type) {
std::vector<std::string> sources =
query_xpath_by_name(global_entries, apply_entry.apply_to->value, xpath_runner);
std::cout << "context size: " << entry_ctx.size() << '\n';
for (auto& ctx_itm : entry_ctx) {
std::cout << '\t' << ctx_itm.first << '\n';
for (auto& source : sources) {
SourceInfo new_source;
new_source.value = source;
new_source.type = SourceInfo::URL;
EntryNode new_node(apply_entry.content.name);
new_node.structs = apply_entry.content.structs;
new_node.xpaths = apply_entry.content.xpaths;
exec_apply_block(new_source, new_node, mustache, xpath_runner);
}
}
else {
assert(apply_entry.apply_to);
exec_apply_block(*apply_entry.apply_to, apply_entry.content, mustache, xpath_runner);
}
std::cout << "Raw mustache for \"" << name << "\":\n" <<
mustaches.at(name).text << "\nRendered mustache:\n";
std::cout << mstch::render(mustaches.at(name).text, entry_ctx) << std::endl;
}
return retval;

View file

@ -53,7 +53,7 @@ namespace duck { namespace sl {
std::string_view parSrc,
std::string_view parQuery
) {
std::cout << "XPathRunner::query()\n";
std::cout << "XPathRunner::query() - \"" << parQuery << "\"\n";
auto ins_retval = m_cached_results.insert(std::make_pair(XPathKey(parSrc, parQuery), std::vector<std::string>()));
const bool inserted = ins_retval.second;
assert(ins_retval.first != m_cached_results.end());
@ -68,7 +68,7 @@ namespace duck { namespace sl {
std::cout << "returning " << curr_vec.size() << " items: ";
for (auto& i : curr_vec) {
std:: cout << '"' << i << "\", ";
std::cout << '"' << i << "\", ";
}
std::cout << '\n';
return curr_vec;