Making ApplyBlocks work with {{variable}} sources.
This commit is contained in:
parent
b028e8c492
commit
5d2c5863a5
4 changed files with 110 additions and 29 deletions
20
sample.scrap
20
sample.scrap
|
@ -1,17 +1,17 @@
|
|||
from http://sid-story.wikia.com/wiki/Album
|
||||
pages = //blah/blah/text()
|
||||
pages = //section/header/h2/a/@href
|
||||
end
|
||||
|
||||
apply {{sidian_info_model}} to {{pages}}
|
||||
struct Sidians
|
||||
sidian_name default("n/a") = //table[@class="wikitable sortable"]/tr/td[4]/a/text()
|
||||
activ_probability default("0") = //table[@class="wikitable sortable"]/tr/td[3]/text()
|
||||
apply {{test_mustache}} to {{pages}}
|
||||
struct paragraphs
|
||||
paragraph = //section/header/h2/a/text()
|
||||
end
|
||||
something_else = /html/head/text()
|
||||
end
|
||||
|
||||
==sidian_info_model
|
||||
{{#Sidians}}
|
||||
{{sidian_name}} {{activ_probability}}
|
||||
{{/Sidians}}
|
||||
==test_mustache
|
||||
Paragraphs: {{#paragraphs}}
|
||||
- {{paragraph}}
|
||||
{{/paragraphs}}
|
||||
|
||||
kthx bye!
|
||||
==end
|
||||
|
|
|
@ -24,6 +24,12 @@
|
|||
#include <utility>
|
||||
#include <iostream>
|
||||
|
||||
//#define HTML_ALWAYS_STDIN
|
||||
|
||||
#if !defined(NDEBUG) && defined(HTML_ALWAYS_STDIN)
|
||||
# define HTML_ALWAYS_STDIN_ENABLED
|
||||
#endif
|
||||
|
||||
namespace duck {
|
||||
HtmlPool::HtmlPool (std::string&& agent_name) :
|
||||
m_agent(std::move(agent_name))
|
||||
|
@ -33,7 +39,13 @@ namespace duck {
|
|||
auto HtmlPool::OnResourceLoad (ResourceObjectParameterType parRes) -> ResourceType* {
|
||||
std::unique_ptr<std::string> html;
|
||||
|
||||
std::cout << "Fetching html from \"" << parRes << "\"\n";
|
||||
|
||||
#if defined(HTML_ALWAYS_STDIN_ENABLED)
|
||||
if (true) {
|
||||
#else
|
||||
if (parRes == "-") {
|
||||
#endif
|
||||
html = std::make_unique<std::string>(read_all(std::cin));
|
||||
}
|
||||
else {
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
#include <map>
|
||||
#include <boost/variant/apply_visitor.hpp>
|
||||
#include <string_view>
|
||||
#include <list>
|
||||
|
||||
namespace std {
|
||||
} //namespace std
|
||||
|
@ -52,13 +53,14 @@ namespace duck { namespace sl {
|
|||
using MustacheEntryMap = std::map<std::string, MustacheEntry>;
|
||||
|
||||
struct EntryNode {
|
||||
EntryNode (const EntryNode&) = default;
|
||||
explicit EntryNode (const std::string& parName) :
|
||||
explicit EntryNode (const std::string_view& parName) :
|
||||
name(parName)
|
||||
{
|
||||
}
|
||||
EntryNode (EntryNode&&) = default;
|
||||
EntryNode (const EntryNode&) = default;
|
||||
EntryNode& operator= (EntryNode&&) = default;
|
||||
EntryNode& operator= (const EntryNode&) = default;
|
||||
|
||||
std::string_view name;
|
||||
std::vector<EntryNode> structs;
|
||||
|
@ -85,6 +87,7 @@ namespace duck { namespace sl {
|
|||
|
||||
class StructItemExtractor : public boost::static_visitor<> {
|
||||
public:
|
||||
StructItemExtractor() = delete;
|
||||
explicit StructItemExtractor (EntryNode& parRoot) :
|
||||
m_root(parRoot)
|
||||
{
|
||||
|
@ -145,7 +148,8 @@ namespace duck { namespace sl {
|
|||
EntryNode("")
|
||||
));
|
||||
|
||||
store_entry_subtree(parVal.xpaths, m_global_entries.back().second);
|
||||
EntryNode& curr_node = m_global_entries.back().second;
|
||||
store_entry_subtree(parVal.xpaths, curr_node);
|
||||
}
|
||||
|
||||
void operator() (const ApplyBlock& parVal) {
|
||||
|
@ -232,6 +236,39 @@ namespace duck { namespace sl {
|
|||
const std::size_t m_expected_size;
|
||||
};
|
||||
|
||||
const std::vector<std::string>& query_xpath_by_name (
|
||||
const EntryNodeList& parNodes,
|
||||
const std::string_view& parName,
|
||||
XPathRunner& parRunner
|
||||
) {
|
||||
for (auto& curr_node : parNodes) {
|
||||
assert(curr_node.first);
|
||||
const SourceInfo& source = *curr_node.first;
|
||||
const EntryNode& entry = curr_node.second;
|
||||
assert(entry.name.empty());
|
||||
|
||||
auto it_found = std::find_if(
|
||||
entry.xpaths.begin(),
|
||||
entry.xpaths.end(),
|
||||
[&parName](const auto& xpath_elem) {
|
||||
return xpath_elem->name == parName;
|
||||
}
|
||||
);
|
||||
|
||||
if (it_found != entry.xpaths.end()) {
|
||||
const XPathElement* const val = *it_found;
|
||||
assert(val);
|
||||
return parRunner.query(source.value, val->xpath);
|
||||
}
|
||||
}
|
||||
|
||||
static const std::vector<std::string> empty_retval;
|
||||
std::cout << "query_xpath_by_name(parNodes, \"" << parName <<
|
||||
"\", parRunner) -> nothing found" << std::endl;
|
||||
assert(false); //throw?
|
||||
return empty_retval;
|
||||
}
|
||||
|
||||
std::size_t largest_array_size_in (mstch::map& parMap) {
|
||||
typedef ItemCountingVisitor ITC;
|
||||
using boost::apply_visitor;
|
||||
|
@ -318,11 +355,14 @@ namespace duck { namespace sl {
|
|||
|
||||
assert(entry.first);
|
||||
std::string_view src_url;
|
||||
if (SourceInfo::URL == entry.first->type) {
|
||||
|
||||
switch (entry.first->type) {
|
||||
case SourceInfo::URL:
|
||||
src_url = entry.first->value;
|
||||
}
|
||||
else {
|
||||
assert(false); //not implemented
|
||||
break;
|
||||
case SourceInfo::Token:
|
||||
default:
|
||||
assert(false); //not reached
|
||||
}
|
||||
|
||||
mstch::map curr_entry_map = to_mustache_dict_recursive(entry.second, src_url, parRunner);
|
||||
|
@ -332,6 +372,26 @@ namespace duck { namespace sl {
|
|||
|
||||
return retval;
|
||||
}
|
||||
|
||||
void exec_apply_block (
|
||||
const SourceInfo& parSourceInfo,
|
||||
const EntryNode& parEntryNode,
|
||||
const MustacheEntry& parMustache,
|
||||
XPathRunner& parXPathRunner
|
||||
) {
|
||||
EntryNodeList entry_node {std::make_pair(&parSourceInfo, parEntryNode)};
|
||||
mstch::map entry_ctx = to_mustache_map(entry_node, parXPathRunner);
|
||||
for (auto& ctx : parMustache.context) {
|
||||
entry_ctx[ctx.first] = ctx.second;
|
||||
}
|
||||
|
||||
std::cout << "context size: " << entry_ctx.size() << '\n';
|
||||
for (auto& ctx_itm : entry_ctx) {
|
||||
std::cout << '\t' << ctx_itm.first << '\n';
|
||||
}
|
||||
|
||||
std::cout << mstch::render(parMustache.text, entry_ctx) << std::endl;
|
||||
}
|
||||
} //unnamed namespace
|
||||
|
||||
std::vector<std::string> apply (
|
||||
|
@ -351,21 +411,30 @@ namespace duck { namespace sl {
|
|||
|
||||
std::cout << "-------------- visiting done ----------------\n";
|
||||
XPathRunner xpath_runner(html_pool);
|
||||
mstch::map mustache_ctx = to_mustache_map(global_entries, xpath_runner);
|
||||
|
||||
for (auto& apply_entry : apply_entries) {
|
||||
EntryNodeList entry_node {std::make_pair(apply_entry.apply_to, apply_entry.content)};
|
||||
mstch::map entry_ctx = to_mustache_map(entry_node, xpath_runner);
|
||||
std::string name(apply_entry.mustache_name);
|
||||
const auto& mustache = mustaches.at(name);
|
||||
if (SourceInfo::Token == apply_entry.apply_to->type) {
|
||||
std::vector<std::string> sources =
|
||||
query_xpath_by_name(global_entries, apply_entry.apply_to->value, xpath_runner);
|
||||
|
||||
std::cout << "context size: " << entry_ctx.size() << '\n';
|
||||
for (auto& ctx_itm : entry_ctx) {
|
||||
std::cout << '\t' << ctx_itm.first << '\n';
|
||||
for (auto& source : sources) {
|
||||
SourceInfo new_source;
|
||||
new_source.value = source;
|
||||
new_source.type = SourceInfo::URL;
|
||||
|
||||
EntryNode new_node(apply_entry.content.name);
|
||||
new_node.structs = apply_entry.content.structs;
|
||||
new_node.xpaths = apply_entry.content.xpaths;
|
||||
|
||||
exec_apply_block(new_source, new_node, mustache, xpath_runner);
|
||||
}
|
||||
}
|
||||
else {
|
||||
assert(apply_entry.apply_to);
|
||||
exec_apply_block(*apply_entry.apply_to, apply_entry.content, mustache, xpath_runner);
|
||||
}
|
||||
std::cout << "Raw mustache for \"" << name << "\":\n" <<
|
||||
mustaches.at(name).text << "\nRendered mustache:\n";
|
||||
|
||||
std::cout << mstch::render(mustaches.at(name).text, entry_ctx) << std::endl;
|
||||
}
|
||||
|
||||
return retval;
|
||||
|
|
|
@ -53,7 +53,7 @@ namespace duck { namespace sl {
|
|||
std::string_view parSrc,
|
||||
std::string_view parQuery
|
||||
) {
|
||||
std::cout << "XPathRunner::query()\n";
|
||||
std::cout << "XPathRunner::query() - \"" << parQuery << "\"\n";
|
||||
auto ins_retval = m_cached_results.insert(std::make_pair(XPathKey(parSrc, parQuery), std::vector<std::string>()));
|
||||
const bool inserted = ins_retval.second;
|
||||
assert(ins_retval.first != m_cached_results.end());
|
||||
|
@ -68,7 +68,7 @@ namespace duck { namespace sl {
|
|||
|
||||
std::cout << "returning " << curr_vec.size() << " items: ";
|
||||
for (auto& i : curr_vec) {
|
||||
std:: cout << '"' << i << "\", ";
|
||||
std::cout << '"' << i << "\", ";
|
||||
}
|
||||
std::cout << '\n';
|
||||
return curr_vec;
|
||||
|
|
Loading…
Add table
Reference in a new issue