Making ApplyBlocks work with {{variable}} sources.
This commit is contained in:
parent
b028e8c492
commit
5d2c5863a5
4 changed files with 110 additions and 29 deletions
20
sample.scrap
20
sample.scrap
|
@ -1,17 +1,17 @@
|
||||||
from http://sid-story.wikia.com/wiki/Album
|
from http://sid-story.wikia.com/wiki/Album
|
||||||
pages = //blah/blah/text()
|
pages = //section/header/h2/a/@href
|
||||||
end
|
end
|
||||||
|
|
||||||
apply {{sidian_info_model}} to {{pages}}
|
apply {{test_mustache}} to {{pages}}
|
||||||
struct Sidians
|
struct paragraphs
|
||||||
sidian_name default("n/a") = //table[@class="wikitable sortable"]/tr/td[4]/a/text()
|
paragraph = //section/header/h2/a/text()
|
||||||
activ_probability default("0") = //table[@class="wikitable sortable"]/tr/td[3]/text()
|
|
||||||
end
|
end
|
||||||
something_else = /html/head/text()
|
|
||||||
end
|
end
|
||||||
|
|
||||||
==sidian_info_model
|
==test_mustache
|
||||||
{{#Sidians}}
|
Paragraphs: {{#paragraphs}}
|
||||||
{{sidian_name}} {{activ_probability}}
|
- {{paragraph}}
|
||||||
{{/Sidians}}
|
{{/paragraphs}}
|
||||||
|
|
||||||
|
kthx bye!
|
||||||
==end
|
==end
|
||||||
|
|
|
@ -24,6 +24,12 @@
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
|
||||||
|
//#define HTML_ALWAYS_STDIN
|
||||||
|
|
||||||
|
#if !defined(NDEBUG) && defined(HTML_ALWAYS_STDIN)
|
||||||
|
# define HTML_ALWAYS_STDIN_ENABLED
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace duck {
|
namespace duck {
|
||||||
HtmlPool::HtmlPool (std::string&& agent_name) :
|
HtmlPool::HtmlPool (std::string&& agent_name) :
|
||||||
m_agent(std::move(agent_name))
|
m_agent(std::move(agent_name))
|
||||||
|
@ -33,7 +39,13 @@ namespace duck {
|
||||||
auto HtmlPool::OnResourceLoad (ResourceObjectParameterType parRes) -> ResourceType* {
|
auto HtmlPool::OnResourceLoad (ResourceObjectParameterType parRes) -> ResourceType* {
|
||||||
std::unique_ptr<std::string> html;
|
std::unique_ptr<std::string> html;
|
||||||
|
|
||||||
|
std::cout << "Fetching html from \"" << parRes << "\"\n";
|
||||||
|
|
||||||
|
#if defined(HTML_ALWAYS_STDIN_ENABLED)
|
||||||
|
if (true) {
|
||||||
|
#else
|
||||||
if (parRes == "-") {
|
if (parRes == "-") {
|
||||||
|
#endif
|
||||||
html = std::make_unique<std::string>(read_all(std::cin));
|
html = std::make_unique<std::string>(read_all(std::cin));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
#include <map>
|
#include <map>
|
||||||
#include <boost/variant/apply_visitor.hpp>
|
#include <boost/variant/apply_visitor.hpp>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
|
#include <list>
|
||||||
|
|
||||||
namespace std {
|
namespace std {
|
||||||
} //namespace std
|
} //namespace std
|
||||||
|
@ -52,13 +53,14 @@ namespace duck { namespace sl {
|
||||||
using MustacheEntryMap = std::map<std::string, MustacheEntry>;
|
using MustacheEntryMap = std::map<std::string, MustacheEntry>;
|
||||||
|
|
||||||
struct EntryNode {
|
struct EntryNode {
|
||||||
EntryNode (const EntryNode&) = default;
|
explicit EntryNode (const std::string_view& parName) :
|
||||||
explicit EntryNode (const std::string& parName) :
|
|
||||||
name(parName)
|
name(parName)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
EntryNode (EntryNode&&) = default;
|
EntryNode (EntryNode&&) = default;
|
||||||
|
EntryNode (const EntryNode&) = default;
|
||||||
EntryNode& operator= (EntryNode&&) = default;
|
EntryNode& operator= (EntryNode&&) = default;
|
||||||
|
EntryNode& operator= (const EntryNode&) = default;
|
||||||
|
|
||||||
std::string_view name;
|
std::string_view name;
|
||||||
std::vector<EntryNode> structs;
|
std::vector<EntryNode> structs;
|
||||||
|
@ -85,6 +87,7 @@ namespace duck { namespace sl {
|
||||||
|
|
||||||
class StructItemExtractor : public boost::static_visitor<> {
|
class StructItemExtractor : public boost::static_visitor<> {
|
||||||
public:
|
public:
|
||||||
|
StructItemExtractor() = delete;
|
||||||
explicit StructItemExtractor (EntryNode& parRoot) :
|
explicit StructItemExtractor (EntryNode& parRoot) :
|
||||||
m_root(parRoot)
|
m_root(parRoot)
|
||||||
{
|
{
|
||||||
|
@ -145,7 +148,8 @@ namespace duck { namespace sl {
|
||||||
EntryNode("")
|
EntryNode("")
|
||||||
));
|
));
|
||||||
|
|
||||||
store_entry_subtree(parVal.xpaths, m_global_entries.back().second);
|
EntryNode& curr_node = m_global_entries.back().second;
|
||||||
|
store_entry_subtree(parVal.xpaths, curr_node);
|
||||||
}
|
}
|
||||||
|
|
||||||
void operator() (const ApplyBlock& parVal) {
|
void operator() (const ApplyBlock& parVal) {
|
||||||
|
@ -232,6 +236,39 @@ namespace duck { namespace sl {
|
||||||
const std::size_t m_expected_size;
|
const std::size_t m_expected_size;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Finds the xpath entry named parName among the top-level entries of
// parNodes and runs it through parRunner.
//
// parNodes  - list of (source pointer, entry node) pairs to search; every
//             pair must have a non-null source and an unnamed entry node.
// parName   - name of the xpath element to look for.
// parRunner - runner used to execute the query against the node's source.
//
// Returns the result of parRunner.query() for the first matching element.
// When nothing matches, a diagnostic is written to std::cout, assert(false)
// fires, and (if asserts are compiled out) a reference to a function-local
// static empty vector is returned.
const std::vector<std::string>& query_xpath_by_name (
|
||||||
|
const EntryNodeList& parNodes,
|
||||||
|
const std::string_view& parName,
|
||||||
|
XPathRunner& parRunner
|
||||||
|
) {
|
||||||
|
for (auto& curr_node : parNodes) {
|
||||||
|
assert(curr_node.first);
|
||||||
|
const SourceInfo& source = *curr_node.first;
|
||||||
|
const EntryNode& entry = curr_node.second;
|
||||||
|
assert(entry.name.empty()); // only unnamed nodes are expected in this list
|
||||||
|
|
||||||
|
auto it_found = std::find_if( // linear search over this node's direct xpaths only (entry.structs is not visited)
|
||||||
|
entry.xpaths.begin(),
|
||||||
|
entry.xpaths.end(),
|
||||||
|
[&parName](const auto& xpath_elem) {
|
||||||
|
return xpath_elem->name == parName;
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
if (it_found != entry.xpaths.end()) {
|
||||||
|
const XPathElement* const val = *it_found;
|
||||||
|
assert(val); // NOTE(review): the predicate above already dereferenced this element, so this check comes after first use
|
||||||
|
return parRunner.query(source.value, val->xpath); // first match wins; later nodes are not searched
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static const std::vector<std::string> empty_retval; // static so that a reference to it stays valid after returning
|
||||||
|
std::cout << "query_xpath_by_name(parNodes, \"" << parName <<
|
||||||
|
"\", parRunner) -> nothing found" << std::endl;
|
||||||
|
assert(false); //throw?
|
||||||
|
return empty_retval;
|
||||||
|
}
|
||||||
|
|
||||||
std::size_t largest_array_size_in (mstch::map& parMap) {
|
std::size_t largest_array_size_in (mstch::map& parMap) {
|
||||||
typedef ItemCountingVisitor ITC;
|
typedef ItemCountingVisitor ITC;
|
||||||
using boost::apply_visitor;
|
using boost::apply_visitor;
|
||||||
|
@ -318,11 +355,14 @@ namespace duck { namespace sl {
|
||||||
|
|
||||||
assert(entry.first);
|
assert(entry.first);
|
||||||
std::string_view src_url;
|
std::string_view src_url;
|
||||||
if (SourceInfo::URL == entry.first->type) {
|
|
||||||
|
switch (entry.first->type) {
|
||||||
|
case SourceInfo::URL:
|
||||||
src_url = entry.first->value;
|
src_url = entry.first->value;
|
||||||
}
|
break;
|
||||||
else {
|
case SourceInfo::Token:
|
||||||
assert(false); //not implemented
|
default:
|
||||||
|
assert(false); //not reached
|
||||||
}
|
}
|
||||||
|
|
||||||
mstch::map curr_entry_map = to_mustache_dict_recursive(entry.second, src_url, parRunner);
|
mstch::map curr_entry_map = to_mustache_dict_recursive(entry.second, src_url, parRunner);
|
||||||
|
@ -332,6 +372,26 @@ namespace duck { namespace sl {
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Renders one mustache template for a single (source, entry node) pair and
// writes the result to std::cout.
//
// parSourceInfo  - source the entry node is evaluated against.
// parEntryNode   - entry node converted into the mustache context.
// parMustache    - template text plus its own context entries.
// parXPathRunner - runner handed to to_mustache_map().
//
// Besides the rendered text, this also prints the context size and every
// context key to std::cout.
void exec_apply_block (
|
||||||
|
const SourceInfo& parSourceInfo,
|
||||||
|
const EntryNode& parEntryNode,
|
||||||
|
const MustacheEntry& parMustache,
|
||||||
|
XPathRunner& parXPathRunner
|
||||||
|
) {
|
||||||
|
EntryNodeList entry_node {std::make_pair(&parSourceInfo, parEntryNode)}; // single-element list, as to_mustache_map() takes a list
|
||||||
|
mstch::map entry_ctx = to_mustache_map(entry_node, parXPathRunner);
|
||||||
|
for (auto& ctx : parMustache.context) {
|
||||||
|
entry_ctx[ctx.first] = ctx.second; // template's own context overwrites same-named keys from the entry node
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << "context size: " << entry_ctx.size() << '\n';
|
||||||
|
for (auto& ctx_itm : entry_ctx) {
|
||||||
|
std::cout << '\t' << ctx_itm.first << '\n'; // one context key per line, tab-indented
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << mstch::render(parMustache.text, entry_ctx) << std::endl;
|
||||||
|
}
|
||||||
} //unnamed namespace
|
} //unnamed namespace
|
||||||
|
|
||||||
std::vector<std::string> apply (
|
std::vector<std::string> apply (
|
||||||
|
@ -351,21 +411,30 @@ namespace duck { namespace sl {
|
||||||
|
|
||||||
std::cout << "-------------- visiting done ----------------\n";
|
std::cout << "-------------- visiting done ----------------\n";
|
||||||
XPathRunner xpath_runner(html_pool);
|
XPathRunner xpath_runner(html_pool);
|
||||||
mstch::map mustache_ctx = to_mustache_map(global_entries, xpath_runner);
|
|
||||||
|
|
||||||
for (auto& apply_entry : apply_entries) {
|
for (auto& apply_entry : apply_entries) {
|
||||||
EntryNodeList entry_node {std::make_pair(apply_entry.apply_to, apply_entry.content)};
|
|
||||||
mstch::map entry_ctx = to_mustache_map(entry_node, xpath_runner);
|
|
||||||
std::string name(apply_entry.mustache_name);
|
std::string name(apply_entry.mustache_name);
|
||||||
|
const auto& mustache = mustaches.at(name);
|
||||||
|
if (SourceInfo::Token == apply_entry.apply_to->type) {
|
||||||
|
std::vector<std::string> sources =
|
||||||
|
query_xpath_by_name(global_entries, apply_entry.apply_to->value, xpath_runner);
|
||||||
|
|
||||||
std::cout << "context size: " << entry_ctx.size() << '\n';
|
for (auto& source : sources) {
|
||||||
for (auto& ctx_itm : entry_ctx) {
|
SourceInfo new_source;
|
||||||
std::cout << '\t' << ctx_itm.first << '\n';
|
new_source.value = source;
|
||||||
|
new_source.type = SourceInfo::URL;
|
||||||
|
|
||||||
|
EntryNode new_node(apply_entry.content.name);
|
||||||
|
new_node.structs = apply_entry.content.structs;
|
||||||
|
new_node.xpaths = apply_entry.content.xpaths;
|
||||||
|
|
||||||
|
exec_apply_block(new_source, new_node, mustache, xpath_runner);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
assert(apply_entry.apply_to);
|
||||||
|
exec_apply_block(*apply_entry.apply_to, apply_entry.content, mustache, xpath_runner);
|
||||||
}
|
}
|
||||||
std::cout << "Raw mustache for \"" << name << "\":\n" <<
|
|
||||||
mustaches.at(name).text << "\nRendered mustache:\n";
|
|
||||||
|
|
||||||
std::cout << mstch::render(mustaches.at(name).text, entry_ctx) << std::endl;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
|
|
|
@ -53,7 +53,7 @@ namespace duck { namespace sl {
|
||||||
std::string_view parSrc,
|
std::string_view parSrc,
|
||||||
std::string_view parQuery
|
std::string_view parQuery
|
||||||
) {
|
) {
|
||||||
std::cout << "XPathRunner::query()\n";
|
std::cout << "XPathRunner::query() - \"" << parQuery << "\"\n";
|
||||||
auto ins_retval = m_cached_results.insert(std::make_pair(XPathKey(parSrc, parQuery), std::vector<std::string>()));
|
auto ins_retval = m_cached_results.insert(std::make_pair(XPathKey(parSrc, parQuery), std::vector<std::string>()));
|
||||||
const bool inserted = ins_retval.second;
|
const bool inserted = ins_retval.second;
|
||||||
assert(ins_retval.first != m_cached_results.end());
|
assert(ins_retval.first != m_cached_results.end());
|
||||||
|
@ -68,7 +68,7 @@ namespace duck { namespace sl {
|
||||||
|
|
||||||
std::cout << "returning " << curr_vec.size() << " items: ";
|
std::cout << "returning " << curr_vec.size() << " items: ";
|
||||||
for (auto& i : curr_vec) {
|
for (auto& i : curr_vec) {
|
||||||
std:: cout << '"' << i << "\", ";
|
std::cout << '"' << i << "\", ";
|
||||||
}
|
}
|
||||||
std::cout << '\n';
|
std::cout << '\n';
|
||||||
return curr_vec;
|
return curr_vec;
|
||||||
|
|
Loading…
Add table
Reference in a new issue