diff --git a/src/htmlretrieve.cpp b/src/htmlretrieve.cpp
index b466fd3..0c9ef66 100644
--- a/src/htmlretrieve.cpp
+++ b/src/htmlretrieve.cpp
@@ -45,7 +45,7 @@ namespace duck {
}
}
- bool isHttps (const std::string& parUrl) {
+ bool isHttps (const std::string_view& parUrl) {
const char protocol[] = "https://";
const size_t protocolLen = sizeof(protocol) / sizeof(protocol[0]) - 1;
if (parUrl.size() < protocolLen)
@@ -103,7 +103,7 @@ namespace duck {
}
- std::string fetch_html (const std::string& parSource, std::string parUserAgent, bool parSslVerifyPeer, bool parSslVerifyHost) {
+ std::string fetch_html (const std::string_view& parSource, std::string parUserAgent, bool parSslVerifyPeer, bool parSslVerifyHost) {
using curl::curl_easy;
using curl::curl_pair;
using curl::curl_ios;
@@ -111,7 +111,7 @@ namespace duck {
std::ostringstream oss;
curl_ios wr(oss);
curl_easy easy(wr);
- easy.add(curl_pair(CURLOPT_URL, parSource));
+ easy.add(curl_pair(CURLOPT_URL, std::string(parSource)));
if (isHttps(parSource)) {
easy.add(curl_pair(CURLOPT_SSL_VERIFYPEER, parSslVerifyPeer));
easy.add(curl_pair(CURLOPT_SSL_VERIFYHOST, parSslVerifyHost));
diff --git a/src/htmlretrieve.hpp b/src/htmlretrieve.hpp
index ea1123d..578585a 100644
--- a/src/htmlretrieve.hpp
+++ b/src/htmlretrieve.hpp
@@ -20,9 +20,10 @@
#define idC6776D903059465191FFB64FCFD6B86A
#include
+#include
namespace duck {
- std::string fetch_html ( const std::string& parSource, std::string parUserAgent, bool parSslVerifyPeer, bool parSslVerifyHost );
+ std::string fetch_html ( const std::string_view& parSource, std::string parUserAgent, bool parSslVerifyPeer, bool parSslVerifyHost );
std::string clean_html ( std::string&& html );
} //namespace duck
diff --git a/src/scraplang/apply.cpp b/src/scraplang/apply.cpp
index 58fdda6..edcb188 100644
--- a/src/scraplang/apply.cpp
+++ b/src/scraplang/apply.cpp
@@ -72,6 +72,8 @@ namespace duck { namespace sl {
content(""),
mustache_name(parMstchName)
{
+ assert(apply_to);
+ assert(not apply_to->value.empty());
}
ApplyEntry (ApplyEntry&&) = default;
ApplyEntry& operator=(ApplyEntry&&) = default;
@@ -150,6 +152,7 @@ namespace duck { namespace sl {
#if defined(APPLY_VERBOSE)
std::cout << parVal << '\n';
#endif
+ assert(not parVal.source.value.empty());
m_apply_entries.emplace_back(&parVal.source, parVal.mustache_model);
store_entry_subtree(parVal.xpaths, m_apply_entries.back().content);
}
@@ -353,9 +356,15 @@ namespace duck { namespace sl {
for (auto& apply_entry : apply_entries) {
EntryNodeList entry_node {std::make_pair(apply_entry.apply_to, apply_entry.content)};
mstch::map entry_ctx = to_mustache_map(entry_node, xpath_runner);
- //std::cout << "Raw mustache for \"" << must.first << "\":\n" <<
- // must.second.text << "\nRendered mustache:\n";
std::string name(apply_entry.mustache_name);
+
+ std::cout << "context size: " << entry_ctx.size() << '\n';
+ for (auto& ctx_itm : entry_ctx) {
+ std::cout << '\t' << ctx_itm.first << '\n';
+ }
+ std::cout << "Raw mustache for \"" << name << "\":\n" <<
+ mustaches.at(name).text << "\nRendered mustache:\n";
+
std::cout << mstch::render(mustaches.at(name).text, entry_ctx) << std::endl;
}
diff --git a/src/scraplang/html_pool_base.hpp b/src/scraplang/html_pool_base.hpp
index ec659be..8b22459 100644
--- a/src/scraplang/html_pool_base.hpp
+++ b/src/scraplang/html_pool_base.hpp
@@ -21,10 +21,11 @@
#include "implem/ResourcePool.hpp"
#include "kakoune/safe_ptr.hh"
+#include
namespace duck { namespace sl {
namespace implem {
- typedef duckutil::ResourcePool HtmlPoolBase;
+ typedef duckutil::ResourcePool HtmlPoolBase;
} //namespace implem
class HtmlPoolBase : public implem::HtmlPoolBase, public Kakoune::SafeCountable {
diff --git a/src/scraplang/xpath_runner.cpp b/src/scraplang/xpath_runner.cpp
index 058f9bd..432df09 100644
--- a/src/scraplang/xpath_runner.cpp
+++ b/src/scraplang/xpath_runner.cpp
@@ -17,13 +17,15 @@
*/
#include "xpath_runner.hpp"
+#include "xpath.hpp"
#include
+#include
namespace duck { namespace sl {
struct XPathRunner::XPathKey {
- XPathKey (const std::string& parSrc, const std::string& parQuery) :
- source_address(parSrc),
- xpath_query(parQuery)
+ XPathKey (const std::string_view& parSrc, const std::string_view& parQuery) : // Builds a key from a source address and a query; an empty source is rejected by the assert in the body.
+ source_address(std::string(parSrc)), // materialise the view as a std::string before initialising the member
+ xpath_query(std::string(parQuery)) // same conversion for the query text
{
assert(not source_address.empty());
}
@@ -51,7 +53,24 @@ namespace duck { namespace sl {
std::string_view parSrc,
std::string_view parQuery
) {
- static std::vector deleme {"hello", "world"};
- return deleme;
+ std::cout << "XPathRunner::query()\n";
+ auto ins_retval = m_cached_results.insert(std::make_pair(XPathKey(parSrc, parQuery), std::vector()));
+ const bool inserted = ins_retval.second;
+ assert(ins_retval.first != m_cached_results.end());
+ std::vector& curr_vec = ins_retval.first->second;
+
+ if (inserted) {
+ const auto id = m_pool->AddResource(parSrc);
+ const std::string* html = m_pool->GetByID(id);
+
+ curr_vec = xpath_query(*html, std::string(parQuery));
+ }
+
+ std::cout << "returning " << curr_vec.size() << " items: ";
+ for (auto& i : curr_vec) {
+ std:: cout << '"' << i << "\", ";
+ }
+ std::cout << '\n';
+ return curr_vec;
}
}} //namespace duck::sl
diff --git a/src/xpath.cpp b/src/xpath.cpp
index 6c34279..0ef3db0 100644
--- a/src/xpath.cpp
+++ b/src/xpath.cpp
@@ -76,12 +76,18 @@ namespace duck {
return std::move(retval);
}
- std::string xpath_query (const std::string& parXML, const std::string& parQuery) {
- auto retval = xpath_query(parXML, std::vector{parQuery});
- if (retval.empty() or retval.front().empty())
- return std::string();
- else
- return retval.front().front().second;
+ std::vector xpath_query (const std::string& parXML, const std::string& parQuery) { // Single-query wrapper: forwards to the batch overload and returns the second member of every pair in its first result.
+ auto query_res = xpath_query(parXML, std::vector{parQuery}); // call the batch overload with a one-element query list
+ if (query_res.empty() or query_res.front().empty()) { // no batch entry at all, or the first entry is empty
+ return std::vector();
+ }
+ else {
+ std::vector retval;
+ const std::vector>& src = query_res.front(); // first batch entry; only one query was submitted above
+ retval.reserve(src.size()); // output size is known up front, so allocate once
+ std::transform(src.begin(), src.end(), std::back_inserter(retval), [](const auto& pair) { return pair.second; }); // keep only each pair's second member, in order
+ return retval;
+ }
}
ParseError::ParseError (int parLine, int parColumn, std::string parMessage) {
diff --git a/src/xpath.hpp b/src/xpath.hpp
index d95d1f2..12ce69c 100644
--- a/src/xpath.hpp
+++ b/src/xpath.hpp
@@ -36,7 +36,7 @@ namespace duck {
};
XPathBatchResults xpath_query ( const std::string& parXML, const std::vector& parQueries );
- std::string xpath_query ( const std::string& parXML, const std::string& parQuery );
+ std::vector xpath_query ( const std::string& parXML, const std::string& parQuery );
} //namespace duck
#endif