Trying to get scraplang implemented
Lots of changes I made on the train and had little time to tidy up. Use C++17 (for std::optional). Clean up the CMake script a bit. Get rid of unused stuff. Add a skeleton implementation of some classes for scraplang.
This commit is contained in:
parent
c31d317d51
commit
f0e7a1d136
32 changed files with 1765 additions and 288 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1 +1,2 @@
|
|||
build/
|
||||
tags
|
||||
|
|
3
.gitmodules
vendored
3
.gitmodules
vendored
|
@ -4,3 +4,6 @@
|
|||
[submodule "lib/tidy"]
|
||||
path = lib/tidy
|
||||
url = https://github.com/htacg/tidy-html5.git
|
||||
[submodule "lib/mstch"]
|
||||
path = lib/mstch
|
||||
url = https://github.com/KingDuckZ/mstch.git
|
||||
|
|
|
@ -8,8 +8,11 @@ include(GetGitRevisionDescription)
|
|||
find_package(PugiXML REQUIRED)
|
||||
find_package(Boost 1.32.0 COMPONENTS program_options)
|
||||
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -std=c++11 -Wall -Wextra -g -O0 -fno-omit-frame-pointer")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++11 -Wall -Wextra -g -O3 -fomit-frame-pointer")
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall -Wextra -g -O0 -fno-omit-frame-pointer")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Wall -Wextra -g -O3 -fomit-frame-pointer")
|
||||
|
||||
set(DEFAULT_USER_AGENT "DuckScraper")
|
||||
set(PROJECT_VERSION_BETA "1")
|
||||
|
@ -20,25 +23,30 @@ configure_file(
|
|||
"${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.h"
|
||||
)
|
||||
|
||||
include_directories(SYSTEM
|
||||
lib/tidy/include
|
||||
${PUGIXML_INCLUDE_DIR}
|
||||
lib/curlcpp/include
|
||||
${Boost_INCLUDE_DIRS}
|
||||
)
|
||||
include_directories(
|
||||
src/
|
||||
"${PROJECT_BINARY_DIR}"
|
||||
)
|
||||
|
||||
add_executable(${PROJECT_NAME}
|
||||
src/main.cpp
|
||||
src/html_pool.cpp
|
||||
src/htmlretrieve.cpp
|
||||
src/commandline.cpp
|
||||
src/scraplang/scraplang.cpp
|
||||
src/scraplang/parse.cpp
|
||||
src/scraplang/apply.cpp
|
||||
src/scraplang/xpath_manager.cpp
|
||||
src/scraplang/variables.cpp
|
||||
src/xpath.cpp
|
||||
)
|
||||
|
||||
target_include_directories(${PROJECT_NAME} SYSTEM
|
||||
PRIVATE lib/tidy/include
|
||||
PRIVATE ${PUGIXML_INCLUDE_DIR}
|
||||
PRIVATE lib/curlcpp/include
|
||||
PRIVATE ${Boost_INCLUDE_DIRS}
|
||||
PRIVATE lib/mstch/include
|
||||
)
|
||||
target_include_directories(${PROJECT_NAME}
|
||||
PRIVATE src/
|
||||
PRIVATE "${PROJECT_BINARY_DIR}"
|
||||
)
|
||||
|
||||
if (BUILD_SHARED_TIDY)
|
||||
set(TIDY_LIB "tidy-share")
|
||||
else(BUILD_SHARED_TIDY)
|
||||
|
@ -46,10 +54,15 @@ else(BUILD_SHARED_TIDY)
|
|||
endif(BUILD_SHARED_TIDY)
|
||||
|
||||
target_link_libraries(${PROJECT_NAME}
|
||||
${TIDY_LIB}
|
||||
${PUGIXML_LIBRARIES}
|
||||
curlcpp
|
||||
${Boost_LIBRARIES}
|
||||
PRIVATE ${TIDY_LIB}
|
||||
PRIVATE ${PUGIXML_LIBRARIES}
|
||||
PRIVATE curlcpp
|
||||
PRIVATE ${Boost_LIBRARIES}
|
||||
PRIVATE mstch
|
||||
)
|
||||
|
||||
target_compile_definitions(${PROJECT_NAME}
|
||||
PRIVATE $<$<CONFIG:DEBUG>:KAK_DEBUG>
|
||||
)
|
||||
|
||||
#unset those variables so cmake files from dependencies won't complain about
|
||||
|
@ -62,3 +75,4 @@ unset(PROJECT_VERSION)
|
|||
set(BUILD_SHARED_LIB ${BUILD_SHARED_TIDY}) #for tidy
|
||||
add_subdirectory(lib/tidy)
|
||||
add_subdirectory(lib/curlcpp)
|
||||
add_subdirectory(lib/mstch)
|
||||
|
|
1
lib/mstch
Submodule
1
lib/mstch
Submodule
|
@ -0,0 +1 @@
|
|||
Subproject commit 45122d1d515c90a54d509d4b2d8d9279348518f5
|
17
sample.scrap
Normal file
17
sample.scrap
Normal file
|
@ -0,0 +1,17 @@
|
|||
from http://sid-story.wikia.com/wiki/Album
|
||||
pages = //blah/blah/text()
|
||||
end
|
||||
|
||||
apply {{sidian_info_model}} to {{pages}}
|
||||
struct Sidians
|
||||
sidian_name default("n/a") = //table[@class="wikitable sortable"]/tr/td[4]/a/text()
|
||||
activ_probability default("0") = //table[@class="wikitable sortable"]/tr/td[3]/text()
|
||||
end
|
||||
something_else = /html/head/text()
|
||||
end
|
||||
|
||||
==sidian_info_model
|
||||
{{#Sidians}}
|
||||
{{sidian_name}} {{activ_probability}}
|
||||
{{/Sidians}}
|
||||
==end
|
|
@ -52,6 +52,7 @@ namespace duck {
|
|||
po::options_description query_options("Query options");
|
||||
query_options.add_options()
|
||||
("agent", po::value<std::string>()->default_value(DEFAULT_USER_AGENT), "User agent that will be passed to the server")
|
||||
("model,m", po::value<std::string>(), "Read XPath expressions from the specified file instead of command line")
|
||||
;
|
||||
po::options_description positional_options("Positional options");
|
||||
positional_options.add_options()
|
||||
|
@ -86,6 +87,7 @@ namespace duck {
|
|||
std::cout << "redistribute it under certain conditions.\n"; //type `show c' for details.
|
||||
std::cout << '\n';
|
||||
std::cout << "Usage: " << PROGRAM_NAME << " [options...] <url> <xpath>\n";
|
||||
std::cout << " " << PROGRAM_NAME << " [options...] --model <path> <url>\n";
|
||||
std::cout << "You can pass - as the url to read from stdin\n";
|
||||
std::cout << visible;
|
||||
return true;
|
||||
|
@ -96,11 +98,14 @@ namespace duck {
|
|||
return true;
|
||||
}
|
||||
|
||||
if (parVarMap.count("input-url") == 0) {
|
||||
if (parVarMap.count("input-url") == 0 and parVarMap.count("model") == 0) {
|
||||
throw std::invalid_argument("No input URL specified");
|
||||
}
|
||||
if (parVarMap.count("xpath") == 0) {
|
||||
throw std::invalid_argument("No XPath expression specified");
|
||||
if (not (parVarMap.count("xpath") or parVarMap.count("model"))) {
|
||||
throw std::invalid_argument("No XPath expression specified and no input model given");
|
||||
}
|
||||
else if (parVarMap.count("xpath") and parVarMap.count("model")) {
|
||||
throw std::invalid_argument("Received both model and XPath expression, but only one of the two is allowed");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -16,34 +16,31 @@
|
|||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef idBE96C2D49C4C413888A79EAEB2B9C0FA
|
||||
#define idBE96C2D49C4C413888A79EAEB2B9C0FA
|
||||
|
||||
#include <vector>
|
||||
#include "html_pool.hpp"
|
||||
#include "htmlretrieve.hpp"
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
namespace duck {
|
||||
struct ScrapNode;
|
||||
struct element_def;
|
||||
HtmlPool::HtmlPool (std::string&& agent_name) :
|
||||
m_agent(std::move(agent_name))
|
||||
{
|
||||
}
|
||||
|
||||
class ScrapNodePtr {
|
||||
public:
|
||||
explicit ScrapNodePtr ( ScrapNode* parPtr );
|
||||
ScrapNodePtr ( ScrapNodePtr&& parOther );
|
||||
~ScrapNodePtr ( void ) noexcept;
|
||||
auto HtmlPool::OnResourceLoad (ResourceObjectParameterType parRes) -> ResourceType* {
|
||||
auto html = std::make_unique<std::string>(
|
||||
fetch_html(parRes, m_agent, false, false)
|
||||
);
|
||||
*html = duck::clean_html(std::move(*html));
|
||||
return html.release();
|
||||
}
|
||||
|
||||
ScrapNode& operator* ( void ) { return *m_ptr; }
|
||||
const ScrapNode& operator* ( void ) const { return *m_ptr; }
|
||||
ScrapNode& operator-> ( void ) { return *m_ptr; }
|
||||
const ScrapNode& operator-> ( void ) const { return *m_ptr; }
|
||||
void HtmlPool::OnResourceDestroy (ResourceNameParamType, ResourceType* parRes) noexcept {
|
||||
delete parRes;
|
||||
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<ScrapNode> m_ptr;
|
||||
};
|
||||
|
||||
ScrapNodePtr parse_scraplang ( const std::string& parData );
|
||||
std::vector<element_def> get_xpath_definitions ( const ScrapNode& parAST );
|
||||
auto HtmlPool::GetResourceNameFromResourceObject (ResourceObjectParameterType parRes) -> ResourceNameType {
|
||||
return parRes;
|
||||
}
|
||||
} //namespace duck
|
||||
|
||||
#endif
|
42
src/html_pool.hpp
Normal file
42
src/html_pool.hpp
Normal file
|
@ -0,0 +1,42 @@
|
|||
/* Copyright (C) 2015 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef idCDCACC393BE24CBD94A3B5E2985984A3
|
||||
#define idCDCACC393BE24CBD94A3B5E2985984A3
|
||||
|
||||
#include "scraplang/html_pool_base.hpp"
|
||||
|
||||
namespace duck {
|
||||
class HtmlPool : public ::duck::sl::HtmlPoolBase {
|
||||
typedef ::duck::sl::HtmlPoolBase::ResourceType ResourceType;
|
||||
typedef ::duck::sl::HtmlPoolBase::ResourceNameType ResourceNameType;
|
||||
typedef ::duck::sl::HtmlPoolBase::ResourceObjectParameterType ResourceObjectParameterType;
|
||||
typedef ::duck::sl::HtmlPoolBase::ResourceNameParamType ResourceNameParamType;
|
||||
|
||||
virtual ResourceType* OnResourceLoad (ResourceObjectParameterType parRes);
|
||||
virtual void OnResourceDestroy (ResourceNameParamType parName, ResourceType* parRes) noexcept;
|
||||
virtual ResourceNameType GetResourceNameFromResourceObject (ResourceObjectParameterType parRes);
|
||||
|
||||
std::string m_agent;
|
||||
|
||||
public:
|
||||
explicit HtmlPool (std::string&& agent_name);
|
||||
};
|
||||
} //namespace duck
|
||||
|
||||
#endif
|
115
src/kakoune/ref_ptr.hh
Normal file
115
src/kakoune/ref_ptr.hh
Normal file
|
@ -0,0 +1,115 @@
|
|||
#ifndef ref_ptr_hh_INCLUDED
|
||||
#define ref_ptr_hh_INCLUDED
|
||||
|
||||
#include <utility>
|
||||
|
||||
namespace Kakoune
{

// Base class for objects managed through RefPtr with the default policy:
// it carries the reference counter that RefCountablePolicy manipulates.
struct RefCountable
{
    int refcount = 0;
    virtual ~RefCountable() = default;
};

// Default RefPtr policy: plain (non-atomic) intrusive reference counting.
struct RefCountablePolicy
{
    // A new RefPtr started pointing at r.
    static void inc_ref(RefCountable* r, void*) noexcept { ++r->refcount; }
    // A RefPtr stopped pointing at r; the object is deleted with the last one.
    static void dec_ref(RefCountable* r, void*) { if (--r->refcount == 0) delete r; }
    // A RefPtr was moved; the count does not change, so nothing to do.
    static void ptr_moved(RefCountable*, void*, void*) noexcept {}
};

// Intrusive smart pointer. It stores a raw T* and delegates all bookkeeping
// to Policy:
//   Policy::inc_ref(ptr, this)         when this RefPtr starts pointing at ptr
//   Policy::dec_ref(ptr, this)         when it stops pointing at ptr
//   Policy::ptr_moved(ptr, from, this) when the pointer is moved from another RefPtr
// Null pointers are never passed to the policy.
template<typename T, typename Policy = RefCountablePolicy>
struct RefPtr
{
    RefPtr() = default;
    explicit RefPtr(T* ptr) : m_ptr(ptr) { acquire(); }
    ~RefPtr() { release(); }
    RefPtr(const RefPtr& other) : m_ptr(other.m_ptr) { acquire(); }
    // Steals other's pointer; other is left null and no inc/dec happens.
    RefPtr(RefPtr&& other)
        noexcept(noexcept(std::declval<RefPtr>().moved(nullptr)))
        : m_ptr(other.m_ptr) { other.m_ptr = nullptr; moved(&other); }

    RefPtr& operator=(const RefPtr& other)
    {
        if (other.m_ptr != m_ptr)
        {
            release();
            m_ptr = other.m_ptr;
            acquire();
        }
        return *this;
    }

    RefPtr& operator=(RefPtr&& other)
    {
        // Self-move must be a no-op: without this check the reference held
        // by *this would be released and the pointer reset to null.
        if (this != &other)
        {
            release();
            m_ptr = other.m_ptr;
            other.m_ptr = nullptr;
            moved(&other);
        }
        return *this;
    }

    RefPtr& operator=(T* ptr)
    {
        if (ptr != m_ptr)
        {
            release();
            m_ptr = ptr;
            acquire();
        }
        return *this;
    }

    [[gnu::always_inline]]
    T* operator->() const { return m_ptr; }
    [[gnu::always_inline]]
    T& operator*() const { return *m_ptr; }

    [[gnu::always_inline]]
    T* get() const { return m_ptr; }

    // True when a non-null pointer is held.
    [[gnu::always_inline]]
    explicit operator bool() const { return m_ptr; }

    // Points this RefPtr at ptr (null by default), releasing the old pointee.
    void reset(T* ptr = nullptr)
    {
        if (ptr == m_ptr)
            return;
        release();
        m_ptr = ptr;
        acquire();
    }

    friend bool operator==(const RefPtr& lhs, const RefPtr& rhs) { return lhs.m_ptr == rhs.m_ptr; }
    friend bool operator!=(const RefPtr& lhs, const RefPtr& rhs) { return lhs.m_ptr != rhs.m_ptr; }

private:
    T* m_ptr = nullptr;

    [[gnu::always_inline]]
    void acquire()
    {
        if (m_ptr)
            Policy::inc_ref(m_ptr, this);
    }

    [[gnu::always_inline]]
    void release()
    {
        if (m_ptr)
            Policy::dec_ref(m_ptr, this);
    }

    // Tells the policy that the pointer now held here came from `from`.
    [[gnu::always_inline]]
    void moved(void* from)
        noexcept(noexcept(Policy::ptr_moved(nullptr, nullptr, nullptr)))
    {
        if (m_ptr)
            Policy::ptr_moved(m_ptr, from, this);
    }
};

}
|
||||
|
||||
#endif // ref_ptr_hh_INCLUDED
|
109
src/kakoune/safe_ptr.hh
Normal file
109
src/kakoune/safe_ptr.hh
Normal file
|
@ -0,0 +1,109 @@
|
|||
#ifndef safe_ptr_hh_INCLUDED
|
||||
#define safe_ptr_hh_INCLUDED
|
||||
|
||||
// #define SAFE_PTR_TRACK_CALLSTACKS
|
||||
|
||||
//King_DuckZ:
|
||||
#include <cassert>
|
||||
#define kak_assert(a) assert(a)
|
||||
|
||||
//#include "assert.hh"
|
||||
#include "ref_ptr.hh"
|
||||
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
|
||||
#ifdef SAFE_PTR_TRACK_CALLSTACKS
|
||||
#include "backtrace.hh"
|
||||
#include "vector.hh"
|
||||
#include <algorithm>
|
||||
#endif
|
||||
|
||||
namespace Kakoune
|
||||
{
|
||||
|
||||
// *** SafePtr: objects that assert nobody references them when they die ***
|
||||
|
||||
class SafeCountable
|
||||
{
|
||||
public:
|
||||
#ifdef KAK_DEBUG
|
||||
SafeCountable() : m_count(0) {}
|
||||
SafeCountable (SafeCountable&&) : m_count(0) {}
|
||||
~SafeCountable()
|
||||
{
|
||||
kak_assert(m_count == 0);
|
||||
#ifdef SAFE_PTR_TRACK_CALLSTACKS
|
||||
kak_assert(m_callstacks.empty());
|
||||
#endif
|
||||
}
|
||||
|
||||
private:
|
||||
friend struct SafeCountablePolicy;
|
||||
#ifdef SAFE_PTR_TRACK_CALLSTACKS
|
||||
struct Callstack
|
||||
{
|
||||
Callstack(void* p) : ptr(p) {}
|
||||
void* ptr;
|
||||
Backtrace bt;
|
||||
};
|
||||
|
||||
mutable Vector<Callstack> m_callstacks;
|
||||
#endif
|
||||
mutable int m_count;
|
||||
#endif
|
||||
};
|
||||
|
||||
struct SafeCountablePolicy
|
||||
{
|
||||
#ifdef KAK_DEBUG
|
||||
static void inc_ref(const SafeCountable* sc, void* ptr) noexcept
|
||||
{
|
||||
++sc->m_count;
|
||||
#ifdef SAFE_PTR_TRACK_CALLSTACKS
|
||||
sc->m_callstacks.emplace_back(ptr);
|
||||
#else
|
||||
static_cast<void>(ptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void dec_ref(const SafeCountable* sc, void* ptr) noexcept
|
||||
{
|
||||
--sc->m_count;
|
||||
kak_assert(sc->m_count >= 0);
|
||||
#ifdef SAFE_PTR_TRACK_CALLSTACKS
|
||||
auto it = std::find_if(sc->m_callstacks.begin(), sc->m_callstacks.end(),
|
||||
[=](const SafeCountable::Callstack& cs) { return cs.ptr == ptr; });
|
||||
kak_assert(it != sc->m_callstacks.end());
|
||||
sc->m_callstacks.erase(it);
|
||||
#else
|
||||
static_cast<void>(ptr);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void ptr_moved(const SafeCountable* sc, void* from, void* to) noexcept
|
||||
{
|
||||
#ifdef SAFE_PTR_TRACK_CALLSTACKS
|
||||
auto it = std::find_if(sc->m_callstacks.begin(), sc->m_callstacks.end(),
|
||||
[=](const SafeCountable::Callstack& cs) { return cs.ptr == from; });
|
||||
kak_assert(it != sc->m_callstacks.end());
|
||||
it->ptr = to;
|
||||
#else
|
||||
static_cast<void>(sc);
|
||||
static_cast<void>(from);
|
||||
static_cast<void>(to);
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
static void inc_ref(const SafeCountable*, void*) noexcept {}
|
||||
static void dec_ref(const SafeCountable*, void*) noexcept {}
|
||||
static void ptr_moved(const SafeCountable*, void*, void*) noexcept {}
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
using SafePtr = RefPtr<T, SafeCountablePolicy>;
|
||||
|
||||
}
|
||||
|
||||
#endif // safe_ptr_hh_INCLUDED
|
128
src/main.cpp
128
src/main.cpp
|
@ -27,9 +27,15 @@
|
|||
#include <memory>
|
||||
#include <iterator>
|
||||
#include <stdexcept>
|
||||
#include "scraplang.hpp"
|
||||
#include "html_pool.hpp"
|
||||
|
||||
namespace {
|
||||
void dump_string ( const std::string& parPathDest, const std::string& parData );
|
||||
std::string read_all ( std::istream& parStream );
|
||||
std::string read_all ( std::istream&& parStream );
|
||||
void load_from_commandline ( const boost::program_options::variables_map& parVarMap );
|
||||
void load_from_model ( const boost::program_options::variables_map& parVarMap );
|
||||
} //unnamed namespace
|
||||
|
||||
int main (int argc, char* argv[]) {
|
||||
|
@ -46,54 +52,20 @@ int main (int argc, char* argv[]) {
|
|||
return 2;
|
||||
}
|
||||
|
||||
const auto url = vm["input-url"].as<std::string>();
|
||||
const auto xpath = vm["xpath"].as<std::string>();
|
||||
#if !defined(NDEBUG)
|
||||
std::cout << "URL : " << url << "\n";
|
||||
std::cout << "XPath: " << xpath << std::endl;
|
||||
std::cout << "Agent: " << vm["agent"].as<std::string>() << std::endl;
|
||||
#endif
|
||||
|
||||
std::string html;
|
||||
|
||||
if ("-" != url) {
|
||||
html = duck::fetch_html(url, vm["agent"].as<std::string>(), false, false);
|
||||
}
|
||||
else {
|
||||
std::cin >> std::noskipws;
|
||||
std::istream_iterator<char> it(std::cin);
|
||||
std::istream_iterator<char> end;
|
||||
html = std::string(it, end);
|
||||
}
|
||||
|
||||
if (vm.count("dump-raw")) {
|
||||
dump_string(vm["dump-raw"].as<std::string>(), html);
|
||||
}
|
||||
|
||||
html = duck::clean_html(std::move(html));
|
||||
if (vm.count("dump")) {
|
||||
dump_string(vm["dump"].as<std::string>(), html);
|
||||
}
|
||||
|
||||
try {
|
||||
std::vector<std::string> queries;
|
||||
queries.reserve(1);
|
||||
queries.push_back(std::move(xpath));
|
||||
auto results = duck::xpath_query(html, queries);
|
||||
for (const auto& lst : results[0]) {
|
||||
std::cout << lst.first << ": " << lst.second << '\n';
|
||||
}
|
||||
if (vm.count("model"))
|
||||
load_from_model(vm);
|
||||
else
|
||||
load_from_commandline(vm);
|
||||
}
|
||||
catch (const duck::ParseError& err) {
|
||||
std::cerr << err.what() << std::endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
void dump_string (const std::string& parPathDest, const std::string& parData) {
|
||||
std::unique_ptr<std::ofstream> ofs;
|
||||
const bool use_stdout = ("-" == parPathDest);
|
||||
|
@ -103,4 +75,84 @@ namespace {
|
|||
std::ostream* const os = (use_stdout ? &std::cout : ofs.get());
|
||||
*os << parData;
|
||||
}
|
||||
|
||||
// Reads everything that is left in parStream and returns it as a string,
// whitespace included.
// The data is pulled straight from the stream buffer, so the stream's
// formatting flags (skipws in particular) are left as the caller set them.
std::string read_all (std::istream& parStream) {
	return std::string(
		std::istreambuf_iterator<char>(parStream),
		std::istreambuf_iterator<char>()
	);
}

// Overload for temporaries, e.g. read_all(std::ifstream(path)).
std::string read_all (std::istream&& parStream) {
	// parStream is an lvalue here, so this forwards to the overload above.
	return read_all(parStream);
}
|
||||
|
||||
// "XPath direct mode": obtains one HTML document (from the given URL, or
// from stdin when the URL is "-"), optionally dumps the raw and the cleaned
// markup, runs the single XPath expression from the command line against it
// and prints the entries of the first result list, one per line.
void load_from_commandline (const boost::program_options::variables_map& parVarMap) {
	const std::string source_url = parVarMap["input-url"].as<std::string>();

	std::string html;
	if ("-" == source_url) {
		html = read_all(std::cin);
	}
	else {
		html = duck::fetch_html(source_url, parVarMap["agent"].as<std::string>(), false, false);
	}

	// Optional dump of the markup exactly as it was received.
	if (parVarMap.count("dump-raw")) {
		dump_string(parVarMap["dump-raw"].as<std::string>(), html);
	}

	html = duck::clean_html(std::move(html));

	// Optional dump of the markup after cleaning.
	if (parVarMap.count("dump")) {
		dump_string(parVarMap["dump"].as<std::string>(), html);
	}

	const std::string xpath = parVarMap["xpath"].as<std::string>();

#if !defined(NDEBUG)
	std::cout << " -- XPath direct mode --\n";
	std::cout << "URL : " << parVarMap["input-url"].as<std::string>() << "\n";
	std::cout << "XPath: " << xpath << std::endl;
	std::cout << "Agent: " << parVarMap["agent"].as<std::string>() << std::endl;
#endif

	// xpath_query works on a list of expressions; there is only one here.
	std::vector<std::string> queries(1, xpath);
	auto results = duck::xpath_query(html, queries);
	for (const auto& entry : results[0]) {
		std::cout << entry.first << ": " << entry.second << '\n';
	}
}
|
||||
|
||||
// "XPath model mode": reads the scraplang script named by the "model"
// option, parses it and applies the resulting tree using an HtmlPool
// configured with the "agent" option.
void load_from_model (const boost::program_options::variables_map& parVarMap) {
	const std::string& model_path = parVarMap["model"].as<std::string>();
	const std::string& agent_name = parVarMap["agent"].as<std::string>();

#if !defined(NDEBUG)
	std::cout << " -- XPath model mode --\n";
	if (parVarMap.count("input-url"))
		std::cout << "URL : " << parVarMap["input-url"].as<std::string>() << "\n";
	std::cout << "Model: " << model_path << std::endl;
	std::cout << "Agent: " << agent_name << std::endl;
#endif

	// NOTE(review): the stream is not checked for a successful open before
	// it is read - confirm how a missing model file is meant to be reported.
	const std::string script = read_all(std::ifstream(model_path));
	auto ast = duck::sl::parse(script);

	// HtmlPool takes its agent string by rvalue, so hand it a private copy.
	std::string agent_copy(agent_name);
	duck::HtmlPool html_pool(std::move(agent_copy));
	duck::sl::apply(ast, duck::sl::HtmlPoolBaseSP(&html_pool));

	// Previous, currently disabled, result handling:
	//auto list = duck::get_xpath_definitions(*ast);

	//std::vector<std::string> expressions;
	//expressions.reserve(list.size());
	//for (duck::element_def& elem : list) {
	//	expressions.push_back(std::move(elem.xpath));
	//}
	//auto results = duck::xpath_query(parXML, expressions);
	//duck::print_results(std::cout, *ast, list, results);
	//for (const auto& list : results) {
	//	std::cout << "------\n";
	//	for (const auto& result : list) {
	//		std::cout << result.first << ": " << result.second << '\n';
	//	}
	//}
}
|
||||
} //unnamed namespace
|
||||
|
|
25
src/scraplang.hpp
Normal file
25
src/scraplang.hpp
Normal file
|
@ -0,0 +1,25 @@
|
|||
/* Copyright (C) 2017 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef id8483FDE5CA0E4F40BDE0F4469AC2DF79
|
||||
#define id8483FDE5CA0E4F40BDE0F4469AC2DF79
|
||||
|
||||
#include "scraplang/parse.hpp"
|
||||
#include "scraplang/apply.hpp"
|
||||
|
||||
#endif
|
188
src/scraplang/apply.cpp
Normal file
188
src/scraplang/apply.cpp
Normal file
|
@ -0,0 +1,188 @@
|
|||
/* Copyright (C) 2015 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
//#define APPLY_VERBOSE
|
||||
|
||||
#include "apply.hpp"
|
||||
#include "mstch/mstch.hpp"
|
||||
#include "variables.hpp"
|
||||
#include "html_pool_base.hpp"
|
||||
#include "xpath_manager.hpp"
|
||||
#include <boost/variant/apply_visitor.hpp>
|
||||
#include <map>
|
||||
#if defined(APPLY_VERBOSE)
|
||||
# include <iostream>
|
||||
#endif
|
||||
|
||||
namespace duck { namespace sl {
|
||||
#if defined(APPLY_VERBOSE)
|
||||
std::ostream& operator<< (std::ostream& stream, XPathElement xpath) {
|
||||
stream << "xpath \"" << xpath.name << "\": \"" << xpath.xpath << "\" ";
|
||||
if (xpath.def_val)
|
||||
stream << "default: \"" << *xpath.def_val << '"';
|
||||
else
|
||||
stream << "no default";
|
||||
return stream;
|
||||
}
|
||||
#endif
|
||||
|
||||
namespace {
|
||||
typedef std::map<std::string, std::string> MustacheMap;
|
||||
typedef Kakoune::SafePtr<Variables> VariablesSP;
|
||||
typedef Kakoune::SafePtr<const Variables> VariablesCSP;
|
||||
typedef Kakoune::SafePtr<XPathManager> XPathManagerSP;
|
||||
typedef Kakoune::SafePtr<const XPathManager> XPathManagerCSP;
|
||||
|
||||
// Record kept by NodeVisitor for each `apply` block it visits; apply() reads
// these records back after the whole tree has been visited.
struct ApplyBlockContext {
|
||||
mstch::map context; // values collected by StructVisitor for this block
|
||||
SourceInfo target; // handed to Variables::resolve_array() by apply()
|
||||
std::string name; // handed to Variables::resolve_string() by apply() to look up the mustache model
|
||||
};
|
||||
|
||||
// Visits the items listed inside an `apply` block and collects the
// extracted values into a mustache context:
//  - a plain XPath element becomes an entry keyed by the element name;
//  - a struct block becomes a nested map keyed by the struct name, holding
//    one entry per XPath element of the struct.
// The finished context is handed over with steal_context().
class StructVisitor : public boost::static_visitor<> {
public:
	// Both pointers must be non-null.
	StructVisitor (VariablesCSP vars, XPathManagerCSP xpath_man) :
		m_variables(vars),
		m_xpath_man(xpath_man)
	{
		assert(m_variables);
		assert(m_xpath_man);
	}

	void operator() (const XPathElement& v) {
#if defined(APPLY_VERBOSE)
		std::cout << '\t' << v << '\n';
#endif
		m_context[v.name] = m_xpath_man->extract_one(v);
	}

	void operator() (const StructBlock& v) {
#if defined(APPLY_VERBOSE)
		std::cout << "\tstruct " << v.name << '\n';
		for (auto& xpath : v.xpaths) {
			std::cout << "\t\t" << xpath << '\n';
		}
#endif

		mstch::map submap;
		for (const auto& xpath : v.xpaths) {
			submap[xpath.name] = m_xpath_man->extract_one(xpath);
		}
		// Publish the struct under its own name; without this the values
		// extracted above would be dropped when submap goes out of scope.
		m_context[v.name] = std::move(submap);
	}

	// Gives up the collected context; the visitor should not be used for
	// further lookups into it afterwards.
	mstch::map&& steal_context() { return std::move(m_context); }

private:
	VariablesCSP m_variables;
	XPathManagerCSP m_xpath_man;
	mstch::map m_context;
};
|
||||
|
||||
// Walks the top-level nodes of a parsed scraplang script and gathers what
// apply() needs afterwards:
//  - `from` blocks register their XPath elements as variables;
//  - `apply` blocks are turned into ApplyBlockContext records;
//  - mustache blocks are stored by name.
class NodeVisitor : public boost::static_visitor<> {
public:
	// html_pool must be non-null; it is passed on to the XPathManager.
	explicit NodeVisitor (HtmlPoolBaseSP html_pool) :
		m_xpath_man(html_pool)
	{
		assert(html_pool);
	}

	// A list of nodes: visit each one in order.
	void operator() (const std::vector<ScrapNode>& v) {
		for (auto& itm : v) {
			boost::apply_visitor(*this, itm);
		}
	}

	void operator() (const FromBlock& v) {
#if defined(APPLY_VERBOSE)
		std::cout << "--- FromBlock\n";
		std::cout << "\tsource: " << v.source.value << ", type " << static_cast<int>(v.source.type) << '\n';
#endif

		for (auto& xpath : v.xpaths) {
#if defined(APPLY_VERBOSE)
			std::cout << '\t' << xpath << '\n';
#endif
			m_variables.add_xpath(v.source, xpath);
		}
	}

	void operator() (const ApplyBlock& v) {
#if defined(APPLY_VERBOSE)
		std::cout << "--- ApplyBlock\n";
		std::cout << "\tmustache model: " << v.mustache_model << '\n';
		std::cout << "\tsource: " << v.source.value << ", type " << static_cast<int>(v.source.type) << '\n';
#endif

		auto struct_visitor = StructVisitor(
			VariablesCSP(&m_variables),
			XPathManagerCSP(&m_xpath_man)
		);

		for (auto& xpath : v.xpaths) {
			boost::apply_visitor(struct_visitor, xpath);
		}

		ApplyBlockContext abctx;
		abctx.context = struct_visitor.steal_context();
		// apply() resolves the model through `name` and the pages through
		// `target`, so both have to be recorded with the context.
		abctx.target = v.source;
		abctx.name = v.mustache_model;

		m_apply_blocks.push_back(std::move(abctx));
	}

	void operator() (const MustacheBlock& v) {
#if defined(APPLY_VERBOSE)
		std::cout << "--- MustacheBlock\n";
		std::cout << "\tname: " << v.name << '\n';
		std::cout << "\tcontent: " << v.content << '\n';
#endif

		m_mustaches[v.name] = v.content;
	}

	// Read-only access to what has been gathered so far.
	const MustacheMap& mustaches() const { return m_mustaches; }
	const Variables& variables() const { return m_variables; }
	const std::vector<ApplyBlockContext>& apply_blocks() const { return m_apply_blocks; }

private:
	MustacheMap m_mustaches;
	Variables m_variables;
	XPathManager m_xpath_man;
	std::vector<ApplyBlockContext> m_apply_blocks;
};
|
||||
} //unnamed namespace
|
||||
|
||||
std::vector<std::string> apply (const ScrapNode& node, HtmlPoolBaseSP html_pool) {
|
||||
assert(html_pool);
|
||||
|
||||
NodeVisitor visitor(html_pool);
|
||||
boost::apply_visitor(visitor, node);
|
||||
|
||||
std::vector<std::string> retval;
|
||||
for (auto& apply_block : visitor.apply_blocks()) {
|
||||
std::string mustache_model_name = visitor.variables().resolve_string(apply_block.name);
|
||||
const std::string& mustache_model = visitor.mustaches().at(mustache_model_name);
|
||||
std::vector<SourceInfo> src_urls = visitor.variables().resolve_array(apply_block.target);
|
||||
|
||||
for (const auto& src_url : src_urls) {
|
||||
const auto html_id = html_pool->GetOrAdd(visitor.variables().resolve_string(src_url));
|
||||
const auto* const html = html_pool->GetByID(html_id);
|
||||
}
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
}} //namespace duck::sl
|
30
src/scraplang/apply.hpp
Normal file
30
src/scraplang/apply.hpp
Normal file
|
@ -0,0 +1,30 @@
|
|||
/* Copyright (C) 2015 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef idC73DBB42FB76433BAFC0B73EAC3B70FF
|
||||
#define idC73DBB42FB76433BAFC0B73EAC3B70FF
|
||||
|
||||
#include "scrap_node.hpp"
|
||||
#include "scraplang/html_pool_base.hpp"
|
||||
#include <string>
|
||||
|
||||
namespace duck { namespace sl {
|
||||
std::vector<std::string> apply (const ScrapNode& node, HtmlPoolBaseSP html_pool);
|
||||
}} //namespace duck::sl
|
||||
|
||||
#endif
|
|
@ -1,4 +1,4 @@
|
|||
/* Copyright (C) 2015 Michele Santullo
|
||||
/* Copyright (C) 2017 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
|
@ -19,22 +19,15 @@
|
|||
#ifndef id3875B5F868524EC3A1B83971D4A85777
|
||||
#define id3875B5F868524EC3A1B83971D4A85777
|
||||
|
||||
#include "element_types.hpp"
|
||||
#include <string>
|
||||
|
||||
namespace duck {
|
||||
enum ElementTypes {
|
||||
ElementType_String,
|
||||
ElementType_Integer,
|
||||
ElementType_Boolean,
|
||||
ElementType_Null,
|
||||
ElementType_Double
|
||||
};
|
||||
|
||||
struct element_def {
|
||||
namespace duck { namespace sl {
|
||||
struct ElementDef {
|
||||
std::string name;
|
||||
std::string xpath;
|
||||
ElementTypes type;
|
||||
};
|
||||
} //namespace duck
|
||||
}} //namespace duck::sl
|
||||
|
||||
#endif
|
32
src/scraplang/element_types.hpp
Normal file
32
src/scraplang/element_types.hpp
Normal file
|
@ -0,0 +1,32 @@
|
|||
/* Copyright (C) 2017 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef id1AC876186C4B48DD900399994C27A741
|
||||
#define id1AC876186C4B48DD900399994C27A741
|
||||
|
||||
namespace duck::sl {
	/// Value categories an element can be tagged with.
	/// The enumerators are unscoped and keep their implicit values (0..4).
	enum ElementTypes {
		ElementType_String,  ///< textual value
		ElementType_Integer, ///< integral value
		ElementType_Boolean, ///< boolean value
		ElementType_Null,    ///< null value
		ElementType_Double   ///< floating point value
	};
} //namespace duck::sl
|
||||
|
||||
#endif
|
39
src/scraplang/html_pool_base.hpp
Normal file
39
src/scraplang/html_pool_base.hpp
Normal file
|
@ -0,0 +1,39 @@
|
|||
/* Copyright (C) 2015 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef idDD58822D7D8B4AA7A0DD16B1CDEF413E
|
||||
#define idDD58822D7D8B4AA7A0DD16B1CDEF413E
|
||||
|
||||
#include "implem/ResourcePool.hpp"
|
||||
#include "kakoune/safe_ptr.hh"
|
||||
|
||||
namespace duck { namespace sl {
|
||||
namespace implem {
|
||||
typedef duckutil::ResourcePool<std::string, std::string> HtmlPoolBase;
|
||||
} //namespace implem
|
||||
|
||||
class HtmlPoolBase : public implem::HtmlPoolBase, public Kakoune::SafeCountable {
|
||||
public:
|
||||
using implem::HtmlPoolBase::HtmlPoolBase;
|
||||
using implem::HtmlPoolBase::operator=;
|
||||
};
|
||||
|
||||
typedef Kakoune::SafePtr<HtmlPoolBase> HtmlPoolBaseSP;
|
||||
}} //namespace duck::sl
|
||||
|
||||
#endif
|
121
src/scraplang/implem/ResourcePool.hpp
Normal file
121
src/scraplang/implem/ResourcePool.hpp
Normal file
|
@ -0,0 +1,121 @@
|
|||
/* Copyright (C) 2015 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef id1A180A0568E84FD88D57FAB82C69600E
|
||||
#define id1A180A0568E84FD88D57FAB82C69600E
|
||||
|
||||
#include "SaltedIndex.hpp"
|
||||
#include <map>
|
||||
#include <cstdint>
|
||||
#include <cassert>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
|
||||
namespace duckutil {
|
||||
namespace Implem {
|
||||
/// Book-keeping record for one pooled resource: a pointer to the resource,
/// a pointer to its name, a reference count and the resource id.
///
/// The wrapper does not delete the resource or the name it points to. Its
/// destructor asserts that the reference count has already dropped to zero.
/// Comparison operators compare wrappers by name only.
template <typename Res, typename Name, typename IDT>
class ResourceResNameWrapper {
public:
	typedef IDT IDType;

	ResourceResNameWrapper ( const Name* parName, Res* parRes, IDType parID );
	ResourceResNameWrapper ( const ResourceResNameWrapper& ) = delete;
	~ResourceResNameWrapper ( void ) { assert(0 == m_refcount); }

	ResourceResNameWrapper& operator= (const ResourceResNameWrapper&) = delete;

	Res& GetResource ( void ) { return *m_resource; }
	const Res& GetResource ( void ) const { return *m_resource; }
	const Name& GetName ( void ) const { return *m_name; }
	/// Adds one reference.
	void Retain ( void ) { ++m_refcount; }
	/// Drops one reference; returns true when no reference is left.
	bool Release ( void ) { assert(m_refcount > 0); --m_refcount; return (0 == m_refcount); }
	uint32_t GetRefCount ( void ) const { return m_refcount; }
	/// Returns the id with its full IDType width.
	IDType GetResourceID ( void ) const { return m_resId; }
	bool IsEmpty ( void ) const { return nullptr == m_resource; }
	/// Forgets resource, name, reference count and id (nothing is deleted).
	void Reset ( void ) { m_resource = nullptr; m_name = nullptr; m_refcount = 0; m_resId = 0; }
	/// Forces the reference count to zero so the wrapper can be destroyed.
	void DropRefCount ( void ) { m_refcount = 0; }

	bool operator== ( const ResourceResNameWrapper& parOther ) const { return (GetName() == parOther.GetName()); }
	bool operator!= ( const ResourceResNameWrapper& parOther ) const { return not (GetName() == parOther.GetName()); }
	bool operator< ( const ResourceResNameWrapper& parOther ) const { return (GetName() < parOther.GetName()); }
	bool operator> ( const ResourceResNameWrapper& parOther ) const { return (parOther.GetName() < GetName()); }
	bool operator>= ( const ResourceResNameWrapper& parOther ) const { return not (GetName() < parOther.GetName()); }
	bool operator<= ( const ResourceResNameWrapper& parOther ) const { return not (parOther.GetName() < GetName()); }

private:
	Res* m_resource;
	const Name* m_name;
	uint16_t m_refcount;
	// Stored as IDType (it used to be uint16_t) so that ids handed out by
	// the pool are not narrowed by the member's type.
	IDType m_resId;
};
|
||||
} //namespace Implem
|
||||
|
||||
template <typename Res, typename Name, typename Object=Name>
|
||||
class ResourcePool {
|
||||
public:
|
||||
typedef uint32_t IDType;
|
||||
typedef Name ResourceNameType;
|
||||
private:
|
||||
typedef Implem::ResourceResNameWrapper<Res, Name, IDType> ResourceWrapperType;
|
||||
typedef std::map<Name, ResourceWrapperType*> ResourceMapType;
|
||||
typedef std::vector<ResourceWrapperType*> ResourceVectorType;
|
||||
protected:
|
||||
typedef typename std::conditional<std::is_fundamental<ResourceNameType>::value, ResourceNameType, const ResourceNameType&>::type ResourceNameParamType;
|
||||
typedef typename std::conditional<std::is_fundamental<Object>::value, Object, const Object&>::type ResourceObjectParameterType;
|
||||
public:
|
||||
typedef Res ResourceType;
|
||||
typedef Object ResourceObjectType;
|
||||
|
||||
ResourcePool ( void ) = default;
|
||||
ResourcePool ( const ResourcePool& ) = delete;
|
||||
virtual ~ResourcePool ( void ) = default;
|
||||
|
||||
ResourcePool& operator= (const ResourcePool&) = delete;
|
||||
|
||||
ResourceType* GetByName ( ResourceNameParamType parName );
|
||||
const ResourceType* GetByName ( ResourceNameParamType parName ) const;
|
||||
IDType GetOrAdd ( ResourceObjectParameterType parObjectName );
|
||||
ResourceType* GetByID ( IDType parID );
|
||||
const ResourceType* GetByID ( IDType parID ) const;
|
||||
bool IsEmpty ( void ) const;
|
||||
|
||||
IDType AddResource ( ResourceObjectParameterType parRes );
|
||||
void ReleaseResource ( IDType parRes );
|
||||
void ReleaseResourceByName ( ResourceNameParamType parName );
|
||||
|
||||
void Dispose ( void ) noexcept;
|
||||
|
||||
protected:
|
||||
void Dispose_IgnoreReferenceCount ( void );
|
||||
|
||||
virtual ResourceType* OnResourceLoad ( ResourceObjectParameterType parRes ) = 0;
|
||||
virtual void OnResourceDestroy ( ResourceNameParamType parName, ResourceType* parRes ) noexcept = 0;
|
||||
virtual ResourceNameType GetResourceNameFromResourceObject ( ResourceObjectParameterType parRes ) = 0;
|
||||
|
||||
private:
|
||||
bool ReleaseResource ( typename ResourceMapType::iterator& parMapRes, typename ResourceVectorType::iterator& parVecRes );
|
||||
|
||||
ResourceMapType m_mapContainer; //For accesses by name
|
||||
ResourceVectorType m_linearContainer; //For accesses by index
|
||||
};
|
||||
} //namespace duckutil
|
||||
|
||||
#include "ResourcePool.inl"
|
||||
|
||||
#endif
|
243
src/scraplang/implem/ResourcePool.inl
Normal file
243
src/scraplang/implem/ResourcePool.inl
Normal file
|
@ -0,0 +1,243 @@
|
|||
/* Copyright (C) 2015 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
namespace duckutil {
|
||||
namespace Implem {
|
||||
///---------------------------------------------------------------------
|
||||
///---------------------------------------------------------------------
|
||||
template <typename Res, typename Name, typename IDT>
|
||||
ResourceResNameWrapper<Res, Name, IDT>::ResourceResNameWrapper (const Name* parName, Res* parRes, IDType parID) {
|
||||
assert(nullptr != parRes);
|
||||
assert(nullptr != parName);
|
||||
|
||||
m_resource = parRes;
|
||||
m_name = parName;
|
||||
m_refcount = 0;
|
||||
m_resId = static_cast<uint16_t>(parID);
|
||||
}
|
||||
|
||||
///---------------------------------------------------------------------
|
||||
///---------------------------------------------------------------------
|
||||
/// Shrinks parVector so that it no longer ends with null entries.
/// Null entries that are followed by a non-null one are left in place.
/// Uses a lambda instead of std::bind1st, which was removed in C++17.
template <typename V>
inline void TrimTrailingNulls (V& parVector) {
	// Walk backwards until the first entry that is not null.
	const auto firstNonNull = std::find_if(
		parVector.rbegin(),
		parVector.rend(),
		[](const typename V::value_type& parItem) { return nullptr != parItem; }
	);
	const std::size_t nullsCount = static_cast<std::size_t>(firstNonNull - parVector.rbegin());
	assert(nullsCount <= parVector.size());
	if (nullsCount) {
		assert(nullptr == parVector.back());
		parVector.resize(parVector.size() - nullsCount);
	}
}
|
||||
} //namespace Implem
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename Res, typename Name, typename Object>
|
||||
typename ResourcePool<Res, Name, Object>::IDType ResourcePool<Res, Name, Object>::GetOrAdd (ResourceObjectParameterType parObjectName) {
|
||||
const ResourceNameType name = GetResourceNameFromResourceObject(parObjectName);
|
||||
typename ResourceMapType::const_iterator itFind = m_mapContainer.find(name);
|
||||
IDType retVal;
|
||||
if (m_mapContainer.end() == itFind) {
|
||||
retVal = AddResource(parObjectName);
|
||||
}
|
||||
else {
|
||||
typename ResourceVectorType::const_iterator itVecFind = std::find(m_linearContainer.begin(), m_linearContainer.end(), itFind->second);
|
||||
assert(m_linearContainer.end() != itVecFind);
|
||||
retVal = static_cast<IDType>(itVecFind - m_linearContainer.begin() + 1);
|
||||
}
|
||||
return retVal;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename Res, typename Name, typename Object>
|
||||
typename ResourcePool<Res, Name, Object>::ResourceType* ResourcePool<Res, Name, Object>::GetByName (ResourceNameParamType parName) {
|
||||
typename ResourceMapType::iterator itFind = m_mapContainer.find(parName);
|
||||
if (m_mapContainer.end() == itFind)
|
||||
return nullptr;
|
||||
else
|
||||
return itFind->second;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename Res, typename Name, typename Object>
|
||||
const typename ResourcePool<Res, Name, Object>::ResourceType* ResourcePool<Res, Name, Object>::GetByName (ResourceNameParamType parName) const {
|
||||
typename ResourceMapType::const_iterator itFind = m_mapContainer.find(parName);
|
||||
if (m_mapContainer.end() == itFind)
|
||||
return nullptr;
|
||||
else
|
||||
return itFind->second;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename Res, typename Name, typename Object>
|
||||
typename ResourcePool<Res, Name, Object>::ResourceType* ResourcePool<Res, Name, Object>::GetByID (IDType parID) {
|
||||
assert(parID > 0);
|
||||
if (0 == parID)
|
||||
return nullptr;
|
||||
|
||||
const auto index = static_cast<std::size_t>(parID - 1);
|
||||
if (index < m_linearContainer.size()) {
|
||||
ResourceWrapperType* res = m_linearContainer[index];
|
||||
return &res->GetResource();
|
||||
}
|
||||
else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename Res, typename Name, typename Object>
|
||||
const typename ResourcePool<Res, Name, Object>::ResourceType* ResourcePool<Res, Name, Object>::GetByID (IDType parID) const {
|
||||
assert(parID > 0);
|
||||
if (0 == parID)
|
||||
return nullptr;
|
||||
|
||||
const auto index = static_cast<std::size_t>(parID - 1);
|
||||
if (index < m_linearContainer.size()) {
|
||||
ResourceWrapperType* res = m_linearContainer[index];
|
||||
return &res->GetResource();
|
||||
}
|
||||
else {
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename Res, typename Name, typename Object>
|
||||
typename ResourcePool<Res, Name, Object>::IDType ResourcePool<Res, Name, Object>::AddResource (ResourceObjectParameterType parRes) {
|
||||
const ResourceNameType name = GetResourceNameFromResourceObject(parRes);
|
||||
typename ResourceMapType::iterator itPreExisting = m_mapContainer.find(name);
|
||||
if (m_mapContainer.end() != itPreExisting) {
|
||||
// if (itPreExisting->IsEmpty()) {
|
||||
// OnResourceReload(name);
|
||||
assert(nullptr != itPreExisting->second);
|
||||
assert(not itPreExisting->second->IsEmpty());
|
||||
itPreExisting->second->Retain();
|
||||
return itPreExisting->second->GetResourceID();
|
||||
}
|
||||
else {
|
||||
assert(m_mapContainer.end() == m_mapContainer.find(name));
|
||||
ResourceType* const newRes = OnResourceLoad(parRes);
|
||||
if (newRes) {
|
||||
std::pair<typename ResourceMapType::iterator, bool> newIt = m_mapContainer.insert(std::pair<ResourceNameType, ResourceWrapperType*>(name, nullptr));
|
||||
|
||||
IDType newID = static_cast<IDType>(m_linearContainer.size() + 1);
|
||||
ResourceWrapperType* const newWrapper = new ResourceWrapperType(&newIt.first->first, newRes, newID);
|
||||
assert(nullptr != newWrapper);
|
||||
m_linearContainer.push_back(newWrapper);
|
||||
newIt.first->second = newWrapper;
|
||||
newWrapper->Retain();
|
||||
return newID;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename Res, typename Name, typename Object>
|
||||
void ResourcePool<Res, Name, Object>::ReleaseResource (IDType parRes) {
|
||||
assert(parRes > 0);
|
||||
assert(static_cast<std::size_t>(parRes) <= m_linearContainer.size());
|
||||
|
||||
typename ResourceVectorType::iterator rele = m_linearContainer.begin() + (parRes - 1);
|
||||
assert(nullptr != *rele);
|
||||
assert(rele - m_linearContainer.begin() == static_cast<int>(parRes - 1));
|
||||
|
||||
typename ResourceMapType::iterator relemap = m_mapContainer.find((*rele)->GetName());
|
||||
assert(m_mapContainer.end() != relemap);
|
||||
|
||||
if (ReleaseResource(relemap, rele)) {
|
||||
delete relemap->second;
|
||||
m_mapContainer.erase(relemap);
|
||||
Implem::TrimTrailingNulls(m_linearContainer);
|
||||
}
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename Res, typename Name, typename Object>
|
||||
void ResourcePool<Res, Name, Object>::ReleaseResourceByName (ResourceNameParamType parName) {
|
||||
typename ResourceMapType::iterator rele = m_mapContainer.find(parName);
|
||||
assert(m_mapContainer.end() != rele);
|
||||
|
||||
const IDType resId = rele->second->GetResourceID();
|
||||
assert(static_cast<std::size_t>(resId) <= m_linearContainer.size());
|
||||
assert(resId > 0);
|
||||
|
||||
if (ReleaseResource(rele, m_linearContainer.begin() + (resId - 1))) {
|
||||
delete rele->second;
|
||||
m_mapContainer.erase(rele);
|
||||
Implem::TrimTrailingNulls(m_linearContainer);
|
||||
}
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename Res, typename Name, typename Object>
|
||||
bool ResourcePool<Res, Name, Object>::ReleaseResource (typename ResourceMapType::iterator& parMapRes, typename ResourceVectorType::iterator& parVecRes) {
|
||||
assert(parMapRes->second == *parVecRes);
|
||||
assert(nullptr != *parVecRes);
|
||||
ResourceWrapperType& currRes = **parVecRes;
|
||||
|
||||
assert(not currRes.IsEmpty());
|
||||
if (not currRes.IsEmpty()) {
|
||||
if (currRes.Release()) {
|
||||
this->OnResourceDestroy(currRes.GetName(), &currRes.GetResource());
|
||||
currRes.Reset();
|
||||
*parVecRes = nullptr;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename Res, typename Name, typename Object>
|
||||
bool ResourcePool<Res, Name, Object>::IsEmpty() const {
|
||||
return m_mapContainer.empty() and m_linearContainer.empty();
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename Res, typename Name, typename Object>
|
||||
void ResourcePool<Res, Name, Object>::Dispose() noexcept {
|
||||
for (auto& currItem : m_linearContainer) {
|
||||
if (nullptr != currItem)
|
||||
this->OnResourceDestroy(currItem->GetName(), &currItem->GetResource());
|
||||
delete currItem;
|
||||
}
|
||||
m_linearContainer.clear();
|
||||
m_mapContainer.clear();
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename Res, typename Name, typename Object>
|
||||
void ResourcePool<Res, Name, Object>::Dispose_IgnoreReferenceCount() {
|
||||
for (auto& currItem : m_linearContainer) {
|
||||
currItem->DropRefCount();
|
||||
}
|
||||
Dispose();
|
||||
}
|
||||
} //namespace duckutil
|
68
src/scraplang/implem/SaltedIndex.hpp
Normal file
68
src/scraplang/implem/SaltedIndex.hpp
Normal file
|
@ -0,0 +1,68 @@
|
|||
/* Copyright (C) 2015 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef id8D3B62D447574A23A82F8E9C60A629BD
|
||||
#define id8D3B62D447574A23A82F8E9C60A629BD
|
||||
|
||||
#include <cassert>
#include <cstddef>
|
||||
|
||||
namespace duckutil {
	/// An integer of type T split into two bit fields: the low IndexBitSize
	/// bits hold an index and the remaining SaltBitSize bits hold a salt.
	/// The two sizes must add up to exactly the width of T and both must be
	/// non-zero. The combined value is readable through GetSaltedIndex().
	///
	/// Note: the anonymous struct inside the union and reading a union
	/// member other than the one last written rely on compiler extensions.
	template <typename T, std::size_t IndexBitSize, std::size_t SaltBitSize=sizeof(T)*8-IndexBitSize>
	class SaltedIndex {
		static_assert((SaltBitSize + IndexBitSize) == sizeof(T) * 8, "Type size is too small");
		static_assert(SaltBitSize > 0, "Invalid salt size");
		static_assert(IndexBitSize > 0, "Invalid index size");
	public:
		enum {
			SaltSize = SaltBitSize,
			IndexSize = IndexBitSize,
			// Shift an unsigned long long rather than an int: with a plain
			// `1 <<` any field of 31 bits or more overflows the int.
			MaxSalt = (1ull << SaltBitSize) - 1,
			MaxIndex = (1ull << IndexBitSize) - 1
		};

		SaltedIndex ( void );
		SaltedIndex ( const SaltedIndex& parOther );
		explicit SaltedIndex ( T parIndex );
		SaltedIndex ( T parSalt, T parIndex );
		~SaltedIndex ( void );

		T GetSaltOnly ( void ) const { return m_salt; }
		T GetIndexOnly ( void ) const { return m_index; }
		T GetSaltedIndex ( void ) const { return m_saltedIndex; }
		void SetSalt ( T parSalt );
		void SetIndex ( T parIndex );
		T IncreaseSalt ( void );

		// Comparisons look at the combined salt+index value.
		bool operator== ( const SaltedIndex& parOther ) const { return GetSaltedIndex() == parOther.GetSaltedIndex(); }
		bool operator!= ( const SaltedIndex& parOther ) const { return GetSaltedIndex() != parOther.GetSaltedIndex(); }
		bool operator< ( const SaltedIndex& parOther ) const { return GetSaltedIndex() < parOther.GetSaltedIndex(); }

	private:
		union {
			struct {
				T m_index : IndexBitSize;
				T m_salt : SaltBitSize;
			};
			T m_saltedIndex;
		};
	};
} //namespace duckutil
|
||||
|
||||
#include "SaltedIndex.inl"
|
||||
|
||||
#endif
|
86
src/scraplang/implem/SaltedIndex.inl
Normal file
86
src/scraplang/implem/SaltedIndex.inl
Normal file
|
@ -0,0 +1,86 @@
|
|||
/* Copyright (C) 2015 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
namespace duckutil {
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename T, std::size_t IndexBitSize, std::size_t SaltBitSize>
|
||||
SaltedIndex<T, IndexBitSize, SaltBitSize>::SaltedIndex() :
|
||||
m_saltedIndex(0)
|
||||
{
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename T, std::size_t IndexBitSize, std::size_t SaltBitSize>
|
||||
SaltedIndex<T, IndexBitSize, SaltBitSize>::SaltedIndex (const SaltedIndex& parOther) :
|
||||
m_saltedIndex(parOther.GetSaltedIndex())
|
||||
{
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename T, std::size_t IndexBitSize, std::size_t SaltBitSize>
|
||||
SaltedIndex<T, IndexBitSize, SaltBitSize>::SaltedIndex (T parIndex) :
|
||||
m_saltedIndex(parIndex)
|
||||
{
|
||||
Assert(m_saltedIndex <= MaxIndex);
|
||||
Assert(m_salt == 0);
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename T, std::size_t IndexBitSize, std::size_t SaltBitSize>
|
||||
SaltedIndex<T, IndexBitSize, SaltBitSize>::SaltedIndex (T parSalt, T parIndex) :
|
||||
m_index(parIndex),
|
||||
m_salt(parSalt)
|
||||
{
|
||||
Assert(parSalt <= MaxSalt);
|
||||
Assert(parIndex <= MaxIndex);
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename T, std::size_t IndexBitSize, std::size_t SaltBitSize>
|
||||
SaltedIndex<T, IndexBitSize, SaltBitSize>::~SaltedIndex() {
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename T, std::size_t IndexBitSize, std::size_t SaltBitSize>
|
||||
void SaltedIndex<T, IndexBitSize, SaltBitSize>::SetSalt (T parSalt) {
|
||||
Assert(parSalt <= MaxSalt);
|
||||
m_salt = parSalt;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename T, std::size_t IndexBitSize, std::size_t SaltBitSize>
|
||||
void SaltedIndex<T, IndexBitSize, SaltBitSize>::SetIndex (T parIndex) {
|
||||
Assert(parIndex <= MaxIndex);
|
||||
m_index = parIndex;
|
||||
}
|
||||
|
||||
///-------------------------------------------------------------------------
|
||||
///-------------------------------------------------------------------------
|
||||
template <typename T, std::size_t IndexBitSize, std::size_t SaltBitSize>
|
||||
T SaltedIndex<T, IndexBitSize, SaltBitSize>::IncreaseSalt() {
|
||||
Assert(m_salt < MaxSalt);
|
||||
++m_salt;
|
||||
}
|
||||
} //namespace duckutil
|
168
src/scraplang/parse.cpp
Normal file
168
src/scraplang/parse.cpp
Normal file
|
@ -0,0 +1,168 @@
|
|||
/* Copyright (C) 2017 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "parse.hpp"
|
||||
#include "element_def.hpp"
|
||||
#include <boost/spirit/include/qi.hpp>
|
||||
#include <boost/spirit/include/phoenix_stl.hpp>
|
||||
#include <boost/spirit/include/phoenix_fusion.hpp>
|
||||
#include <boost/fusion/adapted/struct.hpp>
|
||||
#include <boost/fusion/adapted/std_pair.hpp>
|
||||
#include <utility>
|
||||
#if !defined(NDEBUG)
|
||||
# include <iostream>
|
||||
#endif
|
||||
#include <boost/variant/apply_visitor.hpp>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace qi = boost::spirit::qi;
|
||||
namespace sp = boost::spirit;
|
||||
|
||||
// Fusion adaptations for the structs used as attributes of the ScrapGrammar
// rules below. Each entry lists a member's type and name, in member order.

// Attribute of the source_info rule: a value plus a tag telling what it is.
BOOST_FUSION_ADAPT_STRUCT(
	duck::sl::SourceInfo,
	(std::string, value)
	(duck::sl::SourceInfo::Type, type)
)
// Attribute of the from_block rule.
BOOST_FUSION_ADAPT_STRUCT(
	duck::sl::FromBlock,
	(duck::sl::SourceInfo, source)
	(std::vector<duck::sl::XPathElement>, xpaths)
)
// Attribute of the struct_block rule.
BOOST_FUSION_ADAPT_STRUCT(
	duck::sl::StructBlock,
	(std::string, name)
	(std::vector<duck::sl::XPathElement>, xpaths)
)
// Attribute of the apply_block rule.
BOOST_FUSION_ADAPT_STRUCT(
	duck::sl::ApplyBlock,
	(std::string, mustache_model)
	(duck::sl::SourceInfo, source)
	(std::vector<duck::sl::StructItem>, xpaths)
)
// Attribute of the mustache_block rule.
BOOST_FUSION_ADAPT_STRUCT(
	duck::sl::MustacheBlock,
	(std::string, name)
	(std::string, content)
)
// Attribute of the xpath_assignment rule.
// NOTE(review): def_val is a std::optional filled from Qi's optional
// parser - confirm that the Boost version in use supports that attribute type.
BOOST_FUSION_ADAPT_STRUCT(
	duck::sl::XPathElement,
	(std::string, name)
	(std::optional<std::string>, def_val)
	(std::string, xpath)
)
|
||||
|
||||
namespace duck { namespace sl {
|
||||
namespace {
|
||||
/// Symbol table translating the textual type names into ElementTypes values.
struct ElementTypeSymbol : qi::symbols<char, ElementTypes> {
	ElementTypeSymbol() {
		this->add("string", ElementType_String);
		this->add("integer", ElementType_Integer);
		this->add("boolean", ElementType_Boolean);
		this->add("null", ElementType_Null);
		this->add("double", ElementType_Double);
	}
};
|
||||
|
||||
/// Spirit.Qi grammar for a scraplang script. It parses a sequence of
/// `from`, `apply` and mustache blocks into a std::vector<ScrapNode>.
/// I is the iterator type and Skipper the skip parser used between tokens.
template <typename I, typename Skipper>
class ScrapGrammar : public qi::grammar<I, std::vector<ScrapNode>(), Skipper> {
public:
	ScrapGrammar() : ScrapGrammar::base_type(start) {
		using qi::char_;
		using qi::lexeme;
		using qi::alpha;
		using qi::alnum;
		using qi::graph;
		using qi::attr;
		using qi::eol;
		using qi::eoi;
		using qi::lit;
		using qi::string;
		using qi::as_string;
		using qi::no_skip;

		// Whole script: optional blank lines, blocks separated by one or
		// more line ends, optional trailing blank lines, then end of input.
		start = *eol >> (from_block | apply_block | mustache_block) % +eol >> *eol >> eoi;
		// "from <source>", then one xpath assignment per line, then "end".
		from_block = lit("from") >> source_info >> +eol >> (xpath_assignment % +eol) >> +eol >> "end";
		// A source is either a url or a {{token}}; attr() supplies the tag.
		source_info = (url >> attr(SourceInfo::URL)) | (mustache_like_token >> attr(SourceInfo::Token));
		// Optional "<letters>://" prefix, a letter, then any printable characters.
		url = -(+alpha >> string("://")) >> alpha >> *graph;
		mustache_like_token = "{{" >> identifier >> "}}";
		// Non-empty text between double quotes; the quotes are not part of the attribute.
		quoted_string %= lexeme['"' >> +(char_ - '"') >> '"'];
		// "<name> [default("<text>")] = <xpath>".
		xpath_assignment = identifier >> -(lit("default") >> '(' >> quoted_string >> ')') >> "=" >> +graph;
		// A letter or underscore followed by letters, digits or underscores.
		identifier %= lexeme[(alpha | char_('_')) >> *(alnum | char_('_'))];

		// "apply {{model}} to <source>", then xpath assignments and/or
		// struct blocks (one per line), then "end".
		apply_block = lit("apply") >> mustache_like_token >> "to" >> source_info >> +eol >>
			((xpath_assignment | struct_block) % +eol) >> +eol >> "end";
		// "struct <name>", then one xpath assignment per line, then "end".
		struct_block = "struct" >> identifier >> +eol >> (xpath_assignment % +eol) >> +eol >> "end";

		// "==<name>" on its own line, then raw text (no skipping) up to "==end".
		mustache_block %= as_string[lit("==") >> identifier] >> eol >>
			as_string[no_skip[+(!lit("==end") >> char_)]] >> "==end";
	}

private:
	// Rule over the grammar's iterator and skipper with signature F.
	template <typename F>
	using RuleType = qi::rule<I, F, Skipper>;

	RuleType<std::vector<ScrapNode>()> start;
	RuleType<FromBlock()> from_block;
	RuleType<std::string()> url;
	RuleType<std::string()> mustache_like_token;
	RuleType<std::string()> quoted_string;
	RuleType<XPathElement()> xpath_assignment;
	RuleType<std::string()> identifier;
	RuleType<SourceInfo()> source_info;
	RuleType<ApplyBlock()> apply_block;
	RuleType<StructBlock()> struct_block;
	RuleType<MustacheBlock()> mustache_block;
};
|
||||
} //unnamed namespace
|
||||
|
||||
std::vector<ScrapNode> parse (const std::string& parData) {
|
||||
ScrapGrammar<std::string::const_iterator, boost::spirit::qi::ascii::blank_type> gramm;
|
||||
auto it_start = parData.cbegin();
|
||||
|
||||
std::vector<ScrapNode> retval;
|
||||
const bool ok = qi::phrase_parse(it_start, parData.cend(), gramm, sp::ascii::blank, retval);
|
||||
|
||||
std::cout << "parse ok: " << std::boolalpha << ok << '\n';
|
||||
std::cout << "end == it: " << std::boolalpha << (parData.cend() == it_start) << '\n';
|
||||
std::cout << "begin == it: " << std::boolalpha << (parData.cbegin() == it_start) << '\n';
|
||||
std::cout << "parse distance: " << std::distance(parData.cbegin(), it_start) << '\n';
|
||||
std::cout << "all distance: " << std::distance(parData.cbegin(), parData.cend()) << " (size: " << parData.size() << ")\n";
|
||||
|
||||
if (parData.cend() != it_start or not ok) {
|
||||
throw std::runtime_error("Error parsing input script");
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
|
||||
// std::vector<element_def> get_xpath_definitions (const ScrapNode& parAST) {
|
||||
// std::vector<element_def> retval;
|
||||
// implem::XPathVisitor xpath_vis(&retval);
|
||||
// boost::apply_visitor(xpath_vis, parAST);
|
||||
// return std::move(retval);
|
||||
// }
|
||||
//
|
||||
// void print_results (std::ostream& parOut, const ScrapNode& parAST, const std::vector<element_def>& parOutcome, const ResultList& parResList) {
|
||||
//#if !defined(NDEBUG)
|
||||
// std::cout << "print_results()...\n";
|
||||
//#endif
|
||||
// implem::ResultPrinter printer(&parOut, &parOutcome, & parResList);
|
||||
// boost::apply_visitor(printer, parAST);
|
||||
// }
|
||||
}} //namespace duck::sl
|
30
src/scraplang/parse.hpp
Normal file
30
src/scraplang/parse.hpp
Normal file
|
@ -0,0 +1,30 @@
|
|||
/* Copyright (C) 2017 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef idBE96C2D49C4C413888A79EAEB2B9C0FA
|
||||
#define idBE96C2D49C4C413888A79EAEB2B9C0FA
|
||||
|
||||
#include "scrap_node.hpp"
|
||||
#include <string>
|
||||
|
||||
namespace duck { namespace sl {
|
||||
// Parses the scraplang script in parData and returns its top-level nodes.
// The definition throws std::runtime_error when the input cannot be parsed
// in full.
std::vector<ScrapNode> parse ( const std::string& parData );
|
||||
//std::vector<element_def> get_xpath_definitions ( const ScrapNode& parAST );
|
||||
}} //namespace duck::sl
|
||||
|
||||
#endif
|
94
src/scraplang/scrap_node.hpp
Normal file
94
src/scraplang/scrap_node.hpp
Normal file
|
@ -0,0 +1,94 @@
|
|||
/* Copyright (C) 2015 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef id9919CCB09DDD429C8128632F13D370ED
|
||||
#define id9919CCB09DDD429C8128632F13D370ED
|
||||
|
||||
//#include "element_def.hpp"
|
||||
#include <boost/spirit/include/support_extended_variant.hpp>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
|
||||
namespace duck { namespace sl {
|
||||
// One XPath assignment; this is the attribute type of the grammar's
// xpath_assignment rule (identifier, optional default("..."), '=', xpath).
// NOTE(review): the member <-> rule-component mapping below is assumed from
// the declaration order; the Fusion adaptation is not visible here.
struct XPathElement {
|
||||
std::string name; // identifier being assigned
|
||||
std::optional<std::string> def_val; // value from the optional default("...") clause, empty when absent
|
||||
std::string xpath; // expression text following '='
|
||||
};
|
||||
|
||||
// Describes where a block takes its input from.
//
// The grammar's source_info rule produces either a URL (type == URL) or the
// name of a mustache-like token (type == Token); value holds the matched
// text in both cases.
struct SourceInfo {
	enum Type { URL, Token };

	SourceInfo() = default;
	// Converting constructors: a bare string is treated as a token name.
	SourceInfo (std::string&& val) : value(std::move(val)), type(Token) {}
	SourceInfo (const std::string& val) : value(val), type(Token) {}

	std::string value;
	// Initialised explicitly so that a default-constructed SourceInfo never
	// carries an indeterminate type. URL is the first enumerator, i.e. the
	// value that value-initialisation (SourceInfo{}) already produced.
	Type type = URL;
};
|
||||
|
||||
// Attribute type of the grammar's from_block rule ("from" <source> ... "end").
struct FromBlock {
|
||||
SourceInfo source; // source named right after "from"
|
||||
std::vector<XPathElement> xpaths; // XPath assignments listed inside the block
|
||||
};
|
||||
|
||||
// Attribute type of the grammar's struct_block rule
// ("struct" <identifier> ... "end").
struct StructBlock {
|
||||
std::string name; // identifier following "struct"
|
||||
std::vector<XPathElement> xpaths; // XPath assignments listed inside the block
|
||||
};
|
||||
|
||||
// Element of ApplyBlock::xpaths: holds either a single XPath assignment or
// a whole struct block.
struct StructItem : boost::spirit::extended_variant<XPathElement, StructBlock> {
	StructItem() : base_type() {}
	StructItem (const XPathElement& parElement) : base_type(parElement) {}
	StructItem (const StructBlock& parBlock) : base_type(parBlock) {}

	// Make the variant's assignment operators available on StructItem too.
	using base_type::operator=;
};
|
||||
|
||||
// Attribute type of the grammar's apply_block rule
// ("apply" {{model}} "to" <source> ... "end").
struct ApplyBlock {
|
||||
std::string mustache_model; // identifier inside the {{...}} token after "apply"
|
||||
SourceInfo source; // source named after "to"
|
||||
std::vector<StructItem> xpaths; // body entries: XPath assignments and/or struct blocks
|
||||
};
|
||||
|
||||
// Attribute type of the grammar's mustache_block rule
// ("==" <identifier>, raw text, "==end").
struct MustacheBlock {
|
||||
std::string name; // identifier following "=="
|
||||
std::string content; // raw text captured up to the closing "==end"
|
||||
};
|
||||
|
||||
// Node of the parsed script: a from block, an apply block, a mustache block
// or (recursively) a list of nodes.
struct ScrapNode : boost::spirit::extended_variant<
	boost::recursive_wrapper<std::vector<ScrapNode>>,
	FromBlock,
	ApplyBlock,
	MustacheBlock
> {
	ScrapNode() : base_type() {}

	// One converting constructor per alternative.
	ScrapNode (const std::vector<ScrapNode>& parNodes) : base_type(parNodes) {}
	ScrapNode (const FromBlock& parFrom) : base_type(parFrom) {}
	ScrapNode (const ApplyBlock& parApply) : base_type(parApply) {}
	ScrapNode (const MustacheBlock& parMustache) : base_type(parMustache) {}

	// Make the variant's assignment operators available on ScrapNode too.
	using base_type::operator=;
};
|
||||
}} //namespace duck::sl
|
||||
|
||||
#endif
|
|
@ -1,75 +0,0 @@
|
|||
/* Copyright (C) 2015 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef id9919CCB09DDD429C8128632F13D370ED
|
||||
#define id9919CCB09DDD429C8128632F13D370ED
|
||||
|
||||
#include "scraplang_element.hpp"
|
||||
#include <boost/spirit/include/support_extended_variant.hpp>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
namespace duck {
|
||||
struct ScrapNode;
|
||||
|
||||
namespace implem {
|
||||
struct map;
|
||||
struct array;
|
||||
|
||||
struct element : boost::spirit::extended_variant<
|
||||
boost::recursive_wrapper<map>,
|
||||
boost::recursive_wrapper<array>,
|
||||
std::string,
|
||||
int,
|
||||
double
|
||||
>
|
||||
{
|
||||
element ( void ) = default;
|
||||
element ( const map& parOther ) : base_type(parOther) {}
|
||||
element ( const array& parOther ) : base_type(parOther) {}
|
||||
element ( const std::string& parOther ) : base_type(parOther) {}
|
||||
element ( double parOther ) : base_type(parOther) {}
|
||||
element ( int parOther ) : base_type(parOther) {}
|
||||
};
|
||||
|
||||
struct map : std::map<std::string, element> {
|
||||
};
|
||||
|
||||
struct array : std::vector<element> {
|
||||
};
|
||||
|
||||
struct node_list {
|
||||
std::vector<ScrapNode> nodes;
|
||||
};
|
||||
} //namespace implem
|
||||
|
||||
// AST node: an element definition, a map or a list of nodes.
struct ScrapNode : boost::spirit::extended_variant<element_def, implem::map, implem::node_list> {
	ScrapNode() = default;
	ScrapNode (const element_def& parDef) : base_type(parDef) {}
	ScrapNode (const implem::map& parMap) : base_type(parMap) {}
	ScrapNode (const implem::node_list& parList) : base_type(parList) {}
};
|
||||
} //namespace duck
|
||||
|
||||
#endif
|
|
@ -1,119 +0,0 @@
|
|||
/* Copyright (C) 2015 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "scraplang.hpp"
|
||||
#include "scrapast.hpp"
|
||||
#include "scraplang_visit_xpath.hpp"
|
||||
#include <boost/spirit/include/qi.hpp>
|
||||
#include <boost/spirit/include/phoenix_stl.hpp>
|
||||
#include <boost/spirit/include/phoenix_fusion.hpp>
|
||||
#include <boost/fusion/adapted/struct.hpp>
|
||||
#include <boost/fusion/adapted/std_pair.hpp>
|
||||
#include <utility>
|
||||
|
||||
#include <boost/variant/apply_visitor.hpp>
|
||||
|
||||
namespace qi = boost::spirit::qi;
|
||||
namespace sp = boost::spirit;
|
||||
|
||||
// Expose duck::element_def to Boost.Fusion as the sequence
// (name, xpath, type).
BOOST_FUSION_ADAPT_STRUCT(
|
||||
duck::element_def,
|
||||
(std::string, name)
|
||||
(std::string, xpath)
|
||||
(duck::ElementTypes, type)
|
||||
)
|
||||
// Expose duck::implem::node_list to Boost.Fusion as a one-element sequence
// holding its vector of nodes.
BOOST_FUSION_ADAPT_STRUCT(
|
||||
duck::implem::node_list,
|
||||
(std::vector<duck::ScrapNode>, nodes)
|
||||
)
|
||||
|
||||
namespace duck {
|
||||
namespace {
|
||||
// Symbol table translating the type keywords accepted after "as" into the
// matching ElementTypes value.
struct ElementTypeSymbol : qi::symbols<char, ElementTypes> {
	ElementTypeSymbol() {
		add("string", ElementType_String);
		add("integer", ElementType_Integer);
		add("boolean", ElementType_Boolean);
		add("null", ElementType_Null);
		add("double", ElementType_Double);
	}
};
|
||||
|
||||
template <typename I>
|
||||
struct ScrapGrammar : qi::grammar<I, ScrapNode(), sp::ascii::space_type> {
|
||||
ScrapGrammar() : ScrapGrammar::base_type(start) {
|
||||
using qi::lit;
|
||||
using qi::char_;
|
||||
using qi::lexeme;
|
||||
using qi::double_;
|
||||
using qi::int_;
|
||||
using qi::eps;
|
||||
|
||||
start = whole;
|
||||
whole = eps >> *xpath_definition >> -map;
|
||||
xpath_definition = identifier >> lit('=') >> string >> "as" >> data_type;
|
||||
identifier = (char_('a', 'z') | char_('A', 'Z') | '_') >> *(char_('a', 'z') | char_('A', 'Z') | '_' | char_('0', '9'));
|
||||
string %= lexeme['"' >> +(char_ - '"') >> '"'];
|
||||
map = lit('{') >> ((identifier >> lit('=') >> value) % lit(',')) >> lit('}');
|
||||
array = lit('[') >> *(value % lit(',')) >> lit(']');
|
||||
value = string | double_ | int_ | array | map | identifier;
|
||||
}
|
||||
|
||||
qi::rule<I, ScrapNode(), sp::ascii::space_type> start;
|
||||
qi::rule<I, implem::node_list(), sp::ascii::space_type> whole;
|
||||
qi::rule<I, element_def(), sp::ascii::space_type> xpath_definition;
|
||||
qi::rule<I, std::string(), sp::ascii::space_type> identifier;
|
||||
qi::rule<I, std::string(), sp::ascii::space_type> string;
|
||||
qi::rule<I, implem::map(), sp::ascii::space_type> map;
|
||||
qi::rule<I, implem::array(), sp::ascii::space_type> array;
|
||||
qi::rule<I, implem::element(), sp::ascii::space_type> value;
|
||||
ElementTypeSymbol data_type;
|
||||
};
|
||||
} //unnamed namespace
|
||||
|
||||
// Parses parData with ScrapGrammar (ascii::space skipper) into a newly
// allocated ScrapNode and returns the owning ScrapNodePtr.
//
// NOTE(review): the result of phrase_parse is not checked, so a script that
// fails to parse (or is only partially consumed) still yields a node.
// Confirm whether callers rely on that before turning it into an error.
ScrapNodePtr parse_scraplang (const std::string& parData) {
	ScrapGrammar<std::string::const_iterator> gramm;
	ScrapNodePtr retval(new ScrapNode);
	auto it_start = parData.cbegin();

	qi::phrase_parse(it_start, parData.cend(), gramm, sp::ascii::space, *retval);
	// A local returned by name is already treated as an rvalue; wrapping it
	// in std::move only prevents copy elision.
	return retval;
}
|
||||
|
||||
// Collects the element definitions found in parAST by applying an
// implem::XPathVisitor to it.
//
// parAST: root node to visit.
// Returns the definitions the visitor stored in the vector it was given.
std::vector<element_def> get_xpath_definitions (const ScrapNode& parAST) {
	std::vector<element_def> retval;
	implem::XPathVisitor xpath_vis(&retval);
	boost::apply_visitor(xpath_vis, parAST);
	// Returned by name so that NRVO can apply; std::move would inhibit it.
	return retval;
}
|
||||
|
||||
// Wraps the given node pointer.
ScrapNodePtr::ScrapNodePtr (ScrapNode* parPtr) : m_ptr(parPtr) {}

// Move construction: m_ptr is move-constructed from parOther's.
ScrapNodePtr::ScrapNodePtr (ScrapNodePtr&& parOther) : m_ptr(std::move(parOther.m_ptr)) {}

// Nothing to do explicitly; m_ptr cleans up after itself.
ScrapNodePtr::~ScrapNodePtr() noexcept {}
|
||||
} //namespace duck
|
0
src/scraplang/scraplang_print_results.cpp
Normal file
0
src/scraplang/scraplang_print_results.cpp
Normal file
29
src/scraplang/scraplang_print_results.hpp
Normal file
29
src/scraplang/scraplang_print_results.hpp
Normal file
|
@ -0,0 +1,29 @@
|
|||
#ifndef idB20734D678524FAA8AC94F2AB2FDAA94
|
||||
#define idB20734D678524FAA8AC94F2AB2FDAA94
|
||||
|
||||
#include "scrapast.hpp"
|
||||
#include <vector>
|
||||
|
||||
namespace duck {
|
||||
typedef std::vector<std::vector<std::pair<std::string, std::string>>> ResulList;
|
||||
struct element_def;
|
||||
|
||||
namespace implem {
|
||||
class ResultPrinter {
|
||||
public:
|
||||
typedef void result_type;
|
||||
|
||||
explicit ResultPrinter ( const std::vector<element_def>* parQueries, const ResultList* parResults );
|
||||
|
||||
void operator() ( const element_def& parElem );
|
||||
void operator() ( const implem::map& parMap );
|
||||
void operator() ( const node_list& parNodes );
|
||||
|
||||
private:
|
||||
const std::vector<element_def>* const m_queries;
|
||||
const ResulList* const m_results;
|
||||
};
|
||||
} //namespace implem
|
||||
} //namespace duck
|
||||
|
||||
#endif
|
43
src/scraplang/variables.cpp
Normal file
43
src/scraplang/variables.cpp
Normal file
|
@ -0,0 +1,43 @@
|
|||
/* Copyright (C) 2015 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "variables.hpp"
|
||||
#include "scrap_node.hpp"
|
||||
|
||||
namespace duck { namespace sl {
|
||||
Variables::Variables() = default;
|
||||
Variables::~Variables() = default;
|
||||
|
||||
std::string Variables::resolve_string (const SourceInfo& parName) const {
|
||||
assert(false);
|
||||
return std::string();
|
||||
}
|
||||
|
||||
std::vector<SourceInfo> Variables::resolve_array (const SourceInfo& parName) const {
|
||||
assert(false);
|
||||
return std::vector<SourceInfo>();
|
||||
}
|
||||
|
||||
void Variables::add_xpath (const SourceInfo& parSource, const XPathElement& parVal) {
|
||||
assert(false);
|
||||
}
|
||||
|
||||
void Variables::add_struct (const SourceInfo& parSource, const StructBlock& parVal) {
|
||||
assert(false);
|
||||
}
|
||||
}} //namespace duck::sl
|
49
src/scraplang/variables.hpp
Normal file
49
src/scraplang/variables.hpp
Normal file
|
@ -0,0 +1,49 @@
|
|||
/* Copyright (C) 2015 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef id700FA165E5194907867EB4C02C4C1385
|
||||
#define id700FA165E5194907867EB4C02C4C1385
|
||||
|
||||
#include "mstch/mstch.hpp"
|
||||
#include "kakoune/safe_ptr.hh"
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace duck { namespace sl {
|
||||
struct SourceInfo;
|
||||
struct StructBlock;
|
||||
struct XPathElement;
|
||||
|
||||
// Interface for resolving sources to values and for registering XPath
// assignments and struct blocks against a source.
// NOTE(review): intended semantics are inferred from the member names only;
// confirm once the implementation exists.
class Variables : public Kakoune::SafeCountable {
|
||||
public:
|
||||
Variables();
|
||||
~Variables();
|
||||
|
||||
// Resolves parName to a single string.
std::string resolve_string (const SourceInfo& parName) const;
|
||||
// Resolves parName to a list of sources.
std::vector<SourceInfo> resolve_array (const SourceInfo& parName) const;
|
||||
|
||||
// Registers an XPath assignment for parSource.
void add_xpath (const SourceInfo& parSource, const XPathElement& parVal);
|
||||
// Registers a struct block for parSource.
void add_struct (const SourceInfo& parSource, const StructBlock& parVal);
|
||||
|
||||
private:
|
||||
// TODO: storage is still undecided; the unfinished declaration is kept below.
//std::map<std::string,
|
||||
};
|
||||
}} //namespace duck::sl
|
||||
|
||||
#endif
|
34
src/scraplang/xpath_manager.cpp
Normal file
34
src/scraplang/xpath_manager.cpp
Normal file
|
@ -0,0 +1,34 @@
|
|||
/* Copyright (C) 2015 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "xpath_manager.hpp"
|
||||
#include "scrap_node.hpp"
|
||||
|
||||
namespace duck { namespace sl {
|
||||
XPathManager::XPathManager (HtmlPoolBaseSP parHtmlPool) :
|
||||
m_html_pool(parHtmlPool)
|
||||
{
|
||||
}
|
||||
|
||||
std::string XPathManager::extract_one (
|
||||
const XPathElement& parXPath
|
||||
) const {
|
||||
assert(false);
|
||||
return std::string();
|
||||
}
|
||||
}} //namespace duck::sl
|
43
src/scraplang/xpath_manager.hpp
Normal file
43
src/scraplang/xpath_manager.hpp
Normal file
|
@ -0,0 +1,43 @@
|
|||
/* Copyright (C) 2015 Michele Santullo
|
||||
*
|
||||
* This file is part of DuckScraper.
|
||||
*
|
||||
* DuckScraper is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* DuckScraper is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with DuckScraper. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef id69826186710D4048BF6810202EDF310D
|
||||
#define id69826186710D4048BF6810202EDF310D
|
||||
|
||||
#include "scraplang/html_pool_base.hpp"
|
||||
#include "scraplang/scrap_node.hpp"
|
||||
#include "kakoune/safe_ptr.hh"
|
||||
#include <string>
|
||||
|
||||
namespace duck { namespace sl {
|
||||
struct XPathElement;
|
||||
|
||||
// Extracts values for XPath elements, using the HTML pool handle it was
// constructed with.
class XPathManager : public Kakoune::SafeCountable {
|
||||
public:
|
||||
// Keeps a copy of the given pool handle in m_html_pool.
explicit XPathManager (HtmlPoolBaseSP parHtmlPool);
|
||||
~XPathManager() = default;
|
||||
|
||||
// Returns the string extracted for parXPath.
std::string extract_one (const XPathElement& parXPath) const;
|
||||
// Returns a pool handle by value; presumably the one stored at
// construction (the definition is not part of this header).
HtmlPoolBaseSP html_pool() const;
|
||||
|
||||
private:
|
||||
HtmlPoolBaseSP m_html_pool; // handle received by the constructor
|
||||
};
|
||||
}} //namespace duck::sl
|
||||
|
||||
#endif
|
Loading…
Add table
Reference in a new issue