Use XQilla and Xerces-c from the system instead of pugixml.
I don't think this commit works or even compiles: I have too many changes and I have to start committing from somewhere. At the same time, I don't want to make a "lots of changes here and there" kind of commit.
This commit is contained in:
parent
9dba8043f1
commit
430886085c
6 changed files with 158 additions and 21 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,2 +1,3 @@
|
||||||
build/
|
build/
|
||||||
tags
|
tags
|
||||||
|
compile_commands.json
|
||||||
|
|
|
@ -5,8 +5,8 @@ project(duckscraper VERSION 0.2.1 LANGUAGES CXX)
|
||||||
option(BUILD_SHARED_TIDY "Wheter you want to build tidy-html5 as a shared library" OFF)
|
option(BUILD_SHARED_TIDY "Wheter you want to build tidy-html5 as a shared library" OFF)
|
||||||
|
|
||||||
include(GetGitRevisionDescription)
|
include(GetGitRevisionDescription)
|
||||||
find_package(PugiXML REQUIRED)
|
|
||||||
find_package(Boost 1.32.0 COMPONENTS program_options)
|
find_package(Boost 1.32.0 COMPONENTS program_options)
|
||||||
|
find_package(XQilla 2.3.3 REQUIRED)
|
||||||
|
|
||||||
set(CMAKE_CXX_STANDARD 17)
|
set(CMAKE_CXX_STANDARD 17)
|
||||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||||
|
@ -59,6 +59,7 @@ target_link_libraries(${PROJECT_NAME}
|
||||||
PRIVATE curlcpp
|
PRIVATE curlcpp
|
||||||
PRIVATE ${Boost_LIBRARIES}
|
PRIVATE ${Boost_LIBRARIES}
|
||||||
PRIVATE mstch
|
PRIVATE mstch
|
||||||
|
PRIVATE XQilla::XQilla
|
||||||
)
|
)
|
||||||
|
|
||||||
target_compile_definitions(${PROJECT_NAME}
|
target_compile_definitions(${PROJECT_NAME}
|
||||||
|
|
28
cmake/Modules/FindXQilla.cmake
Normal file
28
cmake/Modules/FindXQilla.cmake
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
# Find the XQilla library
# originally taken from
# https://github.com/rug-compling/alpinocorpus/blob/master/cmake/FindXQilla.cmake
#
# Cache variables (marked as advanced):
#   XQILLA_INCLUDE_DIR - directory containing xqilla/xqilla-simple.hpp
#   XQILLA_LIBRARY     - path to the xqilla library
#
# Result variables:
#   XQILLA_FOUND       - true when both of the above were located
#   XQILLA_LIBRARIES   - same value as XQILLA_LIBRARY
#
# Imported target (only defined when XQilla was found):
#   XQilla::XQilla     - carries the include directory and the library
#                        location, and links XercesC::XercesC
#
# NOTE: no VERSION_VAR is handed to find_package_handle_standard_args(), so a
# minimum version given to find_package(XQilla <version>) is not validated by
# this module.

find_path(XQILLA_INCLUDE_DIR NAMES xqilla/xqilla-simple.hpp)
find_library(XQILLA_LIBRARY NAMES xqilla)

include(FindPackageHandleStandardArgs)
# The name given here has to match the name used in find_package(XQilla ...);
# recent CMake versions warn when the two differ. The upper-case
# XQILLA_FOUND variable tested below is still set for backward compatibility.
find_package_handle_standard_args(
	XQilla
	REQUIRED_VARS
		XQILLA_INCLUDE_DIR
		XQILLA_LIBRARY
)

set(XQILLA_LIBRARIES ${XQILLA_LIBRARY})
mark_as_advanced(XQILLA_INCLUDE_DIR XQILLA_LIBRARY)

if (XQILLA_FOUND)
	# The imported target below links against Xerces-C, so it must be
	# available whenever XQilla itself is.
	find_package(XercesC REQUIRED)

	# Guard against redefinition when this module is included more than once.
	if (NOT TARGET XQilla::XQilla)
		add_library(XQilla::XQilla UNKNOWN IMPORTED)
		set_target_properties(XQilla::XQilla PROPERTIES
			INTERFACE_INCLUDE_DIRECTORIES "${XQILLA_INCLUDE_DIR}"
			IMPORTED_LOCATION "${XQILLA_LIBRARY}"
			INTERFACE_LINK_LIBRARIES XercesC::XercesC
		)
	endif()
endif()
|
104
src/safe_stack_object.hpp
Normal file
104
src/safe_stack_object.hpp
Normal file
|
@ -0,0 +1,104 @@
|
||||||
|
/*
|
||||||
|
Copyright 2016, 2017 Michele "King_DuckZ" Santullo
|
||||||
|
|
||||||
|
This file is part of MyCurry.
|
||||||
|
|
||||||
|
MyCurry is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
MyCurry is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License
|
||||||
|
along with MyCurry. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "kakoune/safe_ptr.hh"
#include <type_traits>
#include <utility>
|
||||||
|
|
||||||
|
namespace curry {
|
||||||
|
template <typename T>
|
||||||
|
class SafeStackObject {
|
||||||
|
public:
|
||||||
|
typedef Kakoune::SafePtr<T> safe_ptr;
|
||||||
|
|
||||||
|
SafeStackObject();
|
||||||
|
SafeStackObject (SafeStackObject&& parOther);
|
||||||
|
SafeStackObject (const SafeStackObject& parOther) = delete;
|
||||||
|
template <typename... Args> explicit SafeStackObject (Args&&... parArgs);
|
||||||
|
~SafeStackObject() noexcept = default;
|
||||||
|
|
||||||
|
SafeStackObject& operator= (SafeStackObject&& parOther) = delete;
|
||||||
|
SafeStackObject& operator= (const SafeStackObject& parOther) = delete;
|
||||||
|
|
||||||
|
operator Kakoune::SafePtr<T>&();
|
||||||
|
template <typename U>
|
||||||
|
operator Kakoune::SafePtr<U>();
|
||||||
|
T& operator*();
|
||||||
|
safe_ptr& operator->();
|
||||||
|
|
||||||
|
private:
|
||||||
|
T m_obj;
|
||||||
|
safe_ptr m_obj_ptr;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
SafeStackObject<T>::SafeStackObject() :
|
||||||
|
m_obj(),
|
||||||
|
m_obj_ptr(&m_obj)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
SafeStackObject<T>::SafeStackObject (SafeStackObject&& parOther) :
|
||||||
|
m_obj(std::move(parOther.m_obj)),
|
||||||
|
m_obj_ptr(&m_obj)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
template <typename... Args>
|
||||||
|
SafeStackObject<T>::SafeStackObject (Args&&... parArgs) :
|
||||||
|
m_obj(std::forward<Args>(parArgs)...),
|
||||||
|
m_obj_ptr(&m_obj)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
//template <typename T>
|
||||||
|
//SafeStackObject& SafeStackObject<T>::operator= (SafeStackObject&& parOther) {
|
||||||
|
// m_obj = std::move(parOther.m_obj);
|
||||||
|
// m_obj_ptr = std::move(parOther.m_obj_ptr);
|
||||||
|
// m_ob
|
||||||
|
//}
|
||||||
|
|
||||||
|
//template <typename T>
|
||||||
|
//SafeStackObject& SafeStackObject<T>::operator= (const SafeStackObject& parOther) {
|
||||||
|
//}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
SafeStackObject<T>::operator Kakoune::SafePtr<T>&() {
|
||||||
|
return m_obj_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
template <typename U>
|
||||||
|
SafeStackObject<T>::operator Kakoune::SafePtr<U>() {
|
||||||
|
return Kakoune::SafePtr<U>(&m_obj);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
T& SafeStackObject<T>::operator*() {
|
||||||
|
return *m_obj_ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
auto SafeStackObject<T>::operator->() -> safe_ptr& {
|
||||||
|
return m_obj_ptr;
|
||||||
|
}
|
||||||
|
} //namespace curry
|
|
@ -17,7 +17,9 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#include "xpath.hpp"
|
#include "xpath.hpp"
|
||||||
#include <pugixml.hpp>
|
#include <xercesc/framework/MemBufInputSource.hpp>
|
||||||
|
#include <xercesc/util/XMLString.hpp>
|
||||||
|
#include <xqilla/exceptions/XMLParseException.hpp>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
@ -46,30 +48,30 @@ namespace duck {
|
||||||
} //unnamed namespace
|
} //unnamed namespace
|
||||||
|
|
||||||
XPathBatchResults xpath_query (const std::string& parXML, const std::vector<std::string>& parQueries) {
|
XPathBatchResults xpath_query (const std::string& parXML, const std::vector<std::string>& parQueries) {
|
||||||
pugi::xml_document doc;
|
XQilla& xqilla = m_xqilla;
|
||||||
std::istringstream iss(parXML);
|
XercesConfiguration xconfig;
|
||||||
pugi::xml_parse_result result(doc.load(iss));
|
AutoDelete<DynamicContext> context(xqilla.createContext(XQilla::XQUERY_UPDATE, &xconfig));
|
||||||
if (not result) {
|
xercesc::MemBufInputSource input_buf(reinterpret_cast<const XMLByte*>(parXML.c_str()), parXML.size(), "n/a", false);
|
||||||
auto line_col = line_col_from_offset(result.offset, parXML);
|
Node::Ptr ptr;
|
||||||
throw ParseError(line_col.first, line_col.second, result.description());
|
try {
|
||||||
|
ptr = context->parseDocument(input_buf);
|
||||||
}
|
}
|
||||||
|
catch (const XMLParseException& err) {
|
||||||
|
throw ParseError(err.getXQueryLine(), err.getXQueryColumn(), xercesc::XMLString::transcode(err.getError()));
|
||||||
|
}
|
||||||
|
context->setContextItem(ptr);
|
||||||
|
|
||||||
XPathBatchResults retval;
|
XPathBatchResults retval;
|
||||||
for (const auto& xpath : parQueries) {
|
for (const auto& xpath : parQueries) {
|
||||||
pugi::xpath_node_set xpathRes = doc.select_nodes(xpath.c_str());
|
AutoDelete<XQQuery> query(xqilla.parse(X(xpath.c_str())));
|
||||||
|
context->setContextPosition(1);
|
||||||
|
context->setContextSize(1);
|
||||||
|
|
||||||
|
Result result = query->execute(context);
|
||||||
|
Item::Ptr item;
|
||||||
std::vector<std::pair<std::string, std::string>> new_lst;
|
std::vector<std::pair<std::string, std::string>> new_lst;
|
||||||
for (pugi::xpath_node_set::const_iterator itFind(xpathRes.begin()), itFindEND(xpathRes.end()); itFind != itFindEND; ++itFind) {
|
while(nullptr != (item = result->next(context))) {
|
||||||
const pugi::xpath_node& node = *itFind;
|
new_lst.push_back(std::make_pair(std::string(), UTF8(item->asString(context))));
|
||||||
std::pair<std::string, std::string> new_itm;
|
|
||||||
if (node.node()) {
|
|
||||||
new_itm.first = std::string(node.node().name());
|
|
||||||
new_itm.second = std::string(node.node().value());
|
|
||||||
}
|
|
||||||
else if (node.attribute()) {
|
|
||||||
new_itm.first = std::string(node.attribute().name());
|
|
||||||
new_itm.second = std::string(node.attribute().value());
|
|
||||||
}
|
|
||||||
new_lst.push_back(std::move(new_itm));
|
|
||||||
}
|
}
|
||||||
retval.push_back(std::move(new_lst));
|
retval.push_back(std::move(new_lst));
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <exception>
|
#include <exception>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
#include <xqilla/xqilla-simple.hpp>
|
||||||
|
|
||||||
namespace duck {
|
namespace duck {
|
||||||
typedef std::vector<std::vector<std::pair<std::string, std::string>>> XPathBatchResults;
|
typedef std::vector<std::vector<std::pair<std::string, std::string>>> XPathBatchResults;
|
||||||
|
|
Loading…
Add table
Reference in a new issue