Use XQilla and Xerces-c from the system instead of pugixml.
I don't think this commit works or even compiles; I have too many changes and I have to start committing somewhere. At the same time, I don't want to make a "lots of changes here and there" kind of commit.
This commit is contained in:
parent
9dba8043f1
commit
430886085c
6 changed files with 158 additions and 21 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,2 +1,3 @@
|
|||
build/
|
||||
tags
|
||||
compile_commands.json
|
||||
|
|
|
@ -5,8 +5,8 @@ project(duckscraper VERSION 0.2.1 LANGUAGES CXX)
|
|||
option(BUILD_SHARED_TIDY "Whether you want to build tidy-html5 as a shared library" OFF)
|
||||
|
||||
include(GetGitRevisionDescription)
|
||||
find_package(PugiXML REQUIRED)
|
||||
find_package(Boost 1.32.0 COMPONENTS program_options)
|
||||
find_package(XQilla 2.3.3 REQUIRED)
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
@ -59,6 +59,7 @@ target_link_libraries(${PROJECT_NAME}
|
|||
PRIVATE curlcpp
|
||||
PRIVATE ${Boost_LIBRARIES}
|
||||
PRIVATE mstch
|
||||
PRIVATE XQilla::XQilla
|
||||
)
|
||||
|
||||
target_compile_definitions(${PROJECT_NAME}
|
||||
|
|
28
cmake/Modules/FindXQilla.cmake
Normal file
28
cmake/Modules/FindXQilla.cmake
Normal file
|
@ -0,0 +1,28 @@
|
|||
# Find the XQilla library
# originally taken from
# https://github.com/rug-compling/alpinocorpus/blob/master/cmake/FindXQilla.cmake
#
# Result variables:
#   XQilla_FOUND       - set by find_package_handle_standard_args when both
#                        the header and the library were located
#   XQILLA_INCLUDE_DIR - directory containing xqilla/xqilla-simple.hpp
#   XQILLA_LIBRARY     - path to the xqilla library
#   XQILLA_LIBRARIES   - same value as XQILLA_LIBRARY
#
# Imported target (only defined when XQilla was found):
#   XQilla::XQilla     - carries the include directory and links XercesC::XercesC
#
# NOTE(review): no VERSION_VAR is passed below, so a version requested through
# find_package(XQilla <version>) is presumably not verified - confirm if a
# minimum version really matters.

find_path(XQILLA_INCLUDE_DIR NAMES xqilla/xqilla-simple.hpp)
find_library(XQILLA_LIBRARY NAMES xqilla)

include(FindPackageHandleStandardArgs)
# The first argument has to be spelled exactly like the name given to
# find_package(): the REQUIRED and QUIET options are looked up through
# <name>_FIND_REQUIRED / <name>_FIND_QUIETLY, which find_package() defines
# using the caller's spelling ("XQilla", not "XQILLA").
find_package_handle_standard_args(
	XQilla
	DEFAULT_MSG
	XQILLA_INCLUDE_DIR
	XQILLA_LIBRARY
)
set(XQILLA_LIBRARIES ${XQILLA_LIBRARY})
mark_as_advanced(XQILLA_INCLUDE_DIR XQILLA_LIBRARY)

if (XQilla_FOUND)
	# The imported target below links against XercesC::XercesC, so that
	# package must be available as well.
	find_package(XercesC REQUIRED)

	# Guard against the module being included more than once.
	if (NOT TARGET XQilla::XQilla)
		add_library(XQilla::XQilla UNKNOWN IMPORTED)
		set_target_properties(XQilla::XQilla PROPERTIES
			INTERFACE_INCLUDE_DIRECTORIES "${XQILLA_INCLUDE_DIR}"
			IMPORTED_LOCATION "${XQILLA_LIBRARY}"
			INTERFACE_LINK_LIBRARIES XercesC::XercesC
		)
	endif()
endif()
|
104
src/safe_stack_object.hpp
Normal file
104
src/safe_stack_object.hpp
Normal file
|
@ -0,0 +1,104 @@
|
|||
/*
|
||||
Copyright 2016, 2017 Michele "King_DuckZ" Santullo
|
||||
|
||||
This file is part of MyCurry.
|
||||
|
||||
MyCurry is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
MyCurry is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with MyCurry. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "kakoune/safe_ptr.hh"
#include <type_traits>
#include <utility>
|
||||
|
||||
namespace curry {
|
||||
template <typename T>
|
||||
class SafeStackObject {
|
||||
public:
|
||||
typedef Kakoune::SafePtr<T> safe_ptr;
|
||||
|
||||
SafeStackObject();
|
||||
SafeStackObject (SafeStackObject&& parOther);
|
||||
SafeStackObject (const SafeStackObject& parOther) = delete;
|
||||
template <typename... Args> explicit SafeStackObject (Args&&... parArgs);
|
||||
~SafeStackObject() noexcept = default;
|
||||
|
||||
SafeStackObject& operator= (SafeStackObject&& parOther) = delete;
|
||||
SafeStackObject& operator= (const SafeStackObject& parOther) = delete;
|
||||
|
||||
operator Kakoune::SafePtr<T>&();
|
||||
template <typename U>
|
||||
operator Kakoune::SafePtr<U>();
|
||||
T& operator*();
|
||||
safe_ptr& operator->();
|
||||
|
||||
private:
|
||||
T m_obj;
|
||||
safe_ptr m_obj_ptr;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
SafeStackObject<T>::SafeStackObject() :
|
||||
m_obj(),
|
||||
m_obj_ptr(&m_obj)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
SafeStackObject<T>::SafeStackObject (SafeStackObject&& parOther) :
|
||||
m_obj(std::move(parOther.m_obj)),
|
||||
m_obj_ptr(&m_obj)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
template <typename... Args>
|
||||
SafeStackObject<T>::SafeStackObject (Args&&... parArgs) :
|
||||
m_obj(std::forward<Args>(parArgs)...),
|
||||
m_obj_ptr(&m_obj)
|
||||
{
|
||||
}
|
||||
|
||||
//template <typename T>
|
||||
//SafeStackObject& SafeStackObject<T>::operator= (SafeStackObject&& parOther) {
|
||||
// m_obj = std::move(parOther.m_obj);
|
||||
// m_obj_ptr = std::move(parOther.m_obj_ptr);
|
||||
// m_ob
|
||||
//}
|
||||
|
||||
//template <typename T>
|
||||
//SafeStackObject& SafeStackObject<T>::operator= (const SafeStackObject& parOther) {
|
||||
//}
|
||||
|
||||
template <typename T>
|
||||
SafeStackObject<T>::operator Kakoune::SafePtr<T>&() {
|
||||
return m_obj_ptr;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
template <typename U>
|
||||
SafeStackObject<T>::operator Kakoune::SafePtr<U>() {
|
||||
return Kakoune::SafePtr<U>(&m_obj);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T& SafeStackObject<T>::operator*() {
|
||||
return *m_obj_ptr;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
auto SafeStackObject<T>::operator->() -> safe_ptr& {
|
||||
return m_obj_ptr;
|
||||
}
|
||||
} //namespace curry
|
|
@ -17,7 +17,9 @@
|
|||
*/
|
||||
|
||||
#include "xpath.hpp"
|
||||
#include <pugixml.hpp>
|
||||
#include <xercesc/framework/MemBufInputSource.hpp>
|
||||
#include <xercesc/util/XMLString.hpp>
|
||||
#include <xqilla/exceptions/XMLParseException.hpp>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <algorithm>
|
||||
|
@ -46,30 +48,30 @@ namespace duck {
|
|||
} //unnamed namespace
|
||||
|
||||
XPathBatchResults xpath_query (const std::string& parXML, const std::vector<std::string>& parQueries) {
|
||||
pugi::xml_document doc;
|
||||
std::istringstream iss(parXML);
|
||||
pugi::xml_parse_result result(doc.load(iss));
|
||||
if (not result) {
|
||||
auto line_col = line_col_from_offset(result.offset, parXML);
|
||||
throw ParseError(line_col.first, line_col.second, result.description());
|
||||
XQilla& xqilla = m_xqilla;
|
||||
XercesConfiguration xconfig;
|
||||
AutoDelete<DynamicContext> context(xqilla.createContext(XQilla::XQUERY_UPDATE, &xconfig));
|
||||
xercesc::MemBufInputSource input_buf(reinterpret_cast<const XMLByte*>(parXML.c_str()), parXML.size(), "n/a", false);
|
||||
Node::Ptr ptr;
|
||||
try {
|
||||
ptr = context->parseDocument(input_buf);
|
||||
}
|
||||
catch (const XMLParseException& err) {
|
||||
throw ParseError(err.getXQueryLine(), err.getXQueryColumn(), xercesc::XMLString::transcode(err.getError()));
|
||||
}
|
||||
context->setContextItem(ptr);
|
||||
|
||||
XPathBatchResults retval;
|
||||
for (const auto& xpath : parQueries) {
|
||||
pugi::xpath_node_set xpathRes = doc.select_nodes(xpath.c_str());
|
||||
AutoDelete<XQQuery> query(xqilla.parse(X(xpath.c_str())));
|
||||
context->setContextPosition(1);
|
||||
context->setContextSize(1);
|
||||
|
||||
Result result = query->execute(context);
|
||||
Item::Ptr item;
|
||||
std::vector<std::pair<std::string, std::string>> new_lst;
|
||||
for (pugi::xpath_node_set::const_iterator itFind(xpathRes.begin()), itFindEND(xpathRes.end()); itFind != itFindEND; ++itFind) {
|
||||
const pugi::xpath_node& node = *itFind;
|
||||
std::pair<std::string, std::string> new_itm;
|
||||
if (node.node()) {
|
||||
new_itm.first = std::string(node.node().name());
|
||||
new_itm.second = std::string(node.node().value());
|
||||
}
|
||||
else if (node.attribute()) {
|
||||
new_itm.first = std::string(node.attribute().name());
|
||||
new_itm.second = std::string(node.attribute().value());
|
||||
}
|
||||
new_lst.push_back(std::move(new_itm));
|
||||
while(nullptr != (item = result->next(context))) {
|
||||
new_lst.push_back(std::make_pair(std::string(), UTF8(item->asString(context))));
|
||||
}
|
||||
retval.push_back(std::move(new_lst));
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include <vector>
|
||||
#include <exception>
|
||||
#include <utility>
|
||||
#include <xqilla/xqilla-simple.hpp>
|
||||
|
||||
namespace duck {
|
||||
typedef std::vector<std::vector<std::pair<std::string, std::string>>> XPathBatchResults;
|
||||
|
|
Loading…
Reference in a new issue