Use XQilla and Xerces-c from the system instead of pugixml.

I don't think this commit works or even compiles, I have too many
changes and I have to start committing from somewhere. At the same
time I don't want to make a "lots of changes here and there" kind
of commit.
This commit is contained in:
King_DuckZ 2020-02-18 10:19:51 +01:00
parent 9dba8043f1
commit 430886085c
6 changed files with 158 additions and 21 deletions

1
.gitignore vendored
View file

@ -1,2 +1,3 @@
build/
tags
compile_commands.json

View file

@ -5,8 +5,8 @@ project(duckscraper VERSION 0.2.1 LANGUAGES CXX)
option(BUILD_SHARED_TIDY "Wheter you want to build tidy-html5 as a shared library" OFF)
include(GetGitRevisionDescription)
find_package(PugiXML REQUIRED)
find_package(Boost 1.32.0 COMPONENTS program_options)
find_package(XQilla 2.3.3 REQUIRED)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
@ -59,6 +59,7 @@ target_link_libraries(${PROJECT_NAME}
PRIVATE curlcpp
PRIVATE ${Boost_LIBRARIES}
PRIVATE mstch
PRIVATE XQilla::XQilla
)
target_compile_definitions(${PROJECT_NAME}

View file

@ -0,0 +1,28 @@
# Find the XQilla library
# originally taken from
# https://github.com/rug-compling/alpinocorpus/blob/master/cmake/FindXQilla.cmake
#
# Cache variables (advanced):
#   XQILLA_INCLUDE_DIR - directory containing xqilla/xqilla-simple.hpp
#   XQILLA_LIBRARY     - path to the xqilla library
#
# Result variables:
#   XQilla_FOUND       - true when both of the above were located
#   XQILLA_FOUND       - upper-case alias, also set by
#                        find_package_handle_standard_args
#   XQILLA_LIBRARIES   - copy of XQILLA_LIBRARY
#
# Imported target (only defined when XQilla was found):
#   XQilla::XQilla     - carries the include directory, the library location
#                        and a link dependency on XercesC::XercesC

find_path(XQILLA_INCLUDE_DIR NAMES xqilla/xqilla-simple.hpp)
find_library(XQILLA_LIBRARY NAMES xqilla)

include(FindPackageHandleStandardArgs)
# The first argument must match the name used in find_package(XQilla ...),
# otherwise CMake >= 3.17 warns about a package name mismatch and only the
# upper-case XQILLA_FOUND gets set.
find_package_handle_standard_args(
	XQilla
	REQUIRED_VARS
		XQILLA_INCLUDE_DIR
		XQILLA_LIBRARY
)

set(XQILLA_LIBRARIES ${XQILLA_LIBRARY})
mark_as_advanced(XQILLA_INCLUDE_DIR XQILLA_LIBRARY)

if (XQilla_FOUND)
	# The imported target below links against XercesC::XercesC, so Xerces-C
	# has to be available whenever XQilla is.
	find_package(XercesC REQUIRED)

	if (NOT TARGET XQilla::XQilla)
		add_library(XQilla::XQilla UNKNOWN IMPORTED)
		set_target_properties(XQilla::XQilla PROPERTIES
			INTERFACE_INCLUDE_DIRECTORIES "${XQILLA_INCLUDE_DIR}"
			IMPORTED_LOCATION "${XQILLA_LIBRARY}"
			INTERFACE_LINK_LIBRARIES XercesC::XercesC
		)
	endif()
endif()

104
src/safe_stack_object.hpp Normal file
View file

@ -0,0 +1,104 @@
/*
Copyright 2016, 2017 Michele "King_DuckZ" Santullo
This file is part of MyCurry.
MyCurry is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
MyCurry is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with MyCurry. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "kakoune/safe_ptr.hh"
#include <type_traits>
#include <utility>
namespace curry {
	namespace implem {
		/// Trait that is true only when Args consists of exactly one type
		/// which, once decayed, is Self. Used to keep the forwarding
		/// constructor of SafeStackObject from standing in for the copy or
		/// move constructor.
		template <typename Self, typename... Args>
		struct SafeStackObjectIsSelf : std::false_type {};

		template <typename Self, typename Arg>
		struct SafeStackObjectIsSelf<Self, Arg> : std::is_same<Self, std::decay_t<Arg>> {};
	} //namespace implem

	/// Holds a T by value together with a Kakoune::SafePtr<T> that points at
	/// that value, and hands the pointer out through conversions and
	/// operator->.
	///
	/// The object is move-constructible only; copy construction and both
	/// assignment operators are deleted.
	template <typename T>
	class SafeStackObject {
	public:
		typedef Kakoune::SafePtr<T> safe_ptr;

		/// Value-initialises the wrapped object.
		SafeStackObject();

		/// Move-constructs the wrapped object from parOther's. The internal
		/// pointer is bound to the new object, not taken from parOther.
		SafeStackObject (SafeStackObject&& parOther);
		SafeStackObject (const SafeStackObject& parOther) = delete;

		/// Forwards parArgs to T's constructor.
		///
		/// Not viable when called with a single SafeStackObject<T> (of any
		/// cv/ref qualification): without this constraint the template would
		/// be a better match than the deleted copy constructor for non-const
		/// lvalues and would try to build T out of a SafeStackObject.
		template <
			typename... Args,
			typename = std::enable_if_t<!implem::SafeStackObjectIsSelf<SafeStackObject, Args...>::value>
		>
		explicit SafeStackObject (Args&&... parArgs);

		~SafeStackObject() noexcept = default;

		SafeStackObject& operator= (SafeStackObject&& parOther) = delete;
		SafeStackObject& operator= (const SafeStackObject& parOther) = delete;

		/// Gives access to the internal pointer by reference.
		operator Kakoune::SafePtr<T>&();

		/// Builds a new SafePtr<U> from the address of the wrapped object.
		/// NOTE(review): presumably meant for U being a base of T; confirm
		/// against Kakoune::SafePtr's constructor.
		template <typename U>
		operator Kakoune::SafePtr<U>();

		/// Returns the wrapped object by dereferencing the internal pointer.
		T& operator*();

		/// Returns the internal pointer; member access then continues through
		/// safe_ptr's own operator-> (assumed to be provided by Kakoune).
		safe_ptr& operator->();

	private:
		// Declared before m_obj_ptr so that it is initialised first; the
		// constructors take its address to initialise m_obj_ptr.
		T m_obj;
		safe_ptr m_obj_ptr;
	};

	template <typename T>
	SafeStackObject<T>::SafeStackObject() :
		m_obj(),
		m_obj_ptr(&m_obj)
	{
	}

	template <typename T>
	SafeStackObject<T>::SafeStackObject (SafeStackObject&& parOther) :
		m_obj(std::move(parOther.m_obj)),
		m_obj_ptr(&m_obj)
	{
	}

	template <typename T>
	template <typename... Args, typename>
	SafeStackObject<T>::SafeStackObject (Args&&... parArgs) :
		m_obj(std::forward<Args>(parArgs)...),
		m_obj_ptr(&m_obj)
	{
	}

	// Unfinished draft of the assignment operators; both are currently
	// declared `= delete` in the class.
	//template <typename T>
	//SafeStackObject& SafeStackObject<T>::operator= (SafeStackObject&& parOther) {
	//	m_obj = std::move(parOther.m_obj);
	//	m_obj_ptr = std::move(parOther.m_obj_ptr);
	//	m_ob
	//}

	//template <typename T>
	//SafeStackObject& SafeStackObject<T>::operator= (const SafeStackObject& parOther) {
	//}

	template <typename T>
	SafeStackObject<T>::operator Kakoune::SafePtr<T>&() {
		return m_obj_ptr;
	}

	template <typename T>
	template <typename U>
	SafeStackObject<T>::operator Kakoune::SafePtr<U>() {
		return Kakoune::SafePtr<U>(&m_obj);
	}

	template <typename T>
	T& SafeStackObject<T>::operator*() {
		return *m_obj_ptr;
	}

	template <typename T>
	auto SafeStackObject<T>::operator->() -> safe_ptr& {
		return m_obj_ptr;
	}
} //namespace curry

View file

@ -17,7 +17,9 @@
*/
#include "xpath.hpp"
#include <pugixml.hpp>
#include <xercesc/framework/MemBufInputSource.hpp>
#include <xercesc/util/XMLString.hpp>
#include <xqilla/exceptions/XMLParseException.hpp>
#include <sstream>
#include <stdexcept>
#include <algorithm>
@ -46,30 +48,30 @@ namespace duck {
} //unnamed namespace
XPathBatchResults xpath_query (const std::string& parXML, const std::vector<std::string>& parQueries) {
pugi::xml_document doc;
std::istringstream iss(parXML);
pugi::xml_parse_result result(doc.load(iss));
if (not result) {
auto line_col = line_col_from_offset(result.offset, parXML);
throw ParseError(line_col.first, line_col.second, result.description());
XQilla& xqilla = m_xqilla;
XercesConfiguration xconfig;
AutoDelete<DynamicContext> context(xqilla.createContext(XQilla::XQUERY_UPDATE, &xconfig));
xercesc::MemBufInputSource input_buf(reinterpret_cast<const XMLByte*>(parXML.c_str()), parXML.size(), "n/a", false);
Node::Ptr ptr;
try {
ptr = context->parseDocument(input_buf);
}
catch (const XMLParseException& err) {
throw ParseError(err.getXQueryLine(), err.getXQueryColumn(), xercesc::XMLString::transcode(err.getError()));
}
context->setContextItem(ptr);
XPathBatchResults retval;
for (const auto& xpath : parQueries) {
pugi::xpath_node_set xpathRes = doc.select_nodes(xpath.c_str());
AutoDelete<XQQuery> query(xqilla.parse(X(xpath.c_str())));
context->setContextPosition(1);
context->setContextSize(1);
Result result = query->execute(context);
Item::Ptr item;
std::vector<std::pair<std::string, std::string>> new_lst;
for (pugi::xpath_node_set::const_iterator itFind(xpathRes.begin()), itFindEND(xpathRes.end()); itFind != itFindEND; ++itFind) {
const pugi::xpath_node& node = *itFind;
std::pair<std::string, std::string> new_itm;
if (node.node()) {
new_itm.first = std::string(node.node().name());
new_itm.second = std::string(node.node().value());
}
else if (node.attribute()) {
new_itm.first = std::string(node.attribute().name());
new_itm.second = std::string(node.attribute().value());
}
new_lst.push_back(std::move(new_itm));
while(nullptr != (item = result->next(context))) {
new_lst.push_back(std::make_pair(std::string(), UTF8(item->asString(context))));
}
retval.push_back(std::move(new_lst));
}

View file

@ -23,6 +23,7 @@
#include <vector>
#include <exception>
#include <utility>
#include <xqilla/xqilla-simple.hpp>
namespace duck {
typedef std::vector<std::vector<std::pair<std::string, std::string>>> XPathBatchResults;