1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2025-02-21 12:34:56 +00:00

Merge branch 'master' into sqlite

This commit is contained in:
King_DuckZ 2017-09-29 20:38:31 +01:00
commit 5ecaecda49
21 changed files with 398 additions and 70 deletions

6
.gitmodules vendored
View file

@ -13,6 +13,6 @@
[submodule "lib/incredis"]
path = lib/incredis
url = ../incredis.git
[submodule "lib/SQLiteCpp"]
path = lib/SQLiteCpp
url = https://github.com/SRombauts/SQLiteCpp.git
[submodule "cmake/binary_resource"]
path = cmake/binary_resource
url = https://github.com/KingDuckZ/binary_resource.git

View file

@ -2,6 +2,7 @@ cmake_minimum_required(VERSION 3.3 FATAL_ERROR)
set(bare_name "dindexer")
project("${bare_name}-if" VERSION 0.1.5 LANGUAGES CXX C)
list (APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules)
list (APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/binary_resource)
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "A file indexing program to help you keep track of your backed up files")
set(CPACK_PACKAGE_VENDOR "King_DuckZ")

1
cmake/binary_resource Submodule

@ -0,0 +1 @@
Subproject commit 07053fcc0bd6c5aa05e51f08ad3eae2e8d4befc8

View file

@ -21,6 +21,7 @@
#include <string>
#include <memory>
#include <boost/utility/string_ref.hpp>
#include <exception>
namespace YAML {
class Node;
@ -31,6 +32,11 @@ namespace dindb {
using BackendPtr = std::unique_ptr<dindb::Backend, void(*)(dindb::Backend*)>;
//Exception type used to report that a backend shared object could not be
//loaded. backend_name() throws it with the text returned by dlerror() when
//dlopen() fails.
class SOLoadException : public std::runtime_error {
public:
//parMessage becomes the text returned by what().
explicit SOLoadException (std::string&& parMessage);
};
class BackendPlugin {
public:
BackendPlugin ( void );

View file

@ -0,0 +1,41 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id7C1E5A0B9F2D4E6C8A3B5D7F9E1C2A4B
#define id7C1E5A0B9F2D4E6C8A3B5D7F9E1C2A4B

#include "dindexer-machinery/scantask/dirtree.hpp"
#include <string>
#include <vector>

struct stat;

namespace mchlib {
	namespace scantask {
		//Scan task producing a list with exactly one FileRecordData entry,
		//describing the single file at the path given at construction.
		class SingleFileTask : public Base<std::vector<mchlib::FileRecordData>> {
		public:
			typedef std::vector<mchlib::FileRecordData> PathList;

			//parPath must not be empty. The file is stat()-ed when the data
			//is created; failure to do so is reported with an exception at
			//that point, not here.
			explicit SingleFileTask ( std::string parPath );
			//Same as above, but uses the caller-provided stat data when
			//parStat is not null. Only the pointer is stored, so the
			//pointed-to struct must stay valid until the data is created.
			explicit SingleFileTask ( std::string parPath, const struct stat* parStat );
			virtual ~SingleFileTask ( void ) noexcept = default;

		private:
			virtual void on_data_destroy ( PathList& parData ) override;
			virtual void on_data_create ( PathList& parData ) override;

			std::string m_path;
			const struct stat* m_stat; //may be null, see constructors
		};
	} //namespace scantask
} //namespace mchlib

#endif

View file

@ -42,6 +42,7 @@ namespace pq {
void connect ( void );
void disconnect ( void );
ResultSet query ( const std::string& parQuery );
ResultSet query ( const char* parQuery );
std::string escaped_literal ( const std::string& parString );
std::string escaped_literal ( boost::string_ref parString );

View file

@ -21,6 +21,9 @@
#include "backends/backend_loader.hpp"
#include "backends/exposed_functions.hpp"
#include "backends/backend_version.hpp"
#include "dindexer-machinery/scantask/mime.hpp"
#include "dindexer-machinery/scantask/singlefile.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include <dlfcn.h>
#include <cassert>
#include <functional>
@ -72,17 +75,38 @@ namespace dindb {
auto get_version = reinterpret_cast<GetVersionFun>(dlsym(parSOHandle, "dindexer_backend_iface_version"));
return get_version();
}
std::string get_mime_type (const std::string& parPath) {
using mchlib::scantask::SingleFileTask;
using mchlib::scantask::Mime;
using std::shared_ptr;
shared_ptr<SingleFileTask> file(new SingleFileTask(parPath));
Mime mime(file, true);
const std::vector<mchlib::FileRecordData>& result = mime.get_or_create();
assert(result.size() == 1);
boost::string_ref retval = result.front().mime_type();
return std::string(retval.data(), retval.size());
}
} //unnamed namespace
//Forwards the message to std::runtime_error, which makes it available
//through what().
SOLoadException::SOLoadException (std::string&& parMessage) : std::runtime_error(std::move(parMessage)) {
}
std::string backend_name (const std::string& parSOPath) {
assert(not parSOPath.empty());
using SoHandle = std::unique_ptr<void, int(*)(void*)>;
if (get_mime_type(parSOPath) != "application/x-sharedlib")
return std::string();
auto handle = SoHandle(dlopen(parSOPath.c_str(), RTLD_LAZY), &dlclose);
if (handle)
return backend_name(handle.get());
else
return std::string();
throw SOLoadException(dlerror());
}
BackendPlugin::BackendPlugin() :

View file

@ -1,5 +1,20 @@
project(${bare_name}-backend-postgresql CXX)
# zlib is optional (no REQUIRED): it only decides whether the embedded SQL
# script is stored gzip-compressed.
find_package(ZLIB)
# Provides make_binary_resource(); found through the cmake/binary_resource
# directory appended to CMAKE_MODULE_PATH by the top-level CMakeLists.txt.
include(binary_resource)
# Pass the GZIP keyword only when zlib is available. create_tables.cpp
# decompresses the array under the same ZLIB_FOUND condition, so the two
# must stay in sync.
if (ZLIB_FOUND)
set(gzip GZIP)
else()
set(gzip "")
endif()
# Embed dindexer.sql as the array "create_tables_query"; the resulting
# create_tables_query.cpp in the current binary dir is listed among this
# library's sources.
make_binary_resource(${gzip}
INPUT ${CMAKE_BINARY_DIR}/dindexer.sql
ARRAY_NAME create_tables_query
EXTENSION .cpp
)
add_library(${PROJECT_NAME} SHARED
tag.cpp
delete.cpp
@ -7,8 +22,13 @@ add_library(${PROJECT_NAME} SHARED
scan.cpp
navigate.cpp
backend_postgresql.cpp
${CMAKE_CURRENT_BINARY_DIR}/create_tables_query.cpp
create_tables.cpp
)
target_include_directories(${PROJECT_NAME}
PRIVATE ${CMAKE_CURRENT_BINARY_DIR}
)
target_include_directories(${PROJECT_NAME} SYSTEM
PUBLIC ${Boost_INCLUDE_DIRS}
)
@ -17,10 +37,19 @@ target_link_libraries(${PROJECT_NAME}
PRIVATE ${bare_name}-inc
PRIVATE ${bare_name}-pq
)
if (ZLIB_FOUND)
target_link_libraries(${PROJECT_NAME} PRIVATE ZLIB::ZLIB)
endif()
install(TARGETS ${PROJECT_NAME}
LIBRARY DESTINATION lib
RUNTIME DESTINATION bin
ARCHIVE DESTINATION lib/static
)
configure_file(
backend_postgresql_config.h.in
${CMAKE_CURRENT_BINARY_DIR}/backend_postgresql_config.h
)
ln_backend(${PROJECT_NAME})

View file

@ -18,6 +18,7 @@
#include "backend_postgresql.hpp"
#include "backends/exposed_functions.hpp"
#include "backends/backend_version.hpp"
#include "create_tables.hpp"
#include "tag.hpp"
#include "delete.hpp"
#include "scan.hpp"
@ -83,6 +84,12 @@ namespace dindb {
//Opens the connection and, if that worked, makes sure the schema exists:
//when either the "files" or the "sets" table is reported as missing by
//pg_tables, the table-creation script is run.
void BackendPostgreSql::connect() {
	m_conn->connect();
	if (not m_conn->is_connected())
		return;

	const char* const tables_exist_query = "SELECT EXISTS(SELECT 1 FROM pg_tables WHERE tablename = 'files'), EXISTS(SELECT 1 FROM pg_tables WHERE tablename = 'sets');";
	pq::ResultSet res = m_conn->query(tables_exist_query);

	//Expect a single row with one boolean column per table
	if (res.size() != 1)
		return;
	if (res[0].size() != 2)
		return;

	//"f" is how a false boolean comes back in the result set
	if (res[0][0] == "f" or res[0][1] == "f") {
		create_tables(*m_conn);
	}
}
void BackendPostgreSql::disconnect() {

View file

@ -0,0 +1,23 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id119764EBCC3345668A406E9A8FC1B6C7
#define id119764EBCC3345668A406E9A8FC1B6C7
#cmakedefine ZLIB_FOUND
#endif

View file

@ -0,0 +1,66 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "create_tables.hpp"
#include "create_tables_query.hpp"
#include "pq/connection.hpp"
#include "backend_postgresql_config.h"
#if defined(ZLIB_FOUND)
# include <boost/iostreams/device/array.hpp>
# include <boost/iostreams/stream_buffer.hpp>
# include <boost/iostreams/filter/gzip.hpp>
# include <boost/iostreams/filtering_stream.hpp>
# include <algorithm>
# include <iterator>
# include <iostream>
#endif
#include <string>
#include <cassert>
namespace dindb {
//Runs the table-creation SQL script embedded in the binary (the
//create_tables_query array) on parConn, which must already be connected.
//When built with zlib the embedded script is gzip-compressed and gets
//decompressed here first; otherwise the array is used verbatim.
void create_tables (pq::Connection& parConn) {
	assert(parConn.is_connected());

#if defined(ZLIB_FOUND)
	using boost::iostreams::filtering_istream;
	using boost::iostreams::stream_buffer;
	using boost::iostreams::array_source;
	using boost::iostreams::gzip_decompressor;

	//The source buffer is pushed into the chain by reference, so it is
	//declared before the chain in order to be destroyed after it.
	stream_buffer<array_source> text_stream(reinterpret_cast<const char*>(create_tables_query), create_tables_query_len);
	filtering_istream fs;
	fs.push(gzip_decompressor{});
	fs.push(text_stream);

	//Unformatted read straight from the stream buffer: no whitespace
	//skipping to disable and no per-character formatted extraction.
	//The compressed size is only a lower bound for the final size.
	std::string query_str;
	query_str.reserve(create_tables_query_len);
	query_str.assign(std::istreambuf_iterator<char>(fs), std::istreambuf_iterator<char>());
#else
	const std::string query_str(reinterpret_cast<const char*>(create_tables_query), create_tables_query_len);
#endif

	//Note: the script is no longer echoed to stdout; that was debug
	//output, and it relied on <iostream> which is only included when
	//ZLIB_FOUND is defined.
	parConn.query(query_str);
}
} //namespace dindb

View file

@ -0,0 +1,29 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id16D167FB279D4A9097EDB94FB43BD3B6
#define id16D167FB279D4A9097EDB94FB43BD3B6
namespace pq {
class Connection;
} //namespace pq
namespace dindb {
//Runs the table-creation SQL script that is embedded in the backend at
//build time. parConn must already be connected (checked with an assert).
void create_tables (pq::Connection& parConn);
} //namespace dindb
#endif

View file

@ -0,0 +1,24 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id3885F59E78B044ABA51EBB161746354D
#define id3885F59E78B044ABA51EBB161746354D
//Table-creation SQL script embedded by the build (make_binary_resource on
//dindexer.sql). The bytes are gzip-compressed when the build found zlib,
//plain text otherwise; create_tables() handles both cases.
extern const unsigned char create_tables_query[];
//Number of bytes in create_tables_query, as stored (create_tables() uses it
//as the array length in both the compressed and the plain case).
extern const unsigned int create_tables_query_len;
#endif

View file

@ -49,12 +49,15 @@ namespace dinlib {
parOut.backend_name = settings["backend_name"].as<std::string>();
const std::string backend_settings_section = parOut.backend_name + "_settings";
if (settings[backend_settings_section]) {
auto settings_node = settings[backend_settings_section];
const std::string plugin_path = find_plugin_by_name(split_and_trim(search_paths, ':'), parOut.backend_name);
if (plugin_path.empty())
throw std::runtime_error(std::string("Unable to find any suitable plugin with the specified name \"") + parOut.backend_name + "\"");
parOut.backend_plugin = dindb::BackendPlugin(plugin_path, &settings_node);
throw_if_plugin_failed(parOut.backend_plugin, plugin_path, parOut.backend_name);
try {
auto settings_node = settings[backend_settings_section];
const std::string plugin_path = find_plugin_by_name(split_and_trim(search_paths, ':'), parOut.backend_name);
parOut.backend_plugin = dindb::BackendPlugin(plugin_path, &settings_node);
throw_if_plugin_failed(parOut.backend_plugin, plugin_path, parOut.backend_name);
}
catch (const dindb::SOLoadException& err) {
throw std::runtime_error(std::string("Unable to find any suitable plugin with the specified name \"") + err.what() + "\"");
}
}
}

View file

@ -17,7 +17,7 @@
#include "hash.hpp"
#include "dindexer-machinery/scantask/hashing.hpp"
#include "dindexer-machinery/scantask/dirtree.hpp"
#include "dindexer-machinery/scantask/singlefile.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include "dindexer-machinery/make_filerecord_tree.hpp"
#include <memory>
@ -31,59 +31,19 @@ namespace stask = mchlib::scantask;
namespace din {
namespace {
class SingleFileTask : public stask::Base<std::vector<mchlib::FileRecordData>> {
public:
typedef std::vector<mchlib::FileRecordData> PathList;
SingleFileTask ( std::string parPath, const struct stat* parStat );
virtual ~SingleFileTask ( void ) noexcept = default;
private:
virtual void on_data_destroy ( PathList& parData ) override;
virtual void on_data_create ( PathList& parData ) override;
std::string m_path;
const struct stat* m_stat;
};
SingleFileTask::SingleFileTask (std::string parPath, const struct stat* parStat) :
m_path(std::move(parPath)),
m_stat(parStat)
{
assert(not m_path.empty());
assert(m_stat);
}
void SingleFileTask::on_data_destroy (PathList& parData) {
assert(not parData.empty());
parData.clear();
}
void SingleFileTask::on_data_create (PathList& parData) {
assert(parData.empty());
parData.reserve(1);
parData.push_back(mchlib::FileRecordData(
std::string(m_path),
0,
m_stat->st_atime,
m_stat->st_mtime,
0,
false,
false
));
}
void fill_hash_nodes (
const std::vector<mchlib::FileRecordData>& parRefData,
const std::vector<mchlib::FileRecordNode>& parNodesIn,
std::vector<din::HashNode>& parNodesOut
) {
using std::string;
const std::size_t sz = parNodesIn.size();
parNodesOut.reserve(sz);
for (const auto& in : parNodesIn) {
assert(in.index < parRefData.size());
const auto& data = parRefData[in.index];
parNodesOut.push_back(HashNode{data.hash, {}});
parNodesOut.push_back(HashNode{string(data.path()), data.hash, {}});
}
assert(parNodesOut.size() == sz);
@ -97,6 +57,7 @@ namespace din {
std::vector<HashNode> hash (const std::string& parPath) {
using mchlib::FileRecordData;
using HashingTaskPtr = std::shared_ptr<stask::Hashing>;
using stask::SingleFileTask;
struct stat path_stat;
{

View file

@ -20,9 +20,11 @@
#include "dindexer-machinery/tiger.hpp"
#include <vector>
#include <string>
namespace din {
//One node of the tree returned by din::hash(). Instances are created with
//aggregate initialization, so the order of the members matters.
struct HashNode {
std::string path; //path of the hashed entry, copied from its FileRecordData
mchlib::TigerHash hash; //hash of the entry
std::vector<HashNode> children; //nested nodes; presumably the entries of a directory - TODO confirm
};

View file

@ -25,13 +25,9 @@
#include <ciso646>
#include <iterator>
#include <algorithm>
#include <iterator>
namespace dindb {
std::ostream& operator<< (std::ostream& parStream, const LocatedItem& parItem) {
parStream << parItem.group_id << '\t' << parItem.id << '\t' << parItem.path;
return parStream;
}
std::ostream& operator<< (std::ostream& parStream, const LocatedSet& parItem) {
const decltype(parItem.dir_count) one = 1;
const auto dircount = std::max(parItem.dir_count, one) - one;
@ -43,6 +39,17 @@ namespace dindb {
} //namespace dindb
namespace {
//An item found in the database together with the local path whose hash
//led to it. local_path is left empty when the item was found by a search
//that does not start from a local file.
struct LocatedItemInfo {
	dindb::LocatedItem item;
	std::string local_path;

	//Both arguments are moved into the new object
	LocatedItemInfo (dindb::LocatedItem&& parItem, std::string&& parPath) :
		item(std::move(parItem)),
		local_path(std::move(parPath))
	{
	}
};
std::vector<boost::string_ref> extract_tags (const boost::program_options::variables_map& parVM) {
if (not parVM.count("tags"))
return std::vector<boost::string_ref>();
@ -52,23 +59,34 @@ namespace {
void collect_matching_recursively (
dindb::Backend& parDB,
const std::vector<din::HashNode>& parHashes,
std::vector<din::HashNode>&& parHashes,
const std::vector<boost::string_ref>& parTags,
std::vector<dindb::LocatedItem>& parOut
std::vector<LocatedItemInfo>& parOut
) {
for (auto& hash : parHashes) {
for (auto&& hash : parHashes) {
std::vector<dindb::LocatedItem> results = parDB.locate_in_db(hash.hash, parTags);
if (results.empty()) {
collect_matching_recursively(parDB, hash.children, parTags, parOut);
collect_matching_recursively(parDB, std::move(hash.children), parTags, parOut);
}
else {
assert(1 == results.size());
for (auto&& res : results) {
parOut.push_back(std::move(res));
parOut.push_back(LocatedItemInfo(std::move(res), std::move(hash.path)));
}
}
}
}
std::ostream& operator<< (std::ostream& parStream, const LocatedItemInfo& parItem) {
if (not parItem.local_path.empty())
parStream << '"' << parItem.local_path << "\" -->\t";
parStream << "group id: " << parItem.item.group_id << '\t' <<
"item id: " << parItem.item.id << '\t' <<
'"' << parItem.item.path << '"'
;
return parStream;
}
} //unnamed namespace
int main (int parArgc, char* parArgv[]) {
@ -106,18 +124,25 @@ int main (int parArgc, char* parArgv[]) {
std::copy(results.begin(), results.end(), std::ostream_iterator<dindb::LocatedSet>(std::cout, "\n"));
}
else {
std::vector<dindb::LocatedItem> results;
std::vector<LocatedItemInfo> results;
const std::vector<boost::string_ref> tags = extract_tags(vm);
if (vm.count("byhash")) {
const auto hashes = din::hash(vm["substring"].as<std::string>());
collect_matching_recursively(db, hashes, tags, results);
auto hashes = din::hash(vm["substring"].as<std::string>());
collect_matching_recursively(db, std::move(hashes), tags, results);
}
else {
const auto search_regex = g2r::convert(vm["substring"].as<std::string>(), not vm.count("case-insensitive"));
results = db.locate_in_db(search_regex, tags);
auto located_items(db.locate_in_db(search_regex, tags));
results.reserve(located_items.size());
std::transform(
std::make_move_iterator(located_items.begin()),
std::make_move_iterator(located_items.end()),
std::back_inserter(results),
[](dindb::LocatedItem&& itm) { return LocatedItemInfo(std::move(itm), std::string()); }
);
}
std::copy(results.begin(), results.end(), std::ostream_iterator<dindb::LocatedItem>(std::cout, "\n"));
std::copy(results.begin(), results.end(), std::ostream_iterator<LocatedItemInfo>(std::cout, "\n"));
}
return 0;
}

View file

@ -24,6 +24,7 @@ add_library(${PROJECT_NAME} SHARED
scantask/contenttype.cpp
scantask/mime.cpp
scantask/setbasic.cpp
scantask/singlefile.cpp
make_filerecord_tree.cpp
)

View file

@ -115,6 +115,16 @@ namespace mchlib {
throw e;
}
}
catch (...) { //workaround for bug in gcc 5 and 6, see:
//https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66145
if (parIgnoreErrors) {
it->unreadable = true;
it->hash = TigerHash {};
}
else {
throw;
}
}
}
#if defined(INDEXER_VERBOSE)

View file

@ -0,0 +1,68 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "dindexer-machinery/scantask/singlefile.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include <cassert>
#include <ciso646>
#include <sys/stat.h>
namespace mchlib {
namespace scantask {
//Builds a task with no caller-provided stat data; the file is stat()-ed
//when the data gets created.
SingleFileTask::SingleFileTask (std::string parPath) :
	//parPath is a by-value sink parameter: move it along instead of
	//making a second copy for the delegated constructor
	SingleFileTask(std::move(parPath), nullptr)
{
}
//Stores the path and the (possibly null) pointer to the stat data.
//Only the pointer is kept, the struct itself is not copied.
SingleFileTask::SingleFileTask (std::string parPath, const struct stat* parStat) :
	m_path(std::move(parPath)), m_stat(parStat)
{
	//An empty path is a programming error
	assert(!m_path.empty());
}
//Discards the record list built by on_data_create(). The list is expected
//to be populated when this is called.
void SingleFileTask::on_data_destroy (PathList& parData) {
	assert(!parData.empty());
	parData.clear();
}
//Fills parData (expected to be empty) with the single record for m_path.
//Access and modification times come from the stat data given at
//construction or, if none was given, from a stat() call made here.
//Throws std::runtime_error if that stat() call fails.
void SingleFileTask::on_data_create (PathList& parData) {
	struct stat local_stat;
	const struct stat* file_stat = m_stat;

	if (nullptr == file_stat) {
		//No stat data from the caller, query the file system
		if (0 != stat(m_path.c_str(), &local_stat)) {
			throw std::runtime_error("Can't access file \"" + m_path + "\"");
		}
		file_stat = &local_stat;
	}

	assert(parData.empty());
	parData.reserve(1);

	mchlib::FileRecordData record(
		std::string(m_path),
		0,
		file_stat->st_atime,
		file_stat->st_mtime,
		0,
		false,
		false
	);
	parData.push_back(std::move(record));
}
} //namespace scantask
} //namespace mchlib

View file

@ -132,7 +132,13 @@ namespace pq {
}
//Convenience overload: runs parQuery (which must not be empty) through
//query(const char*) and returns its result.
ResultSet Connection::query (const std::string& parQuery) {
	assert(not parQuery.empty());
	//All the work, error checking included, is done by the const char*
	//overload. Calling PQexec here as well would execute the query twice.
	return this->query(parQuery.c_str());
}
ResultSet Connection::query (const char* parQuery) {
assert(parQuery);
ResultInfo info(PQexec(m_localData->connection, parQuery));
if (not info.result)
throw DatabaseException("Error running query", "Error allocating result object", __FILE__, __LINE__);
const int ress = PQresultStatus(info.result.get());