1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2024-11-25 00:53:43 +00:00

Implement sqlite backend functions for dir scanning.

This commit is contained in:
King_DuckZ 2017-10-23 10:51:38 +01:00
parent 7dff61f2a9
commit a9f6a58926
7 changed files with 323 additions and 98 deletions

View file

@ -133,9 +133,15 @@ endif()
# Turn the comma-separated DINDEXER_ENABLED_BACKENDS value into a CMake list and
# derive one DINDEXER_WITH_<BACKEND> switch for each optional backend.
string(REPLACE "," ";" backend_list "${DINDEXER_ENABLED_BACKENDS}")

list(FIND backend_list "redis" redis_found_index)
set(DINDEXER_WITH_REDIS OFF)
if (${redis_found_index} GREATER -1)
	set(DINDEXER_WITH_REDIS ON)
endif()

list(FIND backend_list "sqlite" sqlite_found_index)
set(DINDEXER_WITH_SQLITE OFF)
if (${sqlite_found_index} GREATER -1)
	set(DINDEXER_WITH_SQLITE ON)
endif()

# Drop every temporary so none of them is visible to the directories added later.
unset(redis_found_index)
unset(sqlite_found_index)
unset(backend_list)
@ -145,13 +151,15 @@ add_subdirectory(src/common)
add_subdirectory(src/machinery)
add_subdirectory(lib/pbl)
add_subdirectory(lib/glob2regex)
add_subdirectory(lib/SQLiteCpp EXCLUDE_FROM_ALL)
add_subdirectory(src/backends)
add_subdirectory(src/core)
add_shared_git_project(lib/duckhandy)
if (DINDEXER_WITH_REDIS)
add_shared_git_project(lib/incredis EXCLUDE_FROM_ALL)
endif()
if (DINDEXER_WITH_SQLITE)
add_subdirectory(lib/SQLiteCpp EXCLUDE_FROM_ALL)
endif()
#Actions
add_subdirectory(src/main)

View file

@ -9,6 +9,8 @@ endif()
# Shared library target for this backend, built from the sources listed below.
add_library(${PROJECT_NAME} SHARED
backend_sqlite.cpp
db_functions.cpp
time_t_to_timestamp.cpp
)
target_link_libraries(${PROJECT_NAME}

View file

@ -23,6 +23,7 @@
#include "dindexerConfig.h"
#include "duckhandy/stringize.h"
#include "SQLiteCpp/SQLiteCpp.h"
#include "db_functions.hpp"
#include <utility>
#include <yaml-cpp/yaml.h>
#include <array>
@ -59,8 +60,19 @@ access_time DATETIME,
modify_time DATETIME,
unreadable INTEGER NOT NULL,
mimetype TEXT NOT NULL,
charset TEXT NOT NULL,
tags TEXT NOT NULL
charset TEXT NOT NULL
);)";
//SQL statement creating the "file_tags" table: an auto-incremented id plus
//one (tag, file_id) pair per row.
//NOTE(review): file_id presumably refers to a row of "files"; no FOREIGN KEY
//constraint is declared here - confirm whether that is intended.
const char g_create_table_file_tags[] = R"(CREATE TABLE file_tags(
id INTEGER PRIMARY KEY AUTOINCREMENT,
tag TEXT NOT NULL,
file_id INTEGER NOT NULL
);)";
//SQL statement creating the "set_tags" table: an auto-incremented id plus
//one (tag, set_id) pair per row.
//NOTE(review): set_id presumably refers to a row of "sets"; no FOREIGN KEY
//constraint is declared here - confirm whether that is intended.
const char g_create_table_set_tags[] = R"(CREATE TABLE set_tags(
id INTEGER PRIMARY KEY AUTOINCREMENT,
tag TEXT NOT NULL,
set_id INTEGER NOT NULL
);)";
struct SqliteConnectionSettings {
@ -79,7 +91,7 @@ namespace YAML {
}
static bool decode (const Node& parNode, dindb::SqliteConnectionSettings& parSettings) {
if (not parNode.IsMap() or parNode.size() < 2) {
if (not parNode.IsMap() or parNode.size() < 1) {
return false;
}
@ -105,10 +117,11 @@ namespace dindb {
using SQLite::Database;
using SQLite::OPEN_READONLY;
using SQLite::OPEN_CREATE;
using SQLite::OPEN_READWRITE;
assert(not m_db);
if (not m_db)
m_db.reset(new Database(m_db_path, (m_read_only ? OPEN_READONLY : OPEN_CREATE)));
m_db.reset(new Database(m_db_path, (m_read_only ? OPEN_READONLY : OPEN_CREATE bitor OPEN_READWRITE)));
assert(m_db);
if (not m_read_only) {
@ -116,6 +129,10 @@ namespace dindb {
m_db->exec(g_create_table_files);
if (not m_db->tableExists("sets"))
m_db->exec(g_create_table_sets);
if (not m_db->tableExists("file_tags"))
m_db->exec(g_create_table_file_tags);
if (not m_db->tableExists("set_tags"))
m_db->exec(g_create_table_set_tags);
}
}
@ -153,100 +170,11 @@ namespace dindb {
}
//Stores a scanned set (parSetData), its file records (parData) and the
//signature of the writing application (parSignature) by forwarding everything
//to write_to_db() together with the open database handle.
//The database must already be open: m_db is dereferenced unconditionally.
void BackendSQLite::write_files (const std::vector<mchlib::FileRecordData>& parData, const mchlib::SetRecordDataFull& parSetData, const std::string& parSignature) {
	assert(m_db);
	write_to_db(*m_db, parData, parSetData, parSignature);
}
//Looks for a stored file whose hash equals parHash by forwarding to
//read_from_db() together with the open database handle.
//parItem and parSet are output parameters; the return value is whatever
//read_from_db() returns.
//The database must already be open: m_db is dereferenced unconditionally.
bool BackendSQLite::search_file_by_hash (mchlib::FileRecordData& parItem, mchlib::SetRecordDataFull& parSet, const mchlib::TigerHash& parHash) {
	assert(m_db);
	return read_from_db(*m_db, parItem, parSet, parHash);
}
std::vector<LocatedItem> BackendSQLite::locate_in_db (const std::string& parSearch, const TagList& parTags) {
@ -282,7 +210,7 @@ namespace dindb {
}
} //namespace dindb
extern "C" dindb::Backend* dindexer_create_backend (const YAML::Node* parConfig) {
extern "C" [[gnu::used]] dindb::Backend* dindexer_create_backend (const YAML::Node* parConfig) {
if (not parConfig)
return nullptr;
@ -302,7 +230,7 @@ extern "C" void dindexer_destroy_backend (dindb::Backend* parDele) {
}
//Returns the name under which this backend plugin identifies itself.
//The returned pointer refers to a string literal and must not be freed.
extern "C" const char* dindexer_backend_name() {
	return "sqlite";
}
extern "C" int dindexer_backend_iface_version() {

View file

@ -0,0 +1,176 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "db_functions.hpp"
#include "SQLiteCpp/SQLiteCpp.h"
#include "dindexer-machinery/recorddata.hpp"
#include "time_t_to_timestamp.hpp"
#include <cassert>
#include <cstdint>
#include <ctime>
#include <sstream>
#include <stdexcept>
namespace dindb {
namespace {
	//Typed accessors for a result column. Only the explicit specialisations
	//below are defined; any other T fails to link.
	template <typename T> T to (const SQLite::Column& parCol);

	//Reads the column as a long long int and reinterprets it as unsigned.
	template <> uint64_t to (const SQLite::Column& parCol) {
		static_assert(sizeof(long long int) == sizeof(uint64_t), "Unexpected type size");
		const long long int raw = parCol;
		return static_cast<uint64_t>(raw);
	}

	//Reads the column as an int; any non-zero value yields true.
	template <> bool to (const SQLite::Column& parCol) {
		return static_cast<bool>(static_cast<int>(parCol));
	}

	//Reads the column as an int and narrows it to 16 bits.
	template <> uint16_t to (const SQLite::Column& parCol) {
		return static_cast<uint16_t>(static_cast<int>(parCol));
	}

	//Reads the column as an int and reinterprets it as unsigned.
	template <> uint32_t to (const SQLite::Column& parCol) {
		static_assert(sizeof(int) == sizeof(uint32_t), "Unexpected type size");
		return static_cast<uint32_t>(static_cast<int>(parCol));
	}

	//Copies the column's text into a std::string.
	//NOTE(review): relies on the const char* conversion never yielding a
	//null pointer - confirm against the SQLiteCpp version in use.
	template <> std::string to (const SQLite::Column& parCol) {
		return std::string(static_cast<const char*>(parCol));
	}

	//Returns the first character of the column's text.
	template <> char to (const SQLite::Column& parCol) {
		return static_cast<const char*>(parCol)[0];
	}
} //unnamed namespace
//Overload taking a list of file ids.
//NOTE(review): unfinished - a transaction is opened but no statement is run
//and commit() is never called, so the database is left untouched. parFiles
//and parTags are not used yet.
void tag_files (SQLite::Database& parDB, const std::vector<uint64_t>& parFiles, const std::vector<boost::string_ref>& parTags, GroupIDType parSet) {
	SQLite::Transaction transaction(parDB);

	const bool set_was_given = (InvalidGroupID != parSet);
	if (set_was_given) {
		//TODO: the original draft started an
		//"UPDATE files SET tags = ..." statement here
	}
}
//Overload taking a list of regular expressions.
//NOTE(review): unfinished - it only opens a transaction and commits it
//straight away; parRegexes, parTags and parSet are not used yet.
void tag_files (SQLite::Database& parDB, const std::vector<std::string>& parRegexes, const std::vector<boost::string_ref>& parTags, GroupIDType parSet) {
	SQLite::Transaction transaction(parDB);
	transaction.commit();
}
//Stores one scanned set and all of its files.
//A row describing parSetData is inserted into "sets" (parSignature goes into
//the app_name column), then one row per element of parData is inserted into
//"files" with group_id set to the id of the new "sets" row.
//Everything runs inside a single transaction that is committed only after the
//last insert; if a statement throws, the transaction object is destroyed
//without commit() having been called.
void write_to_db (SQLite::Database& parDB, const std::vector<mchlib::FileRecordData>& parData, const mchlib::SetRecordDataFull& parSetData, const std::string& parSignature) {
	SQLite::Transaction trans(parDB);

	{
		SQLite::Statement set_query(
			parDB,
			"INSERT INTO "
			"sets(desc,type,app_name,content_type,fs_uuid,disk_label) "
			"VALUES(?,?,?,?,?,?);"
		);
		set_query.bind(1, parSetData.name);
		//type and content_type are single characters, stored as 1-char strings
		set_query.bind(2, std::string(&parSetData.type, 1));
		set_query.bind(3, parSignature);
		set_query.bind(4, std::string(&parSetData.content_type, 1));
		set_query.bind(5, parSetData.fs_uuid);
		set_query.bind(6, parSetData.disk_label);
		set_query.exec();
	}

	const auto new_group_id = parDB.getLastInsertRowid();
	assert(new_group_id > 0);

	//Prepared once and re-used for every file: compiling the same SQL text
	//again on each iteration is loop-invariant work.
	SQLite::Statement file_query(
		parDB,
		"INSERT INTO files(path,hash,level,group_id,is_directory,"
		"is_symlink,size,access_time,modify_time,is_hash_valid,"
		"unreadable,mimetype,charset) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?);"
	);
	for (const auto& itm : parData) {
		file_query.bind(1, std::string(itm.path()));
		file_query.bind(2, tiger_to_string(itm.hash));
		file_query.bind(3, itm.level);
		file_query.bind(4, new_group_id);
		file_query.bind(5, itm.is_directory);
		file_query.bind(6, itm.is_symlink);
		file_query.bind(7, static_cast<long long int>(itm.size));
		file_query.bind(8, time_t_to_timestamp(itm.atime));
		file_query.bind(9, time_t_to_timestamp(itm.mtime));
		file_query.bind(10, itm.hash_valid);
		file_query.bind(11, itm.unreadable);
		file_query.bind(12, std::string(itm.mime_type()));
		file_query.bind(13, std::string(itm.mime_charset()));
		file_query.exec();
		//Make the statement ready for the next row; all 13 parameters are
		//bound again above so no stale value can leak between rows.
		file_query.reset();
	}

	trans.commit();
}
//Looks up a file by hash and loads it together with the set it belongs to.
//
//parDB    database to query
//parItem  receives path, hash, level, size, is_directory and is_symlink of
//         the first row of "files" whose hash matches
//parSet   receives name, type, disk_number, fs_uuid, disk_label and
//         content_type of the row of "sets" referenced by that file
//parHash  hash to search for
//
//Returns false, leaving parItem and parSet untouched, when no file matches;
//true otherwise.
//Throws std::length_error when a file is found but its group_id does not
//exist in "sets".
bool read_from_db (
	SQLite::Database& parDB,
	mchlib::FileRecordData& parItem,
	mchlib::SetRecordDataFull& parSet,
	const mchlib::TigerHash& parHash
) {
	uint32_t group_id = 0;
	{
		SQLite::Statement query(
			parDB,
			"SELECT path,level,group_id,is_directory,is_symlink,size "
			"FROM files WHERE hash=?;"
		);
		//NOTE(review): write_to_db() stores tiger_to_string(hash) without the
		//second argument while the lookup passes true - confirm both calls
		//produce the same text, otherwise this query can never match.
		query.bind(1, tiger_to_string(parHash, true));
		if (not query.executeStep())
			return false;

		parItem.abs_path = to<std::string>(query.getColumn("path"));
		parItem.hash = parHash;
		parItem.level = to<uint16_t>(query.getColumn("level"));
		parItem.size = to<uint64_t>(query.getColumn("size"));
		parItem.is_directory = to<bool>(query.getColumn("is_directory"));
		parItem.is_symlink = to<bool>(query.getColumn("is_symlink"));
		group_id = query.getColumn("group_id");

		//Every path except a bare "/" gets a leading slash prepended
		if (parItem.abs_path != "/") {
			parItem.abs_path = std::string("/") + parItem.abs_path;
		}
		parItem.path_offset = 1;
	}

	{
		SQLite::Statement query(
			parDB,
			"SELECT desc,type,disk_number,fs_uuid,disk_label,content_type "
			"FROM sets WHERE id=?;"
		);
		query.bind(1, group_id);

		if (not query.executeStep()) {
			std::ostringstream err_msg;
			err_msg << "Missing set: found a record with group_id=" << group_id;
			err_msg << " but there is no such id in table \"sets\"";
			throw std::length_error(err_msg.str());
		}

		parSet.type = to<char>(query.getColumn("type"));
		//The set's name lives in the "desc" column (see the INSERT in
		//write_to_db); the result set has no column called "name".
		parSet.name = to<std::string>(query.getColumn("desc"));
		parSet.disk_number = to<uint32_t>(query.getColumn("disk_number"));
		parSet.fs_uuid = to<std::string>(query.getColumn("fs_uuid"));
		parSet.disk_label = to<std::string>(query.getColumn("disk_label"));
		parSet.content_type = to<char>(query.getColumn("content_type"));
	}
	return true;
}
} //namespace dindb

View file

@ -0,0 +1,46 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id48FE261DDC9F4F608C9E82FCCA4E55C0
#define id48FE261DDC9F4F608C9E82FCCA4E55C0
#include "backends/db_backend.hpp"
#include <vector>
#include <string>
#include <boost/utility/string_ref.hpp>
namespace SQLite {
class Database;
} //namespace SQLite
namespace mchlib {
struct FileRecordData;
struct SetRecordDataFull;
struct TigerHash;
} //namespace mchlib
namespace dindb {
//Tag
//NOTE(review): both overloads are still stubs in db_functions.cpp - neither
//executes any SQL statement yet.
void tag_files (SQLite::Database& parDB, const std::vector<uint64_t>& parFiles, const std::vector<boost::string_ref>& parTags, GroupIDType parSet);
void tag_files (SQLite::Database& parDB, const std::vector<std::string>& parRegexes, const std::vector<boost::string_ref>& parTags, GroupIDType parSet);
//Scan
//Inserts a row for parSetData into "sets" (parSignature is stored as app_name)
//and one row per element of parData into "files", in a single transaction.
void write_to_db (SQLite::Database& parDB, const std::vector<mchlib::FileRecordData>& parData, const mchlib::SetRecordDataFull& parSetData, const std::string& parSignature);
//Searches "files" for parHash. Returns false when nothing matches; otherwise
//fills parItem from the file row and parSet from the "sets" row it refers to,
//and returns true. Throws std::length_error when that "sets" row is missing.
bool read_from_db (SQLite::Database& parDB, mchlib::FileRecordData& parItem, mchlib::SetRecordDataFull& parSet, const mchlib::TigerHash& parHash);
} //namespace dindb
#endif

View file

@ -0,0 +1,37 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "time_t_to_timestamp.hpp"
//#include <cassert>
namespace dindb {
	//Converts parTime into the number of whole seconds elapsed since
	//1970-01-01 00:00:00 UTC.
	//
	//The reference instant is time_t zero, which is the Unix epoch on POSIX
	//systems. It must not be built with std::mktime(): mktime() interprets
	//its argument as local time, so the result would be shifted by the UTC
	//offset of whatever machine runs the scan, and it may fail altogether for
	//local times that precede the epoch.
	//std::difftime() is kept so that the result is expressed in seconds
	//whatever the representation of time_t is.
	long long int time_t_to_timestamp (const time_t& parTime) {
		const std::time_t unix_epoch = 0;
		return static_cast<long long int>(std::difftime(parTime, unix_epoch));
	}
} //namespace dindb

View file

@ -0,0 +1,28 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef idDA9D82D61C5F431EA7257AA7E2E7B1F5
#define idDA9D82D61C5F431EA7257AA7E2E7B1F5
#include <ctime>
namespace dindb {
//Converts parTime into a number of seconds counted from the start of 1970,
//returned as a long long int (the value write_to_db() stores for access and
//modification times).
[[gnu::pure]]
long long int time_t_to_timestamp (const time_t& parTime);
} //namespace dindb
#endif