From a9f6a5892618b85f7ae73dd3691d5fe8863603c9 Mon Sep 17 00:00:00 2001 From: King_DuckZ Date: Mon, 23 Oct 2017 10:51:38 +0100 Subject: [PATCH] Implement sqlite backend functions for dir scanning. --- CMakeLists.txt | 10 +- src/backends/sqlite/CMakeLists.txt | 2 + src/backends/sqlite/backend_sqlite.cpp | 122 +++----------- src/backends/sqlite/db_functions.cpp | 176 ++++++++++++++++++++ src/backends/sqlite/db_functions.hpp | 46 +++++ src/backends/sqlite/time_t_to_timestamp.cpp | 37 ++++ src/backends/sqlite/time_t_to_timestamp.hpp | 28 ++++ 7 files changed, 323 insertions(+), 98 deletions(-) create mode 100644 src/backends/sqlite/db_functions.cpp create mode 100644 src/backends/sqlite/db_functions.hpp create mode 100644 src/backends/sqlite/time_t_to_timestamp.cpp create mode 100644 src/backends/sqlite/time_t_to_timestamp.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 01d27f1..4c82e86 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -133,9 +133,15 @@ endif() string(REPLACE "," ";" backend_list "${DINDEXER_ENABLED_BACKENDS}") list(FIND backend_list "redis" redis_found_index) +set(DINDEXER_WITH_REDIS OFF) if (${redis_found_index} GREATER -1) set(DINDEXER_WITH_REDIS ON) endif() +list(FIND backend_list "sqlite" sqlite_found_index) +set(DINDEXER_WITH_SQLITE OFF) +if (${sqlite_found_index} GREATER -1) + set(DINDEXER_WITH_SQLITE ON) +endif() unset(redis_found_index) unset(backend_list) @@ -145,13 +151,15 @@ add_subdirectory(src/common) add_subdirectory(src/machinery) add_subdirectory(lib/pbl) add_subdirectory(lib/glob2regex) -add_subdirectory(lib/SQLiteCpp EXCLUDE_FROM_ALL) add_subdirectory(src/backends) add_subdirectory(src/core) add_shared_git_project(lib/duckhandy) if (DINDEXER_WITH_REDIS) add_shared_git_project(lib/incredis EXCLUDE_FROM_ALL) endif() +if (DINDEXER_WITH_SQLITE) + add_subdirectory(lib/SQLiteCpp EXCLUDE_FROM_ALL) +endif() #Actions add_subdirectory(src/main) diff --git a/src/backends/sqlite/CMakeLists.txt b/src/backends/sqlite/CMakeLists.txt index 
45d87f1..0c9d977 100644 --- a/src/backends/sqlite/CMakeLists.txt +++ b/src/backends/sqlite/CMakeLists.txt @@ -9,6 +9,8 @@ endif() add_library(${PROJECT_NAME} SHARED backend_sqlite.cpp + db_functions.cpp + time_t_to_timestamp.cpp ) target_link_libraries(${PROJECT_NAME} diff --git a/src/backends/sqlite/backend_sqlite.cpp b/src/backends/sqlite/backend_sqlite.cpp index d4578c3..ab02457 100644 --- a/src/backends/sqlite/backend_sqlite.cpp +++ b/src/backends/sqlite/backend_sqlite.cpp @@ -23,6 +23,7 @@ #include "dindexerConfig.h" #include "duckhandy/stringize.h" #include "SQLiteCpp/SQLiteCpp.h" +#include "db_functions.hpp" #include #include #include @@ -59,8 +60,19 @@ access_time DATETIME, modify_time DATETIME, unreadable INTEGER NOT NULL, mimetype TEXT NOT NULL, -charset TEXT NOT NULL, -tags TEXT NOT NULL +charset TEXT NOT NULL +);)"; + + const char g_create_table_file_tags[] = R"(CREATE TABLE file_tags( +id INTEGER PRIMARY KEY AUTOINCREMENT, +tag TEXT NOT NULL, +file_id INTEGER NOT NULL +);)"; + + const char g_create_table_set_tags[] = R"(CREATE TABLE set_tags( +id INTEGER PRIMARY KEY AUTOINCREMENT, +tag TEXT NOT NULL, +set_id INTEGER NOT NULL );)"; struct SqliteConnectionSettings { @@ -79,7 +91,7 @@ namespace YAML { } static bool decode (const Node& parNode, dindb::SqliteConnectionSettings& parSettings) { - if (not parNode.IsMap() or parNode.size() < 2) { + if (not parNode.IsMap() or parNode.size() < 1) { return false; } @@ -105,10 +117,11 @@ namespace dindb { using SQLite::Database; using SQLite::OPEN_READONLY; using SQLite::OPEN_CREATE; + using SQLite::OPEN_READWRITE; assert(not m_db); if (not m_db) - m_db.reset(new Database(m_db_path, (m_read_only ? OPEN_READONLY : OPEN_CREATE))); + m_db.reset(new Database(m_db_path, (m_read_only ? 
OPEN_READONLY : OPEN_CREATE bitor OPEN_READWRITE))); assert(m_db); if (not m_read_only) { @@ -116,6 +129,10 @@ namespace dindb { m_db->exec(g_create_table_files); if (not m_db->tableExists("sets")) m_db->exec(g_create_table_sets); + if (not m_db->tableExists("file_tags")) + m_db->exec(g_create_table_file_tags); + if (not m_db->tableExists("set_tags")) + m_db->exec(g_create_table_set_tags); } } @@ -153,100 +170,11 @@ namespace dindb { } void BackendSQLite::write_files (const std::vector& parData, const mchlib::SetRecordDataFull& parSetData, const std::string& parSignature) { - //using dhandy::lexical_cast; - //using boost::string_ref; - - //const auto data_size = static_cast(parData.size()); - //const auto group_id_int = m_redis.hincrby(PROGRAM_NAME ":indices", "set", 1); - //const auto file_id_int = m_redis.hincrby(PROGRAM_NAME ":indices", "files", data_size); - - //const auto group_id = lexical_cast(group_id_int); - //const std::string set_key = PROGRAM_NAME ":set:" + group_id; - //const std::string level_key = PROGRAM_NAME ":levels:" + group_id; - //assert(file_id_int >= data_size); - //const auto base_file_id = file_id_int - data_size + 1; - - //auto batch = m_redis.make_batch(); - - //batch.hmset( - // set_key, - // "name", parSetData.name, - // "disk_label", parSetData.disk_label, - // "fs_uuid", parSetData.fs_uuid, - // "type", parSetData.type, - // "content_type", parSetData.content_type, - // "base_file_id", lexical_cast(base_file_id), - // "item_count", lexical_cast(parData.size()), - // "dir_count", lexical_cast(std::count_if(parData.begin(), parData.end(), [](const mchlib::FileRecordData& r){return r.is_directory;})), - // "creation", lexical_cast(std::time(nullptr)), - // "app_name", parSignature - //); - -//#if !defined(NDEBUG) -// std::size_t inserted_count = 0; -//#endif -// for (auto z = base_file_id; z < base_file_id + data_size; ++z) { -// const std::string file_key = PROGRAM_NAME ":file:" + lexical_cast(z); -// assert(z >= base_file_id); -// 
assert(static_cast(z - base_file_id) < parData.size()); -// const auto& file_data = parData[z - base_file_id]; -// const std::string hash = tiger_to_string(file_data.hash); -// batch.hmset( -// file_key, -// "hash", hash, -// "path", file_data.path(), -// "size", lexical_cast(file_data.size), -// "level", lexical_cast(file_data.level), -// "mime_type", file_data.mime_type(), -// "mime_charset", file_data.mime_charset(), -// "is_directory", (file_data.is_directory ? '1' : '0'), -// "is_symlink", (file_data.is_symlink ? '1' : '0'), -// "unreadable", (file_data.unreadable ? '1' : '0'), -// "hash_valid", (file_data.hash_valid ? '1' : '0'), -// "group_id", group_id, -// "atime", lexical_cast(file_data.atime), -// "mtime", lexical_cast(file_data.mtime) -// ); -// -// batch.sadd( -// PROGRAM_NAME ":hash:" + hash, -// lexical_cast(z) -// ); -// -// batch.zadd(level_key, redis::IncRedisBatch::ZADD_None, false, static_cast(file_data.level), file_key); -//#if !defined(NDEBUG) -// ++inserted_count; -//#endif -// } -// assert(inserted_count == parData.size()); -// -// batch.throw_if_failed(); + write_to_db(*m_db, parData, parSetData, parSignature); } bool BackendSQLite::search_file_by_hash (mchlib::FileRecordData& parItem, mchlib::SetRecordDataFull& parSet, const mchlib::TigerHash& parHash) { -// using boost::empty; -// -// const std::string hash_key = PROGRAM_NAME ":hash:" + tiger_to_string(parHash); -// auto hash_reply = m_redis.srandmember(hash_key); -// if (not hash_reply) { -// return false; -// } -// else { -// const auto file_key = PROGRAM_NAME ":file:" + *hash_reply; -// auto set_key_and_file_item = redis::range_as(m_redis.hscan(file_key)); -// parItem = std::move(set_key_and_file_item.second); -// assert(parItem.hash == parHash); -// const std::string group_key = PROGRAM_NAME ":set:" + set_key_and_file_item.first; -// -// auto scan_range = m_redis.hscan(group_key); -// if (empty(scan_range)) { -// return false; -// } -// else { -// parSet = 
redis::range_as(m_redis.hscan(group_key)); -// return true; -// } -// } + return read_from_db(*m_db, parItem, parSet, parHash); } std::vector BackendSQLite::locate_in_db (const std::string& parSearch, const TagList& parTags) { @@ -282,7 +210,7 @@ namespace dindb { } } //namespace dindb -extern "C" dindb::Backend* dindexer_create_backend (const YAML::Node* parConfig) { +extern "C" [[gnu::used]] dindb::Backend* dindexer_create_backend (const YAML::Node* parConfig) { if (not parConfig) return nullptr; @@ -302,7 +230,7 @@ extern "C" void dindexer_destroy_backend (dindb::Backend* parDele) { } extern "C" const char* dindexer_backend_name() { - return "redis"; + return "sqlite"; } extern "C" int dindexer_backend_iface_version() { diff --git a/src/backends/sqlite/db_functions.cpp b/src/backends/sqlite/db_functions.cpp new file mode 100644 index 0000000..4de9013 --- /dev/null +++ b/src/backends/sqlite/db_functions.cpp @@ -0,0 +1,176 @@ +/* Copyright 2015, 2016, Michele Santullo + * This file is part of "dindexer". + * + * "dindexer" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "dindexer" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with "dindexer". If not, see . 
+ */
+
+#include "db_functions.hpp"
+#include "SQLiteCpp/SQLiteCpp.h"
+#include "dindexer-machinery/recorddata.hpp"
+#include "time_t_to_timestamp.hpp"
+#include <cassert>
+#include <cstdint>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+
+namespace dindb {
+	namespace {
+		//Typed readers for the columns of a result row, one specialization per type.
+		template <typename T> T to (const SQLite::Column& parCol);
+		template <> uint64_t to (const SQLite::Column& parCol) {
+			static_assert(sizeof(long long int) == sizeof(uint64_t), "Unexpected type size");
+			const auto v = static_cast<long long int>(parCol);
+			return static_cast<uint64_t>(v);
+		}
+		template <> bool to (const SQLite::Column& parCol) {
+			const int v = parCol;
+			return static_cast<bool>(v);
+		}
+		template <> uint16_t to (const SQLite::Column& parCol) {
+			const int v = parCol;
+			return static_cast<uint16_t>(v);
+		}
+		template <> uint32_t to (const SQLite::Column& parCol) {
+			static_assert(sizeof(int) == sizeof(uint32_t), "Unexpected type size");
+			const int v = parCol;
+			return static_cast<uint32_t>(v);
+		}
+		template <> std::string to (const SQLite::Column& parCol) {
+			const char* v = parCol;
+			return std::string(v);
+		}
+		template <> char to (const SQLite::Column& parCol) {
+			const char* v = parCol;
+			return *v;
+		}
+	} //unnamed namespace
+
+	//Not implemented yet: nothing is written and trans is never committed.
+	void tag_files (SQLite::Database& parDB, const std::vector<FileIDType>& parFiles, const std::vector<boost::string_ref>& parTags, GroupIDType parSet) {
+		SQLite::Transaction trans(parDB);
+		if (InvalidGroupID != parSet) {
+			//TODO: store the tags, presumably as rows of the file_tags table
+		}
+	}
+
+	//Not implemented yet: commits an empty transaction. NOTE(review): vector element types follow dindb::Backend - confirm.
+	void tag_files (SQLite::Database& parDB, const std::vector<std::string>& parRegexes, const std::vector<boost::string_ref>& parTags, GroupIDType parSet) {
+		SQLite::Transaction trans(parDB);
+		trans.commit();
+	}
+
+	//Inserts parSetData as a new row of "sets", then one row of "files" per entry of
+	//parData pointing at that set; the transaction is committed after the last insert.
+	void write_to_db (SQLite::Database& parDB, const std::vector<mchlib::FileRecordData>& parData, const mchlib::SetRecordDataFull& parSetData, const std::string& parSignature) {
+		SQLite::Transaction trans(parDB);
+
+		SQLite::Statement set_query(
+			parDB,
+			"INSERT INTO "
+			"sets(desc,type,app_name,content_type,fs_uuid,disk_label) "
+			"VALUES(?,?,?,?,?,?);"
+		);
+		set_query.bind(1, parSetData.name);
+		set_query.bind(2, std::string(&parSetData.type, 1));
+		set_query.bind(3, parSignature);
+		set_query.bind(4, std::string(&parSetData.content_type, 1));
+		set_query.bind(5, parSetData.fs_uuid);
+		set_query.bind(6, parSetData.disk_label);
+		set_query.exec();
+		const auto new_group_id = parDB.getLastInsertRowid();
+		assert(new_group_id > 0);
+
+		//Prepared once and reset after every file instead of being rebuilt per iteration
+		SQLite::Statement file_query(
+			parDB,
+			"INSERT INTO files(path,hash,level,group_id,is_directory,"
+			"is_symlink,size,access_time,modify_time,is_hash_valid,"
+			"unreadable,mimetype,charset) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?);"
+		);
+		for (const auto& itm : parData) {
+			file_query.bind(1, std::string(itm.path()));
+			file_query.bind(2, tiger_to_string(itm.hash));
+			file_query.bind(3, itm.level);
+			file_query.bind(4, new_group_id);
+			file_query.bind(5, itm.is_directory);
+			file_query.bind(6, itm.is_symlink);
+			file_query.bind(7, static_cast<long long int>(itm.size));
+			file_query.bind(8, time_t_to_timestamp(itm.atime));
+			file_query.bind(9, time_t_to_timestamp(itm.mtime));
+			file_query.bind(10, itm.hash_valid);
+			file_query.bind(11, itm.unreadable);
+			file_query.bind(12, std::string(itm.mime_type()));
+			file_query.bind(13, std::string(itm.mime_charset()));
+			file_query.exec();
+			file_query.reset();
+		}
+		trans.commit();
+	}
+
+	//Fills parItem from the first "files" row whose hash equals parHash, and parSet
+	//from the "sets" row that file belongs to. Returns false when no file has that
+	//hash; throws std::length_error when the file's group_id is missing from "sets".
+	bool read_from_db (
+		SQLite::Database& parDB,
+		mchlib::FileRecordData& parItem,
+		mchlib::SetRecordDataFull& parSet,
+		const mchlib::TigerHash& parHash
+	) {
+		uint32_t group_id;
+		{
+			SQLite::Statement query(
+				parDB,
+				"SELECT path,level,group_id,is_directory,is_symlink,size "
+				"FROM files WHERE hash=?;"
+			);
+			//The key is formatted with the same call write_to_db() uses to store it
+			query.bind(1, tiger_to_string(parHash));
+			if (not query.executeStep())
+				return false;
+			parItem.abs_path = to<std::string>(query.getColumn("path"));
+			parItem.hash = parHash;
+			parItem.level = to<uint16_t>(query.getColumn("level"));
+			parItem.size = to<uint64_t>(query.getColumn("size"));
+			parItem.is_directory = to<bool>(query.getColumn("is_directory"));
+			parItem.is_symlink = to<bool>(query.getColumn("is_symlink"));
+			group_id = query.getColumn("group_id");
+			if (parItem.abs_path != "/") {
+				parItem.abs_path = std::string("/") + parItem.abs_path;
+			}
+			parItem.path_offset = 1;
+		}
+
+		SQLite::Statement query(
+			parDB,
+			"SELECT desc,type,disk_number,fs_uuid,disk_label,content_type "
+			"FROM sets WHERE id=?;"
+		);
+		query.bind(1, group_id);
+		if (not query.executeStep()) {
+			std::ostringstream err_msg;
+			err_msg << "Missing set: found a record with group_id=" << group_id;
+			err_msg << " but there is no such id in table \"sets\"";
+			throw std::length_error(err_msg.str());
+		}
+		parSet.type = to<char>(query.getColumn("type"));
+		//The set name is kept in the "desc" column; this query returns no "name" column
+		parSet.name = to<std::string>(query.getColumn("desc"));
+		parSet.disk_number = to<uint32_t>(query.getColumn("disk_number"));
+		parSet.fs_uuid = to<std::string>(query.getColumn("fs_uuid"));
+		parSet.disk_label = to<std::string>(query.getColumn("disk_label"));
+		parSet.content_type = to<char>(query.getColumn("content_type"));
+		return true;
+	}
+} //namespace dindb
diff --git a/src/backends/sqlite/db_functions.hpp b/src/backends/sqlite/db_functions.hpp
new file mode 100644
index 0000000..60f1821
--- /dev/null
+++ b/src/backends/sqlite/db_functions.hpp
@@ -0,0 +1,46 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef id48FE261DDC9F4F608C9E82FCCA4E55C0
+#define id48FE261DDC9F4F608C9E82FCCA4E55C0
+
+#include "backends/db_backend.hpp"
+#include <boost/utility/string_ref.hpp>
+#include <string>
+#include <vector>
+
+namespace SQLite { class Database; }
+
+namespace mchlib {
+	struct FileRecordData;
+	struct SetRecordDataFull;
+	struct TigerHash;
+} //namespace mchlib
+
+namespace dindb {
+	//Tag
+	//NOTE(review): header names and vector element types were reconstructed - confirm against dindb::Backend
+	void tag_files (SQLite::Database& parDB, const std::vector<FileIDType>& parFiles, const std::vector<boost::string_ref>& parTags, GroupIDType parSet);
+	void tag_files (SQLite::Database& parDB, const std::vector<std::string>& parRegexes, const std::vector<boost::string_ref>& parTags, GroupIDType parSet);
+
+	//Scan: write_to_db() stores a set together with its files, read_from_db()
+	//looks a file up by hash and returns false when no file has that hash
+	void write_to_db (SQLite::Database& parDB, const std::vector<mchlib::FileRecordData>& parData, const mchlib::SetRecordDataFull& parSetData, const std::string& parSignature);
+	bool read_from_db (SQLite::Database& parDB, mchlib::FileRecordData& parItem, mchlib::SetRecordDataFull& parSet, const mchlib::TigerHash& parHash);
+} //namespace dindb
+
+#endif
diff --git a/src/backends/sqlite/time_t_to_timestamp.cpp b/src/backends/sqlite/time_t_to_timestamp.cpp
new file mode 100644
index 0000000..72d6d66
--- /dev/null
+++ b/src/backends/sqlite/time_t_to_timestamp.cpp
@@ -0,0 +1,37 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "time_t_to_timestamp.hpp"
+#include <chrono>
+#include <ctime>
+
+namespace dindb {
+	//Returns the whole seconds elapsed between the epoch of
+	//std::chrono::system_clock and parTime.
+	//The reference instant is taken from system_clock rather than built with
+	//std::mktime(): mktime() reads its argument as local time, so a reference
+	//made from "1970-01-01 00:00:00" shifts every result by the UTC offset.
+	//NOTE(review): system_clock's epoch is the Unix epoch from C++20 on, and in
+	//practice on older toolchains too - confirm for unusual targets.
+	long long int time_t_to_timestamp (const time_t& parTime) {
+		using std::chrono::system_clock;
+
+		const std::time_t epoch_start = system_clock::to_time_t(system_clock::time_point());
+		const auto retval = static_cast<long long int>(std::difftime(parTime, epoch_start));
+		return retval;
+	}
+} //namespace dindb
diff --git a/src/backends/sqlite/time_t_to_timestamp.hpp b/src/backends/sqlite/time_t_to_timestamp.hpp
new file mode 100644
index 0000000..0987dbd
--- /dev/null
+++ b/src/backends/sqlite/time_t_to_timestamp.hpp
@@ -0,0 +1,28 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef idDA9D82D61C5F431EA7257AA7E2E7B1F5
+#define idDA9D82D61C5F431EA7257AA7E2E7B1F5
+
+#include <ctime>
+
+namespace dindb {
+	//Whole seconds elapsed between the std::chrono::system_clock epoch and parTime
+	[[gnu::pure]] long long int time_t_to_timestamp (const time_t& parTime);
+} //namespace dindb
+
+#endif