From 8b9241757d0d5d63e35d20e285101dbfbd18cf1c Mon Sep 17 00:00:00 2001 From: King_DuckZ Date: Fri, 27 Nov 2015 20:41:36 +0000 Subject: [PATCH] Don't save to DB if the disk has been added already. --- src/dbbackend.cpp | 49 +++++++++++++++++++++++++++++++++++++++++++++++ src/dbbackend.hpp | 19 ++++++++++++------ src/indexer.cpp | 14 +++++++++++++- src/indexer.hpp | 2 +- src/main.cpp | 4 +++- 5 files changed, 79 insertions(+), 9 deletions(-) diff --git a/src/dbbackend.cpp b/src/dbbackend.cpp index 90fcdfa..538aaef 100644 --- a/src/dbbackend.cpp +++ b/src/dbbackend.cpp @@ -21,6 +21,8 @@ #include #include #include +#include +#include namespace din { namespace { @@ -36,6 +38,53 @@ namespace din { } } //unnamed namespace + bool read_from_db (FileRecordData& parItem, SetRecordDataFull& parSet, const DinDBSettings& parDB, std::string&& parHash) { + using boost::lexical_cast; + + pq::Connection conn(std::string(parDB.username), std::string(parDB.password), std::string(parDB.dbname), std::string(parDB.address), parDB.port); + conn.connect(); + + uint32_t group_id; + { + std::ostringstream oss; + oss << "SELECT path,level,group_id,is_directory,is_symlink,size FROM files WHERE hash=" << + conn.escaped_literal(parHash) << + " LIMIT 1;"; + + auto resultset = conn.query(oss.str()); + if (resultset.empty()) { + return false; + } + + auto row = resultset[0]; + parItem.path = row["path"]; + parItem.hash = std::move(parHash); + parItem.level = lexical_cast(row["level"]); + parItem.size = lexical_cast(row["size"]); + parItem.is_directory = (row["is_directory"] == "t" ? true : false); + parItem.is_symlink = (row["is_symlink"] == "t" ? true : false); + group_id = lexical_cast(row["group_id"]); + } + + { + std::ostringstream oss; + oss << "SELECT \"desc\",\"type\",\"disk_number\" FROM sets WHERE \"id\"=" << group_id << ';'; + + auto resultset = conn.query(oss.str()); + if (resultset.empty()) { + std::ostringstream err_msg; + err_msg << "Missing set: found a record with group_id=" << group_id; + err_msg << " but there is no such id in table \"sets\""; + throw std::length_error(err_msg.str()); + } + auto row = resultset[0]; + parSet.type = lexical_cast(row["type"]); + parSet.name = row["desc"]; + parSet.disk_number = lexical_cast(row["disk_number"]); + } + return true; + } + void write_to_db (const DinDBSettings& parDB, const std::vector& parData, const SetRecordData& parSetData) { if (parData.empty()) { return; diff --git a/src/dbbackend.hpp b/src/dbbackend.hpp index 204a2b8..42d22b8 100644 --- a/src/dbbackend.hpp +++ b/src/dbbackend.hpp @@ -27,12 +27,18 @@ namespace din { struct DinDBSettings; struct FileRecordData { - const std::string path; - const std::string hash; - const uint16_t level; - const uint64_t size; - const bool is_directory; - const bool is_symlink; + std::string path; + std::string hash; + uint16_t level; + uint64_t size; + bool is_directory; + bool is_symlink; + }; + + struct SetRecordDataFull { + std::string name; + uint32_t disk_number; + char type; }; struct SetRecordData { @@ -41,6 +47,7 @@ namespace din { }; void write_to_db ( const DinDBSettings& parDB, const std::vector& parData, const SetRecordData& parSetData ); + bool read_from_db ( FileRecordData& parItem, SetRecordDataFull& parSet, const DinDBSettings& parDB, std::string&& parHash ); } //namespace din #endif diff --git a/src/indexer.cpp b/src/indexer.cpp index eb2ebd7..706ccf7 100644 --- a/src/indexer.cpp +++ b/src/indexer.cpp @@ -277,10 +277,21 @@ namespace din { #endif } - void Indexer::add_to_db (const std::string& parSetName, char parType) const { + bool Indexer::add_to_db (const std::string& parSetName, char parType, bool parForce) const { #if defined(WITH_PROGRESS_FEEDBACK) assert(m_local_data->done_count == m_local_data->file_count); #endif + + if (not parForce) { + std::string first_hash(tiger_to_string(m_local_data->paths.front().hash, true)); + FileRecordData itm; + SetRecordDataFull set; + const bool already_in_db = read_from_db(itm, set, m_local_data->db_settings, std::move(first_hash)); + if (already_in_db) { + return false; + } + } + PathName base_path(m_local_data->paths.front().path); std::vector data; data.reserve(m_local_data->paths.size()); @@ -297,6 +308,7 @@ namespace din { SetRecordData set_data {parSetName, parType}; write_to_db(m_local_data->db_settings, data, set_data); + return true; } bool Indexer::add_path (const char* parPath, int parLevel, bool parIsDir, bool parIsSymLink) { diff --git a/src/indexer.hpp b/src/indexer.hpp index 24729b3..997f0d9 100644 --- a/src/indexer.hpp +++ b/src/indexer.hpp @@ -54,7 +54,7 @@ namespace din { std::condition_variable& step_notify ( void ); #endif void calculate_hash ( void ); - void add_to_db ( const std::string& parSetName, char parType ) const; + bool add_to_db ( const std::string& parSetName, char parType, bool parForce=false ) const; bool empty ( void ) const; private: diff --git a/src/main.cpp b/src/main.cpp index 79f3e44..0f9444f 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -94,7 +94,9 @@ int main (int parArgc, char* parArgv[]) { if (verbose) { std::cout << "Writing to database...\n"; } - indexer.add_to_db(vm["setname"].as(), vm["type"].as()); + if (not indexer.add_to_db(vm["setname"].as(), vm["type"].as())) { + std::cerr << "Not written to DB, likely because a set with the same hash already exists\n"; + } } return 0; }