diff --git a/src/dbbackend.cpp b/src/dbbackend.cpp index 6a77cb1..8fc7fa5 100644 --- a/src/dbbackend.cpp +++ b/src/dbbackend.cpp @@ -19,8 +19,12 @@ #include "pq/connection.hpp" #include #include +#include namespace din { + namespace { + } //unnamed namespace + void write_to_db (const std::vector& parData) { if (parData.empty()) { return; @@ -30,10 +34,14 @@ namespace din { query << "BEGIN;\n"; query << "INSERT INTO \"Files\" (path, hash, level, group_id, is_directory, is_symlink, size) VALUES "; + pq::Connection conn("michele", "password", "dindexer", "100.200.100.200", 5432); + conn.connect(); + const char* comma = ""; for (const auto& itm : parData) { query << comma; - query << "('" << itm.path << "','" << itm.hash << "'," << itm.level << ',' + query << '(' << conn.escape_literal(itm.path) << ",'" << itm.hash << "'," + << itm.level << ',' << 10 << ',' << (itm.is_directory ? "true" : "false") << ',' << (itm.is_symlink ? "true" : "false") << ',' << itm.size << ')' ; @@ -42,8 +50,6 @@ namespace din { query << ';'; query << "\nCOMMIT;"; - pq::Connection conn("michele", "password", "dindexer", "100.200.100.200", 5432); - conn.connect(); conn.query_void(query.str()); } } //namespace din diff --git a/src/dbbackend.hpp b/src/dbbackend.hpp index 548e55c..3fe5e58 100644 --- a/src/dbbackend.hpp +++ b/src/dbbackend.hpp @@ -18,16 +18,16 @@ #ifndef id842AF56BD80A4CF59957451DF9082AA2 #define id842AF56BD80A4CF59957451DF9082AA2 -#include +#include #include -#include +#include namespace din { struct FileRecordData { - const boost::string_ref path; + const std::string path; const std::string hash; const uint16_t level; - const std::size_t size; + const uint64_t size; const bool is_directory; const bool is_symlink; }; diff --git a/src/indexer.cpp b/src/indexer.cpp index 149ec35..fa93894 100644 --- a/src/indexer.cpp +++ b/src/indexer.cpp @@ -53,45 +53,61 @@ namespace din { std::string path; HashType hash; + uint64_t file_size; uint16_t level; bool is_dir; bool is_symlink; }; namespace { - void hash_dir (std::vector::iterator parEntry, std::vector::iterator parEnd, const PathName& parCurrDir) { + void hash_dir (std::vector::iterator parEntry, std::vector::iterator parEnd, const PathName& parCurrDir, std::atomic& parDone) { + assert(parEntry != parEnd); + assert(parEntry->is_dir); FileEntry& curr_entry = *parEntry; + auto& curr_entry_it = parEntry; //Build a blob with the hashes and filenames of every directory that //is a direct child of current entry { std::vector dir_blob; - auto it_entry = parEntry; + auto it_entry = curr_entry_it; while ( - it_entry != parEnd - and (not it_entry->is_dir or (it_entry->level <= curr_entry.level - and parCurrDir != PathName(it_entry->path).pop_right())) - ) { + it_entry != parEnd and ( + it_entry->level == curr_entry.level + or parCurrDir != PathName(it_entry->path).pop_right() + //and (not it_entry->is_dir or (it_entry->level <= curr_entry.level + //and parCurrDir != PathName(it_entry->path).pop_right())) + )) { + assert(it_entry->level >= curr_entry.level); ++it_entry; } #if !defined(NDEBUG) std::cout << "Making initial hash for " << parCurrDir << "...\n"; #endif - while (parEnd != it_entry and it_entry->is_dir and it_entry->level == parEntry->level + 1) { + while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->path).pop_right()) { PathName curr_subdir(it_entry->path); - hash_dir(it_entry, parEnd, curr_subdir); + if (it_entry->is_dir) { + hash_dir(it_entry, parEnd, curr_subdir, parDone); - std::string relpath = make_relative_path(parCurrDir, curr_subdir).path(); - const auto old_size = dir_blob.size(); - dir_blob.resize(old_size + sizeof(HashType) + relpath.size()); - std::copy(it_entry->hash.byte_data, it_entry->hash.byte_data + sizeof(HashType), dir_blob.begin() + old_size); - std::copy(relpath.begin(), relpath.end(), dir_blob.begin() + old_size + sizeof(HashType)); + std::string relpath = make_relative_path(parCurrDir, curr_subdir).path(); + const auto old_size = dir_blob.size(); + dir_blob.resize(old_size + sizeof(HashType) + relpath.size()); + std::copy(it_entry->hash.byte_data, it_entry->hash.byte_data + sizeof(HashType), dir_blob.begin() + old_size); + std::copy(relpath.begin(), relpath.end(), dir_blob.begin() + old_size + sizeof(HashType)); + } + else { + std::string relpath = make_relative_path(parCurrDir, curr_subdir).path(); + const auto old_size = dir_blob.size(); + dir_blob.resize(old_size + relpath.size()); + std::copy(relpath.begin(), relpath.end(), dir_blob.begin() + old_size); + } ++it_entry; } tiger_data(dir_blob, curr_entry.hash); + curr_entry.file_size = 0; #if !defined(NDEBUG) std::cout << "Got intermediate hash for dir " << parCurrDir << ": " << tiger_to_string(curr_entry.hash) << '\n'; #endif @@ -99,22 +115,24 @@ namespace din { //Now with the initial hash ready, let's start hashing files, if any { - auto it_entry = parEntry; + auto it_entry = curr_entry_it; while ( it_entry != parEnd and (it_entry->is_dir - or it_entry->level != parEntry->level + 1 + or it_entry->level != curr_entry_it->level + 1 or PathName(it_entry->path).pop_right() != parCurrDir ) ) { ++it_entry; } - while (it_entry != parEnd and not it_entry->is_dir and it_entry->level == parEntry->level + 1 and PathName(it_entry->path).pop_right() == parCurrDir) { + while (it_entry != parEnd and not it_entry->is_dir and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->path).pop_right() == parCurrDir) { + assert(not it_entry->is_dir); #if !defined(NDEBUG) std::cout << "Hashing file " << it_entry->path << "..."; #endif - tiger_file(it_entry->path, it_entry->hash, parEntry->hash); + tiger_file(it_entry->path, it_entry->hash, curr_entry_it->hash, it_entry->file_size); + ++parDone; #if !defined(NDEBUG) std::cout << ' ' << tiger_to_string(it_entry->hash) << '\n'; #endif @@ -123,8 +141,9 @@ namespace din { } #if !defined(NDEBUG) - std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(parEntry->hash) << '\n'; + std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(curr_entry_it->hash) << '\n'; #endif + ++parDone; } } //unnamed namespace @@ -201,7 +220,10 @@ namespace din { std::cout << "-----------------------------------------------------\n"; #endif - hash_dir(m_local_data->paths.begin(), m_local_data->paths.end(), base_path); + m_local_data->done_count = 0; + hash_dir(m_local_data->paths.begin(), m_local_data->paths.end(), base_path, m_local_data->done_count); + + assert(m_local_data->done_count == m_local_data->paths.size()); #if !defined(NDEBUG) for (const auto& itm : m_local_data->paths) { @@ -214,10 +236,10 @@ namespace din { data.reserve(m_local_data->paths.size()); for (const auto& itm : m_local_data->paths) { data.push_back(FileRecordData { - boost::string_ref(itm.path), + make_relative_path(base_path, PathName(itm.path)).path(), tiger_to_string(itm.hash), itm.level, - 0, + itm.file_size, itm.is_dir, itm.is_symlink }); diff --git a/src/pq/connection.cpp b/src/pq/connection.cpp index f1b9b57..8753f81 100644 --- a/src/pq/connection.cpp +++ b/src/pq/connection.cpp @@ -123,4 +123,11 @@ namespace pq { throw DatabaseException("Error running query", error_message(), __FILE__, __LINE__); } } + + std::string Connection::escape_literal (const std::string& parString) { + typedef std::unique_ptr PQArrayType; + + PQArrayType clean_str(PQescapeLiteral(m_localData->connection, parString.c_str(), parString.size()), &PQfreemem); + return std::string(clean_str.get()); + } } //namespace pq diff --git a/src/pq/connection.hpp b/src/pq/connection.hpp index 7f630e1..60bdfd1 100644 --- a/src/pq/connection.hpp +++ b/src/pq/connection.hpp @@ -36,6 +36,8 @@ namespace pq { void query_void ( const std::string& parQuery ); ResultSet query ( const std::string& parQuery ); + std::string escape_literal ( const std::string& parString ); + private: struct LocalData; @@ -46,6 +48,7 @@ namespace pq { const uint16_t m_port; std::unique_ptr m_localData; }; + } //namespace pq #endif diff --git a/src/tiger.cpp b/src/tiger.cpp index 963eb26..a4ec3fc 100644 --- a/src/tiger.cpp +++ b/src/tiger.cpp @@ -53,7 +53,7 @@ namespace din { parHash.part_c = 0xF096A5B4C3B2E187ULL; } - void tiger_file (const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir) { + void tiger_file (const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir, uint64_t& parSizeOut) { typedef decltype(std::declval().tellg()) FileSizeType; tiger_init_hash(parHashFile); @@ -99,6 +99,8 @@ namespace din { //dir's hash value (64 bytes) as if they were part of the data. tiger_sse2_last_chunk(buff_ptr + aligned_size, buff_ptr + aligned_size, remaining - aligned_size, file_size, file_size + hash_size, parHashFile.data, parHashDir.data, g_tiger_padding); } + + parSizeOut = static_cast(file_size); } std::string tiger_to_string (const TigerHash& parHash) { diff --git a/src/tiger.hpp b/src/tiger.hpp index 6b3d3dc..186cb3f 100644 --- a/src/tiger.hpp +++ b/src/tiger.hpp @@ -39,7 +39,7 @@ namespace din { static_assert(sizeof(TigerHash) == 24, "Wrong struct size"); - void tiger_file ( const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir ); + void tiger_file ( const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir, uint64_t& parSizeOut ); void tiger_init_hash ( TigerHash& parHash ); std::string tiger_to_string ( const TigerHash& parHash ); void tiger_data ( const std::string& parData, TigerHash& parHash );