1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2024-11-25 00:53:43 +00:00

Fix hashing and db insertion.

This commit is contained in:
King_DuckZ 2015-11-11 12:28:04 +00:00
parent 777b91b898
commit 4236b2ece8
7 changed files with 70 additions and 30 deletions

View file

@ -19,8 +19,12 @@
#include "pq/connection.hpp" #include "pq/connection.hpp"
#include <string> #include <string>
#include <sstream> #include <sstream>
#include <utility>
namespace din { namespace din {
namespace {
} //unnamed namespace
void write_to_db (const std::vector<FileRecordData>& parData) { void write_to_db (const std::vector<FileRecordData>& parData) {
if (parData.empty()) { if (parData.empty()) {
return; return;
@ -30,10 +34,14 @@ namespace din {
query << "BEGIN;\n"; query << "BEGIN;\n";
query << "INSERT INTO \"Files\" (path, hash, level, group_id, is_directory, is_symlink, size) VALUES "; query << "INSERT INTO \"Files\" (path, hash, level, group_id, is_directory, is_symlink, size) VALUES ";
pq::Connection conn("michele", "password", "dindexer", "100.200.100.200", 5432);
conn.connect();
const char* comma = ""; const char* comma = "";
for (const auto& itm : parData) { for (const auto& itm : parData) {
query << comma; query << comma;
query << "('" << itm.path << "','" << itm.hash << "'," << itm.level << ',' query << '(' << conn.escape_literal(itm.path) << ",'" << itm.hash << "',"
<< itm.level << ','
<< 10 << ',' << (itm.is_directory ? "true" : "false") << ',' << 10 << ',' << (itm.is_directory ? "true" : "false") << ','
<< (itm.is_symlink ? "true" : "false") << ',' << itm.size << ')' << (itm.is_symlink ? "true" : "false") << ',' << itm.size << ')'
; ;
@ -42,8 +50,6 @@ namespace din {
query << ';'; query << ';';
query << "\nCOMMIT;"; query << "\nCOMMIT;";
pq::Connection conn("michele", "password", "dindexer", "100.200.100.200", 5432);
conn.connect();
conn.query_void(query.str()); conn.query_void(query.str());
} }
} //namespace din } //namespace din

View file

@ -18,16 +18,16 @@
#ifndef id842AF56BD80A4CF59957451DF9082AA2 #ifndef id842AF56BD80A4CF59957451DF9082AA2
#define id842AF56BD80A4CF59957451DF9082AA2 #define id842AF56BD80A4CF59957451DF9082AA2
#include <boost/utility/string_ref.hpp> #include <string>
#include <vector> #include <vector>
#include <utility> #include <cstdint>
namespace din { namespace din {
struct FileRecordData { struct FileRecordData {
const boost::string_ref path; const std::string path;
const std::string hash; const std::string hash;
const uint16_t level; const uint16_t level;
const std::size_t size; const uint64_t size;
const bool is_directory; const bool is_directory;
const bool is_symlink; const bool is_symlink;
}; };

View file

@ -53,45 +53,61 @@ namespace din {
std::string path; std::string path;
HashType hash; HashType hash;
uint64_t file_size;
uint16_t level; uint16_t level;
bool is_dir; bool is_dir;
bool is_symlink; bool is_symlink;
}; };
namespace { namespace {
void hash_dir (std::vector<FileEntry>::iterator parEntry, std::vector<FileEntry>::iterator parEnd, const PathName& parCurrDir) { void hash_dir (std::vector<FileEntry>::iterator parEntry, std::vector<FileEntry>::iterator parEnd, const PathName& parCurrDir, std::atomic<std::size_t>& parDone) {
assert(parEntry != parEnd);
assert(parEntry->is_dir);
FileEntry& curr_entry = *parEntry; FileEntry& curr_entry = *parEntry;
auto& curr_entry_it = parEntry;
//Build a blob with the hashes and filenames of every directory that //Build a blob with the hashes and filenames of every directory that
//is a direct child of current entry //is a direct child of current entry
{ {
std::vector<char> dir_blob; std::vector<char> dir_blob;
auto it_entry = parEntry; auto it_entry = curr_entry_it;
while ( while (
it_entry != parEnd it_entry != parEnd and (
and (not it_entry->is_dir or (it_entry->level <= curr_entry.level it_entry->level == curr_entry.level
and parCurrDir != PathName(it_entry->path).pop_right())) or parCurrDir != PathName(it_entry->path).pop_right()
) { //and (not it_entry->is_dir or (it_entry->level <= curr_entry.level
//and parCurrDir != PathName(it_entry->path).pop_right()))
)) {
assert(it_entry->level >= curr_entry.level);
++it_entry; ++it_entry;
} }
#if !defined(NDEBUG) #if !defined(NDEBUG)
std::cout << "Making initial hash for " << parCurrDir << "...\n"; std::cout << "Making initial hash for " << parCurrDir << "...\n";
#endif #endif
while (parEnd != it_entry and it_entry->is_dir and it_entry->level == parEntry->level + 1) { while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->path).pop_right()) {
PathName curr_subdir(it_entry->path); PathName curr_subdir(it_entry->path);
hash_dir(it_entry, parEnd, curr_subdir); if (it_entry->is_dir) {
hash_dir(it_entry, parEnd, curr_subdir, parDone);
std::string relpath = make_relative_path(parCurrDir, curr_subdir).path(); std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
const auto old_size = dir_blob.size(); const auto old_size = dir_blob.size();
dir_blob.resize(old_size + sizeof(HashType) + relpath.size()); dir_blob.resize(old_size + sizeof(HashType) + relpath.size());
std::copy(it_entry->hash.byte_data, it_entry->hash.byte_data + sizeof(HashType), dir_blob.begin() + old_size); std::copy(it_entry->hash.byte_data, it_entry->hash.byte_data + sizeof(HashType), dir_blob.begin() + old_size);
std::copy(relpath.begin(), relpath.end(), dir_blob.begin() + old_size + sizeof(HashType)); std::copy(relpath.begin(), relpath.end(), dir_blob.begin() + old_size + sizeof(HashType));
}
else {
std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
const auto old_size = dir_blob.size();
dir_blob.resize(old_size + relpath.size());
std::copy(relpath.begin(), relpath.end(), dir_blob.begin() + old_size);
}
++it_entry; ++it_entry;
} }
tiger_data(dir_blob, curr_entry.hash); tiger_data(dir_blob, curr_entry.hash);
curr_entry.file_size = 0;
#if !defined(NDEBUG) #if !defined(NDEBUG)
std::cout << "Got intermediate hash for dir " << parCurrDir << ": " << tiger_to_string(curr_entry.hash) << '\n'; std::cout << "Got intermediate hash for dir " << parCurrDir << ": " << tiger_to_string(curr_entry.hash) << '\n';
#endif #endif
@ -99,22 +115,24 @@ namespace din {
//Now with the initial hash ready, let's start hashing files, if any //Now with the initial hash ready, let's start hashing files, if any
{ {
auto it_entry = parEntry; auto it_entry = curr_entry_it;
while ( while (
it_entry != parEnd it_entry != parEnd
and (it_entry->is_dir and (it_entry->is_dir
or it_entry->level != parEntry->level + 1 or it_entry->level != curr_entry_it->level + 1
or PathName(it_entry->path).pop_right() != parCurrDir or PathName(it_entry->path).pop_right() != parCurrDir
) )
) { ) {
++it_entry; ++it_entry;
} }
while (it_entry != parEnd and not it_entry->is_dir and it_entry->level == parEntry->level + 1 and PathName(it_entry->path).pop_right() == parCurrDir) { while (it_entry != parEnd and not it_entry->is_dir and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->path).pop_right() == parCurrDir) {
assert(not it_entry->is_dir);
#if !defined(NDEBUG) #if !defined(NDEBUG)
std::cout << "Hashing file " << it_entry->path << "..."; std::cout << "Hashing file " << it_entry->path << "...";
#endif #endif
tiger_file(it_entry->path, it_entry->hash, parEntry->hash); tiger_file(it_entry->path, it_entry->hash, curr_entry_it->hash, it_entry->file_size);
++parDone;
#if !defined(NDEBUG) #if !defined(NDEBUG)
std::cout << ' ' << tiger_to_string(it_entry->hash) << '\n'; std::cout << ' ' << tiger_to_string(it_entry->hash) << '\n';
#endif #endif
@ -123,8 +141,9 @@ namespace din {
} }
#if !defined(NDEBUG) #if !defined(NDEBUG)
std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(parEntry->hash) << '\n'; std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(curr_entry_it->hash) << '\n';
#endif #endif
++parDone;
} }
} //unnamed namespace } //unnamed namespace
@ -201,7 +220,10 @@ namespace din {
std::cout << "-----------------------------------------------------\n"; std::cout << "-----------------------------------------------------\n";
#endif #endif
hash_dir(m_local_data->paths.begin(), m_local_data->paths.end(), base_path); m_local_data->done_count = 0;
hash_dir(m_local_data->paths.begin(), m_local_data->paths.end(), base_path, m_local_data->done_count);
assert(m_local_data->done_count == m_local_data->paths.size());
#if !defined(NDEBUG) #if !defined(NDEBUG)
for (const auto& itm : m_local_data->paths) { for (const auto& itm : m_local_data->paths) {
@ -214,10 +236,10 @@ namespace din {
data.reserve(m_local_data->paths.size()); data.reserve(m_local_data->paths.size());
for (const auto& itm : m_local_data->paths) { for (const auto& itm : m_local_data->paths) {
data.push_back(FileRecordData { data.push_back(FileRecordData {
boost::string_ref(itm.path), make_relative_path(base_path, PathName(itm.path)).path(),
tiger_to_string(itm.hash), tiger_to_string(itm.hash),
itm.level, itm.level,
0, itm.file_size,
itm.is_dir, itm.is_dir,
itm.is_symlink itm.is_symlink
}); });

View file

@ -123,4 +123,11 @@ namespace pq {
throw DatabaseException("Error running query", error_message(), __FILE__, __LINE__); throw DatabaseException("Error running query", error_message(), __FILE__, __LINE__);
} }
} }
std::string Connection::escape_literal (const std::string& parString) {
typedef std::unique_ptr<char[], void(*)(void*)> PQArrayType;
PQArrayType clean_str(PQescapeLiteral(m_localData->connection, parString.c_str(), parString.size()), &PQfreemem);
return std::string(clean_str.get());
}
} //namespace pq } //namespace pq

View file

@ -36,6 +36,8 @@ namespace pq {
void query_void ( const std::string& parQuery ); void query_void ( const std::string& parQuery );
ResultSet query ( const std::string& parQuery ); ResultSet query ( const std::string& parQuery );
std::string escape_literal ( const std::string& parString );
private: private:
struct LocalData; struct LocalData;
@ -46,6 +48,7 @@ namespace pq {
const uint16_t m_port; const uint16_t m_port;
std::unique_ptr<LocalData> m_localData; std::unique_ptr<LocalData> m_localData;
}; };
} //namespace pq } //namespace pq
#endif #endif

View file

@ -53,7 +53,7 @@ namespace din {
parHash.part_c = 0xF096A5B4C3B2E187ULL; parHash.part_c = 0xF096A5B4C3B2E187ULL;
} }
void tiger_file (const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir) { void tiger_file (const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir, uint64_t& parSizeOut) {
typedef decltype(std::declval<std::ifstream>().tellg()) FileSizeType; typedef decltype(std::declval<std::ifstream>().tellg()) FileSizeType;
tiger_init_hash(parHashFile); tiger_init_hash(parHashFile);
@ -99,6 +99,8 @@ namespace din {
//dir's hash value (64 bytes) as if they were part of the data. //dir's hash value (64 bytes) as if they were part of the data.
tiger_sse2_last_chunk(buff_ptr + aligned_size, buff_ptr + aligned_size, remaining - aligned_size, file_size, file_size + hash_size, parHashFile.data, parHashDir.data, g_tiger_padding); tiger_sse2_last_chunk(buff_ptr + aligned_size, buff_ptr + aligned_size, remaining - aligned_size, file_size, file_size + hash_size, parHashFile.data, parHashDir.data, g_tiger_padding);
} }
parSizeOut = static_cast<uint64_t>(file_size);
} }
std::string tiger_to_string (const TigerHash& parHash) { std::string tiger_to_string (const TigerHash& parHash) {

View file

@ -39,7 +39,7 @@ namespace din {
static_assert(sizeof(TigerHash) == 24, "Wrong struct size"); static_assert(sizeof(TigerHash) == 24, "Wrong struct size");
void tiger_file ( const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir ); void tiger_file ( const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir, uint64_t& parSizeOut );
void tiger_init_hash ( TigerHash& parHash ); void tiger_init_hash ( TigerHash& parHash );
std::string tiger_to_string ( const TigerHash& parHash ); std::string tiger_to_string ( const TigerHash& parHash );
void tiger_data ( const std::string& parData, TigerHash& parHash ); void tiger_data ( const std::string& parData, TigerHash& parHash );