mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2024-11-25 00:53:43 +00:00
Fix hashing and db insertion.
This commit is contained in:
parent
777b91b898
commit
4236b2ece8
7 changed files with 70 additions and 30 deletions
|
@ -19,8 +19,12 @@
|
|||
#include "pq/connection.hpp"
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
|
||||
namespace din {
|
||||
namespace {
|
||||
} //unnamed namespace
|
||||
|
||||
void write_to_db (const std::vector<FileRecordData>& parData) {
|
||||
if (parData.empty()) {
|
||||
return;
|
||||
|
@ -30,10 +34,14 @@ namespace din {
|
|||
query << "BEGIN;\n";
|
||||
query << "INSERT INTO \"Files\" (path, hash, level, group_id, is_directory, is_symlink, size) VALUES ";
|
||||
|
||||
pq::Connection conn("michele", "password", "dindexer", "100.200.100.200", 5432);
|
||||
conn.connect();
|
||||
|
||||
const char* comma = "";
|
||||
for (const auto& itm : parData) {
|
||||
query << comma;
|
||||
query << "('" << itm.path << "','" << itm.hash << "'," << itm.level << ','
|
||||
query << '(' << conn.escape_literal(itm.path) << ",'" << itm.hash << "',"
|
||||
<< itm.level << ','
|
||||
<< 10 << ',' << (itm.is_directory ? "true" : "false") << ','
|
||||
<< (itm.is_symlink ? "true" : "false") << ',' << itm.size << ')'
|
||||
;
|
||||
|
@ -42,8 +50,6 @@ namespace din {
|
|||
query << ';';
|
||||
query << "\nCOMMIT;";
|
||||
|
||||
pq::Connection conn("michele", "password", "dindexer", "100.200.100.200", 5432);
|
||||
conn.connect();
|
||||
conn.query_void(query.str());
|
||||
}
|
||||
} //namespace din
|
||||
|
|
|
@ -18,16 +18,16 @@
|
|||
#ifndef id842AF56BD80A4CF59957451DF9082AA2
|
||||
#define id842AF56BD80A4CF59957451DF9082AA2
|
||||
|
||||
#include <boost/utility/string_ref.hpp>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <cstdint>
|
||||
|
||||
namespace din {
|
||||
// Immutable description of one indexed filesystem entry, in the form that is
// handed to write_to_db() for insertion.
//
// Members are listed in the order used by the aggregate initialisation at the
// call site: path, hash, level, size, is_directory, is_symlink.
struct FileRecordData {
	// Owned copy of the entry's path. The call site passes the result of
	// make_relative_path(base_path, ...).path(), so an owning string is
	// required here rather than a non-owning view of a temporary.
	const std::string path;
	// Textual form of the entry's hash, as produced by tiger_to_string().
	const std::string hash;
	// Level of the entry as recorded by the indexer.
	const uint16_t level;
	// Size of the entry; fixed at 64 bits so it does not depend on the
	// platform's std::size_t width.
	const uint64_t size;
	const bool is_directory;
	const bool is_symlink;
};
|
||||
|
|
|
@ -53,45 +53,61 @@ namespace din {
|
|||
|
||||
std::string path;
|
||||
HashType hash;
|
||||
uint64_t file_size;
|
||||
uint16_t level;
|
||||
bool is_dir;
|
||||
bool is_symlink;
|
||||
};
|
||||
|
||||
namespace {
|
||||
void hash_dir (std::vector<FileEntry>::iterator parEntry, std::vector<FileEntry>::iterator parEnd, const PathName& parCurrDir) {
|
||||
void hash_dir (std::vector<FileEntry>::iterator parEntry, std::vector<FileEntry>::iterator parEnd, const PathName& parCurrDir, std::atomic<std::size_t>& parDone) {
|
||||
assert(parEntry != parEnd);
|
||||
assert(parEntry->is_dir);
|
||||
FileEntry& curr_entry = *parEntry;
|
||||
auto& curr_entry_it = parEntry;
|
||||
|
||||
//Build a blob with the hashes and filenames of every directory that
|
||||
//is a direct child of current entry
|
||||
{
|
||||
std::vector<char> dir_blob;
|
||||
auto it_entry = parEntry;
|
||||
auto it_entry = curr_entry_it;
|
||||
|
||||
while (
|
||||
it_entry != parEnd
|
||||
and (not it_entry->is_dir or (it_entry->level <= curr_entry.level
|
||||
and parCurrDir != PathName(it_entry->path).pop_right()))
|
||||
) {
|
||||
it_entry != parEnd and (
|
||||
it_entry->level == curr_entry.level
|
||||
or parCurrDir != PathName(it_entry->path).pop_right()
|
||||
//and (not it_entry->is_dir or (it_entry->level <= curr_entry.level
|
||||
//and parCurrDir != PathName(it_entry->path).pop_right()))
|
||||
)) {
|
||||
assert(it_entry->level >= curr_entry.level);
|
||||
++it_entry;
|
||||
}
|
||||
|
||||
#if !defined(NDEBUG)
|
||||
std::cout << "Making initial hash for " << parCurrDir << "...\n";
|
||||
#endif
|
||||
while (parEnd != it_entry and it_entry->is_dir and it_entry->level == parEntry->level + 1) {
|
||||
while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->path).pop_right()) {
|
||||
PathName curr_subdir(it_entry->path);
|
||||
hash_dir(it_entry, parEnd, curr_subdir);
|
||||
if (it_entry->is_dir) {
|
||||
hash_dir(it_entry, parEnd, curr_subdir, parDone);
|
||||
|
||||
std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
|
||||
const auto old_size = dir_blob.size();
|
||||
dir_blob.resize(old_size + sizeof(HashType) + relpath.size());
|
||||
std::copy(it_entry->hash.byte_data, it_entry->hash.byte_data + sizeof(HashType), dir_blob.begin() + old_size);
|
||||
std::copy(relpath.begin(), relpath.end(), dir_blob.begin() + old_size + sizeof(HashType));
|
||||
std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
|
||||
const auto old_size = dir_blob.size();
|
||||
dir_blob.resize(old_size + sizeof(HashType) + relpath.size());
|
||||
std::copy(it_entry->hash.byte_data, it_entry->hash.byte_data + sizeof(HashType), dir_blob.begin() + old_size);
|
||||
std::copy(relpath.begin(), relpath.end(), dir_blob.begin() + old_size + sizeof(HashType));
|
||||
}
|
||||
else {
|
||||
std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
|
||||
const auto old_size = dir_blob.size();
|
||||
dir_blob.resize(old_size + relpath.size());
|
||||
std::copy(relpath.begin(), relpath.end(), dir_blob.begin() + old_size);
|
||||
}
|
||||
++it_entry;
|
||||
}
|
||||
|
||||
tiger_data(dir_blob, curr_entry.hash);
|
||||
curr_entry.file_size = 0;
|
||||
#if !defined(NDEBUG)
|
||||
std::cout << "Got intermediate hash for dir " << parCurrDir << ": " << tiger_to_string(curr_entry.hash) << '\n';
|
||||
#endif
|
||||
|
@ -99,22 +115,24 @@ namespace din {
|
|||
|
||||
//Now with the initial hash ready, let's start hashing files, if any
|
||||
{
|
||||
auto it_entry = parEntry;
|
||||
auto it_entry = curr_entry_it;
|
||||
while (
|
||||
it_entry != parEnd
|
||||
and (it_entry->is_dir
|
||||
or it_entry->level != parEntry->level + 1
|
||||
or it_entry->level != curr_entry_it->level + 1
|
||||
or PathName(it_entry->path).pop_right() != parCurrDir
|
||||
)
|
||||
) {
|
||||
++it_entry;
|
||||
}
|
||||
|
||||
while (it_entry != parEnd and not it_entry->is_dir and it_entry->level == parEntry->level + 1 and PathName(it_entry->path).pop_right() == parCurrDir) {
|
||||
while (it_entry != parEnd and not it_entry->is_dir and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->path).pop_right() == parCurrDir) {
|
||||
assert(not it_entry->is_dir);
|
||||
#if !defined(NDEBUG)
|
||||
std::cout << "Hashing file " << it_entry->path << "...";
|
||||
#endif
|
||||
tiger_file(it_entry->path, it_entry->hash, parEntry->hash);
|
||||
tiger_file(it_entry->path, it_entry->hash, curr_entry_it->hash, it_entry->file_size);
|
||||
++parDone;
|
||||
#if !defined(NDEBUG)
|
||||
std::cout << ' ' << tiger_to_string(it_entry->hash) << '\n';
|
||||
#endif
|
||||
|
@ -123,8 +141,9 @@ namespace din {
|
|||
}
|
||||
|
||||
#if !defined(NDEBUG)
|
||||
std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(parEntry->hash) << '\n';
|
||||
std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(curr_entry_it->hash) << '\n';
|
||||
#endif
|
||||
++parDone;
|
||||
}
|
||||
} //unnamed namespace
|
||||
|
||||
|
@ -201,7 +220,10 @@ namespace din {
|
|||
std::cout << "-----------------------------------------------------\n";
|
||||
#endif
|
||||
|
||||
hash_dir(m_local_data->paths.begin(), m_local_data->paths.end(), base_path);
|
||||
m_local_data->done_count = 0;
|
||||
hash_dir(m_local_data->paths.begin(), m_local_data->paths.end(), base_path, m_local_data->done_count);
|
||||
|
||||
assert(m_local_data->done_count == m_local_data->paths.size());
|
||||
|
||||
#if !defined(NDEBUG)
|
||||
for (const auto& itm : m_local_data->paths) {
|
||||
|
@ -214,10 +236,10 @@ namespace din {
|
|||
data.reserve(m_local_data->paths.size());
|
||||
for (const auto& itm : m_local_data->paths) {
|
||||
data.push_back(FileRecordData {
|
||||
boost::string_ref(itm.path),
|
||||
make_relative_path(base_path, PathName(itm.path)).path(),
|
||||
tiger_to_string(itm.hash),
|
||||
itm.level,
|
||||
0,
|
||||
itm.file_size,
|
||||
itm.is_dir,
|
||||
itm.is_symlink
|
||||
});
|
||||
|
|
|
@ -123,4 +123,11 @@ namespace pq {
|
|||
throw DatabaseException("Error running query", error_message(), __FILE__, __LINE__);
|
||||
}
|
||||
}
|
||||
|
||||
std::string Connection::escape_literal (const std::string& parString) {
|
||||
typedef std::unique_ptr<char[], void(*)(void*)> PQArrayType;
|
||||
|
||||
PQArrayType clean_str(PQescapeLiteral(m_localData->connection, parString.c_str(), parString.size()), &PQfreemem);
|
||||
return std::string(clean_str.get());
|
||||
}
|
||||
} //namespace pq
|
||||
|
|
|
@ -36,6 +36,8 @@ namespace pq {
|
|||
void query_void ( const std::string& parQuery );
|
||||
ResultSet query ( const std::string& parQuery );
|
||||
|
||||
std::string escape_literal ( const std::string& parString );
|
||||
|
||||
private:
|
||||
struct LocalData;
|
||||
|
||||
|
@ -46,6 +48,7 @@ namespace pq {
|
|||
const uint16_t m_port;
|
||||
std::unique_ptr<LocalData> m_localData;
|
||||
};
|
||||
|
||||
} //namespace pq
|
||||
|
||||
#endif
|
||||
|
|
|
@ -53,7 +53,7 @@ namespace din {
|
|||
parHash.part_c = 0xF096A5B4C3B2E187ULL;
|
||||
}
|
||||
|
||||
void tiger_file (const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir) {
|
||||
void tiger_file (const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir, uint64_t& parSizeOut) {
|
||||
typedef decltype(std::declval<std::ifstream>().tellg()) FileSizeType;
|
||||
tiger_init_hash(parHashFile);
|
||||
|
||||
|
@ -99,6 +99,8 @@ namespace din {
|
|||
//dir's hash value (64 bytes) as if they were part of the data.
|
||||
tiger_sse2_last_chunk(buff_ptr + aligned_size, buff_ptr + aligned_size, remaining - aligned_size, file_size, file_size + hash_size, parHashFile.data, parHashDir.data, g_tiger_padding);
|
||||
}
|
||||
|
||||
parSizeOut = static_cast<uint64_t>(file_size);
|
||||
}
|
||||
|
||||
std::string tiger_to_string (const TigerHash& parHash) {
|
||||
|
|
|
@ -39,7 +39,7 @@ namespace din {
|
|||
|
||||
static_assert(sizeof(TigerHash) == 24, "Wrong struct size");
|
||||
|
||||
void tiger_file ( const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir );
|
||||
void tiger_file ( const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir, uint64_t& parSizeOut );
|
||||
void tiger_init_hash ( TigerHash& parHash );
|
||||
std::string tiger_to_string ( const TigerHash& parHash );
|
||||
void tiger_data ( const std::string& parData, TigerHash& parHash );
|
||||
|
|
Loading…
Reference in a new issue