mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2024-11-25 00:53:43 +00:00
Fix hashing and db insertion.
This commit is contained in:
parent
777b91b898
commit
4236b2ece8
7 changed files with 70 additions and 30 deletions
|
@ -19,8 +19,12 @@
|
||||||
#include "pq/connection.hpp"
|
#include "pq/connection.hpp"
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
namespace din {
|
namespace din {
|
||||||
|
namespace {
|
||||||
|
} //unnamed namespace
|
||||||
|
|
||||||
void write_to_db (const std::vector<FileRecordData>& parData) {
|
void write_to_db (const std::vector<FileRecordData>& parData) {
|
||||||
if (parData.empty()) {
|
if (parData.empty()) {
|
||||||
return;
|
return;
|
||||||
|
@ -30,10 +34,14 @@ namespace din {
|
||||||
query << "BEGIN;\n";
|
query << "BEGIN;\n";
|
||||||
query << "INSERT INTO \"Files\" (path, hash, level, group_id, is_directory, is_symlink, size) VALUES ";
|
query << "INSERT INTO \"Files\" (path, hash, level, group_id, is_directory, is_symlink, size) VALUES ";
|
||||||
|
|
||||||
|
pq::Connection conn("michele", "password", "dindexer", "100.200.100.200", 5432);
|
||||||
|
conn.connect();
|
||||||
|
|
||||||
const char* comma = "";
|
const char* comma = "";
|
||||||
for (const auto& itm : parData) {
|
for (const auto& itm : parData) {
|
||||||
query << comma;
|
query << comma;
|
||||||
query << "('" << itm.path << "','" << itm.hash << "'," << itm.level << ','
|
query << '(' << conn.escape_literal(itm.path) << ",'" << itm.hash << "',"
|
||||||
|
<< itm.level << ','
|
||||||
<< 10 << ',' << (itm.is_directory ? "true" : "false") << ','
|
<< 10 << ',' << (itm.is_directory ? "true" : "false") << ','
|
||||||
<< (itm.is_symlink ? "true" : "false") << ',' << itm.size << ')'
|
<< (itm.is_symlink ? "true" : "false") << ',' << itm.size << ')'
|
||||||
;
|
;
|
||||||
|
@ -42,8 +50,6 @@ namespace din {
|
||||||
query << ';';
|
query << ';';
|
||||||
query << "\nCOMMIT;";
|
query << "\nCOMMIT;";
|
||||||
|
|
||||||
pq::Connection conn("michele", "password", "dindexer", "100.200.100.200", 5432);
|
|
||||||
conn.connect();
|
|
||||||
conn.query_void(query.str());
|
conn.query_void(query.str());
|
||||||
}
|
}
|
||||||
} //namespace din
|
} //namespace din
|
||||||
|
|
|
@ -18,16 +18,16 @@
|
||||||
#ifndef id842AF56BD80A4CF59957451DF9082AA2
|
#ifndef id842AF56BD80A4CF59957451DF9082AA2
|
||||||
#define id842AF56BD80A4CF59957451DF9082AA2
|
#define id842AF56BD80A4CF59957451DF9082AA2
|
||||||
|
|
||||||
#include <boost/utility/string_ref.hpp>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <utility>
|
#include <cstdint>
|
||||||
|
|
||||||
namespace din {
|
namespace din {
|
||||||
//One row destined for the "Files" table, as consumed by write_to_db().
//Members are declared in the order used by aggregate initialisation at the
//call site, so do not reorder them.
//
//Members are intentionally non-const: const data members delete the
//assignment operators and force std::string members to be copied instead
//of moved when records are stored in a std::vector. Use a
//const FileRecordData (or a const reference) where immutability is wanted.
struct FileRecordData {
	//Path of the entry as it will be stored in the database
	//NOTE(review): the visible caller passes a path made relative to the
	//scan's base path - confirm no caller relies on absolute paths
	std::string path;
	//Textual form of the entry's hash, as produced by tiger_to_string()
	std::string hash;
	//Level of the entry as reported by the indexer
	//NOTE(review): presumably the depth below the scanned root - confirm
	uint16_t level;
	//Size of the entry; 64 bits wide regardless of the platform's size_t
	uint64_t size;
	bool is_directory;
	bool is_symlink;
};
|
||||||
|
|
|
@ -53,45 +53,61 @@ namespace din {
|
||||||
|
|
||||||
std::string path;
|
std::string path;
|
||||||
HashType hash;
|
HashType hash;
|
||||||
|
uint64_t file_size;
|
||||||
uint16_t level;
|
uint16_t level;
|
||||||
bool is_dir;
|
bool is_dir;
|
||||||
bool is_symlink;
|
bool is_symlink;
|
||||||
};
|
};
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
void hash_dir (std::vector<FileEntry>::iterator parEntry, std::vector<FileEntry>::iterator parEnd, const PathName& parCurrDir) {
|
void hash_dir (std::vector<FileEntry>::iterator parEntry, std::vector<FileEntry>::iterator parEnd, const PathName& parCurrDir, std::atomic<std::size_t>& parDone) {
|
||||||
|
assert(parEntry != parEnd);
|
||||||
|
assert(parEntry->is_dir);
|
||||||
FileEntry& curr_entry = *parEntry;
|
FileEntry& curr_entry = *parEntry;
|
||||||
|
auto& curr_entry_it = parEntry;
|
||||||
|
|
||||||
//Build a blob with the hashes and filenames of every directory that
|
//Build a blob with the hashes and filenames of every directory that
|
||||||
//is a direct child of current entry
|
//is a direct child of current entry
|
||||||
{
|
{
|
||||||
std::vector<char> dir_blob;
|
std::vector<char> dir_blob;
|
||||||
auto it_entry = parEntry;
|
auto it_entry = curr_entry_it;
|
||||||
|
|
||||||
while (
|
while (
|
||||||
it_entry != parEnd
|
it_entry != parEnd and (
|
||||||
and (not it_entry->is_dir or (it_entry->level <= curr_entry.level
|
it_entry->level == curr_entry.level
|
||||||
and parCurrDir != PathName(it_entry->path).pop_right()))
|
or parCurrDir != PathName(it_entry->path).pop_right()
|
||||||
) {
|
//and (not it_entry->is_dir or (it_entry->level <= curr_entry.level
|
||||||
|
//and parCurrDir != PathName(it_entry->path).pop_right()))
|
||||||
|
)) {
|
||||||
|
assert(it_entry->level >= curr_entry.level);
|
||||||
++it_entry;
|
++it_entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(NDEBUG)
|
#if !defined(NDEBUG)
|
||||||
std::cout << "Making initial hash for " << parCurrDir << "...\n";
|
std::cout << "Making initial hash for " << parCurrDir << "...\n";
|
||||||
#endif
|
#endif
|
||||||
while (parEnd != it_entry and it_entry->is_dir and it_entry->level == parEntry->level + 1) {
|
while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->path).pop_right()) {
|
||||||
PathName curr_subdir(it_entry->path);
|
PathName curr_subdir(it_entry->path);
|
||||||
hash_dir(it_entry, parEnd, curr_subdir);
|
if (it_entry->is_dir) {
|
||||||
|
hash_dir(it_entry, parEnd, curr_subdir, parDone);
|
||||||
|
|
||||||
std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
|
std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
|
||||||
const auto old_size = dir_blob.size();
|
const auto old_size = dir_blob.size();
|
||||||
dir_blob.resize(old_size + sizeof(HashType) + relpath.size());
|
dir_blob.resize(old_size + sizeof(HashType) + relpath.size());
|
||||||
std::copy(it_entry->hash.byte_data, it_entry->hash.byte_data + sizeof(HashType), dir_blob.begin() + old_size);
|
std::copy(it_entry->hash.byte_data, it_entry->hash.byte_data + sizeof(HashType), dir_blob.begin() + old_size);
|
||||||
std::copy(relpath.begin(), relpath.end(), dir_blob.begin() + old_size + sizeof(HashType));
|
std::copy(relpath.begin(), relpath.end(), dir_blob.begin() + old_size + sizeof(HashType));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
|
||||||
|
const auto old_size = dir_blob.size();
|
||||||
|
dir_blob.resize(old_size + relpath.size());
|
||||||
|
std::copy(relpath.begin(), relpath.end(), dir_blob.begin() + old_size);
|
||||||
|
}
|
||||||
++it_entry;
|
++it_entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
tiger_data(dir_blob, curr_entry.hash);
|
tiger_data(dir_blob, curr_entry.hash);
|
||||||
|
curr_entry.file_size = 0;
|
||||||
#if !defined(NDEBUG)
|
#if !defined(NDEBUG)
|
||||||
std::cout << "Got intermediate hash for dir " << parCurrDir << ": " << tiger_to_string(curr_entry.hash) << '\n';
|
std::cout << "Got intermediate hash for dir " << parCurrDir << ": " << tiger_to_string(curr_entry.hash) << '\n';
|
||||||
#endif
|
#endif
|
||||||
|
@ -99,22 +115,24 @@ namespace din {
|
||||||
|
|
||||||
//Now with the initial hash ready, let's start hashing files, if any
|
//Now with the initial hash ready, let's start hashing files, if any
|
||||||
{
|
{
|
||||||
auto it_entry = parEntry;
|
auto it_entry = curr_entry_it;
|
||||||
while (
|
while (
|
||||||
it_entry != parEnd
|
it_entry != parEnd
|
||||||
and (it_entry->is_dir
|
and (it_entry->is_dir
|
||||||
or it_entry->level != parEntry->level + 1
|
or it_entry->level != curr_entry_it->level + 1
|
||||||
or PathName(it_entry->path).pop_right() != parCurrDir
|
or PathName(it_entry->path).pop_right() != parCurrDir
|
||||||
)
|
)
|
||||||
) {
|
) {
|
||||||
++it_entry;
|
++it_entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
while (it_entry != parEnd and not it_entry->is_dir and it_entry->level == parEntry->level + 1 and PathName(it_entry->path).pop_right() == parCurrDir) {
|
while (it_entry != parEnd and not it_entry->is_dir and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->path).pop_right() == parCurrDir) {
|
||||||
|
assert(not it_entry->is_dir);
|
||||||
#if !defined(NDEBUG)
|
#if !defined(NDEBUG)
|
||||||
std::cout << "Hashing file " << it_entry->path << "...";
|
std::cout << "Hashing file " << it_entry->path << "...";
|
||||||
#endif
|
#endif
|
||||||
tiger_file(it_entry->path, it_entry->hash, parEntry->hash);
|
tiger_file(it_entry->path, it_entry->hash, curr_entry_it->hash, it_entry->file_size);
|
||||||
|
++parDone;
|
||||||
#if !defined(NDEBUG)
|
#if !defined(NDEBUG)
|
||||||
std::cout << ' ' << tiger_to_string(it_entry->hash) << '\n';
|
std::cout << ' ' << tiger_to_string(it_entry->hash) << '\n';
|
||||||
#endif
|
#endif
|
||||||
|
@ -123,8 +141,9 @@ namespace din {
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(NDEBUG)
|
#if !defined(NDEBUG)
|
||||||
std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(parEntry->hash) << '\n';
|
std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(curr_entry_it->hash) << '\n';
|
||||||
#endif
|
#endif
|
||||||
|
++parDone;
|
||||||
}
|
}
|
||||||
} //unnamed namespace
|
} //unnamed namespace
|
||||||
|
|
||||||
|
@ -201,7 +220,10 @@ namespace din {
|
||||||
std::cout << "-----------------------------------------------------\n";
|
std::cout << "-----------------------------------------------------\n";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
hash_dir(m_local_data->paths.begin(), m_local_data->paths.end(), base_path);
|
m_local_data->done_count = 0;
|
||||||
|
hash_dir(m_local_data->paths.begin(), m_local_data->paths.end(), base_path, m_local_data->done_count);
|
||||||
|
|
||||||
|
assert(m_local_data->done_count == m_local_data->paths.size());
|
||||||
|
|
||||||
#if !defined(NDEBUG)
|
#if !defined(NDEBUG)
|
||||||
for (const auto& itm : m_local_data->paths) {
|
for (const auto& itm : m_local_data->paths) {
|
||||||
|
@ -214,10 +236,10 @@ namespace din {
|
||||||
data.reserve(m_local_data->paths.size());
|
data.reserve(m_local_data->paths.size());
|
||||||
for (const auto& itm : m_local_data->paths) {
|
for (const auto& itm : m_local_data->paths) {
|
||||||
data.push_back(FileRecordData {
|
data.push_back(FileRecordData {
|
||||||
boost::string_ref(itm.path),
|
make_relative_path(base_path, PathName(itm.path)).path(),
|
||||||
tiger_to_string(itm.hash),
|
tiger_to_string(itm.hash),
|
||||||
itm.level,
|
itm.level,
|
||||||
0,
|
itm.file_size,
|
||||||
itm.is_dir,
|
itm.is_dir,
|
||||||
itm.is_symlink
|
itm.is_symlink
|
||||||
});
|
});
|
||||||
|
|
|
@ -123,4 +123,11 @@ namespace pq {
|
||||||
throw DatabaseException("Error running query", error_message(), __FILE__, __LINE__);
|
throw DatabaseException("Error running query", error_message(), __FILE__, __LINE__);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string Connection::escape_literal (const std::string& parString) {
|
||||||
|
typedef std::unique_ptr<char[], void(*)(void*)> PQArrayType;
|
||||||
|
|
||||||
|
PQArrayType clean_str(PQescapeLiteral(m_localData->connection, parString.c_str(), parString.size()), &PQfreemem);
|
||||||
|
return std::string(clean_str.get());
|
||||||
|
}
|
||||||
} //namespace pq
|
} //namespace pq
|
||||||
|
|
|
@ -36,6 +36,8 @@ namespace pq {
|
||||||
void query_void ( const std::string& parQuery );
|
void query_void ( const std::string& parQuery );
|
||||||
ResultSet query ( const std::string& parQuery );
|
ResultSet query ( const std::string& parQuery );
|
||||||
|
|
||||||
|
std::string escape_literal ( const std::string& parString );
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct LocalData;
|
struct LocalData;
|
||||||
|
|
||||||
|
@ -46,6 +48,7 @@ namespace pq {
|
||||||
const uint16_t m_port;
|
const uint16_t m_port;
|
||||||
std::unique_ptr<LocalData> m_localData;
|
std::unique_ptr<LocalData> m_localData;
|
||||||
};
|
};
|
||||||
|
|
||||||
} //namespace pq
|
} //namespace pq
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -53,7 +53,7 @@ namespace din {
|
||||||
parHash.part_c = 0xF096A5B4C3B2E187ULL;
|
parHash.part_c = 0xF096A5B4C3B2E187ULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
void tiger_file (const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir) {
|
void tiger_file (const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir, uint64_t& parSizeOut) {
|
||||||
typedef decltype(std::declval<std::ifstream>().tellg()) FileSizeType;
|
typedef decltype(std::declval<std::ifstream>().tellg()) FileSizeType;
|
||||||
tiger_init_hash(parHashFile);
|
tiger_init_hash(parHashFile);
|
||||||
|
|
||||||
|
@ -99,6 +99,8 @@ namespace din {
|
||||||
//dir's hash value (64 bytes) as if they were part of the data.
|
//dir's hash value (64 bytes) as if they were part of the data.
|
||||||
tiger_sse2_last_chunk(buff_ptr + aligned_size, buff_ptr + aligned_size, remaining - aligned_size, file_size, file_size + hash_size, parHashFile.data, parHashDir.data, g_tiger_padding);
|
tiger_sse2_last_chunk(buff_ptr + aligned_size, buff_ptr + aligned_size, remaining - aligned_size, file_size, file_size + hash_size, parHashFile.data, parHashDir.data, g_tiger_padding);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
parSizeOut = static_cast<uint64_t>(file_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string tiger_to_string (const TigerHash& parHash) {
|
std::string tiger_to_string (const TigerHash& parHash) {
|
||||||
|
|
|
@ -39,7 +39,7 @@ namespace din {
|
||||||
|
|
||||||
static_assert(sizeof(TigerHash) == 24, "Wrong struct size");
|
static_assert(sizeof(TigerHash) == 24, "Wrong struct size");
|
||||||
|
|
||||||
void tiger_file ( const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir );
|
void tiger_file ( const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir, uint64_t& parSizeOut );
|
||||||
void tiger_init_hash ( TigerHash& parHash );
|
void tiger_init_hash ( TigerHash& parHash );
|
||||||
std::string tiger_to_string ( const TigerHash& parHash );
|
std::string tiger_to_string ( const TigerHash& parHash );
|
||||||
void tiger_data ( const std::string& parData, TigerHash& parHash );
|
void tiger_data ( const std::string& parData, TigerHash& parHash );
|
||||||
|
|
Loading…
Reference in a new issue