mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2025-07-03 14:14:11 +00:00
Use parametric sql functions to insert new files.
Refactoring so that there are no extra copies of data being inserted.
This commit is contained in:
parent
a91e75829f
commit
390b69e150
7 changed files with 360 additions and 147 deletions
|
@ -24,8 +24,82 @@
|
|||
#include <memory>
|
||||
#include <boost/lexical_cast.hpp>
|
||||
#include <sstream>
|
||||
#include <cstring>
|
||||
#include "libpqtypes.h"
|
||||
#include <cstdlib>
|
||||
#include <ctime>
|
||||
#include <cassert>
|
||||
|
||||
using sc = std::chrono::system_clock;
|
||||
|
||||
namespace pq {
|
||||
namespace implem {
|
||||
template <> const char* type_to_pqtypes_name<std::string>() { return "%text"; }
|
||||
template <> const char* type_to_pqtypes_name<boost::string_ref>() { return "%text"; }
|
||||
template <> const char* type_to_pqtypes_name<bool>() { return "%bool"; }
|
||||
template <> const char* type_to_pqtypes_name<float>() { return "%float4"; }
|
||||
template <> const char* type_to_pqtypes_name<double>() { return "%float8"; }
|
||||
template <> const char* type_to_pqtypes_name<int16_t>() { return "%int2"; }
|
||||
template <> const char* type_to_pqtypes_name<int32_t>() { return "%int4"; }
|
||||
template <> const char* type_to_pqtypes_name<int64_t>() { return "%int8"; }
|
||||
template <> const char* type_to_pqtypes_name<uint16_t>() { return "%int2"; }
|
||||
template <> const char* type_to_pqtypes_name<uint32_t>() { return "%int4"; }
|
||||
template <> const char* type_to_pqtypes_name<uint64_t>() { return "%int8"; }
|
||||
template <> const char* type_to_pqtypes_name<sc::time_point>() { return "%timestamptz"; }
|
||||
|
||||
template const char* type_to_pqtypes_name<std::string> ( void );
|
||||
template const char* type_to_pqtypes_name<boost::string_ref> ( void );
|
||||
template const char* type_to_pqtypes_name<bool> ( void );
|
||||
template const char* type_to_pqtypes_name<float> ( void );
|
||||
template const char* type_to_pqtypes_name<double> ( void );
|
||||
template const char* type_to_pqtypes_name<int16_t> ( void );
|
||||
template const char* type_to_pqtypes_name<int32_t> ( void );
|
||||
template const char* type_to_pqtypes_name<int64_t> ( void );
|
||||
template const char* type_to_pqtypes_name<uint16_t> ( void );
|
||||
template const char* type_to_pqtypes_name<uint32_t> ( void );
|
||||
template const char* type_to_pqtypes_name<uint64_t> ( void );
|
||||
|
||||
// Converts a system_clock time point into a PGtimestamp (local calendar time
// plus timezone information from PQlocalTZInfo) and stores its bytes in
// m_storage.
//
// parParam: the time point to convert. It goes through time_t and usec is
//           always written as 0, so sub-second precision is dropped.
// Returns a pointer to m_storage; the result is only usable while this
// object is alive.
// Throws DatabaseException if the time can't be broken down by
// std::localtime().
auto get_pqlib_c_type_struct<std::chrono::system_clock::time_point>::conv (const std::chrono::system_clock::time_point& parParam) -> type {
	static_assert(sizeof(storage) == sizeof(PGtimestamp), "Wrong size for timestamp, please update DATA_SIZE");
	static_assert(alignof(storage) == alignof(PGtimestamp), "Wrong alignment for timestamp, please update type");

	using std::chrono::system_clock;

	PGtimestamp ts;
	std::memset(&ts, 0, sizeof(PGtimestamp));

	std::time_t t = system_clock::to_time_t(parParam);
	ts.epoch = t;

	//localtime() returns a null pointer when the value can't be represented
	const std::tm* const local = std::localtime(&t);
	if (not local) {
		throw DatabaseException("Error converting timestamp", std::string("std::localtime() failed"), __FILE__, __LINE__);
	}

	ts.time.hour = local->tm_hour;
	ts.time.min = local->tm_min;
	ts.time.sec = local->tm_sec;
	ts.time.usec = 0;
	ts.time.withtz = 1;
	ts.date.isbc = 0;
	ts.date.year = local->tm_year + 1900;
	ts.date.mon = local->tm_mon;
	ts.date.mday = local->tm_mday;

	char* tzn = nullptr;
	PQlocalTZInfo(&t, &ts.time.gmtoff, &ts.time.isdst, &tzn);
	if (tzn) {
		//ts was zero-filled above, so copying at most size-1 characters
		//always leaves the abbreviation terminated
		std::strncpy(ts.time.tzabbr, tzn, sizeof(ts.time.tzabbr) - 1);
	}

	std::memcpy(&m_storage, &ts, sizeof(ts));
	return &m_storage;
}
|
||||
|
||||
// Out-of-line destructor; it performs no work of its own.
get_pqlib_c_type_struct<std::chrono::system_clock::time_point>::~get_pqlib_c_type_struct() noexcept {
}
|
||||
} //namespace implem
|
||||
|
||||
namespace {
|
||||
// Forwards an already started va_list to PQputvf, using the contents of
// parTypes as the format string. No stack buffer is supplied (nullptr, 0).
// Returns whatever PQputvf returns.
int call_PQputf (PGparam* parParam, const std::string* parTypes, va_list parArgp) {
	const char* const format = parTypes->c_str();
	return PQputvf(parParam, nullptr, 0, format, parArgp);
}
|
||||
} //unnamed namespace
|
||||
|
||||
// Definition of the LocalData type that Connection reaches through
// m_localData.
struct Connection::LocalData {
	//libpq connection handle; disconnect() resets it to nullptr after PQfinish
	PGconn* connection;
};
|
||||
|
@ -81,10 +155,13 @@ namespace pq {
|
|||
throw DatabaseException(oss.str(), std::move(err), __FILE__, __LINE__);
|
||||
}
|
||||
query_void("SET NAMES 'utf8'");
|
||||
|
||||
PQinitTypes(m_localData->connection); //Init libpqtypes
|
||||
}
|
||||
|
||||
void Connection::disconnect() {
|
||||
if (is_connected()) {
|
||||
PQclearTypes(m_localData->connection); //clear libpqtypes
|
||||
PQfinish(m_localData->connection);
|
||||
m_localData->connection = nullptr;
|
||||
}
|
||||
|
@ -134,4 +211,41 @@ namespace pq {
|
|||
PQArrayType clean_str(PQescapeLiteral(m_localData->connection, parString.data(), parString.size()), &PQfreemem);
|
||||
return std::string(clean_str.get());
|
||||
}
|
||||
|
||||
void Connection::query_void_params (const std::string& parQuery, PGParams& parParams) {
|
||||
auto deleter = [](PGresult* r) { PQclear(r); };
|
||||
using ResultPtr = std::unique_ptr<PGresult, decltype(deleter)>;
|
||||
|
||||
int result_format = 1;
|
||||
assert(parParams.get());
|
||||
auto res = ResultPtr(
|
||||
PQparamExec(
|
||||
m_localData->connection,
|
||||
parParams.get(),
|
||||
parQuery.c_str(),
|
||||
result_format
|
||||
),
|
||||
deleter
|
||||
);
|
||||
if (not res) {
|
||||
std::ostringstream oss;
|
||||
oss << "Error allocating result object while running \"" << parQuery << "\": " << PQgeterror();
|
||||
throw DatabaseException("Error running query", oss.str(), __FILE__, __LINE__);
|
||||
}
|
||||
const int ress = PQresultStatus(res.get());
|
||||
if (ress != PGRES_TUPLES_OK && ress != PGRES_COMMAND_OK) {
|
||||
throw DatabaseException("Error running query", error_message(), __FILE__, __LINE__);
|
||||
}
|
||||
}
|
||||
|
||||
// Creates a PGparam for this connection and fills it from the variadic
// arguments.
//
// parTypes: format string describing the variadic arguments (query_void()
//           builds it as a space separated list of type_to_pqtypes_name()
//           results). It is the last named parameter and is the one handed
//           to va_start. Must not be null.
// ...:      one value per specifier in *parTypes, already converted to the
//           C type expected for that specifier.
// Returns the owning PGParams handle (released with PQparamClear).
// Throws DatabaseException if the PGparam can't be created or if storing
// the values fails.
auto Connection::make_params (const std::string* parTypes, ...) -> PGParams {
	assert(parTypes);

	PGParams retval(PQparamCreate(m_localData->connection), &PQparamClear);
	if (not retval) {
		std::ostringstream oss;
		oss << "Error creating the parameter list: " << PQgeterror();
		throw DatabaseException("Error preparing query parameters", oss.str(), __FILE__, __LINE__);
	}

	//With no specifiers there is nothing to store, so PQputvf is not invoked
	//at all and an empty parameter list is returned
	if (not parTypes->empty()) {
		va_list argp;
		va_start(argp, parTypes);
		const int put_succeeded = call_PQputf(retval.get(), parTypes, argp);
		va_end(argp);

		if (not put_succeeded) {
			std::ostringstream oss;
			oss << "Error storing parameters of type \"" << *parTypes << "\": " << PQgeterror();
			throw DatabaseException("Error preparing query parameters", oss.str(), __FILE__, __LINE__);
		}
	}

	//Returned by name so the handle is moved (or elided) without std::move
	return retval;
}
|
||||
} //namespace pq
|
||||
|
|
|
@ -23,6 +23,11 @@
|
|||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <boost/utility/string_ref.hpp>
|
||||
#include <chrono>
|
||||
#include <type_traits>
|
||||
|
||||
struct pg_param;
|
||||
typedef pg_param PGparam;
|
||||
|
||||
namespace pq {
|
||||
class Connection {
|
||||
|
@ -40,8 +45,15 @@ namespace pq {
|
|||
std::string escaped_literal ( const std::string& parString );
|
||||
std::string escaped_literal ( boost::string_ref parString );
|
||||
|
||||
template <typename... Args>
|
||||
void query_void ( const std::string& parQuery, Args&&... parArgs );
|
||||
|
||||
private:
|
||||
struct LocalData;
|
||||
using PGParams = std::unique_ptr<::PGparam, void(*)(::PGparam*)>;
|
||||
|
||||
void query_void_params ( const std::string& parQuery, PGParams& parParams );
|
||||
PGParams make_params ( const std::string* parTypes, ... );
|
||||
|
||||
const std::string m_username;
|
||||
const std::string m_passwd;
|
||||
|
@ -51,6 +63,71 @@ namespace pq {
|
|||
std::unique_ptr<LocalData> m_localData;
|
||||
};
|
||||
|
||||
namespace implem {
|
||||
template <typename T>
|
||||
const char* type_to_pqtypes_name ( void );
|
||||
|
||||
// Generic conversion from a C++ argument to the value passed on as a
// query parameter: by default the value is handed over unchanged, with
// `type` being T itself. Specializations override this for types that
// need a different C representation.
template <typename T>
struct get_pqlib_c_type_struct {
	typedef T type;

	static type conv ( T parParam ) {
		return parParam;
	}
};
|
||||
template <>
|
||||
struct get_pqlib_c_type_struct<std::string> {
|
||||
using type = const char*;
|
||||
static type conv ( const std::string& parParam ) { return parParam.c_str(); }
|
||||
};
|
||||
template <>
|
||||
struct get_pqlib_c_type_struct<boost::string_ref> {
|
||||
using type = const char*;
|
||||
static type conv ( const boost::string_ref& parParam ) { return parParam.data(); }
|
||||
};
|
||||
template <>
|
||||
struct get_pqlib_c_type_struct<bool> {
|
||||
using type = int;
|
||||
static type conv ( bool parParam ) { return (parParam ? 1 : 0); }
|
||||
};
|
||||
template <>
|
||||
struct get_pqlib_c_type_struct<std::chrono::system_clock::time_point> {
|
||||
struct StorageStruct { uint64_t epoch; int a[14]; char tzabbr[16]; };
|
||||
static constexpr std::size_t DATA_SIZE = sizeof(StorageStruct);
|
||||
using storage = std::aligned_storage<DATA_SIZE, alignof(uint64_t)>::type;
|
||||
storage m_storage;
|
||||
|
||||
public:
|
||||
using type = const storage*;
|
||||
|
||||
type conv ( const std::chrono::system_clock::time_point& parParam );
|
||||
~get_pqlib_c_type_struct ( void ) noexcept;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
inline typename get_pqlib_c_type_struct<T>::type get_pqlib_c_type (const T& parParam) {
|
||||
return get_pqlib_c_type_struct<T>::conv(parParam);
|
||||
}
|
||||
} //namespace implem
|
||||
|
||||
template <typename... Args>
|
||||
void Connection::query_void (const std::string& parQuery, Args&&... parArgs) {
|
||||
using std::remove_cv;
|
||||
using std::remove_reference;
|
||||
|
||||
auto make_pgparams = [&parArgs..., this](){
|
||||
using implem::type_to_pqtypes_name;
|
||||
|
||||
std::string types;
|
||||
int unpack[] {0, (types += type_to_pqtypes_name<typename remove_cv<typename remove_reference<Args>::type>::type>(), types += ' ', 0)...};
|
||||
if (not types.empty()) {
|
||||
types.resize(types.size() - 1);
|
||||
}
|
||||
static_cast<void>(unpack);
|
||||
|
||||
return this->make_params(&types, implem::get_pqlib_c_type_struct<typename remove_cv<typename remove_reference<Args>::type>::type>().conv(parArgs)...);
|
||||
};
|
||||
PGParams pgparams = make_pgparams();
|
||||
|
||||
this->query_void_params(parQuery, pgparams);
|
||||
}
|
||||
} //namespace pq
|
||||
|
||||
#endif
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include "dbbackend.hpp"
|
||||
#include "pq/connection.hpp"
|
||||
#include "dindexer-common/settings.hpp"
|
||||
#include "recorddata.hpp"
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <utility>
|
||||
|
@ -25,11 +26,10 @@
|
|||
#include <exception>
|
||||
#include <memory>
|
||||
#include <boost/utility/string_ref.hpp>
|
||||
#include <chrono>
|
||||
|
||||
namespace din {
|
||||
namespace {
|
||||
const std::size_t g_batch_size = 100;
|
||||
|
||||
std::string make_set_insert_query (pq::Connection& parConn, const SetRecordData& parSetData) {
|
||||
std::ostringstream oss;
|
||||
oss << "INSERT INTO \"sets\" (\"desc\",\"type\") VALUES ("
|
||||
|
@ -38,15 +38,9 @@ namespace din {
|
|||
<< ");";
|
||||
return oss.str();
|
||||
}
|
||||
|
||||
// Formats parTime, broken down with std::gmtime, using the strftime pattern
// "%F %T%z" into the caller supplied buffer.
//
// parBuff/parLength: destination buffer and its size.
// Returns a view over the characters written to parBuff; its length is the
// value returned by std::strftime.
boost::string_ref time_to_str (const std::time_t parTime, char* parBuff, std::size_t parLength) {
	const std::tm* const utc = std::gmtime(&parTime);
	const std::size_t written = std::strftime(parBuff, parLength, "%F %T%z", utc);
	return boost::string_ref(parBuff, written);
}
|
||||
} //unnamed namespace
|
||||
|
||||
bool read_from_db (FileRecordData& parItem, SetRecordDataFull& parSet, const dinlib::SettingsDB& parDB, std::string&& parHash) {
|
||||
bool read_from_db (FileRecordData& parItem, SetRecordDataFull& parSet, const dinlib::SettingsDB& parDB, const TigerHash& parHash) {
|
||||
using boost::lexical_cast;
|
||||
|
||||
pq::Connection conn(std::string(parDB.username), std::string(parDB.password), std::string(parDB.dbname), std::string(parDB.address), parDB.port);
|
||||
|
@ -55,8 +49,8 @@ namespace din {
|
|||
uint32_t group_id;
|
||||
{
|
||||
std::ostringstream oss;
|
||||
oss << "SELECT path,level,group_id,is_directory,is_symlink,size FROM files WHERE hash=" <<
|
||||
conn.escaped_literal(parHash) <<
|
||||
oss << "SELECT path,level,group_id,is_directory,is_symlink,size FROM files WHERE hash='" <<
|
||||
tiger_to_string(parHash, true) << "'" <<
|
||||
" LIMIT 1;";
|
||||
|
||||
auto resultset = conn.query(oss.str());
|
||||
|
@ -66,7 +60,7 @@ namespace din {
|
|||
|
||||
auto row = resultset[0];
|
||||
parItem.path = row["path"];
|
||||
parItem.hash = std::move(parHash);
|
||||
parItem.hash = parHash;
|
||||
parItem.level = lexical_cast<uint16_t>(row["level"]);
|
||||
parItem.size = lexical_cast<uint64_t>(row["size"]);
|
||||
parItem.is_directory = (row["is_directory"] == "t" ? true : false);
|
||||
|
@ -94,47 +88,36 @@ namespace din {
|
|||
}
|
||||
|
||||
// Stores a set and all of its file records in the database.
//
// parDB:      connection settings (username, password, dbname, address, port).
// parData:    the records to insert into the "files" table; nothing is done
//             if it's empty.
// parSetData: the set the records belong to, inserted first through
//             make_set_insert_query().
//
// Everything runs between "BEGIN;" and "COMMIT;". Each record is inserted
// with a parametric query, so values are sent as parameters rather than
// being escaped into the SQL text. group_id is taken from
// currval('"sets_id_seq"') - presumably the id generated by the set insert
// that precedes the loop.
void write_to_db (const dinlib::SettingsDB& parDB, const std::vector<FileRecordData>& parData, const SetRecordData& parSetData) {
	using std::chrono::system_clock;

	if (parData.empty()) {
		return;
	}

	pq::Connection conn(std::string(parDB.username), std::string(parDB.password), std::string(parDB.dbname), std::string(parDB.address), parDB.port);
	conn.connect();

	conn.query_void("BEGIN;");
	conn.query_void(make_set_insert_query(conn, parSetData));

	//The statement text is the same for every record, build it once
	const std::string query = "INSERT INTO \"files\" (path, hash, "
		"level, group_id, is_directory, is_symlink, size, "
		"access_time, modify_time, is_hash_valid, unreadable) VALUES "
		"($1, $2, $3, currval('\"sets_id_seq\"'), $4, $5, $6, $7, $8, $9, $10);";

	//TODO: use COPY instead of INSERT INTO
	for (const auto& itm : parData) {
		conn.query_void(query,
			itm.path,
			tiger_to_string(itm.hash),
			itm.level,
			itm.is_directory,
			itm.is_symlink,
			itm.size,
			system_clock::from_time_t(itm.atime),
			system_clock::from_time_t(itm.mtime),
			itm.hash_valid,
			itm.unreadable
		);
	}
	conn.query_void("COMMIT;");
}
|
||||
|
|
|
@ -21,40 +21,19 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
#include <cstdint>
|
||||
#include <boost/utility/string_ref.hpp>
|
||||
#include <ctime>
|
||||
|
||||
namespace dinlib {
|
||||
struct SettingsDB;;
|
||||
} //namespace dinlib
|
||||
|
||||
namespace din {
|
||||
// Plain aggregate describing one filesystem entry.
// The member order is part of the interface: the struct is filled with
// positional brace initialisation.
struct FileRecordData {
	std::string path;
	std::string hash;      //hash in textual form
	std::time_t atime;     //access time
	std::time_t mtime;     //modification time
	uint16_t level;
	uint64_t size;
	bool is_directory;
	bool is_symlink;
	bool unreadable;
	bool hash_valid;
};
|
||||
|
||||
// Description of a set that owns its name string, as filled in by
// read_from_db().
struct SetRecordDataFull {
	std::string name;
	uint32_t disk_number;
	char type;
};
|
||||
|
||||
struct SetRecordData {
|
||||
const boost::string_ref name;
|
||||
const char type;
|
||||
};
|
||||
struct FileRecordData;
|
||||
struct SetRecordData;
|
||||
struct SetRecordDataFull;
|
||||
struct TigerHash;
|
||||
|
||||
void write_to_db ( const dinlib::SettingsDB& parDB, const std::vector<FileRecordData>& parData, const SetRecordData& parSetData );
|
||||
bool read_from_db ( FileRecordData& parItem, SetRecordDataFull& parSet, const dinlib::SettingsDB& parDB, std::string&& parHash );
|
||||
bool read_from_db ( FileRecordData& parItem, SetRecordDataFull& parSet, const dinlib::SettingsDB& parDB, const TigerHash& parHash );
|
||||
} //namespace din
|
||||
|
||||
#endif
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "dindexer-common/settings.hpp"
|
||||
#include "filestats.hpp"
|
||||
#include "mimetype.hpp"
|
||||
#include "recorddata.hpp"
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
|
@ -40,50 +41,18 @@
|
|||
#if defined(INDEXER_VERBOSE)
|
||||
# include <iostream>
|
||||
#endif
|
||||
#include <boost/utility/string_ref.hpp>
|
||||
|
||||
namespace din {
|
||||
typedef TigerHash HashType;
|
||||
|
||||
struct FileEntry {
|
||||
FileEntry ( const char* parPath, const fastf::FileStats& parSt ) :
|
||||
path(parPath),
|
||||
hash {},
|
||||
access_time(parSt.atime),
|
||||
modify_time(parSt.mtime),
|
||||
//file_size(0),
|
||||
level(static_cast<uint16_t>(parSt.level)),
|
||||
is_dir(parSt.is_dir),
|
||||
is_symlink(parSt.is_symlink),
|
||||
unreadable(false)
|
||||
{
|
||||
}
|
||||
|
||||
FileEntry ( const FileEntry& ) = delete;
|
||||
FileEntry ( FileEntry&& ) = default;
|
||||
FileEntry& operator= ( const FileEntry& ) = delete;
|
||||
FileEntry& operator= ( FileEntry&& ) = default;
|
||||
bool operator< ( const FileEntry& parOther ) const;
|
||||
bool operator== ( const FileEntry& ) const = delete;
|
||||
|
||||
std::string path;
|
||||
std::string mime;
|
||||
HashType hash;
|
||||
std::time_t access_time;
|
||||
std::time_t modify_time;
|
||||
uint64_t file_size;
|
||||
uint16_t level;
|
||||
bool is_dir;
|
||||
bool is_symlink;
|
||||
bool unreadable;
|
||||
};
|
||||
using HashType = decltype(FileRecordData::hash);
|
||||
|
||||
namespace {
|
||||
typedef std::vector<FileEntry>::iterator FileEntryIt;
|
||||
typedef std::vector<FileRecordData>::iterator FileEntryIt;
|
||||
|
||||
void hash_dir (FileEntryIt parEntry, FileEntryIt parBegin, FileEntryIt parEnd, const PathName& parCurrDir, std::function<void(std::size_t)> parNextItemCallback, bool parIgnoreErrors, MimeType& parMime) {
|
||||
assert(parEntry != parEnd);
|
||||
assert(parEntry->is_dir);
|
||||
FileEntry& curr_entry = *parEntry;
|
||||
assert(parEntry->is_directory);
|
||||
FileRecordData& curr_entry = *parEntry;
|
||||
auto& curr_entry_it = parEntry;
|
||||
|
||||
//Build a blob with the hashes and filenames of every directory that
|
||||
|
@ -106,10 +75,10 @@ namespace din {
|
|||
#if defined(INDEXER_VERBOSE)
|
||||
std::cout << "Making initial hash for " << parCurrDir << "...\n";
|
||||
#endif
|
||||
curr_entry.mime = parMime.analyze(it_entry->path);
|
||||
curr_entry.mime_full = parMime.analyze(it_entry->path);
|
||||
while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->path).pop_right()) {
|
||||
PathName curr_subdir(it_entry->path);
|
||||
if (it_entry->is_dir) {
|
||||
if (it_entry->is_directory) {
|
||||
hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback, parIgnoreErrors, parMime);
|
||||
|
||||
std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
|
||||
|
@ -128,11 +97,11 @@ namespace din {
|
|||
}
|
||||
|
||||
tiger_data(dir_blob, curr_entry.hash);
|
||||
curr_entry.file_size = 0;
|
||||
curr_entry.size = 0;
|
||||
#if defined(INDEXER_VERBOSE)
|
||||
std::cout << "Got intermediate hash for dir " << parCurrDir <<
|
||||
": " << tiger_to_string(curr_entry.hash) <<
|
||||
' ' << curr_entry.mime << '\n';
|
||||
' ' << curr_entry.mime_type << '\n';
|
||||
#endif
|
||||
}
|
||||
|
||||
|
@ -141,7 +110,7 @@ namespace din {
|
|||
auto it_entry = curr_entry_it;
|
||||
while (
|
||||
it_entry != parEnd
|
||||
and (it_entry->is_dir
|
||||
and (it_entry->is_directory
|
||||
or it_entry->level != curr_entry_it->level + 1
|
||||
or PathName(it_entry->path).pop_right() != parCurrDir
|
||||
)
|
||||
|
@ -149,15 +118,19 @@ namespace din {
|
|||
++it_entry;
|
||||
}
|
||||
|
||||
while (it_entry != parEnd and not it_entry->is_dir and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->path).pop_right() == parCurrDir) {
|
||||
assert(not it_entry->is_dir);
|
||||
while (it_entry != parEnd and not it_entry->is_directory and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->path).pop_right() == parCurrDir) {
|
||||
assert(not it_entry->is_directory);
|
||||
#if defined(INDEXER_VERBOSE)
|
||||
std::cout << "Hashing file " << it_entry->path << "...";
|
||||
#endif
|
||||
parNextItemCallback(it_entry - parBegin);
|
||||
try {
|
||||
tiger_file(it_entry->path, it_entry->hash, curr_entry_it->hash, it_entry->file_size);
|
||||
it_entry->mime = parMime.analyze(it_entry->path);
|
||||
tiger_file(it_entry->path, it_entry->hash, curr_entry_it->hash, it_entry->size);
|
||||
it_entry->hash_valid = true;
|
||||
it_entry->mime_full = parMime.analyze(it_entry->path);
|
||||
auto mime_pair = split_mime(it_entry->mime_full);
|
||||
it_entry->mime_type = mime_pair.first;
|
||||
it_entry->mime_charset = mime_pair.second;
|
||||
}
|
||||
catch (const std::ios_base::failure& e) {
|
||||
if (parIgnoreErrors) {
|
||||
|
@ -170,8 +143,8 @@ namespace din {
|
|||
}
|
||||
|
||||
#if defined(INDEXER_VERBOSE)
|
||||
std::cout << ' ' << tiger_to_string(it_entry->hash) <<
|
||||
' ' << it_entry->mime << '\n';
|
||||
std::cout << ' ' << tiger_to_string(it_entry->hash) << ' ' <<
|
||||
"Mime type: \"" << it_entry->mime_type << "\"\n";
|
||||
#endif
|
||||
++it_entry;
|
||||
}
|
||||
|
@ -180,16 +153,28 @@ namespace din {
|
|||
#if defined(INDEXER_VERBOSE)
|
||||
std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(curr_entry_it->hash) << '\n';
|
||||
#endif
|
||||
curr_entry_it->hash_valid = true;
|
||||
}
|
||||
|
||||
template <bool FileTrue=true>
|
||||
struct IsFile {
|
||||
bool operator() ( const FileEntry& parEntry ) const { return parEntry.is_dir xor FileTrue; }
|
||||
bool operator() ( const FileRecordData& parEntry ) const { return parEntry.is_directory xor FileTrue; }
|
||||
};
|
||||
|
||||
// Builds the FileRecordData for parPath from the stats collected while
// scanning: access/modification times, level and the directory/symlink
// flags are taken from parSt.
FileRecordData make_file_record_data (const char* parPath, const fastf::FileStats& parSt) {
	FileRecordData record(
		parPath,
		parSt.atime,
		parSt.mtime,
		parSt.level,
		parSt.is_dir,
		parSt.is_symlink
	);
	return record;
}
|
||||
} //unnamed namespace
|
||||
|
||||
struct Indexer::LocalData {
|
||||
typedef std::vector<FileEntry> PathList;
|
||||
typedef std::vector<FileRecordData> PathList;
|
||||
|
||||
dinlib::SettingsDB db_settings;
|
||||
PathList paths;
|
||||
|
@ -202,12 +187,13 @@ namespace din {
|
|||
bool ignore_read_errors;
|
||||
};
|
||||
|
||||
bool FileEntry::operator< (const FileEntry& parOther) const {
|
||||
const FileEntry& o = parOther;
|
||||
bool file_record_data_lt (const FileRecordData& parLeft, const FileRecordData& parRight) {
|
||||
const FileRecordData& l = parLeft;
|
||||
const FileRecordData& r = parRight;
|
||||
return
|
||||
(level < o.level)
|
||||
or (level == o.level and is_dir and not o.is_dir)
|
||||
or (level == o.level and is_dir == o.is_dir and path < o.path)
|
||||
(l.level < r.level)
|
||||
or (l.level == r.level and l.is_directory and not r.is_directory)
|
||||
or (l.level == r.level and l.is_directory == r.is_directory and l.path < r.path)
|
||||
|
||||
//sort by directory - parent first, children later
|
||||
//(level == o.level and is_dir and not o.is_dir)
|
||||
|
@ -257,7 +243,7 @@ namespace din {
|
|||
|
||||
void Indexer::calculate_hash() {
|
||||
PathName base_path(m_local_data->paths.front().path);
|
||||
std::sort(m_local_data->paths.begin(), m_local_data->paths.end());
|
||||
std::sort(m_local_data->paths.begin(), m_local_data->paths.end(), &file_record_data_lt);
|
||||
MimeType mime;
|
||||
|
||||
#if defined(INDEXER_VERBOSE)
|
||||
|
@ -266,7 +252,7 @@ namespace din {
|
|||
itm.hash.part_b = 1;
|
||||
itm.hash.part_c = 1;
|
||||
|
||||
if (itm.is_dir)
|
||||
if (itm.is_directory)
|
||||
std::cout << "(D) ";
|
||||
else
|
||||
std::cout << "(F) ";
|
||||
|
@ -317,41 +303,25 @@ namespace din {
|
|||
#endif
|
||||
|
||||
if (not parForce) {
|
||||
std::string first_hash(tiger_to_string(m_local_data->paths.front().hash, true));
|
||||
const auto& first_hash = m_local_data->paths.front().hash;
|
||||
FileRecordData itm;
|
||||
SetRecordDataFull set;
|
||||
const bool already_in_db = read_from_db(itm, set, m_local_data->db_settings, std::move(first_hash));
|
||||
const bool already_in_db = read_from_db(itm, set, m_local_data->db_settings, first_hash);
|
||||
if (already_in_db) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
PathName base_path(m_local_data->paths.front().path);
|
||||
std::vector<FileRecordData> data;
|
||||
data.reserve(m_local_data->paths.size());
|
||||
for (const auto& itm : m_local_data->paths) {
|
||||
data.push_back(FileRecordData {
|
||||
make_relative_path(base_path, PathName(itm.path)).path(),
|
||||
tiger_to_string(itm.hash),
|
||||
itm.access_time,
|
||||
itm.modify_time,
|
||||
itm.level,
|
||||
itm.file_size,
|
||||
itm.is_dir,
|
||||
itm.is_symlink,
|
||||
itm.unreadable,
|
||||
not itm.unreadable
|
||||
});
|
||||
}
|
||||
|
||||
SetRecordData set_data {parSetName, parType};
|
||||
write_to_db(m_local_data->db_settings, data, set_data);
|
||||
write_to_db(m_local_data->db_settings, m_local_data->paths, set_data);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Indexer::add_path (const char* parPath, const fastf::FileStats& parStats) {
|
||||
m_local_data->paths.push_back(
|
||||
FileEntry(parPath, parStats));
|
||||
make_file_record_data(parPath, parStats));
|
||||
if (not parStats.is_dir) {
|
||||
++m_local_data->file_count;
|
||||
}
|
||||
|
@ -364,14 +334,14 @@ namespace din {
|
|||
|
||||
std::cout << "---------------- FILE LIST ----------------\n";
|
||||
for (const auto& cur_itm : m_local_data->paths) {
|
||||
if (not cur_itm.is_dir) {
|
||||
if (not cur_itm.is_directory) {
|
||||
PathName cur_path(cur_itm.path);
|
||||
std::cout << make_relative_path(base_path, cur_path).path() << '\n';
|
||||
}
|
||||
}
|
||||
std::cout << "---------------- DIRECTORY LIST ----------------\n";
|
||||
for (const auto& cur_itm : m_local_data->paths) {
|
||||
if (cur_itm.is_dir) {
|
||||
if (cur_itm.is_directory) {
|
||||
PathName cur_path(cur_itm.path);
|
||||
std::cout << make_relative_path(base_path, cur_path).path() << '\n';
|
||||
}
|
||||
|
|
|
@ -133,6 +133,10 @@ namespace {
|
|||
parShowProgress = false;
|
||||
#endif
|
||||
if (not parShowProgress) {
|
||||
//Hashing file /mnt/cdrom/Sacred 2/Fallen Angel/UK/Sacred.2.Fallen.Angel-ArenaBG/DISC2/S2DISC2.md1... 512c201321ed01cc2a82c9f80bfeaaa673bc8eb3cea4e5c1
|
||||
//terminate called after throwing an instance of 'std::ios_base::failure'
|
||||
//what(): basic_filebuf::xsgetn error reading the file
|
||||
//Hashing file /mnt/cdrom/Sacred 2/Fallen Angel/UK/Sacred.2.Fallen.Angel-ArenaBG/DISC2/S2DISC2.mdf...Annullato
|
||||
parIndexer.calculate_hash();
|
||||
}
|
||||
#if defined(WITH_PROGRESS_FEEDBACK)
|
||||
|
|
86
src/scan/recorddata.hpp
Normal file
86
src/scan/recorddata.hpp
Normal file
|
@ -0,0 +1,86 @@
|
|||
/* Copyright 2015, Michele Santullo
|
||||
* This file is part of "dindexer".
|
||||
*
|
||||
* "dindexer" is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* "dindexer" is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef id3CD7F105AC314540A864487E981E5A7E
|
||||
#define id3CD7F105AC314540A864487E981E5A7E
|
||||
|
||||
#include "tiger.hpp"
|
||||
#include <string>
|
||||
#include <boost/utility/string_ref.hpp>
|
||||
#include <cstdint>
|
||||
#include <ctime>
|
||||
#include <boost/flyweight.hpp>
|
||||
#include <boost/flyweight/no_locking.hpp>
|
||||
|
||||
namespace din {
|
||||
struct FileRecordData {
|
||||
struct MimeStringTagStruct { };
|
||||
typedef boost::flyweights::tag<MimeStringTagStruct> MimeStringTag;
|
||||
typedef boost::flyweight<std::string, boost::flyweights::no_locking, MimeStringTag> mime_string;
|
||||
|
||||
FileRecordData ( void ) = default;
|
||||
FileRecordData ( const char* parPath, std::time_t parATime, std::time_t parMTime, uint64_t parLevel, bool parIsDir, bool parIsSymLink ) :
|
||||
hash {},
|
||||
path(parPath),
|
||||
mime_full(),
|
||||
atime(parATime),
|
||||
mtime(parMTime),
|
||||
mime_type(),
|
||||
mime_charset(),
|
||||
size(0),
|
||||
level(parLevel),
|
||||
is_directory(parIsDir),
|
||||
is_symlink(parIsSymLink),
|
||||
unreadable(false),
|
||||
hash_valid(false)
|
||||
{
|
||||
}
|
||||
|
||||
FileRecordData ( const FileRecordData& ) = delete;
|
||||
FileRecordData ( FileRecordData&& ) = default;
|
||||
FileRecordData& operator= ( const FileRecordData& ) = delete;
|
||||
FileRecordData& operator= ( FileRecordData&& ) = default;
|
||||
bool operator== ( const FileRecordData& ) const = delete;
|
||||
|
||||
TigerHash hash;
|
||||
std::string path;
|
||||
mime_string mime_full;
|
||||
std::time_t atime;
|
||||
std::time_t mtime;
|
||||
boost::string_ref mime_type;
|
||||
boost::string_ref mime_charset;
|
||||
uint64_t size;
|
||||
uint16_t level;
|
||||
bool is_directory;
|
||||
bool is_symlink;
|
||||
bool unreadable;
|
||||
bool hash_valid;
|
||||
};
|
||||
|
||||
// Description of a set that owns its name string, as filled in by
// read_from_db().
struct SetRecordDataFull {
	std::string name;
	uint32_t disk_number;
	char type;
};
|
||||
|
||||
struct SetRecordData {
|
||||
const boost::string_ref name;
|
||||
const char type;
|
||||
};
|
||||
} //namespace din
|
||||
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue