1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2025-08-21 15:50:50 +00:00

Bugfix - don't store absolute paths to files in the DB.

The bug was introduced in a previous commit.
This commit is contained in:
King_DuckZ 2016-01-11 12:46:06 +00:00
parent 3f48d00ad9
commit fa12bb2d74
3 changed files with 50 additions and 25 deletions

View file

@ -1,4 +1,4 @@
/* Copyright 2015, Michele Santullo
/* Copyright 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
@ -62,7 +62,7 @@ namespace mchlib {
while (
it_entry != parEnd and (
it_entry->level == curr_entry.level
or parCurrDir != PathName(it_entry->path).pop_right()
or parCurrDir != PathName(it_entry->abs_path).pop_right()
//and (not it_entry->is_dir or (it_entry->level <= curr_entry.level
//and parCurrDir != PathName(it_entry->path).pop_right()))
)) {
@ -73,9 +73,9 @@ namespace mchlib {
#if defined(INDEXER_VERBOSE)
std::cout << "Making initial hash for " << parCurrDir << "...\n";
#endif
curr_entry.mime_full = parMime.analyze(it_entry->path);
while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->path).pop_right()) {
PathName curr_subdir(it_entry->path);
curr_entry.mime_full = parMime.analyze(it_entry->abs_path);
while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->abs_path).pop_right()) {
PathName curr_subdir(it_entry->abs_path);
if (it_entry->is_directory) {
hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback, parIgnoreErrors, parMime);
@ -110,22 +110,22 @@ namespace mchlib {
it_entry != parEnd
and (it_entry->is_directory
or it_entry->level != curr_entry_it->level + 1
or PathName(it_entry->path).pop_right() != parCurrDir
or PathName(it_entry->abs_path).pop_right() != parCurrDir
)
) {
++it_entry;
}
while (it_entry != parEnd and not it_entry->is_directory and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->path).pop_right() == parCurrDir) {
while (it_entry != parEnd and not it_entry->is_directory and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->abs_path).pop_right() == parCurrDir) {
assert(not it_entry->is_directory);
#if defined(INDEXER_VERBOSE)
std::cout << "Hashing file " << it_entry->path << "...";
std::cout << "Hashing file " << it_entry->abs_path << "...";
#endif
parNextItemCallback(it_entry - parBegin);
try {
tiger_file(it_entry->path, it_entry->hash, curr_entry_it->hash, it_entry->size);
tiger_file(it_entry->abs_path, it_entry->hash, curr_entry_it->hash, it_entry->size);
it_entry->hash_valid = true;
it_entry->mime_full = parMime.analyze(it_entry->path);
it_entry->mime_full = parMime.analyze(it_entry->abs_path);
auto mime_pair = split_mime(it_entry->mime_full);
it_entry->mime_type = mime_pair.first;
it_entry->mime_charset = mime_pair.second;
@ -158,7 +158,7 @@ namespace mchlib {
#endif
curr_entry_it->hash_valid = true;
{
curr_entry_it->mime_full = parMime.analyze(curr_entry_it->path);
curr_entry_it->mime_full = parMime.analyze(curr_entry_it->abs_path);
auto mime_pair = split_mime(curr_entry_it->mime_full);
curr_entry_it->mime_type = mime_pair.first;
curr_entry_it->mime_charset = mime_pair.second;
@ -187,7 +187,7 @@ namespace mchlib {
return
(l.level < r.level)
or (l.level == r.level and l.is_directory and not r.is_directory)
or (l.level == r.level and l.is_directory == r.is_directory and l.path < r.path)
or (l.level == r.level and l.is_directory == r.is_directory and l.abs_path < r.abs_path)
//sort by directory - parent first, children later
//(level == o.level and is_dir and not o.is_dir)
@ -198,6 +198,15 @@ namespace mchlib {
//or (level == o.level + 1 and not (o.is_dir and not is_dir and o.path == PathName(path).dirname()))
;
}
void populate_rel_paths (const PathName& parBase, std::vector<FileRecordData>& parItems) {
const std::size_t offset = parBase.str_path_size();
for (FileRecordData& itm : parItems) {
assert(itm.abs_path.size() >= offset);
itm.path = boost::string_ref(itm.abs_path).substr(offset);
assert(itm.path.data());
}
}
} //unnamed namespace
struct Indexer::LocalData {
@ -249,7 +258,7 @@ namespace mchlib {
#endif
void Indexer::calculate_hash() {
PathName base_path(m_local_data->paths.front().path);
PathName base_path(m_local_data->paths.front().abs_path);
std::sort(m_local_data->paths.begin(), m_local_data->paths.end(), &file_record_data_lt);
MimeType mime;
@ -263,7 +272,7 @@ namespace mchlib {
std::cout << "(D) ";
else
std::cout << "(F) ";
std::cout << itm.path << " (" << itm.level << ")\n";
std::cout << itm.abs_path << " (" << itm.level << ")\n";
}
std::cout << "-----------------------------------------------------\n";
#endif
@ -297,6 +306,8 @@ namespace mchlib {
);
#endif
populate_rel_paths(base_path, m_local_data->paths);
#if defined(INDEXER_VERBOSE)
for (const auto& itm : m_local_data->paths) {
assert(not (1 == itm.hash.part_a and 1 == itm.hash.part_b and 1 == itm.hash.part_c));
@ -315,19 +326,19 @@ namespace mchlib {
#if defined(INDEXER_VERBOSE)
void Indexer::dump() const {
PathName base_path(m_local_data->paths.front().path);
PathName base_path(m_local_data->paths.front().abs_path);
std::cout << "---------------- FILE LIST ----------------\n";
for (const auto& cur_itm : m_local_data->paths) {
if (not cur_itm.is_directory) {
PathName cur_path(cur_itm.path);
PathName cur_path(cur_itm.abs_path);
std::cout << make_relative_path(base_path, cur_path).path() << '\n';
}
}
std::cout << "---------------- DIRECTORY LIST ----------------\n";
for (const auto& cur_itm : m_local_data->paths) {
if (cur_itm.is_directory) {
PathName cur_path(cur_itm.path);
PathName cur_path(cur_itm.abs_path);
std::cout << make_relative_path(base_path, cur_path).path() << '\n';
}
}
@ -349,8 +360,8 @@ namespace mchlib {
if (m_local_data->paths.empty() or 0 == m_local_data->processing_index)
return std::string();
PathName base_path(m_local_data->paths.front().path);
PathName ret_path(m_local_data->paths[m_local_data->processing_index].path);
PathName base_path(m_local_data->paths.front().abs_path);
PathName ret_path(m_local_data->paths[m_local_data->processing_index].abs_path);
return make_relative_path(base_path, ret_path).path();
}
#endif
@ -365,7 +376,7 @@ namespace mchlib {
auto it = boost::make_filter_iterator<IsFile<>>(m_local_data->paths.begin(), m_local_data->paths.end());
assert(not m_local_data->paths.empty());
std::advance(it, parIndex);
return make_relative_path(PathName(m_local_data->paths.front().path), PathName(it->path)).path();
return make_relative_path(PathName(m_local_data->paths.front().abs_path), PathName(it->abs_path)).path();
}
void Indexer::ignore_read_errors (bool parIgnore) {

View file

@ -49,13 +49,18 @@ namespace din {
}
auto row = resultset[0];
parItem.path = row["path"];
parItem.abs_path = row["path"];
parItem.hash = parHash;
parItem.level = lexical_cast<uint16_t>(row["level"]);
parItem.size = lexical_cast<uint64_t>(row["size"]);
parItem.is_directory = (row["is_directory"] == "t" ? true : false);
parItem.is_symlink = (row["is_symlink"] == "t" ? true : false);
group_id = lexical_cast<uint32_t>(row["group_id"]);
if (parItem.abs_path.size() != 1 or parItem.abs_path != "/") {
parItem.abs_path = std::string("/") + parItem.abs_path;
}
parItem.path = boost::string_ref(parItem.abs_path).substr(1);
}
{
@ -101,6 +106,12 @@ namespace din {
new_group_id = lexical_cast<uint32_t>(id_res[0][0]);
}
//TODO: remove this empty_path part. This is a temporary workaround for a
//bug in libpqtypes that causes empty paths to be inserted as NULL values
//in the db.
const char* empty_path = "/";
const auto empty_path_string = boost::string_ref(empty_path);
for (std::size_t z = 0; z < parData.size(); ++z) {
const std::string query = "INSERT INTO \"files\" (path, hash, "
"level, group_id, is_directory, is_symlink, size, "
@ -109,8 +120,9 @@ namespace din {
"($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13);";
const auto& itm = parData[z];
assert(itm.path.data());
conn.query(query,
itm.path,
(itm.path.empty() ? empty_path_string : itm.path),
tiger_to_string(itm.hash),
itm.level,
new_group_id,