mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2025-08-21 15:50:50 +00:00
Bugfix - don't store absolute paths to files in the DB.
The bug was introduced in a previous commit.
This commit is contained in:
parent
3f48d00ad9
commit
fa12bb2d74
3 changed files with 50 additions and 25 deletions
|
@ -1,4 +1,4 @@
|
|||
/* Copyright 2015, Michele Santullo
|
||||
/* Copyright 2016, Michele Santullo
|
||||
* This file is part of "dindexer".
|
||||
*
|
||||
* "dindexer" is free software: you can redistribute it and/or modify
|
||||
|
@ -62,7 +62,7 @@ namespace mchlib {
|
|||
while (
|
||||
it_entry != parEnd and (
|
||||
it_entry->level == curr_entry.level
|
||||
or parCurrDir != PathName(it_entry->path).pop_right()
|
||||
or parCurrDir != PathName(it_entry->abs_path).pop_right()
|
||||
//and (not it_entry->is_dir or (it_entry->level <= curr_entry.level
|
||||
//and parCurrDir != PathName(it_entry->path).pop_right()))
|
||||
)) {
|
||||
|
@ -73,9 +73,9 @@ namespace mchlib {
|
|||
#if defined(INDEXER_VERBOSE)
|
||||
std::cout << "Making initial hash for " << parCurrDir << "...\n";
|
||||
#endif
|
||||
curr_entry.mime_full = parMime.analyze(it_entry->path);
|
||||
while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->path).pop_right()) {
|
||||
PathName curr_subdir(it_entry->path);
|
||||
curr_entry.mime_full = parMime.analyze(it_entry->abs_path);
|
||||
while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->abs_path).pop_right()) {
|
||||
PathName curr_subdir(it_entry->abs_path);
|
||||
if (it_entry->is_directory) {
|
||||
hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback, parIgnoreErrors, parMime);
|
||||
|
||||
|
@ -110,22 +110,22 @@ namespace mchlib {
|
|||
it_entry != parEnd
|
||||
and (it_entry->is_directory
|
||||
or it_entry->level != curr_entry_it->level + 1
|
||||
or PathName(it_entry->path).pop_right() != parCurrDir
|
||||
or PathName(it_entry->abs_path).pop_right() != parCurrDir
|
||||
)
|
||||
) {
|
||||
++it_entry;
|
||||
}
|
||||
|
||||
while (it_entry != parEnd and not it_entry->is_directory and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->path).pop_right() == parCurrDir) {
|
||||
while (it_entry != parEnd and not it_entry->is_directory and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->abs_path).pop_right() == parCurrDir) {
|
||||
assert(not it_entry->is_directory);
|
||||
#if defined(INDEXER_VERBOSE)
|
||||
std::cout << "Hashing file " << it_entry->path << "...";
|
||||
std::cout << "Hashing file " << it_entry->abs_path << "...";
|
||||
#endif
|
||||
parNextItemCallback(it_entry - parBegin);
|
||||
try {
|
||||
tiger_file(it_entry->path, it_entry->hash, curr_entry_it->hash, it_entry->size);
|
||||
tiger_file(it_entry->abs_path, it_entry->hash, curr_entry_it->hash, it_entry->size);
|
||||
it_entry->hash_valid = true;
|
||||
it_entry->mime_full = parMime.analyze(it_entry->path);
|
||||
it_entry->mime_full = parMime.analyze(it_entry->abs_path);
|
||||
auto mime_pair = split_mime(it_entry->mime_full);
|
||||
it_entry->mime_type = mime_pair.first;
|
||||
it_entry->mime_charset = mime_pair.second;
|
||||
|
@ -158,7 +158,7 @@ namespace mchlib {
|
|||
#endif
|
||||
curr_entry_it->hash_valid = true;
|
||||
{
|
||||
curr_entry_it->mime_full = parMime.analyze(curr_entry_it->path);
|
||||
curr_entry_it->mime_full = parMime.analyze(curr_entry_it->abs_path);
|
||||
auto mime_pair = split_mime(curr_entry_it->mime_full);
|
||||
curr_entry_it->mime_type = mime_pair.first;
|
||||
curr_entry_it->mime_charset = mime_pair.second;
|
||||
|
@ -187,7 +187,7 @@ namespace mchlib {
|
|||
return
|
||||
(l.level < r.level)
|
||||
or (l.level == r.level and l.is_directory and not r.is_directory)
|
||||
or (l.level == r.level and l.is_directory == r.is_directory and l.path < r.path)
|
||||
or (l.level == r.level and l.is_directory == r.is_directory and l.abs_path < r.abs_path)
|
||||
|
||||
//sort by directory - parent first, children later
|
||||
//(level == o.level and is_dir and not o.is_dir)
|
||||
|
@ -198,6 +198,15 @@ namespace mchlib {
|
|||
//or (level == o.level + 1 and not (o.is_dir and not is_dir and o.path == PathName(path).dirname()))
|
||||
;
|
||||
}
|
||||
|
||||
void populate_rel_paths (const PathName& parBase, std::vector<FileRecordData>& parItems) {
|
||||
const std::size_t offset = parBase.str_path_size();
|
||||
for (FileRecordData& itm : parItems) {
|
||||
assert(itm.abs_path.size() >= offset);
|
||||
itm.path = boost::string_ref(itm.abs_path).substr(offset);
|
||||
assert(itm.path.data());
|
||||
}
|
||||
}
|
||||
} //unnamed namespace
|
||||
|
||||
struct Indexer::LocalData {
|
||||
|
@ -249,7 +258,7 @@ namespace mchlib {
|
|||
#endif
|
||||
|
||||
void Indexer::calculate_hash() {
|
||||
PathName base_path(m_local_data->paths.front().path);
|
||||
PathName base_path(m_local_data->paths.front().abs_path);
|
||||
std::sort(m_local_data->paths.begin(), m_local_data->paths.end(), &file_record_data_lt);
|
||||
MimeType mime;
|
||||
|
||||
|
@ -263,7 +272,7 @@ namespace mchlib {
|
|||
std::cout << "(D) ";
|
||||
else
|
||||
std::cout << "(F) ";
|
||||
std::cout << itm.path << " (" << itm.level << ")\n";
|
||||
std::cout << itm.abs_path << " (" << itm.level << ")\n";
|
||||
}
|
||||
std::cout << "-----------------------------------------------------\n";
|
||||
#endif
|
||||
|
@ -297,6 +306,8 @@ namespace mchlib {
|
|||
);
|
||||
#endif
|
||||
|
||||
populate_rel_paths(base_path, m_local_data->paths);
|
||||
|
||||
#if defined(INDEXER_VERBOSE)
|
||||
for (const auto& itm : m_local_data->paths) {
|
||||
assert(not (1 == itm.hash.part_a and 1 == itm.hash.part_b and 1 == itm.hash.part_c));
|
||||
|
@ -315,19 +326,19 @@ namespace mchlib {
|
|||
|
||||
#if defined(INDEXER_VERBOSE)
|
||||
void Indexer::dump() const {
|
||||
PathName base_path(m_local_data->paths.front().path);
|
||||
PathName base_path(m_local_data->paths.front().abs_path);
|
||||
|
||||
std::cout << "---------------- FILE LIST ----------------\n";
|
||||
for (const auto& cur_itm : m_local_data->paths) {
|
||||
if (not cur_itm.is_directory) {
|
||||
PathName cur_path(cur_itm.path);
|
||||
PathName cur_path(cur_itm.abs_path);
|
||||
std::cout << make_relative_path(base_path, cur_path).path() << '\n';
|
||||
}
|
||||
}
|
||||
std::cout << "---------------- DIRECTORY LIST ----------------\n";
|
||||
for (const auto& cur_itm : m_local_data->paths) {
|
||||
if (cur_itm.is_directory) {
|
||||
PathName cur_path(cur_itm.path);
|
||||
PathName cur_path(cur_itm.abs_path);
|
||||
std::cout << make_relative_path(base_path, cur_path).path() << '\n';
|
||||
}
|
||||
}
|
||||
|
@ -349,8 +360,8 @@ namespace mchlib {
|
|||
if (m_local_data->paths.empty() or 0 == m_local_data->processing_index)
|
||||
return std::string();
|
||||
|
||||
PathName base_path(m_local_data->paths.front().path);
|
||||
PathName ret_path(m_local_data->paths[m_local_data->processing_index].path);
|
||||
PathName base_path(m_local_data->paths.front().abs_path);
|
||||
PathName ret_path(m_local_data->paths[m_local_data->processing_index].abs_path);
|
||||
return make_relative_path(base_path, ret_path).path();
|
||||
}
|
||||
#endif
|
||||
|
@ -365,7 +376,7 @@ namespace mchlib {
|
|||
auto it = boost::make_filter_iterator<IsFile<>>(m_local_data->paths.begin(), m_local_data->paths.end());
|
||||
assert(not m_local_data->paths.empty());
|
||||
std::advance(it, parIndex);
|
||||
return make_relative_path(PathName(m_local_data->paths.front().path), PathName(it->path)).path();
|
||||
return make_relative_path(PathName(m_local_data->paths.front().abs_path), PathName(it->abs_path)).path();
|
||||
}
|
||||
|
||||
void Indexer::ignore_read_errors (bool parIgnore) {
|
||||
|
|
|
@ -49,13 +49,18 @@ namespace din {
|
|||
}
|
||||
|
||||
auto row = resultset[0];
|
||||
parItem.path = row["path"];
|
||||
parItem.abs_path = row["path"];
|
||||
parItem.hash = parHash;
|
||||
parItem.level = lexical_cast<uint16_t>(row["level"]);
|
||||
parItem.size = lexical_cast<uint64_t>(row["size"]);
|
||||
parItem.is_directory = (row["is_directory"] == "t" ? true : false);
|
||||
parItem.is_symlink = (row["is_symlink"] == "t" ? true : false);
|
||||
group_id = lexical_cast<uint32_t>(row["group_id"]);
|
||||
|
||||
if (parItem.abs_path.size() != 1 or parItem.abs_path != "/") {
|
||||
parItem.abs_path = std::string("/") + parItem.abs_path;
|
||||
}
|
||||
parItem.path = boost::string_ref(parItem.abs_path).substr(1);
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -101,6 +106,12 @@ namespace din {
|
|||
new_group_id = lexical_cast<uint32_t>(id_res[0][0]);
|
||||
}
|
||||
|
||||
//TODO: remove this empty_path part. This is a temporary fix needed to
|
||||
//work around a bug in libpqtypes for which empty paths are inserted
|
||||
//as null values in the db.
|
||||
const char* empty_path = "/";
|
||||
const auto empty_path_string = boost::string_ref(empty_path);
|
||||
|
||||
for (std::size_t z = 0; z < parData.size(); ++z) {
|
||||
const std::string query = "INSERT INTO \"files\" (path, hash, "
|
||||
"level, group_id, is_directory, is_symlink, size, "
|
||||
|
@ -109,8 +120,9 @@ namespace din {
|
|||
"($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13);";
|
||||
|
||||
const auto& itm = parData[z];
|
||||
assert(itm.path.data());
|
||||
conn.query(query,
|
||||
itm.path,
|
||||
(itm.path.empty() ? empty_path_string : itm.path),
|
||||
tiger_to_string(itm.hash),
|
||||
itm.level,
|
||||
new_group_id,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue