mirror of https://github.com/KingDuckZ/dindexer.git (synced 2024-12-03 02:15:41 +00:00)
Restore DB writing functionality.
This ends the scantask refactoring. It is now possible to scan directories using the new system. Progress feedback is made available again, although it could use some improvements.
This commit is contained in:
parent 43c8024b0c
commit 34ead94c8d
3 changed files with 32 additions and 765 deletions
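For readers skimming the hunks below, here is a minimal sketch of what the new code path does: the scan is assembled from small scantask objects whose results are obtained through get_or_create(), and the outcome is persisted by the restored add_to_db() helper. This is only a sketch condensed from the new lines visible in this diff; the wrapper function, its parameters, the omitted includes, and the SetRecordDataTask type behind setrecdata (whose construction is not shown in these hunks) are assumptions, not code from the repository.

//Sketch only: condensed from the new lines of this commit.
//Standard and project includes are omitted; SetRecordDataTask is a
//hypothetical name for the task that produces the SetRecordDataFull.
int scan_and_store_sketch (
	const dinlib::Settings& settings,
	const std::string& setname,
	const std::string& search_path,
	char def_media_type,
	bool type_given,
	bool ignore_read_errors,
	const std::shared_ptr<SetRecordDataTask>& setrecdata //hypothetical: built outside the visible hunks
) {
	using std::shared_ptr;

	//Each scantask wraps one piece of work and exposes its result through
	//get_or_create(), pulling in the tasks it depends on.
	shared_ptr<stask::SetBasic> setbasic(new stask::SetBasic(setname));
	shared_ptr<stask::DirTree> scan_dirtree(new stask::DirTree(search_path));
	shared_ptr<stask::MediaType> media_type(new stask::MediaType(setbasic, def_media_type, type_given, search_path));
	shared_ptr<stask::Hashing> hashing(new stask::Hashing(scan_dirtree, ignore_read_errors));
	shared_ptr<stask::ContentType> content_type(new stask::ContentType(setbasic, scan_dirtree, media_type));
	shared_ptr<stask::Mime> mime(new stask::Mime(scan_dirtree));
	shared_ptr<FileRecordDataFiller> filerecdata(new FileRecordDataFiller(mime, hashing));

#if defined(WITH_PROGRESS_FEEDBACK)
	//Progress feedback is reported by the hashing task itself.
	hashing->set_progress_callback(&print_progress);
#endif

	//add_to_db() first looks the set up by the hash of its first record and
	//refuses to write a duplicate unless forced, then calls din::write_to_db().
	if (not add_to_db(filerecdata->get_or_create(), setrecdata->get_or_create(), settings.db)) {
		std::cerr << "Not written to DB, likely because a set with the same hash already exists\n";
	}
	return 0;
}

The same wiring appears in main() in the hunks below; note that the new add_to_db() overload takes the whole SetRecordDataFull produced by the scan instead of the loose name/type/content parameters of the commented-out version it replaces.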
@@ -1,77 +0,0 @@
-/* Copyright 2015, 2016, Michele Santullo
- * This file is part of "dindexer".
- *
- * "dindexer" is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * "dindexer" is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef idE555EF56730442C1ADDC7B2AE7A9340E
-#define idE555EF56730442C1ADDC7B2AE7A9340E
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#if !defined(NDEBUG)
-# define INDEXER_VERBOSE
-#endif
-
-#if defined(WITH_PROGRESS_FEEDBACK)
-namespace std {
-	class condition_variable;
-} //namespace std
-#endif
-
-namespace fastf {
-	struct FileStats;
-} //namespace fastf
-
-namespace dinlib {
-	struct Settings;
-} //namespace dinlib
-
-namespace mchlib {
-	struct FileRecordData;
-
-	class Indexer {
-	public:
-		Indexer ( void );
-		Indexer ( Indexer&& ) = default;
-		Indexer ( const Indexer& ) = delete;
-		~Indexer ( void ) noexcept;
-
-		bool add_path ( const char* parPath, const fastf::FileStats& parStats );
-#if defined(INDEXER_VERBOSE)
-		void dump ( void ) const;
-#endif
-
-		std::size_t total_items ( void ) const;
-		std::string operator[] ( std::size_t parIndex ) const;
-#if defined(WITH_PROGRESS_FEEDBACK)
-		std::size_t processed_items ( void ) const;
-		std::string current_item ( void ) const;
-		std::condition_variable& step_notify ( void );
-#endif
-		void calculate_hash ( void );
-		bool empty ( void ) const;
-		void ignore_read_errors ( bool parIgnore );
-		const std::vector<FileRecordData>& record_data ( void ) const;
-
-	private:
-		struct LocalData;
-
-		std::unique_ptr<LocalData> m_local_data;
-	};
-} //namespace mchlib
-
-#endif
@@ -1,526 +0,0 @@
-/* Copyright 2015, 2016, Michele Santullo
- * This file is part of "dindexer".
- *
- * "dindexer" is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * "dindexer" is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
- */
-
-//WARNING: buggy code - intermediate hash for directories that contain files
-//is likely wrong!
-//#define USE_LEGACY_HASH_DIR
-
-#include "dindexer-machinery/indexer.hpp"
-#include "pathname.hpp"
-#include "dindexer-machinery/tiger.hpp"
-#include "dindexer-common/settings.hpp"
-#include "dindexer-machinery/filestats.hpp"
-#include "mimetype.hpp"
-#include "dindexer-machinery/recorddata.hpp"
-#if !defined(USE_LEGACY_HASH_DIR)
-# include "dindexer-machinery/set_listing.hpp"
-#endif
-#include <algorithm>
-#include <functional>
-#include <stdexcept>
-#if defined(WITH_PROGRESS_FEEDBACK)
-# include <atomic>
-# include <condition_variable>
-#endif
-#include <cstdint>
-#include <ciso646>
-#include <cassert>
-#include <boost/iterator/filter_iterator.hpp>
-#include <sstream>
-#include <iterator>
-
-#if defined(INDEXER_VERBOSE)
-# include <iostream>
-#endif
-#include <boost/utility/string_ref.hpp>
-#include <boost/range/empty.hpp>
-
-namespace mchlib {
-	using HashType = decltype(FileRecordData::hash);
-
-	namespace {
-		typedef std::vector<FileRecordData>::iterator FileEntryIt;
-
-		void append_to_vec (std::vector<char>& parDest, const HashType& parHash, const std::string& parString) {
-			const auto old_size = parDest.size();
-			parDest.resize(old_size + sizeof(HashType) + parString.size());
-			std::copy(parHash.byte_data, parHash.byte_data + sizeof(HashType), parDest.begin() + old_size);
-			std::copy(parString.begin(), parString.end(), parDest.begin() + old_size + sizeof(HashType));
-		}
-
-		void append_to_vec (std::vector<char>& parDest, const std::string& parString) {
-			const auto old_size = parDest.size();
-			parDest.resize(old_size + parString.size());
-			std::copy(parString.begin(), parString.end(), parDest.begin() + old_size);
-		}
-
-#if !defined(USE_LEGACY_HASH_DIR)
-		void hash_dir (FileRecordData& parEntry, MutableSetListingView& parList, const PathName& parCurrDir, MimeType& parMime, bool parIgnoreErrors) {
-			assert(parEntry.is_directory);
-
-			parEntry.mime_full = parMime.analyze(parEntry.abs_path);
-
-			//Build a blob with the hashes and filenames of every directory that
-			//is a direct child of current entry
-			std::vector<char> dir_blob;
-#if defined(INDEXER_VERBOSE)
-			std::cout << "Making initial hash for " << parCurrDir << "...\n";
-#endif
-			for (auto it = parList.begin(); it != parList.end(); ++it) {
-				assert(parCurrDir == PathName(it->abs_path).pop_right());
-
-				PathName curr_subdir(it->abs_path);
-				const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
-				if (it->is_directory) {
-					auto cd_list = MutableSetListingView(it);
-					assert(boost::empty(cd_list) or cd_list.begin()->abs_path != it->abs_path);
-
-					hash_dir(*it, cd_list, curr_subdir, parMime, parIgnoreErrors);
-					append_to_vec(dir_blob, it->hash, relpath);
-				}
-				else {
-					append_to_vec(dir_blob, relpath);
-				}
-			}
-			tiger_data(dir_blob, parEntry.hash);
-			parEntry.size = 0;
-
-#if defined(INDEXER_VERBOSE)
-			std::cout << "Got intermediate hash for dir " << parCurrDir <<
-				": " << tiger_to_string(parEntry.hash) <<
-				' ' << parEntry.mime_type << '\n';
-#endif
-
-			//Now with the initial hash ready, let's start hashing files, if any
-			for (auto it = first_file(parList); it != parList.end(); ++it) {
-				assert(not it->is_directory);
-#if defined(INDEXER_VERBOSE)
-				std::cout << "Hashing file " << it->abs_path << "...";
-#endif
-				//TODO: notify callback
-				try {
-					tiger_file(it->abs_path, it->hash, parEntry.hash, it->size);
-					it->hash_valid = true;
-					it->mime_full = parMime.analyze(it->abs_path);
-					auto mime_pair = split_mime(it->mime_full);
-					it->mime_type = mime_pair.first;
-					it->mime_charset = mime_pair.second;
-				}
-				catch (const std::ios_base::failure& e) {
-					if (parIgnoreErrors) {
-						it->unreadable = true;
-						it->hash = HashType {};
-						if (it->mime_full.get().empty()) {
-							it->mime_full = "unknown";
-							it->mime_type = boost::string_ref(it->mime_full.get());
-							it->mime_charset = boost::string_ref(it->mime_full.get());
-						}
-					}
-					else {
-						throw e;
-					}
-				}
-
-#if defined(INDEXER_VERBOSE)
-				std::cout << ' ' << tiger_to_string(it->hash) << ' ' <<
-					"Mime type: \"" << it->mime_type << "\"\n";
-#endif
-			}
-
-#if defined(INDEXER_VERBOSE)
-			std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(parEntry.hash) << '\n';
-#endif
-			parEntry.hash_valid = true;
-			{
-				parEntry.mime_full = parMime.analyze(parEntry.abs_path);
-				auto mime_pair = split_mime(parEntry.mime_full);
-				parEntry.mime_type = mime_pair.first;
-				parEntry.mime_charset = mime_pair.second;
-			}
-		}
-#endif
-
-#if defined(USE_LEGACY_HASH_DIR)
-		void hash_dir (FileEntryIt parEntry, FileEntryIt parBegin, FileEntryIt parEnd, const PathName& parCurrDir, std::function<void(std::size_t)> parNextItemCallback, bool parIgnoreErrors, MimeType& parMime) {
-			assert(parEntry != parEnd);
-			assert(parEntry->is_directory);
-			FileRecordData& curr_entry = *parEntry;
-			auto& curr_entry_it = parEntry;
-
-			curr_entry.mime_full = parMime.analyze(curr_entry.abs_path);
-
-			//Build a blob with the hashes and filenames of every directory that
-			//is a direct child of current entry
-			{
-				std::vector<char> dir_blob;
-				auto it_entry = curr_entry_it;
-
-				while (
-					it_entry != parEnd and (
-						it_entry->level == curr_entry.level
-						or parCurrDir != PathName(it_entry->abs_path).pop_right()
-						//and (not it_entry->is_dir or (it_entry->level <= curr_entry.level
-						//and parCurrDir != PathName(it_entry->path).pop_right()))
-				)) {
-					assert(it_entry->level >= curr_entry.level);
-					++it_entry;
-				}
-
-#if defined(INDEXER_VERBOSE)
-				std::cout << "Making initial hash for " << parCurrDir << "...\n";
-#endif
-				while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->abs_path).pop_right()) {
-					PathName curr_subdir(it_entry->abs_path);
-					const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
-
-					if (it_entry->is_directory) {
-						hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback, parIgnoreErrors, parMime);
-						append_to_vec(dir_blob, it_entry->hash, relpath);
-					}
-					else {
-						append_to_vec(dir_blob, relpath);
-					}
-					++it_entry;
-				}
-
-				tiger_data(dir_blob, curr_entry.hash);
-				curr_entry.size = 0;
-#if defined(INDEXER_VERBOSE)
-				std::cout << "Got intermediate hash for dir " << parCurrDir <<
-					": " << tiger_to_string(curr_entry.hash) <<
-					' ' << curr_entry.mime_type << '\n';
-#endif
-			}
-
-			//Now with the initial hash ready, let's start hashing files, if any
-			{
-				auto it_entry = curr_entry_it;
-				while (
-					it_entry != parEnd
-					and (it_entry->is_directory
-						or it_entry->level != curr_entry_it->level + 1
-						or PathName(it_entry->abs_path).pop_right() != parCurrDir
-					)
-				) {
-					++it_entry;
-				}
-
-				while (it_entry != parEnd and not it_entry->is_directory and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->abs_path).pop_right() == parCurrDir) {
-					assert(not it_entry->is_directory);
-#if defined(INDEXER_VERBOSE)
-					std::cout << "Hashing file " << it_entry->abs_path << "...";
-#endif
-					parNextItemCallback(it_entry - parBegin);
-					try {
-						tiger_file(it_entry->abs_path, it_entry->hash, curr_entry_it->hash, it_entry->size);
-						it_entry->hash_valid = true;
-						it_entry->mime_full = parMime.analyze(it_entry->abs_path);
-						auto mime_pair = split_mime(it_entry->mime_full);
-						it_entry->mime_type = mime_pair.first;
-						it_entry->mime_charset = mime_pair.second;
-					}
-					catch (const std::ios_base::failure& e) {
-						if (parIgnoreErrors) {
-							it_entry->unreadable = true;
-							it_entry->hash = HashType {};
-							if (it_entry->mime_full.get().empty()) {
-								it_entry->mime_full = "unknown";
-								it_entry->mime_type = boost::string_ref(it_entry->mime_full.get());
-								it_entry->mime_charset = boost::string_ref(it_entry->mime_full.get());
-							}
-						}
-						else {
-							throw e;
-						}
-					}
-
-#if defined(INDEXER_VERBOSE)
-					std::cout << ' ' << tiger_to_string(it_entry->hash) << ' ' <<
-						"Mime type: \"" << it_entry->mime_type << "\"\n";
-#endif
-					++it_entry;
-				}
-			}
-
-#if defined(INDEXER_VERBOSE)
-			std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(curr_entry_it->hash) << '\n';
-#endif
-			curr_entry_it->hash_valid = true;
-			{
-				curr_entry_it->mime_full = parMime.analyze(curr_entry_it->abs_path);
-				auto mime_pair = split_mime(curr_entry_it->mime_full);
-				curr_entry_it->mime_type = mime_pair.first;
-				curr_entry_it->mime_charset = mime_pair.second;
-			}
-		}
-#endif
-
-		template <bool FileTrue=true>
-		struct IsFile {
-			bool operator() ( const FileRecordData& parEntry ) const { return parEntry.is_directory xor FileTrue; }
-		};
-
-		FileRecordData make_file_record_data (const char* parPath, const fastf::FileStats& parSt) {
-			return FileRecordData(
-				parPath,
-				parSt.atime,
-				parSt.mtime,
-				parSt.level,
-				parSt.is_dir,
-				parSt.is_symlink
-			);
-		}
-
-		bool file_record_data_lt (const FileRecordData& parLeft, const FileRecordData& parRight) {
-			const FileRecordData& l = parLeft;
-			const FileRecordData& r = parRight;
-			return
-				(l.level < r.level)
-				or (l.level == r.level and l.is_directory and not r.is_directory)
-				or (l.level == r.level and l.is_directory == r.is_directory and l.abs_path < r.abs_path)
-
-				//sort by directory - parent first, children later
-				//(level == o.level and is_dir and not o.is_dir)
-				//or (level == o.level and is_dir == o.is_dir and path < o.path)
-				//or (level > o.level + 1)
-				//or (level + 1 == o.level and is_dir and not o.is_dir and path < o.path)
-				//or (level + 1 == o.level and is_dir and not o.is_dir and path == PathName(o.path).dirname())
-				//or (level == o.level + 1 and not (o.is_dir and not is_dir and o.path == PathName(path).dirname()))
-			;
-		}
-
-		void populate_rel_paths (const PathName& parBase, std::vector<FileRecordData>& parItems) {
-			const std::size_t offset = parBase.str_path_size() + 1;
-			for (FileRecordData& itm : parItems) {
-				const auto curr_offset = std::min(offset, itm.abs_path.size());
-				itm.path = boost::string_ref(itm.abs_path).substr(curr_offset);
-				assert(itm.path.data());
-			}
-		}
-	} //unnamed namespace
-
-	struct Indexer::LocalData {
-		typedef std::vector<FileRecordData> PathList;
-
-		PathList paths;
-#if defined(WITH_PROGRESS_FEEDBACK)
-		std::atomic<std::size_t> done_count;
-		std::atomic<std::size_t> processing_index;
-		std::condition_variable step_notify;
-#endif
-		std::size_t file_count;
-		bool ignore_read_errors;
-	};
-
-	Indexer::Indexer() :
-		m_local_data(new LocalData)
-	{
-#if !defined(NDEBUG)
-		//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b", 2, true, false));
-		//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b/c/file.txt", 4, false, false));
-		//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b/c/file.c", 4, false, false));
-		//assert(FileEntry("/a/b/c/d", 4, true, false) < FileEntry("/a/b", 2, true, false));
-		//assert(FileEntry("/a/b/c/d", 4, true, false) < FileEntry("/a/b/c", 3, true, false));
-		//assert(FileEntry("/a/b/c/1.txt", 4, true, false) < FileEntry("/a/b/c/2.txt", 4, true, false));
-		//assert(not (FileEntry("/a/b/file.txt", 3, false, false) < FileEntry("/a/b", 2, true, false)));
-		//assert(not (FileEntry("/a", 1, true, false) < FileEntry("/a/b", 2, true, false)));
-		//assert(not (FileEntry("/a/b/1.txt", 3, false, false) < FileEntry("/a/b/c/f.txt", 4, true, false)));
-		//assert(not (FileEntry("/a/b/c/file.c", 4, false, false) < FileEntry("/a/b/c", 3, true, false)));
-#endif
-#if defined(WITH_PROGRESS_FEEDBACK)
-		m_local_data->done_count = 0;
-		m_local_data->processing_index = 0;
-#endif
-		m_local_data->file_count = 0;
-	}
-
-	Indexer::~Indexer() noexcept {
-	}
-
-	std::size_t Indexer::total_items() const {
-		return m_local_data->file_count;
-	}
-
-#if defined(WITH_PROGRESS_FEEDBACK)
-	std::size_t Indexer::processed_items() const {
-		return m_local_data->done_count;
-	}
-#endif
-
-	void Indexer::calculate_hash() {
-		PathName base_path(m_local_data->paths.front().abs_path);
-		std::sort(m_local_data->paths.begin(), m_local_data->paths.end(), &file_record_data_lt);
-		MimeType mime;
-
-#if defined(INDEXER_VERBOSE)
-		for (auto& itm : m_local_data->paths) {
-			itm.hash.part_a = 1;
-			itm.hash.part_b = 1;
-			itm.hash.part_c = 1;
-
-			if (itm.is_directory)
-				std::cout << "(D) ";
-			else
-				std::cout << "(F) ";
-			std::cout << itm.abs_path << " (" << itm.level << ")\n";
-		}
-		std::cout << "-----------------------------------------------------\n";
-#endif
-
-#if !defined(USE_LEGACY_HASH_DIR)
-		MutableSetListingView recordlist(m_local_data->paths.begin(), m_local_data->paths.end(), base_path.atom_count());
-#endif
-#if defined(WITH_PROGRESS_FEEDBACK)
-		m_local_data->done_count = 0;
-		hash_dir(
-#if defined(USE_LEGACY_HASH_DIR)
-			m_local_data->paths.begin(),
-			m_local_data->paths.begin(),
-			m_local_data->paths.end(),
-			base_path,
-			[=](std::size_t parNext) {
-				++m_local_data->done_count;
-				m_local_data->processing_index = parNext;
-				m_local_data->step_notify.notify_all();
-			},
-			m_local_data->ignore_read_errors,
-			mime
-#else
-			m_local_data->paths.front(),
-			recordlist,
-			base_path,
-			mime,
-			m_local_data->ignore_read_errors
-#endif
-		);
-
-		//TODO: re-enable after hash_dir sends progress notifications again
-		//assert(m_local_data->done_count == m_local_data->file_count);
-#else
-		hash_dir(
-#if defined(USE_LEGACY_HASH_DIR)
-			m_local_data->paths.begin(),
-			m_local_data->paths.begin(),
-			m_local_data->paths.end(),
-			base_path,
-			[](std::size_t) {},
-			m_local_data->ignore_read_errors,
-			mime
-#else
-			m_local_data->paths.front(),
-			recordlist,
-			base_path,
-			mime,
-			m_local_data->ignore_read_errors
-#endif
-		);
-#endif
-
-		populate_rel_paths(base_path, m_local_data->paths);
-
-#if defined(INDEXER_VERBOSE)
-		for (const auto& itm : m_local_data->paths) {
-			assert(not (1 == itm.hash.part_a and 1 == itm.hash.part_b and 1 == itm.hash.part_c));
-		}
-#endif
-	}
-
-	bool Indexer::add_path (const char* parPath, const fastf::FileStats& parStats) {
-		auto it_before = SetListing::lower_bound(
-			m_local_data->paths,
-			parPath,
-			parStats.level,
-			parStats.is_dir
-		);
-
-		m_local_data->paths.insert(
-			it_before,
-			make_file_record_data(parPath, parStats)
-		);
-		if (not parStats.is_dir) {
-			++m_local_data->file_count;
-		}
-		return true;
-	}
-
-#if defined(INDEXER_VERBOSE)
-	void Indexer::dump() const {
-		PathName base_path(m_local_data->paths.front().abs_path);
-
-		std::cout << "---------------- FILE LIST ----------------\n";
-		for (const auto& cur_itm : m_local_data->paths) {
-			if (not cur_itm.is_directory) {
-				PathName cur_path(cur_itm.abs_path);
-				std::cout << make_relative_path(base_path, cur_path).path() << '\n';
-			}
-		}
-		std::cout << "---------------- DIRECTORY LIST ----------------\n";
-		for (const auto& cur_itm : m_local_data->paths) {
-			if (cur_itm.is_directory) {
-				PathName cur_path(cur_itm.abs_path);
-				std::cout << make_relative_path(base_path, cur_path).path() << '\n';
-			}
-		}
-	}
-#endif
-
-	bool Indexer::empty() const {
-		return m_local_data->paths.size() < 2;
-	}
-
-#if defined(WITH_PROGRESS_FEEDBACK)
-	std::condition_variable& Indexer::step_notify() {
-		return m_local_data->step_notify;
-	}
-#endif
-
-#if defined(WITH_PROGRESS_FEEDBACK)
-	std::string Indexer::current_item() const {
-		if (m_local_data->paths.empty() or 0 == m_local_data->processing_index)
-			return std::string();
-
-		PathName base_path(m_local_data->paths.front().abs_path);
-		PathName ret_path(m_local_data->paths[m_local_data->processing_index].abs_path);
-		return make_relative_path(base_path, ret_path).path();
-	}
-#endif
-
-	std::string Indexer::operator[] (std::size_t parIndex) const {
-		if (parIndex >= m_local_data->file_count) {
-			std::ostringstream oss;
-			oss << "Requested index " << parIndex << " is out of range: only " << m_local_data->file_count << " items are available";
-			throw std::out_of_range(oss.str());
-		}

-		auto it = boost::make_filter_iterator<IsFile<>>(m_local_data->paths.begin(), m_local_data->paths.end());
-		assert(not m_local_data->paths.empty());
-		std::advance(it, parIndex);
-		return make_relative_path(PathName(m_local_data->paths.front().abs_path), PathName(it->abs_path)).path();
-	}
-
-	void Indexer::ignore_read_errors (bool parIgnore) {
-		m_local_data->ignore_read_errors = parIgnore;
-	}
-
-	const std::vector<FileRecordData>& Indexer::record_data() const {
-#if defined(WITH_PROGRESS_FEEDBACK)
-		//TODO: re-enable after hash_dir sends progress notifications again
-		//assert(m_local_data->done_count == m_local_data->file_count);
-#endif
-		return m_local_data->paths;
-	}
-} //namespace mchlib
@@ -36,18 +36,9 @@
 #include <iostream>
 #include <iomanip>
 #include <ciso646>
-#include <sstream>
-#include <algorithm>
-#include <iterator>
-#if defined(WITH_PROGRESS_FEEDBACK)
-# include <thread>
-# include <mutex>
-# include <condition_variable>
-#endif
 
 namespace {
-//	void run_hash_calculation ( mchlib::Indexer& parIndexer, bool parShowProgress );
-//	bool add_to_db ( const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, char parContent, const dinlib::SettingsDB& parDBSettings, bool parForce=false );
+	bool add_to_db ( const std::vector<mchlib::FileRecordData>& parData, const mchlib::SetRecordDataFull& parSet, const dinlib::SettingsDB& parDBSettings, bool parForce=false );
 #if defined(WITH_PROGRESS_FEEDBACK)
 	void print_progress ( const boost::string_ref parPath, uint64_t parFileBytes, uint64_t parTotalBytes, uint32_t parFileNum );
 #endif
@@ -73,9 +64,9 @@ int main (int parArgc, char* parArgv[]) {
 		return 2;
 	}
 #if defined(WITH_PROGRESS_FEEDBACK)
-	const bool verbose = (0 == vm.count("quiet"));
+	//const bool verbose = (0 == vm.count("quiet"));
 #else
-	const bool verbose = false;
+	//const bool verbose = false;
 #endif
 
 	dinlib::Settings settings;
@@ -87,11 +78,14 @@ int main (int parArgc, char* parArgv[]) {
 		}
 	}
 
+	bool ignore_read_errors = (vm.count("ignore-errors") > 0);
 	const std::string search_path(vm["search-path"].as<std::string>());
+	const char def_media_type = (vm.count("type") ? vm["type"].as<char>() : 'O');
+
 	std::shared_ptr<stask::SetBasic> setbasic(new stask::SetBasic(std::string(vm["setname"].as<std::string>())));
 	std::shared_ptr<stask::DirTree> scan_dirtree(new stask::DirTree(search_path));
-	std::shared_ptr<stask::MediaType> media_type(new stask::MediaType(setbasic, (vm.count("type") ? vm["type"].as<char>() : 'O'), vm.count("type"), search_path));
-	std::shared_ptr<stask::Hashing> hashing(new stask::Hashing(scan_dirtree, true));
+	std::shared_ptr<stask::MediaType> media_type(new stask::MediaType(setbasic, def_media_type, vm.count("type"), search_path));
+	std::shared_ptr<stask::Hashing> hashing(new stask::Hashing(scan_dirtree, ignore_read_errors));
 	std::shared_ptr<stask::ContentType> content_type(new stask::ContentType(setbasic, scan_dirtree, media_type));
 	std::shared_ptr<stask::Mime> mime(new stask::Mime(scan_dirtree));
 	std::shared_ptr<FileRecordDataFiller> filerecdata(new FileRecordDataFiller(mime, hashing));
@@ -101,159 +95,35 @@ int main (int parArgc, char* parArgv[]) {
 	hashing->set_progress_callback(&print_progress);
 #endif
 
-
-	std::cout << "Content type: " << setrecdata->get_or_create().type << std::endl;
-	const auto& hashes = filerecdata->get_or_create();
-	std::cout << std::endl;
-	for (const auto& hash : hashes) {
-		std::cout << '"' << hash.path <<
-			"\" -> " << mchlib::tiger_to_string(hash.hash) <<
-			" size: " << hash.size <<
-			" mime: " << hash.mime_type << "\n";
-	}
-
-	return 0;
-
-#if defined(WITH_MEDIA_AUTODETECT)
-	//char set_type;
-	//if (0 == vm.count("type")) {
-	//	std::cout << "Analyzing disc... ";
-	//	try {
-	//		const auto guessed_type = mchlib::guess_media_type(std::string(search_path));
-	//		set_type = guessed_type;
-	//		std::cout << "Setting type to " << set_type << " ("
-	//			<< dinlib::media_type_to_str(guessed_type) << ")\n";
-	//	}
-	//	catch (const std::runtime_error& e) {
-	//		std::cout << '\n';
-	//		std::cerr << e.what();
-	//		return 1;
-	//	}
-	//}
-	//else {
-	//	set_type = vm["type"].as<char>();
-	//}
-	//std::unique_ptr<mchlib::scantask::MediaAutodetect> media_autodetector(
-	//	new mchlib::scantask::MediaAutodetect(vm["type"].as<char>())
-	//);
-#else
-	const char set_type = vm["type"].as<char>();
-#endif
-
-	std::cout << "constructing...\n";
-
-	//indexer.ignore_read_errors(vm.count("ignore-errors") > 0);
-	//if (verbose) {
-	//	std::cout << "Fetching items list...\n";
-	//}
-
-	//if (indexer.empty()) {
-	//	std::cerr << "Nothing found at the given location, quitting\n";
-	//	return 1;
-	//}
-	//else {
-	{
-		//run_hash_calculation(indexer, verbose);
-
-		//TODO: guess_content_type() relies on FileRecordData::path being set to
-		//the relative path already. Unfortunately at this point it just got
-		//default-initialized to be the same as abs_path, so for a video DVD, for
-		//example, it's going to be like "/mnt/cdrom/VIDEO_TS" instead of just
-		//"VIDEO_TS". This will cause guess_content_type() to miss. Relative
-		//paths are populated at the end of calculate_hash(), so until I come up
-		//with a better system I'm just moving content detection to after hash
-		//calculation.
-		//const auto set_type_casted = dinlib::char_to_media_type(set_type);
-		//const mchlib::ContentTypes content = mchlib::guess_content_type(set_type_casted, indexer.record_data());
-		//const char content_type = mchlib::content_type_to_char(content);
-
-		if (verbose) {
-			std::cout << "Writing to database...\n";
-		}
-		//if (not add_to_db(indexer.record_data(), vm["setname"].as<std::string>(), set_type, content_type, settings.db)) {
-		//	std::cerr << "Not written to DB, likely because a set with the same hash already exists\n";
-		//}
+	if (not add_to_db(filerecdata->get_or_create(), setrecdata->get_or_create(), settings.db)) {
+		std::cerr << "Not written to DB, likely because a set with the same hash already exists\n";
 	}
 	return 0;
 }
 
 namespace {
-//	void run_hash_calculation (mchlib::Indexer& parIndexer, bool parShowProgress) {
-//		if (parIndexer.empty()) {
-//			return;
-//		}
-//
-//#if !defined(WITH_PROGRESS_FEEDBACK)
-//		parShowProgress = false;
-//#endif
-//		if (not parShowProgress) {
-////Hashing file /mnt/cdrom/Sacred 2/Fallen Angel/UK/Sacred.2.Fallen.Angel-ArenaBG/DISC2/S2DISC2.md1... 512c201321ed01cc2a82c9f80bfeaaa673bc8eb3cea4e5c1
-////terminate called after throwing an instance of 'std::ios_base::failure'
-////what(): basic_filebuf::xsgetn error reading the file
-////Hashing file /mnt/cdrom/Sacred 2/Fallen Angel/UK/Sacred.2.Fallen.Angel-ArenaBG/DISC2/S2DISC2.mdf...Annullato
-//			parIndexer.calculate_hash();
-//		}
-//#if defined(WITH_PROGRESS_FEEDBACK)
-//		else {
-//			typedef std::ostream_iterator<char> cout_iterator;
-//
-//			std::cout << "Processing";
-//			std::cout.flush();
-//			const auto total_items = parIndexer.total_items();
-//			std::thread hash_thread(&mchlib::Indexer::calculate_hash, &parIndexer);
-//			std::mutex progress_print;
-//			std::size_t clear_size = 0;
-//			const auto digit_count = static_cast<std::size_t>(std::log10(static_cast<double>(total_items))) + 1;
-//			do {
-//				//TODO: fix this steaming pile of crap
-//				//std::unique_lock<std::mutex> lk(progress_print);
-//				//parIndexer.step_notify().wait(lk);
-//				std::cout << '\r';
-//				std::fill_n(cout_iterator(std::cout), clear_size, ' ');
-//				std::cout << '\r';
-//				{
-//					std::ostringstream oss;
-//					const auto item_index = std::min(total_items - 1, parIndexer.processed_items());
-//					oss << "Processing file "
-//						<< std::setw(digit_count) << std::setfill(' ') << (item_index + 1)
-//						<< " of " << total_items << " \"" << parIndexer.current_item() << '"';
-//					const auto msg(oss.str());
-//					clear_size = msg.size();
-//					std::cout << msg;
-//					std::cout.flush();
-//				}
-//			} while (false); //parIndexer.processed_items() != total_items);
-//
-//			hash_thread.join();
-//			if (parIndexer.processed_items() > 0) {
-//				std::cout << '\n';
-//			}
-//		}
-//#endif
-//	}
-//
-//	bool add_to_db (const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, char parContentType, const dinlib::SettingsDB& parDBSettings, bool parForce) {
-//		using mchlib::FileRecordData;
-//		using mchlib::SetRecordDataFull;
-//		using mchlib::SetRecordData;
-//
-//		if (not parForce) {
-//			const auto& first_hash = parData.front().hash;
-//			FileRecordData itm;
-//			SetRecordDataFull set;
-//			const bool already_in_db = din::read_from_db(itm, set, parDBSettings, first_hash);
-//			if (already_in_db) {
-//				return false;
-//			}
-//		}
-//
-//		SetRecordData set_data {parSetName, parType, parContentType };
-//		const auto app_signature = dinlib::dindexer_signature();
-//		const auto lib_signature = mchlib::lib_signature();
-//		const std::string signature = std::string(app_signature.data(), app_signature.size()) + "/" + std::string(lib_signature.data(), lib_signature.size());
-//		din::write_to_db(parDBSettings, parData, set_data, signature);
-//		return true;
-//	}
+	bool add_to_db (const std::vector<mchlib::FileRecordData>& parData, const mchlib::SetRecordDataFull& parSet, const dinlib::SettingsDB& parDBSettings, bool parForce) {
+		using mchlib::FileRecordData;
+		using mchlib::SetRecordDataFull;
+		using mchlib::SetRecordData;
+
+		if (not parForce) {
+			const auto& first_hash = parData.front().hash;
+			FileRecordData itm;
+			SetRecordDataFull set;
+			const bool already_in_db = din::read_from_db(itm, set, parDBSettings, first_hash);
+			if (already_in_db) {
+				return false;
+			}
+		}
+
+		const SetRecordData& set_data {parSet.name, parSet.type, parSet.content_type };
+		const auto app_signature = dinlib::dindexer_signature();
+		const auto lib_signature = mchlib::lib_signature();
+		const std::string signature = std::string(app_signature.data(), app_signature.size()) + "/" + std::string(lib_signature.data(), lib_signature.size());
+		din::write_to_db(parDBSettings, parData, set_data, signature);
+		return true;
+	}
 
 #if defined(WITH_PROGRESS_FEEDBACK)
 	void print_progress (const boost::string_ref parPath, uint64_t /*parFileBytes*/, uint64_t parTotalBytes, uint32_t parFileNum) {