1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2024-12-03 02:15:41 +00:00

Restore DB writing functionality.

This ends the scantask refactoring. It is now possible to scan
directories using the new system. Progress feedback is made
available again, although it could use some improvements.
This commit is contained in:
King_DuckZ 2016-03-10 18:37:42 +01:00
parent 43c8024b0c
commit 34ead94c8d
3 changed files with 32 additions and 765 deletions

View file

@ -1,77 +0,0 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef idE555EF56730442C1ADDC7B2AE7A9340E
#define idE555EF56730442C1ADDC7B2AE7A9340E
#include <memory>
#include <string>
#include <vector>
#if !defined(NDEBUG)
# define INDEXER_VERBOSE
#endif
#if defined(WITH_PROGRESS_FEEDBACK)
namespace std {
class condition_variable;
} //namespace std
#endif
namespace fastf {
struct FileStats;
} //namespace fastf
namespace dinlib {
struct Settings;
} //namespace dinlib
namespace mchlib {
struct FileRecordData;
//Collects records for the files and directories reported through add_path()
//and computes their hashes and mime data on request. All state lives behind
//m_local_data; the class is movable but not copyable.
class Indexer {
public:
Indexer ( void );
Indexer ( Indexer&& ) = default;
Indexer ( const Indexer& ) = delete;
~Indexer ( void ) noexcept;
//Inserts a record for parPath at its sorted position; the implementation
//always returns true.
bool add_path ( const char* parPath, const fastf::FileStats& parStats );
#if defined(INDEXER_VERBOSE)
//Prints the relative paths of all files, then of all directories, to
//std::cout.
void dump ( void ) const;
#endif
//Number of non-directory entries added so far.
std::size_t total_items ( void ) const;
//Relative path of the parIndex-th file (directories are skipped). Throws
//std::out_of_range when parIndex is not smaller than total_items().
std::string operator[] ( std::size_t parIndex ) const;
#if defined(WITH_PROGRESS_FEEDBACK)
//Progress accessors: number of files processed so far, relative path of the
//entry currently being processed (empty string when none), and the condition
//variable notified on every progress step.
std::size_t processed_items ( void ) const;
std::string current_item ( void ) const;
std::condition_variable& step_notify ( void );
#endif
//Sorts the collected records, hashes them starting from the first one and
//finally fills in every record's relative path.
void calculate_hash ( void );
//True when fewer than two entries have been added.
bool empty ( void ) const;
//When parIgnore is true, files that fail to be read during hashing are
//flagged as unreadable instead of aborting the whole operation.
void ignore_read_errors ( bool parIgnore );
//Read-only access to every collected record, directories included.
const std::vector<FileRecordData>& record_data ( void ) const;
private:
//Pimpl: the definition lives in the implementation file.
struct LocalData;
std::unique_ptr<LocalData> m_local_data;
};
} //namespace mchlib
#endif

View file

@ -1,526 +0,0 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
//WARNING: buggy code - intermediate hash for directories that contain files
//is likely wrong!
//#define USE_LEGACY_HASH_DIR
#include "dindexer-machinery/indexer.hpp"
#include "pathname.hpp"
#include "dindexer-machinery/tiger.hpp"
#include "dindexer-common/settings.hpp"
#include "dindexer-machinery/filestats.hpp"
#include "mimetype.hpp"
#include "dindexer-machinery/recorddata.hpp"
#if !defined(USE_LEGACY_HASH_DIR)
# include "dindexer-machinery/set_listing.hpp"
#endif
#include <algorithm>
#include <functional>
#include <stdexcept>
#if defined(WITH_PROGRESS_FEEDBACK)
# include <atomic>
# include <condition_variable>
#endif
#include <cstdint>
#include <ciso646>
#include <cassert>
#include <boost/iterator/filter_iterator.hpp>
#include <sstream>
#include <iterator>
#if defined(INDEXER_VERBOSE)
# include <iostream>
#endif
#include <boost/utility/string_ref.hpp>
#include <boost/range/empty.hpp>
namespace mchlib {
using HashType = decltype(FileRecordData::hash);
namespace {
typedef std::vector<FileRecordData>::iterator FileEntryIt;
//Appends the sizeof(HashType) raw bytes of parHash to parDest, immediately
//followed by the characters of parString (no separator, no terminator).
void append_to_vec (std::vector<char>& parDest, const HashType& parHash, const std::string& parString) {
	const auto hash_first = parHash.byte_data;
	const auto hash_last = hash_first + sizeof(HashType);

	parDest.insert(parDest.end(), hash_first, hash_last);
	parDest.insert(parDest.end(), parString.begin(), parString.end());
}
//Appends the characters of parString to the end of parDest (no separator,
//no terminator).
void append_to_vec (std::vector<char>& parDest, const std::string& parString) {
	parDest.insert(parDest.end(), parString.begin(), parString.end());
}
#if !defined(USE_LEGACY_HASH_DIR)
//Hashes the directory parEntry and everything listed in parList.
//
//parEntry        record of the directory being hashed (must be a directory)
//parList         view over the direct children of parEntry
//parCurrDir      path of the directory being hashed
//parMime         mime analyzer used for the directory and for every file
//parIgnoreErrors when true, a file that raises std::ios_base::failure is
//                flagged as unreadable and hashing continues; when false the
//                exception is propagated to the caller
//
//The directory hash is first seeded from a blob made of the children's
//relative paths (preceded by the child's own hash for subdirectories, which
//are hashed recursively). Files are then hashed one by one, with the
//directory hash passed along to tiger_file() - presumably so that file
//contents get folded into it as well; confirm against tiger.hpp.
void hash_dir (FileRecordData& parEntry, MutableSetListingView& parList, const PathName& parCurrDir, MimeType& parMime, bool parIgnoreErrors) {
	assert(parEntry.is_directory);

	//NOTE(review): mime_full is computed again at the end of this function,
	//so this first call looks redundant - confirm before removing.
	parEntry.mime_full = parMime.analyze(parEntry.abs_path);

	//Build a blob out of every direct child of the current entry: hash plus
	//relative path for subdirectories, relative path only for anything else
	std::vector<char> dir_blob;
#if defined(INDEXER_VERBOSE)
	std::cout << "Making initial hash for " << parCurrDir << "...\n";
#endif
	for (auto it = parList.begin(); it != parList.end(); ++it) {
		assert(parCurrDir == PathName(it->abs_path).pop_right());

		PathName curr_subdir(it->abs_path);
		const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
		if (it->is_directory) {
			auto cd_list = MutableSetListingView(it);
			assert(boost::empty(cd_list) or cd_list.begin()->abs_path != it->abs_path);

			hash_dir(*it, cd_list, curr_subdir, parMime, parIgnoreErrors);
			append_to_vec(dir_blob, it->hash, relpath);
		}
		else {
			append_to_vec(dir_blob, relpath);
		}
	}
	tiger_data(dir_blob, parEntry.hash);
	parEntry.size = 0;

#if defined(INDEXER_VERBOSE)
	std::cout << "Got intermediate hash for dir " << parCurrDir <<
		": " << tiger_to_string(parEntry.hash) <<
		' ' << parEntry.mime_type << '\n';
#endif

	//Now with the initial hash ready, let's start hashing files, if any
	for (auto it = first_file(parList); it != parList.end(); ++it) {
		assert(not it->is_directory);
#if defined(INDEXER_VERBOSE)
		std::cout << "Hashing file " << it->abs_path << "...";
#endif
		//TODO: notify callback
		try {
			tiger_file(it->abs_path, it->hash, parEntry.hash, it->size);
			it->hash_valid = true;
			it->mime_full = parMime.analyze(it->abs_path);
			auto mime_pair = split_mime(it->mime_full);
			it->mime_type = mime_pair.first;
			it->mime_charset = mime_pair.second;
		}
		catch (const std::ios_base::failure&) {
			if (not parIgnoreErrors) {
				//Rethrow the exception currently being handled; "throw e"
				//would throw a copy built from the static type instead
				throw;
			}

			it->unreadable = true;
			it->hash = HashType {};
			if (it->mime_full.get().empty()) {
				it->mime_full = "unknown";
				it->mime_type = boost::string_ref(it->mime_full.get());
				it->mime_charset = boost::string_ref(it->mime_full.get());
			}
		}

#if defined(INDEXER_VERBOSE)
		std::cout << ' ' << tiger_to_string(it->hash) << ' ' <<
			"Mime type: \"" << it->mime_type << "\"\n";
#endif
	}

#if defined(INDEXER_VERBOSE)
	std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(parEntry.hash) << '\n';
#endif
	parEntry.hash_valid = true;
	{
		parEntry.mime_full = parMime.analyze(parEntry.abs_path);
		auto mime_pair = split_mime(parEntry.mime_full);
		parEntry.mime_type = mime_pair.first;
		parEntry.mime_charset = mime_pair.second;
	}
}
#endif
#if defined(USE_LEGACY_HASH_DIR)
//Legacy implementation, only compiled when USE_LEGACY_HASH_DIR is defined.
//Hashes the directory pointed to by parEntry by scanning the flat, sorted
//range [parBegin, parEnd) for its direct children.
//
//parEntry            iterator to the directory record being hashed
//parBegin, parEnd    the whole record range; parBegin is also used to turn
//                    an iterator into the index given to the callback
//parCurrDir          path of the directory being hashed
//parNextItemCallback invoked with the index of each file right before it
//                    gets hashed
//parIgnoreErrors     when true, a file that raises std::ios_base::failure is
//                    flagged as unreadable and hashing continues; when false
//                    the exception is propagated to the caller
//parMime             mime analyzer used for the directory and every file
void hash_dir (FileEntryIt parEntry, FileEntryIt parBegin, FileEntryIt parEnd, const PathName& parCurrDir, std::function<void(std::size_t)> parNextItemCallback, bool parIgnoreErrors, MimeType& parMime) {
	assert(parEntry != parEnd);
	assert(parEntry->is_directory);
	FileRecordData& curr_entry = *parEntry;
	auto& curr_entry_it = parEntry;

	curr_entry.mime_full = parMime.analyze(curr_entry.abs_path);

	//Build a blob out of every direct child of the current entry: hash plus
	//relative path for subdirectories, relative path only for anything else
	{
		std::vector<char> dir_blob;
		auto it_entry = curr_entry_it;

		//Skip forward to the first entry whose parent is parCurrDir
		while (
			it_entry != parEnd and (
				it_entry->level == curr_entry.level
				or parCurrDir != PathName(it_entry->abs_path).pop_right()
				//and (not it_entry->is_dir or (it_entry->level <= curr_entry.level
				//and parCurrDir != PathName(it_entry->path).pop_right()))
		)) {
			assert(it_entry->level >= curr_entry.level);
			++it_entry;
		}

#if defined(INDEXER_VERBOSE)
		std::cout << "Making initial hash for " << parCurrDir << "...\n";
#endif
		while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->abs_path).pop_right()) {
			PathName curr_subdir(it_entry->abs_path);
			const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();

			if (it_entry->is_directory) {
				hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback, parIgnoreErrors, parMime);
				append_to_vec(dir_blob, it_entry->hash, relpath);
			}
			else {
				append_to_vec(dir_blob, relpath);
			}
			++it_entry;
		}

		tiger_data(dir_blob, curr_entry.hash);
		curr_entry.size = 0;
#if defined(INDEXER_VERBOSE)
		std::cout << "Got intermediate hash for dir " << parCurrDir <<
			": " << tiger_to_string(curr_entry.hash) <<
			' ' << curr_entry.mime_type << '\n';
#endif
	}

	//Now with the initial hash ready, let's start hashing files, if any
	{
		auto it_entry = curr_entry_it;
		//Skip forward to the first file that is a direct child of parCurrDir
		while (
			it_entry != parEnd
			and (it_entry->is_directory
				or it_entry->level != curr_entry_it->level + 1
				or PathName(it_entry->abs_path).pop_right() != parCurrDir
			)
		) {
			++it_entry;
		}

		while (it_entry != parEnd and not it_entry->is_directory and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->abs_path).pop_right() == parCurrDir) {
			assert(not it_entry->is_directory);
#if defined(INDEXER_VERBOSE)
			std::cout << "Hashing file " << it_entry->abs_path << "...";
#endif
			parNextItemCallback(it_entry - parBegin);
			try {
				tiger_file(it_entry->abs_path, it_entry->hash, curr_entry_it->hash, it_entry->size);
				it_entry->hash_valid = true;
				it_entry->mime_full = parMime.analyze(it_entry->abs_path);
				auto mime_pair = split_mime(it_entry->mime_full);
				it_entry->mime_type = mime_pair.first;
				it_entry->mime_charset = mime_pair.second;
			}
			catch (const std::ios_base::failure&) {
				if (not parIgnoreErrors) {
					//Rethrow the exception currently being handled; "throw e"
					//would throw a copy built from the static type instead
					throw;
				}

				it_entry->unreadable = true;
				it_entry->hash = HashType {};
				if (it_entry->mime_full.get().empty()) {
					it_entry->mime_full = "unknown";
					it_entry->mime_type = boost::string_ref(it_entry->mime_full.get());
					it_entry->mime_charset = boost::string_ref(it_entry->mime_full.get());
				}
			}

#if defined(INDEXER_VERBOSE)
			std::cout << ' ' << tiger_to_string(it_entry->hash) << ' ' <<
				"Mime type: \"" << it_entry->mime_type << "\"\n";
#endif
			++it_entry;
		}
	}

#if defined(INDEXER_VERBOSE)
	std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(curr_entry_it->hash) << '\n';
#endif
	curr_entry_it->hash_valid = true;
	{
		curr_entry_it->mime_full = parMime.analyze(curr_entry_it->abs_path);
		auto mime_pair = split_mime(curr_entry_it->mime_full);
		curr_entry_it->mime_type = mime_pair.first;
		curr_entry_it->mime_charset = mime_pair.second;
	}
}
#endif
//Predicate over FileRecordData. With FileTrue == true it accepts files
//(non-directories); with FileTrue == false it accepts directories.
template <bool FileTrue=true>
struct IsFile {
	bool operator() ( const FileRecordData& parEntry ) const {
		return FileTrue != parEntry.is_directory;
	}
};
//Builds the FileRecordData for parPath out of the stats collected by the
//directory scanner: access time, modification time, nesting level and the
//directory/symlink flags are forwarded to the constructor unchanged.
FileRecordData make_file_record_data (const char* parPath, const fastf::FileStats& parSt) {
return FileRecordData(
parPath,
parSt.atime,
parSt.mtime,
parSt.level,
parSt.is_dir,
parSt.is_symlink
);
}
bool file_record_data_lt (const FileRecordData& parLeft, const FileRecordData& parRight) {
const FileRecordData& l = parLeft;
const FileRecordData& r = parRight;
return
(l.level < r.level)
or (l.level == r.level and l.is_directory and not r.is_directory)
or (l.level == r.level and l.is_directory == r.is_directory and l.abs_path < r.abs_path)
//sort by directory - parent first, children later
//(level == o.level and is_dir and not o.is_dir)
//or (level == o.level and is_dir == o.is_dir and path < o.path)
//or (level > o.level + 1)
//or (level + 1 == o.level and is_dir and not o.is_dir and path < o.path)
//or (level + 1 == o.level and is_dir and not o.is_dir and path == PathName(o.path).dirname())
//or (level == o.level + 1 and not (o.is_dir and not is_dir and o.path == PathName(path).dirname()))
;
}
void populate_rel_paths (const PathName& parBase, std::vector<FileRecordData>& parItems) {
const std::size_t offset = parBase.str_path_size() + 1;
for (FileRecordData& itm : parItems) {
const auto curr_offset = std::min(offset, itm.abs_path.size());
itm.path = boost::string_ref(itm.abs_path).substr(curr_offset);
assert(itm.path.data());
}
}
} //unnamed namespace
//Private state of Indexer, kept out of the header.
struct Indexer::LocalData {
typedef std::vector<FileRecordData> PathList;
//Every record added through add_path(), directories included
PathList paths;
#if defined(WITH_PROGRESS_FEEDBACK)
//Number of files processed so far; reset when calculate_hash() starts
std::atomic<std::size_t> done_count;
//Index into paths of the entry currently being processed; 0 is treated as
//"nothing yet" by current_item()
std::atomic<std::size_t> processing_index;
//Notified on every progress step
std::condition_variable step_notify;
#endif
//Number of non-directory entries in paths
std::size_t file_count;
//Set through Indexer::ignore_read_errors() and handed over to hash_dir()
bool ignore_read_errors;
};
//Creates an empty indexer: no paths, all counters at zero and read errors
//not ignored.
Indexer::Indexer() :
	m_local_data(new LocalData)
{
#if !defined(NDEBUG)
	//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b", 2, true, false));
	//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b/c/file.txt", 4, false, false));
	//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b/c/file.c", 4, false, false));
	//assert(FileEntry("/a/b/c/d", 4, true, false) < FileEntry("/a/b", 2, true, false));
	//assert(FileEntry("/a/b/c/d", 4, true, false) < FileEntry("/a/b/c", 3, true, false));
	//assert(FileEntry("/a/b/c/1.txt", 4, true, false) < FileEntry("/a/b/c/2.txt", 4, true, false));
	//assert(not (FileEntry("/a/b/file.txt", 3, false, false) < FileEntry("/a/b", 2, true, false)));
	//assert(not (FileEntry("/a", 1, true, false) < FileEntry("/a/b", 2, true, false)));
	//assert(not (FileEntry("/a/b/1.txt", 3, false, false) < FileEntry("/a/b/c/f.txt", 4, true, false)));
	//assert(not (FileEntry("/a/b/c/file.c", 4, false, false) < FileEntry("/a/b/c", 3, true, false)));
#endif

#if defined(WITH_PROGRESS_FEEDBACK)
	m_local_data->done_count = 0;
	m_local_data->processing_index = 0;
#endif
	m_local_data->file_count = 0;
	//LocalData has no initializers of its own, so without this line
	//calculate_hash() would read an indeterminate value whenever
	//ignore_read_errors() was never called
	m_local_data->ignore_read_errors = false;
}
//Defined in this file, where LocalData is a complete type, so that the
//unique_ptr member can be destroyed.
Indexer::~Indexer() noexcept = default;
std::size_t Indexer::total_items() const {
return m_local_data->file_count;
}
#if defined(WITH_PROGRESS_FEEDBACK)
//Returns the current value of the processed-files counter.
std::size_t Indexer::processed_items() const {
	return m_local_data->done_count.load();
}
#endif
//Sorts the collected records, hashes the whole tree starting from the first
//record and finally fills in the relative path of every record.
//Does nothing when no path has been added.
void Indexer::calculate_hash() {
	//Nothing to hash; front() on an empty vector would also be undefined
	if (m_local_data->paths.empty()) {
		return;
	}

	//The base path is taken from the first record before sorting
	PathName base_path(m_local_data->paths.front().abs_path);
	std::sort(m_local_data->paths.begin(), m_local_data->paths.end(), &file_record_data_lt);

	MimeType mime;

#if defined(INDEXER_VERBOSE)
	//Mark every hash with a sentinel value so that entries left untouched by
	//hash_dir() can be detected at the end of this function
	for (auto& itm : m_local_data->paths) {
		itm.hash.part_a = 1;
		itm.hash.part_b = 1;
		itm.hash.part_c = 1;

		if (itm.is_directory)
			std::cout << "(D) ";
		else
			std::cout << "(F) ";
		std::cout << itm.abs_path << " (" << itm.level << ")\n";
	}
	std::cout << "-----------------------------------------------------\n";
#endif

#if defined(WITH_PROGRESS_FEEDBACK)
	m_local_data->done_count = 0;
#endif

#if defined(USE_LEGACY_HASH_DIR)
	hash_dir(
		m_local_data->paths.begin(),
		m_local_data->paths.begin(),
		m_local_data->paths.end(),
		base_path,
#	if defined(WITH_PROGRESS_FEEDBACK)
		[this](std::size_t parNext) {
			++m_local_data->done_count;
			m_local_data->processing_index = parNext;
			m_local_data->step_notify.notify_all();
		},
#	else
		[](std::size_t) {},
#	endif
		m_local_data->ignore_read_errors,
		mime
	);
#else
	MutableSetListingView recordlist(m_local_data->paths.begin(), m_local_data->paths.end(), base_path.atom_count());
	hash_dir(
		m_local_data->paths.front(),
		recordlist,
		base_path,
		mime,
		m_local_data->ignore_read_errors
	);
#endif

	//TODO: re-enable after hash_dir sends progress notifications again
	//assert(m_local_data->done_count == m_local_data->file_count);

	populate_rel_paths(base_path, m_local_data->paths);

#if defined(INDEXER_VERBOSE)
	for (const auto& itm : m_local_data->paths) {
		assert(not (1 == itm.hash.part_a and 1 == itm.hash.part_b and 1 == itm.hash.part_c));
	}
#endif
}
//Inserts a record for parPath at the position reported by
//SetListing::lower_bound() and bumps the file counter for non-directories.
//Always returns true.
bool Indexer::add_path (const char* parPath, const fastf::FileStats& parStats) {
	LocalData::PathList& records = m_local_data->paths;

	const auto insert_pos = SetListing::lower_bound(records, parPath, parStats.level, parStats.is_dir);
	records.insert(insert_pos, make_file_record_data(parPath, parStats));

	m_local_data->file_count += (parStats.is_dir ? 0 : 1);
	return true;
}
#if defined(INDEXER_VERBOSE)
void Indexer::dump() const {
PathName base_path(m_local_data->paths.front().abs_path);
std::cout << "---------------- FILE LIST ----------------\n";
for (const auto& cur_itm : m_local_data->paths) {
if (not cur_itm.is_directory) {
PathName cur_path(cur_itm.abs_path);
std::cout << make_relative_path(base_path, cur_path).path() << '\n';
}
}
std::cout << "---------------- DIRECTORY LIST ----------------\n";
for (const auto& cur_itm : m_local_data->paths) {
if (cur_itm.is_directory) {
PathName cur_path(cur_itm.abs_path);
std::cout << make_relative_path(base_path, cur_path).path() << '\n';
}
}
}
#endif
//True when fewer than two entries have been added.
bool Indexer::empty() const {
	const std::size_t entry_count = m_local_data->paths.size();
	return entry_count < 2;
}
#if defined(WITH_PROGRESS_FEEDBACK)
std::condition_variable& Indexer::step_notify() {
return m_local_data->step_notify;
}
#endif
#if defined(WITH_PROGRESS_FEEDBACK)
//Returns the path, relative to the first record, of the entry currently
//being processed. An empty string is returned when processing has not
//reached any entry yet (index 0) or when the index does not refer to an
//existing record.
std::string Indexer::current_item() const {
	//Read the atomic once, so the check and the lookup agree on the value
	const std::size_t index = m_local_data->processing_index;
	if (0 == index or index >= m_local_data->paths.size())
		return std::string();

	PathName base_path(m_local_data->paths.front().abs_path);
	PathName ret_path(m_local_data->paths[index].abs_path);
	return make_relative_path(base_path, ret_path).path();
}
#endif
//Returns the path, relative to the first record, of the parIndex-th file;
//directories are skipped while counting. Throws std::out_of_range when
//parIndex is not smaller than the number of files.
std::string Indexer::operator[] (std::size_t parIndex) const {
	const std::size_t available = m_local_data->file_count;
	if (parIndex >= available) {
		std::ostringstream oss;
		oss << "Requested index " << parIndex << " is out of range: only " << available << " items are available";
		throw std::out_of_range(oss.str());
	}

	auto& records = m_local_data->paths;
	assert(not records.empty());

	//Walk the records through a filter that only lets files through
	auto file_it = boost::make_filter_iterator<IsFile<>>(records.begin(), records.end());
	std::advance(file_it, parIndex);

	return make_relative_path(PathName(records.front().abs_path), PathName(file_it->abs_path)).path();
}
void Indexer::ignore_read_errors (bool parIgnore) {
m_local_data->ignore_read_errors = parIgnore;
}
const std::vector<FileRecordData>& Indexer::record_data() const {
#if defined(WITH_PROGRESS_FEEDBACK)
//TODO: re-enable after hash_dir sends progress notifications again
//assert(m_local_data->done_count == m_local_data->file_count);
#endif
return m_local_data->paths;
}
} //namespace mchlib

View file

@ -36,18 +36,9 @@
#include <iostream> #include <iostream>
#include <iomanip> #include <iomanip>
#include <ciso646> #include <ciso646>
#include <sstream>
#include <algorithm>
#include <iterator>
#if defined(WITH_PROGRESS_FEEDBACK)
# include <thread>
# include <mutex>
# include <condition_variable>
#endif
namespace { namespace {
// void run_hash_calculation ( mchlib::Indexer& parIndexer, bool parShowProgress ); bool add_to_db ( const std::vector<mchlib::FileRecordData>& parData, const mchlib::SetRecordDataFull& parSet, const dinlib::SettingsDB& parDBSettings, bool parForce=false );
// bool add_to_db ( const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, char parContent, const dinlib::SettingsDB& parDBSettings, bool parForce=false );
#if defined(WITH_PROGRESS_FEEDBACK) #if defined(WITH_PROGRESS_FEEDBACK)
void print_progress ( const boost::string_ref parPath, uint64_t parFileBytes, uint64_t parTotalBytes, uint32_t parFileNum ); void print_progress ( const boost::string_ref parPath, uint64_t parFileBytes, uint64_t parTotalBytes, uint32_t parFileNum );
#endif #endif
@ -73,9 +64,9 @@ int main (int parArgc, char* parArgv[]) {
return 2; return 2;
} }
#if defined(WITH_PROGRESS_FEEDBACK) #if defined(WITH_PROGRESS_FEEDBACK)
const bool verbose = (0 == vm.count("quiet")); //const bool verbose = (0 == vm.count("quiet"));
#else #else
const bool verbose = false; //const bool verbose = false;
#endif #endif
dinlib::Settings settings; dinlib::Settings settings;
@ -87,11 +78,14 @@ int main (int parArgc, char* parArgv[]) {
} }
} }
bool ignore_read_errors = (vm.count("ignore-errors") > 0);
const std::string search_path(vm["search-path"].as<std::string>()); const std::string search_path(vm["search-path"].as<std::string>());
const char def_media_type = (vm.count("type") ? vm["type"].as<char>() : 'O');
std::shared_ptr<stask::SetBasic> setbasic(new stask::SetBasic(std::string(vm["setname"].as<std::string>()))); std::shared_ptr<stask::SetBasic> setbasic(new stask::SetBasic(std::string(vm["setname"].as<std::string>())));
std::shared_ptr<stask::DirTree> scan_dirtree(new stask::DirTree(search_path)); std::shared_ptr<stask::DirTree> scan_dirtree(new stask::DirTree(search_path));
std::shared_ptr<stask::MediaType> media_type(new stask::MediaType(setbasic, (vm.count("type") ? vm["type"].as<char>() : 'O'), vm.count("type"), search_path)); std::shared_ptr<stask::MediaType> media_type(new stask::MediaType(setbasic, def_media_type, vm.count("type"), search_path));
std::shared_ptr<stask::Hashing> hashing(new stask::Hashing(scan_dirtree, true)); std::shared_ptr<stask::Hashing> hashing(new stask::Hashing(scan_dirtree, ignore_read_errors));
std::shared_ptr<stask::ContentType> content_type(new stask::ContentType(setbasic, scan_dirtree, media_type)); std::shared_ptr<stask::ContentType> content_type(new stask::ContentType(setbasic, scan_dirtree, media_type));
std::shared_ptr<stask::Mime> mime(new stask::Mime(scan_dirtree)); std::shared_ptr<stask::Mime> mime(new stask::Mime(scan_dirtree));
std::shared_ptr<FileRecordDataFiller> filerecdata(new FileRecordDataFiller(mime, hashing)); std::shared_ptr<FileRecordDataFiller> filerecdata(new FileRecordDataFiller(mime, hashing));
@ -101,159 +95,35 @@ int main (int parArgc, char* parArgv[]) {
hashing->set_progress_callback(&print_progress); hashing->set_progress_callback(&print_progress);
#endif #endif
std::cout << "Content type: " << setrecdata->get_or_create().type << std::endl; if (not add_to_db(filerecdata->get_or_create(), setrecdata->get_or_create(), settings.db)) {
std::cerr << "Not written to DB, likely because a set with the same hash already exists\n";
const auto& hashes = filerecdata->get_or_create();
std::cout << std::endl;
for (const auto& hash : hashes) {
std::cout << '"' << hash.path <<
"\" -> " << mchlib::tiger_to_string(hash.hash) <<
" size: " << hash.size <<
" mime: " << hash.mime_type << "\n";
}
return 0;
#if defined(WITH_MEDIA_AUTODETECT)
//char set_type;
//if (0 == vm.count("type")) {
// std::cout << "Analyzing disc... ";
// try {
// const auto guessed_type = mchlib::guess_media_type(std::string(search_path));
// set_type = guessed_type;
// std::cout << "Setting type to " << set_type << " ("
// << dinlib::media_type_to_str(guessed_type) << ")\n";
// }
// catch (const std::runtime_error& e) {
// std::cout << '\n';
// std::cerr << e.what();
// return 1;
// }
//}
//else {
// set_type = vm["type"].as<char>();
//}
//std::unique_ptr<mchlib::scantask::MediaAutodetect> media_autodetector(
//new mchlib::scantask::MediaAutodetect(vm["type"].as<char>())
//);
#else
const char set_type = vm["type"].as<char>();
#endif
std::cout << "constructing...\n";
//indexer.ignore_read_errors(vm.count("ignore-errors") > 0);
//if (verbose) {
// std::cout << "Fetching items list...\n";
//}
//if (indexer.empty()) {
// std::cerr << "Nothing found at the given location, quitting\n";
// return 1;
//}
//else {
{
//run_hash_calculation(indexer, verbose);
//TODO: guess_content_type() relies on FileRecordData::path being set to
//the relative path already. Unfortunately at this point it just got
//default-initialized to be the same as abs_path, so for a video DVD, for
//example, it's going to be like "/mnt/cdrom/VIDEO_TS" instead of just
//"VIDEO_TS". This will cause guess_content_type() to miss. Relative
//paths are populated at the end of calculate_hash(), so until I come up
//with a better system I'm just moving content detection to after hash
//calculation.
//const auto set_type_casted = dinlib::char_to_media_type(set_type);
//const mchlib::ContentTypes content = mchlib::guess_content_type(set_type_casted, indexer.record_data());
//const char content_type = mchlib::content_type_to_char(content);
if (verbose) {
std::cout << "Writing to database...\n";
}
//if (not add_to_db(indexer.record_data(), vm["setname"].as<std::string>(), set_type, content_type, settings.db)) {
// std::cerr << "Not written to DB, likely because a set with the same hash already exists\n";
//}
} }
return 0; return 0;
} }
namespace { namespace {
// void run_hash_calculation (mchlib::Indexer& parIndexer, bool parShowProgress) { bool add_to_db (const std::vector<mchlib::FileRecordData>& parData, const mchlib::SetRecordDataFull& parSet, const dinlib::SettingsDB& parDBSettings, bool parForce) {
// if (parIndexer.empty()) { using mchlib::FileRecordData;
// return; using mchlib::SetRecordDataFull;
// } using mchlib::SetRecordData;
//
//#if !defined(WITH_PROGRESS_FEEDBACK) if (not parForce) {
// parShowProgress = false; const auto& first_hash = parData.front().hash;
//#endif FileRecordData itm;
// if (not parShowProgress) { SetRecordDataFull set;
////Hashing file /mnt/cdrom/Sacred 2/Fallen Angel/UK/Sacred.2.Fallen.Angel-ArenaBG/DISC2/S2DISC2.md1... 512c201321ed01cc2a82c9f80bfeaaa673bc8eb3cea4e5c1 const bool already_in_db = din::read_from_db(itm, set, parDBSettings, first_hash);
////terminate called after throwing an instance of 'std::ios_base::failure' if (already_in_db) {
////what(): basic_filebuf::xsgetn error reading the file return false;
////Hashing file /mnt/cdrom/Sacred 2/Fallen Angel/UK/Sacred.2.Fallen.Angel-ArenaBG/DISC2/S2DISC2.mdf...Annullato }
// parIndexer.calculate_hash(); }
// }
//#if defined(WITH_PROGRESS_FEEDBACK) const SetRecordData& set_data {parSet.name, parSet.type, parSet.content_type };
// else { const auto app_signature = dinlib::dindexer_signature();
// typedef std::ostream_iterator<char> cout_iterator; const auto lib_signature = mchlib::lib_signature();
// const std::string signature = std::string(app_signature.data(), app_signature.size()) + "/" + std::string(lib_signature.data(), lib_signature.size());
// std::cout << "Processing"; din::write_to_db(parDBSettings, parData, set_data, signature);
// std::cout.flush(); return true;
// const auto total_items = parIndexer.total_items(); }
// std::thread hash_thread(&mchlib::Indexer::calculate_hash, &parIndexer);
// std::mutex progress_print;
// std::size_t clear_size = 0;
// const auto digit_count = static_cast<std::size_t>(std::log10(static_cast<double>(total_items))) + 1;
// do {
// //TODO: fix this steaming pile of crap
// //std::unique_lock<std::mutex> lk(progress_print);
// //parIndexer.step_notify().wait(lk);
// std::cout << '\r';
// std::fill_n(cout_iterator(std::cout), clear_size, ' ');
// std::cout << '\r';
// {
// std::ostringstream oss;
// const auto item_index = std::min(total_items - 1, parIndexer.processed_items());
// oss << "Processing file "
// << std::setw(digit_count) << std::setfill(' ') << (item_index + 1)
// << " of " << total_items << " \"" << parIndexer.current_item() << '"';
// const auto msg(oss.str());
// clear_size = msg.size();
// std::cout << msg;
// std::cout.flush();
// }
// } while (false); //parIndexer.processed_items() != total_items);
//
// hash_thread.join();
// if (parIndexer.processed_items() > 0) {
// std::cout << '\n';
// }
// }
//#endif
// }
//
// bool add_to_db (const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, char parContentType, const dinlib::SettingsDB& parDBSettings, bool parForce) {
// using mchlib::FileRecordData;
// using mchlib::SetRecordDataFull;
// using mchlib::SetRecordData;
//
// if (not parForce) {
// const auto& first_hash = parData.front().hash;
// FileRecordData itm;
// SetRecordDataFull set;
// const bool already_in_db = din::read_from_db(itm, set, parDBSettings, first_hash);
// if (already_in_db) {
// return false;
// }
// }
//
// SetRecordData set_data {parSetName, parType, parContentType };
// const auto app_signature = dinlib::dindexer_signature();
// const auto lib_signature = mchlib::lib_signature();
// const std::string signature = std::string(app_signature.data(), app_signature.size()) + "/" + std::string(lib_signature.data(), lib_signature.size());
// din::write_to_db(parDBSettings, parData, set_data, signature);
// return true;
// }
#if defined(WITH_PROGRESS_FEEDBACK) #if defined(WITH_PROGRESS_FEEDBACK)
void print_progress (const boost::string_ref parPath, uint64_t /*parFileBytes*/, uint64_t parTotalBytes, uint32_t parFileNum) { void print_progress (const boost::string_ref parPath, uint64_t /*parFileBytes*/, uint64_t parTotalBytes, uint32_t parFileNum) {