diff --git a/include/dindexer-machinery/indexer.hpp b/include/dindexer-machinery/indexer.hpp deleted file mode 100644 index a6274a6..0000000 --- a/include/dindexer-machinery/indexer.hpp +++ /dev/null @@ -1,77 +0,0 @@ -/* Copyright 2015, 2016, Michele Santullo - * This file is part of "dindexer". - * - * "dindexer" is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * "dindexer" is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with "dindexer". If not, see . - */ - -#ifndef idE555EF56730442C1ADDC7B2AE7A9340E -#define idE555EF56730442C1ADDC7B2AE7A9340E - -#include -#include -#include - -#if !defined(NDEBUG) -# define INDEXER_VERBOSE -#endif - -#if defined(WITH_PROGRESS_FEEDBACK) -namespace std { - class condition_variable; -} //namespace std -#endif - -namespace fastf { - struct FileStats; -} //namespace fastf - -namespace dinlib { - struct Settings; -} //namespace dinlib - -namespace mchlib { - struct FileRecordData; - - class Indexer { - public: - Indexer ( void ); - Indexer ( Indexer&& ) = default; - Indexer ( const Indexer& ) = delete; - ~Indexer ( void ) noexcept; - - bool add_path ( const char* parPath, const fastf::FileStats& parStats ); -#if defined(INDEXER_VERBOSE) - void dump ( void ) const; -#endif - - std::size_t total_items ( void ) const; - std::string operator[] ( std::size_t parIndex ) const; -#if defined(WITH_PROGRESS_FEEDBACK) - std::size_t processed_items ( void ) const; - std::string current_item ( void ) const; - std::condition_variable& step_notify ( void ); -#endif - void calculate_hash ( void ); - bool empty ( void ) const; - void ignore_read_errors ( bool parIgnore ); - const std::vector& record_data ( void ) const; - - private: - struct LocalData; - - std::unique_ptr m_local_data; - }; -} //namespace mchlib - -#endif diff --git a/src/machinery/indexer.cpp b/src/machinery/indexer.cpp deleted file mode 100644 index 1d93dd5..0000000 --- a/src/machinery/indexer.cpp +++ /dev/null @@ -1,526 +0,0 @@ -/* Copyright 2015, 2016, Michele Santullo - * This file is part of "dindexer". - * - * "dindexer" is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * "dindexer" is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with "dindexer". If not, see . - */ - -//WARNING: buggy code - intermediate hash for directories that contain files -//is likely wrong! -//#define USE_LEGACY_HASH_DIR - -#include "dindexer-machinery/indexer.hpp" -#include "pathname.hpp" -#include "dindexer-machinery/tiger.hpp" -#include "dindexer-common/settings.hpp" -#include "dindexer-machinery/filestats.hpp" -#include "mimetype.hpp" -#include "dindexer-machinery/recorddata.hpp" -#if !defined(USE_LEGACY_HASH_DIR) -# include "dindexer-machinery/set_listing.hpp" -#endif -#include -#include -#include -#if defined(WITH_PROGRESS_FEEDBACK) -# include -# include -#endif -#include -#include -#include -#include -#include -#include - -#if defined(INDEXER_VERBOSE) -# include -#endif -#include -#include - -namespace mchlib { - using HashType = decltype(FileRecordData::hash); - - namespace { - typedef std::vector::iterator FileEntryIt; - - void append_to_vec (std::vector& parDest, const HashType& parHash, const std::string& parString) { - const auto old_size = parDest.size(); - parDest.resize(old_size + sizeof(HashType) + parString.size()); - std::copy(parHash.byte_data, parHash.byte_data + sizeof(HashType), parDest.begin() + old_size); - std::copy(parString.begin(), parString.end(), parDest.begin() + old_size + sizeof(HashType)); - } - - void append_to_vec (std::vector& parDest, const std::string& parString) { - const auto old_size = parDest.size(); - parDest.resize(old_size + parString.size()); - std::copy(parString.begin(), parString.end(), parDest.begin() + old_size); - } - -#if !defined(USE_LEGACY_HASH_DIR) - void hash_dir (FileRecordData& parEntry, MutableSetListingView& parList, const PathName& parCurrDir, MimeType& parMime, bool parIgnoreErrors) { - assert(parEntry.is_directory); - - parEntry.mime_full = parMime.analyze(parEntry.abs_path); - - //Build a blob with the hashes and filenames of every directory that - //is a direct child of current entry - std::vector dir_blob; -#if defined(INDEXER_VERBOSE) - std::cout << "Making initial hash for " << parCurrDir << "...\n"; -#endif - for (auto it = parList.begin(); it != parList.end(); ++it) { - assert(parCurrDir == PathName(it->abs_path).pop_right()); - - PathName curr_subdir(it->abs_path); - const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path(); - if (it->is_directory) { - auto cd_list = MutableSetListingView(it); - assert(boost::empty(cd_list) or cd_list.begin()->abs_path != it->abs_path); - - hash_dir(*it, cd_list, curr_subdir, parMime, parIgnoreErrors); - append_to_vec(dir_blob, it->hash, relpath); - } - else { - append_to_vec(dir_blob, relpath); - } - } - tiger_data(dir_blob, parEntry.hash); - parEntry.size = 0; - -#if defined(INDEXER_VERBOSE) - std::cout << "Got intermediate hash for dir " << parCurrDir << - ": " << tiger_to_string(parEntry.hash) << - ' ' << parEntry.mime_type << '\n'; -#endif - - //Now with the initial hash ready, let's start hashing files, if any - for (auto it = first_file(parList); it != parList.end(); ++it) { - assert(not it->is_directory); -#if defined(INDEXER_VERBOSE) - std::cout << "Hashing file " << it->abs_path << "..."; -#endif - //TODO: notify callback - try { - tiger_file(it->abs_path, it->hash, parEntry.hash, it->size); - it->hash_valid = true; - it->mime_full = parMime.analyze(it->abs_path); - auto mime_pair = split_mime(it->mime_full); - it->mime_type = mime_pair.first; - it->mime_charset = mime_pair.second; - } - catch (const std::ios_base::failure& e) { - if (parIgnoreErrors) { - it->unreadable = true; - it->hash = HashType {}; - if (it->mime_full.get().empty()) { - it->mime_full = "unknown"; - it->mime_type = boost::string_ref(it->mime_full.get()); - it->mime_charset = boost::string_ref(it->mime_full.get()); - } - } - else { - throw e; - } - } - -#if defined(INDEXER_VERBOSE) - std::cout << ' ' << tiger_to_string(it->hash) << ' ' << - "Mime type: \"" << it->mime_type << "\"\n"; -#endif - } - -#if defined(INDEXER_VERBOSE) - std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(parEntry.hash) << '\n'; -#endif - parEntry.hash_valid = true; - { - parEntry.mime_full = parMime.analyze(parEntry.abs_path); - auto mime_pair = split_mime(parEntry.mime_full); - parEntry.mime_type = mime_pair.first; - parEntry.mime_charset = mime_pair.second; - } - } -#endif - -#if defined(USE_LEGACY_HASH_DIR) - void hash_dir (FileEntryIt parEntry, FileEntryIt parBegin, FileEntryIt parEnd, const PathName& parCurrDir, std::function parNextItemCallback, bool parIgnoreErrors, MimeType& parMime) { - assert(parEntry != parEnd); - assert(parEntry->is_directory); - FileRecordData& curr_entry = *parEntry; - auto& curr_entry_it = parEntry; - - curr_entry.mime_full = parMime.analyze(curr_entry.abs_path); - - //Build a blob with the hashes and filenames of every directory that - //is a direct child of current entry - { - std::vector dir_blob; - auto it_entry = curr_entry_it; - - while ( - it_entry != parEnd and ( - it_entry->level == curr_entry.level - or parCurrDir != PathName(it_entry->abs_path).pop_right() - //and (not it_entry->is_dir or (it_entry->level <= curr_entry.level - //and parCurrDir != PathName(it_entry->path).pop_right())) - )) { - assert(it_entry->level >= curr_entry.level); - ++it_entry; - } - -#if defined(INDEXER_VERBOSE) - std::cout << "Making initial hash for " << parCurrDir << "...\n"; -#endif - while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->abs_path).pop_right()) { - PathName curr_subdir(it_entry->abs_path); - const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path(); - - if (it_entry->is_directory) { - hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback, parIgnoreErrors, parMime); - append_to_vec(dir_blob, it_entry->hash, relpath); - } - else { - append_to_vec(dir_blob, relpath); - } - ++it_entry; - } - - tiger_data(dir_blob, curr_entry.hash); - curr_entry.size = 0; -#if defined(INDEXER_VERBOSE) - std::cout << "Got intermediate hash for dir " << parCurrDir << - ": " << tiger_to_string(curr_entry.hash) << - ' ' << curr_entry.mime_type << '\n'; -#endif - } - - //Now with the initial hash ready, let's start hashing files, if any - { - auto it_entry = curr_entry_it; - while ( - it_entry != parEnd - and (it_entry->is_directory - or it_entry->level != curr_entry_it->level + 1 - or PathName(it_entry->abs_path).pop_right() != parCurrDir - ) - ) { - ++it_entry; - } - - while (it_entry != parEnd and not it_entry->is_directory and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->abs_path).pop_right() == parCurrDir) { - assert(not it_entry->is_directory); -#if defined(INDEXER_VERBOSE) - std::cout << "Hashing file " << it_entry->abs_path << "..."; -#endif - parNextItemCallback(it_entry - parBegin); - try { - tiger_file(it_entry->abs_path, it_entry->hash, curr_entry_it->hash, it_entry->size); - it_entry->hash_valid = true; - it_entry->mime_full = parMime.analyze(it_entry->abs_path); - auto mime_pair = split_mime(it_entry->mime_full); - it_entry->mime_type = mime_pair.first; - it_entry->mime_charset = mime_pair.second; - } - catch (const std::ios_base::failure& e) { - if (parIgnoreErrors) { - it_entry->unreadable = true; - it_entry->hash = HashType {}; - if (it_entry->mime_full.get().empty()) { - it_entry->mime_full = "unknown"; - it_entry->mime_type = boost::string_ref(it_entry->mime_full.get()); - it_entry->mime_charset = boost::string_ref(it_entry->mime_full.get()); - } - } - else { - throw e; - } - } - -#if defined(INDEXER_VERBOSE) - std::cout << ' ' << tiger_to_string(it_entry->hash) << ' ' << - "Mime type: \"" << it_entry->mime_type << "\"\n"; -#endif - ++it_entry; - } - } - -#if defined(INDEXER_VERBOSE) - std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(curr_entry_it->hash) << '\n'; -#endif - curr_entry_it->hash_valid = true; - { - curr_entry_it->mime_full = parMime.analyze(curr_entry_it->abs_path); - auto mime_pair = split_mime(curr_entry_it->mime_full); - curr_entry_it->mime_type = mime_pair.first; - curr_entry_it->mime_charset = mime_pair.second; - } - } -#endif - - template - struct IsFile { - bool operator() ( const FileRecordData& parEntry ) const { return parEntry.is_directory xor FileTrue; } - }; - - FileRecordData make_file_record_data (const char* parPath, const fastf::FileStats& parSt) { - return FileRecordData( - parPath, - parSt.atime, - parSt.mtime, - parSt.level, - parSt.is_dir, - parSt.is_symlink - ); - } - - bool file_record_data_lt (const FileRecordData& parLeft, const FileRecordData& parRight) { - const FileRecordData& l = parLeft; - const FileRecordData& r = parRight; - return - (l.level < r.level) - or (l.level == r.level and l.is_directory and not r.is_directory) - or (l.level == r.level and l.is_directory == r.is_directory and l.abs_path < r.abs_path) - - //sort by directory - parent first, children later - //(level == o.level and is_dir and not o.is_dir) - //or (level == o.level and is_dir == o.is_dir and path < o.path) - //or (level > o.level + 1) - //or (level + 1 == o.level and is_dir and not o.is_dir and path < o.path) - //or (level + 1 == o.level and is_dir and not o.is_dir and path == PathName(o.path).dirname()) - //or (level == o.level + 1 and not (o.is_dir and not is_dir and o.path == PathName(path).dirname())) - ; - } - - void populate_rel_paths (const PathName& parBase, std::vector& parItems) { - const std::size_t offset = parBase.str_path_size() + 1; - for (FileRecordData& itm : parItems) { - const auto curr_offset = std::min(offset, itm.abs_path.size()); - itm.path = boost::string_ref(itm.abs_path).substr(curr_offset); - assert(itm.path.data()); - } - } - } //unnamed namespace - - struct Indexer::LocalData { - typedef std::vector PathList; - - PathList paths; -#if defined(WITH_PROGRESS_FEEDBACK) - std::atomic done_count; - std::atomic processing_index; - std::condition_variable step_notify; -#endif - std::size_t file_count; - bool ignore_read_errors; - }; - - Indexer::Indexer() : - m_local_data(new LocalData) - { -#if !defined(NDEBUG) - //assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b", 2, true, false)); - //assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b/c/file.txt", 4, false, false)); - //assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b/c/file.c", 4, false, false)); - //assert(FileEntry("/a/b/c/d", 4, true, false) < FileEntry("/a/b", 2, true, false)); - //assert(FileEntry("/a/b/c/d", 4, true, false) < FileEntry("/a/b/c", 3, true, false)); - //assert(FileEntry("/a/b/c/1.txt", 4, true, false) < FileEntry("/a/b/c/2.txt", 4, true, false)); - //assert(not (FileEntry("/a/b/file.txt", 3, false, false) < FileEntry("/a/b", 2, true, false))); - //assert(not (FileEntry("/a", 1, true, false) < FileEntry("/a/b", 2, true, false))); - //assert(not (FileEntry("/a/b/1.txt", 3, false, false) < FileEntry("/a/b/c/f.txt", 4, true, false))); - //assert(not (FileEntry("/a/b/c/file.c", 4, false, false) < FileEntry("/a/b/c", 3, true, false))); -#endif -#if defined(WITH_PROGRESS_FEEDBACK) - m_local_data->done_count = 0; - m_local_data->processing_index = 0; -#endif - m_local_data->file_count = 0; - } - - Indexer::~Indexer() noexcept { - } - - std::size_t Indexer::total_items() const { - return m_local_data->file_count; - } - -#if defined(WITH_PROGRESS_FEEDBACK) - std::size_t Indexer::processed_items() const { - return m_local_data->done_count; - } -#endif - - void Indexer::calculate_hash() { - PathName base_path(m_local_data->paths.front().abs_path); - std::sort(m_local_data->paths.begin(), m_local_data->paths.end(), &file_record_data_lt); - MimeType mime; - -#if defined(INDEXER_VERBOSE) - for (auto& itm : m_local_data->paths) { - itm.hash.part_a = 1; - itm.hash.part_b = 1; - itm.hash.part_c = 1; - - if (itm.is_directory) - std::cout << "(D) "; - else - std::cout << "(F) "; - std::cout << itm.abs_path << " (" << itm.level << ")\n"; - } - std::cout << "-----------------------------------------------------\n"; -#endif - -#if !defined(USE_LEGACY_HASH_DIR) - MutableSetListingView recordlist(m_local_data->paths.begin(), m_local_data->paths.end(), base_path.atom_count()); -#endif -#if defined(WITH_PROGRESS_FEEDBACK) - m_local_data->done_count = 0; - hash_dir( -#if defined(USE_LEGACY_HASH_DIR) - m_local_data->paths.begin(), - m_local_data->paths.begin(), - m_local_data->paths.end(), - base_path, - [=](std::size_t parNext) { - ++m_local_data->done_count; - m_local_data->processing_index = parNext; - m_local_data->step_notify.notify_all(); - }, - m_local_data->ignore_read_errors, - mime -#else - m_local_data->paths.front(), - recordlist, - base_path, - mime, - m_local_data->ignore_read_errors -#endif - ); - - //TODO: re-enable after hash_dir sends progress notifications again - //assert(m_local_data->done_count == m_local_data->file_count); -#else - hash_dir( -#if defined(USE_LEGACY_HASH_DIR) - m_local_data->paths.begin(), - m_local_data->paths.begin(), - m_local_data->paths.end(), - base_path, - [](std::size_t) {}, - m_local_data->ignore_read_errors, - mime -#else - m_local_data->paths.front(), - recordlist, - base_path, - mime, - m_local_data->ignore_read_errors -#endif - ); -#endif - - populate_rel_paths(base_path, m_local_data->paths); - -#if defined(INDEXER_VERBOSE) - for (const auto& itm : m_local_data->paths) { - assert(not (1 == itm.hash.part_a and 1 == itm.hash.part_b and 1 == itm.hash.part_c)); - } -#endif - } - - bool Indexer::add_path (const char* parPath, const fastf::FileStats& parStats) { - auto it_before = SetListing::lower_bound( - m_local_data->paths, - parPath, - parStats.level, - parStats.is_dir - ); - - m_local_data->paths.insert( - it_before, - make_file_record_data(parPath, parStats) - ); - if (not parStats.is_dir) { - ++m_local_data->file_count; - } - return true; - } - -#if defined(INDEXER_VERBOSE) - void Indexer::dump() const { - PathName base_path(m_local_data->paths.front().abs_path); - - std::cout << "---------------- FILE LIST ----------------\n"; - for (const auto& cur_itm : m_local_data->paths) { - if (not cur_itm.is_directory) { - PathName cur_path(cur_itm.abs_path); - std::cout << make_relative_path(base_path, cur_path).path() << '\n'; - } - } - std::cout << "---------------- DIRECTORY LIST ----------------\n"; - for (const auto& cur_itm : m_local_data->paths) { - if (cur_itm.is_directory) { - PathName cur_path(cur_itm.abs_path); - std::cout << make_relative_path(base_path, cur_path).path() << '\n'; - } - } - } -#endif - - bool Indexer::empty() const { - return m_local_data->paths.size() < 2; - } - -#if defined(WITH_PROGRESS_FEEDBACK) - std::condition_variable& Indexer::step_notify() { - return m_local_data->step_notify; - } -#endif - -#if defined(WITH_PROGRESS_FEEDBACK) - std::string Indexer::current_item() const { - if (m_local_data->paths.empty() or 0 == m_local_data->processing_index) - return std::string(); - - PathName base_path(m_local_data->paths.front().abs_path); - PathName ret_path(m_local_data->paths[m_local_data->processing_index].abs_path); - return make_relative_path(base_path, ret_path).path(); - } -#endif - - std::string Indexer::operator[] (std::size_t parIndex) const { - if (parIndex >= m_local_data->file_count) { - std::ostringstream oss; - oss << "Requested index " << parIndex << " is out of range: only " << m_local_data->file_count << " items are available"; - throw std::out_of_range(oss.str()); - } - - auto it = boost::make_filter_iterator>(m_local_data->paths.begin(), m_local_data->paths.end()); - assert(not m_local_data->paths.empty()); - std::advance(it, parIndex); - return make_relative_path(PathName(m_local_data->paths.front().abs_path), PathName(it->abs_path)).path(); - } - - void Indexer::ignore_read_errors (bool parIgnore) { - m_local_data->ignore_read_errors = parIgnore; - } - - const std::vector& Indexer::record_data() const { -#if defined(WITH_PROGRESS_FEEDBACK) - //TODO: re-enable after hash_dir sends progress notifications again - //assert(m_local_data->done_count == m_local_data->file_count); -#endif - return m_local_data->paths; - } -} //namespace mchlib diff --git a/src/scan/main.cpp b/src/scan/main.cpp index 87bfd84..c7a6313 100644 --- a/src/scan/main.cpp +++ b/src/scan/main.cpp @@ -36,18 +36,9 @@ #include #include #include -#include -#include -#include -#if defined(WITH_PROGRESS_FEEDBACK) -# include -# include -# include -#endif namespace { -// void run_hash_calculation ( mchlib::Indexer& parIndexer, bool parShowProgress ); -// bool add_to_db ( const std::vector& parData, const std::string& parSetName, char parType, char parContent, const dinlib::SettingsDB& parDBSettings, bool parForce=false ); + bool add_to_db ( const std::vector& parData, const mchlib::SetRecordDataFull& parSet, const dinlib::SettingsDB& parDBSettings, bool parForce=false ); #if defined(WITH_PROGRESS_FEEDBACK) void print_progress ( const boost::string_ref parPath, uint64_t parFileBytes, uint64_t parTotalBytes, uint32_t parFileNum ); #endif @@ -73,9 +64,9 @@ int main (int parArgc, char* parArgv[]) { return 2; } #if defined(WITH_PROGRESS_FEEDBACK) - const bool verbose = (0 == vm.count("quiet")); + //const bool verbose = (0 == vm.count("quiet")); #else - const bool verbose = false; + //const bool verbose = false; #endif dinlib::Settings settings; @@ -87,11 +78,14 @@ int main (int parArgc, char* parArgv[]) { } } + bool ignore_read_errors = (vm.count("ignore-errors") > 0); const std::string search_path(vm["search-path"].as()); + const char def_media_type = (vm.count("type") ? vm["type"].as() : 'O'); + std::shared_ptr setbasic(new stask::SetBasic(std::string(vm["setname"].as()))); std::shared_ptr scan_dirtree(new stask::DirTree(search_path)); - std::shared_ptr media_type(new stask::MediaType(setbasic, (vm.count("type") ? vm["type"].as() : 'O'), vm.count("type"), search_path)); - std::shared_ptr hashing(new stask::Hashing(scan_dirtree, true)); + std::shared_ptr media_type(new stask::MediaType(setbasic, def_media_type, vm.count("type"), search_path)); + std::shared_ptr hashing(new stask::Hashing(scan_dirtree, ignore_read_errors)); std::shared_ptr content_type(new stask::ContentType(setbasic, scan_dirtree, media_type)); std::shared_ptr mime(new stask::Mime(scan_dirtree)); std::shared_ptr filerecdata(new FileRecordDataFiller(mime, hashing)); @@ -101,159 +95,35 @@ int main (int parArgc, char* parArgv[]) { hashing->set_progress_callback(&print_progress); #endif - std::cout << "Content type: " << setrecdata->get_or_create().type << std::endl; - - const auto& hashes = filerecdata->get_or_create(); - std::cout << std::endl; - for (const auto& hash : hashes) { - std::cout << '"' << hash.path << - "\" -> " << mchlib::tiger_to_string(hash.hash) << - " size: " << hash.size << - " mime: " << hash.mime_type << "\n"; - } - - return 0; - -#if defined(WITH_MEDIA_AUTODETECT) - //char set_type; - //if (0 == vm.count("type")) { - // std::cout << "Analyzing disc... "; - // try { - // const auto guessed_type = mchlib::guess_media_type(std::string(search_path)); - // set_type = guessed_type; - // std::cout << "Setting type to " << set_type << " (" - // << dinlib::media_type_to_str(guessed_type) << ")\n"; - // } - // catch (const std::runtime_error& e) { - // std::cout << '\n'; - // std::cerr << e.what(); - // return 1; - // } - //} - //else { - // set_type = vm["type"].as(); - //} - //std::unique_ptr media_autodetector( - //new mchlib::scantask::MediaAutodetect(vm["type"].as()) - //); -#else - const char set_type = vm["type"].as(); -#endif - - std::cout << "constructing...\n"; - - //indexer.ignore_read_errors(vm.count("ignore-errors") > 0); - //if (verbose) { - // std::cout << "Fetching items list...\n"; - //} - - //if (indexer.empty()) { - // std::cerr << "Nothing found at the given location, quitting\n"; - // return 1; - //} - //else { - { - //run_hash_calculation(indexer, verbose); - - //TODO: guess_content_type() relies on FileRecordData::path being set to - //the relative path already. Unfortunately at this point it just got - //default-initialized to be the same as abs_path, so for a video DVD, for - //example, it's going to be like "/mnt/cdrom/VIDEO_TS" instead of just - //"VIDEO_TS". This will cause guess_content_type() to miss. Relative - //paths are populated at the end of calculate_hash(), so until I come up - //with a better system I'm just moving content detection to after hash - //calculation. - //const auto set_type_casted = dinlib::char_to_media_type(set_type); - //const mchlib::ContentTypes content = mchlib::guess_content_type(set_type_casted, indexer.record_data()); - //const char content_type = mchlib::content_type_to_char(content); - - if (verbose) { - std::cout << "Writing to database...\n"; - } - //if (not add_to_db(indexer.record_data(), vm["setname"].as(), set_type, content_type, settings.db)) { - // std::cerr << "Not written to DB, likely because a set with the same hash already exists\n"; - //} + if (not add_to_db(filerecdata->get_or_create(), setrecdata->get_or_create(), settings.db)) { + std::cerr << "Not written to DB, likely because a set with the same hash already exists\n"; } return 0; } namespace { -// void run_hash_calculation (mchlib::Indexer& parIndexer, bool parShowProgress) { -// if (parIndexer.empty()) { -// return; -// } -// -//#if !defined(WITH_PROGRESS_FEEDBACK) -// parShowProgress = false; -//#endif -// if (not parShowProgress) { -////Hashing file /mnt/cdrom/Sacred 2/Fallen Angel/UK/Sacred.2.Fallen.Angel-ArenaBG/DISC2/S2DISC2.md1... 512c201321ed01cc2a82c9f80bfeaaa673bc8eb3cea4e5c1 -////terminate called after throwing an instance of 'std::ios_base::failure' -////what(): basic_filebuf::xsgetn error reading the file -////Hashing file /mnt/cdrom/Sacred 2/Fallen Angel/UK/Sacred.2.Fallen.Angel-ArenaBG/DISC2/S2DISC2.mdf...Annullato -// parIndexer.calculate_hash(); -// } -//#if defined(WITH_PROGRESS_FEEDBACK) -// else { -// typedef std::ostream_iterator cout_iterator; -// -// std::cout << "Processing"; -// std::cout.flush(); -// const auto total_items = parIndexer.total_items(); -// std::thread hash_thread(&mchlib::Indexer::calculate_hash, &parIndexer); -// std::mutex progress_print; -// std::size_t clear_size = 0; -// const auto digit_count = static_cast(std::log10(static_cast(total_items))) + 1; -// do { -// //TODO: fix this steaming pile of crap -// //std::unique_lock lk(progress_print); -// //parIndexer.step_notify().wait(lk); -// std::cout << '\r'; -// std::fill_n(cout_iterator(std::cout), clear_size, ' '); -// std::cout << '\r'; -// { -// std::ostringstream oss; -// const auto item_index = std::min(total_items - 1, parIndexer.processed_items()); -// oss << "Processing file " -// << std::setw(digit_count) << std::setfill(' ') << (item_index + 1) -// << " of " << total_items << " \"" << parIndexer.current_item() << '"'; -// const auto msg(oss.str()); -// clear_size = msg.size(); -// std::cout << msg; -// std::cout.flush(); -// } -// } while (false); //parIndexer.processed_items() != total_items); -// -// hash_thread.join(); -// if (parIndexer.processed_items() > 0) { -// std::cout << '\n'; -// } -// } -//#endif -// } -// -// bool add_to_db (const std::vector& parData, const std::string& parSetName, char parType, char parContentType, const dinlib::SettingsDB& parDBSettings, bool parForce) { -// using mchlib::FileRecordData; -// using mchlib::SetRecordDataFull; -// using mchlib::SetRecordData; -// -// if (not parForce) { -// const auto& first_hash = parData.front().hash; -// FileRecordData itm; -// SetRecordDataFull set; -// const bool already_in_db = din::read_from_db(itm, set, parDBSettings, first_hash); -// if (already_in_db) { -// return false; -// } -// } -// -// SetRecordData set_data {parSetName, parType, parContentType }; -// const auto app_signature = dinlib::dindexer_signature(); -// const auto lib_signature = mchlib::lib_signature(); -// const std::string signature = std::string(app_signature.data(), app_signature.size()) + "/" + std::string(lib_signature.data(), lib_signature.size()); -// din::write_to_db(parDBSettings, parData, set_data, signature); -// return true; -// } + bool add_to_db (const std::vector& parData, const mchlib::SetRecordDataFull& parSet, const dinlib::SettingsDB& parDBSettings, bool parForce) { + using mchlib::FileRecordData; + using mchlib::SetRecordDataFull; + using mchlib::SetRecordData; + + if (not parForce) { + const auto& first_hash = parData.front().hash; + FileRecordData itm; + SetRecordDataFull set; + const bool already_in_db = din::read_from_db(itm, set, parDBSettings, first_hash); + if (already_in_db) { + return false; + } + } + + const SetRecordData& set_data {parSet.name, parSet.type, parSet.content_type }; + const auto app_signature = dinlib::dindexer_signature(); + const auto lib_signature = mchlib::lib_signature(); + const std::string signature = std::string(app_signature.data(), app_signature.size()) + "/" + std::string(lib_signature.data(), lib_signature.size()); + din::write_to_db(parDBSettings, parData, set_data, signature); + return true; + } #if defined(WITH_PROGRESS_FEEDBACK) void print_progress (const boost::string_ref parPath, uint64_t /*parFileBytes*/, uint64_t parTotalBytes, uint32_t parFileNum) {