mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2025-02-21 12:34:56 +00:00
Implement hash_dir() using DirIterator. WiP
Keep the old implementation as well, so they can be easily tested. Uncomment the USE_LEGACY_HASH_DIR define at the top to build with the old function.
This commit is contained in:
parent
0a3e469951
commit
5908828390
2 changed files with 126 additions and 0 deletions
|
@ -34,6 +34,11 @@ namespace mchlib {
|
||||||
template <bool Const> class SetListingView;
|
template <bool Const> class SetListingView;
|
||||||
template <bool Const> const PathName& get_pathname ( const implem::DirIterator<Const>& parIter );
|
template <bool Const> const PathName& get_pathname ( const implem::DirIterator<Const>& parIter );
|
||||||
|
|
||||||
|
template <bool Const>
|
||||||
|
implem::DirIterator<Const> first_file ( const SetListingView<Const>& parList );
|
||||||
|
template <bool Const>
|
||||||
|
implem::DirIterator<Const> first_file ( SetListingView<Const>& parList );
|
||||||
|
|
||||||
namespace implem {
|
namespace implem {
|
||||||
template <bool Const>
|
template <bool Const>
|
||||||
class DirIterator : public boost::iterator_facade<DirIterator<Const>, FileRecordData, boost::forward_traversal_tag> {
|
class DirIterator : public boost::iterator_facade<DirIterator<Const>, FileRecordData, boost::forward_traversal_tag> {
|
||||||
|
@ -126,6 +131,26 @@ namespace mchlib {
|
||||||
inline const PathName& get_pathname (const implem::DirIterator<Const>& parIter) {
|
inline const PathName& get_pathname (const implem::DirIterator<Const>& parIter) {
|
||||||
return *parIter.m_base_path;
|
return *parIter.m_base_path;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <bool Const>
|
||||||
|
inline implem::DirIterator<Const> first_file (const SetListingView<Const>& parList) {
|
||||||
|
auto end = parList.end();
|
||||||
|
for (auto it = parList.begin(); it != end; ++it) {
|
||||||
|
if (not it->is_directory)
|
||||||
|
return std::move(it);
|
||||||
|
}
|
||||||
|
return parList.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <bool Const>
|
||||||
|
inline implem::DirIterator<Const> first_file (SetListingView<Const>& parList) {
|
||||||
|
auto end = parList.end();
|
||||||
|
for (auto it = parList.begin(); it != end; ++it) {
|
||||||
|
if (not it->is_directory)
|
||||||
|
return std::move(it);
|
||||||
|
}
|
||||||
|
return parList.end();
|
||||||
|
}
|
||||||
} //namespace mchlib
|
} //namespace mchlib
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -15,6 +15,8 @@
|
||||||
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
|
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
//#define USE_LEGACY_HASH_DIR
|
||||||
|
|
||||||
#include "dindexer-machinery/indexer.hpp"
|
#include "dindexer-machinery/indexer.hpp"
|
||||||
#include "pathname.hpp"
|
#include "pathname.hpp"
|
||||||
#include "dindexer-machinery/tiger.hpp"
|
#include "dindexer-machinery/tiger.hpp"
|
||||||
|
@ -22,6 +24,9 @@
|
||||||
#include "dindexer-machinery/filestats.hpp"
|
#include "dindexer-machinery/filestats.hpp"
|
||||||
#include "mimetype.hpp"
|
#include "mimetype.hpp"
|
||||||
#include "dindexer-machinery/recorddata.hpp"
|
#include "dindexer-machinery/recorddata.hpp"
|
||||||
|
#if !defined(USE_LEGACY_HASH_DIR)
|
||||||
|
# include "dindexer-machinery/set_listing.hpp"
|
||||||
|
#endif
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
@ -60,6 +65,82 @@ namespace mchlib {
|
||||||
std::copy(parString.begin(), parString.end(), parDest.begin() + old_size);
|
std::copy(parString.begin(), parString.end(), parDest.begin() + old_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if !defined(USE_LEGACY_HASH_DIR)
|
||||||
|
void hash_dir (FileRecordData& parEntry, SetListingView<false>& parList, const PathName& parCurrDir, MimeType& parMime, bool parIgnoreErrors) {
|
||||||
|
assert(parEntry.is_directory);
|
||||||
|
|
||||||
|
parEntry.mime_full = parMime.analyze(parEntry.abs_path);
|
||||||
|
|
||||||
|
//Build a blob with the hashes and filenames of every directory that
|
||||||
|
//is a direct child of current entry
|
||||||
|
std::vector<char> dir_blob;
|
||||||
|
#if defined(INDEXER_VERBOSE)
|
||||||
|
std::cout << "Making initial hash for " << parCurrDir << "...\n";
|
||||||
|
#endif
|
||||||
|
for (auto it = parList.begin(); it != parList.end(); ++it) {
|
||||||
|
if (not it->is_directory) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
PathName curr_subdir(it->abs_path);
|
||||||
|
const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
|
||||||
|
if (it->is_directory) {
|
||||||
|
auto cd_list = SetListingView<false>(it);
|
||||||
|
hash_dir(*it, cd_list, get_pathname(it), parMime, parIgnoreErrors);
|
||||||
|
append_to_vec(dir_blob, it->hash, relpath);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
append_to_vec(dir_blob, relpath);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tiger_data(dir_blob, parEntry.hash);
|
||||||
|
parEntry.size = 0;
|
||||||
|
|
||||||
|
#if defined(INDEXER_VERBOSE)
|
||||||
|
std::cout << "Got intermediate hash for dir " << parCurrDir <<
|
||||||
|
": " << tiger_to_string(parEntry.hash) <<
|
||||||
|
' ' << parEntry.mime_type << '\n';
|
||||||
|
#endif
|
||||||
|
|
||||||
|
//Now with the initial hash ready, let's start hashing files, if any
|
||||||
|
for (auto it = first_file(parList); it != parList.end(); ++it) {
|
||||||
|
assert(not it->is_directory);
|
||||||
|
#if defined(INDEXER_VERBOSE)
|
||||||
|
std::cout << "Hashing file " << it->abs_path << "...";
|
||||||
|
#endif
|
||||||
|
//TODO: notify callback
|
||||||
|
try {
|
||||||
|
tiger_file(it->abs_path, it->hash, parEntry.hash, it->size);
|
||||||
|
it->hash_valid = true;
|
||||||
|
it->mime_full = parMime.analyze(it->abs_path);
|
||||||
|
auto mime_pair = split_mime(it->mime_full);
|
||||||
|
it->mime_type = mime_pair.first;
|
||||||
|
it->mime_charset = mime_pair.second;
|
||||||
|
}
|
||||||
|
catch (const std::ios_base::failure& e) {
|
||||||
|
if (parIgnoreErrors) {
|
||||||
|
it->unreadable = true;
|
||||||
|
it->hash = HashType {};
|
||||||
|
if (it->mime_full.get().empty()) {
|
||||||
|
it->mime_full = "unknown";
|
||||||
|
it->mime_type = boost::string_ref(it->mime_full.get());
|
||||||
|
it->mime_charset = boost::string_ref(it->mime_full.get());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(INDEXER_VERBOSE)
|
||||||
|
std::cout << ' ' << tiger_to_string(it->hash) << ' ' <<
|
||||||
|
"Mime type: \"" << it->mime_type << "\"\n";
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(USE_LEGACY_HASH_DIR)
|
||||||
void hash_dir (FileEntryIt parEntry, FileEntryIt parBegin, FileEntryIt parEnd, const PathName& parCurrDir, std::function<void(std::size_t)> parNextItemCallback, bool parIgnoreErrors, MimeType& parMime) {
|
void hash_dir (FileEntryIt parEntry, FileEntryIt parBegin, FileEntryIt parEnd, const PathName& parCurrDir, std::function<void(std::size_t)> parNextItemCallback, bool parIgnoreErrors, MimeType& parMime) {
|
||||||
assert(parEntry != parEnd);
|
assert(parEntry != parEnd);
|
||||||
assert(parEntry->is_directory);
|
assert(parEntry->is_directory);
|
||||||
|
@ -172,6 +253,7 @@ namespace mchlib {
|
||||||
curr_entry_it->mime_charset = mime_pair.second;
|
curr_entry_it->mime_charset = mime_pair.second;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
template <bool FileTrue=true>
|
template <bool FileTrue=true>
|
||||||
struct IsFile {
|
struct IsFile {
|
||||||
|
@ -285,9 +367,13 @@ namespace mchlib {
|
||||||
std::cout << "-----------------------------------------------------\n";
|
std::cout << "-----------------------------------------------------\n";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if !defined(USE_LEGACY_HASH_DIR)
|
||||||
|
SetListingView<false> recordlist(m_local_data->paths.begin(), m_local_data->paths.end());
|
||||||
|
#endif
|
||||||
#if defined(WITH_PROGRESS_FEEDBACK)
|
#if defined(WITH_PROGRESS_FEEDBACK)
|
||||||
m_local_data->done_count = 0;
|
m_local_data->done_count = 0;
|
||||||
hash_dir(
|
hash_dir(
|
||||||
|
#if defined(USE_LEGACY_HASH_DIR)
|
||||||
m_local_data->paths.begin(),
|
m_local_data->paths.begin(),
|
||||||
m_local_data->paths.begin(),
|
m_local_data->paths.begin(),
|
||||||
m_local_data->paths.end(),
|
m_local_data->paths.end(),
|
||||||
|
@ -299,11 +385,19 @@ namespace mchlib {
|
||||||
},
|
},
|
||||||
m_local_data->ignore_read_errors,
|
m_local_data->ignore_read_errors,
|
||||||
mime
|
mime
|
||||||
|
#else
|
||||||
|
m_local_data->paths.front(),
|
||||||
|
recordlist,
|
||||||
|
base_path,
|
||||||
|
mime,
|
||||||
|
m_local_data->ignore_read_errors
|
||||||
|
#endif
|
||||||
);
|
);
|
||||||
|
|
||||||
assert(m_local_data->done_count == m_local_data->file_count);
|
assert(m_local_data->done_count == m_local_data->file_count);
|
||||||
#else
|
#else
|
||||||
hash_dir(
|
hash_dir(
|
||||||
|
#if defined(USE_LEGACY_HASH_DIR)
|
||||||
m_local_data->paths.begin(),
|
m_local_data->paths.begin(),
|
||||||
m_local_data->paths.begin(),
|
m_local_data->paths.begin(),
|
||||||
m_local_data->paths.end(),
|
m_local_data->paths.end(),
|
||||||
|
@ -311,6 +405,13 @@ namespace mchlib {
|
||||||
[](std::size_t) {},
|
[](std::size_t) {},
|
||||||
m_local_data->ignore_read_errors,
|
m_local_data->ignore_read_errors,
|
||||||
mime
|
mime
|
||||||
|
#else
|
||||||
|
m_local_data->paths.front(),
|
||||||
|
recordlist,
|
||||||
|
base_path,
|
||||||
|
mime,
|
||||||
|
m_local_data->ignore_read_errors
|
||||||
|
#endif
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue