mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2024-11-25 00:53:43 +00:00
Add optional verbose mode that shows the number of hashed files.
This commit is contained in:
parent
a82ab4a4ed
commit
f7441292bc
6 changed files with 110 additions and 7 deletions
|
@ -47,3 +47,7 @@ target_link_libraries(${PROJECT_NAME}
|
|||
${YAMLCPP_LIBRARY}
|
||||
${Boost_LIBRARIES}
|
||||
)
|
||||
|
||||
target_compile_definitions(${PROJECT_NAME}
|
||||
PUBLIC WITH_PROGRESS_FEEDBACK
|
||||
)
|
||||
|
|
7
find_duplicate_files.sql
Normal file
7
find_duplicate_files.sql
Normal file
|
@ -0,0 +1,7 @@
|
|||
--select hash, group_id, count(hash) as hash_count from files group by hash, group_id having count(*) > 1 order by hash;
|
||||
|
||||
--select hash, group_id from files group by hash, group_id having count(*) > 1 and count(order by hash;
|
||||
|
||||
-- Lists every (hash, group_id) pair whose hash occurs in more than one group.
-- ct is the total number of rows in files that carry that hash.
select files.hash, group_id, shared.ct
from files
inner join (
	-- hashes that appear under at least two distinct group ids
	select hash, count(*) as ct
	from files
	group by hash
	having count(distinct group_id) > 1
) shared on shared.hash = files.hash
group by files.hash, group_id, shared.ct
order by files.hash;
|
|
@ -60,6 +60,9 @@ namespace din {
|
|||
("help,h", "Produces this help message")
|
||||
("version", "Prints the program's version and quits")
|
||||
//("dump-raw,D", po::value<std::string>(), "Saves the retrieved html to the named file; use - for stdout")
|
||||
#if defined(WITH_PROGRESS_FEEDBACK)
|
||||
("quiet,q", "Hide progress messages and print nothing at all")
|
||||
#endif
|
||||
;
|
||||
po::options_description set_options("Set options");
|
||||
set_options.add_options()
|
||||
|
|
|
@ -21,9 +21,13 @@
|
|||
#include "dbbackend.hpp"
|
||||
#include "settings.hpp"
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#if defined(WITH_PROGRESS_FEEDBACK)
|
||||
# include <atomic>
|
||||
# include <condition_variable>
|
||||
#endif
|
||||
#include <cstdint>
|
||||
#include <ciso646>
|
||||
#include <cassert>
|
||||
|
@ -61,7 +65,7 @@ namespace din {
|
|||
};
|
||||
|
||||
namespace {
|
||||
void hash_dir (std::vector<FileEntry>::iterator parEntry, std::vector<FileEntry>::iterator parEnd, const PathName& parCurrDir, std::atomic<std::size_t>& parDone) {
|
||||
void hash_dir (std::vector<FileEntry>::iterator parEntry, std::vector<FileEntry>::iterator parEnd, const PathName& parCurrDir, std::function<void()> parItemDoneCallback) {
|
||||
assert(parEntry != parEnd);
|
||||
assert(parEntry->is_dir);
|
||||
FileEntry& curr_entry = *parEntry;
|
||||
|
@ -90,7 +94,7 @@ namespace din {
|
|||
while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->path).pop_right()) {
|
||||
PathName curr_subdir(it_entry->path);
|
||||
if (it_entry->is_dir) {
|
||||
hash_dir(it_entry, parEnd, curr_subdir, parDone);
|
||||
hash_dir(it_entry, parEnd, curr_subdir, parItemDoneCallback);
|
||||
|
||||
std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
|
||||
const auto old_size = dir_blob.size();
|
||||
|
@ -133,7 +137,7 @@ namespace din {
|
|||
std::cout << "Hashing file " << it_entry->path << "...";
|
||||
#endif
|
||||
tiger_file(it_entry->path, it_entry->hash, curr_entry_it->hash, it_entry->file_size);
|
||||
++parDone;
|
||||
parItemDoneCallback();
|
||||
#if defined(INDEXER_VERBOSE)
|
||||
std::cout << ' ' << tiger_to_string(it_entry->hash) << '\n';
|
||||
#endif
|
||||
|
@ -144,7 +148,7 @@ namespace din {
|
|||
#if defined(INDEXER_VERBOSE)
|
||||
std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(curr_entry_it->hash) << '\n';
|
||||
#endif
|
||||
++parDone;
|
||||
//parItemDoneCallback();
|
||||
}
|
||||
} //unnamed namespace
|
||||
|
||||
|
@ -153,7 +157,10 @@ namespace din {
|
|||
|
||||
DinDBSettings db_settings;
|
||||
PathList paths;
|
||||
#if defined(WITH_PROGRESS_FEEDBACK)
|
||||
std::atomic<std::size_t> done_count;
|
||||
std::condition_variable step_notify;
|
||||
#endif
|
||||
std::size_t file_count;
|
||||
};
|
||||
|
||||
|
@ -189,7 +196,9 @@ namespace din {
|
|||
//assert(not (FileEntry("/a/b/1.txt", 3, false, false) < FileEntry("/a/b/c/f.txt", 4, true, false)));
|
||||
//assert(not (FileEntry("/a/b/c/file.c", 4, false, false) < FileEntry("/a/b/c", 3, true, false)));
|
||||
#endif
|
||||
#if defined(WITH_PROGRESS_FEEDBACK)
|
||||
m_local_data->done_count = 0;
|
||||
#endif
|
||||
m_local_data->file_count = 0;
|
||||
m_local_data->db_settings = parDBSettings;
|
||||
}
|
||||
|
@ -201,9 +210,11 @@ namespace din {
|
|||
return m_local_data->file_count;
|
||||
}
|
||||
|
||||
#if defined(WITH_PROGRESS_FEEDBACK)
|
||||
std::size_t Indexer::processed_items() const {
|
||||
return m_local_data->done_count;
|
||||
}
|
||||
#endif
|
||||
|
||||
void Indexer::calculate_hash() {
|
||||
PathName base_path(m_local_data->paths.front().path);
|
||||
|
@ -223,10 +234,27 @@ namespace din {
|
|||
std::cout << "-----------------------------------------------------\n";
|
||||
#endif
|
||||
|
||||
#if defined(WITH_PROGRESS_FEEDBACK)
|
||||
m_local_data->done_count = 0;
|
||||
hash_dir(m_local_data->paths.begin(), m_local_data->paths.end(), base_path, m_local_data->done_count);
|
||||
hash_dir(
|
||||
m_local_data->paths.begin(),
|
||||
m_local_data->paths.end(),
|
||||
base_path,
|
||||
[=]() {
|
||||
++m_local_data->done_count;
|
||||
m_local_data->step_notify.notify_all();
|
||||
}
|
||||
);
|
||||
|
||||
assert(m_local_data->done_count == m_local_data->paths.size());
|
||||
#else
|
||||
hash_dir(
|
||||
m_local_data->paths.begin(),
|
||||
m_local_data->paths.end(),
|
||||
base_path,
|
||||
[]() {}
|
||||
);
|
||||
#endif
|
||||
|
||||
#if defined(INDEXER_VERBOSE)
|
||||
for (const auto& itm : m_local_data->paths) {
|
||||
|
@ -236,7 +264,9 @@ namespace din {
|
|||
}
|
||||
|
||||
void Indexer::add_to_db (const std::string& parSetName, char parType) const {
|
||||
#if defined(WITH_PROGRESS_FEEDBACK)
|
||||
assert(m_local_data->done_count == m_local_data->paths.size());
|
||||
#endif
|
||||
PathName base_path(m_local_data->paths.front().path);
|
||||
std::vector<FileRecordData> data;
|
||||
data.reserve(m_local_data->paths.size());
|
||||
|
@ -287,4 +317,10 @@ namespace din {
|
|||
bool Indexer::empty() const {
|
||||
return m_local_data->paths.size() < 2;
|
||||
}
|
||||
|
||||
#if defined(WITH_PROGRESS_FEEDBACK)
|
||||
std::condition_variable& Indexer::step_notify() {
|
||||
return m_local_data->step_notify;
|
||||
}
|
||||
#endif
|
||||
} //namespace din
|
||||
|
|
|
@ -24,6 +24,12 @@
|
|||
# define INDEXER_VERBOSE
|
||||
#endif
|
||||
|
||||
#if defined(WITH_PROGRESS_FEEDBACK)
|
||||
// std::condition_variable is returned by reference from Indexer::step_notify().
// Forward-declaring it by hand inside namespace std is undefined behaviour and
// breaks on standard libraries that place their types in an inline versioning
// namespace, so pull in the real declaration instead.
# include <condition_variable>
|
||||
#endif
|
||||
|
||||
namespace din {
|
||||
struct DinDBSettings;
|
||||
|
||||
|
@ -40,7 +46,10 @@ namespace din {
|
|||
#endif
|
||||
|
||||
std::size_t total_items ( void ) const;
|
||||
#if defined(WITH_PROGRESS_FEEDBACK)
|
||||
std::size_t processed_items ( void ) const;
|
||||
std::condition_variable& step_notify ( void );
|
||||
#endif
|
||||
void calculate_hash ( void );
|
||||
void add_to_db ( const std::string& parSetName, char parType ) const;
|
||||
bool empty ( void ) const;
|
||||
|
|
46
src/main.cpp
46
src/main.cpp
|
@ -17,11 +17,20 @@
|
|||
|
||||
#include <iostream>
|
||||
#include <ciso646>
|
||||
#if defined(WITH_PROGRESS_FEEDBACK)
# include <chrono>
# include <condition_variable>
# include <mutex>
# include <thread>
#endif
|
||||
#include "filesearcher.hpp"
|
||||
#include "indexer.hpp"
|
||||
#include "settings.hpp"
|
||||
#include "commandline.hpp"
|
||||
|
||||
namespace {
|
||||
void run_hash_calculation ( din::Indexer& parIndexer, bool parShowProgress );
|
||||
} //unnamed namespace
|
||||
|
||||
int main (int parArgc, char* parArgv[]) {
|
||||
using std::placeholders::_1;
|
||||
using std::placeholders::_2;
|
||||
|
@ -62,8 +71,43 @@ int main (int parArgc, char* parArgv[]) {
|
|||
return 1;
|
||||
}
|
||||
else {
|
||||
indexer.calculate_hash();
|
||||
#if defined(WITH_PROGRESS_FEEDBACK)
|
||||
const bool verbose = (0 == vm.count("quiet"));
|
||||
#else
|
||||
const bool verbose = false;
|
||||
#endif
|
||||
run_hash_calculation(indexer, verbose);
|
||||
indexer.add_to_db(vm["setname"].as<std::string>(), vm["type"].as<char>());
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Runs the indexer's hash calculation, optionally showing a progress line.
//
// parIndexer:      indexer whose calculate_hash() is to be executed.
// parShowProgress: when true (and WITH_PROGRESS_FEEDBACK is defined) hashing
//                  runs on a worker thread while this thread prints
//                  "Processed X of Y"; otherwise hashing runs synchronously
//                  and nothing is printed.
void run_hash_calculation (din::Indexer& parIndexer, bool parShowProgress) {
#if !defined(WITH_PROGRESS_FEEDBACK)
	parShowProgress = false;
#endif
	if (not parShowProgress) {
		parIndexer.calculate_hash();
	}
#if defined(WITH_PROGRESS_FEEDBACK)
	else {
		std::cout << "Fetching items list...\n";
		const auto total_items = parIndexer.total_items();
		std::thread hash_thread(&din::Indexer::calculate_hash, &parIndexer);
		std::mutex progress_print;
		// The worker updates the counter and notifies without holding
		// progress_print, so a notification can slip in between the loop
		// condition and the wait below. An unbounded wait() would then block
		// forever once the last item is done; a bounded wait lets the loop
		// re-check the counter and recover from a missed wake-up.
		const std::chrono::milliseconds poll_interval(100);
		while (parIndexer.processed_items() != total_items) {
			std::unique_lock<std::mutex> lk(progress_print);
			parIndexer.step_notify().wait_for(lk, poll_interval);
			std::cout << "Processed " << parIndexer.processed_items() << " of " << total_items << '\r';
			std::cout.flush();
		}

		hash_thread.join();
		if (parIndexer.processed_items() > 0) {
			// Redraw once more so the final count is shown even if the worker
			// finished before the loop above had a chance to print it.
			std::cout << "Processed " << parIndexer.processed_items() << " of " << total_items << '\n';
		}
	}
#endif
}
|
||||
} //unnamed namespace
|
||||
|
|
Loading…
Reference in a new issue