1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2024-11-29 01:33:46 +00:00

Add a --byhash switch to the locate command to search by content.

This is mostly to demonstrate the new task classes.
This commit is contained in:
King_DuckZ 2016-03-12 00:32:15 +01:00
parent 34ead94c8d
commit e95cd6cc44
8 changed files with 195 additions and 14 deletions

View file

@ -4,6 +4,7 @@ add_executable(${PROJECT_NAME}
main.cpp main.cpp
commandline.cpp commandline.cpp
postgre_locate.cpp postgre_locate.cpp
hash.cpp
) )
target_include_directories(${PROJECT_NAME} target_include_directories(${PROJECT_NAME}
@ -13,6 +14,7 @@ target_include_directories(${PROJECT_NAME}
target_link_libraries(${PROJECT_NAME} target_link_libraries(${PROJECT_NAME}
PRIVATE ${bare_name}-if PRIVATE ${bare_name}-if
PRIVATE ${bare_name}-common PRIVATE ${bare_name}-common
PRIVATE ${bare_name}-machinery
) )
string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}") string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}")

View file

@ -28,6 +28,7 @@ namespace din {
set_options.add_options() set_options.add_options()
("case-insensitive,i", "Disable case sensitivity during search") ("case-insensitive,i", "Disable case sensitivity during search")
("set,s", "Look for matching sets instead of files") ("set,s", "Look for matching sets instead of files")
("byhash,a", "Paths on the command line are local paths and searching should be done by content hash")
//("option,o", po::value<std::string>()->default_value("default_value"), "Help message") //("option,o", po::value<std::string>()->default_value("default_value"), "Help message")
//("option2", po::value<int>(), "Help message") //("option2", po::value<int>(), "Help message")
; ;

98
src/locate/hash.cpp Normal file
View file

@ -0,0 +1,98 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "hash.hpp"
#include "dindexer-machinery/scantask/hashing.hpp"
#include "dindexer-machinery/scantask/dirtree.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include <memory>
#include <sys/stat.h>
#include <stdexcept>
#include <utility>
#include <cassert>
#include <ciso646>
namespace stask = mchlib::scantask;
namespace din {
namespace {
// Scan task that produces a list holding exactly one record: the file at the
// path given at construction time.
// The stat pointer is stored as-is (the pointee is not copied), so the
// struct it points to must stay alive for as long as this task can be asked
// to create its data.
class SingleFileTask : public stask::Base<std::vector<mchlib::FileRecordData>> {
public:
	using PathList = std::vector<mchlib::FileRecordData>;

	SingleFileTask (std::string parPath, const struct stat* parStat);
	virtual ~SingleFileTask() noexcept = default;

private:
	void on_data_destroy (PathList& parData) override;
	void on_data_create (PathList& parData) override;

	std::string m_path;        // path of the file this task describes
	const struct stat* m_stat; // borrowed, never owned nor freed here
};
SingleFileTask::SingleFileTask (std::string parPath, const struct stat* parStat) :
m_path(std::move(parPath)),
m_stat(parStat)
{
assert(not m_path.empty());
assert(m_stat);
}
// Empties the record list handed in by the base task. The list is expected
// to be populated at this point (asserted in debug builds).
void SingleFileTask::on_data_destroy (PathList& parData) {
	assert(!parData.empty());
	parData.clear();
}
// Fills the (expected to be empty) record list with a single entry built
// from the stored path and the access/modification times of the stored stat.
void SingleFileTask::on_data_create (PathList& parData) {
	assert(parData.empty());
	parData.reserve(1);

	// The record receives its own copy of the path so m_path stays usable.
	// NOTE(review): the literal 0/0/false/false arguments presumably are the
	// relative-path offset, directory level, is-directory and is-symlink
	// values - confirm against the FileRecordData constructor.
	mchlib::FileRecordData single_record(
		std::string(m_path),
		0,
		m_stat->st_atime,
		m_stat->st_mtime,
		0,
		false,
		false
	);
	parData.push_back(std::move(single_record));
}
} //unnamed namespace
// Computes the hash of the content found at a local path.
//
// parPath: path to a directory or to a regular file.
// Returns: the hash stored in the first record produced by the hashing task.
// Throws:  std::runtime_error if the path cannot be stat()ed, if it is
//          neither a directory nor a regular file, or if hashing yields no
//          records. Errors raised by the hashing task itself propagate
//          unchanged (the task is created with error-ignoring disabled).
mchlib::TigerHash hash (const std::string& parPath) {
	using mchlib::FileRecordData;

	// Declared before the tasks on purpose: SingleFileTask keeps a pointer to
	// this struct, so it has to be destroyed after them.
	struct stat path_stat;
	if (stat(parPath.c_str(), &path_stat)) {
		throw std::runtime_error("Can't access file \"" + parPath + "\"");
	}

	std::shared_ptr<stask::Base<std::vector<FileRecordData>>> file_src_task;
	if (S_ISDIR(path_stat.st_mode)) {
		file_src_task = std::make_shared<stask::DirTree>(parPath);
	}
	else if (S_ISREG(path_stat.st_mode)) {
		file_src_task = std::make_shared<SingleFileTask>(parPath, &path_stat);
	}
	else {
		// An assert alone would vanish in release builds and let FIFOs,
		// devices or sockets reach the hashing code.
		throw std::runtime_error("\"" + parPath + "\" is neither a regular file nor a directory");
	}

	const auto hashing = std::make_shared<stask::Hashing>(file_src_task, false);
	const auto& records = hashing->get_or_create();
	if (records.empty()) {
		// front() on an empty list would be undefined behaviour.
		throw std::runtime_error("Nothing to hash at \"" + parPath + "\"");
	}
	return records.front().hash;
}
} //namespace din

28
src/locate/hash.hpp Normal file
View file

@ -0,0 +1,28 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id3F3E29B28FAA44A190451198CF1FD166
#define id3F3E29B28FAA44A190451198CF1FD166
#include "dindexer-machinery/tiger.hpp"
#include <string>
#include <vector>
namespace din {
// Computes the hash of the content at the local path parPath, which may be
// a directory or a single file. Throws std::runtime_error when the path
// cannot be accessed.
mchlib::TigerHash hash ( const std::string& parPath );
} //namespace din
#endif

View file

@ -19,6 +19,7 @@
#include "postgre_locate.hpp" #include "postgre_locate.hpp"
#include "dindexer-common/settings.hpp" #include "dindexer-common/settings.hpp"
#include "dindexerConfig.h" #include "dindexerConfig.h"
#include "hash.hpp"
#include <iostream> #include <iostream>
#include <ciso646> #include <ciso646>
#include <iterator> #include <iterator>
@ -73,7 +74,15 @@ int main (int parArgc, char* parArgv[]) {
std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedSet>(std::cout, "\n")); std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedSet>(std::cout, "\n"));
} }
else { else {
const auto results = din::locate_in_db(settings.db, vm["substring"].as<std::string>(), not not vm.count("case-insensitive")); std::vector<din::LocatedItem> results;
if (vm.count("byhash")) {
const auto hash = din::hash(vm["substring"].as<std::string>());
results = din::locate_in_db(settings.db, hash);
}
else {
results = din::locate_in_db(settings.db, vm["substring"].as<std::string>(), not not vm.count("case-insensitive"));
}
std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedItem>(std::cout, "\n")); std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedItem>(std::cout, "\n"));
} }
return 0; return 0;

View file

@ -17,6 +17,7 @@
#include "postgre_locate.hpp" #include "postgre_locate.hpp"
#include "pq/connection.hpp" #include "pq/connection.hpp"
#include "dindexer-machinery/tiger.hpp"
#include <utility> #include <utility>
#include <sstream> #include <sstream>
#include <boost/utility/string_ref.hpp> #include <boost/utility/string_ref.hpp>
@ -53,10 +54,25 @@ namespace din {
return std::move(retval); return std::move(retval);
} }
// Converts a query result into a list of LocatedItem, reading the "path",
// "id" and "group_id" columns of every record. The numeric columns are
// converted with boost::lexical_cast to the types of the matching
// LocatedItem members.
std::vector<LocatedItem> file_result_to_vec (pq::ResultSet&& parResult) {
	using boost::lexical_cast;

	std::vector<LocatedItem> retval;
	retval.reserve(parResult.size());
	for (const auto& record : parResult) {
		retval.push_back(LocatedItem{
			record["path"],
			lexical_cast<decltype(LocatedItem::id)>(record["id"]),
			lexical_cast<decltype(LocatedItem::group_id)>(record["group_id"])
		});
	}
	// Return the local by name: wrapping it in std::move() would prevent
	// copy elision and force a move instead.
	return retval;
}
} //unnamed namespace } //unnamed namespace
std::vector<LocatedItem> locate_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) { std::vector<LocatedItem> locate_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) {
using boost::lexical_cast;
using boost::string_ref; using boost::string_ref;
namespace ba = boost::algorithm; namespace ba = boost::algorithm;
@ -78,17 +94,15 @@ namespace din {
oss << "LIMIT " << g_max_results << ';'; oss << "LIMIT " << g_max_results << ';';
auto result = conn.query(oss.str()); auto result = conn.query(oss.str());
std::vector<LocatedItem> retval; return file_result_to_vec(std::move(result));
retval.reserve(result.size());
for (const auto& record : result) {
retval.push_back(LocatedItem{
record["path"],
lexical_cast<decltype(LocatedItem::id)>(record["id"]),
lexical_cast<decltype(LocatedItem::group_id)>(record["group_id"])
});
} }
return std::move(retval); std::vector<LocatedItem> locate_in_db (const dinlib::SettingsDB& parDB, const mchlib::TigerHash& parSearch) {
const std::string query = std::string("SELECT \"path\",\"id\",\"group_id\" FROM \"files\" WHERE \"hash\"=$1 LIMIT ") + boost::lexical_cast<std::string>(g_max_results) + ';';
auto conn = make_pq_conn(parDB);
auto result = conn.query(query, mchlib::tiger_to_string(parSearch, true));
return file_result_to_vec(std::move(result));
} }
std::vector<LocatedSet> locate_sets_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) { std::vector<LocatedSet> locate_sets_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) {

View file

@ -23,6 +23,10 @@
#include <string> #include <string>
#include <cstdint> #include <cstdint>
namespace mchlib {
struct TigerHash;
} //namespace mchlib
namespace din { namespace din {
struct LocatedItem { struct LocatedItem {
std::string path; std::string path;
@ -38,6 +42,7 @@ namespace din {
}; };
std::vector<LocatedItem> locate_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive ); std::vector<LocatedItem> locate_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive );
std::vector<LocatedItem> locate_in_db ( const dinlib::SettingsDB& parDB, const mchlib::TigerHash& parSearch );
std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive ); std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive );
std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, const std::vector<uint32_t>& parSets, bool parCaseInsensitive ); std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, const std::vector<uint32_t>& parSets, bool parCaseInsensitive );
} //namespace din } //namespace din

View file

@ -145,6 +145,9 @@ namespace mchlib {
void Hashing::on_data_fill() { void Hashing::on_data_fill() {
std::vector<FileRecordData>& file_list = m_file_tree_task->get_or_create(); std::vector<FileRecordData>& file_list = m_file_tree_task->get_or_create();
if (file_list.empty()) {
return;
}
ProgressInfo progr_info; ProgressInfo progr_info;
progr_info.callback = m_progress_callback; progr_info.callback = m_progress_callback;
@ -153,9 +156,30 @@ namespace mchlib {
progr_info.total_bytes_read = 0; progr_info.total_bytes_read = 0;
progr_info.file_num = 0; progr_info.file_num = 0;
if (file_list.front().is_directory) {
MutableSetListingView recordlist(file_list.begin(), file_list.end(), 0); MutableSetListingView recordlist(file_list.begin(), file_list.end(), 0);
hash_dir(file_list.front(), recordlist, m_ignore_errors, progr_info); hash_dir(file_list.front(), recordlist, m_ignore_errors, progr_info);
} }
else {
assert(1 == file_list.size());
auto& curr_file_rec = file_list.front();
TigerHash dummy {};
try {
tiger_file(curr_file_rec.abs_path, curr_file_rec.hash, dummy, curr_file_rec.size);
curr_file_rec.hash_valid = true;
}
catch (const std::ios_base::failure& e) {
if (m_ignore_errors) {
curr_file_rec.unreadable = true;
curr_file_rec.hash = TigerHash {};
}
else {
throw e;
}
}
}
}
void Hashing::set_progress_callback (ProgressCallback parFunc) { void Hashing::set_progress_callback (ProgressCallback parFunc) {
if (parFunc) { if (parFunc) {