mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2024-11-29 01:33:46 +00:00
Add a --byhash switch to the locate command to search by content.
This is mostly to demonstrate the new task classes.
This commit is contained in:
parent
34ead94c8d
commit
e95cd6cc44
8 changed files with 195 additions and 14 deletions
|
@ -4,6 +4,7 @@ add_executable(${PROJECT_NAME}
|
||||||
main.cpp
|
main.cpp
|
||||||
commandline.cpp
|
commandline.cpp
|
||||||
postgre_locate.cpp
|
postgre_locate.cpp
|
||||||
|
hash.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
target_include_directories(${PROJECT_NAME}
|
target_include_directories(${PROJECT_NAME}
|
||||||
|
@ -13,6 +14,7 @@ target_include_directories(${PROJECT_NAME}
|
||||||
target_link_libraries(${PROJECT_NAME}
|
target_link_libraries(${PROJECT_NAME}
|
||||||
PRIVATE ${bare_name}-if
|
PRIVATE ${bare_name}-if
|
||||||
PRIVATE ${bare_name}-common
|
PRIVATE ${bare_name}-common
|
||||||
|
PRIVATE ${bare_name}-machinery
|
||||||
)
|
)
|
||||||
|
|
||||||
string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}")
|
string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}")
|
||||||
|
|
|
@ -28,6 +28,7 @@ namespace din {
|
||||||
set_options.add_options()
|
set_options.add_options()
|
||||||
("case-insensitive,i", "Disable case sensitivity during search")
|
("case-insensitive,i", "Disable case sensitivity during search")
|
||||||
("set,s", "Look for matching sets instead of files")
|
("set,s", "Look for matching sets instead of files")
|
||||||
|
("byhash,a", "Paths on the command line are local paths and searching should be done by content hash")
|
||||||
//("option,o", po::value<std::string>()->default_value("default_value"), "Help message")
|
//("option,o", po::value<std::string>()->default_value("default_value"), "Help message")
|
||||||
//("option2", po::value<int>(), "Help message")
|
//("option2", po::value<int>(), "Help message")
|
||||||
;
|
;
|
||||||
|
|
98
src/locate/hash.cpp
Normal file
98
src/locate/hash.cpp
Normal file
|
@ -0,0 +1,98 @@
|
||||||
|
/* Copyright 2015, 2016, Michele Santullo
|
||||||
|
* This file is part of "dindexer".
|
||||||
|
*
|
||||||
|
* "dindexer" is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* "dindexer" is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "hash.hpp"
|
||||||
|
#include "dindexer-machinery/scantask/hashing.hpp"
|
||||||
|
#include "dindexer-machinery/scantask/dirtree.hpp"
|
||||||
|
#include "dindexer-machinery/recorddata.hpp"
|
||||||
|
#include <memory>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <utility>
|
||||||
|
#include <cassert>
|
||||||
|
#include <ciso646>
|
||||||
|
|
||||||
|
namespace stask = mchlib::scantask;
|
||||||
|
|
||||||
|
namespace din {
|
||||||
|
namespace {
|
||||||
|
//Scan task that presents one single file as a list of file records, so it
//can be used where a stask::Base<std::vector<mchlib::FileRecordData>> is
//expected.
//The stat pointer received by the constructor is stored as it is, without
//copying the pointed struct: the caller has to keep that struct alive for
//as long as this task may be asked to create its data.
class SingleFileTask : public stask::Base<std::vector<mchlib::FileRecordData>> {
public:
	using PathList = std::vector<mchlib::FileRecordData>;

	SingleFileTask ( std::string parPath, const struct stat* parStat );
	virtual ~SingleFileTask() noexcept = default;

private:
	//Hooks required by the base task class
	void on_data_destroy ( PathList& parData ) override;
	void on_data_create ( PathList& parData ) override;

	std::string m_path;       //path of the file this task describes
	const struct stat* m_stat; //not owned, see the note on the class
};
|
||||||
|
|
||||||
|
SingleFileTask::SingleFileTask (std::string parPath, const struct stat* parStat) :
|
||||||
|
m_path(std::move(parPath)),
|
||||||
|
m_stat(parStat)
|
||||||
|
{
|
||||||
|
assert(not m_path.empty());
|
||||||
|
assert(m_stat);
|
||||||
|
}
|
||||||
|
|
||||||
|
//Empties the record list. The list is expected to hold data when this is
//called; that expectation is verified in debug builds only.
void SingleFileTask::on_data_destroy (PathList& parData) {
	assert(parData.size() > 0);
	parData.clear();
}
|
||||||
|
|
||||||
|
//Fills parData with exactly one record describing the file at m_path.
//parData is expected to be empty on entry (checked through assert()).
void SingleFileTask::on_data_create (PathList& parData) {
	assert(parData.empty());
	parData.reserve(1);

	//Only the access and modification times are taken from the stat struct.
	//NOTE(review): the meaning of the constant arguments (0, 0, false,
	//false) is defined by the FileRecordData constructor in recorddata.hpp,
	//which is not visible here - confirm before changing them.
	auto single_record = mchlib::FileRecordData(
		std::string(m_path),
		0,
		m_stat->st_atime,
		m_stat->st_mtime,
		0,
		false,
		false
	);
	parData.push_back(std::move(single_record));
}
|
||||||
|
} //unnamed namespace
|
||||||
|
|
||||||
|
//Computes the tiger hash of the content found at the local path parPath.
//
//parPath can name a directory, in which case the file list comes from a
//stask::DirTree task, or a regular file, in which case a SingleFileTask is
//used. The file list is then fed to a stask::Hashing task and the hash of
//the first resulting record is returned.
//
//Throws std::runtime_error if:
// - stat() fails on parPath
// - parPath is neither a directory nor a regular file (fifo, socket,
//   device...); this used to be guarded by an assert() only, so release
//   builds would have tried to hash such entries
// - the hashing task yields no records at all, where calling front() on
//   the empty list would be undefined behaviour
//Exceptions raised by the tasks themselves are not caught here.
mchlib::TigerHash hash (const std::string& parPath) {
	using mchlib::FileRecordData;
	using FileListTask = stask::Base<std::vector<FileRecordData>>;

	struct stat path_stat;
	if (stat(parPath.c_str(), &path_stat)) {
		throw std::runtime_error("Can't access file \"" + parPath + "\"");
	}

	//SingleFileTask only keeps a pointer to path_stat: the task is declared
	//after the struct so that it is destroyed first.
	std::shared_ptr<FileListTask> file_src_task;
	if (S_ISDIR(path_stat.st_mode)) {
		file_src_task = std::make_shared<stask::DirTree>(parPath);
	}
	else if (S_ISREG(path_stat.st_mode)) {
		file_src_task = std::make_shared<SingleFileTask>(parPath, &path_stat);
	}
	else {
		throw std::runtime_error("\"" + parPath + "\" is neither a regular file nor a directory");
	}

	//NOTE(review): the second argument is presumably the "ignore errors"
	//flag of the Hashing task - confirm against hashing.hpp.
	const auto hashing = std::make_shared<stask::Hashing>(file_src_task, false);
	const auto& records = hashing->get_or_create();
	if (records.empty()) {
		throw std::runtime_error("No hash could be computed for \"" + parPath + "\"");
	}
	return records.front().hash;
}
|
||||||
|
} //namespace din
|
28
src/locate/hash.hpp
Normal file
28
src/locate/hash.hpp
Normal file
|
@ -0,0 +1,28 @@
|
||||||
|
/* Copyright 2015, 2016, Michele Santullo
|
||||||
|
* This file is part of "dindexer".
|
||||||
|
*
|
||||||
|
* "dindexer" is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* "dindexer" is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef id3F3E29B28FAA44A190451198CF1FD166
#define id3F3E29B28FAA44A190451198CF1FD166

#include "dindexer-machinery/tiger.hpp"
//std::string is part of the declaration below, so it is included directly
//instead of relying on other headers pulling it in.
#include <string>
#include <vector>

namespace din {
	//Returns the tiger hash of the content at the local path parPath, which
	//can be a regular file or a directory.
	//Throws std::runtime_error if the path can't be accessed.
	mchlib::TigerHash hash ( const std::string& parPath );
} //namespace din

#endif
|
|
@ -19,6 +19,7 @@
|
||||||
#include "postgre_locate.hpp"
|
#include "postgre_locate.hpp"
|
||||||
#include "dindexer-common/settings.hpp"
|
#include "dindexer-common/settings.hpp"
|
||||||
#include "dindexerConfig.h"
|
#include "dindexerConfig.h"
|
||||||
|
#include "hash.hpp"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <ciso646>
|
#include <ciso646>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
|
@ -73,7 +74,15 @@ int main (int parArgc, char* parArgv[]) {
|
||||||
std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedSet>(std::cout, "\n"));
|
std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedSet>(std::cout, "\n"));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
const auto results = din::locate_in_db(settings.db, vm["substring"].as<std::string>(), not not vm.count("case-insensitive"));
|
std::vector<din::LocatedItem> results;
|
||||||
|
|
||||||
|
if (vm.count("byhash")) {
|
||||||
|
const auto hash = din::hash(vm["substring"].as<std::string>());
|
||||||
|
results = din::locate_in_db(settings.db, hash);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
results = din::locate_in_db(settings.db, vm["substring"].as<std::string>(), not not vm.count("case-insensitive"));
|
||||||
|
}
|
||||||
std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedItem>(std::cout, "\n"));
|
std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedItem>(std::cout, "\n"));
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
|
|
||||||
#include "postgre_locate.hpp"
|
#include "postgre_locate.hpp"
|
||||||
#include "pq/connection.hpp"
|
#include "pq/connection.hpp"
|
||||||
|
#include "dindexer-machinery/tiger.hpp"
|
||||||
#include <utility>
|
#include <utility>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <boost/utility/string_ref.hpp>
|
#include <boost/utility/string_ref.hpp>
|
||||||
|
@ -53,10 +54,25 @@ namespace din {
|
||||||
|
|
||||||
return std::move(retval);
|
return std::move(retval);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Converts every row of a query result into a LocatedItem.
//
//Each row must provide the "path", "id" and "group_id" columns; "id" and
//"group_id" are converted with boost::lexical_cast to the types of the
//matching LocatedItem members, so a value that can't be converted makes
//lexical_cast throw and the exception propagates to the caller.
//The result set is taken by rvalue reference but is only read from.
std::vector<LocatedItem> file_result_to_vec (pq::ResultSet&& parResult) {
	using boost::lexical_cast;

	std::vector<LocatedItem> retval;
	retval.reserve(parResult.size());
	for (const auto& record : parResult) {
		retval.push_back(LocatedItem{
			record["path"],
			lexical_cast<decltype(LocatedItem::id)>(record["id"]),
			lexical_cast<decltype(LocatedItem::group_id)>(record["group_id"])
		});
	}

	//Returned by name on purpose: wrapping a local in std::move() here
	//would prevent the compiler from eliding the copy/move (NRVO).
	return retval;
}
|
||||||
} //unnamed namespace
|
} //unnamed namespace
|
||||||
|
|
||||||
std::vector<LocatedItem> locate_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) {
|
std::vector<LocatedItem> locate_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) {
|
||||||
using boost::lexical_cast;
|
|
||||||
using boost::string_ref;
|
using boost::string_ref;
|
||||||
namespace ba = boost::algorithm;
|
namespace ba = boost::algorithm;
|
||||||
|
|
||||||
|
@ -78,17 +94,15 @@ namespace din {
|
||||||
oss << "LIMIT " << g_max_results << ';';
|
oss << "LIMIT " << g_max_results << ';';
|
||||||
|
|
||||||
auto result = conn.query(oss.str());
|
auto result = conn.query(oss.str());
|
||||||
std::vector<LocatedItem> retval;
|
return file_result_to_vec(std::move(result));
|
||||||
retval.reserve(result.size());
|
|
||||||
for (const auto& record : result) {
|
|
||||||
retval.push_back(LocatedItem{
|
|
||||||
record["path"],
|
|
||||||
lexical_cast<decltype(LocatedItem::id)>(record["id"]),
|
|
||||||
lexical_cast<decltype(LocatedItem::group_id)>(record["group_id"])
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return std::move(retval);
|
std::vector<LocatedItem> locate_in_db (const dinlib::SettingsDB& parDB, const mchlib::TigerHash& parSearch) {
|
||||||
|
const std::string query = std::string("SELECT \"path\",\"id\",\"group_id\" FROM \"files\" WHERE \"hash\"=$1 LIMIT ") + boost::lexical_cast<std::string>(g_max_results) + ';';
|
||||||
|
|
||||||
|
auto conn = make_pq_conn(parDB);
|
||||||
|
auto result = conn.query(query, mchlib::tiger_to_string(parSearch, true));
|
||||||
|
return file_result_to_vec(std::move(result));
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<LocatedSet> locate_sets_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) {
|
std::vector<LocatedSet> locate_sets_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) {
|
||||||
|
|
|
@ -23,6 +23,10 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
|
|
||||||
|
namespace mchlib {
|
||||||
|
struct TigerHash;
|
||||||
|
} //namespace mchlib
|
||||||
|
|
||||||
namespace din {
|
namespace din {
|
||||||
struct LocatedItem {
|
struct LocatedItem {
|
||||||
std::string path;
|
std::string path;
|
||||||
|
@ -38,6 +42,7 @@ namespace din {
|
||||||
};
|
};
|
||||||
|
|
||||||
std::vector<LocatedItem> locate_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive );
|
std::vector<LocatedItem> locate_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive );
|
||||||
|
std::vector<LocatedItem> locate_in_db ( const dinlib::SettingsDB& parDB, const mchlib::TigerHash& parSearch );
|
||||||
std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive );
|
std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive );
|
||||||
std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, const std::vector<uint32_t>& parSets, bool parCaseInsensitive );
|
std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, const std::vector<uint32_t>& parSets, bool parCaseInsensitive );
|
||||||
} //namespace din
|
} //namespace din
|
||||||
|
|
|
@ -145,6 +145,9 @@ namespace mchlib {
|
||||||
|
|
||||||
void Hashing::on_data_fill() {
|
void Hashing::on_data_fill() {
|
||||||
std::vector<FileRecordData>& file_list = m_file_tree_task->get_or_create();
|
std::vector<FileRecordData>& file_list = m_file_tree_task->get_or_create();
|
||||||
|
if (file_list.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
ProgressInfo progr_info;
|
ProgressInfo progr_info;
|
||||||
progr_info.callback = m_progress_callback;
|
progr_info.callback = m_progress_callback;
|
||||||
|
@ -153,9 +156,30 @@ namespace mchlib {
|
||||||
progr_info.total_bytes_read = 0;
|
progr_info.total_bytes_read = 0;
|
||||||
progr_info.file_num = 0;
|
progr_info.file_num = 0;
|
||||||
|
|
||||||
|
if (file_list.front().is_directory) {
|
||||||
MutableSetListingView recordlist(file_list.begin(), file_list.end(), 0);
|
MutableSetListingView recordlist(file_list.begin(), file_list.end(), 0);
|
||||||
hash_dir(file_list.front(), recordlist, m_ignore_errors, progr_info);
|
hash_dir(file_list.front(), recordlist, m_ignore_errors, progr_info);
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
assert(1 == file_list.size());
|
||||||
|
auto& curr_file_rec = file_list.front();
|
||||||
|
TigerHash dummy {};
|
||||||
|
|
||||||
|
try {
|
||||||
|
tiger_file(curr_file_rec.abs_path, curr_file_rec.hash, dummy, curr_file_rec.size);
|
||||||
|
curr_file_rec.hash_valid = true;
|
||||||
|
}
|
||||||
|
catch (const std::ios_base::failure& e) {
|
||||||
|
if (m_ignore_errors) {
|
||||||
|
curr_file_rec.unreadable = true;
|
||||||
|
curr_file_rec.hash = TigerHash {};
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Hashing::set_progress_callback (ProgressCallback parFunc) {
|
void Hashing::set_progress_callback (ProgressCallback parFunc) {
|
||||||
if (parFunc) {
|
if (parFunc) {
|
||||||
|
|
Loading…
Reference in a new issue