1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2024-11-25 00:53:43 +00:00

Add a --byhash switch to the locate command to search by content.

This is mostly to demonstrate the new task classes.
This commit is contained in:
King_DuckZ 2016-03-12 00:32:15 +01:00
parent 34ead94c8d
commit e95cd6cc44
8 changed files with 195 additions and 14 deletions

View file

@ -4,6 +4,7 @@ add_executable(${PROJECT_NAME}
main.cpp
commandline.cpp
postgre_locate.cpp
hash.cpp
)
target_include_directories(${PROJECT_NAME}
@ -13,6 +14,7 @@ target_include_directories(${PROJECT_NAME}
target_link_libraries(${PROJECT_NAME}
PRIVATE ${bare_name}-if
PRIVATE ${bare_name}-common
PRIVATE ${bare_name}-machinery
)
string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}")

View file

@ -28,6 +28,7 @@ namespace din {
set_options.add_options()
("case-insensitive,i", "Disable case sensitivity during search")
("set,s", "Look for matching sets instead of files")
("byhash,a", "Paths on the command line are local paths and searching should be done by content hash")
//("option,o", po::value<std::string>()->default_value("default_value"), "Help message")
//("option2", po::value<int>(), "Help message")
;

98
src/locate/hash.cpp Normal file
View file

@ -0,0 +1,98 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "hash.hpp"
#include "dindexer-machinery/scantask/hashing.hpp"
#include "dindexer-machinery/scantask/dirtree.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include <memory>
#include <sys/stat.h>
#include <stdexcept>
#include <utility>
#include <cassert>
#include <ciso646>
namespace stask = mchlib::scantask;
namespace din {
namespace {
// Scan task that yields a list holding exactly one record, describing a
// single path that the caller has already stat()'ed. It lets a lone file
// be handed to code that consumes a task producing
// std::vector<mchlib::FileRecordData>.
class SingleFileTask : public stask::Base<std::vector<mchlib::FileRecordData>> {
public:
	using PathList = std::vector<mchlib::FileRecordData>;

	// parPath must not be empty and parStat must not be null.
	// Only the pointer is stored: the pointed-to stat data is read later,
	// when the record list gets created, so it has to stay alive until then.
	SingleFileTask (std::string parPath, const struct stat* parStat);
	virtual ~SingleFileTask() noexcept = default;

private:
	// Empties the list previously filled by on_data_create().
	void on_data_destroy (PathList& parData) override;
	// Fills the (empty) list with the single record for m_path.
	void on_data_create (PathList& parData) override;

	std::string m_path;
	const struct stat* m_stat;
};
// Takes ownership of the path string and remembers where the caller's
// stat data lives; the stat structure itself is not copied.
SingleFileTask::SingleFileTask (std::string parPath, const struct stat* parStat) :
	m_path(std::move(parPath)),
	m_stat(parStat)
{
	// Both are programmer errors, hence assertions rather than exceptions.
	assert(!m_path.empty());
	assert(m_stat != nullptr);
}
// Discards the record list. The list is expected to be non-empty here,
// since on_data_create() always adds one record.
void SingleFileTask::on_data_destroy (PathList& parData) {
	assert(!parData.empty());
	parData.clear();
}
// Populates the list, which must be empty on entry, with one record for
// m_path.
void SingleFileTask::on_data_create (PathList& parData) {
	assert(parData.empty());
	parData.reserve(1);

	// Only the access and modification times are taken from the stat data.
	// NOTE(review): the two zeros and the two false flags are positional
	// arguments of the FileRecordData constructor; their meaning is not
	// visible here - confirm against recorddata.hpp.
	auto record = mchlib::FileRecordData(
		std::string(m_path),
		0,
		m_stat->st_atime,
		m_stat->st_mtime,
		0,
		false,
		false
	);
	parData.push_back(std::move(record));
}
} //unnamed namespace
// Computes the content hash of the local file or directory at parPath.
//
// The path is inspected with stat(): a directory is scanned through a
// DirTree task, a regular file through a SingleFileTask, and the resulting
// record list is run through a Hashing task. The hash of the first record
// in that list is returned.
//
// Throws std::runtime_error if the path cannot be stat()'ed, if it is
// neither a regular file nor a directory, or if hashing yields no records.
// Exceptions raised by the hashing task itself are propagated unchanged.
mchlib::TigerHash hash (const std::string& parPath) {
	using FileListTask = stask::Base<std::vector<mchlib::FileRecordData>>;

	struct stat path_stat;
	if (stat(parPath.c_str(), &path_stat) != 0) {
		throw std::runtime_error("Can't access file \"" + parPath + "\"");
	}

	std::shared_ptr<FileListTask> file_src_task;
	if (S_ISDIR(path_stat.st_mode)) {
		file_src_task = std::make_shared<stask::DirTree>(parPath);
	}
	else if (S_ISREG(path_stat.st_mode)) {
		// SingleFileTask keeps a pointer to path_stat; that is safe because
		// the task is only used inside this function, while path_stat is
		// still alive.
		file_src_task = std::make_shared<SingleFileTask>(parPath, &path_stat);
	}
	else {
		// FIFOs, sockets, devices and the like: reject them explicitly
		// instead of relying on an assert that vanishes in release builds.
		throw std::runtime_error("\"" + parPath + "\" is neither a regular file nor a directory");
	}

	// NOTE(review): the second argument is presumably the "ignore errors"
	// switch of the Hashing task - confirm against hashing.hpp.
	const auto hashing = std::make_shared<stask::Hashing>(file_src_task, false);
	const auto& records = hashing->get_or_create();
	if (records.empty()) {
		// front() on an empty list would be undefined behaviour.
		throw std::runtime_error("No hash could be computed for \"" + parPath + "\"");
	}
	return records.front().hash;
}
} //namespace din

28
src/locate/hash.hpp Normal file
View file

@ -0,0 +1,28 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id3F3E29B28FAA44A190451198CF1FD166
#define id3F3E29B28FAA44A190451198CF1FD166

#include "dindexer-machinery/tiger.hpp"
#include <string>
#include <vector>

namespace din {
	// Returns the content hash of the local file or directory at parPath.
	// Throws std::runtime_error if parPath cannot be accessed.
	mchlib::TigerHash hash ( const std::string& parPath );
} //namespace din

#endif

View file

@ -19,6 +19,7 @@
#include "postgre_locate.hpp"
#include "dindexer-common/settings.hpp"
#include "dindexerConfig.h"
#include "hash.hpp"
#include <iostream>
#include <ciso646>
#include <iterator>
@ -73,7 +74,15 @@ int main (int parArgc, char* parArgv[]) {
std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedSet>(std::cout, "\n"));
}
else {
const auto results = din::locate_in_db(settings.db, vm["substring"].as<std::string>(), not not vm.count("case-insensitive"));
std::vector<din::LocatedItem> results;
if (vm.count("byhash")) {
const auto hash = din::hash(vm["substring"].as<std::string>());
results = din::locate_in_db(settings.db, hash);
}
else {
results = din::locate_in_db(settings.db, vm["substring"].as<std::string>(), not not vm.count("case-insensitive"));
}
std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedItem>(std::cout, "\n"));
}
return 0;

View file

@ -17,6 +17,7 @@
#include "postgre_locate.hpp"
#include "pq/connection.hpp"
#include "dindexer-machinery/tiger.hpp"
#include <utility>
#include <sstream>
#include <boost/utility/string_ref.hpp>
@ -53,10 +54,25 @@ namespace din {
return std::move(retval);
}
// Converts a query result into a list of LocatedItem.
//
// Every record of parResult must provide the "path", "id" and "group_id"
// columns; "id" and "group_id" are converted with boost::lexical_cast to
// the types of the matching LocatedItem members, so a value that cannot be
// converted makes lexical_cast throw.
std::vector<LocatedItem> file_result_to_vec (pq::ResultSet&& parResult) {
	using boost::lexical_cast;

	std::vector<LocatedItem> retval;
	retval.reserve(parResult.size());
	for (const auto& record : parResult) {
		retval.push_back(LocatedItem{
			record["path"],
			lexical_cast<decltype(LocatedItem::id)>(record["id"]),
			lexical_cast<decltype(LocatedItem::group_id)>(record["group_id"])
		});
	}
	// Return the local by name: wrapping it in std::move() would disable
	// copy elision and gain nothing, as a returned local is moved anyway.
	return retval;
}
} //unnamed namespace
std::vector<LocatedItem> locate_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) {
using boost::lexical_cast;
using boost::string_ref;
namespace ba = boost::algorithm;
@ -78,17 +94,15 @@ namespace din {
oss << "LIMIT " << g_max_results << ';';
auto result = conn.query(oss.str());
std::vector<LocatedItem> retval;
retval.reserve(result.size());
for (const auto& record : result) {
retval.push_back(LocatedItem{
record["path"],
lexical_cast<decltype(LocatedItem::id)>(record["id"]),
lexical_cast<decltype(LocatedItem::group_id)>(record["group_id"])
});
}
return file_result_to_vec(std::move(result));
}
return std::move(retval);
// Looks up the files whose stored hash equals parSearch.
//
// The hash is passed to the database as a bound parameter ($1), and the
// number of returned rows is capped at g_max_results.
std::vector<LocatedItem> locate_in_db (const dinlib::SettingsDB& parDB, const mchlib::TigerHash& parSearch) {
	const std::string row_limit = boost::lexical_cast<std::string>(g_max_results);
	const std::string query = "SELECT \"path\",\"id\",\"group_id\" FROM \"files\" WHERE \"hash\"=$1 LIMIT " + row_limit + ';';

	auto connection = make_pq_conn(parDB);
	// NOTE(review): the meaning of the `true` flag of tiger_to_string is not
	// visible here - confirm against tiger.hpp.
	auto rows = connection.query(query, mchlib::tiger_to_string(parSearch, true));
	return file_result_to_vec(std::move(rows));
}
std::vector<LocatedSet> locate_sets_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) {

View file

@ -23,6 +23,10 @@
#include <string>
#include <cstdint>
namespace mchlib {
struct TigerHash;
} //namespace mchlib
namespace din {
struct LocatedItem {
std::string path;
@ -38,6 +42,7 @@ namespace din {
};
std::vector<LocatedItem> locate_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive );
std::vector<LocatedItem> locate_in_db ( const dinlib::SettingsDB& parDB, const mchlib::TigerHash& parSearch );
std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive );
std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, const std::vector<uint32_t>& parSets, bool parCaseInsensitive );
} //namespace din

View file

@ -145,6 +145,9 @@ namespace mchlib {
void Hashing::on_data_fill() {
std::vector<FileRecordData>& file_list = m_file_tree_task->get_or_create();
if (file_list.empty()) {
return;
}
ProgressInfo progr_info;
progr_info.callback = m_progress_callback;
@ -153,8 +156,29 @@ namespace mchlib {
progr_info.total_bytes_read = 0;
progr_info.file_num = 0;
MutableSetListingView recordlist(file_list.begin(), file_list.end(), 0);
hash_dir(file_list.front(), recordlist, m_ignore_errors, progr_info);
if (file_list.front().is_directory) {
MutableSetListingView recordlist(file_list.begin(), file_list.end(), 0);
hash_dir(file_list.front(), recordlist, m_ignore_errors, progr_info);
}
else {
assert(1 == file_list.size());
auto& curr_file_rec = file_list.front();
TigerHash dummy {};
try {
tiger_file(curr_file_rec.abs_path, curr_file_rec.hash, dummy, curr_file_rec.size);
curr_file_rec.hash_valid = true;
}
catch (const std::ios_base::failure& e) {
if (m_ignore_errors) {
curr_file_rec.unreadable = true;
curr_file_rec.hash = TigerHash {};
}
else {
throw e;
}
}
}
}
void Hashing::set_progress_callback (ProgressCallback parFunc) {