1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2024-11-29 01:33:46 +00:00

Print matched items by hash. Only print as deep as needed.

This commit is contained in:
King_DuckZ 2017-08-18 00:22:34 +01:00
parent a793f7b289
commit 7e6d13b496
3 changed files with 63 additions and 8 deletions

View file

@ -19,6 +19,7 @@
#include "dindexer-machinery/scantask/hashing.hpp"
#include "dindexer-machinery/scantask/dirtree.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include "dindexer-machinery/make_filerecord_tree.hpp"
#include <memory>
#include <sys/stat.h>
#include <stdexcept>
@ -71,16 +72,38 @@ namespace din {
false
));
}
//Mirrors the index tree parNodesIn into a tree of HashNode objects.
//For every input node a HashNode is appended to parNodesOut, carrying the
//hash of the record found at position `index` in parRefData; output nodes
//are appended in the same order as the input nodes. The children of each
//input node are then converted recursively into the children of the
//corresponding output node.
//NOTE(review): the size assertion below only holds when parNodesOut is
//empty on entry - confirm callers always pass an empty vector.
void fill_hash_nodes (
	const std::vector<mchlib::FileRecordData>& parRefData,
	const std::vector<mchlib::FileRecordNode>& parNodesIn,
	std::vector<din::HashNode>& parNodesOut
) {
	const std::size_t node_count = parNodesIn.size();
	parNodesOut.reserve(node_count);

	//first pass: one output node per input node, children still empty
	for (std::size_t idx = 0; idx < node_count; ++idx) {
		const auto& src = parNodesIn[idx];
		assert(src.index < parRefData.size());
		const auto& record = parRefData[src.index];
		parNodesOut.push_back(HashNode{record.hash, {}});
	}

	assert(parNodesOut.size() == node_count);
	assert(parNodesIn.size() == node_count);

	//second pass: walk input and output side by side and descend
	auto out_it = parNodesOut.begin();
	for (const auto& src : parNodesIn) {
		fill_hash_nodes(parRefData, src.children, out_it->children);
		++out_it;
	}
}
} //unnamed namespace
mchlib::TigerHash hash (const std::string& parPath) {
std::vector<HashNode> hash (const std::string& parPath) {
using mchlib::FileRecordData;
using HashingTaskPtr = std::shared_ptr<stask::Hashing>;
struct stat path_stat;
const int retval = stat(parPath.c_str(), &path_stat);
if (retval) {
throw std::runtime_error("Can't access file \"" + parPath + "\"");
{
const int retval = stat(parPath.c_str(), &path_stat);
if (retval) {
throw std::runtime_error("Can't access file \"" + parPath + "\"");
}
}
std::shared_ptr<stask::Base<std::vector<FileRecordData>>> file_src_task;
@ -93,6 +116,13 @@ namespace din {
}
auto hashing = HashingTaskPtr(new stask::Hashing(file_src_task, false));
return hashing->get_or_create().front().hash;
std::vector<FileRecordData> hashes = hashing->get_or_create();
std::vector<HashNode> retval;
{
std::vector<mchlib::FileRecordNode> tree_indices = mchlib::make_filerecord_tree(hashes);
fill_hash_nodes(hashes, tree_indices, retval);
}
return retval;
}
} //namespace din

View file

@ -22,7 +22,12 @@
#include <vector>
namespace din {
mchlib::TigerHash hash ( const std::string& parPath );
//Node of a tree of hashes: each node holds one hash value plus the list of
//nodes nested directly beneath it (empty for a node without children).
struct HashNode {
//hash value stored in this node
mchlib::TigerHash hash;
//nodes nested directly under this one
std::vector<HashNode> children;
};
std::vector<HashNode> hash ( const std::string& parPath );
} //namespace din
#endif

View file

@ -49,6 +49,26 @@ namespace {
else
return dincore::split_tags(parVM["tags"].as<std::string>());
}
//Walks the hash tree in parHashes and queries parDB for each node's hash,
//restricted by parTags. When the lookup for a node returns something, the
//returned items are moved to the end of parOut and that node's children are
//not visited. When the lookup returns nothing, the search continues in the
//node's children.
void collect_matching_recursively (
	dindb::Backend& parDB,
	const std::vector<din::HashNode>& parHashes,
	const std::vector<boost::string_ref>& parTags,
	std::vector<dindb::LocatedItem>& parOut
) {
	for (const din::HashNode& node : parHashes) {
		std::vector<dindb::LocatedItem> found = parDB.locate_in_db(node.hash, parTags);

		if (not found.empty()) {
			//a hit at this level: keep it and don't look any deeper
			assert(1 == found.size());
			for (dindb::LocatedItem& item : found) {
				parOut.push_back(std::move(item));
			}
			continue;
		}

		//nothing known for this node, try its children
		collect_matching_recursively(parDB, node.children, parTags, parOut);
	}
}
} //unnamed namespace
int main (int parArgc, char* parArgv[]) {
@ -90,8 +110,8 @@ int main (int parArgc, char* parArgv[]) {
const std::vector<boost::string_ref> tags = extract_tags(vm);
if (vm.count("byhash")) {
const auto hash = din::hash(vm["substring"].as<std::string>());
results = db.locate_in_db(hash, tags);
const auto hashes = din::hash(vm["substring"].as<std::string>());
collect_matching_recursively(db, hashes, tags, results);
}
else {
const auto search_regex = g2r::convert(vm["substring"].as<std::string>(), not vm.count("case-insensitive"));