From 7e6d13b496c11335b5a6b3a4192455a287d9b93c Mon Sep 17 00:00:00 2001 From: King_DuckZ Date: Fri, 18 Aug 2017 00:22:34 +0100 Subject: [PATCH] Print matched items by hash. Only print as deep as needed. --- src/locate/hash.cpp | 40 +++++++++++++++++++++++++++++++++++----- src/locate/hash.hpp | 7 ++++++- src/locate/main.cpp | 24 ++++++++++++++++++++++-- 3 files changed, 63 insertions(+), 8 deletions(-) diff --git a/src/locate/hash.cpp b/src/locate/hash.cpp index ceb0bde..7b3c622 100644 --- a/src/locate/hash.cpp +++ b/src/locate/hash.cpp @@ -19,6 +19,7 @@ #include "dindexer-machinery/scantask/hashing.hpp" #include "dindexer-machinery/scantask/dirtree.hpp" #include "dindexer-machinery/recorddata.hpp" +#include "dindexer-machinery/make_filerecord_tree.hpp" #include #include #include @@ -71,16 +72,38 @@ namespace din { false )); } + + void fill_hash_nodes ( + const std::vector& parRefData, + const std::vector& parNodesIn, + std::vector& parNodesOut + ) { + const std::size_t sz = parNodesIn.size(); + parNodesOut.reserve(sz); + for (const auto& in : parNodesIn) { + assert(in.index < parRefData.size()); + const auto& data = parRefData[in.index]; + parNodesOut.push_back(HashNode{data.hash, {}}); + } + + assert(parNodesOut.size() == sz); + assert(parNodesIn.size() == sz); + for (std::size_t z = 0; z < sz; ++z) { + fill_hash_nodes(parRefData, parNodesIn[z].children, parNodesOut[z].children); + } + } } //unnamed namespace - mchlib::TigerHash hash (const std::string& parPath) { + std::vector hash (const std::string& parPath) { using mchlib::FileRecordData; using HashingTaskPtr = std::shared_ptr; struct stat path_stat; - const int retval = stat(parPath.c_str(), &path_stat); - if (retval) { - throw std::runtime_error("Can't access file \"" + parPath + "\""); + { + const int retval = stat(parPath.c_str(), &path_stat); + if (retval) { + throw std::runtime_error("Can't access file \"" + parPath + "\""); + } } std::shared_ptr>> file_src_task; @@ -93,6 +116,13 @@ namespace din { } auto hashing = HashingTaskPtr(new stask::Hashing(file_src_task, false)); - return hashing->get_or_create().front().hash; + std::vector hashes = hashing->get_or_create(); + + std::vector retval; + { + std::vector tree_indices = mchlib::make_filerecord_tree(hashes); + fill_hash_nodes(hashes, tree_indices, retval); + } + return retval; } } //namespace din diff --git a/src/locate/hash.hpp b/src/locate/hash.hpp index 314006c..22ee81b 100644 --- a/src/locate/hash.hpp +++ b/src/locate/hash.hpp @@ -22,7 +22,12 @@ #include namespace din { - mchlib::TigerHash hash ( const std::string& parPath ); + struct HashNode { + mchlib::TigerHash hash; + std::vector children; + }; + + std::vector hash ( const std::string& parPath ); } //namespace din #endif diff --git a/src/locate/main.cpp b/src/locate/main.cpp index 6e8fa91..6c058d4 100644 --- a/src/locate/main.cpp +++ b/src/locate/main.cpp @@ -49,6 +49,26 @@ namespace { else return dincore::split_tags(parVM["tags"].as()); } + + void collect_matching_recursively ( + dindb::Backend& parDB, + const std::vector& parHashes, + const std::vector& parTags, + std::vector& parOut + ) { + for (auto& hash : parHashes) { + std::vector results = parDB.locate_in_db(hash.hash, parTags); + if (results.empty()) { + collect_matching_recursively(parDB, hash.children, parTags, parOut); + } + else { + assert(1 == results.size()); + for (auto&& res : results) { + parOut.push_back(std::move(res)); + } + } + } + } } //unnamed namespace int main (int parArgc, char* parArgv[]) { @@ -90,8 +110,8 @@ int main (int parArgc, char* parArgv[]) { const std::vector tags = extract_tags(vm); if (vm.count("byhash")) { - const auto hash = din::hash(vm["substring"].as()); - results = db.locate_in_db(hash, tags); + const auto hashes = din::hash(vm["substring"].as()); + collect_matching_recursively(db, hashes, tags, results); } else { const auto search_regex = g2r::convert(vm["substring"].as(), not vm.count("case-insensitive"));