mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2024-11-25 00:53:43 +00:00
Print matched items by hash. Only print as deep as needed.
This commit is contained in:
parent
a793f7b289
commit
7e6d13b496
3 changed files with 63 additions and 8 deletions
|
@ -19,6 +19,7 @@
|
|||
#include "dindexer-machinery/scantask/hashing.hpp"
|
||||
#include "dindexer-machinery/scantask/dirtree.hpp"
|
||||
#include "dindexer-machinery/recorddata.hpp"
|
||||
#include "dindexer-machinery/make_filerecord_tree.hpp"
|
||||
#include <memory>
|
||||
#include <sys/stat.h>
|
||||
#include <stdexcept>
|
||||
|
@ -71,16 +72,38 @@ namespace din {
|
|||
false
|
||||
));
|
||||
}
|
||||
|
||||
void fill_hash_nodes (
|
||||
const std::vector<mchlib::FileRecordData>& parRefData,
|
||||
const std::vector<mchlib::FileRecordNode>& parNodesIn,
|
||||
std::vector<din::HashNode>& parNodesOut
|
||||
) {
|
||||
const std::size_t sz = parNodesIn.size();
|
||||
parNodesOut.reserve(sz);
|
||||
for (const auto& in : parNodesIn) {
|
||||
assert(in.index < parRefData.size());
|
||||
const auto& data = parRefData[in.index];
|
||||
parNodesOut.push_back(HashNode{data.hash, {}});
|
||||
}
|
||||
|
||||
assert(parNodesOut.size() == sz);
|
||||
assert(parNodesIn.size() == sz);
|
||||
for (std::size_t z = 0; z < sz; ++z) {
|
||||
fill_hash_nodes(parRefData, parNodesIn[z].children, parNodesOut[z].children);
|
||||
}
|
||||
}
|
||||
} //unnamed namespace
|
||||
|
||||
mchlib::TigerHash hash (const std::string& parPath) {
|
||||
std::vector<HashNode> hash (const std::string& parPath) {
|
||||
using mchlib::FileRecordData;
|
||||
using HashingTaskPtr = std::shared_ptr<stask::Hashing>;
|
||||
|
||||
struct stat path_stat;
|
||||
const int retval = stat(parPath.c_str(), &path_stat);
|
||||
if (retval) {
|
||||
throw std::runtime_error("Can't access file \"" + parPath + "\"");
|
||||
{
|
||||
const int retval = stat(parPath.c_str(), &path_stat);
|
||||
if (retval) {
|
||||
throw std::runtime_error("Can't access file \"" + parPath + "\"");
|
||||
}
|
||||
}
|
||||
|
||||
std::shared_ptr<stask::Base<std::vector<FileRecordData>>> file_src_task;
|
||||
|
@ -93,6 +116,13 @@ namespace din {
|
|||
}
|
||||
|
||||
auto hashing = HashingTaskPtr(new stask::Hashing(file_src_task, false));
|
||||
return hashing->get_or_create().front().hash;
|
||||
std::vector<FileRecordData> hashes = hashing->get_or_create();
|
||||
|
||||
std::vector<HashNode> retval;
|
||||
{
|
||||
std::vector<mchlib::FileRecordNode> tree_indices = mchlib::make_filerecord_tree(hashes);
|
||||
fill_hash_nodes(hashes, tree_indices, retval);
|
||||
}
|
||||
return retval;
|
||||
}
|
||||
} //namespace din
|
||||
|
|
|
@ -22,7 +22,12 @@
|
|||
#include <vector>
|
||||
|
||||
namespace din {
|
||||
mchlib::TigerHash hash ( const std::string& parPath );
|
||||
struct HashNode {
|
||||
mchlib::TigerHash hash;
|
||||
std::vector<HashNode> children;
|
||||
};
|
||||
|
||||
std::vector<HashNode> hash ( const std::string& parPath );
|
||||
} //namespace din
|
||||
|
||||
#endif
|
||||
|
|
|
@ -49,6 +49,26 @@ namespace {
|
|||
else
|
||||
return dincore::split_tags(parVM["tags"].as<std::string>());
|
||||
}
|
||||
|
||||
// Looks up every node of parHashes in the database and appends the items
// found to parOut. A node's children are only visited when the node itself
// yields no result, so the search goes no deeper than needed.
//
// parDB     backend queried through locate_in_db()
// parHashes nodes to look up, in order
// parTags   tags forwarded unchanged to every locate_in_db() call
// parOut    receives the located items (moved in, appended at the end)
void collect_matching_recursively (
	dindb::Backend& parDB,
	const std::vector<din::HashNode>& parHashes,
	const std::vector<boost::string_ref>& parTags,
	std::vector<dindb::LocatedItem>& parOut
) {
	for (const din::HashNode& node : parHashes) {
		std::vector<dindb::LocatedItem> found = parDB.locate_in_db(node.hash, parTags);

		if (!found.empty()) {
			// A hash that is found is expected to map to exactly one item.
			assert(1 == found.size());
			for (dindb::LocatedItem& item : found) {
				parOut.push_back(std::move(item));
			}
			continue;
		}

		// Nothing matched at this level: try the nested nodes instead.
		collect_matching_recursively(parDB, node.children, parTags, parOut);
	}
}
|
||||
} //unnamed namespace
|
||||
|
||||
int main (int parArgc, char* parArgv[]) {
|
||||
|
@ -90,8 +110,8 @@ int main (int parArgc, char* parArgv[]) {
|
|||
const std::vector<boost::string_ref> tags = extract_tags(vm);
|
||||
|
||||
if (vm.count("byhash")) {
|
||||
const auto hash = din::hash(vm["substring"].as<std::string>());
|
||||
results = db.locate_in_db(hash, tags);
|
||||
const auto hashes = din::hash(vm["substring"].as<std::string>());
|
||||
collect_matching_recursively(db, hashes, tags, results);
|
||||
}
|
||||
else {
|
||||
const auto search_regex = g2r::convert(vm["substring"].as<std::string>(), not vm.count("case-insensitive"));
|
||||
|
|
Loading…
Reference in a new issue