mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2024-11-29 01:33:46 +00:00
Show which files were matched on the index when locating by hash.
Not sure why those changes were in my stash; I thought I had committed all this already.
This commit is contained in:
parent
42d6ad0eac
commit
0770ca704b
3 changed files with 45 additions and 16 deletions
|
@ -78,12 +78,14 @@ namespace din {
|
||||||
const std::vector<mchlib::FileRecordNode>& parNodesIn,
|
const std::vector<mchlib::FileRecordNode>& parNodesIn,
|
||||||
std::vector<din::HashNode>& parNodesOut
|
std::vector<din::HashNode>& parNodesOut
|
||||||
) {
|
) {
|
||||||
|
using std::string;
|
||||||
|
|
||||||
const std::size_t sz = parNodesIn.size();
|
const std::size_t sz = parNodesIn.size();
|
||||||
parNodesOut.reserve(sz);
|
parNodesOut.reserve(sz);
|
||||||
for (const auto& in : parNodesIn) {
|
for (const auto& in : parNodesIn) {
|
||||||
assert(in.index < parRefData.size());
|
assert(in.index < parRefData.size());
|
||||||
const auto& data = parRefData[in.index];
|
const auto& data = parRefData[in.index];
|
||||||
parNodesOut.push_back(HashNode{data.hash, {}});
|
parNodesOut.push_back(HashNode{string(data.path()), data.hash, {}});
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(parNodesOut.size() == sz);
|
assert(parNodesOut.size() == sz);
|
||||||
|
|
|
@ -20,9 +20,11 @@
|
||||||
|
|
||||||
#include "dindexer-machinery/tiger.hpp"
|
#include "dindexer-machinery/tiger.hpp"
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
namespace din {
|
namespace din {
|
||||||
struct HashNode {
|
struct HashNode {
|
||||||
|
std::string path;
|
||||||
mchlib::TigerHash hash;
|
mchlib::TigerHash hash;
|
||||||
std::vector<HashNode> children;
|
std::vector<HashNode> children;
|
||||||
};
|
};
|
||||||
|
|
|
@ -25,13 +25,9 @@
|
||||||
#include <ciso646>
|
#include <ciso646>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <iterator>
|
||||||
|
|
||||||
namespace dindb {
|
namespace dindb {
|
||||||
std::ostream& operator<< (std::ostream& parStream, const LocatedItem& parItem) {
|
|
||||||
parStream << parItem.group_id << '\t' << parItem.id << '\t' << parItem.path;
|
|
||||||
return parStream;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::ostream& operator<< (std::ostream& parStream, const LocatedSet& parItem) {
|
std::ostream& operator<< (std::ostream& parStream, const LocatedSet& parItem) {
|
||||||
const decltype(parItem.dir_count) one = 1;
|
const decltype(parItem.dir_count) one = 1;
|
||||||
const auto dircount = std::max(parItem.dir_count, one) - one;
|
const auto dircount = std::max(parItem.dir_count, one) - one;
|
||||||
|
@ -43,6 +39,17 @@ namespace dindb {
|
||||||
} //namespace dindb
|
} //namespace dindb
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
struct LocatedItemInfo {
|
||||||
|
LocatedItemInfo (dindb::LocatedItem&& parItem, std::string&& parPath) :
|
||||||
|
item(std::move(parItem)),
|
||||||
|
local_path(std::move(parPath))
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
dindb::LocatedItem item;
|
||||||
|
std::string local_path;
|
||||||
|
};
|
||||||
|
|
||||||
std::vector<boost::string_ref> extract_tags (const boost::program_options::variables_map& parVM) {
|
std::vector<boost::string_ref> extract_tags (const boost::program_options::variables_map& parVM) {
|
||||||
if (not parVM.count("tags"))
|
if (not parVM.count("tags"))
|
||||||
return std::vector<boost::string_ref>();
|
return std::vector<boost::string_ref>();
|
||||||
|
@ -52,23 +59,34 @@ namespace {
|
||||||
|
|
||||||
void collect_matching_recursively (
|
void collect_matching_recursively (
|
||||||
dindb::Backend& parDB,
|
dindb::Backend& parDB,
|
||||||
const std::vector<din::HashNode>& parHashes,
|
std::vector<din::HashNode>&& parHashes,
|
||||||
const std::vector<boost::string_ref>& parTags,
|
const std::vector<boost::string_ref>& parTags,
|
||||||
std::vector<dindb::LocatedItem>& parOut
|
std::vector<LocatedItemInfo>& parOut
|
||||||
) {
|
) {
|
||||||
for (auto& hash : parHashes) {
|
for (auto&& hash : parHashes) {
|
||||||
std::vector<dindb::LocatedItem> results = parDB.locate_in_db(hash.hash, parTags);
|
std::vector<dindb::LocatedItem> results = parDB.locate_in_db(hash.hash, parTags);
|
||||||
if (results.empty()) {
|
if (results.empty()) {
|
||||||
collect_matching_recursively(parDB, hash.children, parTags, parOut);
|
collect_matching_recursively(parDB, std::move(hash.children), parTags, parOut);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
assert(1 == results.size());
|
assert(1 == results.size());
|
||||||
for (auto&& res : results) {
|
for (auto&& res : results) {
|
||||||
parOut.push_back(std::move(res));
|
parOut.push_back(LocatedItemInfo(std::move(res), std::move(hash.path)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::ostream& operator<< (std::ostream& parStream, const LocatedItemInfo& parItem) {
|
||||||
|
if (not parItem.local_path.empty())
|
||||||
|
parStream << '"' << parItem.local_path << "\" -->\t";
|
||||||
|
|
||||||
|
parStream << "group id: " << parItem.item.group_id << '\t' <<
|
||||||
|
"item id: " << parItem.item.id << '\t' <<
|
||||||
|
'"' << parItem.item.path << '"'
|
||||||
|
;
|
||||||
|
return parStream;
|
||||||
|
}
|
||||||
} //unnamed namespace
|
} //unnamed namespace
|
||||||
|
|
||||||
int main (int parArgc, char* parArgv[]) {
|
int main (int parArgc, char* parArgv[]) {
|
||||||
|
@ -106,18 +124,25 @@ int main (int parArgc, char* parArgv[]) {
|
||||||
std::copy(results.begin(), results.end(), std::ostream_iterator<dindb::LocatedSet>(std::cout, "\n"));
|
std::copy(results.begin(), results.end(), std::ostream_iterator<dindb::LocatedSet>(std::cout, "\n"));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
std::vector<dindb::LocatedItem> results;
|
std::vector<LocatedItemInfo> results;
|
||||||
const std::vector<boost::string_ref> tags = extract_tags(vm);
|
const std::vector<boost::string_ref> tags = extract_tags(vm);
|
||||||
|
|
||||||
if (vm.count("byhash")) {
|
if (vm.count("byhash")) {
|
||||||
const auto hashes = din::hash(vm["substring"].as<std::string>());
|
auto hashes = din::hash(vm["substring"].as<std::string>());
|
||||||
collect_matching_recursively(db, hashes, tags, results);
|
collect_matching_recursively(db, std::move(hashes), tags, results);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
const auto search_regex = g2r::convert(vm["substring"].as<std::string>(), not vm.count("case-insensitive"));
|
const auto search_regex = g2r::convert(vm["substring"].as<std::string>(), not vm.count("case-insensitive"));
|
||||||
results = db.locate_in_db(search_regex, tags);
|
auto located_items(db.locate_in_db(search_regex, tags));
|
||||||
|
results.reserve(located_items.size());
|
||||||
|
std::transform(
|
||||||
|
std::make_move_iterator(located_items.begin()),
|
||||||
|
std::make_move_iterator(located_items.end()),
|
||||||
|
std::back_inserter(results),
|
||||||
|
[](dindb::LocatedItem&& itm) { return LocatedItemInfo(std::move(itm), std::string()); }
|
||||||
|
);
|
||||||
}
|
}
|
||||||
std::copy(results.begin(), results.end(), std::ostream_iterator<dindb::LocatedItem>(std::cout, "\n"));
|
std::copy(results.begin(), results.end(), std::ostream_iterator<LocatedItemInfo>(std::cout, "\n"));
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue