Mirror of https://github.com/KingDuckZ/dindexer.git
Extract the boost::copy_range part into a new function.
By making it a more generic function, both locate functions can now share the same code.
parent afb2e68849
commit bcfd1287c9
1 changed file with 62 additions and 47 deletions
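The diff below extracts the shared boost::copy_range logic into a new store_filtered_items() helper, so each locate function only supplies its own filter predicate. For orientation, here is a simplified, self-contained sketch of the same range-adaptor idiom (zip two parallel vectors, filter with a caller-supplied predicate, transform the survivors, materialize with boost::copy_range). It is not the dindexer code itself: the names Item and make_items, and the two-container form of zip_range, are illustrative only.

#include <boost/iterator/zip_iterator.hpp>
#include <boost/range/adaptor/filtered.hpp>
#include <boost/range/adaptor/transformed.hpp>
#include <boost/range/iterator_range.hpp>
#include <boost/tuple/tuple.hpp>
#include <cassert>
#include <functional>
#include <iostream>
#include <string>
#include <vector>

struct Item {
    std::string path;
    int id;
};

// Two-container stand-in for the variadic zip_range() helper already present in the changed file.
template <typename A, typename B>
boost::iterator_range<boost::zip_iterator<boost::tuple<typename A::const_iterator, typename B::const_iterator>>>
zip_range (const A& parA, const B& parB) {
    return {
        boost::make_zip_iterator(boost::make_tuple(parA.begin(), parB.begin())),
        boost::make_zip_iterator(boost::make_tuple(parA.end(), parB.end()))
    };
}

// Generic "filter, then build" helper, analogous in shape to store_filtered_items():
// the caller decides what to keep, the helper decides how the output is assembled.
std::vector<Item> make_items (
    const std::vector<std::string>& parPaths,
    const std::vector<int>& parIDs,
    std::function<bool(const boost::tuple<std::string, int>&)> parFilter
) {
    using boost::adaptors::filtered;
    using boost::adaptors::transformed;

    assert(parPaths.size() == parIDs.size());
    return boost::copy_range<std::vector<Item>>(
        zip_range(parPaths, parIDs) |
        filtered(parFilter) |
        transformed([](const boost::tuple<std::string, int>& t) {
            return Item{t.get<0>(), t.get<1>()};
        })
    );
}

int main() {
    const std::vector<std::string> paths {"/mnt/a.txt", "/mnt/keep/b.txt", "/mnt/c.txt"};
    const std::vector<int> ids {10, 20, 30};

    // Each call site only supplies its own predicate, mirroring how the two
    // locate functions now pass different lambdas to the shared helper.
    const auto items = make_items(paths, ids, [](const boost::tuple<std::string, int>& t) {
        return t.get<0>().find("keep") != std::string::npos;
    });

    for (const auto& itm : items)
        std::cout << itm.path << " -> " << itm.id << "\n";
    return 0;
}

The design point is the same as in the commit: the helper owns the mechanical zip/filter/transform/copy_range plumbing, while each caller passes only the predicate that differs between the two locate functions.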
@@ -21,15 +21,21 @@
 #include "dindexerConfig.h"
 #include "dindexer-core/split_tags.hpp"
 #include "dindexer-machinery/tiger.hpp"
+#include "helpers/compatibility.h"
 #include <boost/regex.hpp>
 #include <ciso646>
 #include <algorithm>
 #include <boost/range/adaptor/filtered.hpp>
 #include <boost/range/adaptor/transformed.hpp>
 #include <boost/iterator/zip_iterator.hpp>
+#include <functional>
+#include <iterator>
 
 namespace dindb {
 	namespace {
+		inline std::string to_std_string ( boost::string_ref parStr ) a_always_inline;
+		inline void concatenate ( std::vector<LocatedItem>&& parAppend, std::vector<LocatedItem>& parOut ) a_always_inline;
+
 		bool all_tags_match (const TagList& parTags, const std::string& parTaglist) {
 			const auto tags = dincore::split_tags(parTaglist);
@@ -46,27 +52,6 @@ namespace dindb {
 			return true;
 		}
 
-		void store_matching_paths (redis::IncRedisBatch& parBatch, std::vector<LocatedItem>& parOut, std::vector<FileIDType>& parIDs, const boost::regex& parSearch, const TagList& parTags) {
-			using dinhelp::lexical_cast;
-			assert(parIDs.size() == parBatch.replies().size());
-
-			parBatch.throw_if_failed();
-			std::size_t id_index = 0;
-			for (const auto& itm : parBatch.replies()) {
-				const auto reply = redis::get_array(itm);
-				const auto& path = redis::get_string(reply[0]);
-
-				if (boost::regex_search(path, parSearch)) {
-					if (parTags.empty() or all_tags_match(parTags, redis::get_string(reply[2]))) {
-						const auto group_id = lexical_cast<GroupIDType>(redis::get_string(reply[1]));
-						parOut.push_back(LocatedItem{path, parIDs[id_index], group_id});
-					}
-				}
-				assert(id_index < parIDs.size());
-				++id_index;
-			}
-		}
-
 		//See: http://stackoverflow.com/questions/12552277/whats-the-best-way-to-iterate-over-two-or-more-containers-simultaneously/12553437#12553437
 		//(referenced from http://stackoverflow.com/questions/16982190/c-use-boost-range-transformed-adaptor-with-binary-function)
 		//What became of this? http://marc.info/?l=boost-users&m=129619765731342
@@ -80,6 +65,45 @@ namespace dindb {
 				boost::make_zip_iterator(boost::make_tuple(parConts.end()...))
 			};
 		}
+
+		std::vector<LocatedItem> store_filtered_items (
+			const std::vector<redis::Reply>& parReplies,
+			const std::vector<std::string>& parIDs,
+			std::function<bool(const boost::tuple<std::vector<redis::Reply>, std::string>&)> parFilter
+		) {
+			using boost::adaptors::filtered;
+			using boost::adaptors::transformed;
+			using boost::tuple;
+			using boost::make_tuple;
+			using redis::get_string;
+			using redis::Reply;
+			using std::vector;
+			using dinhelp::lexical_cast;
+
+			assert(parReplies.size() == parIDs.size());
+			return boost::copy_range<vector<LocatedItem>>(
+				zip_range(parReplies, parIDs) |
+				transformed([](const tuple<Reply, std::string>& r) {
+					return make_tuple(redis::get_array(r.get<0>()), r.get<1>());
+				}) |
+				filtered(parFilter) |
+				transformed([](const tuple<vector<Reply>, std::string>& t) {
+					return LocatedItem{
+						get_string(t.get<0>()[0]),
+						lexical_cast<FileIDType>(t.get<1>()),
+						lexical_cast<GroupIDType>(get_string(t.get<0>()[1]))
+					};
+				})
+			);
+		}
+
+		std::string to_std_string (boost::string_ref parStr) {
+			return std::string(parStr.data(), parStr.size());
+		}
+
+		void concatenate (std::vector<LocatedItem>&& parAppend, std::vector<LocatedItem>& parOut) {
+			parOut.insert(parOut.end(), std::make_move_iterator(parAppend.begin()), std::make_move_iterator(parAppend.end()));
+		}
 	} //unnamed namespace
 
 	std::vector<GroupIDType> find_all_sets (redis::IncRedis& parRedis) {
@@ -101,42 +125,39 @@ namespace dindb {
 		const int prefetch_count = 500;
 
 		std::vector<LocatedItem> retval;
-		std::vector<FileIDType> ids;
+		std::vector<std::string> ids;
 		ids.reserve(prefetch_count);
 
+		auto filter = [&parTags, &search](const boost::tuple<std::vector<redis::Reply>, std::string>& t) {
+			return (parTags.empty() or all_tags_match(parTags, redis::get_string(t.get<0>()[2]))) and
+				boost::regex_search(redis::get_string(t.get<0>()[0]), search);
+		};
+
 		int curr_count = 0;
 		auto batch = parRedis.make_batch();
 		for (const auto& itm : parRedis.scan(PROGRAM_NAME ":file:*")) {
 			++curr_count;
 			batch.hmget(itm, "path", "group_id", "tags");
-			ids.push_back(lexical_cast<FileIDType>(split_and_trim(itm, ':').back()));
+			ids.push_back(to_std_string(split_and_trim(itm, ':').back()));
 
 			if (curr_count == prefetch_count) {
-				store_matching_paths(batch, retval, ids, search, parTags);
+				concatenate(store_filtered_items(batch.replies(), ids, filter), retval);
 				batch.reset();
 				curr_count = 0;
 				ids.clear();
 			}
 		}
 		if (curr_count)
-			store_matching_paths(batch, retval, ids, search, parTags);
+			concatenate(store_filtered_items(batch.replies(), ids, filter), retval);
 
 		return retval;
 	}
 
 	std::vector<LocatedItem> locate_in_db (redis::IncRedis& parRedis, const mchlib::TigerHash& parSearch, const TagList& parTags) {
-		using boost::adaptors::filtered;
-		using boost::adaptors::transformed;
-		using boost::tuple;
-		using boost::make_tuple;
-		using redis::get_string;
-		using redis::Reply;
-		using std::vector;
-		using dinhelp::lexical_cast;
-
 		const auto hash_key = PROGRAM_NAME ":hash:" + mchlib::tiger_to_string(parSearch, false);
 		const auto file_ids = parRedis.smembers(hash_key);
 
-		vector<std::string> ids;
+		std::vector<std::string> ids;
 		if (file_ids) {
 			auto batch = parRedis.make_batch();
 			for (auto&& file_id : *file_ids) {
@@ -149,18 +170,12 @@ namespace dindb {
 			}
 			batch.throw_if_failed();
 
-			assert(batch.replies().size() == ids.size());
-			return boost::copy_range<vector<LocatedItem>>(
-				zip_range(batch.replies(), ids) |
-				transformed([](const tuple<Reply, std::string>& r)
-					{ return make_tuple(redis::get_array(r.get<0>()), r.get<1>()); }
-				) |
-				filtered([&parTags](const tuple<vector<Reply>, std::string>& t)
-					{ return parTags.empty() or all_tags_match(parTags, get_string(t.get<0>()[2])); }
-				) |
-				transformed([&ids](const tuple<vector<Reply>, std::string>& t)
-					{ return LocatedItem{ get_string(t.get<0>()[0]), lexical_cast<FileIDType>(t.get<1>()), lexical_cast<GroupIDType>(get_string(t.get<0>()[1])) }; }
-				)
+			return store_filtered_items(
+				batch.replies(),
+				ids,
+				[&parTags](const boost::tuple<std::vector<redis::Reply>, std::string>& t) {
+					return parTags.empty() or all_tags_match(parTags, redis::get_string(t.get<0>()[2]));
+				}
 			);
 		}
 		else {