mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2025-08-06 13:19:47 +00:00
Implement locate_sets_in_db().
This commit is contained in:
parent
bcfd1287c9
commit
e0eaad2587
4 changed files with 102 additions and 10 deletions
|
@ -179,7 +179,8 @@ namespace dindb {
|
||||||
"type", parSetData.type,
|
"type", parSetData.type,
|
||||||
"content_type", parSetData.content_type,
|
"content_type", parSetData.content_type,
|
||||||
"base_file_id", lexical_cast<std::string>(base_file_id),
|
"base_file_id", lexical_cast<std::string>(base_file_id),
|
||||||
"file_count", lexical_cast<std::string>(parData.size())
|
"item_count", lexical_cast<std::string>(parData.size()),
|
||||||
|
"dir_count", lexical_cast<std::string>(std::count_if(parData.begin(), parData.end(), [](const mchlib::FileRecordData& r){return r.is_directory;}))
|
||||||
);
|
);
|
||||||
|
|
||||||
#if !defined(NDEBUG)
|
#if !defined(NDEBUG)
|
||||||
|
@ -254,7 +255,7 @@ namespace dindb {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<LocatedSet> BackendRedis::locate_sets_in_db (const std::string& parSearch, bool parCaseInsensitive) {
|
std::vector<LocatedSet> BackendRedis::locate_sets_in_db (const std::string& parSearch, bool parCaseInsensitive) {
|
||||||
return std::vector<LocatedSet>();
|
return dindb::locate_sets_in_db(m_redis, parSearch, parCaseInsensitive);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<LocatedSet> BackendRedis::locate_sets_in_db (const std::string& parSearch, const std::vector<GroupIDType>& parSets, bool parCaseInsensitive) {
|
std::vector<LocatedSet> BackendRedis::locate_sets_in_db (const std::string& parSearch, const std::vector<GroupIDType>& parSets, bool parCaseInsensitive) {
|
||||||
|
|
|
@ -37,7 +37,7 @@ namespace dindb {
|
||||||
|
|
||||||
for (auto id : parIDs) {
|
for (auto id : parIDs) {
|
||||||
const auto set_key = PROGRAM_NAME ":set:" + lexical_cast<std::string>(id);
|
const auto set_key = PROGRAM_NAME ":set:" + lexical_cast<std::string>(id);
|
||||||
parBatch.hmget(set_key, "base_file_id", "file_count", "name");
|
parBatch.hmget(set_key, "base_file_id", "item_count", "name");
|
||||||
}
|
}
|
||||||
|
|
||||||
std::map<GroupIDType, std::string> set_dele_list;
|
std::map<GroupIDType, std::string> set_dele_list;
|
||||||
|
|
|
@ -30,11 +30,15 @@
|
||||||
#include <boost/iterator/zip_iterator.hpp>
|
#include <boost/iterator/zip_iterator.hpp>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <iterator>
|
#include <iterator>
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
namespace dindb {
|
namespace dindb {
|
||||||
namespace {
|
namespace {
|
||||||
|
const int g_prefetch_count = 500;
|
||||||
|
|
||||||
inline std::string to_std_string ( boost::string_ref parStr ) a_always_inline;
|
inline std::string to_std_string ( boost::string_ref parStr ) a_always_inline;
|
||||||
inline void concatenate ( std::vector<LocatedItem>&& parAppend, std::vector<LocatedItem>& parOut ) a_always_inline;
|
template <typename T>
|
||||||
|
inline void concatenate ( std::vector<T>&& parAppend, std::vector<T>& parOut ) a_always_inline;
|
||||||
|
|
||||||
bool all_tags_match (const TagList& parTags, const std::string& parTaglist) {
|
bool all_tags_match (const TagList& parTags, const std::string& parTaglist) {
|
||||||
const auto tags = dincore::split_tags(parTaglist);
|
const auto tags = dincore::split_tags(parTaglist);
|
||||||
|
@ -80,6 +84,8 @@ namespace dindb {
|
||||||
using std::vector;
|
using std::vector;
|
||||||
using dinhelp::lexical_cast;
|
using dinhelp::lexical_cast;
|
||||||
|
|
||||||
|
//Reply is expected to contain: "path", "group_id", "tags"
|
||||||
|
|
||||||
assert(parReplies.size() == parIDs.size());
|
assert(parReplies.size() == parIDs.size());
|
||||||
return boost::copy_range<vector<LocatedItem>>(
|
return boost::copy_range<vector<LocatedItem>>(
|
||||||
zip_range(parReplies, parIDs) |
|
zip_range(parReplies, parIDs) |
|
||||||
|
@ -97,11 +103,49 @@ namespace dindb {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<LocatedSet> store_filtered_sets (
|
||||||
|
const std::vector<redis::Reply>& parReplies,
|
||||||
|
const std::vector<std::string>& parIDs,
|
||||||
|
std::function<bool(const boost::tuple<std::vector<redis::Reply>, std::string>&)> parFilter
|
||||||
|
) {
|
||||||
|
using boost::adaptors::filtered;
|
||||||
|
using boost::adaptors::transformed;
|
||||||
|
using boost::tuple;
|
||||||
|
using boost::make_tuple;
|
||||||
|
using redis::get_string;
|
||||||
|
using redis::Reply;
|
||||||
|
using std::vector;
|
||||||
|
using dinhelp::lexical_cast;
|
||||||
|
|
||||||
|
//Reply is expected to contain: "desc", "item_count", "dir_count"
|
||||||
|
|
||||||
|
assert(parReplies.size() == parIDs.size());
|
||||||
|
return boost::copy_range<vector<LocatedSet>>(
|
||||||
|
zip_range(parReplies, parIDs) |
|
||||||
|
transformed([](const tuple<Reply, std::string>& r) {
|
||||||
|
return make_tuple(redis::get_array(r.get<0>()), r.get<1>());
|
||||||
|
}) |
|
||||||
|
filtered(parFilter) |
|
||||||
|
transformed([](const tuple<vector<Reply>, std::string>& t) {
|
||||||
|
const auto itm_count = lexical_cast<uint32_t>(get_string(t.get<0>()[1]));
|
||||||
|
const auto dir_count = lexical_cast<uint32_t>(get_string(t.get<0>()[2]));
|
||||||
|
assert(dir_count <= itm_count);
|
||||||
|
return LocatedSet{
|
||||||
|
get_string(t.get<0>()[0]),
|
||||||
|
lexical_cast<GroupIDType>(t.get<1>()),
|
||||||
|
itm_count - dir_count,
|
||||||
|
dir_count
|
||||||
|
};
|
||||||
|
})
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
std::string to_std_string (boost::string_ref parStr) {
|
std::string to_std_string (boost::string_ref parStr) {
|
||||||
return std::string(parStr.data(), parStr.size());
|
return std::string(parStr.data(), parStr.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
void concatenate (std::vector<LocatedItem>&& parAppend, std::vector<LocatedItem>& parOut) {
|
template <typename T>
|
||||||
|
void concatenate (std::vector<T>&& parAppend, std::vector<T>& parOut) {
|
||||||
parOut.insert(parOut.end(), std::make_move_iterator(parAppend.begin()), std::make_move_iterator(parAppend.end()));
|
parOut.insert(parOut.end(), std::make_move_iterator(parAppend.begin()), std::make_move_iterator(parAppend.end()));
|
||||||
}
|
}
|
||||||
} //unnamed namespace
|
} //unnamed namespace
|
||||||
|
@ -117,12 +161,11 @@ namespace dindb {
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<LocatedItem> locate_in_db (redis::IncRedis& parRedis, const std::string& parSearch, const TagList& parTags) {
|
std::vector<LocatedItem> locate_in_db (redis::IncRedis& parRedis, const std::string& parRegex, const TagList& parTags) {
|
||||||
using dincore::split_and_trim;
|
using dincore::split_and_trim;
|
||||||
using dinhelp::lexical_cast;
|
|
||||||
|
|
||||||
const boost::regex search(parSearch, boost::regex_constants::optimize | boost::regex_constants::nosubs | boost::regex_constants::perl);
|
const boost::regex search(parRegex, boost::regex_constants::optimize | boost::regex_constants::nosubs | boost::regex_constants::perl);
|
||||||
const int prefetch_count = 500;
|
const int prefetch_count = g_prefetch_count;
|
||||||
|
|
||||||
std::vector<LocatedItem> retval;
|
std::vector<LocatedItem> retval;
|
||||||
std::vector<std::string> ids;
|
std::vector<std::string> ids;
|
||||||
|
@ -182,4 +225,51 @@ namespace dindb {
|
||||||
return std::vector<LocatedItem>();
|
return std::vector<LocatedItem>();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<LocatedSet> locate_sets_in_db (redis::IncRedis& parRedis, const std::string& parSubstr, bool parCaseInsensitive) {
|
||||||
|
using dincore::split_and_trim;
|
||||||
|
|
||||||
|
const int prefetch_count = g_prefetch_count;
|
||||||
|
std::vector<LocatedSet> retval;
|
||||||
|
std::vector<std::string> set_ids;
|
||||||
|
set_ids.reserve(prefetch_count);
|
||||||
|
|
||||||
|
auto filter_case_ins = [&parSubstr](const boost::tuple<std::vector<redis::Reply>, std::string>& t) {
|
||||||
|
const auto& s = redis::get_string(t.get<0>()[0]);
|
||||||
|
return s.end() != std::search(
|
||||||
|
s.begin(),
|
||||||
|
s.end(),
|
||||||
|
parSubstr.begin(),
|
||||||
|
parSubstr.end(),
|
||||||
|
[](char c1, char c2) {return std::toupper(c1) == std::toupper(c2);}
|
||||||
|
);
|
||||||
|
};
|
||||||
|
auto filter_case_sens = [&parSubstr](const boost::tuple<std::vector<redis::Reply>, std::string>& t) {
|
||||||
|
return redis::get_string(t.get<0>()[0]).find(parSubstr) != std::string::npos;
|
||||||
|
};
|
||||||
|
std::function<bool(const boost::tuple<std::vector<redis::Reply>, std::string>&)> filter;
|
||||||
|
if (parCaseInsensitive)
|
||||||
|
filter = filter_case_ins;
|
||||||
|
else
|
||||||
|
filter = filter_case_sens;
|
||||||
|
|
||||||
|
int curr_count = 0;
|
||||||
|
auto batch = parRedis.make_batch();
|
||||||
|
for (const auto& itm : parRedis.scan(PROGRAM_NAME ":set:*")) {
|
||||||
|
++curr_count;
|
||||||
|
batch.hmget(itm, "desc", "item_count", "dir_count");
|
||||||
|
set_ids.push_back(to_std_string(split_and_trim(itm, ':').back()));
|
||||||
|
|
||||||
|
if (curr_count == prefetch_count) {
|
||||||
|
concatenate(store_filtered_sets(batch.replies(), set_ids, filter), retval);
|
||||||
|
batch.reset();
|
||||||
|
curr_count = 0;
|
||||||
|
set_ids.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (curr_count)
|
||||||
|
concatenate(store_filtered_sets(batch.replies(), set_ids, filter), retval);
|
||||||
|
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
} //namespace dindb
|
} //namespace dindb
|
||||||
|
|
|
@ -27,8 +27,9 @@ namespace redis {
|
||||||
|
|
||||||
namespace dindb {
|
namespace dindb {
|
||||||
std::vector<GroupIDType> find_all_sets ( redis::IncRedis& parRedis );
|
std::vector<GroupIDType> find_all_sets ( redis::IncRedis& parRedis );
|
||||||
std::vector<LocatedItem> locate_in_db ( redis::IncRedis& parRedis, const std::string& parSearch, const TagList& parTags );
|
std::vector<LocatedItem> locate_in_db ( redis::IncRedis& parRedis, const std::string& parRegex, const TagList& parTags );
|
||||||
std::vector<LocatedItem> locate_in_db ( redis::IncRedis& parRedis, const mchlib::TigerHash& parSearch, const TagList& parTags );
|
std::vector<LocatedItem> locate_in_db ( redis::IncRedis& parRedis, const mchlib::TigerHash& parSearch, const TagList& parTags );
|
||||||
|
std::vector<LocatedSet> locate_sets_in_db ( redis::IncRedis& parRedis, const std::string& parSubstr, bool parCaseInsensitive );
|
||||||
} //namespace dindb
|
} //namespace dindb
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue