mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2025-07-02 14:04:22 +00:00
Keep paths in a string pool.
This commit is contained in:
parent
109ce9b82a
commit
485796db48
5 changed files with 237 additions and 42 deletions
|
@ -52,6 +52,14 @@ int main (int parArgc, char* parArgv[]) {
|
||||||
std::cout << pn4.path() << '\n';
|
std::cout << pn4.path() << '\n';
|
||||||
|
|
||||||
pn1.join(pn2);
|
pn1.join(pn2);
|
||||||
|
pn1.join("..");
|
||||||
|
pn1.join("..");
|
||||||
|
pn1.join("..");
|
||||||
|
pn1.join("..");
|
||||||
|
pn1.join("..");
|
||||||
|
pn1.join("..");
|
||||||
|
pn1.join("..");
|
||||||
|
pn1.join("code");
|
||||||
std::cout << pn1.path() << '\n';
|
std::cout << pn1.path() << '\n';
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,9 +20,9 @@
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <ciso646>
|
#include <ciso646>
|
||||||
|
|
||||||
#include <iostream>
|
|
||||||
|
|
||||||
namespace din {
|
namespace din {
|
||||||
|
const std::string PathName::m_empty_str("");
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
bool ptr_between (const char* parPtr, const char* parBeg, const char* parEnd) {
|
bool ptr_between (const char* parPtr, const char* parBeg, const char* parEnd) {
|
||||||
std::less<const char*> less;
|
std::less<const char*> less;
|
||||||
|
@ -31,8 +31,17 @@ namespace din {
|
||||||
return lesseq(parBeg, parPtr) and less(parPtr, parEnd);
|
return lesseq(parBeg, parPtr) and less(parPtr, parEnd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Counts how many runs of parDelim appear in parIn: a delimiter only
//contributes when the character before it was not itself parDelim, so
//"a//b/c" yields 2. The "previous character" starts out as '\0'.
std::size_t count_grouped (boost::string_ref parIn, char parDelim) {
	std::size_t groups = 0;
	char last_seen = '\0';
	for (std::size_t z = 0; z < parIn.size(); ++z) {
		const char curr = parIn[z];
		if (curr == parDelim and last_seen != parDelim) {
			++groups;
		}
		last_seen = curr;
	}
	return groups;
}
|
||||||
|
|
||||||
void split_path (std::vector<boost::string_ref>* parOut, boost::string_ref parPath) {
|
void split_path (std::vector<boost::string_ref>* parOut, boost::string_ref parPath) {
|
||||||
parOut->clear();
|
|
||||||
auto from = parPath.begin();
|
auto from = parPath.begin();
|
||||||
boost::string_ref::const_iterator next;
|
boost::string_ref::const_iterator next;
|
||||||
const auto end = parPath.end();
|
const auto end = parPath.end();
|
||||||
|
@ -50,17 +59,27 @@ namespace din {
|
||||||
}
|
}
|
||||||
} //unnamed namespace
|
} //unnamed namespace
|
||||||
|
|
||||||
PathName::PathName (const char* parPath) :
|
PathName::PathName (const char* parPath) {
|
||||||
m_original_path(parPath)
|
if (nullptr != parPath && *parPath != '\0') {
|
||||||
{
|
|
||||||
if (not m_original_path.empty()) {
|
|
||||||
m_absolute = ('/' == *parPath);
|
m_absolute = ('/' == *parPath);
|
||||||
split_path(&m_atoms, m_original_path);
|
std::string path(parPath);
|
||||||
|
|
||||||
|
const auto count = count_grouped(path, '/');
|
||||||
|
const std::size_t trailing = (path.back() == '/' ? 1 : 0);
|
||||||
|
const std::size_t absolute = (m_absolute ? 1 : 0);
|
||||||
|
const auto res = count + 1 - trailing - absolute;
|
||||||
|
std::vector<boost::string_ref> atoms;
|
||||||
|
atoms.reserve(res);
|
||||||
|
split_path(&atoms, path);
|
||||||
|
m_pool.insert(atoms, &path);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
m_original_path = nullptr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string PathName::path() const {
|
std::string PathName::path() const {
|
||||||
if (m_atoms.empty()) {
|
if (m_pool.empty()) {
|
||||||
if (m_absolute) {
|
if (m_absolute) {
|
||||||
return std::string("/");
|
return std::string("/");
|
||||||
}
|
}
|
||||||
|
@ -70,15 +89,15 @@ namespace din {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::size_t reserve = (m_absolute ? 1 : 0);
|
std::size_t reserve = (m_absolute ? 1 : 0);
|
||||||
for (const auto& itm : m_atoms) {
|
for (const auto& itm : m_pool) {
|
||||||
reserve += itm.size();
|
reserve += itm.size();
|
||||||
}
|
}
|
||||||
reserve += m_atoms.size() - 1;
|
reserve += m_pool.size() - 1;
|
||||||
|
|
||||||
std::string out;
|
std::string out;
|
||||||
out.reserve(reserve);
|
out.reserve(reserve);
|
||||||
const char* slash = (m_absolute ? "/" : "");
|
const char* slash = (m_absolute ? "/" : "");
|
||||||
for (const auto& itm : m_atoms) {
|
for (const auto& itm : m_pool) {
|
||||||
out += slash;
|
out += slash;
|
||||||
out.insert(out.end(), itm.begin(), itm.end());
|
out.insert(out.end(), itm.begin(), itm.end());
|
||||||
slash = "/";
|
slash = "/";
|
||||||
|
@ -87,31 +106,20 @@ namespace din {
|
||||||
}
|
}
|
||||||
|
|
||||||
void PathName::join (const PathName& parOther) {
|
void PathName::join (const PathName& parOther) {
|
||||||
typedef std::pair<std::string, std::size_t> PairType;
|
m_pool.update(parOther.m_pool);
|
||||||
using boost::string_ref;
|
}
|
||||||
|
|
||||||
for (const auto& itm : parOther.m_pool) {
|
const boost::string_ref PathName::operator[] (std::size_t parIndex) const {
|
||||||
m_pool[itm.first] += itm.second;
|
return *(m_pool.begin() + parIndex);
|
||||||
}
|
}
|
||||||
const auto& other_path = parOther.original_path();
|
|
||||||
const auto it_other_path = m_pool.insert(PairType(other_path, 0)).first;
|
|
||||||
|
|
||||||
for (auto str : parOther.m_atoms) {
|
std::size_t PathName::atom_count ( void ) const {
|
||||||
if (ptr_between(str.data(), other_path.data(), other_path.data() + other_path.size())) {
|
return m_pool.size();
|
||||||
it_other_path->second++;
|
}
|
||||||
auto offset = str.data() - other_path.data();
|
|
||||||
m_atoms.push_back(string_ref(it_other_path->first).substr(offset, str.size()));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (not it_other_path->second) {
|
|
||||||
m_pool.erase(it_other_path);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::cout << " --------------- content -----------------\n";
|
void PathName::join (const char* parOther) {
|
||||||
for (const auto& itm : m_pool) {
|
const std::string src(parOther);
|
||||||
std::cout << itm.first << " - " << itm.second << '\n';
|
const boost::string_ref ref(src);
|
||||||
}
|
m_pool.insert(ref, &src);
|
||||||
}
|
}
|
||||||
} //namespace din
|
} //namespace din
|
||||||
|
|
|
@ -18,6 +18,7 @@
|
||||||
#ifndef id279E04E31E2C4D98B8C902781A3CE018
|
#ifndef id279E04E31E2C4D98B8C902781A3CE018
|
||||||
#define id279E04E31E2C4D98B8C902781A3CE018
|
#define id279E04E31E2C4D98B8C902781A3CE018
|
||||||
|
|
||||||
|
#include "stringpool.hpp"
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <boost/utility/string_ref.hpp>
|
#include <boost/utility/string_ref.hpp>
|
||||||
|
@ -31,16 +32,17 @@ namespace din {
|
||||||
|
|
||||||
bool is_absolute ( void ) const;
|
bool is_absolute ( void ) const;
|
||||||
std::string path ( void ) const;
|
std::string path ( void ) const;
|
||||||
const std::string& original_path ( void ) const { return m_original_path; }
|
const std::string& original_path ( void ) const { return (m_original_path ? *m_original_path : m_empty_str); }
|
||||||
std::size_t atom_count ( void ) const { return m_atoms.size(); }
|
std::size_t atom_count ( void ) const;
|
||||||
const boost::string_ref operator[] ( std::size_t parIndex ) const { return m_atoms[parIndex]; }
|
const boost::string_ref operator[] ( std::size_t parIndex ) const;
|
||||||
void join ( const PathName& parOther );
|
void join ( const PathName& parOther );
|
||||||
|
void join ( const char* parOther );
|
||||||
|
|
||||||
private:
|
private:
|
||||||
typedef std::vector<boost::string_ref> AtomList;
|
static const std::string m_empty_str;
|
||||||
std::map<std::string, std::size_t> m_pool;
|
|
||||||
AtomList m_atoms;
|
StringPool<char> m_pool;
|
||||||
std::string m_original_path;
|
const std::string* m_original_path;
|
||||||
bool m_absolute;
|
bool m_absolute;
|
||||||
};
|
};
|
||||||
} //namespace din
|
} //namespace din
|
||||||
|
|
67
src/stringpool.hpp
Normal file
67
src/stringpool.hpp
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
/* Copyright 2015, Michele Santullo
|
||||||
|
* This file is part of "dindexer".
|
||||||
|
*
|
||||||
|
* "dindexer" is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* "dindexer" is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef id9CF5E6FA7E334DF09559C2968C494CB9
|
||||||
|
#define id9CF5E6FA7E334DF09559C2968C494CB9
|
||||||
|
|
||||||
|
#include <string>
|
||||||
|
#include <boost/utility/string_ref.hpp>
|
||||||
|
#include <vector>
|
||||||
|
#include <utility>
|
||||||
|
#include <ciso646>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <functional>
|
||||||
|
#include <boost/iterator/transform_iterator.hpp>
|
||||||
|
|
||||||
|
namespace din {
|
||||||
|
template <typename C, typename Str=std::basic_string<C>, typename StrRef=boost::basic_string_ref<C>>
|
||||||
|
class StringPool {
|
||||||
|
typedef std::pair<StrRef, const Str*> StringListPair;
|
||||||
|
typedef std::vector<std::pair<Str, std::size_t>> PoolType;
|
||||||
|
typedef std::vector<StringListPair> StringListType;
|
||||||
|
typedef std::function<StrRef(const StringListPair&)> FuncGetFirst;
|
||||||
|
|
||||||
|
public:
|
||||||
|
typedef C char_type;
|
||||||
|
typedef Str string_type;
|
||||||
|
typedef StrRef stringref_type;
|
||||||
|
typedef boost::transform_iterator<FuncGetFirst, typename StringListType::const_iterator> const_iterator;
|
||||||
|
|
||||||
|
StringPool ( void ) = default;
|
||||||
|
~StringPool ( void ) noexcept = default;
|
||||||
|
|
||||||
|
template <typename ItR>
|
||||||
|
void update ( ItR parDataBeg, ItR parDataEnd );
|
||||||
|
void update ( const StringPool& parOther );
|
||||||
|
void insert ( const std::vector<stringref_type>& parStrings, const string_type* parBaseString );
|
||||||
|
void insert ( stringref_type parString, const string_type* parBaseString );
|
||||||
|
const string_type* ptr_to_literal ( const char* parLiteral );
|
||||||
|
std::size_t size ( void ) const { return m_strings.size(); }
|
||||||
|
bool empty ( void ) const { return m_strings.empty(); }
|
||||||
|
const_iterator begin ( void ) const;
|
||||||
|
const_iterator end ( void ) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
PoolType m_pool;
|
||||||
|
StringListType m_strings;
|
||||||
|
};
|
||||||
|
} //namespace din
|
||||||
|
|
||||||
|
#include "stringpool.inl"
|
||||||
|
|
||||||
|
#endif
|
110
src/stringpool.inl
Normal file
110
src/stringpool.inl
Normal file
|
@ -0,0 +1,110 @@
|
||||||
|
/* Copyright 2015, Michele Santullo
|
||||||
|
* This file is part of "dindexer".
|
||||||
|
*
|
||||||
|
* "dindexer" is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* "dindexer" is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace din {
|
||||||
|
namespace implem {
|
||||||
|
//"Clone if possible": looks for the text of parClone inside parSource.
//On a hit the returned reference is the matching sub-range of parSource
//and the flag is true; otherwise parClone is handed back untouched and
//the flag is false.
template <typename StrRef>
std::pair<StrRef, bool> clone_ifp (const StrRef& parClone, StrRef parSource) {
	const auto pos = parSource.find(parClone);
	const bool found = (pos != parSource.npos);
	//substr() is only evaluated on the "found" side of the conditional
	return std::pair<StrRef, bool>(
		found ? parSource.substr(pos, parClone.size()) : parClone,
		found
	);
}
|
||||||
|
} //namespace implem
|
||||||
|
|
||||||
|
template <typename C, typename Str, typename StrRef>
|
||||||
|
auto StringPool<C, Str, StrRef>::ptr_to_literal (const char* parLiteral) -> const string_type* {
|
||||||
|
if (not parLiteral)
|
||||||
|
return nullptr;
|
||||||
|
|
||||||
|
for (const auto& p : m_pool) {
|
||||||
|
if (m_pool.first == parLiteral) {
|
||||||
|
return &m_pool.first;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename C, typename Str, typename StrRef>
|
||||||
|
template <typename ItR>
|
||||||
|
void StringPool<C, Str, StrRef>::update (ItR parDataBeg, ItR parDataEnd) {
|
||||||
|
typedef std::pair<string_type, std::size_t> PoolPair;
|
||||||
|
|
||||||
|
while (parDataBeg != parDataEnd) {
|
||||||
|
const auto& remote_str = parDataBeg->first;
|
||||||
|
const auto* remote_source_str = parDataBeg->second;
|
||||||
|
bool cloned = false;
|
||||||
|
|
||||||
|
for (auto& local_src : m_pool) {
|
||||||
|
const string_type& local_str = local_src.first;
|
||||||
|
auto& local_ref_count = local_src.second;
|
||||||
|
|
||||||
|
auto cloned_result = implem::clone_ifp<StrRef>(remote_str, local_str);
|
||||||
|
cloned = cloned_result.second;
|
||||||
|
const auto& cloned_str = cloned_result.first;
|
||||||
|
if (cloned) {
|
||||||
|
++local_ref_count;
|
||||||
|
m_strings.push_back(StringListPair(cloned_str, &local_str));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (not cloned) {
|
||||||
|
m_pool.push_back(PoolPair(*remote_source_str, static_cast<std::size_t>(1)));
|
||||||
|
const auto offset = remote_str.data() - remote_source_str->data();
|
||||||
|
m_strings.push_back(StringListPair(stringref_type(m_pool.back().first).substr(offset, remote_str.size()), &m_pool.back().first));
|
||||||
|
}
|
||||||
|
++parDataBeg;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename C, typename Str, typename StrRef>
|
||||||
|
void StringPool<C, Str, StrRef>::update (const StringPool& parOther) {
|
||||||
|
this->update(parOther.m_strings.begin(), parOther.m_strings.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename C, typename Str, typename StrRef>
|
||||||
|
auto StringPool<C, Str, StrRef>::begin() const -> const_iterator {
|
||||||
|
return const_iterator(m_strings.cbegin(), [](const StringListPair& parItm) { return parItm.first; });
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename C, typename Str, typename StrRef>
|
||||||
|
auto StringPool<C, Str, StrRef>::end() const -> const_iterator {
|
||||||
|
return const_iterator(m_strings.cend(), [](const StringListPair& parItm) { return parItm.first; });
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename C, typename Str, typename StrRef>
|
||||||
|
void StringPool<C, Str, StrRef>::insert (const std::vector<stringref_type>& parStrings, const string_type* parBaseString) {
|
||||||
|
StringListType dummy;
|
||||||
|
dummy.reserve(parStrings.size());
|
||||||
|
for (const auto& itm : parStrings) {
|
||||||
|
dummy.push_back(StringListPair(itm, parBaseString));
|
||||||
|
}
|
||||||
|
this->update(dummy.begin(), dummy.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename C, typename Str, typename StrRef>
|
||||||
|
void StringPool<C, Str, StrRef>::insert (stringref_type parString, const string_type* parBaseString) {
|
||||||
|
StringListType dummy;
|
||||||
|
dummy.reserve(1);
|
||||||
|
dummy.push_back(StringListPair(parString, parBaseString));
|
||||||
|
this->update(dummy.begin(), dummy.end());
|
||||||
|
}
|
||||||
|
} //namespace din
|
Loading…
Add table
Add a link
Reference in a new issue