1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2024-11-29 01:33:46 +00:00

Keep paths in a string pool.

This commit is contained in:
King_DuckZ 2015-11-06 17:57:07 +00:00
parent 109ce9b82a
commit 485796db48
5 changed files with 237 additions and 42 deletions

View file

@ -52,6 +52,14 @@ int main (int parArgc, char* parArgv[]) {
std::cout << pn4.path() << '\n';
pn1.join(pn2);
pn1.join("..");
pn1.join("..");
pn1.join("..");
pn1.join("..");
pn1.join("..");
pn1.join("..");
pn1.join("..");
pn1.join("code");
std::cout << pn1.path() << '\n';
return 0;
}

View file

@ -20,9 +20,9 @@
#include <functional>
#include <ciso646>
#include <iostream>
namespace din {
const std::string PathName::m_empty_str("");
namespace {
bool ptr_between (const char* parPtr, const char* parBeg, const char* parEnd) {
std::less<const char*> less;
@ -31,8 +31,17 @@ namespace din {
return lesseq(parBeg, parPtr) and less(parPtr, parEnd);
}
//Counts the runs of parDelim found in parIn: one or more consecutive
//delimiters are counted once (e.g. "a//b/c" with '/' yields 2).
std::size_t count_grouped (boost::string_ref parIn, char parDelim) {
	std::size_t groups = 0;
	//True while the previously visited character was a delimiter. The scan
	//starts as if a '\0' had just been read, hence the initial value.
	bool in_run = ('\0' == parDelim);

	for (const char curr : parIn) {
		const bool is_delim = (curr == parDelim);
		if (is_delim and not in_run) {
			++groups;
		}
		in_run = is_delim;
	}
	return groups;
}
void split_path (std::vector<boost::string_ref>* parOut, boost::string_ref parPath) {
parOut->clear();
auto from = parPath.begin();
boost::string_ref::const_iterator next;
const auto end = parPath.end();
@ -50,17 +59,27 @@ namespace din {
}
} //unnamed namespace
PathName::PathName (const char* parPath) :
m_original_path(parPath)
{
if (not m_original_path.empty()) {
PathName::PathName (const char* parPath) {
if (nullptr != parPath && *parPath != '\0') {
m_absolute = ('/' == *parPath);
split_path(&m_atoms, m_original_path);
std::string path(parPath);
const auto count = count_grouped(path, '/');
const std::size_t trailing = (path.back() == '/' ? 1 : 0);
const std::size_t absolute = (m_absolute ? 1 : 0);
const auto res = count + 1 - trailing - absolute;
std::vector<boost::string_ref> atoms;
atoms.reserve(res);
split_path(&atoms, path);
m_pool.insert(atoms, &path);
}
else {
m_original_path = nullptr;
}
}
std::string PathName::path() const {
if (m_atoms.empty()) {
if (m_pool.empty()) {
if (m_absolute) {
return std::string("/");
}
@ -70,15 +89,15 @@ namespace din {
}
std::size_t reserve = (m_absolute ? 1 : 0);
for (const auto& itm : m_atoms) {
for (const auto& itm : m_pool) {
reserve += itm.size();
}
reserve += m_atoms.size() - 1;
reserve += m_pool.size() - 1;
std::string out;
out.reserve(reserve);
const char* slash = (m_absolute ? "/" : "");
for (const auto& itm : m_atoms) {
for (const auto& itm : m_pool) {
out += slash;
out.insert(out.end(), itm.begin(), itm.end());
slash = "/";
@ -87,31 +106,20 @@ namespace din {
}
void PathName::join (const PathName& parOther) {
typedef std::pair<std::string, std::size_t> PairType;
using boost::string_ref;
m_pool.update(parOther.m_pool);
}
for (const auto& itm : parOther.m_pool) {
m_pool[itm.first] += itm.second;
}
const auto& other_path = parOther.original_path();
const auto it_other_path = m_pool.insert(PairType(other_path, 0)).first;
//Returns the atom stored at position parIndex, counting from the first atom
//in the pool. No bounds checking is performed here.
const boost::string_ref PathName::operator[] (std::size_t parIndex) const {
	auto it_atom = m_pool.begin();
	it_atom += parIndex;
	return *it_atom;
}
for (auto str : parOther.m_atoms) {
if (ptr_between(str.data(), other_path.data(), other_path.data() + other_path.size())) {
it_other_path->second++;
auto offset = str.data() - other_path.data();
m_atoms.push_back(string_ref(it_other_path->first).substr(offset, str.size()));
}
else {
}
}
if (not it_other_path->second) {
m_pool.erase(it_other_path);
}
//Number of atoms currently held by this path, as reported by the pool.
std::size_t PathName::atom_count ( void ) const {
	const std::size_t atoms = m_pool.size();
	return atoms;
}
std::cout << " --------------- content -----------------\n";
for (const auto& itm : m_pool) {
std::cout << itm.first << " - " << itm.second << '\n';
}
//Appends parOther to this path as one single atom. The text is stored as it
//is: it is not split on '/'.
//A null parOther is ignored, matching the null check done by the constructor.
void PathName::join (const char* parOther) {
	//Building a std::string from a null pointer is undefined behaviour, so
	//bail out before touching it.
	if (nullptr == parOther) {
		return;
	}

	const std::string src(parOther);
	const boost::string_ref ref(src);
	//The pool either reuses matching text it already owns or takes its own
	//copy of src, so passing the address of this local is fine.
	m_pool.insert(ref, &src);
}
} //namespace din

View file

@ -18,6 +18,7 @@
#ifndef id279E04E31E2C4D98B8C902781A3CE018
#define id279E04E31E2C4D98B8C902781A3CE018
#include "stringpool.hpp"
#include <vector>
#include <string>
#include <boost/utility/string_ref.hpp>
@ -31,16 +32,17 @@ namespace din {
bool is_absolute ( void ) const;
std::string path ( void ) const;
const std::string& original_path ( void ) const { return m_original_path; }
std::size_t atom_count ( void ) const { return m_atoms.size(); }
const boost::string_ref operator[] ( std::size_t parIndex ) const { return m_atoms[parIndex]; }
const std::string& original_path ( void ) const { return (m_original_path ? *m_original_path : m_empty_str); }
std::size_t atom_count ( void ) const;
const boost::string_ref operator[] ( std::size_t parIndex ) const;
void join ( const PathName& parOther );
void join ( const char* parOther );
private:
typedef std::vector<boost::string_ref> AtomList;
std::map<std::string, std::size_t> m_pool;
AtomList m_atoms;
std::string m_original_path;
static const std::string m_empty_str;
StringPool<char> m_pool;
const std::string* m_original_path;
bool m_absolute;
};
} //namespace din

67
src/stringpool.hpp Normal file
View file

@ -0,0 +1,67 @@
/* Copyright 2015, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id9CF5E6FA7E334DF09559C2968C494CB9
#define id9CF5E6FA7E334DF09559C2968C494CB9
#include <algorithm>
#include <ciso646>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <functional>
#include <string>
#include <utility>
#include <vector>
#include <boost/iterator/transform_iterator.hpp>
#include <boost/utility/string_ref.hpp>
namespace din {
//Pool of strings handing out lightweight views (StrRef) into text it owns.
//
//Two containers cooperate:
// - m_pool owns the actual strings, each paired with a counter of how many
//   views were created into it;
// - m_strings is the ordered list of views; each entry also records a pointer
//   to the owning string inside m_pool.
//size(), empty(), begin() and end() all refer to the list of views.
//
//Template parameters:
// C      character type
// Str    owning string type (defaults to std::basic_string<C>)
// StrRef non-owning view type (defaults to boost::basic_string_ref<C>)
template <typename C, typename Str=std::basic_string<C>, typename StrRef=boost::basic_string_ref<C>>
class StringPool {
	typedef std::pair<StrRef, const Str*> StringListPair;
	//Entries of m_strings point into the strings stored here, so the
	//container must never relocate its elements when it grows. std::deque
	//guarantees that push_back leaves references to existing elements valid,
	//which std::vector does not.
	typedef std::deque<std::pair<Str, std::size_t>> PoolType;
	typedef std::vector<StringListPair> StringListType;
	typedef std::function<StrRef(const StringListPair&)> FuncGetFirst;

public:
	typedef C char_type;
	typedef Str string_type;
	typedef StrRef stringref_type;
	//Iterates over the views only, hiding the owner pointer of each entry.
	typedef boost::transform_iterator<FuncGetFirst, typename StringListType::const_iterator> const_iterator;

	StringPool ( void ) = default;
	//A memberwise copy would leave the new object's views pointing into
	//parOther's strings. Rebuild the content through update() instead, so
	//that every view refers to text owned by the new object. The layout of
	//the internal pool may differ from parOther's, the sequence of views
	//does not.
	StringPool ( const StringPool& parOther ) :
		m_pool(),
		m_strings()
	{
		this->update(parOther);
	}
	//Moving the containers hands over their storage, so the views that move
	//along remain valid.
	StringPool ( StringPool&& ) = default;
	~StringPool ( void ) noexcept = default;

	//Same reasoning as the copy constructor: build an independent copy, then
	//swap it in (swapping containers keeps references to elements valid).
	StringPool& operator= ( const StringPool& parOther ) {
		if (this != &parOther) {
			StringPool rebuilt(parOther);
			m_pool.swap(rebuilt.m_pool);
			m_strings.swap(rebuilt.m_strings);
		}
		return *this;
	}
	StringPool& operator= ( StringPool&& ) = default;

	//Appends every (view, owner) pair in [parDataBeg, parDataEnd). Each view
	//must lie inside the string its owner pointer refers to.
	template <typename ItR>
	void update ( ItR parDataBeg, ItR parDataEnd );
	//Appends all the views held by parOther.
	void update ( const StringPool& parOther );
	//Appends the given views, all of which lie inside *parBaseString.
	void insert ( const std::vector<stringref_type>& parStrings, const string_type* parBaseString );
	//Appends a single view lying inside *parBaseString.
	void insert ( stringref_type parString, const string_type* parBaseString );
	//Looks for an owned string equal to parLiteral; nullptr if none is found.
	const string_type* ptr_to_literal ( const char* parLiteral );
	std::size_t size ( void ) const { return m_strings.size(); }
	bool empty ( void ) const { return m_strings.empty(); }
	const_iterator begin ( void ) const;
	const_iterator end ( void ) const;

private:
	PoolType m_pool;
	StringListType m_strings;
};
} //namespace din
#include "stringpool.inl"
#endif

110
src/stringpool.inl Normal file
View file

@ -0,0 +1,110 @@
/* Copyright 2015, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
namespace din {
namespace implem {
//"Clone if present": searches parSource for the text of parClone.
//Returns (view of the matching part of parSource, true) when the text is
//found, or (parClone itself, false) when it is not.
template <typename StrRef>
std::pair<StrRef, bool> clone_ifp (const StrRef& parClone, StrRef parSource) {
	const auto pos = parSource.find(parClone);
	const bool found = (pos != parSource.npos);
	if (not found) {
		return std::pair<StrRef, bool>(parClone, false);
	}
	return std::pair<StrRef, bool>(parSource.substr(pos, parClone.size()), true);
}
} //namespace implem
//Searches the strings owned by the pool for one that compares equal to
//parLiteral.
//Returns a pointer to the owned string, or nullptr when parLiteral is null
//or no owned string matches. The pointer refers to the pool's own storage,
//so it is only usable while the pool is alive and that storage is in place.
template <typename C, typename Str, typename StrRef>
auto StringPool<C, Str, StrRef>::ptr_to_literal (const char* parLiteral) -> const string_type* {
	if (not parLiteral)
		return nullptr;

	for (const auto& p : m_pool) {
		//Compare against the element being visited (p), not the container
		if (p.first == parLiteral) {
			return &p.first;
		}
	}
	return nullptr;
}
//Appends every (view, owner string) pair in [parDataBeg, parDataEnd) to this
//pool.
//For each incoming view, the strings already owned are scanned in order: the
//first one containing the same text gets its counter bumped and a view into
//it is appended. When no owned string contains the text, the whole owner
//string of the incoming view is copied into the pool with a counter of 1 and
//the appended view points at the same offset inside that copy.
template <typename C, typename Str, typename StrRef>
template <typename ItR>
void StringPool<C, Str, StrRef>::update (ItR parDataBeg, ItR parDataEnd) {
	typedef std::pair<string_type, std::size_t> PoolEntry;

	for (; parDataBeg != parDataEnd; ++parDataBeg) {
		const auto& incoming = parDataBeg->first;
		const auto* incoming_owner = parDataBeg->second;

		//Try to reuse text that is already in the pool
		bool reused = false;
		for (auto it = m_pool.begin(), it_end = m_pool.end(); it != it_end and not reused; ++it) {
			const auto lookup = implem::clone_ifp<StrRef>(incoming, it->first);
			if (lookup.second) {
				++it->second;
				m_strings.push_back(StringListPair(lookup.first, &it->first));
				reused = true;
			}
		}
		if (reused) {
			continue;
		}

		//Nothing suitable: take a private copy of the whole owner string and
		//re-create the view at the same offset within the copy.
		m_pool.push_back(PoolEntry(*incoming_owner, static_cast<std::size_t>(1)));
		const string_type& stored = m_pool.back().first;
		const auto offset = incoming.data() - incoming_owner->data();
		m_strings.push_back(StringListPair(stringref_type(stored).substr(offset, incoming.size()), &stored));
	}
}
//Appends all the views held by parOther to this pool.
//parOther may be this very object, in which case the current list of views
//gets appended to itself.
template <typename C, typename Str, typename StrRef>
void StringPool<C, Str, StrRef>::update (const StringPool& parOther) {
	if (&parOther == this) {
		//The ranged update() pushes into m_strings; walking that same vector
		//while it grows would invalidate the iterators being used. Work on a
		//snapshot of the list instead.
		const StringListType snapshot(m_strings);
		this->update(snapshot.begin(), snapshot.end());
	}
	else {
		this->update(parOther.m_strings.begin(), parOther.m_strings.end());
	}
}
//Iterator to the first view in the pool; dereferencing yields the view only,
//without the pointer to its owning string.
template <typename C, typename Str, typename StrRef>
auto StringPool<C, Str, StrRef>::begin() const -> const_iterator {
	const FuncGetFirst get_view = [](const StringListPair& parItm) { return parItm.first; };
	return const_iterator(m_strings.cbegin(), get_view);
}
//Past-the-end iterator matching begin().
template <typename C, typename Str, typename StrRef>
auto StringPool<C, Str, StrRef>::end() const -> const_iterator {
	const FuncGetFirst get_view = [](const StringListPair& parItm) { return parItm.first; };
	return const_iterator(m_strings.cend(), get_view);
}
//Appends all the views in parStrings, in order. Every view is paired with
//parBaseString as its owner string before being handed to update().
template <typename C, typename Str, typename StrRef>
void StringPool<C, Str, StrRef>::insert (const std::vector<stringref_type>& parStrings, const string_type* parBaseString) {
	StringListType pairs;
	pairs.reserve(parStrings.size());
	for (auto it = parStrings.begin(); it != parStrings.end(); ++it) {
		pairs.emplace_back(*it, parBaseString);
	}

	this->update(pairs.begin(), pairs.end());
}
//Appends a single view, paired with parBaseString as its owner string.
template <typename C, typename Str, typename StrRef>
void StringPool<C, Str, StrRef>::insert (stringref_type parString, const string_type* parBaseString) {
	//One-element list so the ranged update() can be reused as it is
	StringListType single(1, StringListPair(parString, parBaseString));
	this->update(single.begin(), single.end());
}
} //namespace din