mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2024-11-29 01:33:46 +00:00
Keep paths in a string pool.
This commit is contained in:
parent
109ce9b82a
commit
485796db48
5 changed files with 237 additions and 42 deletions
|
@ -52,6 +52,14 @@ int main (int parArgc, char* parArgv[]) {
|
|||
std::cout << pn4.path() << '\n';
|
||||
|
||||
pn1.join(pn2);
|
||||
pn1.join("..");
|
||||
pn1.join("..");
|
||||
pn1.join("..");
|
||||
pn1.join("..");
|
||||
pn1.join("..");
|
||||
pn1.join("..");
|
||||
pn1.join("..");
|
||||
pn1.join("code");
|
||||
std::cout << pn1.path() << '\n';
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -20,9 +20,9 @@
|
|||
#include <functional>
|
||||
#include <ciso646>
|
||||
|
||||
#include <iostream>
|
||||
|
||||
namespace din {
|
||||
const std::string PathName::m_empty_str("");
|
||||
|
||||
namespace {
|
||||
bool ptr_between (const char* parPtr, const char* parBeg, const char* parEnd) {
|
||||
std::less<const char*> less;
|
||||
|
@ -31,8 +31,17 @@ namespace din {
|
|||
return lesseq(parBeg, parPtr) and less(parPtr, parEnd);
|
||||
}
|
||||
|
||||
// Counts the runs of consecutive parDelim characters found in parIn.
// A run of several adjacent delimiters contributes 1 to the result, so
// "a//b/c" with '/' yields 2.
std::size_t count_grouped (boost::string_ref parIn, char parDelim) {
	std::size_t groups = 0;
	// Character seen on the previous iteration; starts as NUL exactly like
	// the first character had been preceded by '\0'.
	char last_seen = '\0';
	for (const char ch : parIn) {
		// Only the first delimiter of a run opens a new group.
		if (ch == parDelim and last_seen != parDelim) {
			++groups;
		}
		last_seen = ch;
	}
	return groups;
}
|
||||
|
||||
void split_path (std::vector<boost::string_ref>* parOut, boost::string_ref parPath) {
|
||||
parOut->clear();
|
||||
auto from = parPath.begin();
|
||||
boost::string_ref::const_iterator next;
|
||||
const auto end = parPath.end();
|
||||
|
@ -50,17 +59,27 @@ namespace din {
|
|||
}
|
||||
} //unnamed namespace
|
||||
|
||||
PathName::PathName (const char* parPath) :
|
||||
m_original_path(parPath)
|
||||
{
|
||||
if (not m_original_path.empty()) {
|
||||
PathName::PathName (const char* parPath) {
|
||||
if (nullptr != parPath && *parPath != '\0') {
|
||||
m_absolute = ('/' == *parPath);
|
||||
split_path(&m_atoms, m_original_path);
|
||||
std::string path(parPath);
|
||||
|
||||
const auto count = count_grouped(path, '/');
|
||||
const std::size_t trailing = (path.back() == '/' ? 1 : 0);
|
||||
const std::size_t absolute = (m_absolute ? 1 : 0);
|
||||
const auto res = count + 1 - trailing - absolute;
|
||||
std::vector<boost::string_ref> atoms;
|
||||
atoms.reserve(res);
|
||||
split_path(&atoms, path);
|
||||
m_pool.insert(atoms, &path);
|
||||
}
|
||||
else {
|
||||
m_original_path = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
std::string PathName::path() const {
|
||||
if (m_atoms.empty()) {
|
||||
if (m_pool.empty()) {
|
||||
if (m_absolute) {
|
||||
return std::string("/");
|
||||
}
|
||||
|
@ -70,15 +89,15 @@ namespace din {
|
|||
}
|
||||
|
||||
std::size_t reserve = (m_absolute ? 1 : 0);
|
||||
for (const auto& itm : m_atoms) {
|
||||
for (const auto& itm : m_pool) {
|
||||
reserve += itm.size();
|
||||
}
|
||||
reserve += m_atoms.size() - 1;
|
||||
reserve += m_pool.size() - 1;
|
||||
|
||||
std::string out;
|
||||
out.reserve(reserve);
|
||||
const char* slash = (m_absolute ? "/" : "");
|
||||
for (const auto& itm : m_atoms) {
|
||||
for (const auto& itm : m_pool) {
|
||||
out += slash;
|
||||
out.insert(out.end(), itm.begin(), itm.end());
|
||||
slash = "/";
|
||||
|
@ -87,31 +106,20 @@ namespace din {
|
|||
}
|
||||
|
||||
void PathName::join (const PathName& parOther) {
|
||||
typedef std::pair<std::string, std::size_t> PairType;
|
||||
using boost::string_ref;
|
||||
m_pool.update(parOther.m_pool);
|
||||
}
|
||||
|
||||
for (const auto& itm : parOther.m_pool) {
|
||||
m_pool[itm.first] += itm.second;
|
||||
}
|
||||
const auto& other_path = parOther.original_path();
|
||||
const auto it_other_path = m_pool.insert(PairType(other_path, 0)).first;
|
||||
// Returns the atom stored at position parIndex in the pool.
// No bounds checking is performed on parIndex.
const boost::string_ref PathName::operator[] (std::size_t parIndex) const {
	const auto first_atom = m_pool.begin();
	return *(first_atom + parIndex);
}
|
||||
|
||||
for (auto str : parOther.m_atoms) {
|
||||
if (ptr_between(str.data(), other_path.data(), other_path.data() + other_path.size())) {
|
||||
it_other_path->second++;
|
||||
auto offset = str.data() - other_path.data();
|
||||
m_atoms.push_back(string_ref(it_other_path->first).substr(offset, str.size()));
|
||||
}
|
||||
else {
|
||||
}
|
||||
}
|
||||
if (not it_other_path->second) {
|
||||
m_pool.erase(it_other_path);
|
||||
}
|
||||
std::size_t PathName::atom_count ( void ) const {
|
||||
return m_pool.size();
|
||||
}
|
||||
|
||||
std::cout << " --------------- content -----------------\n";
|
||||
for (const auto& itm : m_pool) {
|
||||
std::cout << itm.first << " - " << itm.second << '\n';
|
||||
}
|
||||
void PathName::join (const char* parOther) {
|
||||
const std::string src(parOther);
|
||||
const boost::string_ref ref(src);
|
||||
m_pool.insert(ref, &src);
|
||||
}
|
||||
} //namespace din
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#ifndef id279E04E31E2C4D98B8C902781A3CE018
|
||||
#define id279E04E31E2C4D98B8C902781A3CE018
|
||||
|
||||
#include "stringpool.hpp"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <boost/utility/string_ref.hpp>
|
||||
|
@ -31,16 +32,17 @@ namespace din {
|
|||
|
||||
bool is_absolute ( void ) const;
|
||||
std::string path ( void ) const;
|
||||
const std::string& original_path ( void ) const { return m_original_path; }
|
||||
std::size_t atom_count ( void ) const { return m_atoms.size(); }
|
||||
const boost::string_ref operator[] ( std::size_t parIndex ) const { return m_atoms[parIndex]; }
|
||||
const std::string& original_path ( void ) const { return (m_original_path ? *m_original_path : m_empty_str); }
|
||||
std::size_t atom_count ( void ) const;
|
||||
const boost::string_ref operator[] ( std::size_t parIndex ) const;
|
||||
void join ( const PathName& parOther );
|
||||
void join ( const char* parOther );
|
||||
|
||||
private:
|
||||
typedef std::vector<boost::string_ref> AtomList;
|
||||
std::map<std::string, std::size_t> m_pool;
|
||||
AtomList m_atoms;
|
||||
std::string m_original_path;
|
||||
static const std::string m_empty_str;
|
||||
|
||||
StringPool<char> m_pool;
|
||||
const std::string* m_original_path;
|
||||
bool m_absolute;
|
||||
};
|
||||
} //namespace din
|
||||
|
|
67
src/stringpool.hpp
Normal file
67
src/stringpool.hpp
Normal file
|
@ -0,0 +1,67 @@
|
|||
/* Copyright 2015, Michele Santullo
|
||||
* This file is part of "dindexer".
|
||||
*
|
||||
* "dindexer" is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* "dindexer" is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef id9CF5E6FA7E334DF09559C2968C494CB9
|
||||
#define id9CF5E6FA7E334DF09559C2968C494CB9
|
||||
|
||||
#include <algorithm>
#include <ciso646>
#include <cstddef>
#include <cstdint>
#include <deque>
#include <functional>
#include <string>
#include <utility>
#include <vector>
#include <boost/utility/string_ref.hpp>
#include <boost/iterator/transform_iterator.hpp>
|
||||
|
||||
namespace din {
|
||||
template <typename C, typename Str=std::basic_string<C>, typename StrRef=boost::basic_string_ref<C>>
|
||||
class StringPool {
|
||||
typedef std::pair<StrRef, const Str*> StringListPair;
|
||||
typedef std::vector<std::pair<Str, std::size_t>> PoolType;
|
||||
typedef std::vector<StringListPair> StringListType;
|
||||
typedef std::function<StrRef(const StringListPair&)> FuncGetFirst;
|
||||
|
||||
public:
|
||||
typedef C char_type;
|
||||
typedef Str string_type;
|
||||
typedef StrRef stringref_type;
|
||||
typedef boost::transform_iterator<FuncGetFirst, typename StringListType::const_iterator> const_iterator;
|
||||
|
||||
StringPool ( void ) = default;
|
||||
~StringPool ( void ) noexcept = default;
|
||||
|
||||
template <typename ItR>
|
||||
void update ( ItR parDataBeg, ItR parDataEnd );
|
||||
void update ( const StringPool& parOther );
|
||||
void insert ( const std::vector<stringref_type>& parStrings, const string_type* parBaseString );
|
||||
void insert ( stringref_type parString, const string_type* parBaseString );
|
||||
const string_type* ptr_to_literal ( const char* parLiteral );
|
||||
std::size_t size ( void ) const { return m_strings.size(); }
|
||||
bool empty ( void ) const { return m_strings.empty(); }
|
||||
const_iterator begin ( void ) const;
|
||||
const_iterator end ( void ) const;
|
||||
|
||||
private:
|
||||
PoolType m_pool;
|
||||
StringListType m_strings;
|
||||
};
|
||||
} //namespace din
|
||||
|
||||
#include "stringpool.inl"
|
||||
|
||||
#endif
|
110
src/stringpool.inl
Normal file
110
src/stringpool.inl
Normal file
|
@ -0,0 +1,110 @@
|
|||
/* Copyright 2015, Michele Santullo
|
||||
* This file is part of "dindexer".
|
||||
*
|
||||
* "dindexer" is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* "dindexer" is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
namespace din {
|
||||
namespace implem {
|
||||
// "Clone if possible": looks for the text of parClone inside parSource.
// On success returns a view of the matching region of parSource together
// with true; otherwise returns parClone unchanged together with false.
template <typename StrRef>
std::pair<StrRef, bool> clone_ifp (const StrRef& parClone, StrRef parSource) {
	const auto found_at = parSource.find(parClone);
	if (parSource.npos == found_at) {
		// Nothing to share: hand the caller's own view back.
		return std::pair<StrRef, bool>(parClone, false);
	}
	return std::pair<StrRef, bool>(parSource.substr(found_at, parClone.size()), true);
}
|
||||
} //namespace implem
|
||||
|
||||
template <typename C, typename Str, typename StrRef>
|
||||
auto StringPool<C, Str, StrRef>::ptr_to_literal (const char* parLiteral) -> const string_type* {
|
||||
if (not parLiteral)
|
||||
return nullptr;
|
||||
|
||||
for (const auto& p : m_pool) {
|
||||
if (m_pool.first == parLiteral) {
|
||||
return &m_pool.first;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <typename C, typename Str, typename StrRef>
|
||||
template <typename ItR>
|
||||
void StringPool<C, Str, StrRef>::update (ItR parDataBeg, ItR parDataEnd) {
|
||||
typedef std::pair<string_type, std::size_t> PoolPair;
|
||||
|
||||
while (parDataBeg != parDataEnd) {
|
||||
const auto& remote_str = parDataBeg->first;
|
||||
const auto* remote_source_str = parDataBeg->second;
|
||||
bool cloned = false;
|
||||
|
||||
for (auto& local_src : m_pool) {
|
||||
const string_type& local_str = local_src.first;
|
||||
auto& local_ref_count = local_src.second;
|
||||
|
||||
auto cloned_result = implem::clone_ifp<StrRef>(remote_str, local_str);
|
||||
cloned = cloned_result.second;
|
||||
const auto& cloned_str = cloned_result.first;
|
||||
if (cloned) {
|
||||
++local_ref_count;
|
||||
m_strings.push_back(StringListPair(cloned_str, &local_str));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (not cloned) {
|
||||
m_pool.push_back(PoolPair(*remote_source_str, static_cast<std::size_t>(1)));
|
||||
const auto offset = remote_str.data() - remote_source_str->data();
|
||||
m_strings.push_back(StringListPair(stringref_type(m_pool.back().first).substr(offset, remote_str.size()), &m_pool.back().first));
|
||||
}
|
||||
++parDataBeg;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename C, typename Str, typename StrRef>
|
||||
void StringPool<C, Str, StrRef>::update (const StringPool& parOther) {
|
||||
this->update(parOther.m_strings.begin(), parOther.m_strings.end());
|
||||
}
|
||||
|
||||
template <typename C, typename Str, typename StrRef>
|
||||
auto StringPool<C, Str, StrRef>::begin() const -> const_iterator {
|
||||
return const_iterator(m_strings.cbegin(), [](const StringListPair& parItm) { return parItm.first; });
|
||||
}
|
||||
|
||||
template <typename C, typename Str, typename StrRef>
|
||||
auto StringPool<C, Str, StrRef>::end() const -> const_iterator {
|
||||
return const_iterator(m_strings.cend(), [](const StringListPair& parItm) { return parItm.first; });
|
||||
}
|
||||
|
||||
template <typename C, typename Str, typename StrRef>
|
||||
void StringPool<C, Str, StrRef>::insert (const std::vector<stringref_type>& parStrings, const string_type* parBaseString) {
|
||||
StringListType dummy;
|
||||
dummy.reserve(parStrings.size());
|
||||
for (const auto& itm : parStrings) {
|
||||
dummy.push_back(StringListPair(itm, parBaseString));
|
||||
}
|
||||
this->update(dummy.begin(), dummy.end());
|
||||
}
|
||||
|
||||
template <typename C, typename Str, typename StrRef>
|
||||
void StringPool<C, Str, StrRef>::insert (stringref_type parString, const string_type* parBaseString) {
|
||||
StringListType dummy;
|
||||
dummy.reserve(1);
|
||||
dummy.push_back(StringListPair(parString, parBaseString));
|
||||
this->update(dummy.begin(), dummy.end());
|
||||
}
|
||||
} //namespace din
|
Loading…
Reference in a new issue