1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2024-11-29 01:33:46 +00:00

Add a unit test for PathName and fix the errors I found.

The pool was storing strings and references to it. References
became invalid as strings got moved around, as a consequence of
push_back() in the owner container. I'm storing references in
a custom StrRange struct now, and string refs are built on the
fly when one is needed.
This commit is contained in:
King_DuckZ 2017-08-16 21:27:31 +01:00
parent 93fe091303
commit 929c5b8adf
5 changed files with 165 additions and 23 deletions

View file

@ -16,6 +16,7 @@
*/
#include "pathname.hpp"
#include "dindexer-core/split.hpp"
#include <algorithm>
#include <functional>
#include <ciso646>
@ -149,11 +150,14 @@ namespace mchlib {
void PathName::join (const char* parOther) {
const std::string src(parOther);
const boost::string_ref ref(src);
m_pool.insert(ref, &src);
this->join(ref, &src);
}
void PathName::join (boost::string_ref parOther, const std::string* parSource) {
m_pool.insert(parOther, parSource);
m_pool.insert(
dincore::split(parOther, '/', false, true),
parSource
);
}
PathName make_relative_path (const PathName& parBasePath, const PathName& parOtherPath) {

View file

@ -27,11 +27,17 @@
#include <algorithm>
#include <functional>
#include <boost/iterator/transform_iterator.hpp>
#include <cassert>
namespace mchlib {
template <typename C, typename Str=std::basic_string<C>, typename StrRef=boost::basic_string_ref<C>>
class StringPool {
typedef std::pair<StrRef, const Str*> StringListPair;
struct StrRange {
std::size_t start;
std::size_t len;
};
typedef std::pair<StrRange, std::size_t> StringListPair;
typedef std::vector<std::pair<Str, std::size_t>> PoolType;
typedef std::vector<StringListPair> StringListType;
typedef std::function<StrRef(const StringListPair&)> FuncGetFirst;
@ -46,7 +52,7 @@ namespace mchlib {
~StringPool ( void ) noexcept = default;
template <typename ItR>
void update ( ItR parDataBeg, ItR parDataEnd );
void update ( ItR parDataBeg, ItR parDataEnd, const std::vector<const string_type*>& parBaseStrings );
void update ( const StringPool& parOther );
void insert ( const std::vector<stringref_type>& parStrings, const string_type* parBaseString );
void insert ( stringref_type parString, const string_type* parBaseString );
@ -56,11 +62,13 @@ namespace mchlib {
const_iterator begin ( void ) const;
const_iterator end ( void ) const;
const string_type* get_stringref_source ( std::size_t parIndex ) const;
const stringref_type& operator[] ( std::size_t parIndex ) const;
stringref_type operator[] ( std::size_t parIndex ) const;
void pop ( void );
void swap (StringPool& parOther) noexcept;
private:
stringref_type make_stringref (const StringListPair& parStrPair) const;
PoolType m_pool;
StringListType m_strings;
};

View file

@ -27,6 +27,21 @@ namespace mchlib {
return std::make_pair(parClone, false);
}
}
template <typename StrRef, typename Str>
std::size_t start_pos (StrRef parSubstr, const Str* parData) {
typedef decltype(parData->data()) char_type;
assert(parData);
if (not parSubstr.empty()) {
assert(std::less_equal<char_type>()(parData->data(), parSubstr.data()));
const std::size_t offset = parSubstr.data() - parData->data();
return offset;
}
else {
return 0;
}
}
} //namespace implem
template <typename C, typename Str, typename StrRef>
@ -44,32 +59,44 @@ namespace mchlib {
template <typename C, typename Str, typename StrRef>
template <typename ItR>
void StringPool<C, Str, StrRef>::update (ItR parDataBeg, ItR parDataEnd) {
void StringPool<C, Str, StrRef>::update (ItR parDataBeg, ItR parDataEnd, const std::vector<const string_type*>& parBaseStrings) {
typedef std::pair<string_type, std::size_t> PoolPair;
while (parDataBeg != parDataEnd) {
const auto& remote_str = parDataBeg->first;
const auto* remote_source_str = parDataBeg->second;
assert(parDataBeg->second < parBaseStrings.size());
assert(parBaseStrings[parDataBeg->second] != nullptr);
const auto* remote_source_str = parBaseStrings[parDataBeg->second];
const StrRange& remote_str_rng = parDataBeg->first;
const auto& remote_str_ref = stringref_type(*remote_source_str).substr(remote_str_rng.start, remote_str_rng.len);
bool cloned = false;
std::size_t idx = 0;
for (auto& local_src : m_pool) {
const string_type& local_str = local_src.first;
auto& local_ref_count = local_src.second;
auto cloned_result = implem::clone_ifp<StrRef>(remote_str, local_str);
auto cloned_result = implem::clone_ifp<StrRef>(remote_str_ref, local_str);
cloned = cloned_result.second;
const auto& cloned_str = cloned_result.first;
if (cloned) {
++local_ref_count;
m_strings.push_back(StringListPair(cloned_str, &local_str));
StrRange str_range {implem::start_pos(cloned_str, &local_str), cloned_str.size()};
m_strings.push_back(StringListPair(str_range, idx));
break;
}
++idx;
}
if (not cloned) {
m_pool.push_back(PoolPair(*remote_source_str, static_cast<std::size_t>(1)));
const auto offset = remote_str.data() - remote_source_str->data();
m_strings.push_back(StringListPair(stringref_type(m_pool.back().first).substr(offset, remote_str.size()), &m_pool.back().first));
const std::size_t offset = implem::start_pos(remote_str_ref, remote_source_str);
m_strings.push_back(
StringListPair(
StrRange{offset, remote_str_ref.size()},
m_pool.size() - 1
)
);
}
++parDataBeg;
}
@ -77,45 +104,63 @@ namespace mchlib {
template <typename C, typename Str, typename StrRef>
void StringPool<C, Str, StrRef>::update (const StringPool& parOther) {
this->update(parOther.m_strings.begin(), parOther.m_strings.end());
std::vector<const string_type*> other_strs;
other_strs.reserve(parOther.m_pool.size());
for (auto& other_pool_itm : parOther.m_pool) {
other_strs.push_back(&other_pool_itm.first);
}
update(parOther.m_strings.begin(), parOther.m_strings.end(), other_strs);
}
template <typename C, typename Str, typename StrRef>
auto StringPool<C, Str, StrRef>::begin() const -> const_iterator {
return const_iterator(m_strings.cbegin(), [](const StringListPair& parItm) { return parItm.first; });
return const_iterator(m_strings.cbegin(), [this](const StringListPair& parItm) {
return this->make_stringref(parItm);
});
}
template <typename C, typename Str, typename StrRef>
auto StringPool<C, Str, StrRef>::end() const -> const_iterator {
return const_iterator(m_strings.cend(), [](const StringListPair& parItm) { return parItm.first; });
return const_iterator(m_strings.cend(), [this](const StringListPair& parItm) {
this->make_stringref(parItm);
});
}
template <typename C, typename Str, typename StrRef>
void StringPool<C, Str, StrRef>::insert (const std::vector<stringref_type>& parStrings, const string_type* parBaseString) {
assert(parBaseString);
StringListType dummy;
dummy.reserve(parStrings.size());
for (const auto& itm : parStrings) {
dummy.push_back(StringListPair(itm, parBaseString));
StrRange str_range {implem::start_pos(itm, parBaseString), itm.size()};
dummy.push_back(StringListPair(str_range, 0));
}
this->update(dummy.begin(), dummy.end());
const std::vector<const string_type*> other_strs(1, parBaseString);
update(dummy.begin(), dummy.end(), other_strs);
}
template <typename C, typename Str, typename StrRef>
void StringPool<C, Str, StrRef>::insert (stringref_type parString, const string_type* parBaseString) {
assert(parBaseString);
assert(std::less_equal<const C*>()(parBaseString->data(), parString.data()));
StringListType dummy;
dummy.reserve(1);
dummy.push_back(StringListPair(parString, parBaseString));
this->update(dummy.begin(), dummy.end());
StrRange str_range {implem::start_pos(parString, parBaseString), parString.size()};
dummy.push_back(StringListPair(str_range, 0));
const std::vector<const string_type*> other_strs(1, parBaseString);
update(dummy.begin(), dummy.end(), other_strs);
}
template <typename C, typename Str, typename StrRef>
auto StringPool<C, Str, StrRef>::get_stringref_source (std::size_t parIndex) const -> const string_type* {
return m_strings[parIndex].second;
return &m_pool[m_strings[parIndex].second].first;
}
template <typename C, typename Str, typename StrRef>
auto StringPool<C, Str, StrRef>::operator[] (std::size_t parIndex) const -> const stringref_type& {
return m_strings[parIndex].first;
auto StringPool<C, Str, StrRef>::operator[] (std::size_t parIndex) const -> stringref_type {
return make_stringref(m_strings[parIndex]);
}
template <typename C, typename Str, typename StrRef>
@ -126,7 +171,7 @@ namespace mchlib {
for (auto z = m_pool.size(); z > 0; --z) {
auto& pool_itm = m_pool[z - 1];
if (&pool_itm.first == m_strings.back().second) {
if (&pool_itm.first == &m_pool[m_strings.back().second].first) {
m_strings.resize(m_strings.size() - 1);
--pool_itm.second;
if (0 == pool_itm.second) {
@ -143,4 +188,10 @@ namespace mchlib {
m_pool.swap(parOther.m_pool);
m_strings.swap(parOther.m_strings);
}
template <typename C, typename Str, typename StrRef>
auto StringPool<C, Str, StrRef>::make_stringref (const StringListPair& parStrPair) const -> stringref_type {
assert(parStrPair.second < m_pool.size());
return stringref_type(m_pool[parStrPair.second].first).substr(parStrPair.first.start, parStrPair.first.len);
}
} //namespace mchlib

View file

@ -6,10 +6,12 @@ add_executable(${PROJECT_NAME}
test_glob2regex.cpp
test_tiger_string_conv.cpp
test_lexical_cast.cpp
test_pathname.cpp
)
target_include_directories(${PROJECT_NAME} SYSTEM
PRIVATE ../gtest/include
PRIVATE ../../src/machinery
)
target_link_libraries(${PROJECT_NAME}

View file

@ -0,0 +1,77 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include <gtest/gtest.h>
#include "pathname.hpp"
TEST(machinery, pathname) {
using mchlib::PathName;
{
PathName empty_path("");
EXPECT_EQ("", empty_path.path());
EXPECT_EQ(0, empty_path.atom_count());
}
{
PathName test_slash("home/");
EXPECT_FALSE(test_slash.is_absolute());
EXPECT_EQ("home", test_slash.path());
EXPECT_EQ(1, test_slash.atom_count());
PathName test("home");
EXPECT_FALSE(test.is_absolute());
EXPECT_EQ("home", test.path());
EXPECT_EQ(1, test.atom_count());
EXPECT_EQ(test, test_slash);
}
{
PathName test("/home/");
EXPECT_TRUE(test.is_absolute());
EXPECT_EQ("/home", test.path());
EXPECT_EQ(1, test.atom_count());
test.join("duckz/documents/libreoffice");
EXPECT_TRUE(test.is_absolute());
EXPECT_EQ(4, test.atom_count());
EXPECT_EQ("/home/duckz/documents/libreoffice", test.path());
test.pop_right();
EXPECT_EQ(3, test.atom_count());
EXPECT_EQ("/home/duckz/documents", test.path());
test.join("attachments");
test.join("important");
EXPECT_EQ(5, test.atom_count());
EXPECT_EQ("/home/duckz/documents/attachments/important", test.path());
PathName attachments = mchlib::make_relative_path(PathName("/home/duckz/documents"), test);
EXPECT_FALSE(attachments.is_absolute());
EXPECT_EQ(2, attachments.atom_count());
EXPECT_EQ("attachments/important", attachments.path());
}
}
TEST(machinery, pathname_functions) {
using mchlib::PathName;
using mchlib::make_relative_path;
using mchlib::basename;
using mchlib::is_ancestor;
using mchlib::are_siblings;
}