From 929c5b8adf1f728e2da1cd1dd582b25829e874b5 Mon Sep 17 00:00:00 2001 From: King_DuckZ Date: Wed, 16 Aug 2017 21:27:31 +0100 Subject: [PATCH] Add a unit test for PathName and fix the errors I found. The pool was storing strings and references to it. References became invalid as strings got moved around, as a consequence of push_back() in the owner container. I'm storing references in a custom StrRange struct now, and string refs are built on the fly when one is needed. --- src/machinery/pathname.cpp | 8 +++- src/machinery/stringpool.hpp | 14 ++++-- src/machinery/stringpool.inl | 87 ++++++++++++++++++++++++++++-------- test/unit/CMakeLists.txt | 2 + test/unit/test_pathname.cpp | 77 +++++++++++++++++++++++++++++++ 5 files changed, 165 insertions(+), 23 deletions(-) create mode 100644 test/unit/test_pathname.cpp diff --git a/src/machinery/pathname.cpp b/src/machinery/pathname.cpp index 2c10f89..8c1e655 100644 --- a/src/machinery/pathname.cpp +++ b/src/machinery/pathname.cpp @@ -16,6 +16,7 @@ */ #include "pathname.hpp" +#include "dindexer-core/split.hpp" #include #include #include @@ -149,11 +150,14 @@ namespace mchlib { void PathName::join (const char* parOther) { const std::string src(parOther); const boost::string_ref ref(src); - m_pool.insert(ref, &src); + this->join(ref, &src); } void PathName::join (boost::string_ref parOther, const std::string* parSource) { - m_pool.insert(parOther, parSource); + m_pool.insert( + dincore::split(parOther, '/', false, true), + parSource + ); } PathName make_relative_path (const PathName& parBasePath, const PathName& parOtherPath) { diff --git a/src/machinery/stringpool.hpp b/src/machinery/stringpool.hpp index 6ceabf7..cbc5916 100644 --- a/src/machinery/stringpool.hpp +++ b/src/machinery/stringpool.hpp @@ -27,11 +27,17 @@ #include #include #include +#include namespace mchlib { template , typename StrRef=boost::basic_string_ref> class StringPool { - typedef std::pair StringListPair; + struct StrRange { + std::size_t start; + std::size_t len; + }; + + typedef std::pair StringListPair; typedef std::vector> PoolType; typedef std::vector StringListType; typedef std::function FuncGetFirst; @@ -46,7 +52,7 @@ namespace mchlib { ~StringPool ( void ) noexcept = default; template - void update ( ItR parDataBeg, ItR parDataEnd ); + void update ( ItR parDataBeg, ItR parDataEnd, const std::vector& parBaseStrings ); void update ( const StringPool& parOther ); void insert ( const std::vector& parStrings, const string_type* parBaseString ); void insert ( stringref_type parString, const string_type* parBaseString ); @@ -56,11 +62,13 @@ namespace mchlib { const_iterator begin ( void ) const; const_iterator end ( void ) const; const string_type* get_stringref_source ( std::size_t parIndex ) const; - const stringref_type& operator[] ( std::size_t parIndex ) const; + stringref_type operator[] ( std::size_t parIndex ) const; void pop ( void ); void swap (StringPool& parOther) noexcept; private: + stringref_type make_stringref (const StringListPair& parStrPair) const; + PoolType m_pool; StringListType m_strings; }; diff --git a/src/machinery/stringpool.inl b/src/machinery/stringpool.inl index cfcc6a4..07bf7b1 100644 --- a/src/machinery/stringpool.inl +++ b/src/machinery/stringpool.inl @@ -27,6 +27,21 @@ namespace mchlib { return std::make_pair(parClone, false); } } + + template + std::size_t start_pos (StrRef parSubstr, const Str* parData) { + typedef decltype(parData->data()) char_type; + assert(parData); + + if (not parSubstr.empty()) { + assert(std::less_equal()(parData->data(), parSubstr.data())); + const std::size_t offset = parSubstr.data() - parData->data(); + return offset; + } + else { + return 0; + } + } } //namespace implem template @@ -44,32 +59,44 @@ namespace mchlib { template template - void StringPool::update (ItR parDataBeg, ItR parDataEnd) { + void StringPool::update (ItR parDataBeg, ItR parDataEnd, const std::vector& parBaseStrings) { typedef std::pair PoolPair; while (parDataBeg != parDataEnd) { - const auto& remote_str = parDataBeg->first; - const auto* remote_source_str = parDataBeg->second; + assert(parDataBeg->second < parBaseStrings.size()); + assert(parBaseStrings[parDataBeg->second] != nullptr); + const auto* remote_source_str = parBaseStrings[parDataBeg->second]; + const StrRange& remote_str_rng = parDataBeg->first; + const auto& remote_str_ref = stringref_type(*remote_source_str).substr(remote_str_rng.start, remote_str_rng.len); + bool cloned = false; + std::size_t idx = 0; for (auto& local_src : m_pool) { const string_type& local_str = local_src.first; auto& local_ref_count = local_src.second; - auto cloned_result = implem::clone_ifp(remote_str, local_str); + auto cloned_result = implem::clone_ifp(remote_str_ref, local_str); cloned = cloned_result.second; const auto& cloned_str = cloned_result.first; if (cloned) { ++local_ref_count; - m_strings.push_back(StringListPair(cloned_str, &local_str)); + StrRange str_range {implem::start_pos(cloned_str, &local_str), cloned_str.size()}; + m_strings.push_back(StringListPair(str_range, idx)); break; } + ++idx; } if (not cloned) { m_pool.push_back(PoolPair(*remote_source_str, static_cast(1))); - const auto offset = remote_str.data() - remote_source_str->data(); - m_strings.push_back(StringListPair(stringref_type(m_pool.back().first).substr(offset, remote_str.size()), &m_pool.back().first)); + const std::size_t offset = implem::start_pos(remote_str_ref, remote_source_str); + m_strings.push_back( + StringListPair( + StrRange{offset, remote_str_ref.size()}, + m_pool.size() - 1 + ) + ); } ++parDataBeg; } @@ -77,45 +104,63 @@ namespace mchlib { template void StringPool::update (const StringPool& parOther) { - this->update(parOther.m_strings.begin(), parOther.m_strings.end()); + std::vector other_strs; + other_strs.reserve(parOther.m_pool.size()); + for (auto& other_pool_itm : parOther.m_pool) { + other_strs.push_back(&other_pool_itm.first); + } + update(parOther.m_strings.begin(), parOther.m_strings.end(), other_strs); } template auto StringPool::begin() const -> const_iterator { - return const_iterator(m_strings.cbegin(), [](const StringListPair& parItm) { return parItm.first; }); + return const_iterator(m_strings.cbegin(), [this](const StringListPair& parItm) { + return this->make_stringref(parItm); + }); } template auto StringPool::end() const -> const_iterator { - return const_iterator(m_strings.cend(), [](const StringListPair& parItm) { return parItm.first; }); + return const_iterator(m_strings.cend(), [this](const StringListPair& parItm) { + this->make_stringref(parItm); + }); } template void StringPool::insert (const std::vector& parStrings, const string_type* parBaseString) { + assert(parBaseString); + StringListType dummy; dummy.reserve(parStrings.size()); for (const auto& itm : parStrings) { - dummy.push_back(StringListPair(itm, parBaseString)); + StrRange str_range {implem::start_pos(itm, parBaseString), itm.size()}; + dummy.push_back(StringListPair(str_range, 0)); } - this->update(dummy.begin(), dummy.end()); + const std::vector other_strs(1, parBaseString); + update(dummy.begin(), dummy.end(), other_strs); } template void StringPool::insert (stringref_type parString, const string_type* parBaseString) { + assert(parBaseString); + assert(std::less_equal()(parBaseString->data(), parString.data())); + StringListType dummy; dummy.reserve(1); - dummy.push_back(StringListPair(parString, parBaseString)); - this->update(dummy.begin(), dummy.end()); + StrRange str_range {implem::start_pos(parString, parBaseString), parString.size()}; + dummy.push_back(StringListPair(str_range, 0)); + const std::vector other_strs(1, parBaseString); + update(dummy.begin(), dummy.end(), other_strs); } template auto StringPool::get_stringref_source (std::size_t parIndex) const -> const string_type* { - return m_strings[parIndex].second; + return &m_pool[m_strings[parIndex].second].first; } template - auto StringPool::operator[] (std::size_t parIndex) const -> const stringref_type& { - return m_strings[parIndex].first; + auto StringPool::operator[] (std::size_t parIndex) const -> stringref_type { + return make_stringref(m_strings[parIndex]); } template @@ -126,7 +171,7 @@ namespace mchlib { for (auto z = m_pool.size(); z > 0; --z) { auto& pool_itm = m_pool[z - 1]; - if (&pool_itm.first == m_strings.back().second) { + if (&pool_itm.first == &m_pool[m_strings.back().second].first) { m_strings.resize(m_strings.size() - 1); --pool_itm.second; if (0 == pool_itm.second) { @@ -143,4 +188,10 @@ namespace mchlib { m_pool.swap(parOther.m_pool); m_strings.swap(parOther.m_strings); } + + template + auto StringPool::make_stringref (const StringListPair& parStrPair) const -> stringref_type { + assert(parStrPair.second < m_pool.size()); + return stringref_type(m_pool[parStrPair.second].first).substr(parStrPair.first.start, parStrPair.first.len); + } } //namespace mchlib diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index f2b4b94..77cba59 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -6,10 +6,12 @@ add_executable(${PROJECT_NAME} test_glob2regex.cpp test_tiger_string_conv.cpp test_lexical_cast.cpp + test_pathname.cpp ) target_include_directories(${PROJECT_NAME} SYSTEM PRIVATE ../gtest/include + PRIVATE ../../src/machinery ) target_link_libraries(${PROJECT_NAME} diff --git a/test/unit/test_pathname.cpp b/test/unit/test_pathname.cpp new file mode 100644 index 0000000..5724cc3 --- /dev/null +++ b/test/unit/test_pathname.cpp @@ -0,0 +1,77 @@ +/* Copyright 2015, 2016, Michele Santullo + * This file is part of "dindexer". + * + * "dindexer" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "dindexer" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with "dindexer". If not, see . + */ + +#include +#include "pathname.hpp" + +TEST(machinery, pathname) { + using mchlib::PathName; + + { + PathName empty_path(""); + EXPECT_EQ("", empty_path.path()); + EXPECT_EQ(0, empty_path.atom_count()); + } + + { + PathName test_slash("home/"); + EXPECT_FALSE(test_slash.is_absolute()); + EXPECT_EQ("home", test_slash.path()); + EXPECT_EQ(1, test_slash.atom_count()); + + PathName test("home"); + EXPECT_FALSE(test.is_absolute()); + EXPECT_EQ("home", test.path()); + EXPECT_EQ(1, test.atom_count()); + + EXPECT_EQ(test, test_slash); + } + + { + PathName test("/home/"); + EXPECT_TRUE(test.is_absolute()); + EXPECT_EQ("/home", test.path()); + EXPECT_EQ(1, test.atom_count()); + + test.join("duckz/documents/libreoffice"); + EXPECT_TRUE(test.is_absolute()); + EXPECT_EQ(4, test.atom_count()); + EXPECT_EQ("/home/duckz/documents/libreoffice", test.path()); + + test.pop_right(); + EXPECT_EQ(3, test.atom_count()); + EXPECT_EQ("/home/duckz/documents", test.path()); + + test.join("attachments"); + test.join("important"); + EXPECT_EQ(5, test.atom_count()); + EXPECT_EQ("/home/duckz/documents/attachments/important", test.path()); + + PathName attachments = mchlib::make_relative_path(PathName("/home/duckz/documents"), test); + EXPECT_FALSE(attachments.is_absolute()); + EXPECT_EQ(2, attachments.atom_count()); + EXPECT_EQ("attachments/important", attachments.path()); + } +} + +TEST(machinery, pathname_functions) { + using mchlib::PathName; + using mchlib::make_relative_path; + using mchlib::basename; + using mchlib::is_ancestor; + using mchlib::are_siblings; +}