From 7f50f264e5c95f7c3d3c3da40a32d6b1e66e675b Mon Sep 17 00:00:00 2001
From: King_DuckZ
Date: Thu, 17 Aug 2017 20:23:41 +0100
Subject: [PATCH] Implement make_filerecord_tree()

---
 src/machinery/CMakeLists.txt            |   1 +
 src/machinery/make_filerecord_tree.cpp  | 109 ++++++++++++++++++++++++++++++
 src/machinery/pathname.cpp              |  36 ++++++++++
 src/machinery/pathname.hpp              |   2 +
 test/unit/CMakeLists.txt                |   1 +
 test/unit/test_make_filerecord_tree.cpp |  63 +++++++++++++++++
 6 files changed, 212 insertions(+)
 create mode 100644 src/machinery/make_filerecord_tree.cpp
 create mode 100644 test/unit/test_make_filerecord_tree.cpp

diff --git a/src/machinery/CMakeLists.txt b/src/machinery/CMakeLists.txt
index 3b32b59..2c8ac3e 100644
--- a/src/machinery/CMakeLists.txt
+++ b/src/machinery/CMakeLists.txt
@@ -24,6 +24,7 @@ add_library(${PROJECT_NAME} SHARED
 	scantask/contenttype.cpp
 	scantask/mime.cpp
 	scantask/setbasic.cpp
+	make_filerecord_tree.cpp
 )
 
 target_include_directories(${PROJECT_NAME} SYSTEM
diff --git a/src/machinery/make_filerecord_tree.cpp b/src/machinery/make_filerecord_tree.cpp
new file mode 100644
index 0000000..9f8be30
--- /dev/null
+++ b/src/machinery/make_filerecord_tree.cpp
@@ -0,0 +1,109 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "dindexer-machinery/make_filerecord_tree.hpp"
+#include "dindexer-machinery/recorddata.hpp"
+#include "pathname.hpp"
+#include <algorithm>
+#include <cassert>
+#include <numeric>
+#include <stack>
+
+namespace mchlib {
+	FileRecordNode::FileRecordNode() = default;
+	FileRecordNode::~FileRecordNode() = default;
+
+	/**
+	 * Arranges a flat list of records into a tree of FileRecordNode.
+	 *
+	 * Records are visited in ascending order of path(). Each record is
+	 * appended to the children of the node currently being filled, and a
+	 * record whose is_directory flag is set becomes the node being filled
+	 * next. Nodes are constructed from the record's position in parRecords.
+	 *
+	 * @param parRecords records to arrange; the vector is only read.
+	 * @return the top-level nodes; empty when parRecords is empty.
+	 */
+	std::vector<FileRecordNode> make_filerecord_tree (const std::vector<FileRecordData>& parRecords) {
+		if (parRecords.empty())
+			return std::vector<FileRecordNode>();
+
+		//Sort positions rather than records, so that nodes can refer back to
+		//the caller's vector.
+		std::vector<std::size_t> sorted_indices(parRecords.size());
+		std::iota(sorted_indices.begin(), sorted_indices.end(), std::size_t(0));
+		std::sort(sorted_indices.begin(), sorted_indices.end(), [&parRecords](std::size_t a, std::size_t b) {
+			return parRecords[a].path() < parRecords[b].path();
+		});
+		//NOTE(review): the unwinding below expects a directory to be directly
+		//followed by its contents; confirm that the ordering of path()
+		//guarantees it for names such as "a", "a-b" and "a/c".
+
+		//retval is a root without a record behind it: it only collects the
+		//top-level nodes and its index member must never be looked up.
+		FileRecordNode retval;
+		FileRecordNode* curr_node = &retval;
+		//Directories still being filled, outermost at the bottom. Only the
+		//children of curr_node ever grow, so these pointers stay valid.
+		std::stack<FileRecordNode*> node_stack;
+
+		assert(not sorted_indices.empty());
+		PathName prev_dir("");
+
+		for (auto idx : sorted_indices) {
+			const FileRecordData& record = parRecords[idx];
+			PathName curr_path(record.path());
+
+			//The record lies outside the directory being filled: walk back up
+			//until a node that can hold it is found.
+			if (not is_ancestor(prev_dir, curr_path, 0)) {
+				while (not node_stack.empty()) {
+					curr_node = node_stack.top();
+					node_stack.pop();
+
+					if (&retval == curr_node) {
+						//The root has no record, so there is no path to test: it
+						//accepts anything.
+						PathName root_path("");
+						prev_dir.swap(root_path);
+						break;
+					}
+
+					PathName test_path(parRecords[curr_node->index].path());
+					//NOTE(review): a sibling match nests the record under its
+					//sibling directory; confirm that this is intended.
+					if (are_siblings(test_path, curr_path) or is_ancestor(test_path, curr_path, 0)) {
+						prev_dir.swap(test_path);
+						break;
+					}
+				}
+			}
+
+			assert(curr_node);
+			curr_node->children.emplace_back(idx);
+
+			if (record.is_directory) {
+				node_stack.push(curr_node);
+				assert(not curr_node->children.empty());
+				curr_node = &curr_node->children.back();
+				prev_dir.swap(curr_path);
+			}
+		}
+
+		return retval.children;
+	}
+} //namespace mchlib
diff --git a/src/machinery/pathname.cpp b/src/machinery/pathname.cpp
index 8c1e655..0476334 100644
--- 
a/src/machinery/pathname.cpp
+++ b/src/machinery/pathname.cpp
@@ -218,6 +218,51 @@ namespace mchlib {
 		return parPath[sz - 1];
 	}
 
+	/**
+	 * Tells whether parAncestor is a leading part of parChild.
+	 *
+	 * A path counts as its own ancestor. parMaxLevels caps how many atoms
+	 * parChild may have beyond parAncestor; 0 removes the cap.
+	 *
+	 * @return true if every atom of parAncestor equals the atom at the
+	 *         same position in parChild and the cap is respected.
+	 */
+	bool is_ancestor (const PathName& parAncestor, const PathName& parChild, std::size_t parMaxLevels) {
+		const std::size_t anc_atom_count = parAncestor.atom_count();
+		const std::size_t cld_atom_count = parChild.atom_count();
+
+		//A path with more atoms than parChild cannot be one of its ancestors.
+		if (anc_atom_count > cld_atom_count)
+			return false;
+		//Subtracting is safe after the check above and cannot wrap around.
+		if (parMaxLevels and cld_atom_count - anc_atom_count > parMaxLevels)
+			return false;
+
+		for (std::size_t z = 0; z < anc_atom_count; ++z) {
+			if (parAncestor[z] != parChild[z])
+				return false;
+		}
+		return true;
+	}
+
+	/**
+	 * Tells whether parA and parB have the same number of atoms and differ
+	 * at most in the last one. Paths of at most one atom are siblings
+	 * whenever their atom counts match.
+	 */
+	bool are_siblings (const PathName& parA, const PathName& parB) {
+		const std::size_t atom_count = parA.atom_count();
+		if (atom_count != parB.atom_count())
+			return false;
+
+		//Compare everything but the last atom.
+		for (std::size_t z = 0; z + 1 < atom_count; ++z) {
+			if (parA[z] != parB[z])
+				return false;
+		}
+		return true;
+	}
+
 	PathName& PathName::pop_right() {
 		m_pool.pop();
 		return *this;
diff --git a/src/machinery/pathname.hpp b/src/machinery/pathname.hpp
index 46ecf30..1490fda 100644
--- a/src/machinery/pathname.hpp
+++ b/src/machinery/pathname.hpp
@@ -59,6 +59,8 @@ namespace mchlib {
 	PathName make_relative_path ( const PathName& parBasePath, const PathName& parOtherPath );
 	std::ostream& operator<< ( std::ostream& parStream, const PathName& parPath );
 	const boost::string_ref basename ( const PathName& parPath );
+	bool is_ancestor (const PathName& parAncestor, const PathName& parChild, std::size_t parMaxLevels);
+	bool are_siblings (const PathName& parA, const PathName& parB);
 } //namespace mchlib
 
 #endif
diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt
index 77cba59..50c111d 100644
--- a/test/unit/CMakeLists.txt
+++ b/test/unit/CMakeLists.txt
@@ -6,6 +6,7 @@ 
add_executable(${PROJECT_NAME}
 	test_glob2regex.cpp
 	test_tiger_string_conv.cpp
 	test_lexical_cast.cpp
+	test_make_filerecord_tree.cpp
 	test_pathname.cpp
 )
 
diff --git a/test/unit/test_make_filerecord_tree.cpp b/test/unit/test_make_filerecord_tree.cpp
new file mode 100644
index 0000000..5e1bb10
--- /dev/null
+++ b/test/unit/test_make_filerecord_tree.cpp
@@ -0,0 +1,83 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "dindexer-machinery/make_filerecord_tree.hpp"
+#include "dindexer-machinery/recorddata.hpp"
+#include <gtest/gtest.h>
+#include <iostream>
+
+//Builds a tree out of an unsorted listing and checks that the entries
+//without a parent directory in their path end up at the top level.
+TEST(machinery, make_filerecord_tree) {
+	using mchlib::make_filerecord_tree;
+	using mchlib::FileRecordNode;
+	using mchlib::FileRecordData;
+
+	std::vector<FileRecordData> records {
+		{"unit/CMakeFiles/dindexer-test.dir/test_guess_content_type.cpp.o", 0, 0, 0, 4, false, false},
+		{"unit/CMakeFiles/dindexer-test.dir/build.make", 0, 0, 0, 4, false, false},
+		{"unit_cli/CMakeFiles", 0, 0, 0, 2, true, false},
+		{"unit_cli/CMakeFiles/dindexer-test_cli.dir/C.includecache", 0, 0, 0, 4, false, false},
+		{"unit/CMakeFiles", 0, 0, 0, 2, true, false},
+		{"unit_cli/cmake_install.cmake", 0, 0, 0, 2, false, false},
+		{"unit_cli/CTestTestfile.cmake", 0, 0, 0, 2, false, false},
+		{"gtest/CMakeFiles/gtest_main.dir/src/gtest_main.cc.o", 0, 0, 0, 4, false, false},
+		{"gtest/CMakeFiles/gtest_main.dir/link.txt", 0, 0, 0, 4, false, false},
+		{"gtest/CMakeFiles/CMakeDirectoryInformation.cmake", 0, 0, 0, 4, false, false},
+		{"unit_cli/Makefile", 0, 0, 0, 2, false, false},
+		{"gtest/CMakeFiles/progress.marks", 0, 0, 0, 3, false, false},
+		{"gtest/CMakeFiles/gtest.dir", 0, 0, 0, 3, true, false},
+		{"unit/Makefile", 0, 0, 0, 2, false, false},
+		{"unit/cmake_install.cmake", 0, 0, 0, 2, false, false},
+		{"unit/dindexer-test", 0, 0, 0, 2, false, false},
+		{"unit_cli/CMakeFiles/CMakeDirectoryInformation.cmake", 0, 0, 0, 3, false, false},
+		{"unit_cli/CMakeFiles/progress.marks", 0, 0, 0, 3, false, false},
+		{"gtest", 0, 0, 0, 1, true, false},
+		{"gtest/CMakeFiles/gtest_main.dir/depend.internal", 0, 0, 0, 4, false, false},
+		{"gtest/CMakeFiles/gtest_main.dir/src", 0, 0, 0, 4, true, false},
+		{"unit/CTestTestfile.cmake", 0, 0, 0, 2, false, false},
+		{"unit/CMakeFiles/CMakeDirectoryInformation.cmake", 0, 0, 0, 3, false, false},
+		{"unit/CMakeFiles/progress.marks", 0, 0, 0, 3, false, false},
+		{"gtest/CMakeFiles/gtest.dir/cmake_clean_target.cmake", 0, 0, 0, 4, false, false},
+		{"gtest/CMakeFiles/gtest.dir/build.make", 0, 0, 0, 4, false, false},
+		{"gtest/CMakeFiles/gtest.dir/cmake_clean.cmake", 0, 0, 0, 4, false, false},
+		{"unit", 0, 0, 0, 1, true, false}
+	};
+	//Positions within records of the "gtest" and "unit" entries.
+	const std::size_t gtest_index = 18;
+	const std::size_t unit_index = 27;
+
+	std::vector<FileRecordNode> results = make_filerecord_tree(records);
+
+	//The input is not empty, so at least one top-level node is expected.
+	ASSERT_FALSE(results.empty());
+
+	bool gtest_at_top = false;
+	bool unit_at_top = false;
+	for (const auto& result : results) {
+		//Guard the lookup below against a node pointing outside records.
+		ASSERT_LT(result.index, records.size());
+		std::cout << result.index << ' ' << records[result.index].path() << '\n';
+
+		gtest_at_top = gtest_at_top or (gtest_index == result.index);
+		unit_at_top = unit_at_top or (unit_index == result.index);
+	}
+
+	//"gtest" and "unit" have no parent directory in their path.
+	EXPECT_TRUE(gtest_at_top);
+	EXPECT_TRUE(unit_at_top);
+}