mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2024-11-25 00:53:43 +00:00
Implement make_filerecord_tree()
This commit is contained in:
parent
929c5b8adf
commit
7f50f264e5
6 changed files with 194 additions and 0 deletions
|
@ -24,6 +24,7 @@ add_library(${PROJECT_NAME} SHARED
|
|||
scantask/contenttype.cpp
|
||||
scantask/mime.cpp
|
||||
scantask/setbasic.cpp
|
||||
make_filerecord_tree.cpp
|
||||
)
|
||||
|
||||
target_include_directories(${PROJECT_NAME} SYSTEM
|
||||
|
|
91
src/machinery/make_filerecord_tree.cpp
Normal file
91
src/machinery/make_filerecord_tree.cpp
Normal file
|
@ -0,0 +1,91 @@
|
|||
/* Copyright 2015, 2016, Michele Santullo
|
||||
* This file is part of "dindexer".
|
||||
*
|
||||
* "dindexer" is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* "dindexer" is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "dindexer-machinery/make_filerecord_tree.hpp"
|
||||
#include "dindexer-machinery/recorddata.hpp"
|
||||
#include "pathname.hpp"
|
||||
#include <algorithm>
|
||||
#include <stack>
|
||||
|
||||
namespace mchlib {
|
||||
namespace {
|
||||
//Stateful generator: every call yields the next value of a sequence that
//starts at 0 and grows by 1 (0, 1, 2, ...). Each instance keeps its own
//position, so copies continue independently from the value they were
//copied at.
class Counter {
public:
	Counter() = default;

	//Returns the current value, then advances to the next one.
	std::size_t operator()() {
		const std::size_t current = m_count;
		++m_count;
		return current;
	}

private:
	std::size_t m_count = 0;
};
|
||||
} //unnamed namespace
|
||||
|
||||
//Default constructor. The compiler-generated behaviour is requested here, in
//the implementation file, rather than inside the class declaration.
FileRecordNode::FileRecordNode() = default;
|
||||
//Destructor. The compiler-generated behaviour is requested here, in the
//implementation file, rather than inside the class declaration.
FileRecordNode::~FileRecordNode() = default;
|
||||
|
||||
std::vector<FileRecordNode> make_filerecord_tree (const std::vector<FileRecordData>& parRecords) {
|
||||
if (parRecords.empty())
|
||||
return std::vector<FileRecordNode>();
|
||||
|
||||
std::vector<std::size_t> sorted_indices(parRecords.size());
|
||||
|
||||
std::generate(sorted_indices.begin(), sorted_indices.end(), Counter());
|
||||
std::sort(sorted_indices.begin(), sorted_indices.end(), [&parRecords](std::size_t a, std::size_t b) {
|
||||
return parRecords[a].path() < parRecords[b].path();
|
||||
});
|
||||
|
||||
FileRecordNode retval;
|
||||
FileRecordNode* curr_node = &retval;
|
||||
std::stack<FileRecordNode*> node_stack;
|
||||
|
||||
assert(not sorted_indices.empty());
|
||||
PathName prev_dir("");
|
||||
|
||||
for (auto idx : sorted_indices) {
|
||||
const FileRecordData& record = parRecords[idx];
|
||||
PathName curr_path(record.path());
|
||||
|
||||
//1) new path is subpath
|
||||
if (is_ancestor(prev_dir, curr_path, 0)) {
|
||||
curr_node->children.emplace_back(idx);
|
||||
}
|
||||
//2) new path is different
|
||||
else {
|
||||
while (not node_stack.empty()) {
|
||||
curr_node = node_stack.top();
|
||||
node_stack.pop();
|
||||
|
||||
PathName test_path(parRecords[curr_node->index].path());
|
||||
if (are_siblings(test_path, curr_path) or is_ancestor(test_path, curr_path, 0)) {
|
||||
prev_dir.swap(test_path);
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
assert(curr_node);
|
||||
curr_node->children.emplace_back(idx);
|
||||
}
|
||||
|
||||
if (record.is_directory) {
|
||||
node_stack.push(curr_node);
|
||||
assert(not curr_node->children.empty());
|
||||
curr_node = &curr_node->children.back();
|
||||
prev_dir.swap(curr_path);
|
||||
}
|
||||
}
|
||||
|
||||
return retval.children;
|
||||
}
|
||||
} //namespace mchlib
|
|
@ -218,6 +218,42 @@ namespace mchlib {
|
|||
return parPath[sz - 1];
|
||||
}
|
||||
|
||||
bool is_ancestor (const PathName& parAncestor, const PathName& parChild, std::size_t parMaxLevels) {
|
||||
const std::size_t anc_atom_count = parAncestor.atom_count();
|
||||
const std::size_t cld_atom_count = parChild.atom_count();
|
||||
|
||||
if (anc_atom_count + parMaxLevels >= cld_atom_count or not parMaxLevels) {
|
||||
assert(not parMaxLevels or anc_atom_count <= cld_atom_count);
|
||||
const std::size_t min_count = std::min(anc_atom_count, cld_atom_count);
|
||||
for (std::size_t z = 0; z < min_count; ++z) {
|
||||
if (parAncestor[z] != parChild[z])
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool are_siblings (const PathName& parA, const PathName& parB) {
|
||||
const std::size_t atom_count = parA.atom_count();
|
||||
if (atom_count != parB.atom_count()) {
|
||||
return false;
|
||||
}
|
||||
else if (1 >= atom_count) {
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
assert(atom_count > 1);
|
||||
for (std::size_t z = 0; z < atom_count - 1; ++z) {
|
||||
if (parA[z] != parB[z])
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
PathName& PathName::pop_right() {
|
||||
m_pool.pop();
|
||||
return *this;
|
||||
|
|
|
@ -59,6 +59,8 @@ namespace mchlib {
|
|||
PathName make_relative_path ( const PathName& parBasePath, const PathName& parOtherPath );
|
||||
std::ostream& operator<< ( std::ostream& parStream, const PathName& parPath );
|
||||
const boost::string_ref basename ( const PathName& parPath );
|
||||
//Tells whether parAncestor matches the leading atoms of parChild.
//parMaxLevels limits how many more atoms than parAncestor parChild is
//allowed to have; 0 means no limit.
//NOTE(review): check the definition for the result when parAncestor has
//more atoms than parChild.
bool is_ancestor (const PathName& parAncestor, const PathName& parChild, std::size_t parMaxLevels);
|
||||
//Tells whether parA and parB have the same number of atoms and agree on
//every atom except possibly the last one.
bool are_siblings (const PathName& parA, const PathName& parB);
|
||||
} //namespace mchlib
|
||||
|
||||
#endif
|
||||
|
|
|
@ -6,6 +6,7 @@ add_executable(${PROJECT_NAME}
|
|||
test_glob2regex.cpp
|
||||
test_tiger_string_conv.cpp
|
||||
test_lexical_cast.cpp
|
||||
test_make_filerecord_tree.cpp
|
||||
test_pathname.cpp
|
||||
)
|
||||
|
||||
|
|
63
test/unit/test_make_filerecord_tree.cpp
Normal file
63
test/unit/test_make_filerecord_tree.cpp
Normal file
|
@ -0,0 +1,63 @@
|
|||
/* Copyright 2015, 2016, Michele Santullo
|
||||
* This file is part of "dindexer".
|
||||
*
|
||||
* "dindexer" is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* "dindexer" is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "dindexer-machinery/make_filerecord_tree.hpp"
|
||||
#include "dindexer-machinery/recorddata.hpp"
|
||||
#include <gtest/gtest.h>
|
||||
#include <iostream>
|
||||
|
||||
//Feeds make_filerecord_tree() an unordered list of records and checks that
//the top-level nodes it returns are usable: there is at least one of them
//and each refers to an existing record. The top-level entries are also
//printed to help inspecting the result by eye.
TEST(machinery, make_filerecord_tree) {
	using mchlib::make_filerecord_tree;
	using mchlib::FileRecordNode;
	using mchlib::FileRecordData;

	std::vector<FileRecordData> records {
		{"unit/CMakeFiles/dindexer-test.dir/test_guess_content_type.cpp.o", 0, 0, 0, 4, false, false},
		{"unit/CMakeFiles/dindexer-test.dir/build.make", 0, 0, 0, 4, false, false},
		{"unit_cli/CMakeFiles", 0, 0, 0, 2, true, false},
		{"unit_cli/CMakeFiles/dindexer-test_cli.dir/C.includecache", 0, 0, 0, 4, false, false},
		{"unit/CMakeFiles", 0, 0, 0, 2, true, false},
		{"unit_cli/cmake_install.cmake", 0, 0, 0, 2, false, false},
		{"unit_cli/CTestTestfile.cmake", 0, 0, 0, 2, false, false},
		{"gtest/CMakeFiles/gtest_main.dir/src/gtest_main.cc.o", 0, 0, 0, 4, false, false},
		{"gtest/CMakeFiles/gtest_main.dir/link.txt", 0, 0, 0, 4, false, false},
		{"gtest/CMakeFiles/CMakeDirectoryInformation.cmake", 0, 0, 0, 4, false, false},
		{"unit_cli/Makefile", 0, 0, 0, 2, false, false},
		{"gtest/CMakeFiles/progress.marks", 0, 0, 0, 3, false, false},
		{"gtest/CMakeFiles/gtest.dir", 0, 0, 0, 3, true, false},
		{"unit/Makefile", 0, 0, 0, 2, false, false},
		{"unit/cmake_install.cmake", 0, 0, 0, 2, false, false},
		{"unit/dindexer-test", 0, 0, 0, 2, false, false},
		{"unit_cli/CMakeFiles/CMakeDirectoryInformation.cmake", 0, 0, 0, 3, false, false},
		{"unit_cli/CMakeFiles/progress.marks", 0, 0, 0, 3, false, false},
		{"gtest", 0, 0, 0, 1, true, false},
		{"gtest/CMakeFiles/gtest_main.dir/depend.internal", 0, 0, 0, 4, false, false},
		{"gtest/CMakeFiles/gtest_main.dir/src", 0, 0, 0, 4, true, false},
		{"unit/CTestTestfile.cmake", 0, 0, 0, 2, false, false},
		{"unit/CMakeFiles/CMakeDirectoryInformation.cmake", 0, 0, 0, 3, false, false},
		{"unit/CMakeFiles/progress.marks", 0, 0, 0, 3, false, false},
		{"gtest/CMakeFiles/gtest.dir/cmake_clean_target.cmake", 0, 0, 0, 4, false, false},
		{"gtest/CMakeFiles/gtest.dir/build.make", 0, 0, 0, 4, false, false},
		{"gtest/CMakeFiles/gtest.dir/cmake_clean.cmake", 0, 0, 0, 4, false, false},
		{"unit", 0, 0, 0, 1, true, false}
	};
	std::vector<FileRecordNode> results = make_filerecord_tree(records);

	//A non-empty input must produce at least one top-level node.
	ASSERT_FALSE(results.empty());

	for (const auto& result : results) {
		//Make sure the index is usable before looking the record up with it.
		ASSERT_LT(result.index, records.size());
		std::cout << result.index << ' ' << records[result.index].path() << '\n';
	}
}
|
Loading…
Reference in a new issue