mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2025-07-02 14:04:22 +00:00
Fix various problems introduced in scan with the last commit.
Fixes a crash caused by an assertion that accessed a past-the-end iterator. Gets content type detection to work, although only after hashing is done (see the comment in main.cpp for details). Fixes a problem where the array passed to guess_content_type was not sorted as the function expects. Adds more assertions.
This commit is contained in:
parent
22614432a9
commit
be9fc3eb0b
7 changed files with 93 additions and 10 deletions
|
@ -51,11 +51,17 @@ namespace mchlib {
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(NDEBUG)
|
||||||
FileRecordData ( const FileRecordData& ) = delete;
|
FileRecordData ( const FileRecordData& ) = delete;
|
||||||
|
#else
|
||||||
|
FileRecordData ( const FileRecordData& ) = default;
|
||||||
|
#endif
|
||||||
FileRecordData ( FileRecordData&& ) = default;
|
FileRecordData ( FileRecordData&& ) = default;
|
||||||
FileRecordData& operator= ( const FileRecordData& ) = delete;
|
FileRecordData& operator= ( const FileRecordData& ) = delete;
|
||||||
FileRecordData& operator= ( FileRecordData&& ) = default;
|
FileRecordData& operator= ( FileRecordData&& ) = default;
|
||||||
bool operator== ( const FileRecordData& ) const = delete;
|
#if !defined(NDEBUG)
|
||||||
|
bool operator== ( const FileRecordData& parOther ) const;
|
||||||
|
#endif
|
||||||
|
|
||||||
TigerHash hash;
|
TigerHash hash;
|
||||||
std::string abs_path;
|
std::string abs_path;
|
||||||
|
@ -83,6 +89,12 @@ namespace mchlib {
|
||||||
std::string name;
|
std::string name;
|
||||||
uint32_t disk_number;
|
uint32_t disk_number;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#if !defined(NDEBUG)
|
||||||
|
//Equality used by debug builds (the surrounding guard compiles this only
//when NDEBUG is not defined). Two records compare equal when their
//absolute paths are the same string; no other field is looked at.
inline bool FileRecordData::operator== (const FileRecordData& parOther) const {
	const bool same_abs_path = (abs_path == parOther.abs_path);
	return same_abs_path;
}
|
||||||
|
#endif
|
||||||
} //namespace mchlib
|
} //namespace mchlib
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -25,6 +25,7 @@
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <ciso646>
|
#include <ciso646>
|
||||||
|
#include <cstdint>
|
||||||
|
|
||||||
namespace mchlib {
|
namespace mchlib {
|
||||||
namespace implem {
|
namespace implem {
|
||||||
|
@ -146,6 +147,10 @@ namespace mchlib {
|
||||||
std::size_t size ( void ) const;
|
std::size_t size ( void ) const;
|
||||||
std::size_t files_count ( void ) const;
|
std::size_t files_count ( void ) const;
|
||||||
std::size_t dir_count ( void ) const;
|
std::size_t dir_count ( void ) const;
|
||||||
|
const ListType& sorted_list ( void ) const;
|
||||||
|
|
||||||
|
static void sort_list ( ListType& parList );
|
||||||
|
static ListType::iterator lower_bound ( ListType& parList, const char* parPath, uint16_t parLevel, bool parIsDir );
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ListType m_list;
|
ListType m_list;
|
||||||
|
|
|
@ -19,10 +19,12 @@
|
||||||
#include "dindexer-machinery/recorddata.hpp"
|
#include "dindexer-machinery/recorddata.hpp"
|
||||||
#include <fnmatch.h>
|
#include <fnmatch.h>
|
||||||
#include <ciso646>
|
#include <ciso646>
|
||||||
|
#include <cassert>
|
||||||
|
|
||||||
namespace mchlib {
|
namespace mchlib {
|
||||||
namespace implem {
|
namespace implem {
|
||||||
bool glob_matches (const FileRecordData& parData, const char* parGlob) {
|
bool glob_matches (const FileRecordData& parData, const char* parGlob) {
|
||||||
|
assert(parData.path.data());
|
||||||
//assert that the substring in path terminates at the same place
|
//assert that the substring in path terminates at the same place
|
||||||
//where the one in abs_path terminates (ie: it's null-terminated).
|
//where the one in abs_path terminates (ie: it's null-terminated).
|
||||||
assert(parData.path == std::string(parData.path.data()));
|
assert(parData.path == std::string(parData.path.data()));
|
||||||
|
|
|
@ -131,7 +131,9 @@ namespace mchlib {
|
||||||
};
|
};
|
||||||
|
|
||||||
const auto total_entries = (parEntriesCount ? parEntriesCount : count_listing_items_recursive(parContent));
|
const auto total_entries = (parEntriesCount ? parEntriesCount : count_listing_items_recursive(parContent));
|
||||||
|
assert(total_entries == count_listing_items_recursive(parContent));
|
||||||
auto flattened = flattened_listing(parContent);
|
auto flattened = flattened_listing(parContent);
|
||||||
|
assert(flattened.size() == total_entries);
|
||||||
|
|
||||||
for (const auto& chk : checker_chain) {
|
for (const auto& chk : checker_chain) {
|
||||||
if (chk.max_total_entries and chk.max_total_entries >= total_entries) {
|
if (chk.max_total_entries and chk.max_total_entries >= total_entries) {
|
||||||
|
@ -147,10 +149,16 @@ namespace mchlib {
|
||||||
if (parContent.empty())
|
if (parContent.empty())
|
||||||
return ContentType_Empty;
|
return ContentType_Empty;
|
||||||
|
|
||||||
|
//Assert that array being passed in is sorted in the same way SetListing
|
||||||
|
//would sort it. If it's not the case things will break because
|
||||||
|
//SetListingView won't do any sorting.
|
||||||
|
assert(std::equal(parContent.begin(), parContent.end(), SetListing(std::vector<FileRecordData>(parContent)).sorted_list().begin()));
|
||||||
|
|
||||||
//TODO: assert that the first item in the list is the shortest string
|
//TODO: assert that the first item in the list is the shortest string
|
||||||
std::shared_ptr<PathName> pathname(new PathName(parContent.front().abs_path));
|
std::shared_ptr<PathName> pathname(new PathName(parContent.front().abs_path));
|
||||||
ConstSetListingView view(parContent.begin(), parContent.end(), pathname->atom_count(), pathname);
|
ConstSetListingView view(parContent.begin(), parContent.end(), pathname->atom_count(), pathname);
|
||||||
return guess_content_type(parMediaType, view, parContent.size());
|
assert(parContent.size() >= 1);
|
||||||
|
return guess_content_type(parMediaType, view, parContent.size() - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
char content_type_to_char (mchlib::ContentTypes parCType) {
|
char content_type_to_char (mchlib::ContentTypes parCType) {
|
||||||
|
|
|
@ -47,6 +47,7 @@
|
||||||
# include <iostream>
|
# include <iostream>
|
||||||
#endif
|
#endif
|
||||||
#include <boost/utility/string_ref.hpp>
|
#include <boost/utility/string_ref.hpp>
|
||||||
|
#include <boost/range/empty.hpp>
|
||||||
|
|
||||||
namespace mchlib {
|
namespace mchlib {
|
||||||
using HashType = decltype(FileRecordData::hash);
|
using HashType = decltype(FileRecordData::hash);
|
||||||
|
@ -88,7 +89,8 @@ namespace mchlib {
|
||||||
const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
|
const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
|
||||||
if (it->is_directory) {
|
if (it->is_directory) {
|
||||||
auto cd_list = MutableSetListingView(it);
|
auto cd_list = MutableSetListingView(it);
|
||||||
assert(cd_list.begin()->abs_path != it->abs_path);
|
assert(boost::empty(cd_list) or cd_list.begin()->abs_path != it->abs_path);
|
||||||
|
|
||||||
hash_dir(*it, cd_list, curr_subdir, parMime, parIgnoreErrors);
|
hash_dir(*it, cd_list, curr_subdir, parMime, parIgnoreErrors);
|
||||||
append_to_vec(dir_blob, it->hash, relpath);
|
append_to_vec(dir_blob, it->hash, relpath);
|
||||||
}
|
}
|
||||||
|
@ -440,8 +442,17 @@ namespace mchlib {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Indexer::add_path (const char* parPath, const fastf::FileStats& parStats) {
|
bool Indexer::add_path (const char* parPath, const fastf::FileStats& parStats) {
|
||||||
m_local_data->paths.push_back(
|
auto it_before = SetListing::lower_bound(
|
||||||
make_file_record_data(parPath, parStats));
|
m_local_data->paths,
|
||||||
|
parPath,
|
||||||
|
parStats.level,
|
||||||
|
parStats.is_dir
|
||||||
|
);
|
||||||
|
|
||||||
|
m_local_data->paths.insert(
|
||||||
|
it_before,
|
||||||
|
make_file_record_data(parPath, parStats)
|
||||||
|
);
|
||||||
if (not parStats.is_dir) {
|
if (not parStats.is_dir) {
|
||||||
++m_local_data->file_count;
|
++m_local_data->file_count;
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,12 +21,30 @@
|
||||||
#include <ciso646>
|
#include <ciso646>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <boost/utility/string_ref.hpp>
|
||||||
|
|
||||||
namespace mchlib {
|
namespace mchlib {
|
||||||
namespace {
|
namespace {
|
||||||
bool file_record_data_lt (const FileRecordData& parLeft, const FileRecordData& parRight) {
|
//A struct that wraps the minimum necessary in order for LT comparison
|
||||||
|
//to be made.
|
||||||
|
struct FileRecordDataForSearch {
|
||||||
|
FileRecordDataForSearch ( const char* parPath, uint16_t parLevel, bool parIsDir) :
|
||||||
|
abs_path(parPath),
|
||||||
|
level(parLevel),
|
||||||
|
is_directory(parIsDir)
|
||||||
|
{
|
||||||
|
assert(parPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
boost::string_ref abs_path;
|
||||||
|
uint16_t level;
|
||||||
|
bool is_directory;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <typename OtherRecord>
|
||||||
|
bool file_record_data_lt (const FileRecordData& parLeft, const OtherRecord& parRight) {
|
||||||
const FileRecordData& l = parLeft;
|
const FileRecordData& l = parLeft;
|
||||||
const FileRecordData& r = parRight;
|
const OtherRecord& r = parRight;
|
||||||
return
|
return
|
||||||
(l.level < r.level)
|
(l.level < r.level)
|
||||||
or (l.level == r.level and l.is_directory and not r.is_directory)
|
or (l.level == r.level and l.is_directory and not r.is_directory)
|
||||||
|
@ -84,7 +102,7 @@ namespace mchlib {
|
||||||
assert(m_current == m_end or m_base_path->atom_count() == PathName(m_current->abs_path).atom_count());
|
assert(m_current == m_end or m_base_path->atom_count() == PathName(m_current->abs_path).atom_count());
|
||||||
assert(m_current == m_end or m_base_path->atom_count() == m_current->level + m_level_offset);
|
assert(m_current == m_end or m_base_path->atom_count() == m_current->level + m_level_offset);
|
||||||
|
|
||||||
//Look for the point where the children of this entry starts
|
//Look for the point where the children of this entry start
|
||||||
while (
|
while (
|
||||||
m_current != m_end and (
|
m_current != m_end and (
|
||||||
m_current->level + m_level_offset == m_base_path->atom_count() or
|
m_current->level + m_level_offset == m_base_path->atom_count() or
|
||||||
|
@ -197,7 +215,11 @@ namespace mchlib {
|
||||||
m_base_path()
|
m_base_path()
|
||||||
{
|
{
|
||||||
if (parSort) {
|
if (parSort) {
|
||||||
std::sort(m_list.begin(), m_list.end(), &file_record_data_lt);
|
sort_list(m_list);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
//Assert that received list is already sorted
|
||||||
|
assert(std::equal(m_list.begin(), m_list.end(), SetListing(ListType(m_list), true).sorted_list().begin()));
|
||||||
}
|
}
|
||||||
if (not m_list.empty()) {
|
if (not m_list.empty()) {
|
||||||
m_base_path.reset(new PathName(m_list.front().abs_path));
|
m_base_path.reset(new PathName(m_list.front().abs_path));
|
||||||
|
@ -252,6 +274,20 @@ namespace mchlib {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//Read-only access to the list held by this SetListing. The reference stays
//valid only as long as this object is alive.
const SetListing::ListType& SetListing::sorted_list() const {
	return this->m_list;
}
|
||||||
|
|
||||||
|
//Sorts parList in place using file_record_data_lt as the strict ordering
//between two FileRecordData items.
void SetListing::sort_list (ListType& parList) {
	const auto record_less = &file_record_data_lt<FileRecordData>;
	std::sort(parList.begin(), parList.end(), record_less);
}
|
||||||
|
|
||||||
|
//Binary-searches parList for the position of the entry described by
//(parPath, parLevel, parIsDir).
//Returns an iterator to the first element that does not compare less than
//that key under file_record_data_lt, or parList.end() if every element
//compares less. Inserting a new record at the returned position keeps the
//list ordered.
//parList must already be ordered by file_record_data_lt (as sort_list
//leaves it), otherwise the result of std::lower_bound is meaningless.
//parPath must be a non-null, null-terminated string.
SetListing::ListType::iterator SetListing::lower_bound (ListType& parList, const char* parPath, uint16_t parLevel, bool parIsDir) {
	const FileRecordDataForSearch find_record(parPath, parLevel, parIsDir);
	return std::lower_bound(
		parList.begin(),
		parList.end(),
		find_record,
		&file_record_data_lt<FileRecordDataForSearch>
	);
}
|
||||||
|
|
||||||
SetListingView<false> SetListing::make_view() {
|
SetListingView<false> SetListing::make_view() {
|
||||||
const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().abs_path).atom_count());
|
const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().abs_path).atom_count());
|
||||||
return SetListingView<false>(m_list.begin(), m_list.end(), offs, m_base_path);
|
return SetListingView<false>(m_list.begin(), m_list.end(), offs, m_base_path);
|
||||||
|
|
|
@ -118,11 +118,20 @@ int main (int parArgc, char* parArgv[]) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
run_hash_calculation(indexer, verbose);
|
||||||
|
|
||||||
|
//TODO: guess_content_type() relies on FileRecordData::path being set to
|
||||||
|
//the relative path already. Unfortunately at this point it just got
|
||||||
|
//default-initialized to be the same as abs_path, so for a video DVD, for
|
||||||
|
//example, it's going to be like "/mnt/cdrom/VIDEO_TS" instead of just
|
||||||
|
//"VIDEO_TS". This will cause guess_content_type() to miss. Relative
|
||||||
|
//paths are populated at the end of calculate_hash(), so until I come up
|
||||||
|
//with a better system I'm just moving content detection to after hash
|
||||||
|
//calculation.
|
||||||
const auto set_type_casted = dinlib::char_to_media_type(set_type);
|
const auto set_type_casted = dinlib::char_to_media_type(set_type);
|
||||||
const mchlib::ContentTypes content = mchlib::guess_content_type(set_type_casted, indexer.record_data());
|
const mchlib::ContentTypes content = mchlib::guess_content_type(set_type_casted, indexer.record_data());
|
||||||
const char content_type = mchlib::content_type_to_char(content);
|
const char content_type = mchlib::content_type_to_char(content);
|
||||||
|
|
||||||
run_hash_calculation(indexer, verbose);
|
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
std::cout << "Writing to database...\n";
|
std::cout << "Writing to database...\n";
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue