mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2025-02-17 11:45:50 +00:00
Fix various problems introduced in scan with the last commit.
Fixes a crash due to an assertion trying to access a past-the-end iterator. Gets content type detection to work, although only after hashing is done (see comment in main.cpp for details). Fixes a problem with the array passed to guess_content_type not being sorted as the function expects. Adds more assertions.
This commit is contained in:
parent
22614432a9
commit
be9fc3eb0b
7 changed files with 93 additions and 10 deletions
|
@ -51,11 +51,17 @@ namespace mchlib {
|
|||
{
|
||||
}
|
||||
|
||||
#if defined(NDEBUG)
|
||||
FileRecordData ( const FileRecordData& ) = delete;
|
||||
#else
|
||||
FileRecordData ( const FileRecordData& ) = default;
|
||||
#endif
|
||||
FileRecordData ( FileRecordData&& ) = default;
|
||||
FileRecordData& operator= ( const FileRecordData& ) = delete;
|
||||
FileRecordData& operator= ( FileRecordData&& ) = default;
|
||||
bool operator== ( const FileRecordData& ) const = delete;
|
||||
#if !defined(NDEBUG)
|
||||
bool operator== ( const FileRecordData& parOther ) const;
|
||||
#endif
|
||||
|
||||
TigerHash hash;
|
||||
std::string abs_path;
|
||||
|
@ -83,6 +89,12 @@ namespace mchlib {
|
|||
std::string name;
|
||||
uint32_t disk_number;
|
||||
};
|
||||
|
||||
#if !defined(NDEBUG)
|
||||
//Equality comparison between two records: they compare equal when their
//abs_path strings are equal. No other member is looked at.
inline bool FileRecordData::operator== (const FileRecordData& parOther) const {
|
||||
return (this->abs_path == parOther.abs_path);
|
||||
}
|
||||
#endif
|
||||
} //namespace mchlib
|
||||
|
||||
#endif
|
||||
|
|
|
@ -25,6 +25,7 @@
|
|||
#include <memory>
|
||||
#include <type_traits>
|
||||
#include <ciso646>
|
||||
#include <cstdint>
|
||||
|
||||
namespace mchlib {
|
||||
namespace implem {
|
||||
|
@ -146,6 +147,10 @@ namespace mchlib {
|
|||
std::size_t size ( void ) const;
|
||||
std::size_t files_count ( void ) const;
|
||||
std::size_t dir_count ( void ) const;
|
||||
const ListType& sorted_list ( void ) const;
|
||||
|
||||
static void sort_list ( ListType& parList );
|
||||
static ListType::iterator lower_bound ( ListType& parList, const char* parPath, uint16_t parLevel, bool parIsDir );
|
||||
|
||||
private:
|
||||
ListType m_list;
|
||||
|
|
|
@ -19,10 +19,12 @@
|
|||
#include "dindexer-machinery/recorddata.hpp"
|
||||
#include <fnmatch.h>
|
||||
#include <ciso646>
|
||||
#include <cassert>
|
||||
|
||||
namespace mchlib {
|
||||
namespace implem {
|
||||
bool glob_matches (const FileRecordData& parData, const char* parGlob) {
|
||||
assert(parData.path.data());
|
||||
//assert that the substring in path terminates at the same place
|
||||
//where the one in abs_path terminates (ie: it's null-terminated).
|
||||
assert(parData.path == std::string(parData.path.data()));
|
||||
|
|
|
@ -131,7 +131,9 @@ namespace mchlib {
|
|||
};
|
||||
|
||||
const auto total_entries = (parEntriesCount ? parEntriesCount : count_listing_items_recursive(parContent));
|
||||
assert(total_entries == count_listing_items_recursive(parContent));
|
||||
auto flattened = flattened_listing(parContent);
|
||||
assert(flattened.size() == total_entries);
|
||||
|
||||
for (const auto& chk : checker_chain) {
|
||||
if (chk.max_total_entries and chk.max_total_entries >= total_entries) {
|
||||
|
@ -147,10 +149,16 @@ namespace mchlib {
|
|||
if (parContent.empty())
|
||||
return ContentType_Empty;
|
||||
|
||||
//Assert that the array being passed in is sorted in the same way SetListing
|
||||
//would sort it. If it's not the case things will break because
|
||||
//SetListingView won't do any sorting.
|
||||
assert(std::equal(parContent.begin(), parContent.end(), SetListing(std::vector<FileRecordData>(parContent)).sorted_list().begin()));
|
||||
|
||||
//TODO: assert that the first item in the list is the shortest string
|
||||
std::shared_ptr<PathName> pathname(new PathName(parContent.front().abs_path));
|
||||
ConstSetListingView view(parContent.begin(), parContent.end(), pathname->atom_count(), pathname);
|
||||
return guess_content_type(parMediaType, view, parContent.size());
|
||||
assert(parContent.size() >= 1);
|
||||
return guess_content_type(parMediaType, view, parContent.size() - 1);
|
||||
}
|
||||
|
||||
char content_type_to_char (mchlib::ContentTypes parCType) {
|
||||
|
|
|
@ -47,6 +47,7 @@
|
|||
# include <iostream>
|
||||
#endif
|
||||
#include <boost/utility/string_ref.hpp>
|
||||
#include <boost/range/empty.hpp>
|
||||
|
||||
namespace mchlib {
|
||||
using HashType = decltype(FileRecordData::hash);
|
||||
|
@ -88,7 +89,8 @@ namespace mchlib {
|
|||
const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
|
||||
if (it->is_directory) {
|
||||
auto cd_list = MutableSetListingView(it);
|
||||
assert(cd_list.begin()->abs_path != it->abs_path);
|
||||
assert(boost::empty(cd_list) or cd_list.begin()->abs_path != it->abs_path);
|
||||
|
||||
hash_dir(*it, cd_list, curr_subdir, parMime, parIgnoreErrors);
|
||||
append_to_vec(dir_blob, it->hash, relpath);
|
||||
}
|
||||
|
@ -440,8 +442,17 @@ namespace mchlib {
|
|||
}
|
||||
|
||||
bool Indexer::add_path (const char* parPath, const fastf::FileStats& parStats) {
|
||||
m_local_data->paths.push_back(
|
||||
make_file_record_data(parPath, parStats));
|
||||
auto it_before = SetListing::lower_bound(
|
||||
m_local_data->paths,
|
||||
parPath,
|
||||
parStats.level,
|
||||
parStats.is_dir
|
||||
);
|
||||
|
||||
m_local_data->paths.insert(
|
||||
it_before,
|
||||
make_file_record_data(parPath, parStats)
|
||||
);
|
||||
if (not parStats.is_dir) {
|
||||
++m_local_data->file_count;
|
||||
}
|
||||
|
|
|
@ -21,12 +21,30 @@
|
|||
#include <ciso646>
|
||||
#include <cassert>
|
||||
#include <algorithm>
|
||||
#include <boost/utility/string_ref.hpp>
|
||||
|
||||
namespace mchlib {
|
||||
namespace {
|
||||
bool file_record_data_lt (const FileRecordData& parLeft, const FileRecordData& parRight) {
|
||||
//A struct that wraps the minimum necessary in order for LT comparison
|
||||
//to be made.
|
||||
//Lightweight search key holding only a path, a level and a directory flag.
struct FileRecordDataForSearch {
|
||||
//parPath must not be null (asserted below). It is stored in a
//boost::string_ref, which only refers to the characters without copying
//them, so the pointed-to string has to outlive this object.
FileRecordDataForSearch ( const char* parPath, uint16_t parLevel, bool parIsDir) :
|
||||
abs_path(parPath),
|
||||
level(parLevel),
|
||||
is_directory(parIsDir)
|
||||
{
|
||||
assert(parPath);
|
||||
}
|
||||
|
||||
//Non-owning view over the path given to the constructor
boost::string_ref abs_path;
|
||||
//Value of parLevel as passed to the constructor
uint16_t level;
|
||||
//Value of parIsDir as passed to the constructor
bool is_directory;
|
||||
};
|
||||
|
||||
template <typename OtherRecord>
|
||||
bool file_record_data_lt (const FileRecordData& parLeft, const OtherRecord& parRight) {
|
||||
const FileRecordData& l = parLeft;
|
||||
const FileRecordData& r = parRight;
|
||||
const OtherRecord& r = parRight;
|
||||
return
|
||||
(l.level < r.level)
|
||||
or (l.level == r.level and l.is_directory and not r.is_directory)
|
||||
|
@ -84,7 +102,7 @@ namespace mchlib {
|
|||
assert(m_current == m_end or m_base_path->atom_count() == PathName(m_current->abs_path).atom_count());
|
||||
assert(m_current == m_end or m_base_path->atom_count() == m_current->level + m_level_offset);
|
||||
|
||||
//Look for the point where the children of this entry starts
|
||||
//Look for the point where the children of this entry start
|
||||
while (
|
||||
m_current != m_end and (
|
||||
m_current->level + m_level_offset == m_base_path->atom_count() or
|
||||
|
@ -197,7 +215,11 @@ namespace mchlib {
|
|||
m_base_path()
|
||||
{
|
||||
if (parSort) {
|
||||
std::sort(m_list.begin(), m_list.end(), &file_record_data_lt);
|
||||
sort_list(m_list);
|
||||
}
|
||||
else {
|
||||
//Assert that the received list is already sorted
|
||||
assert(std::equal(m_list.begin(), m_list.end(), SetListing(ListType(m_list), true).sorted_list().begin()));
|
||||
}
|
||||
if (not m_list.empty()) {
|
||||
m_base_path.reset(new PathName(m_list.front().abs_path));
|
||||
|
@ -252,6 +274,20 @@ namespace mchlib {
|
|||
);
|
||||
}
|
||||
|
||||
//Read-only accessor: returns a const reference to m_list, no copy is made.
const SetListing::ListType& SetListing::sorted_list() const {
|
||||
return m_list;
|
||||
}
|
||||
|
||||
//Sorts parList in place with std::sort, ordering its elements with
//file_record_data_lt<FileRecordData>.
void SetListing::sort_list (ListType& parList) {
|
||||
std::sort(parList.begin(), parList.end(), &file_record_data_lt<FileRecordData>);
|
||||
}
|
||||
|
||||
//Builds a FileRecordDataForSearch from parPath, parLevel and parIsDir and
//returns the result of std::lower_bound over parList, comparing with
//file_record_data_lt<FileRecordDataForSearch>. The returned iterator is
//parList.end() when no element satisfies the search.
//std::lower_bound requires the range to already be ordered consistently with
//the comparison being used; this function does not sort parList itself.
//parPath must not be null (FileRecordDataForSearch asserts on it).
SetListing::ListType::iterator SetListing::lower_bound (ListType& parList, const char* parPath, uint16_t parLevel, bool parIsDir) {
|
||||
//NOTE(review): string_ref is not referenced anywhere else in this body
using boost::string_ref;
|
||||
FileRecordDataForSearch find_record(parPath, parLevel, parIsDir);
|
||||
return std::lower_bound(parList.begin(), parList.end(), find_record, &file_record_data_lt<FileRecordDataForSearch>);
|
||||
}
|
||||
|
||||
SetListingView<false> SetListing::make_view() {
|
||||
const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().abs_path).atom_count());
|
||||
return SetListingView<false>(m_list.begin(), m_list.end(), offs, m_base_path);
|
||||
|
|
|
@ -118,11 +118,20 @@ int main (int parArgc, char* parArgv[]) {
|
|||
return 1;
|
||||
}
|
||||
else {
|
||||
run_hash_calculation(indexer, verbose);
|
||||
|
||||
//TODO: guess_content_type() relies on FileRecordData::path being set to
|
||||
//the relative path already. Unfortunately at this point it just got
|
||||
//default-initialized to be the same as abs_path, so for a video DVD, for
|
||||
//example, it's going to be like "/mnt/cdrom/VIDEO_TS" instead of just
|
||||
//"VIDEO_TS". This will cause guess_content_type() to miss. Relative
|
||||
//paths are populated at the end of calculate_hash(), so until I come up
|
||||
//with a better system I'm just moving content detection to after hash
|
||||
//calculation.
|
||||
const auto set_type_casted = dinlib::char_to_media_type(set_type);
|
||||
const mchlib::ContentTypes content = mchlib::guess_content_type(set_type_casted, indexer.record_data());
|
||||
const char content_type = mchlib::content_type_to_char(content);
|
||||
|
||||
run_hash_calculation(indexer, verbose);
|
||||
if (verbose) {
|
||||
std::cout << "Writing to database...\n";
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue