1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2025-02-17 11:45:50 +00:00

Fix various problems introduced in scan with the last commit.

Fixes a crash due to an assertion trying to access a
past-the-end iterator.
Gets content type detection to work, although only after
hashing is done (see comment in main.cpp for details).
Fixes a problem where the array passed to guess_content_type
was not sorted as the function expects.
Adds more assertions.
This commit is contained in:
King_DuckZ 2016-02-23 18:56:07 +01:00
parent 22614432a9
commit be9fc3eb0b
7 changed files with 93 additions and 10 deletions

View file

@ -51,11 +51,17 @@ namespace mchlib {
{
}
#if defined(NDEBUG)
FileRecordData ( const FileRecordData& ) = delete;
#else
FileRecordData ( const FileRecordData& ) = default;
#endif
FileRecordData ( FileRecordData&& ) = default;
FileRecordData& operator= ( const FileRecordData& ) = delete;
FileRecordData& operator= ( FileRecordData&& ) = default;
bool operator== ( const FileRecordData& ) const = delete;
#if !defined(NDEBUG)
bool operator== ( const FileRecordData& parOther ) const;
#endif
TigerHash hash;
std::string abs_path;
@ -83,6 +89,12 @@ namespace mchlib {
std::string name;
uint32_t disk_number;
};
#if !defined(NDEBUG)
//Equality check that only exists when NDEBUG is not defined.
//Two records compare equal when their abs_path strings are equal;
//no other member takes part in the comparison.
inline bool FileRecordData::operator== (const FileRecordData& parOther) const {
	const bool same_path = (abs_path == parOther.abs_path);
	return same_path;
}
#endif
} //namespace mchlib
#endif

View file

@ -25,6 +25,7 @@
#include <memory>
#include <type_traits>
#include <ciso646>
#include <cstdint>
namespace mchlib {
namespace implem {
@ -146,6 +147,10 @@ namespace mchlib {
std::size_t size ( void ) const;
std::size_t files_count ( void ) const;
std::size_t dir_count ( void ) const;
const ListType& sorted_list ( void ) const;
static void sort_list ( ListType& parList );
static ListType::iterator lower_bound ( ListType& parList, const char* parPath, uint16_t parLevel, bool parIsDir );
private:
ListType m_list;

View file

@ -19,10 +19,12 @@
#include "dindexer-machinery/recorddata.hpp"
#include <fnmatch.h>
#include <ciso646>
#include <cassert>
namespace mchlib {
namespace implem {
bool glob_matches (const FileRecordData& parData, const char* parGlob) {
assert(parData.path.data());
//assert that the substring in path terminates at the same place
//where the one in abs_path terminates (ie: it's null-terminated).
assert(parData.path == std::string(parData.path.data()));

View file

@ -131,7 +131,9 @@ namespace mchlib {
};
const auto total_entries = (parEntriesCount ? parEntriesCount : count_listing_items_recursive(parContent));
assert(total_entries == count_listing_items_recursive(parContent));
auto flattened = flattened_listing(parContent);
assert(flattened.size() == total_entries);
for (const auto& chk : checker_chain) {
if (chk.max_total_entries and chk.max_total_entries >= total_entries) {
@ -147,10 +149,16 @@ namespace mchlib {
if (parContent.empty())
return ContentType_Empty;
//Assert that array being passed in is sorted in the same way SetListing
//would sort it. If it's not the case things will break because
//SetListingView won't do any sorting.
assert(std::equal(parContent.begin(), parContent.end(), SetListing(std::vector<FileRecordData>(parContent)).sorted_list().begin()));
//TODO: assert that the first item in the list is the shortest string
std::shared_ptr<PathName> pathname(new PathName(parContent.front().abs_path));
ConstSetListingView view(parContent.begin(), parContent.end(), pathname->atom_count(), pathname);
return guess_content_type(parMediaType, view, parContent.size());
assert(parContent.size() >= 1);
return guess_content_type(parMediaType, view, parContent.size() - 1);
}
char content_type_to_char (mchlib::ContentTypes parCType) {

View file

@ -47,6 +47,7 @@
# include <iostream>
#endif
#include <boost/utility/string_ref.hpp>
#include <boost/range/empty.hpp>
namespace mchlib {
using HashType = decltype(FileRecordData::hash);
@ -88,7 +89,8 @@ namespace mchlib {
const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
if (it->is_directory) {
auto cd_list = MutableSetListingView(it);
assert(cd_list.begin()->abs_path != it->abs_path);
assert(boost::empty(cd_list) or cd_list.begin()->abs_path != it->abs_path);
hash_dir(*it, cd_list, curr_subdir, parMime, parIgnoreErrors);
append_to_vec(dir_blob, it->hash, relpath);
}
@ -440,8 +442,17 @@ namespace mchlib {
}
bool Indexer::add_path (const char* parPath, const fastf::FileStats& parStats) {
m_local_data->paths.push_back(
make_file_record_data(parPath, parStats));
auto it_before = SetListing::lower_bound(
m_local_data->paths,
parPath,
parStats.level,
parStats.is_dir
);
m_local_data->paths.insert(
it_before,
make_file_record_data(parPath, parStats)
);
if (not parStats.is_dir) {
++m_local_data->file_count;
}

View file

@ -21,12 +21,30 @@
#include <ciso646>
#include <cassert>
#include <algorithm>
#include <boost/utility/string_ref.hpp>
namespace mchlib {
namespace {
bool file_record_data_lt (const FileRecordData& parLeft, const FileRecordData& parRight) {
//A struct that wraps the minimum necessary in order for LT comparison
//to be made: the path, its level and whether it is a directory.
//abs_path is a boost::string_ref, so it only refers to the characters
//behind parPath: that string must outlive this object.
struct FileRecordDataForSearch {
	FileRecordDataForSearch ( const char* parPath, uint16_t parLevel, bool parIsDir) :
		//The null check has to happen before the string_ref gets built
		//from the pointer; asserting in the constructor body would only
		//run after parPath has already been read.
		abs_path((assert(parPath), parPath)),
		level(parLevel),
		is_directory(parIsDir)
	{
	}

	boost::string_ref abs_path;
	uint16_t level;
	bool is_directory;
};
template <typename OtherRecord>
bool file_record_data_lt (const FileRecordData& parLeft, const OtherRecord& parRight) {
const FileRecordData& l = parLeft;
const FileRecordData& r = parRight;
const OtherRecord& r = parRight;
return
(l.level < r.level)
or (l.level == r.level and l.is_directory and not r.is_directory)
@ -84,7 +102,7 @@ namespace mchlib {
assert(m_current == m_end or m_base_path->atom_count() == PathName(m_current->abs_path).atom_count());
assert(m_current == m_end or m_base_path->atom_count() == m_current->level + m_level_offset);
//Look for the point where the children of this entry starts
//Look for the point where the children of this entry start
while (
m_current != m_end and (
m_current->level + m_level_offset == m_base_path->atom_count() or
@ -197,7 +215,11 @@ namespace mchlib {
m_base_path()
{
if (parSort) {
std::sort(m_list.begin(), m_list.end(), &file_record_data_lt);
sort_list(m_list);
}
else {
//Assert that received list is already sorted
assert(std::equal(m_list.begin(), m_list.end(), SetListing(ListType(m_list), true).sorted_list().begin()));
}
if (not m_list.empty()) {
m_base_path.reset(new PathName(m_list.front().abs_path));
@ -252,6 +274,20 @@ namespace mchlib {
);
}
//Gives read-only access to the list stored in this SetListing (m_list).
//The returned reference is to the member itself, no copy is made.
const SetListing::ListType& SetListing::sorted_list() const {
	return this->m_list;
}
//Sorts parList in place, using file_record_data_lt as the ordering
//between FileRecordData items.
void SetListing::sort_list (ListType& parList) {
	const auto first = parList.begin();
	const auto last = parList.end();
	std::sort(first, last, &file_record_data_lt<FileRecordData>);
}
//Returns an iterator to the first entry of parList that does not compare
//less than the record described by (parPath, parLevel, parIsDir) according
//to file_record_data_lt, or parList.end() if every entry compares less.
//parList is expected to be already ordered by that same comparison,
//otherwise the result of the binary search is meaningless.
//parPath must not be null.
SetListing::ListType::iterator SetListing::lower_bound (ListType& parList, const char* parPath, uint16_t parLevel, bool parIsDir) {
	//Lightweight search key, so no full FileRecordData has to be built
	//just to run the comparison.
	const FileRecordDataForSearch find_record(parPath, parLevel, parIsDir);
	return std::lower_bound(parList.begin(), parList.end(), find_record, &file_record_data_lt<FileRecordDataForSearch>);
}
SetListingView<false> SetListing::make_view() {
const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().abs_path).atom_count());
return SetListingView<false>(m_list.begin(), m_list.end(), offs, m_base_path);

View file

@ -118,11 +118,20 @@ int main (int parArgc, char* parArgv[]) {
return 1;
}
else {
run_hash_calculation(indexer, verbose);
//TODO: guess_content_type() relies on FileRecordData::path being set to
//the relative path already. Unfortunately at this point it just got
//default-initialized to be the same as abs_path, so for a video DVD, for
//example, it's going to be like "/mnt/cdrom/VIDEO_TS" instead of just
//"VIDEO_TS". This will cause guess_content_type() to miss. Relative
//paths are populated at the end of calculate_hash(), so until I come up
//with a better system I'm just moving content detection to after hash
//calculation.
const auto set_type_casted = dinlib::char_to_media_type(set_type);
const mchlib::ContentTypes content = mchlib::guess_content_type(set_type_casted, indexer.record_data());
const char content_type = mchlib::content_type_to_char(content);
run_hash_calculation(indexer, verbose);
if (verbose) {
std::cout << "Writing to database...\n";
}