1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2025-07-02 14:04:22 +00:00

Fix various problems introduced in scan with the last commit.

Fixes a crash due to an assertion trying to access a
past-the-end iterator.
Gets content type detection to work, although only after
hashing is done (see comment in main.cpp for details).
Fixes a problem with the array passed to guess_content_type
not being sorted as the function expects.
Adds more assertions.
This commit is contained in:
King_DuckZ 2016-02-23 18:56:07 +01:00
parent 22614432a9
commit be9fc3eb0b
7 changed files with 93 additions and 10 deletions

View file

@ -51,11 +51,17 @@ namespace mchlib {
{ {
} }
#if defined(NDEBUG)
FileRecordData ( const FileRecordData& ) = delete; FileRecordData ( const FileRecordData& ) = delete;
#else
FileRecordData ( const FileRecordData& ) = default;
#endif
FileRecordData ( FileRecordData&& ) = default; FileRecordData ( FileRecordData&& ) = default;
FileRecordData& operator= ( const FileRecordData& ) = delete; FileRecordData& operator= ( const FileRecordData& ) = delete;
FileRecordData& operator= ( FileRecordData&& ) = default; FileRecordData& operator= ( FileRecordData&& ) = default;
bool operator== ( const FileRecordData& ) const = delete; #if !defined(NDEBUG)
bool operator== ( const FileRecordData& parOther ) const;
#endif
TigerHash hash; TigerHash hash;
std::string abs_path; std::string abs_path;
@ -83,6 +89,12 @@ namespace mchlib {
std::string name; std::string name;
uint32_t disk_number; uint32_t disk_number;
}; };
#if !defined(NDEBUG)
//Debug-only equality (the surrounding guard compiles it out when NDEBUG is
//defined). Two records are considered equal when their absolute paths match;
//no other field is looked at.
inline bool FileRecordData::operator== (const FileRecordData& parOther) const {
	const bool same_path = (abs_path == parOther.abs_path);
	return same_path;
}
#endif
} //namespace mchlib } //namespace mchlib
#endif #endif

View file

@ -25,6 +25,7 @@
#include <memory> #include <memory>
#include <type_traits> #include <type_traits>
#include <ciso646> #include <ciso646>
#include <cstdint>
namespace mchlib { namespace mchlib {
namespace implem { namespace implem {
@ -146,6 +147,10 @@ namespace mchlib {
std::size_t size ( void ) const; std::size_t size ( void ) const;
std::size_t files_count ( void ) const; std::size_t files_count ( void ) const;
std::size_t dir_count ( void ) const; std::size_t dir_count ( void ) const;
const ListType& sorted_list ( void ) const;
static void sort_list ( ListType& parList );
static ListType::iterator lower_bound ( ListType& parList, const char* parPath, uint16_t parLevel, bool parIsDir );
private: private:
ListType m_list; ListType m_list;

View file

@ -19,10 +19,12 @@
#include "dindexer-machinery/recorddata.hpp" #include "dindexer-machinery/recorddata.hpp"
#include <fnmatch.h> #include <fnmatch.h>
#include <ciso646> #include <ciso646>
#include <cassert>
namespace mchlib { namespace mchlib {
namespace implem { namespace implem {
bool glob_matches (const FileRecordData& parData, const char* parGlob) { bool glob_matches (const FileRecordData& parData, const char* parGlob) {
assert(parData.path.data());
//assert that the substring in path terminates at the same place //assert that the substring in path terminates at the same place
//where the one in abs_path terminates (ie: it's null-terminated). //where the one in abs_path terminates (ie: it's null-terminated).
assert(parData.path == std::string(parData.path.data())); assert(parData.path == std::string(parData.path.data()));

View file

@ -131,7 +131,9 @@ namespace mchlib {
}; };
const auto total_entries = (parEntriesCount ? parEntriesCount : count_listing_items_recursive(parContent)); const auto total_entries = (parEntriesCount ? parEntriesCount : count_listing_items_recursive(parContent));
assert(total_entries == count_listing_items_recursive(parContent));
auto flattened = flattened_listing(parContent); auto flattened = flattened_listing(parContent);
assert(flattened.size() == total_entries);
for (const auto& chk : checker_chain) { for (const auto& chk : checker_chain) {
if (chk.max_total_entries and chk.max_total_entries >= total_entries) { if (chk.max_total_entries and chk.max_total_entries >= total_entries) {
@ -147,10 +149,16 @@ namespace mchlib {
if (parContent.empty()) if (parContent.empty())
return ContentType_Empty; return ContentType_Empty;
//Assert that array being passed in is sorted in the same way SetListing
//would sort it. If it's not the case things will break because
//SetListingView won't do any sorting.
assert(std::equal(parContent.begin(), parContent.end(), SetListing(std::vector<FileRecordData>(parContent)).sorted_list().begin()));
//TODO: assert that the first item in the list is the shortest string //TODO: assert that the first item in the list is the shortest string
std::shared_ptr<PathName> pathname(new PathName(parContent.front().abs_path)); std::shared_ptr<PathName> pathname(new PathName(parContent.front().abs_path));
ConstSetListingView view(parContent.begin(), parContent.end(), pathname->atom_count(), pathname); ConstSetListingView view(parContent.begin(), parContent.end(), pathname->atom_count(), pathname);
return guess_content_type(parMediaType, view, parContent.size()); assert(parContent.size() >= 1);
return guess_content_type(parMediaType, view, parContent.size() - 1);
} }
char content_type_to_char (mchlib::ContentTypes parCType) { char content_type_to_char (mchlib::ContentTypes parCType) {

View file

@ -47,6 +47,7 @@
# include <iostream> # include <iostream>
#endif #endif
#include <boost/utility/string_ref.hpp> #include <boost/utility/string_ref.hpp>
#include <boost/range/empty.hpp>
namespace mchlib { namespace mchlib {
using HashType = decltype(FileRecordData::hash); using HashType = decltype(FileRecordData::hash);
@ -88,7 +89,8 @@ namespace mchlib {
const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path(); const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
if (it->is_directory) { if (it->is_directory) {
auto cd_list = MutableSetListingView(it); auto cd_list = MutableSetListingView(it);
assert(cd_list.begin()->abs_path != it->abs_path); assert(boost::empty(cd_list) or cd_list.begin()->abs_path != it->abs_path);
hash_dir(*it, cd_list, curr_subdir, parMime, parIgnoreErrors); hash_dir(*it, cd_list, curr_subdir, parMime, parIgnoreErrors);
append_to_vec(dir_blob, it->hash, relpath); append_to_vec(dir_blob, it->hash, relpath);
} }
@ -440,8 +442,17 @@ namespace mchlib {
} }
bool Indexer::add_path (const char* parPath, const fastf::FileStats& parStats) { bool Indexer::add_path (const char* parPath, const fastf::FileStats& parStats) {
m_local_data->paths.push_back( auto it_before = SetListing::lower_bound(
make_file_record_data(parPath, parStats)); m_local_data->paths,
parPath,
parStats.level,
parStats.is_dir
);
m_local_data->paths.insert(
it_before,
make_file_record_data(parPath, parStats)
);
if (not parStats.is_dir) { if (not parStats.is_dir) {
++m_local_data->file_count; ++m_local_data->file_count;
} }

View file

@ -21,12 +21,30 @@
#include <ciso646> #include <ciso646>
#include <cassert> #include <cassert>
#include <algorithm> #include <algorithm>
#include <boost/utility/string_ref.hpp>
namespace mchlib { namespace mchlib {
namespace { namespace {
bool file_record_data_lt (const FileRecordData& parLeft, const FileRecordData& parRight) { //A struct that wraps the minimum necessary in order for LT comparison
//to be made.
struct FileRecordDataForSearch {
	//parPath must be a non-null, null-terminated string that outlives this
	//object: abs_path only refers to it, it does not copy it.
	FileRecordDataForSearch ( const char* parPath, uint16_t parLevel, bool parIsDir) :
		//Check the pointer before string_ref measures the string, so that a
		//null path trips the assertion instead of being dereferenced first.
		abs_path((assert(parPath), parPath)),
		level(parLevel),
		is_directory(parIsDir)
	{
	}

	//Non-owning view over the path given to the constructor.
	boost::string_ref abs_path;
	//Level of the searched entry, as passed in by the caller.
	uint16_t level;
	//True when the searched entry is a directory.
	bool is_directory;
};
template <typename OtherRecord>
bool file_record_data_lt (const FileRecordData& parLeft, const OtherRecord& parRight) {
const FileRecordData& l = parLeft; const FileRecordData& l = parLeft;
const FileRecordData& r = parRight; const OtherRecord& r = parRight;
return return
(l.level < r.level) (l.level < r.level)
or (l.level == r.level and l.is_directory and not r.is_directory) or (l.level == r.level and l.is_directory and not r.is_directory)
@ -84,7 +102,7 @@ namespace mchlib {
assert(m_current == m_end or m_base_path->atom_count() == PathName(m_current->abs_path).atom_count()); assert(m_current == m_end or m_base_path->atom_count() == PathName(m_current->abs_path).atom_count());
assert(m_current == m_end or m_base_path->atom_count() == m_current->level + m_level_offset); assert(m_current == m_end or m_base_path->atom_count() == m_current->level + m_level_offset);
//Look for the point where the children of this entry starts //Look for the point where the children of this entry start
while ( while (
m_current != m_end and ( m_current != m_end and (
m_current->level + m_level_offset == m_base_path->atom_count() or m_current->level + m_level_offset == m_base_path->atom_count() or
@ -197,7 +215,11 @@ namespace mchlib {
m_base_path() m_base_path()
{ {
if (parSort) { if (parSort) {
std::sort(m_list.begin(), m_list.end(), &file_record_data_lt); sort_list(m_list);
}
else {
//Assert that received list is already sorted
assert(std::equal(m_list.begin(), m_list.end(), SetListing(ListType(m_list), true).sorted_list().begin()));
} }
if (not m_list.empty()) { if (not m_list.empty()) {
m_base_path.reset(new PathName(m_list.front().abs_path)); m_base_path.reset(new PathName(m_list.front().abs_path));
@ -252,6 +274,20 @@ namespace mchlib {
); );
} }
//Gives read-only access to the list held by this SetListing. No copy is
//made; the reference stays valid only as long as this object does.
const SetListing::ListType& SetListing::sorted_list() const {
	const ListType& list_ref = m_list;
	return list_ref;
}
//Sorts parList in place, ordering its records with file_record_data_lt.
void SetListing::sort_list (ListType& parList) {
	const auto first = parList.begin();
	const auto last = parList.end();
	std::sort(first, last, &file_record_data_lt<FileRecordData>);
}
//Finds the position in parList where a record with the given path, level and
//directory flag belongs: the returned iterator points to the first element
//that file_record_data_lt does not order before such a record, or to
//parList.end() if there is none.
//parList must already be sorted with that same ordering (see sort_list()),
//otherwise the result of the binary search is meaningless.
//parPath must be a non-null, null-terminated string.
SetListing::ListType::iterator SetListing::lower_bound (ListType& parList, const char* parPath, uint16_t parLevel, bool parIsDir) {
	//Lightweight search key: avoids building a whole FileRecordData just to
	//run the comparison.
	const FileRecordDataForSearch find_record(parPath, parLevel, parIsDir);
	return std::lower_bound(parList.begin(), parList.end(), find_record, &file_record_data_lt<FileRecordDataForSearch>);
}
SetListingView<false> SetListing::make_view() { SetListingView<false> SetListing::make_view() {
const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().abs_path).atom_count()); const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().abs_path).atom_count());
return SetListingView<false>(m_list.begin(), m_list.end(), offs, m_base_path); return SetListingView<false>(m_list.begin(), m_list.end(), offs, m_base_path);

View file

@ -118,11 +118,20 @@ int main (int parArgc, char* parArgv[]) {
return 1; return 1;
} }
else { else {
run_hash_calculation(indexer, verbose);
//TODO: guess_content_type() relies on FileRecordData::path being set to
//the relative path already. Unfortunately at this point it just got
//default-initialized to be the same as abs_path, so for a video DVD, for
//example, it's going to be like "/mnt/cdrom/VIDEO_TS" instead of just
//"VIDEO_TS". This will cause guess_content_type() to miss. Relative
//paths are populated at the end of calculate_hash(), so until I come up
//with a better system I'm just moving content detection to after hash
//calculation.
const auto set_type_casted = dinlib::char_to_media_type(set_type); const auto set_type_casted = dinlib::char_to_media_type(set_type);
const mchlib::ContentTypes content = mchlib::guess_content_type(set_type_casted, indexer.record_data()); const mchlib::ContentTypes content = mchlib::guess_content_type(set_type_casted, indexer.record_data());
const char content_type = mchlib::content_type_to_char(content); const char content_type = mchlib::content_type_to_char(content);
run_hash_calculation(indexer, verbose);
if (verbose) { if (verbose) {
std::cout << "Writing to database...\n"; std::cout << "Writing to database...\n";
} }