Fix various problems introduced in scan with the last commit.

Fixes a crash caused by an assertion dereferencing a past-the-end iterator. Gets content type detection to work, although only after hashing is done (see the comment in main.cpp for details). Fixes a problem where the array passed to guess_content_type was not sorted the way the function expects. Adds more assertions.
2025-08-18 15:29:48 +00:00 · 2016-02-23 18:56:07 +01:00 · 2016-02-23 18:56:07 +01:00 · be9fc3eb0b
commit be9fc3eb0b
parent 22614432a9
7 changed files with 93 additions and 10 deletions
--- a/include/dindexer-machinery/recorddata.hpp
+++ b/include/dindexer-machinery/recorddata.hpp
@ -51,11 +51,17 @@ namespace mchlib {
 		{
 		}
 #if defined(NDEBUG)
 		FileRecordData ( const FileRecordData& ) = delete;
 #else
 		FileRecordData ( const FileRecordData& ) = default;
 #endif
 		FileRecordData ( FileRecordData&& ) = default;
 		FileRecordData& operator= ( const FileRecordData& ) = delete;
 		FileRecordData& operator= ( FileRecordData&& ) = default;
-		bool operator== ( const FileRecordData& ) const = delete;
+#if !defined(NDEBUG)
 		bool operator== ( const FileRecordData& parOther ) const;
 #endif
 		TigerHash hash;
 		std::string abs_path;
@ -83,6 +89,12 @@ namespace mchlib {
 		std::string name;
 		uint32_t disk_number;
 	};
 #if !defined(NDEBUG)
 	//Equality used in non-NDEBUG builds only: two records are considered
 	//equal when their abs_path strings match. No other member is compared.
 	inline bool FileRecordData::operator== (const FileRecordData& parOther) const {
 		const bool same_path = (abs_path == parOther.abs_path);
 		return same_path;
 	}
 #endif
 } //namespace mchlib
 #endif
--- a/include/dindexer-machinery/set_listing.hpp
+++ b/include/dindexer-machinery/set_listing.hpp
@ -25,6 +25,7 @@
 #include <memory>
 #include <type_traits>
 #include <ciso646>
 #include <cstdint>
 namespace mchlib {
 	namespace implem {
@ -146,6 +147,10 @@ namespace mchlib {
 		std::size_t size ( void ) const;
 		std::size_t files_count ( void ) const;
 		std::size_t dir_count ( void ) const;
 		const ListType& sorted_list ( void ) const;
 		static void sort_list ( ListType& parList );
 		static ListType::iterator lower_bound ( ListType& parList, const char* parPath, uint16_t parLevel, bool parIsDir );
 	private:
 		ListType m_list;
--- a/src/machinery/globbing.cpp
+++ b/src/machinery/globbing.cpp
@ -19,10 +19,12 @@
 #include "dindexer-machinery/recorddata.hpp"
 #include <fnmatch.h>
 #include <ciso646>
 #include <cassert>
 namespace mchlib {
 	namespace implem {
 		bool glob_matches (const FileRecordData& parData, const char* parGlob) {
 			assert(parData.path.data());
 			//assert that the substring in path terminates at the same place
 			//where the one in abs_path terminates (ie: it's null-terminated).
 			assert(parData.path == std::string(parData.path.data()));
--- a/src/machinery/guess_content_type.cpp
+++ b/src/machinery/guess_content_type.cpp
@ -131,7 +131,9 @@ namespace mchlib {
 		};
 		const auto total_entries = (parEntriesCount ? parEntriesCount : count_listing_items_recursive(parContent));
 		assert(total_entries == count_listing_items_recursive(parContent));
 		auto flattened = flattened_listing(parContent);
 		assert(flattened.size() == total_entries);
 		for (const auto& chk : checker_chain) {
 			if (chk.max_total_entries and chk.max_total_entries >= total_entries) {
@ -147,10 +149,16 @@ namespace mchlib {
 		if (parContent.empty())
 			return ContentType_Empty;
 		//Assert that array being passed in is sorted in the same way SetListing
 		//would sort it. If it's not the case things will break because
 		//SetListingView won't do any sorting.
 		assert(std::equal(parContent.begin(), parContent.end(), SetListing(std::vector<FileRecordData>(parContent)).sorted_list().begin()));
 		//TODO: assert that the first item in the list is the shortest string
 		std::shared_ptr<PathName> pathname(new PathName(parContent.front().abs_path));
 		ConstSetListingView view(parContent.begin(), parContent.end(), pathname->atom_count(), pathname);
-		return guess_content_type(parMediaType, view, parContent.size());
+		assert(parContent.size() >= 1);
 		return guess_content_type(parMediaType, view, parContent.size() - 1);
 	}
 	char content_type_to_char (mchlib::ContentTypes parCType) {
--- a/src/machinery/indexer.cpp
+++ b/src/machinery/indexer.cpp
@ -47,6 +47,7 @@
 #	include <iostream>
 #endif
 #include <boost/utility/string_ref.hpp>
 #include <boost/range/empty.hpp>
 namespace mchlib {
 	using HashType = decltype(FileRecordData::hash);
@ -88,7 +89,8 @@ namespace mchlib {
 				const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
 				if (it->is_directory) {
 					auto cd_list = MutableSetListingView(it);
-					assert(cd_list.begin()->abs_path != it->abs_path);
+					assert(boost::empty(cd_list) or cd_list.begin()->abs_path != it->abs_path);
 					hash_dir(*it, cd_list, curr_subdir, parMime, parIgnoreErrors);
 					append_to_vec(dir_blob, it->hash, relpath);
 				}
@ -440,8 +442,17 @@ namespace mchlib {
 	}
 	bool Indexer::add_path (const char* parPath, const fastf::FileStats& parStats) {
-		m_local_data->paths.push_back(
+		auto it_before = SetListing::lower_bound(
-			make_file_record_data(parPath, parStats));
+			m_local_data->paths,
 			parPath,
 			parStats.level,
 			parStats.is_dir
 		);
 		m_local_data->paths.insert(
 			it_before,
 			make_file_record_data(parPath, parStats)
 		);
 		if (not parStats.is_dir) {
 			++m_local_data->file_count;
 		}
--- a/src/machinery/set_listing.cpp
+++ b/src/machinery/set_listing.cpp
@ -21,12 +21,30 @@
 #include <ciso646>
 #include <cassert>
 #include <algorithm>
 #include <boost/utility/string_ref.hpp>
 namespace mchlib {
 	namespace {
-		bool file_record_data_lt (const FileRecordData& parLeft, const FileRecordData& parRight) {
+		//A struct that holds only the fields needed to perform the less-than
 		//comparison against a FileRecordData.
 		struct FileRecordDataForSearch {
 			//parPath must be a non-null, null-terminated string. Only a
 			//non-owning view of it is stored, so the pointed-to buffer has to
 			//stay alive for as long as this object is used.
 			FileRecordDataForSearch ( const char* parPath, uint16_t parLevel, bool parIsDir) :
 				//The null check lives inside the initializer on purpose:
 				//building a string_ref from a const char* measures the string
 				//right away, so an assert placed in the constructor body would
 				//only run after a null pointer had already been dereferenced.
 				abs_path((assert(parPath), parPath)),
 				level(parLevel),
 				is_directory(parIsDir)
 			{
 			}

 			//Non-owning view over the path passed to the constructor
 			boost::string_ref abs_path;
 			//Level value to compare against FileRecordData::level
 			uint16_t level;
 			//True when the entry being searched for is a directory
 			bool is_directory;
 		};
 		template <typename OtherRecord>
 		bool file_record_data_lt (const FileRecordData& parLeft, const OtherRecord& parRight) {
 			const FileRecordData& l = parLeft;
-			const FileRecordData& r = parRight;
+			const OtherRecord& r = parRight;
 			return
 				(l.level < r.level)
 				or (l.level == r.level and l.is_directory and not r.is_directory)
@ -84,7 +102,7 @@ namespace mchlib {
 			assert(m_current == m_end or m_base_path->atom_count() == PathName(m_current->abs_path).atom_count());
 			assert(m_current == m_end or m_base_path->atom_count() == m_current->level + m_level_offset);
-			//Look for the point where the children of this entry starts
+			//Look for the point where the children of this entry start
 			while (
 				m_current != m_end and (
 					m_current->level + m_level_offset == m_base_path->atom_count() or
@ -197,7 +215,11 @@ namespace mchlib {
 		m_base_path()
 	{
 		if (parSort) {
-			std::sort(m_list.begin(), m_list.end(), &file_record_data_lt);
+			sort_list(m_list);
 		}
 		else {
 			//Assert that received list is already sorted
 			assert(std::equal(m_list.begin(), m_list.end(), SetListing(ListType(m_list), true).sorted_list().begin()));
 		}
 		if (not m_list.empty()) {
 			m_base_path.reset(new PathName(m_list.front().abs_path));
@ -252,6 +274,20 @@ namespace mchlib {
 		);
 	}
 	//Read-only access to the list held by this SetListing. No copy is made.
 	//The constructor either sorts m_list or asserts that it was already
 	//sorted, hence the name.
 	const SetListing::ListType& SetListing::sorted_list() const {
 		const ListType& list_ref = m_list;
 		return list_ref;
 	}
 	//Sorts parList in place, using file_record_data_lt as the ordering
 	//predicate between two FileRecordData items.
 	void SetListing::sort_list (ListType& parList) {
 		const auto first = parList.begin();
 		const auto last = parList.end();
 		std::sort(first, last, &file_record_data_lt<FileRecordData>);
 	}
 	//Finds the position in parList where an entry described by parPath,
 	//parLevel and parIsDir belongs according to file_record_data_lt.
 	//
 	//parList must already be ordered by that same predicate (for example via
 	//sort_list()), otherwise the result of std::lower_bound is meaningless.
 	//parPath must not be null (FileRecordDataForSearch asserts on it).
 	//
 	//Returns an iterator to the first element that does not compare less
 	//than the described entry, or parList.end() if every element does.
 	//Inserting at the returned position keeps parList ordered.
 	SetListing::ListType::iterator SetListing::lower_bound (ListType& parList, const char* parPath, uint16_t parLevel, bool parIsDir) {
 		const FileRecordDataForSearch find_record(parPath, parLevel, parIsDir);
 		return std::lower_bound(parList.begin(), parList.end(), find_record, &file_record_data_lt<FileRecordDataForSearch>);
 	}
 	SetListingView<false> SetListing::make_view() {
 		const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().abs_path).atom_count());
 		return SetListingView<false>(m_list.begin(), m_list.end(), offs, m_base_path);
--- a/src/scan/main.cpp
+++ b/src/scan/main.cpp
@ -118,11 +118,20 @@ int main (int parArgc, char* parArgv[]) {
 		return 1;
 	}
 	else {
 		run_hash_calculation(indexer, verbose);
 		//TODO: guess_content_type() relies on FileRecordData::path being set to
 		//the relative path already. Unfortunately at this point it just got
 		//default-initialized to be the same as abs_path, so for a video DVD, for
 		//example, it's going to be like "/mnt/cdrom/VIDEO_TS" instead of just
 		//"VIDEO_TS". This will cause guess_content_type() to miss. Relative
 		//paths are populated at the end of calculate_hash(), so until I come up
 		//with a better system I'm just moving content detection to after hash
 		//calculation.
 		const auto set_type_casted = dinlib::char_to_media_type(set_type);
 		const mchlib::ContentTypes content = mchlib::guess_content_type(set_type_casted, indexer.record_data());
 		const char content_type = mchlib::content_type_to_char(content);
 		run_hash_calculation(indexer, verbose);
 		if (verbose) {
 			std::cout << "Writing to database...\n";
 		}