Merge branch 'hashdir_refactoring'

2025-08-08 13:49:47 +00:00 · 2016-03-12 00:37:28 +01:00 · 2016-03-12 00:37:28 +01:00 · 859764b9a4
commit 859764b9a4
parent be1f47a5fd e95cd6cc44
52 changed files with 1367 additions and 815 deletions
--- a/action_skel_code/commandline.hpp
+++ b/action_skel_code/commandline.hpp
@ -19,7 +19,6 @@
 #define id1B7A42F6E46547A6AB0F914E2A91399F

 #include "dindexer-common/validationerror.hpp"
-#include "dindexer-common/mediatypes.hpp"
 #include <boost/program_options/variables_map.hpp>

 namespace din {
--- a/cscope_gen.sh
+++ b/cscope_gen.sh
@ -32,4 +32,4 @@ set -f
 find . \( $excl_paths -o $incl_extensions \) -a -type f $excl_files > cscope.files
 set +f

-cscope -b -q
+exec cscope -b -q
--- a/include/dindexer-machinery/guess_content_type.hpp
+++ b/include/dindexer-machinery/guess_content_type.hpp
@ -18,9 +18,9 @@
 #ifndef id17F1582F16C8478E8D9795BECBF275A3
 #define id17F1582F16C8478E8D9795BECBF275A3

-#include "dindexer-common/mediatypes.hpp"
+#include "dindexer-machinery/mediatypes.hpp"
 #include "dindexer-machinery/recorddata.hpp"
-#include "dindexer-common/compatibility.h"
+#include "helpers/compatibility.h"
 #include <vector>

 namespace mchlib {
@ -36,8 +36,8 @@ namespace mchlib {

 	template <bool> class SetListingView;

-	ContentTypes guess_content_type ( dinlib::MediaTypes parMediaType, const SetListingView<true>& parContent, std::size_t parEntriesCount=0 );
-	ContentTypes guess_content_type ( dinlib::MediaTypes parMediaType, const std::vector<FileRecordData>& parContent );
+	ContentTypes guess_content_type ( MediaTypes parMediaType, const SetListingView<true>& parContent, std::size_t parEntriesCount=0 );
+	ContentTypes guess_content_type ( MediaTypes parMediaType, const std::vector<FileRecordData>& parContent );

 	char content_type_to_char ( ContentTypes parCType ) a_pure;
 	ContentTypes char_to_content_type ( char parCType ) a_pure;
--- a/include/dindexer-machinery/indexer.hpp
+++ b/include/dindexer-machinery/indexer.hpp
@ -1,77 +0,0 @@
-/* Copyright 2015, 2016, Michele Santullo
- * This file is part of "dindexer".
- *
- * "dindexer" is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * "dindexer" is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef idE555EF56730442C1ADDC7B2AE7A9340E
-#define idE555EF56730442C1ADDC7B2AE7A9340E
-
-#include <memory>
-#include <string>
-#include <vector>
-
-#if !defined(NDEBUG)
-#	define INDEXER_VERBOSE
-#endif
-
-#if defined(WITH_PROGRESS_FEEDBACK)
-namespace std {
-	class condition_variable;
-} //namespace std
-#endif
-
-namespace fastf {
-	struct FileStats;
-} //namespace fastf
-
-namespace dinlib {
-	struct Settings;
-} //namespace dinlib
-
-namespace mchlib {
-	struct FileRecordData;
-
-	class Indexer {
-	public:
-		Indexer ( void );
-		Indexer ( Indexer&& ) = default;
-		Indexer ( const Indexer& ) = delete;
-		~Indexer ( void ) noexcept;
-
-		bool add_path ( const char* parPath, const fastf::FileStats& parStats );
-#if defined(INDEXER_VERBOSE)
-		void dump ( void ) const;
-#endif
-
-		std::size_t total_items ( void ) const;
-		std::string operator[] ( std::size_t parIndex ) const;
-#if defined(WITH_PROGRESS_FEEDBACK)
-		std::size_t processed_items ( void ) const;
-		std::string current_item ( void ) const;
-		std::condition_variable& step_notify ( void );
-#endif
-		void calculate_hash ( void );
-		bool empty ( void ) const;
-		void ignore_read_errors ( bool parIgnore );
-		const std::vector<FileRecordData>& record_data ( void ) const;
-
-	private:
-		struct LocalData;
-
-		std::unique_ptr<LocalData> m_local_data;
-	};
-} //namespace mchlib
-
-#endif
--- a/include/dindexer-machinery/mediatype.hpp
+++ b/include/dindexer-machinery/mediatype.hpp
@ -20,11 +20,11 @@

 #include <string>
 #include <stdexcept>
-#include "dindexer-common/mediatypes.hpp"
+#include "dindexer-machinery/mediatypes.hpp"

 namespace mchlib {
 #if defined(WITH_MEDIA_AUTODETECT)
-	dinlib::MediaTypes guess_media_type ( std::string&& parPath );
+	MediaTypes guess_media_type ( std::string&& parPath );

 	class UnknownMediaTypeException : std::runtime_error {
 	public:
--- a/include/dindexer-machinery/mediatypes.hpp
+++ b/include/dindexer-machinery/mediatypes.hpp
@ -18,10 +18,10 @@
 #ifndef id700AFD0F33634ACC88079BB8853A9E13
 #define id700AFD0F33634ACC88079BB8853A9E13

-#include "dindexer-common/compatibility.h"
+#include "helpers/compatibility.h"
 #include <string>

-namespace dinlib {
+namespace mchlib {
 	enum MediaTypes {
 		MediaType_CDRom = 'C',
 		MediaType_Directory = 'D',
@ -35,6 +35,7 @@ namespace dinlib {

 	const std::string& media_type_to_str ( MediaTypes parType );
 	MediaTypes char_to_media_type ( char parMType ) a_pure;
-} //namespace dinlib
+	char media_type_to_char ( MediaTypes parMType ) a_pure;
+} //namespace mchlib

 #endif
--- a/include/dindexer-machinery/recorddata.hpp
+++ b/include/dindexer-machinery/recorddata.hpp
@ -51,6 +51,24 @@ namespace mchlib {
 		{
 		}

+		//Builds a record from an absolute path plus the timestamps and flags
+		//passed in. abs_path takes over parPath, and path is the part of
+		//abs_path that starts at offset parRelPathOffs. hash and the mime
+		//fields start out value/default-initialised, size is 0, and both
+		//unreadable and hash_valid are false.
+		//NOTE(review): path is built from a string_ref over abs_path, so this
+		//presumably relies on abs_path being declared before path in the
+		//struct - confirm against the member declarations.
+		FileRecordData ( std::string&& parPath, std::size_t parRelPathOffs, std::time_t parATime, std::time_t parMTime, uint16_t parLevel, bool parIsDir, bool parIsSymLink ) :
+			hash {},
+			abs_path(std::move(parPath)),
+			mime_full(),
+			atime(parATime),
+			mtime(parMTime),
+			path(boost::string_ref(abs_path).substr(parRelPathOffs)),
+			mime_type(),
+			mime_charset(),
+			size(0),
+			level(parLevel),
+			is_directory(parIsDir),
+			is_symlink(parIsSymLink),
+			unreadable(false),
+			hash_valid(false)
+	{
+	}
+
 #if defined(NDEBUG)
 		FileRecordData ( const FileRecordData& ) = delete;
 #else
--- a/include/dindexer-machinery/scantask/base.hpp
+++ b/include/dindexer-machinery/scantask/base.hpp
@ -0,0 +1,70 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef idCB253C1A5AFA46A18B8878ED4072CD96
+#define idCB253C1A5AFA46A18B8878ED4072CD96
+
+#include "dindexer-machinery/scantask/leanbase.hpp"
+#include <ciso646>
+#include <cassert>
+
+namespace mchlib {
+	namespace scantask {
+		//Task that owns its data: it stores a T (m_data) and implements
+		//LeanBase's on_data_fill()/on_data_get() on top of it. Derived classes
+		//only provide on_data_create() and on_data_destroy().
+		template <typename T>
+		class Base : public LeanBase<T> {
+		protected:
+			Base ( void );
+			virtual ~Base ( void ) noexcept = default;
+
+		public:
+			//Marks the data as not created and passes m_data to
+			//on_data_destroy().
+			void clear_data ( void );
+
+		private:
+			//Hooks for derived classes; both receive a reference to m_data.
+			virtual void on_data_destroy ( T& parData ) = 0;
+			virtual void on_data_create ( T& parData ) = 0;
+
+			//LeanBase hooks, implemented here and closed to further overriding.
+			virtual T& on_data_get ( void ) final;
+			virtual void on_data_fill ( void ) final;
+
+			using LeanBase<T>::unset_data_created;
+
+			T m_data;
+		};
+
+		//Nothing to set up: m_data is default-initialised.
+		template <typename T>
+		Base<T>::Base() {
+		}
+
+		//LeanBase fill hook: lets the derived class populate m_data.
+		template <typename T>
+		void Base<T>::on_data_fill() {
+			this->on_data_create(m_data);
+		}
+
+		//Resets the "created" flag first, then lets the derived class tear
+		//down m_data. unset_data_created() asserts that the data had actually
+		//been created.
+		template <typename T>
+		void Base<T>::clear_data() {
+			this->unset_data_created();
+			this->on_data_destroy(m_data);
+		}
+
+		//LeanBase get hook: hands out the stored data by reference.
+		template <typename T>
+		T& Base<T>::on_data_get() {
+			return m_data;
+		}
+	} //namespace scantask
+} //namespace mchlib
+
+#endif
--- a/include/dindexer-machinery/scantask/contenttype.hpp
+++ b/include/dindexer-machinery/scantask/contenttype.hpp
@ -0,0 +1,51 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef id148DBED10A0B45238E810683656BA7D5
+#define id148DBED10A0B45238E810683656BA7D5
+
+#include "dindexer-machinery/scantask/leanbase.hpp"
+#include "dindexer-machinery/scantask/base.hpp"
+#include "dindexer-machinery/mediatypes.hpp"
+#include <memory>
+#include <vector>
+
+namespace mchlib {
+	struct FileRecordData;
+	struct SetRecordDataFull;
+
+	namespace scantask {
+		//Task over a SetRecordDataFull that keeps references to three other
+		//tasks: the set task, a directory tree task and a media type task.
+		//NOTE(review): presumably this fills in the content type of the set
+		//from the other two - the implementation is not visible here, confirm.
+		class ContentType : public LeanBase<SetRecordDataFull> {
+		public:
+			using DirTreeTaskPtr = std::shared_ptr<Base<std::vector<FileRecordData>>>;
+			//MediaTypeTaskPtr and SetTaskType currently name the same type.
+			using MediaTypeTaskPtr = std::shared_ptr<LeanBase<SetRecordDataFull>>;
+			using SetTaskType = std::shared_ptr<LeanBase<SetRecordDataFull>>;
+
+			ContentType ( SetTaskType parSet, DirTreeTaskPtr parDirTree, MediaTypeTaskPtr parMediaType );
+
+		private:
+			//LeanBase hooks.
+			virtual void on_data_fill ( void ) override;
+			virtual SetRecordDataFull& on_data_get ( void ) override;
+
+			SetTaskType m_set_task;
+			DirTreeTaskPtr m_dir_tree;
+			MediaTypeTaskPtr m_media_type;
+		};
+	} //namespace scantask
+} //namespace mchlib
+
+#endif
--- a/include/dindexer-machinery/scantask/dirtree.hpp
+++ b/include/dindexer-machinery/scantask/dirtree.hpp
@ -0,0 +1,45 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef id0AA31B2E7D6244A08435CF9080E34AAE
+#define id0AA31B2E7D6244A08435CF9080E34AAE
+
+#include "dindexer-machinery/scantask/base.hpp"
+#include <string>
+#include <vector>
+
+namespace mchlib {
+	struct FileRecordData;
+
+	namespace scantask {
+		//Task owning a list of FileRecordData, built from a root path given
+		//at construction.
+		//NOTE(review): presumably the list describes the directory tree under
+		//m_root - the implementation is not visible here, confirm.
+		class DirTree : public Base<std::vector<FileRecordData>> {
+		public:
+			typedef std::vector<FileRecordData> PathList;
+
+			explicit DirTree ( std::string parRoot );
+			virtual ~DirTree ( void ) noexcept = default;
+
+		private:
+			//Base hooks operating on the owned list.
+			virtual void on_data_destroy ( PathList& parData ) override;
+			virtual void on_data_create ( PathList& parData ) override;
+
+			std::string m_root;
+		};
+	} //namespace scantask
+} //namespace mchlib
+
+#endif
--- a/include/dindexer-machinery/scantask/generalfiller.hpp
+++ b/include/dindexer-machinery/scantask/generalfiller.hpp
@ -0,0 +1,72 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef id904376BAD85D4782B83CAAEC2EF344D9
+#define id904376BAD85D4782B83CAAEC2EF344D9
+
+#include "dindexer-machinery/scantask/leanbase.hpp"
+#include <memory>
+#include <vector>
+#include <utility>
+#include <ciso646>
+
+namespace mchlib {
+	namespace scantask {
+		//Task that groups one or more other tasks working on the same T.
+		//Filling it calls get_or_create() on every wrapped task; its data is
+		//whatever the first wrapped task returns.
+		template <typename T>
+		class GeneralFiller : public LeanBase<T> {
+		public:
+			using FillingTaskPtr = std::shared_ptr<LeanBase<T>>;
+
+			//Takes at least one task; each pointer is stored as a
+			//FillingTaskPtr, so it has to convert to one.
+			template <typename P, typename... PP>
+			GeneralFiller ( std::shared_ptr<P> parFill, std::shared_ptr<PP>... parFillList );
+			virtual ~GeneralFiller ( void ) noexcept;
+
+		private:
+			//LeanBase hooks.
+			virtual void on_data_fill ( void ) override;
+			virtual T& on_data_get ( void ) override;
+
+			//Wrapped tasks, in the order they were passed to the constructor.
+			std::vector<FillingTaskPtr> m_to_fill;
+		};
+
+		//Stores the given tasks in m_to_fill, keeping the argument order.
+		template <typename T>
+		template <typename P, typename... PP>
+		GeneralFiller<T>::GeneralFiller (std::shared_ptr<P> parFill, std::shared_ptr<PP>... parFillList) :
+			m_to_fill { std::move(parFill), std::move(parFillList)... }
+		{
+		}
+
+		//Explicitly drops the references to the wrapped tasks.
+		template <typename T>
+		GeneralFiller<T>::~GeneralFiller() noexcept {
+			m_to_fill.clear();
+		}
+
+		//LeanBase fill hook: calls get_or_create() on every wrapped task, in
+		//order, discarding the returned references.
+		template <typename T>
+		void GeneralFiller<T>::on_data_fill() {
+			for (auto& itm : m_to_fill) {
+				itm->get_or_create();
+			}
+		}
+
+		//LeanBase get hook: returns the data of the first wrapped task. The
+		//constructor always stores at least one entry, hence the assert.
+		template <typename T>
+		T& GeneralFiller<T>::on_data_get() {
+			assert(not m_to_fill.empty());
+			return m_to_fill.front()->get_or_create();
+		}
+	} //namespace scantask
+} //namespace mchlib
+
+#endif
--- a/include/dindexer-machinery/scantask/hashing.hpp
+++ b/include/dindexer-machinery/scantask/hashing.hpp
@ -0,0 +1,53 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef idC7CC55298AC049EAA80604D6C7FD081D
+#define idC7CC55298AC049EAA80604D6C7FD081D
+
+#include "dindexer-machinery/scantask/leanbase.hpp"
+#include <vector>
+#include <memory>
+#include <functional>
+#include <boost/utility/string_ref.hpp>
+#include <cstdint>
+
+namespace mchlib {
+	struct FileRecordData;
+
+	namespace scantask {
+		//Task over a list of FileRecordData that wraps another task producing
+		//such a list (the "file tree" task).
+		//NOTE(review): presumably this computes the hash of every record
+		//coming from the file tree task - implementation not visible here.
+		class Hashing : public LeanBase<std::vector<FileRecordData>> {
+		public:
+			typedef LeanBase<std::vector<FileRecordData>> FileTreeBase;
+			//NOTE(review): the meaning of the three integer arguments is not
+			//documented in this header - confirm against the implementation.
+			typedef std::function<void(const boost::string_ref, uint64_t, uint64_t, uint32_t)> ProgressCallback;
+
+			Hashing ( std::shared_ptr<FileTreeBase> parFileTree, bool parIgnoreErrors );
+			virtual ~Hashing ( void ) noexcept;
+
+			void set_progress_callback ( ProgressCallback parFunc );
+
+		private:
+			//LeanBase hooks.
+			virtual void on_data_fill ( void ) override;
+			virtual std::vector<FileRecordData>& on_data_get ( void ) override;
+
+			std::shared_ptr<FileTreeBase> m_file_tree_task;
+			ProgressCallback m_progress_callback;
+			bool m_ignore_errors;
+		};
+	} //namespace scantask
+} //namespace mchlib
+
+#endif
--- a/include/dindexer-machinery/scantask/leanbase.hpp
+++ b/include/dindexer-machinery/scantask/leanbase.hpp
@ -0,0 +1,99 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef id982AF1D5C59C415584F56C1E6DDFE55E
+#define id982AF1D5C59C415584F56C1E6DDFE55E
+
+#include <ciso646>
+#include <cassert>
+
+#if !defined(NDEBUG)
+#	 define LEANBASE_ASSERT_REENTRANCY
+#endif
+
+namespace mchlib {
+	namespace scantask {
+#if defined(LEANBASE_ASSERT_REENTRANCY)
+		//Guard only compiled when LEANBASE_ASSERT_REENTRANCY is defined: sets
+		//the pointed-to flag to true for the guard's lifetime and back to
+		//false on destruction. Asserts that the pointer is not null and that
+		//the flag was not already set.
+		struct AutoSetBool {
+			explicit AutoSetBool ( bool* parBool ) :
+				m_bool(parBool)
+			{
+				assert(m_bool);
+				assert(not *m_bool);
+				*m_bool = true;
+			}
+			~AutoSetBool ( void ) noexcept {
+				*m_bool = false;
+			}
+
+			//Not owned; must stay valid for as long as the guard lives.
+			bool* m_bool;
+		};
+#endif
+
+		//Base for tasks that produce a T on demand. get_or_create() runs
+		//on_data_fill() the first time it is called (tracked through
+		//m_data_created) and always returns what on_data_get() yields.
+		template <typename T>
+		class LeanBase {
+		protected:
+			LeanBase ( void );
+			virtual ~LeanBase ( void ) noexcept = default;
+
+			//Makes the next get_or_create() call on_data_fill() again.
+			void unset_data_created ( void );
+
+		public:
+			T& get_or_create ( void );
+
+		private:
+			//Hooks implemented by derived classes.
+			virtual void on_data_fill ( void ) = 0;
+			virtual T& on_data_get ( void ) = 0;
+
+			bool m_data_created;
+#if defined(LEANBASE_ASSERT_REENTRANCY)
+			//True while get_or_create() is running on this object.
+			bool m_inside_call;
+#endif
+		};
+
+		//Starts with no data created and, when the reentrancy check is
+		//compiled in, with the "inside call" flag cleared.
+		template <typename T>
+		LeanBase<T>::LeanBase() :
+			m_data_created(false)
+#if defined(LEANBASE_ASSERT_REENTRANCY)
+			, m_inside_call(false)
+#endif
+		{
+		}
+
+		//Returns the task's data, calling on_data_fill() first if the data
+		//has not been created yet (or was reset via unset_data_created()).
+		//When LEANBASE_ASSERT_REENTRANCY is defined, calling this again on
+		//the same object while a call is in progress trips an assert.
+		template <typename T>
+		T& LeanBase<T>::get_or_create() {
+#if defined(LEANBASE_ASSERT_REENTRANCY)
+			assert(not m_inside_call);
+			AutoSetBool auto_bool(&m_inside_call);
+#endif
+
+			if (not m_data_created) {
+				//NOTE(review): the flag is set before on_data_fill() runs, so
+				//if the fill throws the data stays marked as created and
+				//later calls skip the fill - confirm this is intended.
+				m_data_created = true;
+				this->on_data_fill();
+			}
+			return this->on_data_get();
+		}
+
+		//Clears the "created" flag so that the next get_or_create() fills the
+		//data again. Asserts that the data had been created.
+		template <typename T>
+		void LeanBase<T>::unset_data_created() {
+			assert(m_data_created);
+			m_data_created = false;
+		}
+	} //namespace scantask
+} //namespace mchlib
+
+#endif
--- a/include/dindexer-machinery/scantask/mediatype.hpp
+++ b/include/dindexer-machinery/scantask/mediatype.hpp
@ -0,0 +1,52 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef id25B0BCA6D9464754920E1BC7C5D9DB57
+#define id25B0BCA6D9464754920E1BC7C5D9DB57
+
+#include "dindexer-machinery/scantask/leanbase.hpp"
+#include "dindexer-machinery/scantask/base.hpp"
+#include "dindexer-machinery/mediatypes.hpp"
+#include <string>
+#include <memory>
+
+namespace mchlib {
+	struct SetRecordDataFull;
+
+	namespace scantask {
+		//Task over a SetRecordDataFull that wraps the set task and carries a
+		//default media type.
+		//NOTE(review): presumably this stores the (given or detected) media
+		//type into the set data - implementation not visible here, confirm.
+		class MediaType : public LeanBase<SetRecordDataFull> {
+		public:
+			using SetTaskType = std::shared_ptr<LeanBase<SetRecordDataFull>>;
+
+			//parDefault is received as a char while m_default is a
+			//MediaTypes. The search path and force flag are only stored as
+			//members when WITH_MEDIA_AUTODETECT is defined.
+			MediaType ( SetTaskType parSet, char parDefault, bool parForce, std::string parSearchPath );
+			virtual ~MediaType ( void ) noexcept = default;
+
+		private:
+			//LeanBase hooks.
+			virtual void on_data_fill ( void ) override;
+			virtual SetRecordDataFull& on_data_get ( void ) override;
+
+			SetTaskType m_set_task;
+			MediaTypes m_default;
+#if defined(WITH_MEDIA_AUTODETECT)
+			std::string m_search_path;
+			bool m_force;
+#endif
+		};
+	} //namespace scantask
+} //namespace mchlib
+
+#endif
--- a/include/dindexer-machinery/scantask/mime.hpp
+++ b/include/dindexer-machinery/scantask/mime.hpp
@ -0,0 +1,46 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef idE173D2BA33744F448B870BB53AF52610
+#define idE173D2BA33744F448B870BB53AF52610
+
+#include "dindexer-machinery/scantask/base.hpp"
+#include "dindexer-machinery/scantask/leanbase.hpp"
+#include <vector>
+#include <memory>
+
+namespace mchlib {
+	struct FileRecordData;
+
+	namespace scantask {
+		//Task over a list of FileRecordData that wraps a data-owning task
+		//producing such a list.
+		//NOTE(review): presumably this fills the mime fields of each record -
+		//implementation not visible here, confirm.
+		class Mime : public LeanBase<std::vector<FileRecordData>> {
+		public:
+			using DirTreeTaskPtr = std::shared_ptr<Base<std::vector<FileRecordData>>>;
+
+			explicit Mime ( DirTreeTaskPtr parDirTree );
+			virtual ~Mime ( void ) noexcept;
+
+		private:
+			//LeanBase hooks.
+			virtual void on_data_fill ( void ) override;
+			virtual std::vector<FileRecordData>& on_data_get ( void ) override;
+
+			DirTreeTaskPtr m_file_tree_task;
+		};
+	} //namespace scantask
+} //namespace mchlib
+
+#endif
--- a/include/dindexer-machinery/scantask/setbasic.hpp
+++ b/include/dindexer-machinery/scantask/setbasic.hpp
@ -0,0 +1,42 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef idB323CEFC89C2468CA0A341C312C6C2C0
+#define idB323CEFC89C2468CA0A341C312C6C2C0
+
+#include "dindexer-machinery/scantask/base.hpp"
+#include "dindexer-machinery/recorddata.hpp"
+#include <string>
+
+namespace mchlib {
+	namespace scantask {
+		//Task owning a SetRecordDataFull, created from a set name.
+		//NOTE(review): presumably on_data_create() stores m_set_name into the
+		//record - implementation not visible here, confirm.
+		class SetBasic : public Base<SetRecordDataFull> {
+			typedef Base<SetRecordDataFull> ParentType;
+		public:
+			explicit SetBasic ( std::string&& parName );
+			virtual ~SetBasic ( void ) noexcept;
+
+		private:
+			//Base hooks operating on the owned record.
+			virtual void on_data_destroy ( SetRecordDataFull& parData ) override;
+			virtual void on_data_create ( SetRecordDataFull& parData ) override;
+
+			std::string m_set_name;
+		};
+	} //namespace scantask
+} //namespace mchlib
+
+#endif
--- a/include/dindexer-machinery/set_listing.hpp
+++ b/include/dindexer-machinery/set_listing.hpp
@ -40,19 +40,21 @@ namespace mchlib {
 	template <bool Const>
 	implem::DirIterator<Const> first_file ( SetListingView<Const>& parList );

+	typedef FileRecordData SetListingItemType;
+
 	namespace implem {
 		template <bool Const>
-		class DirIterator : public boost::iterator_facade<DirIterator<Const>, FileRecordData, boost::forward_traversal_tag> {
+		class DirIterator : public boost::iterator_facade<DirIterator<Const>, SetListingItemType, boost::forward_traversal_tag> {
 			friend class mchlib::SetListingView<Const>;
 			friend class boost::iterator_core_access;
 			template <bool> friend class DirIterator;
-			typedef boost::iterator_facade<DirIterator<Const>, FileRecordData, boost::forward_traversal_tag> base_class;
+			typedef boost::iterator_facade<DirIterator<Const>, SetListingItemType, boost::forward_traversal_tag> base_class;
 			struct enabler {};
 		public:
 			typedef typename std::conditional<
 				Const,
-				std::vector<mchlib::FileRecordData>::const_iterator,
-				std::vector<mchlib::FileRecordData>::iterator
+				std::vector<SetListingItemType>::const_iterator,
+				std::vector<SetListingItemType>::iterator
 			>::type VecIterator;
 			typedef typename base_class::difference_type difference_type;
 			typedef typename base_class::value_type value_type;
@ -127,7 +129,7 @@ namespace mchlib {

 	class SetListing {
 	public:
-		typedef std::vector<FileRecordData> ListType;
+		typedef std::vector<SetListingItemType> ListType;
 		typedef implem::DirIterator<true> const_iterator;

 		explicit SetListing ( ListType&& parList, bool parSort=true );
--- a/include/dindexer-common/compatibility.h
+++ b/include/dindexer-common/compatibility.h
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@ -2,7 +2,6 @@ project(${bare_name}-common CXX C)

 add_library(${PROJECT_NAME}
 	commandline.cpp
-	mediatypes.cpp
 	settings.cpp
 	validationerror.cpp
 	common_info.cpp
@ -25,3 +24,10 @@ target_link_libraries(${PROJECT_NAME}
 #	RUNTIME DESTINATION bin
 #	ARCHIVE DESTINATION lib/static
 #)
+
+#Compile this library with -fPIC on x86_64 so that it can be linked into
+#shared objects (.so).
+#see https://cmake.org/pipermail/cmake/2007-May/014350.html
+#and http://stackoverflow.com/questions/6093547/what-do-r-x86-64-32s-and-r-x86-64-64-relocation-mean/6093910#6093910
+#NOTE(review): the POSITION_INDEPENDENT_CODE target property looks like the
+#portable way to request this - confirm against the minimum CMake version.
+if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" )
+	set_target_properties(${PROJECT_NAME} PROPERTIES COMPILE_FLAGS "-fPIC")
+endif()
--- a/src/delete/commandline.hpp
+++ b/src/delete/commandline.hpp
@ -19,7 +19,6 @@
 #define idB6191389C4AD4EE5862CCF1591BE6CE5

 #include "dindexer-common/validationerror.hpp"
-#include "dindexer-common/mediatypes.hpp"
 #include <boost/program_options/variables_map.hpp>

 namespace din {
--- a/src/locate/CMakeLists.txt
+++ b/src/locate/CMakeLists.txt
@ -4,6 +4,7 @@ add_executable(${PROJECT_NAME}
 	main.cpp
 	commandline.cpp
 	postgre_locate.cpp
+	hash.cpp
 )

 target_include_directories(${PROJECT_NAME}
@ -13,6 +14,7 @@ target_include_directories(${PROJECT_NAME}
 target_link_libraries(${PROJECT_NAME}
 	PRIVATE ${bare_name}-if
 	PRIVATE ${bare_name}-common
+	PRIVATE ${bare_name}-machinery
 )

 string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}")
--- a/src/locate/commandline.cpp
+++ b/src/locate/commandline.cpp
@ -28,6 +28,7 @@ namespace din {
 		set_options.add_options()
 			("case-insensitive,i", "Disable case sensitivity during search")
 			("set,s", "Look for matching sets instead of files")
+			("byhash,a", "Paths on the command line are local paths and searching should be done by content hash")
 			//("option,o", po::value<std::string>()->default_value("default_value"), "Help message")
 			//("option2", po::value<int>(), "Help message")
 		;
--- a/src/locate/commandline.hpp
+++ b/src/locate/commandline.hpp
@ -19,7 +19,6 @@
 #define id1B7A42F6E46547A6AB0F914E2A91399F

 #include "dindexer-common/validationerror.hpp"
-#include "dindexer-common/mediatypes.hpp"
 #include <boost/program_options/variables_map.hpp>

 namespace din {
--- a/src/locate/hash.cpp
+++ b/src/locate/hash.cpp
@ -0,0 +1,98 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hash.hpp"
+#include "dindexer-machinery/scantask/hashing.hpp"
+#include "dindexer-machinery/scantask/dirtree.hpp"
+#include "dindexer-machinery/recorddata.hpp"
+#include <memory>
+#include <sys/stat.h>
+#include <stdexcept>
+#include <utility>
+#include <cassert>
+#include <ciso646>
+
+namespace stask = mchlib::scantask;
+
+namespace din {
+	namespace {
+		//Task producing a one-element list that describes a single file, so
+		//that a lone file can be handed to the hashing task in place of a
+		//directory tree task.
+		class SingleFileTask : public stask::Base<std::vector<mchlib::FileRecordData>> {
+		public:
+			typedef std::vector<mchlib::FileRecordData> PathList;
+
+			SingleFileTask ( std::string parPath, const struct stat* parStat );
+			virtual ~SingleFileTask ( void ) noexcept = default;
+
+		private:
+			//Base hooks operating on the owned list.
+			virtual void on_data_destroy ( PathList& parData ) override;
+			virtual void on_data_create ( PathList& parData ) override;
+
+			std::string m_path;
+			//Not owned; dereferenced in on_data_create(), so the pointed-to
+			//struct must still be alive when the data gets created.
+			const struct stat* m_stat;
+		};
+
+		//Keeps a copy of the path and the (non-owned) stat pointer. Both are
+		//expected to be non-empty/non-null, which is only checked by asserts.
+		SingleFileTask::SingleFileTask (std::string parPath, const struct stat* parStat) :
+			m_path(std::move(parPath)),
+			m_stat(parStat)
+		{
+			assert(not m_path.empty());
+			assert(m_stat);
+		}
+
+		//Empties the list; it is expected to hold the record created earlier.
+		void SingleFileTask::on_data_destroy (PathList& parData) {
+			assert(not parData.empty());
+			parData.clear();
+		}
+
+		//Fills the (empty) list with exactly one record for m_path: relative
+		//path offset 0, access/modification times taken from m_stat, level 0,
+		//flagged as neither a directory nor a symlink.
+		void SingleFileTask::on_data_create (PathList& parData) {
+			assert(parData.empty());
+			parData.reserve(1);
+			parData.push_back(mchlib::FileRecordData(
+				std::string(m_path),
+				0,
+				m_stat->st_atime,
+				m_stat->st_mtime,
+				0,
+				false,
+				false
+			));
+		}
+	} //unnamed namespace
+
+	//Computes the hash for the local path parPath and returns the hash of the
+	//first record produced by the hashing task. Directories are fed through a
+	//DirTree task, regular files through a SingleFileTask.
+	//Throws std::runtime_error if parPath can't be stat()'ed, if it is
+	//neither a regular file nor a directory, or if hashing yields no records.
+	mchlib::TigerHash hash (const std::string& parPath) {
+		using mchlib::FileRecordData;
+		using HashingTaskPtr = std::shared_ptr<stask::Hashing>;
+
+		struct stat path_stat;
+		const int retval = stat(parPath.c_str(), &path_stat);
+		if (retval) {
+			throw std::runtime_error("Can't access file \"" + parPath + "\"");
+		}
+
+		std::shared_ptr<stask::Base<std::vector<FileRecordData>>> file_src_task;
+		if (S_ISDIR(path_stat.st_mode)) {
+			file_src_task.reset(new stask::DirTree(parPath));
+		}
+		else if (S_ISREG(path_stat.st_mode)) {
+			//path_stat is declared before the task, so it outlives it
+			file_src_task.reset(new SingleFileTask(parPath, &path_stat));
+		}
+		else {
+			//This was a debug-only assert: in release builds FIFOs, devices
+			//and sockets would have been handed to the hashing task.
+			throw std::runtime_error("\"" + parPath + "\" is neither a regular file nor a directory");
+		}
+
+		auto hashing = HashingTaskPtr(new stask::Hashing(file_src_task, false));
+		const auto& records = hashing->get_or_create();
+		if (records.empty()) {
+			//front() on an empty vector would be undefined behaviour
+			throw std::runtime_error("No entries to hash in \"" + parPath + "\"");
+		}
+		return records.front().hash;
+	}
+} //namespace din
--- a/src/locate/hash.hpp
+++ b/src/locate/hash.hpp
@ -0,0 +1,28 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef id3F3E29B28FAA44A190451198CF1FD166
+#define id3F3E29B28FAA44A190451198CF1FD166
+
+#include "dindexer-machinery/tiger.hpp"
+#include <string>
+#include <vector>
+
+namespace din {
+	//Hashes the local file or directory at parPath (see hash.cpp) and
+	//returns the resulting tiger hash. Throws std::runtime_error when the
+	//path can't be accessed.
+	mchlib::TigerHash hash ( const std::string& parPath );
+} //namespace din
+
+#endif
--- a/src/locate/main.cpp
+++ b/src/locate/main.cpp
@ -19,6 +19,7 @@
 #include "postgre_locate.hpp"
 #include "dindexer-common/settings.hpp"
 #include "dindexerConfig.h"
+#include "hash.hpp"
 #include <iostream>
 #include <ciso646>
 #include <iterator>
@ -73,7 +74,15 @@ int main (int parArgc, char* parArgv[]) {
 		std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedSet>(std::cout, "\n"));
 	}
 	else {
-		const auto results = din::locate_in_db(settings.db, vm["substring"].as<std::string>(), not not vm.count("case-insensitive"));
+		std::vector<din::LocatedItem> results;
+
+		if (vm.count("byhash")) {
+			const auto hash = din::hash(vm["substring"].as<std::string>());
+			results = din::locate_in_db(settings.db, hash);
+		}
+		else {
+			results = din::locate_in_db(settings.db, vm["substring"].as<std::string>(), not not vm.count("case-insensitive"));
+		}
 		std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedItem>(std::cout, "\n"));
 	}
 	return 0;
--- a/src/locate/postgre_locate.cpp
+++ b/src/locate/postgre_locate.cpp
@ -17,6 +17,7 @@

 #include "postgre_locate.hpp"
 #include "pq/connection.hpp"
+#include "dindexer-machinery/tiger.hpp"
 #include <utility>
 #include <sstream>
 #include <boost/utility/string_ref.hpp>
@ -53,10 +54,25 @@ namespace din {

 			return std::move(retval);
 		}
+
+		//Turns every row of parResult into a LocatedItem built from its "path", "id" and "group_id" columns.
+		std::vector<LocatedItem> file_result_to_vec (pq::ResultSet&& parResult) {
+			using boost::lexical_cast;
+
+			std::vector<LocatedItem> retval;
+			retval.reserve(parResult.size());
+			for (const auto& record : parResult) {
+				retval.push_back(LocatedItem{
+					record["path"],
+					lexical_cast<decltype(LocatedItem::id)>(record["id"]),
+					lexical_cast<decltype(LocatedItem::group_id)>(record["group_id"])
+				});
+			}
+			return retval; //plain return of the local: std::move() here would only inhibit copy elision
+		}
 	} //unnamed namespace

 	std::vector<LocatedItem> locate_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) {
-		using boost::lexical_cast;
 		using boost::string_ref;
 		namespace ba = boost::algorithm;

@ -78,17 +94,15 @@ namespace din {
 		oss << "LIMIT " << g_max_results << ';';

 		auto result = conn.query(oss.str());
-		std::vector<LocatedItem> retval;
-		retval.reserve(result.size());
-		for (const auto& record : result) {
-			retval.push_back(LocatedItem{
-				record["path"],
-				lexical_cast<decltype(LocatedItem::id)>(record["id"]),
-				lexical_cast<decltype(LocatedItem::group_id)>(record["group_id"])
-			});
-		}
+		return file_result_to_vec(std::move(result));
+	}

-		return std::move(retval);
+	//Looks files up by hash: returns at most g_max_results rows of "files" whose "hash" column matches parSearch.
+	std::vector<LocatedItem> locate_in_db (const dinlib::SettingsDB& parDB, const mchlib::TigerHash& parSearch) {
+		std::ostringstream oss;
+		oss << "SELECT \"path\",\"id\",\"group_id\" FROM \"files\" WHERE \"hash\"=$1 LIMIT " << g_max_results << ';';
+
+		auto conn = make_pq_conn(parDB);
+		//The hash is passed as the $1 query parameter, in its string form
+		auto rows = conn.query(oss.str(), mchlib::tiger_to_string(parSearch, true));
+		return file_result_to_vec(std::move(rows));
 	}

 	std::vector<LocatedSet> locate_sets_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) {
--- a/src/locate/postgre_locate.hpp
+++ b/src/locate/postgre_locate.hpp
@ -23,6 +23,10 @@
 #include <string>
 #include <cstdint>

+namespace mchlib {
+	struct TigerHash;
+} //namespace mchlib
+
 namespace din {
 	struct LocatedItem {
 		std::string path;
@ -38,6 +42,7 @@ namespace din {
 	};

 	std::vector<LocatedItem> locate_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive );
+	std::vector<LocatedItem> locate_in_db ( const dinlib::SettingsDB& parDB, const mchlib::TigerHash& parSearch );
 	std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive );
 	std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, const std::vector<uint32_t>& parSets, bool parCaseInsensitive );
 } //namespace din
--- a/src/machinery/CMakeLists.txt
+++ b/src/machinery/CMakeLists.txt
@ -4,7 +4,6 @@ include(WithMediaAutodetect)
 find_package(Magic REQUIRED)

 add_library(${PROJECT_NAME} SHARED
-	indexer.cpp
 	pathname.cpp
 	tiger.c
 	tiger.cpp
@ -12,10 +11,17 @@ add_library(${PROJECT_NAME} SHARED
 	filesearcher.cpp
 	discinfo.cpp
 	mediatype.cpp
+	mediatypes.cpp
 	machinery_info.cpp
 	guess_content_type.cpp
 	set_listing.cpp
 	globbing.cpp
+	scantask/dirtree.cpp
+	scantask/mediatype.cpp
+	scantask/hashing.cpp
+	scantask/contenttype.cpp
+	scantask/mime.cpp
+	scantask/setbasic.cpp
 )

 #target_include_directories(${PROJECT_NAME}
@ -29,6 +35,10 @@ target_link_libraries(${PROJECT_NAME}
 	PRIVATE ${MAGIC_LIBRARIES}
 )

+target_include_directories(${PROJECT_NAME}
+	PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
+)
+
 if (DINDEXER_WITH_MEDIA_AUTODETECT)
 	target_include_directories(${PROJECT_NAME} SYSTEM
 		PRIVATE ${BLKID_INCLUDE_DIRS}
--- a/src/machinery/filesearcher.cpp
+++ b/src/machinery/filesearcher.cpp
@ -15,7 +15,7 @@
 * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
 */

-#include "dindexer-machinery/filesearcher.hpp"
+#include "filesearcher.hpp"

 #if !defined(_XOPEN_SOURCE)
 #define _XOPEN_SOURCE 500
@ -59,6 +59,7 @@ namespace fastf {
      st.is_symlink = parSymlink;
      st.atime = parStat->st_atime;
      st.mtime = parStat->st_mtime;
+      st.size = 0;

      if (not (*g_searchOptions.callback)(parPath, st))
        return FTW_STOP;
@ -81,6 +82,7 @@ namespace fastf {
      st.is_symlink = parSymlink;
      st.atime = parStat->st_atime;
      st.mtime = parStat->st_mtime;
+      st.size = parStat->st_size;

      if (extensions.empty()) {
        if (not (*g_searchOptions.callback)(parPath, st)) {
--- a/include/dindexer-machinery/filesearcher.hpp
+++ b/include/dindexer-machinery/filesearcher.hpp
--- a/include/dindexer-machinery/filestats.hpp
+++ b/include/dindexer-machinery/filestats.hpp
@ -19,12 +19,16 @@
 #define id4A7D7AB671954418939FC0BDA19C5B3F

 #include <ctime>
+#include <cstdint>

 namespace fastf {
 	struct FileStats {
-		int level;
+		static_assert(sizeof(std::time_t) >= sizeof(uint64_t), "Reorder members or comment out this assertion"); //NOTE(review): presumably guards the largest-first member order below - confirm
+
 		std::time_t atime;
 		std::time_t mtime;
+		uint64_t size; //set by the file searcher callbacks: either the entry's st_size or 0
+		uint16_t level; //NOTE(review): narrowed from int; looks like the depth of the entry in the scanned tree - confirm
 		bool is_dir;
 		bool is_symlink;
 	};
--- a/src/machinery/guess_content_type.cpp
+++ b/src/machinery/guess_content_type.cpp
@ -42,7 +42,7 @@ namespace mchlib {
 		};

 		struct EntryChecking {
-			typedef bool(*CheckerFunction)(dinlib::MediaTypes, const ConstSetListingView&, const std::vector<const FileRecordData*>&);
+			typedef bool(*CheckerFunction)(MediaTypes, const ConstSetListingView&, const std::vector<const FileRecordData*>&);

 			std::size_t max_total_entries;
 			CheckerFunction checker_func;
@ -87,8 +87,8 @@ namespace mchlib {
 			return std::move(retval);
 		}

-		bool identify_video_dvd (dinlib::MediaTypes parMediaType, const ConstSetListingView& parContent, const std::vector<const FileRecordData*>& parFlatContent ) {
-			if (parMediaType != dinlib::MediaType_DVD and parMediaType != dinlib::MediaType_Directory)
+		bool identify_video_dvd (MediaTypes parMediaType, const ConstSetListingView& parContent, const std::vector<const FileRecordData*>& parFlatContent ) {
+			if (parMediaType != MediaType_DVD and parMediaType != MediaType_Directory)
 				return false;

 			const auto items_count = count_listing_items(parContent);
@ -103,8 +103,8 @@ namespace mchlib {
 			return check_missing_content(parFlatContent, should_have).empty();
 		}

-		bool identify_video_cd (dinlib::MediaTypes parMediaType, const ConstSetListingView& parContent, const std::vector<const FileRecordData*>& parFlatContent) {
-			if (parMediaType != dinlib::MediaType_CDRom and parMediaType != dinlib::MediaType_Directory)
+		bool identify_video_cd (MediaTypes parMediaType, const ConstSetListingView& parContent, const std::vector<const FileRecordData*>& parFlatContent) {
+			if (parMediaType != MediaType_CDRom and parMediaType != MediaType_Directory)
 				return false;

 			const auto items_count = count_listing_items(parContent);
@ -121,7 +121,7 @@ namespace mchlib {
 		}
 	} //unnamed namespace

-	ContentTypes guess_content_type (dinlib::MediaTypes parMediaType, const ConstSetListingView& parContent, std::size_t parEntriesCount) {
+	ContentTypes guess_content_type (MediaTypes parMediaType, const ConstSetListingView& parContent, std::size_t parEntriesCount) {
 		if (boost::empty(parContent))
 			return ContentType_Empty;

@ -145,7 +145,7 @@ namespace mchlib {
 		return ContentType_Generic;
 	}

-	ContentTypes guess_content_type (dinlib::MediaTypes parMediaType, const std::vector<FileRecordData>& parContent) {
+	ContentTypes guess_content_type (MediaTypes parMediaType, const std::vector<FileRecordData>& parContent) {
 		if (parContent.empty())
 			return ContentType_Empty;

@ -155,7 +155,7 @@ namespace mchlib {
 		assert(std::equal(parContent.begin(), parContent.end(), SetListing(std::vector<FileRecordData>(parContent)).sorted_list().begin()));

 		//TODO: assert that the first item in the list is the shortest string
-		std::shared_ptr<PathName> pathname(new PathName(parContent.front().abs_path));
+		std::shared_ptr<PathName> pathname(new PathName(""));
 		ConstSetListingView view(parContent.begin(), parContent.end(), pathname->atom_count(), pathname);
 		assert(parContent.size() >= 1);
 		return guess_content_type(parMediaType, view, parContent.size() - 1);
--- a/src/machinery/indexer.cpp
+++ b/src/machinery/indexer.cpp
@ -1,526 +0,0 @@
-/* Copyright 2015, 2016, Michele Santullo
- * This file is part of "dindexer".
- *
- * "dindexer" is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * "dindexer" is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
- */
-
-//WARNING: buggy code - intermediate hash for directories that contain files
-//is likely wrong!
-//#define USE_LEGACY_HASH_DIR
-
-#include "dindexer-machinery/indexer.hpp"
-#include "pathname.hpp"
-#include "dindexer-machinery/tiger.hpp"
-#include "dindexer-common/settings.hpp"
-#include "dindexer-machinery/filestats.hpp"
-#include "mimetype.hpp"
-#include "dindexer-machinery/recorddata.hpp"
-#if !defined(USE_LEGACY_HASH_DIR)
-#	include "dindexer-machinery/set_listing.hpp"
-#endif
-#include <algorithm>
-#include <functional>
-#include <stdexcept>
-#if defined(WITH_PROGRESS_FEEDBACK)
-#	include <atomic>
-#	include <condition_variable>
-#endif
-#include <cstdint>
-#include <ciso646>
-#include <cassert>
-#include <boost/iterator/filter_iterator.hpp>
-#include <sstream>
-#include <iterator>
-
-#if defined(INDEXER_VERBOSE)
-#	include <iostream>
-#endif
-#include <boost/utility/string_ref.hpp>
-#include <boost/range/empty.hpp>
-
-namespace mchlib {
-	using HashType = decltype(FileRecordData::hash);
-
-	namespace {
-		typedef std::vector<FileRecordData>::iterator FileEntryIt;
-
-		void append_to_vec (std::vector<char>& parDest, const HashType& parHash, const std::string& parString) {
-			const auto old_size = parDest.size();
-			parDest.resize(old_size + sizeof(HashType) + parString.size());
-			std::copy(parHash.byte_data, parHash.byte_data + sizeof(HashType), parDest.begin() + old_size);
-			std::copy(parString.begin(), parString.end(), parDest.begin() + old_size + sizeof(HashType));
-		}
-
-		void append_to_vec (std::vector<char>& parDest, const std::string& parString) {
-			const auto old_size = parDest.size();
-			parDest.resize(old_size + parString.size());
-			std::copy(parString.begin(), parString.end(), parDest.begin() + old_size);
-		}
-
-#if !defined(USE_LEGACY_HASH_DIR)
-		void hash_dir (FileRecordData& parEntry, MutableSetListingView& parList, const PathName& parCurrDir, MimeType& parMime, bool parIgnoreErrors) {
-			assert(parEntry.is_directory);
-
-			parEntry.mime_full = parMime.analyze(parEntry.abs_path);
-
-			//Build a blob with the hashes and filenames of every directory that
-			//is a direct child of current entry
-			std::vector<char> dir_blob;
-#if defined(INDEXER_VERBOSE)
-			std::cout << "Making initial hash for " << parCurrDir << "...\n";
-#endif
-			for (auto it = parList.begin(); it != parList.end(); ++it) {
-				assert(parCurrDir == PathName(it->abs_path).pop_right());
-
-				PathName curr_subdir(it->abs_path);
-				const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
-				if (it->is_directory) {
-					auto cd_list = MutableSetListingView(it);
-					assert(boost::empty(cd_list) or cd_list.begin()->abs_path != it->abs_path);
-
-					hash_dir(*it, cd_list, curr_subdir, parMime, parIgnoreErrors);
-					append_to_vec(dir_blob, it->hash, relpath);
-				}
-				else {
-					append_to_vec(dir_blob, relpath);
-				}
-			}
-			tiger_data(dir_blob, parEntry.hash);
-			parEntry.size = 0;
-
-#if defined(INDEXER_VERBOSE)
-			std::cout << "Got intermediate hash for dir " << parCurrDir <<
-				": " << tiger_to_string(parEntry.hash) <<
-				' ' << parEntry.mime_type << '\n';
-#endif
-
-			//Now with the initial hash ready, let's start hashing files, if any
-			for (auto it = first_file(parList); it != parList.end(); ++it) {
-				assert(not it->is_directory);
-#if defined(INDEXER_VERBOSE)
-				std::cout << "Hashing file " << it->abs_path << "...";
-#endif
-				//TODO: notify callback
-				try {
-					tiger_file(it->abs_path, it->hash, parEntry.hash, it->size);
-					it->hash_valid = true;
-					it->mime_full = parMime.analyze(it->abs_path);
-					auto mime_pair = split_mime(it->mime_full);
-					it->mime_type = mime_pair.first;
-					it->mime_charset = mime_pair.second;
-				}
-				catch (const std::ios_base::failure& e) {
-					if (parIgnoreErrors) {
-						it->unreadable = true;
-						it->hash = HashType {};
-						if (it->mime_full.get().empty()) {
-							it->mime_full = "unknown";
-							it->mime_type = boost::string_ref(it->mime_full.get());
-							it->mime_charset = boost::string_ref(it->mime_full.get());
-						}
-					}
-					else {
-						throw e;
-					}
-				}
-
-#if defined(INDEXER_VERBOSE)
-				std::cout << ' ' << tiger_to_string(it->hash) << ' ' <<
-					"Mime type: \"" << it->mime_type << "\"\n";
-#endif
-			}
-
-#if defined(INDEXER_VERBOSE)
-			std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(parEntry.hash) << '\n';
-#endif
-			parEntry.hash_valid = true;
-			{
-				parEntry.mime_full = parMime.analyze(parEntry.abs_path);
-				auto mime_pair = split_mime(parEntry.mime_full);
-				parEntry.mime_type = mime_pair.first;
-				parEntry.mime_charset = mime_pair.second;
-			}
-		}
-#endif
-
-#if defined(USE_LEGACY_HASH_DIR)
-		void hash_dir (FileEntryIt parEntry, FileEntryIt parBegin, FileEntryIt parEnd, const PathName& parCurrDir, std::function<void(std::size_t)> parNextItemCallback, bool parIgnoreErrors, MimeType& parMime) {
-			assert(parEntry != parEnd);
-			assert(parEntry->is_directory);
-			FileRecordData& curr_entry = *parEntry;
-			auto& curr_entry_it = parEntry;
-
-			curr_entry.mime_full = parMime.analyze(curr_entry.abs_path);
-
-			//Build a blob with the hashes and filenames of every directory that
-			//is a direct child of current entry
-			{
-				std::vector<char> dir_blob;
-				auto it_entry = curr_entry_it;
-
-				while (
-					it_entry != parEnd and (
-						it_entry->level == curr_entry.level
-						or parCurrDir != PathName(it_entry->abs_path).pop_right()
-					//and (not it_entry->is_dir or (it_entry->level <= curr_entry.level
-					//and parCurrDir != PathName(it_entry->path).pop_right()))
-				)) {
-					assert(it_entry->level >= curr_entry.level);
-					++it_entry;
-				}
-
-#if defined(INDEXER_VERBOSE)
-				std::cout << "Making initial hash for " << parCurrDir << "...\n";
-#endif
-				while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->abs_path).pop_right()) {
-					PathName curr_subdir(it_entry->abs_path);
-					const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
-
-					if (it_entry->is_directory) {
-						hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback, parIgnoreErrors, parMime);
-						append_to_vec(dir_blob, it_entry->hash, relpath);
-					}
-					else {
-						append_to_vec(dir_blob, relpath);
-					}
-					++it_entry;
-				}
-
-				tiger_data(dir_blob, curr_entry.hash);
-				curr_entry.size = 0;
-#if defined(INDEXER_VERBOSE)
-				std::cout << "Got intermediate hash for dir " << parCurrDir <<
-					": " << tiger_to_string(curr_entry.hash) <<
-					' ' << curr_entry.mime_type << '\n';
-#endif
-			}
-
-			//Now with the initial hash ready, let's start hashing files, if any
-			{
-				auto it_entry = curr_entry_it;
-				while (
-					it_entry != parEnd
-					and (it_entry->is_directory
-						or it_entry->level != curr_entry_it->level + 1
-						or PathName(it_entry->abs_path).pop_right() != parCurrDir
-					)
-				) {
-					++it_entry;
-				}
-
-				while (it_entry != parEnd and not it_entry->is_directory and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->abs_path).pop_right() == parCurrDir) {
-					assert(not it_entry->is_directory);
-#if defined(INDEXER_VERBOSE)
-					std::cout << "Hashing file " << it_entry->abs_path << "...";
-#endif
-					parNextItemCallback(it_entry - parBegin);
-					try {
-						tiger_file(it_entry->abs_path, it_entry->hash, curr_entry_it->hash, it_entry->size);
-						it_entry->hash_valid = true;
-						it_entry->mime_full = parMime.analyze(it_entry->abs_path);
-						auto mime_pair = split_mime(it_entry->mime_full);
-						it_entry->mime_type = mime_pair.first;
-						it_entry->mime_charset = mime_pair.second;
-					}
-					catch (const std::ios_base::failure& e) {
-						if (parIgnoreErrors) {
-							it_entry->unreadable = true;
-							it_entry->hash = HashType {};
-							if (it_entry->mime_full.get().empty()) {
-								it_entry->mime_full = "unknown";
-								it_entry->mime_type = boost::string_ref(it_entry->mime_full.get());
-								it_entry->mime_charset = boost::string_ref(it_entry->mime_full.get());
-							}
-						}
-						else {
-							throw e;
-						}
-					}
-
-#if defined(INDEXER_VERBOSE)
-					std::cout << ' ' << tiger_to_string(it_entry->hash) << ' ' <<
-						"Mime type: \"" << it_entry->mime_type << "\"\n";
-#endif
-					++it_entry;
-				}
-			}
-
-#if defined(INDEXER_VERBOSE)
-			std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(curr_entry_it->hash) << '\n';
-#endif
-			curr_entry_it->hash_valid = true;
-			{
-				curr_entry_it->mime_full = parMime.analyze(curr_entry_it->abs_path);
-				auto mime_pair = split_mime(curr_entry_it->mime_full);
-				curr_entry_it->mime_type = mime_pair.first;
-				curr_entry_it->mime_charset = mime_pair.second;
-			}
-		}
-#endif
-
-		template <bool FileTrue=true>
-		struct IsFile {
-			bool operator() ( const FileRecordData& parEntry ) const { return parEntry.is_directory xor FileTrue; }
-		};
-
-		FileRecordData make_file_record_data (const char* parPath, const fastf::FileStats& parSt) {
-			return FileRecordData(
-				parPath,
-				parSt.atime,
-				parSt.mtime,
-				parSt.level,
-				parSt.is_dir,
-				parSt.is_symlink
-			);
-		}
-
-		bool file_record_data_lt (const FileRecordData& parLeft, const FileRecordData& parRight) {
-			const FileRecordData& l = parLeft;
-			const FileRecordData& r = parRight;
-			return
-				(l.level < r.level)
-				or (l.level == r.level and l.is_directory and not r.is_directory)
-				or (l.level == r.level and l.is_directory == r.is_directory and l.abs_path < r.abs_path)
-
-				//sort by directory - parent first, children later
-				//(level == o.level and is_dir and not o.is_dir)
-				//or (level == o.level and is_dir == o.is_dir and path < o.path)
-				//or (level > o.level + 1)
-				//or (level + 1 == o.level and is_dir and not o.is_dir and path < o.path)
-				//or (level + 1 == o.level and is_dir and not o.is_dir and path == PathName(o.path).dirname())
-				//or (level == o.level + 1 and not (o.is_dir and not is_dir and o.path == PathName(path).dirname()))
-			;
-		}
-
-		void populate_rel_paths (const PathName& parBase, std::vector<FileRecordData>& parItems) {
-			const std::size_t offset = parBase.str_path_size() + 1;
-			for (FileRecordData& itm : parItems) {
-				const auto curr_offset = std::min(offset, itm.abs_path.size());
-				itm.path = boost::string_ref(itm.abs_path).substr(curr_offset);
-				assert(itm.path.data());
-			}
-		}
-	} //unnamed namespace
-
-	struct Indexer::LocalData {
-		typedef std::vector<FileRecordData> PathList;
-
-		PathList paths;
-#if defined(WITH_PROGRESS_FEEDBACK)
-		std::atomic<std::size_t> done_count;
-		std::atomic<std::size_t> processing_index;
-		std::condition_variable step_notify;
-#endif
-		std::size_t file_count;
-		bool ignore_read_errors;
-	};
-
-	Indexer::Indexer() :
-		m_local_data(new LocalData)
-	{
-#if !defined(NDEBUG)
-		//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b", 2, true, false));
-		//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b/c/file.txt", 4, false, false));
-		//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b/c/file.c", 4, false, false));
-		//assert(FileEntry("/a/b/c/d", 4, true, false) < FileEntry("/a/b", 2, true, false));
-		//assert(FileEntry("/a/b/c/d", 4, true, false) < FileEntry("/a/b/c", 3, true, false));
-		//assert(FileEntry("/a/b/c/1.txt", 4, true, false) < FileEntry("/a/b/c/2.txt", 4, true, false));
-		//assert(not (FileEntry("/a/b/file.txt", 3, false, false) < FileEntry("/a/b", 2, true, false)));
-		//assert(not (FileEntry("/a", 1, true, false) < FileEntry("/a/b", 2, true, false)));
-		//assert(not (FileEntry("/a/b/1.txt", 3, false, false) < FileEntry("/a/b/c/f.txt", 4, true, false)));
-		//assert(not (FileEntry("/a/b/c/file.c", 4, false, false) < FileEntry("/a/b/c", 3, true, false)));
-#endif
-#if defined(WITH_PROGRESS_FEEDBACK)
-		m_local_data->done_count = 0;
-		m_local_data->processing_index = 0;
-#endif
-		m_local_data->file_count = 0;
-	}
-
-	Indexer::~Indexer() noexcept {
-	}
-
-	std::size_t Indexer::total_items() const {
-		return m_local_data->file_count;
-	}
-
-#if defined(WITH_PROGRESS_FEEDBACK)
-	std::size_t Indexer::processed_items() const {
-		return m_local_data->done_count;
-	}
-#endif
-
-	void Indexer::calculate_hash() {
-		PathName base_path(m_local_data->paths.front().abs_path);
-		std::sort(m_local_data->paths.begin(), m_local_data->paths.end(), &file_record_data_lt);
-		MimeType mime;
-
-#if defined(INDEXER_VERBOSE)
-		for (auto& itm : m_local_data->paths) {
-			itm.hash.part_a = 1;
-			itm.hash.part_b = 1;
-			itm.hash.part_c = 1;
-
-			if (itm.is_directory)
-				std::cout << "(D) ";
-			else
-				std::cout << "(F) ";
-			std::cout << itm.abs_path << " (" << itm.level << ")\n";
-		}
-		std::cout << "-----------------------------------------------------\n";
-#endif
-
-#if !defined(USE_LEGACY_HASH_DIR)
-		MutableSetListingView recordlist(m_local_data->paths.begin(), m_local_data->paths.end(), base_path.atom_count());
-#endif
-#if defined(WITH_PROGRESS_FEEDBACK)
-		m_local_data->done_count = 0;
-		hash_dir(
-#if defined(USE_LEGACY_HASH_DIR)
-			m_local_data->paths.begin(),
-			m_local_data->paths.begin(),
-			m_local_data->paths.end(),
-			base_path,
-			[=](std::size_t parNext) {
-				++m_local_data->done_count;
-				m_local_data->processing_index = parNext;
-				m_local_data->step_notify.notify_all();
-			},
-			m_local_data->ignore_read_errors,
-			mime
-#else
-			m_local_data->paths.front(),
-			recordlist,
-			base_path,
-			mime,
-			m_local_data->ignore_read_errors
-#endif
-		);
-
-		//TODO: re-enable after hash_dir sends progress notifications again
-		//assert(m_local_data->done_count == m_local_data->file_count);
-#else
-		hash_dir(
-#if defined(USE_LEGACY_HASH_DIR)
-			m_local_data->paths.begin(),
-			m_local_data->paths.begin(),
-			m_local_data->paths.end(),
-			base_path,
-			[](std::size_t) {},
-			m_local_data->ignore_read_errors,
-			mime
-#else
-			m_local_data->paths.front(),
-			recordlist,
-			base_path,
-			mime,
-			m_local_data->ignore_read_errors
-#endif
-		);
-#endif
-
-		populate_rel_paths(base_path, m_local_data->paths);
-
-#if defined(INDEXER_VERBOSE)
-		for (const auto& itm : m_local_data->paths) {
-			assert(not (1 == itm.hash.part_a and 1 == itm.hash.part_b and 1 == itm.hash.part_c));
-		}
-#endif
-	}
-
-	bool Indexer::add_path (const char* parPath, const fastf::FileStats& parStats) {
-		auto it_before = SetListing::lower_bound(
-			m_local_data->paths,
-			parPath,
-			parStats.level,
-			parStats.is_dir
-		);
-
-		m_local_data->paths.insert(
-			it_before,
-			make_file_record_data(parPath, parStats)
-		);
-		if (not parStats.is_dir) {
-			++m_local_data->file_count;
-		}
-		return true;
-	}
-
-#if defined(INDEXER_VERBOSE)
-	void Indexer::dump() const {
-		PathName base_path(m_local_data->paths.front().abs_path);
-
-		std::cout << "---------------- FILE LIST ----------------\n";
-		for (const auto& cur_itm : m_local_data->paths) {
-			if (not cur_itm.is_directory) {
-				PathName cur_path(cur_itm.abs_path);
-				std::cout << make_relative_path(base_path, cur_path).path() << '\n';
-			}
-		}
-		std::cout << "---------------- DIRECTORY LIST ----------------\n";
-		for (const auto& cur_itm : m_local_data->paths) {
-			if (cur_itm.is_directory) {
-				PathName cur_path(cur_itm.abs_path);
-				std::cout << make_relative_path(base_path, cur_path).path() << '\n';
-			}
-		}
-	}
-#endif
-
-	bool Indexer::empty() const {
-		return m_local_data->paths.size() < 2;
-	}
-
-#if defined(WITH_PROGRESS_FEEDBACK)
-	std::condition_variable& Indexer::step_notify() {
-		return m_local_data->step_notify;
-	}
-#endif
-
-#if defined(WITH_PROGRESS_FEEDBACK)
-	std::string Indexer::current_item() const {
-		if (m_local_data->paths.empty() or 0 == m_local_data->processing_index)
-			return std::string();
-
-		PathName base_path(m_local_data->paths.front().abs_path);
-		PathName ret_path(m_local_data->paths[m_local_data->processing_index].abs_path);
-		return make_relative_path(base_path, ret_path).path();
-	}
-#endif
-
-	std::string Indexer::operator[] (std::size_t parIndex) const {
-		if (parIndex >= m_local_data->file_count) {
-			std::ostringstream oss;
-			oss << "Requested index " << parIndex << " is out of range: only " << m_local_data->file_count << " items are available";
-			throw std::out_of_range(oss.str());
-		}
-
-		auto it = boost::make_filter_iterator<IsFile<>>(m_local_data->paths.begin(), m_local_data->paths.end());
-		assert(not m_local_data->paths.empty());
-		std::advance(it, parIndex);
-		return make_relative_path(PathName(m_local_data->paths.front().abs_path), PathName(it->abs_path)).path();
-	}
-
-	void Indexer::ignore_read_errors (bool parIgnore) {
-		m_local_data->ignore_read_errors = parIgnore;
-	}
-
-	const std::vector<FileRecordData>& Indexer::record_data() const {
-#if defined(WITH_PROGRESS_FEEDBACK)
-		//TODO: re-enable after hash_dir sends progress notifications again
-		//assert(m_local_data->done_count == m_local_data->file_count);
-#endif
-		return m_local_data->paths;
-	}
-} //namespace mchlib
--- a/src/machinery/mediatype.cpp
+++ b/src/machinery/mediatype.cpp
@ -40,23 +40,23 @@ namespace mchlib {
 	{
 	}

-	dinlib::MediaTypes guess_media_type (std::string&& parPath) {
+	MediaTypes guess_media_type (std::string&& parPath) {
 		DiscInfo info(std::move(parPath));
 		const DriveTypes drive_type = info.drive_type();
 		if (DriveType_HardDisk == drive_type) {
 			if (info.mountpoint() == PathName(info.original_path()).path())
-				return dinlib::MediaType_HardDisk;
+				return MediaType_HardDisk;
 			else
-				return dinlib::MediaType_Directory;
+				return MediaType_Directory;
 		}
 		else if (DriveType_Optical == drive_type) {
 			switch (info.optical_type()) {
 			case OpticalType_DVD:
-				return dinlib::MediaType_DVD;
+				return MediaType_DVD;
 			case OpticalType_CDRom:
-				return dinlib::MediaType_CDRom;
+				return MediaType_CDRom;
 			case OpticalType_BluRay:
-				return dinlib::MediaType_BluRay;
+				return MediaType_BluRay;
 			default:
 				throw UnknownMediaTypeException("Set autodetect failed because this media type is unknown, please specify the set type manually");
 			}
--- a/src/machinery/mediatypes.cpp
+++ b/src/machinery/mediatypes.cpp
@ -15,11 +15,11 @@
 * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
 */

-#include "dindexer-common/mediatypes.hpp"
+#include "dindexer-machinery/mediatypes.hpp"
 #include <map>
 #include <stdexcept>

-namespace dinlib {
+namespace mchlib {
 	const std::string& media_type_to_str (MediaTypes parType) {
 		static const std::map<MediaTypes, const std::string> types {
 			{MediaType_CDRom, "CD-Rom"},
@ -43,4 +43,8 @@ namespace dinlib {
 	MediaTypes char_to_media_type (char parMType) {
 		return static_cast<MediaTypes>(parMType);
 	}
-} //namespace dinlib
+
+	//Counterpart of char_to_media_type(): yields the character value underlying parMType.
+	char media_type_to_char (MediaTypes parMType) {
+		const char type_code = static_cast<char>(parMType);
+		return type_code;
+	}
+} //namespace mchlib
--- a/src/machinery/pathname.cpp
+++ b/src/machinery/pathname.cpp
@ -204,6 +204,17 @@ namespace mchlib {
 		return parStream;
 	}

+	//Returns the rightmost atom of parPath, or an empty string_ref when
+	//parPath contains no atoms at all.
+	const boost::string_ref basename (const PathName& parPath) {
+		const auto atoms = parPath.atom_count();
+		if (0 == atoms) {
+			return boost::string_ref("");
+		}
+
+		return parPath[atoms - 1];
+	}
+
 	PathName& PathName::pop_right() {
 		m_pool.pop();
 		return *this;
--- a/src/machinery/pathname.hpp
+++ b/src/machinery/pathname.hpp
@ -59,6 +59,7 @@ namespace mchlib {

 	PathName make_relative_path ( const PathName& parBasePath, const PathName& parOtherPath );
 	std::ostream& operator<< ( std::ostream& parStream, const PathName& parPath );
+	const boost::string_ref basename ( const PathName& parPath );
 } //namespace mchlib

 #endif
--- a/src/machinery/scantask/contenttype.cpp
+++ b/src/machinery/scantask/contenttype.cpp
@ -0,0 +1,46 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "dindexer-machinery/scantask/contenttype.hpp"
+#include "dindexer-machinery/guess_content_type.hpp"
+#include <cassert>
+
+namespace mchlib {
+	namespace scantask {
+		ContentType::ContentType (SetTaskType parSet, DirTreeTaskPtr parDirTree, MediaTypeTaskPtr parMediaType) :
+			m_set_task(parSet), //task owning the set record that on_data_fill() updates
+			m_dir_tree(parDirTree), //task providing the listing handed to guess_content_type()
+			m_media_type(parMediaType) //task providing the media type character
+		{
+			assert(m_set_task); //all three tasks are mandatory; this is only checked when asserts are enabled
+			assert(m_dir_tree);
+			assert(m_media_type);
+		}
+
+		SetRecordDataFull& ContentType::on_data_get() {
+			return m_set_task->get_or_create(); //the record belongs to the wrapped set task; on_data_fill() only writes its content_type
+		}
+
+		//Guesses the content type from the media type and the directory listing, then stores it into the set record.
+		void ContentType::on_data_fill() {
+			auto& set_record = m_set_task->get_or_create();
+			const auto media = char_to_media_type(m_media_type->get_or_create().type);
+			const auto& listing = m_dir_tree->get_or_create();
+			set_record.content_type = content_type_to_char(mchlib::guess_content_type(media, listing));
+		}
+	} //namespace scantask
+} //namespace mchlib
--- a/src/machinery/scantask/dirtree.cpp
+++ b/src/machinery/scantask/dirtree.cpp
@ -0,0 +1,107 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "dindexer-machinery/scantask/dirtree.hpp"
+#include "dindexer-machinery/recorddata.hpp"
+#include "dindexer-machinery/set_listing.hpp"
+#include "helpers/compatibility.h"
+#include "filesearcher.hpp"
+#include "pathname.hpp"
+#include <utility>
+#include <cassert>
+#include <ciso646>
+#include <functional>
+#include <algorithm>
+
+namespace mchlib {
+	namespace {
+		std::size_t calc_rel_path_offs ( const PathName& parRoot, boost::string_ref parPath ) a_pure;
+
+		//Returns the difference between the string size of parPath and the
+		//string size of the same path made relative to parRoot, i.e. how many
+		//characters making the path relative removes.
+		std::size_t calc_rel_path_offs (const PathName& parRoot, boost::string_ref parPath) {
+			PathName full_path(parPath);
+			const auto relative_len = make_relative_path(parRoot, full_path).str_path_size();
+			const auto full_len = full_path.str_path_size();
+			//The relative form can never be longer than the original path
+			assert(relative_len <= full_len);
+			return full_len - relative_len;
+		}
+
+		//Callback invoked for every entry reported by the file searcher.
+		//Builds a FileRecordData for parPath and inserts it into parOut at the
+		//position returned by SetListing::lower_bound() (presumably so that the
+		//list stays sorted - verify against SetListing). Always returns true.
+		bool add_path (scantask::DirTree::PathList& parOut, const PathName& parRoot, const char* parPath, const fastf::FileStats& parStats) {
+			using boost::string_ref;
+
+			const auto insert_pos = SetListing::lower_bound(
+				parOut,
+				parPath,
+				parStats.level,
+				parStats.is_dir
+			);
+			const std::size_t rel_path_offs = calc_rel_path_offs(parRoot, string_ref(parPath));
+
+			parOut.insert(
+				insert_pos,
+				FileRecordData(
+					parPath,
+					rel_path_offs,
+					parStats.atime,
+					parStats.mtime,
+					static_cast<uint16_t>(parStats.level),
+					static_cast<bool>(parStats.is_dir),
+					static_cast<bool>(parStats.is_symlink)
+				)
+			);
+			return true;
+		}
+	}
+
+	namespace scantask {
+		//Takes ownership of the root path to be scanned. The path must not be
+		//empty; this is only verified through assert().
+		DirTree::DirTree (std::string parRoot) :
+			m_root(std::move(parRoot))
+		{
+			assert(not m_root.empty());
+		}
+
+		//Discards every entry in the list.
+		void DirTree::on_data_destroy (PathList& parData) {
+			parData.clear();
+		}
+
+		//Populates parData, which must be empty on entry, by running a
+		//fastf::FileSearcher over m_root with symlink following enabled. Every
+		//entry the searcher reports is handed to add_path().
+		void DirTree::on_data_create (PathList& parData) {
+			using std::placeholders::_1;
+			using std::placeholders::_2;
+			using boost::string_ref;
+
+			assert(parData.empty());
+
+			//Both lists are passed to Search() empty
+			fastf::FileSearcher::ConstCharVecType ext, ignore;
+			fastf::FileSearcher searcher(m_root);
+			searcher.SetFollowSymlinks(true);
+
+			//The bound callback keeps its own copy of the root path
+			const PathName root_path = PathName(string_ref(m_root));
+			searcher.SetCallback(
+				fastf::FileSearcher::CallbackType(
+					std::bind(&add_path, std::ref(parData), root_path, _1, _2)
+				)
+			);
+			searcher.Search(ext, ignore);
+		}
+	} //namespace scantask
+} //namespace mchlib
--- a/src/machinery/scantask/hashing.cpp
+++ b/src/machinery/scantask/hashing.cpp
@ -0,0 +1,193 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "dindexer-machinery/scantask/hashing.hpp"
+#include "dindexer-machinery/recorddata.hpp"
+#include "dindexer-machinery/set_listing.hpp"
+#include "dindexer-machinery/tiger.hpp"
+#include "pathname.hpp"
+#include <cassert>
+#include <boost/range/empty.hpp>
+#include <boost/utility/string_ref.hpp>
+
+//#define INDEXER_VERBOSE
+
+#if defined(INDEXER_VERBOSE)
+#	include <iostream>
+#endif
+
+namespace mchlib {
+	namespace {
+		//Bundle of the values passed to the progress callback while hashing.
+		//Members have no default initialisers: whoever creates an instance has
+		//to set all of them before calling notify().
+		struct ProgressInfo {
+			scantask::Hashing::ProgressCallback callback;
+			//Non-owning view: the string it refers to must outlive its use
+			boost::string_ref curr_path;
+			uint64_t file_bytes_read;
+			uint64_t total_bytes_read;
+			uint32_t file_num;
+
+			//Invokes the callback with the current values
+			void notify ( void ) {
+				callback(curr_path, file_bytes_read, total_bytes_read, file_num);
+			}
+		};
+
+		//Appends sizeof(TigerHash) bytes taken from parHash.byte_data to
+		//parDest, immediately followed by the characters of parString.
+		void append_to_vec (std::vector<char>& parDest, const TigerHash& parHash, boost::string_ref parString) {
+			parDest.insert(parDest.end(), parHash.byte_data, parHash.byte_data + sizeof(TigerHash));
+			parDest.insert(parDest.end(), parString.begin(), parString.end());
+		}
+
+		//Appends the characters of parString to the end of parDest.
+		void append_to_vec (std::vector<char>& parDest, boost::string_ref parString) {
+			parDest.insert(parDest.end(), parString.begin(), parString.end());
+		}
+
+		//Recursively computes the hash of the directory parEntry and of
+		//everything found in parList.
+		//  parEntry         directory record being hashed; its hash and
+		//                   hash_valid fields are written
+		//  parList          view over the direct children of parEntry
+		//  parIgnoreErrors  when true, a file that cannot be read is flagged as
+		//                   unreadable and gets an empty hash; when false the
+		//                   read error is propagated to the caller
+		//  parProgressInfo  running counters; the callback is notified once
+		//                   per file, before the file is hashed
+		//Throws std::ios_base::failure (or whatever type derived from it was
+		//originally thrown) on read errors if parIgnoreErrors is false.
+		void hash_dir (FileRecordData& parEntry, MutableSetListingView& parList, bool parIgnoreErrors, ProgressInfo& parProgressInfo) {
+			assert(parEntry.is_directory);
+
+			//Build a blob out of the direct children of the current entry:
+			//sub-directories contribute their hash followed by their basename,
+			//plain files contribute their basename only
+			std::vector<char> dir_blob;
+#if defined(INDEXER_VERBOSE)
+			std::cout << "Making initial hash for " << parEntry.abs_path << "...\n";
+#endif
+			for (auto it = parList.begin(); it != parList.end(); ++it) {
+				assert(PathName(parEntry.abs_path) == PathName(it->abs_path).pop_right());
+
+				PathName curr_path(it->path);
+				const auto basename = mchlib::basename(curr_path);
+				if (it->is_directory) {
+					auto cd_list = MutableSetListingView(it);
+					assert(boost::empty(cd_list) or cd_list.begin()->abs_path != it->abs_path);
+
+					//Sub-directories are fully hashed before being added to the blob
+					hash_dir(*it, cd_list, parIgnoreErrors, parProgressInfo);
+					append_to_vec(dir_blob, it->hash, basename);
+				}
+				else {
+					append_to_vec(dir_blob, basename);
+				}
+			}
+			tiger_data(dir_blob, parEntry.hash);
+
+#if defined(INDEXER_VERBOSE)
+			std::cout << "Got intermediate hash for dir " << parEntry.abs_path <<
+				": " << tiger_to_string(parEntry.hash) <<
+				' ' << parEntry.mime_type << '\n';
+#endif
+
+			//Now with the initial hash ready, let's start hashing files, if any
+			for (auto it = first_file(parList); it != parList.end(); ++it) {
+				assert(not it->is_directory);
+#if defined(INDEXER_VERBOSE)
+				std::cout << "Hashing file " << it->abs_path << "...\n";
+#endif
+				//TODO: file_bytes_read is never updated in here, so the callback
+				//always receives the value the caller initialised it with
+				try {
+					++parProgressInfo.file_num;
+					parProgressInfo.curr_path = it->abs_path;
+					parProgressInfo.notify();
+
+					//tiger_file() is also handed the directory hash, presumably
+					//to fold the file into it - confirm against tiger.hpp
+					tiger_file(it->abs_path, it->hash, parEntry.hash, it->size);
+					it->hash_valid = true;
+					parProgressInfo.total_bytes_read += it->size;
+				}
+				catch (const std::ios_base::failure&) {
+					if (parIgnoreErrors) {
+						it->unreadable = true;
+						it->hash = TigerHash {};
+					}
+					else {
+						//Bare rethrow: keeps the original exception object and
+						//its dynamic type instead of throwing a sliced copy
+						throw;
+					}
+				}
+			}
+
+#if defined(INDEXER_VERBOSE)
+			std::cout << "Final hash for dir " << parEntry.abs_path << " is " << tiger_to_string(parEntry.hash) << '\n';
+#endif
+			parEntry.hash_valid = true;
+		}
+
+		//No-op progress callback: ignores all of its arguments.
+		void dummy_progress_callback (const boost::string_ref /*parPath*/, uint64_t /*parFileBytes*/, uint64_t /*parTotalBytes*/, uint32_t /*parFileNum*/) {
+		}
+	} //unnamed namespace
+
+	namespace scantask {
+		//Stores the file tree task and the error policy. The progress callback
+		//starts out as dummy_progress_callback. parFileTree must not be null;
+		//this is only verified through assert().
+		Hashing::Hashing (std::shared_ptr<FileTreeBase> parFileTree, bool parIgnoreErrors) :
+			m_file_tree_task(parFileTree),
+			m_progress_callback(&dummy_progress_callback),
+			m_ignore_errors(parIgnoreErrors)
+		{
+			assert(m_file_tree_task);
+		}
+
+		//Nothing to release by hand; defined out of line in this file.
+		Hashing::~Hashing() noexcept = default;
+
+		//Returns the file list obtained from the file tree task.
+		std::vector<FileRecordData>& Hashing::on_data_get() {
+			return m_file_tree_task->get_or_create();
+		}
+
+		//Hashes the entries of the list obtained from the file tree task.
+		//  - empty list: nothing is done
+		//  - first entry is a directory: the whole list is hashed through
+		//    hash_dir(), which also drives the progress callback
+		//  - otherwise the list is expected to hold exactly one file, which is
+		//    hashed on its own (the progress callback is not invoked here)
+		//Read errors (std::ios_base::failure) are propagated to the caller
+		//unless m_ignore_errors is set, in which case the affected file is
+		//flagged as unreadable and gets an empty hash.
+		void Hashing::on_data_fill() {
+			std::vector<FileRecordData>& file_list = m_file_tree_task->get_or_create();
+			if (file_list.empty()) {
+				return;
+			}
+
+			ProgressInfo progr_info;
+			progr_info.callback = m_progress_callback;
+			progr_info.curr_path = "";
+			progr_info.file_bytes_read = 0;
+			progr_info.total_bytes_read = 0;
+			progr_info.file_num = 0;
+
+			if (file_list.front().is_directory) {
+				MutableSetListingView recordlist(file_list.begin(), file_list.end(), 0);
+				hash_dir(file_list.front(), recordlist, m_ignore_errors, progr_info);
+			}
+			else {
+				assert(1 == file_list.size());
+				auto& curr_file_rec = file_list.front();
+				//There is no parent directory hash to pass along for a lone file
+				TigerHash dummy {};
+
+				try {
+					tiger_file(curr_file_rec.abs_path, curr_file_rec.hash, dummy, curr_file_rec.size);
+					curr_file_rec.hash_valid = true;
+				}
+				catch (const std::ios_base::failure&) {
+					if (m_ignore_errors) {
+						curr_file_rec.unreadable = true;
+						curr_file_rec.hash = TigerHash {};
+					}
+					else {
+						//Bare rethrow: keeps the original exception object and
+						//its dynamic type instead of throwing a sliced copy
+						throw;
+					}
+				}
+			}
+		}
+
+		//Installs parFunc as the progress callback. An empty parFunc restores
+		//the no-op dummy_progress_callback, so the stored callback is always
+		//callable.
+		void Hashing::set_progress_callback (ProgressCallback parFunc) {
+			if (!parFunc) {
+				m_progress_callback = &dummy_progress_callback;
+				return;
+			}
+			m_progress_callback = parFunc;
+		}
+	} //namespace scantask
+} //namespace mchlib
--- a/src/machinery/scantask/mediatype.cpp
+++ b/src/machinery/scantask/mediatype.cpp
@ -0,0 +1,62 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "dindexer-machinery/scantask/mediatype.hpp"
+//#include "dindexer-machinery/guess_content_type.hpp"
+#if defined(WITH_MEDIA_AUTODETECT)
+#	include "dindexer-machinery/mediatype.hpp"
+#endif
+#include "dindexer-machinery/recorddata.hpp"
+#include <cassert>
+#include <utility>
+
+namespace mchlib {
+	namespace scantask {
+		//Stores the set task and the default media type (converted from its
+		//char form). parForce and parSearchPath are only kept when the build
+		//defines WITH_MEDIA_AUTODETECT; otherwise they are deliberately
+		//ignored. parSet must not be null; this is only verified through
+		//assert().
+		MediaType::MediaType (SetTaskType parSet, char parDefault, bool parForce, std::string parSearchPath) :
+			m_set_task(parSet),
+			m_default(char_to_media_type(parDefault))
+#if defined(WITH_MEDIA_AUTODETECT)
+			, m_search_path(std::move(parSearchPath))
+			, m_force(parForce)
+#endif
+		{
+			assert(m_set_task);
+#if !defined(WITH_MEDIA_AUTODETECT)
+			//Silence unused parameter warnings
+			static_cast<void>(parForce);
+			static_cast<void>(parSearchPath);
+#endif
+		}
+
+		//Returns the set record obtained from the set task.
+		SetRecordDataFull& MediaType::on_data_get() {
+			return m_set_task->get_or_create();
+		}
+
+		//Writes the media type (in its char form) into the set record.
+		//With WITH_MEDIA_AUTODETECT the type is guessed from m_search_path
+		//unless m_force is set; in every other case m_default is used.
+		void MediaType::on_data_fill() {
+			auto& set_record = m_set_task->get_or_create();
+#if defined(WITH_MEDIA_AUTODETECT)
+			if (!m_force) {
+				const auto detected = mchlib::guess_media_type(std::string(m_search_path));
+				set_record.type = media_type_to_char(detected);
+				return;
+			}
+#endif
+			set_record.type = media_type_to_char(m_default);
+		}
+	} //namespace scantask
+} //namespace mchlib
--- a/src/machinery/scantask/mime.cpp
+++ b/src/machinery/scantask/mime.cpp
@ -0,0 +1,53 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "dindexer-machinery/scantask/mime.hpp"
+#include "dindexer-machinery/recorddata.hpp"
+#include "mimetype.hpp"
+#include <cassert>
+
+namespace mchlib {
+	namespace {
+	} //unnamed namespace
+
+	namespace scantask {
+		//Stores the directory tree task. It must not be null; this is only
+		//verified through assert().
+		Mime::Mime (DirTreeTaskPtr parDirTree) :
+			m_file_tree_task(parDirTree)
+		{
+			assert(m_file_tree_task);
+		}
+
+		//Nothing to release by hand; defined out of line in this file.
+		Mime::~Mime() noexcept = default;
+
+		//Analyzes every entry of the file list with a single MimeType object.
+		//For each entry it stores the full result in mime_full and the two
+		//parts returned by split_mime() in mime_type and mime_charset.
+		void Mime::on_data_fill() {
+			MimeType detector;
+
+			for (auto& entry : m_file_tree_task->get_or_create()) {
+				entry.mime_full = detector.analyze(entry.abs_path);
+				//The split has to come after mime_full is assigned, since it
+				//works on the stored value
+				const auto parts = split_mime(entry.mime_full);
+				entry.mime_type = parts.first;
+				entry.mime_charset = parts.second;
+			}
+		}
+
+		//Returns the file list obtained from the file tree task.
+		std::vector<FileRecordData>& Mime::on_data_get() {
+			return m_file_tree_task->get_or_create();
+		}
+	} //namespace scantask
+} //namespace mchlib
--- a/src/machinery/scantask/setbasic.cpp
+++ b/src/machinery/scantask/setbasic.cpp
@ -0,0 +1,41 @@
+/* Copyright 2015, 2016, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "dindexer-machinery/scantask/setbasic.hpp"
+#include <utility>
+
+namespace mchlib {
+	namespace scantask {
+		//Takes ownership of the set name by moving it into m_set_name.
+		SetBasic::SetBasic (std::string&& parName) :
+			m_set_name(std::move(parName))
+		{
+		}
+
+		//Nothing to release by hand; defined out of line in this file.
+		SetBasic::~SetBasic() noexcept = default;
+
+		//Clears both name fields of the record: the one reached through the
+		//SetRecordData part first, then the record's own.
+		void SetBasic::on_data_destroy (SetRecordDataFull& parData) {
+			auto& base_part = static_cast<SetRecordData&>(parData);
+			base_part.name.clear();
+			parData.name.clear();
+		}
+
+		//Copies the set name into the record, then sets the name reached
+		//through the SetRecordData part from the record's own copy. The order
+		//matters: the second assignment reads what the first one wrote.
+		void SetBasic::on_data_create (SetRecordDataFull& parData) {
+			auto& base_part = static_cast<SetRecordData&>(parData);
+			parData.name = m_set_name;
+			base_part.name = parData.name;
+		}
+	} //namespace scantask
+} //namespace mchlib
--- a/src/machinery/set_listing.cpp
+++ b/src/machinery/set_listing.cpp
@ -42,8 +42,8 @@ namespace mchlib {
 		};

 		template <typename OtherRecord>
-		bool file_record_data_lt (const FileRecordData& parLeft, const OtherRecord& parRight) {
-			const FileRecordData& l = parLeft;
+		bool file_record_data_lt (const SetListingItemType& parLeft, const OtherRecord& parRight) {
+			const SetListingItemType& l = parLeft;
 			const OtherRecord& r = parRight;
 			return
 				(l.level < r.level)
@ -99,14 +99,14 @@ namespace mchlib {
 		{
 			assert(parBasePath);
 			assert(m_base_path or m_current == m_end);
-			assert(m_current == m_end or m_base_path->atom_count() == PathName(m_current->abs_path).atom_count());
+			assert(m_current == m_end or m_base_path->atom_count() == PathName(m_current->path).atom_count() + parLevelOffset);
 			assert(m_current == m_end or m_base_path->atom_count() == m_current->level + m_level_offset);

 			//Look for the point where the children of this entry start
 			while (
 				m_current != m_end and (
 					m_current->level + m_level_offset == m_base_path->atom_count() or
-					*m_base_path != PathName(m_current->abs_path).pop_right()
+					*m_base_path != PathName(m_current->path).pop_right()
 			)) {
 				assert(m_base_path);
 				++m_current;
@ -157,13 +157,13 @@ namespace mchlib {

 		template <bool Const>
 		void DirIterator<Const>::increment() {
-			assert(PathName(m_current->abs_path).pop_right() == *m_base_path);
+			assert(PathName(m_current->path).pop_right() == *m_base_path);
 			do {
 				++m_current;
 			} while(
 				m_current != m_end and
 				m_current->level + m_level_offset == m_base_path->atom_count() + 1 and
-				*m_base_path != PathName(m_current->abs_path).pop_right()
+				*m_base_path != PathName(m_current->path).pop_right()
 			);
 		}

@ -222,7 +222,7 @@ namespace mchlib {
 			assert(std::equal(m_list.begin(), m_list.end(), SetListing(ListType(m_list), true).sorted_list().begin()));
 		}
 		if (not m_list.empty()) {
-			m_base_path.reset(new PathName(m_list.front().abs_path));
+			m_base_path.reset(new PathName(m_list.front().path));
 		}
 	}

@ -258,7 +258,7 @@ namespace mchlib {
 		return std::count_if(
 			m_list.begin(),
 			m_list.end(),
-			[] (const FileRecordData& parItm) {
+			[] (const SetListingItemType& parItm) {
 				return not parItm.is_directory;
 			}
 		);
@ -268,7 +268,7 @@ namespace mchlib {
 		return std::count_if(
 			m_list.begin(),
 			m_list.end(),
-			[] (const FileRecordData& parItm) {
+			[] (const SetListingItemType& parItm) {
 				return parItm.is_directory;
 			}
 		);
@ -279,7 +279,7 @@ namespace mchlib {
 	}

 	void SetListing::sort_list (ListType& parList) {
-		std::sort(parList.begin(), parList.end(), &file_record_data_lt<FileRecordData>);
+		std::sort(parList.begin(), parList.end(), &file_record_data_lt<SetListingItemType>);
 	}

 	SetListing::ListType::iterator SetListing::lower_bound (ListType& parList, const char* parPath, uint16_t parLevel, bool parIsDir) {
@ -289,17 +289,17 @@ namespace mchlib {
 	}

 	SetListingView<false> SetListing::make_view() {
-		const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().abs_path).atom_count());
+		const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().path).atom_count());
 		return SetListingView<false>(m_list.begin(), m_list.end(), offs, m_base_path);
 	}

 	SetListingView<true> SetListing::make_view() const {
-		const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().abs_path).atom_count());
+		const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().path).atom_count());
 		return SetListingView<true>(m_list.begin(), m_list.end(), offs, m_base_path);
 	}

 	SetListingView<true> SetListing::make_cview() const {
-		const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().abs_path).atom_count());
+		const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().path).atom_count());
 		return SetListingView<true>(m_list.begin(), m_list.end(), offs, m_base_path);
 	}

@ -311,7 +311,7 @@ namespace mchlib {
 		m_level_offset(parIter.m_level_offset)
 	{
 		if (m_begin != m_end) {
-			m_base_path.reset(new PathName(m_begin->abs_path));
+			m_base_path.reset(new PathName(m_begin->path));
 		}
 	}

@ -323,7 +323,7 @@ namespace mchlib {
 		m_level_offset(parLevelOffset)
 	{
 		if (m_begin != m_end) {
-			m_base_path.reset(new PathName(m_begin->abs_path));
+			m_base_path.reset(new PathName(m_begin->path));
 		}
 	}

--- a/src/navigate/commandline.hpp
+++ b/src/navigate/commandline.hpp
@ -19,7 +19,6 @@
 #define id1B7A42F6E46547A6AB0F914E2A91399F

 #include "dindexer-common/validationerror.hpp"
-#include "dindexer-common/mediatypes.hpp"
 #include <boost/program_options/variables_map.hpp>

 namespace din {
--- a/src/query/commandline.hpp
+++ b/src/query/commandline.hpp
@ -19,7 +19,6 @@
 #define id259FD7C96B5049ECB50386F25455FBB2

 #include "dindexer-common/validationerror.hpp"
-#include "dindexer-common/mediatypes.hpp"
 #include <boost/program_options/variables_map.hpp>

 namespace din {
--- a/src/scan/CMakeLists.txt
+++ b/src/scan/CMakeLists.txt
@ -16,7 +16,6 @@ target_link_libraries(${PROJECT_NAME}
 	PRIVATE ${bare_name}-if
 	PRIVATE ${bare_name}-common
 	PRIVATE ${bare_name}-machinery
-	PRIVATE optimized pthread
 )

 string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}")
--- a/src/scan/commandline.cpp
+++ b/src/scan/commandline.cpp
@ -27,14 +27,14 @@ namespace po = boost::program_options;
 namespace din {
 	namespace {
 		const char g_allowed_types[] = {
-			static_cast<char>(dinlib::MediaType_CDRom),
-			static_cast<char>(dinlib::MediaType_Directory),
-			static_cast<char>(dinlib::MediaType_DVD),
-			static_cast<char>(dinlib::MediaType_BluRay),
-			static_cast<char>(dinlib::MediaType_FloppyDisk),
-			static_cast<char>(dinlib::MediaType_HardDisk),
-			static_cast<char>(dinlib::MediaType_IomegaZip),
-			static_cast<char>(dinlib::MediaType_Other)
+			static_cast<char>(mchlib::MediaType_CDRom),
+			static_cast<char>(mchlib::MediaType_Directory),
+			static_cast<char>(mchlib::MediaType_DVD),
+			static_cast<char>(mchlib::MediaType_BluRay),
+			static_cast<char>(mchlib::MediaType_FloppyDisk),
+			static_cast<char>(mchlib::MediaType_HardDisk),
+			static_cast<char>(mchlib::MediaType_IomegaZip),
+			static_cast<char>(mchlib::MediaType_Other)
 		};
 	} //unnamed namespace

--- a/src/scan/commandline.hpp
+++ b/src/scan/commandline.hpp
@ -19,6 +19,7 @@
 #define id1B7A42F6E46547A6AB0F914E2A91399F

 #include <boost/program_options/variables_map.hpp>
+#include "dindexer-machinery/mediatypes.hpp"
 #include "dindexer-machinery/mediatype.hpp"

 namespace din {
--- a/src/scan/main.cpp
+++ b/src/scan/main.cpp
@ -21,35 +21,37 @@

 #include "dindexer-machinery/recorddata.hpp"
 #include "dindexerConfig.h"
-#include "dindexer-machinery/filesearcher.hpp"
-#include "dindexer-machinery/indexer.hpp"
 #include "dindexer-machinery/machinery_info.hpp"
 #include "dindexer-common/common_info.hpp"
 #include "dindexer-common/settings.hpp"
-#include "dindexer-machinery/guess_content_type.hpp"
 #include "commandline.hpp"
 #include "dbbackend.hpp"
+#include "dindexer-machinery/scantask/dirtree.hpp"
+#include "dindexer-machinery/scantask/mediatype.hpp"
+#include "dindexer-machinery/scantask/hashing.hpp"
+#include "dindexer-machinery/scantask/contenttype.hpp"
+#include "dindexer-machinery/scantask/mime.hpp"
+#include "dindexer-machinery/scantask/generalfiller.hpp"
+#include "dindexer-machinery/scantask/setbasic.hpp"
 #include <iostream>
 #include <iomanip>
 #include <ciso646>
-#include <sstream>
-#include <algorithm>
-#include <iterator>
-#if defined(WITH_PROGRESS_FEEDBACK)
-#	include <thread>
-#	include <mutex>
-#	include <condition_variable>
-#endif

 namespace {
-	void run_hash_calculation ( mchlib::Indexer& parIndexer, bool parShowProgress );
-	bool add_to_db ( const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, char parContent, const dinlib::SettingsDB& parDBSettings, bool parForce=false );
+	bool add_to_db ( const std::vector<mchlib::FileRecordData>& parData, const mchlib::SetRecordDataFull& parSet, const dinlib::SettingsDB& parDBSettings, bool parForce=false );
+#if defined(WITH_PROGRESS_FEEDBACK)
+	void print_progress ( const boost::string_ref parPath, uint64_t parFileBytes, uint64_t parTotalBytes, uint32_t parFileNum );
+#endif
 } //unnamed namespace

+namespace stask = mchlib::scantask;
+
 int main (int parArgc, char* parArgv[]) {
 	using std::placeholders::_1;
 	using std::placeholders::_2;
 	using boost::program_options::variables_map;
+	using FileRecordDataFiller = stask::GeneralFiller<stask::DirTree::PathList>;
+	using SetRecordDataFiller = stask::GeneralFiller<mchlib::SetRecordDataFull>;

 	variables_map vm;
 	try {
@ -61,11 +63,10 @@ int main (int parArgc, char* parArgv[]) {
 		std::cerr << err.what() << "\nUse --help for help" << std::endl;
 		return 2;
 	}
-	const std::string search_path(vm["search-path"].as<std::string>());
 #if defined(WITH_PROGRESS_FEEDBACK)
-	const bool verbose = (0 == vm.count("quiet"));
+	//const bool verbose = (0 == vm.count("quiet"));
 #else
-	const bool verbose = false;
+	//const bool verbose = false;
 #endif

 	dinlib::Settings settings;
@ -77,127 +78,31 @@ int main (int parArgc, char* parArgv[]) {
 		}
 	}

-#if defined(WITH_MEDIA_AUTODETECT)
-	char set_type;
-	if (0 == vm.count("type")) {
-		std::cout << "Analyzing disc... ";
-		try {
-			const auto guessed_type = mchlib::guess_media_type(std::string(search_path));
-			set_type = guessed_type;
-			std::cout << "Setting type to " << set_type << " ("
-				<< dinlib::media_type_to_str(guessed_type) << ")\n";
-		}
-		catch (const std::runtime_error& e) {
-			std::cout << '\n';
-			std::cerr << e.what();
-			return 1;
-		}
-	}
-	else {
-		set_type = vm["type"].as<char>();
-	}
-#else
-	const char set_type = vm["type"].as<char>();
+	bool ignore_read_errors = (vm.count("ignore-errors") > 0);
+	const std::string search_path(vm["search-path"].as<std::string>());
+	const char def_media_type = (vm.count("type") ? vm["type"].as<char>() : 'O');
+
+	std::shared_ptr<stask::SetBasic> setbasic(new stask::SetBasic(std::string(vm["setname"].as<std::string>())));
+	std::shared_ptr<stask::DirTree> scan_dirtree(new stask::DirTree(search_path));
+	std::shared_ptr<stask::MediaType> media_type(new stask::MediaType(setbasic, def_media_type, vm.count("type"), search_path));
+	std::shared_ptr<stask::Hashing> hashing(new stask::Hashing(scan_dirtree, ignore_read_errors));
+	std::shared_ptr<stask::ContentType> content_type(new stask::ContentType(setbasic, scan_dirtree, media_type));
+	std::shared_ptr<stask::Mime> mime(new stask::Mime(scan_dirtree));
+	std::shared_ptr<FileRecordDataFiller> filerecdata(new FileRecordDataFiller(mime, hashing));
+	std::shared_ptr<SetRecordDataFiller> setrecdata(new SetRecordDataFiller(media_type, content_type));
+
+#if defined(WITH_PROGRESS_FEEDBACK)
+	hashing->set_progress_callback(&print_progress);
 #endif

-	std::cout << "constructing...\n";
-
-	mchlib::Indexer indexer;
-	indexer.ignore_read_errors(vm.count("ignore-errors") > 0);
-	fastf::FileSearcher searcher(search_path);
-	fastf::FileSearcher::ConstCharVecType ext, ignore;
-	searcher.SetFollowSymlinks(true);
-	searcher.SetCallback(fastf::FileSearcher::CallbackType(std::bind(&mchlib::Indexer::add_path, &indexer, _1, _2)));
-	searcher.Search(ext, ignore);
-	if (verbose) {
-		std::cout << "Fetching items list...\n";
-	}
-
-	if (indexer.empty()) {
-		std::cerr << "Nothing found at the given location, quitting\n";
-		return 1;
-	}
-	else {
-		run_hash_calculation(indexer, verbose);
-
-		//TODO: guess_content_type() relies on FileRecordData::path being set to
-		//the relative path already. Unfortunately at this point it just got
-		//default-initialized to be the same as abs_path, so for a video DVD, for
-		//example, it's going to be like "/mnt/cdrom/VIDEO_TS" instead of just
-		//"VIDEO_TS". This will cause guess_content_type() to miss. Relative
-		//paths are populated at the end of calculate_hash(), so until I come up
-		//with a better system I'm just moving content detection to after hash
-		//calculation.
-		const auto set_type_casted = dinlib::char_to_media_type(set_type);
-		const mchlib::ContentTypes content = mchlib::guess_content_type(set_type_casted, indexer.record_data());
-		const char content_type = mchlib::content_type_to_char(content);
-
-		if (verbose) {
-			std::cout << "Writing to database...\n";
-		}
-		if (not add_to_db(indexer.record_data(), vm["setname"].as<std::string>(), set_type, content_type, settings.db)) {
-			std::cerr << "Not written to DB, likely because a set with the same hash already exists\n";
-		}
+	if (not add_to_db(filerecdata->get_or_create(), setrecdata->get_or_create(), settings.db)) {
+		std::cerr << "Not written to DB, likely because a set with the same hash already exists\n";
 	}
 	return 0;
 }

 namespace {
-	void run_hash_calculation (mchlib::Indexer& parIndexer, bool parShowProgress) {
-		if (parIndexer.empty()) {
-			return;
-		}
-
-#if !defined(WITH_PROGRESS_FEEDBACK)
-		parShowProgress = false;
-#endif
-		if (not parShowProgress) {
-//Hashing file /mnt/cdrom/Sacred 2/Fallen Angel/UK/Sacred.2.Fallen.Angel-ArenaBG/DISC2/S2DISC2.md1... 512c201321ed01cc2a82c9f80bfeaaa673bc8eb3cea4e5c1
-//terminate called after throwing an instance of 'std::ios_base::failure'
-//what():  basic_filebuf::xsgetn error reading the file
-//Hashing file /mnt/cdrom/Sacred 2/Fallen Angel/UK/Sacred.2.Fallen.Angel-ArenaBG/DISC2/S2DISC2.mdf...Annullato
-			parIndexer.calculate_hash();
-		}
-#if defined(WITH_PROGRESS_FEEDBACK)
-		else {
-			typedef std::ostream_iterator<char> cout_iterator;
-
-			std::cout << "Processing";
-			std::cout.flush();
-			const auto total_items = parIndexer.total_items();
-			std::thread hash_thread(&mchlib::Indexer::calculate_hash, &parIndexer);
-			std::mutex progress_print;
-			std::size_t clear_size = 0;
-			const auto digit_count = static_cast<std::size_t>(std::log10(static_cast<double>(total_items))) + 1;
-			do {
-				//TODO: fix this steaming pile of crap
-				//std::unique_lock<std::mutex> lk(progress_print);
-				//parIndexer.step_notify().wait(lk);
-				std::cout << '\r';
-				std::fill_n(cout_iterator(std::cout), clear_size, ' ');
-				std::cout << '\r';
-				{
-					std::ostringstream oss;
-					const auto item_index = std::min(total_items - 1, parIndexer.processed_items());
-					oss << "Processing file "
-						<< std::setw(digit_count) << std::setfill(' ') << (item_index + 1)
-						<< " of " << total_items << " \"" << parIndexer.current_item() << '"';
-					const auto msg(oss.str());
-					clear_size = msg.size();
-					std::cout << msg;
-					std::cout.flush();
-				}
-			} while (false); //parIndexer.processed_items() != total_items);
-
-			hash_thread.join();
-			if (parIndexer.processed_items() > 0) {
-				std::cout << '\n';
-			}
-		}
-#endif
-	}
-
-	bool add_to_db (const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, char parContentType, const dinlib::SettingsDB& parDBSettings, bool parForce) {
+	bool add_to_db (const std::vector<mchlib::FileRecordData>& parData, const mchlib::SetRecordDataFull& parSet, const dinlib::SettingsDB& parDBSettings, bool parForce) {
 		using mchlib::FileRecordData;
 		using mchlib::SetRecordDataFull;
 		using mchlib::SetRecordData;
@ -212,11 +117,18 @@ namespace {
 			}
 		}

-		SetRecordData set_data {parSetName, parType, parContentType };
+		const SetRecordData& set_data {parSet.name, parSet.type, parSet.content_type };
 		const auto app_signature = dinlib::dindexer_signature();
 		const auto lib_signature = mchlib::lib_signature();
 		const std::string signature = std::string(app_signature.data(), app_signature.size()) + "/" + std::string(lib_signature.data(), lib_signature.size());
 		din::write_to_db(parDBSettings, parData, set_data, signature);
 		return true;
 	}
+
+#if defined(WITH_PROGRESS_FEEDBACK)
+	//Progress callback for the hashing task: prints the file counter, the
+	//current path and the total bytes hashed so far. The line ends with a
+	//carriage return instead of a newline and stdout is flushed right away.
+	void print_progress (const boost::string_ref parPath, uint64_t /*parFileBytes*/, uint64_t parTotalBytes, uint32_t parFileNum) {
+		std::cout
+			<< "Hashing file " << parFileNum
+			<< " \"" << parPath << "\" ("
+			<< parTotalBytes << " bytes hashed)\r"
+			<< std::flush;
+	}
+#endif
 } //unnamed namespace
--- a/test/unit/test_guess_content_type.cpp
+++ b/test/unit/test_guess_content_type.cpp
@ -23,7 +23,7 @@

 namespace {
 	template <std::size_t N>
-	void detect_type (mchlib::FileRecordData (&parTestData)[N], mchlib::ContentTypes parExpected, dinlib::MediaTypes parMediaType) {
+	void detect_type (mchlib::FileRecordData (&parTestData)[N], mchlib::ContentTypes parExpected, mchlib::MediaTypes parMediaType) {
 		using mchlib::SetListing;
 		using mchlib::FileRecordData;

@ -52,7 +52,7 @@ TEST(machinery, guess_content_type) {
 			FileRecordData("VIDEO_TS/VTS_01_0.VOB",0,0,2,false,false),
 			FileRecordData("VIDEO_TS/VIDEO_TS.VOB",0,0,2,false,false)
 		};
-		detect_type(test_data, mchlib::ContentType_VideoDVD, dinlib::MediaType_DVD);
+		detect_type(test_data, mchlib::ContentType_VideoDVD, mchlib::MediaType_DVD);
 	}

 	{
@ -63,7 +63,7 @@ TEST(machinery, guess_content_type) {
 			FileRecordData("some_file.bin",0,0,1,false,false),
 			FileRecordData("another_dir/VTS_01_0.BUP",0,0,2,false,false)
 		};
-		detect_type(test_data, mchlib::ContentType_Generic, dinlib::MediaType_Directory);
+		detect_type(test_data, mchlib::ContentType_Generic, mchlib::MediaType_Directory);
 	}

 	{
@ -81,6 +81,6 @@ TEST(machinery, guess_content_type) {
 			FileRecordData("CDI",0,0,1,true,false),
 			FileRecordData("KARAOKE",0,0,1,true,false)
 		};
-		detect_type(test_data, mchlib::ContentType_VideoCD, dinlib::MediaType_CDRom);
+		detect_type(test_data, mchlib::ContentType_VideoCD, mchlib::MediaType_CDRom);
 	}
 }