Rename update action to scan.

2025-08-13 14:39:47 +00:00 · 2015-12-04 19:40:02 +00:00 · 2015-12-04 19:40:02 +00:00 · 87bc031e65
commit 87bc031e65
parent 20ebc10b0c
22 changed files with 2 additions and 2 deletions
--- a/src/scan/CMakeLists.txt
+++ b/src/scan/CMakeLists.txt
@ -0,0 +1,21 @@
+#Build description of the scan executable; the target is named "${bare_name}-scan".
+#Both CXX and C are enabled: tiger.c is the only C source in the list below.
+project(${bare_name}-scan CXX C)
+
+#All translation units that make up the executable
+add_executable(${PROJECT_NAME}
+	main.cpp
+	filesearcher.cpp
+	pathname.cpp
+	indexer.cpp
+	tiger.c
+	tiger.cpp
+	dbbackend.cpp
+	settings.cpp
+	commandline.cpp
+)
+
+#The parent directory (src/) is added privately to the include path
+target_include_directories(${PROJECT_NAME}
+	PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..
+)
+
+#Links privately against the ${bare_name}-if target, which is defined elsewhere in the build
+target_link_libraries(${PROJECT_NAME}
+	PRIVATE ${bare_name}-if
+)
--- a/src/scan/commandline.cpp
+++ b/src/scan/commandline.cpp
@ -0,0 +1,128 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "commandline.hpp"
+#include "dindexerConfig.h"
+#include "helpers/lengthof.h"
+#include <boost/program_options.hpp>
+#include <iostream>
+#include <algorithm>
+
+#define STRINGIZE_IMPL(s) #s
+#define STRINGIZE(s) STRINGIZE_IMPL(s)
+
+namespace po = boost::program_options;
+
+namespace din {
+	namespace {
+		//Single-letter codes accepted as value of the --type option.
+		//parse_commandline() lists them in the option's help text and rejects any other value.
+		const char g_allowed_types[] = {
+			'C', //CD-Rom
+			'D', //Directory
+			'V', //DVD
+			'B', //BluRay
+			'F', //Floppy Disk
+			'H', //Hard Disk
+			'Z', //Iomega Zip
+			'O'  //Other
+		};
+		//Version banner printed by --version, assembled at compile time through
+		//string literal concatenation: "<PROGRAM_NAME> v<major>.<minor>.<patch>",
+		//followed by "b" when VERSION_BETA evaluates to non-zero.
+		//NOTE(review): PROGRAM_NAME and the VERSION_* macros presumably come from dindexerConfig.h - confirm.
+		const char* const g_version_string =
+			PROGRAM_NAME " v"
+			STRINGIZE(VERSION_MAJOR) "."
+			STRINGIZE(VERSION_MINOR) "."
+			STRINGIZE(VERSION_PATCH)
+#if VERSION_BETA
+			"b"
+#endif
+			;
+	} //unnamed namespace
+
+	//Parses the command line of the scan action and stores the result in parVarMap.
+	//Returns true when the request was fully served here (--help or --version
+	//printed to stdout), false when the caller should go on with the scan.
+	//Throws std::invalid_argument for an unknown option, a missing search path
+	//or a --type value that is not listed in g_allowed_types.
+	bool parse_commandline (int parArgc, char* parArgv[], po::variables_map& parVarMap) {
+		const char* const types_begin = g_allowed_types;
+		const char* const types_end = g_allowed_types + lengthof(g_allowed_types);
+
+		//Help text of --type, listing every allowed letter
+		std::ostringstream type_help;
+		type_help << "Default set type. Valid values are " << *types_begin;
+		for (const char* curr_type = types_begin + 1; curr_type != types_end; ++curr_type) {
+			type_help << ", " << *curr_type;
+		}
+		type_help << '.';
+		const std::string type_param_help = type_help.str();
+
+		po::options_description general_opts("General");
+		general_opts.add_options()
+			("help,h", "Produces this help message")
+			("version", "Prints the program's version and quits")
+#if defined(WITH_PROGRESS_FEEDBACK)
+			("quiet,q", "Hide progress messages and print nothing at all")
+#endif
+		;
+
+		po::options_description set_opts("Set options");
+		set_opts.add_options()
+			("setname,n", po::value<std::string>()->default_value("New set"), "Name to be given to the new set being scanned.")
+			("type,t", po::value<char>()->default_value('V'), type_param_help.c_str())
+		;
+
+		//The search path is accepted as the only positional argument; it is
+		//parsed like any other option but left out of the --help listing
+		po::options_description positional_opts("Positional options");
+		positional_opts.add_options()
+			("search-path", po::value<std::string>(), "Search path")
+		;
+		po::positional_options_description positional_map;
+		positional_map.add("search-path", 1);
+
+		po::options_description every_option("Available options");
+		every_option.add(general_opts).add(positional_opts).add(set_opts);
+
+		try {
+			po::store(po::command_line_parser(parArgc, parArgv).options(every_option).positional(positional_map).run(), parVarMap);
+		}
+		catch (const po::unknown_option& err) {
+			throw std::invalid_argument(err.what());
+		}
+
+		po::notify(parVarMap);
+
+		if (parVarMap.count("help")) {
+#if !defined(NDEBUG)
+			std::cout
+				<< "*******************\n"
+				<< "*** DEBUG BUILD ***\n"
+				<< "*******************\n"
+				<< '\n';
+#endif
+			po::options_description listed_options("Available options");
+			listed_options.add(general_opts).add(set_opts);
+			std::cout
+				<< PROGRAM_NAME << " Copyright (C) 2015  Michele Santullo\n"
+				<< "This program comes with ABSOLUTELY NO WARRANTY.\n"
+				<< "This is free software, and you are welcome to\n"
+				<< "redistribute it under certain conditions.\n"
+				<< '\n'
+				<< "Usage: " << PROGRAM_NAME << " [options...] <search-path>\n"
+				<< listed_options;
+			return true;
+		}
+		if (parVarMap.count("version")) {
+			std::cout << g_version_string << '\n';
+			return true;
+		}
+
+		if (0 == parVarMap.count("search-path")) {
+			throw std::invalid_argument("No search path specified");
+		}
+
+		const char chosen_type = parVarMap["type"].as<char>();
+		if (std::find(types_begin, types_end, chosen_type) == types_end) {
+			throw std::invalid_argument("Invalid value for parameter \"type\"");
+		}
+		return false;
+	}
+} //namespace din
--- a/src/scan/commandline.hpp
+++ b/src/scan/commandline.hpp
@ -0,0 +1,27 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef id1B7A42F6E46547A6AB0F914E2A91399F
+#define id1B7A42F6E46547A6AB0F914E2A91399F
+
+#include <boost/program_options/variables_map.hpp>
+
+namespace din {
+	//Parses parArgc/parArgv into parVarMap.
+	//Returns true when --help or --version was handled (and printed), in which
+	//case there is nothing left to do; returns false otherwise.
+	//Throws std::invalid_argument on an unknown option, a missing search path
+	//or an invalid set type.
+	bool parse_commandline ( int parArgc, char* parArgv[], boost::program_options::variables_map& parVarMap );
+} //namespace din
+
+#endif
--- a/src/scan/dbbackend.cpp
+++ b/src/scan/dbbackend.cpp
@ -0,0 +1,138 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "dbbackend.hpp"
+#include "pq/connection.hpp"
+#include "settings.hpp"
+#include <string>
+#include <sstream>
+#include <utility>
+#include <boost/lexical_cast.hpp>
+#include <exception>
+#include <memory>
+#include <boost/utility/string_ref.hpp>
+
+namespace din {
+	namespace {
+		const std::size_t g_batch_size = 100;
+
+		//Builds the INSERT statement that adds a new row to the "sets" table.
+		//The set name goes through the connection's literal escaping, the type
+		//is a single character wrapped in single quotes.
+		std::string make_set_insert_query (pq::Connection& parConn, const SetRecordData& parSetData) {
+			std::ostringstream sql;
+			sql << "INSERT INTO \"sets\" (\"desc\",\"type\") VALUES (";
+			sql << parConn.escaped_literal(parSetData.name);
+			sql << ",'" << parSetData.type << "');";
+			return sql.str();
+		}
+
+		//Formats parTime as a UTC timestamp ("%F %T%z") into parBuff and returns
+		//a view of the characters that were written.
+		//The returned string_ref points into parBuff: it stays meaningful only
+		//until the buffer is written to again.
+		//An empty view is returned when parTime can't be converted or when the
+		//formatted text doesn't fit in parLength bytes.
+		boost::string_ref time_to_str (const std::time_t parTime, char* parBuff, std::size_t parLength) {
+			const std::tm* const utc_time = std::gmtime(&parTime);
+			if (nullptr == utc_time) {
+				//gmtime() returns null for values it can't represent and
+				//strftime() must never be handed a null pointer
+				return boost::string_ref(parBuff, 0);
+			}
+			const auto len = std::strftime(parBuff, parLength, "%F %T%z", utc_time);
+			return boost::string_ref(parBuff, len);
+		}
+	} //unnamed namespace
+
+	//Looks up one file record by hash and, when found, the set it belongs to.
+	//Returns false when no file with that hash exists; parItem and parSet are
+	//then left untouched. On success path, hash, level, size, is_directory and
+	//is_symlink of parItem are filled in (atime and mtime are not queried) and
+	//parSet receives type, name and disk_number.
+	//parHash is consumed: it gets moved into parItem.hash on success.
+	//Throws std::length_error when the file row refers to a set id that is
+	//missing from table "sets".
+	bool read_from_db (FileRecordData& parItem, SetRecordDataFull& parSet, const DinDBSettings& parDB, std::string&& parHash) {
+		using boost::lexical_cast;
+
+		pq::Connection conn(std::string(parDB.username), std::string(parDB.password), std::string(parDB.dbname), std::string(parDB.address), parDB.port);
+		conn.connect();
+
+		uint32_t owner_set_id;
+		{
+			std::ostringstream file_query;
+			file_query << "SELECT path,level,group_id,is_directory,is_symlink,size FROM files WHERE hash=";
+			file_query << conn.escaped_literal(parHash);
+			file_query << " LIMIT 1;";
+
+			auto file_rows = conn.query(file_query.str());
+			if (file_rows.empty()) {
+				return false;
+			}
+
+			auto file_row = file_rows[0];
+			owner_set_id = lexical_cast<uint32_t>(file_row["group_id"]);
+			parItem.path = file_row["path"];
+			//The hash is not needed any more for the query, so hand it over
+			parItem.hash = std::move(parHash);
+			parItem.level = lexical_cast<uint16_t>(file_row["level"]);
+			parItem.size = lexical_cast<uint64_t>(file_row["size"]);
+			//Booleans are compared against the text "t"
+			parItem.is_directory = (file_row["is_directory"] == "t");
+			parItem.is_symlink = (file_row["is_symlink"] == "t");
+		}
+
+		{
+			std::ostringstream set_query;
+			set_query << "SELECT \"desc\",\"type\",\"disk_number\" FROM sets WHERE \"id\"=" << owner_set_id << ';';
+
+			auto set_rows = conn.query(set_query.str());
+			if (set_rows.empty()) {
+				std::ostringstream err_msg;
+				err_msg << "Missing set: found a record with group_id=" << owner_set_id
+					<< " but there is no such id in table \"sets\"";
+				throw std::length_error(err_msg.str());
+			}
+
+			auto set_row = set_rows[0];
+			parSet.name = set_row["desc"];
+			parSet.type = lexical_cast<char>(set_row["type"]);
+			parSet.disk_number = lexical_cast<uint32_t>(set_row["disk_number"]);
+		}
+		return true;
+	}
+
+	//Stores a new set and all of its file records in the database, inside a
+	//single BEGIN/COMMIT transaction. Nothing is done when parData is empty.
+	//File rows are sent in batches of g_batch_size rows per INSERT statement;
+	//each row is tied to the freshly inserted set through
+	//currval('"sets_id_seq"').
+	void write_to_db (const DinDBSettings& parDB, const std::vector<FileRecordData>& parData, const SetRecordData& parSetData) {
+		if (parData.empty()) {
+			return;
+		}
+
+		//Scratch buffer shared by every time_to_str() call below
+		const std::size_t strtime_buff_size = 512;
+		std::unique_ptr<char[]> strtime_buff(new char[strtime_buff_size]);
+
+		pq::Connection conn(std::string(parDB.username), std::string(parDB.password), std::string(parDB.dbname), std::string(parDB.address), parDB.port);
+		conn.connect();
+
+		conn.query_void("BEGIN;");
+		conn.query_void(make_set_insert_query(conn, parSetData));
+		//TODO: use COPY instead of INSERT INTO
+		for (std::size_t z = 0; z < parData.size(); z += g_batch_size) {
+			const std::size_t batch_end = std::min(z + g_batch_size, parData.size());
+
+			std::ostringstream query;
+			query << "INSERT INTO \"files\" " <<
+				"(path, hash, level, group_id, is_directory, is_symlink, size, " <<
+				"access_time, modify_time) VALUES "
+			;
+
+			const char* comma = "";
+			for (auto i = z; i < batch_end; ++i) {
+				const auto& itm = parData[i];
+				query << comma;
+				query << '(' << conn.escaped_literal(itm.path) << ",'" << itm.hash << "',"
+					<< itm.level << ','
+					<< "currval('\"sets_id_seq\"')" << ','
+					<< (itm.is_directory ? "true" : "false") << ','
+					<< (itm.is_symlink ? "true" : "false") << ',' << itm.size
+					<< ",'";
+				//Both timestamps are formatted into the same buffer, so each one
+				//must be streamed in its own statement before the next
+				//time_to_str() call overwrites it: within a single << chain the
+				//two calls are not guaranteed to be sequenced before C++17 and
+				//both columns could end up with the same value.
+				query << time_to_str(itm.atime, strtime_buff.get(), strtime_buff_size);
+				query << "','";
+				query << time_to_str(itm.mtime, strtime_buff.get(), strtime_buff_size);
+				query << "')";
+				comma = ",";
+			}
+			query << ';';
+
+			conn.query_void(query.str());
+		}
+		conn.query_void("COMMIT;");
+	}
+} //namespace din
--- a/src/scan/dbbackend.hpp
+++ b/src/scan/dbbackend.hpp
@ -0,0 +1,56 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef id842AF56BD80A4CF59957451DF9082AA2
+#define id842AF56BD80A4CF59957451DF9082AA2
+
+#include <string>
+#include <vector>
+#include <cstdint>
+#include <boost/utility/string_ref.hpp>
+#include <ctime>
+
+namespace din {
+	struct DinDBSettings;
+
+	//One row of table "files", as written by write_to_db() and partially read
+	//back by read_from_db().
+	struct FileRecordData {
+		std::string path; //Stored in column "path"
+		std::string hash; //Hash in text form, stored in column "hash"
+		std::time_t atime; //Access time; written by write_to_db(), not filled by read_from_db()
+		std::time_t mtime; //Modification time; written by write_to_db(), not filled by read_from_db()
+		uint16_t level; //Stored in column "level"
+		uint64_t size; //Stored in column "size"
+		bool is_directory;
+		bool is_symlink;
+	};
+
+	//Set description as returned by read_from_db(); owns its strings.
+	struct SetRecordDataFull {
+		std::string name; //Column "desc" of table "sets"
+		uint32_t disk_number;
+		char type; //Column "type" of table "sets"
+	};
+
+	//Set description handed to write_to_db().
+	//name is a boost::string_ref, which doesn't own the characters it refers
+	//to: the original string must stay alive for as long as this object is used.
+	struct SetRecordData {
+		const boost::string_ref name;
+		const char type;
+	};
+
+	//Inserts a new set described by parSetData together with the file records in parData.
+	void write_to_db ( const DinDBSettings& parDB, const std::vector<FileRecordData>& parData, const SetRecordData& parSetData );
+	//Searches a file record by hash; returns false when there is none.
+	bool read_from_db ( FileRecordData& parItem, SetRecordDataFull& parSet, const DinDBSettings& parDB, std::string&& parHash );
+} //namespace din
+
+#endif
--- a/src/scan/filesearcher.cpp
+++ b/src/scan/filesearcher.cpp
@ -0,0 +1,170 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "filesearcher.hpp"
+
+#if !defined(_XOPEN_SOURCE)
+#define _XOPEN_SOURCE 500
+#endif
+
+#include <ftw.h>
+#include <iostream>
+#include <sys/types.h>
+#include <cstring>
+
+namespace fastf {
+  //Callback handed to nftw() by FileSearcher::Search(); it is declared with C
+  //linkage and defined further down in this file.
+  extern "C" int PrintName ( const char* parPath, const struct stat* parStat, int parType, FTW* parBuff );
+
+  namespace {
+    //Parameters of the search in progress. FileSearcher::Search() fills them
+    //in right before calling nftw() and the callbacks in this file read them.
+    //Only pointers are stored: the pointed objects belong to the caller of Search().
+    struct SearchOptions {
+      FileSearcher::CallbackType* callback;
+      const FileSearcher::ConstCharVecType* extensions;
+      const FileSearcher::ConstCharVecType* ignorePaths;
+      int search_flags;
+    };
+
+    //Declared with __thread, so every thread gets its own instance
+    __thread SearchOptions g_searchOptions;
+
+    //Default callback of FileSearcher: prints one line per visited item to
+    //stdout (path, a trailing '/' for directories, a symlink marker and the
+    //depth level). It always returns true, so the walk never stops because of it.
+    bool print_def_callback (const char* parPath, const FileStats& parStats) {
+      std::cout << parPath
+        << (parStats.is_dir ? "/" : "")
+        << (parStats.is_symlink ? " (symlink)" : "")
+        << " (" << parStats.level << ")\n";
+      return true;
+    }
+
+    //Reports a directory to the user callback and decides whether nftw() should
+    //descend into it.
+    //Returns FTW_STOP when the callback returns false, FTW_SKIP_SUBTREE when
+    //the directory can't be read or its name starts with one of the ignore
+    //entries, FTW_CONTINUE otherwise. Note that the callback runs before the
+    //ignore list is looked at.
+    int HandleDir (const char* parPath, const struct stat* parStat, FTW* parBuff, bool parCanBeRead, bool parSymlink) {
+      FileStats dir_stats;
+      dir_stats.level = parBuff->level;
+      dir_stats.is_dir = true;
+      dir_stats.is_symlink = parSymlink;
+      dir_stats.atime = parStat->st_atime;
+      dir_stats.mtime = parStat->st_mtime;
+
+      const bool keep_going = (*g_searchOptions.callback)(parPath, dir_stats);
+      if (!keep_going)
+        return FTW_STOP;
+      if (!parCanBeRead)
+        return FTW_SKIP_SUBTREE;
+
+      //parBuff->base is the offset of the last path component inside parPath
+      const char* const dir_name = parPath + parBuff->base;
+      for (const auto& ignored : *g_searchOptions.ignorePaths) {
+        if (0 == std::strncmp(ignored.str, dir_name, ignored.len))
+          return FTW_SKIP_SUBTREE;
+      }
+      return FTW_CONTINUE;
+    }
+
+    //Reports a file to the user callback when it passes the extension filter.
+    //With an empty extension list every file is reported; otherwise the path
+    //must end with at least one of the listed extensions.
+    //Returns FTW_STOP when the callback returns false, FTW_CONTINUE otherwise.
+    int HandleFile (const char* parPath, const struct stat* parStat, FTW* parBuff, bool parSymlink) {
+      const FileSearcher::ConstCharVecType& extensions = *g_searchOptions.extensions;
+      FileStats st;
+      st.level = parBuff->level;
+      st.is_dir = false;
+      st.is_symlink = parSymlink;
+      st.atime = parStat->st_atime;
+      st.mtime = parStat->st_mtime;
+
+      bool wanted = extensions.empty();
+      if (not wanted) {
+        //Length of the whole path: offset of the file name plus its length
+        const int pathLen = parBuff->base + static_cast<int>(std::strlen(parPath + parBuff->base));
+        for (const auto& ext : extensions) {
+          //An extension longer than the path can never match; comparing it
+          //anyway would read memory located before the beginning of parPath
+          if (ext.len < 0 or ext.len > pathLen)
+            continue;
+          if (std::strncmp(ext.str, parPath + pathLen - ext.len, ext.len) == 0) {
+            wanted = true;
+            break;
+          }
+        }
+      }
+
+      if (wanted and not (*g_searchOptions.callback)(parPath, st))
+        return FTW_STOP;
+      return FTW_CONTINUE;
+    }
+
+    //Symbolic links: broken ones are silently skipped, the others are treated
+    //as files flagged as symlinks.
+    int HandleLink (const char* parPath, const struct stat* parStat, FTW* parBuff, bool parBroken) {
+      return parBroken ? FTW_CONTINUE : HandleFile(parPath, parStat, parBuff, true);
+    }
+
+    //Callback invoked by nftw() for every item it visits: dispatches to the
+    //handler that matches the item type reported in parType.
+    //The return value is one of the FTW_* action codes (the walk is started
+    //with FTW_ACTIONRETVAL by FileSearcher::Search()).
+    extern "C" int PrintName (const char* parPath, const struct stat* parStat, int parType, FTW* parBuff) {
+      //lstat() can fail (for example on items reported as FTW_NS); in that
+      //case st holds no valid data and the item is not considered a symlink
+      struct stat st;
+      const bool is_symlink = (0 == lstat(parPath, &st)) and S_ISLNK(st.st_mode);
+
+      switch (parType) {
+      //parPath is a directory which can't be read
+      case FTW_DNR:
+      //parPath is a directory
+      case FTW_D:
+        return HandleDir(parPath, parStat, parBuff, static_cast<bool>(FTW_DNR != parType), is_symlink);
+
+      //parPath is a regular file
+      case FTW_F:
+        return HandleFile(parPath, parStat, parBuff, is_symlink);
+
+      //The stat(2) call failed on parPath, which is not a symbolic link
+      case FTW_NS:
+        return FTW_CONTINUE;
+
+      //parPath is a symbolic link pointing to a nonexistent file (This occurs only if FTW_PHYS is not set)
+      case FTW_SLN:
+      //parPath is a symbolic link, and FTW_PHYS was set in flags
+      case FTW_SL:
+        return HandleLink(parPath, parStat, parBuff, static_cast<bool>(FTW_SLN == parType));
+      }
+
+      return FTW_CONTINUE;
+    }
+  } //unnamed namespace
+
+  //Creates a searcher rooted at parBaseDir (the characters are copied).
+  //Defaults: items are printed through print_def_callback, symlinks are not
+  //followed and the walk stays within one filesystem.
+  FileSearcher::FileSearcher (boost::string_ref parBaseDir) :
+    callback_(&print_def_callback),
+    baseDir_(parBaseDir.begin(), parBaseDir.end()),
+    followSymlinks_(false),
+    remainInFilesystem_(true)
+  {
+  }
+
+  //Nothing to release explicitly
+  FileSearcher::~FileSearcher() noexcept {
+  }
+
+  //Walks the base directory with nftw(), invoking the callback on matching items.
+  //parExtensions filters files by the ending of their path (empty means no
+  //filtering), parIgnorePaths lists directory name prefixes whose subtree is
+  //not visited. Both are published to the nftw callback through the
+  //g_searchOptions variable. The value returned by nftw() is not inspected.
+  void FileSearcher::Search (const ConstCharVecType& parExtensions, const ConstCharVecType& parIgnorePaths) {
+    int walk_flags = FTW_ACTIONRETVAL;
+    if (remainInFilesystem_)
+      walk_flags |= FTW_MOUNT;
+    if (!followSymlinks_)
+      walk_flags |= FTW_PHYS;
+
+    g_searchOptions.callback = &callback_;
+    g_searchOptions.extensions = &parExtensions;
+    g_searchOptions.ignorePaths = &parIgnorePaths;
+    g_searchOptions.search_flags = walk_flags;
+
+    //15 is the maximum number of file descriptors nftw() may keep open
+    nftw(baseDir_.c_str(), &PrintName, 15, walk_flags);
+  }
+
+  //Replaces the function invoked for every item found by Search(); the
+  //callback can stop the walk by returning false.
+  void FileSearcher::SetCallback (CallbackType parCallback) {
+    callback_ = parCallback;
+  }
+} //namespace fastf
--- a/src/scan/filesearcher.hpp
+++ b/src/scan/filesearcher.hpp
@ -0,0 +1,62 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef idB6D385B7779240308449D6081CB790F1
+#define idB6D385B7779240308449D6081CB790F1
+
+#include "filestats.hpp"
+#include <vector>
+#include <functional>
+#include <boost/utility/string_ref.hpp>
+
+namespace fastf {
+  //Pairs a C string with a length. Only the pointer is stored: the characters
+  //are not copied, so they have to outlive this object.
+  //The search code passes len to strncmp() as the number of characters to compare.
+  struct StringWithLength {
+    StringWithLength ( const char* parStr, int parLen ) :
+      str(parStr),
+      len(parLen)
+    {
+    }
+
+    const char* const str;
+    const int len;
+  };
+
+  //Walks a directory tree and reports every item found to a callback.
+  class FileSearcher {
+  public:
+    typedef std::vector<StringWithLength> ConstCharVecType;
+    //Receives the item's path and stats; returning false stops the search
+	typedef std::function<bool(const char*, const FileStats&)> CallbackType;
+
+    //parBaseDir is the directory the search starts from
+    explicit FileSearcher ( boost::string_ref parBaseDir );
+    ~FileSearcher ( void ) noexcept;
+
+    //Runs the search. parExtensions restricts the reported files to paths
+    //ending with one of the entries (empty list: report everything);
+    //parIgnorePaths lists directory name prefixes whose content is skipped.
+    void Search ( const ConstCharVecType& parExtensions, const ConstCharVecType& parIgnorePaths );
+
+    void SetFollowSymlinks ( bool parFollow ) noexcept { followSymlinks_ = parFollow; }
+    bool FollowSymlinks ( void ) const noexcept { return followSymlinks_; }
+    void SetRemainInFilesystem ( bool parRemain ) noexcept { remainInFilesystem_ = parRemain; }
+    bool RemainInFilesystem ( void ) const noexcept { return remainInFilesystem_; }
+	void SetCallback ( CallbackType parCallback );
+
+  private:
+  	CallbackType callback_;
+    const std::string baseDir_;
+    bool followSymlinks_; //When false the walk is done with FTW_PHYS
+    bool remainInFilesystem_; //When true the walk is done with FTW_MOUNT
+  };
+} //namespace fastf
+
+#endif
--- a/src/scan/filestats.hpp
+++ b/src/scan/filestats.hpp
@ -0,0 +1,33 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef id4A7D7AB671954418939FC0BDA19C5B3F
+#define id4A7D7AB671954418939FC0BDA19C5B3F
+
+#include <ctime>
+
+namespace fastf {
+	//Information about one item found by FileSearcher, passed to the search callback.
+	struct FileStats {
+		int level; //Depth of the item as reported by nftw() (FTW::level)
+		std::time_t atime; //Last access time (st_atime)
+		std::time_t mtime; //Last modification time (st_mtime)
+		bool is_dir;
+		bool is_symlink;
+	};
+} //namespace fastf
+
+#endif
--- a/src/scan/indexer.cpp
+++ b/src/scan/indexer.cpp
@ -0,0 +1,385 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "indexer.hpp"
+#include "pathname.hpp"
+#include "tiger.hpp"
+#include "dbbackend.hpp"
+#include "settings.hpp"
+#include "filestats.hpp"
+#include <algorithm>
+#include <functional>
+#include <vector>
+#include <stdexcept>
+#if defined(WITH_PROGRESS_FEEDBACK)
+#	include <atomic>
+#	include <condition_variable>
+#endif
+#include <cstdint>
+#include <ciso646>
+#include <cassert>
+#include <boost/iterator/filter_iterator.hpp>
+#include <sstream>
+#include <iterator>
+
+#if defined(INDEXER_VERBOSE)
+#	include <iostream>
+#endif
+
+namespace din {
+	typedef TigerHash HashType;
+
+	//One scanned item (file or directory) together with the data computed for
+	//it. Entries are movable but not copyable, and they can be sorted through
+	//operator<; equality comparison is explicitly disabled.
+	struct FileEntry {
+		//The hash starts zeroed and the size at 0; both get their real value
+		//while hashing.
+		FileEntry ( const char* parPath, const fastf::FileStats& parSt ) :
+			path(parPath),
+			hash {},
+			access_time(parSt.atime),
+			modify_time(parSt.mtime),
+			//Always start from a defined size: it is assigned only while
+			//hashing, but add_to_db() copies it for every entry regardless
+			file_size(0),
+			level(static_cast<uint16_t>(parSt.level)),
+			is_dir(parSt.is_dir),
+			is_symlink(parSt.is_symlink)
+		{
+		}
+
+		FileEntry ( const FileEntry& ) = delete;
+		FileEntry ( FileEntry&& ) = default;
+		FileEntry& operator= ( const FileEntry& ) = delete;
+		FileEntry& operator= ( FileEntry&& ) = default;
+		bool operator< ( const FileEntry& parOther ) const;
+		bool operator== ( const FileEntry& ) const = delete;
+
+		std::string path;
+		HashType hash;
+		std::time_t access_time;
+		std::time_t modify_time;
+		uint64_t file_size;
+		uint16_t level;
+		bool is_dir;
+		bool is_symlink;
+	};
+
+	namespace {
+		typedef std::vector<FileEntry>::iterator FileEntryIt;
+
+		//Computes the hash of the directory at parEntry and of the files that are
+		//its direct children, recursing into its direct subdirectories first.
+		//parEntry: the entry to process; asserted to be a directory and different from parEnd.
+		//parBegin, parEnd: the whole list of entries; parBegin is also used to turn
+		//  an iterator into the index passed to the callback.
+		//parCurrDir: path of the directory at parEntry.
+		//parNextItemCallback: invoked with the index of each file right before it is hashed.
+		//NOTE(review): the scans below look for runs of consecutive children, which
+		//presumably relies on the list being sorted with FileEntry::operator<
+		//(calculate_hash() sorts before calling) - confirm before reusing elsewhere.
+		void hash_dir (FileEntryIt parEntry, FileEntryIt parBegin, FileEntryIt parEnd, const PathName& parCurrDir, std::function<void(std::size_t)> parNextItemCallback) {
+			assert(parEntry != parEnd);
+			assert(parEntry->is_dir);
+			FileEntry& curr_entry = *parEntry;
+			auto& curr_entry_it = parEntry;
+
+			//Build a blob with the hashes and filenames of every directory that
+			//is a direct child of current entry
+			{
+				std::vector<char> dir_blob;
+				auto it_entry = curr_entry_it;
+
+				//Skip entries until one is found that is not at the current
+				//entry's level and whose parent path is parCurrDir
+				while (
+					it_entry != parEnd and (
+						it_entry->level == curr_entry.level
+						or parCurrDir != PathName(it_entry->path).pop_right()
+					//and (not it_entry->is_dir or (it_entry->level <= curr_entry.level
+					//and parCurrDir != PathName(it_entry->path).pop_right()))
+				)) {
+					assert(it_entry->level >= curr_entry.level);
+					++it_entry;
+				}
+
+#if defined(INDEXER_VERBOSE)
+				std::cout << "Making initial hash for " << parCurrDir << "...\n";
+#endif
+				//Go through the consecutive direct children of parCurrDir
+				while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->path).pop_right()) {
+					PathName curr_subdir(it_entry->path);
+					if (it_entry->is_dir) {
+						//Subdirectories are hashed first, then their hash bytes and
+						//their path relative to parCurrDir are appended to the blob
+						hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback);
+
+						std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
+						const auto old_size = dir_blob.size();
+						dir_blob.resize(old_size + sizeof(HashType) + relpath.size());
+						std::copy(it_entry->hash.byte_data, it_entry->hash.byte_data + sizeof(HashType), dir_blob.begin() + old_size);
+						std::copy(relpath.begin(), relpath.end(), dir_blob.begin() + old_size + sizeof(HashType));
+					}
+					else {
+						//Files only contribute their relative path at this stage
+						std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
+						const auto old_size = dir_blob.size();
+						dir_blob.resize(old_size + relpath.size());
+						std::copy(relpath.begin(), relpath.end(), dir_blob.begin() + old_size);
+					}
+					++it_entry;
+				}
+
+				//The blob is hashed into the directory's entry; directories get size 0
+				tiger_data(dir_blob, curr_entry.hash);
+				curr_entry.file_size = 0;
+#if defined(INDEXER_VERBOSE)
+				std::cout << "Got intermediate hash for dir " << parCurrDir << ": " << tiger_to_string(curr_entry.hash) << '\n';
+#endif
+			}
+
+			//Now with the initial hash ready, let's start hashing files, if any
+			{
+				//Skip forward to the first entry that is a file and a direct
+				//child of parCurrDir
+				auto it_entry = curr_entry_it;
+				while (
+					it_entry != parEnd
+					and (it_entry->is_dir
+						or it_entry->level != curr_entry_it->level + 1
+						or PathName(it_entry->path).pop_right() != parCurrDir
+					)
+				) {
+					++it_entry;
+				}
+
+				//Hash every consecutive file that is a direct child of parCurrDir
+				while (it_entry != parEnd and not it_entry->is_dir and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->path).pop_right() == parCurrDir) {
+					assert(not it_entry->is_dir);
+#if defined(INDEXER_VERBOSE)
+					std::cout << "Hashing file " << it_entry->path << "...";
+#endif
+					parNextItemCallback(it_entry - parBegin);
+					//The directory's hash is passed along with the file's one.
+					//NOTE(review): judging from the "Final hash" message below,
+					//tiger_file() presumably updates the directory hash with the
+					//file's content - confirm in tiger.hpp.
+					tiger_file(it_entry->path, it_entry->hash, curr_entry_it->hash, it_entry->file_size);
+#if defined(INDEXER_VERBOSE)
+					std::cout << ' ' << tiger_to_string(it_entry->hash) << '\n';
+#endif
+					++it_entry;
+				}
+			}
+
+#if defined(INDEXER_VERBOSE)
+			std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(curr_entry_it->hash) << '\n';
+#endif
+		}
+
+		//Predicate on entries: with FileTrue=true (the default) it accepts files
+		//and rejects directories, with FileTrue=false it does the opposite.
+		template <bool FileTrue=true>
+		struct IsFile {
+			bool operator() ( const FileEntry& parEntry ) const {
+				return parEntry.is_dir != FileTrue;
+			}
+		};
+	} //unnamed namespace
+
+	//Private state of Indexer, kept out of the header.
+	struct Indexer::LocalData {
+		typedef std::vector<FileEntry> PathList;
+
+		DinDBSettings db_settings; //Copy of the settings given to the constructor
+		PathList paths; //Every item added through add_path(), files and directories
+#if defined(WITH_PROGRESS_FEEDBACK)
+		std::atomic<std::size_t> done_count; //Incremented by calculate_hash() once per file
+		std::atomic<std::size_t> processing_index; //Index in paths of the file being hashed
+		std::condition_variable step_notify; //Notified every time a new file gets processed
+#endif
+		std::size_t file_count; //Number of entries in paths that are not directories
+	};
+
+	//Sorting criterion for entries: lower levels come first; within the same
+	//level directories come before files; entries of the same kind are
+	//ordered by path.
+	bool FileEntry::operator< (const FileEntry& parOther) const {
+		if (level != parOther.level) {
+			return level < parOther.level;
+		}
+		if (is_dir != parOther.is_dir) {
+			return is_dir;
+		}
+		return path < parOther.path;
+	}
+
+	//Creates an empty indexer: counters are reset and a copy of parDBSettings
+	//is kept for the later database access.
+	Indexer::Indexer (const DinDBSettings& parDBSettings) :
+		m_local_data(new LocalData)
+	{
+#if !defined(NDEBUG)
+		//Disabled checks on the ordering of entries; they use a FileEntry
+		//constructor signature that doesn't match the current one
+		//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b", 2, true, false));
+		//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b/c/file.txt", 4, false, false));
+		//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b/c/file.c", 4, false, false));
+		//assert(FileEntry("/a/b/c/d", 4, true, false) < FileEntry("/a/b", 2, true, false));
+		//assert(FileEntry("/a/b/c/d", 4, true, false) < FileEntry("/a/b/c", 3, true, false));
+		//assert(FileEntry("/a/b/c/1.txt", 4, true, false) < FileEntry("/a/b/c/2.txt", 4, true, false));
+		//assert(not (FileEntry("/a/b/file.txt", 3, false, false) < FileEntry("/a/b", 2, true, false)));
+		//assert(not (FileEntry("/a", 1, true, false) < FileEntry("/a/b", 2, true, false)));
+		//assert(not (FileEntry("/a/b/1.txt", 3, false, false) < FileEntry("/a/b/c/f.txt", 4, true, false)));
+		//assert(not (FileEntry("/a/b/c/file.c", 4, false, false) < FileEntry("/a/b/c", 3, true, false)));
+#endif
+#if defined(WITH_PROGRESS_FEEDBACK)
+		m_local_data->done_count = 0;
+		m_local_data->processing_index = 0;
+#endif
+		m_local_data->file_count = 0;
+		m_local_data->db_settings = parDBSettings;
+	}
+
+	//Defined in this file, where LocalData is a complete type, so that the
+	//unique_ptr member can destroy it
+	Indexer::~Indexer() {
+	}
+
+	//Number of files added so far; directories are not counted
+	std::size_t Indexer::total_items() const {
+		return m_local_data->file_count;
+	}
+
+#if defined(WITH_PROGRESS_FEEDBACK)
+	//Number of files for which calculate_hash() has started hashing so far
+	std::size_t Indexer::processed_items() const {
+		return m_local_data->done_count;
+	}
+#endif
+
+	//Sorts the collected entries and computes the hash of every one of them,
+	//starting from the first entry of the sorted list. Nothing is done when no
+	//path has been added yet.
+	//With WITH_PROGRESS_FEEDBACK enabled, done_count and processing_index are
+	//updated and step_notify is signalled before each file gets hashed.
+	void Indexer::calculate_hash() {
+		//front() on an empty vector is undefined behaviour: bail out early
+		if (m_local_data->paths.empty()) {
+			return;
+		}
+
+		//The base path is taken from the entry that was added first.
+		//NOTE(review): hashing then starts from the first entry after sorting,
+		//presumably the same item (the root of the scan) - confirm.
+		PathName base_path(m_local_data->paths.front().path);
+		std::sort(m_local_data->paths.begin(), m_local_data->paths.end());
+#if defined(INDEXER_VERBOSE)
+		for (auto& itm : m_local_data->paths) {
+			//Marker value, checked at the end of this function to detect
+			//entries that were never hashed
+			itm.hash.part_a = 1;
+			itm.hash.part_b = 1;
+			itm.hash.part_c = 1;
+
+			if (itm.is_dir)
+				std::cout << "(D) ";
+			else
+				std::cout << "(F) ";
+			std::cout << itm.path << " (" << itm.level << ")\n";
+		}
+		std::cout << "-----------------------------------------------------\n";
+#endif
+
+#if defined(WITH_PROGRESS_FEEDBACK)
+		m_local_data->done_count = 0;
+		hash_dir(
+			m_local_data->paths.begin(),
+			m_local_data->paths.begin(),
+			m_local_data->paths.end(),
+			base_path,
+			[this](std::size_t parNext) {
+				++m_local_data->done_count;
+				m_local_data->processing_index = parNext;
+				m_local_data->step_notify.notify_all();
+			}
+		);
+
+		assert(m_local_data->done_count == m_local_data->file_count);
+#else
+		hash_dir(
+			m_local_data->paths.begin(),
+			m_local_data->paths.begin(),
+			m_local_data->paths.end(),
+			base_path,
+			[](std::size_t) {}
+		);
+#endif
+
+#if defined(INDEXER_VERBOSE)
+		for (const auto& itm : m_local_data->paths) {
+			assert(not (1 == itm.hash.part_a and 1 == itm.hash.part_b and 1 == itm.hash.part_c));
+		}
+#endif
+	}
+
+	//Writes the scanned set to the database under the given name and type.
+	//Unless parForce is true, the database is first searched for the hash of
+	//the first entry: when it's found nothing is written and false is returned.
+	//Returns true after the data has been handed to write_to_db().
+	//The first entry is accessed unconditionally, so at least one path must
+	//have been added before calling this.
+	bool Indexer::add_to_db (const std::string& parSetName, char parType, bool parForce) const {
+#if defined(WITH_PROGRESS_FEEDBACK)
+		assert(m_local_data->done_count == m_local_data->file_count);
+#endif
+		const auto& entries = m_local_data->paths;
+		const FileEntry& first_entry = entries.front();
+
+		if (not parForce) {
+			FileRecordData found_item;
+			SetRecordDataFull found_set;
+			const bool already_in_db = read_from_db(
+				found_item,
+				found_set,
+				m_local_data->db_settings,
+				std::string(tiger_to_string(first_entry.hash, true))
+			);
+			if (already_in_db) {
+				return false;
+			}
+		}
+
+		//Paths are stored relative to the first entry's path
+		PathName base_path(first_entry.path);
+		std::vector<FileRecordData> records;
+		records.reserve(entries.size());
+		for (const FileEntry& entry : entries) {
+			records.push_back(FileRecordData {
+				make_relative_path(base_path, PathName(entry.path)).path(),
+				tiger_to_string(entry.hash),
+				entry.access_time,
+				entry.modify_time,
+				entry.level,
+				entry.file_size,
+				entry.is_dir,
+				entry.is_symlink
+			});
+		}
+
+		SetRecordData set_data {parSetName, parType};
+		write_to_db(m_local_data->db_settings, records, set_data);
+		return true;
+	}
+
+	//Appends a new entry for parPath to the list and, when it's not a
+	//directory, counts it as a file. Always returns true.
+	bool Indexer::add_path (const char* parPath, const fastf::FileStats& parStats) {
+		m_local_data->paths.emplace_back(parPath, parStats);
+		m_local_data->file_count += (parStats.is_dir ? 0 : 1);
+		return true;
+	}
+
+#if defined(INDEXER_VERBOSE)
+	//Debug helper: prints to stdout the relative path of every file first,
+	//then of every directory. The first entry is used as the base path, so
+	//the list must not be empty.
+	void Indexer::dump() const {
+		PathName base_path(m_local_data->paths.front().path);
+		const auto& entries = m_local_data->paths;
+
+		//Prints the entries whose is_dir flag equals parWantDirs
+		const auto print_matching = [&](bool parWantDirs) {
+			for (const auto& entry : entries) {
+				if (entry.is_dir != parWantDirs) {
+					continue;
+				}
+				PathName entry_path(entry.path);
+				std::cout << make_relative_path(base_path, entry_path).path() << '\n';
+			}
+		};
+
+		std::cout << "---------------- FILE LIST ----------------\n";
+		print_matching(false);
+		std::cout << "---------------- DIRECTORY LIST ----------------\n";
+		print_matching(true);
+	}
+#endif
+
+	//True when fewer than two entries have been added.
+	//NOTE(review): a single entry is presumably just the scanned directory
+	//itself, hence it counts as empty - confirm against the caller.
+	bool Indexer::empty() const {
+		return m_local_data->paths.size() < 2;
+	}
+
+#if defined(WITH_PROGRESS_FEEDBACK)
+	//Condition variable that calculate_hash() notifies every time it moves on
+	//to a new file
+	std::condition_variable& Indexer::step_notify() {
+		return m_local_data->step_notify;
+	}
+#endif
+
+#if defined(WITH_PROGRESS_FEEDBACK)
+	//Path, relative to the first entry, of the file that is being hashed.
+	//An empty string is returned when there are no entries or when the
+	//current index is 0.
+	std::string Indexer::current_item() const {
+		//processing_index is atomic and gets updated while hashing: load it
+		//once, so that the value that is checked is the same that is used
+		const std::size_t curr_index = m_local_data->processing_index;
+		if (m_local_data->paths.empty() or 0 == curr_index)
+			return std::string();
+
+		PathName base_path(m_local_data->paths.front().path);
+		PathName ret_path(m_local_data->paths[curr_index].path);
+		return make_relative_path(base_path, ret_path).path();
+	}
+#endif
+
+	//Returns the path, relative to the first entry, of the parIndex-th file;
+	//directories are skipped while counting.
+	//Throws std::out_of_range when parIndex is not less than the number of files.
+	std::string Indexer::operator[] (std::size_t parIndex) const {
+		const std::size_t available = m_local_data->file_count;
+		if (parIndex >= available) {
+			std::ostringstream oss;
+			oss << "Requested index " << parIndex << " is out of range: only " << available << " items are available";
+			throw std::out_of_range(oss.str());
+		}
+
+		auto& entries = m_local_data->paths;
+		assert(not entries.empty());
+
+		//Iterate on files only and stop at the requested one
+		auto file_it = boost::make_filter_iterator<IsFile<>>(entries.begin(), entries.end());
+		std::advance(file_it, parIndex);
+
+		return make_relative_path(PathName(entries.front().path), PathName(file_it->path)).path();
+	}
+} //namespace din
--- a/src/scan/indexer.hpp
+++ b/src/scan/indexer.hpp
@ -0,0 +1,71 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef idE555EF56730442C1ADDC7B2AE7A9340E
+#define idE555EF56730442C1ADDC7B2AE7A9340E
+
+#include <memory>
+#include <string>
+
+#if !defined(NDEBUG)
+#	define INDEXER_VERBOSE
+#endif
+
+#if defined(WITH_PROGRESS_FEEDBACK)
+//Declaring names inside namespace std is undefined behaviour and does not
+//match the real class on standard libraries that use inline namespaces,
+//so pull in the actual header instead of forward declaring the type.
+#	include <condition_variable>
+#endif
+
+namespace fastf {
+	struct FileStats;
+} //namespace fastf
+
+namespace din {
+	struct DinDBSettings;
+
+	//Collects the paths reported by a file search and exposes them by
+	//index. All the state lives behind an opaque LocalData object, so the
+	//class is movable but not copyable.
+	//NOTE(review): calculate_hash(), add_to_db(), total_items() and
+	//processed_items() are defined outside this header; the notes on them
+	//below come from how main() uses them and should be confirmed.
+	class Indexer {
+	public:
+		explicit Indexer ( const DinDBSettings& parDBSettings );
+		Indexer ( Indexer&& ) = default;
+		Indexer ( const Indexer& ) = delete;
+		~Indexer ( void ) noexcept;
+
+		//Used by main() as the FileSearcher callback.
+		bool add_path ( const char* parPath, const fastf::FileStats& parStats );
+#if defined(INDEXER_VERBOSE)
+		//Prints the collected files and directories to stdout (debug builds).
+		void dump ( void ) const;
+#endif
+
+		//main() uses this as the number of items the hashing step has to process.
+		std::size_t total_items ( void ) const;
+		//Relative path of the parIndex-th file; throws std::out_of_range
+		//when parIndex is not smaller than the number of files.
+		std::string operator[] ( std::size_t parIndex ) const;
+#if defined(WITH_PROGRESS_FEEDBACK)
+		//Progress reporting interface, polled by main() while the hash is
+		//being calculated on another thread.
+		std::size_t processed_items ( void ) const;
+		std::string current_item ( void ) const;
+		std::condition_variable& step_notify ( void );
+#endif
+		void calculate_hash ( void );
+		//main() treats a false return as "not written to the DB".
+		bool add_to_db ( const std::string& parSetName, char parType, bool parForce=false ) const;
+		//True when fewer than two paths have been collected.
+		bool empty ( void ) const;
+
+	private:
+		struct LocalData;
+
+		std::unique_ptr<LocalData> m_local_data;
+	};
+} //namespace din
+
+#endif
--- a/src/scan/main.cpp
+++ b/src/scan/main.cpp
@ -0,0 +1,163 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if defined(WITH_PROGRESS_FEEDBACK) && !defined(NDEBUG)
+#	undef WITH_PROGRESS_FEEDBACK
+#endif
+
+#include <iostream>
+#include <iomanip>
+#include <ciso646>
+#include <sstream>
+#include <string>
+#include <algorithm>
+#include <functional>
+#include <iterator>
+#if defined(WITH_PROGRESS_FEEDBACK)
+#	include <chrono>
+#	include <thread>
+#	include <mutex>
+#	include <condition_variable>
+#endif
+#include <wordexp.h>
+#include "dindexerConfig.h"
+#include "filesearcher.hpp"
+#include "indexer.hpp"
+#include "settings.hpp"
+#include "commandline.hpp"
+
+namespace {
+	void run_hash_calculation ( din::Indexer& parIndexer, bool parShowProgress );
+	std::string expand ( const char* parString );
+} //unnamed namespace
+
+//Entry point of the scan action: parses the command line, loads the DB
+//settings, collects every path below "search-path", hashes what was found
+//and finally writes the result to the database.
+//Exit codes: 0 on success (also when parse_commandline() reports that
+//there is nothing left to do, or when the set was not written to the DB),
+//1 when the settings can't be loaded or nothing was found, 2 on invalid
+//command line arguments.
+int main (int parArgc, char* parArgv[]) {
+	using std::placeholders::_1;
+	using std::placeholders::_2;
+	using boost::program_options::variables_map;
+
+	variables_map vm;
+	try {
+		if (din::parse_commandline(parArgc, parArgv, vm)) {
+			return 0;
+		}
+	}
+	catch (const std::invalid_argument& err) {
+		std::cerr << err.what() << "\nUse --help for help" << std::endl;
+		return 2;
+	}
+	const std::string search_path(vm["search-path"].as<std::string>());
+#if defined(WITH_PROGRESS_FEEDBACK)
+	const bool verbose = (0 == vm.count("quiet"));
+#else
+	const bool verbose = false;
+#endif
+
+	din::DinDBSettings settings;
+	{
+		const bool loaded = din::load_settings(expand(CONFIG_FILE_PATH), settings);
+		if (not loaded) {
+			std::cerr << "Can't load settings from dindexerrc.yml, quitting\n";
+			return 1;
+		}
+	}
+
+	din::Indexer indexer(settings);
+	fastf::FileSearcher searcher(search_path);
+	fastf::FileSearcher::ConstCharVecType ext, ignore;
+	searcher.SetFollowSymlinks(true);
+	searcher.SetCallback(fastf::FileSearcher::CallbackType(std::bind(&din::Indexer::add_path, &indexer, _1, _2)));
+	//Announce the search before it starts: it was previously reported only
+	//after Search() had already returned.
+	if (verbose) {
+		std::cout << "Fetching items list...\n";
+		std::cout.flush();
+	}
+	searcher.Search(ext, ignore);
+
+	if (indexer.empty()) {
+		std::cerr << "Nothing found at the given location, quitting\n";
+		return 1;
+	}
+	else {
+		run_hash_calculation(indexer, verbose);
+		if (verbose) {
+			std::cout << "Writing to database...\n";
+		}
+		if (not indexer.add_to_db(vm["setname"].as<std::string>(), vm["type"].as<char>())) {
+			std::cerr << "Not written to DB, likely because a set with the same hash already exists\n";
+		}
+	}
+	return 0;
+}
+
+namespace {
+	//Runs parIndexer.calculate_hash(). When progress feedback is compiled
+	//in and parShowProgress is true, the calculation runs on a worker thread
+	//while this thread keeps a one-line progress report up to date on stdout;
+	//otherwise the calculation runs synchronously on the calling thread.
+	//Nothing is done when the indexer is empty.
+	void run_hash_calculation (din::Indexer& parIndexer, bool parShowProgress) {
+		if (parIndexer.empty()) {
+			return;
+		}
+
+#if !defined(WITH_PROGRESS_FEEDBACK)
+		parShowProgress = false;
+#else
+		const auto total_items = parIndexer.total_items();
+		if (0 == total_items) {
+			//Nothing to report on: avoids the "total_items - 1" wrap-around
+			//and a meaningless "file 1 of 0" line.
+			parShowProgress = false;
+		}
+#endif
+		if (not parShowProgress) {
+			parIndexer.calculate_hash();
+		}
+#if defined(WITH_PROGRESS_FEEDBACK)
+		else {
+			typedef std::ostream_iterator<char> cout_iterator;
+
+			std::cout << "Processing";
+			std::cout.flush();
+			std::thread hash_thread(&din::Indexer::calculate_hash, &parIndexer);
+			std::mutex progress_print;
+			std::size_t clear_size = 0;
+			//Number of decimal digits of total_items, used to pad the counter
+			const auto digit_count = std::to_string(total_items).size();
+			do {
+				std::unique_lock<std::mutex> lk(progress_print);
+				//Bounded wait: a notification sent while this thread is busy
+				//printing (or before it starts waiting at all) would otherwise
+				//be lost and, if it was the last one, block here forever.
+				parIndexer.step_notify().wait_for(lk, std::chrono::milliseconds(100));
+				//Blank out the previous message before writing the new one
+				std::cout << '\r';
+				std::fill_n(cout_iterator(std::cout), clear_size, ' ');
+				std::cout << '\r';
+				{
+					std::ostringstream oss;
+					const auto item_index = std::min(total_items - 1, parIndexer.processed_items());
+					oss << "Processing file "
+						<< std::setw(static_cast<int>(digit_count)) << std::setfill(' ') << (item_index + 1)
+						<< " of " << total_items << " \"" << parIndexer.current_item() << '"';
+					const auto msg(oss.str());
+					clear_size = msg.size();
+					std::cout << msg;
+					std::cout.flush();
+				}
+			} while (parIndexer.processed_items() != total_items);
+
+			hash_thread.join();
+			if (parIndexer.processed_items() > 0) {
+				std::cout << '\n';
+			}
+		}
+#endif
+	}
+
+	//Performs shell-like word expansion (tilde, variables, ...) on parString
+	//through wordexp() and returns the resulting words concatenated with no
+	//separator. If the expansion fails, parString is returned unchanged.
+	std::string expand (const char* parString) {
+		wordexp_t p;
+		const int result = wordexp(parString, &p, 0);
+		if (0 != result) {
+			//Only WRDE_NOSPACE can leave partially allocated results behind;
+			//for any other error p is not touched and must not be read or freed.
+			if (WRDE_NOSPACE == result) {
+				wordfree(&p);
+			}
+			return std::string(parString);
+		}
+
+		char** w = p.we_wordv;
+		std::ostringstream oss;
+		for (std::size_t z = 0; z < p.we_wordc; ++z) {
+			oss << w[z];
+		}
+		wordfree(&p);
+		return oss.str();
+	}
+} //unnamed namespace
--- a/src/scan/pathname.cpp
+++ b/src/scan/pathname.cpp
@ -0,0 +1,220 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "pathname.hpp"
+#include <algorithm>
+#include <functional>
+#include <ciso646>
+#include <iostream>
+
+namespace din {
+	const std::string PathName::m_empty_str("");
+
+	namespace {
+		std::string get_joint_atoms ( const StringPool<char>& parPool, bool parAbs, std::size_t parSkipRight=0 );
+
+		//True when parPtr lies in the half-open range [parBeg, parEnd).
+		//std::less is used instead of the built-in operators so that the
+		//comparison is well defined for unrelated pointers too.
+		bool ptr_between (const char* parPtr, const char* parBeg, const char* parEnd) {
+			const std::less<const char*> comes_before;
+
+			const bool not_before_begin = not comes_before(parPtr, parBeg);
+			return not_before_begin and comes_before(parPtr, parEnd);
+		}
+
+		//Counts the runs of consecutive parDelim characters found in parIn:
+		//a run of any length counts as one.
+		std::size_t count_grouped (boost::string_ref parIn, char parDelim) {
+			std::size_t runs = 0;
+			//The scan starts as if the previous character was '\0'
+			bool inside_run = ('\0' == parDelim);
+			for (const char ch : parIn) {
+				const bool is_delim = (parDelim == ch);
+				if (is_delim and not inside_run) {
+					++runs;
+				}
+				inside_run = is_delim;
+			}
+			return runs;
+		}
+
+		//Appends to parOut one string_ref for every non-empty piece of
+		//parPath delimited by '/'. The pieces refer to parPath's characters,
+		//nothing is copied.
+		void split_path (std::vector<boost::string_ref>* parOut, boost::string_ref parPath) {
+			const auto first = parPath.begin();
+			const auto last = parPath.end();
+
+			auto atom_beg = first;
+			for (;;) {
+				const auto atom_end = std::find(atom_beg, last, '/');
+				if (atom_end != atom_beg) {
+					parOut->push_back(parPath.substr(atom_beg - first, atom_end - atom_beg));
+				}
+				if (last == atom_end) {
+					break;
+				}
+				//Resume right after the slash that was just found
+				atom_beg = atom_end + 1;
+			}
+		}
+
+		//Joins the atoms stored in parPool with '/' separators, leaving out
+		//the last parSkipRight atoms. A leading '/' is added when parAbs is
+		//true. When no atom is left the result is "/" for an absolute path
+		//whose pool is empty, and an empty string in every other case.
+		//NOTE(review): this means that skipping every atom of a non-empty
+		//absolute path (eg: dirname() of "/foo") yields "" rather than "/" -
+		//confirm this is what callers expect.
+		std::string get_joint_atoms (const StringPool<char>& parPool, bool parAbs, std::size_t parSkipRight) {
+			const auto orig_atom_count = parPool.size();
+			const auto atom_count = (parSkipRight >= orig_atom_count ? 0 : orig_atom_count - parSkipRight);
+			if (not atom_count) {
+				if (parPool.empty() and parAbs) {
+					return std::string("/");
+				}
+				else {
+					return std::string("");
+				}
+			}
+
+			//Exact size of the result: optional leading slash, every atom and
+			//one separator between each pair of atoms
+			std::size_t reserve = (parAbs ? 1 : 0);
+			for (std::size_t z = 0; z < atom_count; ++z) {
+				reserve += parPool[z].size();
+			}
+			reserve += atom_count - 1;
+
+			std::string out;
+			out.reserve(reserve);
+			const char* slash = (parAbs ? "/" : "");
+			for (std::size_t z = 0; z < atom_count; ++z) {
+				out += slash;
+				const auto& curr_itm = parPool[z];
+				out.insert(out.end(), curr_itm.begin(), curr_itm.end());
+				slash = "/";
+			}
+			//Plain return: wrapping a local in std::move() only prevents NRVO
+			return out;
+		}
+	} //unnamed namespace
+
+	//Builds a path from parPath by splitting it at every '/' and storing
+	//the resulting atoms into the pool. The path is flagged as absolute
+	//when parPath begins with '/'. An empty parPath gives a relative path
+	//with no atoms.
+	PathName::PathName (boost::string_ref parPath) :
+		//Always start from a null pointer: it used to be left uninitialized
+		//for non-empty paths, which made original_path() read garbage.
+		m_original_path(nullptr),
+		m_absolute(false)
+	{
+		if (parPath.empty()) {
+			return;
+		}
+
+		m_absolute = ('/' == parPath.front());
+		std::string path(parPath.begin(), parPath.end());
+
+		//Upper bound on the number of atoms, only used to size the vector
+		const auto count = count_grouped(path, '/');
+		const std::size_t trailing = (path.back() == '/' ? 1 : 0);
+		const std::size_t absolute = (m_absolute ? 1 : 0);
+		const auto res = count + 1 - trailing - absolute;
+		std::vector<boost::string_ref> atoms;
+		atoms.reserve(res);
+		split_path(&atoms, path);
+		m_pool.insert(atoms, &path);
+	}
+
+	//Returns the whole path as a string: every atom joined by '/', with a
+	//leading '/' when the path is absolute.
+	std::string PathName::path() const {
+		return get_joint_atoms(m_pool, m_absolute);
+	}
+
+	//Appends all the atoms of parOther to this path. The absolute flag of
+	//this path is not changed.
+	void PathName::join (const PathName& parOther) {
+		m_pool.update(parOther.m_pool);
+	}
+
+	//Returns the parIndex-th atom of the path. No range check is performed.
+	const boost::string_ref PathName::operator[] (std::size_t parIndex) const {
+		return *(m_pool.begin() + parIndex);
+	}
+
+	//Number of atoms the path is currently made of.
+	std::size_t PathName::atom_count ( void ) const {
+		return m_pool.size();
+	}
+
+	//Appends parOther to the path as one single atom: the string is not
+	//split at '/'. The temporary std::string only needs to live for the
+	//duration of the call, as the pool either copies it or refers to an
+	//equal string it already owns.
+	void PathName::join (const char* parOther) {
+		const std::string src(parOther);
+		const boost::string_ref ref(src);
+		m_pool.insert(ref, &src);
+	}
+
+	//Appends parOther as one single atom. parSource is the string that
+	//parOther refers to: the pool uses it to work out the offset of
+	//parOther when it has to store its own copy of the source.
+	void PathName::join (boost::string_ref parOther, const std::string* parSource) {
+		m_pool.insert(parOther, parSource);
+	}
+
+	//Builds the path that leads from parBasePath to parOtherPath: one ".."
+	//for every atom of parBasePath that follows the common prefix, then the
+	//atoms of parOtherPath that follow the common prefix.
+	//When parOtherPath is absolute and parBasePath is not, a copy of
+	//parOtherPath is returned as it is.
+	PathName make_relative_path (const PathName& parBasePath, const PathName& parOtherPath) {
+		if (not parBasePath.is_absolute() and parOtherPath.is_absolute()) {
+			return parOtherPath;
+		}
+
+		//Length of the prefix shared by the two paths, in atoms
+		std::size_t common_atoms = 0;
+		{
+			const std::size_t shortest = std::min(parOtherPath.atom_count(), parBasePath.atom_count());
+			for (std::size_t z = 0; z < shortest; ++z) {
+				if (parOtherPath[z] == parBasePath[z]) {
+					++common_atoms;
+				}
+				else {
+					break;
+				}
+			}
+		}
+
+		PathName retval("");
+		const auto ellipses_count = parBasePath.atom_count() - common_atoms;
+		for (std::size_t z = 0; z < ellipses_count; ++z) {
+			retval.join("..");
+		}
+
+		const auto remaining_atoms = parOtherPath.atom_count() - common_atoms;
+		for (std::size_t z = 0; z < remaining_atoms; ++z) {
+			retval.join(parOtherPath[z + common_atoms], parOtherPath.get_stringref_source(z + common_atoms));
+		}
+		//Plain return: wrapping a local in std::move() only prevents NRVO
+		return retval;
+	}
+
+	//Returns the pooled string that the parIndex-th atom refers to.
+	const std::string* PathName::get_stringref_source (std::size_t parIndex) const {
+		return m_pool.get_stringref_source(parIndex);
+	}
+
+	//Returns the path without its last atom, or an empty string when the
+	//path has no atoms at all.
+	std::string PathName::dirname() const {
+		const bool has_atoms = (0 != this->atom_count());
+		return (has_atoms ? get_joint_atoms(m_pool, m_absolute, 1) : std::string());
+	}
+
+	//Writes the string returned by parPath.path() to parStream.
+	std::ostream& operator<< (std::ostream& parStream, const PathName& parPath) {
+		parStream << parPath.path();
+		return parStream;
+	}
+
+	//Removes the last atom from the path (nothing happens when there are
+	//no atoms) and returns *this so that calls can be chained.
+	PathName& PathName::pop_right() {
+		m_pool.pop();
+		return *this;
+	}
+
+	//Two paths differ when they don't have the same number of atoms or when
+	//any pair of atoms at the same position differs, that is whenever
+	//operator== says they are not equal. The absolute flag is not compared.
+	bool PathName::operator!= (const PathName& parOther) const {
+		return not (*this == parOther);
+	}
+
+	//Two paths are equal when they are made of the same atoms in the same
+	//order. The absolute flag is not compared.
+	//NOTE(review): as a consequence "/a" and "a" compare equal - confirm
+	//this is intended.
+	bool PathName::operator== (const PathName& parOther) const {
+		if (atom_count() != parOther.atom_count()) {
+			return false;
+		}
+		return std::equal(m_pool.begin(), m_pool.end(), parOther.m_pool.begin());
+	}
+} //namespace din
--- a/src/scan/pathname.hpp
+++ b/src/scan/pathname.hpp
@ -0,0 +1,62 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef id279E04E31E2C4D98B8C902781A3CE018
+#define id279E04E31E2C4D98B8C902781A3CE018
+
+#include "stringpool.hpp"
+#include <vector>
+#include <string>
+#include <boost/utility/string_ref.hpp>
+#include <map>
+#include <iostream>
+
+namespace din {
+	//A filesystem path stored as a sequence of atoms (the pieces between
+	//'/' separators) plus a flag telling if the path is absolute.
+	class PathName {
+	public:
+		PathName ( PathName&& ) = default;
+		PathName ( const PathName& ) = default;
+		//Splits parPath at '/' characters; empty pieces are discarded.
+		explicit PathName ( boost::string_ref parPath );
+		~PathName ( void ) noexcept = default;
+
+		bool is_absolute ( void ) const { return m_absolute; }
+		//The atoms joined by '/', with a leading '/' for absolute paths.
+		std::string path ( void ) const;
+		//The string pointed to by m_original_path, or an empty string when
+		//that pointer is null.
+		const std::string& original_path ( void ) const { return (m_original_path ? *m_original_path : m_empty_str); }
+		std::size_t atom_count ( void ) const;
+		//parIndex-th atom; no range check is performed.
+		const boost::string_ref operator[] ( std::size_t parIndex ) const;
+		//Appends every atom of parOther.
+		void join ( const PathName& parOther );
+		//Appends parOther as one single atom, without splitting it.
+		void join ( const char* parOther );
+		//Appends parOther as one single atom; parSource is the string
+		//parOther refers to.
+		void join ( boost::string_ref parOther, const std::string* parSource );
+		const std::string* get_stringref_source ( std::size_t parIndex ) const;
+		//The path without its last atom.
+		std::string dirname ( void ) const;
+		//Drops the last atom and returns *this.
+		PathName& pop_right ( void );
+		//Comparisons look at the atoms only, not at the absolute flag.
+		bool operator!= ( const PathName& parOther ) const;
+		bool operator== ( const PathName& parOther ) const;
+
+	private:
+		//Returned by original_path() when m_original_path is null
+		static const std::string m_empty_str;
+
+		StringPool<char> m_pool;
+		const std::string* m_original_path;
+		bool m_absolute;
+	};
+
+	PathName make_relative_path ( const PathName& parBasePath, const PathName& parOtherPath );
+	std::ostream& operator<< ( std::ostream& parStream, const PathName& parPath );
+} //namespace din
+
+#endif
--- a/src/scan/settings.cpp
+++ b/src/scan/settings.cpp
@ -0,0 +1,66 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "settings.hpp"
+#include <yaml-cpp/yaml.h>
+#include <ciso646>
+
+namespace YAML {
+	//Conversion between a YAML map and din::DinDBSettings. The map must
+	//hold exactly five entries: address, username, password, port and dbname.
+	template<>
+	struct convert<din::DinDBSettings> {
+		//Stores every field of parSettings into a new node, under the key
+		//named after the field.
+		static Node encode (const din::DinDBSettings& parSettings) {
+			Node out;
+			out["address"] = parSettings.address;
+			out["username"] = parSettings.username;
+			out["password"] = parSettings.password;
+			out["port"] = parSettings.port;
+			out["dbname"] = parSettings.dbname;
+			return out;
+		}
+
+		//Fills parSettings from parNode. Returns false, leaving parSettings
+		//alone, when parNode is not a map or doesn't have five entries.
+		static bool decode (const Node& parNode, din::DinDBSettings& parSettings) {
+			const bool well_formed = (parNode.IsMap() and 5 == parNode.size());
+			if (not well_formed) {
+				return false;
+			}
+
+			parSettings.address = parNode["address"].as<std::string>();
+			parSettings.username = parNode["username"].as<std::string>();
+			parSettings.password = parNode["password"].as<std::string>();
+			parSettings.dbname = parNode["dbname"].as<std::string>();
+			parSettings.port = parNode["port"].as<uint16_t>();
+			return true;
+		}
+	};
+} //namespace YAML
+
+namespace din {
+	//Reads the "db_settings" entry of the YAML file at parPath into parOut.
+	//Returns true on success; false when the entry is missing or when
+	//loading or converting the file throws a std::exception. parOut is only
+	//assigned on success.
+	bool load_settings (const std::string& parPath, DinDBSettings& parOut) {
+		bool loaded = false;
+		try {
+			auto root = YAML::LoadFile(parPath);
+			auto db_node = root["db_settings"];
+
+			if (db_node) {
+				parOut = db_node.as<DinDBSettings>();
+				loaded = true;
+			}
+		}
+		catch (const std::exception&) {
+			loaded = false;
+		}
+
+		return loaded;
+	}
+} //namespace din
--- a/src/scan/settings.hpp
+++ b/src/scan/settings.hpp
@ -0,0 +1,39 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef idDC29E3C667BD4793BA0644AE7DC5BD3F
+#define idDC29E3C667BD4793BA0644AE7DC5BD3F
+
+#include <string>
+#include <cstdint>
+
+namespace din {
+	//Parameters of the database connection, as read from the "db_settings"
+	//entry of the configuration file.
+	//NOTE(review): port has no initializer, so it holds an indeterminate
+	//value in a default-initialized object until load_settings() succeeds.
+	struct DinDBSettings {
+		std::string address;
+		std::string username;
+		std::string password;
+		std::string dbname;
+		uint16_t port;
+	};
+
+	//struct DinSettings {
+	//};
+
+	bool load_settings ( const std::string& parPath, DinDBSettings& parOut );
+} //namespace din
+
+#endif
--- a/src/scan/stringpool.hpp
+++ b/src/scan/stringpool.hpp
@ -0,0 +1,70 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef id9CF5E6FA7E334DF09559C2968C494CB9
+#define id9CF5E6FA7E334DF09559C2968C494CB9
+
+#include <string>
+#include <boost/utility/string_ref.hpp>
+#include <vector>
+#include <utility>
+#include <ciso646>
+#include <cstdint>
+#include <algorithm>
+#include <functional>
+#include <boost/iterator/transform_iterator.hpp>
+
+namespace din {
+	//An ordered list of string references backed by a pool of owned
+	//strings. Every reference in the list points into one of the strings
+	//of the pool, which keeps a count of the references made to it; strings
+	//that contain an incoming reference's text are reused instead of being
+	//stored again.
+	template <typename C, typename Str=std::basic_string<C>, typename StrRef=boost::basic_string_ref<C>>
+	class StringPool {
+		typedef std::pair<StrRef, const Str*> StringListPair;
+		typedef std::vector<std::pair<Str, std::size_t>> PoolType;
+		typedef std::vector<StringListPair> StringListType;
+		typedef std::function<StrRef(const StringListPair&)> FuncGetFirst;
+
+	public:
+		typedef C char_type;
+		typedef Str string_type;
+		typedef StrRef stringref_type;
+		typedef boost::transform_iterator<FuncGetFirst, typename StringListType::const_iterator> const_iterator;
+
+		StringPool ( void ) = default;
+		//Deep copy: the list holds pointers and references into the pool, so
+		//a member-wise copy would leave the new object pointing into the
+		//strings owned by parOther. The list is rebuilt against this
+		//object's own pool instead. Reserving up front keeps the new pool
+		//from reallocating while it's being filled.
+		StringPool ( const StringPool& parOther ) {
+			m_pool.reserve(parOther.m_pool.size());
+			m_strings.reserve(parOther.m_strings.size());
+			this->update(parOther);
+		}
+		//Moving a vector hands over its storage, so the addresses stored in
+		//the list stay valid and the defaulted move operations are fine.
+		StringPool ( StringPool&& ) = default;
+		~StringPool ( void ) noexcept = default;
+
+		StringPool& operator= ( const StringPool& parOther ) {
+			StringPool tmp(parOther);
+			m_pool.swap(tmp.m_pool);
+			m_strings.swap(tmp.m_strings);
+			return *this;
+		}
+		StringPool& operator= ( StringPool&& ) = default;
+
+		//Appends the (reference, source string) pairs in the given range
+		template <typename ItR>
+		void update ( ItR parDataBeg, ItR parDataEnd );
+		//Appends every reference held by parOther
+		void update ( const StringPool& parOther );
+		//Appends references that point into *parBaseString
+		void insert ( const std::vector<stringref_type>& parStrings, const string_type* parBaseString );
+		void insert ( stringref_type parString, const string_type* parBaseString );
+		//Address of the pooled string equal to parLiteral, null if none
+		const string_type* ptr_to_literal ( const char* parLiteral );
+		std::size_t size ( void ) const { return m_strings.size(); }
+		bool empty ( void ) const { return m_strings.empty(); }
+		const_iterator begin ( void ) const;
+		const_iterator end ( void ) const;
+		//Pooled string that the parIndex-th reference points into
+		const string_type* get_stringref_source ( std::size_t parIndex ) const;
+		const stringref_type& operator[] ( std::size_t parIndex ) const;
+		//Removes the last reference of the list
+		void pop ( void );
+
+	private:
+		//Owned strings, each with the number of references made to it
+		PoolType m_pool;
+		//References in insertion order, each with the pooled string it uses
+		StringListType m_strings;
+	};
+} //namespace din
+
+#include "stringpool.inl"
+
+#endif
--- a/src/scan/stringpool.inl
+++ b/src/scan/stringpool.inl
@ -0,0 +1,140 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+namespace din {
+	namespace implem {
+		//"Clone if possible": looks for the text of parClone inside
+		//parSource. When found, returns a reference to that portion of
+		//parSource paired with true; otherwise returns parClone itself
+		//paired with false.
+		template <typename StrRef>
+		std::pair<StrRef, bool> clone_ifp (const StrRef& parClone, StrRef parSource) {
+			const auto found_at = parSource.find(parClone);
+			const bool found = (parSource.npos != found_at);
+			if (not found) {
+				return std::make_pair(parClone, false);
+			}
+			return std::make_pair(parSource.substr(found_at, parClone.size()), true);
+		}
+	} //namespace implem
+
+	//Looks for a pooled string equal to parLiteral and returns its address.
+	//Returns null when parLiteral is null or no pooled string matches.
+	template <typename C, typename Str, typename StrRef>
+	auto StringPool<C, Str, StrRef>::ptr_to_literal (const char* parLiteral) -> const string_type* {
+		if (not parLiteral)
+			return nullptr;
+
+		for (const auto& p : m_pool) {
+			//Compare the current pool entry: the loop used to refer to
+			//m_pool.first, which doesn't exist on a vector.
+			if (p.first == parLiteral) {
+				return &p.first;
+			}
+		}
+		return nullptr;
+	}
+
+	//Appends to the list every (reference, source string) pair in the range
+	//[parDataBeg, parDataEnd). For each pair, if the referenced text can be
+	//found inside a string already in the pool then that string is reused
+	//and its reference count is increased. Otherwise a copy of the whole
+	//source string is added to the pool and the new reference points to the
+	//same offset within that copy.
+	template <typename C, typename Str, typename StrRef>
+	template <typename ItR>
+	void StringPool<C, Str, StrRef>::update (ItR parDataBeg, ItR parDataEnd) {
+		typedef std::pair<string_type, std::size_t> PoolPair;
+		//Position of a list item: index of its pooled string, offset within it
+		typedef std::pair<std::size_t, std::size_t> Anchor;
+
+		while (parDataBeg != parDataEnd) {
+			const auto& remote_str = parDataBeg->first;
+			const auto* remote_source_str = parDataBeg->second;
+			bool cloned = false;
+
+			for (auto& local_src : m_pool) {
+				const string_type& local_str = local_src.first;
+				auto& local_ref_count = local_src.second;
+
+				auto cloned_result = implem::clone_ifp<StrRef>(remote_str, local_str);
+				cloned = cloned_result.second;
+				const auto& cloned_str = cloned_result.first;
+				if (cloned) {
+					++local_ref_count;
+					m_strings.push_back(StringListPair(cloned_str, &local_str));
+					break;
+				}
+			}
+
+			if (not cloned) {
+				const auto offset = remote_str.data() - remote_source_str->data();
+				const std::size_t old_pool_size = m_pool.size();
+
+				//If the pool is full, push_back() is going to move its strings
+				//to new storage, leaving every pointer and reference held in
+				//m_strings dangling. Remember where each of them points so
+				//they can be rebuilt afterwards.
+				std::vector<Anchor> anchors;
+				if (old_pool_size == m_pool.capacity()) {
+					anchors.reserve(m_strings.size());
+					for (const auto& itm : m_strings) {
+						std::size_t pool_index = 0;
+						while (pool_index < old_pool_size and &m_pool[pool_index].first != itm.second) {
+							++pool_index;
+						}
+						const std::size_t itm_offset = (pool_index < old_pool_size ?
+							static_cast<std::size_t>(itm.first.data() - itm.second->data()) :
+							0
+						);
+						anchors.push_back(Anchor(pool_index, itm_offset));
+					}
+				}
+
+				m_pool.push_back(PoolPair(*remote_source_str, static_cast<std::size_t>(1)));
+
+				for (std::size_t z = 0; z < anchors.size(); ++z) {
+					if (anchors[z].first >= old_pool_size) {
+						//Not pointing into this pool: nothing to fix
+						continue;
+					}
+					const string_type& owner = m_pool[anchors[z].first].first;
+					auto& itm = m_strings[z];
+					itm = StringListPair(stringref_type(owner).substr(anchors[z].second, itm.first.size()), &owner);
+				}
+
+				m_strings.push_back(StringListPair(stringref_type(m_pool.back().first).substr(offset, remote_str.size()), &m_pool.back().first));
+			}
+			++parDataBeg;
+		}
+	}
+
+	//Appends every reference held by parOther to this pool.
+	//NOTE(review): passing *this would make the iterator overload append to
+	//the same list it is iterating on - presumably never done, confirm.
+	template <typename C, typename Str, typename StrRef>
+	void StringPool<C, Str, StrRef>::update (const StringPool& parOther) {
+		this->update(parOther.m_strings.begin(), parOther.m_strings.end());
+	}
+
+	//Iterator to the first reference of the list. Dereferencing it gives
+	//the string reference only, without the pointer to its source string.
+	template <typename C, typename Str, typename StrRef>
+	auto StringPool<C, Str, StrRef>::begin() const -> const_iterator {
+		return const_iterator(m_strings.cbegin(), [](const StringListPair& parItm) { return parItm.first; });
+	}
+
+	//Past-the-end iterator matching begin().
+	template <typename C, typename Str, typename StrRef>
+	auto StringPool<C, Str, StrRef>::end() const -> const_iterator {
+		return const_iterator(m_strings.cend(), [](const StringListPair& parItm) { return parItm.first; });
+	}
+
+	//Appends all the references in parStrings, each of them paired with
+	//parBaseString as its source string.
+	template <typename C, typename Str, typename StrRef>
+	void StringPool<C, Str, StrRef>::insert (const std::vector<stringref_type>& parStrings, const string_type* parBaseString) {
+		StringListType staged;
+		staged.reserve(parStrings.size());
+		for (auto it = parStrings.begin(); it != parStrings.end(); ++it) {
+			staged.emplace_back(*it, parBaseString);
+		}
+		this->update(staged.begin(), staged.end());
+	}
+
+	//Appends one reference, paired with parBaseString as its source string.
+	template <typename C, typename Str, typename StrRef>
+	void StringPool<C, Str, StrRef>::insert (stringref_type parString, const string_type* parBaseString) {
+		const StringListType staged(1, StringListPair(parString, parBaseString));
+		this->update(staged.begin(), staged.end());
+	}
+
+	//Returns the source string paired with the parIndex-th reference of the
+	//list. No range check is performed.
+	template <typename C, typename Str, typename StrRef>
+	auto StringPool<C, Str, StrRef>::get_stringref_source (std::size_t parIndex) const -> const string_type* {
+		return m_strings[parIndex].second;
+	}
+
+	//Returns the parIndex-th reference of the list. No range check is
+	//performed.
+	template <typename C, typename Str, typename StrRef>
+	auto StringPool<C, Str, StrRef>::operator[] (std::size_t parIndex) const -> const stringref_type& {
+		return m_strings[parIndex].first;
+	}
+
+	//Removes the last reference of the list and decreases the reference
+	//count of the pooled string it points into; the string is dropped from
+	//the pool when nothing refers to it anymore. Nothing happens when the
+	//list is empty, or when the source string of the last reference is not
+	//found in the pool.
+	template <typename C, typename Str, typename StrRef>
+	void StringPool<C, Str, StrRef>::pop() {
+		if (m_strings.empty()) {
+			return;
+		}
+
+		const string_type* const owner = m_strings.back().second;
+		//Search backwards, the most recently added strings come last
+		for (auto idx = m_pool.size(); idx-- > 0;) {
+			auto& candidate = m_pool[idx];
+			if (&candidate.first != owner) {
+				continue;
+			}
+
+			m_strings.pop_back();
+			--candidate.second;
+			if (0 == candidate.second) {
+				m_pool.erase(m_pool.begin() + idx);
+			}
+			break;
+		}
+	}
+} //namespace din
--- a/src/scan/tiger.c
+++ b/src/scan/tiger.c
--- a/src/scan/tiger.cpp
+++ b/src/scan/tiger.cpp
@ -0,0 +1,128 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "tiger.hpp"
+#include <fstream>
+#include <cstdint>
+#include <memory>
+#include <cassert>
+#include <algorithm>
+#include <utility>
+#include <sstream>
+#include <iomanip>
+
+#if defined(__SSE2__)
+extern "C" void tiger_sse2_chunk ( const char* parStr1, const char* parStr2, uint64_t parLength, uint64_t parRes1[3], uint64_t parRes2[3] );
+extern "C" void tiger_sse2_last_chunk ( const char* parStr1, const char* parStr2, uint64_t parLength, uint64_t parRealLength1, uint64_t parRealLength2, uint64_t parRes1[3], uint64_t parRes2[3], char parPadding );
+extern "C" void tiger ( const char* parStr, uint64_t parLength, uint64_t parHash[3], char parPadding );
+
+#else
+#	error "Not implemented without SSE2"
+#endif
+
+namespace din {
+	namespace {
+		const uint32_t g_buff_size = 1024 * 8;
+		const char g_tiger_padding = 0x80; //0x01 for V1
+
+		//Reverses the order of the eight bytes of parNum.
+		uint64_t swap_long (uint64_t parNum) {
+			uint64_t reversed = 0;
+			for (unsigned int byte_idx = 0; byte_idx < sizeof(uint64_t); ++byte_idx) {
+				//Move the lowest byte of the input to the bottom of the output
+				reversed = (reversed << 8) | (parNum & 0xFF);
+				parNum >>= 8;
+			}
+			return reversed;
+		}
+	} //unnamed namespace
+
+	//Resets the three 64 bit parts of parHash to the fixed starting values
+	//used by this module before any data is hashed.
+	void tiger_init_hash (TigerHash& parHash) {
+		parHash.part_a = 0x0123456789ABCDEFULL;
+		parHash.part_b = 0xFEDCBA9876543210ULL;
+		parHash.part_c = 0xF096A5B4C3B2E187ULL;
+	}
+
+	//Hashes the file at parPath, updating two hashes in a single pass:
+	//parHashFile receives the hash of the file's content alone, while
+	//parHashDir is replaced by the hash of its previous value followed by
+	//the file's content. The size of the file is stored into parSizeOut.
+	//Throws std::ios_base::failure if the file can't be opened or its size
+	//can't be determined.
+	void tiger_file (const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir, uint64_t& parSizeOut) {
+		typedef decltype(std::declval<std::ifstream>().tellg()) FileSizeType;
+		tiger_init_hash(parHashFile);
+
+		std::ifstream src(parPath, std::ios::binary);
+		if (not src.is_open()) {
+			throw std::ios_base::failure("Can't open \"" + parPath + "\" for reading");
+		}
+		src.seekg(0, std::ios_base::end);
+		const auto file_size = src.tellg();
+		//tellg() reports -1 on failure: going on with it would end up asking
+		//the hashing routines to process a negative amount of data.
+		if (src.fail() or static_cast<std::streamoff>(file_size) < 0) {
+			throw std::ios_base::failure("Can't determine the size of \"" + parPath + "\"");
+		}
+		src.seekg(0, std::ios_base::beg);
+
+		//The buffer is at least as large as the padded dir hash, and
+		//buff_ptr is the first 64 byte aligned address within the allocation
+		const FileSizeType hash_size = (sizeof(TigerHash) + 63) & -64;
+		const uint32_t buffsize = static_cast<uint32_t>(std::max(hash_size, std::min<FileSizeType>(file_size, g_buff_size)));
+		std::unique_ptr<char[]> buff(new char[63 + buffsize]);
+		char* const buff_ptr = reinterpret_cast<char*>(reinterpret_cast<std::intptr_t>(buff.get() + 63) & (-64));
+		assert(buff_ptr >= buff.get() and buff_ptr + buffsize <= buff.get() + 63 + buffsize);
+
+		//Use the initial value of the dir's hash as if it was part of the data to hash and start
+		//by processing that value. Hash is reset to the initial value before the call to tiger.
+		{
+			std::copy(parHashDir.byte_data, parHashDir.byte_data + sizeof(parHashDir), buff_ptr);
+			std::fill(buff_ptr + sizeof(parHashDir), buff_ptr + hash_size, 0);
+			TigerHash dummy = {};
+			tiger_init_hash(parHashDir);
+			tiger_sse2_chunk(buff_ptr, buff_ptr, hash_size, dummy.data, parHashDir.data);
+		}
+
+		//Whole buffers first...
+		auto remaining = file_size;
+		while (remaining > buffsize) {
+			assert(buffsize >= sizeof(uint64_t) * 3);
+			assert(buffsize == (buffsize & -64));
+			remaining -= buffsize;
+			src.read(buff_ptr, buffsize);
+			tiger_sse2_chunk(buff_ptr, buff_ptr, buffsize, parHashFile.data, parHashDir.data);
+		}
+
+		//...then what's left: the part that is a multiple of 64 bytes goes
+		//through the regular routine, the rest through the closing one.
+		{
+			assert(remaining <= buffsize);
+			src.read(buff_ptr, remaining);
+			const auto aligned_size = remaining & -64;
+			if (aligned_size) {
+				tiger_sse2_chunk(buff_ptr, buff_ptr, aligned_size, parHashFile.data, parHashDir.data);
+			}
+
+			//Remember to pass the augmented data size for the second reallength value: we passed the initial
+			//dir's hash value (64 bytes) as if they were part of the data.
+			tiger_sse2_last_chunk(buff_ptr + aligned_size, buff_ptr + aligned_size, remaining - aligned_size, file_size, file_size + hash_size, parHashFile.data, parHashDir.data, g_tiger_padding);
+		}
+
+		parSizeOut = static_cast<uint64_t>(file_size);
+	}
+
+	//Returns parHash as a string of 48 hexadecimal digits: the three parts
+	//of the hash in order, each one byte-swapped and zero padded to 16
+	//digits. Letters are upper case when parUpcase is true.
+	std::string tiger_to_string (const TigerHash& parHash, bool parUpcase) {
+		std::ostringstream oss;
+		if (parUpcase) {
+			oss << std::uppercase;
+		}
+		oss << std::hex << std::setfill('0');
+
+		const uint64_t parts[] = { parHash.part_a, parHash.part_b, parHash.part_c };
+		for (const uint64_t part : parts) {
+			//The field width has to be set again before every number
+			oss << std::setw(2 * sizeof(uint64_t)) << swap_long(part);
+		}
+		return oss.str();
+	}
+
+	//Hashes the content of parData into parHash by calling the external
+	//tiger() routine with this module's padding byte.
+	void tiger_data (const std::string& parData, TigerHash& parHash) {
+		tiger (parData.data(), parData.size(), parHash.data, g_tiger_padding);
+	}
+
+	//Same as the std::string overload, for data stored in a vector of char.
+	void tiger_data (const std::vector<char>& parData, TigerHash& parHash) {
+		tiger (parData.data(), parData.size(), parHash.data, g_tiger_padding);
+	}
+} //namespace din
--- a/src/scan/tiger.h
+++ b/src/scan/tiger.h
@ -0,0 +1,125 @@
+/**
+ * Copyright (c) 2012 Francisco Blas Izquierdo Riera (klondike)
+ * The Tiger algorithm was written by Eli Biham and Ross Anderson and is
+ * available on the official Tiger algorithm page.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ *    the algorithm authorsip notice, this list of conditions and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ * 4. If this license is not appropriate for you please write me at
+ *    klondike ( a t ) klondike ( d o t ) es to negotiate another license.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
+ * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ **/
+
+/**
+ * These are some implementations of Tiger made without looking at the original
+ * reference code, to ensure the resulting code can be published under a free
+ * license. The paper was consulted, though, to learn how Tiger works.
+ */
+
+/** Implementation details:
+ * * Here we assume char and unsigned char have size 1. If that's not the case
+ *     with your compiler you may want to replace them with a type that does.
+ * * The fixed-width integer types come from <stdint.h>. MSVC versions older
+ *     than _MSC_VER 1600 take the #else branch below instead, where the four
+ *     types this header needs are declared with MSVC's __int32/__int64 keywords.
+ */
+
+#ifndef TIGER_H
+#define TIGER_H 1
+#if !defined(_MSC_VER) || (_MSC_VER >= 1600)
+#include <stdint.h>
+#else
+
+typedef __int32 int32_t;
+typedef unsigned __int32 uint32_t;
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+
+#endif
+
+#if _M_IX86_FP >= 2 // MSVC reports SSE2 (or better) code generation through _M_IX86_FP rather than by defining __SSE2__
+#define __SSE2__
+#endif
+
+#ifdef __linux // Byte order detection: on Linux it is taken from <endian.h>
+#include <endian.h>
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define IS_LITTLE_ENDIAN
+#elif  __BYTE_ORDER == __BIG_ENDIAN
+#define USE_BIG_ENDIAN // note the name: the big endian switch is USE_BIG_ENDIAN, not IS_BIG_ENDIAN
+#elif  __BYTE_ORDER == __PDP_ENDIAN
+#error "If you feel like writting code for PDP endianess go ahead, I'm not doing that"
+#else
+#error "Unknown endianess"
+#endif
+#else
+//Assume little endian. If you know how to detect endianness reliably on other compilers, add it here.
+#define IS_LITTLE_ENDIAN
+#endif
+
+#if defined(_WIN64) || defined(__x86_64__) || defined(__amd64__) // 64-bit Windows build, or a compiler targeting x86-64
+#define HASX64
+#endif
+
+
+/** A word of the Tiger hash: an unsigned 64-bit integer **/
+typedef uint64_t t_word;
+
+/** Provided as a convenience for people wanting an easy way to declare result variables.
+ *  A result is three words (192 bits). Since this is an array type, a function
+ *  parameter declared as t_res is really a pointer to the first t_word. **/
+typedef t_word t_res[3];
+
+/** Intermediate state of a partial calculation, as used by tigerp1 and tigerp2 **/
+typedef struct {
+    t_res h; // Hash state
+    char r[128]; // Salt buffer
+    t_word n; // Number of characters of r in use
+    t_word hs; // Total amount of data hashed so far
+} t_pres;
+
+/** Provided as a convenience for people wanting an easy way to declare block variables: eight words, 64 bytes in total **/
+typedef t_word t_block[8];
+
+/** Standard Tiger calculation: pass the input in str and its length in length, and the result is written to res.
+ *  NOTE(review): pad is not described here; presumably it is the first padding byte
+ *  appended to the message (0x01 for Tiger, 0x80 for Tiger2) - confirm against tiger.c **/
+void tiger(const char *str, t_word length, t_res res, char pad);
+/** Similar to tiger, but interleaves the accesses to two equally sized strings to reduce overhead and pipeline stalls.
+ *  The result for str1 is written to res1 and the result for str2 to res2 **/
+void tiger_2(const char *str1, const char *str2, t_word length, t_res res1, t_res res2);
+#ifdef __SSE2__
+/** Equivalent to tiger_2, but uses SSE2 for the key scheduling, which makes it faster **/
+void tiger_sse2(const char *str1, const char *str2, t_word length, t_res res1, t_res res2, char pad);
+#endif
+/** Optimized for use on TTHs: pass the two concatenated hashes and you get back the hash computed with a 0x01 prepended **/
+void tiger_49(const char *str, t_res res);
+/** Optimized for use on TTHs: pass the 1024 byte block and you get back the hash computed with a 0x00 prepended **/
+void tiger_1025(const char *str, t_res res);
+/** Interleaved version of tiger_49: pass two inputs and get back two results **/
+void tiger_2_49(const char *str1, const char *str2, t_res res1, t_res res2);
+/** Interleaved version of tiger_1025: pass two inputs and get back two results **/
+void tiger_2_1025(const char *str1, const char *str2, t_res res1, t_res res2);
+#ifdef __SSE2__
+/** SSE2 version of tiger_49: pass two inputs and get back two results **/
+void tiger_sse2_49(const char *str1, const char *str2, t_res res1, t_res res2);
+/** SSE2 version of tiger_1025: pass two inputs and get back two results **/
+void tiger_sse2_1025(const char *str1, const char *str2, t_res res1, t_res res2);
+#endif
+/** First stage of the partial Tiger calculation, meant to improve password security during storage; the intermediate state is written to pres **/
+void tigerp1(const char *password, t_word length, const char *salt, t_pres *pres);
+/** Second stage of the partial Tiger calculation: takes the state left in pres by the first stage and writes the result to res **/
+void tigerp2(const t_pres *pres, const char *salt, t_word length, t_res res);
+
+
+#endif
--- a/src/scan/tiger.hpp
+++ b/src/scan/tiger.hpp
@ -0,0 +1,49 @@
+/* Copyright 2015, Michele Santullo
+ * This file is part of "dindexer".
+ *
+ * "dindexer" is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * "dindexer" is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with "dindexer".  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef idBE93AF97FA4343ECA2BC8FB1FD3E5E60
+#define idBE93AF97FA4343ECA2BC8FB1FD3E5E60
+
+#include <cstdint>
+#include <string>
+#include <vector>
+
+namespace din {
+	struct TigerHash { //a 24-byte Tiger digest, accessible as three named words, as a word array or as raw bytes
+		TigerHash ( void ) = default; //defaulted: the words are only zeroed when the object is value-initialised (e.g. TigerHash h = {};)
+
+		union { //the three members below are overlapping views of the same storage
+			struct { //anonymous struct: a compiler extension in C++, not part of the standard
+				uint64_t part_a; //first word, overlaps data[0]
+				uint64_t part_b; //second word, overlaps data[1]
+				uint64_t part_c; //third word, overlaps data[2]
+			};
+			uint64_t data[3]; //word view, this is what gets handed to the C tiger functions
+			uint8_t byte_data[sizeof(uint64_t) * 3]; //byte view of the same three words
+		};
+	};
+
+	static_assert(sizeof(TigerHash) == 24, "Wrong struct size"); //3 words of 8 bytes and no padding
+
+	void tiger_file ( const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir, uint64_t& parSizeOut ); //feeds the data it reads (presumably from the file at parPath) into both parHashFile and parHashDir; parSizeOut receives the file size
+	void tiger_init_hash ( TigerHash& parHash ); //NOTE(review): presumably resets parHash to Tiger's initial state - confirm against the definition
+	std::string tiger_to_string ( const TigerHash& parHash, bool parUpcase=false ); //hex text of the three words, 16 zero-padded digits each; upper case digits when parUpcase is true
+	void tiger_data ( const std::string& parData, TigerHash& parHash ); //hashes the bytes of parData with tiger() and stores the result in parHash
+	void tiger_data ( const std::vector<char>& parData, TigerHash& parHash ); //same, for bytes held in a vector
+} //namespace din
+
+#endif