Bugfix - don't store absolute paths to files in the DB.

The bug (absolute paths being stored) was introduced in a previous commit.
2025-08-21 15:50:50 +00:00 · 2016-01-11 12:46:06 +00:00 · 2016-01-11 12:46:06 +00:00 · fa12bb2d74
commit fa12bb2d74
parent 3f48d00ad9
3 changed files with 50 additions and 25 deletions
--- a/src/machinery/indexer.cpp
+++ b/src/machinery/indexer.cpp
@ -1,4 +1,4 @@
-/* Copyright 2015, Michele Santullo
+/* Copyright 2016, Michele Santullo
 * This file is part of "dindexer".
 *
 * "dindexer" is free software: you can redistribute it and/or modify
@ -62,7 +62,7 @@ namespace mchlib {
 				while (
 					it_entry != parEnd and (
 						it_entry->level == curr_entry.level
-						or parCurrDir != PathName(it_entry->path).pop_right()
+						or parCurrDir != PathName(it_entry->abs_path).pop_right()
 					//and (not it_entry->is_dir or (it_entry->level <= curr_entry.level
 					//and parCurrDir != PathName(it_entry->path).pop_right()))
 				)) {
@ -73,9 +73,9 @@ namespace mchlib {
 #if defined(INDEXER_VERBOSE)
 				std::cout << "Making initial hash for " << parCurrDir << "...\n";
 #endif
-				curr_entry.mime_full = parMime.analyze(it_entry->path);
-				while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->path).pop_right()) {
-					PathName curr_subdir(it_entry->path);
+				curr_entry.mime_full = parMime.analyze(it_entry->abs_path);
+				while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->abs_path).pop_right()) {
+					PathName curr_subdir(it_entry->abs_path);
 					if (it_entry->is_directory) {
 						hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback, parIgnoreErrors, parMime);

@ -110,22 +110,22 @@ namespace mchlib {
 					it_entry != parEnd
 					and (it_entry->is_directory
 						or it_entry->level != curr_entry_it->level + 1
-						or PathName(it_entry->path).pop_right() != parCurrDir
+						or PathName(it_entry->abs_path).pop_right() != parCurrDir
 					)
 				) {
 					++it_entry;
 				}

-				while (it_entry != parEnd and not it_entry->is_directory and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->path).pop_right() == parCurrDir) {
+				while (it_entry != parEnd and not it_entry->is_directory and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->abs_path).pop_right() == parCurrDir) {
 					assert(not it_entry->is_directory);
 #if defined(INDEXER_VERBOSE)
-					std::cout << "Hashing file " << it_entry->path << "...";
+					std::cout << "Hashing file " << it_entry->abs_path << "...";
 #endif
 					parNextItemCallback(it_entry - parBegin);
 					try {
-						tiger_file(it_entry->path, it_entry->hash, curr_entry_it->hash, it_entry->size);
+						tiger_file(it_entry->abs_path, it_entry->hash, curr_entry_it->hash, it_entry->size);
 						it_entry->hash_valid = true;
-						it_entry->mime_full = parMime.analyze(it_entry->path);
+						it_entry->mime_full = parMime.analyze(it_entry->abs_path);
 						auto mime_pair = split_mime(it_entry->mime_full);
 						it_entry->mime_type = mime_pair.first;
 						it_entry->mime_charset = mime_pair.second;
@ -158,7 +158,7 @@ namespace mchlib {
 #endif
 			curr_entry_it->hash_valid = true;
 			{
-				curr_entry_it->mime_full = parMime.analyze(curr_entry_it->path);
+				curr_entry_it->mime_full = parMime.analyze(curr_entry_it->abs_path);
 				auto mime_pair = split_mime(curr_entry_it->mime_full);
 				curr_entry_it->mime_type = mime_pair.first;
 				curr_entry_it->mime_charset = mime_pair.second;
@ -187,7 +187,7 @@ namespace mchlib {
 			return
 				(l.level < r.level)
 				or (l.level == r.level and l.is_directory and not r.is_directory)
-				or (l.level == r.level and l.is_directory == r.is_directory and l.path < r.path)
+				or (l.level == r.level and l.is_directory == r.is_directory and l.abs_path < r.abs_path)

 				//sort by directory - parent first, children later
 				//(level == o.level and is_dir and not o.is_dir)
@ -198,6 +198,15 @@ namespace mchlib {
 				//or (level == o.level + 1 and not (o.is_dir and not is_dir and o.path == PathName(path).dirname()))
 			;
 		}
+
+		void populate_rel_paths (const PathName& parBase, std::vector<FileRecordData>& parItems) { // Sets each item's `path` to its `abs_path` with the first `parBase.str_path_size()` characters dropped.
+			const std::size_t offset = parBase.str_path_size(); // presumably the length of parBase's string form - TODO confirm against PathName
+			for (FileRecordData& itm : parItems) {
+				assert(itm.abs_path.size() >= offset); // substr(offset) below requires offset to be within abs_path
+				itm.path = boost::string_ref(itm.abs_path).substr(offset); // non-owning view into itm.abs_path; abs_path must outlive path and must not be reallocated
+				assert(itm.path.data()); // the resulting view may be empty, but must not have a null data pointer
+			}
+		}
 	} //unnamed namespace

 	struct Indexer::LocalData {
@ -249,7 +258,7 @@ namespace mchlib {
 #endif

 	void Indexer::calculate_hash() {
-		PathName base_path(m_local_data->paths.front().path);
+		PathName base_path(m_local_data->paths.front().abs_path);
 		std::sort(m_local_data->paths.begin(), m_local_data->paths.end(), &file_record_data_lt);
 		MimeType mime;

@ -263,7 +272,7 @@ namespace mchlib {
 				std::cout << "(D) ";
 			else
 				std::cout << "(F) ";
-			std::cout << itm.path << " (" << itm.level << ")\n";
+			std::cout << itm.abs_path << " (" << itm.level << ")\n";
 		}
 		std::cout << "-----------------------------------------------------\n";
 #endif
@ -297,6 +306,8 @@ namespace mchlib {
 		);
 #endif

+		populate_rel_paths(base_path, m_local_data->paths);
+
 #if defined(INDEXER_VERBOSE)
 		for (const auto& itm : m_local_data->paths) {
 			assert(not (1 == itm.hash.part_a and 1 == itm.hash.part_b and 1 == itm.hash.part_c));
@ -315,19 +326,19 @@ namespace mchlib {

 #if defined(INDEXER_VERBOSE)
 	void Indexer::dump() const {
-		PathName base_path(m_local_data->paths.front().path);
+		PathName base_path(m_local_data->paths.front().abs_path);

 		std::cout << "---------------- FILE LIST ----------------\n";
 		for (const auto& cur_itm : m_local_data->paths) {
 			if (not cur_itm.is_directory) {
-				PathName cur_path(cur_itm.path);
+				PathName cur_path(cur_itm.abs_path);
 				std::cout << make_relative_path(base_path, cur_path).path() << '\n';
 			}
 		}
 		std::cout << "---------------- DIRECTORY LIST ----------------\n";
 		for (const auto& cur_itm : m_local_data->paths) {
 			if (cur_itm.is_directory) {
-				PathName cur_path(cur_itm.path);
+				PathName cur_path(cur_itm.abs_path);
 				std::cout << make_relative_path(base_path, cur_path).path() << '\n';
 			}
 		}
@ -349,8 +360,8 @@ namespace mchlib {
 		if (m_local_data->paths.empty() or 0 == m_local_data->processing_index)
 			return std::string();

-		PathName base_path(m_local_data->paths.front().path);
-		PathName ret_path(m_local_data->paths[m_local_data->processing_index].path);
+		PathName base_path(m_local_data->paths.front().abs_path);
+		PathName ret_path(m_local_data->paths[m_local_data->processing_index].abs_path);
 		return make_relative_path(base_path, ret_path).path();
 	}
 #endif
@ -365,7 +376,7 @@ namespace mchlib {
 		auto it = boost::make_filter_iterator<IsFile<>>(m_local_data->paths.begin(), m_local_data->paths.end());
 		assert(not m_local_data->paths.empty());
 		std::advance(it, parIndex);
-		return make_relative_path(PathName(m_local_data->paths.front().path), PathName(it->path)).path();
+		return make_relative_path(PathName(m_local_data->paths.front().abs_path), PathName(it->abs_path)).path();
 	}

 	void Indexer::ignore_read_errors (bool parIgnore) {
--- a/src/scan/dbbackend.cpp
+++ b/src/scan/dbbackend.cpp
@ -49,13 +49,18 @@ namespace din {
 			}

 			auto row = resultset[0];
-			parItem.path = row["path"];
+			parItem.abs_path = row["path"];
 			parItem.hash = parHash;
 			parItem.level = lexical_cast<uint16_t>(row["level"]);
 			parItem.size = lexical_cast<uint64_t>(row["size"]);
 			parItem.is_directory = (row["is_directory"] == "t" ? true : false);
 			parItem.is_symlink = (row["is_symlink"] == "t" ? true : false);
 			group_id = lexical_cast<uint32_t>(row["group_id"]);
+
+			if (parItem.abs_path.size() != 1 or parItem.abs_path != "/") {
+				parItem.abs_path = std::string("/") + parItem.abs_path;
+			}
+			parItem.path = boost::string_ref(parItem.abs_path).substr(1);
 		}

 		{
@ -101,6 +106,12 @@ namespace din {
 			new_group_id = lexical_cast<uint32_t>(id_res[0][0]);
 		}

+		//TODO: remove this empty_path part. This is a temporary fix needed to
+		//work around a bug in libpqtypes for which empty paths are inserted
+		//as null values in the db.
+		const char* empty_path = "/";
+		const auto empty_path_string = boost::string_ref(empty_path);
+
 		for (std::size_t z = 0; z < parData.size(); ++z) {
 			const std::string query = "INSERT INTO \"files\" (path, hash, "
 				"level, group_id, is_directory, is_symlink, size, "
@ -109,8 +120,9 @@ namespace din {
 				"($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13);";

 			const auto& itm = parData[z];
+			assert(itm.path.data());
 			conn.query(query,
-				itm.path,
+				(itm.path.empty() ? empty_path_string : itm.path),
 				tiger_to_string(itm.hash),
 				itm.level,
 				new_group_id,