// Review preamble: free text placed ahead of the first "diff --git" header.
// The patch content that follows is reproduced unchanged.
//
// Purpose: this patch wires libmagic-based MIME detection into the scan tool.
//   * cmake/Modules/FindMagic.cmake (new file): looks up the "magic" library
//     and magic.h, then reports the result through
//     find_package_handle_standard_args.
//   * src/scan/CMakeLists.txt: requires the Magic package, compiles
//     mimetype.cpp, and adds MAGIC_INCLUDE_DIR / MAGIC_LIBRARIES to the target.
//   * src/scan/indexer.cpp: adds a std::string "mime" member next to "path",
//     gives hash_dir() a MimeType& parMime parameter that is forwarded on
//     recursion, stores parMime.analyze() results for directories and files,
//     and has Indexer::calculate_hash() create one MimeType that is passed to
//     both hash_dir() calls.
//   * src/scan/mimetype.hpp / mimetype.cpp (new files): class din::MimeType.
//     The constructor calls magic_open() and magic_load(cookie, nullptr) and
//     throws std::runtime_error when the load fails. analyze() calls
//     magic_file() and throws std::runtime_error when that returns null.
//
// NOTE(review): analyze() returns nullptr when initialized() is false, and the
//   constructor does not treat a null magic_open() result as an error. Both
//   hash_dir() call sites assign the return value directly to a std::string;
//   building a std::string from a null pointer is undefined behaviour. Either
//   guard the assignments or make the constructor throw.
// NOTE(review): in the directory branch of hash_dir(), the added line calls
//   analyze(it_entry->path) before the "parEnd != it_entry" test of the
//   following while loop, so it_entry may be equal to parEnd at that point --
//   confirm against the lines above the hunk. It also passes it_entry's path
//   although the result is stored in curr_entry -- confirm this is intended.
// NOTE(review): analyze() throws std::runtime_error, but the only handler
//   visible in this hunk catches std::ios_base::failure. Confirm whether
//   parIgnoreErrors is meant to cover MIME detection failures too.
// NOTE(review): FindMagic.cmake calls find_package_handle_standard_args with no
//   visible include(FindPackageHandleStandardArgs) -- confirm it is already
//   loaded by the time this module runs.
// NOTE(review): in this copy the original line breaks appear to be collapsed
//   and text inside angle brackets (include targets, template arguments, the
//   license URL) appears to be missing -- restore from the original commit
//   before applying.
diff --git a/cmake/Modules/FindMagic.cmake b/cmake/Modules/FindMagic.cmake new file mode 100644 index 0000000..7644331 --- /dev/null +++ b/cmake/Modules/FindMagic.cmake @@ -0,0 +1,21 @@ +# Copyright 2011 by Alex Turbov +# +# - Try to find the libmagic library. +# +# Once done this will define +# +# MAGIC_FOUND - system has libmagic +# MAGIC_INCLUDE_DIR - the libmagic include directory +# MAGIC_LIBRARIES - The libraries needed to use libmagic + +find_library(MAGIC_LIBRARIES magic) +find_path(MAGIC_INCLUDE_DIR magic.h) + +find_package_handle_standard_args(Magic DEFAULT_MSG MAGIC_LIBRARIES MAGIC_INCLUDE_DIR) + +# TODO Check for some symbols? + +# X-Chewy-RepoBase: https://raw.githubusercontent.com/mutanabbi/chewy-cmake-rep/master/ +# X-Chewy-Path: FindMagic.cmake +# X-Chewy-Version: 1.0 +# X-Chewy-Description: Find MIME-type detection library diff --git a/src/scan/CMakeLists.txt b/src/scan/CMakeLists.txt index f901b97..6c6dc10 100644 --- a/src/scan/CMakeLists.txt +++ b/src/scan/CMakeLists.txt @@ -8,6 +8,8 @@ if (DINDEXER_WITH_MEDIA_AUTODETECT) endif() endif() +find_package(Magic REQUIRED) + add_executable(${PROJECT_NAME} main.cpp filesearcher.cpp @@ -19,15 +21,20 @@ add_executable(${PROJECT_NAME} commandline.cpp discinfo.cpp mediatype.cpp + mimetype.cpp ) target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) +target_include_directories(${PROJECT_NAME} + PRIVATE ${MAGIC_INCLUDE_DIR} +) target_link_libraries(${PROJECT_NAME} PRIVATE ${bare_name}-if PRIVATE ${bare_name}-common + PRIVATE ${MAGIC_LIBRARIES} ) string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}") diff --git a/src/scan/indexer.cpp b/src/scan/indexer.cpp index bdaf256..b18d318 100644 --- a/src/scan/indexer.cpp +++ b/src/scan/indexer.cpp @@ -21,6 +21,7 @@ #include "dbbackend.hpp" #include "dindexer-common/settings.hpp" #include "filestats.hpp" +#include "mimetype.hpp" #include #include #include @@ -65,6 +66,7 @@ namespace din { bool operator== ( const FileEntry& ) const = delete; std::string path; + std::string mime; HashType hash; std::time_t access_time; std::time_t modify_time; @@ -78,7 +80,7 @@ namespace din { namespace { typedef std::vector::iterator FileEntryIt; - void hash_dir (FileEntryIt parEntry, FileEntryIt parBegin, FileEntryIt parEnd, const PathName& parCurrDir, std::function parNextItemCallback, bool parIgnoreErrors) { + void hash_dir (FileEntryIt parEntry, FileEntryIt parBegin, FileEntryIt parEnd, const PathName& parCurrDir, std::function parNextItemCallback, bool parIgnoreErrors, MimeType& parMime) { assert(parEntry != parEnd); assert(parEntry->is_dir); FileEntry& curr_entry = *parEntry; @@ -104,10 +106,11 @@ namespace din { #if defined(INDEXER_VERBOSE) std::cout << "Making initial hash for " << parCurrDir << "...\n"; #endif + curr_entry.mime = parMime.analyze(it_entry->path); while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->path).pop_right()) { PathName curr_subdir(it_entry->path); if (it_entry->is_dir) { - hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback, parIgnoreErrors); + hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback, parIgnoreErrors, parMime); std::string relpath = make_relative_path(parCurrDir, curr_subdir).path(); const auto old_size = dir_blob.size(); @@ -127,7 +130,9 
@@ namespace din { tiger_data(dir_blob, curr_entry.hash); curr_entry.file_size = 0; #if defined(INDEXER_VERBOSE) - std::cout << "Got intermediate hash for dir " << parCurrDir << ": " << tiger_to_string(curr_entry.hash) << '\n'; + std::cout << "Got intermediate hash for dir " << parCurrDir << + ": " << tiger_to_string(curr_entry.hash) << + ' ' << curr_entry.mime << '\n'; #endif } @@ -152,6 +157,7 @@ namespace din { parNextItemCallback(it_entry - parBegin); try { tiger_file(it_entry->path, it_entry->hash, curr_entry_it->hash, it_entry->file_size); + it_entry->mime = parMime.analyze(it_entry->path); } catch (const std::ios_base::failure& e) { if (parIgnoreErrors) { @@ -164,7 +170,8 @@ namespace din { } #if defined(INDEXER_VERBOSE) - std::cout << ' ' << tiger_to_string(it_entry->hash) << '\n'; + std::cout << ' ' << tiger_to_string(it_entry->hash) << + ' ' << it_entry->mime << '\n'; #endif ++it_entry; } @@ -251,6 +258,8 @@ namespace din { void Indexer::calculate_hash() { PathName base_path(m_local_data->paths.front().path); std::sort(m_local_data->paths.begin(), m_local_data->paths.end()); + MimeType mime; + #if defined(INDEXER_VERBOSE) for (auto& itm : m_local_data->paths) { itm.hash.part_a = 1; @@ -278,7 +287,8 @@ namespace din { m_local_data->processing_index = parNext; m_local_data->step_notify.notify_all(); }, - m_local_data->ignore_read_errors + m_local_data->ignore_read_errors, + mime ); assert(m_local_data->done_count == m_local_data->file_count); @@ -289,7 +299,8 @@ namespace din { m_local_data->paths.end(), base_path, [](std::size_t) {}, - m_local_data->ignore_read_errors + m_local_data->ignore_read_errors, + mime ); #endif diff --git a/src/scan/mimetype.cpp b/src/scan/mimetype.cpp new file mode 100644 index 0000000..7bfa415 --- /dev/null +++ b/src/scan/mimetype.cpp @@ -0,0 +1,64 @@ +#include "mimetype.hpp" +#include +#include +#include +#include +#include + +namespace din { + using MagicCookie = std::unique_ptr; + + struct MimeType::LocalData { + LocalData ( 
void ) : + magic_cookie(nullptr, &magic_close) + { + } + + MagicCookie magic_cookie; + }; + + MimeType::MimeType() : + m_local_data(new LocalData) + { + auto raw_cookie = magic_open(MAGIC_SYMLINK | MAGIC_MIME | MAGIC_PRESERVE_ATIME | MAGIC_ERROR); + MagicCookie cookie(raw_cookie, &magic_close); + + if (raw_cookie) { + if (0 != magic_load(raw_cookie, nullptr)) { + std::ostringstream oss; + oss << "MimeType cannot load magic database: \"" << + magic_error(raw_cookie) << + "\""; + + throw std::runtime_error(oss.str()); + } + } + + std::swap(m_local_data->magic_cookie, cookie); + } + + MimeType::~MimeType() noexcept { + } + + bool MimeType::initialized() const { + return not not m_local_data->magic_cookie; + } + + const char* MimeType::analyze (const std::string& parPath) { + if (initialized()) { + const auto cookie = m_local_data->magic_cookie.get(); + const auto retval = magic_file(cookie, parPath.c_str()); + if (not retval) { + std::ostringstream oss; + oss << "MimeType failed to analyze \"" << + parPath << "\": \"" << magic_error(cookie) << "\""; + + throw std::runtime_error(oss.str()); + } + return retval; + } + else { + return nullptr; + } + } +} //namespace din diff --git a/src/scan/mimetype.hpp b/src/scan/mimetype.hpp new file mode 100644 index 0000000..c09b4f3 --- /dev/null +++ b/src/scan/mimetype.hpp @@ -0,0 +1,40 @@ +/* Copyright 2015, Michele Santullo + * This file is part of "dindexer". + * + * "dindexer" is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * "dindexer" is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with "dindexer". If not, see . + */ + +#ifndef id0B4AABA220AE412F81409F63250E4853 +#define id0B4AABA220AE412F81409F63250E4853 + +#include +#include + +namespace din { + class MimeType { + public: + MimeType ( void ); + ~MimeType ( void ) noexcept; + + bool initialized ( void ) const; + const char* analyze ( const std::string& parPath ); + + private: + struct LocalData; + + std::unique_ptr m_local_data; + }; +} //namespace din + +#endif