1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2024-11-29 01:33:46 +00:00

Add retrieving the mime type.

This is not being stored to the db yet.
This commit is contained in:
King_DuckZ 2015-12-15 11:26:04 +00:00
parent 2b7d8a6783
commit 148b60ee97
5 changed files with 149 additions and 6 deletions

View file

@ -0,0 +1,21 @@
# Copyright 2011 by Alex Turbov <i.zaufi@gmail.com>
#
# - Try to find the libmagic library.
#
# Once done this will define
#
# MAGIC_FOUND - system has libmagic
# MAGIC_INCLUDE_DIR - the libmagic include directory
# MAGIC_LIBRARIES - The libraries needed to use libmagic
# find_package_handle_standard_args() is defined by this standard module.
# Include it explicitly so this find module does not depend on some earlier
# find_package() call having already pulled it in.
include(FindPackageHandleStandardArgs)

# Locate the libmagic library and the directory containing magic.h.
find_library(MAGIC_LIBRARIES magic)
find_path(MAGIC_INCLUDE_DIR magic.h)

# Reports the result (MAGIC_FOUND) and makes a REQUIRED lookup fail when
# either the library or the header could not be located.
find_package_handle_standard_args(Magic DEFAULT_MSG MAGIC_LIBRARIES MAGIC_INCLUDE_DIR)

# Keep the two cache entries out of the default cmake-gui/ccmake view.
mark_as_advanced(MAGIC_LIBRARIES MAGIC_INCLUDE_DIR)
# TODO Check for some symbols?
# X-Chewy-RepoBase: https://raw.githubusercontent.com/mutanabbi/chewy-cmake-rep/master/
# X-Chewy-Path: FindMagic.cmake
# X-Chewy-Version: 1.0
# X-Chewy-Description: Find MIME-type detection library

View file

@ -8,6 +8,8 @@ if (DINDEXER_WITH_MEDIA_AUTODETECT)
endif() endif()
endif() endif()
find_package(Magic REQUIRED)
add_executable(${PROJECT_NAME} add_executable(${PROJECT_NAME}
main.cpp main.cpp
filesearcher.cpp filesearcher.cpp
@ -19,15 +21,20 @@ add_executable(${PROJECT_NAME}
commandline.cpp commandline.cpp
discinfo.cpp discinfo.cpp
mediatype.cpp mediatype.cpp
mimetype.cpp
) )
target_include_directories(${PROJECT_NAME} target_include_directories(${PROJECT_NAME}
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/.. PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..
) )
target_include_directories(${PROJECT_NAME}
PRIVATE ${MAGIC_INCLUDE_DIR}
)
target_link_libraries(${PROJECT_NAME} target_link_libraries(${PROJECT_NAME}
PRIVATE ${bare_name}-if PRIVATE ${bare_name}-if
PRIVATE ${bare_name}-common PRIVATE ${bare_name}-common
PRIVATE ${MAGIC_LIBRARIES}
) )
string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}") string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}")

View file

@ -21,6 +21,7 @@
#include "dbbackend.hpp" #include "dbbackend.hpp"
#include "dindexer-common/settings.hpp" #include "dindexer-common/settings.hpp"
#include "filestats.hpp" #include "filestats.hpp"
#include "mimetype.hpp"
#include <algorithm> #include <algorithm>
#include <functional> #include <functional>
#include <vector> #include <vector>
@ -65,6 +66,7 @@ namespace din {
bool operator== ( const FileEntry& ) const = delete; bool operator== ( const FileEntry& ) const = delete;
std::string path; std::string path;
std::string mime;
HashType hash; HashType hash;
std::time_t access_time; std::time_t access_time;
std::time_t modify_time; std::time_t modify_time;
@ -78,7 +80,7 @@ namespace din {
namespace { namespace {
typedef std::vector<FileEntry>::iterator FileEntryIt; typedef std::vector<FileEntry>::iterator FileEntryIt;
void hash_dir (FileEntryIt parEntry, FileEntryIt parBegin, FileEntryIt parEnd, const PathName& parCurrDir, std::function<void(std::size_t)> parNextItemCallback, bool parIgnoreErrors) { void hash_dir (FileEntryIt parEntry, FileEntryIt parBegin, FileEntryIt parEnd, const PathName& parCurrDir, std::function<void(std::size_t)> parNextItemCallback, bool parIgnoreErrors, MimeType& parMime) {
assert(parEntry != parEnd); assert(parEntry != parEnd);
assert(parEntry->is_dir); assert(parEntry->is_dir);
FileEntry& curr_entry = *parEntry; FileEntry& curr_entry = *parEntry;
@ -104,10 +106,11 @@ namespace din {
#if defined(INDEXER_VERBOSE) #if defined(INDEXER_VERBOSE)
std::cout << "Making initial hash for " << parCurrDir << "...\n"; std::cout << "Making initial hash for " << parCurrDir << "...\n";
#endif #endif
curr_entry.mime = parMime.analyze(it_entry->path);
while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->path).pop_right()) { while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->path).pop_right()) {
PathName curr_subdir(it_entry->path); PathName curr_subdir(it_entry->path);
if (it_entry->is_dir) { if (it_entry->is_dir) {
hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback, parIgnoreErrors); hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback, parIgnoreErrors, parMime);
std::string relpath = make_relative_path(parCurrDir, curr_subdir).path(); std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
const auto old_size = dir_blob.size(); const auto old_size = dir_blob.size();
@ -127,7 +130,9 @@ namespace din {
tiger_data(dir_blob, curr_entry.hash); tiger_data(dir_blob, curr_entry.hash);
curr_entry.file_size = 0; curr_entry.file_size = 0;
#if defined(INDEXER_VERBOSE) #if defined(INDEXER_VERBOSE)
std::cout << "Got intermediate hash for dir " << parCurrDir << ": " << tiger_to_string(curr_entry.hash) << '\n'; std::cout << "Got intermediate hash for dir " << parCurrDir <<
": " << tiger_to_string(curr_entry.hash) <<
' ' << curr_entry.mime << '\n';
#endif #endif
} }
@ -152,6 +157,7 @@ namespace din {
parNextItemCallback(it_entry - parBegin); parNextItemCallback(it_entry - parBegin);
try { try {
tiger_file(it_entry->path, it_entry->hash, curr_entry_it->hash, it_entry->file_size); tiger_file(it_entry->path, it_entry->hash, curr_entry_it->hash, it_entry->file_size);
it_entry->mime = parMime.analyze(it_entry->path);
} }
catch (const std::ios_base::failure& e) { catch (const std::ios_base::failure& e) {
if (parIgnoreErrors) { if (parIgnoreErrors) {
@ -164,7 +170,8 @@ namespace din {
} }
#if defined(INDEXER_VERBOSE) #if defined(INDEXER_VERBOSE)
std::cout << ' ' << tiger_to_string(it_entry->hash) << '\n'; std::cout << ' ' << tiger_to_string(it_entry->hash) <<
' ' << it_entry->mime << '\n';
#endif #endif
++it_entry; ++it_entry;
} }
@ -251,6 +258,8 @@ namespace din {
void Indexer::calculate_hash() { void Indexer::calculate_hash() {
PathName base_path(m_local_data->paths.front().path); PathName base_path(m_local_data->paths.front().path);
std::sort(m_local_data->paths.begin(), m_local_data->paths.end()); std::sort(m_local_data->paths.begin(), m_local_data->paths.end());
MimeType mime;
#if defined(INDEXER_VERBOSE) #if defined(INDEXER_VERBOSE)
for (auto& itm : m_local_data->paths) { for (auto& itm : m_local_data->paths) {
itm.hash.part_a = 1; itm.hash.part_a = 1;
@ -278,7 +287,8 @@ namespace din {
m_local_data->processing_index = parNext; m_local_data->processing_index = parNext;
m_local_data->step_notify.notify_all(); m_local_data->step_notify.notify_all();
}, },
m_local_data->ignore_read_errors m_local_data->ignore_read_errors,
mime
); );
assert(m_local_data->done_count == m_local_data->file_count); assert(m_local_data->done_count == m_local_data->file_count);
@ -289,7 +299,8 @@ namespace din {
m_local_data->paths.end(), m_local_data->paths.end(),
base_path, base_path,
[](std::size_t) {}, [](std::size_t) {},
m_local_data->ignore_read_errors m_local_data->ignore_read_errors,
mime
); );
#endif #endif

64
src/scan/mimetype.cpp Normal file
View file

@ -0,0 +1,64 @@
#include "mimetype.hpp"
#include <magic.h>
#include <ciso646>
#include <sstream>
#include <algorithm>
#include <stdexcept>
namespace din {
// Owning handle for a libmagic cookie: magic_close() is invoked on the
// held pointer when a non-null handle goes out of scope.
using MagicCookie = std::unique_ptr<magic_set, void(*)(magic_t)>;

// Private state of MimeType, kept out of the header so that includers
// of mimetype.hpp do not need <magic.h>.
struct MimeType::LocalData {
	// Starts out empty; MimeType's constructor installs the real cookie.
	MagicCookie magic_cookie{nullptr, &magic_close};
};
// Opens a libmagic cookie and loads the default magic database into it.
//
// When magic_open() fails the object is left without a cookie and
// initialized() reports false. When magic_load() fails a
// std::runtime_error carrying libmagic's error text is thrown; the
// freshly opened cookie is owned by a local MagicCookie at that point,
// so it is closed during unwinding.
MimeType::MimeType() :
	m_local_data(new LocalData)
{
	const int open_flags = MAGIC_SYMLINK | MAGIC_MIME | MAGIC_PRESERVE_ATIME | MAGIC_ERROR;
	MagicCookie new_cookie(magic_open(open_flags), &magic_close);

	// Only attempt to load the database if a cookie was actually obtained.
	const bool load_failed = new_cookie and 0 != magic_load(new_cookie.get(), nullptr);
	if (load_failed) {
		std::ostringstream message;
		message << "MimeType cannot load magic database: \"" <<
			magic_error(new_cookie.get()) <<
			"\"";
		throw std::runtime_error(message.str());
	}

	// Hand the (possibly null) cookie over to the long-lived state.
	m_local_data->magic_cookie.swap(new_cookie);
}
// Defined in this translation unit, where LocalData is a complete type,
// so that the std::unique_ptr<LocalData> member can be destroyed.
MimeType::~MimeType() noexcept = default;
// Tells whether a libmagic cookie is currently held, i.e. whether the
// constructor managed to open one.
bool MimeType::initialized() const {
	return static_cast<bool>(m_local_data->magic_cookie);
}
// Asks libmagic for the MIME description of the file at parPath.
//
// parPath: path of the file to inspect; passed to magic_file() as is.
//
// Returns the string produced by magic_file(), or an empty string when
// this object holds no libmagic cookie (initialized() is false). The
// result is never a null pointer.
// NOTE(review): the returned buffer belongs to libmagic; presumably it
// is only valid until the next call on this object or its destruction,
// so copy it if it has to outlive that - confirm against libmagic docs.
//
// Throws std::runtime_error when magic_file() reports a failure.
const char* MimeType::analyze (const std::string& parPath) {
	if (not initialized()) {
		// The result gets assigned straight to a std::string by the
		// indexing code, and building a std::string from a null pointer
		// is undefined behaviour - so hand out an empty string instead.
		return "";
	}

	const auto cookie = m_local_data->magic_cookie.get();
	const char* const retval = magic_file(cookie, parPath.c_str());
	if (not retval) {
		// magic_error() may itself return null when libmagic has no
		// message to offer; streaming a null char pointer is not allowed.
		const char* const reason = magic_error(cookie);
		std::ostringstream oss;
		oss << "MimeType failed to analyze \"" <<
			parPath << "\": \"" << (reason ? reason : "unknown error") << "\"";
		throw std::runtime_error(oss.str());
	}
	return retval;
}
} //namespace din

40
src/scan/mimetype.hpp Normal file
View file

@ -0,0 +1,40 @@
/* Copyright 2015, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id0B4AABA220AE412F81409F63250E4853
#define id0B4AABA220AE412F81409F63250E4853
#include <memory>
#include <string>
namespace din {
// Thin wrapper around a libmagic cookie, used to query the MIME
// description of files. All libmagic state lives in the LocalData pimpl,
// so including this header does not require <magic.h>.
class MimeType {
public:
	// Opens libmagic and loads its default database.
	// Throws std::runtime_error when the database cannot be loaded.
	MimeType();
	~MimeType() noexcept;

	// True when a libmagic cookie is held, i.e. opening libmagic succeeded.
	bool initialized() const;

	// Runs libmagic on the file at parPath and returns the description it
	// produces; throws std::runtime_error if libmagic reports a failure.
	// No analysis is attempted unless initialized() is true, so check
	// that first.
	// NOTE(review): the returned buffer belongs to libmagic - presumably
	// it must be copied before the next call on this object; confirm.
	const char* analyze ( const std::string& parPath );

private:
	struct LocalData;

	std::unique_ptr<LocalData> m_local_data;
};
} //namespace din
#endif