mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2024-11-29 01:33:46 +00:00
Add retrieving the mime type.
This is not being stored to the db yet.
This commit is contained in:
parent
2b7d8a6783
commit
148b60ee97
5 changed files with 149 additions and 6 deletions
21
cmake/Modules/FindMagic.cmake
Normal file
21
cmake/Modules/FindMagic.cmake
Normal file
|
@ -0,0 +1,21 @@
|
||||||
|
# Copyright 2011 by Alex Turbov <i.zaufi@gmail.com>
#
# - Try to find the libmagic library.
#
# Once done this will define
#
#  MAGIC_FOUND - system has libmagic
#  MAGIC_INCLUDE_DIR - the libmagic include directory
#  MAGIC_LIBRARIES - The libraries needed to use libmagic

# find_package_handle_standard_args() lives in this module. Include it
# explicitly so this find module does not rely on some earlier find_package()
# call having loaded it already.
include(FindPackageHandleStandardArgs)

# Locate the library file and the directory that contains magic.h.
find_library(MAGIC_LIBRARIES magic)
find_path(MAGIC_INCLUDE_DIR magic.h)

# Report the result and fail the configure step when REQUIRED was given and
# either of the two variables above was not found.
find_package_handle_standard_args(Magic DEFAULT_MSG MAGIC_LIBRARIES MAGIC_INCLUDE_DIR)

# TODO Check for some symbols?

# X-Chewy-RepoBase: https://raw.githubusercontent.com/mutanabbi/chewy-cmake-rep/master/
# X-Chewy-Path: FindMagic.cmake
# X-Chewy-Version: 1.0
# X-Chewy-Description: Find MIME-type detection library
|
|
@ -8,6 +8,8 @@ if (DINDEXER_WITH_MEDIA_AUTODETECT)
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
find_package(Magic REQUIRED)
|
||||||
|
|
||||||
add_executable(${PROJECT_NAME}
|
add_executable(${PROJECT_NAME}
|
||||||
main.cpp
|
main.cpp
|
||||||
filesearcher.cpp
|
filesearcher.cpp
|
||||||
|
@ -19,15 +21,20 @@ add_executable(${PROJECT_NAME}
|
||||||
commandline.cpp
|
commandline.cpp
|
||||||
discinfo.cpp
|
discinfo.cpp
|
||||||
mediatype.cpp
|
mediatype.cpp
|
||||||
|
mimetype.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
target_include_directories(${PROJECT_NAME}
|
target_include_directories(${PROJECT_NAME}
|
||||||
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..
|
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/..
|
||||||
)
|
)
|
||||||
|
target_include_directories(${PROJECT_NAME}
|
||||||
|
PRIVATE ${MAGIC_INCLUDE_DIR}
|
||||||
|
)
|
||||||
|
|
||||||
target_link_libraries(${PROJECT_NAME}
|
target_link_libraries(${PROJECT_NAME}
|
||||||
PRIVATE ${bare_name}-if
|
PRIVATE ${bare_name}-if
|
||||||
PRIVATE ${bare_name}-common
|
PRIVATE ${bare_name}-common
|
||||||
|
PRIVATE ${MAGIC_LIBRARIES}
|
||||||
)
|
)
|
||||||
|
|
||||||
string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}")
|
string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}")
|
||||||
|
|
|
@ -21,6 +21,7 @@
|
||||||
#include "dbbackend.hpp"
|
#include "dbbackend.hpp"
|
||||||
#include "dindexer-common/settings.hpp"
|
#include "dindexer-common/settings.hpp"
|
||||||
#include "filestats.hpp"
|
#include "filestats.hpp"
|
||||||
|
#include "mimetype.hpp"
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
@ -65,6 +66,7 @@ namespace din {
|
||||||
bool operator== ( const FileEntry& ) const = delete;
|
bool operator== ( const FileEntry& ) const = delete;
|
||||||
|
|
||||||
std::string path;
|
std::string path;
|
||||||
|
std::string mime;
|
||||||
HashType hash;
|
HashType hash;
|
||||||
std::time_t access_time;
|
std::time_t access_time;
|
||||||
std::time_t modify_time;
|
std::time_t modify_time;
|
||||||
|
@ -78,7 +80,7 @@ namespace din {
|
||||||
namespace {
|
namespace {
|
||||||
typedef std::vector<FileEntry>::iterator FileEntryIt;
|
typedef std::vector<FileEntry>::iterator FileEntryIt;
|
||||||
|
|
||||||
void hash_dir (FileEntryIt parEntry, FileEntryIt parBegin, FileEntryIt parEnd, const PathName& parCurrDir, std::function<void(std::size_t)> parNextItemCallback, bool parIgnoreErrors) {
|
void hash_dir (FileEntryIt parEntry, FileEntryIt parBegin, FileEntryIt parEnd, const PathName& parCurrDir, std::function<void(std::size_t)> parNextItemCallback, bool parIgnoreErrors, MimeType& parMime) {
|
||||||
assert(parEntry != parEnd);
|
assert(parEntry != parEnd);
|
||||||
assert(parEntry->is_dir);
|
assert(parEntry->is_dir);
|
||||||
FileEntry& curr_entry = *parEntry;
|
FileEntry& curr_entry = *parEntry;
|
||||||
|
@ -104,10 +106,11 @@ namespace din {
|
||||||
#if defined(INDEXER_VERBOSE)
|
#if defined(INDEXER_VERBOSE)
|
||||||
std::cout << "Making initial hash for " << parCurrDir << "...\n";
|
std::cout << "Making initial hash for " << parCurrDir << "...\n";
|
||||||
#endif
|
#endif
|
||||||
|
curr_entry.mime = parMime.analyze(it_entry->path);
|
||||||
while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->path).pop_right()) {
|
while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->path).pop_right()) {
|
||||||
PathName curr_subdir(it_entry->path);
|
PathName curr_subdir(it_entry->path);
|
||||||
if (it_entry->is_dir) {
|
if (it_entry->is_dir) {
|
||||||
hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback, parIgnoreErrors);
|
hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback, parIgnoreErrors, parMime);
|
||||||
|
|
||||||
std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
|
std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
|
||||||
const auto old_size = dir_blob.size();
|
const auto old_size = dir_blob.size();
|
||||||
|
@ -127,7 +130,9 @@ namespace din {
|
||||||
tiger_data(dir_blob, curr_entry.hash);
|
tiger_data(dir_blob, curr_entry.hash);
|
||||||
curr_entry.file_size = 0;
|
curr_entry.file_size = 0;
|
||||||
#if defined(INDEXER_VERBOSE)
|
#if defined(INDEXER_VERBOSE)
|
||||||
std::cout << "Got intermediate hash for dir " << parCurrDir << ": " << tiger_to_string(curr_entry.hash) << '\n';
|
std::cout << "Got intermediate hash for dir " << parCurrDir <<
|
||||||
|
": " << tiger_to_string(curr_entry.hash) <<
|
||||||
|
' ' << curr_entry.mime << '\n';
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -152,6 +157,7 @@ namespace din {
|
||||||
parNextItemCallback(it_entry - parBegin);
|
parNextItemCallback(it_entry - parBegin);
|
||||||
try {
|
try {
|
||||||
tiger_file(it_entry->path, it_entry->hash, curr_entry_it->hash, it_entry->file_size);
|
tiger_file(it_entry->path, it_entry->hash, curr_entry_it->hash, it_entry->file_size);
|
||||||
|
it_entry->mime = parMime.analyze(it_entry->path);
|
||||||
}
|
}
|
||||||
catch (const std::ios_base::failure& e) {
|
catch (const std::ios_base::failure& e) {
|
||||||
if (parIgnoreErrors) {
|
if (parIgnoreErrors) {
|
||||||
|
@ -164,7 +170,8 @@ namespace din {
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(INDEXER_VERBOSE)
|
#if defined(INDEXER_VERBOSE)
|
||||||
std::cout << ' ' << tiger_to_string(it_entry->hash) << '\n';
|
std::cout << ' ' << tiger_to_string(it_entry->hash) <<
|
||||||
|
' ' << it_entry->mime << '\n';
|
||||||
#endif
|
#endif
|
||||||
++it_entry;
|
++it_entry;
|
||||||
}
|
}
|
||||||
|
@ -251,6 +258,8 @@ namespace din {
|
||||||
void Indexer::calculate_hash() {
|
void Indexer::calculate_hash() {
|
||||||
PathName base_path(m_local_data->paths.front().path);
|
PathName base_path(m_local_data->paths.front().path);
|
||||||
std::sort(m_local_data->paths.begin(), m_local_data->paths.end());
|
std::sort(m_local_data->paths.begin(), m_local_data->paths.end());
|
||||||
|
MimeType mime;
|
||||||
|
|
||||||
#if defined(INDEXER_VERBOSE)
|
#if defined(INDEXER_VERBOSE)
|
||||||
for (auto& itm : m_local_data->paths) {
|
for (auto& itm : m_local_data->paths) {
|
||||||
itm.hash.part_a = 1;
|
itm.hash.part_a = 1;
|
||||||
|
@ -278,7 +287,8 @@ namespace din {
|
||||||
m_local_data->processing_index = parNext;
|
m_local_data->processing_index = parNext;
|
||||||
m_local_data->step_notify.notify_all();
|
m_local_data->step_notify.notify_all();
|
||||||
},
|
},
|
||||||
m_local_data->ignore_read_errors
|
m_local_data->ignore_read_errors,
|
||||||
|
mime
|
||||||
);
|
);
|
||||||
|
|
||||||
assert(m_local_data->done_count == m_local_data->file_count);
|
assert(m_local_data->done_count == m_local_data->file_count);
|
||||||
|
@ -289,7 +299,8 @@ namespace din {
|
||||||
m_local_data->paths.end(),
|
m_local_data->paths.end(),
|
||||||
base_path,
|
base_path,
|
||||||
[](std::size_t) {},
|
[](std::size_t) {},
|
||||||
m_local_data->ignore_read_errors
|
m_local_data->ignore_read_errors,
|
||||||
|
mime
|
||||||
);
|
);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
64
src/scan/mimetype.cpp
Normal file
64
src/scan/mimetype.cpp
Normal file
|
@ -0,0 +1,64 @@
|
||||||
|
#include "mimetype.hpp"
|
||||||
|
#include <magic.h>
|
||||||
|
#include <ciso646>
|
||||||
|
#include <sstream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <stdexcept>
|
||||||
|
|
||||||
|
namespace din {
|
||||||
|
// Owning smart pointer for a libmagic handle. The deleter is a plain function
// pointer; every instance created in this file passes &magic_close for it.
using MagicCookie = std::unique_ptr<magic_set, void(*)(magic_t)>;
|
||||||
|
|
||||||
|
// Private state of MimeType, kept out of the header so that <magic.h> is
// only needed by this translation unit.
struct MimeType::LocalData {
	// libmagic handle. Starts out empty, with magic_close registered as the
	// deleter; the MimeType constructor swaps the real cookie in.
	MagicCookie magic_cookie{nullptr, &magic_close};
};
|
||||||
|
|
||||||
|
// Opens a libmagic cookie and loads a magic database into it.
//
// If magic_open() fails the object is still constructed, but initialized()
// returns false and analyze() returns nullptr.
//
// Throws std::runtime_error when the cookie was opened but magic_load()
// reports a failure; the cookie is closed before the exception propagates.
MimeType::MimeType() :
	m_local_data(new LocalData)
{
	auto raw_cookie = magic_open(MAGIC_SYMLINK | MAGIC_MIME | MAGIC_PRESERVE_ATIME | MAGIC_ERROR);
	// Take ownership immediately so the handle is closed if we throw below.
	MagicCookie cookie(raw_cookie, &magic_close);

	if (raw_cookie) {
		// A null filename makes libmagic load its default database (see libmagic(3)).
		if (0 != magic_load(raw_cookie, nullptr)) {
			// magic_error() is documented to return NULL when no error is
			// recorded; streaming a null char pointer would be undefined
			// behaviour, so fall back to a fixed text.
			const char* const reason = magic_error(raw_cookie);

			std::ostringstream oss;
			oss << "MimeType cannot load magic database: \"" <<
				(reason ? reason : "unknown error") <<
				"\"";

			throw std::runtime_error(oss.str());
		}
	}

	// Publish the cookie (possibly null) only once everything succeeded.
	std::swap(m_local_data->magic_cookie, cookie);
}
|
||||||
|
|
||||||
|
// Defined in this file rather than implicitly in the header: LocalData is
// only forward-declared there, and std::unique_ptr needs the complete type
// in order to destroy it.
MimeType::~MimeType() noexcept = default;
|
||||||
|
|
||||||
|
bool MimeType::initialized() const {
|
||||||
|
return not not m_local_data->magic_cookie;
|
||||||
|
}
|
||||||
|
|
||||||
|
const char* MimeType::analyze (const std::string& parPath) {
|
||||||
|
if (initialized()) {
|
||||||
|
const auto cookie = m_local_data->magic_cookie.get();
|
||||||
|
const auto retval = magic_file(cookie, parPath.c_str());
|
||||||
|
if (not retval) {
|
||||||
|
std::ostringstream oss;
|
||||||
|
oss << "MimeType failed to analyze \"" <<
|
||||||
|
parPath << "\": \"" << magic_error(cookie) << "\"";
|
||||||
|
|
||||||
|
throw std::runtime_error(oss.str());
|
||||||
|
}
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} //namespace din
|
40
src/scan/mimetype.hpp
Normal file
40
src/scan/mimetype.hpp
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
/* Copyright 2015, Michele Santullo
|
||||||
|
* This file is part of "dindexer".
|
||||||
|
*
|
||||||
|
* "dindexer" is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* "dindexer" is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef id0B4AABA220AE412F81409F63250E4853
|
||||||
|
#define id0B4AABA220AE412F81409F63250E4853
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
namespace din {
|
||||||
|
// Detects the MIME type of files through libmagic. All libmagic state is
// hidden behind the forward-declared LocalData, so including this header
// does not pull in <magic.h>.
class MimeType {
public:
	// Tries to open libmagic and load its database; see initialized() for
	// checking whether a handle was obtained.
	MimeType();
	// Declared here and defined in the implementation file, where LocalData
	// is a complete type.
	~MimeType() noexcept;

	// True when a libmagic handle is held and analyze() can produce results.
	bool initialized() const;
	// Returns libmagic's description of the file at parPath, or nullptr when
	// initialized() is false.
	const char* analyze ( const std::string& parPath );

private:
	struct LocalData;

	std::unique_ptr<LocalData> m_local_data;
};
|
||||||
|
} //namespace din
|
||||||
|
|
||||||
|
#endif
|
Loading…
Reference in a new issue