1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2025-02-20 12:14:55 +00:00

Merge branch 'hashdir_refactoring'

This commit is contained in:
King_DuckZ 2016-03-12 00:37:28 +01:00
commit 859764b9a4
52 changed files with 1367 additions and 815 deletions

View file

@ -19,7 +19,6 @@
#define id1B7A42F6E46547A6AB0F914E2A91399F
#include "dindexer-common/validationerror.hpp"
#include "dindexer-common/mediatypes.hpp"
#include <boost/program_options/variables_map.hpp>
namespace din {

View file

@ -32,4 +32,4 @@ set -f
find . \( $excl_paths -o $incl_extensions \) -a -type f $excl_files > cscope.files
set +f
cscope -b -q
exec cscope -b -q

View file

@ -18,9 +18,9 @@
#ifndef id17F1582F16C8478E8D9795BECBF275A3
#define id17F1582F16C8478E8D9795BECBF275A3
#include "dindexer-common/mediatypes.hpp"
#include "dindexer-machinery/mediatypes.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include "dindexer-common/compatibility.h"
#include "helpers/compatibility.h"
#include <vector>
namespace mchlib {
@ -36,8 +36,8 @@ namespace mchlib {
template <bool> class SetListingView;
ContentTypes guess_content_type ( dinlib::MediaTypes parMediaType, const SetListingView<true>& parContent, std::size_t parEntriesCount=0 );
ContentTypes guess_content_type ( dinlib::MediaTypes parMediaType, const std::vector<FileRecordData>& parContent );
ContentTypes guess_content_type ( MediaTypes parMediaType, const SetListingView<true>& parContent, std::size_t parEntriesCount=0 );
ContentTypes guess_content_type ( MediaTypes parMediaType, const std::vector<FileRecordData>& parContent );
char content_type_to_char ( ContentTypes parCType ) a_pure;
ContentTypes char_to_content_type ( char parCType ) a_pure;

View file

@ -1,77 +0,0 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef idE555EF56730442C1ADDC7B2AE7A9340E
#define idE555EF56730442C1ADDC7B2AE7A9340E
#include <memory>
#include <string>
#include <vector>
#if !defined(NDEBUG)
# define INDEXER_VERBOSE
#endif
#if defined(WITH_PROGRESS_FEEDBACK)
namespace std {
class condition_variable;
} //namespace std
#endif
namespace fastf {
struct FileStats;
} //namespace fastf
namespace dinlib {
struct Settings;
} //namespace dinlib
namespace mchlib {
struct FileRecordData;
// Move-only front end of the indexing machinery. All state is kept behind
// the opaque LocalData struct, so this header only exposes the operations.
class Indexer {
private:
	struct LocalData;

public:
	// Construction / destruction: movable, never copyable.
	Indexer();
	Indexer (Indexer&&) = default;
	Indexer (const Indexer&) = delete;
	~Indexer() noexcept;

	// Registers one filesystem entry together with its stats.
	// NOTE(review): the meaning of the returned bool is defined in the
	// implementation file, which is not visible here.
	bool add_path (const char* parPath, const fastf::FileStats& parStats);

#ifdef INDEXER_VERBOSE
	// Only declared when INDEXER_VERBOSE is defined (non-NDEBUG builds).
	void dump() const;
#endif

	// Item access.
	std::size_t total_items() const;
	std::string operator[] (std::size_t parIndex) const;

#ifdef WITH_PROGRESS_FEEDBACK
	// Progress reporting, only declared when WITH_PROGRESS_FEEDBACK is defined.
	std::size_t processed_items() const;
	std::string current_item() const;
	std::condition_variable& step_notify();
#endif

	void calculate_hash();
	bool empty() const;
	void ignore_read_errors (bool parIgnore);
	const std::vector<FileRecordData>& record_data() const;

private:
	std::unique_ptr<LocalData> m_local_data;
};
} //namespace mchlib
#endif

View file

@ -20,11 +20,11 @@
#include <string>
#include <stdexcept>
#include "dindexer-common/mediatypes.hpp"
#include "dindexer-machinery/mediatypes.hpp"
namespace mchlib {
#if defined(WITH_MEDIA_AUTODETECT)
dinlib::MediaTypes guess_media_type ( std::string&& parPath );
MediaTypes guess_media_type ( std::string&& parPath );
class UnknownMediaTypeException : std::runtime_error {
public:

View file

@ -18,10 +18,10 @@
#ifndef id700AFD0F33634ACC88079BB8853A9E13
#define id700AFD0F33634ACC88079BB8853A9E13
#include "dindexer-common/compatibility.h"
#include "helpers/compatibility.h"
#include <string>
namespace dinlib {
namespace mchlib {
enum MediaTypes {
MediaType_CDRom = 'C',
MediaType_Directory = 'D',
@ -35,6 +35,7 @@ namespace dinlib {
const std::string& media_type_to_str ( MediaTypes parType );
MediaTypes char_to_media_type ( char parMType ) a_pure;
} //namespace dinlib
char media_type_to_char ( MediaTypes parMType ) a_pure;
} //namespace mchlib
#endif

View file

@ -51,6 +51,24 @@ namespace mchlib {
{
}
// Builds a record from values that are already known for an entry.
//  - parPath is moved into abs_path.
//  - path is a boost::string_ref view of abs_path that starts parRelPathOffs
//    characters in; it does not own any characters of its own.
//  - atime, mtime, level, is_directory and is_symlink are taken from the
//    matching parameters.
//  - hash is value-initialised, the mime fields are default-constructed,
//    size is 0, and unreadable / hash_valid start out false.
// NOTE(review): path views abs_path, so this relies on abs_path being declared
// (and therefore initialised) before path; the member declarations are not
// visible from here - confirm.
// NOTE(review): boost::string_ref::substr is documented to throw
// std::out_of_range when the offset is past the end of the string - confirm
// callers never pass parRelPathOffs > parPath.size().
FileRecordData ( std::string&& parPath, std::size_t parRelPathOffs, std::time_t parATime, std::time_t parMTime, uint16_t parLevel, bool parIsDir, bool parIsSymLink ) :
hash {},
abs_path(std::move(parPath)),
mime_full(),
atime(parATime),
mtime(parMTime),
path(boost::string_ref(abs_path).substr(parRelPathOffs)),
mime_type(),
mime_charset(),
size(0),
level(parLevel),
is_directory(parIsDir),
is_symlink(parIsSymLink),
unreadable(false),
hash_valid(false)
{
}
#if defined(NDEBUG)
FileRecordData ( const FileRecordData& ) = delete;
#else

View file

@ -0,0 +1,70 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef idCB253C1A5AFA46A18B8878ED4072CD96
#define idCB253C1A5AFA46A18B8878ED4072CD96
#include "dindexer-machinery/scantask/leanbase.hpp"
#include <ciso646>
#include <cassert>
namespace mchlib {
	namespace scantask {
		// LeanBase specialisation that owns the data object itself.
		// Derived classes only say how to populate (on_data_create) and how
		// to tear down (on_data_destroy) the T instance stored here; the two
		// LeanBase hooks are implemented on top of those and sealed with final.
		template <typename T>
		class Base : public LeanBase<T> {
		public:
			// Flags the data as not created (LeanBase::unset_data_created)
			// and then hands the stored object to on_data_destroy().
			void clear_data() {
				this->unset_data_created();
				this->on_data_destroy(m_data);
			}

		protected:
			Base() {}
			virtual ~Base() noexcept = default;

		private:
			// Hooks for the concrete task.
			virtual void on_data_destroy (T& parData) = 0;
			virtual void on_data_create (T& parData) = 0;

			// LeanBase hooks: hand out the stored object ...
			virtual T& on_data_get() final {
				return m_data;
			}
			// ... and fill it by delegating to on_data_create().
			virtual void on_data_fill() final {
				this->on_data_create(m_data);
			}

			// Re-declared here as private: classes deriving from Base are
			// expected to go through clear_data() instead.
			using LeanBase<T>::unset_data_created;

			T m_data;
		};
	} //namespace scantask
} //namespace mchlib
#endif

View file

@ -0,0 +1,51 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id148DBED10A0B45238E810683656BA7D5
#define id148DBED10A0B45238E810683656BA7D5
#include "dindexer-machinery/scantask/leanbase.hpp"
#include "dindexer-machinery/scantask/base.hpp"
#include "dindexer-machinery/mediatypes.hpp"
#include <memory>
#include <vector>
namespace mchlib {
	struct FileRecordData;
	struct SetRecordDataFull;

	namespace scantask {
		// Scan task exposing a SetRecordDataFull. It is built from three other
		// tasks: the one providing the set record, the directory tree task and
		// the media type task.
		// NOTE(review): how the three inputs are combined is defined in
		// contenttype.cpp, which is not visible here.
		class ContentType : public LeanBase<SetRecordDataFull> {
		public:
			typedef std::shared_ptr<Base<std::vector<FileRecordData>>> DirTreeTaskPtr;
			typedef std::shared_ptr<LeanBase<SetRecordDataFull>> MediaTypeTaskPtr;
			typedef std::shared_ptr<LeanBase<SetRecordDataFull>> SetTaskType;

			ContentType (SetTaskType parSet, DirTreeTaskPtr parDirTree, MediaTypeTaskPtr parMediaType);

		private:
			// LeanBase hooks.
			virtual void on_data_fill() override;
			virtual SetRecordDataFull& on_data_get() override;

			// Shared handles to the tasks received by the constructor.
			SetTaskType m_set_task;
			DirTreeTaskPtr m_dir_tree;
			MediaTypeTaskPtr m_media_type;
		};
	} //namespace scantask
} //namespace mchlib
#endif

View file

@ -0,0 +1,45 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id0AA31B2E7D6244A08435CF9080E34AAE
#define id0AA31B2E7D6244A08435CF9080E34AAE
#include "dindexer-machinery/scantask/base.hpp"
#include <string>
#include <vector>
namespace mchlib {
	struct FileRecordData;

	namespace scantask {
		// Scan task owning a list of FileRecordData built from a root path.
		// NOTE(review): the traversal itself lives in dirtree.cpp, which is not
		// visible here.
		class DirTree : public Base<std::vector<FileRecordData>> {
		public:
			using PathList = std::vector<FileRecordData>;

			// parRoot is taken by value.
			explicit DirTree (std::string parRoot);
			virtual ~DirTree() noexcept = default;

		private:
			// Base hooks: tear down / populate the owned list.
			virtual void on_data_destroy (PathList& parData) override;
			virtual void on_data_create (PathList& parData) override;

			std::string m_root;
		};
	} //namespace scantask
} //namespace mchlib
#endif

View file

@ -0,0 +1,72 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id904376BAD85D4782B83CAAEC2EF344D9
#define id904376BAD85D4782B83CAAEC2EF344D9
#include "dindexer-machinery/scantask/leanbase.hpp"
#include <memory>
#include <vector>
#include <utility>
#include <ciso646>
namespace mchlib {
	namespace scantask {
		// Groups several tasks that all expose a T. Filling this task runs
		// every grouped task; the data handed out is the one returned by the
		// first task in the list.
		template <typename T>
		class GeneralFiller : public LeanBase<T> {
		public:
			using FillingTaskPtr = std::shared_ptr<LeanBase<T>>;

			// Takes one or more shared pointers to tasks convertible to
			// FillingTaskPtr; they are stored in the order given.
			template <typename P, typename... PP>
			GeneralFiller (std::shared_ptr<P> parFill, std::shared_ptr<PP>... parFillList) :
				m_to_fill { std::move(parFill), std::move(parFillList)... }
			{
			}

			virtual ~GeneralFiller() noexcept {
				m_to_fill.clear();
			}

		private:
			// Asks every stored task, front to back, for its data.
			virtual void on_data_fill() override {
				const auto last = m_to_fill.end();
				for (auto task = m_to_fill.begin(); task != last; ++task) {
					(*task)->get_or_create();
				}
			}

			// Returns whatever the first stored task returns.
			virtual T& on_data_get() override {
				assert(not m_to_fill.empty());
				return m_to_fill.front()->get_or_create();
			}

			std::vector<FillingTaskPtr> m_to_fill;
		};
	} //namespace scantask
} //namespace mchlib
#endif

View file

@ -0,0 +1,53 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef idC7CC55298AC049EAA80604D6C7FD081D
#define idC7CC55298AC049EAA80604D6C7FD081D
#include "dindexer-machinery/scantask/leanbase.hpp"
#include <vector>
#include <memory>
#include <functional>
#include <boost/utility/string_ref.hpp>
#include <cstdint>
namespace mchlib {
	struct FileRecordData;

	namespace scantask {
		// Scan task exposing a list of FileRecordData. It is built on top of
		// another task that provides such a list.
		// NOTE(review): the hashing work itself is implemented in hashing.cpp,
		// which is not visible here.
		class Hashing : public LeanBase<std::vector<FileRecordData>> {
		public:
			using FileTreeBase = LeanBase<std::vector<FileRecordData>>;
			// NOTE(review): the meaning of the four callback arguments is not
			// documented in this header - check hashing.cpp before relying on it.
			using ProgressCallback = std::function<void(const boost::string_ref, uint64_t, uint64_t, uint32_t)>;

			Hashing (std::shared_ptr<FileTreeBase> parFileTree, bool parIgnoreErrors);
			virtual ~Hashing() noexcept;

			void set_progress_callback (ProgressCallback parFunc);

		private:
			// LeanBase hooks.
			virtual void on_data_fill() override;
			virtual std::vector<FileRecordData>& on_data_get() override;

			std::shared_ptr<FileTreeBase> m_file_tree_task;
			ProgressCallback m_progress_callback;
			bool m_ignore_errors;
		};
	} //namespace scantask
} //namespace mchlib
#endif

View file

@ -0,0 +1,99 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id982AF1D5C59C415584F56C1E6DDFE55E
#define id982AF1D5C59C415584F56C1E6DDFE55E
#include <ciso646>
#include <cassert>
#if !defined(NDEBUG)
#	define LEANBASE_ASSERT_REENTRANCY
#endif

namespace mchlib {
	namespace scantask {
#if defined(LEANBASE_ASSERT_REENTRANCY)
		// Scope guard used by LeanBase::get_or_create(): raises the pointed-to
		// flag on construction (asserting it was down) and lowers it again on
		// destruction.
		struct AutoSetBool {
			explicit AutoSetBool (bool* parBool) :
				m_bool(parBool)
			{
				assert(m_bool);
				assert(not *m_bool);
				*m_bool = true;
			}

			~AutoSetBool() noexcept {
				*m_bool = false;
			}

			bool* m_bool;
		};
#endif

		// Minimal interface of a task producing a T on demand.
		// get_or_create() invokes on_data_fill() the first time it is called
		// (and again after unset_data_created()), then always returns
		// on_data_get(). Where the T lives is up to the derived class.
		template <typename T>
		class LeanBase {
		public:
			T& get_or_create();

		protected:
			LeanBase();
			virtual ~LeanBase() noexcept = default;

			// Makes the next get_or_create() call on_data_fill() again.
			// Asserts that the data is currently flagged as created.
			void unset_data_created();

		private:
			virtual void on_data_fill() = 0;
			virtual T& on_data_get() = 0;

			bool m_data_created;
#if defined(LEANBASE_ASSERT_REENTRANCY)
			// Raised for the duration of get_or_create().
			bool m_inside_call;
#endif
		};

		template <typename T>
		LeanBase<T>::LeanBase() :
			m_data_created(false)
#if defined(LEANBASE_ASSERT_REENTRANCY)
			, m_inside_call(false)
#endif
		{
		}

		template <typename T>
		void LeanBase<T>::unset_data_created() {
			assert(m_data_created);
			m_data_created = false;
		}

		template <typename T>
		T& LeanBase<T>::get_or_create() {
#if defined(LEANBASE_ASSERT_REENTRANCY)
			assert(not m_inside_call);
			AutoSetBool reentrancy_guard(&m_inside_call);
#endif
			// The flag is raised before on_data_fill() runs, exactly like the
			// data would already exist while it is being filled.
			const bool already_filled = m_data_created;
			m_data_created = true;
			if (not already_filled) {
				this->on_data_fill();
			}
			return this->on_data_get();
		}
	} //namespace scantask
} //namespace mchlib
#endif

View file

@ -0,0 +1,52 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id25B0BCA6D9464754920E1BC7C5D9DB57
#define id25B0BCA6D9464754920E1BC7C5D9DB57
#include "dindexer-machinery/scantask/leanbase.hpp"
#include "dindexer-machinery/scantask/base.hpp"
#include "dindexer-machinery/mediatypes.hpp"
#include <string>
#include <memory>
namespace mchlib {
	struct SetRecordDataFull;

	namespace scantask {
		// Scan task exposing a SetRecordDataFull, built on top of the task that
		// provides the set record.
		// NOTE(review): the detection logic is implemented in mediatype.cpp,
		// which is not visible here.
		class MediaType : public LeanBase<SetRecordDataFull> {
		public:
			typedef std::shared_ptr<LeanBase<SetRecordDataFull>> SetTaskType;

			// parDefault is a char while the stored default is a MediaTypes
			// value; the conversion happens in the implementation file.
			MediaType (SetTaskType parSet, char parDefault, bool parForce, std::string parSearchPath);
			virtual ~MediaType() noexcept = default;

		private:
			// LeanBase hooks.
			virtual void on_data_fill() override;
			virtual SetRecordDataFull& on_data_get() override;

			SetTaskType m_set_task;
			MediaTypes m_default;
#if defined(WITH_MEDIA_AUTODETECT)
			// These two members only exist when media autodetection is
			// compiled in.
			std::string m_search_path;
			bool m_force;
#endif
		};
	} //namespace scantask
} //namespace mchlib
#endif

View file

@ -0,0 +1,46 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef idE173D2BA33744F448B870BB53AF52610
#define idE173D2BA33744F448B870BB53AF52610
#include "dindexer-machinery/scantask/base.hpp"
#include "dindexer-machinery/scantask/leanbase.hpp"
#include <vector>
#include <memory>
namespace mchlib {
	struct FileRecordData;

	namespace scantask {
		// Scan task exposing a list of FileRecordData, built on top of a
		// directory tree task.
		// NOTE(review): the mime lookup is implemented in mime.cpp, which is
		// not visible here.
		class Mime : public LeanBase<std::vector<FileRecordData>> {
		public:
			typedef std::shared_ptr<Base<std::vector<FileRecordData>>> DirTreeTaskPtr;

			explicit Mime (DirTreeTaskPtr parDirTree);
			virtual ~Mime() noexcept;

		private:
			// LeanBase hooks.
			virtual void on_data_fill() override;
			virtual std::vector<FileRecordData>& on_data_get() override;

			DirTreeTaskPtr m_file_tree_task;
		};
	} //namespace scantask
} //namespace mchlib
#endif

View file

@ -0,0 +1,42 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef idB323CEFC89C2468CA0A341C312C6C2C0
#define idB323CEFC89C2468CA0A341C312C6C2C0
#include "dindexer-machinery/scantask/base.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include <string>
namespace mchlib {
	namespace scantask {
		// Scan task owning a SetRecordDataFull; it is constructed from the
		// set's name.
		// NOTE(review): which fields get filled in is decided in setbasic.cpp,
		// which is not visible here.
		class SetBasic : public Base<SetRecordDataFull> {
			using ParentType = Base<SetRecordDataFull>;

		public:
			explicit SetBasic (std::string&& parName);
			virtual ~SetBasic() noexcept;

		private:
			// Base hooks: tear down / populate the owned record.
			virtual void on_data_destroy (SetRecordDataFull& parData) override;
			virtual void on_data_create (SetRecordDataFull& parData) override;

			std::string m_set_name;
		};
	} //namespace scantask
} //namespace mchlib
#endif

View file

@ -40,19 +40,21 @@ namespace mchlib {
template <bool Const>
implem::DirIterator<Const> first_file ( SetListingView<Const>& parList );
typedef FileRecordData SetListingItemType;
namespace implem {
template <bool Const>
class DirIterator : public boost::iterator_facade<DirIterator<Const>, FileRecordData, boost::forward_traversal_tag> {
class DirIterator : public boost::iterator_facade<DirIterator<Const>, SetListingItemType, boost::forward_traversal_tag> {
friend class mchlib::SetListingView<Const>;
friend class boost::iterator_core_access;
template <bool> friend class DirIterator;
typedef boost::iterator_facade<DirIterator<Const>, FileRecordData, boost::forward_traversal_tag> base_class;
typedef boost::iterator_facade<DirIterator<Const>, SetListingItemType, boost::forward_traversal_tag> base_class;
struct enabler {};
public:
typedef typename std::conditional<
Const,
std::vector<mchlib::FileRecordData>::const_iterator,
std::vector<mchlib::FileRecordData>::iterator
std::vector<SetListingItemType>::const_iterator,
std::vector<SetListingItemType>::iterator
>::type VecIterator;
typedef typename base_class::difference_type difference_type;
typedef typename base_class::value_type value_type;
@ -127,7 +129,7 @@ namespace mchlib {
class SetListing {
public:
typedef std::vector<FileRecordData> ListType;
typedef std::vector<SetListingItemType> ListType;
typedef implem::DirIterator<true> const_iterator;
explicit SetListing ( ListType&& parList, bool parSort=true );

View file

@ -2,7 +2,6 @@ project(${bare_name}-common CXX C)
add_library(${PROJECT_NAME}
commandline.cpp
mediatypes.cpp
settings.cpp
validationerror.cpp
common_info.cpp
@ -25,3 +24,10 @@ target_link_libraries(${PROJECT_NAME}
# RUNTIME DESTINATION bin
# ARCHIVE DESTINATION lib/static
#)
#Allow to link with .so
#see https://cmake.org/pipermail/cmake/2007-May/014350.html
#and http://stackoverflow.com/questions/6093547/what-do-r-x86-64-32s-and-r-x86-64-64-relocation-mean/6093910#6093910
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" )
set_target_properties(${PROJECT_NAME} PROPERTIES COMPILE_FLAGS "-fPIC")
endif()

View file

@ -19,7 +19,6 @@
#define idB6191389C4AD4EE5862CCF1591BE6CE5
#include "dindexer-common/validationerror.hpp"
#include "dindexer-common/mediatypes.hpp"
#include <boost/program_options/variables_map.hpp>
namespace din {

View file

@ -4,6 +4,7 @@ add_executable(${PROJECT_NAME}
main.cpp
commandline.cpp
postgre_locate.cpp
hash.cpp
)
target_include_directories(${PROJECT_NAME}
@ -13,6 +14,7 @@ target_include_directories(${PROJECT_NAME}
target_link_libraries(${PROJECT_NAME}
PRIVATE ${bare_name}-if
PRIVATE ${bare_name}-common
PRIVATE ${bare_name}-machinery
)
string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}")

View file

@ -28,6 +28,7 @@ namespace din {
set_options.add_options()
("case-insensitive,i", "Disable case sensitivity during search")
("set,s", "Look for matching sets instead of files")
("byhash,a", "Paths on the command line are local paths and searching should be done by content hash")
//("option,o", po::value<std::string>()->default_value("default_value"), "Help message")
//("option2", po::value<int>(), "Help message")
;

View file

@ -19,7 +19,6 @@
#define id1B7A42F6E46547A6AB0F914E2A91399F
#include "dindexer-common/validationerror.hpp"
#include "dindexer-common/mediatypes.hpp"
#include <boost/program_options/variables_map.hpp>
namespace din {

98
src/locate/hash.cpp Normal file
View file

@ -0,0 +1,98 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "hash.hpp"
#include "dindexer-machinery/scantask/hashing.hpp"
#include "dindexer-machinery/scantask/dirtree.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include <memory>
#include <sys/stat.h>
#include <stdexcept>
#include <utility>
#include <cassert>
#include <ciso646>
namespace stask = mchlib::scantask;
namespace din {
namespace {
	// File-list task that yields exactly one record: the path given to the
	// constructor, with access/modification times read from the stat buffer.
	// The stat buffer is only borrowed (a raw pointer is stored), so it has
	// to stay alive for as long as this task may be asked to create data.
	class SingleFileTask : public stask::Base<std::vector<mchlib::FileRecordData>> {
	public:
		typedef std::vector<mchlib::FileRecordData> PathList;

		SingleFileTask (std::string parPath, const struct stat* parStat) :
			m_path(std::move(parPath)),
			m_stat(parStat)
		{
			assert(not m_path.empty());
			assert(m_stat);
		}

		virtual ~SingleFileTask() noexcept = default;

	private:
		// Empties the list again; it is expected to hold the record.
		virtual void on_data_destroy (PathList& parData) override {
			assert(not parData.empty());
			parData.clear();
		}

		// Appends the single record to the (expected empty) list. The record
		// gets a copy of the path, relative-path offset 0, level 0, and is
		// flagged as neither a directory nor a symlink.
		virtual void on_data_create (PathList& parData) override {
			assert(parData.empty());
			parData.reserve(1);

			std::string path_copy(m_path);
			parData.push_back(mchlib::FileRecordData(
				std::move(path_copy),
				0,
				m_stat->st_atime,
				m_stat->st_mtime,
				0,
				false,
				false
			));
		}

		std::string m_path;
		const struct stat* m_stat;
	};
} //unnamed namespace
// Computes the hash of the filesystem entry at parPath.
//
// A directory is scanned through stask::DirTree, anything else is wrapped in
// a SingleFileTask (debug builds assert it is a regular file). The resulting
// list is run through stask::Hashing with "ignore errors" turned off, and the
// hash stored in the first record of the list is returned.
//
// Throws std::runtime_error if parPath cannot be stat()ed, or if the scan
// produces no record at all.
mchlib::TigerHash hash (const std::string& parPath) {
	using mchlib::FileRecordData;
	using FileListTask = stask::Base<std::vector<FileRecordData>>;

	struct stat path_stat;
	const int retval = stat(parPath.c_str(), &path_stat);
	if (retval) {
		throw std::runtime_error("Can't access file \"" + parPath + "\"");
	}

	std::shared_ptr<FileListTask> file_src_task;
	if (S_ISDIR(path_stat.st_mode)) {
		file_src_task = std::make_shared<stask::DirTree>(parPath);
	}
	else {
		assert(S_ISREG(path_stat.st_mode));
		//path_stat lives until the end of this function, which is longer
		//than any use of the task created here
		file_src_task = std::make_shared<SingleFileTask>(parPath, &path_stat);
	}

	//The hashing task is only needed inside this function, so it can live
	//on the stack
	stask::Hashing hashing(file_src_task, false);
	const auto& records = hashing.get_or_create();
	if (records.empty()) {
		//front() on an empty vector is undefined behaviour: report instead
		throw std::runtime_error("Nothing to hash in \"" + parPath + "\"");
	}
	return records.front().hash;
}
} //namespace din

28
src/locate/hash.hpp Normal file
View file

@ -0,0 +1,28 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id3F3E29B28FAA44A190451198CF1FD166
#define id3F3E29B28FAA44A190451198CF1FD166
#include "dindexer-machinery/tiger.hpp"
#include <string>
#include <vector>

namespace din {
	// Returns the hash computed for the filesystem entry at parPath.
	// Throws std::runtime_error when the path cannot be accessed.
	mchlib::TigerHash hash ( const std::string& parPath );
} //namespace din
#endif

View file

@ -19,6 +19,7 @@
#include "postgre_locate.hpp"
#include "dindexer-common/settings.hpp"
#include "dindexerConfig.h"
#include "hash.hpp"
#include <iostream>
#include <ciso646>
#include <iterator>
@ -73,7 +74,15 @@ int main (int parArgc, char* parArgv[]) {
std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedSet>(std::cout, "\n"));
}
else {
const auto results = din::locate_in_db(settings.db, vm["substring"].as<std::string>(), not not vm.count("case-insensitive"));
std::vector<din::LocatedItem> results;
if (vm.count("byhash")) {
const auto hash = din::hash(vm["substring"].as<std::string>());
results = din::locate_in_db(settings.db, hash);
}
else {
results = din::locate_in_db(settings.db, vm["substring"].as<std::string>(), not not vm.count("case-insensitive"));
}
std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedItem>(std::cout, "\n"));
}
return 0;

View file

@ -17,6 +17,7 @@
#include "postgre_locate.hpp"
#include "pq/connection.hpp"
#include "dindexer-machinery/tiger.hpp"
#include <utility>
#include <sstream>
#include <boost/utility/string_ref.hpp>
@ -53,10 +54,25 @@ namespace din {
return std::move(retval);
}
// Converts a result set whose rows expose the "path", "id" and "group_id"
// columns into a vector of LocatedItem, one item per row and in row order.
// "id" and "group_id" are converted with boost::lexical_cast to the types of
// the matching LocatedItem members.
std::vector<LocatedItem> file_result_to_vec (pq::ResultSet&& parResult) {
	using boost::lexical_cast;

	std::vector<LocatedItem> retval;
	retval.reserve(parResult.size());
	for (const auto& record : parResult) {
		retval.push_back(LocatedItem{
			record["path"],
			lexical_cast<decltype(LocatedItem::id)>(record["id"]),
			lexical_cast<decltype(LocatedItem::group_id)>(record["group_id"])
		});
	}
	//Returned by name: a local is moved (or elided) automatically, while
	//wrapping it in std::move() would only disable copy elision
	return retval;
}
} //unnamed namespace
std::vector<LocatedItem> locate_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) {
using boost::lexical_cast;
using boost::string_ref;
namespace ba = boost::algorithm;
@ -78,17 +94,15 @@ namespace din {
oss << "LIMIT " << g_max_results << ';';
auto result = conn.query(oss.str());
std::vector<LocatedItem> retval;
retval.reserve(result.size());
for (const auto& record : result) {
retval.push_back(LocatedItem{
record["path"],
lexical_cast<decltype(LocatedItem::id)>(record["id"]),
lexical_cast<decltype(LocatedItem::group_id)>(record["group_id"])
});
}
return file_result_to_vec(std::move(result));
}
return std::move(retval);
// Looks up files by content hash. The hash is sent as the query's $1
// parameter (rendered through mchlib::tiger_to_string), and at most
// g_max_results rows are requested.
std::vector<LocatedItem> locate_in_db (const dinlib::SettingsDB& parDB, const mchlib::TigerHash& parSearch) {
	const std::string row_limit = boost::lexical_cast<std::string>(g_max_results);
	const std::string query = std::string("SELECT \"path\",\"id\",\"group_id\" FROM \"files\" WHERE \"hash\"=$1 LIMIT ") + row_limit + ';';

	auto conn = make_pq_conn(parDB);
	return file_result_to_vec(conn.query(query, mchlib::tiger_to_string(parSearch, true)));
}
std::vector<LocatedSet> locate_sets_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) {

View file

@ -23,6 +23,10 @@
#include <string>
#include <cstdint>
namespace mchlib {
struct TigerHash;
} //namespace mchlib
namespace din {
struct LocatedItem {
std::string path;
@ -38,6 +42,7 @@ namespace din {
};
std::vector<LocatedItem> locate_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive );
std::vector<LocatedItem> locate_in_db ( const dinlib::SettingsDB& parDB, const mchlib::TigerHash& parSearch );
std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive );
std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, const std::vector<uint32_t>& parSets, bool parCaseInsensitive );
} //namespace din

View file

@ -4,7 +4,6 @@ include(WithMediaAutodetect)
find_package(Magic REQUIRED)
add_library(${PROJECT_NAME} SHARED
indexer.cpp
pathname.cpp
tiger.c
tiger.cpp
@ -12,10 +11,17 @@ add_library(${PROJECT_NAME} SHARED
filesearcher.cpp
discinfo.cpp
mediatype.cpp
mediatypes.cpp
machinery_info.cpp
guess_content_type.cpp
set_listing.cpp
globbing.cpp
scantask/dirtree.cpp
scantask/mediatype.cpp
scantask/hashing.cpp
scantask/contenttype.cpp
scantask/mime.cpp
scantask/setbasic.cpp
)
#target_include_directories(${PROJECT_NAME}
@ -29,6 +35,10 @@ target_link_libraries(${PROJECT_NAME}
PRIVATE ${MAGIC_LIBRARIES}
)
target_include_directories(${PROJECT_NAME}
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
)
if (DINDEXER_WITH_MEDIA_AUTODETECT)
target_include_directories(${PROJECT_NAME} SYSTEM
PRIVATE ${BLKID_INCLUDE_DIRS}

View file

@ -15,7 +15,7 @@
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "dindexer-machinery/filesearcher.hpp"
#include "filesearcher.hpp"
#if !defined(_XOPEN_SOURCE)
#define _XOPEN_SOURCE 500
@ -59,6 +59,7 @@ namespace fastf {
st.is_symlink = parSymlink;
st.atime = parStat->st_atime;
st.mtime = parStat->st_mtime;
st.size = 0;
if (not (*g_searchOptions.callback)(parPath, st))
return FTW_STOP;
@ -81,6 +82,7 @@ namespace fastf {
st.is_symlink = parSymlink;
st.atime = parStat->st_atime;
st.mtime = parStat->st_mtime;
st.size = parStat->st_size;
if (extensions.empty()) {
if (not (*g_searchOptions.callback)(parPath, st)) {

View file

@ -19,12 +19,16 @@
#define id4A7D7AB671954418939FC0BDA19C5B3F
#include <ctime>
#include <cstdint>
namespace fastf {
struct FileStats {
int level;
static_assert(sizeof(std::time_t) >= sizeof(uint64_t), "Reorder members or comment out this assertion");
std::time_t atime;
std::time_t mtime;
uint64_t size;
uint16_t level;
bool is_dir;
bool is_symlink;
};

View file

@ -42,7 +42,7 @@ namespace mchlib {
};
struct EntryChecking {
typedef bool(*CheckerFunction)(dinlib::MediaTypes, const ConstSetListingView&, const std::vector<const FileRecordData*>&);
typedef bool(*CheckerFunction)(MediaTypes, const ConstSetListingView&, const std::vector<const FileRecordData*>&);
std::size_t max_total_entries;
CheckerFunction checker_func;
@ -87,8 +87,8 @@ namespace mchlib {
return std::move(retval);
}
bool identify_video_dvd (dinlib::MediaTypes parMediaType, const ConstSetListingView& parContent, const std::vector<const FileRecordData*>& parFlatContent ) {
if (parMediaType != dinlib::MediaType_DVD and parMediaType != dinlib::MediaType_Directory)
bool identify_video_dvd (MediaTypes parMediaType, const ConstSetListingView& parContent, const std::vector<const FileRecordData*>& parFlatContent ) {
if (parMediaType != MediaType_DVD and parMediaType != MediaType_Directory)
return false;
const auto items_count = count_listing_items(parContent);
@ -103,8 +103,8 @@ namespace mchlib {
return check_missing_content(parFlatContent, should_have).empty();
}
bool identify_video_cd (dinlib::MediaTypes parMediaType, const ConstSetListingView& parContent, const std::vector<const FileRecordData*>& parFlatContent) {
if (parMediaType != dinlib::MediaType_CDRom and parMediaType != dinlib::MediaType_Directory)
bool identify_video_cd (MediaTypes parMediaType, const ConstSetListingView& parContent, const std::vector<const FileRecordData*>& parFlatContent) {
if (parMediaType != MediaType_CDRom and parMediaType != MediaType_Directory)
return false;
const auto items_count = count_listing_items(parContent);
@ -121,7 +121,7 @@ namespace mchlib {
}
} //unnamed namespace
ContentTypes guess_content_type (dinlib::MediaTypes parMediaType, const ConstSetListingView& parContent, std::size_t parEntriesCount) {
ContentTypes guess_content_type (MediaTypes parMediaType, const ConstSetListingView& parContent, std::size_t parEntriesCount) {
if (boost::empty(parContent))
return ContentType_Empty;
@ -145,7 +145,7 @@ namespace mchlib {
return ContentType_Generic;
}
ContentTypes guess_content_type (dinlib::MediaTypes parMediaType, const std::vector<FileRecordData>& parContent) {
ContentTypes guess_content_type (MediaTypes parMediaType, const std::vector<FileRecordData>& parContent) {
if (parContent.empty())
return ContentType_Empty;
@ -155,7 +155,7 @@ namespace mchlib {
assert(std::equal(parContent.begin(), parContent.end(), SetListing(std::vector<FileRecordData>(parContent)).sorted_list().begin()));
//TODO: assert that the first item in the list is the shortest string
std::shared_ptr<PathName> pathname(new PathName(parContent.front().abs_path));
std::shared_ptr<PathName> pathname(new PathName(""));
ConstSetListingView view(parContent.begin(), parContent.end(), pathname->atom_count(), pathname);
assert(parContent.size() >= 1);
return guess_content_type(parMediaType, view, parContent.size() - 1);

View file

@ -1,526 +0,0 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
//WARNING: buggy code - intermediate hash for directories that contain files
//is likely wrong!
//#define USE_LEGACY_HASH_DIR
#include "dindexer-machinery/indexer.hpp"
#include "pathname.hpp"
#include "dindexer-machinery/tiger.hpp"
#include "dindexer-common/settings.hpp"
#include "dindexer-machinery/filestats.hpp"
#include "mimetype.hpp"
#include "dindexer-machinery/recorddata.hpp"
#if !defined(USE_LEGACY_HASH_DIR)
# include "dindexer-machinery/set_listing.hpp"
#endif
#include <algorithm>
#include <functional>
#include <stdexcept>
#if defined(WITH_PROGRESS_FEEDBACK)
# include <atomic>
# include <condition_variable>
#endif
#include <cstdint>
#include <ciso646>
#include <cassert>
#include <boost/iterator/filter_iterator.hpp>
#include <sstream>
#include <iterator>
#if defined(INDEXER_VERBOSE)
# include <iostream>
#endif
#include <boost/utility/string_ref.hpp>
#include <boost/range/empty.hpp>
namespace mchlib {
using HashType = decltype(FileRecordData::hash);
namespace {
typedef std::vector<FileRecordData>::iterator FileEntryIt;
//Appends the raw bytes of parHash, followed by the characters of
//parString, to the end of parDest.
void append_to_vec (std::vector<char>& parDest, const HashType& parHash, const std::string& parString) {
	const auto hash_first = parHash.byte_data;
	const auto hash_last = hash_first + sizeof(HashType);

	parDest.reserve(parDest.size() + sizeof(HashType) + parString.size());
	parDest.insert(parDest.end(), hash_first, hash_last);
	parDest.insert(parDest.end(), parString.begin(), parString.end());
}
//Appends the characters of parString to the end of parDest.
void append_to_vec (std::vector<char>& parDest, const std::string& parString) {
	parDest.insert(parDest.end(), parString.begin(), parString.end());
}
#if !defined(USE_LEGACY_HASH_DIR)
//Recursively computes the hash of the directory parEntry.
//parList is the listing of parEntry's direct children, parCurrDir is the
//path of parEntry itself and parMime is used to detect mime types.
//A blob is first built from the hash and relative name of every child
//directory (each hashed recursively) and the relative name of every other
//child; the hash of that blob becomes the initial hash of the directory.
//Every child file is then hashed through tiger_file(), which also receives
//the directory's hash.
//When a file can't be read the entry is flagged as unreadable if
//parIgnoreErrors is true, otherwise the exception is rethrown unchanged.
void hash_dir (FileRecordData& parEntry, MutableSetListingView& parList, const PathName& parCurrDir, MimeType& parMime, bool parIgnoreErrors) {
	assert(parEntry.is_directory);

	parEntry.mime_full = parMime.analyze(parEntry.abs_path);

	//Build a blob with the hashes and filenames of every directory that
	//is a direct child of current entry
	std::vector<char> dir_blob;
#if defined(INDEXER_VERBOSE)
	std::cout << "Making initial hash for " << parCurrDir << "...\n";
#endif
	for (auto it = parList.begin(); it != parList.end(); ++it) {
		assert(parCurrDir == PathName(it->abs_path).pop_right());

		PathName curr_subdir(it->abs_path);
		const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
		if (it->is_directory) {
			auto cd_list = MutableSetListingView(it);
			assert(boost::empty(cd_list) or cd_list.begin()->abs_path != it->abs_path);

			hash_dir(*it, cd_list, curr_subdir, parMime, parIgnoreErrors);
			append_to_vec(dir_blob, it->hash, relpath);
		}
		else {
			//Files only contribute their name at this stage
			append_to_vec(dir_blob, relpath);
		}
	}
	tiger_data(dir_blob, parEntry.hash);
	parEntry.size = 0;

#if defined(INDEXER_VERBOSE)
	std::cout << "Got intermediate hash for dir " << parCurrDir <<
		": " << tiger_to_string(parEntry.hash) <<
		' ' << parEntry.mime_type << '\n';
#endif

	//Now with the initial hash ready, let's start hashing files, if any
	for (auto it = first_file(parList); it != parList.end(); ++it) {
		assert(not it->is_directory);

#if defined(INDEXER_VERBOSE)
		std::cout << "Hashing file " << it->abs_path << "...";
#endif
		//TODO: notify callback
		try {
			tiger_file(it->abs_path, it->hash, parEntry.hash, it->size);
			it->hash_valid = true;
			it->mime_full = parMime.analyze(it->abs_path);
			auto mime_pair = split_mime(it->mime_full);
			it->mime_type = mime_pair.first;
			it->mime_charset = mime_pair.second;
		}
		catch (const std::ios_base::failure&) {
			if (parIgnoreErrors) {
				it->unreadable = true;
				it->hash = HashType {};
				if (it->mime_full.get().empty()) {
					it->mime_full = "unknown";
					it->mime_type = boost::string_ref(it->mime_full.get());
					it->mime_charset = boost::string_ref(it->mime_full.get());
				}
			}
			else {
				//Rethrow the exception being handled: "throw e" would
				//throw a copy made from the static type instead
				throw;
			}
		}

#if defined(INDEXER_VERBOSE)
		std::cout << ' ' << tiger_to_string(it->hash) << ' ' <<
			"Mime type: \"" << it->mime_type << "\"\n";
#endif
	}

#if defined(INDEXER_VERBOSE)
	std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(parEntry.hash) << '\n';
#endif
	parEntry.hash_valid = true;
	{
		parEntry.mime_full = parMime.analyze(parEntry.abs_path);
		auto mime_pair = split_mime(parEntry.mime_full);
		parEntry.mime_type = mime_pair.first;
		parEntry.mime_charset = mime_pair.second;
	}
}
#endif
#if defined(USE_LEGACY_HASH_DIR)
//Legacy implementation of the directory hashing, only built when
//USE_LEGACY_HASH_DIR is defined.
//parEntry points at the directory being hashed inside the flat list
//[parBegin, parEnd), parCurrDir is that directory's path and
//parNextItemCallback is invoked with the index (relative to parBegin) of
//each file right before it gets hashed.
//The function scans the flat list for the direct children of parCurrDir,
//builds a blob out of sub-directory hashes and child names, hashes the blob
//and then hashes every child file through tiger_file().
//When a file can't be read the entry is flagged as unreadable if
//parIgnoreErrors is true, otherwise the exception is rethrown unchanged.
void hash_dir (FileEntryIt parEntry, FileEntryIt parBegin, FileEntryIt parEnd, const PathName& parCurrDir, std::function<void(std::size_t)> parNextItemCallback, bool parIgnoreErrors, MimeType& parMime) {
	assert(parEntry != parEnd);
	assert(parEntry->is_directory);
	FileRecordData& curr_entry = *parEntry;
	auto& curr_entry_it = parEntry;

	curr_entry.mime_full = parMime.analyze(curr_entry.abs_path);

	//Build a blob with the hashes and filenames of every directory that
	//is a direct child of current entry
	{
		std::vector<char> dir_blob;
		auto it_entry = curr_entry_it;

		//Skip forward to the first entry whose parent is parCurrDir
		while (
			it_entry != parEnd and (
				it_entry->level == curr_entry.level
				or parCurrDir != PathName(it_entry->abs_path).pop_right()
				//and (not it_entry->is_dir or (it_entry->level <= curr_entry.level
				//and parCurrDir != PathName(it_entry->path).pop_right()))
		)) {
			assert(it_entry->level >= curr_entry.level);
			++it_entry;
		}

#if defined(INDEXER_VERBOSE)
		std::cout << "Making initial hash for " << parCurrDir << "...\n";
#endif
		while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->abs_path).pop_right()) {
			PathName curr_subdir(it_entry->abs_path);
			const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();

			if (it_entry->is_directory) {
				hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback, parIgnoreErrors, parMime);
				append_to_vec(dir_blob, it_entry->hash, relpath);
			}
			else {
				append_to_vec(dir_blob, relpath);
			}
			++it_entry;
		}

		tiger_data(dir_blob, curr_entry.hash);
		curr_entry.size = 0;
#if defined(INDEXER_VERBOSE)
		std::cout << "Got intermediate hash for dir " << parCurrDir <<
			": " << tiger_to_string(curr_entry.hash) <<
			' ' << curr_entry.mime_type << '\n';
#endif
	}

	//Now with the initial hash ready, let's start hashing files, if any
	{
		auto it_entry = curr_entry_it;
		//Skip forward to the first file that is a direct child of parCurrDir
		while (
			it_entry != parEnd
			and (it_entry->is_directory
				or it_entry->level != curr_entry_it->level + 1
				or PathName(it_entry->abs_path).pop_right() != parCurrDir
			)
		) {
			++it_entry;
		}

		while (it_entry != parEnd and not it_entry->is_directory and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->abs_path).pop_right() == parCurrDir) {
			assert(not it_entry->is_directory);
#if defined(INDEXER_VERBOSE)
			std::cout << "Hashing file " << it_entry->abs_path << "...";
#endif
			parNextItemCallback(it_entry - parBegin);
			try {
				tiger_file(it_entry->abs_path, it_entry->hash, curr_entry_it->hash, it_entry->size);
				it_entry->hash_valid = true;
				it_entry->mime_full = parMime.analyze(it_entry->abs_path);
				auto mime_pair = split_mime(it_entry->mime_full);
				it_entry->mime_type = mime_pair.first;
				it_entry->mime_charset = mime_pair.second;
			}
			catch (const std::ios_base::failure&) {
				if (parIgnoreErrors) {
					it_entry->unreadable = true;
					it_entry->hash = HashType {};
					if (it_entry->mime_full.get().empty()) {
						it_entry->mime_full = "unknown";
						it_entry->mime_type = boost::string_ref(it_entry->mime_full.get());
						it_entry->mime_charset = boost::string_ref(it_entry->mime_full.get());
					}
				}
				else {
					//Rethrow the exception being handled: "throw e" would
					//throw a copy made from the static type instead
					throw;
				}
			}

#if defined(INDEXER_VERBOSE)
			std::cout << ' ' << tiger_to_string(it_entry->hash) << ' ' <<
				"Mime type: \"" << it_entry->mime_type << "\"\n";
#endif
			++it_entry;
		}
	}

#if defined(INDEXER_VERBOSE)
	std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(curr_entry_it->hash) << '\n';
#endif
	curr_entry_it->hash_valid = true;
	{
		curr_entry_it->mime_full = parMime.analyze(curr_entry_it->abs_path);
		auto mime_pair = split_mime(curr_entry_it->mime_full);
		curr_entry_it->mime_type = mime_pair.first;
		curr_entry_it->mime_charset = mime_pair.second;
	}
}
#endif
//Predicate over FileRecordData used to filter entries by kind.
//With FileTrue == true it evaluates to true for non-directory entries,
//with FileTrue == false it evaluates to true for directories.
template <bool FileTrue=true>
struct IsFile {
bool operator() ( const FileRecordData& parEntry ) const { return parEntry.is_directory xor FileTrue; }
};
//Builds a FileRecordData for parPath, forwarding the access time,
//modification time, level, directory flag and symlink flag found in parSt
//to the FileRecordData constructor.
FileRecordData make_file_record_data (const char* parPath, const fastf::FileStats& parSt) {
return FileRecordData(
parPath,
parSt.atime,
parSt.mtime,
parSt.level,
parSt.is_dir,
parSt.is_symlink
);
}
bool file_record_data_lt (const FileRecordData& parLeft, const FileRecordData& parRight) {
const FileRecordData& l = parLeft;
const FileRecordData& r = parRight;
return
(l.level < r.level)
or (l.level == r.level and l.is_directory and not r.is_directory)
or (l.level == r.level and l.is_directory == r.is_directory and l.abs_path < r.abs_path)
//sort by directory - parent first, children later
//(level == o.level and is_dir and not o.is_dir)
//or (level == o.level and is_dir == o.is_dir and path < o.path)
//or (level > o.level + 1)
//or (level + 1 == o.level and is_dir and not o.is_dir and path < o.path)
//or (level + 1 == o.level and is_dir and not o.is_dir and path == PathName(o.path).dirname())
//or (level == o.level + 1 and not (o.is_dir and not is_dir and o.path == PathName(path).dirname()))
;
}
void populate_rel_paths (const PathName& parBase, std::vector<FileRecordData>& parItems) {
const std::size_t offset = parBase.str_path_size() + 1;
for (FileRecordData& itm : parItems) {
const auto curr_offset = std::min(offset, itm.abs_path.size());
itm.path = boost::string_ref(itm.abs_path).substr(curr_offset);
assert(itm.path.data());
}
}
} //unnamed namespace
//Private state of Indexer, reached through m_local_data.
struct Indexer::LocalData {
typedef std::vector<FileRecordData> PathList;
//Every entry added through add_path(), directories and files alike
PathList paths;
#if defined(WITH_PROGRESS_FEEDBACK)
//Count of processed items, reset at the start of calculate_hash()
std::atomic<std::size_t> done_count;
//Index into paths of the item reported by current_item()
std::atomic<std::size_t> processing_index;
//Notified by the progress callback passed to the legacy hash_dir()
std::condition_variable step_notify;
#endif
//Number of non-directory entries added through add_path()
std::size_t file_count;
//Passed to hash_dir() to decide whether unreadable files abort hashing
bool ignore_read_errors;
};
//Creates an indexer with no paths, every counter set to zero and read
//errors not ignored.
Indexer::Indexer() :
	m_local_data(new LocalData)
{
#if !defined(NDEBUG)
	//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b", 2, true, false));
	//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b/c/file.txt", 4, false, false));
	//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b/c/file.c", 4, false, false));
	//assert(FileEntry("/a/b/c/d", 4, true, false) < FileEntry("/a/b", 2, true, false));
	//assert(FileEntry("/a/b/c/d", 4, true, false) < FileEntry("/a/b/c", 3, true, false));
	//assert(FileEntry("/a/b/c/1.txt", 4, true, false) < FileEntry("/a/b/c/2.txt", 4, true, false));
	//assert(not (FileEntry("/a/b/file.txt", 3, false, false) < FileEntry("/a/b", 2, true, false)));
	//assert(not (FileEntry("/a", 1, true, false) < FileEntry("/a/b", 2, true, false)));
	//assert(not (FileEntry("/a/b/1.txt", 3, false, false) < FileEntry("/a/b/c/f.txt", 4, true, false)));
	//assert(not (FileEntry("/a/b/c/file.c", 4, false, false) < FileEntry("/a/b/c", 3, true, false)));
#endif

#if defined(WITH_PROGRESS_FEEDBACK)
	m_local_data->done_count = 0;
	m_local_data->processing_index = 0;
#endif
	m_local_data->file_count = 0;
	//"new LocalData" leaves this bool indeterminate, and calculate_hash()
	//reads it even if ignore_read_errors() was never called; give it a
	//well-defined default.
	m_local_data->ignore_read_errors = false;
}
//Destructor with an empty body, defined out of line in this file.
Indexer::~Indexer() noexcept {
}
//Returns the number of non-directory entries added so far through
//add_path().
std::size_t Indexer::total_items() const {
return m_local_data->file_count;
}
#if defined(WITH_PROGRESS_FEEDBACK)
//Returns the current value of the done_count progress counter.
std::size_t Indexer::processed_items() const {
return m_local_data->done_count;
}
#endif
//Hashes every entry previously added through add_path().
//The first entry of the list is taken as the base path, the list is
//sorted with file_record_data_lt and hash_dir() is invoked on the first
//entry of the sorted list. Once hashing is done the relative path of
//every entry is filled in by populate_rel_paths().
//NOTE(review): paths.front() is called unconditionally, so this looks
//like it requires at least one entry to have been added - confirm with
//callers.
void Indexer::calculate_hash() {
PathName base_path(m_local_data->paths.front().abs_path);
std::sort(m_local_data->paths.begin(), m_local_data->paths.end(), &file_record_data_lt);
MimeType mime;
#if defined(INDEXER_VERBOSE)
//Mark every hash with a 1/1/1 sentinel so that the check at the end of
//this function can spot entries that were never hashed
for (auto& itm : m_local_data->paths) {
itm.hash.part_a = 1;
itm.hash.part_b = 1;
itm.hash.part_c = 1;
if (itm.is_directory)
std::cout << "(D) ";
else
std::cout << "(F) ";
std::cout << itm.abs_path << " (" << itm.level << ")\n";
}
std::cout << "-----------------------------------------------------\n";
#endif
#if !defined(USE_LEGACY_HASH_DIR)
MutableSetListingView recordlist(m_local_data->paths.begin(), m_local_data->paths.end(), base_path.atom_count());
#endif
#if defined(WITH_PROGRESS_FEEDBACK)
//Only the legacy hash_dir() takes a progress callback; the callback bumps
//done_count, records the index being processed and wakes up waiters
m_local_data->done_count = 0;
hash_dir(
#if defined(USE_LEGACY_HASH_DIR)
m_local_data->paths.begin(),
m_local_data->paths.begin(),
m_local_data->paths.end(),
base_path,
[=](std::size_t parNext) {
++m_local_data->done_count;
m_local_data->processing_index = parNext;
m_local_data->step_notify.notify_all();
},
m_local_data->ignore_read_errors,
mime
#else
m_local_data->paths.front(),
recordlist,
base_path,
mime,
m_local_data->ignore_read_errors
#endif
);
//TODO: re-enable after hash_dir sends progress notifications again
//assert(m_local_data->done_count == m_local_data->file_count);
#else
//Without progress feedback the legacy hash_dir() gets a no-op callback
hash_dir(
#if defined(USE_LEGACY_HASH_DIR)
m_local_data->paths.begin(),
m_local_data->paths.begin(),
m_local_data->paths.end(),
base_path,
[](std::size_t) {},
m_local_data->ignore_read_errors,
mime
#else
m_local_data->paths.front(),
recordlist,
base_path,
mime,
m_local_data->ignore_read_errors
#endif
);
#endif
populate_rel_paths(base_path, m_local_data->paths);
#if defined(INDEXER_VERBOSE)
//No entry should still carry the 1/1/1 sentinel set above
for (const auto& itm : m_local_data->paths) {
assert(not (1 == itm.hash.part_a and 1 == itm.hash.part_b and 1 == itm.hash.part_c));
}
#endif
}
//Inserts a new record for parPath into the list of paths, at the position
//returned by SetListing::lower_bound() for its path, level and kind.
//The file counter is incremented for non-directory entries.
//Always returns true.
bool Indexer::add_path (const char* parPath, const fastf::FileStats& parStats) {
	auto& all_paths = m_local_data->paths;
	const auto insert_pos = SetListing::lower_bound(all_paths, parPath, parStats.level, parStats.is_dir);

	all_paths.insert(insert_pos, make_file_record_data(parPath, parStats));

	const bool is_file = not parStats.is_dir;
	if (is_file)
		++m_local_data->file_count;
	return true;
}
#if defined(INDEXER_VERBOSE)
void Indexer::dump() const {
PathName base_path(m_local_data->paths.front().abs_path);
std::cout << "---------------- FILE LIST ----------------\n";
for (const auto& cur_itm : m_local_data->paths) {
if (not cur_itm.is_directory) {
PathName cur_path(cur_itm.abs_path);
std::cout << make_relative_path(base_path, cur_path).path() << '\n';
}
}
std::cout << "---------------- DIRECTORY LIST ----------------\n";
for (const auto& cur_itm : m_local_data->paths) {
if (cur_itm.is_directory) {
PathName cur_path(cur_itm.abs_path);
std::cout << make_relative_path(base_path, cur_path).path() << '\n';
}
}
}
#endif
//Returns true when fewer than two entries have been added.
//NOTE(review): presumably the first entry is the root being scanned, so a
//single entry means nothing was found in it - confirm.
bool Indexer::empty() const {
return m_local_data->paths.size() < 2;
}
#if defined(WITH_PROGRESS_FEEDBACK)
//Gives access to the condition variable stored in the indexer's local
//data as step_notify.
std::condition_variable& Indexer::step_notify() {
return m_local_data->step_notify;
}
#endif
#if defined(WITH_PROGRESS_FEEDBACK)
std::string Indexer::current_item() const {
if (m_local_data->paths.empty() or 0 == m_local_data->processing_index)
return std::string();
PathName base_path(m_local_data->paths.front().abs_path);
PathName ret_path(m_local_data->paths[m_local_data->processing_index].abs_path);
return make_relative_path(base_path, ret_path).path();
}
#endif
//Returns the path of the parIndex-th file (directories are skipped),
//relative to the first entry in the list.
//Throws std::out_of_range when parIndex is not smaller than the number of
//files.
std::string Indexer::operator[] (std::size_t parIndex) const {
	const std::size_t available = m_local_data->file_count;
	if (parIndex >= available) {
		std::ostringstream msg;
		msg << "Requested index " << parIndex << " is out of range: only " << available << " items are available";
		throw std::out_of_range(msg.str());
	}

	auto& entries = m_local_data->paths;
	auto file_it = boost::make_filter_iterator<IsFile<>>(entries.begin(), entries.end());
	assert(not entries.empty());
	std::advance(file_it, parIndex);

	const PathName root(entries.front().abs_path);
	const PathName wanted(file_it->abs_path);
	return make_relative_path(root, wanted).path();
}
//Stores the flag that calculate_hash() passes to hash_dir() to decide
//whether files that can't be read are flagged or make hashing fail.
void Indexer::ignore_read_errors (bool parIgnore) {
m_local_data->ignore_read_errors = parIgnore;
}
//Returns a read-only reference to the list of all the records held by
//the indexer.
const std::vector<FileRecordData>& Indexer::record_data() const {
#if defined(WITH_PROGRESS_FEEDBACK)
//TODO: re-enable after hash_dir sends progress notifications again
//assert(m_local_data->done_count == m_local_data->file_count);
#endif
return m_local_data->paths;
}
} //namespace mchlib

View file

@ -40,23 +40,23 @@ namespace mchlib {
{
}
dinlib::MediaTypes guess_media_type (std::string&& parPath) {
MediaTypes guess_media_type (std::string&& parPath) {
DiscInfo info(std::move(parPath));
const DriveTypes drive_type = info.drive_type();
if (DriveType_HardDisk == drive_type) {
if (info.mountpoint() == PathName(info.original_path()).path())
return dinlib::MediaType_HardDisk;
return MediaType_HardDisk;
else
return dinlib::MediaType_Directory;
return MediaType_Directory;
}
else if (DriveType_Optical == drive_type) {
switch (info.optical_type()) {
case OpticalType_DVD:
return dinlib::MediaType_DVD;
return MediaType_DVD;
case OpticalType_CDRom:
return dinlib::MediaType_CDRom;
return MediaType_CDRom;
case OpticalType_BluRay:
return dinlib::MediaType_BluRay;
return MediaType_BluRay;
default:
throw UnknownMediaTypeException("Set autodetect failed because this media type is unknown, please specify the set type manually");
}

View file

@ -15,11 +15,11 @@
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "dindexer-common/mediatypes.hpp"
#include "dindexer-machinery/mediatypes.hpp"
#include <map>
#include <stdexcept>
namespace dinlib {
namespace mchlib {
const std::string& media_type_to_str (MediaTypes parType) {
static const std::map<MediaTypes, const std::string> types {
{MediaType_CDRom, "CD-Rom"},
@ -43,4 +43,8 @@ namespace dinlib {
//Converts the single-character encoding of a media type into the
//MediaTypes enum by casting the character's numeric value directly.
//No validation is performed on parMType here.
MediaTypes char_to_media_type (char parMType) {
return static_cast<MediaTypes>(parMType);
}
} //namespace dinlib
//Converts a MediaTypes value into its single-character encoding by casting
//the enum's numeric value directly to char.
char media_type_to_char (MediaTypes parMType) {
return static_cast<char>(parMType);
}
} //namespace mchlib

View file

@ -204,6 +204,17 @@ namespace mchlib {
return parStream;
}
//Returns the last atom of parPath, or an empty string when parPath has
//no atoms at all.
const boost::string_ref basename (const PathName& parPath) {
	const auto atoms = parPath.atom_count();
	if (0 == atoms) {
		static const char* const empty = "";
		return boost::string_ref(empty);
	}

	assert(atoms > 0);
	return parPath[atoms - 1];
}
PathName& PathName::pop_right() {
m_pool.pop();
return *this;

View file

@ -59,6 +59,7 @@ namespace mchlib {
PathName make_relative_path ( const PathName& parBasePath, const PathName& parOtherPath );
std::ostream& operator<< ( std::ostream& parStream, const PathName& parPath );
const boost::string_ref basename ( const PathName& parPath );
} //namespace mchlib
#endif

View file

@ -0,0 +1,46 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "dindexer-machinery/scantask/contenttype.hpp"
#include "dindexer-machinery/guess_content_type.hpp"
#include <cassert>
namespace mchlib {
namespace scantask {
//Stores the three tasks this task reads from: the set record task, the
//directory tree task and the media type task. All of them are asserted
//to be non-null.
ContentType::ContentType (SetTaskType parSet, DirTreeTaskPtr parDirTree, MediaTypeTaskPtr parMediaType) :
m_set_task(parSet),
m_dir_tree(parDirTree),
m_media_type(parMediaType)
{
assert(m_set_task);
assert(m_dir_tree);
assert(m_media_type);
}
//Returns the set record obtained from the set task's get_or_create().
SetRecordDataFull& ContentType::on_data_get() {
return m_set_task->get_or_create();
}
void ContentType::on_data_fill() {
auto& data = m_set_task->get_or_create();
auto media_type = char_to_media_type(m_media_type->get_or_create().type);
const auto& tree = m_dir_tree->get_or_create();
const auto cont_type = mchlib::guess_content_type(media_type, tree);
data.content_type = content_type_to_char(cont_type);
}
} //namespace scantask
} //namespace mchlib

View file

@ -0,0 +1,107 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "dindexer-machinery/scantask/dirtree.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include "dindexer-machinery/set_listing.hpp"
#include "helpers/compatibility.h"
#include "filesearcher.hpp"
#include "pathname.hpp"
#include <utility>
#include <cassert>
#include <ciso646>
#include <functional>
#include <algorithm>
namespace mchlib {
namespace {
std::size_t calc_rel_path_offs ( const PathName& parRoot, boost::string_ref parPath ) a_pure;
std::size_t calc_rel_path_offs (const PathName& parRoot, boost::string_ref parPath) {
PathName path(parPath);
PathName rel_path = make_relative_path(parRoot, path);
const auto rel_path_len = rel_path.str_path_size();
const auto path_len = path.str_path_size();
assert(rel_path_len <= path_len);
return path_len - rel_path_len;
}
//Callback for the file searcher: inserts a record for parPath into parOut,
//at the position returned by SetListing::lower_bound() for its path,
//level and kind. The record also receives the offset of the part of
//parPath that is relative to parRoot.
//Always returns true.
bool add_path (scantask::DirTree::PathList& parOut, const PathName& parRoot, const char* parPath, const fastf::FileStats& parStats) {
	const auto insert_pos = SetListing::lower_bound(
		parOut,
		parPath,
		parStats.level,
		parStats.is_dir
	);
	const std::size_t rel_path_offs = calc_rel_path_offs(parRoot, boost::string_ref(parPath));

	parOut.insert(
		insert_pos,
		FileRecordData(
			parPath,
			rel_path_offs,
			parStats.atime,
			parStats.mtime,
			static_cast<uint16_t>(parStats.level),
			static_cast<bool>(parStats.is_dir),
			static_cast<bool>(parStats.is_symlink)
		)
	);
	return true;
}
}
namespace scantask {
//Stores the root path to be scanned, which is asserted to be non-empty.
DirTree::DirTree (std::string parRoot) :
m_root(std::move(parRoot))
{
assert(not m_root.empty());
}
//Discards the scanned data by emptying the list.
void DirTree::on_data_destroy (PathList& parData) {
parData.clear();
}
//Scans the root path with a FileSearcher that follows symlinks and stores
//a record for every reported entry into parData, which is asserted to be
//empty on entry. No extension or ignore filters are passed to the search.
void DirTree::on_data_create (PathList& parData) {
	namespace ph = std::placeholders;

	assert(parData.empty());

	fastf::FileSearcher searcher(m_root);
	fastf::FileSearcher::ConstCharVecType ext, ignore;

	//The callback carries its own copy of the root path and a reference
	//to the destination list
	const boost::string_ref root_str(m_root);
	const PathName root_path(root_str);

	searcher.SetFollowSymlinks(true);
	searcher.SetCallback(
		fastf::FileSearcher::CallbackType(
			std::bind(&add_path, std::ref(parData), root_path, ph::_1, ph::_2)
		)
	);
	searcher.Search(ext, ignore);
}
} //namespace scantask
} //namespace mchlib

View file

@ -0,0 +1,193 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "dindexer-machinery/scantask/hashing.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include "dindexer-machinery/set_listing.hpp"
#include "dindexer-machinery/tiger.hpp"
#include "pathname.hpp"
#include <cassert>
#include <boost/range/empty.hpp>
#include <boost/utility/string_ref.hpp>
//#define INDEXER_VERBOSE
#if defined(INDEXER_VERBOSE)
# include <iostream>
#endif
namespace mchlib {
namespace {
//Progress state shared by the whole hashing run; its fields are the
//arguments handed to the progress callback.
struct ProgressInfo {
//Function invoked by notify()
scantask::Hashing::ProgressCallback callback;
//Absolute path of the file about to be hashed
boost::string_ref curr_path;
//NOTE(review): only ever initialized to 0 in this file, never updated
uint64_t file_bytes_read;
//Sum of the sizes of the files hashed successfully so far
uint64_t total_bytes_read;
//Number of files processed so far, including the current one
uint32_t file_num;
//Invokes the callback with the current values of the fields above
void notify ( void ) {
callback(curr_path, file_bytes_read, total_bytes_read, file_num);
}
};
//Appends the raw bytes of parHash, followed by the characters of
//parString, to the end of parDest.
void append_to_vec (std::vector<char>& parDest, const TigerHash& parHash, boost::string_ref parString) {
	const auto hash_first = parHash.byte_data;
	const auto hash_last = hash_first + sizeof(TigerHash);

	parDest.reserve(parDest.size() + sizeof(TigerHash) + parString.size());
	parDest.insert(parDest.end(), hash_first, hash_last);
	parDest.insert(parDest.end(), parString.begin(), parString.end());
}
//Appends the characters of parString to the end of parDest.
void append_to_vec (std::vector<char>& parDest, boost::string_ref parString) {
	parDest.insert(parDest.end(), parString.begin(), parString.end());
}
//Recursively computes the hash of the directory parEntry, whose direct
//children are listed in parList.
//A blob is first built from the hash and basename of every child
//directory (each hashed recursively) and the basename of every other
//child; the hash of that blob becomes the initial hash of the directory.
//Every child file is then hashed through tiger_file(), which also receives
//the directory's hash. The progress callback in parProgressInfo is
//notified right before each file gets hashed.
//When a file can't be read the entry is flagged as unreadable if
//parIgnoreErrors is true, otherwise the exception is rethrown unchanged.
void hash_dir (FileRecordData& parEntry, MutableSetListingView& parList, bool parIgnoreErrors, ProgressInfo& parProgressInfo) {
	assert(parEntry.is_directory);

	//Build a blob with the hashes and filenames of every directory that
	//is a direct child of current entry
	std::vector<char> dir_blob;
#if defined(INDEXER_VERBOSE)
	std::cout << "Making initial hash for " << parEntry.abs_path << "...\n";
#endif
	for (auto it = parList.begin(); it != parList.end(); ++it) {
		assert(PathName(parEntry.abs_path) == PathName(it->abs_path).pop_right());

		PathName curr_path(it->path);
		const auto basename = mchlib::basename(curr_path);
		if (it->is_directory) {
			auto cd_list = MutableSetListingView(it);
			assert(boost::empty(cd_list) or cd_list.begin()->abs_path != it->abs_path);

			hash_dir(*it, cd_list, parIgnoreErrors, parProgressInfo);
			append_to_vec(dir_blob, it->hash, basename);
		}
		else {
			//Files only contribute their name at this stage
			append_to_vec(dir_blob, basename);
		}
	}
	tiger_data(dir_blob, parEntry.hash);

#if defined(INDEXER_VERBOSE)
	std::cout << "Got intermediate hash for dir " << parEntry.abs_path <<
		": " << tiger_to_string(parEntry.hash) <<
		' ' << parEntry.mime_type << '\n';
#endif

	//Now with the initial hash ready, let's start hashing files, if any
	for (auto it = first_file(parList); it != parList.end(); ++it) {
		assert(not it->is_directory);

#if defined(INDEXER_VERBOSE)
		std::cout << "Hashing file " << it->abs_path << "...\n";
#endif
		try {
			//Tell the listener which file is about to be hashed.
			//NOTE(review): file_bytes_read is never updated in here, so
			//the callback always receives the value set by the caller.
			++parProgressInfo.file_num;
			parProgressInfo.curr_path = it->abs_path;
			parProgressInfo.notify();

			tiger_file(it->abs_path, it->hash, parEntry.hash, it->size);
			it->hash_valid = true;
			parProgressInfo.total_bytes_read += it->size;
		}
		catch (const std::ios_base::failure&) {
			if (parIgnoreErrors) {
				it->unreadable = true;
				it->hash = TigerHash {};
			}
			else {
				//Rethrow the exception being handled: "throw e" would
				//throw a copy made from the static type instead
				throw;
			}
		}
	}

#if defined(INDEXER_VERBOSE)
	std::cout << "Final hash for dir " << parEntry.abs_path << " is " << tiger_to_string(parEntry.hash) << '\n';
#endif
	parEntry.hash_valid = true;
}
//Progress callback that does nothing; used whenever no callback has been
//provided so that the hashing code can always invoke one.
void dummy_progress_callback (const boost::string_ref /*parPath*/, uint64_t /*parFileBytes*/, uint64_t /*parTotalBytes*/, uint32_t /*parFileNum*/) {
}
} //unnamed namespace
namespace scantask {
//Stores the task providing the file tree (asserted to be non-null) and the
//error handling flag; the progress callback starts out as the no-op
//dummy_progress_callback.
Hashing::Hashing (std::shared_ptr<FileTreeBase> parFileTree, bool parIgnoreErrors) :
m_file_tree_task(parFileTree),
m_progress_callback(&dummy_progress_callback),
m_ignore_errors(parIgnoreErrors)
{
assert(m_file_tree_task);
}
//Destructor with an empty body, defined out of line in this file.
Hashing::~Hashing() noexcept {
}
//Returns the list of records obtained from the file tree task's
//get_or_create().
std::vector<FileRecordData>& Hashing::on_data_get() {
return m_file_tree_task->get_or_create();
}
//Fills in the hashes of the records returned by the file tree task.
//
//Nothing is done for an empty list. If the first record is a directory the
//whole list is handed to hash_dir() together with a freshly reset
//ProgressInfo bound to the current progress callback. Otherwise the list is
//expected to hold exactly one record, a single file, which is hashed on its
//own with tiger_file().
//
//If reading the single file raises std::ios_base::failure and errors are
//being ignored, the record is flagged as unreadable and its hash is reset;
//otherwise the exception is propagated to the caller.
void Hashing::on_data_fill() {
	std::vector<FileRecordData>& file_list = m_file_tree_task->get_or_create();
	if (file_list.empty()) {
		return;
	}

	ProgressInfo progr_info;
	progr_info.callback = m_progress_callback;
	progr_info.curr_path = "";
	progr_info.file_bytes_read = 0;
	progr_info.total_bytes_read = 0;
	progr_info.file_num = 0;

	if (file_list.front().is_directory) {
		MutableSetListingView recordlist(file_list.begin(), file_list.end(), 0);
		hash_dir(file_list.front(), recordlist, m_ignore_errors, progr_info);
	}
	else {
		assert(1 == file_list.size());
		auto& curr_file_rec = file_list.front();
		//A lone file has no parent directory, so a zeroed hash is passed in
		//the slot where hash_dir() passes the parent directory's hash
		TigerHash dummy {};
		try {
			tiger_file(curr_file_rec.abs_path, curr_file_rec.hash, dummy, curr_file_rec.size);
			curr_file_rec.hash_valid = true;
		}
		catch (const std::ios_base::failure&) {
			if (m_ignore_errors) {
				curr_file_rec.unreadable = true;
				curr_file_rec.hash = TigerHash {};
			}
			else {
				//Rethrow the exception object currently being handled; a
				//"throw e;" would throw a copy of static type
				//std::ios_base::failure and slice any derived exception
				throw;
			}
		}
	}
}
//Installs parFunc as the progress callback. When parFunc evaluates to false
//the no-op dummy_progress_callback is installed instead, so the stored
//callback is never left unset by this function.
void Hashing::set_progress_callback (ProgressCallback parFunc) {
	if (not parFunc) {
		m_progress_callback = &dummy_progress_callback;
		return;
	}

	m_progress_callback = parFunc;
}
} //namespace scantask
} //namespace mchlib

View file

@ -0,0 +1,62 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "dindexer-machinery/scantask/mediatype.hpp"
//#include "dindexer-machinery/guess_content_type.hpp"
#if defined(WITH_MEDIA_AUTODETECT)
# include "dindexer-machinery/mediatype.hpp"
#endif
#include "dindexer-machinery/recorddata.hpp"
#include <cassert>
#include <utility>
namespace mchlib {
namespace scantask {
//Builds a media type task on top of parSet, the task providing the set
//record whose type field gets filled in by on_data_fill().
//parDefault is a media type given as a char and is converted with
//char_to_media_type(). parForce and parSearchPath are only stored when
//WITH_MEDIA_AUTODETECT is defined; in other builds they are unused.
MediaType::MediaType (SetTaskType parSet, char parDefault, bool parForce, std::string parSearchPath) :
m_set_task(parSet),
m_default(char_to_media_type(parDefault))
#if defined(WITH_MEDIA_AUTODETECT)
, m_search_path(std::move(parSearchPath))
, m_force(parForce)
#endif
{
//A null set task is treated as a programming error
assert(m_set_task);
#if !defined(WITH_MEDIA_AUTODETECT)
//Reference the parameters that are unused in this configuration
static_cast<void>(parForce);
static_cast<void>(parSearchPath);
#endif
}
//Returns the set record obtained from the wrapped set task's
//get_or_create(); this is the same record on_data_fill() writes the type into.
SetRecordDataFull& MediaType::on_data_get() {
return m_set_task->get_or_create();
}
//Writes the media type character into the set record's type field.
//With WITH_MEDIA_AUTODETECT and m_force unset, the type is guessed from
//m_search_path through guess_media_type(); in every other case the default
//type received at construction is used.
void MediaType::on_data_fill() {
	auto& set_record = m_set_task->get_or_create();

#if defined(WITH_MEDIA_AUTODETECT)
	if (not m_force) {
		//guess_media_type() is handed its own copy of the search path
		const auto detected = mchlib::guess_media_type(std::string(m_search_path));
		set_record.type = media_type_to_char(detected);
		return;
	}
#endif

	set_record.type = media_type_to_char(m_default);
}
} //namespace scantask
} //namespace mchlib

View file

@ -0,0 +1,53 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "dindexer-machinery/scantask/mime.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include "mimetype.hpp"
#include <cassert>
namespace mchlib {
namespace {
} //unnamed namespace
namespace scantask {
//Builds a mime detection task on top of parDirTree, the task that provides
//the list of file records to analyze.
Mime::Mime (DirTreeTaskPtr parDirTree) :
m_file_tree_task(parDirTree)
{
//A null directory tree task is treated as a programming error
assert(m_file_tree_task);
}
//Empty destructor body: no explicit cleanup is performed here.
Mime::~Mime() noexcept {
}
void Mime::on_data_fill() {
MimeType mime;
auto& list = m_file_tree_task->get_or_create();
for (auto& itm : list) {
itm.mime_full = mime.analyze(itm.abs_path);
auto mime_pair = split_mime(itm.mime_full);
itm.mime_type = mime_pair.first;
itm.mime_charset = mime_pair.second;
}
}
//Returns the record list obtained from the wrapped file tree task's
//get_or_create(); this is the same list on_data_fill() writes mime data into.
std::vector<FileRecordData>& Mime::on_data_get() {
return m_file_tree_task->get_or_create();
}
} //namespace scantask
} //namespace mchlib

View file

@ -0,0 +1,41 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "dindexer-machinery/scantask/setbasic.hpp"
#include <utility>
namespace mchlib {
namespace scantask {
//Builds the basic set task, taking ownership of parName by moving it into
//m_set_name. The stored name is what on_data_create() writes into the record.
SetBasic::SetBasic (std::string&& parName) :
m_set_name(std::move(parName))
{
}
//Empty destructor body: no explicit cleanup is performed here.
SetBasic::~SetBasic() noexcept {
}
//Clears both name fields of the record: first the one reached through the
//SetRecordData part of parData, then the one declared on SetRecordDataFull.
//NOTE(review): presumably the SetRecordData name refers to the storage of
//the SetRecordDataFull one, hence the order - confirm against recorddata.hpp
void SetBasic::on_data_destroy (SetRecordDataFull& parData) {
	auto& base_record = static_cast<SetRecordData&>(parData);
	base_record.name.clear();
	parData.name.clear();
}
//Stores the set name into the record: the SetRecordDataFull name is assigned
//from m_set_name first, then the name reached through the SetRecordData part
//of parData is assigned from that freshly stored value.
//NOTE(review): presumably the SetRecordData name refers to the storage of
//the SetRecordDataFull one, hence the order - confirm against recorddata.hpp
void SetBasic::on_data_create (SetRecordDataFull& parData) {
	parData.name = m_set_name;

	auto& base_record = static_cast<SetRecordData&>(parData);
	base_record.name = parData.name;
}
} //namespace scantask
} //namespace mchlib

View file

@ -42,8 +42,8 @@ namespace mchlib {
};
template <typename OtherRecord>
bool file_record_data_lt (const FileRecordData& parLeft, const OtherRecord& parRight) {
const FileRecordData& l = parLeft;
bool file_record_data_lt (const SetListingItemType& parLeft, const OtherRecord& parRight) {
const SetListingItemType& l = parLeft;
const OtherRecord& r = parRight;
return
(l.level < r.level)
@ -99,14 +99,14 @@ namespace mchlib {
{
assert(parBasePath);
assert(m_base_path or m_current == m_end);
assert(m_current == m_end or m_base_path->atom_count() == PathName(m_current->abs_path).atom_count());
assert(m_current == m_end or m_base_path->atom_count() == PathName(m_current->path).atom_count() + parLevelOffset);
assert(m_current == m_end or m_base_path->atom_count() == m_current->level + m_level_offset);
//Look for the point where the children of this entry start
while (
m_current != m_end and (
m_current->level + m_level_offset == m_base_path->atom_count() or
*m_base_path != PathName(m_current->abs_path).pop_right()
*m_base_path != PathName(m_current->path).pop_right()
)) {
assert(m_base_path);
++m_current;
@ -157,13 +157,13 @@ namespace mchlib {
template <bool Const>
void DirIterator<Const>::increment() {
assert(PathName(m_current->abs_path).pop_right() == *m_base_path);
assert(PathName(m_current->path).pop_right() == *m_base_path);
do {
++m_current;
} while(
m_current != m_end and
m_current->level + m_level_offset == m_base_path->atom_count() + 1 and
*m_base_path != PathName(m_current->abs_path).pop_right()
*m_base_path != PathName(m_current->path).pop_right()
);
}
@ -222,7 +222,7 @@ namespace mchlib {
assert(std::equal(m_list.begin(), m_list.end(), SetListing(ListType(m_list), true).sorted_list().begin()));
}
if (not m_list.empty()) {
m_base_path.reset(new PathName(m_list.front().abs_path));
m_base_path.reset(new PathName(m_list.front().path));
}
}
@ -258,7 +258,7 @@ namespace mchlib {
return std::count_if(
m_list.begin(),
m_list.end(),
[] (const FileRecordData& parItm) {
[] (const SetListingItemType& parItm) {
return not parItm.is_directory;
}
);
@ -268,7 +268,7 @@ namespace mchlib {
return std::count_if(
m_list.begin(),
m_list.end(),
[] (const FileRecordData& parItm) {
[] (const SetListingItemType& parItm) {
return parItm.is_directory;
}
);
@ -279,7 +279,7 @@ namespace mchlib {
}
void SetListing::sort_list (ListType& parList) {
std::sort(parList.begin(), parList.end(), &file_record_data_lt<FileRecordData>);
std::sort(parList.begin(), parList.end(), &file_record_data_lt<SetListingItemType>);
}
SetListing::ListType::iterator SetListing::lower_bound (ListType& parList, const char* parPath, uint16_t parLevel, bool parIsDir) {
@ -289,17 +289,17 @@ namespace mchlib {
}
SetListingView<false> SetListing::make_view() {
const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().abs_path).atom_count());
const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().path).atom_count());
return SetListingView<false>(m_list.begin(), m_list.end(), offs, m_base_path);
}
SetListingView<true> SetListing::make_view() const {
const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().abs_path).atom_count());
const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().path).atom_count());
return SetListingView<true>(m_list.begin(), m_list.end(), offs, m_base_path);
}
SetListingView<true> SetListing::make_cview() const {
const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().abs_path).atom_count());
const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().path).atom_count());
return SetListingView<true>(m_list.begin(), m_list.end(), offs, m_base_path);
}
@ -311,7 +311,7 @@ namespace mchlib {
m_level_offset(parIter.m_level_offset)
{
if (m_begin != m_end) {
m_base_path.reset(new PathName(m_begin->abs_path));
m_base_path.reset(new PathName(m_begin->path));
}
}
@ -323,7 +323,7 @@ namespace mchlib {
m_level_offset(parLevelOffset)
{
if (m_begin != m_end) {
m_base_path.reset(new PathName(m_begin->abs_path));
m_base_path.reset(new PathName(m_begin->path));
}
}

View file

@ -19,7 +19,6 @@
#define id1B7A42F6E46547A6AB0F914E2A91399F
#include "dindexer-common/validationerror.hpp"
#include "dindexer-common/mediatypes.hpp"
#include <boost/program_options/variables_map.hpp>
namespace din {

View file

@ -19,7 +19,6 @@
#define id259FD7C96B5049ECB50386F25455FBB2
#include "dindexer-common/validationerror.hpp"
#include "dindexer-common/mediatypes.hpp"
#include <boost/program_options/variables_map.hpp>
namespace din {

View file

@ -16,7 +16,6 @@ target_link_libraries(${PROJECT_NAME}
PRIVATE ${bare_name}-if
PRIVATE ${bare_name}-common
PRIVATE ${bare_name}-machinery
PRIVATE optimized pthread
)
string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}")

View file

@ -27,14 +27,14 @@ namespace po = boost::program_options;
namespace din {
namespace {
const char g_allowed_types[] = {
static_cast<char>(dinlib::MediaType_CDRom),
static_cast<char>(dinlib::MediaType_Directory),
static_cast<char>(dinlib::MediaType_DVD),
static_cast<char>(dinlib::MediaType_BluRay),
static_cast<char>(dinlib::MediaType_FloppyDisk),
static_cast<char>(dinlib::MediaType_HardDisk),
static_cast<char>(dinlib::MediaType_IomegaZip),
static_cast<char>(dinlib::MediaType_Other)
static_cast<char>(mchlib::MediaType_CDRom),
static_cast<char>(mchlib::MediaType_Directory),
static_cast<char>(mchlib::MediaType_DVD),
static_cast<char>(mchlib::MediaType_BluRay),
static_cast<char>(mchlib::MediaType_FloppyDisk),
static_cast<char>(mchlib::MediaType_HardDisk),
static_cast<char>(mchlib::MediaType_IomegaZip),
static_cast<char>(mchlib::MediaType_Other)
};
} //unnamed namespace

View file

@ -19,6 +19,7 @@
#define id1B7A42F6E46547A6AB0F914E2A91399F
#include <boost/program_options/variables_map.hpp>
#include "dindexer-machinery/mediatypes.hpp"
#include "dindexer-machinery/mediatype.hpp"
namespace din {

View file

@ -21,35 +21,37 @@
#include "dindexer-machinery/recorddata.hpp"
#include "dindexerConfig.h"
#include "dindexer-machinery/filesearcher.hpp"
#include "dindexer-machinery/indexer.hpp"
#include "dindexer-machinery/machinery_info.hpp"
#include "dindexer-common/common_info.hpp"
#include "dindexer-common/settings.hpp"
#include "dindexer-machinery/guess_content_type.hpp"
#include "commandline.hpp"
#include "dbbackend.hpp"
#include "dindexer-machinery/scantask/dirtree.hpp"
#include "dindexer-machinery/scantask/mediatype.hpp"
#include "dindexer-machinery/scantask/hashing.hpp"
#include "dindexer-machinery/scantask/contenttype.hpp"
#include "dindexer-machinery/scantask/mime.hpp"
#include "dindexer-machinery/scantask/generalfiller.hpp"
#include "dindexer-machinery/scantask/setbasic.hpp"
#include <iostream>
#include <iomanip>
#include <ciso646>
#include <sstream>
#include <algorithm>
#include <iterator>
#if defined(WITH_PROGRESS_FEEDBACK)
# include <thread>
# include <mutex>
# include <condition_variable>
#endif
namespace {
void run_hash_calculation ( mchlib::Indexer& parIndexer, bool parShowProgress );
bool add_to_db ( const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, char parContent, const dinlib::SettingsDB& parDBSettings, bool parForce=false );
bool add_to_db ( const std::vector<mchlib::FileRecordData>& parData, const mchlib::SetRecordDataFull& parSet, const dinlib::SettingsDB& parDBSettings, bool parForce=false );
#if defined(WITH_PROGRESS_FEEDBACK)
void print_progress ( const boost::string_ref parPath, uint64_t parFileBytes, uint64_t parTotalBytes, uint32_t parFileNum );
#endif
} //unnamed namespace
namespace stask = mchlib::scantask;
int main (int parArgc, char* parArgv[]) {
using std::placeholders::_1;
using std::placeholders::_2;
using boost::program_options::variables_map;
using FileRecordDataFiller = stask::GeneralFiller<stask::DirTree::PathList>;
using SetRecordDataFiller = stask::GeneralFiller<mchlib::SetRecordDataFull>;
variables_map vm;
try {
@ -61,11 +63,10 @@ int main (int parArgc, char* parArgv[]) {
std::cerr << err.what() << "\nUse --help for help" << std::endl;
return 2;
}
const std::string search_path(vm["search-path"].as<std::string>());
#if defined(WITH_PROGRESS_FEEDBACK)
const bool verbose = (0 == vm.count("quiet"));
//const bool verbose = (0 == vm.count("quiet"));
#else
const bool verbose = false;
//const bool verbose = false;
#endif
dinlib::Settings settings;
@ -77,127 +78,31 @@ int main (int parArgc, char* parArgv[]) {
}
}
#if defined(WITH_MEDIA_AUTODETECT)
char set_type;
if (0 == vm.count("type")) {
std::cout << "Analyzing disc... ";
try {
const auto guessed_type = mchlib::guess_media_type(std::string(search_path));
set_type = guessed_type;
std::cout << "Setting type to " << set_type << " ("
<< dinlib::media_type_to_str(guessed_type) << ")\n";
}
catch (const std::runtime_error& e) {
std::cout << '\n';
std::cerr << e.what();
return 1;
}
}
else {
set_type = vm["type"].as<char>();
}
#else
const char set_type = vm["type"].as<char>();
bool ignore_read_errors = (vm.count("ignore-errors") > 0);
const std::string search_path(vm["search-path"].as<std::string>());
const char def_media_type = (vm.count("type") ? vm["type"].as<char>() : 'O');
std::shared_ptr<stask::SetBasic> setbasic(new stask::SetBasic(std::string(vm["setname"].as<std::string>())));
std::shared_ptr<stask::DirTree> scan_dirtree(new stask::DirTree(search_path));
std::shared_ptr<stask::MediaType> media_type(new stask::MediaType(setbasic, def_media_type, vm.count("type"), search_path));
std::shared_ptr<stask::Hashing> hashing(new stask::Hashing(scan_dirtree, ignore_read_errors));
std::shared_ptr<stask::ContentType> content_type(new stask::ContentType(setbasic, scan_dirtree, media_type));
std::shared_ptr<stask::Mime> mime(new stask::Mime(scan_dirtree));
std::shared_ptr<FileRecordDataFiller> filerecdata(new FileRecordDataFiller(mime, hashing));
std::shared_ptr<SetRecordDataFiller> setrecdata(new SetRecordDataFiller(media_type, content_type));
#if defined(WITH_PROGRESS_FEEDBACK)
hashing->set_progress_callback(&print_progress);
#endif
std::cout << "constructing...\n";
mchlib::Indexer indexer;
indexer.ignore_read_errors(vm.count("ignore-errors") > 0);
fastf::FileSearcher searcher(search_path);
fastf::FileSearcher::ConstCharVecType ext, ignore;
searcher.SetFollowSymlinks(true);
searcher.SetCallback(fastf::FileSearcher::CallbackType(std::bind(&mchlib::Indexer::add_path, &indexer, _1, _2)));
searcher.Search(ext, ignore);
if (verbose) {
std::cout << "Fetching items list...\n";
}
if (indexer.empty()) {
std::cerr << "Nothing found at the given location, quitting\n";
return 1;
}
else {
run_hash_calculation(indexer, verbose);
//TODO: guess_content_type() relies on FileRecordData::path being set to
//the relative path already. Unfortunately at this point it just got
//default-initialized to be the same as abs_path, so for a video DVD, for
//example, it's going to be like "/mnt/cdrom/VIDEO_TS" instead of just
//"VIDEO_TS". This will cause guess_content_type() to miss. Relative
//paths are populated at the end of calculate_hash(), so until I come up
//with a better system I'm just moving content detection to after hash
//calculation.
const auto set_type_casted = dinlib::char_to_media_type(set_type);
const mchlib::ContentTypes content = mchlib::guess_content_type(set_type_casted, indexer.record_data());
const char content_type = mchlib::content_type_to_char(content);
if (verbose) {
std::cout << "Writing to database...\n";
}
if (not add_to_db(indexer.record_data(), vm["setname"].as<std::string>(), set_type, content_type, settings.db)) {
std::cerr << "Not written to DB, likely because a set with the same hash already exists\n";
}
if (not add_to_db(filerecdata->get_or_create(), setrecdata->get_or_create(), settings.db)) {
std::cerr << "Not written to DB, likely because a set with the same hash already exists\n";
}
return 0;
}
namespace {
void run_hash_calculation (mchlib::Indexer& parIndexer, bool parShowProgress) {
if (parIndexer.empty()) {
return;
}
#if !defined(WITH_PROGRESS_FEEDBACK)
parShowProgress = false;
#endif
if (not parShowProgress) {
//Hashing file /mnt/cdrom/Sacred 2/Fallen Angel/UK/Sacred.2.Fallen.Angel-ArenaBG/DISC2/S2DISC2.md1... 512c201321ed01cc2a82c9f80bfeaaa673bc8eb3cea4e5c1
//terminate called after throwing an instance of 'std::ios_base::failure'
//what(): basic_filebuf::xsgetn error reading the file
//Hashing file /mnt/cdrom/Sacred 2/Fallen Angel/UK/Sacred.2.Fallen.Angel-ArenaBG/DISC2/S2DISC2.mdf...Annullato
parIndexer.calculate_hash();
}
#if defined(WITH_PROGRESS_FEEDBACK)
else {
typedef std::ostream_iterator<char> cout_iterator;
std::cout << "Processing";
std::cout.flush();
const auto total_items = parIndexer.total_items();
std::thread hash_thread(&mchlib::Indexer::calculate_hash, &parIndexer);
std::mutex progress_print;
std::size_t clear_size = 0;
const auto digit_count = static_cast<std::size_t>(std::log10(static_cast<double>(total_items))) + 1;
do {
//TODO: fix this steaming pile of crap
//std::unique_lock<std::mutex> lk(progress_print);
//parIndexer.step_notify().wait(lk);
std::cout << '\r';
std::fill_n(cout_iterator(std::cout), clear_size, ' ');
std::cout << '\r';
{
std::ostringstream oss;
const auto item_index = std::min(total_items - 1, parIndexer.processed_items());
oss << "Processing file "
<< std::setw(digit_count) << std::setfill(' ') << (item_index + 1)
<< " of " << total_items << " \"" << parIndexer.current_item() << '"';
const auto msg(oss.str());
clear_size = msg.size();
std::cout << msg;
std::cout.flush();
}
} while (false); //parIndexer.processed_items() != total_items);
hash_thread.join();
if (parIndexer.processed_items() > 0) {
std::cout << '\n';
}
}
#endif
}
bool add_to_db (const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, char parContentType, const dinlib::SettingsDB& parDBSettings, bool parForce) {
bool add_to_db (const std::vector<mchlib::FileRecordData>& parData, const mchlib::SetRecordDataFull& parSet, const dinlib::SettingsDB& parDBSettings, bool parForce) {
using mchlib::FileRecordData;
using mchlib::SetRecordDataFull;
using mchlib::SetRecordData;
@ -212,11 +117,18 @@ namespace {
}
}
SetRecordData set_data {parSetName, parType, parContentType };
const SetRecordData& set_data {parSet.name, parSet.type, parSet.content_type };
const auto app_signature = dinlib::dindexer_signature();
const auto lib_signature = mchlib::lib_signature();
const std::string signature = std::string(app_signature.data(), app_signature.size()) + "/" + std::string(lib_signature.data(), lib_signature.size());
din::write_to_db(parDBSettings, parData, set_data, signature);
return true;
}
#if defined(WITH_PROGRESS_FEEDBACK)
void print_progress (const boost::string_ref parPath, uint64_t /*parFileBytes*/, uint64_t parTotalBytes, uint32_t parFileNum) {
std::cout << "Hashing file " << parFileNum << " \"" << parPath << "\" (" << parTotalBytes << " bytes hashed)\r";
std::cout.flush();
}
#endif
} //unnamed namespace

View file

@ -23,7 +23,7 @@
namespace {
template <std::size_t N>
void detect_type (mchlib::FileRecordData (&parTestData)[N], mchlib::ContentTypes parExpected, dinlib::MediaTypes parMediaType) {
void detect_type (mchlib::FileRecordData (&parTestData)[N], mchlib::ContentTypes parExpected, mchlib::MediaTypes parMediaType) {
using mchlib::SetListing;
using mchlib::FileRecordData;
@ -52,7 +52,7 @@ TEST(machinery, guess_content_type) {
FileRecordData("VIDEO_TS/VTS_01_0.VOB",0,0,2,false,false),
FileRecordData("VIDEO_TS/VIDEO_TS.VOB",0,0,2,false,false)
};
detect_type(test_data, mchlib::ContentType_VideoDVD, dinlib::MediaType_DVD);
detect_type(test_data, mchlib::ContentType_VideoDVD, mchlib::MediaType_DVD);
}
{
@ -63,7 +63,7 @@ TEST(machinery, guess_content_type) {
FileRecordData("some_file.bin",0,0,1,false,false),
FileRecordData("another_dir/VTS_01_0.BUP",0,0,2,false,false)
};
detect_type(test_data, mchlib::ContentType_Generic, dinlib::MediaType_Directory);
detect_type(test_data, mchlib::ContentType_Generic, mchlib::MediaType_Directory);
}
{
@ -81,6 +81,6 @@ TEST(machinery, guess_content_type) {
FileRecordData("CDI",0,0,1,true,false),
FileRecordData("KARAOKE",0,0,1,true,false)
};
detect_type(test_data, mchlib::ContentType_VideoCD, dinlib::MediaType_CDRom);
detect_type(test_data, mchlib::ContentType_VideoCD, mchlib::MediaType_CDRom);
}
}