1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2025-08-08 13:49:47 +00:00

Merge branch 'hashdir_refactoring'

This commit is contained in:
King_DuckZ 2016-03-12 00:37:28 +01:00
commit 859764b9a4
52 changed files with 1367 additions and 815 deletions

View file

@ -19,7 +19,6 @@
#define id1B7A42F6E46547A6AB0F914E2A91399F #define id1B7A42F6E46547A6AB0F914E2A91399F
#include "dindexer-common/validationerror.hpp" #include "dindexer-common/validationerror.hpp"
#include "dindexer-common/mediatypes.hpp"
#include <boost/program_options/variables_map.hpp> #include <boost/program_options/variables_map.hpp>
namespace din { namespace din {

View file

@ -32,4 +32,4 @@ set -f
find . \( $excl_paths -o $incl_extensions \) -a -type f $excl_files > cscope.files find . \( $excl_paths -o $incl_extensions \) -a -type f $excl_files > cscope.files
set +f set +f
cscope -b -q exec cscope -b -q

View file

@ -18,9 +18,9 @@
#ifndef id17F1582F16C8478E8D9795BECBF275A3 #ifndef id17F1582F16C8478E8D9795BECBF275A3
#define id17F1582F16C8478E8D9795BECBF275A3 #define id17F1582F16C8478E8D9795BECBF275A3
#include "dindexer-common/mediatypes.hpp" #include "dindexer-machinery/mediatypes.hpp"
#include "dindexer-machinery/recorddata.hpp" #include "dindexer-machinery/recorddata.hpp"
#include "dindexer-common/compatibility.h" #include "helpers/compatibility.h"
#include <vector> #include <vector>
namespace mchlib { namespace mchlib {
@ -36,8 +36,8 @@ namespace mchlib {
template <bool> class SetListingView; template <bool> class SetListingView;
ContentTypes guess_content_type ( dinlib::MediaTypes parMediaType, const SetListingView<true>& parContent, std::size_t parEntriesCount=0 ); ContentTypes guess_content_type ( MediaTypes parMediaType, const SetListingView<true>& parContent, std::size_t parEntriesCount=0 );
ContentTypes guess_content_type ( dinlib::MediaTypes parMediaType, const std::vector<FileRecordData>& parContent ); ContentTypes guess_content_type ( MediaTypes parMediaType, const std::vector<FileRecordData>& parContent );
char content_type_to_char ( ContentTypes parCType ) a_pure; char content_type_to_char ( ContentTypes parCType ) a_pure;
ContentTypes char_to_content_type ( char parCType ) a_pure; ContentTypes char_to_content_type ( char parCType ) a_pure;

View file

@ -1,77 +0,0 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef idE555EF56730442C1ADDC7B2AE7A9340E
#define idE555EF56730442C1ADDC7B2AE7A9340E
#include <memory>
#include <string>
#include <vector>
#if !defined(NDEBUG)
# define INDEXER_VERBOSE
#endif
#if defined(WITH_PROGRESS_FEEDBACK)
namespace std {
class condition_variable;
} //namespace std
#endif
namespace fastf {
struct FileStats;
} //namespace fastf
namespace dinlib {
struct Settings;
} //namespace dinlib
namespace mchlib {
struct FileRecordData;
/// Move-only indexer object: paths are registered with add_path(), hashes are
/// computed by calculate_hash() and the resulting records are exposed through
/// record_data(). All state lives behind the opaque LocalData pointer.
class Indexer {
	struct LocalData;

public:
	Indexer();
	Indexer (Indexer&&) = default;
	Indexer (const Indexer&) = delete;
	~Indexer() noexcept;

	/// Registers one path together with its file statistics.
	bool add_path (const char* parPath, const fastf::FileStats& parStats);

#ifdef INDEXER_VERBOSE
	/// Only declared when INDEXER_VERBOSE is defined.
	void dump() const;
#endif

	std::size_t total_items() const;
	std::string operator[] (std::size_t parIndex) const;

#ifdef WITH_PROGRESS_FEEDBACK
	// Progress reporting interface, only declared when the build enables
	// WITH_PROGRESS_FEEDBACK.
	std::size_t processed_items() const;
	std::string current_item() const;
	std::condition_variable& step_notify();
#endif

	void calculate_hash();
	bool empty() const;
	void ignore_read_errors (bool parIgnore);
	const std::vector<FileRecordData>& record_data() const;

private:
	std::unique_ptr<LocalData> m_local_data;
};
} //namespace mchlib
#endif

View file

@ -20,11 +20,11 @@
#include <string> #include <string>
#include <stdexcept> #include <stdexcept>
#include "dindexer-common/mediatypes.hpp" #include "dindexer-machinery/mediatypes.hpp"
namespace mchlib { namespace mchlib {
#if defined(WITH_MEDIA_AUTODETECT) #if defined(WITH_MEDIA_AUTODETECT)
dinlib::MediaTypes guess_media_type ( std::string&& parPath ); MediaTypes guess_media_type ( std::string&& parPath );
class UnknownMediaTypeException : std::runtime_error { class UnknownMediaTypeException : std::runtime_error {
public: public:

View file

@ -18,10 +18,10 @@
#ifndef id700AFD0F33634ACC88079BB8853A9E13 #ifndef id700AFD0F33634ACC88079BB8853A9E13
#define id700AFD0F33634ACC88079BB8853A9E13 #define id700AFD0F33634ACC88079BB8853A9E13
#include "dindexer-common/compatibility.h" #include "helpers/compatibility.h"
#include <string> #include <string>
namespace dinlib { namespace mchlib {
enum MediaTypes { enum MediaTypes {
MediaType_CDRom = 'C', MediaType_CDRom = 'C',
MediaType_Directory = 'D', MediaType_Directory = 'D',
@ -35,6 +35,7 @@ namespace dinlib {
const std::string& media_type_to_str ( MediaTypes parType ); const std::string& media_type_to_str ( MediaTypes parType );
MediaTypes char_to_media_type ( char parMType ) a_pure; MediaTypes char_to_media_type ( char parMType ) a_pure;
} //namespace dinlib char media_type_to_char ( MediaTypes parMType ) a_pure;
} //namespace mchlib
#endif #endif

View file

@ -51,6 +51,24 @@ namespace mchlib {
{ {
} }
// Builds a record from an absolute path and the stat-like values supplied by
// the caller. The path string is moved into abs_path; hash, the mime fields
// and size start out empty/zero, and unreadable/hash_valid start out false.
//
// parPath        absolute path, consumed (moved from) by this constructor
// parRelPathOffs offset into the absolute path at which `path` begins
// parATime       value stored in atime
// parMTime       value stored in mtime
// parLevel       value stored in level
// parIsDir       value stored in is_directory
// parIsSymLink   value stored in is_symlink
FileRecordData ( std::string&& parPath, std::size_t parRelPathOffs, std::time_t parATime, std::time_t parMTime, uint16_t parLevel, bool parIsDir, bool parIsSymLink ) :
hash {},
abs_path(std::move(parPath)),
mime_full(),
atime(parATime),
mtime(parMTime),
// `path` is a view over the member abs_path (not over the moved-from
// parPath), starting at parRelPathOffs.
// NOTE(review): this relies on abs_path being declared before path in the
// struct so that it is initialised first - the declarations are not visible
// here, confirm.
// NOTE(review): the view refers to abs_path's storage; confirm that copies
// and moves of the record keep it valid.
path(boost::string_ref(abs_path).substr(parRelPathOffs)),
mime_type(),
mime_charset(),
size(0),
level(parLevel),
is_directory(parIsDir),
is_symlink(parIsSymLink),
unreadable(false),
hash_valid(false)
{
}
#if defined(NDEBUG) #if defined(NDEBUG)
FileRecordData ( const FileRecordData& ) = delete; FileRecordData ( const FileRecordData& ) = delete;
#else #else

View file

@ -0,0 +1,70 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef idCB253C1A5AFA46A18B8878ED4072CD96
#define idCB253C1A5AFA46A18B8878ED4072CD96
#include "dindexer-machinery/scantask/leanbase.hpp"
#include <ciso646>
#include <cassert>
namespace mchlib {
namespace scantask {
/// LeanBase specialisation that owns the data object itself.
///
/// The T instance is stored as a member; derived classes only provide the
/// on_data_create()/on_data_destroy() hooks that populate and tear down that
/// instance. clear_data() lets callers drop the data so that the next
/// get_or_create() populates it again.
template <typename T>
class Base : public LeanBase<T> {
public:
	/// Marks the data as not created and asks the derived class to destroy
	/// it. The flag is reset before the destroy hook runs.
	void clear_data() {
		unset_data_created();
		on_data_destroy(m_data);
	}

protected:
	Base() {}
	~Base() noexcept override = default;

private:
	// Re-declared privately so classes further down the hierarchy go
	// through clear_data() instead of touching the flag directly.
	using LeanBase<T>::unset_data_created;

	/// Hook: release/reset the content of parData.
	virtual void on_data_destroy (T& parData) = 0;
	/// Hook: populate parData.
	virtual void on_data_create (T& parData) = 0;

	/// Hands out the owned data object.
	T& on_data_get() final {
		return m_data;
	}

	/// Fills the owned data object through the derived class' create hook.
	void on_data_fill() final {
		on_data_create(m_data);
	}

	T m_data;
};
} //namespace scantask
} //namespace mchlib
#endif

View file

@ -0,0 +1,51 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id148DBED10A0B45238E810683656BA7D5
#define id148DBED10A0B45238E810683656BA7D5
#include "dindexer-machinery/scantask/leanbase.hpp"
#include "dindexer-machinery/scantask/base.hpp"
#include "dindexer-machinery/mediatypes.hpp"
#include <memory>
#include <vector>
namespace mchlib {
struct FileRecordData;
struct SetRecordDataFull;
namespace scantask {
/// Task working on a SetRecordDataFull that is built from three other tasks:
/// the set task, a directory tree task and a media type task. The hooks are
/// implemented out of line.
class ContentType : public LeanBase<SetRecordDataFull> {
public:
	using SetTaskType = std::shared_ptr<LeanBase<SetRecordDataFull>>;
	// Same underlying type as SetTaskType.
	using MediaTypeTaskPtr = SetTaskType;
	using DirTreeTaskPtr = std::shared_ptr<Base<std::vector<FileRecordData>>>;

	ContentType (SetTaskType parSet, DirTreeTaskPtr parDirTree, MediaTypeTaskPtr parMediaType);

private:
	void on_data_fill() override;
	SetRecordDataFull& on_data_get() override;

	SetTaskType m_set_task;
	DirTreeTaskPtr m_dir_tree;
	MediaTypeTaskPtr m_media_type;
};
} //namespace scantask
} //namespace mchlib
#endif

View file

@ -0,0 +1,45 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id0AA31B2E7D6244A08435CF9080E34AAE
#define id0AA31B2E7D6244A08435CF9080E34AAE
#include "dindexer-machinery/scantask/base.hpp"
#include <string>
#include <vector>
namespace mchlib {
struct FileRecordData;
namespace scantask {
/// Task owning a list of FileRecordData, created from the root path given at
/// construction time. The create/destroy hooks are implemented out of line.
class DirTree : public Base<std::vector<FileRecordData>> {
public:
	using PathList = std::vector<FileRecordData>;

	explicit DirTree (std::string parRoot);
	~DirTree() noexcept override = default;

private:
	void on_data_destroy (PathList& parData) override;
	void on_data_create (PathList& parData) override;

	std::string m_root;
};
} //namespace scantask
} //namespace mchlib
#endif

View file

@ -0,0 +1,72 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id904376BAD85D4782B83CAAEC2EF344D9
#define id904376BAD85D4782B83CAAEC2EF344D9
#include "dindexer-machinery/scantask/leanbase.hpp"
#include <memory>
#include <vector>
#include <utility>
#include <ciso646>
namespace mchlib {
namespace scantask {
/// Task that holds a list of other tasks producing a T.
///
/// Filling this task calls get_or_create() on every task in the list, in the
/// order they were passed to the constructor; the data handed out is the one
/// returned by the first task in the list.
template <typename T>
class GeneralFiller : public LeanBase<T> {
public:
	using FillingTaskPtr = std::shared_ptr<LeanBase<T>>;

	/// Takes at least one task; every pointer must be convertible to
	/// FillingTaskPtr.
	template <typename P, typename... PP>
	GeneralFiller (std::shared_ptr<P> parFill, std::shared_ptr<PP>... parFillList);
	~GeneralFiller() noexcept override;

private:
	void on_data_fill() override;
	T& on_data_get() override;

	std::vector<FillingTaskPtr> m_to_fill;
};

template <typename T>
template <typename P, typename... PP>
GeneralFiller<T>::GeneralFiller (std::shared_ptr<P> parFill, std::shared_ptr<PP>... parFillList) :
	m_to_fill { std::move(parFill), std::move(parFillList)... }
{
}

template <typename T>
GeneralFiller<T>::~GeneralFiller() noexcept {
	// Drop the references to the wrapped tasks explicitly.
	m_to_fill.clear();
}

template <typename T>
void GeneralFiller<T>::on_data_fill() {
	// Trigger every wrapped task, front to back.
	for (auto it_task = m_to_fill.begin(); it_task != m_to_fill.end(); ++it_task) {
		(*it_task)->get_or_create();
	}
}

template <typename T>
T& GeneralFiller<T>::on_data_get() {
	assert(not m_to_fill.empty());
	FillingTaskPtr& first_task = m_to_fill.front();
	return first_task->get_or_create();
}
} //namespace scantask
} //namespace mchlib
#endif

View file

@ -0,0 +1,53 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef idC7CC55298AC049EAA80604D6C7FD081D
#define idC7CC55298AC049EAA80604D6C7FD081D
#include "dindexer-machinery/scantask/leanbase.hpp"
#include <vector>
#include <memory>
#include <functional>
#include <boost/utility/string_ref.hpp>
#include <cstdint>
namespace mchlib {
struct FileRecordData;
namespace scantask {
/// Task working on the list of FileRecordData produced by another task (the
/// "file tree" task passed to the constructor). An optional progress callback
/// can be installed with set_progress_callback(). Hooks are implemented out of
/// line.
class Hashing : public LeanBase<std::vector<FileRecordData>> {
public:
	using FileTreeBase = LeanBase<std::vector<FileRecordData>>;
	// NOTE(review): the meaning of the three integer arguments is defined by
	// the implementation, which is not visible from this header.
	using ProgressCallback = std::function<void(const boost::string_ref, uint64_t, uint64_t, uint32_t)>;

	Hashing (std::shared_ptr<FileTreeBase> parFileTree, bool parIgnoreErrors);
	~Hashing() noexcept override;

	void set_progress_callback (ProgressCallback parFunc);

private:
	void on_data_fill() override;
	std::vector<FileRecordData>& on_data_get() override;

	std::shared_ptr<FileTreeBase> m_file_tree_task;
	ProgressCallback m_progress_callback;
	bool m_ignore_errors;
};
} //namespace scantask
} //namespace mchlib
#endif

View file

@ -0,0 +1,99 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id982AF1D5C59C415584F56C1E6DDFE55E
#define id982AF1D5C59C415584F56C1E6DDFE55E
#include <ciso646>
#include <cassert>
#if !defined(NDEBUG)
# define LEANBASE_ASSERT_REENTRANCY
#endif
namespace mchlib {
namespace scantask {
#if defined(LEANBASE_ASSERT_REENTRANCY)
/// Scope guard used by the reentrancy check: sets the pointed-to flag to true
/// on construction and back to false on destruction.
///
/// The flag must be non-null and currently false when the guard is created.
/// The guard is not copyable: a copy's destructor would clear the flag while
/// the original scope is still active, defeating the check.
struct AutoSetBool {
	explicit AutoSetBool ( bool* parBool ) :
		m_bool(parBool)
	{
		assert(m_bool);
		assert(not *m_bool);
		*m_bool = true;
	}
	AutoSetBool ( const AutoSetBool& ) = delete;
	AutoSetBool& operator= ( const AutoSetBool& ) = delete;
	~AutoSetBool ( void ) noexcept {
		*m_bool = false;
	}

	bool* m_bool;
};
#endif
/// Base class for lazily evaluated tasks producing a T.
///
/// get_or_create() runs the on_data_fill() hook the first time it is called
/// (or the first time after unset_data_created()) and then returns whatever
/// on_data_get() hands out. Where the data lives is up to the derived class.
/// When LEANBASE_ASSERT_REENTRANCY is defined, nested calls to
/// get_or_create() on the same object trip an assertion.
template <typename T>
class LeanBase {
protected:
	LeanBase ( void );
	virtual ~LeanBase ( void ) noexcept = default;

	/// Forgets that the data was created so the next get_or_create() fills
	/// it again. Must only be called while the data is marked as created.
	void unset_data_created ( void );

public:
	/// Returns the task's data, filling it first if needed.
	/// If on_data_fill() throws, the exception propagates and the data is
	/// left marked as not created, so a later call retries the fill.
	T& get_or_create ( void );

private:
	/// Hook: produce the data.
	virtual void on_data_fill ( void ) = 0;
	/// Hook: return a reference to the data.
	virtual T& on_data_get ( void ) = 0;

	bool m_data_created;
#if defined(LEANBASE_ASSERT_REENTRANCY)
	bool m_inside_call;
#endif
};

template <typename T>
LeanBase<T>::LeanBase() :
	m_data_created(false)
#if defined(LEANBASE_ASSERT_REENTRANCY)
	, m_inside_call(false)
#endif
{
}

template <typename T>
T& LeanBase<T>::get_or_create() {
#if defined(LEANBASE_ASSERT_REENTRANCY)
	assert(not m_inside_call);
	AutoSetBool auto_bool(&m_inside_call);
#endif

	if (not m_data_created) {
		// The flag is raised before the hook runs, exactly as before, but it
		// is rolled back if the hook fails: otherwise a failed fill would be
		// remembered as a success and later calls would return unfilled data.
		m_data_created = true;
		try {
			this->on_data_fill();
		}
		catch (...) {
			m_data_created = false;
			throw;
		}
	}
	return this->on_data_get();
}

template <typename T>
void LeanBase<T>::unset_data_created() {
	assert(m_data_created);
	m_data_created = false;
}
} //namespace scantask
} //namespace mchlib
#endif

View file

@ -0,0 +1,52 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id25B0BCA6D9464754920E1BC7C5D9DB57
#define id25B0BCA6D9464754920E1BC7C5D9DB57
#include "dindexer-machinery/scantask/leanbase.hpp"
#include "dindexer-machinery/scantask/base.hpp"
#include "dindexer-machinery/mediatypes.hpp"
#include <string>
#include <memory>
namespace mchlib {
struct SetRecordDataFull;
namespace scantask {
/// Task working on the SetRecordDataFull produced by another task (the set
/// task passed to the constructor), configured with a default media type.
/// The search path and the force flag are only stored when the build defines
/// WITH_MEDIA_AUTODETECT. Hooks are implemented out of line.
class MediaType : public LeanBase<SetRecordDataFull> {
public:
	using SetTaskType = std::shared_ptr<LeanBase<SetRecordDataFull>>;

	MediaType (SetTaskType parSet, char parDefault, bool parForce, std::string parSearchPath);
	~MediaType() noexcept override = default;

private:
	void on_data_fill() override;
	SetRecordDataFull& on_data_get() override;

	SetTaskType m_set_task;
	MediaTypes m_default;
#ifdef WITH_MEDIA_AUTODETECT
	std::string m_search_path;
	bool m_force;
#endif
};
} //namespace scantask
} //namespace mchlib
#endif

View file

@ -0,0 +1,46 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef idE173D2BA33744F448B870BB53AF52610
#define idE173D2BA33744F448B870BB53AF52610
#include "dindexer-machinery/scantask/base.hpp"
#include "dindexer-machinery/scantask/leanbase.hpp"
#include <vector>
#include <memory>
namespace mchlib {
struct FileRecordData;
namespace scantask {
/// Task working on the list of FileRecordData owned by a directory tree task
/// (passed to the constructor). Hooks are implemented out of line.
class Mime : public LeanBase<std::vector<FileRecordData>> {
public:
	using DirTreeTaskPtr = std::shared_ptr<Base<std::vector<FileRecordData>>>;

	explicit Mime (DirTreeTaskPtr parDirTree);
	~Mime() noexcept override;

private:
	void on_data_fill() override;
	std::vector<FileRecordData>& on_data_get() override;

	DirTreeTaskPtr m_file_tree_task;
};
} //namespace scantask
} //namespace mchlib
#endif

View file

@ -0,0 +1,42 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef idB323CEFC89C2468CA0A341C312C6C2C0
#define idB323CEFC89C2468CA0A341C312C6C2C0
#include "dindexer-machinery/scantask/base.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include <string>
namespace mchlib {
namespace scantask {
/// Task owning a SetRecordDataFull, constructed from a set name. The
/// create/destroy hooks are implemented out of line.
class SetBasic : public Base<SetRecordDataFull> {
private:
	using ParentType = Base<SetRecordDataFull>;

public:
	explicit SetBasic (std::string&& parName);
	~SetBasic() noexcept override;

private:
	void on_data_destroy (SetRecordDataFull& parData) override;
	void on_data_create (SetRecordDataFull& parData) override;

	std::string m_set_name;
};
} //namespace scantask
} //namespace mchlib
#endif

View file

@ -40,19 +40,21 @@ namespace mchlib {
template <bool Const> template <bool Const>
implem::DirIterator<Const> first_file ( SetListingView<Const>& parList ); implem::DirIterator<Const> first_file ( SetListingView<Const>& parList );
typedef FileRecordData SetListingItemType;
namespace implem { namespace implem {
template <bool Const> template <bool Const>
class DirIterator : public boost::iterator_facade<DirIterator<Const>, FileRecordData, boost::forward_traversal_tag> { class DirIterator : public boost::iterator_facade<DirIterator<Const>, SetListingItemType, boost::forward_traversal_tag> {
friend class mchlib::SetListingView<Const>; friend class mchlib::SetListingView<Const>;
friend class boost::iterator_core_access; friend class boost::iterator_core_access;
template <bool> friend class DirIterator; template <bool> friend class DirIterator;
typedef boost::iterator_facade<DirIterator<Const>, FileRecordData, boost::forward_traversal_tag> base_class; typedef boost::iterator_facade<DirIterator<Const>, SetListingItemType, boost::forward_traversal_tag> base_class;
struct enabler {}; struct enabler {};
public: public:
typedef typename std::conditional< typedef typename std::conditional<
Const, Const,
std::vector<mchlib::FileRecordData>::const_iterator, std::vector<SetListingItemType>::const_iterator,
std::vector<mchlib::FileRecordData>::iterator std::vector<SetListingItemType>::iterator
>::type VecIterator; >::type VecIterator;
typedef typename base_class::difference_type difference_type; typedef typename base_class::difference_type difference_type;
typedef typename base_class::value_type value_type; typedef typename base_class::value_type value_type;
@ -127,7 +129,7 @@ namespace mchlib {
class SetListing { class SetListing {
public: public:
typedef std::vector<FileRecordData> ListType; typedef std::vector<SetListingItemType> ListType;
typedef implem::DirIterator<true> const_iterator; typedef implem::DirIterator<true> const_iterator;
explicit SetListing ( ListType&& parList, bool parSort=true ); explicit SetListing ( ListType&& parList, bool parSort=true );

View file

@ -2,7 +2,6 @@ project(${bare_name}-common CXX C)
add_library(${PROJECT_NAME} add_library(${PROJECT_NAME}
commandline.cpp commandline.cpp
mediatypes.cpp
settings.cpp settings.cpp
validationerror.cpp validationerror.cpp
common_info.cpp common_info.cpp
@ -25,3 +24,10 @@ target_link_libraries(${PROJECT_NAME}
# RUNTIME DESTINATION bin # RUNTIME DESTINATION bin
# ARCHIVE DESTINATION lib/static # ARCHIVE DESTINATION lib/static
#) #)
#Allow to link with .so
#see https://cmake.org/pipermail/cmake/2007-May/014350.html
#and http://stackoverflow.com/questions/6093547/what-do-r-x86-64-32s-and-r-x86-64-64-relocation-mean/6093910#6093910
#POSITION_INDEPENDENT_CODE lets CMake pick the right flag for the compiler and
#architecture in use, and unlike COMPILE_FLAGS it does not replace flags that
#were already set on the target.
set_target_properties(${PROJECT_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)

View file

@ -19,7 +19,6 @@
#define idB6191389C4AD4EE5862CCF1591BE6CE5 #define idB6191389C4AD4EE5862CCF1591BE6CE5
#include "dindexer-common/validationerror.hpp" #include "dindexer-common/validationerror.hpp"
#include "dindexer-common/mediatypes.hpp"
#include <boost/program_options/variables_map.hpp> #include <boost/program_options/variables_map.hpp>
namespace din { namespace din {

View file

@ -4,6 +4,7 @@ add_executable(${PROJECT_NAME}
main.cpp main.cpp
commandline.cpp commandline.cpp
postgre_locate.cpp postgre_locate.cpp
hash.cpp
) )
target_include_directories(${PROJECT_NAME} target_include_directories(${PROJECT_NAME}
@ -13,6 +14,7 @@ target_include_directories(${PROJECT_NAME}
target_link_libraries(${PROJECT_NAME} target_link_libraries(${PROJECT_NAME}
PRIVATE ${bare_name}-if PRIVATE ${bare_name}-if
PRIVATE ${bare_name}-common PRIVATE ${bare_name}-common
PRIVATE ${bare_name}-machinery
) )
string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}") string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}")

View file

@ -28,6 +28,7 @@ namespace din {
set_options.add_options() set_options.add_options()
("case-insensitive,i", "Disable case sensitivity during search") ("case-insensitive,i", "Disable case sensitivity during search")
("set,s", "Look for matching sets instead of files") ("set,s", "Look for matching sets instead of files")
("byhash,a", "Paths on the command line are local paths and searching should be done by content hash")
//("option,o", po::value<std::string>()->default_value("default_value"), "Help message") //("option,o", po::value<std::string>()->default_value("default_value"), "Help message")
//("option2", po::value<int>(), "Help message") //("option2", po::value<int>(), "Help message")
; ;

View file

@ -19,7 +19,6 @@
#define id1B7A42F6E46547A6AB0F914E2A91399F #define id1B7A42F6E46547A6AB0F914E2A91399F
#include "dindexer-common/validationerror.hpp" #include "dindexer-common/validationerror.hpp"
#include "dindexer-common/mediatypes.hpp"
#include <boost/program_options/variables_map.hpp> #include <boost/program_options/variables_map.hpp>
namespace din { namespace din {

98
src/locate/hash.cpp Normal file
View file

@ -0,0 +1,98 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "hash.hpp"
#include "dindexer-machinery/scantask/hashing.hpp"
#include "dindexer-machinery/scantask/dirtree.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include <memory>
#include <sys/stat.h>
#include <stdexcept>
#include <utility>
#include <cassert>
#include <ciso646>
namespace stask = mchlib::scantask;
namespace din {
namespace {
/// Task exposing exactly one FileRecordData, describing the single file at
/// the path given to the constructor.
///
/// The stat structure is not copied: the pointer passed to the constructor
/// must stay valid for as long as the task may be filled.
class SingleFileTask : public stask::Base<std::vector<mchlib::FileRecordData>> {
public:
	using PathList = std::vector<mchlib::FileRecordData>;

	SingleFileTask (std::string parPath, const struct stat* parStat) :
		m_path(std::move(parPath)),
		m_stat(parStat)
	{
		assert(not m_path.empty());
		assert(m_stat);
	}

	~SingleFileTask() noexcept override = default;

private:
	/// Empties the list; it is expected to hold the record created earlier.
	void on_data_destroy (PathList& parData) override {
		assert(not parData.empty());
		parData.clear();
	}

	/// Appends the one record for m_path to the (empty) list.
	void on_data_create (PathList& parData) override {
		assert(parData.empty());
		parData.reserve(1);
		parData.push_back(mchlib::FileRecordData(
			std::string(m_path), //the record takes ownership of a copy
			0,                   //relative path offset
			m_stat->st_atime,
			m_stat->st_mtime,
			0,                   //level
			false,               //not a directory
			false                //not a symlink
		));
	}

	std::string m_path;
	const struct stat* m_stat;
};
} //unnamed namespace
/// Computes the hash of the file or directory at parPath.
///
/// A directory is scanned through a DirTree task, a regular file through a
/// SingleFileTask; the hash of the first record produced by the hashing task
/// is returned.
///
/// @param parPath path to a regular file or a directory
/// @return the hash stored in the first record of the hashing result
/// @throws std::runtime_error if the path can't be stat'ed, if it is neither
///         a regular file nor a directory, or if hashing produced no records
mchlib::TigerHash hash (const std::string& parPath) {
	using mchlib::FileRecordData;
	using FileSrcTaskPtr = std::shared_ptr<stask::Base<std::vector<FileRecordData>>>;

	struct stat path_stat;
	if (stat(parPath.c_str(), &path_stat) != 0) {
		throw std::runtime_error("Can't access file \"" + parPath + "\"");
	}

	FileSrcTaskPtr file_src_task;
	if (S_ISDIR(path_stat.st_mode)) {
		file_src_task = std::make_shared<stask::DirTree>(parPath);
	}
	else if (S_ISREG(path_stat.st_mode)) {
		//path_stat outlives the task, which only keeps a pointer to it
		file_src_task = std::make_shared<SingleFileTask>(parPath, &path_stat);
	}
	else {
		//Previously only an assert: release builds would go on and try to
		//hash FIFOs, sockets or devices
		throw std::runtime_error("\"" + parPath + "\" is neither a regular file nor a directory");
	}

	stask::Hashing hashing(file_src_task, false);
	const auto& records = hashing.get_or_create();
	if (records.empty()) {
		throw std::runtime_error("Nothing to hash in \"" + parPath + "\"");
	}
	return records.front().hash;
}
} //namespace din

28
src/locate/hash.hpp Normal file
View file

@ -0,0 +1,28 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id3F3E29B28FAA44A190451198CF1FD166
#define id3F3E29B28FAA44A190451198CF1FD166
#include "dindexer-machinery/tiger.hpp"
#include <vector>
namespace din {
//Returns the hash computed for the file or directory at parPath.
//Throws std::runtime_error if the path can't be accessed.
mchlib::TigerHash hash ( const std::string& parPath );
} //namespace din
#endif

View file

@ -19,6 +19,7 @@
#include "postgre_locate.hpp" #include "postgre_locate.hpp"
#include "dindexer-common/settings.hpp" #include "dindexer-common/settings.hpp"
#include "dindexerConfig.h" #include "dindexerConfig.h"
#include "hash.hpp"
#include <iostream> #include <iostream>
#include <ciso646> #include <ciso646>
#include <iterator> #include <iterator>
@ -73,7 +74,15 @@ int main (int parArgc, char* parArgv[]) {
std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedSet>(std::cout, "\n")); std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedSet>(std::cout, "\n"));
} }
else { else {
const auto results = din::locate_in_db(settings.db, vm["substring"].as<std::string>(), not not vm.count("case-insensitive")); std::vector<din::LocatedItem> results;
if (vm.count("byhash")) {
const auto hash = din::hash(vm["substring"].as<std::string>());
results = din::locate_in_db(settings.db, hash);
}
else {
results = din::locate_in_db(settings.db, vm["substring"].as<std::string>(), not not vm.count("case-insensitive"));
}
std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedItem>(std::cout, "\n")); std::copy(results.begin(), results.end(), std::ostream_iterator<din::LocatedItem>(std::cout, "\n"));
} }
return 0; return 0;

View file

@ -17,6 +17,7 @@
#include "postgre_locate.hpp" #include "postgre_locate.hpp"
#include "pq/connection.hpp" #include "pq/connection.hpp"
#include "dindexer-machinery/tiger.hpp"
#include <utility> #include <utility>
#include <sstream> #include <sstream>
#include <boost/utility/string_ref.hpp> #include <boost/utility/string_ref.hpp>
@ -53,10 +54,25 @@ namespace din {
return std::move(retval); return std::move(retval);
} }
//Converts every record of parResult into a LocatedItem, reading the "path",
//"id" and "group_id" columns. The numeric columns are converted with
//boost::lexical_cast to the types of the matching LocatedItem members.
std::vector<LocatedItem> file_result_to_vec (pq::ResultSet&& parResult) {
	using boost::lexical_cast;

	std::vector<LocatedItem> retval;
	retval.reserve(parResult.size());
	for (const auto& record : parResult) {
		retval.push_back(LocatedItem{
			record["path"],
			lexical_cast<decltype(LocatedItem::id)>(record["id"]),
			lexical_cast<decltype(LocatedItem::group_id)>(record["group_id"])
		});
	}
	//Return the local by name: the compiler can elide the copy or move it
	//implicitly, while an explicit std::move would inhibit copy elision.
	return retval;
}
} //unnamed namespace } //unnamed namespace
std::vector<LocatedItem> locate_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) { std::vector<LocatedItem> locate_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) {
using boost::lexical_cast;
using boost::string_ref; using boost::string_ref;
namespace ba = boost::algorithm; namespace ba = boost::algorithm;
@ -78,17 +94,15 @@ namespace din {
oss << "LIMIT " << g_max_results << ';'; oss << "LIMIT " << g_max_results << ';';
auto result = conn.query(oss.str()); auto result = conn.query(oss.str());
std::vector<LocatedItem> retval; return file_result_to_vec(std::move(result));
retval.reserve(result.size()); }
for (const auto& record : result) {
retval.push_back(LocatedItem{
record["path"],
lexical_cast<decltype(LocatedItem::id)>(record["id"]),
lexical_cast<decltype(LocatedItem::group_id)>(record["group_id"])
});
}
return std::move(retval); std::vector<LocatedItem> locate_in_db (const dinlib::SettingsDB& parDB, const mchlib::TigerHash& parSearch) {
const std::string query = std::string("SELECT \"path\",\"id\",\"group_id\" FROM \"files\" WHERE \"hash\"=$1 LIMIT ") + boost::lexical_cast<std::string>(g_max_results) + ';';
auto conn = make_pq_conn(parDB);
auto result = conn.query(query, mchlib::tiger_to_string(parSearch, true));
return file_result_to_vec(std::move(result));
} }
std::vector<LocatedSet> locate_sets_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) { std::vector<LocatedSet> locate_sets_in_db (const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive) {

View file

@ -23,6 +23,10 @@
#include <string> #include <string>
#include <cstdint> #include <cstdint>
namespace mchlib {
struct TigerHash;
} //namespace mchlib
namespace din { namespace din {
struct LocatedItem { struct LocatedItem {
std::string path; std::string path;
@ -38,6 +42,7 @@ namespace din {
}; };
std::vector<LocatedItem> locate_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive ); std::vector<LocatedItem> locate_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive );
std::vector<LocatedItem> locate_in_db ( const dinlib::SettingsDB& parDB, const mchlib::TigerHash& parSearch );
std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive ); std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, bool parCaseInsensitive );
std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, const std::vector<uint32_t>& parSets, bool parCaseInsensitive ); std::vector<LocatedSet> locate_sets_in_db ( const dinlib::SettingsDB& parDB, const std::string& parSearch, const std::vector<uint32_t>& parSets, bool parCaseInsensitive );
} //namespace din } //namespace din

View file

@ -4,7 +4,6 @@ include(WithMediaAutodetect)
find_package(Magic REQUIRED) find_package(Magic REQUIRED)
add_library(${PROJECT_NAME} SHARED add_library(${PROJECT_NAME} SHARED
indexer.cpp
pathname.cpp pathname.cpp
tiger.c tiger.c
tiger.cpp tiger.cpp
@ -12,10 +11,17 @@ add_library(${PROJECT_NAME} SHARED
filesearcher.cpp filesearcher.cpp
discinfo.cpp discinfo.cpp
mediatype.cpp mediatype.cpp
mediatypes.cpp
machinery_info.cpp machinery_info.cpp
guess_content_type.cpp guess_content_type.cpp
set_listing.cpp set_listing.cpp
globbing.cpp globbing.cpp
scantask/dirtree.cpp
scantask/mediatype.cpp
scantask/hashing.cpp
scantask/contenttype.cpp
scantask/mime.cpp
scantask/setbasic.cpp
) )
#target_include_directories(${PROJECT_NAME} #target_include_directories(${PROJECT_NAME}
@ -29,6 +35,10 @@ target_link_libraries(${PROJECT_NAME}
PRIVATE ${MAGIC_LIBRARIES} PRIVATE ${MAGIC_LIBRARIES}
) )
target_include_directories(${PROJECT_NAME}
PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}
)
if (DINDEXER_WITH_MEDIA_AUTODETECT) if (DINDEXER_WITH_MEDIA_AUTODETECT)
target_include_directories(${PROJECT_NAME} SYSTEM target_include_directories(${PROJECT_NAME} SYSTEM
PRIVATE ${BLKID_INCLUDE_DIRS} PRIVATE ${BLKID_INCLUDE_DIRS}

View file

@ -15,7 +15,7 @@
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>. * along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "dindexer-machinery/filesearcher.hpp" #include "filesearcher.hpp"
#if !defined(_XOPEN_SOURCE) #if !defined(_XOPEN_SOURCE)
#define _XOPEN_SOURCE 500 #define _XOPEN_SOURCE 500
@ -59,6 +59,7 @@ namespace fastf {
st.is_symlink = parSymlink; st.is_symlink = parSymlink;
st.atime = parStat->st_atime; st.atime = parStat->st_atime;
st.mtime = parStat->st_mtime; st.mtime = parStat->st_mtime;
st.size = 0;
if (not (*g_searchOptions.callback)(parPath, st)) if (not (*g_searchOptions.callback)(parPath, st))
return FTW_STOP; return FTW_STOP;
@ -81,6 +82,7 @@ namespace fastf {
st.is_symlink = parSymlink; st.is_symlink = parSymlink;
st.atime = parStat->st_atime; st.atime = parStat->st_atime;
st.mtime = parStat->st_mtime; st.mtime = parStat->st_mtime;
st.size = parStat->st_size;
if (extensions.empty()) { if (extensions.empty()) {
if (not (*g_searchOptions.callback)(parPath, st)) { if (not (*g_searchOptions.callback)(parPath, st)) {

View file

@ -19,12 +19,16 @@
#define id4A7D7AB671954418939FC0BDA19C5B3F #define id4A7D7AB671954418939FC0BDA19C5B3F
#include <ctime> #include <ctime>
#include <cstdint>
namespace fastf { namespace fastf {
struct FileStats { struct FileStats {
int level; static_assert(sizeof(std::time_t) >= sizeof(uint64_t), "Reorder members or comment out this assertion");
std::time_t atime; std::time_t atime;
std::time_t mtime; std::time_t mtime;
uint64_t size;
uint16_t level;
bool is_dir; bool is_dir;
bool is_symlink; bool is_symlink;
}; };

View file

@ -42,7 +42,7 @@ namespace mchlib {
}; };
struct EntryChecking { struct EntryChecking {
typedef bool(*CheckerFunction)(dinlib::MediaTypes, const ConstSetListingView&, const std::vector<const FileRecordData*>&); typedef bool(*CheckerFunction)(MediaTypes, const ConstSetListingView&, const std::vector<const FileRecordData*>&);
std::size_t max_total_entries; std::size_t max_total_entries;
CheckerFunction checker_func; CheckerFunction checker_func;
@ -87,8 +87,8 @@ namespace mchlib {
return std::move(retval); return std::move(retval);
} }
bool identify_video_dvd (dinlib::MediaTypes parMediaType, const ConstSetListingView& parContent, const std::vector<const FileRecordData*>& parFlatContent ) { bool identify_video_dvd (MediaTypes parMediaType, const ConstSetListingView& parContent, const std::vector<const FileRecordData*>& parFlatContent ) {
if (parMediaType != dinlib::MediaType_DVD and parMediaType != dinlib::MediaType_Directory) if (parMediaType != MediaType_DVD and parMediaType != MediaType_Directory)
return false; return false;
const auto items_count = count_listing_items(parContent); const auto items_count = count_listing_items(parContent);
@ -103,8 +103,8 @@ namespace mchlib {
return check_missing_content(parFlatContent, should_have).empty(); return check_missing_content(parFlatContent, should_have).empty();
} }
bool identify_video_cd (dinlib::MediaTypes parMediaType, const ConstSetListingView& parContent, const std::vector<const FileRecordData*>& parFlatContent) { bool identify_video_cd (MediaTypes parMediaType, const ConstSetListingView& parContent, const std::vector<const FileRecordData*>& parFlatContent) {
if (parMediaType != dinlib::MediaType_CDRom and parMediaType != dinlib::MediaType_Directory) if (parMediaType != MediaType_CDRom and parMediaType != MediaType_Directory)
return false; return false;
const auto items_count = count_listing_items(parContent); const auto items_count = count_listing_items(parContent);
@ -121,7 +121,7 @@ namespace mchlib {
} }
} //unnamed namespace } //unnamed namespace
ContentTypes guess_content_type (dinlib::MediaTypes parMediaType, const ConstSetListingView& parContent, std::size_t parEntriesCount) { ContentTypes guess_content_type (MediaTypes parMediaType, const ConstSetListingView& parContent, std::size_t parEntriesCount) {
if (boost::empty(parContent)) if (boost::empty(parContent))
return ContentType_Empty; return ContentType_Empty;
@ -145,7 +145,7 @@ namespace mchlib {
return ContentType_Generic; return ContentType_Generic;
} }
ContentTypes guess_content_type (dinlib::MediaTypes parMediaType, const std::vector<FileRecordData>& parContent) { ContentTypes guess_content_type (MediaTypes parMediaType, const std::vector<FileRecordData>& parContent) {
if (parContent.empty()) if (parContent.empty())
return ContentType_Empty; return ContentType_Empty;
@ -155,7 +155,7 @@ namespace mchlib {
assert(std::equal(parContent.begin(), parContent.end(), SetListing(std::vector<FileRecordData>(parContent)).sorted_list().begin())); assert(std::equal(parContent.begin(), parContent.end(), SetListing(std::vector<FileRecordData>(parContent)).sorted_list().begin()));
//TODO: assert that the first item in the list is the shortest string //TODO: assert that the first item in the list is the shortest string
std::shared_ptr<PathName> pathname(new PathName(parContent.front().abs_path)); std::shared_ptr<PathName> pathname(new PathName(""));
ConstSetListingView view(parContent.begin(), parContent.end(), pathname->atom_count(), pathname); ConstSetListingView view(parContent.begin(), parContent.end(), pathname->atom_count(), pathname);
assert(parContent.size() >= 1); assert(parContent.size() >= 1);
return guess_content_type(parMediaType, view, parContent.size() - 1); return guess_content_type(parMediaType, view, parContent.size() - 1);

View file

@ -1,526 +0,0 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
//WARNING: buggy code - intermediate hash for directories that contain files
//is likely wrong!
//#define USE_LEGACY_HASH_DIR
#include "dindexer-machinery/indexer.hpp"
#include "pathname.hpp"
#include "dindexer-machinery/tiger.hpp"
#include "dindexer-common/settings.hpp"
#include "dindexer-machinery/filestats.hpp"
#include "mimetype.hpp"
#include "dindexer-machinery/recorddata.hpp"
#if !defined(USE_LEGACY_HASH_DIR)
# include "dindexer-machinery/set_listing.hpp"
#endif
#include <algorithm>
#include <functional>
#include <stdexcept>
#if defined(WITH_PROGRESS_FEEDBACK)
# include <atomic>
# include <condition_variable>
#endif
#include <cstdint>
#include <ciso646>
#include <cassert>
#include <boost/iterator/filter_iterator.hpp>
#include <sstream>
#include <iterator>
#if defined(INDEXER_VERBOSE)
# include <iostream>
#endif
#include <boost/utility/string_ref.hpp>
#include <boost/range/empty.hpp>
namespace mchlib {
using HashType = decltype(FileRecordData::hash);
namespace {
typedef std::vector<FileRecordData>::iterator FileEntryIt;
//Appends the sizeof(HashType) raw bytes of parHash to parDest, followed by
//the characters of parString.
void append_to_vec (std::vector<char>& parDest, const HashType& parHash, const std::string& parString) {
	parDest.insert(parDest.end(), parHash.byte_data, parHash.byte_data + sizeof(HashType));
	parDest.insert(parDest.end(), parString.begin(), parString.end());
}
//Appends the characters of parString to the end of parDest.
void append_to_vec (std::vector<char>& parDest, const std::string& parString) {
	parDest.insert(parDest.end(), parString.begin(), parString.end());
}
#if !defined(USE_LEGACY_HASH_DIR)
//Hashes the directory parEntry together with the entries listed in parList.
//
//The work happens in two passes over parList:
// 1) every entry contributes its path relative to parCurrDir to a blob;
//    directories are hashed recursively first and contribute their hash as
//    well. The blob is then hashed into parEntry.hash.
// 2) every entry from first_file(parList) onwards is hashed with
//    tiger_file(), which also receives parEntry.hash, and gets its mime
//    fields filled in.
//
//parMime is used to analyse the mime type of the directory and of each file.
//When hashing a file throws std::ios_base::failure and parIgnoreErrors is
//true, the file is flagged as unreadable and gets a value-initialised hash;
//otherwise the exception is propagated to the caller.
void hash_dir (FileRecordData& parEntry, MutableSetListingView& parList, const PathName& parCurrDir, MimeType& parMime, bool parIgnoreErrors) {
	assert(parEntry.is_directory);

	parEntry.mime_full = parMime.analyze(parEntry.abs_path);

	//Build a blob with the relative names of every direct child of the
	//current entry, plus the hash of those children that are directories
	std::vector<char> dir_blob;
#if defined(INDEXER_VERBOSE)
	std::cout << "Making initial hash for " << parCurrDir << "...\n";
#endif
	for (auto it = parList.begin(); it != parList.end(); ++it) {
		assert(parCurrDir == PathName(it->abs_path).pop_right());

		PathName curr_subdir(it->abs_path);
		const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();
		if (it->is_directory) {
			auto cd_list = MutableSetListingView(it);
			assert(boost::empty(cd_list) or cd_list.begin()->abs_path != it->abs_path);

			hash_dir(*it, cd_list, curr_subdir, parMime, parIgnoreErrors);
			append_to_vec(dir_blob, it->hash, relpath);
		}
		else {
			append_to_vec(dir_blob, relpath);
		}
	}
	tiger_data(dir_blob, parEntry.hash);
	parEntry.size = 0;
#if defined(INDEXER_VERBOSE)
	std::cout << "Got intermediate hash for dir " << parCurrDir <<
		": " << tiger_to_string(parEntry.hash) <<
		' ' << parEntry.mime_type << '\n';
#endif

	//Now with the initial hash ready, let's start hashing files, if any
	for (auto it = first_file(parList); it != parList.end(); ++it) {
		assert(not it->is_directory);
#if defined(INDEXER_VERBOSE)
		std::cout << "Hashing file " << it->abs_path << "...";
#endif
		//TODO: notify callback
		try {
			tiger_file(it->abs_path, it->hash, parEntry.hash, it->size);
			it->hash_valid = true;
			it->mime_full = parMime.analyze(it->abs_path);
			auto mime_pair = split_mime(it->mime_full);
			it->mime_type = mime_pair.first;
			it->mime_charset = mime_pair.second;
		}
		catch (const std::ios_base::failure&) {
			if (parIgnoreErrors) {
				it->unreadable = true;
				it->hash = HashType {};
				if (it->mime_full.get().empty()) {
					it->mime_full = "unknown";
					it->mime_type = boost::string_ref(it->mime_full.get());
					it->mime_charset = boost::string_ref(it->mime_full.get());
				}
			}
			else {
				//Rethrow the exception currently being handled: throwing
				//the caught reference by name would copy it and slice off
				//any derived type
				throw;
			}
		}

#if defined(INDEXER_VERBOSE)
		std::cout << ' ' << tiger_to_string(it->hash) << ' ' <<
			"Mime type: \"" << it->mime_type << "\"\n";
#endif
	}

#if defined(INDEXER_VERBOSE)
	std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(parEntry.hash) << '\n';
#endif
	parEntry.hash_valid = true;
	{
		parEntry.mime_full = parMime.analyze(parEntry.abs_path);
		auto mime_pair = split_mime(parEntry.mime_full);
		parEntry.mime_type = mime_pair.first;
		parEntry.mime_charset = mime_pair.second;
	}
}
#endif
#if defined(USE_LEGACY_HASH_DIR)
//Legacy implementation of the directory hashing, working directly on the
//flat [parBegin, parEnd) list of records instead of a listing view.
//
//parEntry must point to a directory inside that range. The function first
//builds a blob from the direct children of parCurrDir (recursing into
//sub-directories) and hashes it into the entry's hash, then hashes every
//direct child file with tiger_file(), invoking parNextItemCallback with the
//file's index relative to parBegin right before each file is processed.
//When hashing a file throws std::ios_base::failure and parIgnoreErrors is
//true, the file is flagged as unreadable and gets a value-initialised hash;
//otherwise the exception is propagated to the caller.
void hash_dir (FileEntryIt parEntry, FileEntryIt parBegin, FileEntryIt parEnd, const PathName& parCurrDir, std::function<void(std::size_t)> parNextItemCallback, bool parIgnoreErrors, MimeType& parMime) {
	assert(parEntry != parEnd);
	assert(parEntry->is_directory);
	FileRecordData& curr_entry = *parEntry;
	auto& curr_entry_it = parEntry;

	curr_entry.mime_full = parMime.analyze(curr_entry.abs_path);

	//Build a blob with the relative names of every direct child of the
	//current entry, plus the hash of those children that are directories
	{
		std::vector<char> dir_blob;
		auto it_entry = curr_entry_it;

		//Skip forward to the first entry whose parent is parCurrDir
		while (
			it_entry != parEnd and (
				it_entry->level == curr_entry.level
				or parCurrDir != PathName(it_entry->abs_path).pop_right()
				//and (not it_entry->is_dir or (it_entry->level <= curr_entry.level
				//and parCurrDir != PathName(it_entry->path).pop_right()))
		)) {
			assert(it_entry->level >= curr_entry.level);
			++it_entry;
		}

#if defined(INDEXER_VERBOSE)
		std::cout << "Making initial hash for " << parCurrDir << "...\n";
#endif
		while (parEnd != it_entry and it_entry->level == curr_entry_it->level + 1 and parCurrDir == PathName(it_entry->abs_path).pop_right()) {
			PathName curr_subdir(it_entry->abs_path);
			const std::string relpath = make_relative_path(parCurrDir, curr_subdir).path();

			if (it_entry->is_directory) {
				hash_dir(it_entry, parBegin, parEnd, curr_subdir, parNextItemCallback, parIgnoreErrors, parMime);
				append_to_vec(dir_blob, it_entry->hash, relpath);
			}
			else {
				append_to_vec(dir_blob, relpath);
			}
			++it_entry;
		}

		tiger_data(dir_blob, curr_entry.hash);
		curr_entry.size = 0;
#if defined(INDEXER_VERBOSE)
		std::cout << "Got intermediate hash for dir " << parCurrDir <<
			": " << tiger_to_string(curr_entry.hash) <<
			' ' << curr_entry.mime_type << '\n';
#endif
	}

	//Now with the initial hash ready, let's start hashing files, if any
	{
		auto it_entry = curr_entry_it;
		//Skip forward to the first file that is a direct child of parCurrDir
		while (
			it_entry != parEnd
			and (it_entry->is_directory
				or it_entry->level != curr_entry_it->level + 1
				or PathName(it_entry->abs_path).pop_right() != parCurrDir
			)
		) {
			++it_entry;
		}

		while (it_entry != parEnd and not it_entry->is_directory and it_entry->level == curr_entry_it->level + 1 and PathName(it_entry->abs_path).pop_right() == parCurrDir) {
			assert(not it_entry->is_directory);
#if defined(INDEXER_VERBOSE)
			std::cout << "Hashing file " << it_entry->abs_path << "...";
#endif
			parNextItemCallback(it_entry - parBegin);
			try {
				tiger_file(it_entry->abs_path, it_entry->hash, curr_entry_it->hash, it_entry->size);
				it_entry->hash_valid = true;
				it_entry->mime_full = parMime.analyze(it_entry->abs_path);
				auto mime_pair = split_mime(it_entry->mime_full);
				it_entry->mime_type = mime_pair.first;
				it_entry->mime_charset = mime_pair.second;
			}
			catch (const std::ios_base::failure&) {
				if (parIgnoreErrors) {
					it_entry->unreadable = true;
					it_entry->hash = HashType {};
					if (it_entry->mime_full.get().empty()) {
						it_entry->mime_full = "unknown";
						it_entry->mime_type = boost::string_ref(it_entry->mime_full.get());
						it_entry->mime_charset = boost::string_ref(it_entry->mime_full.get());
					}
				}
				else {
					//Rethrow the exception currently being handled: throwing
					//the caught reference by name would copy it and slice
					//off any derived type
					throw;
				}
			}

#if defined(INDEXER_VERBOSE)
			std::cout << ' ' << tiger_to_string(it_entry->hash) << ' ' <<
				"Mime type: \"" << it_entry->mime_type << "\"\n";
#endif
			++it_entry;
		}
	}

#if defined(INDEXER_VERBOSE)
	std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(curr_entry_it->hash) << '\n';
#endif
	curr_entry_it->hash_valid = true;
	{
		curr_entry_it->mime_full = parMime.analyze(curr_entry_it->abs_path);
		auto mime_pair = split_mime(curr_entry_it->mime_full);
		curr_entry_it->mime_type = mime_pair.first;
		curr_entry_it->mime_charset = mime_pair.second;
	}
}
#endif
//Predicate over FileRecordData: with FileTrue=true it accepts entries that
//are not directories, with FileTrue=false it accepts directories.
template <bool FileTrue=true>
struct IsFile {
	bool operator() ( const FileRecordData& parEntry ) const {
		const bool is_dir = parEntry.is_directory;
		return is_dir != FileTrue;
	}
};
//Builds a FileRecordData for parPath out of the access/modification times,
//level and directory/symlink flags carried by parSt.
FileRecordData make_file_record_data (const char* parPath, const fastf::FileStats& parSt) {
	FileRecordData record(parPath, parSt.atime, parSt.mtime, parSt.level, parSt.is_dir, parSt.is_symlink);
	return record;
}
bool file_record_data_lt (const FileRecordData& parLeft, const FileRecordData& parRight) {
const FileRecordData& l = parLeft;
const FileRecordData& r = parRight;
return
(l.level < r.level)
or (l.level == r.level and l.is_directory and not r.is_directory)
or (l.level == r.level and l.is_directory == r.is_directory and l.abs_path < r.abs_path)
//sort by directory - parent first, children later
//(level == o.level and is_dir and not o.is_dir)
//or (level == o.level and is_dir == o.is_dir and path < o.path)
//or (level > o.level + 1)
//or (level + 1 == o.level and is_dir and not o.is_dir and path < o.path)
//or (level + 1 == o.level and is_dir and not o.is_dir and path == PathName(o.path).dirname())
//or (level == o.level + 1 and not (o.is_dir and not is_dir and o.path == PathName(path).dirname()))
;
}
void populate_rel_paths (const PathName& parBase, std::vector<FileRecordData>& parItems) {
const std::size_t offset = parBase.str_path_size() + 1;
for (FileRecordData& itm : parItems) {
const auto curr_offset = std::min(offset, itm.abs_path.size());
itm.path = boost::string_ref(itm.abs_path).substr(curr_offset);
assert(itm.path.data());
}
}
} //unnamed namespace
//Private state of Indexer, kept out of the public header.
struct Indexer::LocalData {
	using PathList = std::vector<FileRecordData>;

	//Every record collected through add_path()
	PathList paths;
#if defined(WITH_PROGRESS_FEEDBACK)
	//Progress reporting: items processed so far, index of the item being
	//processed and the condition variable notified at every step
	std::atomic<std::size_t> done_count;
	std::atomic<std::size_t> processing_index;
	std::condition_variable step_notify;
#endif
	//Number of non-directory entries in paths
	std::size_t file_count;
	//Forwarded to hash_dir() by calculate_hash()
	bool ignore_read_errors;
};
//Creates an empty indexer: no paths, all counters at zero and read errors
//not ignored.
Indexer::Indexer() :
	m_local_data(new LocalData)
{
#if !defined(NDEBUG)
	//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b", 2, true, false));
	//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b/c/file.txt", 4, false, false));
	//assert(FileEntry("/a/b/c", 3, true, false) < FileEntry("/a/b/c/file.c", 4, false, false));
	//assert(FileEntry("/a/b/c/d", 4, true, false) < FileEntry("/a/b", 2, true, false));
	//assert(FileEntry("/a/b/c/d", 4, true, false) < FileEntry("/a/b/c", 3, true, false));
	//assert(FileEntry("/a/b/c/1.txt", 4, true, false) < FileEntry("/a/b/c/2.txt", 4, true, false));
	//assert(not (FileEntry("/a/b/file.txt", 3, false, false) < FileEntry("/a/b", 2, true, false)));
	//assert(not (FileEntry("/a", 1, true, false) < FileEntry("/a/b", 2, true, false)));
	//assert(not (FileEntry("/a/b/1.txt", 3, false, false) < FileEntry("/a/b/c/f.txt", 4, true, false)));
	//assert(not (FileEntry("/a/b/c/file.c", 4, false, false) < FileEntry("/a/b/c", 3, true, false)));
#endif

#if defined(WITH_PROGRESS_FEEDBACK)
	m_local_data->done_count = 0;
	m_local_data->processing_index = 0;
#endif
	m_local_data->file_count = 0;
	//LocalData is default-initialised by the new-expression above, which
	//leaves this flag with an indeterminate value; give it a defined default
	//so calculate_hash() is safe even if ignore_read_errors() is never called
	m_local_data->ignore_read_errors = false;
}
//Nothing to do explicitly here.
//NOTE(review): presumably defined in this file so that LocalData is a
//complete type where m_local_data gets destroyed - confirm against the header.
Indexer::~Indexer() noexcept {
}
//Returns the number of non-directory entries added so far (add_path()
//increments this counter for every entry that is not a directory).
std::size_t Indexer::total_items() const {
return m_local_data->file_count;
}
#if defined(WITH_PROGRESS_FEEDBACK)
//Returns the current value of the done_count progress counter.
//NOTE(review): in this file only the legacy hash_dir() callback increments
//that counter, so with the default build this looks like it stays at zero.
std::size_t Indexer::processed_items() const {
return m_local_data->done_count;
}
#endif
//Sorts the collected records with file_record_data_lt, runs hash_dir() on
//them using the first record as the directory being hashed, then fills in
//the relative path of every record.
//NOTE(review): paths.front() is read without checking that the list is not
//empty - callers presumably add at least one path first; confirm.
void Indexer::calculate_hash() {
//Taken before sorting: the base path is whatever record is first right now
PathName base_path(m_local_data->paths.front().abs_path);
std::sort(m_local_data->paths.begin(), m_local_data->paths.end(), &file_record_data_lt);
MimeType mime;
#if defined(INDEXER_VERBOSE)
//Seed every hash with a 1,1,1 marker so the check at the end of this
//function can spot records whose hash was never written, and dump the list
for (auto& itm : m_local_data->paths) {
itm.hash.part_a = 1;
itm.hash.part_b = 1;
itm.hash.part_c = 1;
if (itm.is_directory)
std::cout << "(D) ";
else
std::cout << "(F) ";
std::cout << itm.abs_path << " (" << itm.level << ")\n";
}
std::cout << "-----------------------------------------------------\n";
#endif
#if !defined(USE_LEGACY_HASH_DIR)
MutableSetListingView recordlist(m_local_data->paths.begin(), m_local_data->paths.end(), base_path.atom_count());
#endif
#if defined(WITH_PROGRESS_FEEDBACK)
m_local_data->done_count = 0;
hash_dir(
#if defined(USE_LEGACY_HASH_DIR)
m_local_data->paths.begin(),
m_local_data->paths.begin(),
m_local_data->paths.end(),
base_path,
//Progress callback: count the item, remember its index and wake up whoever
//is waiting on step_notify
[=](std::size_t parNext) {
++m_local_data->done_count;
m_local_data->processing_index = parNext;
m_local_data->step_notify.notify_all();
},
m_local_data->ignore_read_errors,
mime
#else
m_local_data->paths.front(),
recordlist,
base_path,
mime,
m_local_data->ignore_read_errors
#endif
);
//TODO: re-enable after hash_dir sends progress notifications again
//assert(m_local_data->done_count == m_local_data->file_count);
#else
hash_dir(
#if defined(USE_LEGACY_HASH_DIR)
m_local_data->paths.begin(),
m_local_data->paths.begin(),
m_local_data->paths.end(),
base_path,
//No progress feedback in this configuration: the callback does nothing
[](std::size_t) {},
m_local_data->ignore_read_errors,
mime
#else
m_local_data->paths.front(),
recordlist,
base_path,
mime,
m_local_data->ignore_read_errors
#endif
);
#endif
populate_rel_paths(base_path, m_local_data->paths);
#if defined(INDEXER_VERBOSE)
//Every record must have lost the 1,1,1 marker set at the top
for (const auto& itm : m_local_data->paths) {
assert(not (1 == itm.hash.part_a and 1 == itm.hash.part_b and 1 == itm.hash.part_c));
}
#endif
}
//Inserts a record for parPath at the position SetListing::lower_bound()
//reports for it and counts it as a file when it is not a directory.
//Always returns true.
bool Indexer::add_path (const char* parPath, const fastf::FileStats& parStats) {
	auto& records = m_local_data->paths;

	auto insert_pos = SetListing::lower_bound(records, parPath, parStats.level, parStats.is_dir);
	records.insert(insert_pos, make_file_record_data(parPath, parStats));

	if (parStats.is_dir) {
		return true;
	}
	++m_local_data->file_count;
	return true;
}
#if defined(INDEXER_VERBOSE)
void Indexer::dump() const {
PathName base_path(m_local_data->paths.front().abs_path);
std::cout << "---------------- FILE LIST ----------------\n";
for (const auto& cur_itm : m_local_data->paths) {
if (not cur_itm.is_directory) {
PathName cur_path(cur_itm.abs_path);
std::cout << make_relative_path(base_path, cur_path).path() << '\n';
}
}
std::cout << "---------------- DIRECTORY LIST ----------------\n";
for (const auto& cur_itm : m_local_data->paths) {
if (cur_itm.is_directory) {
PathName cur_path(cur_itm.abs_path);
std::cout << make_relative_path(base_path, cur_path).path() << '\n';
}
}
}
#endif
//Returns true when fewer than two records have been collected.
//NOTE(review): presumably the first record is the scanned root itself, so a
//single record means there is no actual content - confirm.
bool Indexer::empty() const {
return m_local_data->paths.size() < 2;
}
#if defined(WITH_PROGRESS_FEEDBACK)
//Gives access to the condition variable that the legacy progress callback in
//calculate_hash() notifies after every processed item.
std::condition_variable& Indexer::step_notify() {
return m_local_data->step_notify;
}
#endif
#if defined(WITH_PROGRESS_FEEDBACK)
std::string Indexer::current_item() const {
if (m_local_data->paths.empty() or 0 == m_local_data->processing_index)
return std::string();
PathName base_path(m_local_data->paths.front().abs_path);
PathName ret_path(m_local_data->paths[m_local_data->processing_index].abs_path);
return make_relative_path(base_path, ret_path).path();
}
#endif
//Returns the path, relative to the first record, of the parIndex-th record
//that is not a directory. Throws std::out_of_range when parIndex is not
//smaller than the number of files counted by add_path().
std::string Indexer::operator[] (std::size_t parIndex) const {
	const auto& available = m_local_data->file_count;
	if (parIndex >= available) {
		std::ostringstream message;
		message << "Requested index " << parIndex << " is out of range: only " << available << " items are available";
		throw std::out_of_range(message.str());
	}

	const auto& records = m_local_data->paths;
	//Iterator that only stops on the records accepted by IsFile<>
	auto file_it = boost::make_filter_iterator<IsFile<>>(records.begin(), records.end());
	assert(not records.empty());
	std::advance(file_it, parIndex);

	const PathName root(records.front().abs_path);
	const PathName wanted(file_it->abs_path);
	return make_relative_path(root, wanted).path();
}
//Stores the flag that calculate_hash() forwards to hash_dir(): when true,
//files that fail to be read are flagged as unreadable instead of making the
//hashing throw.
void Indexer::ignore_read_errors (bool parIgnore) {
m_local_data->ignore_read_errors = parIgnore;
}
//Returns a read-only reference to the list of collected records.
const std::vector<FileRecordData>& Indexer::record_data() const {
#if defined(WITH_PROGRESS_FEEDBACK)
//TODO: re-enable after hash_dir sends progress notifications again
//assert(m_local_data->done_count == m_local_data->file_count);
#endif
return m_local_data->paths;
}
} //namespace mchlib

View file

@ -40,23 +40,23 @@ namespace mchlib {
{ {
} }
dinlib::MediaTypes guess_media_type (std::string&& parPath) { MediaTypes guess_media_type (std::string&& parPath) {
DiscInfo info(std::move(parPath)); DiscInfo info(std::move(parPath));
const DriveTypes drive_type = info.drive_type(); const DriveTypes drive_type = info.drive_type();
if (DriveType_HardDisk == drive_type) { if (DriveType_HardDisk == drive_type) {
if (info.mountpoint() == PathName(info.original_path()).path()) if (info.mountpoint() == PathName(info.original_path()).path())
return dinlib::MediaType_HardDisk; return MediaType_HardDisk;
else else
return dinlib::MediaType_Directory; return MediaType_Directory;
} }
else if (DriveType_Optical == drive_type) { else if (DriveType_Optical == drive_type) {
switch (info.optical_type()) { switch (info.optical_type()) {
case OpticalType_DVD: case OpticalType_DVD:
return dinlib::MediaType_DVD; return MediaType_DVD;
case OpticalType_CDRom: case OpticalType_CDRom:
return dinlib::MediaType_CDRom; return MediaType_CDRom;
case OpticalType_BluRay: case OpticalType_BluRay:
return dinlib::MediaType_BluRay; return MediaType_BluRay;
default: default:
throw UnknownMediaTypeException("Set autodetect failed because this media type is unknown, please specify the set type manually"); throw UnknownMediaTypeException("Set autodetect failed because this media type is unknown, please specify the set type manually");
} }

View file

@ -15,11 +15,11 @@
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>. * along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "dindexer-common/mediatypes.hpp" #include "dindexer-machinery/mediatypes.hpp"
#include <map> #include <map>
#include <stdexcept> #include <stdexcept>
namespace dinlib { namespace mchlib {
const std::string& media_type_to_str (MediaTypes parType) { const std::string& media_type_to_str (MediaTypes parType) {
static const std::map<MediaTypes, const std::string> types { static const std::map<MediaTypes, const std::string> types {
{MediaType_CDRom, "CD-Rom"}, {MediaType_CDRom, "CD-Rom"},
@ -43,4 +43,8 @@ namespace dinlib {
MediaTypes char_to_media_type (char parMType) { MediaTypes char_to_media_type (char parMType) {
return static_cast<MediaTypes>(parMType); return static_cast<MediaTypes>(parMType);
} }
} //namespace dinlib
//Converts parMType to a char through a plain static_cast, mirroring what
//char_to_media_type() does in the opposite direction.
char media_type_to_char (MediaTypes parMType) {
return static_cast<char>(parMType);
}
} //namespace mchlib

View file

@ -204,6 +204,17 @@ namespace mchlib {
return parStream; return parStream;
} }
//Returns the last atom of parPath, or an empty string when parPath has no
//atoms at all.
const boost::string_ref basename (const PathName& parPath) {
	static const char* const empty = "";

	const auto atoms = parPath.atom_count();
	if (atoms) {
		return parPath[atoms - 1];
	}
	return boost::string_ref(empty);
}
PathName& PathName::pop_right() { PathName& PathName::pop_right() {
m_pool.pop(); m_pool.pop();
return *this; return *this;

View file

@ -59,6 +59,7 @@ namespace mchlib {
PathName make_relative_path ( const PathName& parBasePath, const PathName& parOtherPath ); PathName make_relative_path ( const PathName& parBasePath, const PathName& parOtherPath );
std::ostream& operator<< ( std::ostream& parStream, const PathName& parPath ); std::ostream& operator<< ( std::ostream& parStream, const PathName& parPath );
const boost::string_ref basename ( const PathName& parPath );
} //namespace mchlib } //namespace mchlib
#endif #endif

View file

@ -0,0 +1,46 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "dindexer-machinery/scantask/contenttype.hpp"
#include "dindexer-machinery/guess_content_type.hpp"
#include <cassert>
namespace mchlib {
namespace scantask {
//Stores the three tasks this one reads from: the set task, the directory
//tree task and the media type task. All of them are required, as checked by
//the asserts below.
ContentType::ContentType (SetTaskType parSet, DirTreeTaskPtr parDirTree, MediaTypeTaskPtr parMediaType) :
m_set_task(parSet),
m_dir_tree(parDirTree),
m_media_type(parMediaType)
{
assert(m_set_task);
assert(m_dir_tree);
assert(m_media_type);
}
//Returns whatever the wrapped set task's get_or_create() yields; this task
//keeps no data object of its own in this function.
SetRecordDataFull& ContentType::on_data_get() {
return m_set_task->get_or_create();
}
void ContentType::on_data_fill() {
auto& data = m_set_task->get_or_create();
auto media_type = char_to_media_type(m_media_type->get_or_create().type);
const auto& tree = m_dir_tree->get_or_create();
const auto cont_type = mchlib::guess_content_type(media_type, tree);
data.content_type = content_type_to_char(cont_type);
}
} //namespace scantask
} //namespace mchlib

View file

@ -0,0 +1,107 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "dindexer-machinery/scantask/dirtree.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include "dindexer-machinery/set_listing.hpp"
#include "helpers/compatibility.h"
#include "filesearcher.hpp"
#include "pathname.hpp"
#include <utility>
#include <cassert>
#include <ciso646>
#include <functional>
#include <algorithm>
namespace mchlib {
namespace {
std::size_t calc_rel_path_offs ( const PathName& parRoot, boost::string_ref parPath ) a_pure;
std::size_t calc_rel_path_offs (const PathName& parRoot, boost::string_ref parPath) {
PathName path(parPath);
PathName rel_path = make_relative_path(parRoot, path);
const auto rel_path_len = rel_path.str_path_size();
const auto path_len = path.str_path_size();
assert(rel_path_len <= path_len);
return path_len - rel_path_len;
}
//Scan callback: builds a FileRecordData for parPath out of parStats and
//inserts it into parOut at the position given by SetListing::lower_bound().
//parRoot is the scan root, needed to work out where the relative part of
//parPath starts. Always returns true.
bool add_path (scantask::DirTree::PathList& parOut, const PathName& parRoot, const char* parPath, const fastf::FileStats& parStats) {
	const auto insert_pos = SetListing::lower_bound(
		parOut,
		parPath,
		parStats.level,
		parStats.is_dir
	);

	const auto rel_path_offs = calc_rel_path_offs(parRoot, boost::string_ref(parPath));
	const auto level = static_cast<uint16_t>(parStats.level);
	const bool is_dir = static_cast<bool>(parStats.is_dir);
	const bool is_symlink = static_cast<bool>(parStats.is_symlink);

	parOut.insert(
		insert_pos,
		FileRecordData(parPath, rel_path_offs, parStats.atime, parStats.mtime, level, is_dir, is_symlink)
	);
	return true;
}
}
namespace scantask {
//Takes ownership of the path to scan; an empty path is not accepted.
DirTree::DirTree (std::string parRoot) :
	m_root(std::move(parRoot))
{
	const bool has_root = !m_root.empty();
	assert(has_root);
	static_cast<void>(has_root);
}
//Resets the given path list by removing every entry from it.
void DirTree::on_data_destroy (PathList& parData) {
parData.clear();
}
//Fills parData, which must be empty on entry, by running a file search
//rooted at m_root. Symlinks are followed and both the extension and the
//ignore lists handed to the searcher are empty. Every entry reported by the
//searcher goes through add_path().
void DirTree::on_data_create (PathList& parData) {
	assert(parData.empty());

	fastf::FileSearcher searcher(m_root);
	fastf::FileSearcher::ConstCharVecType ext, ignore;
	searcher.SetFollowSymlinks(true);

	//The root is converted to a PathName once and copied into the callback
	const boost::string_ref root_view(m_root);
	const PathName root_path(root_view);
	const auto on_entry = [&parData, root_path] (const char* parPath, const fastf::FileStats& parStats) {
		return add_path(parData, root_path, parPath, parStats);
	};
	searcher.SetCallback(fastf::FileSearcher::CallbackType(on_entry));

	searcher.Search(ext, ignore);
}
} //namespace scantask
} //namespace mchlib

View file

@ -0,0 +1,193 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "dindexer-machinery/scantask/hashing.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include "dindexer-machinery/set_listing.hpp"
#include "dindexer-machinery/tiger.hpp"
#include "pathname.hpp"
#include <cassert>
#include <boost/range/empty.hpp>
#include <boost/utility/string_ref.hpp>
//#define INDEXER_VERBOSE
#if defined(INDEXER_VERBOSE)
# include <iostream>
#endif
namespace mchlib {
namespace {
//Progress state handed around by the hashing routines and reported to the
//user-supplied callback through notify().
//The counters are zero-initialised so that a default-constructed object is
//always safe to pass to notify().
struct ProgressInfo {
	//Function invoked by notify()
	scantask::Hashing::ProgressCallback callback;
	//Path passed to the callback as the file currently being hashed
	boost::string_ref curr_path;
	//Passed through to the callback as is; the hashing code in this file
	//never advances it
	uint64_t file_bytes_read = 0;
	//Sum of the sizes of the files hashed so far
	uint64_t total_bytes_read = 0;
	//Number of files for which hashing has been started
	uint32_t file_num = 0;

	//Forwards the current state to the callback
	void notify ( void ) {
		callback(curr_path, file_bytes_read, total_bytes_read, file_num);
	}
};
//Appends the raw bytes of parHash to parDest, immediately followed by the
//characters of parString.
void append_to_vec (std::vector<char>& parDest, const TigerHash& parHash, boost::string_ref parString) {
	const auto* const hash_begin = parHash.byte_data;
	const auto* const hash_end = hash_begin + sizeof(TigerHash);

	parDest.insert(parDest.end(), hash_begin, hash_end);
	parDest.insert(parDest.end(), parString.begin(), parString.end());
}
//Appends the characters of parString at the end of parDest.
void append_to_vec (std::vector<char>& parDest, boost::string_ref parString) {
	parDest.insert(parDest.end(), parString.begin(), parString.end());
}
//Recursively hashes the directory parEntry together with the entries in
//parList, which are expected to be the direct children of parEntry.
//A first pass walks all the children: sub-directories are hashed
//recursively and contribute their hash plus their basename to a blob, files
//contribute their basename only. The blob's tiger hash becomes the initial
//hash of parEntry. A second pass hashes the files through tiger_file(),
//which is also handed parEntry.hash (presumably updating it).
//parIgnoreErrors: when true, a file failing with std::ios_base::failure is
//flagged as unreadable and gets a value-initialized hash; when false the
//exception propagates to the caller.
//parProgressInfo: shared progress state; it is updated and notified once
//for every file before hashing it.
void hash_dir (FileRecordData& parEntry, MutableSetListingView& parList, bool parIgnoreErrors, ProgressInfo& parProgressInfo) {
	assert(parEntry.is_directory);

	//Build a blob with the filenames of every direct child of the current
	//entry; directories are hashed first and also contribute their hash
	std::vector<char> dir_blob;
#if defined(INDEXER_VERBOSE)
	std::cout << "Making initial hash for " << parEntry.abs_path << "...\n";
#endif
	for (auto it = parList.begin(); it != parList.end(); ++it) {
		assert(PathName(parEntry.abs_path) == PathName(it->abs_path).pop_right());

		PathName curr_path(it->path);
		const auto basename = mchlib::basename(curr_path);
		if (it->is_directory) {
			auto cd_list = MutableSetListingView(it);
			assert(boost::empty(cd_list) or cd_list.begin()->abs_path != it->abs_path);

			hash_dir(*it, cd_list, parIgnoreErrors, parProgressInfo);
			append_to_vec(dir_blob, it->hash, basename);
		}
		else {
			append_to_vec(dir_blob, basename);
		}
	}
	tiger_data(dir_blob, parEntry.hash);

#if defined(INDEXER_VERBOSE)
	std::cout << "Got intermediate hash for dir " << parEntry.abs_path <<
		": " << tiger_to_string(parEntry.hash) <<
		' ' << parEntry.mime_type << '\n';
#endif

	//Now with the initial hash ready, let's start hashing files, if any
	for (auto it = first_file(parList); it != parList.end(); ++it) {
		assert(not it->is_directory);
#if defined(INDEXER_VERBOSE)
		std::cout << "Hashing file " << it->abs_path << "...\n";
#endif
		try {
			++parProgressInfo.file_num;
			parProgressInfo.curr_path = it->abs_path;
			parProgressInfo.notify();

			tiger_file(it->abs_path, it->hash, parEntry.hash, it->size);
			it->hash_valid = true;
			parProgressInfo.total_bytes_read += it->size;
		}
		catch (const std::ios_base::failure&) {
			if (parIgnoreErrors) {
				it->unreadable = true;
				it->hash = TigerHash {};
			}
			else {
				//Rethrow the exception currently being handled: throwing
				//the caught reference would copy it and slice off any
				//derived exception type
				throw;
			}
		}
	}

#if defined(INDEXER_VERBOSE)
	std::cout << "Final hash for dir " << parEntry.abs_path << " is " << tiger_to_string(parEntry.hash) << '\n';
#endif
	parEntry.hash_valid = true;
}
//No-op progress callback, used whenever no user callback is installed so
//that the callback member can always be invoked.
void dummy_progress_callback (
	const boost::string_ref /*parPath*/,
	uint64_t /*parFileBytes*/,
	uint64_t /*parTotalBytes*/,
	uint32_t /*parFileNum*/
) {
	//Intentionally empty
}
} //unnamed namespace
namespace scantask {
Hashing::Hashing (std::shared_ptr<FileTreeBase> parFileTree, bool parIgnoreErrors) :
m_file_tree_task(parFileTree),
m_progress_callback(&dummy_progress_callback),
m_ignore_errors(parIgnoreErrors)
{
assert(m_file_tree_task);
}
//Nothing to release by hand; the destructor is still defined out of line,
//in this translation unit.
Hashing::~Hashing() noexcept = default;
//Hands back the record list obtained from the file tree task; hashes
//computed by on_data_fill() are stored inside those same records.
std::vector<FileRecordData>& Hashing::on_data_get() {
	std::vector<FileRecordData>& records = m_file_tree_task->get_or_create();
	return records;
}
//Hashes everything in the list obtained from the file tree task.
//An empty list is left untouched. When the first entry is a directory the
//list is handed to hash_dir(), which hashes it recursively; otherwise the
//list is expected to contain exactly one file, which is hashed on its own.
//Read failures (std::ios_base::failure) either flag the entry as unreadable
//or propagate to the caller, depending on m_ignore_errors.
void Hashing::on_data_fill() {
	std::vector<FileRecordData>& file_list = m_file_tree_task->get_or_create();
	if (file_list.empty()) {
		return;
	}

	ProgressInfo progr_info;
	progr_info.callback = m_progress_callback;
	progr_info.curr_path = "";
	progr_info.file_bytes_read = 0;
	progr_info.total_bytes_read = 0;
	progr_info.file_num = 0;

	if (file_list.front().is_directory) {
		MutableSetListingView recordlist(file_list.begin(), file_list.end(), 0);
		hash_dir(file_list.front(), recordlist, m_ignore_errors, progr_info);
	}
	else {
		assert(1 == file_list.size());
		auto& curr_file_rec = file_list.front();
		//There is no parent directory hash to feed, so a throwaway one is
		//passed in its place
		TigerHash dummy {};

		try {
			tiger_file(curr_file_rec.abs_path, curr_file_rec.hash, dummy, curr_file_rec.size);
			curr_file_rec.hash_valid = true;
		}
		catch (const std::ios_base::failure&) {
			if (m_ignore_errors) {
				curr_file_rec.unreadable = true;
				curr_file_rec.hash = TigerHash {};
			}
			else {
				//Rethrow the exception currently being handled: throwing
				//the caught reference would copy it and slice off any
				//derived exception type
				throw;
			}
		}
	}
}
//Installs parFunc as the progress callback. Passing an empty callable
//restores the built-in no-op callback, so that m_progress_callback never
//ends up empty.
void Hashing::set_progress_callback (ProgressCallback parFunc) {
	if (not parFunc) {
		m_progress_callback = &dummy_progress_callback;
		return;
	}
	m_progress_callback = parFunc;
}
} //namespace scantask
} //namespace mchlib

View file

@ -0,0 +1,62 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "dindexer-machinery/scantask/mediatype.hpp"
//#include "dindexer-machinery/guess_content_type.hpp"
#if defined(WITH_MEDIA_AUTODETECT)
# include "dindexer-machinery/mediatype.hpp"
#endif
#include "dindexer-machinery/recorddata.hpp"
#include <utility>
#include <cassert>
namespace mchlib {
namespace scantask {
//Sets up the media type task.
//parSet is the task providing the set record to fill; it must not be null.
//parDefault is the media type character written to the record when
//auto-detection is compiled out or when parForce is set (see
//on_data_fill()).
//parForce and parSearchPath are only stored when WITH_MEDIA_AUTODETECT is
//defined.
MediaType::MediaType (SetTaskType parSet, char parDefault, bool parForce, std::string parSearchPath) :
m_set_task(parSet),
m_default(char_to_media_type(parDefault))
#if defined(WITH_MEDIA_AUTODETECT)
, m_search_path(std::move(parSearchPath))
, m_force(parForce)
#endif
{
assert(m_set_task);
#if !defined(WITH_MEDIA_AUTODETECT)
//Neither parameter is used in this configuration; the casts are
//presumably here to silence unused-parameter warnings
static_cast<void>(parForce);
static_cast<void>(parSearchPath);
#endif
}
//Hands back the set record obtained from the set task; the media type
//written by on_data_fill() is stored in that same record.
SetRecordDataFull& MediaType::on_data_get() {
	SetRecordDataFull& set_record = m_set_task->get_or_create();
	return set_record;
}
void MediaType::on_data_fill() {
auto& data = m_set_task->get_or_create();
#if defined(WITH_MEDIA_AUTODETECT)
if (m_force) {
data.type = media_type_to_char(m_default);
}
else {
const auto guessed_type = mchlib::guess_media_type(std::string(m_search_path));
data.type = media_type_to_char(guessed_type);
}
#else
data.type = media_type_to_char(m_default);
#endif
}
} //namespace scantask
} //namespace mchlib

View file

@ -0,0 +1,53 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "dindexer-machinery/scantask/mime.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include "mimetype.hpp"
#include <cassert>
namespace mchlib {
namespace {
} //unnamed namespace
namespace scantask {
Mime::Mime (DirTreeTaskPtr parDirTree) :
m_file_tree_task(parDirTree)
{
assert(m_file_tree_task);
}
//Nothing to release by hand; the destructor is still defined out of line,
//in this translation unit.
Mime::~Mime() noexcept = default;
void Mime::on_data_fill() {
MimeType mime;
auto& list = m_file_tree_task->get_or_create();
for (auto& itm : list) {
itm.mime_full = mime.analyze(itm.abs_path);
auto mime_pair = split_mime(itm.mime_full);
itm.mime_type = mime_pair.first;
itm.mime_charset = mime_pair.second;
}
}
//Hands back the record list obtained from the file tree task; the mime data
//written by on_data_fill() is stored inside those same records.
std::vector<FileRecordData>& Mime::on_data_get() {
	std::vector<FileRecordData>& records = m_file_tree_task->get_or_create();
	return records;
}
} //namespace scantask
} //namespace mchlib

View file

@ -0,0 +1,41 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "dindexer-machinery/scantask/setbasic.hpp"
#include <utility>
namespace mchlib {
namespace scantask {
//Takes ownership of the name the set record will be given on creation.
SetBasic::SetBasic (std::string&& parName) :
	m_set_name(std::move(parName))
{
	//Nothing else to set up
}
//Nothing to release by hand; the destructor is still defined out of line,
//in this translation unit.
SetBasic::~SetBasic() noexcept = default;
//Clears both name fields of the record: first the one belonging to the
//SetRecordData base, then the one declared by SetRecordDataFull itself.
void SetBasic::on_data_destroy (SetRecordDataFull& parData) {
	SetRecordData& base_record = parData;
	base_record.name.clear();
	parData.name.clear();
}
//Copies the set name into the record's own name field, then makes the name
//field of the SetRecordData base refer to that freshly assigned value.
void SetBasic::on_data_create (SetRecordDataFull& parData) {
	parData.name = m_set_name;

	SetRecordData& base_record = parData;
	base_record.name = parData.name;
}
} //namespace scantask
} //namespace mchlib

View file

@ -42,8 +42,8 @@ namespace mchlib {
}; };
template <typename OtherRecord> template <typename OtherRecord>
bool file_record_data_lt (const FileRecordData& parLeft, const OtherRecord& parRight) { bool file_record_data_lt (const SetListingItemType& parLeft, const OtherRecord& parRight) {
const FileRecordData& l = parLeft; const SetListingItemType& l = parLeft;
const OtherRecord& r = parRight; const OtherRecord& r = parRight;
return return
(l.level < r.level) (l.level < r.level)
@ -99,14 +99,14 @@ namespace mchlib {
{ {
assert(parBasePath); assert(parBasePath);
assert(m_base_path or m_current == m_end); assert(m_base_path or m_current == m_end);
assert(m_current == m_end or m_base_path->atom_count() == PathName(m_current->abs_path).atom_count()); assert(m_current == m_end or m_base_path->atom_count() == PathName(m_current->path).atom_count() + parLevelOffset);
assert(m_current == m_end or m_base_path->atom_count() == m_current->level + m_level_offset); assert(m_current == m_end or m_base_path->atom_count() == m_current->level + m_level_offset);
//Look for the point where the children of this entry start //Look for the point where the children of this entry start
while ( while (
m_current != m_end and ( m_current != m_end and (
m_current->level + m_level_offset == m_base_path->atom_count() or m_current->level + m_level_offset == m_base_path->atom_count() or
*m_base_path != PathName(m_current->abs_path).pop_right() *m_base_path != PathName(m_current->path).pop_right()
)) { )) {
assert(m_base_path); assert(m_base_path);
++m_current; ++m_current;
@ -157,13 +157,13 @@ namespace mchlib {
template <bool Const> template <bool Const>
void DirIterator<Const>::increment() { void DirIterator<Const>::increment() {
assert(PathName(m_current->abs_path).pop_right() == *m_base_path); assert(PathName(m_current->path).pop_right() == *m_base_path);
do { do {
++m_current; ++m_current;
} while( } while(
m_current != m_end and m_current != m_end and
m_current->level + m_level_offset == m_base_path->atom_count() + 1 and m_current->level + m_level_offset == m_base_path->atom_count() + 1 and
*m_base_path != PathName(m_current->abs_path).pop_right() *m_base_path != PathName(m_current->path).pop_right()
); );
} }
@ -222,7 +222,7 @@ namespace mchlib {
assert(std::equal(m_list.begin(), m_list.end(), SetListing(ListType(m_list), true).sorted_list().begin())); assert(std::equal(m_list.begin(), m_list.end(), SetListing(ListType(m_list), true).sorted_list().begin()));
} }
if (not m_list.empty()) { if (not m_list.empty()) {
m_base_path.reset(new PathName(m_list.front().abs_path)); m_base_path.reset(new PathName(m_list.front().path));
} }
} }
@ -258,7 +258,7 @@ namespace mchlib {
return std::count_if( return std::count_if(
m_list.begin(), m_list.begin(),
m_list.end(), m_list.end(),
[] (const FileRecordData& parItm) { [] (const SetListingItemType& parItm) {
return not parItm.is_directory; return not parItm.is_directory;
} }
); );
@ -268,7 +268,7 @@ namespace mchlib {
return std::count_if( return std::count_if(
m_list.begin(), m_list.begin(),
m_list.end(), m_list.end(),
[] (const FileRecordData& parItm) { [] (const SetListingItemType& parItm) {
return parItm.is_directory; return parItm.is_directory;
} }
); );
@ -279,7 +279,7 @@ namespace mchlib {
} }
void SetListing::sort_list (ListType& parList) { void SetListing::sort_list (ListType& parList) {
std::sort(parList.begin(), parList.end(), &file_record_data_lt<FileRecordData>); std::sort(parList.begin(), parList.end(), &file_record_data_lt<SetListingItemType>);
} }
SetListing::ListType::iterator SetListing::lower_bound (ListType& parList, const char* parPath, uint16_t parLevel, bool parIsDir) { SetListing::ListType::iterator SetListing::lower_bound (ListType& parList, const char* parPath, uint16_t parLevel, bool parIsDir) {
@ -289,17 +289,17 @@ namespace mchlib {
} }
SetListingView<false> SetListing::make_view() { SetListingView<false> SetListing::make_view() {
const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().abs_path).atom_count()); const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().path).atom_count());
return SetListingView<false>(m_list.begin(), m_list.end(), offs, m_base_path); return SetListingView<false>(m_list.begin(), m_list.end(), offs, m_base_path);
} }
SetListingView<true> SetListing::make_view() const { SetListingView<true> SetListing::make_view() const {
const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().abs_path).atom_count()); const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().path).atom_count());
return SetListingView<true>(m_list.begin(), m_list.end(), offs, m_base_path); return SetListingView<true>(m_list.begin(), m_list.end(), offs, m_base_path);
} }
SetListingView<true> SetListing::make_cview() const { SetListingView<true> SetListing::make_cview() const {
const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().abs_path).atom_count()); const auto offs = (m_list.empty() ? 0 : PathName(m_list.front().path).atom_count());
return SetListingView<true>(m_list.begin(), m_list.end(), offs, m_base_path); return SetListingView<true>(m_list.begin(), m_list.end(), offs, m_base_path);
} }
@ -311,7 +311,7 @@ namespace mchlib {
m_level_offset(parIter.m_level_offset) m_level_offset(parIter.m_level_offset)
{ {
if (m_begin != m_end) { if (m_begin != m_end) {
m_base_path.reset(new PathName(m_begin->abs_path)); m_base_path.reset(new PathName(m_begin->path));
} }
} }
@ -323,7 +323,7 @@ namespace mchlib {
m_level_offset(parLevelOffset) m_level_offset(parLevelOffset)
{ {
if (m_begin != m_end) { if (m_begin != m_end) {
m_base_path.reset(new PathName(m_begin->abs_path)); m_base_path.reset(new PathName(m_begin->path));
} }
} }

View file

@ -19,7 +19,6 @@
#define id1B7A42F6E46547A6AB0F914E2A91399F #define id1B7A42F6E46547A6AB0F914E2A91399F
#include "dindexer-common/validationerror.hpp" #include "dindexer-common/validationerror.hpp"
#include "dindexer-common/mediatypes.hpp"
#include <boost/program_options/variables_map.hpp> #include <boost/program_options/variables_map.hpp>
namespace din { namespace din {

View file

@ -19,7 +19,6 @@
#define id259FD7C96B5049ECB50386F25455FBB2 #define id259FD7C96B5049ECB50386F25455FBB2
#include "dindexer-common/validationerror.hpp" #include "dindexer-common/validationerror.hpp"
#include "dindexer-common/mediatypes.hpp"
#include <boost/program_options/variables_map.hpp> #include <boost/program_options/variables_map.hpp>
namespace din { namespace din {

View file

@ -16,7 +16,6 @@ target_link_libraries(${PROJECT_NAME}
PRIVATE ${bare_name}-if PRIVATE ${bare_name}-if
PRIVATE ${bare_name}-common PRIVATE ${bare_name}-common
PRIVATE ${bare_name}-machinery PRIVATE ${bare_name}-machinery
PRIVATE optimized pthread
) )
string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}") string(REPLACE "${bare_name}-" "" ACTION_NAME "${PROJECT_NAME}")

View file

@ -27,14 +27,14 @@ namespace po = boost::program_options;
namespace din { namespace din {
namespace { namespace {
const char g_allowed_types[] = { const char g_allowed_types[] = {
static_cast<char>(dinlib::MediaType_CDRom), static_cast<char>(mchlib::MediaType_CDRom),
static_cast<char>(dinlib::MediaType_Directory), static_cast<char>(mchlib::MediaType_Directory),
static_cast<char>(dinlib::MediaType_DVD), static_cast<char>(mchlib::MediaType_DVD),
static_cast<char>(dinlib::MediaType_BluRay), static_cast<char>(mchlib::MediaType_BluRay),
static_cast<char>(dinlib::MediaType_FloppyDisk), static_cast<char>(mchlib::MediaType_FloppyDisk),
static_cast<char>(dinlib::MediaType_HardDisk), static_cast<char>(mchlib::MediaType_HardDisk),
static_cast<char>(dinlib::MediaType_IomegaZip), static_cast<char>(mchlib::MediaType_IomegaZip),
static_cast<char>(dinlib::MediaType_Other) static_cast<char>(mchlib::MediaType_Other)
}; };
} //unnamed namespace } //unnamed namespace

View file

@ -19,6 +19,7 @@
#define id1B7A42F6E46547A6AB0F914E2A91399F #define id1B7A42F6E46547A6AB0F914E2A91399F
#include <boost/program_options/variables_map.hpp> #include <boost/program_options/variables_map.hpp>
#include "dindexer-machinery/mediatypes.hpp"
#include "dindexer-machinery/mediatype.hpp" #include "dindexer-machinery/mediatype.hpp"
namespace din { namespace din {

View file

@ -21,35 +21,37 @@
#include "dindexer-machinery/recorddata.hpp" #include "dindexer-machinery/recorddata.hpp"
#include "dindexerConfig.h" #include "dindexerConfig.h"
#include "dindexer-machinery/filesearcher.hpp"
#include "dindexer-machinery/indexer.hpp"
#include "dindexer-machinery/machinery_info.hpp" #include "dindexer-machinery/machinery_info.hpp"
#include "dindexer-common/common_info.hpp" #include "dindexer-common/common_info.hpp"
#include "dindexer-common/settings.hpp" #include "dindexer-common/settings.hpp"
#include "dindexer-machinery/guess_content_type.hpp"
#include "commandline.hpp" #include "commandline.hpp"
#include "dbbackend.hpp" #include "dbbackend.hpp"
#include "dindexer-machinery/scantask/dirtree.hpp"
#include "dindexer-machinery/scantask/mediatype.hpp"
#include "dindexer-machinery/scantask/hashing.hpp"
#include "dindexer-machinery/scantask/contenttype.hpp"
#include "dindexer-machinery/scantask/mime.hpp"
#include "dindexer-machinery/scantask/generalfiller.hpp"
#include "dindexer-machinery/scantask/setbasic.hpp"
#include <iostream> #include <iostream>
#include <iomanip> #include <iomanip>
#include <ciso646> #include <ciso646>
#include <sstream>
#include <algorithm>
#include <iterator>
#if defined(WITH_PROGRESS_FEEDBACK)
# include <thread>
# include <mutex>
# include <condition_variable>
#endif
namespace { namespace {
void run_hash_calculation ( mchlib::Indexer& parIndexer, bool parShowProgress ); bool add_to_db ( const std::vector<mchlib::FileRecordData>& parData, const mchlib::SetRecordDataFull& parSet, const dinlib::SettingsDB& parDBSettings, bool parForce=false );
bool add_to_db ( const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, char parContent, const dinlib::SettingsDB& parDBSettings, bool parForce=false ); #if defined(WITH_PROGRESS_FEEDBACK)
void print_progress ( const boost::string_ref parPath, uint64_t parFileBytes, uint64_t parTotalBytes, uint32_t parFileNum );
#endif
} //unnamed namespace } //unnamed namespace
namespace stask = mchlib::scantask;
int main (int parArgc, char* parArgv[]) { int main (int parArgc, char* parArgv[]) {
using std::placeholders::_1; using std::placeholders::_1;
using std::placeholders::_2; using std::placeholders::_2;
using boost::program_options::variables_map; using boost::program_options::variables_map;
using FileRecordDataFiller = stask::GeneralFiller<stask::DirTree::PathList>;
using SetRecordDataFiller = stask::GeneralFiller<mchlib::SetRecordDataFull>;
variables_map vm; variables_map vm;
try { try {
@ -61,11 +63,10 @@ int main (int parArgc, char* parArgv[]) {
std::cerr << err.what() << "\nUse --help for help" << std::endl; std::cerr << err.what() << "\nUse --help for help" << std::endl;
return 2; return 2;
} }
const std::string search_path(vm["search-path"].as<std::string>());
#if defined(WITH_PROGRESS_FEEDBACK) #if defined(WITH_PROGRESS_FEEDBACK)
const bool verbose = (0 == vm.count("quiet")); //const bool verbose = (0 == vm.count("quiet"));
#else #else
const bool verbose = false; //const bool verbose = false;
#endif #endif
dinlib::Settings settings; dinlib::Settings settings;
@ -77,127 +78,31 @@ int main (int parArgc, char* parArgv[]) {
} }
} }
#if defined(WITH_MEDIA_AUTODETECT) bool ignore_read_errors = (vm.count("ignore-errors") > 0);
char set_type; const std::string search_path(vm["search-path"].as<std::string>());
if (0 == vm.count("type")) { const char def_media_type = (vm.count("type") ? vm["type"].as<char>() : 'O');
std::cout << "Analyzing disc... ";
try { std::shared_ptr<stask::SetBasic> setbasic(new stask::SetBasic(std::string(vm["setname"].as<std::string>())));
const auto guessed_type = mchlib::guess_media_type(std::string(search_path)); std::shared_ptr<stask::DirTree> scan_dirtree(new stask::DirTree(search_path));
set_type = guessed_type; std::shared_ptr<stask::MediaType> media_type(new stask::MediaType(setbasic, def_media_type, vm.count("type"), search_path));
std::cout << "Setting type to " << set_type << " (" std::shared_ptr<stask::Hashing> hashing(new stask::Hashing(scan_dirtree, ignore_read_errors));
<< dinlib::media_type_to_str(guessed_type) << ")\n"; std::shared_ptr<stask::ContentType> content_type(new stask::ContentType(setbasic, scan_dirtree, media_type));
} std::shared_ptr<stask::Mime> mime(new stask::Mime(scan_dirtree));
catch (const std::runtime_error& e) { std::shared_ptr<FileRecordDataFiller> filerecdata(new FileRecordDataFiller(mime, hashing));
std::cout << '\n'; std::shared_ptr<SetRecordDataFiller> setrecdata(new SetRecordDataFiller(media_type, content_type));
std::cerr << e.what();
return 1; #if defined(WITH_PROGRESS_FEEDBACK)
} hashing->set_progress_callback(&print_progress);
}
else {
set_type = vm["type"].as<char>();
}
#else
const char set_type = vm["type"].as<char>();
#endif #endif
std::cout << "constructing...\n"; if (not add_to_db(filerecdata->get_or_create(), setrecdata->get_or_create(), settings.db)) {
std::cerr << "Not written to DB, likely because a set with the same hash already exists\n";
mchlib::Indexer indexer;
indexer.ignore_read_errors(vm.count("ignore-errors") > 0);
fastf::FileSearcher searcher(search_path);
fastf::FileSearcher::ConstCharVecType ext, ignore;
searcher.SetFollowSymlinks(true);
searcher.SetCallback(fastf::FileSearcher::CallbackType(std::bind(&mchlib::Indexer::add_path, &indexer, _1, _2)));
searcher.Search(ext, ignore);
if (verbose) {
std::cout << "Fetching items list...\n";
}
if (indexer.empty()) {
std::cerr << "Nothing found at the given location, quitting\n";
return 1;
}
else {
run_hash_calculation(indexer, verbose);
//TODO: guess_content_type() relies on FileRecordData::path being set to
//the relative path already. Unfortunately at this point it just got
//default-initialized to be the same as abs_path, so for a video DVD, for
//example, it's going to be like "/mnt/cdrom/VIDEO_TS" instead of just
//"VIDEO_TS". This will cause guess_content_type() to miss. Relative
//paths are populated at the end of calculate_hash(), so until I come up
//with a better system I'm just moving content detection to after hash
//calculation.
const auto set_type_casted = dinlib::char_to_media_type(set_type);
const mchlib::ContentTypes content = mchlib::guess_content_type(set_type_casted, indexer.record_data());
const char content_type = mchlib::content_type_to_char(content);
if (verbose) {
std::cout << "Writing to database...\n";
}
if (not add_to_db(indexer.record_data(), vm["setname"].as<std::string>(), set_type, content_type, settings.db)) {
std::cerr << "Not written to DB, likely because a set with the same hash already exists\n";
}
} }
return 0; return 0;
} }
namespace { namespace {
void run_hash_calculation (mchlib::Indexer& parIndexer, bool parShowProgress) { bool add_to_db (const std::vector<mchlib::FileRecordData>& parData, const mchlib::SetRecordDataFull& parSet, const dinlib::SettingsDB& parDBSettings, bool parForce) {
if (parIndexer.empty()) {
return;
}
#if !defined(WITH_PROGRESS_FEEDBACK)
parShowProgress = false;
#endif
if (not parShowProgress) {
//Hashing file /mnt/cdrom/Sacred 2/Fallen Angel/UK/Sacred.2.Fallen.Angel-ArenaBG/DISC2/S2DISC2.md1... 512c201321ed01cc2a82c9f80bfeaaa673bc8eb3cea4e5c1
//terminate called after throwing an instance of 'std::ios_base::failure'
//what(): basic_filebuf::xsgetn error reading the file
//Hashing file /mnt/cdrom/Sacred 2/Fallen Angel/UK/Sacred.2.Fallen.Angel-ArenaBG/DISC2/S2DISC2.mdf...Annullato
parIndexer.calculate_hash();
}
#if defined(WITH_PROGRESS_FEEDBACK)
else {
typedef std::ostream_iterator<char> cout_iterator;
std::cout << "Processing";
std::cout.flush();
const auto total_items = parIndexer.total_items();
std::thread hash_thread(&mchlib::Indexer::calculate_hash, &parIndexer);
std::mutex progress_print;
std::size_t clear_size = 0;
const auto digit_count = static_cast<std::size_t>(std::log10(static_cast<double>(total_items))) + 1;
do {
//TODO: fix this steaming pile of crap
//std::unique_lock<std::mutex> lk(progress_print);
//parIndexer.step_notify().wait(lk);
std::cout << '\r';
std::fill_n(cout_iterator(std::cout), clear_size, ' ');
std::cout << '\r';
{
std::ostringstream oss;
const auto item_index = std::min(total_items - 1, parIndexer.processed_items());
oss << "Processing file "
<< std::setw(digit_count) << std::setfill(' ') << (item_index + 1)
<< " of " << total_items << " \"" << parIndexer.current_item() << '"';
const auto msg(oss.str());
clear_size = msg.size();
std::cout << msg;
std::cout.flush();
}
} while (false); //parIndexer.processed_items() != total_items);
hash_thread.join();
if (parIndexer.processed_items() > 0) {
std::cout << '\n';
}
}
#endif
}
bool add_to_db (const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, char parContentType, const dinlib::SettingsDB& parDBSettings, bool parForce) {
using mchlib::FileRecordData; using mchlib::FileRecordData;
using mchlib::SetRecordDataFull; using mchlib::SetRecordDataFull;
using mchlib::SetRecordData; using mchlib::SetRecordData;
@ -212,11 +117,18 @@ namespace {
} }
} }
SetRecordData set_data {parSetName, parType, parContentType }; const SetRecordData& set_data {parSet.name, parSet.type, parSet.content_type };
const auto app_signature = dinlib::dindexer_signature(); const auto app_signature = dinlib::dindexer_signature();
const auto lib_signature = mchlib::lib_signature(); const auto lib_signature = mchlib::lib_signature();
const std::string signature = std::string(app_signature.data(), app_signature.size()) + "/" + std::string(lib_signature.data(), lib_signature.size()); const std::string signature = std::string(app_signature.data(), app_signature.size()) + "/" + std::string(lib_signature.data(), lib_signature.size());
din::write_to_db(parDBSettings, parData, set_data, signature); din::write_to_db(parDBSettings, parData, set_data, signature);
return true; return true;
} }
#if defined(WITH_PROGRESS_FEEDBACK)
void print_progress (const boost::string_ref parPath, uint64_t /*parFileBytes*/, uint64_t parTotalBytes, uint32_t parFileNum) {
std::cout << "Hashing file " << parFileNum << " \"" << parPath << "\" (" << parTotalBytes << " bytes hashed)\r";
std::cout.flush();
}
#endif
} //unnamed namespace } //unnamed namespace

View file

@ -23,7 +23,7 @@
namespace { namespace {
template <std::size_t N> template <std::size_t N>
void detect_type (mchlib::FileRecordData (&parTestData)[N], mchlib::ContentTypes parExpected, dinlib::MediaTypes parMediaType) { void detect_type (mchlib::FileRecordData (&parTestData)[N], mchlib::ContentTypes parExpected, mchlib::MediaTypes parMediaType) {
using mchlib::SetListing; using mchlib::SetListing;
using mchlib::FileRecordData; using mchlib::FileRecordData;
@ -52,7 +52,7 @@ TEST(machinery, guess_content_type) {
FileRecordData("VIDEO_TS/VTS_01_0.VOB",0,0,2,false,false), FileRecordData("VIDEO_TS/VTS_01_0.VOB",0,0,2,false,false),
FileRecordData("VIDEO_TS/VIDEO_TS.VOB",0,0,2,false,false) FileRecordData("VIDEO_TS/VIDEO_TS.VOB",0,0,2,false,false)
}; };
detect_type(test_data, mchlib::ContentType_VideoDVD, dinlib::MediaType_DVD); detect_type(test_data, mchlib::ContentType_VideoDVD, mchlib::MediaType_DVD);
} }
{ {
@ -63,7 +63,7 @@ TEST(machinery, guess_content_type) {
FileRecordData("some_file.bin",0,0,1,false,false), FileRecordData("some_file.bin",0,0,1,false,false),
FileRecordData("another_dir/VTS_01_0.BUP",0,0,2,false,false) FileRecordData("another_dir/VTS_01_0.BUP",0,0,2,false,false)
}; };
detect_type(test_data, mchlib::ContentType_Generic, dinlib::MediaType_Directory); detect_type(test_data, mchlib::ContentType_Generic, mchlib::MediaType_Directory);
} }
{ {
@ -81,6 +81,6 @@ TEST(machinery, guess_content_type) {
FileRecordData("CDI",0,0,1,true,false), FileRecordData("CDI",0,0,1,true,false),
FileRecordData("KARAOKE",0,0,1,true,false) FileRecordData("KARAOKE",0,0,1,true,false)
}; };
detect_type(test_data, mchlib::ContentType_VideoCD, dinlib::MediaType_CDRom); detect_type(test_data, mchlib::ContentType_VideoCD, mchlib::MediaType_CDRom);
} }
} }