1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2025-08-12 14:29:50 +00:00

Add Glob class and use that one to help guessing content types.

This commit is contained in:
King_DuckZ 2016-02-20 01:36:31 +01:00
parent 8b1b9c48f4
commit 2e77e4dc0b
5 changed files with 171 additions and 75 deletions

View file

@ -15,6 +15,7 @@ add_library(${PROJECT_NAME} SHARED
machinery_info.cpp machinery_info.cpp
guess_content_type.cpp guess_content_type.cpp
set_listing.cpp set_listing.cpp
globbing.cpp
) )
#target_include_directories(${PROJECT_NAME} #target_include_directories(${PROJECT_NAME}

View file

@ -0,0 +1,36 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "globbing.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include <fnmatch.h>
#include <cassert>
#include <ciso646>
#include <string>
namespace mchlib {
namespace implem {
//Tells whether the path stored in parData matches the shell-style wildcard
//pattern parGlob.
//
//Matching is delegated to fnmatch() with the FNM_PATHNAME flag, so a '/' in
//the path can only be matched by a literal '/' in the pattern and never by
//a wildcard.
//
//parData: record whose `path` member is tested. path.data() is handed to
//         fnmatch() as a C string, so it must be null-terminated (checked
//         by the assert below in debug builds only).
//parGlob: the pattern to match against. A null pointer is accepted and
//         never matches anything.
//
//Returns true only when fnmatch() returns 0 (a match); a non-match or an
//fnmatch() error both yield false.
bool glob_matches (const FileRecordData& parData, const char* parGlob) {
	//fnmatch() must not be given a null pattern; treat "no pattern" as
	//"no match" instead of invoking undefined behaviour.
	if (not parGlob)
		return false;

	//assert that the substring in path terminates at the same place
	//where the one in abs_path terminates (ie: it's null-terminated).
	assert(parData.path == std::string(parData.path.data()));

	//See https://github.com/FlibbleMr/neolib/blob/master/include/neolib/string_utils.hpp
	//for an alternative to fnmatch() (grep wildcard_match)
	const int fnmatch_result = fnmatch(parGlob, parData.path.data(), FNM_PATHNAME);
	return 0 == fnmatch_result;
}
} //namespace implem
} //namespace mchlib

View file

@ -0,0 +1,94 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef idAE026EED2DF54B118943156FB36AA46E
#define idAE026EED2DF54B118943156FB36AA46E
#include <boost/iterator/filter_iterator.hpp>
#include <functional>
namespace mchlib {
struct FileRecordData;
template <typename I>
using globbing_iterator =
boost::filter_iterator<
std::function<bool(const FileRecordData&)>,
I
>;
template <typename I>
globbing_iterator<I> make_globbing_iterator ( I parBegin, I parEnd, const char* parGlob );
namespace implem {
bool glob_matches ( const FileRecordData& parData, const char* parGlob );
} //namespace implem
//Builds a filter iterator positioned at parBegin that only stops on the
//elements of [parBegin, parEnd) for which implem::glob_matches() returns
//true with the given pattern.
//
//parGlob is captured as a raw pointer, not copied: the pointed-to string has
//to stay valid for as long as the returned iterator (or any copy of it) is
//in use.
template <typename I>
inline
globbing_iterator<I> make_globbing_iterator (I parBegin, I parEnd, const char* parGlob) {
	using Predicate = std::function<bool(const FileRecordData&)>;

	Predicate matches_glob = [parGlob](const FileRecordData& parEntry) {
		return implem::glob_matches(parEntry, parGlob);
	};
	return boost::make_filter_iterator<Predicate>(matches_glob, parBegin, parEnd);
}
//Iterable view over the range [parBegin, parEnd) that only exposes the
//elements matching a glob pattern. begin() and end() return filter
//iterators built by make_globbing_iterator().
//
//The pattern is stored as a plain pointer and is never copied: the caller
//must keep the pointed-to string alive for as long as this object, or any
//iterator obtained from it, is in use.
//
//Whenever no pattern is available (two-iterator constructor, or a null
//pointer passed to the pattern constructor or to set_pattern()) the pattern
//falls back to "*".
template <typename I>
class Glob {
public:
	using const_iterator = globbing_iterator<I>;

	//View over [parBegin, parEnd) using the fallback pattern "*".
	explicit Glob ( I parBegin, I parEnd ) :
		m_beg(parBegin),
		m_end(parEnd),
		m_pattern(pattern_or_default(nullptr))
	{
	}

	//View over [parBegin, parEnd) using parPattern. A null parPattern is
	//replaced by "*", same as set_pattern() does.
	Glob ( const char* parPattern, I parBegin, I parEnd ) :
		m_beg(parBegin),
		m_end(parEnd),
		m_pattern(pattern_or_default(parPattern))
	{
	}

	//Iterator to the first matching element (or end() if there is none).
	const_iterator begin() const {
		return make_globbing_iterator(m_beg, m_end, m_pattern);
	}

	//Past-the-end iterator for the current pattern.
	const_iterator end() const {
		return make_globbing_iterator(m_end, m_end, m_pattern);
	}

	//Replaces the pattern used by subsequent begin()/end() calls. Passing
	//a null pointer restores the fallback pattern "*".
	void set_pattern ( const char* parPattern ) {
		m_pattern = pattern_or_default(parPattern);
	}

private:
	//Single place where the "null means match-all" policy is applied, so
	//that the constructors and set_pattern() cannot drift apart.
	static const char* pattern_or_default ( const char* parPattern ) {
		return (parPattern ? parPattern : "*");
	}

	I m_beg;
	I m_end;
	const char* m_pattern; //not owned, never null
};
} //namespace mchlib
#endif

View file

@ -21,7 +21,10 @@
#include "dindexer-machinery/guess_content_type.hpp" #include "dindexer-machinery/guess_content_type.hpp"
#include "dindexer-machinery/set_listing.hpp" #include "dindexer-machinery/set_listing.hpp"
#include "dindexer-machinery/set_listing_helpers.hpp" #include "dindexer-machinery/set_listing_helpers.hpp"
#include "globbing.hpp"
#include <boost/iterator/filter_iterator.hpp> #include <boost/iterator/filter_iterator.hpp>
#include <boost/iterator/indirect_iterator.hpp>
#include <boost/range/empty.hpp>
#include <cstdint> #include <cstdint>
#include <algorithm> #include <algorithm>
#include <functional> #include <functional>
@ -31,15 +34,13 @@
namespace mchlib { namespace mchlib {
namespace { namespace {
using FoundItemPair = std::pair<bool, ConstSetListingView::const_iterator>;
template <typename O, uint16_t L> template <typename O, uint16_t L>
struct IsLevelLikeO { struct IsLevelLikeO {
bool operator() ( const FileRecordData& parEntry ); bool operator() ( const FileRecordData& parEntry );
}; };
struct EntryChecking { struct EntryChecking {
typedef bool(*CheckerFunction)(dinlib::MediaTypes, const ConstSetListingView&); typedef bool(*CheckerFunction)(dinlib::MediaTypes, const ConstSetListingView&, const std::vector<const FileRecordData*>&);
std::size_t max_total_entries; std::size_t max_total_entries;
CheckerFunction checker_func; CheckerFunction checker_func;
@ -64,96 +65,57 @@ namespace mchlib {
return std::move(retval); return std::move(retval);
} }
bool is_path_eq (const char* parPath, const FileRecordData& parEntry) { std::vector<int> check_missing_content (const std::vector<const FileRecordData*>& parContent, const std::vector<const char*>& parCheck) {
return (parEntry.path == parPath); using boost::make_indirect_iterator;
} using FileRecordIterator =
boost::indirect_iterator<std::vector<const FileRecordData*>::const_iterator>;
//std::vector<int> check_missing_content (const ConstSetListingView& parContent, const std::vector<const char*>& parCheck) { std::vector<int> retval;
// std::vector<int> retval; auto glob = Glob<FileRecordIterator>(
// for (int z = 0; z < static_cast<int>(parCheck.size()), ++z) { make_indirect_iterator(parContent.begin()),
// auto glob_range = glob(parContent, parCheck[z]); make_indirect_iterator(parContent.end())
// if (boost::empty(glob_range)) {
// retval.push_back(z);
// }
// }
// return std::move(retval);
//}
FoundItemPair find_item (const ConstSetListingView& parContent, const char* parPath) {
auto it = std::find_if(
parContent.begin(),
parContent.end(),
[parPath](const FileRecordData& parEntry) {
return (parEntry.path == parPath);
}
); );
return std::make_pair(parContent.end() != it, it); for (int z = 0; z < static_cast<int>(parCheck.size()); ++z) {
glob.set_pattern(parCheck[z]);
if (boost::empty(glob)) {
retval.push_back(z);
}
}
return std::move(retval);
} }
bool identify_video_dvd (dinlib::MediaTypes parMediaType, const ConstSetListingView& parContent) { bool identify_video_dvd (dinlib::MediaTypes parMediaType, const ConstSetListingView& parContent, const std::vector<const FileRecordData*>& parFlatContent ) {
if (parMediaType != dinlib::MediaType_DVD) { if (parMediaType != dinlib::MediaType_DVD and parMediaType != dinlib::MediaType_Directory)
return false; return false;
}
const auto items_count = count_listing_items(parContent); const auto items_count = count_listing_items(parContent);
if (items_count < 2) { if (items_count < 2) {
return false; return false;
} }
auto it_video_ts = std::find_if(parContent.begin(), parContent.end(), std::bind(&is_path_eq, "VIDEO_TS", std::placeholders::_1)); const std::vector<const char*> should_have {
if (parContent.end() == it_video_ts) { "VIDEO_TS/*.VOB",
return false; "AUDIO_TS"
};
return check_missing_content(parFlatContent, should_have).empty();
} }
auto it_audio_ts = std::find_if(parContent.begin(), parContent.end(), std::bind(&is_path_eq, "AUDIO_TS", std::placeholders::_1)); bool identify_video_cd (dinlib::MediaTypes parMediaType, const ConstSetListingView& parContent, const std::vector<const FileRecordData*>& parFlatContent) {
if (parContent.end() == it_audio_ts) { if (parMediaType != dinlib::MediaType_CDRom and parMediaType != dinlib::MediaType_Directory)
return false;
}
return true;
}
bool identify_video_cd (dinlib::MediaTypes parMediaType, const ConstSetListingView& parContent) {
if (parMediaType != dinlib::MediaType_CDRom)
return false; return false;
const auto items_count = count_listing_items(parContent); const auto items_count = count_listing_items(parContent);
if (items_count < 4) if (items_count < 4)
return false; return false;
//const std::vector<const char*> should_have { const std::vector<const char*> should_have {
// "SVCD/*.VCD", "SVCD/*.VCD",
// "MPEGAV/AVSEQ??.DAT", "MPEGAV/AVSEQ??.DAT",
// "SEGMENT/ITEM???.DAT", "SEGMENT/ITEM???.DAT",
// "CDI" "CDI"
//}; };
return check_missing_content(parFlatContent, should_have).empty();
auto found = find_item(parContent, "SVCD");
if (not found.first or not found.second->is_directory)
return false;
found = find_item(parContent, "MPEGAV");
if (not found.first or not found.second->is_directory)
return false;
found = find_item(parContent, "SEGMENT");
if (not found.first or not found.second->is_directory)
return false;
found = find_item(parContent, "CDI");
if (not found.first or not found.second->is_directory)
return false;
//FileRecordData("SVCD",0,0,1,true,false),
//FileRecordData("SVCD/INFO.VCD",0,0,2,false,false),
//FileRecordData("SVCD/ENTRIES.VCD",0,0,2,false,false),
//FileRecordData("SVCD/SEARCH.DAT",0,0,2,false,false),
//FileRecordData("SVCD/PSD.VCD",0,0,2,false,false),
//FileRecordData("MPEGAV",0,0,1,true,false),
//FileRecordData("MPEGAV/AVSEQ01.DAT",0,0,2,false,false),
//FileRecordData("SEGMENT",0,0,1,true,false),
//FileRecordData("SEGMENT/ITEM001.DAT",0,0,2,false,false),
//FileRecordData("CDI",0,0,1,true,false),
//FileRecordData("KARAOKE",0,0,1,true,false)
return true;
} }
} //unnamed namespace } //unnamed namespace
@ -164,10 +126,11 @@ namespace mchlib {
}; };
const auto total_entries = (parEntriesCount ? parEntriesCount : count_listing_items_recursive(parContent)); const auto total_entries = (parEntriesCount ? parEntriesCount : count_listing_items_recursive(parContent));
auto flattened = flattened_listing(parContent);
for (const auto& chk : checker_chain) { for (const auto& chk : checker_chain) {
if (chk.max_total_entries and chk.max_total_entries >= total_entries) { if (chk.max_total_entries and chk.max_total_entries >= total_entries) {
if (chk.checker_func(parMediaType, parContent)) { if (chk.checker_func(parMediaType, parContent, flattened)) {
return chk.content_type; return chk.content_type;
} }
} }

View file

@ -48,7 +48,9 @@ TEST(machinery, guess_content_type) {
FileRecordData("",0,0,0,true,false), FileRecordData("",0,0,0,true,false),
FileRecordData("VIDEO_TS",0,0,1,true,false), FileRecordData("VIDEO_TS",0,0,1,true,false),
FileRecordData("AUDIO_TS",0,0,1,true,false), FileRecordData("AUDIO_TS",0,0,1,true,false),
FileRecordData("VIDEO_TS/VTS_01_0.BUP",0,0,2,false,false) FileRecordData("VIDEO_TS/VTS_01_0.BUP",0,0,2,false,false),
FileRecordData("VIDEO_TS/VTS_01_0.VOB",0,0,2,false,false),
FileRecordData("VIDEO_TS/VIDEO_TS.VOB",0,0,2,false,false)
}; };
detect_type(test_data, mchlib::ContentType_VideoDVD, dinlib::MediaType_DVD); detect_type(test_data, mchlib::ContentType_VideoDVD, dinlib::MediaType_DVD);
} }