mirror of
https://github.com/KingDuckZ/dindexer.git
synced 2025-02-19 12:04:54 +00:00
Detect content type after scanning and save to DB.
This commit is contained in:
parent
2e77e4dc0b
commit
22614432a9
9 changed files with 141 additions and 36 deletions
|
@ -4,7 +4,7 @@
|
|||
|
||||
-- Dumped from database version 9.4.5
|
||||
-- Dumped by pg_dump version 9.4.5
|
||||
-- Started on 2016-01-29 20:18:23 CET
|
||||
-- Started on 2016-02-22 08:40:56 CET
|
||||
|
||||
SET statement_timeout = 0;
|
||||
SET lock_timeout = 0;
|
||||
|
@ -22,7 +22,7 @@ CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;
|
|||
|
||||
|
||||
--
|
||||
-- TOC entry 2044 (class 0 OID 0)
|
||||
-- TOC entry 2045 (class 0 OID 0)
|
||||
-- Dependencies: 176
|
||||
-- Name: EXTENSION plpgsql; Type: COMMENT; Schema: -; Owner:
|
||||
--
|
||||
|
@ -112,7 +112,7 @@ CREATE TABLE files (
|
|||
ALTER TABLE files OWNER TO @DB_OWNER_NAME@;
|
||||
|
||||
--
|
||||
-- TOC entry 2045 (class 0 OID 0)
|
||||
-- TOC entry 2046 (class 0 OID 0)
|
||||
-- Dependencies: 172
|
||||
-- Name: CONSTRAINT chk_hash_0 ON files; Type: COMMENT; Schema: public; Owner: @DB_OWNER_NAME@
|
||||
--
|
||||
|
@ -136,7 +136,7 @@ CREATE SEQUENCE files_id_seq
|
|||
ALTER TABLE files_id_seq OWNER TO @DB_OWNER_NAME@;
|
||||
|
||||
--
|
||||
-- TOC entry 2046 (class 0 OID 0)
|
||||
-- TOC entry 2047 (class 0 OID 0)
|
||||
-- Dependencies: 173
|
||||
-- Name: files_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: @DB_OWNER_NAME@
|
||||
--
|
||||
|
@ -156,6 +156,7 @@ CREATE TABLE sets (
|
|||
disk_number integer DEFAULT 0 NOT NULL,
|
||||
creation timestamp with time zone DEFAULT now() NOT NULL,
|
||||
app_name character varying NOT NULL,
|
||||
content_type character(1) DEFAULT 'G'::bpchar NOT NULL,
|
||||
CONSTRAINT chk_sets_type CHECK (((((((((type = 'C'::bpchar) OR (type = 'D'::bpchar)) OR (type = 'V'::bpchar)) OR (type = 'B'::bpchar)) OR (type = 'F'::bpchar)) OR (type = 'H'::bpchar)) OR (type = 'Z'::bpchar)) OR (type = 'O'::bpchar)))
|
||||
);
|
||||
|
||||
|
@ -163,7 +164,7 @@ CREATE TABLE sets (
|
|||
ALTER TABLE sets OWNER TO @DB_OWNER_NAME@;
|
||||
|
||||
--
|
||||
-- TOC entry 2047 (class 0 OID 0)
|
||||
-- TOC entry 2048 (class 0 OID 0)
|
||||
-- Dependencies: 174
|
||||
-- Name: COLUMN sets.type; Type: COMMENT; Schema: public; Owner: @DB_OWNER_NAME@
|
||||
--
|
||||
|
@ -193,7 +194,7 @@ CREATE SEQUENCE sets_id_seq
|
|||
ALTER TABLE sets_id_seq OWNER TO @DB_OWNER_NAME@;
|
||||
|
||||
--
|
||||
-- TOC entry 2048 (class 0 OID 0)
|
||||
-- TOC entry 2049 (class 0 OID 0)
|
||||
-- Dependencies: 175
|
||||
-- Name: sets_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: @DB_OWNER_NAME@
|
||||
--
|
||||
|
@ -218,7 +219,7 @@ ALTER TABLE ONLY sets ALTER COLUMN id SET DEFAULT nextval('sets_id_seq'::regclas
|
|||
|
||||
|
||||
--
|
||||
-- TOC entry 1921 (class 2606 OID 16414)
|
||||
-- TOC entry 1922 (class 2606 OID 16414)
|
||||
-- Name: pk_files_id; Type: CONSTRAINT; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
|
||||
--
|
||||
|
||||
|
@ -227,7 +228,7 @@ ALTER TABLE ONLY files
|
|||
|
||||
|
||||
--
|
||||
-- TOC entry 1925 (class 2606 OID 16416)
|
||||
-- TOC entry 1926 (class 2606 OID 16416)
|
||||
-- Name: pk_sets_id; Type: CONSTRAINT; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
|
||||
--
|
||||
|
||||
|
@ -236,7 +237,7 @@ ALTER TABLE ONLY sets
|
|||
|
||||
|
||||
--
|
||||
-- TOC entry 1923 (class 2606 OID 16418)
|
||||
-- TOC entry 1924 (class 2606 OID 16418)
|
||||
-- Name: uniq_item; Type: CONSTRAINT; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
|
||||
--
|
||||
|
||||
|
@ -245,7 +246,7 @@ ALTER TABLE ONLY files
|
|||
|
||||
|
||||
--
|
||||
-- TOC entry 1917 (class 1259 OID 16419)
|
||||
-- TOC entry 1918 (class 1259 OID 16419)
|
||||
-- Name: fki_files_sets; Type: INDEX; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
|
||||
--
|
||||
|
||||
|
@ -253,7 +254,7 @@ CREATE INDEX fki_files_sets ON files USING btree (group_id);
|
|||
|
||||
|
||||
--
|
||||
-- TOC entry 1918 (class 1259 OID 16420)
|
||||
-- TOC entry 1919 (class 1259 OID 16420)
|
||||
-- Name: idx_mimetype; Type: INDEX; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
|
||||
--
|
||||
|
||||
|
@ -261,7 +262,7 @@ CREATE INDEX idx_mimetype ON files USING btree (mimetype, charset);
|
|||
|
||||
|
||||
--
|
||||
-- TOC entry 1919 (class 1259 OID 16421)
|
||||
-- TOC entry 1920 (class 1259 OID 16421)
|
||||
-- Name: idx_paths; Type: INDEX; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
|
||||
--
|
||||
|
||||
|
@ -269,7 +270,7 @@ CREATE INDEX idx_paths ON files USING btree (path);
|
|||
|
||||
|
||||
--
|
||||
-- TOC entry 1927 (class 2620 OID 16422)
|
||||
-- TOC entry 1928 (class 2620 OID 16422)
|
||||
-- Name: triggerupcasehash; Type: TRIGGER; Schema: public; Owner: @DB_OWNER_NAME@
|
||||
--
|
||||
|
||||
|
@ -277,7 +278,7 @@ CREATE TRIGGER triggerupcasehash BEFORE INSERT OR UPDATE ON files FOR EACH ROW E
|
|||
|
||||
|
||||
--
|
||||
-- TOC entry 1926 (class 2606 OID 16423)
|
||||
-- TOC entry 1927 (class 2606 OID 16423)
|
||||
-- Name: fk_files_sets; Type: FK CONSTRAINT; Schema: public; Owner: @DB_OWNER_NAME@
|
||||
--
|
||||
|
||||
|
@ -286,7 +287,7 @@ ALTER TABLE ONLY files
|
|||
|
||||
|
||||
--
|
||||
-- TOC entry 2043 (class 0 OID 0)
|
||||
-- TOC entry 2044 (class 0 OID 0)
|
||||
-- Dependencies: 6
|
||||
-- Name: public; Type: ACL; Schema: -; Owner: postgres
|
||||
--
|
||||
|
@ -297,7 +298,7 @@ GRANT ALL ON SCHEMA public TO postgres;
|
|||
GRANT ALL ON SCHEMA public TO PUBLIC;
|
||||
|
||||
|
||||
-- Completed on 2016-01-29 20:18:23 CET
|
||||
-- Completed on 2016-02-22 08:40:57 CET
|
||||
|
||||
--
|
||||
-- PostgreSQL database dump complete
|
||||
|
|
62
include/dindexer-common/compatibility.h
Normal file
62
include/dindexer-common/compatibility.h
Normal file
|
@ -0,0 +1,62 @@
|
|||
/* Copyright 2015, 2016, Michele Santullo
|
||||
* This file is part of "dindexer".
|
||||
*
|
||||
* "dindexer" is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* "dindexer" is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef id45CDD1DAEF4F42968E3C89F68FDDA9BC
|
||||
#define id45CDD1DAEF4F42968E3C89F68FDDA9BC
|
||||
|
||||
/*
 * Portable spellings for the function attributes used by dindexer.
 *
 * a_flatten, a_always_inline, a_pure and a_deprecated are defined on every
 * path through this block: each expands either to the matching
 * __attribute__((...)) or to nothing. Code can therefore write
 * `int foo() a_pure;` without caring which compiler is in use.
 */
#if defined(__GNUC__)
#  if defined(__clang__)
#    if !defined(__has_attribute)
       //This clang cannot be asked which attributes it supports.
       //TODO: fall back to version number comparing. Until then expand to
       //nothing, so that the macros are at least always defined and code
       //using them keeps compiling.
#      define a_flatten
#      define a_always_inline
#      define a_pure
#      define a_deprecated
#    else
#      if __has_attribute(flatten)
#        define a_flatten __attribute__((flatten))
#      else
#        define a_flatten
#      endif
#      if __has_attribute(always_inline)
#        define a_always_inline __attribute__((always_inline))
#      else
#        define a_always_inline
#      endif
#      if __has_attribute(pure)
#        define a_pure __attribute__((pure))
#      else
#        define a_pure
#      endif
#      if __has_attribute(deprecated)
#        define a_deprecated __attribute__((deprecated))
#      else
#        define a_deprecated
#      endif
#    endif
#  else
     //Fix here if you get warnings about unsupported attributes on your compiler
#    define a_flatten __attribute__((flatten))
#    define a_always_inline __attribute__((always_inline))
#    define a_pure __attribute__((pure))
#    define a_deprecated __attribute__((deprecated))
#  endif
#else
#  warning "Unsupported compiler, please fill this section or file a bug"
#  define a_flatten
#  define a_always_inline
#  define a_pure
#  define a_deprecated
#endif
|
||||
|
||||
#endif
|
|
@ -18,6 +18,7 @@
|
|||
#ifndef id700AFD0F33634ACC88079BB8853A9E13
|
||||
#define id700AFD0F33634ACC88079BB8853A9E13
|
||||
|
||||
#include "dindexer-common/compatibility.h"
|
||||
#include <string>
|
||||
|
||||
namespace dinlib {
|
||||
|
@ -33,6 +34,7 @@ namespace dinlib {
|
|||
};
|
||||
|
||||
const std::string& media_type_to_str ( MediaTypes parType );
|
||||
MediaTypes char_to_media_type ( char parMType ) a_pure;
|
||||
} //namespace dinlib
|
||||
|
||||
#endif
|
||||
|
|
|
@ -20,21 +20,27 @@
|
|||
|
||||
#include "dindexer-common/mediatypes.hpp"
|
||||
#include "dindexer-machinery/recorddata.hpp"
|
||||
#include "dindexer-common/compatibility.h"
|
||||
#include <vector>
|
||||
|
||||
namespace mchlib {
|
||||
enum ContentTypes {
|
||||
ContentType_Generic,
|
||||
ContentType_Backup,
|
||||
ContentType_VideoDVD,
|
||||
ContentType_VideoBD,
|
||||
ContentType_VideoCD,
|
||||
ContentType_Unknown
|
||||
ContentType_Empty = 'E',
|
||||
ContentType_Generic = 'G',
|
||||
ContentType_Backup = 'A',
|
||||
ContentType_VideoDVD = 'D',
|
||||
ContentType_VideoBD = 'B',
|
||||
ContentType_VideoCD = 'C',
|
||||
ContentType_Unknown = 'U'
|
||||
};
|
||||
|
||||
template <bool> class SetListingView;
|
||||
|
||||
ContentTypes guess_content_type ( dinlib::MediaTypes parMediaType, const SetListingView<true>& parContent, std::size_t parEntriesCount=0 );
|
||||
ContentTypes guess_content_type ( dinlib::MediaTypes parMediaType, const std::vector<FileRecordData>& parContent );
|
||||
|
||||
char content_type_to_char ( ContentTypes parCType ) a_pure;
|
||||
ContentTypes char_to_content_type ( char parCType ) a_pure;
|
||||
} //namespace mchlib
|
||||
|
||||
#endif
|
||||
|
|
|
@ -73,15 +73,15 @@ namespace mchlib {
|
|||
bool hash_valid;
|
||||
};
|
||||
|
||||
struct SetRecordDataFull {
|
||||
std::string name;
|
||||
uint32_t disk_number;
|
||||
struct SetRecordData {
|
||||
boost::string_ref name;
|
||||
char type;
|
||||
char content_type;
|
||||
};
|
||||
|
||||
struct SetRecordData {
|
||||
const boost::string_ref name;
|
||||
const char type;
|
||||
struct SetRecordDataFull : public SetRecordData {
|
||||
std::string name;
|
||||
uint32_t disk_number;
|
||||
};
|
||||
} //namespace mchlib
|
||||
|
||||
|
|
|
@ -39,4 +39,8 @@ namespace dinlib {
|
|||
}
|
||||
return it_ret->second;
|
||||
}
|
||||
|
||||
//Reinterprets a raw character as a MediaTypes value.
//The conversion is a plain static_cast: no check is made that parMType
//matches one of the declared enumerators.
MediaTypes char_to_media_type (char parMType) {
	const MediaTypes converted = static_cast<MediaTypes>(parMType);
	return converted;
}
|
||||
} //namespace dinlib
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include "dindexer-machinery/set_listing.hpp"
|
||||
#include "dindexer-machinery/set_listing_helpers.hpp"
|
||||
#include "globbing.hpp"
|
||||
#include "pathname.hpp"
|
||||
#include <boost/iterator/filter_iterator.hpp>
|
||||
#include <boost/iterator/indirect_iterator.hpp>
|
||||
#include <boost/range/empty.hpp>
|
||||
|
@ -31,6 +32,7 @@
|
|||
#include <ciso646>
|
||||
#include <regex>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
|
||||
namespace mchlib {
|
||||
namespace {
|
||||
|
@ -120,6 +122,9 @@ namespace mchlib {
|
|||
} //unnamed namespace
|
||||
|
||||
ContentTypes guess_content_type (dinlib::MediaTypes parMediaType, const ConstSetListingView& parContent, std::size_t parEntriesCount) {
|
||||
if (boost::empty(parContent))
|
||||
return ContentType_Empty;
|
||||
|
||||
std::vector<EntryChecking> checker_chain {
|
||||
{ 100, &identify_video_dvd, ContentType_VideoDVD },
|
||||
{ 200, &identify_video_cd, ContentType_VideoCD }
|
||||
|
@ -137,6 +142,24 @@ namespace mchlib {
|
|||
}
|
||||
return ContentType_Generic;
|
||||
}
|
||||
|
||||
//Convenience overload: guesses the content type from a flat list of file
//records by wrapping it in a ConstSetListingView and delegating to the
//view-based overload.
//Returns ContentType_Empty when parContent has no entries, otherwise
//whatever the view-based overload returns.
ContentTypes guess_content_type (dinlib::MediaTypes parMediaType, const std::vector<FileRecordData>& parContent) {
	//Nothing to inspect; this guard also makes the front() call below safe.
	if (parContent.empty())
		return ContentType_Empty;

	//TODO: assert that the first item in the list is the shortest string
	//The first record's absolute path is handed to the view together with
	//its atom count. make_shared avoids a naked new and builds the object
	//and its control block in a single allocation.
	auto pathname = std::make_shared<PathName>(parContent.front().abs_path);
	ConstSetListingView view(parContent.begin(), parContent.end(), pathname->atom_count(), pathname);
	return guess_content_type(parMediaType, view, parContent.size());
}
|
||||
|
||||
//Returns the character representation of a ContentTypes value.
//The enumerator's underlying value is used directly as the character.
char content_type_to_char (mchlib::ContentTypes parCType) {
	const char as_char = static_cast<char>(parCType);
	return as_char;
}
|
||||
|
||||
//Reinterprets a raw character as a ContentTypes value.
//The conversion is a plain static_cast: no check is made that parCType
//matches one of the declared enumerators.
ContentTypes char_to_content_type (char parCType) {
	const ContentTypes converted = static_cast<ContentTypes>(parCType);
	return converted;
}
|
||||
} //namespace mchlib
|
||||
|
||||
#endif
|
||||
|
|
|
@ -97,11 +97,13 @@ namespace din {
|
|||
uint32_t new_group_id;
|
||||
{
|
||||
auto id_res = conn.query("INSERT INTO \"sets\" "
|
||||
"(\"desc\",\"type\",\"app_name\") "
|
||||
"VALUES ($1, $2, $3) RETURNING \"id\";",
|
||||
"(\"desc\",\"type\",\"app_name\""
|
||||
",\"content_type\") "
|
||||
"VALUES ($1, $2, $3, $4) RETURNING \"id\";",
|
||||
parSetData.name,
|
||||
std::string(1, parSetData.type),
|
||||
parSignature
|
||||
boost::string_ref(&parSetData.type, 1),
|
||||
parSignature,
|
||||
boost::string_ref(&parSetData.content_type, 1)
|
||||
);
|
||||
assert(id_res.size() == 1);
|
||||
assert(id_res[0].size() == 1);
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "dindexer-machinery/machinery_info.hpp"
|
||||
#include "dindexer-common/common_info.hpp"
|
||||
#include "dindexer-common/settings.hpp"
|
||||
#include "dindexer-machinery/guess_content_type.hpp"
|
||||
#include "commandline.hpp"
|
||||
#include "dbbackend.hpp"
|
||||
#include <iostream>
|
||||
|
@ -42,7 +43,7 @@
|
|||
|
||||
namespace {
|
||||
void run_hash_calculation ( mchlib::Indexer& parIndexer, bool parShowProgress );
|
||||
bool add_to_db ( const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, const dinlib::SettingsDB& parDBSettings, bool parForce=false );
|
||||
bool add_to_db ( const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, char parContent, const dinlib::SettingsDB& parDBSettings, bool parForce=false );
|
||||
} //unnamed namespace
|
||||
|
||||
int main (int parArgc, char* parArgv[]) {
|
||||
|
@ -117,11 +118,15 @@ int main (int parArgc, char* parArgv[]) {
|
|||
return 1;
|
||||
}
|
||||
else {
|
||||
const auto set_type_casted = dinlib::char_to_media_type(set_type);
|
||||
const mchlib::ContentTypes content = mchlib::guess_content_type(set_type_casted, indexer.record_data());
|
||||
const char content_type = mchlib::content_type_to_char(content);
|
||||
|
||||
run_hash_calculation(indexer, verbose);
|
||||
if (verbose) {
|
||||
std::cout << "Writing to database...\n";
|
||||
}
|
||||
if (not add_to_db(indexer.record_data(), vm["setname"].as<std::string>(), set_type, settings.db)) {
|
||||
if (not add_to_db(indexer.record_data(), vm["setname"].as<std::string>(), set_type, content_type, settings.db)) {
|
||||
std::cerr << "Not written to DB, likely because a set with the same hash already exists\n";
|
||||
}
|
||||
}
|
||||
|
@ -182,7 +187,7 @@ namespace {
|
|||
#endif
|
||||
}
|
||||
|
||||
bool add_to_db (const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, const dinlib::SettingsDB& parDBSettings, bool parForce) {
|
||||
bool add_to_db (const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, char parContentType, const dinlib::SettingsDB& parDBSettings, bool parForce) {
|
||||
using mchlib::FileRecordData;
|
||||
using mchlib::SetRecordDataFull;
|
||||
using mchlib::SetRecordData;
|
||||
|
@ -197,7 +202,7 @@ namespace {
|
|||
}
|
||||
}
|
||||
|
||||
SetRecordData set_data {parSetName, parType};
|
||||
SetRecordData set_data {parSetName, parType, parContentType };
|
||||
const auto app_signature = dinlib::dindexer_signature();
|
||||
const auto lib_signature = mchlib::lib_signature();
|
||||
const std::string signature = std::string(app_signature.data(), app_signature.size()) + "/" + std::string(lib_signature.data(), lib_signature.size());
|
||||
|
|
Loading…
Add table
Reference in a new issue