1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2025-02-19 12:04:54 +00:00

Detect content type after scanning and save to DB.

This commit is contained in:
King_DuckZ 2016-02-22 19:44:48 +01:00
parent 2e77e4dc0b
commit 22614432a9
9 changed files with 141 additions and 36 deletions

View file

@ -4,7 +4,7 @@
-- Dumped from database version 9.4.5
-- Dumped by pg_dump version 9.4.5
-- Started on 2016-01-29 20:18:23 CET
-- Started on 2016-02-22 08:40:56 CET
SET statement_timeout = 0;
SET lock_timeout = 0;
@ -22,7 +22,7 @@ CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;
--
-- TOC entry 2044 (class 0 OID 0)
-- TOC entry 2045 (class 0 OID 0)
-- Dependencies: 176
-- Name: EXTENSION plpgsql; Type: COMMENT; Schema: -; Owner:
--
@ -112,7 +112,7 @@ CREATE TABLE files (
ALTER TABLE files OWNER TO @DB_OWNER_NAME@;
--
-- TOC entry 2045 (class 0 OID 0)
-- TOC entry 2046 (class 0 OID 0)
-- Dependencies: 172
-- Name: CONSTRAINT chk_hash_0 ON files; Type: COMMENT; Schema: public; Owner: @DB_OWNER_NAME@
--
@ -136,7 +136,7 @@ CREATE SEQUENCE files_id_seq
ALTER TABLE files_id_seq OWNER TO @DB_OWNER_NAME@;
--
-- TOC entry 2046 (class 0 OID 0)
-- TOC entry 2047 (class 0 OID 0)
-- Dependencies: 173
-- Name: files_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: @DB_OWNER_NAME@
--
@ -156,6 +156,7 @@ CREATE TABLE sets (
disk_number integer DEFAULT 0 NOT NULL,
creation timestamp with time zone DEFAULT now() NOT NULL,
app_name character varying NOT NULL,
content_type character(1) DEFAULT 'G'::bpchar NOT NULL,
CONSTRAINT chk_sets_type CHECK (((((((((type = 'C'::bpchar) OR (type = 'D'::bpchar)) OR (type = 'V'::bpchar)) OR (type = 'B'::bpchar)) OR (type = 'F'::bpchar)) OR (type = 'H'::bpchar)) OR (type = 'Z'::bpchar)) OR (type = 'O'::bpchar)))
);
@ -163,7 +164,7 @@ CREATE TABLE sets (
ALTER TABLE sets OWNER TO @DB_OWNER_NAME@;
--
-- TOC entry 2047 (class 0 OID 0)
-- TOC entry 2048 (class 0 OID 0)
-- Dependencies: 174
-- Name: COLUMN sets.type; Type: COMMENT; Schema: public; Owner: @DB_OWNER_NAME@
--
@ -193,7 +194,7 @@ CREATE SEQUENCE sets_id_seq
ALTER TABLE sets_id_seq OWNER TO @DB_OWNER_NAME@;
--
-- TOC entry 2048 (class 0 OID 0)
-- TOC entry 2049 (class 0 OID 0)
-- Dependencies: 175
-- Name: sets_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: @DB_OWNER_NAME@
--
@ -218,7 +219,7 @@ ALTER TABLE ONLY sets ALTER COLUMN id SET DEFAULT nextval('sets_id_seq'::regclas
--
-- TOC entry 1921 (class 2606 OID 16414)
-- TOC entry 1922 (class 2606 OID 16414)
-- Name: pk_files_id; Type: CONSTRAINT; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
--
@ -227,7 +228,7 @@ ALTER TABLE ONLY files
--
-- TOC entry 1925 (class 2606 OID 16416)
-- TOC entry 1926 (class 2606 OID 16416)
-- Name: pk_sets_id; Type: CONSTRAINT; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
--
@ -236,7 +237,7 @@ ALTER TABLE ONLY sets
--
-- TOC entry 1923 (class 2606 OID 16418)
-- TOC entry 1924 (class 2606 OID 16418)
-- Name: uniq_item; Type: CONSTRAINT; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
--
@ -245,7 +246,7 @@ ALTER TABLE ONLY files
--
-- TOC entry 1917 (class 1259 OID 16419)
-- TOC entry 1918 (class 1259 OID 16419)
-- Name: fki_files_sets; Type: INDEX; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
--
@ -253,7 +254,7 @@ CREATE INDEX fki_files_sets ON files USING btree (group_id);
--
-- TOC entry 1918 (class 1259 OID 16420)
-- TOC entry 1919 (class 1259 OID 16420)
-- Name: idx_mimetype; Type: INDEX; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
--
@ -261,7 +262,7 @@ CREATE INDEX idx_mimetype ON files USING btree (mimetype, charset);
--
-- TOC entry 1919 (class 1259 OID 16421)
-- TOC entry 1920 (class 1259 OID 16421)
-- Name: idx_paths; Type: INDEX; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
--
@ -269,7 +270,7 @@ CREATE INDEX idx_paths ON files USING btree (path);
--
-- TOC entry 1927 (class 2620 OID 16422)
-- TOC entry 1928 (class 2620 OID 16422)
-- Name: triggerupcasehash; Type: TRIGGER; Schema: public; Owner: @DB_OWNER_NAME@
--
@ -277,7 +278,7 @@ CREATE TRIGGER triggerupcasehash BEFORE INSERT OR UPDATE ON files FOR EACH ROW E
--
-- TOC entry 1926 (class 2606 OID 16423)
-- TOC entry 1927 (class 2606 OID 16423)
-- Name: fk_files_sets; Type: FK CONSTRAINT; Schema: public; Owner: @DB_OWNER_NAME@
--
@ -286,7 +287,7 @@ ALTER TABLE ONLY files
--
-- TOC entry 2043 (class 0 OID 0)
-- TOC entry 2044 (class 0 OID 0)
-- Dependencies: 6
-- Name: public; Type: ACL; Schema: -; Owner: postgres
--
@ -297,7 +298,7 @@ GRANT ALL ON SCHEMA public TO postgres;
GRANT ALL ON SCHEMA public TO PUBLIC;
-- Completed on 2016-01-29 20:18:23 CET
-- Completed on 2016-02-22 08:40:57 CET
--
-- PostgreSQL database dump complete

View file

@ -0,0 +1,62 @@
/* Copyright 2015, 2016, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef id45CDD1DAEF4F42968E3C89F68FDDA9BC
#define id45CDD1DAEF4F42968E3C89F68FDDA9BC

/* Portable spellings for a few function attributes.
 *
 * After including this header the macros a_flatten, a_always_inline, a_pure
 * and a_deprecated are always defined. Each one expands to the matching
 * __attribute__((...)) when the compiler is assumed (gcc) or reports (clang,
 * through __has_attribute) to support it, and to nothing otherwise, so they
 * can be used unconditionally in declarations.
 */
#if defined(__GNUC__)
#	if defined(__clang__)
#		if defined(__has_attribute)
			//Ask clang about each attribute individually
#			if __has_attribute(flatten)
#				define a_flatten __attribute__((flatten))
#			else
#				define a_flatten
#			endif
#			if __has_attribute(always_inline)
#				define a_always_inline __attribute__((always_inline))
#			else
#				define a_always_inline
#			endif
#			if __has_attribute(pure)
#				define a_pure __attribute__((pure))
#			else
#				define a_pure
#			endif
#			if __has_attribute(deprecated)
#				define a_deprecated __attribute__((deprecated))
#			else
#				define a_deprecated
#			endif
#		else
			//No way to probe for attributes on this clang: expand to nothing
			//so that code using the macros still compiles. A comparison on
			//the compiler version number could be added here if needed.
#			define a_flatten
#			define a_always_inline
#			define a_pure
#			define a_deprecated
#		endif
#	else
		//Fix here if you get warnings about unsupported attributes on your compiler
#		define a_flatten __attribute__((flatten))
#		define a_always_inline __attribute__((always_inline))
#		define a_pure __attribute__((pure))
#		define a_deprecated __attribute__((deprecated))
#	endif
#else
#	warning "Unsupported compiler, please fill this section or file a bug"
#	define a_flatten
#	define a_always_inline
#	define a_pure
#	define a_deprecated
#endif

#endif

View file

@ -18,6 +18,7 @@
#ifndef id700AFD0F33634ACC88079BB8853A9E13
#define id700AFD0F33634ACC88079BB8853A9E13
#include "dindexer-common/compatibility.h"
#include <string>
namespace dinlib {
@ -33,6 +34,7 @@ namespace dinlib {
};
const std::string& media_type_to_str ( MediaTypes parType );
//Converts a one-character media type code into the corresponding MediaTypes
//value. The conversion is a plain cast of the character: no validation is
//done on parMType, so the caller must pass a code that maps to a MediaTypes
//enumerator. Declared a_pure as the result depends on the argument only.
MediaTypes char_to_media_type ( char parMType ) a_pure;
} //namespace dinlib
#endif

View file

@ -20,21 +20,27 @@
#include "dindexer-common/mediatypes.hpp"
#include "dindexer-machinery/recorddata.hpp"
#include "dindexer-common/compatibility.h"
#include <vector>
namespace mchlib {
enum ContentTypes {
ContentType_Generic,
ContentType_Backup,
ContentType_VideoDVD,
ContentType_VideoBD,
ContentType_VideoCD,
ContentType_Unknown
ContentType_Empty = 'E',
ContentType_Generic = 'G',
ContentType_Backup = 'A',
ContentType_VideoDVD = 'D',
ContentType_VideoBD = 'B',
ContentType_VideoCD = 'C',
ContentType_Unknown = 'U'
};
//Forward declaration; the full template is defined in another header.
template <bool> class SetListingView;

//Guesses what kind of content a scanned set holds, working on a listing view.
//Returns ContentType_Empty when parContent is empty; otherwise the content is
//run through the known checkers (video DVD, video CD) and ContentType_Generic
//is returned as the fallback.
//NOTE(review): parEntriesCount looks like an optional hint on the number of
//entries in parContent (0 presumably meaning "unknown") - confirm against
//the implementation.
ContentTypes guess_content_type ( dinlib::MediaTypes parMediaType, const SetListingView<true>& parContent, std::size_t parEntriesCount=0 );

//Convenience overload taking the flat list of scanned records. Returns
//ContentType_Empty for an empty list, otherwise it wraps the records in a
//listing view rooted at the first record's abs_path and forwards to the
//overload above, passing parContent.size() as the entries count.
ContentTypes guess_content_type ( dinlib::MediaTypes parMediaType, const std::vector<FileRecordData>& parContent );

//Returns the character code of parCType (a plain cast of the enum value).
char content_type_to_char ( ContentTypes parCType ) a_pure;

//Inverse of content_type_to_char(): casts parCType to ContentTypes without
//validating it, so the caller must pass a known code.
ContentTypes char_to_content_type ( char parCType ) a_pure;
} //namespace mchlib
#endif

View file

@ -73,15 +73,15 @@ namespace mchlib {
bool hash_valid;
};
struct SetRecordDataFull {
std::string name;
uint32_t disk_number;
struct SetRecordData {
boost::string_ref name;
char type;
char content_type;
};
struct SetRecordData {
const boost::string_ref name;
const char type;
struct SetRecordDataFull : public SetRecordData {
std::string name;
uint32_t disk_number;
};
} //namespace mchlib

View file

@ -39,4 +39,8 @@ namespace dinlib {
}
return it_ret->second;
}
//Reinterprets the one-character code parMType as a MediaTypes value.
//This is a bare cast: the character is not checked against the enumerators.
MediaTypes char_to_media_type (char parMType) {
	const MediaTypes media_type = static_cast<MediaTypes>(parMType);
	return media_type;
}
} //namespace dinlib

View file

@ -22,6 +22,7 @@
#include "dindexer-machinery/set_listing.hpp"
#include "dindexer-machinery/set_listing_helpers.hpp"
#include "globbing.hpp"
#include "pathname.hpp"
#include <boost/iterator/filter_iterator.hpp>
#include <boost/iterator/indirect_iterator.hpp>
#include <boost/range/empty.hpp>
@ -31,6 +32,7 @@
#include <ciso646>
#include <regex>
#include <utility>
#include <memory>
namespace mchlib {
namespace {
@ -120,6 +122,9 @@ namespace mchlib {
} //unnamed namespace
ContentTypes guess_content_type (dinlib::MediaTypes parMediaType, const ConstSetListingView& parContent, std::size_t parEntriesCount) {
if (boost::empty(parContent))
return ContentType_Empty;
std::vector<EntryChecking> checker_chain {
{ 100, &identify_video_dvd, ContentType_VideoDVD },
{ 200, &identify_video_cd, ContentType_VideoCD }
@ -137,6 +142,24 @@ namespace mchlib {
}
return ContentType_Generic;
}
//Convenience overload working on the flat list of scanned records.
//
//parMediaType: media type of the set, forwarded unchanged.
//parContent: the scanned records; the first one's abs_path is used to build
//            the PathName the listing view is rooted at.
//
//Returns ContentType_Empty when parContent is empty, otherwise whatever the
//view-based overload returns for a view spanning the whole list.
ContentTypes guess_content_type (dinlib::MediaTypes parMediaType, const std::vector<FileRecordData>& parContent) {
	if (parContent.empty())
		return ContentType_Empty;

	//TODO: assert that the first item in the list is the shortest string
	//make_shared: single allocation and no naked new
	auto pathname = std::make_shared<PathName>(parContent.front().abs_path);
	//NOTE(review): atom_count() of the first path is presumably the depth the
	//view starts at - confirm against ConstSetListingView's constructor
	ConstSetListingView view(parContent.begin(), parContent.end(), pathname->atom_count(), pathname);
	return guess_content_type(parMediaType, view, parContent.size());
}
//Gives back the character code of parCType, i.e. the enumerator's value
//cast to char.
char content_type_to_char (mchlib::ContentTypes parCType) {
	const char type_code = static_cast<char>(parCType);
	return type_code;
}
//Reinterprets the character code parCType as a ContentTypes value.
//This is a bare cast: the character is not checked against the enumerators.
ContentTypes char_to_content_type (char parCType) {
	const ContentTypes content_type = static_cast<ContentTypes>(parCType);
	return content_type;
}
} //namespace mchlib
#endif

View file

@ -97,11 +97,13 @@ namespace din {
uint32_t new_group_id;
{
auto id_res = conn.query("INSERT INTO \"sets\" "
"(\"desc\",\"type\",\"app_name\") "
"VALUES ($1, $2, $3) RETURNING \"id\";",
"(\"desc\",\"type\",\"app_name\""
",\"content_type\") "
"VALUES ($1, $2, $3, $4) RETURNING \"id\";",
parSetData.name,
std::string(1, parSetData.type),
parSignature
boost::string_ref(&parSetData.type, 1),
parSignature,
boost::string_ref(&parSetData.content_type, 1)
);
assert(id_res.size() == 1);
assert(id_res[0].size() == 1);

View file

@ -26,6 +26,7 @@
#include "dindexer-machinery/machinery_info.hpp"
#include "dindexer-common/common_info.hpp"
#include "dindexer-common/settings.hpp"
#include "dindexer-machinery/guess_content_type.hpp"
#include "commandline.hpp"
#include "dbbackend.hpp"
#include <iostream>
@ -42,7 +43,7 @@
namespace {
void run_hash_calculation ( mchlib::Indexer& parIndexer, bool parShowProgress );
bool add_to_db ( const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, const dinlib::SettingsDB& parDBSettings, bool parForce=false );
bool add_to_db ( const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, char parContent, const dinlib::SettingsDB& parDBSettings, bool parForce=false );
} //unnamed namespace
int main (int parArgc, char* parArgv[]) {
@ -117,11 +118,15 @@ int main (int parArgc, char* parArgv[]) {
return 1;
}
else {
const auto set_type_casted = dinlib::char_to_media_type(set_type);
const mchlib::ContentTypes content = mchlib::guess_content_type(set_type_casted, indexer.record_data());
const char content_type = mchlib::content_type_to_char(content);
run_hash_calculation(indexer, verbose);
if (verbose) {
std::cout << "Writing to database...\n";
}
if (not add_to_db(indexer.record_data(), vm["setname"].as<std::string>(), set_type, settings.db)) {
if (not add_to_db(indexer.record_data(), vm["setname"].as<std::string>(), set_type, content_type, settings.db)) {
std::cerr << "Not written to DB, likely because a set with the same hash already exists\n";
}
}
@ -182,7 +187,7 @@ namespace {
#endif
}
bool add_to_db (const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, const dinlib::SettingsDB& parDBSettings, bool parForce) {
bool add_to_db (const std::vector<mchlib::FileRecordData>& parData, const std::string& parSetName, char parType, char parContentType, const dinlib::SettingsDB& parDBSettings, bool parForce) {
using mchlib::FileRecordData;
using mchlib::SetRecordDataFull;
using mchlib::SetRecordData;
@ -197,7 +202,7 @@ namespace {
}
}
SetRecordData set_data {parSetName, parType};
SetRecordData set_data {parSetName, parType, parContentType };
const auto app_signature = dinlib::dindexer_signature();
const auto lib_signature = mchlib::lib_signature();
const std::string signature = std::string(app_signature.data(), app_signature.size()) + "/" + std::string(lib_signature.data(), lib_signature.size());