1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2024-11-29 01:33:46 +00:00

Store MIME type and charset in the database.

This commit is contained in:
King_DuckZ 2016-01-06 02:18:42 +00:00
parent 839b9dd49a
commit 6edfb08383
3 changed files with 41 additions and 17 deletions

View file

@ -4,7 +4,7 @@
-- Dumped from database version 9.4.5
-- Dumped by pg_dump version 9.4.5
-- Started on 2015-12-10 12:11:34 GMT
-- Started on 2016-01-06 01:55:35 GMT
SET statement_timeout = 0;
SET lock_timeout = 0;
@ -22,7 +22,7 @@ CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;
--
-- TOC entry 2039 (class 0 OID 0)
-- TOC entry 2040 (class 0 OID 0)
-- Dependencies: 178
-- Name: EXTENSION plpgsql; Type: COMMENT; Schema: -; Owner:
--
@ -71,6 +71,8 @@ CREATE TABLE files (
access_time timestamp with time zone,
modify_time timestamp with time zone,
unreadable boolean NOT NULL,
mimetype character varying NOT NULL,
charset character varying NOT NULL,
CONSTRAINT chk_files_dirsize_zero CHECK (((is_directory = false) OR (size = 0))),
CONSTRAINT chk_hash_0 CHECK ((((NOT unreadable) AND is_hash_valid) OR ((NOT is_hash_valid) AND (hash ~ '^0+$'::text))))
);
@ -79,7 +81,7 @@ CREATE TABLE files (
ALTER TABLE files OWNER TO @DB_OWNER_NAME@;
--
-- TOC entry 2040 (class 0 OID 0)
-- TOC entry 2041 (class 0 OID 0)
-- Dependencies: 175
-- Name: CONSTRAINT chk_hash_0 ON files; Type: COMMENT; Schema: public; Owner: @DB_OWNER_NAME@
--
@ -103,7 +105,7 @@ CREATE SEQUENCE files_id_seq
ALTER TABLE files_id_seq OWNER TO @DB_OWNER_NAME@;
--
-- TOC entry 2041 (class 0 OID 0)
-- TOC entry 2042 (class 0 OID 0)
-- Dependencies: 174
-- Name: files_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: @DB_OWNER_NAME@
--
@ -129,7 +131,7 @@ CREATE TABLE sets (
ALTER TABLE sets OWNER TO @DB_OWNER_NAME@;
--
-- TOC entry 2042 (class 0 OID 0)
-- TOC entry 2043 (class 0 OID 0)
-- Dependencies: 177
-- Name: COLUMN sets.type; Type: COMMENT; Schema: public; Owner: @DB_OWNER_NAME@
--
@ -159,7 +161,7 @@ CREATE SEQUENCE sets_id_seq
ALTER TABLE sets_id_seq OWNER TO @DB_OWNER_NAME@;
--
-- TOC entry 2043 (class 0 OID 0)
-- TOC entry 2044 (class 0 OID 0)
-- Dependencies: 176
-- Name: sets_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: @DB_OWNER_NAME@
--
@ -184,7 +186,7 @@ ALTER TABLE ONLY sets ALTER COLUMN id SET DEFAULT nextval('sets_id_seq'::regclas
--
-- TOC entry 1916 (class 2606 OID 31289)
-- TOC entry 1917 (class 2606 OID 31289)
-- Name: pk_files_id; Type: CONSTRAINT; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
--
@ -193,7 +195,7 @@ ALTER TABLE ONLY files
--
-- TOC entry 1920 (class 2606 OID 31420)
-- TOC entry 1921 (class 2606 OID 31420)
-- Name: pk_sets_id; Type: CONSTRAINT; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
--
@ -202,7 +204,7 @@ ALTER TABLE ONLY sets
--
-- TOC entry 1918 (class 2606 OID 31294)
-- TOC entry 1919 (class 2606 OID 31294)
-- Name: uniq_item; Type: CONSTRAINT; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
--
@ -219,7 +221,15 @@ CREATE INDEX fki_files_sets ON files USING btree (group_id);
--
-- TOC entry 1914 (class 1259 OID 31292)
-- TOC entry 1914 (class 1259 OID 31651)
-- Name: idx_mimetype; Type: INDEX; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
--
CREATE INDEX idx_mimetype ON files USING btree (mimetype, charset);
--
-- TOC entry 1915 (class 1259 OID 31292)
-- Name: idx_paths; Type: INDEX; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace:
--
@ -227,7 +237,7 @@ CREATE INDEX idx_paths ON files USING btree (path);
--
-- TOC entry 1922 (class 2620 OID 31291)
-- TOC entry 1923 (class 2620 OID 31291)
-- Name: triggerupcasehash; Type: TRIGGER; Schema: public; Owner: @DB_OWNER_NAME@
--
@ -235,7 +245,7 @@ CREATE TRIGGER triggerupcasehash BEFORE INSERT OR UPDATE ON files FOR EACH ROW E
--
-- TOC entry 1921 (class 2606 OID 31421)
-- TOC entry 1922 (class 2606 OID 31421)
-- Name: fk_files_sets; Type: FK CONSTRAINT; Schema: public; Owner: @DB_OWNER_NAME@
--
@ -244,7 +254,7 @@ ALTER TABLE ONLY files
--
-- TOC entry 2038 (class 0 OID 0)
-- TOC entry 2039 (class 0 OID 0)
-- Dependencies: 8
-- Name: public; Type: ACL; Schema: -; Owner: postgres
--
@ -255,7 +265,7 @@ GRANT ALL ON SCHEMA public TO postgres;
GRANT ALL ON SCHEMA public TO PUBLIC;
-- Completed on 2015-12-10 12:11:36 GMT
-- Completed on 2016-01-06 01:55:37 GMT
--
-- PostgreSQL database dump complete

View file

@ -134,6 +134,11 @@ namespace mchlib {
if (parIgnoreErrors) {
it_entry->unreadable = true;
it_entry->hash = HashType {};
if (it_entry->mime_full.get().empty()) {
it_entry->mime_full = "unknown";
it_entry->mime_type = boost::string_ref(it_entry->mime_full.get());
it_entry->mime_charset = boost::string_ref(it_entry->mime_full.get());
}
}
else {
throw e;
@ -152,6 +157,12 @@ namespace mchlib {
std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(curr_entry_it->hash) << '\n';
#endif
curr_entry_it->hash_valid = true;
{
curr_entry_it->mime_full = parMime.analyze(curr_entry_it->path);
auto mime_pair = split_mime(curr_entry_it->mime_full);
curr_entry_it->mime_type = mime_pair.first;
curr_entry_it->mime_charset = mime_pair.second;
}
}
template <bool FileTrue=true>

View file

@ -104,8 +104,9 @@ namespace din {
for (std::size_t z = 0; z < parData.size(); ++z) {
const std::string query = "INSERT INTO \"files\" (path, hash, "
"level, group_id, is_directory, is_symlink, size, "
"access_time, modify_time, is_hash_valid, unreadable) VALUES "
"($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);";
"access_time, modify_time, is_hash_valid, unreadable, "
"mimetype, charset) VALUES "
"($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13);";
const auto& itm = parData[z];
conn.query(query,
@ -119,7 +120,9 @@ namespace din {
system_clock::from_time_t(itm.atime),
system_clock::from_time_t(itm.mtime),
itm.hash_valid,
itm.unreadable
itm.unreadable,
std::string(itm.mime_type.data(), itm.mime_type.size()),
std::string(itm.mime_charset.data(), itm.mime_charset.size())
);
}
conn.query("COMMIT;");