diff --git a/dindexer.sql.in b/dindexer.sql.in index c034f7e..0d0bb28 100644 --- a/dindexer.sql.in +++ b/dindexer.sql.in @@ -4,7 +4,7 @@ -- Dumped from database version 9.4.5 -- Dumped by pg_dump version 9.4.5 --- Started on 2015-12-10 12:11:34 GMT +-- Started on 2016-01-06 01:55:35 GMT SET statement_timeout = 0; SET lock_timeout = 0; @@ -22,7 +22,7 @@ CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog; -- --- TOC entry 2039 (class 0 OID 0) +-- TOC entry 2040 (class 0 OID 0) -- Dependencies: 178 -- Name: EXTENSION plpgsql; Type: COMMENT; Schema: -; Owner: -- @@ -71,6 +71,8 @@ CREATE TABLE files ( access_time timestamp with time zone, modify_time timestamp with time zone, unreadable boolean NOT NULL, + mimetype character varying NOT NULL, + charset character varying NOT NULL, CONSTRAINT chk_files_dirsize_zero CHECK (((is_directory = false) OR (size = 0))), CONSTRAINT chk_hash_0 CHECK ((((NOT unreadable) AND is_hash_valid) OR ((NOT is_hash_valid) AND (hash ~ '^0+$'::text)))) ); @@ -79,7 +81,7 @@ CREATE TABLE files ( ALTER TABLE files OWNER TO @DB_OWNER_NAME@; -- --- TOC entry 2040 (class 0 OID 0) +-- TOC entry 2041 (class 0 OID 0) -- Dependencies: 175 -- Name: CONSTRAINT chk_hash_0 ON files; Type: COMMENT; Schema: public; Owner: @DB_OWNER_NAME@ -- @@ -103,7 +105,7 @@ CREATE SEQUENCE files_id_seq ALTER TABLE files_id_seq OWNER TO @DB_OWNER_NAME@; -- --- TOC entry 2041 (class 0 OID 0) +-- TOC entry 2042 (class 0 OID 0) -- Dependencies: 174 -- Name: files_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: @DB_OWNER_NAME@ -- @@ -129,7 +131,7 @@ CREATE TABLE sets ( ALTER TABLE sets OWNER TO @DB_OWNER_NAME@; -- --- TOC entry 2042 (class 0 OID 0) +-- TOC entry 2043 (class 0 OID 0) -- Dependencies: 177 -- Name: COLUMN sets.type; Type: COMMENT; Schema: public; Owner: @DB_OWNER_NAME@ -- @@ -159,7 +161,7 @@ CREATE SEQUENCE sets_id_seq ALTER TABLE sets_id_seq OWNER TO @DB_OWNER_NAME@; -- --- TOC entry 2043 (class 0 OID 0) +-- TOC entry 2044 (class 0 OID 0) -- Dependencies: 176 -- Name: sets_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: @DB_OWNER_NAME@ -- @@ -184,7 +186,7 @@ ALTER TABLE ONLY sets ALTER COLUMN id SET DEFAULT nextval('sets_id_seq'::regclas -- --- TOC entry 1916 (class 2606 OID 31289) +-- TOC entry 1917 (class 2606 OID 31289) -- Name: pk_files_id; Type: CONSTRAINT; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace: -- @@ -193,7 +195,7 @@ ALTER TABLE ONLY files -- --- TOC entry 1920 (class 2606 OID 31420) +-- TOC entry 1921 (class 2606 OID 31420) -- Name: pk_sets_id; Type: CONSTRAINT; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace: -- @@ -202,7 +204,7 @@ ALTER TABLE ONLY sets -- --- TOC entry 1918 (class 2606 OID 31294) +-- TOC entry 1919 (class 2606 OID 31294) -- Name: uniq_item; Type: CONSTRAINT; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace: -- @@ -219,7 +221,15 @@ CREATE INDEX fki_files_sets ON files USING btree (group_id); -- --- TOC entry 1914 (class 1259 OID 31292) +-- TOC entry 1914 (class 1259 OID 31651) +-- Name: idx_mimetype; Type: INDEX; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace: +-- + +CREATE INDEX idx_mimetype ON files USING btree (mimetype, charset); + + +-- +-- TOC entry 1915 (class 1259 OID 31292) -- Name: idx_paths; Type: INDEX; Schema: public; Owner: @DB_OWNER_NAME@; Tablespace: -- @@ -227,7 +237,7 @@ CREATE INDEX idx_paths ON files USING btree (path); -- --- TOC entry 1922 (class 2620 OID 31291) +-- TOC entry 1923 (class 2620 OID 31291) -- Name: triggerupcasehash; Type: TRIGGER; Schema: public; Owner: @DB_OWNER_NAME@ -- @@ -235,7 +245,7 @@ CREATE TRIGGER triggerupcasehash BEFORE INSERT OR UPDATE ON files FOR EACH ROW E -- --- TOC entry 1921 (class 2606 OID 31421) +-- TOC entry 1922 (class 2606 OID 31421) -- Name: fk_files_sets; Type: FK CONSTRAINT; Schema: public; Owner: @DB_OWNER_NAME@ -- @@ -244,7 +254,7 @@ ALTER TABLE ONLY files -- --- TOC entry 2038 (class 0 OID 0) +-- TOC entry 2039 (class 0 OID 0) -- Dependencies: 8 -- Name: public; Type: ACL; Schema: -; Owner: postgres -- @@ -255,7 +265,7 @@ GRANT ALL ON SCHEMA public TO postgres; GRANT ALL ON SCHEMA public TO PUBLIC; --- Completed on 2015-12-10 12:11:36 GMT +-- Completed on 2016-01-06 01:55:37 GMT -- -- PostgreSQL database dump complete diff --git a/src/machinery/indexer.cpp b/src/machinery/indexer.cpp index 34e7aad..4ada88a 100644 --- a/src/machinery/indexer.cpp +++ b/src/machinery/indexer.cpp @@ -134,6 +134,11 @@ namespace mchlib { if (parIgnoreErrors) { it_entry->unreadable = true; it_entry->hash = HashType {}; + if (it_entry->mime_full.get().empty()) { + it_entry->mime_full = "unknown"; + it_entry->mime_type = boost::string_ref(it_entry->mime_full.get()); + it_entry->mime_charset = boost::string_ref(it_entry->mime_full.get()); + } } else { throw e; @@ -152,6 +157,12 @@ namespace mchlib { std::cout << "Final hash for dir " << parCurrDir << " is " << tiger_to_string(curr_entry_it->hash) << '\n'; #endif curr_entry_it->hash_valid = true; + { + curr_entry_it->mime_full = parMime.analyze(curr_entry_it->path); + auto mime_pair = split_mime(curr_entry_it->mime_full); + curr_entry_it->mime_type = mime_pair.first; + curr_entry_it->mime_charset = mime_pair.second; + } } template diff --git a/src/scan/dbbackend.cpp b/src/scan/dbbackend.cpp index f12a94d..26a04bb 100644 --- a/src/scan/dbbackend.cpp +++ b/src/scan/dbbackend.cpp @@ -104,8 +104,9 @@ namespace din { for (std::size_t z = 0; z < parData.size(); ++z) { const std::string query = "INSERT INTO \"files\" (path, hash, " "level, group_id, is_directory, is_symlink, size, " - "access_time, modify_time, is_hash_valid, unreadable) VALUES " - "($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);"; + "access_time, modify_time, is_hash_valid, unreadable, " + "mimetype, charset) VALUES " + "($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13);"; const auto& itm = parData[z]; conn.query(query, @@ -119,7 +120,9 @@ namespace din { system_clock::from_time_t(itm.atime), system_clock::from_time_t(itm.mtime), itm.hash_valid, - itm.unreadable + itm.unreadable, + std::string(itm.mime_type.data(), itm.mime_type.size()), + std::string(itm.mime_charset.data(), itm.mime_charset.size()) ); } conn.query("COMMIT;");