1
0
Fork 0
mirror of https://github.com/KingDuckZ/dindexer.git synced 2025-02-19 12:04:54 +00:00

Implement SSE2 Tiger hashing.

This commit is contained in:
King_DuckZ 2015-11-09 16:50:56 +00:00
parent d07dc9a7da
commit 1dee8e0f83
8 changed files with 1695 additions and 28 deletions

View file

@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
project(dindexer VERSION 0.1 LANGUAGES CXX)
project(dindexer VERSION 0.1 LANGUAGES CXX C)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -std=c++11")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -std=c++11")
@ -11,6 +11,8 @@ add_executable(${PROJECT_NAME}
src/filesearcher.cpp
src/pathname.cpp
src/indexer.cpp
src/tiger.c
src/tiger.cpp
)
target_include_directories(${PROJECT_NAME} SYSTEM

View file

@ -17,46 +17,118 @@
#include "indexer.hpp"
#include "pathname.hpp"
#include "tiger.hpp"
#include <algorithm>
#include <vector>
#include <string>
#include <atomic>
#include <cstdint>
#include <ciso646>
#if !defined(NDEBUG)
# include <iostream>
#endif
namespace din {
bool Indexer::add_path (const char* parPath, int parLevel, bool parIsDir, bool) {
if (parLevel > 0) {
PathLists& path_lists = (parIsDir ? m_directories : m_files);
const auto size = static_cast<std::size_t>(parLevel);
typedef TigerHash HashType;
if (size > path_lists.size()) {
path_lists.resize(size);
// One path collected by the indexer, together with the hash slot that is
// filled in later for regular files.
// The type is move-only: copying is disabled, moving is defaulted.
struct FileEntry {
	// Stores the given path and flags; the hash starts out value-initialised
	// and parLevel is narrowed to 16 bits.
	FileEntry (const char* parPath, int parLevel, bool parIsDir, bool parIsSymLink) :
		path{parPath},
		hash {},
		level{static_cast<uint16_t>(parLevel)},
		is_dir{parIsDir},
		is_symlink{parIsSymLink}
	{
	}

	FileEntry (FileEntry&&) = default;
	FileEntry& operator= (FileEntry&&) = default;
	FileEntry (const FileEntry&) = delete;
	FileEntry& operator= (const FileEntry&) = delete;

	// Orders entries by level first, then by path.
	bool operator< (const FileEntry& parOther) const;

	std::string path;
	HashType hash;
	uint16_t level;
	bool is_dir;
	bool is_symlink;
};
// Private state of Indexer, kept out of the public header.
struct Indexer::LocalData {
	using PathList = std::vector<FileEntry>;

	PathList paths;                       // every entry added with a level above zero
	std::string base_path;                // path received with a level of zero or less
	std::atomic<std::size_t> done_count;  // value reported by processed_items()
	std::size_t file_count;               // number of non-directory entries in paths
};
// Strict ordering used when sorting entries: lower levels come first and
// entries on the same level are ordered by their path string.
bool FileEntry::operator< (const FileEntry& parOther) const {
	if (this->level != parOther.level) {
		return this->level < parOther.level;
	}
	return this->path < parOther.path;
}
// Allocates the private state and starts both counters at zero.
Indexer::Indexer() :
	m_local_data(new LocalData)
{
	LocalData& state = *m_local_data;
	state.file_count = 0;
	state.done_count = 0;
}
// Defined in this translation unit, where LocalData is a complete type, so
// that the std::unique_ptr<LocalData> member can be destroyed.
Indexer::~Indexer() noexcept = default;
std::size_t Indexer::total_items() const {
return m_local_data->file_count;
}
// Current value of the atomic progress counter.
std::size_t Indexer::processed_items() const {
	return m_local_data->done_count.load();
}
// Sorts the collected entries (by level, then path) and hashes every entry
// that is not a directory, storing the result in the entry itself.
//
// Progress is published through the done_count counter, which is reset at the
// start and incremented after each hashed file so that processed_items()
// advances towards total_items().
//
// The per-file trace is only emitted in builds without NDEBUG, because
// <iostream> is only included by this file in that configuration.
void Indexer::calculate_hash() {
	std::sort(m_local_data->paths.begin(), m_local_data->paths.end());
	m_local_data->done_count = 0;

	// Running hash fed with the content of every file; it is accumulated here
	// but not stored or returned yet.
	HashType dir_hash;
	tiger_init_hash(dir_hash);

	for (auto& cur_itm : m_local_data->paths) {
		if (cur_itm.is_dir) {
			continue;
		}

#if !defined(NDEBUG)
		std::cout << "Hashing " << cur_itm.path << "...";
#endif
		tiger_init_hash(cur_itm.hash);
		tiger_file(cur_itm.path, cur_itm.hash, dir_hash);
#if !defined(NDEBUG)
		std::cout << " --> " << tiger_to_string(cur_itm.hash) << '\n';
#endif
		++m_local_data->done_count;
	}
}
std::string path(parPath);
auto insert_point = std::lower_bound(path_lists[size - 1].begin(), path_lists[size - 1].end(), path);
path_lists[size - 1].insert(insert_point, std::move(path));
// Records one path reported by the file searcher.
// A path with parLevel > 0 is appended to the entry list, and the file count
// is incremented when the entry is not a directory. Any other level is taken
// to be the base path and is stored as such.
// Always returns true.
bool Indexer::add_path (const char* parPath, int parLevel, bool parIsDir, bool parIsSymLink) {
if (parLevel > 0) {
m_local_data->paths.push_back(FileEntry(parPath, parLevel, parIsDir, parIsSymLink));
if (not parIsDir) {
++m_local_data->file_count;
}
} else {
m_base_path = parPath;
m_local_data->base_path = parPath;
}
return true;
}
#if !defined(NDEBUG)
void Indexer::dump() const {
PathName base_path(m_base_path);
PathName base_path(m_local_data->base_path);
std::cout << "---------------- FILE LIST ----------------\n";
for (const auto& cur_list : m_files) {
for (const auto& cur_itm : cur_list) {
PathName cur_path(cur_itm);
for (const auto& cur_itm : m_local_data->paths) {
if (not cur_itm.is_dir) {
PathName cur_path(cur_itm.path);
std::cout << make_relative_path(base_path, cur_path).path() << '\n';
}
}
std::cout << "---------------- DIRECTORY LIST ----------------\n";
for (const auto& cur_list : m_directories) {
for (const auto& cur_itm : cur_list) {
PathName cur_path(cur_itm);
for (const auto& cur_itm : m_local_data->paths) {
if (cur_itm.is_dir) {
PathName cur_path(cur_itm.path);
std::cout << make_relative_path(base_path, cur_path).path() << '\n';
}
}

View file

@ -18,26 +18,29 @@
#ifndef idE555EF56730442C1ADDC7B2AE7A9340E
#define idE555EF56730442C1ADDC7B2AE7A9340E
#include <vector>
#include <string>
#include <memory>
namespace din {
// Collects the paths handed to add_path() and offers hashing and progress
// queries over them. The data itself lives in LocalData, which is only
// forward-declared here and owned through m_local_data.
class Indexer {
public:
Indexer ( void ) = default;
~Indexer ( void ) noexcept = default;
Indexer ( void );
Indexer ( Indexer&& ) = default;
Indexer ( const Indexer& ) = delete;
~Indexer ( void ) noexcept;
// Registers one path together with its level and directory/symlink flags.
bool add_path ( const char* parPath, int parLevel, bool parIsDir, bool parIsSymlink );
#if !defined(NDEBUG)
// Only available when NDEBUG is not defined.
void dump ( void ) const;
#endif
private:
typedef std::vector<std::vector<std::string>> PathLists;
// Number of non-directory entries added so far.
std::size_t total_items ( void ) const;
// Current value of the progress counter.
std::size_t processed_items ( void ) const;
// Sorts the collected entries and hashes every non-directory entry.
void calculate_hash ( void );
PathLists m_directories;
PathLists m_files;
std::string m_base_path;
private:
struct LocalData;
std::unique_ptr<LocalData> m_local_data;
};
} //namespace din

View file

@ -32,12 +32,13 @@ int main (int parArgc, char* parArgv[]) {
std::cout << std::endl;
din::Indexer indexer;
fastf::FileSearcher searcher("/home/duckz/dev/code/cpp/dindexer/test");
fastf::FileSearcher searcher("/home/michele/dev/code/cpp/dindexer/test");
fastf::FileSearcher::ConstCharVecType ext, ignore;
searcher.SetFollowSymlinks(true);
searcher.SetCallback(fastf::FileSearcher::CallbackType(std::bind(&din::Indexer::add_path, &indexer, _1, _2, _3, _4)));
searcher.Search(ext, ignore);
indexer.calculate_hash();
indexer.dump();
return 0;
}

1326
src/tiger.c Normal file

File diff suppressed because it is too large Load diff

92
src/tiger.cpp Normal file
View file

@ -0,0 +1,92 @@
/* Copyright 2015, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#include "tiger.hpp"
#include <fstream>
#include <cstdint>
#include <memory>
#include <cassert>
#include <algorithm>
#include <sstream>
#if defined(__SSE2__)
extern "C" void tiger_sse2_chunk ( const char* parStr1, const char* parStr2, uint64_t parLength, uint64_t parRes1[3], uint64_t parRes2[3] );
extern "C" void tiger_sse2_last_chunk ( const char* parStr1, const char* parStr2, uint64_t parLength, uint64_t parRealLength, uint64_t parRes1[3], uint64_t parRes2[3], char pad );
#else
# error "Not implemented without SSE2"
#endif
namespace din {
namespace {
	// Upper bound, in bytes, of the read buffer used while hashing a file.
	const uint32_t g_buff_size = 1024 * 8;
	// Padding byte handed to the final-chunk hashing routine.
	const char g_tiger_padding = 0x80; //0x01 for V1

	// Returns parNum with the order of its eight bytes reversed.
	uint64_t swap_long (uint64_t parNum) {
		uint64_t reversed = 0;
		for (int byte_index = 0; byte_index < 8; ++byte_index) {
			// Move the lowest remaining byte of the input to the bottom of the
			// result; earlier bytes are pushed towards the top.
			reversed = (reversed << 8) | (parNum & 0xFF);
			parNum >>= 8;
		}
		return reversed;
	}
} //unnamed namespace
// Resets parHash to the three fixed starting words used before any data is
// hashed into it.
void tiger_init_hash (TigerHash& parHash) {
	const uint64_t start_a = 0x0123456789ABCDEFULL;
	const uint64_t start_b = 0xFEDCBA9876543210ULL;
	const uint64_t start_c = 0xF096A5B4C3B2E187ULL;

	parHash.part_a = start_a;
	parHash.part_b = start_b;
	parHash.part_c = start_c;
}
// Hashes the content of the file at parPath.
//
// parHashFile is reset and then receives the hash of this file alone.
// parHashDir is NOT reset: the same bytes are fed into it as well, so the
// caller can keep one running value across several files.
//
// The file is read through a 64-byte aligned buffer of at most g_buff_size
// bytes. Whole 64-byte multiples go through tiger_sse2_chunk, and whatever is
// left (possibly nothing) is handed to tiger_sse2_last_chunk together with the
// total file size.
//
// Throws std::ios_base::failure when the file cannot be opened, its size
// cannot be determined, or fewer bytes than expected can be read. Previously
// those cases produced a bogus buffer size and a hash of stale memory.
void tiger_file (const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir) {
	tiger_init_hash(parHashFile);

	std::ifstream src(parPath, std::ios::binary);
	if (not src) {
		throw std::ios_base::failure("Unable to open \"" + parPath + "\" for hashing");
	}

	src.seekg(0, std::ios_base::end);
	const std::streamoff end_offset = src.tellg();
	src.seekg(0, std::ios_base::beg);
	// tellg() reports -1 on failure; never let that reach the size arithmetic.
	if (end_offset < 0 or not src) {
		throw std::ios_base::failure("Unable to determine the size of \"" + parPath + "\"");
	}
	const uint64_t file_size = static_cast<uint64_t>(end_offset);

	const uint32_t buffsize = static_cast<uint32_t>(std::min<uint64_t>(file_size, g_buff_size));
	// Over-allocate by 63 bytes so a 64-byte aligned window of buffsize bytes
	// always fits inside the allocation.
	std::unique_ptr<char[]> buff(new char[63 + buffsize]);
	char* const buff_ptr = reinterpret_cast<char*>(reinterpret_cast<std::intptr_t>(buff.get() + 63) & (-64));
	assert(buff_ptr >= buff.get() and buff_ptr + buffsize <= buff.get() + 63 + buffsize);

	uint64_t remaining = file_size;
	// Full buffers. This loop only runs when the file is larger than the
	// buffer, in which case buffsize equals g_buff_size.
	while (remaining > buffsize) {
		assert(buffsize >= sizeof(uint64_t) * 3);
		assert(buffsize == (buffsize & -64));
		if (not src.read(buff_ptr, buffsize)) {
			throw std::ios_base::failure("Short read while hashing \"" + parPath + "\"");
		}
		remaining -= buffsize;
		tiger_sse2_chunk(buff_ptr, buff_ptr, buffsize, parHashFile.data, parHashDir.data);
	}

	// Tail: at most one buffer's worth of data is left.
	assert(remaining <= buffsize);
	src.read(buff_ptr, static_cast<std::streamsize>(remaining));
	if (static_cast<uint64_t>(src.gcount()) != remaining) {
		throw std::ios_base::failure("Short read while hashing \"" + parPath + "\"");
	}

	// Largest multiple of 64 that fits in the tail.
	const uint64_t aligned_size = remaining & ~static_cast<uint64_t>(63);
	if (aligned_size) {
		tiger_sse2_chunk(buff_ptr, buff_ptr, aligned_size, parHashFile.data, parHashDir.data);
	}
	tiger_sse2_last_chunk(buff_ptr + aligned_size, buff_ptr + aligned_size, remaining - aligned_size, file_size, parHashFile.data, parHashDir.data, g_tiger_padding);
}
// Formats parHash as lower-case hexadecimal text.
//
// Each of the three 64-bit words is byte-swapped and written as exactly 16
// digits, so the result is always 48 characters long. Without the zero
// padding a word with leading zero digits would be printed shorter, making
// the concatenated string ambiguous.
std::string tiger_to_string (const TigerHash& parHash) {
	std::ostringstream oss;
	oss << std::hex;
	oss.fill('0');

	const uint64_t parts[] = { parHash.part_a, parHash.part_b, parHash.part_c };
	for (const uint64_t part : parts) {
		// The field width is reset after every insertion, so set it each time.
		oss.width(16);
		oss << swap_long(part);
	}
	return oss.str();
}
} //namespace din

125
src/tiger.h Normal file
View file

@ -0,0 +1,125 @@
/**
* Copyright (c) 2012 Francisco Blas Izquierdo Riera (klondike)
* The Tiger algorithm was written by Eli Biham and Ross Anderson and is
* available on the official Tiger algorithm page.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* the algorithm authorsip notice, this list of conditions and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
* 4. If this license is not appropriate for you please write me at
* klondike ( a t ) klondike ( d o t ) es to negotiate another license.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**/
/**
* These are some implementations of tiger made without looking at the original
* reference code to ensure the resulting code can be published under a free
* license. The paper was consulted, though, to learn how Tiger works.
*/
/** Implementation details:
* * Here we assume char and unsigned char have size 1. If that's not the case in
* your compiler you may want to replace them with a type that does
*/
#ifndef TIGER_H
#define TIGER_H 1
#if !defined(_MSC_VER) || (_MSC_VER >= 1600)
#include <stdint.h>
#else
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#endif
#if _M_IX86_FP >= 2
#define __SSE2__
#endif
#ifdef __linux
#include <endian.h>
#if __BYTE_ORDER == __LITTLE_ENDIAN
#define IS_LITTLE_ENDIAN
#elif __BYTE_ORDER == __BIG_ENDIAN
#define USE_BIG_ENDIAN
#elif __BYTE_ORDER == __PDP_ENDIAN
#error "If you feel like writting code for PDP endianess go ahead, I'm not doing that"
#else
#error "Unknown endianess"
#endif
#else
//Assume little endian if you know how to detect endianism well on other compilers state it.
#define IS_LITTLE_ENDIAN
#endif
#if defined(_WIN64) || defined(__x86_64__) || defined(__amd64__)
#define HASX64
#endif
/** A word in the tiger hash, 64 bits **/
typedef uint64_t t_word;
/** This one is provided as a commodity for people wanting an easy way to declare result variables **/
typedef t_word t_res[3];
/** Intermediate state of a partial calculation, as used by tigerp1 and tigerp2 **/
typedef struct {
t_res h; // Hash state
char r[128]; // Salt buffer
t_word n; // Number of characters of r in use
t_word hs; // Total amount of data hashed
} t_pres;
/** This one is provided as a commodity for people wanting an easy way to declare block variables **/
typedef t_word t_block[8];
/** Standard tiger calculation, put your string in str and the string length on length and get the result on res **/
void tiger(const char *str, t_word length, t_res res, char pad);
/** Similar to tiger but interleaving accesses to both equally sized strings to reduce overhead and pipeline stalls you get the result of str1 on res1 and the one of str2 on res2 **/
void tiger_2(const char *str1, const char *str2, t_word length, t_res res1, t_res res2);
#ifdef __SSE2__
/** This is equivalent to tiger_2 but uses SSE2 for the key scheduling, making it faster **/
void tiger_sse2(const char *str1, const char *str2, t_word length, t_res res1, t_res res2, char pad);
#endif
/** This function is optimized for use on TTHs just send the two concatenated hashes and you will get back the hash with a prepended 0x01 **/
void tiger_49(const char *str, t_res res);
/** This function is optimized for use on TTHs just send the 1024 sized block and you will get back the hash with a prepended 0x00 **/
void tiger_1025(const char *str, t_res res);
/** Interleaved version of tiger_49 you insert two hashes and get back two results **/
void tiger_2_49(const char *str1, const char *str2, t_res res1, t_res res2);
/** Interleaved version of tiger_1025 you insert two hashes and get back two results **/
void tiger_2_1025(const char *str1, const char *str2, t_res res1, t_res res2);
#ifdef __SSE2__
/** SSE2 version of tiger_49 you insert two hashes and get back two results **/
void tiger_sse2_49(const char *str1, const char *str2, t_res res1, t_res res2);
/** SSE2 version of tiger_1025 you insert two hashes and get back two results **/
void tiger_sse2_1025(const char *str1, const char *str2, t_res res1, t_res res2);
#endif
/** First stage of partial tiger calculation to improve password security during storage **/
void tigerp1(const char *password, t_word length, const char *salt, t_pres *pres);
/** Second stage of partial tiger calculation **/
void tigerp2(const t_pres *pres, const char *salt, t_word length, t_res res);
#endif

46
src/tiger.hpp Normal file
View file

@ -0,0 +1,46 @@
/* Copyright 2015, Michele Santullo
* This file is part of "dindexer".
*
* "dindexer" is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* "dindexer" is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with "dindexer". If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef idBE93AF97FA4343ECA2BC8FB1FD3E5E60
#define idBE93AF97FA4343ECA2BC8FB1FD3E5E60
#include <cstdint>
#include <string>
namespace din {
// Storage for one hash value, made of three 64-bit words.
// The union exposes the same storage in three ways: as named words
// (part_a, part_b, part_c), as an array of words (data) and as raw bytes
// (byte_data).
// NOTE(review): the unnamed struct inside the union looks like it relies on a
// compiler extension rather than standard C++ - confirm for the supported
// toolchains.
struct TigerHash {
TigerHash ( void ) = default;
union {
struct {
uint64_t part_a;
uint64_t part_b;
uint64_t part_c;
};
// Word view; this is what tiger_file passes to the hashing routines.
uint64_t data[3];
// Byte view of the same three words.
uint8_t byte_data[sizeof(uint64_t) * 3];
};
};
static_assert(sizeof(TigerHash) == 24, "Wrong struct size");
void tiger_file ( const std::string& parPath, TigerHash& parHashFile, TigerHash& parHashDir );
void tiger_init_hash ( TigerHash& parHash );
std::string tiger_to_string ( const TigerHash& parHash );
} //namespace din
#endif