mirror of
https://github.com/WinampDesktop/winamp.git
synced 2024-09-24 15:54:12 +00:00
154 lines
3.7 KiB
C++
154 lines
3.7 KiB
C++
|
/*
   BLAKE2 reference source code package - reference C implementations

   Written in 2012 by Samuel Neves <sneves@dei.uc.pt>

   To the extent possible under law, the author(s) have dedicated all copyright
   and related and neighboring rights to this software to the public domain
   worldwide. This software is distributed without any warranty.

   You should have received a copy of the CC0 Public Domain Dedication along with
   this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
|
||
|
|
||
|
// Number of BLAKE2s leaf states kept in blake2sp_state::S. Input is dealt to
// the leaves one BLAKE2S_BLOCKBYTES block at a time, in round-robin order.
#define PARALLELISM_DEGREE 8
|
||
|
|
||
|
// Resets a BLAKE2sp state: initializes the root hash, all PARALLELISM_DEGREE
// leaf hashes, and empties the staging buffer.
void blake2sp_init( blake2sp_state *S )
{
  // Init root and mark it as a last node.
  blake2s_init_param( &S->R, 0, 1 );
  S->R.last_node = 1;

  // Init leaves; each one receives its own index as the second argument.
  uint Leaf = 0;
  while( Leaf < PARALLELISM_DEGREE )
  {
    blake2s_init_param( &S->S[Leaf], Leaf, 0 );
    Leaf++;
  }

  // Only the final leaf carries the last node flag.
  S->S[PARALLELISM_DEGREE - 1].last_node = 1;

  // No input is pending yet.
  S->buflen = 0;
  memset( S->buf, 0, sizeof( S->buf ) );
}
|
||
|
|
||
|
|
||
|
struct Blake2ThreadData
|
||
|
{
|
||
|
void Update();
|
||
|
blake2s_state *S;
|
||
|
const byte *in;
|
||
|
size_t inlen;
|
||
|
};
|
||
|
|
||
|
|
||
|
void Blake2ThreadData::Update()
|
||
|
{
|
||
|
size_t inlen__ = inlen;
|
||
|
const byte *in__ = ( const byte * )in;
|
||
|
|
||
|
while( inlen__ >= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES )
|
||
|
{
|
||
|
#ifdef USE_SSE
|
||
|
// We gain 5% in i7 SSE mode by prefetching next data block.
|
||
|
if (_SSE_Version>=SSE_SSE && inlen__ >= 2 * PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES)
|
||
|
_mm_prefetch((char*)(in__ + PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES), _MM_HINT_T0);
|
||
|
#endif
|
||
|
blake2s_update( S, in__, BLAKE2S_BLOCKBYTES );
|
||
|
in__ += PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
|
||
|
inlen__ -= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#ifdef RAR_SMP
// Thread pool entry point: 'Data' carries a Blake2ThreadData work item,
// which is processed by calling its Update() method.
THREAD_PROC(Blake2Thread)
{
  ((Blake2ThreadData *)Data)->Update();
}
#endif
|
||
|
|
||
|
|
||
|
// Absorbs 'inlen' bytes from 'in' into the BLAKE2sp state 'S'.
//
// Data is consumed in stripes of PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES
// bytes, where block number i of every stripe goes to leaf S->S[i].
// Bytes that do not complete a stripe are kept in S->buf (S->buflen of them)
// until a later call or blake2sp_final().
//
// With RAR_SMP defined, leaves may be processed by S->ThPool tasks when the
// input is at least 0x1000 bytes and S->MaxThreads allows it. If the pool is
// missing or the thread count is zero, the work is done inline instead.
void blake2sp_update( blake2sp_state *S, const byte *in, size_t inlen )
{
  size_t left = S->buflen;
  size_t fill = sizeof( S->buf ) - left;

  // Previously buffered bytes come first: if the new input can complete
  // the buffered stripe, top it up and hash one block into every leaf.
  if( left && inlen >= fill )
  {
    memcpy( S->buf + left, in, fill );

    for( size_t i = 0; i < PARALLELISM_DEGREE; ++i )
      blake2s_update( &S->S[i], S->buf + i * BLAKE2S_BLOCKBYTES, BLAKE2S_BLOCKBYTES );

    in += fill;
    inlen -= fill;
    left = 0;
  }

  Blake2ThreadData btd_array[PARALLELISM_DEGREE];

#ifdef RAR_SMP
  uint ThreadNumber = inlen < 0x1000 ? 1 : S->MaxThreads;

  if (ThreadNumber==6 || ThreadNumber==7) // 6 and 7 threads work slower than 4 here.
    ThreadNumber=4;

  // A zero thread count would leave the dispatch loop below without progress
  // and a missing pool cannot accept tasks, so process inline in both cases.
  if (ThreadNumber==0 || S->ThPool==NULL)
    ThreadNumber=1;
#else
  uint ThreadNumber=1;
#endif

  // Dispatch the leaves in batches of up to ThreadNumber. Work item slots
  // are reused by the next batch, which is safe because every batch is
  // waited for before the next one starts.
  for (size_t id__=0;id__<PARALLELISM_DEGREE;)
  {
    for (uint Thread=0;Thread<ThreadNumber && id__<PARALLELISM_DEGREE;Thread++)
    {
      Blake2ThreadData *btd=btd_array+Thread;

      // Leaf id__ starts at its own block inside the first stripe;
      // Update() then strides over the following stripes itself.
      btd->inlen = inlen;
      btd->in = in + id__ * BLAKE2S_BLOCKBYTES;
      btd->S = &S->S[id__];

#ifdef RAR_SMP
      if (ThreadNumber>1)
        S->ThPool->AddTask(Blake2Thread,(void*)btd);
      else
        btd->Update();
#else
      btd->Update();
#endif
      id__++;
    }
#ifdef RAR_SMP
    if (S->ThPool!=NULL) // Can be NULL in -mt1 mode.
      S->ThPool->WaitDone();
#endif // RAR_SMP
  }

  // Skip the complete stripes consumed above and keep only the tail.
  in += inlen - inlen % ( PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES );
  inlen %= PARALLELISM_DEGREE * BLAKE2S_BLOCKBYTES;

  if( inlen > 0 )
    memcpy( S->buf + left, in, inlen );

  S->buflen = left + inlen;
}
|
||
|
|
||
|
|
||
|
// Finishes the BLAKE2sp computation and writes the result to 'digest'.
// Bytes still pending in S->buf are handed to the leaves block by block,
// every leaf is finalized, and the leaf hashes are then hashed by the root
// in leaf order to produce the final digest.
void blake2sp_final( blake2sp_state *S, byte *digest )
{
  byte LeafHash[PARALLELISM_DEGREE][BLAKE2S_OUTBYTES];

  for( size_t Leaf = 0; Leaf < PARALLELISM_DEGREE; Leaf++ )
  {
    const size_t Offset = Leaf * BLAKE2S_BLOCKBYTES;

    // A leaf receives data only if the buffered tail reaches its block.
    if( S->buflen > Offset )
    {
      const size_t Pending = S->buflen - Offset;
      const size_t Chunk = Pending > BLAKE2S_BLOCKBYTES ? (size_t)BLAKE2S_BLOCKBYTES : Pending;
      blake2s_update( &S->S[Leaf], S->buf + Offset, Chunk );
    }

    blake2s_final( &S->S[Leaf], LeafHash[Leaf] );
  }

  // Root consumes all leaf hashes, in order, once every leaf is finalized.
  size_t Node = 0;
  while( Node < PARALLELISM_DEGREE )
  {
    blake2s_update( &S->R, LeafHash[Node], BLAKE2S_OUTBYTES );
    Node++;
  }

  blake2s_final( &S->R, digest );
}
|