tigersum/tiger.c

234 lines
5.7 KiB
C
Raw Normal View History

2014-09-19 16:19:25 +00:00
/* Integer aliases used throughout the hash code. */
typedef unsigned long long word64;  /* one 64-bit hash/message word */
typedef unsigned long      word32;  /* at least 32 bits; may be wider on some ABIs */
typedef unsigned char      byte;    /* raw message byte */
/* Big endian: */
//#if !(defined(__alpha)||defined(__i386__)||defined(__vax__))
//#define BIG_ENDIAN
//#endif
/* OPTIMIZE_FOR_ALPHA selects the code variants tuned for Alpha */
/* (the #ifdef OPTIMIZE_FOR_ALPHA sections further down). It is */
/* used only to save time; without it the other variants of the */
/* same macros are compiled instead. */
#ifdef __alpha
#define OPTIMIZE_FOR_ALPHA
#endif
/* NOTE that this code is NOT FULLY OPTIMIZED for any */
/* machine. Assembly code might be much faster on some */
/* machines, especially if the code is compiled with */
/* gcc. */
/* The number of passes of the hash function. */
/* Three passes are recommended. */
/* Use four passes when you need extra security. */
/* Must be at least three: the Alpha variant of compress always */
/* runs its first three passes unconditionally. */
#define PASSES 3
/* Lookup table, declared here and defined elsewhere. It is accessed */
/* as four consecutive 256-entry sub-tables through t1..t4. */
extern word64 table[4*256];
/* One 64-byte message block, viewable either as eight 64-bit words */
/* (the form handed to tiger_compress) or as its individual bytes. */
union RawAccessULL4 {
    word64        ull[8];   /* word view */
    unsigned char raw[64];  /* byte view of the same storage */
};
2014-09-19 16:19:25 +00:00
/* The four 256-entry sub-tables, laid out back to back inside table. */
#define t1 (table)
#define t2 (table+256)
#define t3 (table+256*2)
#define t4 (table+256*3)
/* Remember the incoming values of a, b, c in aa, bb, cc so that */
/* feedforward can combine them with the result. Expands to bare */
/* statements; a, b, c, aa, bb, cc must be in scope at the use site. */
#define save_abc \
aa = a; \
bb = b; \
cc = c;
/* round(a,b,c,x,mul): one round of the compression function. */
/*   c ^= x */
/*   a -= t1[byte 0 of c] ^ t2[byte 2] ^ t3[byte 4] ^ t4[byte 6] */
/*   b += t4[byte 1 of c] ^ t3[byte 3] ^ t2[byte 5] ^ t1[byte 7] */
/*   b *= mul */
/* where byte 0 is the least significant byte of c. */
/* The expansion is a bare sequence of statements that names its */
/* arguments several times, so pass plain variables only and do not */
/* use it as the unbraced body of an if/for. */
/* NOTE(review): the name round would collide with round() from */
/* <math.h> if that header were ever included in this file - confirm. */
#ifdef OPTIMIZE_FOR_ALPHA
/* This is the official definition of round */
#define round(a,b,c,x,mul) \
c ^= x; \
a -= t1[((c)>>(0*8))&0xFF] ^ t2[((c)>>(2*8))&0xFF] ^ \
t3[((c)>>(4*8))&0xFF] ^ t4[((c)>>(6*8))&0xFF] ; \
b += t4[((c)>>(1*8))&0xFF] ^ t3[((c)>>(3*8))&0xFF] ^ \
t2[((c)>>(5*8))&0xFF] ^ t1[((c)>>(7*8))&0xFF] ; \
b *= mul;
#else
/* This code works faster when compiled on 32-bit machines */
/* (but works slower on Alpha) */
/* It selects the same bytes of c as the definition above: the low */
/* half of c, or c shifted right by 32, is cast to word32, shifted */
/* by 0..3 bytes and truncated to byte. */
#define round(a,b,c,x,mul) \
c ^= x; \
a -= t1[(byte)(c)] ^ \
t2[(byte)(((word32)(c))>>(2*8))] ^ \
t3[(byte)((c)>>(4*8))] ^ \
t4[(byte)(((word32)((c)>>(4*8)))>>(2*8))] ; \
b += t4[(byte)(((word32)(c))>>(1*8))] ^ \
t3[(byte)(((word32)(c))>>(3*8))] ^ \
t2[(byte)(((word32)((c)>>(4*8)))>>(1*8))] ^ \
t1[(byte)(((word32)((c)>>(4*8)))>>(3*8))]; \
b *= mul;
#endif
/* pass(a,b,c,mul): eight rounds, one for each message word x0..x7. */
/* The three registers swap roles from round to round in the order */
/* (a,b,c), (b,c,a), (c,a,b); mul is handed unchanged to every round. */
/* x0..x7 must be in scope at the use site. */
#define pass(a,b,c,mul) \
round(a,b,c,x0,mul) \
round(b,c,a,x1,mul) \
round(c,a,b,x2,mul) \
round(a,b,c,x3,mul) \
round(b,c,a,x4,mul) \
round(c,a,b,x5,mul) \
round(a,b,c,x6,mul) \
round(b,c,a,x7,mul)
#define key_schedule \
x0 -= x7 ^ 0xA5A5A5A5A5A5A5A5LL; \
x1 ^= x0; \
x2 += x1; \
x3 -= x2 ^ ((~x1)<<19); \
x4 ^= x3; \
x5 += x4; \
x6 -= x5 ^ ((~x4)>>23); \
x7 ^= x6; \
x0 += x7; \
x1 -= x0 ^ ((~x7)<<19); \
x2 ^= x1; \
x3 += x2; \
x4 -= x3 ^ ((~x2)>>23); \
x5 ^= x4; \
x6 += x5; \
x7 -= x6 ^ 0x0123456789ABCDEFLL;
/* feedforward: fold the values saved by save_abc back into the */
/* registers - a by XOR, b by subtraction, c by addition. */
#define feedforward \
a ^= aa; \
b -= bb; \
c += cc;
/* compress: the body of the compression function. It saves a, b, c, */
/* runs PASSES passes with key_schedule applied before every pass */
/* except the first, then applies feedforward. The multiplier is 5 */
/* for the first pass, 7 for the second and 9 for all later ones. */
/* Needs a, b, c, tmpa, aa, bb, cc, x0..x7 and pass_no in scope. */
#ifdef OPTIMIZE_FOR_ALPHA
/* The loop is unrolled: works better on Alpha */
/* The first three passes are written out with the register roles */
/* already permuted; only passes beyond the third use the loop, */
/* which rotates the registers (a<-c, c<-b, b<-a) after each pass. */
#define compress \
save_abc \
pass(a,b,c,5) \
key_schedule \
pass(c,a,b,7) \
key_schedule \
pass(b,c,a,9) \
for(pass_no=3; pass_no<PASSES; pass_no++) { \
key_schedule \
pass(a,b,c,9) \
tmpa=a; a=c; c=b; b=tmpa;} \
feedforward
#else
/* loop: works better on PC and Sun (smaller cache?) */
/* Every pass is written as pass(a,b,c,...) and the registers are */
/* rotated (a<-c, c<-b, b<-a) after each one instead of permuting */
/* the macro arguments. */
#define compress \
save_abc \
for(pass_no=0; pass_no<PASSES; pass_no++) { \
if(pass_no != 0) {key_schedule} \
pass(a,b,c,(pass_no==0?5:pass_no==1?7:9)); \
tmpa=a; a=c; c=b; b=tmpa;} \
feedforward
#endif
/* tiger_compress_macro(str, state): expands to a braced block that */
/* loads state[0..2] into a, b, c and str[0..7] into x0..x7, runs */
/* compress, and writes a, b, c back to state[0..2]. */
/* str and state are substituted textually and indexed directly, so */
/* pass parenthesized pointer expressions. All working variables are */
/* declared inside the block. */
#define tiger_compress_macro(str, state) \
{ \
register word64 a, b, c, tmpa; \
word64 aa, bb, cc; \
register word64 x0, x1, x2, x3, x4, x5, x6, x7; \
int pass_no; \
\
a = state[0]; \
b = state[1]; \
c = state[2]; \
\
x0=str[0]; x1=str[1]; x2=str[2]; x3=str[3]; \
x4=str[4]; x5=str[5]; x6=str[6]; x7=str[7]; \
\
compress; \
\
state[0] = a; \
state[1] = b; \
state[2] = c; \
}
/*
 * Apply the compression function to one 64-byte block.
 *
 * str   - eight 64-bit message words, read as str[0]..str[7]
 * state - the three chaining words; read on entry and overwritten
 *         with the updated values
 *
 * The work is done by tiger_compress_macro; it is wrapped in a real
 * function here (original note: "Requires smaller cache?").
 */
void tiger_compress(word64 *str, word64 state[3])
{
    /* Both arguments already have type word64 *, so no casts are needed. */
    tiger_compress_macro(str, state);
}
#ifdef OPTIMIZE_FOR_ALPHA
/* The compress function is inlined: works better on Alpha. */
/* Still leaves the function above in the code, in case some other */
/* module calls it directly. */
/* From this point on, every tiger_compress(...) call in this file */
/* expands to the macro body instead of calling the function. */
#define tiger_compress(str, state) \
tiger_compress_macro(((word64*)str), ((word64*)state))
#endif
/*
 * Load the three initial chaining values into res.
 *
 * res - must point to at least three writable 64-bit words; all three
 *       are overwritten.
 */
void tiger_init(word64 res[3])
{
    /* ULL suffix: two of these constants do not fit in a signed long long. */
    res[0] = 0x0123456789ABCDEFULL;
    res[1] = 0xFEDCBA9876543210ULL;
    res[2] = 0xF096A5B4C3B2E187ULL;
}
/*
 * Feed every complete 64-byte block of str into the running state.
 *
 * str    - message data; consumed eight words (64 bytes) at a time
 * length - number of bytes available; any remainder below 64 is ignored
 * res    - three chaining words, updated in place by tiger_compress
 *
 * When BIG_ENDIAN is defined, each block is first copied into a local
 * buffer with the bytes of every 8-byte word reversed (index j ^ 7).
 * The buffer is a union RawAccessULL4 so the words passed to
 * tiger_compress are correctly aligned and are not read through a cast
 * of a plain byte array.
 */
void tiger_chunk(word64 *str, word64 length, word64 res[3])
{
    word64 remaining;
#ifdef BIG_ENDIAN
    union RawAccessULL4 temp;
    word64 j;
#endif

    for (remaining = length; remaining >= 64; remaining -= 64) {
#ifdef BIG_ENDIAN
        for (j = 0; j < 64; j++) {
            temp.raw[j ^ 7] = ((const byte *)str)[j];
        }
        tiger_compress(temp.ull, res);
#else
        tiger_compress(str, res);
#endif
        str += 8;   /* advance one 64-byte block */
    }
}
/*
 * Pad and compress the final, partial block of a message.
 *
 * str        - start of the trailing bytes that did not fill a block
 * length     - only its low six bits are used: the number of trailing
 *              bytes (0..63) copied from str
 * reallength - total message length in bytes; stored as a bit count
 *              (reallength << 3) in the last word of the final block
 * res        - three chaining words, updated in place
 * pad        - value of the first padding byte written after the data
 *              (presumably 0x01 for Tiger, 0x80 for Tiger2 - confirm
 *              against the caller)
 *
 * Layout: data bytes, the pad byte, zeros up to byte 56, then the
 * 64-bit length. If the data plus pad byte, rounded up to a whole
 * word, spill past byte 56, that block is zero-filled and compressed
 * and the length goes into an additional all-zero block.
 */
void tiger_last_chunk(word64 *str, word64 length, word64 reallength, word64 res[3], byte pad)
{
    const word64 tail = length & 0x3f;
    const byte *src = (const byte *)str;
    union RawAccessULL4 temp;
    word64 j;

#ifdef BIG_ENDIAN
    /* j ^ 7 reverses the byte order inside each 8-byte word. */
    for (j = 0; j < tail; j++) {
        temp.raw[j ^ 7] = src[j];
    }
    temp.raw[j ^ 7] = pad;
    j++;
    /* Zero the rest of the word that holds the pad byte. */
    for (; j & 7; j++) {
        temp.raw[j ^ 7] = 0;
    }
#else
    for (j = 0; j < tail; j++) {
        temp.raw[j] = src[j];
    }
    temp.raw[j++] = pad;
    /* Zero the rest of the word that holds the pad byte. */
    for (; j & 7; j++) {
        temp.raw[j] = 0;
    }
#endif

    /* j is now a multiple of 8; above 56 means no room for the length. */
    if (j > 56) {
        for (; j < 64; j++) {
            temp.raw[j] = 0;
        }
        tiger_compress(temp.ull, res);
        j = 0;
    }

    for (; j < 56; j++) {
        temp.raw[j] = 0;
    }
    temp.ull[7] = reallength << 3;   /* message length in bits */
    tiger_compress(temp.ull, res);
}
/*
 * Hash a whole message in one call.
 *
 * str    - message data
 * length - message length in bytes
 * res    - receives the three 64-bit result words
 * pad    - first padding byte, forwarded to tiger_last_chunk
 *
 * Initializes res, compresses all complete 64-byte blocks, then pads
 * and compresses the remaining length % 64 bytes.
 */
void tiger(word64 *str, word64 length, word64 res[3], byte pad)
{
    /* Length rounded down to a multiple of 64 bytes. */
    const word64 proc_length = length & ~0x3fULL;

    tiger_init(res);
    tiger_chunk(str, proc_length, res);
    /* str is a word pointer, so the byte offset is divided by 8. */
    tiger_last_chunk(str + (proc_length / 8), length - proc_length, length, res, pad);
}