@@ -39,19 +39,49 @@ __constant static const sph_u64 blake2b_IV[8] =
39
39
};
40
40
41
41
/*Blake2b's rotation*/
42
- static inline sph_u64 rotr64 ( const sph_u64 w , const unsigned c ){
43
- return rotate (w , (ulong )(64 - c ));
44
- }
45
42
46
- /*Blake2b's G function*/
43
/*
 * Rotate a 64-bit value, held as two 32-bit words, right by `a` bits.
 *
 * The shifts below treat v.x as the low 32 bits and v.y as the high 32 bits.
 *
 * v: value to rotate.
 * a: rotation count; it is reduced modulo 64 before use.
 * Returns the rotated value in the same two-word layout.
 *
 * Counts of 0 and 32 are handled without shifting so that no 32-bit shift is
 * ever issued with a count of 32: such a shift does not produce zero in
 * OpenCL C (the count is masked), which would corrupt the result.
 */
static inline uint2 ror2(uint2 v, unsigned a) {
    const unsigned r = a & 63u;

    if (r == 0) {
        /* Nothing to rotate. */
        return v;
    }
    if (r == 32) {
        /* Rotating by half the width just swaps the two words. */
        return (uint2)(v.y, v.x);
    }
    if (r < 32) {
        /* Each word keeps its upper part and receives the low r bits of the other. */
        return (uint2)((v.x >> r) | (v.y << (32 - r)),
                       (v.y >> r) | (v.x << (32 - r)));
    }
    /* 32 < r < 64: a word swap followed by a rotation by r - 32. */
    return (uint2)((v.y >> (r - 32)) | (v.x << (64 - r)),
                   (v.x >> (r - 32)) | (v.y << (64 - r)));
}
57
/*
 * Rotate a 64-bit value, held as two 32-bit words (v.x low, v.y high), right
 * by `a` bits, for counts below 32.
 *
 * Only meaningful for 0 < a < 32: outside that range one of the 32-bit shift
 * counts leaves 1..31 and the halves are no longer combined correctly.
 */
static inline uint2 ror2l(uint2 v, unsigned a) {
    /* Number of bits each word receives from the other word. */
    const unsigned carry = 32 - a;

    return (uint2)((v.y << carry) | (v.x >> a),
                   (v.x << carry) | (v.y >> a));
}
63
/*
 * Rotate a 64-bit value, held as two 32-bit words (v.x low, v.y high), right
 * by `a` bits, for counts above 32.
 *
 * Only meaningful for 32 < a < 64: outside that range one of the 32-bit shift
 * counts leaves 1..31 and the halves are no longer combined correctly.
 */
static inline uint2 ror2r(uint2 v, unsigned a) {
    const unsigned up = 64 - a;    /* left shift applied to the word that stays */
    const unsigned down = a - 32;  /* right shift applied to the other word */

    return (uint2)((v.x << up) | (v.y >> down),
                   (v.y << up) | (v.x >> down));
}
69
+ /*
47
70
#define G(a,b,c,d) \
48
71
do { \
49
- a += b; d ^= a; d = SPH_ROTR64(d, 32); \
50
- c += d; b ^= c; b = SPH_ROTR64(b, 24); \
51
- a += b; d ^= a; d = SPH_ROTR64(d, 16); \
52
- c += d; b ^= c; b = SPH_ROTR64(b, 63); \
72
+ a = as_uint2(as_ulong(a)+as_ulong(b)); d ^= a; d = d.yx; \
73
+ c = as_uint2(as_ulong(c)+as_ulong(d)); b ^= c; b = ror2l(b, 24); \
74
+ a = as_uint2(as_ulong(a)+as_ulong(b)); d ^= a; d = ror2l(d, 16); \
75
+ c = as_uint2(as_ulong(c)+as_ulong(d)); b ^= c; b = ror2r(b, 63); \
76
+ } while(0)
77
+ */
78
/*
 * Blake2b's G mixing step on four 64-bit words, each stored as a uint2.
 *
 * a, b, c, d must be modifiable uint2 lvalues; each is read and written
 * several times, so arguments must not have side effects.
 *
 * Additions are done on the 64-bit reinterpretation (as_ulong) so that the
 * carry propagates between the two 32-bit halves. Each of the four steps ends
 * with a rotation of d or b:
 *   - d.yx swaps the two 32-bit halves (a rotation by 32 bits);
 *   - the uchar8 swizzle s34567012 moves every byte three positions down,
 *     wrapping the lowest three to the top (a right rotation by 24 bits,
 *     assuming byte 0 is the least significant one -- NOTE(review): confirm
 *     for the target device);
 *   - ror2l(d, 16) and ror2r(b, 63) rotate right by 16 and 63 bits.
 *
 * There must be no whitespace between the macro name and its parameter list,
 * otherwise the preprocessor defines an object-like macro instead.
 */
#define G(a,b,c,d) \
    do { \
        a = as_uint2(as_ulong(a)+as_ulong(b)); d ^= a; d = d.yx; \
        c = as_uint2(as_ulong(c)+as_ulong(d)); b ^= c; b = as_uint2(as_uchar8(b).s34567012); \
        a = as_uint2(as_ulong(a)+as_ulong(b)); d ^= a; d = ror2l(d, 16); \
        c = as_uint2(as_ulong(c)+as_ulong(d)); b ^= c; b = ror2r(b, 63); \
    } while(0)
54
-
55
85
56
86
/*One round of Blake2b's compression function*/
57
87
#define round_lyra (v ) \
@@ -72,7 +102,7 @@ c += d; b ^= c; b = SPH_ROTR64(b, 63); \
72
102
for (int i = 0; i < 8; i++) \
73
103
{ \
74
104
\
75
- for (int j = 0; j < 12; j++) {state[j] ^= Matrix[12 * i + j][rowIn] + Matrix[12 * i + j][rowInOut];} \
105
+ for (int j = 0; j < 12; j++) {state[j] ^= as_uint2(as_ulong( Matrix[12 * i + j][rowIn]) + as_ulong( Matrix[12 * i + j][rowInOut])) ;} \
76
106
round_lyra(state); \
77
107
for (int j = 0; j < 12; j++) {Matrix[j + 84 - 12 * i][rowOut] = Matrix[12 * i + j][rowIn] ^ state[j];} \
78
108
\
@@ -97,7 +127,7 @@ c += d; b ^= c; b = SPH_ROTR64(b, 63); \
97
127
for (int i = 0; i < 8; i++) \
98
128
{ \
99
129
for (int j = 0; j < 12; j++) \
100
- state[j] ^= Matrix[12 * i + j][rowIn] + Matrix[12 * i + j][rowInOut]; \
130
+ state[j] ^= as_uint2(as_ulong( Matrix[12 * i + j][rowIn]) + as_ulong( Matrix[12 * i + j][rowInOut])) ; \
101
131
\
102
132
round_lyra(state); \
103
133
for (int j = 0; j < 12; j++) {Matrix[j + 12 * i][rowOut] ^= state[j];} \
@@ -142,4 +172,4 @@ c += d; b ^= c; b = SPH_ROTR64(b, 63); \
142
172
round_lyra(state); \
143
173
round_lyra(state); \
144
174
round_lyra(state); \
145
- }
175
+ }
0 commit comments