module crypto.blake2.blake2s_round;

import std.conv: to;
import inteli.emmintrin;


package:
pure nothrow @nogc:


// Short names for the unaligned load/store and the integer <-> float
// reinterpretation intrinsics.
alias LOADU = _mm_loadu_si128;
alias STOREU = _mm_storeu_si128;
alias TOF = _mm_castsi128_ps;
alias TOI = _mm_castps_si128;


/**
 * Combines a logical right shift of every 32-bit lane of `r` by `-c` bits
 * with a logical left shift by `32 - (-c)` bits, XOR-ing the two halves.
 *
 * Every caller in this module passes a negative `c` (-16, -12, -8, -7), for
 * which the result is a right rotation of each lane by `-c` bits.
 *
 * Params:
 *   r = vector whose 32-bit lanes are rotated
 *   c = negated rotation amount
 * Returns: the rotated vector.
 */
__m128i _mm_roti_epi32( in __m128i r, in int c )
@safe
{
    immutable int rightBits = -c;
    immutable int leftBits = 32 - rightBits;

    __m128i shiftedRight = _mm_srli_epi32(r, rightBits);
    __m128i shiftedLeft = _mm_slli_epi32(r, leftBits);
    return _mm_xor_si128(shiftedRight, shiftedLeft);
}

version(LDC)
{
    /**
     * Mixin source for the first mixing half-step (rotations by 16 and 12).
     * The generated statements read `bufs[buf]` and update `rows[0 .. 4]`, so
     * both `rows` and `bufs` must be in scope where the string is mixed in.
     */
    template tmplG1 (int buf)
    {
        // Decimal text of the message-buffer slot spliced into the source.
        enum slot = buf.to!string;
        const tmplG1 = `
            rows[0] = _mm_add_epi32( _mm_add_epi32( rows[0], bufs[` ~ slot ~ `]), rows[1] );
            rows[3] = _mm_xor_si128( rows[3], rows[0] );
            rows[3] = _mm_roti_epi32(rows[3], -16);
            rows[2] = _mm_add_epi32( rows[2], rows[3] );
            rows[1] = _mm_xor_si128( rows[1], rows[2] );
            rows[1] = _mm_roti_epi32(rows[1], -12);
        `;
    }

    /**
     * Mixin source for the second mixing half-step (rotations by 8 and 7).
     * Same shape and scope requirements as `tmplG1`.
     */
    template tmplG2 (int buf)
    {
        // Decimal text of the message-buffer slot spliced into the source.
        enum slot = buf.to!string;
        const tmplG2 = `
            rows[0] = _mm_add_epi32( _mm_add_epi32( rows[0], bufs[` ~ slot ~ `]), rows[1] );
            rows[3] = _mm_xor_si128( rows[3], rows[0] );
            rows[3] = _mm_roti_epi32(rows[3], -8);
            rows[2] = _mm_add_epi32( rows[2], rows[3] );
            rows[1] = _mm_xor_si128( rows[1], rows[2] );
            rows[1] = _mm_roti_epi32(rows[1], -7);
        `;
    }
}
else
{
    /**
     * First mixing half-step: adds `buf` and `rows[1]` into `rows[0]`, then
     * propagates through `rows[3]`, `rows[2]` and `rows[1]` with rotations
     * by 16 and 12 bits.
     *
     * Params:
     *   rows = the four state vectors, updated in place
     *   buf  = message words for this half-step
     */
    void fG1 (ref __m128i[4] rows, in __m128i buf)
    {
        // Same operand order as a single nested add: (rows[0] + buf) + rows[1].
        rows[0] = _mm_add_epi32(rows[0], buf);
        rows[0] = _mm_add_epi32(rows[0], rows[1]);

        rows[3] = _mm_roti_epi32(_mm_xor_si128(rows[3], rows[0]), -16);

        rows[2] = _mm_add_epi32(rows[2], rows[3]);

        rows[1] = _mm_roti_epi32(_mm_xor_si128(rows[1], rows[2]), -12);
    }

    /**
     * Second mixing half-step: identical data flow to `fG1`, but with
     * rotations by 8 and 7 bits.
     *
     * Params:
     *   rows = the four state vectors, updated in place
     *   buf  = message words for this half-step
     */
    void fG2 (ref __m128i[4] rows, in __m128i buf)
    {
        // Same operand order as a single nested add: (rows[0] + buf) + rows[1].
        rows[0] = _mm_add_epi32(rows[0], buf);
        rows[0] = _mm_add_epi32(rows[0], rows[1]);

        rows[3] = _mm_roti_epi32(_mm_xor_si128(rows[3], rows[0]), -8);

        rows[2] = _mm_add_epi32(rows[2], rows[3]);

        rows[1] = _mm_roti_epi32(_mm_xor_si128(rows[1], rows[2]), -7);
    }
}


// Statement-mixin source that permutes the lanes of rows[0], rows[3] and
// rows[2]; rows[1] is left untouched. Requires `rows` in scope.
immutable DIAGONALIZE = `
    rows[0] = _mm_shuffle_epi32!(_MM_SHUFFLE(2,1,0,3))( rows[0] );
    rows[3] = _mm_shuffle_epi32!(_MM_SHUFFLE(1,0,3,2))( rows[3] );
    rows[2] = _mm_shuffle_epi32!(_MM_SHUFFLE(0,3,2,1))( rows[2] );
`;

// Statement-mixin source applying the inverse lane permutations of
// DIAGONALIZE to the same three rows. Requires `rows` in scope.
immutable UNDIAGONALIZE = `
    rows[0] = _mm_shuffle_epi32!(_MM_SHUFFLE(0,3,2,1))( rows[0] );
    rows[3] = _mm_shuffle_epi32!(_MM_SHUFFLE(1,0,3,2))( rows[3] );
    rows[2] = _mm_shuffle_epi32!(_MM_SHUFFLE(2,1,0,3))( rows[2] );
`;

// Message-word selection table, indexed as matrix[round][step][k].
// Each innermost entry lists four indices into the 16-word message block, in
// the argument order handed to _mm_set_epi32. Ten rounds, four steps each.
immutable matrix = [
    [
        [6,  4,  2,  0 ],
        [7,  5,  3,  1 ],
        [12, 10, 8,  14],
        [13, 11, 9,  15]
    ],
    [
        [13, 9,  4,  14],
        [6,  15, 8,  10],
        [11, 0,  1,  5 ],
        [7,  2,  12, 3 ]
    ],
    [
        [15, 5,  12, 11],
        [13, 2,  0,  8 ],
        [7,  3,  10, 9 ],
        [1,  6,  14, 4 ]
    ],
    [
        [11, 13, 3,  7 ],
        [14, 12, 1,  9 ],
        [4,  5,  2,  15],
        [0,  10, 6,  8 ]
    ],
    [
        [10, 2,  5,  9 ],
        [15, 4,  7,  0 ],
        [6,  11, 14, 3 ],
        [8,  12, 1,  13]
    ],
    [
        [8,  0,  6,  2 ],
        [3,  11, 10, 12],
        [15, 7,  4,  1 ],
        [14, 5,  13, 9 ]
    ],
    [
        [4,  14, 1,  12],
        [10, 13, 15, 5 ],
        [9,  6,  0,  8 ],
        [2,  3,  7,  11]
    ],
    [
        [3,  12, 7,  13],
        [9,  1,  14, 11],
        [8,  15, 5,  2 ],
        [6,  4,  0,  10]
    ],
    [
        [0,  11, 14, 6 ],
        [8,  3,  9,  15],
        [1,  13, 12, 10],
        [4,  7,  2,  5 ]
    ],
    [
        [1,  7,  8,  10],
        [5,  6,  4,  2 ],
        [3,  9,  15, 13],
        [12, 14, 11, 0 ]
    ]
];


version(LDC)
{
    /**
     * Mixin source that fills `bufs[buf]` with the four message words chosen
     * by `matrix[r][c]`. Requires `bufs` and an indexable `m` in scope at the
     * mixin site.
     */
    template tmplLoadMsg (int r, int c, int buf)
    {
        // Compile-time lookup of the four message indices for this step.
        const lanes = matrix[r][c];
        const tmplLoadMsg = "
            bufs[" ~ buf.to!string ~ "] = _mm_set_epi32(
                m[" ~ lanes[0].to!string ~ "],
                m[" ~ lanes[1].to!string ~ "],
                m[" ~ lanes[2].to!string ~ "],
                m[" ~ lanes[3].to!string ~ "]
            );
        ";
    }

    /**
     * Mixin source for one complete round `r`: two load+mix steps,
     * DIAGONALIZE, two more load+mix steps, UNDIAGONALIZE.
     */
    template tmplRound (int r)
    {
        // Steps 0 and 1, executed before the lane permutation.
        enum firstHalf =
            tmplLoadMsg!(r, 0, 0) ~ tmplG1!0 ~
            tmplLoadMsg!(r, 1, 1) ~ tmplG2!1;

        // Steps 2 and 3, executed on the permuted rows.
        enum secondHalf =
            tmplLoadMsg!(r, 2, 2) ~ tmplG1!2 ~
            tmplLoadMsg!(r, 3, 3) ~ tmplG2!3;

        const tmplRound = firstHalf ~ DIAGONALIZE ~ secondHalf ~ UNDIAGONALIZE;
    }

}
else
{
    /**
     * Gathers four message words into one vector.
     *
     * Params:
     *   m   = the 16-word message block
     *   r   = round index into `matrix`
     *   c   = step index (0 .. 3) within the round
     *   buf = receives the packed words, in `_mm_set_epi32` argument order
     */
    void loadMsg (in const(uint)[16] m, in int r, in int c, out __m128i buf)
    {
        const lanes = matrix[r][c];

        const first  = m[lanes[0]];
        const second = m[lanes[1]];
        const third  = m[lanes[2]];
        const fourth = m[lanes[3]];

        buf = _mm_set_epi32(first, second, third, fourth);
    }

    /**
     * Runs one complete round `r` over the state.
     *
     * Params:
     *   m    = the 16-word message block
     *   r    = round index into `matrix`
     *   rows = the four state vectors, updated in place
     *   bufs = scratch vectors; each slot is overwritten with the message
     *          words of the matching step
     */
    void round (in const(uint)[16] m, in int r, ref __m128i[4] rows, ref __m128i[4] bufs)
    {
        // Steps 0 and 1 on the rows as they are.
        m.loadMsg(r, 0, bufs[0]);
        rows.fG1(bufs[0]);
        m.loadMsg(r, 1, bufs[1]);
        rows.fG2(bufs[1]);

        mixin(DIAGONALIZE);

        // Steps 2 and 3 on the permuted rows.
        m.loadMsg(r, 2, bufs[2]);
        rows.fG1(bufs[2]);
        m.loadMsg(r, 3, bufs[3]);
        rows.fG2(bufs[3]);

        // Restore the original lane layout.
        mixin(UNDIAGONALIZE);
    }
}