randombit / botan

@@ -305,9 +305,13 @@
Loading
305 305
   {
306 306
   if(test_name.substr(0, 6) == "pkcs11")
307 307
      return true;
308 -
   if(test_name == "block" || test_name == "hash" || test_name == "mac" || test_name == "stream" || test_name == "aead")
309 -
      return true;
310 -
   return false;
308 +
309 +
   return (test_name == "block" ||
310 +
           test_name == "hash" ||
311 +
           test_name == "mac" ||
312 +
           test_name == "stream" ||
313 +
           test_name == "aead" ||
314 +
           test_name == "argon2");
311 315
   }
312 316
313 317
#endif

@@ -16,7 +16,11 @@
Loading
16 16
   m_M(M),
17 17
   m_t(t),
18 18
   m_p(p)
19 -
   {}
19 +
   {
20 +
   BOTAN_ARG_CHECK(m_p >= 1 && m_p <= 128, "Invalid Argon2 threads parameter");
21 +
   BOTAN_ARG_CHECK(m_M >= 8*m_p && m_M <= 8192*1024, "Invalid Argon2 M parameter");
22 +
   BOTAN_ARG_CHECK(m_t >= 1, "Invalid Argon2 t parameter");
23 +
   }
20 24
21 25
void Argon2::derive_key(uint8_t output[], size_t output_len,
22 26
                        const char* password, size_t password_len,
@@ -83,9 +87,13 @@
Loading
83 87
   Timer timer("Argon2");
84 88
   const auto tune_time = BOTAN_PBKDF_TUNING_TIME;
85 89
90 +
   auto pwhash = this->from_params(tune_M, t, p);
91 +
86 92
   timer.run_until_elapsed(tune_time, [&]() {
87 93
      uint8_t output[64] = { 0 };
88 -
      argon2(output, sizeof(output), "test", 4, nullptr, 0, nullptr, 0, nullptr, 0, m_family, p, tune_M, t);
94 +
      pwhash->derive_key(output, sizeof(output),
95 +
                         "test", 4,
96 +
                         nullptr, 0);
89 97
      });
90 98
91 99
   if(timer.events() == 0 || timer.value() == 0)

@@ -0,0 +1,260 @@
Loading
1 +
/**
2 +
* (C) 2022 Jack Lloyd
3 +
*
4 +
* Botan is released under the Simplified BSD License (see license.txt)
5 +
*/
6 +
7 +
#include <botan/internal/argon2_ssse3.h>
8 +
#include <tmmintrin.h>
9 +
10 +
namespace Botan {
11 +
12 +
namespace {
13 +
14 +
// Thin value wrapper around a single 128-bit SSE register (__m128i) that is
// treated as two independent 64-bit lanes. It provides only the operations
// the Argon2 compression function in this file uses: unaligned load/store,
// lane-wise 64-bit add and XOR, per-lane rotation, the "2 * lo32 * lo32"
// multiply, and the byte-wise alignr used to rotate rows.
class SIMD_2x64 final
15 +
   {
16 +
   public:
17 +
      SIMD_2x64& operator=(const SIMD_2x64& other) = default;
18 +
      SIMD_2x64(const SIMD_2x64& other) = default;
19 +
20 +
      SIMD_2x64& operator=(SIMD_2x64&& other) = default;
21 +
      SIMD_2x64(SIMD_2x64&& other) = default;
22 +
23 +
      SIMD_2x64() // zero initialized
24 +
         {
25 +
         m_simd = _mm_setzero_si128();
26 +
         }
27 +
28 +
      // Read 16 bytes starting at `in` using an unaligned load, so `in` does
      // not need to be 16-byte aligned. The first 8 bytes end up in the low lane.
      static SIMD_2x64 load_le(const void* in)
29 +
         {
30 +
         return SIMD_2x64(_mm_loadu_si128(reinterpret_cast<const __m128i*>(in)));
31 +
         }
32 +
33 +
      // Write both lanes to out[0] (low lane) and out[1] (high lane) by
      // forwarding to the byte-pointer overload below.
      void store_le(uint64_t out[2]) const
34 +
         {
35 +
         this->store_le(reinterpret_cast<uint8_t*>(out));
36 +
         }
37 +
38 +
      // Write the 16 bytes of the register to `out` using an unaligned store.
      void store_le(uint8_t out[]) const
39 +
         {
40 +
         _mm_storeu_si128(reinterpret_cast<__m128i*>(out), m_simd);
41 +
         }
42 +
43 +
      // Lane-wise 64-bit addition (wrapping), returning a new value.
      SIMD_2x64 operator+(const SIMD_2x64& other) const
44 +
         {
45 +
         SIMD_2x64 retval(*this);
46 +
         retval += other;
47 +
         return retval;
48 +
         }
49 +
50 +
      // Bitwise XOR, returning a new value.
      SIMD_2x64 operator^(const SIMD_2x64& other) const
51 +
         {
52 +
         SIMD_2x64 retval(*this);
53 +
         retval ^= other;
54 +
         return retval;
55 +
         }
56 +
57 +
      // In-place lane-wise 64-bit addition.
      void operator+=(const SIMD_2x64& other)
58 +
         {
59 +
         m_simd = _mm_add_epi64(m_simd, other.m_simd);
60 +
         }
61 +
62 +
      // In-place bitwise XOR of the full 128-bit register.
      void operator^=(const SIMD_2x64& other)
63 +
         {
64 +
         m_simd = _mm_xor_si128(m_simd, other.m_simd);
65 +
         }
66 +
67 +
      // Rotate each 64-bit lane right by ROT bits (0 < ROT < 64, enforced at
      // compile time). Rotations by 16, 24 and 32 are whole-byte rotations, so
      // they are done with one byte shuffle (_mm_shuffle_epi8): in each table,
      // output byte i of a lane selects input byte (i + ROT/8) mod 8 of the
      // same lane. Any other amount uses the generic shift-right | shift-left.
      // NOTE(review): BOTAN_FUNC_ISA is defined elsewhere; presumably it
      // enables SSSE3 code generation for this function - confirm.
      template<size_t ROT>
68 +
      BOTAN_FUNC_ISA("ssse3")
69 +
      SIMD_2x64 rotr() const
70 +
         {
71 +
         static_assert(ROT > 0 && ROT < 64, "Invalid rotation constant");
72 +
73 +
         if(ROT == 16)
74 +
            {
75 +
            auto tab = _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9);
76 +
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
77 +
            }
78 +
         else if(ROT == 24)
79 +
            {
80 +
            auto tab = _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10);
81 +
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
82 +
         }
83 +
         else if(ROT == 32)
84 +
            {
85 +
            auto tab = _mm_setr_epi8(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11);
86 +
            return SIMD_2x64(_mm_shuffle_epi8(m_simd, tab));
87 +
            }
88 +
         else
89 +
            {
90 +
            return SIMD_2x64(_mm_or_si128(_mm_srli_epi64(m_simd, static_cast<int>(ROT)),
91 +
                                          _mm_slli_epi64(m_simd, static_cast<int>(64-ROT))));
92 +
            }
93 +
         }
94 +
95 +
      // Rotate each 64-bit lane left by ROT bits, expressed as a right
      // rotation by 64-ROT.
      template<size_t ROT>
96 +
      SIMD_2x64 rotl() const
97 +
         {
98 +
         return this->rotr<64-ROT>();
99 +
         }
100 +
101 +
      // Argon2 specific operation
      // For each 64-bit lane, returns 2 * lo32(x) * lo32(y): _mm_mul_epu32
      // multiplies the low 32 bits of each lane into a 64-bit product, and
      // adding the product to itself doubles it.
102 +
      static SIMD_2x64 mul2_32(SIMD_2x64 x, SIMD_2x64 y)
103 +
         {
104 +
         const __m128i m = _mm_mul_epu32(x.m_simd, y.m_simd);
105 +
         return SIMD_2x64(_mm_add_epi64(m, m));
106 +
         }
107 +
108 +
      // Treats a (high) and b (low) as one 32-byte value, shifts it right by
      // T bytes and returns the low 16 bytes (_mm_alignr_epi8). With T == 8
      // the result is { low lane = b's high lane, high lane = a's low lane }.
      template<size_t T>
109 +
      BOTAN_FUNC_ISA("ssse3")
110 +
      static SIMD_2x64 alignr(SIMD_2x64 a, SIMD_2x64 b)
111 +
         {
112 +
         static_assert(T > 0 && T < 16, "Invalid alignr constant");
113 +
         return SIMD_2x64(_mm_alignr_epi8(a.m_simd, b.m_simd, T));
114 +
         }
115 +
116 +
      // Argon2 specific
      // Each pair (X0, X1) holds one row of four 64-bit words, X0 = words 0,1
      // and X1 = words 2,3 (word 0 in the low lane). twist() rotates row B left
      // by one word, row C by two words (a swap of C0 and C1) and row D by
      // three words, all in place. After this, lane k of the A/B/C/D registers
      // holds what was a diagonal of the original 4x4 word matrix.
117 +
      static void twist(
118 +
         SIMD_2x64& B0,
119 +
         SIMD_2x64& B1,
120 +
         SIMD_2x64& C0,
121 +
         SIMD_2x64& C1,
122 +
         SIMD_2x64& D0,
123 +
         SIMD_2x64& D1)
124 +
         {
125 +
         SIMD_2x64 T0, T1;
126 +
127 +
         T0 = SIMD_2x64::alignr<8>(B1, B0);
128 +
         T1 = SIMD_2x64::alignr<8>(B0, B1);
129 +
         B0 = T0;
130 +
         B1 = T1;
131 +
132 +
         T0 = C0;
133 +
         C0 = C1;
134 +
         C1 = T0;
135 +
136 +
         T0 = SIMD_2x64::alignr<8>(D0, D1);
137 +
         T1 = SIMD_2x64::alignr<8>(D1, D0);
138 +
         D0 = T0;
139 +
         D1 = T1;
140 +
         }
141 +
142 +
      // Argon2 specific
      // Inverse of twist(): the alignr operand order is swapped for rows B and
      // D and the C halves are swapped back, restoring the original word order.
143 +
      static void untwist(
144 +
         SIMD_2x64& B0,
145 +
         SIMD_2x64& B1,
146 +
         SIMD_2x64& C0,
147 +
         SIMD_2x64& C1,
148 +
         SIMD_2x64& D0,
149 +
         SIMD_2x64& D1)
150 +
         {
151 +
         SIMD_2x64 T0, T1;
152 +
153 +
         T0 = SIMD_2x64::alignr<8>(B0, B1);
154 +
         T1 = SIMD_2x64::alignr<8>(B1, B0);
155 +
         B0 = T0;
156 +
         B1 = T1;
157 +
158 +
         T0 = C0;
159 +
         C0 = C1;
160 +
         C1 = T0;
161 +
162 +
         T0 = SIMD_2x64::alignr<8>(D1, D0);
163 +
         T1 = SIMD_2x64::alignr<8>(D0, D1);
164 +
         D0 = T0;
165 +
         D1 = T1;
166 +
         }
167 +
168 +
      // Wrap an existing raw register value; explicit so that a __m128i is
      // never converted implicitly.
      explicit SIMD_2x64(__m128i x) : m_simd(x) {}
169 +
   private:
170 +
      // The wrapped 128-bit register holding the two 64-bit lanes.
      __m128i m_simd;
171 +
   };
172 +
173 +
// Vectorized mixing step: each argument pair (X0, X1) carries four 64-bit
// words, so every statement below acts on four (a, b, c, d) word tuples at
// once, one per lane. All eight arguments are updated in place.
// Each quarter performs  x += y + 2*lo32(x)*lo32(y)  followed by an XOR and a
// right rotation; the rotation amounts are 32, 24, 16 and 63, in that order.
// The statement order is significant because every step reads values written
// by the previous one.
BOTAN_FORCE_INLINE void blamka_G(
174 +
   SIMD_2x64& A0,
175 +
   SIMD_2x64& A1,
176 +
   SIMD_2x64& B0,
177 +
   SIMD_2x64& B1,
178 +
   SIMD_2x64& C0,
179 +
   SIMD_2x64& C1,
180 +
   SIMD_2x64& D0,
181 +
   SIMD_2x64& D1)
182 +
   {
183 +
   // a += b + 2*lo32(a)*lo32(b);  d = rotr32(d ^ a)
   A0 += B0 + SIMD_2x64::mul2_32(A0, B0);
184 +
   A1 += B1 + SIMD_2x64::mul2_32(A1, B1);
185 +
   D0 ^= A0;
186 +
   D1 ^= A1;
187 +
   D0 = D0.rotr<32>();
188 +
   D1 = D1.rotr<32>();
189 +
190 +
   // c += d + 2*lo32(c)*lo32(d);  b = rotr24(b ^ c)
   C0 += D0 + SIMD_2x64::mul2_32(C0, D0);
191 +
   C1 += D1 + SIMD_2x64::mul2_32(C1, D1);
192 +
   B0 ^= C0;
193 +
   B1 ^= C1;
194 +
   B0 = B0.rotr<24>();
195 +
   B1 = B1.rotr<24>();
196 +
197 +
   // a += b + 2*lo32(a)*lo32(b);  d = rotr16(d ^ a)
   A0 += B0 + SIMD_2x64::mul2_32(A0, B0);
198 +
   A1 += B1 + SIMD_2x64::mul2_32(A1, B1);
199 +
   D0 ^= A0;
200 +
   D1 ^= A1;
201 +
   D0 = D0.rotr<16>();
202 +
   D1 = D1.rotr<16>();
203 +
204 +
   // c += d + 2*lo32(c)*lo32(d);  b = rotr63(b ^ c)
   C0 += D0 + SIMD_2x64::mul2_32(C0, D0);
205 +
   C1 += D1 + SIMD_2x64::mul2_32(C1, D1);
206 +
   B0 ^= C0;
207 +
   B1 ^= C1;
208 +
   B0 = B0.rotr<63>();
209 +
   B1 = B1.rotr<63>();
210 +
   }
211 +
212 +
// One full round over sixteen 64-bit words held as four rows (A, B, C, D) of
// two registers each, updated in place:
//   1. blamka_G across the registers as given (the four columns),
//   2. twist() rows B, C, D so that each lane lines up a diagonal,
//   3. blamka_G again (now mixing the diagonals),
//   4. untwist() to put rows B, C, D back in their original word order.
BOTAN_FORCE_INLINE void blamka_R(
213 +
   SIMD_2x64& A0,
214 +
   SIMD_2x64& A1,
215 +
   SIMD_2x64& B0,
216 +
   SIMD_2x64& B1,
217 +
   SIMD_2x64& C0,
218 +
   SIMD_2x64& C1,
219 +
   SIMD_2x64& D0,
220 +
   SIMD_2x64& D1)
221 +
   {
222 +
   blamka_G(A0, A1, B0, B1, C0, C1, D0, D1);
223 +
224 +
   SIMD_2x64::twist(B0, B1, C0, C1, D0, D1);
225 +
   blamka_G(A0, A1, B0, B1, C0, C1, D0, D1);
226 +
   SIMD_2x64::untwist(B0, B1, C0, C1, D0, D1);
227 +
   }
228 +
229 +
}
230 +
231 +
// Applies the round function blamka_R to the 128-word block T, in place, in
// two passes of eight rounds each:
//   pass 1 - round i works on the 16 consecutive words T[16*i .. 16*i+15];
//   pass 2 - round i works on the word pairs T[2*i + 16*j], T[2*i + 16*j + 1]
//            for j = 0..7, i.e. one pair taken from each 16-word group.
// Each round loads its 16 words into eight two-lane registers, mixes them and
// stores them back to the same positions.
// Uses SSSE3 intrinsics, so callers should check CPU support before calling.
void blamka_ssse3(uint64_t T[128])
232 +
   {
233 +
   // Pass 1: contiguous 16-word groups.
   for(size_t i = 0; i != 8; ++i)
234 +
      {
235 +
      SIMD_2x64 Tv[8];
236 +
      for(size_t j = 0; j != 8; ++j)
237 +
         Tv[j] = SIMD_2x64::load_le(&T[2*(8*i+j)]);
238 +
239 +
      blamka_R(Tv[0], Tv[1], Tv[2], Tv[3],
240 +
               Tv[4], Tv[5], Tv[6], Tv[7]);
241 +
242 +
      for(size_t j = 0; j != 8; ++j)
243 +
         Tv[j].store_le(&T[2*(8*i+j)]);
244 +
      }
245 +
246 +
   // Pass 2: strided groups, one word pair from each 16-word group.
   for(size_t i = 0; i != 8; ++i)
247 +
      {
248 +
      SIMD_2x64 Tv[8];
249 +
      for(size_t j = 0; j != 8; ++j)
250 +
         Tv[j] = SIMD_2x64::load_le(&T[2*(i+8*j)]);
251 +
252 +
      blamka_R(Tv[0], Tv[1], Tv[2], Tv[3],
253 +
               Tv[4], Tv[5], Tv[6], Tv[7]);
254 +
255 +
      for(size_t j = 0; j != 8; ++j)
256 +
         Tv[j].store_le(&T[2*(i+8*j)]);
257 +
      }
258 +
   }
259 +
260 +
}

@@ -1,5 +1,5 @@
Loading
1 1
/**
2 -
* (C) 2018,2019 Jack Lloyd
2 +
* (C) 2018,2019,2022 Jack Lloyd
3 3
*
4 4
* Botan is released under the Simplified BSD License (see license.txt)
5 5
*/
@@ -11,19 +11,29 @@
Loading
11 11
#include <botan/rotate.h>
12 12
#include <botan/exceptn.h>
13 13
14 +
#if defined(BOTAN_HAS_THREAD_UTILS)
15 +
   #include <botan/internal/thread_pool.h>
16 +
#endif
17 +
18 +
#if defined(BOTAN_HAS_ARGON2_SSSE3)
19 +
   #include <botan/internal/argon2_ssse3.h>
20 +
   #include <botan/cpuid.h>
21 +
#endif
22 +
14 23
namespace Botan {
15 24
16 25
namespace {
17 26
18 -
static const size_t SYNC_POINTS = 4;
27 +
const size_t SYNC_POINTS = 4;
19 28
20 -
secure_vector<uint8_t> argon2_H0(HashFunction& blake2b,
21 -
                                 size_t output_len,
22 -
                                 const char* password, size_t password_len,
23 -
                                 const uint8_t salt[], size_t salt_len,
24 -
                                 const uint8_t key[], size_t key_len,
25 -
                                 const uint8_t ad[], size_t ad_len,
26 -
                                 size_t y, size_t p, size_t M, size_t t)
29 +
void argon2_H0(uint8_t H0[64],
30 +
               HashFunction& blake2b,
31 +
               size_t output_len,
32 +
               const char* password, size_t password_len,
33 +
               const uint8_t salt[], size_t salt_len,
34 +
               const uint8_t key[], size_t key_len,
35 +
               const uint8_t ad[], size_t ad_len,
36 +
               size_t y, size_t p, size_t M, size_t t)
27 37
   {
28 38
   const uint8_t v = 19; // Argon2 version code
29 39
@@ -46,37 +56,7 @@
Loading
46 56
   blake2b.update_le(static_cast<uint32_t>(ad_len));
47 57
   blake2b.update(ad, ad_len);
48 58
49 -
   return blake2b.final();
50 -
   }
51 -
52 -
void Htick(secure_vector<uint8_t>& T,
53 -
           uint8_t output[],
54 -
           size_t output_len,
55 -
           HashFunction& blake2b,
56 -
           const secure_vector<uint8_t>& H0,
57 -
           size_t p0, size_t p1)
58 -
   {
59 -
   BOTAN_ASSERT_NOMSG(output_len % 64 == 0);
60 -
61 -
   blake2b.update_le(static_cast<uint32_t>(output_len));
62 -
   blake2b.update(H0);
63 -
   blake2b.update_le(static_cast<uint32_t>(p0));
64 -
   blake2b.update_le(static_cast<uint32_t>(p1));
65 -
66 -
   blake2b.final(&T[0]);
67 -
68 -
   while(output_len > 64)
69 -
      {
70 -
      copy_mem(output, &T[0], 32);
71 -
      output_len -= 32;
72 -
      output += 32;
73 -
74 -
      blake2b.update(T);
75 -
      blake2b.final(&T[0]);
76 -
      }
77 -
78 -
   if(output_len > 0)
79 -
      copy_mem(output, &T[0], output_len);
59 +
   blake2b.final(H0);
80 60
   }
81 61
82 62
void extract_key(uint8_t output[], size_t output_len,
@@ -85,12 +65,12 @@
Loading
85 65
   {
86 66
   const size_t lanes = memory / threads;
87 67
88 -
   secure_vector<uint64_t> sum(128);
68 +
   uint64_t sum[128] = { 0 };
89 69
90 70
   for(size_t lane = 0; lane != threads; ++lane)
91 71
      {
92 -
      size_t start = 128*(lane * lanes + lanes - 1);
93 -
      size_t end = 128*(lane * lanes + lanes);
72 +
      const size_t start = 128*(lane * lanes + lanes - 1);
73 +
      const size_t end = 128*(lane * lanes + lanes);
94 74
95 75
      for(size_t j = start; j != end; ++j)
96 76
         {
@@ -98,14 +78,12 @@
Loading
98 78
         }
99 79
      }
100 80
101 -
   secure_vector<uint8_t> sum8(1024);
102 -
   copy_out_le(sum8.data(), 1024, sum.data());
103 -
104 81
   if(output_len <= 64)
105 82
      {
106 83
      std::unique_ptr<HashFunction> blake2b = HashFunction::create_or_throw("BLAKE2b(" + std::to_string(output_len*8) + ")");
107 84
      blake2b->update_le(static_cast<uint32_t>(output_len));
108 -
      blake2b->update(sum8.data(), sum8.size());
85 +
      for(size_t i = 0; i != 128; ++i)
86 +
         blake2b->update_le(sum[i]);
109 87
      blake2b->final(output);
110 88
      }
111 89
   else
@@ -114,7 +92,8 @@
Loading
114 92
115 93
      std::unique_ptr<HashFunction> blake2b = HashFunction::create_or_throw("BLAKE2b(512)");
116 94
      blake2b->update_le(static_cast<uint32_t>(output_len));
117 -
      blake2b->update(sum8.data(), sum8.size());
95 +
      for(size_t i = 0; i != 128; ++i)
96 +
         blake2b->update_le(sum[i]);
118 97
      blake2b->final(&T[0]);
119 98
120 99
      while(output_len > 64)
@@ -146,38 +125,41 @@
Loading
146 125
147 126
void init_blocks(secure_vector<uint64_t>& B,
148 127
                 HashFunction& blake2b,
149 -
                 const secure_vector<uint8_t>& H0,
128 +
                 const uint8_t H0[64],
150 129
                 size_t memory,
151 130
                 size_t threads)
152 131
   {
153 132
   BOTAN_ASSERT_NOMSG(B.size() >= threads*256);
154 133
155 -
   secure_vector<uint8_t> H(1024);
156 -
   secure_vector<uint8_t> T(blake2b.output_length());
157 -
158 134
   for(size_t i = 0; i != threads; ++i)
159 135
      {
160 136
      const size_t B_off = i * (memory / threads);
161 137
162 138
      BOTAN_ASSERT_NOMSG(B.size() >= 128*(B_off+2));
163 139
164 -
      Htick(T, &H[0], H.size(), blake2b, H0, 0, i);
165 -
166 -
      for(size_t j = 0; j != 128; ++j)
140 +
      for(size_t j = 0; j != 2; ++j)
167 141
         {
168 -
         B[128*B_off+j] = load_le<uint64_t>(H.data(), j);
169 -
         }
142 +
         uint8_t T[64] = { 0 };
170 143
171 -
      Htick(T, &H[0], H.size(), blake2b, H0, 1, i);
144 +
         blake2b.update_le(static_cast<uint32_t>(1024));
145 +
         blake2b.update(H0, 64);
146 +
         blake2b.update_le(static_cast<uint32_t>(j));
147 +
         blake2b.update_le(static_cast<uint32_t>(i));
148 +
         blake2b.final(T);
172 149
173 -
      for(size_t j = 0; j != 128; ++j)
174 -
         {
175 -
         B[128*(B_off+1)+j] = load_le<uint64_t>(H.data(), j);
150 +
         for(size_t k = 0; k != 30; ++k)
151 +
            {
152 +
            load_le(&B[128*(B_off+j)+4*k], T, 32 / 8);
153 +
            blake2b.update(T, 64);
154 +
            blake2b.final(T);
155 +
            }
156 +
157 +
         load_le(&B[128*(B_off+j)+4*30], T, 64 / 8);
176 158
         }
177 159
      }
178 160
   }
179 161
180 -
inline void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D)
162 +
BOTAN_FORCE_INLINE void blamka_G(uint64_t& A, uint64_t& B, uint64_t& C, uint64_t& D)
181 163
   {
182 164
   A += B + (static_cast<uint64_t>(2) * static_cast<uint32_t>(A)) * static_cast<uint32_t>(B);
183 165
   D = rotr<32>(A ^ D);
@@ -192,59 +174,46 @@
Loading
192 174
   B = rotr<63>(B ^ C);
193 175
   }
194 176
195 -
inline void blamka(uint64_t& V0, uint64_t& V1, uint64_t& V2, uint64_t& V3,
196 -
                   uint64_t& V4, uint64_t& V5, uint64_t& V6, uint64_t& V7,
197 -
                   uint64_t& V8, uint64_t& V9, uint64_t& VA, uint64_t& VB,
198 -
                   uint64_t& VC, uint64_t& VD, uint64_t& VE, uint64_t& VF)
199 -
   {
200 -
   blamka_G(V0, V4, V8, VC);
201 -
   blamka_G(V1, V5, V9, VD);
202 -
   blamka_G(V2, V6, VA, VE);
203 -
   blamka_G(V3, V7, VB, VF);
204 -
205 -
   blamka_G(V0, V5, VA, VF);
206 -
   blamka_G(V1, V6, VB, VC);
207 -
   blamka_G(V2, V7, V8, VD);
208 -
   blamka_G(V3, V4, V9, VE);
209 -
   }
210 -
211 -
void process_block_xor(secure_vector<uint64_t>& T,
212 -
                       secure_vector<uint64_t>& B,
213 -
                       size_t offset,
214 -
                       size_t prev,
215 -
                       size_t new_offset)
177 +
void blamka(uint64_t T[128])
216 178
   {
217 -
   for(size_t i = 0; i != 128; ++i)
218 -
      T[i] = B[128*prev+i] ^ B[128*new_offset+i];
179 +
#if defined(BOTAN_HAS_ARGON2_SSSE3)
180 +
   if(CPUID::has_ssse3())
181 +
      return blamka_ssse3(T);
182 +
#endif
219 183
220 184
   for(size_t i = 0; i != 128; i += 16)
221 185
      {
222 -
      blamka(T[i+ 0], T[i+ 1], T[i+ 2], T[i+ 3],
223 -
             T[i+ 4], T[i+ 5], T[i+ 6], T[i+ 7],
224 -
             T[i+ 8], T[i+ 9], T[i+10], T[i+11],
225 -
             T[i+12], T[i+13], T[i+14], T[i+15]);
186 +
      blamka_G(T[i+  0], T[i+  4], T[i+  8], T[i+ 12]);
187 +
      blamka_G(T[i+  1], T[i+  5], T[i+  9], T[i+ 13]);
188 +
      blamka_G(T[i+  2], T[i+  6], T[i+ 10], T[i+ 14]);
189 +
      blamka_G(T[i+  3], T[i+  7], T[i+ 11], T[i+ 15]);
190 +
191 +
      blamka_G(T[i+  0], T[i+  5], T[i+ 10], T[i+ 15]);
192 +
      blamka_G(T[i+  1], T[i+  6], T[i+ 11], T[i+ 12]);
193 +
      blamka_G(T[i+  2], T[i+  7], T[i+  8], T[i+ 13]);
194 +
      blamka_G(T[i+  3], T[i+  4], T[i+  9], T[i+ 14]);
226 195
      }
227 196
228 197
   for(size_t i = 0; i != 128 / 8; i += 2)
229 198
      {
230 -
      blamka(T[    i], T[    i+1], T[ 16+i], T[ 16+i+1],
231 -
             T[ 32+i], T[ 32+i+1], T[ 48+i], T[ 48+i+1],
232 -
             T[ 64+i], T[ 64+i+1], T[ 80+i], T[ 80+i+1],
233 -
             T[ 96+i], T[ 96+i+1], T[112+i], T[112+i+1]);
199 +
      blamka_G(T[i+  0], T[i+ 32], T[i+ 64], T[i+ 96]);
200 +
      blamka_G(T[i+  1], T[i+ 33], T[i+ 65], T[i+ 97]);
201 +
      blamka_G(T[i+ 16], T[i+ 48], T[i+ 80], T[i+112]);
202 +
      blamka_G(T[i+ 17], T[i+ 49], T[i+ 81], T[i+113]);
203 +
204 +
      blamka_G(T[i+  0], T[i+ 33], T[i+ 80], T[i+113]);
205 +
      blamka_G(T[i+  1], T[i+ 48], T[i+ 81], T[i+ 96]);
206 +
      blamka_G(T[i+ 16], T[i+ 49], T[i+ 64], T[i+ 97]);
207 +
      blamka_G(T[i+ 17], T[i+ 32], T[i+ 65], T[i+112]);
234 208
      }
235 -
236 -
   for(size_t i = 0; i != 128; ++i)
237 -
      B[128*offset + i] ^= T[i] ^ B[128*prev+i] ^ B[128*new_offset+i];
238 209
   }
239 210
240 -
void gen_2i_addresses(secure_vector<uint64_t>& T, secure_vector<uint64_t>& B,
211 +
void gen_2i_addresses(uint64_t T[128], uint64_t B[128],
241 212
                      size_t n, size_t lane, size_t slice, size_t memory,
242 213
                      size_t time, size_t mode, size_t cnt)
243 214
   {
244 -
   BOTAN_ASSERT_NOMSG(B.size() == 128);
245 -
   BOTAN_ASSERT_NOMSG(T.size() == 128);
215 +
   clear_mem(B, 128);
246 216
247 -
   clear_mem(B.data(), B.size());
248 217
   B[0] = n;
249 218
   B[1] = lane;
250 219
   B[2] = slice;
@@ -255,22 +224,9 @@
Loading
255 224
256 225
   for(size_t r = 0; r != 2; ++r)
257 226
      {
258 -
      copy_mem(T.data(), B.data(), B.size());
227 +
      copy_mem(T, B, 128);
259 228
260 -
      for(size_t i = 0; i != 128; i += 16)
261 -
         {
262 -
         blamka(T[i+ 0], T[i+ 1], T[i+ 2], T[i+ 3],
263 -
                T[i+ 4], T[i+ 5], T[i+ 6], T[i+ 7],
264 -
                T[i+ 8], T[i+ 9], T[i+10], T[i+11],
265 -
                T[i+12], T[i+13], T[i+14], T[i+15]);
266 -
         }
267 -
      for(size_t i = 0; i != 128 / 8; i += 2)
268 -
         {
269 -
         blamka(T[    i], T[    i+1], T[ 16+i], T[ 16+i+1],
270 -
                T[ 32+i], T[ 32+i+1], T[ 48+i], T[ 48+i+1],
271 -
                T[ 64+i], T[ 64+i+1], T[ 80+i], T[ 80+i+1],
272 -
                T[ 96+i], T[ 96+i+1], T[112+i], T[112+i+1]);
273 -
         }
229 +
      blamka(T);
274 230
275 231
      for(size_t i = 0; i != 128; ++i)
276 232
         B[i] ^= T[i];
@@ -297,12 +253,13 @@
Loading
297 253
   if(lane == ref_lane)
298 254
      m += index;
299 255
300 -
   if(n == 0) {
301 -
         m = slice*segments;
302 -
         s = 0;
303 -
         if(slice == 0 || lane == ref_lane)
304 -
            m += index;
305 -
   }
256 +
   if(n == 0)
257 +
      {
258 +
      m = slice*segments;
259 +
      s = 0;
260 +
      if(slice == 0 || lane == ref_lane)
261 +
         m += index;
262 +
      }
306 263
307 264
   if(index == 0 || lane == ref_lane)
308 265
      m -= 1;
@@ -314,46 +271,25 @@
Loading
314 271
   return static_cast<uint32_t>(ref_lane*lanes + (s + m - (p+1)) % lanes);
315 272
   }
316 273
317 -
void process_block_argon2d(secure_vector<uint64_t>& T,
318 -
                           secure_vector<uint64_t>& B,
319 -
                           size_t n, size_t slice, size_t lane,
320 -
                           size_t lanes, size_t segments, size_t threads)
274 +
void process_block(secure_vector<uint64_t>& B,
275 +
                   size_t n, size_t slice, size_t lane,
276 +
                   size_t lanes, size_t segments, size_t threads, uint8_t mode,
277 +
                   size_t memory, size_t time)
321 278
   {
279 +
   uint64_t T[128];
322 280
   size_t index = 0;
323 281
   if(n == 0 && slice == 0)
324 282
      index = 2;
325 283
326 -
   while(index < segments)
327 -
      {
328 -
      const size_t offset = lane*lanes + slice*segments + index;
329 -
330 -
      size_t prev = offset - 1;
331 -
      if(index == 0 && slice == 0)
332 -
         prev += lanes;
333 -
334 -
      const uint64_t random = B.at(128*prev);
335 -
      const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
284 +
   const bool use_2i = mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS/2);
336 285
337 -
      process_block_xor(T, B, offset, prev, new_offset);
338 -
339 -
      index += 1;
340 -
      }
341 -
   }
342 -
343 -
void process_block_argon2i(secure_vector<uint64_t>& T,
344 -
                           secure_vector<uint64_t>& B,
345 -
                           size_t n, size_t slice, size_t lane,
346 -
                           size_t lanes, size_t segments, size_t threads, uint8_t mode,
347 -
                           size_t memory, size_t time)
348 -
   {
349 -
   size_t index = 0;
350 -
   if(n == 0 && slice == 0)
351 -
      index = 2;
352 -
353 -
   secure_vector<uint64_t> addresses(128);
286 +
   uint64_t addresses[128];
354 287
   size_t address_counter = 1;
355 288
356 -
   gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
289 +
   if(use_2i)
290 +
      {
291 +
      gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
292 +
      }
357 293
358 294
   while(index < segments)
359 295
      {
@@ -363,16 +299,22 @@
Loading
363 299
      if(index == 0 && slice == 0)
364 300
         prev += lanes;
365 301
366 -
      if(index > 0 && index % 128 == 0)
302 +
      if(use_2i && index > 0 && index % 128 == 0)
367 303
         {
368 304
         address_counter += 1;
369 305
         gen_2i_addresses(T, addresses, n, lane, slice, memory, time, mode, address_counter);
370 306
         }
371 307
372 -
      const uint64_t random = addresses[index % 128];
308 +
      const uint64_t random = use_2i ? addresses[index % 128] : B.at(128*prev);
373 309
      const size_t new_offset = index_alpha(random, lanes, segments, threads, n, slice, lane, index);
374 310
375 -
      process_block_xor(T, B, offset, prev, new_offset);
311 +
      for(size_t i = 0; i != 128; ++i)
312 +
         T[i] = B[128*prev+i] ^ B[128*new_offset+i];
313 +
314 +
      blamka(T);
315 +
316 +
      for(size_t i = 0; i != 128; ++i)
317 +
         B[128*offset + i] ^= T[i] ^ B[128*prev+i] ^ B[128*new_offset+i];
376 318
377 319
      index += 1;
378 320
      }
@@ -387,22 +329,40 @@
Loading
387 329
   const size_t lanes = memory / threads;
388 330
   const size_t segments = lanes / SYNC_POINTS;
389 331
390 -
   secure_vector<uint64_t> T(128);
332 +
#if defined(BOTAN_HAS_THREAD_UTILS)
333 +
   auto& thread_pool = Thread_Pool::global_instance();
334 +
#endif
335 +
391 336
   for(size_t n = 0; n != t; ++n)
392 337
      {
393 338
      for(size_t slice = 0; slice != SYNC_POINTS; ++slice)
394 339
         {
395 -
         // TODO can run this in Thread_Pool
340 +
#if defined(BOTAN_HAS_THREAD_UTILS)
341 +
         if(threads > 1)
342 +
            {
343 +
            std::vector<std::future<void>> fut_results;
344 +
            fut_results.reserve(threads);
345 +
346 +
            for(size_t lane = 0; lane != threads; ++lane)
347 +
               {
348 +
               fut_results.push_back(thread_pool.run(
349 +
                  process_block,
350 +
                  std::ref(B), n, slice, lane, lanes, segments, threads, mode, memory, t));
351 +
               }
352 +
353 +
            for(auto& fut : fut_results)
354 +
               fut.get();
355 +
356 +
            continue;
357 +
            }
358 +
#endif
359 +
396 360
         for(size_t lane = 0; lane != threads; ++lane)
397 361
            {
398 -
            if(mode == 1 || (mode == 2 && n == 0 && slice < SYNC_POINTS/2))
399 -
               process_block_argon2i(T, B, n, slice, lane, lanes, segments, threads, mode, memory, t);
400 -
            else
401 -
               process_block_argon2d(T, B, n, slice, lane, lanes, segments, threads);
362 +
            process_block(B, n, slice, lane, lanes, segments, threads, mode, memory, t);
402 363
            }
403 364
         }
404 365
      }
405 -
406 366
   }
407 367
408 368
}
@@ -420,14 +380,15 @@
Loading
420 380
   BOTAN_ARG_CHECK(M >= 8*threads && M <= 8192*1024, "Invalid Argon2 M parameter");
421 381
   BOTAN_ARG_CHECK(t >= 1, "Invalid Argon2 t parameter");
422 382
423 -
   std::unique_ptr<HashFunction> blake2 = HashFunction::create_or_throw("BLAKE2b");
383 +
   auto blake2 = HashFunction::create_or_throw("BLAKE2b");
424 384
425 -
   const auto H0 = argon2_H0(*blake2, output_len,
426 -
                             password, password_len,
427 -
                             salt, salt_len,
428 -
                             key, key_len,
429 -
                             ad, ad_len,
430 -
                             mode, threads, M, t);
385 +
   uint8_t H0[64] = { 0 };
386 +
   argon2_H0(H0, *blake2, output_len,
387 +
             password, password_len,
388 +
             salt, salt_len,
389 +
             key, key_len,
390 +
             ad, ad_len,
391 +
             mode, threads, M, t);
431 392
432 393
   const size_t memory = (M / (SYNC_POINTS*threads)) * (SYNC_POINTS*threads);
433 394

@@ -2348,9 +2348,9 @@
Loading
2348 2348
2349 2349
         for(size_t M : { 8*1024, 64*1024, 256*1024 })
2350 2350
            {
2351 -
            for(size_t t : { 1, 2, 4 })
2351 +
            for(size_t t : { 1, 4 })
2352 2352
               {
2353 -
               for(size_t p : { 1 })
2353 +
               for(size_t p : { 1, 4 })
2354 2354
                  {
2355 2355
                  std::unique_ptr<Timer> timer = make_timer(
2356 2356
                     "Argon2id M=" + std::to_string(M) + " t=" + std::to_string(t) + " p=" + std::to_string(p));
Files Coverage
src 91.38%
Project Totals (583 files) 91.38%
1
---
2

3
# Documentation
4
# https://github.com/codecov/support/wiki/Codecov-Yaml#full-yaml
5
#
6
# Validate this file
7
# curl --data-binary @codecov.yml https://codecov.io/validate
8

9
coverage:
10
  status:
11
    project:
12
      default:
13
        # Random seeds in tests lead to a +/-0.05% coverage span even for PRs
14
        # that do not change source code
15
        threshold: 0.05
Sunburst
The innermost circle is the entire project; moving away from the center are folders and then, finally, individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Icicle
The top section represents the entire project, followed by folders and finally individual files. The size and color of each slice represent the number of statements and the coverage, respectively.
Grid
Each block represents a single file in the project. The size and color of each block represent the number of statements and the coverage, respectively.
Loading