Line data Source code
1 : #include <inttypes.h>
2 : #include <stdbool.h>
3 : #include <stddef.h>
4 : #include <stdlib.h>
5 : #include <string.h>
6 : #include <sys/random.h>
7 :
8 : #include <crater/container.h>
9 : #include <crater/prand.h>
10 :
11 : typedef union{
12 : double as_double;
13 : struct{
14 : uint64_t fraction:52, exp:11, sign:1;
15 : };
16 : } double_bits_t;
17 :
18 11 : bool cr8r_prng_seed(cr8r_prng *self, uint64_t seed){
19 11 : if(self->state_size > sizeof(uint64_t)){
20 9 : uint64_t _state = *(uint64_t*)cr8r_default_prng_splitmix->state;
21 9 : *(uint64_t*)cr8r_default_prng_splitmix->state = seed;
22 9 : cr8r_prng_get_bytes(cr8r_default_prng_splitmix, self->state_size, self->state);
23 9 : *(uint64_t*)cr8r_default_prng_splitmix->state = _state;
24 : }else{
25 2 : memcpy(self->state, &seed, self->state_size);
26 : }
27 11 : return self->fixup_state(self->state);
28 : }
29 :
30 27126642 : uint32_t cr8r_prng_get_u32(cr8r_prng *self){
31 27126642 : return self->get_u32(self->state);
32 : }
33 :
34 1010 : uint64_t cr8r_prng_get_u64(cr8r_prng *self){
35 1010 : uint64_t res = self->get_u32(self->state);
36 1010 : return res | ((uint64_t)self->get_u32(self->state) << 32);
37 : }
38 :
39 1009 : void cr8r_prng_get_bytes(cr8r_prng *self, uint64_t size, void *buf){
40 1009 : uint64_t i;
41 202949 : for(i = 0; i + sizeof(uint32_t) <= size; i += sizeof(uint32_t)){
42 201940 : uint32_t tmp = self->get_u32(self->state);
43 201940 : memcpy(buf + i, &tmp, sizeof(uint32_t));
44 : }
45 1009 : if(i != size){
46 1 : uint32_t tmp = self->get_u32(self->state);
47 1 : memcpy(buf + i, &tmp, size - i);
48 : }
49 1009 : }
50 :
51 15126633 : uint64_t cr8r_prng_uniform_u64(cr8r_prng *self, uint64_t a, uint64_t b){
52 15126633 : uint64_t l = b - a;
53 15126633 : if(!l){
54 : return a;
55 : }
56 15126633 : uint64_t one_call = l <= 0x100000000ull;
57 15126633 : uint64_t ub, r;
58 15126633 : if(one_call){
59 15126633 : ub = 0x100000000ull;
60 15126633 : ub -= ub%l;
61 15126642 : do{
62 15126642 : r = cr8r_prng_get_u32(self);
63 15126642 : }while(r >= ub);
64 : }else{
65 0 : ub = 0x7FFFFFFFFFFFFFFFull%l*(uint64_t)-2;
66 0 : do{
67 0 : r = cr8r_prng_get_u64(self);
68 0 : }while(r >= ub);
69 : }
70 15126633 : return r%l + a;
71 : }
72 :
73 10 : double cr8r_prng_uniform01_double(cr8r_prng *self){
74 10 : uint64_t u = cr8r_prng_get_u64(self);
75 10 : if(!u){
76 : return 0.;
77 : }
78 10 : uint64_t exp_decrease = __builtin_clzll(u);
79 10 : u <<= exp_decrease;
80 10 : double_bits_t bits = {.sign= 0, .exp= 1022 - exp_decrease, .fraction= u >> 12};
81 10 : return bits.as_double;
82 : }
83 :
84 : const uint64_t cr8r_prng_2tg_t64[4] = {1, (1ull << 63) + 3, (1ull << 63) - 1, -1};
85 :
86 : // raise b to the power of 2**i mod 2**j
87 : CR8R_ATTR_NO_SAN("unsigned-integer-overflow")
88 63254 : inline static uint64_t pow_ti(uint64_t b, uint64_t i){
89 2085587 : while(i--){
90 2022333 : b *= b;
91 : }
92 62254 : return b;
93 : }
94 :
95 : CR8R_ATTR_NO_SAN("unsigned-integer-overflow")
96 1000 : uint64_t cr8r_prng_log_mod_t64(uint64_t h){
97 1000 : uint64_t y = pow_ti(3, 61);
98 : // the inverse of 3 mod 2**64
99 : uint64_t g1 = 12297829382473034411ull;
100 3536 : for(uint64_t gi = 0; gi < 4; ++gi){
101 3023 : uint64_t x = 0;
102 3023 : uint64_t b = h*cr8r_prng_2tg_t64[gi];
103 : // maintain g1_ti = g1**(2**i)
104 3023 : uint64_t g1_tk = g1;
105 3023 : uint64_t k = 0;
106 62741 : for(; k < 62; ++k){
107 62254 : uint64_t hk = pow_ti(b, 61 - k);
108 62254 : if(hk == 1){
109 : // x is not modified
110 32574 : }else if(hk == y){
111 30038 : x |= 1ull << k;
112 30038 : b *= g1_tk;
113 : }else{
114 : break;
115 : }
116 59718 : g1_tk = g1_tk*g1_tk;
117 : }
118 3023 : if(k == 62){
119 : // x was successfully extended to 62 bits, set the high 2 bits
120 : // to the powers (0 or 1) of the order-2 generators 2**63+1 for
121 : // the highest bit, and 2**63-1 for the second highest bit.
122 487 : return (gi << 62) | (x&((1ull << 62) - 1));
123 : }
124 : }
125 : // reachable iff h is even
126 : return 0;
127 : }
128 :
129 2000000 : static uint32_t cr8r_prng_system_get_u32(void *_state){
130 2000000 : uint32_t res;
131 2000000 : (void)!getrandom(&res, sizeof(uint32_t), 0);
132 2000000 : return res;
133 : }
134 :
135 0 : static bool cr8r_prng_system_fixup(void *_state){
136 0 : return 1;
137 : }
138 :
139 1 : cr8r_prng *cr8r_prng_init_system(){
140 1 : cr8r_prng *res = malloc(offsetof(cr8r_prng, state));
141 1 : if(res){
142 1 : res->state_size = 0;
143 1 : res->get_u32 = cr8r_prng_system_get_u32;
144 1 : res->fixup_state = cr8r_prng_system_fixup;
145 : }
146 1 : return res;
147 : }
148 :
149 : CR8R_ATTR_NO_SAN("unsigned-integer-overflow", "implicit-unsigned-integer-truncation")
150 2015750 : static uint32_t cr8r_prng_lcg_get_u32(void *_state){
151 2015750 : uint64_t *state = _state;
152 2015750 : *state = 6364136223846793005*(*state) + 1;
153 2015750 : return *state >> 32;
154 : }
155 :
156 2 : static bool cr8r_prng_lcg_fixup(void *_state){
157 2 : uint64_t *state = _state;
158 2 : if(!*state){
159 0 : *state = CR8R_DEFAULT_PRNG_LCG_SEED;
160 : }
161 2 : return 1;
162 : }
163 :
164 2 : cr8r_prng *cr8r_prng_init_lcg(uint64_t seed){
165 2 : cr8r_prng *res = malloc(offsetof(cr8r_prng, state) + sizeof(uint64_t));
166 2 : if(res){
167 2 : res->state_size = sizeof(uint64_t);
168 2 : res->get_u32 = cr8r_prng_lcg_get_u32;
169 2 : res->fixup_state = cr8r_prng_lcg_fixup;
170 2 : if(!cr8r_prng_seed(res, seed)){
171 0 : free(res);
172 0 : return NULL;
173 : }
174 : }
175 : return res;
176 : }
177 :
178 : CR8R_ATTR_NO_SAN("unsigned-integer-overflow", "implicit-unsigned-integer-truncation")
179 2000000 : static uint32_t cr8r_prng_lfg_sc_get_u32(void *_state){
180 2000000 : uint64_t x0 = 0, x5 = 0, x12 = 0;
181 : // depends on little endianness
182 2000000 : memcpy(&x5, _state + 6*4, 6);
183 2000000 : memcpy(&x12, _state + 6*11, 6);
184 2000000 : x0 = x5 - x12 - ((uint8_t*)_state)[6*12];
185 : // set carry bit to 1 if x0 < 0
186 2000000 : ((uint8_t*)_state)[6*12] = x0 >= 0x1000000000000ull;
187 2000000 : memmove(_state + 6, _state, 6*11);
188 2000000 : memcpy(_state, &x0, 6);
189 2000000 : return x0 >> 16;
190 : }
191 :
192 1 : static bool cr8r_prng_lfg_sc_fixup(void *_state){
193 1 : char *state = _state;
194 1 : for(uint64_t i = 0; i < 12; i += 6){
195 : // depends on little endianness
196 1 : if(state[i]&1){
197 : return 1;
198 : }
199 : }
200 0 : for(uint64_t i = 0; i < 12; i += 6){
201 0 : state[i] |= 1;
202 : }
203 : return 1;
204 : }
205 :
206 1 : cr8r_prng *cr8r_prng_init_lfg_sc(uint64_t seed){
207 1 : cr8r_prng *res = malloc(offsetof(cr8r_prng, state) + 6*12 + 1);
208 1 : if(res){
209 1 : res->state_size = 6*12 + 1;
210 1 : res->get_u32 = cr8r_prng_lfg_sc_get_u32,
211 1 : res->fixup_state = cr8r_prng_lfg_sc_fixup;
212 1 : if(!cr8r_prng_seed(res, seed)){
213 0 : free(res);
214 0 : return NULL;
215 : }
216 : }
217 : return res;
218 : }
219 :
220 : typedef struct{
221 : uint64_t index;
222 : uint64_t XS[CR8R_PRNG_LFM_R];
223 : } cr8r_prng_lfg_m_state;
224 :
225 : CR8R_ATTR_NO_SAN("unsigned-integer-overflow", "implicit-unsigned-integer-truncation")
226 17310892 : static uint32_t cr8r_prng_lfg_m_get_u32(void *_state){
227 17310892 : cr8r_prng_lfg_m_state *state = _state;
228 17310892 : uint64_t next_index = state->index ? state->index - 1 : CR8R_PRNG_LFM_R - 1;
229 17310892 : state->XS[next_index] *= state->XS[(state->index + CR8R_PRNG_LFM_S - 1)%CR8R_PRNG_LFM_R];
230 17310892 : state->index = next_index;
231 17310892 : return state->XS[next_index] >> 16;
232 : }
233 :
234 5 : static bool cr8r_prng_lfg_m_fixup(void *_state){
235 5 : cr8r_prng_lfg_m_state *state = _state;
236 640 : for(uint64_t i = 0; i < CR8R_PRNG_LFM_R; ++i){
237 635 : state->XS[i] |= 1;
238 : }
239 5 : state->index %= CR8R_PRNG_LFM_R;
240 5 : return 1;
241 : }
242 :
243 5 : cr8r_prng *cr8r_prng_init_lfg_m(uint64_t seed){
244 5 : cr8r_prng *res = malloc(offsetof(cr8r_prng, state) + sizeof(cr8r_prng_lfg_m_state));
245 5 : if(res){
246 5 : res->state_size = sizeof(cr8r_prng_lfg_m_state);
247 5 : res->get_u32 = cr8r_prng_lfg_m_get_u32;
248 5 : res->fixup_state = cr8r_prng_lfg_m_fixup;
249 5 : if(!cr8r_prng_seed(res, seed)){
250 0 : free(res);
251 0 : return NULL;
252 : }
253 : }
254 : return res;
255 : }
256 :
257 :
258 : // the Mersenne twister implementation is tightly based on wikipedia
259 : // (https://en.wikipedia.org/wiki/Mersenne_Twister)
260 : // and uses the standard MT19937-64 generator configuration
261 : // with the modification that each 64 bit output is split into 2 32 bit
262 : // outputs to conform with the cr8r_prng interface.
263 : // The 32 bit generator configuration MT19937 could have been used instead.
264 : // Both have exactly the same state size and generate the same number
265 : // of random bits before needing to call "twist".
266 : // MT19937 will probably replace this hybrid approach, and parameterized
267 : // versions of LCG, LFG, and MT will probably be added.
268 : typedef struct{
269 : uint64_t index;
270 : uint64_t MT[CR8R_PRNG_MT_N];
271 : } cr8r_prng_mt_st;
272 :
273 : static const uint64_t cr8r_prng_mt_lomask = (1ull << CR8R_PRNG_MT_R) - 1;
274 : static const uint64_t cr8r_prng_mt_himask = ~cr8r_prng_mt_lomask;
275 :
276 : /*
277 : static bool cr8r_prng_mt_seed(void *_state, const void *_seed){
278 : uint64_t seed = *(const uint64_t*)_seed;
279 : if(!seed){
280 : return 0;
281 : }
282 : cr8r_prng_mt_st *state = _state;
283 : state->index = 2*CR8R_PRNG_MT_N;
284 : state->MT[0] = seed;
285 : for(uint64_t i = 1; i < CR8R_PRNG_MT_N; ++i){
286 : state->MT[i] = CR8R_PRNG_MT_F*(state->MT[i-1]^(state->MT[i-1] >> 62)) + i;
287 : }
288 : return 1;
289 : }
290 : */
291 :
292 1 : static bool cr8r_prng_mt_fixup(void *_state){
293 1 : return 1;
294 : }
295 :
296 : CR8R_ATTR_NO_SAN("unsigned-integer-overflow", "implicit-unsigned-integer-truncation")
297 2000020 : static uint32_t cr8r_prng_mt_get_u32(void *_state){
298 2000020 : cr8r_prng_mt_st *state = _state;
299 2000020 : if(state->index >= 2*CR8R_PRNG_MT_N){
300 1003478 : for(uint64_t i = 0; i < CR8R_PRNG_MT_N; ++i){
301 1000272 : uint64_t x = (state->MT[i]&cr8r_prng_mt_himask)
302 1000272 : | (state->MT[(i+1)%CR8R_PRNG_MT_N]&cr8r_prng_mt_lomask);
303 1000272 : uint64_t xA = x >> 1;
304 1000272 : if(x&1){
305 500096 : xA ^= CR8R_PRNG_MT_A;
306 : }
307 1000272 : state->MT[i] = state->MT[(i + CR8R_PRNG_MT_M)%CR8R_PRNG_MT_N]^xA;
308 : }
309 3206 : state->index = 0;
310 : }
311 : // we split each 64 bit output into 2 32 bit outputs,
312 : // but both are computed from the state which is a little redundant
313 2000020 : uint64_t y = state->MT[state->index >> 1];
314 2000020 : y ^= (y >> CR8R_PRNG_MT_U)&CR8R_PRNG_MT_D;
315 2000020 : y ^= (y << CR8R_PRNG_MT_S)&CR8R_PRNG_MT_B;
316 2000020 : y ^= (y << CR8R_PRNG_MT_T)&CR8R_PRNG_MT_C;
317 2000020 : y ^= (y >> CR8R_PRNG_MT_L);
318 2000020 : if(state->index++&1){
319 1000010 : return y >> 32;
320 : }
321 1000010 : return y;
322 : }
323 :
324 1 : cr8r_prng *cr8r_prng_init_mt(uint64_t seed){
325 1 : cr8r_prng *res = malloc(offsetof(cr8r_prng, state) + sizeof(cr8r_prng_mt_st));
326 1 : if(res){
327 1 : res->state_size = sizeof(cr8r_prng_mt_st);
328 1 : res->get_u32 = cr8r_prng_mt_get_u32,
329 1 : res->fixup_state = cr8r_prng_mt_fixup;
330 1 : if(!cr8r_prng_seed(res, seed)){
331 0 : free(res);
332 0 : return NULL;
333 : }
334 : }
335 : return res;
336 : }
337 :
338 : // The xoroshift256** by David Blackman and Sebastiano Vigna
339 : // is available in its original form at https://vigna.di.unimi.it/xorshift/
340 : // The original is released into the public domain with the following
341 : // notice, and this adapted version is also public domain
342 :
343 : // Original notice:
344 :
345 : /* Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org)
346 :
347 : To the extent possible under law, the author has dedicated all copyright
348 : and related and neighboring rights to this software to the public domain
349 : worldwide. This software is distributed without any warranty.
350 :
351 : See <http://creativecommons.org/publicdomain/zero/1.0/>. */
352 :
353 : /* This is xoshiro256** 1.0, one of our all-purpose, rock-solid
354 : generators. It has excellent (sub-ns) speed, a state (256 bits) that is
355 : large enough for any parallel application, and it passes all tests we
356 : are aware of.
357 :
358 : For generating just floating-point numbers, xoshiro256+ is even faster.
359 :
360 : The state must be seeded so that it is not everywhere zero. If you have
361 : a 64-bit seed, we suggest to seed a splitmix64 generator and use its
362 : output to fill s. */
363 :
364 : CR8R_ATTR_NO_SAN("unsigned-integer-overflow")
365 4004000 : static inline uint64_t rotl(uint64_t x, uint64_t k){
366 4004000 : return (x << k) | (x >> (64 - k));
367 : }
368 :
369 : CR8R_ATTR_NO_SAN("unsigned-integer-overflow", "implicit-unsigned-integer-truncation")
370 2002000 : static uint32_t cr8r_prng_xoro_get_u32(void *_state){
371 2002000 : uint64_t *s = _state;
372 2002000 : uint64_t res = rotl(s[1]*5, 7)*9;
373 2002000 : uint64_t t = s[1] << 17;
374 2002000 : s[2] ^= s[0];
375 2002000 : s[3] ^= s[1];
376 2002000 : s[1] ^= s[2];
377 2002000 : s[0] ^= s[3];
378 2002000 : s[2] ^= t;
379 2002000 : s[3] = rotl(s[3], 45);
380 2002000 : return res >> 16;
381 : }
382 :
383 0 : void cr8r_prng_xoro_jump_t128(cr8r_prng *self){
384 0 : uint64_t *s = (uint64_t*)self->state;
385 0 : static const uint64_t JUMP[] = {0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c};
386 0 : uint64_t s0 = 0;
387 0 : uint64_t s1 = 0;
388 0 : uint64_t s2 = 0;
389 0 : uint64_t s3 = 0;
390 0 : for(uint64_t i = 0; i < 4; ++i){
391 0 : for(uint64_t b = 0; b < 64; ++b){
392 0 : if(JUMP[i]&(1ull << b)){
393 0 : s0 ^= s[0];
394 0 : s1 ^= s[1];
395 0 : s2 ^= s[2];
396 0 : s3 ^= s[3];
397 : }
398 0 : cr8r_prng_xoro_get_u32(self->state);
399 : }
400 : }
401 0 : s[0] = s0;
402 0 : s[1] = s1;
403 0 : s[2] = s2;
404 0 : s[3] = s3;
405 0 : }
406 :
407 0 : void cr8r_prng_xoro_jump_t192(cr8r_prng *self){
408 0 : uint64_t *s = (uint64_t*)self->state;
409 0 : static const uint64_t LONG_JUMP[] = {0x76e15d3efefdcbbf, 0xc5004e441c522fb3, 0x77710069854ee241, 0x39109bb02acbe635};
410 0 : uint64_t s0 = 0;
411 0 : uint64_t s1 = 0;
412 0 : uint64_t s2 = 0;
413 0 : uint64_t s3 = 0;
414 0 : for(uint64_t i = 0; i < 4; ++i){
415 0 : for(uint64_t b = 0; b < 64; ++b){
416 0 : if(LONG_JUMP[i]&(1ull << b)){
417 0 : s0 ^= s[0];
418 0 : s1 ^= s[1];
419 0 : s2 ^= s[2];
420 0 : s3 ^= s[3];
421 : }
422 0 : cr8r_prng_xoro_get_u32(self->state);
423 : }
424 : }
425 0 : s[0] = s0;
426 0 : s[1] = s1;
427 0 : s[2] = s2;
428 0 : s[3] = s3;
429 0 : }
430 :
431 2 : static bool cr8r_prng_xoro_fixup(void *_state){
432 2 : return 1;
433 : }
434 :
435 2 : cr8r_prng *cr8r_prng_init_xoro(uint64_t seed){
436 2 : cr8r_prng *res = malloc(offsetof(cr8r_prng, state) + 4*sizeof(uint64_t));
437 2 : if(res){
438 2 : res->state_size = 4*sizeof(uint64_t);
439 2 : res->get_u32 = cr8r_prng_xoro_get_u32,
440 2 : res->fixup_state = cr8r_prng_xoro_fixup;
441 2 : if(!cr8r_prng_seed(res, seed)){
442 0 : free(res);
443 0 : return NULL;
444 : }
445 : }
446 : return res;
447 : }
448 :
449 : /* Written in 2015 by Sebastiano Vigna (vigna@acm.org)
450 :
451 : To the extent possible under law, the author has dedicated all copyright
452 : and related and neighboring rights to this software to the public domain
453 : worldwide. This software is distributed without any warranty.
454 :
455 : See <http://creativecommons.org/publicdomain/zero/1.0/>. */
456 :
457 : /* This is a fixed-increment version of Java 8's SplittableRandom generator
458 : See http://dx.doi.org/10.1145/2714064.2660195 and
459 : http://docs.oracle.com/javase/8/docs/api/java/util/SplittableRandom.html
460 :
461 : It is a very fast generator passing BigCrush, and it can be useful if
462 : for some reason you absolutely want 64 bits of state. */
463 :
464 : CR8R_ATTR_NO_SAN("unsigned-integer-overflow", "implicit-unsigned-integer-truncation")
465 1941 : static uint32_t cr8r_prng_splitmix_get_u32(void *_state){
466 1941 : uint64_t *state = _state;
467 1941 : uint64_t z = (*state += 0x9e3779b97f4a7c15);
468 1941 : z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9;
469 1941 : z = (z ^ (z >> 27)) * 0x94d049bb133111eb;
470 1941 : z ^= (z >> 31);
471 1941 : return z >> 16;
472 : }
473 :
474 0 : static bool cr8r_prng_splitmix_fixup(void *_state){
475 0 : return 1;
476 : }
477 :
478 0 : cr8r_prng *cr8r_prng_init_splitmix(uint64_t seed){
479 0 : cr8r_prng *res = malloc(offsetof(cr8r_prng, state) + sizeof(uint64_t));
480 0 : if(res){
481 0 : res->state_size = sizeof(uint64_t);
482 0 : res->get_u32 = cr8r_prng_splitmix_get_u32,
483 0 : res->fixup_state = cr8r_prng_splitmix_fixup;
484 0 : if(!cr8r_prng_seed(res, seed)){
485 0 : free(res);
486 0 : return NULL;
487 : }
488 : }
489 : return res;
490 : }
491 :
492 : // need to create anonymous struct to statically allocate fla
493 : static struct{
494 : uint64_t state_size;
495 : uint32_t (*get_u32)(void*);
496 : bool (*fixup_state)(void*);
497 : uint64_t state;
498 : } _default_prng_splitmix = {
499 : .state_size = sizeof(uint64_t),
500 : .get_u32 = cr8r_prng_splitmix_get_u32,
501 : .fixup_state = cr8r_prng_splitmix_fixup,
502 : .state = CR8R_DEFAULT_PRNG_SM_SEED
503 : };
504 : cr8r_prng *cr8r_default_prng_splitmix = (cr8r_prng*)&_default_prng_splitmix;
505 :
506 : // End of xoroshift256** code
507 :
|