#include <stdlib.h>
#include <string.h>

#include <crater/vec.h>
#include <crater/heap.h>

#ifdef DEBUG
#include <crater/vec_check.h>
#endif


bool cr8r_vec_ft_init(cr8r_vec_ft *ft,
    void *data, uint64_t size,
    uint64_t (*new_size)(cr8r_base_ft*, uint64_t cap),
    void *(*resize)(cr8r_base_ft*, void *p, uint64_t cap),
    void (*del)(cr8r_base_ft*, void *p),
    void (*copy)(cr8r_base_ft*, void *dest, const void *src),
    int (*cmp)(const cr8r_base_ft*, const void *a, const void *b),
    void (*swap)(cr8r_base_ft*, void *a, void *b)
){
    ft->base.data = data;
    ft->base.size = size;
    ft->new_size = new_size ?: cr8r_default_new_size;
    ft->resize = resize ?: cr8r_default_resize;
    ft->del = del;
    ft->copy = copy;
    ft->cmp = cmp;
    ft->swap = swap ?: cr8r_default_swap;
    return 1;
}
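
// Usage sketch (illustrative, not part of the original source; guarded out so it
// does not affect this translation unit): building a function table for plain
// int elements. Passing NULL for new_size, resize, and swap falls back on the
// defaults below; del, copy, and cmp are stored as given, so a comparator must
// be supplied explicitly if one is wanted.
#if 0
cr8r_vec_ft ft;
cr8r_vec_ft_init(&ft,
    NULL, sizeof(int), // no per-table data; elements are ints
    NULL,              // new_size -> cr8r_default_new_size (geometric growth)
    NULL,              // resize   -> cr8r_default_resize (malloc/realloc/free)
    NULL, NULL,        // no destructor, trivial bytewise copy
    cr8r_default_cmp,  // memcmp-based comparison
    NULL);             // swap     -> cr8r_default_swap
#endif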


uint64_t cr8r_default_new_size(cr8r_base_ft *base, uint64_t cap){
    return cap ? cap << 1 : 8;
}

uint64_t cr8r_default_bump_size(cr8r_base_ft *base, uint64_t cap){
    return cap + 1;
}

void *cr8r_default_resize(cr8r_base_ft *base, void *p, uint64_t cap){
    if(!cap){
        if(p){
            free(p);
        }
        return NULL;
    }
    if(base->size){
        return p ? realloc(p, cap*base->size) : malloc(cap*base->size);
    }
    return p;
}

void *cr8r_default_resize_pass(cr8r_base_ft *base, void *p, uint64_t cap){
    return NULL;
}

int cr8r_default_cmp(const cr8r_base_ft *base, const void *a, const void *b){
    return memcmp(a, b, base->size);
}

void cr8r_default_swap(cr8r_base_ft *base, void *a, void *b){
    if(a != b){
        char buf[base->size];
        memcpy(buf, a, base->size);
        memcpy(a, b, base->size);
        memcpy(b, buf, base->size);
    }
}

bool cr8r_vec_init(cr8r_vec *self, cr8r_vec_ft *ft, uint64_t cap){
    void *tmp = ft->resize(&ft->base, NULL, cap);
    if(!tmp){
        return !(cap && ft->base.size);
    }
    *self = (cr8r_vec){.buf=tmp, .cap=cap};
    return 1;
}

void cr8r_vec_delete(cr8r_vec *self, cr8r_vec_ft *ft){
    cr8r_vec_clear(self, ft);
    ft->resize(&ft->base, self->buf, 0);
    *self = (cr8r_vec){};
}
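
// Usage sketch (illustrative, not part of the original source): the basic
// lifecycle of a vector of uint64_t using the cr8r_vecft_u64 table defined at
// the bottom of this file. cr8r_vec_init reserves space for 16 elements and
// cr8r_vec_delete releases the buffer and zeroes the struct.
#if 0
cr8r_vec v;
if(cr8r_vec_init(&v, &cr8r_vecft_u64, 16)){
    // ... use the vector ...
    cr8r_vec_delete(&v, &cr8r_vecft_u64);
}
#endif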

bool cr8r_vec_copy(cr8r_vec *dest, const cr8r_vec *src, cr8r_vec_ft *ft){
    if(!cr8r_vec_init(dest, ft, src->len)){
        return 0;
    }
    return cr8r_vec_augment(dest, src, ft);
}

inline static bool _sub_copy(cr8r_vec *dest, const cr8r_vec *src, cr8r_vec_ft *ft, uint64_t a, uint64_t b){
    if(!ft->copy){
        memcpy(dest->buf, src->buf + a*ft->base.size, (b - a)*ft->base.size);
    }else for(uint64_t i = 0; i < b - a; ++i){
        ft->copy(&ft->base, dest->buf + i*ft->base.size, src->buf + (a + i)*ft->base.size);
    }
    dest->len = b - a;
    return 1;
}

bool cr8r_vec_sub(cr8r_vec *dest, const cr8r_vec *src, cr8r_vec_ft *ft, uint64_t a, uint64_t b){
    if(b > src->len || b < a || !cr8r_vec_init(dest, ft, b - a)){
        return 0;
    }
    return _sub_copy(dest, src, ft, a, b);
}

bool cr8r_vec_resize(cr8r_vec *self, cr8r_vec_ft *ft, uint64_t cap){
    if(cap < self->len){
        return 0;
    }
    void *tmp = ft->resize(&ft->base, self->buf, cap);
    if(!tmp){
        return !(cap && ft->base.size);
    }
    self->buf = tmp;
    self->cap = cap;
    return 1;
}

bool cr8r_vec_trim(cr8r_vec *self, cr8r_vec_ft *ft){
    void *tmp = ft->resize(&ft->base, self->buf, self->len);
    if(!tmp){
        if(!(self->len && ft->base.size)){
            self->buf = NULL;
            self->cap = self->len;
            return 1;
        }
        return 0;
    }
    self->buf = tmp;
    self->cap = self->len;
    return 1;
}

void cr8r_vec_clear(cr8r_vec *self, cr8r_vec_ft *ft){
    if(ft->del){
        for(void *e = self->buf; e < self->buf + self->len*ft->base.size; e += ft->base.size){
            ft->del(&ft->base, e);
        }
    }
    self->len = 0;
}

bool cr8r_vec_combine(cr8r_vec *dest, const cr8r_vec *src_a, const cr8r_vec *src_b, cr8r_vec_ft *ft){
    if(!cr8r_vec_init(dest, ft, src_a->len + src_b->len)){
        return 0;
    }
    if(!ft->copy){
        memcpy(dest->buf, src_a->buf, src_a->len*ft->base.size);
        memcpy(dest->buf + src_a->len*ft->base.size, src_b->buf, src_b->len*ft->base.size);
    }else{
        for(uint64_t i = 0; i < src_a->len; ++i){
            ft->copy(&ft->base, dest->buf + i*ft->base.size, src_a->buf + i*ft->base.size);
        }
        for(uint64_t i = 0; i < src_b->len; ++i){
            ft->copy(&ft->base, dest->buf + (src_a->len + i)*ft->base.size, src_b->buf + i*ft->base.size);
        }
    }
    dest->len = src_a->len + src_b->len;
    return 1;
}

bool cr8r_vec_augment(cr8r_vec *self, const cr8r_vec *other, cr8r_vec_ft *ft){
    if(self->len + other->len > self->cap){
        uint64_t cap = ft->new_size(&ft->base, self->cap);
        if(self->len + other->len > cap){
            cap = self->len + other->len;
        }
        if(!cr8r_vec_resize(self, ft, cap)){
            return 0;
        }
    }
    if(!ft->copy){
        memcpy(self->buf + self->len*ft->base.size, other->buf, other->len*ft->base.size);
    }else for(uint64_t i = 0; i < other->len; ++i){
        ft->copy(&ft->base, self->buf + (self->len + i)*ft->base.size, other->buf + i*ft->base.size);
    }
    self->len += other->len;
    return 1;
}

void *cr8r_vec_get(cr8r_vec *self, const cr8r_vec_ft *ft, uint64_t i){
    if(self->len <= i){
        return NULL;
    }
    return self->buf + i*ft->base.size;
}

void *cr8r_vec_getx(cr8r_vec *self, const cr8r_vec_ft *ft, int64_t i){
    if(i < -(int64_t)self->len || (int64_t)self->len <= i){
        return NULL;
    }else if(i < 0){
        return self->buf + ((int64_t)self->len + i)*ft->base.size;
    }else{
        return self->buf + i*ft->base.size;
    }
}

uint64_t cr8r_vec_len(cr8r_vec *self){
    return self->len;
}

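// The shuffle below is a standard Fisher-Yates shuffle: walking from the back,
// each element is swapped with a uniformly chosen element at or before it.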
void cr8r_vec_shuffle(cr8r_vec *self, cr8r_vec_ft *ft, cr8r_prng *prng){
    for(uint64_t i = self->len - 1, j; i; --i){
        j = cr8r_prng_uniform_u64(prng, 0, i + 1);
        ft->swap(&ft->base, self->buf + i*ft->base.size, self->buf + j*ft->base.size);
    }
}

bool cr8r_vec_ensure_cap(cr8r_vec *self, cr8r_vec_ft *ft, uint64_t cap){
    if(cap > self->cap){
        uint64_t new_cap = ft->new_size(&ft->base, self->cap);
        if(cap > new_cap){
            new_cap = cap;
        }
        void *tmp = ft->resize(&ft->base, self->buf, new_cap);
        if(!tmp){
            return !ft->base.size;
        }
        self->buf = tmp;
        self->cap = new_cap;
    }
    return 1;
}

bool cr8r_vec_pushr(cr8r_vec *self, cr8r_vec_ft *ft, const void *e){
    if(!cr8r_vec_ensure_cap(self, ft, self->len + 1)){
        return 0;
    }else if(ft->base.size){
        memcpy(self->buf + self->len++*ft->base.size, e, ft->base.size);
    }
    return 1;
}

bool cr8r_vec_popr(cr8r_vec *self, cr8r_vec_ft *ft, void *o){
    if(!self->len){
        return 0;
    }
    memcpy(o, self->buf + (--self->len)*ft->base.size, ft->base.size);
    return 1;
}
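
// Usage sketch (illustrative, not part of the original source): cr8r_vec_pushr
// and cr8r_vec_popr give amortized O(1) stack semantics on the right end.
// A zero-initialized vector is a valid empty vector here, since the default
// resize callback treats a NULL buffer as a fresh allocation.
#if 0
cr8r_vec v = {};
for(uint64_t i = 0; i < 10; ++i){
    cr8r_vec_pushr(&v, &cr8r_vecft_u64, &i);
}
uint64_t top;
while(cr8r_vec_popr(&v, &cr8r_vecft_u64, &top)){
    // top takes the values 9, 8, ..., 0
}
cr8r_vec_delete(&v, &cr8r_vecft_u64);
#endif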

// Accessing the left end of a vector is O(n) -- slow.
bool cr8r_vec_pushl(cr8r_vec *self, cr8r_vec_ft *ft, const void *e){
    if(!cr8r_vec_ensure_cap(self, ft, self->len + 1)){
        return 0;
    }else if(ft->base.size){
        memmove(self->buf + ft->base.size, self->buf, self->len++*ft->base.size);
        memcpy(self->buf, e, ft->base.size);
    }
    return 1;
}

bool cr8r_vec_popl(cr8r_vec *self, cr8r_vec_ft *ft, void *o){
    if(!self->len){
        return 0;
    }
    memcpy(o, self->buf, ft->base.size);
    memmove(self->buf, self->buf + ft->base.size, (--self->len)*ft->base.size);
    return 1;
}

bool cr8r_vec_filter(cr8r_vec *self, cr8r_vec_ft *ft, cr8r_vec_pred pred, void *data){
    uint64_t j = 0;
    if(ft->swap == cr8r_default_swap){
        for(uint64_t i = 0; i < self->len; ++i){
            if(pred(ft, self->buf + i*ft->base.size, data)){
                if(i > j){
                    memcpy(self->buf + j*ft->base.size, self->buf + i*ft->base.size, ft->base.size);
                }
                ++j;
            }else if(ft->del){
                ft->del(&ft->base, self->buf + i*ft->base.size);
            }
        }
    }else{// otherwise swap is nontrivial
        // we can only swap constructed elements, so we can only delete elements at the end.
        // we can maintain two regions in the array: a "passed" region and a "failed" region.
        // the array is always split into the passed region, followed by the failed region,
        // followed by the unchecked region. initially, we have checked no elements, so the
        // passed and failed regions both have length zero. if the first unexplored element passes
        // the predicate, we swap it with the first failed element if there is one (otherwise we leave it)
        // and then update the passed and failed regions. if the first unexplored element
        // fails the predicate, we extend the failed region to include it. when there are no
        // unexplored elements left, we can delete all failed elements.
        //
        // j is the number of passed elements, so let i be the number of failed elements
        for(uint64_t i = 0; j + i < self->len;){
            if(pred(ft, self->buf + (j + i)*ft->base.size, data)){
                if(i){
                    ft->swap(&ft->base, self->buf + j*ft->base.size, self->buf + (j + i)*ft->base.size);
                }
                ++j;
            }else{
                ++i;
            }
        }
        if(ft->del){
            for(uint64_t i = j; i < self->len; ++i){
                ft->del(&ft->base, self->buf + i*ft->base.size);
            }
        }
    }
    self->len = j;
    return cr8r_vec_trim(self, ft);
}
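
// Usage sketch (illustrative, not part of the original source): a predicate that
// keeps only even values. The parameter list follows how cr8r_vec_pred is invoked
// above (function table, element pointer, user data); the exact typedef lives in
// <crater/vec.h>.
#if 0
static bool keep_even(const cr8r_vec_ft *ft, const void *e, void *data){
    return (*(const uint64_t*)e & 1) == 0;
}

// drop all odd entries of a u64 vector in place, then trim the buffer
cr8r_vec_filter(&v, &cr8r_vecft_u64, keep_even, NULL);
#endif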

bool cr8r_vec_filtered(cr8r_vec *dest, const cr8r_vec *src, cr8r_vec_ft *ft, cr8r_vec_pred pred, void *data){
    if(!cr8r_vec_init(dest, ft, src->len)){
        return 0;
    }
    if(!ft->copy){
        for(void *e = src->buf; e < src->buf + src->len*ft->base.size; e += ft->base.size){
            if(pred(ft, e, data)){
                cr8r_vec_pushr(dest, ft, e);
            }
        }
    }else{
        for(void *e = src->buf; e < src->buf + src->len*ft->base.size; e += ft->base.size){
            if(pred(ft, e, data)){
                ft->copy(&ft->base, dest->buf + (dest->len++)*ft->base.size, e);
            }
        }
    }
    cr8r_vec_trim(dest, ft);
    return 1;
}

bool cr8r_vec_map(cr8r_vec *dest, const cr8r_vec *src, cr8r_vec_ft *src_ft, cr8r_vec_ft *dest_ft, cr8r_vec_mapper f, void *data){
    if(!cr8r_vec_init(dest, dest_ft, src->len)){
        return 0;
    }
    for(uint64_t i = 0; i < src->len; ++i){
        f(src_ft, dest_ft, dest->buf + i*dest_ft->base.size, src->buf + i*src_ft->base.size, data);
    }
    dest->len = src->len;
    return 1;
}

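// cr8r_vec_forEachPermutation below is the iterative form of Heap's algorithm:
// the counter array c plays the role of the recursion's loop indices, each
// subsequent permutation is produced by a single swap, and f is invoked once
// per permutation (starting with the vector in its original order).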
int cr8r_vec_forEachPermutation(cr8r_vec *self, cr8r_vec_ft *ft, void (*f)(const cr8r_vec *self, const cr8r_vec_ft *ft, void *data), void *data){
    uint64_t *c = calloc(self->len, sizeof(uint64_t));
    if(!c){
        return 0;
    }
    f(self, ft, data);
    for(uint64_t i = 0; i < self->len; ++i){
        if(c[i] < i){
            ft->swap(&ft->base, self->buf + (i&1 ? c[i] : 0)*ft->base.size, self->buf + i*ft->base.size);
            f(self, ft, data);
            c[i]++;
            i = 0;
        }else{
            c[i] = 0;
        }
    }
    free(c);
    return 1;
}

bool cr8r_vec_all(const cr8r_vec *self, const cr8r_vec_ft *ft, cr8r_vec_pred pred, void *data){
    for(void *e = self->buf; e < self->buf + self->len*ft->base.size; e += ft->base.size){
        if(!pred(ft, e, data)){
            return 0;
        }
    }
    return 1;
}

bool cr8r_vec_any(const cr8r_vec *self, const cr8r_vec_ft *ft, cr8r_vec_pred pred, void *data){
    for(void *e = self->buf; e < self->buf + self->len*ft->base.size; e += ft->base.size){
        if(pred(ft, e, data)){
            return 1;
        }
    }
    return 0;
}

bool cr8r_vec_contains(const cr8r_vec *self, const cr8r_vec_ft *ft, const void *e){
    return cr8r_vec_index(self, ft, e) != -1;
}

int64_t cr8r_vec_index(const cr8r_vec *self, const cr8r_vec_ft *ft, const void *e){
    for(uint64_t i = 0; i < self->len; ++i){
        if(!ft->cmp(&ft->base, self->buf + i*ft->base.size, e)){
            return i;
        }
    }
    return -1;
}

void *cr8r_vec_exm(cr8r_vec *self, const cr8r_vec_ft *ft, int ord){
    if(!self->len){
        return NULL;
    }
    void *exm = self->buf;
    for(uint64_t i = 1; i < self->len; ++i){
        void *e = self->buf + i*ft->base.size;
        if(ord*ft->cmp(&ft->base, e, exm) < 0){
            exm = e;
        }
    }
    return exm;
}

void cr8r_vec_reverse(cr8r_vec *self, cr8r_vec_ft *ft){
    if(self->len < 2){
        return;
    }
    for(uint64_t i = 0, j = self->len - 1; i < j; ++i, --j){
        ft->swap(&ft->base, self->buf + i*ft->base.size, self->buf + j*ft->base.size);
    }
}

CR8R_ATTR_NO_SAN("unsigned-integer-overflow")
bool cr8r_vec_reversed(cr8r_vec *dest, const cr8r_vec *src, cr8r_vec_ft *ft){
    if(!cr8r_vec_init(dest, ft, src->len)){
        return 0;
    }
    if(ft->copy){
        for(uint64_t i = 0, j = src->len - 1; i < src->len; ++i, --j){
            ft->copy(&ft->base, dest->buf + i*ft->base.size, src->buf + j*ft->base.size);
        }
    }else for(uint64_t i = 0, j = src->len - 1; i < src->len; ++i, --j){
        memcpy(dest->buf + i*ft->base.size, src->buf + j*ft->base.size, ft->base.size);
    }
    dest->len = src->len;
    return 1;
}

void *cr8r_vec_foldr(const cr8r_vec *self, const cr8r_vec_ft *ft, cr8r_vec_accumulator f, void *init){
    for(void *e = self->buf; e < self->buf + self->len*ft->base.size; e += ft->base.size){
        init = f(ft, init, e);
    }
    return init;
}

void *cr8r_vec_foldl(const cr8r_vec *self, const cr8r_vec_ft *ft, cr8r_vec_accumulator f, void *init){
    for(void *e = self->buf + (self->len - 1)*ft->base.size; e >= self->buf; e -= ft->base.size){
        init = f(ft, init, e);
    }
    return init;
}

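// cr8r_vec_sort below is an in-place heapsort built on <crater/heap.h>:
// cr8r_heap_ify arranges the buffer into a heap, and each cr8r_heap_pop shrinks
// self->len by one, so the popped element can be written into the slot just past
// the new end of the heap until the whole range is sorted.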
void cr8r_vec_sort(cr8r_vec *self, cr8r_vec_ft *ft){
    // TODO: call nontrivial ft->swap
    cr8r_heap_ify(self, ft, 1);
    uint64_t len = self->len;
    char buf[ft->base.size];
    while(self->len > 1){
        cr8r_heap_pop(self, ft, buf, 1);
        memcpy(self->buf + self->len*ft->base.size, buf, ft->base.size);
    }
    self->len = len;
}

bool cr8r_vec_sorted(cr8r_vec *dest, const cr8r_vec *src, cr8r_vec_ft *ft){
    if(!cr8r_vec_copy(dest, src, ft)){
        return 0;
    }
    cr8r_vec_sort(dest, ft);
    return 1;
}

bool cr8r_vec_containss(const cr8r_vec *self, const cr8r_vec_ft *ft, const void *e){
    return cr8r_vec_indexs(self, ft, e) != -1;
}

int64_t cr8r_vec_indexs(const cr8r_vec *self, const cr8r_vec_ft *ft, const void *e){
    uint64_t a = 0, b = self->len;
    while(a < b){
        uint64_t i = (a + b) >> 1;
        int ord = ft->cmp(&ft->base, e, self->buf + i*ft->base.size);
        if(!ord){
            return i;
        }else if(ord < 0){
            b = i;
        }else if(ord > 0){
            a = i + 1;
        }
    }
    return -1;
}

int64_t cr8r_vec_first_gts(const cr8r_vec *self, const cr8r_vec_ft *ft, const void *e){
    uint64_t a = 0, b = self->len;
    // rightmost binary search
    while(a < b){
        uint64_t i = (a + b) >> 1;
        int ord = ft->cmp(&ft->base, e, self->buf + i*ft->base.size);
        if(ord < 0){
            b = i;
        }else{
            a = i + 1;
        }
    }// Now, 1) b == 0, 2) self[b-1] == e, or 3) there are self->len - b elements of self > e
    return b == self->len ? -1 : (int64_t)b;
}

int64_t cr8r_vec_first_ges(const cr8r_vec *self, const cr8r_vec_ft *ft, const void *e){
    int64_t i = cr8r_vec_last_lts(self, ft, e);
    return i + 1 == (int64_t)self->len ? -1 : i + 1;
}

int64_t cr8r_vec_last_lts(const cr8r_vec *self, const cr8r_vec_ft *ft, const void *e){
    uint64_t a = 0, b = self->len;
    // leftmost binary search
    while(a < b){
        uint64_t i = (a + b) >> 1;
        int ord = ft->cmp(&ft->base, e, self->buf + i*ft->base.size);
        if(ord > 0){
            a = i + 1;
        }else{
            b = i;
        }
    }// Now, 1) a == self->len, 2) self[a] == e, or 3) there are a elements of self < e
    return (int64_t)a - 1;
}

CR8R_ATTR_NO_SAN("unsigned-integer-overflow")
int64_t cr8r_vec_last_les(const cr8r_vec *self, const cr8r_vec_ft *ft, const void *e){
    int64_t i = cr8r_vec_first_gts(self, ft, e);
    if(i < 0){
        return self->len - 1;
    }
    return i - 1;
}
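
// Worked example for the sorted-search family above: on a sorted u64 vector
// holding {1, 3, 3, 5} with key 3,
//   cr8r_vec_indexs    returns 2 (some matching index),
//   cr8r_vec_first_gts returns 3 (first element > 3),
//   cr8r_vec_first_ges returns 1 (first element >= 3),
//   cr8r_vec_last_lts  returns 0 (last element < 3),
//   cr8r_vec_last_les  returns 2 (last element <= 3),
// and each returns -1 when no such element exists.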

int cr8r_vec_cmp(const cr8r_vec *a, const cr8r_vec *b, const cr8r_vec_ft *ft){
    for(uint64_t i = 0;; ++i){
        if(i >= b->len){
            return i < a->len;
        }else if(i >= a->len){
            return -1;
        }
        int ord = ft->cmp(&ft->base, a->buf + i*ft->base.size, b->buf + i*ft->base.size);
        if(ord){
            return ord;
        }
    }
}

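// cr8r_vec_sort_i is a plain insertion sort over the index range [a, b); it is
// only applied to the short chunks (at most 5 elements) that median-of-medians
// pivot selection sorts below.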
static inline void cr8r_vec_sort_i(cr8r_vec *self, cr8r_vec_ft *ft, uint64_t a, uint64_t b){
    void *start = self->buf + a*ft->base.size;
    void *end = self->buf + b*ft->base.size;
    for(void *it = start + ft->base.size; it < end; it += ft->base.size){
        for(void *jt = it; jt > start && ft->cmp(&ft->base, jt - ft->base.size, jt) > 0; jt -= ft->base.size){
            ft->swap(&ft->base, jt - ft->base.size, jt);
        }
    }
}

void *cr8r_vec_pivot_mm(cr8r_vec *self, cr8r_vec_ft *ft, uint64_t a, uint64_t b){
    if(a >= b || b > self->len){
        return NULL;
    }
    // Selecting a pivot by median of medians is mutually recursive with quickselect itself.
    // First we split the vector into chunks of 5 and find the median of each chunk.
    // As we do this, we swap the median to the front of the array so at the end
    // the first 20% or so of the array is composed of the medians of the chunks.
    // Then we find the median of this portion of the array with quickselect.
    // The resulting pivot is guaranteed to be >= roughly 3/10 of the elements and
    // <= roughly another 3/10, so partitioning on it always discards a constant
    // fraction of the range.
    uint64_t j = a;
    for(uint64_t i = a; i < b; i += 5){
        uint64_t curr_len = b - i < 5 ? b - i : 5;
        if(curr_len < 3){
            ft->swap(&ft->base, self->buf + (j++)*ft->base.size, self->buf + i*ft->base.size);
        }else{
            cr8r_vec_sort_i(self, ft, i, i + curr_len);
            ft->swap(&ft->base, self->buf + (j++)*ft->base.size, self->buf + (i + curr_len/2)*ft->base.size);
        }
    }
    return cr8r_vec_ith(self, ft, a, a + (b - a + 4)/5, (b - a + 4)/10);
}

void *cr8r_vec_pivot_m3(const cr8r_vec *self, cr8r_vec_ft *ft, uint64_t a, uint64_t b){
    if(a >= b || b > self->len){
        return NULL;
    }
    void *fst = self->buf + a*ft->base.size;
    if(b - a < 3){
        return fst;
    }
    void *lst = self->buf + (b - 1)*ft->base.size;
    void *mid = self->buf + (a + b - 1)/2*ft->base.size;
    int ord = ft->cmp(&ft->base, fst, mid);
    int sorted;
    if(!ord){// if *fst == *mid, then both are valid pivots regardless of *lst
        return mid;
    }else if((sorted = ord*ft->cmp(&ft->base, mid, lst))){// multiplying the ord of *mid and *lst by the ord of *fst and *mid lets us easily detect if the sequence is monotonic
        if(sorted > 0){// in this case, either *fst < *mid < *lst OR *fst > *mid > *lst
            return mid;
        }else if(ft->cmp(&ft->base, lst, fst)*ord >= 0){// if sorted < 0, *mid is either the max or min, determined by ord (the comparison between *fst and *mid)
            return fst;// if the ord of *fst and *mid is the same as the ord of *lst and *fst, then either *lst < *fst < *mid or *lst > *fst > *mid. Alternatively, if *lst == *fst, then both are valid pivots
        }else{// otherwise, *fst is either the max or min, but *mid is also either the max or min, and since none of the three are equal in this case, we can conclude that *lst is the median
            return lst;
        }
    }else{// if *fst != *mid but *mid == *lst, then both mid and lst are valid pivots regardless of *fst
        return mid;
    }
}
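
// Worked examples for the median-of-three selection above: for the u64 subrange
// {1, 3, 5}, *fst < *mid < *lst, so the middle element 3 is returned; for
// {5, 1, 3}, *mid is the minimum and *lst < *fst, so the last element 3 (the
// true median) is returned.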

void *cr8r_vec_partition(cr8r_vec *self, cr8r_vec_ft *ft, uint64_t a, uint64_t b, void *piv){
    if(a >= b || b > self->len){
        return NULL;
    }
    void *fst = self->buf + a*ft->base.size;
    void *lst = self->buf + (b - 1)*ft->base.size;
    if(piv < fst || piv > lst){
        return NULL;
    }
    if(piv != lst){
        ft->swap(&ft->base, piv, lst);
    }
    // now, lst points to the pivot
    void *it = fst - ft->base.size;
    void *jt = lst;
    // initially, *it is one before the start of the subrange, so that after
    // adding 1 to it, all elements preceding it (ie no elements) will be
    // known to be < pivot.
    // on the other hand, *jt points to the pivot so all elements beginning
    // at *jt are trivially >= the pivot (since *jt points to the last element
    // initially and the last element holds the pivot)
    while(1){
        // now, all elements up to *it are < pivot, and all elements starting at *jt are >= pivot
        do{
            it += ft->base.size;
        }while(it < jt && ft->cmp(&ft->base, it, lst) < 0);
        // now, *it >= pivot but the elements preceding *it are < pivot
        if(it == jt){// in this case, the partitions [a, it) and [jt, b) are complete
            break;
        }
        do{
            // currently, *jt and subsequent elements are all >= pivot
            jt -= ft->base.size;
        }while(it < jt && ft->cmp(&ft->base, jt, lst) >= 0);
        // now, EITHER it == jt so *jt >= pivot but the partitions [a, it) and [jt, b) are complete,
        // OR it < jt BUT *jt < pivot but the elements following *jt are >= pivot
        if(it == jt){// *jt >= pivot and the partitions [a, it) and [jt, b) are complete
            break;
        }// OTHERWISE, *it >= pivot and *jt < pivot, so we swap them
        ft->swap(&ft->base, it, jt);
        // now, all elements up to *it are < pivot, and all elements starting at *jt are >= pivot
    }
    // *jt and subsequent elements are >= pivot, so partitioning
    // is complete and *jt is the first element >= pivot
    if(jt != lst){
        // we need to ensure the pivot is placed at the beginning of the
        // >= pivot region, so we have a (possibly empty) < pivot region,
        // the pivot, and a (possibly empty) >= pivot region
        ft->swap(&ft->base, jt, lst);
    }
#ifdef DEBUG
    if(!cr8r_vec_check_partition(self, ft, a, b, jt)){
        __builtin_trap();
    }
#endif
    return jt;
}
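
// Usage sketch (illustrative, not part of the original source): one
// quicksort/quickselect step over a whole u64 vector v, combining median-of-three
// pivot selection with the partition above. On success ret points at the pivot:
// every element before it is < the pivot and every element from it onward is >= it.
#if 0
void *piv = cr8r_vec_pivot_m3(&v, &cr8r_vecft_u64, 0, v.len);
void *ret = piv ? cr8r_vec_partition(&v, &cr8r_vecft_u64, 0, v.len, piv) : NULL;
#endif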

static inline void sort_end_tail(cr8r_vec_ft *ft, int ord, void *start, void *it){
    for(void *jt = it; jt > start && ord*ft->cmp(&ft->base, jt - ft->base.size, jt) > 0; jt -= ft->base.size){
        ft->swap(&ft->base, jt - ft->base.size, jt);
    }
}

// Partially sort a subrange of the given vector to find an element near the max or min.
// If i < 0, find the (b + i)th element by partially sorting the -i largest elements
// descending in the beginning of the subrange and returning a pointer to the -i largest.
// If i >= 0, find the ith element by partially sorting the i+1 smallest elements ascending
// in the beginning of the subrange and returning a pointer to the i+1 smallest.
static inline void *sort_end(cr8r_vec *self, cr8r_vec_ft *ft, uint64_t a, uint64_t b, int64_t i){
    int ord = 1;
    if(i < 0){
        ord = -1;
        i = -i - 1;
    }
    void *start = self->buf + a*ft->base.size;
    void *res = start + i*ft->base.size;
    void *end = self->buf + b*ft->base.size;
    if(res >= end){
        return NULL;// error
    }
    // initially, [a, a + 1) = [start, start + 1) is sorted
    for(void *it = start + ft->base.size; it <= res; it += ft->base.size){
        sort_end_tail(ft, ord, start, it);
    }
    // now [a, res + 1) is sorted
    for(void *it = res + ft->base.size; it < end; it += ft->base.size){
        if(ord*ft->cmp(&ft->base, res, it) <= 0){
            // [a, res + 1) is sorted and res is <= [res + 1, it + 1)
            continue;
        }
        ft->swap(&ft->base, res, it);
        // again [a, res + 1) is sorted and res is <= [res + 1, it + 1)
        sort_end_tail(ft, ord, start, res);
    }
    return res;
}

void *cr8r_vec_ith(cr8r_vec *self, cr8r_vec_ft *ft, uint64_t a, uint64_t b, uint64_t i){
#ifdef DEBUG
    cr8r_vec sorted;
    if(!cr8r_vec_sub(&sorted, self, ft, a, b)){
        return NULL;
    }
    cr8r_vec_sort(&sorted, ft);
    void *sorted_res = sorted.buf + i*ft->base.size;
#endif
    void *res;
    while(1){
        //fprintf(stderr, "cr8r_vec_ith(self, ft, %"PRIu64", %"PRIu64", %"PRIu64")\n", a, b, i);
        if(i >= b - a){
            return NULL;
        }else if(i < CR8R_VEC_ISORT_BOUND){
            res = sort_end(self, ft, a, b, i);
            break;
        }else if(b - i <= CR8R_VEC_ISORT_BOUND){
            res = sort_end(self, ft, a, b, (int64_t)i - (int64_t)b);
            break;
        }
        void *piv = cr8r_vec_pivot_mm(self, ft, a, b);
        piv = cr8r_vec_partition(self, ft, a, b, piv);
        if(!piv){
            return NULL;
        }
        uint64_t j = (piv - self->buf)/ft->base.size - a;
        if(i < j){
            b = a + j;
        }else if(i > j){
            i -= j + 1;
            a += j + 1;
        }else{
            res = piv;
            break;
        }
    }
#ifdef DEBUG
    if(ft->cmp(&ft->base, sorted_res, res)){
        __builtin_trap();
    }
    cr8r_vec_delete(&sorted, ft);
#endif
    return res;
}
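
// Usage sketch (illustrative, not part of the original source): cr8r_vec_ith is a
// quickselect over the index range [a, b). Selecting the median of a whole u64
// vector v looks like this; note that the selected range is partially reordered
// as a side effect.
#if 0
uint64_t *med = cr8r_vec_ith(&v, &cr8r_vecft_u64, 0, v.len, v.len/2);
#endif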

typedef struct{
    uint64_t lb; // elements < the median occur in the range [a, lb)
    uint64_t ea;
    uint64_t eb; // elements == the median occur in the range [ea, eb)
    uint64_t ha; // elements > the median occur in the range [ha, b)
} pwm_state;

// Advance lb past any elements < med, since they belong in the range [a, lb).
// When elements == med are encountered and there is room on the left of [ea, eb),
// we swap them there and continue.
// Finally, when an element > med is encountered or lb bumps into ea, we break,
// unless ha has already bumped into eb, in which case we just shift over the == region
// and keep going
static inline void pwm_advance_le(cr8r_vec *self, cr8r_vec_ft *ft, pwm_state *st, bool is_ge_done){
    char tmp[ft->base.size];
    while(st->lb < st->ea){
        int ord = ft->cmp(&ft->base, self->buf + st->lb*ft->base.size, self->buf + st->ea*ft->base.size);
        if(ord < 0){
            ++st->lb;
        }else if(!ord){
            if(st->lb == --st->ea){
                break;
            }
            ft->swap(&ft->base, self->buf + st->lb*ft->base.size, self->buf + st->ea*ft->base.size);
            // otherwise lb is > the median so it must go on the right
        }else if(is_ge_done){// eb == ha so we must shift an == element over
            --st->eb;
            --st->ha;
            if(st->lb == --st->ea){
                ft->swap(&ft->base, self->buf + st->ea*ft->base.size, self->buf + st->ha*ft->base.size);
                break;
            }
            // triple swap lb, ea, and ha
            memcpy(tmp, self->buf + st->lb*ft->base.size, ft->base.size);
            memcpy(self->buf + st->lb*ft->base.size, self->buf + st->ea*ft->base.size, ft->base.size);
            memcpy(self->buf + st->ea*ft->base.size, self->buf + st->ha*ft->base.size, ft->base.size);
            memcpy(self->buf + st->ha*ft->base.size, tmp, ft->base.size);
        }else{ // eb < ha, so we stop here and let the caller check whether the unexplored elements in [eb, ha) contain any < med
            break;
        }
    }
}

// Advance ha (backwards) past any elements > med, since they belong in the range [ha, b).
// When elements == med are encountered and there is room on the right of [ea, eb),
// we swap them there and continue.
// Finally, when an element < med is encountered or ha bumps into eb, we break,
// unless lb has already bumped into ea, in which case we just shift over the == region
// and keep going
static inline void pwm_advance_ge(cr8r_vec *self, cr8r_vec_ft *ft, pwm_state *st, bool is_le_done){
    char tmp[ft->base.size];
    while(st->eb < st->ha){
        int ord = ft->cmp(&ft->base, self->buf + st->ea*ft->base.size, self->buf + (st->ha - 1)*ft->base.size);
        if(ord < 0){
            --st->ha;
        }else if(!ord){
            if(++st->eb == st->ha){
                break;
            }
            ft->swap(&ft->base, self->buf + (st->eb - 1)*ft->base.size, self->buf + (st->ha - 1)*ft->base.size);
            // otherwise ha is < the median so it must go on the left
        }else if(is_le_done){ // lb == ea so we must shift an == element over
            ++st->lb;
            ++st->ea;
            if(++st->eb == st->ha){
                ft->swap(&ft->base, self->buf + (st->ea - 1)*ft->base.size, self->buf + (st->ha - 1)*ft->base.size);
                break;
            }
            // triple swap lb - 1, ha - 1, and eb - 1
            memcpy(tmp, self->buf + (st->lb - 1)*ft->base.size, ft->base.size);
            memcpy(self->buf + (st->lb - 1)*ft->base.size, self->buf + (st->ha - 1)*ft->base.size, ft->base.size);
            memcpy(self->buf + (st->ha - 1)*ft->base.size, self->buf + (st->eb - 1)*ft->base.size, ft->base.size);
            memcpy(self->buf + (st->eb - 1)*ft->base.size, tmp, ft->base.size);
        }else{ // lb < ea, so we stop here and let the caller check whether the unexplored elements in [lb, ea) contain any > med
            break;
        }
    }
}

void *cr8r_vec_partition_with_median(cr8r_vec *self, cr8r_vec_ft *ft, uint64_t a, uint64_t b, void *med){
    pwm_state st = {};
    uint64_t med_idx = (med - self->buf)/ft->base.size;
    if(a >= b || b > self->len || med_idx < a || med_idx >= b){
        return NULL;
    }
    // We need to partition our array but ensure the median element is placed in the center
    // of the array. This requires a partitioning algorithm that can handle elements equal to
    // the median, since the basic partitioning algorithm could leave the median out of place
    // by up to as many equal elements as there are.
    st.lb = a; // the elements < the median occur in the range [a, lb)
    st.ea = (b + a)/2;
    st.eb = st.ea + 1; // the elements == the median occur in the range [ea, eb)
    st.ha = b; // the elements > the median occur in the range [ha, b)
    if(med_idx != st.ea){
        ft->swap(&ft->base, med, self->buf + st.ea*ft->base.size); // ensure the median is placed at the center
    }
    // now we will extend [a, lb), [ha, b), and [ea, eb) until they encompass the entire array
    // TODO: consider alternating which side we place == elements on
    while(st.lb < st.ea && st.eb < st.ha){
        // first we skip over any elements on the left < the median, and swap any == to the
        // central region
        pwm_advance_le(self, ft, &st, false);
        // next we skip over any elements on the right > the median, and swap any == to the
        // central region
        pwm_advance_ge(self, ft, &st, false);
        // now there are two possibilities: one of [a, lb) or [ha, b) bumped into [ea, eb),
        // or we have a pair of mismatched elements at lb and ha-1 which we can swap.
        if(st.lb == st.ea || st.eb == st.ha){
            break;
        }
        --st.ha;
        ft->swap(&ft->base, self->buf + st.lb*ft->base.size, self->buf + st.ha*ft->base.size);
        ++st.lb;
    }
    // now either eb == ha or lb == ea, so the corresponding final pass below
    // (pwm_advance_ge or pwm_advance_le, respectively) becomes a no-op
    pwm_advance_le(self, ft, &st, true);
    pwm_advance_ge(self, ft, &st, true);
#ifdef DEBUG
    if(st.lb != st.ea || st.eb != st.ha || !cr8r_vec_check_pwm(ft, self, a, st.lb, st.ha, b, self->buf + st.ea*ft->base.size)){
        __builtin_trap();
    }
#endif
    return med;
}

uint64_t cr8r_powmod(uint64_t b, uint64_t e, uint64_t n){
    uint64_t res = 1;
    while(e){
        if(e&1){
            res = (unsigned __int128)res * (unsigned __int128)b % (unsigned __int128)n;
        }
        b = (unsigned __int128)b * (unsigned __int128)b % (unsigned __int128)n;
        e >>= 1;
    }
    return res;
}
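
// Worked example of the square-and-multiply loop above: cr8r_powmod(3, 5, 7)
// scans e = 5 = 0b101 from the low bit up. Bit 0 is set, so res = 3, then
// b = 3*3 = 9 = 2 (mod 7); bit 1 is clear, and b = 2*2 = 4 (mod 7); bit 2 is
// set, so res = 3*4 = 12 = 5 (mod 7), matching 3^5 = 243 = 5 (mod 7).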

uint64_t cr8r_pow_u64(uint64_t b, uint64_t e){
    uint64_t res = 1;
    while(e){
        if(e&1){
            res *= b;
        }
        b *= b;
        e >>= 1;
    }
    return res;
}

void *cr8r_default_acc_sum_u64(const cr8r_vec_ft *ft, void *_acc, const void *e){
    return (void*)((uint64_t)_acc + *(const uint64_t*)e);
}

void *cr8r_default_acc_sumpowmod_u64(const cr8r_vec_ft *ft, void *_acc, const void *e){
    uint64_t *acc = _acc;
    acc[0] = (acc[0] + cr8r_powmod(*(const uint64_t*)e, acc[1], acc[2]))%acc[2];
    return acc;
}
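
// Usage sketch (illustrative, not part of the original source): the accumulators
// above plug into cr8r_vec_foldr. cr8r_default_acc_sum_u64 carries the running
// total in the void* accumulator itself, so the fold is seeded with NULL (zero)
// and the result is cast back to an integer.
#if 0
uint64_t total = (uint64_t)cr8r_vec_foldr(&v, &cr8r_vecft_u64,
    cr8r_default_acc_sum_u64, NULL);
#endif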

cr8r_vec_ft cr8r_vecft_u64 = {
    .base = {
        .data = NULL,
        .size = sizeof(uint64_t)
    },
    .new_size = cr8r_default_new_size,
    .resize = cr8r_default_resize,
    .del = NULL,
    .copy = NULL,
    .cmp = cr8r_default_cmp_u64,
    .swap = cr8r_default_swap
};

cr8r_vec_ft cr8r_vecft_cstr = {
    .base = {
        .data = NULL,
        .size = sizeof(char*)
    },
    .new_size = cr8r_default_new_size,
    .resize = cr8r_default_resize,
    .del = cr8r_default_free,
    .copy = cr8r_default_copy_cstr,
    .cmp = cr8r_default_cmp_cstr,
    .swap = cr8r_default_swap
};

cr8r_vec_ft cr8r_vecft_u8 = {
    .base = {
        .data = NULL,
        .size = sizeof(uint8_t)
    },
    .new_size = cr8r_default_new_size,
    .resize = cr8r_default_resize,
    .del = NULL,
    .copy = NULL,
    .cmp = cr8r_default_cmp_u8,
    .swap = cr8r_default_swap
};
