Line data Source code
1 : #include <stddef.h>
2 : #include <inttypes.h>
3 : #include <stdint.h>
4 : #include <string.h>
5 :
6 : #include <nut/modular_math.h>
7 : #include <nut/factorization.h>
8 : #include <nut/debug.h>
9 :
10 11405 : nut_Factors *nut_make_Factors_w(uint64_t max_primes){
11 11405 : static const nut_Factors dummy;
12 11405 : return calloc(1, sizeof(nut_Factors) + sizeof(dummy.factors[0])*max_primes);
13 : }
14 :
15 2 : nut_Factors *nut_make_Factors_ub(uint64_t n, uint64_t num_primes, const uint64_t primes[static num_primes]){
16 2 : uint64_t p = 1, w = 0;
17 11 : for(uint64_t i = 0; i < num_primes; ++i){
18 11 : p *= primes[i];
19 11 : if(p > n){
20 2 : return nut_make_Factors_w(w);
21 : }
22 9 : ++w;
23 : }
24 : return NULL;
25 : }
26 :
27 10000 : nut_Factors *nut_Factors_copy(const nut_Factors *factors){
28 10000 : nut_Factors *ret = nut_make_Factors_w(factors->num_primes);
29 10000 : if(ret){
30 10000 : memcpy(ret, factors, offsetof(nut_Factors, factors) + factors->num_primes*sizeof(factors->factors[0]));
31 : }
32 10000 : return ret;
33 : }
34 :
35 3006500 : uint64_t nut_Factors_prod(const nut_Factors *factors){
36 3006500 : uint64_t r = 1;
37 11930391 : for(uint64_t i = 0; i < factors->num_primes; ++i){
38 8923891 : r *= nut_u64_pow(factors->factors[i].prime, factors->factors[i].power);
39 : }
40 3006500 : return r;
41 : }
42 :
43 1000000 : uint64_t nut_Factor_divcount(const nut_Factors *factors){
44 1000000 : uint64_t s = 1;
45 3853708 : for(uint64_t i = 0; i < factors->num_primes; ++i){
46 2853708 : s *= factors->factors[i].power + 1;
47 : }
48 1000000 : return s;
49 : }
50 :
51 1000000 : uint64_t nut_Factor_divsum(const nut_Factors *factors){
52 1000000 : uint64_t s = 1;
53 3853708 : for(uint64_t i = 0; i < factors->num_primes; ++i){
54 2853708 : uint64_t p = factors->factors[i].prime;
55 2853708 : uint64_t a = factors->factors[i].power;
56 2853708 : s *= (nut_u64_pow(p, a + 1) - 1)/(p - 1);
57 : }
58 1000000 : return s;
59 : }
60 :
61 1000000 : uint64_t nut_Factor_divpowsum(const nut_Factors *factors, uint64_t power){
62 1000000 : if(power == 0){
63 0 : return nut_Factor_divcount(factors);
64 1000000 : }else if(power == 1){
65 0 : return nut_Factor_divsum(factors);
66 : }
67 : uint64_t s = 1;
68 3853708 : for(uint64_t i = 0; i < factors->num_primes; ++i){
69 2853708 : uint64_t p = factors->factors[i].prime;
70 2853708 : uint64_t a = factors->factors[i].power;
71 2853708 : s *= (nut_u64_pow(p, (a + 1)*power) - 1)/(nut_u64_pow(p, power) - 1);
72 : }
73 : return s;
74 : }
75 :
76 2000000 : uint64_t nut_Factor_divtupcount(const nut_Factors *factors, uint64_t k, uint64_t modulus){
77 2000000 : if(k == 0){
78 0 : return !factors->num_primes;
79 2000000 : }else if(k == 1){
80 : return 1;
81 : }
82 2000000 : uint64_t res = 1;
83 2000000 : if(k == 2){
84 0 : res = nut_Factor_divcount(factors)%modulus;
85 0 : return modulus ? res % modulus : res;
86 : }
87 2000000 : if(modulus){
88 3853708 : for(uint64_t i = 0; i < factors->num_primes; ++i){
89 2853708 : res = res*nut_u64_binom(factors->factors[i].power + k - 1, k - 1)%modulus;
90 : }
91 : }else{
92 3853708 : for(uint64_t i = 0; i < factors->num_primes; ++i){
93 2853708 : res *= nut_u64_binom(factors->factors[i].power + k - 1, k - 1);
94 : }
95 : }
96 : return res;
97 : }
98 :
99 0 : uint64_t nut_Factor_circle_latte(const nut_Factors *factors){
100 0 : uint64_t c1c1_d0 = 1;
101 0 : uint64_t c3c1_d0 = 1;
102 0 : uint64_t c3c3_d0 = 0;
103 0 : for(uint64_t i = 0; i < factors->num_primes; ++i){
104 0 : uint64_t p = factors->factors[i].prime;
105 0 : uint64_t a = factors->factors[i].power;
106 0 : if((p&3) == 1){
107 0 : c1c1_d0 *= a + 1;
108 0 : }else if((p&3) == 3){
109 0 : uint64_t tmp = c3c1_d0*(a/2 + 1) + c3c3_d0*(a - a/2);
110 0 : c3c3_d0 = c3c1_d0*(a - a/2) + c3c3_d0*(a/2 + 1);
111 0 : c3c1_d0 = tmp;
112 : }
113 : }
114 0 : return 4*c1c1_d0*(c3c1_d0 - c3c3_d0);
115 : }
116 :
117 500 : void nut_Factor_ipow(nut_Factors *factors, uint64_t power){
118 2147 : for(uint64_t i = 0; i < factors->num_primes; ++i){
119 1647 : factors->factors[i].power *= power;
120 : }
121 500 : }
122 :
123 1000000 : uint64_t nut_Factor_phi(const nut_Factors *factors){
124 1000000 : uint64_t s = 1;
125 3853708 : for(uint64_t i = 0; i < factors->num_primes; ++i){
126 2853708 : uint64_t p = factors->factors[i].prime;
127 2853708 : uint64_t a = factors->factors[i].power;
128 2853708 : if(a){
129 2853708 : s *= (nut_u64_pow(p, a - 1))*(p - 1);
130 : }
131 : }
132 1000000 : return s;
133 : }
134 :
135 1000001 : uint64_t nut_Factor_carmichael(const nut_Factors *factors){
136 1000001 : uint64_t s = 1;
137 1000001 : if(!factors->num_primes){
138 : return s;
139 : }
140 1000000 : uint64_t p = factors->factors[0].prime;
141 1000000 : uint64_t a = factors->factors[0].power;
142 1000000 : if(p == 2){
143 500000 : if(a >= 3){
144 125000 : s = 1ull << (a - 2);
145 : }else{
146 375000 : s = 1ull << (a - 1);
147 : }
148 : }else{
149 500000 : s = nut_u64_pow(p, a - 1)*(p - 1);
150 : }
151 2853709 : for(uint64_t i = 1; i < factors->num_primes; ++i){
152 1853709 : p = factors->factors[i].prime;
153 1853709 : a = factors->factors[i].power;
154 1853709 : uint64_t phi_pk = (nut_u64_pow(p, a - 1))*(p - 1);
155 1853709 : s = nut_i64_lcm(s, phi_pk);
156 : }
157 : return s;
158 : }
159 :
160 3999 : uint64_t nut_u64_order_mod(uint64_t a, uint64_t n, uint64_t cn, nut_Factors *cn_factors){
161 16432 : for(uint64_t i = 0; i < cn_factors->num_primes; ++i){
162 16033 : while(cn_factors->factors[i].power){
163 14231 : cn /= cn_factors->factors[i].prime;
164 14231 : if(nut_u64_powmod(a, cn, n) != 1){
165 10631 : cn *= cn_factors->factors[i].prime;
166 10631 : break;
167 : }
168 3600 : cn_factors->factors[i].power--;
169 : }
170 : }
171 3999 : return cn;
172 : }
173 :
174 4000 : int nut_Factor_forall_divs_tmptmp(const nut_Factors *restrict factors, int (*f)(const nut_Factors*, uint64_t, void*), void *restrict data){
175 8000 : nut_Factors *dfactors [[gnu::cleanup(cleanup_free)]] = nut_Factors_copy(factors);
176 8000 : nut_Factors *pfactors [[gnu::cleanup(cleanup_free)]] = nut_Factors_copy(factors);
177 4000 : return dfactors && pfactors && nut_Factor_forall_divs(factors, f, data, dfactors, pfactors);
178 : }
179 :
180 4000 : int nut_Factor_forall_divs(const nut_Factors *restrict factors, int (*f)(const nut_Factors*, uint64_t, void*), void *restrict data, nut_Factors *restrict dfactors, nut_Factors *restrict pfactors){
181 4000 : dfactors->num_primes = factors->num_primes;
182 16436 : for(uint64_t i = 0; i < factors->num_primes; ++i){
183 12436 : dfactors->factors[i].prime = factors->factors[i].prime;
184 12436 : dfactors->factors[i].power = 0;
185 12436 : pfactors->factors[i].prime = 1;
186 : }
187 : uint64_t d = 1;
188 66706 : while(1){
189 66706 : if(f(dfactors, d, data)){
190 : return 1;
191 : }
192 : uint64_t i;
193 105247 : for(i = 0; i < factors->num_primes; ++i){
194 101247 : if(dfactors->factors[i].power < factors->factors[i].power){
195 62706 : ++dfactors->factors[i].power;
196 62706 : pfactors->factors[i].prime *= dfactors->factors[i].prime;
197 62706 : d *= dfactors->factors[i].prime;
198 62706 : break;
199 : }
200 38541 : dfactors->factors[i].power = 0;
201 38541 : d /= pfactors->factors[i].prime;
202 38541 : pfactors->factors[i].prime = 1;
203 : }
204 66706 : if(i == factors->num_primes){
205 : return 0;
206 : }
207 : }
208 : }
209 :
210 1000 : int nut_Factor_forall_divs_le_tmptmp(const nut_Factors *restrict factors, uint64_t d_max, int (*f)(const nut_Factors*, uint64_t, void*), void *restrict data){
211 2000 : nut_Factors *dfactors [[gnu::cleanup(cleanup_free)]] = nut_Factors_copy(factors);
212 2000 : nut_Factors *pfactors [[gnu::cleanup(cleanup_free)]] = nut_Factors_copy(factors);
213 1000 : return dfactors && pfactors && nut_Factor_forall_divs_le(factors, d_max, f, data, dfactors, pfactors);
214 : }
215 :
216 1000 : int nut_Factor_forall_divs_le(const nut_Factors *restrict factors, uint64_t d_max, int (*f)(const nut_Factors*, uint64_t, void*), void *restrict data, nut_Factors *restrict dfactors, nut_Factors *restrict pfactors){
217 1000 : dfactors->num_primes = factors->num_primes;
218 3440 : for(uint64_t i = 0; i < factors->num_primes; ++i){
219 2440 : dfactors->factors[i].prime = factors->factors[i].prime;
220 2440 : dfactors->factors[i].power = 0;
221 2440 : pfactors->factors[i].prime = 1;
222 : }
223 1000 : uint64_t d = 1;
224 1000 : if(d_max <= 1){
225 0 : return d_max && f(dfactors, d, data);
226 : }
227 7862 : while(1){
228 7862 : if(f(dfactors, d, data)){
229 : return 1;
230 : }
231 : uint64_t i;
232 13558 : for(i = 0; i < factors->num_primes; ++i){
233 12558 : if(dfactors->factors[i].power < factors->factors[i].power){
234 7971 : ++dfactors->factors[i].power;
235 7971 : pfactors->factors[i].prime *= dfactors->factors[i].prime;
236 7971 : d *= dfactors->factors[i].prime;
237 7971 : if(d <= d_max){
238 : break;
239 : }
240 : }
241 5696 : dfactors->factors[i].power = 0;
242 5696 : d /= pfactors->factors[i].prime;
243 5696 : pfactors->factors[i].prime = 1;
244 : }
245 7862 : if(i == factors->num_primes){
246 : return 0;
247 : }
248 : }
249 : }
250 :
251 2875800 : void nut_Factor_append(nut_Factors *factors, uint64_t m, uint64_t k){
252 2884239 : for(uint64_t i = 0;; ++i){
253 2884239 : if(i == factors->num_primes){
254 1012231 : factors->factors[i].prime = m;
255 1012231 : factors->factors[i].power = k;
256 1012231 : ++factors->num_primes;
257 1012231 : break;
258 1872008 : }else if(factors->factors[i].prime == m){
259 0 : factors->factors[i].power += k;
260 0 : break;
261 1872008 : }else if(factors->factors[i].prime > m){
262 1863569 : memmove(factors->factors + i + 1, factors->factors + i, (factors->num_primes - i)*sizeof(*factors->factors));
263 1863569 : ++factors->num_primes;
264 1863569 : factors->factors[i].prime = m;
265 1863569 : factors->factors[i].power = k;
266 1863569 : break;
267 : }
268 : }
269 2875800 : }
270 :
271 500 : void nut_Factor_combine(nut_Factors *restrict factors, const nut_Factors *restrict factors2, uint64_t k){
272 500 : nut_Factors *factors3 [[gnu::cleanup(cleanup_free)]] = nut_make_Factors_w(factors->num_primes + factors2->num_primes);
273 500 : uint64_t i = 0, i2 = 0, i3 = 0;
274 3053 : while(i < factors->num_primes && i2 < factors2->num_primes){
275 2553 : if(factors->factors[i].prime < factors2->factors[i2].prime){
276 1170 : factors3->factors[i3++] = factors->factors[i++];
277 1383 : }else if(factors->factors[i].prime > factors2->factors[i2].prime){
278 1167 : factors3->factors[i3].prime = factors2->factors[i2].prime;
279 1167 : factors3->factors[i3++].power = factors2->factors[i2++].power*k;
280 : }else{
281 216 : factors3->factors[i3].prime = factors->factors[i].prime;
282 216 : factors3->factors[i3++].power = factors->factors[i++].power + factors2->factors[i2++].power*k;
283 : }
284 : }
285 762 : while(i < factors->num_primes){
286 262 : factors3->factors[i3++] = factors->factors[i++];
287 : }
288 764 : while(i2 < factors2->num_primes){
289 264 : factors3->factors[i3].prime = factors2->factors[i2].prime;
290 264 : factors3->factors[i3++].power = factors2->factors[i2++].power*k;
291 : }
292 : //TODO: elide copying initial factors from factors to factors3 and back to factors
293 500 : memcpy(factors->factors, factors3->factors, i3*sizeof(*factors->factors));
294 500 : factors->num_primes = i3;
295 500 : }
296 :
297 0 : void nut_Factor_divide(nut_Factors *restrict out, const nut_Factors *restrict factors, const nut_Factors *restrict dfactors){
298 0 : uint64_t i = 0, i2 = 0, i3 = 0;
299 0 : while(i < factors->num_primes && i2 < dfactors->num_primes){
300 0 : if(factors->factors[i].prime < dfactors->factors[i2].prime){
301 0 : out->factors[i3++] = factors->factors[i++];
302 0 : }else if(factors->factors[i].power > dfactors->factors[i2].power){
303 0 : out->factors[i3].prime = factors->factors[i].prime;
304 0 : out->factors[i3++].power = factors->factors[i++].power - dfactors->factors[i2++].power;
305 : }else{
306 0 : ++i, ++i2;
307 : }
308 : }
309 0 : while(i < factors->num_primes){
310 0 : out->factors[i3++] = factors->factors[i++];
311 : }
312 0 : out->num_primes = i3;
313 0 : }
314 :
315 24 : int nut_Factor_fprint(FILE *restrict file, const nut_Factors *restrict factors){
316 24 : int res = 0;
317 96 : for(uint64_t i = 0; i < factors->num_primes; ++i){
318 72 : uint64_t power = factors->factors[i].power;
319 72 : if(!power){
320 26 : continue;
321 : }
322 46 : if(res){
323 23 : res += fprintf(file, "*");
324 : }
325 46 : res += fprintf(file, "%"PRIu64, factors->factors[i].prime);
326 46 : if(power != 1){
327 20 : res += fprintf(file, "^%"PRIu64, power);
328 : }
329 : }
330 24 : if(!res){
331 1 : res += fprintf(file, "1");
332 : }
333 24 : return res;
334 : }
335 :
336 28427528 : bool nut_u64_is_prime_dmr(uint64_t n){
337 : //static const uint64_t DMR_PRIMES[7] = {2, 325, 9375, 28178, 450775, 9780504, 1795265022};//sufficient for 64 bit numbers
338 : //had to disable 7 base check because the last base is too large to be squared under 64 bit multiplication
339 28427528 : static const uint64_t DMR_PRIMES[12] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37};//sufficient for 64 bit numbers
340 : //static const uint64_t DMR_PRIMES_C = 7;
341 28427528 : static const uint64_t DMR_PRIMES_C = 12;
342 28427528 : uint64_t s, d;//s, d | 2^s*d = n - 1
343 28427528 : if(n%2 == 0){
344 11177533 : return n == 2;
345 : }
346 17249995 : --n;
347 17249995 : s = __builtin_ctzll(n);
348 17249995 : d = n>>s;
349 17249995 : ++n;
350 88402346 : for(uint64_t i = 0, a, x; i < DMR_PRIMES_C; ++i){
351 83451414 : a = DMR_PRIMES[i];
352 83451414 : if(a >= n){
353 : break;
354 : }
355 80169871 : x = nut_u64_powmod(a, d, n);
356 80169871 : if(x == 1 || x == n - 1){
357 46863185 : goto CONTINUE_WITNESSLOOP;
358 : }
359 62281139 : for(a = 0; a < s - 1; ++a){
360 53263881 : x = nut_u64_powmod(x, 2, n);
361 53263881 : if(x == 1){
362 : return 0;
363 : }
364 53263619 : if(x == n - 1){
365 24289166 : goto CONTINUE_WITNESSLOOP;
366 : }
367 : }
368 : return false;
369 71152351 : CONTINUE_WITNESSLOOP:;
370 : }
371 : return true;
372 : }
373 :
374 0 : uint64_t nut_u64_next_prime_ge(uint64_t n){
375 0 : if(n <= 97){
376 0 : for(uint64_t i = 0; i < 25; ++i){
377 0 : uint64_t p = nut_small_primes[i];
378 0 : if(p >= n){
379 : return p;
380 : }
381 : }
382 : }
383 0 : uint64_t res = n%30;
384 0 : if(res <= 13){
385 0 : if(res <= 7){
386 0 : if(res <= 1){
387 0 : n += 1 - res;
388 : }else{
389 0 : n += 7 - res;
390 : }
391 0 : }else if(res <= 11){
392 0 : n += 11 - res;
393 : }else{
394 0 : n += 13 - res;
395 : }
396 0 : }else if(res <= 19){
397 0 : if(res <= 17){
398 0 : n += 17 - res;
399 : }else{
400 0 : n += 19 - res;
401 : }
402 0 : }else if(res <= 23){
403 0 : n += 23 - res;
404 : }else{
405 0 : n += 29 - res;
406 : }
407 0 : res = n%30;
408 0 : while(!nut_u64_is_prime_dmr(n)){
409 0 : switch(res){
410 0 : case 1: n += 6; res = 7; break;
411 0 : case 7: n += 4; res = 11; break;
412 0 : case 11: n += 2; res = 13; break;
413 0 : case 13: n += 4; res = 17; break;
414 0 : case 17: n += 2; res = 19; break;
415 0 : case 19: n += 4; res = 23; break;
416 0 : case 23: n += 6; res = 29; break;
417 0 : case 29: n += 2; res = 1; break;
418 0 : default: __builtin_unreachable();
419 : }
420 : }
421 : return n;
422 : }
423 :
424 : /*
425 : int is_prime_ecam(uint64_t n){
426 : static const uint64_t nDs[13] = {3, 4, 7, 8, 11, 12, 16, 19, 27, 28, 43, 67, 163};
427 : static const uint64_t nDs_len = 13;
428 : //Check if n is prime using a simplified version of the Atkin-Morain elliptic curve prime proving algorithm
429 : //First, we need to find a quadratic residue D so that a^2 + |D|b^2 = 4n and D is in (negative) nDs.
430 : //We can use Cornacchia's algorithm to help solve a^2 + |D|b^2 = 4n.
431 : //The number of potential solutions is very limited: a solution can be a primitive solution, or a primitive
432 : //solution to a^2 + |D|b^2 = n, since n is supposed to be prime and hence squarefree.
433 : //In the first case, there are at most 4 square roots of -|D| mod 4n and hence 2 starting points for Cornacchia.
434 : //In the second case, there are at most 2 square roots of -|D| mod n and hence 1 starting point for Cornacchia.
435 : //We can obtain a solution for each of these three starting points, although some may not lead to a solution.
436 : //Any solution a, b will have up to 3 associated solutions by negating components, thus there are at most
437 : //12 curves for every D, for at most 156 curves total (although it is very unlikely all Ds will be quadratic residues).
438 : //
439 : //Now, supposing we have a and b, we need to actually find a point on the curve. But for this,
440 : //trying random x until x^3 + ax + b is a quadratic residue mod N and then pick y as a corresponding root.
441 : //We have that m = n + 1 - a is the number of points on the curve.
442 : //So the final thing to check is that we can factor m quickly and into a good form.
443 : //We want a prime divisor q of m such that q > (n^(1/4)+1)^2.
444 : //If we find one, demonstrating a point P such that [m/q]P is not identity but [m]P is identity proves
445 : //q is prime implies n is prime.
446 : //
447 : }
448 : */
449 :
450 : const uint64_t nut_small_primes[25] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97};
451 :
452 : const nut_FactorConf nut_default_factor_conf = {
453 : .pollard_max= 100000, //maximum number to use Pollard's Rho algorithm for
454 : .pollard_stride= 10, //number of gcd operations to coalesce, decreases time for a single iteration at the cost of potentially doing twice this many extra iterations
455 : .lenstra_max= UINT64_MAX,//maximum number to use Lenstra's Elliptic Curve algorithm for
456 : .lenstra_bfac= 10 //roughly speaking, the number of iterations to try before picking a new random point and curve
457 : };
458 :
459 : NUT_ATTR_NO_SAN("vla-bound")
460 3002 : uint64_t nut_u64_factor_trial_div(uint64_t n, uint64_t num_primes, const uint64_t primes[restrict static num_primes], nut_Factors *restrict factors){
461 3002 : factors->num_primes = 0;
462 60904 : for(uint64_t i = 0; i < num_primes; ++i){
463 58985 : uint64_t p = primes[i];
464 58985 : if(n%p == 0){
465 5107 : uint64_t j = factors->num_primes++;
466 5107 : factors->factors[j].prime = p;
467 5107 : factors->factors[j].power = 1;
468 5107 : n /= p;
469 7405 : while(n%p == 0){
470 2298 : ++factors->factors[j].power;
471 2298 : n /= p;
472 : }
473 : }
474 58985 : if(p*p > n){
475 1083 : if(n > 1){
476 1032 : uint64_t j = factors->num_primes++;
477 1032 : factors->factors[j].prime = n;
478 1032 : factors->factors[j].power = 1;
479 : }
480 1083 : return 1;
481 : }
482 : }
483 : return n;
484 : }
485 :
486 : NUT_ATTR_NO_SAN("vla-bound")
487 3002 : uint64_t nut_u64_factor_heuristic(uint64_t n, uint64_t num_primes, const uint64_t primes[restrict static num_primes], const nut_FactorConf *restrict conf, nut_Factors *restrict factors){
488 3002 : n = nut_u64_factor_trial_div(n, num_primes, primes, factors);
489 3002 : if(n == 1){
490 : return 1;
491 : }
492 1919 : uint64_t exponent = 1;
493 1919 : nut_u64_is_perfect_power(n, 9, &n, &exponent);
494 1919 : if(nut_u64_is_prime_dmr(n)){//TODO: get a better primality test and possibly run it later (ie after some pollard-rho)
495 1024 : nut_Factor_append(factors, n, exponent);
496 1024 : return 1;
497 : }
498 895 : uint64_t smoothness = 101*101;
499 1790 : nut_Factors *factors2 [[gnu::cleanup(cleanup_free)]] = nut_make_Factors_w(NUT_MAX_PRIMES_64);
500 955 : uint64_t m;
501 955 : while(1){
502 955 : if(n <= conf->pollard_max){//TODO: allow iteration count based stopping of pollard-rho brent so we can get small factors of big numbers that way?
503 25 : do{
504 25 : uint64_t x = nut_u64_prand(0, n);
505 25 : m = nut_u64_factor1_pollard_rho_brent(n, x, conf->pollard_stride);
506 25 : }while(m == n);
507 24 : uint64_t k = 1;
508 24 : n /= m;
509 24 : while(n%m == 0){
510 0 : k += 1;
511 0 : n /= m;
512 : }
513 24 : if(m < smoothness || nut_u64_is_prime_dmr(m)){
514 24 : nut_Factor_append(factors, m, k*exponent);
515 : }else{
516 0 : factors2->num_primes = 0;
517 0 : m = nut_u64_factor_heuristic(m, 0, primes, conf, factors2);
518 0 : if(m != 1){
519 : return m;//TODO: abort
520 : }
521 0 : nut_Factor_combine(factors, factors2, k*exponent);
522 : }
523 24 : if(n == 1){
524 : return 1;
525 : }
526 24 : if(n < smoothness || nut_u64_is_prime_dmr(n)){
527 24 : nut_Factor_append(factors, n, exponent);
528 24 : return 1;
529 : }
530 931 : }else if(n <= conf->lenstra_max){
531 24362 : do{
532 24362 : uint64_t x = nut_u64_prand(0, n);
533 24362 : uint64_t y = nut_u64_prand(0, n);
534 24362 : uint64_t a = nut_u64_prand(0, n);
535 24362 : m = nut_u64_factor1_lenstra(n, x, y, a, conf->lenstra_bfac);
536 24362 : }while(m == n);
537 931 : uint64_t k = 1;
538 931 : n /= m;
539 933 : while(n%m == 0){
540 2 : k += 1;
541 2 : n /= m;
542 : }
543 931 : if(m < smoothness || nut_u64_is_prime_dmr(m)){
544 931 : nut_Factor_append(factors, m, k*exponent);
545 : }else{
546 0 : factors2->num_primes = 0;
547 0 : m = nut_u64_factor_heuristic(m, 0, primes, conf, factors2);
548 0 : if(m != 1){
549 : return m;//TODO: abort
550 : }
551 0 : nut_Factor_combine(factors, factors2, k*exponent);
552 : }
553 931 : if(n == 1){
554 : return 1;
555 : }
556 931 : if(n < smoothness || nut_u64_is_prime_dmr(n)){
557 871 : nut_Factor_append(factors, n, exponent);
558 871 : return 1;
559 : }
560 : }else{//TODO: implement quadratic sieve and number field sieve
561 : return n;
562 : }
563 : }
564 : }
565 :
566 100258924 : uint64_t nut_u64_nth_root(uint64_t a, uint64_t n){
567 100258924 : if(a < 2){
568 : return a;
569 : }
570 100258923 : uint64_t x;
571 100258923 : if(n < 13){
572 51988385 : uint64_t floor_log_2_a = 63 - __builtin_clzll(a);
573 51988385 : x = 1ull << (floor_log_2_a/n);
574 51988385 : if(x == 1){
575 : return 1;
576 : }
577 : }else for(x = 1;; ++x){
578 91668577 : uint128_t x_pow = nut_u128_pow(x + 1, n);
579 91668577 : if(x_pow > a){
580 : return x;
581 : }
582 : }
583 448623207 : for(uint64_t i = 0;; ++i){
584 500583341 : uint128_t x_pow = nut_u128_pow(x, n - 1);
585 500583341 : uint64_t x_next = ((n - 1)*x_pow*x + a)/(n*x_pow);
586 500583341 : if(i > 1 && x_next >= x){
587 : return x;
588 : }
589 448623207 : x = x_next;
590 : }
591 : }
592 :
593 9437665 : bool nut_u64_is_perfect_power(uint64_t a, uint64_t max, uint64_t *restrict _base, uint64_t *restrict _exp){
594 9437665 : if(a < 2){
595 : return false;
596 : }
597 : /// the maximum exponent ever for a uint64 is 63, but if the exponent is composite we can find it in stages,
598 : /// so the maximum exponent we will ever have to take is 61.
599 : uint64_t x = 1;
600 104887576 : for(uint64_t i = 0; i < 18; ++i){
601 104887576 : uint64_t p = nut_small_primes[i];
602 104887576 : if(p > max){
603 : break;
604 : }
605 100243323 : while(1){
606 100243323 : uint64_t r = nut_u64_nth_root(a, p);
607 100243323 : if(nut_u64_pow(r, p) != a){
608 : break;
609 : }
610 4793412 : x *= p;
611 4793412 : a = r;
612 4793412 : max /= p;
613 4793412 : if(p > max){
614 499 : goto OUTPUT_RESULT;
615 : }
616 : }
617 : }
618 9437166 : OUTPUT_RESULT:;
619 9437665 : if(x != 1){
620 4721356 : *_base = a;
621 4721356 : *_exp = x;
622 4721356 : return true;
623 : }
624 : return false;
625 : }
626 :
627 267 : uint64_t nut_u64_factor1_pollard_rho(uint64_t n, uint64_t x){
628 267 : uint64_t y = x, d = 1;
629 7499 : while(d == 1){
630 7232 : x = (x*x + 1)%n;
631 7232 : y = (y*y + 1)%n;
632 7232 : y = (y*y + 1)%n;
633 7232 : d = nut_i64_egcd(x > y ? x - y : y - x, n, NULL, NULL);
634 : }
635 267 : return d;
636 : }
637 :
638 25 : uint64_t nut_u64_factor1_pollard_rho_brent(uint64_t n, uint64_t x, uint64_t m){
639 25 : uint64_t y = x, ys = x;
640 25 : uint64_t d = 1;//gcd of n and the difference of the terms in the sequence
641 25 : uint64_t r = 1;//power of two in brent's algorithm
642 25 : uint64_t q = 1;//product of all differences so far, allowing us to process m at a time when checking for a nontrival factor d
643 112 : while(d == 1){
644 398 : x = y;
645 398 : for(uint64_t i = 0; i < r; ++i){
646 311 : y = (y*y + 1)%n;
647 : }
648 174 : for(uint64_t k = 0; k < r && d == 1; k += m){
649 380 : ys = y;
650 380 : for(uint64_t i = 0; i < m && i < r - k; ++i){
651 293 : y = (y*y + 1)%n;
652 293 : q = q*(x > y ? x - y : y - x)%n;
653 : }
654 87 : d = nut_i64_egcd(q, n, NULL, NULL);
655 : }
656 87 : r *= 2;
657 : }
658 25 : if(d == n){
659 17 : do{
660 17 : ys = (ys*ys + 1)%n;
661 17 : d = nut_i64_egcd(x > ys ? x - ys : ys - x, n, NULL, NULL);
662 17 : }while(d == 1);
663 : }
664 25 : return d;
665 : }
666 :
667 : //convenience function to double a point on an elliptic curve. _xr and _yr are out params
668 : //returns 0 for an ordinary point, 1 for identity, and -1 if a nontrivial factor was found and placed in _xr
669 : [[gnu::nonnull(6, 7)]]
670 : NUT_ATTR_ACCESS(write_only, 6) NUT_ATTR_ACCESS(write_only, 7)
671 454918 : static inline int ecg_double(int64_t n, int64_t a, int64_t x, int64_t y, bool is_id, int64_t *restrict _xr, int64_t *restrict _yr){
672 454918 : if(is_id || y == 0){
673 : return 1;
674 : }
675 454918 : int64_t s, dy, dx;
676 454918 : dy = (3*(int128_t)x*x + a)%n;
677 454918 : if(dy < 0){
678 0 : dy += n;
679 : }
680 454918 : dx = nut_i64_mod(2*y, n);
681 454918 : int64_t d = nut_i64_egcd(dx, n, &s, NULL);
682 454918 : if(d != 1){
683 382 : *_xr = d;
684 382 : return -1;
685 : }
686 454536 : s = (int128_t)s*dy%n;
687 454536 : int64_t xr = ((int128_t)s*s - 2*x)%n;
688 454536 : if(xr < 0){
689 66 : xr += n;
690 : }
691 454536 : *_xr = xr;
692 454536 : int64_t yr = (y + (int128_t)s*(xr - x))%n;
693 454536 : if(yr < 0){
694 227248 : yr += n;
695 : }
696 454536 : *_yr = yr;
697 454536 : return 0;
698 : }
699 :
700 : //convenience function to add two points on an elliptic curve. _xr and _yr are out params
701 : //returns 0 for an ordinary point, 1 for identity, and -1 if a nontrivial factor was found and placed in _xr
702 : [[gnu::nonnull(9, 10)]]
703 : NUT_ATTR_ACCESS(write_only, 9) NUT_ATTR_ACCESS(write_only, 10)
704 167500 : static inline int ecg_add(int64_t n, int64_t a, int64_t xp, int64_t yp, bool is_id_p, int64_t xq, int64_t yq, bool is_id_q, int64_t *_xr, int64_t *_yr){
705 167500 : if(is_id_p){
706 0 : if(is_id_q){
707 : return 1;
708 : }
709 0 : *_xr = xq, *_yr = yq;
710 0 : return 0;
711 167500 : }else if(is_id_q){
712 0 : *_xr = xp, *_yr = yp;
713 0 : return 0;
714 : }
715 167500 : int64_t s, dy, dx;
716 167500 : if(xp != xq){
717 167500 : dy = nut_i64_mod(yp - yq, n);
718 167500 : dx = nut_i64_mod(xp - xq, n);
719 0 : }else if(yp != yq || yp == 0){
720 : return 1;
721 : }else{
722 0 : dy = (3*(int128_t)xp*xp + a)%n;
723 0 : if(dy < 0){
724 0 : dy += n;
725 : }
726 0 : dx = nut_i64_mod(2*yp, n);
727 : }
728 167500 : int64_t d = nut_i64_egcd(dx, n, &s, NULL);
729 167500 : if(d != 1){
730 533 : *_xr = d;
731 533 : return -1;
732 : }
733 166967 : s = (int128_t)s*dy%n;
734 166967 : int64_t xr = ((int128_t)s*s - xp - xq)%n;
735 166967 : if(xr < 0){
736 30 : xr += n;
737 : }
738 166967 : *_xr = xr;
739 166967 : int64_t yr = (yp + (int128_t)s*(xr + xp))%n;
740 166967 : if(yr < 0){
741 83374 : yr += n;
742 : }
743 166967 : *_yr = yr;
744 166967 : return 0;
745 : }
746 :
747 : //convenience function to add k copies of a point on an elliptic curve together. _xr and _yr are out params
748 : //returns 0 for an ordinary point, 1 for identity, and -1 if a nontrivial factor was found and placed in _xr
749 : [[gnu::nonnull(7, 8)]]
750 : NUT_ATTR_ACCESS(write_only, 7) NUT_ATTR_ACCESS(write_only, 8)
751 216307 : static inline int ecg_scale(int64_t n, int64_t a, int64_t x, int64_t y, bool is_id, int64_t k, int64_t *_xr, int64_t *_yr){
752 216307 : if(is_id || !k){
753 : return 1;
754 : }
755 216307 : int is_id_r = 0, is_id_s = 0;
756 216307 : int64_t xs = x, ys = y;
757 408095 : while(!(k&1)){
758 191945 : is_id_s = ecg_double(n, a, xs, ys, is_id_s, &xs, &ys);
759 191945 : if(is_id_s == -1){
760 157 : *_xr = xs;
761 157 : return -1;
762 191788 : }else if(is_id_s == 1){
763 : return 1;
764 : }
765 191788 : k >>= 1;
766 : }
767 216150 : *_xr = xs, *_yr = ys;
768 216150 : is_id_r = is_id_s;
769 478365 : while(1){
770 478365 : k >>= 1;
771 478365 : if(!k){
772 : return is_id_r;
773 : }
774 262973 : is_id_s = ecg_double(n, a, xs, ys, is_id_s, &xs, &ys);
775 262973 : if(is_id_s == -1){
776 225 : *_xr = xs;
777 225 : return -1;
778 262748 : }else if(is_id_s == 1){
779 : return is_id_r;
780 262748 : }else if(k&1){
781 167500 : is_id_r = ecg_add(n, a, xs, ys, is_id_s, *_xr, *_yr, is_id_r, _xr, _yr);
782 167500 : if(is_id_r == -1){
783 : return -1;
784 : }
785 : }
786 : }
787 : }
788 :
789 : //TODO: this whole function needs to be rewritten to use projective montgomery curves instead of affine wierstrass curves
790 : //TODO: consider adding brent's second stage for fun
791 24362 : int64_t nut_u64_factor1_lenstra(int64_t n, int64_t x, int64_t y, int64_t a, int64_t B){
792 24362 : int64_t d = nut_i64_egcd(n, 6, NULL, NULL);//ensure Z/nZ doesn't have a field of characteristic 2 or 3 as a subgroup
793 24362 : if(d != 1){
794 : return d;
795 : }
796 24362 : int64_t b = ((int128_t)x*x + a)%n;
797 24362 : b = ((int128_t)y*y - (int128_t)x*b)%n;
798 24362 : if(b < 0){
799 10916 : b += n;
800 : }
801 24362 : d = (int128_t)a*a%n;
802 24362 : d = 4*(int128_t)a*d%n;
803 24362 : d = nut_i64_egcd(d + 27*((int128_t)b*b%n), n, NULL, NULL);//ensure the rhs of the elliptic curve does not have a repeated zero, leading to a cusp
804 24362 : if(d != 1){
805 : return d;
806 : }
807 : int is_id = 0;
808 239738 : for(int64_t k = 2; k <= B; ++k){
809 216307 : is_id = ecg_scale(n, a, x, y, is_id, k, &x, &y);
810 216307 : if(is_id == -1){
811 915 : return x;
812 215392 : }else if(is_id == 1){
813 : return n;
814 : }
815 : }
816 : return n;
817 : }
818 :
819 : /* There are many possible forms for elliptic curves, similar to how there are many possible forms for parabolas (y=ax^2+bx+c, y=a(x-l)(x-m), etc).
820 : * The basic form, used above, is called Wierstrass form and looks like y^2=x^3+ax+b. Other common forms include twisted Edwards and Montgomery.
821 : * Montgomery form is by^2=x^3+ax^2+x. Aside from what form we use for the curve, there are many possible ways to store points on the curve.
822 : * The simplest is to simply store x and y and a boolean indicating if the point is actually identity or not. However, this creates branching
823 : * and complexity due to handling points at infinity and finite points on the curve separately. So instead we can use projective coordinates,
824 : * where x and y are expressed as ratios x=X/Z and y=Y/Z. If Z is zero, we have a point at infinity. Variations of this exist where X and Y are
825 : * divided by different powers of Z.
826 : *
827 : * Obviously, x and y are closely related by whatever curve we are using. For any x, there are at most 2 solutions y placing it on the curve.
828 : * For any y, there are at most 3 solutions x putting it on the curve. So it's pretty concievable that X and Z might be sufficient to compute
829 : * the x coordinate of any scalar multiple of a point on the curve. In fact, it turns out this is true. Using projective coordinates also lets
830 : * us do fewer gcd computations, hence the motivation to have a Z coordinate, yet we do not really need the y coordinate per se, so it is great
831 : * we can do away with it.
832 : *
833 : * To compute a scalar multiple of a point P, we can use a double and add algorithm, just like binary exponentiation. For explicit x, y coordinates
834 : * on a Wierstrass curve this is trivially easy. For a Montgomery curve, we get the following rules for addition and doubling:
835 : *
836 : * X_{m+n} = Z_{m-n}((X_m-Z_m)(X_n+Z_n)+(X_m+Z_m)(X_n-Z_n))^2
837 : * Z_{m+n} = X_{m-n}((X_m-Z_m)(X_n+Z_n)-(X_m+Z_m)(X_n-Z_n))^2
838 : *
839 : * X_{2m} = (X_n+Z_n)^2(X_n-Z_n)^2
840 : * Z_{2m} = (4X_n Z_n)((X_n-Z_n)^2+((a+20)/4)(4X_n Z_n))
841 : * where
842 : * 4X_n Z_n = (X_n + Z_n)^2 - (X_n - Z_n)^2
843 : *
844 : * The rule for doubling is great, but the rule for addition seems pretty much useless: we don't know a thing about X_{m-n} or Z_{m-n}!!!!
845 : * The simplest way around this, which is very good in our case where we need to compute kP for many arbitrary P and consecutive small k, is
846 : * to consider the case where m-n = 1 so the equation for P_{m+n} in terms of P_{m-n}, P_m, and P_n becomes an equation for P_{2n+1} in terms of
847 : * P_{n+1} and P_n.
848 : *
849 : * Thus, we can build up P_k by scanning over the binary representation of k from most significant to least. At each bit index, we have P_n and
850 : * P_{n+1} where n is the portion of k more significant than the current position, rounded down to be even, and from them we compute P_{2n+1}
851 : * and P_{2n}
852 : *
853 : * Ex: L=(0)P H=(1)P
854 : * 10110 L=L+H=(1)P=(2*0+1)P
855 : * ^ H=2H=(2)P=(2*1)P
856 : * 10110 H=L+H=(3)P=(2*1+1)P
857 : * ^ L=2L=(2)P=(2*1)P
858 : * 10110 L=L+H=(5)P=(2*2+1)P
859 : * ^ H=2H=(6)P=(2*3)P
860 : * 10110 L=L+H=(11)P=(2*5+1)P
861 : * ^ H=2H=(12)P=(2*6)P
862 : * 10110 H=L+H=(23)P=(2*11+1)P
863 : * ^ L=2L=(22)P=(2*11)P
864 : * and the result is L
865 : */
866 0 : int64_t nut_u64_factor1_lenstra_montgomery(int64_t n, int64_t x, int64_t y, int64_t a, int64_t B){
867 0 : int64_t d = nut_i64_egcd(n, 6, NULL, NULL);//check n does not contain a degenerate prime field
868 0 : if(d != 1){
869 : return d;
870 : }
871 0 : int64_t b = nut_i64_mod(y*y, n);
872 0 : d = nut_i64_egcd(b, n, &b, NULL);//check that x, y is on some montgomery curve mod n with a given
873 0 : if(d != 1){
874 : return d;
875 : }
876 0 : b = nut_i64_mod(b*x, n);
877 0 : d = nut_i64_mod(x + a, n);
878 0 : d = nut_i64_mod(x*d + 1, n);
879 0 : b = nut_i64_mod(b*d, n);
880 0 : d = nut_i64_mod(a*a - 4, n);
881 0 : d = nut_i64_egcd(b*d, n, NULL, NULL);//check that the curve does not have a sharp cusp
882 0 : if(d != 1){
883 : return d;
884 : }
885 0 : int64_t C;
886 0 : nut_i64_egcd(4, n, &C, NULL);
887 0 : C = nut_i64_mod((a + 2)*C, n);
888 0 : int64_t Zh = 1, Xh = x;
889 0 : int64_t Z1 = 1, X1 = x;
890 0 : for(int64_t k = 2; k <= B; ++k){
891 0 : int64_t Zl = 0, Xl = 1;
892 0 : for(int64_t t = 1ll << (63 - __builtin_clzll(k)); t; t >>= 1){
893 0 : if(k & t){
894 : //L = L + H
895 : //H = 2*H
896 0 : int64_t dh = nut_i64_mod(Xh - Zh, n);
897 0 : int64_t sl = nut_i64_mod(Xl + Zl, n);
898 0 : int64_t sh = nut_i64_mod(Xh + Zh, n);
899 0 : int64_t dl = nut_i64_mod(Xl - Zl, n);
900 0 : int64_t dhsl = nut_i64_mod(dh*sl, n);
901 0 : int64_t shdl = nut_i64_mod(sh*dl, n);
902 0 : Xl = nut_i64_mod(dhsl + shdl, n);
903 0 : Xl = nut_i64_mod(Xl*Xl, n);
904 0 : Xl = nut_i64_mod(Z1*Xl, n);
905 0 : Zl = nut_i64_mod(dhsl - shdl, n);
906 0 : Zl = nut_i64_mod(Zl*Zl, n);
907 0 : Zl = nut_i64_mod(X1*Zl, n);
908 0 : int64_t sh2 = nut_i64_mod(sh*sh, n);
909 0 : int64_t dh2 = nut_i64_mod(dh*dh, n);
910 0 : int64_t ch = nut_i64_mod(sh2 - dh2, n);
911 0 : Xh = nut_i64_mod(sh2*dh2, n);
912 0 : Zh = nut_i64_mod(C*ch, n);
913 0 : Zh = nut_i64_mod(ch*(dh2 + Zh), n);
914 : }else{
915 : //H = L + H
916 : //L = 2*L
917 0 : int64_t dh = nut_i64_mod(Xh - Zh, n);
918 0 : int64_t sl = nut_i64_mod(Xl + Zl, n);
919 0 : int64_t sh = nut_i64_mod(Xh + Zh, n);
920 0 : int64_t dl = nut_i64_mod(Xl - Zl, n);
921 0 : int64_t dhsl = nut_i64_mod(dh*sl, n);
922 0 : int64_t shdl = nut_i64_mod(sh*dl, n);
923 0 : Xh = nut_i64_mod(dhsl + shdl, n);
924 0 : Xh = nut_i64_mod(Xh*Xh, n);
925 0 : Zh = nut_i64_mod(dhsl - shdl, n);
926 0 : Zh = nut_i64_mod(Zh*Zh, n);
927 0 : Zh = nut_i64_mod(x*Zh, n);
928 0 : int64_t sl2 = nut_i64_mod(sl*sl, n);
929 0 : int64_t dl2 = nut_i64_mod(dl*dl, n);
930 0 : int64_t cl = nut_i64_mod(sl2 - dl2, n);
931 0 : Xl = nut_i64_mod(sl2*dl2, n);
932 0 : Zl = nut_i64_mod(C*cl, n);
933 0 : Zl = nut_i64_mod(cl*(dl2 + Zl), n);
934 : }
935 : }
936 0 : if(!Zl){
937 : return n;
938 : }
939 0 : d = nut_i64_egcd(Zl, n, NULL, NULL);
940 0 : if(d != 1){
941 : return d;
942 : }
943 0 : Z1 = Zl;
944 0 : X1 = Xl;
945 : }
946 : return n;
947 : }
948 :
|