|             Line data    Source code 
       1              : #include <stddef.h>
       2              : #include <inttypes.h>
       3              : #include <stdint.h>
       4              : #include <string.h>
       5              : 
       6              : #include <nut/modular_math.h>
       7              : #include <nut/factorization.h>
       8              : #include <nut/debug.h>
       9              : 
      10        11405 : nut_Factors *nut_make_Factors_w(uint64_t max_primes){
      11        11405 :     static const nut_Factors dummy;
      12        11405 :     return calloc(1, sizeof(nut_Factors) + sizeof(dummy.factors[0])*max_primes);
      13              : }
      14              : 
      15            2 : nut_Factors *nut_make_Factors_ub(uint64_t n, uint64_t num_primes, const uint64_t primes[static num_primes]){
      16            2 :     uint64_t p = 1, w = 0;
      17           11 :     for(uint64_t i = 0; i < num_primes; ++i){
      18           11 :         p *= primes[i];
      19           11 :         if(p > n){
      20            2 :             return nut_make_Factors_w(w);
      21              :         }
      22            9 :         ++w;
      23              :     }
      24              :     return NULL;
      25              : }
      26              : 
      27        10000 : nut_Factors *nut_Factors_copy(const nut_Factors *factors){
      28        10000 :     nut_Factors *ret = nut_make_Factors_w(factors->num_primes);
      29        10000 :     if(ret){
      30        10000 :         memcpy(ret, factors, offsetof(nut_Factors, factors) + factors->num_primes*sizeof(factors->factors[0]));
      31              :     }
      32        10000 :     return ret;
      33              : }
      34              : 
      35      3006500 : uint64_t nut_Factors_prod(const nut_Factors *factors){
      36      3006500 :     uint64_t r = 1;
      37     11930391 :     for(uint64_t i = 0; i < factors->num_primes; ++i){
      38      8923891 :         r *= nut_u64_pow(factors->factors[i].prime, factors->factors[i].power);
      39              :     }
      40      3006500 :     return r;
      41              : }
      42              : 
      43      1000000 : uint64_t nut_Factor_divcount(const nut_Factors *factors){
      44      1000000 :     uint64_t s = 1;
      45      3853708 :     for(uint64_t i = 0; i < factors->num_primes; ++i){
      46      2853708 :         s *= factors->factors[i].power + 1;
      47              :     }
      48      1000000 :     return s;
      49              : }
      50              : 
      51      1000000 : uint64_t nut_Factor_divsum(const nut_Factors *factors){
      52      1000000 :     uint64_t s = 1;
      53      3853708 :     for(uint64_t i = 0; i < factors->num_primes; ++i){
      54      2853708 :         uint64_t p = factors->factors[i].prime;
      55      2853708 :         uint64_t a = factors->factors[i].power;
      56      2853708 :         s *= (nut_u64_pow(p, a + 1) - 1)/(p - 1);
      57              :     }
      58      1000000 :     return s;
      59              : }
      60              : 
      61      1000000 : uint64_t nut_Factor_divpowsum(const nut_Factors *factors, uint64_t power){
      62      1000000 :     if(power == 0){
      63            0 :         return nut_Factor_divcount(factors);
      64      1000000 :     }else if(power == 1){
      65            0 :         return nut_Factor_divsum(factors);
      66              :     }
      67              :     uint64_t s = 1;
      68      3853708 :     for(uint64_t i = 0; i < factors->num_primes; ++i){
      69      2853708 :         uint64_t p = factors->factors[i].prime;
      70      2853708 :         uint64_t a = factors->factors[i].power;
      71      2853708 :         s *= (nut_u64_pow(p, (a + 1)*power) - 1)/(nut_u64_pow(p, power) - 1);
      72              :     }
      73              :     return s;
      74              : }
      75              : 
      76      2000000 : uint64_t nut_Factor_divtupcount(const nut_Factors *factors, uint64_t k, uint64_t modulus){
      77      2000000 :     if(k == 0){
      78            0 :         return !factors->num_primes;
      79      2000000 :     }else if(k == 1){
      80              :         return 1;
      81              :     }
      82      2000000 :     uint64_t res = 1;
      83      2000000 :     if(k == 2){
      84            0 :         res = nut_Factor_divcount(factors)%modulus;
      85            0 :         return modulus ? res % modulus : res;
      86              :     }
      87      2000000 :     if(modulus){
      88      3853708 :         for(uint64_t i = 0; i < factors->num_primes; ++i){
      89      2853708 :             res = res*nut_u64_binom(factors->factors[i].power + k - 1, k - 1)%modulus;
      90              :         }
      91              :     }else{
      92      3853708 :         for(uint64_t i = 0; i < factors->num_primes; ++i){
      93      2853708 :             res *= nut_u64_binom(factors->factors[i].power + k - 1, k - 1);
      94              :         }
      95              :     }
      96              :     return res;
      97              : }
      98              : 
      99            0 : uint64_t nut_Factor_circle_latte(const nut_Factors *factors){
     100            0 :     uint64_t c1c1_d0 = 1;
     101            0 :     uint64_t c3c1_d0 = 1;
     102            0 :     uint64_t c3c3_d0 = 0;
     103            0 :     for(uint64_t i = 0; i < factors->num_primes; ++i){
     104            0 :         uint64_t p = factors->factors[i].prime;
     105            0 :         uint64_t a = factors->factors[i].power;
     106            0 :         if((p&3) == 1){
     107            0 :             c1c1_d0 *= a + 1;
     108            0 :         }else if((p&3) == 3){
     109            0 :             uint64_t tmp = c3c1_d0*(a/2 + 1) + c3c3_d0*(a - a/2);
     110            0 :             c3c3_d0 = c3c1_d0*(a - a/2) + c3c3_d0*(a/2 + 1);
     111            0 :             c3c1_d0 = tmp;
     112              :         }
     113              :     }
     114            0 :     return 4*c1c1_d0*(c3c1_d0 - c3c3_d0);
     115              : }
     116              : 
     117          500 : void nut_Factor_ipow(nut_Factors *factors, uint64_t power){
     118         2147 :     for(uint64_t i = 0; i < factors->num_primes; ++i){
     119         1647 :         factors->factors[i].power *= power;
     120              :     }
     121          500 : }
     122              : 
     123      1000000 : uint64_t nut_Factor_phi(const nut_Factors *factors){
     124      1000000 :     uint64_t s = 1;
     125      3853708 :     for(uint64_t i = 0; i < factors->num_primes; ++i){
     126      2853708 :         uint64_t p = factors->factors[i].prime;
     127      2853708 :         uint64_t a = factors->factors[i].power;
     128      2853708 :         if(a){
     129      2853708 :             s *= (nut_u64_pow(p, a - 1))*(p - 1);
     130              :         }
     131              :     }
     132      1000000 :     return s;
     133              : }
     134              : 
     135      1000001 : uint64_t nut_Factor_carmichael(const nut_Factors *factors){
     136      1000001 :     uint64_t s = 1;
     137      1000001 :     if(!factors->num_primes){
     138              :         return s;
     139              :     }
     140      1000000 :     uint64_t p = factors->factors[0].prime;
     141      1000000 :     uint64_t a = factors->factors[0].power;
     142      1000000 :     if(p == 2){
     143       500000 :         if(a >= 3){
     144       125000 :             s = 1ull << (a - 2);
     145              :         }else{
     146       375000 :             s = 1ull << (a - 1);
     147              :         }
     148              :     }else{
     149       500000 :         s = nut_u64_pow(p, a - 1)*(p - 1);
     150              :     }
     151      2853709 :     for(uint64_t i = 1; i < factors->num_primes; ++i){
     152      1853709 :         p = factors->factors[i].prime;
     153      1853709 :         a = factors->factors[i].power;
     154      1853709 :         uint64_t phi_pk = (nut_u64_pow(p, a - 1))*(p - 1);
     155      1853709 :         s = nut_i64_lcm(s, phi_pk);
     156              :     }
     157              :     return s;
     158              : }
     159              : 
     160         3999 : uint64_t nut_u64_order_mod(uint64_t a, uint64_t n, uint64_t cn, nut_Factors *cn_factors){
     161        16432 :     for(uint64_t i = 0; i < cn_factors->num_primes; ++i){
     162        16033 :         while(cn_factors->factors[i].power){
     163        14231 :             cn /= cn_factors->factors[i].prime;
     164        14231 :             if(nut_u64_powmod(a, cn, n) != 1){
     165        10631 :                 cn *= cn_factors->factors[i].prime;
     166        10631 :                 break;
     167              :             }
     168         3600 :             cn_factors->factors[i].power--;
     169              :         }
     170              :     }
     171         3999 :     return cn;
     172              : }
     173              : 
     174         4000 : int nut_Factor_forall_divs_tmptmp(const nut_Factors *restrict factors, int (*f)(const nut_Factors*, uint64_t, void*), void *restrict data){
     175         8000 :     nut_Factors *dfactors [[gnu::cleanup(cleanup_free)]] = nut_Factors_copy(factors);
     176         8000 :     nut_Factors *pfactors [[gnu::cleanup(cleanup_free)]] = nut_Factors_copy(factors);
     177         4000 :     return dfactors && pfactors && nut_Factor_forall_divs(factors, f, data, dfactors, pfactors);
     178              : }
     179              : 
     180         4000 : int nut_Factor_forall_divs(const nut_Factors *restrict factors, int (*f)(const nut_Factors*, uint64_t, void*), void *restrict data, nut_Factors *restrict dfactors, nut_Factors *restrict pfactors){
     181         4000 :     dfactors->num_primes = factors->num_primes;
     182        16436 :     for(uint64_t i = 0; i < factors->num_primes; ++i){
     183        12436 :         dfactors->factors[i].prime = factors->factors[i].prime;
     184        12436 :         dfactors->factors[i].power = 0;
     185        12436 :         pfactors->factors[i].prime = 1;
     186              :     }
     187              :     uint64_t d = 1;
     188        66706 :     while(1){
     189        66706 :         if(f(dfactors, d, data)){
     190              :             return 1;
     191              :         }
     192              :         uint64_t i;
     193       105247 :         for(i = 0; i < factors->num_primes; ++i){
     194       101247 :             if(dfactors->factors[i].power < factors->factors[i].power){
     195        62706 :                 ++dfactors->factors[i].power;
     196        62706 :                 pfactors->factors[i].prime *= dfactors->factors[i].prime;
     197        62706 :                 d *= dfactors->factors[i].prime;
     198        62706 :                 break;
     199              :             }
     200        38541 :             dfactors->factors[i].power = 0;
     201        38541 :             d /= pfactors->factors[i].prime;
     202        38541 :             pfactors->factors[i].prime = 1;
     203              :         }
     204        66706 :         if(i == factors->num_primes){
     205              :             return 0;
     206              :         }
     207              :     }
     208              : }
     209              : 
     210         1000 : int nut_Factor_forall_divs_le_tmptmp(const nut_Factors *restrict factors, uint64_t d_max, int (*f)(const nut_Factors*, uint64_t, void*), void *restrict data){
     211         2000 :     nut_Factors *dfactors [[gnu::cleanup(cleanup_free)]] = nut_Factors_copy(factors);
     212         2000 :     nut_Factors *pfactors [[gnu::cleanup(cleanup_free)]] = nut_Factors_copy(factors);
     213         1000 :     return dfactors && pfactors && nut_Factor_forall_divs_le(factors, d_max, f, data, dfactors, pfactors);
     214              : }
     215              : 
     216         1000 : int nut_Factor_forall_divs_le(const nut_Factors *restrict factors, uint64_t d_max, int (*f)(const nut_Factors*, uint64_t, void*), void *restrict data, nut_Factors *restrict dfactors, nut_Factors *restrict pfactors){
     217         1000 :     dfactors->num_primes = factors->num_primes;
     218         3440 :     for(uint64_t i = 0; i < factors->num_primes; ++i){
     219         2440 :         dfactors->factors[i].prime = factors->factors[i].prime;
     220         2440 :         dfactors->factors[i].power = 0;
     221         2440 :         pfactors->factors[i].prime = 1;
     222              :     }
     223         1000 :     uint64_t d = 1;
     224         1000 :     if(d_max <= 1){
     225            0 :         return d_max && f(dfactors, d, data);
     226              :     }
     227         7862 :     while(1){
     228         7862 :         if(f(dfactors, d, data)){
     229              :             return 1;
     230              :         }
     231              :         uint64_t i;
     232        13558 :         for(i = 0; i < factors->num_primes; ++i){
     233        12558 :             if(dfactors->factors[i].power < factors->factors[i].power){
     234         7971 :                 ++dfactors->factors[i].power;
     235         7971 :                 pfactors->factors[i].prime *= dfactors->factors[i].prime;
     236         7971 :                 d *= dfactors->factors[i].prime;
     237         7971 :                 if(d <= d_max){
     238              :                     break;
     239              :                 }
     240              :             }
     241         5696 :             dfactors->factors[i].power = 0;
     242         5696 :             d /= pfactors->factors[i].prime;
     243         5696 :             pfactors->factors[i].prime = 1;
     244              :         }
     245         7862 :         if(i == factors->num_primes){
     246              :             return 0;
     247              :         }
     248              :     }
     249              : }
     250              : 
     251      2875800 : void nut_Factor_append(nut_Factors *factors, uint64_t m, uint64_t k){
     252      2884239 :     for(uint64_t i = 0;; ++i){
     253      2884239 :         if(i == factors->num_primes){
     254      1012231 :             factors->factors[i].prime = m;
     255      1012231 :             factors->factors[i].power = k;
     256      1012231 :             ++factors->num_primes;
     257      1012231 :             break;
     258      1872008 :         }else if(factors->factors[i].prime == m){
     259            0 :             factors->factors[i].power += k;
     260            0 :             break;
     261      1872008 :         }else if(factors->factors[i].prime > m){
     262      1863569 :             memmove(factors->factors + i + 1, factors->factors + i, (factors->num_primes - i)*sizeof(*factors->factors));
     263      1863569 :             ++factors->num_primes;
     264      1863569 :             factors->factors[i].prime = m;
     265      1863569 :             factors->factors[i].power = k;
     266      1863569 :             break;
     267              :         }
     268              :     }
     269      2875800 : }
     270              : 
     271          500 : void nut_Factor_combine(nut_Factors *restrict factors, const nut_Factors *restrict factors2, uint64_t k){
     272          500 :     nut_Factors *factors3 [[gnu::cleanup(cleanup_free)]] = nut_make_Factors_w(factors->num_primes + factors2->num_primes);
     273          500 :     uint64_t i = 0, i2 = 0, i3 = 0;
     274         3053 :     while(i < factors->num_primes && i2 < factors2->num_primes){
     275         2553 :         if(factors->factors[i].prime < factors2->factors[i2].prime){
     276         1170 :             factors3->factors[i3++] = factors->factors[i++];
     277         1383 :         }else if(factors->factors[i].prime > factors2->factors[i2].prime){
     278         1167 :             factors3->factors[i3].prime = factors2->factors[i2].prime;
     279         1167 :             factors3->factors[i3++].power = factors2->factors[i2++].power*k;
     280              :         }else{
     281          216 :             factors3->factors[i3].prime = factors->factors[i].prime;
     282          216 :             factors3->factors[i3++].power = factors->factors[i++].power + factors2->factors[i2++].power*k;
     283              :         }
     284              :     }
     285          762 :     while(i < factors->num_primes){
     286          262 :         factors3->factors[i3++] = factors->factors[i++];
     287              :     }
     288          764 :     while(i2 < factors2->num_primes){
     289          264 :         factors3->factors[i3].prime = factors2->factors[i2].prime;
     290          264 :         factors3->factors[i3++].power = factors2->factors[i2++].power*k;
     291              :     }
     292              :     //TODO: elide copying initial factors from factors to factors3 and back to factors
     293          500 :     memcpy(factors->factors, factors3->factors, i3*sizeof(*factors->factors));
     294          500 :     factors->num_primes = i3;
     295          500 : }
     296              : 
     297            0 : void nut_Factor_divide(nut_Factors *restrict out, const nut_Factors *restrict factors, const nut_Factors *restrict dfactors){
     298            0 :     uint64_t i = 0, i2 = 0, i3 = 0;
     299            0 :     while(i < factors->num_primes && i2 < dfactors->num_primes){
     300            0 :         if(factors->factors[i].prime < dfactors->factors[i2].prime){
     301            0 :             out->factors[i3++] = factors->factors[i++];
     302            0 :         }else if(factors->factors[i].power > dfactors->factors[i2].power){
     303            0 :             out->factors[i3].prime = factors->factors[i].prime;
     304            0 :             out->factors[i3++].power = factors->factors[i++].power - dfactors->factors[i2++].power;
     305              :         }else{
     306            0 :             ++i, ++i2;
     307              :         }
     308              :     }
     309            0 :     while(i < factors->num_primes){
     310            0 :         out->factors[i3++] = factors->factors[i++];
     311              :     }
     312            0 :     out->num_primes = i3;
     313            0 : }
     314              : 
     315           24 : int nut_Factor_fprint(FILE *restrict file, const nut_Factors *restrict factors){
     316           24 :     int res = 0;
     317           96 :     for(uint64_t i = 0; i < factors->num_primes; ++i){
     318           72 :         uint64_t power = factors->factors[i].power;
     319           72 :         if(!power){
     320           26 :             continue;
     321              :         }
     322           46 :         if(res){
     323           23 :             res += fprintf(file, "*");
     324              :         }
     325           46 :         res += fprintf(file, "%"PRIu64, factors->factors[i].prime);
     326           46 :         if(power != 1){
     327           20 :             res += fprintf(file, "^%"PRIu64, power);
     328              :         }
     329              :     }
     330           24 :     if(!res){
     331            1 :         res += fprintf(file, "1");
     332              :     }
     333           24 :     return res;
     334              : }
     335              : 
     336     28427528 : bool nut_u64_is_prime_dmr(uint64_t n){
     337              :     //static const uint64_t DMR_PRIMES[7] = {2, 325, 9375, 28178, 450775, 9780504, 1795265022};//sufficient for 64 bit numbers
     338              :     //had to disable 7 base check because the last base is too large to be squared under 64 bit multiplication
     339     28427528 :     static const uint64_t DMR_PRIMES[12] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37};//sufficient for 64 bit numbers
     340              :     //static const uint64_t DMR_PRIMES_C = 7;
     341     28427528 :     static const uint64_t DMR_PRIMES_C = 12;
     342     28427528 :     uint64_t s, d;//s, d | 2^s*d = n - 1
     343     28427528 :     if(n%2 == 0){
     344     11177533 :         return n == 2;
     345              :     }
     346     17249995 :     --n;
     347     17249995 :     s = __builtin_ctzll(n);
     348     17249995 :     d = n>>s;
     349     17249995 :     ++n;
     350     88402346 :     for(uint64_t i = 0, a, x; i < DMR_PRIMES_C; ++i){
     351     83451414 :         a = DMR_PRIMES[i];
     352     83451414 :         if(a >= n){
     353              :             break;
     354              :         }
     355     80169871 :         x = nut_u64_powmod(a, d, n);
     356     80169871 :         if(x == 1 || x == n - 1){
     357     46863185 :             goto CONTINUE_WITNESSLOOP;
     358              :         }
     359     62281139 :         for(a = 0; a < s - 1; ++a){
     360     53263881 :             x = nut_u64_powmod(x, 2, n);
     361     53263881 :             if(x == 1){
     362              :                 return 0;
     363              :             }
     364     53263619 :             if(x == n - 1){
     365     24289166 :                 goto CONTINUE_WITNESSLOOP;
     366              :             }
     367              :         }
     368              :         return false;
     369     71152351 :         CONTINUE_WITNESSLOOP:;
     370              :     }
     371              :     return true;
     372              : }
     373              : 
     374            0 : uint64_t nut_u64_next_prime_ge(uint64_t n){
     375            0 :     if(n <= 97){
     376            0 :         for(uint64_t i = 0; i < 25; ++i){
     377            0 :             uint64_t p = nut_small_primes[i];
     378            0 :             if(p >= n){
     379              :                 return p;
     380              :             }
     381              :         }
     382              :     }
     383            0 :     uint64_t res = n%30;
     384            0 :     if(res <= 13){
     385            0 :         if(res <= 7){
     386            0 :             if(res <= 1){
     387            0 :                 n += 1 - res;
     388              :             }else{
     389            0 :                 n += 7 - res;
     390              :             }
     391            0 :         }else if(res <= 11){
     392            0 :             n += 11 - res;
     393              :         }else{
     394            0 :             n += 13 - res;
     395              :         }
     396            0 :     }else if(res <= 19){
     397            0 :         if(res <= 17){
     398            0 :             n += 17 - res;
     399              :         }else{
     400            0 :             n += 19 - res;
     401              :         }
     402            0 :     }else if(res <= 23){
     403            0 :         n += 23 - res;
     404              :     }else{
     405            0 :         n += 29 - res;
     406              :     }
     407            0 :     res = n%30;
     408            0 :     while(!nut_u64_is_prime_dmr(n)){
     409            0 :         switch(res){
     410            0 :             case 1: n += 6; res = 7; break;
     411            0 :             case 7: n += 4; res = 11; break;
     412            0 :             case 11: n += 2; res = 13; break;
     413            0 :             case 13: n += 4; res = 17; break;
     414            0 :             case 17: n += 2; res = 19; break;
     415            0 :             case 19: n += 4; res = 23; break;
     416            0 :             case 23: n += 6; res = 29; break;
     417            0 :             case 29: n += 2; res = 1; break;
     418            0 :             default: __builtin_unreachable();
     419              :         }
     420              :     }
     421              :     return n;
     422              : }
     423              : 
     424              : /*
     425              : int is_prime_ecam(uint64_t n){
     426              :     static const uint64_t nDs[13] = {3, 4, 7, 8, 11, 12, 16, 19, 27, 28, 43, 67, 163};
     427              :     static const uint64_t nDs_len = 13;
     428              :     //Check if n is prime using a simplified version of the Atkin-Morain elliptic curve prime proving algorithm
     429              :     //First, we need to find a quadratic residue D so that a^2 + |D|b^2 = 4n and D is in (negative) nDs.
     430              :     //We can use Cornacchia's algorithm to help solve a^2 + |D|b^2 = 4n.
     431              :     //The number of potential solutions is very limited: a solution can be a primitive solution, or a primitive
     432              :     //solution to a^2 + |D|b^2 = n, since n is supposed to be prime and hence squarefree.
     433              :     //In the first case, there are at most 4 square roots of -|D| mod 4n and hence 2 starting points for Cornacchia.
     434              :     //In the second case, there are at most 2 square roots of -|D| mod n and hence 1 starting point for Cornacchia.
     435              :     //We can obtain a solution for each of these three starting points, although some may not lead to a solution.
     436              :     //Any solution a, b will have up to 3 associated solutions by negating components, thus there are at most
     437              :     //12 curves for every D, for at most 156 curves total (although it is very unlikely all Ds will be quadratic residues).
     438              :     //
     439              :     //Now, supposing we have a and b, we need to actually find a point on the curve.  But for this,
     440              :     //trying random x until x^3 + ax + b is a quadratic residue mod N and then pick y as a corresponding root.
     441              :     //We have that m = n + 1 - a is the number of points on the curve.
     442              :     //So the final thing to check is that we can factor m quickly and into a good form.
     443              :     //We want a prime divisor q of m such that q > (n^(1/4)+1)^2.
     444              :     //If we find one, demonstrating a point P such that [m/q]P is not identity but [m]P is identity proves
     445              :     //q is prime implies n is prime.
     446              :     //
     447              : }
     448              : */
     449              : 
     450              : const uint64_t nut_small_primes[25] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97};
     451              : 
     452              : const nut_FactorConf nut_default_factor_conf = {
     453              :     .pollard_max= 100000,    //maximum number to use Pollard's Rho algorithm for
     454              :     .pollard_stride= 10,     //number of gcd operations to coalesce, decreases time for a single iteration at the cost of potentially doing twice this many extra iterations
     455              :     .lenstra_max= UINT64_MAX,//maximum number to use Lenstra's Elliptic Curve algorithm for
     456              :     .lenstra_bfac= 10        //roughly speaking, the number of iterations to try before picking a new random point and curve
     457              : };
     458              : 
     459              : NUT_ATTR_NO_SAN("vla-bound")
     460         3002 : uint64_t nut_u64_factor_trial_div(uint64_t n, uint64_t num_primes, const uint64_t primes[restrict static num_primes], nut_Factors *restrict factors){
     461         3002 :     factors->num_primes = 0;
     462        60904 :     for(uint64_t i = 0; i < num_primes; ++i){
     463        58985 :         uint64_t p = primes[i];
     464        58985 :         if(n%p == 0){
     465         5107 :             uint64_t j = factors->num_primes++;
     466         5107 :             factors->factors[j].prime = p;
     467         5107 :             factors->factors[j].power = 1;
     468         5107 :             n /= p;
     469         7405 :             while(n%p == 0){
     470         2298 :                 ++factors->factors[j].power;
     471         2298 :                 n /= p;
     472              :             }
     473              :         }
     474        58985 :         if(p*p > n){
     475         1083 :             if(n > 1){
     476         1032 :                 uint64_t j = factors->num_primes++;
     477         1032 :                 factors->factors[j].prime = n;
     478         1032 :                 factors->factors[j].power = 1;
     479              :             }
     480         1083 :             return 1;
     481              :         }
     482              :     }
     483              :     return n;
     484              : }
     485              : 
     486              : NUT_ATTR_NO_SAN("vla-bound")
     487         3002 : uint64_t nut_u64_factor_heuristic(uint64_t n, uint64_t num_primes, const uint64_t primes[restrict static num_primes], const nut_FactorConf *restrict conf, nut_Factors *restrict factors){
     488         3002 :     n = nut_u64_factor_trial_div(n, num_primes, primes, factors);
     489         3002 :     if(n == 1){
     490              :         return 1;
     491              :     }
     492         1919 :     uint64_t exponent = 1;
     493         1919 :     nut_u64_is_perfect_power(n, 9, &n, &exponent);
     494         1919 :     if(nut_u64_is_prime_dmr(n)){//TODO: get a better primality test and possibly run it later (ie after some pollard-rho)
     495         1024 :         nut_Factor_append(factors, n, exponent);
     496         1024 :         return 1;
     497              :     }
     498          895 :     uint64_t smoothness = 101*101;
     499         1790 :     nut_Factors *factors2 [[gnu::cleanup(cleanup_free)]] = nut_make_Factors_w(NUT_MAX_PRIMES_64);
     500          955 :     uint64_t m;
     501          955 :     while(1){
     502          955 :         if(n <= conf->pollard_max){//TODO: allow iteration count based stopping of pollard-rho brent so we can get small factors of big numbers that way?
     503           25 :             do{
     504           25 :                 uint64_t x = nut_u64_prand(0, n);
     505           25 :                 m = nut_u64_factor1_pollard_rho_brent(n, x, conf->pollard_stride);
     506           25 :             }while(m == n);
     507           24 :             uint64_t k = 1;
     508           24 :             n /= m;
     509           24 :             while(n%m == 0){
     510            0 :                 k += 1;
     511            0 :                 n /= m;
     512              :             }
     513           24 :             if(m < smoothness || nut_u64_is_prime_dmr(m)){
     514           24 :                 nut_Factor_append(factors, m, k*exponent);
     515              :             }else{
     516            0 :                 factors2->num_primes = 0;
     517            0 :                 m = nut_u64_factor_heuristic(m, 0, primes, conf, factors2);
     518            0 :                 if(m != 1){
     519              :                     return m;//TODO: abort
     520              :                 }
     521            0 :                 nut_Factor_combine(factors, factors2, k*exponent);
     522              :             }
     523           24 :             if(n == 1){
     524              :                 return 1;
     525              :             }
     526           24 :             if(n < smoothness || nut_u64_is_prime_dmr(n)){
     527           24 :                 nut_Factor_append(factors, n, exponent);
     528           24 :                 return 1;
     529              :             }
     530          931 :         }else if(n <= conf->lenstra_max){
     531        24362 :             do{
     532        24362 :                 uint64_t x = nut_u64_prand(0, n);
     533        24362 :                 uint64_t y = nut_u64_prand(0, n);
     534        24362 :                 uint64_t a = nut_u64_prand(0, n);
     535        24362 :                 m = nut_u64_factor1_lenstra(n, x, y, a, conf->lenstra_bfac);
     536        24362 :             }while(m == n);
     537          931 :             uint64_t k = 1;
     538          931 :             n /= m;
     539          933 :             while(n%m == 0){
     540            2 :                 k += 1;
     541            2 :                 n /= m;
     542              :             }
     543          931 :             if(m < smoothness || nut_u64_is_prime_dmr(m)){
     544          931 :                 nut_Factor_append(factors, m, k*exponent);
     545              :             }else{
     546            0 :                 factors2->num_primes = 0;
     547            0 :                 m = nut_u64_factor_heuristic(m, 0, primes, conf, factors2);
     548            0 :                 if(m != 1){
     549              :                     return m;//TODO: abort
     550              :                 }
     551            0 :                 nut_Factor_combine(factors, factors2, k*exponent);
     552              :             }
     553          931 :             if(n == 1){
     554              :                 return 1;
     555              :             }
     556          931 :             if(n < smoothness || nut_u64_is_prime_dmr(n)){
     557          871 :                 nut_Factor_append(factors, n, exponent);
     558          871 :                 return 1;
     559              :             }
     560              :         }else{//TODO: implement quadratic sieve and number field sieve
     561              :             return n;
     562              :         }
     563              :     }
     564              : }
     565              : 
     566    100258924 : uint64_t nut_u64_nth_root(uint64_t a, uint64_t n){
     567    100258924 :     if(a < 2){
     568              :         return a;
     569              :     }
     570    100258923 :     uint64_t x;
     571    100258923 :     if(n < 13){
     572     51988385 :         uint64_t floor_log_2_a = 63 - __builtin_clzll(a);
     573     51988385 :         x = 1ull << (floor_log_2_a/n);
     574     51988385 :         if(x == 1){
     575              :             return 1;
     576              :         }
     577              :     }else for(x = 1;; ++x){
     578     91668577 :         uint128_t x_pow = nut_u128_pow(x + 1, n);
     579     91668577 :         if(x_pow > a){
     580              :             return x;
     581              :         }
     582              :     }
     583    448623207 :     for(uint64_t i = 0;; ++i){
     584    500583341 :         uint128_t x_pow = nut_u128_pow(x, n - 1);
     585    500583341 :         uint64_t x_next = ((n - 1)*x_pow*x + a)/(n*x_pow);
     586    500583341 :         if(i > 1 && x_next >= x){
     587              :             return x;
     588              :         }
     589    448623207 :         x = x_next;
     590              :     }
     591              : }
     592              : 
     593      9437665 : bool nut_u64_is_perfect_power(uint64_t a, uint64_t max, uint64_t *restrict _base, uint64_t *restrict _exp){
     594      9437665 :     if(a < 2){
     595              :         return false;
     596              :     }
     597              :     /// the maximum exponent ever for a uint64 is 63, but if the exponent is composite we can find it in stages,
     598              :     /// so the maximum exponent we will ever have to take is 61.
     599              :     uint64_t x = 1;
     600    104887576 :     for(uint64_t i = 0; i < 18; ++i){
     601    104887576 :         uint64_t p = nut_small_primes[i];
     602    104887576 :         if(p > max){
     603              :             break;
     604              :         }
     605    100243323 :         while(1){
     606    100243323 :             uint64_t r = nut_u64_nth_root(a, p);
     607    100243323 :             if(nut_u64_pow(r, p) != a){
     608              :                 break;
     609              :             }
     610      4793412 :             x *= p;
     611      4793412 :             a = r;
     612      4793412 :             max /= p;
     613      4793412 :             if(p > max){
     614          499 :                 goto OUTPUT_RESULT;
     615              :             }
     616              :         }
     617              :     }
     618      9437166 :     OUTPUT_RESULT:;
     619      9437665 :     if(x != 1){
     620      4721356 :         *_base = a;
     621      4721356 :         *_exp = x;
     622      4721356 :         return true;
     623              :     }
     624              :     return false;
     625              : }
     626              : 
     627          267 : uint64_t nut_u64_factor1_pollard_rho(uint64_t n, uint64_t x){
     628          267 :     uint64_t y = x, d = 1;
     629         7499 :     while(d == 1){
     630         7232 :         x = (x*x + 1)%n;
     631         7232 :         y = (y*y + 1)%n;
     632         7232 :         y = (y*y + 1)%n;
     633         7232 :         d = nut_i64_egcd(x > y ? x - y : y - x, n, NULL, NULL);
     634              :     }
     635          267 :     return d;
     636              : }
     637              : 
     638           25 : uint64_t nut_u64_factor1_pollard_rho_brent(uint64_t n, uint64_t x, uint64_t m){
     639           25 :     uint64_t y = x, ys = x;
     640           25 :     uint64_t d = 1;//gcd of n and the difference of the terms in the sequence
     641           25 :     uint64_t r = 1;//power of two in brent's algorithm
     642           25 :     uint64_t q = 1;//product of all differences so far, allowing us to process m at a time when checking for a nontrival factor d
     643          112 :     while(d == 1){
     644          398 :         x = y;
     645          398 :         for(uint64_t i = 0; i < r; ++i){
     646          311 :             y = (y*y + 1)%n;
     647              :         }
     648          174 :         for(uint64_t k = 0; k < r && d == 1; k += m){
     649          380 :             ys = y;
     650          380 :             for(uint64_t i = 0; i < m && i < r - k; ++i){
     651          293 :                 y = (y*y + 1)%n;
     652          293 :                 q = q*(x > y ? x - y : y - x)%n;
     653              :             }
     654           87 :             d = nut_i64_egcd(q, n, NULL, NULL);
     655              :         }
     656           87 :         r *= 2;
     657              :     }
     658           25 :     if(d == n){
     659           17 :         do{
     660           17 :             ys = (ys*ys + 1)%n;
     661           17 :             d = nut_i64_egcd(x > ys ? x - ys : ys - x, n, NULL, NULL);
     662           17 :         }while(d == 1);
     663              :     }
     664           25 :     return d;
     665              : }
     666              : 
     667              : //convenience function to double a point on an elliptic curve.  _xr and _yr are out params
     668              : //returns 0 for an ordinary point, 1 for identity, and -1 if a nontrivial factor was found and placed in _xr
     669              : [[gnu::nonnull(6, 7)]]
     670              : NUT_ATTR_ACCESS(write_only, 6) NUT_ATTR_ACCESS(write_only, 7)
     671       454918 : static inline int ecg_double(int64_t n, int64_t a, int64_t x, int64_t y, bool is_id, int64_t *restrict _xr, int64_t *restrict _yr){
     672       454918 :     if(is_id || y == 0){
     673              :         return 1;
     674              :     }
     675       454918 :     int64_t s, dy, dx;
     676       454918 :     dy = (3*(int128_t)x*x + a)%n;
     677       454918 :     if(dy < 0){
     678            0 :         dy += n;
     679              :     }
     680       454918 :     dx = nut_i64_mod(2*y, n);
     681       454918 :     int64_t d = nut_i64_egcd(dx, n, &s, NULL);
     682       454918 :     if(d != 1){
     683          382 :         *_xr = d;
     684          382 :         return -1;
     685              :     }
     686       454536 :     s = (int128_t)s*dy%n;
     687       454536 :     int64_t xr = ((int128_t)s*s - 2*x)%n;
     688       454536 :     if(xr < 0){
     689           66 :         xr += n;
     690              :     }
     691       454536 :     *_xr = xr;
     692       454536 :     int64_t yr = (y + (int128_t)s*(xr - x))%n;
     693       454536 :     if(yr < 0){
     694       227248 :         yr += n;
     695              :     }
     696       454536 :     *_yr = yr;
     697       454536 :     return 0;
     698              : }
     699              : 
     700              : //convenience function to add two points on an elliptic curve.  _xr and _yr are out params
     701              : //returns 0 for an ordinary point, 1 for identity, and -1 if a nontrivial factor was found and placed in _xr
     702              : [[gnu::nonnull(9, 10)]]
     703              : NUT_ATTR_ACCESS(write_only, 9) NUT_ATTR_ACCESS(write_only, 10)
     704       167500 : static inline int ecg_add(int64_t n, int64_t a, int64_t xp, int64_t yp, bool is_id_p, int64_t xq, int64_t yq, bool is_id_q, int64_t *_xr, int64_t *_yr){
     705       167500 :     if(is_id_p){
     706            0 :         if(is_id_q){
     707              :             return 1;
     708              :         }
     709            0 :         *_xr = xq, *_yr = yq;
     710            0 :         return 0;
     711       167500 :     }else if(is_id_q){
     712            0 :         *_xr = xp, *_yr = yp;
     713            0 :         return 0;
     714              :     }
     715       167500 :     int64_t s, dy, dx;
     716       167500 :     if(xp != xq){
     717       167500 :         dy = nut_i64_mod(yp - yq, n);
     718       167500 :         dx = nut_i64_mod(xp - xq, n);
     719            0 :     }else if(yp != yq || yp == 0){
     720              :         return 1;
     721              :     }else{
     722            0 :         dy = (3*(int128_t)xp*xp + a)%n;
     723            0 :         if(dy < 0){
     724            0 :             dy += n;
     725              :         }
     726            0 :         dx = nut_i64_mod(2*yp, n);
     727              :     }
     728       167500 :     int64_t d = nut_i64_egcd(dx, n, &s, NULL);
     729       167500 :     if(d != 1){
     730          533 :         *_xr = d;
     731          533 :         return -1;
     732              :     }
     733       166967 :     s = (int128_t)s*dy%n;
     734       166967 :     int64_t xr = ((int128_t)s*s - xp - xq)%n;
     735       166967 :     if(xr < 0){
     736           30 :         xr += n;
     737              :     }
     738       166967 :     *_xr = xr;
     739       166967 :     int64_t yr = (yp + (int128_t)s*(xr + xp))%n;
     740       166967 :     if(yr < 0){
     741        83374 :         yr += n;
     742              :     }
     743       166967 :     *_yr = yr;
     744       166967 :     return 0;
     745              : }
     746              : 
     747              : //convenience function to add k copies of a point on an elliptic curve together.  _xr and _yr are out params
     748              : //returns 0 for an ordinary point, 1 for identity, and -1 if a nontrivial factor was found and placed in _xr
     749              : [[gnu::nonnull(7, 8)]]
     750              : NUT_ATTR_ACCESS(write_only, 7) NUT_ATTR_ACCESS(write_only, 8)
     751       216307 : static inline int ecg_scale(int64_t n, int64_t a, int64_t x, int64_t y, bool is_id, int64_t k, int64_t *_xr, int64_t *_yr){
     752       216307 :     if(is_id || !k){
     753              :         return 1;
     754              :     }
     755       216307 :     int is_id_r = 0, is_id_s = 0;
     756       216307 :     int64_t xs = x, ys = y;
     757       408095 :     while(!(k&1)){
     758       191945 :         is_id_s = ecg_double(n, a, xs, ys, is_id_s, &xs, &ys);
     759       191945 :         if(is_id_s == -1){
     760          157 :             *_xr = xs;
     761          157 :             return -1;
     762       191788 :         }else if(is_id_s == 1){
     763              :             return 1;
     764              :         }
     765       191788 :         k >>= 1;
     766              :     }
     767       216150 :     *_xr = xs, *_yr = ys;
     768       216150 :     is_id_r = is_id_s;
     769       478365 :     while(1){
     770       478365 :         k >>= 1;
     771       478365 :         if(!k){
     772              :             return is_id_r;
     773              :         }
     774       262973 :         is_id_s = ecg_double(n, a, xs, ys, is_id_s, &xs, &ys);
     775       262973 :         if(is_id_s == -1){
     776          225 :             *_xr = xs;
     777          225 :             return -1;
     778       262748 :         }else if(is_id_s == 1){
     779              :             return is_id_r;
     780       262748 :         }else if(k&1){
     781       167500 :             is_id_r = ecg_add(n, a, xs, ys, is_id_s, *_xr, *_yr, is_id_r, _xr, _yr);
     782       167500 :             if(is_id_r == -1){
     783              :                 return -1;
     784              :             }
     785              :         }
     786              :     }
     787              : }
     788              : 
     789              : //TODO: this whole function needs to be rewritten to use projective montgomery curves instead of affine wierstrass curves
     790              : //TODO: consider adding brent's second stage for fun
     791        24362 : int64_t nut_u64_factor1_lenstra(int64_t n, int64_t x, int64_t y, int64_t a, int64_t B){
     792        24362 :     int64_t d = nut_i64_egcd(n, 6, NULL, NULL);//ensure Z/nZ doesn't have a field of characteristic 2 or 3 as a subgroup
     793        24362 :     if(d != 1){
     794              :         return d;
     795              :     }
     796        24362 :     int64_t b = ((int128_t)x*x + a)%n;
     797        24362 :     b = ((int128_t)y*y - (int128_t)x*b)%n;
     798        24362 :     if(b < 0){
     799        10916 :         b += n;
     800              :     }
     801        24362 :     d = (int128_t)a*a%n;
     802        24362 :     d = 4*(int128_t)a*d%n;
     803        24362 :     d = nut_i64_egcd(d + 27*((int128_t)b*b%n), n, NULL, NULL);//ensure the rhs of the elliptic curve does not have a repeated zero, leading to a cusp
     804        24362 :     if(d != 1){
     805              :         return d;
     806              :     }
     807              :     int is_id = 0;
     808       239738 :     for(int64_t k = 2; k <= B; ++k){
     809       216307 :         is_id = ecg_scale(n, a, x, y, is_id, k, &x, &y);
     810       216307 :         if(is_id == -1){
     811          915 :             return x;
     812       215392 :         }else if(is_id == 1){
     813              :             return n;
     814              :         }
     815              :     }
     816              :     return n;
     817              : }
     818              : 
     819              : /* There are many possible forms for elliptic curves, similar to how there are many possible forms for parabolas (y=ax^2+bx+c, y=a(x-l)(x-m), etc).
     820              :  * The basic form, used above, is called Wierstrass form and looks like y^2=x^3+ax+b.  Other common forms include twisted Edwards and Montgomery.
     821              :  * Montgomery form is by^2=x^3+ax^2+x.  Aside from what form we use for the curve, there are many possible ways to store points on the curve.
     822              :  * The simplest is to simply store x and y and a boolean indicating if the point is actually identity or not.  However, this creates branching
     823              :  * and complexity due to handling points at infinity and finite points on the curve separately.  So instead we can use projective coordinates,
     824              :  * where x and y are expressed as ratios x=X/Z and y=Y/Z.  If Z is zero, we have a point at infinity.  Variations of this exist where X and Y are
     825              :  * divided by different powers of Z.
     826              :  * 
     827              :  * Obviously, x and y are closely related by whatever curve we are using.  For any x, there are at most 2 solutions y placing it on the curve.
     828              :  * For any y, there are at most 3 solutions x putting it on the curve.  So it's pretty concievable that X and Z might be sufficient to compute
     829              :  * the x coordinate of any scalar multiple of a point on the curve.  In fact, it turns out this is true.  Using projective coordinates also lets
     830              :  * us do fewer gcd computations, hence the motivation to have a Z coordinate, yet we do not really need the y coordinate per se, so it is great
     831              :  * we can do away with it.
     832              :  * 
     833              :  * To compute a scalar multiple of a point P, we can use a double and add algorithm, just like binary exponentiation.  For explicit x, y coordinates
     834              :  * on a Wierstrass curve this is trivially easy.  For a Montgomery curve, we get the following rules for addition and doubling:
     835              :  * 
     836              :  * X_{m+n} = Z_{m-n}((X_m-Z_m)(X_n+Z_n)+(X_m+Z_m)(X_n-Z_n))^2
     837              :  * Z_{m+n} = X_{m-n}((X_m-Z_m)(X_n+Z_n)-(X_m+Z_m)(X_n-Z_n))^2
     838              :  * 
     839              :  * X_{2m} = (X_n+Z_n)^2(X_n-Z_n)^2
     840              :  * Z_{2m} = (4X_n Z_n)((X_n-Z_n)^2+((a+20)/4)(4X_n Z_n))
     841              :  * where
     842              :  * 4X_n Z_n = (X_n + Z_n)^2 - (X_n - Z_n)^2
     843              :  * 
     844              :  * The rule for doubling is great, but the rule for addition seems pretty much useless: we don't know a thing about X_{m-n} or Z_{m-n}!!!!
     845              :  * The simplest way around this, which is very good in our case where we need to compute kP for many arbitrary P and consecutive small k, is
     846              :  * to consider the case where m-n = 1 so the equation for P_{m+n} in terms of P_{m-n}, P_m, and P_n becomes an equation for P_{2n+1} in terms of
     847              :  * P_{n+1} and P_n.
     848              :  * 
     849              :  * Thus, we can build up P_k by scanning over the binary representation of k from most significant to least.  At each bit index, we have P_n and
     850              :  * P_{n+1} where n is the portion of k more significant than the current position, rounded down to be even, and from them we compute P_{2n+1}
     851              :  * and P_{2n}
     852              :  * 
     853              :  * Ex:      L=(0)P   H=(1)P
     854              :  * 10110    L=L+H=(1)P=(2*0+1)P
     855              :  * ^        H=2H=(2)P=(2*1)P
     856              :  * 10110    H=L+H=(3)P=(2*1+1)P
     857              :  *  ^       L=2L=(2)P=(2*1)P
     858              :  * 10110    L=L+H=(5)P=(2*2+1)P
     859              :  *   ^      H=2H=(6)P=(2*3)P
     860              :  * 10110    L=L+H=(11)P=(2*5+1)P
     861              :  *    ^     H=2H=(12)P=(2*6)P
     862              :  * 10110    H=L+H=(23)P=(2*11+1)P
     863              :  *     ^    L=2L=(22)P=(2*11)P
     864              :  * and the result is L
     865              :  */
     866            0 : int64_t nut_u64_factor1_lenstra_montgomery(int64_t n, int64_t x, int64_t y, int64_t a, int64_t B){
     867            0 :     int64_t d = nut_i64_egcd(n, 6, NULL, NULL);//check n does not contain a degenerate prime field
     868            0 :     if(d != 1){
     869              :         return d;
     870              :     }
     871            0 :     int64_t b = nut_i64_mod(y*y, n);
     872            0 :     d = nut_i64_egcd(b, n, &b, NULL);//check that x, y is on some montgomery curve mod n with a given
     873            0 :     if(d != 1){
     874              :         return d;
     875              :     }
     876            0 :     b = nut_i64_mod(b*x, n);
     877            0 :     d = nut_i64_mod(x + a, n);
     878            0 :     d = nut_i64_mod(x*d + 1, n);
     879            0 :     b = nut_i64_mod(b*d, n);
     880            0 :     d = nut_i64_mod(a*a - 4, n);
     881            0 :     d = nut_i64_egcd(b*d, n, NULL, NULL);//check that the curve does not have a sharp cusp
     882            0 :     if(d != 1){
     883              :         return d;
     884              :     }
     885            0 :     int64_t C;
     886            0 :     nut_i64_egcd(4, n, &C, NULL);
     887            0 :     C = nut_i64_mod((a + 2)*C, n);
     888            0 :     int64_t Zh = 1, Xh = x;
     889            0 :     int64_t Z1 = 1, X1 = x;
     890            0 :     for(int64_t k = 2; k <= B; ++k){
     891            0 :         int64_t Zl = 0, Xl = 1;
     892            0 :         for(int64_t t = 1ll << (63 - __builtin_clzll(k)); t; t >>= 1){
     893            0 :             if(k & t){
     894              :                 //L = L + H
     895              :                 //H = 2*H
     896            0 :                 int64_t dh = nut_i64_mod(Xh - Zh, n);
     897            0 :                 int64_t sl = nut_i64_mod(Xl + Zl, n);
     898            0 :                 int64_t sh = nut_i64_mod(Xh + Zh, n);
     899            0 :                 int64_t dl = nut_i64_mod(Xl - Zl, n);
     900            0 :                 int64_t dhsl = nut_i64_mod(dh*sl, n);
     901            0 :                 int64_t shdl = nut_i64_mod(sh*dl, n);
     902            0 :                 Xl = nut_i64_mod(dhsl + shdl, n);
     903            0 :                 Xl = nut_i64_mod(Xl*Xl, n);
     904            0 :                 Xl = nut_i64_mod(Z1*Xl, n);
     905            0 :                 Zl = nut_i64_mod(dhsl - shdl, n);
     906            0 :                 Zl = nut_i64_mod(Zl*Zl, n);
     907            0 :                 Zl = nut_i64_mod(X1*Zl, n);
     908            0 :                 int64_t sh2 = nut_i64_mod(sh*sh, n);
     909            0 :                 int64_t dh2 = nut_i64_mod(dh*dh, n);
     910            0 :                 int64_t ch = nut_i64_mod(sh2 - dh2, n);
     911            0 :                 Xh = nut_i64_mod(sh2*dh2, n);
     912            0 :                 Zh = nut_i64_mod(C*ch, n);
     913            0 :                 Zh = nut_i64_mod(ch*(dh2 + Zh), n);
     914              :             }else{
     915              :                 //H = L + H
     916              :                 //L = 2*L
     917            0 :                 int64_t dh = nut_i64_mod(Xh - Zh, n);
     918            0 :                 int64_t sl = nut_i64_mod(Xl + Zl, n);
     919            0 :                 int64_t sh = nut_i64_mod(Xh + Zh, n);
     920            0 :                 int64_t dl = nut_i64_mod(Xl - Zl, n);
     921            0 :                 int64_t dhsl = nut_i64_mod(dh*sl, n);
     922            0 :                 int64_t shdl = nut_i64_mod(sh*dl, n);
     923            0 :                 Xh = nut_i64_mod(dhsl + shdl, n);
     924            0 :                 Xh = nut_i64_mod(Xh*Xh, n);
     925            0 :                 Zh = nut_i64_mod(dhsl - shdl, n);
     926            0 :                 Zh = nut_i64_mod(Zh*Zh, n);
     927            0 :                 Zh = nut_i64_mod(x*Zh, n);
     928            0 :                 int64_t sl2 = nut_i64_mod(sl*sl, n);
     929            0 :                 int64_t dl2 = nut_i64_mod(dl*dl, n);
     930            0 :                 int64_t cl = nut_i64_mod(sl2 - dl2, n);
     931            0 :                 Xl = nut_i64_mod(sl2*dl2, n);
     932            0 :                 Zl = nut_i64_mod(C*cl, n);
     933            0 :                 Zl = nut_i64_mod(cl*(dl2 + Zl), n);
     934              :             }
     935              :         }
     936            0 :         if(!Zl){
     937              :             return n;
     938              :         }
     939            0 :         d = nut_i64_egcd(Zl, n, NULL, NULL);
     940            0 :         if(d != 1){
     941              :             return d;
     942              :         }
     943            0 :         Z1 = Zl;
     944            0 :         X1 = Xl;
     945              :     }
     946              :     return n;
     947              : }
     948              : 
         |