#include "nut/debug.h"
#include <stdlib.h>
#include <string.h>

#include <nut/matrix.h>
#include <nut/modular_math.h>
#include <nut/factorization.h>
#include <nut/dirichlet.h>
#include <nut/sieves.h>

uint64_t nut_dirichlet_D(uint64_t max, uint64_t m){
	uint64_t y = nut_u64_nth_root(max, 2);
	uint64_t res = 0;
	for(uint64_t n = 1; n <= y; ++n){
		uint64_t term = max/n;
		res = m ? (res + term)%m : res + term;
	}
	return m ? (2*res + m - (y*y)%m)%m : 2*res - y*y;
}

uint64_t nut_dirichlet_Sigma(uint64_t max, uint64_t m){
	uint64_t y = nut_u64_nth_root(max, 2);
	uint64_t res = 0;
	for(uint64_t n = 1; n <= y; ++n){
		uint64_t k = max/n;
		// halve whichever of k and k + 1 is even BEFORE reducing mod m: reducing k
		// first changes its parity, so the halved product would not be k*(k + 1)/2 mod m
		uint64_t a = (k&1) ? k : k>>1;
		uint64_t b = (k&1) ? (k + 1)>>1 : k + 1;
		uint64_t term = m ? a%m*(b%m)%m : a*b;
		res = m ? (res + n%m*(k%m) + term)%m : res + n*k + term;
	}
	if(m){
		uint64_t cross = (y&1) ? y*y%m*((y + 1)>>1)%m : (y + 1)*(y>>1)%m*y%m;
		return (res + m - cross)%m;
	}else{
		return res - (y*y*(y + 1)>>1);
	}
}
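
// A hedged cross-check sketch (not part of the library; the NUT_DIRICHLET_EXAMPLES
// guard is a hypothetical name used only for these examples): D and Sigma should
// agree with naive O(max log max) divisor sums accumulated over each divisor d.
#ifdef NUT_DIRICHLET_EXAMPLES
static bool check_D_Sigma_naive(uint64_t max){
	uint64_t D = 0, S = 0;
	for(uint64_t d = 1; d <= max; ++d){
		for(uint64_t n = d; n <= max; n += d){
			D += 1;// each multiple n of d contributes 1 to d(n) ...
			S += d;// ... and d to sigma(n)
		}
	}
	return D == nut_dirichlet_D(max, 0) && S == nut_dirichlet_Sigma(max, 0);
}
#endif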

static inline bool is_composite_unpacked(uint64_t n, const uint8_t buf[static n/8 + 1]){
	return buf[n/8] & (1 << (n%8));
}

static inline void mark_composite_unpacked(uint64_t n, uint8_t buf[static n/8 + 1]){
	buf[n/8] |= (1 << (n%8));
}

// Implemented based on https://gbroxey.github.io/blog/2023/04/30/mult-sum-1.html,
// in turn based on https://codeforces.com/blog/entry/54090,
// which is just Euler's sieve (https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes#Euler's_sieve),
// a modification of the sieve of Eratosthenes which marks off each composite number only once.
// This makes it easier to strap computation of an arbitrary multiplicative function on top of it,
// but even though the Codeforces post claims it is linear, in practice it often performs worse than
// a basic sieve of Eratosthenes. That said, preliminary testing shows the sieve of Eratosthenes
// is only about 14% faster than Euler's sieve in the release build, so it isn't worth switching.
// Euler's sieve generally works as follows:
// 1. Allocate result[1, ..., n] (in this case, this is f_conv_u_vals and smallest_ppow)
// 2. Initialize is_c_buf[1, ..., n] = false (array of bit flags for each number from 1 to n, 0 for primes and 1 for composites)
// 3. Initialize primes = [], an empty list of primes
// 4. For i = 2, ..., n:
// 5. If i is not marked as composite by now (checked using is_c_buf), add it to the list primes
// 6. Either way, for every prime p in primes:
// 7. If p divides i: (p is the smallest prime divisor of i, since in this case we will break out of this loop)
// 8. If the largest power of the smallest prime divisor of i (smallest_ppow[i]) equals i, then i is a perfect power of p, and so is p*i
// 9. In this case, we must compute (f <*> g)(p*i) using the definition of Dirichlet convolution
// 10. We can set smallest_ppow[i*p] = i*p
// 11. Otherwise, p divides i but i is not a perfect power of p, so we can write i*p as smallest_ppow[i]*p * i/smallest_ppow[i]
// 12. These two parts are coprime, so we can find (f <*> g)(i*p) = (f <*> g)(smallest_ppow[i]*p) * (f <*> g)(i/smallest_ppow[i])
// 13. And smallest_ppow[i*p] = smallest_ppow[i]*p
// 14. Either way, break out of the loop over p
// 15. Otherwise, p does not divide i, so p and i are already coprime (and p is smaller than any prime divisor of i)
// 16. So we can set (f <*> g)(i*p) = (f <*> g)(i) * (f <*> g)(p)
// 17. And smallest_ppow[i*p] = p
// Notice that we don't really need the smallest_ppow array: we only ever use it when we know p, the smallest prime divisor of i,
// so we could instead use a while loop to find the largest power of p dividing i, but storing it is a space/time tradeoff.
// Also notice that we could cache smallest_ppow, primes, and is_c_buf, although the current implementation does not do that.
// On the other hand, the sieve of Eratosthenes generally works as follows:
// 1. Initialize result[1, ..., n] = 1
// 2. For i = 2, ..., n:
// 3. If result[i] != 1, i is not prime so we can continue
// 4. Otherwise, i is prime (with a caveat noted below)
// 5. For every power pp of i which is <= n:
// 6. Compute (f <*> g)(pp) using the definition of Dirichlet convolution, and store this as h
// 7. For every multiple m of pp <= n, starting at 2*pp, which is NOT a multiple of pp*i:
// 8. Multiply (f <*> g)(m) by h
// When i is prime, result[i] will remain set to 1 when we get to the ith loop iteration.
// HOWEVER, when (f <*> g)(n) can be 1 for composite values of n, this is a problem because result[i] = 1 no longer means i is prime.
// For convolutions which can be 1 for composite inputs, we would need to allocate a separate array to store whether or not each i is prime.
bool nut_euler_sieve_conv_u(int64_t n, int64_t modulus, const int64_t f_vals[restrict static n+1], int64_t f_conv_u_vals[restrict static n+1]){
	int64_t *smallest_ppow = malloc((n+1)*sizeof(int64_t));
	uint8_t *is_c_buf = calloc(n/8 + 1, sizeof(uint8_t));
	uint64_t num_primes = 0;
	int64_t *primes = malloc(nut_max_primes_le(n)*sizeof(int64_t));
	if(!smallest_ppow || !is_c_buf || !primes){
		free(smallest_ppow);
		free(is_c_buf);
		free(primes);
		return false;
	}
	f_conv_u_vals[1] = 1;
	for(int64_t i = 2; i <= n; ++i){
		// if i is prime, add it to the list of primes
		if(!is_composite_unpacked(i, is_c_buf)){
			primes[num_primes++] = i;
			int64_t term = f_vals[i] + 1;
			f_conv_u_vals[i] = modulus ? nut_i64_mod(term, modulus) : term;
			smallest_ppow[i] = i;
		}
		// the key feature of Euler's sieve is that it ONLY marks off products i*p for primes p
		// up to the smallest prime factor of i. This requires storing an array of
		// primes and reading it A LOT of times, but ensures that each composite number
		// is visited exactly once
		for(uint64_t j = 0; j < num_primes; ++j){
			int64_t p = primes[j], m;
			if(__builtin_mul_overflow(p, i, &m) || m > n){
				break;
			}
			mark_composite_unpacked(m, is_c_buf);
			if(i%p == 0){// i is a multiple of p, so in particular i and p are not coprime
				// also notice that because we break out of this loop at the bottom of this if statement,
				// p is in fact the smallest prime divisor of i
				int64_t ppow = smallest_ppow[i];
				int64_t B = ppow*p;
				smallest_ppow[m] = B;
				int64_t v = i/ppow;
				if(v != 1){// i is not a perfect power of p, that is, i = v*p**a with v != 1
					int64_t term = f_conv_u_vals[v]*f_conv_u_vals[B];
					f_conv_u_vals[m] = modulus ? nut_i64_mod(term, modulus) : term;
				}else{// i is a perfect power of p (and so is m)
					// from the definition, (f <*> u)(p**a) = f(1)*u(p**a) + f(p)*u(p**(a-1)) + ... + f(p**a)*u(1)
					// = f(1) + f(p) + ... + f(p**a) = (f <*> u)(p**(a-1)) + f(p**a)
					int64_t term = f_conv_u_vals[i] + f_vals[m];
					f_conv_u_vals[m] = modulus ? nut_i64_mod(term, modulus) : term;
				}
				break;
			}// otherwise, i and p are coprime, so (f <*> u)(i*p) = (f <*> u)(i)*(f <*> u)(p)
			int64_t term = f_conv_u_vals[i]*f_conv_u_vals[p];
			f_conv_u_vals[m] = modulus ? nut_i64_mod(term, modulus) : term;
			smallest_ppow[m] = p;
		}
	}
	free(smallest_ppow);
	free(primes);
	free(is_c_buf);
	return true;
}
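
// A minimal usage sketch (same hypothetical guard as above): taking f = N, so that
// f(i) = i, the sieve computes N <*> u = sigma; sigma(6) = 1 + 2 + 3 + 6 = 12.
#ifdef NUT_DIRICHLET_EXAMPLES
static bool check_conv_u_sigma(void){
	enum {LIM = 10};
	int64_t f[LIM + 1], h[LIM + 1];
	for(int64_t i = 0; i <= LIM; ++i){
		f[i] = i;// f = N
	}
	return nut_euler_sieve_conv_u(LIM, 0, f, h) && h[6] == 12;
}
#endif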

bool nut_euler_sieve_conv_N(int64_t n, int64_t modulus, const int64_t f_vals[restrict static n+1], int64_t f_conv_N_vals[restrict static n+1]){
	int64_t *smallest_ppow = malloc((n+1)*sizeof(int64_t));
	uint8_t *is_c_buf = calloc(n/8 + 1, sizeof(uint8_t));
	uint64_t num_primes = 0;
	int64_t *primes = malloc(nut_max_primes_le(n)*sizeof(int64_t));
	if(!smallest_ppow || !is_c_buf || !primes){
		free(smallest_ppow);
		free(is_c_buf);
		free(primes);
		return false;
	}
	f_conv_N_vals[1] = 1;
	for(int64_t i = 2; i <= n; ++i){
		// if i is prime, add it to the list of primes
		if(!is_composite_unpacked(i, is_c_buf)){
			primes[num_primes++] = i;
			int64_t term = f_vals[i] + i;
			f_conv_N_vals[i] = modulus ? nut_i64_mod(term, modulus) : term;
			smallest_ppow[i] = i;
		}
		// the key feature of Euler's sieve is that it ONLY marks off products i*p for primes p
		// up to the smallest prime factor of i. This requires storing an array of
		// primes and reading it A LOT of times, but ensures that each composite number
		// is visited exactly once
		for(uint64_t j = 0; j < num_primes; ++j){
			int64_t p = primes[j], m;
			if(__builtin_mul_overflow(p, i, &m) || m > n){
				break;
			}
			mark_composite_unpacked(m, is_c_buf);
			if(i%p == 0){// i is a multiple of p, so in particular i and p are not coprime
				// also notice that because we break out of this loop at the bottom of this if statement,
				// p is in fact the smallest prime divisor of i
				int64_t ppow = smallest_ppow[i];
				int64_t B = ppow*p;
				smallest_ppow[m] = B;
				int64_t v = i/ppow;
				if(v != 1){// i is not a perfect power of p, that is, i = v*p**a with v != 1
					int64_t term = f_conv_N_vals[v]*f_conv_N_vals[B];
					f_conv_N_vals[m] = modulus ? nut_i64_mod(term, modulus) : term;
				}else{// i is a perfect power of p (and so is m)
					// from the definition, (f <*> N)(p**a) = f(1)*N(p**a) + f(p)*N(p**(a-1)) + ... + f(p**a)*N(1)
					// = f(1)*p**a + f(p)*p**(a-1) + ... + f(p**a)*1 = p*(f <*> N)(p**(a-1)) + f(p**a)
					if(modulus){
						int64_t term = (p%modulus)*f_conv_N_vals[i] + f_vals[m];
						f_conv_N_vals[m] = nut_i64_mod(term, modulus);
					}else{
						f_conv_N_vals[m] = p*f_conv_N_vals[i] + f_vals[m];
					}
				}
				break;
			}// otherwise, i and p are coprime, so (f <*> N)(i*p) = (f <*> N)(i)*(f <*> N)(p)
			int64_t term = f_conv_N_vals[i]*f_conv_N_vals[p];
			f_conv_N_vals[m] = modulus ? nut_i64_mod(term, modulus) : term;
			smallest_ppow[m] = p;
		}
	}
	free(smallest_ppow);
	free(primes);
	free(is_c_buf);
	return true;
}

bool nut_euler_sieve_conv(int64_t n, int64_t modulus, const int64_t f_vals[static n+1], const int64_t g_vals[static n+1], int64_t f_conv_vals[restrict static n+1]){
	int64_t *smallest_ppow = malloc((n+1)*sizeof(int64_t));
	uint8_t *is_c_buf = calloc(n/8 + 1, sizeof(uint8_t));
	uint64_t num_primes = 0;
	int64_t *primes = malloc(nut_max_primes_le(n)*sizeof(int64_t));
	if(!smallest_ppow || !is_c_buf || !primes){
		free(smallest_ppow);
		free(is_c_buf);
		free(primes);
		return false;
	}
	f_conv_vals[1] = 1;
	for(int64_t i = 2; i <= n; ++i){
		// if i is prime, add it to the list of primes
		if(!is_composite_unpacked(i, is_c_buf)){
			primes[num_primes++] = i;
			int64_t term = f_vals[i] + g_vals[i];
			f_conv_vals[i] = modulus ? nut_i64_mod(term, modulus) : term;
			smallest_ppow[i] = i;
		}
		// the key feature of Euler's sieve is that it ONLY marks off products i*p for primes p
		// up to the smallest prime factor of i. This requires storing an array of
		// primes and reading it A LOT of times, but ensures that each composite number
		// is visited exactly once
		for(uint64_t j = 0; j < num_primes; ++j){
			int64_t p = primes[j], m;
			if(__builtin_mul_overflow(p, i, &m) || m > n){
				break;
			}
			mark_composite_unpacked(m, is_c_buf);
			if(i%p == 0){// i is a multiple of p, so in particular i and p are not coprime
				// also notice that because we break out of this loop at the bottom of this if statement,
				// p is in fact the smallest prime divisor of i
				int64_t ppow = smallest_ppow[i];
				int64_t B = ppow*p;
				smallest_ppow[m] = B;
				int64_t v = i/ppow;
				if(v != 1){// i is not a perfect power of p, that is, i = v*p**a with v != 1
					int64_t term = f_conv_vals[v]*f_conv_vals[B];
					f_conv_vals[m] = modulus ? nut_i64_mod(term, modulus) : term;
				}else{// i is a perfect power of p (and so is m)
					// from the definition, (f <*> g)(p**a) = f(1)*g(p**a) + f(p)*g(p**(a-1)) + ... + f(p**a)*g(1)
					int64_t c = f_vals[m] + g_vals[m];
					if(modulus){
						c = nut_i64_mod(c, modulus);
					}
					int64_t A = p;
					while(A < ppow){
						if(modulus){
							c = nut_i64_mod(c + f_vals[A]*g_vals[ppow], modulus);
							c = nut_i64_mod(c + f_vals[ppow]*g_vals[A], modulus);
						}else{
							c += f_vals[A]*g_vals[ppow] + f_vals[ppow]*g_vals[A];
						}
						A *= p;
						ppow /= p;
					}
					if(A == ppow){
						if(modulus){
							c = nut_i64_mod(c + f_vals[A]*g_vals[A], modulus);
						}else{
							c += f_vals[A]*g_vals[A];
						}
					}
					f_conv_vals[m] = c;
				}
				break;
			}// otherwise, i and p are coprime, so (f <*> g)(i*p) = (f <*> g)(i)*(f <*> g)(p)
			int64_t term = f_conv_vals[i]*f_conv_vals[p];
			f_conv_vals[m] = modulus ? nut_i64_mod(term, modulus) : term;
			smallest_ppow[m] = p;
		}
	}
	free(smallest_ppow);
	free(primes);
	free(is_c_buf);
	return true;
}

bool nut_Diri_init(nut_Diri *self, int64_t x, int64_t y){
	int64_t ymin = nut_u64_nth_root(x, 2);
	if(y < ymin){
		y = ymin;
	}
	int64_t yinv = x/y + 1;
	// the dense part buf[0..y] holds f(i); the sparse part buf[y + i] holds F(x/i) for 1 <= i < yinv
	if(!(self->buf = malloc((y + yinv)*sizeof(int64_t)))){
		return false;
	}
	self->x = x;
	self->y = y;
	self->yinv = yinv;
	return true;
}

void nut_Diri_destroy(nut_Diri *self){
	free(self->buf);
	*self = (nut_Diri){};
}

void nut_Diri_copy(nut_Diri *restrict dest, const nut_Diri *restrict src){
	memcpy(dest->buf, src->buf, (src->y + src->yinv)*sizeof(int64_t));
}

void nut_Diri_compute_I(nut_Diri *self){
	memset(self->buf, 0, (self->y + 1)*sizeof(int64_t));
	self->buf[1] = 1;
	for(int64_t i = 1; i < self->yinv; ++i){
		self->buf[self->y + i] = 1;
	}
}

void nut_Diri_compute_u(nut_Diri *self, int64_t m){
	for(int64_t i = 0; i <= self->y; ++i){
		self->buf[i] = 1;
	}
	for(int64_t i = 1; i < self->yinv; ++i){
		int64_t term = self->x/i;
		self->buf[self->y + i] = m ? nut_i64_mod(term, m) : term;
	}
}
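
// A minimal usage sketch (hypothetical guard, not part of the library): after
// nut_Diri_compute_u, the dense part holds u(i) = 1 and the sparse part holds the
// summatory values U(x/i) = x/i, so sparse index 1 gives U(x) = x.
#ifdef NUT_DIRICHLET_EXAMPLES
static bool check_compute_u(void){
	nut_Diri tbl = {};
	bool ok = false;
	if(nut_Diri_init(&tbl, 100, 0)){
		nut_Diri_compute_u(&tbl, 0);
		ok = nut_Diri_get_dense(&tbl, 7) == 1 && nut_Diri_get_sparse(&tbl, 1) == 100;
	}
	nut_Diri_destroy(&tbl);
	return ok;
}
#endif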

void nut_Diri_compute_N(nut_Diri *self, int64_t m){
	for(int64_t i = 0; i <= self->y; ++i){
		self->buf[i] = m ? i%m : i;
	}
	for(int64_t i = 1; i < self->yinv; ++i){
		int64_t v = self->x/i;
		// halve whichever of v and v + 1 is even BEFORE reducing mod m (as in
		// nut_Diri_compute_conv_N below): reducing v first changes its parity,
		// so the halved product would no longer be v*(v + 1)/2 mod m
		int64_t a = (v&1) ? v : v >> 1;
		int64_t b = (v&1) ? (v + 1) >> 1 : v + 1;
		self->buf[self->y + i] = m ? a%m*(b%m)%m : a*b;
	}
}

// fills the dense part with i**k and the sparse part with the Faulhaber sums
// 1**k + ... + (x/i)**k, whose coefficients come from inverting a short Pascal matrix
bool nut_Diri_compute_Nk(nut_Diri *restrict self, uint64_t k, int64_t m){
	if(!m){
		nut_i64_Matrix pascal_mat [[gnu::cleanup(nut_i64_Matrix_destroy)]] = {}, faulhaber_mat [[gnu::cleanup(nut_i64_Matrix_destroy)]] = {};
		int64_t *vand_vec [[gnu::cleanup(cleanup_free)]] = malloc((k + 1)*sizeof(int64_t));
		if(!vand_vec){
			return false;
		}
		if(!nut_i64_Matrix_init(&pascal_mat, k + 1, k + 1)){
			return false;
		}
		if(!nut_i64_Matrix_init(&faulhaber_mat, k + 1, k + 1)){
			return false;
		}
		nut_i64_Matrix_fill_short_pascal(&pascal_mat);
		int64_t denom = nut_i64_Matrix_invert_ltr(&pascal_mat, &faulhaber_mat);
		for(int64_t i = 0; i <= self->y; ++i){
			self->buf[i] = nut_u64_pow(i, k);
		}
		for(int64_t i = 1; i < self->yinv; ++i){
			int64_t v = self->x/i;
			nut_i64_Matrix_fill_vandemond_vec(v + 1, k + 1, m, vand_vec);
			int64_t tot = 0;
			for(uint64_t j = 0; j < k + 1; ++j){
				int64_t a = faulhaber_mat.buf[(k + 1)*k + j]*vand_vec[j];
				tot = tot + a;
			}
			tot /= denom;
			self->buf[self->y + i] = tot;
		}
		return true;
	}else{
		nut_u64_ModMatrix pascal_mat [[gnu::cleanup(nut_u64_ModMatrix_destroy)]] = {}, faulhaber_mat [[gnu::cleanup(nut_u64_ModMatrix_destroy)]] = {};
		uint64_t *vand_vec [[gnu::cleanup(cleanup_free)]] = malloc((k + 1)*sizeof(uint64_t));
		if(!vand_vec){
			return false;
		}
		if(!nut_u64_ModMatrix_init(&pascal_mat, k + 1, k + 1, m)){
			return false;
		}
		if(!nut_u64_ModMatrix_init(&faulhaber_mat, k + 1, k + 1, m)){
			return false;
		}
		nut_u64_ModMatrix_fill_short_pascal(&pascal_mat);
		if(!nut_u64_ModMatrix_invert_ltr(&pascal_mat, &faulhaber_mat)){
			return false;
		}
		for(int64_t i = 0; i <= self->y; ++i){
			self->buf[i] = nut_u64_powmod(i, k, m);
		}
		for(int64_t i = 1; i < self->yinv; ++i){
			int64_t v = self->x/i;
			nut_u64_Matrix_fill_vandemond_vec((v + 1)%m, k + 1, m, vand_vec);
			int64_t tot = 0;
			for(uint64_t j = 0; j < k + 1; ++j){
				uint64_t a = faulhaber_mat.buf[(k + 1)*k + j]*vand_vec[j];
				tot = (tot + a)%m;
			}
			self->buf[self->y + i] = tot;
		}
		return true;
	}
}
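
// A hedged cross-check sketch (hypothetical guard): for k = 2 the sparse part
// should hold square pyramidal numbers, so sparse index 1 should be
// 1**2 + ... + x**2 = x(x + 1)(2x + 1)/6 (for x small enough not to overflow).
#ifdef NUT_DIRICHLET_EXAMPLES
static bool check_Nk(int64_t x){
	nut_Diri tbl = {};
	bool ok = false;
	if(nut_Diri_init(&tbl, x, 0)){
		ok = nut_Diri_compute_Nk(&tbl, 2, 0) && nut_Diri_get_sparse(&tbl, 1) == x*(x + 1)*(2*x + 1)/6;
	}
	nut_Diri_destroy(&tbl);
	return ok;
}
#endif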

void nut_Diri_compute_mertens(nut_Diri *restrict self, int64_t m, const uint8_t mobius[restrict static self->y/4 + 1]){
	nut_Diri_set_dense(self, 0, 0);
	nut_Diri_set_dense(self, 1, 1);
	// temporarily store the Mertens function M(i) in the dense part
	for(int64_t i = 2, acc = 1, v; i <= self->y; ++i){
		if((v = nut_Bitfield2_arr_get(mobius, i)) == 3){
			v = -1;
		}
		nut_Diri_set_dense(self, i, acc += v);
	}
	// since u <*> mu = I, the hyperbola formula gives
	// M(v) = 1 - sum(j = 1 ... vr, mu(j)*(v/j)) - sum(j = 2 ... vr, M(v/j)) + M(vr)*vr
	for(int64_t i = self->yinv - 1; i; --i){
		int64_t v = self->x/i;
		int64_t M = 1;
		int64_t vr = nut_u64_nth_root(v, 2);
		for(int64_t j = 1; j <= vr; ++j){
			// the dense part holds M right now, so dense(j) - dense(j - 1) recovers mu(j)
			int64_t term = (nut_Diri_get_dense(self, j) - nut_Diri_get_dense(self, j - 1))*(v/j);
			if(m){
				M = nut_i64_mod(M - term, m);
			}else{
				M -= term;
			}
		}
		for(int64_t j = 2, term; j <= vr; ++j){
			if(i*j >= self->yinv){
				term = nut_Diri_get_dense(self, v/j);
			}else{
				term = nut_Diri_get_sparse(self, i*j);
			}
			if(m){
				M = nut_i64_mod(M - term, m);
			}else{
				M -= term;
			}
		}
		int64_t term;
		if(vr <= self->y){
			term = nut_Diri_get_dense(self, vr)*vr;
		}else{
			term = nut_Diri_get_sparse(self, self->x/vr)*vr;
		}
		if(m){
			M = nut_i64_mod(M + term, m);
		}else{
			M += term;
		}
		nut_Diri_set_sparse(self, i, M);
	}
	// finally, overwrite the dense part with mu itself so the table holds (mu, M) like the other tables
	for(int64_t i = 2, v; i <= self->y; ++i){
		if((v = nut_Bitfield2_arr_get(mobius, i)) == 3){
			v = -1;
		}
		nut_Diri_set_dense(self, i, v);
	}
}
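
// A minimal usage sketch (hypothetical guard): given a 2-bit packed Mobius table
// covering at least [0, y] in the encoding read by nut_Bitfield2_arr_get (3 <-> -1),
// the sparse part ends up holding the Mertens function, so sparse index 1 is M(x).
#ifdef NUT_DIRICHLET_EXAMPLES
static int64_t example_mertens(int64_t x, const uint8_t *mobius){
	nut_Diri tbl = {};
	int64_t M = 0;
	if(nut_Diri_init(&tbl, x, 0)){
		nut_Diri_compute_mertens(&tbl, 0, mobius);
		M = nut_Diri_get_sparse(&tbl, 1);
	}
	nut_Diri_destroy(&tbl);
	return M;
}
#endif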

bool nut_Diri_compute_dk(nut_Diri *restrict self, uint64_t k, int64_t m, nut_Diri *restrict f_tbl, nut_Diri *restrict g_tbl){
	if(self->y != f_tbl->y || self->y != g_tbl->y || self->x != f_tbl->x || self->x != g_tbl->x){
		return false;
	}else if(!k){
		nut_Diri_compute_I(self);
		return true;
	}else if(k == 1){
		nut_Diri_compute_u(self, m);
		return true;
	}
	// d_k = u <*> ... <*> u (k times), so compute u**k by binary exponentiation,
	// using f_tbl and g_tbl as scratch tables (both are clobbered)
	nut_Diri_compute_u(f_tbl, m);
	nut_Diri *t = self, *s = f_tbl, *r = g_tbl;
	while(k%2 == 0){
		if(!nut_Diri_compute_conv(t, m, s, s)){
			return false;
		}else{
			void *tmp = t;
			t = s;
			s = tmp;
		}//s = s*s
		k >>= 1;
	}
	nut_Diri_copy(r, s);
	while((k >>= 1)){
		if(!nut_Diri_compute_conv(t, m, s, s)){
			return false;
		}else{
			void *tmp = t;
			t = s;
			s = tmp;
		}//s = s*s
		if(k%2){
			if(!nut_Diri_compute_conv(t, m, r, s)){
				return false;
			}else{
				void *tmp = t;
				t = r;
				r = tmp;
			}//r = r*s
		}
	}
	if(r != self){
		nut_Diri_copy(self, r);
	}
	return true;
}
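
// A hedged cross-check sketch (hypothetical guard): d_2 = u <*> u is the ordinary
// divisor-count function, so computing d_k with k = 2 should agree with
// nut_dirichlet_D. Note that f_tbl and g_tbl are scratch tables and get clobbered.
#ifdef NUT_DIRICHLET_EXAMPLES
static bool check_dk(int64_t x){
	nut_Diri h = {}, f = {}, g = {};
	bool ok = false;
	if(nut_Diri_init(&h, x, 0) && nut_Diri_init(&f, x, 0) && nut_Diri_init(&g, x, 0)){
		ok = nut_Diri_compute_dk(&h, 2, 0, &f, &g) &&
			(uint64_t)nut_Diri_get_sparse(&h, 1) == nut_dirichlet_D(x, 0);
	}
	nut_Diri_destroy(&h);
	nut_Diri_destroy(&f);
	nut_Diri_destroy(&g);
	return ok;
}
#endif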

bool nut_Diri_compute_J(nut_Diri *restrict self, uint64_t p){
	int64_t *partial_sums [[gnu::cleanup(cleanup_free)]] = malloc(p*sizeof(int64_t));
	uint64_t *is_qr [[gnu::cleanup(cleanup_free)]] = nut_u64_make_jacobi_tbl(p, partial_sums);
	if(!partial_sums || !is_qr){
		return false;
	}
	for(uint64_t r = 0; r < p; ++r){
		int64_t v = nut_u64_jacobi_tbl_get(r, p, is_qr);
		for(int64_t i = r; i <= self->y; i += p){
			self->buf[i] = v;
		}
	}
	for(int64_t i = 1; i < self->yinv; ++i){
		int64_t r = self->x/i%p;
		self->buf[self->y + i] = partial_sums[r];
	}
	return true;
}

bool nut_Diri_compute_Chi_balanced(nut_Diri *restrict self, uint64_t n, int64_t m, const int64_t period_tbl[restrict static n]){
	int64_t *partial_sums [[gnu::cleanup(cleanup_free)]] = malloc(n*sizeof(int64_t));
	if(!partial_sums){
		return false;
	}
	int64_t acc = period_tbl[0];
	partial_sums[0] = acc;
	for(uint64_t r = 1; r < n; ++r){
		acc += period_tbl[r];
		if(acc >= m){
			acc -= m;
		}
		partial_sums[r] = acc;
	}
	for(uint64_t r = 0; r < n; ++r){
		int64_t v = period_tbl[r];
		for(int64_t i = r; i <= self->y; i += n){
			self->buf[i] = v;
		}
	}
	for(int64_t i = 1; i < self->yinv; ++i){
		int64_t r = self->x/i%n;
		self->buf[self->y + i] = partial_sums[r];
	}
	return true;
}

bool nut_Diri_compute_pi(nut_Diri *restrict self){
	// Lucy_Hedgehog style prime counting: start every entry at |[2, v]| = v - 1,
	// then for each prime p remove the numbers whose smallest prime factor is p,
	// leaving each entry equal to pi(v)
	self->buf[0] = 0;
	for(int64_t i = 1; i <= self->y; ++i){
		self->buf[i] = i - 1;
	}
	for(int64_t i = 1; i < self->yinv; ++i){
		int64_t term = self->x/i;
		self->buf[self->y + i] = term - 1;
	}
	for(int64_t p = 2; p <= self->y; ++p){
		int64_t c = self->buf[p - 1];
		if(self->buf[p] == c){
			continue;
		}
		for(int64_t i = 1; i < self->yinv; ++i){
			int64_t v = self->x/i;
			if(v < p*p){
				goto CONTINUE_PRIMELOOP;
			}
			int64_t j = v/p;
			int64_t m = j <= self->y ? self->buf[j] : self->buf[self->y + self->x/j];
			self->buf[self->y + i] -= m - c;
		}
		for(int64_t v = self->y; v >= 0; --v){
			if(v < p*p){
				break;
			}
			self->buf[v] -= (self->buf[v/p] - c);
		}
		CONTINUE_PRIMELOOP:;
	}
	return true;
}
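
// A minimal usage sketch (hypothetical guard): after nut_Diri_compute_pi the dense
// part holds pi(i) and the sparse part holds pi(x/i), so sparse index 1 is pi(x);
// for example pi(100) = 25.
#ifdef NUT_DIRICHLET_EXAMPLES
static bool check_compute_pi(void){
	nut_Diri tbl = {};
	bool ok = false;
	if(nut_Diri_init(&tbl, 100, 0)){
		ok = nut_Diri_compute_pi(&tbl) && nut_Diri_get_sparse(&tbl, 1) == 25;
	}
	nut_Diri_destroy(&tbl);
	return ok;
}
#endif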

bool nut_Diri_compute_conv_u(nut_Diri *restrict self, int64_t m, const nut_Diri *restrict f_tbl){
	if(self->y != f_tbl->y || self->x != f_tbl->x){
		return false;
	}
	// use the dense part of self to temporarily store the sums of f for small n up to y
	self->buf[0] = 0;
	for(int64_t i = 1; i <= self->y; ++i){
		int64_t term = self->buf[i-1] + f_tbl->buf[i];
		self->buf[i] = m ? nut_i64_mod(term, m) : term;
	}
	// by the hyperbola formula:
	// (f <*> u)(v) = sum(n = 1 ... vr, F(v/n)) + sum(n = 1 ... vr, f(n)*(v/n)) - F(vr)*vr
	// both sums are folded together into the following loop
	for(int64_t i = 1; i < self->yinv; ++i){
		int64_t v = self->x/i;
		int64_t vr = nut_u64_nth_root(v, 2);// TODO: v is close to the previous v, so only one Newton step should be needed here
		int64_t h = 0;
		for(int64_t n = 1, term; n <= vr; ++n){
			if(v/n <= self->y){
				term = nut_Diri_get_dense(self, v/n);
			}else{
				term = nut_Diri_get_sparse(f_tbl, i*n);
			}
			if(m){
				h = nut_i64_mod(h + term, m);
			}else{
				h += term;
			}
			if(m){
				term = (v/n)%m*nut_Diri_get_dense(f_tbl, n);
				h = nut_i64_mod(h + term, m);
			}else{
				h += nut_Diri_get_dense(f_tbl, n)*(v/n);
			}
		}
		int64_t term = nut_Diri_get_dense(self, vr)*vr;
		if(m){
			h = nut_i64_mod(h - term, m);
		}else{
			h -= term;
		}
		nut_Diri_set_sparse(self, i, h);
	}
	return nut_euler_sieve_conv_u(self->y, m, f_tbl->buf, self->buf);
}
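
// A hedged cross-check sketch (hypothetical guard): starting from the table for N,
// one application of conv_u yields sigma = N <*> u, whose summatory value at x
// should match nut_dirichlet_Sigma.
#ifdef NUT_DIRICHLET_EXAMPLES
static bool check_conv_u_table(int64_t x){
	nut_Diri f = {}, h = {};
	bool ok = false;
	if(nut_Diri_init(&f, x, 0) && nut_Diri_init(&h, x, 0)){
		nut_Diri_compute_N(&f, 0);
		ok = nut_Diri_compute_conv_u(&h, 0, &f) &&
			(uint64_t)nut_Diri_get_sparse(&h, 1) == nut_dirichlet_Sigma(x, 0);
	}
	nut_Diri_destroy(&f);
	nut_Diri_destroy(&h);
	return ok;
}
#endif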

bool nut_Diri_compute_conv_N(nut_Diri *restrict self, int64_t m, const nut_Diri *restrict f_tbl){
	if(self->y != f_tbl->y || self->x != f_tbl->x){
		return false;
	}
	// use the dense part of self to temporarily store the sums of f for small n up to y
	self->buf[0] = 0;
	for(int64_t i = 1; i <= self->y; ++i){
		int64_t term = self->buf[i-1] + f_tbl->buf[i];
		self->buf[i] = m ? nut_i64_mod(term, m) : term;
	}
	for(int64_t i = 1; i < self->yinv; ++i){
		int64_t v = self->x/i;
		int64_t vr = nut_u64_nth_root(v, 2);
		int64_t h = 0;
		// by the hyperbola formula:
		// (f <*> N)(v) = sum(n = 1 ... vr, f(n)*(v/n)*((v/n) + 1)/2) + sum(n = 1 ... vr, F(v/n)*n) - F(vr)*vr*(vr + 1)/2
		// both sums are folded together into the following loop
		for(int64_t n = 1, term; n <= vr; ++n){
			if(v/n <= self->y){
				term = nut_Diri_get_dense(self, v/n)*n;
			}else{
				term = nut_Diri_get_sparse(f_tbl, i*n)*n;
			}
			if(m){
				h = nut_i64_mod(h + term, m);
			}else{
				h += term;
			}
			term = nut_Diri_get_dense(f_tbl, n);
			int64_t k = v/n;
			if(m){
				int64_t Gvn = (k&1) ? k%m*(((k + 1)>>1)%m) : (k>>1)%m*((k + 1)%m);
				Gvn %= m;
				h = nut_i64_mod(h + term*Gvn, m);
			}else{
				int64_t Gvn = (k&1) ? k*((k + 1)>>1) : (k>>1)*(k + 1);
				h += term*Gvn;
			}
		}
		// finally we apply the corrective term (remove double counted values)
		int64_t term = nut_Diri_get_dense(self, vr);
		int64_t Gvr = (vr&1) ? vr*((vr + 1) >> 1) : (vr >> 1)*(vr + 1);
		if(m){
			Gvr %= m;
			h = nut_i64_mod(h - term*Gvr, m);
		}else{
			h -= term*Gvr;
		}
		nut_Diri_set_sparse(self, i, h);
	}
	return nut_euler_sieve_conv_N(self->y, m, f_tbl->buf, self->buf);
}

bool nut_Diri_compute_conv(nut_Diri *restrict self, int64_t m, const nut_Diri *f_tbl, const nut_Diri *g_tbl){
	if(self->y != f_tbl->y || self->x != f_tbl->x || self->y != g_tbl->y || self->x != g_tbl->x){
		return false;
	}
	// use the dense part of self to temporarily store the sums of f for small n up to y
	self->buf[0] = 0;
	for(int64_t i = 1; i <= self->y; ++i){
		int64_t term = self->buf[i-1] + f_tbl->buf[i];
		self->buf[i] = m ? nut_i64_mod(term, m) : term;
	}
	for(int64_t i = 1; i < self->yinv; ++i){
		int64_t v = self->x/i;
		int64_t vr = nut_u64_nth_root(v, 2);
		int64_t h = 0;
		for(int64_t n = 1, term; n <= vr; ++n){
			if(v/n <= self->y){
				term = nut_Diri_get_dense(self, v/n)*nut_Diri_get_dense(g_tbl, n);
			}else{
				term = nut_Diri_get_sparse(f_tbl, i*n)*nut_Diri_get_dense(g_tbl, n);
			}
			if(m){
				h = nut_i64_mod(h + term, m);
			}else{
				h += term;
			}
		}
		nut_Diri_set_sparse(self, i, h);
	}
	// use the dense part of self to temporarily store the sums of g for small n up to y.
	// Simultaneously, apply the adjustments to the h values:
	// H(x/j) has an adjustment of F(i)G(i) where i = sqrt(x/j),
	// so for every i = sqrt(x/j) value, we need to adjust all H(x/j) values
	// for j in the range (x/(i + 1)**2, x/i**2] && [1, yinv).
	// So we can ignore some small i values where this intersection is empty, that is, yinv <= x/(i + 1)**2:
	// (i + 1)**2 <= x/yinv <=> i + 1 <= sqrt(x/yinv),
	// that is, for i <= sqrt(x/yinv) - 1, there are no corresponding j to adjust.
	// note that x/yinv = y and i <= sqrt(y) - 1 <=> i < sqrt(y)
	int64_t i_ub = nut_u64_nth_root(self->y, 2);
	for(int64_t i = 1; i < i_ub; ++i){
		int64_t term = self->buf[i-1] + g_tbl->buf[i];
		self->buf[i] = m ? nut_i64_mod(term, m) : term;
	}
	for(int64_t i = i_ub; i <= self->y; ++i){
		int64_t F = self->buf[i];
		int64_t G = self->buf[i-1] + g_tbl->buf[i];
		if(m){
			G = nut_i64_mod(G, m);
		}
		self->buf[i] = G;
		int64_t j_ub = self->x/(i*i) + 1;
		if(j_ub > self->yinv){
			j_ub = self->yinv;
		}
		for(int64_t j = self->x/((i + 1)*(i + 1)) + 1; j < j_ub; ++j){
			int64_t h = nut_Diri_get_sparse(self, j);
			if(m){
				h = nut_i64_mod(h - F*G, m);
			}else{
				h -= F*G;
			}
			nut_Diri_set_sparse(self, j, h);
		}
	}
	for(int64_t i = 1; i < self->yinv; ++i){
		int64_t v = self->x/i;
		int64_t vr = nut_u64_nth_root(v, 2);
		int64_t h = nut_Diri_get_sparse(self, i);
		for(int64_t n = 1, term; n <= vr; ++n){
			if(v/n <= self->y){
				term = nut_Diri_get_dense(f_tbl, n)*nut_Diri_get_dense(self, v/n);
			}else{
				term = nut_Diri_get_dense(f_tbl, n)*nut_Diri_get_sparse(g_tbl, i*n);
			}
			if(m){
				h = nut_i64_mod(h + term, m);
			}else{
				h += term;
			}
		}
		nut_Diri_set_sparse(self, i, h);
	}
	return nut_euler_sieve_conv(self->y, m, f_tbl->buf, g_tbl->buf, self->buf);
}
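
// A hedged cross-check sketch (hypothetical guard): convolving the table for u with
// itself gives the divisor-count function d = u <*> u, whose summatory value at x
// should match nut_dirichlet_D.
#ifdef NUT_DIRICHLET_EXAMPLES
static bool check_conv_d(int64_t x){
	nut_Diri u_tbl = {}, d_tbl = {};
	bool ok = false;
	if(nut_Diri_init(&u_tbl, x, 0) && nut_Diri_init(&d_tbl, x, 0)){
		nut_Diri_compute_u(&u_tbl, 0);
		ok = nut_Diri_compute_conv(&d_tbl, 0, &u_tbl, &u_tbl) &&
			(uint64_t)nut_Diri_get_sparse(&d_tbl, 1) == nut_dirichlet_D(x, 0);
	}
	nut_Diri_destroy(&u_tbl);
	nut_Diri_destroy(&d_tbl);
	return ok;
}
#endif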

bool nut_Diri_convdiv(nut_Diri *restrict self, int64_t m, const nut_Diri *restrict f_tbl, const nut_Diri *restrict g_tbl){
	// Here we want to find the diri table for h where f = g <*> h. We recall the hyperbola formula again:
	// F(v) = sum(n = 1 ... vr, g(n)H(v/n)) + sum(n = 1 ... vr, G(v/n)h(n)) - G(vr)H(vr)
	// We can split out the n = 1 term from the first sum and rearrange to get
	// H(v) = F(v) + G(vr)H(vr) - sum(n = 2 ... vr, g(n)H(v/n)) - sum(n = 1 ... vr, G(v/n)h(n))
	// Now we want to be able to evaluate the H(v) values from smallest v to largest.
	// In the Diri_compute_conv* functions, the two sums and the correction term all depend only on the input functions,
	// so we can store partial sums of the input functions for low "dense" values in the dense part of the output.
	// Here, that isn't possible, due to the dependence of H(v) on smaller H(v). So we must use a temporary array.
	if(self->y != f_tbl->y || self->x != f_tbl->x || self->y != g_tbl->y || self->x != g_tbl->x){
		return false;
	}
	int64_t *H_dense [[gnu::cleanup(cleanup_free)]] = malloc((self->y + 1)*sizeof(int64_t));
	int64_t *G_dense [[gnu::cleanup(cleanup_free)]] = malloc((self->y + 1)*sizeof(int64_t));
	if(!H_dense || !G_dense){
		return false;
	}
	memset(self->buf, 0, (self->y + 1)*sizeof(int64_t));
	H_dense[0] = G_dense[0] = 0;
	// First, we "sieve" the values of h into the dense part of the output
	for(int64_t i = 1; i <= self->y; ++i){
		int64_t term = self->buf[i] + f_tbl->buf[i];
		self->buf[i] = m ? nut_i64_mod(term, m) : term;
		if(!self->buf[i]){
			continue;
		}
		for(int64_t j = 2; j <= self->y/i; ++j){
			int64_t term = self->buf[i*j] - self->buf[i]*g_tbl->buf[j];
			self->buf[i*j] = m ? nut_i64_mod(term, m) : term;
		}
	}
	// Now, accumulate the sums of dense h values into H_dense
	for(int64_t i = 1; i <= self->y; ++i){
		int64_t term = self->buf[i] + H_dense[i - 1];
		H_dense[i] = m ? nut_i64_mod(term, m) : term;
	}
	// And accumulate the sums of dense g values into G_dense
	for(int64_t i = 1; i <= self->y; ++i){
		int64_t term = g_tbl->buf[i] + G_dense[i - 1];
		G_dense[i] = m ? nut_i64_mod(term, m) : term;
	}
	// Now we populate the sparse H values using the formula
	// H(v) = F(v) + G(vr)H(vr) - sum(n = 2 ... vr, g(n)H(v/n)) - sum(n = 1 ... vr, G(v/n)h(n))
	// We will do the sums first for each v, because when doing a modulus, subtraction is more expensive than addition.
	// This requires H(v/2), ..., H(vr) to have been computed before, which we can accomplish by computing
	// H in terms of increasing values of v...
	// So i is decreasing in this loop...
	for(int64_t i = self->yinv - 1; i >= 1; --i){
		// so that v will be increasing here
		int64_t v = self->x/i;
		int64_t vr = nut_u64_nth_root(v, 2);
		int64_t H = 0; // H starts at 0 because we add the sums first
		if(1 <= vr){ // do the extra term of G(v/n)h(n) where n = 1 (h(1) = 1 since f(1) = g(1) = 1)
			// Because i < self->yinv, v >= self->y by definition of self->yinv,
			// so it is only possible for v to be <= self->y at most once (if it equals self->y when i is self->yinv - 1).
			// But if self->y == self->yinv - 1, we store both h(self->y) in the dense part and H(self->yinv - 1) in the sparse part.
			// Therefore, the value we want always exists in the sparse part here
			assert(v >= self->y);
			H = nut_Diri_get_sparse(g_tbl, i);
		}
		for(int64_t n = 2, gH_term, Gh_term; n <= vr; ++n){
			if(v/n <= self->y){
				gH_term = nut_Diri_get_dense(g_tbl, n)*H_dense[v/n];
				Gh_term = nut_Diri_get_dense(self, n)*G_dense[v/n];
			}else{
				gH_term = nut_Diri_get_dense(g_tbl, n)*nut_Diri_get_sparse(self, i*n);
				Gh_term = nut_Diri_get_dense(self, n)*nut_Diri_get_sparse(g_tbl, i*n);
			}
			H = m ? nut_i64_mod(H + gH_term%m + Gh_term%m, m) : H + gH_term + Gh_term;
		}
		// Now H consists of all the sums, so we have to subtract it from F(v) + G(vr)H(vr)
		int64_t term = G_dense[vr]*H_dense[vr];
		assert(v >= self->y); // see above comment about i < self->yinv
		term = m ? nut_Diri_get_sparse(f_tbl, i) + term%m : nut_Diri_get_sparse(f_tbl, i) + term;
		H = m ? nut_i64_mod(term - H, m) : term - H;
		nut_Diri_set_sparse(self, i, H);
	}
	return true;
}
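
// A hedged round-trip sketch (hypothetical guard): convdiv should invert
// compute_conv_u. Building sigma = N <*> u from the table for N and then dividing
// by u should recover N, so sparse index 1 of the quotient is 1 + ... + x = x(x + 1)/2.
#ifdef NUT_DIRICHLET_EXAMPLES
static bool check_convdiv(int64_t x){
	nut_Diri n_tbl = {}, u_tbl = {}, s_tbl = {}, h_tbl = {};
	bool ok = false;
	if(nut_Diri_init(&n_tbl, x, 0) && nut_Diri_init(&u_tbl, x, 0) &&
			nut_Diri_init(&s_tbl, x, 0) && nut_Diri_init(&h_tbl, x, 0)){
		nut_Diri_compute_N(&n_tbl, 0);
		nut_Diri_compute_u(&u_tbl, 0);
		ok = nut_Diri_compute_conv_u(&s_tbl, 0, &n_tbl) &&
			nut_Diri_convdiv(&h_tbl, 0, &s_tbl, &u_tbl) &&
			nut_Diri_get_sparse(&h_tbl, 1) == x*(x + 1)/2;
	}
	nut_Diri_destroy(&n_tbl);
	nut_Diri_destroy(&u_tbl);
	nut_Diri_destroy(&s_tbl);
	nut_Diri_destroy(&h_tbl);
	return ok;
}
#endif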
|