Line data Source code
1 : #pragma once
2 :
3 : /// @file
4 : /// @author hacatu
5 : /// @version 0.2.0
6 : /// @section LICENSE
7 : /// This Source Code Form is subject to the terms of the Mozilla Public
8 : /// License, v. 2.0. If a copy of the MPL was not distributed with this
9 : /// file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 : /// @section DESCRIPTION
11 : /// Sieve based functions for computing primes in a range or divisor counts/
12 : /// power sums/Euler's totient function/Carmichael's function on all numbers
13 : /// in a range.
14 : /// Arrays returned from sieve functions should be freed when no longer needed.
15 :
16 : #include <inttypes.h>
17 : #include <stdlib.h>
18 :
19 : #include <nut/modular_math.h>
20 : #include <nut/factorization.h>
21 :
22 : /// Compute the maximum number of unique prime divisors a number can have.
23 : /// This has nothing to do with factoring the number and is just a simple binary
24 : /// decision diagram based on comparing the number to 2, 2*3, 2*3*5, etc.
25 : /// @param [in] max: number to find max unique prime divisors of
26 : /// @return the largest number of unique prime divisors any number not exceeding max can possibly have
27 : NUT_ATTR_CONST
28 : uint64_t nut_max_prime_divs(uint64_t max);
29 :
30 : /// Compute an upper bound on the number of primes up to max.
31 : /// This uses an inequality involving log derived from the prime number theorem to always get an
32 : /// upper bound.
33 : /// @param [in] max: number to find the number of primes up to
34 : /// @param an upper bound on the number of primes up to max
35 : NUT_ATTR_CONST
36 : uint64_t nut_max_primes_le(uint64_t max);
37 :
38 : /// Get an entry from a variable pitch array.
39 : /// In a normal array, the element type is a complete type with size known at compile time, or even a variably
40 : /// modified type for which sizeof and ordinary array operations will work as desired.
41 : /// However, it is sometimes useful to have arrays whose elements are structs with flexible length array members,
42 : /// and because flexible length arrays do not have their lengths automatically tracked like static or variable length arrays,
43 : /// we can't use sizeof or ordinary array operations. In particular, sizeof returns the padded size assuming the flexible
44 : /// length array has zero length, and array operations work as if we had an array of structs where the flexible length array
45 : /// has zero length. This function instead takes the base pointer of the array, the pitch, and the index, and returns a pointer
46 : /// to a member without bounds checking.
47 : /// @param [in] buf: pointer to the start of the array
48 : /// @param [in] pitch: offset from start of one element to start of next element (currently should be computed as
49 : /// offsetof(type, fla_member) + w*fla_element_size, but a more complex computation should be used if alignment is critically important)
50 : /// @param [in] i: index of element to get
51 : /// @return pointer to the i-th member of an array with given base and pitch
52 : NUT_ATTR_CONST
53 : NUT_ATTR_NONNULL(1)
54 : NUT_ATTR_RETURNS_NONNULL
55 : NUT_ATTR_ACCESS(none, 1)
56 11678331 : static inline void *nut_Pitcharr_get(void *buf, size_t pitch, uint64_t i){
57 11678331 : return buf + i*pitch;
58 : }
59 :
60 : /// Get a bit from a bitarray.
61 : /// Simply does buf[i/8] & (1ull << (i%8)).
62 : /// @param [in] buf: pointer to bitarray
63 : /// @param [in] i: index of element to get
64 : /// @return false if i-th element is false, true otherwise
65 : NUT_ATTR_PURE
66 : NUT_ATTR_NONNULL(1)
67 : NUT_ATTR_ACCESS(read_only, 1)
68 2008917 : static inline bool nut_Bitarray_get(const uint8_t *buf, uint64_t i){
69 2008917 : return buf[i/8] & (UINT8_C(1) << (i%8));
70 : }
71 :
72 : /// Set a bit in a bitarray
73 : /// @param [in, out] buf: pointer to bitarray
74 : /// @param [in] i: index of element to set
75 : /// @param [in] v: true to set ith bit, false to clear
76 : NUT_ATTR_NONNULL(1)
77 : NUT_ATTR_ACCESS(read_write, 1)
78 5725408 : static inline void nut_Bitarray_set(uint8_t *buf, uint64_t i, bool v){
79 5725408 : if(v){
80 5725408 : buf[i/8] |= 1ull << (i%8);
81 : }else{
82 0 : buf[i/8] &= ~(UINT8_C(1) << (i%8));
83 : }
84 5725408 : }
85 :
86 : /// Get an element from an array of bitfields of length 2, aka uint2's.
87 : /// The result will be shifted to the least significant position, that is,
88 : /// only 0, 1, 2, and 3 are possible results.
89 : /// @param [in] buf: pointer to array of bitfields
90 : /// @param [in] i: index of element to get
91 : /// @return i-th element (0, 1, 2, or 3)
92 : NUT_ATTR_PURE
93 : NUT_ATTR_NONNULL(1)
94 : NUT_ATTR_ACCESS(read_only, 1)
95 3320852 : static inline uint8_t nut_Bitfield2_arr_get(const uint8_t *buf, uint64_t i){
96 3320852 : return (buf[i/4] >> (i%4*2)) & 3;
97 : }
98 :
99 : /// Calculate factorials and inverse factorials for a given upper bound and modulus
100 : /// @param [in] k: factorials[bits + k - 1] is the last factorial that will be computed
101 : /// @param [in] modulus: modulus to reduce result by. Must be large enough that all inv factorials are actually invertable
102 : /// @param [in] bits: used with k to find the last factorial to compute
103 : /// @param [in] max_denom: inv_factorials[max_denom] is the last one that will be computed
104 : /// @param [out] factorials: output for factorial table
105 : /// @param [out] inv_factorials: output for inverse factorial table
106 : /// @return true on success, false on failure (if the inverse of some factorial can't be found)
107 : bool nut_u64_make_factorial_tbl(uint64_t k, uint64_t modulus, uint64_t bits, uint64_t max_denom, uint64_t factorials[static bits + k], uint64_t inv_factorials[static max_denom + 1]);
108 :
109 : /// Compute the factorization for every number in the range from 0 to max.
110 : /// The factorizations for 0 and 1 are not actually computed. The factorizations
111 : /// are stored in an array of factors_t structs with capacity w, where w is the maximum
112 : /// number of unique prime divisors of a number not exceeding max. The result is
113 : /// a pitched array.
114 : /// {@link nut_Pitcharr_get} should be used to handle the returned value.
115 : /// {@link get_factorizations_pitch} should be used to get the pitch from the output
116 : /// parameter w.
117 : /// @param [in] max: inclusive upper bound of sieving range in which to factor all numbers
118 : /// @param [out] _w: store w, the maximum number of unique prime divisors of a number not exceeding max
119 : /// @return a pointer to an array of factors_t structs containing the factorization of all numbers not exceeding max,
120 : /// or NULL on allocation failure
121 : [[deprecated("Replace with nut_sieve_smallest_factors_wheel6")]]
122 : NUT_ATTR_NONNULL(2)
123 : NUT_ATTR_MALLOC
124 : NUT_ATTR_ACCESS(write_only, 2)
125 : void *nut_sieve_factorizations(uint64_t max, uint64_t *_w);
126 :
127 : /// Get the pitch for a pitched array of factorization structs with w unique prime divisors.
128 : /// This is simply offsetof(factors_t, factors) + w*sizeof(dummy->factors[0]),
129 : /// where dummy is an expression with type factors_t.
130 : /// @param [in] w: The maximal number of unique prime divisors of any potential index of the pitched array.
131 : /// Should be obtained from {@link nut_sieve_factorizations}, {@link max_prime_divisors}, etc.
132 : /// @return The pitch of a pitched array of factorization structs whose flexible length members all have w
133 : /// elements.
134 : NUT_ATTR_CONST
135 : uint64_t nut_get_factorizations_pitch(uint64_t w);
136 :
137 : /// Compute the unique prime factors of every number in the range from 0 to max.
138 : /// The factors for 0 and 1 are not actually computed. The result is stored in
139 : /// an array of nut_u64_Pitcharr structs with capacity w, where w is the maximum number of unique
140 : /// prime divisors of a number not exceeding max. The pitch of the result may be obtained with
141 : /// {@link get_factors_pitch}.
142 : /// {@link nut_Pitcharr_get} should be used to handle the returned value.
143 : /// @param [in] max: inclusive upper bound of sieving range in which to find unique prime factors of all numbers
144 : /// @param [out] _w: maximum numer of unique prime divisors of a number not exceeding max
145 : /// @return a pointer to an array of nut_u64_Pitcharr structs containing lists of unique prime factors for all numbers not exceeding max,
146 : /// or NULL on allocation failure
147 : [[deprecated("Replace with nut_sieve_smallest_factors_wheel6")]]
148 : NUT_ATTR_NONNULL(2)
149 : NUT_ATTR_MALLOC
150 : NUT_ATTR_ACCESS(write_only, 2)
151 : void *nut_sieve_factors(uint64_t max, uint64_t *_w);
152 :
153 : /// Compute the number of distinct prime divisors of every number in the range from 0 to max.
154 : /// The divisors for 0 and 1 are not actually computed.
155 : /// @param [in] max: inclusive upper bound of sieving range in which to find distinct prime divisor counts of all numbers
156 : /// @return a pointer to an array of distinct prime divisor counts for all numbers not exceeding max,
157 : /// or NULL on allocation failure
158 : NUT_ATTR_MALLOC
159 : uint8_t *nut_sieve_omega(uint64_t max);
160 :
161 : /// Compute the largest prime factor of every number in the range from 0 to max.
162 : /// Compared to {@link nut_sieve_factors}, this uses up to 30 times less memory, so if
163 : /// the range is very large, most numbers will never actually have their factorizations
164 : /// accessed, or the factorizations will only be accessed about once, this function
165 : /// should be preferred. The factors of 0 and 1 are not actually computed, their entries in
166 : /// the returned array will be 0. You can use the resulting table as it is, or convert it to
167 : /// a factorization using {@link nut_fill_factors_from_largest}.
168 : /// @param [in] max: inclusive upper bound of sieving range in which to find largest prime factors of all numbers
169 : /// @return a pointer to an array of largest prime factors for all numbers not exceeding max, or NULL on allocation failure.
170 : [[deprecated("Replace with nut_sieve_smallest_factors_wheel6")]]
171 : NUT_ATTR_MALLOC
172 : uint64_t *nut_sieve_largest_factors(uint64_t max);
173 :
174 : /// Use a table of largest prime factors to get the factorization of a number
175 : /// @param [out] out: Factors struct to store result in. MUST be allocated already, use {@link nut_make_Factors_ub} or
176 : /// {@link nut_max_prime_divs} and {@link nut_make_Factors_w} if needed.
177 : /// @param [in] n: the number to get the factorization of
178 : /// @param [in] largest_factors: table of largest factors, from {@link nut_sieve_largest_factors}
179 : [[deprecated("Replace with nut_fill_factors_from_smallest_wheel6")]]
180 : NUT_ATTR_NONNULL(1, 3)
181 : NUT_ATTR_ACCESS(read_write, 1)
182 : NUT_ATTR_ACCESS(read_only, 3)
183 : void nut_fill_factors_from_largest(nut_Factors *restrict out, uint64_t n, const uint64_t largest_factors[restrict static n + 1]);
184 :
185 : /// Compute the smallest prime factor of every number in the range from 0 to max, or 1 for primes
186 : /// Compared to {@link nut_sieve_factors}, this uses up to 60 times less memory, so if
187 : /// the range is very large, most numbers will never actually have their factorizations
188 : /// accessed, or the factorizations will only be accessed about once, this function
189 : /// should be preferred. The factors of 0 and 1 are not actually computed, their entries in
190 : /// the returned array will be 0. You can use the resulting table as it is, or convert it to
191 : /// a factorization using {@link nut_fill_factors_from_smallest}.
192 : /// This function is able to store factors as 32 bit integers instead of 64, since the smallest prime factor of a
193 : /// composite number is at most its square root. This is why we store 1 for primes instead of themselves
194 : /// @param [in] max: inclusive upper bound of sieving range in which to find smallest prime factors of all numbers
195 : /// @return a pointer to an array of smallest prime factors for all numbers not exceeding max, or NULL on allocation failure.
196 : [[deprecated("Replace with nut_sieve_smallest_factors_wheel6")]]
197 : NUT_ATTR_MALLOC
198 : uint32_t *nut_sieve_smallest_factors(uint64_t max);
199 :
200 : /// Use a table of smallest prime factors to get the factorization of a number
201 : /// @param [out] out: Factors struct to store result in. MUST be allocated already, use {@link nut_make_Factors_ub} or
202 : /// {@link nut_max_prime_divs} and {@link nut_make_Factors_w} if needed.
203 : /// @param [in] n: the number to get the factorization of
204 : /// @param [in] smallest_factors: table of smallest factors, from {@link nut_sieve_smallest_factors}
205 : [[deprecated("Replace with nut_fill_factors_from_smallest_wheel6")]]
206 : NUT_ATTR_NONNULL(1, 3)
207 : NUT_ATTR_ACCESS(read_write, 1)
208 : NUT_ATTR_ACCESS(read_only, 3)
209 : void nut_fill_factors_from_smallest(nut_Factors *restrict out, uint64_t n, const uint32_t smallest_factors[restrict static n + 1]);
210 :
211 : /// Compute the smallest prime factor of every number coprime to 6 in the range from 0 to max, or 1 for primes
212 : /// Compared to {@link nut_sieve_factors}, this uses up to 180 times less memory, so if
213 : /// the range is very large, most numbers will never actually have their factorizations
214 : /// accessed, or the factorizations will only be accessed about once, this function
215 : /// should be preferred. The factors of 1 are not actually computed, its entry in
216 : /// the returned array will be 0. You can use the resulting table as it is, or convert it to
217 : /// a factorization using {@link nut_fill_factors_from_smallest_wheel6}.
218 : /// This function is able to store factors as 32 bit integers instead of 64, since the smallest prime factor of a
219 : /// composite number is at most its square root. This is why we store 1 for primes instead of themselves
220 : /// @param [in] max: inclusive upper bound of sieving range in which to find smallest prime factors of all numbers
221 : /// @return a pointer to an array of smallest prime factors for all numbers not exceeding max, or NULL on allocation failure.
222 : NUT_ATTR_MALLOC
223 : uint32_t *nut_sieve_smallest_factors_wheel6(uint64_t max);
224 :
225 : /// Use a table of smallest prime factors for a wheel of 6 to get the factorization of a number
226 : /// @param [out] out: Factors struct to store result in. MUST be allocated already, use {@link nut_make_Factors_ub} or
227 : /// {@link nut_max_prime_divs} and {@link nut_make_Factors_w} if needed.
228 : /// @param [in] n: the number to get the factorization of
229 : /// @param [in] smallest_factors: table of smallest factors, from {@link nut_sieve_smallest_factors}
230 : NUT_ATTR_NONNULL(1, 3)
231 : NUT_ATTR_ACCESS(read_write, 1)
232 : NUT_ATTR_ACCESS(read_only, 3)
233 : void nut_fill_factors_from_smallest_wheel6(nut_Factors *restrict out, uint64_t n, const uint32_t smallest_factors[restrict static n/3 + 1]);
234 :
235 : /// Get the pitch for a pitched array of {@link nut_u64_Pitcharr} factor lists.
236 : /// This is simply offsetof(nut_u64_Pitcharr, elems) + *_w*sizeof(uint64_t).
237 : /// @param [in] w: The maximal number of unique prime divisors of any potential index of the pitched array.
238 : /// Should be obtained from {@link nut_sieve_factors}, {@link max_prime_divisors}, etc.
239 : /// @return The pitch of a pitched array of factor list structs whose flexible length members all have w
240 : /// elements.
241 : NUT_ATTR_CONST
242 : uint64_t nut_get_factors_pitch(uint64_t w);
243 :
244 : /// Compute the number of divisors (including 1 and n) for every number n from 0 to max.
245 : /// The results for 0 and 1 are not actually computed. This effectively computes {@link divisor_count} for
246 : /// all numbers in the range, but without needing to compute or store the factorizations intermediately
247 : /// @param [in] max: inclusive upper bound of sieving range in which to find divisor counts for all numbers
248 : /// @return an array of divisor counts for all numbers in the range, or NULL on allocation failure
249 : NUT_ATTR_MALLOC
250 : uint64_t *nut_sieve_sigma_0(uint64_t max);
251 :
252 : /// Compute the sum of divisors (including 1 and n) for every number n from 0 to max.
253 : /// The results for 0 and 1 are not actually computed. This effectively computes {@link divisor_sum} for
254 : /// all numbers in the range, but without needing to compute or store the factorizations intermediately
255 : /// @param [in] max: inclusive upper bound of sieving range in which to find divisor sums for all numbers
256 : /// @return an array of divisor sums for all numbers in the range, or NULL on allocation failure
257 : NUT_ATTR_MALLOC
258 : uint64_t *nut_sieve_sigma_1(uint64_t max);
259 :
260 : /// Compute the sum of some power of divisors (including 1 and n) for every number n from 0 to max.
261 : /// The results for 0 and 1 are not actually computed. This effectively computes {@link divisor_power_sum} for
262 : /// all numbers in the range, but without needing to compute or store the factorizations intermediately
263 : /// @param [in] max: inclusive upper bound of sieving range in which to find divisor power sums for all numbers
264 : /// @param [in] e: power of divisors for summing, eg 0 would produce divisor counts, 1 divisor sums, 2 sums of squares of divisors, etc
265 : /// @return an array of divisor power sums for all numbers in the range, or NULL on allocation failure
266 : NUT_ATTR_MALLOC
267 : uint64_t *nut_sieve_sigma_e(uint64_t max, uint64_t e);
268 :
269 : /// Compute the generalized divisor function dk(n) (number of k-tuples with product n) for every number n from 0 to max.
270 : /// The results for 0 and 1 are not actually computed. This effectively computes {@link divisor_tuple_count}
271 : /// for all numbers in the range, but without needing to compute or store the factorizations intermediately.
272 : /// Note that dk is multiplicative so dk(mn) = dk(m)dk(n) when m and n are coprime, and dk(p^a) = binom(a + k, k) for prime powers.
273 : /// In other words, dk is exponential in k and this function will overflow if max^k is too large.
274 : /// @param [in] max: inclusive upper bound of sieving range in which to compute generalized divisor function
275 : /// @param [in] k: number of factors per factorization, eg for a prime power p^a we get binom(a + k, k).
276 : /// @param [in] modulus: modulus to reduce results by, or zero to skip reducing
277 : /// @return an array of dk results, or NULL on allocation failure
278 : NUT_ATTR_MALLOC
279 : uint64_t *nut_sieve_dk(uint64_t max, uint64_t k, uint64_t modulus);
280 :
281 : /// Compute Euler's totient function for every number from 0 to max.
282 : /// The results for 0 and 1 are not actually computed. This effectively computes {@link euler_phi} for
283 : /// all numbers in the range, but without needing to compute or store the factorizations intermediately
284 : /// @param [in] max: inclusive upper bound of sieving range in which to find totients for all numbers
285 : /// @return an array of totients for all numbers in the range, or NULL on allocation failure
286 : NUT_ATTR_MALLOC
287 : uint64_t *nut_sieve_phi(uint64_t max);
288 :
289 : /// Compute the Carmichael function for every number from 0 to max.
290 : /// The results for 0 and 1 are not actually computed. This effectively computes {@link carmichael_lambda} for
291 : /// all numbers in the range, but without needing to compute or store the factorizations intermediately
292 : /// @param [in] max: inclusive upper bound of sieving range in which to compute Carmichael for all numbers
293 : /// @return an array of Carmichael function outputs for all numbers in the range, or NULL on allocation failure
294 : NUT_ATTR_MALLOC
295 : uint64_t *nut_sieve_carmichael(uint64_t max);
296 :
297 : /// Compute the Mobius function for every number from 0 to max.
298 : /// The result is stored in an array of 2 bit integers, which should
299 : /// be accessed by {@link nut_Bitfield2_arr_get}. That function will return 0 for 0,
300 : /// 1 for 1, and 3 for -1. 2 will not be stored anywhere in bounds in the resulting array.
301 : /// @param [in] max: inclusive upper bound of sieving range in which to compute Mobius for all numbers
302 : /// @return a bitfield array of Mobius function outputs for all numbers in the range, with 3 instead of -1,
303 : /// or NULL on allocation failure
304 : NUT_ATTR_MALLOC
305 : uint8_t *nut_sieve_mobius(uint64_t max);
306 :
307 : /// Compute the Mertens function (sum of Mobius function) for every number from 0 to max.
308 : /// Note that this function is signed.
309 : /// @param [in] max: inclusive upper bound of range in which to compute Mertens for all numbers
310 : /// @param [in] mobius: bitfield array of Mobius function outputs (from {@link nut_sieve_mobius}).
311 : /// @return an array of Mertens function outputs for all numbers in the range, or NULL on allocation failure
312 : NUT_ATTR_MALLOC
313 : NUT_ATTR_NONNULL(2)
314 : NUT_ATTR_ACCESS(read_only, 2)
315 : int64_t *nut_compute_mertens_range(uint64_t max, const uint8_t mobius[static max/4 + 1]);
316 :
317 : /// Compute a bitarray of whether or not each number from 0 to max is composite.
318 : /// 1 is composite, and 0 is considered composite here.
319 : /// The result should be used with {@link nut_is_composite} since it is packed (only stores bitflags for numbers coprime to 30).
320 : /// @param [in] max: inclusive upper bound of sieving range in which to check compositeness for all numbers
321 : /// @param [out] _num_primes: the number of primes in the range will be stored here. May be NULL.
322 : /// @return a bitarray of whether or not each number in the range is composite, or NULL on allocation failure
323 : NUT_ATTR_MALLOC
324 : uint8_t *nut_sieve_is_composite(uint64_t max);
325 :
326 : /// Check if a number is composite using a packed bitarray from {@link nut_sieve_is_composite}
327 : /// @param [in] n: the number to check if composite
328 : /// @param [in] buf: packed bitarray from {@link nut_sieve_is_composite}
329 : /// @return true if n is composite, false if n is prime
330 : NUT_ATTR_PURE
331 : NUT_ATTR_NONNULL(2)
332 : NUT_ATTR_ACCESS(read_only, 2)
333 : bool nut_is_composite(uint64_t n, const uint8_t buf[static n/30 + 1]);
334 :
335 : /// Compute the pi (prime counting) function for every number from 0 to max.
336 : /// The result should be used with {@link nut_compute_pi_from_tables} since pi is only actually calculated at every 240th number
337 : /// since intermediate results can be computed with a single popcount on the packed buf bitarray.
338 : /// @param [in] max: inclusive upper bound of range to compute pi function
339 : /// @param [in] buf: packed bitarray from {@link nut_sieve_is_composite}
340 : /// @return an array of pi values at every 240th number (use {@link nut_compute_pi_from_tables})
341 : NUT_ATTR_NONNULL(2)
342 : NUT_ATTR_MALLOC
343 : NUT_ATTR_ACCESS(read_only, 2)
344 : uint64_t *nut_compute_pi_range(uint64_t max, const uint8_t buf[static max/30 + 1]);
345 :
346 : /// Get the value for the pi (prime counting) function for a particular number using precomputed tables.
347 : /// @param [in] n: the number to calculate pi for
348 : /// @param [in] pi_table: array of partial pi values from {@link nut_compute_pi_range}
349 : /// @param [in] buf: packed bitarray from {@link nut_sieve_is_composite}
350 : /// @return the number of primes <= n
351 : NUT_ATTR_PURE
352 : NUT_ATTR_NONNULL(2, 3)
353 : NUT_ATTR_ACCESS(read_only, 2)
354 : NUT_ATTR_ACCESS(read_only, 3)
355 : NUT_ATTR_NO_SAN("vla-bound")
356 : uint64_t nut_compute_pi_from_tables(uint64_t n, const uint64_t pi_table[restrict static n/30], const uint8_t buf[restrict static n/30 + 1]);
357 :
358 : /// Compute an array of all primes from 0 to max.
359 : /// @param [in] max: inclusive upper bound of sieving range
360 : /// @param [out] _num_primes: how many primes were found in the range (this pointer cannot be null)
361 : /// @return an array of all primes from 0 to max, or NULL on allocation failure
362 : NUT_ATTR_MALLOC
363 : NUT_ATTR_NONNULL(2)
364 : NUT_ATTR_ACCESS(write_only, 2)
365 : uint64_t *nut_sieve_primes(uint64_t max, uint64_t *_num_primes);
366 :
|