11/* 
22    Copyright (C) 2022 Daniel Schultz 
3+     Copyright (C) 2024 Fredrik Johansson 
34
45    This file is part of FLINT. 
56
1718#include  "crt_helpers.h" 
1819#include  "fft_small.h" 
1920
21+ /* 
22+ The following profiles are hardcoded. 
23+ 
24+     np bits  n m 
25+      4   84  4 3 
26+      4   88  4 3 
27+      4   92  4 3 
28+      5  112  4 4 
29+      5  116  4 4 
30+      5  120  4 4 
31+      6  136  5 4 
32+      6  140  5 4 
33+      6  144  5 4 
34+      7  160  6 5 
35+      7  164  6 5 
36+      7  168  6 5 
37+      8  184  7 6 
38+      8  188  7 6 
39+      8  192  7 6 
40+ 
41+ Here np is the number of primes {p[0], p[1], ...p[np-1]}. By default the 
42+ following 50-bit primes are used: 
43+ 
44+     p[0] = 1108307720798209 
45+     p[1] = 659706976665601 
46+     p[2] = 1086317488242689 
47+     p[3] = 910395627798529 
48+     p[4] = 699289395265537 
49+     p[5] = 1022545813831681 
50+     p[6] = 1013749720809473 
51+     p[7] = 868614185943041 
52+ 
53+ We suppose that the inputs to multiply are 
54+ 
55+     {a[0], ..., a[an-1]}  and  {b[0], ..., b[bn-1]} 
56+ 
57+ in base 2^64. They will be converted to 
58+ 
59+    {a'[0], ..., a'[an'-1]}  and  {b'[0], ..., a'[bn'-1]}. 
60+ 
61+ in base 2^bits, where an' = ceil(64*an/bits) and 
62+ bn' = ceil(64*bn/bits). The code for performing the conversion 
63+ actually processes the inputs as base 2^32 arrays of twice the lengths. 
64+ 
65+ Each dot product in the convolution of a' and b' is bounded by 
66+ 
67+     bn' * (2^bits-1)^2 
68+ 
69+ and we need this to be smaller than prod_primes = p[0], ..., p[np-1] 
70+ for the CRT reconstruction. A slight relaxation of this inequality is 
71+ 
72+     ceil(64*bn/bits) * 2^(2*bits) <= prod_primes. 
73+ 
74+ The function crt_data_find_bn_bound determines an upper bound for 
75+ permissible bn such that this holds. Numerical values of the bn bounds 
76+ are as follows: 
77+ 
78+     np = 4, bits =  84, bn <= 2536637511 
79+     np = 4, bits =  88, bn <= 10380583 
80+     np = 4, bits =  92, bn <= 42390 
81+     np = 5, bits = 112, bn <= 32822700 
82+     np = 5, bits = 116, bn <= 132790 
83+     np = 5, bits = 120, bn <= 534 
84+     np = 6, bits = 136, bn <= 144789875 
85+     np = 6, bits = 140, bn <= 582218 
86+     np = 6, bits = 144, bn <= 2337 
87+     np = 7, bits = 160, bn <= 613493872 
88+     np = 7, bits = 164, bn <= 2456369 
89+     np = 7, bits = 168, bn <= 9826 
90+     np = 8, bits = 184, bn <= 2177184315 
91+     np = 8, bits = 188, bn <= 8689506 
92+     np = 8, bits = 192, bn <= 34662 
93+ 
94+ Note that we do not require an >= bn for correctness, but for highly 
95+ unbalanced multiplications calling mpn_ctx_mpn_mul with operands 
96+ in this order gives a better bound. 
97+ 
98+ The profile parameters n and m describe the size of CRT operands. 
99+ Let P = p[0] * ... * p[n-1]. One CRT sum has the form 
100+ 
101+     s = f[0]*x[0] + ... + f[np-1]*x[np-1] 
102+ 
103+ where x[i] is a single limb, f[i] has m limbs, and the limb count n 
104+ is large enough to hold s <= P * np. 
105+ */ 
106+ 
20107void  crt_data_init (crt_data_t  C , ulong  prime , ulong  coeff_len , ulong  nprimes )
21108{
22109    C -> prime  =  prime ;
@@ -30,8 +117,6 @@ void crt_data_clear(crt_data_t C)
30117    flint_free (C -> data );
31118}
32119
33- 
34- 
35120/* 
36121    need  ceil(64*bn/bits) <= prod_primes/2^(2*bits) 
37122    i.e.  (64*bn+bits-1)/bits <= prod_primes/2^(2*bits) 
@@ -987,7 +1072,7 @@ void mpn_ctx_init(mpn_ctx_t R, ulong p)
9871072    R->profiles[i].bits      = bits_; \
9881073    R->profiles[i].bn_bound  = crt_data_find_bn_bound(R->crts + np_ - 1, bits_); \
9891074    R->profiles[i].to_ffts   = CAT3(mpn_to_ffts, np_, bits_); \
990- /*flint_printf("profile np = %wu, bits = %3wu, bn <= 0x%16wx \n", R->profiles[i].np, R->profiles[i].bits, R->profiles[i].bn_bound);*/  \
1075+ /*flint_printf("profile np = %wu, bits = %3wu, bn <= %wu \n", R->profiles[i].np, R->profiles[i].bits, R->profiles[i].bn_bound);  */  \
9911076    R -> profiles_size  =  i  +  1 ;
9921077
9931078    PUSH_PROFILE (4 , 84 , 4 ,3 );
0 commit comments