LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
载入中...
搜索中...
未找到
mul_toom62.c
浏览该文件的文档.
1/*
2 * LAMMP - Copyright (C) 2025-2026 HJimmyK(Jericho Knox)
3 * This file is part of lammp, under the GNU LGPL v2 license.
4 * See LICENSE in the project root for the full license text.
5 */
6
7#include "../../include/lammp/impl/toom_interp.h"
8
9/*
10Evaluate in:
11 0, +1, -1, +2, -2, 1/2, +inf
12
13 <-s-><--n--><--n--><--n--><--n--><--n-->
14 |a5-|--a4--|--a3--|--a2--|--a1--|--a0--|
15 |-b1-|--b0--|
16 <-t--><--n-->
17
18 v0 = a0 * b0 # A(0)*B(0)
19 v1 = ( a0+ a1+ a2+ a3+ a4+ a5)*( b0+ b1) # A(1)*B(1) ah <= 5 bh <= 1
20 vm1 = ( a0- a1+ a2- a3+ a4- a5)*( b0- b1) # A(-1)*B(-1) |ah| <= 2 bh = 0
21 v2 = ( a0+ 2a1+4a2+8a3+16a4+32a5)*( b0+2b1) # A(2)*B(2) ah <= 62 bh <= 2
22 vm2 = ( a0- 2a1+4a2-8a3+16a4-32a5)*( b0-2b1) # A(-2)*B(-2) -41<=ah<=20 -1<=bh<=0
23 vh = (32a0+16a1+8a2+4a3+ 2a4+ a5)*(2b0+ b1) # A(1/2)*B(1/2) ah <= 62 bh <= 2
24 vinf= a5 * b1 # A(inf)*B(inf)
25*/
26
27void lmmp_mul_toom62_(mp_ptr restrict dst, mp_srcptr restrict numa, mp_size_t na, mp_srcptr restrict numb, mp_size_t nb) {
28 lmmp_param_assert(na >= 3 * nb);
29 lmmp_param_assert(5 * nb >= na);
30
31 mp_size_t n, s, t;
32 mp_limb_t cy;
33 mp_ptr as1, asm1, as2, asm2, ash;
34 mp_ptr bs1, bsm1, bs2, bsm2, bsh;
35 mp_ptr gp;
36 enum toom7_flags aflags, bflags;
38
39#define a0 numa
40#define a1 (numa + n)
41#define a2 (numa + 2 * n)
42#define a3 (numa + 3 * n)
43#define a4 (numa + 4 * n)
44#define a5 (numa + 5 * n)
45#define b0 numb
46#define b1 (numb + n)
47
48 n = 1 + (na >= 3 * nb ? (na - 1) / (mp_size_t)6 : (nb - 1) >> 1);
49
50 s = na - 5 * n;
51 t = nb - n;
52
53 lmmp_debug_assert(0 < s && s <= n);
54 lmmp_debug_assert(0 < t && t <= n);
55
56 mp_ptr restrict scratch = SALLOC_TYPE(10 * n + 10, mp_limb_t);
57
58 mp_ptr restrict tmp = SALLOC_TYPE(10 * n + 10, mp_limb_t);
59 as1 = tmp;
60 asm1 = as1 + n + 1;
61 as2 = asm1 + n + 1;
62 asm2 = as2 + n + 1;
63 ash = asm2 + n + 1;
64 bs1 = ash + n + 1;
65 bsm1 = bs1 + n + 1;
66 bs2 = bsm1 + n;
67 bsm2 = bs2 + n + 1;
68 bsh = bsm2 + n + 1;
69
70 gp = dst;
71
72 /* Compute as1 and asm1. */
73 aflags = (enum toom7_flags)(toom7_w3_neg & lmmp_toom_eval_pm1_(as1, asm1, 5, numa, n, s, gp));
74
75 /* Compute as2 and asm2. */
76 aflags = (enum toom7_flags)(aflags | (toom7_w1_neg & lmmp_toom_eval_pm2_(as2, asm2, 5, numa, n, s, gp)));
77
78 /* Compute ash = 32 a0 + 16 a1 + 8 a2 + 4 a3 + 2 a4 + a5
79 = 2*(2*(2*(2*(2*a0 + a1) + a2) + a3) + a4) + a5 */
80
81 cy = lmmp_addshl1_n_(ash, a1, a0, n);
82 cy = 2 * cy + lmmp_addshl1_n_(ash, a2, ash, n);
83 cy = 2 * cy + lmmp_addshl1_n_(ash, a3, ash, n);
84 cy = 2 * cy + lmmp_addshl1_n_(ash, a4, ash, n);
85 if (s < n) {
86 mp_limb_t cy2;
87 cy2 = lmmp_addshl1_n_(ash, a5, ash, s);
88 ash[n] = 2 * cy + lmmp_shl_(ash + s, ash + s, n - s, 1);
89 lmmp_inc_1(ash + s, cy2);
90 } else
91 ash[n] = 2 * cy + lmmp_addshl1_n_(ash, a5, ash, n);
92
93 /* Compute bs1 and bsm1. */
94 if (t == n) {
95 if (lmmp_cmp_(b0, b1, n) < 0) {
96 cy = lmmp_add_n_sub_n_(bs1, bsm1, b1, b0, n);
97 bflags = toom7_w3_neg;
98 } else {
99 cy = lmmp_add_n_sub_n_(bs1, bsm1, b0, b1, n);
100 bflags = (enum toom7_flags)0;
101 }
102 bs1[n] = cy >> 1;
103 } else {
104 bs1[n] = lmmp_add_(bs1, b0, n, b1, t);
105 if (lmmp_zero_q_(b0 + t, n - t) && lmmp_cmp_(b0, b1, t) < 0) {
106 lmmp_sub_n_(bsm1, b1, b0, t);
107 lmmp_zero(bsm1 + t, n - t);
108 bflags = toom7_w3_neg;
109 } else {
110 lmmp_sub_(bsm1, b0, n, b1, t);
111 bflags = (enum toom7_flags)0;
112 }
113 }
114
115 /* Compute bs2 and bsm2. Recycling bs1 and bsm1; bs2=bs1+b1, bsm2 =
116 bsm1 - b1 */
117 lmmp_add_(bs2, bs1, n + 1, b1, t);
118 if (bflags & toom7_w3_neg) {
119 bsm2[n] = lmmp_add_(bsm2, bsm1, n, b1, t);
120 bflags = (enum toom7_flags)(bflags | toom7_w1_neg);
121 } else {
122 if (t < n) {
123 if (lmmp_zero_q_(bsm1 + t, n - t) && lmmp_cmp_(bsm1, b1, t) < 0) {
124 lmmp_sub_n_(bsm2, b1, bsm1, t);
125 lmmp_zero(bsm2 + t, n + 1 - t);
126 bflags = (enum toom7_flags)(bflags | toom7_w1_neg);
127 } else {
128 lmmp_sub_(bsm2, bsm1, n, b1, t);
129 bsm2[n] = 0;
130 }
131 } else {
132 if (lmmp_cmp_(bsm1, b1, n) < 0) {
133 lmmp_sub_n_(bsm2, b1, bsm1, n);
134 bflags = (enum toom7_flags)(bflags | toom7_w1_neg);
135 } else {
136 lmmp_sub_n_(bsm2, bsm1, b1, n);
137 }
138 bsm2[n] = 0;
139 }
140 }
141
142 /* Compute bsh, recycling bs1. bsh=bs1+b0; */
143 bsh[n] = bs1[n] + lmmp_add_n_(bsh, bs1, b0, n);
144
145 lmmp_debug_assert(as1[n] <= 5);
146 lmmp_debug_assert(bs1[n] <= 1);
147 lmmp_debug_assert(asm1[n] <= 2);
148 lmmp_debug_assert(as2[n] <= 62);
149 lmmp_debug_assert(bs2[n] <= 2);
150 lmmp_debug_assert(asm2[n] <= 41);
151 lmmp_debug_assert(bsm2[n] <= 1);
152 lmmp_debug_assert(ash[n] <= 62);
153 lmmp_debug_assert(bsh[n] <= 2);
154
155#define v0 dst /* 2n */
156#define v1 (dst + 2 * n) /* 2n+1 */
157#define vinf (dst + 6 * n) /* s+t */
158#define v2 scratch /* 2n+1 */
159#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
160#define vh (scratch + 4 * n + 2) /* 2n+1 */
161#define vm1 (scratch + 6 * n + 3) /* 2n+1 */
162#define scratch_out (scratch + 8 * n + 4) /* 2n+1 */
163 /* Total scratch need: 10*n+5 */
164
165 /* Must be in allocation order, as they overwrite one limb beyond
166 * 2n+1. */
167 lmmp_mul_n_(v2, as2, bs2, n + 1); /* v2, 2n+1 limbs */
168 lmmp_mul_n_(vm2, asm2, bsm2, n + 1); /* vm2, 2n+1 limbs */
169 lmmp_mul_n_(vh, ash, bsh, n + 1); /* vh, 2n+1 limbs */
170
171 /* vm1, 2n+1 limbs */
172 lmmp_mul_n_(vm1, asm1, bsm1, n);
173 cy = 0;
174 if (asm1[n] == 1) {
175 cy = lmmp_add_n_(vm1 + n, vm1 + n, bsm1, n);
176 } else if (asm1[n] == 2) {
177 cy = lmmp_addshl1_n_(vm1 + n, vm1 + n, bsm1, n);
178 }
179 vm1[2 * n] = cy;
180
181 /* v1, 2n+1 limbs */
182 lmmp_mul_n_(v1, as1, bs1, n);
183 if (as1[n] == 1) {
184 cy = bs1[n] + lmmp_add_n_(v1 + n, v1 + n, bs1, n);
185 } else if (as1[n] == 2) {
186 cy = 2 * bs1[n] + lmmp_addshl1_n_(v1 + n, v1 + n, bs1, n);
187 } else if (as1[n] != 0) {
188 cy = as1[n] * bs1[n] + lmmp_addmul_1_(v1 + n, bs1, n, as1[n]);
189 } else
190 cy = 0;
191 if (bs1[n] != 0)
192 cy += lmmp_add_n_(v1 + n, v1 + n, as1, n);
193 v1[2 * n] = cy;
194
195 lmmp_mul_n_(v0, a0, b0, n); /* v0, 2n limbs */
196
197 /* vinf, s+t limbs */
198 if (s > t)
199 lmmp_mul_(vinf, a5, s, b1, t);
200 else
201 lmmp_mul_(vinf, b1, t, a5, s);
202
203 lmmp_toom_interp7_(dst, n, (enum toom7_flags)(aflags ^ bflags), vm2, vm1, v2, vh, s + t, scratch_out);
204
206}
207
209 mp_ptr restrict dst,
210 mp_srcptr restrict numa,
211 mp_srcptr restrict numb,
212 mp_size_t n,
213 mp_size_t s,
214 mp_size_t t,
215 mp_ptr restrict scratch,
216 mp_ptr restrict tmp,
217 mp_ptr restrict bs1,
218 mp_ptr restrict bsm1,
219 mp_ptr restrict bs2,
220 mp_ptr restrict bsm2,
221 mp_ptr restrict bsh
222) {
223
224 mp_limb_t cy;
225 mp_ptr restrict as1, asm1, as2, asm2, ash;
226 enum toom7_flags aflags, bflags;
227
228#define a0 numa
229#define a1 (numa + n)
230#define a2 (numa + 2 * n)
231#define a3 (numa + 3 * n)
232#define a4 (numa + 4 * n)
233#define a5 (numa + 5 * n)
234#define b0 numb
235#define b1 (numb + n)
236
237
238 as1 = tmp;
239 asm1 = as1 + n + 1;
240 as2 = asm1 + n + 1;
241 asm2 = as2 + n + 1;
242 ash = asm2 + n + 1;
243
244
245 /* Compute as1 and asm1. */
246 aflags = (enum toom7_flags)(toom7_w3_neg & lmmp_toom_eval_pm1_(as1, asm1, 5, numa, n, s, dst));
247
248 /* Compute as2 and asm2. */
249 aflags = (enum toom7_flags)(aflags | (toom7_w1_neg & lmmp_toom_eval_pm2_(as2, asm2, 5, numa, n, s, dst)));
250
251 /* Compute ash = 32 a0 + 16 a1 + 8 a2 + 4 a3 + 2 a4 + a5
252 = 2*(2*(2*(2*(2*a0 + a1) + a2) + a3) + a4) + a5 */
253
254 cy = lmmp_addshl1_n_(ash, a1, a0, n);
255 cy = 2 * cy + lmmp_addshl1_n_(ash, a2, ash, n);
256 cy = 2 * cy + lmmp_addshl1_n_(ash, a3, ash, n);
257 cy = 2 * cy + lmmp_addshl1_n_(ash, a4, ash, n);
258 if (s < n) {
259 mp_limb_t cy2;
260 cy2 = lmmp_addshl1_n_(ash, a5, ash, s);
261 ash[n] = 2 * cy + lmmp_shl_(ash + s, ash + s, n - s, 1);
262 lmmp_inc_1(ash + s, cy2);
263 } else
264 ash[n] = 2 * cy + lmmp_addshl1_n_(ash, a5, ash, n);
265
266 /* Compute bs1 and bsm1. */
267 if (t == n) {
268 if (lmmp_cmp_(b0, b1, n) < 0) {
269 cy = lmmp_add_n_sub_n_(bs1, bsm1, b1, b0, n);
270 bflags = toom7_w3_neg;
271 } else {
272 cy = lmmp_add_n_sub_n_(bs1, bsm1, b0, b1, n);
273 bflags = (enum toom7_flags)0;
274 }
275 bs1[n] = cy >> 1;
276 } else {
277 bs1[n] = lmmp_add_(bs1, b0, n, b1, t);
278 if (lmmp_zero_q_(b0 + t, n - t) && lmmp_cmp_(b0, b1, t) < 0) {
279 lmmp_sub_n_(bsm1, b1, b0, t);
280 lmmp_zero(bsm1 + t, n - t);
281 bflags = toom7_w3_neg;
282 } else {
283 lmmp_sub_(bsm1, b0, n, b1, t);
284 bflags = (enum toom7_flags)0;
285 }
286 }
287
288 /* Compute bs2 and bsm2. Recycling bs1 and bsm1; bs2=bs1+b1, bsm2 =
289 bsm1 - b1 */
290 lmmp_add_(bs2, bs1, n + 1, b1, t);
291 if (bflags & toom7_w3_neg) {
292 bsm2[n] = lmmp_add_(bsm2, bsm1, n, b1, t);
293 bflags = (enum toom7_flags)(bflags | toom7_w1_neg);
294 } else {
295 if (t < n) {
296 if (lmmp_zero_q_(bsm1 + t, n - t) && lmmp_cmp_(bsm1, b1, t) < 0) {
297 lmmp_sub_n_(bsm2, b1, bsm1, t);
298 lmmp_zero(bsm2 + t, n + 1 - t);
299 bflags = (enum toom7_flags)(bflags | toom7_w1_neg);
300 } else {
301 lmmp_sub_(bsm2, bsm1, n, b1, t);
302 bsm2[n] = 0;
303 }
304 } else {
305 if (lmmp_cmp_(bsm1, b1, n) < 0) {
306 lmmp_sub_n_(bsm2, b1, bsm1, n);
307 bflags = (enum toom7_flags)(bflags | toom7_w1_neg);
308 } else {
309 lmmp_sub_n_(bsm2, bsm1, b1, n);
310 }
311 bsm2[n] = 0;
312 }
313 }
314
315 /* Compute bsh, recycling bs1. bsh=bs1+b0; */
316 bsh[n] = bs1[n] + lmmp_add_n_(bsh, bs1, b0, n);
317
318 lmmp_debug_assert(as1[n] <= 5);
319 lmmp_debug_assert(bs1[n] <= 1);
320 lmmp_debug_assert(asm1[n] <= 2);
321 lmmp_debug_assert(as2[n] <= 62);
322 lmmp_debug_assert(bs2[n] <= 2);
323 lmmp_debug_assert(asm2[n] <= 41);
324 lmmp_debug_assert(bsm2[n] <= 1);
325 lmmp_debug_assert(ash[n] <= 62);
326 lmmp_debug_assert(bsh[n] <= 2);
327
328#define v0 dst /* 2n */
329#define v1 (dst + 2 * n) /* 2n+1 */
330#define vinf (dst + 6 * n) /* s+t */
331#define v2 scratch /* 2n+1 */
332#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
333#define vh (scratch + 4 * n + 2) /* 2n+1 */
334#define vm1 (scratch + 6 * n + 3) /* 2n+1 */
335#define scratch_out (scratch + 8 * n + 4) /* 2n+1 */
336 /* Total scratch need: 10*n+5 */
337
338 /* Must be in allocation order, as they overwrite one limb beyond
339 * 2n+1. */
340 lmmp_mul_n_(v2, as2, bs2, n + 1); /* v2, 2n+1 limbs */
341 lmmp_mul_n_(vm2, asm2, bsm2, n + 1); /* vm2, 2n+1 limbs */
342 lmmp_mul_n_(vh, ash, bsh, n + 1); /* vh, 2n+1 limbs */
343
344 /* vm1, 2n+1 limbs */
345 lmmp_mul_n_(vm1, asm1, bsm1, n);
346 cy = 0;
347 if (asm1[n] == 1) {
348 cy = lmmp_add_n_(vm1 + n, vm1 + n, bsm1, n);
349 } else if (asm1[n] == 2) {
350 cy = lmmp_addshl1_n_(vm1 + n, vm1 + n, bsm1, n);
351 }
352 vm1[2 * n] = cy;
353
354 /* v1, 2n+1 limbs */
355 lmmp_mul_n_(v1, as1, bs1, n);
356 if (as1[n] == 1) {
357 cy = bs1[n] + lmmp_add_n_(v1 + n, v1 + n, bs1, n);
358 } else if (as1[n] == 2) {
359 cy = 2 * bs1[n] + lmmp_addshl1_n_(v1 + n, v1 + n, bs1, n);
360 } else if (as1[n] != 0) {
361 cy = as1[n] * bs1[n] + lmmp_addmul_1_(v1 + n, bs1, n, as1[n]);
362 } else
363 cy = 0;
364 if (bs1[n] != 0)
365 cy += lmmp_add_n_(v1 + n, v1 + n, as1, n);
366 v1[2 * n] = cy;
367
368 lmmp_mul_n_(v0, a0, b0, n); /* v0, 2n limbs */
369
370 /* vinf, s+t limbs */
371 if (s > t)
372 lmmp_mul_(vinf, a5, s, b1, t);
373 else
374 lmmp_mul_(vinf, b1, t, a5, s);
375
376 lmmp_toom_interp7_(dst, n, (enum toom7_flags)(aflags ^ bflags), vm2, vm1, v2, vh, s + t, scratch_out);
377
378 return bflags;
379}
380
382 mp_ptr restrict dst,
383 mp_srcptr restrict numa,
384 mp_srcptr restrict numb,
385 mp_size_t n,
386 mp_size_t s,
387 mp_size_t t,
388 mp_ptr restrict scratch,
389 mp_ptr restrict tmp,
390 mp_srcptr restrict bs1,
391 mp_srcptr restrict bsm1,
392 mp_srcptr restrict bs2,
393 mp_srcptr restrict bsm2,
394 mp_srcptr restrict bsh,
395 enum toom7_flags bflags
396) {
397 mp_limb_t cy;
398 mp_ptr as1, asm1, as2, asm2, ash;
399 enum toom7_flags aflags;
400
401#define a0 numa
402#define a1 (numa + n)
403#define a2 (numa + 2 * n)
404#define a3 (numa + 3 * n)
405#define a4 (numa + 4 * n)
406#define a5 (numa + 5 * n)
407#define b0 numb
408#define b1 (numb + n)
409
410 as1 = tmp;
411 asm1 = as1 + n + 1;
412 as2 = asm1 + n + 1;
413 asm2 = as2 + n + 1;
414 ash = asm2 + n + 1;
415
416
417 /* Compute as1 and asm1. */
418 aflags = (enum toom7_flags)(toom7_w3_neg & lmmp_toom_eval_pm1_(as1, asm1, 5, numa, n, s, dst));
419
420 /* Compute as2 and asm2. */
421 aflags = (enum toom7_flags)(aflags | (toom7_w1_neg & lmmp_toom_eval_pm2_(as2, asm2, 5, numa, n, s, dst)));
422
423 /* Compute ash = 32 a0 + 16 a1 + 8 a2 + 4 a3 + 2 a4 + a5
424 = 2*(2*(2*(2*(2*a0 + a1) + a2) + a3) + a4) + a5 */
425
426 cy = lmmp_addshl1_n_(ash, a1, a0, n);
427 cy = 2 * cy + lmmp_addshl1_n_(ash, a2, ash, n);
428 cy = 2 * cy + lmmp_addshl1_n_(ash, a3, ash, n);
429 cy = 2 * cy + lmmp_addshl1_n_(ash, a4, ash, n);
430 if (s < n) {
431 mp_limb_t cy2;
432 cy2 = lmmp_addshl1_n_(ash, a5, ash, s);
433 ash[n] = 2 * cy + lmmp_shl_(ash + s, ash + s, n - s, 1);
434 lmmp_inc_1(ash + s, cy2);
435 } else
436 ash[n] = 2 * cy + lmmp_addshl1_n_(ash, a5, ash, n);
437
438 lmmp_debug_assert(as1[n] <= 5);
439 lmmp_debug_assert(asm1[n] <= 2);
440 lmmp_debug_assert(as2[n] <= 62);
441 lmmp_debug_assert(asm2[n] <= 41);
442 lmmp_debug_assert(ash[n] <= 62);
443
444#define v0 dst /* 2n */
445#define v1 (dst + 2 * n) /* 2n+1 */
446#define vinf (dst + 6 * n) /* s+t */
447#define v2 scratch /* 2n+1 */
448#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
449#define vh (scratch + 4 * n + 2) /* 2n+1 */
450#define vm1 (scratch + 6 * n + 3) /* 2n+1 */
451#define scratch_out (scratch + 8 * n + 4) /* 2n+1 */
452 /* Total scratch need: 10*n+5 */
453
454 /* Must be in allocation order, as they overwrite one limb beyond
455 * 2n+1. */
456 lmmp_mul_n_(v2, as2, bs2, n + 1); /* v2, 2n+1 limbs */
457 lmmp_mul_n_(vm2, asm2, bsm2, n + 1); /* vm2, 2n+1 limbs */
458 lmmp_mul_n_(vh, ash, bsh, n + 1); /* vh, 2n+1 limbs */
459
460 /* vm1, 2n+1 limbs */
461 lmmp_mul_n_(vm1, asm1, bsm1, n);
462 cy = 0;
463 if (asm1[n] == 1) {
464 cy = lmmp_add_n_(vm1 + n, vm1 + n, bsm1, n);
465 } else if (asm1[n] == 2) {
466 cy = lmmp_addshl1_n_(vm1 + n, vm1 + n, bsm1, n);
467 }
468 vm1[2 * n] = cy;
469
470 /* v1, 2n+1 limbs */
471 lmmp_mul_n_(v1, as1, bs1, n);
472 if (as1[n] == 1) {
473 cy = bs1[n] + lmmp_add_n_(v1 + n, v1 + n, bs1, n);
474 } else if (as1[n] == 2) {
475 cy = 2 * bs1[n] + lmmp_addshl1_n_(v1 + n, v1 + n, bs1, n);
476 } else if (as1[n] != 0) {
477 cy = as1[n] * bs1[n] + lmmp_addmul_1_(v1 + n, bs1, n, as1[n]);
478 } else
479 cy = 0;
480 if (bs1[n] != 0)
481 cy += lmmp_add_n_(v1 + n, v1 + n, as1, n);
482 v1[2 * n] = cy;
483
484 lmmp_mul_n_(v0, a0, b0, n); /* v0, 2n limbs */
485
486 /* vinf, s+t limbs */
487 if (s > t)
488 lmmp_mul_(vinf, a5, s, b1, t);
489 else
490 lmmp_mul_(vinf, b1, t, a5, s);
491
492 lmmp_toom_interp7_(dst, n, (enum toom7_flags)(aflags ^ bflags), vm2, vm1, v2, vh, s + t, scratch_out);
493}
494
496 mp_ptr restrict dst,
497 mp_srcptr restrict numa,
498 mp_size_t na,
499 mp_srcptr restrict numb,
500 mp_size_t nb
501) {
502 lmmp_param_assert(na >= 5 * nb);
503 TEMP_DECL;
504 mp_size_t n = 1 + (3 * nb - 1) / (mp_size_t)6, s = 3 * nb - 5 * n, t = nb - n;
505 mp_limb_t* restrict ws = SALLOC_TYPE(nb, mp_limb_t);
506 mp_ptr restrict scratch = BALLOC_TYPE(20 * n + 20, mp_limb_t);
507 mp_ptr restrict tmp = scratch + 10 * n + 10;
508 mp_ptr restrict bs1, bsm1, bs2, bsm2, bsh;
509 bs1 = tmp + 5 * n + 5;
510 bsm1 = bs1 + n + 1;
511 bs2 = bsm1 + n;
512 bsm2 = bs2 + n + 1;
513 bsh = bsm2 + n + 1;
514 enum toom7_flags bflags = lmmp_mul_toom62_cache_init_(dst, numa, numb, n, s, t, scratch, tmp, bs1, bsm1, bs2, bsm2, bsh);
515 dst += 3 * nb;
516 numa += 3 * nb;
517 na -= 3 * nb;
518 lmmp_copy(ws, dst, nb);
519 while (na >= 5 * nb) {
520 lmmp_mul_toom62_cache_(dst, numa, numb, n, s, t, scratch, tmp, bs1, bsm1, bs2, bsm2, bsh, bflags);
521 if (lmmp_add_n_(dst, dst, ws, nb))
522 lmmp_inc(dst + nb);
523 dst += 3 * nb;
524 numa += 3 * nb;
525 na -= 3 * nb;
526 lmmp_copy(ws, dst, nb);
527 }
528 // 0 <= na < 2 nb
529 if (na >= nb)
530 lmmp_mul_(dst, numa, na, numb, nb);
531 else
532 lmmp_mul_(dst, numb, nb, numa, na);
533 if (lmmp_add_n_(dst, dst, ws, nb))
534 lmmp_inc(dst + nb);
535 TEMP_FREE;
536}
#define scratch
mp_limb_t * mp_ptr
Definition lmmp.h:215
#define lmmp_copy(dst, src, n)
Definition lmmp.h:364
#define lmmp_zero(dst, n)
Definition lmmp.h:366
uint64_t mp_size_t
Definition lmmp.h:212
#define lmmp_debug_assert(x)
Definition lmmp.h:387
const mp_limb_t * mp_srcptr
Definition lmmp.h:216
uint64_t mp_limb_t
Definition lmmp.h:211
#define lmmp_param_assert(x)
Definition lmmp.h:398
static mp_limb_t lmmp_add_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
大数加法静态内联函数 [dst,na]=[numa,na]+[numb,nb]
Definition lmmpn.h:1058
static int lmmp_cmp_(mp_srcptr numa, mp_srcptr numb, mp_size_t n)
大数比较函数(内联)
Definition lmmpn.h:1004
#define lmmp_inc(p)
大数加1宏(预期无进位)
Definition lmmpn.h:946
void lmmp_mul_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
不等长大数乘法操作 [dst,na+nb] = [numa,na] * [numb,nb]
void lmmp_mul_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
等长大数乘法操作 [dst,2*n] = [numa,n] * [numb,n]
Definition mul.c:99
mp_limb_t lmmp_addshl1_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
加法结合左移1位操作 [dst,n] = [numa,n] + ([numb,n] << 1)
Definition shl.c:56
mp_limb_t lmmp_shl_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_size_t shl)
大数左移操作 [dst,na] = [numa,na]<<shl,dst的低shl位填充0
Definition shl.c:9
static mp_limb_t lmmp_sub_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
大数减法静态内联函数 [dst,na]=[numa,na]-[numb,nb]
Definition lmmpn.h:1072
mp_limb_t lmmp_addmul_1_(mp_ptr numa, mp_srcptr numb, mp_size_t n, mp_limb_t b)
大数乘以单limb并累加操作 [numa,n] += [numb,n] * b
mp_limb_t lmmp_add_n_sub_n_(mp_ptr dsta, mp_ptr dstb, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
同时执行n位加法和减法 ([dsta,n],[dstb,n]) = ([numa,n]+[numb,n],[numa,n]-[numb,n])
Definition add_n_sub_n.c:10
mp_limb_t lmmp_sub_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无借位的n位减法 [dst,n] = [numa,n] - [numb,n]
Definition sub_n.c:70
#define lmmp_inc_1(p, inc)
大数加指定值宏(预期无进位)
Definition lmmpn.h:958
mp_limb_t lmmp_add_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无进位的n位加法 [dst,n] = [numa,n] + [numb,n]
Definition add_n.c:71
static int lmmp_zero_q_(mp_srcptr p, mp_size_t n)
大数判零函数(内联)
Definition lmmpn.h:1027
#define bsm1
#define asm1
#define bs1
#define bs2
#define as2
#define asm2
#define bsm2
#define as1
void lmmp_mul_toom62_(mp_ptr restrict dst, mp_srcptr restrict numa, mp_size_t na, mp_srcptr restrict numb, mp_size_t nb)
Definition mul_toom62.c:27
#define b0
#define v0
#define a4
#define a3
static void lmmp_mul_toom62_cache_(mp_ptr restrict dst, mp_srcptr restrict numa, mp_srcptr restrict numb, mp_size_t n, mp_size_t s, mp_size_t t, mp_ptr restrict scratch, mp_ptr restrict tmp, mp_srcptr restrict bs1, mp_srcptr restrict bsm1, mp_srcptr restrict bs2, mp_srcptr restrict bsm2, mp_srcptr restrict bsh, enum toom7_flags bflags)
Definition mul_toom62.c:381
static enum toom7_flags lmmp_mul_toom62_cache_init_(mp_ptr restrict dst, mp_srcptr restrict numa, mp_srcptr restrict numb, mp_size_t n, mp_size_t s, mp_size_t t, mp_ptr restrict scratch, mp_ptr restrict tmp, mp_ptr restrict bs1, mp_ptr restrict bsm1, mp_ptr restrict bs2, mp_ptr restrict bsm2, mp_ptr restrict bsh)
Definition mul_toom62.c:208
#define b1
#define v2
#define vm1
#define a5
#define scratch_out
#define vh
#define a2
#define a0
void lmmp_mul_toom62_unbalance_(mp_ptr restrict dst, mp_srcptr restrict numa, mp_size_t na, mp_srcptr restrict numb, mp_size_t nb)
Definition mul_toom62.c:495
#define a1
#define vinf
#define v1
#define vm2
#define TEMP_DECL
Definition tmp_alloc.h:72
#define TEMP_FREE
Definition tmp_alloc.h:93
#define SALLOC_TYPE(n, type)
Definition tmp_alloc.h:87
#define TEMP_S_DECL
Definition tmp_alloc.h:76
#define BALLOC_TYPE(n, type)
Definition tmp_alloc.h:89
#define TEMP_S_FREE
Definition tmp_alloc.h:105
int lmmp_toom_eval_pm2_(mp_ptr xp2, mp_ptr xm2, unsigned k, mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
通用高阶 Toom 求值:k次多项式在 x = +2 和 x = -2 处求值
toom7_flags
Definition toom_interp.h:27
@ toom7_w1_neg
Definition toom_interp.h:27
@ toom7_w3_neg
Definition toom_interp.h:27
void lmmp_toom_interp7_(mp_ptr dst, mp_size_t n, enum toom7_flags flags, mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5, mp_size_t w6n, mp_ptr tp)
Toom插值计算(7点插值):用于Toom-44、Toom-53、Toom-62 乘法算法
int lmmp_toom_eval_pm1_(mp_ptr xp1, mp_ptr xm1, unsigned k, mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
通用高阶 Toom 求值:k次多项式在 x = +1 和 x = -1 处求值