LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
载入中...
搜索中...
未找到
mul_toom_eval.c
浏览该文件的文档.
1/*
2 * LAMMP - Copyright (C) 2025-2026 HJimmyK(Jericho Knox)
3 * This file is part of lammp, under the GNU LGPL v2 license.
4 * See LICENSE in the project root for the full license text.
5 */
6
7#include "../../include/lammp/impl/toom_interp.h"
8
10 int neg;
11 lmmp_param_assert(x3n > 0);
12 lmmp_param_assert(x3n <= n);
13
14 xp1[n] = lmmp_add_n_(xp1, xp, xp + 2 * n, n);
15 tp[n] = lmmp_add_(tp, xp + n, n, xp + 3 * n, x3n);
16
17 neg = (lmmp_cmp_(xp1, tp, n + 1) < 0) ? ~0 : 0;
18 if (neg)
19 lmmp_add_n_sub_n_(xp1, xm1, tp, xp1, n + 1);
20 else
21 lmmp_add_n_sub_n_(xp1, xm1, xp1, tp, n + 1);
22
23 lmmp_debug_assert(xp1[n] <= 3);
24 lmmp_debug_assert(xm1[n] <= 1);
25
26 return neg;
27}
28
30 mp_limb_t cy;
31 int neg;
32 lmmp_param_assert(x3n > 0);
33 lmmp_param_assert(x3n <= n);
34 /* (x0 + 4 * x2) +/- (2 x1 + 8 x_3) */
35
36 cy = lmmp_shl_(tp, xp + 2 * n, n, 2);
37 xp2[n] = cy + lmmp_add_n_(xp2, tp, xp, n);
38
39 tp[x3n] = lmmp_shl_(tp, xp + 3 * n, x3n, 2);
40 if (x3n < n)
41 tp[n] = lmmp_add_(tp, xp + n, n, tp, x3n + 1);
42 else
43 tp[n] += lmmp_add_n_(tp, xp + n, tp, n);
44
45 lmmp_shl_(tp, tp, n + 1, 1);
46
47 neg = (lmmp_cmp_(xp2, tp, n + 1) < 0) ? ~0 : 0;
48
49 if (neg)
50 lmmp_add_n_sub_n_(xp2, xm2, tp, xp2, n + 1);
51 else
52 lmmp_add_n_sub_n_(xp2, xm2, xp2, tp, n + 1);
53
54 lmmp_debug_assert(xp2[n] < 15);
55 lmmp_debug_assert(xm2[n] < 10);
56
57 return neg;
58}
59
61 unsigned i;
62 int neg;
63 lmmp_param_assert(k >= 4);
64
65 lmmp_param_assert(hn > 0);
66 lmmp_param_assert(hn <= n);
67
68 /* The degree k is also the number of full-size coefficients, so
69 * that last coefficient, of size hn, starts at xp + k*n. */
70
71 xp1[n] = lmmp_add_n_(xp1, xp, xp + 2 * n, n);
72 for (i = 4; i < k; i += 2) lmmp_add_(xp1, xp1, n + 1, xp + i * n, n);
73
74 tp[n] = lmmp_add_n_(tp, xp + n, xp + 3 * n, n);
75 for (i = 5; i < k; i += 2) lmmp_add_(tp, tp, n + 1, xp + i * n, n);
76
77 if (k & 1)
78 lmmp_add_(tp, tp, n + 1, xp + k * n, hn);
79 else
80 lmmp_add_(xp1, xp1, n + 1, xp + k * n, hn);
81
82 neg = (lmmp_cmp_(xp1, tp, n + 1) < 0) ? ~0 : 0;
83
84 if (neg)
85 lmmp_add_n_sub_n_(xp1, xm1, tp, xp1, n + 1);
86 else
87 lmmp_add_n_sub_n_(xp1, xm1, xp1, tp, n + 1);
88
89 lmmp_debug_assert(xp1[n] <= k);
90 lmmp_debug_assert(xm1[n] <= k / 2 + 1);
91
92 return neg;
93}
94
95/* DO_addlsh2(d,a,b,n,cy) computes cy,[d,n] <= [a,n] + 4*(cy,[b,n]), it
96 can be used as DO_addlsh2(d,a,d,n,d[n]), for accumulation on [d,n+1]. */
97
98/* The following is not a general substitute for addlsh2.
99 It is correct if d == b, but it is not if d == a. */
100#define DO_addlsh2(d, a, b, n, cy) \
101 do { \
102 (cy) <<= 2; \
103 (cy) += lmmp_shl_(d, b, n, 2); \
104 (cy) += lmmp_add_n_(d, d, a, n); \
105 } while (0)
106
108 int i;
109 int neg;
110 mp_limb_t cy;
111 lmmp_param_assert(k >= 3);
113
114 lmmp_param_assert(hn > 0);
115 lmmp_param_assert(hn <= n);
116
117 /* The degree k is also the number of full-size coefficients, so
118 * that last coefficient, of size hn, starts at xp + k*n. */
119
120 cy = 0;
121 DO_addlsh2(xp2, xp + (k - 2) * n, xp + k * n, hn, cy);
122 if (hn != n)
123 cy = lmmp_add_1_(xp2 + hn, xp + (k - 2) * n + hn, n - hn, cy);
124 for (i = k - 4; i >= 0; i -= 2) DO_addlsh2(xp2, xp + i * n, xp2, n, cy);
125 xp2[n] = cy;
126
127 k--;
128
129 cy = 0;
130 DO_addlsh2(tp, xp + (k - 2) * n, xp + k * n, n, cy);
131 for (i = k - 4; i >= 0; i -= 2) DO_addlsh2(tp, xp + i * n, tp, n, cy);
132 tp[n] = cy;
133
134 if (k & 1)
135 lmmp_shl_(tp, tp, n + 1, 1);
136 else
137 lmmp_shl_(xp2, xp2, n + 1, 1);
138
139 neg = (lmmp_cmp_(xp2, tp, n + 1) < 0) ? ~0 : 0;
140
141 if (neg)
142 lmmp_add_n_sub_n_(xp2, xm2, tp, xp2, n + 1);
143 else
144 lmmp_add_n_sub_n_(xp2, xm2, xp2, tp, n + 1);
145
146 lmmp_debug_assert(xp2[n] < (1ull << (k + 2)) - 1);
147 lmmp_debug_assert(xm2[n] < ((1 << (k + 3)) - 1 - (1 ^ (k & 1))) / 3);
148
149 neg ^= ((k & 1) - 1);
150
151 return neg;
152}
153
154#undef DO_addlsh2
#define k
mp_limb_t * mp_ptr
Definition lmmp.h:215
uint64_t mp_size_t
Definition lmmp.h:212
#define lmmp_debug_assert(x)
Definition lmmp.h:387
const mp_limb_t * mp_srcptr
Definition lmmp.h:216
uint64_t mp_limb_t
Definition lmmp.h:211
#define LIMB_BITS
Definition lmmp.h:221
#define lmmp_param_assert(x)
Definition lmmp.h:398
static mp_limb_t lmmp_add_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
大数加法静态内联函数 [dst,na]=[numa,na]+[numb,nb]
Definition lmmpn.h:1058
static int lmmp_cmp_(mp_srcptr numa, mp_srcptr numb, mp_size_t n)
大数比较函数(内联)
Definition lmmpn.h:1004
static mp_limb_t lmmp_add_1_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_limb_t x)
大数加单精度数静态内联函数 [dst,na]=[numa,na]+x
Definition lmmpn.h:1111
mp_limb_t lmmp_shl_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_size_t shl)
大数左移操作 [dst,na] = [numa,na]<<shl,dst的低shl位填充0
Definition shl.c:9
mp_limb_t lmmp_add_n_sub_n_(mp_ptr dsta, mp_ptr dstb, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
同时执行n位加法和减法 ([dsta,n],[dstb,n]) = ([numa,n]+[numb,n],[numa,n]-[numb,n])
Definition add_n_sub_n.c:10
mp_limb_t lmmp_add_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无进位的n位加法 [dst,n] = [numa,n] + [numb,n]
Definition add_n.c:71
#define tp
int lmmp_toom_eval_pm2_(mp_ptr xp2, mp_ptr xm2, unsigned k, mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
通用高阶 Toom 求值:k次多项式在 x = +2 和 x = -2 处求值
int lmmp_toom_eval_pm1_(mp_ptr xp1, mp_ptr xm1, unsigned k, mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
通用高阶 Toom 求值:k次多项式在 x = +1 和 x = -1 处求值
int lmmp_toom_eval_dgr3_pm2_(mp_ptr xp2, mp_ptr xm2, mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
Toom-3 专用:3次多项式在 x = +2 和 x = -2 处求值 计算 P(+2) 和 P(-2),其中 P(x) 是一个3次多项式(4段系数)。
#define DO_addlsh2(d, a, b, n, cy)
int lmmp_toom_eval_dgr3_pm1_(mp_ptr xp1, mp_ptr xm1, mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)
Toom-3 专用:3次多项式在 x = +1 和 x = -1 处求值 计算 P(+1) 和 P(-1),其中 P(x) 是一个3次多项式(4段系数)。