LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
载入中...
搜索中...
未找到
mul_toom62.c 文件参考
+ mul_toom62.c 的引用(Include)关系图:

浏览源代码.

宏定义

#define a0   numa
 
#define a0   numa
 
#define a0   numa
 
#define a1   (numa + n)
 
#define a1   (numa + n)
 
#define a1   (numa + n)
 
#define a2   (numa + 2 * n)
 
#define a2   (numa + 2 * n)
 
#define a2   (numa + 2 * n)
 
#define a3   (numa + 3 * n)
 
#define a3   (numa + 3 * n)
 
#define a3   (numa + 3 * n)
 
#define a4   (numa + 4 * n)
 
#define a4   (numa + 4 * n)
 
#define a4   (numa + 4 * n)
 
#define a5   (numa + 5 * n)
 
#define a5   (numa + 5 * n)
 
#define a5   (numa + 5 * n)
 
#define b0   numb
 
#define b0   numb
 
#define b0   numb
 
#define b1   (numb + n)
 
#define b1   (numb + n)
 
#define b1   (numb + n)
 
#define scratch_out   (scratch + 8 * n + 4) /* 2n+1 */
 
#define scratch_out   (scratch + 8 * n + 4) /* 2n+1 */
 
#define scratch_out   (scratch + 8 * n + 4) /* 2n+1 */
 
#define v0   dst /* 2n */
 
#define v0   dst /* 2n */
 
#define v0   dst /* 2n */
 
#define v1   (dst + 2 * n) /* 2n+1 */
 
#define v1   (dst + 2 * n) /* 2n+1 */
 
#define v1   (dst + 2 * n) /* 2n+1 */
 
#define v2   scratch /* 2n+1 */
 
#define v2   scratch /* 2n+1 */
 
#define v2   scratch /* 2n+1 */
 
#define vh   (scratch + 4 * n + 2) /* 2n+1 */
 
#define vh   (scratch + 4 * n + 2) /* 2n+1 */
 
#define vh   (scratch + 4 * n + 2) /* 2n+1 */
 
#define vinf   (dst + 6 * n) /* s+t */
 
#define vinf   (dst + 6 * n) /* s+t */
 
#define vinf   (dst + 6 * n) /* s+t */
 
#define vm1   (scratch + 6 * n + 3) /* 2n+1 */
 
#define vm1   (scratch + 6 * n + 3) /* 2n+1 */
 
#define vm1   (scratch + 6 * n + 3) /* 2n+1 */
 
#define vm2   (scratch + 2 * n + 1) /* 2n+1 */
 
#define vm2   (scratch + 2 * n + 1) /* 2n+1 */
 
#define vm2   (scratch + 2 * n + 1) /* 2n+1 */
 

函数

void lmmp_mul_toom62_ (mp_ptr restrict dst, mp_srcptr restrict numa, mp_size_t na, mp_srcptr restrict numb, mp_size_t nb)
 
static void lmmp_mul_toom62_cache_ (mp_ptr restrict dst, mp_srcptr restrict numa, mp_srcptr restrict numb, mp_size_t n, mp_size_t s, mp_size_t t, mp_ptr restrict scratch, mp_ptr restrict tmp, mp_srcptr restrict bs1, mp_srcptr restrict bsm1, mp_srcptr restrict bs2, mp_srcptr restrict bsm2, mp_srcptr restrict bsh, enum toom7_flags bflags)
 
static enum toom7_flags lmmp_mul_toom62_cache_init_ (mp_ptr restrict dst, mp_srcptr restrict numa, mp_srcptr restrict numb, mp_size_t n, mp_size_t s, mp_size_t t, mp_ptr restrict scratch, mp_ptr restrict tmp, mp_ptr restrict bs1, mp_ptr restrict bsm1, mp_ptr restrict bs2, mp_ptr restrict bsm2, mp_ptr restrict bsh)
 
void lmmp_mul_toom62_unbalance_ (mp_ptr restrict dst, mp_srcptr restrict numa, mp_size_t na, mp_srcptr restrict numb, mp_size_t nb)
 

宏定义说明

◆ a0 [1/3]

#define a0   numa

◆ a0 [2/3]

#define a0   numa

◆ a0 [3/3]

#define a0   numa

◆ a1 [1/3]

#define a1   (numa + n)

◆ a1 [2/3]

#define a1   (numa + n)

◆ a1 [3/3]

#define a1   (numa + n)

◆ a2 [1/3]

#define a2   (numa + 2 * n)

◆ a2 [2/3]

#define a2   (numa + 2 * n)

◆ a2 [3/3]

#define a2   (numa + 2 * n)

◆ a3 [1/3]

#define a3   (numa + 3 * n)

◆ a3 [2/3]

#define a3   (numa + 3 * n)

◆ a3 [3/3]

#define a3   (numa + 3 * n)

◆ a4 [1/3]

#define a4   (numa + 4 * n)

◆ a4 [2/3]

#define a4   (numa + 4 * n)

◆ a4 [3/3]

#define a4   (numa + 4 * n)

◆ a5 [1/3]

#define a5   (numa + 5 * n)

◆ a5 [2/3]

#define a5   (numa + 5 * n)

◆ a5 [3/3]

#define a5   (numa + 5 * n)

◆ b0 [1/3]

#define b0   numb

◆ b0 [2/3]

#define b0   numb

◆ b0 [3/3]

#define b0   numb

◆ b1 [1/3]

#define b1   (numb + n)

◆ b1 [2/3]

#define b1   (numb + n)

◆ b1 [3/3]

#define b1   (numb + n)

◆ scratch_out [1/3]

#define scratch_out   (scratch + 8 * n + 4) /* 2n+1 */

◆ scratch_out [2/3]

#define scratch_out   (scratch + 8 * n + 4) /* 2n+1 */

◆ scratch_out [3/3]

#define scratch_out   (scratch + 8 * n + 4) /* 2n+1 */

◆ v0 [1/3]

#define v0   dst /* 2n */

◆ v0 [2/3]

#define v0   dst /* 2n */

◆ v0 [3/3]

#define v0   dst /* 2n */

◆ v1 [1/3]

#define v1   (dst + 2 * n) /* 2n+1 */

◆ v1 [2/3]

#define v1   (dst + 2 * n) /* 2n+1 */

◆ v1 [3/3]

#define v1   (dst + 2 * n) /* 2n+1 */

◆ v2 [1/3]

#define v2   scratch /* 2n+1 */

◆ v2 [2/3]

#define v2   scratch /* 2n+1 */

◆ v2 [3/3]

#define v2   scratch /* 2n+1 */

◆ vh [1/3]

#define vh   (scratch + 4 * n + 2) /* 2n+1 */

◆ vh [2/3]

#define vh   (scratch + 4 * n + 2) /* 2n+1 */

◆ vh [3/3]

#define vh   (scratch + 4 * n + 2) /* 2n+1 */

◆ vinf [1/3]

#define vinf   (dst + 6 * n) /* s+t */

◆ vinf [2/3]

#define vinf   (dst + 6 * n) /* s+t */

◆ vinf [3/3]

#define vinf   (dst + 6 * n) /* s+t */

◆ vm1 [1/3]

#define vm1   (scratch + 6 * n + 3) /* 2n+1 */

◆ vm1 [2/3]

#define vm1   (scratch + 6 * n + 3) /* 2n+1 */

◆ vm1 [3/3]

#define vm1   (scratch + 6 * n + 3) /* 2n+1 */

◆ vm2 [1/3]

#define vm2   (scratch + 2 * n + 1) /* 2n+1 */

◆ vm2 [2/3]

#define vm2   (scratch + 2 * n + 1) /* 2n+1 */

◆ vm2 [3/3]

#define vm2   (scratch + 2 * n + 1) /* 2n+1 */

函数说明

◆ lmmp_mul_toom62_()

void lmmp_mul_toom62_ ( mp_ptr restrict  dst,
mp_srcptr restrict  numa,
mp_size_t  na,
mp_srcptr restrict  numb,
mp_size_t  nb 
)

在文件 mul_toom62.c27 行定义.

27 {
28 lmmp_param_assert(na >= 3 * nb);
29 lmmp_param_assert(5 * nb >= na);
30
31 mp_size_t n, s, t;
32 mp_limb_t cy;
33 mp_ptr as1, asm1, as2, asm2, ash;
34 mp_ptr bs1, bsm1, bs2, bsm2, bsh;
35 mp_ptr gp;
36 enum toom7_flags aflags, bflags;
38
39#define a0 numa
40#define a1 (numa + n)
41#define a2 (numa + 2 * n)
42#define a3 (numa + 3 * n)
43#define a4 (numa + 4 * n)
44#define a5 (numa + 5 * n)
45#define b0 numb
46#define b1 (numb + n)
47
48 n = 1 + (na >= 3 * nb ? (na - 1) / (mp_size_t)6 : (nb - 1) >> 1);
49
50 s = na - 5 * n;
51 t = nb - n;
52
53 lmmp_debug_assert(0 < s && s <= n);
54 lmmp_debug_assert(0 < t && t <= n);
55
56 mp_ptr restrict scratch = SALLOC_TYPE(10 * n + 10, mp_limb_t);
57
58 mp_ptr restrict tmp = SALLOC_TYPE(10 * n + 10, mp_limb_t);
59 as1 = tmp;
60 asm1 = as1 + n + 1;
61 as2 = asm1 + n + 1;
62 asm2 = as2 + n + 1;
63 ash = asm2 + n + 1;
64 bs1 = ash + n + 1;
65 bsm1 = bs1 + n + 1;
66 bs2 = bsm1 + n;
67 bsm2 = bs2 + n + 1;
68 bsh = bsm2 + n + 1;
69
70 gp = dst;
71
72 /* Compute as1 and asm1. */
73 aflags = (enum toom7_flags)(toom7_w3_neg & lmmp_toom_eval_pm1_(as1, asm1, 5, numa, n, s, gp));
74
75 /* Compute as2 and asm2. */
76 aflags = (enum toom7_flags)(aflags | (toom7_w1_neg & lmmp_toom_eval_pm2_(as2, asm2, 5, numa, n, s, gp)));
77
78 /* Compute ash = 32 a0 + 16 a1 + 8 a2 + 4 a3 + 2 a4 + a5
79 = 2*(2*(2*(2*(2*a0 + a1) + a2) + a3) + a4) + a5 */
80
81 cy = lmmp_addshl1_n_(ash, a1, a0, n);
82 cy = 2 * cy + lmmp_addshl1_n_(ash, a2, ash, n);
83 cy = 2 * cy + lmmp_addshl1_n_(ash, a3, ash, n);
84 cy = 2 * cy + lmmp_addshl1_n_(ash, a4, ash, n);
85 if (s < n) {
86 mp_limb_t cy2;
87 cy2 = lmmp_addshl1_n_(ash, a5, ash, s);
88 ash[n] = 2 * cy + lmmp_shl_(ash + s, ash + s, n - s, 1);
89 lmmp_inc_1(ash + s, cy2);
90 } else
91 ash[n] = 2 * cy + lmmp_addshl1_n_(ash, a5, ash, n);
92
93 /* Compute bs1 and bsm1. */
94 if (t == n) {
95 if (lmmp_cmp_(b0, b1, n) < 0) {
96 cy = lmmp_add_n_sub_n_(bs1, bsm1, b1, b0, n);
97 bflags = toom7_w3_neg;
98 } else {
99 cy = lmmp_add_n_sub_n_(bs1, bsm1, b0, b1, n);
100 bflags = (enum toom7_flags)0;
101 }
102 bs1[n] = cy >> 1;
103 } else {
104 bs1[n] = lmmp_add_(bs1, b0, n, b1, t);
105 if (lmmp_zero_q_(b0 + t, n - t) && lmmp_cmp_(b0, b1, t) < 0) {
106 lmmp_sub_n_(bsm1, b1, b0, t);
107 lmmp_zero(bsm1 + t, n - t);
108 bflags = toom7_w3_neg;
109 } else {
110 lmmp_sub_(bsm1, b0, n, b1, t);
111 bflags = (enum toom7_flags)0;
112 }
113 }
114
115 /* Compute bs2 and bsm2. Recycling bs1 and bsm1; bs2=bs1+b1, bsm2 =
116 bsm1 - b1 */
117 lmmp_add_(bs2, bs1, n + 1, b1, t);
118 if (bflags & toom7_w3_neg) {
119 bsm2[n] = lmmp_add_(bsm2, bsm1, n, b1, t);
120 bflags = (enum toom7_flags)(bflags | toom7_w1_neg);
121 } else {
122 if (t < n) {
123 if (lmmp_zero_q_(bsm1 + t, n - t) && lmmp_cmp_(bsm1, b1, t) < 0) {
124 lmmp_sub_n_(bsm2, b1, bsm1, t);
125 lmmp_zero(bsm2 + t, n + 1 - t);
126 bflags = (enum toom7_flags)(bflags | toom7_w1_neg);
127 } else {
128 lmmp_sub_(bsm2, bsm1, n, b1, t);
129 bsm2[n] = 0;
130 }
131 } else {
132 if (lmmp_cmp_(bsm1, b1, n) < 0) {
133 lmmp_sub_n_(bsm2, b1, bsm1, n);
134 bflags = (enum toom7_flags)(bflags | toom7_w1_neg);
135 } else {
136 lmmp_sub_n_(bsm2, bsm1, b1, n);
137 }
138 bsm2[n] = 0;
139 }
140 }
141
142 /* Compute bsh, recycling bs1. bsh=bs1+b0; */
143 bsh[n] = bs1[n] + lmmp_add_n_(bsh, bs1, b0, n);
144
145 lmmp_debug_assert(as1[n] <= 5);
146 lmmp_debug_assert(bs1[n] <= 1);
147 lmmp_debug_assert(asm1[n] <= 2);
148 lmmp_debug_assert(as2[n] <= 62);
149 lmmp_debug_assert(bs2[n] <= 2);
150 lmmp_debug_assert(asm2[n] <= 41);
151 lmmp_debug_assert(bsm2[n] <= 1);
152 lmmp_debug_assert(ash[n] <= 62);
153 lmmp_debug_assert(bsh[n] <= 2);
154
155#define v0 dst /* 2n */
156#define v1 (dst + 2 * n) /* 2n+1 */
157#define vinf (dst + 6 * n) /* s+t */
158#define v2 scratch /* 2n+1 */
159#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
160#define vh (scratch + 4 * n + 2) /* 2n+1 */
161#define vm1 (scratch + 6 * n + 3) /* 2n+1 */
162#define scratch_out (scratch + 8 * n + 4) /* 2n+1 */
163 /* Total scratch need: 10*n+5 */
164
165 /* Must be in allocation order, as they overwrite one limb beyond
166 * 2n+1. */
167 lmmp_mul_n_(v2, as2, bs2, n + 1); /* v2, 2n+1 limbs */
168 lmmp_mul_n_(vm2, asm2, bsm2, n + 1); /* vm2, 2n+1 limbs */
169 lmmp_mul_n_(vh, ash, bsh, n + 1); /* vh, 2n+1 limbs */
170
171 /* vm1, 2n+1 limbs */
172 lmmp_mul_n_(vm1, asm1, bsm1, n);
173 cy = 0;
174 if (asm1[n] == 1) {
175 cy = lmmp_add_n_(vm1 + n, vm1 + n, bsm1, n);
176 } else if (asm1[n] == 2) {
177 cy = lmmp_addshl1_n_(vm1 + n, vm1 + n, bsm1, n);
178 }
179 vm1[2 * n] = cy;
180
181 /* v1, 2n+1 limbs */
182 lmmp_mul_n_(v1, as1, bs1, n);
183 if (as1[n] == 1) {
184 cy = bs1[n] + lmmp_add_n_(v1 + n, v1 + n, bs1, n);
185 } else if (as1[n] == 2) {
186 cy = 2 * bs1[n] + lmmp_addshl1_n_(v1 + n, v1 + n, bs1, n);
187 } else if (as1[n] != 0) {
188 cy = as1[n] * bs1[n] + lmmp_addmul_1_(v1 + n, bs1, n, as1[n]);
189 } else
190 cy = 0;
191 if (bs1[n] != 0)
192 cy += lmmp_add_n_(v1 + n, v1 + n, as1, n);
193 v1[2 * n] = cy;
194
195 lmmp_mul_n_(v0, a0, b0, n); /* v0, 2n limbs */
196
197 /* vinf, s+t limbs */
198 if (s > t)
199 lmmp_mul_(vinf, a5, s, b1, t);
200 else
201 lmmp_mul_(vinf, b1, t, a5, s);
202
203 lmmp_toom_interp7_(dst, n, (enum toom7_flags)(aflags ^ bflags), vm2, vm1, v2, vh, s + t, scratch_out);
204
206}
#define scratch
mp_limb_t * mp_ptr
Definition lmmp.h:215
#define lmmp_zero(dst, n)
Definition lmmp.h:366
uint64_t mp_size_t
Definition lmmp.h:212
#define lmmp_debug_assert(x)
Definition lmmp.h:387
uint64_t mp_limb_t
Definition lmmp.h:211
#define lmmp_param_assert(x)
Definition lmmp.h:398
static mp_limb_t lmmp_add_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
大数加法静态内联函数 [dst,na]=[numa,na]+[numb,nb]
Definition lmmpn.h:1058
static int lmmp_cmp_(mp_srcptr numa, mp_srcptr numb, mp_size_t n)
大数比较函数(内联)
Definition lmmpn.h:1004
void lmmp_mul_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
不等长大数乘法操作 [dst,na+nb] = [numa,na] * [numb,nb]
void lmmp_mul_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
等长大数乘法操作 [dst,2*n] = [numa,n] * [numb,n]
Definition mul.c:99
mp_limb_t lmmp_addshl1_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
加法结合左移1位操作 [dst,n] = [numa,n] + ([numb,n] << 1)
Definition shl.c:56
mp_limb_t lmmp_shl_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_size_t shl)
大数左移操作 [dst,na] = [numa,na]<<shl,dst的低shl位填充0
Definition shl.c:9
static mp_limb_t lmmp_sub_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
大数减法静态内联函数 [dst,na]=[numa,na]-[numb,nb]
Definition lmmpn.h:1072
mp_limb_t lmmp_addmul_1_(mp_ptr numa, mp_srcptr numb, mp_size_t n, mp_limb_t b)
大数乘以单limb并累加操作 [numa,n] += [numb,n] * b
mp_limb_t lmmp_add_n_sub_n_(mp_ptr dsta, mp_ptr dstb, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
同时执行n位加法和减法 ([dsta,n],[dstb,n]) = ([numa,n]+[numb,n],[numa,n]-[numb,n])
Definition add_n_sub_n.c:10
mp_limb_t lmmp_sub_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无借位的n位减法 [dst,n] = [numa,n] - [numb,n]
Definition sub_n.c:70
#define lmmp_inc_1(p, inc)
大数加指定值宏(预期无进位)
Definition lmmpn.h:958
mp_limb_t lmmp_add_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无进位的n位加法 [dst,n] = [numa,n] + [numb,n]
Definition add_n.c:71
static int lmmp_zero_q_(mp_srcptr p, mp_size_t n)
大数判零函数(内联)
Definition lmmpn.h:1027
#define bsm1
#define asm1
#define bs1
#define bs2
#define as2
#define asm2
#define bsm2
#define as1
#define b0
#define v0
#define a4
#define a3
#define b1
#define v2
#define vm1
#define a5
#define scratch_out
#define vh
#define a2
#define a0
#define a1
#define vinf
#define v1
#define vm2
#define SALLOC_TYPE(n, type)
Definition tmp_alloc.h:87
#define TEMP_S_DECL
Definition tmp_alloc.h:76
#define TEMP_S_FREE
Definition tmp_alloc.h:105
int lmmp_toom_eval_pm2_(mp_ptr xp2, mp_ptr xm2, unsigned k, mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
通用高阶 Toom 求值:k次多项式在 x = +2 和 x = -2 处求值
toom7_flags
Definition toom_interp.h:27
@ toom7_w1_neg
Definition toom_interp.h:27
@ toom7_w3_neg
Definition toom_interp.h:27
void lmmp_toom_interp7_(mp_ptr dst, mp_size_t n, enum toom7_flags flags, mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5, mp_size_t w6n, mp_ptr tp)
Toom插值计算(7点插值):用于Toom-44、Toom-53、Toom-62 乘法算法
int lmmp_toom_eval_pm1_(mp_ptr xp1, mp_ptr xm1, unsigned k, mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
通用高阶 Toom 求值:k次多项式在 x = +1 和 x = -1 处求值

引用了 a0, a1, a2, a3, a4, a5, as1, as2, asm1, asm2, b0, b1, bs1, bs2, bsm1, bsm2, lmmp_add_(), lmmp_add_n_(), lmmp_add_n_sub_n_(), lmmp_addmul_1_(), lmmp_addshl1_n_(), lmmp_cmp_(), lmmp_debug_assert, lmmp_inc_1, lmmp_mul_(), lmmp_mul_n_(), lmmp_param_assert, lmmp_shl_(), lmmp_sub_(), lmmp_sub_n_(), lmmp_toom_eval_pm1_(), lmmp_toom_eval_pm2_(), lmmp_toom_interp7_(), lmmp_zero, lmmp_zero_q_(), SALLOC_TYPE, scratch, scratch_out, TEMP_S_DECL, TEMP_S_FREE, toom7_w1_neg, toom7_w3_neg, v0, v1, v2, vh, vinf, vm1 , 以及 vm2.

+ 函数调用图:

◆ lmmp_mul_toom62_cache_()

static void lmmp_mul_toom62_cache_ ( mp_ptr restrict  dst,
mp_srcptr restrict  numa,
mp_srcptr restrict  numb,
mp_size_t  n,
mp_size_t  s,
mp_size_t  t,
mp_ptr restrict  scratch,
mp_ptr restrict  tmp,
mp_srcptr restrict  bs1,
mp_srcptr restrict  bsm1,
mp_srcptr restrict  bs2,
mp_srcptr restrict  bsm2,
mp_srcptr restrict  bsh,
enum toom7_flags  bflags 
)
static

在文件 mul_toom62.c381 行定义.

396 {
397 mp_limb_t cy;
398 mp_ptr as1, asm1, as2, asm2, ash;
399 enum toom7_flags aflags;
400
401#define a0 numa
402#define a1 (numa + n)
403#define a2 (numa + 2 * n)
404#define a3 (numa + 3 * n)
405#define a4 (numa + 4 * n)
406#define a5 (numa + 5 * n)
407#define b0 numb
408#define b1 (numb + n)
409
410 as1 = tmp;
411 asm1 = as1 + n + 1;
412 as2 = asm1 + n + 1;
413 asm2 = as2 + n + 1;
414 ash = asm2 + n + 1;
415
416
417 /* Compute as1 and asm1. */
418 aflags = (enum toom7_flags)(toom7_w3_neg & lmmp_toom_eval_pm1_(as1, asm1, 5, numa, n, s, dst));
419
420 /* Compute as2 and asm2. */
421 aflags = (enum toom7_flags)(aflags | (toom7_w1_neg & lmmp_toom_eval_pm2_(as2, asm2, 5, numa, n, s, dst)));
422
423 /* Compute ash = 32 a0 + 16 a1 + 8 a2 + 4 a3 + 2 a4 + a5
424 = 2*(2*(2*(2*(2*a0 + a1) + a2) + a3) + a4) + a5 */
425
426 cy = lmmp_addshl1_n_(ash, a1, a0, n);
427 cy = 2 * cy + lmmp_addshl1_n_(ash, a2, ash, n);
428 cy = 2 * cy + lmmp_addshl1_n_(ash, a3, ash, n);
429 cy = 2 * cy + lmmp_addshl1_n_(ash, a4, ash, n);
430 if (s < n) {
431 mp_limb_t cy2;
432 cy2 = lmmp_addshl1_n_(ash, a5, ash, s);
433 ash[n] = 2 * cy + lmmp_shl_(ash + s, ash + s, n - s, 1);
434 lmmp_inc_1(ash + s, cy2);
435 } else
436 ash[n] = 2 * cy + lmmp_addshl1_n_(ash, a5, ash, n);
437
438 lmmp_debug_assert(as1[n] <= 5);
439 lmmp_debug_assert(asm1[n] <= 2);
440 lmmp_debug_assert(as2[n] <= 62);
441 lmmp_debug_assert(asm2[n] <= 41);
442 lmmp_debug_assert(ash[n] <= 62);
443
444#define v0 dst /* 2n */
445#define v1 (dst + 2 * n) /* 2n+1 */
446#define vinf (dst + 6 * n) /* s+t */
447#define v2 scratch /* 2n+1 */
448#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
449#define vh (scratch + 4 * n + 2) /* 2n+1 */
450#define vm1 (scratch + 6 * n + 3) /* 2n+1 */
451#define scratch_out (scratch + 8 * n + 4) /* 2n+1 */
452 /* Total scratch need: 10*n+5 */
453
454 /* Must be in allocation order, as they overwrite one limb beyond
455 * 2n+1. */
456 lmmp_mul_n_(v2, as2, bs2, n + 1); /* v2, 2n+1 limbs */
457 lmmp_mul_n_(vm2, asm2, bsm2, n + 1); /* vm2, 2n+1 limbs */
458 lmmp_mul_n_(vh, ash, bsh, n + 1); /* vh, 2n+1 limbs */
459
460 /* vm1, 2n+1 limbs */
461 lmmp_mul_n_(vm1, asm1, bsm1, n);
462 cy = 0;
463 if (asm1[n] == 1) {
464 cy = lmmp_add_n_(vm1 + n, vm1 + n, bsm1, n);
465 } else if (asm1[n] == 2) {
466 cy = lmmp_addshl1_n_(vm1 + n, vm1 + n, bsm1, n);
467 }
468 vm1[2 * n] = cy;
469
470 /* v1, 2n+1 limbs */
471 lmmp_mul_n_(v1, as1, bs1, n);
472 if (as1[n] == 1) {
473 cy = bs1[n] + lmmp_add_n_(v1 + n, v1 + n, bs1, n);
474 } else if (as1[n] == 2) {
475 cy = 2 * bs1[n] + lmmp_addshl1_n_(v1 + n, v1 + n, bs1, n);
476 } else if (as1[n] != 0) {
477 cy = as1[n] * bs1[n] + lmmp_addmul_1_(v1 + n, bs1, n, as1[n]);
478 } else
479 cy = 0;
480 if (bs1[n] != 0)
481 cy += lmmp_add_n_(v1 + n, v1 + n, as1, n);
482 v1[2 * n] = cy;
483
484 lmmp_mul_n_(v0, a0, b0, n); /* v0, 2n limbs */
485
486 /* vinf, s+t limbs */
487 if (s > t)
488 lmmp_mul_(vinf, a5, s, b1, t);
489 else
490 lmmp_mul_(vinf, b1, t, a5, s);
491
492 lmmp_toom_interp7_(dst, n, (enum toom7_flags)(aflags ^ bflags), vm2, vm1, v2, vh, s + t, scratch_out);
493}

引用了 a0, a1, a2, a3, a4, a5, as1, as2, asm1, asm2, b0, b1, bs1, bs2, bsm1, bsm2, lmmp_add_n_(), lmmp_addmul_1_(), lmmp_addshl1_n_(), lmmp_debug_assert, lmmp_inc_1, lmmp_mul_(), lmmp_mul_n_(), lmmp_shl_(), lmmp_toom_eval_pm1_(), lmmp_toom_eval_pm2_(), lmmp_toom_interp7_(), scratch_out, toom7_w1_neg, toom7_w3_neg, v0, v1, v2, vh, vinf, vm1 , 以及 vm2.

被这些函数引用 lmmp_mul_toom62_unbalance_().

+ 函数调用图:
+ 这是这个函数的调用关系图:

◆ lmmp_mul_toom62_cache_init_()

static enum toom7_flags lmmp_mul_toom62_cache_init_ ( mp_ptr restrict  dst,
mp_srcptr restrict  numa,
mp_srcptr restrict  numb,
mp_size_t  n,
mp_size_t  s,
mp_size_t  t,
mp_ptr restrict  scratch,
mp_ptr restrict  tmp,
mp_ptr restrict  bs1,
mp_ptr restrict  bsm1,
mp_ptr restrict  bs2,
mp_ptr restrict  bsm2,
mp_ptr restrict  bsh 
)
static

在文件 mul_toom62.c208 行定义.

222 {
223
224 mp_limb_t cy;
225 mp_ptr restrict as1, asm1, as2, asm2, ash;
226 enum toom7_flags aflags, bflags;
227
228#define a0 numa
229#define a1 (numa + n)
230#define a2 (numa + 2 * n)
231#define a3 (numa + 3 * n)
232#define a4 (numa + 4 * n)
233#define a5 (numa + 5 * n)
234#define b0 numb
235#define b1 (numb + n)
236
237
238 as1 = tmp;
239 asm1 = as1 + n + 1;
240 as2 = asm1 + n + 1;
241 asm2 = as2 + n + 1;
242 ash = asm2 + n + 1;
243
244
245 /* Compute as1 and asm1. */
246 aflags = (enum toom7_flags)(toom7_w3_neg & lmmp_toom_eval_pm1_(as1, asm1, 5, numa, n, s, dst));
247
248 /* Compute as2 and asm2. */
249 aflags = (enum toom7_flags)(aflags | (toom7_w1_neg & lmmp_toom_eval_pm2_(as2, asm2, 5, numa, n, s, dst)));
250
251 /* Compute ash = 32 a0 + 16 a1 + 8 a2 + 4 a3 + 2 a4 + a5
252 = 2*(2*(2*(2*(2*a0 + a1) + a2) + a3) + a4) + a5 */
253
254 cy = lmmp_addshl1_n_(ash, a1, a0, n);
255 cy = 2 * cy + lmmp_addshl1_n_(ash, a2, ash, n);
256 cy = 2 * cy + lmmp_addshl1_n_(ash, a3, ash, n);
257 cy = 2 * cy + lmmp_addshl1_n_(ash, a4, ash, n);
258 if (s < n) {
259 mp_limb_t cy2;
260 cy2 = lmmp_addshl1_n_(ash, a5, ash, s);
261 ash[n] = 2 * cy + lmmp_shl_(ash + s, ash + s, n - s, 1);
262 lmmp_inc_1(ash + s, cy2);
263 } else
264 ash[n] = 2 * cy + lmmp_addshl1_n_(ash, a5, ash, n);
265
266 /* Compute bs1 and bsm1. */
267 if (t == n) {
268 if (lmmp_cmp_(b0, b1, n) < 0) {
269 cy = lmmp_add_n_sub_n_(bs1, bsm1, b1, b0, n);
270 bflags = toom7_w3_neg;
271 } else {
272 cy = lmmp_add_n_sub_n_(bs1, bsm1, b0, b1, n);
273 bflags = (enum toom7_flags)0;
274 }
275 bs1[n] = cy >> 1;
276 } else {
277 bs1[n] = lmmp_add_(bs1, b0, n, b1, t);
278 if (lmmp_zero_q_(b0 + t, n - t) && lmmp_cmp_(b0, b1, t) < 0) {
279 lmmp_sub_n_(bsm1, b1, b0, t);
280 lmmp_zero(bsm1 + t, n - t);
281 bflags = toom7_w3_neg;
282 } else {
283 lmmp_sub_(bsm1, b0, n, b1, t);
284 bflags = (enum toom7_flags)0;
285 }
286 }
287
288 /* Compute bs2 and bsm2. Recycling bs1 and bsm1; bs2=bs1+b1, bsm2 =
289 bsm1 - b1 */
290 lmmp_add_(bs2, bs1, n + 1, b1, t);
291 if (bflags & toom7_w3_neg) {
292 bsm2[n] = lmmp_add_(bsm2, bsm1, n, b1, t);
293 bflags = (enum toom7_flags)(bflags | toom7_w1_neg);
294 } else {
295 if (t < n) {
296 if (lmmp_zero_q_(bsm1 + t, n - t) && lmmp_cmp_(bsm1, b1, t) < 0) {
297 lmmp_sub_n_(bsm2, b1, bsm1, t);
298 lmmp_zero(bsm2 + t, n + 1 - t);
299 bflags = (enum toom7_flags)(bflags | toom7_w1_neg);
300 } else {
301 lmmp_sub_(bsm2, bsm1, n, b1, t);
302 bsm2[n] = 0;
303 }
304 } else {
305 if (lmmp_cmp_(bsm1, b1, n) < 0) {
306 lmmp_sub_n_(bsm2, b1, bsm1, n);
307 bflags = (enum toom7_flags)(bflags | toom7_w1_neg);
308 } else {
309 lmmp_sub_n_(bsm2, bsm1, b1, n);
310 }
311 bsm2[n] = 0;
312 }
313 }
314
315 /* Compute bsh, recycling bs1. bsh=bs1+b0; */
316 bsh[n] = bs1[n] + lmmp_add_n_(bsh, bs1, b0, n);
317
318 lmmp_debug_assert(as1[n] <= 5);
319 lmmp_debug_assert(bs1[n] <= 1);
320 lmmp_debug_assert(asm1[n] <= 2);
321 lmmp_debug_assert(as2[n] <= 62);
322 lmmp_debug_assert(bs2[n] <= 2);
323 lmmp_debug_assert(asm2[n] <= 41);
324 lmmp_debug_assert(bsm2[n] <= 1);
325 lmmp_debug_assert(ash[n] <= 62);
326 lmmp_debug_assert(bsh[n] <= 2);
327
328#define v0 dst /* 2n */
329#define v1 (dst + 2 * n) /* 2n+1 */
330#define vinf (dst + 6 * n) /* s+t */
331#define v2 scratch /* 2n+1 */
332#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
333#define vh (scratch + 4 * n + 2) /* 2n+1 */
334#define vm1 (scratch + 6 * n + 3) /* 2n+1 */
335#define scratch_out (scratch + 8 * n + 4) /* 2n+1 */
336 /* Total scratch need: 10*n+5 */
337
338 /* Must be in allocation order, as they overwrite one limb beyond
339 * 2n+1. */
340 lmmp_mul_n_(v2, as2, bs2, n + 1); /* v2, 2n+1 limbs */
341 lmmp_mul_n_(vm2, asm2, bsm2, n + 1); /* vm2, 2n+1 limbs */
342 lmmp_mul_n_(vh, ash, bsh, n + 1); /* vh, 2n+1 limbs */
343
344 /* vm1, 2n+1 limbs */
345 lmmp_mul_n_(vm1, asm1, bsm1, n);
346 cy = 0;
347 if (asm1[n] == 1) {
348 cy = lmmp_add_n_(vm1 + n, vm1 + n, bsm1, n);
349 } else if (asm1[n] == 2) {
350 cy = lmmp_addshl1_n_(vm1 + n, vm1 + n, bsm1, n);
351 }
352 vm1[2 * n] = cy;
353
354 /* v1, 2n+1 limbs */
355 lmmp_mul_n_(v1, as1, bs1, n);
356 if (as1[n] == 1) {
357 cy = bs1[n] + lmmp_add_n_(v1 + n, v1 + n, bs1, n);
358 } else if (as1[n] == 2) {
359 cy = 2 * bs1[n] + lmmp_addshl1_n_(v1 + n, v1 + n, bs1, n);
360 } else if (as1[n] != 0) {
361 cy = as1[n] * bs1[n] + lmmp_addmul_1_(v1 + n, bs1, n, as1[n]);
362 } else
363 cy = 0;
364 if (bs1[n] != 0)
365 cy += lmmp_add_n_(v1 + n, v1 + n, as1, n);
366 v1[2 * n] = cy;
367
368 lmmp_mul_n_(v0, a0, b0, n); /* v0, 2n limbs */
369
370 /* vinf, s+t limbs */
371 if (s > t)
372 lmmp_mul_(vinf, a5, s, b1, t);
373 else
374 lmmp_mul_(vinf, b1, t, a5, s);
375
376 lmmp_toom_interp7_(dst, n, (enum toom7_flags)(aflags ^ bflags), vm2, vm1, v2, vh, s + t, scratch_out);
377
378 return bflags;
379}

引用了 a0, a1, a2, a3, a4, a5, as1, as2, asm1, asm2, b0, b1, bs1, bs2, bsm1, bsm2, lmmp_add_(), lmmp_add_n_(), lmmp_add_n_sub_n_(), lmmp_addmul_1_(), lmmp_addshl1_n_(), lmmp_cmp_(), lmmp_debug_assert, lmmp_inc_1, lmmp_mul_(), lmmp_mul_n_(), lmmp_shl_(), lmmp_sub_(), lmmp_sub_n_(), lmmp_toom_eval_pm1_(), lmmp_toom_eval_pm2_(), lmmp_toom_interp7_(), lmmp_zero, lmmp_zero_q_(), scratch_out, toom7_w1_neg, toom7_w3_neg, v0, v1, v2, vh, vinf, vm1 , 以及 vm2.

被这些函数引用 lmmp_mul_toom62_unbalance_().

+ 函数调用图:
+ 这是这个函数的调用关系图:

◆ lmmp_mul_toom62_unbalance_()

void lmmp_mul_toom62_unbalance_ ( mp_ptr restrict  dst,
mp_srcptr restrict  numa,
mp_size_t  na,
mp_srcptr restrict  numb,
mp_size_t  nb 
)

在文件 mul_toom62.c495 行定义.

501 {
502 lmmp_param_assert(na >= 5 * nb);
503 TEMP_DECL;
504 mp_size_t n = 1 + (3 * nb - 1) / (mp_size_t)6, s = 3 * nb - 5 * n, t = nb - n;
505 mp_limb_t* restrict ws = SALLOC_TYPE(nb, mp_limb_t);
506 mp_ptr restrict scratch = BALLOC_TYPE(20 * n + 20, mp_limb_t);
507 mp_ptr restrict tmp = scratch + 10 * n + 10;
508 mp_ptr restrict bs1, bsm1, bs2, bsm2, bsh;
509 bs1 = tmp + 5 * n + 5;
510 bsm1 = bs1 + n + 1;
511 bs2 = bsm1 + n;
512 bsm2 = bs2 + n + 1;
513 bsh = bsm2 + n + 1;
514 enum toom7_flags bflags = lmmp_mul_toom62_cache_init_(dst, numa, numb, n, s, t, scratch, tmp, bs1, bsm1, bs2, bsm2, bsh);
515 dst += 3 * nb;
516 numa += 3 * nb;
517 na -= 3 * nb;
518 lmmp_copy(ws, dst, nb);
519 while (na >= 5 * nb) {
520 lmmp_mul_toom62_cache_(dst, numa, numb, n, s, t, scratch, tmp, bs1, bsm1, bs2, bsm2, bsh, bflags);
521 if (lmmp_add_n_(dst, dst, ws, nb))
522 lmmp_inc(dst + nb);
523 dst += 3 * nb;
524 numa += 3 * nb;
525 na -= 3 * nb;
526 lmmp_copy(ws, dst, nb);
527 }
528 // 0 <= na < 2 nb
529 if (na >= nb)
530 lmmp_mul_(dst, numa, na, numb, nb);
531 else
532 lmmp_mul_(dst, numb, nb, numa, na);
533 if (lmmp_add_n_(dst, dst, ws, nb))
534 lmmp_inc(dst + nb);
535 TEMP_FREE;
536}
#define lmmp_copy(dst, src, n)
Definition lmmp.h:364
#define lmmp_inc(p)
大数加1宏(预期无进位)
Definition lmmpn.h:946
static void lmmp_mul_toom62_cache_(mp_ptr restrict dst, mp_srcptr restrict numa, mp_srcptr restrict numb, mp_size_t n, mp_size_t s, mp_size_t t, mp_ptr restrict scratch, mp_ptr restrict tmp, mp_srcptr restrict bs1, mp_srcptr restrict bsm1, mp_srcptr restrict bs2, mp_srcptr restrict bsm2, mp_srcptr restrict bsh, enum toom7_flags bflags)
Definition mul_toom62.c:381
static enum toom7_flags lmmp_mul_toom62_cache_init_(mp_ptr restrict dst, mp_srcptr restrict numa, mp_srcptr restrict numb, mp_size_t n, mp_size_t s, mp_size_t t, mp_ptr restrict scratch, mp_ptr restrict tmp, mp_ptr restrict bs1, mp_ptr restrict bsm1, mp_ptr restrict bs2, mp_ptr restrict bsm2, mp_ptr restrict bsh)
Definition mul_toom62.c:208
#define TEMP_DECL
Definition tmp_alloc.h:72
#define TEMP_FREE
Definition tmp_alloc.h:93
#define BALLOC_TYPE(n, type)
Definition tmp_alloc.h:89

引用了 BALLOC_TYPE, bs1, bs2, bsm1, bsm2, lmmp_add_n_(), lmmp_copy, lmmp_inc, lmmp_mul_(), lmmp_mul_toom62_cache_(), lmmp_mul_toom62_cache_init_(), lmmp_param_assert, SALLOC_TYPE, scratch, TEMP_DECL , 以及 TEMP_FREE.

+ 函数调用图: