LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
载入中...
搜索中...
未找到
mul_toom52.c 文件参考
+ mul_toom52.c 的引用(Include)关系图:

浏览源代码.

宏定义

#define a0   numa
 
#define a0a2   scratch
 
#define a1   (numa + n)
 
#define a1a3   asm1
 
#define a2   (numa + 2 * n)
 
#define a3   (numa + 3 * n)
 
#define a4   (numa + 4 * n)
 
#define as1   (dst + 4 * n + 4) /* n+1 */
 
#define as2   (dst + 3 * n + 3) /* n+1 */
 
#define asm1   (scratch + 3 * n + 3) /* n+1 */
 
#define asm2   (scratch + 4 * n + 4) /* n+1 */
 
#define b0   numb
 
#define b1   (numb + n)
 
#define bs1   dst /* n+1 */
 
#define bs2   (dst + 2 * n + 2) /* n+1 */
 
#define bsm1   (scratch + 2 * n + 2) /* n */
 
#define bsm2   (dst + n + 1) /* n+1 */
 
#define v0   dst /* 2n */
 
#define v1   (dst + 2 * n) /* 2n+1 */
 
#define v2   (scratch + 4 * n + 2) /* 2n+1 */
 
#define vinf   (dst + 5 * n) /* s+t */
 
#define vm1   (scratch) /* 2n+1 */
 
#define vm2   (scratch + 2 * n + 1) /* 2n+1 */
 

函数

void lmmp_mul_toom52_ (mp_ptr restrict dst, mp_srcptr restrict numa, mp_size_t na, mp_srcptr restrict numb, mp_size_t nb)
 

宏定义说明

◆ a0

#define a0   numa

◆ a0a2

#define a0a2   scratch

◆ a1

#define a1   (numa + n)

◆ a1a3

#define a1a3   asm1

◆ a2

#define a2   (numa + 2 * n)

◆ a3

#define a3   (numa + 3 * n)

◆ a4

#define a4   (numa + 4 * n)

◆ as1

#define as1   (dst + 4 * n + 4) /* n+1 */

◆ as2

#define as2   (dst + 3 * n + 3) /* n+1 */

◆ asm1

#define asm1   (scratch + 3 * n + 3) /* n+1 */

◆ asm2

#define asm2   (scratch + 4 * n + 4) /* n+1 */

◆ b0

#define b0   numb

◆ b1

#define b1   (numb + n)

◆ bs1

#define bs1   dst /* n+1 */

◆ bs2

#define bs2   (dst + 2 * n + 2) /* n+1 */

◆ bsm1

#define bsm1   (scratch + 2 * n + 2) /* n */

◆ bsm2

#define bsm2   (dst + n + 1) /* n+1 */

◆ v0

#define v0   dst /* 2n */

◆ v1

#define v1   (dst + 2 * n) /* 2n+1 */

◆ v2

#define v2   (scratch + 4 * n + 2) /* 2n+1 */

◆ vinf

#define vinf   (dst + 5 * n) /* s+t */

◆ vm1

#define vm1   (scratch) /* 2n+1 */

◆ vm2

#define vm2   (scratch + 2 * n + 1) /* 2n+1 */

函数说明

◆ lmmp_mul_toom52_()

void lmmp_mul_toom52_ ( mp_ptr restrict  dst,
mp_srcptr restrict  numa,
mp_size_t  na,
mp_srcptr restrict  numb,
mp_size_t  nb 
)

在文件 mul_toom52.c 第 29 行定义.

29 {
30 lmmp_param_assert(9 * na >= 20 * nb);
31 lmmp_param_assert(3 * nb >= na);
32 mp_size_t n, s, t;
33 enum toom6_flags flags;
34
35#define a0 numa
36#define a1 (numa + n)
37#define a2 (numa + 2 * n)
38#define a3 (numa + 3 * n)
39#define a4 (numa + 4 * n)
40#define b0 numb
41#define b1 (numb + n)
42
43 n = 1 + (2 * na >= 5 * nb ? (na - 1) / (mp_size_t)5 : (nb - 1) >> 1);
45 mp_ptr restrict scratch = SALLOC_TYPE(6 * n + 6, mp_limb_t);
46
47 s = na - 4 * n;
48 t = nb - n;
49
50 lmmp_debug_assert(0 < s && s <= n);
51 lmmp_debug_assert(0 < t && t <= n);
52
53 /* Ensures that 5 values of n+1 limbs each fits in the product area.
54 Borderline cases are na = 32, nb = 8, n = 7, and na = 36, nb = 9,
55 n = 8. */
56 lmmp_debug_assert(s + t >= 5);
57
58#define v0 dst /* 2n */
59#define vm1 (scratch) /* 2n+1 */
60#define v1 (dst + 2 * n) /* 2n+1 */
61#define vm2 (scratch + 2 * n + 1) /* 2n+1 */
62#define v2 (scratch + 4 * n + 2) /* 2n+1 */
63#define vinf (dst + 5 * n) /* s+t */
64#define bs1 dst /* n+1 */
65#define bsm1 (scratch + 2 * n + 2) /* n */
66#define asm1 (scratch + 3 * n + 3) /* n+1 */
67#define asm2 (scratch + 4 * n + 4) /* n+1 */
68#define bsm2 (dst + n + 1) /* n+1 */
69#define bs2 (dst + 2 * n + 2) /* n+1 */
70#define as2 (dst + 3 * n + 3) /* n+1 */
71#define as1 (dst + 4 * n + 4) /* n+1 */
72
73
74#define a0a2 scratch
75#define a1a3 asm1
76
77 /* Compute as2 and asm2. */
78 flags = (enum toom6_flags)(toom6_vm2_neg & lmmp_toom_eval_pm2_(as2, asm2, 4, numa, n, s, a1a3));
79
80 /* Compute bs1 and bsm1. */
81 if (t == n) {
82 mp_limb_t cy;
83 if (lmmp_cmp_(b0, b1, n) < 0) {
84 cy = lmmp_add_n_sub_n_(bs1, bsm1, b1, b0, n);
85 flags = (enum toom6_flags)(flags ^ toom6_vm1_neg);
86 } else {
87 cy = lmmp_add_n_sub_n_(bs1, bsm1, b0, b1, n);
88 }
89 bs1[n] = cy >> 1;
90 } else {
91 bs1[n] = lmmp_add_(bs1, b0, n, b1, t);
92 if (lmmp_zero_q_(b0 + t, n - t) && lmmp_cmp_(b0, b1, t) < 0) {
93 lmmp_sub_n_(bsm1, b1, b0, t);
94 lmmp_zero(bsm1 + t, n - t);
95 flags = (enum toom6_flags)(flags ^ toom6_vm1_neg);
96 } else {
97 lmmp_sub_(bsm1, b0, n, b1, t);
98 }
99 }
100
101 /* Compute bs2 and bsm2, recycling bs1 and bsm1. bs2=bs1+b1; bsm2=bsm1-b1 */
102 lmmp_add_(bs2, bs1, n + 1, b1, t);
103 if (flags & toom6_vm1_neg) {
104 bsm2[n] = lmmp_add_(bsm2, bsm1, n, b1, t);
105 flags = (enum toom6_flags)(flags ^ toom6_vm2_neg);
106 } else {
107 bsm2[n] = 0;
108 if (t == n) {
109 if (lmmp_cmp_(bsm1, b1, n) < 0) {
110 lmmp_sub_n_(bsm2, b1, bsm1, n);
111 flags = (enum toom6_flags)(flags ^ toom6_vm2_neg);
112 } else {
113 lmmp_sub_n_(bsm2, bsm1, b1, n);
114 }
115 } else {
116 if (lmmp_zero_q_(bsm1 + t, n - t) && lmmp_cmp_(bsm1, b1, t) < 0) {
117 lmmp_sub_n_(bsm2, b1, bsm1, t);
118 lmmp_zero(bsm2 + t, n - t);
119 flags = (enum toom6_flags)(flags ^ toom6_vm2_neg);
120 } else {
121 lmmp_sub_(bsm2, bsm1, n, b1, t);
122 }
123 }
124 }
125
126 /* Compute as1 and asm1. */
127 flags = (enum toom6_flags)(flags ^ (toom6_vm1_neg & lmmp_toom_eval_pm1_(as1, asm1, 4, numa, n, s, a0a2)));
128
129 lmmp_debug_assert(as1[n] <= 4);
130 lmmp_debug_assert(bs1[n] <= 1);
131 lmmp_debug_assert(asm1[n] <= 2);
132 /* lmmp_debug_assert (bsm1[n] <= 1); */
133 lmmp_debug_assert(as2[n] <= 30);
134 lmmp_debug_assert(bs2[n] <= 2);
135 lmmp_debug_assert(asm2[n] <= 20);
136 lmmp_debug_assert(bsm2[n] <= 1);
137
138 /* vm1, 2n+1 limbs */
139 lmmp_mul_(vm1, asm1, n + 1, bsm1, n); /* W4 */
140
141 /* vm2, 2n+1 limbs */
142 lmmp_mul_n_(vm2, asm2, bsm2, n + 1); /* W2 */
143
144 /* v2, 2n+1 limbs */
145 lmmp_mul_n_(v2, as2, bs2, n + 1); /* W1 */
146
147 /* v1, 2n+1 limbs */
148 lmmp_mul_n_(v1, as1, bs1, n + 1); /* W3 */
149
150 /* vinf, s+t limbs */ /* W0 */
151 if (s > t)
152 lmmp_mul_(vinf, a4, s, b1, t);
153 else
154 lmmp_mul_(vinf, b1, t, a4, s);
155
156 /* v0, 2n limbs */
157 lmmp_mul_n_(v0, numa, numb, n); /* W5 */
158
159 lmmp_toom_interp6_(dst, n, flags, vm1, vm2, v2, t + s);
161#undef v0
162#undef vm1
163#undef v1
164#undef vm2
165#undef v2
166#undef vinf
167#undef bs1
168#undef bs2
169#undef bsm1
170#undef bsm2
171#undef asm1
172#undef asm2
173#undef as1
174#undef as2
175#undef a0a2
176#undef b0b2
177#undef a1a3
178#undef a0
179#undef a1
180#undef a2
181#undef a3
182#undef b0
183#undef b1
184#undef b2
185}
#define scratch
mp_limb_t * mp_ptr
Definition lmmp.h:215
#define lmmp_zero(dst, n)
Definition lmmp.h:366
uint64_t mp_size_t
Definition lmmp.h:212
#define lmmp_debug_assert(x)
Definition lmmp.h:387
uint64_t mp_limb_t
Definition lmmp.h:211
#define lmmp_param_assert(x)
Definition lmmp.h:398
static mp_limb_t lmmp_add_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
大数加法静态内联函数 [dst,na]=[numa,na]+[numb,nb]
Definition lmmpn.h:1058
static int lmmp_cmp_(mp_srcptr numa, mp_srcptr numb, mp_size_t n)
大数比较函数(内联)
Definition lmmpn.h:1004
void lmmp_mul_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
不等长大数乘法操作 [dst,na+nb] = [numa,na] * [numb,nb]
void lmmp_mul_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
等长大数乘法操作 [dst,2*n] = [numa,n] * [numb,n]
Definition mul.c:99
static mp_limb_t lmmp_sub_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
大数减法静态内联函数 [dst,na]=[numa,na]-[numb,nb]
Definition lmmpn.h:1072
mp_limb_t lmmp_add_n_sub_n_(mp_ptr dsta, mp_ptr dstb, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
同时执行n位加法和减法 ([dsta,n],[dstb,n]) = ([numa,n]+[numb,n],[numa,n]-[numb,n])
Definition add_n_sub_n.c:10
mp_limb_t lmmp_sub_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无借位的n位减法 [dst,n] = [numa,n] - [numb,n]
Definition sub_n.c:70
static int lmmp_zero_q_(mp_srcptr p, mp_size_t n)
大数判零函数(内联)
Definition lmmpn.h:1027
#define bs1
#define bs2
#define b0
#define v0
#define a4
#define b1
#define bsm1
#define as2
#define v2
#define vm1
#define asm2
#define bsm2
#define a0a2
#define asm1
#define vinf
#define as1
#define v1
#define a1a3
#define vm2
#define SALLOC_TYPE(n, type)
Definition tmp_alloc.h:87
#define TEMP_S_DECL
Definition tmp_alloc.h:76
#define TEMP_S_FREE
Definition tmp_alloc.h:105
int lmmp_toom_eval_pm2_(mp_ptr xp2, mp_ptr xm2, unsigned k, mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
通用高阶 Toom 求值:k次多项式在 x = +2 和 x = -2 处求值
void lmmp_toom_interp6_(mp_ptr dst, mp_size_t n, enum toom6_flags flags, mp_ptr w4, mp_ptr w2, mp_ptr w1, mp_size_t w0n)
Toom插值计算(6点插值):用于Toom-43和Toom-52 乘法算法
toom6_flags
Definition toom_interp.h:25
@ toom6_vm2_neg
Definition toom_interp.h:25
@ toom6_vm1_neg
Definition toom_interp.h:25
int lmmp_toom_eval_pm1_(mp_ptr xp1, mp_ptr xm1, unsigned k, mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
通用高阶 Toom 求值:k次多项式在 x = +1 和 x = -1 处求值

引用了 a0a2, a1a3, a4, as1, as2, asm1, asm2, b0, b1, bs1, bs2, bsm1, bsm2, lmmp_add_(), lmmp_add_n_sub_n_(), lmmp_cmp_(), lmmp_debug_assert, lmmp_mul_(), lmmp_mul_n_(), lmmp_param_assert, lmmp_sub_(), lmmp_sub_n_(), lmmp_toom_eval_pm1_(), lmmp_toom_eval_pm2_(), lmmp_toom_interp6_(), lmmp_zero, lmmp_zero_q_(), SALLOC_TYPE, scratch, TEMP_S_DECL, TEMP_S_FREE, toom6_vm1_neg, toom6_vm2_neg, v0, v1, v2, vinf, vm1 , 以及 vm2.

+ 函数调用图: