LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
载入中...
搜索中...
未找到
mul_toom32.c 文件参考
+ mul_toom32.c 的引用(Include)关系图:

浏览源代码.

宏定义

#define a0   numa
 
#define a1   (numa + n)
 
#define a2   (numa + 2 * n)
 
#define am1   (dst + 3 * n + 2)
 
#define ap1   (dst + 2 * n + 1)
 
#define b0   numb
 
#define b1   (numb + n)
 
#define bm1   (dst)
 
#define bp1   (dst + n)
 
#define r0   (dst)
 
#define r1   (dst + n)
 
#define r2   (dst + 2 * n)
 
#define r3   (dst + 3 * n)
 
#define v1   (tp)
 
#define vm1   (tp + 2 * n + 1)
 

函数

void lmmp_mul_toom32_ (mp_ptr restrict dst, mp_srcptr restrict numa, mp_size_t na, mp_srcptr restrict numb, mp_size_t nb)
 

宏定义说明

◆ a0

#define a0   numa

◆ a1

#define a1   (numa + n)

◆ a2

#define a2   (numa + 2 * n)

◆ am1

#define am1   (dst + 3 * n + 2)

◆ ap1

#define ap1   (dst + 2 * n + 1)

◆ b0

#define b0   numb

◆ b1

#define b1   (numb + n)

◆ bm1

#define bm1   (dst)

◆ bp1

#define bp1   (dst + n)

◆ r0

#define r0   (dst)

◆ r1

#define r1   (dst + n)

◆ r2

#define r2   (dst + 2 * n)

◆ r3

#define r3   (dst + 3 * n)

◆ v1

#define v1   (tp)

◆ vm1

#define vm1   (tp + 2 * n + 1)

函数说明

◆ lmmp_mul_toom32_()

void lmmp_mul_toom32_ ( mp_ptr restrict  dst,
mp_srcptr restrict  numa,
mp_size_t  na,
mp_srcptr restrict  numb,
mp_size_t  nb 
)

在文件 mul_toom32.c24 行定义.

24 {
25 lmmp_param_assert(nb >= 12);
26 lmmp_param_assert(4 * na >= 5 * nb);
27 lmmp_param_assert(5 * na <= 9 * nb);
29 mp_size_t n = 1 + (2 * na >= 3 * nb ? (na - 1) / 3 : (nb - 1) >> 1), s = na - 2 * n, t = nb - n;
30 int vm1_neg;
31 mp_limb_t cy, hi;
32 mp_limb_t* restrict tp = SALLOC_TYPE(4 * n + 2, mp_limb_t);
33
34#define a0 numa
35#define a1 (numa + n)
36#define a2 (numa + 2 * n)
37#define b0 numb
38#define b1 (numb + n)
39 // nb>=12, so that s+t>=n+2
40#define bm1 (dst) //[dst,n]
41#define bp1 (dst + n) //[dst+n,n+1]
42#define ap1 (dst + 2 * n + 1) //[dst+2*n+1,n+1]
43#define am1 (dst + 3 * n + 2) //[dst+3*n+2,n]:hi
44#define v1 (tp) //[tp,2*n+1]
45#define vm1 (tp + 2 * n + 1) //[tp+2*n+1,2*n+1]
46#define r0 (dst)
47#define r1 (dst + n)
48#define r2 (dst + 2 * n)
49#define r3 (dst + 3 * n)
50
51 // ap1=a0+a1+a3, am1=a0-a1+a3
52 ap1[n] = lmmp_add_(ap1, a0, n, a2, s);
53 if (ap1[n] == 0 && lmmp_cmp_(ap1, a1, n) < 0) {
54 ap1[n] = lmmp_add_n_sub_n_(ap1, am1, a1, ap1, n) >> 1;
55 hi = 0;
56 vm1_neg = 1;
57 } else {
58 cy = lmmp_add_n_sub_n_(ap1, am1, ap1, a1, n);
59 hi = ap1[n] - (cy & 1);
60 ap1[n] += (cy >> 1);
61 vm1_neg = 0;
62 }
63
64 // bp1=b0+b1, bm1=b0-b1
65 if (t == n) {
66 if (lmmp_cmp_(b0, b1, n) < 0) {
67 bp1[n] = lmmp_add_n_sub_n_(bp1, bm1, b1, b0, n) >> 1;
68 vm1_neg ^= 1;
69 } else {
70 bp1[n] = lmmp_add_n_sub_n_(bp1, bm1, b0, b1, n) >> 1;
71 }
72 } else {
73 if (lmmp_zero_q_(b0 + t, n - t) && lmmp_cmp_(b0, b1, t) < 0) {
74 cy = lmmp_add_n_sub_n_(bp1, bm1, b1, b0, t);
75 lmmp_zero(bm1 + t, n - t);
76 vm1_neg ^= 1;
77 } else {
78 cy = lmmp_add_n_sub_n_(bp1, bm1, b0, b1, t);
79 lmmp_sub_1_(bm1 + t, b0 + t, n - t, cy & 1);
80 }
81 bp1[n] = lmmp_add_1_(bp1 + t, b0 + t, n - t, cy >> 1);
82 }
83
84 // v1=ap1*bp1
85 lmmp_mul_n_(v1, ap1, bp1, n + 1);
86
87 // vm=am1*bm1
88 lmmp_mul_n_(vm1, am1, bm1, n);
89 if (hi)
90 hi = lmmp_add_n_(vm1 + n, vm1 + n, bm1, n);
91 vm1[2 * n] = hi;
92
93 // r0=a0*b0
94 // r3=a2*b1
95 lmmp_mul_n_(r0, a0, b0, n);
96 if (s > t)
97 lmmp_mul_(r3, a2, s, b1, t);
98 else
99 lmmp_mul_(r3, b1, t, a2, s);
100
101 // v1=(v1+vm1)/2, (=a0*b0+a2*b0+a1*b1)
102 // vm1=v1-vm1, (=a1*b0+a0*b1+a2*b1)
103 if (vm1_neg) {
104 lmmp_shr1sub_n_(v1, v1, vm1, 2 * n + 1);
105 lmmp_add_n_(vm1, v1, vm1, 2 * n + 1);
106 } else {
107 lmmp_shr1add_n_(v1, v1, vm1, 2 * n + 1);
108 lmmp_sub_n_(vm1, v1, vm1, 2 * n + 1);
109 }
110
111 // vm1-=r3, (=r1)
112 // v1-=r0, (=r2)
113 lmmp_sub_(vm1, vm1, 2 * n + 1, r3, s + t);
114 v1[2 * n] -= lmmp_sub_n_(v1, v1, r0, 2 * n);
115
116 // r=r0+vm1*B+v1*B^2+r3*B^4
117 cy = vm1[2 * n] + lmmp_add_(r1, vm1, 2 * n, r1, n);
118 lmmp_add_(r2, r2, n + s + t, v1, 2 * n + 1);
119 lmmp_inc_1(r3, cy);
121}
#define lmmp_zero(dst, n)
Definition lmmp.h:366
uint64_t mp_size_t
Definition lmmp.h:212
uint64_t mp_limb_t
Definition lmmp.h:211
#define lmmp_param_assert(x)
Definition lmmp.h:398
static mp_limb_t lmmp_add_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
大数加法静态内联函数 [dst,na]=[numa,na]+[numb,nb]
Definition lmmpn.h:1058
mp_limb_t lmmp_shr1add_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
加法后右移1位 [dst,n] = ([numa,n] + [numb,n]) >> 1
Definition shr.c:52
static int lmmp_cmp_(mp_srcptr numa, mp_srcptr numb, mp_size_t n)
大数比较函数(内联)
Definition lmmpn.h:1004
static mp_limb_t lmmp_add_1_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_limb_t x)
大数加单精度数静态内联函数 [dst,na]=[numa,na]+x
Definition lmmpn.h:1111
void lmmp_mul_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
不等长大数乘法操作 [dst,na+nb] = [numa,na] * [numb,nb]
void lmmp_mul_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
等长大数乘法操作 [dst,2*n] = [numa,n] * [numb,n]
Definition mul.c:99
static mp_limb_t lmmp_sub_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
大数减法静态内联函数 [dst,na]=[numa,na]-[numb,nb]
Definition lmmpn.h:1072
mp_limb_t lmmp_add_n_sub_n_(mp_ptr dsta, mp_ptr dstb, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
同时执行n位加法和减法 ([dsta,n],[dstb,n]) = ([numa,n]+[numb,n],[numa,n]-[numb,n])
Definition add_n_sub_n.c:10
static mp_limb_t lmmp_sub_1_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_limb_t x)
大数减单精度数静态内联函数 [dst,na]=[numa,na]-x
Definition lmmpn.h:1122
mp_limb_t lmmp_sub_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无借位的n位减法 [dst,n] = [numa,n] - [numb,n]
Definition sub_n.c:70
mp_limb_t lmmp_shr1sub_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
减法后右移1位 [dst,n] = ([numa,n] - [numb,n]) >> 1
Definition shr.c:106
#define lmmp_inc_1(p, inc)
大数加指定值宏(预期无进位)
Definition lmmpn.h:958
mp_limb_t lmmp_add_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无进位的n位加法 [dst,n] = [numa,n] + [numb,n]
Definition add_n.c:71
static int lmmp_zero_q_(mp_srcptr p, mp_size_t n)
大数判零函数(内联)
Definition lmmpn.h:1027
#define r2
#define b0
#define b1
#define am1
#define ap1
#define bp1
#define vm1
#define r1
#define bm1
#define a2
#define a0
#define a1
#define v1
#define r3
#define r0
#define tp
#define SALLOC_TYPE(n, type)
Definition tmp_alloc.h:87
#define TEMP_S_DECL
Definition tmp_alloc.h:76
#define TEMP_S_FREE
Definition tmp_alloc.h:105

引用了 a0, a1, a2, am1, ap1, b0, b1, bm1, bp1, lmmp_add_(), lmmp_add_1_(), lmmp_add_n_(), lmmp_add_n_sub_n_(), lmmp_cmp_(), lmmp_inc_1, lmmp_mul_(), lmmp_mul_n_(), lmmp_param_assert, lmmp_shr1add_n_(), lmmp_shr1sub_n_(), lmmp_sub_(), lmmp_sub_1_(), lmmp_sub_n_(), lmmp_zero, lmmp_zero_q_(), r0, r1, r2, r3, SALLOC_TYPE, TEMP_S_DECL, TEMP_S_FREE, tp, v1 , 以及 vm1.

+ 函数调用图: