LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
载入中...
搜索中...
未找到
mul_toom_interp7.c
浏览该文件的文档.
1/*
2 * LAMMP - Copyright (C) 2025-2026 HJimmyK(Jericho Knox)
3 * This file is part of lammp, under the GNU LGPL v2 license.
4 * See LICENSE in the project root for the full license text.
5 */
6
7#include "../../include/lammp/impl/toom_interp.h"
8#include "../../include/lammp/impl/divexact.h"
9
10/*
11 syms x integer
12 a = 12*x^3 + 22*x^2 + 32*x + 42;
13 b = 56*x^3 + 62*x^2 + 7*x + 82;
14 c_true = a*b;
15
16 w0 = subs(a,0)*subs(b,0);
17 w1 = subs(a,-2)*subs(b,-2);
18 w2 = subs(a,1)*subs(b,1);
19 w3 = subs(a,-1)*subs(b,-1);
20 w4 = subs(a,2)*subs(b,2);
21 w5 = 64*subs(a,1/2)*subs(b,1/2);
22 w6 = 12*56;
23
24 w5 = w5 + w4;
25 w1 = (w4 - w1)/2;
26 w4 = w4 - w0;
27 w4 = (w4 - w1)/4 - w6*16;
28 w3 = (w2 - w3)/2;
29 w2 = w2 - w3;
30 w5 = w5 - 65*w2;
31 w2 = w2 - w6 - w0;
32 w5 = (w5 + 45*w2)/2;
33 w4 = (w4 - w2)/3;
34 w2 = w2 - w4;
35 w1 = w5 - w1;
36 w5 = (w5 - 8*w3)/9;
37 w3 = w3 - w5;
38 w1 = (w1/15 + w5)/2;
39 w5 = w5 - w1;
40
41 c0 = w0; c1 = w1; c2 = w2; c3 = w3; c4 = w4; c5 = w5; c6 = w6;
42 c_calc = c6*x^6 + c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0;
43*/
44
46 mp_ptr dst,
47 mp_size_t n,
48 enum toom7_flags flags,
49 mp_ptr w1,
50 mp_ptr w3,
51 mp_ptr w4,
52 mp_ptr w5,
53 mp_size_t w6n,
55) {
56 lmmp_param_assert(w6n > 0);
57 lmmp_param_assert(w6n <= 2 * n);
58 mp_size_t m;
59 mp_limb_t cy;
60
61 m = 2 * n + 1;
62#define w0 dst
63#define w2 (dst + 2 * n)
64#define w6 (dst + 6 * n)
65
66 lmmp_add_n_(w5, w5, w4, m);
67 if (flags & toom7_w1_neg) {
68 lmmp_shr1add_n_(w1, w1, w4, m);
69 } else {
70 lmmp_shr1sub_n_(w1, w4, w1, m);
71 }
72 lmmp_sub_(w4, w4, m, w0, 2 * n);
73 lmmp_sub_n_(w4, w4, w1, m);
74
75 lmmp_debug_assert(!(w4[0] & 3));
76
77 lmmp_shr_(w4, w4, m, 2); /* w4>=0 */
78
79 tp[w6n] = lmmp_shl_(tp, w6, w6n, 4);
80 lmmp_sub_(w4, w4, m, tp, w6n + 1);
81
82 if (flags & toom7_w3_neg) {
83 lmmp_shr1add_n_(w3, w3, w2, m);
84 } else {
85 lmmp_shr1sub_n_(w3, w2, w3, m);
86 }
87
88 lmmp_sub_n_(w2, w2, w3, m);
89
90 lmmp_submul_1_(w5, w2, m, 65);
91 lmmp_sub_(w2, w2, m, w6, w6n);
92 lmmp_sub_(w2, w2, m, w0, 2 * n);
93
94 lmmp_addmul_1_(w5, w2, m, 45);
95 lmmp_debug_assert(!(w5[0] & 1));
96 lmmp_shr_(w5, w5, m, 1);
97 lmmp_sub_n_(w4, w4, w2, m);
98
99 lmmp_divexact_by3_(w4, w4, m);
100 lmmp_sub_n_(w2, w2, w4, m);
101
102 lmmp_sub_n_(w1, w5, w1, m);
103 lmmp_shl_(tp, w3, m, 3);
104 lmmp_sub_n_(w5, w5, tp, m);
106 lmmp_sub_n_(w3, w3, w5, m);
107
108 lmmp_divexact_by15_(w1, w1, m);
109 lmmp_shr1add_n_(w1, w1, w5, m);
110 w1[m - 1] &= LIMB_MAX >> 1;
111
112 lmmp_sub_n_(w5, w5, w1, m);
113
114 /* These bounds are valid for the 4x4 polynomial product of toom44,
115 * and they are conservative for toom53 and toom62. */
116 lmmp_debug_assert(w1[2 * n] < 2);
117 lmmp_debug_assert(w2[2 * n] < 3);
118 lmmp_debug_assert(w3[2 * n] < 4);
119 lmmp_debug_assert(w4[2 * n] < 3);
120 lmmp_debug_assert(w5[2 * n] < 2);
121
122 cy = lmmp_add_n_(dst + n, dst + n, w1, m);
123 lmmp_inc_1(w2 + n + 1, cy);
124 cy = lmmp_add_n_(dst + 3 * n, dst + 3 * n, w3, n);
125 lmmp_inc_1(w3 + n, w2[2 * n] + cy);
126 cy = lmmp_add_n_(dst + 4 * n, w3 + n, w4, n);
127 lmmp_inc_1(w4 + n, w3[2 * n] + cy);
128 cy = lmmp_add_n_(dst + 5 * n, w4 + n, w5, n);
129 lmmp_inc_1(w5 + n, w4[2 * n] + cy);
130 if (w6n > n + 1) {
131 cy = lmmp_add_n_(dst + 6 * n, dst + 6 * n, w5 + n, n + 1);
132 lmmp_inc_1(dst + 7 * n + 1, cy);
133 } else {
134 lmmp_assert(lmmp_add_n_(dst + 6 * n, dst + 6 * n, w5 + n, w6n));
135 }
136}
static void lmmp_divexact_by15_(mp_ptr dst, mp_srcptr numa, mp_size_t na)
精确除以15([dst,na] = [numa,na] / 15)
Definition divexact.h:85
static void lmmp_divexact_by9_(mp_ptr dst, mp_srcptr numa, mp_size_t na)
精确除以9([dst,na] = [numa,na] / 9)
Definition divexact.h:59
static void lmmp_divexact_by3_(mp_ptr dst, mp_srcptr numa, mp_size_t na)
精确除以3([dst,na] = [numa,na] / 3)
Definition divexact.h:33
mp_limb_t * mp_ptr
Definition lmmp.h:215
uint64_t mp_size_t
Definition lmmp.h:212
#define lmmp_debug_assert(x)
Definition lmmp.h:387
#define LIMB_MAX
Definition lmmp.h:224
uint64_t mp_limb_t
Definition lmmp.h:211
#define lmmp_assert(x)
Definition lmmp.h:370
#define lmmp_param_assert(x)
Definition lmmp.h:398
mp_limb_t lmmp_shr1add_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
加法后右移1位 [dst,n] = ([numa,n] + [numb,n]) >> 1
Definition shr.c:52
mp_limb_t lmmp_shr_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_size_t shr)
大数右移操作 [dst,na] = [numa,na] >> shr,dst的高shr位填充0
Definition shr.c:9
mp_limb_t lmmp_shl_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_size_t shl)
大数左移操作 [dst,na] = [numa,na]<<shl,dst的低shl位填充0
Definition shl.c:9
static mp_limb_t lmmp_sub_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
大数减法静态内联函数 [dst,na]=[numa,na]-[numb,nb]
Definition lmmpn.h:1072
mp_limb_t lmmp_addmul_1_(mp_ptr numa, mp_srcptr numb, mp_size_t n, mp_limb_t b)
大数乘以单limb并累加操作 [numa,n] += [numb,n] * b
mp_limb_t lmmp_submul_1_(mp_ptr numa, mp_srcptr numb, mp_size_t n, mp_limb_t b)
大数乘以单limb并累减操作 [numa,n] -= [numb,n] * b
mp_limb_t lmmp_sub_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无借位的n位减法 [dst,n] = [numa,n] - [numb,n]
Definition sub_n.c:70
mp_limb_t lmmp_shr1sub_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
减法后右移1位 [dst,n] = ([numa,n] - [numb,n]) >> 1
Definition shr.c:106
#define lmmp_inc_1(p, inc)
大数加指定值宏(预期无进位)
Definition lmmpn.h:958
mp_limb_t lmmp_add_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无进位的n位加法 [dst,n] = [numa,n] + [numb,n]
Definition add_n.c:71
#define tp
#define w3
#define w5
#define w2
#define w0
void lmmp_toom_interp7_(mp_ptr dst, mp_size_t n, enum toom7_flags flags, mp_ptr w1, mp_ptr w3, mp_ptr w4, mp_ptr w5, mp_size_t w6n, mp_ptr tp)
Toom插值计算(7点插值):用于Toom-44、Toom-53、Toom-62 乘法算法
#define w6
toom7_flags
Definition toom_interp.h:27
@ toom7_w1_neg
Definition toom_interp.h:27
@ toom7_w3_neg
Definition toom_interp.h:27