LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
载入中...
搜索中...
未找到
mul_toom_interp6.c
浏览该文件的文档.
1/*
2 * LAMMP - Copyright (C) 2025-2026 HJimmyK(Jericho Knox)
3 * This file is part of lammp, under the GNU LGPL v2 license.
4 * See LICENSE in the project root for the full license text.
5 */
6
7#include "../../include/lammp/impl/divexact.h"
8#include "../../include/lammp/impl/toom_interp.h"
9
10/*
11 syms x integer
12 a = 2*x^3 + 32*x^2 + 4*x + 5;
13 b = 6*x^2 + 712*x + 18;
14 c_true = a*b;
15
16 w0 = 2*6;
17 w1 = subs(a,2)*subs(b,2);
18 w2 = subs(a,-2)*subs(b,-2);
19 w3 = subs(a,1)*subs(b,1);
20 w4 = subs(a,-1)*subs(b,-1);
21 w5 = subs(a,0)*subs(b,0);
22
23 w2 = (w1 - w2)/4;
24 w1 = (w1 - w5)/2;
25 w1 = (w1 - w2)/2;
26 w4 = (w3 - w4)/2;
27 w2 = (w2 - w4)/3;
28 w3 = w3 - w4 - w5;
29 w1 = (w1 - w3)/3;
30 w2 = w2 - w0*4;
31 w4 = w4 - w2;
32 w3 = w3 - w1;
33 w2 = w2 - w0;
34
35 c0=w5; c1=w4; c2=w3; c3=w2; c4=w1; c5=w0;
36 c_calc = c5*x^5 + c4*x^4 + c3*x^3 + c2*x^2 + c1*x + c0;
37*/
38
40 mp_ptr dst,
41 mp_size_t n,
42 enum toom6_flags flags,
43 mp_ptr w4,
44 mp_ptr w2,
45 mp_ptr w1,
46 mp_size_t w0n
47) {
48
49 lmmp_param_assert(n > 0);
50 lmmp_param_assert(2 * n >= w0n && w0n > 0);
51 mp_limb_t cy;
52 /* cy6 can be stored in w1[2*n], cy4 in w4[0], embankment in w2[0] */
53 mp_limb_t cy4, cy6, embankment;
54
55#define w5 dst /* 2n */
56#define w3 (dst + 2 * n) /* 2n+1 */
57#define w0 (dst + 5 * n) /* w0n */
58
59 /* W2 =(W1 - W2)>>2 */
60 if (flags & toom6_vm2_neg)
61 lmmp_add_n_(w2, w1, w2, 2 * n + 1);
62 else
63 lmmp_sub_n_(w2, w1, w2, 2 * n + 1);
64 lmmp_shr_(w2, w2, 2 * n + 1, 2);
65
66 /* W1 =(W1 - W5)>>1 */
67 w1[2 * n] -= lmmp_sub_n_(w1, w1, w5, 2 * n);
68 lmmp_shr_(w1, w1, 2 * n + 1, 1);
69
70 /* W1 =(W1 - W2)>>1 */
71 lmmp_shr1sub_n_(w1, w1, w2, 2 * n + 1);
72
73 /* W4 =(W3 - W4)>>1 */
74 if (flags & toom6_vm1_neg) {
75 lmmp_shr1add_n_(w4, w3, w4, 2 * n + 1);
76 } else {
77 lmmp_shr1sub_n_(w4, w3, w4, 2 * n + 1);
78 }
79
80 /* W2 =(W2 - W4)/3 */
81 lmmp_sub_n_(w2, w2, w4, 2 * n + 1);
82 lmmp_divexact_by3_(w2, w2, 2 * n + 1);
83
84 /* W3 = W3 - W4 - W5 */
85 lmmp_sub_n_(w3, w3, w4, 2 * n + 1);
86 w3[2 * n] -= lmmp_sub_n_(w3, w3, w5, 2 * n);
87
88 /* W1 =(W1 - W3)/3 */
89 lmmp_sub_n_(w1, w1, w3, 2 * n + 1);
90 lmmp_divexact_by3_(w1, w1, 2 * n + 1);
91
92 cy = lmmp_add_n_(dst + n, dst + n, w4, 2 * n + 1);
93 lmmp_inc_1(dst + 3 * n + 1, cy);
94
95 /* W2 -= W0<<2 */
96 /* {W4,2*n+1} is now free and can be overwritten. */
97 cy = lmmp_shl_(w4, w0, w0n, 2);
98 cy += lmmp_sub_n_(w2, w2, w4, w0n);
99
100 lmmp_dec_1(w2 + w0n, cy);
101
102 /* W4L = W4L - W2L */
103 cy = lmmp_sub_n_(dst + n, dst + n, w2, n);
104 lmmp_dec_1(w3, cy);
105
106 /* W3H = W3H + W2L */
107 cy4 = w3[2 * n] + lmmp_add_n_(dst + 3 * n, dst + 3 * n, w2, n);
108 /* W1L + W2H */
109 cy = w2[2 * n] + lmmp_add_n_(dst + 4 * n, w1, w2 + n, n);
110 lmmp_inc_1(w1 + n, cy);
111
112 /* W0 = W0 + W1H */
113 if (w0n > n)
114 cy6 = w1[2 * n] + lmmp_add_n_(w0, w0, w1 + n, n);
115 else
116 cy6 = lmmp_add_n_(w0, w0, w1 + n, w0n);
117
118 cy = lmmp_sub_n_(dst + 2 * n, dst + 2 * n, dst + 4 * n, n + w0n);
119
120 /* embankment is a "dirty trick" to avoid carry/borrow propagation
121 beyond allocated memory */
122 embankment = w0[w0n - 1] - 1;
123 w0[w0n - 1] = 1;
124 if (w0n > n) {
125 if (cy4 > cy6)
126 lmmp_inc_1(dst + 4 * n, cy4 - cy6);
127 else
128 lmmp_dec_1(dst + 4 * n, cy6 - cy4);
129 lmmp_dec_1(dst + 3 * n + w0n, cy);
130 lmmp_inc_1(w0 + n, cy6);
131 } else {
132 lmmp_inc_1(dst + 4 * n, cy4);
133 lmmp_dec_1(dst + 3 * n + w0n, cy + cy6);
134 }
135 w0[w0n - 1] += embankment;
136
137#undef w5
138#undef w3
139#undef w0
140}
static void lmmp_divexact_by3_(mp_ptr dst, mp_srcptr numa, mp_size_t na)
精确除以3([dst,na] = [numa,na] / 3)
Definition divexact.h:33
mp_limb_t * mp_ptr
Definition lmmp.h:215
uint64_t mp_size_t
Definition lmmp.h:212
uint64_t mp_limb_t
Definition lmmp.h:211
#define lmmp_param_assert(x)
Definition lmmp.h:398
mp_limb_t lmmp_shr1add_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
加法后右移1位 [dst,n] = ([numa,n] + [numb,n]) >> 1
Definition shr.c:52
mp_limb_t lmmp_shr_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_size_t shr)
大数右移操作 [dst,na] = [numa,na] >> shr,dst的高shr位填充0
Definition shr.c:9
mp_limb_t lmmp_shl_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_size_t shl)
大数左移操作 [dst,na] = [numa,na]<<shl,dst的低shl位填充0
Definition shl.c:9
#define lmmp_dec_1(p, dec)
大数减指定值宏(预期无借位)
Definition lmmpn.h:985
mp_limb_t lmmp_sub_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无借位的n位减法 [dst,n] = [numa,n] - [numb,n]
Definition sub_n.c:70
mp_limb_t lmmp_shr1sub_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
减法后右移1位 [dst,n] = ([numa,n] - [numb,n]) >> 1
Definition shr.c:106
#define lmmp_inc_1(p, inc)
大数加指定值宏(预期无进位)
Definition lmmpn.h:958
mp_limb_t lmmp_add_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无进位的n位加法 [dst,n] = [numa,n] + [numb,n]
Definition add_n.c:71
void lmmp_toom_interp6_(mp_ptr dst, mp_size_t n, enum toom6_flags flags, mp_ptr w4, mp_ptr w2, mp_ptr w1, mp_size_t w0n)
Toom插值计算(6点插值):用于Toom-43和Toom-52 乘法算法
#define w3
#define w0
#define w5
#define w2
toom6_flags
Definition toom_interp.h:25
@ toom6_vm2_neg
Definition toom_interp.h:25
@ toom6_vm1_neg
Definition toom_interp.h:25