LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
载入中...
搜索中...
未找到
mul_1.c
浏览该文件的文档.
1/*
2 * LAMMP - Copyright (C) 2025-2026 HJimmyK(Jericho Knox)
3 * This file is part of lammp, under the GNU LGPL v2 license.
4 * See LICENSE in the project root for the full license text.
5 */
6
7#include "../../../include/lammp/impl/longlong.h"
8#include "../../../include/lammp/lmmpn.h"
9
10mp_limb_t lmmp_mul_1_(mp_ptr restrict dst, mp_srcptr restrict numa, mp_size_t na, mp_limb_t x) {
11 mp_limb_t cl = 0;
12 mp_size_t i = 0;
13
14 mp_limb_t ul0, ul1, ul2, ul3;
15 mp_limb_t lpl0, hpl0, lpl1, hpl1;
16 mp_limb_t lpl2, hpl2, lpl3, hpl3;
17
18 if (dst == numa) {
19 for (; i + 4 <= na; i += 4) {
20 ul0 = dst[i + 0];
21 ul1 = dst[i + 1];
22 ul2 = dst[i + 2];
23 ul3 = dst[i + 3];
24
25 _umul64to128_(ul0, x, &lpl0, &hpl0);
26 lpl0 += cl;
27 cl = (lpl0 < cl) + hpl0;
28
29 _umul64to128_(ul1, x, &lpl1, &hpl1);
30 lpl1 += cl;
31 cl = (lpl1 < cl) + hpl1;
32
33 _umul64to128_(ul2, x, &lpl2, &hpl2);
34 lpl2 += cl;
35 cl = (lpl2 < cl) + hpl2;
36
37 _umul64to128_(ul3, x, &lpl3, &hpl3);
38 lpl3 += cl;
39 cl = (lpl3 < cl) + hpl3;
40
41 dst[i + 0] = lpl0;
42 dst[i + 1] = lpl1;
43 dst[i + 2] = lpl2;
44 dst[i + 3] = lpl3;
45 }
46
47 for (; i < na; i++) {
48 ul0 = dst[i];
49 _umul64to128_(ul0, x, &lpl0, &hpl0);
50 lpl0 += cl;
51 cl = (lpl0 < cl) + hpl0;
52 dst[i] = lpl0;
53 }
54 }
55
56 else {
57 for (; i + 4 <= na; i += 4) {
58 ul0 = numa[i + 0];
59 ul1 = numa[i + 1];
60 ul2 = numa[i + 2];
61 ul3 = numa[i + 3];
62
63 _umul64to128_(ul0, x, &lpl0, &hpl0);
64 lpl0 += cl;
65 cl = (lpl0 < cl) + hpl0;
66
67 _umul64to128_(ul1, x, &lpl1, &hpl1);
68 lpl1 += cl;
69 cl = (lpl1 < cl) + hpl1;
70
71 _umul64to128_(ul2, x, &lpl2, &hpl2);
72 lpl2 += cl;
73 cl = (lpl2 < cl) + hpl2;
74
75 _umul64to128_(ul3, x, &lpl3, &hpl3);
76 lpl3 += cl;
77 cl = (lpl3 < cl) + hpl3;
78
79 dst[i + 0] = lpl0;
80 dst[i + 1] = lpl1;
81 dst[i + 2] = lpl2;
82 dst[i + 3] = lpl3;
83 }
84
85 for (; i < na; i++) {
86 ul0 = numa[i];
87 _umul64to128_(ul0, x, &lpl0, &hpl0);
88 lpl0 += cl;
89 cl = (lpl0 < cl) + hpl0;
90 dst[i] = lpl0;
91 }
92 }
93
94 return cl;
95}
96
97mp_limb_t lmmp_addmul_1_(mp_ptr restrict numa, mp_srcptr restrict numb, mp_size_t n, mp_limb_t b) {
98 mp_limb_t cl = 0;
99 mp_size_t i = 0;
100
101 mp_limb_t ul0, ul1, ul2, ul3;
102 mp_limb_t lpl0, hpl0, rl0;
103 mp_limb_t lpl1, hpl1, rl1;
104 mp_limb_t lpl2, hpl2, rl2;
105 mp_limb_t lpl3, hpl3, rl3;
106
107 if (numa == numb) {
108 for (; i + 4 <= n; i += 4) {
109 ul0 = numa[i + 0];
110 ul1 = numa[i + 1];
111 ul2 = numa[i + 2];
112 ul3 = numa[i + 3];
113
114 _umul64to128_(ul0, b, &lpl0, &hpl0);
115 lpl0 += cl;
116 cl = (lpl0 < cl) + hpl0;
117 lpl0 = ul0 + lpl0;
118 cl += (lpl0 < ul0);
119
120 _umul64to128_(ul1, b, &lpl1, &hpl1);
121 lpl1 += cl;
122 cl = (lpl1 < cl) + hpl1;
123 lpl1 = ul1 + lpl1;
124 cl += (lpl1 < ul1);
125
126 _umul64to128_(ul2, b, &lpl2, &hpl2);
127 lpl2 += cl;
128 cl = (lpl2 < cl) + hpl2;
129 lpl2 = ul2 + lpl2;
130 cl += (lpl2 < ul2);
131
132 _umul64to128_(ul3, b, &lpl3, &hpl3);
133 lpl3 += cl;
134 cl = (lpl3 < cl) + hpl3;
135 lpl3 = ul3 + lpl3;
136 cl += (lpl3 < ul3);
137
138 numa[i + 0] = lpl0;
139 numa[i + 1] = lpl1;
140 numa[i + 2] = lpl2;
141 numa[i + 3] = lpl3;
142 }
143 for (; i < n; i++) {
144 ul0 = numa[i];
145 _umul64to128_(ul0, b, &lpl0, &hpl0);
146 lpl0 += cl;
147 cl = (lpl0 < cl) + hpl0;
148 lpl0 = ul0 + lpl0;
149 cl += (lpl0 < ul0);
150 numa[i] = lpl0;
151 }
152 } else {
153 for (; i + 4 <= n; i += 4) {
154 ul0 = numb[i + 0];
155 ul1 = numb[i + 1];
156 ul2 = numb[i + 2];
157 ul3 = numb[i + 3];
158 rl0 = numa[i + 0];
159 rl1 = numa[i + 1];
160 rl2 = numa[i + 2];
161 rl3 = numa[i + 3];
162
163 _umul64to128_(ul0, b, &lpl0, &hpl0);
164 lpl0 += cl;
165 cl = (lpl0 < cl) + hpl0;
166 lpl0 = rl0 + lpl0;
167 cl += (lpl0 < rl0);
168
169 _umul64to128_(ul1, b, &lpl1, &hpl1);
170 lpl1 += cl;
171 cl = (lpl1 < cl) + hpl1;
172 lpl1 = rl1 + lpl1;
173 cl += (lpl1 < rl1);
174
175 _umul64to128_(ul2, b, &lpl2, &hpl2);
176 lpl2 += cl;
177 cl = (lpl2 < cl) + hpl2;
178 lpl2 = rl2 + lpl2;
179 cl += (lpl2 < rl2);
180
181 _umul64to128_(ul3, b, &lpl3, &hpl3);
182 lpl3 += cl;
183 cl = (lpl3 < cl) + hpl3;
184 lpl3 = rl3 + lpl3;
185 cl += (lpl3 < rl3);
186
187 numa[i + 0] = lpl0;
188 numa[i + 1] = lpl1;
189 numa[i + 2] = lpl2;
190 numa[i + 3] = lpl3;
191 }
192 for (; i < n; i++) {
193 ul0 = numb[i];
194 rl0 = numa[i];
195 _umul64to128_(ul0, b, &lpl0, &hpl0);
196 lpl0 += cl;
197 cl = (lpl0 < cl) + hpl0;
198 lpl0 = rl0 + lpl0;
199 cl += (lpl0 < rl0);
200 numa[i] = lpl0;
201 }
202 }
203 return cl;
204}
205
206mp_limb_t lmmp_submul_1_(mp_ptr restrict numa, mp_srcptr restrict numb, mp_size_t n, mp_limb_t b) {
207 mp_limb_t cl = 0;
208 mp_size_t i = 0;
209
210 mp_limb_t ul0, ul1, ul2, ul3;
211 mp_limb_t lpl0, hpl0, rl0;
212 mp_limb_t lpl1, hpl1, rl1;
213 mp_limb_t lpl2, hpl2, rl2;
214 mp_limb_t lpl3, hpl3, rl3;
215
216 if (numa == numb) {
217 for (; i + 4 <= n; i += 4) {
218 ul0 = numa[i + 0];
219 ul1 = numa[i + 1];
220 ul2 = numa[i + 2];
221 ul3 = numa[i + 3];
222
223 _umul64to128_(ul0, b, &lpl0, &hpl0);
224 lpl0 += cl;
225 cl = (lpl0 < cl) + hpl0;
226 lpl0 = ul0 - lpl0;
227 cl += (lpl0 > ul0);
228
229 _umul64to128_(ul1, b, &lpl1, &hpl1);
230 lpl1 += cl;
231 cl = (lpl1 < cl) + hpl1;
232 lpl1 = ul1 - lpl1;
233 cl += (lpl1 > ul1);
234
235 _umul64to128_(ul2, b, &lpl2, &hpl2);
236 lpl2 += cl;
237 cl = (lpl2 < cl) + hpl2;
238 lpl2 = ul2 - lpl2;
239 cl += (lpl2 > ul2);
240
241 _umul64to128_(ul3, b, &lpl3, &hpl3);
242 lpl3 += cl;
243 cl = (lpl3 < cl) + hpl3;
244 lpl3 = ul3 - lpl3;
245 cl += (lpl3 > ul3);
246
247 numa[i + 0] = lpl0;
248 numa[i + 1] = lpl1;
249 numa[i + 2] = lpl2;
250 numa[i + 3] = lpl3;
251 }
252 for (; i < n; i++) {
253 ul0 = numa[i];
254 _umul64to128_(ul0, b, &lpl0, &hpl0);
255 lpl0 += cl;
256 cl = (lpl0 < cl) + hpl0;
257 lpl0 = ul0 - lpl0;
258 cl += (lpl0 > ul0);
259 numa[i] = lpl0;
260 }
261 } else {
262 for (; i + 4 <= n; i += 4) {
263 ul0 = numb[i + 0];
264 ul1 = numb[i + 1];
265 ul2 = numb[i + 2];
266 ul3 = numb[i + 3];
267 rl0 = numa[i + 0];
268 rl1 = numa[i + 1];
269 rl2 = numa[i + 2];
270 rl3 = numa[i + 3];
271
272 _umul64to128_(ul0, b, &lpl0, &hpl0);
273 lpl0 += cl;
274 cl = (lpl0 < cl) + hpl0;
275 lpl0 = rl0 - lpl0;
276 cl += (lpl0 > rl0);
277
278 _umul64to128_(ul1, b, &lpl1, &hpl1);
279 lpl1 += cl;
280 cl = (lpl1 < cl) + hpl1;
281 lpl1 = rl1 - lpl1;
282 cl += (lpl1 > rl1);
283
284 _umul64to128_(ul2, b, &lpl2, &hpl2);
285 lpl2 += cl;
286 cl = (lpl2 < cl) + hpl2;
287 lpl2 = rl2 - lpl2;
288 cl += (lpl2 > rl2);
289
290 _umul64to128_(ul3, b, &lpl3, &hpl3);
291 lpl3 += cl;
292 cl = (lpl3 < cl) + hpl3;
293 lpl3 = rl3 - lpl3;
294 cl += (lpl3 > rl3);
295
296 numa[i + 0] = lpl0;
297 numa[i + 1] = lpl1;
298 numa[i + 2] = lpl2;
299 numa[i + 3] = lpl3;
300 }
301 for (; i < n; i++) {
302 ul0 = numb[i];
303 rl0 = numa[i];
304 _umul64to128_(ul0, b, &lpl0, &hpl0);
305 lpl0 += cl;
306 cl = (lpl0 < cl) + hpl0;
307 lpl0 = rl0 - lpl0;
308 cl += (lpl0 > rl0);
309 numa[i] = lpl0;
310 }
311 }
312 return cl;
313}
mp_limb_t * mp_ptr
Definition lmmp.h:215
uint64_t mp_size_t
Definition lmmp.h:212
const mp_limb_t * mp_srcptr
Definition lmmp.h:216
uint64_t mp_limb_t
Definition lmmp.h:211
static void _umul64to128_(uint64_t a, uint64_t b, uint64_t *low, uint64_t *high)
Definition longlong.h:31
mp_limb_t lmmp_addmul_1_(mp_ptr restrict numa, mp_srcptr restrict numb, mp_size_t n, mp_limb_t b)
Definition mul_1.c:97
mp_limb_t lmmp_mul_1_(mp_ptr restrict dst, mp_srcptr restrict numa, mp_size_t na, mp_limb_t x)
Definition mul_1.c:10
mp_limb_t lmmp_submul_1_(mp_ptr restrict numa, mp_srcptr restrict numb, mp_size_t n, mp_limb_t b)
Definition mul_1.c:206