LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
载入中...
搜索中...
未找到
mul_toom32.c
浏览该文件的文档.
1/*
2 * LAMMP - Copyright (C) 2025-2026 HJimmyK(Jericho Knox)
3 * This file is part of lammp, under the GNU LGPL v2 license.
4 * See LICENSE in the project root for the full license text.
5 */
6
7#include "../../include/lammp/impl/tmp_alloc.h"
8#include "../../include/lammp/lmmpn.h"
9
10/*
11Evaluate in: -1, 0, +1, +inf
12
13<-s-><--n--><--n-->
14|a2-|---a1-|---a0-|
15 |-b1-|---b0-|
16 <-t--><--n-->
17
18v0 = a0 * b0 # A(0)*B(0)
19v1 = (a0+a1+a2)*(b0+b1) # A(1)*B(1) ah <= 2 bh <= 1
20vm1 = (a0-a1+a2)*(b0-b1) # A(-1)*B(-1) |ah| <= 1 bh = 0
21vinf= a2 * b1 # A(inf)*B(inf)
22*/
23
24void lmmp_mul_toom32_(mp_ptr restrict dst, mp_srcptr restrict numa, mp_size_t na, mp_srcptr restrict numb, mp_size_t nb) {
25 lmmp_param_assert(nb >= 12);
26 lmmp_param_assert(4 * na >= 5 * nb);
27 lmmp_param_assert(5 * na <= 9 * nb);
29 mp_size_t n = 1 + (2 * na >= 3 * nb ? (na - 1) / 3 : (nb - 1) >> 1), s = na - 2 * n, t = nb - n;
30 int vm1_neg;
31 mp_limb_t cy, hi;
32 mp_limb_t* restrict tp = SALLOC_TYPE(4 * n + 2, mp_limb_t);
33
34#define a0 numa
35#define a1 (numa + n)
36#define a2 (numa + 2 * n)
37#define b0 numb
38#define b1 (numb + n)
39 // nb>=12, so that s+t>=n+2
40#define bm1 (dst) //[dst,n]
41#define bp1 (dst + n) //[dst+n,n+1]
42#define ap1 (dst + 2 * n + 1) //[dst+2*n+1,n+1]
43#define am1 (dst + 3 * n + 2) //[dst+3*n+2,n]:hi
44#define v1 (tp) //[tp,2*n+1]
45#define vm1 (tp + 2 * n + 1) //[tp+2*n+1,2*n+1]
46#define r0 (dst)
47#define r1 (dst + n)
48#define r2 (dst + 2 * n)
49#define r3 (dst + 3 * n)
50
51 // ap1=a0+a1+a3, am1=a0-a1+a3
52 ap1[n] = lmmp_add_(ap1, a0, n, a2, s);
53 if (ap1[n] == 0 && lmmp_cmp_(ap1, a1, n) < 0) {
54 ap1[n] = lmmp_add_n_sub_n_(ap1, am1, a1, ap1, n) >> 1;
55 hi = 0;
56 vm1_neg = 1;
57 } else {
58 cy = lmmp_add_n_sub_n_(ap1, am1, ap1, a1, n);
59 hi = ap1[n] - (cy & 1);
60 ap1[n] += (cy >> 1);
61 vm1_neg = 0;
62 }
63
64 // bp1=b0+b1, bm1=b0-b1
65 if (t == n) {
66 if (lmmp_cmp_(b0, b1, n) < 0) {
67 bp1[n] = lmmp_add_n_sub_n_(bp1, bm1, b1, b0, n) >> 1;
68 vm1_neg ^= 1;
69 } else {
70 bp1[n] = lmmp_add_n_sub_n_(bp1, bm1, b0, b1, n) >> 1;
71 }
72 } else {
73 if (lmmp_zero_q_(b0 + t, n - t) && lmmp_cmp_(b0, b1, t) < 0) {
74 cy = lmmp_add_n_sub_n_(bp1, bm1, b1, b0, t);
75 lmmp_zero(bm1 + t, n - t);
76 vm1_neg ^= 1;
77 } else {
78 cy = lmmp_add_n_sub_n_(bp1, bm1, b0, b1, t);
79 lmmp_sub_1_(bm1 + t, b0 + t, n - t, cy & 1);
80 }
81 bp1[n] = lmmp_add_1_(bp1 + t, b0 + t, n - t, cy >> 1);
82 }
83
84 // v1=ap1*bp1
85 lmmp_mul_n_(v1, ap1, bp1, n + 1);
86
87 // vm=am1*bm1
88 lmmp_mul_n_(vm1, am1, bm1, n);
89 if (hi)
90 hi = lmmp_add_n_(vm1 + n, vm1 + n, bm1, n);
91 vm1[2 * n] = hi;
92
93 // r0=a0*b0
94 // r3=a2*b1
95 lmmp_mul_n_(r0, a0, b0, n);
96 if (s > t)
97 lmmp_mul_(r3, a2, s, b1, t);
98 else
99 lmmp_mul_(r3, b1, t, a2, s);
100
101 // v1=(v1+vm1)/2, (=a0*b0+a2*b0+a1*b1)
102 // vm1=v1-vm1, (=a1*b0+a0*b1+a2*b1)
103 if (vm1_neg) {
104 lmmp_shr1sub_n_(v1, v1, vm1, 2 * n + 1);
105 lmmp_add_n_(vm1, v1, vm1, 2 * n + 1);
106 } else {
107 lmmp_shr1add_n_(v1, v1, vm1, 2 * n + 1);
108 lmmp_sub_n_(vm1, v1, vm1, 2 * n + 1);
109 }
110
111 // vm1-=r3, (=r1)
112 // v1-=r0, (=r2)
113 lmmp_sub_(vm1, vm1, 2 * n + 1, r3, s + t);
114 v1[2 * n] -= lmmp_sub_n_(v1, v1, r0, 2 * n);
115
116 // r=r0+vm1*B+v1*B^2+r3*B^4
117 cy = vm1[2 * n] + lmmp_add_(r1, vm1, 2 * n, r1, n);
118 lmmp_add_(r2, r2, n + s + t, v1, 2 * n + 1);
119 lmmp_inc_1(r3, cy);
121}
mp_limb_t * mp_ptr
Definition lmmp.h:215
#define lmmp_zero(dst, n)
Definition lmmp.h:366
uint64_t mp_size_t
Definition lmmp.h:212
const mp_limb_t * mp_srcptr
Definition lmmp.h:216
uint64_t mp_limb_t
Definition lmmp.h:211
#define lmmp_param_assert(x)
Definition lmmp.h:398
static mp_limb_t lmmp_add_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
大数加法静态内联函数 [dst,na]=[numa,na]+[numb,nb]
Definition lmmpn.h:1058
mp_limb_t lmmp_shr1add_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
加法后右移1位 [dst,n] = ([numa,n] + [numb,n]) >> 1
Definition shr.c:52
static int lmmp_cmp_(mp_srcptr numa, mp_srcptr numb, mp_size_t n)
大数比较函数(内联)
Definition lmmpn.h:1004
static mp_limb_t lmmp_add_1_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_limb_t x)
大数加单精度数静态内联函数 [dst,na]=[numa,na]+x
Definition lmmpn.h:1111
void lmmp_mul_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
不等长大数乘法操作 [dst,na+nb] = [numa,na] * [numb,nb]
void lmmp_mul_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
等长大数乘法操作 [dst,2*n] = [numa,n] * [numb,n]
Definition mul.c:99
static mp_limb_t lmmp_sub_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_srcptr numb, mp_size_t nb)
大数减法静态内联函数 [dst,na]=[numa,na]-[numb,nb]
Definition lmmpn.h:1072
mp_limb_t lmmp_add_n_sub_n_(mp_ptr dsta, mp_ptr dstb, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
同时执行n位加法和减法 ([dsta,n],[dstb,n]) = ([numa,n]+[numb,n],[numa,n]-[numb,n])
Definition add_n_sub_n.c:10
static mp_limb_t lmmp_sub_1_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_limb_t x)
大数减单精度数静态内联函数 [dst,na]=[numa,na]-x
Definition lmmpn.h:1122
mp_limb_t lmmp_sub_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无借位的n位减法 [dst,n] = [numa,n] - [numb,n]
Definition sub_n.c:70
mp_limb_t lmmp_shr1sub_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
减法后右移1位 [dst,n] = ([numa,n] - [numb,n]) >> 1
Definition shr.c:106
#define lmmp_inc_1(p, inc)
大数加指定值宏(预期无进位)
Definition lmmpn.h:958
mp_limb_t lmmp_add_n_(mp_ptr dst, mp_srcptr numa, mp_srcptr numb, mp_size_t n)
无进位的n位加法 [dst,n] = [numa,n] + [numb,n]
Definition add_n.c:71
static int lmmp_zero_q_(mp_srcptr p, mp_size_t n)
大数判零函数(内联)
Definition lmmpn.h:1027
#define r2
#define b0
#define b1
#define am1
#define ap1
#define bp1
#define vm1
#define r1
#define bm1
#define a2
#define a0
#define a1
void lmmp_mul_toom32_(mp_ptr restrict dst, mp_srcptr restrict numa, mp_size_t na, mp_srcptr restrict numb, mp_size_t nb)
Definition mul_toom32.c:24
#define v1
#define r3
#define r0
#define tp
#define SALLOC_TYPE(n, type)
Definition tmp_alloc.h:87
#define TEMP_S_DECL
Definition tmp_alloc.h:76
#define TEMP_S_FREE
Definition tmp_alloc.h:105