LAMMP 4.1.0
Lamina High-Precision Arithmetic Library
载入中...
搜索中...
未找到
nCr.c
浏览该文件的文档.
1/*
2 * LAMMP - Copyright (C) 2025-2026 HJimmyK(Jericho Knox)
3 * This file is part of lammp, under the GNU LGPL v2 license.
4 * See LICENSE in the project root for the full license text.
5 */
6
7#include "../../../include/lammp/impl/ele_mul.h"
8#include "../../../include/lammp/impl/mparam.h"
9#include "../../../include/lammp/impl/prime_table.h"
10#include "../../../include/lammp/impl/longlong.h"
11#include "../../../include/lammp/numth.h"
12
14 double ln_comb = lgamma(n + 1.0) - lgamma(r + 1.0) - lgamma(n - r + 1.0);
15 double log2_comb = ln_comb / LOG2_;
16 mp_size_t rn = ceil(log2_comb / LIMB_BITS) + 2; /* more two limbs */
17 (*bits) = n - lmmp_limb_popcnt_(n);
18 (*bits) -= r - lmmp_limb_popcnt_(r);
19 (*bits) -= n - r - lmmp_limb_popcnt_(n - r);
20 return rn;
21}
22
23static inline uint factor_size_int(mp_size_t rn, uint n) {
24 /*
25 我们可以知道,nCr的大小一定不会超过B^rn,因此,B^rn的可以含有的质因数数量即为nCr可以含有的质因数数量的上限。
26 同时,我们这里只计算的是奇数部分,比如我们可以用B^rn可以含有的3的质因数个数来估计nCr的质因数种类数,
27 这是一个绝对上界,同时在不平衡时比pi(n)这个平方上界要紧得多。当然即使是这个上界,实际的质因数个数也可能远远
28 小于这个上界。一个改进想法是,我们使用更大一点的质数,对于n>0xffff,我们选取这个质数为251,
29 而log(B)/log(251)约等于8.02855802854906,我们近似视为8,这也是这里的常数的由来,当然,此估计可能存在低估,
30 但是经过大量的校验,我们未发现任何低估的反例。
31 为了同时处理不平衡与不平衡的情况,我们这里对两个估计进行比较,取较小的一个作为最终结果。不平衡时,approx1要更紧一些。
32 */
33 // 此处假定了LIMB_BITS为64
34 uint approx1 = rn * 8;
35 uint approx2 = lmmp_prime_size_(n);
36 return approx1 < approx2? approx1 : approx2;
37}
38
39static inline uint factor_size_short(mp_size_t rn) {
40 /*
41 经过大量的校验,*8即使是在ushort输入下,也极少低估,但是为了留有冗余,我们还是选择*10,大致相当于质数83。
42 */
43 // 此处假定了LIMB_BITS为64
44 return rn * 10;
45}
46
47static inline uint count_factors(fac_ptr fac, uint nfactors, uint n, uint r, uint nr, uint p) {
48 uint pn = n;
49 uint e = 0;
50 ulong inv = MP_ULONG_MAX / p + 1;
51 while (pn > 0) {
52 _udiv32by32_q_preinv(pn, pn, inv);
53 e += pn;
54 }
55 pn = r;
56 while (pn > 0) {
57 _udiv32by32_q_preinv(pn, pn, inv);
58 e -= pn;
59 }
60 pn = nr;
61 while (pn > 0) {
62 _udiv32by32_q_preinv(pn, pn, inv);
63 e -= pn;
64 }
65 if (e > 0) {
66 fac[nfactors].f = p;
67 fac[nfactors++].j = e;
68 }
69 return nfactors;
70}
71
72/*
73FIXME:
74 当极不平衡时,质因数分解本身的效率很低,因为本身其质因数就极其稀疏,我们却需要遍历整个素数表。
75 因此,当极不平衡时,应该考虑采用其他方法,比如直接计算排列数和阶乘,再使用精确除法。
76 当排列数不平衡时,可能会直接使用累乘法来避免遍历质数表,而阶乘本身就是平衡的。
77 目前精确除法暂未实现。实现完成后,可以在此处使用。
78*/
79
82 lmmp_param_assert(r <= n / 2);
83 if (r < ODD_FACTORIAL_SIZE) {
84 rn = lmmp_odd_nPr_ushort_(dst, rn, n, r);
85 mp_limb_t t = 0;
86 lmmp_odd_nPr_ushort_(&t, 1, r, r);
87 // FIXME: 这里的除法应该使用精确除法
88 lmmp_div_1_(dst, dst, rn, t);
89 rn -= dst[rn - 1] == 0 ? 1 : 0;
90 return rn;
91 } else if (rn < BINOMIAL_RN_BASECASE_THRESHOLD) {
92 if (r <= 4 || n > 0xfff) {
93 dst[0] = 1;
94 rn = 1;
95 ulong t, v;
96 mp_bitcnt_t cnt;
97 for (ulong i = 1; i <= r; ++i) {
98 t = n - i + 1;
99 ctz_shl(v, t, cnt);
100 dst[rn] = lmmp_mul_1_(dst, dst, rn, v);
101 ++rn;
102 rn -= dst[rn - 1] == 0 ? 1 : 0;
103 ctz_shl(v, i, cnt);
104 lmmp_div_1_(dst, dst, rn, v);
105 rn -= dst[rn - 1] == 0 ? 1 : 0;
106 }
107 return rn;
108 } else {
109 dst[0] = 1;
110 rn = 1;
111 ulong i = 1;
112 ulong t = 1, d = 1, v;
113 mp_bitcnt_t cnt;
114 for (; i <= (ulong)r - 4; i += 4) {
115 // 相邻四个数必含有3这个公共因子
116 d = i * (i + 1) * (i + 2) * (i + 3);
117 d /= 3;
118 t = (n - i + 1) * (n - i) * (n - i - 1) * (n - i - 2);
119 t /= 3;
120 ctz_shl(v, t, cnt);
121 dst[rn] = lmmp_mul_1_(dst, dst, rn, v);
122 ++rn;
123 rn -= dst[rn - 1] == 0 ? 1 : 0;
124 ctz_shl(v, d, cnt);
125 lmmp_div_1_(dst, dst, rn, v);
126 rn -= dst[rn - 1] == 0 ? 1 : 0;
127 }
128 for (; i <= r; ++i) {
129 t = n - i + 1;
130 ctz_shl(v, t, cnt);
131 dst[rn] = lmmp_mul_1_(dst, dst, rn, v);
132 ++rn;
133 rn -= dst[rn - 1] == 0 ? 1 : 0;
134 ctz_shl(v, i, cnt);
135 lmmp_div_1_(dst, dst, rn, v);
136 rn -= dst[rn - 1] == 0 ? 1 : 0;
137 }
138 return rn;
139 }
140 } else {
141 TEMP_DECL;
142 uint primen = lmmp_prime_cnt16_(n);
143 uint nfactors = factor_size_short(rn);
144 nfactors = primen < nfactors ? primen : nfactors;
145 fac_ptr restrict fac = TALLOC_TYPE(nfactors, fac_t);
146 uint nr = n - r;
147 nfactors = 0;
148 for (uint i = 1; i < primen; ++i) {
149 uint p = prime_short_table[i];
150 nfactors = count_factors(fac, nfactors, n, r, nr, p);
151 }
152
153 rn = lmmp_factors_mul_(dst, rn, fac, nfactors);
154
155 TEMP_FREE;
156 return rn;
157 }
158}
159
161 lmmp_param_assert(r <= (n / 2));
162 if (r <= 3 || (n > 0xfffffff && rn < BINOMIAL_RN_BASECASE_THRESHOLD)) {
163 dst[0] = 1;
164 rn = 1;
165 ulong t, v;
166 mp_bitcnt_t cnt;
167 for (ulong i = 1; i <= r; ++i) {
168 t = n - i + 1;
169 ctz_shl(v, t, cnt);
170 dst[rn] = lmmp_mul_1_(dst, dst, rn, v);
171 ++rn;
172 rn -= dst[rn - 1] == 0 ? 1 : 0;
173 ctz_shl(v, i, cnt);
174 lmmp_div_1_(dst, dst, rn, v);
175 rn -= dst[rn - 1] == 0 ? 1 : 0;
176 }
177 return rn;
178 } else if (rn < BINOMIAL_RN_BASECASE_THRESHOLD) {
179 dst[0] = 1;
180 rn = 1;
181 ulong i = 1;
182 ulong t = 1, d = 1, v;
183 mp_bitcnt_t cnt;
184 for (; i <= (ulong)r - 2; i += 2) {
185 d = i * (i + 1);
186 t = (n - i + 1) * (n - i);
187 ctz_shl(v, t, cnt);
188 dst[rn] = lmmp_mul_1_(dst, dst, rn, v);
189 ++rn;
190 rn -= dst[rn - 1] == 0 ? 1 : 0;
191 ctz_shl(v, d, cnt);
192 lmmp_div_1_(dst, dst, rn, v);
193 rn -= dst[rn - 1] == 0 ? 1 : 0;
194 }
195 for (; i <= r; ++i) {
196 t = n - i + 1;
197 ctz_shl(v, t, cnt);
198 dst[rn] = lmmp_mul_1_(dst, dst, rn, v);
199 ++rn;
200 rn -= dst[rn - 1] == 0 ? 1 : 0;
201 ctz_shl(v, i, cnt);
202 lmmp_div_1_(dst, dst, rn, v);
203 rn -= dst[rn - 1] == 0 ? 1 : 0;
204 }
205 return rn;
206 } else {
209 uint nfactors = factor_size_int(rn, n);
210 fac_ptr restrict fac = BALLOC_TYPE(nfactors, fac_t);
211 uint nr = n - r;
212
213 nfactors = 0;
214 prime_cache_t cache;
215 lmmp_prime_cache_init_(&cache, n);
216 while (cache.is_end == 0) {
218 for (uint i = 0; i < cache.size; ++i) {
219 nfactors = count_factors(fac, nfactors, n, r, nr, cache.pp[i]);
220 }
221 }
223
224 rn = lmmp_factors_mul_(dst, rn, fac, nfactors);
225
227 return rn;
228 }
229}
230
232 lmmp_debug_assert(r <= (n / 2));
233 mp_size_t shw = bits / LIMB_BITS;
234 bits %= LIMB_BITS;
235 lmmp_zero(dst, shw);
236 if (n <= NCR_SHORT_LIMIT)
237 rn = lmmp_odd_nCr_ushort_(dst + shw, rn - shw, n, r);
238 else
239 rn = lmmp_odd_nCr_uint_(dst + shw, rn - shw, n, r);
240 if (bits > 0) {
241 dst[shw + rn] = lmmp_shl_(dst + shw, dst + shw, rn, bits);
242 rn += shw + 1;
243 rn -= dst[rn - 1] == 0;
244 } else {
245 rn += shw;
246 }
247 return rn;
248}
uint f
Definition ele_mul.h:118
uint j
Definition ele_mul.h:119
mp_size_t lmmp_factors_mul_(mp_ptr dst, mp_size_t rn, fac_ptr fac, uint nfactors)
计算因子的累乘,并将结果放入dst中
mp_limb_t * mp_ptr
Definition lmmp.h:215
#define lmmp_zero(dst, n)
Definition lmmp.h:366
size_t mp_bitcnt_t
Definition lmmp.h:217
uint64_t mp_size_t
Definition lmmp.h:212
#define lmmp_debug_assert(x)
Definition lmmp.h:387
uint64_t mp_limb_t
Definition lmmp.h:211
#define LIMB_BITS
Definition lmmp.h:221
#define lmmp_param_assert(x)
Definition lmmp.h:398
mp_limb_t lmmp_div_1_(mp_ptr dstq, mp_srcptr numa, mp_size_t na, mp_limb_t x)
单精度数除法
Definition div.c:66
mp_limb_t lmmp_shl_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_size_t shl)
大数左移操作 [dst,na] = [numa,na]<<shl,dst的低shl位填充0
Definition shl.c:9
int lmmp_limb_popcnt_(mp_limb_t x)
计算一个64位无符号整数中1的个数
Definition tiny.c:20
mp_limb_t lmmp_mul_1_(mp_ptr dst, mp_srcptr numa, mp_size_t na, mp_limb_t x)
大数乘以单limb操作 [dst,na] = [numa,na] * x
#define ctz_shl(r, x, cnt)
Definition longlong.h:229
#define _udiv32by32_q_preinv(q, n0, dinv)
Definition longlong.h:466
#define BINOMIAL_RN_BASECASE_THRESHOLD
Definition mparam.h:125
#define MP_USHORT_MAX
Definition mparam.h:138
#define LOG2_
Definition mparam.h:165
#define MP_ULONG_MAX
Definition mparam.h:140
#define NCR_SHORT_LIMIT
Definition mparam.h:157
#define ODD_FACTORIAL_SIZE
Definition mparam.h:152
mp_size_t lmmp_odd_nCr_ushort_(mp_ptr restrict dst, mp_size_t rn, uint n, uint r)
Definition nCr.c:80
static uint factor_size_int(mp_size_t rn, uint n)
Definition nCr.c:23
mp_size_t lmmp_nCr_size_(uint n, uint r, mp_bitcnt_t *restrict bits)
Definition nCr.c:13
static uint factor_size_short(mp_size_t rn)
Definition nCr.c:39
static uint count_factors(fac_ptr fac, uint nfactors, uint n, uint r, uint nr, uint p)
Definition nCr.c:47
mp_size_t lmmp_nCr_(mp_ptr dst, mp_bitcnt_t bits, mp_size_t rn, uint n, uint r)
计算 nCr 组合数 ( nCr = n! / (r!(n-r)!) )
Definition nCr.c:231
mp_size_t lmmp_odd_nCr_uint_(mp_ptr restrict dst, mp_size_t rn, uint n, uint r)
Definition nCr.c:160
mp_size_t lmmp_odd_nPr_ushort_(mp_ptr dst, mp_size_t rn, ulong n, ulong r)
计算 nPr 排列数的奇数部分
uint32_t uint
Definition numth.h:35
uint64_t ulong
Definition numth.h:36
void lmmp_prime_cache_free_(prime_cache_t *cache)
释放素数表缓存
ulong lmmp_prime_size_(ulong n)
估计 n 范围内的素数数量
Definition prime_table.c:11
void lmmp_prime_cache_next_(prime_cache_t *cache)
素数表缓存更新(从小到大遍历全局质数表)
const ushort prime_short_table[6542]
void lmmp_prime_int_table_init_(uint n)
初始化全局素数表
Definition prime_table.c:70
ushort lmmp_prime_cnt16_(ushort n)
计算小于等于 n 的素数数量
void lmmp_prime_cache_init_(prime_cache_t *cache, uint n)
初始化素数表缓存
#define TEMP_DECL
Definition tmp_alloc.h:72
#define TEMP_FREE
Definition tmp_alloc.h:93
#define TALLOC_TYPE(n, type)
Definition tmp_alloc.h:91
#define TEMP_B_DECL
Definition tmp_alloc.h:75
#define BALLOC_TYPE(n, type)
Definition tmp_alloc.h:89
#define TEMP_B_FREE
Definition tmp_alloc.h:100