97 {
100
106
107 if (numa == numb) {
108 for (; i + 4 <= n; i += 4) {
109 ul0 = numa[i + 0];
110 ul1 = numa[i + 1];
111 ul2 = numa[i + 2];
112 ul3 = numa[i + 3];
113
115 lpl0 += cl;
116 cl = (lpl0 < cl) + hpl0;
117 lpl0 = ul0 + lpl0;
118 cl += (lpl0 < ul0);
119
121 lpl1 += cl;
122 cl = (lpl1 < cl) + hpl1;
123 lpl1 = ul1 + lpl1;
124 cl += (lpl1 < ul1);
125
127 lpl2 += cl;
128 cl = (lpl2 < cl) + hpl2;
129 lpl2 = ul2 + lpl2;
130 cl += (lpl2 < ul2);
131
133 lpl3 += cl;
134 cl = (lpl3 < cl) + hpl3;
135 lpl3 = ul3 + lpl3;
136 cl += (lpl3 < ul3);
137
138 numa[i + 0] = lpl0;
139 numa[i + 1] = lpl1;
140 numa[i + 2] = lpl2;
141 numa[i + 3] = lpl3;
142 }
143 for (; i < n; i++) {
144 ul0 = numa[i];
146 lpl0 += cl;
147 cl = (lpl0 < cl) + hpl0;
148 lpl0 = ul0 + lpl0;
149 cl += (lpl0 < ul0);
150 numa[i] = lpl0;
151 }
152 } else {
153 for (; i + 4 <= n; i += 4) {
154 ul0 = numb[i + 0];
155 ul1 = numb[i + 1];
156 ul2 = numb[i + 2];
157 ul3 = numb[i + 3];
158 rl0 = numa[i + 0];
159 rl1 = numa[i + 1];
160 rl2 = numa[i + 2];
161 rl3 = numa[i + 3];
162
164 lpl0 += cl;
165 cl = (lpl0 < cl) + hpl0;
166 lpl0 = rl0 + lpl0;
167 cl += (lpl0 < rl0);
168
170 lpl1 += cl;
171 cl = (lpl1 < cl) + hpl1;
172 lpl1 = rl1 + lpl1;
173 cl += (lpl1 < rl1);
174
176 lpl2 += cl;
177 cl = (lpl2 < cl) + hpl2;
178 lpl2 = rl2 + lpl2;
179 cl += (lpl2 < rl2);
180
182 lpl3 += cl;
183 cl = (lpl3 < cl) + hpl3;
184 lpl3 = rl3 + lpl3;
185 cl += (lpl3 < rl3);
186
187 numa[i + 0] = lpl0;
188 numa[i + 1] = lpl1;
189 numa[i + 2] = lpl2;
190 numa[i + 3] = lpl3;
191 }
192 for (; i < n; i++) {
193 ul0 = numb[i];
194 rl0 = numa[i];
196 lpl0 += cl;
197 cl = (lpl0 < cl) + hpl0;
198 lpl0 = rl0 + lpl0;
199 cl += (lpl0 < rl0);
200 numa[i] = lpl0;
201 }
202 }
203 return cl;
204}
static void _umul64to128_(uint64_t a, uint64_t b, uint64_t *low, uint64_t *high)