This source file includes following definitions.
- iszero_128
- isnonzero_128
- iseq_128
- isgt_128
- islt_128
- isge_128
- islt_s128
- isgt_s128
- and_128
- and_s128
- or_128
- xor_128
- complement_128
- add_128
- subtract_128
- negate_128
- negate_s128
- lshift_128
- lshift_s128
- rshift_128
- rshift_s128
- mulmn
- mulmns
- nlz
- divmnu
- multiply_128
- multiply_s128
- divide_128
- divide_128_16
- divide_128_32
- tisz
- tand
- tor
- tcomp
- tadd
- tsub
- tneg
- tgt
- tls
- trs
- tmul
- tsmul
- tdiv16
- tdiv32
- test_math128
- __udivti3
- __udivmodti3
- __divti3
- __modti3
- __umodti3
- __multi3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 #ifndef DPS8_MATH128
32 # define DPS8_MATH128
33 # include "dps8.h"
34 # include "dps8_math128.h"
35
36 # ifdef NEED_128
37
38 bool iszero_128 (uint128 w)
39 {
40 if (w.h || w.l)
41 return false;
42 return true;
43 }
44
45 bool isnonzero_128 (uint128 w)
46 {
47 if (w.h || w.l)
48 return true;
49 return false;
50 }
51
52 bool iseq_128 (uint128 a, uint128 b)
53 {
54 return a.h == b.h && a.l == b.l;
55 }
56
57 bool isgt_128 (uint128 a, uint128 b)
58 {
59 if (a.h > b.h) return true;
60 if (a.h < b.h) return false;
61 if (a.l > b.l) return true;
62 return false;
63 }
64
65 bool islt_128 (uint128 a, uint128 b)
66 {
67 if (a.h < b.h) return true;
68 if (a.h > b.h) return false;
69 if (a.l < b.l) return true;
70 return false;
71 }
72
73 bool isge_128 (uint128 a, uint128 b)
74 {
75 if (a.h > b.h) return true;
76 if (a.h < b.h) return false;
77 if (a.l >= b.l) return true;
78 return false;
79 }
80
81 bool islt_s128 (int128 a, int128 b)
82 {
83 if (a.h < b.h) return true;
84 if (a.h > b.h) return false;
85 if (a.l < b.l) return true;
86 return false;
87 }
88
89 bool isgt_s128 (int128 a, int128 b)
90 {
91 if (a.h > b.h) return true;
92 if (a.h < b.h) return false;
93 if (a.l > b.l) return true;
94 return false;
95 }
96
97 uint128 and_128 (uint128 a, uint128 b)
98 {
99 return (uint128) {a.h & b.h, a.l & b.l};
100 }
101
102 int128 and_s128 (int128 a, uint128 b)
103 {
104 return (int128) {a.h & (int64_t)b.h, a.l & b.l};
105 }
106
107 uint128 or_128 (uint128 a, uint128 b)
108 {
109 return (uint128) {a.h | b.h, a.l | b.l};
110 }
111
112 uint128 xor_128 (uint128 a, uint128 b)
113 {
114 return (uint128) {a.h ^ b.h, a.l ^ b.l};
115 }
116
117 uint128 complement_128 (uint128 a)
118 {
119 return (uint128) {~ a.h, ~ a.l};
120 }
121
122 uint128 add_128 (uint128 a, uint128 b)
123 {
124
125
126
127
128 uint64_t al63 = a.l & MASK63;
129 uint64_t bl63 = b.l & MASK63;
130 uint64_t l63 = al63 + bl63;
131 uint64_t c63 = l63 & SIGN64;
132 l63 &= MASK63;
133
134 unsigned int al64 = (a.l >> 63) & 1;
135 unsigned int bl64 = (b.l >> 63) & 1;
136 unsigned int cl64 = (c63 >> 63) & 1;
137 uint64_t l64 = al64 + bl64 + cl64;
138 unsigned int c64 = l64 > 1 ? 1 : 0;
139 l64 = (l64 & 1) << 63;
140 l64 |= l63;
141 uint64_t h64 = a.h + b.h + c64;
142 return construct_128 (h64, l64);
143 }
144
145 uint128 subtract_128 (uint128 a, uint128 b)
146 {
147
148
149 bool borrow = !! (b.l > a.l);
150 uint128 res = construct_128 (a.h - b.h, a.l - b.l);
151 if (borrow)
152 res.h --;
153
154 return res;
155 }
156
157 uint128 negate_128 (uint128 a)
158 {
159 return add_128 (complement_128 (a), construct_128 (0, 1));
160 }
161
162 int128 negate_s128 (int128 a)
163 {
164 uint128 t = add_128 (complement_128 (cast_128 (a)), construct_128 (0, 1));
165 return cast_s128 (t);
166 }
167
168 uint128 lshift_128 (uint128 a, unsigned int n)
169 # if ( defined(__clang__) || defined(__clang_version__) ) && defined(__llvm__)
170 # ifdef NEED_128
171 __attribute__ ((optnone))
172 # endif
173 # endif
174 {
175 if (n < 64)
176 {
177 uint64_t nmask = (uint64_t) ((~(MASK64 << n)));
178
179
180 uint64_t keep = (a.l >> (64 - n)) & nmask;
181
182
183 uint64_t l = a.l << n;
184
185
186 uint64_t h = a.h << n;
187
188
189 h |= keep;
190
191 return construct_128 (h, l);
192 }
193 uint64_t h = a.l << (n - 64);
194 return construct_128 (h, 0);
195 }
196
197 int128 lshift_s128 (int128 a, unsigned int n)
198 {
199 uint128 t = lshift_128 (cast_128 (a), n);
200 return cast_s128 (t);
201 }
202
203 uint128 rshift_128 (uint128 a, unsigned int n)
204 {
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253 uint64_t h = a.h;
254 uint64_t l = a.l;
255 uint64_t sign = a.h & SIGN64;
256 while (n)
257 {
258 uint64_t b = (h & 1) ? SIGN64 : 0;
259 h >>= 1;
260 h |= sign;
261 l >>= 1;
262 l &= MASK63;
263 l |= b;
264 n --;
265 }
266 return construct_128 (h, l);
267 }
268
269 int128 rshift_s128 (int128 a, unsigned int n)
270 {
271 uint128 t = rshift_128 (cast_128 (a), n);
272 return cast_s128 (t);
273 }
274
275
276
277 static void mulmn (uint32_t w[], uint32_t u[],
278 uint32_t v[], int m, int n)
279 {
280
281
282
283
284 uint64_t k, t;
285 int i, j;
286
287 for (i = 0; i < m; i++)
288 w[i] = 0;
289
290 for (j = 0; j < n; j++)
291 {
292 k = 0;
293 for (i = 0; i < m; i++)
294 {
295 t = (uint64_t) u[i] * (uint64_t) v[j] + (uint64_t) w[i + j] + k;
296
297 w[i + j] = (uint32_t) t;
298 k = t >> 32;
299 }
300 w[j + m] = (uint32_t) k;
301 }
302 }
303
304 static void mulmns (uint32_t w[], uint32_t u[],
305 uint32_t v[], int m, int n)
306 {
307 mulmn (w, u, v, m, n);
308
309
310
311
312
313 if ((int32_t)u[m - 1] < 0)
314 {
315 int b = 0;
316 for (int j = 0; j < n; j++)
317 {
318 int t = (int) w[j + m] - (int) v[j] - b;
319 w[j + m] = (uint32_t) t;
320 b = (t & 0x40000000) ? 1 : 0;
321 }
322 }
323 if ((int32_t)v[n - 1] < 0)
324 {
325 int b = 0;
326 for (int i = 0; i < m; i++)
327 {
328 int t = (int) w[i + n] - (int) u[i] - b;
329 w[i + n] = (uint32_t) t;
330 b = (t & 0x40000000) ? 1 : 0;
331 }
332 }
333 return;
334 }
335
336 static int
337 nlz (unsigned x)
338 {
339 int n;
340
341 if (x == 0)
342 return (32);
343
344 n = 0;
345
346 if (x <= 0x0000FFFF)
347 {
348 n = n + 16;
349 x = x << 16;
350 }
351
352 if (x <= 0x00FFFFFF)
353 {
354 n = n + 8;
355 x = x << 8;
356 }
357
358 if (x <= 0x0FFFFFFF)
359 {
360 n = n + 4;
361 x = x << 4;
362 }
363
364 if (x <= 0x3FFFFFFF)
365 {
366 n = n + 2;
367 x = x << 2;
368 }
369
370 if (x <= 0x7FFFFFFF)
371 {
372 n = n + 1;
373 }
374
375 return n;
376 }
377
378 unsigned int kd_div_errors = 0;
379
380 int divmnu (uint16_t q[], uint16_t r[],
381 const uint16_t u[], const uint16_t v[],
382 int m, int n)
383 {
384
385 const uint32_t b = 65536;
386
387 unsigned qhat;
388 unsigned rhat;
389 unsigned p;
390 int s, i, j, t, k;
391
392 if (m < n || n <= 0 || v[n-1] == 0)
393 return 1;
394
395
396 if (n == 1)
397 {
398 k = 0;
399 for (j = m - 1; j >= 0; j--)
400 {
401 q[j] = (uint16_t) (((unsigned int) k*b + u[j])/v[0]);
402 k = (int) (((unsigned int) k*b + u[j]) - q[j]*v[0]);
403 }
404 if (r != NULL) r[0] = (uint16_t) k;
405 return 0;
406 }
407
408
409
410
411
412
413 s = nlz (v[n-1]) - 16;
414
415 uint16_t vn [n];
416 for (i = n - 1; i > 0; i--)
417 vn[i] = (uint16_t) (v[i] << s) | (uint16_t) (v[i-1] >> (16-s));
418 vn[0] = (uint16_t) (v[0] << s);
419
420
421 uint16_t un [m+1];
422 un[m] = u[m-1] >> (16-s);
423 for (i = m - 1; i > 0; i--)
424 un[i] = (uint16_t) (u[i] << s) | (uint16_t) (u[i-1] >> (16-s));
425 un[0] = (uint16_t) (u[0] << s);
426 for (j = m - n; j >= 0; j--)
427 {
428
429 qhat = (un[j+n]*b + un[j+n-1])/vn[n-1];
430 rhat = (un[j+n]*b + un[j+n-1])%vn[n-1];
431 cmp_again:
432 if (qhat >= b || (unsigned)qhat*(unsigned long long)vn[n-2] > b*rhat + un[j+n-2])
433 {
434 qhat = qhat - 1;
435 rhat = rhat + vn[n-1];
436 if (rhat < b) goto cmp_again;
437 }
438
439
440 k = 0;
441 for (i = 0; i < n; i++)
442 {
443 p = (unsigned)qhat*(unsigned long long)vn[i];
444 t = (int32_t) un[i+j] - k - (int32_t) (p & 0xFFFF);
445 un[i+j] = (uint16_t) t;
446 k = (int) (p >> 16) - (t >> 16);
447 }
448 t = un[j+n] - k;
449 un[j+n] = (uint16_t) t;
450
451 q[j] = (uint16_t) qhat;
452 if (t < 0)
453 {
454 q[j] = q[j] - 1;
455 k = 0;
456 for (i = 0; i < n; i++)
457 {
458 t = un[i+j] + vn[i] + k;
459 un[i+j] = (uint16_t) t;
460 k = t >> 16;
461 }
462 un[j+n] = (uint16_t) (un[j+n] + k);
463 }
464 }
465
466
467 if (r != NULL)
468 {
469 for (i = 0; i < n; i++)
470 r[i] = (uint16_t) (un[i] >> s) | (uint16_t) (un[i+1] << (16-s));
471 }
472 return 0;
473 }
474
475 uint128 multiply_128 (uint128 a, uint128 b)
476 {
477
478 const int l = 4;
479 uint32_t w[l+l], u[l], v[l];
480
481 u[3] = (uint32_t) (a.h >> 32);
482 u[2] = (uint32_t) a.h;
483 u[1] = (uint32_t) (a.l >> 32);
484 u[0] = (uint32_t) a.l;
485 v[3] = (uint32_t) (b.h >> 32);
486 v[2] = (uint32_t) b.h;
487 v[1] = (uint32_t) (b.l >> 32);
488 v[0] = (uint32_t) b.l;
489 mulmn (w, u, v, l, l);
490 return construct_128 (
491 (((uint64_t) w[3]) << 32) | w[2],
492 (((uint64_t) w[1]) << 32) | w[0]);
493 }
494
495 int128 multiply_s128 (int128 a, int128 b)
496 {
497
498 const int l = 4;
499 uint32_t w[l+l], u[l], v[l];
500
501 u[3] = (uint32_t) (a.h >> 32);
502 u[2] = (uint32_t) a.h;
503 u[1] = (uint32_t) (a.l >> 32);
504 u[0] = (uint32_t) a.l;
505 v[3] = (uint32_t) (b.h >> 32);
506 v[2] = (uint32_t) b.h;
507 v[1] = (uint32_t) (b.l >> 32);
508 v[0] = (uint32_t) b.l;
509 mulmns (w, u, v, l, l);
510 return construct_s128 (
511 (((int64_t) w[3]) << 32) | w[2],
512 (((uint64_t) w[1]) << 32) | w[0]);
513 }
514
515
516 uint128 divide_128 (uint128 a, uint128 b, uint128 * remp)
517 {
518 const int m = 8;
519 const int n = 8;
520 uint16_t q[m], u[m], v[n];
521 u[0] = (uint16_t) a.l;
522 u[1] = (uint16_t) (a.l >> 16);
523 u[2] = (uint16_t) (a.l >> 32);
524 u[3] = (uint16_t) (a.l >> 48);
525 u[4] = (uint16_t) a.h;
526 u[5] = (uint16_t) (a.h >> 16);
527 u[6] = (uint16_t) (a.h >> 32);
528 u[7] = (uint16_t) (a.h >> 48);
529
530 v[0] = (uint16_t) b.l;
531 v[1] = (uint16_t) (b.l >> 16);
532 v[2] = (uint16_t) (b.l >> 32);
533 v[3] = (uint16_t) (b.l >> 48);
534 v[4] = (uint16_t) b.h;
535 v[5] = (uint16_t) (b.h >> 16);
536 v[6] = (uint16_t) (b.h >> 32);
537 v[7] = (uint16_t) (b.h >> 48);
538
539 q[0] = q[1] = q[2] = q[3] = q[4] = q[5] = q[6] = q[7] = 0;
540
541 int normlen;
542 for (normlen = 8; normlen > 0; normlen --)
543 if (v [normlen - 1])
544 break;
545 uint16_t r [8] = { 8 * 0 };
546 divmnu (q, remp ? r : NULL, u, v, m, normlen);
547 if (remp)
548 {
549 * remp = construct_128 (
550 (((uint64_t) r [7]) << 48) |
551 (((uint64_t) r [6]) << 32) |
552 (((uint64_t) r [5]) << 16) |
553 (((uint64_t) r [4]) << 0),
554 (((uint64_t) r [3]) << 48) |
555 (((uint64_t) r [2]) << 32) |
556 (((uint64_t) r [1]) << 16) |
557 (((uint64_t) r [0]) << 0));
558 }
559 return construct_128 (
560 (((uint64_t) q [7]) << 48) |
561 (((uint64_t) q [6]) << 32) |
562 (((uint64_t) q [5]) << 16) |
563 (((uint64_t) q [4]) << 0),
564 (((uint64_t) q [3]) << 48) |
565 (((uint64_t) q [2]) << 32) |
566 (((uint64_t) q [1]) << 16) |
567 (((uint64_t) q [0]) << 0));
568 }
569
570
571 uint128 divide_128_16 (uint128 a, uint16_t b, uint16_t * remp)
572 {
573 const int m = 8;
574 const int n = 1;
575 uint16_t q[m], u[m], v[n];
576 u[0] = (uint16_t) a.l;
577 u[1] = (uint16_t) (a.l >> 16);
578 u[2] = (uint16_t) (a.l >> 32);
579 u[3] = (uint16_t) (a.l >> 48);
580 u[4] = (uint16_t) a.h;
581 u[5] = (uint16_t) (a.h >> 16);
582 u[6] = (uint16_t) (a.h >> 32);
583 u[7] = (uint16_t) (a.h >> 48);
584
585 v[0] = (uint16_t) b;
586
587 q[0] = q[1] = q[2] = q[3] = q[4] = q[5] = q[6] = q[7] = 0;
588
589 divmnu (q, remp, u, v, m, n);
590 return construct_128 (
591 (((uint64_t) q [7]) << 48) |
592 (((uint64_t) q [6]) << 32) |
593 (((uint64_t) q [5]) << 16) |
594 (((uint64_t) q [4]) << 0),
595 (((uint64_t) q [3]) << 48) |
596 (((uint64_t) q [2]) << 32) |
597 (((uint64_t) q [1]) << 16) |
598 (((uint64_t) q [0]) << 0));
599 }
600
601
602
603
604
605
606
607 uint128 divide_128_32 (uint128 a, uint32_t b, uint32_t * remp)
608 {
609 const int m = 8;
610 const int n = 2;
611 uint16_t q[m], u[m], v[n], r[2];
612 u[0] = (uint16_t) a.l;
613 u[1] = (uint16_t) (a.l >> 16);
614 u[2] = (uint16_t) (a.l >> 32);
615 u[3] = (uint16_t) (a.l >> 48);
616 u[4] = (uint16_t) a.h;
617 u[5] = (uint16_t) (a.h >> 16);
618 u[6] = (uint16_t) (a.h >> 32);
619 u[7] = (uint16_t) (a.h >> 48);
620
621 v[0] = (uint16_t) b;
622 v[1] = (uint16_t) (b >> 16);
623
624 q[0] = q[1] = q[2] = q[3] = q[4] = q[5] = q[6] = q[7] = 0;
625
626 divmnu (q, remp ? r : NULL, u, v, m, n);
627 if (remp)
628 * remp = r [0] | (uint32_t) r[1] << 16;
629
630 return construct_128 (
631 (((uint64_t) q [7]) << 48) |
632 (((uint64_t) q [6]) << 32) |
633 (((uint64_t) q [5]) << 16) |
634 (((uint64_t) q [4]) << 0),
635 (((uint64_t) q [3]) << 48) |
636 (((uint64_t) q [2]) << 32) |
637 (((uint64_t) q [1]) << 16) |
638 (((uint64_t) q [0]) << 0));
639 }
640
641 unsigned ti_test_iter = 0;
642
643 static void tisz (uint64_t h, uint64_t l, bool expect)
644 {
645 bool r = iszero_128 (construct_128 (h, l));
646 ti_test_iter++;
647 if (r != expect) {
648 fprintf (stderr, "FATAL (%u): iszero_128 (%llu, %llu) returned %lu\n",
649 ti_test_iter,
650 (unsigned long long)h, (unsigned long long)l, (unsigned long)r);
651 kd_div_errors++;
652 }
653 }
654
655 static void tand (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
656 uint64_t rh, uint64_t rl)
657 {
658 uint128 a = construct_128 (ah, al);
659 uint128 b = construct_128 (bh, bl);
660 uint128 r = and_128 (a, b);
661 ti_test_iter++;
662 if (r.h != rh || r.l != rl) {
663 fprintf (stderr, "FATAL (%u): and_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
664 ti_test_iter,
665 (unsigned long long)ah, (unsigned long long)al, (unsigned long long)bh,
666 (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
667 kd_div_errors++;
668 }
669 }
670
671 static void tor (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
672 uint64_t rh, uint64_t rl)
673 {
674 uint128 a = construct_128 (ah, al);
675 uint128 b = construct_128 (bh, bl);
676 uint128 r = or_128 (a, b);
677 ti_test_iter++;
678 if (r.h != rh || r.l != rl) {
679 fprintf (stderr, "FATAL (%u): or_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
680 ti_test_iter,
681 (unsigned long long)ah, (unsigned long long)al, (unsigned long long)bh,
682 (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
683 kd_div_errors++;
684 }
685 }
686
687 static void tcomp (uint64_t ah, uint64_t al,
688 uint64_t rh, uint64_t rl)
689 {
690 uint128 a = construct_128 (ah, al);
691 uint128 r = complement_128 (a);
692 ti_test_iter++;
693 if (r.h != rh || r.l != rl) {
694 fprintf (stderr, "FATAL (%u): complement_128 (%016llx%016llx) returned %016llx%016llx\n",
695 ti_test_iter,
696 (unsigned long long)ah, (unsigned long long)al, (unsigned long long)r.h,
697 (unsigned long long)r.l);
698 kd_div_errors++;
699 }
700 }
701
702 static void tadd (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
703 uint64_t rh, uint64_t rl)
704 {
705 uint128 a = construct_128 (ah, al);
706 uint128 b = construct_128 (bh, bl);
707 uint128 r = add_128 (a, b);
708 ti_test_iter++;
709 if (r.h != rh || r.l != rl) {
710 fprintf (stderr, "FATAL %u): add_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
711 ti_test_iter,
712 (unsigned long long)ah, (unsigned long long)al, (unsigned long long)bh,
713 (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
714 kd_div_errors++;
715 }
716 }
717
718 static void tsub (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
719 uint64_t rh, uint64_t rl)
720 {
721 uint128 a = construct_128 (ah, al);
722 uint128 b = construct_128 (bh, bl);
723 uint128 r = subtract_128 (a, b);
724 ti_test_iter++;
725 if (r.h != rh || r.l != rl) {
726 fprintf (stderr, "FATAL (%u): subtract_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
727 ti_test_iter,
728 (unsigned long long)ah, (unsigned long long)al, (unsigned long long)bh,
729 (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
730 kd_div_errors++;
731 }
732 }
733
734 static void tneg (uint64_t ah, uint64_t al,
735 uint64_t rh, uint64_t rl)
736 {
737 uint128 a = construct_128 (ah, al);
738 uint128 r = negate_128 (a);
739 ti_test_iter++;
740 if (r.h != rh || r.l != rl) {
741 fprintf (stderr, "FATAL (%u): negate_128 (%016llx%016llx) returned %016llx%016llx\n",
742 ti_test_iter,
743 (unsigned long long)ah, (unsigned long long)al, (unsigned long long)r.h,
744 (unsigned long long)r.l);
745 kd_div_errors++;
746 }
747 }
748
749 static void tgt (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
750 bool expect)
751 {
752 uint128 a = construct_128 (ah, al);
753 uint128 b = construct_128 (bh, bl);
754 ti_test_iter++;
755 bool r = isgt_128 (a, b);
756 if (r != expect) {
757 fprintf (stderr, "FATAL (%u): gt_128 (%016llx%016llx, %016llx%016llx) returned %llu\n",
758 ti_test_iter,
759 (unsigned long long)ah, (unsigned long long)al, (unsigned long long)bh,
760 (unsigned long long)bl, (unsigned long long)r);
761 kd_div_errors++;
762 }
763 }
764
765 static void tls (uint64_t ah, uint64_t al, unsigned int n,
766 uint64_t rh, uint64_t rl)
767 {
768 uint128 a = construct_128 (ah, al);
769 uint128 r = lshift_128 (a, n);
770 ti_test_iter++;
771 if (r.h != rh || r.l != rl) {
772 fprintf (stderr, "FATAL (%u): lshift_128 (%016llx%016llx, %llu) returned %016llx%016llx\n",
773 ti_test_iter,
774 (unsigned long long)ah, (unsigned long long)al, (unsigned long long)n,
775 (unsigned long long)r.h, (unsigned long long)r.l);
776 kd_div_errors++;
777 }
778 }
779
780 static void trs (uint64_t ah, uint64_t al, unsigned int n,
781 uint64_t rh, uint64_t rl)
782 {
783 uint128 a = construct_128 (ah, al);
784 uint128 r = rshift_128 (a, n);
785 ti_test_iter++;
786 if (r.h != rh || r.l != rl) {
787 fprintf (stderr, "FATAL (%u): rshift_128 (%016llx%016llx, %llu) returned %016llx%016llx\n",
788 ti_test_iter,
789 (unsigned long long)ah, (unsigned long long)al, (unsigned long long)n,
790 (unsigned long long)r.h, (unsigned long long)r.l);
791 kd_div_errors++;
792 }
793 }
794
795 static void tmul (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
796 uint64_t rh, uint64_t rl)
797 {
798 uint128 a = construct_128 (ah, al);
799 uint128 b = construct_128 (bh, bl);
800 uint128 r = multiply_128 (a, b);
801 ti_test_iter++;
802 if (r.h != rh || r.l != rl) {
803 fprintf (stderr, "FATAL (%u): multiply_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
804 ti_test_iter,
805 (unsigned long long)ah, (unsigned long long)al, (unsigned long long)bh,
806 (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
807 kd_div_errors++;
808 }
809 }
810
811 static void tsmul (int64_t ah, uint64_t al, int64_t bh, uint64_t bl,
812 int64_t rh, uint64_t rl)
813 {
814 int128 a = construct_s128 (ah, al);
815 int128 b = construct_s128 (bh, bl);
816 int128 r = multiply_s128 (a, b);
817 ti_test_iter++;
818 if (r.h != rh || r.l != rl) {
819 fprintf (stderr, "FATAL (%u): multiply_s128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
820 ti_test_iter,
821 (unsigned long long)ah, (unsigned long long)al, (unsigned long long)bh,
822 (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
823 kd_div_errors++;
824 }
825 }
826
827 static void tdiv16 (uint64_t ah, uint64_t al, uint16_t b,
828 uint64_t resh, uint64_t resl,
829 uint16_t remainder)
830 {
831 uint128 a = construct_128 (ah, al);
832 uint16_t rem;
833 uint128 res = divide_128_16 (a, b, & rem);
834 ti_test_iter++;
835 if (res.h != resh || res.l != resl || rem != remainder) {
836 fprintf (stderr, "FATAL (%u): divide_128_16 (%016llx%016llx, %04x) returned %016llx%016llx, %04x\n",
837 ti_test_iter,
838 (unsigned long long)ah, (unsigned long long)al, b,
839 (unsigned long long)res.h, (unsigned long long)res.l, rem);
840 kd_div_errors++;
841 }
842 }
843
844 static void tdiv32 (uint64_t ah, uint64_t al, uint32_t b,
845 uint64_t resh, uint64_t resl,
846 uint32_t remainder)
847 {
848 uint128 a = construct_128 (ah, al);
849 uint32_t rem;
850 uint128 res = divide_128_32 (a, b, & rem);
851 ti_test_iter++;
852 if (res.h != resh || res.l != resl || rem != remainder) {
853 fprintf (stderr, "FATAL (%u): divide_128_32 (%016llx%016llx, %08x) returned %016llx%016llx, %08x\n",
854 ti_test_iter,
855 (unsigned long long)ah, (unsigned long long)al, b,
856 (unsigned long long)res.h, (unsigned long long)res.l, rem);
857 kd_div_errors++;
858 }
859 }
860
861 int test_math128 (void)
862 {
863 tisz (0, 0, true);
864 tisz (1, 0, false);
865 tisz (0, 1, false);
866 tisz (1, 1, false);
867 tisz (SIGN64, 0, false);
868 tisz (0, SIGN64, false);
869 tisz (SIGN64, SIGN64, false);
870 tneg (0, 0, 0, 0);
871 tneg (MASK64, MASK64, 0, 1);
872 tcomp (MASK64, MASK64, 0, 0);
873 tneg (0, 1, MASK64, MASK64);
874 tcomp (0, 0, MASK64, MASK64);
875 tcomp (0, 1, MASK64, MASK64-1);
876 tgt (0, 0, 0, 0, false);
877 tgt (0, 0, 0, 1, false);
878 tgt (0, 1, 0, 0, true);
879 tgt (MASK64, MASK64, MASK64, MASK64, false);
880 tgt (0, 0, MASK64, MASK64, false);
881 tgt (MASK64, MASK64, 0, 0, true);
882 # ifndef __PGI
883 # ifndef __PGLLVM__
884 # ifndef __NVCOMPILER
885 # ifndef __NVCOMPILER_LLVM__
886 tls (0, 0, 0, 0, 0);
887 tls (MASK64, MASK64, 0, MASK64, MASK64);
888 # endif
889 # endif
890 # endif
891 # endif
892 tls (0, 1, 127, SIGN64, 0);
893 tls (0, MASK64, 64, MASK64, 0);
894 tls (0, MASK64, 1, 1, MASK64-1);
895 tls (0, 1, 64, 1, 0);
896 tls (0, 1, 63, 0, SIGN64);
897 tls (1, 0, 63, SIGN64, 0);
898 trs (0, 0, 0, 0, 0);
899 trs (SIGN64, 0, 127, MASK64, MASK64);
900 trs (MASK64, 0, 64, MASK64, MASK64);
901 trs (MASK64, 0, 1, MASK64, SIGN64);
902 trs (1, 0, 64, 0, 1);
903 trs (1, 0, 1, 0, SIGN64);
904 trs (MASK64, MASK64, 0, MASK64, MASK64);
905 trs (SIGN64, 0, 63, MASK64, 0);
906 trs (SIGN64, 0, 64, MASK64, SIGN64);
907 tand (0, 0, 0, 0, 0, 0);
908 tand (MASK64, MASK64, 0, 0, 0, 0);
909 tand (0, 0, MASK64, MASK64, 0, 0);
910 tand (MASK64, MASK64, MASK64, MASK64, MASK64, MASK64);
911 tor (0, 0, 0, 0, 0, 0);
912 tor (MASK64, MASK64, 0, 0, MASK64, MASK64);
913 tor (0, 0, MASK64, MASK64, MASK64, MASK64);
914 tor (MASK64, MASK64, MASK64, MASK64, MASK64, MASK64);
915 tadd (0, 0, 0, 0, 0, 0);
916 tadd (0, 0, 0, 1, 0, 1);
917 tadd (0, 1, 0, 0, 0, 1);
918 tadd (0, 1, 0, 1, 0, 2);
919 tadd (0, 1, 0, MASK64, 1, 0);
920 tadd (0, 1, MASK64, MASK64, 0, 0);
921 tsub (0, 0, 0, 0, 0, 0);
922 tsub (0, 1, 0, 1, 0, 0);
923 tsub (MASK64, MASK64, MASK64, MASK64, 0, 0);
924 tsub (MASK64, MASK64, 0, 0, MASK64, MASK64);
925 tsub (0, 0, 0, 1, MASK64, MASK64);
926 tmul (0, 0, 0, 0, 0, 0);
927 tmul (MASK64, MASK64, 0, 0, 0, 0);
928 tmul (0, 0, MASK64, MASK64, 0, 0);
929 tmul (0, 1, 0, 1, 0, 1);
930 tmul (0, 1, 0, 10, 0, 10);
931 tmul (0, 10, 0, 10, 0, 100);
932 tmul (0, 100, 0, 10, 0, 1000);
933 tmul (0, MASK64, 0, 2, 1, MASK64-1);
934 tmul (MASK64, MASK64, MASK64, MASK64, 0, 1);
935 tdiv16 (MASK64, MASK64, 16, MASK64>>4, MASK64, 15);
936 tsmul (0, 1, MASK64, MASK64, MASK64, MASK64);
937 tsmul (MASK64, MASK64, MASK64, MASK64, 0, 1);
938 tdiv32 (1, 0, 1<<16, 0, 1ll<<48, 0);
939 tdiv16 (0, 1, 1, 0, 1, 0);
940 tdiv16 (0, 10, 2, 0, 5, 0);
941 tdiv16 (0, 3, 2, 0, 1, 1);
942 tdiv32 (MASK64, MASK64, 1<<16, MASK64>>16, MASK64, 0xffff);
943 if (kd_div_errors)
944 abort();
945 return 0;
946 }
947 # else
948 # ifdef NEED_128
949 # include "dps8_math128.h"
950
951 void __udivmodti3(UTItype div, UTItype dvd,UTItype *result,UTItype *remain);
952 UTItype __udivti3(UTItype div, UTItype dvd);
953 UTItype __umodti3(UTItype div, UTItype dvd);
954
955 UTItype __udivti3(UTItype div, UTItype dvd)
956 {
957 UTItype result,remain;
958
959 __udivmodti3(div,dvd,&result,&remain);
960
961 return result;
962 }
963
964 void __udivmodti3(UTItype div, UTItype dvd,UTItype *result,UTItype *remain)
965 {
966 UTItype z1 = dvd;
967 UTItype z2 = (UTItype)1;
968
969 *result = (UTItype)0;
970 *remain = div;
971
972 if ( z1 == (UTItype)0)
973 # ifndef CPPCHECK
974 1/0;
975 # else
976 abort();
977 # endif
978
979 while ( z1 < *remain )
980 {
981 z1 <<= 1 ;
982 z2 <<= 1;
983 }
984
985 do
986 {
987 if ( *remain >= z1 )
988 {
989 *remain -= z1;
990 *result += z2;
991 }
992 z1 >>= 1;
993 z2 >>= 1;
994 } while ( z2 );
995 }
996
997 TItype __divti3(TItype div, TItype dvd)
998 {
999 int sign=1;
1000
1001 if (div < (TItype)0)
1002 {
1003 sign = -1;
1004 div = -div;
1005 }
1006
1007 if (dvd < (TItype)0)
1008 {
1009 sign = -sign;
1010 dvd = -dvd;
1011 }
1012
1013 if (sign > 0)
1014 return (TItype)__udivti3(div,dvd);
1015 else
1016 return -((TItype)__udivti3(div,dvd));
1017 }
1018
1019 TItype __modti3(TItype div, TItype dvd)
1020 {
1021 int sign=1;
1022
1023 if (div < (TItype)0)
1024 {
1025 sign = -1;
1026 div = -div;
1027 }
1028
1029 if (dvd < (TItype)0)
1030 {
1031 sign = -sign;
1032 dvd = -dvd;
1033 }
1034
1035 if (sign > 0)
1036 return (TItype)__umodti3(div,dvd);
1037 else
1038 return ((TItype)0-(TItype)__umodti3(div,dvd));
1039 }
1040
1041 UTItype __umodti3(UTItype div, UTItype dvd)
1042 {
1043 UTItype result,remain;
1044
1045 __udivmodti3(div,dvd,&result,&remain);
1046
1047 return remain;
1048 }
1049
1050 TItype __multi3 (TItype u, TItype v)
1051 {
1052 TItype result = (TItype)0;
1053 int sign = 1;
1054
1055 if (u<0)
1056 {
1057 sign = -1;
1058 u = -u;
1059 }
1060
1061 while (u != (TItype)0)
1062 {
1063 if ( u&(TItype)1 )
1064 result += v;
1065 u>>=1;
1066 v<<=1;
1067 }
1068
1069 if ( sign < 0 )
1070 return -result;
1071 else
1072 return result;
1073 }
1074 # endif
1075 # endif
1076 #endif