root/src/dps8/dps8_math128.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. iszero_128
  2. isnonzero_128
  3. iseq_128
  4. isgt_128
  5. islt_128
  6. isge_128
  7. islt_s128
  8. isgt_s128
  9. and_128
  10. and_s128
  11. or_128
  12. xor_128
  13. complement_128
  14. add_128
  15. subtract_128
  16. negate_128
  17. negate_s128
  18. lshift_128
  19. lshift_s128
  20. rshift_128
  21. rshift_s128
  22. mulmn
  23. mulmns
  24. nlz
  25. divmnu
  26. multiply_128
  27. multiply_s128
  28. divide_128
  29. divide_128_16
  30. divide_128_32
  31. tisz
  32. tand
  33. tor
  34. tcomp
  35. tadd
  36. tsub
  37. tneg
  38. tgt
  39. tls
  40. trs
  41. tmul
  42. tsmul
  43. tdiv16
  44. tdiv32
  45. test_math128
  46. __udivti3
  47. __udivmodti3
  48. __divti3
  49. __modti3
  50. __umodti3
  51. __multi3

   1 /*
   2  * vim: filetype=c:tabstop=4:ai:expandtab
   3  * SPDX-License-Identifier: ICU
   4  * SPDX-License-Identifier: Multics
   5  * scspell-id: 928c0e86-f62e-11ec-ab5c-80ee73e9b8e7
   6  *
   7  * ---------------------------------------------------------------------------
   8  *
   9  * Copyright (c) 2016 Jean-Michel Merliot
  10  * Copyright (c) 2021-2025 The DPS8M Development Team
  11  *
  12  * This software is made available under the terms of the ICU License..
  13  * See the LICENSE.md file at the top-level directory of this distribution.
  14  *
  15  * ---------------------------------------------------------------------------
  16  *
  17  * This source file may contain code comments that adapt, include, and/or
  18  * incorporate Multics program code and/or documentation distributed under
  19  * the Multics License.  In the event of any discrepancy between code
  20  * comments herein and the original Multics materials, the original Multics
  21  * materials should be considered authoritative unless otherwise noted.
  22  * For more details and historical background, see the LICENSE.md file at
  23  * the top-level directory of this distribution.
  24  *
  25  * ---------------------------------------------------------------------------
  26  */
  27 
  28 #if !defined(DPS8_MATH128)
  29 # define DPS8_MATH128
  30 # include "dps8.h"
  31 # include "dps8_math128.h"
  32 
  33 # if defined(NEED_128)
  34 
  35 bool iszero_128 (uint128 w)
     /* [previous][next][first][last][top][bottom][index][help] */
  36   {
  37     if (w.h || w.l)
  38       return false;
  39     return true;
  40   }
  41 
  42 bool isnonzero_128 (uint128 w)
     /* [previous][next][first][last][top][bottom][index][help] */
  43   {
  44     if (w.h || w.l)
  45       return true;
  46     return false;
  47   }
  48 
  49 bool iseq_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
  50   {
  51     return a.h == b.h && a.l == b.l;
  52   }
  53 
  54 bool isgt_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
  55   {
  56     if (a.h > b.h) return true;
  57     if (a.h < b.h) return false;
  58     if (a.l > b.l) return true;
  59     return false;
  60   }
  61 
  62 bool islt_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
  63   {
  64     if (a.h < b.h) return true;
  65     if (a.h > b.h) return false;
  66     if (a.l < b.l) return true;
  67     return false;
  68   }
  69 
  70 bool isge_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
  71   {
  72     if (a.h >  b.h) return true;
  73     if (a.h <  b.h) return false;
  74     if (a.l >= b.l) return true;
  75     return false;
  76   }
  77 
  78 bool islt_s128 (int128 a, int128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
  79   {
  80     if (a.h < b.h) return true;
  81     if (a.h > b.h) return false;
  82     if (a.l < b.l) return true;
  83     return false;
  84   }
  85 
  86 bool isgt_s128 (int128 a, int128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
  87   {
  88     if (a.h > b.h) return true;
  89     if (a.h < b.h) return false;
  90     if (a.l > b.l) return true;
  91     return false;
  92   }
  93 
  94 uint128 and_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
  95   {
  96     return (uint128) {a.h & b.h, a.l & b.l};
  97   }
  98 
  99 int128 and_s128 (int128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
 100   {
 101     return (int128) {a.h & (int64_t)b.h, a.l & b.l};
 102   }
 103 
 104 uint128 or_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
 105   {
 106     return (uint128) {a.h | b.h, a.l | b.l};
 107   }
 108 
 109 uint128 xor_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
 110   {
 111     return (uint128) {a.h ^ b.h, a.l ^ b.l};
 112   }
 113 
 114 uint128 complement_128 (uint128 a)
     /* [previous][next][first][last][top][bottom][index][help] */
 115   {
 116     return (uint128) {~ a.h, ~ a.l};
 117   }
 118 
 119 uint128 add_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
 120   {
 121 // To do carry detection from low to high, bust the low into 1 bit/63
 122 // bit fields; add the 63 bit fields checking for carry in the "sign"
 123 // bit; add the 1 bit fields plus that carry
 124 
 125     uint64_t al63 = a.l & MASK63;  // low 63 bits of a
 126     uint64_t bl63 = b.l & MASK63;  // low 63 bits of b
 127     uint64_t l63 = al63 + bl63;    // low 63 bits of a + b, with carry into bit 64
 128     uint64_t c63 = l63 & SIGN64;   // the carry out of low 63 a + b
 129     l63 &= MASK63;                 // lose the carry bit
 130 
 131     unsigned int al64 = (a.l >> 63) & 1; // bit 64 of a
 132     unsigned int bl64 = (b.l >> 63) & 1; // bit 64 of b
 133     unsigned int cl64 = (c63 >> 63) & 1; // the carry out of bit 63
 134     uint64_t l64 = al64 + bl64 + cl64;   // bit 64 a + b + carry in
 135     unsigned int c64 = l64 > 1 ? 1 : 0;  // carry out of bit 64
 136     l64 = (l64 & 1) << 63;               // put bit 64 in the right place
 137     l64 |= l63;                          // put low 63 bits in
 138     uint64_t h64 = a.h + b.h + c64;      // compute the high
 139     return construct_128 (h64, l64);
 140   }
 141 
 142 uint128 subtract_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
 143   {
 144 //(void)printf ("sub a %016llx %016llx\n", a.h, a.l);
 145 //(void)printf ("sub b %016llx %016llx\n", b.h, b.l);
 146     bool borrow = !! (b.l > a.l);
 147     uint128 res = construct_128 (a.h - b.h, a.l - b.l);
 148     if (borrow)
 149       res.h --;
 150 //(void)printf ("sub res %016llx %016llx\n", res.h, res.l);
 151     return res;
 152   }
 153 
 154 uint128 negate_128 (uint128 a)
     /* [previous][next][first][last][top][bottom][index][help] */
 155   {
 156     return add_128 (complement_128 (a), construct_128 (0, 1));
 157   }
 158 
 159 int128 negate_s128 (int128 a)
     /* [previous][next][first][last][top][bottom][index][help] */
 160   {
 161     uint128 t = add_128 (complement_128 (cast_128 (a)), construct_128 (0, 1));
 162     return cast_s128 (t);
 163   }
 164 
 165 uint128 lshift_128 (uint128 a, unsigned int n)
     /* [previous][next][first][last][top][bottom][index][help] */
 166 #  if ( defined(__clang__) || defined(__clang_version__) ) && defined(__llvm__)
 167 #   if defined(NEED_128)
 168 __attribute__ ((optnone))
 169 #   endif /* NEED_128 */
 170 #  endif /* if ( defined(__clang__) || defined(__clang_version__) ) && defined(__llvm__) */
 171   {
 172     if (n < 64)
 173       {
 174         uint64_t nmask = (uint64_t) ((~(MASK64 << n)));
 175 //(void)printf ("nmask %016llx\r\n", nmask);
 176         // capture the high bits of the low half
 177         uint64_t keep = (a.l >> (64 - n)) & nmask;
 178 //(void)printf ("keep %016llx\r\n", keep);
 179         // shift the low half
 180         uint64_t l = a.l << n;
 181 //(void)printf ("l %016llx\r\n", l);
 182         // shift the high half
 183         uint64_t h = a.h << n;
 184 //(void)printf ("h %016llx\r\n", h);
 185         // put the bits from the low into the high
 186         h |= keep;
 187 //(void)printf ("h|keep %016llx\r\n", h);
 188         return construct_128 (h, l);
 189       }
 190     uint64_t h = a.l << (n - 64);
 191     return construct_128 (h, 0);
 192   }
 193 
 194 int128 lshift_s128 (int128 a, unsigned int n)
     /* [previous][next][first][last][top][bottom][index][help] */
 195   {
 196     uint128 t = lshift_128 (cast_128 (a), n);
 197     return cast_s128 (t);
 198   }
 199 
 200 uint128 rshift_128 (uint128 a, unsigned int n)
     /* [previous][next][first][last][top][bottom][index][help] */
 201   {
 202 
 203 
 204 
 205 
 206 
 207 
 208 
 209 
 210 
 211 
 212 
 213 
 214 
 215 
 216 
 217 
 218 
 219 
 220 
 221 
 222 
 223 
 224 
 225 
 226 
 227 
 228 
 229 
 230 
 231 
 232 
 233 
 234 
 235 
 236 
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250     uint64_t h = a.h;
 251     uint64_t l = a.l;
 252     uint64_t sign = a.h & SIGN64;
 253     while (n)
 254       {
 255         uint64_t b = (h & 1) ? SIGN64 : 0;
 256         h >>= 1;
 257         h |= sign;
 258         l >>= 1;
 259         l &= MASK63;
 260         l |= b;
 261         n --;
 262       }
 263     return construct_128 (h, l);
 264   }
 265 
 266 int128 rshift_s128 (int128 a, unsigned int n)
     /* [previous][next][first][last][top][bottom][index][help] */
 267   {
 268     uint128 t = rshift_128 (cast_128 (a), n);
 269     return cast_s128 (t);
 270   }
 271 
 272 // See: https://web.archive.org/web/20190219173849/http://www.hackersdelight.org/
 273 
 274 static void mulmn (uint32_t w[], uint32_t u[],
     /* [previous][next][first][last][top][bottom][index][help] */
 275                    uint32_t v[], int m, int n)
 276   {
 277 //for (int i = m - 1; i >= 0; i --)
 278 //  (void)fprintf (stderr, "%08x", u [i]);
 279 //(void)fprintf (stderr, "  ");
 280 //for (int i = n - 1; i >= 0; i --)
 281 //  (void)fprintf (stderr, "%08x", v [i]);
 282 //(void)fprintf (stderr, "\n");
 283     uint64_t k, t;
 284     int i, j;
 285 
 286     for (i = 0; i < m; i++)
 287        w[i] = 0;
 288 
 289     for (j = 0; j < n; j++)
 290       {
 291         k = 0;
 292         for (i = 0; i < m; i++)
 293           {
 294             t = (uint64_t) u[i] * (uint64_t) v[j] + (uint64_t) w[i + j] + k;
 295 //(void)printf ("%d %d %016llx\n",i, j, t);
 296             w[i + j] = (uint32_t) t;        // (i.e., t & 0xFFFF).
 297             k = t >> 32;
 298           }
 299         w[j + m] = (uint32_t) k;
 300       }
 301   }
 302 
 303 static void mulmns (uint32_t w[], uint32_t u[],
     /* [previous][next][first][last][top][bottom][index][help] */
 304                     uint32_t v[], int m, int n)
 305   {
 306     mulmn (w, u, v, m, n);
 307 
 308     // Now w[] has the unsigned product. Correct by
 309     // subtracting v*2**16m if u < 0, and
 310     // subtracting u*2**16n if v < 0.
 311 
 312     if ((int32_t)u[m - 1] < 0)
 313       {
 314         int b = 0;  // Initialize borrow.
 315         for (int j = 0; j < n; j++)
 316           {
 317             int t = (int) w[j + m] - (int) v[j] - b;
 318             w[j + m] = (uint32_t) t;
 319             b = (t & 0x40000000) ? 1 : 0;
 320           }
 321       }
 322     if ((int32_t)v[n - 1] < 0)
 323       {
 324         int b = 0;
 325         for (int i = 0; i < m; i++)
 326           {
 327             int t = (int) w[i + n] - (int) u[i] - b;
 328             w[i + n] = (uint32_t) t;
 329             b = (t & 0x40000000) ? 1 : 0;
 330           }
 331       }
 332     return;
 333   }
 334 
 335 static int
 336 nlz (unsigned x)
     /* [previous][next][first][last][top][bottom][index][help] */
 337 {
 338   int n;
 339 
 340   if (x == 0)
 341     return (32);
 342 
 343   n = 0;
 344 
 345   if (x <= 0x0000FFFF)
 346     {
 347       n = n  + 16;
 348       x = x << 16;
 349     }
 350 
 351   if (x <= 0x00FFFFFF)
 352     {
 353       n = n  + 8;
 354       x = x << 8;
 355     }
 356 
 357   if (x <= 0x0FFFFFFF)
 358     {
 359       n = n  + 4;
 360       x = x << 4;
 361     }
 362 
 363   if (x <= 0x3FFFFFFF)
 364     {
 365       n = n  + 2;
 366       x = x << 2;
 367     }
 368 
 369   if (x <= 0x7FFFFFFF)
 370     {
 371       n = n + 1;
 372     }
 373 
 374   return n;
 375 }
 376 
 377 unsigned int kd_div_errors = 0;
 378 
 379 int divmnu (uint16_t q[], uint16_t r[],
     /* [previous][next][first][last][top][bottom][index][help] */
 380             const uint16_t u[], const uint16_t v[],
 381             int m, int n)
 382   {
 383     const uint32_t b = 65536;   // Number base (16 bits).
 384     //uint16_t *un, *vn;        // Normalized form of u, v.
 385     unsigned qhat;              // Estimated quotient digit.
 386     unsigned rhat;              // A remainder.
 387     unsigned p;                 // Product of two digits.
 388     int s, i, j, t, k;
 389 
 390     if (m < n || n <= 0 || v[n-1] == 0)
 391       return 1;                 // Return if invalid param.
 392 
 393     // Take care of the case of a single-digit span
 394     if (n == 1)
 395       {
 396         k = 0;
 397         for (j = m - 1; j >= 0; j--)
 398           {
 399             q[j] = (uint16_t) (((unsigned int) k*b + u[j])/v[0]);    // divisor here.
 400             k = (int) (((unsigned int) k*b + u[j]) - q[j]*v[0]);
 401           }
 402         if (r != NULL) r[0] = (uint16_t) k;
 403         return 0;
 404       }
 405 
 406     // Normalize by shifting v left just enough so that
 407     // its high-order bit is on, and shift u left the
 408     // same amount. We may have to append a high-order
 409     // digit on the dividend; we do that unconditionally.
 410 
 411     s = nlz (v[n-1]) - 16;      // 0 <= s <= 16.
 412     //vn = (uint16_t *) alloca (2*n);
 413     uint16_t vn [n];
 414     for (i = n - 1; i > 0; i--)
 415       vn[i] = (uint16_t) (v[i] << s) | (uint16_t) (v[i-1] >> (16-s));
 416     vn[0] = (uint16_t) (v[0] << s);
 417 
 418     //un = (uint16_t *)alloca(2*(m + 1));
 419     uint16_t un [m+1];
 420     un[m] = u[m-1] >> (16-s);
 421     for (i = m - 1; i > 0; i--)
 422       un[i] = (uint16_t) (u[i] << s) | (uint16_t) (u[i-1] >> (16-s));
 423     un[0] = (uint16_t) (u[0] << s);
 424     for (j = m - n; j >= 0; j--)
 425       {       // Main loop.
 426         // Compute estimate qhat of q[j].
 427         qhat = (un[j+n]*b + un[j+n-1])/vn[n-1];
 428         rhat = (un[j+n]*b + un[j+n-1])%vn[n-1];
 429 cmp_again:
 430         if (qhat >= b || (unsigned)qhat*(unsigned long long)vn[n-2] > b*rhat + un[j+n-2])
 431           {
 432             qhat = qhat - 1;
 433             rhat = rhat + vn[n-1];
 434             if (rhat < b) goto cmp_again;
 435           }
 436 
 437         // Multiply and subtract.
 438         k = 0;
 439         for (i = 0; i < n; i++)
 440           {
 441             p = (unsigned)qhat*(unsigned long long)vn[i];
 442             t = (int32_t) un[i+j] - k - (int32_t) (p & 0xFFFF);
 443             un[i+j] = (uint16_t) t;
 444             k = (int) (p >> 16) - (t >> 16);
 445           }
 446         t = un[j+n] - k;
 447         un[j+n] = (uint16_t) t;
 448 
 449         q[j] = (uint16_t) qhat;  // Store quotient digit.
 450         if (t < 0)
 451           {                      // If we subtracted too
 452             q[j] = q[j] - 1;     // much, add back.
 453             k = 0;
 454             for (i = 0; i < n; i++)
 455               {
 456                 t = un[i+j] + vn[i] + k;
 457                 un[i+j] = (uint16_t) t;
 458                 k = t >> 16;
 459                }
 460              un[j+n] = (uint16_t) (un[j+n] + k);
 461           }
 462       } // End j.
 463     // If the caller wants the remainder, denormalize
 464     // it and pass it back.
 465     if (r != NULL)
 466       {
 467         for (i = 0; i < n; i++)
 468           r[i] = (uint16_t) (un[i] >> s) | (uint16_t) (un[i+1] << (16-s));
 469       }
 470     return 0;
 471   }
 472 
 473 uint128 multiply_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
 474   {
 475 //(void)printf ("%016llx%016llx %016llx%016llx\n", a.h,a.l,b.h,b.l);
 476     const int l = 4;
 477     uint32_t w[l+l], u[l], v[l];
 478 
 479     u[3] = (uint32_t) (a.h >> 32);
 480     u[2] = (uint32_t)  a.h;
 481     u[1] = (uint32_t) (a.l >> 32);
 482     u[0] = (uint32_t)  a.l;
 483     v[3] = (uint32_t) (b.h >> 32);
 484     v[2] = (uint32_t)  b.h;
 485     v[1] = (uint32_t) (b.l >> 32);
 486     v[0] = (uint32_t)  b.l;
 487     mulmn (w, u, v, l, l);
 488     return construct_128 (
 489        (((uint64_t) w[3]) << 32) | w[2],
 490        (((uint64_t) w[1]) << 32) | w[0]);
 491   }
 492 
 493 int128 multiply_s128 (int128 a, int128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
 494   {
 495 //(void)printf ("%016llx%016llx %016llx%016llx\n", a.h,a.l,b.h,b.l);
 496     const int l = 4;
 497     uint32_t w[l+l], u[l], v[l];
 498 
 499     u[3] = (uint32_t) (a.h >> 32);
 500     u[2] = (uint32_t)  a.h;
 501     u[1] = (uint32_t) (a.l >> 32);
 502     u[0] = (uint32_t)  a.l;
 503     v[3] = (uint32_t) (b.h >> 32);
 504     v[2] = (uint32_t)  b.h;
 505     v[1] = (uint32_t) (b.l >> 32);
 506     v[0] = (uint32_t)  b.l;
 507     mulmns (w, u, v, l, l);
 508     return construct_s128 (
 509        (((int64_t)  w[3]) << 32) | w[2],
 510        (((uint64_t) w[1]) << 32) | w[0]);
 511   }
 512 
 513 // Note: divisor is < 2^16
 514 uint128 divide_128 (uint128 a, uint128 b, uint128 * remp)
     /* [previous][next][first][last][top][bottom][index][help] */
 515   {
 516     const int m = 8;
 517     const int n = 8;
 518     uint16_t q[m], u[m], v[n];
 519     u[0] = (uint16_t)  a.l;
 520     u[1] = (uint16_t) (a.l >> 16);
 521     u[2] = (uint16_t) (a.l >> 32);
 522     u[3] = (uint16_t) (a.l >> 48);
 523     u[4] = (uint16_t)  a.h;
 524     u[5] = (uint16_t) (a.h >> 16);
 525     u[6] = (uint16_t) (a.h >> 32);
 526     u[7] = (uint16_t) (a.h >> 48);
 527 
 528     v[0] = (uint16_t)  b.l;
 529     v[1] = (uint16_t) (b.l >> 16);
 530     v[2] = (uint16_t) (b.l >> 32);
 531     v[3] = (uint16_t) (b.l >> 48);
 532     v[4] = (uint16_t)  b.h;
 533     v[5] = (uint16_t) (b.h >> 16);
 534     v[6] = (uint16_t) (b.h >> 32);
 535     v[7] = (uint16_t) (b.h >> 48);
 536 
 537     q[0] = q[1] = q[2] = q[3] = q[4] = q[5] = q[6] = q[7] = 0;
 538 
 539     int normlen;
 540     for (normlen = 8; normlen > 0; normlen --)
 541       if (v [normlen - 1])
 542         break;
 543     uint16_t r [8] = { 8 * 0 };
 544     divmnu (q, remp ? r : NULL, u, v, m, normlen);
 545     if (remp)
 546       {
 547         * remp =  construct_128 (
 548        (((uint64_t) r [7]) << 48) |
 549        (((uint64_t) r [6]) << 32) |
 550        (((uint64_t) r [5]) << 16) |
 551        (((uint64_t) r [4]) <<  0),
 552        (((uint64_t) r [3]) << 48) |
 553        (((uint64_t) r [2]) << 32) |
 554        (((uint64_t) r [1]) << 16) |
 555        (((uint64_t) r [0]) <<  0));
 556       }
 557     return construct_128 (
 558        (((uint64_t) q [7]) << 48) |
 559        (((uint64_t) q [6]) << 32) |
 560        (((uint64_t) q [5]) << 16) |
 561        (((uint64_t) q [4]) <<  0),
 562        (((uint64_t) q [3]) << 48) |
 563        (((uint64_t) q [2]) << 32) |
 564        (((uint64_t) q [1]) << 16) |
 565        (((uint64_t) q [0]) <<  0));
 566   }
 567 
 568 // Note: divisor is < 2^16
 569 uint128 divide_128_16 (uint128 a, uint16_t b, uint16_t * remp)
     /* [previous][next][first][last][top][bottom][index][help] */
 570   {
 571     const int m = 8;
 572     const int n = 1;
 573     uint16_t q[m], u[m], v[n];
 574     u[0] = (uint16_t)  a.l;
 575     u[1] = (uint16_t) (a.l >> 16);
 576     u[2] = (uint16_t) (a.l >> 32);
 577     u[3] = (uint16_t) (a.l >> 48);
 578     u[4] = (uint16_t)  a.h;
 579     u[5] = (uint16_t) (a.h >> 16);
 580     u[6] = (uint16_t) (a.h >> 32);
 581     u[7] = (uint16_t) (a.h >> 48);
 582 
 583     v[0] = (uint16_t) b;
 584 
 585     q[0] = q[1] = q[2] = q[3] = q[4] = q[5] = q[6] = q[7] = 0;
 586 
 587     divmnu (q, remp, u, v, m, n);
 588     return construct_128 (
 589        (((uint64_t) q [7]) << 48) |
 590        (((uint64_t) q [6]) << 32) |
 591        (((uint64_t) q [5]) << 16) |
 592        (((uint64_t) q [4]) <<  0),
 593        (((uint64_t) q [3]) << 48) |
 594        (((uint64_t) q [2]) << 32) |
 595        (((uint64_t) q [1]) << 16) |
 596        (((uint64_t) q [0]) <<  0));
 597   }
 598 
 599 //divide_128_32 (
 600 //00000000000000010000000000000000, 00010000) returned
 601 //00000000000000000001000000000000, 00000000
 602 //divide_128_32 (ffffffffffffffffffffffffffffffff, 00010000) returned f771ffffffffffffffffffffffffffff, 0000ffff
 603 
 604 // Note: divisor is < 2^32 and >= 2^16; i.e. high 16 bits *must* be non-zero
 605 uint128 divide_128_32 (uint128 a, uint32_t b, uint32_t * remp)
     /* [previous][next][first][last][top][bottom][index][help] */
 606   {
 607     const int m = 8;
 608     const int n = 2;
 609     uint16_t q[m], u[m], v[n], r[2];
 610     u[0] = (uint16_t)  a.l;
 611     u[1] = (uint16_t) (a.l >> 16);
 612     u[2] = (uint16_t) (a.l >> 32);
 613     u[3] = (uint16_t) (a.l >> 48);
 614     u[4] = (uint16_t)  a.h;
 615     u[5] = (uint16_t) (a.h >> 16);
 616     u[6] = (uint16_t) (a.h >> 32);
 617     u[7] = (uint16_t) (a.h >> 48);
 618 
 619     v[0] = (uint16_t) b;
 620     v[1] = (uint16_t) (b >> 16);
 621 
 622     q[0] = q[1] = q[2] = q[3] = q[4] = q[5] = q[6] = q[7] = 0;
 623 
 624     divmnu (q, remp ? r : NULL, u, v, m, n);
 625     if (remp)
 626       * remp = r [0] | (uint32_t) r[1] << 16;
 627 
 628     return construct_128 (
 629        (((uint64_t) q [7]) << 48) |
 630        (((uint64_t) q [6]) << 32) |
 631        (((uint64_t) q [5]) << 16) |
 632        (((uint64_t) q [4]) <<  0),
 633        (((uint64_t) q [3]) << 48) |
 634        (((uint64_t) q [2]) << 32) |
 635        (((uint64_t) q [1]) << 16) |
 636        (((uint64_t) q [0]) <<  0));
 637   }
 638 
 639 unsigned ti_test_iter = 0;
 640 
 641 static void tisz (uint64_t h, uint64_t l, bool expect)
     /* [previous][next][first][last][top][bottom][index][help] */
 642   {
 643     bool r = iszero_128 (construct_128 (h, l));
 644     ti_test_iter++;
 645     if (r != expect) {
 646       (void)fprintf (stderr, "FATAL (%u): iszero_128 (%llu, %llu) returned %lu\n",
 647                      ti_test_iter,
 648                      (unsigned long long)h, (unsigned long long)l, (unsigned long)r);
 649       kd_div_errors++;
 650     }
 651   }
 652 
 653 static void tand (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
     /* [previous][next][first][last][top][bottom][index][help] */
 654                   uint64_t rh, uint64_t rl)
 655   {
 656     uint128 a = construct_128 (ah, al);
 657     uint128 b = construct_128 (bh, bl);
 658     uint128 r = and_128 (a, b);
 659     ti_test_iter++;
 660     if (r.h != rh || r.l != rl) {
 661       (void)fprintf (stderr, "FATAL (%u): and_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
 662                      ti_test_iter,
 663                      (unsigned long long)ah, (unsigned long long)al,  (unsigned long long)bh,
 664                      (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
 665       kd_div_errors++;
 666     }
 667   }
 668 
 669 static void tor (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
     /* [previous][next][first][last][top][bottom][index][help] */
 670                  uint64_t rh, uint64_t rl)
 671   {
 672     uint128 a = construct_128 (ah, al);
 673     uint128 b = construct_128 (bh, bl);
 674     uint128 r = or_128 (a, b);
 675     ti_test_iter++;
 676     if (r.h != rh || r.l != rl) {
 677       (void)fprintf (stderr, "FATAL (%u): or_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
 678                      ti_test_iter,
 679                      (unsigned long long)ah, (unsigned long long)al, (unsigned long long)bh,
 680                      (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
 681       kd_div_errors++;
 682     }
 683   }
 684 
 685 static void tcomp (uint64_t ah, uint64_t al,
     /* [previous][next][first][last][top][bottom][index][help] */
 686                    uint64_t rh, uint64_t rl)
 687   {
 688     uint128 a = construct_128 (ah, al);
 689     uint128 r = complement_128 (a);
 690     ti_test_iter++;
 691     if (r.h != rh || r.l != rl) {
 692       (void)fprintf (stderr, "FATAL (%u): complement_128 (%016llx%016llx) returned %016llx%016llx\n",
 693                      ti_test_iter,
 694                      (unsigned long long)ah, (unsigned long long)al, (unsigned long long)r.h,
 695                      (unsigned long long)r.l);
 696       kd_div_errors++;
 697     }
 698   }
 699 
 700 static void tadd (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
     /* [previous][next][first][last][top][bottom][index][help] */
 701                   uint64_t rh, uint64_t rl)
 702   {
 703     uint128 a = construct_128 (ah, al);
 704     uint128 b = construct_128 (bh, bl);
 705     uint128 r = add_128 (a, b);
 706     ti_test_iter++;
 707     if (r.h != rh || r.l != rl) {
 708       (void)fprintf (stderr, "FATAL %u): add_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
 709                      ti_test_iter,
 710                      (unsigned long long)ah, (unsigned long long)al,  (unsigned long long)bh,
 711                      (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
 712       kd_div_errors++;
 713     }
 714   }
 715 
 716 static void tsub (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
     /* [previous][next][first][last][top][bottom][index][help] */
 717                   uint64_t rh, uint64_t rl)
 718   {
 719     uint128 a = construct_128 (ah, al);
 720     uint128 b = construct_128 (bh, bl);
 721     uint128 r = subtract_128 (a, b);
 722     ti_test_iter++;
 723     if (r.h != rh || r.l != rl) {
 724       (void)fprintf (stderr, "FATAL (%u): subtract_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
 725                      ti_test_iter,
 726                      (unsigned long long)ah, (unsigned long long)al,  (unsigned long long)bh,
 727                      (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
 728       kd_div_errors++;
 729     }
 730   }
 731 
 732 static void tneg (uint64_t ah, uint64_t al,
     /* [previous][next][first][last][top][bottom][index][help] */
 733                   uint64_t rh, uint64_t rl)
 734   {
 735     uint128 a = construct_128 (ah, al);
 736     uint128 r = negate_128 (a);
 737     ti_test_iter++;
 738     if (r.h != rh || r.l != rl) {
 739       (void)fprintf (stderr, "FATAL (%u): negate_128 (%016llx%016llx) returned %016llx%016llx\n",
 740                      ti_test_iter,
 741                      (unsigned long long)ah, (unsigned long long)al, (unsigned long long)r.h,
 742                      (unsigned long long)r.l);
 743       kd_div_errors++;
 744     }
 745   }
 746 
 747 static void tgt (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
     /* [previous][next][first][last][top][bottom][index][help] */
 748                  bool expect)
 749   {
 750     uint128 a = construct_128 (ah, al);
 751     uint128 b = construct_128 (bh, bl);
 752     ti_test_iter++;
 753     bool r = isgt_128 (a, b);
 754     if (r != expect) {
 755       (void)fprintf (stderr, "FATAL (%u): gt_128 (%016llx%016llx, %016llx%016llx) returned %llu\n",
 756                      ti_test_iter,
 757                      (unsigned long long)ah, (unsigned long long)al, (unsigned long long)bh,
 758                      (unsigned long long)bl, (unsigned long long)r);
 759       kd_div_errors++;
 760     }
 761   }
 762 
 763 static void tls (uint64_t ah, uint64_t al, unsigned int n,
     /* [previous][next][first][last][top][bottom][index][help] */
 764                  uint64_t rh, uint64_t rl)
 765   {
 766     uint128 a = construct_128 (ah, al);
 767     uint128 r = lshift_128 (a, n);
 768     ti_test_iter++;
 769     if (r.h != rh || r.l != rl) {
 770       (void)fprintf (stderr, "FATAL (%u): lshift_128 (%016llx%016llx, %llu) returned %016llx%016llx\n",
 771                      ti_test_iter,
 772                      (unsigned long long)ah,  (unsigned long long)al, (unsigned long long)n,
 773                      (unsigned long long)r.h, (unsigned long long)r.l);
 774       kd_div_errors++;
 775     }
 776   }
 777 
 778 static void trs (uint64_t ah, uint64_t al, unsigned int n,
     /* [previous][next][first][last][top][bottom][index][help] */
 779                  uint64_t rh, uint64_t rl)
 780   {
 781     uint128 a = construct_128 (ah, al);
 782     uint128 r = rshift_128 (a, n);
 783     ti_test_iter++;
 784     if (r.h != rh || r.l != rl) {
 785       (void)fprintf (stderr, "FATAL (%u): rshift_128 (%016llx%016llx, %llu) returned %016llx%016llx\n",
 786                      ti_test_iter,
 787                      (unsigned long long)ah,  (unsigned long long)al, (unsigned long long)n,
 788                      (unsigned long long)r.h, (unsigned long long)r.l);
 789       kd_div_errors++;
 790     }
 791   }
 792 
 793 static void tmul (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
     /* [previous][next][first][last][top][bottom][index][help] */
 794                   uint64_t rh, uint64_t rl)
 795   {
 796     uint128 a = construct_128 (ah, al);
 797     uint128 b = construct_128 (bh, bl);
 798     uint128 r = multiply_128 (a, b);
 799     ti_test_iter++;
 800     if (r.h != rh || r.l != rl) {
 801       (void)fprintf (stderr, "FATAL (%u): multiply_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
 802                      ti_test_iter,
 803                      (unsigned long long)ah, (unsigned long long)al,  (unsigned long long)bh,
 804                      (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
 805       kd_div_errors++;
 806     }
 807   }
 808 
 809 static void tsmul (int64_t ah, uint64_t al, int64_t bh, uint64_t bl,
     /* [previous][next][first][last][top][bottom][index][help] */
 810                    int64_t rh, uint64_t rl)
 811   {
 812     int128 a = construct_s128 (ah, al);
 813     int128 b = construct_s128 (bh, bl);
 814     int128 r = multiply_s128 (a, b);
 815     ti_test_iter++;
 816     if (r.h != rh || r.l != rl) {
 817       (void)fprintf (stderr, "FATAL (%u): multiply_s128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
 818                      ti_test_iter,
 819                      (unsigned long long)ah, (unsigned long long)al,  (unsigned long long)bh,
 820                      (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
 821       kd_div_errors++;
 822     }
 823   }
 824 
 825 static void tdiv16 (uint64_t ah, uint64_t al, uint16_t b,
     /* [previous][next][first][last][top][bottom][index][help] */
 826                     uint64_t resh, uint64_t resl,
 827                     uint16_t remainder)
 828   {
 829     uint128 a = construct_128 (ah, al);
 830     uint16_t rem;
 831     uint128 res = divide_128_16 (a, b, & rem);
 832     ti_test_iter++;
 833     if (res.h != resh || res.l != resl || rem != remainder) {
 834       (void)fprintf (stderr, "FATAL (%u): divide_128_16 (%016llx%016llx, %04x) returned %016llx%016llx, %04x\n",
 835                      ti_test_iter,
 836                      (unsigned long long)ah,    (unsigned long long)al, b,
 837                      (unsigned long long)res.h, (unsigned long long)res.l, rem);
 838       kd_div_errors++;
 839     }
 840   }
 841 
 842 static void tdiv32 (uint64_t ah, uint64_t al, uint32_t b,
     /* [previous][next][first][last][top][bottom][index][help] */
 843                     uint64_t resh, uint64_t resl,
 844                     uint32_t remainder)
 845   {
 846     uint128 a = construct_128 (ah, al);
 847     uint32_t rem;
 848     uint128 res = divide_128_32 (a, b, & rem);
 849     ti_test_iter++;
 850     if (res.h != resh || res.l != resl || rem != remainder) {
 851       (void)fprintf (stderr, "FATAL (%u): divide_128_32 (%016llx%016llx, %08x) returned %016llx%016llx, %08x\n",
 852                      ti_test_iter,
 853                      (unsigned long long)ah,    (unsigned long long)al, b,
 854                      (unsigned long long)res.h, (unsigned long long)res.l, rem);
 855       kd_div_errors++;
 856     }
 857   }
 858 
 859 int test_math128 (void)
     /* [previous][next][first][last][top][bottom][index][help] */
 860   {
 861     tisz   (0,      0,      true);
 862     tisz   (1,      0,      false);
 863     tisz   (0,      1,      false);
 864     tisz   (1,      1,      false);
 865     tisz   (SIGN64, 0,      false);
 866     tisz   (0,      SIGN64, false);
 867     tisz   (SIGN64, SIGN64, false);
 868     tneg   (0,      0,      0,      0);
 869     tneg   (MASK64, MASK64, 0,      1);
 870     tcomp  (MASK64, MASK64, 0,      0);
 871     tneg   (0,      1,      MASK64, MASK64);
 872     tcomp  (0,      0,      MASK64, MASK64);
 873     tcomp  (0,      1,      MASK64, MASK64-1);
 874     tgt    (0,      0,      0,      0,          false);
 875     tgt    (0,      0,      0,      1,          false);
 876     tgt    (0,      1,      0,      0,          true);
 877     tgt    (MASK64, MASK64, MASK64, MASK64,     false);
 878     tgt    (0,      0,      MASK64, MASK64,     false);
 879     tgt    (MASK64, MASK64, 0,      0,          true);
 880 #  if !defined(__PGI) && !defined(__PGLLVM__) && !defined(__NVCOMPILER) && !defined(__NVCOMPILER_LLVM__)
 881     tls    (0,      0,      0,      0,          0);
 882     tls    (MASK64, MASK64, 0,      MASK64,     MASK64);
 883 #  endif /* if !defined(__PGI) && !defined(__PGLLVM__) && !defined(__NVCOMPILER) && !defined(__NVCOMPILER_LLVM__) */
 884     tls    (0,      1,      127,    SIGN64,     0);
 885     tls    (0,      MASK64, 64,     MASK64,     0);
 886     tls    (0,      MASK64, 1,      1,          MASK64-1);
 887     tls    (0,      1,      64,     1,          0);
 888     tls    (0,      1,      63,     0,          SIGN64);
 889     tls    (1,      0,      63,     SIGN64,     0);
 890     trs    (0,      0,      0,      0,          0);
 891     trs    (SIGN64, 0,      127,    MASK64,     MASK64);
 892     trs    (MASK64, 0,      64,     MASK64,     MASK64);
 893     trs    (MASK64, 0,      1,      MASK64,     SIGN64);
 894     trs    (1,      0,      64,     0,          1);
 895     trs    (1,      0,      1,      0,          SIGN64);
 896     trs    (MASK64, MASK64, 0,      MASK64,     MASK64);
 897     trs    (SIGN64, 0,      63,     MASK64,     0);
 898     trs    (SIGN64, 0,      64,     MASK64,     SIGN64);
 899     tand   (0,      0,      0,      0,          0,       0);
 900     tand   (MASK64, MASK64, 0,      0,          0,       0);
 901     tand   (0,      0,      MASK64, MASK64,     0,       0);
 902     tand   (MASK64, MASK64, MASK64, MASK64,     MASK64,  MASK64);
 903     tor    (0,      0,      0,      0,          0,       0);
 904     tor    (MASK64, MASK64, 0,      0,          MASK64,  MASK64);
 905     tor    (0,      0,      MASK64, MASK64,     MASK64,  MASK64);
 906     tor    (MASK64, MASK64, MASK64, MASK64,     MASK64,  MASK64);
 907     tadd   (0,      0,      0,      0,          0,       0);
 908     tadd   (0,      0,      0,      1,          0,       1);
 909     tadd   (0,      1,      0,      0,          0,       1);
 910     tadd   (0,      1,      0,      1,          0,       2);
 911     tadd   (0,      1,      0,      MASK64,     1,       0);
 912     tadd   (0,      1,      MASK64, MASK64,     0,       0);
 913     tsub   (0,      0,      0,      0,          0,       0);
 914     tsub   (0,      1,      0,      1,          0,       0);
 915     tsub   (MASK64, MASK64, MASK64, MASK64,     0,       0);
 916     tsub   (MASK64, MASK64, 0,      0,          MASK64,  MASK64);
 917     tsub   (0,      0,      0,      1,          MASK64,  MASK64);
 918     tmul   (0,      0,      0,      0,          0,       0);
 919     tmul   (MASK64, MASK64, 0,      0,          0,       0);
 920     tmul   (0,      0,      MASK64, MASK64,     0,       0);
 921     tmul   (0,      1,      0,      1,          0,       1);
 922     tmul   (0,      1,      0,      10,         0,       10);
 923     tmul   (0,      10,     0,      10,         0,       100);
 924     tmul   (0,      100,    0,      10,         0,       1000);
 925     tmul   (0,      MASK64, 0,      2,          1,       MASK64-1);
 926     tmul   (MASK64, MASK64, MASK64, MASK64,     0,       1);
 927     tdiv16 (MASK64,   MASK64, 16,   MASK64>>4,  MASK64,  15);
 928     tsmul  (0,      1,      MASK64, MASK64,     MASK64,  MASK64);
 929     tsmul  (MASK64, MASK64, MASK64, MASK64,     0,       1);
 930     tdiv32 (1,      0,      1<<16,  0,          1ll<<48, 0);
 931     tdiv16 (0,      1,      1,      0,          1,       0);
 932     tdiv16 (0,      10,     2,      0,          5,       0);
 933     tdiv16 (0,      3,      2,      0,          1,       1);
 934     tdiv32 (MASK64, MASK64, 1<<16,  MASK64>>16, MASK64,  0xffff);
 935     if (kd_div_errors)
 936       abort();
 937     return 0;
 938   }
 939 # else
 940 #  if defined(NEED_128)
 941 #   include "dps8_math128.h"
 942 
 943 void __udivmodti3(UTItype div, UTItype dvd,UTItype *result,UTItype *remain);
 944 UTItype __udivti3(UTItype div, UTItype dvd);
 945 UTItype __umodti3(UTItype div, UTItype dvd);
 946 
 947 UTItype __udivti3(UTItype div, UTItype dvd)
     /* [previous][next][first][last][top][bottom][index][help] */
 948 {
 949         UTItype result,remain;
 950 
 951         __udivmodti3(div,dvd,&result,&remain);
 952 
 953         return result;
 954 }
 955 
 956 void __udivmodti3(UTItype div, UTItype dvd,UTItype *result,UTItype *remain)
     /* [previous][next][first][last][top][bottom][index][help] */
 957 {
 958         UTItype z1 = dvd;
 959         UTItype z2 = (UTItype)1;
 960 
 961         *result = (UTItype)0;
 962         *remain = div;
 963 
 964         if ( z1 == (UTItype)0)
 965 #   if !defined (CPPCHECK)
 966           1/0;
 967 #   else
 968           abort();
 969 #   endif /* if !defined(CPPCHECK) */
 970 
 971         while ( z1 < *remain )
 972           {
 973             z1 <<= 1 ;
 974             z2 <<= 1;
 975           }
 976 
 977         do
 978           {
 979             if ( *remain >= z1 )
 980               {
 981                 *remain -= z1;
 982                 *result += z2;
 983               }
 984             z1 >>= 1;
 985             z2 >>= 1;
 986           } while ( z2 );
 987 }
 988 
 989 TItype __divti3(TItype div, TItype dvd)
     /* [previous][next][first][last][top][bottom][index][help] */
 990 {
 991         int sign=1;
 992 
 993         if (div < (TItype)0)
 994           {
 995             sign = -1;
 996             div = -div;
 997           }
 998 
 999         if (dvd < (TItype)0)
1000           {
1001             sign = -sign;
1002             dvd = -dvd;
1003           }
1004 
1005         if (sign > 0)
1006           return (TItype)__udivti3(div,dvd);
1007         else
1008           return -((TItype)__udivti3(div,dvd));
1009 }
1010 
1011 TItype __modti3(TItype div, TItype dvd)
     /* [previous][next][first][last][top][bottom][index][help] */
1012 {
1013         int sign=1;
1014 
1015         if (div < (TItype)0)
1016           {
1017             sign = -1;
1018             div = -div;
1019           }
1020 
1021         if (dvd < (TItype)0)
1022           {
1023             sign = -sign;
1024             dvd = -dvd;
1025           }
1026 
1027         if (sign > 0)
1028           return (TItype)__umodti3(div,dvd);
1029         else
1030           return ((TItype)0-(TItype)__umodti3(div,dvd));
1031 }
1032 
1033 UTItype __umodti3(UTItype div, UTItype dvd)
     /* [previous][next][first][last][top][bottom][index][help] */
1034 {
1035         UTItype result,remain;
1036 
1037         __udivmodti3(div,dvd,&result,&remain);
1038 
1039         return remain;
1040 }
1041 
1042 TItype __multi3 (TItype u, TItype v)
     /* [previous][next][first][last][top][bottom][index][help] */
1043 {
1044         TItype result = (TItype)0;
1045         int sign = 1;
1046 
1047         if (u<0)
1048           {
1049             sign = -1;
1050             u = -u;
1051           }
1052 
1053         while (u != (TItype)0)
1054           {
1055             if ( u&(TItype)1 )
1056               result += v;
1057             u>>=1;
1058             v<<=1;
1059           }
1060 
1061         if ( sign < 0 )
1062           return -result;
1063         else
1064           return result;
1065 }
1066 #  endif
1067 # endif
1068 #endif

/* [previous][next][first][last][top][bottom][index][help] */