root/src/dps8/dps8_math128.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. iszero_128
  2. isnonzero_128
  3. iseq_128
  4. isgt_128
  5. islt_128
  6. isge_128
  7. islt_s128
  8. isgt_s128
  9. and_128
  10. and_s128
  11. or_128
  12. xor_128
  13. complement_128
  14. add_128
  15. subtract_128
  16. negate_128
  17. negate_s128
  18. lshift_128
  19. lshift_s128
  20. rshift_128
  21. rshift_s128
  22. mulmn
  23. mulmns
  24. nlz
  25. divmnu
  26. multiply_128
  27. multiply_s128
  28. divide_128
  29. divide_128_16
  30. divide_128_32
  31. tisz
  32. tand
  33. tor
  34. tcomp
  35. tadd
  36. tsub
  37. tneg
  38. tgt
  39. tls
  40. trs
  41. tmul
  42. tsmul
  43. tdiv16
  44. tdiv32
  45. test_math128
  46. __udivti3
  47. __udivmodti3
  48. __divti3
  49. __modti3
  50. __umodti3
  51. __multi3

   1 /*
   2  * vim: filetype=c:tabstop=4:ai:expandtab
   3  * SPDX-License-Identifier: ICU
   4  * SPDX-License-Identifier: Multics
   5  * scspell-id: 928c0e86-f62e-11ec-ab5c-80ee73e9b8e7
   6  *
   7  * ---------------------------------------------------------------------------
   8  *
   9  * Copyright (c) 2016 Jean-Michel Merliot
  10  * Copyright (c) 2021-2023 The DPS8M Development Team
  11  *
  12  * All rights reserved.
  13  *
  14  * This software is made available under the terms of the ICU
  15  * License, version 1.8.1 or later.  For more details, see the
  16  * LICENSE.md file at the top-level directory of this distribution.
  17  *
  18  * ---------------------------------------------------------------------------
  19  *
  20  * This source file may contain code comments that adapt, include, and/or
  21  * incorporate Multics program code and/or documentation distributed under
  22  * the Multics License.  In the event of any discrepancy between code
  23  * comments herein and the original Multics materials, the original Multics
  24  * materials should be considered authoritative unless otherwise noted.
  25  * For more details and historical background, see the LICENSE.md file at
  26  * the top-level directory of this distribution.
  27  *
  28  * ---------------------------------------------------------------------------
  29  */
  30 
  31 #ifndef DPS8_MATH128
  32 # define DPS8_MATH128
  33 # include "dps8.h"
  34 # include "dps8_math128.h"
  35 
  36 # ifdef NEED_128
  37 
  38 bool iszero_128 (uint128 w)
     /* [previous][next][first][last][top][bottom][index][help] */
  39   {
  40     if (w.h || w.l)
  41       return false;
  42     return true;
  43   }
  44 
  45 bool isnonzero_128 (uint128 w)
     /* [previous][next][first][last][top][bottom][index][help] */
  46   {
  47     if (w.h || w.l)
  48       return true;
  49     return false;
  50   }
  51 
  52 bool iseq_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
  53   {
  54     return a.h == b.h && a.l == b.l;
  55   }
  56 
  57 bool isgt_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
  58   {
  59     if (a.h > b.h) return true;
  60     if (a.h < b.h) return false;
  61     if (a.l > b.l) return true;
  62     return false;
  63   }
  64 
  65 bool islt_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
  66   {
  67     if (a.h < b.h) return true;
  68     if (a.h > b.h) return false;
  69     if (a.l < b.l) return true;
  70     return false;
  71   }
  72 
  73 bool isge_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
  74   {
  75     if (a.h >  b.h) return true;
  76     if (a.h <  b.h) return false;
  77     if (a.l >= b.l) return true;
  78     return false;
  79   }
  80 
  81 bool islt_s128 (int128 a, int128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
  82   {
  83     if (a.h < b.h) return true;
  84     if (a.h > b.h) return false;
  85     if (a.l < b.l) return true;
  86     return false;
  87   }
  88 
  89 bool isgt_s128 (int128 a, int128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
  90   {
  91     if (a.h > b.h) return true;
  92     if (a.h < b.h) return false;
  93     if (a.l > b.l) return true;
  94     return false;
  95   }
  96 
  97 uint128 and_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
  98   {
  99     return (uint128) {a.h & b.h, a.l & b.l};
 100   }
 101 
 102 int128 and_s128 (int128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
 103   {
 104     return (int128) {a.h & (int64_t)b.h, a.l & b.l};
 105   }
 106 
 107 uint128 or_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
 108   {
 109     return (uint128) {a.h | b.h, a.l | b.l};
 110   }
 111 
 112 uint128 xor_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
 113   {
 114     return (uint128) {a.h ^ b.h, a.l ^ b.l};
 115   }
 116 
 117 uint128 complement_128 (uint128 a)
     /* [previous][next][first][last][top][bottom][index][help] */
 118   {
 119     return (uint128) {~ a.h, ~ a.l};
 120   }
 121 
 122 uint128 add_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
 123   {
 124 // To do carry detection from low to high, bust the low into 1 bit/63
 125 // bit fields; add the 63 bit fields checking for carry in the "sign"
 126 // bit; add the 1 bit fields plus that carry
 127 
 128     uint64_t al63 = a.l & MASK63;  // low 63 bits of a
 129     uint64_t bl63 = b.l & MASK63;  // low 63 bits of b
 130     uint64_t l63 = al63 + bl63;    // low 63 bits of a + b, with carry into bit 64
 131     uint64_t c63 = l63 & SIGN64;   // the carry out of low 63 a + b
 132     l63 &= MASK63;                 // lose the carry bit
 133 
 134     unsigned int al64 = (a.l >> 63) & 1; // bit 64 of a
 135     unsigned int bl64 = (b.l >> 63) & 1; // bit 64 of b
 136     unsigned int cl64 = (c63 >> 63) & 1; // the carry out of bit 63
 137     uint64_t l64 = al64 + bl64 + cl64;   // bit 64 a + b + carry in
 138     unsigned int c64 = l64 > 1 ? 1 : 0;  // carry out of bit 64
 139     l64 = (l64 & 1) << 63;               // put bit 64 in the right place
 140     l64 |= l63;                          // put low 63 bits in
 141     uint64_t h64 = a.h + b.h + c64;      // compute the high
 142     return construct_128 (h64, l64);
 143   }
 144 
 145 uint128 subtract_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
 146   {
 147 //printf ("sub a %016llx %016llx\n", a.h, a.l);
 148 //printf ("sub b %016llx %016llx\n", b.h, b.l);
 149     bool borrow = !! (b.l > a.l);
 150     uint128 res = construct_128 (a.h - b.h, a.l - b.l);
 151     if (borrow)
 152       res.h --;
 153 //printf ("sub res %016llx %016llx\n", res.h, res.l);
 154     return res;
 155   }
 156 
 157 uint128 negate_128 (uint128 a)
     /* [previous][next][first][last][top][bottom][index][help] */
 158   {
 159     return add_128 (complement_128 (a), construct_128 (0, 1));
 160   }
 161 
 162 int128 negate_s128 (int128 a)
     /* [previous][next][first][last][top][bottom][index][help] */
 163   {
 164     uint128 t = add_128 (complement_128 (cast_128 (a)), construct_128 (0, 1));
 165     return cast_s128 (t);
 166   }
 167 
 168 uint128 lshift_128 (uint128 a, unsigned int n)
     /* [previous][next][first][last][top][bottom][index][help] */
 169 #  if ( defined(__clang__) || defined(__clang_version__) ) && defined(__llvm__)
 170 #   ifdef NEED_128
 171 __attribute__ ((optnone))
 172 #   endif /* NEED_128 */
 173 #  endif /* if ( defined(__clang__) || defined(__clang_version__) ) && defined(__llvm__) */
 174   {
 175     if (n < 64)
 176       {
 177         uint64_t nmask = (uint64_t) ((~(MASK64 << n)));
 178 //printf ("nmask %016llx\r\n", nmask);
 179         // capture the high bits of the low half
 180         uint64_t keep = (a.l >> (64 - n)) & nmask;
 181 //printf ("keep %016llx\r\n", keep);
 182         // shift the low half
 183         uint64_t l = a.l << n;
 184 //printf ("l %016llx\r\n", l);
 185         // shift the high half
 186         uint64_t h = a.h << n;
 187 //printf ("h %016llx\r\n", h);
 188         // put the bits from the low into the high
 189         h |= keep;
 190 //printf ("h|keep %016llx\r\n", h);
 191         return construct_128 (h, l);
 192       }
 193     uint64_t h = a.l << (n - 64);
 194     return construct_128 (h, 0);
 195   }
 196 
 197 int128 lshift_s128 (int128 a, unsigned int n)
     /* [previous][next][first][last][top][bottom][index][help] */
 198   {
 199     uint128 t = lshift_128 (cast_128 (a), n);
 200     return cast_s128 (t);
 201   }
 202 
 203 uint128 rshift_128 (uint128 a, unsigned int n)
     /* [previous][next][first][last][top][bottom][index][help] */
 204   {
 205 
 206 
 207 
 208 
 209 
 210 
 211 
 212 
 213 
 214 
 215 
 216 
 217 
 218 
 219 
 220 
 221 
 222 
 223 
 224 
 225 
 226 
 227 
 228 
 229 
 230 
 231 
 232 
 233 
 234 
 235 
 236 
 237 
 238 
 239 
 240 
 241 
 242 
 243 
 244 
 245 
 246 
 247 
 248 
 249 
 250 
 251 
 252 
 253     uint64_t h = a.h;
 254     uint64_t l = a.l;
 255     uint64_t sign = a.h & SIGN64;
 256     while (n)
 257       {
 258         uint64_t b = (h & 1) ? SIGN64 : 0;
 259         h >>= 1;
 260         h |= sign;
 261         l >>= 1;
 262         l &= MASK63;
 263         l |= b;
 264         n --;
 265       }
 266     return construct_128 (h, l);
 267   }
 268 
 269 int128 rshift_s128 (int128 a, unsigned int n)
     /* [previous][next][first][last][top][bottom][index][help] */
 270   {
 271     uint128 t = rshift_128 (cast_128 (a), n);
 272     return cast_s128 (t);
 273   }
 274 
 275 // See: https://web.archive.org/web/20190219173849/http://www.hackersdelight.org/
 276 
 277 static void mulmn (uint32_t w[], uint32_t u[],
     /* [previous][next][first][last][top][bottom][index][help] */
 278                    uint32_t v[], int m, int n)
 279   {
 280 //for (int i = m - 1; i >= 0; i --) fprintf (stderr, "%08x", u [i]);
 281 //fprintf (stderr, "  ");
 282 //for (int i = n - 1; i >= 0; i --) fprintf (stderr, "%08x", v [i]);
 283 //fprintf (stderr, "\n");
 284     uint64_t k, t;
 285     int i, j;
 286 
 287     for (i = 0; i < m; i++)
 288        w[i] = 0;
 289 
 290     for (j = 0; j < n; j++)
 291       {
 292         k = 0;
 293         for (i = 0; i < m; i++)
 294           {
 295             t = (uint64_t) u[i] * (uint64_t) v[j] + (uint64_t) w[i + j] + k;
 296 //printf ("%d %d %016llx\n",i, j, t);
 297             w[i + j] = (uint32_t) t;        // (i.e., t & 0xFFFF).
 298             k = t >> 32;
 299           }
 300         w[j + m] = (uint32_t) k;
 301       }
 302   }
 303 
 304 static void mulmns (uint32_t w[], uint32_t u[],
     /* [previous][next][first][last][top][bottom][index][help] */
 305                     uint32_t v[], int m, int n)
 306   {
 307     mulmn (w, u, v, m, n);
 308 
 309     // Now w[] has the unsigned product. Correct by
 310     // subtracting v*2**16m if u < 0, and
 311     // subtracting u*2**16n if v < 0.
 312 
 313     if ((int32_t)u[m - 1] < 0)
 314       {
 315         int b = 0;  // Initialize borrow.
 316         for (int j = 0; j < n; j++)
 317           {
 318             int t = (int) w[j + m] - (int) v[j] - b;
 319             w[j + m] = (uint32_t) t;
 320             b = (t & 0x40000000) ? 1 : 0;
 321           }
 322       }
 323     if ((int32_t)v[n - 1] < 0)
 324       {
 325         int b = 0;
 326         for (int i = 0; i < m; i++)
 327           {
 328             int t = (int) w[i + n] - (int) u[i] - b;
 329             w[i + n] = (uint32_t) t;
 330             b = (t & 0x40000000) ? 1 : 0;
 331           }
 332       }
 333     return;
 334   }
 335 
 336 static int
 337 nlz (unsigned x)
     /* [previous][next][first][last][top][bottom][index][help] */
 338 {
 339   int n;
 340 
 341   if (x == 0)
 342     return (32);
 343 
 344   n = 0;
 345 
 346   if (x <= 0x0000FFFF)
 347     {
 348       n = n  + 16;
 349       x = x << 16;
 350     }
 351 
 352   if (x <= 0x00FFFFFF)
 353     {
 354       n = n  + 8;
 355       x = x << 8;
 356     }
 357 
 358   if (x <= 0x0FFFFFFF)
 359     {
 360       n = n  + 4;
 361       x = x << 4;
 362     }
 363 
 364   if (x <= 0x3FFFFFFF)
 365     {
 366       n = n  + 2;
 367       x = x << 2;
 368     }
 369 
 370   if (x <= 0x7FFFFFFF)
 371     {
 372       n = n + 1;
 373     }
 374 
 375   return n;
 376 }
 377 
 378 unsigned int kd_div_errors = 0;
 379 
 380 int divmnu (uint16_t q[], uint16_t r[],
     /* [previous][next][first][last][top][bottom][index][help] */
 381             const uint16_t u[], const uint16_t v[],
 382             int m, int n)
 383   {
 384 
 385     const uint32_t b = 65536;   // Number base (16 bits).
 386     //uint16_t *un, *vn;        // Normalized form of u, v.
 387     unsigned qhat;              // Estimated quotient digit.
 388     unsigned rhat;              // A remainder.
 389     unsigned p;                 // Product of two digits.
 390     int s, i, j, t, k;
 391 
 392     if (m < n || n <= 0 || v[n-1] == 0)
 393       return 1;                 // Return if invalid param.
 394 
 395     // Take care of the case of a single-digit span
 396     if (n == 1)
 397       {
 398         k = 0;
 399         for (j = m - 1; j >= 0; j--)
 400           {
 401             q[j] = (uint16_t) (((unsigned int) k*b + u[j])/v[0]);    // divisor here.
 402             k = (int) (((unsigned int) k*b + u[j]) - q[j]*v[0]);
 403           }
 404         if (r != NULL) r[0] = (uint16_t) k;
 405         return 0;
 406       }
 407 
 408     // Normalize by shifting v left just enough so that
 409     // its high-order bit is on, and shift u left the
 410     // same amount. We may have to append a high-order
 411     // digit on the dividend; we do that unconditionally.
 412 
 413     s = nlz (v[n-1]) - 16;      // 0 <= s <= 16.
 414     //vn = (uint16_t *) alloca (2*n);
 415     uint16_t vn [n];
 416     for (i = n - 1; i > 0; i--)
 417       vn[i] = (uint16_t) (v[i] << s) | (uint16_t) (v[i-1] >> (16-s));
 418     vn[0] = (uint16_t) (v[0] << s);
 419 
 420     //un = (uint16_t *)alloca(2*(m + 1));
 421     uint16_t un [m+1];
 422     un[m] = u[m-1] >> (16-s);
 423     for (i = m - 1; i > 0; i--)
 424       un[i] = (uint16_t) (u[i] << s) | (uint16_t) (u[i-1] >> (16-s));
 425     un[0] = (uint16_t) (u[0] << s);
 426     for (j = m - n; j >= 0; j--)
 427       {       // Main loop.
 428         // Compute estimate qhat of q[j].
 429         qhat = (un[j+n]*b + un[j+n-1])/vn[n-1];
 430         rhat = (un[j+n]*b + un[j+n-1])%vn[n-1];
 431 cmp_again:
 432         if (qhat >= b || (unsigned)qhat*(unsigned long long)vn[n-2] > b*rhat + un[j+n-2])
 433           {
 434             qhat = qhat - 1;
 435             rhat = rhat + vn[n-1];
 436             if (rhat < b) goto cmp_again;
 437           }
 438 
 439         // Multiply and subtract.
 440         k = 0;
 441         for (i = 0; i < n; i++)
 442           {
 443             p = (unsigned)qhat*(unsigned long long)vn[i];
 444             t = (int32_t) un[i+j] - k - (int32_t) (p & 0xFFFF);
 445             un[i+j] = (uint16_t) t;
 446             k = (int) (p >> 16) - (t >> 16);
 447           }
 448         t = un[j+n] - k;
 449         un[j+n] = (uint16_t) t;
 450 
 451         q[j] = (uint16_t) qhat;  // Store quotient digit.
 452         if (t < 0)
 453           {                      // If we subtracted too
 454             q[j] = q[j] - 1;     // much, add back.
 455             k = 0;
 456             for (i = 0; i < n; i++)
 457               {
 458                 t = un[i+j] + vn[i] + k;
 459                 un[i+j] = (uint16_t) t;
 460                 k = t >> 16;
 461                }
 462              un[j+n] = (uint16_t) (un[j+n] + k);
 463           }
 464       } // End j.
 465     // If the caller wants the remainder, denormalize
 466     // it and pass it back.
 467     if (r != NULL)
 468       {
 469         for (i = 0; i < n; i++)
 470           r[i] = (uint16_t) (un[i] >> s) | (uint16_t) (un[i+1] << (16-s));
 471       }
 472     return 0;
 473   }
 474 
 475 uint128 multiply_128 (uint128 a, uint128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
 476   {
 477 //printf ("%016llx%016llx %016llx%016llx\n", a.h,a.l,b.h,b.l);
 478     const int l = 4;
 479     uint32_t w[l+l], u[l], v[l];
 480 
 481     u[3] = (uint32_t) (a.h >> 32);
 482     u[2] = (uint32_t)  a.h;
 483     u[1] = (uint32_t) (a.l >> 32);
 484     u[0] = (uint32_t)  a.l;
 485     v[3] = (uint32_t) (b.h >> 32);
 486     v[2] = (uint32_t)  b.h;
 487     v[1] = (uint32_t) (b.l >> 32);
 488     v[0] = (uint32_t)  b.l;
 489     mulmn (w, u, v, l, l);
 490     return construct_128 (
 491        (((uint64_t) w[3]) << 32) | w[2],
 492        (((uint64_t) w[1]) << 32) | w[0]);
 493   }
 494 
 495 int128 multiply_s128 (int128 a, int128 b)
     /* [previous][next][first][last][top][bottom][index][help] */
 496   {
 497 //printf ("%016llx%016llx %016llx%016llx\n", a.h,a.l,b.h,b.l);
 498     const int l = 4;
 499     uint32_t w[l+l], u[l], v[l];
 500 
 501     u[3] = (uint32_t) (a.h >> 32);
 502     u[2] = (uint32_t)  a.h;
 503     u[1] = (uint32_t) (a.l >> 32);
 504     u[0] = (uint32_t)  a.l;
 505     v[3] = (uint32_t) (b.h >> 32);
 506     v[2] = (uint32_t)  b.h;
 507     v[1] = (uint32_t) (b.l >> 32);
 508     v[0] = (uint32_t)  b.l;
 509     mulmns (w, u, v, l, l);
 510     return construct_s128 (
 511        (((int64_t)  w[3]) << 32) | w[2],
 512        (((uint64_t) w[1]) << 32) | w[0]);
 513   }
 514 
 515 // Note: divisor is < 2^16
 516 uint128 divide_128 (uint128 a, uint128 b, uint128 * remp)
     /* [previous][next][first][last][top][bottom][index][help] */
 517   {
 518     const int m = 8;
 519     const int n = 8;
 520     uint16_t q[m], u[m], v[n];
 521     u[0] = (uint16_t)  a.l;
 522     u[1] = (uint16_t) (a.l >> 16);
 523     u[2] = (uint16_t) (a.l >> 32);
 524     u[3] = (uint16_t) (a.l >> 48);
 525     u[4] = (uint16_t)  a.h;
 526     u[5] = (uint16_t) (a.h >> 16);
 527     u[6] = (uint16_t) (a.h >> 32);
 528     u[7] = (uint16_t) (a.h >> 48);
 529 
 530     v[0] = (uint16_t)  b.l;
 531     v[1] = (uint16_t) (b.l >> 16);
 532     v[2] = (uint16_t) (b.l >> 32);
 533     v[3] = (uint16_t) (b.l >> 48);
 534     v[4] = (uint16_t)  b.h;
 535     v[5] = (uint16_t) (b.h >> 16);
 536     v[6] = (uint16_t) (b.h >> 32);
 537     v[7] = (uint16_t) (b.h >> 48);
 538 
 539     q[0] = q[1] = q[2] = q[3] = q[4] = q[5] = q[6] = q[7] = 0;
 540 
 541     int normlen;
 542     for (normlen = 8; normlen > 0; normlen --)
 543       if (v [normlen - 1])
 544         break;
 545     uint16_t r [8] = { 8 * 0 };
 546     divmnu (q, remp ? r : NULL, u, v, m, normlen);
 547     if (remp)
 548       {
 549         * remp =  construct_128 (
 550        (((uint64_t) r [7]) << 48) |
 551        (((uint64_t) r [6]) << 32) |
 552        (((uint64_t) r [5]) << 16) |
 553        (((uint64_t) r [4]) <<  0),
 554        (((uint64_t) r [3]) << 48) |
 555        (((uint64_t) r [2]) << 32) |
 556        (((uint64_t) r [1]) << 16) |
 557        (((uint64_t) r [0]) <<  0));
 558       }
 559     return construct_128 (
 560        (((uint64_t) q [7]) << 48) |
 561        (((uint64_t) q [6]) << 32) |
 562        (((uint64_t) q [5]) << 16) |
 563        (((uint64_t) q [4]) <<  0),
 564        (((uint64_t) q [3]) << 48) |
 565        (((uint64_t) q [2]) << 32) |
 566        (((uint64_t) q [1]) << 16) |
 567        (((uint64_t) q [0]) <<  0));
 568   }
 569 
 570 // Note: divisor is < 2^16
 571 uint128 divide_128_16 (uint128 a, uint16_t b, uint16_t * remp)
     /* [previous][next][first][last][top][bottom][index][help] */
 572   {
 573     const int m = 8;
 574     const int n = 1;
 575     uint16_t q[m], u[m], v[n];
 576     u[0] = (uint16_t)  a.l;
 577     u[1] = (uint16_t) (a.l >> 16);
 578     u[2] = (uint16_t) (a.l >> 32);
 579     u[3] = (uint16_t) (a.l >> 48);
 580     u[4] = (uint16_t)  a.h;
 581     u[5] = (uint16_t) (a.h >> 16);
 582     u[6] = (uint16_t) (a.h >> 32);
 583     u[7] = (uint16_t) (a.h >> 48);
 584 
 585     v[0] = (uint16_t) b;
 586 
 587     q[0] = q[1] = q[2] = q[3] = q[4] = q[5] = q[6] = q[7] = 0;
 588 
 589     divmnu (q, remp, u, v, m, n);
 590     return construct_128 (
 591        (((uint64_t) q [7]) << 48) |
 592        (((uint64_t) q [6]) << 32) |
 593        (((uint64_t) q [5]) << 16) |
 594        (((uint64_t) q [4]) <<  0),
 595        (((uint64_t) q [3]) << 48) |
 596        (((uint64_t) q [2]) << 32) |
 597        (((uint64_t) q [1]) << 16) |
 598        (((uint64_t) q [0]) <<  0));
 599   }
 600 
 601 //divide_128_32 (
 602 //00000000000000010000000000000000, 00010000) returned
 603 //00000000000000000001000000000000, 00000000
 604 //divide_128_32 (ffffffffffffffffffffffffffffffff, 00010000) returned f771ffffffffffffffffffffffffffff, 0000ffff
 605 
 606 // Note: divisor is < 2^32 and >= 2^16; i.e. high 16 bits *must* be non-zero
 607 uint128 divide_128_32 (uint128 a, uint32_t b, uint32_t * remp)
     /* [previous][next][first][last][top][bottom][index][help] */
 608   {
 609     const int m = 8;
 610     const int n = 2;
 611     uint16_t q[m], u[m], v[n], r[2];
 612     u[0] = (uint16_t)  a.l;
 613     u[1] = (uint16_t) (a.l >> 16);
 614     u[2] = (uint16_t) (a.l >> 32);
 615     u[3] = (uint16_t) (a.l >> 48);
 616     u[4] = (uint16_t)  a.h;
 617     u[5] = (uint16_t) (a.h >> 16);
 618     u[6] = (uint16_t) (a.h >> 32);
 619     u[7] = (uint16_t) (a.h >> 48);
 620 
 621     v[0] = (uint16_t) b;
 622     v[1] = (uint16_t) (b >> 16);
 623 
 624     q[0] = q[1] = q[2] = q[3] = q[4] = q[5] = q[6] = q[7] = 0;
 625 
 626     divmnu (q, remp ? r : NULL, u, v, m, n);
 627     if (remp)
 628       * remp = r [0] | (uint32_t) r[1] << 16;
 629 
 630     return construct_128 (
 631        (((uint64_t) q [7]) << 48) |
 632        (((uint64_t) q [6]) << 32) |
 633        (((uint64_t) q [5]) << 16) |
 634        (((uint64_t) q [4]) <<  0),
 635        (((uint64_t) q [3]) << 48) |
 636        (((uint64_t) q [2]) << 32) |
 637        (((uint64_t) q [1]) << 16) |
 638        (((uint64_t) q [0]) <<  0));
 639   }
 640 
 641 unsigned ti_test_iter = 0;
 642 
 643 static void tisz (uint64_t h, uint64_t l, bool expect)
     /* [previous][next][first][last][top][bottom][index][help] */
 644   {
 645     bool r = iszero_128 (construct_128 (h, l));
 646     ti_test_iter++;
 647     if (r != expect) {
 648       fprintf (stderr, "FATAL (%u): iszero_128 (%llu, %llu) returned %lu\n",
 649                ti_test_iter,
 650                (unsigned long long)h, (unsigned long long)l, (unsigned long)r);
 651       kd_div_errors++;
 652     }
 653   }
 654 
 655 static void tand (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
     /* [previous][next][first][last][top][bottom][index][help] */
 656                   uint64_t rh, uint64_t rl)
 657   {
 658     uint128 a = construct_128 (ah, al);
 659     uint128 b = construct_128 (bh, bl);
 660     uint128 r = and_128 (a, b);
 661     ti_test_iter++;
 662     if (r.h != rh || r.l != rl) {
 663       fprintf (stderr, "FATAL (%u): and_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
 664                ti_test_iter,
 665                (unsigned long long)ah, (unsigned long long)al,  (unsigned long long)bh,
 666                (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
 667       kd_div_errors++;
 668     }
 669   }
 670 
 671 static void tor (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
     /* [previous][next][first][last][top][bottom][index][help] */
 672                  uint64_t rh, uint64_t rl)
 673   {
 674     uint128 a = construct_128 (ah, al);
 675     uint128 b = construct_128 (bh, bl);
 676     uint128 r = or_128 (a, b);
 677     ti_test_iter++;
 678     if (r.h != rh || r.l != rl) {
 679       fprintf (stderr, "FATAL (%u): or_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
 680                ti_test_iter,
 681                (unsigned long long)ah, (unsigned long long)al, (unsigned long long)bh,
 682                (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
 683       kd_div_errors++;
 684     }
 685   }
 686 
 687 static void tcomp (uint64_t ah, uint64_t al,
     /* [previous][next][first][last][top][bottom][index][help] */
 688                    uint64_t rh, uint64_t rl)
 689   {
 690     uint128 a = construct_128 (ah, al);
 691     uint128 r = complement_128 (a);
 692     ti_test_iter++;
 693     if (r.h != rh || r.l != rl) {
 694       fprintf (stderr, "FATAL (%u): complement_128 (%016llx%016llx) returned %016llx%016llx\n",
 695                ti_test_iter,
 696                (unsigned long long)ah, (unsigned long long)al, (unsigned long long)r.h,
 697                (unsigned long long)r.l);
 698       kd_div_errors++;
 699     }
 700   }
 701 
 702 static void tadd (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
     /* [previous][next][first][last][top][bottom][index][help] */
 703                   uint64_t rh, uint64_t rl)
 704   {
 705     uint128 a = construct_128 (ah, al);
 706     uint128 b = construct_128 (bh, bl);
 707     uint128 r = add_128 (a, b);
 708     ti_test_iter++;
 709     if (r.h != rh || r.l != rl) {
 710       fprintf (stderr, "FATAL %u): add_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
 711                ti_test_iter,
 712                (unsigned long long)ah, (unsigned long long)al,  (unsigned long long)bh,
 713                (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
 714       kd_div_errors++;
 715     }
 716   }
 717 
 718 static void tsub (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
     /* [previous][next][first][last][top][bottom][index][help] */
 719                   uint64_t rh, uint64_t rl)
 720   {
 721     uint128 a = construct_128 (ah, al);
 722     uint128 b = construct_128 (bh, bl);
 723     uint128 r = subtract_128 (a, b);
 724     ti_test_iter++;
 725     if (r.h != rh || r.l != rl) {
 726       fprintf (stderr, "FATAL (%u): subtract_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
 727                ti_test_iter,
 728                (unsigned long long)ah, (unsigned long long)al,  (unsigned long long)bh,
 729                (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
 730       kd_div_errors++;
 731     }
 732   }
 733 
 734 static void tneg (uint64_t ah, uint64_t al,
     /* [previous][next][first][last][top][bottom][index][help] */
 735                   uint64_t rh, uint64_t rl)
 736   {
 737     uint128 a = construct_128 (ah, al);
 738     uint128 r = negate_128 (a);
 739     ti_test_iter++;
 740     if (r.h != rh || r.l != rl) {
 741       fprintf (stderr, "FATAL (%u): negate_128 (%016llx%016llx) returned %016llx%016llx\n",
 742                ti_test_iter,
 743                (unsigned long long)ah, (unsigned long long)al, (unsigned long long)r.h,
 744                (unsigned long long)r.l);
 745       kd_div_errors++;
 746     }
 747   }
 748 
 749 static void tgt (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
     /* [previous][next][first][last][top][bottom][index][help] */
 750                  bool expect)
 751   {
 752     uint128 a = construct_128 (ah, al);
 753     uint128 b = construct_128 (bh, bl);
 754     ti_test_iter++;
 755     bool r = isgt_128 (a, b);
 756     if (r != expect) {
 757       fprintf (stderr, "FATAL (%u): gt_128 (%016llx%016llx, %016llx%016llx) returned %llu\n",
 758                ti_test_iter,
 759                (unsigned long long)ah, (unsigned long long)al, (unsigned long long)bh,
 760                (unsigned long long)bl, (unsigned long long)r);
 761       kd_div_errors++;
 762     }
 763   }
 764 
 765 static void tls (uint64_t ah, uint64_t al, unsigned int n,
     /* [previous][next][first][last][top][bottom][index][help] */
 766                  uint64_t rh, uint64_t rl)
 767   {
 768     uint128 a = construct_128 (ah, al);
 769     uint128 r = lshift_128 (a, n);
 770     ti_test_iter++;
 771     if (r.h != rh || r.l != rl) {
 772       fprintf (stderr, "FATAL (%u): lshift_128 (%016llx%016llx, %llu) returned %016llx%016llx\n",
 773                ti_test_iter,
 774                (unsigned long long)ah,  (unsigned long long)al, (unsigned long long)n,
 775                (unsigned long long)r.h, (unsigned long long)r.l);
 776       kd_div_errors++;
 777     }
 778   }
 779 
 780 static void trs (uint64_t ah, uint64_t al, unsigned int n,
     /* [previous][next][first][last][top][bottom][index][help] */
 781                  uint64_t rh, uint64_t rl)
 782   {
 783     uint128 a = construct_128 (ah, al);
 784     uint128 r = rshift_128 (a, n);
 785     ti_test_iter++;
 786     if (r.h != rh || r.l != rl) {
 787       fprintf (stderr, "FATAL (%u): rshift_128 (%016llx%016llx, %llu) returned %016llx%016llx\n",
 788                ti_test_iter,
 789                (unsigned long long)ah,  (unsigned long long)al, (unsigned long long)n,
 790                (unsigned long long)r.h, (unsigned long long)r.l);
 791       kd_div_errors++;
 792     }
 793   }
 794 
 795 static void tmul (uint64_t ah, uint64_t al, uint64_t bh, uint64_t bl,
     /* [previous][next][first][last][top][bottom][index][help] */
 796                   uint64_t rh, uint64_t rl)
 797   {
 798     uint128 a = construct_128 (ah, al);
 799     uint128 b = construct_128 (bh, bl);
 800     uint128 r = multiply_128 (a, b);
 801     ti_test_iter++;
 802     if (r.h != rh || r.l != rl) {
 803       fprintf (stderr, "FATAL (%u): multiply_128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
 804                ti_test_iter,
 805                (unsigned long long)ah, (unsigned long long)al,  (unsigned long long)bh,
 806                (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
 807       kd_div_errors++;
 808     }
 809   }
 810 
 811 static void tsmul (int64_t ah, uint64_t al, int64_t bh, uint64_t bl,
     /* [previous][next][first][last][top][bottom][index][help] */
 812                    int64_t rh, uint64_t rl)
 813   {
 814     int128 a = construct_s128 (ah, al);
 815     int128 b = construct_s128 (bh, bl);
 816     int128 r = multiply_s128 (a, b);
 817     ti_test_iter++;
 818     if (r.h != rh || r.l != rl) {
 819       fprintf (stderr, "FATAL (%u): multiply_s128 (%016llx%016llx, %016llx%016llx) returned %016llx%016llx\n",
 820                ti_test_iter,
 821                (unsigned long long)ah, (unsigned long long)al,  (unsigned long long)bh,
 822                (unsigned long long)bl, (unsigned long long)r.h, (unsigned long long)r.l);
 823       kd_div_errors++;
 824     }
 825   }
 826 
 827 static void tdiv16 (uint64_t ah, uint64_t al, uint16_t b,
     /* [previous][next][first][last][top][bottom][index][help] */
 828                     uint64_t resh, uint64_t resl,
 829                     uint16_t remainder)
 830   {
 831     uint128 a = construct_128 (ah, al);
 832     uint16_t rem;
 833     uint128 res = divide_128_16 (a, b, & rem);
 834     ti_test_iter++;
 835     if (res.h != resh || res.l != resl || rem != remainder) {
 836       fprintf (stderr, "FATAL (%u): divide_128_16 (%016llx%016llx, %04x) returned %016llx%016llx, %04x\n",
 837                ti_test_iter,
 838                (unsigned long long)ah,    (unsigned long long)al, b,
 839                (unsigned long long)res.h, (unsigned long long)res.l, rem);
 840       kd_div_errors++;
 841     }
 842   }
 843 
 844 static void tdiv32 (uint64_t ah, uint64_t al, uint32_t b,
     /* [previous][next][first][last][top][bottom][index][help] */
 845                     uint64_t resh, uint64_t resl,
 846                     uint32_t remainder)
 847   {
 848     uint128 a = construct_128 (ah, al);
 849     uint32_t rem;
 850     uint128 res = divide_128_32 (a, b, & rem);
 851     ti_test_iter++;
 852     if (res.h != resh || res.l != resl || rem != remainder) {
 853       fprintf (stderr, "FATAL (%u): divide_128_32 (%016llx%016llx, %08x) returned %016llx%016llx, %08x\n",
 854                ti_test_iter,
 855                (unsigned long long)ah,    (unsigned long long)al, b,
 856                (unsigned long long)res.h, (unsigned long long)res.l, rem);
 857       kd_div_errors++;
 858     }
 859   }
 860 
 861 int test_math128 (void)
     /* [previous][next][first][last][top][bottom][index][help] */
 862   {
 863     tisz   (0,      0,      true);
 864     tisz   (1,      0,      false);
 865     tisz   (0,      1,      false);
 866     tisz   (1,      1,      false);
 867     tisz   (SIGN64, 0,      false);
 868     tisz   (0,      SIGN64, false);
 869     tisz   (SIGN64, SIGN64, false);
 870     tneg   (0,      0,      0,      0);
 871     tneg   (MASK64, MASK64, 0,      1);
 872     tcomp  (MASK64, MASK64, 0,      0);
 873     tneg   (0,      1,      MASK64, MASK64);
 874     tcomp  (0,      0,      MASK64, MASK64);
 875     tcomp  (0,      1,      MASK64, MASK64-1);
 876     tgt    (0,      0,      0,      0,          false);
 877     tgt    (0,      0,      0,      1,          false);
 878     tgt    (0,      1,      0,      0,          true);
 879     tgt    (MASK64, MASK64, MASK64, MASK64,     false);
 880     tgt    (0,      0,      MASK64, MASK64,     false);
 881     tgt    (MASK64, MASK64, 0,      0,          true);
 882 #  ifndef __PGI
 883 #   ifndef __PGLLVM__
 884 #    ifndef __NVCOMPILER
 885 #     ifndef __NVCOMPILER_LLVM__
 886     tls    (0,      0,      0,      0,          0);
 887     tls    (MASK64, MASK64, 0,      MASK64,     MASK64);
 888 #     endif /* ifndef __PGI */
 889 #    endif /* ifndef __PGLLVM__ */
 890 #   endif /* ifndef __NVCOMPILER */
 891 #  endif /* ifndef __NVCOMPILER_LLVM__ */
 892     tls    (0,      1,      127,    SIGN64,     0);
 893     tls    (0,      MASK64, 64,     MASK64,     0);
 894     tls    (0,      MASK64, 1,      1,          MASK64-1);
 895     tls    (0,      1,      64,     1,          0);
 896     tls    (0,      1,      63,     0,          SIGN64);
 897     tls    (1,      0,      63,     SIGN64,     0);
 898     trs    (0,      0,      0,      0,          0);
 899     trs    (SIGN64, 0,      127,    MASK64,     MASK64);
 900     trs    (MASK64, 0,      64,     MASK64,     MASK64);
 901     trs    (MASK64, 0,      1,      MASK64,     SIGN64);
 902     trs    (1,      0,      64,     0,          1);
 903     trs    (1,      0,      1,      0,          SIGN64);
 904     trs    (MASK64, MASK64, 0,      MASK64,     MASK64);
 905     trs    (SIGN64, 0,      63,     MASK64,     0);
 906     trs    (SIGN64, 0,      64,     MASK64,     SIGN64);
 907     tand   (0,      0,      0,      0,          0,       0);
 908     tand   (MASK64, MASK64, 0,      0,          0,       0);
 909     tand   (0,      0,      MASK64, MASK64,     0,       0);
 910     tand   (MASK64, MASK64, MASK64, MASK64,     MASK64,  MASK64);
 911     tor    (0,      0,      0,      0,          0,       0);
 912     tor    (MASK64, MASK64, 0,      0,          MASK64,  MASK64);
 913     tor    (0,      0,      MASK64, MASK64,     MASK64,  MASK64);
 914     tor    (MASK64, MASK64, MASK64, MASK64,     MASK64,  MASK64);
 915     tadd   (0,      0,      0,      0,          0,       0);
 916     tadd   (0,      0,      0,      1,          0,       1);
 917     tadd   (0,      1,      0,      0,          0,       1);
 918     tadd   (0,      1,      0,      1,          0,       2);
 919     tadd   (0,      1,      0,      MASK64,     1,       0);
 920     tadd   (0,      1,      MASK64, MASK64,     0,       0);
 921     tsub   (0,      0,      0,      0,          0,       0);
 922     tsub   (0,      1,      0,      1,          0,       0);
 923     tsub   (MASK64, MASK64, MASK64, MASK64,     0,       0);
 924     tsub   (MASK64, MASK64, 0,      0,          MASK64,  MASK64);
 925     tsub   (0,      0,      0,      1,          MASK64,  MASK64);
 926     tmul   (0,      0,      0,      0,          0,       0);
 927     tmul   (MASK64, MASK64, 0,      0,          0,       0);
 928     tmul   (0,      0,      MASK64, MASK64,     0,       0);
 929     tmul   (0,      1,      0,      1,          0,       1);
 930     tmul   (0,      1,      0,      10,         0,       10);
 931     tmul   (0,      10,     0,      10,         0,       100);
 932     tmul   (0,      100,    0,      10,         0,       1000);
 933     tmul   (0,      MASK64, 0,      2,          1,       MASK64-1);
 934     tmul   (MASK64, MASK64, MASK64, MASK64,     0,       1);
 935     tdiv16 (MASK64,   MASK64, 16,   MASK64>>4,  MASK64,  15);
 936     tsmul  (0,      1,      MASK64, MASK64,     MASK64,  MASK64);
 937     tsmul  (MASK64, MASK64, MASK64, MASK64,     0,       1);
 938     tdiv32 (1,      0,      1<<16,  0,          1ll<<48, 0);
 939     tdiv16 (0,      1,      1,      0,          1,       0);
 940     tdiv16 (0,      10,     2,      0,          5,       0);
 941     tdiv16 (0,      3,      2,      0,          1,       1);
 942     tdiv32 (MASK64, MASK64, 1<<16,  MASK64>>16, MASK64,  0xffff);
 943     if (kd_div_errors)
 944       abort();
 945     return 0;
 946   }
 947 # else
 948 #  ifdef NEED_128
 949 #   include "dps8_math128.h"
 950 
 951 void __udivmodti3(UTItype div, UTItype dvd,UTItype *result,UTItype *remain);
 952 UTItype __udivti3(UTItype div, UTItype dvd);
 953 UTItype __umodti3(UTItype div, UTItype dvd);
 954 
 955 UTItype __udivti3(UTItype div, UTItype dvd)
     /* [previous][next][first][last][top][bottom][index][help] */
 956 {
 957         UTItype result,remain;
 958 
 959         __udivmodti3(div,dvd,&result,&remain);
 960 
 961         return result;
 962 }
 963 
 964 void __udivmodti3(UTItype div, UTItype dvd,UTItype *result,UTItype *remain)
     /* [previous][next][first][last][top][bottom][index][help] */
 965 {
 966         UTItype z1 = dvd;
 967         UTItype z2 = (UTItype)1;
 968 
 969         *result = (UTItype)0;
 970         *remain = div;
 971 
 972         if ( z1 == (UTItype)0)
 973 #   ifndef CPPCHECK
 974           1/0;
 975 #   else
 976           abort();
 977 #   endif /* ifndef CPPCHECK */
 978 
 979         while ( z1 < *remain )
 980           {
 981             z1 <<= 1 ;
 982             z2 <<= 1;
 983           }
 984 
 985         do
 986           {
 987             if ( *remain >= z1 )
 988               {
 989                 *remain -= z1;
 990                 *result += z2;
 991               }
 992             z1 >>= 1;
 993             z2 >>= 1;
 994           } while ( z2 );
 995 }
 996 
 997 TItype __divti3(TItype div, TItype dvd)
     /* [previous][next][first][last][top][bottom][index][help] */
 998 {
 999         int sign=1;
1000 
1001         if (div < (TItype)0)
1002           {
1003             sign = -1;
1004             div = -div;
1005           }
1006 
1007         if (dvd < (TItype)0)
1008           {
1009             sign = -sign;
1010             dvd = -dvd;
1011           }
1012 
1013         if (sign > 0)
1014           return (TItype)__udivti3(div,dvd);
1015         else
1016           return -((TItype)__udivti3(div,dvd));
1017 }
1018 
1019 TItype __modti3(TItype div, TItype dvd)
     /* [previous][next][first][last][top][bottom][index][help] */
1020 {
1021         int sign=1;
1022 
1023         if (div < (TItype)0)
1024           {
1025             sign = -1;
1026             div = -div;
1027           }
1028 
1029         if (dvd < (TItype)0)
1030           {
1031             sign = -sign;
1032             dvd = -dvd;
1033           }
1034 
1035         if (sign > 0)
1036           return (TItype)__umodti3(div,dvd);
1037         else
1038           return ((TItype)0-(TItype)__umodti3(div,dvd));
1039 }
1040 
1041 UTItype __umodti3(UTItype div, UTItype dvd)
     /* [previous][next][first][last][top][bottom][index][help] */
1042 {
1043         UTItype result,remain;
1044 
1045         __udivmodti3(div,dvd,&result,&remain);
1046 
1047         return remain;
1048 }
1049 
1050 TItype __multi3 (TItype u, TItype v)
     /* [previous][next][first][last][top][bottom][index][help] */
1051 {
1052         TItype result = (TItype)0;
1053         int sign = 1;
1054 
1055         if (u<0)
1056           {
1057             sign = -1;
1058             u = -u;
1059           }
1060 
1061         while (u != (TItype)0)
1062           {
1063             if ( u&(TItype)1 )
1064               result += v;
1065             u>>=1;
1066             v<<=1;
1067           }
1068 
1069         if ( sign < 0 )
1070           return -result;
1071         else
1072           return result;
1073 }
1074 #  endif
1075 # endif
1076 #endif

/* [previous][next][first][last][top][bottom][index][help] */