annotate src/SFMT/SFMT-alti.h @ 39:3e36553d0cbf

got some speedruns to work!
author Robert McIntyre <rlm@mit.edu>
date Mon, 05 Mar 2012 16:37:38 -0600
parents f9f4f1b99eed
children
rev   line source
rlm@1 1 /**
rlm@1 2 * @file SFMT-alti.h
rlm@1 3 *
rlm@1 4 * @brief SIMD oriented Fast Mersenne Twister(SFMT)
rlm@1 5 * pseudorandom number generator
rlm@1 6 *
rlm@1 7 * @author Mutsuo Saito (Hiroshima University)
rlm@1 8 * @author Makoto Matsumoto (Hiroshima University)
rlm@1 9 *
rlm@1 10 * Copyright (C) 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
rlm@1 11 * University. All rights reserved.
rlm@1 12 *
rlm@1 13 * The new BSD License is applied to this software.
rlm@1 14 * see LICENSE.txt
rlm@1 15 */
rlm@1 16
rlm@1 17 #ifndef SFMT_ALTI_H
rlm@1 18 #define SFMT_ALTI_H
rlm@1 19
rlm@1 20 inline static vector unsigned int vec_recursion(vector unsigned int a,
rlm@1 21 vector unsigned int b,
rlm@1 22 vector unsigned int c,
rlm@1 23 vector unsigned int d)
rlm@1 24 ALWAYSINLINE;
rlm@1 25
rlm@1 26 /**
rlm@1 27 * This function represents the recursion formula in AltiVec and BIG ENDIAN.
rlm@1 28 * @param a a 128-bit part of the interal state array
rlm@1 29 * @param b a 128-bit part of the interal state array
rlm@1 30 * @param c a 128-bit part of the interal state array
rlm@1 31 * @param d a 128-bit part of the interal state array
rlm@1 32 * @return output
rlm@1 33 */
rlm@1 34 inline static vector unsigned int vec_recursion(vector unsigned int a,
rlm@1 35 vector unsigned int b,
rlm@1 36 vector unsigned int c,
rlm@1 37 vector unsigned int d) {
rlm@1 38
rlm@1 39 const vector unsigned int sl1 = ALTI_SL1;
rlm@1 40 const vector unsigned int sr1 = ALTI_SR1;
rlm@1 41 #ifdef ONLY64
rlm@1 42 const vector unsigned int mask = ALTI_MSK64;
rlm@1 43 const vector unsigned char perm_sl = ALTI_SL2_PERM64;
rlm@1 44 const vector unsigned char perm_sr = ALTI_SR2_PERM64;
rlm@1 45 #else
rlm@1 46 const vector unsigned int mask = ALTI_MSK;
rlm@1 47 const vector unsigned char perm_sl = ALTI_SL2_PERM;
rlm@1 48 const vector unsigned char perm_sr = ALTI_SR2_PERM;
rlm@1 49 #endif
rlm@1 50 vector unsigned int v, w, x, y, z;
rlm@1 51 x = vec_perm(a, (vector unsigned int)perm_sl, perm_sl);
rlm@1 52 v = a;
rlm@1 53 y = vec_sr(b, sr1);
rlm@1 54 z = vec_perm(c, (vector unsigned int)perm_sr, perm_sr);
rlm@1 55 w = vec_sl(d, sl1);
rlm@1 56 z = vec_xor(z, w);
rlm@1 57 y = vec_and(y, mask);
rlm@1 58 v = vec_xor(v, x);
rlm@1 59 z = vec_xor(z, y);
rlm@1 60 z = vec_xor(z, v);
rlm@1 61 return z;
rlm@1 62 }
rlm@1 63
rlm@1 64 /**
rlm@1 65 * This function fills the internal state array with pseudorandom
rlm@1 66 * integers.
rlm@1 67 */
rlm@1 68 inline static void gen_rand_all(void) {
rlm@1 69 int i;
rlm@1 70 vector unsigned int r, r1, r2;
rlm@1 71
rlm@1 72 r1 = sfmt[N - 2].s;
rlm@1 73 r2 = sfmt[N - 1].s;
rlm@1 74 for (i = 0; i < N - POS1; i++) {
rlm@1 75 r = vec_recursion(sfmt[i].s, sfmt[i + POS1].s, r1, r2);
rlm@1 76 sfmt[i].s = r;
rlm@1 77 r1 = r2;
rlm@1 78 r2 = r;
rlm@1 79 }
rlm@1 80 for (; i < N; i++) {
rlm@1 81 r = vec_recursion(sfmt[i].s, sfmt[i + POS1 - N].s, r1, r2);
rlm@1 82 sfmt[i].s = r;
rlm@1 83 r1 = r2;
rlm@1 84 r2 = r;
rlm@1 85 }
rlm@1 86 }
rlm@1 87
rlm@1 88 /**
rlm@1 89 * This function fills the user-specified array with pseudorandom
rlm@1 90 * integers.
rlm@1 91 *
rlm@1 92 * @param array an 128-bit array to be filled by pseudorandom numbers.
rlm@1 93 * @param size number of 128-bit pesudorandom numbers to be generated.
rlm@1 94 */
rlm@1 95 inline static void gen_rand_array(w128_t *array, int size) {
rlm@1 96 int i, j;
rlm@1 97 vector unsigned int r, r1, r2;
rlm@1 98
rlm@1 99 r1 = sfmt[N - 2].s;
rlm@1 100 r2 = sfmt[N - 1].s;
rlm@1 101 for (i = 0; i < N - POS1; i++) {
rlm@1 102 r = vec_recursion(sfmt[i].s, sfmt[i + POS1].s, r1, r2);
rlm@1 103 array[i].s = r;
rlm@1 104 r1 = r2;
rlm@1 105 r2 = r;
rlm@1 106 }
rlm@1 107 for (; i < N; i++) {
rlm@1 108 r = vec_recursion(sfmt[i].s, array[i + POS1 - N].s, r1, r2);
rlm@1 109 array[i].s = r;
rlm@1 110 r1 = r2;
rlm@1 111 r2 = r;
rlm@1 112 }
rlm@1 113 /* main loop */
rlm@1 114 for (; i < size - N; i++) {
rlm@1 115 r = vec_recursion(array[i - N].s, array[i + POS1 - N].s, r1, r2);
rlm@1 116 array[i].s = r;
rlm@1 117 r1 = r2;
rlm@1 118 r2 = r;
rlm@1 119 }
rlm@1 120 for (j = 0; j < 2 * N - size; j++) {
rlm@1 121 sfmt[j].s = array[j + size - N].s;
rlm@1 122 }
rlm@1 123 for (; i < size; i++) {
rlm@1 124 r = vec_recursion(array[i - N].s, array[i + POS1 - N].s, r1, r2);
rlm@1 125 array[i].s = r;
rlm@1 126 sfmt[j++].s = r;
rlm@1 127 r1 = r2;
rlm@1 128 r2 = r;
rlm@1 129 }
rlm@1 130 }
rlm@1 131
rlm@1 132 #ifndef ONLY64
rlm@1 133 #if defined(__APPLE__)
rlm@1 134 #define ALTI_SWAP (vector unsigned char) \
rlm@1 135 (4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11)
rlm@1 136 #else
rlm@1 137 #define ALTI_SWAP {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}
rlm@1 138 #endif
rlm@1 139 /**
rlm@1 140 * This function swaps high and low 32-bit of 64-bit integers in user
rlm@1 141 * specified array.
rlm@1 142 *
rlm@1 143 * @param array an 128-bit array to be swaped.
rlm@1 144 * @param size size of 128-bit array.
rlm@1 145 */
rlm@1 146 inline static void swap(w128_t *array, int size) {
rlm@1 147 int i;
rlm@1 148 const vector unsigned char perm = ALTI_SWAP;
rlm@1 149
rlm@1 150 for (i = 0; i < size; i++) {
rlm@1 151 array[i].s = vec_perm(array[i].s, (vector unsigned int)perm, perm);
rlm@1 152 }
rlm@1 153 }
rlm@1 154 #endif
rlm@1 155
rlm@1 156 #endif