diff src/SFMT/SFMT-alti.h @ 23:bf9169ad4222

add SMID-oriented fast mersenne twister
author Robert McIntyre <rlm@mit.edu>
date Sun, 04 Mar 2012 17:38:32 -0600
parents f9f4f1b99eed
children
line wrap: on
line diff
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/src/SFMT/SFMT-alti.h	Sun Mar 04 17:38:32 2012 -0600
     1.3 @@ -0,0 +1,156 @@
     1.4 +/** 
     1.5 + * @file SFMT-alti.h 
     1.6 + *
     1.7 + * @brief SIMD oriented Fast Mersenne Twister(SFMT)
     1.8 + * pseudorandom number generator
     1.9 + *
    1.10 + * @author Mutsuo Saito (Hiroshima University)
    1.11 + * @author Makoto Matsumoto (Hiroshima University)
    1.12 + *
    1.13 + * Copyright (C) 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima
    1.14 + * University. All rights reserved.
    1.15 + *
    1.16 + * The new BSD License is applied to this software.
    1.17 + * see LICENSE.txt
    1.18 + */
    1.19 +
    1.20 +#ifndef SFMT_ALTI_H
    1.21 +#define SFMT_ALTI_H
    1.22 +
    1.23 +inline static vector unsigned int vec_recursion(vector unsigned int a,
    1.24 +						vector unsigned int b,
    1.25 +						vector unsigned int c,
    1.26 +						vector unsigned int d)
    1.27 +    ALWAYSINLINE;
    1.28 +
    1.29 +/**
    1.30 + * This function represents the recursion formula in AltiVec and BIG ENDIAN.
    1.31 + * @param a a 128-bit part of the interal state array
    1.32 + * @param b a 128-bit part of the interal state array
    1.33 + * @param c a 128-bit part of the interal state array
    1.34 + * @param d a 128-bit part of the interal state array
    1.35 + * @return output
    1.36 + */
    1.37 +inline static vector unsigned int vec_recursion(vector unsigned int a,
    1.38 +						vector unsigned int b,
    1.39 +						vector unsigned int c,
    1.40 +						vector unsigned int d) {
    1.41 +
    1.42 +    const vector unsigned int sl1 = ALTI_SL1;
    1.43 +    const vector unsigned int sr1 = ALTI_SR1;
    1.44 +#ifdef ONLY64
    1.45 +    const vector unsigned int mask = ALTI_MSK64;
    1.46 +    const vector unsigned char perm_sl = ALTI_SL2_PERM64;
    1.47 +    const vector unsigned char perm_sr = ALTI_SR2_PERM64;
    1.48 +#else
    1.49 +    const vector unsigned int mask = ALTI_MSK;
    1.50 +    const vector unsigned char perm_sl = ALTI_SL2_PERM;
    1.51 +    const vector unsigned char perm_sr = ALTI_SR2_PERM;
    1.52 +#endif
    1.53 +    vector unsigned int v, w, x, y, z;
    1.54 +    x = vec_perm(a, (vector unsigned int)perm_sl, perm_sl);
    1.55 +    v = a;
    1.56 +    y = vec_sr(b, sr1);
    1.57 +    z = vec_perm(c, (vector unsigned int)perm_sr, perm_sr);
    1.58 +    w = vec_sl(d, sl1);
    1.59 +    z = vec_xor(z, w);
    1.60 +    y = vec_and(y, mask);
    1.61 +    v = vec_xor(v, x);
    1.62 +    z = vec_xor(z, y);
    1.63 +    z = vec_xor(z, v);
    1.64 +    return z;
    1.65 +}
    1.66 +
    1.67 +/**
    1.68 + * This function fills the internal state array with pseudorandom
    1.69 + * integers.
    1.70 + */
    1.71 +inline static void gen_rand_all(void) {
    1.72 +    int i;
    1.73 +    vector unsigned int r, r1, r2;
    1.74 +
    1.75 +    r1 = sfmt[N - 2].s;
    1.76 +    r2 = sfmt[N - 1].s;
    1.77 +    for (i = 0; i < N - POS1; i++) {
    1.78 +	r = vec_recursion(sfmt[i].s, sfmt[i + POS1].s, r1, r2);
    1.79 +	sfmt[i].s = r;
    1.80 +	r1 = r2;
    1.81 +	r2 = r;
    1.82 +    }
    1.83 +    for (; i < N; i++) {
    1.84 +	r = vec_recursion(sfmt[i].s, sfmt[i + POS1 - N].s, r1, r2);
    1.85 +	sfmt[i].s = r;
    1.86 +	r1 = r2;
    1.87 +	r2 = r;
    1.88 +    }
    1.89 +}
    1.90 +
    1.91 +/**
    1.92 + * This function fills the user-specified array with pseudorandom
    1.93 + * integers.
    1.94 + *
    1.95 + * @param array an 128-bit array to be filled by pseudorandom numbers.  
    1.96 + * @param size number of 128-bit pesudorandom numbers to be generated.
    1.97 + */
    1.98 +inline static void gen_rand_array(w128_t *array, int size) {
    1.99 +    int i, j;
   1.100 +    vector unsigned int r, r1, r2;
   1.101 +
   1.102 +    r1 = sfmt[N - 2].s;
   1.103 +    r2 = sfmt[N - 1].s;
   1.104 +    for (i = 0; i < N - POS1; i++) {
   1.105 +	r = vec_recursion(sfmt[i].s, sfmt[i + POS1].s, r1, r2);
   1.106 +	array[i].s = r;
   1.107 +	r1 = r2;
   1.108 +	r2 = r;
   1.109 +    }
   1.110 +    for (; i < N; i++) {
   1.111 +	r = vec_recursion(sfmt[i].s, array[i + POS1 - N].s, r1, r2);
   1.112 +	array[i].s = r;
   1.113 +	r1 = r2;
   1.114 +	r2 = r;
   1.115 +    }
   1.116 +    /* main loop */
   1.117 +    for (; i < size - N; i++) {
   1.118 +	r = vec_recursion(array[i - N].s, array[i + POS1 - N].s, r1, r2);
   1.119 +	array[i].s = r;
   1.120 +	r1 = r2;
   1.121 +	r2 = r;
   1.122 +    }
   1.123 +    for (j = 0; j < 2 * N - size; j++) {
   1.124 +	sfmt[j].s = array[j + size - N].s;
   1.125 +    }
   1.126 +    for (; i < size; i++) {
   1.127 +	r = vec_recursion(array[i - N].s, array[i + POS1 - N].s, r1, r2);
   1.128 +	array[i].s = r;
   1.129 +	sfmt[j++].s = r;
   1.130 +	r1 = r2;
   1.131 +	r2 = r;
   1.132 +    }
   1.133 +}
   1.134 +
   1.135 +#ifndef ONLY64
   1.136 +#if defined(__APPLE__)
   1.137 +#define ALTI_SWAP (vector unsigned char) \
   1.138 +	(4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11)
   1.139 +#else
   1.140 +#define ALTI_SWAP {4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11}
   1.141 +#endif
   1.142 +/**
   1.143 + * This function swaps high and low 32-bit of 64-bit integers in user
   1.144 + * specified array.
   1.145 + *
   1.146 + * @param array an 128-bit array to be swaped.
   1.147 + * @param size size of 128-bit array.
   1.148 + */
   1.149 +inline static void swap(w128_t *array, int size) {
   1.150 +    int i;
   1.151 +    const vector unsigned char perm = ALTI_SWAP;
   1.152 +
   1.153 +    for (i = 0; i < size; i++) {
   1.154 +	array[i].s = vec_perm(array[i].s, (vector unsigned int)perm, perm);
   1.155 +    }
   1.156 +}
   1.157 +#endif
   1.158 +
   1.159 +#endif