From 1130f8ea757e20910db51feff7ac8f7768eba119 Mon Sep 17 00:00:00 2001
From: "Ankur Sinha (Ankur Sinha Gmail)" <sanjay.ankur@gmail.com>
Date: Thu, 8 Nov 2018 18:37:57 +0000
Subject: [PATCH 1/4] Unbundle Random123

---
 configure.ac                              |   2 +-
 src/Makefile.am                           |   2 +-
 src/Random123/.clang-format               |   2 -
 src/Random123/Makefile.am                 |  10 -
 src/Random123/aes.h                       | 344 ---------
 src/Random123/array.h                     | 326 --------
 src/Random123/ars.h                       | 204 -----
 src/Random123/features/clangfeatures.h    |  80 --
 src/Random123/features/compilerfeatures.h | 324 --------
 src/Random123/features/crayfeatures.h     | 100 ---
 src/Random123/features/fujitsufeatures.h  |  86 ---
 src/Random123/features/gccfeatures.h      | 265 -------
 src/Random123/features/iccfeatures.h      | 212 ------
 src/Random123/features/llvmfeatures.h     |  39 -
 src/Random123/features/msvcfeatures.h     | 200 -----
 src/Random123/features/nvccfeatures.h     | 110 ---
 src/Random123/features/open64features.h   |  50 --
 src/Random123/features/openclfeatures.h   |  89 ---
 src/Random123/features/pgccfeatures.h     | 194 -----
 src/Random123/features/sse.h              | 280 -------
 src/Random123/features/sunprofeatures.h   | 172 -----
 src/Random123/features/xlcfeatures.h      | 210 ------
 src/Random123/philox.h                    | 486 ------------
 src/Random123/threefry.h                  | 864 ----------------------
 24 files changed, 2 insertions(+), 4649 deletions(-)
 delete mode 100644 src/Random123/.clang-format
 delete mode 100644 src/Random123/Makefile.am
 delete mode 100644 src/Random123/aes.h
 delete mode 100644 src/Random123/array.h
 delete mode 100644 src/Random123/ars.h
 delete mode 100644 src/Random123/features/clangfeatures.h
 delete mode 100644 src/Random123/features/compilerfeatures.h
 delete mode 100644 src/Random123/features/crayfeatures.h
 delete mode 100644 src/Random123/features/fujitsufeatures.h
 delete mode 100644 src/Random123/features/gccfeatures.h
 delete mode 100644 src/Random123/features/iccfeatures.h
 delete mode 100644 src/Random123/features/llvmfeatures.h
 delete mode 100644 src/Random123/features/msvcfeatures.h
 delete mode 100644 src/Random123/features/nvccfeatures.h
 delete mode 100644 src/Random123/features/open64features.h
 delete mode 100644 src/Random123/features/openclfeatures.h
 delete mode 100644 src/Random123/features/pgccfeatures.h
 delete mode 100644 src/Random123/features/sse.h
 delete mode 100644 src/Random123/features/sunprofeatures.h
 delete mode 100644 src/Random123/features/xlcfeatures.h
 delete mode 100644 src/Random123/philox.h
 delete mode 100644 src/Random123/threefry.h

diff --git a/configure.ac b/configure.ac
index 458dc892..7c0b1f46 100644
--- a/configure.ac
+++ b/configure.ac
@@ -657,7 +657,7 @@ AC_CONFIG_FILES([
 	src/sundials/Makefile src/sundials/shared/Makefile
 	src/sundials/cvodes/Makefile src/sundials/ida/Makefile
 	src/sparse/Makefile src/memacs/Makefile
-	src/readline/Makefile src/sparse13/Makefile src/Random123/Makefile
+	src/readline/Makefile src/sparse13/Makefile
 	src/oc/Makefile	src/scopmath/Makefile src/nrnoc/Makefile
 	src/gnu/Makefile src/uxnrnbbs/Makefile src/mesch/Makefile
 	src/uxnrnbbs/mos2nrn.h src/nrnmpi/Makefile
diff --git a/src/Makefile.am b/src/Makefile.am
index 4d90b62e..e790a4e8 100755
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -13,7 +13,7 @@ NMODL_SUBDIRS = modlunit nmodl
 else
 NMODL_SUBDIRS = modlunit nmodl
 endif
-NRNOC_SUBDIRS = sparse sparse13 scopmath Random123 nrnmpi oc nrnoc
+NRNOC_SUBDIRS = sparse sparse13 scopmath nrnmpi oc nrnoc
 NRNIV_SUBDIRS = sundials mesch gnu ivoc nrncvode parallel nrniv
 IVOS_SUBDIRS = ivos
 NRNJAVA_SUBDIRS = nrnjava
diff --git a/src/Random123/.clang-format b/src/Random123/.clang-format
deleted file mode 100644
index 9d159247..00000000
--- a/src/Random123/.clang-format
+++ /dev/null
@@ -1,2 +0,0 @@
-DisableFormat: true
-SortIncludes: false
diff --git a/src/Random123/Makefile.am b/src/Random123/Makefile.am
deleted file mode 100644
index 308cb2e1..00000000
--- a/src/Random123/Makefile.am
+++ /dev/null
@@ -1,10 +0,0 @@
-# Just copy the .h files into the distribution tar file
-EXTRA_DIST = aes.h array.h ars.h philox.h threefry.h \
-	features/compilerfeatures.h features/gccfeatures.h \
-	features/iccfeatures.h features/llvmfeatures.h features/msvcfeatures.h \
-	features/nvccfeatures.h features/open64features.h \
-	features/openclfeatures.h features/sse.h features/sunprofeatures.h \
-	features/xlcfeatures.h features/pgccfeatures.h \
-	features/clangfeatures.h features/crayfeatures.h \
-	features/fujitsufeatures.h
-
diff --git a/src/Random123/aes.h b/src/Random123/aes.h
deleted file mode 100644
index 96e3c9cd..00000000
--- a/src/Random123/aes.h
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-#ifndef __Random123_aes_dot_hpp__
-#define __Random123_aes_dot_hpp__
-
-#include "features/compilerfeatures.h"
-#include "array.h"
-
-/* Implement a bona fide AES block cipher.  It's minimally
-// checked against the test vector in FIPS-197 in ut_aes.cpp. */
-#if R123_USE_AES_NI
-
-/** @ingroup AESNI */
-typedef struct r123array1xm128i aesni1xm128i_ctr_t;
-/** @ingroup AESNI */
-typedef struct r123array1xm128i aesni1xm128i_ukey_t;
-/** @ingroup AESNI */
-typedef struct r123array4x32 aesni4x32_ukey_t;
-/** @ingroup AESNI */
-enum r123_enum_aesni1xm128i { aesni1xm128i_rounds = 10 };
-
-/** \cond HIDDEN_FROM_DOXYGEN */
-R123_STATIC_INLINE __m128i AES_128_ASSIST (__m128i temp1, __m128i temp2) { 
-    __m128i temp3; 
-    temp2 = _mm_shuffle_epi32 (temp2 ,0xff); 
-    temp3 = _mm_slli_si128 (temp1, 0x4);
-    temp1 = _mm_xor_si128 (temp1, temp3);
-    temp3 = _mm_slli_si128 (temp3, 0x4);
-    temp1 = _mm_xor_si128 (temp1, temp3);
-    temp3 = _mm_slli_si128 (temp3, 0x4);
-    temp1 = _mm_xor_si128 (temp1, temp3);
-    temp1 = _mm_xor_si128 (temp1, temp2); 
-    return temp1; 
-}
-
-R123_STATIC_INLINE void aesni1xm128iexpand(aesni1xm128i_ukey_t uk, __m128i ret[11])
-{
-    __m128i rkey = uk.v[0].m;
-    __m128i tmp2;
-
-    ret[0] = rkey;
-    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x1);
-    rkey = AES_128_ASSIST(rkey, tmp2);
-    ret[1] = rkey;
-
-    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x2);
-    rkey = AES_128_ASSIST(rkey, tmp2);
-    ret[2] = rkey;
-
-    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x4);
-    rkey = AES_128_ASSIST(rkey, tmp2);
-    ret[3] = rkey;
-
-    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x8);
-    rkey = AES_128_ASSIST(rkey, tmp2);
-    ret[4] = rkey;
-
-    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x10);
-    rkey = AES_128_ASSIST(rkey, tmp2);
-    ret[5] = rkey;
-
-    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x20);
-    rkey = AES_128_ASSIST(rkey, tmp2);
-    ret[6] = rkey;
-
-    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x40);
-    rkey = AES_128_ASSIST(rkey, tmp2);
-    ret[7] = rkey;
-
-    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x80);
-    rkey = AES_128_ASSIST(rkey, tmp2);
-    ret[8] = rkey;
-
-    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x1b);
-    rkey = AES_128_ASSIST(rkey, tmp2);
-    ret[9] = rkey;
-
-    tmp2 = _mm_aeskeygenassist_si128(rkey, 0x36);
-    rkey = AES_128_ASSIST(rkey, tmp2);
-    ret[10] = rkey;
-}
-/** \endcond */
-    
-#ifdef __cplusplus
-/** @ingroup AESNI */
-struct aesni1xm128i_key_t{ 
-    __m128i k[11]; 
-    aesni1xm128i_key_t(){
-        aesni1xm128i_ukey_t uk;
-        uk.v[0].m = _mm_setzero_si128();
-        aesni1xm128iexpand(uk, k);
-    }
-    aesni1xm128i_key_t(const aesni1xm128i_ukey_t& uk){
-        aesni1xm128iexpand(uk, k);
-    }
-    aesni1xm128i_key_t(const aesni4x32_ukey_t& uk){
-        aesni1xm128i_ukey_t uk128;
-        uk128.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]);
-        aesni1xm128iexpand(uk128, k);
-    }
-    aesni1xm128i_key_t& operator=(const aesni1xm128i_ukey_t& uk){
-        aesni1xm128iexpand(uk, k);
-        return *this;
-    }
-    aesni1xm128i_key_t& operator=(const aesni4x32_ukey_t& uk){
-        aesni1xm128i_ukey_t uk128;
-        uk128.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]);
-        aesni1xm128iexpand(uk128, k);
-        return *this;
-    }
-};
-#else
-typedef struct { 
-    __m128i k[11]; 
-}aesni1xm128i_key_t;
-
-/** @ingroup AESNI */
-R123_STATIC_INLINE aesni1xm128i_key_t aesni1xm128ikeyinit(aesni1xm128i_ukey_t uk){
-    aesni1xm128i_key_t ret;
-    aesni1xm128iexpand(uk, ret.k);
-    return ret;
-}
-#endif
-
-/** @ingroup AESNI */
-R123_STATIC_INLINE aesni1xm128i_ctr_t aesni1xm128i(aesni1xm128i_ctr_t in, aesni1xm128i_key_t k) {
-    __m128i x = _mm_xor_si128(k.k[0], in.v[0].m);
-    x = _mm_aesenc_si128(x, k.k[1]);
-    x = _mm_aesenc_si128(x, k.k[2]);
-    x = _mm_aesenc_si128(x, k.k[3]);
-    x = _mm_aesenc_si128(x, k.k[4]);
-    x = _mm_aesenc_si128(x, k.k[5]);
-    x = _mm_aesenc_si128(x, k.k[6]);
-    x = _mm_aesenc_si128(x, k.k[7]);
-    x = _mm_aesenc_si128(x, k.k[8]);
-    x = _mm_aesenc_si128(x, k.k[9]);
-    x = _mm_aesenclast_si128(x, k.k[10]);
-    {
-      aesni1xm128i_ctr_t ret;
-      ret.v[0].m = x;
-      return ret;
-    }
-}
-
-/** @ingroup AESNI */
-R123_STATIC_INLINE aesni1xm128i_ctr_t aesni1xm128i_R(unsigned R, aesni1xm128i_ctr_t in, aesni1xm128i_key_t k){
-    R123_ASSERT(R==10);
-    return aesni1xm128i(in, k);
-}
-
-
-/** @ingroup AESNI */
-typedef struct r123array4x32 aesni4x32_ctr_t;
-/** @ingroup AESNI */
-typedef aesni1xm128i_key_t aesni4x32_key_t;
-/** @ingroup AESNI */
-enum r123_enum_aesni4x32 { aesni4x32_rounds = 10 };
-/** @ingroup AESNI */
-R123_STATIC_INLINE aesni4x32_key_t aesni4x32keyinit(aesni4x32_ukey_t uk){
-    aesni1xm128i_ukey_t uk128;
-    aesni4x32_key_t ret;
-    uk128.v[0].m = _mm_set_epi32(uk.v[3], uk.v[2], uk.v[1], uk.v[0]);
-    aesni1xm128iexpand(uk128, ret.k);
-    return ret;
-}
-
-/** @ingroup AESNI */
-/** The aesni4x32_R function provides a C API to the @ref AESNI "AESNI" CBRNG, allowing the number of rounds to be specified explicitly **/
-R123_STATIC_INLINE aesni4x32_ctr_t aesni4x32_R(unsigned int Nrounds, aesni4x32_ctr_t c, aesni4x32_key_t k){
-    aesni1xm128i_ctr_t c128;
-    c128.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]);
-    c128 = aesni1xm128i_R(Nrounds, c128, k);
-    _mm_storeu_si128((__m128i*)&c.v[0], c128.v[0].m);
-    return c;
-}
-
-#define aesni4x32_rounds aesni1xm128i_rounds
-
-/** The aesni4x32 macro provides a C API to the @ref AESNI "AESNI" CBRNG, uses the default number of rounds i.e. \c aesni4x32_rounds **/
-/** @ingroup AESNI */
-#define aesni4x32(c,k) aesni4x32_R(aesni4x32_rounds, c, k)
-
-#ifdef __cplusplus
-namespace r123{
-/** 
-@defgroup AESNI ARS and AESNI Classes and Typedefs
-
-The ARS4x32, ARS1xm128i, AESNI4x32 and AESNI1xm128i classes export the member functions, typedefs and
-operator overloads required by a @ref CBRNG "CBRNG" class.
-
-ARS1xm128i and AESNI1xm128i are based on the AES block cipher and rely on the AES-NI hardware instructions
-available on some some new (2011) CPUs.
-
-The ARS1xm128i CBRNG and the use of AES for random number generation are described in 
-<a href="http://dl.acm.org/citation.cfm?doid=2063405"><i>Parallel Random Numbers:  As Easy as 1, 2, 3</i> </a>.
-Although it uses some cryptographic primitives, ARS1xm128i uses a cryptographically weak key schedule and is \b not suitable for cryptographic use.
-
-@class AESNI1xm128i
-@ingroup AESNI
-AESNI exports the member functions, typedefs and operator overloads required by a @ref CBRNG class.
-
-AESNI1xm128i uses the crypotgraphic AES round function, including the cryptographic key schedule.
-
-In contrast to the other CBRNGs in the Random123 library, the AESNI1xm128i_R::key_type is opaque
-and is \b not identical to the AESNI1xm128i_R::ukey_type.  Creating a key_type, using either the constructor
-or assignment operator, is significantly more time-consuming than running the bijection (hundreds
-of clock cycles vs. tens of clock cycles).
-
-AESNI1xm128i is only available when the feature-test macro R123_USE_AES_NI is true, which
-should occur only when the compiler is configured to generate AES-NI instructions (or
-when defaults are overridden by compile-time, compiler-command-line options).
-
-As of September 2011, the authors know of no statistical flaws with AESNI1xm128i.  It
-would be an event of major cryptographic note if any such flaws were ever found.
-*/
-struct AESNI1xm128i{
-    typedef aesni1xm128i_ctr_t ctr_type;
-    typedef aesni1xm128i_ukey_t ukey_type;
-    typedef aesni1xm128i_key_t key_type;
-    static const unsigned int rounds=10;
-    ctr_type operator()(ctr_type ctr, key_type key) const{
-        return aesni1xm128i(ctr, key);
-    }
-};
-
-/* @class AESNI4x32 */
-struct AESNI4x32{
-    typedef aesni4x32_ctr_t ctr_type;
-    typedef aesni4x32_ukey_t ukey_type;
-    typedef aesni4x32_key_t key_type;
-    static const unsigned int rounds=10;
-    ctr_type operator()(ctr_type ctr, key_type key) const{
-        return aesni4x32(ctr, key);
-    }
-};
-
-/** @ingroup AESNI
-    @class AESNI1xm128i_R
-
-AESNI1xm128i_R is provided for completeness, but is only instantiable with ROUNDS=10, in
-which case it is identical to AESNI1xm128i */
-template <unsigned ROUNDS=10> 
-struct AESNI1xm128i_R : public AESNI1xm128i{
-    R123_STATIC_ASSERT(ROUNDS==10, "AESNI1xm128i_R<R> is only valid with R=10");
-};
-
-/** @class AESNI4x32_R **/
-template <unsigned ROUNDS=10> 
-struct AESNI4x32_R : public AESNI4x32{
-    R123_STATIC_ASSERT(ROUNDS==10, "AESNI4x32_R<R> is only valid with R=10");
-};
-} // namespace r123
-#endif /* __cplusplus */
-
-#endif /* R123_USE_AES_NI */
-
-#if R123_USE_AES_OPENSSL
-#include <openssl/aes.h>
-typedef struct r123array16x8 aesopenssl16x8_ctr_t;
-typedef struct r123array16x8 aesopenssl16x8_ukey_t;
-#ifdef __cplusplus
-struct aesopenssl16x8_key_t{
-    AES_KEY k;
-    aesopenssl16x8_key_t(){
-        aesopenssl16x8_ukey_t ukey={{}};
-        AES_set_encrypt_key((const unsigned char *)&ukey.v[0], 128, &k);
-    }
-    aesopenssl16x8_key_t(const aesopenssl16x8_ukey_t& ukey){
-        AES_set_encrypt_key((const unsigned char *)&ukey.v[0], 128, &k);
-    }
-    aesopenssl16x8_key_t& operator=(const aesopenssl16x8_ukey_t& ukey){
-        AES_set_encrypt_key((const unsigned char *)&ukey.v[0], 128, &k);
-        return *this;
-    }
-};
-#else
-typedef struct aesopenssl16x8_key_t{
-    AES_KEY k;
-}aesopenssl16x8_key_t;
-R123_STATIC_INLINE struct aesopenssl16x8_key_t aesopenssl16x8keyinit(aesopenssl16x8_ukey_t uk){
-    aesopenssl16x8_key_t ret;
-    AES_set_encrypt_key((const unsigned char *)&uk.v[0], 128, &ret.k);
-    return ret;
-}
-#endif
-
-R123_STATIC_INLINE R123_FORCE_INLINE(aesopenssl16x8_ctr_t aesopenssl16x8_R(aesopenssl16x8_ctr_t ctr, aesopenssl16x8_key_t key));
-R123_STATIC_INLINE
-aesopenssl16x8_ctr_t aesopenssl16x8_R(aesopenssl16x8_ctr_t ctr, aesopenssl16x8_key_t key){
-    aesopenssl16x8_ctr_t ret;
-    AES_encrypt((const unsigned char*)&ctr.v[0], (unsigned char *)&ret.v[0], &key.k);
-    return ret;
-}
-
-#define aesopenssl16x8_rounds aesni4x32_rounds
-#define aesopenssl16x8(c,k) aesopenssl16x8_R(aesopenssl16x8_rounds)
-
-#ifdef __cplusplus
-namespace r123{
-struct AESOpenSSL16x8{
-    typedef aesopenssl16x8_ctr_t ctr_type;
-    typedef aesopenssl16x8_key_t key_type;
-    typedef aesopenssl16x8_ukey_t ukey_type;
-    static const unsigned int rounds=10;
-    ctr_type operator()(const ctr_type& in, const key_type& k){
-        ctr_type out;
-        AES_encrypt((const unsigned char *)&in[0], (unsigned char *)&out[0], &k.k);
-        return out;
-    }
-};
-} // namespace r123
-#endif /* __cplusplus */
-#endif /* R123_USE_AES_OPENSSL */
-
-#endif
diff --git a/src/Random123/array.h b/src/Random123/array.h
deleted file mode 100644
index ab85392d..00000000
--- a/src/Random123/array.h
+++ /dev/null
@@ -1,326 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-#ifndef _r123array_dot_h__
-#define _r123array_dot_h__
-#include "features/compilerfeatures.h"
-#include "features/sse.h"
-
-#ifndef __cplusplus
-#define CXXMETHODS(_N, W, T)
-#define CXXOVERLOADS(_N, W, T)
-#else
-
-#include <stddef.h>
-#include <algorithm>
-#include <stdexcept>
-#include <iterator>
-#include <limits>
-#include <iostream>
-
-/** @defgroup arrayNxW The r123arrayNxW classes 
-
-    Each of the r123arrayNxW is a fixed size array of N W-bit unsigned integers.
-    It is functionally equivalent to the C++0x std::array<N, uintW_t>,
-    but does not require C++0x features or libraries.
-
-    In addition to meeting most of the requirements of a Container,
-    it also has a member function, incr(), which increments the zero-th
-    element and carrys overflows into higher indexed elements.  Thus,
-    by using incr(), sequences of up to 2^(N*W) distinct values
-    can be produced. 
-
-    If SSE is supported by the compiler, then the class
-    r123array1xm128i is also defined, in which the data member is an
-    array of one r123128i object.
-
-    @cond HIDDEN_FROM_DOXYGEN
-*/
-
-template <typename value_type>
-inline R123_CUDA_DEVICE value_type assemble_from_u32(uint32_t *p32){
-    value_type v=0;
-    for(size_t i=0; i<(3+sizeof(value_type))/4; ++i)
-        v |= ((value_type)(*p32++)) << (32*i);
-    return v;
-}
-
-// Work-alike methods and typedefs modeled on std::array:
-#define CXXMETHODS(_N, W, T)                                            \
-    typedef T value_type;                                               \
-    typedef T* iterator;                                                \
-    typedef const T* const_iterator;                                    \
-    typedef value_type& reference;                                      \
-    typedef const value_type& const_reference;                          \
-    typedef size_t size_type;                                           \
-    typedef ptrdiff_t difference_type;                                  \
-    typedef T* pointer;                                                 \
-    typedef const T* const_pointer;                                     \
-    typedef std::reverse_iterator<iterator> reverse_iterator;           \
-    typedef std::reverse_iterator<const_iterator> const_reverse_iterator; \
-    /* Boost.array has static_size.  C++11 specializes tuple_size */    \
-    enum {static_size = _N};                                            \
-    R123_CUDA_DEVICE reference operator[](size_type i){return v[i];}                     \
-    R123_CUDA_DEVICE const_reference operator[](size_type i) const {return v[i];}        \
-    R123_CUDA_DEVICE reference at(size_type i){ if(i >=  _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
-    R123_CUDA_DEVICE const_reference at(size_type i) const { if(i >=  _N) R123_THROW(std::out_of_range("array index out of range")); return (*this)[i]; } \
-    R123_CUDA_DEVICE size_type size() const { return  _N; }                              \
-    R123_CUDA_DEVICE size_type max_size() const { return _N; }                           \
-    R123_CUDA_DEVICE bool empty() const { return _N==0; };                               \
-    R123_CUDA_DEVICE iterator begin() { return &v[0]; }                                  \
-    R123_CUDA_DEVICE iterator end() { return &v[_N]; }                                   \
-    R123_CUDA_DEVICE const_iterator begin() const { return &v[0]; }                      \
-    R123_CUDA_DEVICE const_iterator end() const { return &v[_N]; }                       \
-    R123_CUDA_DEVICE const_iterator cbegin() const { return &v[0]; }                     \
-    R123_CUDA_DEVICE const_iterator cend() const { return &v[_N]; }                      \
-    R123_CUDA_DEVICE reverse_iterator rbegin(){ return reverse_iterator(end()); }        \
-    R123_CUDA_DEVICE const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); } \
-    R123_CUDA_DEVICE reverse_iterator rend(){ return reverse_iterator(begin()); }        \
-    R123_CUDA_DEVICE const_reverse_iterator rend() const{ return const_reverse_iterator(begin()); } \
-    R123_CUDA_DEVICE const_reverse_iterator crbegin() const{ return const_reverse_iterator(cend()); } \
-    R123_CUDA_DEVICE const_reverse_iterator crend() const{ return const_reverse_iterator(cbegin()); } \
-    R123_CUDA_DEVICE pointer data(){ return &v[0]; }                                     \
-    R123_CUDA_DEVICE const_pointer data() const{ return &v[0]; }                         \
-    R123_CUDA_DEVICE reference front(){ return v[0]; }                                   \
-    R123_CUDA_DEVICE const_reference front() const{ return v[0]; }                       \
-    R123_CUDA_DEVICE reference back(){ return v[_N-1]; }                                 \
-    R123_CUDA_DEVICE const_reference back() const{ return v[_N-1]; }                     \
-    R123_CUDA_DEVICE bool operator==(const r123array##_N##x##W& rhs) const{ \
-	/* CUDA3 does not have std::equal */ \
-	for (size_t i = 0; i < _N; ++i) \
-	    if (v[i] != rhs.v[i]) return false; \
-	return true; \
-    } \
-    R123_CUDA_DEVICE bool operator!=(const r123array##_N##x##W& rhs) const{ return !(*this == rhs); } \
-    /* CUDA3 does not have std::fill_n */ \
-    R123_CUDA_DEVICE void fill(const value_type& val){ for (size_t i = 0; i < _N; ++i) v[i] = val; } \
-    R123_CUDA_DEVICE void swap(r123array##_N##x##W& rhs){ \
-	/* CUDA3 does not have std::swap_ranges */ \
-	for (size_t i = 0; i < _N; ++i) { \
-	    T tmp = v[i]; \
-	    v[i] = rhs.v[i]; \
-	    rhs.v[i] = tmp; \
-	} \
-    } \
-    R123_CUDA_DEVICE r123array##_N##x##W& incr(R123_ULONG_LONG n=1){                         \
-        /* This test is tricky because we're trying to avoid spurious   \
-           complaints about illegal shifts, yet still be compile-time   \
-           evaulated. */                                                \
-        if(sizeof(T)<sizeof(n) && n>>((sizeof(T)<sizeof(n))?8*sizeof(T):0) ) \
-            return incr_carefully(n);                                   \
-        if(n==1){                                                       \
-            ++v[0];                                                     \
-            if(_N==1 || R123_BUILTIN_EXPECT(!!v[0], 1)) return *this;   \
-        }else{                                                          \
-            v[0] += n;                                                  \
-            if(_N==1 || R123_BUILTIN_EXPECT(n<=v[0], 1)) return *this;  \
-        }                                                               \
-        /* We expect that the N==?? tests will be                       \
-           constant-folded/optimized away by the compiler, so only the  \
-           overflow tests (!!v[i]) remain to be done at runtime.  For  \
-           small values of N, it would be better to do this as an       \
-           uncondtional sequence of adc.  An experiment/optimization    \
-           for another day...                                           \
-           N.B.  The weird subscripting: v[_N>3?3:0] is to silence      \
-           a spurious error from icpc                                   \
-           */                                                           \
-        ++v[_N>1?1:0];                                                  \
-        if(_N==2 || R123_BUILTIN_EXPECT(!!v[_N>1?1:0], 1)) return *this; \
-        ++v[_N>2?2:0];                                                  \
-        if(_N==3 || R123_BUILTIN_EXPECT(!!v[_N>2?2:0], 1)) return *this;  \
-        ++v[_N>3?3:0];                                                  \
-        for(size_t i=4; i<_N; ++i){                                     \
-            if( R123_BUILTIN_EXPECT(!!v[i-1], 1) ) return *this;        \
-            ++v[i];                                                     \
-        }                                                               \
-        return *this;                                                   \
-    }                                                                   \
-    /* seed(SeedSeq) would be a constructor if having a constructor */  \
-    /* didn't cause headaches with defaults */                          \
-    template <typename SeedSeq>                                         \
-    R123_CUDA_DEVICE static r123array##_N##x##W seed(SeedSeq &ss){      \
-        r123array##_N##x##W ret;                                        \
-        const size_t Ngen = _N*((3+sizeof(value_type))/4);              \
-        uint32_t u32[Ngen];                                             \
-        uint32_t *p32 = &u32[0];                                        \
-        ss.generate(&u32[0], &u32[Ngen]);                               \
-        for(size_t i=0; i<_N; ++i){                                     \
-            ret.v[i] = assemble_from_u32<value_type>(p32);              \
-            p32 += (3+sizeof(value_type))/4;                            \
-        }                                                               \
-        return ret;                                                     \
-    }                                                                   \
-protected:                                                              \
-    R123_CUDA_DEVICE r123array##_N##x##W& incr_carefully(R123_ULONG_LONG n){ \
-        /* n may be greater than the maximum value of a single value_type */ \
-        value_type vtn;                                                 \
-        vtn = n;                                                        \
-        v[0] += n;                                                      \
-        const unsigned rshift = 8* ((sizeof(n)>sizeof(value_type))? sizeof(value_type) : 0); \
-        for(size_t i=1; i<_N; ++i){                                     \
-            if(rshift){                                                 \
-                n >>= rshift;                                           \
-            }else{                                                      \
-                n=0;                                                    \
-            }                                                           \
-            if( v[i-1] < vtn )                                          \
-                ++n;                                                    \
-            if( n==0 ) break;                                           \
-            vtn = n;                                                    \
-            v[i] += n;                                                  \
-        }                                                               \
-        return *this;                                                   \
-    }                                                                   \
-    
-                                                                        
-// There are several tricky considerations for the insertion and extraction
-// operators:
-// - we would like to be able to print r123array16x8 as a sequence of 16 integers,
-//   not as 16 bytes.
-// - we would like to be able to print r123array1xm128i.
-// - we do not want an int conversion operator in r123m128i because it causes
-//   lots of ambiguity problems with automatic promotions.
-// Solution: r123arrayinsertable and r123arrayextractable
-
-template<typename T>
-struct r123arrayinsertable{
-    const T& v;
-    r123arrayinsertable(const T& t_) : v(t_) {} 
-    friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<T>& t){
-        return os << t.v;
-    }
-};
-
-template<>
-struct r123arrayinsertable<uint8_t>{
-    const uint8_t& v;
-    r123arrayinsertable(const uint8_t& t_) : v(t_) {} 
-    friend std::ostream& operator<<(std::ostream& os, const r123arrayinsertable<uint8_t>& t){
-        return os << (int)t.v;
-    }
-};
-
-template<typename T>
-struct r123arrayextractable{
-    T& v;
-    r123arrayextractable(T& t_) : v(t_) {}
-    friend std::istream& operator>>(std::istream& is, r123arrayextractable<T>& t){
-        return is >> t.v;
-    }
-};
-
-template<>
-struct r123arrayextractable<uint8_t>{
-    uint8_t& v;
-    r123arrayextractable(uint8_t& t_) : v(t_) {} 
-    friend std::istream& operator>>(std::istream& is, r123arrayextractable<uint8_t>& t){
-        int i;
-        is >>  i;
-        t.v = i;
-        return is;
-    }
-};
-
-#define CXXOVERLOADS(_N, W, T)                                          \
-                                                                        \
-inline std::ostream& operator<<(std::ostream& os, const r123array##_N##x##W& a){   \
-    os << r123arrayinsertable<T>(a.v[0]);                                  \
-    for(size_t i=1; i<_N; ++i)                                          \
-        os << " " << r123arrayinsertable<T>(a.v[i]);                       \
-    return os;                                                          \
-}                                                                       \
-                                                                        \
-inline std::istream& operator>>(std::istream& is, r123array##_N##x##W& a){         \
-    for(size_t i=0; i<_N; ++i){                                         \
-        r123arrayextractable<T> x(a.v[i]);                                 \
-        is >> x;                                                        \
-    }                                                                   \
-    return is;                                                          \
-}                                                                       \
-                                                                        \
-namespace r123{                                                        \
- typedef r123array##_N##x##W Array##_N##x##W;                          \
-}
-                                                                        
-#endif /* __cplusplus */
-
-/* _r123array_tpl expands to a declaration of struct r123arrayNxW.  
-
-   In C, it's nothing more than a struct containing an array of N
-   objects of type T.
-
-   In C++ it's the same, but endowed with an assortment of member
-   functions, typedefs and friends.  In C++, r123arrayNxW looks a lot
-   like std::array<T,N>, has most of the capabilities of a container,
-   and satisfies the requirements outlined in compat/Engine.hpp for
-   counter and key types.  ArrayNxW, in the r123 namespace is
-   a typedef equivalent to r123arrayNxW.
-*/
-
-#define _r123array_tpl(_N, W, T)                   \
-    /** @ingroup arrayNxW */                        \
-    /** @see arrayNxW */                            \
-struct r123array##_N##x##W{                         \
- T v[_N];                                       \
- CXXMETHODS(_N, W, T)                           \
-};                                              \
-                                                \
-CXXOVERLOADS(_N, W, T)
-
-/** @endcond */
-
-_r123array_tpl(1, 32, uint32_t)  /* r123array1x32 */
-_r123array_tpl(2, 32, uint32_t)  /* r123array2x32 */
-_r123array_tpl(4, 32, uint32_t)  /* r123array4x32 */
-_r123array_tpl(8, 32, uint32_t)  /* r123array8x32 */
-
-_r123array_tpl(1, 64, uint64_t)  /* r123array1x64 */
-_r123array_tpl(2, 64, uint64_t)  /* r123array2x64 */
-_r123array_tpl(4, 64, uint64_t)  /* r123array4x64 */
-
-_r123array_tpl(16, 8, uint8_t)  /* r123array16x8 for ARSsw, AESsw */
-
-#if R123_USE_SSE
-_r123array_tpl(1, m128i, r123m128i) /* r123array1x128i for ARSni, AESni */
-#endif
-
-/* In C++, it's natural to use sizeof(a::value_type), but in C it's
-   pretty convoluted to figure out the width of the value_type of an
-   r123arrayNxW:
-*/
-#define R123_W(a)   (8*sizeof(((a *)0)->v[0]))
-
-/** @namespace r123
-  Most of the Random123 C++ API is contained in the r123 namespace. 
-*/
-
-#endif
-
diff --git a/src/Random123/ars.h b/src/Random123/ars.h
deleted file mode 100644
index a027b6fe..00000000
--- a/src/Random123/ars.h
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-#ifndef __Random123_ars_dot_hpp__
-#define __Random123_ars_dot_hpp__
-
-#include "features/compilerfeatures.h"
-#include "array.h"
-
-#if R123_USE_AES_NI
-
-#ifndef ARS1xm128i_DEFAULT_ROUNDS
-#define ARS1xm128i_DEFAULT_ROUNDS 7
-#endif
-
-/** @ingroup AESNI */
-enum r123_enum_ars1xm128i {ars1xm128i_rounds = ARS1xm128i_DEFAULT_ROUNDS};
-
-/* ARS1xm128i with Weyl keys.  Fast, and Crush-resistant, but NOT CRYPTO. */
-/** @ingroup AESNI */
-typedef struct r123array1xm128i ars1xm128i_ctr_t;
-/** @ingroup AESNI */
-typedef struct r123array1xm128i ars1xm128i_key_t;
-/** @ingroup AESNI */
-typedef struct r123array1xm128i ars1xm128i_ukey_t;
-/** @ingroup AESNI */
-R123_STATIC_INLINE ars1xm128i_key_t ars1xm128ikeyinit(ars1xm128i_ukey_t uk) { return uk; }
-/** @ingroup AESNI */
-R123_STATIC_INLINE ars1xm128i_ctr_t ars1xm128i_R(unsigned int Nrounds, ars1xm128i_ctr_t in, ars1xm128i_key_t k){
-    __m128i kweyl = _mm_set_epi64x(R123_64BIT(0xBB67AE8584CAA73B), /* sqrt(3) - 1.0 */
-                                   R123_64BIT(0x9E3779B97F4A7C15)); /* golden ratio */
-    /* N.B.  the aesenc instructions do the xor *after*
-    // so if we want to follow the AES pattern, we
-    // have to do the initial xor explicitly */
-    __m128i kk = k.v[0].m;
-    __m128i v = _mm_xor_si128(in.v[0].m, kk);
-    ars1xm128i_ctr_t ret;
-    R123_ASSERT(Nrounds<=10);
-    if( Nrounds>1 ){
-        kk = _mm_add_epi64(kk, kweyl);
-        v = _mm_aesenc_si128(v, kk);
-    }
-    if( Nrounds>2 ){
-        kk = _mm_add_epi64(kk, kweyl);
-        v = _mm_aesenc_si128(v, kk);
-    }
-    if( Nrounds>3 ){
-        kk = _mm_add_epi64(kk, kweyl);
-        v = _mm_aesenc_si128(v, kk);
-    }
-    if( Nrounds>4 ){
-        kk = _mm_add_epi64(kk, kweyl);
-        v = _mm_aesenc_si128(v, kk);
-    }
-    if( Nrounds>5 ){
-        kk = _mm_add_epi64(kk, kweyl);
-        v = _mm_aesenc_si128(v, kk);
-    }
-    if( Nrounds>6 ){
-        kk = _mm_add_epi64(kk, kweyl);
-        v = _mm_aesenc_si128(v, kk);
-    }
-    if( Nrounds>7 ){
-        kk = _mm_add_epi64(kk, kweyl);
-        v = _mm_aesenc_si128(v, kk);
-    }
-    if( Nrounds>8 ){
-        kk = _mm_add_epi64(kk, kweyl);
-        v = _mm_aesenc_si128(v, kk);
-    }
-    if( Nrounds>9 ){
-        kk = _mm_add_epi64(kk, kweyl);
-        v = _mm_aesenc_si128(v, kk);
-    }
-    kk = _mm_add_epi64(kk, kweyl);
-    v = _mm_aesenclast_si128(v, kk);
-    ret.v[0].m = v;
-    return ret;
-}
-
-/** @def ars1xm128i
-@ingroup AESNI
-The ars1mx128i macro provides a C API interface to the @ref AESNI "ARS" CBRNG with the default number of rounds i.e. \c ars1xm128i_rounds **/
-#define ars1xm128i(c,k) ars1xm128i_R(ars1xm128i_rounds, c, k)
-
-/** @ingroup AESNI */
-typedef struct r123array4x32 ars4x32_ctr_t;
-/** @ingroup AESNI */
-typedef struct r123array4x32 ars4x32_key_t;
-/** @ingroup AESNI */
-typedef struct r123array4x32 ars4x32_ukey_t;
-/** @ingroup AESNI */
-enum r123_enum_ars4x32 {ars4x32_rounds = ARS1xm128i_DEFAULT_ROUNDS};
-/** @ingroup AESNI */
-R123_STATIC_INLINE ars4x32_key_t ars4x32keyinit(ars4x32_ukey_t uk) { return uk; }
-/** @ingroup AESNI */
-R123_STATIC_INLINE ars4x32_ctr_t ars4x32_R(unsigned int Nrounds, ars4x32_ctr_t c, ars4x32_key_t k){
-    ars1xm128i_ctr_t c128;
-    ars1xm128i_key_t k128;
-    c128.v[0].m = _mm_set_epi32(c.v[3], c.v[2], c.v[1], c.v[0]);
-    k128.v[0].m = _mm_set_epi32(k.v[3], k.v[2], k.v[1], k.v[0]);
-    c128 = ars1xm128i_R(Nrounds, c128, k128);
-    _mm_storeu_si128((__m128i*)&c.v[0], c128.v[0].m);
-    return c;
-}
-
-/** @def ars4x32
-@ingroup AESNI
-The ars4x32 macro provides a C API interface to the @ref AESNI "ARS" CBRNG with the default number of rounds i.e. \c ars4x32_rounds **/
-#define ars4x32(c,k) ars4x32_R(ars4x32_rounds, c, k)
-
-#ifdef __cplusplus
-namespace r123{
-/** 
-@ingroup AESNI
-
-ARS1xm128i_R exports the member functions, typedefs and operator overloads required by a @ref CBRNG class.
-
-ARS1xm128i uses the crypotgraphic AES round function, but a @b non-cryptographc key schedule
-to save time and space.
-
-ARS1xm128i is only available when the feature-test macro R123_USE_AES_NI is true, which
-should occur only when the compiler is configured to generate AES-NI instructions (or
-when defaults are overridden by compile-time, compiler-command-line options).
-
-The template argument, ROUNDS, is the number of times the ARS round
-functions will be applied.
-
-As of September 2011, the authors know of no statistical flaws with
-ROUNDS=5 or more.
-
-@class ARS1xm128i_R
-
-*/
-template<unsigned int ROUNDS>
-struct ARS1xm128i_R{
-    typedef ars1xm128i_ctr_t ctr_type;
-    typedef ars1xm128i_key_t key_type;
-    typedef ars1xm128i_key_t ukey_type;
-    static const unsigned int rounds=ROUNDS;
-    R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
-        return ars1xm128i_R(ROUNDS, ctr, key);
-    }
-};
-
-/** @class ARS4x32_R
-    @ingroup AESNI
-*/
-
-template<unsigned int ROUNDS>
-struct ARS4x32_R{
-    typedef ars4x32_ctr_t ctr_type;
-    typedef ars4x32_key_t key_type;
-    typedef ars4x32_key_t ukey_type;
-    static const unsigned int rounds=ROUNDS;
-    R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){
-        return ars4x32_R(ROUNDS, ctr, key);
-    }
-};
-/**
-@ingroup AESNI
-
-@class ARS1xm128i_R
-  ARS1xm128i is equivalent to ARS1xm128i_R<7>.    With 7 rounds,
-  the ARS1xm128i CBRNG  has a considerable safety margin over the minimum number
-  of rounds with no known statistical flaws, but still has excellent
-  performance. */
-typedef ARS1xm128i_R<ars1xm128i_rounds> ARS1xm128i;
-typedef ARS4x32_R<ars4x32_rounds> ARS4x32;
-} // namespace r123
-
-#endif /* __cplusplus */
-
-#endif /* R123_USE_AES_NI */
-
-#endif
diff --git a/src/Random123/features/clangfeatures.h b/src/Random123/features/clangfeatures.h
deleted file mode 100644
index c4f0fbc3..00000000
--- a/src/Random123/features/clangfeatures.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-#ifndef __clangfeatures_dot_hpp
-#define __clangfeatures_dot_hpp
-
-#ifndef R123_USE_X86INTRIN_H
-#if ((defined(__x86_64__)||defined(__i386__)))
-#define R123_USE_X86INTRIN_H 1
-#else
-#define R123_USE_X86INTRIN_H 0
-#endif
-#endif
-
-#ifndef R123_USE_CXX11_UNRESTRICTED_UNIONS
-#define R123_USE_CXX11_UNRESTRICTED_UNIONS __has_feature(cxx_unrestricted_unions)
-#endif
-
-#ifndef R123_USE_CXX11_STATIC_ASSERT
-#define R123_USE_CXX11_STATIC_ASSERT __has_feature(cxx_static_assert)
-#endif
-
-#ifndef R123_USE_CXX11_CONSTEXPR
-#define R123_USE_CXX11_CONSTEXPR __has_feature(cxx_constexpr)
-#endif
-
-#ifndef R123_USE_CXX11_EXPLICIT_CONVERSIONS
-#define R123_USE_CXX11_EXPLICIT_CONVERSIONS __has_feature(cxx_explicit_conversions)
-#endif
-
-// With clang-3.0, the apparently simpler:
-//  #define R123_USE_CXX11_RANDOM __has_include(<random>)
-// dumps core.
-#ifndef R123_USE_CXX11_RANDOM
-#if __cplusplus>=201103L && __has_include(<random>)
-#define R123_USE_CXX11_RANDOM 1
-#else
-#define R123_USE_CXX11_RANDOM 0
-#endif
-#endif
-
-#ifndef R123_USE_CXX11_TYPE_TRAITS
-#if __cplusplus>=201103L && __has_include(<type_traits>)
-#define R123_USE_CXX11_TYPE_TRAITS 1
-#else
-#define R123_USE_CXX11_TYPE_TRAITS 0
-#endif
-#endif
-
-#include "gccfeatures.h"
-
-#endif
diff --git a/src/Random123/features/compilerfeatures.h b/src/Random123/features/compilerfeatures.h
deleted file mode 100644
index 68c11fa7..00000000
--- a/src/Random123/features/compilerfeatures.h
+++ /dev/null
@@ -1,324 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-/**
-
-@page porting Preprocessor symbols for porting Random123 to different platforms.
-
-The Random123 library is portable across C, C++, CUDA, OpenCL environments,
-and multiple operating systems (Linux, Windows 7, Mac OS X, FreeBSD, Solaris).
-This level of portability requires the abstraction of some features
-and idioms that are either not standardized (e.g., asm statments), or for which 
-different vendors have their own standards (e.g., SSE intrinsics) or for
-which vendors simply refuse to conform to well-established standards (e.g., <inttypes.h>).
-
-Random123/features/compilerfeatures.h
-conditionally includes a compiler-or-OS-specific Random123/featires/XXXfeatures.h file which
-defines appropriate values for the preprocessor symbols which can be used with
-a specific compiler or OS.  Those symbols will then
-be used by other header files and source files in the Random123
-library (and may be used by applications) to control what actually
-gets presented to the compiler.
-
-Most of the symbols are boolean valued.  In general, they will
-\b always be defined with value either 1 or 0, so do
-\b NOT use \#ifdef.  Use \#if R123_USE_SOMETHING instead.
-
-Library users can override any value by defining the pp-symbol with a compiler option,
-e.g.,
-
-    cc -DR123_USE_MULHILO64_C99 
-
-will use a strictly c99 version of the full-width 64x64->128-bit multiplication
-function, even if it would be disabled by default.
-
-All boolean-valued pre-processor symbols in Random123/features/compilerfeatures.h start with the prefix R123_USE_
-@verbatim
-         AES_NI
-         AES_OPENSSL
-         SSE4_2
-         SSE4_1
-         SSE
-
-         STD_RANDOM
-
-         GNU_UINT128
-         ASM_GNU
-         ASM_MSASM
-
-         CPUID_MSVC
-
-         CXX11_RANDOM
-         CXX11_TYPE_TRAITS
-         CXX11_STATIC_ASSERT
-         CXX11_CONSTEXPR
-         CXX11_UNRESTRICTED_UNIONS
-         CXX11_EXPLICIT_CONVERSIONS
-         CXX11_LONG_LONG
-         CXX11 
-   
-         X86INTRIN_H
-         IA32INTRIN_H
-         XMMINTRIN_H
-         EMMINTRIN_H
-         SMMINTRIN_H
-         WMMINTRIN_H
-         INTRIN_H
-
-         MULHILO32_ASM
-         MULHILO64_ASM
-         MULHILO64_MSVC_INTRIN
-         MULHILO64_CUDA_INTRIN
-         MULHILO64_OPENCL_INTRIN
-         MULHILO64_C99
-
-         U01_DOUBLE
-	 
-@endverbatim
-Most have obvious meanings.  Some non-obvious ones:
-
-AES_NI and AES_OPENSSL are not mutually exclusive.  You can have one,
-both or neither.
-
-GNU_UINT128 says that it's safe to use __uint128_t, but it
-does not require its use.  In particular, it should be
-used in mulhilo<uint64_t> only if MULHILO64_ASM is unset.
-
-If the XXXINTRIN_H macros are true, then one should
-@code
-#include <xxxintrin.h>
-@endcode
-to gain accesss to compiler intrinsics.
-
-The CXX11_SOME_FEATURE macros allow the code to use specific
-features of the C++11 language and library.  The catchall
-In the absence of a specific CXX11_SOME_FEATURE, the feature
-is controlled by the catch-all R123_USE_CXX11 macro.
-
-U01_DOUBLE defaults on, and can be turned off (set to 0)
-if one does not want the utility functions that convert to double
-(i.e. u01_*_53()), e.g. on OpenCL without the cl_khr_fp64 extension.
-
-There are a number of invariants that are always true.  Application code may
-choose to rely on these:
-
-<ul>
-<li>ASM_GNU and ASM_MASM are mutually exclusive
-<li>The "higher" SSE values imply the lower ones.
-</ul>
-
-There are also non-boolean valued symbols:
-
-<ul>
-<li>R123_STATIC_INLINE -
-  According to both C99 and GNU99, the 'static inline' declaration allows
-  the compiler to not emit code if the function is not used.  
-  Note that the semantics of 'inline', 'static' and 'extern' in
-  gcc have changed over time and are subject to modification by
-  command line options, e.g., -std=gnu89, -fgnu-inline.
-  Nevertheless, it appears that the meaning of 'static inline' 
-  has not changed over time and (with a little luck) the use of 'static inline'
-  here will be portable between versions of gcc and to other C99
-  compilers.
-  See: http://gcc.gnu.org/onlinedocs/gcc/Inline.html
-       http://www.greenend.org.uk/rjk/2003/03/inline.html
-
-<li>R123_FORCE_INLINE(decl) -
-  which expands to 'decl', adorned with the compiler-specific
-  embellishments to strongly encourage that the declared function be
-  inlined.  If there is no such compiler-specific magic, it should
-  expand to decl, unadorned.
-   
-<li>R123_CUDA_DEVICE - which expands to __device__ (or something else with
-  sufficiently similar semantics) when CUDA is in use, and expands
-  to nothing in other cases.
-
-<li>R123_ASSERT(x) - which expands to assert(x), or maybe to nothing at
-  all if we're in an environment so feature-poor that you can't even
-  call assert (I'm looking at you, CUDA and OpenCL), or even include
-  assert.h safely (OpenCL).
-
-<li>R123_STATIC_ASSERT(expr,msg) - which expands to
-  static_assert(expr,msg), or to an expression that
-  will raise a compile-time exception if expr is not true.
-
-<li>R123_ULONG_LONG - which expands to a declaration of the longest available
-  unsigned integer.
-
-<li>R123_64BIT(x) - expands to something equivalent to
-  UINT64_C(x) from <stdint.h>, even in environments where <stdint.h>
-  is not available, e.g., MSVC and OpenCL.
-
-<li>R123_BUILTIN_EXPECT(expr,likely_value) - expands to something with
-  the semantics of gcc's __builtin_expect(expr,likely_value).  If
-  the environment has nothing like __builtin_expect, it should expand
-  to just expr.
-</ul>
-
-
-\cond HIDDEN_FROM_DOXYGEN
-*/
-
-/* 
-N.B.  When something is added to the list of features, it should be
-added to each of the *features.h files, AND to examples/ut_features.cpp.
-*/
-
-/* N.B.  most other compilers (icc, nvcc, open64, llvm) will also define __GNUC__, so order matters. */
-#if defined(__OPENCL_VERSION__) && __OPENCL_VERSION__ > 0
-#include "openclfeatures.h"
-#elif defined(__CUDACC__)
-#include "nvccfeatures.h"
-#elif defined(__ICC)
-#include "iccfeatures.h"
-#elif defined(__xlC__)
-#include "xlcfeatures.h"
-#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
-#include "sunprofeatures.h"
-#elif defined(__OPEN64__)
-#include "open64features.h"
-#elif defined(__clang__)
-#include "clangfeatures.h"
-#elif defined(_CRAYC)
-#include "crayfeatures.h"
-#elif defined(__GNUC__)
-#include "gccfeatures.h"
-#elif defined(__FCC_VERSION) || defined(__FUJITSU)
-#include "fujitsufeatures.h"
-#elif defined(__PGI)
-#include "pgccfeatures.h"
-#elif defined(_MSC_FULL_VER)
-#include "msvcfeatures.h"
-#else
-#error "Can't identify compiler.  You'll need to add a new xxfeatures.hpp"
-{ /* maybe an unbalanced brace will terminate the compilation */
-#endif
-
-#ifndef R123_USE_CXX11
-#define R123_USE_CXX11 (__cplusplus >= 201103L)
-#endif
-
-#ifndef R123_USE_CXX11_UNRESTRICTED_UNIONS
-#define R123_USE_CXX11_UNRESTRICTED_UNIONS R123_USE_CXX11
-#endif
-
-#ifndef R123_USE_CXX11_STATIC_ASSERT
-#define R123_USE_CXX11_STATIC_ASSERT R123_USE_CXX11
-#endif
-
-#ifndef R123_USE_CXX11_CONSTEXPR
-#define R123_USE_CXX11_CONSTEXPR R123_USE_CXX11
-#endif
-
-#ifndef R123_USE_CXX11_EXPLICIT_CONVERSIONS
-#define R123_USE_CXX11_EXPLICIT_CONVERSIONS R123_USE_CXX11
-#endif
-
-#ifndef R123_USE_CXX11_RANDOM
-#define R123_USE_CXX11_RANDOM R123_USE_CXX11
-#endif
-
-#ifndef R123_USE_CXX11_TYPE_TRAITS
-#define R123_USE_CXX11_TYPE_TRAITS R123_USE_CXX11
-#endif
-
-#ifndef R123_USE_CXX11_LONG_LONG
-#define R123_USE_CXX11_LONG_LONG R123_USE_CXX11
-#endif
-
-#ifndef R123_USE_MULHILO64_C99
-#define R123_USE_MULHILO64_C99 0
-#endif
-
-#ifndef R123_USE_MULHILO64_MULHI_INTRIN
-#define R123_USE_MULHILO64_MULHI_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO32_MULHI_INTRIN
-#define R123_USE_MULHILO32_MULHI_INTRIN 0
-#endif
-
-#ifndef R123_STATIC_ASSERT
-#if R123_USE_CXX11_STATIC_ASSERT
-#define R123_STATIC_ASSERT(expr, msg) static_assert(expr, msg)
-#else
-    /* if msg always_looked_like_this, we could paste it into the name.  Worth it? */
-#define R123_STATIC_ASSERT(expr, msg) typedef char static_assertion[(!!(expr))*2-1]
-#endif
-#endif
-
-#ifndef R123_CONSTEXPR
-#if R123_USE_CXX11_CONSTEXPR
-#define R123_CONSTEXPR constexpr
-#else
-#define R123_CONSTEXPR
-#endif
-#endif
-
-#ifndef R123_USE_PHILOX_64BIT
-#define R123_USE_PHILOX_64BIT (R123_USE_MULHILO64_ASM || R123_USE_MULHILO64_MSVC_INTRIN || R123_USE_MULHILO64_CUDA_INTRIN || R123_USE_GNU_UINT128 || R123_USE_MULHILO64_C99 || R123_USE_MULHILO64_OPENCL_INTRIN || R123_USE_MULHILO64_MULHI_INTRIN)
-#endif
-
-#ifndef R123_ULONG_LONG
-#if defined(__cplusplus) && !R123_USE_CXX11_LONG_LONG
-/* C++98 doesn't have long long.  It doesn't have uint64_t either, but
-   we will have typedef'ed uint64_t to something in the xxxfeatures.h.
-   With luck, it won't elicit complaints from -pedantic.  Cross your
-   fingers... */
-#define R123_ULONG_LONG uint64_t
-#else
-#define R123_ULONG_LONG unsigned long long
-#endif
-#endif
-
-/* UINT64_C should have been #defined by XXXfeatures.h, either by
-   #include <stdint.h> or through compiler-dependent hacks */
-#ifndef R123_64BIT
-#define R123_64BIT(x) UINT64_C(x)
-#endif
-
-#ifndef R123_THROW
-#define R123_THROW(x)    throw (x)
-#endif
-
-/*
- * Windows.h (and perhaps other "well-meaning" code define min and
- * max, so there's a high chance that our definition of min, max
- * methods or use of std::numeric_limits min and max will cause
- * complaints in any program that happened to include Windows.h or
- * suchlike first.  We use the null macro below in our own header
- * files definition or use of min, max to defensively preclude
- * this problem.  It may not be enough; one might need to #define
- * NOMINMAX before including Windows.h or compile with -DNOMINMAX.
- */
-#define R123_NO_MACRO_SUBST
-
-/** \endcond */
diff --git a/src/Random123/features/crayfeatures.h b/src/Random123/features/crayfeatures.h
deleted file mode 100644
index cf34a9ae..00000000
--- a/src/Random123/features/crayfeatures.h
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
-Copyright (c) 2014 EPFL-BBP, All rights reserved.
-
-THIS SOFTWARE IS PROVIDED BY THE BLUE BRAIN PROJECT "AS IS"
-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE BLUE BRAIN PROJECT
-BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
-BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
-WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
-OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
-IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-Note: Minimum/Initial version derived from openclfeatures.h to work
-with cray compiler.
-*/
-
-#ifndef __crayfeatures_dot_hpp
-#define __crayfeatures_dot_hpp
-
-#ifndef R123_STATIC_INLINE
-#define R123_STATIC_INLINE static __inline__
-#endif
-
-#ifndef R123_FORCE_INLINE
-#define R123_FORCE_INLINE(decl) decl
-#endif
-
-#ifndef R123_CUDA_DEVICE
-#define R123_CUDA_DEVICE
-#endif
-
-#ifndef R123_ASSERT
-#include <assert.h>
-#define R123_ASSERT(x) assert(x)
-#endif
-
-#ifndef R123_BUILTIN_EXPECT
-#define R123_BUILTIN_EXPECT(expr,likely) expr
-#endif
-
-#ifndef R123_USE_WMMINTRIN_H
-#define R123_USE_WMMINTRIN_H 0
-#endif
-
-#ifndef R123_USE_INTRIN_H
-#define R123_USE_INTRIN_H 0
-#endif
-
-#ifndef R123_USE_MULHILO32_ASM
-#define R123_USE_MULHILO32_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO64_ASM
-#define R123_USE_MULHILO64_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO64_MSVC_INTRIN
-#define R123_USE_MULHILO64_MSVC_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_CUDA_INTRIN
-#define R123_USE_MULHILO64_CUDA_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
-#define R123_USE_MULHILO64_OPENCL_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_MULHI_INTRIN
-#if (defined(__powerpc64__))
-#define R123_USE_MULHILO64_MULHI_INTRIN 1
-#else
-#define R123_USE_MULHILO64_MULHI_INTRIN 0
-#endif
-#endif
-
-#ifndef R123_MULHILO64_MULHI_INTRIN
-#define R123_MULHILO64_MULHI_INTRIN __mulhdu
-#endif
-
-#ifndef R123_USE_MULHILO32_MULHI_INTRIN
-#define R123_USE_MULHILO32_MULHI_INTRIN 0
-#endif
-
-#ifndef R123_MULHILO32_MULHI_INTRIN
-#define R123_MULHILO32_MULHI_INTRIN __mulhwu
-#endif
-
-#ifndef __STDC_CONSTANT_MACROS
-#define __STDC_CONSTANT_MACROS
-#endif
-#include <stdint.h>
-#ifndef UINT64_C
-#error UINT64_C not defined.  You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
-#endif
-
-#endif
diff --git a/src/Random123/features/fujitsufeatures.h b/src/Random123/features/fujitsufeatures.h
deleted file mode 100644
index 6c2637b0..00000000
--- a/src/Random123/features/fujitsufeatures.h
+++ /dev/null
@@ -1,86 +0,0 @@
-/*
-Note: Minimum/Initial version derived from openclfeatures.h to work
-with cray compiler.
-*/
-
-#ifndef __crayfeatures_dot_hpp
-#define __crayfeatures_dot_hpp
-
-#ifndef R123_STATIC_INLINE
-#define R123_STATIC_INLINE static __inline
-#endif
-
-#ifndef R123_FORCE_INLINE
-#define R123_FORCE_INLINE(decl) decl
-#endif
-
-#ifndef R123_CUDA_DEVICE
-#define R123_CUDA_DEVICE
-#endif
-
-#ifndef R123_ASSERT
-#include <assert.h>
-#define R123_ASSERT(x) assert(x)
-#endif
-
-#ifndef R123_BUILTIN_EXPECT
-#define R123_BUILTIN_EXPECT(expr,likely) expr
-#endif
-
-#ifndef R123_USE_WMMINTRIN_H
-#define R123_USE_WMMINTRIN_H 0
-#endif
-
-#ifndef R123_USE_INTRIN_H
-#define R123_USE_INTRIN_H 0
-#endif
-
-#ifndef R123_USE_MULHILO32_ASM
-#define R123_USE_MULHILO32_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO64_ASM
-#define R123_USE_MULHILO64_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO64_MSVC_INTRIN
-#define R123_USE_MULHILO64_MSVC_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_CUDA_INTRIN
-#define R123_USE_MULHILO64_CUDA_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
-#define R123_USE_MULHILO64_OPENCL_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_MULHI_INTRIN
-#if (defined(__powerpc64__))
-#define R123_USE_MULHILO64_MULHI_INTRIN 1
-#else
-#define R123_USE_MULHILO64_MULHI_INTRIN 0
-#endif
-#endif
-
-#ifndef R123_MULHILO64_MULHI_INTRIN
-#define R123_MULHILO64_MULHI_INTRIN __mulhdu
-#endif
-
-#ifndef R123_USE_MULHILO32_MULHI_INTRIN
-#define R123_USE_MULHILO32_MULHI_INTRIN 0
-#endif
-
-#ifndef R123_MULHILO32_MULHI_INTRIN
-#define R123_MULHILO32_MULHI_INTRIN __mulhwu
-#endif
-
-#ifndef __STDC_CONSTANT_MACROS
-#define __STDC_CONSTANT_MACROS
-#endif
-#include <stdint.h>
-#ifndef UINT64_C
-#error UINT64_C not defined.  You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
-#endif
-
-#endif
diff --git a/src/Random123/features/gccfeatures.h b/src/Random123/features/gccfeatures.h
deleted file mode 100644
index e5c691f2..00000000
--- a/src/Random123/features/gccfeatures.h
+++ /dev/null
@@ -1,265 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-#ifndef __gccfeatures_dot_hpp
-#define __gccfeatures_dot_hpp
-
-#define R123_GNUC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
-
-#if !defined(__x86_64__) && !defined(__i386__) && !defined(__powerpc__) && !defined(__aarch64__)
-#  error "This code has only been tested on x86 and powerpc platforms."
-#include <including_a_nonexistent_file_will_stop_some_compilers_from_continuing_with_a_hopeless_task>
-{ /* maybe an unbalanced brace will terminate the compilation */
- /* Feel free to try the Random123 library on other architectures by changing
- the conditions that reach this error, but you should consider it a
- porting exercise and expect to encounter bugs and deficiencies.
- Please let the authors know of any successes (or failures). */
-#endif
-
-#ifdef __powerpc__
-#include <ppu_intrinsics.h>
-#endif
-
-#ifndef R123_STATIC_INLINE
-#define R123_STATIC_INLINE static __inline__
-#endif
-
-#ifndef R123_FORCE_INLINE
-#if R123_GNUC_VERSION >= 40000
-#define R123_FORCE_INLINE(decl) decl __attribute__((always_inline))
-#else
-#define R123_FORCE_INLINE(decl) decl
-#endif
-#endif
-
-#ifndef R123_CUDA_DEVICE
-#define R123_CUDA_DEVICE
-#endif
-
-#ifndef R123_ASSERT
-#include <assert.h>
-#define R123_ASSERT(x) assert(x)
-#endif
-
-#ifndef R123_BUILTIN_EXPECT
-#define R123_BUILTIN_EXPECT(expr,likely) __builtin_expect(expr,likely)
-#endif
-
-/* According to the C++0x standard, we should be able to test the numeric
-   value of __cplusplus == 199701L for C++98, __cplusplus == 201103L for C++0x
-   But gcc has had an open bug  http://gcc.gnu.org/bugzilla/show_bug.cgi?id=1773
-   since early 2001, which was finally fixed in 4.7 (early 2012).  For
-   earlier versions, the only way  to detect whether --std=c++0x was requested
-   on the command line is to look at the __GCC_EXPERIMENTAL_CXX0X__ pp-symbol.
-*/
-#if (__cplusplus>=201103L || (R123_GNUC_VERSION<40700 && defined(__GCC_EXPERIMENTAL_CXX0X__) ))
-#define GNU_CXX11 1
-#else
-#define GNU_CXX11 0
-#endif
-
-#ifndef R123_USE_CXX11_UNRESTRICTED_UNIONS
-#define R123_USE_CXX11_UNRESTRICTED_UNIONS ((R123_GNUC_VERSION >= 40600) && GNU_CXX11)
-#endif
-
-#ifndef R123_USE_CXX11_STATIC_ASSERT
-#define R123_USE_CXX11_STATIC_ASSERT ((R123_GNUC_VERSION >= 40300) && GNU_CXX11)
-#endif
-
-#ifndef R123_USE_CXX11_CONSTEXPR
-#define R123_USE_CXX11_CONSTEXPR ((R123_GNUC_VERSION >= 40600) && GNU_CXX11)
-#endif
-
-#ifndef R123_USE_CXX11_EXPLICIT_CONVERSIONS
-#define R123_USE_CXX11_EXPLICIT_CONVERSIONS ((R123_GNUC_VERSION >= 40500) && GNU_CXX11)
-#endif
-
-#ifndef R123_USE_CXX11_RANDOM
-#define R123_USE_CXX11_RANDOM ((R123_GNUC_VERSION>=40500) && GNU_CXX11)
-#endif
-
-#ifndef R123_USE_CXX11_TYPE_TRAITS
-#define R123_USE_CXX11_TYPE_TRAITS ((R123_GNUC_VERSION>=40400) && GNU_CXX11)
-#endif
-
-#ifndef R123_USE_AES_NI
-#ifdef __AES__
-#define R123_USE_AES_NI 1
-#else
-#define R123_USE_AES_NI 0
-#endif
-#endif
-
-#ifndef R123_USE_SSE4_2
-#ifdef __SSE4_2__
-#define R123_USE_SSE4_2 1
-#else
-#define R123_USE_SSE4_2 0
-#endif
-#endif
-
-#ifndef R123_USE_SSE4_1
-#ifdef __SSE4_1__
-#define R123_USE_SSE4_1 1
-#else
-#define R123_USE_SSE4_1 0
-#endif
-#endif
-
-#ifndef R123_USE_SSE
-/* There's no point in trying to compile SSE code in Random123
-   unless SSE2 is available. */
-#ifdef __SSE2__
-#define R123_USE_SSE 1
-#else
-#define R123_USE_SSE 0
-#endif
-#endif
-
-#ifndef R123_USE_AES_OPENSSL
-/* There isn't really a good way to tell at compile time whether
-   openssl is available.  Without a pre-compilation configure-like
-   tool, it's less error-prone to guess that it isn't available.  Add
-   -DR123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to
-   play with openssl */
-#define R123_USE_AES_OPENSSL 0
-#endif
-
-#ifndef R123_USE_GNU_UINT128
-#ifdef __x86_64__
-#define R123_USE_GNU_UINT128 1
-#else
-#define R123_USE_GNU_UINT128 0
-#endif
-#endif
-
-#ifndef R123_USE_ASM_GNU
-/* avoid "macro expansion producing 'defined' has undefined behavior */
-#if (defined(__x86_64__)||defined(__i386__))
-#define R123_USE_ASM_GNU 1
-#else
-#define R123_USE_ASM_GNU 0
-#endif
-#endif
-
-#ifndef R123_USE_CPUID_MSVC
-#define R123_USE_CPUID_MSVC 0
-#endif
-
-#ifndef R123_USE_X86INTRIN_H
-/* avoid "macro expansion producing 'defined' has undefined behavior */
-#if ((defined(__x86_64__)||defined(__i386__)) && R123_GNUC_VERSION >= 40402)
-#define R123_USE_X86INTRIN_H 1
-#else
-#define R123_USE_X86INTRIN_H 0
-#endif
-#endif
-
-#ifndef R123_USE_IA32INTRIN_H
-#define R123_USE_IA32INTRIN_H 0
-#endif
-
-#ifndef R123_USE_XMMINTRIN_H
-#define R123_USE_XMMINTRIN_H 0
-#endif
-
-#ifndef R123_USE_EMMINTRIN_H
-/* gcc -m64 on Solaris 10 defines __SSE2__ but doesn't have 
-   emmintrin.h in the include search path.  This is
-   so broken that I refuse to try to work around it.  If this
-   affects you, figure out where your emmintrin.h lives and
-   add an appropriate -I to your CPPFLAGS.  Or add -DR123_USE_SSE=0. */
-#define R123_USE_EMMINTRIN_H (R123_USE_SSE && (R123_GNUC_VERSION < 40402))
-#endif
-
-#ifndef R123_USE_SMMINTRIN_H
-#define R123_USE_SMMINTRIN_H ((R123_USE_SSE4_1 || R123_USE_SSE4_2) && (R123_GNUC_VERSION < 40402))
-#endif
-
-#ifndef R123_USE_WMMINTRIN_H
-#define R123_USE_WMMINTRIN_H 0
-#endif
-
-#ifndef R123_USE_INTRIN_H
-#define R123_USE_INTRIN_H 0
-#endif
-
-#ifndef R123_USE_MULHILO32_ASM
-#define R123_USE_MULHILO32_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO64_ASM
-#define R123_USE_MULHILO64_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO64_MSVC_INTRIN
-#define R123_USE_MULHILO64_MSVC_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_CUDA_INTRIN
-#define R123_USE_MULHILO64_CUDA_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
-#define R123_USE_MULHILO64_OPENCL_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_MULHI_INTRIN
-#if (defined(__powerpc64__))
-#define R123_USE_MULHILO64_MULHI_INTRIN 1
-#else
-#define R123_USE_MULHILO64_MULHI_INTRIN 0
-#endif
-#endif
-
-#ifndef R123_MULHILO64_MULHI_INTRIN
-#define R123_MULHILO64_MULHI_INTRIN __mulhdu
-#endif
-
-#ifndef R123_USE_MULHILO32_MULHI_INTRIN
-#define R123_USE_MULHILO32_MULHI_INTRIN 0
-#endif
-
-#ifndef R123_MULHILO32_MULHI_INTRIN
-#define R123_MULHILO32_MULHI_INTRIN __mulhwu
-#endif
-
-#ifndef __STDC_CONSTANT_MACROS
-#define __STDC_CONSTANT_MACROS
-#endif
-#include <stdint.h>
-#ifndef UINT64_C
-#error UINT64_C not defined.  You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
-#endif
-
-/* If you add something, it must go in all the other XXfeatures.hpp
-   and in ../ut_features.cpp */
-#endif
diff --git a/src/Random123/features/iccfeatures.h b/src/Random123/features/iccfeatures.h
deleted file mode 100644
index 435238bf..00000000
--- a/src/Random123/features/iccfeatures.h
+++ /dev/null
@@ -1,212 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-#ifndef __icpcfeatures_dot_hpp
-#define __icpcfeatures_dot_hpp
-
-// icc relies on gcc libraries and other toolchain components.
-#define R123_GNUC_VERSION (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
-
-#if !defined(__x86_64__) && !defined(__i386__)
-#  error "This code has only been tested on x86 platforms."
-{ // maybe an unbalanced brace will terminate the compilation
-// You are invited to try Easy123 on other architectures, by changing
-// the conditions that reach this error, but you should consider it a
-// porting exercise and expect to encounter bugs and deficiencies.
-// Please let the authors know of any successes (or failures).
-#endif
-
-#ifndef R123_STATIC_INLINE
-#define R123_STATIC_INLINE static inline
-#endif
-
-#ifndef R123_FORCE_INLINE
-#define R123_FORCE_INLINE(decl) decl __attribute__((always_inline))
-#endif
-
-#ifndef R123_CUDA_DEVICE
-#define R123_CUDA_DEVICE
-#endif
-
-#ifndef R123_ASSERT
-#include <assert.h>
-#define R123_ASSERT(x) assert(x)
-#endif
-
-#ifndef R123_BUILTIN_EXPECT
-#define R123_BUILTIN_EXPECT(expr,likely) __builtin_expect(expr,likely)
-#endif
-
-// The basic idiom is:
-// #ifndef R123_SOMETHING
-// #if some condition
-// #define R123_SOMETHING 1
-// #else
-// #define R123_SOMETHING 0
-// #endif
-// #endif
-// This idiom allows an external user to override any decision
-// in this file with a command-line -DR123_SOMETHING=1 or -DR123_SOMETHINE=0
-
-// An alternative idiom is:
-// #ifndef R123_SOMETHING
-// #define R123_SOMETHING (some boolean expression)
-// #endif
-// where the boolean expression might contain previously-defined R123_SOMETHING_ELSE
-// pp-symbols.
-
-#ifndef R123_USE_SSE4_2
-#ifdef __SSE4_2__
-#define R123_USE_SSE4_2 1
-#else
-#define R123_USE_SSE4_2 0
-#endif
-#endif
-
-#ifndef R123_USE_SSE4_1
-#ifdef __SSE4_1__
-#define R123_USE_SSE4_1 1
-#else
-#define R123_USE_SSE4_1 0
-#endif
-#endif
-
-#ifndef R123_USE_SSE
-#ifdef __SSE2__
-#define R123_USE_SSE 1
-#else
-#define R123_USE_SSE 0
-#endif
-#endif
-
-#ifndef R123_USE_AES_NI
-// Unlike gcc, icc (version 12) does not pre-define an __AES__
-// pp-symbol when -maes or -xHost is on the command line.  This feels
-// like a defect in icc (it defines __SSE4_2__ in analogous
-// circumstances), but until Intel fixes it, we're better off erring
-// on the side of caution and not generating instructions that are
-// going to raise SIGILL when executed.  To get the AES-NI
-// instructions with icc, the caller must puts something like
-// -DR123_USE_AES_NI=1 or -D__AES__ on the command line.  FWIW, the
-// AES-NI Whitepaper by Gueron says that icc has supported AES-NI from
-// 11.1 onwards.
-//
-#if ((__ICC>=1101) && defined(__AES__))
-#define R123_USE_AES_NI 1
-#else
-#define R123_USE_AES_NI 0
-#endif
-#endif
-
-#ifndef R123_USE_AES_OPENSSL
-/* There isn't really a good way to tell at compile time whether
-   openssl is available.  Without a pre-compilation configure-like
-   tool, it's less error-prone to guess that it isn't available.  Add
-   -DR123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to
-   play with openssl */
-#define R123_USE_AES_OPENSSL 0
-#endif
-
-#ifndef R123_USE_GNU_UINT128
-#define R123_USE_GNU_UINT128 0
-#endif
-
-#ifndef R123_USE_ASM_GNU
-#define R123_USE_ASM_GNU 1
-#endif
-
-#ifndef R123_USE_CPUID_MSVC
-#define R123_USE_CPUID_MSVC 0
-#endif
-
-#ifndef R123_USE_X86INTRIN_H
-#define R123_USE_X86INTRIN_H 0
-#endif
-
-#ifndef R123_USE_IA32INTRIN_H
-#define R123_USE_IA32INTRIN_H 1
-#endif
-
-#ifndef R123_USE_XMMINTRIN_H
-#define R123_USE_XMMINTRIN_H 0
-#endif
-
-#ifndef R123_USE_EMMINTRIN_H
-#define R123_USE_EMMINTRIN_H 1
-#endif
-
-#ifndef R123_USE_SMMINTRIN_H
-#define R123_USE_SMMINTRIN_H 1
-#endif
-
-#ifndef R123_USE_WMMINTRIN_H
-#define R123_USE_WMMINTRIN_H 1
-#endif
-
-#ifndef R123_USE_INTRIN_H
-#define R123_USE_INTRIN_H 0
-#endif
-
-#ifndef R123_USE_MULHILO16_ASM
-#define R123_USE_MULHILO16_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO32_ASM
-#define R123_USE_MULHILO32_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO64_ASM
-#define R123_USE_MULHILO64_ASM 1
-#endif
-
-#ifndef R123_USE_MULHILO64_MSVC_INTRIN
-#define R123_USE_MULHILO64_MSVC_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_CUDA_INTRIN
-#define R123_USE_MULHILO64_CUDA_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
-#define R123_USE_MULHILO64_OPENCL_INTRIN 0
-#endif
-
-#ifndef __STDC_CONSTANT_MACROS
-#define __STDC_CONSTANT_MACROS
-#endif
-#include <stdint.h>
-#ifndef UINT64_C
-#error UINT64_C not defined.  You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
-#endif
-
-// If you add something, it must go in all the other XXfeatures.hpp
-// and in ../ut_features.cpp
-#endif
diff --git a/src/Random123/features/llvmfeatures.h b/src/Random123/features/llvmfeatures.h
deleted file mode 100644
index 47c3c3ec..00000000
--- a/src/Random123/features/llvmfeatures.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-#ifndef __llvmfeatures_dot_hpp
-#define __llvmfeatures_dot_hpp
-
-/* The gcc features seem to work, but this is a placeholder in case they don't. */
-
-#include "gccfeatures.h"
-
-#endif
diff --git a/src/Random123/features/msvcfeatures.h b/src/Random123/features/msvcfeatures.h
deleted file mode 100644
index 9eb95209..00000000
--- a/src/Random123/features/msvcfeatures.h
+++ /dev/null
@@ -1,200 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-#ifndef __msvcfeatures_dot_hpp
-#define __msvcfeatures_dot_hpp
-
-//#if _MSVC_FULL_VER <= 15
-//#error "We've only tested MSVC_FULL_VER==15."
-//#endif
-
-#if !defined(_M_IX86) && !defined(_M_X64)
-#  error "This code has only been tested on x86 platforms."
-{ // maybe an unbalanced brace will terminate the compilation
-// You are invited to try Random123 on other architectures, by changing
-// the conditions that reach this error, but you should consider it a
-// porting exercise and expect to encounter bugs and deficiencies.
-// Please let the authors know of any successes (or failures).
-#endif
-
-#ifndef R123_STATIC_INLINE
-#define R123_STATIC_INLINE static __inline
-#endif
-
-#ifndef R123_FORCE_INLINE
-#define R123_FORCE_INLINE(decl) _forceinline decl
-#endif
-
-#ifndef R123_CUDA_DEVICE
-#define R123_CUDA_DEVICE
-#endif
-
-#ifndef R123_ASSERT
-#include <assert.h>
-#define R123_ASSERT(x) assert(x)
-#endif
-
-#ifndef R123_BUILTIN_EXPECT
-#define R123_BUILTIN_EXPECT(expr,likely) expr
-#endif
-
-// The basic idiom is:
-// #ifndef R123_SOMETHING
-// #if some condition
-// #define R123_SOMETHING 1
-// #else
-// #define R123_SOMETHING 0
-// #endif
-// #endif
-// This idiom allows an external user to override any decision
-// in this file with a command-line -DR123_SOMETHING=1 or -DR123_SOMETHINE=0
-
-// An alternative idiom is:
-// #ifndef R123_SOMETHING
-// #define R123_SOMETHING (some boolean expression)
-// #endif
-// where the boolean expression might contain previously-defined R123_SOMETHING_ELSE
-// pp-symbols.
-
-#ifndef R123_USE_AES_NI
-#if defined(_M_X64)
-#define R123_USE_AES_NI 1
-#else
-#define R123_USE_AES_NI 0
-#endif
-#endif
-
-#ifndef R123_USE_SSE4_2
-#if defined(_M_X64)
-#define R123_USE_SSE4_2 1
-#else
-#define R123_USE_SSE4_2 0
-#endif
-#endif
-
-#ifndef R123_USE_SSE4_1
-#if defined(_M_X64)
-#define R123_USE_SSE4_1 1
-#else
-#define R123_USE_SSE4_1 0
-#endif
-#endif
-
-#ifndef R123_USE_SSE
-#define R123_USE_SSE 1
-#endif
-
-#ifndef R123_USE_AES_OPENSSL
-#define R123_USE_AES_OPENSSL 0
-#endif
-
-#ifndef R123_USE_GNU_UINT128
-#define R123_USE_GNU_UINT128 0
-#endif
-
-#ifndef R123_USE_ASM_GNU
-#define R123_USE_ASM_GNU 0
-#endif
-
-#ifndef R123_USE_CPUID_MSVC
-#define R123_USE_CPUID_MSVC 1
-#endif
-
-#ifndef R123_USE_X86INTRIN_H
-#define R123_USE_X86INTRIN_H 0
-#endif
-
-#ifndef R123_USE_IA32INTRIN_H
-#define R123_USE_IA32INTRIN_H 0
-#endif
-
-#ifndef R123_USE_XMMINTRIN_H
-#define R123_USE_XMMINTRIN_H 0
-#endif
-
-#ifndef R123_USE_EMMINTRIN_H
-#define R123_USE_EMMINTRIN_H 1
-#endif
-
-#ifndef R123_USE_SMMINTRIN_H
-#define R123_USE_SMMINTRIN_H 1
-#endif
-
-#ifndef R123_USE_WMMINTRIN_H
-#define R123_USE_WMMINTRIN_H 1
-#endif
-
-#ifndef R123_USE_INTRIN_H
-#define R123_USE_INTRIN_H 1
-#endif
-
-#ifndef R123_USE_MULHILO16_ASM
-#define R123_USE_MULHILO16_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO32_ASM
-#define R123_USE_MULHILO32_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO64_ASM
-#define R123_USE_MULHILO64_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO64_MSVC_INTRIN
-#if defined(_M_X64)
-#define R123_USE_MULHILO64_MSVC_INTRIN 1
-#else
-#define R123_USE_MULHILO64_MSVC_INTRIN 0
-#endif
-#endif
-
-#ifndef R123_USE_MULHILO64_CUDA_INTRIN
-#define R123_USE_MULHILO64_CUDA_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
-#define R123_USE_MULHILO64_OPENCL_INTRIN 0
-#endif
-
-#ifndef __STDC_CONSTANT_MACROS
-#define __STDC_CONSTANT_MACROS
-#endif
-#include <stdint.h>
-#ifndef UINT64_C
-#error UINT64_C not defined.  You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
-#endif
-
-#pragma warning(disable:4244)
-#pragma warning(disable:4996)
-
-// If you add something, it must go in all the other XXfeatures.hpp
-// and in ../ut_features.cpp
-#endif
diff --git a/src/Random123/features/nvccfeatures.h b/src/Random123/features/nvccfeatures.h
deleted file mode 100644
index 711babf8..00000000
--- a/src/Random123/features/nvccfeatures.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-#ifndef __r123_nvcc_features_dot_h__
-#define __r123_nvcc_features_dot_h__
-
-#if !defined(CUDART_VERSION)
-#error "why are we in nvccfeatures.h if CUDART_VERSION is not defined"
-#endif
-
-#if CUDART_VERSION < 4010
-#error "CUDA versions earlier than 4.1 produce incorrect results for some templated functions in namespaces.  Random123 isunsupported.  See comments in nvccfeatures.h"
-// This test was added in Random123-1.08 (August, 2013) because we
-// discovered that Ftype(maxTvalue<T>()) with Ftype=double and
-// T=uint64_t in examples/uniform.hpp produces -1 for CUDA4.0 and
-// earlier.  We can't be sure this bug doesn't also affect invocations
-// of other templated functions, e.g., essentially all of Random123.
-// Thus, we no longer trust CUDA versions earlier than 4.1 even though
-// we had previously tested and timed Random123 with CUDA 3.x and 4.0.
-// If you feel lucky or desperate, you can change #error to #warning, but
-// please take extra care to be sure that you are getting correct
-// results.
-#endif
-
-// nvcc falls through to gcc or msvc.  So first define
-// a couple of things and then include either gccfeatures.h
-// or msvcfeatures.h
-
-#ifndef R123_CUDA_DEVICE
-#define R123_CUDA_DEVICE __device__
-#endif
-
-#ifndef R123_USE_MULHILO64_CUDA_INTRIN
-#define R123_USE_MULHILO64_CUDA_INTRIN 1
-#endif
-
-#ifndef R123_ASSERT
-#define R123_ASSERT(x) if((x)) ; else asm("trap;")
-#endif
-
-#ifndef R123_BUILTIN_EXPECT
-#define R123_BUILTIN_EXPECT(expr,likely) expr
-#endif
-
-#ifndef R123_USE_AES_NI
-#define R123_USE_AES_NI 0
-#endif
-
-#ifndef R123_USE_SSE4_2
-#define R123_USE_SSE4_2 0
-#endif
-
-#ifndef R123_USE_SSE4_1
-#define R123_USE_SSE4_1 0
-#endif
-
-#ifndef R123_USE_SSE
-#define R123_USE_SSE 0
-#endif
-
-#ifndef R123_USE_GNU_UINT128
-#define R123_USE_GNU_UINT128 0
-#endif
-
-#ifndef R123_ULONG_LONG
-// uint64_t, which is what we'd get without this, is
-// not the same as unsigned long long
-#define R123_ULONG_LONG unsigned long long
-#endif
-
-#ifndef R123_THROW
-// No exceptions in CUDA, at least upto 4.0
-#define R123_THROW(x)    R123_ASSERT(0)
-#endif
-
-#if defined(__GNUC__)
-#include "gccfeatures.h"
-#elif defined(_MSC_FULL_VER)
-#include "msvcfeatures.h"
-#endif
-
-#endif
diff --git a/src/Random123/features/open64features.h b/src/Random123/features/open64features.h
deleted file mode 100644
index 8da9f5f5..00000000
--- a/src/Random123/features/open64features.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-#ifndef __open64features_dot_hpp
-#define __open64features_dot_hpp
-
-/* The gcc features are mostly right.  We just override a few and then include gccfeatures.h */
-
-/* Open64 4.2.3 and 4.2.4 accept the __uint128_t code without complaint
-   but produce incorrect code for 64-bit philox.  The MULHILO64_ASM
-   seems to work fine */
-#ifndef R123_USE_GNU_UINT128
-#define R123_USE_GNU_UINT128 0
-#endif
-
-#ifndef R123_USE_MULHILO64_ASM
-#define R123_USE_MULHILO64_ASM 1
-#endif
-
-#include "gccfeatures.h"
-
-#endif
diff --git a/src/Random123/features/openclfeatures.h b/src/Random123/features/openclfeatures.h
deleted file mode 100644
index af03d309..00000000
--- a/src/Random123/features/openclfeatures.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-#ifndef __openclfeatures_dot_hpp
-#define __openclfeatures_dot_hpp
-
-#ifndef R123_STATIC_INLINE
-#define R123_STATIC_INLINE inline
-#endif
-
-#ifndef R123_FORCE_INLINE
-#define R123_FORCE_INLINE(decl) decl __attribute__((always_inline))
-#endif
-
-#ifndef R123_CUDA_DEVICE
-#define R123_CUDA_DEVICE
-#endif
-
-#ifndef R123_ASSERT
-#define R123_ASSERT(x)
-#endif
-
-#ifndef R123_BUILTIN_EXPECT
-#define R123_BUILTIN_EXPECT(expr,likely) expr
-#endif
-
-#ifndef R123_USE_GNU_UINT128
-#define R123_USE_GNU_UINT128 0
-#endif
-
-#ifndef R123_USE_MULHILO64_ASM
-#define R123_USE_MULHILO64_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO64_MSVC_INTRIN
-#define R123_USE_MULHILO64_MSVC_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_CUDA_INTRIN
-#define R123_USE_MULHILO64_CUDA_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
-#define R123_USE_MULHILO64_OPENCL_INTRIN 1
-#endif
-
-#ifndef R123_USE_AES_NI
-#define R123_USE_AES_NI 0
-#endif
-
-// XXX ATI APP SDK 2.4 clBuildProgram SEGVs if one uses uint64_t instead of
-// ulong to mul_hi.  And gets lots of complaints from stdint.h
-// on some machines.
-// But these typedefs mean we cannot include stdint.h with
-// these headers?  Do we need R123_64T, R123_32T, R123_8T?
-typedef ulong uint64_t;
-typedef uint  uint32_t;
-typedef uchar uint8_t;
-#define UINT64_C(x) ((ulong)(x##UL))
-
-#endif
diff --git a/src/Random123/features/pgccfeatures.h b/src/Random123/features/pgccfeatures.h
deleted file mode 100644
index 18ace135..00000000
--- a/src/Random123/features/pgccfeatures.h
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-Copyright (c) 2013, Los Alamos National Security, LLC
-All rights reserved.
-
-Copyright 2013. Los Alamos National Security, LLC. This software was produced
-under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
-Laboratory (LANL), which is operated by Los Alamos National Security, LLC for
-the U.S. Department of Energy. The U.S. Government has rights to use,
-reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
-ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
-ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
-to produce derivative works, such modified software should be clearly marked,
-so as not to confuse it with the version available from LANL.
-*/
-#ifndef __pgccfeatures_dot_hpp
-#define __pgccfeatures_dot_hpp
-
-#if !defined(__x86_64__) && !defined(__i386__)
-#  error "This code has only been tested on x86 platforms."
-#include <including_a_nonexistent_file_will_stop_some_compilers_from_continuing_with_a_hopeless_task>
-{ /* maybe an unbalanced brace will terminate the compilation */
- /* Feel free to try the Random123 library on other architectures by changing
- the conditions that reach this error, but you should consider it a
- porting exercise and expect to encounter bugs and deficiencies.
- Please let the authors know of any successes (or failures). */
-#endif
-
-#ifndef R123_STATIC_INLINE
-#define R123_STATIC_INLINE static inline
-#endif
-
-/* Found this example in PGI's emmintrin.h. */
-#ifndef R123_FORCE_INLINE
-#define R123_FORCE_INLINE(decl) decl __attribute__((__always_inline__))
-#endif
-
-#ifndef R123_CUDA_DEVICE
-#define R123_CUDA_DEVICE
-#endif
-
-#ifndef R123_ASSERT
-#include <assert.h>
-#define R123_ASSERT(x) assert(x)
-#endif
-
-#ifndef R123_BUILTIN_EXPECT
-#define R123_BUILTIN_EXPECT(expr,likely) (expr)
-#endif
-
-/* PGI through 13.2 doesn't appear to support AES-NI. */
-#ifndef R123_USE_AES_NI
-#define R123_USE_AES_NI 0
-#endif
-
-/* PGI through 13.2 appears to support MMX, SSE, SSE3, SSE3, SSSE3, SSE4a, and
-   ABM, but not SSE4.1 or SSE4.2. */
-#ifndef R123_USE_SSE4_2
-#define R123_USE_SSE4_2 0
-#endif
-
-#ifndef R123_USE_SSE4_1
-#define R123_USE_SSE4_1 0
-#endif
-
-#ifndef R123_USE_SSE
-/* There's no point in trying to compile SSE code in Random123
-   unless SSE2 is available. */
-#ifdef __SSE2__
-#define R123_USE_SSE 1
-#else
-#define R123_USE_SSE 0
-#endif
-#endif
-
-#ifndef R123_USE_AES_OPENSSL
-/* There isn't really a good way to tell at compile time whether
-   openssl is available.  Without a pre-compilation configure-like
-   tool, it's less error-prone to guess that it isn't available.  Add
-   -DR123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to
-   play with openssl */
-#define R123_USE_AES_OPENSSL 0
-#endif
-
-#ifndef R123_USE_GNU_UINT128
-#define R123_USE_GNU_UINT128 0
-#endif
-
-#ifndef R123_USE_ASM_GNU
-#define R123_USE_ASM_GNU 1
-#endif
-
-#ifndef R123_USE_CPUID_MSVC
-#define R123_USE_CPUID_MSVC 0
-#endif
-
-#ifndef R123_USE_X86INTRIN_H
-#define R123_USE_X86INTRIN_H 0
-#endif
-
-#ifndef R123_USE_IA32INTRIN_H
-#define R123_USE_IA32INTRIN_H 0
-#endif
-
-/* emmintrin.h from PGI #includes xmmintrin.h but then complains at link time
-   about undefined references to _mm_castsi128_ps(__m128i).  Why? */
-#ifndef R123_USE_XMMINTRIN_H
-#define R123_USE_XMMINTRIN_H 1
-#endif
-
-#ifndef R123_USE_EMMINTRIN_H
-#define R123_USE_EMMINTRIN_H 1
-#endif
-
-#ifndef R123_USE_SMMINTRIN_H
-#define R123_USE_SMMINTRIN_H 0
-#endif
-
-#ifndef R123_USE_WMMINTRIN_H
-#define R123_USE_WMMINTRIN_H 0
-#endif
-
-#ifndef R123_USE_INTRIN_H
-#ifdef __ABM__
-#define R123_USE_INTRIN_H 1
-#else
-#define R123_USE_INTRIN_H 0
-#endif
-#endif
-
-#ifndef R123_USE_MULHILO32_ASM
-#define R123_USE_MULHILO32_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO64_MULHI_INTRIN
-#define R123_USE_MULHILO64_MULHI_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_ASM
-#define R123_USE_MULHILO64_ASM 1
-#endif
-
-#ifndef R123_USE_MULHILO64_MSVC_INTRIN
-#define R123_USE_MULHILO64_MSVC_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_CUDA_INTRIN
-#define R123_USE_MULHILO64_CUDA_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
-#define R123_USE_MULHILO64_OPENCL_INTRIN 0
-#endif
-
-#ifndef __STDC_CONSTANT_MACROS
-#define __STDC_CONSTANT_MACROS
-#endif
-#include <stdint.h>
-#ifndef UINT64_C
-#error UINT64_C not defined.  You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
-#endif
-
-/* If you add something, it must go in all the other XXfeatures.hpp
-   and in ../ut_features.cpp */
-#endif
diff --git a/src/Random123/features/sse.h b/src/Random123/features/sse.h
deleted file mode 100644
index 88efd65f..00000000
--- a/src/Random123/features/sse.h
+++ /dev/null
@@ -1,280 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-#ifndef _Random123_sse_dot_h__
-#define _Random123_sse_dot_h__
-
-#if R123_USE_SSE
-
-#if R123_USE_X86INTRIN_H
-#include <x86intrin.h>
-#endif
-#if R123_USE_IA32INTRIN_H
-#include <ia32intrin.h>
-#endif
-#if R123_USE_XMMINTRIN_H
-#include <xmmintrin.h>
-#endif
-#if R123_USE_EMMINTRIN_H
-#include <emmintrin.h>
-#endif
-#if R123_USE_SMMINTRIN_H
-#include <smmintrin.h>
-#endif
-#if R123_USE_WMMINTRIN_H
-#include <wmmintrin.h>
-#endif
-#if R123_USE_INTRIN_H
-#include <intrin.h>
-#endif
-#ifdef __cplusplus
-#include <iostream>
-#include <limits>
-#include <stdexcept>
-#endif
-
-#if R123_USE_ASM_GNU
-
-/* bit25 of CX tells us whether AES is enabled. */
-R123_STATIC_INLINE int haveAESNI(){
-    unsigned int eax, ebx, ecx, edx;
-    __asm__ __volatile__ ("cpuid": "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) :
-                      "a" (1));
-    return (ecx>>25) & 1;
-}
-#elif R123_USE_CPUID_MSVC
-R123_STATIC_INLINE int haveAESNI(){
-    int CPUInfo[4];
-    __cpuid(CPUInfo, 1);
-    return (CPUInfo[2]>>25)&1;
-}
-#else /* R123_USE_CPUID_??? */
-#warning "No R123_USE_CPUID_XXX method chosen.  haveAESNI will always return false"
-R123_STATIC_INLINE int haveAESNI(){
-    return 0;
-}
-#endif /* R123_USE_ASM_GNU || R123_USE_CPUID_MSVC */
-
-// There is a lot of annoying and inexplicable variation in the
-// SSE intrinsics available in different compilation environments.
-// The details seem to depend on the compiler, the version and
-// the target architecture.  Rather than insisting on
-// R123_USE_feature tests for each of these in each of the
-// compilerfeatures.h files we just keep the complexity localized
-// to here...
-#if (defined(__ICC) && __ICC<1210) || (defined(_MSC_VER) && !defined(_WIN64))
-/* Is there an intrinsic to assemble an __m128i from two 64-bit words? 
-   If not, use the 4x32-bit intrisic instead.  N.B.  It looks like Intel
-   added _mm_set_epi64x to icc version 12.1 in Jan 2012.
-*/
-R123_STATIC_INLINE __m128i _mm_set_epi64x(uint64_t v1, uint64_t v0){
-    union{
-        uint64_t u64;
-        uint32_t u32[2];
-    } u1, u0;
-    u1.u64 = v1;
-    u0.u64 = v0;
-    return _mm_set_epi32(u1.u32[1], u1.u32[0], u0.u32[1], u0.u32[0]);
-}
-#endif
-/* _mm_extract_lo64 abstracts the task of extracting the low 64-bit
-   word from an __m128i.  The _mm_cvtsi128_si64 intrinsic does the job
-   on 64-bit platforms.  Unfortunately, both MSVC and Open64 fail
-   assertions in ut_M128.cpp and ut_carray.cpp when we use the
-   _mm_cvtsi128_si64 intrinsic.  (See
-   https://bugs.open64.net/show_bug.cgi?id=873 for the Open64 bug).
-   On 32-bit platforms, there's no MOVQ, so there's no intrinsic.
-   Finally, even if the intrinsic exists, it may be spelled with or
-   without the 'x'.
-*/
-#if !defined(__x86_64__) || defined(_MSC_VER) || defined(__OPEN64__)
-R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){
-    union{
-        uint64_t u64[2];
-        __m128i m;
-    }u;
-    _mm_store_si128(&u.m, si);
-    return u.u64[0];
-}
-#elif defined(__llvm__) || defined(__ICC)
-R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){
-    return (uint64_t)_mm_cvtsi128_si64(si);
-}
-#else /* GNUC, others */
-/* FWIW, gcc's emmintrin.h has had the 'x' spelling
-   since at least gcc-3.4.4.  The no-'x' spelling showed up
-   around 4.2. */
-R123_STATIC_INLINE uint64_t _mm_extract_lo64(__m128i si){
-    return (uint64_t)_mm_cvtsi128_si64x(si);
-}
-#endif
-#if defined(__GNUC__) && __GNUC__ < 4
-/* the cast builtins showed up in gcc4. */
-R123_STATIC_INLINE __m128 _mm_castsi128_ps(__m128i si){
-    return (__m128)si;
-}
-#endif
-
-#ifdef __cplusplus
-
-struct r123m128i{
-    __m128i m;
-#if R123_USE_CXX11_UNRESTRICTED_UNIONS
-    // C++98 forbids a union member from having *any* constructors.
-    // C++11 relaxes this, and allows union members to have constructors
-    // as long as there is a "trivial" default construtor.  So in C++11
-    // we can provide a r123m128i constructor with an __m128i argument, and still
-    // have the default (and hence trivial) default constructor.
-    r123m128i() = default;
-    r123m128i(__m128i _m): m(_m){}
-#endif
-    r123m128i& operator=(const __m128i& rhs){ m=rhs; return *this;}
-    r123m128i& operator=(R123_ULONG_LONG n){ m = _mm_set_epi64x(0, n); return *this;}
-#if R123_USE_CXX11_EXPLICIT_CONVERSIONS
-    // With C++0x we can attach explicit to the bool conversion operator
-    // to disambiguate undesired promotions.  For g++, this works
-    // only in 4.5 and above.
-    explicit operator bool() const {return _bool();}
-#else
-    // Pre-C++0x, we have to do something else.  Google for the "safe bool"
-    // idiom for other ideas...
-    operator const void*() const{return _bool()?this:0;}
-#endif
-    operator __m128i() const {return m;}
-
-private:
-#if R123_USE_SSE4_1
-    bool _bool() const{ return !_mm_testz_si128(m,m); }
-#else
-    bool _bool() const{ return 0xf != _mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(m, _mm_setzero_si128()))); }
-#endif
-};
-
-R123_STATIC_INLINE r123m128i& operator++(r123m128i& v){
-    __m128i& c = v.m;
-    __m128i zeroone = _mm_set_epi64x(R123_64BIT(0), R123_64BIT(1));
-    c = _mm_add_epi64(c, zeroone);
-    //return c;
-#if R123_USE_SSE4_1
-    __m128i zerofff = _mm_set_epi64x(0, ~(R123_64BIT(0)));
-    if( R123_BUILTIN_EXPECT(_mm_testz_si128(c,zerofff), 0) ){
-        __m128i onezero = _mm_set_epi64x(R123_64BIT(1), R123_64BIT(0));
-        c = _mm_add_epi64(c, onezero);
-    }
-#else
-    unsigned mask  = _mm_movemask_ps( _mm_castsi128_ps(_mm_cmpeq_epi32(c, _mm_setzero_si128())));
-    // The low two bits of mask are 11 iff the low 64 bits of
-    // c are zero.
-    if( R123_BUILTIN_EXPECT((mask&0x3) == 0x3, 0) ){
-        __m128i onezero = _mm_set_epi64x(1,0);
-        c = _mm_add_epi64(c, onezero);
-    }
-#endif
-    return v;
-}
-
-R123_STATIC_INLINE r123m128i& operator+=(r123m128i& lhs, R123_ULONG_LONG n){ 
-    __m128i c = lhs.m;
-    __m128i incr128 = _mm_set_epi64x(0, n);
-    c = _mm_add_epi64(c, incr128);
-    // return c;     // NO CARRY!  
-
-    int64_t lo64 = _mm_extract_lo64(c);
-    if((uint64_t)lo64 < n)
-        c = _mm_add_epi64(c, _mm_set_epi64x(1,0));
-    lhs.m = c;
-    return lhs; 
-}
-
-// We need this one because it's present, but never used in r123array1xm128i::incr
-R123_STATIC_INLINE bool operator<=(R123_ULONG_LONG, const r123m128i &){
-    throw std::runtime_error("operator<=(unsigned long long, r123m128i) is unimplemented.");}
-
-// The comparisons aren't implemented, but if we leave them out, and 
-// somebody writes, e.g., M1 < M2, the compiler will do an implicit
-// conversion through void*.  Sigh...
-R123_STATIC_INLINE bool operator<(const r123m128i&, const r123m128i&){
-    throw std::runtime_error("operator<(r123m128i, r123m128i) is unimplemented.");}
-R123_STATIC_INLINE bool operator<=(const r123m128i&, const r123m128i&){
-    throw std::runtime_error("operator<=(r123m128i, r123m128i) is unimplemented.");}
-R123_STATIC_INLINE bool operator>(const r123m128i&, const r123m128i&){
-    throw std::runtime_error("operator>(r123m128i, r123m128i) is unimplemented.");}
-R123_STATIC_INLINE bool operator>=(const r123m128i&, const r123m128i&){
-    throw std::runtime_error("operator>=(r123m128i, r123m128i) is unimplemented.");}
-
-R123_STATIC_INLINE bool operator==(const r123m128i &lhs, const r123m128i &rhs){ 
-    return 0xf==_mm_movemask_ps(_mm_castsi128_ps(_mm_cmpeq_epi32(lhs, rhs))); }
-R123_STATIC_INLINE bool operator!=(const r123m128i &lhs, const r123m128i &rhs){ 
-    return !(lhs==rhs);}
-R123_STATIC_INLINE bool operator==(R123_ULONG_LONG lhs, const r123m128i &rhs){
-    r123m128i LHS; LHS.m=_mm_set_epi64x(0, lhs); return LHS == rhs; }
-R123_STATIC_INLINE bool operator!=(R123_ULONG_LONG lhs, const r123m128i &rhs){
-    return !(lhs==rhs);}
-R123_STATIC_INLINE std::ostream& operator<<(std::ostream& os, const r123m128i& m){
-    union{
-        uint64_t u64[2];
-        __m128i m;
-    }u;
-    _mm_storeu_si128(&u.m, m.m);
-    return os << u.u64[0] << " " << u.u64[1];
-}
-
-R123_STATIC_INLINE std::istream& operator>>(std::istream& is, r123m128i& m){
-    uint64_t u64[2];
-    is >> u64[0] >> u64[1];
-    m.m = _mm_set_epi64x(u64[1], u64[0]);
-    return is;
-}
-
-template<typename T> inline T assemble_from_u32(uint32_t *p32); // forward declaration
-
-template <>
-inline r123m128i assemble_from_u32<r123m128i>(uint32_t *p32){
-    r123m128i ret;
-    ret.m = _mm_set_epi32(p32[3], p32[2], p32[1], p32[0]);
-    return ret;
-}
-
-#else
-
-typedef struct {
-    __m128i m;
-} r123m128i;
-
-#endif /* __cplusplus */
-
-#else /* !R123_USE_SSE */
-R123_STATIC_INLINE int haveAESNI(){
-    return 0;
-}
-#endif /* R123_USE_SSE */
-
-#endif /* _Random123_sse_dot_h__ */
diff --git a/src/Random123/features/sunprofeatures.h b/src/Random123/features/sunprofeatures.h
deleted file mode 100644
index c9cdc00f..00000000
--- a/src/Random123/features/sunprofeatures.h
+++ /dev/null
@@ -1,172 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-#ifndef __sunprofeatures_dot_hpp
-#define __sunprofeatures_dot_hpp
-
-#ifndef R123_STATIC_INLINE
-#define R123_STATIC_INLINE static inline
-#endif
-
-#ifndef R123_FORCE_INLINE
-#define R123_FORCE_INLINE(decl) decl
-#endif
-
-#ifndef R123_CUDA_DEVICE
-#define R123_CUDA_DEVICE
-#endif
-
-#ifndef R123_ASSERT
-#include <assert.h>
-#define R123_ASSERT(x) assert(x)
-#endif
-
-#ifndef R123_BUILTIN_EXPECT
-#define R123_BUILTIN_EXPECT(expr,likely) expr
-#endif
-
-// The basic idiom is:
-// #ifndef R123_SOMETHING
-// #if some condition
-// #define R123_SOMETHING 1
-// #else
-// #define R123_SOMETHING 0
-// #endif
-// #endif
-// This idiom allows an external user to override any decision
-// in this file with a command-line -DR123_SOMETHING=1 or -DR123_SOMETHINE=0
-
-// An alternative idiom is:
-// #ifndef R123_SOMETHING
-// #define R123_SOMETHING (some boolean expression)
-// #endif
-// where the boolean expression might contain previously-defined R123_SOMETHING_ELSE
-// pp-symbols.
-
-#ifndef R123_USE_AES_NI
-#define R123_USE_AES_NI 0
-#endif
-
-#ifndef R123_USE_SSE4_2
-#define R123_USE_SSE4_2 0
-#endif
-
-#ifndef R123_USE_SSE4_1
-#define R123_USE_SSE4_1 0
-#endif
-
-#ifndef R123_USE_SSE
-#define R123_USE_SSE 0
-#endif
-
-#ifndef R123_USE_AES_OPENSSL
-#define R123_USE_AES_OPENSSL 0
-#endif
-
-#ifndef R123_USE_GNU_UINT128
-#define R123_USE_GNU_UINT128 0
-#endif
-
-#ifndef R123_USE_ASM_GNU
-#define R123_USE_ASM_GNU 0
-#endif
-
-#ifndef R123_USE_CPUID_MSVC
-#define R123_USE_CPUID_MSVC 0
-#endif
-
-#ifndef R123_USE_X86INTRIN_H
-#define R123_USE_X86INTRIN_H 0
-#endif
-
-#ifndef R123_USE_IA32INTRIN_H
-#define R123_USE_IA32INTRIN_H 0
-#endif
-
-#ifndef R123_USE_XMMINTRIN_H
-#define R123_USE_XMMINTRIN_H 0
-#endif
-
-#ifndef R123_USE_EMMINTRIN_H
-#define R123_USE_EMMINTRIN_H 0
-#endif
-
-#ifndef R123_USE_SMMINTRIN_H
-#define R123_USE_SMMINTRIN_H 0
-#endif
-
-#ifndef R123_USE_WMMINTRIN_H
-#define R123_USE_WMMINTRIN_H 0
-#endif
-
-#ifndef R123_USE_INTRIN_H
-#define R123_USE_INTRIN_H 0
-#endif
-
-#ifndef R123_USE_MULHILO16_ASM
-#define R123_USE_MULHILO16_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO32_ASM
-#define R123_USE_MULHILO32_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO64_ASM
-#define R123_USE_MULHILO64_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO64_MSVC_INTRIN
-#define R123_USE_MULHILO64_MSVC_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_CUDA_INTRIN
-#define R123_USE_MULHILO64_CUDA_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
-#define R123_USE_MULHILO64_OPENCL_INTRIN 0
-#endif
-
-#ifndef R123_USE_PHILOX_64BIT
-#define R123_USE_PHILOX_64BIT 0
-#endif
-
-#ifndef __STDC_CONSTANT_MACROS
-#define __STDC_CONSTANT_MACROS
-#endif
-#include <stdint.h>
-#ifndef UINT64_C
-#error UINT64_C not defined.  You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
-#endif
-
-// If you add something, it must go in all the other XXfeatures.hpp
-// and in ../ut_features.cpp
-#endif
diff --git a/src/Random123/features/xlcfeatures.h b/src/Random123/features/xlcfeatures.h
deleted file mode 100644
index 02b621d4..00000000
--- a/src/Random123/features/xlcfeatures.h
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-Copyright (c) 2013, Los Alamos National Security, LLC
-All rights reserved.
-
-Copyright 2013. Los Alamos National Security, LLC. This software was produced
-under U.S. Government contract DE-AC52-06NA25396 for Los Alamos National
-Laboratory (LANL), which is operated by Los Alamos National Security, LLC for
-the U.S. Department of Energy. The U.S. Government has rights to use,
-reproduce, and distribute this software.  NEITHER THE GOVERNMENT NOR LOS
-ALAMOS NATIONAL SECURITY, LLC MAKES ANY WARRANTY, EXPRESS OR IMPLIED, OR
-ASSUMES ANY LIABILITY FOR THE USE OF THIS SOFTWARE.  If software is modified
-to produce derivative works, such modified software should be clearly marked,
-so as not to confuse it with the version available from LANL.
-*/
-#ifndef __xlcfeatures_dot_hpp
-#define __xlcfeatures_dot_hpp
-
-#if !defined(__x86_64__) && !defined(__i386__) && !defined(__powerpc__)
-#  error "This code has only been tested on x86 and PowerPC platforms."
-#include <including_a_nonexistent_file_will_stop_some_compilers_from_continuing_with_a_hopeless_task>
-{ /* maybe an unbalanced brace will terminate the compilation */
- /* Feel free to try the Random123 library on other architectures by changing
- the conditions that reach this error, but you should consider it a
- porting exercise and expect to encounter bugs and deficiencies.
- Please let the authors know of any successes (or failures). */
-#endif
-
-#ifdef __cplusplus
-/* builtins are automatically available to xlc.  To use them with xlc++,
-   one must include builtins.h.   c.f
-   http://publib.boulder.ibm.com/infocenter/cellcomp/v101v121/index.jsp?topic=/com.ibm.xlcpp101.cell.doc/compiler_ref/compiler_builtins.html
-*/
-#include <builtins.h>
-#endif
-
-#ifndef R123_STATIC_INLINE
-#define R123_STATIC_INLINE static inline
-#endif
-
-#ifndef R123_FORCE_INLINE
-#define R123_FORCE_INLINE(decl) decl __attribute__((__always_inline__))
-#endif
-
-#ifndef R123_CUDA_DEVICE
-#define R123_CUDA_DEVICE
-#endif
-
-#ifndef R123_ASSERT
-#include <assert.h>
-#define R123_ASSERT(x) assert(x)
-#endif
-
-#ifndef R123_BUILTIN_EXPECT
-#define R123_BUILTIN_EXPECT(expr,likely) __builtin_expect(expr,likely)
-#endif
-
-#ifndef R123_USE_AES_NI
-#define R123_USE_AES_NI 0
-#endif
-
-#ifndef R123_USE_SSE4_2
-#define R123_USE_SSE4_2 0
-#endif
-
-#ifndef R123_USE_SSE4_1
-#define R123_USE_SSE4_1 0
-#endif
-
-#ifndef R123_USE_SSE
-#define R123_USE_SSE 0
-#endif
-
-#ifndef R123_USE_AES_OPENSSL
-/* There isn't really a good way to tell at compile time whether
-   openssl is available.  Without a pre-compilation configure-like
-   tool, it's less error-prone to guess that it isn't available.  Add
-   -DR123_USE_AES_OPENSSL=1 and any necessary LDFLAGS or LDLIBS to
-   play with openssl */
-#define R123_USE_AES_OPENSSL 0
-#endif
-
-#ifndef R123_USE_GNU_UINT128
-#define R123_USE_GNU_UINT128 0
-#endif
-
-#ifndef R123_USE_ASM_GNU
-#define R123_USE_ASM_GNU 1
-#endif
-
-#ifndef R123_USE_CPUID_MSVC
-#define R123_USE_CPUID_MSVC 0
-#endif
-
-#ifndef R123_USE_X86INTRIN_H
-#define R123_USE_X86INTRIN_H 0
-#endif
-
-#ifndef R123_USE_IA32INTRIN_H
-#define R123_USE_IA32INTRIN_H 0
-#endif
-
-#ifndef R123_USE_XMMINTRIN_H
-#define R123_USE_XMMINTRIN_H 0
-#endif
-
-#ifndef R123_USE_EMMINTRIN_H
-#define R123_USE_EMMINTRIN_H 0
-#endif
-
-#ifndef R123_USE_SMMINTRIN_H
-#define R123_USE_SMMINTRIN_H 0
-#endif
-
-#ifndef R123_USE_WMMINTRIN_H
-#define R123_USE_WMMINTRIN_H 0
-#endif
-
-#ifndef R123_USE_INTRIN_H
-#ifdef __ABM__
-#define R123_USE_INTRIN_H 1
-#else
-#define R123_USE_INTRIN_H 0
-#endif
-#endif
-
-#ifndef R123_USE_MULHILO32_ASM
-#define R123_USE_MULHILO32_ASM 0
-#endif
-
-#ifndef R123_USE_MULHILO64_MULHI_INTRIN
-#if (defined(__powerpc64__))
-#define R123_USE_MULHILO64_MULHI_INTRIN 1
-#else
-#define R123_USE_MULHILO64_MULHI_INTRIN 0
-#endif
-#endif
-
-#ifndef R123_MULHILO64_MULHI_INTRIN
-#define R123_MULHILO64_MULHI_INTRIN __mulhdu
-#endif
-
-#ifndef R123_USE_MULHILO32_MULHI_INTRIN
-#define R123_USE_MULHILO32_MULHI_INTRIN 0
-#endif
-
-#ifndef R123_MULHILO32_MULHI_INTRIN
-#define R123_MULHILO32_MULHI_INTRIN __mulhwu
-#endif
-
-#ifndef R123_USE_MULHILO64_ASM
-#if (defined(__powerpc64__) && !(R123_USE_MULHILO64_MULHI_INTRIN))
-#define R123_USE_MULHILO64_ASM 1
-#else
-#define R123_USE_MULHILO64_ASM 0
-#endif
-#endif
-
-#ifndef R123_USE_MULHILO64_MSVC_INTRIN
-#define R123_USE_MULHILO64_MSVC_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_CUDA_INTRIN
-#define R123_USE_MULHILO64_CUDA_INTRIN 0
-#endif
-
-#ifndef R123_USE_MULHILO64_OPENCL_INTRIN
-#define R123_USE_MULHILO64_OPENCL_INTRIN 0
-#endif
-
-#ifndef __STDC_CONSTANT_MACROS
-#define __STDC_CONSTANT_MACROS
-#endif
-#include <stdint.h>
-#ifndef UINT64_C
-#error UINT64_C not defined.  You must define __STDC_CONSTANT_MACROS before you #include <stdint.h>
-#endif
-
-/* If you add something, it must go in all the other XXfeatures.hpp
-   and in ../ut_features.cpp */
-#endif
diff --git a/src/Random123/philox.h b/src/Random123/philox.h
deleted file mode 100644
index 9c87384c..00000000
--- a/src/Random123/philox.h
+++ /dev/null
@@ -1,486 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-#ifndef _philox_dot_h_
-#define _philox_dot_h_
-
-/** \cond HIDDEN_FROM_DOXYGEN */
-
-#include "features/compilerfeatures.h"
-#include "array.h"
-
-
-/*
-// Macros _Foo_tpl are code generation 'templates'  They define
-// inline functions with names obtained by mangling Foo and the
-// macro arguments.  E.g.,
-//   _mulhilo_tpl(32, uint32_t, uint64_t)
-// expands to a definition of:
-//   mulhilo32(uint32_t, uint32_t, uint32_t *, uint32_t *)
-// We then 'instantiate the template' to define
-// several different functions, e.g.,
-//   mulhilo32
-//   mulhilo64
-// These functions will be visible to user code, and may
-// also be used later in subsequent templates and definitions.
-
-// A template for mulhilo using a temporary of twice the word-width.
-// Gcc figures out that this can be reduced to a single 'mul' instruction,
-// despite the apparent use of double-wide variables, shifts, etc.  It's
-// obviously not guaranteed that all compilers will be that smart, so
-// other implementations might be preferable, e.g., using an intrinsic
-// or an asm block.  On the other hand, for 32-bit multiplies,
-// this *is* perfectly standard C99 - any C99 compiler should 
-// understand it and produce correct code.  For 64-bit multiplies,
-// it's only usable if the compiler recognizes that it can do
-// arithmetic on a 128-bit type.  That happens to be true for gcc on
-// x86-64, and powerpc64 but not much else.
-*/
-#define _mulhilo_dword_tpl(W, Word, Dword)                              \
-R123_CUDA_DEVICE R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word* hip){ \
-    Dword product = ((Dword)a)*((Dword)b);                              \
-    *hip = product>>W;                                                  \
-    return (Word)product;                                               \
-}
-
-/*
-// A template for mulhilo using gnu-style asm syntax.
-// INSN can be "mulw", "mull" or "mulq".  
-// FIXME - porting to other architectures, we'll need still-more conditional
-// branching here.  Note that intrinsics are usually preferable.
-*/
-#ifdef __powerpc__
-#define _mulhilo_asm_tpl(W, Word, INSN)                         \
-R123_STATIC_INLINE Word mulhilo##W(Word ax, Word b, Word *hip){ \
-    Word dx = 0;                                                \
-    __asm__("\n\t"                                              \
-        INSN " %0,%1,%2\n\t"                                    \
-        : "=r"(dx)                                              \
-        : "r"(b), "r"(ax)                                       \
-        );                                                      \
-    *hip = dx;                                                  \
-    return ax*b;                                                \
-}
-#else
-#define _mulhilo_asm_tpl(W, Word, INSN)                         \
-R123_STATIC_INLINE Word mulhilo##W(Word ax, Word b, Word *hip){      \
-    Word dx;                                                    \
-    __asm__("\n\t"                                              \
-        INSN " %2\n\t"                                          \
-        : "=a"(ax), "=d"(dx)                                    \
-        : "r"(b), "0"(ax)                                       \
-        );                                                      \
-    *hip = dx;                                                  \
-    return ax;                                                  \
-}
-#endif /* __powerpc__ */
-
-/*
-// A template for mulhilo using MSVC-style intrinsics
-// For example,_umul128 is an msvc intrinsic, c.f.
-// http://msdn.microsoft.com/en-us/library/3dayytw9.aspx
-*/
-#define _mulhilo_msvc_intrin_tpl(W, Word, INTRIN)               \
-R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word* hip){       \
-    return INTRIN(a, b, hip);                                   \
-}
-
-/* N.B.  This really should be called _mulhilo_mulhi_intrin.  It just
-   happens that CUDA was the first time we used the idiom. */
-#define _mulhilo_cuda_intrin_tpl(W, Word, INTRIN)                       \
-R123_CUDA_DEVICE R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word* hip){ \
-    *hip = INTRIN(a, b);                                                \
-    return a*b;                                                         \
-}
-
-/*
-// A template for mulhilo using only word-size operations and
-// C99 operators (no adc, no mulhi).  It
-// requires four multiplies and a dozen or so shifts, adds
-// and tests.  It's not clear what this is good for, other than
-// completeness.  On 32-bit platforms, it could be used to
-// implement philoxNx64, but on such platforms both the philoxNx32
-// and the threefryNx64 cbrngs are going to have much better
-// performance.  It is enabled below by R123_USE_MULHILO64_C99,
-// but that is currently (Sep 2011) not set by any of the
-// features/XXfeatures.h headers.  It can, of course, be
-// set with a compile-time -D option.
-*/
-#define _mulhilo_c99_tpl(W, Word) \
-R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word *hip){ \
-    const unsigned WHALF = W/2;                                    \
-    const Word LOMASK = ((((Word)1)<<WHALF)-1);                    \
-    Word lo = a*b;               /* full low multiply */           \
-    Word ahi = a>>WHALF;                                           \
-    Word alo = a& LOMASK;                                          \
-    Word bhi = b>>WHALF;                                           \
-    Word blo = b& LOMASK;                                          \
-                                                                   \
-    Word ahbl = ahi*blo;                                           \
-    Word albh = alo*bhi;                                           \
-                                                                   \
-    Word ahbl_albh = ((ahbl&LOMASK) + (albh&LOMASK));                   \
-    Word hi = ahi*bhi + (ahbl>>WHALF) +  (albh>>WHALF);                 \
-    hi += ahbl_albh >> WHALF; /* carry from the sum of lo(ahbl) + lo(albh) ) */ \
-    /* carry from the sum with alo*blo */                               \
-    hi += ((lo >> WHALF) < (ahbl_albh&LOMASK));                         \
-    *hip = hi;                                                          \
-    return lo;                                                          \
-}
-
-/*
-// A template for mulhilo on a platform that can't do it
-// We could put a C version here, but is it better to run *VERY*
-// slowly or to just stop and force the user to find another CBRNG?
-*/
-#define _mulhilo_fail_tpl(W, Word)                                      \
-R123_STATIC_INLINE Word mulhilo##W(Word a, Word b, Word *hip){               \
-    R123_STATIC_ASSERT(0, "mulhilo" #W " is not implemented on this machine\n"); \
-}
-
-/*
-// N.B.  There's an MSVC intrinsic called _emul,
-// which *might* compile into better code than
-// _mulhilo_dword_tpl 
-*/
-#if R123_USE_MULHILO32_ASM
-#ifdef __powerpc__
-_mulhilo_asm_tpl(32, uint32_t, "mulhwu")
-#else
-_mulhilo_asm_tpl(32, uint32_t, "mull")
-#endif /* __powerpc__ */
-#else
-_mulhilo_dword_tpl(32, uint32_t, uint64_t)
-#endif
-
-#if R123_USE_PHILOX_64BIT
-#if R123_USE_MULHILO64_ASM
-#ifdef __powerpc64__
-_mulhilo_asm_tpl(64, uint64_t, "mulhdu")
-#else
-_mulhilo_asm_tpl(64, uint64_t, "mulq")
-#endif /* __powerpc64__ */
-#elif R123_USE_MULHILO64_MSVC_INTRIN
-_mulhilo_msvc_intrin_tpl(64, uint64_t, _umul128)
-#elif R123_USE_MULHILO64_CUDA_INTRIN
-_mulhilo_cuda_intrin_tpl(64, uint64_t, __umul64hi)
-#elif R123_USE_MULHILO64_OPENCL_INTRIN
-_mulhilo_cuda_intrin_tpl(64, uint64_t, mul_hi)
-#elif R123_USE_MULHILO64_MULHI_INTRIN
-_mulhilo_cuda_intrin_tpl(64, uint64_t, R123_MULHILO64_MULHI_INTRIN)
-#elif R123_USE_GNU_UINT128
-_mulhilo_dword_tpl(64, uint64_t, __uint128_t)
-#elif R123_USE_MULHILO64_C99
-_mulhilo_c99_tpl(64, uint64_t)
-#else
-_mulhilo_fail_tpl(64, uint64_t)
-#endif
-#endif
-
-/*
-// The multipliers and Weyl constants are "hard coded".
-// To change them, you can #define them with different
-// values before #include-ing this file. 
-// This isn't terribly elegant, but it works for C as
-// well as C++.  A nice C++-only solution would be to
-// use template parameters in the style of <random>
-*/
-#ifndef PHILOX_M2x64_0
-#define PHILOX_M2x64_0 R123_64BIT(0xD2B74407B1CE6E93)
-#endif
-
-#ifndef PHILOX_M4x64_0
-#define PHILOX_M4x64_0 R123_64BIT(0xD2E7470EE14C6C93)
-#endif
-
-#ifndef PHILOX_M4x64_1
-#define PHILOX_M4x64_1 R123_64BIT(0xCA5A826395121157)
-#endif
-
-#ifndef PHILOX_M2x32_0
-#define PHILOX_M2x32_0 ((uint32_t)0xd256d193)
-#endif
-
-#ifndef PHILOX_M4x32_0
-#define PHILOX_M4x32_0 ((uint32_t)0xD2511F53)
-#endif
-#ifndef PHILOX_M4x32_1
-#define PHILOX_M4x32_1 ((uint32_t)0xCD9E8D57)
-#endif
-
-#ifndef PHILOX_W64_0
-#define PHILOX_W64_0 R123_64BIT(0x9E3779B97F4A7C15)  /* golden ratio */
-#endif
-#ifndef PHILOX_W64_1
-#define PHILOX_W64_1 R123_64BIT(0xBB67AE8584CAA73B)  /* sqrt(3)-1 */
-#endif
-
-#ifndef PHILOX_W32_0
-#define PHILOX_W32_0 ((uint32_t)0x9E3779B9)
-#endif
-#ifndef PHILOX_W32_1
-#define PHILOX_W32_1 ((uint32_t)0xBB67AE85)
-#endif
-
-#ifndef PHILOX2x32_DEFAULT_ROUNDS
-#define PHILOX2x32_DEFAULT_ROUNDS 10
-#endif
-
-#ifndef PHILOX2x64_DEFAULT_ROUNDS
-#define PHILOX2x64_DEFAULT_ROUNDS 10
-#endif
-
-#ifndef PHILOX4x32_DEFAULT_ROUNDS
-#define PHILOX4x32_DEFAULT_ROUNDS 10
-#endif
-
-#ifndef PHILOX4x64_DEFAULT_ROUNDS
-#define PHILOX4x64_DEFAULT_ROUNDS 10
-#endif
-
-/* The ignored fourth argument allows us to instantiate the
-   same macro regardless of N. */
-#define _philox2xWround_tpl(W, T)                                       \
-R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(struct r123array2x##W _philox2x##W##round(struct r123array2x##W ctr, struct r123array1x##W key)); \
-R123_CUDA_DEVICE R123_STATIC_INLINE struct r123array2x##W _philox2x##W##round(struct r123array2x##W ctr, struct r123array1x##W key){ \
-    T hi;                                                               \
-    T lo = mulhilo##W(PHILOX_M2x##W##_0, ctr.v[0], &hi);                \
-    struct r123array2x##W out = {{hi^key.v[0]^ctr.v[1], lo}};               \
-    return out;                                                         \
-}
-#define _philox2xWbumpkey_tpl(W)                                        \
-R123_CUDA_DEVICE R123_STATIC_INLINE struct r123array1x##W _philox2x##W##bumpkey( struct r123array1x##W key) { \
-    key.v[0] += PHILOX_W##W##_0;                                        \
-    return key;                                                         \
-}
-
-#define _philox4xWround_tpl(W, T)                                       \
-R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(struct r123array4x##W _philox4x##W##round(struct r123array4x##W ctr, struct r123array2x##W key)); \
-R123_CUDA_DEVICE R123_STATIC_INLINE struct r123array4x##W _philox4x##W##round(struct r123array4x##W ctr, struct r123array2x##W key){ \
-    T hi0;                                                              \
-    T hi1;                                                              \
-    T lo0 = mulhilo##W(PHILOX_M4x##W##_0, ctr.v[0], &hi0);              \
-    T lo1 = mulhilo##W(PHILOX_M4x##W##_1, ctr.v[2], &hi1);              \
-    struct r123array4x##W out = {{hi1^ctr.v[1]^key.v[0], lo1,               \
-                              hi0^ctr.v[3]^key.v[1], lo0}};             \
-    return out;                                                         \
-}
-
-#define _philox4xWbumpkey_tpl(W)                                        \
-R123_CUDA_DEVICE R123_STATIC_INLINE struct r123array2x##W _philox4x##W##bumpkey( struct r123array2x##W key) { \
-    key.v[0] += PHILOX_W##W##_0;                                        \
-    key.v[1] += PHILOX_W##W##_1;                                        \
-    return key;                                                         \
-}
-
-#define _philoxNxW_tpl(N, Nhalf, W, T)                         \
-/** @ingroup PhiloxNxW */                                       \
-enum r123_enum_philox##N##x##W { philox##N##x##W##_rounds = PHILOX##N##x##W##_DEFAULT_ROUNDS }; \
-typedef struct r123array##N##x##W philox##N##x##W##_ctr_t;                  \
-typedef struct r123array##Nhalf##x##W philox##N##x##W##_key_t;              \
-typedef struct r123array##Nhalf##x##W philox##N##x##W##_ukey_t;              \
-R123_CUDA_DEVICE R123_STATIC_INLINE philox##N##x##W##_key_t philox##N##x##W##keyinit(philox##N##x##W##_ukey_t uk) { return uk; } \
-R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(philox##N##x##W##_ctr_t philox##N##x##W##_R(unsigned int R, philox##N##x##W##_ctr_t ctr, philox##N##x##W##_key_t key)); \
-R123_CUDA_DEVICE R123_STATIC_INLINE philox##N##x##W##_ctr_t philox##N##x##W##_R(unsigned int R, philox##N##x##W##_ctr_t ctr, philox##N##x##W##_key_t key) { \
-    R123_ASSERT(R<=16);                                                 \
-    if(R>0){                                       ctr = _philox##N##x##W##round(ctr, key); } \
-    if(R>1){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
-    if(R>2){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
-    if(R>3){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
-    if(R>4){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
-    if(R>5){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
-    if(R>6){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
-    if(R>7){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
-    if(R>8){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
-    if(R>9){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
-    if(R>10){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
-    if(R>11){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
-    if(R>12){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
-    if(R>13){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
-    if(R>14){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
-    if(R>15){ key = _philox##N##x##W##bumpkey(key); ctr = _philox##N##x##W##round(ctr, key); } \
-    return ctr;                                                         \
-}
-         
-_philox2xWbumpkey_tpl(32)
-_philox4xWbumpkey_tpl(32)
-_philox2xWround_tpl(32, uint32_t) /* philo2x32round */
-_philox4xWround_tpl(32, uint32_t)            /* philo4x32round */
-/** \endcond */
-_philoxNxW_tpl(2, 1, 32, uint32_t)    /* philox2x32bijection */
-_philoxNxW_tpl(4, 2, 32, uint32_t)    /* philox4x32bijection */
-#if R123_USE_PHILOX_64BIT
-/** \cond HIDDEN_FROM_DOXYGEN */
-_philox2xWbumpkey_tpl(64)
-_philox4xWbumpkey_tpl(64)
-_philox2xWround_tpl(64, uint64_t) /* philo2x64round */
-_philox4xWround_tpl(64, uint64_t) /* philo4x64round */
-/** \endcond */
-_philoxNxW_tpl(2, 1, 64, uint64_t)    /* philox2x64bijection */
-_philoxNxW_tpl(4, 2, 64, uint64_t)    /* philox4x64bijection */
-#endif /* R123_USE_PHILOX_64BIT */
-
-#define philox2x32(c,k) philox2x32_R(philox2x32_rounds, c, k)
-#define philox4x32(c,k) philox4x32_R(philox4x32_rounds, c, k)
-#if R123_USE_PHILOX_64BIT
-#define philox2x64(c,k) philox2x64_R(philox2x64_rounds, c, k)
-#define philox4x64(c,k) philox4x64_R(philox4x64_rounds, c, k)
-#endif /* R123_USE_PHILOX_64BIT */
-
-#ifdef __cplusplus
-#include <stdexcept>
-
-/** \cond HIDDEN_FROM_DOXYGEN */
-
-#define _PhiloxNxW_base_tpl(CType, KType, N, W)                         \
-namespace r123{                                                          \
-template<unsigned int ROUNDS>                                             \
-struct Philox##N##x##W##_R{                                             \
-    typedef CType ctr_type;                                         \
-    typedef KType key_type;                                             \
-    typedef KType ukey_type;                                         \
-    static const unsigned int rounds=ROUNDS;                                 \
-    inline R123_CUDA_DEVICE R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key) const){ \
-        R123_STATIC_ASSERT(ROUNDS<=16, "philox is only unrolled up to 16 rounds\n"); \
-        return philox##N##x##W##_R(ROUNDS, ctr, key);                       \
-    }                                                                   \
-};                                                                      \
-typedef Philox##N##x##W##_R<philox##N##x##W##_rounds> Philox##N##x##W; \
- } // namespace r123
-/** \endcond */
-
-_PhiloxNxW_base_tpl(r123array2x32, r123array1x32, 2, 32) // Philox2x32_R<R>
-_PhiloxNxW_base_tpl(r123array4x32, r123array2x32, 4, 32) // Philox4x32_R<R>
-#if R123_USE_PHILOX_64BIT
-_PhiloxNxW_base_tpl(r123array2x64, r123array1x64, 2, 64) // Philox2x64_R<R>
-_PhiloxNxW_base_tpl(r123array4x64, r123array2x64, 4, 64) // Philox4x64_R<R>
-#endif
-
-/* The _tpl macros don't quite work to do string-pasting inside comments.
-   so we just write out the boilerplate documentation four times... */
-
-/** 
-@defgroup PhiloxNxW Philox Classes and Typedefs
-
-The PhiloxNxW classes export the member functions, typedefs and
-operator overloads required by a @ref CBRNG "CBRNG" class.
-
-As described in  
-<a href="http://dl.acm.org/citation.cfm?doid=2063405"><i>Parallel Random Numbers:  As Easy as 1, 2, 3</i> </a>.
-The Philox family of counter-based RNGs use integer multiplication, xor and permutation of W-bit words
-to scramble its N-word input key.  Philox is a mnemonic for Product HI LO Xor).
-
-
-@class r123::Philox2x32_R 
-@ingroup PhiloxNxW
-
-exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
-
-The template argument, ROUNDS, is the number of times the Philox round
-function will be applied.
-
-As of November 2011, the authors know of no statistical flaws with
-ROUNDS=6 or more for Philox2x32.
-
-@typedef r123::Philox2x32
-@ingroup PhiloxNxW
-  Philox2x32 is equivalent to Philox2x32_R<10>.    With 10 rounds,
-  Philox2x32 has a considerable safety margin over the minimum number
-  of rounds with no known statistical flaws, but still has excellent
-   performance. 
-
-
-
-@class r123::Philox2x64_R 
-@ingroup PhiloxNxW
-
-exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
-
-The template argument, ROUNDS, is the number of times the Philox round
-function will be applied.
-
-As of September 2011, the authors know of no statistical flaws with
-ROUNDS=6 or more for Philox2x64.
-
-@typedef r123::Philox2x64
-@ingroup PhiloxNxW
-  Philox2x64 is equivalent to Philox2x64_R<10>.    With 10 rounds,
-  Philox2x64 has a considerable safety margin over the minimum number
-  of rounds with no known statistical flaws, but still has excellent
-   performance. 
-
-
-
-@class r123::Philox4x32_R 
-@ingroup PhiloxNxW
-
-exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
-
-The template argument, ROUNDS, is the number of times the Philox round
-function will be applied.
-
-In November 2011, the authors recorded some suspicious p-values (approximately 1.e-7) from
-some very long (longer than the default BigCrush length) SimpPoker tests.  Despite
-the fact that even longer tests reverted to "passing" p-values, a cloud remains over
-Philox4x32 with 7 rounds.  The authors know of no statistical flaws with
-ROUNDS=8 or more for Philox4x32.
-
-@typedef r123::Philox4x32
-@ingroup PhiloxNxW
-  Philox4x32 is equivalent to Philox4x32_R<10>.    With 10 rounds,
-  Philox4x32 has a considerable safety margin over the minimum number
-  of rounds with no known statistical flaws, but still has excellent
-   performance. 
-
-
-
-@class r123::Philox4x64_R 
-@ingroup PhiloxNxW
-
-exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
-
-The template argument, ROUNDS, is the number of times the Philox round
-function will be applied.
-
-As of September 2011, the authors know of no statistical flaws with
-ROUNDS=7 or more for Philox4x64.
-
-@typedef r123::Philox4x64
-@ingroup PhiloxNxW
-  Philox4x64 is equivalent to Philox4x64_R<10>.    With 10 rounds,
-  Philox4x64 has a considerable safety margin over the minimum number
-  of rounds with no known statistical flaws, but still has excellent
-   performance. 
-*/
-
-#endif /* __cplusplus */
-
-#endif /* _philox_dot_h_ */
diff --git a/src/Random123/threefry.h b/src/Random123/threefry.h
deleted file mode 100644
index da2de979..00000000
--- a/src/Random123/threefry.h
+++ /dev/null
@@ -1,864 +0,0 @@
-/*
-Copyright 2010-2011, D. E. Shaw Research.
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright
-  notice, this list of conditions, and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions, and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name of D. E. Shaw Research nor the names of its
-  contributors may be used to endorse or promote products derived from
-  this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-#ifndef _threefry_dot_h_
-#define _threefry_dot_h_
-#include "features/compilerfeatures.h"
-#include "array.h"
-
-/** \cond HIDDEN_FROM_DOXYGEN */
-/* Significant parts of this file were copied from
-   from:
-      Skein_FinalRnd/ReferenceImplementation/skein.h
-      Skein_FinalRnd/ReferenceImplementation/skein_block.c
-
-   in http://csrc.nist.gov/groups/ST/hash/sha-3/Round3/documents/Skein_FinalRnd.zip
-
-   This file has been modified so that it may no longer perform its originally
-   intended function.  If you're looking for a Skein or Threefish source code,
-   please consult the original file.
-
-   The original file had the following header:
-**************************************************************************
-**
-** Interface declarations and internal definitions for Skein hashing.
-**
-** Source code author: Doug Whiting, 2008.
-**
-** This algorithm and source code is released to the public domain.
-**
-***************************************************************************
-
-*/
-
-/* See comment at the top of philox.h for the macro pre-process
-   strategy. */
-
-/* Rotation constants: */
-enum r123_enum_threefry64x4 {
-    /* These are the R_256 constants from the Threefish reference sources
-       with names changed to R_64x4... */
-    R_64x4_0_0=14, R_64x4_0_1=16,
-    R_64x4_1_0=52, R_64x4_1_1=57,
-    R_64x4_2_0=23, R_64x4_2_1=40,
-    R_64x4_3_0= 5, R_64x4_3_1=37,
-    R_64x4_4_0=25, R_64x4_4_1=33,
-    R_64x4_5_0=46, R_64x4_5_1=12,
-    R_64x4_6_0=58, R_64x4_6_1=22,
-    R_64x4_7_0=32, R_64x4_7_1=32
-};
-
-enum r123_enum_threefry64x2 {
-    /*
-    // Output from skein_rot_search: (srs64_B64-X1000)
-    // Random seed = 1. BlockSize = 128 bits. sampleCnt =  1024. rounds =  8, minHW_or=57
-    // Start: Tue Mar  1 10:07:48 2011
-    // rMin = 0.136. #0325[*15] [CRC=455A682F. hw_OR=64. cnt=16384. blkSize= 128].format   
-    */
-    R_64x2_0_0=16,
-    R_64x2_1_0=42,
-    R_64x2_2_0=12,
-    R_64x2_3_0=31,
-    R_64x2_4_0=16,
-    R_64x2_5_0=32,
-    R_64x2_6_0=24,
-    R_64x2_7_0=21
-    /* 4 rounds: minHW =  4  [  4  4  4  4 ]
-    // 5 rounds: minHW =  8  [  8  8  8  8 ]
-    // 6 rounds: minHW = 16  [ 16 16 16 16 ]
-    // 7 rounds: minHW = 32  [ 32 32 32 32 ]
-    // 8 rounds: minHW = 64  [ 64 64 64 64 ]
-    // 9 rounds: minHW = 64  [ 64 64 64 64 ]
-    //10 rounds: minHW = 64  [ 64 64 64 64 ]
-    //11 rounds: minHW = 64  [ 64 64 64 64 ] */
-};
-
-enum r123_enum_threefry32x4 {
-    /* Output from skein_rot_search: (srs-B128-X5000.out)
-    // Random seed = 1. BlockSize = 64 bits. sampleCnt =  1024. rounds =  8, minHW_or=28
-    // Start: Mon Aug 24 22:41:36 2009
-    // ...
-    // rMin = 0.472. #0A4B[*33] [CRC=DD1ECE0F. hw_OR=31. cnt=16384. blkSize= 128].format    */
-    R_32x4_0_0=10, R_32x4_0_1=26,
-    R_32x4_1_0=11, R_32x4_1_1=21,
-    R_32x4_2_0=13, R_32x4_2_1=27,
-    R_32x4_3_0=23, R_32x4_3_1= 5,
-    R_32x4_4_0= 6, R_32x4_4_1=20,
-    R_32x4_5_0=17, R_32x4_5_1=11,
-    R_32x4_6_0=25, R_32x4_6_1=10,
-    R_32x4_7_0=18, R_32x4_7_1=20
-
-    /* 4 rounds: minHW =  3  [  3  3  3  3 ]
-    // 5 rounds: minHW =  7  [  7  7  7  7 ]
-    // 6 rounds: minHW = 12  [ 13 12 13 12 ]
-    // 7 rounds: minHW = 22  [ 22 23 22 23 ]
-    // 8 rounds: minHW = 31  [ 31 31 31 31 ]
-    // 9 rounds: minHW = 32  [ 32 32 32 32 ]
-    //10 rounds: minHW = 32  [ 32 32 32 32 ]
-    //11 rounds: minHW = 32  [ 32 32 32 32 ] */
-
-};
-
-enum r123_enum_threefry32x2 {
-    /* Output from skein_rot_search (srs32x2-X5000.out)
-    // Random seed = 1. BlockSize = 64 bits. sampleCnt =  1024. rounds =  8, minHW_or=28
-    // Start: Tue Jul 12 11:11:33 2011
-    // rMin = 0.334. #0206[*07] [CRC=1D9765C0. hw_OR=32. cnt=16384. blkSize=  64].format   */
-    R_32x2_0_0=13,
-    R_32x2_1_0=15,
-    R_32x2_2_0=26,
-    R_32x2_3_0= 6,
-    R_32x2_4_0=17,
-    R_32x2_5_0=29,
-    R_32x2_6_0=16,
-    R_32x2_7_0=24
-
-    /* 4 rounds: minHW =  4  [  4  4  4  4 ]
-    // 5 rounds: minHW =  6  [  6  8  6  8 ]
-    // 6 rounds: minHW =  9  [  9 12  9 12 ]
-    // 7 rounds: minHW = 16  [ 16 24 16 24 ]
-    // 8 rounds: minHW = 32  [ 32 32 32 32 ]
-    // 9 rounds: minHW = 32  [ 32 32 32 32 ]
-    //10 rounds: minHW = 32  [ 32 32 32 32 ]
-    //11 rounds: minHW = 32  [ 32 32 32 32 ] */
-    };
-
-enum r123_enum_threefry_wcnt {
-    WCNT2=2,
-    WCNT4=4
-};
-R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(uint64_t RotL_64(uint64_t x, unsigned int N));
-R123_CUDA_DEVICE R123_STATIC_INLINE uint64_t RotL_64(uint64_t x, unsigned int N)
-{
-    return (x << (N & 63)) | (x >> ((64-N) & 63));
-}
-    
-R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(uint32_t RotL_32(uint32_t x, unsigned int N));
-R123_CUDA_DEVICE R123_STATIC_INLINE uint32_t RotL_32(uint32_t x, unsigned int N)
-{
-    return (x << (N & 31)) | (x >> ((32-N) & 31));
-}
-
-#define SKEIN_MK_64(hi32,lo32)  ((lo32) + (((uint64_t) (hi32)) << 32))
-#define SKEIN_KS_PARITY64         SKEIN_MK_64(0x1BD11BDA,0xA9FC1A22)
-#define SKEIN_KS_PARITY32         0x1BD11BDA
-
-#ifndef THREEFRY2x32_DEFAULT_ROUNDS
-#define THREEFRY2x32_DEFAULT_ROUNDS 20
-#endif
-
-#ifndef THREEFRY2x64_DEFAULT_ROUNDS
-#define THREEFRY2x64_DEFAULT_ROUNDS 20
-#endif
-
-#ifndef THREEFRY4x32_DEFAULT_ROUNDS
-#define THREEFRY4x32_DEFAULT_ROUNDS 20
-#endif
-
-#ifndef THREEFRY4x64_DEFAULT_ROUNDS
-#define THREEFRY4x64_DEFAULT_ROUNDS 20
-#endif
-
-#define _threefry2x_tpl(W)                                              \
-typedef struct r123array2x##W threefry2x##W##_ctr_t;                          \
-typedef struct r123array2x##W threefry2x##W##_key_t;                          \
-typedef struct r123array2x##W threefry2x##W##_ukey_t;                          \
-R123_CUDA_DEVICE R123_STATIC_INLINE threefry2x##W##_key_t threefry2x##W##keyinit(threefry2x##W##_ukey_t uk) { return uk; } \
-R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry2x##W##_ctr_t threefry2x##W##_R(unsigned int Nrounds, threefry2x##W##_ctr_t in, threefry2x##W##_key_t k)); \
-R123_CUDA_DEVICE R123_STATIC_INLINE                                          \
-threefry2x##W##_ctr_t threefry2x##W##_R(unsigned int Nrounds, threefry2x##W##_ctr_t in, threefry2x##W##_key_t k){ \
-    threefry2x##W##_ctr_t X;                                              \
-    uint##W##_t ks[2+1];                                          \
-    int  i; /* avoid size_t to avoid need for stddef.h */                   \
-    R123_ASSERT(Nrounds<=32);                                           \
-    ks[2] =  SKEIN_KS_PARITY##W;                                   \
-    for (i=0;i < 2; i++)                                        \
-        {                                                               \
-            ks[i] = k.v[i];                                             \
-            X.v[i]  = in.v[i];                                          \
-            ks[2] ^= k.v[i];                                    \
-        }                                                               \
-                                                                        \
-    /* Insert initial key before round 0 */                             \
-    X.v[0] += ks[0]; X.v[1] += ks[1];                                   \
-                                                                        \
-    if(Nrounds>0){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>1){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>2){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>3){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>3){                                                      \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[1]; X.v[1] += ks[2];                               \
-        X.v[1] += 1;     /* X.v[2-1] += r  */                   \
-    }                                                                   \
-    if(Nrounds>4){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>5){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>6){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>7){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>7){                                                      \
-        /* InjectKey(r=2) */                                            \
-        X.v[0] += ks[2]; X.v[1] += ks[0];                               \
-        X.v[1] += 2;                                                    \
-    }                                                                   \
-    if(Nrounds>8){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>9){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>10){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>11){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>11){                                                     \
-        /* InjectKey(r=3) */                                            \
-        X.v[0] += ks[0]; X.v[1] += ks[1];                               \
-        X.v[1] += 3;                                                    \
-    }                                                                   \
-    if(Nrounds>12){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>13){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>14){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>15){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>15){                                                     \
-        /* InjectKey(r=4) */                                            \
-        X.v[0] += ks[1]; X.v[1] += ks[2];                               \
-        X.v[1] += 4;                                                    \
-    }                                                                   \
-    if(Nrounds>16){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>17){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>18){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>19){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>19){                                                     \
-        /* InjectKey(r=5) */                                            \
-        X.v[0] += ks[2]; X.v[1] += ks[0];                               \
-        X.v[1] += 5;                                                    \
-    }                                                                   \
-    if(Nrounds>20){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>21){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>22){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>23){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>23){                                                     \
-        /* InjectKey(r=6) */                                            \
-        X.v[0] += ks[0]; X.v[1] += ks[1];                               \
-        X.v[1] += 6;                                                    \
-    }                                                                   \
-    if(Nrounds>24){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_0_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>25){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_1_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>26){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_2_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>27){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_3_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>27){                                                     \
-        /* InjectKey(r=7) */                                            \
-        X.v[0] += ks[1]; X.v[1] += ks[2];                               \
-        X.v[1] += 7;                                                    \
-    }                                                                   \
-    if(Nrounds>28){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_4_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>29){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_5_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>30){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_6_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>31){  X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x2_7_0); X.v[1] ^= X.v[0]; } \
-    if(Nrounds>31){                                                     \
-        /* InjectKey(r=8) */                                            \
-        X.v[0] += ks[2]; X.v[1] += ks[0];                               \
-        X.v[1] += 8;                                                    \
-    }                                                                   \
-    return X;                                                           \
-}                                                                       \
- /** @ingroup ThreefryNxW */                                            \
-enum r123_enum_threefry2x##W { threefry2x##W##_rounds = THREEFRY2x##W##_DEFAULT_ROUNDS };       \
-R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry2x##W##_ctr_t threefry2x##W(threefry2x##W##_ctr_t in, threefry2x##W##_key_t k)); \
-R123_CUDA_DEVICE R123_STATIC_INLINE                                     \
-threefry2x##W##_ctr_t threefry2x##W(threefry2x##W##_ctr_t in, threefry2x##W##_key_t k){ \
-    return threefry2x##W##_R(threefry2x##W##_rounds, in, k);            \
-}
-
-
-#define _threefry4x_tpl(W)                                              \
-typedef struct r123array4x##W threefry4x##W##_ctr_t;                        \
-typedef struct r123array4x##W threefry4x##W##_key_t;                        \
-typedef struct r123array4x##W threefry4x##W##_ukey_t;                        \
-R123_CUDA_DEVICE R123_STATIC_INLINE threefry4x##W##_key_t threefry4x##W##keyinit(threefry4x##W##_ukey_t uk) { return uk; } \
-R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry4x##W##_ctr_t threefry4x##W##_R(unsigned int Nrounds, threefry4x##W##_ctr_t in, threefry4x##W##_key_t k)); \
-R123_CUDA_DEVICE R123_STATIC_INLINE                                          \
-threefry4x##W##_ctr_t threefry4x##W##_R(unsigned int Nrounds, threefry4x##W##_ctr_t in, threefry4x##W##_key_t k){ \
-    threefry4x##W##_ctr_t X;                                            \
-    uint##W##_t ks[4+1];                                            \
-    int  i; /* avoid size_t to avoid need for stddef.h */                   \
-    R123_ASSERT(Nrounds<=72);                                           \
-    ks[4] =  SKEIN_KS_PARITY##W;                                    \
-    for (i=0;i < 4; i++)                                            \
-        {                                                               \
-            ks[i] = k.v[i];                                             \
-            X.v[i]  = in.v[i];                                          \
-            ks[4] ^= k.v[i];                                        \
-        }                                                               \
-                                                                        \
-    /* Insert initial key before round 0 */                             \
-    X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
-                                                                        \
-    if(Nrounds>0){                                                      \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>1){                                                      \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>2){                                                      \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>3){                                                      \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>3){                                                      \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \
-        X.v[4-1] += 1;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>4){                                                      \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>5){                                                      \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>6){                                                      \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>7){                                                      \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>7){                                                      \
-        /* InjectKey(r=2) */                                            \
-        X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \
-        X.v[4-1] += 2;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>8){                                                      \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>9){                                                      \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>10){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>11){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>11){                                                     \
-        /* InjectKey(r=3) */                                            \
-        X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \
-        X.v[4-1] += 3;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>12){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>13){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>14){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>15){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>15){                                                     \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \
-        X.v[4-1] += 4;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>16){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>17){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>18){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>19){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>19){                                                     \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
-        X.v[4-1] += 5;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>20){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>21){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>22){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>23){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>23){                                                     \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \
-        X.v[4-1] += 6;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>24){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>25){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>26){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>27){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>27){                                                     \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \
-        X.v[4-1] += 7;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>28){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>29){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>30){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>31){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>31){                                                     \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \
-        X.v[4-1] += 8;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>32){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>33){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>34){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>35){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>35){                                                     \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \
-        X.v[4-1] += 9;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>36){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>37){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>38){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>39){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>39){                                                     \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
-        X.v[4-1] += 10;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>40){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>41){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>42){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>43){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>43){                                                     \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \
-        X.v[4-1] += 11;     /* X.v[WCNT4-1] += r  */                \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>44){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>45){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>46){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>47){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>47){                                                     \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \
-        X.v[4-1] += 12;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>48){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>49){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>50){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>51){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>51){                                                     \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \
-        X.v[4-1] += 13;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>52){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>53){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>54){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>55){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>55){                                                     \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[4]; X.v[1] += ks[0]; X.v[2] += ks[1]; X.v[3] += ks[2]; \
-        X.v[4-1] += 14;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>56){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>57){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>58){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>59){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>59){                                                     \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[0]; X.v[1] += ks[1]; X.v[2] += ks[2]; X.v[3] += ks[3]; \
-        X.v[4-1] += 15;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>60){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>61){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>62){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>63){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>63){                                                     \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[1]; X.v[1] += ks[2]; X.v[2] += ks[3]; X.v[3] += ks[4]; \
-        X.v[4-1] += 16;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>64){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_0_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_0_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>65){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_1_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_1_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>66){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_2_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_2_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>67){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_3_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_3_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>67){                                                     \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[2]; X.v[1] += ks[3]; X.v[2] += ks[4]; X.v[3] += ks[0]; \
-        X.v[4-1] += 17;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    if(Nrounds>68){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_4_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_4_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>69){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_5_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_5_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>70){                                                     \
-        X.v[0] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_6_0); X.v[1] ^= X.v[0]; \
-        X.v[2] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_6_1); X.v[3] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>71){                                                     \
-        X.v[0] += X.v[3]; X.v[3] = RotL_##W(X.v[3],R_##W##x4_7_0); X.v[3] ^= X.v[0]; \
-        X.v[2] += X.v[1]; X.v[1] = RotL_##W(X.v[1],R_##W##x4_7_1); X.v[1] ^= X.v[2]; \
-    }                                                                   \
-    if(Nrounds>71){                                                     \
-        /* InjectKey(r=1) */                                            \
-        X.v[0] += ks[3]; X.v[1] += ks[4]; X.v[2] += ks[0]; X.v[3] += ks[1]; \
-        X.v[4-1] += 18;     /* X.v[WCNT4-1] += r  */                 \
-    }                                                                   \
-                                                                        \
-    return X;                                                           \
-}                                                                       \
- /** @ingroup ThreefryNxW */                                            \
-enum r123_enum_threefry4x##W { threefry4x##W##_rounds = THREEFRY4x##W##_DEFAULT_ROUNDS };       \
-R123_CUDA_DEVICE R123_STATIC_INLINE R123_FORCE_INLINE(threefry4x##W##_ctr_t threefry4x##W(threefry4x##W##_ctr_t in, threefry4x##W##_key_t k)); \
-R123_CUDA_DEVICE R123_STATIC_INLINE                                     \
-threefry4x##W##_ctr_t threefry4x##W(threefry4x##W##_ctr_t in, threefry4x##W##_key_t k){ \
-    return threefry4x##W##_R(threefry4x##W##_rounds, in, k);            \
-}
-/** \endcond */
-
-_threefry2x_tpl(64)
-_threefry2x_tpl(32)
-_threefry4x_tpl(64)
-_threefry4x_tpl(32)
-
-/* gcc4.5 and 4.6 seem to optimize a macro-ized threefryNxW better
-   than a static inline function.  Why?  */
-#define threefry2x32(c,k) threefry2x32_R(threefry2x32_rounds, c, k)
-#define threefry4x32(c,k) threefry4x32_R(threefry4x32_rounds, c, k)
-#define threefry2x64(c,k) threefry2x64_R(threefry2x64_rounds, c, k)
-#define threefry4x64(c,k) threefry4x64_R(threefry4x64_rounds, c, k)
-
-#ifdef __cplusplus
-/** \cond HIDDEN_FROM_DOXYGEN */
-#define _threefryNxWclass_tpl(NxW)                                      \
-namespace r123{                                                     \
-template<unsigned int R>                                                  \
- struct Threefry##NxW##_R{                                              \
-    typedef threefry##NxW##_ctr_t ctr_type;                             \
-    typedef threefry##NxW##_key_t key_type;                             \
-    typedef threefry##NxW##_key_t ukey_type;                            \
-    static const unsigned int rounds=R;                                 \
-   inline R123_CUDA_DEVICE R123_FORCE_INLINE(ctr_type operator()(ctr_type ctr, key_type key)){ \
-        R123_STATIC_ASSERT(R<=72, "threefry is only unrolled up to 72 rounds\n"); \
-        return threefry##NxW##_R(R, ctr, key);                              \
-    }                                                                   \
-};                                                                      \
- typedef Threefry##NxW##_R<threefry##NxW##_rounds> Threefry##NxW;       \
-} // namespace r123
-
-/** \endcond */
-
-_threefryNxWclass_tpl(2x32)
-_threefryNxWclass_tpl(4x32)
-_threefryNxWclass_tpl(2x64)
-_threefryNxWclass_tpl(4x64)
-
-/* The _tpl macros don't quite work to do string-pasting inside comments.
-   so we just write out the boilerplate documentation four times... */
-
-/** 
-@defgroup ThreefryNxW Threefry Classes and Typedefs
-
-The ThreefryNxW classes export the member functions, typedefs and
-operator overloads required by a @ref CBRNG "CBRNG" class.
-
-As described in  
-<a href="http://dl.acm.org/citation.cfm?doid=2063405"><i>Parallel Random Numbers:  As Easy as 1, 2, 3</i> </a>, 
-the Threefry family is closely related to the Threefish block cipher from
-<a href="http://www.skein-hash.info/"> Skein Hash Function</a>.  
-Threefry is \b not suitable for cryptographic use.
-
-Threefry uses integer addition, bitwise rotation, xor and permutation of words to randomize its output.
-
-@class r123::Threefry2x32_R 
-@ingroup ThreefryNxW
-
-exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
-
-The template argument, ROUNDS, is the number of times the Threefry round
-function will be applied.
-
-As of September 2011, the authors know of no statistical flaws with
-ROUNDS=13 or more for Threefry2x32.
-
-@typedef r123::Threefry2x32
-@ingroup ThreefryNxW
-  Threefry2x32 is equivalent to Threefry2x32_R<20>.    With 20 rounds,
-  Threefry2x32 has a considerable safety margin over the minimum number
-  of rounds with no known statistical flaws, but still has excellent
-   performance. 
-
-@class r123::Threefry2x64_R 
-@ingroup ThreefryNxW
-
-exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
-
-The template argument, ROUNDS, is the number of times the Threefry round
-function will be applied.
-
-In November 2011, the authors discovered that 13 rounds of
-Threefry2x64 sequenced by strided, interleaved key and counter
-increments failed a very long (longer than the default BigCrush
-length) WeightDistrub test.  At the same time, it was confirmed that
-14 rounds passes much longer tests (up to 5x10^12 samples) of a
-similar nature.  The authors know of no statistical flaws with
-ROUNDS=14 or more for Threefry2x64.
-
-@typedef r123::Threefry2x64
-@ingroup ThreefryNxW
-  Threefry2x64 is equivalent to Threefry2x64_R<20>.    With 20 rounds,
-  Threefry2x64 has a considerable safety margin over the minimum number
-  of rounds with no known statistical flaws, but still has excellent
-   performance. 
-
-
-
-@class r123::Threefry4x32_R 
-@ingroup ThreefryNxW
-
-exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
-
-The template argument, ROUNDS, is the number of times the Threefry round
-function will be applied.
-
-As of September 2011, the authors know of no statistical flaws with
-ROUNDS=12 or more for Threefry4x32.
-
-@typedef r123::Threefry4x32
-@ingroup ThreefryNxW
-  Threefry4x32 is equivalent to Threefry4x32_R<20>.    With 20 rounds,
-  Threefry4x32 has a considerable safety margin over the minimum number
-  of rounds with no known statistical flaws, but still has excellent
-   performance. 
-
-
-
-@class r123::Threefry4x64_R 
-@ingroup ThreefryNxW
-
-exports the member functions, typedefs and operator overloads required by a @ref CBRNG "CBRNG" class.
-
-The template argument, ROUNDS, is the number of times the Threefry round
-function will be applied.
-
-As of September 2011, the authors know of no statistical flaws with
-ROUNDS=12 or more for Threefry4x64.
-
-@typedef r123::Threefry4x64
-@ingroup ThreefryNxW
-  Threefry4x64 is equivalent to Threefry4x64_R<20>.    With 20 rounds,
-  Threefry4x64 has a considerable safety margin over the minimum number
-  of rounds with no known statistical flaws, but still has excellent
-   performance. 
-*/
-
-#endif
-
-#endif
-- 
2.26.2