1
0
mirror of https://github.com/paboyle/Grid.git synced 2024-11-10 07:55:35 +00:00

Major rework of extract/merge/permute processing debugged and working.

This commit is contained in:
Peter Boyle 2015-04-06 11:26:24 +01:00
parent 9e597ac50a
commit 982274e5a0
24 changed files with 291 additions and 574 deletions

3
Grid.h
View File

@ -42,11 +42,10 @@
#endif
#include <Grid_aligned_allocator.h>
#include <Grid_simd.h>
#include <Grid_math_types.h>
#include <Grid_Cartesian.h>
#include <Grid_aligned_allocator.h>
#include <Grid_aligned_allocator.h>
#include <Grid_Lattice.h>
#include <Grid_QCD.h>

View File

@ -8,48 +8,6 @@ namespace Grid{
/////////////////////////////////////////////////////////////////////////////////////////
// Grid Support.
/////////////////////////////////////////////////////////////////////////////////////////
//
// Cartesian grid inheritance
// Grid::GridBase
// |
// __________|___________
// | |
// Grid::GridCartesian Grid::GridCartesianRedBlack
//
// TODO: document the following as an API guaranteed public interface
/*
* Rough map of functionality against QDP++ Layout
*
* Param | Grid | QDP++
* -----------------------------------------
* | |
* void | oSites, iSites, lSites | sitesOnNode
* void | gSites | vol
* | |
* gcoor | oIndex, iIndex | linearSiteIndex // no virtual node in QDP
* lcoor | |
*
* void | CheckerBoarded | - // No checkerboarded in QDP
* void | FullDimensions | lattSize
* void | GlobalDimensions | lattSize // No checkerboarded in QDP
* void | LocalDimensions | subgridLattSize
* void | VirtualLocalDimensions | subgridLattSize // no virtual node in QDP
* | |
* int x 3 | oiSiteRankToGlobal | siteCoords
* | ProcessorCoorLocalCoorToGlobalCoor |
* | |
* vector<int> | GlobalCoorToRankIndex | nodeNumber(coord)
* vector<int> | GlobalCoorToProcessorCoorLocalCoor| nodeCoord(coord)
* | |
* void | Processors | logicalSize // returns cart array shape
* void | ThisRank | nodeNumber(); // returns this node rank
* void | ThisProcessorCoor | // returns this node coor
* void | isBoss(void) | primaryNode();
* | |
* | RankFromProcessorCoor | getLogicalCoorFrom(node)
* | ProcessorCoorFromRank | getNodeNumberFrom(logical_coord)
*/
class GridBase : public CartesianCommunicator {
public:
@ -60,7 +18,8 @@ public:
GridBase(std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};
//protected:
//FIXME
// protected:
// Lattice wide random support. not yet fully implemented. Need seed strategy
// and one generator per site.
// std::default_random_engine generator;
@ -165,7 +124,16 @@ public:
lane = lane / _simd_layout[d];
}
}
inline int PermuteDim(int dimension){
return _simd_layout[dimension]>1;
}
inline int PermuteType(int dimension){
int permute_type=0;
for(int d=_ndimension-1;d>dimension;d--){
if (_simd_layout[d]>1 ) permute_type++;
}
return permute_type;
}
////////////////////////////////////////////////////////////////
// Array sizing queries
////////////////////////////////////////////////////////////////
@ -399,8 +367,6 @@ public:
////////////////////////////////////////////////////////////////////////////////////////////
// subplane information
// It may be worth the investment of generating a more general subplane "iterator",
// and providing support for threads grabbing a unit of allocation.
////////////////////////////////////////////////////////////////////////////////////////////
_slice_block.resize(_ndimension);
_slice_stride.resize(_ndimension);

View File

@ -4,17 +4,9 @@
#include "Grid.h"
namespace Grid {
// Permute the pointers 32bitx16 = 512
static int permute_map[4][16] = {
{ 1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14},
{ 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13},
{ 4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11},
{ 9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8}
};
extern int GridCshiftPermuteMap[4][16];
template<class vobj>
class Lattice
@ -37,11 +29,10 @@ public:
#include <Grid_cshift.h>
// overloading Grid::conformable but no conformable in Grid ...?:w
template<class obj1,class obj2>
friend void conformable(const Lattice<obj1> &lhs,const Lattice<obj2> &rhs);
// Performance difference between operator * and mult is troubling.
// FIXME Performance difference between operator * and mult is troubling.
// Auto move constructor seems to lose surprisingly much.
// Site wise binary operations
@ -182,23 +173,20 @@ public:
}}
};
// FIXME Implement a consistent seed management strategy
friend void gaussian(Lattice<vobj> &l){
// Zero mean, unit variance.
std::normal_distribution<double> distribution(0.0,1.0);
Real *v_ptr = (Real *)&l._odata[0];
size_t v_len = l._grid->oSites()*sizeof(vobj);
size_t d_len = v_len/sizeof(Real);
// Not a parallel RNG. Could make up some seed per 4d site, seed
// per hypercube type scheme.
for(int i=0;i<d_len;i++){
v_ptr[i]= drand48();
}
};
// Unary functions and Unops
// Unary negation
friend inline Lattice<vobj> operator -(const Lattice<vobj> &r) {
Lattice<vobj> ret(r._grid);
#pragma omp parallel for

View File

@ -24,10 +24,7 @@ namespace QCD {
typedef iSinglet<Real > TReal; // This is painful. Tensor singlet complex type.
typedef iSinglet<vIntegerF > vTIntegerF;
typedef iSinglet<vIntegerD > vTIntegerD;
typedef iSinglet<vIntegerC > vTIntegerC;
typedef iSinglet<vIntegerZ > vTIntegerZ;
typedef iSinglet<vInteger > vTInteger;
typedef iSpinMatrix<Complex > SpinMatrix;
typedef iColourMatrix<Complex > ColourMatrix;
@ -46,12 +43,9 @@ namespace QCD {
typedef iColourVector<vComplex > vColourVector;
typedef iSpinColourVector<vComplex > vSpinColourVector;
typedef Lattice<vTComplex> LatticeComplex;
typedef Lattice<vTComplex> LatticeComplex;
typedef Lattice<vTIntegerF> LatticeIntegerF; // Predicates for "where"
typedef Lattice<vTIntegerD> LatticeIntegerD;
typedef Lattice<vTIntegerC> LatticeIntegerC;
typedef Lattice<vTIntegerZ> LatticeIntegerZ;
typedef Lattice<vTInteger> LatticeInteger; // Predicates for "where"
typedef Lattice<vColourMatrix> LatticeColourMatrix;
typedef Lattice<vSpinMatrix> LatticeSpinMatrix;

View File

@ -1,5 +1,8 @@
#ifndef GRID_ALIGNED_ALLOCATOR_H
#define GRID_ALIGNED_ALLOCATOR_H
#include <immintrin.h>
namespace Grid {
////////////////////////////////////////////////////////////////////

View File

@ -10,9 +10,6 @@
/* AVX512 */
/* #undef AVX512 */
/* GRID_COMMS_FAKE */
/* #undef GRID_COMMS_FAKE */
/* GRID_COMMS_MPI */
#define GRID_COMMS_MPI 1

View File

@ -9,9 +9,6 @@
/* AVX512 */
#undef AVX512
/* GRID_COMMS_FAKE */
#undef GRID_COMMS_FAKE
/* GRID_COMMS_MPI */
#undef GRID_COMMS_MPI

View File

@ -1,17 +1,5 @@
#ifndef _GRID_CSHIFT_COMMON_H_
#define _GRID_CSHIFT_COMMON_H_
//////////////////////////////////////////////////////////////////////////////////////////
// Must not lose sight that goal is to be able to construct really efficient
// gather to a point stencil code. CSHIFT is not the best way, so probably need
// additional stencil support.
//
// Stencil based code could pre-exchange haloes and use a table lookup for neighbours
//
// Lattice <foo> could also allocate haloes which get used for stencil code.
//
// Grid could create a neighbour index table for a given stencil.
// Could also implement CovariantCshift.
//////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////
// Gather for when there is no need to SIMD split
@ -57,7 +45,6 @@ friend void Gather_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAllo
}
}
//////////////////////////////////////////////////////
// Gather for when there *is* need to SIMD split
//////////////////////////////////////////////////////
@ -101,8 +88,6 @@ friend void Gather_plane_extract(Lattice<vobj> &rhs,std::vector<scalar_type *> p
}
}
//////////////////////////////////////////////////////
// Scatter for when there is no need to SIMD split
//////////////////////////////////////////////////////
@ -146,7 +131,6 @@ friend void Scatter_plane_simple (Lattice<vobj> &rhs,std::vector<vobj,alignedAll
}
}
//////////////////////////////////////////////////////
// Scatter for when there *is* need to SIMD split
//////////////////////////////////////////////////////
@ -190,11 +174,9 @@ friend void Scatter_plane_merge(Lattice<vobj> &rhs,std::vector<scalar_type *> po
}
}
//////////////////////////////////////////////////////
// local to node block strided copies
//////////////////////////////////////////////////////
// if lhs is odd, rhs even??
friend void Copy_plane(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask)
{
int rd = rhs._grid->_rdimensions[dimension];
@ -284,40 +266,6 @@ friend void Copy_plane_permute(Lattice<vobj>& lhs,Lattice<vobj> &rhs, int dimens
// Local to node Cshift
//////////////////////////////////////////////////////
// Work out whether to permute
// ABCDEFGH -> AE BF CG DH permute wrap num
//
// Shift 0 AE BF CG DH 0 0 0 0 ABCDEFGH 0 0
// Shift 1 BF CG DH AE 0 0 0 1 BCDEFGHA 0 1
// Shift 2 CG DH AE BF 0 0 1 1 CDEFGHAB 0 2
// Shift 3 DH AE BF CG 0 1 1 1 DEFGHABC 0 3
// Shift 4 AE BF CG DH 1 1 1 1 EFGHABCD 1 0
// Shift 5 BF CG DH AE 1 1 1 0 FGHACBDE 1 1
// Shift 6 CG DH AE BF 1 1 0 0 GHABCDEF 1 2
// Shift 7 DH AE BF CG 1 0 0 0 HABCDEFG 1 3
// Suppose 4way simd in one dim.
// ABCDEFGH -> AECG BFDH permute wrap num
// Shift 0 AECG BFDH 0,00 0,00 ABCDEFGH 0 0
// Shift 1 BFDH CGEA 0,00 1,01 BCDEFGHA 0 1
// Shift 2 CGEA DHFB 1,01 1,01 CDEFGHAB 1 0
// Shift 3 DHFB EAGC 1,01 1,11 DEFGHABC 1 1
// Shift 4 EAGC FBHD 1,11 1,11 EFGHABCD 2 0
// Shift 5 FBHD GCAE 1,11 1,10 FGHABCDE 2 1
// Shift 6 GCAE HDBF 1,10 1,10 GHABCDEF 3 0
// Shift 7 HDBF AECG 1,10 0,00 HABCDEFG 3 1
// Generalisation to 8 way simd, 16 way simd required.
//
// Need log2 Nway masks. consisting of
// 1 bit 256 bit granule
// 2 bit 128 bit granule
// 4 bits 64 bit granule
// 8 bits 32 bit granules
//
// 15 bits....
friend void Cshift_local(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int shift)
{
int sshift[2];
@ -333,35 +281,31 @@ friend void Cshift_local(Lattice<vobj>& ret,Lattice<vobj> &rhs,int dimension,int
}
}
friend Lattice<vobj> Cshift_local(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
{
int fd = rhs._grid->_fdimensions[dimension];
int rd = rhs._grid->_rdimensions[dimension];
int ld = rhs._grid->_ldimensions[dimension];
int gd = rhs._grid->_gdimensions[dimension];
GridBase *grid = rhs._grid;
int fd = grid->_fdimensions[dimension];
int rd = grid->_rdimensions[dimension];
int ld = grid->_ldimensions[dimension];
int gd = grid->_gdimensions[dimension];
// Map to always positive shift modulo global full dimension.
shift = (shift+fd)%fd;
ret.checkerboard = rhs._grid->CheckerBoardDestination(rhs.checkerboard,shift);
ret.checkerboard = grid->CheckerBoardDestination(rhs.checkerboard,shift);
// the permute type
int permute_dim =rhs._grid->_simd_layout[dimension]>1 ;
int permute_type=0;
for(int d=0;d<dimension;d++){
if (rhs._grid->_simd_layout[d]>1 ) permute_type++;
}
int permute_dim =grid->PermuteDim(dimension);
int permute_type=grid->PermuteType(dimension);
for(int x=0;x<rd;x++){
int o = 0;
int bo = x * rhs._grid->_ostride[dimension];
int bo = x * grid->_ostride[dimension];
int cb= (cbmask==0x2)? 1 : 0;
int sshift = rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb);
int sshift = grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb);
int sx = (x+sshift)%rd;
int permute_slice=0;

View File

@ -146,10 +146,7 @@ friend void Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimensi
assert(shift>=0);
assert(shift<fd);
int permute_type=0;
for(int d=0;d<dimension;d++){
if (grid->_simd_layout[d]>1 ) permute_type++;
}
int permute_type=grid->PermuteType(dimension);
///////////////////////////////////////////////
// Simd direction uses an extract/merge pair
@ -236,9 +233,12 @@ friend void Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimensi
if ( x< rd-num ) permute_slice=wrap;
else permute_slice = 1-wrap;
int toggle_bit = (Nsimd>>(permute_type+1));
int PermuteMap;
for(int i=0;i<Nsimd;i++){
if ( permute_slice ) {
pointers[i] = rpointers[permute_map[permute_type][i]];
PermuteMap=i^toggle_bit;
pointers[i] = rpointers[PermuteMap];
} else {
pointers[i] = rpointers[i];
}
@ -260,8 +260,4 @@ friend void Cshift_comms_simd(Lattice<vobj> &ret,Lattice<vobj> &rhs,int dimensi
}
}
}
#endif

View File

@ -20,8 +20,8 @@ int main (int argc, char ** argv)
std::vector<int> mpi_layout(4);
mpi_layout[0]=2;
mpi_layout[1]=1;
mpi_layout[2]=1;
mpi_layout[1]=2;
mpi_layout[2]=2;
mpi_layout[3]=2;
#ifdef AVX512

View File

@ -10,32 +10,6 @@
//
// Vector types are arch dependent
////////////////////////////////////////////////////////////////////////
// TODO
//
// Base class to share common code between vRealF, VComplexF etc...
//
// lattice Broad cast assignment
//
// where() support
// implement with masks, and/or? Type of the mask & boolean support?
//
// Unary functions
// cos,sin, tan, acos, asin, cosh, acosh, tanh, sinh, // Scalar<vReal> only arg
// exp, log, sqrt, fabs
//
// transposeColor, transposeSpin,
// adjColor, adjSpin,
// traceColor, traceSpin.
// peekColor, peekSpin + pokeColor PokeSpin
//
// copyMask.
//
// localMaxAbs
//
// norm2,
// sumMulti equivalent.
// Fourier transform equivalent.
//
////////////////////////////////////////////////////////////
// SIMD Alignment controls
@ -71,9 +45,6 @@ namespace Grid {
typedef std::complex<RealD> ComplexD;
typedef std::complex<Real> Complex;
inline RealF adj(const RealF & r){ return r; }
inline RealF conj(const RealF & r){ return r; }
inline ComplexD localInnerProduct(const ComplexD & l, const ComplexD & r) { return conj(l)*r; }
@ -122,7 +93,6 @@ namespace Grid {
template<> inline void ZeroIt(RealD &arg){ arg=0; };
#if defined (SSE2)
typedef __m128 fvec;
typedef __m128d dvec;
@ -162,31 +132,46 @@ namespace Grid {
inline void v_prefetch0(int size, const char *ptr){};
#endif
};
/////////////////////////////////////////////////////////////////
// Generic extract/merge/permute
/////////////////////////////////////////////////////////////////
template<class vsimd,class scalar,int Nsimd>
template<class vsimd,class scalar>
inline void Gextract(vsimd &y,std::vector<scalar *> &extracted){
// Bounce off stack is painful
// temporary hack while I figure out the right interface
scalar buf[Nsimd];
vstore(y,buf);
for(int i=0;i<Nsimd;i++){
*extracted[i] = buf[i];
extracted[i]++;
#if 1
// FIXME: bounce off stack is painful
// temporary hack while I figure out better way.
// There are intrinsics to do this work without the storage.
int Nsimd = extracted.size();
{
std::vector<scalar,alignedAllocator<scalar> > buf(Nsimd);
vstore(y,&buf[0]);
for(int i=0;i<Nsimd;i++){
*extracted[i] = buf[i];
extracted[i]++;
}
}
#else
int NSo = extracted.size();
int NSv = vsimd::Nsimd();
int sparse= NSv/NSo;
for(int i=0;i<NSv;i+=sparse){
}
#endif
};
template<class vsimd,class scalar,int Nsimd>
template<class vsimd,class scalar>
inline void Gmerge(vsimd &y,std::vector<scalar *> &extracted){
scalar buf[Nsimd];
#if 1
int Nsimd = extracted.size();
std::vector<scalar> buf(Nsimd);
for(int i=0;i<Nsimd;i++){
buf[i]=*extracted[i];
extracted[i]++;
}
vset(y,buf);
vset(y,&buf[0]);
#else
#endif
};
//////////////////////////////////////////////////////////
@ -197,8 +182,6 @@ inline void Gmerge(vsimd &y,std::vector<scalar *> &extracted){
// Permute 3 possible on longer iVector lengths (512bit = 8 double = 16 single)
// Permute 4 possible on half precision @512bit vectors.
//////////////////////////////////////////////////////////
// Should be able to make the permute/extract/merge independent of the
// vector subtype and reduce the volume of code.
template<class vsimd>
inline void Gpermute(vsimd &y,vsimd b,int perm){
switch (perm){
@ -229,6 +212,7 @@ inline void Gpermute(vsimd &y,vsimd b,int perm){
default: assert(0); break;
}
};
};
#include <Grid_vRealF.h>
#include <Grid_vRealD.h>

12
Grid_stencil.h Normal file
View File

@ -0,0 +1,12 @@
//////////////////////////////////////////////////////////////////////////////////////////
// Must not lose sight that goal is to be able to construct really efficient
// gather to a point stencil code. CSHIFT is not the best way, so probably need
// additional stencil support.
//
// Stencil based code could pre-exchange haloes and use a table lookup for neighbours
//
// Lattice <foo> could also allocate haloes which get used for stencil code.
//
// Grid could create a neighbour index table for a given stencil.
// Could also implement CovariantCshift.
//////////////////////////////////////////////////////////////////////////////////////////

View File

@ -5,7 +5,7 @@
namespace Grid {
class vComplexD {
protected:
public:
zvec v;
public:
typedef zvec vector_type;
@ -154,64 +154,27 @@ namespace Grid {
return ret;
};
/////////////////////////////////////////////////////////////////
// Extract
/////////////////////////////////////////////////////////////////
friend inline void extract(vComplexD &y,std::vector<ComplexD *> &extracted){
// Bounce off stack is painful
// temporary hack while I figure out the right interface
const int Nsimd = vComplexD::Nsimd();
std::vector<ComplexD> buf(Nsimd);
////////////////////////////////////////////////////////////////////
// General permute; assumes vector length is same across
// all subtypes; may not be a good assumption, but could
// add the vector width as a template param for BG/Q for example
////////////////////////////////////////////////////////////////////
friend inline void permute(vComplexD &y,vComplexD b,int perm)
{
Gpermute<vComplexD>(y,b,perm);
}
friend inline void merge(vComplexD &y,std::vector<ComplexD *> &extracted)
{
Gmerge<vComplexD,ComplexD >(y,extracted);
}
friend inline void extract(vComplexD &y,std::vector<ComplexD *> &extracted)
{
Gextract<vComplexD,ComplexD>(y,extracted);
}
vstore(y,&buf[0]);
for(int i=0;i<Nsimd;i++){
*extracted[i] = buf[i];
extracted[i]++;
}
};
friend inline void merge(vComplexD &y,std::vector<ComplexD *> &extracted){
// Bounce off stack is painful
// temporary hack while I figure out the right interface
const int Nsimd = vComplexD::Nsimd();
std::vector<ComplexD> buf(Nsimd);
for(int i=0;i<Nsimd;i++){
buf[i]=*extracted[i];
extracted[i]++;
}
vset(y,&buf[0]);
};
/////////////////////////////////////////////////////////////////
// Permute
/////////////////////////////////////////////////////////////////
friend inline void permute(vComplexD &y,vComplexD b,int perm){
switch (perm){
// 2 complex=>1 permute
#if defined(AVX1)||defined(AVX2)
case 0: y.v = _mm256_permute2f128_pd(b.v,b.v,0x01); break;
// AB => BA i.e. ab cd =>cd ab
#endif
#ifdef SSE2
break;
#endif
#ifdef AVX512
// 4 complex=>2 permute
// ABCD => BADC i.e. abcd efgh => cdab ghef
// ABCD => CDAB i.e. abcd efgh => efgh abcd
case 0: y.v = _mm512_swizzle_pd(b.v,_MM_SWIZ_REG_BADC); break;
case 1: y.v = _mm512_permute4f128_ps(b.v,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); // permute for double is not implemented
#endif
#ifdef QPX
#error // Not implemented yet
#endif
default: assert(0); break;
}
};
////////////////////////////////////////////////////////////////////////
// FIXME: gonna remove these load/store, get, set, prefetch
////////////////////////////////////////////////////////////////////////
void vload(zvec& a){
this->v = a;
}
@ -296,7 +259,7 @@ friend inline void vstore(vComplexD &ret, ComplexD *a){
#endif
return ret;
}
// REDUCE
// REDUCE FIXME must be a cleaner implementation
friend inline ComplexD Reduce(const vComplexD & in)
{
#if defined (AVX1) || defined(AVX2)

View File

@ -4,7 +4,9 @@
namespace Grid {
class vComplexF {
protected:
// protected:
public:
cvec v;
public:
@ -129,75 +131,11 @@ namespace Grid {
#endif
return ret;
};
/////////////////////////////////////////////////////////////////
// Extract
/////////////////////////////////////////////////////////////////
friend inline void extract(vComplexF &y,std::vector<ComplexF *> &extracted){
// Bounce off heap is painful
// temporary hack while I figure out the right interface
vComplexF vbuf;
ComplexF *buf = (ComplexF *)&vbuf;
vstore(y,&buf[0]);
for(int i=0;i<vComplexF::Nsimd();i++){
*extracted[i] = buf[i];
extracted[i]++;
}
};
friend inline void merge(vComplexF &y,std::vector<ComplexF *> &extracted){
// Bounce off stack is painful
// temporary hack while I figure out the right interface
const int Nsimd = vComplexF::Nsimd();
vComplexF vbuf;
ComplexF *buf = (ComplexF *)&vbuf;
for(int i=0;i<Nsimd;i++){
buf[i]=*extracted[i];
extracted[i]++;
}
vset(y,&buf[0]);
};
/////////////////////////////////////////////////////////////////
// Permute
/////////////////////////////////////////////////////////////////
friend inline void permute(vComplexF &y,vComplexF b,int perm){
switch (perm){
#if defined(AVX1)||defined(AVX2)
//HERE
// 4 complex=>2 permutes
// case 0 ABCD->BADC
// case 1 ABCD->CDAB
case 0: y.v = _mm256_shuffle_ps(b.v,b.v,_MM_SHUFFLE(1,0,3,2)); break;
case 1: y.v = _mm256_permute2f128_ps(b.v,b.v,0x01); break;
#endif
#ifdef SSE2
case 0: y.v = _mm_shuffle_ps(b.v,b.v,_MM_SHUFFLE(1,0,3,2));break;
#endif
#ifdef AVX512
//#error should permute for 512
// 8 complex=>3 permutes
// case 0 ABCD EFGH -> BADC FEHG
// case 1 ABCD EFGH -> CDAB GHEF
// case 2 ABCD EFGH -> EFGH ABCD
case 0: y.v = _mm512_swizzle_ps(b.v,_MM_SWIZ_REG_CDAB); break; // OK
case 1: y.v = _mm512_swizzle_ps(b.v,_MM_SWIZ_REG_BADC); break; // OK
case 2: y.v = _mm512_permute4f128_ps(b.v, (_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break; // OK
#endif
#ifdef QPX
#error
#endif
default: assert(0); break;
}
};
////////////////////////////////////////////////////////////////////////
// FIXME: gonna remove these load/store, get, set, prefetch
////////////////////////////////////////////////////////////////////////
friend inline void vset(vComplexF &ret, Complex *a){
#if defined (AVX1)|| defined (AVX2)
ret.v = _mm256_set_ps(a[3].imag(),a[3].real(),a[2].imag(),a[2].real(),a[1].imag(),a[1].real(),a[0].imag(),a[0].real());
@ -358,6 +296,20 @@ friend inline void vstore(vComplexF &ret, ComplexF *a){
return *this;
}
friend inline void permute(vComplexF &y,vComplexF b,int perm)
{
Gpermute<vComplexF>(y,b,perm);
}
friend inline void merge(vComplexF &y,std::vector<ComplexF *> &extracted)
{
Gmerge<vComplexF,ComplexF >(y,extracted);
}
friend inline void extract(vComplexF &y,std::vector<ComplexF *> &extracted)
{
Gextract<vComplexF,ComplexF>(y,extracted);
}
};
inline vComplexF localInnerProduct(const vComplexF & l, const vComplexF & r) { return conj(l)*r; }
@ -371,7 +323,5 @@ friend inline void vstore(vComplexF &ret, ComplexF *a){
return l*r;
}
}
#endif

View File

@ -235,70 +235,11 @@ friend inline void vstore(vInteger &ret, Integer *a){
}
friend inline void merge(vIntegerF &y,std::vector<Integer *> &extracted)
{
Gmerge<vIntegerF,Integer,sizeof(ivec)/sizeof(float) >(y,extracted);
Gmerge<vIntegerF,Integer>(y,extracted);
}
friend inline void extract(vIntegerF &y,std::vector<Integer *> &extracted)
{
Gextract<vIntegerF,Integer,sizeof(ivec)/sizeof(float) >(y,extracted);
}
};
class vIntegerD : public vInteger
{
public:
static inline int Nsimd(void) { return sizeof(ivec)/sizeof(double);}
friend inline void permute(vIntegerD &y,vIntegerD b,int perm)
{
Gpermute<vIntegerD>(y,b,perm);
}
friend inline void merge(vIntegerD &y,std::vector<Integer *> &extracted)
{
Gmerge<vIntegerD,Integer,sizeof(ivec)/sizeof(double) >(y,extracted);
}
friend inline void extract(vIntegerD &y,std::vector<Integer *> &extracted)
{
Gextract<vIntegerD,Integer,sizeof(ivec)/sizeof(double) >(y,extracted);
}
};
class vIntegerC : public vInteger
{
public:
static inline int Nsimd(void) { return sizeof(ivec)/sizeof(ComplexF);}
friend inline void permute(vIntegerC &y,vIntegerC b,int perm)
{
Gpermute<vIntegerC>(y,b,perm);
}
friend inline void merge(vIntegerC &y,std::vector<Integer *> &extracted)
{
Gmerge<vIntegerC,Integer,sizeof(ivec)/sizeof(ComplexF) >(y,extracted);
}
friend inline void extract(vIntegerC &y,std::vector<Integer *> &extracted)
{
Gextract<vIntegerC,Integer,sizeof(ivec)/sizeof(ComplexF) >(y,extracted);
}
};
class vIntegerZ : public vInteger
{
public:
static inline int Nsimd(void) { return sizeof(ivec)/sizeof(ComplexD);}
friend inline void permute(vIntegerZ &y,vIntegerZ b,int perm)
{
Gpermute<vIntegerZ>(y,b,perm);
}
friend inline void merge(vIntegerZ &y,std::vector<Integer *> &extracted)
{
Gmerge<vIntegerZ,Integer,sizeof(ivec)/sizeof(ComplexD) >(y,extracted);
}
friend inline void extract(vIntegerZ &y,std::vector<Integer *> &extracted)
{
Gextract<vIntegerZ,Integer,sizeof(ivec)/sizeof(ComplexD) >(y,extracted);
Gextract<vIntegerF,Integer>(y,extracted);
}
};

View File

@ -5,7 +5,7 @@
namespace Grid {
class vRealD {
protected:
public:
dvec v; // dvec is double precision vector
public:
@ -99,72 +99,27 @@ namespace Grid {
return ret;
};
/////////////////////////////////////////////////////////////////
// Extract
/////////////////////////////////////////////////////////////////
friend inline void extract(vRealD &y,std::vector<RealD *> &extracted){
// Bounce off stack is painful
// temporary hack while I figure out the right interface
const int Nsimd = vRealD::Nsimd();
RealD buf[Nsimd];
////////////////////////////////////////////////////////////////////
// General permute; assumes vector length is same across
// all subtypes; may not be a good assumption, but could
// add the vector width as a template param for BG/Q for example
////////////////////////////////////////////////////////////////////
friend inline void permute(vRealD &y,vRealD b,int perm)
{
Gpermute<vRealD>(y,b,perm);
}
friend inline void merge(vRealD &y,std::vector<RealD *> &extracted)
{
Gmerge<vRealD,RealD >(y,extracted);
}
friend inline void extract(vRealD &y,std::vector<RealD *> &extracted)
{
Gextract<vRealD,RealD>(y,extracted);
}
vstore(y,buf);
for(int i=0;i<Nsimd;i++){
*extracted[i] = buf[i];
extracted[i]++;
}
};
friend inline void merge(vRealD &y,std::vector<RealD *> &extracted){
// Bounce off stack is painful
// temporary hack while I figure out the right interface
const int Nsimd = vRealD::Nsimd();
RealD buf[Nsimd];
for(int i=0;i<Nsimd;i++){
buf[i]=*extracted[i];
extracted[i]++;
}
vset(y,buf);
};
// Permute plans
// Permute 0 every ABCDEFGH -> BA DC FE HG
// Permute 1 every ABCDEFGH -> CD AB GH EF
// Permute 2 every ABCDEFGH -> EFGH ABCD
// Permute 3 possible on longer iVector lengths (512bit = 8 double = 16 single)
// Permute 4 possible on half precision @512bit vectors.
friend inline void permute(vRealD &y,vRealD b,int perm){
switch (perm){
// 4 doubles=>2 permutes
#if defined(AVX1)||defined(AVX2)
case 0: y.v = _mm256_shuffle_pd(b.v,b.v,0x5); break;
case 1: y.v = _mm256_permute2f128_pd(b.v,b.v,0x01); break;
#endif
#ifdef SSE2
case 0: y.v = _mm_shuffle_pd(b.v,b.v,0x1); break;
#endif
#ifdef AVX512
// 8 double => 3 permutes
// Permute 0 every abcd efgh -> badc fehg
// Permute 1 every abcd efgh -> cdab ghef
// Permute 2 every abcd efgh -> efgh abcd
// NOTE: mm_512_permutex_pd not implemented
// NOTE: ignore warning
case 0: y.v = _mm512_swizzle_pd(b.v,_MM_SWIZ_REG_CDAB); break;
case 1: y.v = _mm512_swizzle_pd(b.v,_MM_SWIZ_REG_BADC); break;
case 2: y.v = _mm512_permute4f128_ps(b.v,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break;
#endif
#ifdef QPX
#error
#endif
default: assert(0);break;
}
};
// gona be bye bye
////////////////////////////////////////////////////////////////////////
// FIXME: gonna remove these load/store, get, set, prefetch
////////////////////////////////////////////////////////////////////////
void vload(dvec& a){
this->v = a;
}

View File

@ -5,7 +5,7 @@
namespace Grid {
class vRealF {
protected:
public:
fvec v;
public:
@ -120,74 +120,25 @@ namespace Grid {
friend inline void vzero(vRealF &ret){vsplat(ret,0.0);}
/////////////////////////////////////////////////////////////////
// Extract
/////////////////////////////////////////////////////////////////
friend inline void extract(vRealF &y,std::vector<RealF *> &extracted){
// Bounce off stack is painful
// temporary hack while I figure out the right interface
const int Nsimd = vRealF::Nsimd();
RealF buf[Nsimd];
////////////////////////////////////////////////////////////////////
// General permute; assumes vector length is same across
// all subtypes; may not be a good assumption, but could
// add the vector width as a template param for BG/Q for example
////////////////////////////////////////////////////////////////////
friend inline void permute(vRealF &y,vRealF b,int perm)
{
Gpermute<vRealF>(y,b,perm);
}
friend inline void merge(vRealF &y,std::vector<RealF *> &extracted)
{
Gmerge<vRealF,RealF >(y,extracted);
}
friend inline void extract(vRealF &y,std::vector<RealF *> &extracted)
{
Gextract<vRealF,RealF>(y,extracted);
}
vstore(y,buf);
for(int i=0;i<Nsimd;i++){
*extracted[i] = buf[i];
extracted[i]++;
}
};
friend inline void merge(vRealF &y,std::vector<RealF *> &extracted){
// Bounce off stack is painful
// temporary hack while I figure out the right interface
const int Nsimd = vRealF::Nsimd();
RealF buf[Nsimd];
for(int i=0;i<Nsimd;i++){
buf[i]=*extracted[i];
extracted[i]++;
}
vset(y,buf);
};
//////////////////////////////////////////////////////////
// Permute
// Permute 0 every ABCDEFGH -> BA DC FE HG
// Permute 1 every ABCDEFGH -> CD AB GH EF
// Permute 2 every ABCDEFGH -> EFGH ABCD
// Permute 3 possible on longer iVector lengths (512bit = 8 double = 16 single)
// Permute 4 possible on half precision @512bit vectors.
//////////////////////////////////////////////////////////
friend inline void permute(vRealF &y,vRealF b,int perm){
switch (perm){
// 8 floats=>3 permutes
#if defined(AVX1)||defined(AVX2)
case 0: y.v = _mm256_shuffle_ps(b.v,b.v,_MM_SHUFFLE(2,3,0,1)); break;
case 1: y.v = _mm256_shuffle_ps(b.v,b.v,_MM_SHUFFLE(1,0,3,2)); break;
case 2: y.v = _mm256_permute2f128_ps(b.v,b.v,0x01); break;
#endif
#ifdef SSE2
case 0: y.v = _mm_shuffle_ps(b.v,b.v,_MM_SHUFFLE(2,3,0,1)); break;
case 1: y.v = _mm_shuffle_ps(b.v,b.v,_MM_SHUFFLE(1,0,3,2));break;
#endif
#ifdef AVX512
// 16 floats=> permutes
// Permute 0 every abcd efgh ijkl mnop -> badc fehg jilk nmpo
// Permute 1 every abcd efgh ijkl mnop -> cdab ghef jkij opmn
// Permute 2 every abcd efgh ijkl mnop -> efgh abcd mnop ijkl
// Permute 3 every abcd efgh ijkl mnop -> ijkl mnop abcd efgh
//#error not implemented should do something
case 0: y.v = _mm512_swizzle_ps(b.v,_MM_SWIZ_REG_CDAB); break;
case 1: y.v = _mm512_swizzle_ps(b.v,_MM_SWIZ_REG_BADC); break;
case 2: y.v = _mm512_permute4f128_ps(b.v,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break;
case 3: y.v = _mm512_permute4f128_ps(b.v,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break;
#endif
#ifdef QPX
#error not implemented
#endif
default: assert(0); break;
}
};
/////////////////////////////////////////////////////
// Broadcast a value across Nsimd copies.
@ -207,6 +158,8 @@ namespace Grid {
ret.v = {a,a,a,a};
#endif
}
friend inline void vset(vRealF &ret, float *a){
#if defined (AVX1)|| defined (AVX2)
ret.v = _mm256_set_ps(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]);
@ -224,6 +177,9 @@ namespace Grid {
#endif
}
////////////////////////////////////////////////////////////////////////
// FIXME: gonna remove these load/store, get, set, prefetch
////////////////////////////////////////////////////////////////////////
friend inline void vstore(vRealF &ret, float *a){
#if defined (AVX1)|| defined (AVX2)
_mm256_store_ps(a,ret.v);

View File

@ -24,7 +24,6 @@ include_HEADERS = Grid_config.h\
Grid_aligned_allocator.h\
Grid_cshift.h\
Grid_cshift_common.h\
Grid_cshift_fake.h\
Grid_cshift_mpi.h\
Grid_cshift_none.h\
Grid_math_types.h
@ -37,13 +36,10 @@ bin_PROGRAMS = Grid_main
extra_sources=
if BUILD_COMMS_MPI
extra_sources+=Grid_mpi.cc
endif
if BUILD_COMMS_FAKE
extra_sources+=Grid_fake.cc
extra_sources+=Grid_communicator_mpi.cc
endif
if BUILD_COMMS_NONE
extra_sources+=Grid_fake.cc
extra_sources+=Grid_communicator_fake.cc
endif
Grid_main_SOURCES = \

View File

@ -89,9 +89,8 @@ NORMAL_UNINSTALL = :
PRE_UNINSTALL = :
POST_UNINSTALL = :
bin_PROGRAMS = Grid_main$(EXEEXT)
@BUILD_COMMS_MPI_TRUE@am__append_1 = Grid_mpi.cc
@BUILD_COMMS_FAKE_TRUE@am__append_2 = Grid_fake.cc
@BUILD_COMMS_NONE_TRUE@am__append_3 = Grid_fake.cc
@BUILD_COMMS_MPI_TRUE@am__append_1 = Grid_communicator_mpi.cc
@BUILD_COMMS_NONE_TRUE@am__append_2 = Grid_communicator_fake.cc
subdir = .
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/configure.ac
@ -146,12 +145,13 @@ libGrid_a_LIBADD =
am_libGrid_a_OBJECTS = Grid_init.$(OBJEXT)
libGrid_a_OBJECTS = $(am_libGrid_a_OBJECTS)
PROGRAMS = $(bin_PROGRAMS)
am__Grid_main_SOURCES_DIST = Grid_main.cc Grid_mpi.cc Grid_fake.cc
@BUILD_COMMS_MPI_TRUE@am__objects_1 = Grid_mpi.$(OBJEXT)
@BUILD_COMMS_FAKE_TRUE@am__objects_2 = Grid_fake.$(OBJEXT)
@BUILD_COMMS_NONE_TRUE@am__objects_3 = Grid_fake.$(OBJEXT)
am__objects_4 = $(am__objects_1) $(am__objects_2) $(am__objects_3)
am_Grid_main_OBJECTS = Grid_main.$(OBJEXT) $(am__objects_4)
am__Grid_main_SOURCES_DIST = Grid_main.cc Grid_communicator_mpi.cc \
Grid_communicator_fake.cc
@BUILD_COMMS_MPI_TRUE@am__objects_1 = Grid_communicator_mpi.$(OBJEXT)
@BUILD_COMMS_NONE_TRUE@am__objects_2 = \
@BUILD_COMMS_NONE_TRUE@ Grid_communicator_fake.$(OBJEXT)
am__objects_3 = $(am__objects_1) $(am__objects_2)
am_Grid_main_OBJECTS = Grid_main.$(OBJEXT) $(am__objects_3)
Grid_main_OBJECTS = $(am_Grid_main_OBJECTS)
Grid_main_DEPENDENCIES = libGrid.a
AM_V_P = $(am__v_P_@AM_V@)
@ -214,8 +214,8 @@ CTAGS = ctags
CSCOPE = cscope
AM_RECURSIVE_TARGETS = cscope
am__DIST_COMMON = $(srcdir)/Grid_config.h.in $(srcdir)/Makefile.in \
AUTHORS COPYING ChangeLog INSTALL NEWS README compile depcomp \
install-sh missing
AUTHORS COPYING ChangeLog INSTALL NEWS README TODO compile \
depcomp install-sh missing
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
distdir = $(PACKAGE)-$(VERSION)
top_distdir = $(distdir)
@ -353,12 +353,11 @@ include_HEADERS = Grid_config.h\
Grid_aligned_allocator.h\
Grid_cshift.h\
Grid_cshift_common.h\
Grid_cshift_fake.h\
Grid_cshift_mpi.h\
Grid_cshift_none.h\
Grid_math_types.h
extra_sources = $(am__append_1) $(am__append_2) $(am__append_3)
extra_sources = $(am__append_1) $(am__append_2)
Grid_main_SOURCES = \
Grid_main.cc\
$(extra_sources)
@ -506,10 +505,10 @@ mostlyclean-compile:
distclean-compile:
-rm -f *.tab.c
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Grid_fake.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Grid_communicator_fake.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Grid_communicator_mpi.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Grid_init.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Grid_main.Po@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Grid_mpi.Po@am__quote@
.cc.o:
@am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<

103
TODO
View File

@ -1,4 +1,7 @@
* FIXME audit
* Remove vload/store etc..
* Replace vset with a call to merge.
* Replace vset with a call to merge.
* Conditional execution Subset, where etc...
* Coordinate information, integers etc...
@ -27,3 +30,103 @@
- BinaryWriter, TextWriter etc...
- protocol buffers?
-
// Cartesian grid inheritance
// Grid::GridBase
// |
// __________|___________
// | |
// Grid::GridCartesian Grid::GridCartesianRedBlack
//
// TODO: document the following as an API guaranteed public interface
/*
* Rough map of functionality against QDP++ Layout
*
* Param | Grid | QDP++
* -----------------------------------------
* | |
* void | oSites, iSites, lSites | sitesOnNode
* void | gSites | vol
* | |
* gcoor | oIndex, iIndex | linearSiteIndex // no virtual node in QDP
* lcoor | |
*
* void | CheckerBoarded | - // No checkerboarded in QDP
* void | FullDimensions | lattSize
* void | GlobalDimensions | lattSize // No checkerboarded in QDP
* void | LocalDimensions | subgridLattSize
* void | VirtualLocalDimensions | subgridLattSize // no virtual node in QDP
* | |
* int x 3 | oiSiteRankToGlobal | siteCoords
* | ProcessorCoorLocalCoorToGlobalCoor |
* | |
* vector<int> | GlobalCoorToRankIndex | nodeNumber(coord)
* vector<int> | GlobalCoorToProcessorCoorLocalCoor| nodeCoord(coord)
* | |
* void | Processors | logicalSize // returns cart array shape
* void | ThisRank | nodeNumber(); // returns this node rank
* void | ThisProcessorCoor | // returns this node coor
* void | isBoss(void) | primaryNode();
* | |
* | RankFromProcessorCoor | getLogicalCoorFrom(node)
* | ProcessorCoorFromRank | getNodeNumberFrom(logical_coord)
*/
// Work out whether to permute
// ABCDEFGH -> AE BF CG DH permute wrap num
//
// Shift 0 AE BF CG DH 0 0 0 0 ABCDEFGH 0 0
// Shift 1 BF CG DH AE 0 0 0 1 BCDEFGHA 0 1
// Shift 2 CG DH AE BF 0 0 1 1 CDEFGHAB 0 2
// Shift 3 DH AE BF CG 0 1 1 1 DEFGHABC 0 3
// Shift 4 AE BF CG DH 1 1 1 1 EFGHABCD 1 0
// Shift 5 BF CG DH AE 1 1 1 0 FGHACBDE 1 1
// Shift 6 CG DH AE BF 1 1 0 0 GHABCDEF 1 2
// Shift 7 DH AE BF CG 1 0 0 0 HABCDEFG 1 3
// Suppose 4way simd in one dim.
// ABCDEFGH -> AECG BFDH permute wrap num
// Shift 0 AECG BFDH 0,00 0,00 ABCDEFGH 0 0
// Shift 1 BFDH CGEA 0,00 1,01 BCDEFGHA 0 1
// Shift 2 CGEA DHFB 1,01 1,01 CDEFGHAB 1 0
// Shift 3 DHFB EAGC 1,01 1,11 DEFGHABC 1 1
// Shift 4 EAGC FBHD 1,11 1,11 EFGHABCD 2 0
// Shift 5 FBHD GCAE 1,11 1,10 FGHABCDE 2 1
// Shift 6 GCAE HDBF 1,10 1,10 GHABCDEF 3 0
// Shift 7 HDBF AECG 1,10 0,00 HABCDEFG 3 1
// Generalisation to 8 way simd, 16 way simd required.
//
// Need log2 Nway masks. consisting of
// 1 bit 256 bit granule
// 2 bit 128 bit granule
// 4 bits 64 bit granule
// 8 bits 32 bit granules
//
// 15 bits....
// TODO
//
// Base class to share common code between vRealF, VComplexF etc...
//
// lattice Broad cast assignment
//
// where() support
// implement with masks, and/or? Type of the mask & boolean support?
//
// Unary functions
// cos,sin, tan, acos, asin, cosh, acosh, tanh, sinh, // Scalar<vReal> only arg
// exp, log, sqrt, fabs
//
// transposeColor, transposeSpin,
// adjColor, adjSpin,
// traceColor, traceSpin.
// peekColor, peekSpin + pokeColor PokeSpin
//
// copyMask.
//
// localMaxAbs
//
// norm2,
// sumMulti equivalent.
// Fourier transform equivalent.
//

23
configure vendored
View File

@ -628,8 +628,6 @@ LTLIBOBJS
LIBOBJS
BUILD_COMMS_NONE_FALSE
BUILD_COMMS_NONE_TRUE
BUILD_COMMS_FAKE_FALSE
BUILD_COMMS_FAKE_TRUE
BUILD_COMMS_MPI_FALSE
BUILD_COMMS_MPI_TRUE
EGREP
@ -1369,8 +1367,7 @@ Optional Features:
--disable-openmp do not use OpenMP
--enable-simd=SSE|AVX|AVX2|AVX512
Select instructions
--enable-comms=none|fake|mpi
Select communications
--enable-comms=none|mpi Select communications
Some influential environment variables:
CXX C++ compiler command
@ -5051,12 +5048,6 @@ fi
case ${ac_COMMS} in
fake)
echo Configuring for FAKE communications
$as_echo "#define GRID_COMMS_FAKE 1" >>confdefs.h
;;
none)
echo Configuring for NO communications
@ -5082,14 +5073,6 @@ else
BUILD_COMMS_MPI_FALSE=
fi
if test "X${ac_COMMS}X" == "XfakeX" ; then
BUILD_COMMS_FAKE_TRUE=
BUILD_COMMS_FAKE_FALSE='#'
else
BUILD_COMMS_FAKE_TRUE='#'
BUILD_COMMS_FAKE_FALSE=
fi
if test "X${ac_COMMS}X" == "XnoneX" ; then
BUILD_COMMS_NONE_TRUE=
BUILD_COMMS_NONE_FALSE='#'
@ -5243,10 +5226,6 @@ if test -z "${BUILD_COMMS_MPI_TRUE}" && test -z "${BUILD_COMMS_MPI_FALSE}"; then
as_fn_error $? "conditional \"BUILD_COMMS_MPI\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
if test -z "${BUILD_COMMS_FAKE_TRUE}" && test -z "${BUILD_COMMS_FAKE_FALSE}"; then
as_fn_error $? "conditional \"BUILD_COMMS_FAKE\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5
fi
if test -z "${BUILD_COMMS_NONE_TRUE}" && test -z "${BUILD_COMMS_NONE_FALSE}"; then
as_fn_error $? "conditional \"BUILD_COMMS_NONE\" was never defined.
Usually this means the macro was only invoked conditionally." "$LINENO" 5

View File

@ -51,13 +51,9 @@ case ${ac_SIMD} in
esac
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|fake|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none])
case ${ac_COMMS} in
fake)
echo Configuring for FAKE communications
AC_DEFINE([GRID_COMMS_FAKE],[1],[GRID_COMMS_FAKE] )
;;
none)
echo Configuring for NO communications
AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] )
@ -72,7 +68,6 @@ case ${ac_COMMS} in
esac
AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" ])
AM_CONDITIONAL(BUILD_COMMS_FAKE,[ test "X${ac_COMMS}X" == "XfakeX" ])
AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])