From 982274e5a0e9f4433dd05be420e31fb98fbc4357 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Mon, 6 Apr 2015 11:26:24 +0100 Subject: [PATCH] Major rework of extract/merge/permute processing debugged and working. --- Grid.h | 3 +- Grid_Cartesian.h | 58 +++--------- Grid_Lattice.h | 20 +---- Grid_QCD.h | 12 +-- Grid_aligned_allocator.h | 3 + Grid_fake.cc => Grid_communicator_fake.cc | 0 Grid_mpi.cc => Grid_communicator_mpi.cc | 0 Grid_config.h | 3 - Grid_config.h.in | 3 - Grid_cshift_common.h | 76 +++------------- Grid_cshift_mpi.h | 14 ++- Grid_main.cc | 4 +- Grid_simd.h | 74 ++++++---------- Grid_stencil.h | 12 +++ Grid_vComplexD.h | 81 +++++------------ Grid_vComplexF.h | 92 +++++-------------- Grid_vInteger.h | 63 +------------ Grid_vRealD.h | 87 +++++------------- Grid_vRealF.h | 90 +++++-------------- Makefile.am | 8 +- Makefile.in | 29 +++--- TODO | 103 ++++++++++++++++++++++ configure | 23 +---- configure.ac | 7 +- 24 files changed, 291 insertions(+), 574 deletions(-) rename Grid_fake.cc => Grid_communicator_fake.cc (100%) rename Grid_mpi.cc => Grid_communicator_mpi.cc (100%) create mode 100644 Grid_stencil.h diff --git a/Grid.h b/Grid.h index 184e7e36..0a4024bf 100644 --- a/Grid.h +++ b/Grid.h @@ -42,11 +42,10 @@ #endif +#include #include #include #include -#include -#include #include #include diff --git a/Grid_Cartesian.h b/Grid_Cartesian.h index 004c2fa5..c891edcc 100644 --- a/Grid_Cartesian.h +++ b/Grid_Cartesian.h @@ -8,48 +8,6 @@ namespace Grid{ ///////////////////////////////////////////////////////////////////////////////////////// // Grid Support. ///////////////////////////////////////////////////////////////////////////////////////// -// -// Cartesian grid inheritance -// Grid::GridBase -// | -// __________|___________ -// | | -// Grid::GridCartesian Grid::GridCartesianRedBlack -// -// TODO: document the following as an API guaranteed public interface - - /* - * Rough map of functionality against QDP++ Layout - * - * Param | Grid | QDP++ - * ----------------------------------------- - * | | - * void | oSites, iSites, lSites | sitesOnNode - * void | gSites | vol - * | | - * gcoor | oIndex, iIndex | linearSiteIndex // no virtual node in QDP - * lcoor | | - * - * void | CheckerBoarded | - // No checkerboarded in QDP - * void | FullDimensions | lattSize - * void | GlobalDimensions | lattSize // No checkerboarded in QDP - * void | LocalDimensions | subgridLattSize - * void | VirtualLocalDimensions | subgridLattSize // no virtual node in QDP - * | | - * int x 3 | oiSiteRankToGlobal | siteCoords - * | ProcessorCoorLocalCoorToGlobalCoor | - * | | - * vector | GlobalCoorToRankIndex | nodeNumber(coord) - * vector | GlobalCoorToProcessorCoorLocalCoor| nodeCoord(coord) - * | | - * void | Processors | logicalSize // returns cart array shape - * void | ThisRank | nodeNumber(); // returns this node rank - * void | ThisProcessorCoor | // returns this node coor - * void | isBoss(void) | primaryNode(); - * | | - * | RankFromProcessorCoor | getLogicalCoorFrom(node) - * | ProcessorCoorFromRank | getNodeNumberFrom(logical_coord) - */ class GridBase : public CartesianCommunicator { public: @@ -60,7 +18,8 @@ public: GridBase(std::vector & processor_grid) : CartesianCommunicator(processor_grid) {}; - //protected: + //FIXME + // protected: // Lattice wide random support. not yet fully implemented. Need seed strategy // and one generator per site. // std::default_random_engine generator; @@ -165,7 +124,16 @@ public: lane = lane / _simd_layout[d]; } } - + inline int PermuteDim(int dimension){ + return _simd_layout[dimension]>1; + } + inline int PermuteType(int dimension){ + int permute_type=0; + for(int d=_ndimension-1;d>dimension;d--){ + if (_simd_layout[d]>1 ) permute_type++; + } + return permute_type; + } //////////////////////////////////////////////////////////////// // Array sizing queries //////////////////////////////////////////////////////////////// @@ -399,8 +367,6 @@ public: //////////////////////////////////////////////////////////////////////////////////////////// // subplane information - // It may be worth the investment of generating a more general subplane "iterator", - // and providing support for threads grabbing a unit of allocation. //////////////////////////////////////////////////////////////////////////////////////////// _slice_block.resize(_ndimension); _slice_stride.resize(_ndimension); diff --git a/Grid_Lattice.h b/Grid_Lattice.h index 2aa46b97..ebf68560 100644 --- a/Grid_Lattice.h +++ b/Grid_Lattice.h @@ -4,17 +4,9 @@ #include "Grid.h" - namespace Grid { -// Permute the pointers 32bitx16 = 512 -static int permute_map[4][16] = { - { 1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14}, - { 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13}, - { 4,5,6,7,0,1,2,3,12,13,14,15,8,9,10,11}, - { 9,10,11,12,13,14,15,0,1,2,3,4,5,6,7,8} -}; - + extern int GridCshiftPermuteMap[4][16]; template class Lattice @@ -37,11 +29,10 @@ public: #include - // overloading Grid::conformable but no conformable in Grid ...?:w template friend void conformable(const Lattice &lhs,const Lattice &rhs); - // Performance difference between operator * and mult is troubling. + // FIXME Performance difference between operator * and mult is troubling. // Auto move constructor seems to lose surprisingly much. // Site wise binary operations @@ -182,23 +173,20 @@ public: }} }; + // FIXME Implement a consistent seed management strategy friend void gaussian(Lattice &l){ // Zero mean, unit variance. std::normal_distribution distribution(0.0,1.0); Real *v_ptr = (Real *)&l._odata[0]; size_t v_len = l._grid->oSites()*sizeof(vobj); size_t d_len = v_len/sizeof(Real); - - // Not a parallel RNG. Could make up some seed per 4d site, seed - // per hypercube type scheme. + for(int i=0;i operator -(const Lattice &r) { Lattice ret(r._grid); #pragma omp parallel for diff --git a/Grid_QCD.h b/Grid_QCD.h index a0f7801a..3e852633 100644 --- a/Grid_QCD.h +++ b/Grid_QCD.h @@ -24,10 +24,7 @@ namespace QCD { typedef iSinglet TReal; // This is painful. Tensor singlet complex type. - typedef iSinglet vTIntegerF; - typedef iSinglet vTIntegerD; - typedef iSinglet vTIntegerC; - typedef iSinglet vTIntegerZ; + typedef iSinglet vTInteger; typedef iSpinMatrix SpinMatrix; typedef iColourMatrix ColourMatrix; @@ -46,12 +43,9 @@ namespace QCD { typedef iColourVector vColourVector; typedef iSpinColourVector vSpinColourVector; - typedef Lattice LatticeComplex; + typedef Lattice LatticeComplex; - typedef Lattice LatticeIntegerF; // Predicates for "where" - typedef Lattice LatticeIntegerD; - typedef Lattice LatticeIntegerC; - typedef Lattice LatticeIntegerZ; + typedef Lattice LatticeInteger; // Predicates for "where" typedef Lattice LatticeColourMatrix; typedef Lattice LatticeSpinMatrix; diff --git a/Grid_aligned_allocator.h b/Grid_aligned_allocator.h index dbaa0ba3..9008105a 100644 --- a/Grid_aligned_allocator.h +++ b/Grid_aligned_allocator.h @@ -1,5 +1,8 @@ #ifndef GRID_ALIGNED_ALLOCATOR_H #define GRID_ALIGNED_ALLOCATOR_H + +#include + namespace Grid { //////////////////////////////////////////////////////////////////// diff --git a/Grid_fake.cc b/Grid_communicator_fake.cc similarity index 100% rename from Grid_fake.cc rename to Grid_communicator_fake.cc diff --git a/Grid_mpi.cc b/Grid_communicator_mpi.cc similarity index 100% rename from Grid_mpi.cc rename to Grid_communicator_mpi.cc diff --git a/Grid_config.h b/Grid_config.h index 8a0fdc26..bb885708 100644 --- a/Grid_config.h +++ b/Grid_config.h @@ -10,9 +10,6 @@ /* AVX512 */ /* #undef AVX512 */ -/* GRID_COMMS_FAKE */ -/* #undef GRID_COMMS_FAKE */ - /* GRID_COMMS_MPI */ #define GRID_COMMS_MPI 1 diff --git a/Grid_config.h.in b/Grid_config.h.in index ff15a834..91948fa2 100644 --- a/Grid_config.h.in +++ b/Grid_config.h.in @@ -9,9 +9,6 @@ /* AVX512 */ #undef AVX512 -/* GRID_COMMS_FAKE */ -#undef GRID_COMMS_FAKE - /* GRID_COMMS_MPI */ #undef GRID_COMMS_MPI diff --git a/Grid_cshift_common.h b/Grid_cshift_common.h index 72518d7d..08e75cff 100644 --- a/Grid_cshift_common.h +++ b/Grid_cshift_common.h @@ -1,17 +1,5 @@ #ifndef _GRID_CSHIFT_COMMON_H_ #define _GRID_CSHIFT_COMMON_H_ -////////////////////////////////////////////////////////////////////////////////////////// -// Must not lose sight that goal is to be able to construct really efficient -// gather to a point stencil code. CSHIFT is not the best way, so probably need -// additional stencil support. -// -// Stencil based code could pre-exchange haloes and use a table lookup for neighbours -// -// Lattice could also allocate haloes which get used for stencil code. -// -// Grid could create a neighbour index table for a given stencil. -// Could also implement CovariantCshift. -////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////// // Gather for when there is no need to SIMD split @@ -57,7 +45,6 @@ friend void Gather_plane_simple (Lattice &rhs,std::vector &rhs,std::vector p } } - - ////////////////////////////////////////////////////// // Scatter for when there is no need to SIMD split ////////////////////////////////////////////////////// @@ -146,7 +131,6 @@ friend void Scatter_plane_simple (Lattice &rhs,std::vector &rhs,std::vector po } } - ////////////////////////////////////////////////////// // local to node block strided copies ////////////////////////////////////////////////////// -// if lhs is odd, rhs even?? friend void Copy_plane(Lattice& lhs,Lattice &rhs, int dimension,int lplane,int rplane,int cbmask) { int rd = rhs._grid->_rdimensions[dimension]; @@ -284,40 +266,6 @@ friend void Copy_plane_permute(Lattice& lhs,Lattice &rhs, int dimens // Local to node Cshift ////////////////////////////////////////////////////// - // Work out whether to permute - // ABCDEFGH -> AE BF CG DH permute wrap num - // - // Shift 0 AE BF CG DH 0 0 0 0 ABCDEFGH 0 0 - // Shift 1 BF CG DH AE 0 0 0 1 BCDEFGHA 0 1 - // Shift 2 CG DH AE BF 0 0 1 1 CDEFGHAB 0 2 - // Shift 3 DH AE BF CG 0 1 1 1 DEFGHABC 0 3 - // Shift 4 AE BF CG DH 1 1 1 1 EFGHABCD 1 0 - // Shift 5 BF CG DH AE 1 1 1 0 FGHACBDE 1 1 - // Shift 6 CG DH AE BF 1 1 0 0 GHABCDEF 1 2 - // Shift 7 DH AE BF CG 1 0 0 0 HABCDEFG 1 3 - - // Suppose 4way simd in one dim. - // ABCDEFGH -> AECG BFDH permute wrap num - - // Shift 0 AECG BFDH 0,00 0,00 ABCDEFGH 0 0 - // Shift 1 BFDH CGEA 0,00 1,01 BCDEFGHA 0 1 - // Shift 2 CGEA DHFB 1,01 1,01 CDEFGHAB 1 0 - // Shift 3 DHFB EAGC 1,01 1,11 DEFGHABC 1 1 - // Shift 4 EAGC FBHD 1,11 1,11 EFGHABCD 2 0 - // Shift 5 FBHD GCAE 1,11 1,10 FGHABCDE 2 1 - // Shift 6 GCAE HDBF 1,10 1,10 GHABCDEF 3 0 - // Shift 7 HDBF AECG 1,10 0,00 HABCDEFG 3 1 - - // Generalisation to 8 way simd, 16 way simd required. - // - // Need log2 Nway masks. consisting of - // 1 bit 256 bit granule - // 2 bit 128 bit granule - // 4 bits 64 bit granule - // 8 bits 32 bit granules - // - // 15 bits.... - friend void Cshift_local(Lattice& ret,Lattice &rhs,int dimension,int shift) { int sshift[2]; @@ -333,35 +281,31 @@ friend void Cshift_local(Lattice& ret,Lattice &rhs,int dimension,int } } - friend Lattice Cshift_local(Lattice &ret,Lattice &rhs,int dimension,int shift,int cbmask) { - int fd = rhs._grid->_fdimensions[dimension]; - int rd = rhs._grid->_rdimensions[dimension]; - int ld = rhs._grid->_ldimensions[dimension]; - int gd = rhs._grid->_gdimensions[dimension]; - + GridBase *grid = rhs._grid; + int fd = grid->_fdimensions[dimension]; + int rd = grid->_rdimensions[dimension]; + int ld = grid->_ldimensions[dimension]; + int gd = grid->_gdimensions[dimension]; // Map to always positive shift modulo global full dimension. shift = (shift+fd)%fd; - ret.checkerboard = rhs._grid->CheckerBoardDestination(rhs.checkerboard,shift); + ret.checkerboard = grid->CheckerBoardDestination(rhs.checkerboard,shift); // the permute type - int permute_dim =rhs._grid->_simd_layout[dimension]>1 ; - int permute_type=0; - for(int d=0;d_simd_layout[d]>1 ) permute_type++; - } + int permute_dim =grid->PermuteDim(dimension); + int permute_type=grid->PermuteType(dimension); for(int x=0;x_ostride[dimension]; + int bo = x * grid->_ostride[dimension]; int cb= (cbmask==0x2)? 1 : 0; - int sshift = rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb); + int sshift = grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb); int sx = (x+sshift)%rd; int permute_slice=0; diff --git a/Grid_cshift_mpi.h b/Grid_cshift_mpi.h index 75a4dc96..4ca8f6b5 100644 --- a/Grid_cshift_mpi.h +++ b/Grid_cshift_mpi.h @@ -146,10 +146,7 @@ friend void Cshift_comms_simd(Lattice &ret,Lattice &rhs,int dimensi assert(shift>=0); assert(shift_simd_layout[d]>1 ) permute_type++; - } + int permute_type=grid->PermuteType(dimension); /////////////////////////////////////////////// // Simd direction uses an extract/merge pair @@ -236,9 +233,12 @@ friend void Cshift_comms_simd(Lattice &ret,Lattice &rhs,int dimensi if ( x< rd-num ) permute_slice=wrap; else permute_slice = 1-wrap; + int toggle_bit = (Nsimd>>(permute_type+1)); + int PermuteMap; for(int i=0;i &ret,Lattice &rhs,int dimensi } } } - - - - #endif diff --git a/Grid_main.cc b/Grid_main.cc index cb45fd85..62356436 100644 --- a/Grid_main.cc +++ b/Grid_main.cc @@ -20,8 +20,8 @@ int main (int argc, char ** argv) std::vector mpi_layout(4); mpi_layout[0]=2; - mpi_layout[1]=1; - mpi_layout[2]=1; + mpi_layout[1]=2; + mpi_layout[2]=2; mpi_layout[3]=2; #ifdef AVX512 diff --git a/Grid_simd.h b/Grid_simd.h index ffcbff15..53cc8909 100644 --- a/Grid_simd.h +++ b/Grid_simd.h @@ -10,32 +10,6 @@ // // Vector types are arch dependent //////////////////////////////////////////////////////////////////////// - // TODO - // - // Base class to share common code between vRealF, VComplexF etc... - // - // lattice Broad cast assignment - // - // where() support - // implement with masks, and/or? Type of the mask & boolean support? - // - // Unary functions - // cos,sin, tan, acos, asin, cosh, acosh, tanh, sinh, // Scalar only arg - // exp, log, sqrt, fabs - // - // transposeColor, transposeSpin, - // adjColor, adjSpin, - // traceColor, traceSpin. - // peekColor, peekSpin + pokeColor PokeSpin - // - // copyMask. - // - // localMaxAbs - // - // norm2, - // sumMulti equivalent. - // Fourier transform equivalent. - // //////////////////////////////////////////////////////////// // SIMD Alignment controls @@ -71,9 +45,6 @@ namespace Grid { typedef std::complex ComplexD; typedef std::complex Complex; - - - inline RealF adj(const RealF & r){ return r; } inline RealF conj(const RealF & r){ return r; } inline ComplexD localInnerProduct(const ComplexD & l, const ComplexD & r) { return conj(l)*r; } @@ -122,7 +93,6 @@ namespace Grid { template<> inline void ZeroIt(RealD &arg){ arg=0; }; - #if defined (SSE2) typedef __m128 fvec; typedef __m128d dvec; @@ -162,31 +132,46 @@ namespace Grid { inline void v_prefetch0(int size, const char *ptr){}; #endif -}; - ///////////////////////////////////////////////////////////////// // Generic extract/merge/permute ///////////////////////////////////////////////////////////////// -template +template inline void Gextract(vsimd &y,std::vector &extracted){ - // Bounce off stack is painful - // temporary hack while I figure out the right interface - scalar buf[Nsimd]; - vstore(y,buf); - for(int i=0;i > buf(Nsimd); + vstore(y,&buf[0]); + for(int i=0;i +template inline void Gmerge(vsimd &y,std::vector &extracted){ - scalar buf[Nsimd]; +#if 1 + int Nsimd = extracted.size(); + std::vector buf(Nsimd); for(int i=0;i &extracted){ // Permute 3 possible on longer iVector lengths (512bit = 8 double = 16 single) // Permute 4 possible on half precision @512bit vectors. ////////////////////////////////////////////////////////// -// Should be able to make the permute/extract/merge independent of the -// vector subtype and reduce the volume of code. template inline void Gpermute(vsimd &y,vsimd b,int perm){ switch (perm){ @@ -229,6 +212,7 @@ inline void Gpermute(vsimd &y,vsimd b,int perm){ default: assert(0); break; } }; +}; #include #include diff --git a/Grid_stencil.h b/Grid_stencil.h new file mode 100644 index 00000000..d93204fc --- /dev/null +++ b/Grid_stencil.h @@ -0,0 +1,12 @@ +////////////////////////////////////////////////////////////////////////////////////////// +// Must not lose sight that goal is to be able to construct really efficient +// gather to a point stencil code. CSHIFT is not the best way, so probably need +// additional stencil support. +// +// Stencil based code could pre-exchange haloes and use a table lookup for neighbours +// +// Lattice could also allocate haloes which get used for stencil code. +// +// Grid could create a neighbour index table for a given stencil. +// Could also implement CovariantCshift. +////////////////////////////////////////////////////////////////////////////////////////// diff --git a/Grid_vComplexD.h b/Grid_vComplexD.h index d1caefb1..330de26e 100644 --- a/Grid_vComplexD.h +++ b/Grid_vComplexD.h @@ -5,7 +5,7 @@ namespace Grid { class vComplexD { - protected: + public: zvec v; public: typedef zvec vector_type; @@ -154,64 +154,27 @@ namespace Grid { return ret; }; - ///////////////////////////////////////////////////////////////// - // Extract - ///////////////////////////////////////////////////////////////// - friend inline void extract(vComplexD &y,std::vector &extracted){ - // Bounce off stack is painful - // temporary hack while I figure out the right interface - const int Nsimd = vComplexD::Nsimd(); - std::vector buf(Nsimd); + //////////////////////////////////////////////////////////////////// + // General permute; assumes vector length is same across + // all subtypes; may not be a good assumption, but could + // add the vector width as a template param for BG/Q for example + //////////////////////////////////////////////////////////////////// + friend inline void permute(vComplexD &y,vComplexD b,int perm) + { + Gpermute(y,b,perm); + } + friend inline void merge(vComplexD &y,std::vector &extracted) + { + Gmerge(y,extracted); + } + friend inline void extract(vComplexD &y,std::vector &extracted) + { + Gextract(y,extracted); + } - vstore(y,&buf[0]); - - for(int i=0;i &extracted){ - // Bounce off stack is painful - // temporary hack while I figure out the right interface - const int Nsimd = vComplexD::Nsimd(); - std::vector buf(Nsimd); - - for(int i=0;i1 permute -#if defined(AVX1)||defined(AVX2) - case 0: y.v = _mm256_permute2f128_pd(b.v,b.v,0x01); break; - // AB => BA i.e. ab cd =>cd ab -#endif -#ifdef SSE2 - break; -#endif -#ifdef AVX512 - // 4 complex=>2 permute - // ABCD => BADC i.e. abcd efgh => cdab ghef - // ABCD => CDAB i.e. abcd efgh => efgh abcd - case 0: y.v = _mm512_swizzle_pd(b.v,_MM_SWIZ_REG_BADC); break; - case 1: y.v = _mm512_permute4f128_ps(b.v,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); // permute for double is not implemented - -#endif -#ifdef QPX -#error // Not implemented yet -#endif - default: assert(0); break; - } - }; + //////////////////////////////////////////////////////////////////////// + // FIXME: gonna remove these load/store, get, set, prefetch + //////////////////////////////////////////////////////////////////////// void vload(zvec& a){ this->v = a; } @@ -296,7 +259,7 @@ friend inline void vstore(vComplexD &ret, ComplexD *a){ #endif return ret; } -// REDUCE +// REDUCE FIXME must be a cleaner implementation friend inline ComplexD Reduce(const vComplexD & in) { #if defined (AVX1) || defined(AVX2) diff --git a/Grid_vComplexF.h b/Grid_vComplexF.h index 578228a3..b7fb3d6a 100644 --- a/Grid_vComplexF.h +++ b/Grid_vComplexF.h @@ -4,7 +4,9 @@ namespace Grid { class vComplexF { - protected: + // protected: + + public: cvec v; public: @@ -129,75 +131,11 @@ namespace Grid { #endif return ret; }; + - ///////////////////////////////////////////////////////////////// - // Extract - ///////////////////////////////////////////////////////////////// - friend inline void extract(vComplexF &y,std::vector &extracted){ - // Bounce off heap is painful - // temporary hack while I figure out the right interface - vComplexF vbuf; - ComplexF *buf = (ComplexF *)&vbuf; - - vstore(y,&buf[0]); - for(int i=0;i &extracted){ - // Bounce off stack is painful - // temporary hack while I figure out the right interface - const int Nsimd = vComplexF::Nsimd(); - vComplexF vbuf; - ComplexF *buf = (ComplexF *)&vbuf; - - for(int i=0;i2 permutes - // case 0 ABCD->BADC - // case 1 ABCD->CDAB - case 0: y.v = _mm256_shuffle_ps(b.v,b.v,_MM_SHUFFLE(1,0,3,2)); break; - case 1: y.v = _mm256_permute2f128_ps(b.v,b.v,0x01); break; -#endif -#ifdef SSE2 - case 0: y.v = _mm_shuffle_ps(b.v,b.v,_MM_SHUFFLE(1,0,3,2));break; -#endif -#ifdef AVX512 -//#error should permute for 512 - // 8 complex=>3 permutes - // case 0 ABCD EFGH -> BADC FEHG - // case 1 ABCD EFGH -> CDAB GHEF - // case 2 ABCD EFGH -> EFGH ABCD - case 0: y.v = _mm512_swizzle_ps(b.v,_MM_SWIZ_REG_CDAB); break; // OK - case 1: y.v = _mm512_swizzle_ps(b.v,_MM_SWIZ_REG_BADC); break; // OK - case 2: y.v = _mm512_permute4f128_ps(b.v, (_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break; // OK - -#endif -#ifdef QPX -#error -#endif - default: assert(0); break; - } - }; - - + //////////////////////////////////////////////////////////////////////// + // FIXME: gonna remove these load/store, get, set, prefetch + //////////////////////////////////////////////////////////////////////// friend inline void vset(vComplexF &ret, Complex *a){ #if defined (AVX1)|| defined (AVX2) ret.v = _mm256_set_ps(a[3].imag(),a[3].real(),a[2].imag(),a[2].real(),a[1].imag(),a[1].real(),a[0].imag(),a[0].real()); @@ -358,6 +296,20 @@ friend inline void vstore(vComplexF &ret, ComplexF *a){ return *this; } + friend inline void permute(vComplexF &y,vComplexF b,int perm) + { + Gpermute(y,b,perm); + } + friend inline void merge(vComplexF &y,std::vector &extracted) + { + Gmerge(y,extracted); + } + friend inline void extract(vComplexF &y,std::vector &extracted) + { + Gextract(y,extracted); + } + + }; inline vComplexF localInnerProduct(const vComplexF & l, const vComplexF & r) { return conj(l)*r; } @@ -371,7 +323,5 @@ friend inline void vstore(vComplexF &ret, ComplexF *a){ return l*r; } - - } #endif diff --git a/Grid_vInteger.h b/Grid_vInteger.h index 7a0c1c4f..6ddce191 100644 --- a/Grid_vInteger.h +++ b/Grid_vInteger.h @@ -235,70 +235,11 @@ friend inline void vstore(vInteger &ret, Integer *a){ } friend inline void merge(vIntegerF &y,std::vector &extracted) { - Gmerge(y,extracted); + Gmerge(y,extracted); } friend inline void extract(vIntegerF &y,std::vector &extracted) { - Gextract(y,extracted); - } - }; - - - class vIntegerD : public vInteger - { - public: - static inline int Nsimd(void) { return sizeof(ivec)/sizeof(double);} - - friend inline void permute(vIntegerD &y,vIntegerD b,int perm) - { - Gpermute(y,b,perm); - } - friend inline void merge(vIntegerD &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(vIntegerD &y,std::vector &extracted) - { - Gextract(y,extracted); - } - }; - - - class vIntegerC : public vInteger - { - public: - static inline int Nsimd(void) { return sizeof(ivec)/sizeof(ComplexF);} - - friend inline void permute(vIntegerC &y,vIntegerC b,int perm) - { - Gpermute(y,b,perm); - } - friend inline void merge(vIntegerC &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(vIntegerC &y,std::vector &extracted) - { - Gextract(y,extracted); - } - }; - - class vIntegerZ : public vInteger - { - public: - static inline int Nsimd(void) { return sizeof(ivec)/sizeof(ComplexD);} - - friend inline void permute(vIntegerZ &y,vIntegerZ b,int perm) - { - Gpermute(y,b,perm); - } - friend inline void merge(vIntegerZ &y,std::vector &extracted) - { - Gmerge(y,extracted); - } - friend inline void extract(vIntegerZ &y,std::vector &extracted) - { - Gextract(y,extracted); + Gextract(y,extracted); } }; diff --git a/Grid_vRealD.h b/Grid_vRealD.h index 1abc0804..13ceedbe 100644 --- a/Grid_vRealD.h +++ b/Grid_vRealD.h @@ -5,7 +5,7 @@ namespace Grid { class vRealD { - protected: + public: dvec v; // dvec is double precision vector public: @@ -99,72 +99,27 @@ namespace Grid { return ret; }; - ///////////////////////////////////////////////////////////////// - // Extract - ///////////////////////////////////////////////////////////////// - friend inline void extract(vRealD &y,std::vector &extracted){ - // Bounce off stack is painful - // temporary hack while I figure out the right interface - const int Nsimd = vRealD::Nsimd(); - RealD buf[Nsimd]; + //////////////////////////////////////////////////////////////////// + // General permute; assumes vector length is same across + // all subtypes; may not be a good assumption, but could + // add the vector width as a template param for BG/Q for example + //////////////////////////////////////////////////////////////////// + friend inline void permute(vRealD &y,vRealD b,int perm) + { + Gpermute(y,b,perm); + } + friend inline void merge(vRealD &y,std::vector &extracted) + { + Gmerge(y,extracted); + } + friend inline void extract(vRealD &y,std::vector &extracted) + { + Gextract(y,extracted); + } - vstore(y,buf); - - for(int i=0;i &extracted){ - // Bounce off stack is painful - // temporary hack while I figure out the right interface - const int Nsimd = vRealD::Nsimd(); - RealD buf[Nsimd]; - - for(int i=0;i BA DC FE HG - // Permute 1 every ABCDEFGH -> CD AB GH EF - // Permute 2 every ABCDEFGH -> EFGH ABCD - // Permute 3 possible on longer iVector lengths (512bit = 8 double = 16 single) - // Permute 4 possible on half precision @512bit vectors. - friend inline void permute(vRealD &y,vRealD b,int perm){ - switch (perm){ - // 4 doubles=>2 permutes -#if defined(AVX1)||defined(AVX2) - case 0: y.v = _mm256_shuffle_pd(b.v,b.v,0x5); break; - case 1: y.v = _mm256_permute2f128_pd(b.v,b.v,0x01); break; -#endif -#ifdef SSE2 - case 0: y.v = _mm_shuffle_pd(b.v,b.v,0x1); break; -#endif -#ifdef AVX512 - // 8 double => 3 permutes - // Permute 0 every abcd efgh -> badc fehg - // Permute 1 every abcd efgh -> cdab ghef - // Permute 2 every abcd efgh -> efgh abcd - // NOTE: mm_512_permutex_pd not implemented - // NOTE: ignore warning - case 0: y.v = _mm512_swizzle_pd(b.v,_MM_SWIZ_REG_CDAB); break; - case 1: y.v = _mm512_swizzle_pd(b.v,_MM_SWIZ_REG_BADC); break; - case 2: y.v = _mm512_permute4f128_ps(b.v,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break; - -#endif -#ifdef QPX -#error -#endif - default: assert(0);break; - } - }; -// gona be bye bye + //////////////////////////////////////////////////////////////////////// + // FIXME: gonna remove these load/store, get, set, prefetch + //////////////////////////////////////////////////////////////////////// void vload(dvec& a){ this->v = a; } diff --git a/Grid_vRealF.h b/Grid_vRealF.h index 22809b83..185f4da8 100644 --- a/Grid_vRealF.h +++ b/Grid_vRealF.h @@ -5,7 +5,7 @@ namespace Grid { class vRealF { - protected: + public: fvec v; public: @@ -120,74 +120,25 @@ namespace Grid { friend inline void vzero(vRealF &ret){vsplat(ret,0.0);} - ///////////////////////////////////////////////////////////////// - // Extract - ///////////////////////////////////////////////////////////////// - friend inline void extract(vRealF &y,std::vector &extracted){ - // Bounce off stack is painful - // temporary hack while I figure out the right interface - const int Nsimd = vRealF::Nsimd(); - RealF buf[Nsimd]; + //////////////////////////////////////////////////////////////////// + // General permute; assumes vector length is same across + // all subtypes; may not be a good assumption, but could + // add the vector width as a template param for BG/Q for example + //////////////////////////////////////////////////////////////////// + friend inline void permute(vRealF &y,vRealF b,int perm) + { + Gpermute(y,b,perm); + } + friend inline void merge(vRealF &y,std::vector &extracted) + { + Gmerge(y,extracted); + } + friend inline void extract(vRealF &y,std::vector &extracted) + { + Gextract(y,extracted); + } - vstore(y,buf); - for(int i=0;i &extracted){ - // Bounce off stack is painful - // temporary hack while I figure out the right interface - const int Nsimd = vRealF::Nsimd(); - RealF buf[Nsimd]; - - for(int i=0;i BA DC FE HG - // Permute 1 every ABCDEFGH -> CD AB GH EF - // Permute 2 every ABCDEFGH -> EFGH ABCD - // Permute 3 possible on longer iVector lengths (512bit = 8 double = 16 single) - // Permute 4 possible on half precision @512bit vectors. - ////////////////////////////////////////////////////////// - friend inline void permute(vRealF &y,vRealF b,int perm){ - switch (perm){ - // 8 floats=>3 permutes -#if defined(AVX1)||defined(AVX2) - case 0: y.v = _mm256_shuffle_ps(b.v,b.v,_MM_SHUFFLE(2,3,0,1)); break; - case 1: y.v = _mm256_shuffle_ps(b.v,b.v,_MM_SHUFFLE(1,0,3,2)); break; - case 2: y.v = _mm256_permute2f128_ps(b.v,b.v,0x01); break; -#endif -#ifdef SSE2 - case 0: y.v = _mm_shuffle_ps(b.v,b.v,_MM_SHUFFLE(2,3,0,1)); break; - case 1: y.v = _mm_shuffle_ps(b.v,b.v,_MM_SHUFFLE(1,0,3,2));break; -#endif -#ifdef AVX512 - // 16 floats=> permutes - // Permute 0 every abcd efgh ijkl mnop -> badc fehg jilk nmpo - // Permute 1 every abcd efgh ijkl mnop -> cdab ghef jkij opmn - // Permute 2 every abcd efgh ijkl mnop -> efgh abcd mnop ijkl - // Permute 3 every abcd efgh ijkl mnop -> ijkl mnop abcd efgh -//#error not implemented should do something - case 0: y.v = _mm512_swizzle_ps(b.v,_MM_SWIZ_REG_CDAB); break; - case 1: y.v = _mm512_swizzle_ps(b.v,_MM_SWIZ_REG_BADC); break; - case 2: y.v = _mm512_permute4f128_ps(b.v,(_MM_PERM_ENUM)_MM_SHUFFLE(2,3,0,1)); break; - case 3: y.v = _mm512_permute4f128_ps(b.v,(_MM_PERM_ENUM)_MM_SHUFFLE(1,0,3,2)); break; -#endif -#ifdef QPX -#error not implemented -#endif - default: assert(0); break; - } - }; ///////////////////////////////////////////////////// // Broadcast a value across Nsimd copies. @@ -207,6 +158,8 @@ namespace Grid { ret.v = {a,a,a,a}; #endif } + + friend inline void vset(vRealF &ret, float *a){ #if defined (AVX1)|| defined (AVX2) ret.v = _mm256_set_ps(a[7],a[6],a[5],a[4],a[3],a[2],a[1],a[0]); @@ -224,6 +177,9 @@ namespace Grid { #endif } + //////////////////////////////////////////////////////////////////////// + // FIXME: gonna remove these load/store, get, set, prefetch + //////////////////////////////////////////////////////////////////////// friend inline void vstore(vRealF &ret, float *a){ #if defined (AVX1)|| defined (AVX2) _mm256_store_ps(a,ret.v); diff --git a/Makefile.am b/Makefile.am index d3a7b329..d11f28d5 100644 --- a/Makefile.am +++ b/Makefile.am @@ -24,7 +24,6 @@ include_HEADERS = Grid_config.h\ Grid_aligned_allocator.h\ Grid_cshift.h\ Grid_cshift_common.h\ - Grid_cshift_fake.h\ Grid_cshift_mpi.h\ Grid_cshift_none.h\ Grid_math_types.h @@ -37,13 +36,10 @@ bin_PROGRAMS = Grid_main extra_sources= if BUILD_COMMS_MPI - extra_sources+=Grid_mpi.cc -endif -if BUILD_COMMS_FAKE - extra_sources+=Grid_fake.cc + extra_sources+=Grid_communicator_mpi.cc endif if BUILD_COMMS_NONE - extra_sources+=Grid_fake.cc + extra_sources+=Grid_communicator_fake.cc endif Grid_main_SOURCES = \ diff --git a/Makefile.in b/Makefile.in index 6983b96a..16981e52 100644 --- a/Makefile.in +++ b/Makefile.in @@ -89,9 +89,8 @@ NORMAL_UNINSTALL = : PRE_UNINSTALL = : POST_UNINSTALL = : bin_PROGRAMS = Grid_main$(EXEEXT) -@BUILD_COMMS_MPI_TRUE@am__append_1 = Grid_mpi.cc -@BUILD_COMMS_FAKE_TRUE@am__append_2 = Grid_fake.cc -@BUILD_COMMS_NONE_TRUE@am__append_3 = Grid_fake.cc +@BUILD_COMMS_MPI_TRUE@am__append_1 = Grid_communicator_mpi.cc +@BUILD_COMMS_NONE_TRUE@am__append_2 = Grid_communicator_fake.cc subdir = . ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/configure.ac @@ -146,12 +145,13 @@ libGrid_a_LIBADD = am_libGrid_a_OBJECTS = Grid_init.$(OBJEXT) libGrid_a_OBJECTS = $(am_libGrid_a_OBJECTS) PROGRAMS = $(bin_PROGRAMS) -am__Grid_main_SOURCES_DIST = Grid_main.cc Grid_mpi.cc Grid_fake.cc -@BUILD_COMMS_MPI_TRUE@am__objects_1 = Grid_mpi.$(OBJEXT) -@BUILD_COMMS_FAKE_TRUE@am__objects_2 = Grid_fake.$(OBJEXT) -@BUILD_COMMS_NONE_TRUE@am__objects_3 = Grid_fake.$(OBJEXT) -am__objects_4 = $(am__objects_1) $(am__objects_2) $(am__objects_3) -am_Grid_main_OBJECTS = Grid_main.$(OBJEXT) $(am__objects_4) +am__Grid_main_SOURCES_DIST = Grid_main.cc Grid_communicator_mpi.cc \ + Grid_communicator_fake.cc +@BUILD_COMMS_MPI_TRUE@am__objects_1 = Grid_communicator_mpi.$(OBJEXT) +@BUILD_COMMS_NONE_TRUE@am__objects_2 = \ +@BUILD_COMMS_NONE_TRUE@ Grid_communicator_fake.$(OBJEXT) +am__objects_3 = $(am__objects_1) $(am__objects_2) +am_Grid_main_OBJECTS = Grid_main.$(OBJEXT) $(am__objects_3) Grid_main_OBJECTS = $(am_Grid_main_OBJECTS) Grid_main_DEPENDENCIES = libGrid.a AM_V_P = $(am__v_P_@AM_V@) @@ -214,8 +214,8 @@ CTAGS = ctags CSCOPE = cscope AM_RECURSIVE_TARGETS = cscope am__DIST_COMMON = $(srcdir)/Grid_config.h.in $(srcdir)/Makefile.in \ - AUTHORS COPYING ChangeLog INSTALL NEWS README compile depcomp \ - install-sh missing + AUTHORS COPYING ChangeLog INSTALL NEWS README TODO compile \ + depcomp install-sh missing DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) distdir = $(PACKAGE)-$(VERSION) top_distdir = $(distdir) @@ -353,12 +353,11 @@ include_HEADERS = Grid_config.h\ Grid_aligned_allocator.h\ Grid_cshift.h\ Grid_cshift_common.h\ - Grid_cshift_fake.h\ Grid_cshift_mpi.h\ Grid_cshift_none.h\ Grid_math_types.h -extra_sources = $(am__append_1) $(am__append_2) $(am__append_3) +extra_sources = $(am__append_1) $(am__append_2) Grid_main_SOURCES = \ Grid_main.cc\ $(extra_sources) @@ -506,10 +505,10 @@ mostlyclean-compile: distclean-compile: -rm -f *.tab.c -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Grid_fake.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Grid_communicator_fake.Po@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Grid_communicator_mpi.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Grid_init.Po@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Grid_main.Po@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/Grid_mpi.Po@am__quote@ .cc.o: @am__fastdepCXX_TRUE@ $(AM_V_CXX)$(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< diff --git a/TODO b/TODO index 9febd6c3..d2373ba1 100644 --- a/TODO +++ b/TODO @@ -1,4 +1,7 @@ * FIXME audit +* Remove vload/store etc.. +* Replace vset with a call to merge. +* Replace vset with a call to merge. * Conditional execution Subset, where etc... * Coordinate information, integers etc... @@ -27,3 +30,103 @@ - BinaryWriter, TextWriter etc... - protocol buffers? - +// Cartesian grid inheritance +// Grid::GridBase +// | +// __________|___________ +// | | +// Grid::GridCartesian Grid::GridCartesianRedBlack +// +// TODO: document the following as an API guaranteed public interface + + /* + * Rough map of functionality against QDP++ Layout + * + * Param | Grid | QDP++ + * ----------------------------------------- + * | | + * void | oSites, iSites, lSites | sitesOnNode + * void | gSites | vol + * | | + * gcoor | oIndex, iIndex | linearSiteIndex // no virtual node in QDP + * lcoor | | + * + * void | CheckerBoarded | - // No checkerboarded in QDP + * void | FullDimensions | lattSize + * void | GlobalDimensions | lattSize // No checkerboarded in QDP + * void | LocalDimensions | subgridLattSize + * void | VirtualLocalDimensions | subgridLattSize // no virtual node in QDP + * | | + * int x 3 | oiSiteRankToGlobal | siteCoords + * | ProcessorCoorLocalCoorToGlobalCoor | + * | | + * vector | GlobalCoorToRankIndex | nodeNumber(coord) + * vector | GlobalCoorToProcessorCoorLocalCoor| nodeCoord(coord) + * | | + * void | Processors | logicalSize // returns cart array shape + * void | ThisRank | nodeNumber(); // returns this node rank + * void | ThisProcessorCoor | // returns this node coor + * void | isBoss(void) | primaryNode(); + * | | + * | RankFromProcessorCoor | getLogicalCoorFrom(node) + * | ProcessorCoorFromRank | getNodeNumberFrom(logical_coord) + */ + // Work out whether to permute + // ABCDEFGH -> AE BF CG DH permute wrap num + // + // Shift 0 AE BF CG DH 0 0 0 0 ABCDEFGH 0 0 + // Shift 1 BF CG DH AE 0 0 0 1 BCDEFGHA 0 1 + // Shift 2 CG DH AE BF 0 0 1 1 CDEFGHAB 0 2 + // Shift 3 DH AE BF CG 0 1 1 1 DEFGHABC 0 3 + // Shift 4 AE BF CG DH 1 1 1 1 EFGHABCD 1 0 + // Shift 5 BF CG DH AE 1 1 1 0 FGHACBDE 1 1 + // Shift 6 CG DH AE BF 1 1 0 0 GHABCDEF 1 2 + // Shift 7 DH AE BF CG 1 0 0 0 HABCDEFG 1 3 + + // Suppose 4way simd in one dim. + // ABCDEFGH -> AECG BFDH permute wrap num + + // Shift 0 AECG BFDH 0,00 0,00 ABCDEFGH 0 0 + // Shift 1 BFDH CGEA 0,00 1,01 BCDEFGHA 0 1 + // Shift 2 CGEA DHFB 1,01 1,01 CDEFGHAB 1 0 + // Shift 3 DHFB EAGC 1,01 1,11 DEFGHABC 1 1 + // Shift 4 EAGC FBHD 1,11 1,11 EFGHABCD 2 0 + // Shift 5 FBHD GCAE 1,11 1,10 FGHABCDE 2 1 + // Shift 6 GCAE HDBF 1,10 1,10 GHABCDEF 3 0 + // Shift 7 HDBF AECG 1,10 0,00 HABCDEFG 3 1 + + // Generalisation to 8 way simd, 16 way simd required. + // + // Need log2 Nway masks. consisting of + // 1 bit 256 bit granule + // 2 bit 128 bit granule + // 4 bits 64 bit granule + // 8 bits 32 bit granules + // + // 15 bits.... + // TODO + // + // Base class to share common code between vRealF, VComplexF etc... + // + // lattice Broad cast assignment + // + // where() support + // implement with masks, and/or? Type of the mask & boolean support? + // + // Unary functions + // cos,sin, tan, acos, asin, cosh, acosh, tanh, sinh, // Scalar only arg + // exp, log, sqrt, fabs + // + // transposeColor, transposeSpin, + // adjColor, adjSpin, + // traceColor, traceSpin. + // peekColor, peekSpin + pokeColor PokeSpin + // + // copyMask. + // + // localMaxAbs + // + // norm2, + // sumMulti equivalent. + // Fourier transform equivalent. + // diff --git a/configure b/configure index cb6bab86..c6f93abc 100755 --- a/configure +++ b/configure @@ -628,8 +628,6 @@ LTLIBOBJS LIBOBJS BUILD_COMMS_NONE_FALSE BUILD_COMMS_NONE_TRUE -BUILD_COMMS_FAKE_FALSE -BUILD_COMMS_FAKE_TRUE BUILD_COMMS_MPI_FALSE BUILD_COMMS_MPI_TRUE EGREP @@ -1369,8 +1367,7 @@ Optional Features: --disable-openmp do not use OpenMP --enable-simd=SSE|AVX|AVX2|AVX512 Select instructions - --enable-comms=none|fake|mpi - Select communications + --enable-comms=none|mpi Select communications Some influential environment variables: CXX C++ compiler command @@ -5051,12 +5048,6 @@ fi case ${ac_COMMS} in - fake) - echo Configuring for FAKE communications - -$as_echo "#define GRID_COMMS_FAKE 1" >>confdefs.h - - ;; none) echo Configuring for NO communications @@ -5082,14 +5073,6 @@ else BUILD_COMMS_MPI_FALSE= fi - if test "X${ac_COMMS}X" == "XfakeX" ; then - BUILD_COMMS_FAKE_TRUE= - BUILD_COMMS_FAKE_FALSE='#' -else - BUILD_COMMS_FAKE_TRUE='#' - BUILD_COMMS_FAKE_FALSE= -fi - if test "X${ac_COMMS}X" == "XnoneX" ; then BUILD_COMMS_NONE_TRUE= BUILD_COMMS_NONE_FALSE='#' @@ -5243,10 +5226,6 @@ if test -z "${BUILD_COMMS_MPI_TRUE}" && test -z "${BUILD_COMMS_MPI_FALSE}"; then as_fn_error $? "conditional \"BUILD_COMMS_MPI\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 fi -if test -z "${BUILD_COMMS_FAKE_TRUE}" && test -z "${BUILD_COMMS_FAKE_FALSE}"; then - as_fn_error $? "conditional \"BUILD_COMMS_FAKE\" was never defined. -Usually this means the macro was only invoked conditionally." "$LINENO" 5 -fi if test -z "${BUILD_COMMS_NONE_TRUE}" && test -z "${BUILD_COMMS_NONE_FALSE}"; then as_fn_error $? "conditional \"BUILD_COMMS_NONE\" was never defined. Usually this means the macro was only invoked conditionally." "$LINENO" 5 diff --git a/configure.ac b/configure.ac index 22d0365e..bfe745e5 100644 --- a/configure.ac +++ b/configure.ac @@ -51,13 +51,9 @@ case ${ac_SIMD} in esac -AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|fake|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) +AC_ARG_ENABLE([comms],[AC_HELP_STRING([--enable-comms=none|mpi],[Select communications])],[ac_COMMS=${enable_comms}],[ac_COMMS=none]) case ${ac_COMMS} in - fake) - echo Configuring for FAKE communications - AC_DEFINE([GRID_COMMS_FAKE],[1],[GRID_COMMS_FAKE] ) - ;; none) echo Configuring for NO communications AC_DEFINE([GRID_COMMS_NONE],[1],[GRID_COMMS_NONE] ) @@ -72,7 +68,6 @@ case ${ac_COMMS} in esac AM_CONDITIONAL(BUILD_COMMS_MPI,[ test "X${ac_COMMS}X" == "XmpiX" ]) -AM_CONDITIONAL(BUILD_COMMS_FAKE,[ test "X${ac_COMMS}X" == "XfakeX" ]) AM_CONDITIONAL(BUILD_COMMS_NONE,[ test "X${ac_COMMS}X" == "XnoneX" ])