#ifndef GRID_CARTESIAN_BASE_H
#define GRID_CARTESIAN_BASE_H

#include <Grid.h>
#include <Grid_communicator.h>

namespace Grid{

  class GridBase : public CartesianCommunicator {

  public:

    // Give Lattice access
    template<class object> friend class Lattice;

    GridBase(std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {};

    // FIXME
    // protected:
    // Lattice wide random support. Not yet fully implemented. Need a seed strategy
    // and one generator per site.
    //    std::default_random_engine generator;
    //    static std::mt19937 generator( 9 );

    //////////////////////////////////////////////////////////////////////
    // Communicator provides information on the processor grid
    //////////////////////////////////////////////////////////////////////
    //    unsigned long    _ndimension;
    //    std::vector<int> _processors;     // processor grid
    //    int              _processor;      // linear processor rank
    //    std::vector<int> _processor_coor; // linear processor coordinate
    //////////////////////////////////////////////////////////////////////

    // Physics Grid information.
    std::vector<int> _simd_layout;  // Which dimensions get relayed out over simd lanes.
    std::vector<int> _fdimensions;  // Global dimensions of array prior to cb removal
    std::vector<int> _gdimensions;  // Global dimensions of array after cb removal
    std::vector<int> _ldimensions;  // Local dimensions of array with processor images removed
    std::vector<int> _rdimensions;  // Reduced local dimensions with simd lane images and processor images removed
    std::vector<int> _ostride;      // Outer stride for each dimension
    std::vector<int> _istride;      // Inner stride i.e. within simd lanes
    int _osites;                    // _isites*_osites = product(dimensions).
    int _isites;
    std::vector<int> _slice_block;  // Subslice information
    std::vector<int> _slice_stride;
    std::vector<int> _slice_nblock;

    // Might need these at some point
    //    std::vector<int> _lstart; // local start of array in gcoors: _processor_coor[d]*_ldimensions[d]
    //    std::vector<int> _lend;   // local end of array in gcoors:   _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1

  public:

    ////////////////////////////////////////////////////////////////
    // Checkerboarding interface is virtual and overridden by
    // GridCartesian / GridRedBlackCartesian
    ////////////////////////////////////////////////////////////////
    virtual int CheckerBoarded(int dim)=0;
    virtual int CheckerBoard(std::vector<int> site)=0;
    virtual int CheckerBoardDestination(int source_cb,int shift)=0;
    virtual int CheckerBoardShift(int source_cb,int dim,int shift,int osite)=0;

    inline int CheckerBoardFromOindex (int Oindex){
      std::vector<int> ocoor;
      oCoorFromOindex(ocoor,Oindex);
      int ss=0;
      for(int d=0;d<_ndimension;d++){
        ss=ss+ocoor[d];
      }
      return ss&0x1;
    }

    //////////////////////////////////////////////////////////////////////////////////////////////
    // Local layout calculations
    //////////////////////////////////////////////////////////////////////////////////////////////
    // These routines are key. Subdivide the linearised cartesian index into
    //      "inner" index identifying which simd lane of the vectorised object is associated with a coordinate
    //      "outer" index identifying which element of _odata in class "Lattice" is associated with a coordinate.
    //
    // Compared to, say, Blitz++ we simply need to store BOTH an inner stride and an outer
    // stride per dimension. The cost of evaluating the indexing information is doubled for an n-dimensional
    // coordinate. Note, however, that for data parallel operations the "inner" indexing cost is not paid and all
    // lanes are operated upon simultaneously.
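    //
    // Worked example of the decomposition (illustrative only; the concrete numbers
    // assume the usual lexicographic stride convention _ostride[d] = prod_{e<d} _rdimensions[e]
    // and _istride[d] = prod_{e<d} _simd_layout[e], which is set up by the derived grids,
    // not by this base class):
    //
    //   local volume _ldimensions = {8,8,8,8}, _simd_layout = {1,1,2,2}
    //   =>           _rdimensions = {8,8,4,4}, _ostride = {1,8,64,256}, _istride = {1,1,1,2}
    //
    //   local coordinate lcoor = {3,5,6,7}:
    //     oIndex(lcoor) = 1*3 + 8*5 + 64*(6%4) + 256*(7%4) = 939    (element of _odata)
    //     iIndex(lcoor) = 1*(3/8) + 1*(5/8) + 1*(6/4) + 2*(7/4) = 3 (simd lane within that element)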
    virtual int oIndex(std::vector<int> &coor)
    {
      int idx=0;
      // Works with either global or local coordinates
      for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*(coor[d]%_rdimensions[d]);
      return idx;
    }
    inline int oIndexReduced(std::vector<int> &ocoor)
    {
      int idx=0;
      // ocoor is already reduced so we can eliminate the modulo operation
      // for fast indexing and inline the routine
      for(int d=0;d<_ndimension;d++) idx+=_ostride[d]*ocoor[d];
      return idx;
    }
    inline void oCoorFromOindex (std::vector<int>& coor,int Oindex){
      coor.resize(_ndimension);
      for(int d=0;d<_ndimension;d++){
        coor[d] = Oindex % _rdimensions[d];
        Oindex  = Oindex / _rdimensions[d];
      }
    }

    //////////////////////////////////////////////////////////
    // SIMD lane addressing
    //////////////////////////////////////////////////////////
    inline int iIndex(std::vector<int> &lcoor)
    {
      int idx=0;
      for(int d=0;d<_ndimension;d++) idx+=_istride[d]*(lcoor[d]/_rdimensions[d]);
      return idx;
    }
    inline void iCoorFromIindex(std::vector<int> &coor,int lane)
    {
      coor.resize(_ndimension);
      for(int d=0;d<_ndimension;d++){
        coor[d] = lane % _simd_layout[d];
        lane    = lane / _simd_layout[d];
      }
    }
    inline int PermuteDim(int dimension){
      return _simd_layout[dimension]>1;
    }
    inline int PermuteType(int dimension){
      int permute_type=0;
      for(int d=_ndimension-1;d>dimension;d--){
        if (_simd_layout[d]>1 ) permute_type++;
      }
      return permute_type;
    }

    ////////////////////////////////////////////////////////////////
    // Array sizing queries
    ////////////////////////////////////////////////////////////////
    inline int iSites(void) { return _isites; };
    inline int Nsimd(void)  { return _isites; }; // Synonymous with iSites
    inline int oSites(void) { return _osites; };
    inline int lSites(void) { return _isites*_osites; };
    inline int gSites(void) { return _isites*_osites*_Nprocessors; };
    inline int Nd    (void) { return _ndimension;};
    inline const std::vector<int> &FullDimensions(void)         { return _fdimensions;};
    inline const std::vector<int> &GlobalDimensions(void)       { return _gdimensions;};
    inline const std::vector<int> &LocalDimensions(void)        { return _ldimensions;};
    inline const std::vector<int> &VirtualLocalDimensions(void) { return _ldimensions;};

    ////////////////////////////////////////////////////////////////
    // Global addressing
    ////////////////////////////////////////////////////////////////
    void RankIndexToGlobalCoor(int rank, int o_idx, int i_idx , std::vector<int> &gcoor)
    {
      gcoor.resize(_ndimension);
      std::vector<int> coor(_ndimension);

      // Processor coordinate gives the origin of this rank's local volume
      ProcessorCoorFromRank(rank,coor);
      for(int mu=0;mu<_ndimension;mu++) gcoor[mu] = _ldimensions[mu]*coor[mu];

      // SIMD lane coordinate offsets by whole reduced-volume blocks
      iCoorFromIindex(coor,i_idx);
      for(int mu=0;mu<_ndimension;mu++) gcoor[mu] += _rdimensions[mu]*coor[mu];

      // Outer coordinate gives the position within the reduced block
      oCoorFromOindex (coor,o_idx);
      for(int mu=0;mu<_ndimension;mu++) gcoor[mu] += coor[mu];
    }
    void RankIndexCbToFullGlobalCoor(int rank, int o_idx, int i_idx, int cb,std::vector<int> &fcoor)
    {
      RankIndexToGlobalCoor(rank,o_idx,i_idx,fcoor);
      if(CheckerBoarded(0)){
        fcoor[0] = fcoor[0]*2+cb;
      }
    }
    void ProcessorCoorLocalCoorToGlobalCoor(std::vector<int> &Pcoor,std::vector<int> &Lcoor,std::vector<int> &gcoor)
    {
      gcoor.resize(_ndimension);
      for(int mu=0;mu<_ndimension;mu++) gcoor[mu] = Pcoor[mu]*_ldimensions[mu]+Lcoor[mu];
    }
    void GlobalCoorToProcessorCoorLocalCoor(std::vector<int> &pcoor,std::vector<int> &lcoor,const std::vector<int> &gcoor)
    {
      pcoor.resize(_ndimension);
      lcoor.resize(_ndimension);
      for(int mu=0;mu<_ndimension;mu++){
        pcoor[mu] = gcoor[mu]/_ldimensions[mu];
        lcoor[mu] = gcoor[mu]%_ldimensions[mu];
      }
    }
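    // Illustration of the global decomposition (same assumed layout as the worked
    // example above: _ldimensions = {8,8,8,8}, _simd_layout = {1,1,2,2}, on a
    // {2,1,1,1} processor grid; the numbers are for orientation only):
    //
    //   global coordinate gcoor = {11,5,6,7}
    //     pcoor = gcoor/_ldimensions = {1,0,0,0}   (which rank owns the site)
    //     lcoor = gcoor%_ldimensions = {3,5,6,7}   (coordinate within that rank)
    //
    // GlobalCoorToRankIndex below then maps pcoor to a rank via RankFromProcessorCoor
    // and lcoor to (o_idx,i_idx) via oIndex/iIndex, i.e. o_idx = 939, i_idx = 3 in
    // the example above.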
    void GlobalCoorToRankIndex(int &rank, int &o_idx, int &i_idx ,const std::vector<int> &gcoor)
    {
      std::vector<int> pcoor;
      std::vector<int> lcoor;
      GlobalCoorToProcessorCoorLocalCoor(pcoor,lcoor,gcoor);
      rank  = RankFromProcessorCoor(pcoor);
      i_idx = iIndex(lcoor);
      o_idx = oIndex(lcoor);
    }
  };
}
#endif
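
// Usage sketch (illustrative and commented out: GridCartesian is the concrete grid
// defined elsewhere, and its constructor argument order below is an assumption):
//
//   std::vector<int> latt ({16,8,8,8}); // global lattice
//   std::vector<int> simd ({1,1,2,2});  // simd lanes per dimension
//   std::vector<int> mpi  ({2,1,1,1});  // processor grid
//   GridCartesian    Grid(latt,simd,mpi);
//
//   std::vector<int> gcoor({11,5,6,7});
//   int rank,o_idx,i_idx;
//   Grid.GlobalCoorToRankIndex(rank,o_idx,i_idx,gcoor);
//
//   std::vector<int> check;
//   Grid.RankIndexToGlobalCoor(rank,o_idx,i_idx,check);  // round trip: check == gcoor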