From d5eee231e07f41ae515db9326943de17945edfd1 Mon Sep 17 00:00:00 2001 From: Peter Boyle Date: Fri, 3 Apr 2015 22:54:13 +0100 Subject: [PATCH] Clean up but no major changes --- Grid_Cartesian.h | 13 ++---- Grid_Lattice.h | 108 +++++++++++++++---------------------------- Grid_cshift_common.h | 27 ++++++----- Grid_cshift_mpi.h | 82 ++------------------------------ Grid_math_types.h | 2 +- Grid_mpi.cc | 22 +++------ Grid_vComplexD.h | 8 ++-- Grid_vComplexF.h | 7 ++- Grid_vRealD.h | 5 +- Grid_vRealF.h | 5 +- 10 files changed, 74 insertions(+), 205 deletions(-) diff --git a/Grid_Cartesian.h b/Grid_Cartesian.h index ced06825..a983639c 100644 --- a/Grid_Cartesian.h +++ b/Grid_Cartesian.h @@ -201,10 +201,8 @@ public: block = block*_rdimensions[d]; } - if ( _isites != vComplex::Nsimd()) { - printf("bad layout for grid isites %d Nsimd %d\n",_isites,vComplex::Nsimd()); - exit(0); - } + assert( _isites == vComplex::Nsimd()); + }; }; @@ -235,10 +233,8 @@ public: int ocb=CheckerBoardFromOsite(osite); if ( (source_cb+ocb)&1 ) { - printf("Checkerboard shift %d\n",(shift)/2); return (shift)/2; } else { - printf("Checkerboard shift %d\n",(shift+1)/2); return (shift+1)/2; } } @@ -314,10 +310,7 @@ public: block = block*_rdimensions[d]; } - if ( _isites != vComplex::Nsimd()) { - printf("bad layout for grid isites %d Nsimd %d\n",_isites,vComplex::Nsimd()); - exit(0); - } + assert ( _isites == vComplex::Nsimd()); }; protected: virtual int oIndex(std::vector &coor) diff --git a/Grid_Lattice.h b/Grid_Lattice.h index 6219235a..aab4fec9 100644 --- a/Grid_Lattice.h +++ b/Grid_Lattice.h @@ -30,9 +30,7 @@ public: Lattice(SimdGrid *grid) : _grid(grid) { _odata.reserve(_grid->oSites()); - if ( ((uint64_t)&_odata[0])&0xF) { - exit(-1); - } + assert((((uint64_t)&_odata[0])&0xF) ==0); checkerboard=0; } @@ -97,26 +95,25 @@ public: template friend void pokeSite(const sobj &s,Lattice &l,std::vector &site){ - if ( l.checkerboard != l._grid->CheckerBoard(site)){ - printf("Poking wrong checkerboard\n"); - exit(EXIT_FAILURE); - } + typedef typename vobj::scalar_type stype; + typedef typename vobj::vector_type vtype; - int o_index = l._grid->oIndex(site); - int i_index = l._grid->iIndex(site); + assert( l.checkerboard == l._grid->CheckerBoard(site)); + + int o_index = l._grid->oIndex(site); + int i_index = l._grid->iIndex(site); + + stype *v_ptr = (stype *)&l._odata[o_index]; + stype *s_ptr = (stype *)&s; + v_ptr = v_ptr + 2*i_index; - // BUGGY. This assumes complex real - Real *v_ptr = (Real *)&l._odata[o_index]; - Real *s_ptr = (Real *)&s; - v_ptr = v_ptr + 2*i_index; - - for(int i=0;i friend void peekSite(sobj &s,const Lattice &l,std::vector &site){ - // FIXME : define exceptions set and throw up. - if ( l.checkerboard != l._grid->CheckerBoard(site)){ - printf("Peeking wrong checkerboard\n"); - exit(EXIT_FAILURE); - } - int o_index = l._grid->oIndex(site); - int i_index = l._grid->iIndex(site); - - Real *v_ptr = (Real *)&l._odata[o_index]; - Real *s_ptr = (Real *)&s; - v_ptr = v_ptr + 2*i_index; - - for(int i=0;iCheckerBoard(site)); + + int o_index = l._grid->oIndex(site); + int i_index = l._grid->iIndex(site); + + stype *v_ptr = (stype *)&l._odata[o_index]; + stype *s_ptr = (stype *)&s; + v_ptr = v_ptr + 2*i_index; + + for(int i=0;i S, S M -> M, M S -> M through - all nested possibilities. - template class lhs,template class rhs> - class MultTypeSelector { - template using ltype = lhs - typedef lhs type; - }; - */ - template void conformable(const Lattice &lhs,const Lattice &rhs) { @@ -313,28 +301,6 @@ public: uint32_t vec_len = lhs._grid->oSites(); #pragma omp parallel for for(int ss=0;ss could also allocate haloes which get used for stencil code. +// +// Grid could create a neighbour index table for a given stencil. +// Could also implement CovariantCshift. +////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////// // Gather for when there is no need to SIMD split @@ -8,8 +20,6 @@ friend void Gather_plane_simple (Lattice &rhs,std::vector_rdimensions[dimension]; - // printf("Gather plane _simple mask %d\n",cbmask); - if ( !rhs._grid->CheckerBoarded(dimension) ) { int so = plane*rhs._grid->_ostride[dimension]; // base offset for start of plane @@ -31,7 +41,6 @@ friend void Gather_plane_simple (Lattice &rhs,std::vector_slice_nblock[dimension];n++){ for(int b=0;b_slice_block[dimension];b++){ @@ -39,11 +48,6 @@ friend void Gather_plane_simple (Lattice &rhs,std::vectorCheckerBoardFromOsite(o+b);// Could easily be a table lookup if ( ocb &cbmask ) { buffer[bo]=rhs._odata[so+o+b]; - // float * ptr = (float *)& rhs._odata[so+o+b]; - // if( (cbmask!=3)&&(jjj<8)){ - // printf("Gather_plane_simple %d %le bo %d\n",so+o+b,*ptr,bo); - // jjj++; - // } bo++; } @@ -215,7 +219,7 @@ friend void Copy_plane(Lattice& lhs,Lattice &rhs, int dimension,int int ro = rplane*rhs._grid->_ostride[dimension]; // base offset for start of plane int lo = lplane*lhs._grid->_ostride[dimension]; // base offset for start of plane int o = 0; // relative offset to base within plane - // int jjj=0; + #pragma omp parallel for collapse(2) for(int n=0;n_slice_nblock[dimension];n++){ for(int b=0;b_slice_block[dimension];b++){ @@ -224,11 +228,6 @@ friend void Copy_plane(Lattice& lhs,Lattice &rhs, int dimension,int if ( ocb&cbmask ) { lhs._odata[lo+o+b]=rhs._odata[ro+o+b]; - // float *ptr =(float *) &rhs._odata[ro+o+b]; - // if((cbmask!=0x3)&&jjj<8) { - // printf("Copy_plane %d %le n,b=%d,%d mask %d ocb %d\n",ro+o+b,*ptr,n,b,cbmask,ocb); - // jjj++; - // } } } diff --git a/Grid_cshift_mpi.h b/Grid_cshift_mpi.h index 4029f152..17aff2f4 100644 --- a/Grid_cshift_mpi.h +++ b/Grid_cshift_mpi.h @@ -1,36 +1,10 @@ #ifndef _GRID_MPI_CSHIFT_H_ #define _GRID_MPI_CSHIFT_H_ +#ifndef MAX #define MAX(x,y) ((x)>(y)?(x):(y)) #define MIN(x,y) ((x)>(y)?(y):(x)) -////////////////////////////////////////////////////////////////////////////////////////// -// Must not lose sight that goal is to be able to construct really efficient -// gather to a point stencil code. CSHIFT is not the best way, so probably need -// additional stencil support. -// -// Could still do a templated syntax tree and make CSHIFT return lattice vector. -// -// Stencil based code could pre-exchange haloes and use a table lookup for neighbours -// -// Lattice could also allocate haloes which get used for stencil code. -// -// Grid could create a neighbour index table for a given stencil. -// Could also implement CovariantCshift. -////////////////////////////////////////////////////////////////////////////////////////// - - -///////////////////////////////////////////////////////////// -// Q. Further split this into separate sub functions? -///////////////////////////////////////////////////////////// - -// CshiftCB_local -// CshiftCB_local_permute - -// Cshift_comms_splice -// Cshift_comms -// Cshift_local -// Cshift_local_permute - +#endif friend Lattice Cshift(Lattice &rhs,int dimension,int shift) { @@ -71,16 +45,10 @@ friend void Cshift_comms(Lattice& ret,Lattice &rhs,int dimension,int sshift[1] = rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,1); if ( sshift[0] == sshift[1] ) { - // printf("Cshift_comms : single pass\n"); Cshift_comms(ret,rhs,dimension,shift,0x3); } else { - // printf("Cshift_comms : two pass\n"); - // printf("call1\n"); Cshift_comms(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes - // printf("call2\n"); Cshift_comms(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration - // printf("done\n"); - } } @@ -94,11 +62,8 @@ friend void Cshift_comms_simd(Lattice& ret,Lattice &rhs,int dimensio if ( sshift[0] == sshift[1] ) { Cshift_comms_simd(ret,rhs,dimension,shift,0x3); } else { - // printf("call1 0x1 cb=even\n"); Cshift_comms_simd(ret,rhs,dimension,shift,0x1);// if checkerboard is unfavourable take two passes - // printf("call2 0x2 cb=odd\n"); Cshift_comms_simd(ret,rhs,dimension,shift,0x2);// both with block stride loop iteration - // printf("done\n"); } } @@ -120,13 +85,10 @@ friend void Cshift_comms(Lattice &ret,Lattice &rhs,int dimension,int assert(shift>=0); assert(shift_slice_nblock[dimension]*rhs._grid->_slice_block[dimension]; std::vector > send_buf(buffer_size); std::vector > recv_buf(buffer_size); - // This code could be simplified by multiple calls to single routine with extra params to - // encapsulate the difference in the code paths. int cb= (cbmask==0x2)? 1 : 0; int sshift= rhs._grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb); @@ -137,8 +99,9 @@ friend void Cshift_comms(Lattice &ret,Lattice &rhs,int dimension,int int comm_proc = (x+sshift)/rd; if (!offnode) { - // printf("local x %d sshift %d offnode %d rd %d cb %d\n",x,sshift,offnode,rd,cb); + Copy_plane(ret,rhs,dimension,x,sx,cbmask); + } else { int words = send_buf.size(); @@ -146,29 +109,13 @@ friend void Cshift_comms(Lattice &ret,Lattice &rhs,int dimension,int int bytes = words * sizeof(vobj); - // printf("nonlocal x %d sx %d sshift %d offnode %d rd %d cb %d cbmask %d rhscb %d comm_proc %d\n", - // x,sx,sshift,offnode,rd,cb,cbmask,rhs.checkerboard,comm_proc); - // Copy_plane(temp,rhs,dimension,x,sx,cbmask); - - // Bug found; cbmask may differ between sx plan and rx plane. Gather_plane_simple (rhs,send_buf,dimension,sx,cbmask); - // for(int i=0;i_processor; int recv_from_rank; int xmit_to_rank; grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank); - // printf("bytes %d node %d sending to %d receiving from %d\n",bytes,rank,xmit_to_rank,recv_from_rank ); grid->SendToRecvFrom((void *)&send_buf[0], xmit_to_rank, (void *)&recv_buf[0], @@ -224,46 +171,29 @@ friend void Cshift_comms_simd(Lattice &ret,Lattice &rhs,int dimensi int cb = (cbmask==0x2)? 1 : 0; int sshift= grid->CheckerBoardShift(rhs.checkerboard,dimension,shift,cb); - // printf("cshift-comms-simd: shift = %d ; sshift = %d ; cbmask %d ; simd_layout %d\n",shift,sshift,cbmask,simd_layout); std::vector comm_offnode(simd_layout); std::vector comm_proc (simd_layout); //relative processor coord in dim=dimension - // Strategy - // - //* Loop over source planes - //* if any communication needed extract and send - //* if communication needed extract and send - for(int x=0;x= ld; comm_any = comm_any | comm_offnode[s]; comm_proc[s] = shifted_x/ld; - // printf("rd %d x %d shifted %d s=%d comm_any %d\n",rd, x,shifted_x,s,comm_any); } int o = 0; int bo = x*grid->_ostride[dimension]; int sx = (x+sshift)%rd; - // Need Convenience function in _grid. Move this in if ( comm_any ) { for(int i=0;i &ret,Lattice &rhs,int dimensi recv_from_rank, bytes); - // printf("Cshift_simd comms %d %le %le\n",i,real(recv_buf_extract[i][0]),real(send_buf_extract[i][0])); - rpointers[i] = (scalar_type *)&recv_buf_extract[i][0]; } else { rpointers[i] = (scalar_type *)&send_buf_extract[i][0]; - // printf("Cshift_simd local %d %le \n",i,real(send_buf_extract[i][0])); } @@ -311,7 +238,6 @@ friend void Cshift_comms_simd(Lattice &ret,Lattice &rhs,int dimensi } else { pointers[i] = rpointers[i]; } - // printf("Cshift_simd perm %d num %d wrap %d swiz %d %le unswiz %le\n",permute_slice,num,wrap,i,real(pointers[i][0]),real(rpointers[i][0])); } Scatter_plane_merge(ret,pointers,dimension,x,cbmask); diff --git a/Grid_math_types.h b/Grid_math_types.h index 533ce424..3f7714d5 100644 --- a/Grid_math_types.h +++ b/Grid_math_types.h @@ -830,7 +830,7 @@ template inline iMatrix adj(const iMatrix & ///////////////////////////////////////////////////////////////// // Can only take the real/imag part of scalar objects, since -// lattice objects of different complexity are non-conformable. +// lattice objects of different complex nature are non-conformable. ///////////////////////////////////////////////////////////////// template inline auto real(const iScalar &z) -> iScalar { diff --git a/Grid_mpi.cc b/Grid_mpi.cc index e00e3dce..c9b91210 100644 --- a/Grid_mpi.cc +++ b/Grid_mpi.cc @@ -17,13 +17,15 @@ CartesianCommunicator::CartesianCommunicator(std::vector &processors) MPI_Cart_create(MPI_COMM_WORLD, _ndimension,&_processors[0],&periodic[0],1,&communicator); MPI_Comm_rank(communicator,&_processor); MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]); - printf("Hello world from processor ["); + for(int i=0;i<_ndimension;i++){ - printf("%d ",_processor_coor[i]); _Nprocessors*=_processors[i]; } - printf("]\n"); - fflush(stdout); + + int Size; + MPI_Comm_size(communicator,&Size); + + assert(Size==_Nprocessors); } void CartesianCommunicator::GlobalSumF(float &f){ @@ -71,15 +73,3 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit, } -#if 0 - -// Could possibly do a direct block strided send? - int MPI_Type_vector( - int count, - int blocklength, - int stride, - MPI_Datatype old_type, - MPI_Datatype *newtype_p - ); - -#endif diff --git a/Grid_vComplexD.h b/Grid_vComplexD.h index f575f3ad..d1caefb1 100644 --- a/Grid_vComplexD.h +++ b/Grid_vComplexD.h @@ -48,7 +48,6 @@ namespace Grid { #endif #ifdef AVX512 ret.v = _mm512_add_pd(a.v,b.v); - //printf("%s %f\n",__func__,_mm512_reduce_mul_pd(ret.v)); #endif #ifdef QPX ret.v = vec_add(a.v,b.v); @@ -210,7 +209,7 @@ namespace Grid { #ifdef QPX #error // Not implemented yet #endif - default: exit(EXIT_FAILURE); break; + default: assert(0); break; } }; void vload(zvec& a){ @@ -265,8 +264,7 @@ friend inline void vstore(vComplexD &ret, ComplexD *a){ //Note v has a3 a2 a1 a0 #endif #ifdef QPX - printf("%s Not implemented\n",__func__); - exit(-1); + assert(0); #endif } friend inline void vprefetch(const vComplexD &v) @@ -294,7 +292,7 @@ friend inline void vstore(vComplexD &ret, ComplexD *a){ #endif #ifdef QPX - exit(0); // not implemented + assert(0); #endif return ret; } diff --git a/Grid_vComplexF.h b/Grid_vComplexF.h index 6c3e9eac..578228a3 100644 --- a/Grid_vComplexF.h +++ b/Grid_vComplexF.h @@ -193,7 +193,7 @@ namespace Grid { #ifdef QPX #error #endif - default: exit(EXIT_FAILURE); break; + default: assert(0); break; } }; @@ -235,8 +235,7 @@ friend inline void vstore(vComplexF &ret, ComplexF *a){ //Note v has a3 a2 a1 a0 #endif #ifdef QPX - printf("%s Not implemented\n",__func__); -exit(-1); + assert(0); #endif } friend inline void vprefetch(const vComplexF &v) @@ -333,7 +332,7 @@ exit(-1); ret.v = _mm512_mask_sub_ps(in.v,0xaaaa,ret.v,in.v); // Zero out 0+real 0-imag #endif #ifdef QPX - exit(0); // not implemented + assert(0); #endif return ret; } diff --git a/Grid_vRealD.h b/Grid_vRealD.h index 34150e4b..1abc0804 100644 --- a/Grid_vRealD.h +++ b/Grid_vRealD.h @@ -161,7 +161,7 @@ namespace Grid { #ifdef QPX #error #endif - default: exit(EXIT_FAILURE); break; + default: assert(0);break; } }; // gona be bye bye @@ -214,8 +214,7 @@ namespace Grid { // Note v has a7 a6 a5ba4 a3 a2 a1 a0 #endif #ifdef QPX - printf("%s Not implemented\n",__func__); - exit(-1); + assert(0); #endif } friend inline void vprefetch(const vRealD &v) diff --git a/Grid_vRealF.h b/Grid_vRealF.h index 94875f7e..22809b83 100644 --- a/Grid_vRealF.h +++ b/Grid_vRealF.h @@ -185,7 +185,7 @@ namespace Grid { #ifdef QPX #error not implemented #endif - default: exit(EXIT_FAILURE); break; + default: assert(0); break; } }; @@ -236,8 +236,7 @@ friend inline void vstore(vRealF &ret, float *a){ // Note v has a7 a6 a5ba4 a3 a2 a1 a0 #endif #ifdef QPX - printf("%s Not implemented\n",__func__); - exit(-1); + assert(0); #endif }