mirror of https://github.com/paboyle/Grid.git synced 2025-04-09 21:50:45 +01:00

Merge branch 'develop' of github.com:paboyle/Grid into develop

Antonin Portelli 2017-12-01 19:44:31 +00:00
commit 2fd4989029
7 changed files with 190 additions and 166 deletions

View File

@@ -308,32 +308,34 @@ namespace Grid {
   public:
     SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){};
     virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
+      GridLogIterative.TimingMode(1);
+      std::cout << GridLogIterative << " HermOpAndNorm "<<std::endl;
       n2 = Mpc(in,out);
+      std::cout << GridLogIterative << " HermOpAndNorm.Mpc "<<std::endl;
       ComplexD dot= innerProduct(in,out);
+      std::cout << GridLogIterative << " HermOpAndNorm.innerProduct "<<std::endl;
       n1 = real(dot);
     }
     virtual void HermOp(const Field &in, Field &out){
+      std::cout << GridLogIterative << " HermOp "<<std::endl;
       Mpc(in,out);
     }
     virtual RealD Mpc (const Field &in, Field &out) {
       Field tmp(in._grid);
       Field tmp2(in._grid);
+      std::cout << GridLogIterative << " HermOp.Mpc "<<std::endl;
       _Mat.Mooee(in,out);
       _Mat.Mooee(out,tmp);
+      std::cout << GridLogIterative << " HermOp.MooeeMooee "<<std::endl;
       _Mat.Meooe(in,out);
       _Mat.Meooe(out,tmp2);
+      std::cout << GridLogIterative << " HermOp.MeooeMeooe "<<std::endl;
-      return axpy_norm(out,-1.0,tmp2,tmp);
-#if 0
-      //... much prefer conventional Schur norm
-      _Mat.Meooe(in,tmp);
-      _Mat.MooeeInv(tmp,out);
-      _Mat.Meooe(out,tmp);
-      _Mat.Mooee(in,out);
-      return axpy_norm(out,-1.0,tmp,out);
-#endif
+      RealD nn=axpy_norm(out,-1.0,tmp2,tmp);
+      std::cout << GridLogIterative << " HermOp.axpy_norm "<<std::endl;
+      return nn;
     }
     virtual RealD MpcDag (const Field &in, Field &out){
       return Mpc(in,out);
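For context, the operator being instrumented computes Mpc in = Mee^2 in - Meooe(Meooe(in)), i.e. the staggered even-odd operator; the deleted #if 0 branch was the conventional Schur-complement alternative. A minimal sketch of the same algebra as a free function, using only the member calls visible above (the name SchurStagMpcSketch is illustrative, not a Grid function):

// Sketch only: reproduces the algebra of Mpc above.
// Assumes a Grid-like Matrix with Mooee/Meooe and the axpy_norm helper
// used in the diff (out = a*x + y, returning norm2(out)).
template<class Matrix, class Field>
RealD SchurStagMpcSketch(Matrix &M, const Field &in, Field &out)
{
  Field tmp(in._grid), tmp2(in._grid);
  M.Mooee(in,out);  M.Mooee(out,tmp);    // tmp  = Mee Mee in
  M.Meooe(in,out);  M.Meooe(out,tmp2);   // tmp2 = Meooe(Meooe(in)) : hop off and back onto the checkerboard
  return axpy_norm(out,-1.0,tmp2,tmp);   // out  = tmp - tmp2, returns norm2(out)
}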

View File

@@ -123,11 +123,14 @@ namespace Grid {
       Field tmp(grid);
       Field Mtmp(grid);
       Field resid(fgrid);
+      std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve " <<std::endl;
       pickCheckerboard(Even,src_e,in);
       pickCheckerboard(Odd ,src_o,in);
       pickCheckerboard(Even,sol_e,out);
       pickCheckerboard(Odd ,sol_o,out);
+      std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" <<std::endl;

       /////////////////////////////////////////////////////
       // src_o = (source_o - Moe MeeInv source_e)
@@ -144,6 +147,7 @@ namespace Grid {
       //////////////////////////////////////////////////////////////
       std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl;
       _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
+      std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver called the Mpc solver" <<std::endl;

       ///////////////////////////////////////////////////
       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
@@ -152,15 +156,16 @@ namespace Grid {
       src_e = src_e-tmp;              assert( src_e.checkerboard ==Even);
       _Matrix.MooeeInv(src_e,sol_e);  assert( sol_e.checkerboard ==Even);
+      std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver reconstructed other CB" <<std::endl;

       setCheckerboard(out,sol_e);     assert( sol_e.checkerboard ==Even);
       setCheckerboard(out,sol_o);     assert( sol_o.checkerboard ==Odd );
+      std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver inserted solution" <<std::endl;

       // Verify the unprec residual
       _Matrix.M(out,resid);
       resid = resid-in;
       RealD ns = norm2(in);
       RealD nr = norm2(resid);

       std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
     }
   };
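Summarising the flow that these new log lines bracket, using the comments already present in the file:

    src_o' = src_o - Moe Mee^{-1} src_e           (odd-checkerboard source, prepared earlier in the routine)
    solve   Mpc sol_o = src_o'                    (the _HermitianRBSolver call, with Mpc as in the previous file)
    sol_e  = Mee^{-1} ( src_e - Meo sol_o )       (the MooeeInv reconstruction shown above)
    out    = sol_e on even sites, sol_o on odd sites   (the two setCheckerboard calls)

after which the routine checks the true unpreconditioned residual sqrt( norm2(M out - in) / norm2(in) ).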

View File

@@ -134,8 +134,18 @@ void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t
 CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
 {
   _ndimension = processors.size();
-  assert(_ndimension = parent._ndimension);
+
+  int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
+  std::vector<int> parent_processor_coor(_ndimension,0);
+  std::vector<int> parent_processors    (_ndimension,1);
+
+  // Can make 5d grid from 4d etc...
+  int pad = _ndimension-parent_ndimension;
+  for(int d=0;d<parent_ndimension;d++){
+    parent_processor_coor[pad+d]=parent._processor_coor[d];
+    parent_processors    [pad+d]=parent._processors[d];
+  }

   //////////////////////////////////////////////////////////////////////////////////////////////////////
   // split the communicator
   //////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -154,9 +164,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
   std::vector<int> ssize(_ndimension); // coor of split within parent
   for(int d=0;d<_ndimension;d++){
-    ccoor[d] = parent._processor_coor[d] % processors[d];
-    scoor[d] = parent._processor_coor[d] / processors[d];
-    ssize[d] = parent._processors[d]     / processors[d];
+    ccoor[d] = parent_processor_coor[d] % processors[d];
+    scoor[d] = parent_processor_coor[d] / processors[d];
+    ssize[d] = parent_processors[d]     / processors[d];
   }
   int crank;  // rank within subcomm ; srank is rank of subcomm within blocks of subcomms
   // Mpi uses the reverse Lexico convention to us
@@ -166,38 +176,36 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
   MPI_Comm comm_split;
   if ( Nchild > 1 ) {

-    /*
+    if(0){
     std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
     std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
-    for(int d=0;d<parent._processors.size();d++)  std::cout << parent._processors[d] << " ";
+    for(int d=0;d<parent._ndimension;d++)  std::cout << parent._processors[d] << " ";
     std::cout<<std::endl;

     std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
     for(int d=0;d<processors.size();d++)  std::cout << processors[d] << " ";
     std::cout<<std::endl;

-    std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< _ndimension <<"] ";
-    for(int d=0;d<processors.size();d++)  std::cout << parent._processor_coor[d] << " ";
+    std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< parent._ndimension <<"] ";
+    for(int d=0;d<parent._ndimension;d++)  std::cout << parent._processor_coor[d] << " ";
     std::cout<<std::endl;

-    std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] ";
-    for(int d=0;d<processors.size();d++)  std::cout << ccoor[d] << " ";
+    std::cout << GridLogMessage<<" new split "<< srank<<" scoor ["<< _ndimension <<"] ";
+    for(int d=0;d<processors.size();d++)  std::cout << scoor[d] << " ";
     std::cout<<std::endl;

-    std::cout << GridLogMessage<<" new coor ["<< _ndimension <<"] ";
-    for(int d=0;d<processors.size();d++)  std::cout << parent._processor_coor[d] << " ";
+    std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] ";
+    for(int d=0;d<processors.size();d++)  std::cout << ccoor[d] << " ";
     std::cout<<std::endl;
-    */
+    }

     int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
     assert(ierr==0);
     //////////////////////////////////////////////////////////////////////////////////////////////////////
     // Declare victory
     //////////////////////////////////////////////////////////////////////////////////////////////////////
-    /*
-    std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
-              << Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
-    */
+    //    std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
+    //              << Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
   } else {
     comm_split=parent.communicator;
     srank = 0;
@@ -207,6 +215,17 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
   // Set up from the new split communicator
   //////////////////////////////////////////////////////////////////////////////////////////////////////
   InitFromMPICommunicator(processors,comm_split);

+  if(0){
+    std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
+    for(int d=0;d<processors.size();d++){
+      std::cout << d<< " " << _processor_coor[d] <<" " << ccoor[d]<<std::endl;
+    }
+  }
+  for(int d=0;d<processors.size();d++){
+    assert(_processor_coor[d] == ccoor[d] );
+  }
 }

 //////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -231,7 +250,7 @@ void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &proc
   MPI_Comm_rank(communicator,&_processor);
   MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);

-  if ( communicator_base != communicator_world ) {
+  if ( 0 && (communicator_base != communicator_world) ) {
     std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"<<std::endl;
     std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] ";

View File

@@ -606,7 +606,7 @@ CartesianCommunicator::~CartesianCommunicator()
   MPI_Finalized(&MPI_is_finalised);
   if (communicator && !MPI_is_finalised) {
     MPI_Comm_free(&communicator);
-    for(int i=0;i< communicator_halo.size();i++){
+    for(int i=0;i<communicator_halo.size();i++){
       MPI_Comm_free(&communicator_halo[i]);
     }
   }

View File

@@ -50,26 +50,22 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
 ////////////////////////////////////////////////////////////////////////////////////////////
 template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
   half.checkerboard = cb;
-  int ssh=0;
-  //parallel_for
-  for(int ss=0;ss<full._grid->oSites();ss++){
-    std::vector<int> coor;
+  parallel_for(int ss=0;ss<full._grid->oSites();ss++){
     int cbos;
+    std::vector<int> coor;
     full._grid->oCoorFromOindex(coor,ss);
     cbos=half._grid->CheckerBoard(coor);

     if (cbos==cb) {
+      int ssh=half._grid->oIndex(coor);
       half._odata[ssh] = full._odata[ss];
-      ssh++;
     }
   }
 }
 template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
   int cb = half.checkerboard;
-  int ssh=0;
-  //parallel_for
-  for(int ss=0;ss<full._grid->oSites();ss++){
+  parallel_for(int ss=0;ss<full._grid->oSites();ss++){
     std::vector<int> coor;
     int cbos;
@@ -77,8 +73,8 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
     cbos=half._grid->CheckerBoard(coor);

     if (cbos==cb) {
+      int ssh=half._grid->oIndex(coor);
       full._odata[ss]=half._odata[ssh];
-      ssh++;
     }
   }
 }
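The change to both routines is the same: the running output counter ssh, a loop-carried dependence that forced serial execution, is replaced by an index computed directly from the site coordinate via half._grid->oIndex(coor), which is what makes the parallel_for legal. A generic, self-contained illustration of that transformation (plain C++ with hypothetical pack_even_* helpers, not Grid code; both assume half is presized to full.size()/2):

#include <cstddef>
#include <vector>

// Serial version: the counter ssh depends on every earlier iteration.
void pack_even_serial(const std::vector<double>& full, std::vector<double>& half) {
  std::size_t ssh = 0;
  for (std::size_t ss = 0; ss < full.size(); ss++)
    if (ss % 2 == 0) half[ssh++] = full[ss];
}

// Parallel-friendly version: each iteration computes its own destination index,
// so the loop body is independent and could run under a parallel_for.
void pack_even_parallel(const std::vector<double>& full, std::vector<double>& half) {
  for (std::size_t ss = 0; ss < full.size(); ss++)
    if (ss % 2 == 0) half[ss / 2] = full[ss];
}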
@@ -698,30 +694,6 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
 ////////////////////////////////////////////////////////////////////////////////
 // Communicate between grids
 ////////////////////////////////////////////////////////////////////////////////
-//
-// All to all plan
-//
-// Subvolume on fine grid is v. Vectors a,b,c,d
-//
-///////////////////////////////////////////////////////////////////////////////////////////////////////////
-// SIMPLEST CASE:
-///////////////////////////////////////////////////////////////////////////////////////////////////////////
-// Mesh of nodes (2) ; subdivide to 1 subdivisions
-//
-// Lex ord:
-//   N0 va0 vb0   N1 va1 vb1
-//
-// For each dimension do an all to all
-//
-// full AllToAll(0)
-//   N0 va0 va1   N1 vb0 vb1
-//
-// REARRANGE
-//   N0 va01      N1 vb01
-//
-// Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract".
-// NB: Easiest to programme if keep in lex order.
-//
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 // SIMPLE CASE:
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -755,9 +727,17 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
 //
 // Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract".
 // NB: Easiest to programme if keep in lex order.
-//
-/////////////////////////////////////////////////////////
+/*
+ * Let chunk = (fvol*nvec)/sP be size of a chunk.   ( Divide lexico vol * nvec into fP/sP = M chunks )
+ *
+ * 2nd A2A (over sP nodes; subdivide the fP into sP chunks of M)
+ *
+ *  node 0   1st chunk of node 0M..(1M-1); 2nd chunk of node 0M..(1M-1)..   data chunk x M x sP = fL / sP * M * sP = fL * M growth
+ *  node 1   1st chunk of node 1M..(2M-1); 2nd chunk of node 1M..(2M-1)..
+ *  node 2   1st chunk of node 2M..(3M-1); 2nd chunk of node 2M..(3M-1)..
+ *  node 3   1st chunk of node 3M..(4M-1); 2nd chunk of node 3M..(4M-1)..
+ *  etc...
+ */
 template<class Vobj>
 void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
 {
@@ -816,57 +796,58 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
   int nvec = nvector; // Counts down to 1 as we collapse dims
   std::vector<int> ldims = full_grid->_ldimensions;
-  std::vector<int> lcoor(ndim);

   for(int d=ndim-1;d>=0;d--){

     if ( ratio[d] != 1 ) {

       full_grid ->AllToAll(d,alldata,tmpdata);
-      // std::cout << GridLogMessage << "Grid_split: dim " <<d<<" ratio "<<ratio[d]<<" nvec "<<nvec<<" procs "<<split_grid->_processors[d]<<std::endl;
-      // for(int v=0;v<nvec;v++){
-      //   std::cout << "Grid_split: alldata["<<v<<"] " << alldata[v] <<std::endl;
-      //   std::cout << "Grid_split: tmpdata["<<v<<"] " << tmpdata[v] <<std::endl;
-      // }
-      //////////////////////////////////////////
-      //Local volume for this dimension is expanded by ratio of processor extents
-      // Number of vectors is decreased by same factor
-      // Rearrange to lexico for bigger volume
-      //////////////////////////////////////////
-      nvec /= ratio[d];
-      auto rdims = ldims; rdims[d] *= ratio[d];
-      auto rsites= lsites*ratio[d];
-      for(int v=0;v<nvec;v++){
-        // For loop over each site within old subvol
-        for(int lsite=0;lsite<lsites;lsite++){
-          Lexicographic::CoorFromIndex(lcoor, lsite, ldims);
-          for(int r=0;r<ratio[d];r++){ // ratio*nvec terms
-            auto rcoor = lcoor; rcoor[d] += r*ldims[d];
-            int rsite; Lexicographic::IndexFromCoor(rcoor, rsite, rdims);
-            rsite += v * rsites;
-            int rmul=nvec*lsites;
-            int vmul=     lsites;
-            alldata[rsite] = tmpdata[lsite+r*rmul+v*vmul];
-            // if ( lsite==0 ) {
-            //   std::cout << "Grid_split: grow alldata["<<rsite<<"] " << alldata[rsite] << " <- tmpdata["<< lsite+r*rmul+v*vmul<<"] "<<tmpdata[lsite+r*rmul+v*vmul] <<std::endl;
-            // }
+      if ( split_grid->_processors[d] > 1 ) {
+        alldata=tmpdata;
+        split_grid->AllToAll(d,alldata,tmpdata);
+      }
+
+      auto rdims = ldims;
+      auto M     = ratio[d];
+      auto rsites= lsites*M;  // increases rsites by M
+      nvec      /= M;         // Reduce nvec by subdivision factor
+      rdims[d]  *= M;         // increase local dim by same factor
+
+      int sP =   split_grid->_processors[d];
+      int fP =    full_grid->_processors[d];
+
+      int fvol   = lsites;
+      int chunk  = (nvec*fvol)/sP;  assert(chunk*sP == nvec*fvol);
+
+      // Loop over reordered data post A2A
+      parallel_for(int c=0;c<chunk;c++){
+        for(int m=0;m<M;m++){
+          for(int s=0;s<sP;s++){
+
+            // addressing; use lexico
+            int lex_r;
+            uint64_t lex_c        = c+chunk*m+chunk*M*s;
+            uint64_t lex_fvol_vec = c+chunk*s;
+            uint64_t lex_fvol     = lex_fvol_vec%fvol;
+            uint64_t lex_vec      = lex_fvol_vec/fvol;
+
+            // which node sets an adder to the coordinate
+            std::vector<int> coor(ndim);
+            Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);
+            coor[d] += m*ldims[d];
+            Lexicographic::IndexFromCoor(coor, lex_r, rdims);
+            lex_r += lex_vec * rsites;
+
+            // LexicoFind coordinate & vector number within split lattice
+            alldata[lex_r] = tmpdata[lex_c];
           }
         }
       }
       ldims[d]*= ratio[d];
       lsites  *= ratio[d];
-
-      if ( split_grid->_processors[d] > 1 ) {
-        tmpdata = alldata;
-        split_grid->AllToAll(d,tmpdata,alldata);
-      }
     }
   }
   vectorizeFromLexOrdArray(alldata,split);
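To make the new chunk addressing concrete, here is the index arithmetic unrolled for one illustrative set of sizes (not taken from the commit): suppose at this point in the loop nvec = 4, fvol = lsites = 4, sP = 2 and M = ratio[d] = 2, and take c = 3, m = 1, s = 1:

    chunk        = (nvec*fvol)/sP        = (4*4)/2  = 8
    lex_c        = c + chunk*m + chunk*M*s = 3 + 8 + 16 = 27
    lex_fvol_vec = c + chunk*s           = 3 + 8    = 11
    lex_fvol     = lex_fvol_vec % fvol   = 11 % 4   = 3
    lex_vec      = lex_fvol_vec / fvol   = 11 / 4   = 2

The coordinate of local site 3 is then shifted by m*ldims[d] in dimension d, re-indexed against the enlarged rdims to give lex_r, and offset by lex_vec*rsites, so element 27 of tmpdata lands at that slot of alldata. Grid_unsplit below runs the identical arithmetic with the copy direction reversed (tmpdata[lex_c] = alldata[lex_r]).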
@@ -937,59 +918,61 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
   /////////////////////////////////////////////////////////////////
   // Start from split grid and work towards full grid
   /////////////////////////////////////////////////////////////////
-  std::vector<int> lcoor(ndim);
-  std::vector<int> rcoor(ndim);

   int nvec = 1;
-  lsites = split_grid->lSites();
-  std::vector<int> ldims = split_grid->_ldimensions;
+  uint64_t rsites = split_grid->lSites();
+  std::vector<int> rdims = split_grid->_ldimensions;

+  //  for(int d=ndim-1;d>=0;d--){
   for(int d=0;d<ndim;d++){

     if ( ratio[d] != 1 ) {

-      if ( split_grid->_processors[d] > 1 ) {
-        tmpdata = alldata;
-        split_grid->AllToAll(d,tmpdata,alldata);
-      }
-      //////////////////////////////////////////
-      //Local volume for this dimension is expanded by ratio of processor extents
-      // Number of vectors is decreased by same factor
-      // Rearrange to lexico for bigger volume
-      //////////////////////////////////////////
-      auto rsites= lsites/ratio[d];
-      auto rdims = ldims; rdims[d]/=ratio[d];
-      for(int v=0;v<nvec;v++){
-        // rsite, rcoor --> smaller local volume
-        // lsite, lcoor --> bigger original (single node?) volume
-        // For loop over each site within smaller subvol
-        for(int rsite=0;rsite<rsites;rsite++){
-          Lexicographic::CoorFromIndex(rcoor, rsite, rdims);
-          int lsite;
-          for(int r=0;r<ratio[d];r++){
-            lcoor = rcoor; lcoor[d] += r*rdims[d];
-            Lexicographic::IndexFromCoor(lcoor, lsite, ldims); lsite += v * lsites;
-            int rmul=nvec*rsites;
-            int vmul=     rsites;
-            tmpdata[rsite+r*rmul+v*vmul]=alldata[lsite];
+      auto M = ratio[d];
+      int sP =   split_grid->_processors[d];
+      int fP =    full_grid->_processors[d];
+
+      auto ldims = rdims;  ldims[d] /= M;   // Decrease local dims by same factor
+      auto lsites= rsites/M;                // Decreases rsites by M
+
+      int fvol   = lsites;
+      int chunk  = (nvec*fvol)/sP;  assert(chunk*sP == nvec*fvol);
+
+      {
+        // Loop over reordered data post A2A
+        for(int c=0;c<chunk;c++){
+          for(int m=0;m<M;m++){
+            for(int s=0;s<sP;s++){
+
+              // addressing; use lexico
+              int lex_r;
+              uint64_t lex_c        = c+chunk*m+chunk*M*s;
+              uint64_t lex_fvol_vec = c+chunk*s;
+              uint64_t lex_fvol     = lex_fvol_vec%fvol;
+              uint64_t lex_vec      = lex_fvol_vec/fvol;
+
+              // which node sets an adder to the coordinate
+              std::vector<int> coor(ndim);
+              Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);
+              coor[d] += m*ldims[d];
+              Lexicographic::IndexFromCoor(coor, lex_r, rdims);
+              lex_r += lex_vec * rsites;
+
+              // LexicoFind coordinate & vector number within split lattice
+              tmpdata[lex_c] = alldata[lex_r];
+            }
           }
         }
       }
-      nvec   *= ratio[d];
-      ldims[d]= rdims[d];
-      lsites  = rsites;
+      if ( split_grid->_processors[d] > 1 ) {
+        split_grid->AllToAll(d,tmpdata,alldata);
+        tmpdata=alldata;
+      }
       full_grid ->AllToAll(d,tmpdata,alldata);
+      rdims[d]/= M;
+      rsites  /= M;
+      nvec    *= M;  // Increase nvec by subdivision factor
     }
   }
@@ -997,12 +980,12 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
   for(int v=0;v<nvector;v++){
     assert(v<full.size());
     parallel_for(int site=0;site<lsites;site++){
+      assert(v*lsites+site < alldata.size());
       scalardata[site] = alldata[v*lsites+site];
     }
     vectorizeFromLexOrdArray(scalardata,full[v]);
   }
 }
 }
 #endif

View File

@@ -95,7 +95,7 @@ int main (int argc, char ** argv)
   FermionField tmp(FGrid);

   for(int s=0;s<nrhs;s++) result[s]=zero;
-#undef  LEXICO_TEST
+#define LEXICO_TEST
 #ifdef LEXICO_TEST
   {
     LatticeFermion lex(FGrid);  lex = zero;
@@ -121,12 +121,12 @@ int main (int argc, char ** argv)
     random(pRNG5,src[s]);
     tmp = 100.0*s;
     src[s] = (src[s] * 0.1) + tmp;
-    std::cout << " src ]"<<s<<"] "<<norm2(src[s])<<std::endl;
+    std::cout << GridLogMessage << " src ["<<s<<"] "<<norm2(src[s])<<std::endl;
   }
 #endif

   for(int n =0 ; n< nrhs ; n++) {
-    std::cout << " src"<<n<<"\n"<< src[n] <<std::endl;
+    //  std::cout << " src"<<n<<"\n"<< src[n] <<std::endl;
   }

   LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);
@@ -144,8 +144,8 @@ int main (int argc, char ** argv)
   ///////////////////////////////////////////////////////////////
   Grid_split  (Umu,s_Umu);
   Grid_split  (src,s_src);
-  std::cout << " split rank " <<me << " s_src "<<norm2(s_src)<<std::endl;
-  std::cout << " s_src\n "<< s_src <<std::endl;
+  std::cout << GridLogMessage << " split rank " <<me << " s_src "<<norm2(s_src)<<std::endl;
+  //  std::cout << " s_src\n "<< s_src <<std::endl;

 #ifdef LEXICO_TEST
   FermionField s_src_tmp(SFGrid);
@@ -168,11 +168,12 @@ int main (int argc, char ** argv)
     s_src_tmp = s_src_tmp + ftmp;
   }
   s_src_diff = s_src_tmp - s_src;
-  std::cout << " s_src_diff " << norm2(s_src_diff)<<std::endl;
-  std::cout << " s_src \n" << s_src << std::endl;
-  std::cout << " s_src_tmp \n" << s_src_tmp << std::endl;
-  std::cout << " s_src_diff \n" << s_src_diff << std::endl;
+  std::cout << GridLogMessage <<" LEXICO test: s_src_diff " << norm2(s_src_diff)<<std::endl;
+  //  std::cout << " s_src \n" << s_src << std::endl;
+  //  std::cout << " s_src_tmp \n" << s_src_tmp << std::endl;
+  //  std::cout << " s_src_diff \n" << s_src_diff << std::endl;
+  //  exit(0);
 #endif

 ///////////////////////////////////////////////////////////////
@@ -189,11 +190,11 @@ int main (int argc, char ** argv)
   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk);
-  ConjugateGradient<FermionField> CG((1.0e-5),10000);
+  ConjugateGradient<FermionField> CG((1.0e-2),10000);
   s_res = zero;
   CG(HermOp,s_src,s_res);

-  std::cout << " s_res norm "<<norm2(s_res)<<std::endl;
+  std::cout << GridLogMessage << " split residual norm "<<norm2(s_res)<<std::endl;

   /////////////////////////////////////////////////////////////
   // Report how long they all took
   /////////////////////////////////////////////////////////////
@@ -214,7 +215,7 @@ int main (int argc, char ** argv)
   std::cout << GridLogMessage<< "Checking the residuals"<<std::endl;
   for(int n=0;n<nrhs;n++){
-    std::cout << " res["<<n<<"] norm "<<norm2(result[n])<<std::endl;
+    std::cout << GridLogMessage<< " res["<<n<<"] norm "<<norm2(result[n])<<std::endl;
     HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n];
     std::cout << GridLogMessage<<" resid["<<n<<"] "<< norm2(tmp)/norm2(src[n])<<std::endl;
   }

View File

@@ -70,7 +70,21 @@ int main (int argc, char ** argv)
   ConjugateGradient<FermionField> CG(1.0e-8,10000);
   SchurRedBlackStaggeredSolve<FermionField> SchurSolver(CG);

+  double volume=1.0;
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  }
+  double t1=usecond();
   SchurSolver(Ds,src,result);
+  double t2=usecond();
+
+  // Schur solver: uses DeoDoe => volume * 1146
+  double ncall=CG.IterationsToComplete;
+  double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 +  == 1146
+
+  std::cout<<GridLogMessage << "usec    = "<< (t2-t1)<<std::endl;
+  std::cout<<GridLogMessage << "flop/s  = "<< flops<<std::endl;
+  std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;

   Grid_finalize();
 }
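For readers checking the new flop estimate, the bracketed factor works out to

    16*(3*(6+8+8)) + 15*3*2 = 16*66 + 90 = 1146 flops per site,

which is the "volume * 1146" quoted in the comment: 3*(6+8+8) = 66 is the cost of one SU(3) matrix times colour-vector multiply, there are 16 of these per site (presumably the 8 one-hop plus 8 three-hop terms of the improved staggered stencil), and 15*3*2 = 90 is the cost of summing the 16 resulting colour vectors. Since usecond() returns time in microseconds, flops/(t2-t1) is reported in Mflop/s.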