mirror of
https://github.com/paboyle/Grid.git
synced 2025-04-04 19:25:56 +01:00
Merge branch 'develop' of github.com:paboyle/Grid into develop
This commit is contained in:
commit
2fd4989029
@ -308,32 +308,34 @@ namespace Grid {
|
||||
public:
|
||||
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){};
|
||||
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
|
||||
GridLogIterative.TimingMode(1);
|
||||
std::cout << GridLogIterative << " HermOpAndNorm "<<std::endl;
|
||||
n2 = Mpc(in,out);
|
||||
std::cout << GridLogIterative << " HermOpAndNorm.Mpc "<<std::endl;
|
||||
ComplexD dot= innerProduct(in,out);
|
||||
std::cout << GridLogIterative << " HermOpAndNorm.innerProduct "<<std::endl;
|
||||
n1 = real(dot);
|
||||
}
|
||||
virtual void HermOp(const Field &in, Field &out){
|
||||
std::cout << GridLogIterative << " HermOp "<<std::endl;
|
||||
Mpc(in,out);
|
||||
}
|
||||
virtual RealD Mpc (const Field &in, Field &out) {
|
||||
Field tmp(in._grid);
|
||||
Field tmp2(in._grid);
|
||||
|
||||
std::cout << GridLogIterative << " HermOp.Mpc "<<std::endl;
|
||||
_Mat.Mooee(in,out);
|
||||
_Mat.Mooee(out,tmp);
|
||||
std::cout << GridLogIterative << " HermOp.MooeeMooee "<<std::endl;
|
||||
|
||||
_Mat.Meooe(in,out);
|
||||
_Mat.Meooe(out,tmp2);
|
||||
std::cout << GridLogIterative << " HermOp.MeooeMeooe "<<std::endl;
|
||||
|
||||
return axpy_norm(out,-1.0,tmp2,tmp);
|
||||
#if 0
|
||||
//... much prefer conventional Schur norm
|
||||
_Mat.Meooe(in,tmp);
|
||||
_Mat.MooeeInv(tmp,out);
|
||||
_Mat.Meooe(out,tmp);
|
||||
_Mat.Mooee(in,out);
|
||||
return axpy_norm(out,-1.0,tmp,out);
|
||||
#endif
|
||||
RealD nn=axpy_norm(out,-1.0,tmp2,tmp);
|
||||
std::cout << GridLogIterative << " HermOp.axpy_norm "<<std::endl;
|
||||
return nn;
|
||||
}
|
||||
virtual RealD MpcDag (const Field &in, Field &out){
|
||||
return Mpc(in,out);
|
||||
|
@ -123,11 +123,14 @@ namespace Grid {
|
||||
Field tmp(grid);
|
||||
Field Mtmp(grid);
|
||||
Field resid(fgrid);
|
||||
|
||||
|
||||
std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve " <<std::endl;
|
||||
pickCheckerboard(Even,src_e,in);
|
||||
pickCheckerboard(Odd ,src_o,in);
|
||||
pickCheckerboard(Even,sol_e,out);
|
||||
pickCheckerboard(Odd ,sol_o,out);
|
||||
|
||||
std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" <<std::endl;
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// src_o = (source_o - Moe MeeInv source_e)
|
||||
@ -144,6 +147,7 @@ namespace Grid {
|
||||
//////////////////////////////////////////////////////////////
|
||||
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl;
|
||||
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
|
||||
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver called the Mpc solver" <<std::endl;
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
|
||||
@ -152,15 +156,16 @@ namespace Grid {
|
||||
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
|
||||
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
|
||||
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver reconstructed other CB" <<std::endl;
|
||||
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
|
||||
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
|
||||
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver inserted solution" <<std::endl;
|
||||
|
||||
// Verify the unprec residual
|
||||
_Matrix.M(out,resid);
|
||||
resid = resid-in;
|
||||
RealD ns = norm2(in);
|
||||
RealD nr = norm2(resid);
|
||||
|
||||
std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
|
||||
}
|
||||
};
|
||||
|
@ -134,8 +134,18 @@ void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t
|
||||
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
|
||||
{
|
||||
_ndimension = processors.size();
|
||||
assert(_ndimension = parent._ndimension);
|
||||
|
||||
|
||||
int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
|
||||
std::vector<int> parent_processor_coor(_ndimension,0);
|
||||
std::vector<int> parent_processors (_ndimension,1);
|
||||
|
||||
// Can make 5d grid from 4d etc...
|
||||
int pad = _ndimension-parent_ndimension;
|
||||
for(int d=0;d<parent_ndimension;d++){
|
||||
parent_processor_coor[pad+d]=parent._processor_coor[d];
|
||||
parent_processors [pad+d]=parent._processors[d];
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// split the communicator
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -154,9 +164,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
|
||||
std::vector<int> ssize(_ndimension); // coor of split within parent
|
||||
|
||||
for(int d=0;d<_ndimension;d++){
|
||||
ccoor[d] = parent._processor_coor[d] % processors[d];
|
||||
scoor[d] = parent._processor_coor[d] / processors[d];
|
||||
ssize[d] = parent._processors[d] / processors[d];
|
||||
ccoor[d] = parent_processor_coor[d] % processors[d];
|
||||
scoor[d] = parent_processor_coor[d] / processors[d];
|
||||
ssize[d] = parent_processors[d] / processors[d];
|
||||
}
|
||||
int crank; // rank within subcomm ; srank is rank of subcomm within blocks of subcomms
|
||||
// Mpi uses the reverse Lexico convention to us
|
||||
@ -166,38 +176,36 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
|
||||
MPI_Comm comm_split;
|
||||
if ( Nchild > 1 ) {
|
||||
|
||||
/*
|
||||
std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
|
||||
std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
|
||||
for(int d=0;d<parent._processors.size();d++) std::cout << parent._processors[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
|
||||
for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< _ndimension <<"] ";
|
||||
for(int d=0;d<processors.size();d++) std::cout << parent._processor_coor[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] ";
|
||||
for(int d=0;d<processors.size();d++) std::cout << ccoor[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage<<" new coor ["<< _ndimension <<"] ";
|
||||
for(int d=0;d<processors.size();d++) std::cout << parent._processor_coor[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
*/
|
||||
if(0){
|
||||
std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
|
||||
std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
|
||||
for(int d=0;d<parent._ndimension;d++) std::cout << parent._processors[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
|
||||
for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< parent._ndimension <<"] ";
|
||||
for(int d=0;d<parent._ndimension;d++) std::cout << parent._processor_coor[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage<<" new split "<< srank<<" scoor ["<< _ndimension <<"] ";
|
||||
for(int d=0;d<processors.size();d++) std::cout << scoor[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
|
||||
std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] ";
|
||||
for(int d=0;d<processors.size();d++) std::cout << ccoor[d] << " ";
|
||||
std::cout<<std::endl;
|
||||
}
|
||||
|
||||
int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
|
||||
assert(ierr==0);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Declare victory
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/*
|
||||
std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
|
||||
<< Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
|
||||
*/
|
||||
// std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
|
||||
// << Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
|
||||
} else {
|
||||
comm_split=parent.communicator;
|
||||
srank = 0;
|
||||
@ -207,6 +215,17 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
|
||||
// Set up from the new split communicator
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
InitFromMPICommunicator(processors,comm_split);
|
||||
|
||||
if(0){
|
||||
std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
|
||||
for(int d=0;d<processors.size();d++){
|
||||
std::cout << d<< " " << _processor_coor[d] <<" " << ccoor[d]<<std::endl;
|
||||
}
|
||||
}
|
||||
for(int d=0;d<processors.size();d++){
|
||||
assert(_processor_coor[d] == ccoor[d] );
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -231,7 +250,7 @@ void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &proc
|
||||
MPI_Comm_rank(communicator,&_processor);
|
||||
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
|
||||
|
||||
if ( communicator_base != communicator_world ) {
|
||||
if ( 0 && (communicator_base != communicator_world) ) {
|
||||
std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"<<std::endl;
|
||||
|
||||
std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] ";
|
||||
|
@ -606,7 +606,7 @@ CartesianCommunicator::~CartesianCommunicator()
|
||||
MPI_Finalized(&MPI_is_finalised);
|
||||
if (communicator && !MPI_is_finalised) {
|
||||
MPI_Comm_free(&communicator);
|
||||
for(int i=0;i< communicator_halo.size();i++){
|
||||
for(int i=0;i<communicator_halo.size();i++){
|
||||
MPI_Comm_free(&communicator_halo[i]);
|
||||
}
|
||||
}
|
||||
|
@ -50,26 +50,22 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
|
||||
////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
|
||||
half.checkerboard = cb;
|
||||
int ssh=0;
|
||||
//parallel_for
|
||||
for(int ss=0;ss<full._grid->oSites();ss++){
|
||||
std::vector<int> coor;
|
||||
|
||||
parallel_for(int ss=0;ss<full._grid->oSites();ss++){
|
||||
int cbos;
|
||||
|
||||
std::vector<int> coor;
|
||||
full._grid->oCoorFromOindex(coor,ss);
|
||||
cbos=half._grid->CheckerBoard(coor);
|
||||
|
||||
if (cbos==cb) {
|
||||
int ssh=half._grid->oIndex(coor);
|
||||
half._odata[ssh] = full._odata[ss];
|
||||
ssh++;
|
||||
}
|
||||
}
|
||||
}
|
||||
template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
|
||||
int cb = half.checkerboard;
|
||||
int ssh=0;
|
||||
//parallel_for
|
||||
for(int ss=0;ss<full._grid->oSites();ss++){
|
||||
parallel_for(int ss=0;ss<full._grid->oSites();ss++){
|
||||
std::vector<int> coor;
|
||||
int cbos;
|
||||
|
||||
@ -77,8 +73,8 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
|
||||
cbos=half._grid->CheckerBoard(coor);
|
||||
|
||||
if (cbos==cb) {
|
||||
int ssh=half._grid->oIndex(coor);
|
||||
full._odata[ss]=half._odata[ssh];
|
||||
ssh++;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -698,30 +694,6 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Communicate between grids
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// All to all plan
|
||||
//
|
||||
// Subvolume on fine grid is v. Vectors a,b,c,d
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SIMPLEST CASE:
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Mesh of nodes (2) ; subdivide to 1 subdivisions
|
||||
//
|
||||
// Lex ord:
|
||||
// N0 va0 vb0 N1 va1 vb1
|
||||
//
|
||||
// For each dimension do an all to all
|
||||
//
|
||||
// full AllToAll(0)
|
||||
// N0 va0 va1 N1 vb0 vb1
|
||||
//
|
||||
// REARRANGE
|
||||
// N0 va01 N1 vb01
|
||||
//
|
||||
// Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract".
|
||||
// NB: Easiest to programme if keep in lex order.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// SIMPLE CASE:
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
@ -755,9 +727,17 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
|
||||
//
|
||||
// Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract".
|
||||
// NB: Easiest to programme if keep in lex order.
|
||||
//
|
||||
/////////////////////////////////////////////////////////
|
||||
|
||||
/*
|
||||
* Let chunk = (fvol*nvec)/sP be size of a chunk. ( Divide lexico vol * nvec into fP/sP = M chunks )
|
||||
*
|
||||
* 2nd A2A (over sP nodes; subdivide the fP into sP chunks of M)
|
||||
*
|
||||
* node 0 1st chunk of node 0M..(1M-1); 2nd chunk of node 0M..(1M-1).. data chunk x M x sP = fL / sP * M * sP = fL * M growth
|
||||
* node 1 1st chunk of node 1M..(2M-1); 2nd chunk of node 1M..(2M-1)..
|
||||
* node 2 1st chunk of node 2M..(3M-1); 2nd chunk of node 2M..(3M-1)..
|
||||
* node 3 1st chunk of node 3M..(3M-1); 2nd chunk of node 2M..(3M-1)..
|
||||
* etc...
|
||||
*/
|
||||
template<class Vobj>
|
||||
void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
|
||||
{
|
||||
@ -816,57 +796,58 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
|
||||
|
||||
int nvec = nvector; // Counts down to 1 as we collapse dims
|
||||
std::vector<int> ldims = full_grid->_ldimensions;
|
||||
std::vector<int> lcoor(ndim);
|
||||
|
||||
for(int d=ndim-1;d>=0;d--){
|
||||
|
||||
if ( ratio[d] != 1 ) {
|
||||
|
||||
full_grid ->AllToAll(d,alldata,tmpdata);
|
||||
// std::cout << GridLogMessage << "Grid_split: dim " <<d<<" ratio "<<ratio[d]<<" nvec "<<nvec<<" procs "<<split_grid->_processors[d]<<std::endl;
|
||||
// for(int v=0;v<nvec;v++){
|
||||
// std::cout << "Grid_split: alldata["<<v<<"] " << alldata[v] <<std::endl;
|
||||
// std::cout << "Grid_split: tmpdata["<<v<<"] " << tmpdata[v] <<std::endl;
|
||||
// }
|
||||
//////////////////////////////////////////
|
||||
//Local volume for this dimension is expanded by ratio of processor extents
|
||||
// Number of vectors is decreased by same factor
|
||||
// Rearrange to lexico for bigger volume
|
||||
//////////////////////////////////////////
|
||||
nvec /= ratio[d];
|
||||
if ( split_grid->_processors[d] > 1 ) {
|
||||
alldata=tmpdata;
|
||||
split_grid->AllToAll(d,alldata,tmpdata);
|
||||
}
|
||||
|
||||
auto rdims = ldims; rdims[d] *= ratio[d];
|
||||
auto rsites= lsites*ratio[d];
|
||||
for(int v=0;v<nvec;v++){
|
||||
auto rdims = ldims;
|
||||
auto M = ratio[d];
|
||||
auto rsites= lsites*M;// increases rsites by M
|
||||
nvec /= M; // Reduce nvec by subdivision factor
|
||||
rdims[d] *= M; // increase local dim by same factor
|
||||
|
||||
// For loop over each site within old subvol
|
||||
for(int lsite=0;lsite<lsites;lsite++){
|
||||
int sP = split_grid->_processors[d];
|
||||
int fP = full_grid->_processors[d];
|
||||
|
||||
Lexicographic::CoorFromIndex(lcoor, lsite, ldims);
|
||||
int fvol = lsites;
|
||||
|
||||
int chunk = (nvec*fvol)/sP; assert(chunk*sP == nvec*fvol);
|
||||
|
||||
for(int r=0;r<ratio[d];r++){ // ratio*nvec terms
|
||||
// Loop over reordered data post A2A
|
||||
parallel_for(int c=0;c<chunk;c++){
|
||||
for(int m=0;m<M;m++){
|
||||
for(int s=0;s<sP;s++){
|
||||
|
||||
// addressing; use lexico
|
||||
int lex_r;
|
||||
uint64_t lex_c = c+chunk*m+chunk*M*s;
|
||||
uint64_t lex_fvol_vec = c+chunk*s;
|
||||
uint64_t lex_fvol = lex_fvol_vec%fvol;
|
||||
uint64_t lex_vec = lex_fvol_vec/fvol;
|
||||
|
||||
auto rcoor = lcoor; rcoor[d] += r*ldims[d];
|
||||
// which node sets an adder to the coordinate
|
||||
std::vector<int> coor(ndim);
|
||||
Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);
|
||||
coor[d] += m*ldims[d];
|
||||
Lexicographic::IndexFromCoor(coor, lex_r, rdims);
|
||||
lex_r += lex_vec * rsites;
|
||||
|
||||
int rsite; Lexicographic::IndexFromCoor(rcoor, rsite, rdims);
|
||||
rsite += v * rsites;
|
||||
// LexicoFind coordinate & vector number within split lattice
|
||||
alldata[lex_r] = tmpdata[lex_c];
|
||||
|
||||
int rmul=nvec*lsites;
|
||||
int vmul= lsites;
|
||||
alldata[rsite] = tmpdata[lsite+r*rmul+v*vmul];
|
||||
// if ( lsite==0 ) {
|
||||
// std::cout << "Grid_split: grow alldata["<<rsite<<"] " << alldata[rsite] << " <- tmpdata["<< lsite+r*rmul+v*vmul<<"] "<<tmpdata[lsite+r*rmul+v*vmul] <<std::endl;
|
||||
// }
|
||||
}
|
||||
}
|
||||
}
|
||||
ldims[d]*= ratio[d];
|
||||
lsites *= ratio[d];
|
||||
|
||||
if ( split_grid->_processors[d] > 1 ) {
|
||||
tmpdata = alldata;
|
||||
split_grid->AllToAll(d,tmpdata,alldata);
|
||||
}
|
||||
}
|
||||
}
|
||||
vectorizeFromLexOrdArray(alldata,split);
|
||||
@ -937,59 +918,61 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
|
||||
/////////////////////////////////////////////////////////////////
|
||||
// Start from split grid and work towards full grid
|
||||
/////////////////////////////////////////////////////////////////
|
||||
std::vector<int> lcoor(ndim);
|
||||
std::vector<int> rcoor(ndim);
|
||||
|
||||
int nvec = 1;
|
||||
lsites = split_grid->lSites();
|
||||
std::vector<int> ldims = split_grid->_ldimensions;
|
||||
uint64_t rsites = split_grid->lSites();
|
||||
std::vector<int> rdims = split_grid->_ldimensions;
|
||||
|
||||
// for(int d=ndim-1;d>=0;d--){
|
||||
for(int d=0;d<ndim;d++){
|
||||
|
||||
if ( ratio[d] != 1 ) {
|
||||
|
||||
auto M = ratio[d];
|
||||
|
||||
if ( split_grid->_processors[d] > 1 ) {
|
||||
tmpdata = alldata;
|
||||
split_grid->AllToAll(d,tmpdata,alldata);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////
|
||||
//Local volume for this dimension is expanded by ratio of processor extents
|
||||
// Number of vectors is decreased by same factor
|
||||
// Rearrange to lexico for bigger volume
|
||||
//////////////////////////////////////////
|
||||
auto rsites= lsites/ratio[d];
|
||||
auto rdims = ldims; rdims[d]/=ratio[d];
|
||||
|
||||
for(int v=0;v<nvec;v++){
|
||||
|
||||
// rsite, rcoor --> smaller local volume
|
||||
// lsite, lcoor --> bigger original (single node?) volume
|
||||
// For loop over each site within smaller subvol
|
||||
for(int rsite=0;rsite<rsites;rsite++){
|
||||
|
||||
Lexicographic::CoorFromIndex(rcoor, rsite, rdims);
|
||||
int lsite;
|
||||
|
||||
for(int r=0;r<ratio[d];r++){
|
||||
|
||||
lcoor = rcoor; lcoor[d] += r*rdims[d];
|
||||
Lexicographic::IndexFromCoor(lcoor, lsite, ldims); lsite += v * lsites;
|
||||
|
||||
int rmul=nvec*rsites;
|
||||
int vmul= rsites;
|
||||
tmpdata[rsite+r*rmul+v*vmul]=alldata[lsite];
|
||||
|
||||
int sP = split_grid->_processors[d];
|
||||
int fP = full_grid->_processors[d];
|
||||
|
||||
auto ldims = rdims; ldims[d] /= M; // Decrease local dims by same factor
|
||||
auto lsites= rsites/M; // Decreases rsites by M
|
||||
|
||||
int fvol = lsites;
|
||||
int chunk = (nvec*fvol)/sP; assert(chunk*sP == nvec*fvol);
|
||||
|
||||
{
|
||||
// Loop over reordered data post A2A
|
||||
for(int c=0;c<chunk;c++){
|
||||
for(int m=0;m<M;m++){
|
||||
for(int s=0;s<sP;s++){
|
||||
|
||||
// addressing; use lexico
|
||||
int lex_r;
|
||||
uint64_t lex_c = c+chunk*m+chunk*M*s;
|
||||
uint64_t lex_fvol_vec = c+chunk*s;
|
||||
uint64_t lex_fvol = lex_fvol_vec%fvol;
|
||||
uint64_t lex_vec = lex_fvol_vec/fvol;
|
||||
|
||||
// which node sets an adder to the coordinate
|
||||
std::vector<int> coor(ndim);
|
||||
Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);
|
||||
coor[d] += m*ldims[d];
|
||||
Lexicographic::IndexFromCoor(coor, lex_r, rdims);
|
||||
lex_r += lex_vec * rsites;
|
||||
|
||||
// LexicoFind coordinate & vector number within split lattice
|
||||
tmpdata[lex_c] = alldata[lex_r];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
nvec *= ratio[d];
|
||||
ldims[d]=rdims[d];
|
||||
lsites =rsites;
|
||||
|
||||
if ( split_grid->_processors[d] > 1 ) {
|
||||
split_grid->AllToAll(d,tmpdata,alldata);
|
||||
tmpdata=alldata;
|
||||
}
|
||||
full_grid ->AllToAll(d,tmpdata,alldata);
|
||||
rdims[d]/= M;
|
||||
rsites /= M;
|
||||
nvec *= M; // Increase nvec by subdivision factor
|
||||
}
|
||||
}
|
||||
|
||||
@ -997,12 +980,12 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
|
||||
for(int v=0;v<nvector;v++){
|
||||
assert(v<full.size());
|
||||
parallel_for(int site=0;site<lsites;site++){
|
||||
assert(v*lsites+site < alldata.size());
|
||||
scalardata[site] = alldata[v*lsites+site];
|
||||
}
|
||||
vectorizeFromLexOrdArray(scalardata,full[v]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
#endif
|
||||
|
@ -95,7 +95,7 @@ int main (int argc, char ** argv)
|
||||
FermionField tmp(FGrid);
|
||||
|
||||
for(int s=0;s<nrhs;s++) result[s]=zero;
|
||||
#undef LEXICO_TEST
|
||||
#define LEXICO_TEST
|
||||
#ifdef LEXICO_TEST
|
||||
{
|
||||
LatticeFermion lex(FGrid); lex = zero;
|
||||
@ -121,12 +121,12 @@ int main (int argc, char ** argv)
|
||||
random(pRNG5,src[s]);
|
||||
tmp = 100.0*s;
|
||||
src[s] = (src[s] * 0.1) + tmp;
|
||||
std::cout << " src ]"<<s<<"] "<<norm2(src[s])<<std::endl;
|
||||
std::cout << GridLogMessage << " src ["<<s<<"] "<<norm2(src[s])<<std::endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
for(int n =0 ; n< nrhs ; n++) {
|
||||
std::cout << " src"<<n<<"\n"<< src[n] <<std::endl;
|
||||
// std::cout << " src"<<n<<"\n"<< src[n] <<std::endl;
|
||||
}
|
||||
|
||||
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);
|
||||
@ -144,8 +144,8 @@ int main (int argc, char ** argv)
|
||||
///////////////////////////////////////////////////////////////
|
||||
Grid_split (Umu,s_Umu);
|
||||
Grid_split (src,s_src);
|
||||
std::cout << " split rank " <<me << " s_src "<<norm2(s_src)<<std::endl;
|
||||
std::cout << " s_src\n "<< s_src <<std::endl;
|
||||
std::cout << GridLogMessage << " split rank " <<me << " s_src "<<norm2(s_src)<<std::endl;
|
||||
// std::cout << " s_src\n "<< s_src <<std::endl;
|
||||
|
||||
#ifdef LEXICO_TEST
|
||||
FermionField s_src_tmp(SFGrid);
|
||||
@ -168,11 +168,12 @@ int main (int argc, char ** argv)
|
||||
s_src_tmp = s_src_tmp + ftmp;
|
||||
}
|
||||
s_src_diff = s_src_tmp - s_src;
|
||||
std::cout << " s_src_diff " << norm2(s_src_diff)<<std::endl;
|
||||
std::cout << GridLogMessage <<" LEXICO test: s_src_diff " << norm2(s_src_diff)<<std::endl;
|
||||
|
||||
std::cout << " s_src \n" << s_src << std::endl;
|
||||
std::cout << " s_src_tmp \n" << s_src_tmp << std::endl;
|
||||
std::cout << " s_src_diff \n" << s_src_diff << std::endl;
|
||||
// std::cout << " s_src \n" << s_src << std::endl;
|
||||
// std::cout << " s_src_tmp \n" << s_src_tmp << std::endl;
|
||||
// std::cout << " s_src_diff \n" << s_src_diff << std::endl;
|
||||
// exit(0);
|
||||
#endif
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
@ -189,11 +190,11 @@ int main (int argc, char ** argv)
|
||||
|
||||
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
|
||||
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk);
|
||||
ConjugateGradient<FermionField> CG((1.0e-5),10000);
|
||||
ConjugateGradient<FermionField> CG((1.0e-2),10000);
|
||||
s_res = zero;
|
||||
CG(HermOp,s_src,s_res);
|
||||
|
||||
std::cout << " s_res norm "<<norm2(s_res)<<std::endl;
|
||||
std::cout << GridLogMessage << " split residual norm "<<norm2(s_res)<<std::endl;
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Report how long they all took
|
||||
/////////////////////////////////////////////////////////////
|
||||
@ -214,7 +215,7 @@ int main (int argc, char ** argv)
|
||||
|
||||
std::cout << GridLogMessage<< "Checking the residuals"<<std::endl;
|
||||
for(int n=0;n<nrhs;n++){
|
||||
std::cout << " res["<<n<<"] norm "<<norm2(result[n])<<std::endl;
|
||||
std::cout << GridLogMessage<< " res["<<n<<"] norm "<<norm2(result[n])<<std::endl;
|
||||
HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n];
|
||||
std::cout << GridLogMessage<<" resid["<<n<<"] "<< norm2(tmp)/norm2(src[n])<<std::endl;
|
||||
}
|
||||
|
@ -70,7 +70,21 @@ int main (int argc, char ** argv)
|
||||
ConjugateGradient<FermionField> CG(1.0e-8,10000);
|
||||
SchurRedBlackStaggeredSolve<FermionField> SchurSolver(CG);
|
||||
|
||||
double volume=1.0;
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
volume=volume*latt_size[mu];
|
||||
}
|
||||
double t1=usecond();
|
||||
SchurSolver(Ds,src,result);
|
||||
double t2=usecond();
|
||||
|
||||
// Schur solver: uses DeoDoe => volume * 1146
|
||||
double ncall=CG.IterationsToComplete;
|
||||
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
|
||||
|
||||
std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
|
||||
std::cout<<GridLogMessage << "flop/s = "<< flops<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user