Mirror of https://github.com/paboyle/Grid.git

commit 2fd4989029
    Merge branch 'develop' of github.com:paboyle/Grid into develop
@@ -308,32 +308,34 @@ namespace Grid {
   public:
     SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){};
     virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
+      GridLogIterative.TimingMode(1);
+      std::cout << GridLogIterative << " HermOpAndNorm "<<std::endl;
       n2 = Mpc(in,out);
+      std::cout << GridLogIterative << " HermOpAndNorm.Mpc "<<std::endl;
       ComplexD dot= innerProduct(in,out);
+      std::cout << GridLogIterative << " HermOpAndNorm.innerProduct "<<std::endl;
       n1 = real(dot);
     }
     virtual void HermOp(const Field &in, Field &out){
+      std::cout << GridLogIterative << " HermOp "<<std::endl;
       Mpc(in,out);
     }
     virtual RealD Mpc (const Field &in, Field &out) {
       Field tmp(in._grid);
       Field tmp2(in._grid);

+      std::cout << GridLogIterative << " HermOp.Mpc "<<std::endl;
       _Mat.Mooee(in,out);
       _Mat.Mooee(out,tmp);
+      std::cout << GridLogIterative << " HermOp.MooeeMooee "<<std::endl;

       _Mat.Meooe(in,out);
       _Mat.Meooe(out,tmp2);
+      std::cout << GridLogIterative << " HermOp.MeooeMeooe "<<std::endl;

-      return axpy_norm(out,-1.0,tmp2,tmp);
-#if 0
-      //... much prefer conventional Schur norm
-      _Mat.Meooe(in,tmp);
-      _Mat.MooeeInv(tmp,out);
-      _Mat.Meooe(out,tmp);
-      _Mat.Mooee(in,out);
-      return axpy_norm(out,-1.0,tmp,out);
-#endif
+      RealD nn=axpy_norm(out,-1.0,tmp2,tmp);
+      std::cout << GridLogIterative << " HermOp.axpy_norm "<<std::endl;
+      return nn;
     }
     virtual RealD MpcDag (const Field &in, Field &out){
       return Mpc(in,out);
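
For reference, a minimal standalone sketch (toy types, not Grid code and not part of this commit) of the final combination in Mpc above: out = tmp - tmp2 with its squared norm returned, assuming Grid's axpy_norm(z,a,x,y) computes z = a*x + y and returns norm2(z).

// Toy illustration only; "Field" here is just std::vector<double>, not Grid's Lattice type.
#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

using Field = std::vector<double>;

// assumed semantics of Grid's axpy_norm: z = a*x + y, return norm2(z)
double axpy_norm(Field &z, double a, const Field &x, const Field &y) {
  assert(x.size() == y.size());
  z.resize(x.size());
  double nrm = 0.0;
  for (std::size_t i = 0; i < x.size(); i++) {
    z[i] = a * x[i] + y[i];   // z = a*x + y
    nrm += z[i] * z[i];       // accumulate norm2(z)
  }
  return nrm;
}

int main() {
  Field tmp  = {2.0, 2.0, 2.0, 2.0};   // stands in for Mooee(Mooee(in))
  Field tmp2 = {0.5, 0.5, 0.5, 0.5};   // stands in for Meooe(Meooe(in))
  Field out;
  double nn = axpy_norm(out, -1.0, tmp2, tmp);        // out = tmp - tmp2
  std::cout << "norm2(out) = " << nn << std::endl;    // 4 * 1.5^2 = 9
  return 0;
}
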
@@ -123,11 +123,14 @@ namespace Grid {
       Field tmp(grid);
       Field Mtmp(grid);
       Field resid(fgrid);

+      std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve " <<std::endl;
       pickCheckerboard(Even,src_e,in);
       pickCheckerboard(Odd ,src_o,in);
       pickCheckerboard(Even,sol_e,out);
       pickCheckerboard(Odd ,sol_o,out);

+      std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" <<std::endl;
+
       /////////////////////////////////////////////////////
       // src_o = (source_o - Moe MeeInv source_e)
@@ -144,6 +147,7 @@ namespace Grid {
       //////////////////////////////////////////////////////////////
       std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl;
       _HermitianRBSolver(_HermOpEO,src_o,sol_o);  assert(sol_o.checkerboard==Odd);
+      std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver called the Mpc solver" <<std::endl;

       ///////////////////////////////////////////////////
       // sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
@@ -152,15 +156,16 @@ namespace Grid {
       src_e = src_e-tmp;               assert( src_e.checkerboard ==Even);
       _Matrix.MooeeInv(src_e,sol_e);   assert( sol_e.checkerboard ==Even);

+      std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver reconstructed other CB" <<std::endl;
       setCheckerboard(out,sol_e);      assert( sol_e.checkerboard ==Even);
       setCheckerboard(out,sol_o);      assert( sol_o.checkerboard ==Odd );
+      std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver inserted solution" <<std::endl;

       // Verify the unprec residual
       _Matrix.M(out,resid);
       resid = resid-in;
       RealD ns = norm2(in);
       RealD nr = norm2(resid);

       std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
     }
   };
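
The comments in these hunks describe the generic even/odd Schur decomposition the solver performs. A minimal standalone numerical sketch (not Grid code) of those steps, using scalar stand-ins for the operator blocks:

// Toy 2x2 block system with scalar "blocks"; illustrates src_o = b_o - Moe Mee^{-1} b_e,
// the Schur-complement solve for x_o, and the reconstruction x_e = Mee^{-1}(b_e - Meo x_o).
#include <cassert>
#include <cmath>
#include <iostream>

int main() {
  double Mee = 4.0, Meo = 1.0, Moe = 2.0, Moo = 3.0;  // hypothetical blocks
  double be = 5.0, bo = 6.0;                          // hypothetical sources

  double src_o = bo - Moe * (be / Mee);   // preconditioned odd source
  double Mpc   = Moo - Moe * Meo / Mee;   // Schur complement operator
  double x_o   = src_o / Mpc;             // the "CG" solve, trivial for scalars
  double x_e   = (be - Meo * x_o) / Mee;  // reconstruct the even checkerboard

  // Verify the unpreconditioned residual, as the solver above does at the end
  double r_e = Mee * x_e + Meo * x_o - be;
  double r_o = Moe * x_e + Moo * x_o - bo;
  assert(std::sqrt(r_e * r_e + r_o * r_o) < 1e-12);
  std::cout << "x_e=" << x_e << " x_o=" << x_o << std::endl;
  return 0;
}
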
@@ -134,8 +134,18 @@ void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t
 CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
 {
   _ndimension = processors.size();
-  assert(_ndimension = parent._ndimension);
+
+  int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
+  std::vector<int> parent_processor_coor(_ndimension,0);
+  std::vector<int> parent_processors    (_ndimension,1);
+
+  // Can make 5d grid from 4d etc...
+  int pad = _ndimension-parent_ndimension;
+  for(int d=0;d<parent_ndimension;d++){
+    parent_processor_coor[pad+d]=parent._processor_coor[d];
+    parent_processors    [pad+d]=parent._processors[d];
+  }

   //////////////////////////////////////////////////////////////////////////////////////////////////////
   // split the communicator
   //////////////////////////////////////////////////////////////////////////////////////////////////////
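
A minimal standalone sketch (not Grid code) of the padding introduced above: a lower-dimensional parent processor grid is embedded into the child layout by left-padding its coordinates with pad = _ndimension - parent_ndimension entries of coordinate 0 and extent 1. The dimensions and coordinates below are hypothetical.

#include <cassert>
#include <vector>

int main() {
  int child_ndim = 5, parent_ndim = 4;             // e.g. a 5d grid made from a 4d one
  std::vector<int> parent_coor_4d  = {1, 0, 2, 3}; // hypothetical parent coordinate
  std::vector<int> parent_procs_4d = {2, 2, 4, 4}; // hypothetical parent decomposition

  std::vector<int> parent_processor_coor(child_ndim, 0);  // padded coordinate, zeros by default
  std::vector<int> parent_processors    (child_ndim, 1);  // padded extents, ones by default
  int pad = child_ndim - parent_ndim;
  for (int d = 0; d < parent_ndim; d++) {
    parent_processor_coor[pad + d] = parent_coor_4d[d];
    parent_processors    [pad + d] = parent_procs_4d[d];
  }
  assert(parent_processor_coor[0] == 0 && parent_processors[0] == 1); // the padded dimension
  assert(parent_processor_coor[1] == 1 && parent_processors[4] == 4); // original data shifted right
  return 0;
}
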
@@ -154,9 +164,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
   std::vector<int> ssize(_ndimension); // coor of split within parent

   for(int d=0;d<_ndimension;d++){
-    ccoor[d] = parent._processor_coor[d] % processors[d];
-    scoor[d] = parent._processor_coor[d] / processors[d];
-    ssize[d] = parent._processors[d]     / processors[d];
+    ccoor[d] = parent_processor_coor[d] % processors[d];
+    scoor[d] = parent_processor_coor[d] / processors[d];
+    ssize[d] = parent_processors[d]     / processors[d];
   }
   int crank;  // rank within subcomm ; srank is rank of subcomm within blocks of subcomms
   // Mpi uses the reverse Lexico convention to us
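
A minimal standalone sketch (not Grid code) of the %/÷ split arithmetic above, using a hypothetical 4x4 parent grid divided into 2x2 children: each parent rank keeps ccoor = coor % child_procs inside its child communicator, and scoor = coor / child_procs labels which child communicator it belongs to.

#include <cassert>
#include <iostream>
#include <vector>

int main() {
  std::vector<int> parent_processors     = {4, 4};   // hypothetical 4x4 parent grid
  std::vector<int> processors            = {2, 2};   // hypothetical 2x2 child grids
  std::vector<int> parent_processor_coor = {3, 1};   // coordinate of one parent rank

  std::vector<int> ccoor(2), scoor(2), ssize(2);
  for (int d = 0; d < 2; d++) {
    ccoor[d] = parent_processor_coor[d] % processors[d];  // coordinate inside the child grid
    scoor[d] = parent_processor_coor[d] / processors[d];  // which child grid
    ssize[d] = parent_processors[d]     / processors[d];  // number of child grids per dimension
  }
  // rank (3,1) of the 4x4 parent sits at (1,1) of child grid (1,0); there are 2x2 children
  std::cout << "ccoor=(" << ccoor[0] << "," << ccoor[1] << ") "
            << "scoor=(" << scoor[0] << "," << scoor[1] << ") "
            << "ssize=(" << ssize[0] << "," << ssize[1] << ")" << std::endl;
  assert(ccoor[0] == 1 && scoor[0] == 1 && ssize[0] == 2);
  return 0;
}
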
@@ -166,38 +176,36 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
   MPI_Comm comm_split;
   if ( Nchild > 1 ) {

-    /*
+    if(0){
       std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
       std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
-      for(int d=0;d<parent._processors.size();d++)  std::cout << parent._processors[d] << " ";
+      for(int d=0;d<parent._ndimension;d++)  std::cout << parent._processors[d] << " ";
       std::cout<<std::endl;

       std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
       for(int d=0;d<processors.size();d++)  std::cout << processors[d] << " ";
       std::cout<<std::endl;

-      std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< _ndimension <<"] ";
-      for(int d=0;d<processors.size();d++)  std::cout << parent._processor_coor[d] << " ";
+      std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< parent._ndimension <<"] ";
+      for(int d=0;d<parent._ndimension;d++)  std::cout << parent._processor_coor[d] << " ";
       std::cout<<std::endl;

-      std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] ";
-      for(int d=0;d<processors.size();d++)  std::cout << ccoor[d] << " ";
+      std::cout << GridLogMessage<<" new split "<< srank<<" scoor ["<< _ndimension <<"] ";
+      for(int d=0;d<processors.size();d++)  std::cout << scoor[d] << " ";
       std::cout<<std::endl;

-      std::cout << GridLogMessage<<" new coor ["<< _ndimension <<"] ";
-      for(int d=0;d<processors.size();d++)  std::cout << parent._processor_coor[d] << " ";
+      std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] ";
+      for(int d=0;d<processors.size();d++)  std::cout << ccoor[d] << " ";
       std::cout<<std::endl;
-    */
+    }

     int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
     assert(ierr==0);
     //////////////////////////////////////////////////////////////////////////////////////////////////////
     // Declare victory
     //////////////////////////////////////////////////////////////////////////////////////////////////////
-    /*
-    std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
-              << Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
-    */
+    //  std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
+    //            << Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
   } else {
     comm_split=parent.communicator;
     srank = 0;
@@ -207,6 +215,17 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
   // Set up from the new split communicator
   //////////////////////////////////////////////////////////////////////////////////////////////////////
   InitFromMPICommunicator(processors,comm_split);

+  if(0){
+    std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
+    for(int d=0;d<processors.size();d++){
+      std::cout << d<< " " << _processor_coor[d] <<" " << ccoor[d]<<std::endl;
+    }
+  }
+
+  for(int d=0;d<processors.size();d++){
+    assert(_processor_coor[d] == ccoor[d] );
+  }
+
 }

 //////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -231,7 +250,7 @@ void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &proc
   MPI_Comm_rank(communicator,&_processor);
   MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);

-  if ( communicator_base != communicator_world ) {
+  if ( 0 && (communicator_base != communicator_world) ) {
     std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"<<std::endl;

     std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] ";
@@ -606,7 +606,7 @@ CartesianCommunicator::~CartesianCommunicator()
   MPI_Finalized(&MPI_is_finalised);
   if (communicator && !MPI_is_finalised) {
     MPI_Comm_free(&communicator);
-    for(int i=0;i< communicator_halo.size();i++){
+    for(int i=0;i<communicator_halo.size();i++){
       MPI_Comm_free(&communicator_halo[i]);
     }
   }
@@ -50,26 +50,22 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
   ////////////////////////////////////////////////////////////////////////////////////////////
   template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
     half.checkerboard = cb;
-    int ssh=0;
-    //parallel_for
-    for(int ss=0;ss<full._grid->oSites();ss++){
-      std::vector<int> coor;
+    parallel_for(int ss=0;ss<full._grid->oSites();ss++){
       int cbos;
+      std::vector<int> coor;
       full._grid->oCoorFromOindex(coor,ss);
       cbos=half._grid->CheckerBoard(coor);

       if (cbos==cb) {
+        int ssh=half._grid->oIndex(coor);
         half._odata[ssh] = full._odata[ss];
-        ssh++;
       }
     }
   }
   template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
     int cb = half.checkerboard;
-    int ssh=0;
-    //parallel_for
-    for(int ss=0;ss<full._grid->oSites();ss++){
+    parallel_for(int ss=0;ss<full._grid->oSites();ss++){
       std::vector<int> coor;
       int cbos;

@@ -77,8 +73,8 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
       cbos=half._grid->CheckerBoard(coor);

       if (cbos==cb) {
+        int ssh=half._grid->oIndex(coor);
         full._odata[ss]=half._odata[ssh];
-        ssh++;
       }
     }
   }
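
The change above replaces a serial running counter (the old ssh++) with a per-site index computed from the coordinate, which is what lets the loop become a parallel_for. A standalone sketch (not Grid code) of the same idea on a toy 1-d lattice:

#include <cassert>
#include <vector>

int main() {
  const int L = 8;                        // toy 1-d lattice
  std::vector<int> full(L), half(L / 2);
  for (int s = 0; s < L; s++) full[s] = 100 + s;

  // Gather the even-parity sites. Each site computes its own half-lattice index
  // s/2 (the analogue of half._grid->oIndex(coor)), so every iteration is
  // independent of the others and the loop could run in parallel without a
  // race on a shared counter.
  for (int s = 0; s < L; s++) {
    if (s % 2 == 0) {
      int ssh = s / 2;
      half[ssh] = full[s];
    }
  }
  assert(half[3] == 106);                 // site 6 lands at half index 3
  return 0;
}
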
@@ -698,30 +694,6 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
 ////////////////////////////////////////////////////////////////////////////////
 // Communicate between grids
 ////////////////////////////////////////////////////////////////////////////////
-//
-// All to all plan
-//
-// Subvolume on fine grid is v. Vectors a,b,c,d
-//
-///////////////////////////////////////////////////////////////////////////////////////////////////////////
-// SIMPLEST CASE:
-///////////////////////////////////////////////////////////////////////////////////////////////////////////
-// Mesh of nodes (2) ; subdivide to 1 subdivisions
-//
-// Lex ord:
-//          N0 va0 vb0 N1 va1 vb1
-//
-// For each dimension do an all to all
-//
-// full AllToAll(0)
-//          N0 va0 va1 N1 vb0 vb1
-//
-// REARRANGE
-//          N0 va01 N1 vb01
-//
-// Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract".
-// NB: Easiest to programme if keep in lex order.
-//
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 // SIMPLE CASE:
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -755,9 +727,17 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
 //
 // Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract".
 // NB: Easiest to programme if keep in lex order.
-//
-/////////////////////////////////////////////////////////
+/*
+ * Let chunk = (fvol*nvec)/sP be size of a chunk. ( Divide lexico vol * nvec into fP/sP = M chunks )
+ *
+ * 2nd A2A (over sP nodes; subdivide the fP into sP chunks of M)
+ *
+ * node 0     1st chunk of node 0M..(1M-1); 2nd chunk of node 0M..(1M-1)..  data chunk x M x sP = fL / sP * M * sP = fL * M growth
+ * node 1     1st chunk of node 1M..(2M-1); 2nd chunk of node 1M..(2M-1)..
+ * node 2     1st chunk of node 2M..(3M-1); 2nd chunk of node 2M..(3M-1)..
+ * node 3     1st chunk of node 3M..(3M-1); 2nd chunk of node 2M..(3M-1)..
+ * etc...
+ */
 template<class Vobj>
 void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
 {
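
A minimal standalone sketch (not Grid code) of the chunk bookkeeping described in the comment above, with hypothetical processor counts and volumes; the divisibility check mirrors the assert in the Grid_split hunk that follows, and the lex_c = c + chunk*m + chunk*M*s addressing is the same layout used there.

#include <cassert>
#include <cstdint>
#include <iostream>

int main() {
  int fP = 4, sP = 2;                 // hypothetical full-grid and split-grid ranks in one dim
  int M  = fP / sP;                   // chunks per source rank, as in the comment
  int fvol = 16;                      // hypothetical local lexico volume
  int nvec = 8;                       // hypothetical number of RHS vectors

  int chunk = (nvec * fvol) / sP;     // size of one chunk
  assert(chunk * sP == nvec * fvol);  // same divisibility requirement as the code

  // Post-A2A buffer addressing: fastest in c, then chunk index m, then source rank s.
  uint64_t max_lex_c = (chunk - 1) + (uint64_t)chunk * (M - 1) + (uint64_t)chunk * M * (sP - 1);
  std::cout << "chunk=" << chunk << " buffer entries=" << max_lex_c + 1 << std::endl;
  assert(max_lex_c + 1 == (uint64_t)chunk * M * sP);  // total == nvec*fvol*M, the "fL * M growth"
  return 0;
}
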
@@ -816,57 +796,58 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)

   int nvec = nvector; // Counts down to 1 as we collapse dims
   std::vector<int> ldims = full_grid->_ldimensions;
-  std::vector<int> lcoor(ndim);

   for(int d=ndim-1;d>=0;d--){

     if ( ratio[d] != 1 ) {

       full_grid ->AllToAll(d,alldata,tmpdata);
-      // std::cout << GridLogMessage << "Grid_split: dim " <<d<<" ratio "<<ratio[d]<<" nvec "<<nvec<<" procs "<<split_grid->_processors[d]<<std::endl;
-      // for(int v=0;v<nvec;v++){
-      //   std::cout << "Grid_split: alldata["<<v<<"] " << alldata[v] <<std::endl;
-      //   std::cout << "Grid_split: tmpdata["<<v<<"] " << tmpdata[v] <<std::endl;
-      // }
-      //////////////////////////////////////////
-      //Local volume for this dimension is expanded by ratio of processor extents
-      // Number of vectors is decreased by same factor
-      // Rearrange to lexico for bigger volume
-      //////////////////////////////////////////
-      nvec /= ratio[d];
-      auto rdims = ldims; rdims[d] *= ratio[d];
-      auto rsites= lsites*ratio[d];
-      for(int v=0;v<nvec;v++){
-
-        // For loop over each site within old subvol
-        for(int lsite=0;lsite<lsites;lsite++){
-
-          Lexicographic::CoorFromIndex(lcoor, lsite, ldims);
-
-          for(int r=0;r<ratio[d];r++){ // ratio*nvec terms
-
-            auto rcoor = lcoor; rcoor[d] += r*ldims[d];
-
-            int rsite; Lexicographic::IndexFromCoor(rcoor, rsite, rdims);
-            rsite += v * rsites;
-
-            int rmul=nvec*lsites;
-            int vmul=     lsites;
-            alldata[rsite] = tmpdata[lsite+r*rmul+v*vmul];
-            // if ( lsite==0 ) {
-            //   std::cout << "Grid_split: grow alldata["<<rsite<<"] " << alldata[rsite] << " <- tmpdata["<< lsite+r*rmul+v*vmul<<"] "<<tmpdata[lsite+r*rmul+v*vmul] <<std::endl;
-            // }
+      if ( split_grid->_processors[d] > 1 ) {
+        alldata=tmpdata;
+        split_grid->AllToAll(d,alldata,tmpdata);
+      }
+
+      auto rdims = ldims;
+      auto M     = ratio[d];
+      auto rsites= lsites*M;  // increases rsites by M
+      nvec      /= M;         // Reduce nvec by subdivision factor
+      rdims[d]  *= M;         // increase local dim by same factor
+
+      int sP =   split_grid->_processors[d];
+      int fP =    full_grid->_processors[d];
+
+      int fvol   = lsites;
+
+      int chunk  = (nvec*fvol)/sP;  assert(chunk*sP == nvec*fvol);
+
+      // Loop over reordered data post A2A
+      parallel_for(int c=0;c<chunk;c++){
+        for(int m=0;m<M;m++){
+          for(int s=0;s<sP;s++){
+
+            // addressing; use lexico
+            int lex_r;
+            uint64_t lex_c        = c+chunk*m+chunk*M*s;
+            uint64_t lex_fvol_vec = c+chunk*s;
+            uint64_t lex_fvol     = lex_fvol_vec%fvol;
+            uint64_t lex_vec      = lex_fvol_vec/fvol;
+
+            // which node sets an adder to the coordinate
+            std::vector<int> coor(ndim);
+            Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);
+            coor[d] += m*ldims[d];
+            Lexicographic::IndexFromCoor(coor, lex_r, rdims);
+            lex_r += lex_vec * rsites;
+
+            // LexicoFind coordinate & vector number within split lattice
+            alldata[lex_r] = tmpdata[lex_c];
           }
         }
       }
       ldims[d]*= ratio[d];
       lsites  *= ratio[d];

-      if ( split_grid->_processors[d] > 1 ) {
-        tmpdata = alldata;
-        split_grid->AllToAll(d,tmpdata,alldata);
-      }
     }
   }
   vectorizeFromLexOrdArray(alldata,split);
@@ -937,59 +918,61 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
   /////////////////////////////////////////////////////////////////
   // Start from split grid and work towards full grid
   /////////////////////////////////////////////////////////////////
-  std::vector<int> lcoor(ndim);
-  std::vector<int> rcoor(ndim);

   int nvec = 1;
-  lsites = split_grid->lSites();
-  std::vector<int> ldims = split_grid->_ldimensions;
+  uint64_t rsites = split_grid->lSites();
+  std::vector<int> rdims = split_grid->_ldimensions;

-  // for(int d=ndim-1;d>=0;d--){
   for(int d=0;d<ndim;d++){

     if ( ratio[d] != 1 ) {

+      auto M = ratio[d];

-      if ( split_grid->_processors[d] > 1 ) {
-        tmpdata = alldata;
-        split_grid->AllToAll(d,tmpdata,alldata);
-      }
-      //////////////////////////////////////////
-      //Local volume for this dimension is expanded by ratio of processor extents
-      // Number of vectors is decreased by same factor
-      // Rearrange to lexico for bigger volume
-      //////////////////////////////////////////
-      auto rsites= lsites/ratio[d];
-      auto rdims = ldims; rdims[d]/=ratio[d];
-
-      for(int v=0;v<nvec;v++){
-
-        // rsite, rcoor --> smaller local volume
-        // lsite, lcoor --> bigger original (single node?) volume
-        // For loop over each site within smaller subvol
-        for(int rsite=0;rsite<rsites;rsite++){
-
-          Lexicographic::CoorFromIndex(rcoor, rsite, rdims);
-          int lsite;
-
-          for(int r=0;r<ratio[d];r++){
-
-            lcoor = rcoor; lcoor[d] += r*rdims[d];
-            Lexicographic::IndexFromCoor(lcoor, lsite, ldims); lsite += v * lsites;
-
-            int rmul=nvec*rsites;
-            int vmul=     rsites;
-            tmpdata[rsite+r*rmul+v*vmul]=alldata[lsite];
+      int sP = split_grid->_processors[d];
+      int fP =  full_grid->_processors[d];
+
+      auto ldims = rdims;  ldims[d] /= M;  // Decrease local dims by same factor
+      auto lsites= rsites/M;               // Decreases rsites by M
+
+      int fvol   = lsites;
+      int chunk  = (nvec*fvol)/sP;  assert(chunk*sP == nvec*fvol);
+
+      {
+        // Loop over reordered data post A2A
+        for(int c=0;c<chunk;c++){
+          for(int m=0;m<M;m++){
+            for(int s=0;s<sP;s++){
+
+              // addressing; use lexico
+              int lex_r;
+              uint64_t lex_c        = c+chunk*m+chunk*M*s;
+              uint64_t lex_fvol_vec = c+chunk*s;
+              uint64_t lex_fvol     = lex_fvol_vec%fvol;
+              uint64_t lex_vec      = lex_fvol_vec/fvol;
+
+              // which node sets an adder to the coordinate
+              std::vector<int> coor(ndim);
+              Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);
+              coor[d] += m*ldims[d];
+              Lexicographic::IndexFromCoor(coor, lex_r, rdims);
+              lex_r += lex_vec * rsites;
+
+              // LexicoFind coordinate & vector number within split lattice
+              tmpdata[lex_c] = alldata[lex_r];
+            }
           }
         }
       }
-      nvec *= ratio[d];
-      ldims[d]=rdims[d];
-      lsites =rsites;
+
+      if ( split_grid->_processors[d] > 1 ) {
+        split_grid->AllToAll(d,tmpdata,alldata);
+        tmpdata=alldata;
+      }
       full_grid ->AllToAll(d,tmpdata,alldata);
+      rdims[d]/= M;
+      rsites  /= M;
+      nvec    *= M; // Increase nvec by subdivision factor
     }
   }

|
|||||||
for(int v=0;v<nvector;v++){
|
for(int v=0;v<nvector;v++){
|
||||||
assert(v<full.size());
|
assert(v<full.size());
|
||||||
parallel_for(int site=0;site<lsites;site++){
|
parallel_for(int site=0;site<lsites;site++){
|
||||||
|
assert(v*lsites+site < alldata.size());
|
||||||
scalardata[site] = alldata[v*lsites+site];
|
scalardata[site] = alldata[v*lsites+site];
|
||||||
}
|
}
|
||||||
vectorizeFromLexOrdArray(scalardata,full[v]);
|
vectorizeFromLexOrdArray(scalardata,full[v]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -95,7 +95,7 @@ int main (int argc, char ** argv)
|
|||||||
FermionField tmp(FGrid);
|
FermionField tmp(FGrid);
|
||||||
|
|
||||||
for(int s=0;s<nrhs;s++) result[s]=zero;
|
for(int s=0;s<nrhs;s++) result[s]=zero;
|
||||||
#undef LEXICO_TEST
|
#define LEXICO_TEST
|
||||||
#ifdef LEXICO_TEST
|
#ifdef LEXICO_TEST
|
||||||
{
|
{
|
||||||
LatticeFermion lex(FGrid); lex = zero;
|
LatticeFermion lex(FGrid); lex = zero;
|
||||||
@@ -121,12 +121,12 @@ int main (int argc, char ** argv)
     random(pRNG5,src[s]);
     tmp = 100.0*s;
     src[s] = (src[s] * 0.1) + tmp;
-    std::cout << " src ]"<<s<<"] "<<norm2(src[s])<<std::endl;
+    std::cout << GridLogMessage << " src ["<<s<<"] "<<norm2(src[s])<<std::endl;
   }
 #endif

   for(int n =0 ; n< nrhs ; n++) {
-    std::cout << " src"<<n<<"\n"<< src[n] <<std::endl;
+    //  std::cout << " src"<<n<<"\n"<< src[n] <<std::endl;
   }

   LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);
@@ -144,8 +144,8 @@ int main (int argc, char ** argv)
   ///////////////////////////////////////////////////////////////
   Grid_split  (Umu,s_Umu);
   Grid_split  (src,s_src);
-  std::cout << " split rank " <<me << " s_src "<<norm2(s_src)<<std::endl;
-  std::cout << " s_src\n "<< s_src <<std::endl;
+  std::cout << GridLogMessage << " split rank " <<me << " s_src "<<norm2(s_src)<<std::endl;
+  //  std::cout << " s_src\n "<< s_src <<std::endl;

 #ifdef LEXICO_TEST
   FermionField s_src_tmp(SFGrid);
@@ -168,11 +168,12 @@ int main (int argc, char ** argv)
     s_src_tmp = s_src_tmp + ftmp;
   }
   s_src_diff = s_src_tmp - s_src;
-  std::cout << " s_src_diff " << norm2(s_src_diff)<<std::endl;
+  std::cout << GridLogMessage <<" LEXICO test: s_src_diff " << norm2(s_src_diff)<<std::endl;

-  std::cout << " s_src \n" << s_src << std::endl;
-  std::cout << " s_src_tmp \n" << s_src_tmp << std::endl;
-  std::cout << " s_src_diff \n" << s_src_diff << std::endl;
+  //  std::cout << " s_src \n" << s_src << std::endl;
+  //  std::cout << " s_src_tmp \n" << s_src_tmp << std::endl;
+  //  std::cout << " s_src_diff \n" << s_src_diff << std::endl;
+  //  exit(0);
 #endif

 ///////////////////////////////////////////////////////////////
@@ -189,11 +190,11 @@ int main (int argc, char ** argv)

   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
   MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk);
-  ConjugateGradient<FermionField> CG((1.0e-5),10000);
+  ConjugateGradient<FermionField> CG((1.0e-2),10000);
   s_res = zero;
   CG(HermOp,s_src,s_res);

-  std::cout << " s_res norm "<<norm2(s_res)<<std::endl;
+  std::cout << GridLogMessage << " split residual norm "<<norm2(s_res)<<std::endl;
   /////////////////////////////////////////////////////////////
   // Report how long they all took
   /////////////////////////////////////////////////////////////
@@ -214,7 +215,7 @@ int main (int argc, char ** argv)

   std::cout << GridLogMessage<< "Checking the residuals"<<std::endl;
   for(int n=0;n<nrhs;n++){
-    std::cout << " res["<<n<<"] norm "<<norm2(result[n])<<std::endl;
+    std::cout << GridLogMessage<< " res["<<n<<"] norm "<<norm2(result[n])<<std::endl;
     HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n];
     std::cout << GridLogMessage<<" resid["<<n<<"] "<< norm2(tmp)/norm2(src[n])<<std::endl;
   }
@@ -70,7 +70,21 @@ int main (int argc, char ** argv)
   ConjugateGradient<FermionField> CG(1.0e-8,10000);
   SchurRedBlackStaggeredSolve<FermionField> SchurSolver(CG);

+  double volume=1.0;
+  for(int mu=0;mu<Nd;mu++){
+    volume=volume*latt_size[mu];
+  }
+  double t1=usecond();
   SchurSolver(Ds,src,result);
+  double t2=usecond();
+
+  // Schur solver: uses DeoDoe => volume * 1146
+  double ncall=CG.IterationsToComplete;
+  double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
+
+  std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
+  std::cout<<GridLogMessage << "flop/s = "<< flops<<std::endl;
+  std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
+
   Grid_finalize();
 }
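
A minimal standalone check (not part of the commit) of the flop estimate used above, with hypothetical volume, iteration count, and elapsed time; the per-site count in the comment works out to 16*66 + 90 = 1146, and dividing flops by microseconds gives Mflop/s.

#include <cassert>
#include <iostream>

int main() {
  double per_site = 16 * (3 * (6 + 8 + 8)) + 15 * 3 * 2;  // 1056 + 90 flops per site per call
  assert(per_site == 1146);

  // Hypothetical run: 16^4 volume, 1000 CG iterations, 2.0e6 usec elapsed.
  double volume = 16.0 * 16.0 * 16.0 * 16.0;
  double ncall  = 1000.0;
  double usecs  = 2.0e6;
  double flops  = per_site * volume * ncall;
  std::cout << "total flops = " << flops
            << "  Mflop/s = " << flops / usecs << std::endl;  // flops per usec == Mflop/s
  return 0;
}
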