1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-04 19:25:56 +01:00

Merge branch 'develop' of github.com:paboyle/Grid into develop

This commit is contained in:
Antonin Portelli 2017-12-01 19:44:31 +00:00
commit 2fd4989029
7 changed files with 190 additions and 166 deletions

View File

@ -308,32 +308,34 @@ namespace Grid {
public:
SchurStaggeredOperator (Matrix &Mat): _Mat(Mat){};
virtual void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
GridLogIterative.TimingMode(1);
std::cout << GridLogIterative << " HermOpAndNorm "<<std::endl;
n2 = Mpc(in,out);
std::cout << GridLogIterative << " HermOpAndNorm.Mpc "<<std::endl;
ComplexD dot= innerProduct(in,out);
std::cout << GridLogIterative << " HermOpAndNorm.innerProduct "<<std::endl;
n1 = real(dot);
}
virtual void HermOp(const Field &in, Field &out){
std::cout << GridLogIterative << " HermOp "<<std::endl;
Mpc(in,out);
}
virtual RealD Mpc (const Field &in, Field &out) {
Field tmp(in._grid);
Field tmp2(in._grid);
std::cout << GridLogIterative << " HermOp.Mpc "<<std::endl;
_Mat.Mooee(in,out);
_Mat.Mooee(out,tmp);
std::cout << GridLogIterative << " HermOp.MooeeMooee "<<std::endl;
_Mat.Meooe(in,out);
_Mat.Meooe(out,tmp2);
std::cout << GridLogIterative << " HermOp.MeooeMeooe "<<std::endl;
return axpy_norm(out,-1.0,tmp2,tmp);
#if 0
//... much prefer conventional Schur norm
_Mat.Meooe(in,tmp);
_Mat.MooeeInv(tmp,out);
_Mat.Meooe(out,tmp);
_Mat.Mooee(in,out);
return axpy_norm(out,-1.0,tmp,out);
#endif
RealD nn=axpy_norm(out,-1.0,tmp2,tmp);
std::cout << GridLogIterative << " HermOp.axpy_norm "<<std::endl;
return nn;
}
virtual RealD MpcDag (const Field &in, Field &out){
return Mpc(in,out);

View File

@ -123,11 +123,14 @@ namespace Grid {
Field tmp(grid);
Field Mtmp(grid);
Field resid(fgrid);
std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve " <<std::endl;
pickCheckerboard(Even,src_e,in);
pickCheckerboard(Odd ,src_o,in);
pickCheckerboard(Even,sol_e,out);
pickCheckerboard(Odd ,sol_o,out);
std::cout << GridLogMessage << " SchurRedBlackStaggeredSolve checkerboards picked" <<std::endl;
/////////////////////////////////////////////////////
// src_o = (source_o - Moe MeeInv source_e)
@ -144,6 +147,7 @@ namespace Grid {
//////////////////////////////////////////////////////////////
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver calling the Mpc solver" <<std::endl;
_HermitianRBSolver(_HermOpEO,src_o,sol_o); assert(sol_o.checkerboard==Odd);
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver called the Mpc solver" <<std::endl;
///////////////////////////////////////////////////
// sol_e = M_ee^-1 * ( src_e - Meo sol_o )...
@ -152,15 +156,16 @@ namespace Grid {
src_e = src_e-tmp; assert( src_e.checkerboard ==Even);
_Matrix.MooeeInv(src_e,sol_e); assert( sol_e.checkerboard ==Even);
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver reconstructed other CB" <<std::endl;
setCheckerboard(out,sol_e); assert( sol_e.checkerboard ==Even);
setCheckerboard(out,sol_o); assert( sol_o.checkerboard ==Odd );
std::cout<<GridLogMessage << "SchurRedBlackStaggeredSolver inserted solution" <<std::endl;
// Verify the unprec residual
_Matrix.M(out,resid);
resid = resid-in;
RealD ns = norm2(in);
RealD nr = norm2(resid);
std::cout<<GridLogMessage << "SchurRedBlackStaggered solver true unprec resid "<< std::sqrt(nr/ns) <<" nr "<< nr <<" ns "<<ns << std::endl;
}
};

View File

@ -134,8 +134,18 @@ void CartesianCommunicator::AllToAll(void *in,void *out,uint64_t words,uint64_t
CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,const CartesianCommunicator &parent,int &srank)
{
_ndimension = processors.size();
assert(_ndimension = parent._ndimension);
int parent_ndimension = parent._ndimension; assert(_ndimension >= parent._ndimension);
std::vector<int> parent_processor_coor(_ndimension,0);
std::vector<int> parent_processors (_ndimension,1);
// Can make 5d grid from 4d etc...
int pad = _ndimension-parent_ndimension;
for(int d=0;d<parent_ndimension;d++){
parent_processor_coor[pad+d]=parent._processor_coor[d];
parent_processors [pad+d]=parent._processors[d];
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
// split the communicator
//////////////////////////////////////////////////////////////////////////////////////////////////////
@ -154,9 +164,9 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
std::vector<int> ssize(_ndimension); // coor of split within parent
for(int d=0;d<_ndimension;d++){
ccoor[d] = parent._processor_coor[d] % processors[d];
scoor[d] = parent._processor_coor[d] / processors[d];
ssize[d] = parent._processors[d] / processors[d];
ccoor[d] = parent_processor_coor[d] % processors[d];
scoor[d] = parent_processor_coor[d] / processors[d];
ssize[d] = parent_processors[d] / processors[d];
}
int crank; // rank within subcomm ; srank is rank of subcomm within blocks of subcomms
// Mpi uses the reverse Lexico convention to us
@ -166,38 +176,36 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
MPI_Comm comm_split;
if ( Nchild > 1 ) {
/*
std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
for(int d=0;d<parent._processors.size();d++) std::cout << parent._processors[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << parent._processor_coor[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << ccoor[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" new coor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << parent._processor_coor[d] << " ";
std::cout<<std::endl;
*/
if(0){
std::cout << GridLogMessage<<"Child communicator of "<< std::hex << parent.communicator << std::dec<<std::endl;
std::cout << GridLogMessage<<" parent grid["<< parent._ndimension<<"] ";
for(int d=0;d<parent._ndimension;d++) std::cout << parent._processors[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" child grid["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << processors[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" old rank "<< parent._processor<<" coor ["<< parent._ndimension <<"] ";
for(int d=0;d<parent._ndimension;d++) std::cout << parent._processor_coor[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" new split "<< srank<<" scoor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << scoor[d] << " ";
std::cout<<std::endl;
std::cout << GridLogMessage<<" new rank "<< crank<<" coor ["<< _ndimension <<"] ";
for(int d=0;d<processors.size();d++) std::cout << ccoor[d] << " ";
std::cout<<std::endl;
}
int ierr= MPI_Comm_split(parent.communicator,srank,crank,&comm_split);
assert(ierr==0);
//////////////////////////////////////////////////////////////////////////////////////////////////////
// Declare victory
//////////////////////////////////////////////////////////////////////////////////////////////////////
/*
std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
<< Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
*/
// std::cout << GridLogMessage<<"Divided communicator "<< parent._Nprocessors<<" into "
// << Nchild <<" communicators with " << childsize << " ranks"<<std::endl;
} else {
comm_split=parent.communicator;
srank = 0;
@ -207,6 +215,17 @@ CartesianCommunicator::CartesianCommunicator(const std::vector<int> &processors,
// Set up from the new split communicator
//////////////////////////////////////////////////////////////////////////////////////////////////////
InitFromMPICommunicator(processors,comm_split);
if(0){
std::cout << " ndim " <<_ndimension<<" " << parent._ndimension << std::endl;
for(int d=0;d<processors.size();d++){
std::cout << d<< " " << _processor_coor[d] <<" " << ccoor[d]<<std::endl;
}
}
for(int d=0;d<processors.size();d++){
assert(_processor_coor[d] == ccoor[d] );
}
}
//////////////////////////////////////////////////////////////////////////////////////////////////////
@ -231,7 +250,7 @@ void CartesianCommunicator::InitFromMPICommunicator(const std::vector<int> &proc
MPI_Comm_rank(communicator,&_processor);
MPI_Cart_coords(communicator,_processor,_ndimension,&_processor_coor[0]);
if ( communicator_base != communicator_world ) {
if ( 0 && (communicator_base != communicator_world) ) {
std::cout << "InitFromMPICommunicator Cartesian communicator created with a non-world communicator"<<std::endl;
std::cout << " new communicator rank "<<_processor<< " coor ["<<_ndimension<<"] ";

View File

@ -606,7 +606,7 @@ CartesianCommunicator::~CartesianCommunicator()
MPI_Finalized(&MPI_is_finalised);
if (communicator && !MPI_is_finalised) {
MPI_Comm_free(&communicator);
for(int i=0;i< communicator_halo.size();i++){
for(int i=0;i<communicator_halo.size();i++){
MPI_Comm_free(&communicator_halo[i]);
}
}

View File

@ -50,26 +50,22 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
////////////////////////////////////////////////////////////////////////////////////////////
template<class vobj> inline void pickCheckerboard(int cb,Lattice<vobj> &half,const Lattice<vobj> &full){
half.checkerboard = cb;
int ssh=0;
//parallel_for
for(int ss=0;ss<full._grid->oSites();ss++){
std::vector<int> coor;
parallel_for(int ss=0;ss<full._grid->oSites();ss++){
int cbos;
std::vector<int> coor;
full._grid->oCoorFromOindex(coor,ss);
cbos=half._grid->CheckerBoard(coor);
if (cbos==cb) {
int ssh=half._grid->oIndex(coor);
half._odata[ssh] = full._odata[ss];
ssh++;
}
}
}
template<class vobj> inline void setCheckerboard(Lattice<vobj> &full,const Lattice<vobj> &half){
int cb = half.checkerboard;
int ssh=0;
//parallel_for
for(int ss=0;ss<full._grid->oSites();ss++){
parallel_for(int ss=0;ss<full._grid->oSites();ss++){
std::vector<int> coor;
int cbos;
@ -77,8 +73,8 @@ inline void subdivides(GridBase *coarse,GridBase *fine)
cbos=half._grid->CheckerBoard(coor);
if (cbos==cb) {
int ssh=half._grid->oIndex(coor);
full._odata[ss]=half._odata[ssh];
ssh++;
}
}
}
@ -698,30 +694,6 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
////////////////////////////////////////////////////////////////////////////////
// Communicate between grids
////////////////////////////////////////////////////////////////////////////////
//
// All to all plan
//
// Subvolume on fine grid is v. Vectors a,b,c,d
//
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// SIMPLEST CASE:
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// Mesh of nodes (2) ; subdivide to 1 subdivisions
//
// Lex ord:
// N0 va0 vb0 N1 va1 vb1
//
// For each dimension do an all to all
//
// full AllToAll(0)
// N0 va0 va1 N1 vb0 vb1
//
// REARRANGE
// N0 va01 N1 vb01
//
// Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract".
// NB: Easiest to programme if keep in lex order.
//
///////////////////////////////////////////////////////////////////////////////////////////////////////////
// SIMPLE CASE:
///////////////////////////////////////////////////////////////////////////////////////////////////////////
@ -755,9 +727,17 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){
//
// Must also rearrange data to get into the NEW lex order of grid at each stage. Some kind of "insert/extract".
// NB: Easiest to programme if keep in lex order.
//
/////////////////////////////////////////////////////////
/*
* Let chunk = (fvol*nvec)/sP be size of a chunk. ( Divide lexico vol * nvec into fP/sP = M chunks )
*
* 2nd A2A (over sP nodes; subdivide the fP into sP chunks of M)
*
* node 0 1st chunk of node 0M..(1M-1); 2nd chunk of node 0M..(1M-1).. data chunk x M x sP = fL / sP * M * sP = fL * M growth
* node 1 1st chunk of node 1M..(2M-1); 2nd chunk of node 1M..(2M-1)..
* node 2 1st chunk of node 2M..(3M-1); 2nd chunk of node 2M..(3M-1)..
* node 3 1st chunk of node 3M..(3M-1); 2nd chunk of node 2M..(3M-1)..
* etc...
*/
template<class Vobj>
void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
{
@ -816,57 +796,58 @@ void Grid_split(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
int nvec = nvector; // Counts down to 1 as we collapse dims
std::vector<int> ldims = full_grid->_ldimensions;
std::vector<int> lcoor(ndim);
for(int d=ndim-1;d>=0;d--){
if ( ratio[d] != 1 ) {
full_grid ->AllToAll(d,alldata,tmpdata);
// std::cout << GridLogMessage << "Grid_split: dim " <<d<<" ratio "<<ratio[d]<<" nvec "<<nvec<<" procs "<<split_grid->_processors[d]<<std::endl;
// for(int v=0;v<nvec;v++){
// std::cout << "Grid_split: alldata["<<v<<"] " << alldata[v] <<std::endl;
// std::cout << "Grid_split: tmpdata["<<v<<"] " << tmpdata[v] <<std::endl;
// }
//////////////////////////////////////////
//Local volume for this dimension is expanded by ratio of processor extents
// Number of vectors is decreased by same factor
// Rearrange to lexico for bigger volume
//////////////////////////////////////////
nvec /= ratio[d];
if ( split_grid->_processors[d] > 1 ) {
alldata=tmpdata;
split_grid->AllToAll(d,alldata,tmpdata);
}
auto rdims = ldims; rdims[d] *= ratio[d];
auto rsites= lsites*ratio[d];
for(int v=0;v<nvec;v++){
auto rdims = ldims;
auto M = ratio[d];
auto rsites= lsites*M;// increases rsites by M
nvec /= M; // Reduce nvec by subdivision factor
rdims[d] *= M; // increase local dim by same factor
// For loop over each site within old subvol
for(int lsite=0;lsite<lsites;lsite++){
int sP = split_grid->_processors[d];
int fP = full_grid->_processors[d];
Lexicographic::CoorFromIndex(lcoor, lsite, ldims);
int fvol = lsites;
int chunk = (nvec*fvol)/sP; assert(chunk*sP == nvec*fvol);
for(int r=0;r<ratio[d];r++){ // ratio*nvec terms
// Loop over reordered data post A2A
parallel_for(int c=0;c<chunk;c++){
for(int m=0;m<M;m++){
for(int s=0;s<sP;s++){
// addressing; use lexico
int lex_r;
uint64_t lex_c = c+chunk*m+chunk*M*s;
uint64_t lex_fvol_vec = c+chunk*s;
uint64_t lex_fvol = lex_fvol_vec%fvol;
uint64_t lex_vec = lex_fvol_vec/fvol;
auto rcoor = lcoor; rcoor[d] += r*ldims[d];
// which node sets an adder to the coordinate
std::vector<int> coor(ndim);
Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);
coor[d] += m*ldims[d];
Lexicographic::IndexFromCoor(coor, lex_r, rdims);
lex_r += lex_vec * rsites;
int rsite; Lexicographic::IndexFromCoor(rcoor, rsite, rdims);
rsite += v * rsites;
// LexicoFind coordinate & vector number within split lattice
alldata[lex_r] = tmpdata[lex_c];
int rmul=nvec*lsites;
int vmul= lsites;
alldata[rsite] = tmpdata[lsite+r*rmul+v*vmul];
// if ( lsite==0 ) {
// std::cout << "Grid_split: grow alldata["<<rsite<<"] " << alldata[rsite] << " <- tmpdata["<< lsite+r*rmul+v*vmul<<"] "<<tmpdata[lsite+r*rmul+v*vmul] <<std::endl;
// }
}
}
}
ldims[d]*= ratio[d];
lsites *= ratio[d];
if ( split_grid->_processors[d] > 1 ) {
tmpdata = alldata;
split_grid->AllToAll(d,tmpdata,alldata);
}
}
}
vectorizeFromLexOrdArray(alldata,split);
@ -937,59 +918,61 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
/////////////////////////////////////////////////////////////////
// Start from split grid and work towards full grid
/////////////////////////////////////////////////////////////////
std::vector<int> lcoor(ndim);
std::vector<int> rcoor(ndim);
int nvec = 1;
lsites = split_grid->lSites();
std::vector<int> ldims = split_grid->_ldimensions;
uint64_t rsites = split_grid->lSites();
std::vector<int> rdims = split_grid->_ldimensions;
// for(int d=ndim-1;d>=0;d--){
for(int d=0;d<ndim;d++){
if ( ratio[d] != 1 ) {
auto M = ratio[d];
if ( split_grid->_processors[d] > 1 ) {
tmpdata = alldata;
split_grid->AllToAll(d,tmpdata,alldata);
}
//////////////////////////////////////////
//Local volume for this dimension is expanded by ratio of processor extents
// Number of vectors is decreased by same factor
// Rearrange to lexico for bigger volume
//////////////////////////////////////////
auto rsites= lsites/ratio[d];
auto rdims = ldims; rdims[d]/=ratio[d];
for(int v=0;v<nvec;v++){
// rsite, rcoor --> smaller local volume
// lsite, lcoor --> bigger original (single node?) volume
// For loop over each site within smaller subvol
for(int rsite=0;rsite<rsites;rsite++){
Lexicographic::CoorFromIndex(rcoor, rsite, rdims);
int lsite;
for(int r=0;r<ratio[d];r++){
lcoor = rcoor; lcoor[d] += r*rdims[d];
Lexicographic::IndexFromCoor(lcoor, lsite, ldims); lsite += v * lsites;
int rmul=nvec*rsites;
int vmul= rsites;
tmpdata[rsite+r*rmul+v*vmul]=alldata[lsite];
int sP = split_grid->_processors[d];
int fP = full_grid->_processors[d];
auto ldims = rdims; ldims[d] /= M; // Decrease local dims by same factor
auto lsites= rsites/M; // Decreases rsites by M
int fvol = lsites;
int chunk = (nvec*fvol)/sP; assert(chunk*sP == nvec*fvol);
{
// Loop over reordered data post A2A
for(int c=0;c<chunk;c++){
for(int m=0;m<M;m++){
for(int s=0;s<sP;s++){
// addressing; use lexico
int lex_r;
uint64_t lex_c = c+chunk*m+chunk*M*s;
uint64_t lex_fvol_vec = c+chunk*s;
uint64_t lex_fvol = lex_fvol_vec%fvol;
uint64_t lex_vec = lex_fvol_vec/fvol;
// which node sets an adder to the coordinate
std::vector<int> coor(ndim);
Lexicographic::CoorFromIndex(coor, lex_fvol, ldims);
coor[d] += m*ldims[d];
Lexicographic::IndexFromCoor(coor, lex_r, rdims);
lex_r += lex_vec * rsites;
// LexicoFind coordinate & vector number within split lattice
tmpdata[lex_c] = alldata[lex_r];
}
}
}
}
nvec *= ratio[d];
ldims[d]=rdims[d];
lsites =rsites;
if ( split_grid->_processors[d] > 1 ) {
split_grid->AllToAll(d,tmpdata,alldata);
tmpdata=alldata;
}
full_grid ->AllToAll(d,tmpdata,alldata);
rdims[d]/= M;
rsites /= M;
nvec *= M; // Increase nvec by subdivision factor
}
}
@ -997,12 +980,12 @@ void Grid_unsplit(std::vector<Lattice<Vobj> > & full,Lattice<Vobj> & split)
for(int v=0;v<nvector;v++){
assert(v<full.size());
parallel_for(int site=0;site<lsites;site++){
assert(v*lsites+site < alldata.size());
scalardata[site] = alldata[v*lsites+site];
}
vectorizeFromLexOrdArray(scalardata,full[v]);
}
}
}
#endif

View File

@ -95,7 +95,7 @@ int main (int argc, char ** argv)
FermionField tmp(FGrid);
for(int s=0;s<nrhs;s++) result[s]=zero;
#undef LEXICO_TEST
#define LEXICO_TEST
#ifdef LEXICO_TEST
{
LatticeFermion lex(FGrid); lex = zero;
@ -121,12 +121,12 @@ int main (int argc, char ** argv)
random(pRNG5,src[s]);
tmp = 100.0*s;
src[s] = (src[s] * 0.1) + tmp;
std::cout << " src ]"<<s<<"] "<<norm2(src[s])<<std::endl;
std::cout << GridLogMessage << " src ["<<s<<"] "<<norm2(src[s])<<std::endl;
}
#endif
for(int n =0 ; n< nrhs ; n++) {
std::cout << " src"<<n<<"\n"<< src[n] <<std::endl;
// std::cout << " src"<<n<<"\n"<< src[n] <<std::endl;
}
LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu);
@ -144,8 +144,8 @@ int main (int argc, char ** argv)
///////////////////////////////////////////////////////////////
Grid_split (Umu,s_Umu);
Grid_split (src,s_src);
std::cout << " split rank " <<me << " s_src "<<norm2(s_src)<<std::endl;
std::cout << " s_src\n "<< s_src <<std::endl;
std::cout << GridLogMessage << " split rank " <<me << " s_src "<<norm2(s_src)<<std::endl;
// std::cout << " s_src\n "<< s_src <<std::endl;
#ifdef LEXICO_TEST
FermionField s_src_tmp(SFGrid);
@ -168,11 +168,12 @@ int main (int argc, char ** argv)
s_src_tmp = s_src_tmp + ftmp;
}
s_src_diff = s_src_tmp - s_src;
std::cout << " s_src_diff " << norm2(s_src_diff)<<std::endl;
std::cout << GridLogMessage <<" LEXICO test: s_src_diff " << norm2(s_src_diff)<<std::endl;
std::cout << " s_src \n" << s_src << std::endl;
std::cout << " s_src_tmp \n" << s_src_tmp << std::endl;
std::cout << " s_src_diff \n" << s_src_diff << std::endl;
// std::cout << " s_src \n" << s_src << std::endl;
// std::cout << " s_src_tmp \n" << s_src_tmp << std::endl;
// std::cout << " s_src_diff \n" << s_src_diff << std::endl;
// exit(0);
#endif
///////////////////////////////////////////////////////////////
@ -189,11 +190,11 @@ int main (int argc, char ** argv)
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOp(Ddwf);
MdagMLinearOperator<DomainWallFermionR,FermionField> HermOpCk(Dchk);
ConjugateGradient<FermionField> CG((1.0e-5),10000);
ConjugateGradient<FermionField> CG((1.0e-2),10000);
s_res = zero;
CG(HermOp,s_src,s_res);
std::cout << " s_res norm "<<norm2(s_res)<<std::endl;
std::cout << GridLogMessage << " split residual norm "<<norm2(s_res)<<std::endl;
/////////////////////////////////////////////////////////////
// Report how long they all took
/////////////////////////////////////////////////////////////
@ -214,7 +215,7 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage<< "Checking the residuals"<<std::endl;
for(int n=0;n<nrhs;n++){
std::cout << " res["<<n<<"] norm "<<norm2(result[n])<<std::endl;
std::cout << GridLogMessage<< " res["<<n<<"] norm "<<norm2(result[n])<<std::endl;
HermOpCk.HermOp(result[n],tmp); tmp = tmp - src[n];
std::cout << GridLogMessage<<" resid["<<n<<"] "<< norm2(tmp)/norm2(src[n])<<std::endl;
}

View File

@ -70,7 +70,21 @@ int main (int argc, char ** argv)
ConjugateGradient<FermionField> CG(1.0e-8,10000);
SchurRedBlackStaggeredSolve<FermionField> SchurSolver(CG);
double volume=1.0;
for(int mu=0;mu<Nd;mu++){
volume=volume*latt_size[mu];
}
double t1=usecond();
SchurSolver(Ds,src,result);
double t2=usecond();
// Schur solver: uses DeoDoe => volume * 1146
double ncall=CG.IterationsToComplete;
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
std::cout<<GridLogMessage << "usec = "<< (t2-t1)<<std::endl;
std::cout<<GridLogMessage << "flop/s = "<< flops<<std::endl;
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t2-t1)<<std::endl;
Grid_finalize();
}