mirror of https://github.com/paboyle/Grid.git synced 2025-07-14 03:57:06 +01:00

Merge branch 'develop' of https://github.com/paboyle/Grid into ic2

This commit is contained in:
Chulwoo Jung
2025-04-25 10:48:41 -04:00
97 changed files with 3435 additions and 257 deletions

View File

@@ -191,7 +191,7 @@ public:
     Lattice<sobj> pgbuf(&pencil_g);
     autoView(pgbuf_v , pgbuf, CpuWrite);
-    std::cout << "CPU view" << std::endl;
+    //std::cout << "CPU view" << std::endl;
     typedef typename FFTW<scalar>::FFTW_scalar FFTW_scalar;
     typedef typename FFTW<scalar>::FFTW_plan FFTW_plan;
@@ -215,7 +215,7 @@ public:
     else if ( sign == forward ) div = 1.0;
     else assert(0);
-    std::cout << GridLogPerformance<<"Making FFTW plan" << std::endl;
+    //std::cout << GridLogPerformance<<"Making FFTW plan" << std::endl;
     FFTW_plan p;
     {
       FFTW_scalar *in = (FFTW_scalar *)&pgbuf_v[0];
@@ -229,7 +229,7 @@ public:
     }
     // Barrel shift and collect global pencil
-    std::cout << GridLogPerformance<<"Making pencil" << std::endl;
+    //std::cout << GridLogPerformance<<"Making pencil" << std::endl;
     Coordinate lcoor(Nd), gcoor(Nd);
     result = source;
     int pc = processor_coor[dim];
@@ -251,7 +251,7 @@ public:
       }
     }
-    std::cout <<GridLogPerformance<< "Looping orthog" << std::endl;
+    //std::cout <<GridLogPerformance<< "Looping orthog" << std::endl;
     // Loop over orthog coords
     int NN=pencil_g.lSites();
     GridStopWatch timer;
@@ -274,7 +274,7 @@ public:
     usec += timer.useconds();
     flops+= flops_call*NN;
-    std::cout <<GridLogPerformance<< "Writing back results " << std::endl;
+    //std::cout <<GridLogPerformance<< "Writing back results " << std::endl;
     // writing out result
     {
       autoView(pgbuf_v,pgbuf,CpuRead);
@@ -291,7 +291,7 @@ public:
     }
     result = result*div;
-    std::cout <<GridLogPerformance<< "Destroying plan " << std::endl;
+    //std::cout <<GridLogPerformance<< "Destroying plan " << std::endl;
     // destroying plan
     FFTW<scalar>::fftw_destroy_plan(p);
 #endif

View File

@@ -277,6 +277,38 @@ public:
     assert(0);
   }
 };
+template<class Matrix,class Field>
+class ShiftedNonHermitianLinearOperator : public LinearOperatorBase<Field> {
+  Matrix &_Mat;
+  RealD shift;
+public:
+  ShiftedNonHermitianLinearOperator(Matrix &Mat,RealD shft): _Mat(Mat),shift(shft){};
+  // Support for coarsening to a multigrid
+  void OpDiag (const Field &in, Field &out) {
+    _Mat.Mdiag(in,out);
+    out = out + shift*in;
+  }
+  void OpDir (const Field &in, Field &out,int dir,int disp) {
+    _Mat.Mdir(in,out,dir,disp);
+  }
+  void OpDirAll (const Field &in, std::vector<Field> &out){
+    _Mat.MdirAll(in,out);
+  };
+  void Op (const Field &in, Field &out){
+    _Mat.M(in,out);
+    out = out + shift * in;
+  }
+  void AdjOp (const Field &in, Field &out){
+    _Mat.Mdag(in,out);
+    out = out + shift * in;
+  }
+  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
+    assert(0);
+  }
+  void HermOp(const Field &in, Field &out){
+    assert(0);
+  }
+};
 //////////////////////////////////////////////////////////
 // Even Odd Schur decomp operators; there are several
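Note: the new wrapper applies the shifted operator M_s = M + s*1. The shift is added after M, Mdag, and Mdiag, while the directional hopping terms are left untouched, because the identity contributes only to the diagonal. A toy, self-contained sketch of the same algebra (illustration only, not Grid code; all names here are hypothetical):

// Toy model of a shifted operator: Op(x) = M x + shift * x.
#include <array>
#include <cstdio>

struct Matrix2 {                       // stand-in for the wrapped Matrix
  double a, b, c, d;                   // [[a,b],[c,d]]
  std::array<double,2> M(const std::array<double,2>& x) const {
    return {a*x[0] + b*x[1], c*x[0] + d*x[1]};
  }
};

struct ShiftedOp {                     // mirrors ShiftedNonHermitianLinearOperator::Op
  const Matrix2& mat;
  double shift;
  std::array<double,2> Op(const std::array<double,2>& in) const {
    auto out = mat.M(in);              // out = M in
    out[0] += shift*in[0];             // out = out + shift * in
    out[1] += shift*in[1];
    return out;
  }
};

int main() {
  Matrix2 m{0, 1, -1, 0};              // a non-Hermitian matrix
  ShiftedOp s{m, 0.5};
  auto y = s.Op({1, 0});               // (M + 0.5*I)(1,0)^T = (0.5, -1)^T
  std::printf("%g %g\n", y[0], y[1]);
}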

View File

@@ -208,8 +208,8 @@ public:
     assert(Bkn.size()==batchCount);
     assert(Cmn.size()==batchCount);
-    assert(OpA!=GridBLAS_OP_T); // Complex case expect no transpose
-    assert(OpB!=GridBLAS_OP_T);
+    //assert(OpA!=GridBLAS_OP_T); // Complex case expect no transpose
+    //assert(OpB!=GridBLAS_OP_T);
     int lda = m; // m x k column major
     int ldb = k; // k x n column major
@@ -367,28 +367,67 @@ public:
         Eigen::Map<Eigen::MatrixXcd> eAmk(Amk[p],m,k);
         Eigen::Map<Eigen::MatrixXcd> eBkn(Bkn[p],k,n);
         Eigen::Map<Eigen::MatrixXcd> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
           eCmn = beta * eCmn + alpha * eAmk * eBkn ;
+        else
+          eCmn = alpha * eAmk * eBkn ;
       });
     } else if ( (OpA == GridBLAS_OP_C ) && (OpB == GridBLAS_OP_N) ) {
       thread_for (p, batchCount, {
         Eigen::Map<Eigen::MatrixXcd> eAmk(Amk[p],k,m);
         Eigen::Map<Eigen::MatrixXcd> eBkn(Bkn[p],k,n);
         Eigen::Map<Eigen::MatrixXcd> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
           eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn ;
+        else
+          eCmn = alpha * eAmk.adjoint() * eBkn ;
+      });
+    } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_N) ) {
+      thread_for (p, batchCount, {
+        Eigen::Map<Eigen::MatrixXcd> eAmk(Amk[p],k,m);
+        Eigen::Map<Eigen::MatrixXcd> eBkn(Bkn[p],k,n);
+        Eigen::Map<Eigen::MatrixXcd> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
+          eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn ;
+        else
+          eCmn = alpha * eAmk.transpose() * eBkn ;
       });
     } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_C) ) {
       thread_for (p, batchCount, {
         Eigen::Map<Eigen::MatrixXcd> eAmk(Amk[p],m,k);
         Eigen::Map<Eigen::MatrixXcd> eBkn(Bkn[p],n,k);
         Eigen::Map<Eigen::MatrixXcd> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
           eCmn = beta * eCmn + alpha * eAmk * eBkn.adjoint() ;
+        else
+          eCmn = alpha * eAmk * eBkn.adjoint() ;
+      });
+    } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_T) ) {
+      thread_for (p, batchCount, {
+        Eigen::Map<Eigen::MatrixXcd> eAmk(Amk[p],m,k);
+        Eigen::Map<Eigen::MatrixXcd> eBkn(Bkn[p],n,k);
+        Eigen::Map<Eigen::MatrixXcd> eCmn(Cmn[p],m,n);
+        eCmn = beta * eCmn + alpha * eAmk * eBkn.transpose() ;
       });
     } else if ( (OpA == GridBLAS_OP_C ) && (OpB == GridBLAS_OP_C) ) {
       thread_for (p, batchCount, {
         Eigen::Map<Eigen::MatrixXcd> eAmk(Amk[p],k,m);
         Eigen::Map<Eigen::MatrixXcd> eBkn(Bkn[p],n,k);
         Eigen::Map<Eigen::MatrixXcd> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
           eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn.adjoint() ;
+        else
+          eCmn = alpha * eAmk.adjoint() * eBkn.adjoint() ;
+      } );
+    } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_T) ) {
+      thread_for (p, batchCount, {
+        Eigen::Map<Eigen::MatrixXcd> eAmk(Amk[p],k,m);
+        Eigen::Map<Eigen::MatrixXcd> eBkn(Bkn[p],n,k);
+        Eigen::Map<Eigen::MatrixXcd> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
+          eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn.transpose() ;
+        else
+          eCmn = alpha * eAmk.transpose() * eBkn.transpose() ;
       } );
     } else {
       assert(0);
@@ -414,8 +453,8 @@ public:
     RealD t2=usecond();
     int32_t batchCount = Amk.size();
-    assert(OpA!=GridBLAS_OP_T); // Complex case expect no transpose
-    assert(OpB!=GridBLAS_OP_T);
+    //assert(OpA!=GridBLAS_OP_T); // Complex case expect no transpose
+    //assert(OpB!=GridBLAS_OP_T);
     int lda = m; // m x k column major
     int ldb = k; // k x n column major
@@ -514,28 +553,70 @@ public:
         Eigen::Map<Eigen::MatrixXcf> eAmk(Amk[p],m,k);
         Eigen::Map<Eigen::MatrixXcf> eBkn(Bkn[p],k,n);
         Eigen::Map<Eigen::MatrixXcf> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
           eCmn = beta * eCmn + alpha * eAmk * eBkn ;
+        else
+          eCmn = alpha * eAmk * eBkn ;
       });
     } else if ( (OpA == GridBLAS_OP_C ) && (OpB == GridBLAS_OP_N) ) {
       thread_for (p, batchCount, {
         Eigen::Map<Eigen::MatrixXcf> eAmk(Amk[p],k,m);
         Eigen::Map<Eigen::MatrixXcf> eBkn(Bkn[p],k,n);
         Eigen::Map<Eigen::MatrixXcf> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
           eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn ;
+        else
+          eCmn = alpha * eAmk.adjoint() * eBkn ;
+      });
+    } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_N) ) {
+      thread_for (p, batchCount, {
+        Eigen::Map<Eigen::MatrixXcf> eAmk(Amk[p],k,m);
+        Eigen::Map<Eigen::MatrixXcf> eBkn(Bkn[p],k,n);
+        Eigen::Map<Eigen::MatrixXcf> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
+          eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn ;
+        else
+          eCmn = alpha * eAmk.transpose() * eBkn ;
       });
     } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_C) ) {
       thread_for (p, batchCount, {
         Eigen::Map<Eigen::MatrixXcf> eAmk(Amk[p],m,k);
         Eigen::Map<Eigen::MatrixXcf> eBkn(Bkn[p],n,k);
         Eigen::Map<Eigen::MatrixXcf> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
           eCmn = beta * eCmn + alpha * eAmk * eBkn.adjoint() ;
+        else
+          eCmn = alpha * eAmk * eBkn.adjoint() ;
+      });
+    } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_T) ) {
+      thread_for (p, batchCount, {
+        Eigen::Map<Eigen::MatrixXcf> eAmk(Amk[p],m,k);
+        Eigen::Map<Eigen::MatrixXcf> eBkn(Bkn[p],n,k);
+        Eigen::Map<Eigen::MatrixXcf> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
+          eCmn = beta * eCmn + alpha * eAmk * eBkn.transpose() ;
+        else
+          eCmn = alpha * eAmk * eBkn.transpose() ;
       });
     } else if ( (OpA == GridBLAS_OP_C ) && (OpB == GridBLAS_OP_C) ) {
       thread_for (p, batchCount, {
         Eigen::Map<Eigen::MatrixXcf> eAmk(Amk[p],k,m);
         Eigen::Map<Eigen::MatrixXcf> eBkn(Bkn[p],n,k);
         Eigen::Map<Eigen::MatrixXcf> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
           eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn.adjoint() ;
+        else
+          eCmn = alpha * eAmk.adjoint() * eBkn.adjoint() ;
+      } );
+    } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_T) ) {
+      thread_for (p, batchCount, {
+        Eigen::Map<Eigen::MatrixXcf> eAmk(Amk[p],k,m);
+        Eigen::Map<Eigen::MatrixXcf> eBkn(Bkn[p],n,k);
+        Eigen::Map<Eigen::MatrixXcf> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
+          eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn.transpose() ;
+        else
+          eCmn = alpha * eAmk.transpose() * eBkn.transpose() ;
       } );
     } else {
       assert(0);
@@ -661,28 +742,40 @@ public:
         Eigen::Map<Eigen::MatrixXf> eAmk(Amk[p],m,k);
         Eigen::Map<Eigen::MatrixXf> eBkn(Bkn[p],k,n);
         Eigen::Map<Eigen::MatrixXf> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
           eCmn = beta * eCmn + alpha * eAmk * eBkn ;
+        else
+          eCmn = alpha * eAmk * eBkn ;
       });
     } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_N) ) {
       thread_for (p, batchCount, {
         Eigen::Map<Eigen::MatrixXf> eAmk(Amk[p],k,m);
         Eigen::Map<Eigen::MatrixXf> eBkn(Bkn[p],k,n);
         Eigen::Map<Eigen::MatrixXf> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
           eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn ;
+        else
+          eCmn = alpha * eAmk.transpose() * eBkn ;
       });
     } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_T) ) {
       thread_for (p, batchCount, {
         Eigen::Map<Eigen::MatrixXf> eAmk(Amk[p],m,k);
         Eigen::Map<Eigen::MatrixXf> eBkn(Bkn[p],n,k);
         Eigen::Map<Eigen::MatrixXf> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
           eCmn = beta * eCmn + alpha * eAmk * eBkn.transpose() ;
+        else
+          eCmn = alpha * eAmk * eBkn.transpose() ;
       });
     } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_T) ) {
       thread_for (p, batchCount, {
         Eigen::Map<Eigen::MatrixXf> eAmk(Amk[p],k,m);
         Eigen::Map<Eigen::MatrixXf> eBkn(Bkn[p],n,k);
         Eigen::Map<Eigen::MatrixXf> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
           eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn.transpose() ;
+        else
+          eCmn = alpha * eAmk.transpose() * eBkn.transpose() ;
       });
     } else {
       assert(0);
@@ -809,28 +902,40 @@ public:
         Eigen::Map<Eigen::MatrixXd> eAmk(Amk[p],m,k);
         Eigen::Map<Eigen::MatrixXd> eBkn(Bkn[p],k,n);
         Eigen::Map<Eigen::MatrixXd> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
           eCmn = beta * eCmn + alpha * eAmk * eBkn ;
+        else
+          eCmn = alpha * eAmk * eBkn ;
       });
     } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_N) ) {
       thread_for (p, batchCount, {
         Eigen::Map<Eigen::MatrixXd> eAmk(Amk[p],k,m);
         Eigen::Map<Eigen::MatrixXd> eBkn(Bkn[p],k,n);
         Eigen::Map<Eigen::MatrixXd> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
           eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn ;
+        else
+          eCmn = alpha * eAmk.transpose() * eBkn ;
       });
     } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_T) ) {
       thread_for (p, batchCount, {
         Eigen::Map<Eigen::MatrixXd> eAmk(Amk[p],m,k);
         Eigen::Map<Eigen::MatrixXd> eBkn(Bkn[p],n,k);
         Eigen::Map<Eigen::MatrixXd> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
           eCmn = beta * eCmn + alpha * eAmk * eBkn.transpose() ;
+        else
+          eCmn = alpha * eAmk * eBkn.transpose() ;
       });
     } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_T) ) {
       thread_for (p, batchCount, {
         Eigen::Map<Eigen::MatrixXd> eAmk(Amk[p],k,m);
         Eigen::Map<Eigen::MatrixXd> eBkn(Bkn[p],n,k);
         Eigen::Map<Eigen::MatrixXd> eCmn(Cmn[p],m,n);
+        if (std::abs(beta) != 0.0)
           eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn.transpose() ;
+        else
+          eCmn = alpha * eAmk.transpose() * eBkn.transpose() ;
       });
     } else {
       assert(0);
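Note on the `beta == 0` branches added throughout this file: this is standard BLAS semantics rather than a micro-optimisation. When beta is zero, C must be treated as write-only, because an uninitialised C may contain NaNs and 0.0 * NaN is still NaN. A minimal, self-contained demonstration of why the branch matters (not Grid code):

// Why "beta * C" cannot be used when beta == 0: 0.0 * NaN == NaN.
#include <cmath>
#include <cstdio>
#include <limits>

int main() {
  double c = std::numeric_limits<double>::quiet_NaN(); // uninitialised C entry
  double beta = 0.0, alpha = 1.0, ab = 2.0;            // ab stands in for (A*B)_mn
  double wrong = beta * c + alpha * ab;                // NaN poisons the result
  double right = (std::abs(beta) != 0.0) ? beta * c + alpha * ab
                                         : alpha * ab; // branch as in the diff
  std::printf("wrong=%f right=%f\n", wrong, right);    // wrong=nan right=2
}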

View File

@@ -245,9 +245,10 @@ until convergence
       _HermOp(src_n,tmp);
       //      std::cout << GridLogMessage<< tmp<<std::endl; exit(0);
       //      std::cout << GridLogIRL << " _HermOp " << norm2(tmp) << std::endl;
-      RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
+      //      RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
+      RealD vnum = real(innerProduct(tmp,tmp)); // HermOp^2.
       RealD vden = norm2(src_n);
-      RealD na = vnum/vden;
+      RealD na = std::sqrt(vnum/vden);
       if (fabs(evalMaxApprox/na - 1.0) < 0.0001)
         i=_MAX_ITER_IRL_MEVAPP_;
       evalMaxApprox = na;
@@ -255,6 +256,7 @@ until convergence
       src_n = tmp;
     }
   }
+  std::cout << GridLogIRL << " Final evalMaxApprox " << evalMaxApprox << std::endl;
   std::vector<RealD> lme(Nm);
   std::vector<RealD> lme2(Nm);
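The revised estimator replaces the Rayleigh quotient <v,Av>/<v,v> by sqrt(<Av,Av>/<v,v>) = ||Av||/||v||, which for Hermitian A converges to |lambda_max| and is robust when <v,Av> could be small or negative. A self-contained sketch of the same power iteration on a toy Hermitian matrix (illustration only):

// Power iteration with the HermOp^2 estimate: na -> |lambda_max|.
#include <array>
#include <cmath>
#include <cstdio>

int main() {
  // A = [[2,1],[1,2]]  (Hermitian, eigenvalues 1 and 3)
  auto apply = [](std::array<double,2> v) {
    return std::array<double,2>{2*v[0] + 1*v[1], 1*v[0] + 2*v[1]};
  };
  std::array<double,2> v{1.0, 0.1};
  double na = 0;
  for (int i = 0; i < 50; ++i) {
    auto Av = apply(v);
    double vnum = Av[0]*Av[0] + Av[1]*Av[1]; // <Av,Av> : HermOp^2
    double vden = v[0]*v[0] + v[1]*v[1];     // <v,v>
    na = std::sqrt(vnum / vden);             // ||Av||/||v|| -> 3
    v = Av;                                  // unnormalised, as in the diff
  }
  std::printf("evalMaxApprox ~ %f\n", na);   // prints ~3
}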

View File

@@ -97,7 +97,7 @@ public:
     RealD scale;
-    ConjugateGradient<FineField> CG(1.0e-2,100,false);
+    ConjugateGradient<FineField> CG(1.0e-3,400,false);
     FineField noise(FineGrid);
     FineField Mn(FineGrid);
@@ -110,7 +110,7 @@ public:
     hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl;
-    for(int i=0;i<1;i++){
+    for(int i=0;i<4;i++){
       CG(hermop,noise,subspace[b]);
@@ -146,7 +146,7 @@ public:
     DiracOp.Op(noise,Mn); std::cout<<GridLogMessage << "noise ["<<b<<"] <n|Op|n> "<<innerProduct(noise,Mn)<<std::endl;
-    for(int i=0;i<3;i++){
+    for(int i=0;i<2;i++){
       //    void operator() (const Field &src, Field &psi){
 #if 1
       std::cout << GridLogMessage << " inverting on noise "<<std::endl;

View File

@@ -441,8 +441,20 @@ public:
     std::cout << GridLogMessage<<"CoarsenOperator inv "<<tinv<<" us"<<std::endl;
   }
 #else
+  //////////////////////////////////////////////////////////////////////
+  // Galerkin projection of matrix
+  //////////////////////////////////////////////////////////////////////
   void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop,
                        Aggregation<Fobj,CComplex,nbasis> & Subspace)
+  {
+    CoarsenOperator(linop,Subspace,Subspace);
+  }
+  //////////////////////////////////////////////////////////////////////
+  // Petrov - Galerkin projection of matrix
+  //////////////////////////////////////////////////////////////////////
+  void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop,
+                       Aggregation<Fobj,CComplex,nbasis> & U,
+                       Aggregation<Fobj,CComplex,nbasis> & V)
   {
     std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl;
     GridBase *grid = FineGrid();
@@ -458,11 +470,9 @@ public:
     // Orthogonalise the subblocks over the basis
     /////////////////////////////////////////////////////////////
     CoarseScalar InnerProd(CoarseGrid());
-    blockOrthogonalise(InnerProd,Subspace.subspace);
+    blockOrthogonalise(InnerProd,V.subspace);
+    blockOrthogonalise(InnerProd,U.subspace);
-    //  for(int s=0;s<Subspace.subspace.size();s++){
-    //    std::cout << " subspace norm "<<norm2(Subspace.subspace[s])<<std::endl;
-    //  }
     const int npoint = geom.npoint;
     Coordinate clatt = CoarseGrid()->GlobalDimensions();
@@ -542,7 +552,7 @@ public:
     std::cout << GridLogMessage<< "CoarsenMatrixColoured vec "<<i<<"/"<<nbasis<< std::endl;
     for(int p=0;p<npoint;p++){ // Loop over momenta in npoint
       tphaseBZ-=usecond();
-      phaV = phaF[p]*Subspace.subspace[i];
+      phaV = phaF[p]*V.subspace[i];
       tphaseBZ+=usecond();
       /////////////////////////////////////////////////////////////////////
@@ -555,7 +565,7 @@ public:
       //      std::cout << i << " " <<p << " MphaV "<<norm2(MphaV)<<" "<<norm2(phaV)<<std::endl;
       tproj-=usecond();
-      blockProject(coarseInner,MphaV,Subspace.subspace);
+      blockProject(coarseInner,MphaV,U.subspace);
       coarseInner = conjugate(pha[p]) * coarseInner;
       ComputeProj[p] = coarseInner;
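In matrix notation, the two overloads build a coarse operator from left (test) aggregates U and right (trial) aggregates V; the one-argument form is the Galerkin special case. A sketch of the standard definitions (not quoted from the source):

A^{\rm coarse}_{ab} = \langle\, u_a \,,\; A\, v_b \,\rangle \qquad \text{(Petrov--Galerkin)},
\qquad
A^{\rm coarse}_{ab} = \langle\, v_a \,,\; A\, v_b \,\rangle \qquad \text{(Galerkin, } U = V \text{)}.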

View File

@@ -69,7 +69,7 @@ public:
   }
   // FIXME: hack for the copy constructor: it must be avoided to avoid single thread loop
-  void construct(pointer __p, const _Tp& __val) { assert(0);};
+  void construct(pointer __p, const _Tp& __val) { };
   void construct(pointer __p) { };
   void destroy(pointer __p) { };
 };

View File

@@ -234,6 +234,9 @@ void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvis
 }
 void MemoryManager::EvictVictims(uint64_t bytes)
 {
+  if(bytes>=DeviceMaxBytes) {
+    printf("EvictVictims bytes %ld DeviceMaxBytes %ld\n",bytes,DeviceMaxBytes);
+  }
   assert(bytes<DeviceMaxBytes);
   while(bytes+DeviceLRUBytes > DeviceMaxBytes){
     if ( DeviceLRUBytes > 0){

View File

@@ -149,6 +149,7 @@ public:
                sizeof(obj),d*100+p);
     }
+    if (!list.empty()) // avoid triggering assert in comms == none
       CommsComplete(list);
     for(int p=1;p<_processors[d];p++){
       accum = accum + column[p];

View File

@@ -438,8 +438,15 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
       list.push_back(rrq);
       off_node_bytes+=rbytes;
     }
+#ifdef NVLINK_GET
+    else {
+      void *shm = (void *) this->ShmBufferTranslate(from,xmit);
+      assert(shm!=NULL);
+      acceleratorCopyDeviceToDeviceAsynch(shm,recv,rbytes);
     }
+#endif
+  }
+  // This is a NVLINK PUT
   if (dox) {
     if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) {
       tag= dir+_processor*32;
@@ -448,9 +455,11 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
       list.push_back(xrq);
       off_node_bytes+=xbytes;
     } else {
+#ifndef NVLINK_GET
       void *shm = (void *) this->ShmBufferTranslate(dest,recv);
       assert(shm!=NULL);
       acceleratorCopyDeviceToDeviceAsynch(xmit,shm,xbytes);
+#endif
     }
   }
   return off_node_bytes;
@@ -459,7 +468,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
 void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list,int dir)
 {
   int nreq=list.size();
+  /*finishes Get/Put*/
   acceleratorCopySynchronise();
   if (nreq==0) return;
@@ -746,18 +755,24 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
 }
 void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list,int dir)
 {
-  //  int nreq=list.size();
-  //  if (nreq==0) return;
-  //  std::vector<MPI_Status> status(nreq);
-  //  std::vector<MPI_Request> MpiRequests(nreq);
-  //  for(int r=0;r<nreq;r++){
-  //    MpiRequests[r] = list[r].req;
-  //  }
-  //  int ierr = MPI_Waitall(nreq,&MpiRequests[0],&status[0]); // Sends are guaranteed in order. No harm in not completing.
-  //  assert(ierr==0);
+  acceleratorCopySynchronise(); // Complete all pending copy transfers D2D
+  std::vector<MPI_Status> status;
+  std::vector<MPI_Request> MpiRequests;
+  for(int r=0;r<list.size();r++){
+    // Must check each Send buf is clear to reuse
+    if ( list[r].PacketType == InterNodeXmitISend ) MpiRequests.push_back(list[r].req);
+    // if ( list[r].PacketType == InterNodeRecv ) MpiRequests.push_back(list[r].req); // Already "Test" passed
+  }
+  int nreq=MpiRequests.size();
+  if (nreq>0) {
+    status.resize(MpiRequests.size());
+    int ierr = MPI_Waitall(MpiRequests.size(),&MpiRequests[0],&status[0]); // Sends are guaranteed in order. No harm in not completing.
+    assert(ierr==0);
+  }
   //  for(int r=0;r<nreq;r++){
   //    if ( list[r].PacketType==InterNodeRecv ) {
@@ -765,7 +780,6 @@ void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsReque
   //    }
   //  }
-  acceleratorCopySynchronise(); // Complete all pending copy transfers D2D
   list.resize(0);            // Delete the list
   this->HostBufferFreeAll(); // Clean up the buffer allocs
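For orientation: in the default PUT scheme the sender pushes its xmit buffer into the receiver's shared-memory window on the send (`dox`) path, while under NVLINK_GET the receiver pulls the sender's xmit buffer into its own recv buffer on the receive path, which is why the asynchronous copy moves from one branch to the other above. A minimal self-contained sketch of the two directions (memcpy stands in for acceleratorCopyDeviceToDeviceAsynch; all names hypothetical):

#include <cstring>
#include <cassert>

// PUT: the sender copies its data into the peer's window (sender-side copy).
void put_exchange(void* peer_window, const void* xmit, size_t bytes) {
  std::memcpy(peer_window, xmit, bytes);
}
// GET: the receiver copies the peer's xmit data into its own recv buffer.
void get_exchange(void* recv, const void* peer_xmit_window, size_t bytes) {
  std::memcpy(recv, peer_xmit_window, bytes);
}

int main() {
  char a[8] = "payload", window[8] = {0}, b[8] = {0};
  put_exchange(window, a, 8);   // sender-side push
  get_exchange(b, window, 8);   // receiver-side pull
  assert(b[0] == 'p');
}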

View File

@@ -91,7 +91,7 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
 {
   assert(0);
 }
-void CartesianCommunicator::CommsComplete(std::vector<CommsRequest_t> &list){ assert(0);}
+void CartesianCommunicator::CommsComplete(std::vector<CommsRequest_t> &list){ assert(list.size()==0);}
 void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                 void *xmit,
                                                 int dest,

View File

@@ -137,7 +137,7 @@ public:
   ///////////////////////////////////////////////////
   static void SharedMemoryAllocate(uint64_t bytes, int flags);
   static void SharedMemoryFree(void);
-  static void SharedMemoryCopy(void *dest,void *src,size_t bytes);
+  //  static void SharedMemoryCopy(void *dest,void *src,size_t bytes);
   static void SharedMemoryZero(void *dest,size_t bytes);
 };

View File

@@ -542,12 +542,12 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
   // Each MPI rank should allocate our own buffer
   ///////////////////////////////////////////////////////////////////////////////////////////////////////////
 #ifndef ACCELERATOR_AWARE_MPI
-  printf("Host buffer allocate for GPU non-aware MPI\n");
+  //  printf("Host buffer allocate for GPU non-aware MPI\n");
 #if 0
   HostCommBuf= acceleratorAllocHost(bytes);
 #else
   HostCommBuf= malloc(bytes); /// CHANGE THIS TO malloc_host
-#ifdef HAVE_NUMAIF_H
+#if 0
 #warning "Moving host buffers to specific NUMA domain"
   int numa;
   char *numa_name=(char *)getenv("MPI_BUF_NUMA");
@@ -916,14 +916,14 @@ void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes)
   bzero(dest,bytes);
 #endif
 }
-void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes)
-{
-#if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)
-  acceleratorCopyToDevice(src,dest,bytes);
-#else
-  bcopy(src,dest,bytes);
-#endif
-}
+//void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes)
+//{
+//#if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)
+//  acceleratorCopyToDevice(src,dest,bytes);
+//#else
+//  bcopy(src,dest,bytes);
+//#endif
+//}
 ////////////////////////////////////////////////////////
 // Global shared functionality finished
 // Now move to per communicator functionality
@@ -959,6 +959,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
     MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm);
     ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr];
+    //    std::cerr << " SetCommunicator rank "<<r<<" comm "<<ShmCommBufs[r] <<std::endl;
   }
   ShmBufferFreeAll();
@@ -989,7 +990,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
   }
 #endif
-  //SharedMemoryTest();
+  SharedMemoryTest();
 }
 //////////////////////////////////////////////////////////////////
 // On node barrier
@@ -1011,19 +1012,18 @@ void SharedMemory::SharedMemoryTest(void)
       check[0]=GlobalSharedMemory::WorldNode;
       check[1]=r;
       check[2]=magic;
-      GlobalSharedMemory::SharedMemoryCopy( ShmCommBufs[r], check, 3*sizeof(uint64_t));
+      acceleratorCopyToDevice(check,ShmCommBufs[r],3*sizeof(uint64_t));
     }
   }
   ShmBarrier();
   for(uint64_t r=0;r<ShmSize;r++){
-    ShmBarrier();
-    GlobalSharedMemory::SharedMemoryCopy(check,ShmCommBufs[r], 3*sizeof(uint64_t));
-    ShmBarrier();
+    acceleratorCopyFromDevice(ShmCommBufs[r],check,3*sizeof(uint64_t));
     assert(check[0]==GlobalSharedMemory::WorldNode);
     assert(check[1]==r);
    assert(check[2]==magic);
+    ShmBarrier();
   }
+  ShmBarrier();
+  std::cout << GridLogDebug << " SharedMemoryTest has passed "<<std::endl;
 }
 void *SharedMemory::ShmBuffer(int rank)

View File

@@ -244,7 +244,6 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
   scalar_object * recv_buf_extract_mpi;
   scalar_object * send_buf_extract_mpi;
-
   for(int s=0;s<Nsimd;s++){
     send_buf_extract[s].resize(buffer_size);
     recv_buf_extract[s].resize(buffer_size);

View File

@@ -55,7 +55,7 @@ inline void sliceSumReduction_cub_small(const vobj *Data,
   d_offsets = static_cast<int*>(acceleratorAllocDevice((rd+1)*sizeof(int)));
   //copy offsets to device
-  acceleratorCopyToDeviceAsync(&offsets[0],d_offsets,sizeof(int)*(rd+1),computeStream);
+  acceleratorCopyToDeviceAsynch(&offsets[0],d_offsets,sizeof(int)*(rd+1),computeStream);
   gpuError_t gpuErr = gpucub::DeviceSegmentedReduce::Reduce(temp_storage_array, temp_storage_bytes, rb_p,d_out, rd, d_offsets, d_offsets+1, ::gpucub::Sum(), zero_init, computeStream);
@@ -88,7 +88,7 @@ inline void sliceSumReduction_cub_small(const vobj *Data,
     exit(EXIT_FAILURE);
   }
-  acceleratorCopyFromDeviceAsync(d_out,&lvSum[0],rd*sizeof(vobj),computeStream);
+  acceleratorCopyFromDeviceAsynch(d_out,&lvSum[0],rd*sizeof(vobj),computeStream);
   //sync after copy
   accelerator_barrier();

View File

@@ -510,7 +510,6 @@ public:
     grid->SendToRecvFromBegin(fwd_req,
                               (void *)&hsend_buf[d*buffer_size], xmit_to_rank,
                               (void *)&hrecv_buf[d*buffer_size], recv_from_rank, bytes, tag);
-    acceleratorCopyToDevice(&hrecv_buf[d*buffer_size],&recv_buf[d*buffer_size],bytes);
 #endif
     t_comms+=usecond()-t;
   }
@@ -531,7 +530,6 @@ public:
     grid->SendToRecvFromBegin(bwd_req,
                               (void *)&hsend_buf[(d+depth)*buffer_size], recv_from_rank,
                               (void *)&hrecv_buf[(d+depth)*buffer_size], xmit_to_rank, bytes,tag);
-    acceleratorCopyToDevice(&hrecv_buf[(d+depth)*buffer_size],&recv_buf[(d+depth)*buffer_size],bytes);
 #endif
     t_comms+=usecond()-t;
   }
@@ -555,6 +553,11 @@ public:
   t=usecond();
   grid->CommsComplete(fwd_req);
+#ifndef ACCELERATOR_AWARE_MPI
+  for ( int d=0;d < depth ; d ++ ) {
+    acceleratorCopyToDevice(&hrecv_buf[d*buffer_size],&recv_buf[d*buffer_size],bytes);
+  }
+#endif
   t_comms+= usecond() - t;
   t=usecond();
@@ -565,6 +568,11 @@ public:
   t=usecond();
   grid->CommsComplete(bwd_req);
+#ifndef ACCELERATOR_AWARE_MPI
+  for ( int d=0;d < depth ; d ++ ) {
+    acceleratorCopyToDevice(&hrecv_buf[(d+depth)*buffer_size],&recv_buf[(d+depth)*buffer_size],bytes);
+  }
+#endif
   t_comms+= usecond() - t;
   t=usecond();
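The pattern being fixed here: with non-accelerator-aware MPI the receive lands in a host staging buffer, so the host-to-device copy is only valid after the MPI request completes; the hunks above therefore move that copy from right after SendToRecvFromBegin to after CommsComplete. A self-contained sketch of the ordering rule (std::async stands in for the asynchronous receive; not Grid code):

// The H2D copy of a staged receive may only run after the receive completes.
#include <future>
#include <cstring>
#include <vector>
#include <cassert>

int main() {
  std::vector<char> wire(64, 7), host_stage(64, 0), device(64, 0);
  // "MPI_Irecv" analogue: the receive fills the host staging buffer asynchronously.
  auto req = std::async(std::launch::async,
                        [&]{ std::memcpy(host_stage.data(), wire.data(), 64); });
  req.wait();                                           // CommsComplete analogue
  std::memcpy(device.data(), host_stage.data(), 64);    // now the H2D copy is safe
  assert(device[0] == 7);
}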

View File

@@ -0,0 +1,196 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/CompactWilsonCloverFermion5D.h
Copyright (C) 2020 - 2025
Author: Daniel Richtmann <daniel.richtmann@gmail.com>
Author: Nils Meyer <nils.meyer@ur.de>
Author: Christoph Lehner <christoph@lhnr.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#pragma once
#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
#include <Grid/qcd/action/fermion/WilsonCloverTypes.h>
#include <Grid/qcd/action/fermion/WilsonCloverHelpers.h>
#include <Grid/qcd/action/fermion/CloverHelpers.h>
NAMESPACE_BEGIN(Grid);
// see Grid/qcd/action/fermion/CompactWilsonCloverFermion.h for description
template<class Impl, class CloverHelpers>
class CompactWilsonCloverFermion5D : public WilsonFermion5D<Impl>,
public WilsonCloverHelpers<Impl>,
public CompactWilsonCloverHelpers<Impl> {
/////////////////////////////////////////////
// Sizes
/////////////////////////////////////////////
public:
INHERIT_COMPACT_CLOVER_SIZES(Impl);
/////////////////////////////////////////////
// Type definitions
/////////////////////////////////////////////
public:
INHERIT_IMPL_TYPES(Impl);
INHERIT_CLOVER_TYPES(Impl);
INHERIT_COMPACT_CLOVER_TYPES(Impl);
typedef WilsonFermion5D<Impl> WilsonBase;
typedef WilsonCloverHelpers<Impl> Helpers;
typedef CompactWilsonCloverHelpers<Impl> CompactHelpers;
/////////////////////////////////////////////
// Constructors
/////////////////////////////////////////////
public:
CompactWilsonCloverFermion5D(GaugeField& _Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
const RealD _mass,
const RealD _csw_r = 0.0,
const RealD _csw_t = 0.0,
const RealD _cF = 1.0,
const ImplParams& impl_p = ImplParams());
/////////////////////////////////////////////
// Member functions (implementing interface)
/////////////////////////////////////////////
public:
virtual void Instantiatable() {};
int ConstEE() override { return 0; };
int isTrivialEE() override { return 0; };
void Dhop(const FermionField& in, FermionField& out, int dag) override;
void DhopOE(const FermionField& in, FermionField& out, int dag) override;
void DhopEO(const FermionField& in, FermionField& out, int dag) override;
void DhopDir(const FermionField& in, FermionField& out, int dir, int disp) override;
void DhopDirAll(const FermionField& in, std::vector<FermionField>& out) /* override */;
void M(const FermionField& in, FermionField& out) override;
void Mdag(const FermionField& in, FermionField& out) override;
void Meooe(const FermionField& in, FermionField& out) override;
void MeooeDag(const FermionField& in, FermionField& out) override;
void Mooee(const FermionField& in, FermionField& out) override;
void MooeeDag(const FermionField& in, FermionField& out) override;
void MooeeInv(const FermionField& in, FermionField& out) override;
void MooeeInvDag(const FermionField& in, FermionField& out) override;
void Mdir(const FermionField& in, FermionField& out, int dir, int disp) override;
void MdirAll(const FermionField& in, std::vector<FermionField>& out) override;
void MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) override;
void MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) override;
void MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) override;
/////////////////////////////////////////////
// Member functions (internals)
/////////////////////////////////////////////
void MooeeInternal(const FermionField& in,
FermionField& out,
const CloverDiagonalField& diagonal,
const CloverTriangleField& triangle);
/////////////////////////////////////////////
// Helpers
/////////////////////////////////////////////
void ImportGauge(const GaugeField& _Umu) override;
/////////////////////////////////////////////
// Helpers
/////////////////////////////////////////////
private:
template<class Field>
const MaskField* getCorrectMaskField(const Field &in) const {
if(in.Grid()->_isCheckerBoarded) {
if(in.Checkerboard() == Odd) {
return &this->BoundaryMaskOdd;
} else {
return &this->BoundaryMaskEven;
}
} else {
return &this->BoundaryMask;
}
}
template<class Field>
void ApplyBoundaryMask(Field& f) {
const MaskField* m = getCorrectMaskField(f); assert(m != nullptr);
assert(m != nullptr);
CompactHelpers::ApplyBoundaryMask(f, *m);
}
/////////////////////////////////////////////
// Member Data
/////////////////////////////////////////////
public:
RealD csw_r;
RealD csw_t;
RealD cF;
int n_rhs;
bool fixedBoundaries;
CloverDiagonalField Diagonal, DiagonalEven, DiagonalOdd;
CloverDiagonalField DiagonalInv, DiagonalInvEven, DiagonalInvOdd;
CloverTriangleField Triangle, TriangleEven, TriangleOdd;
CloverTriangleField TriangleInv, TriangleInvEven, TriangleInvOdd;
FermionField Tmp;
MaskField BoundaryMask, BoundaryMaskEven, BoundaryMaskOdd;
};
NAMESPACE_END(Grid);
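A minimal usage sketch for this new class, following the usual Grid test-program pattern (the constructor signature is taken from this header and the CompactWilsonCloverFermion5DD typedef from the Fermion.h hunk below; the grid, RNG, and gauge setup are standard Grid helpers shown here as an assumption of typical usage):

#include <Grid/Grid.h>
using namespace Grid;

int main(int argc, char** argv) {
  Grid_init(&argc, &argv);
  const int Ls = 8;
  GridCartesian*         UGrid   = SpaceTimeGrid::makeFourDimGrid(
      GridDefaultLatt(), GridDefaultSimd(Nd, vComplexD::Nsimd()), GridDefaultMpi());
  GridRedBlackCartesian* UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian*         FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
  GridRedBlackCartesian* FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);

  GridParallelRNG RNG4(UGrid);  RNG4.SeedFixedIntegers({1, 2, 3, 4});
  GridParallelRNG RNG5(FGrid);  RNG5.SeedFixedIntegers({5, 6, 7, 8});

  LatticeGaugeField Umu(UGrid);
  SU<Nc>::HotConfiguration(RNG4, Umu);   // random gauge field for illustration

  RealD mass = 0.1, csw_r = 1.0, csw_t = 1.0, cF = 1.0;
  CompactWilsonCloverFermion5DD Dwc(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid,
                                    mass, csw_r, csw_t, cF);

  LatticeFermionD src(FGrid), res(FGrid);
  gaussian(RNG5, src);
  Dwc.M(src, res);                       // apply the full operator once
  Grid_finalize();
}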

View File

@@ -55,6 +55,7 @@ NAMESPACE_CHECK(Wilson);
 NAMESPACE_CHECK(WilsonTM);
 #include <Grid/qcd/action/fermion/WilsonCloverFermion.h> // 4d wilson clover fermions
 #include <Grid/qcd/action/fermion/CompactWilsonCloverFermion.h> // 4d compact wilson clover fermions
+#include <Grid/qcd/action/fermion/CompactWilsonCloverFermion5D.h> // 5d compact wilson clover fermions
 NAMESPACE_CHECK(WilsonClover);
 #include <Grid/qcd/action/fermion/WilsonFermion5D.h> // 5d base used by all 5d overlap types
 NAMESPACE_CHECK(Wilson5D);
@@ -164,12 +165,17 @@ typedef WilsonClover<WilsonTwoIndexAntiSymmetricImplD> WilsonCloverTwoIndexAntiS
 // Compact Clover fermions
 template <typename WImpl> using CompactWilsonClover = CompactWilsonCloverFermion<WImpl, CompactCloverHelpers<WImpl>>;
+template <typename WImpl> using CompactWilsonClover5D = CompactWilsonCloverFermion5D<WImpl, CompactCloverHelpers<WImpl>>;
 template <typename WImpl> using CompactWilsonExpClover = CompactWilsonCloverFermion<WImpl, CompactExpCloverHelpers<WImpl>>;
 typedef CompactWilsonClover<WilsonImplD2> CompactWilsonCloverFermionD2;
 typedef CompactWilsonClover<WilsonImplF> CompactWilsonCloverFermionF;
 typedef CompactWilsonClover<WilsonImplD> CompactWilsonCloverFermionD;
+typedef CompactWilsonClover5D<WilsonImplD2> CompactWilsonCloverFermion5DD2;
+typedef CompactWilsonClover5D<WilsonImplF> CompactWilsonCloverFermion5DF;
+typedef CompactWilsonClover5D<WilsonImplD> CompactWilsonCloverFermion5DD;
 typedef CompactWilsonExpClover<WilsonImplD2> CompactWilsonExpCloverFermionD2;
 typedef CompactWilsonExpClover<WilsonImplF> CompactWilsonExpCloverFermionF;
 typedef CompactWilsonExpClover<WilsonImplD> CompactWilsonExpCloverFermionD;

View File

@@ -485,7 +485,6 @@ public:
   assert(this->u_comm_offset==this->_unified_buffer_size);
   accelerator_barrier();
 #ifdef NVLINK_GET
-#warning "NVLINK_GET"
   this->_grid->StencilBarrier(); // He can now get mu local gather, I can get his
   // Synch shared memory on a single nodes; could use an asynchronous barrier here and defer check
   // Or issue barrier AFTER the DMA is running

View File

@@ -91,13 +91,13 @@ public:
   virtual void Mdag (const FermionField &in, FermionField &out){assert(0);};
   // half checkerboard operations; leave unimplemented as abstract for now
-  virtual void Meooe       (const FermionField &in, FermionField &out){assert(0);};
-  virtual void Mooee       (const FermionField &in, FermionField &out){assert(0);};
-  virtual void MooeeInv    (const FermionField &in, FermionField &out){assert(0);};
-  virtual void MeooeDag    (const FermionField &in, FermionField &out){assert(0);};
-  virtual void MooeeDag    (const FermionField &in, FermionField &out){assert(0);};
-  virtual void MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
+  virtual void Meooe       (const FermionField &in, FermionField &out);
+  virtual void Mooee       (const FermionField &in, FermionField &out);
+  virtual void MooeeInv    (const FermionField &in, FermionField &out);
+  virtual void MeooeDag    (const FermionField &in, FermionField &out);
+  virtual void MooeeDag    (const FermionField &in, FermionField &out);
+  virtual void MooeeInvDag (const FermionField &in, FermionField &out);
   virtual void Mdir   (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
   virtual void MdirAll(const FermionField &in, std::vector<FermionField> &out){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac

View File

@@ -0,0 +1,376 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/CompactWilsonCloverFermion5DImplementation.h
Copyright (C) 2017 - 2025
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Daniel Richtmann <daniel.richtmann@gmail.com>
Author: Christoph Lehner <christoph@lhnr.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/CompactWilsonCloverFermion5D.h>
NAMESPACE_BEGIN(Grid);
template<class Impl, class CloverHelpers>
CompactWilsonCloverFermion5D<Impl, CloverHelpers>::CompactWilsonCloverFermion5D(GaugeField& _Umu,
GridCartesian &FiveDimGrid,
GridRedBlackCartesian &FiveDimRedBlackGrid,
GridCartesian &FourDimGrid,
GridRedBlackCartesian &FourDimRedBlackGrid,
const RealD _mass,
const RealD _csw_r,
const RealD _csw_t,
const RealD _cF,
const ImplParams& impl_p)
: WilsonBase(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid, _mass, impl_p)
, csw_r(_csw_r)
, csw_t(_csw_t)
, cF(_cF)
, fixedBoundaries(impl_p.boundary_phases[Nd-1] == 0.0)
, Diagonal(&FourDimGrid), Triangle(&FourDimGrid)
, DiagonalEven(&FourDimRedBlackGrid), TriangleEven(&FourDimRedBlackGrid)
, DiagonalOdd(&FourDimRedBlackGrid), TriangleOdd(&FourDimRedBlackGrid)
, DiagonalInv(&FourDimGrid), TriangleInv(&FourDimGrid)
, DiagonalInvEven(&FourDimRedBlackGrid), TriangleInvEven(&FourDimRedBlackGrid)
, DiagonalInvOdd(&FourDimRedBlackGrid), TriangleInvOdd(&FourDimRedBlackGrid)
, Tmp(&FiveDimGrid)
, BoundaryMask(&FiveDimGrid)
, BoundaryMaskEven(&FiveDimRedBlackGrid), BoundaryMaskOdd(&FiveDimRedBlackGrid)
{
assert(Nd == 4 && Nc == 3 && Ns == 4 && Impl::Dimension == 3);
csw_r *= 0.5;
csw_t *= 0.5;
//if (clover_anisotropy.isAnisotropic)
// csw_r /= clover_anisotropy.xi_0;
ImportGauge(_Umu);
if (fixedBoundaries) {
this->BoundaryMaskEven.Checkerboard() = Even;
this->BoundaryMaskOdd.Checkerboard() = Odd;
CompactHelpers::SetupMasks(this->BoundaryMask, this->BoundaryMaskEven, this->BoundaryMaskOdd);
}
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::Dhop(const FermionField& in, FermionField& out, int dag) {
WilsonBase::Dhop(in, out, dag);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::DhopOE(const FermionField& in, FermionField& out, int dag) {
WilsonBase::DhopOE(in, out, dag);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::DhopEO(const FermionField& in, FermionField& out, int dag) {
WilsonBase::DhopEO(in, out, dag);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) {
WilsonBase::DhopDir(in, out, dir, disp);
if(this->fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::DhopDirAll(const FermionField& in, std::vector<FermionField>& out) {
WilsonBase::DhopDirAll(in, out);
if(this->fixedBoundaries) {
for(auto& o : out) ApplyBoundaryMask(o);
}
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::M(const FermionField& in, FermionField& out) {
out.Checkerboard() = in.Checkerboard();
WilsonBase::Dhop(in, out, DaggerNo); // call base to save applying bc
Mooee(in, Tmp);
axpy(out, 1.0, out, Tmp);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::Mdag(const FermionField& in, FermionField& out) {
out.Checkerboard() = in.Checkerboard();
WilsonBase::Dhop(in, out, DaggerYes); // call base to save applying bc
MooeeDag(in, Tmp);
axpy(out, 1.0, out, Tmp);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::Meooe(const FermionField& in, FermionField& out) {
WilsonBase::Meooe(in, out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MeooeDag(const FermionField& in, FermionField& out) {
WilsonBase::MeooeDag(in, out);
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::Mooee(const FermionField& in, FermionField& out) {
if(in.Grid()->_isCheckerBoarded) {
if(in.Checkerboard() == Odd) {
MooeeInternal(in, out, DiagonalOdd, TriangleOdd);
} else {
MooeeInternal(in, out, DiagonalEven, TriangleEven);
}
} else {
MooeeInternal(in, out, Diagonal, Triangle);
}
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MooeeDag(const FermionField& in, FermionField& out) {
Mooee(in, out); // blocks are hermitian
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MooeeInv(const FermionField& in, FermionField& out) {
if(in.Grid()->_isCheckerBoarded) {
if(in.Checkerboard() == Odd) {
MooeeInternal(in, out, DiagonalInvOdd, TriangleInvOdd);
} else {
MooeeInternal(in, out, DiagonalInvEven, TriangleInvEven);
}
} else {
MooeeInternal(in, out, DiagonalInv, TriangleInv);
}
if(fixedBoundaries) ApplyBoundaryMask(out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MooeeInvDag(const FermionField& in, FermionField& out) {
MooeeInv(in, out); // blocks are hermitian
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::Mdir(const FermionField& in, FermionField& out, int dir, int disp) {
DhopDir(in, out, dir, disp);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MdirAll(const FermionField& in, std::vector<FermionField>& out) {
DhopDirAll(in, out);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) {
assert(!fixedBoundaries); // TODO check for changes required for open bc
// NOTE: code copied from original clover term
conformable(X.Grid(), Y.Grid());
conformable(X.Grid(), force.Grid());
GaugeLinkField force_mu(force.Grid()), lambda(force.Grid());
GaugeField clover_force(force.Grid());
PropagatorField Lambda(force.Grid());
// Guido: Here we are hitting some performance issues:
// need to extract the components of the DoubledGaugeField
// for each call
// Possible solution
// Create a vector object to store them? (cons: wasting space)
std::vector<GaugeLinkField> U(Nd, this->Umu.Grid());
Impl::extractLinkField(U, this->Umu);
force = Zero();
// Derivative of the Wilson hopping term
this->DhopDeriv(force, X, Y, dag);
///////////////////////////////////////////////////////////
// Clover term derivative
///////////////////////////////////////////////////////////
Impl::outerProductImpl(Lambda, X, Y);
//std::cout << "Lambda:" << Lambda << std::endl;
Gamma::Algebra sigma[] = {
Gamma::Algebra::SigmaXY,
Gamma::Algebra::SigmaXZ,
Gamma::Algebra::SigmaXT,
Gamma::Algebra::MinusSigmaXY,
Gamma::Algebra::SigmaYZ,
Gamma::Algebra::SigmaYT,
Gamma::Algebra::MinusSigmaXZ,
Gamma::Algebra::MinusSigmaYZ,
Gamma::Algebra::SigmaZT,
Gamma::Algebra::MinusSigmaXT,
Gamma::Algebra::MinusSigmaYT,
Gamma::Algebra::MinusSigmaZT};
/*
sigma_{\mu \nu}=
| 0 sigma[0] sigma[1] sigma[2] |
| sigma[3] 0 sigma[4] sigma[5] |
| sigma[6] sigma[7] 0 sigma[8] |
| sigma[9] sigma[10] sigma[11] 0 |
*/
int count = 0;
clover_force = Zero();
for (int mu = 0; mu < 4; mu++)
{
force_mu = Zero();
for (int nu = 0; nu < 4; nu++)
{
if (mu == nu)
continue;
RealD factor;
if (nu == 4 || mu == 4)
{
factor = 2.0 * csw_t;
}
else
{
factor = 2.0 * csw_r;
}
PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked
Impl::TraceSpinImpl(lambda, Slambda); // traceSpin ok
force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked
count++;
}
pokeLorentz(clover_force, U[mu] * force_mu, mu);
}
//clover_force *= csw;
force += clover_force;
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) {
assert(0);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) {
assert(0);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MooeeInternal(const FermionField& in,
FermionField& out,
const CloverDiagonalField& diagonal,
const CloverTriangleField& triangle) {
assert(in.Checkerboard() == Odd || in.Checkerboard() == Even);
out.Checkerboard() = in.Checkerboard();
conformable(in, out);
CompactHelpers::MooeeKernel(diagonal.oSites(), this->Ls, in, out, diagonal, triangle);
}
template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::ImportGauge(const GaugeField& _Umu) {
// NOTE: parts copied from original implementation
// Import gauge into base class
double t0 = usecond();
WilsonBase::ImportGauge(_Umu); // NOTE: called here and in wilson constructor -> performed twice, but can't avoid that
// Initialize temporary variables
double t1 = usecond();
conformable(_Umu.Grid(), this->GaugeGrid());
GridBase* grid = _Umu.Grid();
typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid);
CloverField TmpOriginal(grid);
CloverField TmpInverse(grid);
// Compute the field strength terms mu>nu
double t2 = usecond();
WilsonLoops<Impl>::FieldStrength(Bx, _Umu, Zdir, Ydir);
WilsonLoops<Impl>::FieldStrength(By, _Umu, Zdir, Xdir);
WilsonLoops<Impl>::FieldStrength(Bz, _Umu, Ydir, Xdir);
WilsonLoops<Impl>::FieldStrength(Ex, _Umu, Tdir, Xdir);
WilsonLoops<Impl>::FieldStrength(Ey, _Umu, Tdir, Ydir);
WilsonLoops<Impl>::FieldStrength(Ez, _Umu, Tdir, Zdir);
// Compute the Clover Operator acting on Colour and Spin
// multiply here by the clover coefficients for the anisotropy
double t3 = usecond();
TmpOriginal = Helpers::fillCloverYZ(Bx) * csw_r;
TmpOriginal += Helpers::fillCloverXZ(By) * csw_r;
TmpOriginal += Helpers::fillCloverXY(Bz) * csw_r;
TmpOriginal += Helpers::fillCloverXT(Ex) * csw_t;
TmpOriginal += Helpers::fillCloverYT(Ey) * csw_t;
TmpOriginal += Helpers::fillCloverZT(Ez) * csw_t;
// Instantiate the clover term
// - In case of the standard clover the mass term is added
// - In case of the exponential clover the clover term is exponentiated
double t4 = usecond();
CloverHelpers::InstantiateClover(TmpOriginal, TmpInverse, csw_t, 4.0 + this->M5 /*this->diag_mass*/);
// Convert the data layout of the clover term
double t5 = usecond();
CompactHelpers::ConvertLayout(TmpOriginal, Diagonal, Triangle);
// Modify the clover term at the temporal boundaries in case of open boundary conditions
double t6 = usecond();
if(fixedBoundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, 4.0 + this->M5 /*this->diag_mass*/);
// Invert the Clover term
// In case of the exponential clover with (anti-)periodic boundary conditions exp(-Clover) saved
  // in TmpInverse can be used. In all other cases the clover term has to be explicitly inverted.
  // TODO: For now this inversion is explicitly done on the CPU
double t7 = usecond();
CloverHelpers::InvertClover(TmpInverse, Diagonal, Triangle, DiagonalInv, TriangleInv, fixedBoundaries);
// Fill the remaining clover fields
double t8 = usecond();
pickCheckerboard(Even, DiagonalEven, Diagonal);
pickCheckerboard(Even, TriangleEven, Triangle);
pickCheckerboard(Odd, DiagonalOdd, Diagonal);
pickCheckerboard(Odd, TriangleOdd, Triangle);
pickCheckerboard(Even, DiagonalInvEven, DiagonalInv);
pickCheckerboard(Even, TriangleInvEven, TriangleInv);
pickCheckerboard(Odd, DiagonalInvOdd, DiagonalInv);
pickCheckerboard(Odd, TriangleInvOdd, TriangleInv);
// Report timings
double t9 = usecond();
std::cout << GridLogDebug << "CompactWilsonCloverFermion5D::ImportGauge timings:" << std::endl;
std::cout << GridLogDebug << "WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 << std::endl;
std::cout << GridLogDebug << "allocations = " << (t2 - t1) / 1e6 << std::endl;
std::cout << GridLogDebug << "field strength = " << (t3 - t2) / 1e6 << std::endl;
std::cout << GridLogDebug << "fill clover = " << (t4 - t3) / 1e6 << std::endl;
std::cout << GridLogDebug << "instantiate clover = " << (t5 - t4) / 1e6 << std::endl;
std::cout << GridLogDebug << "convert layout = " << (t6 - t5) / 1e6 << std::endl;
std::cout << GridLogDebug << "modify boundaries = " << (t7 - t6) / 1e6 << std::endl;
std::cout << GridLogDebug << "invert clover = " << (t8 - t7) / 1e6 << std::endl;
std::cout << GridLogDebug << "pick cbs = " << (t9 - t8) / 1e6 << std::endl;
std::cout << GridLogDebug << "total = " << (t9 - t0) / 1e6 << std::endl;
}
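// For reference (up to Grid's normalisation conventions) the clover term
// assembled by ImportGauge is
//   A = (4 + M5) * 1 + csw * sum_{mu<nu} sigma_{mu,nu} F_{mu,nu}
// for the standard clover, while the exponential variant exponentiates the
// field-strength part; InstantiateClover performs the mass addition or
// exponentiation, and InvertClover produces A^{-1} in the compact
// diagonal/triangle layout.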
NAMESPACE_END(Grid);

View File

@ -14,6 +14,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk> Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Andrew Lawson <andrew.lawson1991@gmail.com> Author: Andrew Lawson <andrew.lawson1991@gmail.com>
Author: Vera Guelpers <V.M.Guelpers@soton.ac.uk> Author: Vera Guelpers <V.M.Guelpers@soton.ac.uk>
Author: Christoph Lehner <christoph@lhnr.de>
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@ -484,6 +485,54 @@ void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag
Dhop(in,out,dag); // -0.5 is included Dhop(in,out,dag); // -0.5 is included
axpy(out,4.0-M5,in,out); axpy(out,4.0-M5,in,out);
} }
template <class Impl>
void WilsonFermion5D<Impl>::Meooe(const FermionField &in, FermionField &out)
{
if (in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerNo);
} else {
DhopOE(in, out, DaggerNo);
}
}
template <class Impl>
void WilsonFermion5D<Impl>::MeooeDag(const FermionField &in, FermionField &out)
{
if (in.Checkerboard() == Odd) {
DhopEO(in, out, DaggerYes);
} else {
DhopOE(in, out, DaggerYes);
}
}
template <class Impl>
void WilsonFermion5D<Impl>::Mooee(const FermionField &in, FermionField &out)
{
out.Checkerboard() = in.Checkerboard();
typename FermionField::scalar_type scal(4.0 + M5);
out = scal * in;
}
template <class Impl>
void WilsonFermion5D<Impl>::MooeeDag(const FermionField &in, FermionField &out)
{
out.Checkerboard() = in.Checkerboard();
Mooee(in, out);
}
template<class Impl>
void WilsonFermion5D<Impl>::MooeeInv(const FermionField &in, FermionField &out)
{
out.Checkerboard() = in.Checkerboard();
out = (1.0/(4.0 + M5))*in;
}
template<class Impl>
void WilsonFermion5D<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
{
out.Checkerboard() = in.Checkerboard();
MooeeInv(in,out);
}
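// In the checkerboard basis these additions give WilsonFermion5D the
// even-odd block structure used by Schur-preconditioned solvers:
//   M = ( Mee Meo ; Moe Moo ),  Mee = Moo = (4 + M5) * 1,
// so MooeeInv above is the trivial scalar inverse 1/(4 + M5).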
template<class Impl> template<class Impl>
void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in, RealD mass,std::vector<double> twist) void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in, RealD mass,std::vector<double> twist)

View File

@ -504,7 +504,7 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
autoView(st_v , st,AcceleratorRead); autoView(st_v , st,AcceleratorRead);
if( interior && exterior ) { if( interior && exterior ) {
// acceleratorFenceComputeStream(); acceleratorFenceComputeStream();
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;} if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;} if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;}
#ifndef GRID_CUDA #ifndef GRID_CUDA

View File

@ -0,0 +1,45 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./lib/qcd/action/fermion/instantiation/CompactWilsonCloverFermionInstantiation5D.cc.master
Copyright (C) 2017 - 2025
Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Daniel Richtmann <daniel.richtmann@gmail.com>
Author: Mattia Bruno <mattia.bruno@cern.ch>
Author: Christoph Lehner <christoph@lhnr.de>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/CompactWilsonCloverFermion5D.h>
#include <Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermion5DImplementation.h>
#include <Grid/qcd/action/fermion/CloverHelpers.h>
NAMESPACE_BEGIN(Grid);
#include "impl.h"
template class CompactWilsonCloverFermion5D<IMPLEMENTATION, CompactCloverHelpers<IMPLEMENTATION>>;
template class CompactWilsonCloverFermion5D<IMPLEMENTATION, CompactExpCloverHelpers<IMPLEMENTATION>>;
NAMESPACE_END(Grid);

View File

@ -0,0 +1 @@
../CompactWilsonCloverFermion5DInstantiation.cc.master

View File

@ -0,0 +1 @@
../CompactWilsonCloverFermion5DInstantiation.cc.master

View File

@ -62,7 +62,7 @@ do
done done
done done
CC_LIST="CompactWilsonCloverFermionInstantiation" CC_LIST="CompactWilsonCloverFermionInstantiation CompactWilsonCloverFermion5DInstantiation"
for impl in $COMPACT_WILSON_IMPL_LIST for impl in $COMPACT_WILSON_IMPL_LIST
do do

View File

@ -76,26 +76,26 @@ public:
return action; return action;
}; };
virtual void deriv(const GaugeField &Umu,GaugeField & dSdU) { virtual void deriv(const GaugeField &U, GaugeField &dSdU) {
//extend Ta to include Lorentz indexes //extend Ta to include Lorentz indexes
RealD factor_p = c_plaq/RealD(Nc)*0.5; RealD factor_p = c_plaq/RealD(Nc)*0.5;
RealD factor_r = c_rect/RealD(Nc)*0.5; RealD factor_r = c_rect/RealD(Nc)*0.5;
GridBase *grid = Umu.Grid(); GridBase *grid = U.Grid();
std::vector<GaugeLinkField> U (Nd,grid); std::vector<GaugeLinkField> Umu (Nd,grid);
for(int mu=0;mu<Nd;mu++){ for(int mu=0;mu<Nd;mu++){
U[mu] = PeekIndex<LorentzIndex>(Umu,mu); Umu[mu] = PeekIndex<LorentzIndex>(U,mu);
} }
std::vector<GaugeLinkField> RectStaple(Nd,grid), Staple(Nd,grid); std::vector<GaugeLinkField> RectStaple(Nd,grid), Staple(Nd,grid);
WilsonLoops<Gimpl>::StapleAndRectStapleAll(Staple, RectStaple, U, workspace); WilsonLoops<Gimpl>::StapleAndRectStapleAll(Staple, RectStaple, Umu, workspace);
GaugeLinkField dSdU_mu(grid); GaugeLinkField dSdU_mu(grid);
GaugeLinkField staple(grid); GaugeLinkField staple(grid);
for (int mu=0; mu < Nd; mu++){ for (int mu=0; mu < Nd; mu++){
dSdU_mu = Ta(U[mu]*Staple[mu])*factor_p; dSdU_mu = Ta(Umu[mu]*Staple[mu])*factor_p;
dSdU_mu = dSdU_mu + Ta(U[mu]*RectStaple[mu])*factor_r; dSdU_mu = dSdU_mu + Ta(Umu[mu]*RectStaple[mu])*factor_r;
PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu); PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
} }

View File

@ -73,20 +73,23 @@ public:
// extend Ta to include Lorentz indexes // extend Ta to include Lorentz indexes
RealD factor = 0.5 * beta / RealD(Nc); RealD factor = 0.5 * beta / RealD(Nc);
GridBase *grid = U.Grid();
GaugeLinkField Umu(U.Grid()); GaugeLinkField dSdU_mu(grid);
GaugeLinkField dSdU_mu(U.Grid()); std::vector<GaugeLinkField> Umu(Nd, grid);
for (int mu = 0; mu < Nd; mu++) { for (int mu = 0; mu < Nd; mu++) {
Umu[mu] = PeekIndex<LorentzIndex>(U, mu);
}
Umu = PeekIndex<LorentzIndex>(U, mu); for (int mu = 0; mu < Nd; mu++) {
// Staple in direction mu // Staple in direction mu
WilsonLoops<Gimpl>::Staple(dSdU_mu, U, mu); WilsonLoops<Gimpl>::Staple(dSdU_mu, Umu, mu);
dSdU_mu = Ta(Umu * dSdU_mu) * factor; dSdU_mu = Ta(Umu[mu] * dSdU_mu) * factor;
PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu); PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
} }
} }
private: private:
RealD beta; RealD beta;
}; };

View File

@ -111,8 +111,8 @@ public:
}; };
void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) { void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) {
std::string config, rng; std::string config, rng, smr;
this->build_filenames(traj, Params, config, rng); this->build_filenames(traj, Params, config, smr, rng);
this->check_filename(rng); this->check_filename(rng);
this->check_filename(config); this->check_filename(config);

View File

@ -75,7 +75,7 @@ public:
GridParallelRNG &pRNG) { GridParallelRNG &pRNG) {
if ((traj % Params.saveInterval) == 0) { if ((traj % Params.saveInterval) == 0) {
std::string config, rng, smr; std::string config, rng, smr;
this->build_filenames(traj, Params, config, rng); this->build_filenames(traj, Params, config, smr, rng);
GridBase *grid = SmartConfig.get_U(false).Grid(); GridBase *grid = SmartConfig.get_U(false).Grid();
uint32_t nersc_csum,scidac_csuma,scidac_csumb; uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb);
@ -102,7 +102,7 @@ public:
if ( Params.saveSmeared ) { if ( Params.saveSmeared ) {
IldgWriter _IldgWriter(grid->IsBoss()); IldgWriter _IldgWriter(grid->IsBoss());
_IldgWriter.open(smr); _IldgWriter.open(smr);
_IldgWriter.writeConfiguration<GaugeStats>(SmartConfig.get_U(true), traj, config, config); _IldgWriter.writeConfiguration<GaugeStats>(SmartConfig.get_U(true), traj, smr, smr);
_IldgWriter.close(); _IldgWriter.close();
std::cout << GridLogMessage << "Written ILDG Configuration on " << smr std::cout << GridLogMessage << "Written ILDG Configuration on " << smr
@ -118,8 +118,8 @@ public:
void CheckpointRestore(int traj, GaugeField &U, GridSerialRNG &sRNG, void CheckpointRestore(int traj, GaugeField &U, GridSerialRNG &sRNG,
GridParallelRNG &pRNG) { GridParallelRNG &pRNG) {
std::string config, rng; std::string config, rng, smr;
this->build_filenames(traj, Params, config, rng); this->build_filenames(traj, Params, config, smr, rng);
this->check_filename(rng); this->check_filename(rng);
this->check_filename(config); this->check_filename(config);

View File

@ -107,8 +107,8 @@ class ScidacHmcCheckpointer : public BaseHmcCheckpointer<Implementation> {
void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG,
GridParallelRNG &pRNG) { GridParallelRNG &pRNG) {
std::string config, rng; std::string config, rng, smr;
this->build_filenames(traj, Params, config, rng); this->build_filenames(traj, Params, config, smr, rng);
this->check_filename(rng); this->check_filename(rng);
this->check_filename(config); this->check_filename(config);

View File

@ -62,15 +62,15 @@ accelerator_inline int stencilIndex(int mu, int nu) {
/*! @brief structure holding the link treatment */ /*! @brief structure holding the link treatment */
struct SmearingParameters{ struct HISQSmearingParameters{
SmearingParameters(){} HISQSmearingParameters(){}
Real c_1; // 1 link Real c_1; // 1 link
Real c_naik; // Naik term Real c_naik; // Naik term
Real c_3; // 3 link Real c_3; // 3 link
Real c_5; // 5 link Real c_5; // 5 link
Real c_7; // 7 link Real c_7; // 7 link
Real c_lp; // 5 link Lepage Real c_lp; // 5 link Lepage
SmearingParameters(Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp) HISQSmearingParameters(Real c1, Real cnaik, Real c3, Real c5, Real c7, Real clp)
: c_1(c1), : c_1(c1),
c_naik(cnaik), c_naik(cnaik),
c_3(c3), c_3(c3),
@ -86,7 +86,7 @@ class Smear_HISQ : public Gimpl {
private: private:
GridCartesian* const _grid; GridCartesian* const _grid;
SmearingParameters _linkTreatment; HISQSmearingParameters _linkTreatment;
public: public:
@ -117,7 +117,7 @@ public:
// IN--u_thin // IN--u_thin
void smear(GF& u_smr, GF& u_naik, GF& u_thin) const { void smear(GF& u_smr, GF& u_naik, GF& u_thin) const {
SmearingParameters lt = this->_linkTreatment; HISQSmearingParameters lt = this->_linkTreatment;
auto grid = this->_grid; auto grid = this->_grid;
// Create a padded cell of extra padding depth=1 and fill the padding. // Create a padded cell of extra padding depth=1 and fill the padding.

View File

@ -207,11 +207,14 @@ std::vector<RealD> WilsonFlowBase<Gimpl>::flowMeasureEnergyDensityCloverleaf(con
} }
template <class Gimpl> template <class Gimpl>
void WilsonFlowBase<Gimpl>::setDefaultMeasurements(int topq_meas_interval){ void WilsonFlowBase<Gimpl>::setDefaultMeasurements(int meas_interval){
addMeasurement(1, [](int step, RealD t, const typename Gimpl::GaugeField &U){ addMeasurement(meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " << step << " " << t << " " << energyDensityPlaquette(t,U) << std::endl; std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " << step << " " << t << " " << energyDensityPlaquette(t,U) << std::endl;
}); });
addMeasurement(topq_meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){ addMeasurement(meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Energy density (cloverleaf) : " << step << " " << t << " " << energyDensityCloverleaf(t,U) << std::endl;
});
addMeasurement(meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " << WilsonLoops<Gimpl>::TopologicalCharge(U) << std::endl; std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " << WilsonLoops<Gimpl>::TopologicalCharge(U) << std::endl;
}); });
} }
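A hedged usage sketch of the consolidated measurement interval (it assumes an already-constructed WilsonFlow object flow; the setDefaultMeasurements and addMeasurement signatures are as in the hunk above, and the extra plaquette callback is purely illustrative):
  flow.setDefaultMeasurements(10);  // plaq + cloverleaf energy + top. charge every 10 steps
  flow.addMeasurement(10, [](int step, RealD t, const typename PeriodicGimplR::GaugeField &U){
    std::cout << GridLogMessage << "[WilsonFlow] Plaquette : " << step << " "
              << WilsonLoops<PeriodicGimplR>::avgPlaquette(U) << std::endl;
  });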

View File

@ -292,19 +292,21 @@ public:
////////////////////////////////////////////////// //////////////////////////////////////////////////
// the sum over all nu-oriented staples for nu != mu on each site // the sum over all nu-oriented staples for nu != mu on each site
////////////////////////////////////////////////// //////////////////////////////////////////////////
static void Staple(GaugeMat &staple, const GaugeLorentz &Umu, int mu) { static void Staple(GaugeMat &staple, const GaugeLorentz &U, int mu) {
GridBase *grid = Umu.Grid(); std::vector<GaugeMat> Umu(Nd, U.Grid());
std::vector<GaugeMat> U(Nd, grid);
for (int d = 0; d < Nd; d++) { for (int d = 0; d < Nd; d++) {
U[d] = PeekIndex<LorentzIndex>(Umu, d); Umu[d] = PeekIndex<LorentzIndex>(U, d);
} }
Staple(staple, U, mu); Staple(staple, Umu, mu);
} }
static void Staple(GaugeMat &staple, const std::vector<GaugeMat> &U, int mu) { static void Staple(GaugeMat &staple, const std::vector<GaugeMat> &Umu, int mu) {
staple = Zero();
autoView(staple_v, staple, AcceleratorWrite);
accelerator_for(i, staple.Grid()->oSites(), Simd::Nsimd(), {
staple_v[i] = Zero();
});
for (int nu = 0; nu < Nd; nu++) { for (int nu = 0; nu < Nd; nu++) {
@ -321,9 +323,9 @@ public:
staple += Gimpl::ShiftStaple( staple += Gimpl::ShiftStaple(
Gimpl::CovShiftForward( Gimpl::CovShiftForward(
U[nu], nu, Umu[nu], nu,
Gimpl::CovShiftBackward( Gimpl::CovShiftBackward(
U[mu], mu, Gimpl::CovShiftIdentityBackward(U[nu], nu))), Umu[mu], mu, Gimpl::CovShiftIdentityBackward(Umu[nu], nu))),
mu); mu);
// __ // __
@ -333,8 +335,8 @@ public:
// //
staple += Gimpl::ShiftStaple( staple += Gimpl::ShiftStaple(
Gimpl::CovShiftBackward(U[nu], nu, Gimpl::CovShiftBackward(Umu[nu], nu,
Gimpl::CovShiftBackward(U[mu], mu, U[nu])), mu); Gimpl::CovShiftBackward(Umu[mu], mu, Umu[nu])), mu);
} }
} }
} }

View File

@ -446,6 +446,7 @@ public:
Communicate(); Communicate();
CommsMergeSHM(compress); CommsMergeSHM(compress);
CommsMerge(compress); CommsMerge(compress);
accelerator_barrier();
} }
template<class compressor> int HaloGatherDir(const Lattice<vobj> &source,compressor &compress,int point,int & face_idx) template<class compressor> int HaloGatherDir(const Lattice<vobj> &source,compressor &compress,int point,int & face_idx)
@ -518,7 +519,6 @@ public:
} }
accelerator_barrier(); // All my local gathers are complete accelerator_barrier(); // All my local gathers are complete
#ifdef NVLINK_GET #ifdef NVLINK_GET
#warning "NVLINK_GET"
_grid->StencilBarrier(); // He can now get mu local gather, I can get his _grid->StencilBarrier(); // He can now get mu local gather, I can get his
// Synch shared memory on a single nodes; could use an asynchronous barrier here and defer check // Synch shared memory on a single nodes; could use an asynchronous barrier here and defer check
// Or issue barrier AFTER the DMA is running // Or issue barrier AFTER the DMA is running
@ -690,6 +690,7 @@ public:
} }
} }
} }
// std::cout << "BuildSurfaceList size is "<<surface_list_size<<std::endl;
surface_list.resize(surface_list_size); surface_list.resize(surface_list_size);
std::vector<int> surface_list_host(surface_list_size); std::vector<int> surface_list_host(surface_list_size);
int32_t ss=0; int32_t ss=0;
@ -709,7 +710,7 @@ public:
} }
} }
acceleratorCopyToDevice(&surface_list_host[0],&surface_list[0],surface_list_size*sizeof(int)); acceleratorCopyToDevice(&surface_list_host[0],&surface_list[0],surface_list_size*sizeof(int));
std::cout << GridLogMessage<<"BuildSurfaceList size is "<<surface_list_size<<std::endl; // std::cout << GridLogMessage<<"BuildSurfaceList size is "<<surface_list_size<<std::endl;
} }
/// Introduce a block structure and switch off comms on boundaries /// Introduce a block structure and switch off comms on boundaries
void DirichletBlock(const Coordinate &dirichlet_block) void DirichletBlock(const Coordinate &dirichlet_block)
@ -801,8 +802,8 @@ public:
this->_entries_host_p = &_entries[0]; this->_entries_host_p = &_entries[0];
this->_entries_p = &_entries_device[0]; this->_entries_p = &_entries_device[0];
std::cout << GridLogMessage << " Stencil object allocated for "<<std::dec<<this->_osites // std::cout << GridLogMessage << " Stencil object allocated for "<<std::dec<<this->_osites
<<" sites table "<<std::hex<<this->_entries_p<< " GridPtr "<<_grid<<std::dec<<std::endl; // <<" sites table "<<std::hex<<this->_entries_p<< " GridPtr "<<_grid<<std::dec<<std::endl;
for(int ii=0;ii<npoints;ii++){ for(int ii=0;ii<npoints;ii++){

View File

@ -242,19 +242,33 @@ inline void *acceleratorAllocDevice(size_t bytes)
return ptr; return ptr;
}; };
typedef int acceleratorEvent_t;
inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);}; inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);}; inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);};
inline void acceleratorFreeHost(void *ptr){ cudaFree(ptr);}; inline void acceleratorFreeHost(void *ptr){ cudaFree(ptr);};
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);} inline void acceleratorCopyToDevice(const void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);} inline void acceleratorCopyFromDevice(const void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
inline void acceleratorCopyToDeviceAsync(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) { cudaMemcpyAsync(to,from,bytes, cudaMemcpyHostToDevice, stream);}
inline void acceleratorCopyFromDeviceAsync(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) { cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToHost, stream);}
inline void acceleratorMemSet(void *base,int value,size_t bytes) { cudaMemset(base,value,bytes);} inline void acceleratorMemSet(void *base,int value,size_t bytes) { cudaMemset(base,value,bytes);}
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) {
acceleratorCopyToDevice(to,from,bytes, cudaMemcpyHostToDevice);
return 0;
}
inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) {
acceleratorCopyFromDevice(from,to,bytes);
return 0;
}
inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
{ {
cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToDevice,copyStream); cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToDevice,copyStream);
return 0;
} }
inline void acceleratorCopySynchronise(void) { cudaStreamSynchronize(copyStream); }; inline void acceleratorCopySynchronise(void) { cudaStreamSynchronize(copyStream); };
inline void acceleratorEventWait(acceleratorEvent_t ev)
{
//auto discard=cudaStreamSynchronize(ev);
}
inline int acceleratorEventIsComplete(acceleratorEvent_t ev){ acceleratorEventWait(ev) ; return 1;}
inline int acceleratorIsCommunicable(void *ptr) inline int acceleratorIsCommunicable(void *ptr)
@ -363,8 +377,8 @@ inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *t
inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from,void *to,size_t bytes) { return theCopyAccelerator->memcpy(to,from,bytes); } inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from,void *to,size_t bytes) { return theCopyAccelerator->memcpy(to,from,bytes); }
inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from,void *to,size_t bytes) { return theCopyAccelerator->memcpy(to,from,bytes); } inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from,void *to,size_t bytes) { return theCopyAccelerator->memcpy(to,from,bytes); }
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();} inline void acceleratorCopyToDevice(const void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();} inline void acceleratorCopyFromDevice(const void *from,void *to,size_t bytes){ theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
inline void acceleratorMemSet(void *base,int value,size_t bytes) { theCopyAccelerator->memset(base,value,bytes); theCopyAccelerator->wait();} inline void acceleratorMemSet(void *base,int value,size_t bytes) { theCopyAccelerator->memset(base,value,bytes); theCopyAccelerator->wait();}
inline int acceleratorIsCommunicable(void *ptr) inline int acceleratorIsCommunicable(void *ptr)
@ -478,7 +492,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
inline void *acceleratorAllocHost(size_t bytes) inline void *acceleratorAllocHost(size_t bytes)
{ {
void *ptr=NULL; void *ptr=NULL;
auto err = hipMallocHost((void **)&ptr,bytes); auto err = hipHostMalloc((void **)&ptr,bytes);
if( err != hipSuccess ) { if( err != hipSuccess ) {
ptr = (void *) NULL; ptr = (void *) NULL;
fprintf(stderr," hipMallocManaged failed for %ld %s \n",bytes,hipGetErrorString(err)); fflush(stderr); fprintf(stderr," hipMallocManaged failed for %ld %s \n",bytes,hipGetErrorString(err)); fflush(stderr);
@ -511,23 +525,35 @@ inline void *acceleratorAllocDevice(size_t bytes)
inline void acceleratorFreeHost(void *ptr){ auto discard=hipFree(ptr);}; inline void acceleratorFreeHost(void *ptr){ auto discard=hipFree(ptr);};
inline void acceleratorFreeShared(void *ptr){ auto discard=hipFree(ptr);}; inline void acceleratorFreeShared(void *ptr){ auto discard=hipFree(ptr);};
inline void acceleratorFreeDevice(void *ptr){ auto discard=hipFree(ptr);}; inline void acceleratorFreeDevice(void *ptr){ auto discard=hipFree(ptr);};
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { auto discard=hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);} inline void acceleratorCopyToDevice(const void *from,void *to,size_t bytes) { auto discard=hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ auto discard=hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);} inline void acceleratorCopyFromDevice(const void *from,void *to,size_t bytes){ auto discard=hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}
inline void acceleratorMemSet(void *base,int value,size_t bytes) { auto discard=hipMemset(base,value,bytes);} inline void acceleratorMemSet(void *base,int value,size_t bytes) { auto discard=hipMemset(base,value,bytes);}
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch typedef int acceleratorEvent_t;
inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
{ {
auto discard=hipMemcpyDtoDAsync(to,from,bytes, copyStream); auto discard=hipMemcpyDtoDAsync(to,from,bytes, copyStream);
return 0;
} }
inline void acceleratorCopyToDeviceAsync(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) { inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) {
auto r = hipMemcpyAsync(to,from,bytes, hipMemcpyHostToDevice, stream); acceleratorCopyToDevice(from,to,bytes);
return 0;
} }
inline void acceleratorCopyFromDeviceAsync(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) { inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) {
auto r = hipMemcpyAsync(to,from,bytes, hipMemcpyDeviceToHost, stream); acceleratorCopyFromDevice(from,to,bytes);
return 0;
} }
inline void acceleratorCopySynchronise(void) { auto discard=hipStreamSynchronize(copyStream); }; inline void acceleratorCopySynchronise(void) { auto discard=hipStreamSynchronize(copyStream); };
inline void acceleratorEventWait(acceleratorEvent_t ev)
{
// auto discard=hipStreamSynchronize(ev);
}
inline int acceleratorEventIsComplete(acceleratorEvent_t ev){ acceleratorEventWait(ev) ; return 1;}
#endif #endif
inline void acceleratorPin(void *ptr,unsigned long bytes) inline void acceleratorPin(void *ptr,unsigned long bytes)
@ -564,6 +590,8 @@ inline void acceleratorPin(void *ptr,unsigned long bytes)
#undef GRID_SIMT #undef GRID_SIMT
typedef int acceleratorEvent_t;
inline void acceleratorMem(void) inline void acceleratorMem(void)
{ {
/* /*
@ -585,7 +613,12 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA spec
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes); } inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes); }
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes); } inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes); }
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes);} inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from,void *to,size_t bytes) { acceleratorCopyToDevice(from,to,bytes); return 0; }
inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from,void *to,size_t bytes) { acceleratorCopyFromDevice(from,to,bytes); return 0; }
inline void acceleratorEventWait(acceleratorEvent_t ev){}
inline int acceleratorEventIsComplete(acceleratorEvent_t ev){ acceleratorEventWait(ev); return 1;}
inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes); return 0;}
inline void acceleratorCopySynchronise(void) {}; inline void acceleratorCopySynchronise(void) {};
inline int acceleratorIsCommunicable(void *ptr){ return 1; } inline int acceleratorIsCommunicable(void *ptr){ return 1; }
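A minimal sketch of the unified asynchronous-copy interface introduced above (pointers and size assumed valid; on back-ends where the copy is actually issued synchronously the event is a dummy int and the wait is a no-op):
  acceleratorEvent_t ev = acceleratorCopyToDeviceAsynch(host_buf, dev_buf, bytes);
  /* ... overlap independent host work here ... */
  acceleratorEventWait(ev);
  assert(acceleratorEventIsComplete(ev));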
@ -674,9 +707,9 @@ inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
acceleratorCopySynchronise(); acceleratorCopySynchronise();
} }
template<class T> void acceleratorPut(T& dev,T&host) template<class T> void acceleratorPut(T& dev,const T&host)
{ {
acceleratorCopyToDevice(&host,&dev,sizeof(T)); acceleratorCopyToDevice((void *)&host,&dev,sizeof(T));
} }
template<class T> T acceleratorGet(T& dev) template<class T> T acceleratorGet(T& dev)
{ {

View File

@ -73,9 +73,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define thread_critical DO_PRAGMA(omp critical) #define thread_critical DO_PRAGMA(omp critical)
#ifdef GRID_OMP #ifdef GRID_OMP
inline void thread_bcopy(void *from, void *to,size_t bytes) inline void thread_bcopy(const void *from, void *to,size_t bytes)
{ {
uint64_t *ufrom = (uint64_t *)from; const uint64_t *ufrom = (const uint64_t *)from;
uint64_t *uto = (uint64_t *)to; uint64_t *uto = (uint64_t *)to;
assert(bytes%8==0); assert(bytes%8==0);
uint64_t words=bytes/8; uint64_t words=bytes/8;
@ -84,7 +84,7 @@ inline void thread_bcopy(void *from, void *to,size_t bytes)
}); });
} }
#else #else
inline void thread_bcopy(void *from, void *to,size_t bytes) inline void thread_bcopy(const void *from, void *to,size_t bytes)
{ {
bcopy(from,to,bytes); bcopy(from,to,bytes);
} }

View File

@ -509,6 +509,13 @@ void Grid_init(int *argc,char ***argv)
Grid_default_latt, Grid_default_latt,
Grid_default_mpi); Grid_default_mpi);
if( GridCmdOptionExists(*argv,*argv+*argc,"--flightrecorder") ){
std::cout << GridLogMessage <<" Enabling flight recorder " <<std::endl;
FlightRecorder::SetLoggingMode(FlightRecorder::LoggingModeRecord);
FlightRecorder::PrintEntireLog = 1;
FlightRecorder::ChecksumComms = 1;
FlightRecorder::ChecksumCommsSend=1;
}
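For example (binary name and lattice volume illustrative), a run such as ./Benchmark_dwf_fp32 --grid 16.16.16.16 --flightrecorder now records and checksums communications through the FlightRecorder.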
if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){ if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n"; std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n";
@ -651,3 +658,4 @@ void Grid_debug_handler_init(void)
} }
NAMESPACE_END(Grid); NAMESPACE_END(Grid);

View File

@ -50,7 +50,7 @@ namespace Grid{
int64_t index64; int64_t index64;
IndexFromCoorReversed(coor,index64,dims); IndexFromCoorReversed(coor,index64,dims);
if ( index64>=2*1024*1024*1024LL ){ if ( index64>=2*1024*1024*1024LL ){
std::cout << " IndexFromCoorReversed " << coor<<" index " << index64<< " dims "<<dims<<std::endl; // std::cout << " IndexFromCoorReversed " << coor<<" index " << index64<< " dims "<<dims<<std::endl;
} }
assert(index64<2*1024*1024*1024LL); assert(index64<2*1024*1024*1024LL);
index = (int) index64; index = (int) index64;

View File

@ -25,13 +25,20 @@ directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid.h> #include <Grid/Grid.h>
#if Nc == 3
#include <Grid/qcd/smearing/GaugeConfigurationMasked.h> #include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
#include <Grid/qcd/smearing/JacobianAction.h> #include <Grid/qcd/smearing/JacobianAction.h>
#endif
using namespace Grid; using namespace Grid;
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
#if Nc != 3
#warning FTHMC2p1f will not work for Nc != 3
std::cout << "This program will currently only work for Nc == 3." << std::endl;
#else
std::cout << std::setprecision(12); std::cout << std::setprecision(12);
Grid_init(&argc, &argv); Grid_init(&argc, &argv);
@ -220,7 +227,6 @@ int main(int argc, char **argv)
TheHMC.Run(SmearingPolicy); // for smearing TheHMC.Run(SmearingPolicy); // for smearing
Grid_finalize(); Grid_finalize();
#endif
} // main } // main

View File

@ -24,14 +24,22 @@ See the full license in the file "LICENSE" in the top level distribution
directory directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid.h> #include <Grid/Grid.h>
#if Nc == 3
#include <Grid/qcd/smearing/GaugeConfigurationMasked.h> #include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
#include <Grid/qcd/smearing/JacobianAction.h> #include <Grid/qcd/smearing/JacobianAction.h>
#endif
using namespace Grid; using namespace Grid;
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
#if Nc != 3
#warning FTHMC2p1f_3GeV will not work for Nc != 3
std::cout << "This program will currently only work for Nc == 3." << std::endl;
#else
std::cout << std::setprecision(12); std::cout << std::setprecision(12);
Grid_init(&argc, &argv); Grid_init(&argc, &argv);
@ -220,6 +228,7 @@ int main(int argc, char **argv)
TheHMC.Run(SmearingPolicy); // for smearing TheHMC.Run(SmearingPolicy); // for smearing
Grid_finalize(); Grid_finalize();
#endif
} // main } // main

View File

@ -25,13 +25,20 @@ directory
*************************************************************************************/ *************************************************************************************/
/* END LEGAL */ /* END LEGAL */
#include <Grid/Grid.h> #include <Grid/Grid.h>
#if Nc == 3
#include <Grid/qcd/smearing/GaugeConfigurationMasked.h> #include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
#include <Grid/qcd/smearing/JacobianAction.h> #include <Grid/qcd/smearing/JacobianAction.h>
#endif
using namespace Grid; using namespace Grid;
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
#if Nc != 3
#warning HMC2p1f_3GeV will not work for Nc != 3
std::cout << "This program will currently only work for Nc == 3." << std::endl;
#else
std::cout << std::setprecision(12); std::cout << std::setprecision(12);
Grid_init(&argc, &argv); Grid_init(&argc, &argv);
@ -220,6 +227,7 @@ int main(int argc, char **argv)
TheHMC.Run(SmearingPolicy); // for smearing TheHMC.Run(SmearingPolicy); // for smearing
Grid_finalize(); Grid_finalize();
#endif
} // main } // main

View File

@ -492,17 +492,18 @@ public:
} }
FGrid->Barrier(); FGrid->Barrier();
double t1=usecond(); double t1=usecond();
uint64_t ncall = 500; uint64_t no = 50;
uint64_t ni = 100;
FGrid->Broadcast(0,&ncall,sizeof(ncall));
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl; // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
time_statistics timestat; time_statistics timestat;
std::vector<double> t_time(ncall); std::vector<double> t_time(no);
for(uint64_t i=0;i<ncall;i++){ for(uint64_t i=0;i<no;i++){
t0=usecond(); t0=usecond();
for(uint64_t j=0;j<ni;j++){
Dw.DhopEO(src_o,r_e,DaggerNo); Dw.DhopEO(src_o,r_e,DaggerNo);
}
t1=usecond(); t1=usecond();
t_time[i] = t1-t0; t_time[i] = t1-t0;
} }
@ -520,11 +521,11 @@ public:
double mf_hi, mf_lo, mf_err; double mf_hi, mf_lo, mf_err;
timestat.statistics(t_time); timestat.statistics(t_time);
mf_hi = flops/timestat.min; mf_hi = flops/timestat.min*ni;
mf_lo = flops/timestat.max; mf_lo = flops/timestat.max*ni;
mf_err= flops/timestat.min * timestat.err/timestat.mean; mf_err= flops/timestat.min * timestat.err/timestat.mean;
mflops = flops/timestat.mean; mflops = flops/timestat.mean*ni;
mflops_all.push_back(mflops); mflops_all.push_back(mflops);
if ( mflops_best == 0 ) mflops_best = mflops; if ( mflops_best == 0 ) mflops_best = mflops;
if ( mflops_worst== 0 ) mflops_worst= mflops; if ( mflops_worst== 0 ) mflops_worst= mflops;
@ -535,6 +536,7 @@ public:
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank "<< mflops/NP<<std::endl; std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank "<< mflops/NP<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node "<< mflops/NN<<std::endl; std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node "<< mflops/NN<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo us per call "<< timestat.mean/ni<<std::endl;
} }
@ -654,17 +656,19 @@ public:
} }
FGrid->Barrier(); FGrid->Barrier();
double t1=usecond(); double t1=usecond();
uint64_t ncall = 500;
FGrid->Broadcast(0,&ncall,sizeof(ncall)); uint64_t no = 50;
uint64_t ni = 100;
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl; // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
time_statistics timestat; time_statistics timestat;
std::vector<double> t_time(ncall); std::vector<double> t_time(no);
for(uint64_t i=0;i<ncall;i++){ for(uint64_t i=0;i<no;i++){
t0=usecond(); t0=usecond();
for(uint64_t j=0;j<ni;j++){
Ds.DhopEO(src_o,r_e,DaggerNo); Ds.DhopEO(src_o,r_e,DaggerNo);
}
t1=usecond(); t1=usecond();
t_time[i] = t1-t0; t_time[i] = t1-t0;
} }
@ -675,11 +679,11 @@ public:
double mf_hi, mf_lo, mf_err; double mf_hi, mf_lo, mf_err;
timestat.statistics(t_time); timestat.statistics(t_time);
mf_hi = flops/timestat.min; mf_hi = flops/timestat.min*ni;
mf_lo = flops/timestat.max; mf_lo = flops/timestat.max*ni;
mf_err= flops/timestat.min * timestat.err/timestat.mean; mf_err= flops/timestat.min * timestat.err/timestat.mean;
mflops = flops/timestat.mean; mflops = flops/timestat.mean*ni;
mflops_all.push_back(mflops); mflops_all.push_back(mflops);
if ( mflops_best == 0 ) mflops_best = mflops; if ( mflops_best == 0 ) mflops_best = mflops;
if ( mflops_worst== 0 ) mflops_worst= mflops; if ( mflops_worst== 0 ) mflops_worst= mflops;
@ -689,6 +693,7 @@ public:
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank "<< mflops/NP<<std::endl; std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank "<< mflops/NP<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node "<< mflops/NN<<std::endl; std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node "<< mflops/NN<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo us per call "<< timestat.mean/ni<<std::endl;
} }
@ -792,19 +797,18 @@ public:
Dc.M(src,r); Dc.M(src,r);
} }
FGrid->Barrier(); FGrid->Barrier();
double t1=usecond(); uint64_t ni = 100;
uint64_t ncall = 500; uint64_t no = 50;
FGrid->Broadcast(0,&ncall,sizeof(ncall));
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl; // std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
time_statistics timestat; time_statistics timestat;
std::vector<double> t_time(ncall); std::vector<double> t_time(no);
for(uint64_t i=0;i<ncall;i++){ for(uint64_t i=0;i<no;i++){
t0=usecond(); double t0=usecond();
for(uint64_t j=0;j<ni;j++){
Dc.M(src,r); Dc.M(src,r);
t1=usecond(); }
double t1=usecond();
t_time[i] = t1-t0; t_time[i] = t1-t0;
} }
FGrid->Barrier(); FGrid->Barrier();
@ -814,20 +818,21 @@ public:
double mf_hi, mf_lo, mf_err; double mf_hi, mf_lo, mf_err;
timestat.statistics(t_time); timestat.statistics(t_time);
mf_hi = flops/timestat.min; mf_hi = flops/timestat.min*ni;
mf_lo = flops/timestat.max; mf_lo = flops/timestat.max*ni;
mf_err= flops/timestat.min * timestat.err/timestat.mean; mf_err= flops/timestat.min * timestat.err/timestat.mean;
mflops = flops/timestat.mean; mflops = flops/timestat.mean*ni;
mflops_all.push_back(mflops); mflops_all.push_back(mflops);
if ( mflops_best == 0 ) mflops_best = mflops; if ( mflops_best == 0 ) mflops_best = mflops;
if ( mflops_worst== 0 ) mflops_worst= mflops; if ( mflops_worst== 0 ) mflops_worst= mflops;
if ( mflops>mflops_best ) mflops_best = mflops; if ( mflops>mflops_best ) mflops_best = mflops;
if ( mflops<mflops_worst) mflops_worst= mflops; if ( mflops<mflops_worst) mflops_worst= mflops;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Dclov mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl; std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Dclov mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<" "<<timestat.mean<<" us"<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Dclov mflop/s per rank "<< mflops/NP<<std::endl; std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Dclov mflop/s per rank "<< mflops/NP<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Dclov mflop/s per node "<< mflops/NN<<std::endl; std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Dclov mflop/s per node "<< mflops/NN<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Dclov us per call "<< timestat.mean/ni<<std::endl;
} }
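With the restructured loops, each of the no timed samples covers ni back-to-back applications, so the reported figures follow
  mflop/s = flops * ni / <t>,   us per call = <t> / ni,
where <t> is the relevant per-sample statistic (mean/min/max) in microseconds.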
@ -872,7 +877,7 @@ int main (int argc, char ** argv)
int do_dslash=1; int do_dslash=1;
int sel=4; int sel=4;
std::vector<int> L_list({8,12,16,24}); std::vector<int> L_list({8,12,16,24,32});
int selm1=sel-1; int selm1=sel-1;
std::vector<double> clover; std::vector<double> clover;

View File

@ -151,7 +151,7 @@ AC_ARG_ENABLE([tracing],
case ${ac_TRACING} in case ${ac_TRACING} in
nvtx) nvtx)
AC_DEFINE([GRID_TRACING_NVTX],[1],[use NVTX]) AC_DEFINE([GRID_TRACING_NVTX],[1],[use NVTX])
LIBS="${LIBS} -lnvToolsExt64_1" LIBS="${LIBS} -lnvToolsExt"
;; ;;
roctx) roctx)
AC_DEFINE([GRID_TRACING_ROCTX],[1],[use ROCTX]) AC_DEFINE([GRID_TRACING_ROCTX],[1],[use ROCTX])

View File

@ -93,10 +93,13 @@ int main(int argc, char ** argv)
Real coeff = (width*width) / Real(4*Iterations); Real coeff = (width*width) / Real(4*Iterations);
chi=kronecker; chi=kronecker;
// chi = (1-p^2/2N)^N kronecker // chi = (1-p^2/2N)^N kronecker
for(int n = 0; n < Iterations; ++n) { for(int n = 0; n < Iterations; ++n) {
Laplacian.M(chi,psi); Laplacian.M(chi,psi);
chi = chi - coeff*psi; chi = chi - coeff*psi;
RealD n2 = norm2(chi);
chi = chi * (1.0/std::sqrt(n2));
} }
std::cout << " Wuppertal smeared operator is chi = \n" << chi <<std::endl; std::cout << " Wuppertal smeared operator is chi = \n" << chi <<std::endl;

View File

@ -1,18 +1,19 @@
#Ahead of time compile for PVC #Ahead of time compile for PVC
export LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xs -device -Xs pvc -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -fsycl -lsycl -lnuma -L/opt/aurora/24.180.3/spack/unified/0.8.0/install/linux-sles15-x86_64/oneapi-2024.07.30.002/numactl-2.0.14-7v6edad/lib" export LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xs -device -Xs pvc -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -fsycl -lsycl -lnuma -L/opt/aurora/24.180.3/spack/unified/0.8.0/install/linux-sles15-x86_64/oneapi-2024.07.30.002/numactl-2.0.14-7v6edad/lib -fPIC -fsycl-max-parallel-link-jobs=16 -fno-sycl-rdc"
export CXXFLAGS="-O3 -fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare -qmkl=parallel -fsycl -fno-exceptions -I/opt/aurora/24.180.3/spack/unified/0.8.0/install/linux-sles15-x86_64/oneapi-2024.07.30.002/numactl-2.0.14-7v6edad/include/" export CXXFLAGS="-O3 -fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare -qmkl=parallel -fsycl -fno-exceptions -I/opt/aurora/24.180.3/spack/unified/0.8.0/install/linux-sles15-x86_64/oneapi-2024.07.30.002/numactl-2.0.14-7v6edad/include/ -fPIC"
#JIT compile #JIT compile
#export LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -fsycl -lsycl " #export LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -fsycl -lsycl "
#export CXXFLAGS="-O3 -fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare -qmkl=parallel -fsycl -fno-exceptions " #export CXXFLAGS="-O3 -fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare -qmkl=parallel -fsycl -fno-exceptions "
../../configure \ ../configure \
--enable-simd=GPU \ --enable-simd=GPU \
--enable-reduction=grid \ --enable-reduction=grid \
--enable-gen-simd-width=64 \ --enable-gen-simd-width=64 \
--enable-comms=mpi-auto \ --enable-comms=mpi-auto \
--enable-debug \ --enable-debug \
--prefix $HOME/gpt-install \
--disable-gparity \ --disable-gparity \
--disable-fermion-reps \ --disable-fermion-reps \
--with-lime=$CLIME \ --with-lime=$CLIME \

View File

@ -0,0 +1,22 @@
CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-`
../../configure --enable-comms=mpi-auto \
--with-lime=$CLIME \
--enable-unified=no \
--enable-shm=nvlink \
--enable-tracing=none \
--enable-accelerator=hip \
--enable-gen-simd-width=64 \
--disable-gparity \
--disable-fermion-reps \
--enable-simd=GPU \
--with-gmp=$OLCF_GMP_ROOT \
--with-fftw=$FFTW_DIR/.. \
--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
--disable-fermion-reps \
CXX=hipcc MPICXX=mpicxx \
CXXFLAGS="-fPIC -I${ROCM_PATH}/include/ -I${MPICH_DIR}/include -L/lib64 " \
LDFLAGS="-L/lib64 -L${ROCM_PATH}/lib -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lhipblas -lrocblas"

View File

@ -0,0 +1,16 @@
echo spack
. /autofs/nccs-svm1_home1/paboyle/Crusher/Grid/spack/share/spack/setup-env.sh
#module load cce/15.0.1
module load rocm/6.3.1
module load cray-fftw
module load craype-accel-amd-gfx90a
export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH
#Ugly hacks to get down-level software working on the current system
#export LD_LIBRARY_PATH=/opt/cray/libfabric/1.20.1/lib64/:$LD_LIBRARY_PATH
#export LD_LIBRARY_PATH=`pwd`/:$LD_LIBRARY_PATH
#ln -s /opt/rocm-6.0.0/lib/libamdhip64.so.6 .

View File

@ -30,14 +30,10 @@ source ${root}/sourceme.sh
export OMP_NUM_THREADS=7 export OMP_NUM_THREADS=7
export MPICH_GPU_SUPPORT_ENABLED=1 export MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_SMP_SINGLE_COPY_MODE=XPMEM #export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
#64.64.32.96
for vol in 32.32.32.64 for vol in 64.64.32.64
do do
srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.ov.$vol srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol -Ls 16
srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.ov.$vol
srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.seq.$vol
srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.seq.$vol
done done

View File

@ -3,20 +3,19 @@ CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-`
--with-lime=$CLIME \ --with-lime=$CLIME \
--enable-unified=no \ --enable-unified=no \
--enable-shm=nvlink \ --enable-shm=nvlink \
--enable-tracing=timer \ --enable-tracing=none \
--enable-accelerator=hip \ --enable-accelerator=hip \
--enable-gen-simd-width=64 \ --enable-gen-simd-width=64 \
--disable-gparity \ --disable-gparity \
--disable-fermion-reps \ --disable-fermion-reps \
--enable-simd=GPU \ --enable-simd=GPU \
--enable-accelerator-cshift \
--with-gmp=$OLCF_GMP_ROOT \ --with-gmp=$OLCF_GMP_ROOT \
--with-fftw=$FFTW_DIR/.. \ --with-fftw=$FFTW_DIR/.. \
--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \ --with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
--disable-fermion-reps \ --disable-fermion-reps \
CXX=hipcc MPICXX=mpicxx \ CXX=hipcc MPICXX=mpicxx \
CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -I${MPICH_DIR}/include -L/lib64 " \ CXXFLAGS="-fPIC -I${ROCM_PATH}/include/ -I${MPICH_DIR}/include -L/lib64 " \
LDFLAGS="-L/lib64 -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 -lhipblas -lrocblas" LDFLAGS="-L/lib64 -L${ROCM_PATH}/lib -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lhipblas -lrocblas"

View File

@ -1,12 +1,25 @@
echo spack
. /autofs/nccs-svm1_home1/paboyle/Crusher/Grid/spack/share/spack/setup-env.sh . /autofs/nccs-svm1_home1/paboyle/Crusher/Grid/spack/share/spack/setup-env.sh
spack load c-lime
module load emacs module load cce/15.0.1
module load PrgEnv-gnu module load rocm/5.3.0
module load rocm/6.0.0
module load cray-mpich
module load gmp
module load cray-fftw module load cray-fftw
module load craype-accel-amd-gfx90a module load craype-accel-amd-gfx90a
#Ugly hacks to get down-level software working on the current system
export LD_LIBRARY_PATH=/opt/cray/libfabric/1.20.1/lib64/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=`pwd`/:$LD_LIBRARY_PATH
ln -s /opt/rocm-6.0.0/lib/libamdhip64.so.6 .
#echo spack load c-lime
#spack load c-lime
#module load emacs
##module load PrgEnv-gnu
##module load cray-mpich
##module load cray-fftw
##module load craype-accel-amd-gfx90a
##export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH
#Hack for lib #Hack for lib
#export LD_LIBRARY_PATH=`pwd`:$LD_LIBRARY_PATH ##export LD_LIBRARY_PATH=`pwd`/:$LD_LIBRARY_PATH

systems/WorkArounds.txt (new file, 206 lines)
View File

@ -0,0 +1,206 @@
The purpose of this file is to collate all non-obvious known magic shell variables
and compiler flags required for either correctness or performance on various systems:
a repository of work-arounds.
Contents:
1. Interconnect + MPI
2. Compilation
3. Profiling
************************
* 1. INTERCONNECT + MPI
************************
--------------------------------------------------------------------
MPI2-IO correctness: force OpenMPI to use the MPICH romio implementation for parallel I/O
--------------------------------------------------------------------
export OMPI_MCA_io=romio321
--------------------------------------
ROMIO fails with > 2GB per-node reads (32-bit issue)
--------------------------------------
Use a later MPICH
https://github.com/paboyle/Grid/issues/381
https://github.com/pmodels/mpich/commit/3a479ab0
--------------------------------------------------------------------
Slingshot: libfabric slow-down and physical memory fragmentation on Frontier and Perlmutter
--------------------------------------------------------------------
export FI_MR_CACHE_MONITOR=disabled
or
export FI_MR_CACHE_MONITOR=kdreg2
--------------------------------------------------------------------
Perlmutter
--------------------------------------------------------------------
export MPICH_RDMA_ENABLED_CUDA=1
export MPICH_GPU_IPC_ENABLED=1
export MPICH_GPU_EAGER_REGISTER_HOST_MEM=0
export MPICH_GPU_NO_ASYNC_MEMCPY=0
--------------------------------------------------------------------
Frontier/LumiG
--------------------------------------------------------------------
Hiding ROCR_VISIBLE_DEVICES triggers the SDMA engines to be used for GPU-GPU transfers
cat << EOF > select_gpu
#!/bin/bash
export MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
export GPU_MAP=(0 1 2 3 7 6 5 4)
export NUMA_MAP=(3 3 1 1 2 2 0 0)
export GPU=\${GPU_MAP[\$SLURM_LOCALID]}
export NUMA=\${NUMA_MAP[\$SLURM_LOCALID]}
export HIP_VISIBLE_DEVICES=\$GPU
unset ROCR_VISIBLE_DEVICES
echo RANK \$SLURM_LOCALID using GPU \$GPU
exec numactl -m \$NUMA -N \$NUMA \$*
EOF
chmod +x ./select_gpu
srun ./select_gpu BINARY
--------------------------------------------------------------------
Mellanox performance with A100 GPU (Tursa, Booster, Leonardo)
--------------------------------------------------------------------
export OMPI_MCA_btl=^uct,openib
export UCX_TLS=gdr_copy,rc,rc_x,sm,cuda_copy,cuda_ipc
export UCX_RNDV_SCHEME=put_zcopy
export UCX_RNDV_THRESH=16384
export UCX_IB_GPU_DIRECT_RDMA=yes
--------------------------------------------------------------------
Mellanox + A100 correctness (Tursa, Booster, Leonardo)
--------------------------------------------------------------------
export UCX_MEMTYPE_CACHE=n
--------------------------------------------------------------------
MPICH/Aurora/PVC correctness and performance
--------------------------------------------------------------------
https://github.com/pmodels/mpich/issues/7302
--enable-cuda-aware-mpi=no
--enable-unified=no
These select Grid's internal D-H-H-D (device-host-host-device) pipeline mode, avoiding device memory in MPI calls.
Do not use SVM
Ideally use MPICH with fix to issue 7302:
https://github.com/pmodels/mpich/pull/7312
Ideally:
MPIR_CVAR_CH4_IPC_GPU_HANDLE_CACHE=generic
Alternatives:
export MPIR_CVAR_NOLOCAL=1
export MPIR_CVAR_CH4_IPC_GPU_P2P_THRESHOLD=1000000000
--------------------------------------------------------------------
MPICH/Aurora/PVC correctness and performance
--------------------------------------------------------------------
Broken:
export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
This gives good performance without requiring
--enable-cuda-aware-mpi=no
but is the subject of an open issue reported by James Osborn:
https://github.com/pmodels/mpich/issues/7139
Possibly resolved, but it is unclear whether the fix is in the installed software yet.
************************
* 2. COMPILATION
************************
--------------------------------------------------------------------
G++ compiler breakage / graveyard
--------------------------------------------------------------------
9.3.0, 10.3.1,
https://github.com/paboyle/Grid/issues/290
https://github.com/paboyle/Grid/issues/264
Working (-) Broken (X):
4.9.0 -
4.9.1 -
5.1.0 X
5.2.0 X
5.3.0 X
5.4.0 X
6.1.0 X
6.2.0 X
6.3.0 -
7.1.0 -
8.0.0 (HEAD) -
https://github.com/paboyle/Grid/issues/100
--------------------------------------------------------------------
AMD GPU nodes:
--------------------------------------------------------------------
Multiple ROCm versions are broken; use 5.3.0.
The breakage manifests as wrong results in fp32:
https://github.com/paboyle/Grid/issues/464
--------------------------------------------------------------------
Aurora/PVC
--------------------------------------------------------------------
SYCL ahead-of-time compilation (fixes rare runtime JIT errors and gives faster startup, PB)
SYCL slow link and relocatable-code issues (Christoph Lehner)
The large register file option is required for good fp64 performance:
export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
export LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xs -device -Xs pvc -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -fsycl -lsycl -fPIC -fsycl-max-parallel-link-jobs=16 -fno-sycl-rdc"
export CXXFLAGS="-O3 -fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare -qmkl=parallel -fsycl -fno-exceptions -fPIC"
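A configure sketch these flags typically accompany (a minimal sketch; the
accelerator/SIMD options are the usual Grid SYCL settings and may need
adjusting to the local install):
../../configure \
  --enable-simd=GPU \
  --enable-gen-simd-width=64 \
  --enable-comms=mpi-auto \
  --enable-accelerator=sycl \
  --enable-unified=no \
  CXX=icpx MPICXX=mpicxx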
--------------------------------------------------------------------
Aurora/PVC useful extra options
--------------------------------------------------------------------
Host-only sanitizers:
-Xarch_host -fsanitize=leak
-Xarch_host -fsanitize=address
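e.g. appended to both compile and link flags (a sketch):
export CXXFLAGS="$CXXFLAGS -Xarch_host -fsanitize=address"
export LDFLAGS="$LDFLAGS -Xarch_host -fsanitize=address"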
Deterministic MPI reduction:
export MPIR_CVAR_ALLREDUCE_DEVICE_COLLECTIVE=0
export MPIR_CVAR_REDUCE_DEVICE_COLLECTIVE=0
export MPIR_CVAR_ALLREDUCE_INTRA_ALGORITHM=recursive_doubling
unset MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE
unset MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE
unset MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE
************************
* 3. Visual profile tools
************************
--------------------------------------------------------------------
Frontier/rocprof
--------------------------------------------------------------------
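A minimal invocation (a sketch; standard rocprof options, binary and volume
are placeholders):
rocprof --hip-trace --stats ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 16.16.16.16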
--------------------------------------------------------------------
Aurora/unitrace
--------------------------------------------------------------------
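A minimal invocation (a sketch; unitrace ships with intel/pti-gpu and its
options vary by version, binary and volume are placeholders):
unitrace ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 16.16.16.16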
--------------------------------------------------------------------
Tursa/nsight-sys
--------------------------------------------------------------------
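A minimal invocation (a sketch; standard nsys options, binary and volume are
placeholders):
nsys profile -o grid_report --trace=cuda,nvtx,mpi ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid 16.16.16.16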

View File

@ -1,2 +1,16 @@
CXXFLAGS=-I/opt/local/include LDFLAGS=-L/opt/local/lib/ CXX=c++-13 MPICXX=mpicxx ../../configure --enable-simd=GEN --enable-comms=mpi-auto --enable-unified=yes --prefix $HOME/QCD/GridInstall --with-lime=/Users/peterboyle/QCD/SciDAC/install/ --with-openssl=$BREW --disable-fermion-reps --disable-gparity --disable-debug
CXX=mpicxx ../../configure \
--enable-simd=GEN \
--enable-comms=mpi-auto \
--enable-Sp=yes \
--enable-unified=yes \
--prefix /Users/peterboyle/QCD/vtk/Grid/install \
--with-lime=$CLIME \
--with-hdf5=$HDF5 \
--with-fftw=$FFTW \
--with-openssl=$OPENSSL \
--with-gmp=$GMP \
--with-mpfr=$MPFR \
--disable-debug

View File

@ -0,0 +1,32 @@
#!/bin/bash
#SBATCH --partition lqcd
#SBATCH --time=00:50:00
#SBATCH -A lqcdtest
#SBATCH -q lqcd
#SBATCH --exclusive
#SBATCH --nodes=1
#SBATCH -w genoahost001,genoahost003,genoahost050,genoahost054
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=64
#SBATCH --qos lqcd
source sourceme.sh
export PLACES=(1:16:4 1:32:2 0:64:1);
export THR=(16 32 64)
for t in 2
do
export OMP_NUM_THREADS=${THR[$t]}
export OMP_PLACES=${PLACES[$t]}
export thr=${THR[$t]}
#for vol in 24.24.24.24 32.32.32.32 48.48.48.96
for vol in 48.48.48.96
do
srun -N1 -n1 ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid $vol --dslash-asm --shm 8192 > $vol.1node.thr$thr
done
#srun -N1 -n1 ./benchmarks/Benchmark_usqcd --mpi 1.1.1.1 --grid $vol > usqcd.1node.thr$thr
done

View File

@ -0,0 +1,36 @@
#!/bin/bash
#SBATCH --partition lqcd
#SBATCH --time=00:50:00
#SBATCH -A lqcdtest
#SBATCH -q lqcd
#SBATCH --exclusive
#SBATCH --nodes=2
#SBATCH -w genoahost001,genoahost003,genoahost050,genoahost054
#SBATCH --ntasks=2
#SBATCH --cpus-per-task=64
#SBATCH --qos lqcd
source sourceme.sh
export PLACES=(1:16:4 1:32:2 0:64:1);
export THR=(16 32 64)
nodes=2
mpi=1.1.1.2
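# 2 ranks in total; --mpi 1.1.1.2 splits the time direction across the two nodes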
for t in 2
do
export OMP_NUM_THREADS=${THR[$t]}
export OMP_PLACES=${PLACES[$t]}
export thr=${THR[$t]}
#srun -N$nodes -n$nodes ./benchmarks/Benchmark_usqcd --mpi $mpi --grid 32.32.32.32 > usqcd.n$nodes.thr$thr
for vol in 64.64.64.128
do
srun -N$nodes -n$nodes ./benchmarks/Benchmark_dwf_fp32 --mpi $mpi --grid $vol --dslash-asm --comms-overlap --shm 8192 > $vol.n$nodes.overlap.thr$thr
done
done

View File

@ -0,0 +1,16 @@
../../configure \
--enable-comms=mpi-auto \
--enable-unified=yes \
--enable-shm=shmopen \
--enable-shm-fast-path=shmopen \
--enable-accelerator=none \
--enable-simd=AVX512 \
--disable-accelerator-cshift \
--disable-fermion-reps \
--disable-gparity \
CXX=clang++ \
MPICXX=mpicxx \
CXXFLAGS="-std=c++17"

View File

@ -0,0 +1,4 @@
source $HOME/spack/share/spack/setup-env.sh
spack load llvm@17.0.4
export LD_LIBRARY_PATH=/direct/sdcc+u/paboyle/spack/opt/spack/linux-almalinux8-icelake/gcc-8.5.0/llvm-17.0.4-laufdrcip63ivkadmtgoepwmj3dtztdu/lib:$LD_LIBRARY_PATH
module load openmpi

View File

@ -0,0 +1,17 @@
../../src/Grid/configure \
--prefix /home/pab/NPR/install \
--enable-comms=mpi-auto \
--enable-simd=AVX2 \
--enable-shm=none \
--enable-debug \
--with-lime=$CLIME \
--with-hdf5=$HDF5 \
--with-fftw=$FFTW \
--with-gmp=$GMP \
--with-mpfr=$MPFR \
--disable-gparity \
--disable-fermion-reps \
CXX=clang++ \
MPICXX=mpicxx \
CXXFLAGS="-std=c++17 "

View File

@ -0,0 +1,28 @@
source $HOME/spack/share/spack/setup-env.sh
spack load llvm@12
spack load autoconf%clang@12.0.1
spack load automake%clang@12.0.1
spack load c-lime%clang@12.0.1
spack load fftw%clang@12.0.1
spack load gmp%clang@12.0.1
spack load mpfr%clang@12.0.1
spack load openmpi%clang@12.0.1
spack load openssl%clang@12.0.1
spack load hdf5+cxx%clang@12.0.1
spack load cmake%clang@12.0.1
export FFTW=`spack find --paths fftw%clang@12.0.1 | grep ^fftw | awk '{print $2}' `
export HDF5=`spack find --paths hdf5+cxx%clang@12.0.1 | grep ^hdf5 | awk '{print $2}' `
export CLIME=`spack find --paths c-lime%clang@12.0.1 | grep ^c-lime | awk '{print $2}' `
export MPFR=`spack find --paths mpfr%clang@12.0.1 | grep ^mpfr | awk '{print $2}' `
export LLVM=`spack find --paths llvm@12 | grep ^llvm | awk '{print $2}' `
export OPENSSL=`spack find --paths openssl%clang@12.0.1 | grep openssl | awk '{print $2}' `
export GMP=`spack find --paths gmp%clang@12.0.1 | grep ^gmp | awk '{print $2}' `
export TCLAP=`spack find --paths tclap%clang@12.0.1 | grep ^tclap | awk '{print $2}' `
export LD_LIBRARY_PATH=${TCLAP}/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$MPFR/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$GMP/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$FFTW/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$LLVM/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$LLVM/lib/x86_64-unknown-linux-gnu/:$LD_LIBRARY_PATH
ulimit -s 81920

View File

@ -0,0 +1,19 @@
cd
git clone https://github.com/spack/spack.git
source $HOME/spack/share/spack/setup-env.sh
spack install llvm@12
spack install autoconf%clang@12.0.1
spack install automake%clang@12.0.1
spack install c-lime%clang@12.0.1
spack install fftw%clang@12.0.1
spack install gmp%clang@12.0.1
spack install mpfr%clang@12.0.1
spack install openmpi%clang@12.0.1
spack install openssl%clang@12.0.1
spack install hdf5+cxx%clang@12.0.1
spack install cmake%clang@12.0.1
spack install tclap%clang@12.0.1
spack install emacs%clang@12.0.1

View File

@ -62,7 +62,7 @@ int VerifyOnDevice(const FermionField &res, FermionField &ref)
if (((random()&0xF)==0)&&injection) { if (((random()&0xF)==0)&&injection) {
uint64_t sF = random()%(NN); uint64_t sF = random()%(NN);
int lane=0; int lane=0;
printf("Error injection site %ld on rank %d\n",sF,res.Grid()->ThisRank()); printf("Error injection site %ld on rank %d\n",(long)sF,res.Grid()->ThisRank());
auto vv = acceleratorGet(res_v[sF]); auto vv = acceleratorGet(res_v[sF]);
double *dd = (double *)&vv; double *dd = (double *)&vv;
*dd=M_PI; *dd=M_PI;

View File

@ -195,8 +195,8 @@ int main (int argc, char ** argv)
int Nk=nrhs; int Nk=nrhs;
int Nm=Nk*3; int Nm=Nk*3;
int Nk=36; // int Nk=36;
int Nm=144; // int Nm=144;
int Nstop=Nk; int Nstop=Nk;
int Nconv_test_interval=1; int Nconv_test_interval=1;

View File

@ -47,20 +47,20 @@ public:
void OpDir (const Field &in, Field &out,int dir,int disp) { assert(0); } void OpDir (const Field &in, Field &out,int dir,int disp) { assert(0); }
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); }; void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); };
void Op (const Field &in, Field &out){ void Op (const Field &in, Field &out){
std::cout << "Op: PVdag M "<<std::endl; // std::cout << "Op: PVdag M "<<std::endl;
Field tmp(in.Grid()); Field tmp(in.Grid());
_Mat.M(in,tmp); _Mat.M(in,tmp);
_PV.Mdag(tmp,out); _PV.Mdag(tmp,out);
} }
void AdjOp (const Field &in, Field &out){ void AdjOp (const Field &in, Field &out){
std::cout << "AdjOp: Mdag PV "<<std::endl; // std::cout << "AdjOp: Mdag PV "<<std::endl;
Field tmp(in.Grid()); Field tmp(in.Grid());
_PV.M(in,tmp); _PV.M(in,tmp);
_Mat.Mdag(tmp,out); _Mat.Mdag(tmp,out);
} }
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); } void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
void HermOp(const Field &in, Field &out){ void HermOp(const Field &in, Field &out){
std::cout << "HermOp: Mdag PV PVdag M"<<std::endl; // std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
Field tmp(in.Grid()); Field tmp(in.Grid());
// _Mat.M(in,tmp); // _Mat.M(in,tmp);
// _PV.Mdag(tmp,out); // _PV.Mdag(tmp,out);
@ -83,14 +83,14 @@ public:
void OpDir (const Field &in, Field &out,int dir,int disp) { assert(0); } void OpDir (const Field &in, Field &out,int dir,int disp) { assert(0); }
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); }; void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); };
void Op (const Field &in, Field &out){ void Op (const Field &in, Field &out){
std::cout << "Op: PVdag M "<<std::endl; // std::cout << "Op: PVdag M "<<std::endl;
Field tmp(in.Grid()); Field tmp(in.Grid());
_Mat.M(in,tmp); _Mat.M(in,tmp);
_PV.Mdag(tmp,out); _PV.Mdag(tmp,out);
out = out + shift * in; out = out + shift * in;
} }
void AdjOp (const Field &in, Field &out){ void AdjOp (const Field &in, Field &out){
std::cout << "AdjOp: Mdag PV "<<std::endl; // std::cout << "AdjOp: Mdag PV "<<std::endl;
Field tmp(in.Grid()); Field tmp(in.Grid());
_PV.M(tmp,out); _PV.M(tmp,out);
_Mat.Mdag(in,tmp); _Mat.Mdag(in,tmp);
@ -98,7 +98,7 @@ public:
} }
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); } void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
void HermOp(const Field &in, Field &out){ void HermOp(const Field &in, Field &out){
std::cout << "HermOp: Mdag PV PVdag M"<<std::endl; // std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
Field tmp(in.Grid()); Field tmp(in.Grid());
Op(in,tmp); Op(in,tmp);
AdjOp(tmp,out); AdjOp(tmp,out);

View File

@ -54,6 +54,7 @@ const RealD M5 = 1.8;
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
#ifdef ENABLE_GPARITY
Grid_init(&argc, &argv); Grid_init(&argc, &argv);
int threads = GridThread::GetThreads(); int threads = GridThread::GetThreads();
@ -106,6 +107,6 @@ int main(int argc, char** argv)
Meofa.refresh(Umu,sRNG, RNG5); Meofa.refresh(Umu,sRNG, RNG5);
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu)); printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
} }
#endif
return 0; return 0;
} }

View File

@ -56,6 +56,7 @@ const RealD M5 = 1.8;
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
#ifdef ENABLE_GPARITY
Grid_init(&argc, &argv); Grid_init(&argc, &argv);
int threads = GridThread::GetThreads(); int threads = GridThread::GetThreads();
@ -106,6 +107,6 @@ int main(int argc, char** argv)
Meofa.refresh(Umu, sRNG, RNG5); Meofa.refresh(Umu, sRNG, RNG5);
printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu)); printf("<Phi|Meofa|Phi> = %1.15e\n", Meofa.S(Umu));
} }
#endif
return 0; return 0;
} }

View File

@ -33,6 +33,7 @@ using namespace std;
using namespace Grid; using namespace Grid;
// This is to optimize the SIMD // This is to optimize the SIMD
/*
template<class vobj> void gpermute(vobj & inout,int perm){ template<class vobj> void gpermute(vobj & inout,int perm){
vobj tmp=inout; vobj tmp=inout;
if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;} if (perm & 0x1 ) { permute(inout,tmp,0); tmp=inout;}
@ -40,7 +41,7 @@ template<class vobj> void gpermute(vobj & inout,int perm){
if (perm & 0x4 ) { permute(inout,tmp,2); tmp=inout;} if (perm & 0x4 ) { permute(inout,tmp,2); tmp=inout;}
if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;} if (perm & 0x8 ) { permute(inout,tmp,3); tmp=inout;}
} }
*/
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {

View File

@ -153,7 +153,7 @@ public:
t=usecond(); t=usecond();
{ {
autoView( gStaple_v , gStaple, AcceleratorWrite); autoView( gStaple_v , gStaple, AcceleratorWrite);
auto gStencil_v = gStencil.View(); auto gStencil_v = gStencil.View(AcceleratorRead);
autoView( Ug_mu_v , Ug_mu, AcceleratorRead); autoView( Ug_mu_v , Ug_mu, AcceleratorRead);
autoView( Ug_nu_v , Ug_nu, AcceleratorRead); autoView( Ug_nu_v , Ug_nu, AcceleratorRead);
@ -389,7 +389,7 @@ public:
GeneralLocalStencil gStencil(ggrid,shifts); GeneralLocalStencil gStencil(ggrid,shifts);
{ {
autoView( gStaple_v , gStaple, AcceleratorWrite); autoView( gStaple_v , gStaple, AcceleratorWrite);
auto gStencil_v = gStencil.View(); auto gStencil_v = gStencil.View(AcceleratorRead);
typedef LatticeView<typename GaugeMat::vector_object> GaugeViewType; typedef LatticeView<typename GaugeMat::vector_object> GaugeViewType;
size_t vsize = Nd*sizeof(GaugeViewType); size_t vsize = Nd*sizeof(GaugeViewType);

View File

@ -83,6 +83,7 @@ std::vector<RealD> jack_stats(const std::vector<RealD>& data)
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
#ifdef ENABLE_GPARITY
Grid_init(&argc, &argv); Grid_init(&argc, &argv);
// Initialize spacetime grid // Initialize spacetime grid
@ -206,4 +207,5 @@ int main(int argc, char **argv)
std::cout << std::endl << "EOFA: rw = " << eofa_result[0] << " +/- " << eofa_result[1] << std::endl; std::cout << std::endl << "EOFA: rw = " << eofa_result[0] << " +/- " << eofa_result[1] << std::endl;
Grid_finalize(); Grid_finalize();
#endif
} }

View File

@ -85,6 +85,7 @@ std::vector<RealD> jack_stats(const std::vector<RealD>& data)
int main(int argc, char **argv) int main(int argc, char **argv)
{ {
#ifdef ENABLE_GPARITY
Grid_init(&argc, &argv); Grid_init(&argc, &argv);
// Initialize spacetime grid // Initialize spacetime grid
@ -215,4 +216,5 @@ int main(int argc, char **argv)
std::cout << std::endl << "EOFA: rw = " << eofa_result[0] << " +/- " << eofa_result[1] << std::endl; std::cout << std::endl << "EOFA: rw = " << eofa_result[0] << " +/- " << eofa_result[1] << std::endl;
Grid_finalize(); Grid_finalize();
#endif
} }

View File

@ -35,6 +35,7 @@ using namespace Grid;
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
#ifdef ENABLE_GPARITY
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
Coordinate latt_size = GridDefaultLatt(); Coordinate latt_size = GridDefaultLatt();
@ -244,4 +245,5 @@ int main (int argc, char ** argv)
std::cout<< GridLogMessage << "Done" <<std::endl; std::cout<< GridLogMessage << "Done" <<std::endl;
Grid_finalize(); Grid_finalize();
#endif
} }

View File

@ -38,6 +38,7 @@ typedef typename FermionAction::FermionField FermionField;
int main (int argc, char** argv) int main (int argc, char** argv)
{ {
#ifdef ENABLE_GPARITY
Grid_init(&argc, &argv); Grid_init(&argc, &argv);
Coordinate latt_size = GridDefaultLatt(); Coordinate latt_size = GridDefaultLatt();
@ -173,4 +174,5 @@ int main (int argc, char** argv)
std::cout << GridLogMessage << "Done" << std::endl; std::cout << GridLogMessage << "Done" << std::endl;
Grid_finalize(); Grid_finalize();
#endif
} }

View File

@ -35,6 +35,7 @@ using namespace Grid;
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
#ifdef ENABLE_GPARITY
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
Coordinate latt_size = GridDefaultLatt(); Coordinate latt_size = GridDefaultLatt();
@ -204,4 +205,5 @@ int main (int argc, char ** argv)
assert( fabs(real(Sprime-S-dSpred)) < 1.0 ) ; assert( fabs(real(Sprime-S-dSpred)) < 1.0 ) ;
std::cout<< GridLogMessage << "Done" <<std::endl; std::cout<< GridLogMessage << "Done" <<std::endl;
Grid_finalize(); Grid_finalize();
#endif
} }

View File

@ -32,6 +32,7 @@ using namespace std;
using namespace Grid; using namespace Grid;
//Here we test the G-parity action and force between the 1f (doubled-lattice) and 2f approaches //Here we test the G-parity action and force between the 1f (doubled-lattice) and 2f approaches
#ifdef ENABLE_GPARITY
void copyConjGauge(LatticeGaugeFieldD &Umu_1f, const LatticeGaugeFieldD &Umu_2f, const int nu){ void copyConjGauge(LatticeGaugeFieldD &Umu_1f, const LatticeGaugeFieldD &Umu_2f, const int nu){
@ -444,3 +445,7 @@ int main (int argc, char ** argv)
assert(0); assert(0);
} }
} }
#else
int main (int argc, char ** argv){};
#endif

View File

@ -32,6 +32,7 @@ using namespace Grid;
int main (int argc, char ** argv) int main (int argc, char ** argv)
{ {
#ifdef ENABLE_GPARITY
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
Coordinate latt_size = GridDefaultLatt(); Coordinate latt_size = GridDefaultLatt();
@ -155,4 +156,5 @@ int main (int argc, char ** argv)
std::cout<< GridLogMessage << "Done" <<std::endl; std::cout<< GridLogMessage << "Done" <<std::endl;
Grid_finalize(); Grid_finalize();
#endif
} }

View File

@ -30,9 +30,10 @@ See the full license in the file "LICENSE" in the top level distribution directo
#include <Grid/Grid.h> #include <Grid/Grid.h>
#ifdef ENABLE_GPARITY
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
;
typedef GparityWilsonImplD FermionImplPolicyD; typedef GparityWilsonImplD FermionImplPolicyD;
typedef GparityMobiusEOFAFermionD FermionActionD; typedef GparityMobiusEOFAFermionD FermionActionD;
@ -231,3 +232,7 @@ int main (int argc, char** argv)
std::cout << GridLogMessage << "Done" << std::endl; std::cout << GridLogMessage << "Done" << std::endl;
Grid_finalize(); Grid_finalize();
} }
#else
int main(int argc,char ** argv) { return 0;};
#endif

View File

@ -31,14 +31,14 @@ See the full license in the file "LICENSE" in the top level distribution directo
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
;
typedef GparityWilsonImplR FermionImplPolicy; typedef GparityWilsonImplR FermionImplPolicy;
typedef GparityMobiusEOFAFermionD FermionAction; typedef GparityMobiusEOFAFermionD FermionAction;
typedef typename FermionAction::FermionField FermionField; typedef typename FermionAction::FermionField FermionField;
int main (int argc, char** argv) int main (int argc, char** argv)
{ {
#ifdef ENABLE_GPARITY
Grid_init(&argc, &argv); Grid_init(&argc, &argv);
Coordinate latt_size = GridDefaultLatt(); Coordinate latt_size = GridDefaultLatt();
@ -171,4 +171,5 @@ int main (int argc, char** argv)
std::cout << GridLogMessage << "Done" << std::endl; std::cout << GridLogMessage << "Done" << std::endl;
Grid_finalize(); Grid_finalize();
#endif
} }

View File

@ -30,7 +30,7 @@
using namespace Grid; using namespace Grid;
#ifdef ENABLE_GPARITY
template<typename FermionField2f, typename FermionField1f> template<typename FermionField2f, typename FermionField1f>
void copy2fTo1fFermionField(FermionField1f &out, const FermionField2f &in, int gpdir){ void copy2fTo1fFermionField(FermionField1f &out, const FermionField2f &in, int gpdir){
@ -255,3 +255,6 @@ int main(int argc, char **argv) {
} // main } // main
#else
int main(int argc, char **argv){};
#endif

View File

@ -30,6 +30,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
int main(int argc, char **argv) { int main(int argc, char **argv) {
#ifdef ENABLE_GPARITY
using namespace Grid; using namespace Grid;
; ;
@ -139,7 +140,7 @@ int main(int argc, char **argv) {
Grid_finalize(); Grid_finalize();
#endif
} // main } // main

View File

@ -55,13 +55,13 @@ namespace Grid{
}; };
struct SmearingParameters: Serializable { struct HmcSmearingParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(SmearingParameters, GRID_SERIALIZABLE_CLASS_MEMBERS(HmcSmearingParameters,
double, rho, double, rho,
Integer, Nsmear) Integer, Nsmear)
template <class ReaderClass > template <class ReaderClass >
SmearingParameters(Reader<ReaderClass>& Reader){ HmcSmearingParameters(Reader<ReaderClass>& Reader){
read(Reader, "StoutSmearing", *this); read(Reader, "StoutSmearing", *this);
} }
@ -213,7 +213,7 @@ int main(int argc, char **argv) {
// Reset performance counters // Reset performance counters
if (ApplySmearing){ if (ApplySmearing){
SmearingParameters SmPar(Reader); HmcSmearingParameters SmPar(Reader);
//double rho = 0.1; // smearing parameter //double rho = 0.1; // smearing parameter
//int Nsmear = 3; // number of smearing levels //int Nsmear = 3; // number of smearing levels
Smear_Stout<HMCWrapper::ImplPolicy> Stout(SmPar.rho); Smear_Stout<HMCWrapper::ImplPolicy> Stout(SmPar.rho);

View File

@ -0,0 +1,14 @@
<?xml version="1.0"?>
<grid>
<LanczosParameters>
<mass>0.00107</mass>
<M5>1.8</M5>
<Ls>48</Ls>
<Nstop>10</Nstop>
<Nk>15</Nk>
<Np>85</Np>
<ChebyLow>0.003</ChebyLow>
<ChebyHigh>60</ChebyHigh>
<ChebyOrder>201</ChebyOrder>
</LanczosParameters>
</grid>

View File

@ -35,6 +35,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h> #include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h> #include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>
#ifdef ENABLE_GPARITY
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
@ -378,7 +380,8 @@ void runTest(const Options &opt){
//Note: because we rely upon physical properties we must use a "real" gauge configuration //Note: because we rely upon physical properties we must use a "real" gauge configuration
int main (int argc, char ** argv) { int main (int argc, char ** argv)
{
Grid_init(&argc,&argv); Grid_init(&argc,&argv);
GridLogIRL.TimingMode(1); GridLogIRL.TimingMode(1);
@ -482,4 +485,8 @@ int main (int argc, char ** argv) {
Grid_finalize(); Grid_finalize();
} }
#else
int main(int argc, char **argv){};
#endif

View File

@ -0,0 +1,346 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_G5R5.cc
Copyright (C) 2015
Author: Chulwoo Jung <chulwoo@bnl.gov>
From Duo and Bob's Chirality study
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
;
//typedef WilsonFermionD FermionOp;
typedef DomainWallFermionD FermionOp;
typedef typename DomainWallFermionD::FermionField FermionField;
RealD AllZero(RealD x) { return 0.; }
namespace Grid {
struct LanczosParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
RealD, mass ,
RealD, M5 ,
Integer, Ls,
Integer, Nstop,
Integer, Nk,
Integer, Np,
RealD, ChebyLow,
RealD, ChebyHigh,
Integer, ChebyOrder)
// Integer, StartTrajectory,
// Integer, Trajectories, /* @brief Number of sweeps in this run */
// bool, MetropolisTest,
// Integer, NoMetropolisUntil,
// std::string, StartingType,
// Integer, SW,
// RealD, Kappa,
// IntegratorParameters, MD)
LanczosParameters() {
////////////////////////////// Default values
mass = 0;
// MetropolisTest = true;
// NoMetropolisUntil = 10;
// StartTrajectory = 0;
// SW = 2;
// Trajectories = 10;
// StartingType = "HotStart";
/////////////////////////////////
}
template <class ReaderClass >
LanczosParameters(Reader<ReaderClass> & TheReader){
initialize(TheReader);
}
template < class ReaderClass >
void initialize(Reader<ReaderClass> &TheReader){
// std::cout << GridLogMessage << "Reading HMC\n";
read(TheReader, "HMC", *this);
}
void print_parameters() const {
// std::cout << GridLogMessage << "[HMC parameters] Trajectories : " << Trajectories << "\n";
// std::cout << GridLogMessage << "[HMC parameters] Start trajectory : " << StartTrajectory << "\n";
// std::cout << GridLogMessage << "[HMC parameters] Metropolis test (on/off): " << std::boolalpha << MetropolisTest << "\n";
// std::cout << GridLogMessage << "[HMC parameters] Thermalization trajs : " << NoMetropolisUntil << "\n";
// std::cout << GridLogMessage << "[HMC parameters] Starting type : " << StartingType << "\n";
// MD.print_parameters();
}
};
}
int main(int argc, char** argv) {
Grid_init(&argc, &argv);
LanczosParameters LanParams;
#if 1
{
XmlReader HMCrd("LanParams.xml");
read(HMCrd,"LanczosParameters",LanParams);
}
#else
{
LanParams.mass = mass;
}
#endif
std::cout << GridLogMessage<< LanParams <<std::endl;
{
XmlWriter HMCwr("LanParams.xml.out");
write(HMCwr,"LanczosParameters",LanParams);
}
int Ls=16;
RealD M5=1.8;
RealD mass = -1.0;
mass=LanParams.mass;
Ls=LanParams.Ls;
M5=LanParams.M5;
GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(
GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
GridDefaultMpi());
GridRedBlackCartesian* UrbGrid =
SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
// GridCartesian* FGrid = UGrid;
// GridRedBlackCartesian* FrbGrid = UrbGrid;
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
// printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n", UGrid, UrbGrid, FGrid, FrbGrid);
std::vector<int> seeds4({1, 2, 3, 4});
std::vector<int> seeds5({5, 6, 7, 8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
GridParallelRNG RNG5rb(FrbGrid); RNG5.SeedFixedIntegers(seeds5);
LatticeGaugeField Umu(UGrid);
FieldMetaData header;
std::string file("./config");
int precision32 = 0;
int tworow = 0;
NerscIO::readConfiguration(Umu,header,file);
/*
std::vector<LatticeColourMatrix> U(4, UGrid);
for (int mu = 0; mu < Nd; mu++) {
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
}
*/
int Nstop = 10;
int Nk = 20;
int Np = 80;
Nstop=LanParams.Nstop;
Nk=LanParams.Nk;
Np=LanParams.Np;
int Nm = Nk + Np;
int MaxIt = 10000;
RealD resid = 1.0e-5;
//while ( mass > - 5.0){
FermionOp Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
MdagMLinearOperator<FermionOp,FermionField> HermOp(Ddwf); /// <-----
// Gamma5HermitianLinearOperator <FermionOp,LatticeFermion> HermOp2(WilsonOperator); /// <-----
Gamma5R5HermitianLinearOperator<FermionOp, LatticeFermion> G5R5Herm(Ddwf);
// Gamma5R5HermitianLinearOperator
std::vector<double> Coeffs{0, 1.};
Polynomial<FermionField> PolyX(Coeffs);
Chebyshev<FermionField> Cheby(LanParams.ChebyLow,LanParams.ChebyHigh,LanParams.ChebyOrder);
FunctionHermOp<FermionField> OpCheby(Cheby,HermOp);
PlainHermOp<FermionField> Op (HermOp);
PlainHermOp<FermionField> Op2 (G5R5Herm);
ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby, Op, Nstop, Nk, Nm, resid, MaxIt);
std::vector<RealD> eval(Nm);
FermionField src(FGrid);
gaussian(RNG5, src);
std::vector<FermionField> evec(Nm, FGrid);
for (int i = 0; i < 1; i++) {
std::cout << i << " / " << Nm << " grid pointer " << evec[i].Grid()
<< std::endl;
};
int Nconv;
IRL.calc(eval, evec, src, Nconv);
std::cout << mass <<" : " << eval << std::endl;
#if 0
Gamma g5(Gamma::Algebra::Gamma5) ;
ComplexD dot;
FermionField tmp(FGrid);
// RealD eMe,eMMe;
for (int i = 0; i < Nstop ; i++) {
// tmp = g5*evec[i];
dot = innerProduct(evec[i],evec[i]);
// G5R5(tmp,evec[i]);
G5R5Herm.HermOpAndNorm(evec[i],tmp,eMe,eMMe);
std::cout <<"Norm "<<M5<<" "<< mass << " : " << i << " " << real(dot) << " " << imag(dot) << " "<< eMe << " " <<eMMe<< std::endl ;
for (int j = 0; j < Nstop ; j++) {
dot = innerProduct(tmp,evec[j]);
std::cout <<"G5R5 "<<M5<<" "<< mass << " : " << i << " " <<j<<" " << real(dot) << " " << imag(dot) << std::endl ;
}
}
// src = evec[0]+evec[1]+evec[2];
// mass += -0.1;
#endif
//**********************************************************************
//orthogonalization
//calculate the matrix
cout << "Start orthogonalization " << endl;
cout << "calculate the matrix element" << endl;
vector<LatticeFermion> G5R5Mevec(Nconv, FGrid);
vector<LatticeFermion> finalevec(Nconv, FGrid);
vector<RealD> eMe(Nconv), eMMe(Nconv);
for(int i = 0; i < Nconv; i++){
G5R5Herm.HermOpAndNorm(evec[i], G5R5Mevec[i], eMe[i], eMMe[i]);
}
cout << "Re<evec, G5R5M(evec)>: " << endl;
cout << eMe << endl;
cout << "<G5R5M(evec), G5R5M(evec)>" << endl;
cout << eMMe << endl;
vector<vector<ComplexD>> VevecG5R5Mevec(Nconv);
Eigen::MatrixXcd evecG5R5Mevec = Eigen::MatrixXcd::Zero(Nconv, Nconv);
for(int i = 0; i < Nconv; i++){
VevecG5R5Mevec[i].resize(Nconv);
for(int j = 0; j < Nconv; j++){
VevecG5R5Mevec[i][j] = innerProduct(evec[i], G5R5Mevec[j]);
evecG5R5Mevec(i, j) = VevecG5R5Mevec[i][j];
}
}
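// The matrix just formed is M_ij = <evec_i, (G5 R5 D) evec_j>; diagonalising it
// and rotating the Lanczos basis by its eigenvectors (below) gives approximate
// eigenvectors of the G5R5-Hermitian operator itself.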
//calculate eigenvector
cout << "Eigen solver" << endl;
Eigen::SelfAdjointEigenSolver<Eigen::MatrixXcd> eigensolver(evecG5R5Mevec);
vector<RealD> eigeneval(Nconv);
vector<vector<ComplexD>> eigenevec(Nconv);
for(int i = 0; i < Nconv; i++){
eigeneval[i] = eigensolver.eigenvalues()[i];
eigenevec[i].resize(Nconv);
for(int j = 0; j < Nconv; j++){
eigenevec[i][j] = eigensolver.eigenvectors()(i, j);
}
}
//rotation
cout << "Do rotation" << endl;
for(int i = 0; i < Nconv; i++){
finalevec[i] = finalevec[i] - finalevec[i];
for(int j = 0; j < Nconv; j++){
finalevec[i] = eigenevec[j][i]*evec[j] + finalevec[i];
}
}
//normalize again;
for(int i = 0; i < Nconv; i++){
RealD tmp_RealD = norm2(finalevec[i]);
tmp_RealD = 1./pow(tmp_RealD, 0.5);
finalevec[i] = finalevec[i]*tmp_RealD;
}
//check
for(int i = 0; i < Nconv; i++){
G5R5Herm.HermOpAndNorm(finalevec[i], G5R5Mevec[i], eMe[i], eMMe[i]);
}
//**********************************************************************
//sort the eigenvectors
vector<LatticeFermion> finalevec_copy(Nconv, FGrid);
for(int i = 0; i < Nconv; i++){
finalevec_copy[i] = finalevec[i];
}
vector<RealD> eMe_copy(eMe);
for(int i = 0; i < Nconv; i++){
eMe[i] = fabs(eMe[i]);
eMe_copy[i] = eMe[i];
}
sort(eMe_copy.begin(), eMe_copy.end());
for(int i = 0; i < Nconv; i++){
for(int j = 0; j < Nconv; j++){
if(eMe[j] == eMe_copy[i]){
finalevec[i] = finalevec_copy[j];
}
}
}
for(int i = 0; i < Nconv; i++){
G5R5Herm.HermOpAndNorm(finalevec[i], G5R5Mevec[i], eMe[i], eMMe[i]);
}
cout << "Re<evec, G5R5M(evec)>: " << endl;
cout << eMe << endl;
cout << "<G5R5M(evec), G5R5M(evec)>" << endl;
cout << eMMe << endl;
// vector<LatticeFermion> finalevec(Nconv, FGrid);
// temporary, until doing rotation
// for(int i = 0; i < Nconv; i++)
// finalevec[i]=evec[i];
//**********************************************************************
//calculate chirality matrix
vector<LatticeFermion> G5evec(Nconv, FGrid);
vector<vector<ComplexD>> chiral_matrix(Nconv);
vector<vector<RealD>> chiral_matrix_real(Nconv);
for(int i = 0; i < Nconv; i++){
// G5evec[i] = G5evec[i] - G5evec[i];
G5evec[i] = Zero();
for(int j = 0; j < Ls/2; j++){
axpby_ssp(G5evec[i], 1., finalevec[i], 0., G5evec[i], j, j);
}
for(int j = Ls/2; j < Ls; j++){
axpby_ssp(G5evec[i], -1., finalevec[i], 0., G5evec[i], j, j);
}
}
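// chiral_matrix[i][j] = <finalevec_i, G5evec_j> measures 5d chirality:
// G5evec carries weight +1 on slices s < Ls/2 and -1 on s >= Ls/2.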
for(int i = 0; i < Nconv; i++){
chiral_matrix_real[i].resize(Nconv);
chiral_matrix[i].resize(Nconv);
for(int j = 0; j < Nconv; j++){
chiral_matrix[i][j] = innerProduct(finalevec[i], G5evec[j]);
chiral_matrix_real[i][j] = abs(chiral_matrix[i][j]);
std::cout <<" chiral_matrix_real "<<i<<" "<<j<<" "<< chiral_matrix_real[i][j] << std::endl;
}
}
for(int i = 0; i < Nconv; i++){
if(chiral_matrix[i][i].real() < 0.){
chiral_matrix_real[i][i] = -1. * chiral_matrix_real[i][i];
}
}
Grid_finalize();
}

View File

@ -29,11 +29,11 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
using namespace std; using namespace std;
using namespace Grid; using namespace Grid;
;
template<typename Action> template<typename Action>
struct Setup{}; struct Setup{};
#ifdef ENABLE_GPARITY
template<> template<>
struct Setup<GparityMobiusFermionF>{ struct Setup<GparityMobiusFermionF>{
static GparityMobiusFermionF* getAction(LatticeGaugeFieldF &Umu, static GparityMobiusFermionF* getAction(LatticeGaugeFieldF &Umu,
@ -47,16 +47,24 @@ struct Setup<GparityMobiusFermionF>{
return new GparityMobiusFermionF(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params); return new GparityMobiusFermionF(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,mob_b,mob_b-1.,params);
} }
}; };
#endif
template<> template<>
struct Setup<DomainWallFermionF>{ struct Setup<DomainWallFermionF>{
static DomainWallFermionF* getAction(LatticeGaugeFieldF &Umu, static DomainWallFermionF* getAction(LatticeGaugeFieldF &Umu,
GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){
RealD mass=0.00054;
RealD M5=1.8;
return new DomainWallFermionF(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
}
};
template<>
struct Setup<DomainWallFermionD>{ struct Setup<DomainWallFermionD>{
static DomainWallFermionD* getAction(LatticeGaugeField &Umu, static DomainWallFermionD* getAction(LatticeGaugeField &Umu,
GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){ GridCartesian* FGrid, GridRedBlackCartesian* FrbGrid, GridCartesian* UGrid, GridRedBlackCartesian* UrbGrid){
RealD mass=0.00054; RealD mass=0.00054;
RealD M5=1.8; RealD M5=1.8;
return new DomainWallFermionF(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); return new DomainWallFermionD(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
} }
}; };
@ -168,7 +176,9 @@ int main (int argc, char ** argv)
} }
if(action == "GparityMobius"){ if(action == "GparityMobius"){
#ifdef ENABLE_GPARITY
run<GparityMobiusFermionF>(); run<GparityMobiusFermionF>();
#endif
}else if(action == "DWF"){ }else if(action == "DWF"){
run<DomainWallFermionF>(); run<DomainWallFermionF>();
}else if(action == "Mobius"){ }else if(action == "Mobius"){

View File

@ -555,6 +555,7 @@ int main (int argc, char ** argv) {
double c = (args.mobius_scale - bmc)/2.; // c = 1/2 [ (b+c) - (b-c) ] double c = (args.mobius_scale - bmc)/2.; // c = 1/2 [ (b+c) - (b-c) ]
if(is_gparity){ if(is_gparity){
#ifdef ENABLE_GPARITY
GparityWilsonImplD::ImplParams Params = setupGparityParams(args.GparityDirs); GparityWilsonImplD::ImplParams Params = setupGparityParams(args.GparityDirs);
readConfiguration<ConjugateGimplD>(Umu, config, args.is_cps_cfg); //Read the gauge field readConfiguration<ConjugateGimplD>(Umu, config, args.is_cps_cfg); //Read the gauge field
@ -565,6 +566,9 @@ int main (int argc, char ** argv) {
GparityMobiusFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, b, c, Params); GparityMobiusFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, b, c, Params);
run(action, config, args); run(action, config, args);
} }
#else
assert(0);
#endif
}else{ }else{
WilsonImplD::ImplParams Params = setupParams(); WilsonImplD::ImplParams Params = setupParams();
readConfiguration<PeriodicGimplD>(Umu, config, args.is_cps_cfg); //Read the gauge field readConfiguration<PeriodicGimplD>(Umu, config, args.is_cps_cfg); //Read the gauge field

View File

@ -0,0 +1,278 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_lanczos.cc
Copyright (C) 2015
Author: Chulwoo Jung <chulwoo@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
;
typedef WilsonFermionD FermionOp;
typedef typename WilsonFermionD::FermionField FermionField;
RealD AllZero(RealD x) { return 0.; }
namespace Grid {
#if 0
template<typename Field>
class RationalHermOp : public LinearFunction<Field> {
public:
using LinearFunction<Field>::operator();
// OperatorFunction<Field> & _poly;
LinearOperatorBase<Field> &_Linop;
RealD _massDen, _massNum;
FunctionHermOp(LinearOperatorBase<Field>& linop, RealD massDen,RealD massNum)
: _Linop(linop) ,_massDen(massDen),_massNum(massNum) {};
void operator()(const Field& in, Field& out) {
// _poly(_Linop,in,out);
}
};
#endif
template<class Matrix,class Field>
class InvG5LinearOperator : public LinearOperatorBase<Field> {
Matrix &_Mat;
RealD _num;
RealD _Tol;
Integer _MaxIt;
Gamma g5;
public:
InvG5LinearOperator(Matrix &Mat,RealD num): _Mat(Mat),_num(num), _Tol(1e-12),_MaxIt(10000), g5(Gamma::Algebra::Gamma5) {};
// Support for coarsening to a multigrid
void OpDiag (const Field &in, Field &out) {
assert(0);
_Mat.Mdiag(in,out);
}
void OpDir (const Field &in, Field &out,int dir,int disp) {
assert(0);
_Mat.Mdir(in,out,dir,disp);
}
void OpDirAll (const Field &in, std::vector<Field> &out){
assert(0);
_Mat.MdirAll(in,out);
};
void Op (const Field &in, Field &out){
assert(0);
_Mat.M(in,out);
}
void AdjOp (const Field &in, Field &out){
assert(0);
_Mat.Mdag(in,out);
}
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
HermOp(in,out);
ComplexD dot = innerProduct(in,out);
n1=real(dot);
n2=norm2(out);
}
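// Applies g5 M^{-1} (M + num): the numerator acts directly, the inverse
// is computed with CG on the normal equations (Mdag M).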
void HermOp(const Field &in, Field &out){
Field tmp(in.Grid());
MdagMLinearOperator<Matrix,Field> denom(_Mat);
ConjugateGradient<Field> CG(_Tol,_MaxIt);
_Mat.M(in,tmp);
tmp += _num*in;
_Mat.Mdag(tmp,out);
CG(denom,out,tmp);
out = g5*tmp;
}
};
struct LanczosParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
RealD, mass ,
RealD, resid,
RealD, ChebyLow,
RealD, ChebyHigh,
Integer, ChebyOrder)
// Integer, StartTrajectory,
// Integer, Trajectories, /* @brief Number of sweeps in this run */
// bool, MetropolisTest,
// Integer, NoMetropolisUntil,
// std::string, StartingType,
// Integer, SW,
// RealD, Kappa,
// IntegratorParameters, MD)
LanczosParameters() {
////////////////////////////// Default values
mass = 0;
// MetropolisTest = true;
// NoMetropolisUntil = 10;
// StartTrajectory = 0;
// SW = 2;
// Trajectories = 10;
// StartingType = "HotStart";
/////////////////////////////////
}
template <class ReaderClass >
LanczosParameters(Reader<ReaderClass> & TheReader){
initialize(TheReader);
}
template < class ReaderClass >
void initialize(Reader<ReaderClass> &TheReader){
// std::cout << GridLogMessage << "Reading HMC\n";
read(TheReader, "HMC", *this);
}
void print_parameters() const {
// std::cout << GridLogMessage << "[HMC parameters] Trajectories : " << Trajectories << "\n";
// std::cout << GridLogMessage << "[HMC parameters] Start trajectory : " << StartTrajectory << "\n";
// std::cout << GridLogMessage << "[HMC parameters] Metropolis test (on/off): " << std::boolalpha << MetropolisTest << "\n";
// std::cout << GridLogMessage << "[HMC parameters] Thermalization trajs : " << NoMetropolisUntil << "\n";
// std::cout << GridLogMessage << "[HMC parameters] Starting type : " << StartingType << "\n";
// MD.print_parameters();
}
};
}
int main(int argc, char** argv) {
Grid_init(&argc, &argv);
GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(
GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
GridDefaultMpi());
GridRedBlackCartesian* UrbGrid =
SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian* FGrid = UGrid;
GridRedBlackCartesian* FrbGrid = UrbGrid;
// printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n", UGrid, UrbGrid, FGrid, FrbGrid);
std::vector<int> seeds4({1, 2, 3, 4});
std::vector<int> seeds5({5, 6, 7, 8});
GridParallelRNG RNG5(FGrid);
RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid);
RNG4.SeedFixedIntegers(seeds4);
GridParallelRNG RNG5rb(FrbGrid);
RNG5.SeedFixedIntegers(seeds5);
LatticeGaugeField Umu(UGrid);
// SU<Nc>::HotConfiguration(RNG4, Umu);
FieldMetaData header;
std::string file("./config");
int precision32 = 0;
int tworow = 0;
// NerscIO::writeConfiguration(Umu,file,tworow,precision32);
NerscIO::readConfiguration(Umu,header,file);
/*
std::vector<LatticeColourMatrix> U(4, UGrid);
for (int mu = 0; mu < Nd; mu++) {
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
}
*/
int Nstop = 5;
int Nk = 10;
int Np = 90;
int Nm = Nk + Np;
int MaxIt = 10000;
RealD resid = 1.0e-5;
RealD mass = -1.0;
LanczosParameters LanParams;
#if 1
{
XmlReader HMCrd("LanParams.xml");
read(HMCrd,"LanczosParameters",LanParams);
}
#else
{
LanParams.mass = mass;
}
#endif
std::cout << GridLogMessage<< LanParams <<std::endl;
{
XmlWriter HMCwr("LanParams.xml.out");
write(HMCwr,"LanczosParameters",LanParams);
}
mass=LanParams.mass;
resid=LanParams.resid;
while ( mass > - 5.0){
FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,2.+mass);
InvG5LinearOperator<FermionOp,LatticeFermion> HermOp(WilsonOperator,-2.); /// <-----
//SchurDiagTwoOperator<FermionOp,FermionField> HermOp(WilsonOperator);
// Gamma5HermitianLinearOperator <FermionOp,LatticeFermion> HermOp2(WilsonOperator); /// <-----
std::vector<double> Coeffs{0, 0, 1.};
Polynomial<FermionField> PolyX(Coeffs);
Chebyshev<FermionField> Cheby(LanParams.ChebyLow,LanParams.ChebyHigh,LanParams.ChebyOrder);
FunctionHermOp<FermionField> OpCheby(Cheby,HermOp);
// InvHermOp<FermionField> Op(WilsonOperator,HermOp);
PlainHermOp<FermionField> Op (HermOp);
// PlainHermOp<FermionField> Op2 (HermOp2);
ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby, Op, Nstop, Nk, Nm, resid, MaxIt);
std::vector<RealD> eval(Nm);
FermionField src(FGrid);
gaussian(RNG5, src);
std::vector<FermionField> evec(Nm, FGrid);
for (int i = 0; i < 1; i++) {
std::cout << i << " / " << Nm << " grid pointer " << evec[i].Grid()
<< std::endl;
};
int Nconv;
IRL.calc(eval, evec, src, Nconv);
std::cout << mass <<" : " << eval << std::endl;
Gamma g5(Gamma::Algebra::Gamma5) ;
ComplexD dot;
FermionField tmp(FGrid);
for (int i = 0; i < Nstop ; i++) {
tmp = g5*evec[i];
dot = innerProduct(tmp,evec[i]);
std::cout << mass << " : " << eval[i] << " " << real(dot) << " " << imag(dot) << std::endl ;
}
src = evec[0]+evec[1]+evec[2];
mass += -0.1;
}
Grid_finalize();
}

View File

@ -0,0 +1,211 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_lanczos.cc
Copyright (C) 2015
Author: Chulwoo Jung <chulwoo@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
;
typedef WilsonFermionD FermionOp;
typedef typename WilsonFermionD::FermionField FermionField;
RealD AllZero(RealD x) { return 0.; }
namespace Grid {
struct LanczosParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
RealD, mass ,
RealD, ChebyLow,
RealD, ChebyHigh,
Integer, ChebyOrder)
// Integer, StartTrajectory,
// Integer, Trajectories, /* @brief Number of sweeps in this run */
// bool, MetropolisTest,
// Integer, NoMetropolisUntil,
// std::string, StartingType,
// Integer, SW,
// RealD, Kappa,
// IntegratorParameters, MD)
LanczosParameters() {
////////////////////////////// Default values
mass = 0;
// MetropolisTest = true;
// NoMetropolisUntil = 10;
// StartTrajectory = 0;
// SW = 2;
// Trajectories = 10;
// StartingType = "HotStart";
/////////////////////////////////
}
template <class ReaderClass >
LanczosParameters(Reader<ReaderClass> & TheReader){
initialize(TheReader);
}
template < class ReaderClass >
void initialize(Reader<ReaderClass> &TheReader){
// std::cout << GridLogMessage << "Reading HMC\n";
read(TheReader, "HMC", *this);
}
void print_parameters() const {
// std::cout << GridLogMessage << "[HMC parameters] Trajectories : " << Trajectories << "\n";
// std::cout << GridLogMessage << "[HMC parameters] Start trajectory : " << StartTrajectory << "\n";
// std::cout << GridLogMessage << "[HMC parameters] Metropolis test (on/off): " << std::boolalpha << MetropolisTest << "\n";
// std::cout << GridLogMessage << "[HMC parameters] Thermalization trajs : " << NoMetropolisUntil << "\n";
// std::cout << GridLogMessage << "[HMC parameters] Starting type : " << StartingType << "\n";
// MD.print_parameters();
}
};
}
int main(int argc, char** argv) {
Grid_init(&argc, &argv);
GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(
GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
GridDefaultMpi());
GridRedBlackCartesian* UrbGrid =
SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian* FGrid = UGrid;
GridRedBlackCartesian* FrbGrid = UrbGrid;
// printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n", UGrid, UrbGrid, FGrid, FrbGrid);
std::vector<int> seeds4({1, 2, 3, 4});
std::vector<int> seeds5({5, 6, 7, 8});
GridParallelRNG RNG5(FGrid);
RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid);
RNG4.SeedFixedIntegers(seeds4);
GridParallelRNG RNG5rb(FrbGrid);
RNG5.SeedFixedIntegers(seeds5);
LatticeGaugeField Umu(UGrid);
// SU<Nc>::HotConfiguration(RNG4, Umu);
FieldMetaData header;
std::string file("./config");
int precision32 = 0;
int tworow = 0;
// NerscIO::writeConfiguration(Umu,file,tworow,precision32);
NerscIO::readConfiguration(Umu,header,file);
/*
std::vector<LatticeColourMatrix> U(4, UGrid);
for (int mu = 0; mu < Nd; mu++) {
U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
}
*/
int Nstop = 10;
int Nk = 20;
int Np = 80;
int Nm = Nk + Np;
int MaxIt = 10000;
RealD resid = 1.0e-5;
RealD mass = -1.0;
LanczosParameters LanParams;
#if 1
{
XmlReader HMCrd("LanParams.xml");
read(HMCrd,"LanczosParameters",LanParams);
}
#else
{
LanParams.mass = mass;
}
#endif
std::cout << GridLogMessage<< LanParams <<std::endl;
{
XmlWriter HMCwr("LanParams.xml.out");
write(HMCwr,"LanczosParameters",LanParams);
}
mass=LanParams.mass;
while ( mass > - 5.0){
FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,mass);
MdagMLinearOperator<FermionOp,FermionField> HermOp(WilsonOperator); /// <-----
//SchurDiagTwoOperator<FermionOp,FermionField> HermOp(WilsonOperator);
Gamma5HermitianLinearOperator <FermionOp,LatticeFermion> HermOp2(WilsonOperator); /// <-----
std::vector<double> Coeffs{0, 1.};
Polynomial<FermionField> PolyX(Coeffs);
// Chebyshev<FermionField> Cheby(0.5, 60., 31);
// RealD, ChebyLow,
// RealD, ChebyHigh,
// Integer, ChebyOrder)
Chebyshev<FermionField> Cheby(LanParams.ChebyLow,LanParams.ChebyHigh,LanParams.ChebyOrder);
FunctionHermOp<FermionField> OpCheby(Cheby,HermOp);
PlainHermOp<FermionField> Op (HermOp);
PlainHermOp<FermionField> Op2 (HermOp2);
ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby, Op2, Nstop, Nk, Nm, resid, MaxIt);
std::vector<RealD> eval(Nm);
FermionField src(FGrid);
gaussian(RNG5, src);
std::vector<FermionField> evec(Nm, FGrid);
for (int i = 0; i < 1; i++) {
std::cout << i << " / " << Nm << " grid pointer " << evec[i].Grid()
<< std::endl;
};
int Nconv;
IRL.calc(eval, evec, src, Nconv);
std::cout << mass <<" : " << eval << std::endl;
Gamma g5(Gamma::Algebra::Gamma5) ;
ComplexD dot;
FermionField tmp(FGrid);
for (int i = 0; i < Nstop ; i++) {
tmp = g5*evec[i];
dot = innerProduct(tmp,evec[i]);
std::cout << mass << " : " << eval[i] << " " << real(dot) << " " << imag(dot) << std::endl ;
}
src = evec[0]+evec[1]+evec[2];
mass += -0.1;
}
Grid_finalize();
}

View File

@ -33,8 +33,7 @@ namespace Grid{
GRID_SERIALIZABLE_CLASS_MEMBERS(WFParameters, GRID_SERIALIZABLE_CLASS_MEMBERS(WFParameters,
int, steps, int, steps,
double, step_size, double, step_size,
int, meas_interval, int, meas_interval);
double, maxTau); // for the adaptive algorithm
template <class ReaderClass > template <class ReaderClass >
@ -86,7 +85,7 @@ int main(int argc, char **argv) {
WFParameters WFPar(Reader); WFParameters WFPar(Reader);
ConfParameters CPar(Reader); ConfParameters CPar(Reader);
CheckpointerParameters CPPar(CPar.conf_prefix, CPar.rng_prefix); CheckpointerParameters CPPar(CPar.conf_prefix, CPar.rng_prefix);
BinaryHmcCheckpointer<PeriodicGimplR> CPBin(CPPar); NerscHmcCheckpointer<PeriodicGimplR> CPBin(CPPar);
for (int conf = CPar.StartConfiguration; conf <= CPar.EndConfiguration; conf+= CPar.Skip){ for (int conf = CPar.StartConfiguration; conf <= CPar.EndConfiguration; conf+= CPar.Skip){
@ -96,19 +95,13 @@ int main(int argc, char **argv) {
std::cout << GridLogMessage << "Initial plaquette: " std::cout << GridLogMessage << "Initial plaquette: "
<< WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu) << std::endl; << WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu) << std::endl;
int t=WFPar.maxTau; WilsonFlow<PeriodicGimplR> WF(WFPar.step_size, WFPar.steps,
WilsonFlowAdaptive<PeriodicGimplR> WF(WFPar.step_size, WFPar.maxTau,
1.0e-4,
WFPar.meas_interval); WFPar.meas_interval);
WF.smear(Uflow, Umu); WF.smear(Uflow, Umu);
RealD WFlow_plaq = WilsonLoops<PeriodicGimplR>::avgPlaquette(Uflow); RealD WFlow_plaq = WilsonLoops<PeriodicGimplR>::avgPlaquette(Uflow);
RealD WFlow_TC = WilsonLoops<PeriodicGimplR>::TopologicalCharge(Uflow);
RealD WFlow_T0 = WF.energyDensityPlaquette(t,Uflow);
std::cout << GridLogMessage << "Plaquette "<< conf << " " << WFlow_plaq << std::endl; std::cout << GridLogMessage << "Plaquette "<< conf << " " << WFlow_plaq << std::endl;
std::cout << GridLogMessage << "T0 "<< conf << " " << WFlow_T0 << std::endl;
std::cout << GridLogMessage << "TopologicalCharge "<< conf << " " << WFlow_TC << std::endl;
std::cout<< GridLogMessage << " Admissibility check:\n"; std::cout<< GridLogMessage << " Admissibility check:\n";
const double sp_adm = 0.067; // admissible threshold const double sp_adm = 0.067; // admissible threshold

View File

@ -1,4 +1,4 @@
************************************************************************************* /*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid Grid physics library, www.github.com/paboyle/Grid

View File

@ -165,6 +165,7 @@ int main (int argc, char ** argv)
} }
} }
if(gparity){ if(gparity){
#ifdef ENABLE_GPARITY
std::cout << "Running test with G-parity BCs in " << gpdir << " direction" << std::endl; std::cout << "Running test with G-parity BCs in " << gpdir << " direction" << std::endl;
GparityWilsonImplParams params; GparityWilsonImplParams params;
params.twists[gpdir] = 1; params.twists[gpdir] = 1;
@ -174,6 +175,9 @@ int main (int argc, char ** argv)
ConjugateGimplD::setDirections(conj_dirs); ConjugateGimplD::setDirections(conj_dirs);
run_test<GparityDomainWallFermionD, GparityDomainWallFermionF, ConjugateGaugeStatistics>(argc,argv,params); run_test<GparityDomainWallFermionD, GparityDomainWallFermionF, ConjugateGaugeStatistics>(argc,argv,params);
#else
std::cout << " Gparity is not compiled "<<std::endl;
#endif
}else{ }else{
std::cout << "Running test with periodic BCs" << std::endl; std::cout << "Running test with periodic BCs" << std::endl;
WilsonImplParams params; WilsonImplParams params;

View File

@ -0,0 +1,37 @@
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)
project(GridViewer)
list(APPEND CMAKE_PREFIX_PATH "/Users/peterboyle/QCD/vtk/VTK-9.4.2-install/")
find_package(VTK COMPONENTS
CommonColor
CommonCore
FiltersCore
FiltersModeling
IOImage
IOFFMPEG
InteractionStyle
InteractionWidgets
RenderingContextOpenGL2
RenderingCore
RenderingFreeType
RenderingGL2PSOpenGL2
RenderingOpenGL2
)
if (NOT VTK_FOUND)
message(FATAL_ERROR "GridViewer: Unable to find the VTK build folder.")
endif()
# Prevent a "command line is too long" failure in Windows.
set(CMAKE_NINJA_FORCE_RESPONSE_FILE "ON" CACHE BOOL "Force Ninja to use response files.")
add_executable(FieldDensityAnimate MACOSX_BUNDLE FieldDensityAnimate.cxx )
target_link_libraries(FieldDensityAnimate PRIVATE ${VTK_LIBRARIES}
)
# vtk_module_autoinit is needed
vtk_module_autoinit(
TARGETS FieldDensityAnimate
MODULES ${VTK_LIBRARIES}
)

Binary file not shown.

View File

@ -0,0 +1,285 @@
#!/usr/bin/env python
# noinspection PyUnresolvedReferences
import math
import vtk
import vtkmodules.vtkInteractionStyle
# noinspection PyUnresolvedReferences
import vtkmodules.vtkRenderingOpenGL2
from vtkmodules.vtkCommonColor import vtkNamedColors
from vtkmodules.vtkCommonCore import (
VTK_VERSION_NUMBER,
vtkVersion
)
from vtkmodules.vtkCommonCore import VTK_DOUBLE
from vtkmodules.vtkCommonDataModel import vtkImageData
from vtkmodules.vtkFiltersCore import (
vtkMarchingCubes,
vtkStripper
)
from vtkmodules.vtkFiltersModeling import vtkOutlineFilter
from vtkmodules.vtkIOImage import (
vtkMetaImageReader,
vtkJPEGWriter,
vtkPNGWriter
)
from vtkmodules.vtkRenderingCore import (
vtkActor,
vtkCamera,
vtkPolyDataMapper,
vtkProperty,
vtkRenderWindow,
vtkRenderWindowInteractor,
vtkRenderer,
vtkWindowToImageFilter
)
class vtkTimerCallback():
def __init__(self, steps, imageData, iren):
self.timer_count = 0
self.steps = steps
self.imageData = imageData
self.iren = iren
self.timerId = None
self.step = 0
def execute(self, obj, event):
print(self.timer_count)
dims = self.imageData.GetDimensions()
t=self.step/10.0
z0 = 2
y0 = 4
x0 = 4
z1 = 14
y1 = 12
x1 = 12
for z in range(dims[2]):
for y in range(dims[1]):
for x in range(dims[0]):
self.imageData.SetScalarComponentFromDouble(x, y, z, 0,
math.sin(t)*math.exp(-0.25*((x-x0)*(x-x0)+(y-y0)*(y-y0)+(z-z0)*(z-z0)))
- math.cos(t)*math.exp(-0.25*((x-x1)*(x-x1)+(y-y1)*(y-y1)+(z-z1)*(z-z1))))
self.imageData.Modified()
iren = obj
iren.GetRenderWindow().Render()
self.timer_count += 1
self.step += 1
if self.step >= self.steps :
iren.DestroyTimer(self.timerId)
def WriteImage(fileName, renWin):
'''
Write the current render window contents to an image file (PNG by default).
'''
import os
if fileName:
# Select the writer to use.
path, ext = os.path.splitext(fileName)
ext = ext.lower()
if not ext:
ext = '.png'
fileName = fileName + ext
if ext == '.jpg':
writer = vtkJPEGWriter()
else:
writer = vtkPNGWriter()
windowto_image_filter = vtkWindowToImageFilter()
windowto_image_filter.SetInput(renWin)
windowto_image_filter.SetScale(1) # image quality
windowto_image_filter.SetInputBufferTypeToRGBA()
writer.SetFileName(fileName)
writer.SetInputConnection(windowto_image_filter.GetOutputPort())
writer.Write()
else:
raise RuntimeError('Need a filename.')
def main():
colors = vtkNamedColors()
file_name = get_program_parameters()
colors.SetColor('InstantonColor', [240, 184, 160, 255])
colors.SetColor('BackfaceColor', [255, 229, 200, 255])
colors.SetColor('BkgColor', [51, 77, 102, 255])
# Create the renderer, the render window, and the interactor. The renderer
# draws into the render window, the interactor enables mouse- and
# keyboard-based interaction with the data within the render window.
#
a_renderer = vtkRenderer()
ren_win = vtkRenderWindow()
ren_win.AddRenderer(a_renderer)
iren = vtkRenderWindowInteractor()
iren.SetRenderWindow(ren_win)
# Build a small 3D scalar field directly in a vtkImageData volume;
# no file reader is needed, the data below is generated procedurally.
imageData = vtkImageData()
imageData.SetDimensions(16, 16, 16)
imageData.AllocateScalars(VTK_DOUBLE, 1)
dims = imageData.GetDimensions()
    # Fill the volume with a difference of two Gaussians: a positive lump
    # centred at (x0, y0, 0) and a negative one at (x0, 0, z0).
for z in range(dims[2]):
z0 = dims[2]/2
for y in range(dims[1]):
y0 = dims[1]/2
for x in range(dims[0]):
x0 = dims[0]/2
imageData.SetScalarComponentFromDouble(x, y, z, 0, math.exp(-0.25*((x-x0)*(x-x0)+(y-y0)*(y-y0)+z*z)) - math.exp(-0.25*((x-x0)*(x-x0)+y*y+(z-z0)*(z-z0))))
instanton_extractor = vtkMarchingCubes()
instanton_extractor.SetInputData(imageData)
instanton_extractor.SetValue(0, 0.1)
instanton_stripper = vtkStripper()
instanton_stripper.SetInputConnection(instanton_extractor.GetOutputPort())
instanton_mapper = vtkPolyDataMapper()
instanton_mapper.SetInputConnection(instanton_stripper.GetOutputPort())
instanton_mapper.ScalarVisibilityOff()
instanton = vtkActor()
instanton.SetMapper(instanton_mapper)
instanton.GetProperty().SetDiffuseColor(colors.GetColor3d('InstantonColor'))
instanton.GetProperty().SetSpecular(0.3)
instanton.GetProperty().SetSpecularPower(20)
instanton.GetProperty().SetOpacity(0.5)
    # The triangle stripper is used to create triangle strips from the
    # isosurface; these render much faster on many systems.
antiinstanton_extractor = vtkMarchingCubes()
antiinstanton_extractor.SetInputData(imageData)
antiinstanton_extractor.SetValue(0, -0.1)
antiinstanton_stripper = vtkStripper()
antiinstanton_stripper.SetInputConnection(antiinstanton_extractor.GetOutputPort())
antiinstanton_mapper = vtkPolyDataMapper()
antiinstanton_mapper.SetInputConnection(antiinstanton_stripper.GetOutputPort())
antiinstanton_mapper.ScalarVisibilityOff()
antiinstanton = vtkActor()
antiinstanton.SetMapper(antiinstanton_mapper)
antiinstanton.GetProperty().SetDiffuseColor(colors.GetColor3d('Ivory'))
    # An outline provides a box around the data.
outline_data = vtkOutlineFilter()
outline_data.SetInputData(imageData)
map_outline = vtkPolyDataMapper()
map_outline.SetInputConnection(outline_data.GetOutputPort())
outline = vtkActor()
outline.SetMapper(map_outline)
outline.GetProperty().SetColor(colors.GetColor3d('Black'))
# It is convenient to create an initial view of the data. The FocalPoint
# and Position form a vector direction. Later on (ResetCamera() method)
# this vector is used to position the camera to look at the data in
# this direction.
a_camera = vtkCamera()
a_camera.SetViewUp(0, 0, -1)
a_camera.SetPosition(0, -100, 0)
a_camera.SetFocalPoint(0, 0, 0)
a_camera.ComputeViewPlaneNormal()
a_camera.Azimuth(30.0)
a_camera.Elevation(30.0)
# Actors are added to the renderer. An initial camera view is created.
# The Dolly() method moves the camera towards the FocalPoint,
# thereby enlarging the image.
a_renderer.AddActor(outline)
a_renderer.AddActor(instanton)
a_renderer.AddActor(antiinstanton)
a_renderer.SetActiveCamera(a_camera)
a_renderer.ResetCamera()
a_camera.Dolly(1.0)
# Set a background color for the renderer and set the size of the
# render window (expressed in pixels).
a_renderer.SetBackground(colors.GetColor3d('BkgColor'))
ren_win.SetSize(1024, 1024)
ren_win.SetWindowName('ExpoDemo')
# Note that when camera movement occurs (as it does in the Dolly()
# method), the clipping planes often need adjusting. Clipping planes
# consist of two planes: near and far along the view direction. The
    # near plane clips out objects in front of the plane; the far plane
# clips out objects behind the plane. This way only what is drawn
# between the planes is actually rendered.
a_renderer.ResetCameraClippingRange()
# write image
# WriteImage('exp.jpg',ren_win)
# Sign up to receive TimerEvent
cb = vtkTimerCallback(200, imageData, iren)
iren.AddObserver('TimerEvent', cb.execute)
cb.timerId = iren.CreateRepeatingTimer(50)
# start the interaction and timer
ren_win.Render()
# Initialize the event loop and then start it.
iren.Initialize()
iren.Start()
def get_program_parameters():
import argparse
description = 'Simple lattice volumetric demo'
epilogue = '''
Derived from VTK/Examples/Cxx/Medical2.cxx
'''
parser = argparse.ArgumentParser(description=description, epilog=epilogue,
formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('filename', help='Input file name (accepted but not used by this demo).')
args = parser.parse_args()
return args.filename
def vtk_version_ok(major, minor, build):
"""
Check the VTK version.
:param major: Major version.
:param minor: Minor version.
:param build: Build version.
    :return: True if the actual VTK version is greater than or equal to the requested version.
"""
needed_version = 10000000000 * int(major) + 100000000 * int(minor) + int(build)
try:
vtk_version_number = VTK_VERSION_NUMBER
except AttributeError: # as error:
ver = vtkVersion()
vtk_version_number = 10000000000 * ver.GetVTKMajorVersion() + 100000000 * ver.GetVTKMinorVersion() \
+ ver.GetVTKBuildVersion()
if vtk_version_number >= needed_version:
return True
else:
return False
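# Example guard (hypothetical threshold), should a feature need newer VTK:
# if not vtk_version_ok(9, 0, 0):
#     raise RuntimeError('This demo assumes VTK 9.0 or newer.')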
if __name__ == '__main__':
main()

View File

@ -0,0 +1,490 @@
// Derived from VTK/Examples/Cxx/Medical2.cxx, which reads a volume
// dataset, extracts two isosurfaces (skin and bone), and displays them.
//
// Modified heavily by Peter Boyle to display lattice field theory data
// as movies and to compare multiple files.
#include <vtkActor.h>
#include <vtkCamera.h>
#include <vtkMetaImageReader.h>
#include <vtkNamedColors.h>
#include <vtkNew.h>
#include <vtkOutlineFilter.h>
#include <vtkPolyDataMapper.h>
#include <vtkProperty.h>
#include <vtkRenderWindow.h>
#include <vtkRenderWindowInteractor.h>
#include <vtkRenderer.h>
#include <vtkStripper.h>
#include <vtkImageData.h>
#include <vtkVersion.h>
#include <vtkCallbackCommand.h>
#include <vtkTextActor.h>
#include <vtkTextProperty.h>
#define MPEG
#ifdef MPEG
#include <vtkFFMPEGWriter.h>
#endif
#include <vtkProperty2D.h>
#include <vtkSliderWidget.h>
#include <vtkSliderRepresentation2D.h>
#include <vtkWindowToImageFilter.h>
#include <array>
#include <string>
#include <Grid/Grid.h>
#define USE_FLYING_EDGES
#ifdef USE_FLYING_EDGES
#include <vtkFlyingEdges3D.h>
typedef vtkFlyingEdges3D isosurface;
#else
#include <vtkMarchingCubes.h>
typedef vtkMarchingCubes isosurface;
#endif
int mpeg = 0 ;
int xlate = 0 ;
template <class T> void readFile(T& out, std::string const fname){
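  // Read a SciDAC/LIME density record; without HAVE_LIME this is a no-op
  // and 'out' is left untouched.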
#ifdef HAVE_LIME
Grid::emptyUserRecord record;
Grid::ScidacReader RD;
RD.open(fname);
RD.readScidacFieldRecord(out,record);
RD.close();
#endif
}
using namespace Grid;
class FrameUpdater : public vtkCallbackCommand
{
public:
FrameUpdater() {
TimerCount = 0;
xoff = 0;
t = 0;
imageData = nullptr;
grid_data = nullptr;
timerId = 0;
maxCount = -1;
}
static FrameUpdater* New()
{
FrameUpdater* cb = new FrameUpdater;
cb->TimerCount = 0;
return cb;
}
virtual void Execute(vtkObject* caller, unsigned long eventId,void* vtkNotUsed(callData))
{
const int max=256;
char text_string[max];
if (this->TimerCount < this->maxCount) {
if (vtkCommand::TimerEvent == eventId)
{
++this->TimerCount;
// Make a new frame
auto latt_size = grid_data->Grid()->GlobalDimensions();
for(int xx=0;xx<latt_size[0];xx++){
for(int yy=0;yy<latt_size[1];yy++){
for(int zz=0;zz<latt_size[2];zz++){
int x = (xx+xoff)%latt_size[0];
Coordinate site({x,yy,zz,t});
RealD value = real(peekSite(*grid_data,site));
imageData->SetScalarComponentFromDouble(xx,yy,zz,0,value);
}}}
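        // Advance the animation: with --xlate the x barrel-shift runs
        // fastest (t steps when it wraps); otherwise t runs fastest.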
if ( xlate ) {
xoff = (xoff + 1)%latt_size[0];
if ( xoff== 0 ) t = (t+1)%latt_size[3];
} else {
t = (t+1)%latt_size[3];
if ( t== 0 ) xoff = (xoff + 1)%latt_size[0];
}
snprintf(text_string,max,"T=%d",t);
text->SetInput(text_string);
std::cout << this->TimerCount<<"/"<<maxCount<< " xoff "<<xoff<<" t "<<t <<std::endl;
imageData->Modified();
vtkRenderWindowInteractor* iren = dynamic_cast<vtkRenderWindowInteractor*>(caller);
iren->GetRenderWindow()->Render();
}
}
if (this->TimerCount >= this->maxCount) {
vtkRenderWindowInteractor* iren = dynamic_cast<vtkRenderWindowInteractor*>(caller);
if (this->timerId > -1)
{
iren->DestroyTimer(this->timerId);
}
}
}
private:
int TimerCount;
int xoff;
int t;
public:
Grid::LatticeComplexD * grid_data;
vtkImageData* imageData = nullptr;
vtkTextActor* text = nullptr;
vtkFFMPEGWriter *writer = nullptr;
int timerId ;
int maxCount ;
double rms;
isosurface * posExtractor;
isosurface * negExtractor;
};
class SliderCallback : public vtkCommand
{
public:
static SliderCallback* New()
{
return new SliderCallback;
}
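  // On slider interaction: read the new contour level (in units of each
  // field's RMS) and retarget the +/- isosurface extractors of every file.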
virtual void Execute(vtkObject* caller, unsigned long eventId, void* callData)
{
vtkSliderWidget *sliderWidget = vtkSliderWidget::SafeDownCast(caller);
if (sliderWidget)
{
contour = ((vtkSliderRepresentation *)sliderWidget->GetRepresentation())->GetValue();
}
for(int i=0;i<fu_list.size();i++){
fu_list[i]->posExtractor->SetValue(0, SliderCallback::contour*fu_list[i]->rms);
fu_list[i]->negExtractor->SetValue(0, -SliderCallback::contour*fu_list[i]->rms);
fu_list[i]->posExtractor->Modified();
fu_list[i]->negExtractor->Modified();
}
}
public:
static double contour;
std::vector<FrameUpdater *> fu_list;
};
double SliderCallback::contour;
int main(int argc, char* argv[])
{
using namespace Grid;
Grid_init(&argc, &argv);
GridLogLayout();
auto latt_size = GridDefaultLatt();
auto simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd());
auto mpi_layout = GridDefaultMpi();
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
std::cout << argc << " command Line arguments "<<std::endl;
for(int c=0;c<argc;c++) {
std::cout << " - "<<argv[c]<<std::endl;
}
std::vector<std::string> file_list;
double default_contour = 1.0;
std::string arg;
#ifdef MPEG
if( GridCmdOptionExists(argv,argv+argc,"--mpeg") ){
mpeg = 1;
}
#endif
if( GridCmdOptionExists(argv,argv+argc,"--xlate") ){
xlate = 1;
}
if( GridCmdOptionExists(argv,argv+argc,"--isosurface") ){
arg=GridCmdOptionPayload(argv,argv+argc,"--isosurface");
GridCmdOptionFloat(arg,default_contour);
}
if( GridCmdOptionExists(argv,argv+argc,"--file1") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--file1");
file_list.push_back(arg);
}
if( GridCmdOptionExists(argv,argv+argc,"--file2") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--file2");
file_list.push_back(arg);
}
if( GridCmdOptionExists(argv,argv+argc,"--file3") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--file3");
file_list.push_back(arg);
}
if( GridCmdOptionExists(argv,argv+argc,"--file4") ){
arg = GridCmdOptionPayload(argv,argv+argc,"--file4");
file_list.push_back(arg);
}
for(int c=0;c<file_list.size();c++) {
std::cout << " file: "<<file_list[c]<<std::endl;
}
// Common things:
vtkNew<vtkNamedColors> colors;
std::array<unsigned char, 4> posColor{{240, 184, 160, 255}}; colors->SetColor("posColor", posColor.data());
std::array<unsigned char, 4> bkg{{51, 77, 102, 255}}; colors->SetColor("BkgColor", bkg.data());
// Create the renderer, the render window, and the interactor. The renderer
// draws into the render window, the interactor enables mouse- and
// keyboard-based interaction with the data within the render window.
//
vtkNew<vtkRenderWindow> renWin;
vtkNew<vtkRenderWindowInteractor> iren;
iren->SetRenderWindow(renWin);
std::vector<LatticeComplexD> data(file_list.size(),&Grid);
FieldMetaData header;
int frameCount;
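  // MPEG mode renders one frame per timeslice; the interactive view also
  // sweeps the x barrel-shift, hence latt_size[0]*latt_size[3] frames.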
if ( mpeg ) frameCount = latt_size[3];
else frameCount = latt_size[0] * latt_size[3];
std::vector<FrameUpdater *> fu_list;
for (int f=0;f<file_list.size();f++){
// It is convenient to create an initial view of the data. The FocalPoint
// and Position form a vector direction. Later on (ResetCamera() method)
// this vector is used to position the camera to look at the data in
// this direction.
vtkNew<vtkCamera> aCamera;
aCamera->SetViewUp(0, 0, -1);
aCamera->SetPosition(0, -1000, 0);
aCamera->SetFocalPoint(0, 0, 0);
aCamera->ComputeViewPlaneNormal();
aCamera->Azimuth(30.0);
aCamera->Elevation(30.0);
vtkNew<vtkRenderer> aRenderer;
renWin->AddRenderer(aRenderer);
double vol = data[f].Grid()->gSites();
std::cout << "Reading "<<file_list[f]<<std::endl;
readFile(data[f],file_list[f]);
auto nrm = norm2(data[f]);
auto nrmbar = nrm/vol;
auto rms = sqrt(nrmbar);
double contour = default_contour * rms; // default to 1 x RMS
    // Copy the t=0 timeslice of the lattice field into a vtkImageData
    // volume; the FrameUpdater timer callback refreshes it per frame.
vtkNew<vtkImageData> imageData;
imageData->SetDimensions(latt_size[0],latt_size[1],latt_size[2]);
imageData->AllocateScalars(VTK_DOUBLE, 1);
for(int xx=0;xx<latt_size[0];xx++){
for(int yy=0;yy<latt_size[1];yy++){
for(int zz=0;zz<latt_size[2];zz++){
Coordinate site({xx,yy,zz,0});
RealD value = real(peekSite(data[f],site));
imageData->SetScalarComponentFromDouble(xx,yy,zz,0,value);
}}}
vtkNew<isosurface> posExtractor;
posExtractor->SetInputData(imageData);
posExtractor->SetValue(0, contour);
vtkNew<vtkStripper> posStripper;
posStripper->SetInputConnection(posExtractor->GetOutputPort());
vtkNew<vtkPolyDataMapper> posMapper;
posMapper->SetInputConnection(posStripper->GetOutputPort());
posMapper->ScalarVisibilityOff();
vtkNew<vtkActor> pos;
pos->SetMapper(posMapper);
pos->GetProperty()->SetDiffuseColor(colors->GetColor3d("posColor").GetData());
pos->GetProperty()->SetSpecular(0.3);
pos->GetProperty()->SetSpecularPower(20);
pos->GetProperty()->SetOpacity(0.5);
    // An isosurface, or contour value, is set for each extractor.
    // The triangle stripper is used to create triangle strips from the
    // isosurface; these render much faster on many systems.
vtkNew<isosurface> negExtractor;
negExtractor->SetInputData(imageData);
negExtractor->SetValue(0, -contour);
vtkNew<vtkStripper> negStripper;
negStripper->SetInputConnection(negExtractor->GetOutputPort());
vtkNew<vtkPolyDataMapper> negMapper;
negMapper->SetInputConnection(negStripper->GetOutputPort());
negMapper->ScalarVisibilityOff();
vtkNew<vtkActor> neg;
neg->SetMapper(negMapper);
neg->GetProperty()->SetDiffuseColor(colors->GetColor3d("Ivory").GetData());
// An outline provides context around the data.
vtkNew<vtkOutlineFilter> outlineData;
outlineData->SetInputData(imageData);
vtkNew<vtkPolyDataMapper> mapOutline;
mapOutline->SetInputConnection(outlineData->GetOutputPort());
vtkNew<vtkActor> outline;
outline->SetMapper(mapOutline);
outline->GetProperty()->SetColor(colors->GetColor3d("Black").GetData());
vtkNew<vtkTextActor> Text;
Text->SetInput(file_list[f].c_str());
Text->SetPosition2(0,0);
Text->GetTextProperty()->SetFontSize(48);
Text->GetTextProperty()->SetColor(colors->GetColor3d("Gold").GetData());
vtkNew<vtkTextActor> TextT;
TextT->SetInput("T=0");
TextT->SetPosition(0,.9*1025);
TextT->GetTextProperty()->SetFontSize(48);
TextT->GetTextProperty()->SetColor(colors->GetColor3d("Gold").GetData());
// Actors are added to the renderer. An initial camera view is created.
// The Dolly() method moves the camera towards the FocalPoint,
// thereby enlarging the image.
aRenderer->AddActor(Text);
aRenderer->AddActor(TextT);
aRenderer->AddActor(outline);
aRenderer->AddActor(pos);
aRenderer->AddActor(neg);
// Sign up to receive TimerEvent
vtkNew<FrameUpdater> fu;
fu->imageData = imageData;
fu->grid_data = &data[f];
fu->text = TextT;
fu->maxCount = frameCount;
fu->posExtractor = posExtractor;
fu->negExtractor = negExtractor;
fu->rms = rms;
iren->AddObserver(vtkCommand::TimerEvent, fu);
aRenderer->SetActiveCamera(aCamera);
aRenderer->ResetCamera();
aRenderer->SetBackground(colors->GetColor3d("BkgColor").GetData());
aCamera->Dolly(1.0);
double nf = file_list.size();
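    // Tile the renderers left to right, one viewport per input file.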
std::cout << " Adding renderer " <<f<<" of "<<nf<<std::endl;
aRenderer->SetViewport((1.0/nf)*f, 0.0,(1.0/nf)*(f+1) , 1.0);
// Note that when camera movement occurs (as it does in the Dolly()
// method), the clipping planes often need adjusting. Clipping planes
// consist of two planes: near and far along the view direction. The
// near plane clips out objects in front of the plane; the far plane
// clips out objects behind the plane. This way only what is drawn
// between the planes is actually rendered.
aRenderer->ResetCameraClippingRange();
fu_list.push_back(fu);
}
// Set a background color for the renderer and set the size of the
// render window (expressed in pixels).
// Initialize the event loop and then start it.
renWin->SetSize(1024*file_list.size(), 1024);
renWin->SetWindowName("FieldDensity");
renWin->Render();
iren->Initialize();
if ( mpeg ) {
#ifdef MPEG
vtkWindowToImageFilter *imageFilter = vtkWindowToImageFilter::New();
imageFilter->SetInput( renWin );
imageFilter->SetInputBufferTypeToRGB();
vtkFFMPEGWriter *writer = vtkFFMPEGWriter::New();
writer->SetFileName("movie.avi");
writer->SetRate(1);
writer->SetInputConnection(imageFilter->GetOutputPort());
writer->Start();
for(int i=0;i<fu_list[0]->maxCount;i++){
for(int f=0;f<fu_list.size();f++){
fu_list[f]->Execute(iren,vtkCommand::TimerEvent,nullptr);
}
imageFilter->Modified();
writer->Write();
}
writer->End();
writer->Delete();
#else
    assert(0 && "MPEG support not compiled");
#endif
} else {
// Add control of contour threshold
// Create a slider widget
vtkSmartPointer<vtkSliderRepresentation2D> sliderRep = vtkSmartPointer<vtkSliderRepresentation2D>::New();
sliderRep->SetMinimumValue(0.1);
sliderRep->SetMaximumValue(5.0);
sliderRep->SetValue(1.0);
sliderRep->SetTitleText("Fraction RMS");
// Set color properties:
// Change the color of the knob that slides
// sliderRep->GetSliderProperty()->SetColor(colors->GetColor3d("Green").GetData());
sliderRep->GetTitleProperty()->SetColor(colors->GetColor3d("AliceBlue").GetData());
sliderRep->GetLabelProperty()->SetColor(colors->GetColor3d("AliceBlue").GetData());
sliderRep->GetSelectedProperty()->SetColor(colors->GetColor3d("DeepPink").GetData());
// Change the color of the bar
sliderRep->GetTubeProperty()->SetColor(colors->GetColor3d("MistyRose").GetData());
sliderRep->GetCapProperty()->SetColor(colors->GetColor3d("Yellow").GetData());
sliderRep->SetSliderLength(0.05);
sliderRep->SetSliderWidth(0.025);
sliderRep->SetEndCapLength(0.02);
double nf = file_list.size();
sliderRep->GetPoint1Coordinate()->SetCoordinateSystemToNormalizedDisplay();
sliderRep->GetPoint1Coordinate()->SetValue(0.1, 0.1);
sliderRep->GetPoint2Coordinate()->SetCoordinateSystemToNormalizedDisplay();
sliderRep->GetPoint2Coordinate()->SetValue(0.9/nf, 0.1);
vtkSmartPointer<vtkSliderWidget> sliderWidget = vtkSmartPointer<vtkSliderWidget>::New();
sliderWidget->SetInteractor(iren);
sliderWidget->SetRepresentation(sliderRep);
sliderWidget->SetAnimationModeToAnimate();
sliderWidget->EnabledOn();
// Create the slider callback
vtkSmartPointer<SliderCallback> slidercallback = vtkSmartPointer<SliderCallback>::New();
slidercallback->fu_list = fu_list;
sliderWidget->AddObserver(vtkCommand::InteractionEvent, slidercallback);
int timerId = iren->CreateRepeatingTimer(300);
std::cout << "timerId: " << timerId << std::endl;
// Start the interaction and timer
iren->Start();
}
Grid_finalize();
return EXIT_SUCCESS;
}

visualisation/README Normal file
View File

@ -0,0 +1,113 @@
========================================
Visualisation of Grid / SciDAC format density fields using VTK (visualisation toolkit). Peter Boyle, 2025.
========================================
Uses:
https://vtk.org
Files are, for example, those produced by
Grid/HMC/ComputeWilsonFlow.cc
and
Grid/HMC/site_plaquette.cc
========================================
Prerequisites:
========================================
1) Install ffmpeg-7.0.2 (developer install, includes headers and libraries).
MacOS note: must install ffmpeg from source -- homebrew only installs binaries.
https://ffmpeg.org/download.html#releases
Note: the latest ffmpeg (7.1.1) broke compatibility with VTK; stay with 7.0.2.
2) Build and install VTK-9.4.2, with FFMPEG support enabled.
This is particularly involved on MacOS, so documented here.
cd VTK-9.4.2
mkdir build
cd build
ccmake ..
Using ccmake editor, set:
FFMPEG_DIR /usr/local
Toggle "advanced mode" (t)
Set:
CMAKE_EXE_LINKER_FLAGS
CMAKE_MODULE_LINKER_FLAGS
CMAKE_SHARED_LINKER_FLAGS
each to:
-framework Foundation -framework AudioToolbox -framework CoreAudio -liconv -lm -framework AVFoundation -framework CoreVideo -framework CoreMedia -framework CoreGraphics -framework AudioToolbox -framework OpenGL -framework OpenGL -framework VideoToolbox -framework CoreImage -framework AppKit -framework CoreFoundation -framework CoreServices -lz -lbz2 -Wl,-framework,CoreFoundation -Wl,-framework,Security -L/usr/local/lib -lavdevice -lavfilter -lavformat -lavcodec -lswresample -lswscale -lavutil
Set paths for each of
FFMPEG_DIR /usr/local
FFMPEG_avcodec_INCLUDE_DIR /usr/local/include
FFMPEG_avcodec_LIBRARY /usr/local/lib/libavcodec.a
FFMPEG_avdevice_INCLUDE_DIR /usr/local/include
FFMPEG_avdevice_LIBRARY /usr/local/lib/libavdevice.a
FFMPEG_avfilter_INCLUDE_DIR /usr/local/include
FFMPEG_avfilter_LIBRARY /usr/local/lib/libavfilter.a
FFMPEG_avformat_INCLUDE_DIR /usr/local/include
FFMPEG_avformat_LIBRARY /usr/local/lib/libavformat.a
FFMPEG_avresample_INCLUDE_DIR /usr/local/include
FFMPEG_avresample_LIBRARY /usr/local/lib/libavresample.a
FFMPEG_avutil_INCLUDE_DIR /usr/local/include
FFMPEG_avutil_LIBRARY /usr/local/lib/libavutil.a
FFMPEG_swresample_INCLUDE_DIR /usr/local/include
FFMPEG_swresample_LIBRARY /usr/local/lib/libswresample.a
FFMPEG_swscale_INCLUDE_DIR /usr/local/include
FFMPEG_swscale_LIBRARY /usr/local/lib/libswscale.a
VTK_MODULE_ENABLE_VTK_IOFFMPEG YES
VTK really should make it easier to pick up the flags required for FFMPEG linkage, especially as they are very quirky on MacOS.
========================================
Grid:
========================================
3) Build and install a version of Grid
4) Ensure "grid-config" is in your path.
5) cd Grid/visualisation/
libs=`grid-config --libs`
ldflags=`grid-config --ldflags`
cxxflags=`grid-config --cxxflags`
cxx=`grid-config --cxx`
mkdir build
cd build
LDFLAGS="$ldflags $libs " cmake .. -DCMAKE_CXX_COMPILER=$cxx -DCMAKE_CXX_FLAGS=$cxxflags
make
6) Invoke as:
FieldDensityAnimate --isosurface <float-from-0-to-5> --grid X.Y.Z.T --file1 SciDacDensityFile1 [--xlate] [--mpeg]
FieldDensityAnimate --isosurface <float-from-0-to-5> --grid X.Y.Z.T --file1 SciDacDensityFile1 --file2 SciDacDensityFile2 [--xlate] [--mpeg]
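e.g., with a hypothetical density file:
FieldDensityAnimate --isosurface 1.0 --grid 16.16.16.32 --file1 wflow.density.scidac --mpeg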
==================================
Extensions
==================================
7) Direct calling from Grid ?:
Not yet implemented, but one could develop a small interface to write a Lattice scalar field into an MPEG movie directly from running code; see the sketch below.
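A minimal sketch of one possible building block (the helper name and its
signature are hypothetical; only VTK/Grid calls already used by
FieldDensityAnimate.cxx appear):
#include <vtkImageData.h>
#include <Grid/Grid.h>
using namespace Grid;
// Hypothetical: copy one timeslice of a lattice scalar density into a
// vtkImageData volume, ready for isosurfacing or movie frame capture.
void LatticeToImage(LatticeComplexD &field, int t, vtkImageData *img)
{
  auto latt = field.Grid()->GlobalDimensions();
  img->SetDimensions(latt[0], latt[1], latt[2]);
  img->AllocateScalars(VTK_DOUBLE, 1);
  for (int x = 0; x < latt[0]; x++) {
    for (int y = 0; y < latt[1]; y++) {
      for (int z = 0; z < latt[2]; z++) {
        Coordinate site({x, y, z, t});
        RealD value = real(peekSite(field, site));
        img->SetScalarComponentFromDouble(x, y, z, 0, value);
      }
    }
  }
}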
8) Example Python code: FieldDensity.py. This is not interfaced to Grid.

Binary file not shown.

View File

@ -0,0 +1,9 @@
libs=`grid-config --libs`
ldflags=`grid-config --ldflags`
cxxflags=`grid-config --cxxflags`
cxx=`grid-config --cxx`
mkdir build
cd build
LDFLAGS="$ldflags $libs " cmake .. -DCMAKE_CXX_COMPILER=$cxx -DCMAKE_CXX_FLAGS=$cxxflags