Mirror of https://github.com/paboyle/Grid.git (synced 2025-06-13 04:37:05 +01:00)

Compare commits: 108 commits, feature/de ... 21de6f7da8
Commits (SHA1, newest first; the author and date columns carried no data):

21de6f7da8  dbe39f9ce0  ab3de50d5e  c545bd2139  6a1c64fbdd  b75809ed61
ecaf228e5c  6d015ae8fc  233150d93f  7af8c77a52  a957e7bfa1  cee4c8ce8c
96bf814d8c  7ddc422788  e652fc2825  a49fa3f8d0  cd452a2f91  4f89f603ae
11dc2c5e1d  6fec3c15ca  938c47480f  3811d19298  83a3ab6b6f  d66a9af6a3
adc90d3a86  ebbd015c5c  4ab73b36b2  130e07a422  8f47bb367e  0c3cb60135
9eae8fca5d  882a217074  e465fce201  d41542c64b  199818bd6c  fe66c7ca30
e9177e4af3  d15a6c5933  25ab9325e7  19f9378b98  785bc7a14f  1a1fe85428
0000d2e558  9ffd1ed4ce  3d014864e2  1d22841811  a1cdda833f  ad6db92690
e8ff9d8e50  795769c636  267a39d943  3624bd3d22  bc12dbbb38  eb8a008a8f
c4d9aa1a21  6ae809ed40  311e2aab3f  438dfbdb83  b2ce760cf4  b1ba209696
cb3e529b1e  717f647418  98e7418187  fe05bf48b1  d2dd8f54e2  7726ee4b16
ba9bbe0221  4c3dd82d84  44e911b5b7  a7a16df9d0  382e0abefd  6fdefe5b90
4788dd8e2e  1cc5f221f3  93251bfba0  18b79508b8  4de5ed1613  0baaddbe98
8729c46169  09f81fe7c3  1876e5b7c0  355ec76257  b50fb34e71  de84d730ff
c74d11e3d7  84cab5e6e7  c4fc972fec  4f17c8d081  aaab753982  570b72a47b
a5798a89ed  3f3661a86f  f7e2f9a401  2848a9b558  d4868991af  e99d42404e
3ba019c747  47429218bb  5a4f9bf2e3  f617468e04  ee4046fe92  2a9cfeb9ea
1147b8ea40  3f9119b39d  35e8225abd  bdbfbb7a14  f7d4be8d96  8d305df0db
@@ -191,7 +191,7 @@ public:
     Lattice<sobj> pgbuf(&pencil_g);
     autoView(pgbuf_v , pgbuf, CpuWrite);
-    std::cout << "CPU view" << std::endl;
+    //std::cout << "CPU view" << std::endl;

     typedef typename FFTW<scalar>::FFTW_scalar FFTW_scalar;
     typedef typename FFTW<scalar>::FFTW_plan FFTW_plan;

@@ -215,7 +215,7 @@ public:
     else if ( sign == forward ) div = 1.0;
     else assert(0);

-    std::cout << GridLogPerformance<<"Making FFTW plan" << std::endl;
+    //std::cout << GridLogPerformance<<"Making FFTW plan" << std::endl;
     FFTW_plan p;
     {
       FFTW_scalar *in = (FFTW_scalar *)&pgbuf_v[0];

@@ -229,7 +229,7 @@ public:
     }

     // Barrel shift and collect global pencil
-    std::cout << GridLogPerformance<<"Making pencil" << std::endl;
+    //std::cout << GridLogPerformance<<"Making pencil" << std::endl;
     Coordinate lcoor(Nd), gcoor(Nd);
     result = source;
     int pc = processor_coor[dim];

@@ -251,7 +251,7 @@ public:
       }
     }

-    std::cout <<GridLogPerformance<< "Looping orthog" << std::endl;
+    //std::cout <<GridLogPerformance<< "Looping orthog" << std::endl;
     // Loop over orthog coords
     int NN=pencil_g.lSites();
     GridStopWatch timer;

@@ -274,7 +274,7 @@ public:
     usec += timer.useconds();
     flops+= flops_call*NN;

-    std::cout <<GridLogPerformance<< "Writing back results " << std::endl;
+    //std::cout <<GridLogPerformance<< "Writing back results " << std::endl;
     // writing out result
     {
       autoView(pgbuf_v,pgbuf,CpuRead);

@@ -291,7 +291,7 @@ public:
     }
     result = result*div;

-    std::cout <<GridLogPerformance<< "Destroying plan " << std::endl;
+    //std::cout <<GridLogPerformance<< "Destroying plan " << std::endl;
     // destroying plan
     FFTW<scalar>::fftw_destroy_plan(p);
 #endif
@@ -277,6 +277,38 @@ public:
     assert(0);
   }
 };
+template<class Matrix,class Field>
+class ShiftedNonHermitianLinearOperator : public LinearOperatorBase<Field> {
+  Matrix &_Mat;
+  RealD shift;
+public:
+  ShiftedNonHermitianLinearOperator(Matrix &Mat,RealD shft): _Mat(Mat),shift(shft){};
+  // Support for coarsening to a multigrid
+  void OpDiag (const Field &in, Field &out) {
+    _Mat.Mdiag(in,out);
+    out = out + shift*in;
+  }
+  void OpDir (const Field &in, Field &out,int dir,int disp) {
+    _Mat.Mdir(in,out,dir,disp);
+  }
+  void OpDirAll (const Field &in, std::vector<Field> &out){
+    _Mat.MdirAll(in,out);
+  };
+  void Op (const Field &in, Field &out){
+    _Mat.M(in,out);
+    out = out + shift * in;
+  }
+  void AdjOp (const Field &in, Field &out){
+    _Mat.Mdag(in,out);
+    out = out + shift * in;
+  }
+  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
+    assert(0);
+  }
+  void HermOp(const Field &in, Field &out){
+    assert(0);
+  }
+};

 //////////////////////////////////////////////////////////
 // Even Odd Schur decomp operators; there are several
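The added ShiftedNonHermitianLinearOperator wraps a matrix M so that Op and AdjOp apply M + shift and M^dag + shift. A minimal usage sketch (my illustration, not part of the commit; MyMatrix, Field, the grid, the shift value and the solver settings are placeholders, while TrivialPrecon and the six-argument GCR constructor mirror their use elsewhere in this diff):

    // Hedged sketch: solve (M + sigma) psi = src with the wrapped operator.
    // Only ShiftedNonHermitianLinearOperator itself comes from this diff.
    RealD sigma = 0.1;                                  // illustrative shift
    ShiftedNonHermitianLinearOperator<MyMatrix,Field> ShiftedOp(M,sigma);

    TrivialPrecon<Field> simple;                        // identity preconditioner
    PrecGeneralisedConjugateResidualNonHermitian<Field> GCR(1.0e-8,1000,ShiftedOp,simple,16,16);
    Field psi(grid); psi = Zero();
    GCR(src,psi);                                       // psi ~= (M + sigma)^{-1} src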
@@ -208,8 +208,8 @@ public:
     assert(Bkn.size()==batchCount);
     assert(Cmn.size()==batchCount);

-    assert(OpA!=GridBLAS_OP_T); // Complex case expect no transpose
-    assert(OpB!=GridBLAS_OP_T);
+    //assert(OpA!=GridBLAS_OP_T); // Complex case expect no transpose
+    //assert(OpB!=GridBLAS_OP_T);

     int lda = m; // m x k column major
     int ldb = k; // k x n column major

@@ -367,28 +367,67 @@ public:
       Eigen::Map<Eigen::MatrixXcd> eAmk(Amk[p],m,k);
       Eigen::Map<Eigen::MatrixXcd> eBkn(Bkn[p],k,n);
       Eigen::Map<Eigen::MatrixXcd> eCmn(Cmn[p],m,n);
-      eCmn = beta * eCmn + alpha * eAmk * eBkn ;
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk * eBkn ;
+      else
+        eCmn = alpha * eAmk * eBkn ;
     });
   } else if ( (OpA == GridBLAS_OP_C ) && (OpB == GridBLAS_OP_N) ) {
     thread_for (p, batchCount, {
       Eigen::Map<Eigen::MatrixXcd> eAmk(Amk[p],k,m);
       Eigen::Map<Eigen::MatrixXcd> eBkn(Bkn[p],k,n);
       Eigen::Map<Eigen::MatrixXcd> eCmn(Cmn[p],m,n);
-      eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn ;
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn ;
+      else
+        eCmn = alpha * eAmk.adjoint() * eBkn ;
     });
+  } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_N) ) {
+    thread_for (p, batchCount, {
+      Eigen::Map<Eigen::MatrixXcd> eAmk(Amk[p],k,m);
+      Eigen::Map<Eigen::MatrixXcd> eBkn(Bkn[p],k,n);
+      Eigen::Map<Eigen::MatrixXcd> eCmn(Cmn[p],m,n);
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn ;
+      else
+        eCmn = alpha * eAmk.transpose() * eBkn ;
+    });
   } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_C) ) {
     thread_for (p, batchCount, {
       Eigen::Map<Eigen::MatrixXcd> eAmk(Amk[p],m,k);
       Eigen::Map<Eigen::MatrixXcd> eBkn(Bkn[p],n,k);
       Eigen::Map<Eigen::MatrixXcd> eCmn(Cmn[p],m,n);
-      eCmn = beta * eCmn + alpha * eAmk * eBkn.adjoint() ;
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk * eBkn.adjoint() ;
+      else
+        eCmn = alpha * eAmk * eBkn.adjoint() ;
     });
+  } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_T) ) {
+    thread_for (p, batchCount, {
+      Eigen::Map<Eigen::MatrixXcd> eAmk(Amk[p],m,k);
+      Eigen::Map<Eigen::MatrixXcd> eBkn(Bkn[p],n,k);
+      Eigen::Map<Eigen::MatrixXcd> eCmn(Cmn[p],m,n);
+      eCmn = beta * eCmn + alpha * eAmk * eBkn.transpose() ;
+    });
   } else if ( (OpA == GridBLAS_OP_C ) && (OpB == GridBLAS_OP_C) ) {
     thread_for (p, batchCount, {
       Eigen::Map<Eigen::MatrixXcd> eAmk(Amk[p],k,m);
       Eigen::Map<Eigen::MatrixXcd> eBkn(Bkn[p],n,k);
       Eigen::Map<Eigen::MatrixXcd> eCmn(Cmn[p],m,n);
-      eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn.adjoint() ;
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn.adjoint() ;
+      else
+        eCmn = alpha * eAmk.adjoint() * eBkn.adjoint() ;
     } );
+  } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_T) ) {
+    thread_for (p, batchCount, {
+      Eigen::Map<Eigen::MatrixXcd> eAmk(Amk[p],k,m);
+      Eigen::Map<Eigen::MatrixXcd> eBkn(Bkn[p],n,k);
+      Eigen::Map<Eigen::MatrixXcd> eCmn(Cmn[p],m,n);
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn.transpose() ;
+      else
+        eCmn = alpha * eAmk.transpose() * eBkn.transpose() ;
+    } );
   } else {
     assert(0);
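The `if (std::abs(beta) != 0.0)` guards introduced throughout these GEMM branches follow the usual BLAS convention that C is write-only when beta is zero. A standalone toy (Eigen only, not code from this commit) showing why the unguarded form misbehaves on uninitialised output:

    #include <Eigen/Dense>
    #include <iostream>
    #include <limits>

    int main() {
      Eigen::MatrixXd A = Eigen::MatrixXd::Random(2,2);
      Eigen::MatrixXd B = Eigen::MatrixXd::Random(2,2);
      Eigen::MatrixXd C(2,2);
      C.setConstant(std::numeric_limits<double>::quiet_NaN()); // "uninitialised" output
      double alpha = 1.0, beta = 0.0;

      Eigen::MatrixXd naive = beta * C + alpha * A * B;   // 0.0*NaN = NaN: result is poisoned
      Eigen::MatrixXd guarded(2,2);
      if (beta != 0.0) guarded = beta * C + alpha * A * B;
      else             guarded = alpha * A * B;            // finite, the intended semantics

      std::cout << "naive(0,0)=" << naive(0,0)
                << "  guarded(0,0)=" << guarded(0,0) << std::endl;
      return 0;
    }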
@@ -414,8 +453,8 @@ public:
     RealD t2=usecond();
     int32_t batchCount = Amk.size();

-    assert(OpA!=GridBLAS_OP_T); // Complex case expect no transpose
-    assert(OpB!=GridBLAS_OP_T);
+    //assert(OpA!=GridBLAS_OP_T); // Complex case expect no transpose
+    //assert(OpB!=GridBLAS_OP_T);

     int lda = m; // m x k column major
     int ldb = k; // k x n column major

@@ -514,28 +553,70 @@ public:
       Eigen::Map<Eigen::MatrixXcf> eAmk(Amk[p],m,k);
       Eigen::Map<Eigen::MatrixXcf> eBkn(Bkn[p],k,n);
       Eigen::Map<Eigen::MatrixXcf> eCmn(Cmn[p],m,n);
-      eCmn = beta * eCmn + alpha * eAmk * eBkn ;
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk * eBkn ;
+      else
+        eCmn = alpha * eAmk * eBkn ;
     });
   } else if ( (OpA == GridBLAS_OP_C ) && (OpB == GridBLAS_OP_N) ) {
     thread_for (p, batchCount, {
       Eigen::Map<Eigen::MatrixXcf> eAmk(Amk[p],k,m);
       Eigen::Map<Eigen::MatrixXcf> eBkn(Bkn[p],k,n);
       Eigen::Map<Eigen::MatrixXcf> eCmn(Cmn[p],m,n);
-      eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn ;
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn ;
+      else
+        eCmn = alpha * eAmk.adjoint() * eBkn ;
     });
+  } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_N) ) {
+    thread_for (p, batchCount, {
+      Eigen::Map<Eigen::MatrixXcf> eAmk(Amk[p],k,m);
+      Eigen::Map<Eigen::MatrixXcf> eBkn(Bkn[p],k,n);
+      Eigen::Map<Eigen::MatrixXcf> eCmn(Cmn[p],m,n);
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn ;
+      else
+        eCmn = alpha * eAmk.transpose() * eBkn ;
+    });
   } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_C) ) {
     thread_for (p, batchCount, {
       Eigen::Map<Eigen::MatrixXcf> eAmk(Amk[p],m,k);
       Eigen::Map<Eigen::MatrixXcf> eBkn(Bkn[p],n,k);
       Eigen::Map<Eigen::MatrixXcf> eCmn(Cmn[p],m,n);
-      eCmn = beta * eCmn + alpha * eAmk * eBkn.adjoint() ;
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk * eBkn.adjoint() ;
+      else
+        eCmn = alpha * eAmk * eBkn.adjoint() ;
     });
+  } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_T) ) {
+    thread_for (p, batchCount, {
+      Eigen::Map<Eigen::MatrixXcf> eAmk(Amk[p],m,k);
+      Eigen::Map<Eigen::MatrixXcf> eBkn(Bkn[p],n,k);
+      Eigen::Map<Eigen::MatrixXcf> eCmn(Cmn[p],m,n);
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk * eBkn.transpose() ;
+      else
+        eCmn = alpha * eAmk * eBkn.transpose() ;
+    });
   } else if ( (OpA == GridBLAS_OP_C ) && (OpB == GridBLAS_OP_C) ) {
     thread_for (p, batchCount, {
       Eigen::Map<Eigen::MatrixXcf> eAmk(Amk[p],k,m);
       Eigen::Map<Eigen::MatrixXcf> eBkn(Bkn[p],n,k);
       Eigen::Map<Eigen::MatrixXcf> eCmn(Cmn[p],m,n);
-      eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn.adjoint() ;
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk.adjoint() * eBkn.adjoint() ;
+      else
+        eCmn = alpha * eAmk.adjoint() * eBkn.adjoint() ;
     } );
+  } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_T) ) {
+    thread_for (p, batchCount, {
+      Eigen::Map<Eigen::MatrixXcf> eAmk(Amk[p],k,m);
+      Eigen::Map<Eigen::MatrixXcf> eBkn(Bkn[p],n,k);
+      Eigen::Map<Eigen::MatrixXcf> eCmn(Cmn[p],m,n);
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn.transpose() ;
+      else
+        eCmn = alpha * eAmk.transpose() * eBkn.transpose() ;
+    } );
   } else {
     assert(0);

@@ -661,29 +742,41 @@ public:
       Eigen::Map<Eigen::MatrixXf> eAmk(Amk[p],m,k);
       Eigen::Map<Eigen::MatrixXf> eBkn(Bkn[p],k,n);
       Eigen::Map<Eigen::MatrixXf> eCmn(Cmn[p],m,n);
-      eCmn = beta * eCmn + alpha * eAmk * eBkn ;
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk * eBkn ;
+      else
+        eCmn = alpha * eAmk * eBkn ;
     });
   } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_N) ) {
     thread_for (p, batchCount, {
       Eigen::Map<Eigen::MatrixXf> eAmk(Amk[p],k,m);
       Eigen::Map<Eigen::MatrixXf> eBkn(Bkn[p],k,n);
       Eigen::Map<Eigen::MatrixXf> eCmn(Cmn[p],m,n);
-      eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn ;
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn ;
+      else
+        eCmn = alpha * eAmk.transpose() * eBkn ;
     });
   } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_T) ) {
     thread_for (p, batchCount, {
       Eigen::Map<Eigen::MatrixXf> eAmk(Amk[p],m,k);
       Eigen::Map<Eigen::MatrixXf> eBkn(Bkn[p],n,k);
       Eigen::Map<Eigen::MatrixXf> eCmn(Cmn[p],m,n);
-      eCmn = beta * eCmn + alpha * eAmk * eBkn.transpose() ;
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk * eBkn.transpose() ;
+      else
+        eCmn = alpha * eAmk * eBkn.transpose() ;
     });
   } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_T) ) {
     thread_for (p, batchCount, {
       Eigen::Map<Eigen::MatrixXf> eAmk(Amk[p],k,m);
       Eigen::Map<Eigen::MatrixXf> eBkn(Bkn[p],n,k);
       Eigen::Map<Eigen::MatrixXf> eCmn(Cmn[p],m,n);
-      eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn.transpose() ;
-    } );
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn.transpose() ;
+      else
+        eCmn = alpha * eAmk.transpose() * eBkn.transpose() ;
+    });
   } else {
     assert(0);
   }

@@ -809,28 +902,40 @@ public:
       Eigen::Map<Eigen::MatrixXd> eAmk(Amk[p],m,k);
       Eigen::Map<Eigen::MatrixXd> eBkn(Bkn[p],k,n);
       Eigen::Map<Eigen::MatrixXd> eCmn(Cmn[p],m,n);
-      eCmn = beta * eCmn + alpha * eAmk * eBkn ;
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk * eBkn ;
+      else
+        eCmn = alpha * eAmk * eBkn ;
     });
   } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_N) ) {
     thread_for (p, batchCount, {
       Eigen::Map<Eigen::MatrixXd> eAmk(Amk[p],k,m);
       Eigen::Map<Eigen::MatrixXd> eBkn(Bkn[p],k,n);
       Eigen::Map<Eigen::MatrixXd> eCmn(Cmn[p],m,n);
-      eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn ;
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn ;
+      else
+        eCmn = alpha * eAmk.transpose() * eBkn ;
     });
   } else if ( (OpA == GridBLAS_OP_N ) && (OpB == GridBLAS_OP_T) ) {
     thread_for (p, batchCount, {
       Eigen::Map<Eigen::MatrixXd> eAmk(Amk[p],m,k);
       Eigen::Map<Eigen::MatrixXd> eBkn(Bkn[p],n,k);
       Eigen::Map<Eigen::MatrixXd> eCmn(Cmn[p],m,n);
-      eCmn = beta * eCmn + alpha * eAmk * eBkn.transpose() ;
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk * eBkn.transpose() ;
+      else
+        eCmn = alpha * eAmk * eBkn.transpose() ;
     });
   } else if ( (OpA == GridBLAS_OP_T ) && (OpB == GridBLAS_OP_T) ) {
     thread_for (p, batchCount, {
       Eigen::Map<Eigen::MatrixXd> eAmk(Amk[p],k,m);
       Eigen::Map<Eigen::MatrixXd> eBkn(Bkn[p],n,k);
       Eigen::Map<Eigen::MatrixXd> eCmn(Cmn[p],m,n);
-      eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn.transpose() ;
+      if (std::abs(beta) != 0.0)
+        eCmn = beta * eCmn + alpha * eAmk.transpose() * eBkn.transpose() ;
+      else
+        eCmn = alpha * eAmk.transpose() * eBkn.transpose() ;
     });
   } else {
     assert(0);
@@ -144,11 +144,11 @@ public:
       acceleratorCopyDeviceToDevice(&BLAS_Y[offset],&y_v[0],sizeof(scalar_object)*vol);
     }
     RealD t4 = usecond();
-    std::cout << "MulMatrix alloc took "<< t1-t0<<" us"<<std::endl;
-    std::cout << "MulMatrix preamble took "<< t2-t1<<" us"<<std::endl;
-    std::cout << "MulMatrix blas took "<< t3-t2<<" us"<<std::endl;
-    std::cout << "MulMatrix copy took "<< t4-t3<<" us"<<std::endl;
-    std::cout << "MulMatrix total "<< t4-t0<<" us"<<std::endl;
+    std::cout <<GridLogPerformance << "MulMatrix alloc took "<< t1-t0<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "MulMatrix preamble took "<< t2-t1<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "MulMatrix blas took "<< t3-t2<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "MulMatrix copy took "<< t4-t3<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "MulMatrix total "<< t4-t0<<" us"<<std::endl;
   }

   void InnerProductMatrix(Eigen::MatrixXcd &m , const std::vector<Field> &X, const std::vector<Field> &Y)

@@ -242,16 +242,16 @@ public:
     RealD flops = 8.0*M*N*K;
     flops = flops/(t4-t3)/1.e3;
     bytes = bytes/(t4-t3)/1.e3;
-    std::cout << "InnerProductMatrix m,n,k "<< M<<","<<N<<","<<K<<std::endl;
-    std::cout << "InnerProductMatrix alloc t1 "<< t1-t0<<" us"<<std::endl;
-    std::cout << "InnerProductMatrix cp t2 "<< t2-t1<<" us"<<std::endl;
-    std::cout << "InnerProductMatrix setup t3 "<< t3-t2<<" us"<<std::endl;
-    std::cout << "InnerProductMatrix blas t4 "<< t4-t3<<" us"<<std::endl;
-    std::cout << "InnerProductMatrix blas "<< flops<<" GF/s"<<std::endl;
-    std::cout << "InnerProductMatrix blas "<< bytes<<" GB/s"<<std::endl;
-    std::cout << "InnerProductMatrix gsum t5 "<< t5-t4<<" us"<<std::endl;
-    std::cout << "InnerProductMatrix cp t6 "<< t6-t5<<" us"<<std::endl;
-    std::cout << "InnerProductMatrix took "<< t6-t0<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix m,n,k "<< M<<","<<N<<","<<K<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix alloc t1 "<< t1-t0<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix cp t2 "<< t2-t1<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix setup t3 "<< t3-t2<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix blas t4 "<< t4-t3<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix blas "<< flops<<" GF/s"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix blas "<< bytes<<" GB/s"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix gsum t5 "<< t5-t4<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix cp t6 "<< t6-t5<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix took "<< t6-t0<<" us"<<std::endl;
 #else
     int nrhs;
     GridBase *grid;

@@ -358,17 +358,17 @@ public:
     flops = flops/(t4-t3)/1.e3;
     bytes = bytes/(t4-t3)/1.e3;
     xybytes = 4*xybytes/(t2-t1)/1.e3;
-    std::cout << "InnerProductMatrix m,n,k "<< M<<","<<N<<","<<K<<std::endl;
-    std::cout << "InnerProductMatrix alloc t1 "<< t1-t0<<" us"<<std::endl;
-    std::cout << "InnerProductMatrix cp t2 "<< t2-t1<<" us "<<xybytes<<" GB/s"<<std::endl;
-    std::cout << "InnerProductMatrix setup t3 "<< t3-t2<<" us"<<std::endl;
-    std::cout << "InnerProductMatrix blas t4 "<< t4-t3<<" us"<<std::endl;
-    std::cout << "InnerProductMatrix blas "<< flops<<" GF/s"<<std::endl;
-    std::cout << "InnerProductMatrix blas "<< bytes<<" GB/s"<<std::endl;
-    std::cout << "InnerProductMatrix cp t5 "<< t5-t4<<" us"<<std::endl;
-    std::cout << "InnerProductMatrix lsum t6l "<< t6l-t5<<" us"<<std::endl;
-    std::cout << "InnerProductMatrix gsum t6 "<< t6-t6l<<" us"<<std::endl;
-    std::cout << "InnerProductMatrix took "<< t6-t0<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix m,n,k "<< M<<","<<N<<","<<K<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix alloc t1 "<< t1-t0<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix cp t2 "<< t2-t1<<" us "<<xybytes<<" GB/s"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix setup t3 "<< t3-t2<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix blas t4 "<< t4-t3<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix blas "<< flops<<" GF/s"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix blas "<< bytes<<" GB/s"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix cp t5 "<< t5-t4<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix lsum t6l "<< t6l-t5<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix gsum t6 "<< t6-t6l<<" us"<<std::endl;
+    std::cout <<GridLogPerformance<< "InnerProductMatrix took "<< t6-t0<<" us"<<std::endl;
 #endif
   }
 };
@@ -63,7 +63,12 @@ class TwoLevelCGmrhs
   GridStopWatch SmoothTimer;
   GridStopWatch InsertTimer;

+  /*
+  Field rrr;
+  Field sss;
+  Field qqq;
+  Field zzz;
+  */
   // more most opertor functions
   TwoLevelCGmrhs(RealD tol,
                  Integer maxit,

@@ -74,6 +79,12 @@ class TwoLevelCGmrhs
     MaxIterations(maxit),
     _FineLinop(FineLinop),
     _Smoother(Smoother)
+    /*
+    rrr(fine),
+    sss(fine),
+    qqq(fine),
+    zzz(fine)
+    */
   {
     grid = fine;
   };

@@ -81,8 +92,8 @@ class TwoLevelCGmrhs
   // Vector case
   virtual void operator() (std::vector<Field> &src, std::vector<Field> &x)
   {
-    SolveSingleSystem(src,x);
-    //    SolvePrecBlockCG(src,x);
+    //    SolveSingleSystem(src,x);
+    SolvePrecBlockCG(src,x);
   }

   ////////////////////////////////////////////////////////////////////////////////////////////////////

@@ -657,6 +668,8 @@ public:
   CoarseField PleftProjMrhs(this->coarsegridmrhs);
   CoarseField PleftMss_projMrhs(this->coarsegridmrhs);

+  //    this->rrr=in[0];
+
 #undef SMOOTHER_BLOCK_SOLVE
 #if SMOOTHER_BLOCK_SOLVE
   this->SmoothTimer.Start();

@@ -669,6 +682,7 @@ public:
     this->SmoothTimer.Stop();
   }
 #endif
+  //  this->sss=Min[0];

   for(int rhs=0;rhs<nrhs;rhs++) {

@@ -705,9 +719,11 @@ public:
   this->_Projector.blockPromote(tmp,PleftMss_proj);// tmp= Q[in - A Min]
   this->PromoteTimer.Stop();
   this->FineTimer.Start();
+  //  this->qqq=tmp[0];
   for(int rhs=0;rhs<nrhs;rhs++) {
     axpy(out[rhs],1.0,Min[rhs],tmp[rhs]); // Min+tmp
   }
+  //  this->zzz=out[0];
   this->FineTimer.Stop();
 }
 };
@@ -245,9 +245,10 @@ until convergence
       _HermOp(src_n,tmp);
       //      std::cout << GridLogMessage<< tmp<<std::endl; exit(0);
       //      std::cout << GridLogIRL << " _HermOp " << norm2(tmp) << std::endl;
-      RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
+      //      RealD vnum = real(innerProduct(src_n,tmp)); // HermOp.
+      RealD vnum = real(innerProduct(tmp,tmp)); // HermOp^2.
       RealD vden = norm2(src_n);
-      RealD na = vnum/vden;
+      RealD na = std::sqrt(vnum/vden);
       if (fabs(evalMaxApprox/na - 1.0) < 0.0001)
         i=_MAX_ITER_IRL_MEVAPP_;
       evalMaxApprox = na;

@@ -255,6 +256,7 @@ until convergence
       src_n = tmp;
     }
   }
+  std::cout << GridLogIRL << " Final evalMaxApprox " << evalMaxApprox << std::endl;

   std::vector<RealD> lme(Nm);
   std::vector<RealD> lme2(Nm);
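One annotation on the estimator change in this hunk (mine, not from the commit): for Hermitian H and power iterate x_n, the replaced and replacement estimates are

    \[
      na_{\text{old}} = \frac{\langle x_n , H x_n \rangle}{\langle x_n , x_n \rangle},
      \qquad
      na_{\text{new}} = \sqrt{\frac{\langle H x_n , H x_n \rangle}{\langle x_n , x_n \rangle}}
                      = \frac{\lVert H x_n \rVert}{\lVert x_n \rVert}.
    \]

Both tend to the extreme eigenvalue as x_n aligns with the dominant eigenvector; the new form is the square root of the Rayleigh quotient of H^2, matching the "HermOp^2" comment, and is nonnegative by construction.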
@@ -74,7 +74,7 @@ public:

   void operator() (const Field &src, Field &psi){

-    psi=Zero();
+    //    psi=Zero();
     RealD cp, ssq,rsq;
     ssq=norm2(src);
     rsq=Tolerance*Tolerance*ssq;
@@ -30,6 +30,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 /* END LEGAL */
 #pragma once

+#include <Grid/algorithms/iterative/PrecGeneralisedConjugateResidualNonHermitian.h>
+
 NAMESPACE_BEGIN(Grid);

 inline RealD AggregatePowerLaw(RealD x)

@@ -95,7 +97,7 @@ public:

   RealD scale;

-  ConjugateGradient<FineField> CG(1.0e-2,100,false);
+  ConjugateGradient<FineField> CG(1.0e-3,400,false);
   FineField noise(FineGrid);
   FineField Mn(FineGrid);

@@ -108,7 +110,7 @@ public:

   hermop.Op(noise,Mn); std::cout<<GridLogMessage << "noise ["<<b<<"] <n|MdagM|n> "<<norm2(Mn)<<std::endl;

-  for(int i=0;i<1;i++){
+  for(int i=0;i<4;i++){

     CG(hermop,noise,subspace[b]);

@@ -124,6 +126,53 @@ public:
   }
 }

+virtual void CreateSubspaceGCR(GridParallelRNG &RNG,LinearOperatorBase<FineField> &DiracOp,int nn=nbasis)
+{
+  RealD scale;
+
+  TrivialPrecon<FineField> simple_fine;
+  PrecGeneralisedConjugateResidualNonHermitian<FineField> GCR(0.001,30,DiracOp,simple_fine,12,12);
+  FineField noise(FineGrid);
+  FineField src(FineGrid);
+  FineField guess(FineGrid);
+  FineField Mn(FineGrid);
+
+  for(int b=0;b<nn;b++){
+
+    subspace[b] = Zero();
+    gaussian(RNG,noise);
+    scale = std::pow(norm2(noise),-0.5);
+    noise=noise*scale;
+
+    DiracOp.Op(noise,Mn); std::cout<<GridLogMessage << "noise ["<<b<<"] <n|Op|n> "<<innerProduct(noise,Mn)<<std::endl;
+
+    for(int i=0;i<2;i++){
+      //  void operator() (const Field &src, Field &psi){
+#if 1
+      std::cout << GridLogMessage << " inverting on noise "<<std::endl;
+      src = noise;
+      guess=Zero();
+      GCR(src,guess);
+      subspace[b] = guess;
+#else
+      std::cout << GridLogMessage << " inverting on zero "<<std::endl;
+      src=Zero();
+      guess = noise;
+      GCR(src,guess);
+      subspace[b] = guess;
+#endif
+      noise = subspace[b];
+      scale = std::pow(norm2(noise),-0.5);
+      noise=noise*scale;
+
+    }
+
+    DiracOp.Op(noise,Mn); std::cout<<GridLogMessage << "filtered["<<b<<"] <f|Op|f> "<<innerProduct(noise,Mn)<<std::endl;
+    subspace[b] = noise;
+
+  }
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////
 // World of possibilities here. But have tried quite a lot of experiments (250+ jobs run on Summit)
 // and this is the best I found

@@ -160,14 +209,21 @@ public:

   int b =0;
   {
+    ComplexD ip;
     // Filter
     Chebyshev<FineField> Cheb(lo,hi,orderfilter);
     Cheb(hermop,noise,Mn);
     // normalise
     scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
     subspace[b] = Mn;
-    hermop.Op(Mn,tmp);
-    std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
+
+    hermop.Op(Mn,tmp);
+    ip= innerProduct(Mn,tmp);
+    std::cout<<GridLogMessage << "filt ["<<b<<"] <n|Op|n> "<<norm2(tmp)<<" "<<ip<<std::endl;
+
+    hermop.AdjOp(Mn,tmp);
+    ip = innerProduct(Mn,tmp);
+    std::cout<<GridLogMessage << "filt ["<<b<<"] <n|AdjOp|n> "<<norm2(tmp)<<" "<<ip<<std::endl;
     b++;
   }

@@ -213,8 +269,18 @@ public:
     Mn=*Tnp;
     scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
     subspace[b] = Mn;
-    hermop.Op(Mn,tmp);
-    std::cout<<GridLogMessage << n<<" filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
+
+    ComplexD ip;
+
+    hermop.Op(Mn,tmp);
+    ip= innerProduct(Mn,tmp);
+    std::cout<<GridLogMessage << "filt ["<<b<<"] <n|Op|n> "<<norm2(tmp)<<" "<<ip<<std::endl;
+
+    hermop.AdjOp(Mn,tmp);
+    ip = innerProduct(Mn,tmp);
+    std::cout<<GridLogMessage << "filt ["<<b<<"] <n|AdjOp|n> "<<norm2(tmp)<<" "<<ip<<std::endl;
+
     b++;
   }

@@ -228,6 +294,70 @@ public:
   }
   assert(b==nn);
 }

+virtual void CreateSubspacePolyCheby(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,
+                                     int nn,
+                                     double hi,
+                                     double lo1,
+                                     int orderfilter,
+                                     double lo2,
+                                     int orderstep)
+{
+  RealD scale;
+
+  FineField noise(FineGrid);
+  FineField Mn(FineGrid);
+  FineField tmp(FineGrid);
+
+  // New normalised noise
+  gaussian(RNG,noise);
+  scale = std::pow(norm2(noise),-0.5);
+  noise=noise*scale;
+
+  std::cout << GridLogMessage<<" CreateSubspacePolyCheby "<<std::endl;
+  // Initial matrix element
+  hermop.Op(noise,Mn);
+  std::cout<<GridLogMessage << "noise <n|MdagM|n> "<<norm2(Mn)<<std::endl;
+
+  int b =0;
+  {
+    // Filter
+    std::cout << GridLogMessage << "Cheby "<<lo1<<","<<hi<<" "<<orderstep<<std::endl;
+    Chebyshev<FineField> Cheb(lo1,hi,orderfilter);
+    Cheb(hermop,noise,Mn);
+    // normalise
+    scale = std::pow(norm2(Mn),-0.5); Mn=Mn*scale;
+    subspace[b] = Mn;
+    hermop.Op(Mn,tmp);
+    std::cout<<GridLogMessage << "filt ["<<b<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
+    std::cout<<GridLogMessage << "filt ["<<b<<"] <n|n> "<<norm2(Mn)<<std::endl;
+  }
+
+  // Generate a full sequence of Chebyshevs
+  for(int n=1;n<nn;n++){
+    std::cout << GridLogMessage << "Cheby "<<lo2<<","<<hi<<" "<<orderstep<<std::endl;
+    Chebyshev<FineField> Cheb(lo2,hi,orderstep);
+    Cheb(hermop,subspace[n-1],Mn);
+
+    for(int m=0;m<n;m++){
+      ComplexD c = innerProduct(subspace[m],Mn);
+      Mn = Mn - c*subspace[m];
+    }
+
+    // normalise
+    scale = std::pow(norm2(Mn),-0.5);
+    Mn=Mn*scale;
+
+    subspace[n]=Mn;
+
+    hermop.Op(Mn,tmp);
+    std::cout<<GridLogMessage << "filt ["<<n<<"] <n|MdagM|n> "<<norm2(tmp)<<std::endl;
+    std::cout<<GridLogMessage << "filt ["<<n<<"] <n|n> "<<norm2(Mn)<<std::endl;
+
+  }
+}
+
 virtual void CreateSubspaceChebyshev(GridParallelRNG &RNG,LinearOperatorBase<FineField> &hermop,
                                      int nn,
                                      double hi,
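The inner loop of the new CreateSubspacePolyCheby orthogonalises each Chebyshev-filtered vector against the basis built so far. Written out (my annotation, not text from the commit), with u_m = subspace[m] already normalised, the update is classical Gram-Schmidt followed by renormalisation:

    \[
      M_n \;\leftarrow\; M_n - \sum_{m<n} \langle u_m , M_n \rangle \, u_m ,
      \qquad
      M_n \;\leftarrow\; \frac{M_n}{\lVert M_n \rVert},
    \]

so the generated subspace vectors are orthonormal by construction before the coarse operator is assembled.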
@@ -441,8 +441,20 @@ public:
     std::cout << GridLogMessage<<"CoarsenOperator inv "<<tinv<<" us"<<std::endl;
   }
 #else
   //////////////////////////////////////////////////////////////////////
   // Galerkin projection of matrix
   //////////////////////////////////////////////////////////////////////
   void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop,
                        Aggregation<Fobj,CComplex,nbasis> & Subspace)
+  {
+    CoarsenOperator(linop,Subspace,Subspace);
+  }
+  //////////////////////////////////////////////////////////////////////
+  // Petrov - Galerkin projection of matrix
+  //////////////////////////////////////////////////////////////////////
+  void CoarsenOperator(LinearOperatorBase<Lattice<Fobj> > &linop,
+                       Aggregation<Fobj,CComplex,nbasis> & U,
+                       Aggregation<Fobj,CComplex,nbasis> & V)
   {
     std::cout << GridLogMessage<< "GeneralCoarsenMatrix "<< std::endl;
     GridBase *grid = FineGrid();

@@ -458,11 +470,9 @@ public:
     // Orthogonalise the subblocks over the basis
     /////////////////////////////////////////////////////////////
     CoarseScalar InnerProd(CoarseGrid());
-    blockOrthogonalise(InnerProd,Subspace.subspace);
+    blockOrthogonalise(InnerProd,V.subspace);
+    blockOrthogonalise(InnerProd,U.subspace);

-    //    for(int s=0;s<Subspace.subspace.size();s++){
-    //      std::cout << " subspace norm "<<norm2(Subspace.subspace[s])<<std::endl;
-    //    }
     const int npoint = geom.npoint;

     Coordinate clatt = CoarseGrid()->GlobalDimensions();

@@ -542,7 +552,7 @@ public:
     std::cout << GridLogMessage<< "CoarsenMatrixColoured vec "<<i<<"/"<<nbasis<< std::endl;
     for(int p=0;p<npoint;p++){ // Loop over momenta in npoint
       tphaseBZ-=usecond();
-      phaV = phaF[p]*Subspace.subspace[i];
+      phaV = phaF[p]*V.subspace[i];
       tphaseBZ+=usecond();

       /////////////////////////////////////////////////////////////////////

@@ -555,7 +565,7 @@ public:
       // std::cout << i << " " <<p << " MphaV "<<norm2(MphaV)<<" "<<norm2(phaV)<<std::endl;

       tproj-=usecond();
-      blockProject(coarseInner,MphaV,Subspace.subspace);
+      blockProject(coarseInner,MphaV,U.subspace);
       coarseInner = conjugate(pha[p]) * coarseInner;

       ComputeProj[p] = coarseInner;
@@ -69,7 +69,7 @@ public:
   }

   // FIXME: hack for the copy constructor: it must be avoided to avoid single thread loop
-  void construct(pointer __p, const _Tp& __val) { assert(0);};
+  void construct(pointer __p, const _Tp& __val) { };
   void construct(pointer __p) { };
   void destroy(pointer __p) { };
 };

@@ -175,10 +175,11 @@ template<typename _Tp> inline bool operator!=(const devAllocator<_Tp>&, const d
 // Template typedefs
 ////////////////////////////////////////////////////////////////////////////////
 template<class T> using hostVector   = std::vector<T,alignedAllocator<T> >; // Needs autoview
-template<class T> using Vector       = std::vector<T,uvmAllocator<T> >;     //
+template<class T> using Vector       = std::vector<T,uvmAllocator<T> >;     // Really want to deprecate
+template<class T> using uvmVector    = std::vector<T,uvmAllocator<T> >;     // auto migrating page
 template<class T> using deviceVector = std::vector<T,devAllocator<T> >;     // device vector

 /*
 template<class T> class vecView
 {
  protected:

@@ -214,6 +215,7 @@ template<class T> vecView<T> VectorView(Vector<T> &vec,ViewMode _mode)
 #define autoVecView(v_v,v,mode) \
   auto v_v = VectorView(v,mode); \
   ViewCloser<decltype(v_v)> _autoView##v_v(v_v);
 */

 NAMESPACE_END(Grid);
@@ -9,6 +9,7 @@ static char print_buffer [ MAXLINE ];
 #define mprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer << std::endl;
 #define dprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogDebug << print_buffer << std::endl;
 //#define dprintf(...)
+//#define mprintf(...)

 ////////////////////////////////////////////////////////////
 // For caching copies of data on device

@@ -109,7 +110,7 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
   ///////////////////////////////////////////////////////////
   assert(AccCache.state!=Empty);

-  dprintf("MemoryManager: Discard(%lx) %lx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
+  dprintf("MemoryManager: Discard(%lx) %lx",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
   assert(AccCache.accLock==0);
   assert(AccCache.cpuLock==0);
   assert(AccCache.CpuPtr!=(uint64_t)NULL);

@@ -119,7 +120,7 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
     DeviceBytes -=AccCache.bytes;
     LRUremove(AccCache);
     AccCache.AccPtr=(uint64_t) NULL;
-    dprintf("MemoryManager: Free(%lx) LRU %ld Total %ld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
+    dprintf("MemoryManager: Free(%lx) LRU %ld Total %ld",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
   }
   uint64_t CpuPtr = AccCache.CpuPtr;
   EntryErase(CpuPtr);

@@ -139,7 +140,7 @@ void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
   ///////////////////////////////////////////////////////////////////////////
   assert(AccCache.state!=Empty);

-  mprintf("MemoryManager: Evict CpuPtr %lx AccPtr %lx cpuLock %ld accLock %ld\n",
+  mprintf("MemoryManager: Evict CpuPtr %lx AccPtr %lx cpuLock %ld accLock %ld",
           (uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr,
           (uint64_t)AccCache.cpuLock,(uint64_t)AccCache.accLock);
   if (AccCache.accLock!=0) return;

@@ -153,7 +154,7 @@ void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
     AccCache.AccPtr=(uint64_t)NULL;
     AccCache.state=CpuDirty; // CPU primary now
     DeviceBytes -=AccCache.bytes;
-    dprintf("MemoryManager: Free(AccPtr %lx) footprint now %ld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
+    dprintf("MemoryManager: Free(AccPtr %lx) footprint now %ld ",(uint64_t)AccCache.AccPtr,DeviceBytes);
   }
   //  uint64_t CpuPtr = AccCache.CpuPtr;
   DeviceEvictions++;

@@ -167,7 +168,7 @@ void MemoryManager::Flush(AcceleratorViewEntry &AccCache)
   assert(AccCache.AccPtr!=(uint64_t)NULL);
   assert(AccCache.CpuPtr!=(uint64_t)NULL);
   acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes);
-  mprintf("MemoryManager: acceleratorCopyFromDevice Flush size %ld AccPtr %lx -> CpuPtr %lx\n",(uint64_t)AccCache.bytes,(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
+  mprintf("MemoryManager: acceleratorCopyFromDevice Flush size %ld AccPtr %lx -> CpuPtr %lx",(uint64_t)AccCache.bytes,(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
   DeviceToHostBytes+=AccCache.bytes;
   DeviceToHostXfer++;
   AccCache.state=Consistent;

@@ -182,7 +183,7 @@ void MemoryManager::Clone(AcceleratorViewEntry &AccCache)
     AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes);
     DeviceBytes+=AccCache.bytes;
   }
-  mprintf("MemoryManager: acceleratorCopyToDevice Clone size %ld AccPtr %lx <- CpuPtr %lx\n",
+  mprintf("MemoryManager: acceleratorCopyToDevice Clone size %ld AccPtr %lx <- CpuPtr %lx",
           (uint64_t)AccCache.bytes,
           (uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
   acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes);

@@ -210,7 +211,7 @@ void MemoryManager::CpuDiscard(AcceleratorViewEntry &AccCache)
 void MemoryManager::ViewClose(void* Ptr,ViewMode mode)
 {
   if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){
-    dprintf("AcceleratorViewClose %lx\n",(uint64_t)Ptr);
+    dprintf("AcceleratorViewClose %lx",(uint64_t)Ptr);
     AcceleratorViewClose((uint64_t)Ptr);
   } else if( (mode==CpuRead)||(mode==CpuWrite)){
     CpuViewClose((uint64_t)Ptr);

@@ -222,7 +223,7 @@ void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvis
 {
   uint64_t CpuPtr = (uint64_t)_CpuPtr;
   if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){
-    dprintf("AcceleratorViewOpen %lx\n",(uint64_t)CpuPtr);
+    dprintf("AcceleratorViewOpen %lx",(uint64_t)CpuPtr);
     return (void *) AcceleratorViewOpen(CpuPtr,bytes,mode,hint);
   } else if( (mode==CpuRead)||(mode==CpuWrite)){
     return (void *)CpuViewOpen(CpuPtr,bytes,mode,hint);

@@ -233,6 +234,9 @@ void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvis
 }
 void MemoryManager::EvictVictims(uint64_t bytes)
 {
+  if(bytes>=DeviceMaxBytes) {
+    printf("EvictVictims bytes %ld DeviceMaxBytes %ld\n",bytes,DeviceMaxBytes);
+  }
   assert(bytes<DeviceMaxBytes);
   while(bytes+DeviceLRUBytes > DeviceMaxBytes){
     if ( DeviceLRUBytes > 0){

@@ -265,7 +269,7 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
   assert(AccCache.cpuLock==0);  // Programming error

   if(AccCache.state!=Empty) {
-    dprintf("ViewOpen found entry %lx %lx : sizes %ld %ld accLock %ld\n",
+    dprintf("ViewOpen found entry %lx %lx : sizes %ld %ld accLock %ld",
            (uint64_t)AccCache.CpuPtr,
            (uint64_t)CpuPtr,
            (uint64_t)AccCache.bytes,

@@ -305,7 +309,7 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
       AccCache.state = Consistent; // Empty + AccRead => Consistent
     }
     AccCache.accLock= 1;
-    dprintf("Copied Empty entry into device accLock= %d\n",AccCache.accLock);
+    dprintf("Copied Empty entry into device accLock= %d",AccCache.accLock);
   } else if(AccCache.state==CpuDirty ){
     if(mode==AcceleratorWriteDiscard) {
       CpuDiscard(AccCache);

@@ -318,21 +322,21 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
       AccCache.state = Consistent; // CpuDirty + AccRead => Consistent
     }
     AccCache.accLock++;
-    dprintf("CpuDirty entry into device ++accLock= %d\n",AccCache.accLock);
+    dprintf("CpuDirty entry into device ++accLock= %d",AccCache.accLock);
   } else if(AccCache.state==Consistent) {
     if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
       AccCache.state = AccDirty;   // Consistent + AcceleratorWrite=> AccDirty
     else
       AccCache.state = Consistent; // Consistent + AccRead => Consistent
     AccCache.accLock++;
-    dprintf("Consistent entry into device ++accLock= %d\n",AccCache.accLock);
+    dprintf("Consistent entry into device ++accLock= %d",AccCache.accLock);
   } else if(AccCache.state==AccDirty) {
     if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
      AccCache.state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty
     else
      AccCache.state = AccDirty; // AccDirty + AccRead => AccDirty
     AccCache.accLock++;
-    dprintf("AccDirty entry ++accLock= %d\n",AccCache.accLock);
+    dprintf("AccDirty entry ++accLock= %d",AccCache.accLock);
   } else {
     assert(0);
   }

@@ -341,7 +345,7 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
   // If view is opened on device must remove from LRU
   if(AccCache.LRU_valid==1){
     // must possibly remove from LRU as now locked on GPU
-    dprintf("AccCache entry removed from LRU \n");
+    dprintf("AccCache entry removed from LRU ");
     LRUremove(AccCache);
   }

@@ -364,10 +368,10 @@ void MemoryManager::AcceleratorViewClose(uint64_t CpuPtr)
   AccCache.accLock--;
   // Move to LRU queue if not locked and close on device
   if(AccCache.accLock==0) {
-    dprintf("AccleratorViewClose %lx AccLock decremented to %ld move to LRU queue\n",(uint64_t)CpuPtr,(uint64_t)AccCache.accLock);
+    dprintf("AccleratorViewClose %lx AccLock decremented to %ld move to LRU queue",(uint64_t)CpuPtr,(uint64_t)AccCache.accLock);
     LRUinsert(AccCache);
   } else {
-    dprintf("AccleratorViewClose %lx AccLock decremented to %ld\n",(uint64_t)CpuPtr,(uint64_t)AccCache.accLock);
+    dprintf("AccleratorViewClose %lx AccLock decremented to %ld",(uint64_t)CpuPtr,(uint64_t)AccCache.accLock);
   }
 }
 void MemoryManager::CpuViewClose(uint64_t CpuPtr)
@@ -33,6 +33,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
 ///////////////////////////////////
 #include <Grid/communicator/SharedMemory.h>

+#define NVLINK_GET
+
 NAMESPACE_BEGIN(Grid);

 extern bool Stencil_force_mpi ;

@@ -127,7 +129,7 @@ public:
   void GlobalSumVector(ComplexD *c,int N);
   void GlobalXOR(uint32_t &);
   void GlobalXOR(uint64_t &);

   template<class obj> void GlobalSumP2P(obj &o)
   {
     std::vector<obj> column;

@@ -147,7 +149,8 @@ public:
                        sizeof(obj),d*100+p);

     }
-    CommsComplete(list);
+    if (!list.empty()) // avoid triggering assert in comms == none
+      CommsComplete(list);
     for(int p=1;p<_processors[d];p++){
       accum = accum + column[p];
     }

@@ -192,6 +195,11 @@ public:
                                    void *recv,
                                    int recv_from_rank,int do_recv,
                                    int xbytes,int rbytes,int dir);

+  // Could do a PollHtoD and have a CommsMerge dependence
+  void StencilSendToRecvFromPollDtoH (std::vector<CommsRequest_t> &list);
+  void StencilSendToRecvFromPollIRecv(std::vector<CommsRequest_t> &list);
+
   double StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                     void *xmit,
                                     int xmit_to_rank,int do_xmit,
@@ -30,6 +30,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>

 NAMESPACE_BEGIN(Grid);

+
 Grid_MPI_Comm CartesianCommunicator::communicator_world;

 ////////////////////////////////////////////

@@ -362,8 +363,6 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
                                            int bytes)
 {
   std::vector<MpiCommsRequest_t> reqs(0);
-  unsigned long xcrc = crc32(0L, Z_NULL, 0);
-  unsigned long rcrc = crc32(0L, Z_NULL, 0);

   int myrank = _processor;
   int ierr;

@@ -379,9 +378,6 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
                     communicator,MPI_STATUS_IGNORE);
   assert(ierr==0);

-  //  xcrc = crc32(xcrc,(unsigned char *)xmit,bytes);
-  //  rcrc = crc32(rcrc,(unsigned char *)recv,bytes);
-  //  printf("proc %d SendToRecvFrom %d bytes xcrc %lx rcrc %lx\n",_processor,bytes,xcrc,rcrc); fflush
 }
 // Basic Halo comms primitive
 double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,

@@ -399,6 +395,8 @@ double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,


 #ifdef ACCELERATOR_AWARE_MPI
+void CartesianCommunicator::StencilSendToRecvFromPollIRecv(std::vector<CommsRequest_t> &list) {};
+void CartesianCommunicator::StencilSendToRecvFromPollDtoH(std::vector<CommsRequest_t> &list) {};
 double CartesianCommunicator::StencilSendToRecvFromPrepare(std::vector<CommsRequest_t> &list,
                                                            void *xmit,
                                                            int dest,int dox,

@@ -440,8 +438,15 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
       list.push_back(rrq);
       off_node_bytes+=rbytes;
     }
+#ifdef NVLINK_GET
+    else {
+      void *shm = (void *) this->ShmBufferTranslate(from,xmit);
+      assert(shm!=NULL);
+      acceleratorCopyDeviceToDeviceAsynch(shm,recv,rbytes);
+    }
+#endif
   }

   // This is a NVLINK PUT
   if (dox) {
     if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) {
       tag= dir+_processor*32;

@@ -450,9 +455,11 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
       list.push_back(xrq);
       off_node_bytes+=xbytes;
     } else {
+#ifndef NVLINK_GET
       void *shm = (void *) this->ShmBufferTranslate(dest,recv);
       assert(shm!=NULL);
       acceleratorCopyDeviceToDeviceAsynch(xmit,shm,xbytes);
+#endif
     }
   }
   return off_node_bytes;

@@ -461,7 +468,7 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
 void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list,int dir)
 {
   int nreq=list.size();

   /*finishes Get/Put*/
   acceleratorCopySynchronise();

   if (nreq==0) return;
@@ -561,53 +568,105 @@ double CartesianCommunicator::StencilSendToRecvFromPrepare(std::vector<CommsRequ

   if (dox) {
     if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) {
+#undef DEVICE_TO_HOST_CONCURRENT // pipeline
+#ifdef DEVICE_TO_HOST_CONCURRENT

       tag= dir+_processor*32;

       host_xmit = this->HostBufferMalloc(xbytes);
-      acceleratorCopyFromDeviceAsynch(xmit, host_xmit,xbytes); // Make this Asynch
+      CommsRequest_t srq;
+
+      srq.ev = acceleratorCopyFromDeviceAsynch(xmit, host_xmit,xbytes); // Make this Asynch

       //    ierr =MPI_Isend(host_xmit, xbytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq);
       //    assert(ierr==0);
       //    off_node_bytes+=xbytes;

-      CommsRequest_t srq;
       srq.PacketType = InterNodeXmit;
       srq.bytes      = xbytes;
       //      srq.req        = xrq;
       srq.host_buf   = host_xmit;
       srq.device_buf = xmit;
+      srq.tag        = tag;
+      srq.dest       = dest;
+      srq.commdir    = commdir;
       list.push_back(srq);
+#else
+      tag= dir+_processor*32;
+
+      host_xmit = this->HostBufferMalloc(xbytes);
+      const int chunks=1;
+      for(int n=0;n<chunks;n++){
+        void * host_xmitc = (void *)( (uint64_t) host_xmit + n*xbytes/chunks);
+        void * xmitc      = (void *)( (uint64_t) xmit      + n*xbytes/chunks);
+        acceleratorCopyFromDeviceAsynch(xmitc, host_xmitc,xbytes/chunks); // Make this Asynch
+      }
+      acceleratorCopySynchronise(); // Complete all pending copy transfers
+
+      ierr =MPI_Isend(host_xmit, xbytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq);
+      assert(ierr==0);
+      off_node_bytes+=xbytes;
+
+      CommsRequest_t srq;
+      srq.PacketType = InterNodeXmit;
+      srq.bytes      = xbytes;
+      srq.req        = xrq;
+      srq.host_buf   = host_xmit;
+      srq.device_buf = xmit;
+      list.push_back(srq);
+#endif
     }
   }

   return off_node_bytes;
 }
+/*
+ * In the interest of better pipelining, poll for completion on each DtoH and
+ * start MPI_ISend in the meantime
+ */
+void CartesianCommunicator::StencilSendToRecvFromPollIRecv(std::vector<CommsRequest_t> &list)
+{
+  int pending = 0;
+  do {
+
+    pending = 0;
+
+    for(int idx = 0; idx<list.size();idx++){
+
+      if ( list[idx].PacketType==InterNodeRecv ) {
+
+        int flag = 0;
+        MPI_Status status;
+        int ierr = MPI_Test(&list[idx].req,&flag,&status);
+        assert(ierr==0);
+
+        if ( flag ) {
+          //      std::cout << " PollIrecv "<<idx<<" flag "<<flag<<std::endl;
+          acceleratorCopyToDeviceAsynch(list[idx].host_buf,list[idx].device_buf,list[idx].bytes);
+          list[idx].PacketType=InterNodeReceiveHtoD;
+        } else {
+          pending ++;
+        }
+      }
+    }
+    //    std::cout << " PollIrecv "<<pending<<" pending requests"<<std::endl;
+  } while ( pending );
+
+}
+void CartesianCommunicator::StencilSendToRecvFromPollDtoH(std::vector<CommsRequest_t> &list)
+{
+  int pending = 0;
+  do {
+
+    pending = 0;
+
+    for(int idx = 0; idx<list.size();idx++){
+
+      if ( list[idx].PacketType==InterNodeXmit ) {
+
+        if ( acceleratorEventIsComplete(list[idx].ev) ) {
+
+          void *host_xmit = list[idx].host_buf;
+          uint32_t xbytes = list[idx].bytes;
+          int dest        = list[idx].dest;
+          int tag         = list[idx].tag;
+          int commdir     = list[idx].commdir;
+          ///////////////////
+          // Send packet
+          ///////////////////
+
+          //      std::cout << " DtoH is complete for index "<<idx<<" calling MPI_Isend "<<std::endl;
+
+          MPI_Request xrq;
+          int ierr =MPI_Isend(host_xmit, xbytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq);
+          assert(ierr==0);
+
+          list[idx].req = xrq; // Update the MPI request in the list
+
+          list[idx].PacketType=InterNodeXmitISend;
+
+        } else {
+          // not done, so return to polling loop
+          pending++;
+        }
+      }
+    }
+  } while (pending);
+}

 double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                          void *xmit,

@@ -644,69 +703,89 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
  * - complete all copies
  * - post MPI send asynch
  */
+#ifdef NVLINK_GET
+  if ( dor ) {
+
+    //    static int printed;
+    //    if((printed<8) && this->IsBoss() ) {
+    //      printf("dir %d doX %d doR %d Face size %ld %ld\n",dir,dox,dor,xbytes,rbytes);
+    //      printed++;
+    //    }
+
+    if ( ! ( (gfrom ==MPI_UNDEFINED) || Stencil_force_mpi ) ) {
+      // Intranode
+      void *shm = (void *) this->ShmBufferTranslate(from,xmit);
+      assert(shm!=NULL);
+
+      CommsRequest_t srq;
+
+      srq.ev = acceleratorCopyDeviceToDeviceAsynch(shm,recv,rbytes);
+
+      srq.PacketType = IntraNodeRecv;
+      srq.bytes      = xbytes;
+      //      srq.req        = xrq;
+      srq.host_buf   = NULL;
+      srq.device_buf = xmit;
+      srq.tag        = -1;
+      srq.dest       = dest;
+      srq.commdir    = dir;
+      list.push_back(srq);
+    }
+  }
+#else
   if (dox) {

     if ( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) {
+#ifdef DEVICE_TO_HOST_CONCURRENT
       tag= dir+_processor*32;
+      // Find the send in the prepared list
+      int list_idx=-1;
+      for(int idx = 0; idx<list.size();idx++){
+
+        if ( (list[idx].device_buf==xmit)
+           &&(list[idx].PacketType==InterNodeXmit)
+           &&(list[idx].bytes==xbytes) ) {
+
+          list_idx = idx;
+          host_xmit = list[idx].host_buf;
+        }
+      }
+      assert(list_idx != -1); // found it
+      ierr =MPI_Isend(host_xmit, xbytes, MPI_CHAR,dest,tag,communicator_halo[commdir],&xrq);
+      assert(ierr==0);
+      list[list_idx].req = xrq; // Update the MPI request in the list
+      off_node_bytes+=xbytes;
+#endif
-    } else {
+    } else {
+      if ( !( (gdest == MPI_UNDEFINED) || Stencil_force_mpi ) ) {
       // Intranode
       void *shm = (void *) this->ShmBufferTranslate(dest,recv);
       assert(shm!=NULL);
-      acceleratorCopyDeviceToDeviceAsynch(xmit,shm,xbytes);
+
+      CommsRequest_t srq;
+
+      srq.ev = acceleratorCopyDeviceToDeviceAsynch(xmit,shm,xbytes);
+
+      srq.PacketType = IntraNodeXmit;
+      srq.bytes      = xbytes;
+      //      srq.req        = xrq;
+      srq.host_buf   = NULL;
+      srq.device_buf = xmit;
+      srq.tag        = -1;
+      srq.dest       = dest;
+      srq.commdir    = dir;
+      list.push_back(srq);
+
+      }
     }
   }
+#endif
   return off_node_bytes;
 }
 void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list,int dir)
 {
   int nreq=list.size();
   acceleratorCopySynchronise(); // Complete all pending copy transfers D2D

-  if (nreq==0) return;
-  std::vector<MPI_Status> status(nreq);
-  std::vector<MPI_Request> MpiRequests(nreq);
+  std::vector<MPI_Status> status;
+  std::vector<MPI_Request> MpiRequests;

+  for(int r=0;r<list.size();r++){
+    // Must check each Send buf is clear to reuse
+    if ( list[r].PacketType == InterNodeXmitISend ) MpiRequests.push_back(list[r].req);
+    // if ( list[r].PacketType == InterNodeRecv ) MpiRequests.push_back(list[r].req); // Already "Test" passed
+  }

-  for(int r=0;r<nreq;r++){
-    MpiRequests[r] = list[r].req;
+  int nreq=MpiRequests.size();

+  if (nreq>0) {
+    status.resize(MpiRequests.size());
+    int ierr = MPI_Waitall(MpiRequests.size(),&MpiRequests[0],&status[0]); // Sends are guaranteed in order. No harm in not completing.
+    assert(ierr==0);
+  }

-  int ierr = MPI_Waitall(nreq,&MpiRequests[0],&status[0]);
-  assert(ierr==0);

-  for(int r=0;r<nreq;r++){
-    if ( list[r].PacketType==InterNodeRecv ) {
-      acceleratorCopyToDeviceAsynch(list[r].host_buf,list[r].device_buf,list[r].bytes);
-    }
-  }
+  //  for(int r=0;r<nreq;r++){
+  //    if ( list[r].PacketType==InterNodeRecv ) {
+  //      acceleratorCopyToDeviceAsynch(list[r].host_buf,list[r].device_buf,list[r].bytes);
+  //    }
+  //  }

   acceleratorCopySynchronise(); // Complete all pending copy transfers
   list.resize(0);               // Delete the list
   this->HostBufferFreeAll();    // Clean up the buffer allocs
-  this->StencilBarrier();
+#ifndef NVLINK_GET
+  this->StencilBarrier(); // if PUT must check our nbrs have filled our receive buffers.
+#endif
 }
 #endif
 ////////////////////////////////////////////
@@ -91,7 +91,7 @@ void CartesianCommunicator::SendToRecvFrom(void *xmit,
 {
   assert(0);
 }
-void CartesianCommunicator::CommsComplete(std::vector<CommsRequest_t> &list){ assert(0);}
+void CartesianCommunicator::CommsComplete(std::vector<CommsRequest_t> &list){ assert(list.size()==0);}
 void CartesianCommunicator::SendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                 void *xmit,
                                                 int dest,

@@ -132,6 +132,8 @@ double CartesianCommunicator::StencilSendToRecvFrom( void *xmit,
 {
   return 2.0*bytes;
 }
+void CartesianCommunicator::StencilSendToRecvFromPollIRecv(std::vector<CommsRequest_t> &list) {};
+void CartesianCommunicator::StencilSendToRecvFromPollDtoH(std::vector<CommsRequest_t> &list) {};
 double CartesianCommunicator::StencilSendToRecvFromPrepare(std::vector<CommsRequest_t> &list,
                                                            void *xmit,
                                                            int xmit_to_rank,int dox,

@@ -139,7 +141,7 @@ double CartesianCommunicator::StencilSendToRecvFromPrepare(std::vector<CommsRequ
                                                            int recv_from_rank,int dor,
                                                            int xbytes,int rbytes, int dir)
 {
-  return xbytes+rbytes;
+  return 0.0;
 }
 double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list,
                                                          void *xmit,
@ -50,12 +50,30 @@ typedef MPI_Request MpiCommsRequest_t;
#ifdef ACCELERATOR_AWARE_MPI
typedef MPI_Request CommsRequest_t;
#else
enum PacketType_t { InterNodeXmit, InterNodeRecv, IntraNodeXmit, IntraNodeRecv };
/*
 * Enable state transitions as each packet flows.
 */
enum PacketType_t {
  FaceGather,
  InterNodeXmit,
  InterNodeRecv,
  IntraNodeXmit,
  IntraNodeRecv,
  InterNodeXmitISend,
  InterNodeReceiveHtoD
};
/*
 * Package arguments needed for various actions along packet flow
 */
typedef struct {
  PacketType_t PacketType;
  void *host_buf;
  void *device_buf;
  int dest;
  int tag;
  int commdir;
  unsigned long bytes;
  acceleratorEvent_t ev;
  MpiCommsRequest_t req;
} CommsRequest_t;
#endif
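
With accelerator-unaware MPI every halo packet is tracked explicitly, and PacketType records where it sits in the gather / device-to-host / MPI / host-to-device pipeline. A rough send-side sketch of how one request walks these states (illustrative only; stage_to_host and post_isend are hypothetical stand-ins for the acceleratorCopy*Asynch and MPI_Isend calls in the real code):

  // Hypothetical helpers standing in for acceleratorCopyFromDeviceAsynch / MPI_Isend.
  acceleratorEvent_t stage_to_host(void *device_buf, void *host_buf, unsigned long bytes);
  void post_isend(void *host_buf, unsigned long bytes, int dest, int tag,
                  int commdir, MpiCommsRequest_t *req);

  void send_one_packet(CommsRequest_t &rq) {
    rq.PacketType = FaceGather;            // face gathered into rq.device_buf
    rq.ev = stage_to_host(rq.device_buf, rq.host_buf, rq.bytes); // async D2H stage
    rq.PacketType = InterNodeXmit;         // D2H copy in flight
    // ...once rq.ev completes (polled by StencilSendToRecvFromPollDtoH)...
    post_isend(rq.host_buf, rq.bytes, rq.dest, rq.tag, rq.commdir, &rq.req);
    rq.PacketType = InterNodeXmitISend;    // MPI_Isend posted; reaped by MPI_Waitall
  }
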
@ -119,7 +137,7 @@ public:
  ///////////////////////////////////////////////////
  static void SharedMemoryAllocate(uint64_t bytes, int flags);
  static void SharedMemoryFree(void);
  static void SharedMemoryCopy(void *dest,void *src,size_t bytes);
  // static void SharedMemoryCopy(void *dest,void *src,size_t bytes);
  static void SharedMemoryZero(void *dest,size_t bytes);

};
@ -542,12 +542,12 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
  // Each MPI rank should allocate our own buffer
  ///////////////////////////////////////////////////////////////////////////////////////////////////////////
#ifndef ACCELERATOR_AWARE_MPI
  printf("Host buffer allocate for GPU non-aware MPI\n");
  // printf("Host buffer allocate for GPU non-aware MPI\n");
#if 0
  HostCommBuf= acceleratorAllocHost(bytes);
#else
  HostCommBuf= malloc(bytes); /// CHANGE THIS TO malloc_host
#ifdef HAVE_NUMAIF_H
#if 0
#warning "Moving host buffers to specific NUMA domain"
  int numa;
  char *numa_name=(char *)getenv("MPI_BUF_NUMA");
@ -916,14 +916,14 @@ void GlobalSharedMemory::SharedMemoryZero(void *dest,size_t bytes)
  bzero(dest,bytes);
#endif
}
void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes)
{
#if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)
  acceleratorCopyToDevice(src,dest,bytes);
#else
  bcopy(src,dest,bytes);
#endif
}
//void GlobalSharedMemory::SharedMemoryCopy(void *dest,void *src,size_t bytes)
//{
//#if defined(GRID_CUDA) || defined(GRID_HIP) || defined(GRID_SYCL)
//  acceleratorCopyToDevice(src,dest,bytes);
//#else
//  bcopy(src,dest,bytes);
//#endif
//}
////////////////////////////////////////////////////////
// Global shared functionality finished
// Now move to per communicator functionality
@ -959,6 +959,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
    MPI_Allreduce(MPI_IN_PLACE,&wsr,1,MPI_UINT32_T,MPI_SUM,ShmComm);

    ShmCommBufs[r] = GlobalSharedMemory::WorldShmCommBufs[wsr];
    // std::cerr << " SetCommunicator rank "<<r<<" comm "<<ShmCommBufs[r] <<std::endl;
  }
  ShmBufferFreeAll();

@ -989,7 +990,7 @@ void SharedMemory::SetCommunicator(Grid_MPI_Comm comm)
  }
#endif

  //SharedMemoryTest();
  SharedMemoryTest();
}
//////////////////////////////////////////////////////////////////
// On node barrier
@ -1011,19 +1012,18 @@ void SharedMemory::SharedMemoryTest(void)
      check[0]=GlobalSharedMemory::WorldNode;
      check[1]=r;
      check[2]=magic;
      GlobalSharedMemory::SharedMemoryCopy( ShmCommBufs[r], check, 3*sizeof(uint64_t));
      acceleratorCopyToDevice(check,ShmCommBufs[r],3*sizeof(uint64_t));
    }
  }
  ShmBarrier();
  for(uint64_t r=0;r<ShmSize;r++){
    ShmBarrier();
    GlobalSharedMemory::SharedMemoryCopy(check,ShmCommBufs[r], 3*sizeof(uint64_t));
    ShmBarrier();
    acceleratorCopyFromDevice(ShmCommBufs[r],check,3*sizeof(uint64_t));
    assert(check[0]==GlobalSharedMemory::WorldNode);
    assert(check[1]==r);
    assert(check[2]==magic);
    ShmBarrier();
  }
  ShmBarrier();
  std::cout << GridLogDebug << " SharedMemoryTest has passed "<<std::endl;
}

void *SharedMemory::ShmBuffer(int rank)
@ -68,7 +68,7 @@ template<class vobj> Lattice<vobj> Cshift(const Lattice<vobj> &rhs,int dimension
  if(Cshift_verbose) std::cout << GridLogPerformance << "Cshift took "<< (t1-t0)/1e3 << " ms"<<std::endl;
  return ret;
}
#if 1

template<class vobj> void Cshift_comms(Lattice<vobj>& ret,const Lattice<vobj> &rhs,int dimension,int shift)
{
  int sshift[2];
@ -125,7 +125,11 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
  int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
  static deviceVector<vobj> send_buf; send_buf.resize(buffer_size);
  static deviceVector<vobj> recv_buf; recv_buf.resize(buffer_size);

#ifndef ACCELERATOR_AWARE_MPI
  static hostVector<vobj> hsend_buf; hsend_buf.resize(buffer_size);
  static hostVector<vobj> hrecv_buf; hrecv_buf.resize(buffer_size);
#endif

  int cb= (cbmask==0x2)? Odd : Even;
  int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);
  RealD tcopy=0.0;
@ -156,16 +160,29 @@ template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &r
      // int rank           = grid->_processor;
      int recv_from_rank;
      int xmit_to_rank;

      grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);

      tcomms-=usecond();
      grid->Barrier();

#ifdef ACCELERATOR_AWARE_MPI
      grid->SendToRecvFrom((void *)&send_buf[0],
			   xmit_to_rank,
			   (void *)&recv_buf[0],
			   recv_from_rank,
			   bytes);
#else
      // bouncy bouncy
      acceleratorCopyFromDevice(&send_buf[0],&hsend_buf[0],bytes);
      grid->SendToRecvFrom((void *)&hsend_buf[0],
			   xmit_to_rank,
			   (void *)&hrecv_buf[0],
			   recv_from_rank,
			   bytes);
      acceleratorCopyToDevice(&hrecv_buf[0],&recv_buf[0],bytes);
#endif

      xbytes+=bytes;
      grid->Barrier();
      tcomms+=usecond();
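
When MPI cannot address device memory, the shift falls back to the "bouncy bouncy" path above: stage the gathered plane to the host, exchange host buffers, and stage the reply back to the device. A minimal sketch of that pattern, with copyD2H/copyH2D as hypothetical stand-ins for acceleratorCopyFromDevice/acceleratorCopyToDevice:

  #include <mpi.h>

  void copyD2H(void *host, const void *dev, size_t bytes);  // hypothetical
  void copyH2D(void *dev, const void *host, size_t bytes);  // hypothetical

  template<class T>
  void staged_sendrecv(T *d_send, T *d_recv, int bytes,
		       int to, int from, MPI_Comm comm,
		       T *h_send, T *h_recv) {
    copyD2H(h_send, d_send, bytes);                  // device -> host staging buffer
    MPI_Sendrecv(h_send, bytes, MPI_CHAR, to,   0,   // host-to-host MPI exchange
		 h_recv, bytes, MPI_CHAR, from, 0,
		 comm, MPI_STATUS_IGNORE);
    copyH2D(d_recv, h_recv, bytes);                  // host -> device
  }
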
@ -226,12 +243,16 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
  static std::vector<deviceVector<scalar_object> > recv_buf_extract; recv_buf_extract.resize(Nsimd);
  scalar_object *  recv_buf_extract_mpi;
  scalar_object *  send_buf_extract_mpi;


  for(int s=0;s<Nsimd;s++){
    send_buf_extract[s].resize(buffer_size);
    recv_buf_extract[s].resize(buffer_size);
  }

#ifndef ACCELERATOR_AWARE_MPI
  hostVector<scalar_object> hsend_buf; hsend_buf.resize(buffer_size);
  hostVector<scalar_object> hrecv_buf; hrecv_buf.resize(buffer_size);
#endif

  int bytes = buffer_size*sizeof(scalar_object);

  ExtractPointerArray<scalar_object>  pointers(Nsimd); //
@ -283,11 +304,22 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo

      send_buf_extract_mpi = &send_buf_extract[nbr_lane][0];
      recv_buf_extract_mpi = &recv_buf_extract[i][0];
#ifdef ACCELERATOR_AWARE_MPI
      grid->SendToRecvFrom((void *)send_buf_extract_mpi,
			   xmit_to_rank,
			   (void *)recv_buf_extract_mpi,
			   recv_from_rank,
			   bytes);
#else
      // bouncy bouncy
      acceleratorCopyFromDevice((void *)send_buf_extract_mpi,(void *)&hsend_buf[0],bytes);
      grid->SendToRecvFrom((void *)&hsend_buf[0],
			   xmit_to_rank,
			   (void *)&hrecv_buf[0],
			   recv_from_rank,
			   bytes);
      acceleratorCopyToDevice((void *)&hrecv_buf[0],(void *)recv_buf_extract_mpi,bytes);
#endif

      xbytes+=bytes;
      grid->Barrier();
@ -311,234 +343,6 @@ template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vo
    std::cout << GridLogPerformance << " Cshift BW "<<(2.0*xbytes)/tcomms<<" MB/s "<<2*xbytes<< " Bytes "<<std::endl;
  }
}
#else
template<class vobj> void Cshift_comms(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
{
  typedef typename vobj::vector_type vector_type;
  typedef typename vobj::scalar_type scalar_type;

  GridBase *grid=rhs.Grid();
  Lattice<vobj> temp(rhs.Grid());

  int fd              = rhs.Grid()->_fdimensions[dimension];
  int rd              = rhs.Grid()->_rdimensions[dimension];
  int pd              = rhs.Grid()->_processors[dimension];
  int simd_layout     = rhs.Grid()->_simd_layout[dimension];
  int comm_dim        = rhs.Grid()->_processors[dimension] >1 ;
  assert(simd_layout==1);
  assert(comm_dim==1);
  assert(shift>=0);
  assert(shift<fd);
  RealD tcopy=0.0;
  RealD tgather=0.0;
  RealD tscatter=0.0;
  RealD tcomms=0.0;
  uint64_t xbytes=0;

  int buffer_size = rhs.Grid()->_slice_nblock[dimension]*rhs.Grid()->_slice_block[dimension];
  static cshiftVector<vobj> send_buf_v; send_buf_v.resize(buffer_size);
  static cshiftVector<vobj> recv_buf_v; recv_buf_v.resize(buffer_size);
  vobj *send_buf;
  vobj *recv_buf;
  {
    grid->ShmBufferFreeAll();
    size_t bytes = buffer_size*sizeof(vobj);
    send_buf=(vobj *)grid->ShmBufferMalloc(bytes);
    recv_buf=(vobj *)grid->ShmBufferMalloc(bytes);
  }

  int cb= (cbmask==0x2)? Odd : Even;
  int sshift= rhs.Grid()->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);

  for(int x=0;x<rd;x++){

    int sx        = (x+sshift)%rd;
    int comm_proc = ((x+sshift)/rd)%pd;

    if (comm_proc==0) {

      tcopy-=usecond();
      Copy_plane(ret,rhs,dimension,x,sx,cbmask);
      tcopy+=usecond();

    } else {

      int words = buffer_size;
      if (cbmask != 0x3) words=words>>1;

      int bytes = words * sizeof(vobj);

      tgather-=usecond();
      Gather_plane_simple (rhs,send_buf_v,dimension,sx,cbmask);
      tgather+=usecond();

      // int rank           = grid->_processor;
      int recv_from_rank;
      int xmit_to_rank;
      grid->ShiftedRanks(dimension,comm_proc,xmit_to_rank,recv_from_rank);

      tcomms-=usecond();
      // grid->Barrier();

      acceleratorCopyDeviceToDevice((void *)&send_buf_v[0],(void *)&send_buf[0],bytes);
      grid->SendToRecvFrom((void *)&send_buf[0],
			   xmit_to_rank,
			   (void *)&recv_buf[0],
			   recv_from_rank,
			   bytes);
      xbytes+=bytes;
      acceleratorCopyDeviceToDevice((void *)&recv_buf[0],(void *)&recv_buf_v[0],bytes);

      // grid->Barrier();
      tcomms+=usecond();

      tscatter-=usecond();
      Scatter_plane_simple (ret,recv_buf_v,dimension,x,cbmask);
      tscatter+=usecond();
    }
  }
  if(Cshift_verbose){
    std::cout << GridLogPerformance << " Cshift copy "<<tcopy/1e3<<" ms"<<std::endl;
    std::cout << GridLogPerformance << " Cshift gather "<<tgather/1e3<<" ms"<<std::endl;
    std::cout << GridLogPerformance << " Cshift scatter "<<tscatter/1e3<<" ms"<<std::endl;
    std::cout << GridLogPerformance << " Cshift comm "<<tcomms/1e3<<" ms"<<std::endl;
    std::cout << GridLogPerformance << " Cshift BW "<<(2.0*xbytes)/tcomms<<" MB/s "<<2*xbytes<< " Bytes "<<std::endl;
  }
}

template<class vobj> void Cshift_comms_simd(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
{
  GridBase *grid=rhs.Grid();
  const int Nsimd = grid->Nsimd();
  typedef typename vobj::vector_type vector_type;
  typedef typename vobj::scalar_object scalar_object;
  typedef typename vobj::scalar_type scalar_type;

  int fd = grid->_fdimensions[dimension];
  int rd = grid->_rdimensions[dimension];
  int ld = grid->_ldimensions[dimension];
  int pd = grid->_processors[dimension];
  int simd_layout     = grid->_simd_layout[dimension];
  int comm_dim        = grid->_processors[dimension] >1 ;

  //std::cout << "Cshift_comms_simd dim "<< dimension << " fd "<<fd<<" rd "<<rd
  //          << " ld "<<ld<<" pd " << pd<<" simd_layout "<<simd_layout
  //          << " comm_dim " << comm_dim << " cbmask " << cbmask <<std::endl;

  assert(comm_dim==1);
  assert(simd_layout==2);
  assert(shift>=0);
  assert(shift<fd);
  RealD tcopy=0.0;
  RealD tgather=0.0;
  RealD tscatter=0.0;
  RealD tcomms=0.0;
  uint64_t xbytes=0;

  int permute_type=grid->PermuteType(dimension);

  ///////////////////////////////////////////////
  // Simd direction uses an extract/merge pair
  ///////////////////////////////////////////////
  int buffer_size = grid->_slice_nblock[dimension]*grid->_slice_block[dimension];
  // int words = sizeof(vobj)/sizeof(vector_type);

  static std::vector<cshiftVector<scalar_object> >  send_buf_extract; send_buf_extract.resize(Nsimd);
  static std::vector<cshiftVector<scalar_object> >  recv_buf_extract; recv_buf_extract.resize(Nsimd);
  scalar_object *  recv_buf_extract_mpi;
  scalar_object *  send_buf_extract_mpi;
  {
    size_t bytes = sizeof(scalar_object)*buffer_size;
    grid->ShmBufferFreeAll();
    send_buf_extract_mpi = (scalar_object *)grid->ShmBufferMalloc(bytes);
    recv_buf_extract_mpi = (scalar_object *)grid->ShmBufferMalloc(bytes);
  }
  for(int s=0;s<Nsimd;s++){
    send_buf_extract[s].resize(buffer_size);
    recv_buf_extract[s].resize(buffer_size);
  }

  int bytes = buffer_size*sizeof(scalar_object);

  ExtractPointerArray<scalar_object>  pointers(Nsimd); //
  ExtractPointerArray<scalar_object> rpointers(Nsimd); // received pointers

  ///////////////////////////////////////////
  // Work out what to send where
  ///////////////////////////////////////////
  int cb    = (cbmask==0x2)? Odd : Even;
  int sshift= grid->CheckerBoardShiftForCB(rhs.Checkerboard(),dimension,shift,cb);

  // loop over outer coord planes orthog to dim
  for(int x=0;x<rd;x++){

    // FIXME call local permute copy if none are offnode.
    for(int i=0;i<Nsimd;i++){
      pointers[i] = &send_buf_extract[i][0];
    }
    tgather-=usecond();
    int sx   = (x+sshift)%rd;
    Gather_plane_extract(rhs,pointers,dimension,sx,cbmask);
    tgather+=usecond();

    for(int i=0;i<Nsimd;i++){

      int inner_bit = (Nsimd>>(permute_type+1));
      int ic= (i&inner_bit)? 1:0;

      int my_coor  = rd*ic + x;
      int nbr_coor = my_coor+sshift;
      int nbr_proc = ((nbr_coor)/ld) % pd;// relative shift in processors

      int nbr_ic   = (nbr_coor%ld)/rd;    // inner coord of peer
      int nbr_ox   = (nbr_coor%rd);       // outer coord of peer
      int nbr_lane = (i&(~inner_bit));

      int recv_from_rank;
      int xmit_to_rank;

      if (nbr_ic) nbr_lane|=inner_bit;

      assert (sx == nbr_ox);

      if(nbr_proc){
	grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);

	tcomms-=usecond();
	// grid->Barrier();

	acceleratorCopyDeviceToDevice((void *)&send_buf_extract[nbr_lane][0],(void *)send_buf_extract_mpi,bytes);
	grid->SendToRecvFrom((void *)send_buf_extract_mpi,
			     xmit_to_rank,
			     (void *)recv_buf_extract_mpi,
			     recv_from_rank,
			     bytes);
	acceleratorCopyDeviceToDevice((void *)recv_buf_extract_mpi,(void *)&recv_buf_extract[i][0],bytes);
	xbytes+=bytes;

	// grid->Barrier();
	tcomms+=usecond();
	rpointers[i] = &recv_buf_extract[i][0];
      } else {
	rpointers[i] = &send_buf_extract[nbr_lane][0];
      }

    }
    tscatter-=usecond();
    Scatter_plane_merge(ret,rpointers,dimension,x,cbmask);
    tscatter+=usecond();

  }
  if(Cshift_verbose){
    std::cout << GridLogPerformance << " Cshift (s) copy "<<tcopy/1e3<<" ms"<<std::endl;
    std::cout << GridLogPerformance << " Cshift (s) gather "<<tgather/1e3<<" ms"<<std::endl;
    std::cout << GridLogPerformance << " Cshift (s) scatter "<<tscatter/1e3<<" ms"<<std::endl;
    std::cout << GridLogPerformance << " Cshift (s) comm "<<tcomms/1e3<<" ms"<<std::endl;
    std::cout << GridLogPerformance << " Cshift BW "<<(2.0*xbytes)/tcomms<<" MB/s"<<std::endl;
  }
}
#endif

NAMESPACE_END(Grid);
@ -55,7 +55,7 @@ inline void sliceSumReduction_cub_small(const vobj *Data,
  d_offsets = static_cast<int*>(acceleratorAllocDevice((rd+1)*sizeof(int)));

  //copy offsets to device
  acceleratorCopyToDeviceAsync(&offsets[0],d_offsets,sizeof(int)*(rd+1),computeStream);
  acceleratorCopyToDeviceAsynch(&offsets[0],d_offsets,sizeof(int)*(rd+1),computeStream);


  gpuError_t gpuErr = gpucub::DeviceSegmentedReduce::Reduce(temp_storage_array, temp_storage_bytes, rb_p,d_out, rd, d_offsets, d_offsets+1, ::gpucub::Sum(), zero_init, computeStream);
@ -88,7 +88,7 @@ inline void sliceSumReduction_cub_small(const vobj *Data,
    exit(EXIT_FAILURE);
  }

  acceleratorCopyFromDeviceAsync(d_out,&lvSum[0],rd*sizeof(vobj),computeStream);
  acceleratorCopyFromDeviceAsynch(d_out,&lvSum[0],rd*sizeof(vobj),computeStream);

  //sync after copy
  accelerator_barrier();
@ -466,6 +466,12 @@ public:
    static deviceVector<vobj> recv_buf;
    send_buf.resize(buffer_size*2*depth);
    recv_buf.resize(buffer_size*2*depth);
#ifndef ACCELERATOR_AWARE_MPI
    static hostVector<vobj> hsend_buf;
    static hostVector<vobj> hrecv_buf;
    hsend_buf.resize(buffer_size*2*depth);
    hrecv_buf.resize(buffer_size*2*depth);
#endif

    std::vector<MpiCommsRequest_t> fwd_req;
    std::vector<MpiCommsRequest_t> bwd_req;
@ -495,9 +501,16 @@ public:
      t_gather+=usecond()-t;

      t=usecond();
#ifdef ACCELERATOR_AWARE_MPI
      grid->SendToRecvFromBegin(fwd_req,
				(void *)&send_buf[d*buffer_size], xmit_to_rank,
				(void *)&recv_buf[d*buffer_size], recv_from_rank, bytes, tag);
#else
      acceleratorCopyFromDevice(&send_buf[d*buffer_size],&hsend_buf[d*buffer_size],bytes);
      grid->SendToRecvFromBegin(fwd_req,
				(void *)&hsend_buf[d*buffer_size], xmit_to_rank,
				(void *)&hrecv_buf[d*buffer_size], recv_from_rank, bytes, tag);
#endif
      t_comms+=usecond()-t;
    }
    for ( int d=0;d < depth ; d ++ ) {
@ -508,9 +521,16 @@ public:
      t_gather+= usecond() - t;

      t=usecond();
#ifdef ACCELERATOR_AWARE_MPI
      grid->SendToRecvFromBegin(bwd_req,
				(void *)&send_buf[(d+depth)*buffer_size], recv_from_rank,
				(void *)&recv_buf[(d+depth)*buffer_size], xmit_to_rank, bytes,tag);
#else
      acceleratorCopyFromDevice(&send_buf[(d+depth)*buffer_size],&hsend_buf[(d+depth)*buffer_size],bytes);
      grid->SendToRecvFromBegin(bwd_req,
				(void *)&hsend_buf[(d+depth)*buffer_size], recv_from_rank,
				(void *)&hrecv_buf[(d+depth)*buffer_size], xmit_to_rank, bytes,tag);
#endif
      t_comms+=usecond()-t;
    }

@ -533,8 +553,13 @@ public:

    t=usecond();
    grid->CommsComplete(fwd_req);
#ifndef ACCELERATOR_AWARE_MPI
    for ( int d=0;d < depth ; d ++ ) {
      acceleratorCopyToDevice(&hrecv_buf[d*buffer_size],&recv_buf[d*buffer_size],bytes);
    }
#endif
    t_comms+= usecond() - t;

    t=usecond();
    for ( int d=0;d < depth ; d ++ ) {
      ScatterSlice(recv_buf,to,nld-depth+d,dimension,plane*buffer_size); plane++;
@ -543,6 +568,11 @@ public:

    t=usecond();
    grid->CommsComplete(bwd_req);
#ifndef ACCELERATOR_AWARE_MPI
    for ( int d=0;d < depth ; d ++ ) {
      acceleratorCopyToDevice(&hrecv_buf[(d+depth)*buffer_size],&recv_buf[(d+depth)*buffer_size],bytes);
    }
#endif
    t_comms+= usecond() - t;

    t=usecond();
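
The exchange above posts all forward-face and backward-face transfers before completing any of them, so gathers and messages overlap; with non-aware MPI the host-to-device drain happens only after the matching CommsComplete. In outline (illustrative only; exchange_face and scatter_face stand for the gather + SendToRecvFromBegin and ScatterSlice code above):

  std::vector<MpiCommsRequest_t> fwd_req, bwd_req;
  for(int d=0; d<depth; d++) exchange_face(fwd_req, d);        // post forward faces
  for(int d=0; d<depth; d++) exchange_face(bwd_req, d+depth);  // post backward faces
  grid->CommsComplete(fwd_req);  // then (non-aware MPI) copy hrecv_buf -> recv_buf
  for(int d=0; d<depth; d++) scatter_face(d);
  grid->CommsComplete(bwd_req);  // likewise for the backward faces
  for(int d=0; d<depth; d++) scatter_face(d+depth);
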

Grid/qcd/action/fermion/CompactWilsonCloverFermion5D.h (new file, 196 lines)
@ -0,0 +1,196 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/qcd/action/fermion/CompactWilsonCloverFermion5D.h

    Copyright (C) 2020 - 2025

    Author: Daniel Richtmann <daniel.richtmann@gmail.com>
    Author: Nils Meyer <nils.meyer@ur.de>
    Author: Christoph Lehner <christoph@lhnr.de>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */

#pragma once

#include <Grid/qcd/action/fermion/WilsonFermion5D.h>
#include <Grid/qcd/action/fermion/WilsonCloverTypes.h>
#include <Grid/qcd/action/fermion/WilsonCloverHelpers.h>
#include <Grid/qcd/action/fermion/CloverHelpers.h>

NAMESPACE_BEGIN(Grid);

// see Grid/qcd/action/fermion/CompactWilsonCloverFermion.h for description

template<class Impl, class CloverHelpers>
class CompactWilsonCloverFermion5D : public WilsonFermion5D<Impl>,
				     public WilsonCloverHelpers<Impl>,
				     public CompactWilsonCloverHelpers<Impl> {
  /////////////////////////////////////////////
  // Sizes
  /////////////////////////////////////////////

public:

  INHERIT_COMPACT_CLOVER_SIZES(Impl);

  /////////////////////////////////////////////
  // Type definitions
  /////////////////////////////////////////////

public:

  INHERIT_IMPL_TYPES(Impl);
  INHERIT_CLOVER_TYPES(Impl);
  INHERIT_COMPACT_CLOVER_TYPES(Impl);

  typedef WilsonFermion5D<Impl>            WilsonBase;
  typedef WilsonCloverHelpers<Impl>        Helpers;
  typedef CompactWilsonCloverHelpers<Impl> CompactHelpers;

  /////////////////////////////////////////////
  // Constructors
  /////////////////////////////////////////////

public:

  CompactWilsonCloverFermion5D(GaugeField& _Umu,
			       GridCartesian         &FiveDimGrid,
			       GridRedBlackCartesian &FiveDimRedBlackGrid,
			       GridCartesian         &FourDimGrid,
			       GridRedBlackCartesian &FourDimRedBlackGrid,
			       const RealD _mass,
			       const RealD _csw_r = 0.0,
			       const RealD _csw_t = 0.0,
			       const RealD _cF = 1.0,
			       const ImplParams& impl_p = ImplParams());

  /////////////////////////////////////////////
  // Member functions (implementing interface)
  /////////////////////////////////////////////

public:

  virtual void Instantiatable() {};
  int ConstEE()     override { return 0; };
  int isTrivialEE() override { return 0; };

  void Dhop(const FermionField& in, FermionField& out, int dag) override;

  void DhopOE(const FermionField& in, FermionField& out, int dag) override;

  void DhopEO(const FermionField& in, FermionField& out, int dag) override;

  void DhopDir(const FermionField& in, FermionField& out, int dir, int disp) override;

  void DhopDirAll(const FermionField& in, std::vector<FermionField>& out) /* override */;

  void M(const FermionField& in, FermionField& out) override;

  void Mdag(const FermionField& in, FermionField& out) override;

  void Meooe(const FermionField& in, FermionField& out) override;

  void MeooeDag(const FermionField& in, FermionField& out) override;

  void Mooee(const FermionField& in, FermionField& out) override;

  void MooeeDag(const FermionField& in, FermionField& out) override;

  void MooeeInv(const FermionField& in, FermionField& out) override;

  void MooeeInvDag(const FermionField& in, FermionField& out) override;

  void Mdir(const FermionField& in, FermionField& out, int dir, int disp) override;

  void MdirAll(const FermionField& in, std::vector<FermionField>& out) override;

  void MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) override;

  void MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) override;

  void MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) override;

  /////////////////////////////////////////////
  // Member functions (internals)
  /////////////////////////////////////////////

  void MooeeInternal(const FermionField&        in,
		     FermionField&              out,
		     const CloverDiagonalField& diagonal,
		     const CloverTriangleField& triangle);

  /////////////////////////////////////////////
  // Helpers
  /////////////////////////////////////////////

  void ImportGauge(const GaugeField& _Umu) override;

  /////////////////////////////////////////////
  // Helpers
  /////////////////////////////////////////////

private:

  template<class Field>
  const MaskField* getCorrectMaskField(const Field &in) const {
    if(in.Grid()->_isCheckerBoarded) {
      if(in.Checkerboard() == Odd) {
	return &this->BoundaryMaskOdd;
      } else {
	return &this->BoundaryMaskEven;
      }
    } else {
      return &this->BoundaryMask;
    }
  }

  template<class Field>
  void ApplyBoundaryMask(Field& f) {
    const MaskField* m = getCorrectMaskField(f); assert(m != nullptr);
    assert(m != nullptr);
    CompactHelpers::ApplyBoundaryMask(f, *m);
  }

  /////////////////////////////////////////////
  // Member Data
  /////////////////////////////////////////////

public:

  RealD csw_r;
  RealD csw_t;
  RealD cF;
  int   n_rhs;

  bool fixedBoundaries;

  CloverDiagonalField Diagonal,    DiagonalEven,    DiagonalOdd;
  CloverDiagonalField DiagonalInv, DiagonalInvEven, DiagonalInvOdd;

  CloverTriangleField Triangle,    TriangleEven,    TriangleOdd;
  CloverTriangleField TriangleInv, TriangleInvEven, TriangleInvOdd;

  FermionField Tmp;

  MaskField BoundaryMask, BoundaryMaskEven, BoundaryMaskOdd;
};

NAMESPACE_END(Grid);
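
For orientation, the new 5d operator is constructed like WilsonFermion5D plus the clover couplings. A hedged usage sketch with Grid's usual SpaceTimeGrid helpers (Umu, Ls, mass, csw_r, csw_t and cF are user inputs; the CompactWilsonCloverFermion5DD typedef is added later in this diff):

  Coordinate latt = GridDefaultLatt();
  Coordinate simd = GridDefaultSimd(Nd, vComplexD::Nsimd());
  Coordinate mpi  = GridDefaultMpi();
  GridCartesian         *UGrid   = SpaceTimeGrid::makeFourDimGrid(latt, simd, mpi);
  GridRedBlackCartesian *UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian         *FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
  GridRedBlackCartesian *FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);

  CompactWilsonCloverFermion5DD Dwc(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid,
				    mass, csw_r, csw_t, cF);
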
@ -55,6 +55,7 @@ NAMESPACE_CHECK(Wilson);
NAMESPACE_CHECK(WilsonTM);
#include <Grid/qcd/action/fermion/WilsonCloverFermion.h>          // 4d wilson clover fermions
#include <Grid/qcd/action/fermion/CompactWilsonCloverFermion.h>   // 4d compact wilson clover fermions
#include <Grid/qcd/action/fermion/CompactWilsonCloverFermion5D.h> // 5d compact wilson clover fermions
NAMESPACE_CHECK(WilsonClover);
#include <Grid/qcd/action/fermion/WilsonFermion5D.h> // 5d base used by all 5d overlap types
NAMESPACE_CHECK(Wilson5D);
@ -164,12 +165,17 @@ typedef WilsonClover<WilsonTwoIndexAntiSymmetricImplD> WilsonCloverTwoIndexAntiS

// Compact Clover fermions
template <typename WImpl> using CompactWilsonClover    = CompactWilsonCloverFermion<WImpl, CompactCloverHelpers<WImpl>>;
template <typename WImpl> using CompactWilsonClover5D  = CompactWilsonCloverFermion5D<WImpl, CompactCloverHelpers<WImpl>>;
template <typename WImpl> using CompactWilsonExpClover = CompactWilsonCloverFermion<WImpl, CompactExpCloverHelpers<WImpl>>;

typedef CompactWilsonClover<WilsonImplD2> CompactWilsonCloverFermionD2;
typedef CompactWilsonClover<WilsonImplF> CompactWilsonCloverFermionF;
typedef CompactWilsonClover<WilsonImplD> CompactWilsonCloverFermionD;

typedef CompactWilsonClover5D<WilsonImplD2> CompactWilsonCloverFermion5DD2;
typedef CompactWilsonClover5D<WilsonImplF> CompactWilsonCloverFermion5DF;
typedef CompactWilsonClover5D<WilsonImplD> CompactWilsonCloverFermion5DD;

typedef CompactWilsonExpClover<WilsonImplD2> CompactWilsonExpCloverFermionD2;
typedef CompactWilsonExpClover<WilsonImplF> CompactWilsonExpCloverFermionF;
typedef CompactWilsonExpClover<WilsonImplD> CompactWilsonExpCloverFermionD;
@ -484,6 +484,11 @@ public:
    this->face_table_computed=1;
    assert(this->u_comm_offset==this->_unified_buffer_size);
    accelerator_barrier();
#ifdef NVLINK_GET
    this->_grid->StencilBarrier(); // He can now get mu local gather, I can get his
				   // Synch shared memory on a single nodes; could use an asynchronous barrier here and defer check
				   // Or issue barrier AFTER the DMA is running
#endif
  }

};
@ -91,13 +91,13 @@ public:
  virtual void  Mdag    (const FermionField &in, FermionField &out){assert(0);};

  // half checkerboard operations; leave unimplemented as abstract for now
  virtual void   Meooe       (const FermionField &in, FermionField &out){assert(0);};
  virtual void   Mooee       (const FermionField &in, FermionField &out){assert(0);};
  virtual void   MooeeInv    (const FermionField &in, FermionField &out){assert(0);};
  virtual void   Meooe       (const FermionField &in, FermionField &out);
  virtual void   Mooee       (const FermionField &in, FermionField &out);
  virtual void   MooeeInv    (const FermionField &in, FermionField &out);

  virtual void   MeooeDag    (const FermionField &in, FermionField &out){assert(0);};
  virtual void   MooeeDag    (const FermionField &in, FermionField &out){assert(0);};
  virtual void   MooeeInvDag (const FermionField &in, FermionField &out){assert(0);};
  virtual void   MeooeDag    (const FermionField &in, FermionField &out);
  virtual void   MooeeDag    (const FermionField &in, FermionField &out);
  virtual void   MooeeInvDag (const FermionField &in, FermionField &out);
  virtual void   Mdir        (const FermionField &in, FermionField &out,int dir,int disp){assert(0);}; // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
  virtual void   MdirAll     (const FermionField &in, std::vector<FermionField> &out){assert(0);};     // case by case Wilson, Clover, Cayley, ContFrac, PartFrac
@ -0,0 +1,376 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/qcd/action/fermion/CompactWilsonCloverFermion5DImplementation.h

    Copyright (C) 2017 - 2025

    Author: paboyle <paboyle@ph.ed.ac.uk>
    Author: Guido Cossu <guido.cossu@ed.ac.uk>
    Author: Daniel Richtmann <daniel.richtmann@gmail.com>
    Author: Christoph Lehner <christoph@lhnr.de>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */

#include <Grid/Grid.h>
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/CompactWilsonCloverFermion5D.h>


NAMESPACE_BEGIN(Grid);
template<class Impl, class CloverHelpers>
CompactWilsonCloverFermion5D<Impl, CloverHelpers>::CompactWilsonCloverFermion5D(GaugeField& _Umu,
										GridCartesian         &FiveDimGrid,
										GridRedBlackCartesian &FiveDimRedBlackGrid,
										GridCartesian         &FourDimGrid,
										GridRedBlackCartesian &FourDimRedBlackGrid,
										const RealD _mass,
										const RealD _csw_r,
										const RealD _csw_t,
										const RealD _cF,
										const ImplParams& impl_p)
  : WilsonBase(_Umu, FiveDimGrid, FiveDimRedBlackGrid, FourDimGrid, FourDimRedBlackGrid, _mass, impl_p)
  , csw_r(_csw_r)
  , csw_t(_csw_t)
  , cF(_cF)
  , fixedBoundaries(impl_p.boundary_phases[Nd-1] == 0.0)
  , Diagonal(&FourDimGrid),        Triangle(&FourDimGrid)
  , DiagonalEven(&FourDimRedBlackGrid),    TriangleEven(&FourDimRedBlackGrid)
  , DiagonalOdd(&FourDimRedBlackGrid),     TriangleOdd(&FourDimRedBlackGrid)
  , DiagonalInv(&FourDimGrid),     TriangleInv(&FourDimGrid)
  , DiagonalInvEven(&FourDimRedBlackGrid), TriangleInvEven(&FourDimRedBlackGrid)
  , DiagonalInvOdd(&FourDimRedBlackGrid),  TriangleInvOdd(&FourDimRedBlackGrid)
  , Tmp(&FiveDimGrid)
  , BoundaryMask(&FiveDimGrid)
  , BoundaryMaskEven(&FiveDimRedBlackGrid), BoundaryMaskOdd(&FiveDimRedBlackGrid)
{
  assert(Nd == 4 && Nc == 3 && Ns == 4 && Impl::Dimension == 3);

  csw_r *= 0.5;
  csw_t *= 0.5;
  //if (clover_anisotropy.isAnisotropic)
  //  csw_r /= clover_anisotropy.xi_0;

  ImportGauge(_Umu);
  if (fixedBoundaries) {
    this->BoundaryMaskEven.Checkerboard() = Even;
    this->BoundaryMaskOdd.Checkerboard() = Odd;
    CompactHelpers::SetupMasks(this->BoundaryMask, this->BoundaryMaskEven, this->BoundaryMaskOdd);
  }
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::Dhop(const FermionField& in, FermionField& out, int dag) {
  WilsonBase::Dhop(in, out, dag);
  if(fixedBoundaries) ApplyBoundaryMask(out);
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::DhopOE(const FermionField& in, FermionField& out, int dag) {
  WilsonBase::DhopOE(in, out, dag);
  if(fixedBoundaries) ApplyBoundaryMask(out);
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::DhopEO(const FermionField& in, FermionField& out, int dag) {
  WilsonBase::DhopEO(in, out, dag);
  if(fixedBoundaries) ApplyBoundaryMask(out);
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::DhopDir(const FermionField& in, FermionField& out, int dir, int disp) {
  WilsonBase::DhopDir(in, out, dir, disp);
  if(this->fixedBoundaries) ApplyBoundaryMask(out);
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::DhopDirAll(const FermionField& in, std::vector<FermionField>& out) {
  WilsonBase::DhopDirAll(in, out);
  if(this->fixedBoundaries) {
    for(auto& o : out) ApplyBoundaryMask(o);
  }
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::M(const FermionField& in, FermionField& out) {
  out.Checkerboard() = in.Checkerboard();
  WilsonBase::Dhop(in, out, DaggerNo); // call base to save applying bc
  Mooee(in, Tmp);
  axpy(out, 1.0, out, Tmp);
  if(fixedBoundaries) ApplyBoundaryMask(out);
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::Mdag(const FermionField& in, FermionField& out) {
  out.Checkerboard() = in.Checkerboard();
  WilsonBase::Dhop(in, out, DaggerYes); // call base to save applying bc
  MooeeDag(in, Tmp);
  axpy(out, 1.0, out, Tmp);
  if(fixedBoundaries) ApplyBoundaryMask(out);
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::Meooe(const FermionField& in, FermionField& out) {
  WilsonBase::Meooe(in, out);
  if(fixedBoundaries) ApplyBoundaryMask(out);
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MeooeDag(const FermionField& in, FermionField& out) {
  WilsonBase::MeooeDag(in, out);
  if(fixedBoundaries) ApplyBoundaryMask(out);
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::Mooee(const FermionField& in, FermionField& out) {
  if(in.Grid()->_isCheckerBoarded) {
    if(in.Checkerboard() == Odd) {
      MooeeInternal(in, out, DiagonalOdd, TriangleOdd);
    } else {
      MooeeInternal(in, out, DiagonalEven, TriangleEven);
    }
  } else {
    MooeeInternal(in, out, Diagonal, Triangle);
  }
  if(fixedBoundaries) ApplyBoundaryMask(out);
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MooeeDag(const FermionField& in, FermionField& out) {
  Mooee(in, out); // blocks are hermitian
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MooeeInv(const FermionField& in, FermionField& out) {
  if(in.Grid()->_isCheckerBoarded) {
    if(in.Checkerboard() == Odd) {
      MooeeInternal(in, out, DiagonalInvOdd, TriangleInvOdd);
    } else {
      MooeeInternal(in, out, DiagonalInvEven, TriangleInvEven);
    }
  } else {
    MooeeInternal(in, out, DiagonalInv, TriangleInv);
  }
  if(fixedBoundaries) ApplyBoundaryMask(out);
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MooeeInvDag(const FermionField& in, FermionField& out) {
  MooeeInv(in, out); // blocks are hermitian
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::Mdir(const FermionField& in, FermionField& out, int dir, int disp) {
  DhopDir(in, out, dir, disp);
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MdirAll(const FermionField& in, std::vector<FermionField>& out) {
  DhopDirAll(in, out);
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MDeriv(GaugeField& force, const FermionField& X, const FermionField& Y, int dag) {
  assert(!fixedBoundaries); // TODO check for changes required for open bc

  // NOTE: code copied from original clover term
  conformable(X.Grid(), Y.Grid());
  conformable(X.Grid(), force.Grid());
  GaugeLinkField force_mu(force.Grid()), lambda(force.Grid());
  GaugeField clover_force(force.Grid());
  PropagatorField Lambda(force.Grid());

  // Guido: Here we are hitting some performance issues:
  // need to extract the components of the DoubledGaugeField
  // for each call
  // Possible solution
  // Create a vector object to store them? (cons: wasting space)
  std::vector<GaugeLinkField> U(Nd, this->Umu.Grid());

  Impl::extractLinkField(U, this->Umu);

  force = Zero();
  // Derivative of the Wilson hopping term
  this->DhopDeriv(force, X, Y, dag);

  ///////////////////////////////////////////////////////////
  // Clover term derivative
  ///////////////////////////////////////////////////////////
  Impl::outerProductImpl(Lambda, X, Y);
  //std::cout << "Lambda:" << Lambda << std::endl;

  Gamma::Algebra sigma[] = {
      Gamma::Algebra::SigmaXY,
      Gamma::Algebra::SigmaXZ,
      Gamma::Algebra::SigmaXT,
      Gamma::Algebra::MinusSigmaXY,
      Gamma::Algebra::SigmaYZ,
      Gamma::Algebra::SigmaYT,
      Gamma::Algebra::MinusSigmaXZ,
      Gamma::Algebra::MinusSigmaYZ,
      Gamma::Algebra::SigmaZT,
      Gamma::Algebra::MinusSigmaXT,
      Gamma::Algebra::MinusSigmaYT,
      Gamma::Algebra::MinusSigmaZT};

  /*
    sigma_{\mu \nu}=
      | 0          sigma[0]   sigma[1]   sigma[2]  |
      | sigma[3]   0          sigma[4]   sigma[5]  |
      | sigma[6]   sigma[7]   0          sigma[8]  |
      | sigma[9]   sigma[10]  sigma[11]  0         |
  */

  int count = 0;
  clover_force = Zero();
  for (int mu = 0; mu < 4; mu++)
  {
    force_mu = Zero();
    for (int nu = 0; nu < 4; nu++)
    {
      if (mu == nu)
	continue;

      RealD factor;
      if (nu == 4 || mu == 4)
      {
	factor = 2.0 * csw_t;
      }
      else
      {
	factor = 2.0 * csw_r;
      }
      PropagatorField Slambda = Gamma(sigma[count]) * Lambda; // sigma checked
      Impl::TraceSpinImpl(lambda, Slambda);                   // traceSpin ok
      force_mu -= factor*CloverHelpers::Cmunu(U, lambda, mu, nu); // checked
      count++;
    }

    pokeLorentz(clover_force, U[mu] * force_mu, mu);
  }
  //clover_force *= csw;
  force += clover_force;
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MooDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) {
  assert(0);
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MeeDeriv(GaugeField& mat, const FermionField& U, const FermionField& V, int dag) {
  assert(0);
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::MooeeInternal(const FermionField&        in,
								      FermionField&              out,
								      const CloverDiagonalField& diagonal,
								      const CloverTriangleField& triangle) {
  assert(in.Checkerboard() == Odd || in.Checkerboard() == Even);
  out.Checkerboard() = in.Checkerboard();
  conformable(in, out);
  CompactHelpers::MooeeKernel(diagonal.oSites(), this->Ls, in, out, diagonal, triangle);
}

template<class Impl, class CloverHelpers>
void CompactWilsonCloverFermion5D<Impl, CloverHelpers>::ImportGauge(const GaugeField& _Umu) {
  // NOTE: parts copied from original implementation

  // Import gauge into base class
  double t0 = usecond();
  WilsonBase::ImportGauge(_Umu); // NOTE: called here and in wilson constructor -> performed twice, but can't avoid that

  // Initialize temporary variables
  double t1 = usecond();
  conformable(_Umu.Grid(), this->GaugeGrid());
  GridBase* grid = _Umu.Grid();
  typename Impl::GaugeLinkField Bx(grid), By(grid), Bz(grid), Ex(grid), Ey(grid), Ez(grid);
  CloverField TmpOriginal(grid);
  CloverField TmpInverse(grid);

  // Compute the field strength terms mu>nu
  double t2 = usecond();
  WilsonLoops<Impl>::FieldStrength(Bx, _Umu, Zdir, Ydir);
  WilsonLoops<Impl>::FieldStrength(By, _Umu, Zdir, Xdir);
  WilsonLoops<Impl>::FieldStrength(Bz, _Umu, Ydir, Xdir);
  WilsonLoops<Impl>::FieldStrength(Ex, _Umu, Tdir, Xdir);
  WilsonLoops<Impl>::FieldStrength(Ey, _Umu, Tdir, Ydir);
  WilsonLoops<Impl>::FieldStrength(Ez, _Umu, Tdir, Zdir);

  // Compute the Clover Operator acting on Colour and Spin
  // multiply here by the clover coefficients for the anisotropy
  double t3 = usecond();
  TmpOriginal  = Helpers::fillCloverYZ(Bx) * csw_r;
  TmpOriginal += Helpers::fillCloverXZ(By) * csw_r;
  TmpOriginal += Helpers::fillCloverXY(Bz) * csw_r;
  TmpOriginal += Helpers::fillCloverXT(Ex) * csw_t;
  TmpOriginal += Helpers::fillCloverYT(Ey) * csw_t;
  TmpOriginal += Helpers::fillCloverZT(Ez) * csw_t;

  // Instantiate the clover term
  // - In case of the standard clover the mass term is added
  // - In case of the exponential clover the clover term is exponentiated
  double t4 = usecond();
  CloverHelpers::InstantiateClover(TmpOriginal, TmpInverse, csw_t, 4.0 + this->M5 /*this->diag_mass*/);

  // Convert the data layout of the clover term
  double t5 = usecond();
  CompactHelpers::ConvertLayout(TmpOriginal, Diagonal, Triangle);

  // Modify the clover term at the temporal boundaries in case of open boundary conditions
  double t6 = usecond();
  if(fixedBoundaries) CompactHelpers::ModifyBoundaries(Diagonal, Triangle, csw_t, cF, 4.0 + this->M5 /*this->diag_mass*/);

  // Invert the Clover term
  // In case of the exponential clover with (anti-)periodic boundary conditions exp(-Clover) saved
  // in TmpInverse can be used. In all other cases the clover term has to be explicitly inverted.
  // TODO: For now this inversion is explicitly done on the CPU
  double t7 = usecond();
  CloverHelpers::InvertClover(TmpInverse, Diagonal, Triangle, DiagonalInv, TriangleInv, fixedBoundaries);

  // Fill the remaining clover fields
  double t8 = usecond();
  pickCheckerboard(Even, DiagonalEven,    Diagonal);
  pickCheckerboard(Even, TriangleEven,    Triangle);
  pickCheckerboard(Odd,  DiagonalOdd,     Diagonal);
  pickCheckerboard(Odd,  TriangleOdd,     Triangle);
  pickCheckerboard(Even, DiagonalInvEven, DiagonalInv);
  pickCheckerboard(Even, TriangleInvEven, TriangleInv);
  pickCheckerboard(Odd,  DiagonalInvOdd,  DiagonalInv);
  pickCheckerboard(Odd,  TriangleInvOdd,  TriangleInv);

  // Report timings
  double t9 = usecond();

  std::cout << GridLogDebug << "CompactWilsonCloverFermion5D::ImportGauge timings:" << std::endl;
  std::cout << GridLogDebug << "WilsonFermion::Importgauge = " << (t1 - t0) / 1e6 << std::endl;
  std::cout << GridLogDebug << "allocations =                " << (t2 - t1) / 1e6 << std::endl;
  std::cout << GridLogDebug << "field strength =             " << (t3 - t2) / 1e6 << std::endl;
  std::cout << GridLogDebug << "fill clover =                " << (t4 - t3) / 1e6 << std::endl;
  std::cout << GridLogDebug << "instantiate clover =         " << (t5 - t4) / 1e6 << std::endl;
  std::cout << GridLogDebug << "convert layout =             " << (t6 - t5) / 1e6 << std::endl;
  std::cout << GridLogDebug << "modify boundaries =          " << (t7 - t6) / 1e6 << std::endl;
  std::cout << GridLogDebug << "invert clover =              " << (t8 - t7) / 1e6 << std::endl;
  std::cout << GridLogDebug << "pick cbs =                   " << (t9 - t8) / 1e6 << std::endl;
  std::cout << GridLogDebug << "total =                      " << (t9 - t0) / 1e6 << std::endl;
}

NAMESPACE_END(Grid);
@ -14,6 +14,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Andrew Lawson <andrew.lawson1991@gmail.com>
Author: Vera Guelpers <V.M.Guelpers@soton.ac.uk>
Author: Christoph Lehner <christoph@lhnr.de>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -484,6 +485,54 @@ void WilsonFermion5D<Impl>::DW(const FermionField &in, FermionField &out,int dag
  Dhop(in,out,dag); // -0.5 is included
  axpy(out,4.0-M5,in,out);
}
template <class Impl>
void WilsonFermion5D<Impl>::Meooe(const FermionField &in, FermionField &out)
{
  if (in.Checkerboard() == Odd) {
    DhopEO(in, out, DaggerNo);
  } else {
    DhopOE(in, out, DaggerNo);
  }
}

template <class Impl>
void WilsonFermion5D<Impl>::MeooeDag(const FermionField &in, FermionField &out)
{
  if (in.Checkerboard() == Odd) {
    DhopEO(in, out, DaggerYes);
  } else {
    DhopOE(in, out, DaggerYes);
  }
}

template <class Impl>
void WilsonFermion5D<Impl>::Mooee(const FermionField &in, FermionField &out)
{
  out.Checkerboard() = in.Checkerboard();
  typename FermionField::scalar_type scal(4.0 + M5);
  out = scal * in;
}

template <class Impl>
void WilsonFermion5D<Impl>::MooeeDag(const FermionField &in, FermionField &out)
{
  out.Checkerboard() = in.Checkerboard();
  Mooee(in, out);
}

template<class Impl>
void WilsonFermion5D<Impl>::MooeeInv(const FermionField &in, FermionField &out)
{
  out.Checkerboard() = in.Checkerboard();
  out = (1.0/(4.0 + M5))*in;
}

template<class Impl>
void WilsonFermion5D<Impl>::MooeeInvDag(const FermionField &in, FermionField &out)
{
  out.Checkerboard() = in.Checkerboard();
  MooeeInv(in,out);
}

template<class Impl>
void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const FermionField &in, RealD mass,std::vector<double> twist)
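
Since the 5d Wilson even-even block added above is just the constant (4 + M5), MooeeInv is its exact inverse. A short sanity-check sketch (illustrative; Dw, psi_e and FrbGrid are an assumed operator, even-checkerboard field and red-black grid):

  FermionField tmp(FrbGrid); tmp.Checkerboard() = Even;
  FermionField chk(FrbGrid); chk.Checkerboard() = Even;
  Dw.Mooee(psi_e, tmp);    // tmp = (4 + M5) * psi_e
  Dw.MooeeInv(tmp, chk);   // chk should reproduce psi_e
  chk = chk - psi_e;
  std::cout << "Mooee roundtrip error " << norm2(chk) << std::endl; // expect ~0
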
@ -63,7 +63,7 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
  } else {							\
    chi = coalescedRead(buf[SE->_offset],lane);			\
  }								\
  acceleratorSynchronise();					\
  acceleratorSynchronise();					\
  Impl::multLink(Uchi, U[sU], chi, Dir, SE, st);		\
  Recon(result, Uchi);

@ -517,7 +517,7 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st,  DoubledGaugeField
    if (Opt == WilsonKernelsStatic::OptInlineAsm ) {  ASM_CALL(AsmDhopSiteInt); return;}
#endif
  } else if( exterior ) {
    // dependent on result of merge
    // // dependent on result of merge
    acceleratorFenceComputeStream();
    if (Opt == WilsonKernelsStatic::OptGeneric    ) { KERNEL_CALL_EXT(GenericDhopSiteExt); return;}
    if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteExt); return;}
@ -0,0 +1,45 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./lib/qcd/action/fermion/instantiation/CompactWilsonCloverFermionInstantiation5D.cc.master

    Copyright (C) 2017 - 2025

    Author: paboyle <paboyle@ph.ed.ac.uk>
    Author: Guido Cossu <guido.cossu@ed.ac.uk>
    Author: Daniel Richtmann <daniel.richtmann@gmail.com>
    Author: Mattia Bruno <mattia.bruno@cern.ch>
    Author: Christoph Lehner <christoph@lhnr.de>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */

#include <Grid/Grid.h>
#include <Grid/qcd/spin/Dirac.h>
#include <Grid/qcd/action/fermion/CompactWilsonCloverFermion5D.h>
#include <Grid/qcd/action/fermion/implementation/CompactWilsonCloverFermion5DImplementation.h>
#include <Grid/qcd/action/fermion/CloverHelpers.h>

NAMESPACE_BEGIN(Grid);

#include "impl.h"
template class CompactWilsonCloverFermion5D<IMPLEMENTATION, CompactCloverHelpers<IMPLEMENTATION>>;
template class CompactWilsonCloverFermion5D<IMPLEMENTATION, CompactExpCloverHelpers<IMPLEMENTATION>>;

NAMESPACE_END(Grid);
@ -0,0 +1 @@
../CompactWilsonCloverFermion5DInstantiation.cc.master
@ -0,0 +1 @@
../CompactWilsonCloverFermion5DInstantiation.cc.master
@ -62,7 +62,7 @@ do
done
done

CC_LIST="CompactWilsonCloverFermionInstantiation"
CC_LIST="CompactWilsonCloverFermionInstantiation CompactWilsonCloverFermion5DInstantiation"

for impl in $COMPACT_WILSON_IMPL_LIST
do
@ -76,27 +76,27 @@ public:
    return action;
  };

  virtual void deriv(const GaugeField &Umu,GaugeField & dSdU) {
  virtual void deriv(const GaugeField &U, GaugeField &dSdU) {
    //extend Ta to include Lorentz indexes
    RealD factor_p = c_plaq/RealD(Nc)*0.5;
    RealD factor_r = c_rect/RealD(Nc)*0.5;

    GridBase *grid = Umu.Grid();
    GridBase *grid = U.Grid();

    std::vector<GaugeLinkField> U (Nd,grid);
    std::vector<GaugeLinkField> Umu (Nd,grid);
    for(int mu=0;mu<Nd;mu++){
      U[mu] = PeekIndex<LorentzIndex>(Umu,mu);
      Umu[mu] = PeekIndex<LorentzIndex>(U,mu);
    }
    std::vector<GaugeLinkField> RectStaple(Nd,grid), Staple(Nd,grid);
    WilsonLoops<Gimpl>::StapleAndRectStapleAll(Staple, RectStaple, U, workspace);
    WilsonLoops<Gimpl>::StapleAndRectStapleAll(Staple, RectStaple, Umu, workspace);

    GaugeLinkField dSdU_mu(grid);
    GaugeLinkField staple(grid);

    for (int mu=0; mu < Nd; mu++){
      dSdU_mu = Ta(U[mu]*Staple[mu])*factor_p;
      dSdU_mu = dSdU_mu + Ta(U[mu]*RectStaple[mu])*factor_r;

      dSdU_mu = Ta(Umu[mu]*Staple[mu])*factor_p;
      dSdU_mu = dSdU_mu + Ta(Umu[mu]*RectStaple[mu])*factor_r;

      PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
    }
@ -73,20 +73,23 @@ public:
    // extend Ta to include Lorentz indexes

    RealD factor = 0.5 * beta / RealD(Nc);
    GridBase *grid = U.Grid();

    GaugeLinkField Umu(U.Grid());
    GaugeLinkField dSdU_mu(U.Grid());
    GaugeLinkField dSdU_mu(grid);
    std::vector<GaugeLinkField> Umu(Nd, grid);
    for (int mu = 0; mu < Nd; mu++) {
      Umu[mu] = PeekIndex<LorentzIndex>(U, mu);
    }

      Umu = PeekIndex<LorentzIndex>(U, mu);

    for (int mu = 0; mu < Nd; mu++) {
      // Staple in direction mu
      WilsonLoops<Gimpl>::Staple(dSdU_mu, U, mu);
      dSdU_mu = Ta(Umu * dSdU_mu) * factor;

      WilsonLoops<Gimpl>::Staple(dSdU_mu, Umu, mu);
      dSdU_mu = Ta(Umu[mu] * dSdU_mu) * factor;

      PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu);
    }
  }

private:
  RealD beta;
};
@ -207,11 +207,14 @@ std::vector<RealD> WilsonFlowBase<Gimpl>::flowMeasureEnergyDensityCloverleaf(con
|
||||
}
|
||||
|
||||
template <class Gimpl>
|
||||
void WilsonFlowBase<Gimpl>::setDefaultMeasurements(int topq_meas_interval){
|
||||
addMeasurement(1, [](int step, RealD t, const typename Gimpl::GaugeField &U){
|
||||
void WilsonFlowBase<Gimpl>::setDefaultMeasurements(int meas_interval){
|
||||
addMeasurement(meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " << step << " " << t << " " << energyDensityPlaquette(t,U) << std::endl;
|
||||
});
|
||||
addMeasurement(topq_meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
|
||||
addMeasurement(meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Energy density (cloverleaf) : " << step << " " << t << " " << energyDensityCloverleaf(t,U) << std::endl;
|
||||
});
|
||||
addMeasurement(meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " << WilsonLoops<Gimpl>::TopologicalCharge(U) << std::endl;
|
||||
});
|
||||
}
|
||||
|
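// Illustrative sketch, not from the Grid source: callers can register extra
// observables with the same lambda signature used above. The wrapper name,
// interval and observable below are hypothetical; avgPlaquette is the
// standard WilsonLoops helper.
template <class Gimpl>
void addPlaquetteMeasurement(WilsonFlowBase<Gimpl> &flow, int interval) {
  flow.addMeasurement(interval, [](int step, RealD t, const typename Gimpl::GaugeField &U) {
    // print flow time and average plaquette at each sampled step
    std::cout << GridLogMessage << "[WilsonFlow] Plaquette : " << step << " " << t
              << " " << WilsonLoops<Gimpl>::avgPlaquette(U) << std::endl;
  });
}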
@ -292,19 +292,21 @@ public:
//////////////////////////////////////////////////
// the sum over all nu-oriented staples for nu != mu on each site
//////////////////////////////////////////////////
static void Staple(GaugeMat &staple, const GaugeLorentz &Umu, int mu) {
static void Staple(GaugeMat &staple, const GaugeLorentz &U, int mu) {

GridBase *grid = Umu.Grid();

std::vector<GaugeMat> U(Nd, grid);
std::vector<GaugeMat> Umu(Nd, U.grid());
for (int d = 0; d < Nd; d++) {
U[d] = PeekIndex<LorentzIndex>(Umu, d);
Umu[d] = PeekIndex<LorentzIndex>(U, d);
}
Staple(staple, U, mu);
Staple(staple, Umu, mu);
}

static void Staple(GaugeMat &staple, const std::vector<GaugeMat> &U, int mu) {
staple = Zero();
static void Staple(GaugeMat &staple, const std::vector<GaugeMat> &Umu, int mu) {

autoView(staple_v, staple, AcceleratorWrite);
accelerator_for(i, staple.Grid()->oSites(), Simd::Nsimd(), {
staple_v[i] = Zero();
});

for (int nu = 0; nu < Nd; nu++) {

@ -318,12 +320,12 @@ public:
// |
// __|
//


staple += Gimpl::ShiftStaple(
Gimpl::CovShiftForward(
U[nu], nu,
Umu[nu], nu,
Gimpl::CovShiftBackward(
U[mu], mu, Gimpl::CovShiftIdentityBackward(U[nu], nu))),
Umu[mu], mu, Gimpl::CovShiftIdentityBackward(Umu[nu], nu))),
mu);

// __
@ -333,8 +335,8 @@ public:
//

staple += Gimpl::ShiftStaple(
Gimpl::CovShiftBackward(U[nu], nu,
Gimpl::CovShiftBackward(U[mu], mu, U[nu])), mu);
Gimpl::CovShiftBackward(Umu[nu], nu,
Gimpl::CovShiftBackward(Umu[mu], mu, Umu[nu])), mu);
}
}
}

@ -363,12 +363,16 @@ public:
////////////////////////////////////////////////////////////////////////
void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
{
// std::cout << "Communicate Begin "<<std::endl;
// _grid->Barrier();
FlightRecorder::StepLog("Communicate begin");
// All GPU kernel tasks must complete
// accelerator_barrier(); // All kernels should ALREADY be complete
// _grid->StencilBarrier(); // Everyone is here, so no one is running slow and still using the receive buffer
// But the HaloGather had a barrier too.
for(int i=0;i<Packets.size();i++){
// std::cout << "Communicate prepare "<<i<<std::endl;
// _grid->Barrier();
_grid->StencilSendToRecvFromPrepare(MpiReqs,
Packets[i].send_buf,
Packets[i].to_rank,Packets[i].do_send,
@ -376,8 +380,15 @@ public:
Packets[i].from_rank,Packets[i].do_recv,
Packets[i].xbytes,Packets[i].rbytes,i);
}
// std::cout << "Communicate PollDtoH "<<std::endl;
// _grid->Barrier();
_grid->StencilSendToRecvFromPollDtoH (MpiReqs); /* Starts MPI*/
// std::cout << "Communicate CopySynch "<<std::endl;
// _grid->Barrier();
acceleratorCopySynchronise();
// Starts intranode
for(int i=0;i<Packets.size();i++){
// std::cout << "Communicate Begin "<<i<<std::endl;
_grid->StencilSendToRecvFromBegin(MpiReqs,
Packets[i].send_buf,
Packets[i].to_rank,Packets[i].do_send,
@ -395,7 +406,14 @@ public:

void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs)
{
// std::cout << "Communicate Complete "<<std::endl;
// _grid->Barrier();
FlightRecorder::StepLog("Start communicate complete");
// std::cout << "Communicate Complete PollIRecv "<<std::endl;
// _grid->Barrier();
_grid->StencilSendToRecvFromPollIRecv(MpiReqs);
// std::cout << "Communicate Complete Complete "<<std::endl;
// _grid->Barrier();
_grid->StencilSendToRecvFromComplete(MpiReqs,0); // MPI is done
if ( this->partialDirichlet ) DslashLogPartial();
else if ( this->fullDirichlet ) DslashLogDirichlet();
@ -428,6 +446,7 @@ public:
Communicate();
CommsMergeSHM(compress);
CommsMerge(compress);
accelerator_barrier();
}

template<class compressor> int HaloGatherDir(const Lattice<vobj> &source,compressor &compress,int point,int & face_idx)
@ -483,6 +502,9 @@ public:
void HaloGather(const Lattice<vobj> &source,compressor &compress)
{
// accelerator_barrier();
//////////////////////////////////
// I will overwrite my send buffers
//////////////////////////////////
_grid->StencilBarrier();// Synch shared memory on a single node

assert(source.Grid()==_grid);
@ -496,7 +518,11 @@ public:
HaloGatherDir(source,compress,point,face_idx);
}
accelerator_barrier(); // All my local gathers are complete
// _grid->StencilBarrier();// Synch shared memory on a single node
#ifdef NVLINK_GET
_grid->StencilBarrier(); // He can now get my local gather, I can get his
// Synch shared memory on a single node; could use an asynchronous barrier here and defer the check
// Or issue the barrier AFTER the DMA is running
#endif
face_table_computed=1;
assert(u_comm_offset==_unified_buffer_size);
}
@ -535,6 +561,7 @@ public:
coalescedWrite(to[j] ,coalescedRead(from [j]));
});
acceleratorFenceComputeStream();
// Also fenced in WilsonKernels
}
}

@ -663,7 +690,7 @@ public:
}
}
}
std::cout << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
// std::cout << "BuildSurfaceList size is "<<surface_list_size<<std::endl;
surface_list.resize(surface_list_size);
std::vector<int> surface_list_host(surface_list_size);
int32_t ss=0;
@ -683,6 +710,7 @@ public:
}
}
acceleratorCopyToDevice(&surface_list_host[0],&surface_list[0],surface_list_size*sizeof(int));
// std::cout << GridLogMessage<<"BuildSurfaceList size is "<<surface_list_size<<std::endl;
}
/// Introduce a block structure and switch off comms on boundaries
void DirichletBlock(const Coordinate &dirichlet_block)
@ -774,8 +802,8 @@ public:
this->_entries_host_p = &_entries[0];
this->_entries_p = &_entries_device[0];

std::cout << GridLogMessage << " Stencil object allocated for "<<std::dec<<this->_osites
<<" sites table "<<std::hex<<this->_entries_p<< " GridPtr "<<_grid<<std::dec<<std::endl;
// std::cout << GridLogMessage << " Stencil object allocated for "<<std::dec<<this->_osites
// <<" sites table "<<std::hex<<this->_entries_p<< " GridPtr "<<_grid<<std::dec<<std::endl;

for(int ii=0;ii<npoints;ii++){

@ -242,19 +242,33 @@ inline void *acceleratorAllocDevice(size_t bytes)
return ptr;
};

typedef int acceleratorEvent_t;

inline void acceleratorFreeShared(void *ptr){ cudaFree(ptr);};
inline void acceleratorFreeDevice(void *ptr){ cudaFree(ptr);};
inline void acceleratorFreeHost(void *ptr){ cudaFree(ptr);};
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
inline void acceleratorCopyToDeviceAsync(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) { cudaMemcpyAsync(to,from,bytes, cudaMemcpyHostToDevice, stream);}
inline void acceleratorCopyFromDeviceAsync(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) { cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToHost, stream);}
inline void acceleratorCopyToDevice(const void *from,void *to,size_t bytes) { cudaMemcpy(to,from,bytes, cudaMemcpyHostToDevice);}
inline void acceleratorCopyFromDevice(const void *from,void *to,size_t bytes){ cudaMemcpy(to,from,bytes, cudaMemcpyDeviceToHost);}
inline void acceleratorMemSet(void *base,int value,size_t bytes) { cudaMemset(base,value,bytes);}
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) {
acceleratorCopyToDevice(from,to,bytes);
return 0;
}
inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from, void *to, size_t bytes, cudaStream_t stream = copyStream) {
acceleratorCopyFromDevice(from,to,bytes);
return 0;
}
inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
{
cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToDevice,copyStream);
return 0;
}
inline void acceleratorCopySynchronise(void) { cudaStreamSynchronize(copyStream); };
inline void acceleratorEventWait(acceleratorEvent_t ev)
{
//auto discard=cudaStreamSynchronize(ev);
}
inline int acceleratorEventIsComplete(acceleratorEvent_t ev){ acceleratorEventWait(ev) ; return 1;}

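// Illustrative usage sketch, not from the Grid source: on this CUDA path
// acceleratorEvent_t is a plain int, acceleratorEventWait is effectively a
// no-op and acceleratorEventIsComplete always returns 1, so completion is
// really guaranteed by synchronising copyStream.
//
//   acceleratorEvent_t ev = acceleratorCopyDeviceToDeviceAsynch(src,dst,bytes);
//   /* ... queue or run independent work here ... */
//   acceleratorEventWait(ev);      // trivial here; kept for API symmetry
//   acceleratorCopySynchronise();  // orders all copies issued on copyStream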
inline int acceleratorIsCommunicable(void *ptr)
@ -343,11 +357,28 @@ inline void acceleratorFreeDevice(void *ptr){free(ptr,*theGridAccelerator);};

inline void acceleratorCopySynchronise(void) { theCopyAccelerator->wait(); }

inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes);}
inline void acceleratorCopyToDeviceAsynch(void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes); }
inline void acceleratorCopyFromDeviceAsynch(void *from,void *to,size_t bytes){ theCopyAccelerator->memcpy(to,from,bytes); }
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}

///////
// Asynch event interface
///////
typedef sycl::event acceleratorEvent_t;

inline void acceleratorEventWait(acceleratorEvent_t ev)
{
ev.wait();
}

inline int acceleratorEventIsComplete(acceleratorEvent_t ev)
{
return (ev.get_info<sycl::info::event::command_execution_status>() == sycl::info::event_command_status::complete);
}

inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { return theCopyAccelerator->memcpy(to,from,bytes);}
inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from,void *to,size_t bytes) { return theCopyAccelerator->memcpy(to,from,bytes); }
inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from,void *to,size_t bytes) { return theCopyAccelerator->memcpy(to,from,bytes); }

inline void acceleratorCopyToDevice(const void *from,void *to,size_t bytes) { theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
inline void acceleratorCopyFromDevice(const void *from,void *to,size_t bytes){ theCopyAccelerator->memcpy(to,from,bytes); theCopyAccelerator->wait();}
inline void acceleratorMemSet(void *base,int value,size_t bytes) { theCopyAccelerator->memset(base,value,bytes); theCopyAccelerator->wait();}

inline int acceleratorIsCommunicable(void *ptr)
@ -358,8 +389,10 @@ inline int acceleratorIsCommunicable(void *ptr)
else return 0;
#endif
return 1;

}


#endif

//////////////////////////////////////////////
@ -459,7 +492,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
inline void *acceleratorAllocHost(size_t bytes)
{
void *ptr=NULL;
auto err = hipMallocHost((void **)&ptr,bytes);
auto err = hipHostMalloc((void **)&ptr,bytes);
if( err != hipSuccess ) {
ptr = (void *) NULL;
fprintf(stderr," hipHostMalloc failed for %ld %s \n",bytes,hipGetErrorString(err)); fflush(stderr);
@ -492,23 +525,35 @@ inline void *acceleratorAllocDevice(size_t bytes)
inline void acceleratorFreeHost(void *ptr){ auto discard=hipFree(ptr);};
inline void acceleratorFreeShared(void *ptr){ auto discard=hipFree(ptr);};
inline void acceleratorFreeDevice(void *ptr){ auto discard=hipFree(ptr);};
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { auto discard=hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ auto discard=hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}
inline void acceleratorCopyToDevice(const void *from,void *to,size_t bytes) { auto discard=hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
inline void acceleratorCopyFromDevice(const void *from,void *to,size_t bytes){ auto discard=hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}

inline void acceleratorMemSet(void *base,int value,size_t bytes) { auto discard=hipMemset(base,value,bytes);}

inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
typedef int acceleratorEvent_t;

inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
{
auto discard=hipMemcpyDtoDAsync(to,from,bytes, copyStream);
return 0;
}
inline void acceleratorCopyToDeviceAsync(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) {
auto r = hipMemcpyAsync(to,from,bytes, hipMemcpyHostToDevice, stream);
inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) {
acceleratorCopyToDevice(from,to,bytes);
return 0;
}
inline void acceleratorCopyFromDeviceAsync(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) {
auto r = hipMemcpyAsync(to,from,bytes, hipMemcpyDeviceToHost, stream);
inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from, void *to, size_t bytes, hipStream_t stream = copyStream) {
acceleratorCopyFromDevice(from,to,bytes);
return 0;
}
inline void acceleratorCopySynchronise(void) { auto discard=hipStreamSynchronize(copyStream); };

inline void acceleratorEventWait(acceleratorEvent_t ev)
{
// auto discard=hipStreamSynchronize(ev);
}
inline int acceleratorEventIsComplete(acceleratorEvent_t ev){ acceleratorEventWait(ev) ; return 1;}


#endif

inline void acceleratorPin(void *ptr,unsigned long bytes)
@ -545,6 +590,8 @@ inline void acceleratorPin(void *ptr,unsigned long bytes)

#undef GRID_SIMT

typedef int acceleratorEvent_t;

inline void acceleratorMem(void)
{
/*
@ -564,9 +611,12 @@ inline void acceleratorMem(void)

accelerator_inline int acceleratorSIMTlane(int Nsimd) { return 0; } // CUDA specific

inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes); }
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ thread_bcopy(from,to,bytes);}
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes);}
inline acceleratorEvent_t acceleratorCopyToDeviceAsynch(void *from,void *to,size_t bytes) { acceleratorCopyToDevice(from,to,bytes); return 0; }
inline acceleratorEvent_t acceleratorCopyFromDeviceAsynch(void *from,void *to,size_t bytes) { acceleratorCopyFromDevice(from,to,bytes); return 0; }
inline void acceleratorEventWait(acceleratorEvent_t ev){}
inline int acceleratorEventIsComplete(acceleratorEvent_t ev){ acceleratorEventWait(ev); return 1;}
inline acceleratorEvent_t acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { thread_bcopy(from,to,bytes); return 0;}

inline void acceleratorCopySynchronise(void) {};

inline int acceleratorIsCommunicable(void *ptr){ return 1; }
@ -655,9 +705,9 @@ inline void acceleratorCopyDeviceToDevice(void *from,void *to,size_t bytes)
acceleratorCopySynchronise();
}

template<class T> void acceleratorPut(T& dev,T&host)
template<class T> void acceleratorPut(T& dev,const T&host)
{
acceleratorCopyToDevice(&host,&dev,sizeof(T));
acceleratorCopyToDevice((void *)&host,&dev,sizeof(T));
}
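// Illustrative usage sketch, not from the Grid source; the struct and names
// are hypothetical. The const-correct acceleratorPut mirrors a host object
// into device memory, and acceleratorGet copies it back by value.
//
//   struct Params { double mass; int Ls; };
//   Params host_params{0.01, 16};
//   Params *dev_params = (Params *)acceleratorAllocDevice(sizeof(Params));
//   acceleratorPut(*dev_params, host_params);   // host -> device
//   Params check = acceleratorGet(*dev_params); // device -> host
//   acceleratorFreeDevice(dev_params);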
template<class T> T acceleratorGet(T& dev)
{

@ -73,9 +73,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#define thread_critical DO_PRAGMA(omp critical)

#ifdef GRID_OMP
inline void thread_bcopy(void *from, void *to,size_t bytes)
inline void thread_bcopy(const void *from, void *to,size_t bytes)
{
uint64_t *ufrom = (uint64_t *)from;
const uint64_t *ufrom = (const uint64_t *)from;
uint64_t *uto = (uint64_t *)to;
assert(bytes%8==0);
uint64_t words=bytes/8;
@ -84,7 +84,7 @@ inline void thread_bcopy(void *from, void *to,size_t bytes)
});
}
#else
inline void thread_bcopy(void *from, void *to,size_t bytes)
inline void thread_bcopy(const void *from, void *to,size_t bytes)
{
bcopy(from,to,bytes);
}
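// Note (editorial, restating the code above): the GRID_OMP variant copies
// 64-bit words and asserts bytes%8==0, so callers must pass sizes that are
// multiples of 8; the plain bcopy fallback has no such restriction.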
@ -509,7 +509,14 @@ void Grid_init(int *argc,char ***argv)
Grid_default_latt,
Grid_default_mpi);


if( GridCmdOptionExists(*argv,*argv+*argc,"--flightrecorder") ){
std::cout << GridLogMessage <<" Enabling flight recorder " <<std::endl;
FlightRecorder::SetLoggingMode(FlightRecorder::LoggingModeRecord);
FlightRecorder::PrintEntireLog = 1;
FlightRecorder::ChecksumComms = 1;
FlightRecorder::ChecksumCommsSend=1;
}

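// Usage sketch (hypothetical command line, for illustration only): any binary
// that calls Grid_init can now enable the recorder from the shell, e.g.
//   ./Benchmark_dwf_fp32 --grid 16.16.16.32 --mpi 1.1.1.1 --flightrecorder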
if( GridCmdOptionExists(*argv,*argv+*argc,"--decomposition") ){
std::cout<<GridLogMessage<<"Grid Default Decomposition patterns\n";
std::cout<<GridLogMessage<<"\tOpenMP threads : "<<GridThread::GetThreads()<<std::endl;
@ -651,3 +658,4 @@ void Grid_debug_handler_init(void)
}

NAMESPACE_END(Grid);


@ -50,7 +50,7 @@ namespace Grid{
int64_t index64;
IndexFromCoorReversed(coor,index64,dims);
if ( index64>=2*1024*1024*1024LL ){
std::cout << " IndexFromCoorReversed " << coor<<" index " << index64<< " dims "<<dims<<std::endl;
// std::cout << " IndexFromCoorReversed " << coor<<" index " << index64<< " dims "<<dims<<std::endl;
}
assert(index64<2*1024*1024*1024LL);
index = (int) index64;

@ -25,13 +25,20 @@ directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>

#if Nc == 3
#include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
#include <Grid/qcd/smearing/JacobianAction.h>
#endif

using namespace Grid;

int main(int argc, char **argv)
{
#if Nc != 3
#warning FTHMC2p1f will not work for Nc != 3
std::cout << "This program will currently only work for Nc == 3." << std::endl;
#else
std::cout << std::setprecision(12);

Grid_init(&argc, &argv);
@ -220,7 +227,6 @@ int main(int argc, char **argv)
TheHMC.Run(SmearingPolicy); // for smearing

Grid_finalize();
#endif
} // main



@ -24,14 +24,22 @@ See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */

#include <Grid/Grid.h>

#if Nc == 3
#include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
#include <Grid/qcd/smearing/JacobianAction.h>
#endif

using namespace Grid;

int main(int argc, char **argv)
{
#if Nc != 3
#warning FTHMC2p1f_3GeV will not work for Nc != 3
std::cout << "This program will currently only work for Nc == 3." << std::endl;
#else
std::cout << std::setprecision(12);

Grid_init(&argc, &argv);
@ -220,6 +228,7 @@ int main(int argc, char **argv)
TheHMC.Run(SmearingPolicy); // for smearing

Grid_finalize();
#endif
} // main



@ -25,13 +25,20 @@ directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>

#if Nc == 3
#include <Grid/qcd/smearing/GaugeConfigurationMasked.h>
#include <Grid/qcd/smearing/JacobianAction.h>
#endif

using namespace Grid;

int main(int argc, char **argv)
{
#if Nc != 3
#warning HMC2p1f_3GeV will not work for Nc != 3
std::cout << "This program will currently only work for Nc == 3." << std::endl;
#else
std::cout << std::setprecision(12);

Grid_init(&argc, &argv);
@ -220,6 +227,7 @@ int main(int argc, char **argv)
TheHMC.Run(SmearingPolicy); // for smearing

Grid_finalize();
#endif
} // main



@ -52,7 +52,7 @@ int main (int argc, char ** argv)

int threads = GridThread::GetThreads();

int Ls=8;
int Ls=16;
for(int i=0;i<argc;i++) {
if(std::string(argv[i]) == "-Ls"){
std::stringstream ss(argv[i+1]); ss >> Ls;

@ -175,8 +175,8 @@ public:
timestat.statistics(t_time);

dbytes=dbytes*ppn;
double xbytes = dbytes*0.5;
double bidibytes = dbytes;
double xbytes = dbytes;
double bidibytes = dbytes*2.0;
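// Accounting note (editorial, assuming dbytes counts the payload moved one
// way per exchange after the ppn scaling): the one-way figure is now reported
// as dbytes and the bidirectional figure as 2*dbytes, where the old code
// halved the one-way number instead.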

std::cout<<GridLogMessage << lat<<"\t"<<Ls<<"\t "
<< bytes << " \t "
@ -492,17 +492,18 @@ public:
}
FGrid->Barrier();
double t1=usecond();
uint64_t ncall = 500;

FGrid->Broadcast(0,&ncall,sizeof(ncall));
uint64_t no = 50;
uint64_t ni = 100;

// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;

time_statistics timestat;
std::vector<double> t_time(ncall);
for(uint64_t i=0;i<ncall;i++){
std::vector<double> t_time(no);
for(uint64_t i=0;i<no;i++){
t0=usecond();
Dw.DhopEO(src_o,r_e,DaggerNo);
for(uint64_t j=0;j<ni;j++){
Dw.DhopEO(src_o,r_e,DaggerNo);
}
t1=usecond();
t_time[i] = t1-t0;
}
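// Timing note (editorial): each of the "no" outer samples now times a burst
// of "ni" back-to-back DhopEO calls, so the statistics below scale the flop
// rate by ni (mflops = flops/timestat.mean*ni) and report the per-call time
// as timestat.mean/ni.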
@ -520,11 +521,11 @@ public:
double mf_hi, mf_lo, mf_err;

timestat.statistics(t_time);
mf_hi = flops/timestat.min;
mf_lo = flops/timestat.max;
mf_hi = flops/timestat.min*ni;
mf_lo = flops/timestat.max*ni;
mf_err= flops/timestat.min * timestat.err/timestat.mean;

mflops = flops/timestat.mean;
mflops = flops/timestat.mean*ni;
mflops_all.push_back(mflops);
if ( mflops_best == 0 ) mflops_best = mflops;
if ( mflops_worst== 0 ) mflops_worst= mflops;
@ -535,6 +536,7 @@ public:
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank "<< mflops/NP<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node "<< mflops/NN<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo us per call "<< timestat.mean/ni<<std::endl;

}

@ -654,17 +656,19 @@ public:
}
FGrid->Barrier();
double t1=usecond();
uint64_t ncall = 500;

FGrid->Broadcast(0,&ncall,sizeof(ncall));
uint64_t no = 50;
uint64_t ni = 100;

// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;

time_statistics timestat;
std::vector<double> t_time(ncall);
for(uint64_t i=0;i<ncall;i++){
std::vector<double> t_time(no);
for(uint64_t i=0;i<no;i++){
t0=usecond();
Ds.DhopEO(src_o,r_e,DaggerNo);
for(uint64_t j=0;j<ni;j++){
Ds.DhopEO(src_o,r_e,DaggerNo);
}
t1=usecond();
t_time[i] = t1-t0;
}
@ -675,11 +679,11 @@ public:
double mf_hi, mf_lo, mf_err;

timestat.statistics(t_time);
mf_hi = flops/timestat.min;
mf_lo = flops/timestat.max;
mf_hi = flops/timestat.min*ni;
mf_lo = flops/timestat.max*ni;
mf_err= flops/timestat.min * timestat.err/timestat.mean;

mflops = flops/timestat.mean;
mflops = flops/timestat.mean*ni;
mflops_all.push_back(mflops);
if ( mflops_best == 0 ) mflops_best = mflops;
if ( mflops_worst== 0 ) mflops_worst= mflops;
@ -689,6 +693,7 @@ public:
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per rank "<< mflops/NP<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo mflop/s per node "<< mflops/NN<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Deo us per call "<< timestat.mean/ni<<std::endl;

}

@ -792,19 +797,18 @@ public:
Dc.M(src,r);
}
FGrid->Barrier();
double t1=usecond();
uint64_t ncall = 500;

FGrid->Broadcast(0,&ncall,sizeof(ncall));
uint64_t ni = 100;
uint64_t no = 50;

// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;

time_statistics timestat;
std::vector<double> t_time(ncall);
for(uint64_t i=0;i<ncall;i++){
t0=usecond();
Dc.M(src,r);
t1=usecond();
std::vector<double> t_time(no);
for(uint64_t i=0;i<no;i++){
double t0=usecond();
for(uint64_t j=0;j<ni;j++){
Dc.M(src,r);
}
double t1=usecond();
t_time[i] = t1-t0;
}
FGrid->Barrier();
@ -814,20 +818,21 @@ public:
double mf_hi, mf_lo, mf_err;

timestat.statistics(t_time);
mf_hi = flops/timestat.min;
mf_lo = flops/timestat.max;
mf_hi = flops/timestat.min*ni;
mf_lo = flops/timestat.max*ni;
mf_err= flops/timestat.min * timestat.err/timestat.mean;

mflops = flops/timestat.mean;
mflops = flops/timestat.mean*ni;
mflops_all.push_back(mflops);
if ( mflops_best == 0 ) mflops_best = mflops;
if ( mflops_worst== 0 ) mflops_worst= mflops;
if ( mflops>mflops_best ) mflops_best = mflops;
if ( mflops<mflops_worst) mflops_worst= mflops;

std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Dclov mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Dclov mflop/s = "<< mflops << " ("<<mf_err<<") " << mf_lo<<"-"<<mf_hi <<" "<<timestat.mean<<" us"<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Dclov mflop/s per rank "<< mflops/NP<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Dclov mflop/s per node "<< mflops/NN<<std::endl;
std::cout<<GridLogMessage << std::fixed << std::setprecision(1)<<"Dclov us per call "<< timestat.mean/ni<<std::endl;

}

@ -872,7 +877,7 @@ int main (int argc, char ** argv)
int do_dslash=1;

int sel=4;
std::vector<int> L_list({8,12,16,24});
std::vector<int> L_list({8,12,16,24,32});
int selm1=sel-1;

std::vector<double> clover;

@ -151,7 +151,7 @@ AC_ARG_ENABLE([tracing],
case ${ac_TRACING} in
nvtx)
AC_DEFINE([GRID_TRACING_NVTX],[1],[use NVTX])
LIBS="${LIBS} -lnvToolsExt64_1"
LIBS="${LIBS} -lnvToolsExt"
;;
roctx)
AC_DEFINE([GRID_TRACING_ROCTX],[1],[use ROCTX])

@ -93,10 +93,13 @@ int main(int argc, char ** argv)
Real coeff = (width*width) / Real(4*Iterations);

chi=kronecker;

// chi = (1-p^2/2N)^N kronecker
for(int n = 0; n < Iterations; ++n) {
Laplacian.M(chi,psi);
chi = chi - coeff*psi;
RealD n2 = norm2(chi);
chi = chi * (1.0/std::sqrt(n2));
}
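// Math note (editorial, assuming Laplacian.M applies -nabla^2, i.e.
// eigenvalue p^2 on a plane wave): with coeff = width^2/(4*Iterations), each
// step multiplies a momentum mode by (1 - width^2 p^2/(4N)), so after N steps
// chi ~ (1 - width^2 p^2/(4N))^N kronecker -> exp(-width^2 p^2/4) kronecker,
// i.e. Gaussian (Wuppertal) smearing of width ~ "width", up to the per-step
// normalisation applied in the loop.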

std::cout << " Wuppertal smeared operator is chi = \n" << chi <<std::endl;

74  systems/Aurora/benchmarks/bench16.pbs  Normal file
@ -0,0 +1,74 @@
#!/bin/bash

##PBS -q LatticeQCD_aesp_CNDA
#PBS -q debug-scaling
##PBS -q prod
#PBS -l select=16
#PBS -l walltime=00:20:00
#PBS -A LatticeQCD_aesp_CNDA

cd $PBS_O_WORKDIR

source ../sourceme.sh

cp $PBS_NODEFILE nodefile

export OMP_NUM_THREADS=4
export MPICH_OFI_NIC_POLICY=GPU

#export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1
#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE
#unset MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE
#unset MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_D2H_ENGINE_TYPE=0
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_H2D_ENGINE_TYPE=0
#export MPIR_CVAR_GPU_USE_IMMEDIATE_COMMAND_LIST=1
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_BUFFER_SZ=1048576
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_THRESHOLD=131072
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_NUM_BUFFERS_PER_CHUNK=16
#export MPIR_CVAR_CH4_OFI_GPU_PIPELINE_MAX_NUM_BUFFERS=16

#
# Local vol 16.16.16.32
#

LX=16
LY=16
LZ=16
LT=32

NX=2
NY=2
NZ=4
NT=1

GX=2
GY=2
GZ=1
GT=3

PX=$((NX * GX ))
PY=$((NY * GY ))
PZ=$((NZ * GZ ))
PT=$((NT * GT ))

VX=$((PX * LX ))
VY=$((PY * LY ))
VZ=$((PZ * LZ ))
VT=$((PT * LT ))

NP=$((PX*PY*PZ*PT))
VOL=${VX}.${VY}.${VZ}.${VT}
AT=8
MPI=${PX}.${PY}.${PZ}.${PT}

CMD="mpiexec -np $NP -ppn 12 -envall \
./gpu_tile.sh ./Benchmark_dwf_fp32 --mpi $MPI --grid $VOL \
--shm-mpi 0 --shm 2048 --device-mem 32000 --accelerator-threads $AT --comms-overlap "

echo VOL $VOL
echo MPI $MPI
echo NPROC $NP
echo $CMD
$CMD
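# Worked numbers for the values above (editorial): PX.PY.PZ.PT = 4.4.4.3, so
# NP = 4*4*4*3 = 192 ranks and VOL = 64.64.64.96 from the 16.16.16.32 local
# volume; 192 ranks at 12 per node (-ppn 12) matches the select=16 node request.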

@ -19,7 +19,7 @@ export ONEAPI_DEVICE_FILTER=gpu,level_zero

export SYCL_PI_LEVEL_ZERO_DEVICE_SCOPE_EVENTS=0
export SYCL_PI_LEVEL_ZERO_USE_IMMEDIATE_COMMANDLISTS=1
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:3
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:4
export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE_FOR_D2D_COPY=1
#export SYCL_PI_LEVEL_ZERO_USE_COPY_ENGINE=0:2
#export SYCL_PI_LEVEL_ZERO_USM_RESIDENT=1
@ -30,8 +30,8 @@ echo "rank $PALS_RANKID ; local rank $PALS_LOCAL_RANKID ; ZE_AFFINITY_MASK=$ZE_A

if [ $PALS_RANKID = "0" ]
then
numactl -p $NUMAP -N $NUMAP unitrace --chrome-kernel-logging --chrome-mpi-logging --chrome-sycl-logging --demangle "$@"
# numactl -p $NUMAP -N $NUMAP "$@"
# numactl -p $NUMAP -N $NUMAP unitrace --chrome-kernel-logging --chrome-mpi-logging --chrome-sycl-logging --demangle "$@"
numactl -p $NUMAP -N $NUMAP "$@"
else
numactl -p $NUMAP -N $NUMAP "$@"
fi

@ -1,18 +1,19 @@
#Ahead of time compile for PVC

export LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xs -device -Xs pvc -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -fsycl -lsycl -lnuma -L/opt/aurora/24.180.3/spack/unified/0.8.0/install/linux-sles15-x86_64/oneapi-2024.07.30.002/numactl-2.0.14-7v6edad/lib"
export CXXFLAGS="-O3 -fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare -qmkl=parallel -fsycl -fno-exceptions -I/opt/aurora/24.180.3/spack/unified/0.8.0/install/linux-sles15-x86_64/oneapi-2024.07.30.002/numactl-2.0.14-7v6edad/include/"
export LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xs -device -Xs pvc -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -fsycl -lsycl -lnuma -L/opt/aurora/24.180.3/spack/unified/0.8.0/install/linux-sles15-x86_64/oneapi-2024.07.30.002/numactl-2.0.14-7v6edad/lib -fPIC -fsycl-max-parallel-link-jobs=16 -fno-sycl-rdc"
export CXXFLAGS="-O3 -fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare -qmkl=parallel -fsycl -fno-exceptions -I/opt/aurora/24.180.3/spack/unified/0.8.0/install/linux-sles15-x86_64/oneapi-2024.07.30.002/numactl-2.0.14-7v6edad/include/ -fPIC"

#JIT compile
#export LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -fsycl -lsycl "
#export CXXFLAGS="-O3 -fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare -qmkl=parallel -fsycl -fno-exceptions "

../../configure \
../configure \
--enable-simd=GPU \
--enable-reduction=grid \
--enable-gen-simd-width=64 \
--enable-comms=mpi-auto \
--enable-debug \
--prefix $HOME/gpt-install \
--disable-gparity \
--disable-fermion-reps \
--with-lime=$CLIME \

22  systems/Frontier-rocm631/config-command  Normal file
@ -0,0 +1,22 @@
CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-`
../../configure --enable-comms=mpi-auto \
--with-lime=$CLIME \
--enable-unified=no \
--enable-shm=nvlink \
--enable-tracing=none \
--enable-accelerator=hip \
--enable-gen-simd-width=64 \
--disable-gparity \
--disable-fermion-reps \
--enable-simd=GPU \
--with-gmp=$OLCF_GMP_ROOT \
--with-fftw=$FFTW_DIR/.. \
--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
--disable-fermion-reps \
CXX=hipcc MPICXX=mpicxx \
CXXFLAGS="-fPIC -I${ROCM_PATH}/include/ -I${MPICH_DIR}/include -L/lib64 " \
LDFLAGS="-L/lib64 -L${ROCM_PATH}/lib -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lhipblas -lrocblas"



16  systems/Frontier-rocm631/sourceme631.sh  Normal file
@ -0,0 +1,16 @@

echo spack
. /autofs/nccs-svm1_home1/paboyle/Crusher/Grid/spack/share/spack/setup-env.sh

#module load cce/15.0.1

module load rocm/6.3.1
module load cray-fftw
module load craype-accel-amd-gfx90a
export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH

#Ugly hacks to get down level software working on current system
#export LD_LIBRARY_PATH=/opt/cray/libfabric/1.20.1/lib64/:$LD_LIBRARY_PATH
#export LD_LIBRARY_PATH=`pwd`/:$LD_LIBRARY_PATH
#ln -s /opt/rocm-6.0.0/lib/libamdhip64.so.6 .

@ -30,14 +30,10 @@ source ${root}/sourceme.sh

export OMP_NUM_THREADS=7
export MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_SMP_SINGLE_COPY_MODE=XPMEM

for vol in 32.32.32.64
#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
#64.64.32.96
for vol in 64.64.32.64
do
srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.ov.$vol
srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.ov.$vol

srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 0 --grid $vol > log.shm0.seq.$vol
srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-sequential --shm 2048 --shm-mpi 1 --grid $vol > log.shm1.seq.$vol
srun ./select_gpu ./Benchmark_dwf_fp32 --mpi 2.2.2.2 --accelerator-threads 8 --comms-overlap --shm 2048 --shm-mpi 0 --grid $vol -Ls 16
done

@ -3,20 +3,19 @@ CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-`
--with-lime=$CLIME \
--enable-unified=no \
--enable-shm=nvlink \
--enable-tracing=timer \
--enable-tracing=none \
--enable-accelerator=hip \
--enable-gen-simd-width=64 \
--disable-gparity \
--disable-fermion-reps \
--enable-simd=GPU \
--enable-accelerator-cshift \
--with-gmp=$OLCF_GMP_ROOT \
--with-fftw=$FFTW_DIR/.. \
--with-mpfr=/opt/cray/pe/gcc/mpfr/3.1.4/ \
--disable-fermion-reps \
CXX=hipcc MPICXX=mpicxx \
CXXFLAGS="-fPIC -I{$ROCM_PATH}/include/ -I${MPICH_DIR}/include -L/lib64 " \
LDFLAGS="-L/lib64 -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lamdhip64 -lhipblas -lrocblas"
CXXFLAGS="-fPIC -I${ROCM_PATH}/include/ -I${MPICH_DIR}/include -L/lib64 " \
LDFLAGS="-L/lib64 -L${ROCM_PATH}/lib -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa -lhipblas -lrocblas"



@ -1,12 +1,25 @@

echo spack
. /autofs/nccs-svm1_home1/paboyle/Crusher/Grid/spack/share/spack/setup-env.sh
spack load c-lime
module load emacs
module load PrgEnv-gnu
module load rocm
module load cray-mpich
module load gmp

module load cce/15.0.1
module load rocm/5.3.0
module load cray-fftw
module load craype-accel-amd-gfx90a

#Ugly hacks to get down level software working on current system
export LD_LIBRARY_PATH=/opt/cray/libfabric/1.20.1/lib64/:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=`pwd`/:$LD_LIBRARY_PATH
ln -s /opt/rocm-6.0.0/lib/libamdhip64.so.6 .

#echo spack load c-lime
#spack load c-lime
#module load emacs
##module load PrgEnv-gnu
##module load cray-mpich
##module load cray-fftw
##module load craype-accel-amd-gfx90a
##export LD_LIBRARY_PATH=/opt/gcc/mpfr/3.1.4/lib:$LD_LIBRARY_PATH
#Hack for lib
#export LD_LIBRARY_PATH=`pwd`:$LD_LIBRARY_PATH
##export LD_LIBRARY_PATH=`pwd`/:$LD_LIBRARY_PATH

206  systems/WorkArounds.txt  Normal file
@ -0,0 +1,206 @@
The purpose of this file is to collate all non-obvious known magic shell variables
and compiler flags required for either correctness or performance on various systems.

A repository of work-arounds.

Contents:
1. Interconnect + MPI
2. Compilation
3. Profiling

************************
* 1. INTERCONNECT + MPI
************************

--------------------------------------------------------------------
MPI2-IO correctness: force OpenMPI to use the MPICH romio implementation for parallel I/O
--------------------------------------------------------------------
export OMPI_MCA_io=romio321

--------------------------------------
ROMIO fails with > 2GB per node reads (32-bit issue)
--------------------------------------

Use a later MPICH

https://github.com/paboyle/Grid/issues/381

https://github.com/pmodels/mpich/commit/3a479ab0

--------------------------------------------------------------------
Slingshot: Frontier and Perlmutter libfabric slow down
and physical memory fragmentation
--------------------------------------------------------------------
export FI_MR_CACHE_MONITOR=disabled
or
export FI_MR_CACHE_MONITOR=kdreg2

--------------------------------------------------------------------
Perlmutter
--------------------------------------------------------------------

export MPICH_RDMA_ENABLED_CUDA=1
export MPICH_GPU_IPC_ENABLED=1
export MPICH_GPU_EAGER_REGISTER_HOST_MEM=0
export MPICH_GPU_NO_ASYNC_MEMCPY=0

--------------------------------------------------------------------
Frontier/LumiG
--------------------------------------------------------------------

Hiding ROCR_VISIBLE_DEVICES triggers SDMA engines to be used for GPU-GPU transfers

cat << EOF > select_gpu
#!/bin/bash
export MPICH_GPU_SUPPORT_ENABLED=1
export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
export GPU_MAP=(0 1 2 3 7 6 5 4)
export NUMA_MAP=(3 3 1 1 2 2 0 0)
export GPU=\${GPU_MAP[\$SLURM_LOCALID]}
export NUMA=\${NUMA_MAP[\$SLURM_LOCALID]}
export HIP_VISIBLE_DEVICES=\$GPU
unset ROCR_VISIBLE_DEVICES
echo RANK \$SLURM_LOCALID using GPU \$GPU
exec numactl -m \$NUMA -N \$NUMA \$*
EOF
chmod +x ./select_gpu

srun ./select_gpu BINARY


--------------------------------------------------------------------
Mellanox performance with A100 GPU (Tursa, Booster, Leonardo)
--------------------------------------------------------------------
export OMPI_MCA_btl=^uct,openib
export UCX_TLS=gdr_copy,rc,rc_x,sm,cuda_copy,cuda_ipc
export UCX_RNDV_SCHEME=put_zcopy
export UCX_RNDV_THRESH=16384
export UCX_IB_GPU_DIRECT_RDMA=yes

--------------------------------------------------------------------
Mellanox + A100 correctness (Tursa, Booster, Leonardo)
--------------------------------------------------------------------
export UCX_MEMTYPE_CACHE=n

--------------------------------------------------------------------
MPICH/Aurora/PVC correctness and performance
--------------------------------------------------------------------

https://github.com/pmodels/mpich/issues/7302

--enable-cuda-aware-mpi=no
--enable-unified=no

Grid's internal D-H-H-D pipeline mode, avoid device memory in MPI
Do not use SVM

Ideally use MPICH with fix to issue 7302:

https://github.com/pmodels/mpich/pull/7312

Ideally:
MPIR_CVAR_CH4_IPC_GPU_HANDLE_CACHE=generic

Alternatives:
export MPIR_CVAR_NOLOCAL=1
export MPIR_CVAR_CH4_IPC_GPU_P2P_THRESHOLD=1000000000

--------------------------------------------------------------------
MPICH/Aurora/PVC GPU pipeline mode
--------------------------------------------------------------------

Broken:
export MPIR_CVAR_CH4_OFI_ENABLE_GPU_PIPELINE=1

This gives good performance without requiring
--enable-cuda-aware-mpi=no

But is an open issue reported by James Osborn
https://github.com/pmodels/mpich/issues/7139

Possibly resolved but unclear if in the installed software yet.

************************
* 2. COMPILATION
************************

--------------------------------------------------------------------
G++ compiler breakage / graveyard
--------------------------------------------------------------------

9.3.0, 10.3.1,
https://github.com/paboyle/Grid/issues/290
https://github.com/paboyle/Grid/issues/264

Working (-) Broken (X):

4.9.0 -
4.9.1 -
5.1.0 X
5.2.0 X
5.3.0 X
5.4.0 X
6.1.0 X
6.2.0 X
6.3.0 -
7.1.0 -
8.0.0 (HEAD) -

https://github.com/paboyle/Grid/issues/100

--------------------------------------------------------------------
AMD GPU nodes :
--------------------------------------------------------------------

multiple ROCM versions broken; use 5.3.0
manifests itself as wrong results in fp32

https://github.com/paboyle/Grid/issues/464

--------------------------------------------------------------------
Aurora/PVC
--------------------------------------------------------------------

SYCL ahead of time compilation (fixes rare runtime JIT errors and faster runtime, PB)
SYCL slow link and relocatable code issues (Christoph Lehner)
Opt large register file required for good performance in fp64


export SYCL_PROGRAM_COMPILE_OPTIONS="-ze-opt-large-register-file"
export LDFLAGS="-fiopenmp -fsycl -fsycl-device-code-split=per_kernel -fsycl-targets=spir64_gen -Xs -device -Xs pvc -fsycl-device-lib=all -lze_loader -L${MKLROOT}/lib -qmkl=parallel -fsycl -lsycl -fPIC -fsycl-max-parallel-link-jobs=16 -fno-sycl-rdc"
export CXXFLAGS="-O3 -fiopenmp -fsycl-unnamed-lambda -fsycl -Wno-tautological-compare -qmkl=parallel -fsycl -fno-exceptions -fPIC"

--------------------------------------------------------------------
Aurora/PVC useful extra options
--------------------------------------------------------------------

Host only sanitizer:
-Xarch_host -fsanitize=leak
-Xarch_host -fsanitize=address

Deterministic MPI reduction:
export MPIR_CVAR_ALLREDUCE_DEVICE_COLLECTIVE=0
export MPIR_CVAR_REDUCE_DEVICE_COLLECTIVE=0
export MPIR_CVAR_ALLREDUCE_INTRA_ALGORITHM=recursive_doubling
unset MPIR_CVAR_CH4_COLL_SELECTION_TUNING_JSON_FILE
unset MPIR_CVAR_COLL_SELECTION_TUNING_JSON_FILE
unset MPIR_CVAR_CH4_POSIX_COLL_SELECTION_TUNING_JSON_FILE



************************
* 3. Visual profile tools
************************

--------------------------------------------------------------------
Frontier/rocprof
--------------------------------------------------------------------

--------------------------------------------------------------------
Aurora/unitrace
--------------------------------------------------------------------


--------------------------------------------------------------------
Tursa/nsight-sys
--------------------------------------------------------------------
32  systems/sdcc-genoa/bench.slurm  Normal file
@ -0,0 +1,32 @@
#!/bin/bash
#SBATCH --partition lqcd
#SBATCH --time=00:50:00
#SBATCH -A lqcdtest
#SBATCH -q lqcd
#SBATCH --exclusive
#SBATCH --nodes=1
#SBATCH -w genoahost001,genoahost003,genoahost050,genoahost054
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=64
#SBATCH --qos lqcd

source sourceme.sh

export PLACES=(1:16:4 1:32:2 0:64:1);
export THR=(16 32 64)

for t in 2
do

export OMP_NUM_THREADS=${THR[$t]}
export OMP_PLACES=${PLACES[$t]}
export thr=${THR[$t]}

#for vol in 24.24.24.24 32.32.32.32 48.48.48.96
for vol in 48.48.48.96
do
srun -N1 -n1 ./benchmarks/Benchmark_dwf_fp32 --mpi 1.1.1.1 --grid $vol --dslash-asm --shm 8192 > $vol.1node.thr$thr
done
#srun -N1 -n1 ./benchmarks/Benchmark_usqcd --mpi 1.1.1.1 --grid $vol > usqcd.1node.thr$thr
done

36  systems/sdcc-genoa/bench2.slurm  Normal file
@ -0,0 +1,36 @@
#!/bin/bash
#SBATCH --partition lqcd
#SBATCH --time=00:50:00
#SBATCH -A lqcdtest
#SBATCH -q lqcd
#SBATCH --exclusive
#SBATCH --nodes=2
#SBATCH -w genoahost001,genoahost003,genoahost050,genoahost054
#SBATCH --ntasks=2
#SBATCH --cpus-per-task=64
#SBATCH --qos lqcd

source sourceme.sh

export PLACES=(1:16:4 1:32:2 0:64:1);
export THR=(16 32 64)

nodes=2
mpi=1.1.1.2

for t in 2
do

export OMP_NUM_THREADS=${THR[$t]}
export OMP_PLACES=${PLACES[$t]}
export thr=${THR[$t]}

#srun -N$nodes -n$nodes ./benchmarks/Benchmark_usqcd --mpi $mpi --grid 32.32.32.32 > usqcd.n$nodes.thr$thr

for vol in 64.64.64.128
do
srun -N$nodes -n$nodes ./benchmarks/Benchmark_dwf_fp32 --mpi $mpi --grid $vol --dslash-asm --comms-overlap --shm 8192 > $vol.n$nodes.overlap.thr$thr
done

done

16  systems/sdcc-genoa/config-command  Normal file
@ -0,0 +1,16 @@
../../configure \
--enable-comms=mpi-auto \
--enable-unified=yes \
--enable-shm=shmopen \
--enable-shm-fast-path=shmopen \
--enable-accelerator=none \
--enable-simd=AVX512 \
--disable-accelerator-cshift \
--disable-fermion-reps \
--disable-gparity \
CXX=clang++ \
MPICXX=mpicxx \
CXXFLAGS="-std=c++17"


4  systems/sdcc-genoa/sourceme.sh  Normal file
@ -0,0 +1,4 @@
source $HOME/spack/share/spack/setup-env.sh
spack load llvm@17.0.4
export LD_LIBRARY_PATH=/direct/sdcc+u/paboyle/spack/opt/spack/linux-almalinux8-icelake/gcc-8.5.0/llvm-17.0.4-laufdrcip63ivkadmtgoepwmj3dtztdu/lib:$LD_LIBRARY_PATH
module load openmpi
17  systems/spack-linux/config-command  Normal file
@ -0,0 +1,17 @@
../../src/Grid/configure \
--prefix /home/pab/NPR/install \
--enable-comms=mpi-auto \
--enable-simd=AVX2 \
--enable-shm=none \
--enable-debug \
--with-lime=$CLIME \
--with-hdf5=$HDF5 \
--with-fftw=$FFTW \
--with-gmp=$GMP \
--with-mpfr=$MPFR \
--disable-gparity \
--disable-fermion-reps \
CXX=clang++ \
MPICXX=mpicxx \
CXXFLAGS="-std=c++17 "

28  systems/spack-linux/sourceme.sh  Normal file
@ -0,0 +1,28 @@
source $HOME/spack/share/spack/setup-env.sh
spack load llvm@12
spack load autoconf%clang@12.0.1
spack load automake%clang@12.0.1
spack load c-lime%clang@12.0.1
spack load fftw%clang@12.0.1
spack load gmp%clang@12.0.1
spack load mpfr%clang@12.0.1
spack load openmpi%clang@12.0.1
spack load openssl%clang@12.0.1
spack load hdf5+cxx%clang@12.0.1
spack load cmake%clang@12.0.1
export FFTW=`spack find --paths fftw%clang@12.0.1 | grep ^fftw | awk '{print $2}' `
export HDF5=`spack find --paths hdf5+cxx%clang@12.0.1 | grep ^hdf5 | awk '{print $2}' `
export CLIME=`spack find --paths c-lime%clang@12.0.1 | grep ^c-lime | awk '{print $2}' `
export MPFR=`spack find --paths mpfr%clang@12.0.1 | grep ^mpfr | awk '{print $2}' `
export LLVM=`spack find --paths llvm@12 | grep ^llvm | awk '{print $2}' `
export OPENSSL=`spack find --paths openssl%clang@12.0.1 | grep openssl | awk '{print $2}' `
export GMP=`spack find --paths gmp%clang@12.0.1 | grep ^gmp | awk '{print $2}' `
export TCLAP=`spack find --paths tclap%clang@12.0.1 | grep ^tclap | awk '{print $2}' `
export LD_LIBRARY_PATH=${TCLAP}/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$MPFR/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$GMP/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$FFTW/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$LLVM/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=$LLVM/lib/x86_64-unknown-linux-gnu/:$LD_LIBRARY_PATH

ulimit -s 81920
19  systems/spack-linux/spack-install  Normal file
@ -0,0 +1,19 @@
cd
git clone https://github.com/spack/spack.git
source $HOME/spack/share/spack/setup-env.sh

spack install llvm@12

spack install autoconf%clang@12.0.1
spack install automake%clang@12.0.1
spack install c-lime%clang@12.0.1
spack install fftw%clang@12.0.1
spack install gmp%clang@12.0.1
spack install mpfr%clang@12.0.1
spack install openmpi%clang@12.0.1
spack install openssl%clang@12.0.1
spack install hdf5+cxx%clang@12.0.1
spack install cmake%clang@12.0.1
spack install tclap%clang@12.0.1
spack install emacs%clang@12.0.1

781  tests/debug/Test_general_coarse_hdcg_phys48_blockcg.cc  Normal file
@ -0,0 +1,781 @@
/*************************************************************************************

Grid physics library, www.github.com/paboyle/Grid

Source file: ./tests/Test_general_coarse_hdcg.cc

Copyright (C) 2023

Author: Peter Boyle <pboyle@bnl.gov>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczosCoarse.h>
#include <Grid/algorithms/iterative/AdefMrhs.h>
#include <Grid/algorithms/iterative/PowerSpectrum.h>
#include <Grid/algorithms/iterative/BlockConjugateGradient.h>

using namespace std;
using namespace Grid;

template<class aggregation>
void SaveFineEvecs(aggregation &Agg,std::string file)
{
#ifdef HAVE_LIME
emptyUserRecord record;
ScidacWriter WR(Agg[0].Grid()->IsBoss());
WR.open(file);
for(int b=0;b<Agg.size();b++){
WR.writeScidacFieldRecord(Agg[b],record,0,Grid::BinaryIO::BINARYIO_LEXICOGRAPHIC);
}
WR.close();
#endif
}
template<class aggregation>
void SaveBasis(aggregation &Agg,std::string file)
{
#ifdef HAVE_LIME
emptyUserRecord record;
ScidacWriter WR(Agg.FineGrid->IsBoss());
WR.open(file);
for(int b=0;b<Agg.subspace.size();b++){
WR.writeScidacFieldRecord(Agg.subspace[b],record,0,Grid::BinaryIO::BINARYIO_LEXICOGRAPHIC);
// WR.writeScidacFieldRecord(Agg.subspace[b],record);
}
WR.close();
#endif
}
template<class aggregation>
void LoadBasis(aggregation &Agg, std::string file)
{
#ifdef HAVE_LIME
emptyUserRecord record;
ScidacReader RD ;
RD.open(file);
for(int b=0;b<Agg.subspace.size();b++){
RD.readScidacFieldRecord(Agg.subspace[b],record,Grid::BinaryIO::BINARYIO_LEXICOGRAPHIC);
// RD.readScidacFieldRecord(Agg.subspace[b],record,0);
}
RD.close();
#endif
}
|
||||
template<class aggregation>
|
||||
void LoadBasisSkip(aggregation &Agg, std::string file,int N,LatticeFermionF & tmp)
|
||||
{
|
||||
#ifdef HAVE_LIME
|
||||
emptyUserRecord record;
|
||||
ScidacReader RD ;
|
||||
|
||||
RD.open(file);
|
||||
for(int b=0;b<Agg.subspace.size();b++){
|
||||
for(int n=0;n<N;n++){
|
||||
RD.readScidacFieldRecord(tmp,record,Grid::BinaryIO::BINARYIO_LEXICOGRAPHIC);
|
||||
if(n==0) precisionChange(Agg.subspace[b],tmp);
|
||||
}
|
||||
// RD.readScidacFieldRecord(Agg.subspace[b],record,0);
|
||||
}
|
||||
RD.close();
|
||||
#endif
|
||||
}
|
||||
template<class aggregation>
|
||||
void LoadBasisSum(aggregation &Agg, std::string file,int N,LatticeFermionF & tmp)
|
||||
{
|
||||
#ifdef HAVE_LIME
|
||||
emptyUserRecord record;
|
||||
ScidacReader RD ;
|
||||
|
||||
LatticeFermionF sum(tmp.Grid());
|
||||
RD.open(file);
|
||||
for(int b=0;b<Agg.subspace.size();b++){
|
||||
sum=Zero();
|
||||
for(int n=0;n<N;n++){
|
||||
RD.readScidacFieldRecord(tmp,record,Grid::BinaryIO::BINARYIO_LEXICOGRAPHIC);
|
||||
sum=sum+tmp;
|
||||
}
|
||||
precisionChange(Agg.subspace[b],sum);
|
||||
// RD.readScidacFieldRecord(Agg.subspace[b],record,0);
|
||||
}
|
||||
RD.close();
|
||||
#endif
|
||||
}
|
||||
|
||||
template<class CoarseVector>
|
||||
void SaveEigenvectors(std::vector<RealD> &eval,
|
||||
std::vector<CoarseVector> &evec,
|
||||
std::string evec_file,
|
||||
std::string eval_file)
|
||||
{
|
||||
#ifdef HAVE_LIME
|
||||
emptyUserRecord record;
|
||||
ScidacWriter WR(evec[0].Grid()->IsBoss());
|
||||
WR.open(evec_file);
|
||||
for(int b=0;b<evec.size();b++){
|
||||
WR.writeScidacFieldRecord(evec[b],record,0,0);
|
||||
}
|
||||
WR.close();
|
||||
XmlWriter WRx(eval_file);
|
||||
write(WRx,"evals",eval);
|
||||
#endif
|
||||
}
|
||||
template<class CoarseVector>
|
||||
void LoadEigenvectors(std::vector<RealD> &eval,
|
||||
std::vector<CoarseVector> &evec,
|
||||
std::string evec_file,
|
||||
std::string eval_file)
|
||||
{
|
||||
#ifdef HAVE_LIME
|
||||
XmlReader RDx(eval_file);
|
||||
read(RDx,"evals",eval);
|
||||
emptyUserRecord record;
|
||||
|
||||
Grid::ScidacReader RD ;
|
||||
RD.open(evec_file);
|
||||
assert(evec.size()==eval.size());
|
||||
for(int k=0;k<eval.size();k++) {
|
||||
RD.readScidacFieldRecord(evec[k],record);
|
||||
}
|
||||
RD.close();
|
||||
#endif
|
||||
}

// Want Op in CoarsenOp to call MatPcDagMatPc
template<class Field>
class HermOpAdaptor : public LinearOperatorBase<Field>
{
  LinearOperatorBase<Field> & wrapped;
public:
  HermOpAdaptor(LinearOperatorBase<Field> &wrapme) : wrapped(wrapme)  {};
  void Op     (const Field &in, Field &out)   { wrapped.HermOp(in,out); }
  void HermOp (const Field &in, Field &out)   { wrapped.HermOp(in,out); }
  void AdjOp  (const Field &in, Field &out)   { wrapped.HermOp(in,out); }
  void OpDiag (const Field &in, Field &out)                  { assert(0); }
  void OpDir  (const Field &in, Field &out,int dir,int disp) { assert(0); }
  void OpDirAll (const Field &in, std::vector<Field> &out)   { assert(0); };
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
};

template<class Field> class FixedCGPolynomial : public LinearFunction<Field>
{
public:
  using LinearFunction<Field>::operator();
  typedef LinearOperatorBase<Field> FineOperator;
  FineOperator & _SmootherOperator;
  ConjugateGradientPolynomial<Field> CG;
  int iters;
  bool record;
  int replay_count;
  FixedCGPolynomial(int _iters, FineOperator &SmootherOperator) :
    _SmootherOperator(SmootherOperator),
    iters(_iters),
    record(true),
    CG(0.0,_iters,false)
  {
    std::cout << GridLogMessage<<" FixedCGPolynomial order "<<iters<<std::endl;
    replay_count = 0;
  };
  void operator() (const Field &in, Field &out)
  {
#if 1
    GridBase *grid = in.Grid();
    Field Mx0(grid);
    Field r0(grid);
    Field Minvr0(grid);

    _SmootherOperator.HermOp(out,Mx0);

    r0 = in - Mx0;

    Minvr0 = Zero();
    Minvr0.Checkerboard()=in.Checkerboard();

    if ( record ) {
      std::cout << " FixedCGPolynomial recording polynomial "<<std::endl;
      CG.Solve(_SmootherOperator,r0,Minvr0);
      record = false;
      /*
      std::cout << "P(x) = 0 "<<std::endl;
      for(int i=0;i<CG.polynomial.size();i++){
        std::cout<<" + "<< CG.polynomial[i]<<" * (x**"<<i<<")"<<std::endl;
      }
      */
      Field tmp(Minvr0.Grid());
      CG.CGsequenceHermOp(_SmootherOperator,r0,tmp);
      tmp = tmp - Minvr0;
      std::cout << " CGsequence error "<<norm2(tmp)<<" / "<<norm2(out)<<std::endl;
    } else {
      std::cout << " FixedCGPolynomial replaying polynomial "<<std::endl;
      CG.CGsequenceHermOp(_SmootherOperator,r0,Minvr0);
      if ( replay_count %5== 0 ) record=true;
      replay_count++;
    }
    out = out + Minvr0;
    _SmootherOperator.HermOp(out,r0);
    r0 = r0 - in;
    RealD rr=norm2(r0);
    RealD ss=norm2(in);
    std::cout << " FixedCGPolynomial replayed polynomial resid "<<::sqrt(rr/ss)<<std::endl;
#else
    out = Zero();
    out.Checkerboard()=in.Checkerboard();
    if ( record ) {
      std::cout << " FixedCGPolynomial recording polynomial "<<std::endl;
      CG.Solve(_SmootherOperator,in,out);
      record = false;
      std::cout << "P(x) = 0 "<<std::endl;
      for(int i=0;i<CG.polynomial.size();i++){
        std::cout<<" + "<< CG.polynomial[i]<<" * (x**"<<i<<")"<<std::endl;
      }
      Field tmp(in.Grid());
      CG.CGsequenceHermOp(_SmootherOperator,in,tmp);
      tmp = tmp - out;
      std::cout << " CGsequence error "<<norm2(tmp)<<" / "<<norm2(out)<<std::endl;
    } else {
      std::cout << " FixedCGPolynomial replaying polynomial "<<std::endl;
      CG.CGsequenceHermOp(_SmootherOperator,in,out);
      if ( replay_count %5== 5 ) record=true;
      replay_count++;
    }
#endif

  }
  void operator() (const std::vector<Field> &in, std::vector<Field> &out)
  {
    for(int i=0;i<out.size();i++){
      out[i]=Zero();
    }
    int blockDim = 0;//not used for BlockCGVec
    BlockConjugateGradient<Field>    BCGV  (BlockCGrQVec,blockDim,0.0,iters,false);
    BCGV(_SmootherOperator,in,out);
  }

};
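// FixedCGPolynomial makes CG usable as a *fixed* (hence linear) preconditioner:
// the first application runs ConjugateGradientPolynomial and records the CG
// polynomial, subsequent applications replay that fixed polynomial via
// CGsequenceHermOp, and it periodically re-records. The vector overload
// instead dispatches to a BlockCGrQVec solve over all right hand sides.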
template<class Field> class CGSmoother : public LinearFunction<Field>
{
public:
  using LinearFunction<Field>::operator();
  typedef LinearOperatorBase<Field> FineOperator;
  FineOperator & _SmootherOperator;
  int iters;
  CGSmoother(int _iters, FineOperator &SmootherOperator) :
    _SmootherOperator(SmootherOperator),
    iters(_iters)
  {
    std::cout << GridLogMessage<<" Mirs smoother order "<<iters<<std::endl;
  };
  void operator() (const Field &in, Field &out)
  {
    ConjugateGradient<Field>  CG(0.0,iters,false); // non-converge is just fine in a smoother

    out=Zero();

    CG(_SmootherOperator,in,out);
  }
};


RealD InverseApproximation(RealD x){
  return 1.0/x;
}
template<class Field> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
  using LinearFunction<Field>::operator();
  typedef LinearOperatorBase<Field> FineOperator;
  FineOperator & _SmootherOperator;
  Chebyshev<Field> Cheby;
  ChebyshevSmoother(RealD _lo,RealD _hi,int _ord, FineOperator &SmootherOperator) :
    _SmootherOperator(SmootherOperator),
    Cheby(_lo,_hi,_ord,InverseApproximation)
  {
    std::cout << GridLogMessage<<" Chebyshev smoother order "<<_ord<<" ["<<_lo<<","<<_hi<<"]"<<std::endl;
  };
  void operator() (const Field &in, Field &out)
  {
    //    Field r(out.Grid());
    Cheby(_SmootherOperator,in,out);
    //    _SmootherOperator.HermOp(out,r);
    //    r=r-in;
    //    RealD rr=norm2(r);
    //    RealD ss=norm2(in);
    //    std::cout << GridLogMessage<<" Chebyshev smoother resid "<<::sqrt(rr/ss)<<std::endl;
  }
};


template<class Field> class ChebyshevInverter : public LinearFunction<Field>
{
public:
  using LinearFunction<Field>::operator();
  typedef LinearOperatorBase<Field> FineOperator;
  FineOperator & _Operator;
  Chebyshev<Field> Cheby;
  ChebyshevInverter(RealD _lo,RealD _hi,int _ord, FineOperator &Operator) :
    _Operator(Operator),
    Cheby(_lo,_hi,_ord,InverseApproximation)
  {
    std::cout << GridLogMessage<<" Chebyshev Inverter order "<<_ord<<" ["<<_lo<<","<<_hi<<"]"<<std::endl;
  };
  void operator() (const Field &in, Field &out)
  {
    Field r(in.Grid());
    Field AinvR(in.Grid());
    _Operator.HermOp(out,r);
    r = in - r;               // b - A x
    Cheby(_Operator,r,AinvR); // A^{-1} ( b - A x ) ~ A^{-1} b - x
    out = out + AinvR;
    _Operator.HermOp(out,r);
    r = in - r;               // b - A x
    RealD rr = norm2(r);
    RealD ss = norm2(in);
    std::cout << "ChebshevInverse resid " <<::sqrt(rr/ss)<<std::endl;
  }
};
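// ChebyshevInverter performs one iterative-refinement step,
//   x <- x + p(A) (b - A x),
// with p the Chebyshev approximation to 1/x on [lo,hi] built from
// InverseApproximation above, then reports the true relative residual.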


int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  int sample=1;
  if( GridCmdOptionExists(argv,argv+argc,"--sample") ){
    std::string arg;
    arg = GridCmdOptionPayload(argv,argv+argc,"--sample");
    GridCmdOptionInt(arg,sample);
  }

  const int Ls=24;
  const int nbasis = 62;
  const int cb = 0 ;
  RealD mass=0.00078;

  if( GridCmdOptionExists(argv,argv+argc,"--mass") ){
    std::string arg;
    arg = GridCmdOptionPayload(argv,argv+argc,"--mass");
    GridCmdOptionFloat(arg,mass);
  }

  RealD M5=1.8;
  RealD b=1.5;
  RealD c=0.5;

  std::cout << GridLogMessage << " *************************** " <<std::endl;
  std::cout << GridLogMessage << " Mass " <<mass<<std::endl;
  std::cout << GridLogMessage << " M5 " <<M5<<std::endl;
  std::cout << GridLogMessage << " Ls " <<Ls<<std::endl;
  std::cout << GridLogMessage << " b " <<b<<std::endl;
  std::cout << GridLogMessage << " c " <<c<<std::endl;
  std::cout << GridLogMessage << " *************************** " <<std::endl;

  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
                                                                   GridDefaultSimd(Nd,vComplex::Nsimd()),
                                                                   GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);

  //////////////////////////////////////////
  // Single precision grids -- lanczos + smoother
  //////////////////////////////////////////
  GridCartesian         * UGridF   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
                                                                    GridDefaultSimd(Nd,vComplexF::Nsimd()),
                                                                    GridDefaultMpi());
  GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
  GridCartesian         * FGridF   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
  GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);

  ///////////////////////// Configuration /////////////////////////////////
  LatticeGaugeField Umu(UGrid);

  FieldMetaData header;
  std::string file("ckpoint_lat.1000");
  NerscIO::readConfiguration(Umu,header,file);

  //////////////////////// Fermion action //////////////////////////////////
  MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);
  SchurDiagMooeeOperator<MobiusFermionD, LatticeFermion> HermOpEO(Ddwf);

  std::cout << "**************************************"<<std::endl;
  std::cout << " Fine Power method "<<std::endl;
  std::cout << "**************************************"<<std::endl;

  {
    LatticeFermionD pm_src(FrbGrid);
    pm_src = ComplexD(1.0);
    PowerMethod<LatticeFermionD>       fPM;
    fPM(HermOpEO,pm_src);
  }

  if(0)
  {

    std::cout << "**************************************"<<std::endl;
    std::cout << " Fine Lanczos "<<std::endl;
    std::cout << "**************************************"<<std::endl;

    typedef LatticeFermionF FermionField;
    LatticeGaugeFieldF UmuF(UGridF);
    precisionChange(UmuF,Umu);
    MobiusFermionF DdwfF(UmuF,*FGridF,*FrbGridF,*UGridF,*UrbGridF,mass,M5,b,c);
    SchurDiagMooeeOperator<MobiusFermionF, LatticeFermionF> HermOpEOF(DdwfF);

    const int Fine_Nstop = 200;
    const int Fine_Nk = 200;
    const int Fine_Np = 200;
    const int Fine_Nm = Fine_Nk+Fine_Np;
    const int Fine_MaxIt= 10;

    RealD Fine_resid = 1.0e-4;
    std::cout << GridLogMessage << "Fine Lanczos "<<std::endl;
    std::cout << GridLogMessage << "Nstop "<<Fine_Nstop<<std::endl;
    std::cout << GridLogMessage << "Nk "<<Fine_Nk<<std::endl;
    std::cout << GridLogMessage << "Np "<<Fine_Np<<std::endl;
    std::cout << GridLogMessage << "resid "<<Fine_resid<<std::endl;

    Chebyshev<FermionField> Cheby(0.002,92.0,401);
    //    Chebyshev<FermionField> Cheby(0.1,92.0,401);
    FunctionHermOp<FermionField> OpCheby(Cheby,HermOpEOF);
    PlainHermOp<FermionField> Op     (HermOpEOF);
    ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby,Op,Fine_Nstop,Fine_Nk,Fine_Nm,Fine_resid,Fine_MaxIt);
    std::vector<RealD>          Fine_eval(Fine_Nm);
    FermionField                Fine_src(FrbGridF);
    Fine_src = ComplexF(1.0);
    std::vector<FermionField>   Fine_evec(Fine_Nm,FrbGridF);

    int Fine_Nconv;
    std::cout << GridLogMessage <<" Calling IRL.calc single prec"<<std::endl;
    IRL.calc(Fine_eval,Fine_evec,Fine_src,Fine_Nconv);

    std::string evec_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Subspace.phys48.evecF");
    SaveFineEvecs(Fine_evec,evec_file);
  }
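  // The if(0) block above, when enabled, regenerates the single-precision fine
  // eigenvector file Subspace.phys48.evecF that the companion test
  // Test_general_coarse_hdcg_phys48_lanczos_subspace.cc reads back in.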

  //////////////////////////////////////////
  // Construct a coarsened grid with 4^4 cell
  //////////////////////////////////////////
  Coordinate Block({4,4,6,4});
  Coordinate clatt = GridDefaultLatt();
  for(int d=0;d<clatt.size();d++){
    clatt[d] = clatt[d]/Block[d];
  }

  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt,
                                                            GridDefaultSimd(Nd,vComplex::Nsimd()),
                                                            GridDefaultMpi());;
  GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);

  ///////////////////////// RNGs /////////////////////////////////
  std::vector<int> seeds4({1,2,3,4});
  std::vector<int> seeds5({5,6,7,8});
  std::vector<int> cseeds({5,6,7,8});

  GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5);
  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
  GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);


  typedef HermOpAdaptor<LatticeFermionD> HermFineMatrix;
  HermFineMatrix FineHermOp(HermOpEO);

  ////////////////////////////////////////////////////////////
  ///////////// Coarse basis and Little Dirac Operator ///////
  ////////////////////////////////////////////////////////////
  typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator;
  typedef LittleDiracOperator::CoarseVector CoarseVector;

  NextToNextToNextToNearestStencilGeometry5D geom(Coarse5d);

  typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
  Subspace Aggregates(Coarse5d,FrbGrid,cb);

  ////////////////////////////////////////////////////////////
  // Need to check about red-black grid coarsening
  ////////////////////////////////////////////////////////////
  std::string subspace_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Subspace.phys48.mixed.2500.60");
  //  //  std::string subspace_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Subspace.phys48.new.62");
  //  std::string refine_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Subspace.phys48.evecF");
  std::string refine_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Refine.phys48.mixed.2500.60");
  std::string ldop_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/LittleDiracOp.phys48.mixed.60");
  std::string evec_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/evecs.scidac");
  std::string eval_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/eval.xml");
  bool load_agg=true;
  bool load_refine=true;
  bool load_mat=false;
  bool load_evec=false;

  int refine=1;
  if ( load_agg ) {
    if ( !(refine) || (!load_refine) ) {
      LoadBasis(Aggregates,subspace_file);
    }
  } else {
    //    Aggregates.CreateSubspaceMultishift(RNG5,HermOpEO,
    //                                        0.0003,1.0e-5,2000); // Lo, tol, maxit
    //    Aggregates.CreateSubspaceChebyshev(RNG5,HermOpEO,nbasis,95.,0.01,1500);// <== last run
    Aggregates.CreateSubspaceChebyshevNew(RNG5,HermOpEO,95.);
    SaveBasis(Aggregates,subspace_file);
  }

  std::cout << "**************************************"<<std::endl;
  std::cout << "Building MultiRHS Coarse operator"<<std::endl;
  std::cout << "**************************************"<<std::endl;
  ConjugateGradient<CoarseVector>  coarseCG(4.0e-2,20000,true);

  const int nrhs=24;

  Coordinate mpi=GridDefaultMpi();
  Coordinate rhMpi ({1,1,mpi[0],mpi[1],mpi[2],mpi[3]});
  Coordinate rhLatt({nrhs,1,clatt[0],clatt[1],clatt[2],clatt[3]});
  Coordinate rhSimd({vComplex::Nsimd(),1, 1,1,1,1});

  GridCartesian *CoarseMrhs = new GridCartesian(rhLatt,rhSimd,rhMpi);
  typedef MultiGeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> MultiGeneralCoarsenedMatrix_t;
  MultiGeneralCoarsenedMatrix_t mrhs(geom,CoarseMrhs);

  std::cout << "**************************************"<<std::endl;
  std::cout << " Coarse Lanczos "<<std::endl;
  std::cout << "**************************************"<<std::endl;

  typedef HermitianLinearOperator<MultiGeneralCoarsenedMatrix_t,CoarseVector> MrhsHermMatrix;
  Chebyshev<CoarseVector>      IRLCheby(0.005,42.0,301);  // 1 iter
  MrhsHermMatrix MrhsCoarseOp     (mrhs);

  //  CoarseVector pm_src(CoarseMrhs);
  //  pm_src = ComplexD(1.0);
  //  PowerMethod<CoarseVector>       cPM;   cPM(MrhsCoarseOp,pm_src);

  int Nk=192;
  int Nm=384;
  int Nstop=Nk;
  int Nconv_test_interval=1;

  ImplicitlyRestartedBlockLanczosCoarse<CoarseVector> IRL(MrhsCoarseOp,
                                                          Coarse5d,
                                                          CoarseMrhs,
                                                          nrhs,
                                                          IRLCheby,
                                                          Nstop,
                                                          Nconv_test_interval,
                                                          nrhs,
                                                          Nk,
                                                          Nm,
                                                          1e-5,10);

  int Nconv;
  std::vector<RealD>            eval(Nm);
  std::vector<CoarseVector>     evec(Nm,Coarse5d);
  std::vector<CoarseVector>     c_src(nrhs,Coarse5d);

  ///////////////////////
  // Deflation guesser object
  ///////////////////////
  MultiRHSDeflation<CoarseVector> MrhsGuesser;

  //////////////////////////////////////////
  // Block projector for coarse/fine
  //////////////////////////////////////////
  MultiRHSBlockProject<LatticeFermionD> MrhsProjector;

  //////////////////////////
  // Extra HDCG parameters
  //////////////////////////
  int maxit=300;
  ConjugateGradient<CoarseVector>  CG(5.0e-2,maxit,false);
  ConjugateGradient<CoarseVector>  CGstart(5.0e-2,maxit,false);
  RealD lo=2.0;
  int ord = 7;
  //  int ord = 11;

  int blockDim = 0;//not used for BlockCG
  BlockConjugateGradient<CoarseVector>    BCG  (BlockCGrQ,blockDim,5.0e-5,maxit,true);

  DoNothingGuesser<CoarseVector> DoNothing;
  //  HPDSolver<CoarseVector> HPDSolveMrhs(MrhsCoarseOp,CG,DoNothing);
  //  HPDSolver<CoarseVector> HPDSolveMrhsStart(MrhsCoarseOp,CGstart,DoNothing);
  //  HPDSolver<CoarseVector> HPDSolveMrhs(MrhsCoarseOp,BCG,DoNothing);
  //  HPDSolver<CoarseVector> HPDSolveMrhsRefine(MrhsCoarseOp,BCG,DoNothing);
  //  FixedCGPolynomial<CoarseVector> HPDSolveMrhs(maxit,MrhsCoarseOp);

  ChebyshevInverter<CoarseVector> HPDSolveMrhs(1.0e-2,40.0,120,MrhsCoarseOp); //
  //  ChebyshevInverter<CoarseVector> HPDSolveMrhs(1.0e-2,40.0,110,MrhsCoarseOp); // 114 iter with Chebysmooth and BlockCG
  //  ChebyshevInverter<CoarseVector> HPDSolveMrhs(1.0e-2,40.0,120,MrhsCoarseOp); // 138 iter with Chebysmooth
  //  ChebyshevInverter<CoarseVector> HPDSolveMrhs(1.0e-2,40.0,200,MrhsCoarseOp); // 139 iter
  //  ChebyshevInverter<CoarseVector> HPDSolveMrhs(3.0e-3,40.0,200,MrhsCoarseOp); // 137 iter, CG smooth, flex
  //  ChebyshevInverter<CoarseVector> HPDSolveMrhs(1.0e-3,40.0,200,MrhsCoarseOp); // 146 iter, CG smooth, flex
  //  ChebyshevInverter<CoarseVector> HPDSolveMrhs(3.0e-4,40.0,200,MrhsCoarseOp); // 156 iter, CG smooth, flex

  /////////////////////////////////////////////////
  // Mirs smoother
  /////////////////////////////////////////////////
  ShiftedHermOpLinearOperator<LatticeFermionD> ShiftedFineHermOp(HermOpEO,lo);
  //  FixedCGPolynomial<LatticeFermionD> CGsmooth(ord,ShiftedFineHermOp) ;
  //  CGSmoother<LatticeFermionD> CGsmooth(ord,ShiftedFineHermOp) ;
  ChebyshevSmoother<LatticeFermionD> CGsmooth(2.0,92.0,8,HermOpEO) ;

  if ( load_refine ) {
    LoadBasis(Aggregates,refine_file);
    //    LatticeFermionF conv_tmp(FrbGridF);
    //    LoadBasisSum(Aggregates,refine_file,sample,conv_tmp);
  } else {
    Aggregates.RefineSubspace(HermOpEO,0.001,1.0e-3,3000); // 172 iters
    SaveBasis(Aggregates,refine_file);
  }
  Aggregates.Orthogonalise();

  std::cout << "**************************************"<<std::endl;
  std::cout << "Coarsen after refine"<<std::endl;
  std::cout << "**************************************"<<std::endl;
  mrhs.CoarsenOperator(FineHermOp,Aggregates,Coarse5d);

  std::cout << "**************************************"<<std::endl;
  std::cout << " Recompute coarse evecs "<<std::endl;
  std::cout << "**************************************"<<std::endl;
  evec.resize(Nm,Coarse5d);
  eval.resize(Nm);
  for(int r=0;r<nrhs;r++){
    random(CRNG,c_src[r]);
  }
  IRL.calc(eval,evec,c_src,Nconv,LanczosType::irbl);

  std::cout << "**************************************"<<std::endl;
  std::cout << " Reimport coarse evecs "<<std::endl;
  std::cout << "**************************************"<<std::endl;
  MrhsGuesser.ImportEigenBasis(evec,eval);

  std::cout << "**************************************"<<std::endl;
  std::cout << " Setting up mRHS HDCG"<<std::endl;
  std::cout << "**************************************"<<std::endl;
  MrhsProjector.Allocate(nbasis,FrbGrid,Coarse5d);
  MrhsProjector.ImportBasis(Aggregates.subspace);

  std::cout << "**************************************"<<std::endl;
  std::cout << "Calling mRHS HDCG"<<std::endl;
  std::cout << "**************************************"<<std::endl;
  TwoLevelADEF2mrhs<LatticeFermion,CoarseVector>
    HDCGmrhs(1.0e-8, 300,
             FineHermOp,
             CGsmooth,
             HPDSolveMrhs,    // Used in M1
             HPDSolveMrhs,    // Used in Vstart
             MrhsProjector,
             MrhsGuesser,
             CoarseMrhs);

  std::vector<LatticeFermionD> src_mrhs(nrhs,FrbGrid);
  std::vector<LatticeFermionD> res_mrhs(nrhs,FrbGrid);
  LatticeFermionD result_accurate(FrbGrid);
  LatticeFermionD result_sloppy(FrbGrid);
  LatticeFermionD error(FrbGrid);
  LatticeFermionD residual(FrbGrid);

  for(int r=0;r<nrhs;r++){
    random(RNG5,src_mrhs[r]);
    res_mrhs[r]=Zero();
  }
  HDCGmrhs(src_mrhs,res_mrhs);
  result_accurate = res_mrhs[0];
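  // TwoLevelADEF2mrhs is the multi right-hand-side HDCG solver: CGsmooth is
  // the fine-grid smoother, HPDSolveMrhs supplies the coarse-grid solve used
  // in both the M1 preconditioner and the deflated Vstart, and MrhsProjector /
  // MrhsGuesser provide the coarse/fine transfer and the eigenvector guess.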

#if 0

  std::vector<RealD> bins({1.0e-3,1.0e-2,1.0e-1,1.0,10.0,100.0});
  std::vector<int> orders({6000   ,4000  ,1000  ,500,500 ,500});
  PowerSpectrum GraphicEqualizer(bins,orders);

  std::cout << "**************************************"<<std::endl;
  std::cout << GridLogMessage << " PowerSpectrum of rrr "<<std::endl;
  std::cout << "**************************************"<<std::endl;
  GraphicEqualizer(FineHermOp,HDCGmrhs.rrr);
  std::cout << "**************************************"<<std::endl;
  std::cout << GridLogMessage << " PowerSpectrum of sss "<<std::endl;
  std::cout << "**************************************"<<std::endl;
  GraphicEqualizer(FineHermOp,HDCGmrhs.sss);
  std::cout << "**************************************"<<std::endl;
  std::cout << GridLogMessage << " PowerSpectrum of qqq "<<std::endl;
  std::cout << "**************************************"<<std::endl;
  GraphicEqualizer(FineHermOp,HDCGmrhs.qqq);
  std::cout << "**************************************"<<std::endl;
  std::cout << GridLogMessage << " PowerSpectrum of zzz "<<std::endl;
  std::cout << "**************************************"<<std::endl;
  GraphicEqualizer(FineHermOp,HDCGmrhs.zzz);

  std::vector<RealD> tols({1.0e-3,1.0e-4,1.0e-5});


  for(auto tol : tols) {

    TwoLevelADEF2mrhs<LatticeFermion,CoarseVector>
      HDCGmrhsSloppy(tol, 500,
                     FineHermOp,
                     CGsmooth,
                     HPDSolveMrhs,    // Used in M1
                     HPDSolveMrhs,    // Used in Vstart
                     MrhsProjector,
                     MrhsGuesser,
                     CoarseMrhs);

    // Solve again to 10^-5
    for(int r=0;r<nrhs;r++){
      res_mrhs[r]=Zero();
    }
    HDCGmrhsSloppy(src_mrhs,res_mrhs);

    result_sloppy = res_mrhs[0];
    error = result_sloppy - result_accurate;
    FineHermOp.HermOp(result_sloppy,residual);
    residual = residual - src_mrhs[0];

    std::cout << "**************************************"<<std::endl;
    std::cout << GridLogMessage << " Converged to tolerance "<< tol<<std::endl;
    std::cout << GridLogMessage << " Absolute error "<<norm2(error)<<std::endl;
    std::cout << GridLogMessage << " Residual "<<norm2(residual)<<std::endl;
    std::cout << "**************************************"<<std::endl;

    std::cout << "**************************************"<<std::endl;
    std::cout << GridLogMessage << " PowerSpectrum of error "<<std::endl;
    std::cout << "**************************************"<<std::endl;
    GraphicEqualizer(FineHermOp,error);
    std::cout << "**************************************"<<std::endl;
    std::cout << GridLogMessage << " PowerSpectrum of residual "<<std::endl;
    std::cout << "**************************************"<<std::endl;
    GraphicEqualizer(FineHermOp,residual);

  };
#endif

  // Standard CG
#if 0
  {
    std::cout << "**************************************"<<std::endl;
    std::cout << "Calling red black CG"<<std::endl;
    std::cout << "**************************************"<<std::endl;

    LatticeFermion result(FrbGrid); result=Zero();
    LatticeFermion    src(FrbGrid); random(RNG5,src);
    result=Zero();

    ConjugateGradient<LatticeFermionD>  CGfine(1.0e-8,30000,false);
    CGfine(HermOpEO, src, result);
  }
#endif
  Grid_finalize();
  return 0;
}
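A plausible invocation of the test above (a sketch: --grid and --mpi are the standard Grid geometry options, and 48.48.48.96 is an assumption matching the "phys48" naming) is mpirun ./Test_general_coarse_hdcg_phys48_blockcg --grid 48.48.48.96 --mpi 1.2.2.4 --mass 0.00078 --sample 1, with ckpoint_lat.1000 and the Subspace/Refine files present at the hard-coded paths.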
tests/debug/Test_general_coarse_hdcg_phys48_lanczos_subspace.cc (new file, 355 lines)
@@ -0,0 +1,355 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./tests/Test_general_coarse_hdcg.cc

    Copyright (C) 2023

Author: Peter Boyle <pboyle@bnl.gov>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczos.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedBlockLanczosCoarse.h>
#include <Grid/algorithms/iterative/AdefMrhs.h>

using namespace std;
using namespace Grid;

template<class aggregation>
void SaveFineEvecs(aggregation &Agg,std::string file)
{
#ifdef HAVE_LIME
  emptyUserRecord record;
  ScidacWriter WR(Agg[0].Grid()->IsBoss());
  WR.open(file);
  for(int b=0;b<Agg.size();b++){
    WR.writeScidacFieldRecord(Agg[b],record,0,Grid::BinaryIO::BINARYIO_LEXICOGRAPHIC);
  }
  WR.close();
#endif
}
template<class aggregation>
void SaveBasis(aggregation &Agg,std::string file)
{
#ifdef HAVE_LIME
  emptyUserRecord record;
  ScidacWriter WR(Agg.FineGrid->IsBoss());
  WR.open(file);
  for(int b=0;b<Agg.subspace.size();b++){
    WR.writeScidacFieldRecord(Agg.subspace[b],record,0,Grid::BinaryIO::BINARYIO_LEXICOGRAPHIC);
    //    WR.writeScidacFieldRecord(Agg.subspace[b],record);
  }
  WR.close();
#endif
}
template<class aggregation>
void LoadBasis(aggregation &Agg, std::string file)
{
#ifdef HAVE_LIME
  emptyUserRecord record;
  ScidacReader RD ;
  RD.open(file);
  for(int b=0;b<Agg.subspace.size();b++){
    RD.readScidacFieldRecord(Agg.subspace[b],record,Grid::BinaryIO::BINARYIO_LEXICOGRAPHIC);
    //    RD.readScidacFieldRecord(Agg.subspace[b],record,0);
  }
  RD.close();
#endif
}
template<class aggregation>
void LoadFineEvecs(aggregation &Agg, std::string file,LatticeFermionF & conv_tmp)
{
#ifdef HAVE_LIME
  emptyUserRecord record;
  ScidacReader RD ;
  RD.open(file);
  for(int b=0;b<Agg.size();b++){
    RD.readScidacFieldRecord(conv_tmp,record,Grid::BinaryIO::BINARYIO_LEXICOGRAPHIC);
    precisionChange(Agg[b],conv_tmp);
  }
  RD.close();
#endif
}
template<class CoarseVector>
void SaveEigenvectors(std::vector<RealD>        &eval,
                      std::vector<CoarseVector> &evec,
                      std::string evec_file,
                      std::string eval_file)
{
#ifdef HAVE_LIME
  emptyUserRecord record;
  ScidacWriter WR(evec[0].Grid()->IsBoss());
  WR.open(evec_file);
  for(int b=0;b<evec.size();b++){
    WR.writeScidacFieldRecord(evec[b],record,0,0);
  }
  WR.close();
  XmlWriter WRx(eval_file);
  write(WRx,"evals",eval);
#endif
}
template<class CoarseVector>
void LoadEigenvectors(std::vector<RealD>        &eval,
                      std::vector<CoarseVector> &evec,
                      std::string evec_file,
                      std::string eval_file)
{
#ifdef HAVE_LIME
  XmlReader RDx(eval_file);
  read(RDx,"evals",eval);
  emptyUserRecord record;

  Grid::ScidacReader RD ;
  RD.open(evec_file);
  assert(evec.size()==eval.size());
  for(int k=0;k<eval.size();k++) {
    RD.readScidacFieldRecord(evec[k],record);
  }
  RD.close();
#endif
}

// Want Op in CoarsenOp to call MatPcDagMatPc
template<class Field>
class HermOpAdaptor : public LinearOperatorBase<Field>
{
  LinearOperatorBase<Field> & wrapped;
public:
  HermOpAdaptor(LinearOperatorBase<Field> &wrapme) : wrapped(wrapme)  {};
  void Op     (const Field &in, Field &out)   { wrapped.HermOp(in,out); }
  void HermOp (const Field &in, Field &out)   { wrapped.HermOp(in,out); }
  void AdjOp  (const Field &in, Field &out)   { wrapped.HermOp(in,out); }
  void OpDiag (const Field &in, Field &out)                  { assert(0); }
  void OpDir  (const Field &in, Field &out,int dir,int disp) { assert(0); }
  void OpDirAll (const Field &in, std::vector<Field> &out)   { assert(0); };
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
};

template<class Field> class CGSmoother : public LinearFunction<Field>
{
public:
  using LinearFunction<Field>::operator();
  typedef LinearOperatorBase<Field> FineOperator;
  FineOperator & _SmootherOperator;
  int iters;
  CGSmoother(int _iters, FineOperator &SmootherOperator) :
    _SmootherOperator(SmootherOperator),
    iters(_iters)
  {
    std::cout << GridLogMessage<<" Mirs smoother order "<<iters<<std::endl;
  };
  void operator() (const Field &in, Field &out)
  {
    ConjugateGradient<Field>  CG(0.0,iters,false); // non-converge is just fine in a smoother

    out=Zero();

    CG(_SmootherOperator,in,out);
  }
};


int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  const int Ls=24;
  const int nbasis = 62;
  const int cb = 0 ;
  RealD mass=0.00078;
  RealD M5=1.8;
  RealD b=1.5;
  RealD c=0.5;

  GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
                                                                   GridDefaultSimd(Nd,vComplex::Nsimd()),
                                                                   GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);

  // Construct a coarsened grid with 4^4 cell
  Coordinate Block({4,4,6,4});
  Coordinate clatt = GridDefaultLatt();
  for(int d=0;d<clatt.size();d++){
    clatt[d] = clatt[d]/Block[d];
  }

  //////////////////////////////////////////
  // Double precision grids
  //////////////////////////////////////////
  GridCartesian *Coarse4d =  SpaceTimeGrid::makeFourDimGrid(clatt,
                                                            GridDefaultSimd(Nd,vComplex::Nsimd()),
                                                            GridDefaultMpi());;
  GridCartesian *Coarse5d =  SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);

  //////////////////////////////////////////
  // Single precision grids -- lanczos + smoother
  //////////////////////////////////////////
  GridCartesian         * UGridF   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
                                                                    GridDefaultSimd(Nd,vComplexF::Nsimd()),
                                                                    GridDefaultMpi());
  GridRedBlackCartesian * UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
  GridCartesian         * FGridF   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
  GridRedBlackCartesian * FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);
  ///////////////////////// RNGs /////////////////////////////////
  std::vector<int> seeds4({1,2,3,4});
  std::vector<int> seeds5({5,6,7,8});
  std::vector<int> cseeds({5,6,7,8});

  GridParallelRNG          RNG5(FGrid);   RNG5.SeedFixedIntegers(seeds5);
  GridParallelRNG          RNG4(UGrid);   RNG4.SeedFixedIntegers(seeds4);
  GridParallelRNG          CRNG(Coarse5d);CRNG.SeedFixedIntegers(cseeds);

  ///////////////////////// Configuration /////////////////////////////////
  LatticeGaugeField Umu(UGrid);

  FieldMetaData header;
  std::string file("ckpoint_lat.1000");
  NerscIO::readConfiguration(Umu,header,file);

  //////////////////////// Fermion action //////////////////////////////////
  MobiusFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c);

  SchurDiagMooeeOperator<MobiusFermionD, LatticeFermion> HermOpEO(Ddwf);

  const int Fine_Nstop = 200;
  const int Fine_Nk = 100;
  const int Fine_Np = 100;
  const int Fine_Nm = Fine_Nk+Fine_Np;

  typedef LatticeFermion FermionField;
  std::vector<RealD>          Fine_eval;
  std::vector<FermionField>   Fine_evec;

  LatticeFermionF conv_tmp(FrbGridF);
  Fine_eval.resize(Fine_Nstop);
  Fine_evec.resize(Fine_Nstop,FrbGrid);
  std::string evec_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Subspace.phys48.evecF");
  LoadFineEvecs(Fine_evec,evec_file,conv_tmp);

  typedef HermOpAdaptor<LatticeFermionD> HermFineMatrix;
  HermFineMatrix FineHermOp(HermOpEO);

  ////////////////////////////////////////////////////////////
  ///////////// Coarse basis and Little Dirac Operator ///////
  ////////////////////////////////////////////////////////////
  typedef GeneralCoarsenedMatrix<vSpinColourVector,vTComplex,nbasis> LittleDiracOperator;
  typedef LittleDiracOperator::CoarseVector CoarseVector;

  NextToNextToNextToNearestStencilGeometry5D geom(Coarse5d);

  typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
  Subspace Aggregates(Coarse5d,FrbGrid,cb);

  ////////////////////////////////////////////////////////////
  // Need to check about red-black grid coarsening
  ////////////////////////////////////////////////////////////
  //  std::string subspace_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Subspace.phys48.mixed.2500.60");
  //  //  std::string subspace_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Subspace.phys48.new.62");
  //  std::string refine_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Subspace.phys48.evec");
  std::string refine_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/Refine.phys48.mixed.2500.60");
  //  std::string ldop_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/LittleDiracOp.phys48.mixed.60");
  //  std::string evec_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/evecs.scidac");
  //  std::string eval_file("/lustre/orion/phy157/proj-shared/phy157_dwf/paboyle/eval.xml");
  bool load_agg=true;
  bool load_refine=true;

  //////////////////////////////////////////
  // Block projector for coarse/fine
  //////////////////////////////////////////
  MultiRHSBlockProject<LatticeFermionD> MrhsProjector;


  /////////////////////////////////////////////////
  // Mirs smoother
  /////////////////////////////////////////////////
  int ord=8;
  RealD lo=2.0;
  RealD MirsShift = lo;
  ShiftedHermOpLinearOperator<LatticeFermionD> ShiftedFineHermOp(HermOpEO,MirsShift);
  CGSmoother<LatticeFermionD> CGsmooth(ord,ShiftedFineHermOp) ;

  LoadBasis(Aggregates,refine_file);
  Aggregates.Orthogonalise();

  std::cout << "**************************************"<<std::endl;
  std::cout << " Using filtered subspace"<<std::endl;
  std::cout << "**************************************"<<std::endl;
  MrhsProjector.Allocate(nbasis,FrbGrid,Coarse5d);
  MrhsProjector.ImportBasis(Aggregates.subspace);

  FermionField Ftmp(FrbGrid);
  std::vector<FermionField> Fine_ev(1,FrbGrid);
  std::vector<FermionField> Fine_ev_compressed(1,FrbGrid);
  std::vector<CoarseVector> c_evec(1,Coarse5d);
  for(int ev=0;ev<Fine_evec.size();ev++){
    Fine_ev[0] = Fine_evec[ev];
    MrhsProjector.blockProject(Fine_ev,c_evec);
    MrhsProjector.blockPromote(Fine_ev_compressed,c_evec);
    Ftmp = Fine_ev_compressed[0];
    RealD div = 1.0/ sqrt(norm2(Ftmp));
    Ftmp = Ftmp * div;
    std::cout << GridLogMessage<<" "<<ev<<" uncomp "<< norm2(Fine_ev[0]) <<std::endl;
    std::cout << GridLogMessage<<" "<<ev<<" comp   "<< norm2(Ftmp) <<std::endl;
    Ftmp = Fine_ev[0] - Ftmp;
    std::cout << GridLogMessage<<" "<<ev<<" diff   "<< norm2(Ftmp) <<std::endl;
    CGsmooth(Fine_ev_compressed[0],Ftmp);
    Ftmp = Ftmp *lo;
    std::cout << GridLogMessage<<" "<<ev<<" smoothed "<< norm2(Ftmp) <<std::endl;
    div = 1.0/ sqrt(norm2(Ftmp));
    Ftmp=Ftmp*div;
    Ftmp = Fine_ev[0]-Ftmp;
    std::cout << GridLogMessage<<" "<<ev<<" diff   "<< norm2(Ftmp) <<std::endl;
  }
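  // For each fine eigenvector v the loop above measures the blocking
  // compression error || v - P v || (blockProject to the coarse basis,
  // blockPromote back), then applies the shifted-CG smoother to the
  // compressed vector to see how much of the lost component is restored.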

  std::cout << "**************************************"<<std::endl;
  std::cout << " Using eigenvector subspace "<<std::endl;
  std::cout << "**************************************"<<std::endl;
  for(int i=0;i<Aggregates.subspace.size();i++){
    Aggregates.subspace[i] = Fine_evec[i];
  }
  Aggregates.Orthogonalise();
  MrhsProjector.ImportBasis(Aggregates.subspace);
  for(int ev=0;ev<Fine_evec.size();ev++){
    Fine_ev[0] = Fine_evec[ev];
    MrhsProjector.blockProject(Fine_ev,c_evec);
    MrhsProjector.blockPromote(Fine_ev_compressed,c_evec);
    Ftmp = Fine_ev_compressed[0];
    RealD div = 1.0/ sqrt(norm2(Ftmp));
    Ftmp = Ftmp * div;
    std::cout << GridLogMessage<<" "<<ev<<" uncomp "<< norm2(Fine_ev[0]) <<std::endl;
    std::cout << GridLogMessage<<" "<<ev<<" comp   "<< norm2(Ftmp) <<std::endl;
    Ftmp = Fine_ev[0] - Ftmp;
    std::cout << GridLogMessage<<" "<<ev<<" diff   "<< norm2(Ftmp) <<std::endl;
    CGsmooth(Fine_ev_compressed[0],Ftmp);
    Ftmp = Ftmp *lo;
    std::cout << GridLogMessage<<" "<<ev<<" smoothed "<< norm2(Ftmp) <<std::endl;
    div = 1.0/ sqrt(norm2(Ftmp));
    Ftmp=Ftmp*div;
    Ftmp = Fine_ev[0]-Ftmp;
    std::cout << GridLogMessage<<" "<<ev<<" diff   "<< norm2(Ftmp) <<std::endl;
  }

  // Standard CG
  Grid_finalize();
  return 0;
}
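The two passes above compare subspace quality: with P the project-then-promote operator onto the span of the current basis, the per-eigenvector figure of merit is ||v - P v||, evaluated first for the refined (filtered) aggregation basis and then for a basis built from the loaded fine eigenvectors themselves.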
@@ -36,28 +36,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
using namespace std;
using namespace Grid;

template<class Field>
class HermOpAdaptor : public LinearOperatorBase<Field>
{
  LinearOperatorBase<Field> & wrapped;
public:
  HermOpAdaptor(LinearOperatorBase<Field> &wrapme) : wrapped(wrapme)  {};
  void OpDiag (const Field &in, Field &out) { assert(0); }
  void OpDir  (const Field &in, Field &out,int dir,int disp) { assert(0); }
  void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); };
  void Op     (const Field &in, Field &out){
    wrapped.HermOp(in,out);
  }
  void AdjOp  (const Field &in, Field &out){
    wrapped.HermOp(in,out);
  }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
  void HermOp(const Field &in, Field &out){
    wrapped.HermOp(in,out);
  }

};

template<class Matrix,class Field>
class PVdagMLinearOperator : public LinearOperatorBase<Field> {
  Matrix &_Mat;
@@ -69,78 +47,169 @@ public:
  void OpDir  (const Field &in, Field &out,int dir,int disp) { assert(0); }
  void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); };
  void Op     (const Field &in, Field &out){
    // std::cout << "Op: PVdag M "<<std::endl;
    Field tmp(in.Grid());
    _Mat.M(in,tmp);
    _PV.Mdag(tmp,out);
  }
  void AdjOp  (const Field &in, Field &out){
    // std::cout << "AdjOp: Mdag PV "<<std::endl;
    Field tmp(in.Grid());
    _PV.M(tmp,out);
    _Mat.Mdag(in,tmp);
    _PV.M(in,tmp);
    _Mat.Mdag(tmp,out);
  }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
  void HermOp(const Field &in, Field &out){
    std::cout << "HermOp"<<std::endl;
    // std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
    Field tmp(in.Grid());
    //    _Mat.M(in,tmp);
    //    _PV.Mdag(tmp,out);
    //    _PV.M(out,tmp);
    //    _Mat.Mdag(tmp,out);
    Op(in,tmp);
    AdjOp(tmp,out);
    // std::cout << "HermOp done "<<norm2(out)<<std::endl;
  }
};
template<class Matrix,class Field>
class ShiftedPVdagMLinearOperator : public LinearOperatorBase<Field> {
  Matrix &_Mat;
  Matrix &_PV;
  RealD shift;
public:
  ShiftedPVdagMLinearOperator(RealD _shift,Matrix &Mat,Matrix &PV): shift(_shift),_Mat(Mat),_PV(PV){};

  void OpDiag (const Field &in, Field &out) { assert(0); }
  void OpDir  (const Field &in, Field &out,int dir,int disp) { assert(0); }
  void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); };
  void Op     (const Field &in, Field &out){
    // std::cout << "Op: PVdag M "<<std::endl;
    Field tmp(in.Grid());
    _Mat.M(in,tmp);
    _PV.Mdag(tmp,out);
    _PV.M(out,tmp);
    _Mat.Mdag(tmp,out);
    std::cout << "HermOp done "<<norm2(out)<<std::endl;

    out = out + shift * in;
  }
};
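// PVdagMLinearOperator applies the Pauli-Villars preconditioned operator:
// Op is PVdag M, AdjOp its adjoint, and HermOp now composes Op followed by
// AdjOp, i.e. (PVdag M)^dag (PVdag M). The new ShiftedPVdagMLinearOperator
// adds a constant shift and is the operator the fine-grid GCR smoother
// targets below.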

template<class Field> class DumbOperator : public LinearOperatorBase<Field> {
public:
  LatticeComplex scale;
  DumbOperator(GridBase *grid) : scale(grid)
  {
    scale = 0.0;
    LatticeComplex scalesft(grid);
    LatticeComplex scaletmp(grid);
    for(int d=0;d<4;d++){
      Lattice<iScalar<vInteger> > x(grid); LatticeCoordinate(x,d+1);
      LatticeCoordinate(scaletmp,d+1);
      scalesft = Cshift(scaletmp,d+1,1);
      scale = 100.0*scale + where( mod(x ,2)==(Integer)0, scalesft,scaletmp);
    }
    std::cout << " scale\n" << scale << std::endl;
  }
  // Support for coarsening to a multigrid
  void OpDiag (const Field &in, Field &out) {};
  void OpDir  (const Field &in, Field &out,int dir,int disp){};
  void OpDirAll (const Field &in, std::vector<Field> &out) {};

  void Op (const Field &in, Field &out){
    out = scale * in;
  }
  void AdjOp (const Field &in, Field &out){
    out = scale * in;
  void AdjOp  (const Field &in, Field &out){
    // std::cout << "AdjOp: Mdag PV "<<std::endl;
    Field tmp(in.Grid());
    _PV.M(tmp,out);
    _Mat.Mdag(in,tmp);
    out = out + shift * in;
  }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
  void HermOp(const Field &in, Field &out){
    double n1, n2;
    HermOpAndNorm(in,out,n1,n2);
  }
  void HermOpAndNorm(const Field &in, Field &out,double &n1,double &n2){
    ComplexD dot;

    out = scale * in;

    dot= innerProduct(in,out);
    n1=real(dot);

    dot = innerProduct(out,out);
    n2=real(dot);
    // std::cout << "HermOp: Mdag PV PVdag M"<<std::endl;
    Field tmp(in.Grid());
    Op(in,tmp);
    AdjOp(tmp,out);
  }
};
template<class Fobj,class CComplex,int nbasis>
class MGPreconditioner : public LinearFunction< Lattice<Fobj> > {
public:
  using LinearFunction<Lattice<Fobj> >::operator();

  typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
  typedef typename Aggregation<Fobj,CComplex,nbasis>::FineField    FineField;
  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
  typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
  typedef LinearOperatorBase<FineField>    FineOperator;
  typedef LinearFunction    <FineField>    FineSmoother;
  typedef LinearOperatorBase<CoarseVector> CoarseOperator;
  typedef LinearFunction    <CoarseVector> CoarseSolver;
  Aggregates     & _Aggregates;
  FineOperator   & _FineOperator;
  FineSmoother   & _PreSmoother;
  FineSmoother   & _PostSmoother;
  CoarseOperator & _CoarseOperator;
  CoarseSolver   & _CoarseSolve;

  int level;  void Level(int lv) {level = lv; };

  MGPreconditioner(Aggregates &Agg,
                   FineOperator &Fine,
                   FineSmoother &PreSmoother,
                   FineSmoother &PostSmoother,
                   CoarseOperator &CoarseOperator_,
                   CoarseSolver &CoarseSolve_)
    : _Aggregates(Agg),
      _FineOperator(Fine),
      _PreSmoother(PreSmoother),
      _PostSmoother(PostSmoother),
      _CoarseOperator(CoarseOperator_),
      _CoarseSolve(CoarseSolve_),
      level(1)  {  }

  virtual void operator()(const FineField &in, FineField & out)
  {
    GridBase *CoarseGrid = _Aggregates.CoarseGrid;
    //    auto CoarseGrid = _CoarseOperator.Grid();
    CoarseVector Csrc(CoarseGrid);
    CoarseVector Csol(CoarseGrid);
    FineField vec1(in.Grid());
    FineField vec2(in.Grid());

    std::cout<<GridLogMessage << "Calling PreSmoother " <<std::endl;

    //    std::cout<<GridLogMessage << "Calling PreSmoother input residual "<<norm2(in) <<std::endl;
    double t;
    // Fine Smoother
    //    out = in;
    out = Zero();
    t=-usecond();
    _PreSmoother(in,out);
    t+=usecond();

    std::cout<<GridLogMessage << "PreSmoother took "<< t/1000.0<< "ms" <<std::endl;

    // Update the residual
    _FineOperator.Op(out,vec1);  sub(vec1, in ,vec1);
    //    std::cout<<GridLogMessage <<"Residual-1 now " <<norm2(vec1)<<std::endl;

    // Fine to Coarse
    t=-usecond();
    _Aggregates.ProjectToSubspace  (Csrc,vec1);
    t+=usecond();
    std::cout<<GridLogMessage << "Project to coarse took "<< t/1000.0<< "ms" <<std::endl;

    // Coarse correction
    t=-usecond();
    Csol = Zero();
    _CoarseSolve(Csrc,Csol);
    //Csol=Zero();
    t+=usecond();
    std::cout<<GridLogMessage << "Coarse solve took "<< t/1000.0<< "ms" <<std::endl;

    // Coarse to Fine
    t=-usecond();
    //    _CoarseOperator.PromoteFromSubspace(_Aggregates,Csol,vec1);
    _Aggregates.PromoteFromSubspace(Csol,vec1);
    add(out,out,vec1);
    t+=usecond();
    std::cout<<GridLogMessage << "Promote to this level took "<< t/1000.0<< "ms" <<std::endl;

    // Residual
    _FineOperator.Op(out,vec1);  sub(vec1 ,in , vec1);
    //    std::cout<<GridLogMessage <<"Residual-2 now " <<norm2(vec1)<<std::endl;

    // Fine Smoother
    t=-usecond();
    //    vec2=vec1;
    vec2=Zero();
    _PostSmoother(vec1,vec2);
    t+=usecond();
    std::cout<<GridLogMessage << "PostSmoother took "<< t/1000.0<< "ms" <<std::endl;

    add( out,out,vec2);
    std::cout<<GridLogMessage << "Done " <<std::endl;
  }
};
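// MGPreconditioner implements a two-level V-cycle: pre-smooth, form the
// residual, project it to the coarse subspace, coarse-solve, prolongate and
// add the correction, re-form the residual, post-smooth, and accumulate the
// final update, timing each stage.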
|
||||
|
||||
int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);

  const int Ls=2;
  const int Ls=16;

  GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
@ -151,7 +220,8 @@ int main (int argc, char ** argv)
  // Construct a coarsened grid
  Coordinate clatt = GridDefaultLatt();
  for(int d=0;d<clatt.size();d++){
    clatt[d] = clatt[d]/4;
    clatt[d] = clatt[d]/2;
    //    clatt[d] = clatt[d]/4;
  }
  GridCartesian *Coarse4d = SpaceTimeGrid::makeFourDimGrid(clatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());;
  GridCartesian *Coarse5d = SpaceTimeGrid::makeFiveDimGrid(1,Coarse4d);
@ -173,15 +243,14 @@ int main (int argc, char ** argv)
  FieldMetaData header;
  std::string file("ckpoint_lat.4000");
  NerscIO::readConfiguration(Umu,header,file);
  //Umu = 1.0;

  RealD mass=0.5;
  RealD mass=0.01;
  RealD M5=1.8;

  DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
  DomainWallFermionD Dpv(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,1.0,M5);

  const int nbasis = 1;
  const int nbasis = 20;
  const int cb = 0 ;
  LatticeFermion prom(FGrid);

@ -193,25 +262,51 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
  std::cout<<GridLogMessage<<std::endl;

  PVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> PVdagM(Ddwf,Dpv);
  HermOpAdaptor<LatticeFermionD> HOA(PVdagM);

  typedef PVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> PVdagM_t;
  typedef ShiftedPVdagMLinearOperator<DomainWallFermionD,LatticeFermionD> ShiftedPVdagM_t;
  PVdagM_t PVdagM(Ddwf,Dpv);
  //  ShiftedPVdagM_t ShiftedPVdagM(2.0,Ddwf,Dpv);  // 355
  //  ShiftedPVdagM_t ShiftedPVdagM(1.0,Ddwf,Dpv);  // 246
  //  ShiftedPVdagM_t ShiftedPVdagM(0.5,Ddwf,Dpv);  // 183
  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 145
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 134
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 127 -- NULL space via inverse iteration
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 57 -- NULL space via inverse iteration; 3 iterations
  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 57 , tighter inversion
  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // nbasis 20 -- 49 iters
  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // nbasis 20 -- 70 iters; asymmetric
  //  ShiftedPVdagM_t ShiftedPVdagM(0.25,Ddwf,Dpv); // 58; Loosen coarse, tighten fine
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 56 ...
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 51 ... with 24 vecs
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 31 ... with 24 vecs and 2^4 blocking
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 43 ... with 16 vecs and 2^4 blocking, sloppier
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 35 ... with 20 vecs and 2^4 blocking
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 35 ... with 20 vecs and 2^4 blocking, looser coarse
  //  ShiftedPVdagM_t ShiftedPVdagM(0.1,Ddwf,Dpv);  // 64 ... with 20 vecs, Christoph setup, and 2^4 blocking, looser coarse
  ShiftedPVdagM_t ShiftedPVdagM(0.01,Ddwf,Dpv); //

  // Run power method on HOA??
  PowerMethod<LatticeFermion> PM;   PM(HOA,src);
  //  PowerMethod<LatticeFermion> PM;   PM(PVdagM,src);

  // Warning: This routine calls PVdagM.Op, not PVdagM.HermOp
  typedef Aggregation<vSpinColourVector,vTComplex,nbasis> Subspace;
  Subspace AggregatesPD(Coarse5d,FGrid,cb);
  /*
  AggregatesPD.CreateSubspaceChebyshev(RNG5,
                                       HOA,
                                       PVdagM,
                                       nbasis,
                                       5000.0,
                                       0.02,
                                       100,
                                       50,
                                       50,
                                       4000.0,
                                       2.0,
                                       200,
                                       200,
                                       200,
                                       0.0);
  */
  AggregatesPD.CreateSubspaceGCR(RNG5,
                                 PVdagM,
                                 nbasis);

  LittleDiracOperator LittleDiracOpPV(geom,FGrid,Coarse5d);
  LittleDiracOpPV.CoarsenOperator(PVdagM,AggregatesPD);
@ -257,6 +352,60 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage<<" ldop error: "<<norm2(c_proj)<<std::endl;
  //  std::cout<<GridLogMessage<<" error "<< c_proj<<std::endl;

  /**********
   * Some solvers
   **********
   */

  ///////////////////////////////////////
  // Coarse grid solver test
  ///////////////////////////////////////

  std::cout<<GridLogMessage<<"******************* "<<std::endl;
  std::cout<<GridLogMessage<<" Coarse Grid Solve -- Level 3 "<<std::endl;
  std::cout<<GridLogMessage<<"******************* "<<std::endl;
  TrivialPrecon<CoarseVector> simple;
  NonHermitianLinearOperator<LittleDiracOperator,CoarseVector> LinOpCoarse(LittleDiracOpPV);
  //  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector> L2PGCR(1.0e-4, 100, LinOpCoarse,simple,10,10);
  PrecGeneralisedConjugateResidualNonHermitian<CoarseVector> L2PGCR(3.0e-2, 100, LinOpCoarse,simple,10,10);
  L2PGCR.Level(3);
  c_res=Zero();
  L2PGCR(c_src,c_res);

  ////////////////////////////////////////
  // Fine grid smoother
  ////////////////////////////////////////
  std::cout<<GridLogMessage<<"******************* "<<std::endl;
  std::cout<<GridLogMessage<<" Fine Grid Smoother -- Level 2 "<<std::endl;
  std::cout<<GridLogMessage<<"******************* "<<std::endl;
  TrivialPrecon<LatticeFermionD> simple_fine;
  //  NonHermitianLinearOperator<PVdagM_t,LatticeFermionD> LinOpSmooth(PVdagM);
  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermionD> SmootherGCR(0.01,1,ShiftedPVdagM,simple_fine,16,16);
  SmootherGCR.Level(2);

  LatticeFermionD f_src(FGrid);
  LatticeFermionD f_res(FGrid);

  f_src = one; // 1 in every element for vector 1.
  f_res=Zero();
  SmootherGCR(f_src,f_res);

  typedef MGPreconditioner<vSpinColourVector, vTComplex,nbasis> TwoLevelMG;

  TwoLevelMG TwoLevelPrecon(AggregatesPD,
                            PVdagM,
                            simple_fine,
                            SmootherGCR,
                            LinOpCoarse,
                            L2PGCR);

  PrecGeneralisedConjugateResidualNonHermitian<LatticeFermion> L1PGCR(1.0e-8,1000,PVdagM,TwoLevelPrecon,16,16);
  L1PGCR.Level(1);

  f_res=Zero();
  L1PGCR(f_src,f_res);

  std::cout<<GridLogMessage<<std::endl;
  std::cout<<GridLogMessage<<"*******************************************"<<std::endl;
  std::cout<<GridLogMessage<<std::endl;
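Assembled, the solver hierarchy in this test reads: L1PGCR is the outer PGCR on PVdagM at 1.0e-8, preconditioned by the two-level cycle, which restricts through the AggregatesPD subspace, solves the coarsened little Dirac operator with L2PGCR at 3.0e-2, prolongates, and post-smooths with the one-iteration SmootherGCR on the shifted operator. One preconditioner application is then, schematically (as far as the visible fragments show, with R/P the aggregation restriction/prolongation, A_c the coarse operator, S the smoother and A = PVdagM):

    M(b) \;=\; P A_c^{-1} R\, b \;+\; S\!\left(b - A\, P A_c^{-1} R\, b\right)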
14 tests/lanczos/LanParams.xml Normal file
@ -0,0 +1,14 @@
<?xml version="1.0"?>
<grid>
  <LanczosParameters>
    <mass>0.00107</mass>
    <M5>1.8</M5>
    <Ls>48</Ls>
    <Nstop>10</Nstop>
    <Nk>15</Nk>
    <Np>85</Np>
    <ChebyLow>0.003</ChebyLow>
    <ChebyHigh>60</ChebyHigh>
    <ChebyOrder>201</ChebyOrder>
  </LanczosParameters>
</grid>
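A note on the Chebyshev entries, which the tests below hand straight to the implicitly restarted Lanczos: the polynomial acts as a spectral filter that stays O(1) on [ChebyLow, ChebyHigh] and grows rapidly below ChebyLow, so the wanted low modes of the (positive) MdagM spectrum are amplified relative to the bulk. With the standard affine map this is (background fact, not part of this diff):

    p(\lambda) \;=\; T_n\!\left(\frac{2\lambda - a - b}{b - a}\right), \qquad a = \text{ChebyLow},\; b = \text{ChebyHigh},\; n = \text{ChebyOrder}.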
346 tests/lanczos/Test_dwf_G5R5.cc Normal file
@ -0,0 +1,346 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./tests/Test_dwf_G5R5.cc

    Copyright (C) 2015

    Author: Chulwoo Jung <chulwoo@bnl.gov>
    From Duo and Bob's Chirality study

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution
    directory
*************************************************************************************/
/*  END LEGAL */
#include <Grid/Grid.h>

using namespace std;
using namespace Grid;

//typedef WilsonFermionD FermionOp;
typedef DomainWallFermionD FermionOp;
typedef typename DomainWallFermionD::FermionField FermionField;

RealD AllZero(RealD x) { return 0.; }

namespace Grid {

struct LanczosParameters: Serializable {
  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
                                  RealD, mass ,
                                  RealD, M5 ,
                                  Integer, Ls,
                                  Integer, Nstop,
                                  Integer, Nk,
                                  Integer, Np,
                                  RealD, ChebyLow,
                                  RealD, ChebyHigh,
                                  Integer, ChebyOrder)
  //  Integer, StartTrajectory,
  //  Integer, Trajectories, /* @brief Number of sweeps in this run */
  //  bool, MetropolisTest,
  //  Integer, NoMetropolisUntil,
  //  std::string, StartingType,
  //  Integer, SW,
  //  RealD, Kappa,
  //  IntegratorParameters, MD)

  LanczosParameters() {
    ////////////////////////////// Default values
    mass = 0;
    //    MetropolisTest    = true;
    //    NoMetropolisUntil = 10;
    //    StartTrajectory   = 0;
    //    SW                = 2;
    //    Trajectories      = 10;
    //    StartingType      = "HotStart";
    /////////////////////////////////
  }

  template <class ReaderClass >
  LanczosParameters(Reader<ReaderClass> & TheReader){
    initialize(TheReader);
  }

  template < class ReaderClass >
  void initialize(Reader<ReaderClass> &TheReader){
    //    std::cout << GridLogMessage << "Reading HMC\n";
    read(TheReader, "HMC", *this);
  }

  void print_parameters() const {
    //    std::cout << GridLogMessage << "[HMC parameters] Trajectories            : " << Trajectories << "\n";
    //    std::cout << GridLogMessage << "[HMC parameters] Start trajectory        : " << StartTrajectory << "\n";
    //    std::cout << GridLogMessage << "[HMC parameters] Metropolis test (on/off): " << std::boolalpha << MetropolisTest << "\n";
    //    std::cout << GridLogMessage << "[HMC parameters] Thermalization trajs    : " << NoMetropolisUntil << "\n";
    //    std::cout << GridLogMessage << "[HMC parameters] Starting type           : " << StartingType << "\n";
    //    MD.print_parameters();
  }

};

}

int main(int argc, char** argv) {
  Grid_init(&argc, &argv);

  LanczosParameters LanParams;
#if 1
  {
    XmlReader HMCrd("LanParams.xml");
    read(HMCrd,"LanczosParameters",LanParams);
  }
#else
  {
    LanParams.mass = mass;
  }
#endif
  std::cout << GridLogMessage<< LanParams <<std::endl;
  {
    XmlWriter HMCwr("LanParams.xml.out");
    write(HMCwr,"LanczosParameters",LanParams);
  }

  int Ls=16;
  RealD M5=1.8;
  RealD mass = -1.0;

  mass=LanParams.mass;
  Ls=LanParams.Ls;
  M5=LanParams.M5;

  GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(
      GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
      GridDefaultMpi());
  GridRedBlackCartesian* UrbGrid =
      SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  //  GridCartesian* FGrid = UGrid;
  //  GridRedBlackCartesian* FrbGrid = UrbGrid;
  GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls, UGrid);
  GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls, UGrid);
  //  printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n", UGrid, UrbGrid, FGrid, FrbGrid);

  std::vector<int> seeds4({1, 2, 3, 4});
  std::vector<int> seeds5({5, 6, 7, 8});
  GridParallelRNG RNG5(FGrid);    RNG5.SeedFixedIntegers(seeds5);
  GridParallelRNG RNG4(UGrid);    RNG4.SeedFixedIntegers(seeds4);
  GridParallelRNG RNG5rb(FrbGrid);  RNG5.SeedFixedIntegers(seeds5);

  LatticeGaugeField Umu(UGrid);

  FieldMetaData header;
  std::string file("./config");

  int precision32 = 0;
  int tworow = 0;
  NerscIO::readConfiguration(Umu,header,file);

  /*
  std::vector<LatticeColourMatrix> U(4, UGrid);
  for (int mu = 0; mu < Nd; mu++) {
    U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
  }
  */

  int Nstop = 10;
  int Nk = 20;
  int Np = 80;
  Nstop=LanParams.Nstop;
  Nk=LanParams.Nk;
  Np=LanParams.Np;

  int Nm = Nk + Np;
  int MaxIt = 10000;
  RealD resid = 1.0e-5;

  //while ( mass > - 5.0){
  FermionOp Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
  MdagMLinearOperator<FermionOp,FermionField> HermOp(Ddwf); /// <-----
  //  Gamma5HermitianLinearOperator <FermionOp,LatticeFermion> HermOp2(WilsonOperator); /// <-----
  Gamma5R5HermitianLinearOperator<FermionOp, LatticeFermion> G5R5Herm(Ddwf);
  //  Gamma5R5HermitianLinearOperator
  std::vector<double> Coeffs{0, 1.};
  Polynomial<FermionField> PolyX(Coeffs);

  Chebyshev<FermionField> Cheby(LanParams.ChebyLow,LanParams.ChebyHigh,LanParams.ChebyOrder);

  FunctionHermOp<FermionField> OpCheby(Cheby,HermOp);
  PlainHermOp<FermionField> Op     (HermOp);
  PlainHermOp<FermionField> Op2    (G5R5Herm);

  ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby, Op, Nstop, Nk, Nm, resid, MaxIt);

  std::vector<RealD> eval(Nm);
  FermionField src(FGrid);
  gaussian(RNG5, src);
  std::vector<FermionField> evec(Nm, FGrid);
  for (int i = 0; i < 1; i++) {
    std::cout << i << " / " << Nm << " grid pointer " << evec[i].Grid()
              << std::endl;
  };

  int Nconv;
  IRL.calc(eval, evec, src, Nconv);

  std::cout << mass <<" : " << eval << std::endl;

#if 0
  Gamma g5(Gamma::Algebra::Gamma5) ;
  ComplexD dot;
  FermionField tmp(FGrid);
  //  RealD eMe,eMMe;
  for (int i = 0; i < Nstop ; i++) {
    //    tmp = g5*evec[i];
    dot = innerProduct(evec[i],evec[i]);
    //    G5R5(tmp,evec[i]);
    G5R5Herm.HermOpAndNorm(evec[i],tmp,eMe,eMMe);
    std::cout <<"Norm "<<M5<<" "<< mass << " : " << i << " " << real(dot) << " " << imag(dot) << " "<< eMe << " " <<eMMe<< std::endl ;
    for (int j = 0; j < Nstop ; j++) {
      dot = innerProduct(tmp,evec[j]);
      std::cout <<"G5R5 "<<M5<<" "<< mass << " : " << i << " " <<j<<" " << real(dot) << " " << imag(dot) << std::endl ;
    }
  }
  //  src = evec[0]+evec[1]+evec[2];
  //  mass += -0.1;
#endif

  //**********************************************************************
  //orthogonalization
  //calculate the matrix
  cout << "Start orthogonalization " << endl;
  cout << "calculate the matrix element" << endl;
  vector<LatticeFermion> G5R5Mevec(Nconv, FGrid);
  vector<LatticeFermion> finalevec(Nconv, FGrid);
  vector<RealD> eMe(Nconv), eMMe(Nconv);
  for(int i = 0; i < Nconv; i++){
    G5R5Herm.HermOpAndNorm(evec[i], G5R5Mevec[i], eMe[i], eMMe[i]);
  }
  cout << "Re<evec, G5R5M(evec)>: " << endl;
  cout << eMe << endl;
  cout << "<G5R5M(evec), G5R5M(evec)>" << endl;
  cout << eMMe << endl;
  vector<vector<ComplexD>> VevecG5R5Mevec(Nconv);
  Eigen::MatrixXcd evecG5R5Mevec = Eigen::MatrixXcd::Zero(Nconv, Nconv);
  for(int i = 0; i < Nconv; i++){
    VevecG5R5Mevec[i].resize(Nconv);
    for(int j = 0; j < Nconv; j++){
      VevecG5R5Mevec[i][j] = innerProduct(evec[i], G5R5Mevec[j]);
      evecG5R5Mevec(i, j) = VevecG5R5Mevec[i][j];
    }
  }
  //calculate eigenvector
  cout << "Eigen solver" << endl;
  Eigen::SelfAdjointEigenSolver<Eigen::MatrixXcd> eigensolver(evecG5R5Mevec);
  vector<RealD> eigeneval(Nconv);
  vector<vector<ComplexD>> eigenevec(Nconv);
  for(int i = 0; i < Nconv; i++){
    eigeneval[i] = eigensolver.eigenvalues()[i];
    eigenevec[i].resize(Nconv);
    for(int j = 0; j < Nconv; j++){
      eigenevec[i][j] = eigensolver.eigenvectors()(i, j);
    }
  }
  //rotation
  cout << "Do rotation" << endl;
  for(int i = 0; i < Nconv; i++){
    finalevec[i] = finalevec[i] - finalevec[i];
    for(int j = 0; j < Nconv; j++){
      finalevec[i] = eigenevec[j][i]*evec[j] + finalevec[i];
    }
  }
  //normalize again;
  for(int i = 0; i < Nconv; i++){
    RealD tmp_RealD = norm2(finalevec[i]);
    tmp_RealD = 1./pow(tmp_RealD, 0.5);
    finalevec[i] = finalevec[i]*tmp_RealD;
  }

  //check
  for(int i = 0; i < Nconv; i++){
    G5R5Herm.HermOpAndNorm(finalevec[i], G5R5Mevec[i], eMe[i], eMMe[i]);
  }

  //**********************************************************************
  //sort the eigenvectors
  vector<LatticeFermion> finalevec_copy(Nconv, FGrid);
  for(int i = 0; i < Nconv; i++){
    finalevec_copy[i] = finalevec[i];
  }
  vector<RealD> eMe_copy(eMe);
  for(int i = 0; i < Nconv; i++){
    eMe[i] = fabs(eMe[i]);
    eMe_copy[i] = eMe[i];
  }
  sort(eMe_copy.begin(), eMe_copy.end());
  for(int i = 0; i < Nconv; i++){
    for(int j = 0; j < Nconv; j++){
      if(eMe[j] == eMe_copy[i]){
        finalevec[i] = finalevec_copy[j];
      }
    }
  }
  for(int i = 0; i < Nconv; i++){
    G5R5Herm.HermOpAndNorm(finalevec[i], G5R5Mevec[i], eMe[i], eMMe[i]);
  }
  cout << "Re<evec, G5R5M(evec)>: " << endl;
  cout << eMe << endl;
  cout << "<G5R5M(evec), G5R5M(evec)>" << endl;
  cout << eMMe << endl;

  //  vector<LatticeFermion> finalevec(Nconv, FGrid);
  // temporary, until doing rotation
  //  for(int i = 0; i < Nconv; i++)
  //    finalevec[i]=evec[i];
  //**********************************************************************
  //calculate chirality matrix
  vector<LatticeFermion> G5evec(Nconv, FGrid);
  vector<vector<ComplexD>> chiral_matrix(Nconv);
  vector<vector<RealD>> chiral_matrix_real(Nconv);
  for(int i = 0; i < Nconv; i++){
    //    G5evec[i] = G5evec[i] - G5evec[i];
    G5evec[i] = Zero();
    for(int j = 0; j < Ls/2; j++){
      axpby_ssp(G5evec[i], 1., finalevec[i], 0., G5evec[i], j, j);
    }
    for(int j = Ls/2; j < Ls; j++){
      axpby_ssp(G5evec[i], -1., finalevec[i], 0., G5evec[i], j, j);
    }
  }
  for(int i = 0; i < Nconv; i++){
    chiral_matrix_real[i].resize(Nconv);
    chiral_matrix[i].resize(Nconv);
    for(int j = 0; j < Nconv; j++){
      chiral_matrix[i][j] = innerProduct(finalevec[i], G5evec[j]);
      chiral_matrix_real[i][j] = abs(chiral_matrix[i][j]);
      std::cout <<" chiral_matrix_real "<<i<<" "<<j<<" "<< chiral_matrix_real[i][j] << std::endl;
    }
  }
  for(int i = 0; i < Nconv; i++){
    if(chiral_matrix[i][i].real() < 0.){
      chiral_matrix_real[i][i] = -1. * chiral_matrix_real[i][i];
    }
  }

  Grid_finalize();
}
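Two pieces of the test above benefit from being spelled out. The orthogonalisation/rotation block is a Rayleigh-Ritz pass over the converged Lanczos vectors: it forms the small matrix of the Gamma5R5-Hermitian operator in that basis, diagonalises it with Eigen, and rotates onto the Ritz vectors (a reading of the code, with e_i the Lanczos vectors and psi_i the finalevec):

    H_{ij} = \langle e_i,\; \Gamma_5 R_5 M\, e_j \rangle, \qquad H = V \Lambda V^\dagger, \qquad \psi_i = \sum_j V_{ji}\, e_j .

The chirality block then measures C_{ij} = \langle \psi_i, \hat\Gamma \psi_j \rangle, where the axpby_ssp loops build \hat\Gamma\psi by keeping the s < L_s/2 slices with weight +1 and the s \ge L_s/2 slices with weight -1.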
278 tests/lanczos/Test_wilson_DWFKernel.cc Normal file
@ -0,0 +1,278 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./tests/Test_dwf_lanczos.cc

    Copyright (C) 2015

    Author: Chulwoo Jung <chulwoo@bnl.gov>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution
    directory
*************************************************************************************/
/*  END LEGAL */
#include <Grid/Grid.h>

using namespace std;
using namespace Grid;

typedef WilsonFermionD FermionOp;
typedef typename WilsonFermionD::FermionField FermionField;

RealD AllZero(RealD x) { return 0.; }

namespace Grid {

#if 0
template<typename Field>
class RationalHermOp : public LinearFunction<Field> {
public:
  using LinearFunction<Field>::operator();
  //  OperatorFunction<Field>   & _poly;
  LinearOperatorBase<Field> &_Linop;
  RealD _massDen, _massNum;

  FunctionHermOp(LinearOperatorBase<Field>& linop, RealD massDen,RealD massNum)
    : _Linop(linop) ,_massDen(massDen),_massNum(massNum) {};

  void operator()(const Field& in, Field& out) {
    //    _poly(_Linop,in,out);
  }
};
#endif

template<class Matrix,class Field>
class InvG5LinearOperator : public LinearOperatorBase<Field> {
  Matrix &_Mat;
  RealD _num;
  RealD _Tol;
  Integer _MaxIt;
  Gamma g5;

public:
  InvG5LinearOperator(Matrix &Mat,RealD num): _Mat(Mat),_num(num), _Tol(1e-12),_MaxIt(10000), g5(Gamma::Algebra::Gamma5) {};

  // Support for coarsening to a multigrid
  void OpDiag (const Field &in, Field &out) {
    assert(0);
    _Mat.Mdiag(in,out);
  }
  void OpDir  (const Field &in, Field &out,int dir,int disp) {
    assert(0);
    _Mat.Mdir(in,out,dir,disp);
  }
  void OpDirAll  (const Field &in, std::vector<Field> &out){
    assert(0);
    _Mat.MdirAll(in,out);
  };
  void Op     (const Field &in, Field &out){
    assert(0);
    _Mat.M(in,out);
  }
  void AdjOp     (const Field &in, Field &out){
    assert(0);
    _Mat.Mdag(in,out);
  }
  void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){
    HermOp(in,out);
    ComplexD dot = innerProduct(in,out);
    n1=real(dot);
    n2=norm2(out);
  }
  void HermOp(const Field &in, Field &out){
    Field tmp(in.Grid());
    MdagMLinearOperator<Matrix,Field> denom(_Mat);
    ConjugateGradient<Field> CG(_Tol,_MaxIt);
    _Mat.M(in,tmp);
    tmp += _num*in;
    _Mat.Mdag(tmp,out);
    CG(denom,out,tmp);
    out = g5*tmp;
  }
};

struct LanczosParameters: Serializable {
  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
                                  RealD, mass ,
                                  RealD, resid,
                                  RealD, ChebyLow,
                                  RealD, ChebyHigh,
                                  Integer, ChebyOrder)
  //  Integer, StartTrajectory,
  //  Integer, Trajectories, /* @brief Number of sweeps in this run */
  //  bool, MetropolisTest,
  //  Integer, NoMetropolisUntil,
  //  std::string, StartingType,
  //  Integer, SW,
  //  RealD, Kappa,
  //  IntegratorParameters, MD)

  LanczosParameters() {
    ////////////////////////////// Default values
    mass = 0;
    //    MetropolisTest    = true;
    //    NoMetropolisUntil = 10;
    //    StartTrajectory   = 0;
    //    SW                = 2;
    //    Trajectories      = 10;
    //    StartingType      = "HotStart";
    /////////////////////////////////
  }

  template <class ReaderClass >
  LanczosParameters(Reader<ReaderClass> & TheReader){
    initialize(TheReader);
  }

  template < class ReaderClass >
  void initialize(Reader<ReaderClass> &TheReader){
    //    std::cout << GridLogMessage << "Reading HMC\n";
    read(TheReader, "HMC", *this);
  }

  void print_parameters() const {
    //    std::cout << GridLogMessage << "[HMC parameters] Trajectories            : " << Trajectories << "\n";
    //    std::cout << GridLogMessage << "[HMC parameters] Start trajectory        : " << StartTrajectory << "\n";
    //    std::cout << GridLogMessage << "[HMC parameters] Metropolis test (on/off): " << std::boolalpha << MetropolisTest << "\n";
    //    std::cout << GridLogMessage << "[HMC parameters] Thermalization trajs    : " << NoMetropolisUntil << "\n";
    //    std::cout << GridLogMessage << "[HMC parameters] Starting type           : " << StartingType << "\n";
    //    MD.print_parameters();
  }

};

}

int main(int argc, char** argv) {
  Grid_init(&argc, &argv);

  GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(
      GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
      GridDefaultMpi());
  GridRedBlackCartesian* UrbGrid =
      SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian* FGrid = UGrid;
  GridRedBlackCartesian* FrbGrid = UrbGrid;
  //  printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n", UGrid, UrbGrid, FGrid, FrbGrid);

  std::vector<int> seeds4({1, 2, 3, 4});
  std::vector<int> seeds5({5, 6, 7, 8});
  GridParallelRNG RNG5(FGrid);
  RNG5.SeedFixedIntegers(seeds5);
  GridParallelRNG RNG4(UGrid);
  RNG4.SeedFixedIntegers(seeds4);
  GridParallelRNG RNG5rb(FrbGrid);
  RNG5.SeedFixedIntegers(seeds5);

  LatticeGaugeField Umu(UGrid);
  //  SU<Nc>::HotConfiguration(RNG4, Umu);

  FieldMetaData header;
  std::string file("./config");

  int precision32 = 0;
  int tworow = 0;
  //  NerscIO::writeConfiguration(Umu,file,tworow,precision32);
  NerscIO::readConfiguration(Umu,header,file);

  /*
  std::vector<LatticeColourMatrix> U(4, UGrid);
  for (int mu = 0; mu < Nd; mu++) {
    U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
  }
  */

  int Nstop = 5;
  int Nk = 10;
  int Np = 90;
  int Nm = Nk + Np;
  int MaxIt = 10000;
  RealD resid = 1.0e-5;

  RealD mass = -1.0;

  LanczosParameters LanParams;
#if 1
  {
    XmlReader HMCrd("LanParams.xml");
    read(HMCrd,"LanczosParameters",LanParams);
  }
#else
  {
    LanParams.mass = mass;
  }
#endif
  std::cout << GridLogMessage<< LanParams <<std::endl;
  {
    XmlWriter HMCwr("LanParams.xml.out");
    write(HMCwr,"LanczosParameters",LanParams);
  }

  mass=LanParams.mass;
  resid=LanParams.resid;

  while ( mass > - 5.0){
    FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,2.+mass);
    InvG5LinearOperator<FermionOp,LatticeFermion> HermOp(WilsonOperator,-2.); /// <-----
    //SchurDiagTwoOperator<FermionOp,FermionField> HermOp(WilsonOperator);
    //  Gamma5HermitianLinearOperator <FermionOp,LatticeFermion> HermOp2(WilsonOperator); /// <-----

    std::vector<double> Coeffs{0, 0, 1.};
    Polynomial<FermionField> PolyX(Coeffs);
    Chebyshev<FermionField> Cheby(LanParams.ChebyLow,LanParams.ChebyHigh,LanParams.ChebyOrder);

    FunctionHermOp<FermionField> OpCheby(Cheby,HermOp);
    //  InvHermOp<FermionField> Op(WilsonOperator,HermOp);
    PlainHermOp<FermionField> Op     (HermOp);
    //  PlainHermOp<FermionField> Op2    (HermOp2);

    ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby, Op, Nstop, Nk, Nm, resid, MaxIt);

    std::vector<RealD> eval(Nm);
    FermionField src(FGrid);
    gaussian(RNG5, src);
    std::vector<FermionField> evec(Nm, FGrid);
    for (int i = 0; i < 1; i++) {
      std::cout << i << " / " << Nm << " grid pointer " << evec[i].Grid()
                << std::endl;
    };

    int Nconv;
    IRL.calc(eval, evec, src, Nconv);

    std::cout << mass <<" : " << eval << std::endl;

    Gamma g5(Gamma::Algebra::Gamma5) ;
    ComplexD dot;
    FermionField tmp(FGrid);
    for (int i = 0; i < Nstop ; i++) {
      tmp = g5*evec[i];
      dot = innerProduct(tmp,evec[i]);
      std::cout << mass << " : " << eval[i] << " " << real(dot) << " " << imag(dot) << std::endl ;
    }
    src = evec[0]+evec[1]+evec[2];
    mass += -0.1;
  }

  Grid_finalize();
}
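A note on InvG5LinearOperator::HermOp above, which is what the Lanczos actually iterates: with M the Wilson operator at mass 2+m and c = -2 the stored shift, the code forms t = (M + c) psi, applies M-dagger, CG-solves against M-dagger-M, and multiplies by gamma5. Assuming Grid's ConjugateGradient takes (operator, source, solution) in that order, the net kernel is

    \mathrm{HermOp}\,\psi \;=\; \gamma_5\, M^{-1}(M + c)\,\psi \;=\; \gamma_5\left(1 + c\, M^{-1}\right)\psi,

an inverse-iteration style operator that enhances the modes of M closest to zero (a sketch of the intent read off the code, not a documented statement).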
211 tests/lanczos/Test_wilson_specflow.cc Normal file
@ -0,0 +1,211 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./tests/Test_dwf_lanczos.cc

    Copyright (C) 2015

    Author: Chulwoo Jung <chulwoo@bnl.gov>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution
    directory
*************************************************************************************/
/*  END LEGAL */
#include <Grid/Grid.h>

using namespace std;
using namespace Grid;

typedef WilsonFermionD FermionOp;
typedef typename WilsonFermionD::FermionField FermionField;

RealD AllZero(RealD x) { return 0.; }

namespace Grid {

struct LanczosParameters: Serializable {
  GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
                                  RealD, mass ,
                                  RealD, ChebyLow,
                                  RealD, ChebyHigh,
                                  Integer, ChebyOrder)
  //  Integer, StartTrajectory,
  //  Integer, Trajectories, /* @brief Number of sweeps in this run */
  //  bool, MetropolisTest,
  //  Integer, NoMetropolisUntil,
  //  std::string, StartingType,
  //  Integer, SW,
  //  RealD, Kappa,
  //  IntegratorParameters, MD)

  LanczosParameters() {
    ////////////////////////////// Default values
    mass = 0;
    //    MetropolisTest    = true;
    //    NoMetropolisUntil = 10;
    //    StartTrajectory   = 0;
    //    SW                = 2;
    //    Trajectories      = 10;
    //    StartingType      = "HotStart";
    /////////////////////////////////
  }

  template <class ReaderClass >
  LanczosParameters(Reader<ReaderClass> & TheReader){
    initialize(TheReader);
  }

  template < class ReaderClass >
  void initialize(Reader<ReaderClass> &TheReader){
    //    std::cout << GridLogMessage << "Reading HMC\n";
    read(TheReader, "HMC", *this);
  }

  void print_parameters() const {
    //    std::cout << GridLogMessage << "[HMC parameters] Trajectories            : " << Trajectories << "\n";
    //    std::cout << GridLogMessage << "[HMC parameters] Start trajectory        : " << StartTrajectory << "\n";
    //    std::cout << GridLogMessage << "[HMC parameters] Metropolis test (on/off): " << std::boolalpha << MetropolisTest << "\n";
    //    std::cout << GridLogMessage << "[HMC parameters] Thermalization trajs    : " << NoMetropolisUntil << "\n";
    //    std::cout << GridLogMessage << "[HMC parameters] Starting type           : " << StartingType << "\n";
    //    MD.print_parameters();
  }

};

}

int main(int argc, char** argv) {
  Grid_init(&argc, &argv);

  GridCartesian* UGrid = SpaceTimeGrid::makeFourDimGrid(
      GridDefaultLatt(), GridDefaultSimd(Nd, vComplex::Nsimd()),
      GridDefaultMpi());
  GridRedBlackCartesian* UrbGrid =
      SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian* FGrid = UGrid;
  GridRedBlackCartesian* FrbGrid = UrbGrid;
  //  printf("UGrid=%p UrbGrid=%p FGrid=%p FrbGrid=%p\n", UGrid, UrbGrid, FGrid, FrbGrid);

  std::vector<int> seeds4({1, 2, 3, 4});
  std::vector<int> seeds5({5, 6, 7, 8});
  GridParallelRNG RNG5(FGrid);
  RNG5.SeedFixedIntegers(seeds5);
  GridParallelRNG RNG4(UGrid);
  RNG4.SeedFixedIntegers(seeds4);
  GridParallelRNG RNG5rb(FrbGrid);
  RNG5.SeedFixedIntegers(seeds5);

  LatticeGaugeField Umu(UGrid);
  //  SU<Nc>::HotConfiguration(RNG4, Umu);

  FieldMetaData header;
  std::string file("./config");

  int precision32 = 0;
  int tworow = 0;
  //  NerscIO::writeConfiguration(Umu,file,tworow,precision32);
  NerscIO::readConfiguration(Umu,header,file);

  /*
  std::vector<LatticeColourMatrix> U(4, UGrid);
  for (int mu = 0; mu < Nd; mu++) {
    U[mu] = PeekIndex<LorentzIndex>(Umu, mu);
  }
  */

  int Nstop = 10;
  int Nk = 20;
  int Np = 80;
  int Nm = Nk + Np;
  int MaxIt = 10000;
  RealD resid = 1.0e-5;

  RealD mass = -1.0;

  LanczosParameters LanParams;
#if 1
  {
    XmlReader HMCrd("LanParams.xml");
    read(HMCrd,"LanczosParameters",LanParams);
  }
#else
  {
    LanParams.mass = mass;
  }
#endif
  std::cout << GridLogMessage<< LanParams <<std::endl;
  {
    XmlWriter HMCwr("LanParams.xml.out");
    write(HMCwr,"LanczosParameters",LanParams);
  }

  mass=LanParams.mass;

  while ( mass > - 5.0){
    FermionOp WilsonOperator(Umu,*FGrid,*FrbGrid,mass);
    MdagMLinearOperator<FermionOp,FermionField> HermOp(WilsonOperator); /// <-----
    //SchurDiagTwoOperator<FermionOp,FermionField> HermOp(WilsonOperator);
    Gamma5HermitianLinearOperator <FermionOp,LatticeFermion> HermOp2(WilsonOperator); /// <-----

    std::vector<double> Coeffs{0, 1.};
    Polynomial<FermionField> PolyX(Coeffs);
    //  Chebyshev<FermionField> Cheby(0.5, 60., 31);
    //  RealD, ChebyLow,
    //  RealD, ChebyHigh,
    //  Integer, ChebyOrder)

    Chebyshev<FermionField> Cheby(LanParams.ChebyLow,LanParams.ChebyHigh,LanParams.ChebyOrder);

    FunctionHermOp<FermionField> OpCheby(Cheby,HermOp);
    PlainHermOp<FermionField> Op     (HermOp);
    PlainHermOp<FermionField> Op2    (HermOp2);

    ImplicitlyRestartedLanczos<FermionField> IRL(OpCheby, Op2, Nstop, Nk, Nm, resid, MaxIt);

    std::vector<RealD> eval(Nm);
    FermionField src(FGrid);
    gaussian(RNG5, src);
    std::vector<FermionField> evec(Nm, FGrid);
    for (int i = 0; i < 1; i++) {
      std::cout << i << " / " << Nm << " grid pointer " << evec[i].Grid()
                << std::endl;
    };

    int Nconv;
    IRL.calc(eval, evec, src, Nconv);

    std::cout << mass <<" : " << eval << std::endl;

    Gamma g5(Gamma::Algebra::Gamma5) ;
    ComplexD dot;
    FermionField tmp(FGrid);
    for (int i = 0; i < Nstop ; i++) {
      tmp = g5*evec[i];
      dot = innerProduct(tmp,evec[i]);
      std::cout << mass << " : " << eval[i] << " " << real(dot) << " " << imag(dot) << std::endl ;
    }
    src = evec[0]+evec[1]+evec[2];
    mass += -0.1;
  }

  Grid_finalize();
}
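Test_wilson_specflow drives the same IRL machinery through Gamma5HermitianLinearOperator and walks the Wilson mass from -1.0 downwards in steps of 0.1 until -5.0, i.e. it traces the spectral flow of the Hermitian Wilson operator

    H_W(m) = \gamma_5\, D_W(m),

printing at each mass the low eigenvalues together with the chirality \langle \gamma_5 \psi_i, \psi_i \rangle. Eigenvalue crossings of H_W(m) through zero as m varies are the standard spectral-flow measure of topological charge (background statement, not asserted by this diff).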
@ -33,8 +33,7 @@ namespace Grid{
  GRID_SERIALIZABLE_CLASS_MEMBERS(WFParameters,
                                  int, steps,
                                  double, step_size,
                                  int, meas_interval,
                                  double, maxTau); // for the adaptive algorithm
                                  int, meas_interval);

  template <class ReaderClass >
@ -86,7 +85,7 @@ int main(int argc, char **argv) {
  WFParameters WFPar(Reader);
  ConfParameters CPar(Reader);
  CheckpointerParameters CPPar(CPar.conf_prefix, CPar.rng_prefix);
  BinaryHmcCheckpointer<PeriodicGimplR> CPBin(CPPar);
  NerscHmcCheckpointer<PeriodicGimplR> CPBin(CPPar);

  for (int conf = CPar.StartConfiguration; conf <= CPar.EndConfiguration; conf+= CPar.Skip){

@ -96,19 +95,13 @@ int main(int argc, char **argv) {
  std::cout << GridLogMessage << "Initial plaquette: "
            << WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu) << std::endl;

  int t=WFPar.maxTau;
  WilsonFlowAdaptive<PeriodicGimplR> WF(WFPar.step_size, WFPar.maxTau,
                                        1.0e-4,
  WilsonFlow<PeriodicGimplR> WF(WFPar.step_size, WFPar.steps,
                                WFPar.meas_interval);

  WF.smear(Uflow, Umu);

  RealD WFlow_plaq = WilsonLoops<PeriodicGimplR>::avgPlaquette(Uflow);
  RealD WFlow_TC   = WilsonLoops<PeriodicGimplR>::TopologicalCharge(Uflow);
  RealD WFlow_T0   = WF.energyDensityPlaquette(t,Uflow);
  std::cout << GridLogMessage << "Plaquette "<< conf << " " << WFlow_plaq << std::endl;
  std::cout << GridLogMessage << "T0                "<< conf << " " << WFlow_T0 << std::endl;
  std::cout << GridLogMessage << "TopologicalCharge "<< conf << " " << WFlow_TC << std::endl;

  std::cout<< GridLogMessage << " Admissibility check:\n";
  const double sp_adm = 0.067; // admissible threshold
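For orientation on this hunk: it trades the adaptive-step WilsonFlowAdaptive for the fixed-step WilsonFlow, which integrates the gradient flow to \tau = \text{steps} \times \text{step\_size}. The WFlow_T0 line reports the plaquette-derived energy density at flow time t; in the usual scale-setting convention one monitors (background fact, not part of the change)

    t^2 \langle E(t) \rangle, \qquad t^2 \langle E \rangle \big|_{t=t_0} = 0.3 .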
37 visualisation/CMakeLists.txt Normal file
@ -0,0 +1,37 @@
cmake_minimum_required(VERSION 3.12 FATAL_ERROR)

project(GridViewer)

list(APPEND CMAKE_PREFIX_PATH "/Users/peterboyle/QCD/vtk/VTK-9.4.2-install/")

find_package(VTK COMPONENTS
  CommonColor
  CommonCore
  FiltersCore
  FiltersModeling
  IOImage
  IOFFMPEG
  InteractionStyle
  InteractionWidgets
  RenderingContextOpenGL2
  RenderingCore
  RenderingFreeType
  RenderingGL2PSOpenGL2
  RenderingOpenGL2
)

if (NOT VTK_FOUND)
  message(FATAL_ERROR "GridViewer: Unable to find the VTK build folder.")
endif()

# Prevent a "command line is too long" failure in Windows.
set(CMAKE_NINJA_FORCE_RESPONSE_FILE "ON" CACHE BOOL "Force Ninja to use response files.")

add_executable(FieldDensityAnimate MACOSX_BUNDLE FieldDensityAnimate.cxx )
target_link_libraries(FieldDensityAnimate PRIVATE ${VTK_LIBRARIES}
)
# vtk_module_autoinit is needed
vtk_module_autoinit(
  TARGETS FieldDensityAnimate
  MODULES ${VTK_LIBRARIES}
)
BIN visualisation/E8_vs_Topo8.avi Normal file
Binary file not shown.
285 visualisation/FieldDensity.py Normal file
@ -0,0 +1,285 @@
#!/usr/bin/env python

# noinspection PyUnresolvedReferences
import math
import vtk
import vtkmodules.vtkInteractionStyle
# noinspection PyUnresolvedReferences
import vtkmodules.vtkRenderingOpenGL2
from vtkmodules.vtkCommonColor import vtkNamedColors
from vtkmodules.vtkCommonCore import (
    VTK_VERSION_NUMBER,
    vtkVersion
)
from vtkmodules.vtkCommonCore import VTK_DOUBLE
from vtkmodules.vtkCommonDataModel import vtkImageData
from vtkmodules.vtkFiltersCore import (
    vtkMarchingCubes,
    vtkStripper
)
from vtkmodules.vtkFiltersModeling import vtkOutlineFilter
from vtkmodules.vtkIOImage import (
    vtkMetaImageReader,
    vtkJPEGWriter,
    vtkPNGWriter
)
from vtkmodules.vtkRenderingCore import (
    vtkActor,
    vtkCamera,
    vtkPolyDataMapper,
    vtkProperty,
    vtkRenderWindow,
    vtkRenderWindowInteractor,
    vtkRenderer,
    vtkWindowToImageFilter
)


class vtkTimerCallback():
    def __init__(self, steps, imageData, iren):
        self.timer_count = 0
        self.steps = steps
        self.imageData = imageData
        self.iren = iren
        self.timerId = None
        self.step = 0

    def execute(self, obj, event):

        print(self.timer_count)

        dims = self.imageData.GetDimensions()

        t = self.step/10.0

        z0 = 2
        y0 = 4
        x0 = 4
        z1 = 14
        y1 = 12
        x1 = 12
        for z in range(dims[2]):
            for y in range(dims[1]):
                for x in range(dims[0]):
                    self.imageData.SetScalarComponentFromDouble(x, y, z, 0,
                        math.sin(t)*math.exp(-0.25*((x-x0)*(x-x0)+(y-y0)*(y-y0)+(z-z0)*(z-z0)))
                        - math.cos(t)*math.exp(-0.25*((x-x1)*(x-x1)+(y-y1)*(y-y1)+(z-z1)*(z-z1))))

        self.imageData.Modified()

        iren = obj
        iren.GetRenderWindow().Render()
        self.timer_count += 1
        self.step += 1

        if self.step >= self.steps:
            iren.DestroyTimer(self.timerId)


def WriteImage(fileName, renWin):
    '''
    '''

    import os

    if fileName:
        # Select the writer to use.
        path, ext = os.path.splitext(fileName)
        ext = ext.lower()
        if not ext:
            ext = '.png'
            fileName = fileName + ext
        elif ext == '.jpg':
            writer = vtkJPEGWriter()
        else:
            writer = vtkPNGWriter()

        windowto_image_filter = vtkWindowToImageFilter()
        windowto_image_filter.SetInput(renWin)
        windowto_image_filter.SetScale(1)  # image quality
        windowto_image_filter.SetInputBufferTypeToRGBA()

        writer.SetFileName(fileName)
        writer.SetInputConnection(windowto_image_filter.GetOutputPort())
        writer.Write()
    else:
        raise RuntimeError('Need a filename.')


def main():

    colors = vtkNamedColors()

    file_name = get_program_parameters()

    colors.SetColor('InstantonColor', [240, 184, 160, 255])
    colors.SetColor('BackfaceColor', [255, 229, 200, 255])
    colors.SetColor('BkgColor', [51, 77, 102, 255])

    # Create the renderer, the render window, and the interactor. The renderer
    # draws into the render window, the interactor enables mouse- and
    # keyboard-based interaction with the data within the render window.
    #
    a_renderer = vtkRenderer()
    ren_win = vtkRenderWindow()
    ren_win.AddRenderer(a_renderer)

    iren = vtkRenderWindowInteractor()
    iren.SetRenderWindow(ren_win)

    # The following reader is used to read a series of 2D slices (images)
    # that compose the volume. The slice dimensions are set, and the
    # pixel spacing. The data Endianness must also be specified. The reader
    # uses the FilePrefix in combination with the slice number to construct
    # filenames using the format FilePrefix.%d. (In this case the FilePrefix
    # is the root name of the file: quarter.)
    imageData = vtkImageData()
    imageData.SetDimensions(16, 16, 16)
    imageData.AllocateScalars(VTK_DOUBLE, 1)

    dims = imageData.GetDimensions()

    # Fill every entry of the image data with '2.0'
    # Set the input data
    for z in range(dims[2]):
        z0 = dims[2]/2
        for y in range(dims[1]):
            y0 = dims[1]/2
            for x in range(dims[0]):
                x0 = dims[0]/2
                imageData.SetScalarComponentFromDouble(x, y, z, 0, math.exp(-0.25*((x-x0)*(x-x0)+(y-y0)*(y-y0)+z*z)) - math.exp(-0.25*((x-x0)*(x-x0)+y*y+(z-z0)*(z-z0))))

    instanton_extractor = vtkMarchingCubes()
    instanton_extractor.SetInputData(imageData)
    instanton_extractor.SetValue(0, 0.1)

    instanton_stripper = vtkStripper()
    instanton_stripper.SetInputConnection(instanton_extractor.GetOutputPort())

    instanton_mapper = vtkPolyDataMapper()
    instanton_mapper.SetInputConnection(instanton_stripper.GetOutputPort())
    instanton_mapper.ScalarVisibilityOff()

    instanton = vtkActor()
    instanton.SetMapper(instanton_mapper)
    instanton.GetProperty().SetDiffuseColor(colors.GetColor3d('InstantonColor'))
    instanton.GetProperty().SetSpecular(0.3)
    instanton.GetProperty().SetSpecularPower(20)
    instanton.GetProperty().SetOpacity(0.5)

    # The triangle stripper is used to create triangle strips from the
    # isosurface; these render much faster on many systems.
    antiinstanton_extractor = vtkMarchingCubes()
    antiinstanton_extractor.SetInputData(imageData)
    antiinstanton_extractor.SetValue(0, -0.1)

    antiinstanton_stripper = vtkStripper()
    antiinstanton_stripper.SetInputConnection(antiinstanton_extractor.GetOutputPort())

    antiinstanton_mapper = vtkPolyDataMapper()
    antiinstanton_mapper.SetInputConnection(antiinstanton_stripper.GetOutputPort())
    antiinstanton_mapper.ScalarVisibilityOff()

    antiinstanton = vtkActor()
    antiinstanton.SetMapper(antiinstanton_mapper)
    antiinstanton.GetProperty().SetDiffuseColor(colors.GetColor3d('Ivory'))

    # An outline provides a box around the data.
    outline_data = vtkOutlineFilter()
    outline_data.SetInputData(imageData)

    map_outline = vtkPolyDataMapper()
    map_outline.SetInputConnection(outline_data.GetOutputPort())

    outline = vtkActor()
    outline.SetMapper(map_outline)
    outline.GetProperty().SetColor(colors.GetColor3d('Black'))

    # It is convenient to create an initial view of the data. The FocalPoint
    # and Position form a vector direction. Later on (ResetCamera() method)
    # this vector is used to position the camera to look at the data in
    # this direction.
    a_camera = vtkCamera()
    a_camera.SetViewUp(0, 0, -1)
    a_camera.SetPosition(0, -100, 0)
    a_camera.SetFocalPoint(0, 0, 0)
    a_camera.ComputeViewPlaneNormal()
    a_camera.Azimuth(30.0)
    a_camera.Elevation(30.0)

    # Actors are added to the renderer. An initial camera view is created.
    # The Dolly() method moves the camera towards the FocalPoint,
    # thereby enlarging the image.
    a_renderer.AddActor(outline)
    a_renderer.AddActor(instanton)
    a_renderer.AddActor(antiinstanton)
    a_renderer.SetActiveCamera(a_camera)
    a_renderer.ResetCamera()
    a_camera.Dolly(1.0)

    # Set a background color for the renderer and set the size of the
    # render window (expressed in pixels).
    a_renderer.SetBackground(colors.GetColor3d('BkgColor'))
    ren_win.SetSize(1024, 1024)
    ren_win.SetWindowName('ExpoDemo')

    # Note that when camera movement occurs (as it does in the Dolly()
    # method), the clipping planes often need adjusting. Clipping planes
    # consist of two planes: near and far along the view direction. The
    # near plane clips out objects in front of the plane; the far plane
    # clips out objects behind the plane. This way only what is drawn
    # between the planes is actually rendered.
    a_renderer.ResetCameraClippingRange()

    # write image
    #    WriteImage('exp.jpg',ren_win)

    # Sign up to receive TimerEvent
    cb = vtkTimerCallback(200, imageData, iren)
    iren.AddObserver('TimerEvent', cb.execute)
    cb.timerId = iren.CreateRepeatingTimer(50)

    # start the interaction and timer
    ren_win.Render()

    # Initialize the event loop and then start it.
    iren.Initialize()
    iren.Start()


def get_program_parameters():
    import argparse
    description = 'Simple lattice volumetric demo'
    epilogue = '''
    Derived from VTK/Examples/Cxx/Medical2.cxx
    '''
    parser = argparse.ArgumentParser(description=description, epilog=epilogue,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
    parser.add_argument('filename', help='FieldDensity.py')
    args = parser.parse_args()
    return args.filename


def vtk_version_ok(major, minor, build):
    """
    Check the VTK version.

    :param major: Major version.
    :param minor: Minor version.
    :param build: Build version.
    :return: True if the requested VTK version is greater or equal to the actual VTK version.
    """
    needed_version = 10000000000 * int(major) + 100000000 * int(minor) + int(build)
    try:
        vtk_version_number = VTK_VERSION_NUMBER
    except AttributeError:  # as error:
        ver = vtkVersion()
        vtk_version_number = 10000000000 * ver.GetVTKMajorVersion() + 100000000 * ver.GetVTKMinorVersion() \
                             + ver.GetVTKBuildVersion()
    if vtk_version_number >= needed_version:
        return True
    else:
        return False


if __name__ == '__main__':
    main()
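The timer callback animates a superposition of two Gaussian lumps of opposite sign, a toy instanton/anti-instanton pair: at frame time t = step/10 each voxel is set to

    f(\vec x, t) \;=\; \sin t \; e^{-\frac{1}{4}|\vec x - \vec x_0|^2} \;-\; \cos t \; e^{-\frac{1}{4}|\vec x - \vec x_1|^2},

and the fixed +/-0.1 isosurfaces extracted by marching cubes track the two lumps as they oscillate.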
490 visualisation/FieldDensityAnimate.cxx Normal file
@ -0,0 +1,490 @@
// Derived from VTK/Examples/Cxx/Medical2.cxx
// The example reads a volume dataset, extracts two isosurfaces that
// represent the skin and bone, and then displays them.
//
// Modified heavily by Peter Boyle to display lattice field theory data as movies and compare multiple files

#include <vtkActor.h>
#include <vtkCamera.h>
#include <vtkMetaImageReader.h>
#include <vtkNamedColors.h>
#include <vtkNew.h>
#include <vtkOutlineFilter.h>
#include <vtkPolyDataMapper.h>
#include <vtkProperty.h>
#include <vtkRenderWindow.h>
#include <vtkRenderWindowInteractor.h>
#include <vtkRenderer.h>
#include <vtkStripper.h>
#include <vtkImageData.h>
#include <vtkVersion.h>
#include <vtkCallbackCommand.h>
#include <vtkTextActor.h>
#include <vtkTextProperty.h>

#define MPEG
#ifdef MPEG
#include <vtkFFMPEGWriter.h>
#endif

#include <vtkProperty2D.h>
#include <vtkSliderWidget.h>
#include <vtkSliderRepresentation2D.h>
#include <vtkWindowToImageFilter.h>

#include <array>
#include <string>

#include <Grid/Grid.h>

#define USE_FLYING_EDGES
#ifdef USE_FLYING_EDGES
#include <vtkFlyingEdges3D.h>
typedef vtkFlyingEdges3D isosurface;
#else
#include <vtkMarchingCubes.h>
typedef vtkMarchingCubes isosurface;
#endif

int mpeg = 0 ;
int xlate = 0 ;

template <class T> void readFile(T& out, std::string const fname){
#ifdef HAVE_LIME
  Grid::emptyUserRecord record;
  Grid::ScidacReader RD;
  RD.open(fname);
  RD.readScidacFieldRecord(out,record);
  RD.close();
#endif
}
using namespace Grid;

class FrameUpdater : public vtkCallbackCommand
{
public:

  FrameUpdater() {
    TimerCount = 0;
    xoff = 0;
    t = 0;
    imageData = nullptr;
    grid_data = nullptr;
    timerId = 0;
    maxCount = -1;
  }

  static FrameUpdater* New()
  {
    FrameUpdater* cb = new FrameUpdater;
    cb->TimerCount = 0;
    return cb;
  }

  virtual void Execute(vtkObject* caller, unsigned long eventId,void* vtkNotUsed(callData))
  {
    const int max=256;
    char text_string[max];

    if (this->TimerCount < this->maxCount) {

      if (vtkCommand::TimerEvent == eventId)
      {
        ++this->TimerCount;

        // Make a new frame
        auto latt_size = grid_data->Grid()->GlobalDimensions();
        for(int xx=0;xx<latt_size[0];xx++){
        for(int yy=0;yy<latt_size[1];yy++){
        for(int zz=0;zz<latt_size[2];zz++){
          int x = (xx+xoff)%latt_size[0];
          Coordinate site({x,yy,zz,t});
          RealD value = real(peekSite(*grid_data,site));
          imageData->SetScalarComponentFromDouble(xx,yy,zz,0,value);
        }}}

        if ( xlate ) {
          xoff = (xoff + 1)%latt_size[0];
          if ( xoff== 0 ) t = (t+1)%latt_size[3];
        } else {
          t = (t+1)%latt_size[3];
          if ( t== 0 ) xoff = (xoff + 1)%latt_size[0];
        }

        snprintf(text_string,max,"T=%d",t);
        text->SetInput(text_string);

        std::cout << this->TimerCount<<"/"<<maxCount<< " xoff "<<xoff<<" t "<<t <<std::endl;
        imageData->Modified();

        vtkRenderWindowInteractor* iren = dynamic_cast<vtkRenderWindowInteractor*>(caller);
        iren->GetRenderWindow()->Render();

      }
    }

    if (this->TimerCount >= this->maxCount) {
      vtkRenderWindowInteractor* iren = dynamic_cast<vtkRenderWindowInteractor*>(caller);
      if (this->timerId > -1)
      {
        iren->DestroyTimer(this->timerId);
      }
    }
  }


private:
  int TimerCount;
  int xoff;
  int t;
public:
  Grid::LatticeComplexD * grid_data;
  vtkImageData* imageData = nullptr;
  vtkTextActor* text = nullptr;
  vtkFFMPEGWriter *writer = nullptr;
  int timerId ;
  int maxCount ;
  double rms;
  isosurface * posExtractor;
  isosurface * negExtractor;
};
class SliderCallback : public vtkCommand
{
public:
  static SliderCallback* New()
  {
    return new SliderCallback;
  }
  virtual void Execute(vtkObject* caller, unsigned long eventId, void* callData)
  {
    vtkSliderWidget *sliderWidget = vtkSliderWidget::SafeDownCast(caller);
    if (sliderWidget)
    {
      contour = ((vtkSliderRepresentation *)sliderWidget->GetRepresentation())->GetValue();
    }
    for(int i=0;i<fu_list.size();i++){
      fu_list[i]->posExtractor->SetValue(0, SliderCallback::contour*fu_list[i]->rms);
      fu_list[i]->negExtractor->SetValue(0, -SliderCallback::contour*fu_list[i]->rms);
      fu_list[i]->posExtractor->Modified();
      fu_list[i]->negExtractor->Modified();
    }
  }
public:
  static double contour;
  std::vector<FrameUpdater *> fu_list;
};


double SliderCallback::contour;
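The slider rescales both isosurface levels for every loaded file relative to that field's own RMS, so one slider position means the same statistical level across datasets:

    \text{contour}_f \;=\; c \cdot \mathrm{rms}_f, \qquad \mathrm{rms}_f = \sqrt{\tfrac{1}{V} \sum_x |\phi_f(x)|^2},

with c the slider value; main() below computes rms_f exactly this way (norm2 over the global site count) and seeds the extractors at +/-1 RMS by default.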
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
using namespace Grid;
|
||||
|
||||
Grid_init(&argc, &argv);
|
||||
GridLogLayout();
|
||||
|
||||
auto latt_size = GridDefaultLatt();
|
||||
auto simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd());
|
||||
auto mpi_layout = GridDefaultMpi();
|
||||
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||
|
||||
|
||||
std::cout << argc << " command Line arguments "<<std::endl;
|
||||
for(int c=0;c<argc;c++) {
|
||||
std::cout << " - "<<argv[c]<<std::endl;
|
||||
}
|
||||
|
||||
std::vector<std::string> file_list;
|
||||
double default_contour = 1.0;
|
||||
std::string arg;
|
||||
#ifdef MPEG
|
||||
if( GridCmdOptionExists(argv,argv+argc,"--mpeg") ){
|
||||
mpeg = 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
if( GridCmdOptionExists(argv,argv+argc,"--xlate") ){
|
||||
xlate = 1;
|
||||
}
|
||||
|
||||
if( GridCmdOptionExists(argv,argv+argc,"--isosurface") ){
|
||||
arg=GridCmdOptionPayload(argv,argv+argc,"--isosurface");
|
||||
GridCmdOptionFloat(arg,default_contour);
|
||||
}
|
||||
if( GridCmdOptionExists(argv,argv+argc,"--file1") ){
|
||||
arg = GridCmdOptionPayload(argv,argv+argc,"--file1");
|
||||
file_list.push_back(arg);
|
||||
}
|
||||
if( GridCmdOptionExists(argv,argv+argc,"--file2") ){
|
||||
arg = GridCmdOptionPayload(argv,argv+argc,"--file2");
|
||||
file_list.push_back(arg);
|
||||
}
|
||||
if( GridCmdOptionExists(argv,argv+argc,"--file3") ){
|
||||
arg = GridCmdOptionPayload(argv,argv+argc,"--file3");
|
||||
file_list.push_back(arg);
|
||||
}
|
||||
if( GridCmdOptionExists(argv,argv+argc,"--file4") ){
|
||||
arg = GridCmdOptionPayload(argv,argv+argc,"--file4");
|
||||
file_list.push_back(arg);
|
||||
}
|
||||
for(int c=0;c<file_list.size();c++) {
|
||||
std::cout << " file: "<<file_list[c]<<std::endl;
|
||||
}
|
||||
|
||||
// Common things:
|
||||
vtkNew<vtkNamedColors> colors;
|
||||
std::array<unsigned char, 4> posColor{{240, 184, 160, 255}}; colors->SetColor("posColor", posColor.data());
|
||||
std::array<unsigned char, 4> bkg{{51, 77, 102, 255}}; colors->SetColor("BkgColor", bkg.data());
|
||||
|
||||
// Create the renderer, the render window, and the interactor. The renderer
|
||||
// draws into the render window, the interactor enables mouse- and
|
||||
// keyboard-based interaction with the data within the render window.
|
||||
//
|
||||
vtkNew<vtkRenderWindow> renWin;
|
||||
vtkNew<vtkRenderWindowInteractor> iren;
|
||||
iren->SetRenderWindow(renWin);
|
||||
|
||||
|
||||
std::vector<LatticeComplexD> data(file_list.size(),&Grid);
|
||||
FieldMetaData header;
|
||||
|
||||
|
||||
int frameCount;
|
||||
if ( mpeg ) frameCount = latt_size[3];
|
||||
else frameCount = latt_size[0] * latt_size[3];
|
||||
|
||||
std::vector<FrameUpdater *> fu_list;
|
||||
for (int f=0;f<file_list.size();f++){
|
||||
|
||||
// It is convenient to create an initial view of the data. The FocalPoint
|
||||
// and Position form a vector direction. Later on (ResetCamera() method)
|
||||
// this vector is used to position the camera to look at the data in
|
||||
// this direction.
|
||||
vtkNew<vtkCamera> aCamera;
|
||||
aCamera->SetViewUp(0, 0, -1);
|
||||
aCamera->SetPosition(0, -1000, 0);
|
||||
aCamera->SetFocalPoint(0, 0, 0);
|
||||
aCamera->ComputeViewPlaneNormal();
|
||||
aCamera->Azimuth(30.0);
|
||||
aCamera->Elevation(30.0);
|
||||
|
||||
|
||||
vtkNew<vtkRenderer> aRenderer;
|
||||
renWin->AddRenderer(aRenderer);
|
||||
|
||||
double vol = data[f].Grid()->gSites();
|
||||
|
||||
std::cout << "Reading "<<file_list[f]<<std::endl;
|
||||
readFile(data[f],file_list[f]);
|
||||
|
||||
auto nrm = norm2(data[f]);
|
||||
auto nrmbar = nrm/vol;
|
||||
auto rms = sqrt(nrmbar);
|
||||
|
||||
double contour = default_contour * rms; // default to 1 x RMS
|
||||
|
||||
    // Build a VTK image volume holding the scalar field on one timeslice.
    // The dimensions match the spatial lattice; each voxel stores one double.
    vtkNew<vtkImageData> imageData;
    imageData->SetDimensions(latt_size[0],latt_size[1],latt_size[2]);
    imageData->AllocateScalars(VTK_DOUBLE, 1);
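    // Seed the volume with the real part of the field on the t=0 timeslice;
    // the FrameUpdater refreshes this volume as the animation advances.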
    for(int xx=0;xx<latt_size[0];xx++){
    for(int yy=0;yy<latt_size[1];yy++){
    for(int zz=0;zz<latt_size[2];zz++){
      Coordinate site({xx,yy,zz,0});
      RealD value = real(peekSite(data[f],site));
      imageData->SetScalarComponentFromDouble(xx,yy,zz,0,value);
    }}}

    vtkNew<isosurface> posExtractor;
    posExtractor->SetInputData(imageData);
    posExtractor->SetValue(0, contour);

    vtkNew<vtkStripper> posStripper;
    posStripper->SetInputConnection(posExtractor->GetOutputPort());

    vtkNew<vtkPolyDataMapper> posMapper;
    posMapper->SetInputConnection(posStripper->GetOutputPort());
    posMapper->ScalarVisibilityOff();

    vtkNew<vtkActor> pos;
    pos->SetMapper(posMapper);
    pos->GetProperty()->SetDiffuseColor(colors->GetColor3d("posColor").GetData());
    pos->GetProperty()->SetSpecular(0.3);
    pos->GetProperty()->SetSpecularPower(20);
    pos->GetProperty()->SetOpacity(0.5);

    // A second isosurface is extracted at minus the contour value.
    // The triangle stripper creates triangle strips from the isosurface;
    // these render much faster on many systems.
    vtkNew<isosurface> negExtractor;
    negExtractor->SetInputData(imageData);
    negExtractor->SetValue(0, -contour);

    vtkNew<vtkStripper> negStripper;
    negStripper->SetInputConnection(negExtractor->GetOutputPort());

    vtkNew<vtkPolyDataMapper> negMapper;
    negMapper->SetInputConnection(negStripper->GetOutputPort());
    negMapper->ScalarVisibilityOff();

    vtkNew<vtkActor> neg;
    neg->SetMapper(negMapper);
    neg->GetProperty()->SetDiffuseColor(colors->GetColor3d("Ivory").GetData());

    // An outline provides context around the data.
    vtkNew<vtkOutlineFilter> outlineData;
    outlineData->SetInputData(imageData);

    vtkNew<vtkPolyDataMapper> mapOutline;
    mapOutline->SetInputConnection(outlineData->GetOutputPort());

    vtkNew<vtkActor> outline;
    outline->SetMapper(mapOutline);
    outline->GetProperty()->SetColor(colors->GetColor3d("Black").GetData());

    vtkNew<vtkTextActor> Text;
    Text->SetInput(file_list[f].c_str());
    Text->SetPosition2(0,0);
    Text->GetTextProperty()->SetFontSize(48);
    Text->GetTextProperty()->SetColor(colors->GetColor3d("Gold").GetData());

    vtkNew<vtkTextActor> TextT;
    TextT->SetInput("T=0");
    TextT->SetPosition(0,.9*1024); // near the top of the 1024-pixel-high window
    TextT->GetTextProperty()->SetFontSize(48);
    TextT->GetTextProperty()->SetColor(colors->GetColor3d("Gold").GetData());

    // Actors are added to the renderer. An initial camera view is created.
    // The Dolly() method moves the camera towards the FocalPoint,
    // thereby enlarging the image.
    aRenderer->AddActor(Text);
    aRenderer->AddActor(TextT);
    aRenderer->AddActor(outline);
    aRenderer->AddActor(pos);
    aRenderer->AddActor(neg);

    // Sign up to receive TimerEvent
    vtkNew<FrameUpdater> fu;
    fu->imageData    = imageData;
    fu->grid_data    = &data[f];
    fu->text         = TextT;
    fu->maxCount     = frameCount;
    fu->posExtractor = posExtractor;
    fu->negExtractor = negExtractor;
    fu->rms          = rms;

    iren->AddObserver(vtkCommand::TimerEvent, fu);

    aRenderer->SetActiveCamera(aCamera);
    aRenderer->ResetCamera();
    aRenderer->SetBackground(colors->GetColor3d("BkgColor").GetData());
    aCamera->Dolly(1.0);

    double nf = file_list.size();
    std::cout << " Adding renderer " <<f<<" of "<<nf<<std::endl;
    aRenderer->SetViewport((1.0/nf)*f, 0.0, (1.0/nf)*(f+1), 1.0);
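    // Tile the renderers horizontally: each input file gets an equal-width viewport.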

    // Note that when camera movement occurs (as it does in the Dolly()
    // method), the clipping planes often need adjusting. Clipping planes
    // consist of two planes: near and far along the view direction. The
    // near plane clips out objects in front of the plane; the far plane
    // clips out objects behind the plane. This way only what is drawn
    // between the planes is actually rendered.
    aRenderer->ResetCameraClippingRange();

    fu_list.push_back(fu);
  }

  // Set the size of the render window (expressed in pixels): one
  // 1024x1024 tile per input file. Render once, then initialize the event loop.
  renWin->SetSize(1024*file_list.size(), 1024);
  renWin->SetWindowName("FieldDensity");
  renWin->Render();

  iren->Initialize();

  if ( mpeg ) {
#ifdef MPEG
    vtkWindowToImageFilter *imageFilter = vtkWindowToImageFilter::New();
    imageFilter->SetInput( renWin );
    imageFilter->SetInputBufferTypeToRGB();

    vtkFFMPEGWriter *writer = vtkFFMPEGWriter::New();
    writer->SetFileName("movie.avi");
    writer->SetRate(1);
    writer->SetInputConnection(imageFilter->GetOutputPort());
    writer->Start();
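
    // Drive the animation by hand: step every FrameUpdater one frame, then
    // re-capture the render window and encode it as the next movie frame.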
    for(int i=0;i<fu_list[0]->maxCount;i++){
      for(int f=0;f<fu_list.size();f++){
        fu_list[f]->Execute(iren,vtkCommand::TimerEvent,nullptr);
      }
      imageFilter->Modified();
      writer->Write();
    }
    writer->End();
    writer->Delete();
#else
    assert(0 && "MPEG support not compiled");
#endif
  } else {

    // Add control of the contour threshold via a slider widget
    vtkSmartPointer<vtkSliderRepresentation2D> sliderRep = vtkSmartPointer<vtkSliderRepresentation2D>::New();
    sliderRep->SetMinimumValue(0.1);
    sliderRep->SetMaximumValue(5.0);
    sliderRep->SetValue(1.0);
    sliderRep->SetTitleText("Fraction RMS");

    // Set color properties:
    // Change the color of the knob that slides
    // sliderRep->GetSliderProperty()->SetColor(colors->GetColor3d("Green").GetData());
    sliderRep->GetTitleProperty()->SetColor(colors->GetColor3d("AliceBlue").GetData());
    sliderRep->GetLabelProperty()->SetColor(colors->GetColor3d("AliceBlue").GetData());
    sliderRep->GetSelectedProperty()->SetColor(colors->GetColor3d("DeepPink").GetData());

    // Change the color of the bar
    sliderRep->GetTubeProperty()->SetColor(colors->GetColor3d("MistyRose").GetData());
    sliderRep->GetCapProperty()->SetColor(colors->GetColor3d("Yellow").GetData());
    sliderRep->SetSliderLength(0.05);
    sliderRep->SetSliderWidth(0.025);
    sliderRep->SetEndCapLength(0.02);

    double nf = file_list.size();
    sliderRep->GetPoint1Coordinate()->SetCoordinateSystemToNormalizedDisplay();
    sliderRep->GetPoint1Coordinate()->SetValue(0.1, 0.1);
    sliderRep->GetPoint2Coordinate()->SetCoordinateSystemToNormalizedDisplay();
    sliderRep->GetPoint2Coordinate()->SetValue(0.9/nf, 0.1);
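    // Place the slider along the bottom of the leftmost viewport, in
    // normalized display coordinates.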

    vtkSmartPointer<vtkSliderWidget> sliderWidget = vtkSmartPointer<vtkSliderWidget>::New();
    sliderWidget->SetInteractor(iren);
    sliderWidget->SetRepresentation(sliderRep);
    sliderWidget->SetAnimationModeToAnimate();
    sliderWidget->EnabledOn();

    // Create the slider callback
    vtkSmartPointer<SliderCallback> slidercallback = vtkSmartPointer<SliderCallback>::New();
    slidercallback->fu_list = fu_list;
    sliderWidget->AddObserver(vtkCommand::InteractionEvent, slidercallback);

    int timerId = iren->CreateRepeatingTimer(300);
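    // The 300 ms repeating timer fires the TimerEvents on which the
    // FrameUpdaters animate.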
    std::cout << "timerId: " << timerId << std::endl;

    // Start the interaction and timer
    iren->Start();
  }

  Grid_finalize();

  return EXIT_SUCCESS;
}

113 visualisation/README Normal file
@ -0,0 +1,113 @@
========================================
Visualisation of Grid / SciDAC format density fields using VTK (Visualisation Toolkit). Peter Boyle, 2025.
========================================

Uses:

https://vtk.org

Files are, for example, those produced by
     Grid/HMC/ComputeWilsonFlow.cc
and
     Grid/HMC/site_plaquette.cc

========================================
Prerequisites:
========================================

1) Install ffmpeg-7.0.2 (developer install, includes headers and libraries).
   MacOS note: ffmpeg must be installed from source -- homebrew installs only binaries.

   https://ffmpeg.org/download.html#releases

   Note: the latest ffmpeg (7.1.1) broke compatibility with VTK.

2) Build and install VTK-9.4.2, with FFMPEG support enabled.

   This is particularly involved on MacOS, so it is documented here.

   cd VTK-9.4.2
   mkdir build
   cd build
   ccmake ..

Using the ccmake editor, set:

   FFMPEG_DIR  /usr/local

Toggle "advanced mode" (t)

Set each of:
   CMAKE_EXE_LINKER_FLAGS
   CMAKE_MODULE_LINKER_FLAGS
   CMAKE_SHARED_LINKER_FLAGS
to:

   -framework Foundation -framework AudioToolbox -framework CoreAudio -liconv -lm -framework AVFoundation -framework CoreVideo -framework CoreMedia -framework CoreGraphics -framework AudioToolbox -framework OpenGL -framework OpenGL -framework VideoToolbox -framework CoreImage -framework AppKit -framework CoreFoundation -framework CoreServices -lz -lbz2 -Wl,-framework,CoreFoundation -Wl,-framework,Security -L/usr/local/lib -lavdevice -lavfilter -lavformat -lavcodec -lswresample -lswscale -lavutil

Set paths for each of:
   FFMPEG_DIR                     /usr/local
   FFMPEG_avcodec_INCLUDE_DIR     /usr/local/include
   FFMPEG_avcodec_LIBRARY         /usr/local/lib/libavcodec.a
   FFMPEG_avdevice_INCLUDE_DIR    /usr/local/include
   FFMPEG_avdevice_LIBRARY        /usr/local/lib/libavdevice.a
   FFMPEG_avfilter_INCLUDE_DIR    /usr/local/include
   FFMPEG_avfilter_LIBRARY        /usr/local/lib/libavfilter.a
   FFMPEG_avformat_INCLUDE_DIR    /usr/local/include
   FFMPEG_avformat_LIBRARY        /usr/local/lib/libavformat.a
   FFMPEG_avresample_INCLUDE_DIR  /usr/local/include
   FFMPEG_avresample_LIBRARY      /usr/local/lib/libavresample.a
   FFMPEG_avutil_INCLUDE_DIR      /usr/local/include
   FFMPEG_avutil_LIBRARY          /usr/local/lib/libavutil.a
   FFMPEG_swresample_INCLUDE_DIR  /usr/local/include
   FFMPEG_swresample_LIBRARY      /usr/local/lib/libswresample.a
   FFMPEG_swscale_INCLUDE_DIR     /usr/local/include
   FFMPEG_swscale_LIBRARY         /usr/local/lib/libswscale.a

and enable:
   VTK_MODULE_ENABLE_VTK_IOFFMPEG YES

VTK really should make it easier to pick up the flags required for FFMPEG linkage, especially as they are very quirky on MacOS.
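
For reference, the same configuration can be given non-interactively. This is a
sketch only, assuming the /usr/local install prefix used above; the remaining
FFMPEG_* cache variables are passed in the same way:

   cmake .. -DFFMPEG_DIR=/usr/local \
            -DVTK_MODULE_ENABLE_VTK_IOFFMPEG=YES \
            -DFFMPEG_avcodec_INCLUDE_DIR=/usr/local/include \
            -DFFMPEG_avcodec_LIBRARY=/usr/local/lib/libavcodec.a \
            -DCMAKE_EXE_LINKER_FLAGS="<linker flags as above>"

then build and install with make && make install.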

========================================
Grid:
========================================

3) Build and install a version of Grid.

4) Ensure "grid-config" is in your path.

5) cd Grid/visualisation/

   libs=`grid-config --libs`
   ldflags=`grid-config --ldflags`
   cxxflags=`grid-config --cxxflags`
   cxx=`grid-config --cxx`

   mkdir build
   cd build

   LDFLAGS="$ldflags $libs " cmake .. -DCMAKE_CXX_COMPILER=$cxx -DCMAKE_CXX_FLAGS=$cxxflags

   make

6) Invoke as:

   FieldDensityAnimate --isosurface <float-from-0-to-5> --grid X.Y.Z.T --file1 SciDacDensityFile1 [--xlate] [--mpeg]
   FieldDensityAnimate --isosurface <float-from-0-to-5> --grid X.Y.Z.T --file1 SciDacDensityFile1 --file2 SciDacDensityFile2 [--xlate] [--mpeg]
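
   For example (the file name is illustrative only, not shipped with Grid), to
   contour at 1.5 x RMS on a 16^3 x 32 lattice and write movie.avi:

   FieldDensityAnimate --isosurface 1.5 --grid 16.16.16.32 --file1 wflow.density.scidac --mpeg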

==================================
Extensions
==================================

7) Direct calling from Grid?

   Not yet implemented, but a sufficient interface could be developed to write a
   Lattice scalar field to MPEG directly from running code.

8) Example python code: FieldDensity.py. This is not interfaced to Grid.

BIN visualisation/Topo-vs-flowtime.avi Normal file
Binary file not shown.

9 visualisation/cmake-command Normal file
@ -0,0 +1,9 @@
libs=`grid-config --libs`
ldflags=`grid-config --ldflags`
cxxflags=`grid-config --cxxflags`
cxx=`grid-config --cxx`

mkdir build
cd build

LDFLAGS="$ldflags $libs " cmake .. -DCMAKE_CXX_COMPILER=$cxx -DCMAKE_CXX_FLAGS=$cxxflags