Mirror of https://github.com/paboyle/Grid.git (synced 2025-04-03 02:35:55 +01:00)

Commit ca639c195f: Merge branch 'develop' into feature/hadrons

VERSION (2 lines changed)
@@ -1,4 +1,4 @@
-Version : 0.7.0
+Version : 0.8.0
 
 - Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
 - MPI and MPI3 comms optimisations for KNL and OPA finished
@@ -158,8 +158,10 @@ public:
 
     dbytes=0;
     ncomm=0;
 
-    parallel_for(int dir=0;dir<8;dir++){
+#ifdef GRID_OMP
+#pragma omp parallel for num_threads(Grid::CartesianCommunicator::nCommThreads)
+#endif
+    for(int dir=0;dir<8;dir++){
 
       double tbytes;
       int mu =dir % 4;
@@ -175,9 +177,14 @@ public:
        int comm_proc = mpi_layout[mu]-1;
        Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
      }
+#ifdef GRID_OMP
+     int tid = omp_get_thread_num();
+#else
+     int tid = dir;
+#endif
      tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
                                         (void *)&rbuf[dir][0], recv_from_rank,
-                                        bytes,dir);
+                                        bytes,tid);
 
 #ifdef GRID_OMP
 #pragma omp atomic
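These hunks replace Grid's parallel_for wrapper with an explicit OpenMP loop over the eight communication directions, hand a per-thread id to StencilSendToRecvFrom, and accumulate the transferred bytes atomically. Below is a minimal standalone sketch of that pattern only; the fixed thread count, the placeholder byte count and the printf are illustrative stand-ins, not Grid code.

// Sketch of the threaded-comms loop pattern introduced above (assumptions:
// nCommThreads stands in for CartesianCommunicator::nCommThreads, and the
// per-direction byte count is a placeholder for StencilSendToRecvFrom's return).
#include <cstdio>
#ifdef _OPENMP
#include <omp.h>
#endif

int main() {
  const int nCommThreads = 2;   // assumed value for illustration
  double dbytes = 0;

#ifdef _OPENMP
#pragma omp parallel for num_threads(nCommThreads)
#endif
  for (int dir = 0; dir < 8; dir++) {
#ifdef _OPENMP
    int tid = omp_get_thread_num();   // id the comms call would receive
#else
    int tid = dir;                    // serial fallback, mirroring the diff
#endif
    double tbytes = 1024.0 * (dir + 1);  // placeholder byte count
#ifdef _OPENMP
#pragma omp atomic
#endif
    dbytes += tbytes;                 // shared counter updated atomically
    (void)tid;
  }
  std::printf("total bytes %f\n", dbytes);
  return 0;
}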
@@ -169,7 +169,11 @@ int main (int argc, char ** argv)
   for(int lat=4;lat<=maxlat;lat+=4){
     for(int Ls=8;Ls<=8;Ls*=2){
 
-      std::vector<int> latt_size ({lat,lat,lat,lat});
+      std::vector<int> latt_size ({lat*mpi_layout[0],
+                                   lat*mpi_layout[1],
+                                   lat*mpi_layout[2],
+                                   lat*mpi_layout[3]});
 
 
       GridCartesian Grid(latt_size,simd_layout,mpi_layout);
       RealD Nrank = Grid._Nprocessors;
@@ -485,7 +489,8 @@ int main (int argc, char ** argv)
     dbytes=0;
     ncomm=0;
 
-    parallel_for(int dir=0;dir<8;dir++){
+#pragma omp parallel for num_threads(Grid::CartesianCommunicator::nCommThreads)
+    for(int dir=0;dir<8;dir++){
 
       double tbytes;
       int mu =dir % 4;
@@ -502,9 +507,9 @@ int main (int argc, char ** argv)
        int comm_proc = mpi_layout[mu]-1;
        Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
      }
+     int tid = omp_get_thread_num();
      tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
-                                        (void *)&rbuf[dir][0], recv_from_rank, bytes,dir);
+                                        (void *)&rbuf[dir][0], recv_from_rank, bytes,tid);
 
 #pragma omp atomic
      dbytes+=tbytes;
@@ -55,7 +55,7 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
-  uint64_t lmax=96;
+  uint64_t lmax=64;
 #define NLOOP (10*lmax*lmax*lmax*lmax/vol)
   for(int lat=8;lat<=lmax;lat+=8){
 
@@ -36,7 +36,7 @@ int main (int argc, char ** argv)
 {
   Grid_init(&argc,&argv);
 #define LMAX (32)
-#define LMIN (4)
+#define LMIN (16)
 #define LINC (4)
 
   int64_t Nloop=2000;
@@ -46,14 +46,14 @@ int main (int argc, char ** argv)
 
   int64_t threads = GridThread::GetThreads();
   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
-#if 1
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 x= x*y"<<std::endl;
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 
-  for(int lat=4;lat<=LMAX;lat+=LINC){
+  for(int lat=LMIN;lat<=LMAX;lat+=LINC){
 
     std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
     int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -85,7 +85,7 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 
-  for(int lat=2;lat<=LMAX;lat+=LINC){
+  for(int lat=LMIN;lat<=LMAX;lat+=LINC){
 
     std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
     int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -116,7 +116,7 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 
-  for(int lat=2;lat<=LMAX;lat+=LINC){
+  for(int lat=LMIN;lat<=LMAX;lat+=LINC){
 
     std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
     int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -147,7 +147,7 @@ int main (int argc, char ** argv)
   std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
 
-  for(int lat=2;lat<=LMAX;lat+=LINC){
+  for(int lat=LMIN;lat<=LMAX;lat+=LINC){
 
     std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
     int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -171,8 +171,6 @@ int main (int argc, char ** argv)
     std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
 
   }
 
-#endif
 
   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
   std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 CovShiftForward(z,x,y)"<<std::endl;
@ -206,6 +204,50 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#if 1
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "= Benchmarking SU3xSU3 z= x * Cshift(y)"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << " L "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
|
||||||
|
|
||||||
|
for(int lat=LMIN;lat<=LMAX;lat+=LINC){
|
||||||
|
std::vector<int> latt_size ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
|
||||||
|
int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
|
|
||||||
|
GridCartesian Grid(latt_size,simd_layout,mpi_layout);
|
||||||
|
GridParallelRNG pRNG(&Grid); pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||||
|
|
||||||
|
LatticeColourMatrix z(&Grid); random(pRNG,z);
|
||||||
|
LatticeColourMatrix x(&Grid); random(pRNG,x);
|
||||||
|
LatticeColourMatrix y(&Grid); random(pRNG,y);
|
||||||
|
LatticeColourMatrix tmp(&Grid);
|
||||||
|
|
||||||
|
for(int mu=0;mu<4;mu++){
|
||||||
|
double tshift=0;
|
||||||
|
double tmult =0;
|
||||||
|
|
||||||
|
double start=usecond();
|
||||||
|
for(int64_t i=0;i<Nloop;i++){
|
||||||
|
tshift-=usecond();
|
||||||
|
tmp = Cshift(y,mu,-1);
|
||||||
|
tshift+=usecond();
|
||||||
|
tmult-=usecond();
|
||||||
|
z = x*tmp;
|
||||||
|
tmult+=usecond();
|
||||||
|
}
|
||||||
|
double stop=usecond();
|
||||||
|
double time = (stop-start)/Nloop;
|
||||||
|
tshift = tshift/Nloop;
|
||||||
|
tmult = tmult /Nloop;
|
||||||
|
|
||||||
|
double bytes=3*vol*Nc*Nc*sizeof(Complex);
|
||||||
|
double flops=Nc*Nc*(6+8+8)*vol;
|
||||||
|
std::cout<<GridLogMessage<<std::setprecision(3) << "total us "<<time<<" shift "<<tshift <<" mult "<<tmult<<std::endl;
|
||||||
|
time = time * 1000; // convert to NS for GB/s
|
||||||
|
std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<" \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
}
|
}
|
||||||
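The byte and flop estimates in the new benchmark follow from simple counting. A sketch of the arithmetic, under the assumptions that Nc=3 and Complex is a double-precision complex (16 bytes): three colour matrices move per site (read x, read shifted y, write z), and each of the Nc*Nc output elements of the matrix product costs one complex multiply (6 flops) plus two complex multiply-adds (8 flops each), i.e. 22 flops, giving 198 flops per site.

// Worked version of the estimate (illustrative, not Grid code; 16^4 volume assumed).
#include <complex>
#include <cstdio>

int main() {
  const int Nc = 3;
  const double vol = 16.0 * 16 * 16 * 16;
  double bytes = 3 * vol * Nc * Nc * sizeof(std::complex<double>);  // three matrices per site
  double flops = Nc * Nc * (6 + 8 + 8) * vol;                       // 9 * 22 = 198 flops per site
  std::printf("bytes %.3e  flops %.3e\n", bytes, flops);
  return 0;
}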
@@ -479,15 +479,13 @@ until convergence
       Field B(grid); B.checkerboard = evec[0].checkerboard;
 
       //  power of two search pattern; not every evalue in eval2 is assessed.
+      int allconv =1;
       for(int jj = 1; jj<=Nstop; jj*=2){
        int j = Nstop-jj;
        RealD e = eval2_copy[j]; // Discard the evalue
        basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
-       if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
-         if ( j > Nconv ) {
-           Nconv=j+1;
-           jj=Nstop; // Terminate the scan
-         }
+       if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
+         allconv=0;
        }
       }
       // Do evec[0] for good measure
@@ -495,8 +493,10 @@ until convergence
        int j=0;
        RealD e = eval2_copy[0];
        basisRotateJ(B,evec,Qt,j,0,Nk,Nm);
-       _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox);
+       if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0;
       }
+      if ( allconv ) Nconv = Nstop;
 
       // test if we converged, if so, terminate
       std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
       //	if( Nconv>=Nstop || beta_k < betastp){
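The logic change here is easy to miss in the diff: instead of advancing Nconv whenever a tested mode converges, the scan now records a failure flag, and Nconv jumps to Nstop only if every mode probed at a power-of-two offset (plus evec[0]) passes. A standalone sketch of that control flow, with TestConvergence replaced by a trivial residual check on made-up data:

// Illustrative sketch of the revised convergence scan (assumed stand-in data).
#include <vector>
#include <cstdio>

static bool TestConvergence(int j, const std::vector<double>& resid, double eresid) {
  return resid[j] < eresid;   // stand-in for _Tester.TestConvergence(...)
}

int main() {
  const int Nstop = 8;
  std::vector<double> resid(Nstop, 1e-9);  // pretend all residuals are small
  double eresid = 1e-8;
  int Nconv = 0;

  int allconv = 1;
  for (int jj = 1; jj <= Nstop; jj *= 2) {   // power-of-two search pattern
    int j = Nstop - jj;
    if (!TestConvergence(j, resid, eresid)) allconv = 0;
  }
  if (!TestConvergence(0, resid, eresid)) allconv = 0;  // evec[0] for good measure
  if (allconv) Nconv = Nstop;                            // all-or-nothing promotion

  std::printf("#modes converged: >= %d/%d\n", Nconv, Nstop);
  return 0;
}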
@@ -48,6 +48,7 @@ struct LanczosParams : Serializable {
 struct LocalCoherenceLanczosParams : Serializable {
 public:
   GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
+                                  bool, saveEvecs,
                                   bool, doFine,
                                   bool, doFineRead,
                                   bool, doCoarse,
@@ -277,7 +277,9 @@ public:
     uint8_t *cp = (uint8_t *)ptr;
     if ( ptr ) {
       // One touch per 4k page, static OMP loop to catch same loop order
+#ifdef GRID_OMP
 #pragma omp parallel for schedule(static)
+#endif
       for(size_type n=0;n<bytes;n+=4096){
        cp[n]=0;
       }
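The comment in this hunk states the intent: touching one byte per 4k page from a statically scheduled parallel loop places each page, under first-touch NUMA policy, near the thread that will later traverse it with the same static schedule. A minimal standalone sketch of that idiom, assuming 4 KiB pages and optional OpenMP:

// First-touch page initialisation sketch (assumptions: 4096-byte pages, OpenMP optional).
#include <cstdint>
#include <cstdlib>

void first_touch(void *ptr, long long bytes) {
  uint8_t *cp = static_cast<uint8_t *>(ptr);
  if (cp == nullptr) return;
#ifdef _OPENMP
#pragma omp parallel for schedule(static)
#endif
  for (long long n = 0; n < bytes; n += 4096) {  // one touch per 4k page
    cp[n] = 0;                                   // the touching thread's NUMA node owns the page
  }
}

int main() {
  long long bytes = 1 << 20;
  void *ptr = std::malloc((size_t)bytes);
  first_touch(ptr, bytes);
  std::free(ptr);
  return 0;
}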
@@ -45,31 +45,33 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimension
   int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane
   int e1=rhs._grid->_slice_nblock[dimension];
   int e2=rhs._grid->_slice_block[dimension];
+  int ent = 0;
+
+  static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
+
   int stride=rhs._grid->_slice_stride[dimension];
   if ( cbmask == 0x3 ) {
-    parallel_for_nest2(int n=0;n<e1;n++){
+    for(int n=0;n<e1;n++){
       for(int b=0;b<e2;b++){
        int o  = n*stride;
        int bo = n*e2;
-       buffer[off+bo+b]=rhs._odata[so+o+b];
+       table[ent++] = std::pair<int,int>(off+bo+b,so+o+b);
       }
     }
   } else {
     int bo=0;
-    std::vector<std::pair<int,int> > table;
     for(int n=0;n<e1;n++){
       for(int b=0;b<e2;b++){
        int o  = n*stride;
        int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
        if ( ocb &cbmask ) {
-         table.push_back(std::pair<int,int> (bo++,o+b));
+         table[ent++]=std::pair<int,int> (off+bo++,so+o+b);
        }
       }
     }
-    parallel_for(int i=0;i<table.size();i++){
-      buffer[off+table[i].first]=rhs._odata[so+table[i].second];
-    }
+  }
+  parallel_for(int i=0;i<ent;i++){
+    buffer[table[i].first]=rhs._odata[table[i].second];
   }
 }
 
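This hunk (and the Scatter_plane and Copy_plane hunks that follow) all apply the same refactor: the index arithmetic is done once, serially, into a table of (destination, source) pairs, and the actual data movement becomes a single flat parallel loop over that table for both checkerboard cases. A standalone sketch of the pattern with plain integers and no Grid types:

// Build-table-then-parallel-copy sketch (illustrative sizes and data only).
#include <vector>
#include <utility>
#include <cstdio>

int main() {
  const int e1 = 4, e2 = 8, stride = 8;
  std::vector<double> odata(e1 * stride + e2, 1.0);   // stand-in for rhs._odata
  std::vector<double> buffer(e1 * e2, 0.0);

  std::vector<std::pair<int,int>> table(e1 * e2);
  int ent = 0;
  for (int n = 0; n < e1; n++) {                      // serial table build
    for (int b = 0; b < e2; b++) {
      int o  = n * stride;
      int bo = n * e2;
      table[ent++] = std::make_pair(bo + b, o + b);   // (buffer index, odata index)
    }
  }

#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (int i = 0; i < ent; i++) {                     // one parallel copy loop for all cases
    buffer[table[i].first] = odata[table[i].second];
  }
  std::printf("copied %d elements\n", ent);
  return 0;
}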
@ -140,31 +142,35 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
|
|||||||
int e1=rhs._grid->_slice_nblock[dimension];
|
int e1=rhs._grid->_slice_nblock[dimension];
|
||||||
int e2=rhs._grid->_slice_block[dimension];
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
int stride=rhs._grid->_slice_stride[dimension];
|
int stride=rhs._grid->_slice_stride[dimension];
|
||||||
|
|
||||||
|
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||||
|
int ent =0;
|
||||||
|
|
||||||
if ( cbmask ==0x3 ) {
|
if ( cbmask ==0x3 ) {
|
||||||
parallel_for_nest2(int n=0;n<e1;n++){
|
|
||||||
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o =n*rhs._grid->_slice_stride[dimension];
|
int o =n*rhs._grid->_slice_stride[dimension];
|
||||||
int bo =n*rhs._grid->_slice_block[dimension];
|
int bo =n*rhs._grid->_slice_block[dimension];
|
||||||
rhs._odata[so+o+b]=buffer[bo+b];
|
table[ent++] = std::pair<int,int>(so+o+b,bo+b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
std::vector<std::pair<int,int> > table;
|
|
||||||
int bo=0;
|
int bo=0;
|
||||||
for(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
int o =n*rhs._grid->_slice_stride[dimension];
|
int o =n*rhs._grid->_slice_stride[dimension];
|
||||||
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
|
||||||
if ( ocb & cbmask ) {
|
if ( ocb & cbmask ) {
|
||||||
table.push_back(std::pair<int,int> (so+o+b,bo++));
|
table[ent++]=std::pair<int,int> (so+o+b,bo++);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
parallel_for(int i=0;i<table.size();i++){
|
}
|
||||||
// std::cout << "Rcv"<< table[i].first << " " << table[i].second << " " <<buffer[table[i].second]<<std::endl;
|
|
||||||
rhs._odata[table[i].first]=buffer[table[i].second];
|
parallel_for(int i=0;i<ent;i++){
|
||||||
}
|
rhs._odata[table[i].first]=buffer[table[i].second];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -228,29 +234,32 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
|
|||||||
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
|
int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
|
||||||
int e2=rhs._grid->_slice_block[dimension];
|
int e2=rhs._grid->_slice_block[dimension];
|
||||||
int stride = rhs._grid->_slice_stride[dimension];
|
int stride = rhs._grid->_slice_stride[dimension];
|
||||||
|
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||||
|
int ent=0;
|
||||||
|
|
||||||
if(cbmask == 0x3 ){
|
if(cbmask == 0x3 ){
|
||||||
parallel_for_nest2(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
|
|
||||||
int o =n*stride+b;
|
int o =n*stride+b;
|
||||||
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||||
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
parallel_for_nest2(int n=0;n<e1;n++){
|
for(int n=0;n<e1;n++){
|
||||||
for(int b=0;b<e2;b++){
|
for(int b=0;b<e2;b++){
|
||||||
|
|
||||||
int o =n*stride+b;
|
int o =n*stride+b;
|
||||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
|
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
|
||||||
if ( ocb&cbmask ) {
|
if ( ocb&cbmask ) {
|
||||||
//lhs._odata[lo+o]=rhs._odata[ro+o];
|
table[ent++] = std::pair<int,int>(lo+o,ro+o);
|
||||||
vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
parallel_for(int i=0;i<ent;i++){
|
||||||
|
lhs._odata[table[i].first]=rhs._odata[table[i].second];
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
|
template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
|
||||||
@ -269,16 +278,28 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
|
|||||||
int e2=rhs._grid->_slice_block [dimension];
|
int e2=rhs._grid->_slice_block [dimension];
|
||||||
int stride = rhs._grid->_slice_stride[dimension];
|
int stride = rhs._grid->_slice_stride[dimension];
|
||||||
|
|
||||||
parallel_for_nest2(int n=0;n<e1;n++){
|
static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
|
||||||
for(int b=0;b<e2;b++){
|
int ent=0;
|
||||||
|
|
||||||
|
double t_tab,t_perm;
|
||||||
|
if ( cbmask == 0x3 ) {
|
||||||
|
for(int n=0;n<e1;n++){
|
||||||
|
for(int b=0;b<e2;b++){
|
||||||
|
int o =n*stride;
|
||||||
|
table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||||
|
}}
|
||||||
|
} else {
|
||||||
|
for(int n=0;n<e1;n++){
|
||||||
|
for(int b=0;b<e2;b++){
|
||||||
int o =n*stride;
|
int o =n*stride;
|
||||||
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
|
int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
|
||||||
if ( ocb&cbmask ) {
|
if ( ocb&cbmask ) table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
|
||||||
permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type);
|
}}
|
||||||
}
|
}
|
||||||
|
|
||||||
}}
|
parallel_for(int i=0;i<ent;i++){
|
||||||
|
permute(lhs._odata[table[i].first],rhs._odata[table[i].second],permute_type);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////
|
||||||
@ -291,6 +312,8 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
|
|||||||
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
|
||||||
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);
|
||||||
|
|
||||||
|
double t_local;
|
||||||
|
|
||||||
if ( sshift[0] == sshift[1] ) {
|
if ( sshift[0] == sshift[1] ) {
|
||||||
Cshift_local(ret,rhs,dimension,shift,0x3);
|
Cshift_local(ret,rhs,dimension,shift,0x3);
|
||||||
} else {
|
} else {
|
||||||
@ -299,7 +322,7 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
@@ -299,7 +322,7 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &rhs
   }
 }
 
-template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
+template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
 {
   GridBase *grid = rhs._grid;
   int fd = grid->_fdimensions[dimension];
@@ -325,11 +348,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
 
     int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
     int sx     = (x+sshift)%rd;
 
-    // FIXME : This must change where we have a
-    // Rotate slice.
-
-    // Document how this works ; why didn't I do this when I first wrote it...
     // wrap is whether sshift > rd.
     //  num is sshift mod rd.
     //
@@ -365,10 +384,8 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
 
     if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
     else                 Copy_plane(ret,rhs,dimension,x,sx,cbmask);
 
-
   }
-  return ret;
 }
 }
 #endif
@@ -256,13 +256,42 @@ public:
       _odata[ss]=r._odata[ss];
     }
   }
 
   Lattice(Lattice&& r){ // move constructor
     _grid = r._grid;
     checkerboard = r.checkerboard;
     _odata=std::move(r._odata);
   }
 
+  inline Lattice<vobj> & operator = (Lattice<vobj> && r)
+  {
+    _grid        = r._grid;
+    checkerboard = r.checkerboard;
+    _odata      =std::move(r._odata);
+    return *this;
+  }
+
+  inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
+    _grid        = r._grid;
+    checkerboard = r.checkerboard;
+    _odata.resize(_grid->oSites());// essential
+
+    parallel_for(int ss=0;ss<_grid->oSites();ss++){
+      _odata[ss]=r._odata[ss];
+    }
+    return *this;
+  }
+
+  template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
+    this->checkerboard = r.checkerboard;
+    conformable(*this,r);
+
+    parallel_for(int ss=0;ss<_grid->oSites();ss++){
+      this->_odata[ss]=r._odata[ss];
+    }
+    return *this;
+  }
+
   virtual ~Lattice(void) = default;
 
   void reset(GridBase* grid) {
@@ -281,33 +310,6 @@ public:
     return *this;
   }
 
-  template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
-    this->checkerboard = r.checkerboard;
-    conformable(*this,r);
-
-    parallel_for(int ss=0;ss<_grid->oSites();ss++){
-      this->_odata[ss]=r._odata[ss];
-    }
-    return *this;
-  }
-
-  strong_inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
-    _grid        = r._grid;
-    checkerboard = r.checkerboard;
-    _odata.resize(_grid->oSites());// essential
-
-    parallel_for(int ss=0;ss<_grid->oSites();ss++){
-      _odata[ss]=r._odata[ss];
-    }
-    return *this;
-  }
-  strong_inline Lattice<vobj> & operator = (Lattice<vobj> && r)
-  {
-    _grid        = r._grid;
-    checkerboard = r.checkerboard;
-    _odata      =std::move(r._odata);
-    return *this;
-  }
-
   // *=,+=,-= operators inherit behvour from correspond */+/- operation
   template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {
@@ -179,7 +179,7 @@ namespace Grid {
     return ret;
   }
 
-#define DECLARE_RELATIONAL(op,functor) \
+#define DECLARE_RELATIONAL_EQ(op,functor) \
   template<class vsimd,IfSimd<vsimd> = 0>\
   inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\
   {\
@@ -198,11 +198,6 @@ namespace Grid {
     typedef typename vsimd::scalar_type scalar;\
     return Comparison(functor<scalar,scalar>(),lhs,rhs);\
   }\
-  template<class vsimd,IfSimd<vsimd> = 0>\
-  inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
-  { \
-    return lhs._internal op rhs._internal; \
-  } \
   template<class vsimd>\
   inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
   { \
@@ -212,14 +207,21 @@ namespace Grid {
   inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
   { \
     return lhs op rhs._internal; \
-  }
+  } \
+
+#define DECLARE_RELATIONAL(op,functor) \
+  DECLARE_RELATIONAL_EQ(op,functor) \
+  template<class vsimd>\
+  inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
+  { \
+    return lhs._internal op rhs._internal; \
+  }
 
 DECLARE_RELATIONAL(<,slt);
 DECLARE_RELATIONAL(<=,sle);
 DECLARE_RELATIONAL(>,sgt);
 DECLARE_RELATIONAL(>=,sge);
-DECLARE_RELATIONAL(==,seq);
+DECLARE_RELATIONAL_EQ(==,seq);
 DECLARE_RELATIONAL(!=,sne);
 
 #undef DECLARE_RELATIONAL
@@ -110,11 +110,11 @@ class BinaryIO {
       lsites = 1;
     }
 
-#pragma omp parallel
+PARALLEL_REGION
     {
       uint32_t nersc_csum_thr = 0;
 
-#pragma omp for
+PARALLEL_FOR_LOOP_INTERN
       for (uint64_t local_site = 0; local_site < lsites; local_site++)
       {
        uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
@@ -124,7 +124,7 @@ class BinaryIO {
        }
       }
 
-#pragma omp critical
+PARALLEL_CRITICAL
       {
        nersc_csum += nersc_csum_thr;
       }
@@ -146,14 +146,14 @@ class BinaryIO {
     std::vector<int> local_start =grid->LocalStarts();
     std::vector<int> global_vol  =grid->FullDimensions();
 
-#pragma omp parallel
+PARALLEL_REGION
     {
       std::vector<int> coor(nd);
       uint32_t scidac_csuma_thr=0;
       uint32_t scidac_csumb_thr=0;
       uint32_t site_crc=0;
 
-#pragma omp for
+PARALLEL_FOR_LOOP_INTERN
       for(uint64_t local_site=0;local_site<lsites;local_site++){
 
        uint32_t * site_buf = (uint32_t *)&fbuf[local_site];
@@ -183,7 +183,7 @@ class BinaryIO {
        scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
       }
 
-#pragma omp critical
+PARALLEL_CRITICAL
       {
        scidac_csuma^= scidac_csuma_thr;
        scidac_csumb^= scidac_csumb_thr;
@@ -40,7 +40,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
 
 #define PARALLEL_FOR_LOOP _Pragma("omp parallel for schedule(static)")
 #define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
-#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
+#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for schedule(static) collapse(2)")
 #define PARALLEL_REGION _Pragma("omp parallel")
 #define PARALLEL_CRITICAL _Pragma("omp critical")
 #else
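The BinaryIO hunks keep the same checksum algorithm but express the OpenMP structure through the PARALLEL_REGION / PARALLEL_FOR_LOOP_INTERN / PARALLEL_CRITICAL macros, whose expansions are shown in the last hunk. Written out in plain OpenMP, the pattern is: each thread accumulates a private partial checksum inside the work-shared loop, and the partials are merged under a critical section. A standalone sketch, with an additive per-site "checksum" standing in for the real NERSC/SciDAC calculation:

// Per-thread partial checksum with critical-section merge (illustrative data).
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const int64_t lsites = 1 << 16;
  std::vector<uint32_t> fbuf(lsites, 0x9e3779b9u);
  uint32_t nersc_csum = 0;

#ifdef _OPENMP
#pragma omp parallel
#endif
  {
    uint32_t nersc_csum_thr = 0;             // per-thread partial
#ifdef _OPENMP
#pragma omp for schedule(static)
#endif
    for (int64_t local_site = 0; local_site < lsites; local_site++) {
      nersc_csum_thr += fbuf[local_site];    // stand-in for the per-site checksum
    }
#ifdef _OPENMP
#pragma omp critical
#endif
    {
      nersc_csum += nersc_csum_thr;          // merge partials
    }
  }
  std::printf("csum %08x\n", nersc_csum);
  return 0;
}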
tests/Test_compressed_lanczos_hot_start.cc (new file, 259 lines)
@@ -0,0 +1,259 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./tests/Test_dwf_compressed_lanczos_reorg.cc

    Copyright (C) 2017

    Author: Leans heavily on Christoph Lehner's code
    Author: Peter Boyle <paboyle@ph.ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */
/*
 * Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features
 * in Grid that were intended to be used to support blocked Aggregates, from
 */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>

using namespace std;
using namespace Grid;
using namespace Grid::QCD;

template<class Fobj,class CComplex,int nbasis>
class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis>
{
public:
  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
  typedef Lattice<CoarseSiteVector>           CoarseField;
  typedef Lattice<CComplex>                   CoarseScalar; // used for inner products on fine field
  typedef Lattice<Fobj>                       FineField;

  LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid,
                              LinearOperatorBase<FineField> &FineOp,
                              int checkerboard)
    // Base constructor
    : LocalCoherenceLanczos<Fobj,CComplex,nbasis>(FineGrid,CoarseGrid,FineOp,checkerboard)
  {};

  void checkpointFine(std::string evecs_file,std::string evals_file)
  {
#ifdef HAVE_LIME
    assert(this->subspace.size()==nbasis);
    emptyUserRecord record;
    Grid::QCD::ScidacWriter WR(this->_FineGrid->IsBoss());
    WR.open(evecs_file);
    for(int k=0;k<nbasis;k++) {
      WR.writeScidacFieldRecord(this->subspace[k],record);
    }
    WR.close();

    XmlWriter WRx(evals_file);
    write(WRx,"evals",this->evals_fine);
#else
    assert(0);
#endif
  }

  void checkpointFineRestore(std::string evecs_file,std::string evals_file)
  {
#ifdef HAVE_LIME
    this->evals_fine.resize(nbasis);
    this->subspace.resize(nbasis,this->_FineGrid);

    std::cout << GridLogIRL<< "checkpointFineRestore:  Reading evals from "<<evals_file<<std::endl;
    XmlReader RDx(evals_file);
    read(RDx,"evals",this->evals_fine);

    assert(this->evals_fine.size()==nbasis);

    std::cout << GridLogIRL<< "checkpointFineRestore:  Reading evecs from "<<evecs_file<<std::endl;
    emptyUserRecord record;
    Grid::QCD::ScidacReader RD ;
    RD.open(evecs_file);
    for(int k=0;k<nbasis;k++) {
      this->subspace[k].checkerboard=this->_checkerboard;
      RD.readScidacFieldRecord(this->subspace[k],record);
    }
    RD.close();
#else
    assert(0);
#endif
  }

  void checkpointCoarse(std::string evecs_file,std::string evals_file)
  {
#ifdef HAVE_LIME
    int n = this->evec_coarse.size();
    emptyUserRecord record;
    Grid::QCD::ScidacWriter WR(this->_CoarseGrid->IsBoss());
    WR.open(evecs_file);
    for(int k=0;k<n;k++) {
      WR.writeScidacFieldRecord(this->evec_coarse[k],record);
    }
    WR.close();

    XmlWriter WRx(evals_file);
    write(WRx,"evals",this->evals_coarse);
#else
    assert(0);
#endif
  }

  void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec)
  {
#ifdef HAVE_LIME
    std::cout << "resizing coarse vecs to " << nvec<< std::endl;
    this->evals_coarse.resize(nvec);
    this->evec_coarse.resize(nvec,this->_CoarseGrid);
    std::cout << GridLogIRL<< "checkpointCoarseRestore:  Reading evals from "<<evals_file<<std::endl;
    XmlReader RDx(evals_file);
    read(RDx,"evals",this->evals_coarse);

    assert(this->evals_coarse.size()==nvec);
    emptyUserRecord record;
    std::cout << GridLogIRL<< "checkpointCoarseRestore:  Reading evecs from "<<evecs_file<<std::endl;
    Grid::QCD::ScidacReader RD ;
    RD.open(evecs_file);
    for(int k=0;k<nvec;k++) {
      RD.readScidacFieldRecord(this->evec_coarse[k],record);
    }
    RD.close();
#else
    assert(0);
#endif
  }
};

int main (int argc, char ** argv) {

  Grid_init(&argc,&argv);
  GridLogIRL.TimingMode(1);

  LocalCoherenceLanczosParams Params;
  {
    Params.omega.resize(10);
    Params.blockSize.resize(5);
    XmlWriter writer("Params_template.xml");
    write(writer,"Params",Params);
    std::cout << GridLogMessage << " Written Params_template.xml" <<std::endl;
  }

  {
    XmlReader reader(std::string("./Params.xml"));
    read(reader, "Params", Params);
  }

  int     Ls = (int)Params.omega.size();
  RealD mass = Params.mass;
  RealD M5   = Params.M5;
  std::vector<int> blockSize = Params.blockSize;
  std::vector<int> latt({16,16,16,16});
  uint64_t     vol = Ls*latt[0]*latt[1]*latt[2]*latt[3];
  double   mat_flop= 2.0*1320.0*vol;

  // Grids
  GridCartesian         * UGrid     = SpaceTimeGrid::makeFourDimGrid(latt,
                                                                     GridDefaultSimd(Nd,vComplex::Nsimd()),
                                                                     GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid   = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian         * FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
  GridRedBlackCartesian * FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);

  std::vector<int> fineLatt     = latt;
  int dims=fineLatt.size();
  assert(blockSize.size()==dims+1);
  std::vector<int> coarseLatt(dims);
  std::vector<int> coarseLatt5d ;

  for (int d=0;d<coarseLatt.size();d++){
    coarseLatt[d] = fineLatt[d]/blockSize[d];    assert(coarseLatt[d]*blockSize[d]==fineLatt[d]);
  }

  std::cout << GridLogMessage<< " 5d coarse lattice is ";
  for (int i=0;i<coarseLatt.size();i++){
    std::cout << coarseLatt[i]<<"x";
  }
  int cLs = Ls/blockSize[dims]; assert(cLs*blockSize[dims]==Ls);
  std::cout << cLs<<std::endl;

  GridCartesian         * CoarseGrid4    = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  GridRedBlackCartesian * CoarseGrid4rb  = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
  GridCartesian         * CoarseGrid5    = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);

  // Gauge field
  std::vector<int> seeds4({1,2,3,4});
  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
  LatticeGaugeField Umu(UGrid);
  SU3::HotConfiguration(RNG4,Umu);
  //  FieldMetaData header;
  //  NerscIO::readConfiguration(Umu,header,Params.config);

  std::cout << GridLogMessage << "Lattice dimensions: " << latt << "   Ls: " << Ls << std::endl;

  // ZMobius EO Operator
  ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.);
  SchurDiagTwoOperator<ZMobiusFermionR,LatticeFermion> HermOp(Ddwf);

  // Eigenvector storage
  LanczosParams fine  =Params.FineParams;
  LanczosParams coarse=Params.CoarseParams;

  const int Ns1 = fine.Nstop;   const int Ns2 = coarse.Nstop;
  const int Nk1 = fine.Nk;      const int Nk2 = coarse.Nk;
  const int Nm1 = fine.Nm;      const int Nm2 = coarse.Nm;

  std::cout << GridLogMessage << "Keep " << fine.Nstop   << " fine   vectors" << std::endl;
  std::cout << GridLogMessage << "Keep " << coarse.Nstop << " coarse vectors" << std::endl;
  assert(Nm2 >= Nm1);

  const int nbasis= 60;
  assert(nbasis==Ns1);
  LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd);
  std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;

  assert( (Params.doFine)||(Params.doFineRead));

  if ( Params.doFine ) {
    std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "<<Nk1<<" Nm "<<Nm1<< std::endl;
    double t0=-usecond();
    _LocalCoherenceLanczos.calcFine(fine.Cheby,
                                    fine.Nstop,fine.Nk,fine.Nm,
                                    fine.resid,fine.MaxIt,
                                    fine.betastp,fine.MinRes);
    t0+=usecond();

    double t1=-usecond();
    if ( Params.saveEvecs ) {
      std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl;
      _LocalCoherenceLanczos.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml"));
    }
    t1+=usecond();

    std::cout << GridLogMessage << "Computation time is " << (t0)/1.0e6 <<" seconds"<<std::endl;
    if ( Params.saveEvecs ) std::cout << GridLogMessage << "I/O        time is " << (t1)/1.0e6 <<" seconds"<<std::endl;
    std::cout << GridLogMessage << "Time to solution is " << (t1+t0)/1.0e6 <<" seconds"<<std::endl;
    std::cout << GridLogMessage << "Done"<<std::endl;
  }

  Grid_finalize();
}
tests/lanczos/Test_compressed_lanczos.cc (new file, 253 lines)
@@ -0,0 +1,253 @@
/*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid

    Source file: ./tests/Test_dwf_compressed_lanczos_reorg.cc

    Copyright (C) 2017

    Author: Leans heavily on Christoph Lehner's code
    Author: Peter Boyle <paboyle@ph.ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/*  END LEGAL */
/*
 * Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features
 * in Grid that were intended to be used to support blocked Aggregates, from
 */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>

using namespace std;
using namespace Grid;
using namespace Grid::QCD;

template<class Fobj,class CComplex,int nbasis>
class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis>
{
public:
  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
  typedef Lattice<CoarseSiteVector>           CoarseField;
  typedef Lattice<CComplex>                   CoarseScalar; // used for inner products on fine field
  typedef Lattice<Fobj>                       FineField;

  LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid,
                              LinearOperatorBase<FineField> &FineOp,
                              int checkerboard)
    // Base constructor
    : LocalCoherenceLanczos<Fobj,CComplex,nbasis>(FineGrid,CoarseGrid,FineOp,checkerboard)
  {};

  void checkpointFine(std::string evecs_file,std::string evals_file)
  {
    assert(this->subspace.size()==nbasis);
    emptyUserRecord record;
    Grid::QCD::ScidacWriter WR(this->_FineGrid->IsBoss());
    WR.open(evecs_file);
    for(int k=0;k<nbasis;k++) {
      WR.writeScidacFieldRecord(this->subspace[k],record);
    }
    WR.close();

    XmlWriter WRx(evals_file);
    write(WRx,"evals",this->evals_fine);
  }

  void checkpointFineRestore(std::string evecs_file,std::string evals_file)
  {
    this->evals_fine.resize(nbasis);
    this->subspace.resize(nbasis,this->_FineGrid);

    std::cout << GridLogIRL<< "checkpointFineRestore:  Reading evals from "<<evals_file<<std::endl;
    XmlReader RDx(evals_file);
    read(RDx,"evals",this->evals_fine);

    assert(this->evals_fine.size()==nbasis);

    std::cout << GridLogIRL<< "checkpointFineRestore:  Reading evecs from "<<evecs_file<<std::endl;
    emptyUserRecord record;
    Grid::QCD::ScidacReader RD ;
    RD.open(evecs_file);
    for(int k=0;k<nbasis;k++) {
      this->subspace[k].checkerboard=this->_checkerboard;
      RD.readScidacFieldRecord(this->subspace[k],record);
    }
    RD.close();
  }

  void checkpointCoarse(std::string evecs_file,std::string evals_file)
  {
    int n = this->evec_coarse.size();
    emptyUserRecord record;
    Grid::QCD::ScidacWriter WR(this->_CoarseGrid->IsBoss());
    WR.open(evecs_file);
    for(int k=0;k<n;k++) {
      WR.writeScidacFieldRecord(this->evec_coarse[k],record);
    }
    WR.close();

    XmlWriter WRx(evals_file);
    write(WRx,"evals",this->evals_coarse);
  }

  void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec)
  {
    std::cout << "resizing coarse vecs to " << nvec<< std::endl;
    this->evals_coarse.resize(nvec);
    this->evec_coarse.resize(nvec,this->_CoarseGrid);
    std::cout << GridLogIRL<< "checkpointCoarseRestore:  Reading evals from "<<evals_file<<std::endl;
    XmlReader RDx(evals_file);
    read(RDx,"evals",this->evals_coarse);

    assert(this->evals_coarse.size()==nvec);
    emptyUserRecord record;
    std::cout << GridLogIRL<< "checkpointCoarseRestore:  Reading evecs from "<<evecs_file<<std::endl;
    Grid::QCD::ScidacReader RD ;
    RD.open(evecs_file);
    for(int k=0;k<nvec;k++) {
      RD.readScidacFieldRecord(this->evec_coarse[k],record);
    }
    RD.close();
  }
};

int main (int argc, char ** argv) {

  Grid_init(&argc,&argv);
  GridLogIRL.TimingMode(1);

  LocalCoherenceLanczosParams Params;
  {
    Params.omega.resize(10);
    Params.blockSize.resize(5);
    XmlWriter writer("Params_template.xml");
    write(writer,"Params",Params);
    std::cout << GridLogMessage << " Written Params_template.xml" <<std::endl;
  }

  {
    XmlReader reader(std::string("./Params.xml"));
    read(reader, "Params", Params);
  }

  int     Ls = (int)Params.omega.size();
  RealD mass = Params.mass;
  RealD M5   = Params.M5;
  std::vector<int> blockSize = Params.blockSize;

  // Grids
  GridCartesian         * UGrid     = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
                                                                     GridDefaultSimd(Nd,vComplex::Nsimd()),
                                                                     GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid   = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian         * FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
  GridRedBlackCartesian * FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);

  std::vector<int> fineLatt     = GridDefaultLatt();
  int dims=fineLatt.size();
  assert(blockSize.size()==dims+1);
  std::vector<int> coarseLatt(dims);
  std::vector<int> coarseLatt5d ;

  for (int d=0;d<coarseLatt.size();d++){
    coarseLatt[d] = fineLatt[d]/blockSize[d];    assert(coarseLatt[d]*blockSize[d]==fineLatt[d]);
  }

  std::cout << GridLogMessage<< " 5d coarse lattice is ";
  for (int i=0;i<coarseLatt.size();i++){
    std::cout << coarseLatt[i]<<"x";
  }
  int cLs = Ls/blockSize[dims]; assert(cLs*blockSize[dims]==Ls);
  std::cout << cLs<<std::endl;

  GridCartesian         * CoarseGrid4    = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  GridRedBlackCartesian * CoarseGrid4rb  = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
  GridCartesian         * CoarseGrid5    = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);

  // Gauge field
  LatticeGaugeField Umu(UGrid);
  FieldMetaData header;
  NerscIO::readConfiguration(Umu,header,Params.config);
  std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << "   Ls: " << Ls << std::endl;

  // ZMobius EO Operator
  ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.);
  SchurDiagTwoOperator<ZMobiusFermionR,LatticeFermion> HermOp(Ddwf);

  // Eigenvector storage
  LanczosParams fine  =Params.FineParams;
  LanczosParams coarse=Params.CoarseParams;

  const int Ns1 = fine.Nstop;   const int Ns2 = coarse.Nstop;
  const int Nk1 = fine.Nk;      const int Nk2 = coarse.Nk;
  const int Nm1 = fine.Nm;      const int Nm2 = coarse.Nm;

  std::cout << GridLogMessage << "Keep " << fine.Nstop   << " fine   vectors" << std::endl;
  std::cout << GridLogMessage << "Keep " << coarse.Nstop << " coarse vectors" << std::endl;
  assert(Nm2 >= Nm1);

  const int nbasis= 60;
  assert(nbasis==Ns1);
  LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd);
  std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;

  assert( (Params.doFine)||(Params.doFineRead));

  if ( Params.doFine ) {
    std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "<<Nk1<<" Nm "<<Nm1<< std::endl;
    _LocalCoherenceLanczos.calcFine(fine.Cheby,
                                    fine.Nstop,fine.Nk,fine.Nm,
                                    fine.resid,fine.MaxIt,
                                    fine.betastp,fine.MinRes);

    std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl;
    _LocalCoherenceLanczos.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml"));
    _LocalCoherenceLanczos.testFine(fine.resid*100.0); // Coarse check
    std::cout << GridLogIRL<<"Orthogonalising"<<std::endl;
    _LocalCoherenceLanczos.Orthogonalise();
    std::cout << GridLogIRL<<"Orthogonaled"<<std::endl;
  }

  if ( Params.doFineRead ) {
    _LocalCoherenceLanczos.checkpointFineRestore(std::string("evecs.scidac"),std::string("evals.xml"));
    _LocalCoherenceLanczos.testFine(fine.resid*100.0); // Coarse check
    _LocalCoherenceLanczos.Orthogonalise();
  }

  if ( Params.doCoarse ) {
    std::cout << GridLogMessage << "Performing coarse grid IRL Nstop "<< Ns2<< " Nk "<<Nk2<<" Nm "<<Nm2<< std::endl;
    _LocalCoherenceLanczos.calcCoarse(coarse.Cheby,Params.Smoother,Params.coarse_relax_tol,
                                      coarse.Nstop, coarse.Nk,coarse.Nm,
                                      coarse.resid,  coarse.MaxIt,
                                      coarse.betastp,coarse.MinRes);


    std::cout << GridLogIRL<<"Checkpointing coarse evecs"<<std::endl;
    _LocalCoherenceLanczos.checkpointCoarse(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml"));
  }

  if ( Params.doCoarseRead ) {
    // Verify we can reread ???
    _LocalCoherenceLanczos.checkpointCoarseRestore(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml"),coarse.Nstop);
    _LocalCoherenceLanczos.testCoarse(coarse.resid*100.0,Params.Smoother,Params.coarse_relax_tol); // Coarse check
  }
  Grid_finalize();
}