Mirror of https://github.com/paboyle/Grid.git (synced 2025-11-04 14:04:32 +00:00)

Commit: Merge branch 'develop' into feature/hadrons

Changed file: VERSION (2 lines changed)
@@ -1,4 +1,4 @@
Version : 0.7.0
Version : 0.8.0

- Clang 3.5 and above, ICPC v16 and above, GCC 6.3 and above recommended
- MPI and MPI3 comms optimisations for KNL and OPA finished

@@ -158,8 +158,10 @@ public:

	  dbytes=0;
	  ncomm=0;

	  parallel_for(int dir=0;dir<8;dir++){
#ifdef GRID_OMP
#pragma omp parallel for num_threads(Grid::CartesianCommunicator::nCommThreads)
#endif
	  for(int dir=0;dir<8;dir++){

	    double tbytes;
	    int mu =dir % 4;
@@ -175,9 +177,14 @@ public:

		int comm_proc = mpi_layout[mu]-1;
		Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
	      }
#ifdef GRID_OMP
	int tid = omp_get_thread_num(); 
#else 
        int tid = dir;
#endif
	      tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
						 (void *)&rbuf[dir][0], recv_from_rank,
						 bytes,dir);
						 bytes,tid);

#ifdef GRID_OMP
#pragma omp atomic
@@ -169,7 +169,11 @@ int main (int argc, char ** argv)
 for(int lat=4;lat<=maxlat;lat+=4){
    for(int Ls=8;Ls<=8;Ls*=2){

      std::vector<int> latt_size  ({lat,lat,lat,lat});
      std::vector<int> latt_size  ({lat*mpi_layout[0],
                                    lat*mpi_layout[1],
                                    lat*mpi_layout[2],
                                    lat*mpi_layout[3]});

      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
      RealD Nrank = Grid._Nprocessors;
@@ -485,7 +489,8 @@ int main (int argc, char ** argv)
	dbytes=0;
	ncomm=0;

	parallel_for(int dir=0;dir<8;dir++){
#pragma omp parallel for num_threads(Grid::CartesianCommunicator::nCommThreads)
	for(int dir=0;dir<8;dir++){

	  double tbytes;
	  int mu =dir % 4;
@@ -502,9 +507,9 @@ int main (int argc, char ** argv)
	      int comm_proc = mpi_layout[mu]-1;
	      Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank);
	    }

            int tid = omp_get_thread_num();
	    tbytes= Grid.StencilSendToRecvFrom((void *)&xbuf[dir][0], xmit_to_rank,
					       (void *)&rbuf[dir][0], recv_from_rank, bytes,dir);
					       (void *)&rbuf[dir][0], recv_from_rank, bytes,tid);

#pragma omp atomic
	    dbytes+=tbytes;
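The benchmark hunks above replace Grid's parallel_for over the eight communication directions with an explicit OpenMP loop sized by nCommThreads, and pass a per-thread id into StencilSendToRecvFrom instead of the direction index, with an atomic update of the byte counter. A minimal, self-contained sketch of that pattern follows; do_comms and ncommthreads are hypothetical stand-ins for the Grid calls and configuration, not Grid API.

#include <cstdio>
#ifdef _OPENMP
#include <omp.h>
#endif

// Hypothetical stand-in for StencilSendToRecvFrom(); returns bytes moved.
static double do_comms(int dir, int tid) { (void)dir; (void)tid; return 1.0e6; }

int main () {
  const int ncommthreads = 2;   // assumption: plays the role of nCommThreads
  double dbytes = 0;

  // One OpenMP thread per communication lane; each call is tagged with the
  // thread id rather than the loop index, mirroring the hunks above.
#ifdef _OPENMP
#pragma omp parallel for num_threads(ncommthreads)
#endif
  for (int dir = 0; dir < 8; dir++) {
#ifdef _OPENMP
    int tid = omp_get_thread_num();
#else
    int tid = dir;
#endif
    double tbytes = do_comms(dir, tid);
#ifdef _OPENMP
#pragma omp atomic
#endif
    dbytes += tbytes;
  }
  std::printf("moved %g bytes\n", dbytes);
  return 0;
}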
@@ -55,7 +55,7 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl;
  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;
  uint64_t lmax=96;
  uint64_t lmax=64;
#define NLOOP (10*lmax*lmax*lmax*lmax/vol)
  for(int lat=8;lat<=lmax;lat+=8){

@@ -36,7 +36,7 @@ int main (int argc, char ** argv)
{
  Grid_init(&argc,&argv);
#define LMAX (32)
#define LMIN (4)
#define LMIN (16)
#define LINC (4)

  int64_t Nloop=2000;
@@ -46,14 +46,14 @@ int main (int argc, char ** argv)

  int64_t threads = GridThread::GetThreads();
  std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
#if 1

  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  x= x*y"<<std::endl;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

  for(int lat=4;lat<=LMAX;lat+=LINC){
  for(int lat=LMIN;lat<=LMAX;lat+=LINC){

      std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
      int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -85,7 +85,7 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

  for(int lat=2;lat<=LMAX;lat+=LINC){
  for(int lat=LMIN;lat<=LMAX;lat+=LINC){

      std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
      int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -116,7 +116,7 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

  for(int lat=2;lat<=LMAX;lat+=LINC){
  for(int lat=LMIN;lat<=LMAX;lat+=LINC){

      std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
      int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -147,7 +147,7 @@ int main (int argc, char ** argv)
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

  for(int lat=2;lat<=LMAX;lat+=LINC){
  for(int lat=LMIN;lat<=LMAX;lat+=LINC){
    
    std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
    int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
@@ -172,8 +172,6 @@ int main (int argc, char ** argv)
    
  }

#endif

  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  CovShiftForward(z,x,y)"<<std::endl;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
@@ -206,6 +204,50 @@ int main (int argc, char ** argv)
	    std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
      }
  }
#if 1
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "= Benchmarking SU3xSU3  z= x * Cshift(y)"<<std::endl;
  std::cout<<GridLogMessage << "===================================================================================================="<<std::endl;
  std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s\t\t GFlop/s"<<std::endl;
  std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl;

  for(int lat=LMIN;lat<=LMAX;lat+=LINC){
      std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]});
      int64_t vol = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];

      GridCartesian     Grid(latt_size,simd_layout,mpi_layout);
      GridParallelRNG          pRNG(&Grid);      pRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));

      LatticeColourMatrix z(&Grid); random(pRNG,z);
      LatticeColourMatrix x(&Grid); random(pRNG,x);
      LatticeColourMatrix y(&Grid); random(pRNG,y);
      LatticeColourMatrix tmp(&Grid);

      for(int mu=0;mu<4;mu++){
	double tshift=0;
	double tmult =0;

	double start=usecond();
	for(int64_t i=0;i<Nloop;i++){
	  tshift-=usecond();
	  tmp = Cshift(y,mu,-1);
	  tshift+=usecond();
	  tmult-=usecond();
	  z   = x*tmp;
	  tmult+=usecond();
	}
	double stop=usecond();
	double time = (stop-start)/Nloop;
	tshift = tshift/Nloop;
	tmult  = tmult /Nloop;
	
	double bytes=3*vol*Nc*Nc*sizeof(Complex);
	double flops=Nc*Nc*(6+8+8)*vol;
	std::cout<<GridLogMessage<<std::setprecision(3) << "total us "<<time<<" shift "<<tshift <<" mult "<<tmult<<std::endl;
	time = time * 1000; // convert to NS for GB/s
	std::cout<<GridLogMessage<<std::setprecision(3) << lat<<"\t\t"<<bytes<<"   \t\t"<<bytes/time<<"\t\t" << flops/time<<std::endl;
      }
    }
#endif
  Grid_finalize();
}

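The new z = x * Cshift(y) benchmark above reports bandwidth and flop rate from a simple traffic and flop model: three colour matrices move per site, and a complex 3x3 multiply costs Nc*Nc*(6+8+8) flops per site. A small arithmetic sketch of that accounting, with made-up example numbers for the volume and timing (my own helper, not part of the benchmark):

#include <complex>
#include <cstdio>

int main () {
  const int Nc = 3;
  const long vol = 16L*16L*16L*16L;   // example local volume
  const double time_us = 50.0;        // example measured time per iteration

  // z = x * Cshift(y): read x and the shifted y, write z, i.e. 3 matrices/site.
  double bytes = 3.0 * vol * Nc * Nc * sizeof(std::complex<double>);
  // Per output element: Nc complex multiplies (6 flops each) plus Nc-1 complex
  // adds (2 flops each), so Nc*Nc*(6*Nc + 2*(Nc-1)) = 198 flops per site.
  double flops = Nc * Nc * (6.0*Nc + 2.0*(Nc-1)) * vol;

  // With time in microseconds, dividing by a further 1e3 yields GB/s and GFlop/s.
  std::printf("GB/s %.3f  GFlop/s %.3f\n", bytes/time_us/1.0e3, flops/time_us/1.0e3);
  return 0;
}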
@@ -479,15 +479,13 @@ until convergence
	Field B(grid); B.checkerboard = evec[0].checkerboard;

	//  power of two search pattern;  not every evalue in eval2 is assessed.
	int allconv =1;
	for(int jj = 1; jj<=Nstop; jj*=2){
	  int j = Nstop-jj;
	  RealD e = eval2_copy[j]; // Discard the evalue
	  basisRotateJ(B,evec,Qt,j,0,Nk,Nm);	    
	  if( _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
	    if ( j > Nconv ) {
	      Nconv=j+1;
	      jj=Nstop; // Terminate the scan
	    }
	  if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) {
	    allconv=0;
	  }
	}
	// Do evec[0] for good measure
@@ -495,8 +493,10 @@ until convergence
	  int j=0;
	  RealD e = eval2_copy[0]; 
	  basisRotateJ(B,evec,Qt,j,0,Nk,Nm);	    
	  _Tester.TestConvergence(j,eresid,B,e,evalMaxApprox);
	  if( !_Tester.TestConvergence(j,eresid,B,e,evalMaxApprox) ) allconv=0;
	}
	if ( allconv ) Nconv = Nstop;

	// test if we converged, if so, terminate
	std::cout<<GridLogIRL<<" #modes converged: >= "<<Nconv<<"/"<<Nstop<<std::endl;
	//	if( Nconv>=Nstop || beta_k < betastp){

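The Lanczos hunk above changes the restart criterion from "largest converged index found so far" to "every probed mode converged", still probing only a power-of-two pattern of modes. A minimal sketch of that probe, with a hypothetical converged(j) predicate standing in for _Tester.TestConvergence:

#include <cstdio>

// Hypothetical convergence predicate for mode j (stands in for the Lanczos tester).
static bool converged(int j) { return j < 12; }   // pretend only the lowest 12 modes converged

int main () {
  const int Nstop = 16;   // number of wanted eigenmodes
  int allconv = 1;

  // Probe modes Nstop-1, Nstop-2, Nstop-4, ... down to 0: a power-of-two search
  // pattern, so only O(log Nstop) modes are tested rather than every one.
  for (int jj = 1; jj <= Nstop; jj *= 2) {
    int j = Nstop - jj;
    if (!converged(j)) allconv = 0;
  }
  if (!converged(0)) allconv = 0;   // check the lowest mode for good measure

  int Nconv = allconv ? Nstop : 0;
  std::printf("converged >= %d/%d\n", Nconv, Nstop);
  return 0;
}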
@@ -48,6 +48,7 @@ struct LanczosParams : Serializable {
struct LocalCoherenceLanczosParams : Serializable {
 public:
  GRID_SERIALIZABLE_CLASS_MEMBERS(LocalCoherenceLanczosParams,
				  bool, saveEvecs,
				  bool, doFine,
				  bool, doFineRead,
				  bool, doCoarse,

@@ -277,7 +277,9 @@ public:
    uint8_t *cp = (uint8_t *)ptr;
    if ( ptr ) { 
    // One touch per 4k page, static OMP loop to catch same loop order
#ifdef GRID_OMP
#pragma omp parallel for schedule(static)
#endif
      for(size_type n=0;n<bytes;n+=4096){
	cp[n]=0;
      }

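The allocator hunk above writes one byte per 4 KiB page from a statically scheduled OpenMP loop, so each page is first touched by the thread that will later process it (NUMA first-touch placement). A standalone sketch of the same idea, assuming nothing beyond OpenMP and the C standard library:

#include <cstdint>
#include <cstdio>
#include <cstdlib>

int main () {
  const std::size_t bytes = std::size_t(64) * 1024 * 1024;
  std::uint8_t *cp = static_cast<std::uint8_t*>(std::malloc(bytes));
  if (cp) {
    // One touch per 4 KiB page; schedule(static) gives each thread the same
    // index ranges it will see in later static loops, so pages are faulted in
    // on the NUMA node of the thread that will use them.
#ifdef _OPENMP
#pragma omp parallel for schedule(static)
#endif
    for (long n = 0; n < (long)bytes; n += 4096) cp[n] = 0;
  }
  std::printf("touched %zu bytes\n", bytes);
  std::free(cp);
  return 0;
}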
@@ -45,31 +45,33 @@ Gather_plane_simple (const Lattice<vobj> &rhs,commVector<vobj> &buffer,int dimen
  int so=plane*rhs._grid->_ostride[dimension]; // base offset for start of plane 
  int e1=rhs._grid->_slice_nblock[dimension];
  int e2=rhs._grid->_slice_block[dimension];
  int ent = 0;

  static std::vector<std::pair<int,int> > table; table.resize(e1*e2);

  int stride=rhs._grid->_slice_stride[dimension];
  if ( cbmask == 0x3 ) { 
    parallel_for_nest2(int n=0;n<e1;n++){
    for(int n=0;n<e1;n++){
      for(int b=0;b<e2;b++){
	int o  = n*stride;
	int bo = n*e2;
	buffer[off+bo+b]=rhs._odata[so+o+b];
	table[ent++] = std::pair<int,int>(off+bo+b,so+o+b);
      }
    }
  } else { 
     int bo=0;
     std::vector<std::pair<int,int> > table;
     for(int n=0;n<e1;n++){
       for(int b=0;b<e2;b++){
	 int o  = n*stride;
	 int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);
	 if ( ocb &cbmask ) {
	   table.push_back(std::pair<int,int> (bo++,o+b));
	   table[ent++]=std::pair<int,int> (off+bo++,so+o+b);
	 }
       }
     }
     parallel_for(int i=0;i<table.size();i++){
       buffer[off+table[i].first]=rhs._odata[so+table[i].second];
  }
  parallel_for(int i=0;i<ent;i++){
    buffer[table[i].first]=rhs._odata[table[i].second];
  }
}

@@ -141,31 +143,35 @@ template<class vobj> void Scatter_plane_simple (Lattice<vobj> &rhs,commVector<vo
  int e2=rhs._grid->_slice_block[dimension];
  int stride=rhs._grid->_slice_stride[dimension];

  static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
  int ent    =0;

  if ( cbmask ==0x3 ) {
    parallel_for_nest2(int n=0;n<e1;n++){

    for(int n=0;n<e1;n++){
      for(int b=0;b<e2;b++){
	int o   =n*rhs._grid->_slice_stride[dimension];
	int bo  =n*rhs._grid->_slice_block[dimension];
	rhs._odata[so+o+b]=buffer[bo+b];
	table[ent++] = std::pair<int,int>(so+o+b,bo+b);
      }
    }

  } else { 
    std::vector<std::pair<int,int> > table;
    int bo=0;
    for(int n=0;n<e1;n++){
      for(int b=0;b<e2;b++){
	int o   =n*rhs._grid->_slice_stride[dimension];
	int ocb=1<<rhs._grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
	if ( ocb & cbmask ) {
	  table.push_back(std::pair<int,int> (so+o+b,bo++));
	  table[ent++]=std::pair<int,int> (so+o+b,bo++);
	}
      }
    }
    parallel_for(int i=0;i<table.size();i++){
       //       std::cout << "Rcv"<< table[i].first << " " << table[i].second << " " <<buffer[table[i].second]<<std::endl;
  }

  parallel_for(int i=0;i<ent;i++){
    rhs._odata[table[i].first]=buffer[table[i].second];
  }
  }
}

//////////////////////////////////////////////////////
@@ -228,29 +234,32 @@ template<class vobj> void Copy_plane(Lattice<vobj>& lhs,const Lattice<vobj> &rhs
  int e1=rhs._grid->_slice_nblock[dimension]; // clearly loop invariant for icpc
  int e2=rhs._grid->_slice_block[dimension];
  int stride = rhs._grid->_slice_stride[dimension];
  if(cbmask == 0x3 ){
    parallel_for_nest2(int n=0;n<e1;n++){
      for(int b=0;b<e2;b++){
  static std::vector<std::pair<int,int> > table; table.resize(e1*e2);
  int ent=0;

  if(cbmask == 0x3 ){
    for(int n=0;n<e1;n++){
      for(int b=0;b<e2;b++){
        int o =n*stride+b;
  	//lhs._odata[lo+o]=rhs._odata[ro+o];
	vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
	table[ent++] = std::pair<int,int>(lo+o,ro+o);
      }
    }
  } else { 
    parallel_for_nest2(int n=0;n<e1;n++){
    for(int n=0;n<e1;n++){
      for(int b=0;b<e2;b++){

        int o =n*stride+b;
        int ocb=1<<lhs._grid->CheckerBoardFromOindex(o);
        if ( ocb&cbmask ) {
  	//lhs._odata[lo+o]=rhs._odata[ro+o];
	  vstream(lhs._odata[lo+o],rhs._odata[ro+o]);
	  table[ent++] = std::pair<int,int>(lo+o,ro+o);
	}
      }
    }
  }

  parallel_for(int i=0;i<ent;i++){
    lhs._odata[table[i].first]=rhs._odata[table[i].second];
  }

}

template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vobj> &rhs, int dimension,int lplane,int rplane,int cbmask,int permute_type)
@@ -269,16 +278,28 @@ template<class vobj> void Copy_plane_permute(Lattice<vobj>& lhs,const Lattice<vo
  int e2=rhs._grid->_slice_block [dimension];
  int stride = rhs._grid->_slice_stride[dimension];

  parallel_for_nest2(int n=0;n<e1;n++){
  for(int b=0;b<e2;b++){
  static std::vector<std::pair<int,int> > table;  table.resize(e1*e2);
  int ent=0;

  double t_tab,t_perm;
  if ( cbmask == 0x3 ) {
    for(int n=0;n<e1;n++){
    for(int b=0;b<e2;b++){
      int o  =n*stride;
      table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
    }}
  } else {
    for(int n=0;n<e1;n++){
    for(int b=0;b<e2;b++){
      int o  =n*stride;
      int ocb=1<<lhs._grid->CheckerBoardFromOindex(o+b);
      if ( ocb&cbmask ) {
	permute(lhs._odata[lo+o+b],rhs._odata[ro+o+b],permute_type);
      if ( ocb&cbmask ) table[ent++] = std::pair<int,int>(lo+o+b,ro+o+b);
    }}
  }

  }}
  parallel_for(int i=0;i<ent;i++){
    permute(lhs._odata[table[i].first],rhs._odata[table[i].second],permute_type);
  }
}

//////////////////////////////////////////////////////
@@ -291,6 +312,8 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
  sshift[0] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Even);
  sshift[1] = rhs._grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,Odd);

  double t_local;
  
  if ( sshift[0] == sshift[1] ) {
    Cshift_local(ret,rhs,dimension,shift,0x3);
  } else {
@@ -299,7 +322,7 @@ template<class vobj> void Cshift_local(Lattice<vobj>& ret,const Lattice<vobj> &r
  }
}

template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
template<class vobj> void Cshift_local(Lattice<vobj> &ret,const Lattice<vobj> &rhs,int dimension,int shift,int cbmask)
{
  GridBase *grid = rhs._grid;
  int fd = grid->_fdimensions[dimension];
@@ -326,10 +349,6 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
    int sshift = grid->CheckerBoardShiftForCB(rhs.checkerboard,dimension,shift,cb);
    int sx     = (x+sshift)%rd;
    
    // FIXME : This must change where we have a 
    // Rotate slice.
    
    // Document how this works ; why didn't I do this when I first wrote it...
    // wrap is whether sshift > rd.
    //  num is sshift mod rd.
    // 
@@ -366,9 +385,7 @@ template<class vobj> Lattice<vobj> Cshift_local(Lattice<vobj> &ret,const Lattice
    if ( permute_slice ) Copy_plane_permute(ret,rhs,dimension,x,sx,cbmask,permute_type_dist);
    else                 Copy_plane(ret,rhs,dimension,x,sx,cbmask); 
  
  
  }
  return ret;
}
}
#endif

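The Cshift hunks above split each plane copy into two phases: a serial pass that records (destination, source) index pairs in a static table, and a parallel_for over that flat table that performs the actual data movement. A self-contained sketch of this precompute-then-copy pattern on plain std::vector data, not the Grid lattice types:

#include <cstdio>
#include <utility>
#include <vector>

int main () {
  const int e1 = 4, e2 = 8, stride = 16;
  std::vector<double> src(e1 * stride + e2, 1.0), dst(e1 * e2, 0.0);

  // Phase 1: build the copy table serially (cheap, easy to mask or reuse).
  std::vector<std::pair<int,int>> table(e1 * e2);
  int ent = 0;
  for (int n = 0; n < e1; n++)
    for (int b = 0; b < e2; b++)
      table[ent++] = std::pair<int,int>(n * e2 + b,      // destination offset
                                        n * stride + b); // source offset

  // Phase 2: one flat, trivially parallel loop does all the data movement
  // (Grid runs this under parallel_for; a plain omp pragma behaves the same).
#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (int i = 0; i < ent; i++)
    dst[table[i].first] = src[table[i].second];

  std::printf("copied %d elements\n", ent);
  return 0;
}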
@@ -263,6 +263,35 @@ public:
    _odata=std::move(r._odata);
  }
  
  inline Lattice<vobj> & operator = (Lattice<vobj> && r)
  {
    _grid        = r._grid;
    checkerboard = r.checkerboard;
    _odata       =std::move(r._odata);
    return *this;
  }

  inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
    _grid        = r._grid;
    checkerboard = r.checkerboard;
    _odata.resize(_grid->oSites());// essential
    
    parallel_for(int ss=0;ss<_grid->oSites();ss++){
      _odata[ss]=r._odata[ss];
    }  	
    return *this;
  }

  template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
    this->checkerboard = r.checkerboard;
    conformable(*this,r);
    
    parallel_for(int ss=0;ss<_grid->oSites();ss++){
      this->_odata[ss]=r._odata[ss];
    }
    return *this;
  }

  virtual ~Lattice(void) = default;
    
  void reset(GridBase* grid) {
@@ -281,33 +310,6 @@ public:
    return *this;
  }
  
  template<class robj> strong_inline Lattice<vobj> & operator = (const Lattice<robj> & r){
    this->checkerboard = r.checkerboard;
    conformable(*this,r);
    
    parallel_for(int ss=0;ss<_grid->oSites();ss++){
      this->_odata[ss]=r._odata[ss];
    }
    return *this;
  }

  strong_inline Lattice<vobj> & operator = (const Lattice<vobj> & r){
    _grid        = r._grid;
    checkerboard = r.checkerboard;
    _odata.resize(_grid->oSites());// essential
    
    parallel_for(int ss=0;ss<_grid->oSites();ss++){
      _odata[ss]=r._odata[ss];
    }  	
    return *this;
  }
  strong_inline Lattice<vobj> & operator = (Lattice<vobj> && r)
  {
    _grid        = r._grid;
    checkerboard = r.checkerboard;
    _odata       =std::move(r._odata);
    return *this;
  }
  
  // *=,+=,-= operators inherit behvour from correspond */+/- operation
  template<class T> strong_inline Lattice<vobj> &operator *=(const T &r) {

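The Lattice hunks above relocate the move-assignment and copy-assignment operators earlier in the class, keeping the same split: move assignment steals the site storage, copy assignment resizes to the source volume and copies site by site under parallel_for. A toy stand-in (ToyLattice, not the Grid class) showing that copy-versus-move split:

#include <cstdio>
#include <utility>
#include <vector>

// Toy stand-in for Lattice<vobj>: a checkerboard flag plus per-site data.
struct ToyLattice {
  int checkerboard = 0;
  std::vector<double> odata;

  // Move assignment: steal the site storage, no per-site loop.
  ToyLattice & operator = (ToyLattice && r) {
    checkerboard = r.checkerboard;
    odata        = std::move(r.odata);
    return *this;
  }
  // Copy assignment: resize to the source volume ("essential", as the hunk
  // notes), then copy site by site; Grid runs this loop under parallel_for.
  ToyLattice & operator = (const ToyLattice & r) {
    checkerboard = r.checkerboard;
    odata.resize(r.odata.size());
    for (std::size_t ss = 0; ss < odata.size(); ss++) odata[ss] = r.odata[ss];
    return *this;
  }
};

int main () {
  ToyLattice a, b;
  b.odata.assign(16, 3.14);
  a = b;              // deep copy, b untouched
  a = std::move(b);   // steals b's storage
  std::printf("%zu sites\n", a.odata.size());
  return 0;
}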
@@ -179,7 +179,7 @@ namespace Grid {
      return ret;
    }

#define DECLARE_RELATIONAL(op,functor) \
#define DECLARE_RELATIONAL_EQ(op,functor) \
  template<class vsimd,IfSimd<vsimd> = 0>\
    inline vInteger operator op (const vsimd & lhs, const vsimd & rhs)\
    {\
@@ -198,11 +198,6 @@ namespace Grid {
      typedef typename vsimd::scalar_type scalar;\
      return Comparison(functor<scalar,scalar>(),lhs,rhs);\
    }\
  template<class vsimd,IfSimd<vsimd> = 0>\
    inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
    {									\
      return lhs._internal op rhs._internal;				\
    }									\
  template<class vsimd>\
    inline vInteger operator op(const iScalar<vsimd> &lhs,const typename vsimd::scalar_type &rhs) \
    {									\
@@ -212,14 +207,21 @@ namespace Grid {
    inline vInteger operator op(const typename vsimd::scalar_type &lhs,const iScalar<vsimd> &rhs) \
    {									\
      return lhs op rhs._internal;					\
    }									
    }									\

#define DECLARE_RELATIONAL(op,functor) \
  DECLARE_RELATIONAL_EQ(op,functor)    \
  template<class vsimd>\
    inline vInteger operator op(const iScalar<vsimd> &lhs,const iScalar<vsimd> &rhs)\
    {									\
      return lhs._internal op rhs._internal;				\
    }									

DECLARE_RELATIONAL(<,slt);
DECLARE_RELATIONAL(<=,sle);
DECLARE_RELATIONAL(>,sgt);
DECLARE_RELATIONAL(>=,sge);
DECLARE_RELATIONAL(==,seq);
DECLARE_RELATIONAL_EQ(==,seq);
DECLARE_RELATIONAL(!=,sne);

#undef DECLARE_RELATIONAL

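The hunk above factors the relational-operator macro: DECLARE_RELATIONAL_EQ carries the common overloads, DECLARE_RELATIONAL expands it and adds the iScalar-versus-iScalar overload on top, and == is then declared with the reduced macro so that overload is not emitted twice. A toy illustration of the same compose-a-macro pattern; the names below are made up for the example and are not the Grid macros:

#include <iostream>

// Base macro: the overloads every operator needs.
#define DECLARE_OP_BASE(op, name)                                   \
  inline int name(int lhs, int rhs) { return lhs op rhs; }

// Full macro: expands the base and layers one extra overload on top.
#define DECLARE_OP(op, name)                                        \
  DECLARE_OP_BASE(op, name)                                         \
  inline int name(double lhs, double rhs) { return lhs op rhs; }

DECLARE_OP(<, lt)        // gets both overloads
DECLARE_OP_BASE(==, eq)  // gets only the base overload, avoiding a duplicate

int main () {
  std::cout << lt(1, 2) << " " << lt(1.5, 0.5) << " " << eq(3, 3) << "\n";
  return 0;
}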
@@ -110,11 +110,11 @@ class BinaryIO {
      lsites = 1;
    }

    #pragma omp parallel
PARALLEL_REGION
    {
      uint32_t nersc_csum_thr = 0;

      #pragma omp for
PARALLEL_FOR_LOOP_INTERN
      for (uint64_t local_site = 0; local_site < lsites; local_site++)
      {
        uint32_t *site_buf = (uint32_t *)&fbuf[local_site];
@@ -124,7 +124,7 @@ class BinaryIO {
        }
      }

      #pragma omp critical
PARALLEL_CRITICAL
      {
        nersc_csum += nersc_csum_thr;
      }
@@ -146,14 +146,14 @@ class BinaryIO {
    std::vector<int> local_start =grid->LocalStarts();
    std::vector<int> global_vol  =grid->FullDimensions();

#pragma omp parallel
PARALLEL_REGION
    { 
      std::vector<int> coor(nd);
      uint32_t scidac_csuma_thr=0;
      uint32_t scidac_csumb_thr=0;
      uint32_t site_crc=0;

#pragma omp for
PARALLEL_FOR_LOOP_INTERN
      for(uint64_t local_site=0;local_site<lsites;local_site++){

	uint32_t * site_buf = (uint32_t *)&fbuf[local_site];
@@ -183,7 +183,7 @@ class BinaryIO {
	scidac_csumb_thr ^= site_crc<<gsite31 | site_crc>>(32-gsite31);
      }

#pragma omp critical
PARALLEL_CRITICAL
      {
	scidac_csuma^= scidac_csuma_thr;
	scidac_csumb^= scidac_csumb_thr;

@@ -40,7 +40,7 @@ Author: paboyle <paboyle@ph.ed.ac.uk>

#define PARALLEL_FOR_LOOP        _Pragma("omp parallel for schedule(static)")
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for collapse(2)")
#define PARALLEL_NESTED_LOOP2 _Pragma("omp parallel for schedule(static) collapse(2)")
#define PARALLEL_REGION       _Pragma("omp parallel")
#define PARALLEL_CRITICAL     _Pragma("omp critical")
#else

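The BinaryIO hunks replace raw omp pragmas with the PARALLEL_REGION / PARALLEL_FOR_LOOP_INTERN / PARALLEL_CRITICAL macros defined above, so the checksum loops still compile when OpenMP is disabled. A minimal sketch of that thread-local-accumulate-then-critical-merge shape, using the same _Pragma trick; the macro names are reused here purely for illustration:

#include <cstdint>
#include <cstdio>

#ifdef _OPENMP
#define PARALLEL_REGION          _Pragma("omp parallel")
#define PARALLEL_FOR_LOOP_INTERN _Pragma("omp for schedule(static)")
#define PARALLEL_CRITICAL        _Pragma("omp critical")
#else
#define PARALLEL_REGION
#define PARALLEL_FOR_LOOP_INTERN
#define PARALLEL_CRITICAL
#endif

int main () {
  const std::uint64_t lsites = 1 << 20;
  std::uint32_t csum = 0;

  PARALLEL_REGION
  {
    std::uint32_t csum_thr = 0;           // per-thread partial checksum

    PARALLEL_FOR_LOOP_INTERN
    for (std::uint64_t s = 0; s < lsites; s++) {
      csum_thr += (std::uint32_t)s;       // stand-in for the per-site word sum
    }

    PARALLEL_CRITICAL
    {
      csum += csum_thr;                   // merge each thread's partial exactly once
    }
  }
  std::printf("checksum %08x\n", csum);
  return 0;
}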
New file: tests/Test_compressed_lanczos_hot_start.cc (259 lines)
@@ -0,0 +1,259 @@
    /*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

    Source file: ./tests/Test_dwf_compressed_lanczos_reorg.cc

    Copyright (C) 2017

Author: Leans heavily on Christoph Lehner's code
Author: Peter Boyle <paboyle@ph.ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
/*
 *  Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features 
 *  in Grid that were intended to be used to support blocked Aggregates, from
 */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>

using namespace std;
using namespace Grid;
using namespace Grid::QCD;

template<class Fobj,class CComplex,int nbasis>
class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis>
{ 
public:
  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
  typedef Lattice<CoarseSiteVector>           CoarseField;
  typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field
  typedef Lattice<Fobj>          FineField;

  LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid,
			      LinearOperatorBase<FineField> &FineOp,
			      int checkerboard) 
    // Base constructor
    : LocalCoherenceLanczos<Fobj,CComplex,nbasis>(FineGrid,CoarseGrid,FineOp,checkerboard) 
  {};

  void checkpointFine(std::string evecs_file,std::string evals_file)
  {
#ifdef HAVE_LIME
    assert(this->subspace.size()==nbasis);
    emptyUserRecord record;
    Grid::QCD::ScidacWriter WR(this->_FineGrid->IsBoss());
    WR.open(evecs_file);
    for(int k=0;k<nbasis;k++) {
      WR.writeScidacFieldRecord(this->subspace[k],record);
    }
    WR.close();
    
    XmlWriter WRx(evals_file);
    write(WRx,"evals",this->evals_fine);
#else
    assert(0);
#endif
  }

  void checkpointFineRestore(std::string evecs_file,std::string evals_file)
  {
#ifdef HAVE_LIME
    this->evals_fine.resize(nbasis);
    this->subspace.resize(nbasis,this->_FineGrid);
    
    std::cout << GridLogIRL<< "checkpointFineRestore:  Reading evals from "<<evals_file<<std::endl;
    XmlReader RDx(evals_file);
    read(RDx,"evals",this->evals_fine);
    
    assert(this->evals_fine.size()==nbasis);
    
    std::cout << GridLogIRL<< "checkpointFineRestore:  Reading evecs from "<<evecs_file<<std::endl;
    emptyUserRecord record;
    Grid::QCD::ScidacReader RD ;
    RD.open(evecs_file);
    for(int k=0;k<nbasis;k++) {
      this->subspace[k].checkerboard=this->_checkerboard;
      RD.readScidacFieldRecord(this->subspace[k],record);
      
    }
    RD.close();
#else
    assert(0);
#endif 
  }

  void checkpointCoarse(std::string evecs_file,std::string evals_file)
  {
#ifdef HAVE_LIME
    int n = this->evec_coarse.size();
    emptyUserRecord record;
    Grid::QCD::ScidacWriter WR(this->_CoarseGrid->IsBoss());
    WR.open(evecs_file);
    for(int k=0;k<n;k++) {
      WR.writeScidacFieldRecord(this->evec_coarse[k],record);
    }
    WR.close();
    
    XmlWriter WRx(evals_file);
    write(WRx,"evals",this->evals_coarse);
#else
    assert(0);
#endif
  }

  void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec)
  {
#ifdef HAVE_LIME
    std::cout << "resizing coarse vecs to " << nvec<< std::endl;
    this->evals_coarse.resize(nvec);
    this->evec_coarse.resize(nvec,this->_CoarseGrid);
    std::cout << GridLogIRL<< "checkpointCoarseRestore:  Reading evals from "<<evals_file<<std::endl;
    XmlReader RDx(evals_file);
    read(RDx,"evals",this->evals_coarse);

    assert(this->evals_coarse.size()==nvec);
    emptyUserRecord record;
    std::cout << GridLogIRL<< "checkpointCoarseRestore:  Reading evecs from "<<evecs_file<<std::endl;
    Grid::QCD::ScidacReader RD ;
    RD.open(evecs_file);
    for(int k=0;k<nvec;k++) {
      RD.readScidacFieldRecord(this->evec_coarse[k],record);
    }
    RD.close();
#else 
    assert(0);
#endif
  }
};

int main (int argc, char ** argv) {

  Grid_init(&argc,&argv);
  GridLogIRL.TimingMode(1);

  LocalCoherenceLanczosParams Params;
  {
    Params.omega.resize(10);
    Params.blockSize.resize(5);
    XmlWriter writer("Params_template.xml");
    write(writer,"Params",Params);
    std::cout << GridLogMessage << " Written Params_template.xml" <<std::endl;
  }
  
  { 
    XmlReader reader(std::string("./Params.xml"));
    read(reader, "Params", Params);
  }

  int     Ls = (int)Params.omega.size();
  RealD mass = Params.mass;
  RealD M5   = Params.M5;
  std::vector<int> blockSize = Params.blockSize;
  std::vector<int> latt({16,16,16,16});
  uint64_t     vol = Ls*latt[0]*latt[1]*latt[2]*latt[3];
  double   mat_flop= 2.0*1320.0*vol;    
  // Grids
  GridCartesian         * UGrid     = SpaceTimeGrid::makeFourDimGrid(latt,
								     GridDefaultSimd(Nd,vComplex::Nsimd()),
								     GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid   = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian         * FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
  GridRedBlackCartesian * FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);

  std::vector<int> fineLatt     = latt;
  int dims=fineLatt.size();
  assert(blockSize.size()==dims+1);
  std::vector<int> coarseLatt(dims);
  std::vector<int> coarseLatt5d ;

  for (int d=0;d<coarseLatt.size();d++){
    coarseLatt[d] = fineLatt[d]/blockSize[d];    assert(coarseLatt[d]*blockSize[d]==fineLatt[d]);
  }

  std::cout << GridLogMessage<< " 5d coarse lattice is ";
  for (int i=0;i<coarseLatt.size();i++){
    std::cout << coarseLatt[i]<<"x";
  } 
  int cLs = Ls/blockSize[dims]; assert(cLs*blockSize[dims]==Ls);
  std::cout << cLs<<std::endl;
  
  GridCartesian         * CoarseGrid4    = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  GridRedBlackCartesian * CoarseGrid4rb  = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
  GridCartesian         * CoarseGrid5    = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);

  // Gauge field
  std::vector<int> seeds4({1,2,3,4});
  GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4);
  LatticeGaugeField Umu(UGrid);
  SU3::HotConfiguration(RNG4,Umu);
  //  FieldMetaData header;
  //  NerscIO::readConfiguration(Umu,header,Params.config);

  std::cout << GridLogMessage << "Lattice dimensions: " << latt << "   Ls: " << Ls << std::endl;

  // ZMobius EO Operator
  ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.);
  SchurDiagTwoOperator<ZMobiusFermionR,LatticeFermion> HermOp(Ddwf);

  // Eigenvector storage
  LanczosParams fine  =Params.FineParams;  
  LanczosParams coarse=Params.CoarseParams;  

  const int Ns1 = fine.Nstop;   const int Ns2 = coarse.Nstop;
  const int Nk1 = fine.Nk;      const int Nk2 = coarse.Nk;
  const int Nm1 = fine.Nm;      const int Nm2 = coarse.Nm;

  std::cout << GridLogMessage << "Keep " << fine.Nstop   << " fine   vectors" << std::endl;
  std::cout << GridLogMessage << "Keep " << coarse.Nstop << " coarse vectors" << std::endl;
  assert(Nm2 >= Nm1);

  const int nbasis= 60;
  assert(nbasis==Ns1);
  LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd);
  std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;

  assert( (Params.doFine)||(Params.doFineRead));

  if ( Params.doFine ) { 
    std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "<<Nk1<<" Nm "<<Nm1<< std::endl;
    double t0=-usecond();
    _LocalCoherenceLanczos.calcFine(fine.Cheby,
		 fine.Nstop,fine.Nk,fine.Nm,
		 fine.resid,fine.MaxIt, 
		 fine.betastp,fine.MinRes);
    t0+=usecond();

    double t1=-usecond();
    if ( Params.saveEvecs ) {
      std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl;
      _LocalCoherenceLanczos.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml"));
    }
    t1+=usecond();

    std::cout << GridLogMessage << "Computation time is " << (t0)/1.0e6 <<" seconds"<<std::endl;
    if ( Params.saveEvecs )  std::cout << GridLogMessage << "I/O         time is " << (t1)/1.0e6 <<" seconds"<<std::endl;
    std::cout << GridLogMessage << "Time to solution is " << (t1+t0)/1.0e6 <<" seconds"<<std::endl;
    std::cout << GridLogMessage << "Done"<<std::endl;
  }

  Grid_finalize();
}

New file: tests/lanczos/Test_compressed_lanczos.cc (253 lines)
@@ -0,0 +1,253 @@
    /*************************************************************************************

    Grid physics library, www.github.com/paboyle/Grid 

    Source file: ./tests/Test_dwf_compressed_lanczos_reorg.cc

    Copyright (C) 2017

Author: Leans heavily on Christoph Lehner's code
Author: Peter Boyle <paboyle@ph.ed.ac.uk>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along
    with this program; if not, write to the Free Software Foundation, Inc.,
    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

    See the full license in the file "LICENSE" in the top level distribution directory
    *************************************************************************************/
    /*  END LEGAL */
/*
 *  Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features 
 *  in Grid that were intended to be used to support blocked Aggregates, from
 */
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>

using namespace std;
using namespace Grid;
using namespace Grid::QCD;

template<class Fobj,class CComplex,int nbasis>
class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis>
{ 
public:
  typedef iVector<CComplex,nbasis >           CoarseSiteVector;
  typedef Lattice<CoarseSiteVector>           CoarseField;
  typedef Lattice<CComplex>   CoarseScalar; // used for inner products on fine field
  typedef Lattice<Fobj>          FineField;

  LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid,
			      LinearOperatorBase<FineField> &FineOp,
			      int checkerboard) 
    // Base constructor
    : LocalCoherenceLanczos<Fobj,CComplex,nbasis>(FineGrid,CoarseGrid,FineOp,checkerboard) 
  {};

  void checkpointFine(std::string evecs_file,std::string evals_file)
  {
    assert(this->subspace.size()==nbasis);
    emptyUserRecord record;
    Grid::QCD::ScidacWriter WR(this->_FineGrid->IsBoss());
    WR.open(evecs_file);
    for(int k=0;k<nbasis;k++) {
      WR.writeScidacFieldRecord(this->subspace[k],record);
    }
    WR.close();
    
    XmlWriter WRx(evals_file);
    write(WRx,"evals",this->evals_fine);
  }

  void checkpointFineRestore(std::string evecs_file,std::string evals_file)
  {
    this->evals_fine.resize(nbasis);
    this->subspace.resize(nbasis,this->_FineGrid);
    
    std::cout << GridLogIRL<< "checkpointFineRestore:  Reading evals from "<<evals_file<<std::endl;
    XmlReader RDx(evals_file);
    read(RDx,"evals",this->evals_fine);
    
    assert(this->evals_fine.size()==nbasis);
    
    std::cout << GridLogIRL<< "checkpointFineRestore:  Reading evecs from "<<evecs_file<<std::endl;
    emptyUserRecord record;
    Grid::QCD::ScidacReader RD ;
    RD.open(evecs_file);
    for(int k=0;k<nbasis;k++) {
      this->subspace[k].checkerboard=this->_checkerboard;
      RD.readScidacFieldRecord(this->subspace[k],record);
      
    }
    RD.close();
  }

  void checkpointCoarse(std::string evecs_file,std::string evals_file)
  {
    int n = this->evec_coarse.size();
    emptyUserRecord record;
    Grid::QCD::ScidacWriter WR(this->_CoarseGrid->IsBoss());
    WR.open(evecs_file);
    for(int k=0;k<n;k++) {
      WR.writeScidacFieldRecord(this->evec_coarse[k],record);
    }
    WR.close();
    
    XmlWriter WRx(evals_file);
    write(WRx,"evals",this->evals_coarse);
  }

  void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec)
  {
    std::cout << "resizing coarse vecs to " << nvec<< std::endl;
    this->evals_coarse.resize(nvec);
    this->evec_coarse.resize(nvec,this->_CoarseGrid);
    std::cout << GridLogIRL<< "checkpointCoarseRestore:  Reading evals from "<<evals_file<<std::endl;
    XmlReader RDx(evals_file);
    read(RDx,"evals",this->evals_coarse);

    assert(this->evals_coarse.size()==nvec);
    emptyUserRecord record;
    std::cout << GridLogIRL<< "checkpointCoarseRestore:  Reading evecs from "<<evecs_file<<std::endl;
    Grid::QCD::ScidacReader RD ;
    RD.open(evecs_file);
    for(int k=0;k<nvec;k++) {
      RD.readScidacFieldRecord(this->evec_coarse[k],record);
    }
    RD.close();
  }
};

int main (int argc, char ** argv) {

  Grid_init(&argc,&argv);
  GridLogIRL.TimingMode(1);

  LocalCoherenceLanczosParams Params;
  {
    Params.omega.resize(10);
    Params.blockSize.resize(5);
    XmlWriter writer("Params_template.xml");
    write(writer,"Params",Params);
    std::cout << GridLogMessage << " Written Params_template.xml" <<std::endl;
  }
  
  { 
    XmlReader reader(std::string("./Params.xml"));
    read(reader, "Params", Params);
  }

  int     Ls = (int)Params.omega.size();
  RealD mass = Params.mass;
  RealD M5   = Params.M5;
  std::vector<int> blockSize = Params.blockSize;

  // Grids
  GridCartesian         * UGrid     = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
								     GridDefaultSimd(Nd,vComplex::Nsimd()),
								     GridDefaultMpi());
  GridRedBlackCartesian * UrbGrid   = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
  GridCartesian         * FGrid     = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
  GridRedBlackCartesian * FrbGrid   = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);

  std::vector<int> fineLatt     = GridDefaultLatt();
  int dims=fineLatt.size();
  assert(blockSize.size()==dims+1);
  std::vector<int> coarseLatt(dims);
  std::vector<int> coarseLatt5d ;

  for (int d=0;d<coarseLatt.size();d++){
    coarseLatt[d] = fineLatt[d]/blockSize[d];    assert(coarseLatt[d]*blockSize[d]==fineLatt[d]);
  }

  std::cout << GridLogMessage<< " 5d coarse lattice is ";
  for (int i=0;i<coarseLatt.size();i++){
    std::cout << coarseLatt[i]<<"x";
  } 
  int cLs = Ls/blockSize[dims]; assert(cLs*blockSize[dims]==Ls);
  std::cout << cLs<<std::endl;
  
  GridCartesian         * CoarseGrid4    = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
  GridRedBlackCartesian * CoarseGrid4rb  = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
  GridCartesian         * CoarseGrid5    = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);

  // Gauge field
  LatticeGaugeField Umu(UGrid);
  FieldMetaData header;
  NerscIO::readConfiguration(Umu,header,Params.config);
  std::cout << GridLogMessage << "Lattice dimensions: " << GridDefaultLatt() << "   Ls: " << Ls << std::endl;

  // ZMobius EO Operator
  ZMobiusFermionR Ddwf(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, mass, M5, Params.omega,1.,0.);
  SchurDiagTwoOperator<ZMobiusFermionR,LatticeFermion> HermOp(Ddwf);

  // Eigenvector storage
  LanczosParams fine  =Params.FineParams;  
  LanczosParams coarse=Params.CoarseParams;  

  const int Ns1 = fine.Nstop;   const int Ns2 = coarse.Nstop;
  const int Nk1 = fine.Nk;      const int Nk2 = coarse.Nk;
  const int Nm1 = fine.Nm;      const int Nm2 = coarse.Nm;

  std::cout << GridLogMessage << "Keep " << fine.Nstop   << " fine   vectors" << std::endl;
  std::cout << GridLogMessage << "Keep " << coarse.Nstop << " coarse vectors" << std::endl;
  assert(Nm2 >= Nm1);

  const int nbasis= 60;
  assert(nbasis==Ns1);
  LocalCoherenceLanczosScidac<vSpinColourVector,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,HermOp,Odd);
  std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;

  assert( (Params.doFine)||(Params.doFineRead));

  if ( Params.doFine ) { 
    std::cout << GridLogMessage << "Performing fine grid IRL Nstop "<< Ns1 << " Nk "<<Nk1<<" Nm "<<Nm1<< std::endl;
    _LocalCoherenceLanczos.calcFine(fine.Cheby,
		 fine.Nstop,fine.Nk,fine.Nm,
		 fine.resid,fine.MaxIt, 
		 fine.betastp,fine.MinRes);

    std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl;
    _LocalCoherenceLanczos.checkpointFine(std::string("evecs.scidac"),std::string("evals.xml"));
    _LocalCoherenceLanczos.testFine(fine.resid*100.0); // Coarse check
    std::cout << GridLogIRL<<"Orthogonalising"<<std::endl;
    _LocalCoherenceLanczos.Orthogonalise();
    std::cout << GridLogIRL<<"Orthogonaled"<<std::endl;
  }

  if ( Params.doFineRead ) { 
    _LocalCoherenceLanczos.checkpointFineRestore(std::string("evecs.scidac"),std::string("evals.xml"));
    _LocalCoherenceLanczos.testFine(fine.resid*100.0); // Coarse check
    _LocalCoherenceLanczos.Orthogonalise();
  }

  if ( Params.doCoarse ) {
    std::cout << GridLogMessage << "Performing coarse grid IRL Nstop "<< Ns2<< " Nk "<<Nk2<<" Nm "<<Nm2<< std::endl;
    _LocalCoherenceLanczos.calcCoarse(coarse.Cheby,Params.Smoother,Params.coarse_relax_tol,
			      coarse.Nstop, coarse.Nk,coarse.Nm,
			      coarse.resid, coarse.MaxIt, 
			      coarse.betastp,coarse.MinRes);

    std::cout << GridLogIRL<<"Checkpointing coarse evecs"<<std::endl;
    _LocalCoherenceLanczos.checkpointCoarse(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml"));
  }

  if ( Params.doCoarseRead ) {
    // Verify we can reread ???
    _LocalCoherenceLanczos.checkpointCoarseRestore(std::string("evecs.coarse.scidac"),std::string("evals.coarse.xml"),coarse.Nstop);
    _LocalCoherenceLanczos.testCoarse(coarse.resid*100.0,Params.Smoother,Params.coarse_relax_tol); // Coarse check
  }
  Grid_finalize();
}