mirror of
				https://github.com/paboyle/Grid.git
				synced 2025-10-31 03:54:33 +00:00 
			
		
		
		
	Compare commits
	
		
			80 Commits
		
	
	
		
			release/v0
			...
			feature/la
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 9e56c65730 | ||
|  | ef4f2b8c41 | ||
|  | e8b95bd35b | ||
|  | 7e35286860 | ||
|  | 0486ff8e79 | ||
|  | e9cc21900f | ||
|  | 0a8faac271 | ||
|  | abc4de0fd2 | ||
|  | cfe3cd76d1 | ||
|  | 3fa5e3109f | ||
|  | 8b7049f737 | ||
|  | c85024683e | ||
|  | 1300b0b04b | ||
|  | e6d984b484 | ||
|  | 1d18d95d4f | ||
|  | ae39ec85a3 | ||
|  | b96daf53a0 | ||
|  | 46879e1658 | ||
|  | ae4de94798 | ||
|  | 0ab555b4f5 | ||
|  | 8e9be9f84f | ||
|  | d572170170 | ||
|  | 12ccc73cf5 | ||
|  | e7564f8330 | ||
|  | 91199a8ea0 | ||
|  | 0494feec98 | ||
|  | a16b1e134e | ||
|  | 769ad578f5 | ||
|  | eaac0044b5 | ||
|  | 56042f002c | ||
|  | 3bfd1f13e6 | ||
|  | 70ab598c96 | ||
|  | 1d0ca65e28 | ||
|  | 2bc4d0a20e | ||
|  | 092dcd4e04 | ||
|  | 4a8c4ccfba | ||
|  | 9b44189d5a | ||
|  | 7da4856e8e | ||
|  | aaf1e33a77 | ||
|  | 094c3d091a | ||
|  | 4b98e524a0 | ||
|  | 1a1f6d55f9 | ||
|  | 21421656ab | ||
|  | 6f687a67cd | ||
|  | b30754e762 | ||
|  | 1e429a0d57 | ||
|  | d38a4de36c | ||
|  | ef1b7db374 | ||
|  | 53a9aeb965 | ||
|  | e30fa9f4b8 | ||
|  | 58e8d0a10d | ||
|  | 62cf9cf638 | ||
|  | 0fb458879d | ||
|  | 725c513d94 | ||
| d8648307ff | |||
| 064315c00b | |||
|  | 7c6cc85df6 | ||
|  | a6691ef87c | ||
|  | 8e0ced627a | ||
|  | 0de314870d | ||
|  | ffb91e53d2 | ||
|  | f4e8bf2858 | ||
| a74c34315c | |||
|  | 69470ccc10 | ||
|  | b8b5934193 | ||
|  | 75856f2945 | ||
|  | 3c112a7a25 | ||
|  | ab3596d4d3 | ||
|  | a8c10b1933 | ||
|  | 15e801af3f | ||
|  | 0ffc235741 | ||
|  | 8e19c99c7d | ||
|  | a0bc0ad06f | ||
|  | a8fb2835ca | ||
|  | bc862ce3ab | ||
|  | 3267683e22 | ||
|  | f46a67ffb3 | ||
|  | f7b8383ef5 | ||
|  | 10f2872aae | ||
|  | cd73897b8d | 
							
								
								
									
										31
									
								
								TODO
									
									
									
									
									
								
							
							
						
						
									
										31
									
								
								TODO
									
									
									
									
									
								
							| @@ -1,23 +1,30 @@ | ||||
| TODO: | ||||
| --------------- | ||||
|  | ||||
| Peter's work list: | ||||
| 2)- Precision conversion and sort out localConvert      <--  | ||||
| 3)- Remove DenseVector, DenseMatrix; Use Eigen instead. <-- started  | ||||
| 4)- Binary I/O speed up & x-strips | ||||
| -- Profile CG, BlockCG, etc... Flop count/rate -- PARTIAL, time but no flop/s yet | ||||
| -- Physical propagator interface | ||||
| -- Conserved currents | ||||
| -- GaugeFix into central location | ||||
| -- Multigrid Wilson and DWF, compare to other Multigrid implementations | ||||
| -- HDCR resume | ||||
| Large item work list: | ||||
| 1)- MultiRHS with spread out extra dim -- Go through filesystem with SciDAC I/O | ||||
|  | ||||
| 2)- Christoph's local basis expansion Lanczos | ||||
| 3)- BG/Q port and check | ||||
| 4)- Precision conversion and sort out localConvert      <-- partial | ||||
|   - Consistent linear solver flop count/rate -- PARTIAL, time but no flop/s yet | ||||
| 5)- Physical propagator interface | ||||
| 6)- Conserved currents | ||||
| 7)- Multigrid Wilson and DWF, compare to other Multigrid implementations | ||||
| 8)- HDCR resume | ||||
|  | ||||
| Recent DONE  | ||||
| -- Lanczos Remove DenseVector, DenseMatrix; Use Eigen instead. <-- DONE | ||||
| -- GaugeFix into central location                      <-- DONE | ||||
| -- Scidac and Ildg metadata handling                   <-- DONE | ||||
| -- Binary I/O MPI2 IO                                  <-- DONE | ||||
| -- Binary I/O speed up & x-strips                      <-- DONE | ||||
| -- Cut down the exterior overhead                      <-- DONE | ||||
| -- Interior legs from SHM comms                        <-- DONE | ||||
| -- Half-precision comms                                <-- DONE | ||||
| -- Merge high precision reduction into develop         | ||||
| -- multiRHS DWF; benchmark on Cori/BNL for comms elimination | ||||
| -- Merge high precision reduction into develop         <-- DONE | ||||
| -- BlockCG, BCGrQ                                      <-- DONE | ||||
| -- multiRHS DWF; benchmark on Cori/BNL for comms elimination <-- DONE | ||||
|    -- slice* linalg routines for multiRHS, BlockCG     | ||||
|  | ||||
| ----- | ||||
|   | ||||
| @@ -31,6 +31,32 @@ using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
// Summary statistics over a set of timing samples.
//   mean : arithmetic mean of the samples
//   err  : standard error of the mean, sqrt( sum (x-mean)^2 / (N*(N-1)) )
//   min  : smallest sample
//   max  : largest sample
struct time_statistics{
  double mean;
  double err;
  double min;
  double max;

  // Recompute all four members from the samples in v.
  //  - v is taken by const reference so the sample vector is not copied per call.
  //  - An empty v sets every member to 0 (nothing to dereference or divide by).
  //  - A single sample has no spread, so err is reported as 0 rather than 0/0.
  void statistics(const std::vector<double> &v){
      const auto n = v.size();
      if ( n == 0 ) {
        mean = err = min = max = 0.0;
        return;
      }

      mean = std::accumulate(v.begin(), v.end(), 0.0) / n;

      // Sum of squared deviations; the mean is captured by value explicitly
      // instead of reaching it through an implicit capture of `this`.
      const double centre = mean;
      const double sq_sum = std::accumulate(v.begin(), v.end(), 0.0,
                                            [centre](double acc, double x) {
                                              const double d = x - centre;
                                              return acc + d*d;
                                            });
      err = (n > 1) ? std::sqrt(sq_sum / (static_cast<double>(n)*(n - 1))) : 0.0;

      const auto result = std::minmax_element(v.begin(), v.end());
      min = *result.first;
      max = *result.second;
  }
};
|  | ||||
| void header(){ | ||||
|   std::cout <<GridLogMessage << " L  "<<"\t"<<" Ls  "<<"\t" | ||||
|             <<std::setw(11)<<"bytes"<<"MB/s uni (err/min/max)"<<"\t\t"<<"MB/s bidi (err/min/max)"<<std::endl; | ||||
| }; | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
| @@ -40,15 +66,19 @@ int main (int argc, char ** argv) | ||||
|   int threads = GridThread::GetThreads(); | ||||
|   std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl; | ||||
|  | ||||
|   int Nloop=10; | ||||
|   int Nloop=100; | ||||
|   int nmu=0; | ||||
|   int maxlat=24; | ||||
|   for(int mu=0;mu<Nd;mu++) if (mpi_layout[mu]>1) nmu++; | ||||
|  | ||||
|   std::cout << GridLogMessage << "Number of iterations to average: "<< Nloop << std::endl; | ||||
|   std::vector<double> t_time(Nloop); | ||||
|   time_statistics timestat; | ||||
|  | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking concurrent halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||
|   int maxlat=24; | ||||
|   header(); | ||||
|   for(int lat=4;lat<=maxlat;lat+=4){ | ||||
|     for(int Ls=8;Ls<=32;Ls*=2){ | ||||
|  | ||||
| @@ -58,6 +88,9 @@ int main (int argc, char ** argv) | ||||
|       				    lat*mpi_layout[3]}); | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       RealD Nrank = Grid._Nprocessors; | ||||
|       RealD Nnode = Grid.NodeCount(); | ||||
|       RealD ppn = Nrank/Nnode; | ||||
|  | ||||
|       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||
|       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||
| @@ -65,8 +98,8 @@ int main (int argc, char ** argv) | ||||
|       int ncomm; | ||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
|       double start=usecond(); | ||||
|  | ||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||
|  | ||||
| @@ -102,18 +135,24 @@ int main (int argc, char ** argv) | ||||
| 	} | ||||
| 	Grid.SendToRecvFromComplete(requests); | ||||
| 	Grid.Barrier(); | ||||
|  | ||||
| 	double stop=usecond(); | ||||
| 	t_time[i] = stop-start; // microseconds | ||||
|       } | ||||
|       double stop=usecond(); | ||||
|  | ||||
|       double dbytes    = bytes; | ||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||
|       timestat.statistics(t_time); | ||||
|  | ||||
|       double dbytes    = bytes*ppn; | ||||
|       double xbytes    = dbytes*2.0*ncomm; | ||||
|       double rbytes    = xbytes; | ||||
|       double bidibytes = xbytes+rbytes; | ||||
|  | ||||
|       double time = stop-start; // microseconds | ||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||
|  | ||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||
|     } | ||||
|   }     | ||||
|  | ||||
| @@ -121,8 +160,7 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||
|  | ||||
|   header(); | ||||
|  | ||||
|   for(int lat=4;lat<=maxlat;lat+=4){ | ||||
|     for(int Ls=8;Ls<=32;Ls*=2){ | ||||
| @@ -130,6 +168,9 @@ int main (int argc, char ** argv) | ||||
|       std::vector<int> latt_size  ({lat,lat,lat,lat}); | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       RealD Nrank = Grid._Nprocessors; | ||||
|       RealD Nnode = Grid.NodeCount(); | ||||
|       RealD ppn = Nrank/Nnode; | ||||
|  | ||||
|       std::vector<std::vector<HalfSpinColourVectorD> > xbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||
|       std::vector<std::vector<HalfSpinColourVectorD> > rbuf(8,std::vector<HalfSpinColourVectorD>(lat*lat*lat*Ls)); | ||||
| @@ -138,8 +179,8 @@ int main (int argc, char ** argv) | ||||
|       int ncomm; | ||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
|       double start=usecond(); | ||||
|      | ||||
| 	ncomm=0; | ||||
| 	for(int mu=0;mu<4;mu++){ | ||||
| @@ -178,27 +219,34 @@ int main (int argc, char ** argv) | ||||
| 	  } | ||||
| 	} | ||||
| 	Grid.Barrier(); | ||||
| 	double stop=usecond(); | ||||
| 	t_time[i] = stop-start; // microseconds | ||||
|  | ||||
|       } | ||||
|  | ||||
|       double stop=usecond(); | ||||
|       timestat.statistics(t_time); | ||||
|        | ||||
|       double dbytes    = bytes; | ||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||
|       double dbytes    = bytes*ppn; | ||||
|       double xbytes    = dbytes*2.0*ncomm; | ||||
|       double rbytes    = xbytes; | ||||
|       double bidibytes = xbytes+rbytes; | ||||
|  | ||||
|       double time = stop-start; | ||||
|     std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||
|  | ||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||
|        | ||||
|     } | ||||
|   }   | ||||
|  | ||||
|  | ||||
|   Nloop=10; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking concurrent STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||
|   header(); | ||||
|  | ||||
|   for(int lat=4;lat<=maxlat;lat+=4){ | ||||
|     for(int Ls=8;Ls<=32;Ls*=2){ | ||||
| @@ -209,6 +257,9 @@ int main (int argc, char ** argv) | ||||
|       				    lat*mpi_layout[3]}); | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       RealD Nrank = Grid._Nprocessors; | ||||
|       RealD Nnode = Grid.NodeCount(); | ||||
|       RealD ppn = Nrank/Nnode; | ||||
|  | ||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||
| @@ -216,70 +267,83 @@ int main (int argc, char ** argv) | ||||
|       for(int d=0;d<8;d++){ | ||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
|       } | ||||
|  | ||||
|       int ncomm; | ||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       double dbytes; | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
| 	double start=usecond(); | ||||
|  | ||||
| 	dbytes=0; | ||||
| 	ncomm=0; | ||||
|  | ||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||
|  | ||||
| 	ncomm=0; | ||||
| 	for(int mu=0;mu<4;mu++){ | ||||
| 	 | ||||
|  | ||||
| 	  if (mpi_layout[mu]>1 ) { | ||||
| 	   | ||||
| 	    ncomm++; | ||||
| 	    int comm_proc=1; | ||||
| 	    int xmit_to_rank; | ||||
| 	    int recv_from_rank; | ||||
| 	     | ||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					    (void *)&xbuf[mu][0], | ||||
| 					    xmit_to_rank, | ||||
| 					    (void *)&rbuf[mu][0], | ||||
| 					    recv_from_rank, | ||||
| 					    bytes); | ||||
| 	    dbytes+= | ||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					      (void *)&xbuf[mu][0], | ||||
| 					      xmit_to_rank, | ||||
| 					      (void *)&rbuf[mu][0], | ||||
| 					      recv_from_rank, | ||||
| 					      bytes); | ||||
| 	 | ||||
| 	    comm_proc = mpi_layout[mu]-1; | ||||
| 	   | ||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					    (void *)&xbuf[mu+4][0], | ||||
| 					    xmit_to_rank, | ||||
| 					    (void *)&rbuf[mu+4][0], | ||||
| 					    recv_from_rank, | ||||
| 					    bytes); | ||||
| 	    dbytes+= | ||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					      (void *)&xbuf[mu+4][0], | ||||
| 					      xmit_to_rank, | ||||
| 					      (void *)&rbuf[mu+4][0], | ||||
| 					      recv_from_rank, | ||||
| 					      bytes); | ||||
| 	   | ||||
| 	  } | ||||
| 	} | ||||
| 	Grid.StencilSendToRecvFromComplete(requests); | ||||
| 	Grid.Barrier(); | ||||
|  | ||||
| 	double stop=usecond(); | ||||
| 	t_time[i] = stop-start; // microseconds | ||||
| 	 | ||||
|       } | ||||
|       double stop=usecond(); | ||||
|  | ||||
|       double dbytes    = bytes; | ||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||
|       double rbytes    = xbytes; | ||||
|       double bidibytes = xbytes+rbytes; | ||||
|       timestat.statistics(t_time); | ||||
|  | ||||
|       dbytes=dbytes*ppn; | ||||
|       double xbytes    = dbytes*0.5; | ||||
|       double rbytes    = dbytes*0.5; | ||||
|       double bidibytes = dbytes; | ||||
|  | ||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||
|  | ||||
|       double time = stop-start; // microseconds | ||||
|  | ||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||
|     } | ||||
|   }     | ||||
|  | ||||
|  | ||||
|  | ||||
|   Nloop=100; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "= Benchmarking sequential STENCIL halo exchange in "<<nmu<<" dimensions"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<" Ls  "<<"\t\t"<<"bytes"<<"\t\t"<<"MB/s uni"<<"\t\t"<<"MB/s bidi"<<std::endl; | ||||
|   header(); | ||||
|  | ||||
|   for(int lat=4;lat<=maxlat;lat+=4){ | ||||
|     for(int Ls=8;Ls<=32;Ls*=2){ | ||||
| @@ -290,6 +354,9 @@ int main (int argc, char ** argv) | ||||
|       				    lat*mpi_layout[3]}); | ||||
|  | ||||
|       GridCartesian     Grid(latt_size,simd_layout,mpi_layout); | ||||
|       RealD Nrank = Grid._Nprocessors; | ||||
|       RealD Nnode = Grid.NodeCount(); | ||||
|       RealD ppn = Nrank/Nnode; | ||||
|  | ||||
|       std::vector<HalfSpinColourVectorD *> xbuf(8); | ||||
|       std::vector<HalfSpinColourVectorD *> rbuf(8); | ||||
| @@ -297,16 +364,18 @@ int main (int argc, char ** argv) | ||||
|       for(int d=0;d<8;d++){ | ||||
| 	xbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	rbuf[d] = (HalfSpinColourVectorD *)Grid.ShmBufferMalloc(lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	bzero((void *)xbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
| 	bzero((void *)rbuf[d],lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD)); | ||||
|       } | ||||
|  | ||||
|       int ncomm; | ||||
|       int bytes=lat*lat*lat*Ls*sizeof(HalfSpinColourVectorD); | ||||
|  | ||||
|       double start=usecond(); | ||||
|       double dbytes; | ||||
|       for(int i=0;i<Nloop;i++){ | ||||
| 	double start=usecond(); | ||||
|  | ||||
| 	std::vector<CartesianCommunicator::CommsRequest_t> requests; | ||||
|  | ||||
| 	dbytes=0; | ||||
| 	ncomm=0; | ||||
| 	for(int mu=0;mu<4;mu++){ | ||||
| 	 | ||||
| @@ -318,42 +387,52 @@ int main (int argc, char ** argv) | ||||
| 	    int recv_from_rank; | ||||
| 	     | ||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					    (void *)&xbuf[mu][0], | ||||
| 					    xmit_to_rank, | ||||
| 					    (void *)&rbuf[mu][0], | ||||
| 					    recv_from_rank, | ||||
| 					    bytes); | ||||
| 	    dbytes+= | ||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					      (void *)&xbuf[mu][0], | ||||
| 					      xmit_to_rank, | ||||
| 					      (void *)&rbuf[mu][0], | ||||
| 					      recv_from_rank, | ||||
| 					      bytes); | ||||
| 	    Grid.StencilSendToRecvFromComplete(requests); | ||||
| 	    requests.resize(0); | ||||
|  | ||||
| 	    comm_proc = mpi_layout[mu]-1; | ||||
| 	   | ||||
| 	    Grid.ShiftedRanks(mu,comm_proc,xmit_to_rank,recv_from_rank); | ||||
| 	    Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					    (void *)&xbuf[mu+4][0], | ||||
| 					    xmit_to_rank, | ||||
| 					    (void *)&rbuf[mu+4][0], | ||||
| 					    recv_from_rank, | ||||
| 					    bytes); | ||||
| 	    dbytes+= | ||||
| 	      Grid.StencilSendToRecvFromBegin(requests, | ||||
| 					      (void *)&xbuf[mu+4][0], | ||||
| 					      xmit_to_rank, | ||||
| 					      (void *)&rbuf[mu+4][0], | ||||
| 					      recv_from_rank, | ||||
| 					      bytes); | ||||
| 	    Grid.StencilSendToRecvFromComplete(requests); | ||||
| 	    requests.resize(0); | ||||
| 	   | ||||
| 	  } | ||||
| 	} | ||||
| 	Grid.Barrier(); | ||||
|  | ||||
| 	double stop=usecond(); | ||||
| 	t_time[i] = stop-start; // microseconds | ||||
| 	 | ||||
|       } | ||||
|       double stop=usecond(); | ||||
|  | ||||
|       double dbytes    = bytes; | ||||
|       double xbytes    = Nloop*dbytes*2.0*ncomm; | ||||
|       double rbytes    = xbytes; | ||||
|       double bidibytes = xbytes+rbytes; | ||||
|       timestat.statistics(t_time); | ||||
|  | ||||
|       double time = stop-start; // microseconds | ||||
|       dbytes=dbytes*ppn; | ||||
|       double xbytes    = dbytes*0.5; | ||||
|       double rbytes    = dbytes*0.5; | ||||
|       double bidibytes = dbytes; | ||||
|  | ||||
|       std::cout<<GridLogMessage << lat<<"\t\t"<<Ls<<"\t\t"<<bytes<<"\t\t"<<xbytes/time<<"\t\t"<<bidibytes/time<<std::endl; | ||||
|  | ||||
|       std::cout<<GridLogMessage << std::setw(4) << lat<<"\t"<<Ls<<"\t" | ||||
|                <<std::setw(11) << bytes<< std::fixed << std::setprecision(1) << std::setw(7) | ||||
|                <<std::right<< xbytes/timestat.mean<<"  "<< xbytes*timestat.err/(timestat.mean*timestat.mean)<< " " | ||||
|                <<xbytes/timestat.max <<" "<< xbytes/timestat.min   | ||||
|                << "\t\t"<<std::setw(7)<< bidibytes/timestat.mean<< "  " << bidibytes*timestat.err/(timestat.mean*timestat.mean) << " " | ||||
|                << bidibytes/timestat.max << " " << bidibytes/timestat.min << std::endl; | ||||
|   | ||||
|     } | ||||
|   }     | ||||
|  | ||||
|   | ||||
| @@ -55,8 +55,8 @@ int main (int argc, char ** argv) | ||||
|   std::cout<<GridLogMessage << "===================================================================================================="<<std::endl; | ||||
|   std::cout<<GridLogMessage << "  L  "<<"\t\t"<<"bytes"<<"\t\t\t"<<"GB/s"<<"\t\t"<<"Gflop/s"<<"\t\t seconds"<<std::endl; | ||||
|   std::cout<<GridLogMessage << "----------------------------------------------------------"<<std::endl; | ||||
|   uint64_t lmax=44; | ||||
| #define NLOOP (1*lmax*lmax*lmax*lmax/vol) | ||||
|   uint64_t lmax=64; | ||||
| #define NLOOP (100*lmax*lmax*lmax*lmax/vol) | ||||
|   for(int lat=4;lat<=lmax;lat+=4){ | ||||
|  | ||||
|       std::vector<int> latt_size  ({lat*mpi_layout[0],lat*mpi_layout[1],lat*mpi_layout[2],lat*mpi_layout[3]}); | ||||
|   | ||||
| @@ -35,9 +35,9 @@ using namespace Grid::QCD; | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
| #define LMAX (32) | ||||
| #define LMAX (64) | ||||
|  | ||||
|   int Nloop=200; | ||||
|   int Nloop=20; | ||||
|  | ||||
|   std::vector<int> simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd()); | ||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| ]#!/usr/bin/env bash | ||||
| #!/usr/bin/env bash | ||||
|  | ||||
| EIGEN_URL='http://bitbucket.org/eigen/eigen/get/3.3.3.tar.bz2' | ||||
|  | ||||
|   | ||||
| @@ -27,7 +27,7 @@ AX_GXX_VERSION | ||||
| AC_DEFINE_UNQUOTED([GXX_VERSION],["$GXX_VERSION"], | ||||
|       [version of g++ that will compile the code]) | ||||
|  | ||||
| CXXFLAGS="-O3 $CXXFLAGS" | ||||
| CXXFLAGS="-g $CXXFLAGS" | ||||
|  | ||||
|  | ||||
| ############### Checks for typedefs, structures, and compiler characteristics | ||||
| @@ -184,6 +184,10 @@ AC_SEARCH_LIBS([limeCreateReader], [lime], | ||||
| In order to use ILGG file format please install or provide the correct path to your installation | ||||
| Info at: http://usqcd.jlab.org/usqcd-docs/c-lime/)]) | ||||
|  | ||||
| AC_SEARCH_LIBS([crc32], [z], | ||||
|                [AC_DEFINE([HAVE_ZLIB], [1], [Define to 1 if you have the `LIBZ' library])] | ||||
|                [have_zlib=true], | ||||
| 	       [AC_MSG_ERROR(zlib library was not found in your system.)]) | ||||
|  | ||||
| AC_SEARCH_LIBS([H5Fopen], [hdf5_cpp], | ||||
|                [AC_DEFINE([HAVE_HDF5], [1], [Define to 1 if you have the `HDF5' library])] | ||||
|   | ||||
| @@ -48,7 +48,8 @@ public: | ||||
|                                     std::string, gauge, | ||||
|                                     unsigned int, Ls, | ||||
|                                     double      , mass, | ||||
|                                     double      , M5); | ||||
|                                     double      , M5, | ||||
|                                     std::string , boundary); | ||||
| }; | ||||
|  | ||||
| template <typename FImpl> | ||||
| @@ -116,14 +117,19 @@ void TDWF<FImpl>::execute(void) | ||||
|                  << par().mass << ", M5= " << par().M5 << " and Ls= " | ||||
|                  << par().Ls << " using gauge field '" << par().gauge << "'" | ||||
|                  << std::endl; | ||||
|     LOG(Message) << "Fermion boundary conditions: " << par().boundary  | ||||
|                  << std::endl; | ||||
|     env().createGrid(par().Ls); | ||||
|     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); | ||||
|     auto &g4     = *env().getGrid(); | ||||
|     auto &grb4   = *env().getRbGrid(); | ||||
|     auto &g5     = *env().getGrid(par().Ls); | ||||
|     auto &grb5   = *env().getRbGrid(par().Ls); | ||||
|     std::vector<Complex> boundary = strToVec<Complex>(par().boundary); | ||||
|     typename DomainWallFermion<FImpl>::ImplParams implParams(boundary); | ||||
|     FMat *fMatPt = new DomainWallFermion<FImpl>(U, g5, grb5, g4, grb4, | ||||
|                                                 par().mass, par().M5); | ||||
|                                                 par().mass, par().M5, | ||||
|                                                 implParams); | ||||
|     env().setObject(getName(), fMatPt); | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -46,7 +46,8 @@ class WilsonPar: Serializable | ||||
| public: | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WilsonPar, | ||||
|                                     std::string, gauge, | ||||
|                                     double     , mass); | ||||
|                                     double     , mass, | ||||
|                                     std::string, boundary); | ||||
| }; | ||||
|  | ||||
| template <typename FImpl> | ||||
| @@ -112,10 +113,15 @@ void TWilson<FImpl>::execute() | ||||
| { | ||||
|     LOG(Message) << "Setting up TWilson fermion matrix with m= " << par().mass | ||||
|                  << " using gauge field '" << par().gauge << "'" << std::endl; | ||||
|     LOG(Message) << "Fermion boundary conditions: " << par().boundary  | ||||
|                  << std::endl; | ||||
|     auto &U      = *env().template getObject<LatticeGaugeField>(par().gauge); | ||||
|     auto &grid   = *env().getGrid(); | ||||
|     auto &gridRb = *env().getRbGrid(); | ||||
|     FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass); | ||||
|     std::vector<Complex> boundary = strToVec<Complex>(par().boundary); | ||||
|     typename WilsonFermion<FImpl>::ImplParams implParams(boundary); | ||||
|     FMat *fMatPt = new WilsonFermion<FImpl>(U, grid, gridRb, par().mass, | ||||
|                                             implParams); | ||||
|     env().setObject(getName(), fMatPt); | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -131,12 +131,11 @@ std::vector<std::string> TMeson<FImpl1, FImpl2>::getOutput(void) | ||||
| template <typename FImpl1, typename FImpl2> | ||||
| void TMeson<FImpl1, FImpl2>::parseGammaString(std::vector<GammaPair> &gammaList) | ||||
| { | ||||
|     gammaList.clear(); | ||||
|     // Determine gamma matrices to insert at source/sink. | ||||
|     if (par().gammas.compare("all") == 0) | ||||
|     { | ||||
|         // Do all contractions. | ||||
|         unsigned int n_gam = Ns * Ns; | ||||
|         gammaList.resize(n_gam*n_gam); | ||||
|         for (unsigned int i = 1; i < Gamma::nGamma; i += 2) | ||||
|         { | ||||
|             for (unsigned int j = 1; j < Gamma::nGamma; j += 2) | ||||
|   | ||||
| @@ -65,7 +65,7 @@ void TLoad::setup(void) | ||||
| // execution /////////////////////////////////////////////////////////////////// | ||||
| void TLoad::execute(void) | ||||
| { | ||||
|     NerscField  header; | ||||
|     FieldMetaData  header; | ||||
|     std::string fileName = par().file + "." | ||||
|                            + std::to_string(env().getTrajectory()); | ||||
|      | ||||
| @@ -74,5 +74,5 @@ void TLoad::execute(void) | ||||
|     LatticeGaugeField &U = *env().createLattice<LatticeGaugeField>(getName()); | ||||
|     NerscIO::readConfiguration(U, header, fileName); | ||||
|     LOG(Message) << "NERSC header:" << std::endl; | ||||
|     dump_nersc_header(header, LOG(Message)); | ||||
|     dump_meta_data(header, LOG(Message)); | ||||
| } | ||||
|   | ||||
| @@ -41,7 +41,9 @@ Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
| #include <Grid/GridCore.h> | ||||
| #include <Grid/GridQCDcore.h> | ||||
| #include <Grid/qcd/action/Action.h> | ||||
| #include <Grid/qcd/utils/GaugeFix.h> | ||||
| #include <Grid/qcd/smearing/Smearing.h> | ||||
| #include <Grid/parallelIO/MetaData.h> | ||||
| #include <Grid/qcd/hmc/HMC_aggregate.h> | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -7,6 +7,7 @@ | ||||
| #include <cassert> | ||||
| #include <complex> | ||||
| #include <vector> | ||||
| #include <string> | ||||
| #include <iostream> | ||||
| #include <iomanip> | ||||
| #include <random> | ||||
| @@ -18,6 +19,7 @@ | ||||
| #include <ctime> | ||||
| #include <sys/time.h> | ||||
| #include <chrono> | ||||
| #include <zlib.h> | ||||
|  | ||||
| /////////////////// | ||||
| // Grid config | ||||
|   | ||||
| @@ -1,137 +0,0 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/DenseMatrix.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef GRID_DENSE_MATRIX_H | ||||
| #define GRID_DENSE_MATRIX_H | ||||
|  | ||||
| namespace Grid { | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     // Matrix utils | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|  | ||||
| template<class T> using DenseVector = std::vector<T>; | ||||
| template<class T> using DenseMatrix = DenseVector<DenseVector<T> >; | ||||
|  | ||||
| /** Report the number of elements of a dense vector through N. **/ | ||||
| template<class T> void Size(DenseVector<T> & vec, int &N) | ||||
| { | ||||
|   const auto count = vec.size(); | ||||
|   N = count; | ||||
| } | ||||
| /** | ||||
|    Report the dimensions of a dense matrix: N rows and M columns. | ||||
|    The column count is read from the first row only. An empty matrix | ||||
|    reports M = 0 rather than indexing a row that does not exist. | ||||
|  **/ | ||||
| template<class T> void Size(DenseMatrix<T> & mat, int &N,int &M) | ||||
| { | ||||
|   N= mat.size(); | ||||
|   M= mat.empty() ? 0 : mat[0].size(); | ||||
| } | ||||
|  | ||||
| /** Report the row count N of a matrix, asserting that it equals the column count. **/ | ||||
| template<class T> void SizeSquare(DenseMatrix<T> & mat, int &N) | ||||
| { | ||||
|   int cols; | ||||
|   Size(mat,N,cols); | ||||
|   assert(N==cols); | ||||
| } | ||||
|  | ||||
| /** Resize a dense vector to hold N elements. **/ | ||||
| template<class T> void Resize(DenseVector<T > & mat, int N) | ||||
| { | ||||
|   mat.resize(N); | ||||
| } | ||||
| /** Resize a dense matrix to N rows, each holding M columns. **/ | ||||
| template<class T> void Resize(DenseMatrix<T > & mat, int N, int M) | ||||
| { | ||||
|   mat.resize(N); | ||||
|   for(auto & row : mat){ | ||||
|     row.resize(M); | ||||
|   } | ||||
| } | ||||
| /** | ||||
|    Assign val to every element of every row of a dense matrix. | ||||
|    val is taken by const reference so that temporaries and constants | ||||
|    can be passed; an empty matrix is left untouched. | ||||
|  **/ | ||||
| template<class T> void Fill(DenseMatrix<T> & mat, const T&val) { | ||||
|   for(auto & row : mat){ | ||||
|     for(auto & elem : row){ | ||||
|       elem = val; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** Return the transpose of an N x M matrix: an M x N matrix C with C[i][j] = mat[j][i]. **/ | ||||
| template<class T> DenseMatrix<T> Transpose(DenseMatrix<T> & mat){ | ||||
|   int rows,cols; | ||||
|   Size(mat,rows,cols); | ||||
|   DenseMatrix<T> result; | ||||
|   Resize(result,cols,rows); | ||||
|   for(int c=0;c<cols;c++){ | ||||
|     for(int r=0;r<rows;r++){ | ||||
|       result[c][r] = mat[r][c]; | ||||
|     } | ||||
|   } | ||||
|   return result; | ||||
| } | ||||
| /** Overwrite a square matrix with the identity: 1 on the diagonal, 0 elsewhere. **/ | ||||
| template<class T> void Unity(DenseMatrix<T> &A){ | ||||
|   int N; | ||||
|   SizeSquare(A,N); | ||||
|   for(int row=0;row<N;row++){ | ||||
|     for(int col=0;col<N;col++){ | ||||
|       A[row][col] = (row==col) ? 1 : 0; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** Add c times the identity to a square matrix, i.e. add c to each diagonal entry. **/ | ||||
| template<class T> | ||||
| void PlusUnit(DenseMatrix<T> & A,T c){ | ||||
|   int n; | ||||
|   SizeSquare(A,n); | ||||
|   for(int d=0;d<n;d++){ | ||||
|     A[d][d] = A[d][d] + c; | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** Return the Hermitian conjugate of a square matrix: C[i][j] = conj(mat[j][i]). **/ | ||||
| template<class T> | ||||
| DenseMatrix<T> HermitianConj(DenseMatrix<T> &mat){ | ||||
|   int n; | ||||
|   SizeSquare(mat,n); | ||||
|   DenseMatrix<T> result; | ||||
|   Resize(result,n,n); | ||||
|   for(int row=0;row<n;row++){ | ||||
|     for(int col=0;col<n;col++){ | ||||
|       result[row][col] = conj(mat[col][row]); | ||||
|     } | ||||
|   } | ||||
|   return result; | ||||
| } | ||||
| /** | ||||
|    Copy a rectangular block out of A. | ||||
|    Rows row_st .. row_end-1 and columns col_st .. col_end-1 are copied | ||||
|    (both end indices are exclusive); the result is indexed from zero. | ||||
|  **/ | ||||
| template <class T> | ||||
| DenseMatrix<T> GetSubMtx(DenseMatrix<T> &A,int row_st, int row_end, int col_st, int col_end) | ||||
| { | ||||
|   const int nrows = row_end - row_st; | ||||
|   const int ncols = col_end - col_st; | ||||
|   DenseMatrix<T> sub; | ||||
|   Resize(sub,nrows,ncols); | ||||
|   for(int r=0;r<nrows;r++){ | ||||
|     for(int c=0;c<ncols;c++){ | ||||
|       sub[r][c] = A[row_st+r][col_st+c]; | ||||
|     } | ||||
|   } | ||||
|   return sub; | ||||
| } | ||||
|  | ||||
| } | ||||
|  | ||||
| #include "Householder.h" | ||||
| #include "Francis.h" | ||||
|  | ||||
| #endif | ||||
|  | ||||
| @@ -1,525 +0,0 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/Francis.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef FRANCIS_H | ||||
| #define FRANCIS_H | ||||
|  | ||||
| #include <cstdlib> | ||||
| #include <string> | ||||
| #include <cmath> | ||||
| #include <iostream> | ||||
| #include <sstream> | ||||
| #include <stdexcept> | ||||
| #include <fstream> | ||||
| #include <complex> | ||||
| #include <algorithm> | ||||
|  | ||||
| //#include <timer.h> | ||||
| //#include <lapacke.h> | ||||
| //#include <Eigen/Dense> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| template <class T> int SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small); | ||||
| template <class T> int     Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small); | ||||
|  | ||||
| /** | ||||
|   Find the eigenvalues of an upper hessenberg matrix using the Francis QR algorithm. | ||||
| H = | ||||
|       x  x  x  x  x  x  x  x  x | ||||
|       x  x  x  x  x  x  x  x  x | ||||
|       0  x  x  x  x  x  x  x  x | ||||
|       0  0  x  x  x  x  x  x  x | ||||
|       0  0  0  x  x  x  x  x  x | ||||
|       0  0  0  0  x  x  x  x  x | ||||
|       0  0  0  0  0  x  x  x  x | ||||
|       0  0  0  0  0  0  x  x  x | ||||
|       0  0  0  0  0  0  0  x  x | ||||
| Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthogonal/unitary. | ||||
|  | ||||
|   Hin   : input matrix; it is copied into a local H and not modified here. | ||||
|   evals : filled with the eigenvalues found (order is reversed before the eigenvector solve). | ||||
|   evecs : filled by UTeigenvectors, then each entry is multiplied by the accumulated P and normalized. | ||||
|   small : threshold used for the Hessenberg-structure check and passed to Chop_subdiag. | ||||
|   Returns the total number of outer QR iterations performed. | ||||
|   Calls exit(1) if an element below the first subdiagonal exceeds small, or if the | ||||
|   per-eigenvalue iteration counter exceeds 100000. | ||||
| **/ | ||||
| template <class T> | ||||
| int QReigensystem(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small) | ||||
| { | ||||
|   DenseMatrix<T> H = Hin;  | ||||
|  | ||||
|   int N ; SizeSquare(H,N); | ||||
|   int M = N; | ||||
|  | ||||
|   Fill(evals,0); | ||||
|   Fill(evecs,0); | ||||
|  | ||||
|   T s,t,x=0,y=0,z=0; | ||||
|   T u,d; | ||||
|   T apd,amd,bc; | ||||
|   DenseVector<T> p(N,0); | ||||
|   T nrm = Norm(H);    ///DenseMatrix Norm | ||||
|   int n, m; | ||||
|   int e = 0;        // number of eigenvalues stored in evals so far | ||||
|   int it = 0;       // iterations since the last deflation (reset to 0 on each deflation) | ||||
|   int tot_it = 0;   // total iterations; this is the return value | ||||
|   int l = 0; | ||||
|   int r = 0; | ||||
|   DenseMatrix<T> P; Resize(P,N,N); Unity(P); | ||||
|   DenseVector<int> trows(N,0); | ||||
|  | ||||
|   /// Check if the matrix is really hessenberg, if not abort | ||||
|   RealD sth = 0; | ||||
|   for(int j=0;j<N;j++){ | ||||
|     for(int i=j+2;i<N;i++){ | ||||
|       sth = abs(H[i][j]); | ||||
|       if(sth > small){ | ||||
| 	std::cout << "Non hessenberg H = " << sth << " > " << small << std::endl; | ||||
| 	exit(1); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   do{ | ||||
|     std::cout << "Francis QR Step N = " << N << std::endl; | ||||
|     /** Check for convergence | ||||
|       x  x  x  x  x | ||||
|       0  x  x  x  x | ||||
|       0  0  x  x  x | ||||
|       0  0  x  x  x | ||||
|       0  0  0  0  x | ||||
|       for this matrix l = 4 | ||||
|      **/ | ||||
|     do{ | ||||
|       l = Chop_subdiag(H,nrm,e,small); | ||||
|       r = 0;    ///May have converged on more than one eval | ||||
|       ///Single eval | ||||
|       if(l == N-1){ | ||||
|         evals[e] = H[l][l]; | ||||
|         N--; e++; r++; it = 0; | ||||
|       } | ||||
|       ///RealD eval | ||||
|       // Eigenvalues of the trailing 2x2 block from its trace and discriminant. | ||||
|       if(l == N-2){ | ||||
|         trows[l+1] = 1;    ///Needed for UTSolve | ||||
|         apd = H[l][l] + H[l+1][l+1]; | ||||
|         amd = H[l][l] - H[l+1][l+1]; | ||||
|         bc =  (T)4.0*H[l+1][l]*H[l][l+1]; | ||||
|         evals[e]   = (T)0.5*( apd + sqrt(amd*amd + bc) ); | ||||
|         evals[e+1] = (T)0.5*( apd - sqrt(amd*amd + bc) ); | ||||
|         N-=2; e+=2; r++; it = 0; | ||||
|       } | ||||
|     } while(r>0); | ||||
|  | ||||
|     if(N ==0) break; | ||||
|  | ||||
|     DenseVector<T > ck; Resize(ck,3); | ||||
|     DenseVector<T> v;   Resize(v,3); | ||||
|  | ||||
|     for(int m = N-3; m >= l; m--){ | ||||
|       ///Starting vector essentially random shift. | ||||
|       if(it%10 == 0 && N >= 3 && it > 0){ | ||||
|         s = (T)1.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); | ||||
|         t = (T)0.618033989*( abs( H[N-1][N-2] ) + abs( H[N-2][N-3] ) ); | ||||
|         x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; | ||||
|         y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); | ||||
|         z = H[m+1][m]*H[m+2][m+1]; | ||||
|       } | ||||
|       ///Starting vector implicit Q theorem | ||||
|       else{ | ||||
|         s = (H[N-2][N-2] + H[N-1][N-1]); | ||||
|         t = (H[N-2][N-2]*H[N-1][N-1] - H[N-2][N-1]*H[N-1][N-2]); | ||||
|         x = H[m][m]*H[m][m] + H[m][m+1]*H[m+1][m] - s*H[m][m] + t; | ||||
|         y = H[m+1][m]*(H[m][m] + H[m+1][m+1] - s); | ||||
|         z = H[m+1][m]*H[m+2][m+1]; | ||||
|       } | ||||
|       ck[0] = x; ck[1] = y; ck[2] = z; | ||||
|  | ||||
|       if(m == l) break; | ||||
|  | ||||
|       /** Some stupid thing from numerical recipies, seems to work**/ | ||||
|       // PAB.. for heaven's sake quote page, purpose, evidence it works. | ||||
|       //       what sort of comment is that!?!?!? | ||||
|       // The test below is true when adding u to d does not change d in type T. | ||||
|       u=abs(H[m][m-1])*(abs(y)+abs(z)); | ||||
|       d=abs(x)*(abs(H[m-1][m-1])+abs(H[m][m])+abs(H[m+1][m+1])); | ||||
|       if ((T)abs(u+d) == (T)abs(d) ){ | ||||
| 	l = m; break; | ||||
|       } | ||||
|  | ||||
|       //if (u < small){l = m; break;} | ||||
|     } | ||||
|     if(it > 100000){ | ||||
|      std::cout << "QReigensystem: bugger it got stuck after 100000 iterations" << std::endl; | ||||
|      std::cout << "got " << e << " evals " << l << " " << N << std::endl; | ||||
|       exit(1); | ||||
|     } | ||||
|     normalize(ck);    ///Normalization cancels in PHP anyway | ||||
|     T beta; | ||||
|     Householder_vector<T >(ck, 0, 2, v, beta); | ||||
|     Householder_mult<T >(H,v,beta,0,l,l+2,0); | ||||
|     Householder_mult<T >(H,v,beta,0,l,l+2,1); | ||||
|     ///Accumulate eigenvector | ||||
|     Householder_mult<T >(P,v,beta,0,l,l+2,1); | ||||
|     int sw = 0;      ///Are we on the last row? | ||||
|     for(int k=l;k<N-2;k++){ | ||||
|       x = H[k+1][k]; | ||||
|       y = H[k+2][k]; | ||||
|       z = (T)0.0; | ||||
|       if(k+3 <= N-1){ | ||||
| 	z = H[k+3][k]; | ||||
|       } else{ | ||||
| 	sw = 1;  | ||||
| 	v[2] = (T)0.0; | ||||
|       } | ||||
|       ck[0] = x; ck[1] = y; ck[2] = z; | ||||
|       normalize(ck); | ||||
|       Householder_vector<T >(ck, 0, 2-sw, v, beta); | ||||
|       Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,0); | ||||
|       Householder_mult<T >(H,v, beta,0,k+1,k+3-sw,1); | ||||
|       ///Accumulate eigenvector | ||||
|       Householder_mult<T >(P,v, beta,0,k+1,k+3-sw,1); | ||||
|     } | ||||
|     it++; | ||||
|     tot_it++; | ||||
|   }while(N > 1); | ||||
|   N = evals.size(); | ||||
|   ///Annoying - UT solves in reverse order; | ||||
|   DenseVector<T> tmp; Resize(tmp,N); | ||||
|   for(int i=0;i<N;i++){ | ||||
|     tmp[i] = evals[N-i-1]; | ||||
|   }  | ||||
|   evals = tmp; | ||||
|   UTeigenvectors(H, trows, evals, evecs); | ||||
|   // Rotate each eigenvector by the accumulated transformation P, then normalize it. | ||||
|   for(int i=0;i<evals.size();i++){evecs[i] = P*evecs[i]; normalize(evecs[i]);} | ||||
|   return tot_it; | ||||
| } | ||||
|  | ||||
| // Forward declaration of the five-argument overload defined further down in this file. | ||||
| // Without it the call below is only found through argument-dependent lookup, which | ||||
| // fails when T and the container types live outside namespace Grid. | ||||
| template <class T> | ||||
| int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol); | ||||
|  | ||||
| /** | ||||
|   Find the eigenvalues of a tridiagonal matrix using the Wilkinson QR algorithm. | ||||
|   H = | ||||
|   x  x  0  0  0  0 | ||||
|   x  x  x  0  0  0 | ||||
|   0  x  x  x  0  0 | ||||
|   0  0  x  x  x  0 | ||||
|   0  0  0  x  x  x | ||||
|   0  0  0  0  x  x | ||||
|   Factorization is P T P^H where T is upper triangular (mod cc blocks) and P is orthogonal/unitary. | ||||
|  | ||||
|   Convenience overload: forwards to the five-argument version, passing small as | ||||
|   both the convergence threshold and the structure-check tolerance. | ||||
|   Returns whatever the five-argument version returns (its iteration count). | ||||
| **/ | ||||
| template <class T> | ||||
| int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small) | ||||
| { | ||||
|   return my_Wilkinson(Hin, evals, evecs, small, small); | ||||
| } | ||||
|  | ||||
| /** | ||||
|   Wilkinson-shift QR iteration on a (nominally symmetric tridiagonal) matrix. | ||||
|  | ||||
|   Hin   : input matrix; copied into a local H which is scaled by 1/|Norm(Hin)|. | ||||
|   evals : filled with the eigenvalues, rescaled by the same norm before returning. | ||||
|   evecs : filled by UTeigenvectors, then multiplied by the accumulated rotations P and normalized. | ||||
|   small : threshold passed to Chop_symm_subdiag. | ||||
|   tol   : tolerance for the tridiagonal-structure check; a violation only prints a warning. | ||||
|   Returns the total number of outer iterations. Calls exit(1) if the per-eigenvalue | ||||
|   iteration counter exceeds 1000000. | ||||
|  | ||||
|   NOTE(review): the "stuck" message says 100000 while the limit tested is 1000000. | ||||
|   NOTE(review): y is never assigned in this function, so it is 0 in the u computation. | ||||
| **/ | ||||
| template <class T> | ||||
| int my_Wilkinson(DenseMatrix<T> &Hin, DenseVector<T> &evals, DenseMatrix<T> &evecs, RealD small, RealD tol) | ||||
| { | ||||
|   int N; SizeSquare(Hin,N); | ||||
|   int M = N; | ||||
|  | ||||
|   ///I don't want to modify the input but matrices must be passed by reference | ||||
|   //Scale a matrix by its "norm" | ||||
|   //RealD Hnorm = abs( Hin.LargestDiag() ); H =  H*(1.0/Hnorm); | ||||
|   DenseMatrix<T> H;  H = Hin; | ||||
|    | ||||
|   RealD Hnorm = abs(Norm(Hin)); | ||||
|   H = H * (1.0 / Hnorm); | ||||
|  | ||||
|   // TODO use openmp and memset | ||||
|   Fill(evals,0); | ||||
|   Fill(evecs,0); | ||||
|  | ||||
|   T s, t, x = 0, y = 0, z = 0; | ||||
|   T u, d; | ||||
|   T apd, amd, bc; | ||||
|   DenseVector<T> p; Resize(p,N); Fill(p,0); | ||||
|  | ||||
|   T nrm = Norm(H);    ///DenseMatrix Norm | ||||
|   int n, m; | ||||
|   int e = 0; | ||||
|   int it = 0; | ||||
|   int tot_it = 0; | ||||
|   int l = 0; | ||||
|   int r = 0; | ||||
|   DenseMatrix<T> P; Resize(P,N,N); | ||||
|   Unity(P); | ||||
|   DenseVector<int> trows(N, 0); | ||||
|   /// Check if the matrix is really symm tridiag | ||||
|   RealD sth = 0; | ||||
|   for(int j = 0; j < N; ++j) | ||||
|   { | ||||
|     for(int i = j + 2; i < N; ++i) | ||||
|     { | ||||
|       if(abs(H[i][j]) > tol || abs(H[j][i]) > tol) | ||||
|       { | ||||
| 	std::cout << "Non Tridiagonal H(" << i << ","<< j << ") = |" << Real( real( H[j][i] ) ) << "| > " << tol << std::endl; | ||||
| 	std::cout << "Warning tridiagonalize and call again" << std::endl; | ||||
|         // exit(1); // see what is going on | ||||
|         //return; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   do{ | ||||
|     do{ | ||||
|       //Jasper | ||||
|       //Check if the subdiagonal term is small enough (<small) | ||||
|       //if true then it is converged. | ||||
|       //check start from H.dim - e - 1 | ||||
|       //How to deal with more than 2 are converged? | ||||
|       //What if Chop_symm_subdiag return something int the middle? | ||||
|       //-------------- | ||||
|       l = Chop_symm_subdiag(H,nrm, e, small); | ||||
|       r = 0;    ///May have converged on more than one eval | ||||
|       //Jasper | ||||
|       //In this case | ||||
|       // x  x  0  0  0  0 | ||||
|       // x  x  x  0  0  0 | ||||
|       // 0  x  x  x  0  0 | ||||
|       // 0  0  x  x  x  0 | ||||
|       // 0  0  0  x  x  0 | ||||
|       // 0  0  0  0  0  x  <- l | ||||
|       //-------------- | ||||
|       ///Single eval | ||||
|       if(l == N - 1) | ||||
|       { | ||||
|         evals[e] = H[l][l]; | ||||
|         N--; | ||||
|         e++; | ||||
|         r++; | ||||
|         it = 0; | ||||
|       } | ||||
|       //Jasper | ||||
|       // x  x  0  0  0  0 | ||||
|       // x  x  x  0  0  0 | ||||
|       // 0  x  x  x  0  0 | ||||
|       // 0  0  x  x  0  0 | ||||
|       // 0  0  0  0  x  x  <- l | ||||
|       // 0  0  0  0  x  x | ||||
|       //-------------- | ||||
|       ///RealD eval | ||||
|       if(l == N - 2) | ||||
|       { | ||||
|         trows[l + 1] = 1;    ///Needed for UTSolve | ||||
|         apd = H[l][l] + H[l + 1][ l + 1]; | ||||
|         amd = H[l][l] - H[l + 1][l + 1]; | ||||
|         bc =  (T) 4.0 * H[l + 1][l] * H[l][l + 1]; | ||||
|         evals[e] = (T) 0.5 * (apd + sqrt(amd * amd + bc)); | ||||
|         evals[e + 1] = (T) 0.5 * (apd - sqrt(amd * amd + bc)); | ||||
|         N -= 2; | ||||
|         e += 2; | ||||
|         r++; | ||||
|         it = 0; | ||||
|       } | ||||
|     }while(r > 0); | ||||
|     //Jasper | ||||
|     //Already converged | ||||
|     //-------------- | ||||
|     if(N == 0) break; | ||||
|  | ||||
|     DenseVector<T> ck,v; Resize(ck,2); Resize(v,2); | ||||
|  | ||||
|     for(int m = N - 3; m >= l; m--) | ||||
|     { | ||||
|       ///Starting vector essentially random shift. | ||||
|       if(it%10 == 0 && N >= 3 && it > 0) | ||||
|       { | ||||
|         t = abs(H[N - 1][N - 2]) + abs(H[N - 2][N - 3]); | ||||
|         x = H[m][m] - t; | ||||
|         z = H[m + 1][m]; | ||||
|       } else { | ||||
|       ///Starting vector implicit Q theorem | ||||
|         d = (H[N - 2][N - 2] - H[N - 1][N - 1]) * (T) 0.5; | ||||
|         t =  H[N - 1][N - 1] - H[N - 1][N - 2] * H[N - 1][N - 2]  | ||||
| 	  / (d + sign(d) * sqrt(d * d + H[N - 1][N - 2] * H[N - 1][N - 2])); | ||||
|         x = H[m][m] - t; | ||||
|         z = H[m + 1][m]; | ||||
|       } | ||||
|       //Jasper | ||||
|       //why it is here???? | ||||
|       //----------------------- | ||||
|       if(m == l) | ||||
|         break; | ||||
|  | ||||
|       u = abs(H[m][m - 1]) * (abs(y) + abs(z)); | ||||
|       d = abs(x) * (abs(H[m - 1][m - 1]) + abs(H[m][m]) + abs(H[m + 1][m + 1])); | ||||
|       if ((T)abs(u + d) == (T)abs(d)) | ||||
|       { | ||||
|         l = m; | ||||
|         break; | ||||
|       } | ||||
|     } | ||||
|     //Jasper | ||||
|     if(it > 1000000) | ||||
|     { | ||||
|       std::cout << "Wilkinson: bugger it got stuck after 100000 iterations" << std::endl; | ||||
|       std::cout << "got " << e << " evals " << l << " " << N << std::endl; | ||||
|       exit(1); | ||||
|     } | ||||
|     // | ||||
|     T s, c; | ||||
|     Givens_calc<T>(x, z, c, s); | ||||
|     Givens_mult<T>(H, l, l + 1, c, -s, 0); | ||||
|     Givens_mult<T>(H, l, l + 1, c,  s, 1); | ||||
|     Givens_mult<T>(P, l, l + 1, c,  s, 1); | ||||
|     // | ||||
|     for(int k = l; k < N - 2; ++k) | ||||
|     { | ||||
|       // DenseMatrix is a vector of vectors, so elements are read with [][] directly. | ||||
|       x = H[k + 1][k]; | ||||
|       z = H[k + 2][k]; | ||||
|       Givens_calc<T>(x, z, c, s); | ||||
|       Givens_mult<T>(H, k + 1, k + 2, c, -s, 0); | ||||
|       Givens_mult<T>(H, k + 1, k + 2, c,  s, 1); | ||||
|       Givens_mult<T>(P, k + 1, k + 2, c,  s, 1); | ||||
|     } | ||||
|     it++; | ||||
|     tot_it++; | ||||
|   }while(N > 1); | ||||
|  | ||||
|   N = evals.size(); | ||||
|   ///Annoying - UT solves in reverse order; | ||||
|   DenseVector<T> tmp(N); | ||||
|   for(int i = 0; i < N; ++i) | ||||
|     tmp[i] = evals[N-i-1]; | ||||
|   evals = tmp; | ||||
|   // | ||||
|   UTeigenvectors(H, trows, evals, evecs); | ||||
|   //UTSymmEigenvectors(H, trows, evals, evecs); | ||||
|   for(int i = 0; i < evals.size(); ++i) | ||||
|   { | ||||
|     evecs[i] = P * evecs[i]; | ||||
|     normalize(evecs[i]); | ||||
|     evals[i] = evals[i] * Hnorm;   // undo the 1/Hnorm scaling applied to H above | ||||
|   } | ||||
|   // // FIXME this is to test | ||||
|   // Hin.write("evecs3", evecs); | ||||
|   // Hin.write("evals3", evals); | ||||
|   // // check rsd | ||||
|   // for(int i = 0; i < M; i++) { | ||||
|   //   vector<T> Aevec = Hin * evecs[i]; | ||||
|   //   RealD norm2(0.); | ||||
|   //   for(int j = 0; j < M; j++) { | ||||
|   //     norm2 += (Aevec[j] - evals[i] * evecs[i][j]) * (Aevec[j] - evals[i] * evecs[i][j]); | ||||
|   //   } | ||||
|   // } | ||||
|   return tot_it; | ||||
| } | ||||
|  | ||||
| /** | ||||
|   Reduce a square matrix to upper Hessenberg form in place, i.e. turn A = | ||||
|   x  x  x  x  x | ||||
|   x  x  x  x  x | ||||
|   x  x  x  x  x | ||||
|   x  x  x  x  x | ||||
|   x  x  x  x  x | ||||
|   into | ||||
|   x  x  x  x  x | ||||
|   x  x  x  x  x | ||||
|   0  x  x  x  x | ||||
|   0  0  x  x  x | ||||
|   0  0  0  x  x | ||||
|   with householder rotations. | ||||
|   Slow. | ||||
|  | ||||
|   A     : square matrix, overwritten with P A P^H for each reflection P. | ||||
|   Q     : receives the same reflections on the right (Q -> Q P^H). | ||||
|   start : first column to process; also the first row/column index touched by the products. | ||||
| **/ | ||||
| template <class T> | ||||
| void Hess(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){ | ||||
|  | ||||
|   int N ; SizeSquare(A,N); | ||||
|  | ||||
|   for(int k=start;k<N-2;k++){ | ||||
|     //cerr << "hess" << k << std::endl; | ||||
|     DenseVector<T > ck,v; Resize(ck,N-k-1); Resize(v,N-k-1); | ||||
|     // DenseMatrix is a vector of vectors: elements are read with [][] | ||||
|     for(int i=k+1;i<N;i++){ck[i-k-1] = A[i][k];}  ///kth column | ||||
|     normalize(ck);    ///Normalization cancels in PHP anyway | ||||
|     T beta; | ||||
|     Householder_vector<T >(ck, 0, ck.size()-1, v, beta);  ///Householder vector | ||||
|     Householder_mult<T>(A,v,beta,start,k+1,N-1,0);  ///A -> PA | ||||
|     Householder_mult<T >(A,v,beta,start,k+1,N-1,1);  ///PA -> PAP^H | ||||
|     ///Accumulate eigenvector | ||||
|     Householder_mult<T >(Q,v,beta,start,k+1,N-1,1);  ///Q -> QP^H | ||||
|   } | ||||
|   /*for(int l=0;l<N-2;l++){ | ||||
|     for(int k=l+2;k<N;k++){ | ||||
|     A(0,k,l); | ||||
|     } | ||||
|     }*/ | ||||
| } | ||||
|  | ||||
| /** | ||||
|   Tridiagonalize a matrix by delegating to Hess(A,Q,start). | ||||
|   No explicit zeroing of the upper band is done here; the result is only | ||||
|   tridiagonal if the Hessenberg reduction of A happens to be (presumably | ||||
|   the intended symmetric/Hermitian case - confirm with callers). | ||||
| **/ | ||||
| template <class T> | ||||
| void Tri(DenseMatrix<T > &A, DenseMatrix<T> &Q, int start){ | ||||
|   int dim; | ||||
|   SizeSquare(A,dim);   // asserts that A is square; dim is not used further | ||||
|   Hess(A,Q,start); | ||||
| } | ||||
|  | ||||
| /** Zero every element of a square matrix that lies two or more positions off the main diagonal. **/ | ||||
| template <class T> | ||||
| void ForceTridiagonal(DenseMatrix<T> &A){ | ||||
|   int N; | ||||
|   SizeSquare(A,N); | ||||
|   for(int row=0;row<N-2;row++){ | ||||
|     for(int off=row+2;off<N;off++){ | ||||
|       A[row][off]=0; | ||||
|       A[off][row]=0; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** | ||||
|   Solve a symmetric eigensystem that is not necessarily in tridiagonal form. | ||||
|   A copy of Ain is reduced with Tri, the reduced matrix is handed to my_Wilkinson, | ||||
|   and each returned eigenvector is multiplied by the accumulated transformation. | ||||
|   Returns the iteration count reported by my_Wilkinson. | ||||
| **/ | ||||
| template <class T> | ||||
| int my_SymmEigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||
|   int dim; | ||||
|   SizeSquare(Ain,dim); | ||||
|   DenseMatrix<T > work; | ||||
|   work = Ain; | ||||
|   DenseMatrix<T > rot; | ||||
|   Resize(rot,dim,dim); | ||||
|   Unity(rot); | ||||
|   Tri(work,rot,0); | ||||
|   const int iterations = my_Wilkinson<T>(work, evals, evecs, small); | ||||
|   for(int k=0;k<dim;k++){ | ||||
|     evecs[k] = rot*evecs[k]; | ||||
|   } | ||||
|   return iterations; | ||||
| } | ||||
|  | ||||
|  | ||||
| /** Public entry point: forwards unchanged to my_Wilkinson and returns its iteration count. **/ | ||||
| template <class T> | ||||
| int Wilkinson(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||
|   const int iterations = my_Wilkinson(Ain, evals, evecs, small); | ||||
|   return iterations; | ||||
| } | ||||
|  | ||||
| /** Public entry point: forwards unchanged to my_SymmEigensystem and returns its iteration count. **/ | ||||
| template <class T> | ||||
| int SymmEigensystem(DenseMatrix<T> &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||
|   const int iterations = my_SymmEigensystem(Ain, evals, evecs, small); | ||||
|   return iterations; | ||||
| } | ||||
|  | ||||
| /** | ||||
|   Solve a general eigensystem that is not necessarily in Hessenberg form. | ||||
|   A copy of Ain is reduced with Hess, the reduced matrix is handed to QReigensystem, | ||||
|   and each returned eigenvector is multiplied by the accumulated transformation Q. | ||||
|   Returns the iteration count reported by QReigensystem. | ||||
|   Set-up uses the free functions SizeSquare/Resize/Unity, as my_SymmEigensystem does, | ||||
|   because DenseMatrix is a plain vector-of-vectors alias with no dim/Unity members. | ||||
| **/ | ||||
| template <class T> | ||||
| int Eigensystem(DenseMatrix<T > &Ain, DenseVector<T> &evals, DenseVector<DenseVector<T> > &evecs, RealD small){ | ||||
|   int N; SizeSquare(Ain,N); | ||||
|   DenseMatrix<T > A; A = Ain; | ||||
|   DenseMatrix<T > Q; Resize(Q,N,N); Unity(Q); | ||||
|   Hess(A,Q,0); | ||||
|   int it = QReigensystem<T>(A, evals, evecs, small); | ||||
|   for(int k=0;k<N;k++){evecs[k] = Q*evecs[k];} | ||||
|   return it; | ||||
| } | ||||
|  | ||||
| } | ||||
| #endif | ||||
| @@ -1,242 +0,0 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/Householder.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef HOUSEHOLDER_H | ||||
| #define HOUSEHOLDER_H | ||||
|  | ||||
| // Trace macros: each prints one GridLogMessage line with __FUNC__, __FILE__ and __LINE__. | ||||
| // ENTER/LEAVE prefix the line with "ENTRY "/"EXIT  "; TIMER does not use its argument A. | ||||
| // NOTE(review): __FUNC__ is not the standard __func__ - presumably defined elsewhere; confirm. | ||||
| #define TIMER(A) std::cout << GridLogMessage << __FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; | ||||
| #define ENTER()  std::cout << GridLogMessage << "ENTRY "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; | ||||
| #define LEAVE()  std::cout << GridLogMessage << "EXIT  "<<__FUNC__ << " file "<< __FILE__ <<" line " << __LINE__ << std::endl; | ||||
|  | ||||
| #include <cstdlib> | ||||
| #include <string> | ||||
| #include <cmath> | ||||
| #include <iostream> | ||||
| #include <sstream> | ||||
| #include <stdexcept> | ||||
| #include <fstream> | ||||
| #include <complex> | ||||
| #include <algorithm> | ||||
|  | ||||
| namespace Grid { | ||||
| /** Magnitude ordering used to find the largest element of a vector: true when abs(i) < abs(j). **/ | ||||
| template <class T> bool cf(T i, T j) { | ||||
|   const auto mag_i = abs(i); | ||||
|   const auto mag_j = abs(j); | ||||
|   return mag_i < mag_j; | ||||
| } | ||||
|  | ||||
| /** | ||||
| 	Calculate a real Givens angle. | ||||
| 	Given the pair (y, z), write the rotation coefficients into c and s. | ||||
| 	When z is zero the identity rotation (c = 1, s = 0) is returned; this branch | ||||
| 	is exclusive, so y = z = 0 no longer divides zero by zero. | ||||
| 	Otherwise the ratio is formed with the larger-magnitude value as the denominator. | ||||
|  **/ | ||||
| template <class T> inline void Givens_calc(T y, T z, T &c, T &s){ | ||||
|  | ||||
|   RealD mz = (RealD)abs(z); | ||||
|  | ||||
|   if(mz==0.0){ | ||||
|     c = 1; s = 0; | ||||
|   } else if(mz >= (RealD)abs(y)){ | ||||
|     T t = -y/z; | ||||
|     s = (T)1.0 / sqrt ((T)1.0 + t * t); | ||||
|     c = s * t; | ||||
|   } else { | ||||
|     T t = -z/y; | ||||
|     c = (T)1.0 / sqrt ((T)1.0 + t * t); | ||||
|     s = c * t; | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** | ||||
| 	Apply a Givens rotation with coefficients (c, s) to a square matrix in place. | ||||
| 	dir == 0 : rows i and k are combined    (A[i][j], A[k][j]) -> (c*a + s*b, -s*a + c*b) | ||||
| 	dir == 1 : columns i and k are combined (A[j][i], A[j][k]) -> (c*a - s*b,  s*a + c*b) | ||||
| 	Any other dir leaves A unchanged. | ||||
|  **/ | ||||
| template <class T> inline void Givens_mult(DenseMatrix<T> &A,  int i, int k, T c, T s, int dir) | ||||
| { | ||||
|   int dim; | ||||
|   SizeSquare(A,dim); | ||||
|  | ||||
|   for(int j=0;j<dim;j++){ | ||||
|     if(dir == 0){ | ||||
|       const T first  = A[i][j]; | ||||
|       const T second = A[k][j]; | ||||
|       A[i][j] = (c*first + s*second); | ||||
|       A[k][j] = (-s*first + c*second); | ||||
|     } else if(dir == 1){ | ||||
|       const T first  = A[j][i]; | ||||
|       const T second = A[j][k]; | ||||
|       A[j][i] = (c*first - s*second); | ||||
|       A[j][k] = (s*first + c*second); | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** | ||||
| 	from input = x; | ||||
| 	Compute the complex Householder vector, v, such that | ||||
| 	P = (I - b v transpose(v) ) | ||||
| 	b = 2/v.v | ||||
|  | ||||
| 	P | x |    | x | k = 0 | ||||
| 	| x |    | 0 |  | ||||
| 	| x | =  | 0 | | ||||
| 	| x |    | 0 | j = 3 | ||||
| 	| x |	   | x | | ||||
|  | ||||
| 	These are the "Unreduced" Householder vectors. | ||||
|  | ||||
| 	Only entries k..j of input are used. They are scaled by the largest-magnitude | ||||
| 	entry m, alpha is the norm of the scaled segment, and v[k] is shifted by alpha | ||||
| 	times the phase of v[k] (or set to -alpha when v[k] is zero). | ||||
| 	If the segment is entirely zero, v[k..j] and beta are both set to zero so that | ||||
| 	callers testing abs(beta) > 0 never read an uninitialised beta. | ||||
|  **/ | ||||
| template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, DenseVector<T> &v, T &beta) | ||||
| { | ||||
|   T m = *std::max_element(input.begin() + k, input.begin() + j + 1, cf<T> ); | ||||
|  | ||||
|   if(abs(m) > 0.0){ | ||||
|     T alpha = 0; | ||||
|  | ||||
|     for(int i=k; i<j+1; i++){ | ||||
|       v[i] = input[i]/m; | ||||
|       alpha = alpha + v[i]*conj(v[i]); | ||||
|     } | ||||
|     alpha = sqrt(alpha); | ||||
|     beta = (T)1.0/(alpha*(alpha + abs(v[k]) )); | ||||
|  | ||||
|     if(abs(v[k]) > 0.0)  v[k] = v[k] + (v[k]/abs(v[k]))*alpha; | ||||
|     else                 v[k] = -alpha; | ||||
|   } else{ | ||||
|     for(int i=k; i<j+1; i++){ | ||||
|       v[i] = 0.0; | ||||
|     } | ||||
|     beta = 0.0; | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** | ||||
| 	from input = x; | ||||
| 	Compute the complex Householder vector, v, such that | ||||
| 	P = (I - b v transpose(v) ) | ||||
| 	b = 2/v.v | ||||
|  | ||||
| 	Px = alpha*e_dir | ||||
|  | ||||
| 	These are the "Unreduced" Householder vectors. | ||||
|  | ||||
| 	Same construction as the overload without dir, except that the shifted | ||||
| 	entry is v[dir] instead of v[k]. | ||||
| 	If entries k..j of input are all zero, v[k..j] and beta are set to zero. | ||||
|  **/ | ||||
|  | ||||
| template <class T> inline void Householder_vector(DenseVector<T> input, int k, int j, int dir, DenseVector<T> &v, T &beta) | ||||
| { | ||||
|   // cf is a function template: it must be named as cf<T> to be usable as a comparator argument | ||||
|   T m = *std::max_element(input.begin() + k, input.begin() + j + 1, cf<T> ); | ||||
|  | ||||
|   if(abs(m) > 0.0){ | ||||
|     T alpha = 0; | ||||
|  | ||||
|     for(int i=k; i<j+1; i++){ | ||||
|       v[i] = input[i]/m; | ||||
|       alpha = alpha + v[i]*conj(v[i]); | ||||
|     } | ||||
|  | ||||
|     alpha = sqrt(alpha); | ||||
|     beta = 1.0/(alpha*(alpha + abs(v[dir]) )); | ||||
|  | ||||
|     if(abs(v[dir]) > 0.0) v[dir] = v[dir] + (v[dir]/abs(v[dir]))*alpha; | ||||
|     else                  v[dir] = -alpha; | ||||
|   }else{ | ||||
|     for(int i=k; i<j+1; i++){ | ||||
|       v[i] = 0.0; | ||||
|     } | ||||
|     beta = 0.0; | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** | ||||
| 	Apply a Householder reflection to a square matrix A in place. | ||||
| 	trans == 0 : rows k..j are updated for each column p = l..N-1    (product PA) | ||||
| 	otherwise  : columns k..j are updated for each row p = l..N-1    (product AP) | ||||
| 	P = (I - b v transpose(v) ), b = 2/v.v | ||||
| 	v is indexed relative to k, so v[0..j-k] are the entries used. | ||||
| 	Nothing is done unless abs(beta) > 0. | ||||
| 	NOTE(review): both branches multiply by conj(v) in the accumulation and in the | ||||
| 	update, unlike Householder_mult_tri - confirm this is intended for complex T. | ||||
|  **/ | ||||
|  | ||||
| template <class T> inline void Householder_mult(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int k, int j, int trans) | ||||
| { | ||||
|   int dim; | ||||
|   SizeSquare(A,dim); | ||||
|  | ||||
|   if(!(abs(beta) > 0.0)) return; | ||||
|  | ||||
|   const bool from_left = (trans==0); | ||||
|   for(int p=l; p<dim; p++){ | ||||
|     T acc = 0; | ||||
|     if(from_left){ | ||||
|       for(int i=k;i<=j;i++){ acc += conj(v[i-k])*A[i][p]; } | ||||
|       acc *= beta; | ||||
|       for(int i=k;i<=j;i++){ A[i][p] = A[i][p]-acc*conj(v[i-k]); } | ||||
|     } else { | ||||
|       for(int i=k;i<=j;i++){ acc += conj(v[i-k])*A[p][i]; } | ||||
|       acc *= beta; | ||||
|       for(int i=k;i<=j;i++){ A[p][i] = A[p][i]-acc*conj(v[i-k]); } | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| /** | ||||
| 	Compute the product PA if trans = 0 | ||||
| 	AP if trans = 1 | ||||
| 	P = (I - b v transpose(v) ) | ||||
| 	b = 2/v.v | ||||
| 	start at element l of matrix A | ||||
| 	v is of length j - k + 1 of v are nonzero | ||||
| 	A is tridiagonal | ||||
|  | ||||
| 	Only indices p = l..M-1 are visited. The corrections are first collected in a | ||||
| 	scratch matrix tmp and then added to A, so every inner product is taken | ||||
| 	against the unmodified A. Nothing is done unless abs(beta) > 0. | ||||
|  **/ | ||||
| template <class T> inline void Householder_mult_tri(DenseMatrix<T> &A , DenseVector<T> v, T beta, int l, int M, int k, int j, int trans) | ||||
| { | ||||
|   if(abs(beta) > 0.0){ | ||||
|  | ||||
|     int N ; SizeSquare(A,N); | ||||
|  | ||||
|     DenseMatrix<T> tmp; Resize(tmp,N,N); Fill(tmp,0);  | ||||
|  | ||||
|     T s; | ||||
|     for(int p=l; p<M; p++){ | ||||
|       s = 0; | ||||
|       if(trans==0){ | ||||
| 	for(int i=k;i<j+1;i++) s = s + conj(v[i-k])*A[i][p]; | ||||
|       }else{ | ||||
| 	for(int i=k;i<j+1;i++) s = s + v[i-k]*A[p][i]; | ||||
|       } | ||||
|       s = beta*s; | ||||
|       if(trans==0){ | ||||
| 	// tmp is a vector of vectors: index with [][] | ||||
| 	for(int i=k;i<j+1;i++) tmp[i][p] = tmp[i][p] - s*v[i-k]; | ||||
|       }else{ | ||||
| 	for(int i=k;i<j+1;i++) tmp[p][i] = tmp[p][i] - s*conj(v[i-k]); | ||||
|       } | ||||
|     } | ||||
|     for(int p=l; p<M; p++){ | ||||
|       if(trans==0){ | ||||
| 	for(int i=k;i<j+1;i++) A[i][p] = A[i][p] + tmp[i][p]; | ||||
|       }else{ | ||||
| 	for(int i=k;i<j+1;i++) A[p][i] = A[p][i] + tmp[p][i]; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
| } | ||||
| #endif | ||||
| @@ -33,6 +33,8 @@ directory | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| // Selects which solver BlockConjugateGradient::operator() dispatches to | ||||
| enum BlockCGtype { BlockCG, BlockCGrQ, CGmultiRHS }; | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| // Block conjugate gradient. Dimension zero should be the block direction | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| @@ -40,25 +42,273 @@ template <class Field> | ||||
| class BlockConjugateGradient : public OperatorFunction<Field> { | ||||
|  public: | ||||
|  | ||||
|  | ||||
|   typedef typename Field::scalar_type scomplex; | ||||
|  | ||||
|   const int blockDim = 0; | ||||
|  | ||||
|   int blockDim ; | ||||
|   int Nblock; | ||||
|  | ||||
|   BlockCGtype CGtype; | ||||
|   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. | ||||
|                            // Defaults true. | ||||
|   RealD Tolerance; | ||||
|   Integer MaxIterations; | ||||
|   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion | ||||
|    | ||||
|   BlockConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||
|     : Tolerance(tol), | ||||
|     MaxIterations(maxit), | ||||
|     ErrorOnNoConverge(err_on_no_conv){}; | ||||
|   // Construct a solver using algorithm cgtype, with the block of right-hand sides laid out | ||||
|   // along lattice dimension _Orthog. Initialisers follow the member declaration order, which | ||||
|   // is the order in which they actually run; the two bookkeeping counters start from zero. | ||||
|   BlockConjugateGradient(BlockCGtype cgtype,int _Orthog,RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||
|     : blockDim(_Orthog), Nblock(0), CGtype(cgtype), ErrorOnNoConverge(err_on_no_conv), | ||||
|       Tolerance(tol), MaxIterations(maxit), IterationsToComplete(0) | ||||
|   {} | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Thin QR factorisation of a block of vectors, obtained from a Cholesky factorisation: | ||||
| //   R = Q C   with C (Nblock x Nblock) upper triangular and Q^dag Q = 1 | ||||
| // Outputs: m_rr = R^dag R,  C,  Cinv = C^{-1},  Q = R C^{-1} | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| void ThinQRfact (Eigen::MatrixXcd &m_rr, | ||||
| 		 Eigen::MatrixXcd &C, | ||||
| 		 Eigen::MatrixXcd &Cinv, | ||||
| 		 Field & Q, | ||||
| 		 const Field & R) | ||||
| { | ||||
|   const int blockAxis = blockDim; // lattice dimension along which the Nblock vectors are laid out | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Gram matrix of the block:  m_rr = R^dag R   (Hermitian, Nblock x Nblock) | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   sliceInnerProductMatrix(m_rr,R,R,blockAxis); | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Cholesky (Eigen LLT):  m_rr = L L^dag | ||||
|   // Choosing C = L^dag gives | ||||
|   //   Q^dag Q = C^{-dag} R^dag R C^{-1} = C^{-dag} L L^dag C^{-1} = 1 | ||||
|   // Eigen also provides an LDLT variant that is documented as more stable. | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   const Eigen::MatrixXcd lower = m_rr.llt().matrixL(); | ||||
|  | ||||
|   C    = lower.adjoint(); | ||||
|   Cinv = C.inverse(); | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   // Q = R C^{-1},  i.e.  Q_j = R_i Cinv(i,j) | ||||
|   // The slice matrix routines already use the right-multiplication convention X[j] a[j,i]. | ||||
|   //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|   sliceMulMatrix(Q,Cinv,R,blockAxis); | ||||
| } | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Solve for Psi given Src by calling the implementation selected by CGtype. | ||||
| // An unrecognised CGtype is a programming error and trips the assert. | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||
| { | ||||
|   // Each implementation reads the block dimension from blockDim itself, so none is set up here. | ||||
|   switch (CGtype) { | ||||
|   case BlockCGrQ: | ||||
|     BlockCGrQsolve(Linop,Src,Psi); | ||||
|     break; | ||||
|   case BlockCG: | ||||
|     BlockCGsolve(Linop,Src,Psi); | ||||
|     break; | ||||
|   case CGmultiRHS: | ||||
|     CGmultiRHSsolve(Linop,Src,Psi); | ||||
|     break; | ||||
|   default: | ||||
|     assert(0); | ||||
|   } | ||||
| } | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////// | ||||
| // BlockCGrQ implementation: | ||||
| //-------------------------- | ||||
| // X is guess/Solution: read as the initial guess, overwritten with the solution | ||||
| // B is RHS; converged once m_rr(b,b)/ssq[b] < Tolerance^2 for every block b | ||||
| // Solve A X_i = B_i    ;        i refers to Nblock index | ||||
| //////////////////////////////////////////////////////////////////////////// | ||||
| void BlockCGrQsolve(LinearOperatorBase<Field> &Linop, const Field &B, Field &X)  | ||||
| { | ||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption | ||||
|   Nblock = B._grid->_fdimensions[Orthog]; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||
|  | ||||
|   X.checkerboard = B.checkerboard; | ||||
|   conformable(X, B); | ||||
|  | ||||
|   Field tmp(B); | ||||
|   Field Q(B); | ||||
|   Field D(B); | ||||
|   Field Z(B); | ||||
|   Field AD(B); | ||||
|  | ||||
|   Eigen::MatrixXcd m_DZ     = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_M      = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_rr     = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|  | ||||
|   Eigen::MatrixXcd m_C      = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_Cinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_S      = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_Sinv   = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|  | ||||
|   Eigen::MatrixXcd m_tmp    = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|   Eigen::MatrixXcd m_tmp1   = Eigen::MatrixXcd::Identity(Nblock,Nblock); | ||||
|  | ||||
|   // Initial residual computation & set up: ssq (sliceNorm of B) normalises the convergence test; residuals only feeds the NaN asserts on B and X | ||||
|   std::vector<RealD> residuals(Nblock); | ||||
|   std::vector<RealD> ssq(Nblock); | ||||
|  | ||||
|   sliceNorm(ssq,B,Orthog); | ||||
|   RealD sssum=0; | ||||
|   for(int b=0;b<Nblock;b++) sssum+=ssq[b]; | ||||
|  | ||||
|   sliceNorm(residuals,B,Orthog); | ||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||
|  | ||||
|   sliceNorm(residuals,X,Orthog); | ||||
|   for(int b=0;b<Nblock;b++){ assert(std::isnan(residuals[b])==0); } | ||||
|  | ||||
|   /************************************************************************ | ||||
|    * Block conjugate gradient rQ (Sebastien Birk Thesis, after Dubrulle 2001) | ||||
|    ************************************************************************ | ||||
|    * Dimensions: | ||||
|    * | ||||
|    *   X,B==(Nferm x Nblock) | ||||
|    *   A==(Nferm x Nferm) | ||||
|    *   | ||||
|    * Nferm = Nspin x Ncolour x Ncomplex x Nlattice_site | ||||
|    *  | ||||
|    * QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) | ||||
|    * for k:  | ||||
|    *   Z  = AD | ||||
|    *   M  = [D^dag Z]^{-1} | ||||
|    *   X  = X + D MC | ||||
|    *   QS = Q - ZM | ||||
|    *   D  = Q + D S^dag | ||||
|    *   C  = S C | ||||
|    * | ||||
|    * The residual B-AX is not recomputed inside the loop: the convergence monitor | ||||
|    * uses C^dag C, and A X - B is evaluated once, for reporting, after convergence. | ||||
|    * | ||||
|    * On return IterationsToComplete holds the loop counter k; if the loop runs out | ||||
|    * without converging (and ErrorOnNoConverge does not assert) that is MaxIterations+1. | ||||
|    */ | ||||
|   /////////////////////////////////////// | ||||
|   // Initial block: initial search dir is guess | ||||
|   /////////////////////////////////////// | ||||
|   std::cout << GridLogMessage<<"BlockCGrQ algorithm initialisation " <<std::endl; | ||||
|  | ||||
|   //1.  QC = R = B-AX, D = Q     ; QC => Thin QR factorisation (google it) | ||||
|  | ||||
|   Linop.HermOp(X, AD); | ||||
|   tmp = B - AD;   | ||||
|   ThinQRfact (m_rr, m_C, m_Cinv, Q, tmp); | ||||
|   D=Q; | ||||
|  | ||||
|   std::cout << GridLogMessage<<"BlockCGrQ computed initial residual and QR fact " <<std::endl; | ||||
|  | ||||
|   /////////////////////////////////////// | ||||
|   // Timers | ||||
|   /////////////////////////////////////// | ||||
|   GridStopWatch sliceInnerTimer; | ||||
|   GridStopWatch sliceMaddTimer; | ||||
|   GridStopWatch QRTimer; | ||||
|   GridStopWatch MatrixTimer; | ||||
|   GridStopWatch SolverTimer; | ||||
|   SolverTimer.Start(); | ||||
|  | ||||
|   int k; | ||||
|   for (k = 1; k <= MaxIterations; k++) { | ||||
|  | ||||
|     //3. Z  = AD | ||||
|     MatrixTimer.Start(); | ||||
|     Linop.HermOp(D, Z);       | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     //4. M  = [D^dag Z]^{-1} | ||||
|     sliceInnerTimer.Start(); | ||||
|     sliceInnerProductMatrix(m_DZ,D,Z,Orthog); | ||||
|     sliceInnerTimer.Stop(); | ||||
|     m_M       = m_DZ.inverse(); | ||||
|  | ||||
|     //5. X  = X + D MC | ||||
|     m_tmp     = m_M * m_C; | ||||
|     sliceMaddTimer.Start(); | ||||
|     sliceMaddMatrix(X,m_tmp, D,X,Orthog);      | ||||
|     sliceMaddTimer.Stop(); | ||||
|  | ||||
|     //6. QS = Q - ZM | ||||
|     sliceMaddTimer.Start(); | ||||
|     sliceMaddMatrix(tmp,m_M,Z,Q,Orthog,-1.0); | ||||
|     sliceMaddTimer.Stop(); | ||||
|     QRTimer.Start(); | ||||
|     ThinQRfact (m_rr, m_S, m_Sinv, Q, tmp); | ||||
|     QRTimer.Stop(); | ||||
|      | ||||
|     //7. D  = Q + D S^dag | ||||
|     m_tmp = m_S.adjoint(); | ||||
|     sliceMaddTimer.Start(); | ||||
|     sliceMaddMatrix(D,m_tmp,D,Q,Orthog); | ||||
|     sliceMaddTimer.Stop(); | ||||
|  | ||||
|     //8. C  = S C | ||||
|     m_C = m_S*m_C; | ||||
|      | ||||
|     /********************* | ||||
|      * convergence monitor | ||||
|      ********************* | ||||
|      * m_rr is rebuilt here as C^dag C (overwriting what ThinQRfact stored in it); | ||||
|      * each diagonal entry divided by ssq[b] is compared against Tolerance^2. | ||||
|      */ | ||||
|     m_rr = m_C.adjoint() * m_C; | ||||
|  | ||||
|     RealD max_resid=0; | ||||
|     RealD rrsum=0; | ||||
|     RealD rr; | ||||
|  | ||||
|     for(int b=0;b<Nblock;b++) { | ||||
|       rrsum+=real(m_rr(b,b)); | ||||
|       rr = real(m_rr(b,b))/ssq[b]; | ||||
|       if ( rr > max_resid ) max_resid = rr; | ||||
|     } | ||||
|  | ||||
|     std::cout << GridLogIterative << "\titeration "<<k<<" rr_sum "<<rrsum<<" ssq_sum "<< sssum | ||||
| 	      <<" ave "<<std::sqrt(rrsum/sssum) << " max "<< max_resid <<std::endl; | ||||
|  | ||||
|     if ( max_resid < Tolerance*Tolerance ) {  | ||||
|  | ||||
|       SolverTimer.Stop(); | ||||
|  | ||||
|       std::cout << GridLogMessage<<"BlockCGrQ converged in "<<k<<" iterations"<<std::endl; | ||||
|  | ||||
|       for(int b=0;b<Nblock;b++){ | ||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " | ||||
| 		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; | ||||
|       } | ||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||
|  | ||||
|       Linop.HermOp(X, AD); | ||||
|       AD = AD-B; | ||||
|       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AD)/norm2(B)) <<std::endl; | ||||
|  | ||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tMatrix     " << MatrixTimer.Elapsed()     <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tInnerProd  " << sliceInnerTimer.Elapsed() <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tMaddMatrix " << sliceMaddTimer.Elapsed()  <<std::endl; | ||||
|       std::cout << GridLogMessage << "\tThinQRfact " << QRTimer.Elapsed()  <<std::endl; | ||||
| 	     | ||||
|       IterationsToComplete = k; | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|   } | ||||
|   std::cout << GridLogMessage << "BlockConjugateGradient(rQ) did NOT converge" << std::endl; | ||||
|  | ||||
|   if (ErrorOnNoConverge) assert(0); | ||||
|   IterationsToComplete = k; | ||||
| } | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| // Block conjugate gradient; Original O'Leary Dimension zero should be the block direction | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| void BlockCGsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||
| { | ||||
|   int Orthog = blockDim; // First dimension is block dim; this is an assumption | ||||
|   Nblock = Src._grid->_fdimensions[Orthog]; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<" Block Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||
| @@ -162,8 +412,9 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) | ||||
|      ********************* | ||||
|      */ | ||||
|     RealD max_resid=0; | ||||
|     RealD rr; | ||||
|     for(int b=0;b<Nblock;b++){ | ||||
|       RealD rr = real(m_rr(b,b))/ssq[b]; | ||||
|       rr = real(m_rr(b,b))/ssq[b]; | ||||
|       if ( rr > max_resid ) max_resid = rr; | ||||
|     } | ||||
|      | ||||
| @@ -173,13 +424,14 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) | ||||
|  | ||||
|       std::cout << GridLogMessage<<"BlockCG converged in "<<k<<" iterations"<<std::endl; | ||||
|       for(int b=0;b<Nblock;b++){ | ||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" resid "<< std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; | ||||
| 	std::cout << GridLogMessage<< "\t\tblock "<<b<<" computed resid " | ||||
| 		  << std::sqrt(real(m_rr(b,b))/ssq[b])<<std::endl; | ||||
|       } | ||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||
|  | ||||
|       Linop.HermOp(Psi, AP); | ||||
|       AP = AP-Src; | ||||
|       std::cout << GridLogMessage <<"\tTrue residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; | ||||
|       std::cout << GridLogMessage <<"\t True residual is " << std::sqrt(norm2(AP)/norm2(Src)) <<std::endl; | ||||
|  | ||||
|       std::cout << GridLogMessage << "Time Breakdown "<<std::endl; | ||||
|       std::cout << GridLogMessage << "\tElapsed    " << SolverTimer.Elapsed()     <<std::endl; | ||||
| @@ -197,35 +449,13 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) | ||||
|   if (ErrorOnNoConverge) assert(0); | ||||
|   IterationsToComplete = k; | ||||
| } | ||||
| }; | ||||
|  | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| // multiRHS conjugate gradient. Dimension zero should be the block direction | ||||
| // Use this for spread out across nodes | ||||
| ////////////////////////////////////////////////////////////////////////// | ||||
| template <class Field> | ||||
| class MultiRHSConjugateGradient : public OperatorFunction<Field> { | ||||
|  public: | ||||
|  | ||||
|   typedef typename Field::scalar_type scomplex; | ||||
|  | ||||
|   const int blockDim = 0; | ||||
|  | ||||
|   int Nblock; | ||||
|   bool ErrorOnNoConverge;  // throw an assert when the CG fails to converge. | ||||
|                            // Defaults true. | ||||
|   RealD Tolerance; | ||||
|   Integer MaxIterations; | ||||
|   Integer IterationsToComplete; //Number of iterations the CG took to finish. Filled in upon completion | ||||
|    | ||||
|    MultiRHSConjugateGradient(RealD tol, Integer maxit, bool err_on_no_conv = true) | ||||
|     : Tolerance(tol), | ||||
|     MaxIterations(maxit), | ||||
|     ErrorOnNoConverge(err_on_no_conv){}; | ||||
|  | ||||
| void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||
| void CGmultiRHSsolve(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi)  | ||||
| { | ||||
|   int Orthog = 0; // First dimension is block dim | ||||
|   int Orthog = blockDim; // First dimension is block dim | ||||
|   Nblock = Src._grid->_fdimensions[Orthog]; | ||||
|  | ||||
|   std::cout<<GridLogMessage<<"MultiRHS Conjugate Gradient : Orthog "<<Orthog<<" Nblock "<<Nblock<<std::endl; | ||||
| @@ -285,12 +515,10 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) | ||||
|     MatrixTimer.Stop(); | ||||
|  | ||||
|     // Alpha | ||||
|     //    sliceInnerProductVectorTest(v_pAp_test,P,AP,Orthog); | ||||
|     sliceInnerTimer.Start(); | ||||
|     sliceInnerProductVector(v_pAp,P,AP,Orthog); | ||||
|     sliceInnerTimer.Stop(); | ||||
|     for(int b=0;b<Nblock;b++){ | ||||
|       //      std::cout << " "<< v_pAp[b]<<" "<< v_pAp_test[b]<<std::endl; | ||||
|       v_alpha[b] = v_rr[b]/real(v_pAp[b]); | ||||
|     } | ||||
|  | ||||
| @@ -332,7 +560,7 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) | ||||
|  | ||||
|       std::cout << GridLogMessage<<"MultiRHS solver converged in " <<k<<" iterations"<<std::endl; | ||||
|       for(int b=0;b<Nblock;b++){ | ||||
| 	std::cout << GridLogMessage<< "\t\tBlock "<<b<<" resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl; | ||||
| 	std::cout << GridLogMessage<< "\t\tBlock "<<b<<" computed resid "<< std::sqrt(v_rr[b]/ssq[b])<<std::endl; | ||||
|       } | ||||
|       std::cout << GridLogMessage<<"\tMax residual is "<<std::sqrt(max_resid)<<std::endl; | ||||
|  | ||||
| @@ -358,9 +586,8 @@ void operator()(LinearOperatorBase<Field> &Linop, const Field &Src, Field &Psi) | ||||
|   if (ErrorOnNoConverge) assert(0); | ||||
|   IterationsToComplete = k; | ||||
| } | ||||
|  | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| } | ||||
| #endif | ||||
|   | ||||
| @@ -1,81 +0,0 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/algorithms/iterative/EigenSort.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #ifndef GRID_EIGENSORT_H | ||||
| #define GRID_EIGENSORT_H | ||||
|  | ||||
|  | ||||
| namespace Grid { | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|     // Eigen sorter to begin with | ||||
|     ///////////////////////////////////////////////////////////// | ||||
|  | ||||
| // Orders eigenvalues (and optionally their eigenvectors) so that the N largest | ||||
| // eigenvalues come first, in descending order. | ||||
| template<class Field> | ||||
| class SortEigen { | ||||
|  private: | ||||
|    | ||||
| //hacking for testing for now | ||||
|   // Both comparators are "greater than" orderings, which is what makes the sort descending. | ||||
|   static bool less_lmd(RealD left,RealD right){ | ||||
|     return left > right; | ||||
|   }   | ||||
|   // Arguments are taken by const reference: a sort comparator must not modify its operands | ||||
|   // and must be callable on const values. | ||||
|   static bool less_pair(const std::pair<RealD,Field const*>& left, | ||||
|                         const std::pair<RealD,Field const*>& right){ | ||||
|     return left.first > (right.first); | ||||
|   }   | ||||
|    | ||||
|    | ||||
|  public: | ||||
|  | ||||
|   // Partially sort the eigenpairs: on return lmd[0..N-1] hold the N largest eigenvalues in | ||||
|   // descending order and evec[0..N-1] the matching eigenvectors. Entries from N onwards are | ||||
|   // left as they were. N must not exceed lmd.size(). | ||||
|   void push(DenseVector<RealD>& lmd, | ||||
|             DenseVector<Field>& evec,int N) { | ||||
|     const int nev = lmd.size(); | ||||
|  | ||||
|     // Work from copies of the eigenvectors, because evec is overwritten while reordering. | ||||
|     DenseVector<Field> cpy(lmd.size(),evec[0]._grid); | ||||
|     for(int i=0;i<nev;i++) cpy[i] = evec[i]; | ||||
|      | ||||
|     // Pair each eigenvalue with a pointer to its copied eigenvector and sort the pairs. | ||||
|     DenseVector<std::pair<RealD, Field const*> > emod(lmd.size());     | ||||
|     for(int i=0;i<nev;++i) | ||||
|       emod[i] = std::pair<RealD,Field const*>(lmd[i],&cpy[i]); | ||||
|  | ||||
|     std::partial_sort(emod.begin(),emod.begin()+N,emod.end(),less_pair); | ||||
|  | ||||
|     typename DenseVector<std::pair<RealD, Field const*> >::iterator it = emod.begin(); | ||||
|     for(int i=0;i<N;++i){ | ||||
|       lmd[i]=it->first; | ||||
|       evec[i]=*(it->second); | ||||
|       ++it; | ||||
|     } | ||||
|   } | ||||
|   // Eigenvalue-only variant: the N largest values end up first, in descending order. | ||||
|   void push(DenseVector<RealD>& lmd,int N) { | ||||
|     std::partial_sort(lmd.begin(),lmd.begin()+N,lmd.end(),less_lmd); | ||||
|   } | ||||
|   // True when |lmd| exceeds |thrs|. | ||||
|   bool saturated(RealD lmd, RealD thrs) { | ||||
|     return fabs(lmd) > fabs(thrs); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| } | ||||
| #endif | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -50,7 +50,6 @@ public: | ||||
|  | ||||
|     GridBase(const std::vector<int> & processor_grid) : CartesianCommunicator(processor_grid) {}; | ||||
|  | ||||
|  | ||||
|     // Physics Grid information. | ||||
|     std::vector<int> _simd_layout;// Which dimensions get relayed out over simd lanes. | ||||
|     std::vector<int> _fdimensions;// (full) Global dimensions of array prior to cb removal | ||||
| @@ -63,13 +62,12 @@ public: | ||||
|     int _isites; | ||||
|     int _fsites;                  // _isites*_osites = product(dimensions). | ||||
|     int _gsites; | ||||
|     std::vector<int> _slice_block;   // subslice information | ||||
|     std::vector<int> _slice_block;// subslice information | ||||
|     std::vector<int> _slice_stride; | ||||
|     std::vector<int> _slice_nblock; | ||||
|  | ||||
|     // Might need these at some point | ||||
|     //    std::vector<int> _lstart;     // local start of array in gcoors. _processor_coor[d]*_ldimensions[d] | ||||
|     //    std::vector<int> _lend;       // local end of array in gcoors    _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 | ||||
|     std::vector<int> _lstart;     // local start of array in gcoors _processor_coor[d]*_ldimensions[d] | ||||
|     std::vector<int> _lend  ;     // local end of array in gcoors   _processor_coor[d]*_ldimensions[d]+_ldimensions_[d]-1 | ||||
|  | ||||
| public: | ||||
|  | ||||
| @@ -176,6 +174,7 @@ public: | ||||
|     inline int gSites(void) const { return _isites*_osites*_Nprocessors; };  | ||||
|     inline int Nd    (void) const { return _ndimension;}; | ||||
|  | ||||
|     // Returned by const reference, like the other dimension accessors, to avoid copying the vector | ||||
|     inline const std::vector<int> &LocalStarts(void)            { return _lstart;    }; | ||||
|     inline const std::vector<int> &FullDimensions(void)         { return _fdimensions;}; | ||||
|     inline const std::vector<int> &GlobalDimensions(void)       { return _gdimensions;}; | ||||
|     inline const std::vector<int> &LocalDimensions(void)        { return _ldimensions;}; | ||||
|   | ||||
| @@ -76,6 +76,8 @@ public: | ||||
|         _ldimensions.resize(_ndimension); | ||||
|         _rdimensions.resize(_ndimension); | ||||
|         _simd_layout.resize(_ndimension); | ||||
| 	_lstart.resize(_ndimension); | ||||
| 	_lend.resize(_ndimension); | ||||
|              | ||||
|         _ostride.resize(_ndimension); | ||||
|         _istride.resize(_ndimension); | ||||
| @@ -94,8 +96,10 @@ public: | ||||
| 	  // Use a reduced simd grid | ||||
| 	  _ldimensions[d]= _gdimensions[d]/_processors[d];  //local dimensions | ||||
| 	  _rdimensions[d]= _ldimensions[d]/_simd_layout[d]; //overdecomposition | ||||
| 	  _osites *= _rdimensions[d]; | ||||
| 	  _isites *= _simd_layout[d]; | ||||
| 	  _lstart[d]     = _processor_coor[d]*_ldimensions[d]; | ||||
| 	  _lend[d]       = _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1; | ||||
| 	  _osites  *= _rdimensions[d]; | ||||
| 	  _isites  *= _simd_layout[d]; | ||||
|                  | ||||
| 	  // Addressing support | ||||
| 	  if ( d==0 ) { | ||||
|   | ||||
| @@ -151,6 +151,8 @@ public: | ||||
|       _ldimensions.resize(_ndimension); | ||||
|       _rdimensions.resize(_ndimension); | ||||
|       _simd_layout.resize(_ndimension); | ||||
|       _lstart.resize(_ndimension); | ||||
|       _lend.resize(_ndimension); | ||||
|        | ||||
|       _ostride.resize(_ndimension); | ||||
|       _istride.resize(_ndimension); | ||||
| @@ -169,6 +171,8 @@ public: | ||||
| 	  _gdimensions[d] = _gdimensions[d]/2; // Remove a checkerboard | ||||
| 	} | ||||
| 	_ldimensions[d] = _gdimensions[d]/_processors[d]; | ||||
| 	_lstart[d]     = _processor_coor[d]*_ldimensions[d]; | ||||
| 	_lend[d]       = _processor_coor[d]*_ldimensions[d]+_ldimensions[d]-1; | ||||
|  | ||||
| 	// Use a reduced simd grid | ||||
| 	_simd_layout[d] = simd_layout[d]; | ||||
|   | ||||
| @@ -60,6 +60,7 @@ void CartesianCommunicator::ShmBufferFreeAll(void) { | ||||
| ///////////////////////////////// | ||||
| // Grid information queries | ||||
| ///////////////////////////////// | ||||
| int                      CartesianCommunicator::Dimensions(void)         { return _ndimension; }; | ||||
| int                      CartesianCommunicator::IsBoss(void)            { return _processor==0; }; | ||||
| int                      CartesianCommunicator::BossRank(void)          { return 0; }; | ||||
| int                      CartesianCommunicator::ThisRank(void)          { return _processor; }; | ||||
| @@ -91,6 +92,7 @@ void CartesianCommunicator::GlobalSumVector(ComplexD *c,int N) | ||||
| #if !defined( GRID_COMMS_MPI3) && !defined (GRID_COMMS_MPI3L) | ||||
|  | ||||
| int                      CartesianCommunicator::NodeCount(void)    { return ProcessorCount();}; | ||||
| int                      CartesianCommunicator::RankCount(void)    { return ProcessorCount();}; | ||||
|  | ||||
| double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsRequest_t> &list, | ||||
| 						       void *xmit, | ||||
|   | ||||
| @@ -148,6 +148,7 @@ class CartesianCommunicator { | ||||
|   int  RankFromProcessorCoor(std::vector<int> &coor); | ||||
|   void ProcessorCoorFromRank(int rank,std::vector<int> &coor); | ||||
|    | ||||
|   int                      Dimensions(void)        ; | ||||
|   int                      IsBoss(void)            ; | ||||
|   int                      BossRank(void)          ; | ||||
|   int                      ThisRank(void)          ; | ||||
| @@ -155,6 +156,7 @@ class CartesianCommunicator { | ||||
|   const std::vector<int> & ProcessorGrid(void)     ; | ||||
|   int                      ProcessorCount(void)    ; | ||||
|   int                      NodeCount(void)    ; | ||||
|   int                      RankCount(void)    ; | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////////////////////// | ||||
|   // very VERY rarely (Log, serial RNG) we need world without a grid | ||||
| @@ -175,6 +177,8 @@ class CartesianCommunicator { | ||||
|   void GlobalSumVector(ComplexF *c,int N); | ||||
|   void GlobalSum(ComplexD &c); | ||||
|   void GlobalSumVector(ComplexD *c,int N); | ||||
|   void GlobalXOR(uint32_t &); | ||||
|   void GlobalXOR(uint64_t &); | ||||
|    | ||||
|   template<class obj> void GlobalSum(obj &o){ | ||||
|     typedef typename obj::scalar_type scalar_type; | ||||
|   | ||||
| @@ -83,6 +83,14 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ | ||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); | ||||
|   assert(ierr==0); | ||||
| } | ||||
| // In-place bitwise XOR reduction of a 32-bit word over the communicator (MPI_Allreduce, MPI_BXOR) | ||||
| void CartesianCommunicator::GlobalXOR(uint32_t &u){ | ||||
|   const int status = MPI_Allreduce(MPI_IN_PLACE, &u, 1, MPI_UINT32_T, MPI_BXOR, communicator); | ||||
|   assert(status == 0); | ||||
| } | ||||
| // In-place bitwise XOR reduction of a 64-bit word over the communicator (MPI_Allreduce, MPI_BXOR) | ||||
| void CartesianCommunicator::GlobalXOR(uint64_t &u){ | ||||
|   const int status = MPI_Allreduce(MPI_IN_PLACE, &u, 1, MPI_UINT64_T, MPI_BXOR, communicator); | ||||
|   assert(status == 0); | ||||
| } | ||||
| void CartesianCommunicator::GlobalSum(float &f){ | ||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); | ||||
|   assert(ierr==0); | ||||
|   | ||||
| @@ -65,6 +65,7 @@ std::vector<int> CartesianCommunicator::MyGroup; | ||||
| std::vector<void *> CartesianCommunicator::ShmCommBufs; | ||||
|  | ||||
| int CartesianCommunicator::NodeCount(void)    { return GroupSize;}; | ||||
| int CartesianCommunicator::RankCount(void)    { return WorldSize;}; | ||||
|  | ||||
|  | ||||
| #undef FORCE_COMMS | ||||
| @@ -509,6 +510,14 @@ void CartesianCommunicator::GlobalSum(uint64_t &u){ | ||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&u,1,MPI_UINT64_T,MPI_SUM,communicator); | ||||
|   assert(ierr==0); | ||||
| } | ||||
| // In-place bitwise XOR reduction of a 32-bit word over the communicator (MPI_Allreduce, MPI_BXOR) | ||||
| void CartesianCommunicator::GlobalXOR(uint32_t &u){ | ||||
|   const int status = MPI_Allreduce(MPI_IN_PLACE, &u, 1, MPI_UINT32_T, MPI_BXOR, communicator); | ||||
|   assert(status == 0); | ||||
| } | ||||
| // In-place bitwise XOR reduction of a 64-bit word over the communicator (MPI_Allreduce, MPI_BXOR) | ||||
| void CartesianCommunicator::GlobalXOR(uint64_t &u){ | ||||
|   const int status = MPI_Allreduce(MPI_IN_PLACE, &u, 1, MPI_UINT64_T, MPI_BXOR, communicator); | ||||
|   assert(status == 0); | ||||
| } | ||||
| void CartesianCommunicator::GlobalSum(float &f){ | ||||
|   int ierr=MPI_Allreduce(MPI_IN_PLACE,&f,1,MPI_FLOAT,MPI_SUM,communicator); | ||||
|   assert(ierr==0); | ||||
|   | ||||
| @@ -59,6 +59,8 @@ void CartesianCommunicator::GlobalSum(double &){} | ||||
| void CartesianCommunicator::GlobalSum(uint32_t &){} | ||||
| void CartesianCommunicator::GlobalSum(uint64_t &){} | ||||
| void CartesianCommunicator::GlobalSumVector(double *,int N){} | ||||
| void CartesianCommunicator::GlobalXOR(uint32_t &){} | ||||
| void CartesianCommunicator::GlobalXOR(uint64_t &){} | ||||
|  | ||||
| void CartesianCommunicator::SendRecvPacket(void *xmit, | ||||
| 					   void *recv, | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
|  /************************************************************************************* | ||||
| /************************************************************************************* | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|     Source file: ./lib/lattice/Lattice_reduction.h | ||||
|     Copyright (C) 2015 | ||||
| @@ -328,6 +328,8 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice | ||||
|   typedef typename vobj::vector_type vector_type; | ||||
|   typedef typename vobj::tensor_reduced tensor_reduced; | ||||
|    | ||||
|   scalar_type zscale(scale); | ||||
|  | ||||
|   GridBase *grid  = X._grid; | ||||
|  | ||||
|   int Nsimd  =grid->Nsimd(); | ||||
| @@ -353,7 +355,7 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice | ||||
|       grid->iCoorFromIindex(icoor,l); | ||||
|       int ldx =r+icoor[orthogdim]*rd; | ||||
|       scalar_type *as =(scalar_type *)&av; | ||||
|       as[l] = scalar_type(a[ldx])*scale; | ||||
|       as[l] = scalar_type(a[ldx])*zscale; | ||||
|     } | ||||
|  | ||||
|     tensor_reduced at; at=av; | ||||
| @@ -367,71 +369,6 @@ static void sliceMaddVector(Lattice<vobj> &R,std::vector<RealD> &a,const Lattice | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
| /* | ||||
| template<class vobj> | ||||
| static void sliceMaddVectorSlow (Lattice<vobj> &R,std::vector<RealD> &a,const Lattice<vobj> &X,const Lattice<vobj> &Y, | ||||
| 			     int Orthog,RealD scale=1.0)  | ||||
| {     | ||||
|   // FIXME: Implementation is slow | ||||
|   // Best base the linear combination by constructing a  | ||||
|   // set of vectors of size grid->_rdimensions[Orthog]. | ||||
|   typedef typename vobj::scalar_object sobj; | ||||
|   typedef typename vobj::scalar_type scalar_type; | ||||
|   typedef typename vobj::vector_type vector_type; | ||||
|    | ||||
|   int Nblock = X._grid->GlobalDimensions()[Orthog]; | ||||
|    | ||||
|   GridBase *FullGrid  = X._grid; | ||||
|   GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); | ||||
|    | ||||
|   Lattice<vobj> Xslice(SliceGrid); | ||||
|   Lattice<vobj> Rslice(SliceGrid); | ||||
|   // If we based this on Cshift it would work for spread out | ||||
|   // but it would be even slower | ||||
|   for(int i=0;i<Nblock;i++){ | ||||
|     ExtractSlice(Rslice,Y,i,Orthog); | ||||
|     ExtractSlice(Xslice,X,i,Orthog); | ||||
|     Rslice = Rslice + Xslice*(scale*a[i]); | ||||
|     InsertSlice(Rslice,R,i,Orthog); | ||||
|   } | ||||
| }; | ||||
| template<class vobj> | ||||
| static void sliceInnerProductVectorSlow( std::vector<ComplexD> & vec, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)  | ||||
|   { | ||||
|     // FIXME: Implementation is slow | ||||
|     // Look at localInnerProduct implementation, | ||||
|     // and do inside a site loop with block strided iterators | ||||
|     typedef typename vobj::scalar_object sobj; | ||||
|     typedef typename vobj::scalar_type scalar_type; | ||||
|     typedef typename vobj::vector_type vector_type; | ||||
|     typedef typename vobj::tensor_reduced scalar; | ||||
|     typedef typename scalar::scalar_object  scomplex; | ||||
|    | ||||
|     int Nblock = lhs._grid->GlobalDimensions()[Orthog]; | ||||
|     vec.resize(Nblock); | ||||
|     std::vector<scomplex> sip(Nblock); | ||||
|     Lattice<scalar> IP(lhs._grid);  | ||||
|     IP=localInnerProduct(lhs,rhs); | ||||
|     sliceSum(IP,sip,Orthog); | ||||
|    | ||||
|     for(int ss=0;ss<Nblock;ss++){ | ||||
|       vec[ss] = TensorRemove(sip[ss]); | ||||
|     } | ||||
|   } | ||||
| */ | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // FIXME: Implementation is slow | ||||
| // If we based this on Cshift it would work for spread out | ||||
| // but it would be even slower | ||||
| // | ||||
| // Repeated extract slice is inefficient | ||||
| // | ||||
| // Best base the linear combination by constructing a  | ||||
| // set of vectors of size grid->_rdimensions[Orthog]. | ||||
| ////////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
| inline GridBase         *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Orthog) | ||||
| { | ||||
|   int NN    = BlockSolverGrid->_ndimension; | ||||
| @@ -451,7 +388,6 @@ inline GridBase         *makeSubSliceGrid(const GridBase *BlockSolverGrid,int Or | ||||
|   return (GridBase *)new GridCartesian(latt_phys,simd_phys,mpi_phys);  | ||||
| } | ||||
|  | ||||
|  | ||||
| template<class vobj> | ||||
| static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,const Lattice<vobj> &Y,int Orthog,RealD scale=1.0)  | ||||
| {     | ||||
| @@ -460,28 +396,103 @@ static void sliceMaddMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice | ||||
|   typedef typename vobj::vector_type vector_type; | ||||
|  | ||||
|   int Nblock = X._grid->GlobalDimensions()[Orthog]; | ||||
|    | ||||
|  | ||||
|   GridBase *FullGrid  = X._grid; | ||||
|   GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); | ||||
|    | ||||
|  | ||||
|   Lattice<vobj> Xslice(SliceGrid); | ||||
|   Lattice<vobj> Rslice(SliceGrid); | ||||
|    | ||||
|   for(int i=0;i<Nblock;i++){ | ||||
|     ExtractSlice(Rslice,Y,i,Orthog); | ||||
|     for(int j=0;j<Nblock;j++){ | ||||
|       ExtractSlice(Xslice,X,j,Orthog); | ||||
|       Rslice = Rslice + Xslice*(scale*aa(j,i)); | ||||
|     } | ||||
|     InsertSlice(Rslice,R,i,Orthog); | ||||
|  | ||||
|   assert( FullGrid->_simd_layout[Orthog]==1); | ||||
|   int nh =  FullGrid->_ndimension; | ||||
|   int nl = SliceGrid->_ndimension; | ||||
|  | ||||
|   //FIXME package in a convenient iterator | ||||
|   //Should loop over a plane orthogonal to direction "Orthog" | ||||
|   int stride=FullGrid->_slice_stride[Orthog]; | ||||
|   int block =FullGrid->_slice_block [Orthog]; | ||||
|   int nblock=FullGrid->_slice_nblock[Orthog]; | ||||
|   int ostride=FullGrid->_ostride[Orthog]; | ||||
| #pragma omp parallel  | ||||
|   { | ||||
|     std::vector<vobj> s_x(Nblock); | ||||
|  | ||||
| #pragma omp for collapse(2) | ||||
|     for(int n=0;n<nblock;n++){ | ||||
|     for(int b=0;b<block;b++){ | ||||
|       int o  = n*stride + b; | ||||
|  | ||||
|       for(int i=0;i<Nblock;i++){ | ||||
| 	s_x[i] = X[o+i*ostride]; | ||||
|       } | ||||
|  | ||||
|       vobj dot; | ||||
|       for(int i=0;i<Nblock;i++){ | ||||
| 	dot = Y[o+i*ostride]; | ||||
| 	for(int j=0;j<Nblock;j++){ | ||||
| 	  dot = dot + s_x[j]*(scale*aa(j,i)); | ||||
| 	} | ||||
| 	R[o+i*ostride]=dot; | ||||
|       } | ||||
|     }} | ||||
|   } | ||||
| }; | ||||
|  | ||||
| template<class vobj> | ||||
| static void sliceMulMatrix (Lattice<vobj> &R,Eigen::MatrixXcd &aa,const Lattice<vobj> &X,int Orthog,RealD scale=1.0)  | ||||
| {     | ||||
|   typedef typename vobj::scalar_object sobj; | ||||
|   typedef typename vobj::scalar_type scalar_type; | ||||
|   typedef typename vobj::vector_type vector_type; | ||||
|  | ||||
|   int Nblock = X._grid->GlobalDimensions()[Orthog]; | ||||
|  | ||||
|   GridBase *FullGrid  = X._grid; | ||||
|   GridBase *SliceGrid = makeSubSliceGrid(FullGrid,Orthog); | ||||
|  | ||||
|   Lattice<vobj> Xslice(SliceGrid); | ||||
|   Lattice<vobj> Rslice(SliceGrid); | ||||
|  | ||||
|   assert( FullGrid->_simd_layout[Orthog]==1); | ||||
|   int nh =  FullGrid->_ndimension; | ||||
|   int nl = SliceGrid->_ndimension; | ||||
|  | ||||
|   //FIXME package in a convenient iterator | ||||
|   //Should loop over a plane orthogonal to direction "Orthog" | ||||
|   int stride=FullGrid->_slice_stride[Orthog]; | ||||
|   int block =FullGrid->_slice_block [Orthog]; | ||||
|   int nblock=FullGrid->_slice_nblock[Orthog]; | ||||
|   int ostride=FullGrid->_ostride[Orthog]; | ||||
| #pragma omp parallel  | ||||
|   { | ||||
|     std::vector<vobj> s_x(Nblock); | ||||
|  | ||||
| #pragma omp for collapse(2) | ||||
|     for(int n=0;n<nblock;n++){ | ||||
|     for(int b=0;b<block;b++){ | ||||
|       int o  = n*stride + b; | ||||
|  | ||||
|       for(int i=0;i<Nblock;i++){ | ||||
| 	s_x[i] = X[o+i*ostride]; | ||||
|       } | ||||
|  | ||||
|       vobj dot; | ||||
|       for(int i=0;i<Nblock;i++){ | ||||
| 	dot = s_x[0]*(scale*aa(0,i)); | ||||
| 	for(int j=1;j<Nblock;j++){ | ||||
| 	  dot = dot + s_x[j]*(scale*aa(j,i)); | ||||
| 	} | ||||
| 	R[o+i*ostride]=dot; | ||||
|       } | ||||
|     }} | ||||
|   } | ||||
|  | ||||
| }; | ||||
|  | ||||
|  | ||||
| template<class vobj> | ||||
| static void sliceInnerProductMatrix(  Eigen::MatrixXcd &mat, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int Orthog)  | ||||
| { | ||||
|   // FIXME: Implementation is slow | ||||
|   // Not sure of best solution.. think about it | ||||
|   typedef typename vobj::scalar_object sobj; | ||||
|   typedef typename vobj::scalar_type scalar_type; | ||||
|   typedef typename vobj::vector_type vector_type; | ||||
| @@ -495,22 +506,49 @@ static void sliceInnerProductMatrix(  Eigen::MatrixXcd &mat, const Lattice<vobj> | ||||
|   Lattice<vobj> Rslice(SliceGrid); | ||||
|    | ||||
|   mat = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|    | ||||
|   for(int i=0;i<Nblock;i++){ | ||||
|     ExtractSlice(Lslice,lhs,i,Orthog); | ||||
|     for(int j=0;j<Nblock;j++){ | ||||
|       ExtractSlice(Rslice,rhs,j,Orthog); | ||||
|       mat(i,j) = innerProduct(Lslice,Rslice); | ||||
|     } | ||||
|  | ||||
|   assert( FullGrid->_simd_layout[Orthog]==1); | ||||
|   int nh =  FullGrid->_ndimension; | ||||
|   int nl = SliceGrid->_ndimension; | ||||
|  | ||||
|   //FIXME package in a convenient iterator | ||||
|   //Should loop over a plane orthogonal to direction "Orthog" | ||||
|   int stride=FullGrid->_slice_stride[Orthog]; | ||||
|   int block =FullGrid->_slice_block [Orthog]; | ||||
|   int nblock=FullGrid->_slice_nblock[Orthog]; | ||||
|   int ostride=FullGrid->_ostride[Orthog]; | ||||
|  | ||||
|   typedef typename vobj::vector_typeD vector_typeD; | ||||
|  | ||||
| #pragma omp parallel  | ||||
|   { | ||||
|     std::vector<vobj> Left(Nblock); | ||||
|     std::vector<vobj> Right(Nblock); | ||||
|     Eigen::MatrixXcd  mat_thread = Eigen::MatrixXcd::Zero(Nblock,Nblock); | ||||
|  | ||||
| #pragma omp for collapse(2) | ||||
|     for(int n=0;n<nblock;n++){ | ||||
|     for(int b=0;b<block;b++){ | ||||
|  | ||||
|       int o  = n*stride + b; | ||||
|  | ||||
|       for(int i=0;i<Nblock;i++){ | ||||
| 	Left [i] = lhs[o+i*ostride]; | ||||
| 	Right[i] = rhs[o+i*ostride]; | ||||
|       } | ||||
|  | ||||
|       for(int i=0;i<Nblock;i++){ | ||||
|       for(int j=0;j<Nblock;j++){ | ||||
| 	auto tmp = innerProduct(Left[i],Right[j]); | ||||
| 	vector_typeD rtmp = TensorRemove(tmp); | ||||
| 	mat_thread(i,j) += Reduce(rtmp); | ||||
|       }} | ||||
|     }} | ||||
| #pragma omp critical | ||||
|     { | ||||
|       mat += mat_thread; | ||||
|     }   | ||||
|   } | ||||
| #undef FORCE_DIAG | ||||
| #ifdef FORCE_DIAG | ||||
|   for(int i=0;i<Nblock;i++){ | ||||
|     for(int j=0;j<Nblock;j++){ | ||||
|       if ( i != j ) mat(i,j)=0.0; | ||||
|     } | ||||
|   } | ||||
| #endif | ||||
|   return; | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -551,7 +551,10 @@ void Replicate(Lattice<vobj> &coarse,Lattice<vobj> & fine) | ||||
|  | ||||
| //Copy SIMD-vectorized lattice to array of scalar objects in lexicographic order | ||||
| template<typename vobj, typename sobj> | ||||
| typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in){ | ||||
| typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj>::value, void>::type  | ||||
| unvectorizeToLexOrdArray(std::vector<sobj> &out, const Lattice<vobj> &in) | ||||
| { | ||||
|  | ||||
|   typedef typename vobj::vector_type vtype; | ||||
|    | ||||
|   GridBase* in_grid = in._grid; | ||||
| @@ -590,6 +593,54 @@ typename std::enable_if<isSIMDvectorized<vobj>::value && !isSIMDvectorized<sobj> | ||||
|     extract1(in_vobj, out_ptrs, 0); | ||||
|   } | ||||
| } | ||||
| //Copy array of scalar objects in lexicographic order into a SIMD-vectorized lattice | ||||
| template<typename vobj, typename sobj> | ||||
| typename std::enable_if<isSIMDvectorized<vobj>::value  | ||||
|                     && !isSIMDvectorized<sobj>::value, void>::type  | ||||
| vectorizeFromLexOrdArray( std::vector<sobj> &in, Lattice<vobj> &out) | ||||
| { | ||||
|  | ||||
|   typedef typename vobj::vector_type vtype; | ||||
|    | ||||
|   GridBase* grid = out._grid; | ||||
|   assert(in.size()==grid->lSites()); | ||||
|    | ||||
|   int ndim     = grid->Nd(); | ||||
|   int nsimd    = vtype::Nsimd(); | ||||
|  | ||||
|   std::vector<std::vector<int> > icoor(nsimd); | ||||
|        | ||||
|   for(int lane=0; lane < nsimd; lane++){ | ||||
|     icoor[lane].resize(ndim); | ||||
|     grid->iCoorFromIindex(icoor[lane],lane); | ||||
|   } | ||||
|    | ||||
|   parallel_for(uint64_t oidx = 0; oidx < grid->oSites(); oidx++){ //loop over outer index | ||||
|     //Assemble vector of pointers to input elements | ||||
|     std::vector<sobj*> ptrs(nsimd); | ||||
|  | ||||
|     std::vector<int> ocoor(ndim); | ||||
|     grid->oCoorFromOindex(ocoor, oidx); | ||||
|  | ||||
|     std::vector<int> lcoor(grid->Nd()); | ||||
|        | ||||
|     for(int lane=0; lane < nsimd; lane++){ | ||||
|  | ||||
|       for(int mu=0;mu<ndim;mu++){ | ||||
| 	lcoor[mu] = ocoor[mu] + grid->_rdimensions[mu]*icoor[lane][mu]; | ||||
|       } | ||||
|  | ||||
|       int lex; | ||||
|       Lexicographic::IndexFromCoor(lcoor, lex, grid->_ldimensions); | ||||
|       ptrs[lane] = &in[lex]; | ||||
|     } | ||||
|      | ||||
|     //pack from those ptrs | ||||
|     vobj vecobj; | ||||
|     merge1(vecobj, ptrs, 0); | ||||
|     out._odata[oidx] = vecobj;  | ||||
|   } | ||||
| } | ||||
|  | ||||
| //Convert a Lattice from one precision to another | ||||
| template<class VobjOut, class VobjIn> | ||||
| @@ -615,7 +666,7 @@ void precisionChange(Lattice<VobjOut> &out, const Lattice<VobjIn> &in){ | ||||
|   std::vector<SobjOut> in_slex_conv(in_grid->lSites()); | ||||
|   unvectorizeToLexOrdArray(in_slex_conv, in); | ||||
|      | ||||
|   parallel_for(int out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){ | ||||
|   parallel_for(uint64_t out_oidx=0;out_oidx<out_grid->oSites();out_oidx++){ | ||||
|     std::vector<int> out_ocoor(ndim); | ||||
|     out_grid->oCoorFromOindex(out_ocoor, out_oidx); | ||||
|  | ||||
|   | ||||
| @@ -62,14 +62,20 @@ namespace Grid { | ||||
|     return ret; | ||||
|   } | ||||
|  | ||||
|   template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, ComplexD alpha, Integer Nexp = DEFAULT_MAT_EXP){ | ||||
|   template<class obj> Lattice<obj> expMat(const Lattice<obj> &rhs, RealD alpha, Integer Nexp = DEFAULT_MAT_EXP){ | ||||
|     Lattice<obj> ret(rhs._grid); | ||||
|     ret.checkerboard = rhs.checkerboard; | ||||
|     conformable(ret,rhs); | ||||
|     parallel_for(int ss=0;ss<rhs._grid->oSites();ss++){ | ||||
|       ret._odata[ss]=Exponentiate(rhs._odata[ss],alpha, Nexp); | ||||
|     } | ||||
|  | ||||
|     return ret; | ||||
|  | ||||
|      | ||||
|      | ||||
|  | ||||
|      | ||||
|   } | ||||
|  | ||||
|  | ||||
|   | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -27,6 +27,7 @@ directory | ||||
| #ifndef GRID_ILDG_IO_H | ||||
| #define GRID_ILDG_IO_H | ||||
|  | ||||
| #ifdef HAVE_LIME | ||||
| #include <algorithm> | ||||
| #include <fstream> | ||||
| #include <iomanip> | ||||
| @@ -37,213 +38,677 @@ directory | ||||
| #include <sys/utsname.h> | ||||
| #include <unistd.h> | ||||
|  | ||||
| #ifdef HAVE_LIME | ||||
|  | ||||
| extern "C" {  // for linkage | ||||
| //C-Lime is a must have for this functionality | ||||
| extern "C" {   | ||||
| #include "lime.h" | ||||
| } | ||||
|  | ||||
| namespace Grid { | ||||
| namespace QCD { | ||||
|  | ||||
| inline void ILDGGrid(GridBase *grid, ILDGField &header) { | ||||
|   assert(grid->_ndimension == 4);  // emit error if not | ||||
|   header.dimension.resize(4); | ||||
|   header.boundary.resize(4); | ||||
|   for (int d = 0; d < 4; d++) { | ||||
|     header.dimension[d] = grid->_fdimensions[d]; | ||||
|     // Read boundary conditions from ... ? | ||||
|     header.boundary[d] = std::string("periodic"); | ||||
|   } | ||||
| } | ||||
|   ///////////////////////////////// | ||||
|   // Encode word types as strings | ||||
|   ///////////////////////////////// | ||||
|  template<class word> inline std::string ScidacWordMnemonic(void){ return std::string("unknown"); } | ||||
|  template<> inline std::string ScidacWordMnemonic<double>  (void){ return std::string("D"); } | ||||
|  template<> inline std::string ScidacWordMnemonic<float>   (void){ return std::string("F"); } | ||||
|  template<> inline std::string ScidacWordMnemonic< int32_t>(void){ return std::string("I32_t"); } | ||||
|  template<> inline std::string ScidacWordMnemonic<uint32_t>(void){ return std::string("U32_t"); } | ||||
|  template<> inline std::string ScidacWordMnemonic< int64_t>(void){ return std::string("I64_t"); } | ||||
|  template<> inline std::string ScidacWordMnemonic<uint64_t>(void){ return std::string("U64_t"); } | ||||
|  | ||||
| inline void ILDGChecksum(uint32_t *buf, uint32_t buf_size_bytes, | ||||
|                          uint32_t &csum) { | ||||
|   BinaryIO::Uint32Checksum(buf, buf_size_bytes, csum); | ||||
| } | ||||
|   ///////////////////////////////////////// | ||||
|   // Encode a generic tensor as a string | ||||
|   ///////////////////////////////////////// | ||||
|  template<class vobj> std::string ScidacRecordTypeString(int &colors, int &spins, int & typesize,int &datacount) {  | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////// | ||||
| // Utilities ; these are QCD aware | ||||
| ////////////////////////////////////////////////////////////////////// | ||||
| template <class GaugeField> | ||||
| inline void ILDGStatistics(GaugeField &data, ILDGField &header) { | ||||
|   // How to convert data precision etc... | ||||
|   header.link_trace = Grid::QCD::WilsonLoops<PeriodicGimplR>::linkTrace(data); | ||||
|   header.plaquette = Grid::QCD::WilsonLoops<PeriodicGimplR>::avgPlaquette(data); | ||||
|   // header.polyakov = | ||||
| } | ||||
|    typedef typename getPrecision<vobj>::real_scalar_type stype; | ||||
|  | ||||
| // Forcing QCD here | ||||
| template <class fobj, class sobj> | ||||
| struct ILDGMunger { | ||||
|   void operator()(fobj &in, sobj &out, uint32_t &csum) { | ||||
|     for (int mu = 0; mu < 4; mu++) { | ||||
|       for (int i = 0; i < 3; i++) { | ||||
|         for (int j = 0; j < 3; j++) { | ||||
|           out(mu)()(i, j) = in(mu)()(i, j); | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     ILDGChecksum((uint32_t *)&in, sizeof(in), csum); | ||||
|   }; | ||||
| }; | ||||
|    int _ColourN       = indexRank<ColourIndex,vobj>(); | ||||
|    int _ColourScalar  =  isScalar<ColourIndex,vobj>(); | ||||
|    int _ColourVector  =  isVector<ColourIndex,vobj>(); | ||||
|    int _ColourMatrix  =  isMatrix<ColourIndex,vobj>(); | ||||
|  | ||||
| template <class fobj, class sobj> | ||||
| struct ILDGUnmunger { | ||||
|   void operator()(sobj &in, fobj &out, uint32_t &csum) { | ||||
|     for (int mu = 0; mu < 4; mu++) { | ||||
|       for (int i = 0; i < 3; i++) { | ||||
|         for (int j = 0; j < 3; j++) { | ||||
|           out(mu)()(i, j) = in(mu)()(i, j); | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     ILDGChecksum((uint32_t *)&out, sizeof(out), csum); | ||||
|   }; | ||||
| }; | ||||
|    int _SpinN       = indexRank<SpinIndex,vobj>(); | ||||
|    int _SpinScalar  =  isScalar<SpinIndex,vobj>(); | ||||
|    int _SpinVector  =  isVector<SpinIndex,vobj>(); | ||||
|    int _SpinMatrix  =  isMatrix<SpinIndex,vobj>(); | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////// | ||||
| // Write and read from fstream; compute header offset for payload | ||||
| //////////////////////////////////////////////////////////////////////////////// | ||||
| enum ILDGstate {ILDGread, ILDGwrite}; | ||||
|    int _LorentzN       = indexRank<LorentzIndex,vobj>(); | ||||
|    int _LorentzScalar  =  isScalar<LorentzIndex,vobj>(); | ||||
|    int _LorentzVector  =  isVector<LorentzIndex,vobj>(); | ||||
|    int _LorentzMatrix  =  isMatrix<LorentzIndex,vobj>(); | ||||
|  | ||||
| class ILDGIO : public BinaryIO { | ||||
|   FILE *File; | ||||
|   LimeWriter *LimeW; | ||||
|   LimeRecordHeader *LimeHeader; | ||||
|   LimeReader *LimeR; | ||||
|   std::string filename; | ||||
|    std::stringstream stream; | ||||
|  | ||||
|    stream << "GRID_"; | ||||
|    stream << ScidacWordMnemonic<stype>(); | ||||
|  | ||||
|    //   std::cout << " Lorentz N/S/V/M : " << _LorentzN<<" "<<_LorentzScalar<<"/"<<_LorentzVector<<"/"<<_LorentzMatrix<<std::endl; | ||||
|    //   std::cout << " Spin    N/S/V/M : " << _SpinN   <<" "<<_SpinScalar   <<"/"<<_SpinVector   <<"/"<<_SpinMatrix<<std::endl; | ||||
|    //   std::cout << " Colour  N/S/V/M : " << _ColourN <<" "<<_ColourScalar <<"/"<<_ColourVector <<"/"<<_ColourMatrix<<std::endl; | ||||
|  | ||||
|    if ( _LorentzVector )   stream << "_LorentzVector"<<_LorentzN; | ||||
|    if ( _LorentzMatrix )   stream << "_LorentzMatrix"<<_LorentzN; | ||||
|  | ||||
|    if ( _SpinVector )   stream << "_SpinVector"<<_SpinN; | ||||
|    if ( _SpinMatrix )   stream << "_SpinMatrix"<<_SpinN; | ||||
|  | ||||
|    if ( _ColourVector )   stream << "_ColourVector"<<_ColourN; | ||||
|    if ( _ColourMatrix )   stream << "_ColourMatrix"<<_ColourN; | ||||
|  | ||||
|    if ( _ColourScalar && _LorentzScalar && _SpinScalar )   stream << "_Complex"; | ||||
|  | ||||
|  | ||||
|    typesize = sizeof(typename vobj::scalar_type); | ||||
|  | ||||
|    if ( _ColourMatrix ) typesize*= _ColourN*_ColourN; | ||||
|    else                 typesize*= _ColourN; | ||||
|  | ||||
|    if ( _SpinMatrix )   typesize*= _SpinN*_SpinN; | ||||
|    else                 typesize*= _SpinN; | ||||
|  | ||||
|    colors    = _ColourN; | ||||
|    spins     = _SpinN; | ||||
|    datacount = _LorentzN; | ||||
|  | ||||
|    return stream.str(); | ||||
|  } | ||||
|   | ||||
|  template<class vobj> std::string ScidacRecordTypeString(Lattice<vobj> & lat,int &colors, int &spins, int & typesize,int &datacount) {  | ||||
|    return ScidacRecordTypeString<vobj>(colors,spins,typesize,datacount); | ||||
|  }; | ||||
|  | ||||
|  | ||||
|  //////////////////////////////////////////////////////////// | ||||
|  // Helper to fill out metadata | ||||
|  //////////////////////////////////////////////////////////// | ||||
|  template<class vobj> void ScidacMetaData(Lattice<vobj> & field, | ||||
| 					  FieldMetaData &header, | ||||
| 					  scidacRecord & _scidacRecord, | ||||
| 					  scidacFile   & _scidacFile)  | ||||
|  { | ||||
|    typedef typename getPrecision<vobj>::real_scalar_type stype; | ||||
|  | ||||
|    ///////////////////////////////////// | ||||
|    // Pull Grid's metadata | ||||
|    ///////////////////////////////////// | ||||
|    PrepareMetaData(field,header); | ||||
|  | ||||
|    ///////////////////////////////////// | ||||
|    // Scidac Private File structure | ||||
|    ///////////////////////////////////// | ||||
|    _scidacFile              = scidacFile(field._grid); | ||||
|  | ||||
|    ///////////////////////////////////// | ||||
|    // Scidac Private Record structure | ||||
|    ///////////////////////////////////// | ||||
|    scidacRecord sr; | ||||
|    sr.datatype   = ScidacRecordTypeString(field,sr.colors,sr.spins,sr.typesize,sr.datacount); | ||||
|    sr.date       = header.creation_date; | ||||
|    sr.precision  = ScidacWordMnemonic<stype>(); | ||||
|    sr.recordtype = GRID_IO_FIELD; | ||||
|  | ||||
|    _scidacRecord = sr; | ||||
|  | ||||
|    std::cout << GridLogMessage << "Build SciDAC datatype " <<sr.datatype<<std::endl; | ||||
|  } | ||||
|   | ||||
|  /////////////////////////////////////////////////////// | ||||
|  // Scidac checksum | ||||
|  /////////////////////////////////////////////////////// | ||||
|  static int scidacChecksumVerify(scidacChecksum &scidacChecksum_,uint32_t scidac_csuma,uint32_t scidac_csumb) | ||||
|  { | ||||
|    uint32_t scidac_checksuma = stoull(scidacChecksum_.suma,0,16); | ||||
|    uint32_t scidac_checksumb = stoull(scidacChecksum_.sumb,0,16); | ||||
|    if ( scidac_csuma !=scidac_checksuma) return 0; | ||||
|    if ( scidac_csumb !=scidac_checksumb) return 0; | ||||
|     return 1; | ||||
|  } | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////////// | ||||
| // Lime, ILDG and Scidac I/O classes | ||||
| //////////////////////////////////////////////////////////////////////////////////// | ||||
| class GridLimeReader : public BinaryIO { | ||||
|  public: | ||||
|   ILDGIO(std::string file, ILDGstate RW) { | ||||
|       filename = file; | ||||
|     if (RW == ILDGwrite){ | ||||
|       File = fopen(file.c_str(), "w"); | ||||
|       // check if opened correctly | ||||
|    /////////////////////////////////////////////////// | ||||
|    // FIXME: format for RNG? Now just binary out instead | ||||
|    /////////////////////////////////////////////////// | ||||
|  | ||||
|       LimeW = limeCreateWriter(File); | ||||
|     } else { | ||||
|       File = fopen(file.c_str(), "r"); | ||||
|       // check if opened correctly | ||||
|    FILE       *File; | ||||
|    LimeReader *LimeR; | ||||
|    std::string filename; | ||||
|  | ||||
|       LimeR = limeCreateReader(File); | ||||
|    ///////////////////////////////////////////// | ||||
|    // Open the file | ||||
|    ///////////////////////////////////////////// | ||||
|    void open(std::string &_filename)  | ||||
|    { | ||||
|      filename= _filename; | ||||
|      File = fopen(filename.c_str(), "r"); | ||||
|      LimeR = limeCreateReader(File); | ||||
|    } | ||||
|    ///////////////////////////////////////////// | ||||
|    // Close the file | ||||
|    ///////////////////////////////////////////// | ||||
|    void close(void){ | ||||
|      fclose(File); | ||||
|      //     limeDestroyReader(LimeR); | ||||
|    } | ||||
|  | ||||
|   //////////////////////////////////////////// | ||||
|   // Read a generic lattice field and verify checksum | ||||
|   //////////////////////////////////////////// | ||||
|   template<class vobj> | ||||
|   void readLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name) | ||||
|   { | ||||
|     typedef typename vobj::scalar_object sobj; | ||||
|     scidacChecksum scidacChecksum_; | ||||
|     uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||
|  | ||||
|     std::string format = getFormatString<vobj>(); | ||||
|  | ||||
|     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {  | ||||
|  | ||||
|       std::cout << GridLogMessage << limeReaderType(LimeR) <<std::endl; | ||||
| 	 | ||||
|       if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) )  ) { | ||||
|  | ||||
|  | ||||
| 	off_t offset= ftell(File); | ||||
| 	BinarySimpleMunger<sobj,sobj> munge; | ||||
| 	BinaryIO::readLatticeObject< sobj, sobj >(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); | ||||
|  | ||||
| 	///////////////////////////////////////////// | ||||
| 	// Insist checksum is next record | ||||
| 	///////////////////////////////////////////// | ||||
| 	readLimeObject(scidacChecksum_,std::string("scidacChecksum"),record_name); | ||||
|  | ||||
| 	///////////////////////////////////////////// | ||||
| 	// Verify checksums | ||||
| 	///////////////////////////////////////////// | ||||
| 	scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb); | ||||
| 	return; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   //////////////////////////////////////////// | ||||
|   // Read a generic serialisable object | ||||
|   //////////////////////////////////////////// | ||||
|   template<class serialisable_object> | ||||
|   void readLimeObject(serialisable_object &object,std::string object_name,std::string record_name) | ||||
|   { | ||||
|     std::string xmlstring; | ||||
|     // should this be a do while; can we miss a first record?? | ||||
|     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {  | ||||
|  | ||||
|   ~ILDGIO() { fclose(File); } | ||||
|       uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) | ||||
|  | ||||
|   int createHeader(std::string message, int MB, int ME, size_t PayloadSize, LimeWriter* L){ | ||||
|       if ( strncmp(limeReaderType(LimeR), record_name.c_str(),strlen(record_name.c_str()) )  ) { | ||||
| 	std::vector<char> xmlc(nbytes+1,'\0'); | ||||
| 	limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);     | ||||
| 	XmlReader RD(&xmlc[0],""); | ||||
| 	read(RD,object_name,object); | ||||
| 	return; | ||||
|       } | ||||
|  | ||||
|     }   | ||||
|     assert(0); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| class GridLimeWriter : public BinaryIO { | ||||
|  public: | ||||
|    /////////////////////////////////////////////////// | ||||
|    // FIXME: format for RNG? Now just binary out instead | ||||
|    /////////////////////////////////////////////////// | ||||
|  | ||||
|    FILE       *File; | ||||
|    LimeWriter *LimeW; | ||||
|    std::string filename; | ||||
|  | ||||
|    void open(std::string &_filename) {  | ||||
|      filename= _filename; | ||||
|      File = fopen(filename.c_str(), "w"); | ||||
|      LimeW = limeCreateWriter(File); assert(LimeW != NULL ); | ||||
|    } | ||||
|    ///////////////////////////////////////////// | ||||
|    // Close the file | ||||
|    ///////////////////////////////////////////// | ||||
|    void close(void) { | ||||
|      fclose(File); | ||||
|      //  limeDestroyWriter(LimeW); | ||||
|    } | ||||
|   /////////////////////////////////////////////////////// | ||||
|   // Lime utility functions | ||||
|   /////////////////////////////////////////////////////// | ||||
|   int createLimeRecordHeader(std::string message, int MB, int ME, size_t PayloadSize) | ||||
|   { | ||||
|     LimeRecordHeader *h; | ||||
|     h = limeCreateHeader(MB, ME, const_cast<char *>(message.c_str()), PayloadSize); | ||||
|     int status = limeWriteRecordHeader(h, L); | ||||
|     if (status < 0) { | ||||
|       std::cerr << "ILDG Header error\n"; | ||||
|       return status; | ||||
|     } | ||||
|     assert(limeWriteRecordHeader(h, LimeW) >= 0); | ||||
|     limeDestroyHeader(h); | ||||
|     return LIME_SUCCESS; | ||||
|   } | ||||
|   //////////////////////////////////////////// | ||||
|   // Write a generic serialisable object | ||||
|   //////////////////////////////////////////// | ||||
|   template<class serialisable_object> | ||||
|   void writeLimeObject(int MB,int ME,serialisable_object &object,std::string object_name,std::string record_name) | ||||
|   { | ||||
|     std::string xmlstring; | ||||
|     { | ||||
|       XmlWriter WR("",""); | ||||
|       write(WR,object_name,object); | ||||
|       xmlstring = WR.XmlString(); | ||||
|     } | ||||
|     uint64_t nbytes = xmlstring.size(); | ||||
|     int err; | ||||
|     LimeRecordHeader *h = limeCreateHeader(MB, ME,(char *)record_name.c_str(), nbytes); assert(h!= NULL); | ||||
|  | ||||
|   unsigned int writeHeader(ILDGField &header) { | ||||
|     // write header in LIME | ||||
|     n_uint64_t nbytes; | ||||
|     int MB_flag = 1, ME_flag = 0; | ||||
|     err=limeWriteRecordHeader(h, LimeW);                    assert(err>=0); | ||||
|     err=limeWriteRecordData(&xmlstring[0], &nbytes, LimeW); assert(err>=0); | ||||
|     err=limeWriterCloseRecord(LimeW);                       assert(err>=0); | ||||
|     limeDestroyHeader(h); | ||||
|   } | ||||
|   //////////////////////////////////////////// | ||||
|   // Write a generic lattice field and csum | ||||
|   //////////////////////////////////////////// | ||||
|   template<class vobj> | ||||
|   void writeLimeLatticeBinaryObject(Lattice<vobj> &field,std::string record_name) | ||||
|   { | ||||
|     //////////////////////////////////////////// | ||||
|     // Create record header | ||||
|     //////////////////////////////////////////// | ||||
|     typedef typename vobj::scalar_object sobj; | ||||
|     int err; | ||||
|     uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||
|     uint64_t PayloadSize = sizeof(sobj) * field._grid->_gsites; | ||||
|     createLimeRecordHeader(record_name, 0, 0, PayloadSize); | ||||
|  | ||||
|     char message[] = "ildg-format"; | ||||
|     nbytes = strlen(message); | ||||
|     LimeHeader = limeCreateHeader(MB_flag, ME_flag, message, nbytes); | ||||
|     limeWriteRecordHeader(LimeHeader, LimeW); | ||||
|     limeDestroyHeader(LimeHeader); | ||||
|     // save the xml header here | ||||
|     // use the xml_writer to c++ streams in pugixml | ||||
|     // and convert to char message | ||||
|     limeWriteRecordData(message, &nbytes, LimeW); | ||||
|     limeWriterCloseRecord(LimeW); | ||||
|     //////////////////////////////////////////////////////////////////// | ||||
|     // NB: FILE and iostream are jointly writing disjoint sequences in the | ||||
|     // same file through different file handles (integer units). | ||||
|     //  | ||||
|     // Both are buffered, so the reason I believe this code is correct is as follows. | ||||
|     // | ||||
|     // i)  write record header to FILE *File, telegraphing the size.  | ||||
|     // ii) ftell reads the offset from FILE *File . | ||||
|     // iii) iostream / MPI Open independently seek this offset. Write sequence direct to disk. | ||||
|     //      Closes iostream and flushes. | ||||
|     // iv) fseek on FILE * to end of this disjoint section. | ||||
|     //  v) Continue writing scidac record. | ||||
|     //////////////////////////////////////////////////////////////////// | ||||
|     off_t offset = ftell(File); | ||||
|     std::string format = getFormatString<vobj>(); | ||||
|     BinarySimpleMunger<sobj,sobj> munge; | ||||
|     BinaryIO::writeLatticeObject<vobj,sobj>(field, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); | ||||
|     err=limeWriterCloseRecord(LimeW);  assert(err>=0); | ||||
|     //////////////////////////////////////// | ||||
|     // Write checksum element, propagating forward from the BinaryIO | ||||
|     // Always pair a checksum with a binary object, and close message | ||||
|     //////////////////////////////////////// | ||||
|     scidacChecksum checksum; | ||||
|     std::stringstream streama; streama << std::hex << scidac_csuma; | ||||
|     std::stringstream streamb; streamb << std::hex << scidac_csumb; | ||||
|     checksum.suma= streama.str(); | ||||
|     checksum.sumb= streamb.str(); | ||||
|     std::cout << GridLogMessage<<" writing scidac checksums "<<std::hex<<scidac_csuma<<"/"<<scidac_csumb<<std::dec<<std::endl; | ||||
|     writeLimeObject(0,1,checksum,std::string("scidacChecksum"    ),std::string(SCIDAC_CHECKSUM)); | ||||
|   } | ||||
| }; | ||||
|  | ||||
|     return 0; | ||||
|   } | ||||
|  | ||||
|   unsigned int readHeader(ILDGField &header) { | ||||
|     return 0; | ||||
| class ScidacWriter : public GridLimeWriter { | ||||
|  public: | ||||
|  | ||||
|   //////////////////////////////////////////////// | ||||
|   // Write the file-level pair of records: a scidacFile built from `grid`, | ||||
|   // followed by the caller's own serialisable file record. | ||||
|   // NOTE(review): the first two writeLimeObject arguments look like the LIME | ||||
|   // message-begin / message-end flags (1,0 first, 0,1 last) -- confirm | ||||
|   // against GridLimeWriter::writeLimeObject. | ||||
|   //////////////////////////////////////////////// | ||||
|   template<class SerialisableUserFile> | ||||
|   void writeScidacFileRecord(GridBase *grid,SerialisableUserFile &_userFile) | ||||
|   { | ||||
|     scidacFile    _scidacFile(grid); | ||||
|     writeLimeObject(1,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); | ||||
|     writeLimeObject(0,1,_userFile,_userFile.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); | ||||
|   } | ||||
|  | ||||
|   //////////////////////////////////////////////// | ||||
|   // Write generic lattice field in scidac format | ||||
|   // | ||||
|   // Emits, in order: the Grid FieldMetaData record, the caller's record, | ||||
|   // the scidacRecord, and finally the binary payload via | ||||
|   // writeLimeLatticeBinaryObject. | ||||
|   //////////////////////////////////////////////// | ||||
|   template <class vobj, class userRecord> | ||||
|   void writeScidacFieldRecord(Lattice<vobj> &field,userRecord _userRecord)  | ||||
|   { | ||||
|     //////////////////////////////////////// | ||||
|     // fill the Grid header | ||||
|     //////////////////////////////////////// | ||||
|     FieldMetaData header; | ||||
|     scidacRecord  _scidacRecord; | ||||
|     scidacFile    _scidacFile;   // required by ScidacMetaData; not written by this method | ||||
|  | ||||
|     ScidacMetaData(field,header,_scidacRecord,_scidacFile); | ||||
|  | ||||
|     ////////////////////////////////////////////// | ||||
|     // Fill the Lime file record by record | ||||
|     ////////////////////////////////////////////// | ||||
|     writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message  | ||||
|     writeLimeObject(0,0,_userRecord,_userRecord.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); | ||||
|     writeLimeObject(0,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); | ||||
|     writeLimeLatticeBinaryObject(field,std::string(ILDG_BINARY_DATA));      // Closes message with checksum | ||||
|   } | ||||
| }; | ||||
|  | ||||
| class IldgWriter : public ScidacWriter { | ||||
|  public: | ||||
|  | ||||
|   /////////////////////////////////// | ||||
|   // A little helper | ||||
|   /////////////////////////////////// | ||||
|   void writeLimeIldgLFN(std::string &LFN) | ||||
|   { | ||||
|     uint64_t PayloadSize = LFN.size(); | ||||
|     int err; | ||||
|     createLimeRecordHeader(ILDG_DATA_LFN, 0 , 0, PayloadSize); | ||||
|     err=limeWriteRecordData(const_cast<char*>(LFN.c_str()), &PayloadSize,LimeW); assert(err>=0); | ||||
|     err=limeWriterCloseRecord(LimeW); assert(err>=0); | ||||
|   } | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////// | ||||
|   // Special ILDG operations ; gauge configs only. | ||||
|   // Don't require scidac records EXCEPT checksum | ||||
|   // Use Grid MetaData object if present. | ||||
|   //////////////////////////////////////////////////////////////// | ||||
|   template <class vsimd> | ||||
|   uint32_t readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu) { | ||||
|     typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; | ||||
|     typedef LorentzColourMatrixD sobjd; | ||||
|     typedef LorentzColourMatrixF sobjf; | ||||
|     typedef iLorentzColourMatrix<vsimd> itype; | ||||
|     typedef LorentzColourMatrix sobj; | ||||
|     GridBase *grid = Umu._grid; | ||||
|  | ||||
|     ILDGField header; | ||||
|     readHeader(header); | ||||
|  | ||||
|     // now just the conf, ignore the header | ||||
|     std::string format = std::string("IEEE64BIG"); | ||||
|     do {limeReaderNextRecord(LimeR);} | ||||
|     while (strncmp(limeReaderType(LimeR), "ildg-binary-data",16)); | ||||
|  | ||||
|     n_uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) | ||||
|  | ||||
|  | ||||
|     ILDGtype ILDGt(true, LimeR); | ||||
|     // this is special for double prec data, just for the moment | ||||
|     uint32_t csum = BinaryIO::readObjectParallel< itype, sobjd >( | ||||
|        Umu, filename, ILDGMunger<sobjd, sobj>(), 0, format, ILDGt); | ||||
|  | ||||
|     // Check configuration  | ||||
|     // todo | ||||
|  | ||||
|     return csum; | ||||
|   } | ||||
|  | ||||
|   template <class vsimd> | ||||
|   uint32_t writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, std::string format) { | ||||
|   void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,int sequence,std::string LFN,std::string description)  | ||||
|   { | ||||
|     GridBase * grid = Umu._grid; | ||||
|     typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; | ||||
|     typedef iLorentzColourMatrix<vsimd> vobj; | ||||
|     typedef typename vobj::scalar_object sobj; | ||||
|     typedef LorentzColourMatrixD fobj; | ||||
|  | ||||
|     ILDGField header; | ||||
|     // fill the header | ||||
|     header.floating_point = format; | ||||
|     uint64_t nbytes; | ||||
|  | ||||
|     ILDGUnmunger<fobj, sobj> munge; | ||||
|     unsigned int offset = writeHeader(header); | ||||
|     //////////////////////////////////////// | ||||
|     // fill the Grid header | ||||
|     //////////////////////////////////////// | ||||
|     FieldMetaData header; | ||||
|     scidacRecord  _scidacRecord; | ||||
|     scidacFile    _scidacFile; | ||||
|  | ||||
|     BinaryIO::Uint32Checksum<vobj, fobj>(Umu, munge, header.checksum); | ||||
|     ScidacMetaData(Umu,header,_scidacRecord,_scidacFile); | ||||
|  | ||||
|     // Write data record header | ||||
|     n_uint64_t PayloadSize = sizeof(fobj) * Umu._grid->_gsites; | ||||
|     createHeader("ildg-binary-data", 0, 1, PayloadSize, LimeW); | ||||
|     std::string format = header.floating_point; | ||||
|     header.ensemble_id    = description; | ||||
|     header.ensemble_label = description; | ||||
|     header.sequence_number = sequence; | ||||
|     header.ildg_lfn = LFN; | ||||
|  | ||||
|     ILDGtype ILDGt(true, LimeW); | ||||
|     uint32_t csum = BinaryIO::writeObjectParallel<vobj, fobj>( | ||||
|        Umu, filename, munge, 0, header.floating_point, ILDGt); | ||||
|     assert ( (format == std::string("IEEE32BIG"))   | ||||
|            ||(format == std::string("IEEE64BIG")) ); | ||||
|  | ||||
|     limeWriterCloseRecord(LimeW); | ||||
|     ////////////////////////////////////////////////////// | ||||
|     // Fill ILDG header data struct | ||||
|     ////////////////////////////////////////////////////// | ||||
|     ildgFormat ildgfmt ; | ||||
|     ildgfmt.field     = std::string("su3gauge"); | ||||
|  | ||||
|     // Last record | ||||
|     // the logical file name LNF | ||||
|     // look into documentation on how to generate this string | ||||
|     std::string LNF = "empty";  | ||||
|     if ( format == std::string("IEEE32BIG") ) {  | ||||
|       ildgfmt.precision = 32; | ||||
|     } else {  | ||||
|       ildgfmt.precision = 64; | ||||
|     } | ||||
|     ildgfmt.version = 1.0; | ||||
|     ildgfmt.lx = header.dimension[0]; | ||||
|     ildgfmt.ly = header.dimension[1]; | ||||
|     ildgfmt.lz = header.dimension[2]; | ||||
|     ildgfmt.lt = header.dimension[3]; | ||||
|     assert(header.nd==4); | ||||
|     assert(header.nd==header.dimension.size()); | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////////////////// | ||||
|     // Fill the USQCD info field | ||||
|     ////////////////////////////////////////////////////////////////////////////// | ||||
|     usqcdInfo info; | ||||
|     info.version=1.0; | ||||
|     info.plaq   = header.plaquette; | ||||
|     info.linktr = header.link_trace; | ||||
|  | ||||
|     PayloadSize = sizeof(LNF); | ||||
|     createHeader("ildg-binary-lfn", 1 , 1, PayloadSize, LimeW); | ||||
|     limeWriteRecordData(const_cast<char*>(LNF.c_str()), &PayloadSize, LimeW); | ||||
|  | ||||
|     limeWriterCloseRecord(LimeW); | ||||
|  | ||||
|     return csum; | ||||
|     std::cout << GridLogMessage << " Writing config; IldgIO "<<std::endl; | ||||
|     ////////////////////////////////////////////// | ||||
|     // Fill the Lime file record by record | ||||
|     ////////////////////////////////////////////// | ||||
|     writeLimeObject(1,0,header ,std::string("FieldMetaData"),std::string(GRID_FORMAT)); // Open message  | ||||
|     writeLimeObject(0,0,_scidacFile,_scidacFile.SerialisableClassName(),std::string(SCIDAC_PRIVATE_FILE_XML)); | ||||
|     writeLimeObject(0,1,info,info.SerialisableClassName(),std::string(SCIDAC_FILE_XML)); | ||||
|     writeLimeObject(1,0,_scidacRecord,_scidacRecord.SerialisableClassName(),std::string(SCIDAC_PRIVATE_RECORD_XML)); | ||||
|     writeLimeObject(0,0,info,info.SerialisableClassName(),std::string(SCIDAC_RECORD_XML)); | ||||
|     writeLimeObject(0,0,ildgfmt,std::string("ildgFormat")   ,std::string(ILDG_FORMAT)); // rec | ||||
|     writeLimeIldgLFN(header.ildg_lfn);                                                 // rec | ||||
|     writeLimeLatticeBinaryObject(Umu,std::string(ILDG_BINARY_DATA));      // Closes message with checksum | ||||
|     //    limeDestroyWriter(LimeW); | ||||
|     fclose(File); | ||||
|   } | ||||
|  | ||||
|   // format for RNG? Now just binary out | ||||
| }; | ||||
| } | ||||
| } | ||||
|  | ||||
| class IldgReader : public GridLimeReader { | ||||
|  public: | ||||
|  | ||||
|   //////////////////////////////////////////////////////////////// | ||||
|   // Read a gauge configuration from a Grid/SciDAC/ILDG LIME file. | ||||
|   // | ||||
|   //  Umu            : field to fill; its grid must have four dimensions. | ||||
|   //  FieldMetaData_ : receives the metadata recovered from the file. | ||||
|   // | ||||
|   // The ildg-format, ildg-binary-data and scidac-checksum records are | ||||
|   // mandatory. If a grid-format record is present its FieldMetaData is used; | ||||
|   // otherwise the metadata is reconstructed from the ildg-format record and, | ||||
|   // if present, a usqcdInfo record. | ||||
|   // All consistency checks are asserts. | ||||
|   //////////////////////////////////////////////////////////////// | ||||
|   template <class vsimd> | ||||
|   void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, FieldMetaData &FieldMetaData_) { | ||||
|  | ||||
|     typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; | ||||
|     typedef typename GaugeField::vector_object  vobj; | ||||
|     typedef typename vobj::scalar_object sobj; | ||||
|  | ||||
|     typedef LorentzColourMatrixF fobj; | ||||
|     typedef LorentzColourMatrixD dobj; | ||||
|  | ||||
|     std::vector<int> dims = Umu._grid->FullDimensions(); | ||||
|  | ||||
|     assert(dims.size()==4); | ||||
|  | ||||
|     // Metadata holders | ||||
|     ildgFormat     ildgFormat_    ; | ||||
|     scidacChecksum scidacChecksum_; | ||||
|     usqcdInfo      usqcdInfo_     ; | ||||
|  | ||||
|     // track what we read from file | ||||
|     int found_ildgFormat    =0; | ||||
|     int found_ildgLFN       =0; | ||||
|     int found_scidacChecksum=0; | ||||
|     int found_usqcdInfo     =0; | ||||
|     int found_ildgBinary    =0; | ||||
|     int found_FieldMetaData =0; | ||||
|  | ||||
|     // Checksums computed by BinaryIO while reading the payload. | ||||
|     // Zero-initialised so they are never read indeterminate if the binary | ||||
|     // record is missing and asserts are compiled out. | ||||
|     uint32_t nersc_csum   = 0; | ||||
|     uint32_t scidac_csuma = 0; | ||||
|     uint32_t scidac_csumb = 0; | ||||
|  | ||||
|     // Binary format, set by whichever format record is seen before the payload | ||||
|     std::string format; | ||||
|  | ||||
|     // True when the current record's type starts with `name`. Using one | ||||
|     // helper keeps the compared name and its length in step (the original | ||||
|     // compared GRID_FORMAT using strlen(ILDG_FORMAT)). | ||||
|     auto recordIs = [&](const char *name) { | ||||
|       return strncmp(limeReaderType(LimeR), name, strlen(name)) == 0; | ||||
|     }; | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////////////// | ||||
|     // Loop over all records | ||||
|     // -- Order is poorly guaranteed except ILDG header precedes binary section. | ||||
|     // -- Run like an event loop. | ||||
|     // -- Impose trust hierarchy. Grid takes precedence & look for ILDG, and failing | ||||
|     //    that Scidac.  | ||||
|     // -- Insist on Scidac checksum record. | ||||
|     ////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|     while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {  | ||||
|  | ||||
|       uint64_t nbytes = limeReaderBytes(LimeR);//size of this record (configuration) | ||||
|  | ||||
|       if ( recordIs(ILDG_BINARY_DATA) ) { | ||||
|         ///////////////////////////////// | ||||
|         // Binary data | ||||
|         ///////////////////////////////// | ||||
|         std::cout << GridLogMessage << "ILDG Binary record found : "  ILDG_BINARY_DATA << std::endl; | ||||
|         off_t offset= ftell(File); | ||||
|  | ||||
|         // Anything other than IEEE64BIG (including a format that was never | ||||
|         // set) is read as single precision, as before. | ||||
|         if ( format == std::string("IEEE64BIG") ) { | ||||
|           GaugeSimpleMunger<dobj, sobj> munge; | ||||
|           BinaryIO::readLatticeObject< vobj, dobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); | ||||
|         } else {  | ||||
|           GaugeSimpleMunger<fobj, sobj> munge; | ||||
|           BinaryIO::readLatticeObject< vobj, fobj >(Umu, filename, munge, offset, format,nersc_csum,scidac_csuma,scidac_csumb); | ||||
|         } | ||||
|  | ||||
|         found_ildgBinary = 1; | ||||
|         continue; | ||||
|       } | ||||
|  | ||||
|       ////////////////////////////////////////////////////////////////// | ||||
|       // Not BINARY_DATA: read the payload as a string and parse | ||||
|       ////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|       // Copy out the string; the extra byte keeps it NUL terminated | ||||
|       std::vector<char> xmlc(nbytes+1,'\0'); | ||||
|       limeReaderReadData((void *)&xmlc[0], &nbytes, LimeR);     | ||||
|       std::cout << GridLogMessage<< "Non binary record :" <<limeReaderType(LimeR) <<std::endl; //<<"\n"<<(&xmlc[0])<<std::endl; | ||||
|  | ||||
|       ////////////////////////////////// | ||||
|       // ILDG format record | ||||
|       if ( recordIs(ILDG_FORMAT) ) {  | ||||
|  | ||||
|         XmlReader RD(&xmlc[0],""); | ||||
|         read(RD,"ildgFormat",ildgFormat_); | ||||
|  | ||||
|         if ( ildgFormat_.precision == 64 ) format = std::string("IEEE64BIG"); | ||||
|         if ( ildgFormat_.precision == 32 ) format = std::string("IEEE32BIG"); | ||||
|  | ||||
|         assert( ildgFormat_.lx == dims[0]); | ||||
|         assert( ildgFormat_.ly == dims[1]); | ||||
|         assert( ildgFormat_.lz == dims[2]); | ||||
|         assert( ildgFormat_.lt == dims[3]); | ||||
|  | ||||
|         found_ildgFormat = 1; | ||||
|       } | ||||
|  | ||||
|       // Logical file name: stored verbatim | ||||
|       if ( recordIs(ILDG_DATA_LFN) ) { | ||||
|         FieldMetaData_.ildg_lfn = std::string(&xmlc[0]); | ||||
|         found_ildgLFN = 1; | ||||
|       } | ||||
|  | ||||
|       // Grid's own metadata record | ||||
|       if ( recordIs(GRID_FORMAT) ) {  | ||||
|  | ||||
|         XmlReader RD(&xmlc[0],""); | ||||
|         read(RD,"FieldMetaData",FieldMetaData_); | ||||
|  | ||||
|         format = FieldMetaData_.floating_point; | ||||
|  | ||||
|         assert(FieldMetaData_.dimension[0] == dims[0]); | ||||
|         assert(FieldMetaData_.dimension[1] == dims[1]); | ||||
|         assert(FieldMetaData_.dimension[2] == dims[2]); | ||||
|         assert(FieldMetaData_.dimension[3] == dims[3]); | ||||
|  | ||||
|         found_FieldMetaData = 1; | ||||
|       } | ||||
|  | ||||
|       if ( recordIs(SCIDAC_RECORD_XML) ) {  | ||||
|         std::string xmls(&xmlc[0]); | ||||
|         // is it a USQCD info field | ||||
|         if ( xmls.find(std::string("usqcdInfo")) != std::string::npos ) {  | ||||
|           std::cout << GridLogMessage<<"...found a usqcdInfo field"<<std::endl; | ||||
|           XmlReader RD(&xmlc[0],""); | ||||
|           read(RD,"usqcdInfo",usqcdInfo_); | ||||
|           found_usqcdInfo = 1; | ||||
|         } | ||||
|       } | ||||
|  | ||||
|       if ( recordIs(SCIDAC_CHECKSUM) ) {  | ||||
|         XmlReader RD(&xmlc[0],""); | ||||
|         read(RD,"scidacChecksum",scidacChecksum_); | ||||
|         found_scidacChecksum = 1; | ||||
|       } | ||||
|  | ||||
|     } | ||||
|  | ||||
|     ////////////////////////////////////////////////////// | ||||
|     // Minimally must find binary segment and checksum | ||||
|     // Since this is an ILDG reader require ILDG format | ||||
|     ////////////////////////////////////////////////////// | ||||
|     assert(found_ildgBinary); | ||||
|     assert(found_ildgFormat); | ||||
|     assert(found_scidacChecksum); | ||||
|  | ||||
|     // Must find something with the lattice dimensions | ||||
|     assert(found_FieldMetaData||found_ildgFormat); | ||||
|  | ||||
|     if ( found_FieldMetaData ) { | ||||
|  | ||||
|       std::cout << GridLogMessage<<"Grid MetaData was record found: configuration was probably written by Grid ! Yay ! "<<std::endl; | ||||
|  | ||||
|     } else {  | ||||
|  | ||||
|       assert(found_ildgFormat); | ||||
|       assert ( ildgFormat_.field == std::string("su3gauge") ); | ||||
|  | ||||
|       /////////////////////////////////////////////////////////////////////////////////////// | ||||
|       // Populate our Grid metadata as best we can | ||||
|       /////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|       std::ostringstream vers; vers << ildgFormat_.version; | ||||
|       FieldMetaData_.hdr_version = vers.str(); | ||||
|       FieldMetaData_.data_type = std::string("4D_SU3_GAUGE_3X3"); | ||||
|  | ||||
|       FieldMetaData_.nd=4; | ||||
|       FieldMetaData_.dimension.resize(4); | ||||
|  | ||||
|       FieldMetaData_.dimension[0] = ildgFormat_.lx ; | ||||
|       FieldMetaData_.dimension[1] = ildgFormat_.ly ; | ||||
|       FieldMetaData_.dimension[2] = ildgFormat_.lz ; | ||||
|       FieldMetaData_.dimension[3] = ildgFormat_.lt ; | ||||
|  | ||||
|       if ( found_usqcdInfo ) {  | ||||
|         FieldMetaData_.plaquette = usqcdInfo_.plaq; | ||||
|         FieldMetaData_.link_trace= usqcdInfo_.linktr; | ||||
|         std::cout << GridLogMessage <<"This configuration was probably written by USQCD "<<std::endl; | ||||
|         std::cout << GridLogMessage <<"USQCD xml record Plaquette : "<<FieldMetaData_.plaquette<<std::endl; | ||||
|         std::cout << GridLogMessage <<"USQCD xml record LinkTrace : "<<FieldMetaData_.link_trace<<std::endl; | ||||
|       } else {  | ||||
|         FieldMetaData_.plaquette = 0.0; | ||||
|         FieldMetaData_.link_trace= 0.0; | ||||
|         std::cout << GridLogWarning << "This configuration is unsafe with no plaquette records that can verify it !!! "<<std::endl; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     //////////////////////////////////////////////////////////// | ||||
|     // Really really want to mandate a scidac checksum | ||||
|     //////////////////////////////////////////////////////////// | ||||
|     if ( found_scidacChecksum ) { | ||||
|       // The record stores both sums as hexadecimal strings | ||||
|       FieldMetaData_.scidac_checksuma = std::stoull(scidacChecksum_.suma,0,16); | ||||
|       FieldMetaData_.scidac_checksumb = std::stoull(scidacChecksum_.sumb,0,16); | ||||
|       scidacChecksumVerify(scidacChecksum_,scidac_csuma,scidac_csumb); | ||||
|       assert( scidac_csuma ==FieldMetaData_.scidac_checksuma); | ||||
|       assert( scidac_csumb ==FieldMetaData_.scidac_checksumb); | ||||
|       std::cout << GridLogMessage<<"SciDAC checksums match " << std::endl; | ||||
|     } else {  | ||||
|       std::cout << GridLogWarning<<"SciDAC checksums not found. This is unsafe. " << std::endl; | ||||
|       assert(0); // Can I insist always checksum ? | ||||
|     } | ||||
|  | ||||
|     // Recompute plaquette and link trace from the field that was just read | ||||
|     // and compare against the values the file claims. | ||||
|     if ( found_FieldMetaData || found_usqcdInfo ) { | ||||
|       FieldMetaData checker; | ||||
|       GaugeStatistics(Umu,checker); | ||||
|       assert(fabs(checker.plaquette  - FieldMetaData_.plaquette )<1.0e-5); | ||||
|       assert(fabs(checker.link_trace - FieldMetaData_.link_trace)<1.0e-5); | ||||
|       std::cout << GridLogMessage<<"Plaquette and link trace match " << std::endl; | ||||
|     } | ||||
|   } | ||||
|  }; | ||||
|  | ||||
| }} | ||||
|  | ||||
| //HAVE_LIME | ||||
| #endif | ||||
|   | ||||
| @@ -34,47 +34,198 @@ extern "C" { // for linkage | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
| struct ILDGtype { | ||||
|   bool is_ILDG; | ||||
|   LimeWriter* LW; | ||||
|   LimeReader* LR; | ||||
| ///////////////////////////////////////////////////////////////////////////////// | ||||
| // Data representation of records that enter ILDG and SciDac formats | ||||
| ///////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|   ILDGtype(bool is, LimeWriter* L) : is_ILDG(is), LW(L), LR(NULL) {} | ||||
|   ILDGtype(bool is, LimeReader* L) : is_ILDG(is), LW(NULL), LR(L) {} | ||||
|   ILDGtype() : is_ILDG(false), LW(NULL), LR(NULL) {} | ||||
| }; | ||||
| #define GRID_FORMAT      "grid-format" | ||||
| #define ILDG_FORMAT      "ildg-format" | ||||
| #define ILDG_BINARY_DATA "ildg-binary-data" | ||||
| #define ILDG_DATA_LFN    "ildg-data-lfn" | ||||
| #define SCIDAC_CHECKSUM           "scidac-checksum" | ||||
| #define SCIDAC_PRIVATE_FILE_XML   "scidac-private-file-xml" | ||||
| #define SCIDAC_FILE_XML           "scidac-file-xml" | ||||
| #define SCIDAC_PRIVATE_RECORD_XML "scidac-private-record-xml" | ||||
| #define SCIDAC_RECORD_XML         "scidac-record-xml" | ||||
| #define SCIDAC_BINARY_DATA        "scidac-binary-data" | ||||
| // Unused SCIDAC records names; could move to support this functionality | ||||
| #define SCIDAC_SITELIST           "scidac-sitelist" | ||||
|  | ||||
| class ILDGField { | ||||
|   //////////////////////////////////////////////////////////// | ||||
|   const int GRID_IO_SINGLEFILE = 0; // hardcode lift from QIO compat | ||||
|   const int GRID_IO_MULTIFILE  = 1; // hardcode lift from QIO compat | ||||
|   const int GRID_IO_FIELD      = 0; // hardcode lift from QIO compat | ||||
|   const int GRID_IO_GLOBAL     = 1; // hardcode lift from QIO compat | ||||
|   //////////////////////////////////////////////////////////// | ||||
|  | ||||
| ///////////////////////////////////////////////////////////////////////////////// | ||||
| // QIO uses mandatory "private" records fixed format | ||||
| // Private is in principle "opaque"; however, it can't be changed now because that would break existing | ||||
| // file compatibility, so it should be correct to assume the undocumented but de facto file structure. | ||||
| ///////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
| //////////////////////// | ||||
| // Scidac private file xml | ||||
| // <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile> | ||||
| //////////////////////// | ||||
| struct scidacFile : Serializable { | ||||
|  public: | ||||
|   // header strings (not in order) | ||||
|   std::vector<int> dimension; | ||||
|   std::vector<std::string> boundary; | ||||
|   int data_start; | ||||
|   std::string hdr_version; | ||||
|   std::string storage_format; | ||||
|   // Checks on data | ||||
|   double link_trace; | ||||
|   double plaquette; | ||||
|   uint32_t checksum; | ||||
|   unsigned int sequence_number; | ||||
|   std::string data_type; | ||||
|   std::string ensemble_id; | ||||
|   std::string ensemble_label; | ||||
|   std::string creator; | ||||
|   std::string creator_hardware; | ||||
|   std::string creation_date; | ||||
|   std::string archive_date; | ||||
|   std::string floating_point; | ||||
| }; | ||||
| } | ||||
| #else | ||||
| namespace Grid { | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(scidacFile, | ||||
|                                   double, version, | ||||
|                                   int, spacetime, | ||||
| 				  std::string, dims, // must convert to int | ||||
|                                   int, volfmt); | ||||
|  | ||||
| struct ILDGtype { | ||||
|   bool is_ILDG; | ||||
|   ILDGtype() : is_ILDG(false) {} | ||||
| }; | ||||
| } | ||||
|   // Parse the whitespace-separated integers held in `dims`. | ||||
|   // Reading stops at the first token that is not an integer. | ||||
|   std::vector<int> getDimensions(void) {  | ||||
|     std::vector<int> extents; | ||||
|     std::stringstream parser(dims); | ||||
|     for (int value; parser >> value; ) { | ||||
|       extents.push_back(value); | ||||
|     } | ||||
|     return extents; | ||||
|   } | ||||
|  | ||||
|   // Store `dimensions` in `dims` as a single line of integers separated by | ||||
|   // one space, e.g. "16 16 16 32". The previous version also wrote a newline | ||||
|   // after each separator, which made the XML field multi-line. | ||||
|   // An empty vector yields an empty string. | ||||
|   void setDimensions(std::vector<int> dimensions) {  | ||||
|     const char delimiter = ' '; | ||||
|     std::stringstream stream; | ||||
|     for(std::vector<int>::size_type i=0;i<dimensions.size();i++){  | ||||
|       if ( i != 0 ) {  | ||||
|         stream << delimiter; | ||||
|       } | ||||
|       stream << dimensions[i]; | ||||
|     } | ||||
|     dims = stream.str(); | ||||
|   } | ||||
|  | ||||
|   // Default construction leaves the fields unset. | ||||
|   scidacFile() =default; // default constructor | ||||
|   // Build the record from a grid: version 1.0, the grid's dimension count | ||||
|   // and full dimensions, single-file volume format. | ||||
|   scidacFile(GridBase * grid) | ||||
|     : version(1.0), | ||||
|       spacetime(grid->_ndimension), | ||||
|       volfmt(GRID_IO_SINGLEFILE) | ||||
|   { | ||||
|     setDimensions(grid->FullDimensions());  | ||||
|   } | ||||
|  | ||||
| }; | ||||
|  | ||||
| /////////////////////////////////////////////////////////////////////// | ||||
| // scidac-private-record-xml : example | ||||
| // <scidacRecord> | ||||
| // <version>1.1</version><date>Tue Jul 26 21:14:44 2011 UTC</date><recordtype>0</recordtype> | ||||
| // <datatype>QDP_D3_ColorMatrix</datatype><precision>D</precision><colors>3</colors><spins>4</spins> | ||||
| // <typesize>144</typesize><datacount>4</datacount> | ||||
| // </scidacRecord> | ||||
| /////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
| struct scidacRecord : Serializable { | ||||
|  public: | ||||
|   // Serialised fields, in the order they are listed here. | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(scidacRecord, | ||||
|                                   double,      version, | ||||
|                                   std::string, date, | ||||
|                                   int,         recordtype, | ||||
|                                   std::string, datatype, | ||||
|                                   std::string, precision, | ||||
|                                   int,         colors, | ||||
|                                   int,         spins, | ||||
|                                   int,         typesize, | ||||
|                                   int,         datacount); | ||||
|  | ||||
|   // Only the version is given a value; all other fields are left for the caller. | ||||
|   scidacRecord() : version(1.0) {} | ||||
|  | ||||
| }; | ||||
|  | ||||
| //////////////////////// | ||||
| // ILDG format | ||||
| //////////////////////// | ||||
| struct ildgFormat : Serializable { | ||||
| public: | ||||
|   // Serialised fields: format version, field name, precision in bits, | ||||
|   // and the four lattice extents. | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(ildgFormat, | ||||
|                                   double,      version, | ||||
|                                   std::string, field, | ||||
|                                   int,         precision, | ||||
|                                   int,         lx, | ||||
|                                   int,         ly, | ||||
|                                   int,         lz, | ||||
|                                   int,         lt); | ||||
|   // Only the version is given a value; all other fields are left for the caller. | ||||
|   ildgFormat() : version(1.0) {} | ||||
| }; | ||||
| //////////////////////// | ||||
| // USQCD info | ||||
| //////////////////////// | ||||
| struct usqcdInfo : Serializable {  | ||||
|  public: | ||||
|   // Serialised fields: version, plaquette, link trace and a free-form string. | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdInfo, | ||||
|                                   double,      version, | ||||
|                                   double,      plaq, | ||||
|                                   double,      linktr, | ||||
|                                   std::string, info); | ||||
|   // Only the version is given a value; all other fields are left for the caller. | ||||
|   usqcdInfo() : version(1.0) {} | ||||
| }; | ||||
| //////////////////////// | ||||
| // Scidac Checksum | ||||
| //////////////////////// | ||||
| struct scidacChecksum : Serializable {  | ||||
|  public: | ||||
|   // Serialised fields: version and the two checksums, held as strings. | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(scidacChecksum, | ||||
|                                   double,      version, | ||||
|                                   std::string, suma, | ||||
|                                   std::string, sumb); | ||||
|   // Only the version is given a value; the sums are left for the caller. | ||||
|   scidacChecksum() : version(1.0) {} | ||||
| }; | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Type:           scidac-file-xml         <title>MILC ILDG archival gauge configuration</title> | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Type:            | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
| //////////////////////// | ||||
| // Scidac private file xml  | ||||
| // <?xml version="1.0" encoding="UTF-8"?><scidacFile><version>1.1</version><spacetime>4</spacetime><dims>16 16 16 32 </dims><volfmt>0</volfmt></scidacFile>  | ||||
| ////////////////////////                                                                                                                                                                               | ||||
|  | ||||
| // Disabled: everything up to the matching #endif is excluded from compilation. | ||||
| #if 0 | ||||
| //////////////////////////////////////////////////////////////////////////////////////// | ||||
| // From http://www.physics.utah.edu/~detar/scidac/qio_2p3.pdf | ||||
| //////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Serialisable record with version, type and info fields; only version is set on construction. | ||||
| struct usqcdPropFile : Serializable {  | ||||
|  public: | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropFile, | ||||
| 				  double, version, | ||||
| 				  std::string, type, | ||||
| 				  std::string, info); | ||||
|   usqcdPropFile() {  | ||||
|     version=1.0;  | ||||
|   }; | ||||
| }; | ||||
| // Serialisable record with version and info fields; only version is set on construction. | ||||
| struct usqcdSourceInfo : Serializable {  | ||||
|  public: | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdSourceInfo, | ||||
| 				  double, version, | ||||
| 				  std::string, info); | ||||
|   usqcdSourceInfo() {  | ||||
|     version=1.0;  | ||||
|   }; | ||||
| }; | ||||
| // Serialisable record with version, spin, color and info fields; only version is set on construction. | ||||
| struct usqcdPropInfo : Serializable {  | ||||
|  public: | ||||
|   GRID_SERIALIZABLE_CLASS_MEMBERS(usqcdPropInfo, | ||||
| 				  double, version, | ||||
| 				  int, spin, | ||||
| 				  int, color, | ||||
| 				  std::string, info); | ||||
|   usqcdPropInfo() {  | ||||
|     version=1.0;  | ||||
|   }; | ||||
| }; | ||||
| #endif | ||||
|  | ||||
| } | ||||
| #endif | ||||
| #endif | ||||
|   | ||||
							
								
								
									
										325
									
								
								lib/parallelIO/MetaData.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										325
									
								
								lib/parallelIO/MetaData.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,325 @@ | ||||
| /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./lib/parallelIO/NerscIO.h | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
|  | ||||
|     Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
| *************************************************************************************/ | ||||
| /*  END LEGAL */ | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <ctime> | ||||
| #include <fstream> | ||||
| #include <iomanip> | ||||
| #include <iostream> | ||||
| #include <map> | ||||
| #include <pwd.h> | ||||
| #include <sys/utsname.h> | ||||
| #include <unistd.h> | ||||
|  | ||||
| namespace Grid { | ||||
|  | ||||
|   /////////////////////////////////////////////////////// | ||||
|   // Precision mapping | ||||
|   /////////////////////////////////////////////////////// | ||||
|   // Returns the floating-point format tag for the real scalar type of vobj: | ||||
|   // "IEEE32BIG" when that type is float-sized, "IEEE64BIG" when it is double-sized, | ||||
|   // and an empty string when it matches neither size. | ||||
|   template<class vobj> static std::string getFormatString (void) | ||||
|   { | ||||
|     typedef typename getPrecision<vobj>::real_scalar_type real_t; | ||||
|     const size_t width = sizeof(real_t); | ||||
|  | ||||
|     std::string tag; | ||||
|     if ( width == sizeof(float)  ) tag = std::string("IEEE32BIG"); | ||||
|     if ( width == sizeof(double) ) tag = std::string("IEEE64BIG"); | ||||
|     return tag; | ||||
|   } | ||||
|   //////////////////////////////////////////////////////////////////////////////// | ||||
|   // header specification/interpretation | ||||
|   //////////////////////////////////////////////////////////////////////////////// | ||||
|     // Serialisable description of a stored lattice field: geometry (nd, dimension, boundary), | ||||
|     // payload location and format (data_start, hdr_version, storage_format, data_type, | ||||
|     // floating_point), integrity checks (link_trace, plaquette, checksum, scidac_checksuma/b) | ||||
|     // and provenance strings (ensemble, creator, dates). | ||||
|     class FieldMetaData : Serializable { | ||||
|     public: | ||||
|  | ||||
|       GRID_SERIALIZABLE_CLASS_MEMBERS(FieldMetaData, | ||||
|                                       int, nd, | ||||
|                                       std::vector<int>, dimension, | ||||
|                                       std::vector<std::string>, boundary, | ||||
|                                       int, data_start, | ||||
|                                       std::string, hdr_version, | ||||
|                                       std::string, storage_format, | ||||
|                                       double, link_trace, | ||||
|                                       double, plaquette, | ||||
|                                       uint32_t, checksum, | ||||
|                                       uint32_t, scidac_checksuma, | ||||
|                                       uint32_t, scidac_checksumb, | ||||
|                                       unsigned int, sequence_number, | ||||
|                                       std::string, data_type, | ||||
|                                       std::string, ensemble_id, | ||||
|                                       std::string, ensemble_label, | ||||
|                                       std::string, ildg_lfn, | ||||
|                                       std::string, creator, | ||||
|                                       std::string, creator_hardware, | ||||
|                                       std::string, creation_date, | ||||
|                                       std::string, archive_date, | ||||
|                                       std::string, floating_point); | ||||
|  | ||||
|       // Defaults to a four-dimensional header. Every arithmetic member is zeroed so that a | ||||
|       // header which is dumped or serialised before all fields are filled in never exposes | ||||
|       // indeterminate values. | ||||
|       FieldMetaData(void) { | ||||
|         nd=4; | ||||
|         dimension.resize(4); | ||||
|         boundary.resize(4); | ||||
|         data_start       = 0; | ||||
|         link_trace       = 0.0; | ||||
|         plaquette        = 0.0; | ||||
|         checksum         = 0; | ||||
|         scidac_checksuma = 0; | ||||
|         scidac_checksumb = 0; | ||||
|         sequence_number  = 0; | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|  | ||||
|  | ||||
|   namespace QCD { | ||||
|  | ||||
|     using namespace Grid; | ||||
|  | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////////// | ||||
|     // Bit and Physical Checksumming and QA of data | ||||
|     ////////////////////////////////////////////////////////////////////// | ||||
|     // Records the grid geometry in the header: nd is taken from grid->_ndimension, each | ||||
|     // dimension entry from grid->_fdimensions, and every boundary entry is set to "PERIODIC". | ||||
|     inline void GridMetaData(GridBase *grid,FieldMetaData &header) | ||||
|     { | ||||
|       const int ndim = grid->_ndimension; | ||||
|  | ||||
|       header.nd = ndim; | ||||
|       header.dimension.resize(ndim); | ||||
|       header.boundary.resize(ndim); | ||||
|  | ||||
|       for(int dir=0;dir<ndim;dir++) { | ||||
|         header.dimension[dir] = grid->_fdimensions[dir]; | ||||
|         header.boundary[dir]  = std::string("PERIODIC"); | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // Fills in the provenance fields of a header: | ||||
|     //   creator          - login name from the password entry of the calling uid (left untouched if the lookup fails) | ||||
|     //   creation_date    - current local time formatted with "%c %Z" (empty if it cannot be obtained or formatted) | ||||
|     //   archive_date     - copy of creation_date | ||||
|     //   creator_hardware - "<nodename>-<machine>-<sysname>-<release>" from uname() (left untouched if uname() fails) | ||||
|     inline void MachineCharacteristics(FieldMetaData &header) | ||||
|     { | ||||
|       // Who | ||||
|       struct passwd *pw = getpwuid (getuid()); | ||||
|       if (pw) header.creator = std::string(pw->pw_name); | ||||
|  | ||||
|       // When: formatted with strftime, which takes the same "%c %Z" format the disabled | ||||
|       // std::put_time call used. localtime may return a null pointer, so it is checked | ||||
|       // before use. | ||||
|       std::string stamp; | ||||
|       std::time_t now = std::time(nullptr); | ||||
|       const std::tm *local = std::localtime(&now); | ||||
|       if (local) { | ||||
|         char text[128]; | ||||
|         if (std::strftime(text, sizeof(text), "%c %Z", local) > 0) stamp = text; | ||||
|       } | ||||
|       header.creation_date = stamp; | ||||
|       header.archive_date  = header.creation_date; | ||||
|  | ||||
|       // What: the utsname fields are only meaningful when uname() reports success | ||||
|       struct utsname name; | ||||
|       if (uname(&name) >= 0) { | ||||
|         header.creator_hardware = std::string(name.nodename)+"-"; | ||||
|         header.creator_hardware+= std::string(name.machine)+"-"; | ||||
|         header.creator_hardware+= std::string(name.sysname)+"-"; | ||||
|         header.creator_hardware+= std::string(name.release); | ||||
|       } | ||||
|     } | ||||
|  | ||||
| #define dump_meta_data(field, s)					\ | ||||
|       s << "BEGIN_HEADER"      << std::endl;				\ | ||||
|       s << "HDR_VERSION = "    << field.hdr_version    << std::endl;	\ | ||||
|       s << "DATATYPE = "       << field.data_type      << std::endl;	\ | ||||
|       s << "STORAGE_FORMAT = " << field.storage_format << std::endl;	\ | ||||
|       for(int i=0;i<4;i++){						\ | ||||
| 	s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \ | ||||
|       }									\ | ||||
|       s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \ | ||||
|       s << "PLAQUETTE  = " << std::setprecision(10) << field.plaquette  << std::endl; \ | ||||
|       for(int i=0;i<4;i++){						\ | ||||
| 	s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;	\ | ||||
|       }									\ | ||||
| 									\ | ||||
|       s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl; \ | ||||
|       s << "SCIDAC_CHECKSUMA = "<< std::hex << std::setw(10) << field.scidac_checksuma << std::dec<<std::endl; \ | ||||
|       s << "SCIDAC_CHECKSUMB = "<< std::hex << std::setw(10) << field.scidac_checksumb << std::dec<<std::endl; \ | ||||
|       s << "ENSEMBLE_ID = "     << field.ensemble_id      << std::endl;	\ | ||||
|       s << "ENSEMBLE_LABEL = "  << field.ensemble_label   << std::endl;	\ | ||||
|       s << "SEQUENCE_NUMBER = " << field.sequence_number  << std::endl;	\ | ||||
|       s << "CREATOR = "         << field.creator          << std::endl;	\ | ||||
|       s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;	\ | ||||
|       s << "CREATION_DATE = "   << field.creation_date    << std::endl;	\ | ||||
|       s << "ARCHIVE_DATE = "    << field.archive_date     << std::endl;	\ | ||||
|       s << "FLOATING_POINT = "  << field.floating_point   << std::endl;	\ | ||||
|       s << "END_HEADER"         << std::endl; | ||||
|  | ||||
| // Generic header preparation: stores the precision tag for vobj, zeroes the NERSC checksum, | ||||
| // then fills in the grid geometry and the creator/machine details. | ||||
| template<class vobj> inline void PrepareMetaData(Lattice<vobj> & field, FieldMetaData &header) | ||||
| { | ||||
|   header.floating_point = getFormatString<vobj>(); | ||||
|   header.checksum       = 0x0; // Nersc checksum unused in ILDG, Scidac | ||||
|  | ||||
|   GridBase *geometry = field._grid; | ||||
|   GridMetaData(geometry,header); | ||||
|   MachineCharacteristics(header); | ||||
| } | ||||
|  // Overload for vLorentzColourMatrixF lattices: stores the link trace and the average | ||||
|  // plaquette of `data`, as computed by WilsonLoops<PeriodicGimplF>, in the header. | ||||
|  inline void GaugeStatistics(Lattice<vLorentzColourMatrixF> & data,FieldMetaData &header) | ||||
|  { | ||||
|    typedef Grid::QCD::WilsonLoops<PeriodicGimplF> Loops; | ||||
|    header.link_trace = Loops::linkTrace(data); | ||||
|    header.plaquette  = Loops::avgPlaquette(data); | ||||
|  } | ||||
|  // Overload for vLorentzColourMatrixD lattices: stores the link trace and the average | ||||
|  // plaquette of `data`, as computed by WilsonLoops<PeriodicGimplD>, in the header. | ||||
|  inline void GaugeStatistics(Lattice<vLorentzColourMatrixD> & data,FieldMetaData &header) | ||||
|  { | ||||
|    typedef Grid::QCD::WilsonLoops<PeriodicGimplD> Loops; | ||||
|    header.link_trace = Loops::linkTrace(data); | ||||
|    header.plaquette  = Loops::avgPlaquette(data); | ||||
|  } | ||||
|  // Specialisation for vLorentzColourMatrixF gauge fields: as the generic version, plus the | ||||
|  // link trace and plaquette from GaugeStatistics (called between geometry and machine details). | ||||
|  template<> inline void PrepareMetaData<vLorentzColourMatrixF>(Lattice<vLorentzColourMatrixF> & field, FieldMetaData &header) | ||||
|  { | ||||
|    header.floating_point = getFormatString<vLorentzColourMatrixF>(); | ||||
|    header.checksum       = 0x0; // Nersc checksum unused in ILDG, Scidac | ||||
|  | ||||
|    GridBase *geometry = field._grid; | ||||
|    GridMetaData(geometry,header); | ||||
|    GaugeStatistics(field,header); | ||||
|    MachineCharacteristics(header); | ||||
|  } | ||||
|  // Specialisation for vLorentzColourMatrixD gauge fields: as the generic version, plus the | ||||
|  // link trace and plaquette from GaugeStatistics (called between geometry and machine details). | ||||
|  template<> inline void PrepareMetaData<vLorentzColourMatrixD>(Lattice<vLorentzColourMatrixD> & field, FieldMetaData &header) | ||||
|  { | ||||
|    header.floating_point = getFormatString<vLorentzColourMatrixD>(); | ||||
|    header.checksum       = 0x0; // Nersc checksum unused in ILDG, Scidac | ||||
|  | ||||
|    GridBase *geometry = field._grid; | ||||
|    GridMetaData(geometry,header); | ||||
|    GaugeStatistics(field,header); | ||||
|    MachineCharacteristics(header); | ||||
|  } | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////////// | ||||
|     // Utilities ; these are QCD aware | ||||
|     ////////////////////////////////////////////////////////////////////// | ||||
|     // For every direction, overwrites row 2 of the colour matrix with the adjoint (adj) of the | ||||
|     // cross product of rows 0 and 1. Rows 0 and 1 are read only. | ||||
|     inline void reconstruct3(LorentzColourMatrix & cm) | ||||
|     { | ||||
|       const int x=0, y=1, z=2;   // column labels used in the cross-product formulae below | ||||
|  | ||||
|       for(int dir=0;dir<Nd;dir++){ | ||||
|         cm(dir)()(2,x) = adj(cm(dir)()(0,y)*cm(dir)()(1,z)-cm(dir)()(0,z)*cm(dir)()(1,y)); //x= yz-zy | ||||
|         cm(dir)()(2,y) = adj(cm(dir)()(0,z)*cm(dir)()(1,x)-cm(dir)()(0,x)*cm(dir)()(1,z)); //y= zx-xz | ||||
|         cm(dir)()(2,z) = adj(cm(dir)()(0,x)*cm(dir)()(1,y)-cm(dir)()(0,y)*cm(dir)()(1,x)); //z= xy-yx | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     // Some data types for intermediate storage | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     // Nd directions, each holding 2 rows of Nc colour entries (two-row storage of a link matrix). | ||||
|     template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, Nd >; | ||||
|  | ||||
|     // Scalar instantiations for the default, F and D complex types. | ||||
|     using LorentzColour2x3  = iLorentzColour2x3<Complex>; | ||||
|     using LorentzColour2x3F = iLorentzColour2x3<ComplexF>; | ||||
|     using LorentzColour2x3D = iLorentzColour2x3<ComplexD>; | ||||
|  | ||||
| ///////////////////////////////////////////////////////////////////////////////// | ||||
| // Simple classes for precision conversion | ||||
| ///////////////////////////////////////////////////////////////////////////////// | ||||
| // Precision conversion from an in-memory object (sobj) to a file object (fobj). | ||||
| // Both objects are viewed as flat arrays of their real scalar type and copied word by word, | ||||
| // converting each word with an ordinary assignment. The two objects must contain the same | ||||
| // number of real words (asserted). | ||||
| template <class fobj, class sobj> | ||||
| struct BinarySimpleUnmunger { | ||||
|   typedef typename getPrecision<fobj>::real_scalar_type fobj_stype; | ||||
|   typedef typename getPrecision<sobj>::real_scalar_type sobj_stype; | ||||
|  | ||||
|   void operator()(sobj &in, fobj &out) { | ||||
|     // take word by word and transform according to the status | ||||
|     fobj_stype *out_buffer = (fobj_stype *)(&out); | ||||
|     sobj_stype *in_buffer  = (sobj_stype *)(&in); | ||||
|     size_t fobj_words = sizeof(out) / sizeof(fobj_stype); | ||||
|     size_t sobj_words = sizeof(in) / sizeof(sobj_stype); | ||||
|     assert(fobj_words == sobj_words); | ||||
|  | ||||
|     for (size_t word = 0; word < sobj_words; word++) | ||||
|       out_buffer[word] = in_buffer[word];  // type conversion on the fly | ||||
|  | ||||
|   } | ||||
| }; | ||||
|  | ||||
| // Precision conversion from a file object (fobj) to an in-memory object (sobj). | ||||
| // Both objects are viewed as flat arrays of their real scalar type and copied word by word, | ||||
| // converting each word with an ordinary assignment. The two objects must contain the same | ||||
| // number of real words (asserted). | ||||
| template <class fobj, class sobj> | ||||
| struct BinarySimpleMunger { | ||||
|   typedef typename getPrecision<fobj>::real_scalar_type fobj_stype; | ||||
|   typedef typename getPrecision<sobj>::real_scalar_type sobj_stype; | ||||
|  | ||||
|   void operator()(fobj &in, sobj &out) { | ||||
|     // take word by word and transform according to the status | ||||
|     fobj_stype *in_buffer  = (fobj_stype *)(&in); | ||||
|     sobj_stype *out_buffer = (sobj_stype *)(&out); | ||||
|     size_t fobj_words = sizeof(in) / sizeof(fobj_stype); | ||||
|     size_t sobj_words = sizeof(out) / sizeof(sobj_stype); | ||||
|     assert(fobj_words == sobj_words); | ||||
|  | ||||
|     for (size_t word = 0; word < sobj_words; word++) | ||||
|       out_buffer[word] = in_buffer[word];  // type conversion on the fly | ||||
|  | ||||
|   } | ||||
| }; | ||||
|  | ||||
|  | ||||
|     // File object -> in-memory object: copies every (row, col) colour entry of each of the | ||||
|     // Nd link matrices by element-wise assignment. | ||||
|     template<class fobj,class sobj> | ||||
|     struct GaugeSimpleMunger{ | ||||
|       void operator()(fobj &in, sobj &out) { | ||||
|         for (int dir = 0; dir < Nd; dir++) { | ||||
|           for (int row = 0; row < Nc; row++) { | ||||
|             for (int col = 0; col < Nc; col++) { | ||||
|               out(dir)()(row, col) = in(dir)()(row, col); | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     // In-memory object -> file object: copies every (row, col) colour entry of each of the | ||||
|     // Nd link matrices by element-wise assignment. | ||||
|     template <class fobj, class sobj> | ||||
|     struct GaugeSimpleUnmunger { | ||||
|       void operator()(sobj &in, fobj &out) { | ||||
|         for (int dir = 0; dir < Nd; dir++) { | ||||
|           for (int row = 0; row < Nc; row++) { | ||||
|             for (int col = 0; col < Nc; col++) { | ||||
|               out(dir)()(row, col) = in(dir)()(row, col); | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     // Two-row file object -> full matrix: copies the stored 2x3 entries of every direction | ||||
|     // into rows 0 and 1 of `out`, then calls reconstruct3 to fill in row 2. | ||||
|     template<class fobj,class sobj> | ||||
|     struct Gauge3x2munger{ | ||||
|       void operator() (fobj &in,sobj &out){ | ||||
|         for(int dir=0;dir<Nd;dir++){ | ||||
|           for(int row=0;row<2;row++){ | ||||
|             for(int col=0;col<3;col++){ | ||||
|               out(dir)()(row,col) = in(dir)(row)(col); | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|         reconstruct3(out); | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     // Full matrix -> two-row file object: copies rows 0 and 1 (three columns each) of every | ||||
|     // direction into `out`; row 2 of `in` is not read. | ||||
|     template<class fobj,class sobj> | ||||
|     struct Gauge3x2unmunger{ | ||||
|       void operator() (sobj &in,fobj &out){ | ||||
|         for(int dir=0;dir<Nd;dir++){ | ||||
|           for(int row=0;row<2;row++){ | ||||
|             for(int col=0;col<3;col++){ | ||||
|               out(dir)(row)(col) = in(dir)()(row,col); | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     }; | ||||
|   } | ||||
|  | ||||
|  | ||||
| } | ||||
| @@ -30,182 +30,11 @@ | ||||
| #ifndef GRID_NERSC_IO_H | ||||
| #define GRID_NERSC_IO_H | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <iostream> | ||||
| #include <iomanip> | ||||
| #include <fstream> | ||||
| #include <map> | ||||
|  | ||||
| #include <unistd.h> | ||||
| #include <sys/utsname.h> | ||||
| #include <pwd.h> | ||||
|  | ||||
| namespace Grid { | ||||
|   namespace QCD { | ||||
|  | ||||
|     using namespace Grid; | ||||
|  | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     // Some data types for intermediate storage | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     template<typename vtype> using iLorentzColour2x3 = iVector<iVector<iVector<vtype, Nc>, 2>, 4 >; | ||||
|  | ||||
|     typedef iLorentzColour2x3<Complex>  LorentzColour2x3; | ||||
|     typedef iLorentzColour2x3<ComplexF> LorentzColour2x3F; | ||||
|     typedef iLorentzColour2x3<ComplexD> LorentzColour2x3D; | ||||
|  | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     // header specification/interpretation | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     class NerscField { | ||||
|     public: | ||||
|       // header strings (not in order) | ||||
|       int dimension[4]; | ||||
|       std::string boundary[4];  | ||||
|       int data_start; | ||||
|       std::string hdr_version; | ||||
|       std::string storage_format; | ||||
|       // Checks on data | ||||
|       double link_trace; | ||||
|       double plaquette; | ||||
|       uint32_t checksum; | ||||
|       unsigned int sequence_number; | ||||
|       std::string data_type; | ||||
|       std::string ensemble_id ; | ||||
|       std::string ensemble_label ; | ||||
|       std::string creator ; | ||||
|       std::string creator_hardware ; | ||||
|       std::string creation_date ; | ||||
|       std::string archive_date ; | ||||
|       std::string floating_point; | ||||
|     }; | ||||
|  | ||||
|     ////////////////////////////////////////////////////////////////////// | ||||
|     // Bit and Physical Checksumming and QA of data | ||||
|     ////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|     inline void NerscGrid(GridBase *grid,NerscField &header) | ||||
|     { | ||||
|       assert(grid->_ndimension==4); | ||||
|       for(int d=0;d<4;d++) { | ||||
| 	header.dimension[d] = grid->_fdimensions[d]; | ||||
|       } | ||||
|       for(int d=0;d<4;d++) { | ||||
| 	header.boundary[d] = std::string("PERIODIC"); | ||||
|       } | ||||
|     } | ||||
|     template<class GaugeField> | ||||
|     inline void NerscStatistics(GaugeField & data,NerscField &header) | ||||
|     { | ||||
|       // How to convert data precision etc... | ||||
|       header.link_trace=Grid::QCD::WilsonLoops<PeriodicGimplR>::linkTrace(data); | ||||
|       header.plaquette =Grid::QCD::WilsonLoops<PeriodicGimplR>::avgPlaquette(data); | ||||
|     } | ||||
|  | ||||
|     inline void NerscMachineCharacteristics(NerscField &header) | ||||
|     { | ||||
|       // Who | ||||
|       struct passwd *pw = getpwuid (getuid()); | ||||
|       if (pw) header.creator = std::string(pw->pw_name);  | ||||
|  | ||||
|       // When | ||||
|       std::time_t t = std::time(nullptr); | ||||
|       std::tm tm = *std::localtime(&t); | ||||
|       std::ostringstream oss;  | ||||
|       //  oss << std::put_time(&tm, "%c %Z"); | ||||
|       header.creation_date = oss.str(); | ||||
|       header.archive_date  = header.creation_date; | ||||
|  | ||||
|       // What | ||||
|       struct utsname name;  uname(&name); | ||||
|       header.creator_hardware = std::string(name.nodename)+"-"; | ||||
|       header.creator_hardware+= std::string(name.machine)+"-"; | ||||
|       header.creator_hardware+= std::string(name.sysname)+"-"; | ||||
|       header.creator_hardware+= std::string(name.release); | ||||
|  | ||||
|     } | ||||
|     ////////////////////////////////////////////////////////////////////// | ||||
|     // Utilities ; these are QCD aware | ||||
|     ////////////////////////////////////////////////////////////////////// | ||||
|     inline void NerscChecksum(uint32_t *buf,uint32_t buf_size_bytes,uint32_t &csum) | ||||
|     { | ||||
|       BinaryIO::Uint32Checksum(buf,buf_size_bytes,csum); | ||||
|     } | ||||
|     inline void reconstruct3(LorentzColourMatrix & cm) | ||||
|     { | ||||
|       const int x=0; | ||||
|       const int y=1; | ||||
|       const int z=2; | ||||
|       for(int mu=0;mu<4;mu++){ | ||||
| 	cm(mu)()(2,x) = adj(cm(mu)()(0,y)*cm(mu)()(1,z)-cm(mu)()(0,z)*cm(mu)()(1,y)); //x= yz-zy | ||||
| 	cm(mu)()(2,y) = adj(cm(mu)()(0,z)*cm(mu)()(1,x)-cm(mu)()(0,x)*cm(mu)()(1,z)); //y= zx-xz | ||||
| 	cm(mu)()(2,z) = adj(cm(mu)()(0,x)*cm(mu)()(1,y)-cm(mu)()(0,y)*cm(mu)()(1,x)); //z= xy-yx | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     template<class fobj,class sobj> | ||||
|     struct NerscSimpleMunger{ | ||||
|       void operator()(fobj &in, sobj &out, uint32_t &csum) { | ||||
|         for (int mu = 0; mu < Nd; mu++) { | ||||
|           for (int i = 0; i < Nc; i++) { | ||||
|             for (int j = 0; j < Nc; j++) { | ||||
|               out(mu)()(i, j) = in(mu)()(i, j); | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|         NerscChecksum((uint32_t *)&in, sizeof(in), csum); | ||||
|       }; | ||||
|     }; | ||||
|  | ||||
|     template <class fobj, class sobj> | ||||
|     struct NerscSimpleUnmunger { | ||||
|       void operator()(sobj &in, fobj &out, uint32_t &csum) { | ||||
|         for (int mu = 0; mu < Nd; mu++) { | ||||
|           for (int i = 0; i < Nc; i++) { | ||||
|             for (int j = 0; j < Nc; j++) { | ||||
|               out(mu)()(i, j) = in(mu)()(i, j); | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|         NerscChecksum((uint32_t *)&out, sizeof(out), csum); | ||||
|       }; | ||||
|     }; | ||||
|  | ||||
|     template<class fobj,class sobj> | ||||
|     struct Nersc3x2munger{ | ||||
|       void operator() (fobj &in,sobj &out,uint32_t &csum){ | ||||
|       | ||||
| 	NerscChecksum((uint32_t *)&in,sizeof(in),csum);  | ||||
|  | ||||
| 	for(int mu=0;mu<4;mu++){ | ||||
| 	  for(int i=0;i<2;i++){ | ||||
| 	    for(int j=0;j<3;j++){ | ||||
| 	      out(mu)()(i,j) = in(mu)(i)(j); | ||||
| 	    }} | ||||
| 	} | ||||
| 	reconstruct3(out); | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|     template<class fobj,class sobj> | ||||
|     struct Nersc3x2unmunger{ | ||||
|  | ||||
|       void operator() (sobj &in,fobj &out,uint32_t &csum){ | ||||
|  | ||||
|  | ||||
| 	for(int mu=0;mu<4;mu++){ | ||||
| 	  for(int i=0;i<2;i++){ | ||||
| 	    for(int j=0;j<3;j++){ | ||||
| 	      out(mu)(i)(j) = in(mu)()(i,j); | ||||
| 	    }} | ||||
| 	} | ||||
|  | ||||
| 	NerscChecksum((uint32_t *)&out,sizeof(out),csum);  | ||||
|  | ||||
|       } | ||||
|     }; | ||||
|  | ||||
|  | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
|     // Write and read from fstream; compute header offset for payload | ||||
|     //////////////////////////////////////////////////////////////////////////////// | ||||
| @@ -216,42 +45,17 @@ namespace Grid { | ||||
| 	std::ofstream fout(file,std::ios::out); | ||||
|       } | ||||
|    | ||||
| #define dump_nersc_header(field, s)					\ | ||||
|       s << "BEGIN_HEADER"      << std::endl;				\ | ||||
|       s << "HDR_VERSION = "    << field.hdr_version    << std::endl;	\ | ||||
|       s << "DATATYPE = "       << field.data_type      << std::endl;	\ | ||||
|       s << "STORAGE_FORMAT = " << field.storage_format << std::endl;	\ | ||||
|       for(int i=0;i<4;i++){						\ | ||||
| 	s << "DIMENSION_" << i+1 << " = " << field.dimension[i] << std::endl ; \ | ||||
|       }									\ | ||||
|       s << "LINK_TRACE = " << std::setprecision(10) << field.link_trace << std::endl; \ | ||||
|       s << "PLAQUETTE  = " << std::setprecision(10) << field.plaquette  << std::endl; \ | ||||
|       for(int i=0;i<4;i++){						\ | ||||
| 	s << "BOUNDARY_"<<i+1<<" = " << field.boundary[i] << std::endl;	\ | ||||
|       }									\ | ||||
| 									\ | ||||
|       s << "CHECKSUM = "<< std::hex << std::setw(10) << field.checksum << std::dec<<std::endl; \ | ||||
|       s << "ENSEMBLE_ID = "     << field.ensemble_id      << std::endl;	\ | ||||
|       s << "ENSEMBLE_LABEL = "  << field.ensemble_label   << std::endl;	\ | ||||
|       s << "SEQUENCE_NUMBER = " << field.sequence_number  << std::endl;	\ | ||||
|       s << "CREATOR = "         << field.creator          << std::endl;	\ | ||||
|       s << "CREATOR_HARDWARE = "<< field.creator_hardware << std::endl;	\ | ||||
|       s << "CREATION_DATE = "   << field.creation_date    << std::endl;	\ | ||||
|       s << "ARCHIVE_DATE = "    << field.archive_date     << std::endl;	\ | ||||
|       s << "FLOATING_POINT = "  << field.floating_point   << std::endl;	\ | ||||
|       s << "END_HEADER"         << std::endl; | ||||
|    | ||||
|       static inline unsigned int writeHeader(NerscField &field,std::string file) | ||||
|       static inline unsigned int writeHeader(FieldMetaData &field,std::string file) | ||||
|       { | ||||
|       std::ofstream fout(file,std::ios::out|std::ios::in); | ||||
|       fout.seekp(0,std::ios::beg); | ||||
|       dump_nersc_header(field, fout); | ||||
|       dump_meta_data(field, fout); | ||||
|       field.data_start = fout.tellp(); | ||||
|       return field.data_start; | ||||
|     } | ||||
|  | ||||
|       // for the header-reader | ||||
|       static inline int readHeader(std::string file,GridBase *grid,  NerscField &field) | ||||
|       static inline int readHeader(std::string file,GridBase *grid,  FieldMetaData &field) | ||||
|       { | ||||
|       int offset=0; | ||||
|       std::map<std::string,std::string> header; | ||||
| @@ -323,21 +127,21 @@ namespace Grid { | ||||
|       return field.data_start; | ||||
|     } | ||||
|  | ||||
|       ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|       // Now the meat: the object readers | ||||
|       ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| #define PARALLEL_READ | ||||
| #define PARALLEL_WRITE | ||||
|     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|     // Now the meat: the object readers | ||||
|     ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
|  | ||||
|       template<class vsimd> | ||||
|       static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,NerscField& header,std::string file) | ||||
|       { | ||||
|     template<class vsimd> | ||||
|     static inline void readConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, | ||||
| 					 FieldMetaData& header, | ||||
| 					 std::string file) | ||||
|     { | ||||
|       typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; | ||||
|  | ||||
|       GridBase *grid = Umu._grid; | ||||
|       int offset = readHeader(file,Umu._grid,header); | ||||
|  | ||||
|       NerscField clone(header); | ||||
|       FieldMetaData clone(header); | ||||
|  | ||||
|       std::string format(header.floating_point); | ||||
|  | ||||
| @@ -346,76 +150,78 @@ namespace Grid { | ||||
|       int ieee64big = (format == std::string("IEEE64BIG")); | ||||
|       int ieee64    = (format == std::string("IEEE64")); | ||||
|  | ||||
|       uint32_t csum; | ||||
|       uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||
|       // depending on datatype, set up munger; | ||||
|       // munger is a function of <floating point, Real, data_type> | ||||
|       if ( header.data_type == std::string("4D_SU3_GAUGE") ) { | ||||
|       if ( ieee32 || ieee32big ) { | ||||
| #ifdef PARALLEL_READ | ||||
| 	csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>  | ||||
| 	  (Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format); | ||||
| #else | ||||
| 	csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>  | ||||
| 	  (Umu,file,Nersc3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format); | ||||
| #endif | ||||
|       } | ||||
|       if ( ieee64 || ieee64big ) { | ||||
| #ifdef PARALLEL_READ | ||||
| 	csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>  | ||||
| 	  (Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format); | ||||
| #else  | ||||
| 	csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>  | ||||
| 	  (Umu,file,Nersc3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format); | ||||
| #endif | ||||
|       } | ||||
|       } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { | ||||
| 	if ( ieee32 || ieee32big ) { | ||||
| #ifdef PARALLEL_READ | ||||
| 	  csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF> | ||||
| 	    (Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format); | ||||
| #else | ||||
| 	  csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF> | ||||
| 	    (Umu,file,NerscSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format); | ||||
| #endif | ||||
| 	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3F>  | ||||
| 	    (Umu,file,Gauge3x2munger<LorentzColour2x3F,LorentzColourMatrix>(), offset,format, | ||||
| 	     nersc_csum,scidac_csuma,scidac_csumb); | ||||
| 	} | ||||
| 	if ( ieee64 || ieee64big ) { | ||||
| #ifdef PARALLEL_READ | ||||
| 	  csum=BinaryIO::readObjectParallel<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD> | ||||
| 	    (Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format); | ||||
| #else | ||||
| 	  csum=BinaryIO::readObjectSerial<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD> | ||||
| 	    (Umu,file,NerscSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format); | ||||
| #endif | ||||
| 	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>, LorentzColour2x3D>  | ||||
| 	    (Umu,file,Gauge3x2munger<LorentzColour2x3D,LorentzColourMatrix>(),offset,format, | ||||
| 	     nersc_csum,scidac_csuma,scidac_csumb); | ||||
| 	} | ||||
|       } else if ( header.data_type == std::string("4D_SU3_GAUGE_3x3") ) { | ||||
| 	if ( ieee32 || ieee32big ) { | ||||
| 	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixF> | ||||
| 	    (Umu,file,GaugeSimpleMunger<LorentzColourMatrixF,LorentzColourMatrix>(),offset,format, | ||||
| 	     nersc_csum,scidac_csuma,scidac_csumb); | ||||
| 	} | ||||
| 	if ( ieee64 || ieee64big ) { | ||||
| 	  BinaryIO::readLatticeObject<iLorentzColourMatrix<vsimd>,LorentzColourMatrixD> | ||||
| 	    (Umu,file,GaugeSimpleMunger<LorentzColourMatrixD,LorentzColourMatrix>(),offset,format, | ||||
| 	     nersc_csum,scidac_csuma,scidac_csumb); | ||||
| 	} | ||||
|       } else { | ||||
| 	assert(0); | ||||
|       } | ||||
|  | ||||
|       NerscStatistics<GaugeField>(Umu,clone); | ||||
|       GaugeStatistics(Umu,clone); | ||||
|  | ||||
|       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<<            csum<< std::dec | ||||
|       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" checksum "<<std::hex<<nersc_csum<< std::dec | ||||
| 	       <<" header   "<<std::hex<<header.checksum<<std::dec <<std::endl; | ||||
|       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" plaquette "<<clone.plaquette | ||||
| 	       <<" header    "<<header.plaquette<<std::endl; | ||||
|       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<<" link_trace "<<clone.link_trace | ||||
| 	       <<" header    "<<header.link_trace<<std::endl; | ||||
|  | ||||
|       if ( fabs(clone.plaquette -header.plaquette ) >=  1.0e-5 ) {  | ||||
| 	std::cout << " Plaquette mismatch "<<std::endl; | ||||
| 	std::cout << Umu[0]<<std::endl; | ||||
| 	std::cout << Umu[1]<<std::endl; | ||||
|       } | ||||
|       if ( nersc_csum != header.checksum ) {  | ||||
| 	std::cerr << " checksum mismatch " << std::endl; | ||||
| 	std::cerr << " plaqs " << clone.plaquette << " " << header.plaquette << std::endl; | ||||
| 	std::cerr << " trace " << clone.link_trace<< " " << header.link_trace<< std::endl; | ||||
| 	std::cerr << " nersc_csum  " <<std::hex<< nersc_csum << " " << header.checksum<< std::dec<< std::endl; | ||||
| 	exit(0); | ||||
|       } | ||||
|       assert(fabs(clone.plaquette -header.plaquette ) < 1.0e-5 ); | ||||
|       assert(fabs(clone.link_trace-header.link_trace) < 1.0e-6 ); | ||||
|       assert(csum == header.checksum ); | ||||
|  | ||||
|       assert(nersc_csum == header.checksum ); | ||||
|        | ||||
|       std::cout<<GridLogMessage <<"NERSC Configuration "<<file<< " and plaquette, link trace, and checksum agree"<<std::endl; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|       template<class vsimd> | ||||
|       static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu,std::string file, int two_row,int bits32) | ||||
|       static inline void writeConfiguration(Lattice<iLorentzColourMatrix<vsimd> > &Umu, | ||||
| 					    std::string file,  | ||||
| 					    int two_row, | ||||
| 					    int bits32) | ||||
|       { | ||||
| 	typedef Lattice<iLorentzColourMatrix<vsimd> > GaugeField; | ||||
|  | ||||
| 	typedef iLorentzColourMatrix<vsimd> vobj; | ||||
| 	typedef typename vobj::scalar_object sobj; | ||||
|  | ||||
| 	FieldMetaData header; | ||||
| 	/////////////////////////////////////////// | ||||
| 	// Following should become arguments | ||||
| 	NerscField header; | ||||
| 	/////////////////////////////////////////// | ||||
| 	header.sequence_number = 1; | ||||
| 	header.ensemble_id     = "UKQCD"; | ||||
| 	header.ensemble_label  = "DWF"; | ||||
| @@ -425,45 +231,32 @@ namespace Grid { | ||||
|    | ||||
| 	GridBase *grid = Umu._grid; | ||||
|  | ||||
| 	NerscGrid(grid,header); | ||||
| 	NerscStatistics<GaugeField>(Umu,header); | ||||
| 	NerscMachineCharacteristics(header); | ||||
| 	GridMetaData(grid,header); | ||||
| 	assert(header.nd==4); | ||||
| 	GaugeStatistics(Umu,header); | ||||
| 	MachineCharacteristics(header); | ||||
|  | ||||
| 	uint32_t csum; | ||||
| 	int offset; | ||||
|    | ||||
| 	truncate(file); | ||||
|  | ||||
| 	if ( two_row ) {  | ||||
| 	// Sod it -- always write 3x3 double | ||||
| 	header.floating_point = std::string("IEEE64BIG"); | ||||
| 	header.data_type      = std::string("4D_SU3_GAUGE_3x3"); | ||||
| 	GaugeSimpleUnmunger<fobj3D,sobj> munge; | ||||
| 	offset = writeHeader(header,file); | ||||
|  | ||||
| 	  header.floating_point = std::string("IEEE64BIG"); | ||||
| 	  header.data_type      = std::string("4D_SU3_GAUGE"); | ||||
| 	  Nersc3x2unmunger<fobj2D,sobj> munge; | ||||
| 	  BinaryIO::Uint32Checksum<vobj,fobj2D>(Umu, munge,header.checksum); | ||||
| 	  offset = writeHeader(header,file); | ||||
| #ifdef PARALLEL_WRITE | ||||
| 	  csum=BinaryIO::writeObjectParallel<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point); | ||||
| #else | ||||
| 	  csum=BinaryIO::writeObjectSerial<vobj,fobj2D>(Umu,file,munge,offset,header.floating_point); | ||||
| #endif | ||||
| 	} else {  | ||||
| 	  header.floating_point = std::string("IEEE64BIG"); | ||||
| 	  header.data_type      = std::string("4D_SU3_GAUGE_3x3"); | ||||
| 	  NerscSimpleUnmunger<fobj3D,sobj> munge; | ||||
| 	  BinaryIO::Uint32Checksum<vobj,fobj3D>(Umu, munge,header.checksum); | ||||
| 	  offset = writeHeader(header,file); | ||||
| #ifdef PARALLEL_WRITE | ||||
| 	  csum=BinaryIO::writeObjectParallel<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point); | ||||
| #else | ||||
| 	  csum=BinaryIO::writeObjectSerial<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point); | ||||
| #endif | ||||
| 	} | ||||
| 	uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||
| 	BinaryIO::writeLatticeObject<vobj,fobj3D>(Umu,file,munge,offset,header.floating_point, | ||||
| 								  nersc_csum,scidac_csuma,scidac_csumb); | ||||
| 	header.checksum = nersc_csum; | ||||
| 	writeHeader(header,file); | ||||
|  | ||||
| 	std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum "<<std::hex<<csum<< std::dec<<" plaq "<< header.plaquette <<std::endl; | ||||
| 	std::cout<<GridLogMessage <<"Written NERSC Configuration on "<< file << " checksum " | ||||
| 		 <<std::hex<<header.checksum | ||||
| 		 <<std::dec<<" plaq "<< header.plaquette <<std::endl; | ||||
|  | ||||
|       } | ||||
|  | ||||
|  | ||||
|       /////////////////////////////// | ||||
|       // RNG state | ||||
|       /////////////////////////////// | ||||
| @@ -472,19 +265,19 @@ namespace Grid { | ||||
| 	typedef typename GridParallelRNG::RngStateType RngStateType; | ||||
|  | ||||
| 	// Following should become arguments | ||||
| 	NerscField header; | ||||
| 	FieldMetaData header; | ||||
| 	header.sequence_number = 1; | ||||
| 	header.ensemble_id     = "UKQCD"; | ||||
| 	header.ensemble_label  = "DWF"; | ||||
|  | ||||
| 	GridBase *grid = parallel._grid; | ||||
|  | ||||
| 	NerscGrid(grid,header); | ||||
| 	GridMetaData(grid,header); | ||||
| 	assert(header.nd==4); | ||||
| 	header.link_trace=0.0; | ||||
| 	header.plaquette=0.0; | ||||
| 	NerscMachineCharacteristics(header); | ||||
| 	MachineCharacteristics(header); | ||||
|  | ||||
| 	uint32_t csum; | ||||
| 	int offset; | ||||
|    | ||||
| #ifdef RNG_RANLUX | ||||
| @@ -502,15 +295,19 @@ namespace Grid { | ||||
|  | ||||
| 	truncate(file); | ||||
| 	offset = writeHeader(header,file); | ||||
| 	csum=BinaryIO::writeRNGSerial(serial,parallel,file,offset); | ||||
| 	header.checksum = csum; | ||||
| 	uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||
| 	BinaryIO::writeRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb); | ||||
| 	header.checksum = nersc_csum; | ||||
| 	offset = writeHeader(header,file); | ||||
|  | ||||
| 	std::cout<<GridLogMessage <<"Written NERSC RNG STATE "<<file<< " checksum "<<std::hex<<csum<<std::dec<<std::endl; | ||||
| 	std::cout<<GridLogMessage  | ||||
| 		 <<"Written NERSC RNG STATE "<<file<< " checksum " | ||||
| 		 <<std::hex<<header.checksum | ||||
| 		 <<std::dec<<std::endl; | ||||
|  | ||||
|       } | ||||
|      | ||||
|       static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,NerscField& header,std::string file) | ||||
|       static inline void readRNGState(GridSerialRNG &serial,GridParallelRNG & parallel,FieldMetaData& header,std::string file) | ||||
|       { | ||||
| 	typedef typename GridParallelRNG::RngStateType RngStateType; | ||||
|  | ||||
| @@ -518,7 +315,7 @@ namespace Grid { | ||||
|  | ||||
| 	int offset = readHeader(file,grid,header); | ||||
|  | ||||
| 	NerscField clone(header); | ||||
| 	FieldMetaData clone(header); | ||||
|  | ||||
| 	std::string format(header.floating_point); | ||||
| 	std::string data_type(header.data_type); | ||||
| @@ -538,15 +335,19 @@ namespace Grid { | ||||
|  | ||||
| 	// depending on datatype, set up munger; | ||||
| 	// munger is a function of <floating point, Real, data_type> | ||||
| 	uint32_t csum=BinaryIO::readRNGSerial(serial,parallel,file,offset); | ||||
| 	uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||
| 	BinaryIO::readRNG(serial,parallel,file,offset,nersc_csum,scidac_csuma,scidac_csumb); | ||||
|  | ||||
| 	assert(csum == header.checksum ); | ||||
| 	if ( nersc_csum != header.checksum ) {  | ||||
| 	  std::cerr << "checksum mismatch "<<std::hex<< nersc_csum <<" "<<header.checksum<<std::dec<<std::endl; | ||||
| 	  exit(0); | ||||
| 	} | ||||
| 	assert(nersc_csum == header.checksum ); | ||||
|  | ||||
| 	std::cout<<GridLogMessage <<"Read NERSC RNG file "<<file<< " format "<< data_type <<std::endl; | ||||
|       } | ||||
|  | ||||
|     }; | ||||
|  | ||||
|  | ||||
|   }} | ||||
| #endif | ||||
|   | ||||
| @@ -644,19 +644,16 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation: | ||||
|  | ||||
|     INHERIT_GIMPL_TYPES(Gimpl); | ||||
|        | ||||
|     template <typename vtype> using iImplScalar            = iScalar<iScalar<iScalar<vtype> > >; | ||||
|     template <typename vtype> using iImplSpinor            = iScalar<iScalar<iVector<vtype, Dimension> > >; | ||||
|     template <typename vtype> using iImplHalfSpinor        = iScalar<iScalar<iVector<vtype, Dimension> > >; | ||||
|     template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>; | ||||
|     template <typename vtype> using iImplPropagator        = iScalar<iScalar<iMatrix<vtype, Dimension> > >; | ||||
|      | ||||
|     typedef iImplScalar<Simd>            SiteComplex; | ||||
|     typedef iImplSpinor<Simd>            SiteSpinor; | ||||
|     typedef iImplHalfSpinor<Simd>        SiteHalfSpinor; | ||||
|     typedef iImplDoubledGaugeField<Simd> SiteDoubledGaugeField; | ||||
|     typedef iImplPropagator<Simd>        SitePropagator; | ||||
|      | ||||
|     typedef Lattice<SiteComplex>           ComplexField; | ||||
|     typedef Lattice<SiteSpinor>            FermionField; | ||||
|     typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; | ||||
|     typedef Lattice<SitePropagator> PropagatorField; | ||||
| @@ -775,7 +772,6 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation: | ||||
|  | ||||
|     INHERIT_GIMPL_TYPES(Gimpl); | ||||
|  | ||||
|     template <typename vtype> using iImplScalar            = iScalar<iScalar<iScalar<vtype> > >; | ||||
|     template <typename vtype> using iImplSpinor            = iScalar<iScalar<iVector<vtype, Dimension> > >; | ||||
|     template <typename vtype> using iImplHalfSpinor        = iScalar<iScalar<iVector<vtype, Dimension> > >; | ||||
|     template <typename vtype> using iImplDoubledGaugeField = iVector<iScalar<iMatrix<vtype, Dimension> >, Nds>; | ||||
| @@ -792,12 +788,10 @@ class StaggeredImpl : public PeriodicGaugeImpl<GaugeImplTypes<S, Representation: | ||||
|     typedef Lattice<SiteDoubledGaugeField> DoubledGaugeField; | ||||
|     typedef Lattice<SitePropagator> PropagatorField; | ||||
|      | ||||
|     typedef iImplScalar<Simd>            SiteComplex; | ||||
|     typedef iImplSpinor<Simd>            SiteSpinor; | ||||
|     typedef iImplHalfSpinor<Simd>        SiteHalfSpinor; | ||||
|  | ||||
|      | ||||
|     typedef Lattice<SiteComplex>           ComplexField; | ||||
|     typedef Lattice<SiteSpinor>            FermionField; | ||||
|      | ||||
|     typedef SimpleCompressor<SiteSpinor> Compressor; | ||||
|   | ||||
| @@ -40,12 +40,15 @@ namespace QCD { | ||||
|   typedef typename GImpl::Simd Simd;                \ | ||||
|   typedef typename GImpl::LinkField GaugeLinkField; \ | ||||
|   typedef typename GImpl::Field GaugeField;         \ | ||||
|   typedef typename GImpl::ComplexField ComplexField;\ | ||||
|   typedef typename GImpl::SiteField SiteGaugeField; \ | ||||
|   typedef typename GImpl::SiteComplex SiteComplex;  \ | ||||
|   typedef typename GImpl::SiteLink SiteGaugeLink; | ||||
|  | ||||
| #define INHERIT_FIELD_TYPES(Impl)             \ | ||||
|   typedef typename Impl::Simd Simd;           \ | ||||
|   typedef typename Impl::SiteField SiteField; \ | ||||
| #define INHERIT_FIELD_TYPES(Impl)		    \ | ||||
|   typedef typename Impl::Simd Simd;		    \ | ||||
|   typedef typename Impl::ComplexField ComplexField; \ | ||||
|   typedef typename Impl::SiteField SiteField;	    \ | ||||
|   typedef typename Impl::Field Field; | ||||
|  | ||||
| // hardcodes the exponential approximation in the template | ||||
| @@ -53,14 +56,17 @@ template <class S, int Nrepresentation = Nc, int Nexp = 12 > class GaugeImplType | ||||
| public: | ||||
|   typedef S Simd; | ||||
|  | ||||
|   template <typename vtype> using iImplGaugeLink  = iScalar<iScalar<iMatrix<vtype, Nrepresentation>>>; | ||||
|   template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation>>, Nd>; | ||||
|   template <typename vtype> using iImplScalar     = iScalar<iScalar<iScalar<vtype> > >; | ||||
|   template <typename vtype> using iImplGaugeLink  = iScalar<iScalar<iMatrix<vtype, Nrepresentation> > >; | ||||
|   template <typename vtype> using iImplGaugeField = iVector<iScalar<iMatrix<vtype, Nrepresentation> >, Nd>; | ||||
|  | ||||
|   typedef iImplScalar<Simd>     SiteComplex; | ||||
|   typedef iImplGaugeLink<Simd>  SiteLink; | ||||
|   typedef iImplGaugeField<Simd> SiteField; | ||||
|  | ||||
|   typedef Lattice<SiteLink>  LinkField;  | ||||
|   typedef Lattice<SiteField> Field; | ||||
|   typedef Lattice<SiteComplex> ComplexField; | ||||
|   typedef Lattice<SiteLink>    LinkField;  | ||||
|   typedef Lattice<SiteField>   Field; | ||||
|  | ||||
|   // Guido: we can probably separate the types from the HMC functions | ||||
|   // this will create 2 kind of implementations | ||||
| @@ -80,7 +86,7 @@ public: | ||||
|  | ||||
|   /////////////////////////////////////////////////////////// | ||||
|   // Move these to another class | ||||
|   // HMC auxiliary functions  | ||||
|   // HMC auxiliary functions | ||||
|   static inline void generate_momenta(Field &P, GridParallelRNG &pRNG) { | ||||
|     // specific for SU gauge fields | ||||
|     LinkField Pmu(P._grid); | ||||
| @@ -92,14 +98,19 @@ public: | ||||
|   } | ||||
|  | ||||
|   static inline Field projectForce(Field &P) { return Ta(P); } | ||||
|    | ||||
|  | ||||
|   static inline void update_field(Field& P, Field& U, double ep){ | ||||
|     for (int mu = 0; mu < Nd; mu++) { | ||||
|       auto Umu = PeekIndex<LorentzIndex>(U, mu); | ||||
|       auto Pmu = PeekIndex<LorentzIndex>(P, mu); | ||||
|       Umu = expMat(Pmu, ep, Nexp) * Umu; | ||||
|       PokeIndex<LorentzIndex>(U, ProjectOnGroup(Umu), mu); | ||||
|     //static std::chrono::duration<double> diff; | ||||
|  | ||||
|     //auto start = std::chrono::high_resolution_clock::now(); | ||||
|     parallel_for(int ss=0;ss<P._grid->oSites();ss++){ | ||||
|       for (int mu = 0; mu < Nd; mu++)  | ||||
|         U[ss]._internal[mu] = ProjectOnGroup(Exponentiate(P[ss]._internal[mu], ep, Nexp) * U[ss]._internal[mu]); | ||||
|     } | ||||
|      | ||||
|     //auto end = std::chrono::high_resolution_clock::now(); | ||||
|    // diff += end - start; | ||||
|    // std::cout << "Time to exponentiate matrix " << diff.count() << " s\n"; | ||||
|   } | ||||
|  | ||||
|   static inline RealD FieldSquareNorm(Field& U){ | ||||
|   | ||||
| @@ -71,14 +71,18 @@ class WilsonGaugeAction : public Action<typename Gimpl::GaugeField> { | ||||
|  | ||||
|     RealD factor = 0.5 * beta / RealD(Nc); | ||||
|  | ||||
|     GaugeLinkField Umu(U._grid); | ||||
|     //GaugeLinkField Umu(U._grid); | ||||
|     GaugeLinkField dSdU_mu(U._grid); | ||||
|     for (int mu = 0; mu < Nd; mu++) { | ||||
|       Umu = PeekIndex<LorentzIndex>(U, mu); | ||||
|       //Umu = PeekIndex<LorentzIndex>(U, mu); | ||||
|  | ||||
|       // Staple in direction mu | ||||
|       WilsonLoops<Gimpl>::Staple(dSdU_mu, U, mu); | ||||
|       dSdU_mu = Ta(Umu * dSdU_mu) * factor; | ||||
|       //WilsonLoops<Gimpl>::Staple(dSdU_mu, U, mu); | ||||
|       //dSdU_mu = Ta(Umu * dSdU_mu) * factor; | ||||
|  | ||||
|    | ||||
|       WilsonLoops<Gimpl>::StapleMult(dSdU_mu, U, mu); | ||||
|       dSdU_mu = Ta(dSdU_mu) * factor; | ||||
|  | ||||
|       PokeIndex<LorentzIndex>(dSdU, dSdU_mu, mu); | ||||
|     } | ||||
|   | ||||
| @@ -15,6 +15,8 @@ namespace Grid { | ||||
|      | ||||
|     typedef iImplField<Simd> SiteField; | ||||
|      | ||||
|     template <typename vtype> using iImplScalar= iScalar<iScalar<iScalar<vtype   > > >; | ||||
|     typedef iImplScalar<Simd> ComplexField; | ||||
|      | ||||
|     typedef Lattice<SiteField> Field; | ||||
|      | ||||
| @@ -51,13 +53,14 @@ namespace Grid { | ||||
|   public: | ||||
|     typedef S Simd; | ||||
|      | ||||
|     template <typename vtype> | ||||
|     using iImplField = iScalar<iScalar<iMatrix<vtype, N> > >; | ||||
|      | ||||
|     template <typename vtype> using iImplField = iScalar<iScalar<iMatrix<vtype, N> > >; | ||||
|  | ||||
|     typedef iImplField<Simd> SiteField; | ||||
|      | ||||
|      | ||||
|     typedef Lattice<SiteField> Field; | ||||
|  | ||||
|     template <typename vtype> using iImplScalar= iScalar<iScalar<iScalar<vtype   > > >; | ||||
|     typedef iImplScalar<Simd> ComplexField; | ||||
|      | ||||
|      | ||||
|     static inline void generate_momenta(Field& P, GridParallelRNG& pRNG){ | ||||
|       gaussian(pRNG, P); | ||||
|   | ||||
| @@ -62,36 +62,50 @@ class BinaryHmcCheckpointer : public BaseHmcCheckpointer<Impl> { | ||||
|     fout.close(); | ||||
|   } | ||||
|  | ||||
|   void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, | ||||
|                           GridParallelRNG &pRNG) { | ||||
|   void TrajectoryComplete(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) { | ||||
|  | ||||
|     if ((traj % Params.saveInterval) == 0) { | ||||
|       std::string config, rng; | ||||
|       this->build_filenames(traj, Params, config, rng); | ||||
|  | ||||
|       BinaryIO::BinarySimpleUnmunger<sobj_double, sobj> munge; | ||||
|       uint32_t nersc_csum; | ||||
|       uint32_t scidac_csuma; | ||||
|       uint32_t scidac_csumb; | ||||
|        | ||||
|       BinarySimpleUnmunger<sobj_double, sobj> munge; | ||||
|       truncate(rng); | ||||
|       BinaryIO::writeRNGSerial(sRNG, pRNG, rng, 0); | ||||
|       BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); | ||||
|       truncate(config); | ||||
|       uint32_t csum = BinaryIO::writeObjectParallel<vobj, sobj_double>( | ||||
|           U, config, munge, 0, Params.format); | ||||
|  | ||||
|       BinaryIO::writeLatticeObject<vobj, sobj_double>(U, config, munge, 0, Params.format, | ||||
| 						      nersc_csum,scidac_csuma,scidac_csumb); | ||||
|  | ||||
|       std::cout << GridLogMessage << "Written Binary Configuration " << config | ||||
|                 << " checksum " << std::hex << csum << std::dec << std::endl; | ||||
|                 << " checksum " << std::hex  | ||||
| 		<< nersc_csum   <<"/" | ||||
| 		<< scidac_csuma   <<"/" | ||||
| 		<< scidac_csumb  | ||||
| 		<< std::dec << std::endl; | ||||
|     } | ||||
|  | ||||
|   }; | ||||
|  | ||||
|   void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, | ||||
|                          GridParallelRNG &pRNG) { | ||||
|   void CheckpointRestore(int traj, Field &U, GridSerialRNG &sRNG, GridParallelRNG &pRNG) { | ||||
|     std::string config, rng; | ||||
|     this->build_filenames(traj, Params, config, rng); | ||||
|  | ||||
|     BinaryIO::BinarySimpleMunger<sobj_double, sobj> munge; | ||||
|     BinaryIO::readRNGSerial(sRNG, pRNG, rng, 0); | ||||
|     uint32_t csum = BinaryIO::readObjectParallel<vobj, sobj_double>( | ||||
|         U, config, munge, 0, Params.format); | ||||
|     BinarySimpleMunger<sobj_double, sobj> munge; | ||||
|  | ||||
|     uint32_t nersc_csum; | ||||
|     uint32_t scidac_csuma; | ||||
|     uint32_t scidac_csumb; | ||||
|     BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); | ||||
|     BinaryIO::readLatticeObject<vobj, sobj_double>(U, config, munge, 0, Params.format, | ||||
| 						   nersc_csum,scidac_csuma,scidac_csumb); | ||||
|      | ||||
|     std::cout << GridLogMessage << "Read Binary Configuration " << config | ||||
|               << " checksum " << std::hex << csum << std::dec << std::endl; | ||||
|               << " checksums " << std::hex << nersc_csum<<"/"<<scidac_csuma<<"/"<<scidac_csumb  | ||||
| 	      << std::dec << std::endl; | ||||
|   }; | ||||
| }; | ||||
| } | ||||
|   | ||||
| @@ -54,9 +54,9 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer<Implementation> { | ||||
|  | ||||
|     // check here that the format is valid | ||||
|     int ieee32big = (Params.format == std::string("IEEE32BIG")); | ||||
|     int ieee32 = (Params.format == std::string("IEEE32")); | ||||
|     int ieee32    = (Params.format == std::string("IEEE32")); | ||||
|     int ieee64big = (Params.format == std::string("IEEE64BIG")); | ||||
|     int ieee64 = (Params.format == std::string("IEEE64")); | ||||
|     int ieee64    = (Params.format == std::string("IEEE64")); | ||||
|  | ||||
|     if (!(ieee64big || ieee32 || ieee32big || ieee64)) { | ||||
|       std::cout << GridLogError << "Unrecognized file format " << Params.format | ||||
| @@ -74,13 +74,20 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer<Implementation> { | ||||
|     if ((traj % Params.saveInterval) == 0) { | ||||
|       std::string config, rng; | ||||
|       this->build_filenames(traj, Params, config, rng); | ||||
|  | ||||
|       ILDGIO IO(config, ILDGwrite); | ||||
|       BinaryIO::writeRNGSerial(sRNG, pRNG, rng, 0); | ||||
|       uint32_t csum = IO.writeConfiguration(U, Params.format); | ||||
|        | ||||
|       uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||
|       BinaryIO::writeRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); | ||||
|       IldgWriter _IldgWriter; | ||||
|       _IldgWriter.open(config); | ||||
|       _IldgWriter.writeConfiguration(U, traj, config, config); | ||||
|       _IldgWriter.close(); | ||||
|  | ||||
|       std::cout << GridLogMessage << "Written ILDG Configuration on " << config | ||||
|                 << " checksum " << std::hex << csum << std::dec << std::endl; | ||||
|                 << " checksum " << std::hex  | ||||
| 		<< nersc_csum<<"/" | ||||
| 		<< scidac_csuma<<"/" | ||||
| 		<< scidac_csumb | ||||
| 		<< std::dec << std::endl; | ||||
|     } | ||||
|   }; | ||||
|  | ||||
| @@ -89,12 +96,21 @@ class ILDGHmcCheckpointer : public BaseHmcCheckpointer<Implementation> { | ||||
|     std::string config, rng; | ||||
|     this->build_filenames(traj, Params, config, rng); | ||||
|  | ||||
|     ILDGIO IO(config, ILDGread); | ||||
|     BinaryIO::readRNGSerial(sRNG, pRNG, rng, 0); | ||||
|     uint32_t csum = IO.readConfiguration(U);  // format from the header | ||||
|     uint32_t nersc_csum,scidac_csuma,scidac_csumb; | ||||
|     BinaryIO::readRNG(sRNG, pRNG, rng, 0,nersc_csum,scidac_csuma,scidac_csumb); | ||||
|  | ||||
|     FieldMetaData header; | ||||
|     IldgReader _IldgReader; | ||||
|     _IldgReader.open(config); | ||||
|     _IldgReader.readConfiguration(U,header);  // format from the header | ||||
|     _IldgReader.close(); | ||||
|  | ||||
|     std::cout << GridLogMessage << "Read ILDG Configuration from " << config | ||||
|               << " checksum " << std::hex << csum << std::dec << std::endl; | ||||
|               << " checksum " << std::hex  | ||||
| 	      << nersc_csum<<"/" | ||||
| 	      << scidac_csuma<<"/" | ||||
| 	      << scidac_csumb | ||||
| 	      << std::dec << std::endl; | ||||
|   }; | ||||
| }; | ||||
| } | ||||
|   | ||||
| @@ -70,7 +70,7 @@ class NerscHmcCheckpointer : public BaseHmcCheckpointer<Gimpl> { | ||||
|     std::string config, rng; | ||||
|     this->build_filenames(traj, Params, config, rng); | ||||
|  | ||||
|     NerscField header; | ||||
|     FieldMetaData header; | ||||
|     NerscIO::readRNGState(sRNG, pRNG, header, rng); | ||||
|     NerscIO::readConfiguration(U, header, config); | ||||
|   }; | ||||
|   | ||||
| @@ -58,6 +58,8 @@ class Smear_Stout : public Smear<Gimpl> { | ||||
|     SmearBase->smear(C, U); | ||||
|   }; | ||||
|  | ||||
|  | ||||
|   // Repetition of code here (use the Tensor_exp.h function) | ||||
|   void exponentiate_iQ(GaugeLinkField& e_iQ, const GaugeLinkField& iQ) const { | ||||
|     // Put this outside | ||||
|     // only valid for SU(3) matrices | ||||
|   | ||||
| @@ -36,20 +36,23 @@ namespace QCD { | ||||
| template <class Gimpl> | ||||
| class WilsonFlow: public Smear<Gimpl>{ | ||||
|     unsigned int Nstep; | ||||
|     RealD epsilon; | ||||
|     unsigned int measure_interval; | ||||
|     mutable RealD epsilon, taus; | ||||
|  | ||||
|  | ||||
|     mutable WilsonGaugeAction<Gimpl> SG; | ||||
|  | ||||
|     void evolve_step(typename Gimpl::GaugeField&) const; | ||||
|     void evolve_step_adaptive(typename Gimpl::GaugeField&, RealD); | ||||
|     RealD tau(unsigned int t)const {return epsilon*(t+1.0); } | ||||
|  | ||||
|  | ||||
|  public: | ||||
|     INHERIT_GIMPL_TYPES(Gimpl) | ||||
|  | ||||
|     explicit WilsonFlow(unsigned int Nstep, RealD epsilon): | ||||
|     explicit WilsonFlow(unsigned int Nstep, RealD epsilon, unsigned int interval = 1): | ||||
|         Nstep(Nstep), | ||||
|         epsilon(epsilon), | ||||
|         measure_interval(interval), | ||||
|         SG(WilsonGaugeAction<Gimpl>(3.0)) { | ||||
|             // WilsonGaugeAction with beta 3.0 | ||||
|             assert(epsilon > 0.0); | ||||
| @@ -72,7 +75,9 @@ class WilsonFlow: public Smear<Gimpl>{ | ||||
|         // undefined for WilsonFlow | ||||
|     } | ||||
|  | ||||
|     void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau); | ||||
|     RealD energyDensityPlaquette(unsigned int step, const GaugeField& U) const; | ||||
|     RealD energyDensityPlaquette(const GaugeField& U) const; | ||||
| }; | ||||
|  | ||||
|  | ||||
| @@ -98,23 +103,111 @@ void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U) const{ | ||||
|     Gimpl::update_field(Z, U, -2.0*epsilon);    // V(t+e) = exp(ep*Z)*W2 | ||||
| } | ||||
|  | ||||
| template <class Gimpl> /* One adaptive flow step on U: applies the three-stage update plus a second update of the starting field, then advances taus and rescales epsilon from the distance between the two results. */ | ||||
| void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD maxTau) { | ||||
|     if (maxTau - taus < epsilon){ /* shrink the step so taus + epsilon does not pass maxTau */ | ||||
|         epsilon = maxTau-taus; | ||||
|     } | ||||
|     std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl; | ||||
|     GaugeField Z(U._grid); | ||||
|     GaugeField Zprime(U._grid); | ||||
|     GaugeField tmp(U._grid), Uprime(U._grid); | ||||
|     Uprime = U; /* copy of the starting field, updated separately with Zprime below */ | ||||
|     SG.deriv(U, Z); | ||||
|     Zprime = -Z; | ||||
|     Z *= 0.25;                                  // Z0 = 1/4 * F(U) | ||||
|     Gimpl::update_field(Z, U, -2.0*epsilon);    // U = W1 = exp(ep*Z0)*W0 | ||||
|  | ||||
|     Z *= -17.0/8.0; | ||||
|     SG.deriv(U, tmp); Z += tmp;                 // -17/32*Z0 +Z1 | ||||
|     Zprime += 2.0*tmp; /* Zprime is now -F(W0) + 2*F(W1), using the derivative just stored in tmp */ | ||||
|     Z *= 8.0/9.0;                               // Z = -17/36*Z0 +8/9*Z1 | ||||
|     Gimpl::update_field(Z, U, -2.0*epsilon);    // U_= W2 = exp(ep*Z)*W1 | ||||
|      | ||||
|  | ||||
|     Z *= -4.0/3.0; | ||||
|     SG.deriv(U, tmp); Z += tmp;                 // 4/3*(17/36*Z0 -8/9*Z1) +Z2 | ||||
|     Z *= 3.0/4.0;                               // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2 | ||||
|     Gimpl::update_field(Z, U, -2.0*epsilon);    // V(t+e) = exp(ep*Z)*W2 | ||||
|  | ||||
|     // Ramos  | ||||
|     Gimpl::update_field(Zprime, Uprime, -2.0*epsilon); // V'(t+e) = exp(ep*Z')*W0 | ||||
|     // Compute distance as norm^2 of the difference | ||||
|     GaugeField diffU = U - Uprime; | ||||
|     RealD diff = norm2(diffU); | ||||
|     // adjust integration step | ||||
|      | ||||
|     taus += epsilon; /* accumulate the flow time using the step size that was just applied */ | ||||
|     std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl; | ||||
|      | ||||
|     epsilon = epsilon*0.95*std::pow(1e-4/diff,1./3.); /* NOTE(review): diff == 0 makes 1e-4/diff non-finite, and so the new epsilon - confirm this cannot occur */ | ||||
|     std::cout << GridLogMessage << "New epsilon : " << epsilon << std::endl; | ||||
|  | ||||
| } | ||||
|  | ||||
| template <class Gimpl> /* Returns 2 * tau(step)^2 * SG.S(U) / gSites for the fixed-step flow. */ | ||||
| RealD WilsonFlow<Gimpl>::energyDensityPlaquette(unsigned int step, const GaugeField& U) const { | ||||
|     RealD td = tau(step); /* NOTE(review): tau(t) is declared as epsilon*(t+1.0), i.e. a 0-based step index - confirm callers that count steps from 1 intend the extra epsilon */ | ||||
|     return 2.0 * td * td * SG.S(U)/U._grid->gSites(); | ||||
| } | ||||
|  | ||||
| template <class Gimpl> /* Overload without a step index: returns 2 * taus^2 * SG.S(U) / gSites, reading the member taus as the flow time. */ | ||||
| RealD WilsonFlow<Gimpl>::energyDensityPlaquette(const GaugeField& U) const { | ||||
|     return 2.0 * taus * taus * SG.S(U)/U._grid->gSites(); | ||||
| } | ||||
|  | ||||
|  | ||||
| //#define WF_TIMING  | ||||
|  | ||||
|  | ||||
|  | ||||
| template <class Gimpl> | ||||
| void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const { | ||||
|     out = in; | ||||
|     for (unsigned int step = 0; step < Nstep; step++) { | ||||
|     for (unsigned int step = 1; step <= Nstep; step++) { | ||||
|         auto start = std::chrono::high_resolution_clock::now(); | ||||
|         std::cout << GridLogMessage << "Evolution time :"<< tau(step) << std::endl; | ||||
|         evolve_step(out); | ||||
|         auto end = std::chrono::high_resolution_clock::now(); | ||||
|         std::chrono::duration<double> diff = end - start; | ||||
|         #ifdef WF_TIMING | ||||
|         std::cout << "Time to evolve " << diff.count() << " s\n"; | ||||
|         #endif | ||||
|         std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " | ||||
|             << step << " " | ||||
|             << step << "  " | ||||
|             << energyDensityPlaquette(step,out) << std::endl; | ||||
|          if( step % measure_interval == 0){ | ||||
|          std::cout << GridLogMessage << "[WilsonFlow] Top. charge           : " | ||||
|             << step << "  "  | ||||
|             << WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl; | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| template <class Gimpl> /* Copies in to out, then repeats evolve_step_adaptive on out until taus is no longer below maxTau, logging after every step. */ | ||||
| void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau){ | ||||
|     out = in; | ||||
|     taus = epsilon; /* NOTE(review): taus starts at epsilon and evolve_step_adaptive adds the step size again after each update - confirm this initial value is intended */ | ||||
|     unsigned int step = 0; | ||||
|     do{ | ||||
|         step++; | ||||
|         std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl; | ||||
|         evolve_step_adaptive(out, maxTau); /* also updates the members taus and epsilon */ | ||||
|         std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " | ||||
|             << step << "  " | ||||
|             << energyDensityPlaquette(out) << std::endl; | ||||
|          if( step % measure_interval == 0){ /* NOTE(review): measure_interval == 0 would be a modulo by zero - confirm it is validated elsewhere */ | ||||
|          std::cout << GridLogMessage << "[WilsonFlow] Top. charge           : " | ||||
|             << step << "  "  | ||||
|             << WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl; /* uses PeriodicGimplR rather than the Gimpl template parameter */ | ||||
|         } | ||||
|     } while (taus < maxTau); | ||||
|  | ||||
|  | ||||
|  | ||||
| } | ||||
|  | ||||
|  | ||||
| }  // namespace QCD | ||||
| }  // namespace Grid | ||||
|  | ||||
|   | ||||
							
								
								
									
										188
									
								
								lib/qcd/utils/GaugeFix.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										188
									
								
								lib/qcd/utils/GaugeFix.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,188 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| //#include <Grid/Grid.h> | ||||
|  | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| template <class Gimpl>  | ||||
| class FourierAcceleratedGaugeFixer  : public Gimpl { | ||||
|   public: | ||||
|   INHERIT_GIMPL_TYPES(Gimpl); | ||||
|  | ||||
|   typedef typename Gimpl::GaugeLinkField GaugeMat; | ||||
|   typedef typename Gimpl::GaugeField GaugeLorentz; | ||||
|  | ||||
|   static void GaugeLinkToLieAlgebraField(const std::vector<GaugeMat> &U,std::vector<GaugeMat> &A) { /* For each direction mu < Nd, writes A[mu] = Ta(U[mu]) * (-i); A must already hold Nd entries. */ | ||||
|     for(int mu=0;mu<Nd;mu++){ | ||||
|       Complex cmi(0.0,-1.0); /* cmi = -i */ | ||||
|       A[mu] = Ta(U[mu]) * cmi; | ||||
|     } | ||||
|   } | ||||
|   static void DmuAmu(const std::vector<GaugeMat> &A,GaugeMat &dmuAmu) { /* Overwrites dmuAmu with the sum over mu < Nd of ( A[mu] - Cshift(A[mu],mu,-1) ). */ | ||||
|     dmuAmu=zero; | ||||
|     for(int mu=0;mu<Nd;mu++){ | ||||
|       dmuAmu = dmuAmu + A[mu] - Cshift(A[mu],mu,-1); | ||||
|     } | ||||
|   }   | ||||
|   static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false) { /* Applies up to maxiter steepest-descent steps to Umu in place (Fourier-accelerated when Fourier is true), checking convergence every 20th iteration. */ | ||||
|     GridBase *grid = Umu._grid; | ||||
|  | ||||
|     Real org_plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu); /* not read again in this function */ | ||||
|     Real org_link_trace=WilsonLoops<Gimpl>::linkTrace(Umu);  | ||||
|     Real old_trace = org_link_trace; | ||||
|     Real trG; | ||||
|  | ||||
|     std::vector<GaugeMat> U(Nd,grid); | ||||
|                  GaugeMat dmuAmu(grid); | ||||
|  | ||||
|     for(int i=0;i<maxiter;i++){ | ||||
|       for(int mu=0;mu<Nd;mu++) U[mu]= PeekIndex<LorentzIndex>(Umu,mu); /* split Umu into per-direction links */ | ||||
|       if ( Fourier==false ) {  | ||||
| 	trG = SteepestDescentStep(U,alpha,dmuAmu); | ||||
|       } else {  | ||||
| 	trG = FourierAccelSteepestDescentStep(U,alpha,dmuAmu); | ||||
|       } | ||||
|       for(int mu=0;mu<Nd;mu++) PokeIndex<LorentzIndex>(Umu,U[mu],mu); /* write the updated links back into Umu */ | ||||
|       // Monitor progress and convergence test  | ||||
|       // infrequently to minimise cost overhead | ||||
|       if ( i %20 == 0 ) {  | ||||
| 	Real plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu); | ||||
| 	Real link_trace=WilsonLoops<Gimpl>::linkTrace(Umu);  | ||||
|  | ||||
| 	if (Fourier)  | ||||
| 	  std::cout << GridLogMessage << "Fourier Iteration "<<i<< " plaq= "<<plaq<< " dmuAmu " << norm2(dmuAmu)<< std::endl; | ||||
| 	else  | ||||
| 	  std::cout << GridLogMessage << " Iteration "<<i<< " plaq= "<<plaq<< " dmuAmu " << norm2(dmuAmu)<< std::endl; | ||||
| 	 | ||||
| 	Real Phi  = 1.0 - old_trace / link_trace ; /* relative change of the link trace since the previous check */ | ||||
| 	Real Omega= 1.0 - trG; | ||||
|  | ||||
|  | ||||
| 	std::cout << GridLogMessage << " Iteration "<<i<< " Phi= "<<Phi<< " Omega= " << Omega<< " trG " << trG <<std::endl; | ||||
| 	if ( (Omega < Omega_tol) && ( ::fabs(Phi) < Phi_tol) ) { | ||||
| 	  std::cout << GridLogMessage << "Converged ! "<<std::endl; | ||||
| 	  return; | ||||
| 	} | ||||
|  | ||||
| 	old_trace = link_trace; | ||||
|  | ||||
|       } | ||||
|     } /* when maxiter is exhausted the function returns here without any message */ | ||||
|   }; | ||||
|   static Real SteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) { | ||||
|     GridBase *grid = U[0]._grid; | ||||
|  | ||||
|     std::vector<GaugeMat> A(Nd,grid); | ||||
|     GaugeMat g(grid); | ||||
|  | ||||
|     GaugeLinkToLieAlgebraField(U,A); | ||||
|     ExpiAlphaDmuAmu(A,g,alpha,dmuAmu); | ||||
|  | ||||
|  | ||||
|     Real vol = grid->gSites(); | ||||
|     Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc; | ||||
|  | ||||
|     SU<Nc>::GaugeTransform(U,g); | ||||
|  | ||||
|     return trG; | ||||
|   } | ||||
|  | ||||
|   static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) { | ||||
|  | ||||
|     GridBase *grid = U[0]._grid; | ||||
|  | ||||
|     Real vol = grid->gSites(); | ||||
|  | ||||
|     FFT theFFT((GridCartesian *)grid); | ||||
|  | ||||
|     LatticeComplex  Fp(grid); | ||||
|     LatticeComplex  psq(grid); psq=zero; | ||||
|     LatticeComplex  pmu(grid);  | ||||
|     LatticeComplex   one(grid); one = Complex(1.0,0.0); | ||||
|  | ||||
|     GaugeMat g(grid); | ||||
|     GaugeMat dmuAmu_p(grid); | ||||
|     std::vector<GaugeMat> A(Nd,grid); | ||||
|  | ||||
|     GaugeLinkToLieAlgebraField(U,A); | ||||
|  | ||||
|     DmuAmu(A,dmuAmu); | ||||
|  | ||||
|     theFFT.FFT_all_dim(dmuAmu_p,dmuAmu,FFT::forward); | ||||
|  | ||||
|     ////////////////////////////////// | ||||
|     // Work out Fp = psq_max/ psq... | ||||
|     ////////////////////////////////// | ||||
|     std::vector<int> latt_size = grid->GlobalDimensions(); | ||||
|     std::vector<int> coor(grid->_ndimension,0); | ||||
|     for(int mu=0;mu<Nd;mu++) { | ||||
|  | ||||
|       Real TwoPiL =  M_PI * 2.0/ latt_size[mu]; | ||||
|       LatticeCoordinate(pmu,mu); | ||||
|       pmu = TwoPiL * pmu ; | ||||
|       psq = psq + 4.0*sin(pmu*0.5)*sin(pmu*0.5);  | ||||
|     } | ||||
|  | ||||
|     Complex psqMax(16.0); | ||||
|     Fp =  psqMax*one/psq; | ||||
|  | ||||
|     /* | ||||
|     static int once; | ||||
|     if ( once == 0 ) {  | ||||
|       std::cout << " Fp " << Fp <<std::endl; | ||||
|       once ++; | ||||
|       }*/ | ||||
|  | ||||
|     pokeSite(TComplex(1.0),Fp,coor); | ||||
|  | ||||
|     dmuAmu_p  = dmuAmu_p * Fp;  | ||||
|  | ||||
|     theFFT.FFT_all_dim(dmuAmu,dmuAmu_p,FFT::backward); | ||||
|  | ||||
|     GaugeMat ciadmam(grid); | ||||
|     Complex cialpha(0.0,-alpha); | ||||
|     ciadmam = dmuAmu*cialpha; | ||||
|     SU<Nc>::taExp(ciadmam,g); | ||||
|  | ||||
|     Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc; | ||||
|  | ||||
|     SU<Nc>::GaugeTransform(U,g); | ||||
|  | ||||
|     return trG; | ||||
|   } | ||||
|  | ||||
|   static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu) { | ||||
|     GridBase *grid = g._grid; | ||||
|     Complex cialpha(0.0,-alpha); | ||||
|     GaugeMat ciadmam(grid); | ||||
|     DmuAmu(A,dmuAmu); | ||||
|     ciadmam = dmuAmu*cialpha; | ||||
|     SU<Nc>::taExp(ciadmam,g); | ||||
|   }   | ||||
| }; | ||||
|  | ||||
| @@ -12,7 +12,4 @@ | ||||
| #include <Grid/qcd/utils/SUnAdjoint.h> | ||||
| #include <Grid/qcd/utils/SUnTwoIndex.h> | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -73,7 +73,7 @@ public: | ||||
|   ////////////////////////////////////////////////// | ||||
|   // trace of directed plaquette oriented in mu,nu plane | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void traceDirPlaquette(LatticeComplex &plaq, | ||||
|   static void traceDirPlaquette(ComplexField &plaq, | ||||
|                                 const std::vector<GaugeMat> &U, const int mu, | ||||
|                                 const int nu) { | ||||
|     GaugeMat sp(U[0]._grid); | ||||
| @@ -83,9 +83,9 @@ public: | ||||
|   ////////////////////////////////////////////////// | ||||
|   // sum over all planes of plaquette | ||||
|   ////////////////////////////////////////////////// | ||||
|   static void sitePlaquette(LatticeComplex &Plaq, | ||||
|   static void sitePlaquette(ComplexField &Plaq, | ||||
|                             const std::vector<GaugeMat> &U) { | ||||
|     LatticeComplex sitePlaq(U[0]._grid); | ||||
|     ComplexField sitePlaq(U[0]._grid); | ||||
|     Plaq = zero; | ||||
|     for (int mu = 1; mu < Nd; mu++) { | ||||
|       for (int nu = 0; nu < mu; nu++) { | ||||
| @@ -104,11 +104,11 @@ public: | ||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); | ||||
|     } | ||||
|  | ||||
|     LatticeComplex Plaq(Umu._grid); | ||||
|     ComplexField Plaq(Umu._grid); | ||||
|  | ||||
|     sitePlaquette(Plaq, U); | ||||
|     TComplex Tp = sum(Plaq); | ||||
|     Complex p = TensorRemove(Tp); | ||||
|     auto Tp = sum(Plaq); | ||||
|     auto p = TensorRemove(Tp); | ||||
|     return p.real(); | ||||
|   } | ||||
|  | ||||
| @@ -129,15 +129,15 @@ public: | ||||
|   static RealD linkTrace(const GaugeLorentz &Umu) { | ||||
|     std::vector<GaugeMat> U(Nd, Umu._grid); | ||||
|  | ||||
|     LatticeComplex Tr(Umu._grid); | ||||
|     ComplexField Tr(Umu._grid); | ||||
|     Tr = zero; | ||||
|     for (int mu = 0; mu < Nd; mu++) { | ||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); | ||||
|       Tr = Tr + trace(U[mu]); | ||||
|     } | ||||
|  | ||||
|     TComplex Tp = sum(Tr); | ||||
|     Complex p = TensorRemove(Tp); | ||||
|     auto Tp = sum(Tr); | ||||
|     auto p = TensorRemove(Tp); | ||||
|  | ||||
|     double vol = Umu._grid->gSites(); | ||||
|  | ||||
| @@ -188,6 +188,32 @@ public: | ||||
|     } | ||||
|   } | ||||
|  | ||||
|  | ||||
| // For the force term | ||||
| static void StapleMult(GaugeMat &staple, const GaugeLorentz &Umu, int mu) { | ||||
|     // Sum over nu != mu of the two staple terms built from explicit Cshifts, | ||||
|     // finally multiplied from the left by the mu link. | ||||
|     GridBase *lattice = Umu._grid; | ||||
|  | ||||
|     // One link field per direction (the peeks are the expensive part). | ||||
|     std::vector<GaugeMat> links(Nd, lattice); | ||||
|     for (int dir = 0; dir < Nd; ++dir) links[dir] = PeekIndex<LorentzIndex>(Umu, dir); | ||||
|  | ||||
|     GaugeMat shiftedNu(lattice); | ||||
|     GaugeMat scratch(lattice); | ||||
|     staple = zero; | ||||
|  | ||||
|     for (int nu = 0; nu < Nd; ++nu) { | ||||
|       if (nu == mu) continue; | ||||
|  | ||||
|       // First term: uses both links shifted forward. | ||||
|       shiftedNu = Cshift(links[nu], mu, 1); | ||||
|       scratch   = Cshift(links[mu], nu, 1); | ||||
|       staple += shiftedNu* adj(links[nu]*scratch); | ||||
|  | ||||
|       // Second term: product formed locally, then shifted back along nu. | ||||
|       scratch = adj(links[mu]*shiftedNu)*links[nu]; | ||||
|       staple += Cshift(scratch, nu, -1); | ||||
|     } | ||||
|     staple = links[mu]*staple; | ||||
| } | ||||
|  | ||||
|   ////////////////////////////////////////////////// | ||||
|   // the sum over all staples on each site | ||||
|   ////////////////////////////////////////////////// | ||||
| @@ -200,7 +226,6 @@ public: | ||||
|       U[d] = PeekIndex<LorentzIndex>(Umu, d); | ||||
|     } | ||||
|     staple = zero; | ||||
|     GaugeMat tmp(grid); | ||||
|  | ||||
|     for (int nu = 0; nu < Nd; nu++) { | ||||
|  | ||||
| @@ -214,7 +239,7 @@ public: | ||||
|         //      | | ||||
|         //    __| | ||||
|         // | ||||
|  | ||||
|       | ||||
|         staple += Gimpl::ShiftStaple( | ||||
|             Gimpl::CovShiftForward( | ||||
|                 U[nu], nu, | ||||
| @@ -227,6 +252,7 @@ public: | ||||
|         // |__ | ||||
|         // | ||||
|         // | ||||
|  | ||||
|         staple += Gimpl::ShiftStaple( | ||||
|             Gimpl::CovShiftBackward(U[nu], nu, | ||||
|                                     Gimpl::CovShiftBackward(U[mu], mu, U[nu])), mu); | ||||
| @@ -289,8 +315,7 @@ public: | ||||
|       // | ||||
|       staple = Gimpl::ShiftStaple( | ||||
|           Gimpl::CovShiftBackward(U[nu], nu, | ||||
|                                   Gimpl::CovShiftBackward(U[mu], mu, U[nu])), | ||||
|           mu); | ||||
|                                   Gimpl::CovShiftBackward(U[mu], mu, U[nu])), mu); | ||||
|     } | ||||
|   } | ||||
|  | ||||
| @@ -307,10 +332,10 @@ public: | ||||
|       GaugeMat Vup(Umu._grid), Vdn(Umu._grid); | ||||
|       StapleUpper(Vup, Umu, mu, nu); | ||||
|       StapleLower(Vdn, Umu, mu, nu); | ||||
|       GaugeMat v = adj(Vup) - adj(Vdn); | ||||
|       GaugeMat v = Vup - Vdn; | ||||
|       GaugeMat u = PeekIndex<LorentzIndex>(Umu, mu);  // some redundant copies | ||||
|       GaugeMat vu = v*u; | ||||
|       FS = 0.25*Ta(u*v + Cshift(vu, mu, +1)); | ||||
|       FS = 0.25*Ta(u*v + Cshift(vu, mu, -1)); | ||||
|   } | ||||
|  | ||||
|   static Real TopologicalCharge(GaugeLorentz &U){ | ||||
| @@ -330,8 +355,8 @@ public: | ||||
|  | ||||
|     double coeff = 8.0/(32.0*M_PI*M_PI); | ||||
|  | ||||
|     LatticeComplex qfield = coeff*trace(Bx*Ex + By*Ey + Bz*Ez); | ||||
|     TComplex Tq = sum(qfield); | ||||
|     ComplexField qfield = coeff*trace(Bx*Ex + By*Ey + Bz*Ez); | ||||
|     auto Tq = sum(qfield); | ||||
|     return TensorRemove(Tq).real(); | ||||
|   } | ||||
|  | ||||
| @@ -350,16 +375,16 @@ public: | ||||
|                adj(Gimpl::CovShiftForward( | ||||
|                    U[nu], nu, Gimpl::CovShiftForward(U[nu], nu, U[mu]))); | ||||
|   } | ||||
|   static void traceDirRectangle(LatticeComplex &rect, | ||||
|   static void traceDirRectangle(ComplexField &rect, | ||||
|                                 const std::vector<GaugeMat> &U, const int mu, | ||||
|                                 const int nu) { | ||||
|     GaugeMat sp(U[0]._grid); | ||||
|     dirRectangle(sp, U, mu, nu); | ||||
|     rect = trace(sp); | ||||
|   } | ||||
|   static void siteRectangle(LatticeComplex &Rect, | ||||
|   static void siteRectangle(ComplexField &Rect, | ||||
|                             const std::vector<GaugeMat> &U) { | ||||
|     LatticeComplex siteRect(U[0]._grid); | ||||
|     ComplexField siteRect(U[0]._grid); | ||||
|     Rect = zero; | ||||
|     for (int mu = 1; mu < Nd; mu++) { | ||||
|       for (int nu = 0; nu < mu; nu++) { | ||||
| @@ -379,12 +404,12 @@ public: | ||||
|       U[mu] = PeekIndex<LorentzIndex>(Umu, mu); | ||||
|     } | ||||
|  | ||||
|     LatticeComplex Rect(Umu._grid); | ||||
|     ComplexField Rect(Umu._grid); | ||||
|  | ||||
|     siteRectangle(Rect, U); | ||||
|  | ||||
|     TComplex Tp = sum(Rect); | ||||
|     Complex p = TensorRemove(Tp); | ||||
|     auto Tp = sum(Rect); | ||||
|     auto p = TensorRemove(Tp); | ||||
|     return p.real(); | ||||
|   } | ||||
|   ////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -110,11 +110,12 @@ THE SOFTWARE. | ||||
|  | ||||
| #define GRID_MACRO_MEMBER(A,B)        A B; | ||||
| #define GRID_MACRO_COMP_MEMBER(A,B) result = (result and (lhs. B == rhs. B)); | ||||
| #define GRID_MACRO_OS_WRITE_MEMBER(A,B) os<< #A <<" "#B <<" = "<< obj. B <<" ; " <<std::endl; | ||||
| #define GRID_MACRO_OS_WRITE_MEMBER(A,B) os<< #A <<" " #B << " = " << obj. B << " ; " <<std::endl; | ||||
| #define GRID_MACRO_READ_MEMBER(A,B) Grid::read(RD,#B,obj. B); | ||||
| #define GRID_MACRO_WRITE_MEMBER(A,B) Grid::write(WR,#B,obj. B); | ||||
|  | ||||
| #define GRID_SERIALIZABLE_CLASS_MEMBERS(cname,...)\ | ||||
|   std::string SerialisableClassName(void) {return std::string(#cname);}	\ | ||||
| GRID_MACRO_EVAL(GRID_MACRO_MAP(GRID_MACRO_MEMBER,__VA_ARGS__))\ | ||||
| template <typename T>\ | ||||
| static inline void write(Writer<T> &WR,const std::string &s, const cname &obj){ \ | ||||
|   | ||||
| @@ -32,16 +32,21 @@ using namespace Grid; | ||||
| using namespace std; | ||||
|  | ||||
| // Writer implementation /////////////////////////////////////////////////////// | ||||
| XmlWriter::XmlWriter(const string &fileName) | ||||
| : fileName_(fileName) | ||||
| XmlWriter::XmlWriter(const string &fileName, string toplev) : fileName_(fileName) | ||||
| { | ||||
|   node_ = doc_.append_child(); | ||||
|   node_.set_name("grid"); | ||||
|   if ( toplev == std::string("") ) { | ||||
|     node_=doc_; | ||||
|   } else {  | ||||
|     node_=doc_.append_child(); | ||||
|     node_.set_name(toplev.c_str()); | ||||
|   } | ||||
| } | ||||
|  | ||||
| XmlWriter::~XmlWriter(void) | ||||
| { | ||||
|   doc_.save_file(fileName_.c_str(), "  "); | ||||
|   if ( fileName_ != std::string("") ) {  | ||||
|     doc_.save_file(fileName_.c_str(), "  "); | ||||
|   } | ||||
| } | ||||
|  | ||||
| void XmlWriter::push(const string &s) | ||||
| @@ -53,21 +58,44 @@ void XmlWriter::pop(void) | ||||
| { | ||||
|   node_ = node_.parent(); | ||||
| } | ||||
|  | ||||
| // Reader implementation /////////////////////////////////////////////////////// | ||||
| XmlReader::XmlReader(const string &fileName) | ||||
| : fileName_(fileName) | ||||
| std::string XmlWriter::XmlString(void) | ||||
| { | ||||
|   pugi::xml_parse_result result = doc_.load_file(fileName_.c_str()); | ||||
|    | ||||
|   if ( !result ) | ||||
|   { | ||||
|   std::ostringstream oss;  | ||||
|   doc_.save(oss); | ||||
|   return oss.str(); | ||||
| } | ||||
|  | ||||
| XmlReader::XmlReader(const char *xmlstring,string toplev) : fileName_("") | ||||
| { | ||||
|   pugi::xml_parse_result result; | ||||
|   result = doc_.load_string(xmlstring); | ||||
|   if ( !result ) { | ||||
|     cerr << "XML error description: " << result.description() << "\n"; | ||||
|     cerr << "XML error offset     : " << result.offset        << "\n"; | ||||
|     abort(); | ||||
|   } | ||||
|    | ||||
|   node_ = doc_.child("grid"); | ||||
|   if ( toplev == std::string("") ) { | ||||
|     node_ = doc_; | ||||
|   } else {  | ||||
|     node_ = doc_.child(toplev.c_str()); | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Reader implementation /////////////////////////////////////////////////////// | ||||
| XmlReader::XmlReader(const string &fileName,string toplev) : fileName_(fileName) | ||||
| { | ||||
|   // Parse the file; any parse failure is fatal. | ||||
|   pugi::xml_parse_result status = doc_.load_file(fileName_.c_str()); | ||||
|   if ( !status ) { | ||||
|     cerr << "XML error description: " << status.description() << "\n"; | ||||
|     cerr << "XML error offset     : " << status.offset        << "\n"; | ||||
|     abort(); | ||||
|   } | ||||
|  | ||||
|   // A non-empty toplev selects that child of the document as the starting | ||||
|   // node; an empty toplev starts at the document itself. | ||||
|   if ( !toplev.empty() ) { | ||||
|     node_ = doc_.child(toplev.c_str()); | ||||
|   } else { | ||||
|     node_ = doc_; | ||||
|   } | ||||
| } | ||||
|  | ||||
| bool XmlReader::push(const string &s) | ||||
|   | ||||
| @@ -44,10 +44,9 @@ namespace Grid | ||||
| { | ||||
|    | ||||
|   class XmlWriter: public Writer<XmlWriter> | ||||
|   { | ||||
|      | ||||
|   {     | ||||
|   public: | ||||
|     XmlWriter(const std::string &fileName); | ||||
|     XmlWriter(const std::string &fileName,std::string toplev = std::string("grid") ); | ||||
|     virtual ~XmlWriter(void); | ||||
|     void push(const std::string &s); | ||||
|     void pop(void); | ||||
| @@ -55,6 +54,7 @@ namespace Grid | ||||
|     void writeDefault(const std::string &s, const U &x); | ||||
|     template <typename U> | ||||
|     void writeDefault(const std::string &s, const std::vector<U> &x); | ||||
|     std::string XmlString(void); | ||||
|   private: | ||||
|     pugi::xml_document doc_; | ||||
|     pugi::xml_node     node_; | ||||
| @@ -64,7 +64,8 @@ namespace Grid | ||||
|   class XmlReader: public Reader<XmlReader> | ||||
|   { | ||||
|   public: | ||||
|     XmlReader(const std::string &fileName); | ||||
|     XmlReader(const char *xmlstring,std::string toplev = std::string("grid") ); | ||||
|     XmlReader(const std::string &fileName,std::string toplev = std::string("grid") ); | ||||
|     virtual ~XmlReader(void) = default; | ||||
|     bool push(const std::string &s); | ||||
|     void pop(void); | ||||
| @@ -118,7 +119,7 @@ namespace Grid | ||||
|     std::string buf; | ||||
|      | ||||
|     readDefault(s, buf); | ||||
|     std::cout << s << "   " << buf << std::endl; | ||||
|     //    std::cout << s << "   " << buf << std::endl; | ||||
|     fromString(output, buf); | ||||
|   } | ||||
|    | ||||
|   | ||||
| @@ -281,8 +281,8 @@ namespace Optimization { | ||||
|  | ||||
|   struct PrecisionChange { | ||||
|     static inline vech StoH (const vecf &a,const vecf &b) { | ||||
| #ifdef USE_FP16 | ||||
|       vech ret; | ||||
| #ifdef USE_FP16 | ||||
|       vech *ha = (vech *)&a; | ||||
|       vech *hb = (vech *)&b; | ||||
|       const int nf = W<float>::r; | ||||
| @@ -493,6 +493,8 @@ namespace Optimization { | ||||
|      | ||||
|     return a; | ||||
|   } | ||||
|  | ||||
|   #undef acc  // EIGEN compatibility | ||||
| } | ||||
|  | ||||
| ////////////////////////////////////////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -327,18 +327,16 @@ class Grid_simd { | ||||
|   // provides support | ||||
|   /////////////////////////////////////// | ||||
|  | ||||
| #if (__GNUC__ == 5 ) || ( ( __GNUC__ == 6 ) && __GNUC_MINOR__ < 3 ) | ||||
| #pragma GCC push_options  | ||||
| #pragma GCC optimize ("O0")  | ||||
| #endif | ||||
|   template <class functor> | ||||
|   friend inline Grid_simd SimdApply(const functor &func, const Grid_simd &v) { | ||||
|     Grid_simd ret; | ||||
|     Grid_simd::conv_t conv; | ||||
|  | ||||
|     Grid_simd::scalar_type s; | ||||
|      | ||||
|     conv.v = v.v; | ||||
|     for (int i = 0; i < Nsimd(); i++) { | ||||
|       conv.s[i] = func(conv.s[i]); | ||||
|       s = conv.s[i]; | ||||
|       conv.s[i] = func(s); | ||||
|     } | ||||
|     ret.v = conv.v; | ||||
|     return ret; | ||||
| @@ -350,18 +348,18 @@ class Grid_simd { | ||||
|     Grid_simd ret; | ||||
|     Grid_simd::conv_t cx; | ||||
|     Grid_simd::conv_t cy; | ||||
|     Grid_simd::scalar_type sx,sy; | ||||
|  | ||||
|     cx.v = x.v; | ||||
|     cy.v = y.v; | ||||
|     for (int i = 0; i < Nsimd(); i++) { | ||||
|       cx.s[i] = func(cx.s[i], cy.s[i]); | ||||
|       sx = cx.s[i]; | ||||
|       sy = cy.s[i]; | ||||
|       cx.s[i] = func(sx,sy); | ||||
|     } | ||||
|     ret.v = cx.v; | ||||
|     return ret; | ||||
|   } | ||||
| #if (__GNUC__ == 5 ) || ( ( __GNUC__ == 6 ) && __GNUC_MINOR__ < 3 ) | ||||
| #pragma GCC pop_options | ||||
| #endif | ||||
|   /////////////////////// | ||||
|   // Exchange  | ||||
|   // Al Ah , Bl Bh -> Al Bl Ah,Bh | ||||
| @@ -423,7 +421,6 @@ class Grid_simd { | ||||
|    | ||||
| };  // end of Grid_simd class definition | ||||
|  | ||||
|  | ||||
| inline void permute(ComplexD &y,ComplexD b, int perm) {  y=b; } | ||||
| inline void permute(ComplexF &y,ComplexF b, int perm) {  y=b; } | ||||
| inline void permute(RealD &y,RealD b, int perm) {  y=b; } | ||||
| @@ -754,8 +751,8 @@ inline Grid_simd<std::complex<R>, V> toComplex(const Grid_simd<R, V> &in) { | ||||
|  | ||||
|   conv.v = in.v; | ||||
|   for (int i = 0; i < Rsimd::Nsimd(); i += 2) { | ||||
|     assert(conv.s[i + 1] == | ||||
|            conv.s[i]);  // trap any cases where real was not duplicated | ||||
|     assert(conv.s[i + 1] == conv.s[i]);   | ||||
|     // trap any cases where real was not duplicated | ||||
|     // indicating the SIMD grids of real and imag assignment did not correctly | ||||
|     // match | ||||
|     conv.s[i + 1] = 0.0;  // zero imaginary parts | ||||
| @@ -833,8 +830,6 @@ inline void precisionChange(vComplexD *out,vComplexF *in,int nvec){ precisionCha | ||||
| inline void precisionChange(vComplexD *out,vComplexH *in,int nvec){ precisionChange((vRealD *)out,(vRealH *)in,nvec);} | ||||
| inline void precisionChange(vComplexF *out,vComplexH *in,int nvec){ precisionChange((vRealF *)out,(vRealH *)in,nvec);} | ||||
|  | ||||
|  | ||||
|  | ||||
| // Check our vector types are of an appropriate size. | ||||
| #if defined QPX | ||||
| static_assert(2*sizeof(SIMD_Ftype) == sizeof(SIMD_Dtype), "SIMD vector lengths incorrect"); | ||||
| @@ -849,21 +844,14 @@ static_assert(sizeof(SIMD_Ftype) == sizeof(SIMD_Itype), "SIMD vector lengths inc | ||||
| ///////////////////////////////////////// | ||||
| template <typename T> | ||||
| struct is_simd : public std::false_type {}; | ||||
| template <> | ||||
| struct is_simd<vRealF> : public std::true_type {}; | ||||
| template <> | ||||
| struct is_simd<vRealD> : public std::true_type {}; | ||||
| template <> | ||||
| struct is_simd<vComplexF> : public std::true_type {}; | ||||
| template <> | ||||
| struct is_simd<vComplexD> : public std::true_type {}; | ||||
| template <> | ||||
| struct is_simd<vInteger> : public std::true_type {}; | ||||
| template <> struct is_simd<vRealF>     : public std::true_type {}; | ||||
| template <> struct is_simd<vRealD>     : public std::true_type {}; | ||||
| template <> struct is_simd<vComplexF>  : public std::true_type {}; | ||||
| template <> struct is_simd<vComplexD>  : public std::true_type {}; | ||||
| template <> struct is_simd<vInteger>   : public std::true_type {}; | ||||
|  | ||||
| template <typename T> | ||||
| using IfSimd = Invoke<std::enable_if<is_simd<T>::value, int> >; | ||||
| template <typename T> | ||||
| using IfNotSimd = Invoke<std::enable_if<!is_simd<T>::value, unsigned> >; | ||||
| template <typename T> using IfSimd    = Invoke<std::enable_if<is_simd<T>::value, int> >; | ||||
| template <typename T> using IfNotSimd = Invoke<std::enable_if<!is_simd<T>::value, unsigned> >; | ||||
| } | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -179,13 +179,6 @@ inline Grid_simd<S, V> div(const Grid_simd<S, V> &r, Integer y) { | ||||
| //////////////////////////////////////////////////////////////////////////// | ||||
| // Allows us to assign into **conformable** real vectors from complex | ||||
| //////////////////////////////////////////////////////////////////////////// | ||||
| //  template < class S, class V > | ||||
| //  inline auto ComplexRemove(const Grid_simd<S,V> &c) -> | ||||
| //  Grid_simd<Grid_simd<S,V>::Real,V> { | ||||
| //    Grid_simd<Grid_simd<S,V>::Real,V> ret; | ||||
| //    ret.v = c.v; | ||||
| //    return ret; | ||||
| //  } | ||||
| template <class scalar> | ||||
| struct AndFunctor { | ||||
|   scalar operator()(const scalar &x, const scalar &y) const { return x & y; } | ||||
|   | ||||
| @@ -156,11 +156,18 @@ class iScalar { | ||||
|  | ||||
|   // convert from a something to a scalar via constructor of something arg | ||||
|   template <class T, typename std::enable_if<!isGridTensor<T>::value, T>::type * = nullptr> | ||||
|     strong_inline iScalar<vtype> operator=(T arg) { | ||||
|   strong_inline iScalar<vtype> operator=(T arg) { | ||||
|     _internal = arg; | ||||
|     return *this; | ||||
|   } | ||||
|  | ||||
|   // Converting assignment from an rvalue iScalar with a different element type: assigns arg._internal to _internal (no std::move is applied) and returns *this by value. | ||||
|   template <class ttype> | ||||
|   strong_inline iScalar<vtype> operator=(iScalar<ttype> &&arg) { | ||||
|     _internal = arg._internal; | ||||
|     return *this; | ||||
|   } | ||||
|  | ||||
|   friend std::ostream &operator<<(std::ostream &stream,const iScalar<vtype> &o) { | ||||
|     stream << "S {" << o._internal << "}"; | ||||
|     return stream; | ||||
|   | ||||
| @@ -37,30 +37,108 @@ namespace Grid { | ||||
|   ///////////////////////////////////////////////  | ||||
|  | ||||
|  | ||||
|   template<class vtype> inline iScalar<vtype> Exponentiate(const iScalar<vtype>&r, ComplexD alpha ,  Integer Nexp = DEFAULT_MAT_EXP) | ||||
|   template<class vtype> inline iScalar<vtype> Exponentiate(const iScalar<vtype>&r, RealD alpha ,  Integer Nexp = DEFAULT_MAT_EXP) | ||||
|     { | ||||
|       iScalar<vtype> ret; | ||||
|       ret._internal = Exponentiate(r._internal, alpha, Nexp); | ||||
|       return ret; | ||||
|     } | ||||
|  | ||||
|  | ||||
|   template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>  | ||||
|     inline iMatrix<vtype,N> Exponentiate(const iMatrix<vtype,N> &arg, ComplexD alpha  , Integer Nexp = DEFAULT_MAT_EXP ) | ||||
| template<class vtype, int N> inline iVector<vtype, N> Exponentiate(const iVector<vtype,N>&r, RealD alpha ,  Integer Nexp = DEFAULT_MAT_EXP) | ||||
|     { | ||||
|       iMatrix<vtype,N> unit(1.0); | ||||
|       iMatrix<vtype,N> temp(unit); | ||||
|        | ||||
|       for(int i=Nexp; i>=1;--i){ | ||||
| 	temp *= alpha/ComplexD(i); | ||||
| 	temp = unit + temp*arg; | ||||
|       } | ||||
|        | ||||
|       return temp; | ||||
|        | ||||
|       iVector<vtype, N> ret; | ||||
|       for (int i = 0; i < N; i++) | ||||
|         ret._internal[i] = Exponentiate(r._internal[i], alpha, Nexp); | ||||
|       return ret; | ||||
|     } | ||||
|  | ||||
|  | ||||
|  | ||||
|     // Specialisation for 3x3 matrices: closed-form (Cayley-Hamilton) exponential. Returns f0*1 + (-i f1)*(alpha*arg) - f2*(alpha*arg)^2 with scalar coefficients f0,f1,f2 computed below. Nexp is accepted for signature compatibility but never used. | ||||
|     template<class vtype, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0>::type * =nullptr>  | ||||
|     inline iMatrix<vtype,3> Exponentiate(const iMatrix<vtype,3> &arg, RealD alpha  , Integer Nexp = DEFAULT_MAT_EXP ) | ||||
|     { | ||||
|     // For SU(3) this is 2x faster than the standard implementation using Nexp=12 | ||||
|     // Note that it actually computes | ||||
|     // exp ( input matrix ) | ||||
|     // i.e. the factor of i is expected to come from the caller: | ||||
|     // the input matrix is anti-hermitian, NOT hermitian. (The locals temp and qt below are declared but never used.) | ||||
|       typedef iMatrix<vtype,3> mat; | ||||
|       typedef iScalar<vtype> scalar; | ||||
|       mat unit(1.0); | ||||
|       mat temp(unit); | ||||
|       const Complex one_over_three = 1.0 / 3.0; | ||||
|       const Complex one_over_two = 1.0 / 2.0; | ||||
|  | ||||
|       scalar c0, c1, tmp, c0max, theta, u, w; | ||||
|       scalar xi0, u2, w2, cosw; | ||||
|       scalar fden, h0, h1, h2; | ||||
|       scalar e2iu, emiu, ixi0, qt; | ||||
|       scalar f0, f1, f2; | ||||
|       scalar unity(1.0); | ||||
|       /* iQ2 = (alpha*arg)^2 and iQ3 = (alpha*arg)^3; c0 and c1 are built from their traces. */ | ||||
|       mat iQ2 = arg*arg*alpha*alpha; | ||||
|       mat iQ3 = arg*iQ2*alpha;    | ||||
|       // sign in c0 from the conventions on the Ta | ||||
|       scalar imQ3, reQ2; | ||||
|       imQ3 = imag( trace(iQ3) ); | ||||
|       reQ2 = real( trace(iQ2) ); | ||||
|       c0 = -imQ3 * one_over_three;   | ||||
|       c1 = -reQ2 * one_over_two; | ||||
|  | ||||
|       // Cayley Hamilton checks to machine precision, tested | ||||
|       tmp = c1 * one_over_three; | ||||
|       c0max = 2.0 * pow(tmp, 1.5); | ||||
|       /* NOTE(review): c0max is zero whenever c1 is zero (e.g. arg == 0), so c0 / c0max below is 0/0 - confirm callers never pass a vanishing matrix. */ | ||||
|       theta = acos(c0 / c0max) * one_over_three; | ||||
|       u = sqrt(tmp) * cos(theta); | ||||
|       w = sqrt(c1) * sin(theta); | ||||
|       /* NOTE(review): sin(w) / w is evaluated with no small-w branch; w == 0 gives 0/0 - confirm. */ | ||||
|       xi0 = sin(w) / w; | ||||
|       u2 = u * u; | ||||
|       w2 = w * w; | ||||
|       cosw = cos(w); | ||||
|       /* ixi0 = i*xi0, emiu = cos(u) - i sin(u), e2iu = cos(2u) + i sin(2u) */ | ||||
|       ixi0 = timesI(xi0); | ||||
|       emiu = cos(u) - timesI(sin(u)); | ||||
|       e2iu = cos(2.0 * u) + timesI(sin(2.0 * u)); | ||||
|       /* Numerators h0,h1,h2 of the coefficients; the common denominator is applied through fden below. */ | ||||
|       h0 = e2iu * (u2 - w2) + | ||||
|            emiu * ((8.0 * u2 * cosw) + (2.0 * u * (3.0 * u2 + w2) * ixi0)); | ||||
|       h1 = e2iu * (2.0 * u) - emiu * ((2.0 * u * cosw) - (3.0 * u2 - w2) * ixi0); | ||||
|       h2 = e2iu - emiu * (cosw + (3.0 * u) * ixi0); | ||||
|       /* NOTE(review): nothing guards against 9*u2 == w2 in the denominator below. */ | ||||
|       fden = unity / (9.0 * u2 - w2);  // reals | ||||
|       f0 = h0 * fden; | ||||
|       f1 = h1 * fden; | ||||
|       f2 = h2 * fden; | ||||
|       /* Assemble the result from the identity, alpha*arg and iQ2 = (alpha*arg)^2. */ | ||||
|       return (f0 * unit + timesMinusI(f1) * arg*alpha - f2 * iQ2); | ||||
|     } | ||||
|  | ||||
|  | ||||
|  | ||||
| // General exponential | ||||
| // Truncated power series for exp(alpha*arg), evaluated innermost term first: | ||||
| // starting from the identity, for i = Nexp down to 1 the accumulator becomes | ||||
| // 1 + (accumulator * alpha/i) * arg. | ||||
| // The factor of i is expected to come from the caller: arg is anti-hermitian, | ||||
| // NOT hermitian. | ||||
| template<class vtype,int N, typename std::enable_if< GridTypeMapper<vtype>::TensorLevel == 0 >::type * =nullptr>  | ||||
|     inline iMatrix<vtype,N> Exponentiate(const iMatrix<vtype,N> &arg, RealD alpha  , Integer Nexp = DEFAULT_MAT_EXP ) | ||||
|     { | ||||
|       iMatrix<vtype,N> identity(1.0); | ||||
|       iMatrix<vtype,N> acc(identity); | ||||
|  | ||||
|       int order = Nexp; | ||||
|       while (order >= 1) { | ||||
|         acc *= alpha/RealD(order); | ||||
|         acc = identity + acc*arg; | ||||
|         --order; | ||||
|       } | ||||
|       return acc; | ||||
|     } | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| } | ||||
| #endif | ||||
|   | ||||
| @@ -47,6 +47,28 @@ template<int Level> | ||||
| class TensorIndexRecursion { | ||||
|  | ||||
|  public: | ||||
|  | ||||
|   //////////////////////////////////////////////////// | ||||
|   // Type Queries | ||||
|   //////////////////////////////////////////////////// | ||||
|   // Each query forwards to TensorIndexRecursion<Level-1> on the first element one | ||||
|   // tensor level down. Arguments are taken by const reference: only the static | ||||
|   // type matters, so the tensor is neither copied nor read. | ||||
|   // The is* predicates return int. | ||||
|   template<class vtype>       static inline int indexRank(const iScalar<vtype> &tmp)  { return TensorIndexRecursion<Level-1>::indexRank(tmp._internal);  } | ||||
|   template<class vtype,int N> static inline int indexRank(const iVector<vtype,N> &tmp){ return TensorIndexRecursion<Level-1>::indexRank(tmp._internal[0]);  } | ||||
|   template<class vtype,int N> static inline int indexRank(const iMatrix<vtype,N> &tmp){ return TensorIndexRecursion<Level-1>::indexRank(tmp._internal[0][0]);  } | ||||
|  | ||||
|   template<class vtype>       static inline int isScalar(const iScalar<vtype> &tmp)  { return TensorIndexRecursion<Level-1>::isScalar(tmp._internal);  } | ||||
|   template<class vtype,int N> static inline int isScalar(const iVector<vtype,N> &tmp){ return TensorIndexRecursion<Level-1>::isScalar(tmp._internal[0]);  } | ||||
|   template<class vtype,int N> static inline int isScalar(const iMatrix<vtype,N> &tmp){ return TensorIndexRecursion<Level-1>::isScalar(tmp._internal[0][0]);  } | ||||
|  | ||||
|   template<class vtype>       static inline int isVector(const iScalar<vtype> &tmp)  { return TensorIndexRecursion<Level-1>::isVector(tmp._internal);  } | ||||
|   template<class vtype,int N> static inline int isVector(const iVector<vtype,N> &tmp){ return TensorIndexRecursion<Level-1>::isVector(tmp._internal[0]);  } | ||||
|   template<class vtype,int N> static inline int isVector(const iMatrix<vtype,N> &tmp){ return TensorIndexRecursion<Level-1>::isVector(tmp._internal[0][0]);  } | ||||
|  | ||||
|   template<class vtype>       static inline int isMatrix(const iScalar<vtype> &tmp)  { return TensorIndexRecursion<Level-1>::isMatrix(tmp._internal);  } | ||||
|   template<class vtype,int N> static inline int isMatrix(const iVector<vtype,N> &tmp){ return TensorIndexRecursion<Level-1>::isMatrix(tmp._internal[0]);  } | ||||
|   template<class vtype,int N> static inline int isMatrix(const iMatrix<vtype,N> &tmp){ return TensorIndexRecursion<Level-1>::isMatrix(tmp._internal[0][0]);  } | ||||
|   //////////////////////////////////////////////////// | ||||
|   // Trace | ||||
|   //////////////////////////////////////////////////// | ||||
|   template<class vtype> | ||||
|   static auto traceIndex(const iScalar<vtype> arg) ->  iScalar<decltype(TensorIndexRecursion<Level-1>::traceIndex(arg._internal))>  | ||||
|   { | ||||
| @@ -215,6 +237,24 @@ class TensorIndexRecursion { | ||||
| template<> | ||||
| class TensorIndexRecursion<0> { | ||||
|  public: | ||||
|   //////////////////////////////////////////////////// | ||||
|   // Type Queries | ||||
|   //////////////////////////////////////////////////// | ||||
|   // End of the recursion: the answer depends only on the kind of tensor at this | ||||
|   // level. indexRank is 1 for iScalar and N for iVector / iMatrix. | ||||
|   // Arguments are taken by const reference and never read, so no tensor is copied. | ||||
|   // The is* predicates return int (true/false converted to 1/0). | ||||
|   template<class vtype>       static inline int indexRank(const iScalar<vtype> &tmp)  { return 1; } | ||||
|   template<class vtype,int N> static inline int indexRank(const iVector<vtype,N> &tmp){ return N; } | ||||
|   template<class vtype,int N> static inline int indexRank(const iMatrix<vtype,N> &tmp){ return N; } | ||||
|  | ||||
|   template<class vtype>       static inline int isScalar(const iScalar<vtype> &tmp)  { return true;} | ||||
|   template<class vtype,int N> static inline int isScalar(const iVector<vtype,N> &tmp){ return false;} | ||||
|   template<class vtype,int N> static inline int isScalar(const iMatrix<vtype,N> &tmp){ return false;} | ||||
|  | ||||
|   template<class vtype>       static inline int isVector(const iScalar<vtype> &tmp)  { return false;} | ||||
|   template<class vtype,int N> static inline int isVector(const iVector<vtype,N> &tmp){ return true;} | ||||
|   template<class vtype,int N> static inline int isVector(const iMatrix<vtype,N> &tmp){ return false;} | ||||
|  | ||||
|   template<class vtype>       static inline int isMatrix(const iScalar<vtype> &tmp)  { return false;} | ||||
|   template<class vtype,int N> static inline int isMatrix(const iVector<vtype,N> &tmp){ return false;} | ||||
|   template<class vtype,int N> static inline int isMatrix(const iMatrix<vtype,N> &tmp){ return true;} | ||||
|  | ||||
|   ///////////////////////////////////////// | ||||
|   // Ends recursion for trace (scalar/vector/matrix) | ||||
| @@ -302,6 +342,26 @@ class TensorIndexRecursion<0> { | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // External wrappers | ||||
| //////////////////////////////////////////////////////////////////////////////////////////////////////// | ||||
| // Type query: hands a default-constructed vtype to TensorIndexRecursion<Level>::indexRank | ||||
| // and returns its answer (the Level-0 specialisation yields 1 for iScalar, N for iVector/iMatrix). | ||||
| template<int Level,class vtype> inline int indexRank(void) | ||||
| { | ||||
|   vtype probe; | ||||
|   const int rank = TensorIndexRecursion<Level>::indexRank(probe); | ||||
|   return rank; | ||||
| } | ||||
| // Type query: hands a default-constructed vtype to TensorIndexRecursion<Level>::isScalar | ||||
| // and returns its answer (the Level-0 specialisation yields true only for iScalar). | ||||
| template<int Level,class vtype> inline int isScalar(void) | ||||
| { | ||||
|   vtype probe; | ||||
|   const int answer = TensorIndexRecursion<Level>::isScalar(probe); | ||||
|   return answer; | ||||
| } | ||||
| // Type query: hands a default-constructed vtype to TensorIndexRecursion<Level>::isVector | ||||
| // and returns its answer (the Level-0 specialisation yields true only for iVector). | ||||
| template<int Level,class vtype> inline int isVector(void) | ||||
| { | ||||
|   vtype probe; | ||||
|   const int answer = TensorIndexRecursion<Level>::isVector(probe); | ||||
|   return answer; | ||||
| } | ||||
| // Type query: hands a default-constructed vtype to TensorIndexRecursion<Level>::isMatrix | ||||
| // and returns its answer (the Level-0 specialisation yields true only for iMatrix). | ||||
| template<int Level,class vtype> inline int isMatrix(void) | ||||
| { | ||||
|   vtype probe; | ||||
|   const int answer = TensorIndexRecursion<Level>::isMatrix(probe); | ||||
|   return answer; | ||||
| } | ||||
|  | ||||
| template<int Level,class vtype> inline auto traceIndex (const vtype &arg) -> RemoveCRV(TensorIndexRecursion<Level>::traceIndex(arg)) | ||||
| { | ||||
|   | ||||
| @@ -281,8 +281,8 @@ namespace Grid { | ||||
|   template<typename T> | ||||
|   class getPrecision{ | ||||
|   public: | ||||
|     typedef typename getVectorType<T>::type vector_obj; //get the vector_obj (i.e. a grid Tensor) if its a Lattice<vobj>, do nothing otherwise (i.e. if fundamental or grid Tensor) | ||||
|    | ||||
|     //get the vector_obj (i.e. a grid Tensor) if its a Lattice<vobj>, do nothing otherwise (i.e. if fundamental or grid Tensor) | ||||
|     typedef typename getVectorType<T>::type vector_obj;  | ||||
|     typedef typename GridTypeMapper<vector_obj>::scalar_type scalar_type; //get the associated scalar type. Works on fundamental and tensor types | ||||
|     typedef typename GridTypeMapper<scalar_type>::Realified real_scalar_type; //remove any std::complex wrapper, should get us to the fundamental type | ||||
|  | ||||
|   | ||||
							
								
								
									
										99
									
								
								tests/IO/Test_ildg_io.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										99
									
								
								tests/IO/Test_ildg_io.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,99 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./tests/IO/Test_ildg_io.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
|  | ||||
| // ILDG round-trip test: generates a hot SU(3) gauge configuration, writes it | ||||
| // with IldgWriter, reads it back with IldgReader and prints norm2 of the | ||||
| // difference between the field read back and the field that was written. | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   std::cout <<GridLogMessage<< " main "<<std::endl; | ||||
|  | ||||
|   std::vector<int> simd_layout = GridDefaultSimd(4,vComplex::Nsimd()); | ||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|   //std::vector<int> latt_size  ({48,48,48,96}); | ||||
|   //std::vector<int> latt_size  ({32,32,32,32}); | ||||
|   std::vector<int> latt_size  ({16,16,16,32}); | ||||
|      | ||||
|   GridCartesian     Fine(latt_size,simd_layout,mpi_layout); | ||||
|  | ||||
|   // Only the parallel RNG feeds the configuration below; the coarse grid, the | ||||
|   // second parallel RNG, the serial RNGs and the per-direction link vector of | ||||
|   // the earlier version were never used and have been dropped. | ||||
|   GridParallelRNG   pRNGa(&Fine); | ||||
|  | ||||
|   std::cout <<GridLogMessage<< " seeding... "<<std::endl; | ||||
|   pRNGa.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|   std::cout <<GridLogMessage<< " ...done "<<std::endl; | ||||
|  | ||||
|   LatticeGaugeField Umu(&Fine); | ||||
|   LatticeGaugeField Umu_diff(&Fine); | ||||
|   LatticeGaugeField Umu_saved(&Fine); | ||||
|  | ||||
|   SU3::HotConfiguration(pRNGa,Umu); | ||||
|  | ||||
|   FieldMetaData header; | ||||
|  | ||||
|   std::cout <<GridLogMessage<<"**************************************"<<std::endl; | ||||
|   std::cout <<GridLogMessage<<"** Writing out  ILDG conf    *********"<<std::endl; | ||||
|   std::cout <<GridLogMessage<<"**************************************"<<std::endl; | ||||
|   std::string file("./ckpoint_ildg.4000"); | ||||
|   // Named without a leading underscore: identifiers that begin with an | ||||
|   // underscore followed by an uppercase letter are reserved in C++. | ||||
|   IldgWriter ildgWriter; | ||||
|   ildgWriter.open(file); | ||||
|   ildgWriter.writeConfiguration(Umu,4000,std::string("dummy_ildg_LFN"),std::string("dummy_config")); | ||||
|   ildgWriter.close(); | ||||
|  | ||||
|   // Keep a copy of what was written so the field read back can be compared. | ||||
|   Umu_saved = Umu; | ||||
|   std::cout <<GridLogMessage<<"**************************************"<<std::endl; | ||||
|   std::cout <<GridLogMessage<<"** Reading back ILDG conf    *********"<<std::endl; | ||||
|   std::cout <<GridLogMessage<<"**************************************"<<std::endl; | ||||
|   IldgReader ildgReader; | ||||
|   ildgReader.open(file); | ||||
|   ildgReader.readConfiguration(Umu,header); | ||||
|   ildgReader.close(); | ||||
|   Umu_diff = Umu - Umu_saved; | ||||
|  | ||||
|   std::cout <<GridLogMessage<< "norm2 Gauge Diff = "<<norm2(Umu_diff)<<std::endl; | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
							
								
								
									
										115
									
								
								tests/IO/Test_ildg_read.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										115
									
								
								tests/IO/Test_ildg_read.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,115 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./tests/IO/Test_ildg_read.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk> | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
| Author: paboyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
|  | ||||
| // Reads a gauge configuration from ./ildg.file with IldgReader and prints the | ||||
| // scaled plaquette (per slice along direction Nd-1, accumulated, and from a | ||||
| // global sum) followed by the scaled link trace. | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   std::vector<int> simd_layout = GridDefaultSimd(4,vComplex::Nsimd()); | ||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|   std::vector<int> latt_size = GridDefaultLatt(); | ||||
|   const int sliceDir   = 3; | ||||
|   const int sliceCount = latt_size[sliceDir]; | ||||
|  | ||||
|   GridCartesian Fine(latt_size,simd_layout,mpi_layout); | ||||
|  | ||||
|   LatticeGaugeField Umu(&Fine); | ||||
|   std::vector<LatticeColourMatrix> links(4,&Fine); | ||||
|  | ||||
|   // Load the configuration from disk. | ||||
|   FieldMetaData header; | ||||
|   std::string file("./ildg.file"); | ||||
|   IldgReader reader; | ||||
|   reader.open(file); | ||||
|   reader.readConfiguration(Umu,header); | ||||
|   reader.close(); | ||||
|  | ||||
|   // One colour-matrix field per Lorentz index. | ||||
|   for(int dir=0; dir<Nd; ++dir){ | ||||
|     links[dir] = PeekIndex<LorentzIndex>(Umu,dir); | ||||
|   } | ||||
|  | ||||
|   // Accumulate the trace of every link field (written out longhand, as before). | ||||
|   LatticeComplex LinkTrace(&Fine); | ||||
|   LinkTrace=zero; | ||||
|   for(int dir=0; dir<Nd; ++dir){ | ||||
|     LinkTrace = LinkTrace + trace(links[dir]); | ||||
|   } | ||||
|  | ||||
|   // Accumulate the traced four-link product over the (1+2+3)=6 pairs with nu<mu. | ||||
|   LatticeComplex Plaq(&Fine); | ||||
|   Plaq = zero; | ||||
|   for(int mu=1; mu<Nd; ++mu){ | ||||
|     for(int nu=0; nu<mu; ++nu){ | ||||
|       Plaq = Plaq + trace(links[mu]*Cshift(links[nu],mu,1)*adj(Cshift(links[mu],nu,1))*adj(links[nu])); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   double vol = Fine.gSites(); | ||||
|   Complex PlaqScale(1.0/vol/6.0/3.0); | ||||
|   std::cout<<GridLogMessage <<"PlaqScale" << PlaqScale<<std::endl; | ||||
|  | ||||
|   // Slice sums along direction Nd-1: print each one and keep a running total. | ||||
|   std::vector<TComplex> sliceSums(sliceCount); | ||||
|   sliceSum(Plaq,sliceSums,Nd-1); | ||||
|   int nSlices = sliceSums.size(); | ||||
|  | ||||
|   TComplex runningSum; | ||||
|   runningSum=zero; | ||||
|   for(int t=0; t<nSlices; ++t){ | ||||
|     runningSum = runningSum+sliceSums[t]; | ||||
|     Complex sliceValue=TensorRemove(sliceSums[t]); | ||||
|     std::cout<<GridLogMessage << "sliced ["<<t<<"]" <<sliceValue*PlaqScale*Real(nSlices)<<std::endl; | ||||
|   } | ||||
|  | ||||
|   Complex sliceTotal = TensorRemove(runningSum); | ||||
|   std::cout<<GridLogMessage << "total " <<sliceTotal*PlaqScale<<std::endl; | ||||
|  | ||||
|   // Same quantity obtained from a single global sum. | ||||
|   TComplex plaqSum = sum(Plaq); | ||||
|   Complex plaqValue = TensorRemove(plaqSum); | ||||
|   std::cout<<GridLogMessage << "calculated plaquettes " <<plaqValue*PlaqScale<<std::endl; | ||||
|  | ||||
|   Complex LinkTraceScale(1.0/vol/4.0/3.0); | ||||
|   TComplex traceSum = sum(LinkTrace); | ||||
|   Complex traceValue = TensorRemove(traceSum); | ||||
|   std::cout<<GridLogMessage << "calculated link trace " <<traceValue*LinkTraceScale<<std::endl; | ||||
|  | ||||
|   Grid_finalize(); | ||||
| } | ||||
| @@ -38,10 +38,13 @@ int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   std::cout <<GridLogMessage<< " main "<<std::endl; | ||||
|  | ||||
|   std::vector<int> simd_layout = GridDefaultSimd(4,vComplex::Nsimd()); | ||||
|   std::vector<int> mpi_layout  = GridDefaultMpi(); | ||||
|   std::vector<int> latt_size  ({16,16,16,16}); | ||||
|   //std::vector<int> latt_size  ({48,48,48,96}); | ||||
|   //std::vector<int> latt_size  ({32,32,32,32}); | ||||
|   std::vector<int> latt_size  ({16,16,16,32}); | ||||
|   std::vector<int> clatt_size  ({4,4,4,8}); | ||||
|   int orthodir=3; | ||||
|   int orthosz =latt_size[orthodir]; | ||||
| @@ -49,30 +52,32 @@ int main (int argc, char ** argv) | ||||
|   GridCartesian     Fine(latt_size,simd_layout,mpi_layout); | ||||
|   GridCartesian     Coarse(clatt_size,simd_layout,mpi_layout); | ||||
|  | ||||
|  | ||||
|   GridParallelRNG   pRNGa(&Fine); | ||||
|   GridParallelRNG   pRNGb(&Fine); | ||||
|   GridSerialRNG     sRNGa; | ||||
|   GridSerialRNG     sRNGb; | ||||
|  | ||||
|   std::cout <<GridLogMessage<< " seeding... "<<std::endl; | ||||
|   pRNGa.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|   sRNGa.SeedFixedIntegers(std::vector<int>({45,12,81,9})); | ||||
|    | ||||
|   std::cout <<GridLogMessage<< " ...done "<<std::endl; | ||||
|  | ||||
|   std::string rfile("./ckpoint_rng.4000"); | ||||
|   FieldMetaData rngheader; | ||||
|   NerscIO::writeRNGState(sRNGa,pRNGa,rfile); | ||||
|   NerscField rngheader; | ||||
|   NerscIO::readRNGState (sRNGb,pRNGb,rngheader,rfile); | ||||
|  | ||||
|   LatticeComplex tmpa(&Fine); random(pRNGa,tmpa); | ||||
|   LatticeComplex tmpb(&Fine); random(pRNGb,tmpb); | ||||
|   tmpa = tmpa - tmpb; | ||||
|   std::cout << " difference between restored randoms and orig "<<norm2( tmpa ) <<" / "<< norm2(tmpb)<<std::endl; | ||||
|   std::cout <<GridLogMessage<< " difference between restored randoms and orig "<<norm2( tmpa ) <<" / "<< norm2(tmpb)<<std::endl; | ||||
|  | ||||
|   ComplexD a,b; | ||||
|  | ||||
|   random(sRNGa,a); | ||||
|   random(sRNGb,b); | ||||
|   std::cout << " serial RNG numbers "<<a<<" "<<b<<std::endl; | ||||
|  | ||||
|   std::cout <<GridLogMessage<< " serial RNG numbers "<<a<<" "<<b<<std::endl; | ||||
|  | ||||
|   LatticeGaugeField Umu(&Fine); | ||||
|   LatticeGaugeField Umu_diff(&Fine); | ||||
| @@ -80,15 +85,20 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|   std::vector<LatticeColourMatrix> U(4,&Fine); | ||||
|    | ||||
|   SU3::ColdConfiguration(pRNGa,Umu); | ||||
|   SU3::HotConfiguration(pRNGa,Umu); | ||||
|  | ||||
|   NerscField header; | ||||
|   FieldMetaData header; | ||||
|   std::string file("./ckpoint_lat.4000"); | ||||
|  | ||||
|   int precision32 = 0; | ||||
|   int tworow      = 0; | ||||
|   NerscIO::writeConfiguration(Umu,file,tworow,precision32); | ||||
|   Umu_saved = Umu; | ||||
|   NerscIO::readConfiguration(Umu,header,file); | ||||
|   Umu_diff = Umu - Umu_saved; | ||||
|   //std::cout << "Umu_save "<<Umu_saved[0]<<std::endl; | ||||
|   //std::cout << "Umu_read "<<Umu[0]<<std::endl; | ||||
|   std::cout <<GridLogMessage<< "norm2 Gauge Diff = "<<norm2(Umu_diff)<<std::endl; | ||||
|  | ||||
|   for(int mu=0;mu<Nd;mu++){ | ||||
|     U[mu] = PeekIndex<LorentzIndex>(Umu,mu); | ||||
| @@ -115,7 +125,6 @@ int main (int argc, char ** argv) | ||||
| #endif | ||||
|   double vol = Fine.gSites(); | ||||
|   Complex PlaqScale(1.0/vol/6.0/3.0); | ||||
|   std::cout<<GridLogMessage <<"PlaqScale" << PlaqScale<<std::endl; | ||||
|  | ||||
|   std::vector<TComplex> Plaq_T(orthosz); | ||||
|   sliceSum(Plaq,Plaq_T,Nd-1); | ||||
| @@ -139,7 +148,6 @@ int main (int argc, char ** argv) | ||||
|   Complex p  = TensorRemove(Tp); | ||||
|   std::cout<<GridLogMessage << "calculated plaquettes " <<p*PlaqScale<<std::endl; | ||||
|  | ||||
|  | ||||
|   Complex LinkTraceScale(1.0/vol/4.0/3.0); | ||||
|   TComplex Tl = sum(LinkTrace); | ||||
|   Complex l  = TensorRemove(Tl); | ||||
|   | ||||
| @@ -50,7 +50,7 @@ int main (int argc, char ** argv) | ||||
|   LatticeGaugeField Umu(&Fine); | ||||
|   std::vector<LatticeColourMatrix> U(4,&Fine); | ||||
|    | ||||
|   NerscField header; | ||||
|   FieldMetaData header; | ||||
|   std::string file("./ckpoint_lat"); | ||||
|   NerscIO::readConfiguration(Umu,header,file); | ||||
|  | ||||
|   | ||||
| @@ -31,6 +31,7 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|  | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| GRID_SERIALIZABLE_ENUM(myenum, undef, red, 1, blue, 2, green, 3); | ||||
|    | ||||
| @@ -44,8 +45,8 @@ public: | ||||
|                           double, y, | ||||
|                           bool , b, | ||||
|                           std::vector<double>, array, | ||||
|                           std::vector<std::vector<double>>, twodimarray, | ||||
|                           std::vector<std::vector<std::vector<Complex>>>, cmplx3darray | ||||
|                           std::vector<std::vector<double> >, twodimarray, | ||||
|                           std::vector<std::vector<std::vector<Complex> > >, cmplx3darray | ||||
|                           ); | ||||
|   myclass() {} | ||||
|   myclass(int i) | ||||
| @@ -237,7 +238,7 @@ int main(int argc,char **argv) | ||||
|     std::cout << "Loaded (JSON) -----------------" << std::endl; | ||||
|     std::cout << jcopy1 << std::endl << jveccopy1 << std::endl; | ||||
|   } | ||||
|  | ||||
|    | ||||
| /*  | ||||
|   // This is still work in progress | ||||
|   { | ||||
|   | ||||
							
								
								
									
										110
									
								
								tests/Test_dwf_mixedcg_prec_halfcomms.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										110
									
								
								tests/Test_dwf_mixedcg_prec_halfcomms.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,110 @@ | ||||
|     /************************************************************************************* | ||||
|  | ||||
|     Grid physics library, www.github.com/paboyle/Grid  | ||||
|  | ||||
|     Source file: ./tests/Test_dwf_mixedcg_prec_halfcomms.cc | ||||
|  | ||||
|     Copyright (C) 2015 | ||||
|  | ||||
| Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|  | ||||
|     This program is free software; you can redistribute it and/or modify | ||||
|     it under the terms of the GNU General Public License as published by | ||||
|     the Free Software Foundation; either version 2 of the License, or | ||||
|     (at your option) any later version. | ||||
|  | ||||
|     This program is distributed in the hope that it will be useful, | ||||
|     but WITHOUT ANY WARRANTY; without even the implied warranty of | ||||
|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | ||||
|     GNU General Public License for more details. | ||||
|  | ||||
|     You should have received a copy of the GNU General Public License along | ||||
|     with this program; if not, write to the Free Software Foundation, Inc., | ||||
|     51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | ||||
|  | ||||
|     See the full license in the file "LICENSE" in the top level distribution directory | ||||
|     *************************************************************************************/ | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace std; | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| // Bare aggregate holding one value of the template type in `internal`. | ||||
| template<typename T> struct scal { T internal; }; | ||||
|  | ||||
|   // Table of the Gamma::Algebra entries GammaX, GammaY, GammaZ, GammaT, in that order. | ||||
|   Gamma::Algebra Gmu [] = { Gamma::Algebra::GammaX, Gamma::Algebra::GammaY, Gamma::Algebra::GammaZ, Gamma::Algebra::GammaT }; | ||||
|  | ||||
| // Solves the same odd-checkerboard system twice, once with | ||||
| // MixedPrecisionConjugateGradient and once with ConjugateGradient, and prints | ||||
| // the value axpy_norm returns for the two results. | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
|   // Passed as the first argument to the five-dimensional grid factories below. | ||||
|   const int Ls=24; | ||||
|  | ||||
|   // Grids built with the vComplexD SIMD layout. | ||||
|   GridCartesian         * UGrid   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid); | ||||
|   GridCartesian         * FGrid   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid); | ||||
|   GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid); | ||||
|  | ||||
|   // The same four grids built with the vComplexF SIMD layout. | ||||
|   GridCartesian         * UGrid_f   = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexF::Nsimd()),GridDefaultMpi()); | ||||
|   GridRedBlackCartesian * UrbGrid_f = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid_f); | ||||
|   GridCartesian         * FGrid_f   = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid_f); | ||||
|   GridRedBlackCartesian * FrbGrid_f = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid_f); | ||||
|    | ||||
|   // Fixed seeds: RNG5 fills the source, RNG4 generates the gauge field. | ||||
|   std::vector<int> seeds4({1,2,3,4}); | ||||
|   std::vector<int> seeds5({5,6,7,8}); | ||||
|   GridParallelRNG          RNG5(FGrid);  RNG5.SeedFixedIntegers(seeds5); | ||||
|   GridParallelRNG          RNG4(UGrid);  RNG4.SeedFixedIntegers(seeds4); | ||||
|  | ||||
|   LatticeFermionD    src(FGrid); random(RNG5,src); | ||||
|   LatticeFermionD result(FGrid); result=zero; | ||||
|   LatticeGaugeFieldD Umu(UGrid); | ||||
|   LatticeGaugeFieldF Umu_f(UGrid_f);  | ||||
|    | ||||
|   SU3::HotConfiguration(RNG4,Umu); | ||||
|  | ||||
|   // Fills Umu_f from Umu (presumably a double -> single precision copy; confirm against precisionChange). | ||||
|   precisionChange(Umu_f,Umu); | ||||
|    | ||||
|   RealD mass=0.1; | ||||
|   RealD M5=1.8; | ||||
|   // Same mass and M5 for both operators; only the field and grid types differ. | ||||
|   DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5); | ||||
|   DomainWallFermionFH Ddwf_f(Umu_f,*FGrid_f,*FrbGrid_f,*UGrid_f,*UrbGrid_f,mass,M5); | ||||
|  | ||||
|  | ||||
|   // Odd-checkerboard source and two zeroed result fields, one per solver. | ||||
|   LatticeFermionD    src_o(FrbGrid); | ||||
|   LatticeFermionD result_o(FrbGrid); | ||||
|   LatticeFermionD result_o_2(FrbGrid); | ||||
|   pickCheckerboard(Odd,src_o,src); | ||||
|   result_o.checkerboard = Odd; | ||||
|   result_o = zero; | ||||
|   result_o_2.checkerboard = Odd; | ||||
|   result_o_2 = zero; | ||||
|  | ||||
|   SchurDiagMooeeOperator<DomainWallFermionD,LatticeFermionD> HermOpEO(Ddwf); | ||||
|   SchurDiagMooeeOperator<DomainWallFermionFH,LatticeFermionF> HermOpEO_f(Ddwf_f); | ||||
|  | ||||
|   std::cout << "Starting mixed CG" << std::endl; | ||||
|   // NOTE(review): the meaning of the positional arguments (1.0e-8, 10000, 50) is set by the | ||||
|   // MixedPrecisionConjugateGradient constructor, which is not visible here. | ||||
|   MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO); | ||||
|   mCG.InnerTolerance = 3.0e-5; | ||||
|   mCG(src_o,result_o); | ||||
|  | ||||
|   std::cout << "Starting regular CG" << std::endl; | ||||
|   ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000); | ||||
|   CG(HermOpEO,src_o,result_o_2); | ||||
|  | ||||
|   // diff is whatever axpy_norm returns for (-1.0, result_o, result_o_2); | ||||
|   // presumably the squared norm of result_o_2 - result_o -- confirm. | ||||
|   LatticeFermionD diff_o(FrbGrid); | ||||
|   RealD diff = axpy_norm(diff_o, -1.0, result_o, result_o_2); | ||||
|  | ||||
|   std::cout << "Diff between mixed and regular CG: " << diff << std::endl; | ||||
|  | ||||
|    | ||||
|   Grid_finalize(); | ||||
| } | ||||
| @@ -73,7 +73,7 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|   std::vector<LatticeColourMatrix> U(4,&Fine); | ||||
|    | ||||
|   NerscField header; | ||||
|   FieldMetaData header; | ||||
|    | ||||
|   std::string file("./ckpoint_lat.4000"); | ||||
|   NerscIO::readConfiguration(Umu,header,file); | ||||
|   | ||||
| @@ -90,7 +90,7 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|   std::vector<LatticeColourMatrix> U(4,&Fine); | ||||
|    | ||||
|   NerscField header; | ||||
|   FieldMetaData header; | ||||
|    | ||||
|   std::string file("./ckpoint_lat.4000"); | ||||
|   NerscIO::readConfiguration(Umu,header,file); | ||||
|   | ||||
| @@ -28,212 +28,6 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk> | ||||
|     /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| using namespace Grid; | ||||
| using namespace Grid::QCD; | ||||
|  | ||||
| template <class Gimpl>  | ||||
| class FourierAcceleratedGaugeFixer  : public Gimpl { | ||||
|   public: | ||||
|   INHERIT_GIMPL_TYPES(Gimpl); | ||||
|  | ||||
|   typedef typename Gimpl::GaugeLinkField GaugeMat; | ||||
|   typedef typename Gimpl::GaugeField GaugeLorentz; | ||||
|  | ||||
|   static void GaugeLinkToLieAlgebraField(const std::vector<GaugeMat> &U,std::vector<GaugeMat> &A) { | ||||
|     for(int mu=0;mu<Nd;mu++){ | ||||
| //      ImplComplex cmi(0.0,-1.0); | ||||
|       Complex cmi(0.0,-1.0); | ||||
|       A[mu] = Ta(U[mu]) * cmi; | ||||
|     } | ||||
|   } | ||||
|   static void DmuAmu(const std::vector<GaugeMat> &A,GaugeMat &dmuAmu) { | ||||
|     dmuAmu=zero; | ||||
|     for(int mu=0;mu<Nd;mu++){ | ||||
|       dmuAmu = dmuAmu + A[mu] - Cshift(A[mu],mu,-1); | ||||
|     } | ||||
|   }   | ||||
|   static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol) { | ||||
|     GridBase *grid = Umu._grid; | ||||
|  | ||||
|     Real org_plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu); | ||||
|     Real org_link_trace=WilsonLoops<Gimpl>::linkTrace(Umu);  | ||||
|     Real old_trace = org_link_trace; | ||||
|     Real trG; | ||||
|  | ||||
|     std::vector<GaugeMat> U(Nd,grid); | ||||
|                  GaugeMat dmuAmu(grid); | ||||
|  | ||||
|     for(int i=0;i<maxiter;i++){ | ||||
|       for(int mu=0;mu<Nd;mu++) U[mu]= PeekIndex<LorentzIndex>(Umu,mu); | ||||
|       //trG = SteepestDescentStep(U,alpha,dmuAmu); | ||||
|       trG = FourierAccelSteepestDescentStep(U,alpha,dmuAmu); | ||||
|       for(int mu=0;mu<Nd;mu++) PokeIndex<LorentzIndex>(Umu,U[mu],mu); | ||||
|       // Monitor progress and convergence test  | ||||
|       // infrequently to minimise cost overhead | ||||
|       if ( i %20 == 0 ) {  | ||||
| 	Real plaq      =WilsonLoops<Gimpl>::avgPlaquette(Umu); | ||||
| 	Real link_trace=WilsonLoops<Gimpl>::linkTrace(Umu);  | ||||
|  | ||||
| 	std::cout << GridLogMessage << " Iteration "<<i<< " plaq= "<<plaq<< " dmuAmu " << norm2(dmuAmu)<< std::endl; | ||||
| 	 | ||||
| 	Real Phi  = 1.0 - old_trace / link_trace ; | ||||
| 	Real Omega= 1.0 - trG; | ||||
|  | ||||
|  | ||||
| 	std::cout << GridLogMessage << " Iteration "<<i<< " Phi= "<<Phi<< " Omega= " << Omega<< " trG " << trG <<std::endl; | ||||
| 	if ( (Omega < Omega_tol) && ( ::fabs(Phi) < Phi_tol) ) { | ||||
| 	  std::cout << GridLogMessage << "Converged ! "<<std::endl; | ||||
| 	  return; | ||||
| 	} | ||||
|  | ||||
| 	old_trace = link_trace; | ||||
|  | ||||
|       } | ||||
|     } | ||||
|   }; | ||||
|   static Real SteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) { | ||||
|     GridBase *grid = U[0]._grid; | ||||
|  | ||||
|     std::vector<GaugeMat> A(Nd,grid); | ||||
|     GaugeMat g(grid); | ||||
|  | ||||
|     GaugeLinkToLieAlgebraField(U,A); | ||||
|     ExpiAlphaDmuAmu(A,g,alpha,dmuAmu); | ||||
|  | ||||
|  | ||||
|     Real vol = grid->gSites(); | ||||
|     Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc; | ||||
|  | ||||
|     SU<Nc>::GaugeTransform(U,g); | ||||
|  | ||||
|     return trG; | ||||
|   } | ||||
|  | ||||
|   static Real FourierAccelSteepestDescentStep(std::vector<GaugeMat> &U,Real & alpha, GaugeMat & dmuAmu) { | ||||
|  | ||||
|     GridBase *grid = U[0]._grid; | ||||
|  | ||||
|     Real vol = grid->gSites(); | ||||
|  | ||||
|     FFT theFFT((GridCartesian *)grid); | ||||
|  | ||||
|     LatticeComplex  Fp(grid); | ||||
|     LatticeComplex  psq(grid); psq=zero; | ||||
|     LatticeComplex  pmu(grid);  | ||||
|     LatticeComplex   one(grid); one = Complex(1.0,0.0); | ||||
|  | ||||
|     GaugeMat g(grid); | ||||
|     GaugeMat dmuAmu_p(grid); | ||||
|     std::vector<GaugeMat> A(Nd,grid); | ||||
|  | ||||
|     GaugeLinkToLieAlgebraField(U,A); | ||||
|  | ||||
|     DmuAmu(A,dmuAmu); | ||||
|  | ||||
|     theFFT.FFT_all_dim(dmuAmu_p,dmuAmu,FFT::forward); | ||||
|  | ||||
|     ////////////////////////////////// | ||||
|     // Work out Fp = psq_max/ psq... | ||||
|     ////////////////////////////////// | ||||
|     std::vector<int> latt_size = grid->GlobalDimensions(); | ||||
|     std::vector<int> coor(grid->_ndimension,0); | ||||
|     for(int mu=0;mu<Nd;mu++) { | ||||
|  | ||||
|       Real TwoPiL =  M_PI * 2.0/ latt_size[mu]; | ||||
|       LatticeCoordinate(pmu,mu); | ||||
|       pmu = TwoPiL * pmu ; | ||||
|       psq = psq + 4.0*sin(pmu*0.5)*sin(pmu*0.5);  | ||||
|     } | ||||
|  | ||||
|     Complex psqMax(16.0); | ||||
|     Fp =  psqMax*one/psq; | ||||
|  | ||||
|     /* | ||||
|     static int once; | ||||
|     if ( once == 0 ) {  | ||||
|       std::cout << " Fp " << Fp <<std::endl; | ||||
|       once ++; | ||||
|       }*/ | ||||
|  | ||||
|     pokeSite(TComplex(1.0),Fp,coor); | ||||
|  | ||||
|     dmuAmu_p  = dmuAmu_p * Fp;  | ||||
|  | ||||
|     theFFT.FFT_all_dim(dmuAmu,dmuAmu_p,FFT::backward); | ||||
|  | ||||
|     GaugeMat ciadmam(grid); | ||||
|     Complex cialpha(0.0,-alpha); | ||||
|     ciadmam = dmuAmu*cialpha; | ||||
|     SU<Nc>::taExp(ciadmam,g); | ||||
|  | ||||
|     Real trG = TensorRemove(sum(trace(g))).real()/vol/Nc; | ||||
|  | ||||
|     SU<Nc>::GaugeTransform(U,g); | ||||
|  | ||||
|     return trG; | ||||
|   } | ||||
|  | ||||
|   static void ExpiAlphaDmuAmu(const std::vector<GaugeMat> &A,GaugeMat &g,Real & alpha, GaugeMat &dmuAmu) { | ||||
|     GridBase *grid = g._grid; | ||||
|     Complex cialpha(0.0,-alpha); | ||||
|     GaugeMat ciadmam(grid); | ||||
|     DmuAmu(A,dmuAmu); | ||||
|     ciadmam = dmuAmu*cialpha; | ||||
|     SU<Nc>::taExp(ciadmam,g); | ||||
|   }   | ||||
| /* | ||||
|   //////////////////////////////////////////////////////////////// | ||||
|   // NB The FT for fields living on links has an extra phase in it | ||||
|   // Could add these to the FFT class as a later task since this code | ||||
|   // might be reused elsewhere ???? | ||||
|   //////////////////////////////////////////////////////////////// | ||||
|   static void InverseFourierTransformAmu(FFT &theFFT,const std::vector<GaugeMat> &Ap,std::vector<GaugeMat> &Ax) { | ||||
|     GridBase * grid = theFFT.Grid(); | ||||
|     std::vector<int> latt_size = grid->GlobalDimensions(); | ||||
|  | ||||
|     ComplexField  pmu(grid); | ||||
|     ComplexField  pha(grid); | ||||
|     GaugeMat      Apha(grid); | ||||
|  | ||||
|     Complex ci(0.0,1.0); | ||||
|  | ||||
|     for(int mu=0;mu<Nd;mu++){ | ||||
|  | ||||
|       Real TwoPiL =  M_PI * 2.0/ latt_size[mu]; | ||||
|       LatticeCoordinate(pmu,mu); | ||||
|       pmu = TwoPiL * pmu ; | ||||
|       pha = exp(pmu *  (0.5 *ci)); // e(ipmu/2) since Amu(x+mu/2) | ||||
|  | ||||
|       Apha = Ap[mu] * pha; | ||||
|  | ||||
|       theFFT.FFT_all_dim(Apha,Ax[mu],FFT::backward); | ||||
|     } | ||||
|   } | ||||
|   static void FourierTransformAmu(FFT & theFFT,const std::vector<GaugeMat> &Ax,std::vector<GaugeMat> &Ap) { | ||||
|     GridBase * grid = theFFT.Grid(); | ||||
|     std::vector<int> latt_size = grid->GlobalDimensions(); | ||||
|  | ||||
|     ComplexField  pmu(grid); | ||||
|     ComplexField  pha(grid); | ||||
|     Complex ci(0.0,1.0); | ||||
|      | ||||
|     // Sign convention for FFTW calls: | ||||
|     // A(x)= Sum_p e^ipx A(p) / V | ||||
|     // A(p)= Sum_x e^-ipx A(x) | ||||
|  | ||||
|     for(int mu=0;mu<Nd;mu++){ | ||||
|       Real TwoPiL =  M_PI * 2.0/ latt_size[mu]; | ||||
|       LatticeCoordinate(pmu,mu); | ||||
|       pmu = TwoPiL * pmu ; | ||||
|       pha = exp(-pmu *  (0.5 *ci)); // e(-ipmu/2) since Amu(x+mu/2) | ||||
|  | ||||
|       theFFT.FFT_all_dim(Ax[mu],Ap[mu],FFT::backward); | ||||
|       Ap[mu] = Ap[mu] * pha; | ||||
|     } | ||||
|   } | ||||
| */ | ||||
| }; | ||||
|  | ||||
| int main (int argc, char ** argv) | ||||
| { | ||||
|   std::vector<int> seeds({1,2,3,4}); | ||||
| @@ -264,22 +58,24 @@ int main (int argc, char ** argv) | ||||
|   std::cout<< "*****************************************************************" <<std::endl; | ||||
|  | ||||
|   LatticeGaugeField   Umu(&GRID); | ||||
|   LatticeGaugeField   Urnd(&GRID); | ||||
|   LatticeGaugeField   Uorg(&GRID); | ||||
|   LatticeColourMatrix   g(&GRID); // Gauge xform | ||||
|  | ||||
|    | ||||
|   SU3::ColdConfiguration(pRNG,Umu); // Unit gauge | ||||
|   Uorg=Umu; | ||||
|   Urnd=Umu; | ||||
|  | ||||
|   SU3::RandomGaugeTransform(pRNG,Urnd,g); // Random gauge transform of the unit gauge | ||||
|  | ||||
|   SU3::RandomGaugeTransform(pRNG,Umu,g); // Random gauge transform of the unit gauge | ||||
|   Real plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu); | ||||
|   std::cout << " Initial plaquette "<<plaq << std::endl; | ||||
|  | ||||
|  | ||||
|  | ||||
|   Real alpha=0.1; | ||||
|   FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-10, 1.0e-10); | ||||
|  | ||||
|   Umu = Urnd; | ||||
|   FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-12, 1.0e-12,false); | ||||
|  | ||||
|   plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu); | ||||
|   std::cout << " Final plaquette "<<plaq << std::endl; | ||||
| @@ -288,14 +84,28 @@ int main (int argc, char ** argv) | ||||
|   std::cout << " Norm Difference "<< norm2(Uorg) << std::endl; | ||||
|  | ||||
|  | ||||
|   //  std::cout<< "*****************************************************************" <<std::endl; | ||||
|   //  std::cout<< "* Testing Fourier accelerated fixing                            *" <<std::endl; | ||||
|   //  std::cout<< "*****************************************************************" <<std::endl; | ||||
|   std::cout<< "*****************************************************************" <<std::endl; | ||||
|   std::cout<< "* Testing Fourier accelerated fixing                            *" <<std::endl; | ||||
|   std::cout<< "*****************************************************************" <<std::endl; | ||||
|   Umu=Urnd; | ||||
|   FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-12, 1.0e-12,true); | ||||
|  | ||||
|   //  std::cout<< "*****************************************************************" <<std::endl; | ||||
|   //  std::cout<< "* Testing non-unit configuration                                *" <<std::endl; | ||||
|   //  std::cout<< "*****************************************************************" <<std::endl; | ||||
|   plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu); | ||||
|   std::cout << " Final plaquette "<<plaq << std::endl; | ||||
|  | ||||
|   std::cout<< "*****************************************************************" <<std::endl; | ||||
|   std::cout<< "* Testing non-unit configuration                                *" <<std::endl; | ||||
|   std::cout<< "*****************************************************************" <<std::endl; | ||||
|  | ||||
|   SU3::HotConfiguration(pRNG,Umu); // Non-unit (hot) gauge | ||||
|  | ||||
|   plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu); | ||||
|   std::cout << " Initial plaquette "<<plaq << std::endl; | ||||
|  | ||||
|   FourierAcceleratedGaugeFixer<PeriodicGimplR>::SteepestDescentGaugeFix(Umu,alpha,10000,1.0e-12, 1.0e-12,true); | ||||
|  | ||||
|   plaq=WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu); | ||||
|   std::cout << " Final plaquette "<<plaq << std::endl; | ||||
|  | ||||
|  | ||||
|   Grid_finalize(); | ||||
|   | ||||
| @@ -336,7 +336,7 @@ int main(int argc, char **argv) { | ||||
|  | ||||
|       std::cout << GridLogMessage << "norm cMmat : " << norm2(cMat) | ||||
|                 << std::endl; | ||||
|       cMat = expMat(cMat, ComplexD(1.0, 0.0)); | ||||
|       cMat = expMat(cMat,1.0);// ComplexD(1.0, 0.0)); | ||||
|       std::cout << GridLogMessage << "norm expMat: " << norm2(cMat) | ||||
|                 << std::endl; | ||||
|       peekSite(cm, cMat, mysite); | ||||
|   | ||||
| @@ -67,7 +67,7 @@ int main (int argc, char ** argv) | ||||
|   LatticeFermion    err(FGrid); | ||||
|   LatticeGaugeField Umu(UGrid);  | ||||
|  | ||||
|   NerscField header; | ||||
|   FieldMetaData header; | ||||
|   std::string file("./ckpoint_lat.400"); | ||||
|   NerscIO::readConfiguration(Umu,header,file); | ||||
|  | ||||
|   | ||||
| @@ -133,8 +133,8 @@ int main (int argc, char ** argv) | ||||
|   int Nconv; | ||||
|   RealD eresid = 1.0e-6; | ||||
|  | ||||
|   ImplicitlyRestartedLanczos<LatticeComplex> IRL(HermOp,X,Nk,Nm,eresid,Nit); | ||||
|   ImplicitlyRestartedLanczos<LatticeComplex> ChebyIRL(HermOp,Cheby,Nk,Nm,eresid,Nit); | ||||
|   ImplicitlyRestartedLanczos<LatticeComplex> IRL(HermOp,X,Nk,Nk,Nm,eresid,Nit); | ||||
|   ImplicitlyRestartedLanczos<LatticeComplex> ChebyIRL(HermOp,Cheby,Nk,Nk,Nm,eresid,Nit); | ||||
|  | ||||
|   LatticeComplex src(grid); gaussian(RNG,src); | ||||
|   { | ||||
|   | ||||
| @@ -139,7 +139,7 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|   } | ||||
|  | ||||
|   Complex dSpred    = sum(dS); | ||||
|   ComplexD dSpred    = sum(dS); | ||||
|  | ||||
|   std::cout << GridLogMessage << " S      "<<S<<std::endl; | ||||
|   std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl; | ||||
|   | ||||
| @@ -150,7 +150,7 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|   } | ||||
|  | ||||
|   Complex dSpred    = sum(dS); | ||||
|   ComplexD dSpred    = sum(dS); | ||||
|  | ||||
|   std::cout << GridLogMessage << " S      "<<S<<std::endl; | ||||
|   std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl; | ||||
|   | ||||
| @@ -194,9 +194,9 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|   } | ||||
|  | ||||
|   Complex dSpred    = sum(dS); | ||||
|   Complex dSm       = sum(dSmom); | ||||
|   Complex dSm2      = sum(dSmom2); | ||||
|   ComplexD dSpred    = sum(dS); | ||||
|   ComplexD dSm       = sum(dSmom); | ||||
|   ComplexD dSm2      = sum(dSmom2); | ||||
|  | ||||
|  | ||||
|   std::cout << GridLogMessage <<"Initial mom hamiltonian is "<< Hmom <<std::endl; | ||||
|   | ||||
| @@ -113,7 +113,7 @@ int main (int argc, char ** argv) | ||||
|     dS = dS - trace(mommu*UdSdUmu)*dt*2.0; | ||||
|  | ||||
|   } | ||||
|   Complex dSpred    = sum(dS); | ||||
|   ComplexD dSpred    = sum(dS); | ||||
|  | ||||
|   std::cout << GridLogMessage << " S      "<<S<<std::endl; | ||||
|   std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl; | ||||
|   | ||||
| @@ -143,7 +143,7 @@ int main (int argc, char ** argv) | ||||
|     dS = dS+trace(mommu*forcemu)*dt; | ||||
|   } | ||||
|  | ||||
|   Complex dSpred    = sum(dS); | ||||
|   ComplexD dSpred    = sum(dS); | ||||
|  | ||||
|   // From TwoFlavourPseudoFermion: | ||||
|   ////////////////////////////////////////////////////// | ||||
|   | ||||
| @@ -143,7 +143,7 @@ int main (int argc, char ** argv) | ||||
|     dS = dS+trace(mommu*forcemu)*dt; | ||||
|   } | ||||
|  | ||||
|   Complex dSpred    = sum(dS); | ||||
|   ComplexD dSpred    = sum(dS); | ||||
|  | ||||
|   std::cout << GridLogMessage << " S      "<<S<<std::endl; | ||||
|   std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl; | ||||
|   | ||||
| @@ -128,7 +128,7 @@ int main (int argc, char ** argv) | ||||
|     dS = dS + trace(mommu*UdSdUmu)*dt*2.0; | ||||
|   } | ||||
|  | ||||
|   Complex dSpred    = sum(dS); | ||||
|   ComplexD dSpred    = sum(dS); | ||||
|  | ||||
|   std::cout << GridLogMessage << " S      "<<S<<std::endl; | ||||
|   std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl; | ||||
|   | ||||
| @@ -141,7 +141,7 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|   } | ||||
|  | ||||
|   Complex dSpred    = sum(dS); | ||||
|   ComplexD dSpred    = sum(dS); | ||||
|  | ||||
|   std::cout << GridLogMessage << " -- S         "<<S<<std::endl; | ||||
|   std::cout << GridLogMessage << " -- Sprime    "<<Sprime<<std::endl; | ||||
|   | ||||
| @@ -141,7 +141,7 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|   } | ||||
|  | ||||
|   Complex dSpred    = sum(dS); | ||||
|   ComplexD dSpred    = sum(dS); | ||||
|  | ||||
|   std::cout << GridLogMessage << " S      "<<S<<std::endl; | ||||
|   std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl; | ||||
|   | ||||
| @@ -112,7 +112,7 @@ int main (int argc, char ** argv) | ||||
|     dS = dS - trace(mommu*UdSdUmu)*dt*2.0; | ||||
|  | ||||
|   } | ||||
|   Complex dSpred    = sum(dS); | ||||
|   ComplexD dSpred    = sum(dS); | ||||
|  | ||||
|   std::cout << GridLogMessage << " S      "<<S<<std::endl; | ||||
|   std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl; | ||||
|   | ||||
| @@ -178,9 +178,9 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|   } | ||||
|  | ||||
|   Complex dSpred    = sum(dS); | ||||
|   Complex dSm       = sum(dSmom); | ||||
|   Complex dSm2      = sum(dSmom2); | ||||
|   ComplexD dSpred    = sum(dS); | ||||
|   ComplexD dSm       = sum(dSmom); | ||||
|   ComplexD dSm2      = sum(dSmom2); | ||||
|  | ||||
|  | ||||
|   std::cout << GridLogMessage <<"Initial mom hamiltonian is "<< Hmom <<std::endl; | ||||
|   | ||||
| @@ -155,7 +155,7 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|   } | ||||
|  | ||||
|   Complex dSpred    = sum(dS); | ||||
|   ComplexD dSpred    = sum(dS); | ||||
|  | ||||
|   std::cout << GridLogMessage << " S      "<<S<<std::endl; | ||||
|   std::cout << GridLogMessage << " Sprime "<<Sprime<<std::endl; | ||||
|   | ||||
| @@ -61,6 +61,10 @@ int main(int argc, char *argv[]) | ||||
|      | ||||
|     // gauge field | ||||
|     application.createModule<MGauge::Unit>("gauge"); | ||||
|      | ||||
|     // set fermion boundary conditions to be periodic space, antiperiodic time. | ||||
|     std::string boundary = "1 1 1 -1"; | ||||
|  | ||||
|     for (unsigned int i = 0; i < flavour.size(); ++i) | ||||
|     { | ||||
|         // actions | ||||
| @@ -69,6 +73,7 @@ int main(int argc, char *argv[]) | ||||
|         actionPar.Ls    = 12; | ||||
|         actionPar.M5    = 1.8; | ||||
|         actionPar.mass  = mass[i]; | ||||
|         actionPar.boundary = boundary; | ||||
|         application.createModule<MAction::DWF>("DWF_" + flavour[i], actionPar); | ||||
|          | ||||
|         // solvers | ||||
|   | ||||
| @@ -98,6 +98,10 @@ int main(int argc, char *argv[]) | ||||
|         gaugePar.file = configStem; | ||||
|         application.createModule<MGauge::Load>("gauge", gaugePar); | ||||
|     } | ||||
|      | ||||
|     // set fermion boundary conditions to be periodic space, antiperiodic time. | ||||
|     std::string boundary = "1 1 1 -1"; | ||||
|  | ||||
|     for (unsigned int i = 0; i < flavour.size(); ++i) | ||||
|     { | ||||
|         // actions | ||||
| @@ -106,6 +110,7 @@ int main(int argc, char *argv[]) | ||||
|         actionPar.Ls    = 16; | ||||
|         actionPar.M5    = 1.8; | ||||
|         actionPar.mass  = mass[i]; | ||||
|         actionPar.boundary = boundary; | ||||
|         application.createModule<MAction::DWF>("DWF_" + flavour[i], actionPar); | ||||
|  | ||||
|         // solvers | ||||
|   | ||||
| @@ -63,6 +63,10 @@ int main(int argc, char *argv[]) | ||||
|     MSource::Point::Par ptPar; | ||||
|     ptPar.position = "0 0 0 0"; | ||||
|     application.createModule<MSource::Point>("pt", ptPar); | ||||
|      | ||||
|     // set fermion boundary conditions to be periodic space, antiperiodic time. | ||||
|     std::string boundary = "1 1 1 -1"; | ||||
|  | ||||
|     for (unsigned int i = 0; i < flavour.size(); ++i) | ||||
|     { | ||||
|         // actions | ||||
| @@ -71,6 +75,7 @@ int main(int argc, char *argv[]) | ||||
|         actionPar.Ls    = 12; | ||||
|         actionPar.M5    = 1.8; | ||||
|         actionPar.mass  = mass[i]; | ||||
|         actionPar.boundary = boundary; | ||||
|         application.createModule<MAction::DWF>("DWF_" + flavour[i], actionPar); | ||||
|          | ||||
|         // solvers | ||||
|   | ||||
| @@ -28,6 +28,38 @@ directory | ||||
| /*  END LEGAL */ | ||||
| #include <Grid/Grid.h> | ||||
|  | ||||
| namespace Grid{ | ||||
|   struct WFParameters: Serializable { | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(WFParameters, | ||||
|             int, steps, | ||||
|             double, step_size, | ||||
|             int, meas_interval, | ||||
|             double, maxTau); // for the adaptive algorithm | ||||
|         | ||||
|  | ||||
|     template <class ReaderClass > | ||||
|     WFParameters(Reader<ReaderClass>& Reader){ | ||||
|       read(Reader, "WilsonFlow", *this); | ||||
|     } | ||||
|  | ||||
|   }; | ||||
|  | ||||
|   struct ConfParameters: Serializable { | ||||
|     GRID_SERIALIZABLE_CLASS_MEMBERS(ConfParameters, | ||||
|            std::string, conf_prefix, | ||||
|             std::string, rng_prefix, | ||||
| 				    int, StartConfiguration, | ||||
| 				    int, EndConfiguration, | ||||
|             int, Skip); | ||||
|    | ||||
|     template <class ReaderClass > | ||||
|     ConfParameters(Reader<ReaderClass>& Reader){ | ||||
|       read(Reader, "Configurations", *this); | ||||
|     } | ||||
|  | ||||
|   }; | ||||
| } | ||||
|  | ||||
| int main(int argc, char **argv) { | ||||
|   using namespace Grid; | ||||
|   using namespace Grid::QCD; | ||||
| @@ -42,22 +74,38 @@ int main(int argc, char **argv) { | ||||
|   GridRedBlackCartesian     RBGrid(latt_size, simd_layout, mpi_layout); | ||||
|  | ||||
|   std::vector<int> seeds({1, 2, 3, 4, 5}); | ||||
|   GridSerialRNG sRNG; | ||||
|   GridParallelRNG pRNG(&Grid); | ||||
|   pRNG.SeedFixedIntegers(seeds); | ||||
|  | ||||
|   LatticeGaugeField Umu(&Grid), Uflow(&Grid); | ||||
|   SU<Nc>::HotConfiguration(pRNG, Umu); | ||||
|    | ||||
|   typedef Grid::JSONReader       Serialiser; | ||||
|   Serialiser Reader("input.json"); | ||||
|   WFParameters WFPar(Reader); | ||||
|   ConfParameters CPar(Reader); | ||||
|   CheckpointerParameters CPPar(CPar.conf_prefix, CPar.rng_prefix); | ||||
|   BinaryHmcCheckpointer<PeriodicGimplR> CPBin(CPPar); | ||||
|  | ||||
|   for (int conf = CPar.StartConfiguration; conf <= CPar.EndConfiguration; conf+= CPar.Skip){ | ||||
|  | ||||
|   CPBin.CheckpointRestore(conf, Umu, sRNG, pRNG); | ||||
|  | ||||
|   std::cout << std::setprecision(15); | ||||
|   std::cout << GridLogMessage << "Plaquette: " | ||||
|   std::cout << GridLogMessage << "Initial plaquette: " | ||||
|     << WilsonLoops<PeriodicGimplR>::avgPlaquette(Umu) << std::endl; | ||||
|  | ||||
|   WilsonFlow<PeriodicGimplR> WF(200, 0.01); | ||||
|   WilsonFlow<PeriodicGimplR> WF(WFPar.steps, WFPar.step_size, WFPar.meas_interval); | ||||
|  | ||||
|   WF.smear(Uflow, Umu); | ||||
|   WF.smear_adaptive(Uflow, Umu, WFPar.maxTau); | ||||
|  | ||||
|   RealD WFlow_plaq = WilsonLoops<PeriodicGimplR>::avgPlaquette(Uflow); | ||||
|   std::cout << GridLogMessage << "Plaquette: "<< WFlow_plaq << std::endl; | ||||
|   RealD WFlow_TC   = WilsonLoops<PeriodicGimplR>::TopologicalCharge(Uflow); | ||||
|   RealD WFlow_T0   = WF.energyDensityPlaquette(Uflow); | ||||
|   std::cout << GridLogMessage << "Plaquette          "<< conf << "   " << WFlow_plaq << std::endl; | ||||
|   std::cout << GridLogMessage << "T0                 "<< conf << "   " << WFlow_T0 << std::endl; | ||||
|   std::cout << GridLogMessage << "TopologicalCharge  "<< conf << "   " << WFlow_TC   << std::endl; | ||||
|  | ||||
|   std::cout<< GridLogMessage << " Admissibility check:\n"; | ||||
|   const double sp_adm = 0.067;                // admissible threshold | ||||
| @@ -73,6 +121,32 @@ int main(int argc, char **argv) { | ||||
|   std::cout<< GridLogMessage << "   (sp_admissible = "<< sp_adm <<")\n"; | ||||
|   //std::cout<< GridLogMessage << "   sp_admissible - sp_max = "<<sp_adm-sp_max <<"\n"; | ||||
|   std::cout<< GridLogMessage << "   sp_admissible - sp_ave = "<<sp_adm-sp_ave <<"\n"; | ||||
|  | ||||
|   } | ||||
|   Grid_finalize(); | ||||
| }  // main | ||||
|  | ||||
|  | ||||
| /* | ||||
| Input file example | ||||
|  | ||||
|  | ||||
| JSON | ||||
|  | ||||
| { | ||||
|     "WilsonFlow":{ | ||||
| 	"steps": 200, | ||||
| 	"step_size": 0.01, | ||||
| 	"meas_interval": 50, | ||||
|   "maxTau": 2.0 | ||||
|     }, | ||||
|     "Configurations":{ | ||||
| 	"conf_prefix": "ckpoint_lat", | ||||
| 	"rng_prefix": "ckpoint_rng", | ||||
| 	"StartConfiguration": 3000, | ||||
| 	"EndConfiguration": 3000, | ||||
| 	"Skip": 5 | ||||
|     } | ||||
| } | ||||
|  | ||||
|  | ||||
| */ | ||||
| @@ -516,7 +516,7 @@ int main (int argc, char ** argv) | ||||
|   LatticeColourMatrix U(UGrid); | ||||
|   LatticeColourMatrix zz(UGrid); | ||||
|  | ||||
|   NerscField header; | ||||
|   FieldMetaData header; | ||||
|   std::string file("./ckpoint_lat.4000"); | ||||
|   NerscIO::readConfiguration(Umu,header,file); | ||||
|  | ||||
|   | ||||
| @@ -54,7 +54,7 @@ int main (int argc, char ** argv) | ||||
|   GridParallelRNG          RNG5rb(FrbGrid);  RNG5.SeedFixedIntegers(seeds5); | ||||
|  | ||||
|   LatticeGaugeField Umu(UGrid);  | ||||
|   SU3::TepidConfiguration(RNG4, Umu); | ||||
|   SU3::HotConfiguration(RNG4, Umu); | ||||
|  | ||||
|   std::vector<LatticeColourMatrix> U(4,UGrid); | ||||
|   for(int mu=0;mu<Nd;mu++){ | ||||
| @@ -92,16 +92,15 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|    | ||||
|   std::vector<RealD>          eval(Nm); | ||||
|   FermionField    src(FrbGrid); gaussian(RNG5rb,src); | ||||
|   FermionField    src(FrbGrid);  | ||||
|   gaussian(RNG5rb,src); | ||||
|   std::vector<FermionField> evec(Nm,FrbGrid); | ||||
|   for(int i=0;i<1;i++){ | ||||
|     std::cout << i<<" / "<< Nm<< " grid pointer "<<evec[i]._grid<<std::endl; | ||||
|     std::cout << GridLogMessage <<i<<" / "<< Nm<< " grid pointer "<<evec[i]._grid<<std::endl; | ||||
|   }; | ||||
|  | ||||
|   int Nconv; | ||||
|   IRL.calc(eval,evec, | ||||
| 	   src, | ||||
| 	   Nconv); | ||||
|   IRL.calc(eval,evec,src,Nconv); | ||||
|  | ||||
|  | ||||
|   Grid_finalize(); | ||||
|   | ||||
| @@ -51,7 +51,7 @@ int main (int argc, char ** argv) | ||||
|   typedef typename ImprovedStaggeredFermion5DR::ComplexField ComplexField;  | ||||
|   typename ImprovedStaggeredFermion5DR::ImplParams params;  | ||||
|  | ||||
|   const int Ls=4; | ||||
|   const int Ls=8; | ||||
|  | ||||
|   Grid_init(&argc,&argv); | ||||
|  | ||||
| @@ -74,17 +74,19 @@ int main (int argc, char ** argv) | ||||
|  | ||||
|   LatticeGaugeField Umu(UGrid); SU3::HotConfiguration(pRNG,Umu); | ||||
|  | ||||
|   RealD mass=0.01; | ||||
|   RealD mass=0.003; | ||||
|   ImprovedStaggeredFermion5DR Ds(Umu,Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass); | ||||
|   MdagMLinearOperator<ImprovedStaggeredFermion5DR,FermionField> HermOp(Ds); | ||||
|  | ||||
|   ConjugateGradient<FermionField> CG(1.0e-8,10000); | ||||
|   BlockConjugateGradient<FermionField> BCG(1.0e-8,10000); | ||||
|   MultiRHSConjugateGradient<FermionField> mCG(1.0e-8,10000); | ||||
|   int blockDim = 0; | ||||
|   BlockConjugateGradient<FermionField>    BCGrQ(BlockCGrQ,blockDim,1.0e-8,10000); | ||||
|   BlockConjugateGradient<FermionField>    BCG  (BlockCG,blockDim,1.0e-8,10000); | ||||
|   BlockConjugateGradient<FermionField>    mCG  (CGmultiRHS,blockDim,1.0e-8,10000); | ||||
|  | ||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; | ||||
|   std::cout << GridLogMessage << "****************************************************************** "<<std::endl; | ||||
|   std::cout << GridLogMessage << " Calling 4d CG "<<std::endl; | ||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; | ||||
|   std::cout << GridLogMessage << "****************************************************************** "<<std::endl; | ||||
|   ImprovedStaggeredFermionR Ds4d(Umu,Umu,*UGrid,*UrbGrid,mass); | ||||
|   MdagMLinearOperator<ImprovedStaggeredFermionR,FermionField> HermOp4d(Ds4d); | ||||
|   FermionField src4d(UGrid); random(pRNG,src4d); | ||||
| @@ -111,7 +113,7 @@ int main (int argc, char ** argv) | ||||
|   std::cout << GridLogMessage << " Calling Block CG for "<<Ls <<" right hand sides" <<std::endl; | ||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; | ||||
|   result=zero; | ||||
|   BCG(HermOp,src,result); | ||||
|   BCGrQ(HermOp,src,result); | ||||
|   std::cout << GridLogMessage << "************************************************************************ "<<std::endl; | ||||
|  | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user