mirror of https://github.com/paboyle/Grid.git synced 2025-06-23 18:22:02 +01:00

Compare commits


30 Commits

Author SHA1 Message Date
95b640cb6b 10TF/s on 32^3 x 64 on single node 2022-08-04 15:43:52 -04:00
2cb5bedc15 Copy stream HIP improvements 2022-08-04 15:24:03 -04:00
806b02bddf Simplify dead code 2022-08-04 15:23:13 -04:00
de40395773 More timing. Think I should start to use nvtx and rocmtx ?? 2022-08-04 13:37:16 -04:00
7ba4788715 Fix 2022-08-04 13:36:44 -04:00
06d9ce1a02 Synch ranks on node here for GPU - GPU memcopy 2022-08-04 13:35:56 -04:00
75bb6b2b40 Move barrier into the StencilSend begin routine 2022-08-04 13:35:26 -04:00
74f10c2dc0 Move barrier into Stencil Send 2022-08-04 13:34:11 -04:00
a93d5459d4 Better mpi request completion 2022-07-28 12:18:35 -04:00
9c21add0c6 High res timer replaces getttimeofday 2022-07-28 12:14:03 -04:00
639aab6563 High res timer instead of gettimeofday 2022-07-28 12:13:35 -04:00
8137cc7049 Allways concurrent comms 2022-07-28 12:01:51 -04:00
60e63dca1d Add memory logging channel 2022-07-28 11:39:15 -04:00
486409574e Expanded cach to avoid any allocs in HMC 2022-07-28 11:38:34 -04:00
a913b8be12 Dslash self timing. Might want to not have this 2022-07-28 11:37:55 -04:00
2239751850 Better logging 2022-07-28 11:37:36 -04:00
9b20f1449c Better timing 2022-07-28 11:37:12 -04:00
b99453083d Updated timing 2022-07-28 11:37:02 -04:00
943fbb914d Merge branch 'feature/dirichlet' of https://github.com/paboyle/Grid into feature/dirichlet 2022-07-11 13:48:42 -04:00
ca4603580d Verbose 2022-07-11 13:48:35 -04:00
f73db8f1f3 Synch clocks 2022-07-11 13:47:39 -04:00
f7217d12d2 World barrier for clock synch 2022-07-11 13:45:31 -04:00
fab50c57d9 More loggin 2022-07-11 18:42:27 +01:00
3440534fbf MixedPrec support 2022-07-10 21:35:18 +01:00
177b1a7ec6 Mixed prec 2022-07-10 21:34:10 +01:00
58182fe345 Different approach to default dirichlet params 2022-07-10 21:32:58 +01:00
1f907d330d Different default params for dirichlet 2022-07-10 21:31:48 +01:00
b0fe664e9d Better force log info 2022-07-10 21:31:25 +01:00
c0f8482402 Remove SSC marks 2022-07-07 17:49:36 +01:00
3544965f54 Stream doesn't work 2022-07-07 17:49:20 +01:00
34 changed files with 729 additions and 3380 deletions

View File

@ -117,6 +117,7 @@ public:
GridStopWatch MatrixTimer;
GridStopWatch SolverTimer;
RealD usecs = -usecond();
SolverTimer.Start();
int k;
for (k = 1; k <= MaxIterations; k++) {
@ -166,14 +167,16 @@ public:
// Stopping condition
if (cp <= rsq) {
usecs +=usecond();
SolverTimer.Stop();
Linop.HermOpAndNorm(psi, mmp, d, qq);
p = mmp - src;
GridBase *grid = src.Grid();
RealD DwfFlops = (1452. )*grid->gSites()*4*k
+ (8+4+8+4+4)*12*grid->gSites()*k; // CG linear algebra
RealD srcnorm = std::sqrt(norm2(src));
RealD resnorm = std::sqrt(norm2(p));
RealD true_residual = resnorm / srcnorm;
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k
<< "\tComputed residual " << std::sqrt(cp / ssq)
<< "\tTrue residual " << true_residual
@ -187,6 +190,8 @@ public:
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
std::cout << GridLogMessage << "\tMobius flop rate " << DwfFlops/ usecs<< " Gflops " <<std::endl;
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
IterationsToComplete = k;
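
The hunk above adds a plain wall-clock accumulator (usecs) next to the existing SolverTimer and, at convergence, reports a Mobius flop rate from it. A minimal standalone sketch of that arithmetic, with the per-iteration flop counts copied from the hunk; the helper name and the explicit Gflop/s conversion are mine, not part of the patch.

#include <cstdint>

// Flop-rate bookkeeping as in the ConjugateGradient hunk above (sketch only).
// Per iteration: 1452*4 Dslash flops per global site plus (8+4+8+4+4)*12
// flops per site of CG linear algebra.
double mobiusGflops(int64_t gSites, int iterations, double usecs) {
  double flops = 1452.0 * gSites * 4.0 * iterations
               + (8 + 4 + 8 + 4 + 4) * 12.0 * gSites * iterations;
  return flops / usecs * 1.0e-3;   // flop/us -> Gflop/s
}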

View File

@ -146,21 +146,14 @@ public:
LinearOperatorBase<FineField> &_Linop;
RealD _coarse_relax_tol;
std::vector<FineField> &_subspace;
int _largestEvalIdxForReport; //The convergence of the LCL is based on the evals of the coarse grid operator, not those of the underlying fine grid operator
//As a result we do not know what the eval range of the fine operator is until the very end, making tuning the Cheby bounds very difficult
//To work around this issue, every restart we separately reconstruct the fine operator eval for the lowest and highest evec and print these
//out alongside the evals of the coarse operator. To do so we need to know the index of the largest eval (i.e. Nstop-1)
//NOTE: If largestEvalIdxForReport=-1 (default) then this is not performed
ImplicitlyRestartedLanczosSmoothedTester(LinearFunction<CoarseField> &Poly,
OperatorFunction<FineField> &smoother,
LinearOperatorBase<FineField> &Linop,
std::vector<FineField> &subspace,
RealD coarse_relax_tol=5.0e3,
int largestEvalIdxForReport=-1)
RealD coarse_relax_tol=5.0e3)
: _smoother(smoother), _Linop(Linop), _Poly(Poly), _subspace(subspace),
_coarse_relax_tol(coarse_relax_tol), _largestEvalIdxForReport(largestEvalIdxForReport)
_coarse_relax_tol(coarse_relax_tol)
{ };
//evalMaxApprox: approximation of largest eval of the fine Chebyshev operator (suitably wrapped by block projection)
@ -186,12 +179,6 @@ public:
<<" |H B[i] - eval[i]B[i]|^2 / evalMaxApprox^2 " << std::setw(25) << vv
<<std::endl;
if(_largestEvalIdxForReport != -1 && (j==0 || j==_largestEvalIdxForReport)){
std::cout<<GridLogIRL << "Estimating true eval of fine grid operator for eval idx " << j << std::endl;
RealD tmp_eval;
ReconstructEval(j,eresid,B,tmp_eval,1.0); //don't use evalMaxApprox of coarse operator! (cf below)
}
int conv=0;
if( (vv<eresid*eresid) ) conv = 1;
return conv;
@ -422,7 +409,7 @@ public:
//////////////////////////////////////////////////////////////////////////////////////////////////
Chebyshev<FineField> ChebySmooth(cheby_smooth); //lower order Chebyshev of fine operator on fine grid used to smooth regenerated eigenvectors
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax,Nstop-1);
ImplicitlyRestartedLanczosSmoothedTester<Fobj,CComplex,nbasis> ChebySmoothTester(ChebyOp,ChebySmooth,_FineOp,subspace,relax);
evals_coarse.resize(Nm);
evec_coarse.resize(Nm,_CoarseGrid);

View File

@ -40,7 +40,7 @@ void MemoryManager::PrintBytes(void)
//////////////////////////////////////////////////////////////////////
MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax];
int MemoryManager::Victim[MemoryManager::NallocType];
int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 2, 8, 2, 8 };
int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 8, 16, 8, 16 };
uint64_t MemoryManager::CacheBytes[MemoryManager::NallocType];
//////////////////////////////////////////////////////////////////////
// Actual allocation and deallocation utils

View File

@ -3,8 +3,14 @@
#warning "Using explicit device memory copies"
NAMESPACE_BEGIN(Grid);
//#define dprintf(...) printf ( __VA_ARGS__ ); fflush(stdout);
#define dprintf(...)
#define MAXLINE 512
static char print_buffer [ MAXLINE ];
#define mprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer;
//#define dprintf(...) printf (__VA_ARGS__ ); fflush(stdout);
#define dprintf(...)
////////////////////////////////////////////////////////////
@ -104,7 +110,7 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
///////////////////////////////////////////////////////////
assert(AccCache.state!=Empty);
dprintf("MemoryManager: Discard(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
mprintf("MemoryManager: Discard(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
assert(AccCache.accLock==0);
assert(AccCache.cpuLock==0);
assert(AccCache.CpuPtr!=(uint64_t)NULL);
@ -126,7 +132,7 @@ void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
///////////////////////////////////////////////////////////////////////////
assert(AccCache.state!=Empty);
dprintf("MemoryManager: Evict(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
mprintf("MemoryManager: Evict(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
assert(AccCache.accLock==0);
assert(AccCache.cpuLock==0);
if(AccCache.state==AccDirty) {
@ -150,7 +156,7 @@ void MemoryManager::Flush(AcceleratorViewEntry &AccCache)
assert(AccCache.AccPtr!=(uint64_t)NULL);
assert(AccCache.CpuPtr!=(uint64_t)NULL);
acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes);
dprintf("MemoryManager: Flush %llx -> %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
mprintf("MemoryManager: Flush %llx -> %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
DeviceToHostBytes+=AccCache.bytes;
DeviceToHostXfer++;
AccCache.state=Consistent;
@ -165,7 +171,7 @@ void MemoryManager::Clone(AcceleratorViewEntry &AccCache)
AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes);
DeviceBytes+=AccCache.bytes;
}
dprintf("MemoryManager: Clone %llx <- %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
mprintf("MemoryManager: Clone %llx <- %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes);
HostToDeviceBytes+=AccCache.bytes;
HostToDeviceXfer++;
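
The mprintf macro introduced above snprintf-formats into a static buffer and streams it to the new GridLogMemory channel, replacing the compiled-out dprintf at the tagged call sites. A hedged sketch of the same pattern, wrapped in do{...}while(0) so it also behaves as a single statement after an unbraced if; the patch itself uses the bare two-statement form.

#include <cstdio>     // snprintf
#include <iostream>   // std::cout; GridLogMemory comes from Grid's logging headers

#define MAXLINE 512
static char print_buffer[MAXLINE];
#define mprintf(...)                                  \
  do {                                                \
    snprintf(print_buffer, MAXLINE, __VA_ARGS__);     \
    std::cout << GridLogMemory << print_buffer;       \
  } while (0)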

View File

@ -107,6 +107,7 @@ public:
////////////////////////////////////////////////////////////////////////////////
static int RankWorld(void) ;
static void BroadcastWorld(int root,void* data, int bytes);
static void BarrierWorld(void);
////////////////////////////////////////////////////////////
// Reduction

View File

@ -396,17 +396,17 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
}
}
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
this->StencilSendToRecvFromComplete(list,dir);
list.resize(0);
}
/* if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
* this->StencilSendToRecvFromComplete(list,dir);
* list.resize(0);
* }
*/
return off_node_bytes;
}
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list,int dir)
{
// std::cout << "Copy Synchronised\n"<<std::endl;
acceleratorCopySynchronise();
StencilBarrier();// Synch shared memory on a single nodes
int nreq=list.size();
@ -443,6 +443,10 @@ int CartesianCommunicator::RankWorld(void){
MPI_Comm_rank(communicator_world,&r);
return r;
}
void CartesianCommunicator::BarrierWorld(void){
int ierr = MPI_Barrier(communicator_world);
assert(ierr==0);
}
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
{
int ierr= MPI_Bcast(data,
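
BarrierWorld() wraps MPI_Barrier on the world communicator. Its intended use in this change set is the start-up clock synchronisation shown in the Grid_init hunk near the end of this diff; a sketch of that call sequence:

CartesianCommunicator::Init(argc, argv);
GridLogger::GlobalStopWatch.Stop();
CartesianCommunicator::BarrierWorld();   // all ranks meet on the world communicator
GridLogger::GlobalStopWatch.Reset();     // restart from zero with clocks aligned
GridLogger::GlobalStopWatch.Start();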

View File

@ -104,6 +104,7 @@ int CartesianCommunicator::RankWorld(void){return 0;}
void CartesianCommunicator::Barrier(void){}
void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {}
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { }
void CartesianCommunicator::BarrierWorld(void) { }
int CartesianCommunicator::RankFromProcessorCoor(Coordinate &coor) { return 0;}
void CartesianCommunicator::ProcessorCoorFromRank(int rank, Coordinate &coor){ coor = _processor_coor; }
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)

View File

@ -68,6 +68,7 @@ GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL");
GridLogger GridLogMemory (1, "Memory", GridLogColours, "NORMAL");
GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE");
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
GridLogger GridLogDslash (1, "Dslash", GridLogColours, "BLUE");
GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE");
GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE");
GridLogger GridLogHMC (1, "HMC", GridLogColours, "BLUE");
@ -80,6 +81,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
GridLogIterative.Active(0);
GridLogDebug.Active(0);
GridLogPerformance.Active(0);
GridLogDslash.Active(0);
GridLogIntegrator.Active(1);
GridLogColours.Active(0);
GridLogHMC.Active(1);
@ -91,6 +93,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1);
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1);
if (logstreams[i] == std::string("Dslash")) GridLogDslash.Active(1);
if (logstreams[i] == std::string("NoIntegrator")) GridLogIntegrator.Active(0);
if (logstreams[i] == std::string("NoHMC")) GridLogHMC.Active(0);
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
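
The new Dslash channel is off by default and is enabled when "Dslash" appears in the stream list passed to GridLogConfigure. A short usage sketch; the message line is taken from the WilsonFermion5D hunk later in this diff, and Block is a member of that class rather than something defined here.

std::vector<std::string> streams = {"Dslash"};   // plus any other channels to enable
GridLogConfigure(streams);                       // switches GridLogDslash.Active(1)
std::cout << GridLogDslash << " Dirichlet BCs 5d " << Block << std::endl;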

View File

@ -138,7 +138,8 @@ public:
stream << std::setw(log.topWidth);
}
stream << log.topName << log.background()<< " : ";
stream << log.colour() << std::left;
// stream << log.colour() << std::left;
stream << std::left;
if (log.chanWidth > 0)
{
stream << std::setw(log.chanWidth);
@ -153,9 +154,9 @@ public:
stream << log.evidence()
<< now << log.background() << " : " ;
}
stream << log.colour();
// stream << log.colour();
stream << std::right;
stream.flags(f);
return stream;
} else {
return devnull;
@ -180,6 +181,7 @@ extern GridLogger GridLogWarning;
extern GridLogger GridLogMessage;
extern GridLogger GridLogDebug ;
extern GridLogger GridLogPerformance;
extern GridLogger GridLogDslash;
extern GridLogger GridLogIterative ;
extern GridLogger GridLogIntegrator ;
extern GridLogger GridLogHMC;

View File

@ -27,10 +27,13 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
/* END LEGAL */
#include <Grid/GridCore.h>
#include <Grid/perfmon/PerfCount.h>
#include <Grid/perfmon/Timer.h>
#include <Grid/perfmon/PerfCount.h>
NAMESPACE_BEGIN(Grid);
GridTimePoint theProgramStart = GridClock::now();
#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
#define RawConfig(A,B) (A<<8|B)
const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {

View File

@ -35,17 +35,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
NAMESPACE_BEGIN(Grid)
// Dress the output; use std::chrono
// C++11 time facilities better?
inline double usecond(void) {
struct timeval tv;
tv.tv_sec = 0;
tv.tv_usec = 0;
gettimeofday(&tv,NULL);
return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
}
typedef std::chrono::system_clock GridClock;
//typedef std::chrono::system_clock GridClock;
typedef std::chrono::high_resolution_clock GridClock;
typedef std::chrono::time_point<GridClock> GridTimePoint;
typedef std::chrono::seconds GridSecs;
@ -53,6 +44,15 @@ typedef std::chrono::milliseconds GridMillisecs;
typedef std::chrono::microseconds GridUsecs;
typedef std::chrono::microseconds GridTime;
extern GridTimePoint theProgramStart;
// Dress the output; use std::chrono
// C++11 time facilities better?
inline double usecond(void) {
auto usecs = std::chrono::duration_cast<GridUsecs>(GridClock::now()-theProgramStart);
return 1.0*usecs.count();
}
inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time)
{
stream << time.count()<<" s";
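
The hunk above replaces the gettimeofday-based usecond() with one driven by std::chrono::high_resolution_clock, measured from a program-start time point so that all timestamps share an epoch. A self-contained sketch of the same pattern outside the Grid headers:

#include <chrono>

using GridClock     = std::chrono::high_resolution_clock;
using GridTimePoint = std::chrono::time_point<GridClock>;
using GridUsecs     = std::chrono::microseconds;

static const GridTimePoint theProgramStart = GridClock::now();

inline double usecond(void) {
  auto usecs = std::chrono::duration_cast<GridUsecs>(GridClock::now() - theProgramStart);
  return 1.0 * usecs.count();   // microseconds since program start
}

Callers keep the existing idiom: seed a timer with -usecond() on entry and add +usecond() on exit.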

View File

@ -42,6 +42,8 @@ public:
bool is_smeared = false;
RealD deriv_norm_sum;
RealD deriv_max_sum;
RealD Fdt_norm_sum;
RealD Fdt_max_sum;
int deriv_num;
RealD deriv_us;
RealD S_us;
@ -51,12 +53,17 @@ public:
deriv_num=0;
deriv_norm_sum = deriv_max_sum=0.0;
}
void deriv_log(RealD nrm, RealD max) { deriv_max_sum+=max; deriv_norm_sum+=nrm; deriv_num++;}
RealD deriv_max_average(void) { return deriv_max_sum/deriv_num; };
RealD deriv_norm_average(void) { return deriv_norm_sum/deriv_num; };
void deriv_log(RealD nrm, RealD max,RealD Fdt_nrm,RealD Fdt_max) {
deriv_max_sum+=max; deriv_norm_sum+=nrm;
Fdt_max_sum+=Fdt_max; Fdt_norm_sum+=Fdt_nrm; deriv_num++;
}
RealD deriv_max_average(void) { return deriv_max_sum/deriv_num; };
RealD deriv_norm_average(void) { return deriv_norm_sum/deriv_num; };
RealD Fdt_max_average(void) { return Fdt_max_sum/deriv_num; };
RealD Fdt_norm_average(void) { return Fdt_norm_sum/deriv_num; };
RealD deriv_timer(void) { return deriv_us; };
RealD S_timer(void) { return deriv_us; };
RealD refresh_timer(void) { return deriv_us; };
RealD S_timer(void) { return S_us; };
RealD refresh_timer(void) { return refresh_us; };
void deriv_timer_start(void) { deriv_us-=usecond(); }
void deriv_timer_stop(void) { deriv_us+=usecond(); }
void refresh_timer_start(void) { refresh_us-=usecond(); }

View File

@ -39,7 +39,7 @@ struct GparityWilsonImplParams {
Coordinate twists;
//mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
Coordinate dirichlet; // Blocksize of dirichlet BCs
GparityWilsonImplParams() : twists(Nd, 0), dirichlet(Nd, 0) {};
GparityWilsonImplParams() : twists(Nd, 0) { dirichlet.resize(0); };
};
struct WilsonImplParams {
@ -48,13 +48,13 @@ struct WilsonImplParams {
AcceleratorVector<Real,Nd> twist_n_2pi_L;
AcceleratorVector<Complex,Nd> boundary_phases;
WilsonImplParams() {
dirichlet.resize(Nd,0);
dirichlet.resize(0);
boundary_phases.resize(Nd, 1.0);
twist_n_2pi_L.resize(Nd, 0.0);
};
WilsonImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi), overlapCommsCompute(false) {
twist_n_2pi_L.resize(Nd, 0.0);
dirichlet.resize(Nd,0);
dirichlet.resize(0);
}
};
@ -62,7 +62,7 @@ struct StaggeredImplParams {
Coordinate dirichlet; // Blocksize of dirichlet BCs
StaggeredImplParams()
{
dirichlet.resize(Nd,0);
dirichlet.resize(0);
};
};
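
Across these parameter structs the default dirichlet vector changes from Nd zeros to an empty vector, so "no Dirichlet blocking" is now signalled by size zero. The stencil constructor hunk later in this diff expands it back where it is consumed:

if ( p.dirichlet.size() == 0 ) p.dirichlet.resize(grid->Nd(), 0);
DirichletBlock(p.dirichlet);   // comms send/recv set up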

View File

@ -400,7 +400,6 @@ public:
}
this->face_table_computed=1;
assert(this->u_comm_offset==this->_unified_buffer_size);
accelerator_barrier();
}
};

View File

@ -233,10 +233,10 @@ void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
GaugeField HUmu(_Umu.Grid());
HUmu = _Umu*(-0.5);
if ( Dirichlet ) {
std::cout << GridLogMessage << " Dirichlet BCs 5d " <<Block<<std::endl;
std::cout << GridLogDslash << " Dirichlet BCs 5d " <<Block<<std::endl;
Coordinate GaugeBlock(Nd);
for(int d=0;d<Nd;d++) GaugeBlock[d] = Block[d+1];
std::cout << GridLogMessage << " Dirichlet BCs 4d " <<GaugeBlock<<std::endl;
std::cout << GridLogDslash << " Dirichlet BCs 4d " <<GaugeBlock<<std::endl;
DirichletFilter<GaugeField> Filter(GaugeBlock);
Filter.applyFilter(HUmu);
}
@ -382,12 +382,14 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
DoubledGaugeField & U,
const FermionField &in, FermionField &out,int dag)
{
DhopTotalTime-=usecond();
// std::cout << GridLogDslash<<"Dhop internal"<<std::endl;
DhopTotalTime=-usecond();
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
else
DhopInternalSerialComms(st,lo,U,in,out,dag);
DhopTotalTime+=usecond();
// std::cout << GridLogDslash<<"Dhop took"<<DhopTotalTime<<std::endl;
}
@ -404,53 +406,59 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
/////////////////////////////
// Start comms // Gather intranode and extra node differentiated??
/////////////////////////////
DhopFaceTime-=usecond();
DhopFaceTime=-usecond();
st.HaloExchangeOptGather(in,compressor);
DhopFaceTime+=usecond();
// std::cout << GridLogDslash<< " Dhop Gather end "<< DhopFaceTime<<" us " <<std::endl;
DhopCommTime -=usecond();
DhopCommTime =-usecond();
std::vector<std::vector<CommsRequest_t> > requests;
st.CommunicateBegin(requests);
/////////////////////////////
// Overlap with comms
/////////////////////////////
DhopFaceTime-=usecond();
DhopFaceTime=-usecond();
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
DhopFaceTime+=usecond();
// std::cout << GridLogDslash<< " Dhop Commsmerge end "<<DhopFaceTime<< " us "<<std::endl;
/////////////////////////////
// do the compute interior
/////////////////////////////
int Opt = WilsonKernelsStatic::Opt; // Why pass this. Kernels should know
DhopComputeTime-=usecond();
DhopComputeTime=-usecond();
if (dag == DaggerYes) {
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
} else {
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
}
DhopComputeTime+=usecond();
// std::cout << GridLogDslash<< " Dhop Compute 1 end "<< DhopComputeTime<<" us" <<std::endl;
/////////////////////////////
// Complete comms
/////////////////////////////
st.CommunicateComplete(requests);
DhopCommTime +=usecond();
// std::cout << GridLogDslash<< " Dhop Comunicate end "<< DhopCommTime << " us" <<std::endl;
/////////////////////////////
// do the compute exterior
/////////////////////////////
DhopFaceTime-=usecond();
DhopFaceTime=-usecond();
st.CommsMerge(compressor);
DhopFaceTime+=usecond();
// std::cout << GridLogDslash<< " Dhop CommsMerge2 end "<<DhopFaceTime << " us "<<std::endl;
DhopComputeTime2-=usecond();
DhopComputeTime2=-usecond();
if (dag == DaggerYes) {
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
} else {
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
}
DhopComputeTime2+=usecond();
// std::cout << GridLogDslash<< " Dhop Ext end "<<DhopComputeTime2 <<"us "<<std::endl;
}
@ -463,12 +471,14 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOr
Compressor compressor(dag);
int LLs = in.Grid()->_rdimensions[0];
DhopCommTime-=usecond();
// std::cout << GridLogDslash<< " Dhop Halo exchange begine " <<std::endl;
DhopCommTime=-usecond();
st.HaloExchangeOpt(in,compressor);
DhopCommTime+=usecond();
// std::cout << GridLogDslash<< " Dhop Comms end "<<DhopCommTime<<" us"<<std::endl;
DhopComputeTime-=usecond();
DhopComputeTime=-usecond();
int Opt = WilsonKernelsStatic::Opt;
if (dag == DaggerYes) {
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
@ -476,6 +486,7 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOr
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
}
DhopComputeTime+=usecond();
// std::cout << GridLogDslash<< " Dhop Compute end "<<DhopComputeTime<<" us" <<std::endl;
}
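
Note the changed sign convention on the Dhop timers in this hunk: "-=usecond()" becomes "=-usecond()", which appears to switch them from accumulating across calls to timing each call from zero. A minimal illustration of the two idioms:

// Accumulating across calls (previous form):
DhopTotalTime -= usecond();    // on entry
/* ... Dhop body ... */
DhopTotalTime += usecond();    // on exit: running total over all calls

// Per-call measurement (form used in this hunk):
DhopTotalTime  = -usecond();   // reset on entry
/* ... Dhop body ... */
DhopTotalTime += usecond();    // on exit: duration of this call only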

View File

@ -416,19 +416,6 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
#undef LoopBody
}
#define KERNEL_CALL_TMP(A) \
const uint64_t NN = Nsite*Ls; \
auto U_p = & U_v[0]; \
auto in_p = & in_v[0]; \
auto out_p = & out_v[0]; \
auto st_p = st_v._entries_p; \
auto st_perm = st_v._permute_type; \
accelerator_forNB( ss, NN, Simd::Nsimd(), { \
int sF = ss; \
int sU = ss/Ls; \
WilsonKernels<Impl>::A(st_perm,st_p,U_p,buf,sF,sU,in_p,out_p); \
}); \
accelerator_barrier();
#define KERNEL_CALLNB(A) \
const uint64_t NN = Nsite*Ls; \
@ -448,8 +435,7 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
int sF = ptr[ss]; \
int sU = ss/Ls; \
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
}); \
accelerator_barrier();
});
#define ASM_CALL(A) \
thread_for( ss, Nsite, { \
@ -471,7 +457,7 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
if( interior && exterior ) {
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;}
#ifdef SYCL_HACK
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_TMP(HandDhopSiteSycl); return; }
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteSycl); return; }
#else
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;}
#endif

View File

@ -67,6 +67,36 @@ NAMESPACE_BEGIN(Grid);
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
};
template<class Impl,class ImplF>
class OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction : public GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF> {
public:
typedef OneFlavourRationalParams Params;
private:
static RationalActionParams transcribe(const Params &in){
RationalActionParams out;
out.inv_pow = 2;
out.lo = in.lo;
out.hi = in.hi;
out.MaxIter = in.MaxIter;
out.action_tolerance = out.md_tolerance = in.tolerance;
out.action_degree = out.md_degree = in.degree;
out.precision = in.precision;
out.BoundsCheckFreq = in.BoundsCheckFreq;
return out;
}
public:
OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction(FermionOperator<Impl> &_NumOp,
FermionOperator<Impl> &_DenOp,
FermionOperator<ImplF> &_NumOpF,
FermionOperator<ImplF> &_DenOpF,
const Params & p, Integer ReliableUpdateFreq
) :
GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF>(_NumOp, _DenOp,_NumOpF, _DenOpF, transcribe(p),ReliableUpdateFreq){}
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
};
NAMESPACE_END(Grid);
#endif

View File

@ -153,7 +153,7 @@ protected:
Real force_max = std::sqrt(maxLocalNorm2(force));
Real impulse_max = force_max * ep * HMC_MOMENTUM_DENOMINATOR;
as[level].actions.at(a)->deriv_log(force_abs,force_max);
as[level].actions.at(a)->deriv_log(force_abs,force_max,impulse_abs,impulse_max);
std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Force average: " << force_abs <<" "<<name<<std::endl;
std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Force max : " << force_max <<" "<<name<<std::endl;
@ -285,6 +285,8 @@ public:
<<"["<<level<<"]["<< actionID<<"] : "
<<" force max " << as[level].actions.at(actionID)->deriv_max_average()
<<" norm " << as[level].actions.at(actionID)->deriv_norm_average()
<<" Fdt max " << as[level].actions.at(actionID)->Fdt_max_average()
<<" norm " << as[level].actions.at(actionID)->Fdt_norm_average()
<<" calls " << as[level].actions.at(actionID)->deriv_num
<< std::endl;
}
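
With the extended deriv_log above, each action now records both the raw force and the per-step impulse ("Fdt"). A sketch of the logged quantities using the names from this hunk; the definition of impulse_abs is not shown in the diff and is assumed analogous to impulse_max:

Real force_max   = std::sqrt(maxLocalNorm2(force));
Real impulse_max = force_max * ep * HMC_MOMENTUM_DENOMINATOR;   // Fdt = force x step size
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;   // assumed, not shown in the hunk
as[level].actions.at(a)->deriv_log(force_abs, force_max, impulse_abs, impulse_max);
// deriv_max_average()/Fdt_max_average() then report per-trajectory averages, as printed above.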

View File

@ -99,7 +99,7 @@ public:
// using wilson flow by default here
WilsonFlow<PeriodicGimplR> WF(Pars.Smearing.steps, Pars.Smearing.step_size, Pars.Smearing.meas_interval);
WF.smear_adaptive(Usmear, U, Pars.Smearing.maxTau);
Real T0 = WF.energyDensityPlaquette(Pars.Smearing.maxTau, Usmear);
Real T0 = WF.energyDensityPlaquette(Usmear);
std::cout << GridLogMessage << std::setprecision(std::numeric_limits<Real>::digits10 + 1)
<< "T0 : [ " << traj << " ] "<< T0 << std::endl;
}
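
This module now uses the reverted WilsonFlow interface: fixed built-in measurements, with energyDensityPlaquette reading the internally tracked flow time after adaptive smearing. A sketch of the call sequence, using the names from this hunk:

WilsonFlow<PeriodicGimplR> WF(Pars.Smearing.steps, Pars.Smearing.step_size,
                              Pars.Smearing.meas_interval);
WF.smear_adaptive(Usmear, U, Pars.Smearing.maxTau);   // no longer const: updates taus/epsilon
Real T0 = WF.energyDensityPlaquette(Usmear);          // t^2 <E(t)> at the final flow time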

View File

@ -7,7 +7,6 @@ Source file: ./lib/qcd/modules/plaquette.h
Copyright (C) 2017
Author: Guido Cossu <guido.cossu@ed.ac.uk>
Author: Christopher Kelly <ckelly@bnl.gov>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -34,44 +33,28 @@ NAMESPACE_BEGIN(Grid);
template <class Gimpl>
class WilsonFlow: public Smear<Gimpl>{
public:
//Store generic measurements to take during smearing process using std::function
typedef std::function<void(int, RealD, const typename Gimpl::GaugeField &)> FunctionType; //int: step, RealD: flow time, GaugeField : the gauge field
private:
unsigned int Nstep;
RealD epsilon; //for regular smearing this is the time step, for adaptive it is the initial time step
std::vector< std::pair<int, FunctionType> > functions; //The int maps to the measurement frequency
unsigned int measure_interval;
mutable RealD epsilon, taus;
mutable WilsonGaugeAction<Gimpl> SG;
//Evolve the gauge field by 1 step and update tau
void evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const;
//Evolve the gauge field by 1 step and update tau and the current time step eps
void evolve_step_adaptive(typename Gimpl::GaugeField&U, RealD &tau, RealD &eps, RealD maxTau) const;
void evolve_step(typename Gimpl::GaugeField&) const;
void evolve_step_adaptive(typename Gimpl::GaugeField&, RealD);
RealD tau(unsigned int t)const {return epsilon*(t+1.0); }
public:
INHERIT_GIMPL_TYPES(Gimpl)
void resetActions(){ functions.clear(); }
void addMeasurement(int meas_interval, FunctionType meas){ functions.push_back({meas_interval, meas}); }
//Set the class to perform the default measurements:
//the plaquette energy density every step
//the plaquette topological charge every 'topq_meas_interval' steps
//and output to stdout
void setDefaultMeasurements(int topq_meas_interval = 1);
explicit WilsonFlow(unsigned int Nstep, RealD epsilon, unsigned int interval = 1):
Nstep(Nstep),
epsilon(epsilon),
measure_interval(interval),
SG(WilsonGaugeAction<Gimpl>(3.0)) {
// WilsonGaugeAction with beta 3.0
assert(epsilon > 0.0);
LogMessage();
setDefaultMeasurements(interval);
}
void LogMessage() {
@ -90,29 +73,9 @@ public:
// undefined for WilsonFlow
}
void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau) const;
//Compute t^2 <E(t)> for time t from the plaquette
static RealD energyDensityPlaquette(const RealD t, const GaugeField& U);
//Compute t^2 <E(t)> for time t from the 1x1 cloverleaf form
//t is the Wilson flow time
static RealD energyDensityCloverleaf(const RealD t, const GaugeField& U);
//Evolve the gauge field by Nstep steps of epsilon and return the energy density computed every interval steps
//The smeared field is output as V
std::vector<RealD> flowMeasureEnergyDensityPlaquette(GaugeField &V, const GaugeField& U, int measure_interval = 1);
//Version that does not return the smeared field
std::vector<RealD> flowMeasureEnergyDensityPlaquette(const GaugeField& U, int measure_interval = 1);
//Evolve the gauge field by Nstep steps of epsilon and return the Cloverleaf energy density computed every interval steps
//The smeared field is output as V
std::vector<RealD> flowMeasureEnergyDensityCloverleaf(GaugeField &V, const GaugeField& U, int measure_interval = 1);
//Version that does not return the smeared field
std::vector<RealD> flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval = 1);
void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau);
RealD energyDensityPlaquette(unsigned int step, const GaugeField& U) const;
RealD energyDensityPlaquette(const GaugeField& U) const;
};
@ -120,7 +83,7 @@ public:
// Implementations
////////////////////////////////////////////////////////////////////////////////
template <class Gimpl>
void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const{
void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U) const{
GaugeField Z(U.Grid());
GaugeField tmp(U.Grid());
SG.deriv(U, Z);
@ -136,13 +99,12 @@ void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U, RealD &tau) c
SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2
tau += epsilon;
}
template <class Gimpl>
void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD &tau, RealD &eps, RealD maxTau) const{
if (maxTau - tau < eps){
eps = maxTau-tau;
void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD maxTau) {
if (maxTau - taus < epsilon){
epsilon = maxTau-taus;
}
//std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl;
GaugeField Z(U.Grid());
@ -152,151 +114,95 @@ void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, Real
SG.deriv(U, Z);
Zprime = -Z;
Z *= 0.25; // Z0 = 1/4 * F(U)
Gimpl::update_field(Z, U, -2.0*eps); // U = W1 = exp(ep*Z0)*W0
Gimpl::update_field(Z, U, -2.0*epsilon); // U = W1 = exp(ep*Z0)*W0
Z *= -17.0/8.0;
SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1
Zprime += 2.0*tmp;
Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1
Gimpl::update_field(Z, U, -2.0*eps); // U_= W2 = exp(ep*Z)*W1
Gimpl::update_field(Z, U, -2.0*epsilon); // U_= W2 = exp(ep*Z)*W1
Z *= -4.0/3.0;
SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
Gimpl::update_field(Z, U, -2.0*eps); // V(t+e) = exp(ep*Z)*W2
Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2
// Ramos
Gimpl::update_field(Zprime, Uprime, -2.0*eps); // V'(t+e) = exp(ep*Z')*W0
Gimpl::update_field(Zprime, Uprime, -2.0*epsilon); // V'(t+e) = exp(ep*Z')*W0
// Compute distance as norm^2 of the difference
GaugeField diffU = U - Uprime;
RealD diff = norm2(diffU);
// adjust integration step
tau += eps;
taus += epsilon;
//std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl;
eps = eps*0.95*std::pow(1e-4/diff,1./3.);
epsilon = epsilon*0.95*std::pow(1e-4/diff,1./3.);
//std::cout << GridLogMessage << "New epsilon : " << epsilon << std::endl;
}
template <class Gimpl>
RealD WilsonFlow<Gimpl>::energyDensityPlaquette(const RealD t, const GaugeField& U){
static WilsonGaugeAction<Gimpl> SG(3.0);
return 2.0 * t * t * SG.S(U)/U.Grid()->gSites();
}
//Compute t^2 <E(t)> for time from the 1x1 cloverleaf form
template <class Gimpl>
RealD WilsonFlow<Gimpl>::energyDensityCloverleaf(const RealD t, const GaugeField& U){
typedef typename Gimpl::GaugeLinkField GaugeMat;
typedef typename Gimpl::GaugeField GaugeLorentz;
assert(Nd == 4);
//E = 1/2 tr( F_munu F_munu )
//However as F_numu = -F_munu, only need to sum the trace of the squares of the following 6 field strengths:
//F_01 F_02 F_03 F_12 F_13 F_23
GaugeMat F(U.Grid());
LatticeComplexD R(U.Grid());
R = Zero();
for(int mu=0;mu<3;mu++){
for(int nu=mu+1;nu<4;nu++){
WilsonLoops<Gimpl>::FieldStrength(F, U, mu, nu);
R = R + trace(F*F);
}
}
ComplexD out = sum(R);
out = t*t*out / RealD(U.Grid()->gSites());
return -real(out); //minus sign necessary for +ve energy
}
template <class Gimpl>
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityPlaquette(GaugeField &V, const GaugeField& U, int measure_interval){
std::vector<RealD> out;
resetActions();
addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Computing plaquette energy density for step " << step << std::endl;
out.push_back( energyDensityPlaquette(t,U) );
});
smear(V,U);
return out;
RealD WilsonFlow<Gimpl>::energyDensityPlaquette(unsigned int step, const GaugeField& U) const {
RealD td = tau(step);
return 2.0 * td * td * SG.S(U)/U.Grid()->gSites();
}
template <class Gimpl>
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityPlaquette(const GaugeField& U, int measure_interval){
GaugeField V(U);
return flowMeasureEnergyDensityPlaquette(V,U, measure_interval);
RealD WilsonFlow<Gimpl>::energyDensityPlaquette(const GaugeField& U) const {
return 2.0 * taus * taus * SG.S(U)/U.Grid()->gSites();
}
template <class Gimpl>
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityCloverleaf(GaugeField &V, const GaugeField& U, int measure_interval){
std::vector<RealD> out;
resetActions();
addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Computing Cloverleaf energy density for step " << step << std::endl;
out.push_back( energyDensityCloverleaf(t,U) );
});
smear(V,U);
return out;
}
template <class Gimpl>
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval){
GaugeField V(U);
return flowMeasureEnergyDensityCloverleaf(V,U, measure_interval);
}
//#define WF_TIMING
template <class Gimpl>
void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const{
void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const {
out = in;
RealD taus = 0.;
for (unsigned int step = 1; step <= Nstep; step++) { //step indicates the number of smearing steps applied at the time of measurement
for (unsigned int step = 1; step <= Nstep; step++) {
auto start = std::chrono::high_resolution_clock::now();
evolve_step(out, taus);
evolve_step(out);
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> diff = end - start;
#ifdef WF_TIMING
std::cout << "Time to evolve " << diff.count() << " s\n";
#endif
//Perform measurements
for(auto const &meas : functions)
if( step % meas.first == 0 ) meas.second(step,taus,out);
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
<< step << " " << tau(step) << " "
<< energyDensityPlaquette(step,out) << std::endl;
if( step % measure_interval == 0){
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : "
<< step << " "
<< WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
}
}
}
template <class Gimpl>
void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau) const{
void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau){
out = in;
RealD taus = 0.;
RealD eps = epsilon;
taus = epsilon;
unsigned int step = 0;
do{
step++;
//std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl;
evolve_step_adaptive(out, taus, eps, maxTau);
//Perform measurements
for(auto const &meas : functions)
if( step % meas.first == 0 ) meas.second(step,taus,out);
evolve_step_adaptive(out, maxTau);
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : "
<< step << " " << taus << " "
<< energyDensityPlaquette(out) << std::endl;
if( step % measure_interval == 0){
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : "
<< step << " "
<< WilsonLoops<PeriodicGimplR>::TopologicalCharge(out) << std::endl;
}
} while (taus < maxTau);
}
template <class Gimpl>
void WilsonFlow<Gimpl>::setDefaultMeasurements(int topq_meas_interval){
addMeasurement(1, [](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " << step << " " << t << " " << energyDensityPlaquette(t,U) << std::endl;
});
addMeasurement(topq_meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " << WilsonLoops<Gimpl>::TopologicalCharge(U) << std::endl;
});
}
}
NAMESPACE_END(Grid);

View File

@ -290,6 +290,8 @@ public:
std::vector<Decompress> DecompressionsSHM;
std::vector<CopyReceiveBuffer> CopyReceiveBuffers ;
std::vector<CachedTransfer> CachedTransfers;
std::vector<CommsRequest_t> MpiReqs;
///////////////////////////////////////////////////////////
// Unified Comms buffers for all directions
///////////////////////////////////////////////////////////
@ -357,9 +359,9 @@ public:
////////////////////////////////////////////////////////////////////////
void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
{
reqs.resize(Packets.size());
accelerator_barrier();
for(int i=0;i<Packets.size();i++){
_grid->StencilSendToRecvFromBegin(reqs[i],
_grid->StencilSendToRecvFromBegin(MpiReqs,
Packets[i].send_buf,
Packets[i].to_rank,Packets[i].do_send,
Packets[i].recv_buf,
@ -370,41 +372,19 @@ public:
void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs)
{
for(int i=0;i<Packets.size();i++){
_grid->StencilSendToRecvFromComplete(reqs[i],i);
}
_grid->StencilSendToRecvFromComplete(MpiReqs,0);
}
////////////////////////////////////////////////////////////////////////
// Blocking send and receive. Either sequential or parallel.
////////////////////////////////////////////////////////////////////////
void Communicate(void)
{
if ( CartesianCommunicator::CommunicatorPolicy == CartesianCommunicator::CommunicatorPolicySequential ){
/////////////////////////////////////////////////////////
// several way threaded on different communicators.
// Cannot combine with Dirichlet operators
// This scheme is needed on Intel Omnipath for best performance
// Deprecate once there are very few omnipath clusters
/////////////////////////////////////////////////////////
int nthreads = CartesianCommunicator::nCommThreads;
int old = GridThread::GetThreads();
GridThread::SetThreads(nthreads);
thread_for(i,Packets.size(),{
_grid->StencilSendToRecvFrom(Packets[i].send_buf,
Packets[i].to_rank,Packets[i].do_send,
Packets[i].recv_buf,
Packets[i].from_rank,Packets[i].do_recv,
Packets[i].bytes,i);
});
GridThread::SetThreads(old);
} else {
/////////////////////////////////////////////////////////
// Concurrent and non-threaded asynch calls to MPI
/////////////////////////////////////////////////////////
std::vector<std::vector<CommsRequest_t> > reqs;
this->CommunicateBegin(reqs);
this->CommunicateComplete(reqs);
}
/////////////////////////////////////////////////////////
// Concurrent and non-threaded asynch calls to MPI
/////////////////////////////////////////////////////////
std::vector<std::vector<CommsRequest_t> > reqs;
this->CommunicateBegin(reqs);
this->CommunicateComplete(reqs);
}
template<class compressor> void HaloExchange(const Lattice<vobj> &source,compressor &compress)
@ -484,7 +464,6 @@ public:
face_table_computed=1;
assert(u_comm_offset==_unified_buffer_size);
accelerator_barrier();
}
/////////////////////////
@ -499,6 +478,7 @@ public:
Packets.resize(0);
CopyReceiveBuffers.resize(0);
CachedTransfers.resize(0);
MpiReqs.resize(0);
}
void AddCopy(void *from,void * to, Integer bytes)
{
@ -711,7 +691,9 @@ public:
this->_comms_recv.resize(npoints);
this->same_node.resize(npoints);
if ( p.dirichlet.size() ) DirichletBlock(p.dirichlet); // comms send/recv set up
if ( p.dirichlet.size() ==0 ) p.dirichlet.resize(grid->Nd(),0);
DirichletBlock(p.dirichlet); // comms send/recv set up
_unified_buffer_size=0;
surface_list.resize(0);
@ -793,7 +775,6 @@ public:
u_simd_recv_buf[l] = (cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj));
u_simd_send_buf[l] = (cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj));
}
PrecomputeByteOffsets();
}
@ -1105,7 +1086,6 @@ public:
// Gather locally
////////////////////////////////////////////////////////
assert(send_buf!=NULL);
Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,comm_off,so);
}
@ -1212,8 +1192,9 @@ public:
face_table[face_idx].size()*sizeof(face_table_host[0]));
}
if ( comms_send || comms_recv )
if ( comms_send || comms_recv ) {
Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type);
}
face_idx++;
//spointers[0] -- low

View File

@ -208,46 +208,5 @@ void merge(vobj &vec,const ExtractPointerArray<sobj> &extracted, int offset)
}
//////////////////////////////////////////////////////////////////////////////////
//Copy a single lane of a SIMD tensor type from one object to another
//Output object must be of the same tensor type but may be of a different precision (i.e. it can have a different root data type)
///////////////////////////////////////////////////////////////////////////////////
template<class vobjOut, class vobjIn>
accelerator_inline
void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __restrict__ vecIn, int lane_in)
{
static_assert( std::is_same<typename vobjOut::DoublePrecision, typename vobjIn::DoublePrecision>::value == 1, "copyLane: tensor types must be the same" ); //if tensor types are same the DoublePrecision type must be the same
typedef typename vobjOut::vector_type ovector_type;
typedef typename vobjIn::vector_type ivector_type;
constexpr int owords=sizeof(vobjOut)/sizeof(ovector_type);
constexpr int iwords=sizeof(vobjIn)/sizeof(ivector_type);
static_assert( owords == iwords, "copyLane: Expected number of vector words in input and output objects to be equal" );
typedef typename vobjOut::scalar_type oscalar_type;
typedef typename vobjIn::scalar_type iscalar_type;
typedef typename ExtractTypeMap<oscalar_type>::extract_type oextract_type;
typedef typename ExtractTypeMap<iscalar_type>::extract_type iextract_type;
typedef oextract_type * opointer;
typedef iextract_type * ipointer;
constexpr int oNsimd=ovector_type::Nsimd();
constexpr int iNsimd=ivector_type::Nsimd();
iscalar_type itmp;
oscalar_type otmp;
opointer __restrict__ op = (opointer)&vecOut;
ipointer __restrict__ ip = (ipointer)&vecIn;
for(int w=0;w<owords;w++){
memcpy( (char*)&itmp, (char*)(ip + lane_in + iNsimd*w), sizeof(iscalar_type) );
otmp = itmp; //potential precision change
memcpy( (char*)(op + lane_out + oNsimd*w), (char*)&otmp, sizeof(oscalar_type) );
}
}
NAMESPACE_END(Grid);

View File

@ -1,6 +1,7 @@
#include <Grid/GridCore.h>
NAMESPACE_BEGIN(Grid);
int world_rank; // Use to control world rank for print guarding
int acceleratorAbortOnGpuError=1;
uint32_t accelerator_threads=2;
uint32_t acceleratorThreads(void) {return accelerator_threads;};
@ -16,7 +17,7 @@ void acceleratorThreads(uint32_t t) {accelerator_threads = t;};
#ifdef GRID_CUDA
cudaDeviceProp *gpu_props;
cudaStream_t copyStream;
cudaStream_t cpuStream;
cudaStream_t computeStream;
void acceleratorInit(void)
{
int nDevices = 1;
@ -24,7 +25,8 @@ void acceleratorInit(void)
gpu_props = new cudaDeviceProp[nDevices];
char * localRankStr = NULL;
int rank = 0, world_rank=0;
int rank = 0;
world_rank=0;
if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);}
if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
if ((localRankStr = getenv(ENV_RANK_SLURM )) != NULL) { world_rank = atoi(localRankStr);}
@ -99,7 +101,7 @@ void acceleratorInit(void)
cudaSetDevice(device);
cudaStreamCreate(&copyStream);
cudaStreamCreate(&cpuStream);
cudaStreamCreate(&computeStream);
const int len=64;
char busid[len];
if( rank == world_rank ) {
@ -114,7 +116,7 @@ void acceleratorInit(void)
#ifdef GRID_HIP
hipDeviceProp_t *gpu_props;
hipStream_t copyStream;
hipStream_t cpuStream;
hipStream_t computeStream;
void acceleratorInit(void)
{
int nDevices = 1;
@ -122,7 +124,8 @@ void acceleratorInit(void)
gpu_props = new hipDeviceProp_t[nDevices];
char * localRankStr = NULL;
int rank = 0, world_rank=0;
int rank = 0;
world_rank=0;
// We extract the local rank initialization using an environment variable
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
{
@ -183,7 +186,7 @@ void acceleratorInit(void)
#endif
hipSetDevice(device);
hipStreamCreate(&copyStream);
hipStreamCreate(&cpuStream);
hipStreamCreate(&computeStream);
const int len=64;
char busid[len];
if( rank == world_rank ) {
@ -210,7 +213,8 @@ void acceleratorInit(void)
#endif
char * localRankStr = NULL;
int rank = 0, world_rank=0;
int rank = 0;
world_rank=0;
// We extract the local rank initialization using an environment variable
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)

View File

@ -107,7 +107,7 @@ void acceleratorInit(void);
extern int acceleratorAbortOnGpuError;
extern cudaStream_t copyStream;
extern cudaStream_t cpuStream;
extern cudaStream_t computeStream;
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
#ifdef GRID_SIMT
@ -135,7 +135,7 @@ inline void cuda_mem(void)
}; \
dim3 cu_threads(nsimd,acceleratorThreads(),1); \
dim3 cu_blocks ((num1+nt-1)/nt,num2,1); \
LambdaApply<<<cu_blocks,cu_threads,0,cpuStream>>>(num1,num2,nsimd,lambda); \
LambdaApply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,nsimd,lambda); \
}
#define accelerator_for6dNB(iter1, num1, \
@ -154,7 +154,7 @@ inline void cuda_mem(void)
}; \
dim3 cu_blocks (num1,num2,num3); \
dim3 cu_threads(num4,num5,num6); \
Lambda6Apply<<<cu_blocks,cu_threads,0,cpuStream>>>(num1,num2,num3,num4,num5,num6,lambda); \
Lambda6Apply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,num3,num4,num5,num6,lambda); \
}
template<typename lambda> __global__
@ -190,7 +190,7 @@ void Lambda6Apply(uint64_t num1, uint64_t num2, uint64_t num3,
#define accelerator_barrier(dummy) \
{ \
cudaStreamSynchronize(cpuStream); \
cudaStreamSynchronize(computeStream); \
cudaError err = cudaGetLastError(); \
if ( cudaSuccess != err ) { \
printf("accelerator_barrier(): Cuda error %s \n", \
@ -340,7 +340,7 @@ NAMESPACE_BEGIN(Grid);
#define accelerator_inline __host__ __device__ inline
extern hipStream_t copyStream;
extern hipStream_t cpuStream;
extern hipStream_t computeStream;
/*These routines define mapping from thread grid to loop & vector lane indexing */
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
#ifdef GRID_SIMT
@ -362,16 +362,15 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
dim3 hip_blocks ((num1+nt-1)/nt,num2,1); \
if(hip_threads.x * hip_threads.y * hip_threads.z <= 64){ \
hipLaunchKernelGGL(LambdaApply64,hip_blocks,hip_threads, \
0,cpuStream, \
0,computeStream, \
num1,num2,nsimd, lambda); \
} else { \
hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads, \
0,cpuStream, \
0,computeStream, \
num1,num2,nsimd, lambda); \
} \
}
template<typename lambda> __global__
__launch_bounds__(64,1)
void LambdaApply64(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
@ -400,7 +399,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
#define accelerator_barrier(dummy) \
{ \
hipStreamSynchronize(cpuStream); \
hipStreamSynchronize(computeStream); \
auto err = hipGetLastError(); \
if ( err != hipSuccess ) { \
printf("After hipDeviceSynchronize() : HIP error %s \n", hipGetErrorString( err )); \
@ -443,7 +442,7 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(bas
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
{
hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);
hipMemcpyDtoDAsync(to,from,bytes, copyStream);
}
inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); };
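
These hunks move kernel launches onto a dedicated computeStream while device-to-device copies stay on copyStream and are completed explicitly. A sketch of the resulting two-stream pattern; from, to and bytes are placeholders:

acceleratorCopyDeviceToDeviceAsynch(from, to, bytes);  // enqueued on copyStream
/* ... overlap independent work here ... */
acceleratorCopySynchronise();                          // hipStreamSynchronize(copyStream)
accelerator_barrier();                                 // synchronises kernels on computeStream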

View File

@ -356,6 +356,11 @@ void Grid_init(int *argc,char ***argv)
//////////////////////////////////////////////////////////
CartesianCommunicator::Init(argc,argv);
GridLogger::GlobalStopWatch.Stop();
CartesianCommunicator::BarrierWorld();
GridLogger::GlobalStopWatch.Reset();// Back to zero with synchronised clock
GridLogger::GlobalStopWatch.Start();
////////////////////////////////////
// Banner after MPI (unless GPU)
////////////////////////////////////

View File

@ -1,918 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./HMC/Mobius2p1fIDSDRGparityEOFA.cc
Copyright (C) 2015-2016
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
//Production binary for the 40ID G-parity ensemble
struct RatQuoParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(RatQuoParameters,
double, bnd_lo,
double, bnd_hi,
Integer, action_degree,
double, action_tolerance,
Integer, md_degree,
double, md_tolerance,
Integer, reliable_update_freq,
Integer, bnd_check_freq);
RatQuoParameters() {
bnd_lo = 1e-2;
bnd_hi = 30;
action_degree = 10;
action_tolerance = 1e-10;
md_degree = 10;
md_tolerance = 1e-8;
bnd_check_freq = 20;
reliable_update_freq = 50;
}
void Export(RationalActionParams &into) const{
into.lo = bnd_lo;
into.hi = bnd_hi;
into.action_degree = action_degree;
into.action_tolerance = action_tolerance;
into.md_degree = md_degree;
into.md_tolerance = md_tolerance;
into.BoundsCheckFreq = bnd_check_freq;
}
};
struct EOFAparameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(EOFAparameters,
OneFlavourRationalParams, rat_params,
double, action_tolerance,
double, action_mixcg_inner_tolerance,
double, md_tolerance,
double, md_mixcg_inner_tolerance);
EOFAparameters() {
action_mixcg_inner_tolerance = 1e-8;
action_tolerance = 1e-10;
md_tolerance = 1e-8;
md_mixcg_inner_tolerance = 1e-8;
rat_params.lo = 1.0;
rat_params.hi = 25.0;
rat_params.MaxIter = 50000;
rat_params.tolerance= 1.0e-9;
rat_params.degree = 14;
rat_params.precision= 50;
}
};
struct EvolParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(EvolParameters,
Integer, StartTrajectory,
Integer, Trajectories,
Integer, SaveInterval,
Integer, Steps,
RealD, TrajectoryLength,
bool, MetropolisTest,
std::string, StartingType,
std::vector<Integer>, GparityDirs,
std::vector<EOFAparameters>, eofa_l,
RatQuoParameters, rat_quo_s,
RatQuoParameters, rat_quo_DSDR);
EvolParameters() {
//For initial thermalization; afterwards user should switch Metropolis on and use StartingType=CheckpointStart
MetropolisTest = false;
StartTrajectory = 0;
Trajectories = 50;
SaveInterval = 5;
StartingType = "ColdStart";
GparityDirs.resize(3, 1); //1 for G-parity, 0 for periodic
Steps = 5;
TrajectoryLength = 1.0;
}
};
bool fileExists(const std::string &fn){
std::ifstream f(fn);
return f.good();
}
struct LanczosParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
double, alpha,
double, beta,
double, mu,
int, ord,
int, n_stop,
int, n_want,
int, n_use,
double, tolerance);
LanczosParameters() {
alpha = 35;
beta = 5;
mu = 0;
ord = 100;
n_stop = 10;
n_want = 10;
n_use = 15;
tolerance = 1e-6;
}
};
template<typename FermionActionD, typename FermionFieldD>
void computeEigenvalues(std::string param_file,
GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt, //expect lattice to have been initialized to something
FermionActionD &action, GridParallelRNG &rng){
LanczosParameters params;
if(fileExists(param_file)){
std::cout << GridLogMessage << " Reading " << param_file << std::endl;
Grid::XmlReader rd(param_file);
read(rd, "LanczosParameters", params);
}else if(!GlobalSharedMemory::WorldRank){
std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
Grid::XmlWriter wr(param_file + ".templ");
write(wr, "LanczosParameters", params);
}
FermionFieldD gauss_o(rbGrid);
FermionFieldD gauss(Grid);
gaussian(rng, gauss);
pickCheckerboard(Odd, gauss_o, gauss);
action.ImportGauge(latt);
SchurDiagMooeeOperator<FermionActionD, FermionFieldD> hermop(action);
PlainHermOp<FermionFieldD> hermop_wrap(hermop);
//ChebyshevLanczos<FermionFieldD> Cheb(params.alpha, params.beta, params.mu, params.ord);
assert(params.mu == 0.0);
Chebyshev<FermionFieldD> Cheb(params.beta*params.beta, params.alpha*params.alpha, params.ord+1);
FunctionHermOp<FermionFieldD> Cheb_wrap(Cheb, hermop);
std::cout << "IRL: alpha=" << params.alpha << " beta=" << params.beta << " mu=" << params.mu << " ord=" << params.ord << std::endl;
ImplicitlyRestartedLanczos<FermionFieldD> IRL(Cheb_wrap, hermop_wrap, params.n_stop, params.n_want, params.n_use, params.tolerance, 50000);
std::vector<RealD> eval(params.n_use);
std::vector<FermionFieldD> evec(params.n_use, rbGrid);
int Nconv;
IRL.calc(eval, evec, gauss_o, Nconv);
std::cout << "Eigenvalues:" << std::endl;
for(int i=0;i<params.n_want;i++){
std::cout << i << " " << eval[i] << std::endl;
}
}
//Check the quality of the RHMC approx
//action_or_md toggles checking the action (0), MD (1) or both (2) setups
template<typename FermionActionD, typename FermionFieldD, typename RHMCtype>
void checkRHMC(GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt, //expect lattice to have been initialized to something
FermionActionD &numOp, FermionActionD &denOp, RHMCtype &rhmc, GridParallelRNG &rng,
int inv_pow, const std::string &quark_descr, int action_or_md){
assert(action_or_md == 0 || action_or_md == 1 || action_or_md == 2);
FermionFieldD gauss_o(rbGrid);
FermionFieldD gauss(Grid);
gaussian(rng, gauss);
pickCheckerboard(Odd, gauss_o, gauss);
numOp.ImportGauge(latt);
denOp.ImportGauge(latt);
typedef typename FermionActionD::Impl_t FermionImplPolicyD;
SchurDifferentiableOperator<FermionImplPolicyD> MdagM(numOp);
SchurDifferentiableOperator<FermionImplPolicyD> VdagV(denOp);
PowerMethod<FermionFieldD> power_method;
RealD lambda_max;
std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " numerator" << std::endl;
lambda_max = power_method(MdagM,gauss_o);
std::cout << GridLogMessage << "Got lambda_max "<<lambda_max<<std::endl;
std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " denominator" << std::endl;
lambda_max = power_method(VdagV,gauss_o);
std::cout << GridLogMessage << "Got lambda_max "<<lambda_max<<std::endl;
if(action_or_md == 0 || action_or_md == 2){
std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
InversePowerBoundsCheck(inv_pow, 50000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerAction); //use large tolerance to prevent exit on fail; we are trying to tune here!
std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
InversePowerBoundsCheck(2*inv_pow, 50000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerAction);
std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
InversePowerBoundsCheck(inv_pow, 50000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerAction);
std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
InversePowerBoundsCheck(2*inv_pow, 50000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerAction);
std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
}
std::cout << "-------------------------------------------------------------------------------" << std::endl;
if(action_or_md == 1 || action_or_md == 2){
std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
InversePowerBoundsCheck(inv_pow, 50000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerMD);
std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
InversePowerBoundsCheck(2*inv_pow, 50000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerMD);
std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
InversePowerBoundsCheck(inv_pow, 50000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerMD);
std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
InversePowerBoundsCheck(2*inv_pow, 50000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerMD);
std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
}
}
template<typename FermionImplPolicy>
void checkEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
GridCartesian* FGrid, GridParallelRNG &rng, const LatticeGaugeFieldD &latt){
std::cout << GridLogMessage << "Starting EOFA action/bounds check" << std::endl;
typename FermionImplPolicy::FermionField eta(FGrid);
RealD scale = std::sqrt(0.5);
gaussian(rng,eta); eta = eta * scale;
//Use the inbuilt check
EOFA.refresh(latt, eta);
EOFA.S(latt);
std::cout << GridLogMessage << "Finished EOFA upper action/bounds check" << std::endl;
}
template<typename FermionImplPolicy>
class EOFAlinop: public LinearOperatorBase<typename FermionImplPolicy::FermionField>{
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA;
LatticeGaugeFieldD &U;
public:
EOFAlinop(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){}
typedef typename FermionImplPolicy::FermionField Field;
void OpDiag (const Field &in, Field &out){ assert(0); }
void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); }
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); }
void Op (const Field &in, Field &out){ assert(0); }
void AdjOp (const Field &in, Field &out){ assert(0); }
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
void HermOp(const Field &in, Field &out){ EOFA.Meofa(U, in, out); }
};
template<typename FermionImplPolicy>
void upperBoundEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){
std::cout << GridLogMessage << "Starting EOFA upper bound compute" << std::endl;
EOFAlinop<FermionImplPolicy> linop(EOFA, latt);
typename FermionImplPolicy::FermionField eta(FGrid);
gaussian(rng,eta);
PowerMethod<typename FermionImplPolicy::FermionField> power_method;
auto lambda_max = power_method(linop,eta);
std::cout << GridLogMessage << "Upper bound of EOFA operator " << lambda_max << std::endl;
}
//Applications of M^{-1} cost the same as M for EOFA!
template<typename FermionImplPolicy>
class EOFAinvLinop: public LinearOperatorBase<typename FermionImplPolicy::FermionField>{
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA;
LatticeGaugeFieldD &U;
public:
EOFAinvLinop(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){}
typedef typename FermionImplPolicy::FermionField Field;
void OpDiag (const Field &in, Field &out){ assert(0); }
void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); }
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); }
void Op (const Field &in, Field &out){ assert(0); }
void AdjOp (const Field &in, Field &out){ assert(0); }
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
void HermOp(const Field &in, Field &out){ EOFA.MeofaInv(U, in, out); }
};
template<typename FermionImplPolicy>
void lowerBoundEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){
std::cout << GridLogMessage << "Starting EOFA lower bound compute using power method on M^{-1}. Inverse of highest eigenvalue is the lowest eigenvalue of M" << std::endl;
EOFAinvLinop<FermionImplPolicy> linop(EOFA, latt);
typename FermionImplPolicy::FermionField eta(FGrid);
gaussian(rng,eta);
PowerMethod<typename FermionImplPolicy::FermionField> power_method;
auto lambda_max = power_method(linop,eta);
std::cout << GridLogMessage << "Lower bound of EOFA operator " << 1./lambda_max << std::endl;
}
NAMESPACE_BEGIN(Grid);
template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class SchurOperatorF>
class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
public:
typedef typename FermionOperatorD::FermionField FieldD;
typedef typename FermionOperatorF::FermionField FieldF;
using OperatorFunction<FieldD>::operator();
RealD Tolerance;
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
Integer MaxInnerIterations;
Integer MaxOuterIterations;
GridBase* SinglePrecGrid4; //Grid for single-precision fields
GridBase* SinglePrecGrid5; //Grid for single-precision fields
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
FermionOperatorF &FermOpF;
FermionOperatorD &FermOpD;
SchurOperatorF &LinOpF;
SchurOperatorD &LinOpD;
Integer TotalInnerIterations; //Number of inner CG iterations
Integer TotalOuterIterations; //Number of restarts
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
MixedPrecisionConjugateGradientOperatorFunction(RealD tol,
Integer maxinnerit,
Integer maxouterit,
GridBase* _sp_grid4,
GridBase* _sp_grid5,
FermionOperatorF &_FermOpF,
FermionOperatorD &_FermOpD,
SchurOperatorF &_LinOpF,
SchurOperatorD &_LinOpD):
LinOpF(_LinOpF),
LinOpD(_LinOpD),
FermOpF(_FermOpF),
FermOpD(_FermOpD),
Tolerance(tol),
InnerTolerance(tol),
MaxInnerIterations(maxinnerit),
MaxOuterIterations(maxouterit),
SinglePrecGrid4(_sp_grid4),
SinglePrecGrid5(_sp_grid5),
OuterLoopNormMult(100.)
{
};
void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
precisionChange(FermOpF.Umu, FermOpD.Umu);
pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
////////////////////////////////////////////////////////////////////////////////////
// Make a mixed precision conjugate gradient
////////////////////////////////////////////////////////////////////////////////////
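// Defect-correction style: single-precision inner solves are restarted against a double-precision
// residual until the outer Tolerance is met, followed by a double-precision patch-up solve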
MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
MPCG.InnerTolerance = InnerTolerance;
std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
MPCG(src,psi);
}
};
template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class SchurOperatorF>
class MixedPrecisionReliableUpdateConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
public:
typedef typename FermionOperatorD::FermionField FieldD;
typedef typename FermionOperatorF::FermionField FieldF;
using OperatorFunction<FieldD>::operator();
RealD Tolerance;
Integer MaxIterations;
RealD Delta; //reliable update parameter
GridBase* SinglePrecGrid4; //Grid for single-precision fields
GridBase* SinglePrecGrid5; //Grid for single-precision fields
FermionOperatorF &FermOpF;
FermionOperatorD &FermOpD;
SchurOperatorF &LinOpF;
SchurOperatorD &LinOpD;
MixedPrecisionReliableUpdateConjugateGradientOperatorFunction(RealD tol,
RealD delta,
Integer maxit,
GridBase* _sp_grid4,
GridBase* _sp_grid5,
FermionOperatorF &_FermOpF,
FermionOperatorD &_FermOpD,
SchurOperatorF &_LinOpF,
SchurOperatorD &_LinOpD):
LinOpF(_LinOpF),
LinOpD(_LinOpD),
FermOpF(_FermOpF),
FermOpD(_FermOpD),
Tolerance(tol),
Delta(delta),
MaxIterations(maxit),
SinglePrecGrid4(_sp_grid4),
SinglePrecGrid5(_sp_grid5)
{
};
void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
std::cout << GridLogMessage << " Mixed precision reliable CG update wrapper operator() "<<std::endl;
SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
precisionChange(FermOpF.Umu, FermOpD.Umu);
pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
////////////////////////////////////////////////////////////////////////////////////
// Make a mixed precision conjugate gradient
////////////////////////////////////////////////////////////////////////////////////
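// Reliable updates: iterate in single precision and refresh the residual in double precision
// whenever it has fallen by a factor Delta, limiting rounding-error accumulation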
ConjugateGradientReliableUpdate<FieldD,FieldF> MPCG(Tolerance,MaxIterations,Delta,SinglePrecGrid5,LinOpF,LinOpD);
std::cout << GridLogMessage << "Calling mixed precision reliable update Conjugate Gradient" <<std::endl;
MPCG(src,psi);
}
};
NAMESPACE_END(Grid);
int main(int argc, char **argv) {
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
// here make a routine to print all the relevant information on the run
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
std::string param_file = "params.xml";
bool file_load_check = false;
std::string serial_seeds = "1 2 3 4 5";
std::string parallel_seeds = "6 7 8 9 10";
int i=1;
while(i < argc){
std::string sarg(argv[i]);
if(sarg == "--param_file"){
assert(i!=argc-1);
param_file = argv[i+1];
i+=2;
}else if(sarg == "--read_check"){ //check the fields load correctly and pass checksum/plaquette repro
file_load_check = true;
i++;
}else if(sarg == "--set_seeds"){ //set the rng seeds. Expects two vector args, e.g. --set_seeds 1.2.3.4 5.6.7.8
assert(i < argc-2);
std::vector<int> tmp;
GridCmdOptionIntVector(argv[i+1],tmp);
{
std::stringstream ss;
for(int j=0;j<tmp.size()-1;j++) ss << tmp[j] << " ";
ss << tmp.back();
serial_seeds = ss.str();
}
GridCmdOptionIntVector(argv[i+2],tmp);
{
std::stringstream ss;
for(int j=0;j<tmp.size()-1;j++) ss << tmp[j] << " ";
ss << tmp.back();
parallel_seeds = ss.str();
}
i+=3;
std::cout << GridLogMessage << "Set serial seeds to " << serial_seeds << std::endl;
std::cout << GridLogMessage << "Set parallel seeds to " << parallel_seeds << std::endl;
}else{
i++;
}
}
//Read the user parameters
EvolParameters user_params;
if(fileExists(param_file)){
std::cout << GridLogMessage << " Reading " << param_file << std::endl;
Grid::XmlReader rd(param_file);
read(rd, "Params", user_params);
}else if(!GlobalSharedMemory::WorldRank){
std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
{
Grid::XmlWriter wr(param_file + ".templ");
write(wr, "Params", user_params);
}
std::cout << GridLogMessage << " Done" << std::endl;
Grid_finalize();
return 0;
}
//Check the parameters
if(user_params.GparityDirs.size() != Nd-1){
std::cerr << "Error in input parameters: expect GparityDirs to have size = " << Nd-1 << std::endl;
exit(1);
}
for(int i=0;i<Nd-1;i++)
if(user_params.GparityDirs[i] != 0 && user_params.GparityDirs[i] != 1){
std::cerr << "Error in input parameters: expect GparityDirs values to be 0 (periodic) or 1 (G-parity)" << std::endl;
exit(1);
}
typedef GparityMobiusEOFAFermionD EOFAactionD;
typedef GparityMobiusFermionD FermionActionD;
typedef typename FermionActionD::Impl_t FermionImplPolicyD;
typedef typename FermionActionD::FermionField FermionFieldD;
typedef GparityMobiusEOFAFermionF EOFAactionF;
typedef GparityMobiusFermionF FermionActionF;
typedef typename FermionActionF::Impl_t FermionImplPolicyF;
typedef typename FermionActionF::FermionField FermionFieldF;
typedef GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicyD,FermionImplPolicyF> MixedPrecRHMC;
typedef GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicyD> DoublePrecRHMC;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
IntegratorParameters MD;
typedef ConjugateHMCRunnerD<MinimumNorm2> HMCWrapper; //NB: This is the "Omelyan integrator"
MD.name = std::string("MinimumNorm2");
// typedef ConjugateHMCRunnerD<ForceGradient> HMCWrapper;
// MD.name = std::string("ForceGradient");
MD.MDsteps = user_params.Steps;
MD.trajL = user_params.TrajectoryLength;
typedef HMCWrapper::ImplPolicy GaugeImplPolicy;
HMCparameters HMCparams;
HMCparams.StartTrajectory = user_params.StartTrajectory;
HMCparams.Trajectories = user_params.Trajectories;
HMCparams.NoMetropolisUntil= 0;
HMCparams.StartingType = user_params.StartingType;
HMCparams.MetropolisTest = user_params.MetropolisTest;
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_lat";
CPparams.rng_prefix = "ckpoint_rng";
CPparams.saveInterval = user_params.SaveInterval;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
//Note that checkpointing saves the RNG state so that this initialization is required only for the very first configuration
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = serial_seeds;
RNGpar.parallel_seeds = parallel_seeds;
TheHMC.Resources.SetRNGSeeds(RNGpar);
typedef PlaquetteMod<GaugeImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
//aiming for ainv=1.723 GeV
// (estimate columns below: me, Bob)
//Estimated a(ml+mres) [40ID] = 0.001305 0.00131
// a(mh+mres) [40ID] = 0.035910 0.03529
//Estimate Ls=12, b+c=2 mres~0.0011
//1/24/2022 initial mres measurement gives mres=0.001, adjusted light quark mass to 0.0003 from 0.0001
const int Ls = 12;
Real beta = 1.848;
Real light_mass = 0.0003;
Real strange_mass = 0.0342;
Real pv_mass = 1.0;
RealD M5 = 1.8;
RealD mobius_scale = 2.; //b+c
RealD mob_bmc = 1.0;
RealD mob_b = (mobius_scale + mob_bmc)/2.;
RealD mob_c = (mobius_scale - mob_bmc)/2.;
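//With b+c = mobius_scale = 2 and b-c = 1 this gives mob_b = 1.5, mob_c = 0.5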
std::cout << GridLogMessage
<< "Ensemble parameters:" << std::endl
<< "Ls=" << Ls << std::endl
<< "beta=" << beta << std::endl
<< "light_mass=" << light_mass << std::endl
<< "strange_mass=" << strange_mass << std::endl
<< "mobius_scale=" << mobius_scale << std::endl;
//Setup the Grids
auto UGridD = TheHMC.Resources.GetCartesian();
auto UrbGridD = TheHMC.Resources.GetRBCartesian();
auto FGridD = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD);
auto FrbGridD = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD);
GridCartesian* UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian* UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);
ConjugateIwasakiGaugeActionD GaugeAction(beta);
// temporarily need a gauge field
LatticeGaugeFieldD Ud(UGridD);
LatticeGaugeFieldF Uf(UGridF);
//Setup the BCs
FermionActionD::ImplParams Params;
for(int i=0;i<Nd-1;i++) Params.twists[i] = user_params.GparityDirs[i]; //G-parity directions
Params.twists[Nd-1] = 1; //APBC in time direction
std::vector<int> dirs4(Nd);
for(int i=0;i<Nd-1;i++) dirs4[i] = user_params.GparityDirs[i];
dirs4[Nd-1] = 0; //periodic gauge BC in time
GaugeImplPolicy::setDirections(dirs4); //gauge BC
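//Net BCs: fermions see G-parity in the flagged spatial directions and are antiperiodic in time;
//gauge links are charge-conjugated across the same spatial boundaries and periodic in time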
//Run optional gauge field checksum checker and exit
if(file_load_check){
TheHMC.initializeGaugeFieldAndRNGs(Ud);
std::cout << GridLogMessage << " Done" << std::endl;
Grid_finalize();
return 0;
}
////////////////////////////////////
// Collect actions
////////////////////////////////////
ActionLevel<HMCWrapper::Field> Level1(1); //light quark + strange quark
ActionLevel<HMCWrapper::Field> Level2(4); //DSDR
ActionLevel<HMCWrapper::Field> Level3(2); //gauge
/////////////////////////////////////////////////////////////
// Light EOFA action
// have to be careful with the parameters, cf. Test_dwf_gpforce_eofa.cc
/////////////////////////////////////////////////////////////
typedef SchurDiagMooeeOperator<EOFAactionD,FermionFieldD> EOFAschuropD;
typedef SchurDiagMooeeOperator<EOFAactionF,FermionFieldF> EOFAschuropF;
typedef ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction<FermionImplPolicyD, FermionImplPolicyF> EOFAmixPrecPFaction;
typedef MixedPrecisionConjugateGradientOperatorFunction<EOFAactionD, EOFAactionF, EOFAschuropD, EOFAschuropF> EOFA_mxCG;
typedef MixedPrecisionReliableUpdateConjugateGradientOperatorFunction<EOFAactionD, EOFAactionF, EOFAschuropD, EOFAschuropF> EOFA_relupCG;
std::vector<RealD> eofa_light_masses = { light_mass , 0.004, 0.016, 0.064, 0.256 };
std::vector<RealD> eofa_pv_masses = { 0.004 , 0.016, 0.064, 0.256, 1.0 };
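//Hasenbusch chain: factor i represents det D(m_i)/det D(m'_i) with m'_i = m_{i+1},
//so the product telescopes to det D(light_mass)/det D(pv_mass = 1.0)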
const int n_light_hsb = 5;
assert(user_params.eofa_l.size() == n_light_hsb);
EOFAmixPrecPFaction* EOFA_pfactions[n_light_hsb];
for(int i=0;i<n_light_hsb;i++){
RealD iml = eofa_light_masses[i];
RealD ipv = eofa_pv_masses[i];
EOFAactionD* LopD = new EOFAactionD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, iml, iml, ipv, 0.0, -1, M5, mob_b, mob_c, Params);
EOFAactionF* LopF = new EOFAactionF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, iml, iml, ipv, 0.0, -1, M5, mob_b, mob_c, Params);
EOFAactionD* RopD = new EOFAactionD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, ipv, iml, ipv, -1.0, 1, M5, mob_b, mob_c, Params);
EOFAactionF* RopF = new EOFAactionF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, ipv, iml, ipv, -1.0, 1, M5, mob_b, mob_c, Params);
EOFAschuropD* linopL_D = new EOFAschuropD(*LopD);
EOFAschuropD* linopR_D = new EOFAschuropD(*RopD);
EOFAschuropF* linopL_F = new EOFAschuropF(*LopF);
EOFAschuropF* linopR_F = new EOFAschuropF(*RopF);
#if 1
//Note: for now user_params.eofa_l[i].action_mixcg_inner_tolerance / md_mixcg_inner_tolerance are reused as the reliable-update Delta
EOFA_relupCG* ActionMCG_L = new EOFA_relupCG(user_params.eofa_l[i].action_tolerance, user_params.eofa_l[i].action_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
EOFA_relupCG* ActionMCG_R = new EOFA_relupCG(user_params.eofa_l[i].action_tolerance, user_params.eofa_l[i].action_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
EOFA_relupCG* DerivMCG_L = new EOFA_relupCG(user_params.eofa_l[i].md_tolerance, user_params.eofa_l[i].md_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
EOFA_relupCG* DerivMCG_R = new EOFA_relupCG(user_params.eofa_l[i].md_tolerance, user_params.eofa_l[i].md_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
#else
EOFA_mxCG* ActionMCG_L = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 50000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
ActionMCG_L->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance;
EOFA_mxCG* ActionMCG_R = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 50000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
ActionMCG_R->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance;
EOFA_mxCG* DerivMCG_L = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 50000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
DerivMCG_L->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance;
EOFA_mxCG* DerivMCG_R = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 50000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
DerivMCG_R->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance;
std::cout << GridLogMessage << "Set EOFA action solver action tolerance outer=" << ActionMCG_L->Tolerance << " inner=" << ActionMCG_L->InnerTolerance << std::endl;
std::cout << GridLogMessage << "Set EOFA MD solver tolerance outer=" << DerivMCG_L->Tolerance << " inner=" << DerivMCG_L->InnerTolerance << std::endl;
#endif
EOFAmixPrecPFaction* EOFA = new EOFAmixPrecPFaction(*LopF, *RopF,
*LopD, *RopD,
*ActionMCG_L, *ActionMCG_R,
*ActionMCG_L, *ActionMCG_R,
*DerivMCG_L, *DerivMCG_R,
user_params.eofa_l[i].rat_params, true);
EOFA_pfactions[i] = EOFA;
Level1.push_back(EOFA);
}
////////////////////////////////////
// Strange action
////////////////////////////////////
FermionActionD Numerator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD,strange_mass,M5,mob_b,mob_c,Params);
FermionActionD Denominator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD, pv_mass,M5,mob_b,mob_c,Params);
FermionActionF Numerator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF,strange_mass,M5,mob_b,mob_c,Params);
FermionActionF Denominator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF, pv_mass,M5,mob_b,mob_c,Params);
RationalActionParams rat_act_params_s;
rat_act_params_s.inv_pow = 4; // (M^dag M)^{1/4}
rat_act_params_s.precision= 60;
rat_act_params_s.MaxIter = 50000;
user_params.rat_quo_s.Export(rat_act_params_s);
std::cout << GridLogMessage << " Heavy quark bounds check every " << rat_act_params_s.BoundsCheckFreq << " trajectories (avg)" << std::endl;
//MixedPrecRHMC Quotient_s(Denominator_sD, Numerator_sD, Denominator_sF, Numerator_sF, rat_act_params_s, user_params.rat_quo_s.reliable_update_freq);
DoublePrecRHMC Quotient_s(Denominator_sD, Numerator_sD, rat_act_params_s);
Level1.push_back(&Quotient_s);
///////////////////////////////////
// DSDR action
///////////////////////////////////
RealD dsdr_mass=-1.8;
//Use same DSDR twists as https://arxiv.org/pdf/1208.4412.pdf
RealD dsdr_epsilon_f = 0.02; //numerator (in determinant)
RealD dsdr_epsilon_b = 0.5;
GparityWilsonTMFermionD Numerator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_f, Params);
GparityWilsonTMFermionF Numerator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_f, Params);
GparityWilsonTMFermionD Denominator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_b, Params);
GparityWilsonTMFermionF Denominator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_b, Params);
RationalActionParams rat_act_params_DSDR;
rat_act_params_DSDR.inv_pow = 2; // (M^dag M)^{1/2}
rat_act_params_DSDR.precision= 60;
rat_act_params_DSDR.MaxIter = 50000;
user_params.rat_quo_DSDR.Export(rat_act_params_DSDR);
std::cout << GridLogMessage << "DSDR quark bounds check every " << rat_act_params_DSDR.BoundsCheckFreq << " trajectories (avg)" << std::endl;
DoublePrecRHMC Quotient_DSDR(Denominator_DSDR_D, Numerator_DSDR_D, rat_act_params_DSDR);
Level2.push_back(&Quotient_DSDR);
/////////////////////////////////////////////////////////////
// Gauge action
/////////////////////////////////////////////////////////////
Level3.push_back(&GaugeAction);
TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
TheHMC.TheAction.push_back(Level3);
std::cout << GridLogMessage << " Action complete "<< std::endl;
//Action tuning
bool
tune_rhmc_s=false, eigenrange_s=false,
tune_rhmc_DSDR=false, eigenrange_DSDR=false,
check_eofa=false,
upper_bound_eofa=false, lower_bound_eofa=false;
std::string lanc_params_s;
std::string lanc_params_DSDR;
int tune_rhmc_s_action_or_md;
int tune_rhmc_DSDR_action_or_md;
int eofa_which_hsb;
for(int i=1;i<argc;i++){
std::string sarg(argv[i]);
if(sarg == "--tune_rhmc_s"){
assert(i < argc-1);
tune_rhmc_s=true;
tune_rhmc_s_action_or_md = std::stoi(argv[i+1]);
}
else if(sarg == "--eigenrange_s"){
assert(i < argc-1);
eigenrange_s=true;
lanc_params_s = argv[i+1];
}
else if(sarg == "--tune_rhmc_DSDR"){
assert(i < argc-1);
tune_rhmc_DSDR=true;
tune_rhmc_DSDR_action_or_md = std::stoi(argv[i+1]);
}
else if(sarg == "--eigenrange_DSDR"){
assert(i < argc-1);
eigenrange_DSDR=true;
lanc_params_DSDR = argv[i+1];
}
else if(sarg == "--check_eofa"){
assert(i < argc-1);
check_eofa = true;
eofa_which_hsb = std::stoi(argv[i+1]); //-1 indicates all hasenbusch
assert(eofa_which_hsb == -1 || (eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb) );
}
else if(sarg == "--upper_bound_eofa"){
assert(i < argc-1);
upper_bound_eofa = true;
eofa_which_hsb = std::stoi(argv[i+1]);
assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb);
}
else if(sarg == "--lower_bound_eofa"){
assert(i < argc-1);
lower_bound_eofa = true;
eofa_which_hsb = std::stoi(argv[i+1]);
assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb);
}
}
if(tune_rhmc_s || eigenrange_s || tune_rhmc_DSDR || eigenrange_DSDR || check_eofa || upper_bound_eofa || lower_bound_eofa) {
std::cout << GridLogMessage << "Running checks" << std::endl;
TheHMC.initializeGaugeFieldAndRNGs(Ud);
//std::cout << GridLogMessage << "EOFA action solver action tolerance outer=" << ActionMCG_L.Tolerance << " inner=" << ActionMCG_L.InnerTolerance << std::endl;
//std::cout << GridLogMessage << "EOFA MD solver tolerance outer=" << DerivMCG_L.Tolerance << " inner=" << DerivMCG_L.InnerTolerance << std::endl;
if(check_eofa){
if(eofa_which_hsb >= 0){
std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << eofa_which_hsb << std::endl;
checkEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << eofa_which_hsb << std::endl;
}else{
for(int i=0;i<n_light_hsb;i++){
std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << i << std::endl;
checkEOFA(*EOFA_pfactions[i], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << i << std::endl;
}
}
}
if(upper_bound_eofa) upperBoundEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
if(lower_bound_eofa) lowerBoundEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
if(eigenrange_s) computeEigenvalues<FermionActionD, FermionFieldD>(lanc_params_s, FGridD, FrbGridD, Ud, Numerator_sD, TheHMC.Resources.GetParallelRNG());
if(tune_rhmc_s) checkRHMC<FermionActionD, FermionFieldD, decltype(Quotient_s)>(FGridD, FrbGridD, Ud, Numerator_sD, Denominator_sD, Quotient_s, TheHMC.Resources.GetParallelRNG(), 4, "strange", tune_rhmc_s_action_or_md);
if(eigenrange_DSDR) computeEigenvalues<GparityWilsonTMFermionD, GparityWilsonTMFermionD::FermionField>(lanc_params_DSDR, UGridD, UrbGridD, Ud, Numerator_DSDR_D, TheHMC.Resources.GetParallelRNG());
if(tune_rhmc_DSDR) checkRHMC<GparityWilsonTMFermionD, GparityWilsonTMFermionD::FermionField, decltype(Quotient_DSDR)>(UGridD, UrbGridD, Ud, Numerator_DSDR_D, Denominator_DSDR_D, Quotient_DSDR, TheHMC.Resources.GetParallelRNG(), 2, "DSDR", tune_rhmc_DSDR_action_or_md);
std::cout << GridLogMessage << " Done" << std::endl;
Grid_finalize();
return 0;
}
//Run the HMC
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
TheHMC.Run();
std::cout << GridLogMessage << " Done" << std::endl;
Grid_finalize();
return 0;
} // main

View File

@ -1,873 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./HMC/Mobius2p1fIDSDRGparityEOFA.cc
Copyright (C) 2015-2016
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace Grid;
//Production binary for the 40ID G-parity ensemble
struct RatQuoParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(RatQuoParameters,
double, bnd_lo,
double, bnd_hi,
Integer, action_degree,
double, action_tolerance,
Integer, md_degree,
double, md_tolerance,
Integer, reliable_update_freq,
Integer, bnd_check_freq);
RatQuoParameters() {
bnd_lo = 1e-2;
bnd_hi = 30;
action_degree = 10;
action_tolerance = 1e-10;
md_degree = 10;
md_tolerance = 1e-8;
bnd_check_freq = 20;
reliable_update_freq = 50;
}
void Export(RationalActionParams &into) const{
into.lo = bnd_lo;
into.hi = bnd_hi;
into.action_degree = action_degree;
into.action_tolerance = action_tolerance;
into.md_degree = md_degree;
into.md_tolerance = md_tolerance;
into.BoundsCheckFreq = bnd_check_freq;
}
};
struct EOFAparameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(EOFAparameters,
OneFlavourRationalParams, rat_params,
double, action_tolerance,
double, action_mixcg_inner_tolerance,
double, md_tolerance,
double, md_mixcg_inner_tolerance);
EOFAparameters() {
action_mixcg_inner_tolerance = 1e-8;
action_tolerance = 1e-10;
md_tolerance = 1e-8;
md_mixcg_inner_tolerance = 1e-8;
rat_params.lo = 1.0;
rat_params.hi = 25.0;
rat_params.MaxIter = 10000;
rat_params.tolerance= 1.0e-9;
rat_params.degree = 14;
rat_params.precision= 50;
}
};
struct EvolParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(EvolParameters,
Integer, StartTrajectory,
Integer, Trajectories,
Integer, SaveInterval,
Integer, Steps,
RealD, TrajectoryLength,
bool, MetropolisTest,
std::string, StartingType,
std::vector<Integer>, GparityDirs,
std::vector<EOFAparameters>, eofa_l,
RatQuoParameters, rat_quo_s,
RatQuoParameters, rat_quo_DSDR);
EvolParameters() {
//For initial thermalization; afterwards user should switch Metropolis on and use StartingType=CheckpointStart
MetropolisTest = false;
StartTrajectory = 0;
Trajectories = 50;
SaveInterval = 5;
StartingType = "ColdStart";
GparityDirs.resize(3, 1); //1 for G-parity, 0 for periodic
Steps = 5;
TrajectoryLength = 1.0;
}
};
bool fileExists(const std::string &fn){
std::ifstream f(fn);
return f.good();
}
struct LanczosParameters: Serializable {
GRID_SERIALIZABLE_CLASS_MEMBERS(LanczosParameters,
double, alpha,
double, beta,
double, mu,
int, ord,
int, n_stop,
int, n_want,
int, n_use,
double, tolerance);
LanczosParameters() {
alpha = 35;
beta = 5;
mu = 0;
ord = 100;
n_stop = 10;
n_want = 10;
n_use = 15;
tolerance = 1e-6;
}
};
template<typename FermionActionD, typename FermionFieldD>
void computeEigenvalues(std::string param_file,
GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt, //expect lattice to have been initialized to something
FermionActionD &action, GridParallelRNG &rng){
LanczosParameters params;
if(fileExists(param_file)){
std::cout << GridLogMessage << " Reading " << param_file << std::endl;
Grid::XmlReader rd(param_file);
read(rd, "LanczosParameters", params);
}else if(!GlobalSharedMemory::WorldRank){
std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
Grid::XmlWriter wr(param_file + ".templ");
write(wr, "LanczosParameters", params);
}
FermionFieldD gauss_o(rbGrid);
FermionFieldD gauss(Grid);
gaussian(rng, gauss);
pickCheckerboard(Odd, gauss_o, gauss);
action.ImportGauge(latt);
SchurDiagMooeeOperator<FermionActionD, FermionFieldD> hermop(action);
PlainHermOp<FermionFieldD> hermop_wrap(hermop);
//ChebyshevLanczos<FermionFieldD> Cheb(params.alpha, params.beta, params.mu, params.ord);
assert(params.mu == 0.0);
Chebyshev<FermionFieldD> Cheb(params.beta*params.beta, params.alpha*params.alpha, params.ord+1);
FunctionHermOp<FermionFieldD> Cheb_wrap(Cheb, hermop);
std::cout << "IRL: alpha=" << params.alpha << " beta=" << params.beta << " mu=" << params.mu << " ord=" << params.ord << std::endl;
ImplicitlyRestartedLanczos<FermionFieldD> IRL(Cheb_wrap, hermop_wrap, params.n_stop, params.n_want, params.n_use, params.tolerance, 10000);
std::vector<RealD> eval(params.n_use);
std::vector<FermionFieldD> evec(params.n_use, rbGrid);
int Nconv;
IRL.calc(eval, evec, gauss_o, Nconv);
std::cout << "Eigenvalues:" << std::endl;
for(int i=0;i<params.n_want;i++){
std::cout << i << " " << eval[i] << std::endl;
}
}
//Check the quality of the RHMC approx
//action_or_md toggles checking the action (0), MD (1) or both (2) setups
template<typename FermionActionD, typename FermionFieldD, typename RHMCtype>
void checkRHMC(GridCartesian* Grid, GridRedBlackCartesian* rbGrid, const LatticeGaugeFieldD &latt, //expect lattice to have been initialized to something
FermionActionD &numOp, FermionActionD &denOp, RHMCtype &rhmc, GridParallelRNG &rng,
int inv_pow, const std::string &quark_descr, int action_or_md){
assert(action_or_md == 0 || action_or_md == 1 || action_or_md == 2);
FermionFieldD gauss_o(rbGrid);
FermionFieldD gauss(Grid);
gaussian(rng, gauss);
pickCheckerboard(Odd, gauss_o, gauss);
numOp.ImportGauge(latt);
denOp.ImportGauge(latt);
typedef typename FermionActionD::Impl_t FermionImplPolicyD;
SchurDifferentiableOperator<FermionImplPolicyD> MdagM(numOp);
SchurDifferentiableOperator<FermionImplPolicyD> VdagV(denOp);
PowerMethod<FermionFieldD> power_method;
RealD lambda_max;
std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " numerator" << std::endl;
lambda_max = power_method(MdagM,gauss_o);
std::cout << GridLogMessage << "Got lambda_max "<<lambda_max<<std::endl;
std::cout << "Starting: Get RHMC high bound approx for " << quark_descr << " denominator" << std::endl;
lambda_max = power_method(VdagV,gauss_o);
std::cout << GridLogMessage << "Got lambda_max "<<lambda_max<<std::endl;
if(action_or_md == 0 || action_or_md == 2){
std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
InversePowerBoundsCheck(inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerAction); //use large tolerance to prevent exit on fail; we are trying to tune here!
std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerAction);
std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
InversePowerBoundsCheck(inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerAction);
std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerAction);
std::cout << "Finished: Checking quality of RHMC action approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
}
std::cout << "-------------------------------------------------------------------------------" << std::endl;
if(action_or_md == 1 || action_or_md == 2){
std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
InversePowerBoundsCheck(inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegPowerMD);
std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, MdagM,gauss_o, rhmc.ApproxNegHalfPowerMD);
std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark numerator and power -1/" << 2*inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
InversePowerBoundsCheck(inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegPowerMD);
std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << inv_pow << std::endl;
std::cout << "Starting: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
InversePowerBoundsCheck(2*inv_pow, 10000, 1e16, VdagV,gauss_o, rhmc.ApproxNegHalfPowerMD);
std::cout << "Finished: Checking quality of RHMC MD approx for " << quark_descr << " quark denominator and power -1/" << 2*inv_pow << std::endl;
}
}
template<typename FermionImplPolicy>
void checkEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
GridCartesian* FGrid, GridParallelRNG &rng, const LatticeGaugeFieldD &latt){
std::cout << GridLogMessage << "Starting EOFA action/bounds check" << std::endl;
typename FermionImplPolicy::FermionField eta(FGrid);
RealD scale = std::sqrt(0.5);
gaussian(rng,eta); eta = eta * scale;
//Use the inbuilt check
EOFA.refresh(latt, eta);
EOFA.S(latt);
std::cout << GridLogMessage << "Finished EOFA upper action/bounds check" << std::endl;
}
template<typename FermionImplPolicy>
class EOFAlinop: public LinearOperatorBase<typename FermionImplPolicy::FermionField>{
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA;
LatticeGaugeFieldD &U;
public:
EOFAlinop(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){}
typedef typename FermionImplPolicy::FermionField Field;
void OpDiag (const Field &in, Field &out){ assert(0); }
void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); }
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); }
void Op (const Field &in, Field &out){ assert(0); }
void AdjOp (const Field &in, Field &out){ assert(0); }
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
void HermOp(const Field &in, Field &out){ EOFA.Meofa(U, in, out); }
};
template<typename FermionImplPolicy>
void upperBoundEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){
std::cout << GridLogMessage << "Starting EOFA upper bound compute" << std::endl;
EOFAlinop<FermionImplPolicy> linop(EOFA, latt);
typename FermionImplPolicy::FermionField eta(FGrid);
gaussian(rng,eta);
PowerMethod<typename FermionImplPolicy::FermionField> power_method;
auto lambda_max = power_method(linop,eta);
std::cout << GridLogMessage << "Upper bound of EOFA operator " << lambda_max << std::endl;
}
//Applications of M^{-1} cost the same as M for EOFA!
template<typename FermionImplPolicy>
class EOFAinvLinop: public LinearOperatorBase<typename FermionImplPolicy::FermionField>{
ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA;
LatticeGaugeFieldD &U;
public:
EOFAinvLinop(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA, LatticeGaugeFieldD &U): EOFA(EOFA), U(U){}
typedef typename FermionImplPolicy::FermionField Field;
void OpDiag (const Field &in, Field &out){ assert(0); }
void OpDir (const Field &in, Field &out,int dir,int disp){ assert(0); }
void OpDirAll (const Field &in, std::vector<Field> &out){ assert(0); }
void Op (const Field &in, Field &out){ assert(0); }
void AdjOp (const Field &in, Field &out){ assert(0); }
void HermOpAndNorm(const Field &in, Field &out,RealD &n1,RealD &n2){ assert(0); }
void HermOp(const Field &in, Field &out){ EOFA.MeofaInv(U, in, out); }
};
template<typename FermionImplPolicy>
void lowerBoundEOFA(ExactOneFlavourRatioPseudoFermionAction<FermionImplPolicy> &EOFA,
GridCartesian* FGrid, GridParallelRNG &rng, LatticeGaugeFieldD &latt){
std::cout << GridLogMessage << "Starting EOFA lower bound compute using power method on M^{-1}. Inverse of highest eigenvalue is the lowest eigenvalue of M" << std::endl;
EOFAinvLinop<FermionImplPolicy> linop(EOFA, latt);
typename FermionImplPolicy::FermionField eta(FGrid);
gaussian(rng,eta);
PowerMethod<typename FermionImplPolicy::FermionField> power_method;
auto lambda_max = power_method(linop,eta);
std::cout << GridLogMessage << "Lower bound of EOFA operator " << 1./lambda_max << std::endl;
}
NAMESPACE_BEGIN(Grid);
template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class SchurOperatorF>
class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
public:
typedef typename FermionOperatorD::FermionField FieldD;
typedef typename FermionOperatorF::FermionField FieldF;
using OperatorFunction<FieldD>::operator();
RealD Tolerance;
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
Integer MaxInnerIterations;
Integer MaxOuterIterations;
GridBase* SinglePrecGrid4; //Grid for single-precision fields
GridBase* SinglePrecGrid5; //Grid for single-precision fields
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
FermionOperatorF &FermOpF;
FermionOperatorD &FermOpD;
SchurOperatorF &LinOpF;
SchurOperatorD &LinOpD;
Integer TotalInnerIterations; //Number of inner CG iterations
Integer TotalOuterIterations; //Number of restarts
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
MixedPrecisionConjugateGradientOperatorFunction(RealD tol,
Integer maxinnerit,
Integer maxouterit,
GridBase* _sp_grid4,
GridBase* _sp_grid5,
FermionOperatorF &_FermOpF,
FermionOperatorD &_FermOpD,
SchurOperatorF &_LinOpF,
SchurOperatorD &_LinOpD):
LinOpF(_LinOpF),
LinOpD(_LinOpD),
FermOpF(_FermOpF),
FermOpD(_FermOpD),
Tolerance(tol),
InnerTolerance(tol),
MaxInnerIterations(maxinnerit),
MaxOuterIterations(maxouterit),
SinglePrecGrid4(_sp_grid4),
SinglePrecGrid5(_sp_grid5),
OuterLoopNormMult(100.)
{
};
void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
precisionChange(FermOpF.Umu, FermOpD.Umu);
pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
////////////////////////////////////////////////////////////////////////////////////
// Make a mixed precision conjugate gradient
////////////////////////////////////////////////////////////////////////////////////
MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
MPCG.InnerTolerance = InnerTolerance;
std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
MPCG(src,psi);
}
};
template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class SchurOperatorF>
class MixedPrecisionReliableUpdateConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
public:
typedef typename FermionOperatorD::FermionField FieldD;
typedef typename FermionOperatorF::FermionField FieldF;
using OperatorFunction<FieldD>::operator();
RealD Tolerance;
Integer MaxIterations;
RealD Delta; //reliable update parameter
GridBase* SinglePrecGrid4; //Grid for single-precision fields
GridBase* SinglePrecGrid5; //Grid for single-precision fields
FermionOperatorF &FermOpF;
FermionOperatorD &FermOpD;
SchurOperatorF &LinOpF;
SchurOperatorD &LinOpD;
MixedPrecisionReliableUpdateConjugateGradientOperatorFunction(RealD tol,
RealD delta,
Integer maxit,
GridBase* _sp_grid4,
GridBase* _sp_grid5,
FermionOperatorF &_FermOpF,
FermionOperatorD &_FermOpD,
SchurOperatorF &_LinOpF,
SchurOperatorD &_LinOpD):
LinOpF(_LinOpF),
LinOpD(_LinOpD),
FermOpF(_FermOpF),
FermOpD(_FermOpD),
Tolerance(tol),
Delta(delta),
MaxIterations(maxit),
SinglePrecGrid4(_sp_grid4),
SinglePrecGrid5(_sp_grid5)
{
};
void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
std::cout << GridLogMessage << " Mixed precision reliable CG update wrapper operator() "<<std::endl;
SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
precisionChange(FermOpF.Umu, FermOpD.Umu);
pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
////////////////////////////////////////////////////////////////////////////////////
// Make a mixed precision conjugate gradient
////////////////////////////////////////////////////////////////////////////////////
ConjugateGradientReliableUpdate<FieldD,FieldF> MPCG(Tolerance,MaxIterations,Delta,SinglePrecGrid5,LinOpF,LinOpD);
std::cout << GridLogMessage << "Calling mixed precision reliable update Conjugate Gradient" <<std::endl;
MPCG(src,psi);
}
};
NAMESPACE_END(Grid);
int main(int argc, char **argv) {
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
// here make a routine to print all the relevant information on the run
std::cout << GridLogMessage << "Grid is setup to use " << threads << " threads" << std::endl;
std::string param_file = "params.xml";
bool file_load_check = false;
for(int i=1;i<argc;i++){
std::string sarg(argv[i]);
if(sarg == "--param_file"){
assert(i!=argc-1);
param_file = argv[i+1];
}else if(sarg == "--read_check"){ //check the fields load correctly and pass checksum/plaquette repro
file_load_check = true;
}
}
//Read the user parameters
EvolParameters user_params;
if(fileExists(param_file)){
std::cout << GridLogMessage << " Reading " << param_file << std::endl;
Grid::XmlReader rd(param_file);
read(rd, "Params", user_params);
}else if(!GlobalSharedMemory::WorldRank){
std::cout << GridLogMessage << " File " << param_file << " does not exist" << std::endl;
std::cout << GridLogMessage << " Writing xml template to " << param_file << ".templ" << std::endl;
{
Grid::XmlWriter wr(param_file + ".templ");
write(wr, "Params", user_params);
}
std::cout << GridLogMessage << " Done" << std::endl;
Grid_finalize();
return 0;
}
//Check the parameters
if(user_params.GparityDirs.size() != Nd-1){
std::cerr << "Error in input parameters: expect GparityDirs to have size = " << Nd-1 << std::endl;
exit(1);
}
for(int i=0;i<Nd-1;i++)
if(user_params.GparityDirs[i] != 0 && user_params.GparityDirs[i] != 1){
std::cerr << "Error in input parameters: expect GparityDirs values to be 0 (periodic) or 1 (G-parity)" << std::endl;
exit(1);
}
typedef GparityMobiusEOFAFermionD EOFAactionD;
typedef GparityMobiusFermionD FermionActionD;
typedef typename FermionActionD::Impl_t FermionImplPolicyD;
typedef typename FermionActionD::FermionField FermionFieldD;
typedef GparityMobiusEOFAFermionF EOFAactionF;
typedef GparityMobiusFermionF FermionActionF;
typedef typename FermionActionF::Impl_t FermionImplPolicyF;
typedef typename FermionActionF::FermionField FermionFieldF;
typedef GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicyD,FermionImplPolicyF> MixedPrecRHMC;
typedef GeneralEvenOddRatioRationalPseudoFermionAction<FermionImplPolicyD> DoublePrecRHMC;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
IntegratorParameters MD;
typedef ConjugateHMCRunnerD<MinimumNorm2> HMCWrapper; //NB: This is the "Omelyan integrator"
typedef HMCWrapper::ImplPolicy GaugeImplPolicy;
MD.name = std::string("MinimumNorm2");
MD.MDsteps = user_params.Steps;
MD.trajL = user_params.TrajectoryLength;
HMCparameters HMCparams;
HMCparams.StartTrajectory = user_params.StartTrajectory;
HMCparams.Trajectories = user_params.Trajectories;
HMCparams.NoMetropolisUntil= 0;
HMCparams.StartingType = user_params.StartingType;
HMCparams.MetropolisTest = user_params.MetropolisTest;
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_lat";
CPparams.rng_prefix = "ckpoint_rng";
CPparams.saveInterval = user_params.SaveInterval;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
//Note that checkpointing saves the RNG state so that this initialization is required only for the very first configuration
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = "1 2 3 4 5";
RNGpar.parallel_seeds = "6 7 8 9 10";
TheHMC.Resources.SetRNGSeeds(RNGpar);
typedef PlaquetteMod<GaugeImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
//aiming for ainv=2.068 me Bob
//Estimated a(ml+mres) [48ID] = 0.001048 0.00104
// a(mh+mres) [48ID] = 0.028847 0.02805
//Estimate Ls=12, b+c=2 mres~0.0003
const int Ls = 12;
Real beta = 1.946;
Real light_mass = 0.00074; //0.00104 - mres_approx;
Real strange_mass = 0.02775; //0.02805 - mres_approx
Real pv_mass = 1.0;
RealD M5 = 1.8;
RealD mobius_scale = 2.; //b+c
RealD mob_bmc = 1.0;
RealD mob_b = (mobius_scale + mob_bmc)/2.;
RealD mob_c = (mobius_scale - mob_bmc)/2.;
//Setup the Grids
auto UGridD = TheHMC.Resources.GetCartesian();
auto UrbGridD = TheHMC.Resources.GetRBCartesian();
auto FGridD = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridD);
auto FrbGridD = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridD);
GridCartesian* UGridF = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd, vComplexF::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian* UrbGridF = SpaceTimeGrid::makeFourDimRedBlackGrid(UGridF);
auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,UGridF);
auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGridF);
ConjugateIwasakiGaugeActionD GaugeAction(beta);
// temporarily need a gauge field
LatticeGaugeFieldD Ud(UGridD);
LatticeGaugeFieldF Uf(UGridF);
//Setup the BCs
FermionActionD::ImplParams Params;
for(int i=0;i<Nd-1;i++) Params.twists[i] = user_params.GparityDirs[i]; //G-parity directions
Params.twists[Nd-1] = 1; //APBC in time direction
std::vector<int> dirs4(Nd);
for(int i=0;i<Nd-1;i++) dirs4[i] = user_params.GparityDirs[i];
dirs4[Nd-1] = 0; //periodic gauge BC in time
GaugeImplPolicy::setDirections(dirs4); //gauge BC
//Run optional gauge field checksum checker and exit
if(file_load_check){
TheHMC.initializeGaugeFieldAndRNGs(Ud);
std::cout << GridLogMessage << " Done" << std::endl;
Grid_finalize();
return 0;
}
////////////////////////////////////
// Collect actions
////////////////////////////////////
ActionLevel<HMCWrapper::Field> Level1(1); //light quark + strange quark
ActionLevel<HMCWrapper::Field> Level2(4); //DSDR
ActionLevel<HMCWrapper::Field> Level3(2); //gauge
/////////////////////////////////////////////////////////////
// Light EOFA action
// have to be careful with the parameters, cf. Test_dwf_gpforce_eofa.cc
/////////////////////////////////////////////////////////////
typedef SchurDiagMooeeOperator<EOFAactionD,FermionFieldD> EOFAschuropD;
typedef SchurDiagMooeeOperator<EOFAactionF,FermionFieldF> EOFAschuropF;
typedef ExactOneFlavourRatioMixedPrecHeatbathPseudoFermionAction<FermionImplPolicyD, FermionImplPolicyF> EOFAmixPrecPFaction;
typedef MixedPrecisionConjugateGradientOperatorFunction<EOFAactionD, EOFAactionF, EOFAschuropD, EOFAschuropF> EOFA_mxCG;
typedef MixedPrecisionReliableUpdateConjugateGradientOperatorFunction<EOFAactionD, EOFAactionF, EOFAschuropD, EOFAschuropF> EOFA_relupCG;
std::vector<RealD> eofa_light_masses = { light_mass , 0.004, 0.016, 0.064, 0.256 };
std::vector<RealD> eofa_pv_masses = { 0.004 , 0.016, 0.064, 0.256, 1.0 };
int n_light_hsb = 5;
assert(user_params.eofa_l.size() == n_light_hsb);
EOFAmixPrecPFaction* EOFA_pfactions[n_light_hsb];
for(int i=0;i<n_light_hsb;i++){
RealD iml = eofa_light_masses[i];
RealD ipv = eofa_pv_masses[i];
EOFAactionD* LopD = new EOFAactionD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, iml, iml, ipv, 0.0, -1, M5, mob_b, mob_c, Params);
EOFAactionF* LopF = new EOFAactionF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, iml, iml, ipv, 0.0, -1, M5, mob_b, mob_c, Params);
EOFAactionD* RopD = new EOFAactionD(Ud, *FGridD, *FrbGridD, *UGridD, *UrbGridD, ipv, iml, ipv, -1.0, 1, M5, mob_b, mob_c, Params);
EOFAactionF* RopF = new EOFAactionF(Uf, *FGridF, *FrbGridF, *UGridF, *UrbGridF, ipv, iml, ipv, -1.0, 1, M5, mob_b, mob_c, Params);
EOFAschuropD* linopL_D = new EOFAschuropD(*LopD);
EOFAschuropD* linopR_D = new EOFAschuropD(*RopD);
EOFAschuropF* linopL_F = new EOFAschuropF(*LopF);
EOFAschuropF* linopR_F = new EOFAschuropF(*RopF);
#if 1
//Note reusing user_params.eofa_l.action(|md)_mixcg_inner_tolerance as Delta for now
EOFA_relupCG* ActionMCG_L = new EOFA_relupCG(user_params.eofa_l[i].action_tolerance, user_params.eofa_l[i].action_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
EOFA_relupCG* ActionMCG_R = new EOFA_relupCG(user_params.eofa_l[i].action_tolerance, user_params.eofa_l[i].action_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
EOFA_relupCG* DerivMCG_L = new EOFA_relupCG(user_params.eofa_l[i].md_tolerance, user_params.eofa_l[i].md_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
EOFA_relupCG* DerivMCG_R = new EOFA_relupCG(user_params.eofa_l[i].md_tolerance, user_params.eofa_l[i].md_mixcg_inner_tolerance, 50000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
#else
EOFA_mxCG* ActionMCG_L = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 10000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
ActionMCG_L->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance;
EOFA_mxCG* ActionMCG_R = new EOFA_mxCG(user_params.eofa_l[i].action_tolerance, 10000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
ActionMCG_R->InnerTolerance = user_params.eofa_l[i].action_mixcg_inner_tolerance;
EOFA_mxCG* DerivMCG_L = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 10000, 1000, UGridF, FrbGridF, *LopF, *LopD, *linopL_F, *linopL_D);
DerivMCG_L->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance;
EOFA_mxCG* DerivMCG_R = new EOFA_mxCG(user_params.eofa_l[i].md_tolerance, 10000, 1000, UGridF, FrbGridF, *RopF, *RopD, *linopR_F, *linopR_D);
DerivMCG_R->InnerTolerance = user_params.eofa_l[i].md_mixcg_inner_tolerance;
std::cout << GridLogMessage << "Set EOFA action solver action tolerance outer=" << ActionMCG_L->Tolerance << " inner=" << ActionMCG_L->InnerTolerance << std::endl;
std::cout << GridLogMessage << "Set EOFA MD solver tolerance outer=" << DerivMCG_L->Tolerance << " inner=" << DerivMCG_L->InnerTolerance << std::endl;
#endif
EOFAmixPrecPFaction* EOFA = new EOFAmixPrecPFaction(*LopF, *RopF,
*LopD, *RopD,
*ActionMCG_L, *ActionMCG_R,
*ActionMCG_L, *ActionMCG_R,
*DerivMCG_L, *DerivMCG_R,
user_params.eofa_l[i].rat_params, true);
EOFA_pfactions[i] = EOFA;
Level1.push_back(EOFA);
}
////////////////////////////////////
// Strange action
////////////////////////////////////
FermionActionD Numerator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD,strange_mass,M5,mob_b,mob_c,Params);
FermionActionD Denominator_sD(Ud,*FGridD,*FrbGridD,*UGridD,*UrbGridD, pv_mass,M5,mob_b,mob_c,Params);
FermionActionF Numerator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF,strange_mass,M5,mob_b,mob_c,Params);
FermionActionF Denominator_sF(Uf,*FGridF,*FrbGridF,*UGridF,*UrbGridF, pv_mass,M5,mob_b,mob_c,Params);
RationalActionParams rat_act_params_s;
rat_act_params_s.inv_pow = 4; // (M^dag M)^{1/4}
rat_act_params_s.precision= 60;
rat_act_params_s.MaxIter = 10000;
user_params.rat_quo_s.Export(rat_act_params_s);
std::cout << GridLogMessage << " Heavy quark bounds check every " << rat_act_params_s.BoundsCheckFreq << " trajectories (avg)" << std::endl;
//MixedPrecRHMC Quotient_s(Denominator_sD, Numerator_sD, Denominator_sF, Numerator_sF, rat_act_params_s, user_params.rat_quo_s.reliable_update_freq);
DoublePrecRHMC Quotient_s(Denominator_sD, Numerator_sD, rat_act_params_s);
Level1.push_back(&Quotient_s);
///////////////////////////////////
// DSDR action
///////////////////////////////////
RealD dsdr_mass=-1.8;
//Use same DSDR twists as https://arxiv.org/pdf/1208.4412.pdf
RealD dsdr_epsilon_f = 0.02; //numerator (in determinant)
RealD dsdr_epsilon_b = 0.5;
GparityWilsonTMFermionD Numerator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_f, Params);
GparityWilsonTMFermionF Numerator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_f, Params);
GparityWilsonTMFermionD Denominator_DSDR_D(Ud, *UGridD, *UrbGridD, dsdr_mass, dsdr_epsilon_b, Params);
GparityWilsonTMFermionF Denominator_DSDR_F(Uf, *UGridF, *UrbGridF, dsdr_mass, dsdr_epsilon_b, Params);
RationalActionParams rat_act_params_DSDR;
rat_act_params_DSDR.inv_pow = 2; // (M^dag M)^{1/2}
rat_act_params_DSDR.precision= 60;
rat_act_params_DSDR.MaxIter = 10000;
user_params.rat_quo_DSDR.Export(rat_act_params_DSDR);
std::cout << GridLogMessage << "DSDR quark bounds check every " << rat_act_params_DSDR.BoundsCheckFreq << " trajectories (avg)" << std::endl;
DoublePrecRHMC Quotient_DSDR(Denominator_DSDR_D, Numerator_DSDR_D, rat_act_params_DSDR);
Level2.push_back(&Quotient_DSDR);
/////////////////////////////////////////////////////////////
// Gauge action
/////////////////////////////////////////////////////////////
Level3.push_back(&GaugeAction);
TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
TheHMC.TheAction.push_back(Level3);
std::cout << GridLogMessage << " Action complete "<< std::endl;
//Action tuning
bool
tune_rhmc_s=false, eigenrange_s=false,
tune_rhmc_DSDR=false, eigenrange_DSDR=false,
check_eofa=false,
upper_bound_eofa=false, lower_bound_eofa(false);
std::string lanc_params_s;
std::string lanc_params_DSDR;
int tune_rhmc_s_action_or_md;
int tune_rhmc_DSDR_action_or_md;
int eofa_which_hsb;
for(int i=1;i<argc;i++){
std::string sarg(argv[i]);
if(sarg == "--tune_rhmc_s"){
assert(i < argc-1);
tune_rhmc_s=true;
tune_rhmc_s_action_or_md = std::stoi(argv[i+1]);
}
else if(sarg == "--eigenrange_s"){
assert(i < argc-1);
eigenrange_s=true;
lanc_params_s = argv[i+1];
}
else if(sarg == "--tune_rhmc_DSDR"){
assert(i < argc-1);
tune_rhmc_DSDR=true;
tune_rhmc_DSDR_action_or_md = std::stoi(argv[i+1]);
}
else if(sarg == "--eigenrange_DSDR"){
assert(i < argc-1);
eigenrange_DSDR=true;
lanc_params_DSDR = argv[i+1];
}
else if(sarg == "--check_eofa"){
assert(i < argc-1);
check_eofa = true;
eofa_which_hsb = std::stoi(argv[i+1]); //-1 indicates all hasenbusch
assert(eofa_which_hsb == -1 || (eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb) );
}
else if(sarg == "--upper_bound_eofa"){
assert(i < argc-1);
upper_bound_eofa = true;
eofa_which_hsb = std::stoi(argv[i+1]);
assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb);
}
else if(sarg == "--lower_bound_eofa"){
assert(i < argc-1);
lower_bound_eofa = true;
eofa_which_hsb = std::stoi(argv[i+1]);
assert(eofa_which_hsb >= 0 && eofa_which_hsb < n_light_hsb);
}
}
if(tune_rhmc_s || eigenrange_s || tune_rhmc_DSDR || eigenrange_DSDR ||check_eofa || upper_bound_eofa || lower_bound_eofa) {
std::cout << GridLogMessage << "Running checks" << std::endl;
TheHMC.initializeGaugeFieldAndRNGs(Ud);
//std::cout << GridLogMessage << "EOFA action solver action tolerance outer=" << ActionMCG_L.Tolerance << " inner=" << ActionMCG_L.InnerTolerance << std::endl;
//std::cout << GridLogMessage << "EOFA MD solver tolerance outer=" << DerivMCG_L.Tolerance << " inner=" << DerivMCG_L.InnerTolerance << std::endl;
if(check_eofa){
if(eofa_which_hsb >= 0){
std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << eofa_which_hsb << std::endl;
checkEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << eofa_which_hsb << std::endl;
}else{
for(int i=0;i<n_light_hsb;i++){
std::cout << GridLogMessage << "Starting checking EOFA Hasenbusch " << i << std::endl;
checkEOFA(*EOFA_pfactions[i], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
std::cout << GridLogMessage << "Finished checking EOFA Hasenbusch " << i << std::endl;
}
}
}
if(upper_bound_eofa) upperBoundEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
if(lower_bound_eofa) lowerBoundEOFA(*EOFA_pfactions[eofa_which_hsb], FGridD, TheHMC.Resources.GetParallelRNG(), Ud);
if(eigenrange_s) computeEigenvalues<FermionActionD, FermionFieldD>(lanc_params_s, FGridD, FrbGridD, Ud, Numerator_sD, TheHMC.Resources.GetParallelRNG());
if(tune_rhmc_s) checkRHMC<FermionActionD, FermionFieldD, decltype(Quotient_s)>(FGridD, FrbGridD, Ud, Numerator_sD, Denominator_sD, Quotient_s, TheHMC.Resources.GetParallelRNG(), 4, "strange", tune_rhmc_s_action_or_md);
if(eigenrange_DSDR) computeEigenvalues<GparityWilsonTMFermionD, GparityWilsonTMFermionD::FermionField>(lanc_params_DSDR, UGridD, UrbGridD, Ud, Numerator_DSDR_D, TheHMC.Resources.GetParallelRNG());
if(tune_rhmc_DSDR) checkRHMC<GparityWilsonTMFermionD, GparityWilsonTMFermionD::FermionField, decltype(Quotient_DSDR)>(UGridD, UrbGridD, Ud, Numerator_DSDR_D, Denominator_DSDR_D, Quotient_DSDR, TheHMC.Resources.GetParallelRNG(), 2, "DSDR", tune_rhmc_DSDR_action_or_md);
std::cout << GridLogMessage << " Done" << std::endl;
Grid_finalize();
return 0;
}
//Run the HMC
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
TheHMC.Run();
std::cout << GridLogMessage << " Done" << std::endl;
Grid_finalize();
return 0;
} // main

View File

@ -128,8 +128,14 @@ template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, c
////////////////////////////////////////////////////////////////////////////////////
// Make a mixed precision conjugate gradient
////////////////////////////////////////////////////////////////////////////////////
MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
#if 1
RealD delta=1.e-4;
std::cout << GridLogMessage << "Calling reliable update Conjugate Gradient" <<std::endl;
ConjugateGradientReliableUpdate<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations*MaxOuterIterations,delta,SinglePrecGrid5,LinOpF,LinOpD);
#else
std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
#endif
MPCG(src,psi);
}
};
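The hunk above swaps the two-level MixedPrecisionConjugateGradient for ConjugateGradientReliableUpdate with delta = 1.e-4. A reliable-update solver keeps iterating with a cheap (single precision) operator and only occasionally recomputes the true residual b - A x accurately, resetting whenever the iterated residual has dropped by delta since the last reset. The standalone sketch below shows that control flow only, on a toy SPD tridiagonal operator in plain double precision; it is not Grid's ConjugateGradientReliableUpdate and omits the single/double precision split.
#include <cstdio>
#include <cmath>
#include <vector>
typedef std::vector<double> Vec;
// Small SPD tridiagonal operator standing in for MdagM.
static Vec applyA(const Vec &x){
int n = x.size();
Vec y(n);
for(int i=0;i<n;i++){
y[i] = 4.0*x[i];
if(i>0) y[i] -= x[i-1];
if(i<n-1) y[i] -= x[i+1];
}
return y;
}
static double dot(const Vec &a, const Vec &b){
double s=0; for(size_t i=0;i<a.size();i++) s += a[i]*b[i]; return s;
}
int main(void){
const int n = 64;
const double tol = 1.0e-10, delta = 1.0e-4; // delta plays the role of the 1.e-4 above
Vec b(n,1.0), x(n,0.0);
Vec r = b; // r = b - A x with x = 0
Vec p = r;
double rr = dot(r,r), rr_reliable = rr;
const double rsq = tol*tol*dot(b,b);
for(int k=1;k<=10000;k++){
Vec Ap = applyA(p);
double alpha = rr/dot(p,Ap);
for(int i=0;i<n;i++){ x[i] += alpha*p[i]; r[i] -= alpha*Ap[i]; }
double rr_new = dot(r,r);
// Reliable update: once the iterated residual has fallen by delta since the last
// refresh, recompute the true residual b - A x to flush accumulated rounding error.
if( rr_new < delta*delta*rr_reliable ){
Vec Ax = applyA(x);
for(int i=0;i<n;i++) r[i] = b[i] - Ax[i];
rr_new = dot(r,r);
rr_reliable = rr_new;
std::printf("reliable update at iteration %d, |r|^2 = %g\n", k, rr_new);
}
if( rr_new <= rsq ){ std::printf("converged in %d iterations\n", k); break; }
double beta = rr_new/rr;
for(int i=0;i<n;i++) p[i] = r[i] + beta*p[i];
rr = rr_new;
}
return 0;
}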
@ -141,6 +147,10 @@ int main(int argc, char **argv) {
using namespace Grid;
Grid_init(&argc, &argv);
CartesianCommunicator::BarrierWorld();
std::cout << GridLogMessage << " Clock skew check" <<std::endl;
int threads = GridThread::GetThreads();
// Typedefs to simplify notation
@ -161,7 +171,7 @@ int main(int argc, char **argv) {
// MD.name = std::string("Force Gradient");
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
MD.name = std::string("MinimumNorm2");
MD.MDsteps = 4;
MD.MDsteps = 6;
MD.trajL = 1.0;
HMCparameters HMCparams;
@ -183,7 +193,7 @@ int main(int argc, char **argv) {
CPparams.saveInterval = 1;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
std::cout << "loaded NERSC checpointer"<<std::endl;
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = "1 2 3 4 5";
RNGpar.parallel_seeds = "6 7 8 9 10";
@ -204,7 +214,8 @@ int main(int argc, char **argv) {
Real light_mass = 7.8e-4;
Real strange_mass = 0.02132;
Real pv_mass = 1.0;
std::vector<Real> hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
// std::vector<Real> hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
std::vector<Real> hasenbusch({ light_mass, 5e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
// FIXME:
// Same in MC and MD
@ -287,6 +298,7 @@ int main(int argc, char **argv) {
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
TheHMC.initializeGaugeFieldAndRNGs(U);
std::cout << "loaded NERSC gauge field"<<std::endl;
// These lines are unnecessary if BC are all periodic

View File

@ -0,0 +1,474 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_hmc_EODWFRatio.cc
Copyright (C) 2015-2016
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
Author: Guido Cossu <guido.cossu@ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution
directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
NAMESPACE_BEGIN(Grid);
template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class SchurOperatorF>
class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
public:
typedef typename FermionOperatorD::FermionField FieldD;
typedef typename FermionOperatorF::FermionField FieldF;
using OperatorFunction<FieldD>::operator();
RealD Tolerance;
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
Integer MaxInnerIterations;
Integer MaxOuterIterations;
GridBase* SinglePrecGrid4; //Grid for single-precision fields
GridBase* SinglePrecGrid5; //Grid for single-precision fields
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
FermionOperatorF &FermOpF;
FermionOperatorD &FermOpD;
SchurOperatorF &LinOpF;
SchurOperatorD &LinOpD;
Integer TotalInnerIterations; //Number of inner CG iterations
Integer TotalOuterIterations; //Number of restarts
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
MixedPrecisionConjugateGradientOperatorFunction(RealD tol,
Integer maxinnerit,
Integer maxouterit,
GridBase* _sp_grid4,
GridBase* _sp_grid5,
FermionOperatorF &_FermOpF,
FermionOperatorD &_FermOpD,
SchurOperatorF &_LinOpF,
SchurOperatorD &_LinOpD):
LinOpF(_LinOpF),
LinOpD(_LinOpD),
FermOpF(_FermOpF),
FermOpD(_FermOpD),
Tolerance(tol),
InnerTolerance(tol),
MaxInnerIterations(maxinnerit),
MaxOuterIterations(maxouterit),
SinglePrecGrid4(_sp_grid4),
SinglePrecGrid5(_sp_grid5),
OuterLoopNormMult(100.)
{
/* Debugging instances of objects; references are stored
std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " <<std::hex<< &LinOpF<<std::dec <<std::endl;
std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpD " <<std::hex<< &LinOpD<<std::dec <<std::endl;
std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpF " <<std::hex<< &FermOpF<<std::dec <<std::endl;
std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <<std::hex<< &FermOpD<<std::dec <<std::endl;
*/
};
void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
// std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <<std::hex<< &(SchurOpU->_Mat)<<std::dec <<std::endl;
// std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpD " <<std::hex<< &(LinOpD._Mat) <<std::dec <<std::endl;
// Assumption made in code to extract gauge field
// We could avoid storing the LinOpD reference altogether ?
assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
////////////////////////////////////////////////////////////////////////////////////
// Must snarf a single precision copy of the gauge field in Linop_d argument
////////////////////////////////////////////////////////////////////////////////////
typedef typename FermionOperatorF::GaugeField GaugeFieldF;
typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF;
typedef typename FermionOperatorD::GaugeField GaugeFieldD;
typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD;
GridBase * GridPtrF = SinglePrecGrid4;
GridBase * GridPtrD = FermOpD.Umu.Grid();
GaugeFieldF U_f (GridPtrF);
GaugeLinkFieldF Umu_f(GridPtrF);
// std::cout << " Dim gauge field "<<GridPtrF->Nd()<<std::endl; // 4d
// std::cout << " Dim gauge field "<<GridPtrD->Nd()<<std::endl; // 4d
////////////////////////////////////////////////////////////////////////////////////
// Moving this to a Clone method of the fermion operator would allow the physics
// parameters to be duplicated and would reduce the number of gauge field copies
////////////////////////////////////////////////////////////////////////////////////
GaugeLinkFieldD Umu_d(GridPtrD);
for(int mu=0;mu<Nd*2;mu++){
Umu_d = PeekIndex<LorentzIndex>(FermOpD.Umu, mu);
precisionChange(Umu_f,Umu_d);
PokeIndex<LorentzIndex>(FermOpF.Umu, Umu_f, mu);
}
pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
////////////////////////////////////////////////////////////////////////////////////
// Make a mixed precision conjugate gradient
////////////////////////////////////////////////////////////////////////////////////
#if 1
RealD delta=1.e-4;
std::cout << GridLogMessage << "Calling reliable update Conjugate Gradient" <<std::endl;
ConjugateGradientReliableUpdate<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations*MaxOuterIterations,delta,SinglePrecGrid5,LinOpF,LinOpD);
#else
std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
#endif
MPCG(src,psi);
}
};
NAMESPACE_END(Grid);
int main(int argc, char **argv) {
using namespace Grid;
Grid_init(&argc, &argv);
int threads = GridThread::GetThreads();
// Typedefs to simplify notation
typedef WilsonImplR FermionImplPolicy;
typedef WilsonImplF FermionImplPolicyF;
typedef MobiusFermionR FermionAction;
typedef MobiusFermionF FermionActionF;
typedef typename FermionAction::FermionField FermionField;
typedef typename FermionActionF::FermionField FermionFieldF;
typedef Grid::XmlReader Serialiser;
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
IntegratorParameters MD;
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
// MD.name = std::string("Leap Frog");
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
// MD.name = std::string("Force Gradient");
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
MD.name = std::string("MinimumNorm2");
MD.MDsteps = 6;
MD.trajL = 1.0;
HMCparameters HMCparams;
HMCparams.StartTrajectory = 1077;
HMCparams.Trajectories = 1;
HMCparams.NoMetropolisUntil= 0;
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
// HMCparams.StartingType =std::string("ColdStart");
HMCparams.StartingType =std::string("CheckpointStart");
HMCparams.MD = MD;
HMCWrapper TheHMC(HMCparams);
// Grid from the command line arguments --grid and --mpi
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
CheckpointerParameters CPparams;
CPparams.config_prefix = "ckpoint_DDHMC_lat";
CPparams.rng_prefix = "ckpoint_DDHMC_rng";
CPparams.saveInterval = 1;
CPparams.format = "IEEE64BIG";
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
RNGModuleParameters RNGpar;
RNGpar.serial_seeds = "1 2 3 4 5";
RNGpar.parallel_seeds = "6 7 8 9 10";
TheHMC.Resources.SetRNGSeeds(RNGpar);
// Construct observables
// here there is too much indirection
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
TheHMC.Resources.AddObservable<PlaqObs>();
//////////////////////////////////////////////
const int Ls = 12;
RealD M5 = 1.8;
RealD b = 1.5;
RealD c = 0.5;
Real beta = 2.31;
// Real light_mass = 5.4e-4;
Real light_mass = 7.8e-4;
Real strange_mass = 0.02132;
Real pv_mass = 1.0;
// std::vector<Real> hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
std::vector<Real> hasenbusch({ light_mass, 5e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
// FIXME:
// Same in MC and MD
// Need to mix precision too
OneFlavourRationalParams SFRp; // Strange
SFRp.lo = 4.0e-3;
SFRp.hi = 90.0;
SFRp.MaxIter = 60000;
SFRp.tolerance= 1.0e-8;
SFRp.mdtolerance= 1.0e-6;
SFRp.degree = 12;
SFRp.precision= 50;
SFRp.BoundsCheckFreq=0;
OneFlavourRationalParams OFRp; // Up/down
OFRp.lo = 2.0e-5;
OFRp.hi = 90.0;
OFRp.MaxIter = 60000;
OFRp.tolerance= 1.0e-8;
OFRp.mdtolerance= 1.0e-6;
// OFRp.degree = 20; converges
// OFRp.degree = 16;
OFRp.degree = 12;
OFRp.precision= 80;
OFRp.BoundsCheckFreq=0;
auto GridPtr = TheHMC.Resources.GetCartesian();
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,LinearOperatorD,LinearOperatorF> MxPCG;
////////////////////////////////////////////////////////////////
// Domain decomposed
////////////////////////////////////////////////////////////////
Coordinate latt4 = GridPtr->GlobalDimensions();
Coordinate mpi = GridPtr->ProcessorGrid();
Coordinate shm;
GlobalSharedMemory::GetShmDims(mpi,shm);
Coordinate CommDim(Nd);
for(int d=0;d<Nd;d++) CommDim[d]= (mpi[d]/shm[d])>1 ? 1 : 0;
Coordinate NonDirichlet(Nd+1,0);
Coordinate Dirichlet(Nd+1,0);
Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0];
Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1];
Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2];
Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3];
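// In other words: Dirichlet[mu+1] is the per-node local extent latt4[mu]/mpi[mu]*shm[mu]
// when direction mu is split across nodes (CommDim[mu] = 1), and 0 (no Dirichlet cut)
// when that direction lives entirely inside one node; Block4 below reuses these extents
// for the DDHMC momentum filter.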
Coordinate Block4(Nd);
Block4[0] = Dirichlet[1];
Block4[1] = Dirichlet[2];
Block4[2] = Dirichlet[3];
Block4[3] = Dirichlet[4];
int Width=3;
TheHMC.Resources.SetMomentumFilter(new DDHMCFilter<WilsonImplR::Field>(Block4,Width));
//////////////////////////
// Fermion Grids
//////////////////////////
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
Coordinate simdF = GridDefaultSimd(Nd,vComplexF::Nsimd());
auto GridPtrF = SpaceTimeGrid::makeFourDimGrid(latt4,simdF,mpi);
auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF);
auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF);
auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF);
IwasakiGaugeActionR GaugeAction(beta);
// temporarily need a gauge field
LatticeGaugeField U(GridPtr);
LatticeGaugeFieldF UF(GridPtrF);
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
TheHMC.initializeGaugeFieldAndRNGs(U);
// These lines are unnecessary if BC are all periodic
std::vector<Complex> boundary = {1,1,1,-1};
FermionAction::ImplParams Params(boundary);
Params.dirichlet=NonDirichlet;
FermionAction::ImplParams ParamsDir(boundary);
ParamsDir.dirichlet=Dirichlet;
// double StoppingCondition = 1e-14;
// double MDStoppingCondition = 1e-9;
double StoppingCondition = 1e-10;
double MDStoppingCondition = 1e-7;
double MDStoppingConditionLoose = 1e-6;
double MaxCGIterations = 300000;
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
////////////////////////////////////
// Collect actions
////////////////////////////////////
ActionLevel<HMCWrapper::Field> Level1(1);
ActionLevel<HMCWrapper::Field> Level2(4);
ActionLevel<HMCWrapper::Field> Level3(8);
////////////////////////////////////
// Strange action
////////////////////////////////////
FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params);
FermionAction StrangeOpDir (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, ParamsDir);
FermionAction StrangePauliVillarsOpDir(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, ParamsDir);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionBdy(StrangeOpDir,StrangeOp,SFRp);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionLocal(StrangePauliVillarsOpDir,StrangeOpDir,SFRp);
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionPVBdy(StrangePauliVillarsOp,StrangePauliVillarsOpDir,SFRp);
Level1.push_back(&StrangePseudoFermionBdy); // ok
Level2.push_back(&StrangePseudoFermionLocal);
Level1.push_back(&StrangePseudoFermionPVBdy); //ok
////////////////////////////////////
// up down action
////////////////////////////////////
std::vector<Real> light_den;
std::vector<Real> light_num;
std::vector<int> dirichlet_den;
std::vector<int> dirichlet_num;
int n_hasenbusch = hasenbusch.size();
light_den.push_back(light_mass); dirichlet_den.push_back(0);
for(int h=0;h<n_hasenbusch;h++){
light_den.push_back(hasenbusch[h]); dirichlet_den.push_back(1);
}
for(int h=0;h<n_hasenbusch;h++){
light_num.push_back(hasenbusch[h]); dirichlet_num.push_back(1);
}
light_num.push_back(pv_mass); dirichlet_num.push_back(0);
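// Note how the ladder telescopes: each intermediate Hasenbusch mass appears once in
// light_num[h] and once in light_den[h+1], both Dirichlet-projected, so the product of
// the quotient determinants det D(light_den[h]) / det D(light_num[h]) over all h reduces
// to det D(light_mass) / det D(pv_mass).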
std::vector<FermionAction *> Numerators;
std::vector<FermionActionF *> NumeratorsF;
std::vector<FermionAction *> Denominators;
std::vector<FermionActionF *> DenominatorsF;
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
#define MIXED_PRECISION
#ifdef MIXED_PRECISION
std::vector<OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF> *> Bdys;
#else
std::vector<OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
#endif
std::vector<MxPCG *> ActionMPCG;
std::vector<MxPCG *> MPCG;
typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
std::vector<LinearOperatorD *> LinOpD;
std::vector<LinearOperatorF *> LinOpF;
for(int h=0;h<n_hasenbusch+1;h++){
std::cout << GridLogMessage
<< " 2f quotient Action ";
std::cout << "det D("<<light_den[h]<<")";
if ( dirichlet_den[h] ) std::cout << "^dirichlet ";
std::cout << "/ det D("<<light_num[h]<<")";
if ( dirichlet_num[h] ) std::cout << "^dirichlet ";
std::cout << std::endl;
FermionAction::ImplParams ParamsNum(boundary);
FermionAction::ImplParams ParamsDen(boundary);
FermionActionF::ImplParams ParamsNumF(boundary);
FermionActionF::ImplParams ParamsDenF(boundary);
if ( dirichlet_num[h]==1) ParamsNum.dirichlet = Dirichlet;
else ParamsNum.dirichlet = NonDirichlet;
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, ParamsNum));
if ( dirichlet_den[h]==1) ParamsDen.dirichlet = Dirichlet;
else ParamsDen.dirichlet = NonDirichlet;
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, ParamsDen));
ParamsDenF.dirichlet = ParamsDen.dirichlet;
DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsDenF));
ParamsNumF.dirichlet = ParamsNum.dirichlet;
NumeratorsF.push_back (new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_num[h],M5,b,c, ParamsNumF));
LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h]));
double conv = MDStoppingCondition;
if (h<3) conv= MDStoppingConditionLoose; // Relax on the first three Hasenbusch factors
const int MX_inner = 5000;
MPCG.push_back(new MxPCG(conv,
MX_inner,
MaxCGIterations,
GridPtrF,
FrbGridF,
*DenominatorsF[h],*Denominators[h],
*LinOpF[h], *LinOpD[h]) );
ActionMPCG.push_back(new MxPCG(StoppingCondition,
MX_inner,
MaxCGIterations,
GridPtrF,
FrbGridF,
*DenominatorsF[h],*Denominators[h],
*LinOpF[h], *LinOpD[h]) );
if(h!=0) {
// Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],MDCG,CG));
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG));
} else {
#ifdef MIXED_PRECISION
Bdys.push_back( new OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF>(
*Numerators[h],*Denominators[h],
*NumeratorsF[h],*DenominatorsF[h],
OFRp, 500) );
Bdys.push_back( new OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF>(
*Numerators[h],*Denominators[h],
*NumeratorsF[h],*DenominatorsF[h],
OFRp, 500) );
#else
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
#endif
}
}
int nquo=Quotients.size();
Level1.push_back(Bdys[0]);
Level1.push_back(Bdys[1]);
for(int h=0;h<nquo-1;h++){
Level2.push_back(Quotients[h]);
}
Level2.push_back(Quotients[nquo-1]);
/////////////////////////////////////////////////////////////
// Gauge action
/////////////////////////////////////////////////////////////
Level3.push_back(&GaugeAction);
TheHMC.TheAction.push_back(Level1);
TheHMC.TheAction.push_back(Level2);
TheHMC.TheAction.push_back(Level3);
std::cout << GridLogMessage << " Action complete "<< std::endl;
/////////////////////////////////////////////////////////////
TheHMC.Run(); // no smearing
Grid_finalize();
} // main

View File

@ -98,9 +98,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
Dw.Dhop(src,result,0);
__SSC_STOP;
}
double t1=usecond();
FGrid->Barrier();
@ -141,9 +139,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
double t0=usecond();
for(int i=0;i<ncall;i++){
__SSC_START;
DwD.Dhop(src_d,result_d,0);
__SSC_STOP;
}
double t1=usecond();
FGrid_d->Barrier();

View File

@ -1,184 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/IO/Test_field_array_io.cc
Copyright (C) 2015
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
//This test demonstrates and checks a single-file write of an arbitrary array of fields
uint64_t writeHeader(const uint32_t size, const uint32_t checksum, const std::string &format, const std::string &file){
std::ofstream fout(file,std::ios::out|std::ios::in);
fout.seekp(0,std::ios::beg);
fout << std::setw(10) << size << std::endl;
fout << std::hex << std::setw(10) << checksum << std::endl;
fout << format << std::endl;
return fout.tellp();
}
uint64_t readHeader(uint32_t &size, uint32_t &checksum, std::string &format, const std::string &file){
std::ifstream fin(file);
std::string line;
getline(fin,line);
{
std::stringstream ss; ss <<line ; ss >> size;
}
getline(fin,line);
{
std::stringstream ss; ss <<line ; ss >> std::hex >> checksum;
}
getline(fin,format);
removeWhitespace(format);
return fin.tellg();
}
template<typename FieldType>
void writeFieldArray(const std::string &file, const std::vector<FieldType> &data){
typedef typename FieldType::vector_object vobj;
typedef typename FieldType::scalar_object sobj;
GridBase* grid = data[0].Grid(); //assume all fields have the same Grid
BinarySimpleMunger<sobj, sobj> munge; //straight copy
//We need a 2-pass header write, first to establish the size, the second pass writes the checksum
std::string format = getFormatString<typename FieldType::vector_object>();
uint64_t offset; //leave 64 bits for header
if ( grid->IsBoss() ) {
NerscIO::truncate(file);
offset = writeHeader(data.size(), 0, format, file);
}
grid->Broadcast(0,(void *)&offset,sizeof(offset)); //use as a barrier
std::cout << "Data offset write " << offset << std::endl;
std::cout << "Data size write " << data.size() << std::endl;
uint64_t field_size = uint64_t(grid->gSites()) * sizeof(sobj);
std::cout << "Field size = " << field_size << " B" << std::endl;
uint32_t checksum = 0;
for(int i=0;i<data.size();i++){
std::cout << "Data field write " << i << " offset " << offset << std::endl;
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::writeLatticeObject<vobj,sobj>(const_cast<FieldType &>(data[i]),file,munge,offset,format,
nersc_csum,scidac_csuma,scidac_csumb);
offset += field_size;
checksum ^= nersc_csum + 0x9e3779b9 + (checksum<<6) + (checksum>>2);
}
std::cout << "Write checksum " << checksum << std::endl;
if ( grid->IsBoss() ) {
writeHeader(data.size(), checksum, format, file);
}
}
template<typename FieldType>
void readFieldArray(std::vector<FieldType> &data, const std::string &file){
typedef typename FieldType::vector_object vobj;
typedef typename FieldType::scalar_object sobj;
assert(data.size() > 0);
GridBase* grid = data[0].Grid(); //assume all fields have the same Grid
BinarySimpleUnmunger<sobj, sobj> munge; //straight copy
uint32_t hdr_checksum, hdr_size;
std::string format;
uint64_t offset = readHeader(hdr_size, hdr_checksum, format, file);
std::cout << "Data offset read " << offset << std::endl;
std::cout << "Data size read " << hdr_size << std::endl;
assert(data.size() == hdr_size);
uint64_t field_size = uint64_t(grid->gSites()) * sizeof(sobj);
uint32_t checksum = 0;
for(int i=0;i<data.size();i++){
std::cout << "Data field read " << i << " offset " << offset << std::endl;
uint32_t nersc_csum,scidac_csuma,scidac_csumb;
BinaryIO::readLatticeObject<vobj,sobj>(data[i],file,munge,offset,format,
nersc_csum,scidac_csuma,scidac_csumb);
offset += field_size;
checksum ^= nersc_csum + 0x9e3779b9 + (checksum<<6) + (checksum>>2);
}
std::cout << "Header checksum " << hdr_checksum << std::endl;
std::cout << "Read checksum " << checksum << std::endl;
assert( hdr_checksum == checksum );
}
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
Coordinate latt = GridDefaultLatt();
Coordinate simd_layout = GridDefaultSimd(Nd,vComplex::Nsimd());
Coordinate mpi_layout = GridDefaultMpi();
const int Ls=8;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(latt, simd_layout, mpi_layout);
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
typedef DomainWallFermionD::FermionField FermionField;
int nfield = 20;
std::vector<FermionField> data(nfield, FGrid);
for(int i=0;i<data.size();i++)
gaussian(RNG5, data[i]);
std::string file = "test_field_array_io.0";
writeFieldArray(file, data);
std::vector<FermionField> data_r(nfield, FGrid);
readFieldArray(data_r, file);
for(int i=0;i<nfield;i++){
FermionField diff = data_r[i] - data[i];
RealD norm_diff = norm2(diff);
std::cout << "Norm2 of difference between stored and loaded data index " << i << " : " << norm_diff << std::endl;
}
std::cout << "Done" << std::endl;
Grid_finalize();
}
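The deleted test above hinges on two details: a two-pass header write (sizes first, checksum patched in afterwards) and an order-dependent fold of the per-field NERSC checksums using the 0x9e3779b9 combiner. A minimal standalone sketch of the same pattern, assuming plain std::fstream and a toy per-record checksum in place of Grid's BinaryIO machinery:
#include <cstdint>
#include <fstream>
#include <iomanip>
#include <vector>
static void writeHeader(std::fstream &f, uint32_t size, uint32_t checksum){
f.seekp(0, std::ios::beg);
f << std::setw(10) << size << std::endl;
f << std::hex << std::setw(10) << checksum << std::dec << std::endl;
}
int main(void){
std::vector<std::vector<char> > records(3, std::vector<char>(1024,'x'));
std::fstream f("two_pass_demo.bin", std::ios::out|std::ios::binary|std::ios::trunc);
writeHeader(f, (uint32_t)records.size(), 0); // pass 1: size known, checksum not yet
uint32_t checksum = 0;
for(size_t i=0;i<records.size();i++){
uint32_t rec_csum = 0; // stand-in for the per-record NERSC checksum
for(char c : records[i]) rec_csum += (unsigned char)c;
f.write(records[i].data(), records[i].size());
checksum ^= rec_csum + 0x9e3779b9 + (checksum<<6) + (checksum>>2); // same order-dependent fold
}
writeHeader(f, (uint32_t)records.size(), checksum); // pass 2: patch in the real checksum
return 0;
}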

View File

@ -95,26 +95,34 @@ int main (int argc, char ** argv)
std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl;
MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO);
double t1,t2,flops;
double MdagMsiteflops = 1452; // Mobius (real coeffs)
// CG overhead: 8 inner product, 4+8 axpy_norm, 4+4 linear comb (2 of)
double CGsiteflops = (8+4+8+4+4)*Nc*Ns ;
std:: cout << " MdagM site flops = "<< 4*MdagMsiteflops<<std::endl;
std:: cout << " CG site flops = "<< CGsiteflops <<std::endl;
int iters;
for(int i=0;i<100;i++){
for(int i=0;i<200;i++){
result_o = Zero();
t1=usecond();
mCG(src_o,result_o);
t2=usecond();
iters = mCG.TotalInnerIterations; //Number of inner CG iterations
flops = 1320.0*2*FGrid->gSites()*iters;
flops = MdagMsiteflops*4*FrbGrid->gSites()*iters;
flops+= CGsiteflops*FrbGrid->gSites()*iters;
std::cout << " SinglePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
std::cout << " SinglePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
}
std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl;
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
for(int i=0;i<100;i++){
for(int i=0;i<1;i++){
result_o_2 = Zero();
t1=usecond();
CG(HermOpEO,src_o,result_o_2);
t2=usecond();
iters = CG.IterationsToComplete;
flops = 1320.0*2*FGrid->gSites()*iters;
flops = MdagMsiteflops*4*FrbGrid->gSites()*iters;
flops+= CGsiteflops*FrbGrid->gSites()*iters;
std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
std::cout << " DoublePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
}
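For reference, the flop-rate bookkeeping in the hunk above reduces to the arithmetic below. The 1452 Mobius MdagM site-flop count, the CG linear-algebra count and the factor of 4 are taken from the test; the volume, iteration count and timing are placeholder example values only.
#include <cstdio>
int main(void){
const double MdagMsiteflops = 1452.0; // Mobius MdagM, real coefficients (as in the test)
const double Nc = 3.0, Ns = 4.0;
const double CGsiteflops = (8+4+8+4+4)*Nc*Ns; // CG linear algebra per site per iteration
const double sites = 12.0*(16.0*16*16*32)/2.0; // example: Ls=12, 16^3 x 32, red-black half volume
const int iters = 500; // example iteration count
const double seconds = 10.0; // example wall-clock time
double flops = MdagMsiteflops*4.0*sites*iters // factor 4, as printed by the test
+ CGsiteflops*sites*iters;
std::printf("GF/s = %f\n", flops/seconds/1.0e9);
return 0;
}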

View File

@ -1,485 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_compressed_lanczos_gparity.cc
Copyright (C) 2017
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Leans heavily on Christoph Lehner's code
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
/*
* Reimplement the badly named "multigrid" lanczos as compressed Lanczos using the features
* in Grid that were intended to be used to support blocked Aggregates, from
*/
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>
using namespace std;
using namespace Grid;
//For the CPS configurations we have to manually seed the RNG and deal with an incorrect factor of 2 in the plaquette metadata
void readConfiguration(LatticeGaugeFieldD &U,
const std::string &config,
bool is_cps_cfg = false){
if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = false;
typedef GaugeStatistics<ConjugateGimplD> GaugeStats;
FieldMetaData header;
NerscIO::readConfiguration<GaugeStats>(U, header, config);
if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = true;
}
//Lanczos parameters in CPS conventions
struct CPSLanczosParams : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(CPSLanczosParams,
RealD, alpha,
RealD, beta,
int, ch_ord,
int, N_use,
int, N_get,
int, N_true_get,
RealD, stop_rsd,
int, maxits);
//Translations
ChebyParams getChebyParams() const{
ChebyParams out;
out.alpha = beta*beta; //aka lo
out.beta = alpha*alpha; //aka hi
out.Npoly = ch_ord+1;
return out;
}
int Nstop() const{ return N_true_get; }
int Nm() const{ return N_use; }
int Nk() const{ return N_get; }
};
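// Worked example of the translation above, using the fine-Lanczos defaults set later in
// this file (alpha = 2, beta = 0.1, ch_ord = 100): lo = beta*beta = 0.01,
// hi = alpha*alpha = 4.0, Npoly = ch_ord+1 = 101, i.e. the Chebyshev filter acts on the
// spectral window [0.01, 4.0].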
//Maybe this class should be in the main library?
template<class Fobj,class CComplex,int nbasis>
class LocalCoherenceLanczosScidac : public LocalCoherenceLanczos<Fobj,CComplex,nbasis>
{
public:
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
typedef Lattice<Fobj> FineField;
LocalCoherenceLanczosScidac(GridBase *FineGrid,GridBase *CoarseGrid,
LinearOperatorBase<FineField> &FineOp,
int checkerboard)
// Base constructor
: LocalCoherenceLanczos<Fobj,CComplex,nbasis>(FineGrid,CoarseGrid,FineOp,checkerboard)
{};
void checkpointFine(std::string evecs_file,std::string evals_file)
{
assert(this->subspace.size()==nbasis);
emptyUserRecord record;
Grid::ScidacWriter WR(this->_FineGrid->IsBoss());
WR.open(evecs_file);
for(int k=0;k<nbasis;k++) {
WR.writeScidacFieldRecord(this->subspace[k],record);
}
WR.close();
XmlWriter WRx(evals_file);
write(WRx,"evals",this->evals_fine);
}
void checkpointFineRestore(std::string evecs_file,std::string evals_file)
{
this->evals_fine.resize(nbasis);
this->subspace.resize(nbasis,this->_FineGrid);
std::cout << GridLogIRL<< "checkpointFineRestore: Reading evals from "<<evals_file<<std::endl;
XmlReader RDx(evals_file);
read(RDx,"evals",this->evals_fine);
if(this->evals_fine.size() < nbasis) assert(0 && "Not enough fine evals to complete basis");
if(this->evals_fine.size() > nbasis){ //allow the use of precomputed evecs with a larger #evecs
std::cout << GridLogMessage << "Truncating " << this->evals_fine.size() << " evals to basis size " << nbasis << std::endl;
this->evals_fine.resize(nbasis);
}
std::cout << GridLogIRL<< "checkpointFineRestore: Reading evecs from "<<evecs_file<<std::endl;
emptyUserRecord record;
Grid::ScidacReader RD ;
RD.open(evecs_file);
for(int k=0;k<nbasis;k++) {
this->subspace[k].Checkerboard()=this->_checkerboard;
RD.readScidacFieldRecord(this->subspace[k],record);
}
RD.close();
}
void checkpointCoarse(std::string evecs_file,std::string evals_file)
{
int n = this->evec_coarse.size();
emptyUserRecord record;
Grid::ScidacWriter WR(this->_CoarseGrid->IsBoss());
WR.open(evecs_file);
for(int k=0;k<n;k++) {
WR.writeScidacFieldRecord(this->evec_coarse[k],record);
}
WR.close();
XmlWriter WRx(evals_file);
write(WRx,"evals",this->evals_coarse);
}
void checkpointCoarseRestore(std::string evecs_file,std::string evals_file,int nvec)
{
std::cout << "resizing coarse vecs to " << nvec<< std::endl;
this->evals_coarse.resize(nvec);
this->evec_coarse.resize(nvec,this->_CoarseGrid);
std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evals from "<<evals_file<<std::endl;
XmlReader RDx(evals_file);
read(RDx,"evals",this->evals_coarse);
assert(this->evals_coarse.size()==nvec);
emptyUserRecord record;
std::cout << GridLogIRL<< "checkpointCoarseRestore: Reading evecs from "<<evecs_file<<std::endl;
Grid::ScidacReader RD ;
RD.open(evecs_file);
for(int k=0;k<nvec;k++) {
RD.readScidacFieldRecord(this->evec_coarse[k],record);
}
RD.close();
}
};
struct Options{
std::vector<int> blockSize;
std::vector<int> GparityDirs;
int Ls;
RealD mass;
RealD M5;
RealD mobius_scale;
std::string config;
bool is_cps_cfg;
double coarse_relax_tol;
int smoother_ord;
CPSLanczosParams fine;
CPSLanczosParams coarse;
bool write_fine = false;
std::string write_fine_file;
bool read_fine = false;
std::string read_fine_file;
bool write_coarse = false;
std::string write_coarse_file;
bool read_coarse = false;
std::string read_coarse_file;
Options(){
blockSize = std::vector<int> ({2,2,2,2,2});
GparityDirs = std::vector<int> ({1,1,1}); //1 for each GP direction
Ls = 12;
mass = 0.01;
M5 = 1.8;
is_cps_cfg = false;
mobius_scale = 2.0;
fine.alpha = 2;
fine.beta = 0.1;
fine.ch_ord = 100;
fine.N_use = 70;
fine.N_get = 60;
fine.N_true_get = 60;
fine.stop_rsd = 1e-8;
fine.maxits = 10000;
coarse.alpha = 2;
coarse.beta = 0.1;
coarse.ch_ord = 100;
coarse.N_use = 200;
coarse.N_get = 190;
coarse.N_true_get = 190;
coarse.stop_rsd = 1e-8;
coarse.maxits = 10000;
coarse_relax_tol = 1e5;
smoother_ord = 20;
write_fine = false;
read_fine = false;
write_coarse = false;
read_coarse = false;
}
};
template<int nbasis>
void runTest(const Options &opt){
//Fine grids
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(opt.Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(opt.Ls,UGrid);
//Setup G-parity BCs
assert(Nd == 4);
std::vector<int> dirs4(4);
for(int i=0;i<3;i++) dirs4[i] = opt.GparityDirs[i];
dirs4[3] = 0; //periodic gauge BC in time
std::cout << GridLogMessage << "Gauge BCs: " << dirs4 << std::endl;
ConjugateGimplD::setDirections(dirs4); //gauge BC
GparityWilsonImplD::ImplParams Params;
for(int i=0;i<Nd-1;i++) Params.twists[i] = opt.GparityDirs[i]; //G-parity directions
Params.twists[Nd-1] = 1; //APBC in time direction
std::cout << GridLogMessage << "Fermion BCs: " << Params.twists << std::endl;
//Read the gauge field
LatticeGaugeField Umu(UGrid);
readConfiguration(Umu, opt.config, opt.is_cps_cfg);
//Setup the coarse grids
auto fineLatt = GridDefaultLatt();
Coordinate coarseLatt(4);
for (int d=0;d<4;d++){
coarseLatt[d] = fineLatt[d]/opt.blockSize[d]; assert(coarseLatt[d]*opt.blockSize[d]==fineLatt[d]);
}
std::cout << GridLogMessage<< " 5d coarse lattice is ";
for (int i=0;i<4;i++){
std::cout << coarseLatt[i]<<"x";
}
int cLs = opt.Ls/opt.blockSize[4]; assert(cLs*opt.blockSize[4]==opt.Ls);
std::cout << cLs<<std::endl;
GridCartesian * CoarseGrid4 = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * CoarseGrid4rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
GridCartesian * CoarseGrid5 = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
//Dirac operator
double bmc = 1.;
double b = (opt.mobius_scale + bmc)/2.; // b = 1/2 [ (b+c) + (b-c) ]
double c = (opt.mobius_scale - bmc)/2.; // c = 1/2 [ (b+c) - (b-c) ]
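// e.g. the default mobius_scale = 2.0 together with b-c = 1 gives b = 1.5 and c = 0.5.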
GparityMobiusFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, opt.mass, opt.M5, b,c,Params);
typedef GparityMobiusFermionD::FermionField FermionField;
SchurDiagTwoOperator<GparityMobiusFermionD, FermionField> SchurOp(action);
typedef GparityWilsonImplD::SiteSpinor SiteSpinor;
const CPSLanczosParams &fine = opt.fine;
const CPSLanczosParams &coarse = opt.coarse;
std::cout << GridLogMessage << "Keep " << fine.N_true_get << " fine vectors" << std::endl;
std::cout << GridLogMessage << "Keep " << coarse.N_true_get << " coarse vectors" << std::endl;
assert(coarse.N_true_get >= fine.N_true_get);
assert(nbasis<=fine.N_true_get);
LocalCoherenceLanczosScidac<SiteSpinor,vTComplex,nbasis> _LocalCoherenceLanczos(FrbGrid,CoarseGrid5,SchurOp,Odd);
std::cout << GridLogMessage << "Constructed LocalCoherenceLanczos" << std::endl;
//Compute and/or read fine evecs
if(opt.read_fine){
_LocalCoherenceLanczos.checkpointFineRestore(opt.read_fine_file + "_evecs.scidac", opt.read_fine_file + "_evals.xml");
}else{
std::cout << GridLogMessage << "Performing fine grid IRL" << std::endl;
std::cout << GridLogMessage << "Using Chebyshev alpha=" << fine.alpha << " beta=" << fine.beta << " ord=" << fine.ch_ord << std::endl;
_LocalCoherenceLanczos.calcFine(fine.getChebyParams(),
fine.Nstop(),fine.Nk(),fine.Nm(),
fine.stop_rsd,fine.maxits,0,0);
if(opt.write_fine){
std::cout << GridLogIRL<<"Checkpointing Fine evecs"<<std::endl;
_LocalCoherenceLanczos.checkpointFine(opt.write_fine_file + "_evecs.scidac", opt.write_fine_file + "_evals.xml");
}
}
//Block orthonormalise (this should be part of calcFine?)
std::cout << GridLogIRL<<"Orthogonalising"<<std::endl;
_LocalCoherenceLanczos.Orthogonalise();
std::cout << GridLogIRL<<"Orthogonaled"<<std::endl;
ChebyParams smoother = fine.getChebyParams();
smoother.Npoly = opt.smoother_ord+1;
if(opt.read_coarse){
_LocalCoherenceLanczos.checkpointCoarseRestore(opt.read_coarse_file + "_evecs.scidac", opt.read_coarse_file + "_evals.xml",coarse.Nstop());
}else{
std::cout << GridLogMessage << "Performing coarse grid IRL" << std::endl;
std::cout << GridLogMessage << "Using Chebyshev alpha=" << coarse.alpha << " beta=" << coarse.beta << " ord=" << coarse.ch_ord << std::endl;
_LocalCoherenceLanczos.calcCoarse(coarse.getChebyParams(), smoother, opt.coarse_relax_tol,
coarse.Nstop(), coarse.Nk() ,coarse.Nm(),
coarse.stop_rsd, coarse.maxits,
0,0);
if(opt.write_coarse){
std::cout << GridLogIRL<<"Checkpointing Coarse evecs"<<std::endl;
_LocalCoherenceLanczos.checkpointCoarse(opt.write_coarse_file + "_evecs.scidac", opt.write_coarse_file + "_evals.xml");
}
}
//Test the eigenvectors
//To remove high-frequency noise we apply a Chebyshev smoothing
Chebyshev<FermionField> cheb_smoother(smoother);
FermionField evec(FrbGrid);
FermionField evec_sm(FrbGrid); //smoothed
FermionField tmp(FrbGrid);
RealD eval;
for(int i=0;i<coarse.N_true_get;i++){
_LocalCoherenceLanczos.getFineEvecEval(evec, eval, i);
//Check unsmoothed evec
SchurOp.HermOp(evec, tmp);
tmp = tmp - eval*evec;
RealD norm_unsmoothed = sqrt(norm2(tmp));
//Check smoothed evec
cheb_smoother(SchurOp, evec, evec_sm);
SchurOp.HermOp(evec_sm, tmp);
tmp = tmp - eval*evec_sm;
RealD norm_smoothed = sqrt(norm2(tmp));
std::cout << GridLogMessage << "Eval " << eval << " unsmoothed resid " << norm_unsmoothed << " smoothed resid " << norm_smoothed << std::endl;
}
}
//Note: because we rely upon physical properties we must use a "real" gauge configuration
int main (int argc, char ** argv) {
Grid_init(&argc,&argv);
GridLogIRL.TimingMode(1);
Options opt;
int basis_size = 100;
if(argc < 3){
std::cout << GridLogMessage << "Usage: <exe> <config> <gparity dirs> <options>" << std::endl;
std::cout << GridLogMessage << "<gparity dirs> should have the format a.b.c where a,b,c are 0,1 depending on whether there are G-parity BCs in that direction" << std::endl;
std::cout << GridLogMessage << "Options:" << std::endl;
std::cout << GridLogMessage << "--Ls <value> : Set Ls (default 12)" << std::endl;
std::cout << GridLogMessage << "--mass <value> : Set the mass (default 0.01)" << std::endl;
std::cout << GridLogMessage << "--block <value> : Set the block size. Format should be a.b.c.d.e where a-e are the block extents (default 2.2.2.2.2)" << std::endl;
std::cout << GridLogMessage << "--is_cps_cfg : Indicate that the configuration was generated with CPS where until recently the stored plaquette was wrong by a factor of 2" << std::endl;
std::cout << GridLogMessage << "--write_irl_templ: Write a template for the parameters file of the Lanczos to \"irl_templ.xml\"" << std::endl;
std::cout << GridLogMessage << "--read_irl_fine <filename>: Real the parameters file for the fine Lanczos" << std::endl;
std::cout << GridLogMessage << "--read_irl_coarse <filename>: Real the parameters file for the coarse Lanczos" << std::endl;
std::cout << GridLogMessage << "--write_fine <filename stub>: Write fine evecs/evals to filename starting with the stub" << std::endl;
std::cout << GridLogMessage << "--read_fine <filename stub>: Read fine evecs/evals from filename starting with the stub" << std::endl;
std::cout << GridLogMessage << "--write_coarse <filename stub>: Write coarse evecs/evals to filename starting with the stub" << std::endl;
std::cout << GridLogMessage << "--read_coarse <filename stub>: Read coarse evecs/evals from filename starting with the stub" << std::endl;
std::cout << GridLogMessage << "--smoother_ord : Set the Chebyshev order of the smoother (default 20)" << std::endl;
std::cout << GridLogMessage << "--coarse_relax_tol : Set the relaxation parameter for evaluating the residual of the reconstructed eigenvectors outside of the basis (default 1e5)" << std::endl;
std::cout << GridLogMessage << "--basis_size : Select the basis size from 100,200,300,350 (default 100)" << std::endl;
Grid_finalize();
return 1;
}
opt.config = argv[1];
GridCmdOptionIntVector(argv[2], opt.GparityDirs);
assert(opt.GparityDirs.size() == 3);
for(int i=3;i<argc;i++){
std::string sarg = argv[i];
if(sarg == "--Ls"){
opt.Ls = std::stoi(argv[i+1]);
std::cout << GridLogMessage << "Set Ls to " << opt.Ls << std::endl;
}else if(sarg == "--mass"){
std::istringstream ss(argv[i+1]); ss >> opt.mass;
std::cout << GridLogMessage << "Set quark mass to " << opt.mass << std::endl;
}else if(sarg == "--block"){
GridCmdOptionIntVector(argv[i+1], opt.blockSize);
assert(opt.blockSize.size() == 5);
std::cout << GridLogMessage << "Set block size to ";
for(int q=0;q<5;q++) std::cout << opt.blockSize[q] << " ";
std::cout << std::endl;
}else if(sarg == "--is_cps_cfg"){
opt.is_cps_cfg = true;
}else if(sarg == "--write_irl_templ"){
XmlWriter writer("irl_templ.xml");
write(writer,"Params", opt.fine);
Grid_finalize();
return 0;
}else if(sarg == "--read_irl_fine"){
std::cout << GridLogMessage << "Reading fine IRL params from " << argv[i+1] << std::endl;
XmlReader reader(argv[i+1]);
read(reader, "Params", opt.fine);
}else if(sarg == "--read_irl_coarse"){
std::cout << GridLogMessage << "Reading coarse IRL params from " << argv[i+1] << std::endl;
XmlReader reader(argv[i+1]);
read(reader, "Params", opt.coarse);
}else if(sarg == "--write_fine"){
opt.write_fine = true;
opt.write_fine_file = argv[i+1];
}else if(sarg == "--read_fine"){
opt.read_fine = true;
opt.read_fine_file = argv[i+1];
}else if(sarg == "--write_coarse"){
opt.write_coarse = true;
opt.write_coarse_file = argv[i+1];
}else if(sarg == "--read_coarse"){
opt.read_coarse = true;
opt.read_coarse_file = argv[i+1];
}else if(sarg == "--smoother_ord"){
std::istringstream ss(argv[i+1]); ss >> opt.smoother_ord;
std::cout << GridLogMessage << "Set smoother order to " << opt.smoother_ord << std::endl;
}else if(sarg == "--coarse_relax_tol"){
std::istringstream ss(argv[i+1]); ss >> opt.coarse_relax_tol;
std::cout << GridLogMessage << "Set coarse IRL relaxation parameter to " << opt.coarse_relax_tol << std::endl;
}else if(sarg == "--mobius_scale"){
std::istringstream ss(argv[i+1]); ss >> opt.mobius_scale;
std::cout << GridLogMessage << "Set Mobius scale to " << opt.mobius_scale << std::endl;
}else if(sarg == "--basis_size"){
basis_size = std::stoi(argv[i+1]);
std::cout << GridLogMessage << "Set basis size to " << basis_size << std::endl;
}
}
switch(basis_size){
case 100:
runTest<100>(opt); break;
case 200:
runTest<200>(opt); break;
case 300:
runTest<300>(opt); break;
case 350:
runTest<350>(opt); break;
default:
std::cout << GridLogMessage << "Unsupported basis size " << basis_size << std::endl;
assert(0);
}
Grid_finalize();
}

View File

@ -1,582 +0,0 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_evec_compression.cc
Copyright (C) 2017
Author: Christopher Kelly <ckelly@bnl.gov>
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
/*
*
* This test generates eigenvectors using the Lanczos algorithm then attempts to use local coherence compression
* to express those vectors in terms of a basis formed from a subset. This test is useful for finding the optimal
* blocking and basis size for performing a Local Coherence Lanczos
*/
#include <Grid/Grid.h>
#include <Grid/algorithms/iterative/ImplicitlyRestartedLanczos.h>
#include <Grid/algorithms/iterative/LocalCoherenceLanczos.h>
using namespace std;
using namespace Grid;
//For the CPS configurations we have to manually seed the RNG and deal with an incorrect factor of 2 in the plaquette metadata
template<typename Gimpl>
void readConfiguration(LatticeGaugeFieldD &U,
const std::string &config,
bool is_cps_cfg = false){
if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = false;
typedef GaugeStatistics<Gimpl> GaugeStats;
FieldMetaData header;
NerscIO::readConfiguration<GaugeStats>(U, header, config);
if(is_cps_cfg) NerscIO::exitOnReadPlaquetteMismatch() = true;
}
//Lanczos parameters in CPS conventions
struct CPSLanczosParams : Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(CPSLanczosParams,
RealD, alpha,
RealD, beta,
int, ch_ord,
int, N_use,
int, N_get,
int, N_true_get,
RealD, stop_rsd,
int, maxits);
//Translations
ChebyParams getChebyParams() const{
ChebyParams out;
out.alpha = beta*beta; //aka lo
out.beta = alpha*alpha; //aka hi
out.Npoly = ch_ord+1;
return out;
}
int Nstop() const{ return N_true_get; }
int Nm() const{ return N_use; }
int Nk() const{ return N_get; }
};
template<class Fobj,class CComplex,int nbasis>
class LocalCoherenceCompressor{
public:
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<Fobj> FineField;
void compress(std::vector<FineField> &basis,
std::vector<CoarseField> &compressed_evecs,
const std::vector<FineField> &evecs_in,
GridBase *FineGrid,
GridBase *CoarseGrid){
int nevecs = evecs_in.size();
assert(nevecs > nbasis);
//Construct the basis
basis.resize(nbasis, FineGrid);
for(int b=0;b<nbasis;b++) basis[b] = evecs_in[b];
//Block orthonormalise basis
CoarseScalar InnerProd(CoarseGrid);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 1"<<std::endl;
blockOrthogonalise(InnerProd,basis);
std::cout << GridLogMessage <<" Gramm-Schmidt pass 2"<<std::endl;
blockOrthogonalise(InnerProd,basis);
//The coarse grid representation is the field of vectors of block inner products
std::cout << GridLogMessage << "Compressing eigevectors" << std::endl;
compressed_evecs.resize(nevecs, CoarseGrid);
for(int i=0;i<nevecs;i++) blockProject(compressed_evecs[i], evecs_in[i], basis);
std::cout << GridLogMessage << "Compression complete" << std::endl;
}
void uncompress(FineField &evec, const int i, const std::vector<FineField> &basis, const std::vector<CoarseField> &compressed_evecs) const{
blockPromote(compressed_evecs[i],evec,basis);
}
//Test uncompressed eigenvectors of Linop.HermOp to precision 'base_tolerance' for i<nbasis and 'base_tolerance*relax' for i>=nbasis
//Because the uncompressed evec has a lot of high mode noise (unimportant for deflation) we apply a smoother before testing.
//The Chebyshev used by the Lanczos should be sufficient as a smoother
bool testCompression(LinearOperatorBase<FineField> &Linop, OperatorFunction<FineField> &smoother,
const std::vector<FineField> &basis, const std::vector<CoarseField> &compressed_evecs, const std::vector<RealD> &evals,
const RealD base_tolerance, const RealD relax){
std::cout << GridLogMessage << "Testing quality of uncompressed evecs (after smoothing)" << std::endl;
GridBase* FineGrid = basis[0].Grid();
GridBase* CoarseGrid = compressed_evecs[0].Grid();
bool fail = false;
FineField evec(FineGrid), Mevec(FineGrid), evec_sm(FineGrid);
for(int i=0;i<compressed_evecs.size();i++){
std::cout << GridLogMessage << "Uncompressing evec " << i << std::endl;
uncompress(evec, i, basis, compressed_evecs);
std::cout << GridLogMessage << "Smoothing evec " << i << std::endl;
smoother(Linop, evec, evec_sm);
std::cout << GridLogMessage << "Computing residual for evec " << i << std::endl;
std::cout << GridLogMessage << "Linop" << std::endl;
Linop.HermOp(evec_sm, Mevec);
std::cout << GridLogMessage << "Linalg" << std::endl;
Mevec = Mevec - evals[i]*evec_sm;
std::cout << GridLogMessage << "Resid" << std::endl;
RealD tol = base_tolerance * (i<nbasis ? 1. : relax);
RealD res = sqrt(norm2(Mevec));
std::cout << GridLogMessage << "Evec idx " << i << " res " << res << " tol " << tol << std::endl;
if(res > tol) fail = true;
}
return !fail; //true only if every reconstructed eigenvector met its tolerance
}
//Compare uncompressed evecs to original evecs
void compareEvecs(const std::vector<FineField> &basis, const std::vector<CoarseField> &compressed_evecs, const std::vector<FineField> &orig_evecs){
std::cout << GridLogMessage << "Comparing uncompressed evecs to original evecs" << std::endl;
GridBase* FineGrid = basis[0].Grid();
GridBase* CoarseGrid = compressed_evecs[0].Grid();
FineField evec(FineGrid), diff(FineGrid);
for(int i=0;i<compressed_evecs.size();i++){
std::cout << GridLogMessage << "Uncompressing evec " << i << std::endl;
uncompress(evec, i, basis, compressed_evecs);
diff = orig_evecs[i] - evec;
RealD res = sqrt(norm2(diff));
std::cout << GridLogMessage << "Evec idx " << i << " res " << res << std::endl;
}
}
};
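//Time blockPromote from a cold start: taking CpuWrite views of every field is intended (on GPU builds) to mark the
//device copies stale, so the subsequent blockPromote must move the data back to the device before computing.
//The second, identical pass checks that the flush really does reproduce a cold-start timing.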
template<class Fobj,class CComplex,int nbasis>
void compareBlockPromoteTimings(const std::vector<Lattice<Fobj> > &basis, const std::vector<Lattice<iVector<CComplex,nbasis > > > &compressed_evecs){
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CComplex> CoarseScalar;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<Fobj> FineField;
GridStopWatch timer;
GridBase* FineGrid = basis[0].Grid();
GridBase* CoarseGrid = compressed_evecs[0].Grid();
FineField v1(FineGrid), v2(FineGrid);
//Force a cold start: take host (CpuWrite) views of all fields so that any device copies are marked stale
for(int i=0;i<basis.size();i++){
autoView( b_ , basis[i], CpuWrite);
}
for(int i=0;i<compressed_evecs.size();i++){
autoView( b_ , compressed_evecs[i], CpuWrite);
}
{
autoView( b_, v1, CpuWrite );
}
timer.Start();
blockPromote(compressed_evecs[0],v1,basis);
timer.Stop();
std::cout << GridLogMessage << "Time for cold blockPromote v1 " << timer.Elapsed() << std::endl;
//Test to ensure it is actually doing a cold start by repeating
for(int i=0;i<basis.size();i++){
autoView( b_ , basis[i], CpuWrite);
}
for(int i=0;i<compressed_evecs.size();i++){
autoView( b_ , compressed_evecs[i], CpuWrite);
}
{
autoView( b_, v1, CpuWrite );
}
timer.Reset();
timer.Start();
blockPromote(compressed_evecs[0],v1,basis);
timer.Stop();
std::cout << GridLogMessage << "Time for cold blockPromote v1 repeat (should be the same as above) " << timer.Elapsed() << std::endl;
}
struct Args{
int Ls;
RealD mass;
RealD M5;
bool is_cps_cfg;
RealD mobius_scale; //b+c
CPSLanczosParams fine;
double coarse_relax_tol;
std::vector<int> blockSize;
std::vector<int> GparityDirs;
bool write_fine;
std::string write_fine_file;
bool read_fine;
std::string read_fine_file;
int basis_size;
Args(){
blockSize = {2,2,2,2,2};
GparityDirs = {1,1,1}; //1 for each GP direction
Ls = 12;
mass = 0.01;
M5 = 1.8;
is_cps_cfg = false;
mobius_scale = 2;
fine.alpha = 2;
fine.beta = 0.1;
fine.ch_ord = 100;
fine.N_use = 70;
fine.N_get = 60;
fine.N_true_get = 60;
fine.stop_rsd = 1e-8;
fine.maxits = 10000;
coarse_relax_tol = 1e5;
write_fine = false;
read_fine = false;
basis_size = 100;
}
};
GparityWilsonImplD::ImplParams setupGparityParams(const std::vector<int> &GparityDirs){
//Setup G-parity BCs
assert(Nd == 4);
std::vector<int> dirs4(4);
for(int i=0;i<3;i++) dirs4[i] = GparityDirs[i];
dirs4[3] = 0; //periodic gauge BC in time
std::cout << GridLogMessage << "Gauge BCs: " << dirs4 << std::endl;
ConjugateGimplD::setDirections(dirs4); //gauge BC
GparityWilsonImplD::ImplParams Params;
for(int i=0;i<Nd-1;i++) Params.twists[i] = GparityDirs[i]; //G-parity directions
Params.twists[Nd-1] = 1; //APBC in time direction
std::cout << GridLogMessage << "Fermion BCs: " << Params.twists << std::endl;
return Params;
}
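//Standard (non-G-parity) fermion BCs: periodic in the spatial directions, antiperiodic in time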
WilsonImplD::ImplParams setupParams(){
WilsonImplD::ImplParams Params;
Complex one(1.0);
Complex mone(-1.0);
for(int i=0;i<Nd-1;i++) Params.boundary_phases[i] = one;
Params.boundary_phases[Nd-1] = mone;
return Params;
}
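//run_b: build the coarse grids for the chosen blocking, obtain the fine eigenvectors (either by running the fine
//Lanczos or by reading them from disk), compress them onto an nbasis-vector local coherence basis, and test the
//quality of the reconstructed eigenvectors against the smoothed fine operator.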
template<int nbasis, typename ActionType>
void run_b(ActionType &action, const std::string &config, const Args &args){
//Fine grids
GridCartesian * UGrid = (GridCartesian*)action.GaugeGrid();
GridRedBlackCartesian * UrbGrid = (GridRedBlackCartesian*)action.GaugeRedBlackGrid();
GridCartesian * FGrid = (GridCartesian*)action.FermionGrid();
GridRedBlackCartesian * FrbGrid = (GridRedBlackCartesian*)action.FermionRedBlackGrid();
//Setup the coarse grids
auto fineLatt = GridDefaultLatt();
Coordinate coarseLatt(4);
for (int d=0;d<4;d++){
coarseLatt[d] = fineLatt[d]/args.blockSize[d]; assert(coarseLatt[d]*args.blockSize[d]==fineLatt[d]);
}
std::cout << GridLogMessage<< " 5d coarse lattice is ";
for (int i=0;i<4;i++){
std::cout << coarseLatt[i]<<"x";
}
int cLs = args.Ls/args.blockSize[4]; assert(cLs*args.blockSize[4]==args.Ls);
std::cout << cLs<<std::endl;
GridCartesian * CoarseGrid4 = SpaceTimeGrid::makeFourDimGrid(coarseLatt, GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * CoarseGrid4rb = SpaceTimeGrid::makeFourDimRedBlackGrid(CoarseGrid4);
GridCartesian * CoarseGrid5 = SpaceTimeGrid::makeFiveDimGrid(cLs,CoarseGrid4);
typedef vTComplex CComplex;
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CComplex> CoarseScalar;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef typename ActionType::FermionField FermionField;
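//The Lanczos is run on the red-black preconditioned (Schur diag-two) operator on the odd checkerboard, so the
//eigenvectors live on the five-dimensional red-black grid FrbGrid.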
SchurDiagTwoOperator<ActionType,FermionField> SchurOp(action);
typedef typename ActionType::SiteSpinor SiteSpinor;
const CPSLanczosParams &fine = args.fine;
//Do the fine Lanczos
std::vector<RealD> evals;
std::vector<FermionField> evecs;
if(args.read_fine){
evals.resize(fine.N_true_get);
evecs.resize(fine.N_true_get, FrbGrid);
std::string evals_file = args.read_fine_file + "_evals.xml";
std::string evecs_file = args.read_fine_file + "_evecs.scidac";
std::cout << GridLogIRL<< "Reading evals from "<<evals_file<<std::endl;
XmlReader RDx(evals_file);
read(RDx,"evals",evals);
assert(evals.size()==fine.N_true_get);
std::cout << GridLogIRL<< "Reading evecs from "<<evecs_file<<std::endl;
emptyUserRecord record;
Grid::ScidacReader RD ;
RD.open(evecs_file);
for(int k=0;k<fine.N_true_get;k++) {
evecs[k].Checkerboard()=Odd;
RD.readScidacFieldRecord(evecs[k],record);
}
RD.close();
}else{
int Nstop = fine.Nstop(); //==N_true_get
int Nm = fine.Nm();
int Nk = fine.Nk();
RealD resid = fine.stop_rsd;
int MaxIt = fine.maxits;
assert(nbasis<=Nm);
Chebyshev<FermionField> Cheby(fine.getChebyParams());
FunctionHermOp<FermionField> ChebyOp(Cheby,SchurOp);
PlainHermOp<FermionField> Op(SchurOp);
evals.resize(Nm);
evecs.resize(Nm,FrbGrid);
ImplicitlyRestartedLanczos<FermionField> IRL(ChebyOp,Op,Nstop,Nk,Nm,resid,MaxIt,0,0);
FermionField src(FrbGrid);
typedef typename FermionField::scalar_type Scalar;
src=Scalar(1.0);
src.Checkerboard() = Odd;
int Nconv;
IRL.calc(evals, evecs,src,Nconv,false);
if(Nconv < Nstop) assert(0 && "Fine lanczos failed to converge the required number of evecs"); //algorithm doesn't consider this a failure
if(Nconv > Nstop){
//Yes this potentially throws away some evecs but it is better than having a random number of evecs between Nstop and Nm!
evals.resize(Nstop);
evecs.resize(Nstop, FrbGrid);
}
if(args.write_fine){
std::string evals_file = args.write_fine_file + "_evals.xml";
std::string evecs_file = args.write_fine_file + "_evecs.scidac";
std::cout << GridLogIRL<< "Writing evecs to "<<evecs_file<<std::endl;
emptyUserRecord record;
Grid::ScidacWriter WR(FrbGrid->IsBoss());
WR.open(evecs_file);
for(int k=0;k<evecs.size();k++) {
WR.writeScidacFieldRecord(evecs[k],record);
}
WR.close();
std::cout << GridLogIRL<< "Writing evals to "<<evals_file<<std::endl;
XmlWriter WRx(evals_file);
write(WRx,"evals",evals);
}
}
//Do the compression
LocalCoherenceCompressor<SiteSpinor,vTComplex,nbasis> compressor;
std::vector<FermionField> basis(nbasis,FrbGrid);
std::vector<CoarseField> compressed_evecs(evecs.size(),CoarseGrid5);
compressor.compress(basis, compressed_evecs, evecs, FrbGrid, CoarseGrid5);
compareBlockPromoteTimings(basis, compressed_evecs);
//Compare uncompressed and original evecs
compressor.compareEvecs(basis, compressed_evecs, evecs);
//Create the smoother
Chebyshev<FermionField> smoother(fine.getChebyParams());
//Test the quality of the uncompressed evecs
assert( compressor.testCompression(SchurOp, smoother, basis, compressed_evecs, evals, fine.stop_rsd, args.coarse_relax_tol) );
}
template<typename ActionType>
void run(ActionType &action, const std::string &config, const Args &args){
switch(args.basis_size){
case 50:
return run_b<50>(action,config,args);
case 100:
return run_b<100>(action,config,args);
case 150:
return run_b<150>(action,config,args);
case 200:
return run_b<200>(action,config,args);
case 250:
return run_b<250>(action,config,args);
case 300:
return run_b<300>(action,config,args);
case 350:
return run_b<350>(action,config,args);
case 400:
return run_b<400>(action,config,args);
default:
assert(0 && "Unsupported basis size: allowed values are 50,100,200,250,300,350,400");
}
}
//Note: because we rely upon physical properties we must use a "real" gauge configuration
int main (int argc, char ** argv) {
Grid_init(&argc,&argv);
GridLogIRL.TimingMode(1);
if(argc < 3){
std::cout << GridLogMessage << "Usage: <exe> <config file> <gparity dirs> <options>" << std::endl;
std::cout << GridLogMessage << "<gparity dirs> should have the format a.b.c where a,b,c are 0,1 depending on whether there are G-parity BCs in that direction" << std::endl;
std::cout << GridLogMessage << "Options:" << std::endl;
std::cout << GridLogMessage << "--Ls <value> : Set Ls (default 12)" << std::endl;
std::cout << GridLogMessage << "--mass <value> : Set the mass (default 0.01)" << std::endl;
std::cout << GridLogMessage << "--block <value> : Set the block size. Format should be a.b.c.d.e where a-e are the block extents (default 2.2.2.2.2)" << std::endl;
std::cout << GridLogMessage << "--is_cps_cfg : Indicate that the configuration was generated with CPS where until recently the stored plaquette was wrong by a factor of 2" << std::endl;
std::cout << GridLogMessage << "--write_irl_templ: Write a template for the parameters file of the Lanczos to \"irl_templ.xml\"" << std::endl;
std::cout << GridLogMessage << "--read_irl_fine <filename>: Real the parameters file for the fine Lanczos" << std::endl;
std::cout << GridLogMessage << "--write_fine <filename stub>: Write fine evecs/evals to filename starting with the stub" << std::endl;
std::cout << GridLogMessage << "--read_fine <filename stub>: Read fine evecs/evals from filename starting with the stub" << std::endl;
std::cout << GridLogMessage << "--coarse_relax_tol : Set the relaxation parameter for evaluating the residual of the reconstructed eigenvectors outside of the basis (default 1e5)" << std::endl;
std::cout << GridLogMessage << "--action : Set the action from 'DWF', 'Mobius' (default Mobius)" << std::endl;
std::cout << GridLogMessage << "--mobius_scale : Set the Mobius scale b+c (default 2)" << std::endl;
std::cout << GridLogMessage << "--basis_size : Set the basis size from 50,100,150,200,250,300,350,400 (default 100)" << std::endl;
Grid_finalize();
return 1;
}
std::string config = argv[1];
Args args;
GridCmdOptionIntVector(argv[2], args.GparityDirs);
assert(args.GparityDirs.size() == 3);
std::string action_s = "Mobius";
for(int i=3;i<argc;i++){
std::string sarg = argv[i];
if(sarg == "--Ls"){
args.Ls = std::stoi(argv[i+1]);
std::cout << GridLogMessage << "Set Ls to " << args.Ls << std::endl;
}else if(sarg == "--mass"){
std::istringstream ss(argv[i+1]); ss >> args.mass;
std::cout << GridLogMessage << "Set quark mass to " << args.mass << std::endl;
}else if(sarg == "--block"){
GridCmdOptionIntVector(argv[i+1], args.blockSize);
assert(args.blockSize.size() == 5);
std::cout << GridLogMessage << "Set block size to ";
for(int q=0;q<5;q++) std::cout << args.blockSize[q] << " ";
std::cout << std::endl;
}else if(sarg == "--is_cps_cfg"){
args.is_cps_cfg = true;
}else if(sarg == "--write_irl_templ"){
XmlWriter writer("irl_templ.xml");
write(writer,"Params",args.fine);
Grid_finalize();
return 0;
}else if(sarg == "--read_irl_fine"){
std::cout << GridLogMessage << "Reading fine IRL params from " << argv[i+1] << std::endl;
XmlReader reader(argv[i+1]);
read(reader, "Params", args.fine);
}else if(sarg == "--write_fine"){
args.write_fine = true;
args.write_fine_file = argv[i+1];
}else if(sarg == "--read_fine"){
args.read_fine = true;
args.read_fine_file = argv[i+1];
}else if(sarg == "--coarse_relax_tol"){
std::istringstream ss(argv[i+1]); ss >> args.coarse_relax_tol;
std::cout << GridLogMessage << "Set coarse IRL relaxation parameter to " << args.coarse_relax_tol << std::endl;
}else if(sarg == "--action"){
action_s = argv[i+1];
std::cout << "Action set to " << action_s << std::endl;
}else if(sarg == "--mobius_scale"){
std::istringstream ss(argv[i+1]); ss >> args.mobius_scale;
std::cout << GridLogMessage << "Set Mobius scale to " << args.mobius_scale << std::endl;
}else if(sarg == "--basis_size"){
args.basis_size = std::stoi(argv[i+1]);
std::cout << GridLogMessage << "Set basis size to " << args.basis_size << std::endl;
}
}
//Fine grids
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()), GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(args.Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(args.Ls,UGrid);
LatticeGaugeField Umu(UGrid);
bool is_gparity = false;
for(auto g : args.GparityDirs) if(g) is_gparity = true;
double bmc = 1.; //b-c is fixed to 1
double b = (args.mobius_scale + bmc)/2.; // b = 1/2 [ (b+c) + (b-c) ]
double c = (args.mobius_scale - bmc)/2.; // c = 1/2 [ (b+c) - (b-c) ]
if(is_gparity){
GparityWilsonImplD::ImplParams Params = setupGparityParams(args.GparityDirs);
readConfiguration<ConjugateGimplD>(Umu, config, args.is_cps_cfg); //Read the gauge field
if(action_s == "DWF"){
GparityDomainWallFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, Params);
run(action, config, args);
}else if(action_s == "Mobius"){
GparityMobiusFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, b, c, Params);
run(action, config, args);
}
}else{
WilsonImplD::ImplParams Params = setupParams();
readConfiguration<PeriodicGimplD>(Umu, config, args.is_cps_cfg); //Read the gauge field
if(action_s == "DWF"){
DomainWallFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, Params);
run(action, config, args);
}else if(action_s == "Mobius"){
MobiusFermionD action(Umu, *FGrid, *FrbGrid, *UGrid, *UrbGrid, args.mass, args.M5, b, c, Params);
run(action, config, args);
}
}
Grid_finalize();
}
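/* Example invocation (hypothetical file names and parameter values, shown only to illustrate the option syntax):
 *   ./Test_evec_compression ckpoint_lat.1000 1.1.0 --Ls 12 --mass 0.01 --block 2.2.2.2.2 --basis_size 100 --write_fine fine_evecs
 */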