mirror of https://github.com/paboyle/Grid.git synced 2024-11-09 23:45:36 +00:00

Merge branch 'develop' of https://github.com/paboyle/Grid into develop

Peter Boyle 2021-12-07 09:06:42 -08:00
commit 7f7d06d963
52 changed files with 850 additions and 88 deletions

View File

@ -358,7 +358,7 @@ public:
autoView( in_v , in, AcceleratorRead);
autoView( out_v , out, AcceleratorWrite);
autoView( Stencil_v , Stencil, AcceleratorRead);
auto& geom_v = geom;
int npoint = geom.npoint;
typedef LatticeView<Cobj> Aview;
Vector<Aview> AcceleratorViewContainer;
@ -380,7 +380,7 @@ public:
int ptype;
StencilEntry *SE;
for(int point=0;point<geom_v.npoint;point++){
for(int point=0;point<npoint;point++){
SE=Stencil_v.GetEntry(ptype,point,ss);
@ -424,7 +424,7 @@ public:
autoView( in_v , in, AcceleratorRead);
autoView( out_v , out, AcceleratorWrite);
autoView( Stencil_v , Stencil, AcceleratorRead);
auto& geom_v = geom;
int npoint = geom.npoint;
typedef LatticeView<Cobj> Aview;
Vector<Aview> AcceleratorViewContainer;
@ -454,7 +454,7 @@ public:
int ptype;
StencilEntry *SE;
for(int p=0;p<geom_v.npoint;p++){
for(int p=0;p<npoint;p++){
int point = points_p[p];
SE=Stencil_v.GetEntry(ptype,point,ss);
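The two hunks above replace a captured reference (auto& geom_v = geom) with a plain value (int npoint = geom.npoint) inside the accelerator kernels. Below is a minimal standalone sketch of that pattern, assuming the usual GPU-offload rule that a device lambda must not dereference host-resident objects; all names are illustrative, not Grid code.

// Minimal sketch (not Grid code): a device kernel should not capture a
// reference to a host-resident object such as "geom"; copying the
// trivially-copyable member (npoint) by value is safe.
struct Geometry { int npoint; };

template<class Kernel>
void accelerator_for_like(int nsites, Kernel kernel)  // stand-in for an accelerator_for-style launcher
{
  for (int ss = 0; ss < nsites; ss++) kernel(ss);     // on a GPU build this body would run on the device
}

void stencilLoop(const Geometry &geom, int nsites)
{
  // auto &geom_v = geom;        // BAD on GPU: the lambda would dereference host memory
  int npoint = geom.npoint;      // GOOD: capture the plain int by value
  accelerator_for_like(nsites, [=](int ss) {
    for (int point = 0; point < npoint; point++) {
      // ... visit stencil entry (ss, point) ...
      (void)ss;
    }
  });
}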

View File

@ -508,7 +508,7 @@ class SchurStaggeredOperator : public SchurOperatorBase<Field> {
virtual void MpcDag (const Field &in, Field &out){
Mpc(in,out);
}
virtual void MpcDagMpc(const Field &in, Field &out,RealD &ni,RealD &no) {
virtual void MpcDagMpc(const Field &in, Field &out) {
assert(0);// Never need with staggered
}
};
@ -586,6 +586,7 @@ class HermOpOperatorFunction : public OperatorFunction<Field> {
template<typename Field>
class PlainHermOp : public LinearFunction<Field> {
public:
using LinearFunction<Field>::operator();
LinearOperatorBase<Field> &_Linop;
PlainHermOp(LinearOperatorBase<Field>& linop) : _Linop(linop)
@ -599,6 +600,7 @@ public:
template<typename Field>
class FunctionHermOp : public LinearFunction<Field> {
public:
using LinearFunction<Field>::operator();
OperatorFunction<Field> & _poly;
LinearOperatorBase<Field> &_Linop;
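Several files in this commit add "using LinearFunction<Field>::operator();" declarations. A minimal standalone C++ sketch of the language rule involved: declaring any operator() in a derived class hides all base-class operator() overloads, and the using-declaration re-exposes them. The types below are illustrative, not Grid classes.

#include <vector>

struct Base {
  virtual void operator()(int x) {}                      // overload A
  virtual void operator()(const std::vector<int> &v) {}  // overload B
  virtual ~Base() = default;
};

struct DerivedWithoutUsing : Base {
  void operator()(int x) override {}   // hides overload B as well
};

struct DerivedWithUsing : Base {
  using Base::operator();              // re-exposes overload B
  void operator()(int x) override {}
};

int main() {
  std::vector<int> v{1, 2, 3};
  DerivedWithUsing d;
  d(v);                                // OK: found via the using-declaration
  // DerivedWithoutUsing e; e(v);      // would not compile: overload B is hidden
  return 0;
}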

View File

@ -30,13 +30,19 @@ Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
NAMESPACE_BEGIN(Grid);
template<class Field> class Preconditioner : public LinearFunction<Field> {
template<class Field> using Preconditioner = LinearFunction<Field> ;
/*
template<class Field> class Preconditioner : public LinearFunction<Field> {
using LinearFunction<Field>::operator();
virtual void operator()(const Field &src, Field & psi)=0;
};
*/
template<class Field> class TrivialPrecon : public Preconditioner<Field> {
public:
void operator()(const Field &src, Field & psi){
using Preconditioner<Field>::operator();
virtual void operator()(const Field &src, Field & psi){
psi = src;
}
TrivialPrecon(void){};

View File

@ -36,7 +36,8 @@ NAMESPACE_BEGIN(Grid);
template<class FieldD, class FieldF, typename std::enable_if< getPrecision<FieldD>::value == 2, int>::type = 0, typename std::enable_if< getPrecision<FieldF>::value == 1, int>::type = 0>
class MixedPrecisionBiCGSTAB : public LinearFunction<FieldD>
{
public:
public:
using LinearFunction<FieldD>::operator();
RealD Tolerance;
RealD InnerTolerance; // Initial tolerance for inner CG. Defaults to Tolerance but can be changed
Integer MaxInnerIterations;

View File

@ -67,6 +67,7 @@ public:
template<class Fobj,class CComplex,int nbasis>
class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
public:
using LinearFunction<Lattice<iVector<CComplex,nbasis > > >::operator();
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
@ -97,6 +98,7 @@ public:
template<class Fobj,class CComplex,int nbasis>
class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
public:
using LinearFunction<Lattice<iVector<CComplex,nbasis > > >::operator();
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field

View File

@ -43,7 +43,7 @@ NAMESPACE_BEGIN(Grid);
template<class Field>
class PrecGeneralisedConjugateResidual : public LinearFunction<Field> {
public:
using LinearFunction<Field>::operator();
RealD Tolerance;
Integer MaxIterations;
int verbose;

View File

@ -43,7 +43,7 @@ NAMESPACE_BEGIN(Grid);
template<class Field>
class PrecGeneralisedConjugateResidualNonHermitian : public LinearFunction<Field> {
public:
using LinearFunction<Field>::operator();
RealD Tolerance;
Integer MaxIterations;
int verbose;
@ -119,7 +119,8 @@ public:
RealD GCRnStep(const Field &src, Field &psi,RealD rsq){
RealD cp;
ComplexD a, b, zAz;
ComplexD a, b;
// ComplexD zAz;
RealD zAAz;
ComplexD rq;
@ -146,7 +147,7 @@ public:
//////////////////////////////////
MatTimer.Start();
Linop.Op(psi,Az);
zAz = innerProduct(Az,psi);
// zAz = innerProduct(Az,psi);
zAAz= norm2(Az);
MatTimer.Stop();
@ -170,7 +171,7 @@ public:
LinalgTimer.Start();
zAz = innerProduct(Az,psi);
// zAz = innerProduct(Az,psi);
zAAz= norm2(Az);
//p[0],q[0],qq[0]
@ -212,7 +213,7 @@ public:
MatTimer.Start();
Linop.Op(z,Az);
MatTimer.Stop();
zAz = innerProduct(Az,psi);
// zAz = innerProduct(Az,psi);
zAAz= norm2(Az);
LinalgTimer.Start();

View File

@ -170,6 +170,7 @@ private:
public:
static void Print(void);
static void PrintState( void* CpuPtr);
static int isOpen (void* CpuPtr);
static void ViewClose(void* CpuPtr,ViewMode mode);
static void *ViewOpen (void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint);

View File

@ -474,6 +474,32 @@ int MemoryManager::isOpen (void* _CpuPtr)
}
}
void MemoryManager::PrintState(void* _CpuPtr)
{
uint64_t CpuPtr = (uint64_t)_CpuPtr;
if ( EntryPresent(CpuPtr) ){
auto AccCacheIterator = EntryLookup(CpuPtr);
auto & AccCache = AccCacheIterator->second;
std::string str;
if ( AccCache.state==Empty ) str = std::string("Empty");
if ( AccCache.state==CpuDirty ) str = std::string("CpuDirty");
if ( AccCache.state==AccDirty ) str = std::string("AccDirty");
if ( AccCache.state==Consistent)str = std::string("Consistent");
if ( AccCache.state==EvictNext) str = std::string("EvictNext");
std::cout << GridLogMessage << "CpuAddr\t\tAccAddr\t\tState\t\tcpuLock\taccLock\tLRU_valid "<<std::endl;
std::cout << GridLogMessage << "0x"<<std::hex<<AccCache.CpuPtr<<std::dec
<< "\t0x"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
<< "\t" << AccCache.cpuLock
<< "\t" << AccCache.accLock
<< "\t" << AccCache.LRU_valid<<std::endl;
} else {
std::cout << GridLogMessage << "No Entry in AccCache table." << std::endl;
}
}
NAMESPACE_END(Grid);
#endif

View File

@ -16,6 +16,10 @@ uint64_t MemoryManager::DeviceToHostXfer;
void MemoryManager::ViewClose(void* AccPtr,ViewMode mode){};
void *MemoryManager::ViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint){ return CpuPtr; };
int MemoryManager::isOpen (void* CpuPtr) { return 0;}
void MemoryManager::PrintState(void* CpuPtr)
{
std::cout << GridLogMessage << "Host<->Device memory movement not currently managed by Grid." << std::endl;
};
void MemoryManager::Print(void){};
void MemoryManager::NotifyDeletion(void *ptr){};

View File

@ -88,6 +88,13 @@ public:
LatticeView<vobj> accessor(*( (LatticeAccelerator<vobj> *) this),mode);
accessor.ViewClose();
}
// Helper function to print the state of this object in the AccCache
void PrintCacheState(void)
{
MemoryManager::PrintState(this->_odata);
}
/////////////////////////////////////////////////////////////////////////////////
// Return a view object that may be dereferenced in site loops.
// The view is trivially copy constructible and may be copied to an accelerator device
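A hypothetical usage sketch of the new diagnostic hook, assuming a working Grid build; the types and grid construction follow the patterns used elsewhere in this commit.

#include <Grid/Grid.h>
using namespace Grid;

int main(int argc, char **argv)
{
  Grid_init(&argc, &argv);
  GridCartesian *UGrid =
    SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
                                   GridDefaultSimd(Nd, vComplex::Nsimd()),
                                   GridDefaultMpi());
  LatticeComplex z(UGrid);
  z = Zero();
  z.PrintCacheState();                 // new helper: forwards to MemoryManager::PrintState
  {
    autoView(z_v, z, AcceleratorRead); // opening a view can change the AccCache state
    z.PrintCacheState();
  }
  Grid_finalize();
  return 0;
}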

View File

@ -576,7 +576,8 @@ class ScidacReader : public GridLimeReader {
std::string rec_name(ILDG_BINARY_DATA);
while ( limeReaderNextRecord(LimeR) == LIME_SUCCESS ) {
if ( !strncmp(limeReaderType(LimeR), rec_name.c_str(),strlen(rec_name.c_str()) ) ) {
skipPastObjectRecord(std::string(GRID_FIELD_NORM));
// in principle should do the line below, but that breaks backward compatibility with old data
// skipPastObjectRecord(std::string(GRID_FIELD_NORM));
skipPastObjectRecord(std::string(SCIDAC_CHECKSUM));
return;
}

View File

@ -828,6 +828,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
#if (!defined(GRID_HIP))
int tshift = (mu == Nd-1) ? 1 : 0;
unsigned int LLt = GridDefaultLatt()[Tp];
////////////////////////////////////////////////
// GENERAL CAYLEY CASE
////////////////////////////////////////////////
@ -880,7 +881,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
}
std::vector<RealD> G_s(Ls,1.0);
RealD sign = 1; // sign flip for vector/tadpole
RealD sign = 1.0; // sign flip for vector/tadpole
if ( curr_type == Current::Axial ) {
for(int s=0;s<Ls/2;s++){
G_s[s] = -1.0;
@ -890,7 +891,7 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
auto b=this->_b;
auto c=this->_c;
if ( b == 1 && c == 0 ) {
sign = -1;
sign = -1.0;
}
else {
std::cerr << "Error: Tadpole implementation currently unavailable for non-Shamir actions." << std::endl;
@ -934,7 +935,13 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
tmp = Cshift(tmp,mu,-1);
Impl::multLinkField(Utmp,this->Umu,tmp,mu+Nd); // Adjoint link
tmp = -G_s[s]*( Utmp + gmu*Utmp );
tmp = where((lcoor>=tmin+tshift),tmp,zz); // Mask the time
// Mask the time
if (tmax == LLt - 1 && tshift == 1){ // quick fix to include timeslice 0 if tmax + tshift is over the last timeslice
unsigned int t0 = 0;
tmp = where(((lcoor==t0) || (lcoor>=tmin+tshift)),tmp,zz);
} else {
tmp = where((lcoor>=tmin+tshift),tmp,zz);
}
L_Q += where((lcoor<=tmax+tshift),tmp,zz); // Position of current complicated
InsertSlice(L_Q, q_out, s , 0);
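A standalone sketch of the masking logic in the hunk above: a timeslice contributes when it lies in the shifted window [tmin+tshift, tmax+tshift], and the quick fix additionally admits timeslice 0 when the window runs past the last slice. This is plain C++ for illustration, not Grid code.

#include <cstdio>

bool inShiftedWindow(unsigned t, unsigned tmin, unsigned tmax,
                     unsigned tshift, unsigned LLt)
{
  // Lower bound, with the wrap-around fix for timeslice 0
  bool lower = (t >= tmin + tshift) ||
               (tmax == LLt - 1 && tshift == 1 && t == 0);
  // Upper bound, as applied when accumulating L_Q
  bool upper = (t <= tmax + tshift);
  return lower && upper;
}

int main() {
  unsigned LLt = 8, tmin = 0, tmax = 7, tshift = 1;
  for (unsigned t = 0; t < LLt; t++)
    std::printf("t=%u in window: %d\n", t,
                (int)inShiftedWindow(t, tmin, tmax, tshift, LLt));
  return 0;
}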

View File

@ -47,20 +47,20 @@ NAMESPACE_BEGIN(Grid);
class TypePair {
public:
T _internal[2];
TypePair<T>& operator=(const Grid::Zero& o) {
accelerator TypePair<T>& operator=(const Grid::Zero& o) {
_internal[0] = Zero();
_internal[1] = Zero();
return *this;
}
TypePair<T> operator+(const TypePair<T>& o) const {
accelerator TypePair<T> operator+(const TypePair<T>& o) const {
TypePair<T> r;
r._internal[0] = _internal[0] + o._internal[0];
r._internal[1] = _internal[1] + o._internal[1];
return r;
}
TypePair<T>& operator+=(const TypePair<T>& o) {
accelerator TypePair<T>& operator+=(const TypePair<T>& o) {
_internal[0] += o._internal[0];
_internal[1] += o._internal[1];
return *this;
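The hunk above adds the accelerator qualifier to the TypePair operators. Grid defines accelerator as __host__ __device__ on GPU targets (see the accelerator header changes later in this commit), so operators called inside device kernels need it. A minimal standalone sketch, with an illustrative macro fallback for host-only builds.

#if defined(__CUDACC__) || defined(__HIPCC__)
#define accelerator __host__ __device__
#else
#define accelerator
#endif

template<class T>
struct PairLike {                       // illustrative stand-in for TypePair<T>
  T v[2];
  accelerator PairLike operator+(const PairLike &o) const {  // callable on host and device
    PairLike r;
    r.v[0] = v[0] + o.v[0];
    r.v[1] = v[1] + o.v[1];
    return r;
  }
};

int main() {
  PairLike<double> a{{1.0, 2.0}}, b{{3.0, 4.0}};
  PairLike<double> c = a + b;           // host use is unchanged
  return (c.v[0] == 4.0 && c.v[1] == 6.0) ? 0 : 1;
}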

View File

@ -84,7 +84,8 @@ void acceleratorInit(void)
// IBM jsrun makes the CUDA device numbering screwy so that it does not match the rank
if ( world_rank == 0 ) {
printf("AcceleratorCudaInit: using default device \n");
printf("AcceleratorCudaInit: assume user either uses a) IBM jsrun, or \n");
printf("AcceleratorCudaInit: assume user either uses\n");
printf("AcceleratorCudaInit: a) IBM jsrun, or \n");
printf("AcceleratorCudaInit: b) invokes through a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding \n");
printf("AcceleratorCudaInit: Configure options --enable-setdevice=no \n");
}
@ -109,6 +110,7 @@ void acceleratorInit(void)
#ifdef GRID_HIP
hipDeviceProp_t *gpu_props;
hipStream_t copyStream;
void acceleratorInit(void)
{
int nDevices = 1;
@ -166,16 +168,25 @@ void acceleratorInit(void)
#ifdef GRID_DEFAULT_GPU
if ( world_rank == 0 ) {
printf("AcceleratorHipInit: using default device \n");
printf("AcceleratorHipInit: assume user either uses a wrapping script to set CUDA_VISIBLE_DEVICES, UCX_NET_DEVICES, and numa binding \n");
printf("AcceleratorHipInit: Configure options --enable-summit, --enable-select-gpu=no \n");
printf("AcceleratorHipInit: assume user or srun sets ROCR_VISIBLE_DEVICES and numa binding \n");
printf("AcceleratorHipInit: Configure options --enable-setdevice=no \n");
}
int device = 0;
#else
if ( world_rank == 0 ) {
printf("AcceleratorHipInit: rank %d setting device to node rank %d\n",world_rank,rank);
printf("AcceleratorHipInit: Configure options --enable-select-gpu=yes \n");
printf("AcceleratorHipInit: Configure options --enable-setdevice=yes \n");
}
hipSetDevice(rank);
int device = rank;
#endif
hipSetDevice(device);
hipStreamCreate(&copyStream);
const int len=64;
char busid[len];
if( rank == world_rank ) {
hipDeviceGetPCIBusId(busid, len, device);
printf("local rank %d device %d bus id: %s\n", rank, device, busid);
}
if ( world_rank == 0 ) printf("AcceleratorHipInit: ================================================\n");
}
#endif

View File

@ -230,6 +230,7 @@ inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes
cudaMemcpyAsync(to,from,bytes, cudaMemcpyDeviceToDevice,copyStream);
}
inline void acceleratorCopySynchronise(void) { cudaStreamSynchronize(copyStream); };
inline int acceleratorIsCommunicable(void *ptr)
{
// int uvm=0;
@ -337,6 +338,7 @@ NAMESPACE_BEGIN(Grid);
#define accelerator __host__ __device__
#define accelerator_inline __host__ __device__ inline
extern hipStream_t copyStream;
/*These routines define mapping from thread grid to loop & vector lane indexing */
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
#ifdef GRID_SIMT
@ -411,10 +413,16 @@ inline void acceleratorFreeShared(void *ptr){ hipFree(ptr);};
inline void acceleratorFreeDevice(void *ptr){ hipFree(ptr);};
inline void acceleratorCopyToDevice(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyHostToDevice);}
inline void acceleratorCopyFromDevice(void *from,void *to,size_t bytes){ hipMemcpy(to,from,bytes, hipMemcpyDeviceToHost);}
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);}
inline void acceleratorCopySynchronise(void) { }
//inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) { hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);}
//inline void acceleratorCopySynchronise(void) { }
inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(base,value,bytes);}
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
{
hipMemcpyAsync(to,from,bytes, hipMemcpyDeviceToDevice,copyStream);
}
inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); };
#endif
//////////////////////////////////////////////
@ -485,18 +493,12 @@ inline void acceleratorFreeCpu (void *ptr){free(ptr);};
///////////////////////////////////////////////////
// Synchronise across local threads for divergence resynch
///////////////////////////////////////////////////
accelerator_inline void acceleratorSynchronise(void)
accelerator_inline void acceleratorSynchronise(void) // Only Nvidia needs this
{
#ifdef GRID_SIMT
#ifdef GRID_CUDA
__syncwarp();
#endif
#ifdef GRID_SYCL
//cl::sycl::detail::workGroupBarrier();
#endif
#ifdef GRID_HIP
__syncthreads();
#endif
#endif
return;
}
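The HIP hunk above switches the device-to-device copy to an asynchronous hipMemcpyAsync on a dedicated stream, with acceleratorCopySynchronise waiting on that stream. A minimal standalone HIP sketch of the same pattern; the variable names are illustrative, not Grid's.

#include <hip/hip_runtime.h>

hipStream_t copyStream;

void copyDeviceToDeviceAsynch(void *from, void *to, size_t bytes) {
  hipMemcpyAsync(to, from, bytes, hipMemcpyDeviceToDevice, copyStream); // returns immediately
}

void copySynchronise(void) {
  hipStreamSynchronize(copyStream); // wait for all copies queued on the stream
}

int main() {
  hipStreamCreate(&copyStream);
  float *a, *b;
  size_t bytes = 1024 * sizeof(float);
  hipMalloc((void **)&a, bytes);
  hipMalloc((void **)&b, bytes);
  copyDeviceToDeviceAsynch(a, b, bytes); // enqueue
  copySynchronise();                     // data in b is valid only after this
  hipFree(a); hipFree(b);
  hipStreamDestroy(copyStream);
  return 0;
}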

View File

@ -4,7 +4,7 @@ using namespace Grid;
template<class Field>
void SimpleConjugateGradient(LinearOperatorBase<Field> &HPDop,const Field &b, Field &x)
{
RealD cp, c, alpha, d, beta, ssq, qq;
RealD cp, c, alpha, d, beta, ssq;
RealD Tolerance=1.0e-10;
int MaxIterations=10000;

View File

@ -0,0 +1,539 @@
/*
* Warning: this code is illustrative only; it is not well tested and is not meant for production use
* without regression tests being applied
*/
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
typedef SpinColourMatrix Propagator;
typedef SpinColourVector Fermion;
typedef PeriodicGimplR GimplR;
template<class Gimpl,class Field> class CovariantLaplacianCshift : public SparseMatrixBase<Field>
{
public:
INHERIT_GIMPL_TYPES(Gimpl);
GridBase *grid;
GaugeField U;
CovariantLaplacianCshift(GaugeField &_U) :
grid(_U.Grid()),
U(_U) { };
virtual GridBase *Grid(void) { return grid; };
virtual void M (const Field &in, Field &out)
{
out=Zero();
for(int mu=0;mu<Nd-1;mu++) {
GaugeLinkField Umu = PeekIndex<LorentzIndex>(U, mu); // NB: Inefficient
out = out - Gimpl::CovShiftForward(Umu,mu,in);
out = out - Gimpl::CovShiftBackward(Umu,mu,in);
out = out + 2.0*in;
}
};
virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian
virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid
virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid
virtual void MdirAll (const Field &in, std::vector<Field> &out) {assert(0);}; // Unimplemented need only for multigrid
};
void MakePhase(Coordinate mom,LatticeComplex &phase)
{
GridBase *grid = phase.Grid();
auto latt_size = grid->GlobalDimensions();
ComplexD ci(0.0,1.0);
phase=Zero();
LatticeComplex coor(phase.Grid());
for(int mu=0;mu<Nd;mu++){
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
LatticeCoordinate(coor,mu);
phase = phase + (TwoPiL * mom[mu]) * coor;
}
phase = exp(phase*ci);
}
void LinkSmear(int nstep, RealD rho,LatticeGaugeField &Uin,LatticeGaugeField &Usmr)
{
Smear_Stout<GimplR> Stout(rho);
LatticeGaugeField Utmp(Uin.Grid());
Utmp = Uin;
for(int i=0;i<nstep;i++){
Stout.smear(Usmr,Utmp);
Utmp = Usmr;
}
}
void PointSource(Coordinate &coor,LatticePropagator &source)
{
// Coordinate coor({0,0,0,0});
source=Zero();
SpinColourMatrix kronecker; kronecker=1.0;
pokeSite(kronecker,source,coor);
}
void GFWallSource(int tslice,LatticePropagator &source)
{
GridBase *grid = source.Grid();
LatticeComplex one(grid); one = ComplexD(1.0,0.0);
LatticeComplex zz(grid); zz=Zero();
LatticeInteger t(grid);
LatticeCoordinate(t,Tdir);
one = where(t==Integer(tslice), one, zz);
source = 1.0;
source = source * one;
}
void Z2WallSource(GridParallelRNG &RNG,int tslice,LatticePropagator &source)
{
GridBase *grid = source.Grid();
LatticeComplex noise(grid);
LatticeComplex zz(grid); zz=Zero();
LatticeInteger t(grid);
RealD nrm=1.0/sqrt(2);
bernoulli(RNG, noise); // 0,1 50:50
noise = (2.*noise - Complex(1,1))*nrm;
LatticeCoordinate(t,Tdir);
noise = where(t==Integer(tslice), noise, zz);
source = 1.0;
source = source*noise;
std::cout << " Z2 wall " << norm2(source) << std::endl;
}
void GaugeFix(LatticeGaugeField &U,LatticeGaugeField &Ufix)
{
Real alpha=0.05;
Real plaq=WilsonLoops<GimplR>::avgPlaquette(U);
std::cout << " Initial plaquette "<<plaq << std::endl;
LatticeColourMatrix xform(U.Grid());
Ufix = U;
int orthog=Nd-1;
FourierAcceleratedGaugeFixer<GimplR>::SteepestDescentGaugeFix(Ufix,xform,alpha,100000,1.0e-14, 1.0e-14,true,orthog);
plaq=WilsonLoops<GimplR>::avgPlaquette(Ufix);
std::cout << " Final plaquette "<<plaq << std::endl;
}
template<class Field>
void GaussianSmear(LatticeGaugeField &U,Field &unsmeared,Field &smeared)
{
typedef CovariantLaplacianCshift <GimplR,Field> Laplacian_t;
Laplacian_t Laplacian(U);
Integer Iterations = 40;
Real width = 2.0;
Real coeff = (width*width) / Real(4*Iterations);
Field tmp(U.Grid());
smeared=unsmeared;
// chi = (1-p^2/2N)^N kronecker
for(int n = 0; n < Iterations; ++n) {
Laplacian.M(smeared,tmp);
smeared = smeared - coeff*tmp;
std::cout << " smear iter " << n<<" " <<norm2(smeared)<<std::endl;
}
}
void GaussianSource(Coordinate &site,LatticeGaugeField &U,LatticePropagator &source)
{
LatticePropagator tmp(source.Grid());
PointSource(site,source);
std::cout << " GaussianSource Kronecker "<< norm2(source)<<std::endl;
tmp = source;
GaussianSmear(U,tmp,source);
std::cout << " GaussianSource Smeared "<< norm2(source)<<std::endl;
}
void GaussianWallSource(GridParallelRNG &RNG,int tslice,LatticeGaugeField &U,LatticePropagator &source)
{
Z2WallSource(RNG,tslice,source);
auto tmp = source;
GaussianSmear(U,tmp,source);
}
void SequentialSource(int tslice,Coordinate &mom,LatticePropagator &spectator,LatticePropagator &source)
{
assert(mom.size()==Nd);
assert(mom[Tdir] == 0);
GridBase * grid = spectator.Grid();
LatticeInteger ts(grid);
LatticeCoordinate(ts,Tdir);
source = Zero();
source = where(ts==Integer(tslice),spectator,source); // Stick in a slice of the spectator, zero everywhere else
LatticeComplex phase(grid);
MakePhase(mom,phase);
source = source *phase;
}
template<class Action>
void Solve(Action &D,LatticePropagator &source,LatticePropagator &propagator)
{
GridBase *UGrid = D.GaugeGrid();
GridBase *FGrid = D.FermionGrid();
LatticeFermion src4 (UGrid);
LatticeFermion src5 (FGrid);
LatticeFermion result5(FGrid);
LatticeFermion result4(UGrid);
ConjugateGradient<LatticeFermion> CG(1.0e-12,100000);
SchurRedBlackDiagTwoSolve<LatticeFermion> schur(CG);
ZeroGuesser<LatticeFermion> ZG; // Could be a DeflatedGuesser if we have eigenvectors
for(int s=0;s<Nd;s++){
for(int c=0;c<Nc;c++){
PropToFerm<Action>(src4,source,s,c);
D.ImportPhysicalFermionSource(src4,src5);
result5=Zero();
schur(D,src5,result5,ZG);
std::cout<<GridLogMessage
<<"spin "<<s<<" color "<<c
<<" norm2(src5d) " <<norm2(src5)
<<" norm2(result5d) "<<norm2(result5)<<std::endl;
D.ExportPhysicalFermionSolution(result5,result4);
FermToProp<Action>(propagator,result4,s,c);
}
}
}
class MesonFile: Serializable {
public:
GRID_SERIALIZABLE_CLASS_MEMBERS(MesonFile, std::vector<std::vector<Complex> >, data);
};
void MesonTrace(std::string file,LatticePropagator &q1,LatticePropagator &q2,LatticeComplex &phase)
{
const int nchannel=4;
Gamma::Algebra Gammas[nchannel][2] = {
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::Gamma5},
{Gamma::Algebra::GammaTGamma5,Gamma::Algebra::GammaTGamma5},
{Gamma::Algebra::GammaTGamma5,Gamma::Algebra::Gamma5},
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::GammaTGamma5}
};
Gamma G5(Gamma::Algebra::Gamma5);
LatticeComplex meson_CF(q1.Grid());
MesonFile MF;
for(int ch=0;ch<nchannel;ch++){
Gamma Gsrc(Gammas[ch][0]);
Gamma Gsnk(Gammas[ch][1]);
meson_CF = trace(G5*adj(q1)*G5*Gsnk*q2*adj(Gsrc));
std::vector<TComplex> meson_T;
sliceSum(meson_CF,meson_T, Tdir);
int nt=meson_T.size();
std::vector<Complex> corr(nt);
for(int t=0;t<nt;t++){
corr[t] = TensorRemove(meson_T[t]); // Yes this is ugly; not figured out a workaround
std::cout << " channel "<<ch<<" t "<<t<<" " <<corr[t]<<std::endl;
}
MF.data.push_back(corr);
}
{
XmlWriter WR(file);
write(WR,"MesonFile",MF);
}
}
void Meson3pt(std::string file,LatticePropagator &q1,LatticePropagator &q2,LatticeComplex &phase)
{
const int nchannel=4;
Gamma::Algebra Gammas[nchannel][2] = {
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::GammaX},
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::GammaY},
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::GammaZ},
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::GammaT}
};
Gamma G5(Gamma::Algebra::Gamma5);
LatticeComplex meson_CF(q1.Grid());
MesonFile MF;
for(int ch=0;ch<nchannel;ch++){
Gamma Gsrc(Gammas[ch][0]);
Gamma Gsnk(Gammas[ch][1]);
meson_CF = trace(G5*adj(q1)*G5*Gsnk*q2*adj(Gsrc));
std::vector<TComplex> meson_T;
sliceSum(meson_CF,meson_T, Tdir);
int nt=meson_T.size();
std::vector<Complex> corr(nt);
for(int t=0;t<nt;t++){
corr[t] = TensorRemove(meson_T[t]); // Yes this is ugly; not figured out a workaround
std::cout << " channel "<<ch<<" t "<<t<<" " <<corr[t]<<std::endl;
}
MF.data.push_back(corr);
}
{
XmlWriter WR(file);
write(WR,"MesonFile",MF);
}
}
void WallSinkMesonTrace(std::string file,std::vector<Propagator> &q1,std::vector<Propagator> &q2)
{
const int nchannel=4;
Gamma::Algebra Gammas[nchannel][2] = {
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::Gamma5},
{Gamma::Algebra::GammaTGamma5,Gamma::Algebra::GammaTGamma5},
{Gamma::Algebra::GammaTGamma5,Gamma::Algebra::Gamma5},
{Gamma::Algebra::Gamma5 ,Gamma::Algebra::GammaTGamma5}
};
Gamma G5(Gamma::Algebra::Gamma5);
int nt=q1.size();
std::vector<Complex> meson_CF(nt);
MesonFile MF;
for(int ch=0;ch<nchannel;ch++){
Gamma Gsrc(Gammas[ch][0]);
Gamma Gsnk(Gammas[ch][1]);
std::vector<Complex> corr(nt);
for(int t=0;t<nt;t++){
meson_CF[t] = trace(G5*adj(q1[t])*G5*Gsnk*q2[t]*adj(Gsrc));
corr[t] = TensorRemove(meson_CF[t]); // Yes this is ugly; not figured out a workaround
std::cout << " channel "<<ch<<" t "<<t<<" " <<corr[t]<<std::endl;
}
MF.data.push_back(corr);
}
{
XmlWriter WR(file);
write(WR,"MesonFile",MF);
}
}
int make_idx(int p, int m,int nmom)
{
if (m==0) return p;
assert(p==0);
return nmom + m - 1;
}
int main (int argc, char ** argv)
{
Grid_init(&argc,&argv);
// Double precision grids
auto latt = GridDefaultLatt();
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
GridDefaultSimd(Nd,vComplex::Nsimd()),
GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
LatticeGaugeField Umu(UGrid);
LatticeGaugeField Utmp(UGrid);
LatticeGaugeField Usmr(UGrid);
std::string config;
if( argc > 1 && argv[1][0] != '-' )
{
std::cout<<GridLogMessage <<"Loading configuration from "<<argv[1]<<std::endl;
FieldMetaData header;
NerscIO::readConfiguration(Umu, header, argv[1]);
config=argv[1];
}
else
{
std::cout<<GridLogMessage <<"Using hot configuration"<<std::endl;
SU<Nc>::ColdConfiguration(Umu);
config="ColdConfig";
}
// GaugeFix(Umu,Utmp);
// Umu=Utmp;
int nsmr=3;
RealD rho=0.1;
LinkSmear(nsmr,rho,Umu,Usmr);
std::vector<int> smeared_link({ 0,0,1} );
std::vector<RealD> masses({ 0.004,0.02477,0.447} ); // u/d, s, c ??
std::vector<RealD> M5s ({ 1.8,1.8,1.0} );
std::vector<RealD> bs ({ 1.0,1.0,1.5} ); // DDM
std::vector<RealD> cs ({ 0.0,0.0,0.5} ); // DDM
std::vector<int> Ls_s ({ 16,16,12} );
std::vector<GridCartesian *> FGrids;
std::vector<GridRedBlackCartesian *> FrbGrids;
std::vector<Coordinate> momenta;
momenta.push_back(Coordinate({0,0,0,0}));
momenta.push_back(Coordinate({1,0,0,0}));
momenta.push_back(Coordinate({2,0,0,0}));
int nmass = masses.size();
int nmom = momenta.size();
std::vector<MobiusFermionR *> FermActs;
std::cout<<GridLogMessage <<"======================"<<std::endl;
std::cout<<GridLogMessage <<"MobiusFermion action as Scaled Shamir kernel"<<std::endl;
std::cout<<GridLogMessage <<"======================"<<std::endl;
std::vector<Complex> boundary = {1,1,1,-1};
typedef MobiusFermionR FermionAction;
FermionAction::ImplParams Params(boundary);
for(int m=0;m<masses.size();m++) {
RealD mass = masses[m];
RealD M5 = M5s[m];
RealD b = bs[m];
RealD c = cs[m];
int Ls = Ls_s[m];
if ( smeared_link[m] ) Utmp = Usmr;
else Utmp = Umu;
FGrids.push_back(SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid));
FrbGrids.push_back(SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid));
FermActs.push_back(new MobiusFermionR(Utmp,*FGrids[m],*FrbGrids[m],*UGrid,*UrbGrid,mass,M5,b,c,Params));
}
LatticePropagator z2wall_source(UGrid);
LatticePropagator gfwall_source(UGrid);
LatticePropagator phased_prop(UGrid);
int tslice = 0;
int tseq=(tslice+16)%latt[Nd-1];
//////////////////////////////////////////////////////////////////////
// RNG seeded for Z2 wall
//////////////////////////////////////////////////////////////////////
// You can manage seeds however you like.
// Recommend SeedUniqueString.
//////////////////////////////////////////////////////////////////////
GridParallelRNG RNG4(UGrid); RNG4.SeedUniqueString("Study2-Source_Z2_p_0_0_0_t_0-880");
Z2WallSource (RNG4,tslice,z2wall_source);
GFWallSource (tslice,gfwall_source);
std::vector<LatticeComplex> phase(nmom,UGrid);
for(int m=0;m<nmom;m++){
MakePhase(momenta[m],phase[m]);
}
std::vector<LatticePropagator> Z2Props (nmom+nmass-1,UGrid);
std::vector<LatticePropagator> GFProps (nmom+nmass-1,UGrid);
for(int p=0;p<nmom;p++) {
int m=0;
int idx = make_idx(p,m,nmom);
phased_prop = z2wall_source * phase[p];
Solve(*FermActs[m],phased_prop ,Z2Props[idx]);
phased_prop = gfwall_source * phase[p];
Solve(*FermActs[m],phased_prop ,GFProps[idx]);
}
for(int m=1;m<nmass;m++) {
int p=0;
int idx = make_idx(p,m,nmom);
phased_prop = z2wall_source;
Solve(*FermActs[m],phased_prop ,Z2Props[idx]);
phased_prop = gfwall_source;
Solve(*FermActs[m],phased_prop ,GFProps[idx]);
}
std::vector<std::vector<Propagator> > wsnk_z2Props(nmom+nmass-1);
std::vector<std::vector<Propagator> > wsnk_gfProps(nmom+nmass-1);
// Kaon and D two-point functions, point and wall sinks, with non-zero momentum
// WW stick momentum on m1 (lighter)
// zero momentum on m2
for(int m1=0;m1<nmass;m1++) {
for(int m2=m1;m2<nmass;m2++) {
int pmax = (m1==0)? nmom:1;
for(int p=0;p<pmax;p++){
std::stringstream ssg,ssz;
std::stringstream wssg,wssz;
int idx1 = make_idx(p,m1,nmom);
int idx2 = make_idx(0,m2,nmom);
/// Point sinks
ssg<<config<<"_p"<<p<< "_m" << m1 << "_m"<< m2 << "_p_gf_meson.xml";
ssz<<config<<"_p"<<p<< "_m" << m1 << "_m"<< m2 << "_p_z2_meson.xml";
MesonTrace(ssz.str(),Z2Props[idx1],Z2Props[idx2],phase[p]); // Q1 is conjugated
MesonTrace(ssg.str(),GFProps[idx1],GFProps[idx2],phase[p]);
/// Wall sinks
wssg<<config<<"_p"<<p<< "_m" << m1 << "_m"<< m2 << "_w_gf_meson.xml";
wssz<<config<<"_p"<<p<< "_m" << m1 << "_m"<< m2 << "_w_z2_meson.xml";
phased_prop = GFProps[m2] * phase[p];
sliceSum(phased_prop,wsnk_gfProps[m1],Tdir);
sliceSum(GFProps[m1],wsnk_gfProps[m2],Tdir);
WallSinkMesonTrace(wssg.str(),wsnk_gfProps[m1],wsnk_gfProps[m2]);
phased_prop = Z2Props[m2] * phase[p];
sliceSum(phased_prop,wsnk_z2Props[m1],Tdir);
sliceSum(Z2Props[m1],wsnk_z2Props[m2],Tdir);
WallSinkMesonTrace(wssz.str(),wsnk_z2Props[m1],wsnk_z2Props[m2]);
}
}}
/////////////////////////////////////
// Sequential solves
/////////////////////////////////////
LatticePropagator seq_wsnk_z2src(UGrid);
LatticePropagator seq_wsnk_gfsrc(UGrid);
LatticePropagator seq_psnk_z2src(UGrid);
LatticePropagator seq_psnk_gfsrc(UGrid);
LatticePropagator source(UGrid);
for(int m=0;m<nmass-1;m++){
int spect_idx = make_idx(0,m,nmom);
int charm=nmass-1;
SequentialSource(tseq,momenta[0],GFProps[spect_idx],source);
Solve(*FermActs[charm],source,seq_psnk_gfsrc);
SequentialSource(tseq,momenta[0],Z2Props[spect_idx],source);
Solve(*FermActs[charm],source,seq_psnk_z2src);
// Todo need wall sequential solve
for(int p=0;p<nmom;p++){
int active_idx = make_idx(p,0,nmom);
std::stringstream seq_3pt_p_z2;
std::stringstream seq_3pt_p_gf;
std::stringstream seq_3pt_w_z2;
std::stringstream seq_3pt_w_gf;
seq_3pt_p_z2 <<config<<"_3pt_p"<<p<< "_m" << m << "_p_z2_meson.xml";
seq_3pt_p_gf <<config<<"_3pt_p"<<p<< "_m" << m << "_p_gf_meson.xml";
seq_3pt_w_z2 <<config<<"_3pt_p"<<p<< "_m" << m << "_w_z2_meson.xml";
seq_3pt_w_gf <<config<<"_3pt_p"<<p<< "_m" << m << "_w_gf_meson.xml";
Meson3pt(seq_3pt_p_gf.str(),GFProps[active_idx],seq_psnk_gfsrc,phase[p]);
Meson3pt(seq_3pt_p_z2.str(),Z2Props[active_idx],seq_psnk_z2src,phase[p]);
}
}
Grid_finalize();
}
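A standalone sketch of the propagator indexing used by make_idx in the example above: the lightest mass occupies one slot per momentum (0..nmom-1), and the heavier masses at zero momentum follow, giving nmom+nmass-1 propagators in total.

#include <cassert>
#include <cstdio>

int make_idx(int p, int m, int nmom)
{
  if (m == 0) return p;        // lightest mass: one slot per momentum
  assert(p == 0);              // heavier masses only at zero momentum
  return nmom + m - 1;
}

int main() {
  const int nmom = 3, nmass = 3;
  for (int p = 0; p < nmom; p++)  std::printf("p=%d m=0 -> slot %d\n", p, make_idx(p, 0, nmom));
  for (int m = 1; m < nmass; m++) std::printf("p=0 m=%d -> slot %d\n", m, make_idx(0, m, nmom));
  // Total number of propagators stored: nmom + nmass - 1 = 5
  return 0;
}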

systems/Spock/comms.slurm Normal file
View File

@ -0,0 +1,26 @@
#!/bin/bash
# Begin LSF Directives
#SBATCH -A LGT104
#SBATCH -t 01:00:00
##SBATCH -U openmpThu
#SBATCH -p ecp
#SBATCH -J comms
#SBATCH -o comms.%J
#SBATCH -e comms.%J
#SBATCH -N 1
#SBATCH -n 2
DIR=.
module list
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPICH_GPU_SUPPORT_ENABLED=1
#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
#export MPICH_SMP_SINGLE_COPY_MODE=CMA
export MPICH_SMP_SINGLE_COPY_MODE=NONE
export OMP_NUM_THREADS=8
AT=8
echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE
PARAMS=" --accelerator-threads ${AT} --grid 64.64.32.32 --mpi 2.1.1.1 "
srun -n2 --label -c$OMP_NUM_THREADS --gpus-per-task=1 ./mpiwrapper.sh ./benchmarks/Benchmark_comms_host_device $PARAMS

View File

@ -0,0 +1,12 @@
../../configure --enable-comms=mpi-auto \
--enable-unified=no \
--enable-shm=nvlink \
--enable-accelerator=hip \
--enable-gen-simd-width=64 \
--enable-simd=GPU \
--disable-fermion-reps \
--disable-gparity \
CXX=hipcc MPICXX=mpicxx \
CXXFLAGS="-fPIC -I/opt/rocm-4.3.0/include/ -std=c++14 -I${MPICH_DIR}/include " \
--prefix=/ccs/home/chulwoo/Grid \
LDFLAGS=" -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa "

systems/Spock/dwf.slurm Normal file
View File

@ -0,0 +1,26 @@
#!/bin/bash
# Begin LSF Directives
#SBATCH -A LGT104
#SBATCH -t 01:00:00
##SBATCH -U openmpThu
#SBATCH -p ecp
#SBATCH -J DWF
#SBATCH -o DWF.%J
#SBATCH -e DWF.%J
#SBATCH -N 1
#SBATCH -n 1
DIR=.
module list
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPICH_GPU_SUPPORT_ENABLED=1
#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
#export MPICH_SMP_SINGLE_COPY_MODE=NONE
export MPICH_SMP_SINGLE_COPY_MODE=CMA
export OMP_NUM_THREADS=8
AT=8
echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE
PARAMS=" --accelerator-threads ${AT} --grid 32.32.32.32 --mpi 1.1.1.1 --comms-overlap"
srun -n1 --label -c$OMP_NUM_THREADS --gpus-per-task=1 ./mpiwrapper.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS

systems/Spock/dwf4.slurm Normal file
View File

@ -0,0 +1,26 @@
#!/bin/bash
# Begin LSF Directives
#SBATCH -A LGT104
#SBATCH -t 01:00:00
##SBATCH -U openmpThu
#SBATCH -p ecp
#SBATCH -J DWF
#SBATCH -o DWF.%J
#SBATCH -e DWF.%J
#SBATCH -N 1
#SBATCH -n 4
DIR=.
module list
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPICH_GPU_SUPPORT_ENABLED=1
#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
export MPICH_SMP_SINGLE_COPY_MODE=NONE
#export MPICH_SMP_SINGLE_COPY_MODE=CMA
export OMP_NUM_THREADS=8
AT=8
echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE
PARAMS=" --accelerator-threads ${AT} --grid 32.32.64.64 --mpi 1.1.2.2 --comms-overlap --shm 2048 --shm-mpi 0"
srun -n4 --label -c$OMP_NUM_THREADS --gpus-per-task=1 ./mpiwrapper.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS

systems/Spock/dwf8.slurm Normal file
View File

@ -0,0 +1,26 @@
#!/bin/bash
# Begin LSF Directives
#SBATCH -A LGT104
#SBATCH -t 01:00:00
##SBATCH -U openmpThu
#SBATCH -p ecp
#SBATCH -J DWF
#SBATCH -o DWF.%J
#SBATCH -e DWF.%J
#SBATCH -N 2
#SBATCH -n 8
DIR=.
module list
export MPIR_CVAR_GPU_EAGER_DEVICE_MEM=0
export MPICH_GPU_SUPPORT_ENABLED=1
#export MPICH_SMP_SINGLE_COPY_MODE=XPMEM
export MPICH_SMP_SINGLE_COPY_MODE=NONE
#export MPICH_SMP_SINGLE_COPY_MODE=CMA
export OMP_NUM_THREADS=8
AT=8
echo MPICH_SMP_SINGLE_COPY_MODE $MPICH_SMP_SINGLE_COPY_MODE
PARAMS=" --accelerator-threads ${AT} --grid 32.64.64.64 --mpi 1.2.2.2 --comms-overlap --shm 2048 --shm-mpi 0"
srun -n8 --label -c$OMP_NUM_THREADS --gpus-per-task=1 ./mpiwrapper.sh ./benchmarks/Benchmark_dwf_fp32 $PARAMS

systems/Spock/mpiwrapper.sh Executable file
View File

@ -0,0 +1,12 @@
#!/bin/bash
lrank=$SLURM_LOCALID
export ROCR_VISIBLE_DEVICES=$SLURM_LOCALID
echo "`hostname` - $lrank device=$ROCR_VISIBLE_DEVICES binding=$BINDING"
$*

View File

@ -0,0 +1,5 @@
module load PrgEnv-gnu
module load rocm/4.3.0
module load gmp
module load cray-fftw
module load craype-accel-amd-gfx908

View File

@ -235,7 +235,6 @@ void TestWhat(What & Ddwf,
pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi);
RealD t1,t2;
SchurDiagMooeeOperator<What,LatticeFermion> HermOpEO(Ddwf);
HermOpEO.MpcDagMpc(chi_e,dchi_e);

View File

@ -215,7 +215,6 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd , chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd , phi_o, phi);
RealD t1,t2;
SchurDiagMooeeOperator<DomainWallEOFAFermionR,LatticeFermion> HermOpEO(Ddwf);
HermOpEO.MpcDagMpc(chi_e, dchi_e);

View File

@ -212,8 +212,6 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi);
RealD t1,t2;
SchurDiagMooeeOperator<DomainWallFermionR,LatticeFermion> HermOpEO(Ddwf);
HermOpEO.MpcDagMpc(chi_e,dchi_e);

View File

@ -181,8 +181,8 @@ void checkAdj(const Gamma::Algebra a)
void checkProject(GridSerialRNG &rng)
{
SpinVector rv, recon, full;
HalfSpinVector hsp, hsm;
SpinVector rv, recon;
HalfSpinVector hsm;
random(rng, rv);

View File

@ -198,7 +198,6 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi);
RealD t1,t2;
SchurDiagMooeeOperator<GparityWilsonFermionR,FermionField> HermOpEO(Dw);
HermOpEO.MpcDagMpc(chi_e,dchi_e);

View File

@ -364,14 +364,12 @@ int main(int argc, char **argv) {
{ // Peek-ology and Poke-ology, with a little app-ology
Complex c;
ColourMatrix c_m;
SpinMatrix s_m;
SpinColourMatrix sc_m;
ColourMatrix c_m = Zero();
SpinMatrix s_m = Zero();
SpinColourMatrix sc_m = Zero();
s_m = TensorIndexRecursion<ColourIndex>::traceIndex(
sc_m); // Map to traceColour
c_m = TensorIndexRecursion<SpinIndex>::traceIndex(
sc_m); // map to traceSpin
s_m = TensorIndexRecursion<ColourIndex>::traceIndex(sc_m); // Map to traceColour
c_m = TensorIndexRecursion<SpinIndex>::traceIndex(sc_m); // map to traceSpin
c = TensorIndexRecursion<SpinIndex>::traceIndex(s_m);
c = TensorIndexRecursion<ColourIndex>::traceIndex(c_m);

View File

@ -217,7 +217,6 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd , chi_o, chi);
pickCheckerboard(Even, phi_e, phi);
pickCheckerboard(Odd , phi_o, phi);
RealD t1,t2;
SchurDiagMooeeOperator<MobiusEOFAFermionR,LatticeFermion> HermOpEO(Ddwf);
HermOpEO.MpcDagMpc(chi_e, dchi_e);

View File

@ -262,7 +262,6 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi);
RealD t1,t2;
SchurDiagMooeeOperator<MobiusFermionR,LatticeFermion> HermOpEO(Ddwf);

View File

@ -144,7 +144,7 @@ int main (int argc, char ** argv)
Ds.Dhop(src,result,0);
}
double t1=usecond();
double t2;
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
std::cout<<GridLogMessage << "Called Ds"<<std::endl;

View File

@ -162,7 +162,6 @@ int main (int argc, char ** argv)
}
double t1=usecond();
double t2;
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
std::cout<<GridLogMessage << "Called Ds"<<std::endl;

View File

@ -30,7 +30,6 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
using namespace std;
using namespace Grid;
;
int main (int argc, char ** argv)
{
@ -135,7 +134,6 @@ int main (int argc, char ** argv)
Ds.Dhop(src,result,0);
}
double t1=usecond();
double t2;
double flops=(16*(3*(6+8+8)) + 15*3*2)*volume*ncall; // == 66*16 + == 1146
std::cout<<GridLogMessage << "Called Ds"<<std::endl;

View File

@ -204,7 +204,6 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi);
RealD t1,t2;
SchurDiagMooeeOperator<WilsonFermionR,LatticeFermion> HermOpEO(Dw);
HermOpEO.MpcDagMpc(chi_e,dchi_e);

View File

@ -205,7 +205,6 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi);
RealD t1,t2;
SchurDiagMooeeOperator<WilsonTMFermionR,LatticeFermion> HermOpEO(Dw);
HermOpEO.MpcDagMpc(chi_e,dchi_e);

View File

@ -276,7 +276,6 @@ int main (int argc, char ** argv)
pickCheckerboard(Odd ,chi_o,chi);
pickCheckerboard(Even,phi_e,phi);
pickCheckerboard(Odd ,phi_o,phi);
RealD t1,t2;
SchurDiagMooeeOperator<ZMobiusFermionR,LatticeFermion> HermOpEO(Ddwf);

View File

@ -57,7 +57,6 @@ int main (int argc, char ** argv)
SU<Nc>::HotConfiguration(pRNG,U);
double beta = 1.0;
double c1 = -0.331;
IwasakiGaugeActionR Action(beta);
// PlaqPlusRectangleActionR Action(beta,c1);

View File

@ -40,6 +40,7 @@ using namespace Grid;
template<class Fobj,class CComplex,int nbasis>
class ProjectedHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
public:
using LinearFunction<Lattice<iVector<CComplex,nbasis > > >::operator();
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field
@ -67,6 +68,8 @@ public:
template<class Fobj,class CComplex,int nbasis>
class ProjectedFunctionHermOp : public LinearFunction<Lattice<iVector<CComplex,nbasis > > > {
public:
using LinearFunction<Lattice<iVector<CComplex,nbasis > > >::operator ();
typedef iVector<CComplex,nbasis > CoarseSiteVector;
typedef Lattice<CoarseSiteVector> CoarseField;
typedef Lattice<CComplex> CoarseScalar; // used for inner products on fine field

View File

@ -55,6 +55,7 @@ RealD InverseApproximation(RealD x){
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator;
@ -78,6 +79,7 @@ public:
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix;
FineOperator & SmootherOperator;
@ -108,6 +110,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;

View File

@ -56,9 +56,9 @@ template<class Field> class SolverWrapper : public LinearFunction<Field> {
private:
CheckerBoardedSparseMatrixBase<Field> & _Matrix;
SchurRedBlackBase<Field> & _Solver;
public:
/////////////////////////////////////////////////////
public:
using LinearFunction<Field>::operator();
/////////////////////////////////////////////////////
// Wrap the usual normal equations trick
/////////////////////////////////////////////////////
SolverWrapper(CheckerBoardedSparseMatrixBase<Field> &Matrix,
@ -75,6 +75,7 @@ public:
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator;
@ -98,6 +99,7 @@ public:
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix;
FineOperator & SmootherOperator;
@ -128,6 +130,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;

View File

@ -55,6 +55,7 @@ RealD InverseApproximation(RealD x){
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator;
@ -78,6 +79,7 @@ public:
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix;
FineOperator & SmootherOperator;
@ -108,6 +110,8 @@ public:
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;

View File

@ -56,6 +56,7 @@ RealD InverseApproximation(RealD x){
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator;
@ -79,6 +80,7 @@ public:
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix;
FineOperator & SmootherOperator;
@ -108,6 +110,7 @@ public:
template<class Field,class Matrix> class RedBlackSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix;
RealD tol;
@ -134,6 +137,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;
@ -241,7 +245,7 @@ int main (int argc, char ** argv)
Grid_init(&argc,&argv);
const int Ls=16;
const int rLs=8;
// const int rLs=8;
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplex::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
@ -388,7 +392,7 @@ int main (int argc, char ** argv)
// RedBlackSmoother<LatticeFermion,DomainWallFermionR> FineRBSmoother(0.00,0.001,100,Ddwf);
// Wrap the 2nd level solver in a MultiGrid preconditioner acting on the fine space
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
// ZeroGuesser<CoarseVector> CoarseZeroGuesser;
TwoLevelMG TwoLevelPrecon(Aggregates, LDOp,
HermIndefOp,Ddwf,
FineSmoother,

View File

@ -57,7 +57,7 @@ private:
CheckerBoardedSparseMatrixBase<Field> & _Matrix;
SchurRedBlackBase<Field> & _Solver;
public:
using LinearFunction<Field>::operator();
/////////////////////////////////////////////////////
// Wrap the usual normal equations trick
/////////////////////////////////////////////////////
@ -75,6 +75,7 @@ public:
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator;
@ -98,6 +99,7 @@ public:
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix;
FineOperator & SmootherOperator;
@ -128,6 +130,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;

View File

@ -55,6 +55,7 @@ RealD InverseApproximation(RealD x){
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator;
@ -78,6 +79,7 @@ public:
template<class Field,class Matrix> class MirsSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
Matrix & SmootherMatrix;
FineOperator & SmootherOperator;
@ -108,6 +110,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class Matrix, class Guesser, class CoarseSolver>
class MultiGridPreconditioner : public LinearFunction< Lattice<Fobj> > {
public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;

View File

@ -57,6 +57,7 @@ private:
OperatorFunction<Field> & _Solver;
LinearFunction<Field> & _Guess;
public:
using LinearFunction<Field>::operator();
/////////////////////////////////////////////////////
// Wrap the usual normal equations trick
@ -118,6 +119,7 @@ RealD InverseApproximation(RealD x){
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator;
@ -174,6 +176,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class CoarseSolver>
class HDCRPreconditioner : public LinearFunction< Lattice<Fobj> > {
public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef CoarsenedMatrix<Fobj,CComplex,nbasis> CoarseOperator;

View File

@ -456,8 +456,8 @@ public:
siteVector *CBp=Stencil.CommBuf();
int ptype;
int nb2=nbasis/2;
// int ptype;
// int nb2=nbasis/2;
autoView(in_v , in, AcceleratorRead);
autoView(st, Stencil, AcceleratorRead);
@ -471,7 +471,7 @@ public:
typedef decltype(coalescedRead(in_v[0])) calcVector;
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
int sU = sF/Ls;
int s = sF%Ls;
// int s = sF%Ls;
calcComplex res = Zero();
calcVector nbr;
@ -517,14 +517,14 @@ public:
autoView(st, Stencil, AcceleratorRead);
siteVector *CBp=Stencil.CommBuf();
int ptype;
int nb2=nbasis/2;
// int ptype;
// int nb2=nbasis/2;
accelerator_for2d(sF, Coarse5D->oSites(), b, nbasis, Nsimd, {
typedef decltype(coalescedRead(in_v[0])) calcVector;
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
int sU = sF/Ls;
int s = sF%Ls;
// int s = sF%Ls;
calcComplex res = Zero();
@ -650,7 +650,7 @@ private:
OperatorFunction<Field> & _Solver;
LinearFunction<Field> & _Guess;
public:
using LinearFunction<Field>::operator();
/////////////////////////////////////////////////////
// Wrap the usual normal equations trick
/////////////////////////////////////////////////////
@ -712,6 +712,7 @@ RealD InverseApproximation(RealD x){
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator;
@ -735,6 +736,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class CoarseSolver>
class MGPreconditioner : public LinearFunction< Lattice<Fobj> > {
public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
@ -831,6 +833,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class CoarseSolver>
class HDCRPreconditioner : public LinearFunction< Lattice<Fobj> > {
public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
@ -1174,18 +1177,18 @@ int main (int argc, char ** argv)
PlainHermOp<CoarseCoarseVector> IRLOpL2 (IRLHermOpL2);
ImplicitlyRestartedLanczos<CoarseCoarseVector> IRLL2(IRLOpChebyL2,IRLOpL2,cNstop,cNk,cNm,1.0e-3,20);
int cNconv;
cNm=0;
std::vector<RealD> eval2(cNm);
std::vector<CoarseCoarseVector> evec2(cNm,CoarseCoarse5d);
cc_src=1.0;
// int cNconv;
// IRLL2.calc(eval2,evec2,cc_src,cNconv);
ConjugateGradient<CoarseCoarseVector> CoarseCoarseCG(0.02,10000);
DeflatedGuesser<CoarseCoarseVector> DeflCoarseCoarseGuesser(evec2,eval2);
NormalEquations<CoarseCoarseVector> DeflCoarseCoarseCGNE(cc_Dwf,CoarseCoarseCG,DeflCoarseCoarseGuesser);
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
// ZeroGuesser<CoarseVector> CoarseZeroGuesser;
ZeroGuesser<CoarseCoarseVector> CoarseCoarseZeroGuesser;
std::cout<<GridLogMessage << "**************************************************"<< std::endl;

View File

@ -456,8 +456,8 @@ public:
siteVector *CBp=Stencil.CommBuf();
int ptype;
int nb2=nbasis/2;
//int ptype;
// int nb2=nbasis/2;
autoView(in_v , in, AcceleratorRead);
autoView(st, Stencil, AcceleratorRead);
@ -471,7 +471,7 @@ public:
typedef decltype(coalescedRead(in_v[0])) calcVector;
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
int sU = sF/Ls;
int s = sF%Ls;
// int s = sF%Ls;
calcComplex res = Zero();
calcVector nbr;
@ -517,14 +517,14 @@ public:
autoView(st, Stencil, AcceleratorRead);
siteVector *CBp=Stencil.CommBuf();
int ptype;
int nb2=nbasis/2;
// int ptype;
// int nb2=nbasis/2;
accelerator_for2d(sF, Coarse5D->oSites(), b, nbasis, Nsimd, {
typedef decltype(coalescedRead(in_v[0])) calcVector;
typedef decltype(coalescedRead(in_v[0](0))) calcComplex;
int sU = sF/Ls;
int s = sF%Ls;
// int s = sF%Ls;
calcComplex res = Zero();
@ -648,7 +648,7 @@ private:
CheckerBoardedSparseMatrixBase<Field> & _Matrix;
SchurRedBlackBase<Field> & _Solver;
public:
using LinearFunction<Field>::operator();
/////////////////////////////////////////////////////
// Wrap the usual normal equations trick
/////////////////////////////////////////////////////
@ -669,6 +669,7 @@ private:
OperatorFunction<Field> & _Solver;
LinearFunction<Field> & _Guess;
public:
using LinearFunction<Field>::operator();
/////////////////////////////////////////////////////
// Wrap the usual normal equations trick
@ -731,6 +732,7 @@ RealD InverseApproximation(RealD x){
template<class Field,class Matrix> class ChebyshevSmoother : public LinearFunction<Field>
{
public:
using LinearFunction<Field>::operator();
typedef LinearOperatorBase<Field> FineOperator;
Matrix & _SmootherMatrix;
FineOperator & _SmootherOperator;
@ -754,6 +756,7 @@ public:
template<class Fobj,class CComplex,int nbasis, class CoarseSolver>
class MGPreconditioner : public LinearFunction< Lattice<Fobj> > {
public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
@ -850,7 +853,8 @@ public:
template<class Fobj,class CComplex,int nbasis, class CoarseSolver>
class HDCRPreconditioner : public LinearFunction< Lattice<Fobj> > {
public:
using LinearFunction<Lattice<Fobj> >::operator();
typedef Aggregation<Fobj,CComplex,nbasis> Aggregates;
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseVector CoarseVector;
typedef typename Aggregation<Fobj,CComplex,nbasis>::CoarseMatrix CoarseMatrix;
@ -1194,11 +1198,11 @@ int main (int argc, char ** argv)
PlainHermOp<CoarseCoarseVector> IRLOpL2 (IRLHermOpL2);
ImplicitlyRestartedLanczos<CoarseCoarseVector> IRLL2(IRLOpChebyL2,IRLOpL2,cNstop,cNk,cNm,1.0e-3,20);
int cNconv;
cNm=0;
std::vector<RealD> eval2(cNm);
std::vector<CoarseCoarseVector> evec2(cNm,CoarseCoarse5d);
cc_src=1.0;
// int cNconv;
// IRLL2.calc(eval2,evec2,cc_src,cNconv);
std::vector<RealD> tols ({0.005,0.001});
@ -1218,10 +1222,10 @@ int main (int argc, char ** argv)
for(auto c_hi : c_his ) {
for(auto f_lo : f_los ) {
for(auto f_hi : f_his ) {
ZeroGuesser<CoarseVector> CoarseZeroGuesser;
ZeroGuesser<CoarseCoarseVector> CoarseCoarseZeroGuesser;
// ZeroGuesser<CoarseVector> CoarseZeroGuesser;
// ZeroGuesser<CoarseCoarseVector> CoarseCoarseZeroGuesser;
ConjugateGradient<CoarseCoarseVector> CoarseCoarseCG(tol,10000);
ZeroGuesser<CoarseCoarseVector> CoarseCoarseGuesser;
// ZeroGuesser<CoarseCoarseVector> CoarseCoarseGuesser;
SchurRedBlackDiagMooeeSolve<CoarseCoarseVector> CoarseCoarseRBCG(CoarseCoarseCG);
SchurSolverWrapper<CoarseCoarseVector> CoarseCoarseSolver(cc_Dwf,CoarseCoarseRBCG);

View File

@ -143,6 +143,7 @@ public:
template<class Field> class MultiGridPreconditionerBase : public LinearFunction<Field> {
public:
using LinearFunction<Field>::operator();
virtual ~MultiGridPreconditionerBase() = default;
virtual void setup() = 0;
virtual void operator()(Field const &in, Field &out) = 0;
@ -156,6 +157,7 @@ public:
/////////////////////////////////////////////
// Type Definitions
/////////////////////////////////////////////
using MultiGridPreconditionerBase<Lattice<Fobj>>::operator();
// clang-format off
typedef Aggregation<Fobj, CComplex, nBasis> Aggregates;
@ -568,6 +570,7 @@ public:
/////////////////////////////////////////////
// Type Definitions
/////////////////////////////////////////////
using MultiGridPreconditionerBase<Lattice<Fobj>>::operator();
typedef Matrix FineDiracMatrix;
typedef Lattice<Fobj> FineVector;

View File

@ -56,7 +56,6 @@ int main (int argc, char ** argv)
QuasiMinimalResidual<LatticeFermion> QMR(1.0e-8,10000);
RealD mass=0.0;
RealD M5=1.8;
WilsonFermionR Dw(Umu,*Grid,*rbGrid,mass);
NonHermitianLinearOperator<WilsonFermionR,LatticeFermion> NonHermOp(Dw);