1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-11 03:46:55 +01:00

Compare commits

...

10 Commits

Author SHA1 Message Date
eafc150034 Test fft asserts 2024-10-23 16:46:26 -04:00
2877f1a268 Verbose reduce 2024-10-23 15:14:16 -04:00
1e893af775 GPU happy 2024-10-23 14:52:15 -04:00
d9f430a575 Happy GPU 2024-10-23 14:51:16 -04:00
63abe87f36 Memory manager verbose improvements that were useful to track an error 2024-10-23 14:49:13 -04:00
368d649c8a feature/deprecate-uvm happier -- preallocate device resident neigbour table 2024-10-23 14:47:55 -04:00
5603464f39 Fix in partial fraction import/export physical and
make the GPU happier on the deprecate-uvm -- don't use static vectors, make member of class
2024-10-23 14:45:58 -04:00
655c79f39e Suppress warning on partial override 2024-10-23 14:44:41 -04:00
565b231c03 Nvcc happy 2024-10-23 14:44:17 -04:00
62a9f180fa NVCC happy 2024-10-23 14:44:04 -04:00
24 changed files with 254 additions and 211 deletions

View File

@ -215,7 +215,7 @@ public:
else if ( sign == forward ) div = 1.0;
else assert(0);
std::cout << "Making FFTW plan" << std::endl;
std::cout << GridLogPerformance<<"Making FFTW plan" << std::endl;
FFTW_plan p;
{
FFTW_scalar *in = (FFTW_scalar *)&pgbuf_v[0];
@ -229,7 +229,7 @@ public:
}
// Barrel shift and collect global pencil
std::cout << "Making pencil" << std::endl;
std::cout << GridLogPerformance<<"Making pencil" << std::endl;
Coordinate lcoor(Nd), gcoor(Nd);
result = source;
int pc = processor_coor[dim];
@ -251,7 +251,7 @@ public:
}
}
std::cout << "Looping orthog" << std::endl;
std::cout <<GridLogPerformance<< "Looping orthog" << std::endl;
// Loop over orthog coords
int NN=pencil_g.lSites();
GridStopWatch timer;
@ -274,7 +274,7 @@ public:
usec += timer.useconds();
flops+= flops_call*NN;
std::cout << "Writing back results " << std::endl;
std::cout <<GridLogPerformance<< "Writing back results " << std::endl;
// writing out result
{
autoView(pgbuf_v,pgbuf,CpuRead);
@ -291,7 +291,7 @@ public:
}
result = result*div;
std::cout << "Destroying plan " << std::endl;
std::cout <<GridLogPerformance<< "Destroying plan " << std::endl;
// destroying plan
FFTW<scalar>::fftw_destroy_plan(p);
#endif

View File

@ -6,11 +6,10 @@ NAMESPACE_BEGIN(Grid);
#define MAXLINE 512
static char print_buffer [ MAXLINE ];
#define mprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer;
#define dprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogDebug << print_buffer;
#define mprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer << std::endl;
#define dprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogDebug << print_buffer << std::endl;
//#define dprintf(...)
////////////////////////////////////////////////////////////
// For caching copies of data on device
////////////////////////////////////////////////////////////
@ -168,7 +167,7 @@ void MemoryManager::Flush(AcceleratorViewEntry &AccCache)
assert(AccCache.AccPtr!=(uint64_t)NULL);
assert(AccCache.CpuPtr!=(uint64_t)NULL);
acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes);
mprintf("MemoryManager: acceleratorCopyFromDevice Flush AccPtr %lx -> CpuPtr %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
mprintf("MemoryManager: acceleratorCopyFromDevice Flush size %ld AccPtr %lx -> CpuPtr %lx\n",(uint64_t)AccCache.bytes,(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
DeviceToHostBytes+=AccCache.bytes;
DeviceToHostXfer++;
AccCache.state=Consistent;
@ -183,7 +182,9 @@ void MemoryManager::Clone(AcceleratorViewEntry &AccCache)
AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes);
DeviceBytes+=AccCache.bytes;
}
mprintf("MemoryManager: acceleratorCopyToDevice Clone AccPtr %lx <- CpuPtr %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
mprintf("MemoryManager: acceleratorCopyToDevice Clone size %ld AccPtr %lx <- CpuPtr %lx\n",
(uint64_t)AccCache.bytes,
(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes);
HostToDeviceBytes+=AccCache.bytes;
HostToDeviceXfer++;
@ -264,7 +265,7 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
assert(AccCache.cpuLock==0); // Programming error
if(AccCache.state!=Empty) {
dprintf("ViewOpen found entry %lx %lx : %ld %ld accLock %ld\n",
dprintf("ViewOpen found entry %lx %lx : sizes %ld %ld accLock %ld\n",
(uint64_t)AccCache.CpuPtr,
(uint64_t)CpuPtr,
(uint64_t)AccCache.bytes,

View File

@ -55,6 +55,11 @@ public:
RealD alpha; // Mobius scale
RealD k; // EOFA normalization constant
// Device resident
deviceVector<Coeff_t> d_shift_coefficients;
deviceVector<Coeff_t> d_MooeeInv_shift_lc;
deviceVector<Coeff_t> d_MooeeInv_shift_norm;
virtual void Instantiatable(void) = 0;
// EOFA-specific operations
@ -92,6 +97,11 @@ public:
this->k = this->alpha * (_mq3-_mq2) * std::pow(this->alpha+1.0,2*Ls) /
( std::pow(this->alpha+1.0,Ls) + _mq2*std::pow(this->alpha-1.0,Ls) ) /
( std::pow(this->alpha+1.0,Ls) + _mq3*std::pow(this->alpha-1.0,Ls) );
d_shift_coefficients.resize(Ls);
d_MooeeInv_shift_lc.resize(Ls);
d_MooeeInv_shift_norm.resize(Ls);
};
};

View File

@ -143,6 +143,17 @@ public:
std::vector<Coeff_t> ueem;
std::vector<Coeff_t> dee;
// Device memory
deviceVector<Coeff_t> d_diag;
deviceVector<Coeff_t> d_upper;
deviceVector<Coeff_t> d_lower;
deviceVector<Coeff_t> d_lee;
deviceVector<Coeff_t> d_dee;
deviceVector<Coeff_t> d_uee;
deviceVector<Coeff_t> d_leem;
deviceVector<Coeff_t> d_ueem;
// Matrices of 5d ee inverse params
// std::vector<iSinglet<Simd> > MatpInv;
// std::vector<iSinglet<Simd> > MatmInv;

View File

@ -42,7 +42,7 @@ public:
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) {
this->MomentumSpacePropagatorHw(out,in,_m,twist);
};
};
// Constructors
OverlapWilsonCayleyTanhFermion(GaugeField &_Umu,

View File

@ -41,6 +41,10 @@ public:
public:
// Constructors
virtual void Instantiatable(void){};
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) {
this->MomentumSpacePropagatorHw(out,in,_m,twist);
};
OverlapWilsonCayleyZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,

View File

@ -41,6 +41,9 @@ public:
public:
virtual void Instantiatable(void){};
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) {
this->MomentumSpacePropagatorHw(out,in,_m,twist);
};
// Constructors
OverlapWilsonContFracTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,

View File

@ -40,6 +40,9 @@ public:
INHERIT_IMPL_TYPES(Impl);
virtual void Instantiatable(void){};
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) {
this->MomentumSpacePropagatorHw(out,in,_m,twist);
};
// Constructors
OverlapWilsonContFracZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,

View File

@ -41,6 +41,9 @@ public:
public:
virtual void Instantiatable(void){};
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) {
this->MomentumSpacePropagatorHw(out,in,_m,twist);
};
// Constructors
OverlapWilsonPartialFractionTanhFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,

View File

@ -40,6 +40,11 @@ public:
INHERIT_IMPL_TYPES(Impl);
virtual void Instantiatable(void){};
void MomentumSpacePropagator(FermionField &out,const FermionField &in,RealD _m,std::vector<double> twist) {
this->MomentumSpacePropagatorHw(out,in,_m,twist);
};
// Constructors
OverlapWilsonPartialFractionZolotarevFermion(GaugeField &_Umu,
GridCartesian &FiveDimGrid,

View File

@ -39,7 +39,7 @@ class PartialFractionFermion5D : public WilsonFermion5D<Impl>
public:
INHERIT_IMPL_TYPES(Impl);
const int part_frac_chroma_convention=1;
const int part_frac_chroma_convention=0;
void Meooe_internal(const FermionField &in, FermionField &out,int dag);
void Mooee_internal(const FermionField &in, FermionField &out,int dag);

View File

@ -414,29 +414,6 @@ public:
// surface_list.resize(0);
this->same_node.resize(npoints);
};
/*
void BuildSurfaceList(int Ls,int vol4){
// find same node for SHM
// Here we know the distance is 1 for WilsonStencil
for(int point=0;point<this->_npoints;point++){
this->same_node[point] = this->SameNode(point);
}
for(int site = 0 ;site< vol4;site++){
int local = 1;
for(int point=0;point<this->_npoints;point++){
if( (!this->GetNodeLocal(site*Ls,point)) && (!this->same_node[point]) ){
local = 0;
}
}
if(local == 0) {
surface_list.push_back(site);
}
}
}
*/
template < class compressor>
void HaloExchangeOpt(const Lattice<vobj> &source,compressor &compress)

View File

@ -488,7 +488,7 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
leem.resize(Ls);
uee.resize(Ls);
ueem.resize(Ls);
for(int i=0;i<Ls;i++){
dee[i] = bee[i];
@ -529,6 +529,18 @@ void CayleyFermion5D<Impl>::SetCoefficientsInternal(RealD zolo_hi,std::vector<Co
dee[Ls-1] += delta_d;
}
//////////////////////////////////////////
// Device buffers
//////////////////////////////////////////
d_diag.resize(Ls);
d_upper.resize(Ls);
d_lower.resize(Ls);
d_dee.resize(Ls);
d_lee.resize(Ls);
d_uee.resize(Ls);
d_leem.resize(Ls);
d_ueem.resize(Ls);
// int inv=1;
// this->MooeeInternalCompute(0,inv,MatpInv,MatmInv);
// this->MooeeInternalCompute(1,inv,MatpInvDag,MatmInvDag);

View File

@ -57,9 +57,9 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
int Ls =this->Ls;
static deviceVector<Coeff_t> d_diag(Ls) ; acceleratorCopyToDevice(&diag[0] ,&d_diag[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_upper(Ls); acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_lower(Ls); acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&diag[0] ,&this->d_diag[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&upper[0],&this->d_upper[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&lower[0],&this->d_lower[0],Ls*sizeof(Coeff_t));
auto pdiag = &d_diag[0];
auto pupper = &d_upper[0];
@ -99,9 +99,9 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
int Ls=this->Ls;
static deviceVector<Coeff_t> d_diag(Ls) ; acceleratorCopyToDevice(&diag[0] ,&d_diag[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_upper(Ls); acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_lower(Ls); acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&diag[0] ,&this->d_diag[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&upper[0],&this->d_upper[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&lower[0],&this->d_lower[0],Ls*sizeof(Coeff_t));
auto pdiag = &d_diag[0];
auto pupper = &d_upper[0];
@ -134,11 +134,11 @@ CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi_i, FermionField &chi
int Ls=this->Ls;
static deviceVector<Coeff_t> d_lee(Ls); acceleratorCopyToDevice(&lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_dee(Ls); acceleratorCopyToDevice(&dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_uee(Ls); acceleratorCopyToDevice(&uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_leem(Ls); acceleratorCopyToDevice(&leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_ueem(Ls); acceleratorCopyToDevice(&ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
auto plee = & d_lee [0];
auto pdee = & d_dee [0];
@ -196,11 +196,11 @@ CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi
autoView(psi , psi_i,AcceleratorRead);
autoView(chi , chi_i,AcceleratorWrite);
static deviceVector<Coeff_t> d_lee(Ls); acceleratorCopyToDevice(&lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_dee(Ls); acceleratorCopyToDevice(&dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_uee(Ls); acceleratorCopyToDevice(&uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_leem(Ls); acceleratorCopyToDevice(&leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_ueem(Ls); acceleratorCopyToDevice(&ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
auto plee = & d_lee [0];
auto pdee = & d_dee [0];

View File

@ -51,13 +51,13 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionFi
autoView( chi , chi_i, AcceleratorWrite);
assert(phi.Checkerboard() == psi.Checkerboard());
static deviceVector<Coeff_t> d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t));
auto pdiag = &d_diag[0];
auto pupper = &d_upper[0];
auto plower = &d_lower[0];
auto pdiag = &this->d_diag[0];
auto pupper = &this->d_upper[0];
auto plower = &this->d_lower[0];
acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
// Flops = 6.0*(Nc*Ns) *Ls*vol
@ -89,14 +89,14 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const Fermio
autoView( phi , phi_i, AcceleratorRead);
autoView( chi , chi_i, AcceleratorWrite);
assert(phi.Checkerboard() == psi.Checkerboard());
static deviceVector<Coeff_t> d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t));
auto pdiag = &d_diag[0];
auto pupper = &d_upper[0];
auto plower = &d_lower[0];
auto pdiag = &this->d_diag[0];
auto pupper = &this->d_upper[0];
auto plower = &this->d_lower[0];
acceleratorCopyToDevice(&diag[0] ,&pdiag[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
// Flops = 6.0*(Nc*Ns) *Ls*vol
@ -125,18 +125,18 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionFie
autoView( chi, chi_i, AcceleratorWrite);
int Ls = this->Ls;
static deviceVector<Coeff_t> d_lee(Ls); acceleratorCopyToDevice(&this->lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_dee(Ls); acceleratorCopyToDevice(&this->dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_uee(Ls); acceleratorCopyToDevice(&this->uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_leem(Ls); acceleratorCopyToDevice(&this->leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_ueem(Ls); acceleratorCopyToDevice(&this->ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
auto plee = & d_lee [0];
auto pdee = & d_dee [0];
auto puee = & d_uee [0];
auto pleem = & d_leem[0];
auto pueem = & d_ueem[0];
auto plee = & this->d_lee [0];
auto pdee = & this->d_dee [0];
auto puee = & this->d_uee [0];
auto pleem = & this->d_leem[0];
auto pueem = & this->d_ueem[0];
acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t));
uint64_t nloop=grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
uint64_t ss=sss*Ls;

View File

@ -50,14 +50,14 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi_i, const FermionField
assert(phi.Checkerboard() == psi.Checkerboard());
static deviceVector<Coeff_t> d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t));
auto pdiag = &d_diag[0];
auto pupper = &d_upper[0];
auto plower = &d_lower[0];
auto pdiag = &this->d_diag[0];
auto pupper = &this->d_upper[0];
auto plower = &this->d_lower[0];
acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
// Flops = 6.0*(Nc*Ns) *Ls*vol
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
@ -93,15 +93,15 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const Fermion
assert(phi.Checkerboard() == psi.Checkerboard());
static deviceVector<Coeff_t> d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_shift_coeffs(Ls);acceleratorCopyToDevice(&shift_coeffs[0],&d_shift_coeffs[0],Ls*sizeof(Coeff_t));
auto pdiag = &d_diag[0];
auto pupper = &d_upper[0];
auto plower = &d_lower[0];
auto pshift_coeffs = &d_shift_coeffs[0];
auto pdiag = &this->d_diag[0];
auto pupper = &this->d_upper[0];
auto plower = &this->d_lower[0];
auto pshift_coeffs = &this->d_shift_coefficients[0];
acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&shift_coeffs[0],&pshift_coeffs[0],Ls*sizeof(Coeff_t));
// Flops = 6.0*(Nc*Ns) *Ls*vol
int nloop = grid->oSites()/Ls;
@ -138,14 +138,14 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi_i, const FermionFie
autoView(chi , chi_i, AcceleratorWrite);
assert(phi.Checkerboard() == psi.Checkerboard());
static deviceVector<Coeff_t> d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t));
auto pdiag = &d_diag[0];
auto pupper = &d_upper[0];
auto plower = &d_lower[0];
auto pdiag = &this->d_diag[0];
auto pupper = &this->d_upper[0];
auto plower = &this->d_lower[0];
acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
// Flops = 6.0*(Nc*Ns) *Ls*vol
int nloop = grid->oSites()/Ls;
@ -180,16 +180,16 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const Ferm
assert(phi.Checkerboard() == psi.Checkerboard());
static deviceVector<Coeff_t> d_diag(Ls); acceleratorCopyToDevice(&diag[0],&d_diag[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_upper(Ls);acceleratorCopyToDevice(&upper[0],&d_upper[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_lower(Ls);acceleratorCopyToDevice(&lower[0],&d_lower[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_shift_coeffs(Ls);acceleratorCopyToDevice(&shift_coeffs[0],&d_shift_coeffs[0],Ls*sizeof(Coeff_t));
auto pdiag = &d_diag[0];
auto pupper = &d_upper[0];
auto plower = &d_lower[0];
auto pshift_coeffs = &d_shift_coeffs[0];
auto pdiag = &this->d_diag[0];
auto pupper = &this->d_upper[0];
auto plower = &this->d_lower[0];
auto pshift_coeffs = &this->d_shift_coefficients[0];
acceleratorCopyToDevice(&diag[0],&pdiag[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&upper[0],&pupper[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&lower[0],&plower[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&shift_coeffs[0],&pshift_coeffs[0],Ls*sizeof(Coeff_t));
// Flops = 6.0*(Nc*Ns) *Ls*vol
auto pm = this->pm;
@ -230,17 +230,17 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &
autoView(psi , psi_i, AcceleratorRead);
autoView(chi , chi_i, AcceleratorWrite);
static deviceVector<Coeff_t> d_lee(Ls); acceleratorCopyToDevice(&this->lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_dee(Ls); acceleratorCopyToDevice(&this->dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_uee(Ls); acceleratorCopyToDevice(&this->uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_leem(Ls); acceleratorCopyToDevice(&this->leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_ueem(Ls); acceleratorCopyToDevice(&this->ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
auto plee = & this->d_lee [0];
auto pdee = & this->d_dee [0];
auto puee = & this->d_uee [0];
auto pleem = & this->d_leem[0];
auto pueem = & this->d_ueem[0];
auto plee = & d_lee [0];
auto pdee = & d_dee [0];
auto puee = & d_uee [0];
auto pleem = & d_leem[0];
auto pueem = & d_ueem[0];
acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t));
if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; }
@ -293,23 +293,22 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionF
autoView(chi , chi_i, AcceleratorWrite);
// Move into object and constructor
static deviceVector<Coeff_t> d_lee(Ls); acceleratorCopyToDevice(&this->lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_dee(Ls); acceleratorCopyToDevice(&this->dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_uee(Ls); acceleratorCopyToDevice(&this->uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_leem(Ls); acceleratorCopyToDevice(&this->leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_ueem(Ls); acceleratorCopyToDevice(&this->ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
auto pm = this->pm;
auto plee = & d_lee [0];
auto pdee = & d_dee [0];
auto puee = & d_uee [0];
auto pleem = & d_leem[0];
auto pueem = & d_ueem[0];
auto plee = & this->d_lee [0];
auto pdee = & this->d_dee [0];
auto puee = & this->d_uee [0];
auto pleem = & this->d_leem[0];
auto pueem = & this->d_ueem[0];
auto pMooeeInv_shift_lc = &this->d_MooeeInv_shift_lc[0];
auto pMooeeInv_shift_norm = &this->d_MooeeInv_shift_norm[0];
static deviceVector<Coeff_t> d_MooeeInv_shift_lc(Ls); acceleratorCopyToDevice(&MooeeInv_shift_lc[0],&d_MooeeInv_shift_lc[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_MooeeInv_shift_norm(Ls); acceleratorCopyToDevice(&MooeeInv_shift_norm[0],&d_MooeeInv_shift_norm[0],Ls*sizeof(Coeff_t));
auto pMooeeInv_shift_lc = &d_MooeeInv_shift_lc[0];
auto pMooeeInv_shift_norm = &d_MooeeInv_shift_norm[0];
acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&MooeeInv_shift_lc[0],&pMooeeInv_shift_lc[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&MooeeInv_shift_norm[0],&pMooeeInv_shift_norm[0],Ls*sizeof(Coeff_t));
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
@ -367,17 +366,17 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionFiel
autoView(psi , psi_i, AcceleratorRead);
autoView(chi , chi_i, AcceleratorWrite);
static deviceVector<Coeff_t> d_lee(Ls); acceleratorCopyToDevice(&this->lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_dee(Ls); acceleratorCopyToDevice(&this->dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_uee(Ls); acceleratorCopyToDevice(&this->uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_leem(Ls); acceleratorCopyToDevice(&this->leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_ueem(Ls); acceleratorCopyToDevice(&this->ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
auto plee = &this->d_lee [0];
auto pdee = &this->d_dee [0];
auto puee = &this->d_uee [0];
auto pleem = &this->d_leem[0];
auto pueem = &this->d_ueem[0];
auto plee = & d_lee [0];
auto pdee = & d_dee [0];
auto puee = & d_uee [0];
auto pleem = & d_leem[0];
auto pueem = & d_ueem[0];
acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t));
int nloop = grid->oSites()/Ls;
accelerator_for(sss,nloop,Simd::Nsimd(),{
@ -426,25 +425,23 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, Fermi
autoView(chi , chi_i, AcceleratorWrite);
int Ls = this->Ls;
static deviceVector<Coeff_t> d_lee(Ls); acceleratorCopyToDevice(&this->lee[0],&d_lee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_dee(Ls); acceleratorCopyToDevice(&this->dee[0],&d_dee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_uee(Ls); acceleratorCopyToDevice(&this->uee[0],&d_uee[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_leem(Ls); acceleratorCopyToDevice(&this->leem[0],&d_leem[0],Ls*sizeof(Coeff_t));
static deviceVector<Coeff_t> d_ueem(Ls); acceleratorCopyToDevice(&this->ueem[0],&d_ueem[0],Ls*sizeof(Coeff_t));
auto pm = this->pm;
auto plee = & d_lee [0];
auto pdee = & d_dee [0];
auto puee = & d_uee [0];
auto pleem = & d_leem[0];
auto pueem = & d_ueem[0];
auto plee = & this->d_lee [0];
auto pdee = & this->d_dee [0];
auto puee = & this->d_uee [0];
auto pleem = & this->d_leem[0];
auto pueem = & this->d_ueem[0];
static deviceVector<Coeff_t> d_MooeeInvDag_shift_lc(Ls);
static deviceVector<Coeff_t> d_MooeeInvDag_shift_norm(Ls);
acceleratorCopyToDevice(&MooeeInvDag_shift_lc[0],&d_MooeeInvDag_shift_lc[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&MooeeInvDag_shift_norm[0],&d_MooeeInvDag_shift_norm[0],Ls*sizeof(Coeff_t));
auto pMooeeInvDag_shift_lc = &d_MooeeInvDag_shift_lc[0];
auto pMooeeInvDag_shift_norm = &d_MooeeInvDag_shift_norm[0];
auto pMooeeInvDag_shift_lc = &this->d_MooeeInv_shift_lc[0];
auto pMooeeInvDag_shift_norm = &this->d_MooeeInv_shift_norm[0];
acceleratorCopyToDevice(&this->lee[0],&plee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->dee[0],&pdee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->uee[0],&puee[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->leem[0],&pleem[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&this->ueem[0],&pueem[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&MooeeInvDag_shift_lc[0],&pMooeeInvDag_shift_lc[0],Ls*sizeof(Coeff_t));
acceleratorCopyToDevice(&MooeeInvDag_shift_norm[0],&pMooeeInvDag_shift_norm[0],Ls*sizeof(Coeff_t));
// auto pMooeeInvDag_shift_lc = &MooeeInvDag_shift_lc[0];
// auto pMooeeInvDag_shift_norm = &MooeeInvDag_shift_norm[0];

View File

@ -411,17 +411,18 @@ void PartialFractionFermion5D<Impl>::SetCoefficientsZolotarev(RealD zolo_hi,App
int Ls = this->Ls;
conformable(solution5d.Grid(),this->FermionGrid());
conformable(exported4d.Grid(),this->GaugeGrid());
ExtractSlice(exported4d, solution5d, Ls-1, Ls-1);
ExtractSlice(exported4d, solution5d, Ls-1, 0);
}
template<class Impl>
void PartialFractionFermion5D<Impl>::ImportPhysicalFermionSource(const FermionField &input4d,FermionField &imported5d)
{
//void InsertSlice(const Lattice<vobj> &lowDim,Lattice<vobj> & higherDim,int slice, int orthog)
int Ls = this->Ls;
conformable(imported5d.Grid(),this->FermionGrid());
conformable(input4d.Grid() ,this->GaugeGrid());
FermionField tmp(this->FermionGrid());
tmp=Zero();
InsertSlice(input4d, tmp, Ls-1, Ls-1);
InsertSlice(input4d, tmp, Ls-1, 0);
tmp=Gamma(Gamma::Algebra::Gamma5)*tmp;
this->Dminus(tmp,imported5d);
}

View File

@ -40,6 +40,11 @@ public:
INHERIT_GIMPL_TYPES(Gimpl);
using Action<GaugeField>::S;
using Action<GaugeField>::Sinitial;
using Action<GaugeField>::deriv;
using Action<GaugeField>::refresh;
private:
RealD c_plaq;
RealD c_rect;

View File

@ -118,7 +118,7 @@ static void generatorDiagonal(int diagIndex, iGroupMatrix<cplx> &ta) {
////////////////////////////////////////////////////////////////////////
// Map a su2 subgroup number to the pair of rows that are non zero
////////////////////////////////////////////////////////////////////////
static void su2SubGroupIndex(int &i1, int &i2, int su2_index, GroupName::SU) {
static accelerator_inline void su2SubGroupIndex(int &i1, int &i2, int su2_index, GroupName::SU) {
assert((su2_index >= 0) && (su2_index < (ncolour * (ncolour - 1)) / 2));
int spare = su2_index;

View File

@ -207,7 +207,7 @@ static void generatorZtype(int zIndex, iGroupMatrix<cplx> &ta) {
// Map a su2 subgroup number to the pair of rows that are non zero
////////////////////////////////////////////////////////////////////////
template <ONLY_IF_Sp>
static void su2SubGroupIndex(int &i1, int &i2, int su2_index, GroupName::Sp) {
static accelerator_inline void su2SubGroupIndex(int &i1, int &i2, int su2_index, GroupName::Sp) {
const int nsp=ncolour/2;
assert((su2_index >= 0) && (su2_index < (nsp * (nsp - 1)) / 2));

View File

@ -121,17 +121,22 @@ class CartesianStencilAccelerator {
StencilVector same_node;
Coordinate _simd_layout;
Parameters parameters;
ViewMode mode;
StencilEntry* _entries_p;
StencilEntry* _entries_host_p;
cobj* u_recv_buf_p;
cobj* u_send_buf_p;
accelerator_inline cobj *CommBuf(void) const { return u_recv_buf_p; }
accelerator_inline int GetNodeLocal(int osite,int point) const {
return this->_entries_p[point+this->_npoints*osite]._is_local;
// Not a device function
inline int GetNodeLocal(int osite,int point) const {
StencilEntry SE=this->_entries_host_p[point+this->_npoints*osite];
return SE._is_local;
}
accelerator_inline StencilEntry * GetEntry(int &ptype,int point,int osite) const {
ptype = this->_permute_type[point]; return & this->_entries_p[point+this->_npoints*osite];
ptype = this->_permute_type[point];
return & this->_entries_p[point+this->_npoints*osite];
}
accelerator_inline uint64_t GetInfo(int &ptype,int &local,int &perm,int point,int ent,uint64_t base) const {
@ -164,28 +169,22 @@ class CartesianStencilView : public CartesianStencilAccelerator<vobj,cobj,Parame
{
public:
int *closed;
StencilEntry *cpu_ptr;
ViewMode mode;
// StencilEntry *cpu_ptr;
public:
// default copy constructor
CartesianStencilView (const CartesianStencilView &refer_to_me) = default;
CartesianStencilView (const CartesianStencilAccelerator<vobj,cobj,Parameters> &refer_to_me,ViewMode _mode)
: CartesianStencilAccelerator<vobj,cobj,Parameters>(refer_to_me),
cpu_ptr(this->_entries_p),
mode(_mode)
: CartesianStencilAccelerator<vobj,cobj,Parameters>(refer_to_me)
{
this->_entries_p =(StencilEntry *)
MemoryManager::ViewOpen(this->_entries_p,
this->_npoints*this->_osites*sizeof(StencilEntry),
mode,
AdviseDefault);
this->ViewOpen(_mode);
}
void ViewOpen(ViewMode _mode)
{
this->mode = _mode;
}
void ViewClose(void)
{
MemoryManager::ViewClose(this->cpu_ptr,this->mode);
}
void ViewClose(void) { }
};
@ -274,8 +273,8 @@ public:
std::vector<deviceVector<std::pair<int,int> > > face_table ;
deviceVector<int> surface_list;
std::vector<StencilEntry> _entries; // Resident in host memory
deviceVector<StencilEntry> _entries_device; // Resident in device memory
std::vector<StencilEntry> _entries; // Resident in host memory
deviceVector<StencilEntry> _entries_device; // Resident in device memory
std::vector<Packet> Packets;
std::vector<Merge> Mergers;
std::vector<Merge> MergersSHM;
@ -626,10 +625,10 @@ public:
////////////////////////////////////////
void PrecomputeByteOffsets(void){
for(int i=0;i<_entries.size();i++){
if( _entries[i]._is_local ) {
_entries[i]._byte_offset = _entries[i]._offset*sizeof(vobj);
if( this->_entries[i]._is_local ) {
this->_entries[i]._byte_offset = this->_entries[i]._offset*sizeof(vobj);
} else {
_entries[i]._byte_offset = _entries[i]._offset*sizeof(cobj);
this->_entries[i]._byte_offset = this->_entries[i]._offset*sizeof(cobj);
}
}
};
@ -764,7 +763,13 @@ public:
this->_osites = _grid->oSites();
_entries.resize(this->_npoints* this->_osites);
this->_entries_p = &_entries[0];
_entries_device.resize(this->_npoints* this->_osites);
this->_entries_host_p = &_entries[0];
this->_entries_p = &_entries_device[0];
std::cout << GridLogMessage << " Stencil object allocated for "<<std::dec<<this->_osites
<<" sites table "<<std::hex<<this->_entries_p<< " GridPtr "<<_grid<<std::dec<<std::endl;
for(int ii=0;ii<npoints;ii++){
int i = ii; // reverse direction to get SIMD comms done first
@ -841,6 +846,7 @@ public:
u_simd_send_buf[l] = (cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj));
}
PrecomputeByteOffsets();
acceleratorCopyToDevice(&this->_entries[0],&this->_entries_device[0],this->_entries.size()*sizeof(StencilEntry));
}
void Local (int point, int dimension,int shiftpm,int cbmask)
@ -996,10 +1002,10 @@ public:
for(int n=0;n<_grid->_slice_nblock[dimension];n++){
for(int b=0;b<_grid->_slice_block[dimension];b++){
int idx=point+(lo+o+b)*this->_npoints;
_entries[idx]._offset =ro+o+b;
_entries[idx]._permute=permute;
_entries[idx]._is_local=1;
_entries[idx]._around_the_world=wrap;
this->_entries[idx]._offset =ro+o+b;
this->_entries[idx]._permute=permute;
this->_entries[idx]._is_local=1;
this->_entries[idx]._around_the_world=wrap;
}
o +=_grid->_slice_stride[dimension];
}
@ -1017,10 +1023,10 @@ public:
if ( ocb&cbmask ) {
int idx = point+(lo+o+b)*this->_npoints;
_entries[idx]._offset =ro+o+b;
_entries[idx]._is_local=1;
_entries[idx]._permute=permute;
_entries[idx]._around_the_world=wrap;
this->_entries[idx]._offset =ro+o+b;
this->_entries[idx]._is_local=1;
this->_entries[idx]._permute=permute;
this->_entries[idx]._around_the_world=wrap;
}
}
@ -1044,10 +1050,10 @@ public:
for(int n=0;n<_grid->_slice_nblock[dimension];n++){
for(int b=0;b<_grid->_slice_block[dimension];b++){
int idx=point+(so+o+b)*this->_npoints;
_entries[idx]._offset =offset+(bo++);
_entries[idx]._is_local=0;
_entries[idx]._permute=0;
_entries[idx]._around_the_world=wrap;
this->_entries[idx]._offset =offset+(bo++);
this->_entries[idx]._is_local=0;
this->_entries[idx]._permute=0;
this->_entries[idx]._around_the_world=wrap;
}
o +=_grid->_slice_stride[dimension];
}
@ -1064,10 +1070,10 @@ public:
int ocb=1<<_grid->CheckerBoardFromOindex(o+b);// Could easily be a table lookup
if ( ocb & cbmask ) {
int idx = point+(so+o+b)*this->_npoints;
_entries[idx]._offset =offset+(bo++);
_entries[idx]._is_local=0;
_entries[idx]._permute =0;
_entries[idx]._around_the_world=wrap;
this->_entries[idx]._offset =offset+(bo++);
this->_entries[idx]._is_local=0;
this->_entries[idx]._permute =0;
this->_entries[idx]._around_the_world=wrap;
}
}
o +=_grid->_slice_stride[dimension];

View File

@ -39,7 +39,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout( { vComplexD::Nsimd(),1,1,1});
Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
Coordinate mpi_layout = GridDefaultMpi();
int vol = 1;
@ -279,6 +279,7 @@ int main (int argc, char ** argv)
result5 = result5 - Kinetic;
std::cout<<"diff "<< norm2(result5)<<std::endl;
assert(norm2(result5)<1.0e-4);
}
@ -357,6 +358,7 @@ int main (int argc, char ** argv)
diff = ref - result4;
std::cout << "result - ref "<<norm2(diff)<<std::endl;
assert(norm2(diff)<1.0e-4);
}
@ -440,6 +442,7 @@ int main (int argc, char ** argv)
diff = ref - result4;
std::cout << "result - ref "<<norm2(diff)<<std::endl;
assert(norm2(diff)<1.0e-4);
}

View File

@ -38,7 +38,7 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout( { vComplexD::Nsimd(),1,1,1});
Coordinate simd_layout = GridDefaultSimd(Nd,vComplexD::Nsimd());
Coordinate mpi_layout = GridDefaultMpi();
int vol = 1;
@ -74,7 +74,7 @@ int main (int argc, char ** argv)
{
std::cout<<"****************************************"<<std::endl;
std::cout << "Testing PartialFraction Hw kernel Mom space 4d propagator \n";
std::cout << "Testing OverlapWilsonPartialFractionZolotarevFermionD Hw kernel Mom space 4d propagator \n";
std::cout<<"****************************************"<<std::endl;
// LatticeFermionD src(&GRID); gaussian(pRNG,src);
@ -122,6 +122,7 @@ int main (int argc, char ** argv)
MdagMLinearOperator<OverlapWilsonPartialFractionZolotarevFermionD,LatticeFermionD> HermOp(Dov);
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
CG(HermOp,src5,result5);
std::cout << " Solved by Conjugate Gradient (CGNE)" <<std::endl;
////////////////////////////////////////////////////////////////////////
// Domain wall physical field propagator
////////////////////////////////////////////////////////////////////////
@ -153,7 +154,7 @@ int main (int argc, char ** argv)
////////////////////////////////////////////////////
{
std::cout<<"****************************************"<<std::endl;
std::cout << "Testing Dov(Hw) Mom space 4d propagator \n";
std::cout << "Testing OverlapWilsonCayleyTanhFermionD space 4d propagator \n";
std::cout<<"****************************************"<<std::endl;
LatticeFermionD tmp(&GRID);
@ -228,7 +229,7 @@ int main (int argc, char ** argv)
{
std::cout<<"****************************************"<<std::endl;
std::cout << "Testing PartialFraction Hw kernel Mom space 4d propagator with q\n";
std::cout<<"Testing OverlapWilsonPartialFractionZolotarevFermionD Hw kernel Mom space 4d propagator with q\n";
std::cout<<"****************************************"<<std::endl;
// LatticeFermionD src(&GRID); gaussian(pRNG,src);

View File

@ -39,7 +39,8 @@ int main (int argc, char ** argv)
std::cout<<GridLogMessage << "Grid is setup to use "<<threads<<" threads"<<std::endl;
Coordinate latt_size = GridDefaultLatt();
Coordinate simd_layout( { vComplexF::Nsimd(),1,1,1});
Coordinate simd_layout = GridDefaultSimd(Nd,vComplexF::Nsimd());
// Coordinate simd_layout( { vComplexF::Nsimd(),1,1,1});
Coordinate mpi_layout = GridDefaultMpi();
int vol = 1;