1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-22 17:52:02 +01:00

Compare commits

..

1 Commits

Author SHA1 Message Date
5d7e0d18b9 virtual destructor for LinearOperator 2023-04-07 14:30:38 +01:00
12 changed files with 37 additions and 40 deletions

View File

@ -542,6 +542,7 @@ public:
(*this)(in[i], out[i]); (*this)(in[i], out[i]);
} }
} }
virtual ~LinearFunction(){};
}; };
template<class Field> class IdentityLinearFunction : public LinearFunction<Field> { template<class Field> class IdentityLinearFunction : public LinearFunction<Field> {

View File

@ -166,16 +166,16 @@ public:
rsqf[s] =rsq[s]; rsqf[s] =rsq[s];
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup: shift "<< s <<" target resid "<<rsq[s]<<std::endl; std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup: shift "<< s <<" target resid "<<rsq[s]<<std::endl;
// ps_d[s] = src_d; // ps_d[s] = src_d;
precisionChange(ps_f[s],src_d); precisionChangeFast(ps_f[s],src_d);
} }
// r and p for primary // r and p for primary
p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys
r_d = p_d; r_d = p_d;
//MdagM+m[0] //MdagM+m[0]
precisionChange(p_f,p_d); precisionChangeFast(p_f,p_d);
Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp) Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
precisionChange(tmp_d,mmp_f); precisionChangeFast(tmp_d,mmp_f);
Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp) Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
tmp_d = tmp_d - mmp_d; tmp_d = tmp_d - mmp_d;
std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl; std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
@ -204,7 +204,7 @@ public:
for(int s=0;s<nshift;s++) { for(int s=0;s<nshift;s++) {
axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d); axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d);
precisionChange(psi_f[s],psi_d[s]); precisionChangeFast(psi_f[s],psi_d[s]);
} }
/////////////////////////////////////// ///////////////////////////////////////
@ -225,7 +225,7 @@ public:
AXPYTimer.Stop(); AXPYTimer.Stop();
PrecChangeTimer.Start(); PrecChangeTimer.Start();
precisionChange(r_f, r_d); precisionChangeFast(r_f, r_d);
PrecChangeTimer.Stop(); PrecChangeTimer.Stop();
AXPYTimer.Start(); AXPYTimer.Start();
@ -243,13 +243,13 @@ public:
cp=c; cp=c;
PrecChangeTimer.Start(); PrecChangeTimer.Start();
precisionChange(p_f, p_d); //get back single prec search direction for linop precisionChangeFast(p_f, p_d); //get back single prec search direction for linop
PrecChangeTimer.Stop(); PrecChangeTimer.Stop();
MatrixTimer.Start(); MatrixTimer.Start();
Linop_f.HermOp(p_f,mmp_f); Linop_f.HermOp(p_f,mmp_f);
MatrixTimer.Stop(); MatrixTimer.Stop();
PrecChangeTimer.Start(); PrecChangeTimer.Start();
precisionChange(mmp_d, mmp_f); // From Float to Double precisionChangeFast(mmp_d, mmp_f); // From Float to Double
PrecChangeTimer.Stop(); PrecChangeTimer.Stop();
d=real(innerProduct(p_d,mmp_d)); d=real(innerProduct(p_d,mmp_d));
@ -311,7 +311,7 @@ public:
SolverTimer.Stop(); SolverTimer.Stop();
for(int s=0;s<nshift;s++){ for(int s=0;s<nshift;s++){
precisionChange(psi_d[s],psi_f[s]); precisionChangeFast(psi_d[s],psi_f[s]);
} }

View File

@ -211,7 +211,7 @@ public:
Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp) Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
tmp_d = tmp_d - mmp_d; tmp_d = tmp_d - mmp_d;
std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl; std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
assert(norm2(tmp_d)< 1.0); // assert(norm2(tmp_d)< 1.0e-4);
axpy(mmp_d,mass[0],p_d,mmp_d); axpy(mmp_d,mass[0],p_d,mmp_d);
RealD rn = norm2(p_d); RealD rn = norm2(p_d);

View File

@ -507,7 +507,6 @@ public:
} }
this->face_table_computed=1; this->face_table_computed=1;
assert(this->u_comm_offset==this->_unified_buffer_size); assert(this->u_comm_offset==this->_unified_buffer_size);
accelerator_barrier();
} }
}; };

View File

@ -332,7 +332,8 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
///////////////////////////// /////////////////////////////
{ {
GRID_TRACE("Gather"); GRID_TRACE("Gather");
st.HaloExchangeOptGather(in,compressor); // Put the barrier in the routine st.HaloExchangeOptGather(in,compressor);
accelerator_barrier();
} }
std::vector<std::vector<CommsRequest_t> > requests; std::vector<std::vector<CommsRequest_t> > requests;

View File

@ -428,10 +428,9 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
auto ptr = &st.surface_list[0]; \ auto ptr = &st.surface_list[0]; \
accelerator_forNB( ss, sz, Simd::Nsimd(), { \ accelerator_forNB( ss, sz, Simd::Nsimd(), { \
int sF = ptr[ss]; \ int sF = ptr[ss]; \
int sU = sF/Ls; \ int sU = ss/Ls; \
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \ WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
}); \ });
accelerator_barrier();
#define ASM_CALL(A) \ #define ASM_CALL(A) \
thread_for( sss, Nsite, { \ thread_for( sss, Nsite, { \
@ -475,10 +474,9 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); return;}
#endif #endif
} else if( exterior ) { } else if( exterior ) {
// dependent on result of merge
acceleratorFenceComputeStream(); acceleratorFenceComputeStream();
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL_EXT(GenericDhopSiteExt); return;} if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteExt); return;} if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
#ifndef GRID_CUDA #ifndef GRID_CUDA
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); return;}
#endif #endif
@ -508,10 +506,9 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;}
#endif #endif
} else if( exterior ) { } else if( exterior ) {
// Dependent on result of merge
acceleratorFenceComputeStream(); acceleratorFenceComputeStream();
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL_EXT(GenericDhopSiteDagExt); return;} if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagExt); return;}
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteDagExt); return;} if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;}
#ifndef GRID_CUDA #ifndef GRID_CUDA
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;} if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;}
#endif #endif

View File

@ -53,10 +53,9 @@ NAMESPACE_BEGIN(Grid);
Integer ReliableUpdateFreq; Integer ReliableUpdateFreq;
protected: protected:
//Action evaluation
//Allow derived classes to override the multishift CG //Allow derived classes to override the multishift CG
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){ virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){
#if 1 #if 0
SchurDifferentiableOperator<ImplD> schurOp(numerator ? NumOpD : DenOpD); SchurDifferentiableOperator<ImplD> schurOp(numerator ? NumOpD : DenOpD);
ConjugateGradientMultiShift<FermionFieldD> msCG(MaxIter, approx); ConjugateGradientMultiShift<FermionFieldD> msCG(MaxIter, approx);
msCG(schurOp,in, out); msCG(schurOp,in, out);
@ -71,7 +70,6 @@ NAMESPACE_BEGIN(Grid);
msCG(schurOpD, in, out); msCG(schurOpD, in, out);
#endif #endif
} }
//Force evaluation
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector<FermionFieldD> &out_elems, FermionFieldD &out){ virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector<FermionFieldD> &out_elems, FermionFieldD &out){
SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD); SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD);
SchurDifferentiableOperator<ImplF> schurOpF (numerator ? NumOpF : DenOpF); SchurDifferentiableOperator<ImplF> schurOpF (numerator ? NumOpF : DenOpF);
@ -86,15 +84,20 @@ NAMESPACE_BEGIN(Grid);
virtual void ImportGauge(const typename ImplD::GaugeField &Ud){ virtual void ImportGauge(const typename ImplD::GaugeField &Ud){
typename ImplF::GaugeField Uf(NumOpF.GaugeGrid()); typename ImplF::GaugeField Uf(NumOpF.GaugeGrid());
typename ImplD::GaugeField Ud2(NumOpD.GaugeGrid());
precisionChange(Uf, Ud); precisionChange(Uf, Ud);
precisionChange(Ud2, Ud);
std::cout << "Importing "<<norm2(Ud)<<" "<< norm2(Uf)<<" " <<std::endl; std::cout << "Importing "<<norm2(Ud)<<" "<< norm2(Uf)<<" " << norm2(Ud2)<<std::endl;
NumOpD.ImportGauge(Ud); NumOpD.ImportGauge(Ud);
DenOpD.ImportGauge(Ud); DenOpD.ImportGauge(Ud);
NumOpF.ImportGauge(Uf); NumOpF.ImportGauge(Uf);
DenOpF.ImportGauge(Uf); DenOpF.ImportGauge(Uf);
NumOpD.ImportGauge(Ud2);
DenOpD.ImportGauge(Ud2);
} }
public: public:

View File

@ -348,7 +348,7 @@ public:
//////////////////////////////////////// ////////////////////////////////////////
// Stencil query // Stencil query
//////////////////////////////////////// ////////////////////////////////////////
#if 1 #ifdef SHM_FAST_PATH
inline int SameNode(int point) { inline int SameNode(int point) {
int dimension = this->_directions[point]; int dimension = this->_directions[point];
@ -434,6 +434,7 @@ public:
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs) void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
{ {
accelerator_barrier();
for(int i=0;i<Packets.size();i++){ for(int i=0;i<Packets.size();i++){
_grid->StencilSendToRecvFromBegin(MpiReqs, _grid->StencilSendToRecvFromBegin(MpiReqs,
Packets[i].send_buf, Packets[i].send_buf,
@ -665,9 +666,11 @@ public:
for(int i=0;i<mm.size();i++){ for(int i=0;i<mm.size();i++){
decompressor::MergeFace(decompress,mm[i]); decompressor::MergeFace(decompress,mm[i]);
} }
if ( mm.size() ) acceleratorFenceComputeStream();
for(int i=0;i<dd.size();i++){ for(int i=0;i<dd.size();i++){
decompressor::DecompressFace(decompress,dd[i]); decompressor::DecompressFace(decompress,dd[i]);
} }
if ( dd.size() ) acceleratorFenceComputeStream();
} }
//////////////////////////////////////// ////////////////////////////////////////
// Set up routines // Set up routines
@ -705,7 +708,6 @@ public:
} }
} }
} }
std::cout << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
} }
/// Introduce a block structure and switch off comms on boundaries /// Introduce a block structure and switch off comms on boundaries
void DirichletBlock(const Coordinate &dirichlet_block) void DirichletBlock(const Coordinate &dirichlet_block)

View File

@ -526,7 +526,7 @@ inline void acceleratorFreeCpu (void *ptr){free(ptr);};
////////////////////////////////////////////// //////////////////////////////////////////////
#ifdef GRID_SYCL #ifdef GRID_SYCL
inline void acceleratorFenceComputeStream(void){ theGridAccelerator->ext_oneapi_submit_barrier(); }; inline void acceleratorFenceComputeStream(void){ theGridAccelerator->submit_barrier();};
#else #else
// Ordering within a stream guaranteed on Nvidia & AMD // Ordering within a stream guaranteed on Nvidia & AMD
inline void acceleratorFenceComputeStream(void){ }; inline void acceleratorFenceComputeStream(void){ };

View File

@ -227,7 +227,7 @@ int main(int argc, char **argv) {
// std::vector<Real> hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated // std::vector<Real> hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass }); // std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
int SP_iters=9000; int SP_iters=10000;
RationalActionParams OFRp; // Up/down RationalActionParams OFRp; // Up/down
OFRp.lo = 6.0e-5; OFRp.lo = 6.0e-5;
@ -362,12 +362,12 @@ int main(int argc, char **argv) {
// Probably dominates the force - back to EOFA. // Probably dominates the force - back to EOFA.
OneFlavourRationalParams SFRp; OneFlavourRationalParams SFRp;
SFRp.lo = 0.1; SFRp.lo = 0.25;
SFRp.hi = 25.0; SFRp.hi = 25.0;
SFRp.MaxIter = 10000; SFRp.MaxIter = 10000;
SFRp.tolerance= 1.0e-8; SFRp.tolerance= 1.0e-5;
SFRp.mdtolerance= 2.0e-4; SFRp.mdtolerance= 2.0e-4;
SFRp.degree = 12; SFRp.degree = 8;
SFRp.precision= 50; SFRp.precision= 50;
MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c); MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);

View File

@ -425,7 +425,7 @@ void Benchmark(int Ls, Coordinate Dirichlet)
err = r_eo-result; err = r_eo-result;
n2e= norm2(err); n2e= norm2(err);
std::cout<<GridLogMessage << "norm diff "<< n2e<<std::endl; std::cout<<GridLogMessage << "norm diff "<< n2e<< " Line "<<__LINE__ <<std::endl;
assert(n2e<1.0e-4); assert(n2e<1.0e-4);
pickCheckerboard(Even,src_e,err); pickCheckerboard(Even,src_e,err);

View File

@ -3,14 +3,8 @@ export https_proxy=http://proxy-chain.intel.com:911
export LD_LIBRARY_PATH=$HOME/prereqs/lib/:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=$HOME/prereqs/lib/:$LD_LIBRARY_PATH
module load intel-release module load intel-release
module load intel-comp-rt/embargo-ci-neo source /opt/intel/oneapi/PVC_setup.sh
#source /opt/intel/oneapi/PVC_setup.sh
#source /opt/intel/oneapi/ATS_setup.sh #source /opt/intel/oneapi/ATS_setup.sh
#module load intel-nightly/20230331
#module load intel-comp-rt/ci-neo-master/026093
#module load intel/mpich
module load intel/mpich/pvc45.3 module load intel/mpich/pvc45.3
export PATH=~/ATS/pti-gpu/tools/onetrace/:$PATH export PATH=~/ATS/pti-gpu/tools/onetrace/:$PATH