mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-21 17:22:03 +01:00
Compare commits
20 Commits
hotfix/vir
...
3984265851
Author | SHA1 | Date | |
---|---|---|---|
3984265851 | |||
45361d188f | |||
80c9d77e02 | |||
3aff64dddb | |||
b4f2ca81ff | |||
d1dea5f840 | |||
54f8b84d16 | |||
da503fef0e | |||
4a6802098a | |||
f9b41a84d2 | |||
9e64387933 | |||
983b681d46 | |||
86dac5ff4f | |||
4a382fad3f | |||
cc753670d9 | |||
cc9d88ea1c | |||
b281b0166e | |||
6a21f694ff | |||
39214702f6 | |||
3e4614c63a |
54
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
Normal file
54
.github/ISSUE_TEMPLATE/bug-report.yml
vendored
Normal file
@ -0,0 +1,54 @@
|
||||
name: Bug report
|
||||
description: Report a bug.
|
||||
title: "<insert title>"
|
||||
labels: [bug]
|
||||
|
||||
body:
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: >
|
||||
Thank you for taking the time to file a bug report.
|
||||
Please check that the code is pointing to the HEAD of develop
|
||||
or any commit in master which is tagged with a version number.
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: "Describe the issue:"
|
||||
description: >
|
||||
Describe the issue and any previous attempt to solve it.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: "Code example:"
|
||||
description: >
|
||||
If relevant, show how to reproduce the issue using a minimal working
|
||||
example.
|
||||
placeholder: |
|
||||
<< your code here >>
|
||||
render: shell
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: "Target platform:"
|
||||
description: >
|
||||
Give a description of the target platform (CPU, network, compiler).
|
||||
Please give the full CPU part description, using for example
|
||||
`cat /proc/cpuinfo | grep 'model name' | uniq` (Linux)
|
||||
or `sysctl machdep.cpu.brand_string` (macOS) and the full output
|
||||
of the `--version` option of your compiler.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: "Configure options:"
|
||||
description: >
|
||||
Please give the exact configure command used and attach
|
||||
`config.log`, `grid.config.summary` and the output of `make V=1`.
|
||||
render: shell
|
||||
validations:
|
||||
required: true
|
@ -166,16 +166,16 @@ public:
|
||||
rsqf[s] =rsq[s];
|
||||
std::cout<<GridLogMessage<<"ConjugateGradientMultiShiftMixedPrecCleanup: shift "<< s <<" target resid "<<rsq[s]<<std::endl;
|
||||
// ps_d[s] = src_d;
|
||||
precisionChangeFast(ps_f[s],src_d);
|
||||
precisionChange(ps_f[s],src_d);
|
||||
}
|
||||
// r and p for primary
|
||||
p_d = src_d; //primary copy --- make this a reference to ps_d to save axpys
|
||||
r_d = p_d;
|
||||
|
||||
//MdagM+m[0]
|
||||
precisionChangeFast(p_f,p_d);
|
||||
precisionChange(p_f,p_d);
|
||||
Linop_f.HermOpAndNorm(p_f,mmp_f,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
|
||||
precisionChangeFast(tmp_d,mmp_f);
|
||||
precisionChange(tmp_d,mmp_f);
|
||||
Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
|
||||
tmp_d = tmp_d - mmp_d;
|
||||
std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
|
||||
@ -204,7 +204,7 @@ public:
|
||||
|
||||
for(int s=0;s<nshift;s++) {
|
||||
axpby(psi_d[s],0.,-bs[s]*alpha[s],src_d,src_d);
|
||||
precisionChangeFast(psi_f[s],psi_d[s]);
|
||||
precisionChange(psi_f[s],psi_d[s]);
|
||||
}
|
||||
|
||||
///////////////////////////////////////
|
||||
@ -225,7 +225,7 @@ public:
|
||||
AXPYTimer.Stop();
|
||||
|
||||
PrecChangeTimer.Start();
|
||||
precisionChangeFast(r_f, r_d);
|
||||
precisionChange(r_f, r_d);
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
AXPYTimer.Start();
|
||||
@ -243,13 +243,13 @@ public:
|
||||
|
||||
cp=c;
|
||||
PrecChangeTimer.Start();
|
||||
precisionChangeFast(p_f, p_d); //get back single prec search direction for linop
|
||||
precisionChange(p_f, p_d); //get back single prec search direction for linop
|
||||
PrecChangeTimer.Stop();
|
||||
MatrixTimer.Start();
|
||||
Linop_f.HermOp(p_f,mmp_f);
|
||||
MatrixTimer.Stop();
|
||||
PrecChangeTimer.Start();
|
||||
precisionChangeFast(mmp_d, mmp_f); // From Float to Double
|
||||
precisionChange(mmp_d, mmp_f); // From Float to Double
|
||||
PrecChangeTimer.Stop();
|
||||
|
||||
d=real(innerProduct(p_d,mmp_d));
|
||||
@ -311,7 +311,7 @@ public:
|
||||
SolverTimer.Stop();
|
||||
|
||||
for(int s=0;s<nshift;s++){
|
||||
precisionChangeFast(psi_d[s],psi_f[s]);
|
||||
precisionChange(psi_d[s],psi_f[s]);
|
||||
}
|
||||
|
||||
|
||||
|
@ -211,7 +211,7 @@ public:
|
||||
Linop_d.HermOpAndNorm(p_d,mmp_d,d,qq); // mmp = MdagM p d=real(dot(p, mmp)), qq=norm2(mmp)
|
||||
tmp_d = tmp_d - mmp_d;
|
||||
std::cout << " Testing operators match "<<norm2(mmp_d)<<" f "<<norm2(mmp_f)<<" diff "<< norm2(tmp_d)<<std::endl;
|
||||
// assert(norm2(tmp_d)< 1.0e-4);
|
||||
assert(norm2(tmp_d)< 1.0);
|
||||
|
||||
axpy(mmp_d,mass[0],p_d,mmp_d);
|
||||
RealD rn = norm2(p_d);
|
||||
|
@ -519,7 +519,6 @@ void MemoryManager::Audit(std::string s)
|
||||
uint64_t LruBytes1=0;
|
||||
uint64_t LruBytes2=0;
|
||||
uint64_t LruCnt=0;
|
||||
uint64_t LockedBytes=0;
|
||||
|
||||
std::cout << " Memory Manager::Audit() from "<<s<<std::endl;
|
||||
for(auto it=LRU.begin();it!=LRU.end();it++){
|
||||
|
@ -170,10 +170,7 @@ void GlobalSharedMemory::OptimalCommunicator(const Coordinate &processors,Grid_M
|
||||
if(nscan==3 && HPEhypercube ) OptimalCommunicatorHypercube(processors,optimal_comm,SHM);
|
||||
else OptimalCommunicatorSharedMemory(processors,optimal_comm,SHM);
|
||||
}
|
||||
static inline int divides(int a,int b)
|
||||
{
|
||||
return ( b == ( (b/a)*a ) );
|
||||
}
|
||||
|
||||
void GlobalSharedMemory::OptimalCommunicatorHypercube(const Coordinate &processors,Grid_MPI_Comm & optimal_comm,Coordinate &SHM)
|
||||
{
|
||||
////////////////////////////////////////////////////////////////
|
||||
|
@ -507,6 +507,7 @@ public:
|
||||
}
|
||||
this->face_table_computed=1;
|
||||
assert(this->u_comm_offset==this->_unified_buffer_size);
|
||||
accelerator_barrier();
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -332,8 +332,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
||||
/////////////////////////////
|
||||
{
|
||||
GRID_TRACE("Gather");
|
||||
st.HaloExchangeOptGather(in,compressor);
|
||||
accelerator_barrier();
|
||||
st.HaloExchangeOptGather(in,compressor); // Put the barrier in the routine
|
||||
}
|
||||
|
||||
std::vector<std::vector<CommsRequest_t> > requests;
|
||||
|
@ -428,9 +428,10 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
|
||||
auto ptr = &st.surface_list[0]; \
|
||||
accelerator_forNB( ss, sz, Simd::Nsimd(), { \
|
||||
int sF = ptr[ss]; \
|
||||
int sU = ss/Ls; \
|
||||
int sU = sF/Ls; \
|
||||
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
|
||||
});
|
||||
}); \
|
||||
accelerator_barrier();
|
||||
|
||||
#define ASM_CALL(A) \
|
||||
thread_for( sss, Nsite, { \
|
||||
@ -474,9 +475,10 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); return;}
|
||||
#endif
|
||||
} else if( exterior ) {
|
||||
// dependent on result of merge
|
||||
acceleratorFenceComputeStream();
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL_EXT(GenericDhopSiteExt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteExt); return;}
|
||||
#ifndef GRID_CUDA
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); return;}
|
||||
#endif
|
||||
@ -506,9 +508,10 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;}
|
||||
#endif
|
||||
} else if( exterior ) {
|
||||
// Dependent on result of merge
|
||||
acceleratorFenceComputeStream();
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagExt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL_EXT(GenericDhopSiteDagExt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteDagExt); return;}
|
||||
#ifndef GRID_CUDA
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;}
|
||||
#endif
|
||||
|
@ -53,9 +53,10 @@ NAMESPACE_BEGIN(Grid);
|
||||
Integer ReliableUpdateFreq;
|
||||
protected:
|
||||
|
||||
//Action evaluation
|
||||
//Allow derived classes to override the multishift CG
|
||||
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, FermionFieldD &out){
|
||||
#if 0
|
||||
#if 1
|
||||
SchurDifferentiableOperator<ImplD> schurOp(numerator ? NumOpD : DenOpD);
|
||||
ConjugateGradientMultiShift<FermionFieldD> msCG(MaxIter, approx);
|
||||
msCG(schurOp,in, out);
|
||||
@ -70,9 +71,10 @@ NAMESPACE_BEGIN(Grid);
|
||||
msCG(schurOpD, in, out);
|
||||
#endif
|
||||
}
|
||||
//Force evaluation
|
||||
virtual void multiShiftInverse(bool numerator, const MultiShiftFunction &approx, const Integer MaxIter, const FermionFieldD &in, std::vector<FermionFieldD> &out_elems, FermionFieldD &out){
|
||||
SchurDifferentiableOperator<ImplD> schurOpD(numerator ? NumOpD : DenOpD);
|
||||
SchurDifferentiableOperator<ImplF> schurOpF (numerator ? NumOpF : DenOpF);
|
||||
SchurDifferentiableOperator<ImplF> schurOpF(numerator ? NumOpF : DenOpF);
|
||||
|
||||
FermionFieldD inD(NumOpD.FermionRedBlackGrid());
|
||||
FermionFieldD outD(NumOpD.FermionRedBlackGrid());
|
||||
@ -84,20 +86,15 @@ NAMESPACE_BEGIN(Grid);
|
||||
virtual void ImportGauge(const typename ImplD::GaugeField &Ud){
|
||||
|
||||
typename ImplF::GaugeField Uf(NumOpF.GaugeGrid());
|
||||
typename ImplD::GaugeField Ud2(NumOpD.GaugeGrid());
|
||||
precisionChange(Uf, Ud);
|
||||
precisionChange(Ud2, Ud);
|
||||
|
||||
std::cout << "Importing "<<norm2(Ud)<<" "<< norm2(Uf)<<" " << norm2(Ud2)<<std::endl;
|
||||
std::cout << "Importing "<<norm2(Ud)<<" "<< norm2(Uf)<<" " <<std::endl;
|
||||
|
||||
NumOpD.ImportGauge(Ud);
|
||||
DenOpD.ImportGauge(Ud);
|
||||
|
||||
NumOpF.ImportGauge(Uf);
|
||||
DenOpF.ImportGauge(Uf);
|
||||
|
||||
NumOpD.ImportGauge(Ud2);
|
||||
DenOpD.ImportGauge(Ud2);
|
||||
}
|
||||
|
||||
public:
|
||||
|
@ -348,7 +348,7 @@ public:
|
||||
////////////////////////////////////////
|
||||
// Stencil query
|
||||
////////////////////////////////////////
|
||||
#ifdef SHM_FAST_PATH
|
||||
#if 1
|
||||
inline int SameNode(int point) {
|
||||
|
||||
int dimension = this->_directions[point];
|
||||
@ -434,7 +434,6 @@ public:
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
|
||||
{
|
||||
accelerator_barrier();
|
||||
for(int i=0;i<Packets.size();i++){
|
||||
_grid->StencilSendToRecvFromBegin(MpiReqs,
|
||||
Packets[i].send_buf,
|
||||
@ -666,11 +665,9 @@ public:
|
||||
for(int i=0;i<mm.size();i++){
|
||||
decompressor::MergeFace(decompress,mm[i]);
|
||||
}
|
||||
if ( mm.size() ) acceleratorFenceComputeStream();
|
||||
for(int i=0;i<dd.size();i++){
|
||||
decompressor::DecompressFace(decompress,dd[i]);
|
||||
}
|
||||
if ( dd.size() ) acceleratorFenceComputeStream();
|
||||
}
|
||||
////////////////////////////////////////
|
||||
// Set up routines
|
||||
@ -708,6 +705,7 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
|
||||
}
|
||||
/// Introduce a block structure and switch off comms on boundaries
|
||||
void DirichletBlock(const Coordinate &dirichlet_block)
|
||||
@ -1369,10 +1367,11 @@ public:
|
||||
int recv_from_rank;
|
||||
int xmit_to_rank;
|
||||
int shm_send=0;
|
||||
int shm_recv=0;
|
||||
|
||||
_grid->ShiftedRanks(dimension,nbr_proc,xmit_to_rank,recv_from_rank);
|
||||
#ifdef SHM_FAST_PATH
|
||||
#warning STENCIL SHM FAST PATH SELECTED
|
||||
int shm_recv=0;
|
||||
// shm == receive pointer if offnode
|
||||
// shm == Translate[send pointer] if on node -- my view of his send pointer
|
||||
cobj *shm = (cobj *) _grid->ShmBufferTranslate(recv_from_rank,sp);
|
||||
@ -1405,7 +1404,6 @@ public:
|
||||
acceleratorMemSet(rp,0,bytes); // Zero prefill comms buffer to zero
|
||||
}
|
||||
int do_send = (comms_send|comms_partial_send) && (!shm_send );
|
||||
int do_recv = (comms_send|comms_partial_send) && (!shm_recv );
|
||||
AddPacket((void *)sp,(void *)rp,
|
||||
xmit_to_rank,do_send,
|
||||
recv_from_rank,do_send,
|
||||
|
@ -133,7 +133,6 @@ typename vobj::scalar_object extractLane(int lane, const vobj & __restrict__ vec
|
||||
typedef scalar_type * pointer;
|
||||
|
||||
constexpr int words=sizeof(vobj)/sizeof(vector_type);
|
||||
constexpr int Nsimd=vector_type::Nsimd();
|
||||
|
||||
scalar_object extracted;
|
||||
pointer __restrict__ sp = (pointer)&extracted; // Type pun
|
||||
@ -153,7 +152,6 @@ void insertLane(int lane, vobj & __restrict__ vec,const typename vobj::scalar_ob
|
||||
typedef scalar_type * pointer;
|
||||
|
||||
constexpr int words=sizeof(vobj)/sizeof(vector_type);
|
||||
constexpr int Nsimd=vector_type::Nsimd();
|
||||
|
||||
pointer __restrict__ sp = (pointer)&extracted;
|
||||
vector_type *vp = (vector_type *)&vec;
|
||||
@ -178,8 +176,6 @@ void extract(const vobj &vec,const ExtractPointerArray<sobj> &extracted, int off
|
||||
const int s = Nsimd/Nextr;
|
||||
|
||||
vector_type * vp = (vector_type *)&vec;
|
||||
scalar_type vtmp;
|
||||
sobj_scalar_type stmp;
|
||||
for(int w=0;w<words;w++){
|
||||
for(int i=0;i<Nextr;i++){
|
||||
sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
|
||||
@ -205,7 +201,6 @@ void merge(vobj &vec,const ExtractPointerArray<sobj> &extracted, int offset)
|
||||
|
||||
vector_type * vp = (vector_type *)&vec;
|
||||
scalar_type vtmp;
|
||||
sobj_scalar_type stmp;
|
||||
for(int w=0;w<words;w++){
|
||||
for(int i=0;i<Nextr;i++){
|
||||
sobj_scalar_type * pointer = (sobj_scalar_type *)& extracted[i][offset];
|
||||
@ -242,9 +237,6 @@ void copyLane(vobjOut & __restrict__ vecOut, int lane_out, const vobjIn & __rest
|
||||
typedef oextract_type * opointer;
|
||||
typedef iextract_type * ipointer;
|
||||
|
||||
constexpr int oNsimd=ovector_type::Nsimd();
|
||||
constexpr int iNsimd=ivector_type::Nsimd();
|
||||
|
||||
iscalar_type itmp;
|
||||
oscalar_type otmp;
|
||||
|
||||
|
@ -526,7 +526,7 @@ inline void acceleratorFreeCpu (void *ptr){free(ptr);};
|
||||
//////////////////////////////////////////////
|
||||
|
||||
#ifdef GRID_SYCL
|
||||
inline void acceleratorFenceComputeStream(void){ theGridAccelerator->submit_barrier();};
|
||||
inline void acceleratorFenceComputeStream(void){ theGridAccelerator->ext_oneapi_submit_barrier(); };
|
||||
#else
|
||||
// Ordering within a stream guaranteed on Nvidia & AMD
|
||||
inline void acceleratorFenceComputeStream(void){ };
|
||||
|
@ -227,7 +227,7 @@ int main(int argc, char **argv) {
|
||||
// std::vector<Real> hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass }); // Updated
|
||||
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
|
||||
|
||||
int SP_iters=10000;
|
||||
int SP_iters=9000;
|
||||
|
||||
RationalActionParams OFRp; // Up/down
|
||||
OFRp.lo = 6.0e-5;
|
||||
@ -362,12 +362,12 @@ int main(int argc, char **argv) {
|
||||
|
||||
// Probably dominates the force - back to EOFA.
|
||||
OneFlavourRationalParams SFRp;
|
||||
SFRp.lo = 0.25;
|
||||
SFRp.lo = 0.1;
|
||||
SFRp.hi = 25.0;
|
||||
SFRp.MaxIter = 10000;
|
||||
SFRp.tolerance= 1.0e-5;
|
||||
SFRp.tolerance= 1.0e-8;
|
||||
SFRp.mdtolerance= 2.0e-4;
|
||||
SFRp.degree = 8;
|
||||
SFRp.degree = 12;
|
||||
SFRp.precision= 50;
|
||||
|
||||
MobiusEOFAFermionD Strange_Op_L (U , *FGrid , *FrbGrid , *GridPtr , *GridRBPtr , strange_mass, strange_mass, pv_mass, 0.0, -1, M5, b, c);
|
||||
|
@ -329,7 +329,6 @@ int main(int argc, char **argv) {
|
||||
|
||||
|
||||
auto grid4= GridPtr;
|
||||
auto rbgrid4= GridRBPtr;
|
||||
auto rbgrid = StrangeOp.FermionRedBlackGrid();
|
||||
auto grid = StrangeOp.FermionGrid();
|
||||
if(1){
|
||||
|
@ -425,7 +425,7 @@ void Benchmark(int Ls, Coordinate Dirichlet)
|
||||
|
||||
err = r_eo-result;
|
||||
n2e= norm2(err);
|
||||
std::cout<<GridLogMessage << "norm diff "<< n2e<< " Line "<<__LINE__ <<std::endl;
|
||||
std::cout<<GridLogMessage << "norm diff "<< n2e<<std::endl;
|
||||
assert(n2e<1.0e-4);
|
||||
|
||||
pickCheckerboard(Even,src_e,err);
|
||||
|
@ -3,8 +3,14 @@ export https_proxy=http://proxy-chain.intel.com:911
|
||||
export LD_LIBRARY_PATH=$HOME/prereqs/lib/:$LD_LIBRARY_PATH
|
||||
|
||||
module load intel-release
|
||||
source /opt/intel/oneapi/PVC_setup.sh
|
||||
module load intel-comp-rt/embargo-ci-neo
|
||||
|
||||
#source /opt/intel/oneapi/PVC_setup.sh
|
||||
#source /opt/intel/oneapi/ATS_setup.sh
|
||||
#module load intel-nightly/20230331
|
||||
#module load intel-comp-rt/ci-neo-master/026093
|
||||
|
||||
#module load intel/mpich
|
||||
module load intel/mpich/pvc45.3
|
||||
export PATH=~/ATS/pti-gpu/tools/onetrace/:$PATH
|
||||
|
||||
|
Reference in New Issue
Block a user