mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-21 17:22:03 +01:00
Compare commits
11 Commits
4a6802098a
...
3aff64dddb
Author | SHA1 | Date | |
---|---|---|---|
3aff64dddb | |||
b4f2ca81ff | |||
d1dea5f840 | |||
54f8b84d16 | |||
da503fef0e | |||
86dac5ff4f | |||
4a382fad3f | |||
cc753670d9 | |||
cc9d88ea1c | |||
b281b0166e | |||
6a21f694ff |
@ -507,6 +507,7 @@ public:
|
||||
}
|
||||
this->face_table_computed=1;
|
||||
assert(this->u_comm_offset==this->_unified_buffer_size);
|
||||
accelerator_barrier();
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -332,8 +332,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
||||
/////////////////////////////
|
||||
{
|
||||
GRID_TRACE("Gather");
|
||||
st.HaloExchangeOptGather(in,compressor);
|
||||
accelerator_barrier();
|
||||
st.HaloExchangeOptGather(in,compressor); // Put the barrier in the routine
|
||||
}
|
||||
|
||||
std::vector<std::vector<CommsRequest_t> > requests;
|
||||
|
@ -428,9 +428,10 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
|
||||
auto ptr = &st.surface_list[0]; \
|
||||
accelerator_forNB( ss, sz, Simd::Nsimd(), { \
|
||||
int sF = ptr[ss]; \
|
||||
int sU = ss/Ls; \
|
||||
int sU = sF/Ls; \
|
||||
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
|
||||
});
|
||||
}); \
|
||||
accelerator_barrier();
|
||||
|
||||
#define ASM_CALL(A) \
|
||||
thread_for( sss, Nsite, { \
|
||||
@ -474,9 +475,10 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteInt); return;}
|
||||
#endif
|
||||
} else if( exterior ) {
|
||||
// dependent on result of merge
|
||||
acceleratorFenceComputeStream();
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteExt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteExt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL_EXT(GenericDhopSiteExt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteExt); return;}
|
||||
#ifndef GRID_CUDA
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteExt); return;}
|
||||
#endif
|
||||
@ -506,9 +508,10 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagInt); return;}
|
||||
#endif
|
||||
} else if( exterior ) {
|
||||
// Dependent on result of merge
|
||||
acceleratorFenceComputeStream();
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSiteDagExt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteDagExt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL_EXT(GenericDhopSiteDagExt); return;}
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_EXT(HandDhopSiteDagExt); return;}
|
||||
#ifndef GRID_CUDA
|
||||
if (Opt == WilsonKernelsStatic::OptInlineAsm ) { ASM_CALL(AsmDhopSiteDagExt); return;}
|
||||
#endif
|
||||
|
@ -348,7 +348,7 @@ public:
|
||||
////////////////////////////////////////
|
||||
// Stencil query
|
||||
////////////////////////////////////////
|
||||
#ifdef SHM_FAST_PATH
|
||||
#if 1
|
||||
inline int SameNode(int point) {
|
||||
|
||||
int dimension = this->_directions[point];
|
||||
@ -434,7 +434,6 @@ public:
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
|
||||
{
|
||||
accelerator_barrier();
|
||||
for(int i=0;i<Packets.size();i++){
|
||||
_grid->StencilSendToRecvFromBegin(MpiReqs,
|
||||
Packets[i].send_buf,
|
||||
@ -666,11 +665,9 @@ public:
|
||||
for(int i=0;i<mm.size();i++){
|
||||
decompressor::MergeFace(decompress,mm[i]);
|
||||
}
|
||||
if ( mm.size() ) acceleratorFenceComputeStream();
|
||||
for(int i=0;i<dd.size();i++){
|
||||
decompressor::DecompressFace(decompress,dd[i]);
|
||||
}
|
||||
if ( dd.size() ) acceleratorFenceComputeStream();
|
||||
}
|
||||
////////////////////////////////////////
|
||||
// Set up routines
|
||||
@ -708,6 +705,7 @@ public:
|
||||
}
|
||||
}
|
||||
}
|
||||
std::cout << "BuildSurfaceList size is "<<surface_list.size()<<std::endl;
|
||||
}
|
||||
/// Introduce a block structure and switch off comms on boundaries
|
||||
void DirichletBlock(const Coordinate &dirichlet_block)
|
||||
|
@ -526,7 +526,7 @@ inline void acceleratorFreeCpu (void *ptr){free(ptr);};
|
||||
//////////////////////////////////////////////
|
||||
|
||||
#ifdef GRID_SYCL
|
||||
inline void acceleratorFenceComputeStream(void){ theGridAccelerator->submit_barrier();};
|
||||
inline void acceleratorFenceComputeStream(void){ theGridAccelerator->ext_oneapi_submit_barrier(); };
|
||||
#else
|
||||
// Ordering within a stream guaranteed on Nvidia & AMD
|
||||
inline void acceleratorFenceComputeStream(void){ };
|
||||
|
@ -425,7 +425,7 @@ void Benchmark(int Ls, Coordinate Dirichlet)
|
||||
|
||||
err = r_eo-result;
|
||||
n2e= norm2(err);
|
||||
std::cout<<GridLogMessage << "norm diff "<< n2e<< " Line "<<__LINE__ <<std::endl;
|
||||
std::cout<<GridLogMessage << "norm diff "<< n2e<<std::endl;
|
||||
assert(n2e<1.0e-4);
|
||||
|
||||
pickCheckerboard(Even,src_e,err);
|
||||
|
@ -3,8 +3,14 @@ export https_proxy=http://proxy-chain.intel.com:911
|
||||
export LD_LIBRARY_PATH=$HOME/prereqs/lib/:$LD_LIBRARY_PATH
|
||||
|
||||
module load intel-release
|
||||
source /opt/intel/oneapi/PVC_setup.sh
|
||||
module load intel-comp-rt/embargo-ci-neo
|
||||
|
||||
#source /opt/intel/oneapi/PVC_setup.sh
|
||||
#source /opt/intel/oneapi/ATS_setup.sh
|
||||
#module load intel-nightly/20230331
|
||||
#module load intel-comp-rt/ci-neo-master/026093
|
||||
|
||||
#module load intel/mpich
|
||||
module load intel/mpich/pvc45.3
|
||||
export PATH=~/ATS/pti-gpu/tools/onetrace/:$PATH
|
||||
|
||||
|
Reference in New Issue
Block a user