1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-04-03 18:55:56 +01:00

Dslash testing for reproduce

This commit is contained in:
Peter Boyle 2024-11-11 23:11:11 +00:00
parent b91fc1b6b4
commit 8fe429346f
5 changed files with 324 additions and 1 deletions

View File

@ -119,6 +119,9 @@ public:
void DhopOE(const FermionField &in, FermionField &out,int dag);
void DhopEO(const FermionField &in, FermionField &out,int dag);
// Communication-only half of the hopping term: performs the stencil halo
// exchange for `in` without running the compute kernel.
void DhopComms (const FermionField &in, FermionField &out);
// Compute-only half of the hopping term: runs the Dslash kernel on `in`,
// writing `out`, and stores one identifier per (site, SIMD lane) into `ids`.
void DhopCalc (const FermionField &in, FermionField &out,uint64_t *ids);
// add a DhopComm
// -- suboptimal interface will presently trigger multiple comms.
void DhopDir(const FermionField &in, FermionField &out,int dir,int disp);

View File

@ -57,6 +57,10 @@ public:
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ;
// Overload that, in addition to applying the kernel, records an identifier
// for each (site, SIMD lane) processed into the caller-supplied `ids` array.
static void DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
uint64_t *ids);
static void DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
int interior=1,int exterior=1) ;

View File

@ -438,6 +438,29 @@ void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int
DhopInternal(StencilOdd,UmuEven,in,out,dag);
}
// Halo-exchange-only step: checks that `in` and `out` share the full fermion
// grid, copies the checkerboard tag across, and triggers the stencil halo
// exchange for `in` (non-daggered compressor). No kernel is launched and the
// site data of `out` is not written here.
template<class Impl>
void WilsonFermion5D<Impl>::DhopComms(const FermionField &in, FermionField &out)
{
  // Both fields must live on the full (non-checkerboarded) fermion grid.
  conformable(in.Grid(),FermionGrid());
  conformable(in.Grid(),out.Grid());

  out.Checkerboard() = in.Checkerboard();

  int no_dagger = 0;
  Compressor halo_compressor(no_dagger);
  Stencil.HaloExchangeOpt(in,halo_compressor);
}
// Kernel-only step: applies the Dslash kernel to `in`, writing `out`, and
// passes `ids` through so the kernel can record an identifier per
// (site, SIMD lane). No halo exchange is issued here; the kernel is handed
// Stencil.CommBuf() as-is.
template<class Impl>
void WilsonFermion5D<Impl>::DhopCalc(const FermionField &in, FermionField &out,uint64_t *ids)
{
  // Both fields must live on the full (non-checkerboarded) fermion grid.
  conformable(in.Grid(),FermionGrid());
  conformable(in.Grid(),out.Grid());

  out.Checkerboard() = in.Checkerboard();

  // Extent of dimension 0 of the input grid, forwarded as the kernel's Ls.
  int ls_extent  = in.Grid()->_rdimensions[0];
  int kernel_opt = WilsonKernelsStatic::Opt;

  Kernels::DhopKernel(kernel_opt,Stencil,Umu,Stencil.CommBuf(),
                      ls_extent,Umu.oSites(),in,out,ids);
}
template<class Impl>
void WilsonFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
{

View File

@ -411,6 +411,46 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
#undef LoopBody
}
// MAKE_ID(A): identifier of the hardware unit executing the current work item.
//  - SYCL + SIMT build: packs three Intel intrinsics into one integer as
//      (eu_id << 16) | (slice_id << 8) | subslice_id
//    The fields are not masked, so the packing is only unambiguous while
//    slice_id and subslice_id each fit in 8 bits.
//  - Every other build: expands to 0.
// The argument A is ignored in all variants.
#ifdef GRID_SYCL
// Declarations of the Intel device intrinsics; only a subset is used below.
extern "C" {
ulong SYCL_EXTERNAL __attribute__((overloadable)) intel_get_cycle_counter( void );
uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_active_channel_mask( void );
uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_grf_register( uint reg );
uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_flag_register( uint flag );
uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_control_register( uint reg );
uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_hw_thread_id( void );
uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_slice_id( void );
uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_subslice_id( void );
uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_eu_id( void );
uint SYCL_EXTERNAL __attribute__((overloadable)) intel_get_eu_thread_id( void );
void SYCL_EXTERNAL __attribute__((overloadable)) intel_eu_thread_pause( uint value );
}
#ifdef GRID_SIMT
// Whole expansion is parenthesised so it binds as a single operand wherever
// the macro is used (e.g. next to ==, &, + or a cast).
#define MAKE_ID(A) ((intel_get_eu_id()<<16)|(intel_get_slice_id()<<8)|(intel_get_subslice_id()))
#else
#define MAKE_ID(A) (0)
#endif
#else
#define MAKE_ID(A) (0)
#endif
// KERNEL_CALL_ID(A): launch WilsonKernels<Impl>::A over all Nsite*Ls sites and
// record, for every (site, SIMD lane), the MAKE_ID() value of the unit that
// processed it.
//
// Names taken from the expansion scope: Nsite, Ls, st_v, U_v, buf, in_v,
// out_v, ids. The macro also declares `NN` in that scope.
//
// ids layout: ids[sF*Nsimd + lane], with Nsimd = SiteHalfSpinor::Nsimd(); the
// caller must size `ids` for Nsite*Ls*Nsimd entries. The index is computed in
// int arithmetic.
//
// The launch is the non-blocking variant followed by accelerator_barrier(),
// so the macro as a whole is synchronous.
//
// Comments are kept outside the macro body on purpose: a `//` comment on a
// backslash-continued line would swallow the following lines.
#define KERNEL_CALL_ID(A) \
const uint64_t NN = Nsite*Ls; \
accelerator_forNB( ss, NN, Simd::Nsimd(), { \
int sF = ss; \
int sU = ss/Ls; \
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
const int Nsimd = SiteHalfSpinor::Nsimd(); \
const int lane=acceleratorSIMTlane(Nsimd); \
int idx=sF*Nsimd+lane; \
uint64_t id = MAKE_ID(); \
ids[idx]=id; \
}); \
accelerator_barrier();
#define KERNEL_CALLNB(A) \
const uint64_t NN = Nsite*Ls; \
@ -418,7 +458,7 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
int sF = ss; \
int sU = ss/Ls; \
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
});
});
#define KERNEL_CALL(A) KERNEL_CALLNB(A); accelerator_barrier();
@ -451,6 +491,8 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,Ls,1,in_v,out_v); \
});}
template <class Impl>
void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
@ -485,6 +527,18 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
}
assert(0 && " Kernel optimisation case not covered ");
}
// Id-recording variant of DhopKernel.
//
// Always runs GenericDhopSite through KERNEL_CALL_ID; the `Opt` argument is
// accepted for signature parity but is not consulted in this overload.
//
// The view names (U_v, in_v, out_v, st_v) and the parameters buf, Ls, Nsite
// and ids are referenced by name inside KERNEL_CALL_ID, so they must not be
// renamed independently of that macro.
template <class Impl>
void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,
uint64_t *ids)
{
// Open accelerator views: inputs read-only, output write-only.
autoView(U_v , U,AcceleratorRead);
autoView(in_v , in,AcceleratorRead);
autoView(out_v,out,AcceleratorWrite);
autoView(st_v , st,AcceleratorRead);
// Launch over Nsite*Ls sites and wait for completion (barrier is in the macro).
KERNEL_CALL_ID(GenericDhopSite);
}
template <class Impl>
void WilsonKernels<Impl>::DhopDagKernel(int Opt,StencilImpl &st, DoubledGaugeField &U, SiteHalfSpinor * buf,
int Ls, int Nsite, const FermionField &in, FermionField &out,

View File

@ -0,0 +1,239 @@
/*************************************************************************************
Grid physics library, www.github.com/paboyle/Grid
Source file: ./tests/Test_dwf_cg_prec.cc
Copyright (C) 2015
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
See the full license in the file "LICENSE" in the top level distribution directory
*************************************************************************************/
/* END LEGAL */
#include <Grid/Grid.h>
using namespace std;
using namespace Grid;
#ifndef HOST_NAME_MAX
#define HOST_NAME_MAX _POSIX_HOST_NAME_MAX
#endif
typedef LatticeFermionD FermionField;
int VerifyOnDevice(const FermionField &res, FermionField &ref)
{
deviceVector<int> Fails(1);
int * Fail = &Fails[0];
int FailHost=0;
typedef typename FermionField::vector_object vobj;
typedef typename vobj::scalar_type scalar_type;
typedef typename vobj::vector_type vector_type;
const uint64_t NN = res.Grid()->oSites();
acceleratorPut(*Fail,FailHost);
accelerator_barrier();
// Inject an error
int injection=0;
if(getenv("GRID_ERROR_INJECT")) injection=1;
autoView(res_v,res,AcceleratorWrite);
autoView(ref_v,ref,AcceleratorRead);
if ( res.Grid()->ThisRank()== 0 )
{
if (((random()&0xF)==0)&&injection) {
uint64_t sF = random()%(NN);
int lane=0;
printf("Error injection site %ld on rank %d\n",sF,res.Grid()->ThisRank());
auto vv = acceleratorGet(res_v[sF]);
double *dd = (double *)&vv;
*dd=M_PI;
acceleratorPut(res_v[sF],vv);
}
}
accelerator_for( sF, NN, vobj::Nsimd(), {
#ifdef GRID_SIMT
{
int blane = acceleratorSIMTlane(vobj::Nsimd());
#else
for(int blane;blane<vobj::Nsimd();blane++){
#endif
vector_type *vtrr = (vector_type *)&res_v[sF];
vector_type *vtrf = (vector_type *)&ref_v[sF];
int words = sizeof(vobj)/sizeof(vector_type);
for(int w=0;w<words;w++){
scalar_type rrtmp = getlane(vtrr[w], blane);
scalar_type rftmp = getlane(vtrf[w], blane);
if ( rrtmp != rftmp) {
*Fail=1;
}
}
}
});
FailHost = acceleratorGet(*Fail);
return FailHost;
}
// Host-side diagnostic: repeat the res/ref comparison on the CPU and, for
// every mismatching spin/colour component, print the site, both values and
// the hardware unit decoded from the recorded id.
//
//  res, ref : fields to compare (read through CPU views).
//  ids      : device pointer to oSites()*Nsimd identifiers, indexed as
//             ids[lane + Nsimd*site].
//
// Id decoding: bits 0-7 subslice, bits 8-15 slice, bits 16-23 EU.
void PrintFails(const FermionField &res, FermionField &ref,uint64_t *ids)
{
  typedef typename FermionField::vector_object vobj;

  const int Nsimd=vobj::Nsimd();
  const uint64_t NN = res.Grid()->oSites();

  ///////////////////////////////
  // Pull back to host
  ///////////////////////////////
  autoView(res_v,res,CpuRead);
  autoView(ref_v,ref,CpuRead);

  std::vector<uint64_t> ids_host(NN*Nsimd);
  // data() stays valid even when the vector is empty.
  acceleratorCopyFromDevice(ids,ids_host.data(),NN*Nsimd*sizeof(uint64_t));

  //////////////////////////////////////////////////////////////
  // Redo check on host and print IDs
  //////////////////////////////////////////////////////////////
  // Site index uses the same unsigned 64-bit type as the bound, so neither
  // the loop comparison nor the ids index mixes signed and unsigned.
  for(uint64_t sF=0;sF<NN;sF++){
    for(int lane=0;lane<Nsimd;lane++){
      auto rr = extractLane(lane,res_v[sF]);
      auto rf = extractLane(lane,ref_v[sF]);
      const uint64_t id = ids_host[lane+Nsimd*sF];

      // Loop bounds are the literal 4 (spin index s) and 3 (colour index c).
      for(int s=0;s<4;s++){
        for(int c=0;c<3;c++){
          if ( rr()(s)(c)!=rf()(s)(c) ) {
            int subslice=(id>>0 )&0xFF;
            int slice   =(id>>8 )&0xFF;
            int eu      =(id>>16)&0xFF;
            std::cout << GridHostname()<<" miscompare site "<<sF<<" "<<rr()(s)(c)<<" "<<rf()(s)(c)<<" EU "<<eu<<" slice "<<slice<<" subslice "<<subslice<<std::endl;
          }
        }
      }
    }
  }
}
// Reproducibility soak test for the domain-wall Dslash kernel.
//
// Computes a reference result once, then recomputes the same Dslash in a loop
// for a fixed wall-clock duration (default 600 s, override with
// `--seconds N`), comparing each result with the reference on the device and
// printing per-site diagnostics on any mismatch.
int main (int argc, char ** argv)
{
// Host name is fetched here; neither `hostname` nor `host` is read again in
// this function (reporting below uses GridHostname()).
char hostname[HOST_NAME_MAX+1];
gethostname(hostname, HOST_NAME_MAX+1);
std::string host(hostname);
Grid_init(&argc,&argv);
const int Ls=12;
// Four-dimensional and five-dimensional (Ls) grids, full and red-black.
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(), GridDefaultSimd(Nd,vComplexD::Nsimd()),GridDefaultMpi());
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
// Fixed seeds, so the fields are generated from the same seeds on every run.
std::vector<int> seeds4({1,2,3,4});
std::vector<int> seeds5({5,6,7,8});
GridParallelRNG RNG5(FGrid); RNG5.SeedFixedIntegers(seeds5);
GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
LatticeGaugeField Umu(UGrid);
// `src` is drawn before `junk` from the same RNG; the order of these two
// draws determines their contents.
LatticeFermionD src(FGrid); random(RNG5,src);
LatticeFermionD junk(FGrid); random(RNG5,junk);
LatticeFermionD result(FGrid); result=Zero();
LatticeFermionD ref(FGrid); ref=Zero();
SU<Nc>::HotConfiguration(RNG4,Umu);
RealD mass=0.1;
RealD M5=1.8;
DomainWallFermionD Ddwf(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5);
// Test duration in seconds; `--seconds N` on the command line overrides it.
int nsecs=600;
if( GridCmdOptionExists(argv,argv+argc,"--seconds") ){
std::string arg = GridCmdOptionPayload(argv,argv+argc,"--seconds");
GridCmdOptionInt(arg,nsecs);
}
std::cout << GridLogMessage << "::::::::::::: Job startup Barrier " << std::endl;
UGrid->Barrier();
std::cout << GridLogMessage << "::::::::::::: Job startup Barrier complete" << std::endl;
std::cout << GridLogMessage << "::::::::::::: Starting DWF repro for "<<nsecs <<" seconds" << std::endl;
time_t now;
time_t start = time(NULL);
// NOTE(review): presumably distributes rank 0's clock reading so that all
// ranks share one start time -- confirm Broadcast(root, ...) semantics.
UGrid->Broadcast(0,(void *)&start,sizeof(start));
// FlightRecorder defaults; three of them can be overridden from the
// environment (values parsed with atoi).
FlightRecorder::ContinueOnFail = 0;
FlightRecorder::PrintEntireLog = 0;
FlightRecorder::ChecksumComms = 0;
FlightRecorder::ChecksumCommsSend=0;
if(char *s=getenv("GRID_PRINT_ENTIRE_LOG")) FlightRecorder::PrintEntireLog = atoi(s);
if(char *s=getenv("GRID_CHECKSUM_RECV_BUF")) FlightRecorder::ChecksumComms = atoi(s);
if(char *s=getenv("GRID_CHECKSUM_SEND_BUF")) FlightRecorder::ChecksumCommsSend = atoi(s);
// One id slot per (FGrid outer site, SIMD lane).
const uint64_t NN = FGrid->oSites()*vComplexD::Nsimd();
deviceVector<uint64_t> ids_device(NN);
uint64_t *ids = &ids_device[0];
// Reference: one halo exchange followed by one kernel application.
Ddwf.DhopComms(src,ref);
Ddwf.DhopCalc(src,ref,ids);
// Halo exchange is issued once more before the loop; inside the loop only
// the kernel is re-run on the same `src`.
Ddwf.DhopComms(src,result);
int iter=0;
do {
// Overwrite `result` with unrelated data so stale output cannot pass.
result=junk;
Ddwf.DhopCalc(src,result,ids);
if ( VerifyOnDevice(result, ref) ) {
printf("Node %s Iter %d detected fails\n",GridHostname(),iter);
PrintFails(result,ref,ids);
// std::cout << " Dslash "<<iter<<" is WRONG! "<<std::endl;
}
//else {
// printf("Node %s Iter %d detected NO fails\n",GridHostname(),iter);
// PrintFails(result,ref,ids);
// std::cout << " Dslash "<<iter<<" is OK! "<<std::endl;
//}
iter ++;
// NOTE(review): the broadcast presumably makes every rank see the same
// `now`, so that all ranks leave the loop on the same iteration -- confirm.
now = time(NULL); UGrid->Broadcast(0,(void *)&now,sizeof(now));
} while (now < (start + nsecs) );
Grid_finalize();
}