1
0
mirror of https://github.com/paboyle/Grid.git synced 2025-06-24 18:52:02 +01:00

Compare commits

..

12 Commits

Author SHA1 Message Date
67f569354e Partial dirichlet changes 2022-11-30 15:51:13 -05:00
5fa573dfd3 partial send fix 2022-11-25 00:51:04 -05:00
f6402cb6c4 AUDIT removal 2022-11-25 00:50:33 -05:00
bae6c263dc Audit 2022-11-25 00:47:01 -05:00
d71672dca9 Bug fix 2022-11-25 00:46:35 -05:00
121c9e2ceb Tracing 2022-11-25 00:45:21 -05:00
63a30ae34f Tracing 2022-11-25 00:45:05 -05:00
7d8231ba32 Tracing 2022-11-25 00:44:57 -05:00
b690b1cbe9 Audit 2022-11-25 00:43:57 -05:00
c0fb20fc03 Audit check for wrongly locked data 2022-11-25 00:43:12 -05:00
bc9579dac6 Old code path removed 2022-11-25 00:40:45 -05:00
a5c77f8b95 Tracing moved in order 2022-11-25 00:40:27 -05:00
19 changed files with 204 additions and 72 deletions

View File

@ -44,10 +44,10 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
#include <Grid/GridStd.h>
#include <Grid/threads/Pragmas.h>
#include <Grid/perfmon/Timer.h>
#include <Grid/perfmon/Tracing.h>
//#include <Grid/perfmon/PerfCount.h>
#include <Grid/util/Util.h>
#include <Grid/log/Log.h>
#include <Grid/perfmon/Tracing.h>
#include <Grid/allocator/Allocator.h>
#include <Grid/simd/Simd.h>
#include <Grid/threads/ThreadReduction.h>

View File

@ -258,26 +258,12 @@ public:
for(int n=2;n<order;n++){
Linop.HermOp(*Tn,y);
#if 0
auto y_v = y.View();
auto Tn_v = Tn->View();
auto Tnp_v = Tnp->View();
auto Tnm_v = Tnm->View();
constexpr int Nsimd = vector_type::Nsimd();
accelerator_for(ss, in.Grid()->oSites(), Nsimd, {
coalescedWrite(y_v[ss],xscale*y_v(ss)+mscale*Tn_v(ss));
coalescedWrite(Tnp_v[ss],2.0*y_v(ss)-Tnm_v(ss));
});
if ( Coeffs[n] != 0.0) {
axpy(out,Coeffs[n],*Tnp,out);
}
#else
axpby(y,xscale,mscale,y,(*Tn));
axpby(*Tnp,2.0,-1.0,y,(*Tnm));
if ( Coeffs[n] != 0.0) {
axpy(out,Coeffs[n],*Tnp,out);
}
#endif
// Cycle pointers to avoid copies
Field *swizzle = Tnm;
Tnm =Tn;

View File

@ -36,6 +36,11 @@ NAMESPACE_BEGIN(Grid);
#define GRID_ALLOC_SMALL_LIMIT (4096)
#define STRINGIFY(x) #x
#define TOSTRING(x) STRINGIFY(x)
#define FILE_LINE __FILE__ ":" TOSTRING(__LINE__)
#define AUDIT(a) MemoryManager::Audit(FILE_LINE)
/*Pinning pages is costly*/
////////////////////////////////////////////////////////////////////////////
// Advise the LatticeAccelerator class
@ -94,6 +99,7 @@ private:
static void PrintBytes(void);
public:
static void Audit(std::string s);
static void Init(void);
static void InitMessage(void);
static void *AcceleratorAllocate(size_t bytes);

View File

@ -8,9 +8,8 @@ NAMESPACE_BEGIN(Grid);
static char print_buffer [ MAXLINE ];
#define mprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer;
//#define dprintf(...) printf (__VA_ARGS__ ); fflush(stdout);
#define dprintf(...)
#define dprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer;
//#define dprintf(...)
////////////////////////////////////////////////////////////
@ -132,9 +131,11 @@ void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
///////////////////////////////////////////////////////////////////////////
assert(AccCache.state!=Empty);
mprintf("MemoryManager: Evict(%lx) %lx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
assert(AccCache.accLock==0);
assert(AccCache.cpuLock==0);
mprintf("MemoryManager: Evict cpu %lx acc %lx cpuLock %ld accLock %ld\n",
(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr,
(uint64_t)AccCache.cpuLock,(uint64_t)AccCache.accLock);
if (AccCache.accLock!=0) return;
if (AccCache.cpuLock!=0) return;
if(AccCache.state==AccDirty) {
Flush(AccCache);
}
@ -197,6 +198,7 @@ void MemoryManager::CpuDiscard(AcceleratorViewEntry &AccCache)
void MemoryManager::ViewClose(void* Ptr,ViewMode mode)
{
if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){
dprintf("AcceleratorViewClose %lx\n",(uint64_t)Ptr);
AcceleratorViewClose((uint64_t)Ptr);
} else if( (mode==CpuRead)||(mode==CpuWrite)){
CpuViewClose((uint64_t)Ptr);
@ -208,6 +210,7 @@ void *MemoryManager::ViewOpen(void* _CpuPtr,size_t bytes,ViewMode mode,ViewAdvis
{
uint64_t CpuPtr = (uint64_t)_CpuPtr;
if( (mode==AcceleratorRead)||(mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard) ){
dprintf("AcceleratorViewOpen %lx\n",(uint64_t)CpuPtr);
return (void *) AcceleratorViewOpen(CpuPtr,bytes,mode,hint);
} else if( (mode==CpuRead)||(mode==CpuWrite)){
return (void *)CpuViewOpen(CpuPtr,bytes,mode,hint);
@ -247,11 +250,12 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
assert(AccCache.cpuLock==0); // Programming error
if(AccCache.state!=Empty) {
dprintf("ViewOpen found entry %lx %lx : %ld %ld\n",
dprintf("ViewOpen found entry %lx %lx : %ld %ld accLock %ld\n",
(uint64_t)AccCache.CpuPtr,
(uint64_t)CpuPtr,
(uint64_t)AccCache.bytes,
(uint64_t)bytes);
(uint64_t)bytes,
(uint64_t)AccCache.accLock);
assert(AccCache.CpuPtr == CpuPtr);
assert(AccCache.bytes ==bytes);
}
@ -286,6 +290,7 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
AccCache.state = Consistent; // Empty + AccRead => Consistent
}
AccCache.accLock= 1;
dprintf("Copied Empty entry into device accLock= %d\n",AccCache.accLock);
} else if(AccCache.state==CpuDirty ){
if(mode==AcceleratorWriteDiscard) {
CpuDiscard(AccCache);
@ -298,21 +303,21 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
AccCache.state = Consistent; // CpuDirty + AccRead => Consistent
}
AccCache.accLock++;
dprintf("Copied CpuDirty entry into device accLock %d\n",AccCache.accLock);
dprintf("CpuDirty entry into device ++accLock= %d\n",AccCache.accLock);
} else if(AccCache.state==Consistent) {
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
AccCache.state = AccDirty; // Consistent + AcceleratorWrite=> AccDirty
else
AccCache.state = Consistent; // Consistent + AccRead => Consistent
AccCache.accLock++;
dprintf("Consistent entry into device accLock %d\n",AccCache.accLock);
dprintf("Consistent entry into device ++accLock= %d\n",AccCache.accLock);
} else if(AccCache.state==AccDirty) {
if((mode==AcceleratorWrite)||(mode==AcceleratorWriteDiscard))
AccCache.state = AccDirty; // AccDirty + AcceleratorWrite=> AccDirty
else
AccCache.state = AccDirty; // AccDirty + AccRead => AccDirty
AccCache.accLock++;
dprintf("AccDirty entry into device accLock %d\n",AccCache.accLock);
dprintf("AccDirty entry ++accLock= %d\n",AccCache.accLock);
} else {
assert(0);
}
@ -320,6 +325,7 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
// If view is opened on device remove from LRU
if(AccCache.LRU_valid==1){
// must possibly remove from LRU as now locked on GPU
dprintf("AccCache entry removed from LRU \n");
LRUremove(AccCache);
}
@ -340,10 +346,12 @@ void MemoryManager::AcceleratorViewClose(uint64_t CpuPtr)
assert(AccCache.accLock>0);
AccCache.accLock--;
// Move to LRU queue if not locked and close on device
if(AccCache.accLock==0) {
dprintf("AccleratorViewClose %lx AccLock decremented to %ld move to LRU queue\n",(uint64_t)CpuPtr,(uint64_t)AccCache.accLock);
LRUinsert(AccCache);
} else {
dprintf("AccleratorViewClose %lx AccLock decremented to %ld\n",(uint64_t)CpuPtr,(uint64_t)AccCache.accLock);
}
}
void MemoryManager::CpuViewClose(uint64_t CpuPtr)
@ -479,6 +487,29 @@ int MemoryManager::isOpen (void* _CpuPtr)
return 0;
}
}
void MemoryManager::Audit(std::string s)
{
for(auto it=AccViewTable.begin();it!=AccViewTable.end();it++){
auto &AccCache = it->second;
std::string str;
if ( AccCache.state==Empty ) str = std::string("Empty");
if ( AccCache.state==CpuDirty ) str = std::string("CpuDirty");
if ( AccCache.state==AccDirty ) str = std::string("AccDirty");
if ( AccCache.state==Consistent)str = std::string("Consistent");
if ( AccCache.cpuLock || AccCache.accLock ) {
std::cout << GridLogError << s<< "\n\t 0x"<<std::hex<<AccCache.CpuPtr<<std::dec
<< "\t0x"<<std::hex<<AccCache.AccPtr<<std::dec<<"\t" <<str
<< "\t cpuLock " << AccCache.cpuLock
<< "\t accLock " << AccCache.accLock
<< "\t LRUvalid " << AccCache.LRU_valid<<std::endl;
}
assert( AccCache.cpuLock== 0 ) ;
assert( AccCache.accLock== 0 ) ;
}
}
void MemoryManager::PrintState(void* _CpuPtr)
{

View File

@ -13,6 +13,7 @@ uint64_t MemoryManager::DeviceToHostBytes;
uint64_t MemoryManager::HostToDeviceXfer;
uint64_t MemoryManager::DeviceToHostXfer;
void MemoryManager::Audit(void){};
void MemoryManager::ViewClose(void* AccPtr,ViewMode mode){};
void *MemoryManager::ViewOpen(void* CpuPtr,size_t bytes,ViewMode mode,ViewAdvise hint){ return CpuPtr; };
int MemoryManager::isOpen (void* CpuPtr) { return 0;}

View File

@ -66,6 +66,7 @@ GridLogger GridLogError (1, "Error" , GridLogColours, "RED");
GridLogger GridLogWarning(1, "Warning", GridLogColours, "YELLOW");
GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL");
GridLogger GridLogMemory (1, "Memory", GridLogColours, "NORMAL");
GridLogger GridLogTracing(1, "Tracing", GridLogColours, "NORMAL");
GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE");
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
GridLogger GridLogDslash (1, "Dslash", GridLogColours, "BLUE");
@ -77,7 +78,8 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
GridLogError.Active(1);
GridLogWarning.Active(0);
GridLogMessage.Active(1); // at least the messages should be always on
GridLogMemory.Active(0); // at least the messages should be always on
GridLogMemory.Active(0);
GridLogTracing.Active(0);
GridLogIterative.Active(0);
GridLogDebug.Active(0);
GridLogPerformance.Active(0);
@ -87,6 +89,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
GridLogHMC.Active(1);
for (int i = 0; i < logstreams.size(); i++) {
if (logstreams[i] == std::string("Tracing")) GridLogTracing.Active(1);
if (logstreams[i] == std::string("Memory")) GridLogMemory.Active(1);
if (logstreams[i] == std::string("Warning")) GridLogWarning.Active(1);
if (logstreams[i] == std::string("NoMessage")) GridLogMessage.Active(0);
@ -94,8 +97,8 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1);
if (logstreams[i] == std::string("Dslash")) GridLogDslash.Active(1);
if (logstreams[i] == std::string("NoIntegrator")) GridLogIntegrator.Active(0);
if (logstreams[i] == std::string("NoHMC")) GridLogHMC.Active(0);
if (logstreams[i] == std::string("NoIntegrator"))GridLogIntegrator.Active(0);
if (logstreams[i] == std::string("NoHMC")) GridLogHMC.Active(0);
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
}
}

View File

@ -186,6 +186,7 @@ extern GridLogger GridLogIterative ;
extern GridLogger GridLogIntegrator ;
extern GridLogger GridLogHMC;
extern GridLogger GridLogMemory;
extern GridLogger GridLogTracing;
extern Colours GridLogColours;
std::string demangle(const char* name) ;

View File

@ -1,4 +1,7 @@
#pragma once
NAMESPACE_BEGIN(Grid);
#ifdef GRID_TRACING_NVTX
#include <nvToolsExt.h>
class GridTracer {
@ -64,3 +67,4 @@ inline void traceStop(int ID) { }
#else
#define GRID_TRACE(name) GridTracer uniq_name_using_macros##__COUNTER__(name);
#endif
NAMESPACE_END(Grid);

View File

@ -55,14 +55,18 @@ public:
deriv_num=0;
}
void deriv_log(RealD nrm, RealD max,RealD Fdt_nrm,RealD Fdt_max) {
deriv_max_sum+=max;
if ( max > deriv_max_sum ) {
deriv_max_sum=max;
}
deriv_norm_sum+=nrm;
Fdt_max_sum+=Fdt_max;
if ( Fdt_max > Fdt_max_sum ) {
Fdt_max_sum=Fdt_max;
}
Fdt_norm_sum+=Fdt_nrm; deriv_num++;
}
RealD deriv_max_average(void) { return deriv_max_sum/deriv_num; };
RealD deriv_max_average(void) { return deriv_max_sum; };
RealD deriv_norm_average(void) { return deriv_norm_sum/deriv_num; };
RealD Fdt_max_average(void) { return Fdt_max_sum/deriv_num; };
RealD Fdt_max_average(void) { return Fdt_max_sum; };
RealD Fdt_norm_average(void) { return Fdt_norm_sum/deriv_num; };
RealD deriv_timer(void) { return deriv_us; };
RealD S_timer(void) { return S_us; };

View File

@ -36,10 +36,12 @@ NAMESPACE_BEGIN(Grid);
// Wilson compressor will need FaceGather policies for:
// Periodic, Dirichlet, and partial Dirichlet for DWF
///////////////////////////////////////////////////////////////
const int dwf_compressor_depth=2;
class FaceGatherPartialDWF
{
public:
static int PartialCompressionFactor(GridBase *grid) {return grid->_fdimensions[0]/2;};
// static int PartialCompressionFactor(GridBase *grid) {return grid->_fdimensions[0]/(2*dwf_compressor_depth);};
static int PartialCompressionFactor(GridBase *grid) {return 1;}
// static int PartialCompressionFactor(GridBase *grid) { return 1;}
template<class vobj,class cobj,class compressor>
static void Gather_plane_simple (commVector<std::pair<int,int> >& table,
@ -52,20 +54,26 @@ public:
// Shrinks local and remote comms buffers
GridBase *Grid = rhs.Grid();
int Ls = Grid->_rdimensions[0];
// int depth=dwf_compressor_depth;
int depth=Ls/2;
std::pair<int,int> *table_v = & table[0];
auto rhs_v = rhs.View(AcceleratorRead);
int vol=table.size()/Ls;
accelerator_forNB( idx,table.size(), vobj::Nsimd(), {
Integer i=idx/Ls;
Integer s=idx%Ls;
if(s==0) compress.Compress(buffer[off+i ],rhs_v[so+table_v[idx].second]);
if(s==Ls-1) compress.Compress(buffer[off+i+vol],rhs_v[so+table_v[idx].second]);
Integer sc=depth+s-(Ls-depth);
if(s<depth) compress.Compress(buffer[off+i+s*vol],rhs_v[so+table_v[idx].second]);
if(s>=Ls-depth) compress.Compress(buffer[off+i+sc*vol],rhs_v[so+table_v[idx].second]);
});
rhs_v.ViewClose();
}
template<class decompressor,class Decompression>
static void DecompressFace(decompressor decompress,Decompression &dd)
{
auto Ls = dd.dims[0];
// int depth=dwf_compressor_depth;
int depth=Ls/2;
// Just pass in the Grid
auto kp = dd.kernel_p;
auto mp = dd.mpi_p;
@ -74,11 +82,12 @@ public:
accelerator_forNB(o,size,1,{
int idx=o/Ls;
int s=o%Ls;
if ( s == 0 ) {
int oo=idx;
if ( s < depth ) {
int oo=s*vol+idx;
kp[o]=mp[oo];
} else if ( s == Ls-1 ) {
int oo=vol+idx;
} else if ( s >= Ls-depth ) {
int sc = depth + s - (Ls-depth);
int oo=sc*vol+idx;
kp[o]=mp[oo];
} else {
kp[o] = Zero();//fill rest with zero if partial dirichlet
@ -97,6 +106,8 @@ public:
{
GridBase *Grid = rhs.Grid();
int Ls = Grid->_rdimensions[0];
// int depth=dwf_compressor_depth;
int depth = Ls/2;
// insertion of zeroes...
assert( (table.size()&0x1)==0);
@ -112,7 +123,7 @@ public:
// Reorders both local and remote comms buffers
//
int s = j % Ls;
int sp1 = (s+1)%Ls; // peri incremented s slice
int sp1 = (s+depth)%Ls; // peri incremented s slice
int hxyz= j/Ls;
@ -135,6 +146,8 @@ public:
static void MergeFace(decompressor decompress,Merger &mm)
{
auto Ls = mm.dims[0];
int depth = Ls/2;
// int depth=dwf_compressor_depth;
int num= mm.buffer_size/2; // relate vol and Ls to buffer size
auto mp = &mm.mpointer[0];
auto vp0= &mm.vpointers[0][0]; // First arg is exchange first
@ -148,7 +161,7 @@ public:
int xyz0=hxyz*2;
int xyz1=hxyz*2+1;
int sp = (s+1)%Ls;
int sp = (s+depth)%Ls;
int jj= hxyz + sp*nnum ; // 0,1,2,3 -> Ls-1 slice , 0-slice, 1-slice ....
int oo0= s+xyz0*Ls;
@ -162,7 +175,8 @@ public:
class FaceGatherDWFMixedBCs
{
public:
static int PartialCompressionFactor(GridBase *grid) {return grid->_fdimensions[0]/2;};
// static int PartialCompressionFactor(GridBase *grid) {return grid->_fdimensions[0]/(2*dwf_compressor_depth);};
static int PartialCompressionFactor(GridBase *grid) {return 1;}
template<class vobj,class cobj,class compressor>
static void Gather_plane_simple (commVector<std::pair<int,int> >& table,
@ -171,6 +185,7 @@ public:
compressor &compress,
int off,int so,int partial)
{
// std::cout << " face gather simple DWF partial "<<partial <<std::endl;
if(partial) FaceGatherPartialDWF::Gather_plane_simple(table,rhs,buffer,compress,off,so,partial);
else FaceGatherSimple::Gather_plane_simple(table,rhs,buffer,compress,off,so,partial);
}
@ -179,6 +194,7 @@ public:
Vector<cobj *> pointers,int dimension,int plane,int cbmask,
compressor &compress,int type,int partial)
{
// std::cout << " face gather exch DWF partial "<<partial <<std::endl;
if(partial) FaceGatherPartialDWF::Gather_plane_exchange(table,rhs,pointers,dimension, plane,cbmask,compress,type,partial);
else FaceGatherSimple::Gather_plane_exchange (table,rhs,pointers,dimension, plane,cbmask,compress,type,partial);
}
@ -186,6 +202,7 @@ public:
static void MergeFace(decompressor decompress,Merger &mm)
{
int partial = mm.partial;
// std::cout << " merge DWF partial "<<partial <<std::endl;
if ( partial ) FaceGatherPartialDWF::MergeFace(decompress,mm);
else FaceGatherSimple::MergeFace(decompress,mm);
}
@ -194,6 +211,7 @@ public:
static void DecompressFace(decompressor decompress,Decompression &dd)
{
int partial = dd.partial;
// std::cout << " decompress DWF partial "<<partial <<std::endl;
if ( partial ) FaceGatherPartialDWF::DecompressFace(decompress,dd);
else FaceGatherSimple::DecompressFace(decompress,dd);
}

View File

@ -96,6 +96,8 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
Coordinate block = p.dirichlet;
if ( block[0] || block[1] || block[2] || block[3] || block[4] ){
Dirichlet = 1;
std::cout << GridLogMessage << " WilsonFermion: non-trivial Dirichlet condition "<< block << std::endl;
std::cout << GridLogMessage << " WilsonFermion: partial Dirichlet "<< p.partialDirichlet << std::endl;
Block = block;
}
} else {
@ -137,9 +139,6 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
StencilEven.BuildSurfaceList(LLs,vol4);
StencilOdd.BuildSurfaceList(LLs,vol4);
// std::cout << GridLogMessage << " SurfaceLists "<< Stencil.surface_list.size()
// <<" " << StencilEven.surface_list.size()<<std::endl;
}
template<class Impl>
@ -148,21 +147,29 @@ void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
GaugeField HUmu(_Umu.Grid());
HUmu = _Umu*(-0.5);
if ( Dirichlet ) {
if ( this->Params.partialDirichlet ) {
std::cout << GridLogMessage << " partialDirichlet BCs " <<Block<<std::endl;
} else {
std::cout << GridLogMessage << " FULL Dirichlet BCs " <<Block<<std::endl;
}
std:: cout << GridLogMessage << "Checking block size multiple of rank boundaries for Dirichlet"<<std::endl;
for(int d=0;d<Nd;d++) {
int GaugeBlock = Block[d+1];
int ldim=GaugeGrid()->LocalDimensions()[d];
if (GaugeBlock) assert( (GaugeBlock%ldim)==0);
}
}
if ( Dirichlet && (!this->Params.partialDirichlet) ) {
std::cout << GridLogMessage << " Dirichlet filtering gauge field BCs block " <<Block<<std::endl;
Coordinate GaugeBlock(Nd);
for(int d=0;d<Nd;d++) GaugeBlock[d] = Block[d+1];
DirichletFilter<GaugeField> Filter(GaugeBlock);
Filter.applyFilter(HUmu);
} else {
std::cout << GridLogMessage << " Dirichlet "<< Dirichlet << " not filtered gauge field" <<std::endl;
if (!this->Params.partialDirichlet) {
std::cout << GridLogMessage << " Dirichlet filtering gauge field BCs block " <<Block<<std::endl;
Coordinate GaugeBlock(Nd);
for(int d=0;d<Nd;d++) GaugeBlock[d] = Block[d+1];
DirichletFilter<GaugeField> Filter(GaugeBlock);
Filter.applyFilter(HUmu);
} else {
std::cout << GridLogMessage << " Dirichlet "<< Dirichlet << " NOT filtered gauge field" <<std::endl;
}
}
Impl::DoubleStore(GaugeGrid(),Umu,HUmu);
pickCheckerboard(Even,UmuEven,Umu);

View File

@ -279,6 +279,7 @@ NAMESPACE_BEGIN(Grid);
// S_f = chi^dag* P(V^dag*V)/Q(V^dag*V)* N(M^dag*M)/D(M^dag*M)* P(V^dag*V)/Q(V^dag*V)* chi
//////////////////////////////////////////////////////
virtual RealD Sinitial(const GaugeField &U) {
std::cout << GridLogMessage << "Returning stored two flavour refresh action "<<RefreshAction<<std::endl;
return RefreshAction;
}

View File

@ -112,28 +112,48 @@ NAMESPACE_BEGIN(Grid);
// NumOp == V
// DenOp == M
//
AUDIT();
FermionField etaOdd (NumOp.FermionRedBlackGrid());
FermionField etaEven(NumOp.FermionRedBlackGrid());
FermionField tmp (NumOp.FermionRedBlackGrid());
AUDIT();
pickCheckerboard(Even,etaEven,eta);
AUDIT();
pickCheckerboard(Odd,etaOdd,eta);
AUDIT();
NumOp.ImportGauge(U);
AUDIT();
DenOp.ImportGauge(U);
std::cout << " TwoFlavourRefresh: Imported gauge "<<std::endl;
AUDIT();
SchurDifferentiableOperator<Impl> Mpc(DenOp);
AUDIT();
SchurDifferentiableOperator<Impl> Vpc(NumOp);
AUDIT();
std::cout << " TwoFlavourRefresh: Diff ops "<<std::endl;
AUDIT();
// Odd det factors
Mpc.MpcDag(etaOdd,PhiOdd);
AUDIT();
std::cout << " TwoFlavourRefresh: MpcDag "<<std::endl;
tmp=Zero();
AUDIT();
std::cout << " TwoFlavourRefresh: Zero() guess "<<std::endl;
AUDIT();
HeatbathSolver(Vpc,PhiOdd,tmp);
AUDIT();
std::cout << " TwoFlavourRefresh: Heatbath solver "<<std::endl;
Vpc.Mpc(tmp,PhiOdd);
std::cout << " TwoFlavourRefresh: Mpc "<<std::endl;
// Even det factors
DenOp.MooeeDag(etaEven,tmp);
NumOp.MooeeInvDag(tmp,PhiEven);
std::cout << " TwoFlavourRefresh: Mee "<<std::endl;
RefreshAction = norm2(etaEven)+norm2(etaOdd);
std::cout << " refresh " <<action_name()<< " action "<<RefreshAction<<std::endl;
@ -142,6 +162,10 @@ NAMESPACE_BEGIN(Grid);
//////////////////////////////////////////////////////
// S = phi^dag V (Mdag M)^-1 Vdag phi
//////////////////////////////////////////////////////
virtual RealD Sinitial(const GaugeField &U) {
std::cout << GridLogMessage << "Returning stored two flavour refresh action "<<RefreshAction<<std::endl;
return RefreshAction;
}
virtual RealD S(const GaugeField &U) {
NumOp.ImportGauge(U);

View File

@ -132,10 +132,17 @@ protected:
Field& Us = Smearer.get_U(as[level].actions.at(a)->is_smeared);
double start_force = usecond();
std::cout << GridLogMessage << "AuditForce["<<level<<"]["<<a<<"] before"<<std::endl;
AUDIT();
as[level].actions.at(a)->deriv_timer_start();
as[level].actions.at(a)->deriv(Us, force); // deriv should NOT include Ta
as[level].actions.at(a)->deriv_timer_stop();
std::cout << GridLogMessage << "AuditForce["<<level<<"]["<<a<<"] after"<<std::endl;
AUDIT();
std::cout << GridLogIntegrator << "Smearing (on/off): " << as[level].actions.at(a)->is_smeared << std::endl;
auto name = as[level].actions.at(a)->action_name();
if (as[level].actions.at(a)->is_smeared) Smearer.smeared_force(force);
@ -284,7 +291,7 @@ public:
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
std::cout << GridLogMessage
<< as[level].actions.at(actionID)->action_name()
<<"["<<level<<"]["<< actionID<<"] : "
<<"["<<level<<"]["<< actionID<<"] :\n\t\t "
<<" force max " << as[level].actions.at(actionID)->deriv_max_average()
<<" norm " << as[level].actions.at(actionID)->deriv_norm_average()
<<" Fdt max " << as[level].actions.at(actionID)->Fdt_max_average()
@ -364,9 +371,14 @@ public:
std::cout << GridLogMessage << "refresh [" << level << "][" << actionID << "] "<<name << std::endl;
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
std::cout << GridLogMessage << "AuditRefresh["<<level<<"]["<<actionID<<"] before"<<std::endl;
AUDIT();
as[level].actions.at(actionID)->refresh_timer_start();
as[level].actions.at(actionID)->refresh(Us, sRNG, pRNG);
as[level].actions.at(actionID)->refresh_timer_stop();
std::cout << GridLogMessage << "AuditRefresh["<<level<<"]["<<actionID<<"] after"<<std::endl;
AUDIT();
}
// Refresh the higher representation actions
@ -403,6 +415,7 @@ public:
// Actions
for (int level = 0; level < as.size(); ++level) {
for (int actionID = 0; actionID < as[level].actions.size(); ++actionID) {
AUDIT();
// get gauge field from the SmearingPolicy and
// based on the boolean is_smeared in actionID
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
@ -412,6 +425,7 @@ public:
as[level].actions.at(actionID)->S_timer_stop();
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
H += Hterm;
AUDIT();
}
as[level].apply(S_hireps, Representations, level, H);
}
@ -424,7 +438,9 @@ public:
void operator()(std::vector<Action<FieldType>*> repr_set, Repr& Rep, int level, RealD& H) {
for (int a = 0; a < repr_set.size(); ++a) {
AUDIT();
RealD Hterm = repr_set.at(a)->Sinitial(Rep.U);
AUDIT();
std::cout << GridLogMessage << "Sinitial Level " << level << " term " << a << " H Hirep = " << Hterm << std::endl;
H += Hterm;
@ -449,8 +465,10 @@ public:
Field& Us = Smearer.get_U(as[level].actions.at(actionID)->is_smeared);
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] action eval " << std::endl;
as[level].actions.at(actionID)->S_timer_start();
AUDIT();
Hterm = as[level].actions.at(actionID)->Sinitial(Us);
as[level].actions.at(actionID)->S_timer_stop();
AUDIT();
std::cout << GridLogMessage << "S [" << level << "][" << actionID << "] H = " << Hterm << std::endl;
H += Hterm;
}
@ -463,6 +481,7 @@ public:
void integrate(Field& U)
{
AUDIT();
// reset the clocks
t_U = 0;
for (int level = 0; level < as.size(); ++level) {
@ -480,8 +499,10 @@ public:
assert(fabs(t_U - t_P[level]) < 1.0e-6); // must be the same
std::cout << GridLogIntegrator << " times[" << level << "]= " << t_P[level] << " " << t_U << std::endl;
}
AUDIT();
FieldImplementation::Project(U);
AUDIT();
// and that we indeed got to the end of the trajectory
assert(fabs(t_U - Params.trajL) < 1.0e-6);

View File

@ -203,7 +203,7 @@ class CartesianStencilAccelerator {
template<class vobj,class cobj,class Parameters>
class CartesianStencilView : public CartesianStencilAccelerator<vobj,cobj,Parameters>
{
private:
public:
int *closed;
StencilEntry *cpu_ptr;
ViewMode mode;
@ -676,6 +676,8 @@ public:
int block = dirichlet_block[dimension];
this->_comms_send[ii] = comm_dim;
this->_comms_recv[ii] = comm_dim;
this->_comms_partial_send[ii] = 0;
this->_comms_partial_recv[ii] = 0;
if ( block && comm_dim ) {
assert(abs(displacement) < ld );
// Quiesce communication across block boundaries
@ -1131,6 +1133,7 @@ public:
send_buf = this->u_send_buf_p; // Gather locally, must send
assert(send_buf!=NULL);
// std::cout << " GatherPlaneSimple partial send "<< comms_partial_send<<std::endl;
compressor::Gather_plane_simple(face_table[face_idx],rhs,send_buf,compress,comm_off,so,comms_partial_send);
int duplicate = CheckForDuplicate(dimension,sx,comm_proc,(void *)&recv_buf[comm_off],0,xbytes,rbytes,cbmask);

View File

@ -179,8 +179,11 @@ int main(int argc, char **argv) {
MD.name = std::string("Force Gradient");
//typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
// MD.name = std::string("MinimumNorm2");
// MD.MDsteps = 4;
MD.MDsteps = 4;
// TrajL = 2
// 4/2 => 0.6 dH
// 3/3 => ?? dH
//MD.MDsteps = 4;
MD.MDsteps = 3;
MD.trajL = 0.5;
HMCparameters HMCparams;
@ -223,7 +226,7 @@ int main(int argc, char **argv) {
Real light_mass = 7.8e-4;
Real strange_mass = 0.0362;
Real pv_mass = 1.0;
std::vector<Real> hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
std::vector<Real> hasenbusch({ light_mass, 0.005, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
// std::vector<Real> hasenbusch({ light_mass, 0.0145, 0.045, 0.108, 0.25, 0.51 , 0.75 , pv_mass });
@ -327,6 +330,8 @@ int main(int argc, char **argv) {
ParamsF.dirichlet=NonDirichlet;
ParamsDir.dirichlet=Dirichlet;
ParamsDirF.dirichlet=Dirichlet;
ParamsDir.partialDirichlet=1;
ParamsDirF.partialDirichlet=1;
// double StoppingCondition = 1e-14;
// double MDStoppingCondition = 1e-9;
@ -342,8 +347,8 @@ int main(int argc, char **argv) {
// Collect actions
////////////////////////////////////
ActionLevel<HMCWrapper::Field> Level1(1);
ActionLevel<HMCWrapper::Field> Level2(2);
ActionLevel<HMCWrapper::Field> Level3(30);
ActionLevel<HMCWrapper::Field> Level2(3);
ActionLevel<HMCWrapper::Field> Level3(15);
////////////////////////////////////
// Strange action
@ -474,13 +479,21 @@ int main(int argc, char **argv) {
if ( dirichlet_den[h]==1) ParamsDen.dirichlet = Dirichlet;
else ParamsDen.dirichlet = NonDirichlet;
if ( dirichlet_num[h]==1) ParamsNum.partialDirichlet = 1;
else ParamsNum.partialDirichlet = 0;
if ( dirichlet_den[h]==1) ParamsDen.partialDirichlet = 1;
else ParamsDen.partialDirichlet = 0;
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, ParamsNum));
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, ParamsDen));
ParamsDenF.dirichlet = ParamsDen.dirichlet;
ParamsDenF.partialDirichlet = ParamsDen.partialDirichlet;
DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsDenF));
ParamsNumF.dirichlet = ParamsNum.dirichlet;
ParamsNumF.partialDirichlet = ParamsNum.partialDirichlet;
NumeratorsF.push_back (new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_num[h],M5,b,c, ParamsNumF));
LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
@ -516,9 +529,11 @@ int main(int argc, char **argv) {
FermionActionD2::ImplParams ParamsNumD2(boundary);
ParamsDenD2.dirichlet = ParamsDen.dirichlet;
ParamsDenD2.partialDirichlet = ParamsDen.partialDirichlet;
DenominatorsD2.push_back(new FermionActionD2(UD2,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsDenD2));
ParamsNumD2.dirichlet = ParamsNum.dirichlet;
ParamsNumD2.partialDirichlet = ParamsNum.partialDirichlet;
NumeratorsD2.push_back (new FermionActionD2(UD2,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_num[h],M5,b,c, ParamsNumD2));
Bdys.push_back( new OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF,FermionImplPolicyD2>(
@ -543,7 +558,8 @@ int main(int argc, char **argv) {
int nquo=Quotients.size();
Level1.push_back(Bdys[0]);
Level1.push_back(Bdys[1]);
for(int h=0;h<nquo-1;h++){
Level2.push_back(Quotients[0]);
for(int h=1;h<nquo-1;h++){
Level2.push_back(Quotients[h]);
}
Level2.push_back(Quotients[nquo-1]);

3
TODO
View File

@ -1,3 +1,6 @@
Lattice_basis.h -- > HIP and SYCL GPU code
======
DDHMC
======

View File

@ -88,6 +88,7 @@ int main (int argc, char ** argv)
// Node level
//////////////////////
for(int d=0;d<Nd;d++) CommDim[d]= (mpi[d]/shm[d])>1 ? 1 : 0;
// for(int d=0;d<Nd;d++) CommDim[d]= 1;
Dirichlet[0] = 0;
Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0];
Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1];
@ -222,7 +223,7 @@ void Benchmark(int Ls, Coordinate Dirichlet, int partial)
{
ref = Zero();
for(int mu=0;mu<Nd;mu++){
int depth=dwf_compressor_depth;
tmp = Cshift(src,mu+1,1);
{
autoView( tmp_v , tmp , CpuWrite);
@ -230,7 +231,7 @@ void Benchmark(int Ls, Coordinate Dirichlet, int partial)
autoView( Ucopy_v, Ucopy[mu] , CpuRead);
for(int ss=0;ss<U[mu].Grid()->oSites();ss++){
for(int s=0;s<Ls;s++){
if ( (s==0) || (s==Ls-1)){
if ( (s<depth) || (s>=Ls-depth)){
tmp_v[Ls*ss+s] = Ucopy_v[ss]*tmp_v[Ls*ss+s];
} else {
tmp_v[Ls*ss+s] = U_v[ss]*tmp_v[Ls*ss+s];
@ -246,7 +247,7 @@ void Benchmark(int Ls, Coordinate Dirichlet, int partial)
autoView( src_v, src , CpuRead);
for(int ss=0;ss<U[mu].Grid()->oSites();ss++){
for(int s=0;s<Ls;s++){
if ( (s==0) || (s==Ls-1) ){
if ( (s<depth) || (s>=Ls-depth)){
tmp_v[Ls*ss+s] = adj(Ucopy_v[ss])*src_v[Ls*ss+s];
} else {
tmp_v[Ls*ss+s] = adj(U_v[ss])*src_v[Ls*ss+s];
@ -342,6 +343,7 @@ void Benchmark(int Ls, Coordinate Dirichlet, int partial)
ref = Zero();
for(int mu=0;mu<Nd;mu++){
int depth=dwf_compressor_depth;
tmp = Cshift(src,mu+1,1);
{
autoView( tmp_v , tmp , CpuWrite);
@ -349,7 +351,7 @@ void Benchmark(int Ls, Coordinate Dirichlet, int partial)
autoView( Ucopy_v, Ucopy[mu] , CpuRead);
for(int ss=0;ss<U[mu].Grid()->oSites();ss++){
for(int s=0;s<Ls;s++){
if ( (s==0) || (s==Ls-1)){
if ( (s<depth) || (s>=Ls-depth)){
tmp_v[Ls*ss+s] = Ucopy_v[ss]*tmp_v[Ls*ss+s];
} else {
tmp_v[Ls*ss+s] = U_v[ss]*tmp_v[Ls*ss+s];
@ -365,7 +367,7 @@ void Benchmark(int Ls, Coordinate Dirichlet, int partial)
autoView( src_v, src , CpuRead);
for(int ss=0;ss<U[mu].Grid()->oSites();ss++){
for(int s=0;s<Ls;s++){
if ( (s==0) || (s==Ls-1) ){
if ( (s<depth) || (s>=Ls-depth)){
tmp_v[Ls*ss+s] = adj(Ucopy_v[ss])*src_v[Ls*ss+s];
} else {
tmp_v[Ls*ss+s] = adj(U_v[ss])*src_v[Ls*ss+s];

View File

@ -3,6 +3,7 @@ CLIME=`spack find --paths c-lime@2-3-9 | grep c-lime| cut -c 15-`
--with-lime=$CLIME \
--enable-unified=no \
--enable-shm=nvlink \
--enable-tracing=timer \
--enable-accelerator=hip \
--enable-gen-simd-width=64 \
--enable-simd=GPU \