mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-23 18:22:02 +01:00
Compare commits
71 Commits
feature/di
...
584a3ee45c
Author | SHA1 | Date | |
---|---|---|---|
584a3ee45c | |||
eec0c9eb7d | |||
66d001ec9e | |||
19da647e3c | |||
e7d9b75fdd | |||
3d0e3ec363 | |||
3c1c51f9aa | |||
5c87342108 | |||
66177bfbe2 | |||
5205e68963 | |||
cd5cf6d614 | |||
5abb19eab0 | |||
06d7b88c78 | |||
cf72799735 | |||
cdb8fcc269 | |||
b4f4130901 | |||
bb049847d5 | |||
fd33c835dd | |||
21371a7e5b | |||
abfaa00d3e | |||
efee33c55d | |||
db0fe6ddbb | |||
8a9e647120 | |||
e6dcb821ad | |||
9bff188f02 | |||
111b30ca1d | |||
24182ca8bf | |||
ee2d7369b3 | |||
7c686d29c9 | |||
e8a0a1e75d | |||
730be89abf | |||
f991ad7d5c | |||
b3f33f82f7 | |||
a34a6e059f | |||
1333319941 | |||
9295ed8d20 | |||
19cc7653fb | |||
5752538661 | |||
ca40a1b00b | |||
659fac9dfb | |||
4dc3d6fce0 | |||
95b640cb6b | |||
2cb5bedc15 | |||
806b02bddf | |||
de40395773 | |||
7ba4788715 | |||
06d9ce1a02 | |||
75bb6b2b40 | |||
74f10c2dc0 | |||
a93d5459d4 | |||
9c21add0c6 | |||
639aab6563 | |||
8137cc7049 | |||
60e63dca1d | |||
486409574e | |||
a913b8be12 | |||
2239751850 | |||
9b20f1449c | |||
b99453083d | |||
943fbb914d | |||
ca4603580d | |||
f73db8f1f3 | |||
f7217d12d2 | |||
fab50c57d9 | |||
3440534fbf | |||
177b1a7ec6 | |||
58182fe345 | |||
1f907d330d | |||
b0fe664e9d | |||
c0f8482402 | |||
3544965f54 |
@ -44,7 +44,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
#include <Grid/GridStd.h>
|
||||
#include <Grid/threads/Pragmas.h>
|
||||
#include <Grid/perfmon/Timer.h>
|
||||
#include <Grid/perfmon/PerfCount.h>
|
||||
#include <Grid/perfmon/Tracing.h>
|
||||
//#include <Grid/perfmon/PerfCount.h>
|
||||
#include <Grid/util/Util.h>
|
||||
#include <Grid/log/Log.h>
|
||||
#include <Grid/allocator/Allocator.h>
|
||||
|
@ -58,6 +58,7 @@ public:
|
||||
|
||||
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
|
||||
|
||||
GRID_TRACE("ConjugateGradient");
|
||||
psi.Checkerboard() = src.Checkerboard();
|
||||
|
||||
conformable(psi, src);
|
||||
@ -117,6 +118,7 @@ public:
|
||||
GridStopWatch MatrixTimer;
|
||||
GridStopWatch SolverTimer;
|
||||
|
||||
RealD usecs = -usecond();
|
||||
SolverTimer.Start();
|
||||
int k;
|
||||
for (k = 1; k <= MaxIterations; k++) {
|
||||
@ -166,14 +168,16 @@ public:
|
||||
|
||||
// Stopping condition
|
||||
if (cp <= rsq) {
|
||||
usecs +=usecond();
|
||||
SolverTimer.Stop();
|
||||
Linop.HermOpAndNorm(psi, mmp, d, qq);
|
||||
p = mmp - src;
|
||||
|
||||
GridBase *grid = src.Grid();
|
||||
RealD DwfFlops = (1452. )*grid->gSites()*4*k
|
||||
+ (8+4+8+4+4)*12*grid->gSites()*k; // CG linear algebra
|
||||
RealD srcnorm = std::sqrt(norm2(src));
|
||||
RealD resnorm = std::sqrt(norm2(p));
|
||||
RealD true_residual = resnorm / srcnorm;
|
||||
|
||||
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k
|
||||
<< "\tComputed residual " << std::sqrt(cp / ssq)
|
||||
<< "\tTrue residual " << true_residual
|
||||
@ -187,6 +191,8 @@ public:
|
||||
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
|
||||
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "\tMobius flop rate " << DwfFlops/ usecs<< " Gflops " <<std::endl;
|
||||
|
||||
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||
|
||||
IterationsToComplete = k;
|
||||
|
@ -84,6 +84,7 @@ public:
|
||||
|
||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &psi)
|
||||
{
|
||||
GRID_TRACE("ConjugateGradientMultiShift");
|
||||
|
||||
GridBase *grid = src.Grid();
|
||||
|
||||
|
@ -127,6 +127,7 @@ public:
|
||||
|
||||
void operator() (LinearOperatorBase<FieldD> &Linop_d, const FieldD &src_d, std::vector<FieldD> &psi_d)
|
||||
{
|
||||
GRID_TRACE("ConjugateGradientMultiShiftMixedPrec");
|
||||
GridBase *DoublePrecGrid = src_d.Grid();
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
|
@ -73,6 +73,7 @@ public:
|
||||
}
|
||||
|
||||
void operator()(const FieldD &src, FieldD &psi) {
|
||||
GRID_TRACE("ConjugateGradientReliableUpdate");
|
||||
LinearOperatorBase<FieldF> *Linop_f_use = &Linop_f;
|
||||
bool using_fallback = false;
|
||||
|
||||
|
@ -40,7 +40,7 @@ void MemoryManager::PrintBytes(void)
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax];
|
||||
int MemoryManager::Victim[MemoryManager::NallocType];
|
||||
int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 2, 8, 2, 8 };
|
||||
int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 8, 16, 8, 16 };
|
||||
uint64_t MemoryManager::CacheBytes[MemoryManager::NallocType];
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Actual allocation and deallocation utils
|
||||
|
@ -3,8 +3,14 @@
|
||||
|
||||
#warning "Using explicit device memory copies"
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
//#define dprintf(...) printf ( __VA_ARGS__ ); fflush(stdout);
|
||||
#define dprintf(...)
|
||||
|
||||
#define MAXLINE 512
|
||||
static char print_buffer [ MAXLINE ];
|
||||
|
||||
#define mprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer;
|
||||
//#define dprintf(...) printf (__VA_ARGS__ ); fflush(stdout);
|
||||
#define dprintf(...)
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
@ -104,7 +110,7 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
|
||||
///////////////////////////////////////////////////////////
|
||||
assert(AccCache.state!=Empty);
|
||||
|
||||
dprintf("MemoryManager: Discard(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
|
||||
mprintf("MemoryManager: Discard(%lx) %lx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
|
||||
assert(AccCache.accLock==0);
|
||||
assert(AccCache.cpuLock==0);
|
||||
assert(AccCache.CpuPtr!=(uint64_t)NULL);
|
||||
@ -112,7 +118,7 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
|
||||
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
|
||||
DeviceBytes -=AccCache.bytes;
|
||||
LRUremove(AccCache);
|
||||
dprintf("MemoryManager: Free(%llx) LRU %lld Total %lld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
|
||||
dprintf("MemoryManager: Free(%lx) LRU %ld Total %ld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
|
||||
}
|
||||
uint64_t CpuPtr = AccCache.CpuPtr;
|
||||
EntryErase(CpuPtr);
|
||||
@ -126,7 +132,7 @@ void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
assert(AccCache.state!=Empty);
|
||||
|
||||
dprintf("MemoryManager: Evict(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
|
||||
mprintf("MemoryManager: Evict(%lx) %lx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
|
||||
assert(AccCache.accLock==0);
|
||||
assert(AccCache.cpuLock==0);
|
||||
if(AccCache.state==AccDirty) {
|
||||
@ -137,7 +143,7 @@ void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
|
||||
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
|
||||
DeviceBytes -=AccCache.bytes;
|
||||
LRUremove(AccCache);
|
||||
dprintf("MemoryManager: Free(%llx) footprint now %lld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
|
||||
dprintf("MemoryManager: Free(%lx) footprint now %ld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
|
||||
}
|
||||
uint64_t CpuPtr = AccCache.CpuPtr;
|
||||
EntryErase(CpuPtr);
|
||||
@ -150,7 +156,7 @@ void MemoryManager::Flush(AcceleratorViewEntry &AccCache)
|
||||
assert(AccCache.AccPtr!=(uint64_t)NULL);
|
||||
assert(AccCache.CpuPtr!=(uint64_t)NULL);
|
||||
acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes);
|
||||
dprintf("MemoryManager: Flush %llx -> %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
|
||||
mprintf("MemoryManager: Flush %lx -> %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
|
||||
DeviceToHostBytes+=AccCache.bytes;
|
||||
DeviceToHostXfer++;
|
||||
AccCache.state=Consistent;
|
||||
@ -165,7 +171,7 @@ void MemoryManager::Clone(AcceleratorViewEntry &AccCache)
|
||||
AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes);
|
||||
DeviceBytes+=AccCache.bytes;
|
||||
}
|
||||
dprintf("MemoryManager: Clone %llx <- %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
|
||||
mprintf("MemoryManager: Clone %lx <- %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
|
||||
acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes);
|
||||
HostToDeviceBytes+=AccCache.bytes;
|
||||
HostToDeviceXfer++;
|
||||
@ -241,7 +247,7 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
|
||||
assert(AccCache.cpuLock==0); // Programming error
|
||||
|
||||
if(AccCache.state!=Empty) {
|
||||
dprintf("ViewOpen found entry %llx %llx : %lld %lld\n",
|
||||
dprintf("ViewOpen found entry %lx %lx : %ld %ld\n",
|
||||
(uint64_t)AccCache.CpuPtr,
|
||||
(uint64_t)CpuPtr,
|
||||
(uint64_t)AccCache.bytes,
|
||||
|
@ -107,6 +107,7 @@ public:
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
static int RankWorld(void) ;
|
||||
static void BroadcastWorld(int root,void* data, int bytes);
|
||||
static void BarrierWorld(void);
|
||||
|
||||
////////////////////////////////////////////////////////////
|
||||
// Reduction
|
||||
|
@ -396,17 +396,17 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
|
||||
}
|
||||
}
|
||||
|
||||
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
|
||||
this->StencilSendToRecvFromComplete(list,dir);
|
||||
list.resize(0);
|
||||
}
|
||||
|
||||
/* if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
|
||||
* this->StencilSendToRecvFromComplete(list,dir);
|
||||
* list.resize(0);
|
||||
* }
|
||||
*/
|
||||
return off_node_bytes;
|
||||
}
|
||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list,int dir)
|
||||
{
|
||||
// std::cout << "Copy Synchronised\n"<<std::endl;
|
||||
acceleratorCopySynchronise();
|
||||
StencilBarrier();// Synch shared memory on a single nodes
|
||||
|
||||
int nreq=list.size();
|
||||
|
||||
@ -443,6 +443,10 @@ int CartesianCommunicator::RankWorld(void){
|
||||
MPI_Comm_rank(communicator_world,&r);
|
||||
return r;
|
||||
}
|
||||
void CartesianCommunicator::BarrierWorld(void){
|
||||
int ierr = MPI_Barrier(communicator_world);
|
||||
assert(ierr==0);
|
||||
}
|
||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||
{
|
||||
int ierr= MPI_Bcast(data,
|
||||
|
@ -104,6 +104,7 @@ int CartesianCommunicator::RankWorld(void){return 0;}
|
||||
void CartesianCommunicator::Barrier(void){}
|
||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {}
|
||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { }
|
||||
void CartesianCommunicator::BarrierWorld(void) { }
|
||||
int CartesianCommunicator::RankFromProcessorCoor(Coordinate &coor) { return 0;}
|
||||
void CartesianCommunicator::ProcessorCoorFromRank(int rank, Coordinate &coor){ coor = _processor_coor; }
|
||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||
|
@ -523,7 +523,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
||||
}
|
||||
if ( WorldRank == 0 ){
|
||||
std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
|
||||
<< "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
|
||||
<< "bytes at "<< std::hex<< ShmCommBuf << " - "<<(bytes-1+(uint64_t)ShmCommBuf) <<std::dec<<" for comms buffers " <<std::endl;
|
||||
}
|
||||
SharedMemoryZero(ShmCommBuf,bytes);
|
||||
std::cout<< "Setting up IPC"<<std::endl;
|
||||
|
@ -36,6 +36,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
GRID_TRACE("mult");
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
autoView( lhs_v , lhs, AcceleratorRead);
|
||||
@ -53,6 +54,7 @@ void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
GRID_TRACE("mac");
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
@ -70,6 +72,7 @@ void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
GRID_TRACE("sub");
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
@ -86,6 +89,7 @@ void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
GRID_TRACE("add");
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
conformable(lhs,rhs);
|
||||
@ -106,6 +110,7 @@ void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
GRID_TRACE("mult");
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(lhs,ret);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
@ -119,6 +124,7 @@ void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
GRID_TRACE("mac");
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,lhs);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
@ -133,6 +139,7 @@ void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
GRID_TRACE("sub");
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(ret,lhs);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
@ -146,6 +153,7 @@ void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
GRID_TRACE("add");
|
||||
ret.Checkerboard() = lhs.Checkerboard();
|
||||
conformable(lhs,ret);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
@ -163,6 +171,7 @@ void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
GRID_TRACE("mult");
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
@ -177,6 +186,7 @@ void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
GRID_TRACE("mac");
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
@ -191,6 +201,7 @@ void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
GRID_TRACE("sub");
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
@ -204,6 +215,7 @@ void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
}
|
||||
template<class obj1,class obj2,class obj3> inline
|
||||
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
GRID_TRACE("add");
|
||||
ret.Checkerboard() = rhs.Checkerboard();
|
||||
conformable(ret,rhs);
|
||||
autoView( ret_v , ret, AcceleratorWrite);
|
||||
@ -218,6 +230,7 @@ void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||
|
||||
template<class sobj,class vobj> inline
|
||||
void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
GRID_TRACE("axpy");
|
||||
ret.Checkerboard() = x.Checkerboard();
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
@ -231,6 +244,7 @@ void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &
|
||||
}
|
||||
template<class sobj,class vobj> inline
|
||||
void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||
GRID_TRACE("axpby");
|
||||
ret.Checkerboard() = x.Checkerboard();
|
||||
conformable(ret,x);
|
||||
conformable(x,y);
|
||||
@ -246,11 +260,13 @@ void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice
|
||||
template<class sobj,class vobj> inline
|
||||
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
GRID_TRACE("axpy_norm");
|
||||
return axpy_norm_fast(ret,a,x,y);
|
||||
}
|
||||
template<class sobj,class vobj> inline
|
||||
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||
{
|
||||
GRID_TRACE("axpby_norm");
|
||||
return axpby_norm_fast(ret,a,b,x,y);
|
||||
}
|
||||
|
||||
|
@ -117,6 +117,7 @@ public:
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template <typename Op, typename T1> inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
|
||||
{
|
||||
GRID_TRACE("ExpressionTemplateEval");
|
||||
GridBase *egrid(nullptr);
|
||||
GridFromExpression(egrid,expr);
|
||||
assert(egrid!=nullptr);
|
||||
@ -140,6 +141,7 @@ public:
|
||||
}
|
||||
template <typename Op, typename T1,typename T2> inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
|
||||
{
|
||||
GRID_TRACE("ExpressionTemplateEval");
|
||||
GridBase *egrid(nullptr);
|
||||
GridFromExpression(egrid,expr);
|
||||
assert(egrid!=nullptr);
|
||||
@ -163,6 +165,7 @@ public:
|
||||
}
|
||||
template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
|
||||
{
|
||||
GRID_TRACE("ExpressionTemplateEval");
|
||||
GridBase *egrid(nullptr);
|
||||
GridFromExpression(egrid,expr);
|
||||
assert(egrid!=nullptr);
|
||||
|
@ -91,10 +91,7 @@ inline typename vobj::scalar_objectD sumD_cpu(const vobj *arg, Integer osites)
|
||||
for(int i=0;i<nthread;i++){
|
||||
ssum = ssum+sumarray[i];
|
||||
}
|
||||
|
||||
typedef typename vobj::scalar_object ssobj;
|
||||
ssobj ret = ssum;
|
||||
return ret;
|
||||
return ssum;
|
||||
}
|
||||
/*
|
||||
Threaded max, don't use for now
|
||||
@ -488,6 +485,14 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
|
||||
int words = fd*sizeof(sobj)/sizeof(scalar_type);
|
||||
grid->GlobalSumVector(ptr, words);
|
||||
}
|
||||
template<class vobj> inline
|
||||
std::vector<typename vobj::scalar_object>
|
||||
sliceSum(const Lattice<vobj> &Data,int orthogdim)
|
||||
{
|
||||
std::vector<typename vobj::scalar_object> result;
|
||||
sliceSum(Data,result,orthogdim);
|
||||
return result;
|
||||
}
|
||||
|
||||
template<class vobj>
|
||||
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
||||
|
@ -68,6 +68,7 @@ GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL");
|
||||
GridLogger GridLogMemory (1, "Memory", GridLogColours, "NORMAL");
|
||||
GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE");
|
||||
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
|
||||
GridLogger GridLogDslash (1, "Dslash", GridLogColours, "BLUE");
|
||||
GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE");
|
||||
GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE");
|
||||
GridLogger GridLogHMC (1, "HMC", GridLogColours, "BLUE");
|
||||
@ -80,6 +81,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
|
||||
GridLogIterative.Active(0);
|
||||
GridLogDebug.Active(0);
|
||||
GridLogPerformance.Active(0);
|
||||
GridLogDslash.Active(0);
|
||||
GridLogIntegrator.Active(1);
|
||||
GridLogColours.Active(0);
|
||||
GridLogHMC.Active(1);
|
||||
@ -91,6 +93,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
|
||||
if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1);
|
||||
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
|
||||
if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1);
|
||||
if (logstreams[i] == std::string("Dslash")) GridLogDslash.Active(1);
|
||||
if (logstreams[i] == std::string("NoIntegrator")) GridLogIntegrator.Active(0);
|
||||
if (logstreams[i] == std::string("NoHMC")) GridLogHMC.Active(0);
|
||||
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
|
||||
|
@ -138,7 +138,8 @@ public:
|
||||
stream << std::setw(log.topWidth);
|
||||
}
|
||||
stream << log.topName << log.background()<< " : ";
|
||||
stream << log.colour() << std::left;
|
||||
// stream << log.colour() << std::left;
|
||||
stream << std::left;
|
||||
if (log.chanWidth > 0)
|
||||
{
|
||||
stream << std::setw(log.chanWidth);
|
||||
@ -153,9 +154,9 @@ public:
|
||||
stream << log.evidence()
|
||||
<< now << log.background() << " : " ;
|
||||
}
|
||||
stream << log.colour();
|
||||
// stream << log.colour();
|
||||
stream << std::right;
|
||||
stream.flags(f);
|
||||
|
||||
return stream;
|
||||
} else {
|
||||
return devnull;
|
||||
@ -180,6 +181,7 @@ extern GridLogger GridLogWarning;
|
||||
extern GridLogger GridLogMessage;
|
||||
extern GridLogger GridLogDebug ;
|
||||
extern GridLogger GridLogPerformance;
|
||||
extern GridLogger GridLogDslash;
|
||||
extern GridLogger GridLogIterative ;
|
||||
extern GridLogger GridLogIntegrator ;
|
||||
extern GridLogger GridLogHMC;
|
||||
|
@ -27,10 +27,13 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
||||
/* END LEGAL */
|
||||
|
||||
#include <Grid/GridCore.h>
|
||||
#include <Grid/perfmon/PerfCount.h>
|
||||
|
||||
#include <Grid/perfmon/Timer.h>
|
||||
#include <Grid/perfmon/PerfCount.h>
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
GridTimePoint theProgramStart = GridClock::now();
|
||||
|
||||
#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
|
||||
#define RawConfig(A,B) (A<<8|B)
|
||||
const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {
|
||||
|
@ -35,17 +35,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
NAMESPACE_BEGIN(Grid)
|
||||
|
||||
// Dress the output; use std::chrono
|
||||
// C++11 time facilities better?
|
||||
inline double usecond(void) {
|
||||
struct timeval tv;
|
||||
tv.tv_sec = 0;
|
||||
tv.tv_usec = 0;
|
||||
gettimeofday(&tv,NULL);
|
||||
return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
|
||||
}
|
||||
|
||||
typedef std::chrono::system_clock GridClock;
|
||||
//typedef std::chrono::system_clock GridClock;
|
||||
typedef std::chrono::high_resolution_clock GridClock;
|
||||
typedef std::chrono::time_point<GridClock> GridTimePoint;
|
||||
|
||||
typedef std::chrono::seconds GridSecs;
|
||||
@ -53,6 +44,15 @@ typedef std::chrono::milliseconds GridMillisecs;
|
||||
typedef std::chrono::microseconds GridUsecs;
|
||||
typedef std::chrono::microseconds GridTime;
|
||||
|
||||
extern GridTimePoint theProgramStart;
|
||||
// Dress the output; use std::chrono
|
||||
// C++11 time facilities better?
|
||||
inline double usecond(void) {
|
||||
auto usecs = std::chrono::duration_cast<GridUsecs>(GridClock::now()-theProgramStart);
|
||||
return 1.0*usecs.count();
|
||||
}
|
||||
|
||||
|
||||
inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time)
|
||||
{
|
||||
stream << time.count()<<" s";
|
||||
|
66
Grid/perfmon/Tracing.h
Normal file
66
Grid/perfmon/Tracing.h
Normal file
@ -0,0 +1,66 @@
|
||||
#pragma once
|
||||
#ifdef GRID_TRACING_NVTX
|
||||
#include <nvToolsExt.h>
|
||||
class GridTracer {
|
||||
public:
|
||||
GridTracer(const char* name) {
|
||||
nvtxRangePushA(name);
|
||||
}
|
||||
~GridTracer() {
|
||||
nvtxRangePop();
|
||||
}
|
||||
};
|
||||
inline void tracePush(const char *name) { nvtxRangePushA(name); }
|
||||
inline void tracePop(const char *name) { nvtxRangePop(); }
|
||||
inline int traceStart(const char *name) { }
|
||||
inline void traceStop(int ID) { }
|
||||
#endif
|
||||
|
||||
#ifdef GRID_TRACING_ROCTX
|
||||
#include <roctracer/roctx.h>
|
||||
class GridTracer {
|
||||
public:
|
||||
GridTracer(const char* name) {
|
||||
roctxRangePushA(name);
|
||||
std::cout << "roctxRangePush "<<name<<std::endl;
|
||||
}
|
||||
~GridTracer() {
|
||||
roctxRangePop();
|
||||
std::cout << "roctxRangePop "<<std::endl;
|
||||
}
|
||||
};
|
||||
inline void tracePush(const char *name) { roctxRangePushA(name); }
|
||||
inline void tracePop(const char *name) { roctxRangePop(); }
|
||||
inline int traceStart(const char *name) { roctxRangeStart(name); }
|
||||
inline void traceStop(int ID) { roctxRangeStop(ID); }
|
||||
#endif
|
||||
|
||||
#ifdef GRID_TRACING_TIMER
|
||||
class GridTracer {
|
||||
public:
|
||||
const char *name;
|
||||
double elapsed;
|
||||
GridTracer(const char* _name) {
|
||||
name = _name;
|
||||
elapsed=-usecond();
|
||||
}
|
||||
~GridTracer() {
|
||||
elapsed+=usecond();
|
||||
std::cout << GridLogTracing << name << " took " <<elapsed<< " us" <<std::endl;
|
||||
}
|
||||
};
|
||||
inline void tracePush(const char *name) { }
|
||||
inline void tracePop(const char *name) { }
|
||||
inline int traceStart(const char *name) { return 0; }
|
||||
inline void traceStop(int ID) { }
|
||||
#endif
|
||||
|
||||
#ifdef GRID_TRACING_NONE
|
||||
#define GRID_TRACE(name)
|
||||
inline void tracePush(const char *name) { }
|
||||
inline void tracePop(const char *name) { }
|
||||
inline int traceStart(const char *name) { return 0; }
|
||||
inline void traceStop(int ID) { }
|
||||
#else
|
||||
#define GRID_TRACE(name) GridTracer uniq_name_using_macros##__COUNTER__(name);
|
||||
#endif
|
@ -42,6 +42,8 @@ public:
|
||||
bool is_smeared = false;
|
||||
RealD deriv_norm_sum;
|
||||
RealD deriv_max_sum;
|
||||
RealD Fdt_norm_sum;
|
||||
RealD Fdt_max_sum;
|
||||
int deriv_num;
|
||||
RealD deriv_us;
|
||||
RealD S_us;
|
||||
@ -50,13 +52,21 @@ public:
|
||||
deriv_us = S_us = refresh_us = 0.0;
|
||||
deriv_num=0;
|
||||
deriv_norm_sum = deriv_max_sum=0.0;
|
||||
Fdt_max_sum = Fdt_norm_sum = 0.0;
|
||||
}
|
||||
void deriv_log(RealD nrm, RealD max) { deriv_max_sum+=max; deriv_norm_sum+=nrm; deriv_num++;}
|
||||
RealD deriv_max_average(void) { return deriv_max_sum/deriv_num; };
|
||||
RealD deriv_norm_average(void) { return deriv_norm_sum/deriv_num; };
|
||||
void deriv_log(RealD nrm, RealD max,RealD Fdt_nrm,RealD Fdt_max) {
|
||||
deriv_max_sum+=max;
|
||||
deriv_norm_sum+=nrm;
|
||||
Fdt_max_sum+=Fdt_max;
|
||||
Fdt_norm_sum+=Fdt_nrm; deriv_num++;
|
||||
}
|
||||
RealD deriv_max_average(void) { return deriv_max_sum/deriv_num; };
|
||||
RealD deriv_norm_average(void) { return deriv_norm_sum/deriv_num; };
|
||||
RealD Fdt_max_average(void) { return Fdt_max_sum/deriv_num; };
|
||||
RealD Fdt_norm_average(void) { return Fdt_norm_sum/deriv_num; };
|
||||
RealD deriv_timer(void) { return deriv_us; };
|
||||
RealD S_timer(void) { return deriv_us; };
|
||||
RealD refresh_timer(void) { return deriv_us; };
|
||||
RealD S_timer(void) { return S_us; };
|
||||
RealD refresh_timer(void) { return refresh_us; };
|
||||
void deriv_timer_start(void) { deriv_us-=usecond(); }
|
||||
void deriv_timer_stop(void) { deriv_us+=usecond(); }
|
||||
void refresh_timer_start(void) { refresh_us-=usecond(); }
|
||||
@ -66,6 +76,7 @@ public:
|
||||
// Heatbath?
|
||||
virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
|
||||
virtual RealD S(const GaugeField& U) = 0; // evaluate the action
|
||||
virtual RealD Sinitial(const GaugeField& U) { return this->S(U); } ; // if the refresh computes the action, can cache it. Alternately refreshAndAction() ?
|
||||
virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0; // evaluate the action derivative
|
||||
virtual std::string action_name() = 0; // return the action name
|
||||
virtual std::string LogParameters() = 0; // prints action parameters
|
||||
|
@ -39,7 +39,7 @@ struct GparityWilsonImplParams {
|
||||
Coordinate twists;
|
||||
//mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
|
||||
Coordinate dirichlet; // Blocksize of dirichlet BCs
|
||||
GparityWilsonImplParams() : twists(Nd, 0), dirichlet(Nd, 0) {};
|
||||
GparityWilsonImplParams() : twists(Nd, 0) { dirichlet.resize(0); };
|
||||
};
|
||||
|
||||
struct WilsonImplParams {
|
||||
@ -48,13 +48,13 @@ struct WilsonImplParams {
|
||||
AcceleratorVector<Real,Nd> twist_n_2pi_L;
|
||||
AcceleratorVector<Complex,Nd> boundary_phases;
|
||||
WilsonImplParams() {
|
||||
dirichlet.resize(Nd,0);
|
||||
dirichlet.resize(0);
|
||||
boundary_phases.resize(Nd, 1.0);
|
||||
twist_n_2pi_L.resize(Nd, 0.0);
|
||||
};
|
||||
WilsonImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi), overlapCommsCompute(false) {
|
||||
twist_n_2pi_L.resize(Nd, 0.0);
|
||||
dirichlet.resize(Nd,0);
|
||||
dirichlet.resize(0);
|
||||
}
|
||||
};
|
||||
|
||||
@ -62,7 +62,7 @@ struct StaggeredImplParams {
|
||||
Coordinate dirichlet; // Blocksize of dirichlet BCs
|
||||
StaggeredImplParams()
|
||||
{
|
||||
dirichlet.resize(Nd,0);
|
||||
dirichlet.resize(0);
|
||||
};
|
||||
};
|
||||
|
||||
|
@ -183,16 +183,6 @@ public:
|
||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
|
||||
|
||||
void CayleyReport(void);
|
||||
void CayleyZeroCounters(void);
|
||||
|
||||
double M5Dflops;
|
||||
double M5Dcalls;
|
||||
double M5Dtime;
|
||||
|
||||
double MooeeInvFlops;
|
||||
double MooeeInvCalls;
|
||||
double MooeeInvTime;
|
||||
|
||||
protected:
|
||||
virtual void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
|
||||
|
@ -47,18 +47,6 @@ public:
|
||||
FermionField _tmp;
|
||||
FermionField &tmp(void) { return _tmp; }
|
||||
|
||||
////////////////////////////////////////
|
||||
// Performance monitoring
|
||||
////////////////////////////////////////
|
||||
void Report(void);
|
||||
void ZeroCounters(void);
|
||||
double DhopTotalTime;
|
||||
double DhopCalls;
|
||||
double DhopCommTime;
|
||||
double DhopComputeTime;
|
||||
double DhopComputeTime2;
|
||||
double DhopFaceTime;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Implement the abstract base
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
@ -52,18 +52,6 @@ public:
|
||||
FermionField _tmp;
|
||||
FermionField &tmp(void) { return _tmp; }
|
||||
|
||||
////////////////////////////////////////
|
||||
// Performance monitoring
|
||||
////////////////////////////////////////
|
||||
void Report(void);
|
||||
void ZeroCounters(void);
|
||||
double DhopTotalTime;
|
||||
double DhopCalls;
|
||||
double DhopCommTime;
|
||||
double DhopComputeTime;
|
||||
double DhopComputeTime2;
|
||||
double DhopFaceTime;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Implement the abstract base
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
@ -47,18 +47,6 @@ public:
|
||||
FermionField _tmp;
|
||||
FermionField &tmp(void) { return _tmp; }
|
||||
|
||||
////////////////////////////////////////
|
||||
// Performance monitoring
|
||||
////////////////////////////////////////
|
||||
void Report(void);
|
||||
void ZeroCounters(void);
|
||||
double DhopTotalTime;
|
||||
double DhopCalls;
|
||||
double DhopCommTime;
|
||||
double DhopComputeTime;
|
||||
double DhopComputeTime2;
|
||||
double DhopFaceTime;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Implement the abstract base
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
@ -294,11 +294,7 @@ public:
|
||||
typedef typename Base::View_type View_type;
|
||||
typedef typename Base::StencilVector StencilVector;
|
||||
|
||||
void ZeroCountersi(void) { }
|
||||
void Reporti(int calls) { }
|
||||
|
||||
// Vector<int> surface_list;
|
||||
|
||||
WilsonStencil(GridBase *grid,
|
||||
int npoints,
|
||||
int checkerboard,
|
||||
@ -306,7 +302,6 @@ public:
|
||||
const std::vector<int> &distances,Parameters p)
|
||||
: CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,p)
|
||||
{
|
||||
ZeroCountersi();
|
||||
// surface_list.resize(0);
|
||||
this->same_node.resize(npoints);
|
||||
};
|
||||
@ -400,7 +395,6 @@ public:
|
||||
}
|
||||
this->face_table_computed=1;
|
||||
assert(this->u_comm_offset==this->_unified_buffer_size);
|
||||
accelerator_barrier();
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -74,20 +74,6 @@ public:
|
||||
FermionField _tmp;
|
||||
FermionField &tmp(void) { return _tmp; }
|
||||
|
||||
void Report(void);
|
||||
void ZeroCounters(void);
|
||||
double DhopCalls;
|
||||
double DhopCommTime;
|
||||
double DhopComputeTime;
|
||||
double DhopComputeTime2;
|
||||
double DhopFaceTime;
|
||||
double DhopTotalTime;
|
||||
|
||||
double DerivCalls;
|
||||
double DerivCommTime;
|
||||
double DerivComputeTime;
|
||||
double DerivDhopComputeTime;
|
||||
|
||||
//////////////////////////////////////////////////////////////////
|
||||
// override multiply; cut number routines if pass dagger argument
|
||||
// and also make interface more uniformly consistent
|
||||
|
@ -78,21 +78,6 @@ public:
|
||||
int Dirichlet;
|
||||
Coordinate Block;
|
||||
|
||||
/********** Deprecate timers **********/
|
||||
void Report(void);
|
||||
void ZeroCounters(void);
|
||||
double DhopCalls;
|
||||
double DhopCommTime;
|
||||
double DhopComputeTime;
|
||||
double DhopComputeTime2;
|
||||
double DhopFaceTime;
|
||||
double DhopTotalTime;
|
||||
|
||||
double DerivCalls;
|
||||
double DerivCommTime;
|
||||
double DerivComputeTime;
|
||||
double DerivDhopComputeTime;
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
// Implement the abstract base
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
@ -152,58 +152,6 @@ void CayleyFermion5D<Impl>::DminusDag(const FermionField &psi, FermionField &chi
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl> void CayleyFermion5D<Impl>::CayleyReport(void)
|
||||
{
|
||||
this->Report();
|
||||
Coordinate latt = GridDefaultLatt();
|
||||
RealD volume = this->Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
||||
RealD NP = this->_FourDimGrid->_Nprocessors;
|
||||
if ( M5Dcalls > 0 ) {
|
||||
std::cout << GridLogMessage << "#### M5D calls report " << std::endl;
|
||||
std::cout << GridLogMessage << "CayleyFermion5D Number of M5D Calls : " << M5Dcalls << std::endl;
|
||||
std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << M5Dtime / M5Dcalls << " us" << std::endl;
|
||||
|
||||
// Flops = 10.0*(Nc*Ns) *Ls*vol
|
||||
RealD mflops = 10.0*(Nc*Ns)*volume*M5Dcalls/M5Dtime/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||
|
||||
// Bytes = sizeof(Real) * (Nc*Ns*Nreim) * Ls * vol * (read+write) (/2 for red black counting)
|
||||
// read = 2 ( psi[ss+s+1] and psi[ss+s-1] count as 1 )
|
||||
// write = 1
|
||||
RealD Gbytes = sizeof(Real) * (Nc*Ns*2) * volume * 3 /2. * 1.e-9;
|
||||
std::cout << GridLogMessage << "Average bandwidth (GB/s) : " << Gbytes/M5Dtime*M5Dcalls*1.e6 << std::endl;
|
||||
}
|
||||
|
||||
if ( MooeeInvCalls > 0 ) {
|
||||
|
||||
std::cout << GridLogMessage << "#### MooeeInv calls report " << std::endl;
|
||||
std::cout << GridLogMessage << "CayleyFermion5D Number of MooeeInv Calls : " << MooeeInvCalls << std::endl;
|
||||
std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << MooeeInvTime / MooeeInvCalls << " us" << std::endl;
|
||||
#ifdef GRID_CUDA
|
||||
RealD mflops = ( -16.*Nc*Ns+this->Ls*(1.+18.*Nc*Ns) )*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||
#else
|
||||
// Flops = MADD * Ls *Ls *4dvol * spin/colour/complex
|
||||
RealD mflops = 2.0*24*this->Ls*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
template<class Impl> void CayleyFermion5D<Impl>::CayleyZeroCounters(void)
|
||||
{
|
||||
this->ZeroCounters();
|
||||
M5Dflops=0;
|
||||
M5Dcalls=0;
|
||||
M5Dtime=0;
|
||||
MooeeInvFlops=0;
|
||||
MooeeInvCalls=0;
|
||||
MooeeInvTime=0;
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
|
||||
{
|
||||
@ -646,7 +594,6 @@ void CayleyFermion5D<Impl>::ContractConservedCurrent( PropagatorField &q_in_1,
|
||||
assert(mass_plus == mass_minus);
|
||||
RealD mass = mass_plus;
|
||||
|
||||
#if (!defined(GRID_HIP))
|
||||
Gamma::Algebra Gmu [] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
Gamma::Algebra::GammaY,
|
||||
@ -765,7 +712,7 @@ void CayleyFermion5D<Impl>::ContractConservedCurrent( PropagatorField &q_in_1,
|
||||
else q_out += C;
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
@ -832,7 +779,6 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
}
|
||||
#endif
|
||||
|
||||
#if (!defined(GRID_HIP))
|
||||
int tshift = (mu == Nd-1) ? 1 : 0;
|
||||
unsigned int LLt = GridDefaultLatt()[Tp];
|
||||
////////////////////////////////////////////////
|
||||
@ -952,7 +898,6 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
||||
|
||||
InsertSlice(L_Q, q_out, s , 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#undef Pp
|
||||
#undef Pm
|
||||
|
@ -63,9 +63,6 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
||||
|
||||
// 10 = 3 complex mult + 2 complex add
|
||||
// Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting)
|
||||
M5Dcalls++;
|
||||
M5Dtime-=usecond();
|
||||
|
||||
uint64_t nloop = grid->oSites();
|
||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||
uint64_t s = sss%Ls;
|
||||
@ -78,7 +75,6 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
||||
spProj5p(tmp2,psi(idx_l));
|
||||
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
|
||||
});
|
||||
M5Dtime+=usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -104,9 +100,6 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
|
||||
int Ls=this->Ls;
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
M5Dcalls++;
|
||||
M5Dtime-=usecond();
|
||||
|
||||
uint64_t nloop = grid->oSites();
|
||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||
uint64_t s = sss%Ls;
|
||||
@ -119,7 +112,6 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
|
||||
spProj5m(tmp2,psi(idx_l));
|
||||
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
|
||||
});
|
||||
M5Dtime+=usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -140,8 +132,6 @@ CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi_i, FermionField &chi
|
||||
auto pleem = & leem[0];
|
||||
auto pueem = & ueem[0];
|
||||
|
||||
MooeeInvCalls++;
|
||||
MooeeInvTime-=usecond();
|
||||
uint64_t nloop = grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||
uint64_t ss=sss*Ls;
|
||||
@ -178,8 +168,6 @@ CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi_i, FermionField &chi
|
||||
coalescedWrite(chi[ss+s],res);
|
||||
}
|
||||
});
|
||||
|
||||
MooeeInvTime+=usecond();
|
||||
|
||||
}
|
||||
|
||||
@ -202,10 +190,6 @@ CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi
|
||||
|
||||
assert(psi.Checkerboard() == psi.Checkerboard());
|
||||
|
||||
MooeeInvCalls++;
|
||||
MooeeInvTime-=usecond();
|
||||
|
||||
|
||||
uint64_t nloop = grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||
uint64_t ss=sss*Ls;
|
||||
@ -242,7 +226,6 @@ CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi
|
||||
coalescedWrite(chi[ss+s],res);
|
||||
}
|
||||
});
|
||||
MooeeInvTime+=usecond();
|
||||
|
||||
}
|
||||
|
||||
|
@ -94,10 +94,6 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
||||
d_p[ss] = diag[s];
|
||||
}}
|
||||
|
||||
|
||||
M5Dcalls++;
|
||||
M5Dtime-=usecond();
|
||||
|
||||
assert(Nc==3);
|
||||
|
||||
thread_loop( (int ss=0;ss<grid->oSites();ss+=LLs),{ // adds LLs
|
||||
@ -198,7 +194,6 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
||||
}
|
||||
#endif
|
||||
});
|
||||
M5Dtime+=usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -242,8 +237,6 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
|
||||
d_p[ss] = diag[s];
|
||||
}}
|
||||
|
||||
M5Dcalls++;
|
||||
M5Dtime-=usecond();
|
||||
thread_loop( (int ss=0;ss<grid->oSites();ss+=LLs),{ // adds LLs
|
||||
#if 0
|
||||
alignas(64) SiteHalfSpinor hp;
|
||||
@ -339,7 +332,6 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
|
||||
}
|
||||
#endif
|
||||
});
|
||||
M5Dtime+=usecond();
|
||||
}
|
||||
|
||||
|
||||
@ -813,9 +805,6 @@ CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,
|
||||
}
|
||||
assert(_Matp->size()==Ls*LLs);
|
||||
|
||||
MooeeInvCalls++;
|
||||
MooeeInvTime-=usecond();
|
||||
|
||||
if ( switcheroo<Coeff_t>::iscomplex() ) {
|
||||
thread_loop( (auto site=0;site<vol;site++),{
|
||||
MooeeInternalZAsm(psi,chi,LLs,site,*_Matp,*_Matm);
|
||||
@ -825,7 +814,7 @@ CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,
|
||||
MooeeInternalAsm(psi,chi,LLs,site,*_Matp,*_Matm);
|
||||
});
|
||||
}
|
||||
MooeeInvTime+=usecond();
|
||||
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -54,8 +54,6 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionFi
|
||||
auto pupper = &upper[0];
|
||||
auto plower = &lower[0];
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
auto nloop=grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||
@ -71,7 +69,6 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionFi
|
||||
}
|
||||
});
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -91,8 +88,6 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const Fermio
|
||||
auto plower = &lower[0];
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
auto nloop=grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||
@ -108,7 +103,6 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const Fermio
|
||||
}
|
||||
});
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -127,8 +121,6 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionFie
|
||||
auto pleem = & this->leem[0];
|
||||
auto pueem = & this->ueem[0];
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
uint64_t nloop=grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||
uint64_t ss=sss*Ls;
|
||||
@ -164,7 +156,6 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionFie
|
||||
coalescedWrite(chi[ss+s],res);
|
||||
}
|
||||
});
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -185,8 +176,6 @@ void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi_i, Fermion
|
||||
|
||||
assert(psi.Checkerboard() == psi.Checkerboard());
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
auto nloop = grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||
uint64_t ss=sss*Ls;
|
||||
@ -223,7 +212,6 @@ void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi_i, Fermion
|
||||
}
|
||||
});
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -298,45 +298,33 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl &
|
||||
int LLs = in.Grid()->_rdimensions[0];
|
||||
int len = U.Grid()->oSites();
|
||||
|
||||
DhopFaceTime-=usecond();
|
||||
st.Prepare();
|
||||
st.HaloGather(in,compressor);
|
||||
DhopFaceTime+=usecond();
|
||||
|
||||
DhopCommTime -=usecond();
|
||||
std::vector<std::vector<CommsRequest_t> > requests;
|
||||
st.CommunicateBegin(requests);
|
||||
|
||||
// st.HaloExchangeOptGather(in,compressor); // Wilson compressor
|
||||
DhopFaceTime-=usecond();
|
||||
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
||||
DhopFaceTime+=usecond();
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Remove explicit thread mapping introduced for OPA reasons.
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
DhopComputeTime-=usecond();
|
||||
{
|
||||
int interior=1;
|
||||
int exterior=0;
|
||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime+=usecond();
|
||||
|
||||
DhopFaceTime-=usecond();
|
||||
st.CommsMerge(compressor);
|
||||
DhopFaceTime+=usecond();
|
||||
|
||||
st.CommunicateComplete(requests);
|
||||
DhopCommTime +=usecond();
|
||||
|
||||
DhopComputeTime2-=usecond();
|
||||
{
|
||||
int interior=0;
|
||||
int exterior=1;
|
||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime2+=usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -347,22 +335,14 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
|
||||
Compressor compressor;
|
||||
int LLs = in.Grid()->_rdimensions[0];
|
||||
|
||||
//double t1=usecond();
|
||||
DhopTotalTime -= usecond();
|
||||
DhopCommTime -= usecond();
|
||||
st.HaloExchange(in,compressor);
|
||||
DhopCommTime += usecond();
|
||||
|
||||
DhopComputeTime -= usecond();
|
||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||
{
|
||||
int interior=1;
|
||||
int exterior=1;
|
||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime += usecond();
|
||||
DhopTotalTime += usecond();
|
||||
|
||||
}
|
||||
/*CHANGE END*/
|
||||
|
||||
@ -371,7 +351,6 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
DhopCalls+=1;
|
||||
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
|
||||
conformable(in.Grid(),out.Grid()); // drops the cb check
|
||||
|
||||
@ -383,7 +362,6 @@ void ImprovedStaggeredFermion5D<Impl>::DhopOE(const FermionField &in, FermionFie
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
DhopCalls+=1;
|
||||
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
|
||||
conformable(in.Grid(),out.Grid()); // drops the cb check
|
||||
|
||||
@ -395,7 +373,6 @@ void ImprovedStaggeredFermion5D<Impl>::DhopEO(const FermionField &in, FermionFie
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
DhopCalls+=2;
|
||||
conformable(in.Grid(),FermionGrid()); // verifies full grid
|
||||
conformable(in.Grid(),out.Grid());
|
||||
|
||||
@ -404,58 +381,6 @@ void ImprovedStaggeredFermion5D<Impl>::Dhop(const FermionField &in, FermionField
|
||||
DhopInternal(Stencil,Lebesgue,Umu,UUUmu,in,out,dag);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::Report(void)
|
||||
{
|
||||
Coordinate latt = GridDefaultLatt();
|
||||
RealD volume = Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
||||
RealD NP = _FourDimGrid->_Nprocessors;
|
||||
RealD NN = _FourDimGrid->NodeCount();
|
||||
|
||||
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D Number of DhopEO Calls : "
|
||||
<< DhopCalls << std::endl;
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D TotalTime /Calls : "
|
||||
<< DhopTotalTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D CommTime /Calls : "
|
||||
<< DhopCommTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D ComputeTime/Calls : "
|
||||
<< DhopComputeTime / DhopCalls << " us" << std::endl;
|
||||
|
||||
// Average the compute time
|
||||
_FourDimGrid->GlobalSum(DhopComputeTime);
|
||||
DhopComputeTime/=NP;
|
||||
|
||||
RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
||||
|
||||
RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D Stencil" <<std::endl; Stencil.Report();
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D StencilEven"<<std::endl; StencilEven.Report();
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D StencilOdd" <<std::endl; StencilOdd.Report();
|
||||
}
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion5D<Impl>::ZeroCounters(void)
|
||||
{
|
||||
DhopCalls = 0;
|
||||
DhopTotalTime = 0;
|
||||
DhopCommTime = 0;
|
||||
DhopComputeTime = 0;
|
||||
DhopFaceTime = 0;
|
||||
|
||||
|
||||
Stencil.ZeroCounters();
|
||||
StencilEven.ZeroCounters();
|
||||
StencilOdd.ZeroCounters();
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
// Implement the general interface. Here we use SAME mass on all slices
|
||||
/////////////////////////////////////////////////////////////////////////
|
||||
|
@ -334,7 +334,6 @@ void ImprovedStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionF
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
DhopCalls+=2;
|
||||
conformable(in.Grid(), _grid); // verifies full grid
|
||||
conformable(in.Grid(), out.Grid());
|
||||
|
||||
@ -346,7 +345,6 @@ void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
DhopCalls+=1;
|
||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||
|
||||
@ -359,7 +357,6 @@ void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField
|
||||
template <class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
DhopCalls+=1;
|
||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||
|
||||
@ -418,47 +415,33 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
|
||||
Compressor compressor;
|
||||
int len = U.Grid()->oSites();
|
||||
|
||||
DhopTotalTime -= usecond();
|
||||
|
||||
DhopFaceTime -= usecond();
|
||||
st.Prepare();
|
||||
st.HaloGather(in,compressor);
|
||||
DhopFaceTime += usecond();
|
||||
|
||||
DhopCommTime -=usecond();
|
||||
std::vector<std::vector<CommsRequest_t> > requests;
|
||||
st.CommunicateBegin(requests);
|
||||
|
||||
DhopFaceTime-=usecond();
|
||||
st.CommsMergeSHM(compressor);
|
||||
DhopFaceTime+= usecond();
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Removed explicit thread comms
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
DhopComputeTime -= usecond();
|
||||
{
|
||||
int interior=1;
|
||||
int exterior=0;
|
||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime += usecond();
|
||||
|
||||
st.CommunicateComplete(requests);
|
||||
DhopCommTime +=usecond();
|
||||
|
||||
// First to enter, last to leave timing
|
||||
DhopFaceTime -= usecond();
|
||||
st.CommsMerge(compressor);
|
||||
DhopFaceTime -= usecond();
|
||||
|
||||
DhopComputeTime2 -= usecond();
|
||||
{
|
||||
int interior=0;
|
||||
int exterior=1;
|
||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime2 += usecond();
|
||||
}
|
||||
|
||||
|
||||
@ -471,78 +454,16 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, Le
|
||||
{
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
|
||||
DhopTotalTime -= usecond();
|
||||
|
||||
DhopCommTime -= usecond();
|
||||
Compressor compressor;
|
||||
st.HaloExchange(in, compressor);
|
||||
DhopCommTime += usecond();
|
||||
|
||||
DhopComputeTime -= usecond();
|
||||
{
|
||||
int interior=1;
|
||||
int exterior=1;
|
||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime += usecond();
|
||||
DhopTotalTime += usecond();
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Reporting
|
||||
////////////////////////////////////////////////////////////////
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::Report(void)
|
||||
{
|
||||
Coordinate latt = _grid->GlobalDimensions();
|
||||
RealD volume = 1; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
||||
RealD NP = _grid->_Nprocessors;
|
||||
RealD NN = _grid->NodeCount();
|
||||
|
||||
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion Number of DhopEO Calls : "
|
||||
<< DhopCalls << std::endl;
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion TotalTime /Calls : "
|
||||
<< DhopTotalTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion CommTime /Calls : "
|
||||
<< DhopCommTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion ComputeTime/Calls : "
|
||||
<< DhopComputeTime / DhopCalls << " us" << std::endl;
|
||||
|
||||
// Average the compute time
|
||||
_grid->GlobalSum(DhopComputeTime);
|
||||
DhopComputeTime/=NP;
|
||||
|
||||
RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
||||
|
||||
RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion Stencil" <<std::endl; Stencil.Report();
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion StencilEven"<<std::endl; StencilEven.Report();
|
||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion StencilOdd" <<std::endl; StencilOdd.Report();
|
||||
}
|
||||
template<class Impl>
|
||||
void ImprovedStaggeredFermion<Impl>::ZeroCounters(void)
|
||||
{
|
||||
DhopCalls = 0;
|
||||
DhopTotalTime = 0;
|
||||
DhopCommTime = 0;
|
||||
DhopComputeTime = 0;
|
||||
DhopFaceTime = 0;
|
||||
|
||||
Stencil.ZeroCounters();
|
||||
StencilEven.ZeroCounters();
|
||||
StencilOdd.ZeroCounters();
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Conserved current - not yet implemented.
|
||||
////////////////////////////////////////////////////////
|
||||
|
@ -55,9 +55,6 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi_i, const FermionField
|
||||
auto plower = &lower[0];
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
int nloop = grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||
uint64_t ss = sss*Ls;
|
||||
@ -73,7 +70,6 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi_i, const FermionField
|
||||
}
|
||||
});
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -99,9 +95,6 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const Fermion
|
||||
auto pshift_coeffs = &shift_coeffs[0];
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
int nloop = grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||
uint64_t ss = sss*Ls;
|
||||
@ -122,7 +115,6 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const Fermion
|
||||
}
|
||||
});
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -143,9 +135,6 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi_i, const FermionFie
|
||||
auto plower = &lower[0];
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
int nloop = grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,Simd::Nsimd(), {
|
||||
uint64_t ss = sss*Ls;
|
||||
@ -161,8 +150,6 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi_i, const FermionFie
|
||||
coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
|
||||
}
|
||||
});
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -186,9 +173,6 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const Ferm
|
||||
auto pshift_coeffs = &shift_coeffs[0];
|
||||
|
||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||
this->M5Dcalls++;
|
||||
this->M5Dtime -= usecond();
|
||||
|
||||
auto pm = this->pm;
|
||||
|
||||
int nloop = grid->oSites()/Ls;
|
||||
@ -217,7 +201,6 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const Ferm
|
||||
}
|
||||
});
|
||||
|
||||
this->M5Dtime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -237,9 +220,6 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &
|
||||
|
||||
if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; }
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
int nloop = grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||
uint64_t ss=sss*Ls;
|
||||
@ -277,7 +257,6 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &
|
||||
}
|
||||
});
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -297,8 +276,6 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionF
|
||||
auto pueem= & this->ueem[0];
|
||||
auto pMooeeInv_shift_lc = &MooeeInv_shift_lc[0];
|
||||
auto pMooeeInv_shift_norm = &MooeeInv_shift_norm[0];
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
int nloop = grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||
@ -343,7 +320,6 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionF
|
||||
}
|
||||
});
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -363,9 +339,6 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionFiel
|
||||
auto pleem= & this->leem[0];
|
||||
auto pueem= & this->ueem[0];
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
int nloop = grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||
uint64_t ss=sss*Ls;
|
||||
@ -402,7 +375,6 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionFiel
|
||||
coalescedWrite(chi[ss+s],res);
|
||||
}
|
||||
});
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -423,9 +395,6 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, Fermi
|
||||
auto pMooeeInvDag_shift_lc = &MooeeInvDag_shift_lc[0];
|
||||
auto pMooeeInvDag_shift_norm = &MooeeInvDag_shift_norm[0];
|
||||
|
||||
this->MooeeInvCalls++;
|
||||
this->MooeeInvTime -= usecond();
|
||||
|
||||
int nloop = grid->oSites()/Ls;
|
||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||
uint64_t ss=sss*Ls;
|
||||
@ -469,7 +438,6 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, Fermi
|
||||
}
|
||||
});
|
||||
|
||||
this->MooeeInvTime += usecond();
|
||||
}
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
@ -263,7 +263,6 @@ void NaiveStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionFiel
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
DhopCalls+=2;
|
||||
conformable(in.Grid(), _grid); // verifies full grid
|
||||
conformable(in.Grid(), out.Grid());
|
||||
|
||||
@ -275,7 +274,6 @@ void NaiveStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
DhopCalls+=1;
|
||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||
|
||||
@ -288,7 +286,6 @@ void NaiveStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &o
|
||||
template <class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
DhopCalls+=1;
|
||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||
|
||||
@ -345,47 +342,33 @@ void NaiveStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, L
|
||||
Compressor compressor;
|
||||
int len = U.Grid()->oSites();
|
||||
|
||||
DhopTotalTime -= usecond();
|
||||
|
||||
DhopFaceTime -= usecond();
|
||||
st.Prepare();
|
||||
st.HaloGather(in,compressor);
|
||||
DhopFaceTime += usecond();
|
||||
|
||||
DhopCommTime -=usecond();
|
||||
std::vector<std::vector<CommsRequest_t> > requests;
|
||||
st.CommunicateBegin(requests);
|
||||
|
||||
DhopFaceTime-=usecond();
|
||||
st.CommsMergeSHM(compressor);
|
||||
DhopFaceTime+= usecond();
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Removed explicit thread comms
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
DhopComputeTime -= usecond();
|
||||
{
|
||||
int interior=1;
|
||||
int exterior=0;
|
||||
Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime += usecond();
|
||||
|
||||
st.CommunicateComplete(requests);
|
||||
DhopCommTime +=usecond();
|
||||
|
||||
// First to enter, last to leave timing
|
||||
DhopFaceTime -= usecond();
|
||||
st.CommsMerge(compressor);
|
||||
DhopFaceTime -= usecond();
|
||||
|
||||
DhopComputeTime2 -= usecond();
|
||||
{
|
||||
int interior=0;
|
||||
int exterior=1;
|
||||
Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime2 += usecond();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
@ -396,78 +379,16 @@ void NaiveStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, Lebes
|
||||
{
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
|
||||
DhopTotalTime -= usecond();
|
||||
|
||||
DhopCommTime -= usecond();
|
||||
Compressor compressor;
|
||||
st.HaloExchange(in, compressor);
|
||||
DhopCommTime += usecond();
|
||||
|
||||
DhopComputeTime -= usecond();
|
||||
{
|
||||
int interior=1;
|
||||
int exterior=1;
|
||||
Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior);
|
||||
}
|
||||
DhopComputeTime += usecond();
|
||||
DhopTotalTime += usecond();
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Reporting
|
||||
////////////////////////////////////////////////////////////////
|
||||
template<class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::Report(void)
|
||||
{
|
||||
Coordinate latt = _grid->GlobalDimensions();
|
||||
RealD volume = 1; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
||||
RealD NP = _grid->_Nprocessors;
|
||||
RealD NN = _grid->NodeCount();
|
||||
|
||||
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "NaiveStaggeredFermion Number of DhopEO Calls : "
|
||||
<< DhopCalls << std::endl;
|
||||
std::cout << GridLogMessage << "NaiveStaggeredFermion TotalTime /Calls : "
|
||||
<< DhopTotalTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "NaiveStaggeredFermion CommTime /Calls : "
|
||||
<< DhopCommTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "NaiveStaggeredFermion ComputeTime/Calls : "
|
||||
<< DhopComputeTime / DhopCalls << " us" << std::endl;
|
||||
|
||||
// Average the compute time
|
||||
_grid->GlobalSum(DhopComputeTime);
|
||||
DhopComputeTime/=NP;
|
||||
|
||||
RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
||||
|
||||
RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
||||
|
||||
std::cout << GridLogMessage << "NaiveStaggeredFermion Stencil" <<std::endl; Stencil.Report();
|
||||
std::cout << GridLogMessage << "NaiveStaggeredFermion StencilEven"<<std::endl; StencilEven.Report();
|
||||
std::cout << GridLogMessage << "NaiveStaggeredFermion StencilOdd" <<std::endl; StencilOdd.Report();
|
||||
}
|
||||
template<class Impl>
|
||||
void NaiveStaggeredFermion<Impl>::ZeroCounters(void)
|
||||
{
|
||||
DhopCalls = 0;
|
||||
DhopTotalTime = 0;
|
||||
DhopCommTime = 0;
|
||||
DhopComputeTime = 0;
|
||||
DhopFaceTime = 0;
|
||||
|
||||
Stencil.ZeroCounters();
|
||||
StencilEven.ZeroCounters();
|
||||
StencilOdd.ZeroCounters();
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Conserved current - not yet implemented.
|
||||
////////////////////////////////////////////////////////
|
||||
|
@ -103,8 +103,6 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
||||
Block = block;
|
||||
}
|
||||
|
||||
ZeroCounters();
|
||||
|
||||
if (Impl::LsVectorised) {
|
||||
|
||||
int nsimd = Simd::Nsimd();
|
||||
@ -143,89 +141,6 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
||||
// <<" " << StencilEven.surface_list.size()<<std::endl;
|
||||
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::Report(void)
|
||||
{
|
||||
RealD NP = _FourDimGrid->_Nprocessors;
|
||||
RealD NN = _FourDimGrid->NodeCount();
|
||||
RealD volume = Ls;
|
||||
Coordinate latt = _FourDimGrid->GlobalDimensions();
|
||||
for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
||||
|
||||
if ( DhopCalls > 0 ) {
|
||||
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Number of DhopEO Calls : " << DhopCalls << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D TotalTime /Calls : " << DhopTotalTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D CommTime /Calls : " << DhopCommTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D FaceTime /Calls : " << DhopFaceTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime1/Calls : " << DhopComputeTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime2/Calls : " << DhopComputeTime2/ DhopCalls << " us" << std::endl;
|
||||
|
||||
// Average the compute time
|
||||
_FourDimGrid->GlobalSum(DhopComputeTime);
|
||||
DhopComputeTime/=NP;
|
||||
RealD mflops = 1344*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
||||
|
||||
RealD Fullmflops = 1344*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
||||
|
||||
}
|
||||
|
||||
if ( DerivCalls > 0 ) {
|
||||
std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " <<DerivCalls <<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
|
||||
|
||||
RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
|
||||
|
||||
RealD Fullmflops = 144*volume*DerivCalls/(DerivDhopComputeTime+DerivCommTime)/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NP << std::endl; }
|
||||
|
||||
if (DerivCalls > 0 || DhopCalls > 0){
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Stencil" <<std::endl; Stencil.Report();
|
||||
std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl; StencilEven.Report();
|
||||
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd" <<std::endl; StencilOdd.Report();
|
||||
}
|
||||
if ( DhopCalls > 0){
|
||||
std::cout << GridLogMessage << "WilsonFermion5D Stencil Reporti()" <<std::endl; Stencil.Reporti(DhopCalls);
|
||||
std::cout << GridLogMessage << "WilsonFermion5D StencilEven Reporti()"<<std::endl; StencilEven.Reporti(DhopCalls);
|
||||
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd Reporti()" <<std::endl; StencilOdd.Reporti(DhopCalls);
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::ZeroCounters(void) {
|
||||
DhopCalls = 0;
|
||||
DhopCommTime = 0;
|
||||
DhopComputeTime = 0;
|
||||
DhopComputeTime2= 0;
|
||||
DhopFaceTime = 0;
|
||||
DhopTotalTime = 0;
|
||||
|
||||
DerivCalls = 0;
|
||||
DerivCommTime = 0;
|
||||
DerivComputeTime = 0;
|
||||
DerivDhopComputeTime = 0;
|
||||
|
||||
Stencil.ZeroCounters();
|
||||
StencilEven.ZeroCounters();
|
||||
StencilOdd.ZeroCounters();
|
||||
Stencil.ZeroCountersi();
|
||||
StencilEven.ZeroCountersi();
|
||||
StencilOdd.ZeroCountersi();
|
||||
}
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
|
||||
@ -233,10 +148,10 @@ void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
|
||||
GaugeField HUmu(_Umu.Grid());
|
||||
HUmu = _Umu*(-0.5);
|
||||
if ( Dirichlet ) {
|
||||
std::cout << GridLogMessage << " Dirichlet BCs 5d " <<Block<<std::endl;
|
||||
std::cout << GridLogDslash << " Dirichlet BCs 5d " <<Block<<std::endl;
|
||||
Coordinate GaugeBlock(Nd);
|
||||
for(int d=0;d<Nd;d++) GaugeBlock[d] = Block[d+1];
|
||||
std::cout << GridLogMessage << " Dirichlet BCs 4d " <<GaugeBlock<<std::endl;
|
||||
std::cout << GridLogDslash << " Dirichlet BCs 4d " <<GaugeBlock<<std::endl;
|
||||
DirichletFilter<GaugeField> Filter(GaugeBlock);
|
||||
Filter.applyFilter(HUmu);
|
||||
}
|
||||
@ -281,7 +196,6 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
||||
const FermionField &B,
|
||||
int dag)
|
||||
{
|
||||
DerivCalls++;
|
||||
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||
|
||||
conformable(st.Grid(),A.Grid());
|
||||
@ -292,15 +206,12 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
||||
FermionField Btilde(B.Grid());
|
||||
FermionField Atilde(B.Grid());
|
||||
|
||||
DerivCommTime-=usecond();
|
||||
st.HaloExchange(B,compressor);
|
||||
DerivCommTime+=usecond();
|
||||
|
||||
Atilde=A;
|
||||
int LLs = B.Grid()->_rdimensions[0];
|
||||
|
||||
|
||||
DerivComputeTime-=usecond();
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Flip gamma if dag
|
||||
@ -312,8 +223,6 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
||||
// Call the single hop
|
||||
////////////////////////
|
||||
|
||||
DerivDhopComputeTime -= usecond();
|
||||
|
||||
int Usites = U.Grid()->oSites();
|
||||
|
||||
Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, Usites, B, Btilde, mu,gamma);
|
||||
@ -321,10 +230,8 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
||||
////////////////////////////
|
||||
// spin trace outer product
|
||||
////////////////////////////
|
||||
DerivDhopComputeTime += usecond();
|
||||
Impl::InsertForce5D(mat, Btilde, Atilde, mu);
|
||||
}
|
||||
DerivComputeTime += usecond();
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
@ -382,12 +289,10 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
||||
DoubledGaugeField & U,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
DhopTotalTime-=usecond();
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
|
||||
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
|
||||
else
|
||||
DhopInternalSerialComms(st,lo,U,in,out,dag);
|
||||
DhopTotalTime+=usecond();
|
||||
}
|
||||
|
||||
|
||||
@ -396,6 +301,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
||||
DoubledGaugeField & U,
|
||||
const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
GRID_TRACE("DhopInternalOverlappedComms");
|
||||
Compressor compressor(dag);
|
||||
|
||||
int LLs = in.Grid()->_rdimensions[0];
|
||||
@ -404,53 +310,58 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
||||
/////////////////////////////
|
||||
// Start comms // Gather intranode and extra node differentiated??
|
||||
/////////////////////////////
|
||||
DhopFaceTime-=usecond();
|
||||
st.HaloExchangeOptGather(in,compressor);
|
||||
DhopFaceTime+=usecond();
|
||||
|
||||
DhopCommTime -=usecond();
|
||||
{
|
||||
GRID_TRACE("Gather");
|
||||
st.HaloExchangeOptGather(in,compressor);
|
||||
accelerator_barrier();
|
||||
}
|
||||
|
||||
std::vector<std::vector<CommsRequest_t> > requests;
|
||||
auto id=traceStart("Communicate overlapped");
|
||||
st.CommunicateBegin(requests);
|
||||
|
||||
/////////////////////////////
|
||||
// Overlap with comms
|
||||
/////////////////////////////
|
||||
DhopFaceTime-=usecond();
|
||||
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
||||
DhopFaceTime+=usecond();
|
||||
{
|
||||
GRID_TRACE("MergeSHM");
|
||||
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
||||
}
|
||||
|
||||
/////////////////////////////
|
||||
// do the compute interior
|
||||
/////////////////////////////
|
||||
int Opt = WilsonKernelsStatic::Opt; // Why pass this. Kernels should know
|
||||
DhopComputeTime-=usecond();
|
||||
if (dag == DaggerYes) {
|
||||
GRID_TRACE("DhopDagInterior");
|
||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
|
||||
} else {
|
||||
GRID_TRACE("DhopInterior");
|
||||
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
|
||||
}
|
||||
DhopComputeTime+=usecond();
|
||||
|
||||
/////////////////////////////
|
||||
// Complete comms
|
||||
/////////////////////////////
|
||||
st.CommunicateComplete(requests);
|
||||
DhopCommTime +=usecond();
|
||||
traceStop(id);
|
||||
|
||||
/////////////////////////////
|
||||
// do the compute exterior
|
||||
/////////////////////////////
|
||||
DhopFaceTime-=usecond();
|
||||
st.CommsMerge(compressor);
|
||||
DhopFaceTime+=usecond();
|
||||
{
|
||||
GRID_TRACE("Merge");
|
||||
st.CommsMerge(compressor);
|
||||
}
|
||||
|
||||
|
||||
DhopComputeTime2-=usecond();
|
||||
if (dag == DaggerYes) {
|
||||
GRID_TRACE("DhopDagExterior");
|
||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
|
||||
} else {
|
||||
GRID_TRACE("DhopExterior");
|
||||
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
|
||||
}
|
||||
DhopComputeTime2+=usecond();
|
||||
}
|
||||
|
||||
|
||||
@ -460,29 +371,30 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOr
|
||||
const FermionField &in,
|
||||
FermionField &out,int dag)
|
||||
{
|
||||
GRID_TRACE("DhopInternalSerialComms");
|
||||
Compressor compressor(dag);
|
||||
|
||||
int LLs = in.Grid()->_rdimensions[0];
|
||||
|
||||
{
|
||||
GRID_TRACE("HaloExchange");
|
||||
st.HaloExchangeOpt(in,compressor);
|
||||
}
|
||||
|
||||
DhopCommTime-=usecond();
|
||||
st.HaloExchangeOpt(in,compressor);
|
||||
DhopCommTime+=usecond();
|
||||
|
||||
DhopComputeTime-=usecond();
|
||||
int Opt = WilsonKernelsStatic::Opt;
|
||||
if (dag == DaggerYes) {
|
||||
GRID_TRACE("DhopDag");
|
||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
|
||||
} else {
|
||||
GRID_TRACE("Dhop");
|
||||
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
|
||||
}
|
||||
DhopComputeTime+=usecond();
|
||||
}
|
||||
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
DhopCalls++;
|
||||
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
|
||||
conformable(in.Grid(),out.Grid()); // drops the cb check
|
||||
|
||||
@ -494,7 +406,6 @@ void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
DhopCalls++;
|
||||
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
|
||||
conformable(in.Grid(),out.Grid()); // drops the cb check
|
||||
|
||||
@ -506,7 +417,6 @@ void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int
|
||||
template<class Impl>
|
||||
void WilsonFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
DhopCalls+=2;
|
||||
conformable(in.Grid(),FermionGrid()); // verifies full grid
|
||||
conformable(in.Grid(),out.Grid());
|
||||
|
||||
@ -561,12 +471,17 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
|
||||
LatComplex sk(_grid); sk = Zero();
|
||||
LatComplex sk2(_grid); sk2= Zero();
|
||||
LatComplex W(_grid); W= Zero();
|
||||
LatComplex a(_grid); a= Zero();
|
||||
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
|
||||
LatComplex cosha(_grid);
|
||||
LatComplex kmu(_grid);
|
||||
LatComplex Wea(_grid);
|
||||
LatComplex Wema(_grid);
|
||||
LatComplex ea(_grid);
|
||||
LatComplex ema(_grid);
|
||||
LatComplex eaLs(_grid);
|
||||
LatComplex emaLs(_grid);
|
||||
LatComplex ea2Ls(_grid);
|
||||
LatComplex ema2Ls(_grid);
|
||||
LatComplex sinha(_grid);
|
||||
LatComplex sinhaLs(_grid);
|
||||
LatComplex coshaLs(_grid);
|
||||
@ -601,39 +516,29 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
|
||||
////////////////////////////////////////////
|
||||
cosha = (one + W*W + sk) / (abs(W)*2.0);
|
||||
|
||||
// FIXME Need a Lattice acosh
|
||||
|
||||
{
|
||||
autoView(cosha_v,cosha,CpuRead);
|
||||
autoView(a_v,a,CpuWrite);
|
||||
for(int idx=0;idx<_grid->lSites();idx++){
|
||||
Coordinate lcoor(Nd);
|
||||
Tcomplex cc;
|
||||
// RealD sgn;
|
||||
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
||||
peekLocalSite(cc,cosha_v,lcoor);
|
||||
assert((double)real(cc)>=1.0);
|
||||
assert(fabs((double)imag(cc))<=1.0e-15);
|
||||
cc = ScalComplex(::acosh(real(cc)),0.0);
|
||||
pokeLocalSite(cc,a_v,lcoor);
|
||||
}
|
||||
}
|
||||
|
||||
Wea = ( exp( a) * abs(W) );
|
||||
Wema= ( exp(-a) * abs(W) );
|
||||
sinha = 0.5*(exp( a) - exp(-a));
|
||||
sinhaLs = 0.5*(exp( a*Ls) - exp(-a*Ls));
|
||||
coshaLs = 0.5*(exp( a*Ls) + exp(-a*Ls));
|
||||
ea = (cosha + sqrt(cosha*cosha-one));
|
||||
ema= (cosha - sqrt(cosha*cosha-one));
|
||||
eaLs = pow(ea,Ls);
|
||||
emaLs= pow(ema,Ls);
|
||||
ea2Ls = pow(ea,2.0*Ls);
|
||||
ema2Ls= pow(ema,2.0*Ls);
|
||||
Wea= abs(W) * ea;
|
||||
Wema= abs(W) * ema;
|
||||
// a=log(ea);
|
||||
|
||||
sinha = 0.5*(ea - ema);
|
||||
sinhaLs = 0.5*(eaLs-emaLs);
|
||||
coshaLs = 0.5*(eaLs+emaLs);
|
||||
|
||||
A = one / (abs(W) * sinha * 2.0) * one / (sinhaLs * 2.0);
|
||||
F = exp( a*Ls) * (one - Wea + (Wema - one) * mass*mass);
|
||||
F = F + exp(-a*Ls) * (Wema - one + (one - Wea) * mass*mass);
|
||||
F = eaLs * (one - Wea + (Wema - one) * mass*mass);
|
||||
F = F + emaLs * (Wema - one + (one - Wea) * mass*mass);
|
||||
F = F - abs(W) * sinha * 4.0 * mass;
|
||||
|
||||
Bpp = (A/F) * (exp(-a*Ls*2.0) - one) * (one - Wema) * (one - mass*mass * one);
|
||||
Bmm = (A/F) * (one - exp(a*Ls*2.0)) * (one - Wea) * (one - mass*mass * one);
|
||||
App = (A/F) * (exp(-a*Ls*2.0) - one) * exp(-a) * (exp(-a) - abs(W)) * (one - mass*mass * one);
|
||||
Amm = (A/F) * (one - exp(a*Ls*2.0)) * exp(a) * (exp(a) - abs(W)) * (one - mass*mass * one);
|
||||
Bpp = (A/F) * (ema2Ls - one) * (one - Wema) * (one - mass*mass * one);
|
||||
Bmm = (A/F) * (one - ea2Ls) * (one - Wea) * (one - mass*mass * one);
|
||||
App = (A/F) * (ema2Ls - one) * ema * (ema - abs(W)) * (one - mass*mass * one);
|
||||
Amm = (A/F) * (one - ea2Ls) * ea * (ea - abs(W)) * (one - mass*mass * one);
|
||||
ABpm = (A/F) * abs(W) * sinha * 2.0 * (one + mass * coshaLs * 2.0 + mass*mass * one);
|
||||
|
||||
//P+ source, P- source
|
||||
@ -656,29 +561,29 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
|
||||
buf1_4d = Zero();
|
||||
ExtractSlice(buf1_4d, PRsource, (tt-1), 0);
|
||||
//G(s,t)
|
||||
bufR_4d = bufR_4d + A * exp(a*Ls) * exp(-a*f) * signW * buf1_4d + A * exp(-a*Ls) * exp(a*f) * signW * buf1_4d;
|
||||
bufR_4d = bufR_4d + A * eaLs * pow(ema,f) * signW * buf1_4d + A * emaLs * pow(ea,f) * signW * buf1_4d;
|
||||
//A++*exp(a(s+t))
|
||||
bufR_4d = bufR_4d + App * exp(a*ss) * exp(a*tt) * signW * buf1_4d ;
|
||||
bufR_4d = bufR_4d + App * pow(ea,ss) * pow(ea,tt) * signW * buf1_4d ;
|
||||
//A+-*exp(a(s-t))
|
||||
bufR_4d = bufR_4d + ABpm * exp(a*ss) * exp(-a*tt) * signW * buf1_4d ;
|
||||
bufR_4d = bufR_4d + ABpm * pow(ea,ss) * pow(ema,tt) * signW * buf1_4d ;
|
||||
//A-+*exp(a(-s+t))
|
||||
bufR_4d = bufR_4d + ABpm * exp(-a*ss) * exp(a*tt) * signW * buf1_4d ;
|
||||
bufR_4d = bufR_4d + ABpm * pow(ema,ss) * pow(ea,tt) * signW * buf1_4d ;
|
||||
//A--*exp(a(-s-t))
|
||||
bufR_4d = bufR_4d + Amm * exp(-a*ss) * exp(-a*tt) * signW * buf1_4d ;
|
||||
bufR_4d = bufR_4d + Amm * pow(ema,ss) * pow(ema,tt) * signW * buf1_4d ;
|
||||
|
||||
//GL
|
||||
buf2_4d = Zero();
|
||||
ExtractSlice(buf2_4d, PLsource, (tt-1), 0);
|
||||
//G(s,t)
|
||||
bufL_4d = bufL_4d + A * exp(a*Ls) * exp(-a*f) * signW * buf2_4d + A * exp(-a*Ls) * exp(a*f) * signW * buf2_4d;
|
||||
bufL_4d = bufL_4d + A * eaLs * pow(ema,f) * signW * buf2_4d + A * emaLs * pow(ea,f) * signW * buf2_4d;
|
||||
//B++*exp(a(s+t))
|
||||
bufL_4d = bufL_4d + Bpp * exp(a*ss) * exp(a*tt) * signW * buf2_4d ;
|
||||
bufL_4d = bufL_4d + Bpp * pow(ea,ss) * pow(ea,tt) * signW * buf2_4d ;
|
||||
//B+-*exp(a(s-t))
|
||||
bufL_4d = bufL_4d + ABpm * exp(a*ss) * exp(-a*tt) * signW * buf2_4d ;
|
||||
bufL_4d = bufL_4d + ABpm * pow(ea,ss) * pow(ema,tt) * signW * buf2_4d ;
|
||||
//B-+*exp(a(-s+t))
|
||||
bufL_4d = bufL_4d + ABpm * exp(-a*ss) * exp(a*tt) * signW * buf2_4d ;
|
||||
bufL_4d = bufL_4d + ABpm * pow(ema,ss) * pow(ea,tt) * signW * buf2_4d ;
|
||||
//B--*exp(a(-s-t))
|
||||
bufL_4d = bufL_4d + Bmm * exp(-a*ss) * exp(-a*tt) * signW * buf2_4d ;
|
||||
bufL_4d = bufL_4d + Bmm * pow(ema,ss) * pow(ema,tt) * signW * buf2_4d ;
|
||||
}
|
||||
InsertSlice(bufR_4d, GR, (ss-1), 0);
|
||||
InsertSlice(bufL_4d, GL, (ss-1), 0);
|
||||
@ -797,28 +702,12 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
|
||||
W = one - M5 + sk2;
|
||||
|
||||
////////////////////////////////////////////
|
||||
// Cosh alpha -> alpha
|
||||
// Cosh alpha -> exp(+/- alpha)
|
||||
////////////////////////////////////////////
|
||||
cosha = (one + W*W + sk) / (abs(W)*2.0);
|
||||
|
||||
// FIXME Need a Lattice acosh
|
||||
{
|
||||
autoView(cosha_v,cosha,CpuRead);
|
||||
autoView(a_v,a,CpuWrite);
|
||||
for(int idx=0;idx<_grid->lSites();idx++){
|
||||
Coordinate lcoor(Nd);
|
||||
Tcomplex cc;
|
||||
// RealD sgn;
|
||||
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
||||
peekLocalSite(cc,cosha_v,lcoor);
|
||||
assert((double)real(cc)>=1.0);
|
||||
assert(fabs((double)imag(cc))<=1.0e-15);
|
||||
cc = ScalComplex(::acosh(real(cc)),0.0);
|
||||
pokeLocalSite(cc,a_v,lcoor);
|
||||
}}
|
||||
|
||||
Wea = ( exp( a) * abs(W) );
|
||||
Wema= ( exp(-a) * abs(W) );
|
||||
Wea = abs(W)*(cosha + sqrt(cosha*cosha-one));
|
||||
Wema= abs(W)*(cosha - sqrt(cosha*cosha-one));
|
||||
|
||||
num = num + ( one - Wema ) * mass * in;
|
||||
denom= ( Wea - one ) + mass*mass * (one - Wema);
|
||||
|
@ -76,91 +76,6 @@ WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
|
||||
StencilOdd.BuildSurfaceList(1,vol4);
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion<Impl>::Report(void)
|
||||
{
|
||||
RealD NP = _grid->_Nprocessors;
|
||||
RealD NN = _grid->NodeCount();
|
||||
RealD volume = 1;
|
||||
Coordinate latt = _grid->GlobalDimensions();
|
||||
for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
||||
|
||||
if ( DhopCalls > 0 ) {
|
||||
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion Number of DhopEO Calls : " << DhopCalls << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion TotalTime /Calls : " << DhopTotalTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion CommTime /Calls : " << DhopCommTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion FaceTime /Calls : " << DhopFaceTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion ComputeTime1/Calls : " << DhopComputeTime / DhopCalls << " us" << std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion ComputeTime2/Calls : " << DhopComputeTime2/ DhopCalls << " us" << std::endl;
|
||||
|
||||
// Average the compute time
|
||||
_grid->GlobalSum(DhopComputeTime);
|
||||
DhopComputeTime/=NP;
|
||||
RealD mflops = 1320*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
||||
|
||||
RealD Fullmflops = 1320*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
||||
|
||||
}
|
||||
|
||||
if ( DerivCalls > 0 ) {
|
||||
std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion Number of Deriv Calls : " <<DerivCalls <<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion CommTime/Calls : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion ComputeTime/Calls : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
|
||||
std::cout << GridLogMessage << "WilsonFermion Dhop ComputeTime/Calls : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
|
||||
|
||||
// how to count flops here?
|
||||
RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call ? : " << mflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node ? : " << mflops/NP << std::endl;
|
||||
|
||||
// how to count flops here?
|
||||
RealD Fullmflops = 144*volume*DerivCalls/(DerivDhopComputeTime+DerivCommTime)/2; // 2 for red black counting
|
||||
std::cout << GridLogMessage << "Average mflops/s per call (full) ? : " << Fullmflops << std::endl;
|
||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full) ? : " << Fullmflops/NP << std::endl; }
|
||||
|
||||
if (DerivCalls > 0 || DhopCalls > 0){
|
||||
std::cout << GridLogMessage << "WilsonFermion Stencil" <<std::endl; Stencil.Report();
|
||||
std::cout << GridLogMessage << "WilsonFermion StencilEven"<<std::endl; StencilEven.Report();
|
||||
std::cout << GridLogMessage << "WilsonFermion StencilOdd" <<std::endl; StencilOdd.Report();
|
||||
}
|
||||
if ( DhopCalls > 0){
|
||||
std::cout << GridLogMessage << "WilsonFermion Stencil Reporti()" <<std::endl; Stencil.Reporti(DhopCalls);
|
||||
std::cout << GridLogMessage << "WilsonFermion StencilEven Reporti()"<<std::endl; StencilEven.Reporti(DhopCalls);
|
||||
std::cout << GridLogMessage << "WilsonFermion StencilOdd Reporti()" <<std::endl; StencilOdd.Reporti(DhopCalls);
|
||||
}
|
||||
}
|
||||
|
||||
template<class Impl>
|
||||
void WilsonFermion<Impl>::ZeroCounters(void) {
|
||||
DhopCalls = 0; // ok
|
||||
DhopCommTime = 0;
|
||||
DhopComputeTime = 0;
|
||||
DhopComputeTime2= 0;
|
||||
DhopFaceTime = 0;
|
||||
DhopTotalTime = 0;
|
||||
|
||||
DerivCalls = 0; // ok
|
||||
DerivCommTime = 0;
|
||||
DerivComputeTime = 0;
|
||||
DerivDhopComputeTime = 0;
|
||||
|
||||
Stencil.ZeroCounters();
|
||||
StencilEven.ZeroCounters();
|
||||
StencilOdd.ZeroCounters();
|
||||
Stencil.ZeroCountersi();
|
||||
StencilEven.ZeroCountersi();
|
||||
StencilOdd.ZeroCountersi();
|
||||
}
|
||||
|
||||
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
||||
{
|
||||
@ -320,7 +235,6 @@ template <class Impl>
|
||||
void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
||||
GaugeField &mat, const FermionField &A,
|
||||
const FermionField &B, int dag) {
|
||||
DerivCalls++;
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
|
||||
Compressor compressor(dag);
|
||||
@ -329,11 +243,8 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
||||
FermionField Atilde(B.Grid());
|
||||
Atilde = A;
|
||||
|
||||
DerivCommTime-=usecond();
|
||||
st.HaloExchange(B, compressor);
|
||||
DerivCommTime+=usecond();
|
||||
|
||||
DerivComputeTime-=usecond();
|
||||
for (int mu = 0; mu < Nd; mu++) {
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Flip gamma (1+g)<->(1-g) if dag
|
||||
@ -341,7 +252,6 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
||||
int gamma = mu;
|
||||
if (!dag) gamma += Nd;
|
||||
|
||||
DerivDhopComputeTime -= usecond();
|
||||
int Ls=1;
|
||||
Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, B.Grid()->oSites(), B, Btilde, mu, gamma);
|
||||
|
||||
@ -349,9 +259,7 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
||||
// spin trace outer product
|
||||
//////////////////////////////////////////////////
|
||||
Impl::InsertForce4D(mat, Btilde, Atilde, mu);
|
||||
DerivDhopComputeTime += usecond();
|
||||
}
|
||||
DerivComputeTime += usecond();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
@ -398,7 +306,6 @@ void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, co
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
DhopCalls+=2;
|
||||
conformable(in.Grid(), _grid); // verifies full grid
|
||||
conformable(in.Grid(), out.Grid());
|
||||
|
||||
@ -410,7 +317,6 @@ void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int da
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
|
||||
{
|
||||
DhopCalls++;
|
||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||
|
||||
@ -423,7 +329,6 @@ void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int
|
||||
template <class Impl>
|
||||
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
||||
{
|
||||
DhopCalls++;
|
||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||
|
||||
@ -488,14 +393,12 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag)
|
||||
{
|
||||
DhopTotalTime-=usecond();
|
||||
#ifdef GRID_OMP
|
||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
|
||||
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
|
||||
else
|
||||
#endif
|
||||
DhopInternalSerial(st,lo,U,in,out,dag);
|
||||
DhopTotalTime+=usecond();
|
||||
}
|
||||
|
||||
template <class Impl>
|
||||
@ -504,6 +407,7 @@ void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueO
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag)
|
||||
{
|
||||
GRID_TRACE("DhopOverlapped");
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
|
||||
Compressor compressor(dag);
|
||||
@ -514,53 +418,55 @@ void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueO
|
||||
/////////////////////////////
|
||||
std::vector<std::vector<CommsRequest_t> > requests;
|
||||
st.Prepare();
|
||||
DhopFaceTime-=usecond();
|
||||
st.HaloGather(in,compressor);
|
||||
DhopFaceTime+=usecond();
|
||||
{
|
||||
GRID_TRACE("Gather");
|
||||
st.HaloGather(in,compressor);
|
||||
}
|
||||
|
||||
DhopCommTime -=usecond();
|
||||
tracePush("Communication");
|
||||
st.CommunicateBegin(requests);
|
||||
|
||||
/////////////////////////////
|
||||
// Overlap with comms
|
||||
/////////////////////////////
|
||||
DhopFaceTime-=usecond();
|
||||
st.CommsMergeSHM(compressor);
|
||||
DhopFaceTime+=usecond();
|
||||
{
|
||||
GRID_TRACE("MergeSHM");
|
||||
st.CommsMergeSHM(compressor);
|
||||
}
|
||||
|
||||
/////////////////////////////
|
||||
// do the compute interior
|
||||
/////////////////////////////
|
||||
int Opt = WilsonKernelsStatic::Opt;
|
||||
DhopComputeTime-=usecond();
|
||||
if (dag == DaggerYes) {
|
||||
GRID_TRACE("DhopDagInterior");
|
||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
|
||||
} else {
|
||||
GRID_TRACE("DhopInterior");
|
||||
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
|
||||
}
|
||||
DhopComputeTime+=usecond();
|
||||
|
||||
/////////////////////////////
|
||||
// Complete comms
|
||||
/////////////////////////////
|
||||
st.CommunicateComplete(requests);
|
||||
DhopCommTime +=usecond();
|
||||
|
||||
DhopFaceTime-=usecond();
|
||||
st.CommsMerge(compressor);
|
||||
DhopFaceTime+=usecond();
|
||||
tracePop("Communication");
|
||||
|
||||
{
|
||||
GRID_TRACE("Merge");
|
||||
st.CommsMerge(compressor);
|
||||
}
|
||||
/////////////////////////////
|
||||
// do the compute exterior
|
||||
/////////////////////////////
|
||||
|
||||
DhopComputeTime2-=usecond();
|
||||
if (dag == DaggerYes) {
|
||||
GRID_TRACE("DhopDagExterior");
|
||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
|
||||
} else {
|
||||
GRID_TRACE("DhopExterior");
|
||||
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
|
||||
}
|
||||
DhopComputeTime2+=usecond();
|
||||
};
|
||||
|
||||
|
||||
@ -570,20 +476,22 @@ void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo,
|
||||
const FermionField &in,
|
||||
FermionField &out, int dag)
|
||||
{
|
||||
GRID_TRACE("DhopSerial");
|
||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||
Compressor compressor(dag);
|
||||
DhopCommTime-=usecond();
|
||||
st.HaloExchange(in, compressor);
|
||||
DhopCommTime+=usecond();
|
||||
{
|
||||
GRID_TRACE("HaloExchange");
|
||||
st.HaloExchange(in, compressor);
|
||||
}
|
||||
|
||||
DhopComputeTime-=usecond();
|
||||
int Opt = WilsonKernelsStatic::Opt;
|
||||
if (dag == DaggerYes) {
|
||||
GRID_TRACE("DhopDag");
|
||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
|
||||
} else {
|
||||
GRID_TRACE("Dhop");
|
||||
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
|
||||
}
|
||||
DhopComputeTime+=usecond();
|
||||
};
|
||||
/*Change ends */
|
||||
|
||||
|
@ -72,20 +72,15 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
|
||||
if (SE->_is_local) { \
|
||||
int perm= SE->_permute; \
|
||||
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
|
||||
spProj(chi,tmp); \
|
||||
} else if ( st.same_node[Dir] ) { \
|
||||
chi = coalescedRead(buf[SE->_offset],lane); \
|
||||
} \
|
||||
acceleratorSynchronise(); \
|
||||
if (SE->_is_local || st.same_node[Dir] ) { \
|
||||
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
|
||||
Recon(result, Uchi); \
|
||||
} \
|
||||
spProj(chi,tmp); \
|
||||
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
|
||||
Recon(result, Uchi); \
|
||||
} \
|
||||
acceleratorSynchronise();
|
||||
|
||||
#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \
|
||||
SE = st.GetEntry(ptype, Dir, sF); \
|
||||
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
||||
if (!SE->_is_local ) { \
|
||||
auto chi = coalescedRead(buf[SE->_offset],lane); \
|
||||
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
|
||||
Recon(result, Uchi); \
|
||||
@ -416,19 +411,6 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
|
||||
#undef LoopBody
|
||||
}
|
||||
|
||||
#define KERNEL_CALL_TMP(A) \
|
||||
const uint64_t NN = Nsite*Ls; \
|
||||
auto U_p = & U_v[0]; \
|
||||
auto in_p = & in_v[0]; \
|
||||
auto out_p = & out_v[0]; \
|
||||
auto st_p = st_v._entries_p; \
|
||||
auto st_perm = st_v._permute_type; \
|
||||
accelerator_forNB( ss, NN, Simd::Nsimd(), { \
|
||||
int sF = ss; \
|
||||
int sU = ss/Ls; \
|
||||
WilsonKernels<Impl>::A(st_perm,st_p,U_p,buf,sF,sU,in_p,out_p); \
|
||||
}); \
|
||||
accelerator_barrier();
|
||||
|
||||
#define KERNEL_CALLNB(A) \
|
||||
const uint64_t NN = Nsite*Ls; \
|
||||
@ -448,8 +430,7 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
|
||||
int sF = ptr[ss]; \
|
||||
int sU = ss/Ls; \
|
||||
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
|
||||
}); \
|
||||
accelerator_barrier();
|
||||
});
|
||||
|
||||
#define ASM_CALL(A) \
|
||||
thread_for( ss, Nsite, { \
|
||||
@ -471,7 +452,7 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
||||
if( interior && exterior ) {
|
||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;}
|
||||
#ifdef SYCL_HACK
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_TMP(HandDhopSiteSycl); return; }
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteSycl); return; }
|
||||
#else
|
||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;}
|
||||
#endif
|
||||
|
@ -91,6 +91,19 @@ struct DDHMCFilter: public MomentumFilterBase<GaugeField>
|
||||
U_mu = where(mod(coor,B1)==Integer(B1-4),zzz_mu,U_mu);
|
||||
PokeIndex<LorentzIndex>(U, U_mu, mu);
|
||||
}
|
||||
if ( Width==4) {
|
||||
U = where(mod(coor,B1)==Integer(B1-4),zzz,U);
|
||||
U = where(mod(coor,B1)==Integer(B1-3),zzz,U);
|
||||
U = where(mod(coor,B1)==Integer(B1-2),zzz,U);
|
||||
U = where(mod(coor,B1)==Integer(B1-1),zzz,U);
|
||||
U = where(mod(coor,B1)==Integer(0) ,zzz,U);
|
||||
U = where(mod(coor,B1)==Integer(1) ,zzz,U);
|
||||
U = where(mod(coor,B1)==Integer(2) ,zzz,U);
|
||||
U = where(mod(coor,B1)==Integer(3) ,zzz,U);
|
||||
auto U_mu = PeekIndex<LorentzIndex>(U,mu);
|
||||
U_mu = where(mod(coor,B1)==Integer(B1-5),zzz_mu,U_mu);
|
||||
PokeIndex<LorentzIndex>(U, U_mu, mu);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -38,6 +38,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
template<typename MomentaField>
|
||||
struct MomentumFilterBase{
|
||||
virtual void applyFilter(MomentaField &P) const = 0;
|
||||
virtual ~MomentumFilterBase(){};
|
||||
};
|
||||
|
||||
//Do nothing
|
||||
@ -83,7 +84,6 @@ struct MomentumFilterApplyPhase: public MomentumFilterBase<MomentaField>{
|
||||
|
||||
}
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
||||
|
@ -67,6 +67,36 @@ NAMESPACE_BEGIN(Grid);
|
||||
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
|
||||
};
|
||||
|
||||
template<class Impl,class ImplF>
|
||||
class OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction : public GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF> {
|
||||
public:
|
||||
typedef OneFlavourRationalParams Params;
|
||||
private:
|
||||
static RationalActionParams transcribe(const Params &in){
|
||||
RationalActionParams out;
|
||||
out.inv_pow = 2;
|
||||
out.lo = in.lo;
|
||||
out.hi = in.hi;
|
||||
out.MaxIter = in.MaxIter;
|
||||
out.action_tolerance = out.md_tolerance = in.tolerance;
|
||||
out.action_degree = out.md_degree = in.degree;
|
||||
out.precision = in.precision;
|
||||
out.BoundsCheckFreq = in.BoundsCheckFreq;
|
||||
return out;
|
||||
}
|
||||
|
||||
public:
|
||||
OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction(FermionOperator<Impl> &_NumOp,
|
||||
FermionOperator<Impl> &_DenOp,
|
||||
FermionOperator<ImplF> &_NumOpF,
|
||||
FermionOperator<ImplF> &_DenOpF,
|
||||
const Params & p, Integer ReliableUpdateFreq
|
||||
) :
|
||||
GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF>(_NumOp, _DenOp,_NumOpF, _DenOpF, transcribe(p),ReliableUpdateFreq){}
|
||||
|
||||
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
#endif
|
||||
|
@ -85,7 +85,12 @@ NAMESPACE_BEGIN(Grid);
|
||||
PowerNegQuarter.Init(remez,param.tolerance,true);
|
||||
};
|
||||
|
||||
virtual std::string action_name(){return "OneFlavourRatioRationalPseudoFermionAction";}
|
||||
virtual std::string action_name(){
|
||||
std::stringstream sstream;
|
||||
sstream<<"OneFlavourRatioRationalPseudoFermionAction("
|
||||
<<DenOp.Mass()<<") / det("<<NumOp.Mass()<<")";
|
||||
return sstream.str();
|
||||
}
|
||||
|
||||
virtual std::string LogParameters(){
|
||||
std::stringstream sstream;
|
||||
|
@ -53,6 +53,7 @@ struct HMCparameters: Serializable {
|
||||
Integer, Trajectories, /* @brief Number of sweeps in this run */
|
||||
bool, MetropolisTest,
|
||||
Integer, NoMetropolisUntil,
|
||||
bool, PerformRandomShift, /* @brief Randomly shift the gauge configuration at the start of a trajectory */
|
||||
std::string, StartingType,
|
||||
IntegratorParameters, MD)
|
||||
|
||||
@ -63,6 +64,7 @@ struct HMCparameters: Serializable {
|
||||
StartTrajectory = 0;
|
||||
Trajectories = 10;
|
||||
StartingType = "HotStart";
|
||||
PerformRandomShift = true;
|
||||
/////////////////////////////////
|
||||
}
|
||||
|
||||
@ -83,6 +85,7 @@ struct HMCparameters: Serializable {
|
||||
std::cout << GridLogMessage << "[HMC parameters] Start trajectory : " << StartTrajectory << "\n";
|
||||
std::cout << GridLogMessage << "[HMC parameters] Metropolis test (on/off): " << std::boolalpha << MetropolisTest << "\n";
|
||||
std::cout << GridLogMessage << "[HMC parameters] Thermalization trajs : " << NoMetropolisUntil << "\n";
|
||||
std::cout << GridLogMessage << "[HMC parameters] Doing random shift : " << std::boolalpha << PerformRandomShift << "\n";
|
||||
std::cout << GridLogMessage << "[HMC parameters] Starting type : " << StartingType << "\n";
|
||||
MD.print_parameters();
|
||||
}
|
||||
@ -95,6 +98,7 @@ private:
|
||||
const HMCparameters Params;
|
||||
|
||||
typedef typename IntegratorType::Field Field;
|
||||
typedef typename IntegratorType::FieldImplementation FieldImplementation;
|
||||
typedef std::vector< HmcObservable<Field> * > ObsListType;
|
||||
|
||||
//pass these from the resource manager
|
||||
@ -138,26 +142,37 @@ private:
|
||||
|
||||
GridBase *Grid = U.Grid();
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Mainly for DDHMC perform a random translation of U modulo volume
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
std::cout << GridLogMessage << "--------------------------------------------------\n";
|
||||
std::cout << GridLogMessage << "Random shifting gauge field by [";
|
||||
for(int d=0;d<Grid->Nd();d++) {
|
||||
if(Params.PerformRandomShift){
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Mainly for DDHMC perform a random translation of U modulo volume
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
std::cout << GridLogMessage << "--------------------------------------------------\n";
|
||||
std::cout << GridLogMessage << "Random shifting gauge field by [";
|
||||
|
||||
int L = Grid->GlobalDimensions()[d];
|
||||
std::vector<typename FieldImplementation::GaugeLinkField> Umu(Grid->Nd(), U.Grid());
|
||||
for(int mu=0;mu<Grid->Nd();mu++) Umu[mu] = PeekIndex<LorentzIndex>(U, mu);
|
||||
|
||||
RealD rn_uniform; random(sRNG, rn_uniform);
|
||||
for(int d=0;d<Grid->Nd();d++) {
|
||||
|
||||
int shift = (int) (rn_uniform*L);
|
||||
int L = Grid->GlobalDimensions()[d];
|
||||
|
||||
std::cout << shift;
|
||||
if(d<Grid->Nd()-1) std::cout <<",";
|
||||
else std::cout <<"]\n";
|
||||
RealD rn_uniform; random(sRNG, rn_uniform);
|
||||
|
||||
int shift = (int) (rn_uniform*L);
|
||||
|
||||
std::cout << shift;
|
||||
if(d<Grid->Nd()-1) std::cout <<",";
|
||||
else std::cout <<"]\n";
|
||||
|
||||
U = Cshift(U,d,shift);
|
||||
//shift all fields together in a way that respects the gauge BCs
|
||||
for(int mu=0; mu < Grid->Nd(); mu++)
|
||||
Umu[mu] = FieldImplementation::CshiftLink(Umu[mu],d,shift);
|
||||
}
|
||||
|
||||
for(int mu=0;mu<Grid->Nd();mu++) PokeIndex<LorentzIndex>(U,Umu[mu],mu);
|
||||
|
||||
std::cout << GridLogMessage << "--------------------------------------------------\n";
|
||||
}
|
||||
std::cout << GridLogMessage << "--------------------------------------------------\n";
|
||||
|
||||
TheIntegrator.reset_timer();
|
||||
|
||||
|
@ -63,10 +63,10 @@ public:
|
||||
};
|
||||
|
||||
/*! @brief Class for Molecular Dynamics management */
|
||||
template <class FieldImplementation, class SmearingPolicy, class RepresentationPolicy>
|
||||
template <class FieldImplementation_, class SmearingPolicy, class RepresentationPolicy>
|
||||
class Integrator {
|
||||
protected:
|
||||
|
||||
typedef FieldImplementation_ FieldImplementation;
|
||||
typedef typename FieldImplementation::Field MomentaField; //for readability
|
||||
typedef typename FieldImplementation::Field Field;
|
||||
|
||||
@ -143,9 +143,10 @@ protected:
|
||||
force = FieldImplementation::projectForce(force); // Ta for gauge fields
|
||||
double end_force = usecond();
|
||||
|
||||
// DumpSliceNorm("force ",force,Nd-1);
|
||||
MomFilter->applyFilter(force);
|
||||
std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<<a <<"] "<<name<< std::endl;
|
||||
DumpSliceNorm("force ",force,Nd-1);
|
||||
DumpSliceNorm("force filtered ",force,Nd-1);
|
||||
|
||||
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x])
|
||||
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
|
||||
@ -153,7 +154,7 @@ protected:
|
||||
Real force_max = std::sqrt(maxLocalNorm2(force));
|
||||
Real impulse_max = force_max * ep * HMC_MOMENTUM_DENOMINATOR;
|
||||
|
||||
as[level].actions.at(a)->deriv_log(force_abs,force_max);
|
||||
as[level].actions.at(a)->deriv_log(force_abs,force_max,impulse_abs,impulse_max);
|
||||
|
||||
std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Force average: " << force_abs <<" "<<name<<std::endl;
|
||||
std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Force max : " << force_max <<" "<<name<<std::endl;
|
||||
@ -285,6 +286,8 @@ public:
|
||||
<<"["<<level<<"]["<< actionID<<"] : "
|
||||
<<" force max " << as[level].actions.at(actionID)->deriv_max_average()
|
||||
<<" norm " << as[level].actions.at(actionID)->deriv_norm_average()
|
||||
<<" Fdt max " << as[level].actions.at(actionID)->Fdt_max_average()
|
||||
<<" Fdt norm " << as[level].actions.at(actionID)->Fdt_norm_average()
|
||||
<<" calls " << as[level].actions.at(actionID)->deriv_num
|
||||
<< std::endl;
|
||||
}
|
||||
|
@ -92,10 +92,11 @@ NAMESPACE_BEGIN(Grid);
|
||||
* P 1/2 P 1/2
|
||||
*/
|
||||
|
||||
template <class FieldImplementation, class SmearingPolicy, class RepresentationPolicy = Representations<FundamentalRepresentation> >
|
||||
class LeapFrog : public Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>
|
||||
template <class FieldImplementation_, class SmearingPolicy, class RepresentationPolicy = Representations<FundamentalRepresentation> >
|
||||
class LeapFrog : public Integrator<FieldImplementation_, SmearingPolicy, RepresentationPolicy>
|
||||
{
|
||||
public:
|
||||
typedef FieldImplementation_ FieldImplementation;
|
||||
typedef LeapFrog<FieldImplementation, SmearingPolicy, RepresentationPolicy> Algorithm;
|
||||
INHERIT_FIELD_TYPES(FieldImplementation);
|
||||
|
||||
@ -135,13 +136,14 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <class FieldImplementation, class SmearingPolicy, class RepresentationPolicy = Representations<FundamentalRepresentation> >
|
||||
class MinimumNorm2 : public Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>
|
||||
template <class FieldImplementation_, class SmearingPolicy, class RepresentationPolicy = Representations<FundamentalRepresentation> >
|
||||
class MinimumNorm2 : public Integrator<FieldImplementation_, SmearingPolicy, RepresentationPolicy>
|
||||
{
|
||||
private:
|
||||
const RealD lambda = 0.1931833275037836;
|
||||
|
||||
public:
|
||||
typedef FieldImplementation_ FieldImplementation;
|
||||
INHERIT_FIELD_TYPES(FieldImplementation);
|
||||
|
||||
MinimumNorm2(GridBase* grid, IntegratorParameters Par, ActionSet<Field, RepresentationPolicy>& Aset, SmearingPolicy& Sm)
|
||||
@ -192,8 +194,8 @@ public:
|
||||
}
|
||||
};
|
||||
|
||||
template <class FieldImplementation, class SmearingPolicy, class RepresentationPolicy = Representations<FundamentalRepresentation> >
|
||||
class ForceGradient : public Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>
|
||||
template <class FieldImplementation_, class SmearingPolicy, class RepresentationPolicy = Representations<FundamentalRepresentation> >
|
||||
class ForceGradient : public Integrator<FieldImplementation_, SmearingPolicy, RepresentationPolicy>
|
||||
{
|
||||
private:
|
||||
const RealD lambda = 1.0 / 6.0;
|
||||
@ -202,6 +204,7 @@ private:
|
||||
const RealD theta = 0.0;
|
||||
|
||||
public:
|
||||
typedef FieldImplementation_ FieldImplementation;
|
||||
INHERIT_FIELD_TYPES(FieldImplementation);
|
||||
|
||||
// Looks like dH scales as dt^4. tested wilson/wilson 2 level.
|
||||
|
@ -31,15 +31,16 @@ directory
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
|
||||
struct TopologySmearingParameters : Serializable {
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(TopologySmearingParameters,
|
||||
int, steps,
|
||||
float, step_size,
|
||||
int, meas_interval,
|
||||
float, maxTau);
|
||||
float, init_step_size,
|
||||
float, maxTau,
|
||||
float, tolerance);
|
||||
|
||||
TopologySmearingParameters(int s = 0, float ss = 0.0f, int mi = 0, float mT = 0.0f):
|
||||
steps(s), step_size(ss), meas_interval(mi), maxTau(mT){}
|
||||
TopologySmearingParameters(float ss = 0.0f, int mi = 0, float mT = 0.0f, float tol = 1e-4):
|
||||
init_step_size(ss), meas_interval(mi), maxTau(mT), tolerance(tol){}
|
||||
|
||||
template < class ReaderClass >
|
||||
TopologySmearingParameters(Reader<ReaderClass>& Reader){
|
||||
@ -97,8 +98,8 @@ public:
|
||||
|
||||
if (Pars.do_smearing){
|
||||
// using wilson flow by default here
|
||||
WilsonFlow<PeriodicGimplR> WF(Pars.Smearing.steps, Pars.Smearing.step_size, Pars.Smearing.meas_interval);
|
||||
WF.smear_adaptive(Usmear, U, Pars.Smearing.maxTau);
|
||||
WilsonFlowAdaptive<PeriodicGimplR> WF(Pars.Smearing.init_step_size, Pars.Smearing.maxTau, Pars.Smearing.tolerance, Pars.Smearing.meas_interval);
|
||||
WF.smear(Usmear, U);
|
||||
Real T0 = WF.energyDensityPlaquette(Pars.Smearing.maxTau, Usmear);
|
||||
std::cout << GridLogMessage << std::setprecision(std::numeric_limits<Real>::digits10 + 1)
|
||||
<< "T0 : [ " << traj << " ] "<< T0 << std::endl;
|
||||
|
@ -33,27 +33,25 @@ directory
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template <class Gimpl>
|
||||
class WilsonFlow: public Smear<Gimpl>{
|
||||
class WilsonFlowBase: public Smear<Gimpl>{
|
||||
public:
|
||||
//Store generic measurements to take during smearing process using std::function
|
||||
typedef std::function<void(int, RealD, const typename Gimpl::GaugeField &)> FunctionType; //int: step, RealD: flow time, GaugeField : the gauge field
|
||||
|
||||
private:
|
||||
unsigned int Nstep;
|
||||
RealD epsilon; //for regular smearing this is the time step, for adaptive it is the initial time step
|
||||
|
||||
|
||||
protected:
|
||||
std::vector< std::pair<int, FunctionType> > functions; //The int maps to the measurement frequency
|
||||
|
||||
mutable WilsonGaugeAction<Gimpl> SG;
|
||||
|
||||
//Evolve the gauge field by 1 step and update tau
|
||||
void evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const;
|
||||
//Evolve the gauge field by 1 step and update tau and the current time step eps
|
||||
void evolve_step_adaptive(typename Gimpl::GaugeField&U, RealD &tau, RealD &eps, RealD maxTau) const;
|
||||
|
||||
|
||||
public:
|
||||
INHERIT_GIMPL_TYPES(Gimpl)
|
||||
|
||||
explicit WilsonFlowBase(unsigned int meas_interval =1):
|
||||
SG(WilsonGaugeAction<Gimpl>(3.0)) {
|
||||
// WilsonGaugeAction with beta 3.0
|
||||
setDefaultMeasurements(meas_interval);
|
||||
}
|
||||
|
||||
void resetActions(){ functions.clear(); }
|
||||
|
||||
void addMeasurement(int meas_interval, FunctionType meas){ functions.push_back({meas_interval, meas}); }
|
||||
@ -64,34 +62,11 @@ public:
|
||||
//and output to stdout
|
||||
void setDefaultMeasurements(int topq_meas_interval = 1);
|
||||
|
||||
explicit WilsonFlow(unsigned int Nstep, RealD epsilon, unsigned int interval = 1):
|
||||
Nstep(Nstep),
|
||||
epsilon(epsilon),
|
||||
SG(WilsonGaugeAction<Gimpl>(3.0)) {
|
||||
// WilsonGaugeAction with beta 3.0
|
||||
assert(epsilon > 0.0);
|
||||
LogMessage();
|
||||
setDefaultMeasurements(interval);
|
||||
}
|
||||
|
||||
void LogMessage() {
|
||||
std::cout << GridLogMessage
|
||||
<< "[WilsonFlow] Nstep : " << Nstep << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "[WilsonFlow] epsilon : " << epsilon << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "[WilsonFlow] full trajectory : " << Nstep * epsilon << std::endl;
|
||||
}
|
||||
|
||||
virtual void smear(GaugeField&, const GaugeField&) const;
|
||||
|
||||
virtual void derivative(GaugeField&, const GaugeField&, const GaugeField&) const {
|
||||
void derivative(GaugeField&, const GaugeField&, const GaugeField&) const override{
|
||||
assert(0);
|
||||
// undefined for WilsonFlow
|
||||
}
|
||||
|
||||
void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau) const;
|
||||
|
||||
//Compute t^2 <E(t)> for time t from the plaquette
|
||||
static RealD energyDensityPlaquette(const RealD t, const GaugeField& U);
|
||||
|
||||
@ -115,82 +90,63 @@ public:
|
||||
std::vector<RealD> flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval = 1);
|
||||
};
|
||||
|
||||
//Basic iterative Wilson flow
|
||||
template <class Gimpl>
|
||||
class WilsonFlow: public WilsonFlowBase<Gimpl>{
|
||||
private:
|
||||
int Nstep; //number of steps
|
||||
RealD epsilon; //step size
|
||||
|
||||
//Evolve the gauge field by 1 step of size eps and update tau
|
||||
void evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const;
|
||||
|
||||
public:
|
||||
INHERIT_GIMPL_TYPES(Gimpl)
|
||||
|
||||
//Integrate the Wilson flow for Nstep steps of size epsilon
|
||||
WilsonFlow(const RealD epsilon, const int Nstep, unsigned int meas_interval = 1): WilsonFlowBase<Gimpl>(meas_interval), Nstep(Nstep), epsilon(epsilon){}
|
||||
|
||||
void smear(GaugeField& out, const GaugeField& in) const override;
|
||||
};
|
||||
|
||||
//Wilson flow with adaptive step size
|
||||
template <class Gimpl>
|
||||
class WilsonFlowAdaptive: public WilsonFlowBase<Gimpl>{
|
||||
private:
|
||||
RealD init_epsilon; //initial step size
|
||||
RealD maxTau; //integrate to t=maxTau
|
||||
RealD tolerance; //integration error tolerance
|
||||
|
||||
//Evolve the gauge field by 1 step and update tau and the current time step eps
|
||||
//
|
||||
//If the step size eps is too large that a significant integration error results,
|
||||
//the gauge field (U) and tau will not be updated and the function will return 0; eps will be adjusted to a smaller
|
||||
//value for the next iteration.
|
||||
//
|
||||
//For a successful integration step the function will return 1
|
||||
int evolve_step_adaptive(typename Gimpl::GaugeField&U, RealD &tau, RealD &eps) const;
|
||||
|
||||
public:
|
||||
INHERIT_GIMPL_TYPES(Gimpl)
|
||||
|
||||
WilsonFlowAdaptive(const RealD init_epsilon, const RealD maxTau, const RealD tolerance, unsigned int meas_interval = 1):
|
||||
WilsonFlowBase<Gimpl>(meas_interval), init_epsilon(init_epsilon), maxTau(maxTau), tolerance(tolerance){}
|
||||
|
||||
void smear(GaugeField& out, const GaugeField& in) const override;
|
||||
};
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Implementations
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
template <class Gimpl>
|
||||
void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const{
|
||||
GaugeField Z(U.Grid());
|
||||
GaugeField tmp(U.Grid());
|
||||
SG.deriv(U, Z);
|
||||
Z *= 0.25; // Z0 = 1/4 * F(U)
|
||||
Gimpl::update_field(Z, U, -2.0*epsilon); // U = W1 = exp(ep*Z0)*W0
|
||||
|
||||
Z *= -17.0/8.0;
|
||||
SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1
|
||||
Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1
|
||||
Gimpl::update_field(Z, U, -2.0*epsilon); // U_= W2 = exp(ep*Z)*W1
|
||||
|
||||
Z *= -4.0/3.0;
|
||||
SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2
|
||||
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
|
||||
Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2
|
||||
tau += epsilon;
|
||||
}
|
||||
|
||||
template <class Gimpl>
|
||||
void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD &tau, RealD &eps, RealD maxTau) const{
|
||||
if (maxTau - tau < eps){
|
||||
eps = maxTau-tau;
|
||||
}
|
||||
//std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl;
|
||||
GaugeField Z(U.Grid());
|
||||
GaugeField Zprime(U.Grid());
|
||||
GaugeField tmp(U.Grid()), Uprime(U.Grid());
|
||||
Uprime = U;
|
||||
SG.deriv(U, Z);
|
||||
Zprime = -Z;
|
||||
Z *= 0.25; // Z0 = 1/4 * F(U)
|
||||
Gimpl::update_field(Z, U, -2.0*eps); // U = W1 = exp(ep*Z0)*W0
|
||||
|
||||
Z *= -17.0/8.0;
|
||||
SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1
|
||||
Zprime += 2.0*tmp;
|
||||
Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1
|
||||
Gimpl::update_field(Z, U, -2.0*eps); // U_= W2 = exp(ep*Z)*W1
|
||||
|
||||
|
||||
Z *= -4.0/3.0;
|
||||
SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2
|
||||
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
|
||||
Gimpl::update_field(Z, U, -2.0*eps); // V(t+e) = exp(ep*Z)*W2
|
||||
|
||||
// Ramos
|
||||
Gimpl::update_field(Zprime, Uprime, -2.0*eps); // V'(t+e) = exp(ep*Z')*W0
|
||||
// Compute distance as norm^2 of the difference
|
||||
GaugeField diffU = U - Uprime;
|
||||
RealD diff = norm2(diffU);
|
||||
// adjust integration step
|
||||
|
||||
tau += eps;
|
||||
//std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl;
|
||||
|
||||
eps = eps*0.95*std::pow(1e-4/diff,1./3.);
|
||||
//std::cout << GridLogMessage << "New epsilon : " << epsilon << std::endl;
|
||||
|
||||
}
|
||||
|
||||
|
||||
template <class Gimpl>
|
||||
RealD WilsonFlow<Gimpl>::energyDensityPlaquette(const RealD t, const GaugeField& U){
|
||||
RealD WilsonFlowBase<Gimpl>::energyDensityPlaquette(const RealD t, const GaugeField& U){
|
||||
static WilsonGaugeAction<Gimpl> SG(3.0);
|
||||
return 2.0 * t * t * SG.S(U)/U.Grid()->gSites();
|
||||
}
|
||||
|
||||
//Compute t^2 <E(t)> for time from the 1x1 cloverleaf form
|
||||
template <class Gimpl>
|
||||
RealD WilsonFlow<Gimpl>::energyDensityCloverleaf(const RealD t, const GaugeField& U){
|
||||
RealD WilsonFlowBase<Gimpl>::energyDensityCloverleaf(const RealD t, const GaugeField& U){
|
||||
typedef typename Gimpl::GaugeLinkField GaugeMat;
|
||||
typedef typename Gimpl::GaugeField GaugeLorentz;
|
||||
|
||||
@ -215,7 +171,7 @@ RealD WilsonFlow<Gimpl>::energyDensityCloverleaf(const RealD t, const GaugeField
|
||||
|
||||
|
||||
template <class Gimpl>
|
||||
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityPlaquette(GaugeField &V, const GaugeField& U, int measure_interval){
|
||||
std::vector<RealD> WilsonFlowBase<Gimpl>::flowMeasureEnergyDensityPlaquette(GaugeField &V, const GaugeField& U, int measure_interval){
|
||||
std::vector<RealD> out;
|
||||
resetActions();
|
||||
addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){
|
||||
@ -227,13 +183,13 @@ std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityPlaquette(GaugeFie
|
||||
}
|
||||
|
||||
template <class Gimpl>
|
||||
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityPlaquette(const GaugeField& U, int measure_interval){
|
||||
std::vector<RealD> WilsonFlowBase<Gimpl>::flowMeasureEnergyDensityPlaquette(const GaugeField& U, int measure_interval){
|
||||
GaugeField V(U);
|
||||
return flowMeasureEnergyDensityPlaquette(V,U, measure_interval);
|
||||
}
|
||||
|
||||
template <class Gimpl>
|
||||
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityCloverleaf(GaugeField &V, const GaugeField& U, int measure_interval){
|
||||
std::vector<RealD> WilsonFlowBase<Gimpl>::flowMeasureEnergyDensityCloverleaf(GaugeField &V, const GaugeField& U, int measure_interval){
|
||||
std::vector<RealD> out;
|
||||
resetActions();
|
||||
addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){
|
||||
@ -245,16 +201,52 @@ std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityCloverleaf(GaugeFi
|
||||
}
|
||||
|
||||
template <class Gimpl>
|
||||
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval){
|
||||
std::vector<RealD> WilsonFlowBase<Gimpl>::flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval){
|
||||
GaugeField V(U);
|
||||
return flowMeasureEnergyDensityCloverleaf(V,U, measure_interval);
|
||||
}
|
||||
|
||||
template <class Gimpl>
|
||||
void WilsonFlowBase<Gimpl>::setDefaultMeasurements(int topq_meas_interval){
|
||||
addMeasurement(1, [](int step, RealD t, const typename Gimpl::GaugeField &U){
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " << step << " " << t << " " << energyDensityPlaquette(t,U) << std::endl;
|
||||
});
|
||||
addMeasurement(topq_meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " << WilsonLoops<Gimpl>::TopologicalCharge(U) << std::endl;
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
//#define WF_TIMING
|
||||
|
||||
template <class Gimpl>
|
||||
void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const{
|
||||
GaugeField Z(U.Grid());
|
||||
GaugeField tmp(U.Grid());
|
||||
this->SG.deriv(U, Z);
|
||||
Z *= 0.25; // Z0 = 1/4 * F(U)
|
||||
Gimpl::update_field(Z, U, -2.0*epsilon); // U = W1 = exp(ep*Z0)*W0
|
||||
|
||||
Z *= -17.0/8.0;
|
||||
this->SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1
|
||||
Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1
|
||||
Gimpl::update_field(Z, U, -2.0*epsilon); // U_= W2 = exp(ep*Z)*W1
|
||||
|
||||
Z *= -4.0/3.0;
|
||||
this->SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2
|
||||
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
|
||||
Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2
|
||||
tau += epsilon;
|
||||
}
|
||||
|
||||
template <class Gimpl>
|
||||
void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const{
|
||||
std::cout << GridLogMessage
|
||||
<< "[WilsonFlow] Nstep : " << Nstep << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "[WilsonFlow] epsilon : " << epsilon << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "[WilsonFlow] full trajectory : " << Nstep * epsilon << std::endl;
|
||||
|
||||
out = in;
|
||||
RealD taus = 0.;
|
||||
for (unsigned int step = 1; step <= Nstep; step++) { //step indicates the number of smearing steps applied at the time of measurement
|
||||
@ -266,37 +258,93 @@ void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const{
|
||||
std::cout << "Time to evolve " << diff.count() << " s\n";
|
||||
#endif
|
||||
//Perform measurements
|
||||
for(auto const &meas : functions)
|
||||
for(auto const &meas : this->functions)
|
||||
if( step % meas.first == 0 ) meas.second(step,taus,out);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
template <class Gimpl>
|
||||
void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau) const{
|
||||
out = in;
|
||||
RealD taus = 0.;
|
||||
RealD eps = epsilon;
|
||||
unsigned int step = 0;
|
||||
do{
|
||||
step++;
|
||||
//std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl;
|
||||
evolve_step_adaptive(out, taus, eps, maxTau);
|
||||
//Perform measurements
|
||||
for(auto const &meas : functions)
|
||||
if( step % meas.first == 0 ) meas.second(step,taus,out);
|
||||
} while (taus < maxTau);
|
||||
int WilsonFlowAdaptive<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD &tau, RealD &eps) const{
|
||||
if (maxTau - tau < eps){
|
||||
eps = maxTau-tau;
|
||||
}
|
||||
//std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl;
|
||||
GaugeField Z(U.Grid());
|
||||
GaugeField Zprime(U.Grid());
|
||||
GaugeField tmp(U.Grid()), Uprime(U.Grid()), Usave(U.Grid());
|
||||
Uprime = U;
|
||||
Usave = U;
|
||||
|
||||
this->SG.deriv(U, Z);
|
||||
Zprime = -Z;
|
||||
Z *= 0.25; // Z0 = 1/4 * F(U)
|
||||
Gimpl::update_field(Z, U, -2.0*eps); // U = W1 = exp(ep*Z0)*W0
|
||||
|
||||
Z *= -17.0/8.0;
|
||||
this->SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1
|
||||
Zprime += 2.0*tmp;
|
||||
Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1
|
||||
Gimpl::update_field(Z, U, -2.0*eps); // U_= W2 = exp(ep*Z)*W1
|
||||
|
||||
|
||||
Z *= -4.0/3.0;
|
||||
this->SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2
|
||||
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
|
||||
Gimpl::update_field(Z, U, -2.0*eps); // V(t+e) = exp(ep*Z)*W2
|
||||
|
||||
// Ramos arXiv:1301.4388
|
||||
Gimpl::update_field(Zprime, Uprime, -2.0*eps); // V'(t+e) = exp(ep*Z')*W0
|
||||
|
||||
// Compute distance using Ramos' definition
|
||||
GaugeField diffU = U - Uprime;
|
||||
RealD max_dist = 0;
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
typename Gimpl::GaugeLinkField diffU_mu = PeekIndex<LorentzIndex>(diffU, mu);
|
||||
RealD dist_mu = sqrt( maxLocalNorm2(diffU_mu) ) /Nc/Nc; //maximize over sites
|
||||
max_dist = std::max(max_dist, dist_mu); //maximize over mu
|
||||
}
|
||||
|
||||
int ret;
|
||||
if(max_dist < tolerance) {
|
||||
tau += eps;
|
||||
ret = 1;
|
||||
} else {
|
||||
U = Usave;
|
||||
ret = 0;
|
||||
}
|
||||
eps = eps*0.95*std::pow(tolerance/max_dist,1./3.);
|
||||
std::cout << GridLogMessage << "Adaptive smearing : Distance: "<< max_dist <<" Step successful: " << ret << " New epsilon: " << eps << std::endl;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
template <class Gimpl>
|
||||
void WilsonFlow<Gimpl>::setDefaultMeasurements(int topq_meas_interval){
|
||||
addMeasurement(1, [](int step, RealD t, const typename Gimpl::GaugeField &U){
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " << step << " " << t << " " << energyDensityPlaquette(t,U) << std::endl;
|
||||
});
|
||||
addMeasurement(topq_meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " << WilsonLoops<Gimpl>::TopologicalCharge(U) << std::endl;
|
||||
});
|
||||
void WilsonFlowAdaptive<Gimpl>::smear(GaugeField& out, const GaugeField& in) const{
|
||||
std::cout << GridLogMessage
|
||||
<< "[WilsonFlow] initial epsilon : " << init_epsilon << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "[WilsonFlow] full trajectory : " << maxTau << std::endl;
|
||||
std::cout << GridLogMessage
|
||||
<< "[WilsonFlow] tolerance : " << tolerance << std::endl;
|
||||
out = in;
|
||||
RealD taus = 0.;
|
||||
RealD eps = init_epsilon;
|
||||
unsigned int step = 0;
|
||||
do{
|
||||
int step_success = evolve_step_adaptive(out, taus, eps);
|
||||
step += step_success; //step will not be incremented if the integration step fails
|
||||
|
||||
//Perform measurements
|
||||
if(step_success)
|
||||
for(auto const &meas : this->functions)
|
||||
if( step % meas.first == 0 ) meas.second(step,taus,out);
|
||||
} while (taus < maxTau);
|
||||
}
|
||||
|
||||
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
@ -227,26 +227,38 @@ namespace ConjugateBC {
|
||||
//shift = -1
|
||||
//Out(x) = U_\mu(x-mu) | x_\mu != 0
|
||||
// = U*_\mu(L-1) | x_\mu == 0
|
||||
//shift = 2
|
||||
//Out(x) = U_\mu(x+2\hat\mu) | x_\mu < L-2
|
||||
// = U*_\mu(1) | x_\mu == L-1
|
||||
// = U*_\mu(0) | x_\mu == L-2
|
||||
//shift = -2
|
||||
//Out(x) = U_\mu(x-2mu) | x_\mu > 1
|
||||
// = U*_\mu(L-2) | x_\mu == 0
|
||||
// = U*_\mu(L-1) | x_\mu == 1
|
||||
//etc
|
||||
template<class gauge> Lattice<gauge>
|
||||
CshiftLink(const Lattice<gauge> &Link, int mu, int shift)
|
||||
{
|
||||
GridBase *grid = Link.Grid();
|
||||
int Lmu = grid->GlobalDimensions()[mu] - 1;
|
||||
int Lmu = grid->GlobalDimensions()[mu];
|
||||
assert(abs(shift) < Lmu && "Invalid shift value");
|
||||
|
||||
Lattice<iScalar<vInteger>> coor(grid);
|
||||
LatticeCoordinate(coor, mu);
|
||||
|
||||
Lattice<gauge> tmp(grid);
|
||||
if(shift == 1){
|
||||
tmp = Cshift(Link, mu, 1);
|
||||
tmp = where(coor == Lmu, conjugate(tmp), tmp);
|
||||
if(shift > 0){
|
||||
tmp = Cshift(Link, mu, shift);
|
||||
tmp = where(coor >= Lmu-shift, conjugate(tmp), tmp);
|
||||
return tmp;
|
||||
}else if(shift == -1){
|
||||
}else if(shift < 0){
|
||||
tmp = Link;
|
||||
tmp = where(coor == Lmu, conjugate(tmp), tmp);
|
||||
return Cshift(tmp, mu, -1);
|
||||
}else assert(0 && "Invalid shift value");
|
||||
return tmp; //shuts up the compiler fussing about the return type
|
||||
tmp = where(coor >= Lmu+shift, conjugate(tmp), tmp);
|
||||
return Cshift(tmp, mu, shift);
|
||||
}
|
||||
|
||||
//shift == 0
|
||||
return Link;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -72,12 +72,12 @@ public:
|
||||
|
||||
//Fix the gauge field Umu
|
||||
//0 < alpha < 1 is related to the step size, cf https://arxiv.org/pdf/1405.5812.pdf
|
||||
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) {
|
||||
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) {
|
||||
GridBase *grid = Umu.Grid();
|
||||
GaugeMat xform(grid);
|
||||
SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier,orthog,err_on_no_converge);
|
||||
}
|
||||
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) {
|
||||
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform,Real alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) {
|
||||
//Fix the gauge field Umu and also return the gauge transformation from the original gauge field, xform
|
||||
|
||||
GridBase *grid = Umu.Grid();
|
||||
|
@ -35,7 +35,7 @@ Author: neo <cossu@post.kek.jp>
|
||||
*/
|
||||
// Time-stamp: <2015-06-16 23:27:54 neo>
|
||||
//----------------------------------------------------------------------
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <pmmintrin.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
@ -290,6 +290,8 @@ public:
|
||||
std::vector<Decompress> DecompressionsSHM;
|
||||
std::vector<CopyReceiveBuffer> CopyReceiveBuffers ;
|
||||
std::vector<CachedTransfer> CachedTransfers;
|
||||
std::vector<CommsRequest_t> MpiReqs;
|
||||
|
||||
///////////////////////////////////////////////////////////
|
||||
// Unified Comms buffers for all directions
|
||||
///////////////////////////////////////////////////////////
|
||||
@ -357,9 +359,9 @@ public:
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
|
||||
{
|
||||
reqs.resize(Packets.size());
|
||||
accelerator_barrier();
|
||||
for(int i=0;i<Packets.size();i++){
|
||||
_grid->StencilSendToRecvFromBegin(reqs[i],
|
||||
_grid->StencilSendToRecvFromBegin(MpiReqs,
|
||||
Packets[i].send_buf,
|
||||
Packets[i].to_rank,Packets[i].do_send,
|
||||
Packets[i].recv_buf,
|
||||
@ -370,41 +372,19 @@ public:
|
||||
|
||||
void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs)
|
||||
{
|
||||
for(int i=0;i<Packets.size();i++){
|
||||
_grid->StencilSendToRecvFromComplete(reqs[i],i);
|
||||
}
|
||||
_grid->StencilSendToRecvFromComplete(MpiReqs,0);
|
||||
}
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Blocking send and receive. Either sequential or parallel.
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
void Communicate(void)
|
||||
{
|
||||
if ( CartesianCommunicator::CommunicatorPolicy == CartesianCommunicator::CommunicatorPolicySequential ){
|
||||
/////////////////////////////////////////////////////////
|
||||
// several way threaded on different communicators.
|
||||
// Cannot combine with Dirichlet operators
|
||||
// This scheme is needed on Intel Omnipath for best performance
|
||||
// Deprecate once there are very few omnipath clusters
|
||||
/////////////////////////////////////////////////////////
|
||||
int nthreads = CartesianCommunicator::nCommThreads;
|
||||
int old = GridThread::GetThreads();
|
||||
GridThread::SetThreads(nthreads);
|
||||
thread_for(i,Packets.size(),{
|
||||
_grid->StencilSendToRecvFrom(Packets[i].send_buf,
|
||||
Packets[i].to_rank,Packets[i].do_send,
|
||||
Packets[i].recv_buf,
|
||||
Packets[i].from_rank,Packets[i].do_recv,
|
||||
Packets[i].bytes,i);
|
||||
});
|
||||
GridThread::SetThreads(old);
|
||||
} else {
|
||||
/////////////////////////////////////////////////////////
|
||||
// Concurrent and non-threaded asynch calls to MPI
|
||||
/////////////////////////////////////////////////////////
|
||||
std::vector<std::vector<CommsRequest_t> > reqs;
|
||||
this->CommunicateBegin(reqs);
|
||||
this->CommunicateComplete(reqs);
|
||||
}
|
||||
/////////////////////////////////////////////////////////
|
||||
// Concurrent and non-threaded asynch calls to MPI
|
||||
/////////////////////////////////////////////////////////
|
||||
std::vector<std::vector<CommsRequest_t> > reqs;
|
||||
this->CommunicateBegin(reqs);
|
||||
this->CommunicateComplete(reqs);
|
||||
}
|
||||
|
||||
template<class compressor> void HaloExchange(const Lattice<vobj> &source,compressor &compress)
|
||||
@ -484,7 +464,6 @@ public:
|
||||
face_table_computed=1;
|
||||
assert(u_comm_offset==_unified_buffer_size);
|
||||
|
||||
accelerator_barrier();
|
||||
}
|
||||
|
||||
/////////////////////////
|
||||
@ -499,6 +478,7 @@ public:
|
||||
Packets.resize(0);
|
||||
CopyReceiveBuffers.resize(0);
|
||||
CachedTransfers.resize(0);
|
||||
MpiReqs.resize(0);
|
||||
}
|
||||
void AddCopy(void *from,void * to, Integer bytes)
|
||||
{
|
||||
@ -711,7 +691,9 @@ public:
|
||||
this->_comms_recv.resize(npoints);
|
||||
this->same_node.resize(npoints);
|
||||
|
||||
if ( p.dirichlet.size() ) DirichletBlock(p.dirichlet); // comms send/recv set up
|
||||
if ( p.dirichlet.size() ==0 ) p.dirichlet.resize(grid->Nd(),0);
|
||||
|
||||
DirichletBlock(p.dirichlet); // comms send/recv set up
|
||||
|
||||
_unified_buffer_size=0;
|
||||
surface_list.resize(0);
|
||||
@ -793,7 +775,6 @@ public:
|
||||
u_simd_recv_buf[l] = (cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj));
|
||||
u_simd_send_buf[l] = (cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj));
|
||||
}
|
||||
|
||||
PrecomputeByteOffsets();
|
||||
}
|
||||
|
||||
@ -1105,7 +1086,6 @@ public:
|
||||
// Gather locally
|
||||
////////////////////////////////////////////////////////
|
||||
assert(send_buf!=NULL);
|
||||
|
||||
Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,comm_off,so);
|
||||
}
|
||||
|
||||
@ -1212,8 +1192,9 @@ public:
|
||||
face_table[face_idx].size()*sizeof(face_table_host[0]));
|
||||
}
|
||||
|
||||
if ( comms_send || comms_recv )
|
||||
if ( comms_send || comms_recv ) {
|
||||
Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type);
|
||||
}
|
||||
face_idx++;
|
||||
|
||||
//spointers[0] -- low
|
||||
@ -1270,10 +1251,6 @@ public:
|
||||
return 0;
|
||||
}
|
||||
|
||||
void ZeroCounters(void) { };
|
||||
|
||||
void Report(void) { };
|
||||
|
||||
};
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <Grid/GridCore.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
int world_rank; // Use to control world rank for print guarding
|
||||
int acceleratorAbortOnGpuError=1;
|
||||
uint32_t accelerator_threads=2;
|
||||
uint32_t acceleratorThreads(void) {return accelerator_threads;};
|
||||
@ -16,7 +17,7 @@ void acceleratorThreads(uint32_t t) {accelerator_threads = t;};
|
||||
#ifdef GRID_CUDA
|
||||
cudaDeviceProp *gpu_props;
|
||||
cudaStream_t copyStream;
|
||||
cudaStream_t cpuStream;
|
||||
cudaStream_t computeStream;
|
||||
void acceleratorInit(void)
|
||||
{
|
||||
int nDevices = 1;
|
||||
@ -24,7 +25,8 @@ void acceleratorInit(void)
|
||||
gpu_props = new cudaDeviceProp[nDevices];
|
||||
|
||||
char * localRankStr = NULL;
|
||||
int rank = 0, world_rank=0;
|
||||
int rank = 0;
|
||||
world_rank=0;
|
||||
if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);}
|
||||
if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
|
||||
if ((localRankStr = getenv(ENV_RANK_SLURM )) != NULL) { world_rank = atoi(localRankStr);}
|
||||
@ -99,7 +101,7 @@ void acceleratorInit(void)
|
||||
|
||||
cudaSetDevice(device);
|
||||
cudaStreamCreate(©Stream);
|
||||
cudaStreamCreate(&cpuStream);
|
||||
cudaStreamCreate(&computeStream);
|
||||
const int len=64;
|
||||
char busid[len];
|
||||
if( rank == world_rank ) {
|
||||
@ -114,7 +116,7 @@ void acceleratorInit(void)
|
||||
#ifdef GRID_HIP
|
||||
hipDeviceProp_t *gpu_props;
|
||||
hipStream_t copyStream;
|
||||
hipStream_t cpuStream;
|
||||
hipStream_t computeStream;
|
||||
void acceleratorInit(void)
|
||||
{
|
||||
int nDevices = 1;
|
||||
@ -122,7 +124,8 @@ void acceleratorInit(void)
|
||||
gpu_props = new hipDeviceProp_t[nDevices];
|
||||
|
||||
char * localRankStr = NULL;
|
||||
int rank = 0, world_rank=0;
|
||||
int rank = 0;
|
||||
world_rank=0;
|
||||
// We extract the local rank initialization using an environment variable
|
||||
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
|
||||
{
|
||||
@ -183,7 +186,7 @@ void acceleratorInit(void)
|
||||
#endif
|
||||
hipSetDevice(device);
|
||||
hipStreamCreate(©Stream);
|
||||
hipStreamCreate(&cpuStream);
|
||||
hipStreamCreate(&computeStream);
|
||||
const int len=64;
|
||||
char busid[len];
|
||||
if( rank == world_rank ) {
|
||||
@ -210,7 +213,8 @@ void acceleratorInit(void)
|
||||
#endif
|
||||
|
||||
char * localRankStr = NULL;
|
||||
int rank = 0, world_rank=0;
|
||||
int rank = 0;
|
||||
world_rank=0;
|
||||
|
||||
// We extract the local rank initialization using an environment variable
|
||||
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
|
||||
|
@ -107,7 +107,7 @@ void acceleratorInit(void);
|
||||
|
||||
extern int acceleratorAbortOnGpuError;
|
||||
extern cudaStream_t copyStream;
|
||||
extern cudaStream_t cpuStream;
|
||||
extern cudaStream_t computeStream;
|
||||
|
||||
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
||||
#ifdef GRID_SIMT
|
||||
@ -135,7 +135,7 @@ inline void cuda_mem(void)
|
||||
}; \
|
||||
dim3 cu_threads(nsimd,acceleratorThreads(),1); \
|
||||
dim3 cu_blocks ((num1+nt-1)/nt,num2,1); \
|
||||
LambdaApply<<<cu_blocks,cu_threads,0,cpuStream>>>(num1,num2,nsimd,lambda); \
|
||||
LambdaApply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,nsimd,lambda); \
|
||||
}
|
||||
|
||||
#define accelerator_for6dNB(iter1, num1, \
|
||||
@ -154,7 +154,7 @@ inline void cuda_mem(void)
|
||||
}; \
|
||||
dim3 cu_blocks (num1,num2,num3); \
|
||||
dim3 cu_threads(num4,num5,num6); \
|
||||
Lambda6Apply<<<cu_blocks,cu_threads,0,cpuStream>>>(num1,num2,num3,num4,num5,num6,lambda); \
|
||||
Lambda6Apply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,num3,num4,num5,num6,lambda); \
|
||||
}
|
||||
|
||||
template<typename lambda> __global__
|
||||
@ -190,7 +190,7 @@ void Lambda6Apply(uint64_t num1, uint64_t num2, uint64_t num3,
|
||||
|
||||
#define accelerator_barrier(dummy) \
|
||||
{ \
|
||||
cudaStreamSynchronize(cpuStream); \
|
||||
cudaStreamSynchronize(computeStream); \
|
||||
cudaError err = cudaGetLastError(); \
|
||||
if ( cudaSuccess != err ) { \
|
||||
printf("accelerator_barrier(): Cuda error %s \n", \
|
||||
@ -340,7 +340,7 @@ NAMESPACE_BEGIN(Grid);
|
||||
#define accelerator_inline __host__ __device__ inline
|
||||
|
||||
extern hipStream_t copyStream;
|
||||
extern hipStream_t cpuStream;
|
||||
extern hipStream_t computeStream;
|
||||
/*These routines define mapping from thread grid to loop & vector lane indexing */
|
||||
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
||||
#ifdef GRID_SIMT
|
||||
@ -362,16 +362,15 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
||||
dim3 hip_blocks ((num1+nt-1)/nt,num2,1); \
|
||||
if(hip_threads.x * hip_threads.y * hip_threads.z <= 64){ \
|
||||
hipLaunchKernelGGL(LambdaApply64,hip_blocks,hip_threads, \
|
||||
0,cpuStream, \
|
||||
0,computeStream, \
|
||||
num1,num2,nsimd, lambda); \
|
||||
} else { \
|
||||
hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads, \
|
||||
0,cpuStream, \
|
||||
0,computeStream, \
|
||||
num1,num2,nsimd, lambda); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
template<typename lambda> __global__
|
||||
__launch_bounds__(64,1)
|
||||
void LambdaApply64(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
|
||||
@ -400,7 +399,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
|
||||
|
||||
#define accelerator_barrier(dummy) \
|
||||
{ \
|
||||
hipStreamSynchronize(cpuStream); \
|
||||
hipStreamSynchronize(computeStream); \
|
||||
auto err = hipGetLastError(); \
|
||||
if ( err != hipSuccess ) { \
|
||||
printf("After hipDeviceSynchronize() : HIP error %s \n", hipGetErrorString( err )); \
|
||||
@ -443,7 +442,7 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(bas
|
||||
|
||||
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
|
||||
{
|
||||
hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);
|
||||
hipMemcpyDtoDAsync(to,from,bytes, copyStream);
|
||||
}
|
||||
inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); };
|
||||
|
||||
|
@ -356,6 +356,11 @@ void Grid_init(int *argc,char ***argv)
|
||||
//////////////////////////////////////////////////////////
|
||||
CartesianCommunicator::Init(argc,argv);
|
||||
|
||||
GridLogger::GlobalStopWatch.Stop();
|
||||
CartesianCommunicator::BarrierWorld();
|
||||
GridLogger::GlobalStopWatch.Reset();// Back to zero with synchronised clock
|
||||
GridLogger::GlobalStopWatch.Start();
|
||||
|
||||
////////////////////////////////////
|
||||
// Banner after MPI (unless GPU)
|
||||
////////////////////////////////////
|
||||
|
@ -128,8 +128,14 @@ template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, c
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// Make a mixed precision conjugate gradient
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
|
||||
#if 1
|
||||
RealD delta=1.e-4;
|
||||
std::cout << GridLogMessage << "Calling reliable update Conjugate Gradient" <<std::endl;
|
||||
ConjugateGradientReliableUpdate<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations*MaxOuterIterations,delta,SinglePrecGrid5,LinOpF,LinOpD);
|
||||
#else
|
||||
std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
|
||||
MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
|
||||
#endif
|
||||
MPCG(src,psi);
|
||||
}
|
||||
};
|
||||
@ -141,6 +147,10 @@ int main(int argc, char **argv) {
|
||||
using namespace Grid;
|
||||
|
||||
Grid_init(&argc, &argv);
|
||||
|
||||
CartesianCommunicator::BarrierWorld();
|
||||
std::cout << GridLogMessage << " Clock skew check" <<std::endl;
|
||||
|
||||
int threads = GridThread::GetThreads();
|
||||
|
||||
// Typedefs to simplify notation
|
||||
@ -161,7 +171,7 @@ int main(int argc, char **argv) {
|
||||
// MD.name = std::string("Force Gradient");
|
||||
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
|
||||
MD.name = std::string("MinimumNorm2");
|
||||
MD.MDsteps = 4;
|
||||
MD.MDsteps = 6;
|
||||
MD.trajL = 1.0;
|
||||
|
||||
HMCparameters HMCparams;
|
||||
@ -183,7 +193,7 @@ int main(int argc, char **argv) {
|
||||
CPparams.saveInterval = 1;
|
||||
CPparams.format = "IEEE64BIG";
|
||||
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
||||
|
||||
std::cout << "loaded NERSC checpointer"<<std::endl;
|
||||
RNGModuleParameters RNGpar;
|
||||
RNGpar.serial_seeds = "1 2 3 4 5";
|
||||
RNGpar.parallel_seeds = "6 7 8 9 10";
|
||||
@ -204,7 +214,8 @@ int main(int argc, char **argv) {
|
||||
Real light_mass = 7.8e-4;
|
||||
Real strange_mass = 0.02132;
|
||||
Real pv_mass = 1.0;
|
||||
std::vector<Real> hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||
// std::vector<Real> hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||
std::vector<Real> hasenbusch({ light_mass, 5e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||
|
||||
// FIXME:
|
||||
// Same in MC and MD
|
||||
@ -287,6 +298,7 @@ int main(int argc, char **argv) {
|
||||
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
|
||||
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
|
||||
TheHMC.initializeGaugeFieldAndRNGs(U);
|
||||
std::cout << "loaded NERSC gauge field"<<std::endl;
|
||||
|
||||
|
||||
// These lines are unecessary if BC are all periodic
|
||||
|
474
HMC/Mobius2p1f_DD_RHMC_96I_mixedmshift.cc
Normal file
474
HMC/Mobius2p1f_DD_RHMC_96I_mixedmshift.cc
Normal file
@ -0,0 +1,474 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/Test_hmc_EODWFRatio.cc
|
||||
|
||||
Copyright (C) 2015-2016
|
||||
|
||||
Author: Peter Boyle <pabobyle@ph.ed.ac.uk>
|
||||
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
NAMESPACE_BEGIN(Grid);
|
||||
|
||||
template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class SchurOperatorF>
|
||||
class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
|
||||
public:
|
||||
typedef typename FermionOperatorD::FermionField FieldD;
|
||||
typedef typename FermionOperatorF::FermionField FieldF;
|
||||
|
||||
using OperatorFunction<FieldD>::operator();
|
||||
|
||||
RealD Tolerance;
|
||||
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
|
||||
Integer MaxInnerIterations;
|
||||
Integer MaxOuterIterations;
|
||||
GridBase* SinglePrecGrid4; //Grid for single-precision fields
|
||||
GridBase* SinglePrecGrid5; //Grid for single-precision fields
|
||||
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
|
||||
|
||||
FermionOperatorF &FermOpF;
|
||||
FermionOperatorD &FermOpD;;
|
||||
SchurOperatorF &LinOpF;
|
||||
SchurOperatorD &LinOpD;
|
||||
|
||||
Integer TotalInnerIterations; //Number of inner CG iterations
|
||||
Integer TotalOuterIterations; //Number of restarts
|
||||
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
|
||||
|
||||
MixedPrecisionConjugateGradientOperatorFunction(RealD tol,
|
||||
Integer maxinnerit,
|
||||
Integer maxouterit,
|
||||
GridBase* _sp_grid4,
|
||||
GridBase* _sp_grid5,
|
||||
FermionOperatorF &_FermOpF,
|
||||
FermionOperatorD &_FermOpD,
|
||||
SchurOperatorF &_LinOpF,
|
||||
SchurOperatorD &_LinOpD):
|
||||
LinOpF(_LinOpF),
|
||||
LinOpD(_LinOpD),
|
||||
FermOpF(_FermOpF),
|
||||
FermOpD(_FermOpD),
|
||||
Tolerance(tol),
|
||||
InnerTolerance(tol),
|
||||
MaxInnerIterations(maxinnerit),
|
||||
MaxOuterIterations(maxouterit),
|
||||
SinglePrecGrid4(_sp_grid4),
|
||||
SinglePrecGrid5(_sp_grid5),
|
||||
OuterLoopNormMult(100.)
|
||||
{
|
||||
/* Debugging instances of objects; references are stored
|
||||
std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " <<std::hex<< &LinOpF<<std::dec <<std::endl;
|
||||
std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpD " <<std::hex<< &LinOpD<<std::dec <<std::endl;
|
||||
std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpF " <<std::hex<< &FermOpF<<std::dec <<std::endl;
|
||||
std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <<std::hex<< &FermOpD<<std::dec <<std::endl;
|
||||
*/
|
||||
};
|
||||
|
||||
void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
|
||||
|
||||
std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
|
||||
|
||||
SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
|
||||
|
||||
// std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <<std::hex<< &(SchurOpU->_Mat)<<std::dec <<std::endl;
|
||||
// std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpD " <<std::hex<< &(LinOpD._Mat) <<std::dec <<std::endl;
|
||||
// Assumption made in code to extract gauge field
|
||||
// We could avoid storing LinopD reference alltogether ?
|
||||
assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// Must snarf a single precision copy of the gauge field in Linop_d argument
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
typedef typename FermionOperatorF::GaugeField GaugeFieldF;
|
||||
typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF;
|
||||
typedef typename FermionOperatorD::GaugeField GaugeFieldD;
|
||||
typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD;
|
||||
|
||||
GridBase * GridPtrF = SinglePrecGrid4;
|
||||
GridBase * GridPtrD = FermOpD.Umu.Grid();
|
||||
GaugeFieldF U_f (GridPtrF);
|
||||
GaugeLinkFieldF Umu_f(GridPtrF);
|
||||
// std::cout << " Dim gauge field "<<GridPtrF->Nd()<<std::endl; // 4d
|
||||
// std::cout << " Dim gauge field "<<GridPtrD->Nd()<<std::endl; // 4d
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// Moving this to a Clone method of fermion operator would allow to duplicate the
|
||||
// physics parameters and decrease gauge field copies
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
GaugeLinkFieldD Umu_d(GridPtrD);
|
||||
for(int mu=0;mu<Nd*2;mu++){
|
||||
Umu_d = PeekIndex<LorentzIndex>(FermOpD.Umu, mu);
|
||||
precisionChange(Umu_f,Umu_d);
|
||||
PokeIndex<LorentzIndex>(FermOpF.Umu, Umu_f, mu);
|
||||
}
|
||||
pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
|
||||
pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
// Make a mixed precision conjugate gradient
|
||||
////////////////////////////////////////////////////////////////////////////////////
|
||||
#if 1
|
||||
RealD delta=1.e-4;
|
||||
std::cout << GridLogMessage << "Calling reliable update Conjugate Gradient" <<std::endl;
|
||||
ConjugateGradientReliableUpdate<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations*MaxOuterIterations,delta,SinglePrecGrid5,LinOpF,LinOpD);
|
||||
#else
|
||||
std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
|
||||
MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
|
||||
#endif
|
||||
MPCG(src,psi);
|
||||
}
|
||||
};
|
||||
|
||||
NAMESPACE_END(Grid);
|
||||
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
using namespace Grid;
|
||||
|
||||
Grid_init(&argc, &argv);
|
||||
int threads = GridThread::GetThreads();
|
||||
|
||||
// Typedefs to simplify notation
|
||||
typedef WilsonImplR FermionImplPolicy;
|
||||
typedef WilsonImplF FermionImplPolicyF;
|
||||
|
||||
typedef MobiusFermionR FermionAction;
|
||||
typedef MobiusFermionF FermionActionF;
|
||||
typedef typename FermionAction::FermionField FermionField;
|
||||
typedef typename FermionActionF::FermionField FermionFieldF;
|
||||
|
||||
typedef Grid::XmlReader Serialiser;
|
||||
|
||||
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
||||
IntegratorParameters MD;
|
||||
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
|
||||
// MD.name = std::string("Leap Frog");
|
||||
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
|
||||
// MD.name = std::string("Force Gradient");
|
||||
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
|
||||
MD.name = std::string("MinimumNorm2");
|
||||
MD.MDsteps = 6;
|
||||
MD.trajL = 1.0;
|
||||
|
||||
HMCparameters HMCparams;
|
||||
HMCparams.StartTrajectory = 1077;
|
||||
HMCparams.Trajectories = 1;
|
||||
HMCparams.NoMetropolisUntil= 0;
|
||||
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
|
||||
// HMCparams.StartingType =std::string("ColdStart");
|
||||
HMCparams.StartingType =std::string("CheckpointStart");
|
||||
HMCparams.MD = MD;
|
||||
HMCWrapper TheHMC(HMCparams);
|
||||
|
||||
// Grid from the command line arguments --grid and --mpi
|
||||
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
|
||||
|
||||
CheckpointerParameters CPparams;
|
||||
CPparams.config_prefix = "ckpoint_DDHMC_lat";
|
||||
CPparams.rng_prefix = "ckpoint_DDHMC_rng";
|
||||
CPparams.saveInterval = 1;
|
||||
CPparams.format = "IEEE64BIG";
|
||||
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
||||
|
||||
RNGModuleParameters RNGpar;
|
||||
RNGpar.serial_seeds = "1 2 3 4 5";
|
||||
RNGpar.parallel_seeds = "6 7 8 9 10";
|
||||
TheHMC.Resources.SetRNGSeeds(RNGpar);
|
||||
|
||||
// Construct observables
|
||||
// here there is too much indirection
|
||||
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
|
||||
TheHMC.Resources.AddObservable<PlaqObs>();
|
||||
//////////////////////////////////////////////
|
||||
|
||||
const int Ls = 12;
|
||||
RealD M5 = 1.8;
|
||||
RealD b = 1.5;
|
||||
RealD c = 0.5;
|
||||
Real beta = 2.31;
|
||||
// Real light_mass = 5.4e-4;
|
||||
Real light_mass = 7.8e-4;
|
||||
Real strange_mass = 0.02132;
|
||||
Real pv_mass = 1.0;
|
||||
// std::vector<Real> hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||
std::vector<Real> hasenbusch({ light_mass, 5e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||
|
||||
// FIXME:
|
||||
// Same in MC and MD
|
||||
// Need to mix precision too
|
||||
OneFlavourRationalParams SFRp; // Strange
|
||||
SFRp.lo = 4.0e-3;
|
||||
SFRp.hi = 90.0;
|
||||
SFRp.MaxIter = 60000;
|
||||
SFRp.tolerance= 1.0e-8;
|
||||
SFRp.mdtolerance= 1.0e-6;
|
||||
SFRp.degree = 12;
|
||||
SFRp.precision= 50;
|
||||
SFRp.BoundsCheckFreq=0;
|
||||
|
||||
OneFlavourRationalParams OFRp; // Up/down
|
||||
OFRp.lo = 2.0e-5;
|
||||
OFRp.hi = 90.0;
|
||||
OFRp.MaxIter = 60000;
|
||||
OFRp.tolerance= 1.0e-8;
|
||||
OFRp.mdtolerance= 1.0e-6;
|
||||
// OFRp.degree = 20; converges
|
||||
// OFRp.degree = 16;
|
||||
OFRp.degree = 12;
|
||||
OFRp.precision= 80;
|
||||
OFRp.BoundsCheckFreq=0;
|
||||
|
||||
auto GridPtr = TheHMC.Resources.GetCartesian();
|
||||
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
|
||||
|
||||
typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
|
||||
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
|
||||
typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,LinearOperatorD,LinearOperatorF> MxPCG;
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Domain decomposed
|
||||
////////////////////////////////////////////////////////////////
|
||||
Coordinate latt4 = GridPtr->GlobalDimensions();
|
||||
Coordinate mpi = GridPtr->ProcessorGrid();
|
||||
Coordinate shm;
|
||||
|
||||
GlobalSharedMemory::GetShmDims(mpi,shm);
|
||||
|
||||
Coordinate CommDim(Nd);
|
||||
for(int d=0;d<Nd;d++) CommDim[d]= (mpi[d]/shm[d])>1 ? 1 : 0;
|
||||
|
||||
Coordinate NonDirichlet(Nd+1,0);
|
||||
Coordinate Dirichlet(Nd+1,0);
|
||||
Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0];
|
||||
Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1];
|
||||
Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2];
|
||||
Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3];
|
||||
|
||||
Coordinate Block4(Nd);
|
||||
Block4[0] = Dirichlet[1];
|
||||
Block4[1] = Dirichlet[2];
|
||||
Block4[2] = Dirichlet[3];
|
||||
Block4[3] = Dirichlet[4];
|
||||
|
||||
int Width=3;
|
||||
TheHMC.Resources.SetMomentumFilter(new DDHMCFilter<WilsonImplR::Field>(Block4,Width));
|
||||
|
||||
//////////////////////////
|
||||
// Fermion Grids
|
||||
//////////////////////////
|
||||
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
|
||||
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
|
||||
|
||||
Coordinate simdF = GridDefaultSimd(Nd,vComplexF::Nsimd());
|
||||
auto GridPtrF = SpaceTimeGrid::makeFourDimGrid(latt4,simdF,mpi);
|
||||
auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF);
|
||||
auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF);
|
||||
auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF);
|
||||
|
||||
IwasakiGaugeActionR GaugeAction(beta);
|
||||
|
||||
// temporarily need a gauge field
|
||||
LatticeGaugeField U(GridPtr);
|
||||
LatticeGaugeFieldF UF(GridPtrF);
|
||||
|
||||
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
|
||||
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
|
||||
TheHMC.initializeGaugeFieldAndRNGs(U);
|
||||
|
||||
|
||||
// These lines are unecessary if BC are all periodic
|
||||
std::vector<Complex> boundary = {1,1,1,-1};
|
||||
FermionAction::ImplParams Params(boundary);
|
||||
Params.dirichlet=NonDirichlet;
|
||||
FermionAction::ImplParams ParamsDir(boundary);
|
||||
ParamsDir.dirichlet=Dirichlet;
|
||||
|
||||
// double StoppingCondition = 1e-14;
|
||||
// double MDStoppingCondition = 1e-9;
|
||||
double StoppingCondition = 1e-10;
|
||||
double MDStoppingCondition = 1e-7;
|
||||
double MDStoppingConditionLoose = 1e-6;
|
||||
double MaxCGIterations = 300000;
|
||||
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
|
||||
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
|
||||
|
||||
////////////////////////////////////
|
||||
// Collect actions
|
||||
////////////////////////////////////
|
||||
ActionLevel<HMCWrapper::Field> Level1(1);
|
||||
ActionLevel<HMCWrapper::Field> Level2(4);
|
||||
ActionLevel<HMCWrapper::Field> Level3(8);
|
||||
|
||||
////////////////////////////////////
|
||||
// Strange action
|
||||
////////////////////////////////////
|
||||
FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
|
||||
FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params);
|
||||
|
||||
FermionAction StrangeOpDir (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, ParamsDir);
|
||||
FermionAction StrangePauliVillarsOpDir(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, ParamsDir);
|
||||
|
||||
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionBdy(StrangeOpDir,StrangeOp,SFRp);
|
||||
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionLocal(StrangePauliVillarsOpDir,StrangeOpDir,SFRp);
|
||||
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionPVBdy(StrangePauliVillarsOp,StrangePauliVillarsOpDir,SFRp);
|
||||
Level1.push_back(&StrangePseudoFermionBdy); // ok
|
||||
Level2.push_back(&StrangePseudoFermionLocal);
|
||||
Level1.push_back(&StrangePseudoFermionPVBdy); //ok
|
||||
|
||||
////////////////////////////////////
|
||||
// up down action
|
||||
////////////////////////////////////
|
||||
std::vector<Real> light_den;
|
||||
std::vector<Real> light_num;
|
||||
std::vector<int> dirichlet_den;
|
||||
std::vector<int> dirichlet_num;
|
||||
|
||||
int n_hasenbusch = hasenbusch.size();
|
||||
light_den.push_back(light_mass); dirichlet_den.push_back(0);
|
||||
for(int h=0;h<n_hasenbusch;h++){
|
||||
light_den.push_back(hasenbusch[h]); dirichlet_den.push_back(1);
|
||||
}
|
||||
|
||||
for(int h=0;h<n_hasenbusch;h++){
|
||||
light_num.push_back(hasenbusch[h]); dirichlet_num.push_back(1);
|
||||
}
|
||||
light_num.push_back(pv_mass); dirichlet_num.push_back(0);
|
||||
|
||||
std::vector<FermionAction *> Numerators;
|
||||
std::vector<FermionActionF *> NumeratorsF;
|
||||
std::vector<FermionAction *> Denominators;
|
||||
std::vector<FermionActionF *> DenominatorsF;
|
||||
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
|
||||
|
||||
#define MIXED_PRECISION
|
||||
#ifdef MIXED_PRECISION
|
||||
std::vector<OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF> *> Bdys;
|
||||
#else
|
||||
std::vector<OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
|
||||
#endif
|
||||
std::vector<MxPCG *> ActionMPCG;
|
||||
std::vector<MxPCG *> MPCG;
|
||||
|
||||
typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
|
||||
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
|
||||
std::vector<LinearOperatorD *> LinOpD;
|
||||
std::vector<LinearOperatorF *> LinOpF;
|
||||
|
||||
for(int h=0;h<n_hasenbusch+1;h++){
|
||||
std::cout << GridLogMessage
|
||||
<< " 2f quotient Action ";
|
||||
std::cout << "det D("<<light_den[h]<<")";
|
||||
if ( dirichlet_den[h] ) std::cout << "^dirichlet ";
|
||||
std::cout << "/ det D("<<light_num[h]<<")";
|
||||
if ( dirichlet_num[h] ) std::cout << "^dirichlet ";
|
||||
std::cout << std::endl;
|
||||
|
||||
FermionAction::ImplParams ParamsNum(boundary);
|
||||
FermionAction::ImplParams ParamsDen(boundary);
|
||||
FermionActionF::ImplParams ParamsNumF(boundary);
|
||||
FermionActionF::ImplParams ParamsDenF(boundary);
|
||||
|
||||
if ( dirichlet_num[h]==1) ParamsNum.dirichlet = Dirichlet;
|
||||
else ParamsNum.dirichlet = NonDirichlet;
|
||||
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, ParamsNum));
|
||||
|
||||
if ( dirichlet_den[h]==1) ParamsDen.dirichlet = Dirichlet;
|
||||
else ParamsDen.dirichlet = NonDirichlet;
|
||||
|
||||
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, ParamsDen));
|
||||
|
||||
ParamsDenF.dirichlet = ParamsDen.dirichlet;
|
||||
DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsDenF));
|
||||
|
||||
ParamsNumF.dirichlet = ParamsNum.dirichlet;
|
||||
NumeratorsF.push_back (new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_num[h],M5,b,c, ParamsNumF));
|
||||
|
||||
LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
|
||||
LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h]));
|
||||
|
||||
double conv = MDStoppingCondition;
|
||||
if (h<3) conv= MDStoppingConditionLoose; // Relax on first two hasenbusch factors
|
||||
const int MX_inner = 5000;
|
||||
MPCG.push_back(new MxPCG(conv,
|
||||
MX_inner,
|
||||
MaxCGIterations,
|
||||
GridPtrF,
|
||||
FrbGridF,
|
||||
*DenominatorsF[h],*Denominators[h],
|
||||
*LinOpF[h], *LinOpD[h]) );
|
||||
|
||||
ActionMPCG.push_back(new MxPCG(StoppingCondition,
|
||||
MX_inner,
|
||||
MaxCGIterations,
|
||||
GridPtrF,
|
||||
FrbGridF,
|
||||
*DenominatorsF[h],*Denominators[h],
|
||||
*LinOpF[h], *LinOpD[h]) );
|
||||
|
||||
|
||||
if(h!=0) {
|
||||
// Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],MDCG,CG));
|
||||
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG));
|
||||
} else {
|
||||
#ifdef MIXED_PRECISION
|
||||
Bdys.push_back( new OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF>(
|
||||
*Numerators[h],*Denominators[h],
|
||||
*NumeratorsF[h],*DenominatorsF[h],
|
||||
OFRp, 500) );
|
||||
Bdys.push_back( new OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF>(
|
||||
*Numerators[h],*Denominators[h],
|
||||
*NumeratorsF[h],*DenominatorsF[h],
|
||||
OFRp, 500) );
|
||||
#else
|
||||
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
|
||||
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
int nquo=Quotients.size();
|
||||
Level1.push_back(Bdys[0]);
|
||||
Level1.push_back(Bdys[1]);
|
||||
for(int h=0;h<nquo-1;h++){
|
||||
Level2.push_back(Quotients[h]);
|
||||
}
|
||||
Level2.push_back(Quotients[nquo-1]);
|
||||
|
||||
/////////////////////////////////////////////////////////////
|
||||
// Gauge action
|
||||
/////////////////////////////////////////////////////////////
|
||||
Level3.push_back(&GaugeAction);
|
||||
TheHMC.TheAction.push_back(Level1);
|
||||
TheHMC.TheAction.push_back(Level2);
|
||||
TheHMC.TheAction.push_back(Level3);
|
||||
std::cout << GridLogMessage << " Action complete "<< std::endl;
|
||||
/////////////////////////////////////////////////////////////
|
||||
|
||||
TheHMC.Run(); // no smearing
|
||||
|
||||
Grid_finalize();
|
||||
} // main
|
||||
|
||||
|
||||
|
@ -420,7 +420,6 @@ public:
|
||||
FGrid->Broadcast(0,&ncall,sizeof(ncall));
|
||||
|
||||
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
|
||||
Dw.ZeroCounters();
|
||||
|
||||
time_statistics timestat;
|
||||
std::vector<double> t_time(ncall);
|
||||
@ -589,7 +588,6 @@ public:
|
||||
FGrid->Broadcast(0,&ncall,sizeof(ncall));
|
||||
|
||||
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
|
||||
Ds.ZeroCounters();
|
||||
|
||||
time_statistics timestat;
|
||||
std::vector<double> t_time(ncall);
|
||||
|
@ -186,7 +186,6 @@ int main (int argc, char ** argv)
|
||||
|
||||
if (1) {
|
||||
FGrid->Barrier();
|
||||
Dw.ZeroCounters();
|
||||
Dw.Dhop(src,result,0);
|
||||
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
|
||||
double t0=usecond();
|
||||
@ -231,7 +230,6 @@ int main (int argc, char ** argv)
|
||||
exit(-1);
|
||||
}
|
||||
assert (norm2(err)< 1.0e-4 );
|
||||
Dw.Report();
|
||||
}
|
||||
|
||||
if (1)
|
||||
@ -306,7 +304,6 @@ int main (int argc, char ** argv)
|
||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||
{
|
||||
Dw.ZeroCounters();
|
||||
FGrid->Barrier();
|
||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||
double t0=usecond();
|
||||
@ -328,7 +325,6 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
|
||||
Dw.Report();
|
||||
}
|
||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||
Dw.DhopOE(src_e,r_o,DaggerNo);
|
||||
|
@ -93,14 +93,11 @@ int main (int argc, char ** argv)
|
||||
int ncall =1000;
|
||||
if (1) {
|
||||
FGrid->Barrier();
|
||||
Dw.ZeroCounters();
|
||||
Dw.Dhop(src,result,0);
|
||||
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
|
||||
double t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
__SSC_START;
|
||||
Dw.Dhop(src,result,0);
|
||||
__SSC_STOP;
|
||||
}
|
||||
double t1=usecond();
|
||||
FGrid->Barrier();
|
||||
@ -114,7 +111,6 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
||||
Dw.Report();
|
||||
}
|
||||
|
||||
|
||||
@ -136,14 +132,11 @@ int main (int argc, char ** argv)
|
||||
GparityDomainWallFermionD DwD(Umu_d,*FGrid_d,*FrbGrid_d,*UGrid_d,*UrbGrid_d,mass,M5);
|
||||
if (1) {
|
||||
FGrid_d->Barrier();
|
||||
DwD.ZeroCounters();
|
||||
DwD.Dhop(src_d,result_d,0);
|
||||
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
|
||||
double t0=usecond();
|
||||
for(int i=0;i<ncall;i++){
|
||||
__SSC_START;
|
||||
DwD.Dhop(src_d,result_d,0);
|
||||
__SSC_STOP;
|
||||
}
|
||||
double t1=usecond();
|
||||
FGrid_d->Barrier();
|
||||
@ -157,7 +150,6 @@ int main (int argc, char ** argv)
|
||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
||||
DwD.Report();
|
||||
}
|
||||
#endif
|
||||
Grid_finalize();
|
||||
|
@ -103,35 +103,30 @@ int main (int argc, char ** argv)
|
||||
#define BENCH_DW(A,...) \
|
||||
Dw. A (__VA_ARGS__); \
|
||||
FGrid->Barrier(); \
|
||||
Dw.CayleyZeroCounters(); \
|
||||
t0=usecond(); \
|
||||
for(int i=0;i<ncall;i++){ \
|
||||
Dw. A (__VA_ARGS__); \
|
||||
} \
|
||||
t1=usecond(); \
|
||||
FGrid->Barrier(); \
|
||||
Dw.CayleyReport(); \
|
||||
std::cout<<GridLogMessage << "Called " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
|
||||
std::cout<<GridLogMessage << "******************"<<std::endl;
|
||||
|
||||
#define BENCH_ZDW(A,in,out) \
|
||||
zDw. A (in,out); \
|
||||
FGrid->Barrier(); \
|
||||
zDw.CayleyZeroCounters(); \
|
||||
t0=usecond(); \
|
||||
for(int i=0;i<ncall;i++){ \
|
||||
zDw. A (in,out); \
|
||||
} \
|
||||
t1=usecond(); \
|
||||
FGrid->Barrier(); \
|
||||
zDw.CayleyReport(); \
|
||||
std::cout<<GridLogMessage << "Called ZDw " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
|
||||
std::cout<<GridLogMessage << "******************"<<std::endl;
|
||||
|
||||
#define BENCH_DW_SSC(A,in,out) \
|
||||
Dw. A (in,out); \
|
||||
FGrid->Barrier(); \
|
||||
Dw.CayleyZeroCounters(); \
|
||||
t0=usecond(); \
|
||||
for(int i=0;i<ncall;i++){ \
|
||||
__SSC_START ; \
|
||||
@ -140,7 +135,6 @@ int main (int argc, char ** argv)
|
||||
} \
|
||||
t1=usecond(); \
|
||||
FGrid->Barrier(); \
|
||||
Dw.CayleyReport(); \
|
||||
std::cout<<GridLogMessage << "Called " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
|
||||
std::cout<<GridLogMessage << "******************"<<std::endl;
|
||||
|
||||
|
@ -155,7 +155,6 @@ int main (int argc, char ** argv)
|
||||
//int ncall=1;
|
||||
|
||||
// Counters
|
||||
Dw.ZeroCounters();
|
||||
Grid.Barrier();
|
||||
|
||||
double t0=usecond();
|
||||
@ -201,7 +200,6 @@ int main (int argc, char ** argv)
|
||||
err = ref-result;
|
||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||
|
||||
Dw.Report();
|
||||
|
||||
// guard
|
||||
double err0 = norm2(err);
|
||||
|
20
configure.ac
20
configure.ac
@ -128,6 +128,26 @@ case ${ac_LAPACK} in
|
||||
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
|
||||
esac
|
||||
|
||||
############### tracing
|
||||
AC_ARG_ENABLE([tracing],
|
||||
[AC_HELP_STRING([--enable-tracing=none|nvtx|roctx|timer], [enable tracing])],
|
||||
[ac_TRACING=${enable_tracing}], [ac_TRACING=none])
|
||||
|
||||
case ${ac_TRACING} in
|
||||
nvtx)
|
||||
AC_DEFINE([GRID_TRACING_NVTX],[1],[use NVTX])
|
||||
LIBS="${LIBS} -lnvToolsExt64_1"
|
||||
;;
|
||||
roctx)
|
||||
AC_DEFINE([GRID_TRACING_ROCTX],[1],[use ROCTX])
|
||||
LIBS="${LIBS} -lroctx64"
|
||||
;;
|
||||
timer)
|
||||
AC_DEFINE([GRID_TRACING_TIMER],[1],[use TIMER]);;
|
||||
*)
|
||||
AC_DEFINE([GRID_TRACING_NONE],[1],[no tracing]);;
|
||||
esac
|
||||
|
||||
############### fermions
|
||||
AC_ARG_ENABLE([fermion-reps],
|
||||
[AC_HELP_STRING([--enable-fermion-reps=yes|no], [enable extra fermion representation support])],
|
||||
|
436
examples/Example_christoph.cc
Normal file
436
examples/Example_christoph.cc
Normal file
@ -0,0 +1,436 @@
|
||||
/*
|
||||
* Warning: This code illustrative only: not well tested, and not meant for production use
|
||||
* without regression / tests being applied
|
||||
*/
|
||||
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
|
||||
RealD LLscale =1.0;
|
||||
RealD LCscale =1.0;
|
||||
|
||||
template<class Gimpl,class Field> class CovariantLaplacianCshift : public SparseMatrixBase<Field>
|
||||
{
|
||||
public:
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
GridBase *grid;
|
||||
GaugeField U;
|
||||
|
||||
CovariantLaplacianCshift(GaugeField &_U) :
|
||||
grid(_U.Grid()),
|
||||
U(_U) { };
|
||||
|
||||
virtual GridBase *Grid(void) { return grid; };
|
||||
|
||||
virtual void M (const Field &in, Field &out)
|
||||
{
|
||||
out=Zero();
|
||||
for(int mu=0;mu<Nd-1;mu++) {
|
||||
GaugeLinkField Umu = PeekIndex<LorentzIndex>(U, mu); // NB: Inefficent
|
||||
out = out - Gimpl::CovShiftForward(Umu,mu,in);
|
||||
out = out - Gimpl::CovShiftBackward(Umu,mu,in);
|
||||
out = out + 2.0*in;
|
||||
}
|
||||
};
|
||||
virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian
|
||||
virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid
|
||||
virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid
|
||||
virtual void MdirAll (const Field &in, std::vector<Field> &out) {assert(0);}; // Unimplemented need only for multigrid
|
||||
};
|
||||
|
||||
void MakePhase(Coordinate mom,LatticeComplex &phase)
|
||||
{
|
||||
GridBase *grid = phase.Grid();
|
||||
auto latt_size = grid->GlobalDimensions();
|
||||
ComplexD ci(0.0,1.0);
|
||||
phase=Zero();
|
||||
|
||||
LatticeComplex coor(phase.Grid());
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||
LatticeCoordinate(coor,mu);
|
||||
phase = phase + (TwoPiL * mom[mu]) * coor;
|
||||
}
|
||||
phase = exp(phase*ci);
|
||||
}
|
||||
|
||||
void PointSource(Coordinate &coor,LatticePropagator &source)
|
||||
{
|
||||
// Coordinate coor({0,0,0,0});
|
||||
source=Zero();
|
||||
SpinColourMatrix kronecker; kronecker=1.0;
|
||||
pokeSite(kronecker,source,coor);
|
||||
}
|
||||
void Z2WallSource(GridParallelRNG &RNG,int tslice,LatticePropagator &source)
|
||||
{
|
||||
GridBase *grid = source.Grid();
|
||||
LatticeComplex noise(grid);
|
||||
LatticeComplex zz(grid); zz=Zero();
|
||||
LatticeInteger t(grid);
|
||||
|
||||
RealD nrm=1.0/sqrt(2);
|
||||
bernoulli(RNG, noise); // 0,1 50:50
|
||||
|
||||
noise = (2.*noise - Complex(1,1))*nrm;
|
||||
|
||||
LatticeCoordinate(t,Tdir);
|
||||
noise = where(t==Integer(tslice), noise, zz);
|
||||
|
||||
source = 1.0;
|
||||
source = source*noise;
|
||||
std::cout << " Z2 wall " << norm2(source) << std::endl;
|
||||
}
|
||||
template<class Field>
|
||||
void GaussianSmear(LatticeGaugeField &U,Field &unsmeared,Field &smeared)
|
||||
{
|
||||
typedef CovariantLaplacianCshift <PeriodicGimplR,Field> Laplacian_t;
|
||||
Laplacian_t Laplacian(U);
|
||||
|
||||
Integer Iterations = 40;
|
||||
Real width = 2.0;
|
||||
Real coeff = (width*width) / Real(4*Iterations);
|
||||
|
||||
Field tmp(U.Grid());
|
||||
smeared=unsmeared;
|
||||
// chi = (1-p^2/2N)^N kronecker
|
||||
for(int n = 0; n < Iterations; ++n) {
|
||||
Laplacian.M(smeared,tmp);
|
||||
smeared = smeared - coeff*tmp;
|
||||
std::cout << " smear iter " << n<<" " <<norm2(smeared)<<std::endl;
|
||||
}
|
||||
}
|
||||
void GaussianSource(Coordinate &site,LatticeGaugeField &U,LatticePropagator &source)
|
||||
{
|
||||
LatticePropagator tmp(source.Grid());
|
||||
PointSource(site,source);
|
||||
std::cout << " GaussianSource Kronecker "<< norm2(source)<<std::endl;
|
||||
tmp = source;
|
||||
GaussianSmear(U,tmp,source);
|
||||
std::cout << " GaussianSource Smeared "<< norm2(source)<<std::endl;
|
||||
}
|
||||
void GaussianWallSource(GridParallelRNG &RNG,int tslice,LatticeGaugeField &U,LatticePropagator &source)
|
||||
{
|
||||
Z2WallSource(RNG,tslice,source);
|
||||
auto tmp = source;
|
||||
GaussianSmear(U,tmp,source);
|
||||
}
|
||||
void SequentialSource(int tslice,Coordinate &mom,LatticePropagator &spectator,LatticePropagator &source)
|
||||
{
|
||||
assert(mom.size()==Nd);
|
||||
assert(mom[Tdir] == 0);
|
||||
|
||||
GridBase * grid = spectator.Grid();
|
||||
|
||||
|
||||
LatticeInteger ts(grid);
|
||||
LatticeCoordinate(ts,Tdir);
|
||||
source = Zero();
|
||||
source = where(ts==Integer(tslice),spectator,source); // Stick in a slice of the spectator, zero everywhere else
|
||||
|
||||
LatticeComplex phase(grid);
|
||||
MakePhase(mom,phase);
|
||||
|
||||
source = source *phase;
|
||||
}
|
||||
|
||||
template<class Action>
|
||||
void MasslessFreePropagator(Action &D,LatticePropagator &source,LatticePropagator &propagator)
|
||||
{
|
||||
GridBase *UGrid = source.Grid();
|
||||
GridBase *FGrid = D.FermionGrid();
|
||||
bool fiveD = true; //calculate 5d free propagator
|
||||
RealD mass = D.Mass();
|
||||
LatticeFermion src4 (UGrid);
|
||||
LatticeFermion result4 (UGrid);
|
||||
LatticeFermion result5(FGrid);
|
||||
LatticeFermion src5(FGrid);
|
||||
LatticePropagator prop5(FGrid);
|
||||
for(int s=0;s<Nd;s++){
|
||||
for(int c=0;c<Nc;c++){
|
||||
|
||||
PropToFerm<Action>(src4,source,s,c);
|
||||
|
||||
D.ImportPhysicalFermionSource(src4,src5);
|
||||
D.FreePropagator(src5,result5,mass,true);
|
||||
std::cout<<GridLogMessage
|
||||
<<"Free 5D prop spin "<<s<<" color "<<c
|
||||
<<" norm2(src5d) " <<norm2(src5)
|
||||
<<" norm2(result5d) "<<norm2(result5)<<std::endl;
|
||||
|
||||
D.ExportPhysicalFermionSolution(result5,result4);
|
||||
|
||||
FermToProp<Action>(prop5,result5,s,c);
|
||||
FermToProp<Action>(propagator,result4,s,c);
|
||||
}
|
||||
}
|
||||
|
||||
LatticePropagator Vector_mu(UGrid);
|
||||
LatticeComplex VV (UGrid);
|
||||
std::vector<TComplex> sumVV;
|
||||
Gamma::Algebra GammaV[3] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
Gamma::Algebra::GammaY,
|
||||
Gamma::Algebra::GammaZ
|
||||
};
|
||||
for( int mu=0;mu<3;mu++ ) {
|
||||
Gamma gV(GammaV[mu]);
|
||||
D.ContractConservedCurrent(prop5,prop5,Vector_mu,source,Current::Vector,mu);
|
||||
VV = trace(gV*Vector_mu); // (local) Vector-Vector conserved current
|
||||
sliceSum(VV,sumVV,Tdir);
|
||||
int Nt = sumVV.size();
|
||||
for(int t=0;t<Nt;t++){
|
||||
RealD Ct = real(TensorRemove(sumVV[t]))*LCscale;
|
||||
RealD Cont=0;
|
||||
if(t) Cont=1.0/(2 * M_PI *M_PI * t*t*t);
|
||||
std::cout<<GridLogMessage <<"VVc["<<mu<<"]["<<t<<"] "<< Ct
|
||||
<< " 2 pi^2 t^3 C(t) "<< Ct/Cont << " delta Ct "<< Ct-Cont <<std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
template<class Action>
|
||||
void MasslessFreePropagator1(Action &D,LatticePropagator &source,LatticePropagator &propagator)
|
||||
{
|
||||
bool fiveD = false; //calculate 4d free propagator
|
||||
RealD mass = D.Mass();
|
||||
GridBase *UGrid = source.Grid();
|
||||
LatticeFermion src4 (UGrid);
|
||||
LatticeFermion result4 (UGrid);
|
||||
for(int s=0;s<Nd;s++){
|
||||
for(int c=0;c<Nc;c++){
|
||||
PropToFerm<Action>(src4,source,s,c);
|
||||
D.FreePropagator(src4,result4,mass,false);
|
||||
FermToProp<Action>(propagator,result4,s,c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Action>
|
||||
void Solve(Action &D,LatticePropagator &source,LatticePropagator &propagator)
|
||||
{
|
||||
GridBase *UGrid = D.GaugeGrid();
|
||||
GridBase *FGrid = D.FermionGrid();
|
||||
|
||||
LatticeFermion src4 (UGrid);
|
||||
LatticeFermion src5 (FGrid);
|
||||
LatticeFermion result5(FGrid);
|
||||
LatticeFermion result4(UGrid);
|
||||
LatticePropagator prop5(FGrid);
|
||||
|
||||
ConjugateGradient<LatticeFermion> CG(1.0e-7,100000);
|
||||
SchurRedBlackDiagMooeeSolve<LatticeFermion> schur(CG);
|
||||
ZeroGuesser<LatticeFermion> ZG; // Could be a DeflatedGuesser if have eigenvectors
|
||||
for(int s=0;s<Nd;s++){
|
||||
for(int c=0;c<Nc;c++){
|
||||
PropToFerm<Action>(src4,source,s,c);
|
||||
|
||||
D.ImportPhysicalFermionSource(src4,src5);
|
||||
|
||||
result5=Zero();
|
||||
schur(D,src5,result5,ZG);
|
||||
std::cout<<GridLogMessage
|
||||
<<"spin "<<s<<" color "<<c
|
||||
<<" norm2(src5d) " <<norm2(src5)
|
||||
<<" norm2(result5d) "<<norm2(result5)<<std::endl;
|
||||
|
||||
D.ExportPhysicalFermionSolution(result5,result4);
|
||||
|
||||
FermToProp<Action>(prop5,result5,s,c);
|
||||
FermToProp<Action>(propagator,result4,s,c);
|
||||
}
|
||||
}
|
||||
LatticePropagator Axial_mu(UGrid);
|
||||
LatticePropagator Vector_mu(UGrid);
|
||||
|
||||
LatticeComplex PA (UGrid);
|
||||
LatticeComplex VV (UGrid);
|
||||
LatticeComplex PJ5q(UGrid);
|
||||
LatticeComplex PP (UGrid);
|
||||
|
||||
std::vector<TComplex> sumPA;
|
||||
std::vector<TComplex> sumVV;
|
||||
std::vector<TComplex> sumPP;
|
||||
std::vector<TComplex> sumPJ5q;
|
||||
|
||||
Gamma g5(Gamma::Algebra::Gamma5);
|
||||
D.ContractConservedCurrent(prop5,prop5,Axial_mu,source,Current::Axial,Tdir);
|
||||
PA = trace(g5*Axial_mu); // Pseudoscalar-Axial conserved current
|
||||
sliceSum(PA,sumPA,Tdir);
|
||||
|
||||
int Nt{static_cast<int>(sumPA.size())};
|
||||
|
||||
for(int t=0;t<Nt;t++) std::cout<<GridLogMessage <<"PAc["<<t<<"] "<<real(TensorRemove(sumPA[t]))*LCscale<<std::endl;
|
||||
|
||||
PP = trace(adj(propagator)*propagator); // Pseudoscalar density
|
||||
sliceSum(PP,sumPP,Tdir);
|
||||
for(int t=0;t<Nt;t++) std::cout<<GridLogMessage <<"PP["<<t<<"] "<<real(TensorRemove(sumPP[t]))*LCscale<<std::endl;
|
||||
|
||||
D.ContractJ5q(prop5,PJ5q);
|
||||
sliceSum(PJ5q,sumPJ5q,Tdir);
|
||||
for(int t=0;t<Nt;t++) std::cout<<GridLogMessage <<"PJ5q["<<t<<"] "<<real(TensorRemove(sumPJ5q[t]))<<std::endl;
|
||||
|
||||
Gamma::Algebra GammaV[3] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
Gamma::Algebra::GammaY,
|
||||
Gamma::Algebra::GammaZ
|
||||
};
|
||||
for( int mu=0;mu<3;mu++ ) {
|
||||
Gamma gV(GammaV[mu]);
|
||||
D.ContractConservedCurrent(prop5,prop5,Vector_mu,source,Current::Vector,mu);
|
||||
// auto ss=sliceSum(Vector_mu,Tdir);
|
||||
// for(int t=0;t<Nt;t++) std::cout<<GridLogMessage <<"ss["<<mu<<"]["<<t<<"] "<<ss[t]<<std::endl;
|
||||
VV = trace(gV*Vector_mu); // (local) Vector-Vector conserved current
|
||||
sliceSum(VV,sumVV,Tdir);
|
||||
for(int t=0;t<Nt;t++){
|
||||
RealD Ct = real(TensorRemove(sumVV[t]))*LCscale;
|
||||
RealD Cont=0;
|
||||
if(t) Cont=1.0/(2 * M_PI *M_PI * t*t*t);
|
||||
std::cout<<GridLogMessage <<"VVc["<<mu<<"]["<<t<<"] "<< Ct
|
||||
<< " 2 pi^2 t^3 C(t) "<< Ct/Cont << " delta Ct "<< Ct-Cont <<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class MesonFile: Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(MesonFile, std::vector<std::vector<Complex> >, data);
|
||||
};
|
||||
|
||||
void MesonTrace(std::string file,LatticePropagator &q1,LatticePropagator &q2,LatticeComplex &phase)
|
||||
{
|
||||
const int nchannel=4;
|
||||
Gamma::Algebra Gammas[nchannel][2] = {
|
||||
{Gamma::Algebra::GammaXGamma5,Gamma::Algebra::GammaXGamma5},
|
||||
{Gamma::Algebra::GammaYGamma5,Gamma::Algebra::GammaYGamma5},
|
||||
{Gamma::Algebra::GammaZGamma5,Gamma::Algebra::GammaZGamma5},
|
||||
{Gamma::Algebra::Identity,Gamma::Algebra::Identity}
|
||||
};
|
||||
|
||||
LatticeComplex meson_CF(q1.Grid());
|
||||
MesonFile MF;
|
||||
|
||||
for(int ch=0;ch<nchannel;ch++){
|
||||
|
||||
Gamma Gsrc(Gammas[ch][0]);
|
||||
Gamma Gsnk(Gammas[ch][1]);
|
||||
|
||||
meson_CF = trace(adj(q1)*Gsnk*q2*adj(Gsrc));
|
||||
|
||||
std::vector<TComplex> meson_T;
|
||||
sliceSum(meson_CF,meson_T, Tdir);
|
||||
|
||||
int nt=meson_T.size();
|
||||
|
||||
std::vector<Complex> corr(nt);
|
||||
for(int t=0;t<nt;t++){
|
||||
corr[t] = TensorRemove(meson_T[t])*LLscale; // Yes this is ugly, not figured a work around
|
||||
RealD Ct = real(corr[t]);
|
||||
RealD Cont=0;
|
||||
if(t) Cont=1.0/(2 * M_PI *M_PI * t*t*t);
|
||||
std::cout << " channel "<<ch<<" t "<<t<<" " <<real(corr[t])<< " 2 pi^2 t^3 C(t) "<< 2 * M_PI *M_PI * t*t*t * Ct
|
||||
<< " deltaC " <<Ct-Cont<<std::endl;
|
||||
}
|
||||
MF.data.push_back(corr);
|
||||
}
|
||||
|
||||
{
|
||||
XmlWriter WR(file);
|
||||
write(WR,"MesonFile",MF);
|
||||
}
|
||||
}
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
|
||||
Grid_init(&argc,&argv);
|
||||
int Ls= atoi(getenv("Ls"));
|
||||
|
||||
// Double precision grids
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
|
||||
GridDefaultSimd(Nd,vComplex::Nsimd()),
|
||||
GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// You can manage seeds however you like.
|
||||
// Recommend SeedUniqueString.
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// std::vector<int> seeds4({1,2,3,4});
|
||||
// GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
std::string config;
|
||||
RealD M5=atof(getenv("M5"));
|
||||
RealD mq = atof(getenv("mass"));
|
||||
int point_x = atoi(getenv("point_x"));
|
||||
int point_y = atoi(getenv("point_y"));
|
||||
int point_z = atoi(getenv("point_z"));
|
||||
int point_t = atoi(getenv("point_t"));
|
||||
std::vector<RealD> masses({ mq} ); // u/d, s, c ??
|
||||
if( argc > 1 && argv[1][0] != '-' )
|
||||
{
|
||||
std::cout<<GridLogMessage <<"Loading configuration from "<<argv[1]<<std::endl;
|
||||
FieldMetaData header;
|
||||
NerscIO::readConfiguration(Umu, header, argv[1]);
|
||||
config=argv[1];
|
||||
LLscale = 1.0;
|
||||
LCscale = 1.0;
|
||||
} else {
|
||||
printf("Expected a configuration");
|
||||
exit(0);
|
||||
}
|
||||
|
||||
int nmass = masses.size();
|
||||
|
||||
typedef MobiusFermionR FermionActionR;
|
||||
std::vector<FermionActionR *> FermActs;
|
||||
|
||||
std::cout<<GridLogMessage <<"======================"<<std::endl;
|
||||
std::cout<<GridLogMessage <<"DomainWallFermion action"<<std::endl;
|
||||
std::cout<<GridLogMessage <<"======================"<<std::endl;
|
||||
|
||||
for(auto mass: masses) {
|
||||
std::vector<Complex> boundary = {1,1,1,-1};
|
||||
FermionActionR::ImplParams Params(boundary);
|
||||
RealD b=1.5;
|
||||
RealD c=0.5;
|
||||
FermActs.push_back(new FermionActionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c));
|
||||
}
|
||||
|
||||
LatticePropagator point_source(UGrid);
|
||||
|
||||
Coordinate Origin({point_x,point_y,point_z,point_t});
|
||||
PointSource (Origin,point_source);
|
||||
|
||||
std::vector<LatticePropagator> PointProps(nmass,UGrid);
|
||||
|
||||
for(int m=0;m<nmass;m++) {
|
||||
Solve(*FermActs[m],point_source ,PointProps[m]);
|
||||
}
|
||||
|
||||
LatticeComplex phase(UGrid);
|
||||
Coordinate mom({0,0,0,0});
|
||||
MakePhase(mom,phase);
|
||||
|
||||
for(int m1=0 ;m1<nmass;m1++) {
|
||||
for(int m2=m1;m2<nmass;m2++) {
|
||||
std::stringstream ssp,ssg,ssz;
|
||||
|
||||
ssp<<config<< "_m" << m1 << "_m"<< m2 << "_point_meson.xml";
|
||||
ssz<<config<< "_m" << m1 << "_m"<< m2 << "_free_meson.xml";
|
||||
|
||||
std::cout << "CG determined VV correlation function"<<std::endl;
|
||||
MesonTrace(ssp.str(),PointProps[m1],PointProps[m2],phase);
|
||||
|
||||
}}
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
|
||||
|
||||
|
479
examples/Example_taku1.cc
Normal file
479
examples/Example_taku1.cc
Normal file
@ -0,0 +1,479 @@
|
||||
/*
|
||||
* Warning: This code illustrative only: not well tested, and not meant for production use
|
||||
* without regression / tests being applied
|
||||
*/
|
||||
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace std;
|
||||
using namespace Grid;
|
||||
|
||||
RealD LLscale =1.0;
|
||||
RealD LCscale =1.0;
|
||||
|
||||
template<class Gimpl,class Field> class CovariantLaplacianCshift : public SparseMatrixBase<Field>
|
||||
{
|
||||
public:
|
||||
INHERIT_GIMPL_TYPES(Gimpl);
|
||||
|
||||
GridBase *grid;
|
||||
GaugeField U;
|
||||
|
||||
CovariantLaplacianCshift(GaugeField &_U) :
|
||||
grid(_U.Grid()),
|
||||
U(_U) { };
|
||||
|
||||
virtual GridBase *Grid(void) { return grid; };
|
||||
|
||||
virtual void M (const Field &in, Field &out)
|
||||
{
|
||||
out=Zero();
|
||||
for(int mu=0;mu<Nd-1;mu++) {
|
||||
GaugeLinkField Umu = PeekIndex<LorentzIndex>(U, mu); // NB: Inefficent
|
||||
out = out - Gimpl::CovShiftForward(Umu,mu,in);
|
||||
out = out - Gimpl::CovShiftBackward(Umu,mu,in);
|
||||
out = out + 2.0*in;
|
||||
}
|
||||
};
|
||||
virtual void Mdag (const Field &in, Field &out) { M(in,out);}; // Laplacian is hermitian
|
||||
virtual void Mdiag (const Field &in, Field &out) {assert(0);}; // Unimplemented need only for multigrid
|
||||
virtual void Mdir (const Field &in, Field &out,int dir, int disp){assert(0);}; // Unimplemented need only for multigrid
|
||||
virtual void MdirAll (const Field &in, std::vector<Field> &out) {assert(0);}; // Unimplemented need only for multigrid
|
||||
};
|
||||
|
||||
void MakePhase(Coordinate mom,LatticeComplex &phase)
|
||||
{
|
||||
GridBase *grid = phase.Grid();
|
||||
auto latt_size = grid->GlobalDimensions();
|
||||
ComplexD ci(0.0,1.0);
|
||||
phase=Zero();
|
||||
|
||||
LatticeComplex coor(phase.Grid());
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
RealD TwoPiL = M_PI * 2.0/ latt_size[mu];
|
||||
LatticeCoordinate(coor,mu);
|
||||
phase = phase + (TwoPiL * mom[mu]) * coor;
|
||||
}
|
||||
phase = exp(phase*ci);
|
||||
}
|
||||
|
||||
void PointSource(Coordinate &coor,LatticePropagator &source)
|
||||
{
|
||||
// Coordinate coor({0,0,0,0});
|
||||
source=Zero();
|
||||
SpinColourMatrix kronecker; kronecker=1.0;
|
||||
pokeSite(kronecker,source,coor);
|
||||
}
|
||||
void Z2WallSource(GridParallelRNG &RNG,int tslice,LatticePropagator &source)
|
||||
{
|
||||
GridBase *grid = source.Grid();
|
||||
LatticeComplex noise(grid);
|
||||
LatticeComplex zz(grid); zz=Zero();
|
||||
LatticeInteger t(grid);
|
||||
|
||||
RealD nrm=1.0/sqrt(2);
|
||||
bernoulli(RNG, noise); // 0,1 50:50
|
||||
|
||||
noise = (2.*noise - Complex(1,1))*nrm;
|
||||
|
||||
LatticeCoordinate(t,Tdir);
|
||||
noise = where(t==Integer(tslice), noise, zz);
|
||||
|
||||
source = 1.0;
|
||||
source = source*noise;
|
||||
std::cout << " Z2 wall " << norm2(source) << std::endl;
|
||||
}
|
||||
template<class Field>
|
||||
void GaussianSmear(LatticeGaugeField &U,Field &unsmeared,Field &smeared)
|
||||
{
|
||||
typedef CovariantLaplacianCshift <PeriodicGimplR,Field> Laplacian_t;
|
||||
Laplacian_t Laplacian(U);
|
||||
|
||||
Integer Iterations = 40;
|
||||
Real width = 2.0;
|
||||
Real coeff = (width*width) / Real(4*Iterations);
|
||||
|
||||
Field tmp(U.Grid());
|
||||
smeared=unsmeared;
|
||||
// chi = (1-p^2/2N)^N kronecker
|
||||
for(int n = 0; n < Iterations; ++n) {
|
||||
Laplacian.M(smeared,tmp);
|
||||
smeared = smeared - coeff*tmp;
|
||||
std::cout << " smear iter " << n<<" " <<norm2(smeared)<<std::endl;
|
||||
}
|
||||
}
|
||||
void GaussianSource(Coordinate &site,LatticeGaugeField &U,LatticePropagator &source)
|
||||
{
|
||||
LatticePropagator tmp(source.Grid());
|
||||
PointSource(site,source);
|
||||
std::cout << " GaussianSource Kronecker "<< norm2(source)<<std::endl;
|
||||
tmp = source;
|
||||
GaussianSmear(U,tmp,source);
|
||||
std::cout << " GaussianSource Smeared "<< norm2(source)<<std::endl;
|
||||
}
|
||||
void GaussianWallSource(GridParallelRNG &RNG,int tslice,LatticeGaugeField &U,LatticePropagator &source)
|
||||
{
|
||||
Z2WallSource(RNG,tslice,source);
|
||||
auto tmp = source;
|
||||
GaussianSmear(U,tmp,source);
|
||||
}
|
||||
void SequentialSource(int tslice,Coordinate &mom,LatticePropagator &spectator,LatticePropagator &source)
|
||||
{
|
||||
assert(mom.size()==Nd);
|
||||
assert(mom[Tdir] == 0);
|
||||
|
||||
GridBase * grid = spectator.Grid();
|
||||
|
||||
|
||||
LatticeInteger ts(grid);
|
||||
LatticeCoordinate(ts,Tdir);
|
||||
source = Zero();
|
||||
source = where(ts==Integer(tslice),spectator,source); // Stick in a slice of the spectator, zero everywhere else
|
||||
|
||||
LatticeComplex phase(grid);
|
||||
MakePhase(mom,phase);
|
||||
|
||||
source = source *phase;
|
||||
}
|
||||
|
||||
template<class Action>
|
||||
void MasslessFreePropagator(Action &D,LatticePropagator &source,LatticePropagator &propagator)
|
||||
{
|
||||
GridBase *UGrid = source.Grid();
|
||||
GridBase *FGrid = D.FermionGrid();
|
||||
bool fiveD = true; //calculate 5d free propagator
|
||||
RealD mass = D.Mass();
|
||||
LatticeFermion src4 (UGrid);
|
||||
LatticeFermion result4 (UGrid);
|
||||
LatticeFermion result5(FGrid);
|
||||
LatticeFermion src5(FGrid);
|
||||
LatticePropagator prop5(FGrid);
|
||||
for(int s=0;s<Nd;s++){
|
||||
for(int c=0;c<Nc;c++){
|
||||
|
||||
PropToFerm<Action>(src4,source,s,c);
|
||||
|
||||
D.ImportPhysicalFermionSource(src4,src5);
|
||||
D.FreePropagator(src5,result5,mass,true);
|
||||
std::cout<<GridLogMessage
|
||||
<<"Free 5D prop spin "<<s<<" color "<<c
|
||||
<<" norm2(src5d) " <<norm2(src5)
|
||||
<<" norm2(result5d) "<<norm2(result5)<<std::endl;
|
||||
|
||||
D.ExportPhysicalFermionSolution(result5,result4);
|
||||
|
||||
FermToProp<Action>(prop5,result5,s,c);
|
||||
FermToProp<Action>(propagator,result4,s,c);
|
||||
}
|
||||
}
|
||||
|
||||
LatticePropagator Vector_mu(UGrid);
|
||||
LatticeComplex VV (UGrid);
|
||||
std::vector<TComplex> sumVV;
|
||||
Gamma::Algebra GammaV[3] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
Gamma::Algebra::GammaY,
|
||||
Gamma::Algebra::GammaZ
|
||||
};
|
||||
for( int mu=0;mu<3;mu++ ) {
|
||||
Gamma gV(GammaV[mu]);
|
||||
D.ContractConservedCurrent(prop5,prop5,Vector_mu,source,Current::Vector,mu);
|
||||
VV = trace(gV*Vector_mu); // (local) Vector-Vector conserved current
|
||||
sliceSum(VV,sumVV,Tdir);
|
||||
int Nt = sumVV.size();
|
||||
for(int t=0;t<Nt;t++){
|
||||
RealD Ct = real(TensorRemove(sumVV[t]))*LCscale;
|
||||
RealD Cont=0;
|
||||
if(t) Cont=1.0/(2 * M_PI *M_PI * t*t*t);
|
||||
std::cout<<GridLogMessage <<"VVc["<<mu<<"]["<<t<<"] "<< Ct
|
||||
<< " 2 pi^2 t^3 C(t) "<< Ct/Cont << " delta Ct "<< Ct-Cont <<std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
template<class Action>
|
||||
void MasslessFreePropagator1(Action &D,LatticePropagator &source,LatticePropagator &propagator)
|
||||
{
|
||||
bool fiveD = false; //calculate 4d free propagator
|
||||
RealD mass = D.Mass();
|
||||
GridBase *UGrid = source.Grid();
|
||||
LatticeFermion src4 (UGrid);
|
||||
LatticeFermion result4 (UGrid);
|
||||
for(int s=0;s<Nd;s++){
|
||||
for(int c=0;c<Nc;c++){
|
||||
PropToFerm<Action>(src4,source,s,c);
|
||||
D.FreePropagator(src4,result4,mass,false);
|
||||
FermToProp<Action>(propagator,result4,s,c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<class Action>
|
||||
void Solve(Action &D,LatticePropagator &source,LatticePropagator &propagator)
|
||||
{
|
||||
GridBase *UGrid = D.GaugeGrid();
|
||||
GridBase *FGrid = D.FermionGrid();
|
||||
|
||||
LatticeFermion src4 (UGrid);
|
||||
LatticeFermion src5 (FGrid);
|
||||
LatticeFermion result5(FGrid);
|
||||
LatticeFermion result4(UGrid);
|
||||
LatticePropagator prop5(FGrid);
|
||||
|
||||
ConjugateGradient<LatticeFermion> CG(1.0e-10,100000);
|
||||
SchurRedBlackDiagMooeeSolve<LatticeFermion> schur(CG);
|
||||
ZeroGuesser<LatticeFermion> ZG; // Could be a DeflatedGuesser if have eigenvectors
|
||||
for(int s=0;s<Nd;s++){
|
||||
for(int c=0;c<Nc;c++){
|
||||
PropToFerm<Action>(src4,source,s,c);
|
||||
|
||||
D.ImportPhysicalFermionSource(src4,src5);
|
||||
|
||||
result5=Zero();
|
||||
schur(D,src5,result5,ZG);
|
||||
std::cout<<GridLogMessage
|
||||
<<"spin "<<s<<" color "<<c
|
||||
<<" norm2(src5d) " <<norm2(src5)
|
||||
<<" norm2(result5d) "<<norm2(result5)<<std::endl;
|
||||
|
||||
D.ExportPhysicalFermionSolution(result5,result4);
|
||||
|
||||
FermToProp<Action>(prop5,result5,s,c);
|
||||
FermToProp<Action>(propagator,result4,s,c);
|
||||
}
|
||||
}
|
||||
LatticePropagator Axial_mu(UGrid);
|
||||
LatticePropagator Vector_mu(UGrid);
|
||||
|
||||
LatticeComplex PA (UGrid);
|
||||
LatticeComplex VV (UGrid);
|
||||
LatticeComplex PJ5q(UGrid);
|
||||
LatticeComplex PP (UGrid);
|
||||
|
||||
std::vector<TComplex> sumPA;
|
||||
std::vector<TComplex> sumVV;
|
||||
std::vector<TComplex> sumPP;
|
||||
std::vector<TComplex> sumPJ5q;
|
||||
|
||||
Gamma g5(Gamma::Algebra::Gamma5);
|
||||
D.ContractConservedCurrent(prop5,prop5,Axial_mu,source,Current::Axial,Tdir);
|
||||
PA = trace(g5*Axial_mu); // Pseudoscalar-Axial conserved current
|
||||
sliceSum(PA,sumPA,Tdir);
|
||||
|
||||
int Nt{static_cast<int>(sumPA.size())};
|
||||
|
||||
for(int t=0;t<Nt;t++) std::cout<<GridLogMessage <<"PAc["<<t<<"] "<<real(TensorRemove(sumPA[t]))*LCscale<<std::endl;
|
||||
|
||||
PP = trace(adj(propagator)*propagator); // Pseudoscalar density
|
||||
sliceSum(PP,sumPP,Tdir);
|
||||
for(int t=0;t<Nt;t++) std::cout<<GridLogMessage <<"PP["<<t<<"] "<<real(TensorRemove(sumPP[t]))*LCscale<<std::endl;
|
||||
|
||||
D.ContractJ5q(prop5,PJ5q);
|
||||
sliceSum(PJ5q,sumPJ5q,Tdir);
|
||||
for(int t=0;t<Nt;t++) std::cout<<GridLogMessage <<"PJ5q["<<t<<"] "<<real(TensorRemove(sumPJ5q[t]))<<std::endl;
|
||||
|
||||
Gamma::Algebra GammaV[3] = {
|
||||
Gamma::Algebra::GammaX,
|
||||
Gamma::Algebra::GammaY,
|
||||
Gamma::Algebra::GammaZ
|
||||
};
|
||||
for( int mu=0;mu<3;mu++ ) {
|
||||
Gamma gV(GammaV[mu]);
|
||||
D.ContractConservedCurrent(prop5,prop5,Vector_mu,source,Current::Vector,mu);
|
||||
// auto ss=sliceSum(Vector_mu,Tdir);
|
||||
// for(int t=0;t<Nt;t++) std::cout<<GridLogMessage <<"ss["<<mu<<"]["<<t<<"] "<<ss[t]<<std::endl;
|
||||
VV = trace(gV*Vector_mu); // (local) Vector-Vector conserved current
|
||||
sliceSum(VV,sumVV,Tdir);
|
||||
for(int t=0;t<Nt;t++){
|
||||
RealD Ct = real(TensorRemove(sumVV[t]))*LCscale;
|
||||
RealD Cont=0;
|
||||
if(t) Cont=1.0/(2 * M_PI *M_PI * t*t*t);
|
||||
std::cout<<GridLogMessage <<"VVc["<<mu<<"]["<<t<<"] "<< Ct
|
||||
<< " 2 pi^2 t^3 C(t) "<< Ct/Cont << " delta Ct "<< Ct-Cont <<std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class MesonFile: Serializable {
|
||||
public:
|
||||
GRID_SERIALIZABLE_CLASS_MEMBERS(MesonFile, std::vector<std::vector<Complex> >, data);
|
||||
};
|
||||
|
||||
void MesonTrace(std::string file,LatticePropagator &q1,LatticePropagator &q2,LatticeComplex &phase)
|
||||
{
|
||||
const int nchannel=4;
|
||||
Gamma::Algebra Gammas[nchannel][2] = {
|
||||
{Gamma::Algebra::GammaXGamma5,Gamma::Algebra::GammaXGamma5},
|
||||
{Gamma::Algebra::GammaYGamma5,Gamma::Algebra::GammaYGamma5},
|
||||
{Gamma::Algebra::GammaZGamma5,Gamma::Algebra::GammaZGamma5},
|
||||
{Gamma::Algebra::Identity,Gamma::Algebra::Identity}
|
||||
};
|
||||
|
||||
LatticeComplex meson_CF(q1.Grid());
|
||||
MesonFile MF;
|
||||
|
||||
for(int ch=0;ch<nchannel;ch++){
|
||||
|
||||
Gamma Gsrc(Gammas[ch][0]);
|
||||
Gamma Gsnk(Gammas[ch][1]);
|
||||
|
||||
meson_CF = trace(adj(q1)*Gsnk*q2*adj(Gsrc));
|
||||
|
||||
std::vector<TComplex> meson_T;
|
||||
sliceSum(meson_CF,meson_T, Tdir);
|
||||
|
||||
int nt=meson_T.size();
|
||||
|
||||
std::vector<Complex> corr(nt);
|
||||
for(int t=0;t<nt;t++){
|
||||
corr[t] = TensorRemove(meson_T[t])*LLscale; // Yes this is ugly, not figured a work around
|
||||
RealD Ct = real(corr[t]);
|
||||
RealD Cont=0;
|
||||
if(t) Cont=1.0/(2 * M_PI *M_PI * t*t*t);
|
||||
std::cout << " channel "<<ch<<" t "<<t<<" " <<real(corr[t])<< " 2 pi^2 t^3 C(t) "<< 2 * M_PI *M_PI * t*t*t * Ct
|
||||
<< " deltaC " <<Ct-Cont<<std::endl;
|
||||
}
|
||||
MF.data.push_back(corr);
|
||||
}
|
||||
|
||||
{
|
||||
XmlWriter WR(file);
|
||||
write(WR,"MesonFile",MF);
|
||||
}
|
||||
}
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
const int Ls=10;
|
||||
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
// Double precision grids
|
||||
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
|
||||
GridDefaultSimd(Nd,vComplex::Nsimd()),
|
||||
GridDefaultMpi());
|
||||
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// You can manage seeds however you like.
|
||||
// Recommend SeedUniqueString.
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// std::vector<int> seeds4({1,2,3,4});
|
||||
// GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||
|
||||
LatticeGaugeField Umu(UGrid);
|
||||
std::string config;
|
||||
RealD M5=atof(getenv("M5"));
|
||||
RealD mq = atof(getenv("mass"));
|
||||
int tadpole = atof(getenv("tadpole"));
|
||||
std::vector<RealD> masses({ mq} ); // u/d, s, c ??
|
||||
if( argc > 1 && argv[1][0] != '-' )
|
||||
{
|
||||
std::cout<<GridLogMessage <<"Loading configuration from "<<argv[1]<<std::endl;
|
||||
FieldMetaData header;
|
||||
NerscIO::readConfiguration(Umu, header, argv[1]);
|
||||
config=argv[1];
|
||||
LLscale = 1.0;
|
||||
LCscale = 1.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
SU<Nc>::ColdConfiguration(Umu);
|
||||
config="ColdConfig";
|
||||
// RealD P=1.0; // Don't scale
|
||||
// RealD P=0.6388238 // 32Ifine
|
||||
// RealD P=0.6153342; // 64I
|
||||
RealD P=0.5871119; // 48I
|
||||
RealD u0 = sqrt(sqrt(P));
|
||||
RealD w0 = 1 - M5;
|
||||
std::cout<<GridLogMessage <<"For plaquette P="<<P<<" u0= "<<u0<<std::endl;
|
||||
if ( tadpole == 1 ) {
|
||||
Umu = Umu * u0;
|
||||
// LLscale = 1.0/(1-w0*w0)/(1-w0*w0)/u0/u0;
|
||||
// LCscale = 1.0/(1-w0*w0)/(1-w0*w0)/u0/u0;
|
||||
LLscale = 1.0;
|
||||
LCscale = 1.0;
|
||||
std::cout<<GridLogMessage <<"Gauge links are u= u0 "<<std::endl;
|
||||
std::cout<<GridLogMessage <<"M5 = "<<M5<<std::endl;
|
||||
} else if ( tadpole == 2) {
|
||||
std::cout<<GridLogMessage <<"Gauge links are u=1 "<<std::endl;
|
||||
LLscale = 1.0;
|
||||
LCscale = 1.0;
|
||||
std::cout<<GridLogMessage <<"M5 = "<<M5<<std::endl;
|
||||
} else {
|
||||
LLscale = 1.0/u0/u0;
|
||||
LCscale = 1.0/u0/u0;
|
||||
M5 = M5 - 4.0 * (1-u0);
|
||||
std::cout<<GridLogMessage <<"Gauge links are u=1 "<<std::endl;
|
||||
std::cout<<GridLogMessage <<"M5mf = "<<M5<<std::endl;
|
||||
}
|
||||
std::cout<<GridLogMessage <<"mq = "<<mq<<std::endl;
|
||||
std::cout<<GridLogMessage <<"LLscale = "<<LLscale<<std::endl;
|
||||
std::cout<<GridLogMessage <<"LCscale = "<<LCscale<<std::endl;
|
||||
}
|
||||
|
||||
int nmass = masses.size();
|
||||
|
||||
typedef DomainWallFermionR FermionActionR;
|
||||
// typedef MobiusFermionR FermionActionR;
|
||||
std::vector<FermionActionR *> FermActs;
|
||||
std::vector<DomainWallFermionR *> DWFActs;
|
||||
|
||||
std::cout<<GridLogMessage <<"======================"<<std::endl;
|
||||
std::cout<<GridLogMessage <<"DomainWallFermion action"<<std::endl;
|
||||
std::cout<<GridLogMessage <<"======================"<<std::endl;
|
||||
|
||||
for(auto mass: masses) {
|
||||
std::vector<Complex> boundary = {1,1,1,-1};
|
||||
FermionActionR::ImplParams Params(boundary);
|
||||
RealD b=1.5;
|
||||
RealD c=0.5;
|
||||
std::cout<<GridLogMessage <<"Making DomainWallFermion action"<<std::endl;
|
||||
// DWFActs.push_back(new DomainWallFermionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5));
|
||||
FermActs.push_back(new FermionActionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,Params));
|
||||
// FermActs.push_back(new FermionActionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass+0.001,M5,b,c));
|
||||
std::cout<<GridLogMessage <<"Made DomainWallFermion action"<<std::endl;
|
||||
}
|
||||
|
||||
LatticePropagator point_source(UGrid);
|
||||
|
||||
Coordinate Origin({0,0,0,0});
|
||||
PointSource (Origin,point_source);
|
||||
|
||||
std::vector<LatticePropagator> PointProps(nmass,UGrid);
|
||||
// std::vector<LatticePropagator> FreeProps(nmass,UGrid);
|
||||
// LatticePropagator delta(UGrid);
|
||||
|
||||
for(int m=0;m<nmass;m++) {
|
||||
Solve(*FermActs[m],point_source ,PointProps[m]);
|
||||
// MasslessFreePropagator(*FermActs[m],point_source ,FreeProps[m]);
|
||||
|
||||
// delta = PointProps[m] - FreeProps[m];
|
||||
// std::cout << " delta "<<norm2(delta) << " FFT "<<norm2(FreeProps[m])<< " CG " <<norm2(PointProps[m])<<std::endl;
|
||||
}
|
||||
|
||||
LatticeComplex phase(UGrid);
|
||||
Coordinate mom({0,0,0,0});
|
||||
MakePhase(mom,phase);
|
||||
|
||||
for(int m1=0 ;m1<nmass;m1++) {
|
||||
for(int m2=m1;m2<nmass;m2++) {
|
||||
std::stringstream ssp,ssg,ssz;
|
||||
|
||||
ssp<<config<< "_m" << m1 << "_m"<< m2 << "_point_meson.xml";
|
||||
ssz<<config<< "_m" << m1 << "_m"<< m2 << "_free_meson.xml";
|
||||
|
||||
std::cout << "CG determined VV correlation function"<<std::endl;
|
||||
MesonTrace(ssp.str(),PointProps[m1],PointProps[m2],phase);
|
||||
|
||||
// std::cout << "FFT derived VV correlation function"<<std::endl;
|
||||
// MesonTrace(ssz.str(),FreeProps[m1],FreeProps[m2],phase);
|
||||
}}
|
||||
|
||||
Grid_finalize();
|
||||
}
|
||||
|
||||
|
||||
|
@ -95,26 +95,34 @@ int main (int argc, char ** argv)
|
||||
std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl;
|
||||
MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO);
|
||||
double t1,t2,flops;
|
||||
double MdagMsiteflops = 1452; // Mobius (real coeffs)
|
||||
// CG overhead: 8 inner product, 4+8 axpy_norm, 4+4 linear comb (2 of)
|
||||
double CGsiteflops = (8+4+8+4+4)*Nc*Ns ;
|
||||
std:: cout << " MdagM site flops = "<< 4*MdagMsiteflops<<std::endl;
|
||||
std:: cout << " CG site flops = "<< CGsiteflops <<std::endl;
|
||||
int iters;
|
||||
for(int i=0;i<100;i++){
|
||||
for(int i=0;i<200;i++){
|
||||
result_o = Zero();
|
||||
t1=usecond();
|
||||
mCG(src_o,result_o);
|
||||
t2=usecond();
|
||||
iters = mCG.TotalInnerIterations; //Number of inner CG iterations
|
||||
flops = 1320.0*2*FGrid->gSites()*iters;
|
||||
flops = MdagMsiteflops*4*FrbGrid->gSites()*iters;
|
||||
flops+= CGsiteflops*FrbGrid->gSites()*iters;
|
||||
std::cout << " SinglePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
|
||||
std::cout << " SinglePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
|
||||
}
|
||||
std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl;
|
||||
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
|
||||
for(int i=0;i<100;i++){
|
||||
for(int i=0;i<1;i++){
|
||||
result_o_2 = Zero();
|
||||
t1=usecond();
|
||||
CG(HermOpEO,src_o,result_o_2);
|
||||
t2=usecond();
|
||||
iters = CG.IterationsToComplete;
|
||||
flops = 1320.0*2*FGrid->gSites()*iters;
|
||||
flops = MdagMsiteflops*4*FrbGrid->gSites()*iters;
|
||||
flops+= CGsiteflops*FrbGrid->gSites()*iters;
|
||||
|
||||
std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
|
||||
std::cout << " DoublePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
|
||||
}
|
||||
|
183
tests/Test_gfield_shift.cc
Normal file
183
tests/Test_gfield_shift.cc
Normal file
@ -0,0 +1,183 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/Test_gfield_shift.cc
|
||||
|
||||
Copyright (C) 2015
|
||||
|
||||
Author: Christopher Kelly <ckelly@bnl.gov>
|
||||
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
|
||||
//Test the shifting of the gauge field that respects the boundary conditions
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace Grid;
|
||||
;
|
||||
|
||||
typedef ConjugateGimplR Gimpl; //can choose periodic / charge conjugate directions at wil
|
||||
typedef Gimpl::GaugeField GaugeField;
|
||||
typedef Gimpl::GaugeLinkField GaugeLinkField;
|
||||
typedef Gimpl::SiteGaugeField SiteGaugeField;
|
||||
typedef Gimpl::SiteGaugeLink SiteGaugeLink;
|
||||
|
||||
GaugeField CshiftGaugeField(const GaugeField &U, const int dir, const int shift){
|
||||
GridBase *Grid = U.Grid();
|
||||
|
||||
GaugeField out(Grid);
|
||||
GaugeLinkField Umu(Grid);
|
||||
for(int mu=0;mu<Grid->Nd();mu++){
|
||||
Umu = PeekIndex<LorentzIndex>(U, mu);
|
||||
Umu = Gimpl::CshiftLink(Umu,dir,shift);
|
||||
PokeIndex<LorentzIndex>(out,Umu,mu);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
int main (int argc, char ** argv)
|
||||
{
|
||||
Grid_init(&argc,&argv);
|
||||
|
||||
auto latt_size = GridDefaultLatt();
|
||||
auto simd_layout = GridDefaultSimd(4,vComplex::Nsimd());
|
||||
auto mpi_layout = GridDefaultMpi();
|
||||
|
||||
std::vector<int> conj_dirs = {1,1,0,0};
|
||||
Gimpl::setDirections(conj_dirs);
|
||||
|
||||
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||
|
||||
GridParallelRNG FineRNG(&Fine); FineRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||
|
||||
|
||||
GaugeField U(&Fine);
|
||||
GaugeField ShiftU(&Fine);
|
||||
|
||||
GaugeLinkField link_field(&Fine), link_field_2(&Fine);
|
||||
|
||||
//Like Test_cshift we put the lex coordinate index on each site but make it imaginary
|
||||
//so we can tell when it was complex conjugated
|
||||
LatticeComplex lex(&Fine);
|
||||
lex=Zero();
|
||||
U = Zero();
|
||||
{
|
||||
LatticeComplex coor(&Fine);
|
||||
Integer stride =1;
|
||||
for(int d=0;d<4;d++){
|
||||
LatticeCoordinate(coor,d);
|
||||
lex = lex + coor*stride;
|
||||
stride=stride*latt_size[d];
|
||||
}
|
||||
PokeIndex<ColourIndex>(link_field, lex, 0,0); //place on 0,0 element of link
|
||||
|
||||
for(int mu=0;mu<Nd;mu++){
|
||||
link_field_2 = link_field + mu*stride; //add in lex-mapping of mu
|
||||
link_field_2 = ComplexD(0,1) * link_field_2; //make imaginary
|
||||
PokeIndex<LorentzIndex>(U, link_field_2, mu);
|
||||
}
|
||||
}
|
||||
|
||||
std::stringstream ss;
|
||||
ss<<"error";
|
||||
for(int d=0;d<Fine._ndimension;d++){
|
||||
ss<<"."<<Fine._processor_coor[d];
|
||||
}
|
||||
ss<<"_wr_"<<Fine._processor;
|
||||
std::string fname(ss.str());
|
||||
std::ofstream ferr(fname);
|
||||
|
||||
Integer vol4d = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||
|
||||
bool fail = false;
|
||||
typename SiteGaugeField::scalar_object um;
|
||||
TComplex cm;
|
||||
|
||||
for(int dir=0;dir<4;dir++){
|
||||
for(int shift=-latt_size[dir]+1;shift<latt_size[dir];shift++){
|
||||
if ( Fine.IsBoss() )
|
||||
std::cout<<GridLogMessage<<"Shifting by "<<shift<<" in direction "<<dir
|
||||
<< " dir is conj ? " << conj_dirs[dir] << std::endl;
|
||||
|
||||
ShiftU = CshiftGaugeField(U,dir,shift);
|
||||
|
||||
Coordinate coor(4);
|
||||
|
||||
for(coor[3]=0;coor[3]<latt_size[3];coor[3]++){
|
||||
for(coor[2]=0;coor[2]<latt_size[2];coor[2]++){
|
||||
for(coor[1]=0;coor[1]<latt_size[1];coor[1]++){
|
||||
for(coor[0]=0;coor[0]<latt_size[0];coor[0]++){
|
||||
peekSite(um,ShiftU,coor);
|
||||
|
||||
Coordinate scoor(coor);
|
||||
scoor[dir] = (scoor[dir]+shift + latt_size[dir])%latt_size[dir];
|
||||
|
||||
Integer slex = scoor[0]
|
||||
+ latt_size[0]*scoor[1]
|
||||
+ latt_size[0]*latt_size[1]*scoor[2]
|
||||
+ latt_size[0]*latt_size[1]*latt_size[2]*scoor[3];
|
||||
|
||||
for(int mu = 0 ; mu < 4; mu++){
|
||||
Integer slex_mu = slex + vol4d*mu;
|
||||
Complex scm(0,slex_mu); //imaginary
|
||||
if(
|
||||
( shift > 0 && coor[dir] >= latt_size[dir]-shift && conj_dirs[dir] )
|
||||
||
|
||||
( shift < 0 && coor[dir] <= -shift-1 && conj_dirs[dir] )
|
||||
)
|
||||
scm = conjugate(scm); //CC if pulled over boundary
|
||||
|
||||
cm = um(mu)()(0,0);
|
||||
|
||||
RealD nrm = abs(scm-cm()()());
|
||||
//std::cout << cm << " " << scm << std::endl;
|
||||
|
||||
Coordinate peer(4);
|
||||
Complex tmp =cm;
|
||||
Integer index=real(tmp);
|
||||
|
||||
Integer cm_mu = index / vol4d;
|
||||
index = index % vol4d;
|
||||
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||
|
||||
if (nrm > 0){
|
||||
ferr<<"FAIL mu " << mu << " shift "<< shift<<" in dir "<< dir<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
|
||||
ferr<<"Got mu "<< cm_mu << " site " <<index<<" : " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
|
||||
index=real(scm);
|
||||
Integer scm_mu = index / vol4d;
|
||||
index = index % vol4d;
|
||||
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||
ferr<<"Expect mu " << scm_mu << " site " <<index<<": " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||
fail = true;
|
||||
}
|
||||
}
|
||||
|
||||
}}}}
|
||||
}
|
||||
}
|
||||
|
||||
if(fail) std::cout << "Test FAILED : see " << fname << " for more details" << std::endl;
|
||||
else std::cout << "Test Passed" << std::endl;
|
||||
|
||||
Grid_finalize();
|
||||
}
|
153
tests/smearing/Test_WilsonFlow_adaptive.cc
Normal file
153
tests/smearing/Test_WilsonFlow_adaptive.cc
Normal file
@ -0,0 +1,153 @@
|
||||
/*************************************************************************************
|
||||
|
||||
Grid physics library, www.github.com/paboyle/Grid
|
||||
|
||||
Source file: ./tests/hmc/Test_WilsonFlow_adaptive.cc
|
||||
|
||||
Copyright (C) 2017
|
||||
|
||||
Author: Christopher Kelly <ckelly@bnl.gov>
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
See the full license in the file "LICENSE" in the top level distribution
|
||||
directory
|
||||
*************************************************************************************/
|
||||
/* END LEGAL */
|
||||
#include <Grid/Grid.h>
|
||||
|
||||
using namespace Grid;
|
||||
|
||||
//Linearly interpolate between two nearest times
|
||||
RealD interpolate(const RealD t_int, const std::vector<std::pair<RealD,RealD> > &data){
|
||||
RealD tdiff1=1e32; int t1_idx=-1;
|
||||
RealD tdiff2=1e32; int t2_idx=-1;
|
||||
|
||||
for(int i=0;i<data.size();i++){
|
||||
RealD diff = fabs(data[i].first-t_int);
|
||||
//std::cout << "targ " << t_int << " cur " << data[i].first << " diff " << diff << " best diff1 " << tdiff1 << " diff2 " << tdiff2 << std::endl;
|
||||
|
||||
if(diff < tdiff1){
|
||||
if(tdiff1 < tdiff2){ //swap out tdiff2
|
||||
tdiff2 = tdiff1; t2_idx = t1_idx;
|
||||
}
|
||||
tdiff1 = diff; t1_idx = i;
|
||||
}
|
||||
else if(diff < tdiff2){ tdiff2 = diff; t2_idx = i; }
|
||||
}
|
||||
assert(t1_idx != -1 && t2_idx != -1);
|
||||
|
||||
RealD t2 = data[t2_idx].first, v2 = data[t2_idx].second;
|
||||
RealD t1 = data[t1_idx].first, v1 = data[t1_idx].second;
|
||||
|
||||
//v = a + bt
|
||||
//v2-v1 = b(t2-t1)
|
||||
RealD b = (v2-v1)/(t2-t1);
|
||||
RealD a = v1 - b*t1;
|
||||
RealD vout = a + b*t_int;
|
||||
|
||||
//std::cout << "Interpolate to " << t_int << " two closest points " << t1 << " " << t2
|
||||
//<< " with values " << v1 << " "<< v2 << " : got " << vout << std::endl;
|
||||
return vout;
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
Grid_init(&argc, &argv);
|
||||
GridLogLayout();
|
||||
|
||||
auto latt_size = GridDefaultLatt();
|
||||
auto simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd());
|
||||
auto mpi_layout = GridDefaultMpi();
|
||||
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||
GridRedBlackCartesian RBGrid(&Grid);
|
||||
|
||||
std::vector<int> seeds({1, 2, 3, 4, 5});
|
||||
GridSerialRNG sRNG;
|
||||
GridParallelRNG pRNG(&Grid);
|
||||
pRNG.SeedFixedIntegers(seeds);
|
||||
|
||||
LatticeGaugeField U(&Grid);
|
||||
SU<Nc>::HotConfiguration(pRNG, U);
|
||||
|
||||
int Nstep = 300;
|
||||
RealD epsilon = 0.01;
|
||||
RealD maxTau = Nstep*epsilon;
|
||||
RealD tolerance = 1e-4;
|
||||
|
||||
for(int i=1;i<argc;i++){
|
||||
std::string sarg(argv[i]);
|
||||
if(sarg == "--tolerance"){
|
||||
std::stringstream ss; ss << argv[i+1]; ss >> tolerance;
|
||||
}
|
||||
}
|
||||
std::cout << "Adaptive smear tolerance " << tolerance << std::endl;
|
||||
|
||||
//Setup iterative Wilson flow
|
||||
WilsonFlow<PeriodicGimplD> wflow(epsilon,Nstep);
|
||||
wflow.resetActions();
|
||||
|
||||
std::vector<std::pair<RealD, RealD> > meas_orig;
|
||||
|
||||
wflow.addMeasurement(1, [&wflow,&meas_orig](int step, RealD t, const LatticeGaugeField &U){
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Computing Cloverleaf energy density for step " << step << std::endl;
|
||||
meas_orig.push_back( {t, wflow.energyDensityCloverleaf(t,U)} );
|
||||
});
|
||||
|
||||
//Setup adaptive Wilson flow
|
||||
WilsonFlowAdaptive<PeriodicGimplD> wflow_ad(epsilon,maxTau,tolerance);
|
||||
wflow_ad.resetActions();
|
||||
|
||||
std::vector<std::pair<RealD, RealD> > meas_adaptive;
|
||||
|
||||
wflow_ad.addMeasurement(1, [&wflow_ad,&meas_adaptive](int step, RealD t, const LatticeGaugeField &U){
|
||||
std::cout << GridLogMessage << "[WilsonFlow] Computing Cloverleaf energy density for step " << step << std::endl;
|
||||
meas_adaptive.push_back( {t, wflow_ad.energyDensityCloverleaf(t,U)} );
|
||||
});
|
||||
|
||||
//Run
|
||||
LatticeGaugeFieldD Vtmp(U.Grid());
|
||||
wflow.smear(Vtmp, U); //basic smear
|
||||
|
||||
Vtmp = Zero();
|
||||
wflow_ad.smear(Vtmp, U);
|
||||
|
||||
//Output values for plotting
|
||||
{
|
||||
std::ofstream out("wflow_t2E_orig.dat");
|
||||
out.precision(16);
|
||||
for(auto const &e: meas_orig){
|
||||
out << e.first << " " << e.second << std::endl;
|
||||
}
|
||||
}
|
||||
{
|
||||
std::ofstream out("wflow_t2E_adaptive.dat");
|
||||
out.precision(16);
|
||||
for(auto const &e: meas_adaptive){
|
||||
out << e.first << " " << e.second << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
//Compare at times available with adaptive smearing
|
||||
for(int i=0;i<meas_adaptive.size();i++){
|
||||
RealD t = meas_adaptive[i].first;
|
||||
RealD v_adaptive = meas_adaptive[i].second;
|
||||
RealD v_orig = interpolate(t, meas_orig); //should be very precise due to fine timestep
|
||||
std::cout << t << " orig: " << v_orig << " adaptive: " << v_adaptive << " reldiff: " << (v_adaptive-v_orig)/v_orig << std::endl;
|
||||
}
|
||||
|
||||
std::cout << GridLogMessage << "Done" << std::endl;
|
||||
Grid_finalize();
|
||||
}
|
Reference in New Issue
Block a user