mirror of
https://github.com/paboyle/Grid.git
synced 2025-06-24 10:42:03 +01:00
Compare commits
71 Commits
feature/di
...
584a3ee45c
Author | SHA1 | Date | |
---|---|---|---|
584a3ee45c | |||
eec0c9eb7d | |||
66d001ec9e | |||
19da647e3c | |||
e7d9b75fdd | |||
3d0e3ec363 | |||
3c1c51f9aa | |||
5c87342108 | |||
66177bfbe2 | |||
5205e68963 | |||
cd5cf6d614 | |||
5abb19eab0 | |||
06d7b88c78 | |||
cf72799735 | |||
cdb8fcc269 | |||
b4f4130901 | |||
bb049847d5 | |||
fd33c835dd | |||
21371a7e5b | |||
abfaa00d3e | |||
efee33c55d | |||
db0fe6ddbb | |||
8a9e647120 | |||
e6dcb821ad | |||
9bff188f02 | |||
111b30ca1d | |||
24182ca8bf | |||
ee2d7369b3 | |||
7c686d29c9 | |||
e8a0a1e75d | |||
730be89abf | |||
f991ad7d5c | |||
b3f33f82f7 | |||
a34a6e059f | |||
1333319941 | |||
9295ed8d20 | |||
19cc7653fb | |||
5752538661 | |||
ca40a1b00b | |||
659fac9dfb | |||
4dc3d6fce0 | |||
95b640cb6b | |||
2cb5bedc15 | |||
806b02bddf | |||
de40395773 | |||
7ba4788715 | |||
06d9ce1a02 | |||
75bb6b2b40 | |||
74f10c2dc0 | |||
a93d5459d4 | |||
9c21add0c6 | |||
639aab6563 | |||
8137cc7049 | |||
60e63dca1d | |||
486409574e | |||
a913b8be12 | |||
2239751850 | |||
9b20f1449c | |||
b99453083d | |||
943fbb914d | |||
ca4603580d | |||
f73db8f1f3 | |||
f7217d12d2 | |||
fab50c57d9 | |||
3440534fbf | |||
177b1a7ec6 | |||
58182fe345 | |||
1f907d330d | |||
b0fe664e9d | |||
c0f8482402 | |||
3544965f54 |
@ -44,7 +44,8 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
#include <Grid/GridStd.h>
|
#include <Grid/GridStd.h>
|
||||||
#include <Grid/threads/Pragmas.h>
|
#include <Grid/threads/Pragmas.h>
|
||||||
#include <Grid/perfmon/Timer.h>
|
#include <Grid/perfmon/Timer.h>
|
||||||
#include <Grid/perfmon/PerfCount.h>
|
#include <Grid/perfmon/Tracing.h>
|
||||||
|
//#include <Grid/perfmon/PerfCount.h>
|
||||||
#include <Grid/util/Util.h>
|
#include <Grid/util/Util.h>
|
||||||
#include <Grid/log/Log.h>
|
#include <Grid/log/Log.h>
|
||||||
#include <Grid/allocator/Allocator.h>
|
#include <Grid/allocator/Allocator.h>
|
||||||
|
@ -58,6 +58,7 @@ public:
|
|||||||
|
|
||||||
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
|
void operator()(LinearOperatorBase<Field> &Linop, const Field &src, Field &psi) {
|
||||||
|
|
||||||
|
GRID_TRACE("ConjugateGradient");
|
||||||
psi.Checkerboard() = src.Checkerboard();
|
psi.Checkerboard() = src.Checkerboard();
|
||||||
|
|
||||||
conformable(psi, src);
|
conformable(psi, src);
|
||||||
@ -117,6 +118,7 @@ public:
|
|||||||
GridStopWatch MatrixTimer;
|
GridStopWatch MatrixTimer;
|
||||||
GridStopWatch SolverTimer;
|
GridStopWatch SolverTimer;
|
||||||
|
|
||||||
|
RealD usecs = -usecond();
|
||||||
SolverTimer.Start();
|
SolverTimer.Start();
|
||||||
int k;
|
int k;
|
||||||
for (k = 1; k <= MaxIterations; k++) {
|
for (k = 1; k <= MaxIterations; k++) {
|
||||||
@ -166,14 +168,16 @@ public:
|
|||||||
|
|
||||||
// Stopping condition
|
// Stopping condition
|
||||||
if (cp <= rsq) {
|
if (cp <= rsq) {
|
||||||
|
usecs +=usecond();
|
||||||
SolverTimer.Stop();
|
SolverTimer.Stop();
|
||||||
Linop.HermOpAndNorm(psi, mmp, d, qq);
|
Linop.HermOpAndNorm(psi, mmp, d, qq);
|
||||||
p = mmp - src;
|
p = mmp - src;
|
||||||
|
GridBase *grid = src.Grid();
|
||||||
|
RealD DwfFlops = (1452. )*grid->gSites()*4*k
|
||||||
|
+ (8+4+8+4+4)*12*grid->gSites()*k; // CG linear algebra
|
||||||
RealD srcnorm = std::sqrt(norm2(src));
|
RealD srcnorm = std::sqrt(norm2(src));
|
||||||
RealD resnorm = std::sqrt(norm2(p));
|
RealD resnorm = std::sqrt(norm2(p));
|
||||||
RealD true_residual = resnorm / srcnorm;
|
RealD true_residual = resnorm / srcnorm;
|
||||||
|
|
||||||
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k
|
std::cout << GridLogMessage << "ConjugateGradient Converged on iteration " << k
|
||||||
<< "\tComputed residual " << std::sqrt(cp / ssq)
|
<< "\tComputed residual " << std::sqrt(cp / ssq)
|
||||||
<< "\tTrue residual " << true_residual
|
<< "\tTrue residual " << true_residual
|
||||||
@ -187,6 +191,8 @@ public:
|
|||||||
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
|
std::cout << GridLogMessage << "\tAxpyNorm " << AxpyNormTimer.Elapsed() <<std::endl;
|
||||||
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
|
std::cout << GridLogMessage << "\tLinearComb " << LinearCombTimer.Elapsed() <<std::endl;
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "\tMobius flop rate " << DwfFlops/ usecs<< " Gflops " <<std::endl;
|
||||||
|
|
||||||
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
if (ErrorOnNoConverge) assert(true_residual / Tolerance < 10000.0);
|
||||||
|
|
||||||
IterationsToComplete = k;
|
IterationsToComplete = k;
|
||||||
|
@ -84,6 +84,7 @@ public:
|
|||||||
|
|
||||||
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &psi)
|
void operator() (LinearOperatorBase<Field> &Linop, const Field &src, std::vector<Field> &psi)
|
||||||
{
|
{
|
||||||
|
GRID_TRACE("ConjugateGradientMultiShift");
|
||||||
|
|
||||||
GridBase *grid = src.Grid();
|
GridBase *grid = src.Grid();
|
||||||
|
|
||||||
|
@ -127,6 +127,7 @@ public:
|
|||||||
|
|
||||||
void operator() (LinearOperatorBase<FieldD> &Linop_d, const FieldD &src_d, std::vector<FieldD> &psi_d)
|
void operator() (LinearOperatorBase<FieldD> &Linop_d, const FieldD &src_d, std::vector<FieldD> &psi_d)
|
||||||
{
|
{
|
||||||
|
GRID_TRACE("ConjugateGradientMultiShiftMixedPrec");
|
||||||
GridBase *DoublePrecGrid = src_d.Grid();
|
GridBase *DoublePrecGrid = src_d.Grid();
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -73,6 +73,7 @@ public:
|
|||||||
}
|
}
|
||||||
|
|
||||||
void operator()(const FieldD &src, FieldD &psi) {
|
void operator()(const FieldD &src, FieldD &psi) {
|
||||||
|
GRID_TRACE("ConjugateGradientReliableUpdate");
|
||||||
LinearOperatorBase<FieldF> *Linop_f_use = &Linop_f;
|
LinearOperatorBase<FieldF> *Linop_f_use = &Linop_f;
|
||||||
bool using_fallback = false;
|
bool using_fallback = false;
|
||||||
|
|
||||||
|
@ -40,7 +40,7 @@ void MemoryManager::PrintBytes(void)
|
|||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax];
|
MemoryManager::AllocationCacheEntry MemoryManager::Entries[MemoryManager::NallocType][MemoryManager::NallocCacheMax];
|
||||||
int MemoryManager::Victim[MemoryManager::NallocType];
|
int MemoryManager::Victim[MemoryManager::NallocType];
|
||||||
int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 2, 8, 2, 8 };
|
int MemoryManager::Ncache[MemoryManager::NallocType] = { 2, 8, 8, 16, 8, 16 };
|
||||||
uint64_t MemoryManager::CacheBytes[MemoryManager::NallocType];
|
uint64_t MemoryManager::CacheBytes[MemoryManager::NallocType];
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// Actual allocation and deallocation utils
|
// Actual allocation and deallocation utils
|
||||||
|
@ -3,10 +3,16 @@
|
|||||||
|
|
||||||
#warning "Using explicit device memory copies"
|
#warning "Using explicit device memory copies"
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
//#define dprintf(...) printf ( __VA_ARGS__ ); fflush(stdout);
|
|
||||||
|
#define MAXLINE 512
|
||||||
|
static char print_buffer [ MAXLINE ];
|
||||||
|
|
||||||
|
#define mprintf(...) snprintf (print_buffer,MAXLINE, __VA_ARGS__ ); std::cout << GridLogMemory << print_buffer;
|
||||||
|
//#define dprintf(...) printf (__VA_ARGS__ ); fflush(stdout);
|
||||||
#define dprintf(...)
|
#define dprintf(...)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
// For caching copies of data on device
|
// For caching copies of data on device
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
@ -104,7 +110,7 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
|
|||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
assert(AccCache.state!=Empty);
|
assert(AccCache.state!=Empty);
|
||||||
|
|
||||||
dprintf("MemoryManager: Discard(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
|
mprintf("MemoryManager: Discard(%lx) %lx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
|
||||||
assert(AccCache.accLock==0);
|
assert(AccCache.accLock==0);
|
||||||
assert(AccCache.cpuLock==0);
|
assert(AccCache.cpuLock==0);
|
||||||
assert(AccCache.CpuPtr!=(uint64_t)NULL);
|
assert(AccCache.CpuPtr!=(uint64_t)NULL);
|
||||||
@ -112,7 +118,7 @@ void MemoryManager::AccDiscard(AcceleratorViewEntry &AccCache)
|
|||||||
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
|
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
|
||||||
DeviceBytes -=AccCache.bytes;
|
DeviceBytes -=AccCache.bytes;
|
||||||
LRUremove(AccCache);
|
LRUremove(AccCache);
|
||||||
dprintf("MemoryManager: Free(%llx) LRU %lld Total %lld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
|
dprintf("MemoryManager: Free(%lx) LRU %ld Total %ld\n",(uint64_t)AccCache.AccPtr,DeviceLRUBytes,DeviceBytes);
|
||||||
}
|
}
|
||||||
uint64_t CpuPtr = AccCache.CpuPtr;
|
uint64_t CpuPtr = AccCache.CpuPtr;
|
||||||
EntryErase(CpuPtr);
|
EntryErase(CpuPtr);
|
||||||
@ -126,7 +132,7 @@ void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
|
|||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
assert(AccCache.state!=Empty);
|
assert(AccCache.state!=Empty);
|
||||||
|
|
||||||
dprintf("MemoryManager: Evict(%llx) %llx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
|
mprintf("MemoryManager: Evict(%lx) %lx\n",(uint64_t)AccCache.CpuPtr,(uint64_t)AccCache.AccPtr);
|
||||||
assert(AccCache.accLock==0);
|
assert(AccCache.accLock==0);
|
||||||
assert(AccCache.cpuLock==0);
|
assert(AccCache.cpuLock==0);
|
||||||
if(AccCache.state==AccDirty) {
|
if(AccCache.state==AccDirty) {
|
||||||
@ -137,7 +143,7 @@ void MemoryManager::Evict(AcceleratorViewEntry &AccCache)
|
|||||||
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
|
AcceleratorFree((void *)AccCache.AccPtr,AccCache.bytes);
|
||||||
DeviceBytes -=AccCache.bytes;
|
DeviceBytes -=AccCache.bytes;
|
||||||
LRUremove(AccCache);
|
LRUremove(AccCache);
|
||||||
dprintf("MemoryManager: Free(%llx) footprint now %lld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
|
dprintf("MemoryManager: Free(%lx) footprint now %ld \n",(uint64_t)AccCache.AccPtr,DeviceBytes);
|
||||||
}
|
}
|
||||||
uint64_t CpuPtr = AccCache.CpuPtr;
|
uint64_t CpuPtr = AccCache.CpuPtr;
|
||||||
EntryErase(CpuPtr);
|
EntryErase(CpuPtr);
|
||||||
@ -150,7 +156,7 @@ void MemoryManager::Flush(AcceleratorViewEntry &AccCache)
|
|||||||
assert(AccCache.AccPtr!=(uint64_t)NULL);
|
assert(AccCache.AccPtr!=(uint64_t)NULL);
|
||||||
assert(AccCache.CpuPtr!=(uint64_t)NULL);
|
assert(AccCache.CpuPtr!=(uint64_t)NULL);
|
||||||
acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes);
|
acceleratorCopyFromDevice((void *)AccCache.AccPtr,(void *)AccCache.CpuPtr,AccCache.bytes);
|
||||||
dprintf("MemoryManager: Flush %llx -> %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
|
mprintf("MemoryManager: Flush %lx -> %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
|
||||||
DeviceToHostBytes+=AccCache.bytes;
|
DeviceToHostBytes+=AccCache.bytes;
|
||||||
DeviceToHostXfer++;
|
DeviceToHostXfer++;
|
||||||
AccCache.state=Consistent;
|
AccCache.state=Consistent;
|
||||||
@ -165,7 +171,7 @@ void MemoryManager::Clone(AcceleratorViewEntry &AccCache)
|
|||||||
AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes);
|
AccCache.AccPtr=(uint64_t)AcceleratorAllocate(AccCache.bytes);
|
||||||
DeviceBytes+=AccCache.bytes;
|
DeviceBytes+=AccCache.bytes;
|
||||||
}
|
}
|
||||||
dprintf("MemoryManager: Clone %llx <- %llx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
|
mprintf("MemoryManager: Clone %lx <- %lx\n",(uint64_t)AccCache.AccPtr,(uint64_t)AccCache.CpuPtr); fflush(stdout);
|
||||||
acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes);
|
acceleratorCopyToDevice((void *)AccCache.CpuPtr,(void *)AccCache.AccPtr,AccCache.bytes);
|
||||||
HostToDeviceBytes+=AccCache.bytes;
|
HostToDeviceBytes+=AccCache.bytes;
|
||||||
HostToDeviceXfer++;
|
HostToDeviceXfer++;
|
||||||
@ -241,7 +247,7 @@ uint64_t MemoryManager::AcceleratorViewOpen(uint64_t CpuPtr,size_t bytes,ViewMod
|
|||||||
assert(AccCache.cpuLock==0); // Programming error
|
assert(AccCache.cpuLock==0); // Programming error
|
||||||
|
|
||||||
if(AccCache.state!=Empty) {
|
if(AccCache.state!=Empty) {
|
||||||
dprintf("ViewOpen found entry %llx %llx : %lld %lld\n",
|
dprintf("ViewOpen found entry %lx %lx : %ld %ld\n",
|
||||||
(uint64_t)AccCache.CpuPtr,
|
(uint64_t)AccCache.CpuPtr,
|
||||||
(uint64_t)CpuPtr,
|
(uint64_t)CpuPtr,
|
||||||
(uint64_t)AccCache.bytes,
|
(uint64_t)AccCache.bytes,
|
||||||
|
@ -107,6 +107,7 @@ public:
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
static int RankWorld(void) ;
|
static int RankWorld(void) ;
|
||||||
static void BroadcastWorld(int root,void* data, int bytes);
|
static void BroadcastWorld(int root,void* data, int bytes);
|
||||||
|
static void BarrierWorld(void);
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////
|
||||||
// Reduction
|
// Reduction
|
||||||
|
@ -396,17 +396,17 @@ double CartesianCommunicator::StencilSendToRecvFromBegin(std::vector<CommsReques
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
|
/* if ( CommunicatorPolicy == CommunicatorPolicySequential ) {
|
||||||
this->StencilSendToRecvFromComplete(list,dir);
|
* this->StencilSendToRecvFromComplete(list,dir);
|
||||||
list.resize(0);
|
* list.resize(0);
|
||||||
}
|
* }
|
||||||
|
*/
|
||||||
return off_node_bytes;
|
return off_node_bytes;
|
||||||
}
|
}
|
||||||
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list,int dir)
|
void CartesianCommunicator::StencilSendToRecvFromComplete(std::vector<CommsRequest_t> &list,int dir)
|
||||||
{
|
{
|
||||||
// std::cout << "Copy Synchronised\n"<<std::endl;
|
|
||||||
acceleratorCopySynchronise();
|
acceleratorCopySynchronise();
|
||||||
|
StencilBarrier();// Synch shared memory on a single nodes
|
||||||
|
|
||||||
int nreq=list.size();
|
int nreq=list.size();
|
||||||
|
|
||||||
@ -443,6 +443,10 @@ int CartesianCommunicator::RankWorld(void){
|
|||||||
MPI_Comm_rank(communicator_world,&r);
|
MPI_Comm_rank(communicator_world,&r);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
void CartesianCommunicator::BarrierWorld(void){
|
||||||
|
int ierr = MPI_Barrier(communicator_world);
|
||||||
|
assert(ierr==0);
|
||||||
|
}
|
||||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes)
|
||||||
{
|
{
|
||||||
int ierr= MPI_Bcast(data,
|
int ierr= MPI_Bcast(data,
|
||||||
|
@ -104,6 +104,7 @@ int CartesianCommunicator::RankWorld(void){return 0;}
|
|||||||
void CartesianCommunicator::Barrier(void){}
|
void CartesianCommunicator::Barrier(void){}
|
||||||
void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {}
|
void CartesianCommunicator::Broadcast(int root,void* data, int bytes) {}
|
||||||
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { }
|
void CartesianCommunicator::BroadcastWorld(int root,void* data, int bytes) { }
|
||||||
|
void CartesianCommunicator::BarrierWorld(void) { }
|
||||||
int CartesianCommunicator::RankFromProcessorCoor(Coordinate &coor) { return 0;}
|
int CartesianCommunicator::RankFromProcessorCoor(Coordinate &coor) { return 0;}
|
||||||
void CartesianCommunicator::ProcessorCoorFromRank(int rank, Coordinate &coor){ coor = _processor_coor; }
|
void CartesianCommunicator::ProcessorCoorFromRank(int rank, Coordinate &coor){ coor = _processor_coor; }
|
||||||
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
void CartesianCommunicator::ShiftedRanks(int dim,int shift,int &source,int &dest)
|
||||||
|
@ -523,7 +523,7 @@ void GlobalSharedMemory::SharedMemoryAllocate(uint64_t bytes, int flags)
|
|||||||
}
|
}
|
||||||
if ( WorldRank == 0 ){
|
if ( WorldRank == 0 ){
|
||||||
std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
|
std::cout << WorldRank << header " SharedMemoryMPI.cc acceleratorAllocDevice "<< bytes
|
||||||
<< "bytes at "<< std::hex<< ShmCommBuf <<std::dec<<" for comms buffers " <<std::endl;
|
<< "bytes at "<< std::hex<< ShmCommBuf << " - "<<(bytes-1+(uint64_t)ShmCommBuf) <<std::dec<<" for comms buffers " <<std::endl;
|
||||||
}
|
}
|
||||||
SharedMemoryZero(ShmCommBuf,bytes);
|
SharedMemoryZero(ShmCommBuf,bytes);
|
||||||
std::cout<< "Setting up IPC"<<std::endl;
|
std::cout<< "Setting up IPC"<<std::endl;
|
||||||
|
@ -36,6 +36,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class obj1,class obj2,class obj3> inline
|
template<class obj1,class obj2,class obj3> inline
|
||||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||||
|
GRID_TRACE("mult");
|
||||||
ret.Checkerboard() = lhs.Checkerboard();
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
autoView( ret_v , ret, AcceleratorWrite);
|
autoView( ret_v , ret, AcceleratorWrite);
|
||||||
autoView( lhs_v , lhs, AcceleratorRead);
|
autoView( lhs_v , lhs, AcceleratorRead);
|
||||||
@ -53,6 +54,7 @@ void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
|||||||
|
|
||||||
template<class obj1,class obj2,class obj3> inline
|
template<class obj1,class obj2,class obj3> inline
|
||||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||||
|
GRID_TRACE("mac");
|
||||||
ret.Checkerboard() = lhs.Checkerboard();
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
conformable(ret,rhs);
|
conformable(ret,rhs);
|
||||||
conformable(lhs,rhs);
|
conformable(lhs,rhs);
|
||||||
@ -70,6 +72,7 @@ void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
|||||||
|
|
||||||
template<class obj1,class obj2,class obj3> inline
|
template<class obj1,class obj2,class obj3> inline
|
||||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||||
|
GRID_TRACE("sub");
|
||||||
ret.Checkerboard() = lhs.Checkerboard();
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
conformable(ret,rhs);
|
conformable(ret,rhs);
|
||||||
conformable(lhs,rhs);
|
conformable(lhs,rhs);
|
||||||
@ -86,6 +89,7 @@ void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
|||||||
}
|
}
|
||||||
template<class obj1,class obj2,class obj3> inline
|
template<class obj1,class obj2,class obj3> inline
|
||||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
||||||
|
GRID_TRACE("add");
|
||||||
ret.Checkerboard() = lhs.Checkerboard();
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
conformable(ret,rhs);
|
conformable(ret,rhs);
|
||||||
conformable(lhs,rhs);
|
conformable(lhs,rhs);
|
||||||
@ -106,6 +110,7 @@ void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const Lattice<obj3> &rhs){
|
|||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class obj1,class obj2,class obj3> inline
|
template<class obj1,class obj2,class obj3> inline
|
||||||
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||||
|
GRID_TRACE("mult");
|
||||||
ret.Checkerboard() = lhs.Checkerboard();
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
conformable(lhs,ret);
|
conformable(lhs,ret);
|
||||||
autoView( ret_v , ret, AcceleratorWrite);
|
autoView( ret_v , ret, AcceleratorWrite);
|
||||||
@ -119,6 +124,7 @@ void mult(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
|||||||
|
|
||||||
template<class obj1,class obj2,class obj3> inline
|
template<class obj1,class obj2,class obj3> inline
|
||||||
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||||
|
GRID_TRACE("mac");
|
||||||
ret.Checkerboard() = lhs.Checkerboard();
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
conformable(ret,lhs);
|
conformable(ret,lhs);
|
||||||
autoView( ret_v , ret, AcceleratorWrite);
|
autoView( ret_v , ret, AcceleratorWrite);
|
||||||
@ -133,6 +139,7 @@ void mac(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
|||||||
|
|
||||||
template<class obj1,class obj2,class obj3> inline
|
template<class obj1,class obj2,class obj3> inline
|
||||||
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||||
|
GRID_TRACE("sub");
|
||||||
ret.Checkerboard() = lhs.Checkerboard();
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
conformable(ret,lhs);
|
conformable(ret,lhs);
|
||||||
autoView( ret_v , ret, AcceleratorWrite);
|
autoView( ret_v , ret, AcceleratorWrite);
|
||||||
@ -146,6 +153,7 @@ void sub(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
|||||||
}
|
}
|
||||||
template<class obj1,class obj2,class obj3> inline
|
template<class obj1,class obj2,class obj3> inline
|
||||||
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
||||||
|
GRID_TRACE("add");
|
||||||
ret.Checkerboard() = lhs.Checkerboard();
|
ret.Checkerboard() = lhs.Checkerboard();
|
||||||
conformable(lhs,ret);
|
conformable(lhs,ret);
|
||||||
autoView( ret_v , ret, AcceleratorWrite);
|
autoView( ret_v , ret, AcceleratorWrite);
|
||||||
@ -163,6 +171,7 @@ void add(Lattice<obj1> &ret,const Lattice<obj2> &lhs,const obj3 &rhs){
|
|||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
template<class obj1,class obj2,class obj3> inline
|
template<class obj1,class obj2,class obj3> inline
|
||||||
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||||
|
GRID_TRACE("mult");
|
||||||
ret.Checkerboard() = rhs.Checkerboard();
|
ret.Checkerboard() = rhs.Checkerboard();
|
||||||
conformable(ret,rhs);
|
conformable(ret,rhs);
|
||||||
autoView( ret_v , ret, AcceleratorWrite);
|
autoView( ret_v , ret, AcceleratorWrite);
|
||||||
@ -177,6 +186,7 @@ void mult(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
|||||||
|
|
||||||
template<class obj1,class obj2,class obj3> inline
|
template<class obj1,class obj2,class obj3> inline
|
||||||
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||||
|
GRID_TRACE("mac");
|
||||||
ret.Checkerboard() = rhs.Checkerboard();
|
ret.Checkerboard() = rhs.Checkerboard();
|
||||||
conformable(ret,rhs);
|
conformable(ret,rhs);
|
||||||
autoView( ret_v , ret, AcceleratorWrite);
|
autoView( ret_v , ret, AcceleratorWrite);
|
||||||
@ -191,6 +201,7 @@ void mac(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
|||||||
|
|
||||||
template<class obj1,class obj2,class obj3> inline
|
template<class obj1,class obj2,class obj3> inline
|
||||||
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||||
|
GRID_TRACE("sub");
|
||||||
ret.Checkerboard() = rhs.Checkerboard();
|
ret.Checkerboard() = rhs.Checkerboard();
|
||||||
conformable(ret,rhs);
|
conformable(ret,rhs);
|
||||||
autoView( ret_v , ret, AcceleratorWrite);
|
autoView( ret_v , ret, AcceleratorWrite);
|
||||||
@ -204,6 +215,7 @@ void sub(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
|||||||
}
|
}
|
||||||
template<class obj1,class obj2,class obj3> inline
|
template<class obj1,class obj2,class obj3> inline
|
||||||
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
||||||
|
GRID_TRACE("add");
|
||||||
ret.Checkerboard() = rhs.Checkerboard();
|
ret.Checkerboard() = rhs.Checkerboard();
|
||||||
conformable(ret,rhs);
|
conformable(ret,rhs);
|
||||||
autoView( ret_v , ret, AcceleratorWrite);
|
autoView( ret_v , ret, AcceleratorWrite);
|
||||||
@ -218,6 +230,7 @@ void add(Lattice<obj1> &ret,const obj2 &lhs,const Lattice<obj3> &rhs){
|
|||||||
|
|
||||||
template<class sobj,class vobj> inline
|
template<class sobj,class vobj> inline
|
||||||
void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||||
|
GRID_TRACE("axpy");
|
||||||
ret.Checkerboard() = x.Checkerboard();
|
ret.Checkerboard() = x.Checkerboard();
|
||||||
conformable(ret,x);
|
conformable(ret,x);
|
||||||
conformable(x,y);
|
conformable(x,y);
|
||||||
@ -231,6 +244,7 @@ void axpy(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &
|
|||||||
}
|
}
|
||||||
template<class sobj,class vobj> inline
|
template<class sobj,class vobj> inline
|
||||||
void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y){
|
||||||
|
GRID_TRACE("axpby");
|
||||||
ret.Checkerboard() = x.Checkerboard();
|
ret.Checkerboard() = x.Checkerboard();
|
||||||
conformable(ret,x);
|
conformable(ret,x);
|
||||||
conformable(x,y);
|
conformable(x,y);
|
||||||
@ -246,11 +260,13 @@ void axpby(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice
|
|||||||
template<class sobj,class vobj> inline
|
template<class sobj,class vobj> inline
|
||||||
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
RealD axpy_norm(Lattice<vobj> &ret,sobj a,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||||
{
|
{
|
||||||
|
GRID_TRACE("axpy_norm");
|
||||||
return axpy_norm_fast(ret,a,x,y);
|
return axpy_norm_fast(ret,a,x,y);
|
||||||
}
|
}
|
||||||
template<class sobj,class vobj> inline
|
template<class sobj,class vobj> inline
|
||||||
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
RealD axpby_norm(Lattice<vobj> &ret,sobj a,sobj b,const Lattice<vobj> &x,const Lattice<vobj> &y)
|
||||||
{
|
{
|
||||||
|
GRID_TRACE("axpby_norm");
|
||||||
return axpby_norm_fast(ret,a,b,x,y);
|
return axpby_norm_fast(ret,a,b,x,y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -117,6 +117,7 @@ public:
|
|||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
template <typename Op, typename T1> inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
|
template <typename Op, typename T1> inline Lattice<vobj> & operator=(const LatticeUnaryExpression<Op,T1> &expr)
|
||||||
{
|
{
|
||||||
|
GRID_TRACE("ExpressionTemplateEval");
|
||||||
GridBase *egrid(nullptr);
|
GridBase *egrid(nullptr);
|
||||||
GridFromExpression(egrid,expr);
|
GridFromExpression(egrid,expr);
|
||||||
assert(egrid!=nullptr);
|
assert(egrid!=nullptr);
|
||||||
@ -140,6 +141,7 @@ public:
|
|||||||
}
|
}
|
||||||
template <typename Op, typename T1,typename T2> inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
|
template <typename Op, typename T1,typename T2> inline Lattice<vobj> & operator=(const LatticeBinaryExpression<Op,T1,T2> &expr)
|
||||||
{
|
{
|
||||||
|
GRID_TRACE("ExpressionTemplateEval");
|
||||||
GridBase *egrid(nullptr);
|
GridBase *egrid(nullptr);
|
||||||
GridFromExpression(egrid,expr);
|
GridFromExpression(egrid,expr);
|
||||||
assert(egrid!=nullptr);
|
assert(egrid!=nullptr);
|
||||||
@ -163,6 +165,7 @@ public:
|
|||||||
}
|
}
|
||||||
template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
|
template <typename Op, typename T1,typename T2,typename T3> inline Lattice<vobj> & operator=(const LatticeTrinaryExpression<Op,T1,T2,T3> &expr)
|
||||||
{
|
{
|
||||||
|
GRID_TRACE("ExpressionTemplateEval");
|
||||||
GridBase *egrid(nullptr);
|
GridBase *egrid(nullptr);
|
||||||
GridFromExpression(egrid,expr);
|
GridFromExpression(egrid,expr);
|
||||||
assert(egrid!=nullptr);
|
assert(egrid!=nullptr);
|
||||||
|
@ -91,10 +91,7 @@ inline typename vobj::scalar_objectD sumD_cpu(const vobj *arg, Integer osites)
|
|||||||
for(int i=0;i<nthread;i++){
|
for(int i=0;i<nthread;i++){
|
||||||
ssum = ssum+sumarray[i];
|
ssum = ssum+sumarray[i];
|
||||||
}
|
}
|
||||||
|
return ssum;
|
||||||
typedef typename vobj::scalar_object ssobj;
|
|
||||||
ssobj ret = ssum;
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
/*
|
/*
|
||||||
Threaded max, don't use for now
|
Threaded max, don't use for now
|
||||||
@ -488,6 +485,14 @@ template<class vobj> inline void sliceSum(const Lattice<vobj> &Data,std::vector<
|
|||||||
int words = fd*sizeof(sobj)/sizeof(scalar_type);
|
int words = fd*sizeof(sobj)/sizeof(scalar_type);
|
||||||
grid->GlobalSumVector(ptr, words);
|
grid->GlobalSumVector(ptr, words);
|
||||||
}
|
}
|
||||||
|
template<class vobj> inline
|
||||||
|
std::vector<typename vobj::scalar_object>
|
||||||
|
sliceSum(const Lattice<vobj> &Data,int orthogdim)
|
||||||
|
{
|
||||||
|
std::vector<typename vobj::scalar_object> result;
|
||||||
|
sliceSum(Data,result,orthogdim);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
template<class vobj>
|
template<class vobj>
|
||||||
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
static void sliceInnerProductVector( std::vector<ComplexD> & result, const Lattice<vobj> &lhs,const Lattice<vobj> &rhs,int orthogdim)
|
||||||
|
@ -68,6 +68,7 @@ GridLogger GridLogMessage(1, "Message", GridLogColours, "NORMAL");
|
|||||||
GridLogger GridLogMemory (1, "Memory", GridLogColours, "NORMAL");
|
GridLogger GridLogMemory (1, "Memory", GridLogColours, "NORMAL");
|
||||||
GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE");
|
GridLogger GridLogDebug (1, "Debug", GridLogColours, "PURPLE");
|
||||||
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
|
GridLogger GridLogPerformance(1, "Performance", GridLogColours, "GREEN");
|
||||||
|
GridLogger GridLogDslash (1, "Dslash", GridLogColours, "BLUE");
|
||||||
GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE");
|
GridLogger GridLogIterative (1, "Iterative", GridLogColours, "BLUE");
|
||||||
GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE");
|
GridLogger GridLogIntegrator (1, "Integrator", GridLogColours, "BLUE");
|
||||||
GridLogger GridLogHMC (1, "HMC", GridLogColours, "BLUE");
|
GridLogger GridLogHMC (1, "HMC", GridLogColours, "BLUE");
|
||||||
@ -80,6 +81,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
|
|||||||
GridLogIterative.Active(0);
|
GridLogIterative.Active(0);
|
||||||
GridLogDebug.Active(0);
|
GridLogDebug.Active(0);
|
||||||
GridLogPerformance.Active(0);
|
GridLogPerformance.Active(0);
|
||||||
|
GridLogDslash.Active(0);
|
||||||
GridLogIntegrator.Active(1);
|
GridLogIntegrator.Active(1);
|
||||||
GridLogColours.Active(0);
|
GridLogColours.Active(0);
|
||||||
GridLogHMC.Active(1);
|
GridLogHMC.Active(1);
|
||||||
@ -91,6 +93,7 @@ void GridLogConfigure(std::vector<std::string> &logstreams) {
|
|||||||
if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1);
|
if (logstreams[i] == std::string("Iterative")) GridLogIterative.Active(1);
|
||||||
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
|
if (logstreams[i] == std::string("Debug")) GridLogDebug.Active(1);
|
||||||
if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1);
|
if (logstreams[i] == std::string("Performance")) GridLogPerformance.Active(1);
|
||||||
|
if (logstreams[i] == std::string("Dslash")) GridLogDslash.Active(1);
|
||||||
if (logstreams[i] == std::string("NoIntegrator")) GridLogIntegrator.Active(0);
|
if (logstreams[i] == std::string("NoIntegrator")) GridLogIntegrator.Active(0);
|
||||||
if (logstreams[i] == std::string("NoHMC")) GridLogHMC.Active(0);
|
if (logstreams[i] == std::string("NoHMC")) GridLogHMC.Active(0);
|
||||||
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
|
if (logstreams[i] == std::string("Colours")) GridLogColours.Active(1);
|
||||||
|
@ -138,7 +138,8 @@ public:
|
|||||||
stream << std::setw(log.topWidth);
|
stream << std::setw(log.topWidth);
|
||||||
}
|
}
|
||||||
stream << log.topName << log.background()<< " : ";
|
stream << log.topName << log.background()<< " : ";
|
||||||
stream << log.colour() << std::left;
|
// stream << log.colour() << std::left;
|
||||||
|
stream << std::left;
|
||||||
if (log.chanWidth > 0)
|
if (log.chanWidth > 0)
|
||||||
{
|
{
|
||||||
stream << std::setw(log.chanWidth);
|
stream << std::setw(log.chanWidth);
|
||||||
@ -153,9 +154,9 @@ public:
|
|||||||
stream << log.evidence()
|
stream << log.evidence()
|
||||||
<< now << log.background() << " : " ;
|
<< now << log.background() << " : " ;
|
||||||
}
|
}
|
||||||
stream << log.colour();
|
// stream << log.colour();
|
||||||
|
stream << std::right;
|
||||||
stream.flags(f);
|
stream.flags(f);
|
||||||
|
|
||||||
return stream;
|
return stream;
|
||||||
} else {
|
} else {
|
||||||
return devnull;
|
return devnull;
|
||||||
@ -180,6 +181,7 @@ extern GridLogger GridLogWarning;
|
|||||||
extern GridLogger GridLogMessage;
|
extern GridLogger GridLogMessage;
|
||||||
extern GridLogger GridLogDebug ;
|
extern GridLogger GridLogDebug ;
|
||||||
extern GridLogger GridLogPerformance;
|
extern GridLogger GridLogPerformance;
|
||||||
|
extern GridLogger GridLogDslash;
|
||||||
extern GridLogger GridLogIterative ;
|
extern GridLogger GridLogIterative ;
|
||||||
extern GridLogger GridLogIntegrator ;
|
extern GridLogger GridLogIntegrator ;
|
||||||
extern GridLogger GridLogHMC;
|
extern GridLogger GridLogHMC;
|
||||||
|
@ -27,10 +27,13 @@ Author: paboyle <paboyle@ph.ed.ac.uk>
|
|||||||
/* END LEGAL */
|
/* END LEGAL */
|
||||||
|
|
||||||
#include <Grid/GridCore.h>
|
#include <Grid/GridCore.h>
|
||||||
#include <Grid/perfmon/PerfCount.h>
|
|
||||||
|
|
||||||
|
#include <Grid/perfmon/Timer.h>
|
||||||
|
#include <Grid/perfmon/PerfCount.h>
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
GridTimePoint theProgramStart = GridClock::now();
|
||||||
|
|
||||||
#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
|
#define CacheControl(L,O,R) ((PERF_COUNT_HW_CACHE_##L)|(PERF_COUNT_HW_CACHE_OP_##O<<8)| (PERF_COUNT_HW_CACHE_RESULT_##R<<16))
|
||||||
#define RawConfig(A,B) (A<<8|B)
|
#define RawConfig(A,B) (A<<8|B)
|
||||||
const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {
|
const PerformanceCounter::PerformanceCounterConfig PerformanceCounter::PerformanceCounterConfigs [] = {
|
||||||
|
@ -35,17 +35,8 @@ Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
|||||||
|
|
||||||
NAMESPACE_BEGIN(Grid)
|
NAMESPACE_BEGIN(Grid)
|
||||||
|
|
||||||
// Dress the output; use std::chrono
|
//typedef std::chrono::system_clock GridClock;
|
||||||
// C++11 time facilities better?
|
typedef std::chrono::high_resolution_clock GridClock;
|
||||||
inline double usecond(void) {
|
|
||||||
struct timeval tv;
|
|
||||||
tv.tv_sec = 0;
|
|
||||||
tv.tv_usec = 0;
|
|
||||||
gettimeofday(&tv,NULL);
|
|
||||||
return 1.0*tv.tv_usec + 1.0e6*tv.tv_sec;
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef std::chrono::system_clock GridClock;
|
|
||||||
typedef std::chrono::time_point<GridClock> GridTimePoint;
|
typedef std::chrono::time_point<GridClock> GridTimePoint;
|
||||||
|
|
||||||
typedef std::chrono::seconds GridSecs;
|
typedef std::chrono::seconds GridSecs;
|
||||||
@ -53,6 +44,15 @@ typedef std::chrono::milliseconds GridMillisecs;
|
|||||||
typedef std::chrono::microseconds GridUsecs;
|
typedef std::chrono::microseconds GridUsecs;
|
||||||
typedef std::chrono::microseconds GridTime;
|
typedef std::chrono::microseconds GridTime;
|
||||||
|
|
||||||
|
extern GridTimePoint theProgramStart;
|
||||||
|
// Dress the output; use std::chrono
|
||||||
|
// C++11 time facilities better?
|
||||||
|
inline double usecond(void) {
|
||||||
|
auto usecs = std::chrono::duration_cast<GridUsecs>(GridClock::now()-theProgramStart);
|
||||||
|
return 1.0*usecs.count();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time)
|
inline std::ostream& operator<< (std::ostream & stream, const GridSecs & time)
|
||||||
{
|
{
|
||||||
stream << time.count()<<" s";
|
stream << time.count()<<" s";
|
||||||
|
66
Grid/perfmon/Tracing.h
Normal file
66
Grid/perfmon/Tracing.h
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
#pragma once
|
||||||
|
#ifdef GRID_TRACING_NVTX
|
||||||
|
#include <nvToolsExt.h>
|
||||||
|
class GridTracer {
|
||||||
|
public:
|
||||||
|
GridTracer(const char* name) {
|
||||||
|
nvtxRangePushA(name);
|
||||||
|
}
|
||||||
|
~GridTracer() {
|
||||||
|
nvtxRangePop();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
inline void tracePush(const char *name) { nvtxRangePushA(name); }
|
||||||
|
inline void tracePop(const char *name) { nvtxRangePop(); }
|
||||||
|
// NVTX build: start/stop ranges are not implemented here, so this is a no-op.
// Returns a dummy range ID of 0, like the timer and no-tracing variants; the
// previous empty body flowed off the end of a function returning int, which is
// undefined behaviour.
inline int traceStart(const char *name) { (void)name; return 0; }
|
||||||
|
inline void traceStop(int ID) { }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef GRID_TRACING_ROCTX
|
||||||
|
#include <roctracer/roctx.h>
|
||||||
|
class GridTracer {
|
||||||
|
public:
|
||||||
|
GridTracer(const char* name) {
|
||||||
|
roctxRangePushA(name);
|
||||||
|
std::cout << "roctxRangePush "<<name<<std::endl;
|
||||||
|
}
|
||||||
|
~GridTracer() {
|
||||||
|
roctxRangePop();
|
||||||
|
std::cout << "roctxRangePop "<<std::endl;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
inline void tracePush(const char *name) { roctxRangePushA(name); }
|
||||||
|
inline void tracePop(const char *name) { roctxRangePop(); }
|
||||||
|
// ROCTX build: opens a start/stop range called `name` and returns its ID so the
// caller can pass it to traceStop(). The previous body discarded the value from
// roctxRangeStart() and flowed off the end of a function returning int, which
// is undefined behaviour.
// NOTE(review): the ID is narrowed to int to keep the existing signature —
// confirm that roctx range IDs fit in an int.
inline int traceStart(const char *name) { return static_cast<int>(roctxRangeStart(name)); }
|
||||||
|
inline void traceStop(int ID) { roctxRangeStop(ID); }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef GRID_TRACING_TIMER
|
||||||
|
class GridTracer {
|
||||||
|
public:
|
||||||
|
const char *name;
|
||||||
|
double elapsed;
|
||||||
|
GridTracer(const char* _name) {
|
||||||
|
name = _name;
|
||||||
|
elapsed=-usecond();
|
||||||
|
}
|
||||||
|
~GridTracer() {
|
||||||
|
elapsed+=usecond();
|
||||||
|
std::cout << GridLogTracing << name << " took " <<elapsed<< " us" <<std::endl;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
inline void tracePush(const char *name) { }
|
||||||
|
inline void tracePop(const char *name) { }
|
||||||
|
inline int traceStart(const char *name) { return 0; }
|
||||||
|
inline void traceStop(int ID) { }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef GRID_TRACING_NONE
|
||||||
|
#define GRID_TRACE(name)
|
||||||
|
inline void tracePush(const char *name) { }
|
||||||
|
inline void tracePop(const char *name) { }
|
||||||
|
inline int traceStart(const char *name) { return 0; }
|
||||||
|
inline void traceStop(int ID) { }
|
||||||
|
#else
|
||||||
|
// GRID_TRACE(name) declares a scoped GridTracer with a unique variable name.
// Operands of ## are not macro-expanded, so pasting __COUNTER__ directly yields
// the literal identifier "uniq_name_using_macros__COUNTER__" on every use and
// two traces in one scope collide. The extra level of indirection expands
// __COUNTER__ before the tokens are pasted.
#define GRID_TRACE_CONCAT_INNER(a,b) a##b
#define GRID_TRACE_CONCAT(a,b) GRID_TRACE_CONCAT_INNER(a,b)
#define GRID_TRACE(name) GridTracer GRID_TRACE_CONCAT(uniq_name_using_macros,__COUNTER__)(name);
|
||||||
|
#endif
|
@ -42,6 +42,8 @@ public:
|
|||||||
bool is_smeared = false;
|
bool is_smeared = false;
|
||||||
RealD deriv_norm_sum;
|
RealD deriv_norm_sum;
|
||||||
RealD deriv_max_sum;
|
RealD deriv_max_sum;
|
||||||
|
RealD Fdt_norm_sum;
|
||||||
|
RealD Fdt_max_sum;
|
||||||
int deriv_num;
|
int deriv_num;
|
||||||
RealD deriv_us;
|
RealD deriv_us;
|
||||||
RealD S_us;
|
RealD S_us;
|
||||||
@ -50,13 +52,21 @@ public:
|
|||||||
deriv_us = S_us = refresh_us = 0.0;
|
deriv_us = S_us = refresh_us = 0.0;
|
||||||
deriv_num=0;
|
deriv_num=0;
|
||||||
deriv_norm_sum = deriv_max_sum=0.0;
|
deriv_norm_sum = deriv_max_sum=0.0;
|
||||||
|
Fdt_max_sum = Fdt_norm_sum = 0.0;
|
||||||
}
|
}
|
||||||
void deriv_log(RealD nrm, RealD max) { deriv_max_sum+=max; deriv_norm_sum+=nrm; deriv_num++;}
|
void deriv_log(RealD nrm, RealD max,RealD Fdt_nrm,RealD Fdt_max) {
|
||||||
RealD deriv_max_average(void) { return deriv_max_sum/deriv_num; };
|
deriv_max_sum+=max;
|
||||||
RealD deriv_norm_average(void) { return deriv_norm_sum/deriv_num; };
|
deriv_norm_sum+=nrm;
|
||||||
|
Fdt_max_sum+=Fdt_max;
|
||||||
|
Fdt_norm_sum+=Fdt_nrm; deriv_num++;
|
||||||
|
}
|
||||||
|
RealD deriv_max_average(void) { return deriv_max_sum/deriv_num; };
|
||||||
|
RealD deriv_norm_average(void) { return deriv_norm_sum/deriv_num; };
|
||||||
|
RealD Fdt_max_average(void) { return Fdt_max_sum/deriv_num; };
|
||||||
|
RealD Fdt_norm_average(void) { return Fdt_norm_sum/deriv_num; };
|
||||||
RealD deriv_timer(void) { return deriv_us; };
|
RealD deriv_timer(void) { return deriv_us; };
|
||||||
RealD S_timer(void) { return deriv_us; };
|
RealD S_timer(void) { return S_us; };
|
||||||
RealD refresh_timer(void) { return deriv_us; };
|
RealD refresh_timer(void) { return refresh_us; };
|
||||||
void deriv_timer_start(void) { deriv_us-=usecond(); }
|
void deriv_timer_start(void) { deriv_us-=usecond(); }
|
||||||
void deriv_timer_stop(void) { deriv_us+=usecond(); }
|
void deriv_timer_stop(void) { deriv_us+=usecond(); }
|
||||||
void refresh_timer_start(void) { refresh_us-=usecond(); }
|
void refresh_timer_start(void) { refresh_us-=usecond(); }
|
||||||
@ -66,6 +76,7 @@ public:
|
|||||||
// Heatbath?
|
// Heatbath?
|
||||||
virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
|
virtual void refresh(const GaugeField& U, GridSerialRNG &sRNG, GridParallelRNG& pRNG) = 0; // refresh pseudofermions
|
||||||
virtual RealD S(const GaugeField& U) = 0; // evaluate the action
|
virtual RealD S(const GaugeField& U) = 0; // evaluate the action
|
||||||
|
// Action value at the start of a trajectory. The default simply evaluates S(U);
// an override may return a value cached by refresh() if the refresh already
// computed the action. Alternately refreshAndAction() ?
virtual RealD Sinitial(const GaugeField& U)
{
  return this->S(U);
}
|
||||||
virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0; // evaluate the action derivative
|
virtual void deriv(const GaugeField& U, GaugeField& dSdU) = 0; // evaluate the action derivative
|
||||||
virtual std::string action_name() = 0; // return the action name
|
virtual std::string action_name() = 0; // return the action name
|
||||||
virtual std::string LogParameters() = 0; // prints action parameters
|
virtual std::string LogParameters() = 0; // prints action parameters
|
||||||
|
@ -39,7 +39,7 @@ struct GparityWilsonImplParams {
|
|||||||
Coordinate twists;
|
Coordinate twists;
|
||||||
//mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
|
//mu=Nd-1 is assumed to be the time direction and a twist value of 1 indicates antiperiodic BCs
|
||||||
Coordinate dirichlet; // Blocksize of dirichlet BCs
|
Coordinate dirichlet; // Blocksize of dirichlet BCs
|
||||||
GparityWilsonImplParams() : twists(Nd, 0), dirichlet(Nd, 0) {};
|
GparityWilsonImplParams() : twists(Nd, 0) { dirichlet.resize(0); };
|
||||||
};
|
};
|
||||||
|
|
||||||
struct WilsonImplParams {
|
struct WilsonImplParams {
|
||||||
@ -48,13 +48,13 @@ struct WilsonImplParams {
|
|||||||
AcceleratorVector<Real,Nd> twist_n_2pi_L;
|
AcceleratorVector<Real,Nd> twist_n_2pi_L;
|
||||||
AcceleratorVector<Complex,Nd> boundary_phases;
|
AcceleratorVector<Complex,Nd> boundary_phases;
|
||||||
WilsonImplParams() {
|
WilsonImplParams() {
|
||||||
dirichlet.resize(Nd,0);
|
dirichlet.resize(0);
|
||||||
boundary_phases.resize(Nd, 1.0);
|
boundary_phases.resize(Nd, 1.0);
|
||||||
twist_n_2pi_L.resize(Nd, 0.0);
|
twist_n_2pi_L.resize(Nd, 0.0);
|
||||||
};
|
};
|
||||||
WilsonImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi), overlapCommsCompute(false) {
|
WilsonImplParams(const AcceleratorVector<Complex,Nd> phi) : boundary_phases(phi), overlapCommsCompute(false) {
|
||||||
twist_n_2pi_L.resize(Nd, 0.0);
|
twist_n_2pi_L.resize(Nd, 0.0);
|
||||||
dirichlet.resize(Nd,0);
|
dirichlet.resize(0);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -62,7 +62,7 @@ struct StaggeredImplParams {
|
|||||||
Coordinate dirichlet; // Blocksize of dirichlet BCs
|
Coordinate dirichlet; // Blocksize of dirichlet BCs
|
||||||
StaggeredImplParams()
|
StaggeredImplParams()
|
||||||
{
|
{
|
||||||
dirichlet.resize(Nd,0);
|
dirichlet.resize(0);
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -183,16 +183,6 @@ public:
|
|||||||
GridRedBlackCartesian &FourDimRedBlackGrid,
|
GridRedBlackCartesian &FourDimRedBlackGrid,
|
||||||
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
|
RealD _mass,RealD _M5,const ImplParams &p= ImplParams());
|
||||||
|
|
||||||
void CayleyReport(void);
|
|
||||||
void CayleyZeroCounters(void);
|
|
||||||
|
|
||||||
double M5Dflops;
|
|
||||||
double M5Dcalls;
|
|
||||||
double M5Dtime;
|
|
||||||
|
|
||||||
double MooeeInvFlops;
|
|
||||||
double MooeeInvCalls;
|
|
||||||
double MooeeInvTime;
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
|
virtual void SetCoefficientsZolotarev(RealD zolohi,Approx::zolotarev_data *zdata,RealD b,RealD c);
|
||||||
|
@ -47,18 +47,6 @@ public:
|
|||||||
FermionField _tmp;
|
FermionField _tmp;
|
||||||
FermionField &tmp(void) { return _tmp; }
|
FermionField &tmp(void) { return _tmp; }
|
||||||
|
|
||||||
////////////////////////////////////////
|
|
||||||
// Performance monitoring
|
|
||||||
////////////////////////////////////////
|
|
||||||
void Report(void);
|
|
||||||
void ZeroCounters(void);
|
|
||||||
double DhopTotalTime;
|
|
||||||
double DhopCalls;
|
|
||||||
double DhopCommTime;
|
|
||||||
double DhopComputeTime;
|
|
||||||
double DhopComputeTime2;
|
|
||||||
double DhopFaceTime;
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Implement the abstract base
|
// Implement the abstract base
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
@ -52,18 +52,6 @@ public:
|
|||||||
FermionField _tmp;
|
FermionField _tmp;
|
||||||
FermionField &tmp(void) { return _tmp; }
|
FermionField &tmp(void) { return _tmp; }
|
||||||
|
|
||||||
////////////////////////////////////////
|
|
||||||
// Performance monitoring
|
|
||||||
////////////////////////////////////////
|
|
||||||
void Report(void);
|
|
||||||
void ZeroCounters(void);
|
|
||||||
double DhopTotalTime;
|
|
||||||
double DhopCalls;
|
|
||||||
double DhopCommTime;
|
|
||||||
double DhopComputeTime;
|
|
||||||
double DhopComputeTime2;
|
|
||||||
double DhopFaceTime;
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Implement the abstract base
|
// Implement the abstract base
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
@ -47,18 +47,6 @@ public:
|
|||||||
FermionField _tmp;
|
FermionField _tmp;
|
||||||
FermionField &tmp(void) { return _tmp; }
|
FermionField &tmp(void) { return _tmp; }
|
||||||
|
|
||||||
////////////////////////////////////////
|
|
||||||
// Performance monitoring
|
|
||||||
////////////////////////////////////////
|
|
||||||
void Report(void);
|
|
||||||
void ZeroCounters(void);
|
|
||||||
double DhopTotalTime;
|
|
||||||
double DhopCalls;
|
|
||||||
double DhopCommTime;
|
|
||||||
double DhopComputeTime;
|
|
||||||
double DhopComputeTime2;
|
|
||||||
double DhopFaceTime;
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Implement the abstract base
|
// Implement the abstract base
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
@ -294,11 +294,7 @@ public:
|
|||||||
typedef typename Base::View_type View_type;
|
typedef typename Base::View_type View_type;
|
||||||
typedef typename Base::StencilVector StencilVector;
|
typedef typename Base::StencilVector StencilVector;
|
||||||
|
|
||||||
void ZeroCountersi(void) { }
|
|
||||||
void Reporti(int calls) { }
|
|
||||||
|
|
||||||
// Vector<int> surface_list;
|
// Vector<int> surface_list;
|
||||||
|
|
||||||
WilsonStencil(GridBase *grid,
|
WilsonStencil(GridBase *grid,
|
||||||
int npoints,
|
int npoints,
|
||||||
int checkerboard,
|
int checkerboard,
|
||||||
@ -306,7 +302,6 @@ public:
|
|||||||
const std::vector<int> &distances,Parameters p)
|
const std::vector<int> &distances,Parameters p)
|
||||||
: CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,p)
|
: CartesianStencil<vobj,cobj,Parameters> (grid,npoints,checkerboard,directions,distances,p)
|
||||||
{
|
{
|
||||||
ZeroCountersi();
|
|
||||||
// surface_list.resize(0);
|
// surface_list.resize(0);
|
||||||
this->same_node.resize(npoints);
|
this->same_node.resize(npoints);
|
||||||
};
|
};
|
||||||
@ -400,7 +395,6 @@ public:
|
|||||||
}
|
}
|
||||||
this->face_table_computed=1;
|
this->face_table_computed=1;
|
||||||
assert(this->u_comm_offset==this->_unified_buffer_size);
|
assert(this->u_comm_offset==this->_unified_buffer_size);
|
||||||
accelerator_barrier();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
};
|
};
|
||||||
|
@ -74,20 +74,6 @@ public:
|
|||||||
FermionField _tmp;
|
FermionField _tmp;
|
||||||
FermionField &tmp(void) { return _tmp; }
|
FermionField &tmp(void) { return _tmp; }
|
||||||
|
|
||||||
void Report(void);
|
|
||||||
void ZeroCounters(void);
|
|
||||||
double DhopCalls;
|
|
||||||
double DhopCommTime;
|
|
||||||
double DhopComputeTime;
|
|
||||||
double DhopComputeTime2;
|
|
||||||
double DhopFaceTime;
|
|
||||||
double DhopTotalTime;
|
|
||||||
|
|
||||||
double DerivCalls;
|
|
||||||
double DerivCommTime;
|
|
||||||
double DerivComputeTime;
|
|
||||||
double DerivDhopComputeTime;
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////
|
||||||
// override multiply; cut number routines if pass dagger argument
|
// override multiply; cut number routines if pass dagger argument
|
||||||
// and also make interface more uniformly consistent
|
// and also make interface more uniformly consistent
|
||||||
|
@ -78,21 +78,6 @@ public:
|
|||||||
int Dirichlet;
|
int Dirichlet;
|
||||||
Coordinate Block;
|
Coordinate Block;
|
||||||
|
|
||||||
/********** Deprecate timers **********/
|
|
||||||
void Report(void);
|
|
||||||
void ZeroCounters(void);
|
|
||||||
double DhopCalls;
|
|
||||||
double DhopCommTime;
|
|
||||||
double DhopComputeTime;
|
|
||||||
double DhopComputeTime2;
|
|
||||||
double DhopFaceTime;
|
|
||||||
double DhopTotalTime;
|
|
||||||
|
|
||||||
double DerivCalls;
|
|
||||||
double DerivCommTime;
|
|
||||||
double DerivComputeTime;
|
|
||||||
double DerivDhopComputeTime;
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
// Implement the abstract base
|
// Implement the abstract base
|
||||||
///////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////
|
||||||
|
@ -152,58 +152,6 @@ void CayleyFermion5D<Impl>::DminusDag(const FermionField &psi, FermionField &chi
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl> void CayleyFermion5D<Impl>::CayleyReport(void)
|
|
||||||
{
|
|
||||||
this->Report();
|
|
||||||
Coordinate latt = GridDefaultLatt();
|
|
||||||
RealD volume = this->Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
|
||||||
RealD NP = this->_FourDimGrid->_Nprocessors;
|
|
||||||
if ( M5Dcalls > 0 ) {
|
|
||||||
std::cout << GridLogMessage << "#### M5D calls report " << std::endl;
|
|
||||||
std::cout << GridLogMessage << "CayleyFermion5D Number of M5D Calls : " << M5Dcalls << std::endl;
|
|
||||||
std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << M5Dtime / M5Dcalls << " us" << std::endl;
|
|
||||||
|
|
||||||
// Flops = 10.0*(Nc*Ns) *Ls*vol
|
|
||||||
RealD mflops = 10.0*(Nc*Ns)*volume*M5Dcalls/M5Dtime/2; // 2 for red black counting
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
|
||||||
|
|
||||||
// Bytes = sizeof(Real) * (Nc*Ns*Nreim) * Ls * vol * (read+write) (/2 for red black counting)
|
|
||||||
// read = 2 ( psi[ss+s+1] and psi[ss+s-1] count as 1 )
|
|
||||||
// write = 1
|
|
||||||
RealD Gbytes = sizeof(Real) * (Nc*Ns*2) * volume * 3 /2. * 1.e-9;
|
|
||||||
std::cout << GridLogMessage << "Average bandwidth (GB/s) : " << Gbytes/M5Dtime*M5Dcalls*1.e6 << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( MooeeInvCalls > 0 ) {
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "#### MooeeInv calls report " << std::endl;
|
|
||||||
std::cout << GridLogMessage << "CayleyFermion5D Number of MooeeInv Calls : " << MooeeInvCalls << std::endl;
|
|
||||||
std::cout << GridLogMessage << "CayleyFermion5D ComputeTime/Calls : " << MooeeInvTime / MooeeInvCalls << " us" << std::endl;
|
|
||||||
#ifdef GRID_CUDA
|
|
||||||
RealD mflops = ( -16.*Nc*Ns+this->Ls*(1.+18.*Nc*Ns) )*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
|
||||||
#else
|
|
||||||
// Flops = MADD * Ls *Ls *4dvol * spin/colour/complex
|
|
||||||
RealD mflops = 2.0*24*this->Ls*volume*MooeeInvCalls/MooeeInvTime/2; // 2 for red black counting
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
template<class Impl> void CayleyFermion5D<Impl>::CayleyZeroCounters(void)
|
|
||||||
{
|
|
||||||
this->ZeroCounters();
|
|
||||||
M5Dflops=0;
|
|
||||||
M5Dcalls=0;
|
|
||||||
M5Dtime=0;
|
|
||||||
MooeeInvFlops=0;
|
|
||||||
MooeeInvCalls=0;
|
|
||||||
MooeeInvTime=0;
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
|
void CayleyFermion5D<Impl>::M5D (const FermionField &psi, FermionField &chi)
|
||||||
{
|
{
|
||||||
@ -646,7 +594,6 @@ void CayleyFermion5D<Impl>::ContractConservedCurrent( PropagatorField &q_in_1,
|
|||||||
assert(mass_plus == mass_minus);
|
assert(mass_plus == mass_minus);
|
||||||
RealD mass = mass_plus;
|
RealD mass = mass_plus;
|
||||||
|
|
||||||
#if (!defined(GRID_HIP))
|
|
||||||
Gamma::Algebra Gmu [] = {
|
Gamma::Algebra Gmu [] = {
|
||||||
Gamma::Algebra::GammaX,
|
Gamma::Algebra::GammaX,
|
||||||
Gamma::Algebra::GammaY,
|
Gamma::Algebra::GammaY,
|
||||||
@ -765,7 +712,7 @@ void CayleyFermion5D<Impl>::ContractConservedCurrent( PropagatorField &q_in_1,
|
|||||||
else q_out += C;
|
else q_out += C;
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
@ -832,7 +779,6 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (!defined(GRID_HIP))
|
|
||||||
int tshift = (mu == Nd-1) ? 1 : 0;
|
int tshift = (mu == Nd-1) ? 1 : 0;
|
||||||
unsigned int LLt = GridDefaultLatt()[Tp];
|
unsigned int LLt = GridDefaultLatt()[Tp];
|
||||||
////////////////////////////////////////////////
|
////////////////////////////////////////////////
|
||||||
@ -952,7 +898,6 @@ void CayleyFermion5D<Impl>::SeqConservedCurrent(PropagatorField &q_in,
|
|||||||
|
|
||||||
InsertSlice(L_Q, q_out, s , 0);
|
InsertSlice(L_Q, q_out, s , 0);
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
#undef Pp
|
#undef Pp
|
||||||
#undef Pm
|
#undef Pm
|
||||||
|
@ -63,9 +63,6 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
|||||||
|
|
||||||
// 10 = 3 complex mult + 2 complex add
|
// 10 = 3 complex mult + 2 complex add
|
||||||
// Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting)
|
// Flops = 10.0*(Nc*Ns) *Ls*vol (/2 for red black counting)
|
||||||
M5Dcalls++;
|
|
||||||
M5Dtime-=usecond();
|
|
||||||
|
|
||||||
uint64_t nloop = grid->oSites();
|
uint64_t nloop = grid->oSites();
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
uint64_t s = sss%Ls;
|
uint64_t s = sss%Ls;
|
||||||
@ -78,7 +75,6 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
|||||||
spProj5p(tmp2,psi(idx_l));
|
spProj5p(tmp2,psi(idx_l));
|
||||||
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
|
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
|
||||||
});
|
});
|
||||||
M5Dtime+=usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -104,9 +100,6 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
|
|||||||
int Ls=this->Ls;
|
int Ls=this->Ls;
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
M5Dcalls++;
|
|
||||||
M5Dtime-=usecond();
|
|
||||||
|
|
||||||
uint64_t nloop = grid->oSites();
|
uint64_t nloop = grid->oSites();
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
uint64_t s = sss%Ls;
|
uint64_t s = sss%Ls;
|
||||||
@ -119,7 +112,6 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
|
|||||||
spProj5m(tmp2,psi(idx_l));
|
spProj5m(tmp2,psi(idx_l));
|
||||||
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
|
coalescedWrite(chi[ss+s],pdiag[s]*phi(ss+s)+pupper[s]*tmp1+plower[s]*tmp2);
|
||||||
});
|
});
|
||||||
M5Dtime+=usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -140,8 +132,6 @@ CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi_i, FermionField &chi
|
|||||||
auto pleem = & leem[0];
|
auto pleem = & leem[0];
|
||||||
auto pueem = & ueem[0];
|
auto pueem = & ueem[0];
|
||||||
|
|
||||||
MooeeInvCalls++;
|
|
||||||
MooeeInvTime-=usecond();
|
|
||||||
uint64_t nloop = grid->oSites()/Ls;
|
uint64_t nloop = grid->oSites()/Ls;
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
uint64_t ss=sss*Ls;
|
uint64_t ss=sss*Ls;
|
||||||
@ -179,8 +169,6 @@ CayleyFermion5D<Impl>::MooeeInv (const FermionField &psi_i, FermionField &chi
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
MooeeInvTime+=usecond();
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -202,10 +190,6 @@ CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi
|
|||||||
|
|
||||||
assert(psi.Checkerboard() == psi.Checkerboard());
|
assert(psi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
MooeeInvCalls++;
|
|
||||||
MooeeInvTime-=usecond();
|
|
||||||
|
|
||||||
|
|
||||||
uint64_t nloop = grid->oSites()/Ls;
|
uint64_t nloop = grid->oSites()/Ls;
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
uint64_t ss=sss*Ls;
|
uint64_t ss=sss*Ls;
|
||||||
@ -242,7 +226,6 @@ CayleyFermion5D<Impl>::MooeeInvDag (const FermionField &psi_i, FermionField &chi
|
|||||||
coalescedWrite(chi[ss+s],res);
|
coalescedWrite(chi[ss+s],res);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
MooeeInvTime+=usecond();
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -94,10 +94,6 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
|||||||
d_p[ss] = diag[s];
|
d_p[ss] = diag[s];
|
||||||
}}
|
}}
|
||||||
|
|
||||||
|
|
||||||
M5Dcalls++;
|
|
||||||
M5Dtime-=usecond();
|
|
||||||
|
|
||||||
assert(Nc==3);
|
assert(Nc==3);
|
||||||
|
|
||||||
thread_loop( (int ss=0;ss<grid->oSites();ss+=LLs),{ // adds LLs
|
thread_loop( (int ss=0;ss<grid->oSites();ss+=LLs),{ // adds LLs
|
||||||
@ -198,7 +194,6 @@ CayleyFermion5D<Impl>::M5D(const FermionField &psi_i,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
});
|
});
|
||||||
M5Dtime+=usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -242,8 +237,6 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
|
|||||||
d_p[ss] = diag[s];
|
d_p[ss] = diag[s];
|
||||||
}}
|
}}
|
||||||
|
|
||||||
M5Dcalls++;
|
|
||||||
M5Dtime-=usecond();
|
|
||||||
thread_loop( (int ss=0;ss<grid->oSites();ss+=LLs),{ // adds LLs
|
thread_loop( (int ss=0;ss<grid->oSites();ss+=LLs),{ // adds LLs
|
||||||
#if 0
|
#if 0
|
||||||
alignas(64) SiteHalfSpinor hp;
|
alignas(64) SiteHalfSpinor hp;
|
||||||
@ -339,7 +332,6 @@ CayleyFermion5D<Impl>::M5Ddag(const FermionField &psi_i,
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
});
|
});
|
||||||
M5Dtime+=usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -813,9 +805,6 @@ CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,
|
|||||||
}
|
}
|
||||||
assert(_Matp->size()==Ls*LLs);
|
assert(_Matp->size()==Ls*LLs);
|
||||||
|
|
||||||
MooeeInvCalls++;
|
|
||||||
MooeeInvTime-=usecond();
|
|
||||||
|
|
||||||
if ( switcheroo<Coeff_t>::iscomplex() ) {
|
if ( switcheroo<Coeff_t>::iscomplex() ) {
|
||||||
thread_loop( (auto site=0;site<vol;site++),{
|
thread_loop( (auto site=0;site<vol;site++),{
|
||||||
MooeeInternalZAsm(psi,chi,LLs,site,*_Matp,*_Matm);
|
MooeeInternalZAsm(psi,chi,LLs,site,*_Matp,*_Matm);
|
||||||
@ -825,7 +814,7 @@ CayleyFermion5D<Impl>::MooeeInternal(const FermionField &psi, FermionField &chi,
|
|||||||
MooeeInternalAsm(psi,chi,LLs,site,*_Matp,*_Matm);
|
MooeeInternalAsm(psi,chi,LLs,site,*_Matp,*_Matm);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
MooeeInvTime+=usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
@ -54,8 +54,6 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionFi
|
|||||||
auto pupper = &upper[0];
|
auto pupper = &upper[0];
|
||||||
auto plower = &lower[0];
|
auto plower = &lower[0];
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
|
||||||
this->M5Dtime -= usecond();
|
|
||||||
|
|
||||||
auto nloop=grid->oSites()/Ls;
|
auto nloop=grid->oSites()/Ls;
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
@ -71,7 +69,6 @@ void DomainWallEOFAFermion<Impl>::M5D(const FermionField& psi_i, const FermionFi
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
this->M5Dtime += usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -91,8 +88,6 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const Fermio
|
|||||||
auto plower = &lower[0];
|
auto plower = &lower[0];
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
|
||||||
this->M5Dtime -= usecond();
|
|
||||||
|
|
||||||
auto nloop=grid->oSites()/Ls;
|
auto nloop=grid->oSites()/Ls;
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
@ -108,7 +103,6 @@ void DomainWallEOFAFermion<Impl>::M5Ddag(const FermionField& psi_i, const Fermio
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
this->M5Dtime += usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -127,8 +121,6 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionFie
|
|||||||
auto pleem = & this->leem[0];
|
auto pleem = & this->leem[0];
|
||||||
auto pueem = & this->ueem[0];
|
auto pueem = & this->ueem[0];
|
||||||
|
|
||||||
this->MooeeInvCalls++;
|
|
||||||
this->MooeeInvTime -= usecond();
|
|
||||||
uint64_t nloop=grid->oSites()/Ls;
|
uint64_t nloop=grid->oSites()/Ls;
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
uint64_t ss=sss*Ls;
|
uint64_t ss=sss*Ls;
|
||||||
@ -164,7 +156,6 @@ void DomainWallEOFAFermion<Impl>::MooeeInv(const FermionField& psi_i, FermionFie
|
|||||||
coalescedWrite(chi[ss+s],res);
|
coalescedWrite(chi[ss+s],res);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
this->MooeeInvTime += usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -185,8 +176,6 @@ void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi_i, Fermion
|
|||||||
|
|
||||||
assert(psi.Checkerboard() == psi.Checkerboard());
|
assert(psi.Checkerboard() == psi.Checkerboard());
|
||||||
|
|
||||||
this->MooeeInvCalls++;
|
|
||||||
this->MooeeInvTime -= usecond();
|
|
||||||
auto nloop = grid->oSites()/Ls;
|
auto nloop = grid->oSites()/Ls;
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
uint64_t ss=sss*Ls;
|
uint64_t ss=sss*Ls;
|
||||||
@ -223,7 +212,6 @@ void DomainWallEOFAFermion<Impl>::MooeeInvDag(const FermionField& psi_i, Fermion
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
this->MooeeInvTime += usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
@ -298,45 +298,33 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl &
|
|||||||
int LLs = in.Grid()->_rdimensions[0];
|
int LLs = in.Grid()->_rdimensions[0];
|
||||||
int len = U.Grid()->oSites();
|
int len = U.Grid()->oSites();
|
||||||
|
|
||||||
DhopFaceTime-=usecond();
|
|
||||||
st.Prepare();
|
st.Prepare();
|
||||||
st.HaloGather(in,compressor);
|
st.HaloGather(in,compressor);
|
||||||
DhopFaceTime+=usecond();
|
|
||||||
|
|
||||||
DhopCommTime -=usecond();
|
|
||||||
std::vector<std::vector<CommsRequest_t> > requests;
|
std::vector<std::vector<CommsRequest_t> > requests;
|
||||||
st.CommunicateBegin(requests);
|
st.CommunicateBegin(requests);
|
||||||
|
|
||||||
// st.HaloExchangeOptGather(in,compressor); // Wilson compressor
|
// st.HaloExchangeOptGather(in,compressor); // Wilson compressor
|
||||||
DhopFaceTime-=usecond();
|
|
||||||
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
||||||
DhopFaceTime+=usecond();
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Remove explicit thread mapping introduced for OPA reasons.
|
// Remove explicit thread mapping introduced for OPA reasons.
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
DhopComputeTime-=usecond();
|
|
||||||
{
|
{
|
||||||
int interior=1;
|
int interior=1;
|
||||||
int exterior=0;
|
int exterior=0;
|
||||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||||
}
|
}
|
||||||
DhopComputeTime+=usecond();
|
|
||||||
|
|
||||||
DhopFaceTime-=usecond();
|
|
||||||
st.CommsMerge(compressor);
|
st.CommsMerge(compressor);
|
||||||
DhopFaceTime+=usecond();
|
|
||||||
|
|
||||||
st.CommunicateComplete(requests);
|
st.CommunicateComplete(requests);
|
||||||
DhopCommTime +=usecond();
|
|
||||||
|
|
||||||
DhopComputeTime2-=usecond();
|
|
||||||
{
|
{
|
||||||
int interior=0;
|
int interior=0;
|
||||||
int exterior=1;
|
int exterior=1;
|
||||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||||
}
|
}
|
||||||
DhopComputeTime2+=usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -347,22 +335,14 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
|
|||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
int LLs = in.Grid()->_rdimensions[0];
|
int LLs = in.Grid()->_rdimensions[0];
|
||||||
|
|
||||||
//double t1=usecond();
|
|
||||||
DhopTotalTime -= usecond();
|
|
||||||
DhopCommTime -= usecond();
|
|
||||||
st.HaloExchange(in,compressor);
|
st.HaloExchange(in,compressor);
|
||||||
DhopCommTime += usecond();
|
|
||||||
|
|
||||||
DhopComputeTime -= usecond();
|
|
||||||
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
// Dhop takes the 4d grid from U, and makes a 5d index for fermion
|
||||||
{
|
{
|
||||||
int interior=1;
|
int interior=1;
|
||||||
int exterior=1;
|
int exterior=1;
|
||||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||||
}
|
}
|
||||||
DhopComputeTime += usecond();
|
|
||||||
DhopTotalTime += usecond();
|
|
||||||
|
|
||||||
}
|
}
|
||||||
/*CHANGE END*/
|
/*CHANGE END*/
|
||||||
|
|
||||||
@ -371,7 +351,6 @@ void ImprovedStaggeredFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st,
|
|||||||
template<class Impl>
|
template<class Impl>
|
||||||
void ImprovedStaggeredFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
|
void ImprovedStaggeredFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
DhopCalls+=1;
|
|
||||||
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
|
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
|
||||||
conformable(in.Grid(),out.Grid()); // drops the cb check
|
conformable(in.Grid(),out.Grid()); // drops the cb check
|
||||||
|
|
||||||
@ -383,7 +362,6 @@ void ImprovedStaggeredFermion5D<Impl>::DhopOE(const FermionField &in, FermionFie
|
|||||||
template<class Impl>
|
template<class Impl>
|
||||||
void ImprovedStaggeredFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
void ImprovedStaggeredFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
DhopCalls+=1;
|
|
||||||
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
|
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
|
||||||
conformable(in.Grid(),out.Grid()); // drops the cb check
|
conformable(in.Grid(),out.Grid()); // drops the cb check
|
||||||
|
|
||||||
@ -395,7 +373,6 @@ void ImprovedStaggeredFermion5D<Impl>::DhopEO(const FermionField &in, FermionFie
|
|||||||
template<class Impl>
|
template<class Impl>
|
||||||
void ImprovedStaggeredFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
|
void ImprovedStaggeredFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
DhopCalls+=2;
|
|
||||||
conformable(in.Grid(),FermionGrid()); // verifies full grid
|
conformable(in.Grid(),FermionGrid()); // verifies full grid
|
||||||
conformable(in.Grid(),out.Grid());
|
conformable(in.Grid(),out.Grid());
|
||||||
|
|
||||||
@ -404,58 +381,6 @@ void ImprovedStaggeredFermion5D<Impl>::Dhop(const FermionField &in, FermionField
|
|||||||
DhopInternal(Stencil,Lebesgue,Umu,UUUmu,in,out,dag);
|
DhopInternal(Stencil,Lebesgue,Umu,UUUmu,in,out,dag);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void ImprovedStaggeredFermion5D<Impl>::Report(void)
|
|
||||||
{
|
|
||||||
Coordinate latt = GridDefaultLatt();
|
|
||||||
RealD volume = Ls; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
|
||||||
RealD NP = _FourDimGrid->_Nprocessors;
|
|
||||||
RealD NN = _FourDimGrid->NodeCount();
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D Number of DhopEO Calls : "
|
|
||||||
<< DhopCalls << std::endl;
|
|
||||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D TotalTime /Calls : "
|
|
||||||
<< DhopTotalTime / DhopCalls << " us" << std::endl;
|
|
||||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D CommTime /Calls : "
|
|
||||||
<< DhopCommTime / DhopCalls << " us" << std::endl;
|
|
||||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D ComputeTime/Calls : "
|
|
||||||
<< DhopComputeTime / DhopCalls << " us" << std::endl;
|
|
||||||
|
|
||||||
// Average the compute time
|
|
||||||
_FourDimGrid->GlobalSum(DhopComputeTime);
|
|
||||||
DhopComputeTime/=NP;
|
|
||||||
|
|
||||||
RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
|
||||||
|
|
||||||
RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D Stencil" <<std::endl; Stencil.Report();
|
|
||||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D StencilEven"<<std::endl; StencilEven.Report();
|
|
||||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion5D StencilOdd" <<std::endl; StencilOdd.Report();
|
|
||||||
}
|
|
||||||
template<class Impl>
|
|
||||||
void ImprovedStaggeredFermion5D<Impl>::ZeroCounters(void)
|
|
||||||
{
|
|
||||||
DhopCalls = 0;
|
|
||||||
DhopTotalTime = 0;
|
|
||||||
DhopCommTime = 0;
|
|
||||||
DhopComputeTime = 0;
|
|
||||||
DhopFaceTime = 0;
|
|
||||||
|
|
||||||
|
|
||||||
Stencil.ZeroCounters();
|
|
||||||
StencilEven.ZeroCounters();
|
|
||||||
StencilOdd.ZeroCounters();
|
|
||||||
}
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////
|
||||||
// Implement the general interface. Here we use SAME mass on all slices
|
// Implement the general interface. Here we use SAME mass on all slices
|
||||||
/////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -334,7 +334,6 @@ void ImprovedStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionF
|
|||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
|
void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
|
||||||
{
|
{
|
||||||
DhopCalls+=2;
|
|
||||||
conformable(in.Grid(), _grid); // verifies full grid
|
conformable(in.Grid(), _grid); // verifies full grid
|
||||||
conformable(in.Grid(), out.Grid());
|
conformable(in.Grid(), out.Grid());
|
||||||
|
|
||||||
@ -346,7 +345,6 @@ void ImprovedStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &
|
|||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
|
void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
|
||||||
{
|
{
|
||||||
DhopCalls+=1;
|
|
||||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||||
|
|
||||||
@ -359,7 +357,6 @@ void ImprovedStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField
|
|||||||
template <class Impl>
|
template <class Impl>
|
||||||
void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag)
|
void ImprovedStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag)
|
||||||
{
|
{
|
||||||
DhopCalls+=1;
|
|
||||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||||
|
|
||||||
@ -418,47 +415,33 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st
|
|||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
int len = U.Grid()->oSites();
|
int len = U.Grid()->oSites();
|
||||||
|
|
||||||
DhopTotalTime -= usecond();
|
|
||||||
|
|
||||||
DhopFaceTime -= usecond();
|
|
||||||
st.Prepare();
|
st.Prepare();
|
||||||
st.HaloGather(in,compressor);
|
st.HaloGather(in,compressor);
|
||||||
DhopFaceTime += usecond();
|
|
||||||
|
|
||||||
DhopCommTime -=usecond();
|
|
||||||
std::vector<std::vector<CommsRequest_t> > requests;
|
std::vector<std::vector<CommsRequest_t> > requests;
|
||||||
st.CommunicateBegin(requests);
|
st.CommunicateBegin(requests);
|
||||||
|
|
||||||
DhopFaceTime-=usecond();
|
|
||||||
st.CommsMergeSHM(compressor);
|
st.CommsMergeSHM(compressor);
|
||||||
DhopFaceTime+= usecond();
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Removed explicit thread comms
|
// Removed explicit thread comms
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
DhopComputeTime -= usecond();
|
|
||||||
{
|
{
|
||||||
int interior=1;
|
int interior=1;
|
||||||
int exterior=0;
|
int exterior=0;
|
||||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||||
}
|
}
|
||||||
DhopComputeTime += usecond();
|
|
||||||
|
|
||||||
st.CommunicateComplete(requests);
|
st.CommunicateComplete(requests);
|
||||||
DhopCommTime +=usecond();
|
|
||||||
|
|
||||||
// First to enter, last to leave timing
|
// First to enter, last to leave timing
|
||||||
DhopFaceTime -= usecond();
|
|
||||||
st.CommsMerge(compressor);
|
st.CommsMerge(compressor);
|
||||||
DhopFaceTime -= usecond();
|
|
||||||
|
|
||||||
DhopComputeTime2 -= usecond();
|
|
||||||
{
|
{
|
||||||
int interior=0;
|
int interior=0;
|
||||||
int exterior=1;
|
int exterior=1;
|
||||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||||
}
|
}
|
||||||
DhopComputeTime2 += usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -471,78 +454,16 @@ void ImprovedStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, Le
|
|||||||
{
|
{
|
||||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
|
|
||||||
DhopTotalTime -= usecond();
|
|
||||||
|
|
||||||
DhopCommTime -= usecond();
|
|
||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
st.HaloExchange(in, compressor);
|
st.HaloExchange(in, compressor);
|
||||||
DhopCommTime += usecond();
|
|
||||||
|
|
||||||
DhopComputeTime -= usecond();
|
|
||||||
{
|
{
|
||||||
int interior=1;
|
int interior=1;
|
||||||
int exterior=1;
|
int exterior=1;
|
||||||
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
Kernels::DhopImproved(st,lo,U,UUU,in,out,dag,interior,exterior);
|
||||||
}
|
}
|
||||||
DhopComputeTime += usecond();
|
|
||||||
DhopTotalTime += usecond();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////
|
|
||||||
// Reporting
|
|
||||||
////////////////////////////////////////////////////////////////
|
|
||||||
template<class Impl>
|
|
||||||
void ImprovedStaggeredFermion<Impl>::Report(void)
|
|
||||||
{
|
|
||||||
Coordinate latt = _grid->GlobalDimensions();
|
|
||||||
RealD volume = 1; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
|
||||||
RealD NP = _grid->_Nprocessors;
|
|
||||||
RealD NN = _grid->NodeCount();
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion Number of DhopEO Calls : "
|
|
||||||
<< DhopCalls << std::endl;
|
|
||||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion TotalTime /Calls : "
|
|
||||||
<< DhopTotalTime / DhopCalls << " us" << std::endl;
|
|
||||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion CommTime /Calls : "
|
|
||||||
<< DhopCommTime / DhopCalls << " us" << std::endl;
|
|
||||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion ComputeTime/Calls : "
|
|
||||||
<< DhopComputeTime / DhopCalls << " us" << std::endl;
|
|
||||||
|
|
||||||
// Average the compute time
|
|
||||||
_grid->GlobalSum(DhopComputeTime);
|
|
||||||
DhopComputeTime/=NP;
|
|
||||||
|
|
||||||
RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
|
||||||
|
|
||||||
RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion Stencil" <<std::endl; Stencil.Report();
|
|
||||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion StencilEven"<<std::endl; StencilEven.Report();
|
|
||||||
std::cout << GridLogMessage << "ImprovedStaggeredFermion StencilOdd" <<std::endl; StencilOdd.Report();
|
|
||||||
}
|
|
||||||
template<class Impl>
|
|
||||||
void ImprovedStaggeredFermion<Impl>::ZeroCounters(void)
|
|
||||||
{
|
|
||||||
DhopCalls = 0;
|
|
||||||
DhopTotalTime = 0;
|
|
||||||
DhopCommTime = 0;
|
|
||||||
DhopComputeTime = 0;
|
|
||||||
DhopFaceTime = 0;
|
|
||||||
|
|
||||||
Stencil.ZeroCounters();
|
|
||||||
StencilEven.ZeroCounters();
|
|
||||||
StencilOdd.ZeroCounters();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
// Conserved current - not yet implemented.
|
// Conserved current - not yet implemented.
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
|
@ -55,9 +55,6 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi_i, const FermionField
|
|||||||
auto plower = &lower[0];
|
auto plower = &lower[0];
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
|
||||||
this->M5Dtime -= usecond();
|
|
||||||
|
|
||||||
int nloop = grid->oSites()/Ls;
|
int nloop = grid->oSites()/Ls;
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
uint64_t ss = sss*Ls;
|
uint64_t ss = sss*Ls;
|
||||||
@ -73,7 +70,6 @@ void MobiusEOFAFermion<Impl>::M5D(const FermionField &psi_i, const FermionField
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
this->M5Dtime += usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -99,9 +95,6 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const Fermion
|
|||||||
auto pshift_coeffs = &shift_coeffs[0];
|
auto pshift_coeffs = &shift_coeffs[0];
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
|
||||||
this->M5Dtime -= usecond();
|
|
||||||
|
|
||||||
int nloop = grid->oSites()/Ls;
|
int nloop = grid->oSites()/Ls;
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
uint64_t ss = sss*Ls;
|
uint64_t ss = sss*Ls;
|
||||||
@ -122,7 +115,6 @@ void MobiusEOFAFermion<Impl>::M5D_shift(const FermionField &psi_i, const Fermion
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
this->M5Dtime += usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -143,9 +135,6 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi_i, const FermionFie
|
|||||||
auto plower = &lower[0];
|
auto plower = &lower[0];
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
|
||||||
this->M5Dtime -= usecond();
|
|
||||||
|
|
||||||
int nloop = grid->oSites()/Ls;
|
int nloop = grid->oSites()/Ls;
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(), {
|
accelerator_for(sss,nloop,Simd::Nsimd(), {
|
||||||
uint64_t ss = sss*Ls;
|
uint64_t ss = sss*Ls;
|
||||||
@ -161,8 +150,6 @@ void MobiusEOFAFermion<Impl>::M5Ddag(const FermionField &psi_i, const FermionFie
|
|||||||
coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
|
coalescedWrite(chi[ss+s], pdiag[s]*phi(ss+s) + pupper[s]*tmp1 + plower[s]*tmp2);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
this->M5Dtime += usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -186,9 +173,6 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const Ferm
|
|||||||
auto pshift_coeffs = &shift_coeffs[0];
|
auto pshift_coeffs = &shift_coeffs[0];
|
||||||
|
|
||||||
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
// Flops = 6.0*(Nc*Ns) *Ls*vol
|
||||||
this->M5Dcalls++;
|
|
||||||
this->M5Dtime -= usecond();
|
|
||||||
|
|
||||||
auto pm = this->pm;
|
auto pm = this->pm;
|
||||||
|
|
||||||
int nloop = grid->oSites()/Ls;
|
int nloop = grid->oSites()/Ls;
|
||||||
@ -217,7 +201,6 @@ void MobiusEOFAFermion<Impl>::M5Ddag_shift(const FermionField &psi_i, const Ferm
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
this->M5Dtime += usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -237,9 +220,6 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &
|
|||||||
|
|
||||||
if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; }
|
if(this->shift != 0.0){ MooeeInv_shift(psi_i,chi_i); return; }
|
||||||
|
|
||||||
this->MooeeInvCalls++;
|
|
||||||
this->MooeeInvTime -= usecond();
|
|
||||||
|
|
||||||
int nloop = grid->oSites()/Ls;
|
int nloop = grid->oSites()/Ls;
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
uint64_t ss=sss*Ls;
|
uint64_t ss=sss*Ls;
|
||||||
@ -277,7 +257,6 @@ void MobiusEOFAFermion<Impl>::MooeeInv(const FermionField &psi_i, FermionField &
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
this->MooeeInvTime += usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -297,8 +276,6 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionF
|
|||||||
auto pueem= & this->ueem[0];
|
auto pueem= & this->ueem[0];
|
||||||
auto pMooeeInv_shift_lc = &MooeeInv_shift_lc[0];
|
auto pMooeeInv_shift_lc = &MooeeInv_shift_lc[0];
|
||||||
auto pMooeeInv_shift_norm = &MooeeInv_shift_norm[0];
|
auto pMooeeInv_shift_norm = &MooeeInv_shift_norm[0];
|
||||||
this->MooeeInvCalls++;
|
|
||||||
this->MooeeInvTime -= usecond();
|
|
||||||
|
|
||||||
int nloop = grid->oSites()/Ls;
|
int nloop = grid->oSites()/Ls;
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
@ -343,7 +320,6 @@ void MobiusEOFAFermion<Impl>::MooeeInv_shift(const FermionField &psi_i, FermionF
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
this->MooeeInvTime += usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -363,9 +339,6 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionFiel
|
|||||||
auto pleem= & this->leem[0];
|
auto pleem= & this->leem[0];
|
||||||
auto pueem= & this->ueem[0];
|
auto pueem= & this->ueem[0];
|
||||||
|
|
||||||
this->MooeeInvCalls++;
|
|
||||||
this->MooeeInvTime -= usecond();
|
|
||||||
|
|
||||||
int nloop = grid->oSites()/Ls;
|
int nloop = grid->oSites()/Ls;
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
uint64_t ss=sss*Ls;
|
uint64_t ss=sss*Ls;
|
||||||
@ -402,7 +375,6 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag(const FermionField &psi_i, FermionFiel
|
|||||||
coalescedWrite(chi[ss+s],res);
|
coalescedWrite(chi[ss+s],res);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
this->MooeeInvTime += usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -423,9 +395,6 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, Fermi
|
|||||||
auto pMooeeInvDag_shift_lc = &MooeeInvDag_shift_lc[0];
|
auto pMooeeInvDag_shift_lc = &MooeeInvDag_shift_lc[0];
|
||||||
auto pMooeeInvDag_shift_norm = &MooeeInvDag_shift_norm[0];
|
auto pMooeeInvDag_shift_norm = &MooeeInvDag_shift_norm[0];
|
||||||
|
|
||||||
this->MooeeInvCalls++;
|
|
||||||
this->MooeeInvTime -= usecond();
|
|
||||||
|
|
||||||
int nloop = grid->oSites()/Ls;
|
int nloop = grid->oSites()/Ls;
|
||||||
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
accelerator_for(sss,nloop,Simd::Nsimd(),{
|
||||||
uint64_t ss=sss*Ls;
|
uint64_t ss=sss*Ls;
|
||||||
@ -469,7 +438,6 @@ void MobiusEOFAFermion<Impl>::MooeeInvDag_shift(const FermionField &psi_i, Fermi
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
this->MooeeInvTime += usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
@ -263,7 +263,6 @@ void NaiveStaggeredFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionFiel
|
|||||||
template <class Impl>
|
template <class Impl>
|
||||||
void NaiveStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
|
void NaiveStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
|
||||||
{
|
{
|
||||||
DhopCalls+=2;
|
|
||||||
conformable(in.Grid(), _grid); // verifies full grid
|
conformable(in.Grid(), _grid); // verifies full grid
|
||||||
conformable(in.Grid(), out.Grid());
|
conformable(in.Grid(), out.Grid());
|
||||||
|
|
||||||
@ -275,7 +274,6 @@ void NaiveStaggeredFermion<Impl>::Dhop(const FermionField &in, FermionField &out
|
|||||||
template <class Impl>
|
template <class Impl>
|
||||||
void NaiveStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
|
void NaiveStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
|
||||||
{
|
{
|
||||||
DhopCalls+=1;
|
|
||||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||||
|
|
||||||
@ -288,7 +286,6 @@ void NaiveStaggeredFermion<Impl>::DhopOE(const FermionField &in, FermionField &o
|
|||||||
template <class Impl>
|
template <class Impl>
|
||||||
void NaiveStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag)
|
void NaiveStaggeredFermion<Impl>::DhopEO(const FermionField &in, FermionField &out, int dag)
|
||||||
{
|
{
|
||||||
DhopCalls+=1;
|
|
||||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||||
|
|
||||||
@ -345,47 +342,33 @@ void NaiveStaggeredFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, L
|
|||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
int len = U.Grid()->oSites();
|
int len = U.Grid()->oSites();
|
||||||
|
|
||||||
DhopTotalTime -= usecond();
|
|
||||||
|
|
||||||
DhopFaceTime -= usecond();
|
|
||||||
st.Prepare();
|
st.Prepare();
|
||||||
st.HaloGather(in,compressor);
|
st.HaloGather(in,compressor);
|
||||||
DhopFaceTime += usecond();
|
|
||||||
|
|
||||||
DhopCommTime -=usecond();
|
|
||||||
std::vector<std::vector<CommsRequest_t> > requests;
|
std::vector<std::vector<CommsRequest_t> > requests;
|
||||||
st.CommunicateBegin(requests);
|
st.CommunicateBegin(requests);
|
||||||
|
|
||||||
DhopFaceTime-=usecond();
|
|
||||||
st.CommsMergeSHM(compressor);
|
st.CommsMergeSHM(compressor);
|
||||||
DhopFaceTime+= usecond();
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Removed explicit thread comms
|
// Removed explicit thread comms
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
DhopComputeTime -= usecond();
|
|
||||||
{
|
{
|
||||||
int interior=1;
|
int interior=1;
|
||||||
int exterior=0;
|
int exterior=0;
|
||||||
Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior);
|
Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior);
|
||||||
}
|
}
|
||||||
DhopComputeTime += usecond();
|
|
||||||
|
|
||||||
st.CommunicateComplete(requests);
|
st.CommunicateComplete(requests);
|
||||||
DhopCommTime +=usecond();
|
|
||||||
|
|
||||||
// First to enter, last to leave timing
|
// First to enter, last to leave timing
|
||||||
DhopFaceTime -= usecond();
|
|
||||||
st.CommsMerge(compressor);
|
st.CommsMerge(compressor);
|
||||||
DhopFaceTime -= usecond();
|
|
||||||
|
|
||||||
DhopComputeTime2 -= usecond();
|
|
||||||
{
|
{
|
||||||
int interior=0;
|
int interior=0;
|
||||||
int exterior=1;
|
int exterior=1;
|
||||||
Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior);
|
Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior);
|
||||||
}
|
}
|
||||||
DhopComputeTime2 += usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
@ -396,78 +379,16 @@ void NaiveStaggeredFermion<Impl>::DhopInternalSerialComms(StencilImpl &st, Lebes
|
|||||||
{
|
{
|
||||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
|
|
||||||
DhopTotalTime -= usecond();
|
|
||||||
|
|
||||||
DhopCommTime -= usecond();
|
|
||||||
Compressor compressor;
|
Compressor compressor;
|
||||||
st.HaloExchange(in, compressor);
|
st.HaloExchange(in, compressor);
|
||||||
DhopCommTime += usecond();
|
|
||||||
|
|
||||||
DhopComputeTime -= usecond();
|
|
||||||
{
|
{
|
||||||
int interior=1;
|
int interior=1;
|
||||||
int exterior=1;
|
int exterior=1;
|
||||||
Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior);
|
Kernels::DhopNaive(st,lo,U,in,out,dag,interior,exterior);
|
||||||
}
|
}
|
||||||
DhopComputeTime += usecond();
|
|
||||||
DhopTotalTime += usecond();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////
|
|
||||||
// Reporting
|
|
||||||
////////////////////////////////////////////////////////////////
|
|
||||||
template<class Impl>
|
|
||||||
void NaiveStaggeredFermion<Impl>::Report(void)
|
|
||||||
{
|
|
||||||
Coordinate latt = _grid->GlobalDimensions();
|
|
||||||
RealD volume = 1; for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
|
||||||
RealD NP = _grid->_Nprocessors;
|
|
||||||
RealD NN = _grid->NodeCount();
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "NaiveStaggeredFermion Number of DhopEO Calls : "
|
|
||||||
<< DhopCalls << std::endl;
|
|
||||||
std::cout << GridLogMessage << "NaiveStaggeredFermion TotalTime /Calls : "
|
|
||||||
<< DhopTotalTime / DhopCalls << " us" << std::endl;
|
|
||||||
std::cout << GridLogMessage << "NaiveStaggeredFermion CommTime /Calls : "
|
|
||||||
<< DhopCommTime / DhopCalls << " us" << std::endl;
|
|
||||||
std::cout << GridLogMessage << "NaiveStaggeredFermion ComputeTime/Calls : "
|
|
||||||
<< DhopComputeTime / DhopCalls << " us" << std::endl;
|
|
||||||
|
|
||||||
// Average the compute time
|
|
||||||
_grid->GlobalSum(DhopComputeTime);
|
|
||||||
DhopComputeTime/=NP;
|
|
||||||
|
|
||||||
RealD mflops = 1154*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
|
||||||
|
|
||||||
RealD Fullmflops = 1154*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
|
||||||
|
|
||||||
std::cout << GridLogMessage << "NaiveStaggeredFermion Stencil" <<std::endl; Stencil.Report();
|
|
||||||
std::cout << GridLogMessage << "NaiveStaggeredFermion StencilEven"<<std::endl; StencilEven.Report();
|
|
||||||
std::cout << GridLogMessage << "NaiveStaggeredFermion StencilOdd" <<std::endl; StencilOdd.Report();
|
|
||||||
}
|
|
||||||
template<class Impl>
|
|
||||||
void NaiveStaggeredFermion<Impl>::ZeroCounters(void)
|
|
||||||
{
|
|
||||||
DhopCalls = 0;
|
|
||||||
DhopTotalTime = 0;
|
|
||||||
DhopCommTime = 0;
|
|
||||||
DhopComputeTime = 0;
|
|
||||||
DhopFaceTime = 0;
|
|
||||||
|
|
||||||
Stencil.ZeroCounters();
|
|
||||||
StencilEven.ZeroCounters();
|
|
||||||
StencilOdd.ZeroCounters();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
// Conserved current - not yet implemented.
|
// Conserved current - not yet implemented.
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
|
@ -103,8 +103,6 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
|||||||
Block = block;
|
Block = block;
|
||||||
}
|
}
|
||||||
|
|
||||||
ZeroCounters();
|
|
||||||
|
|
||||||
if (Impl::LsVectorised) {
|
if (Impl::LsVectorised) {
|
||||||
|
|
||||||
int nsimd = Simd::Nsimd();
|
int nsimd = Simd::Nsimd();
|
||||||
@ -144,99 +142,16 @@ WilsonFermion5D<Impl>::WilsonFermion5D(GaugeField &_Umu,
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion5D<Impl>::Report(void)
|
|
||||||
{
|
|
||||||
RealD NP = _FourDimGrid->_Nprocessors;
|
|
||||||
RealD NN = _FourDimGrid->NodeCount();
|
|
||||||
RealD volume = Ls;
|
|
||||||
Coordinate latt = _FourDimGrid->GlobalDimensions();
|
|
||||||
for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
|
||||||
|
|
||||||
if ( DhopCalls > 0 ) {
|
|
||||||
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D Number of DhopEO Calls : " << DhopCalls << std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D TotalTime /Calls : " << DhopTotalTime / DhopCalls << " us" << std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D CommTime /Calls : " << DhopCommTime / DhopCalls << " us" << std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D FaceTime /Calls : " << DhopFaceTime / DhopCalls << " us" << std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime1/Calls : " << DhopComputeTime / DhopCalls << " us" << std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime2/Calls : " << DhopComputeTime2/ DhopCalls << " us" << std::endl;
|
|
||||||
|
|
||||||
// Average the compute time
|
|
||||||
_FourDimGrid->GlobalSum(DhopComputeTime);
|
|
||||||
DhopComputeTime/=NP;
|
|
||||||
RealD mflops = 1344*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
|
||||||
|
|
||||||
RealD Fullmflops = 1344*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( DerivCalls > 0 ) {
|
|
||||||
std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D Number of Deriv Calls : " <<DerivCalls <<std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D CommTime/Calls : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D ComputeTime/Calls : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D Dhop ComputeTime/Calls : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
|
|
||||||
|
|
||||||
RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NP << std::endl;
|
|
||||||
|
|
||||||
RealD Fullmflops = 144*volume*DerivCalls/(DerivDhopComputeTime+DerivCommTime)/2; // 2 for red black counting
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NP << std::endl; }
|
|
||||||
|
|
||||||
if (DerivCalls > 0 || DhopCalls > 0){
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D Stencil" <<std::endl; Stencil.Report();
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D StencilEven"<<std::endl; StencilEven.Report();
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd" <<std::endl; StencilOdd.Report();
|
|
||||||
}
|
|
||||||
if ( DhopCalls > 0){
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D Stencil Reporti()" <<std::endl; Stencil.Reporti(DhopCalls);
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D StencilEven Reporti()"<<std::endl; StencilEven.Reporti(DhopCalls);
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion5D StencilOdd Reporti()" <<std::endl; StencilOdd.Reporti(DhopCalls);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion5D<Impl>::ZeroCounters(void) {
|
|
||||||
DhopCalls = 0;
|
|
||||||
DhopCommTime = 0;
|
|
||||||
DhopComputeTime = 0;
|
|
||||||
DhopComputeTime2= 0;
|
|
||||||
DhopFaceTime = 0;
|
|
||||||
DhopTotalTime = 0;
|
|
||||||
|
|
||||||
DerivCalls = 0;
|
|
||||||
DerivCommTime = 0;
|
|
||||||
DerivComputeTime = 0;
|
|
||||||
DerivDhopComputeTime = 0;
|
|
||||||
|
|
||||||
Stencil.ZeroCounters();
|
|
||||||
StencilEven.ZeroCounters();
|
|
||||||
StencilOdd.ZeroCounters();
|
|
||||||
Stencil.ZeroCountersi();
|
|
||||||
StencilEven.ZeroCountersi();
|
|
||||||
StencilOdd.ZeroCountersi();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
|
void WilsonFermion5D<Impl>::ImportGauge(const GaugeField &_Umu)
|
||||||
{
|
{
|
||||||
GaugeField HUmu(_Umu.Grid());
|
GaugeField HUmu(_Umu.Grid());
|
||||||
HUmu = _Umu*(-0.5);
|
HUmu = _Umu*(-0.5);
|
||||||
if ( Dirichlet ) {
|
if ( Dirichlet ) {
|
||||||
std::cout << GridLogMessage << " Dirichlet BCs 5d " <<Block<<std::endl;
|
std::cout << GridLogDslash << " Dirichlet BCs 5d " <<Block<<std::endl;
|
||||||
Coordinate GaugeBlock(Nd);
|
Coordinate GaugeBlock(Nd);
|
||||||
for(int d=0;d<Nd;d++) GaugeBlock[d] = Block[d+1];
|
for(int d=0;d<Nd;d++) GaugeBlock[d] = Block[d+1];
|
||||||
std::cout << GridLogMessage << " Dirichlet BCs 4d " <<GaugeBlock<<std::endl;
|
std::cout << GridLogDslash << " Dirichlet BCs 4d " <<GaugeBlock<<std::endl;
|
||||||
DirichletFilter<GaugeField> Filter(GaugeBlock);
|
DirichletFilter<GaugeField> Filter(GaugeBlock);
|
||||||
Filter.applyFilter(HUmu);
|
Filter.applyFilter(HUmu);
|
||||||
}
|
}
|
||||||
@ -281,7 +196,6 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
|||||||
const FermionField &B,
|
const FermionField &B,
|
||||||
int dag)
|
int dag)
|
||||||
{
|
{
|
||||||
DerivCalls++;
|
|
||||||
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
assert((dag==DaggerNo) ||(dag==DaggerYes));
|
||||||
|
|
||||||
conformable(st.Grid(),A.Grid());
|
conformable(st.Grid(),A.Grid());
|
||||||
@ -292,15 +206,12 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
|||||||
FermionField Btilde(B.Grid());
|
FermionField Btilde(B.Grid());
|
||||||
FermionField Atilde(B.Grid());
|
FermionField Atilde(B.Grid());
|
||||||
|
|
||||||
DerivCommTime-=usecond();
|
|
||||||
st.HaloExchange(B,compressor);
|
st.HaloExchange(B,compressor);
|
||||||
DerivCommTime+=usecond();
|
|
||||||
|
|
||||||
Atilde=A;
|
Atilde=A;
|
||||||
int LLs = B.Grid()->_rdimensions[0];
|
int LLs = B.Grid()->_rdimensions[0];
|
||||||
|
|
||||||
|
|
||||||
DerivComputeTime-=usecond();
|
|
||||||
for (int mu = 0; mu < Nd; mu++) {
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Flip gamma if dag
|
// Flip gamma if dag
|
||||||
@ -312,8 +223,6 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
|||||||
// Call the single hop
|
// Call the single hop
|
||||||
////////////////////////
|
////////////////////////
|
||||||
|
|
||||||
DerivDhopComputeTime -= usecond();
|
|
||||||
|
|
||||||
int Usites = U.Grid()->oSites();
|
int Usites = U.Grid()->oSites();
|
||||||
|
|
||||||
Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, Usites, B, Btilde, mu,gamma);
|
Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, Usites, B, Btilde, mu,gamma);
|
||||||
@ -321,10 +230,8 @@ void WilsonFermion5D<Impl>::DerivInternal(StencilImpl & st,
|
|||||||
////////////////////////////
|
////////////////////////////
|
||||||
// spin trace outer product
|
// spin trace outer product
|
||||||
////////////////////////////
|
////////////////////////////
|
||||||
DerivDhopComputeTime += usecond();
|
|
||||||
Impl::InsertForce5D(mat, Btilde, Atilde, mu);
|
Impl::InsertForce5D(mat, Btilde, Atilde, mu);
|
||||||
}
|
}
|
||||||
DerivComputeTime += usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
@ -382,12 +289,10 @@ void WilsonFermion5D<Impl>::DhopInternal(StencilImpl & st, LebesgueOrder &lo,
|
|||||||
DoubledGaugeField & U,
|
DoubledGaugeField & U,
|
||||||
const FermionField &in, FermionField &out,int dag)
|
const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
DhopTotalTime-=usecond();
|
|
||||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
|
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
|
||||||
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
|
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
|
||||||
else
|
else
|
||||||
DhopInternalSerialComms(st,lo,U,in,out,dag);
|
DhopInternalSerialComms(st,lo,U,in,out,dag);
|
||||||
DhopTotalTime+=usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -396,6 +301,7 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
|||||||
DoubledGaugeField & U,
|
DoubledGaugeField & U,
|
||||||
const FermionField &in, FermionField &out,int dag)
|
const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
|
GRID_TRACE("DhopInternalOverlappedComms");
|
||||||
Compressor compressor(dag);
|
Compressor compressor(dag);
|
||||||
|
|
||||||
int LLs = in.Grid()->_rdimensions[0];
|
int LLs = in.Grid()->_rdimensions[0];
|
||||||
@ -404,53 +310,58 @@ void WilsonFermion5D<Impl>::DhopInternalOverlappedComms(StencilImpl & st, Lebesg
|
|||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Start comms // Gather intranode and extra node differentiated??
|
// Start comms // Gather intranode and extra node differentiated??
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
DhopFaceTime-=usecond();
|
{
|
||||||
st.HaloExchangeOptGather(in,compressor);
|
GRID_TRACE("Gather");
|
||||||
DhopFaceTime+=usecond();
|
st.HaloExchangeOptGather(in,compressor);
|
||||||
|
accelerator_barrier();
|
||||||
|
}
|
||||||
|
|
||||||
DhopCommTime -=usecond();
|
|
||||||
std::vector<std::vector<CommsRequest_t> > requests;
|
std::vector<std::vector<CommsRequest_t> > requests;
|
||||||
|
auto id=traceStart("Communicate overlapped");
|
||||||
st.CommunicateBegin(requests);
|
st.CommunicateBegin(requests);
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Overlap with comms
|
// Overlap with comms
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
DhopFaceTime-=usecond();
|
{
|
||||||
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
GRID_TRACE("MergeSHM");
|
||||||
DhopFaceTime+=usecond();
|
st.CommsMergeSHM(compressor);// Could do this inside parallel region overlapped with comms
|
||||||
|
}
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// do the compute interior
|
// do the compute interior
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
int Opt = WilsonKernelsStatic::Opt; // Why pass this. Kernels should know
|
int Opt = WilsonKernelsStatic::Opt; // Why pass this. Kernels should know
|
||||||
DhopComputeTime-=usecond();
|
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
|
GRID_TRACE("DhopDagInterior");
|
||||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
|
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
|
||||||
} else {
|
} else {
|
||||||
|
GRID_TRACE("DhopInterior");
|
||||||
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
|
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,1,0);
|
||||||
}
|
}
|
||||||
DhopComputeTime+=usecond();
|
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Complete comms
|
// Complete comms
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
st.CommunicateComplete(requests);
|
st.CommunicateComplete(requests);
|
||||||
DhopCommTime +=usecond();
|
traceStop(id);
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// do the compute exterior
|
// do the compute exterior
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
DhopFaceTime-=usecond();
|
{
|
||||||
st.CommsMerge(compressor);
|
GRID_TRACE("Merge");
|
||||||
DhopFaceTime+=usecond();
|
st.CommsMerge(compressor);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
DhopComputeTime2-=usecond();
|
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
|
GRID_TRACE("DhopDagExterior");
|
||||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
|
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
|
||||||
} else {
|
} else {
|
||||||
|
GRID_TRACE("DhopExterior");
|
||||||
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
|
Kernels::DhopKernel (Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out,0,1);
|
||||||
}
|
}
|
||||||
DhopComputeTime2+=usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -460,29 +371,30 @@ void WilsonFermion5D<Impl>::DhopInternalSerialComms(StencilImpl & st, LebesgueOr
|
|||||||
const FermionField &in,
|
const FermionField &in,
|
||||||
FermionField &out,int dag)
|
FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
|
GRID_TRACE("DhopInternalSerialComms");
|
||||||
Compressor compressor(dag);
|
Compressor compressor(dag);
|
||||||
|
|
||||||
int LLs = in.Grid()->_rdimensions[0];
|
int LLs = in.Grid()->_rdimensions[0];
|
||||||
|
|
||||||
DhopCommTime-=usecond();
|
{
|
||||||
st.HaloExchangeOpt(in,compressor);
|
GRID_TRACE("HaloExchange");
|
||||||
DhopCommTime+=usecond();
|
st.HaloExchangeOpt(in,compressor);
|
||||||
|
}
|
||||||
|
|
||||||
DhopComputeTime-=usecond();
|
|
||||||
int Opt = WilsonKernelsStatic::Opt;
|
int Opt = WilsonKernelsStatic::Opt;
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
|
GRID_TRACE("DhopDag");
|
||||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
|
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
|
||||||
} else {
|
} else {
|
||||||
|
GRID_TRACE("Dhop");
|
||||||
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
|
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),LLs,U.oSites(),in,out);
|
||||||
}
|
}
|
||||||
DhopComputeTime+=usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
|
void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
DhopCalls++;
|
|
||||||
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
|
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
|
||||||
conformable(in.Grid(),out.Grid()); // drops the cb check
|
conformable(in.Grid(),out.Grid()); // drops the cb check
|
||||||
|
|
||||||
@ -494,7 +406,6 @@ void WilsonFermion5D<Impl>::DhopOE(const FermionField &in, FermionField &out,int
|
|||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
DhopCalls++;
|
|
||||||
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
|
conformable(in.Grid(),FermionRedBlackGrid()); // verifies half grid
|
||||||
conformable(in.Grid(),out.Grid()); // drops the cb check
|
conformable(in.Grid(),out.Grid()); // drops the cb check
|
||||||
|
|
||||||
@ -506,7 +417,6 @@ void WilsonFermion5D<Impl>::DhopEO(const FermionField &in, FermionField &out,int
|
|||||||
template<class Impl>
|
template<class Impl>
|
||||||
void WilsonFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
|
void WilsonFermion5D<Impl>::Dhop(const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
DhopCalls+=2;
|
|
||||||
conformable(in.Grid(),FermionGrid()); // verifies full grid
|
conformable(in.Grid(),FermionGrid()); // verifies full grid
|
||||||
conformable(in.Grid(),out.Grid());
|
conformable(in.Grid(),out.Grid());
|
||||||
|
|
||||||
@ -561,12 +471,17 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
|
|||||||
LatComplex sk(_grid); sk = Zero();
|
LatComplex sk(_grid); sk = Zero();
|
||||||
LatComplex sk2(_grid); sk2= Zero();
|
LatComplex sk2(_grid); sk2= Zero();
|
||||||
LatComplex W(_grid); W= Zero();
|
LatComplex W(_grid); W= Zero();
|
||||||
LatComplex a(_grid); a= Zero();
|
|
||||||
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
|
LatComplex one (_grid); one = ScalComplex(1.0,0.0);
|
||||||
LatComplex cosha(_grid);
|
LatComplex cosha(_grid);
|
||||||
LatComplex kmu(_grid);
|
LatComplex kmu(_grid);
|
||||||
LatComplex Wea(_grid);
|
LatComplex Wea(_grid);
|
||||||
LatComplex Wema(_grid);
|
LatComplex Wema(_grid);
|
||||||
|
LatComplex ea(_grid);
|
||||||
|
LatComplex ema(_grid);
|
||||||
|
LatComplex eaLs(_grid);
|
||||||
|
LatComplex emaLs(_grid);
|
||||||
|
LatComplex ea2Ls(_grid);
|
||||||
|
LatComplex ema2Ls(_grid);
|
||||||
LatComplex sinha(_grid);
|
LatComplex sinha(_grid);
|
||||||
LatComplex sinhaLs(_grid);
|
LatComplex sinhaLs(_grid);
|
||||||
LatComplex coshaLs(_grid);
|
LatComplex coshaLs(_grid);
|
||||||
@ -601,39 +516,29 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
|
|||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
cosha = (one + W*W + sk) / (abs(W)*2.0);
|
cosha = (one + W*W + sk) / (abs(W)*2.0);
|
||||||
|
|
||||||
// FIXME Need a Lattice acosh
|
ea = (cosha + sqrt(cosha*cosha-one));
|
||||||
|
ema= (cosha - sqrt(cosha*cosha-one));
|
||||||
|
eaLs = pow(ea,Ls);
|
||||||
|
emaLs= pow(ema,Ls);
|
||||||
|
ea2Ls = pow(ea,2.0*Ls);
|
||||||
|
ema2Ls= pow(ema,2.0*Ls);
|
||||||
|
Wea= abs(W) * ea;
|
||||||
|
Wema= abs(W) * ema;
|
||||||
|
// a=log(ea);
|
||||||
|
|
||||||
{
|
sinha = 0.5*(ea - ema);
|
||||||
autoView(cosha_v,cosha,CpuRead);
|
sinhaLs = 0.5*(eaLs-emaLs);
|
||||||
autoView(a_v,a,CpuWrite);
|
coshaLs = 0.5*(eaLs+emaLs);
|
||||||
for(int idx=0;idx<_grid->lSites();idx++){
|
|
||||||
Coordinate lcoor(Nd);
|
|
||||||
Tcomplex cc;
|
|
||||||
// RealD sgn;
|
|
||||||
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
|
||||||
peekLocalSite(cc,cosha_v,lcoor);
|
|
||||||
assert((double)real(cc)>=1.0);
|
|
||||||
assert(fabs((double)imag(cc))<=1.0e-15);
|
|
||||||
cc = ScalComplex(::acosh(real(cc)),0.0);
|
|
||||||
pokeLocalSite(cc,a_v,lcoor);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Wea = ( exp( a) * abs(W) );
|
|
||||||
Wema= ( exp(-a) * abs(W) );
|
|
||||||
sinha = 0.5*(exp( a) - exp(-a));
|
|
||||||
sinhaLs = 0.5*(exp( a*Ls) - exp(-a*Ls));
|
|
||||||
coshaLs = 0.5*(exp( a*Ls) + exp(-a*Ls));
|
|
||||||
|
|
||||||
A = one / (abs(W) * sinha * 2.0) * one / (sinhaLs * 2.0);
|
A = one / (abs(W) * sinha * 2.0) * one / (sinhaLs * 2.0);
|
||||||
F = exp( a*Ls) * (one - Wea + (Wema - one) * mass*mass);
|
F = eaLs * (one - Wea + (Wema - one) * mass*mass);
|
||||||
F = F + exp(-a*Ls) * (Wema - one + (one - Wea) * mass*mass);
|
F = F + emaLs * (Wema - one + (one - Wea) * mass*mass);
|
||||||
F = F - abs(W) * sinha * 4.0 * mass;
|
F = F - abs(W) * sinha * 4.0 * mass;
|
||||||
|
|
||||||
Bpp = (A/F) * (exp(-a*Ls*2.0) - one) * (one - Wema) * (one - mass*mass * one);
|
Bpp = (A/F) * (ema2Ls - one) * (one - Wema) * (one - mass*mass * one);
|
||||||
Bmm = (A/F) * (one - exp(a*Ls*2.0)) * (one - Wea) * (one - mass*mass * one);
|
Bmm = (A/F) * (one - ea2Ls) * (one - Wea) * (one - mass*mass * one);
|
||||||
App = (A/F) * (exp(-a*Ls*2.0) - one) * exp(-a) * (exp(-a) - abs(W)) * (one - mass*mass * one);
|
App = (A/F) * (ema2Ls - one) * ema * (ema - abs(W)) * (one - mass*mass * one);
|
||||||
Amm = (A/F) * (one - exp(a*Ls*2.0)) * exp(a) * (exp(a) - abs(W)) * (one - mass*mass * one);
|
Amm = (A/F) * (one - ea2Ls) * ea * (ea - abs(W)) * (one - mass*mass * one);
|
||||||
ABpm = (A/F) * abs(W) * sinha * 2.0 * (one + mass * coshaLs * 2.0 + mass*mass * one);
|
ABpm = (A/F) * abs(W) * sinha * 2.0 * (one + mass * coshaLs * 2.0 + mass*mass * one);
|
||||||
|
|
||||||
//P+ source, P- source
|
//P+ source, P- source
|
||||||
@ -656,29 +561,29 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt_5d(FermionField &out,const
|
|||||||
buf1_4d = Zero();
|
buf1_4d = Zero();
|
||||||
ExtractSlice(buf1_4d, PRsource, (tt-1), 0);
|
ExtractSlice(buf1_4d, PRsource, (tt-1), 0);
|
||||||
//G(s,t)
|
//G(s,t)
|
||||||
bufR_4d = bufR_4d + A * exp(a*Ls) * exp(-a*f) * signW * buf1_4d + A * exp(-a*Ls) * exp(a*f) * signW * buf1_4d;
|
bufR_4d = bufR_4d + A * eaLs * pow(ema,f) * signW * buf1_4d + A * emaLs * pow(ea,f) * signW * buf1_4d;
|
||||||
//A++*exp(a(s+t))
|
//A++*exp(a(s+t))
|
||||||
bufR_4d = bufR_4d + App * exp(a*ss) * exp(a*tt) * signW * buf1_4d ;
|
bufR_4d = bufR_4d + App * pow(ea,ss) * pow(ea,tt) * signW * buf1_4d ;
|
||||||
//A+-*exp(a(s-t))
|
//A+-*exp(a(s-t))
|
||||||
bufR_4d = bufR_4d + ABpm * exp(a*ss) * exp(-a*tt) * signW * buf1_4d ;
|
bufR_4d = bufR_4d + ABpm * pow(ea,ss) * pow(ema,tt) * signW * buf1_4d ;
|
||||||
//A-+*exp(a(-s+t))
|
//A-+*exp(a(-s+t))
|
||||||
bufR_4d = bufR_4d + ABpm * exp(-a*ss) * exp(a*tt) * signW * buf1_4d ;
|
bufR_4d = bufR_4d + ABpm * pow(ema,ss) * pow(ea,tt) * signW * buf1_4d ;
|
||||||
//A--*exp(a(-s-t))
|
//A--*exp(a(-s-t))
|
||||||
bufR_4d = bufR_4d + Amm * exp(-a*ss) * exp(-a*tt) * signW * buf1_4d ;
|
bufR_4d = bufR_4d + Amm * pow(ema,ss) * pow(ema,tt) * signW * buf1_4d ;
|
||||||
|
|
||||||
//GL
|
//GL
|
||||||
buf2_4d = Zero();
|
buf2_4d = Zero();
|
||||||
ExtractSlice(buf2_4d, PLsource, (tt-1), 0);
|
ExtractSlice(buf2_4d, PLsource, (tt-1), 0);
|
||||||
//G(s,t)
|
//G(s,t)
|
||||||
bufL_4d = bufL_4d + A * exp(a*Ls) * exp(-a*f) * signW * buf2_4d + A * exp(-a*Ls) * exp(a*f) * signW * buf2_4d;
|
bufL_4d = bufL_4d + A * eaLs * pow(ema,f) * signW * buf2_4d + A * emaLs * pow(ea,f) * signW * buf2_4d;
|
||||||
//B++*exp(a(s+t))
|
//B++*exp(a(s+t))
|
||||||
bufL_4d = bufL_4d + Bpp * exp(a*ss) * exp(a*tt) * signW * buf2_4d ;
|
bufL_4d = bufL_4d + Bpp * pow(ea,ss) * pow(ea,tt) * signW * buf2_4d ;
|
||||||
//B+-*exp(a(s-t))
|
//B+-*exp(a(s-t))
|
||||||
bufL_4d = bufL_4d + ABpm * exp(a*ss) * exp(-a*tt) * signW * buf2_4d ;
|
bufL_4d = bufL_4d + ABpm * pow(ea,ss) * pow(ema,tt) * signW * buf2_4d ;
|
||||||
//B-+*exp(a(-s+t))
|
//B-+*exp(a(-s+t))
|
||||||
bufL_4d = bufL_4d + ABpm * exp(-a*ss) * exp(a*tt) * signW * buf2_4d ;
|
bufL_4d = bufL_4d + ABpm * pow(ema,ss) * pow(ea,tt) * signW * buf2_4d ;
|
||||||
//B--*exp(a(-s-t))
|
//B--*exp(a(-s-t))
|
||||||
bufL_4d = bufL_4d + Bmm * exp(-a*ss) * exp(-a*tt) * signW * buf2_4d ;
|
bufL_4d = bufL_4d + Bmm * pow(ema,ss) * pow(ema,tt) * signW * buf2_4d ;
|
||||||
}
|
}
|
||||||
InsertSlice(bufR_4d, GR, (ss-1), 0);
|
InsertSlice(bufR_4d, GR, (ss-1), 0);
|
||||||
InsertSlice(bufL_4d, GL, (ss-1), 0);
|
InsertSlice(bufL_4d, GL, (ss-1), 0);
|
||||||
@ -797,28 +702,12 @@ void WilsonFermion5D<Impl>::MomentumSpacePropagatorHt(FermionField &out,const Fe
|
|||||||
W = one - M5 + sk2;
|
W = one - M5 + sk2;
|
||||||
|
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
// Cosh alpha -> alpha
|
// Cosh alpha -> exp(+/- alpha)
|
||||||
////////////////////////////////////////////
|
////////////////////////////////////////////
|
||||||
cosha = (one + W*W + sk) / (abs(W)*2.0);
|
cosha = (one + W*W + sk) / (abs(W)*2.0);
|
||||||
|
|
||||||
// FIXME Need a Lattice acosh
|
Wea = abs(W)*(cosha + sqrt(cosha*cosha-one));
|
||||||
{
|
Wema= abs(W)*(cosha - sqrt(cosha*cosha-one));
|
||||||
autoView(cosha_v,cosha,CpuRead);
|
|
||||||
autoView(a_v,a,CpuWrite);
|
|
||||||
for(int idx=0;idx<_grid->lSites();idx++){
|
|
||||||
Coordinate lcoor(Nd);
|
|
||||||
Tcomplex cc;
|
|
||||||
// RealD sgn;
|
|
||||||
_grid->LocalIndexToLocalCoor(idx,lcoor);
|
|
||||||
peekLocalSite(cc,cosha_v,lcoor);
|
|
||||||
assert((double)real(cc)>=1.0);
|
|
||||||
assert(fabs((double)imag(cc))<=1.0e-15);
|
|
||||||
cc = ScalComplex(::acosh(real(cc)),0.0);
|
|
||||||
pokeLocalSite(cc,a_v,lcoor);
|
|
||||||
}}
|
|
||||||
|
|
||||||
Wea = ( exp( a) * abs(W) );
|
|
||||||
Wema= ( exp(-a) * abs(W) );
|
|
||||||
|
|
||||||
num = num + ( one - Wema ) * mass * in;
|
num = num + ( one - Wema ) * mass * in;
|
||||||
denom= ( Wea - one ) + mass*mass * (one - Wema);
|
denom= ( Wea - one ) + mass*mass * (one - Wema);
|
||||||
|
@ -76,91 +76,6 @@ WilsonFermion<Impl>::WilsonFermion(GaugeField &_Umu, GridCartesian &Fgrid,
|
|||||||
StencilOdd.BuildSurfaceList(1,vol4);
|
StencilOdd.BuildSurfaceList(1,vol4);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::Report(void)
|
|
||||||
{
|
|
||||||
RealD NP = _grid->_Nprocessors;
|
|
||||||
RealD NN = _grid->NodeCount();
|
|
||||||
RealD volume = 1;
|
|
||||||
Coordinate latt = _grid->GlobalDimensions();
|
|
||||||
for(int mu=0;mu<Nd;mu++) volume=volume*latt[mu];
|
|
||||||
|
|
||||||
if ( DhopCalls > 0 ) {
|
|
||||||
std::cout << GridLogMessage << "#### Dhop calls report " << std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion Number of DhopEO Calls : " << DhopCalls << std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion TotalTime /Calls : " << DhopTotalTime / DhopCalls << " us" << std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion CommTime /Calls : " << DhopCommTime / DhopCalls << " us" << std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion FaceTime /Calls : " << DhopFaceTime / DhopCalls << " us" << std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion ComputeTime1/Calls : " << DhopComputeTime / DhopCalls << " us" << std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion ComputeTime2/Calls : " << DhopComputeTime2/ DhopCalls << " us" << std::endl;
|
|
||||||
|
|
||||||
// Average the compute time
|
|
||||||
_grid->GlobalSum(DhopComputeTime);
|
|
||||||
DhopComputeTime/=NP;
|
|
||||||
RealD mflops = 1320*volume*DhopCalls/DhopComputeTime/2; // 2 for red black counting
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call : " << mflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per rank : " << mflops/NP << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per node : " << mflops/NN << std::endl;
|
|
||||||
|
|
||||||
RealD Fullmflops = 1320*volume*DhopCalls/(DhopTotalTime)/2; // 2 for red black counting
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call (full) : " << Fullmflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per rank (full): " << Fullmflops/NP << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full): " << Fullmflops/NN << std::endl;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( DerivCalls > 0 ) {
|
|
||||||
std::cout << GridLogMessage << "#### Deriv calls report "<< std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion Number of Deriv Calls : " <<DerivCalls <<std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion CommTime/Calls : " <<DerivCommTime/DerivCalls<<" us" <<std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion ComputeTime/Calls : " <<DerivComputeTime/DerivCalls<<" us" <<std::endl;
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion Dhop ComputeTime/Calls : " <<DerivDhopComputeTime/DerivCalls<<" us" <<std::endl;
|
|
||||||
|
|
||||||
// how to count flops here?
|
|
||||||
RealD mflops = 144*volume*DerivCalls/DerivDhopComputeTime;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call ? : " << mflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per node ? : " << mflops/NP << std::endl;
|
|
||||||
|
|
||||||
// how to count flops here?
|
|
||||||
RealD Fullmflops = 144*volume*DerivCalls/(DerivDhopComputeTime+DerivCommTime)/2; // 2 for red black counting
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call (full) ? : " << Fullmflops << std::endl;
|
|
||||||
std::cout << GridLogMessage << "Average mflops/s per call per node (full) ? : " << Fullmflops/NP << std::endl; }
|
|
||||||
|
|
||||||
if (DerivCalls > 0 || DhopCalls > 0){
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion Stencil" <<std::endl; Stencil.Report();
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion StencilEven"<<std::endl; StencilEven.Report();
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion StencilOdd" <<std::endl; StencilOdd.Report();
|
|
||||||
}
|
|
||||||
if ( DhopCalls > 0){
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion Stencil Reporti()" <<std::endl; Stencil.Reporti(DhopCalls);
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion StencilEven Reporti()"<<std::endl; StencilEven.Reporti(DhopCalls);
|
|
||||||
std::cout << GridLogMessage << "WilsonFermion StencilOdd Reporti()" <<std::endl; StencilOdd.Reporti(DhopCalls);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
template<class Impl>
|
|
||||||
void WilsonFermion<Impl>::ZeroCounters(void) {
|
|
||||||
DhopCalls = 0; // ok
|
|
||||||
DhopCommTime = 0;
|
|
||||||
DhopComputeTime = 0;
|
|
||||||
DhopComputeTime2= 0;
|
|
||||||
DhopFaceTime = 0;
|
|
||||||
DhopTotalTime = 0;
|
|
||||||
|
|
||||||
DerivCalls = 0; // ok
|
|
||||||
DerivCommTime = 0;
|
|
||||||
DerivComputeTime = 0;
|
|
||||||
DerivDhopComputeTime = 0;
|
|
||||||
|
|
||||||
Stencil.ZeroCounters();
|
|
||||||
StencilEven.ZeroCounters();
|
|
||||||
StencilOdd.ZeroCounters();
|
|
||||||
Stencil.ZeroCountersi();
|
|
||||||
StencilEven.ZeroCountersi();
|
|
||||||
StencilOdd.ZeroCountersi();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
void WilsonFermion<Impl>::ImportGauge(const GaugeField &_Umu)
|
||||||
{
|
{
|
||||||
@ -320,7 +235,6 @@ template <class Impl>
|
|||||||
void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
||||||
GaugeField &mat, const FermionField &A,
|
GaugeField &mat, const FermionField &A,
|
||||||
const FermionField &B, int dag) {
|
const FermionField &B, int dag) {
|
||||||
DerivCalls++;
|
|
||||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
|
|
||||||
Compressor compressor(dag);
|
Compressor compressor(dag);
|
||||||
@ -329,11 +243,8 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
|||||||
FermionField Atilde(B.Grid());
|
FermionField Atilde(B.Grid());
|
||||||
Atilde = A;
|
Atilde = A;
|
||||||
|
|
||||||
DerivCommTime-=usecond();
|
|
||||||
st.HaloExchange(B, compressor);
|
st.HaloExchange(B, compressor);
|
||||||
DerivCommTime+=usecond();
|
|
||||||
|
|
||||||
DerivComputeTime-=usecond();
|
|
||||||
for (int mu = 0; mu < Nd; mu++) {
|
for (int mu = 0; mu < Nd; mu++) {
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Flip gamma (1+g)<->(1-g) if dag
|
// Flip gamma (1+g)<->(1-g) if dag
|
||||||
@ -341,7 +252,6 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
|||||||
int gamma = mu;
|
int gamma = mu;
|
||||||
if (!dag) gamma += Nd;
|
if (!dag) gamma += Nd;
|
||||||
|
|
||||||
DerivDhopComputeTime -= usecond();
|
|
||||||
int Ls=1;
|
int Ls=1;
|
||||||
Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, B.Grid()->oSites(), B, Btilde, mu, gamma);
|
Kernels::DhopDirKernel(st, U, st.CommBuf(), Ls, B.Grid()->oSites(), B, Btilde, mu, gamma);
|
||||||
|
|
||||||
@ -349,9 +259,7 @@ void WilsonFermion<Impl>::DerivInternal(StencilImpl &st, DoubledGaugeField &U,
|
|||||||
// spin trace outer product
|
// spin trace outer product
|
||||||
//////////////////////////////////////////////////
|
//////////////////////////////////////////////////
|
||||||
Impl::InsertForce4D(mat, Btilde, Atilde, mu);
|
Impl::InsertForce4D(mat, Btilde, Atilde, mu);
|
||||||
DerivDhopComputeTime += usecond();
|
|
||||||
}
|
}
|
||||||
DerivComputeTime += usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
@ -398,7 +306,6 @@ void WilsonFermion<Impl>::DhopDerivEO(GaugeField &mat, const FermionField &U, co
|
|||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
|
void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int dag)
|
||||||
{
|
{
|
||||||
DhopCalls+=2;
|
|
||||||
conformable(in.Grid(), _grid); // verifies full grid
|
conformable(in.Grid(), _grid); // verifies full grid
|
||||||
conformable(in.Grid(), out.Grid());
|
conformable(in.Grid(), out.Grid());
|
||||||
|
|
||||||
@ -410,7 +317,6 @@ void WilsonFermion<Impl>::Dhop(const FermionField &in, FermionField &out, int da
|
|||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
|
void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int dag)
|
||||||
{
|
{
|
||||||
DhopCalls++;
|
|
||||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||||
|
|
||||||
@ -423,7 +329,6 @@ void WilsonFermion<Impl>::DhopOE(const FermionField &in, FermionField &out, int
|
|||||||
template <class Impl>
|
template <class Impl>
|
||||||
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
void WilsonFermion<Impl>::DhopEO(const FermionField &in, FermionField &out,int dag)
|
||||||
{
|
{
|
||||||
DhopCalls++;
|
|
||||||
conformable(in.Grid(), _cbgrid); // verifies half grid
|
conformable(in.Grid(), _cbgrid); // verifies half grid
|
||||||
conformable(in.Grid(), out.Grid()); // drops the cb check
|
conformable(in.Grid(), out.Grid()); // drops the cb check
|
||||||
|
|
||||||
@ -488,14 +393,12 @@ void WilsonFermion<Impl>::DhopInternal(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
const FermionField &in,
|
const FermionField &in,
|
||||||
FermionField &out, int dag)
|
FermionField &out, int dag)
|
||||||
{
|
{
|
||||||
DhopTotalTime-=usecond();
|
|
||||||
#ifdef GRID_OMP
|
#ifdef GRID_OMP
|
||||||
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
|
if ( WilsonKernelsStatic::Comms == WilsonKernelsStatic::CommsAndCompute )
|
||||||
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
|
DhopInternalOverlappedComms(st,lo,U,in,out,dag);
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
DhopInternalSerial(st,lo,U,in,out,dag);
|
DhopInternalSerial(st,lo,U,in,out,dag);
|
||||||
DhopTotalTime+=usecond();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Impl>
|
template <class Impl>
|
||||||
@ -504,6 +407,7 @@ void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueO
|
|||||||
const FermionField &in,
|
const FermionField &in,
|
||||||
FermionField &out, int dag)
|
FermionField &out, int dag)
|
||||||
{
|
{
|
||||||
|
GRID_TRACE("DhopOverlapped");
|
||||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
|
|
||||||
Compressor compressor(dag);
|
Compressor compressor(dag);
|
||||||
@ -514,53 +418,55 @@ void WilsonFermion<Impl>::DhopInternalOverlappedComms(StencilImpl &st, LebesgueO
|
|||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
std::vector<std::vector<CommsRequest_t> > requests;
|
std::vector<std::vector<CommsRequest_t> > requests;
|
||||||
st.Prepare();
|
st.Prepare();
|
||||||
DhopFaceTime-=usecond();
|
{
|
||||||
st.HaloGather(in,compressor);
|
GRID_TRACE("Gather");
|
||||||
DhopFaceTime+=usecond();
|
st.HaloGather(in,compressor);
|
||||||
|
}
|
||||||
|
|
||||||
DhopCommTime -=usecond();
|
tracePush("Communication");
|
||||||
st.CommunicateBegin(requests);
|
st.CommunicateBegin(requests);
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Overlap with comms
|
// Overlap with comms
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
DhopFaceTime-=usecond();
|
{
|
||||||
st.CommsMergeSHM(compressor);
|
GRID_TRACE("MergeSHM");
|
||||||
DhopFaceTime+=usecond();
|
st.CommsMergeSHM(compressor);
|
||||||
|
}
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// do the compute interior
|
// do the compute interior
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
int Opt = WilsonKernelsStatic::Opt;
|
int Opt = WilsonKernelsStatic::Opt;
|
||||||
DhopComputeTime-=usecond();
|
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
|
GRID_TRACE("DhopDagInterior");
|
||||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
|
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
|
||||||
} else {
|
} else {
|
||||||
|
GRID_TRACE("DhopInterior");
|
||||||
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
|
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,1,0);
|
||||||
}
|
}
|
||||||
DhopComputeTime+=usecond();
|
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Complete comms
|
// Complete comms
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
st.CommunicateComplete(requests);
|
st.CommunicateComplete(requests);
|
||||||
DhopCommTime +=usecond();
|
tracePop("Communication");
|
||||||
|
|
||||||
DhopFaceTime-=usecond();
|
|
||||||
st.CommsMerge(compressor);
|
|
||||||
DhopFaceTime+=usecond();
|
|
||||||
|
|
||||||
|
{
|
||||||
|
GRID_TRACE("Merge");
|
||||||
|
st.CommsMerge(compressor);
|
||||||
|
}
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// do the compute exterior
|
// do the compute exterior
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
|
|
||||||
DhopComputeTime2-=usecond();
|
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
|
GRID_TRACE("DhopDagExterior");
|
||||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
|
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
|
||||||
} else {
|
} else {
|
||||||
|
GRID_TRACE("DhopExterior");
|
||||||
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
|
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out,0,1);
|
||||||
}
|
}
|
||||||
DhopComputeTime2+=usecond();
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -570,20 +476,22 @@ void WilsonFermion<Impl>::DhopInternalSerial(StencilImpl &st, LebesgueOrder &lo,
|
|||||||
const FermionField &in,
|
const FermionField &in,
|
||||||
FermionField &out, int dag)
|
FermionField &out, int dag)
|
||||||
{
|
{
|
||||||
|
GRID_TRACE("DhopSerial");
|
||||||
assert((dag == DaggerNo) || (dag == DaggerYes));
|
assert((dag == DaggerNo) || (dag == DaggerYes));
|
||||||
Compressor compressor(dag);
|
Compressor compressor(dag);
|
||||||
DhopCommTime-=usecond();
|
{
|
||||||
st.HaloExchange(in, compressor);
|
GRID_TRACE("HaloExchange");
|
||||||
DhopCommTime+=usecond();
|
st.HaloExchange(in, compressor);
|
||||||
|
}
|
||||||
|
|
||||||
DhopComputeTime-=usecond();
|
|
||||||
int Opt = WilsonKernelsStatic::Opt;
|
int Opt = WilsonKernelsStatic::Opt;
|
||||||
if (dag == DaggerYes) {
|
if (dag == DaggerYes) {
|
||||||
|
GRID_TRACE("DhopDag");
|
||||||
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
|
Kernels::DhopDagKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
|
||||||
} else {
|
} else {
|
||||||
|
GRID_TRACE("Dhop");
|
||||||
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
|
Kernels::DhopKernel(Opt,st,U,st.CommBuf(),1,U.oSites(),in,out);
|
||||||
}
|
}
|
||||||
DhopComputeTime+=usecond();
|
|
||||||
};
|
};
|
||||||
/*Change ends */
|
/*Change ends */
|
||||||
|
|
||||||
|
@ -72,20 +72,15 @@ accelerator_inline void get_stencil(StencilEntry * mem, StencilEntry &chip)
|
|||||||
if (SE->_is_local) { \
|
if (SE->_is_local) { \
|
||||||
int perm= SE->_permute; \
|
int perm= SE->_permute; \
|
||||||
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
|
auto tmp = coalescedReadPermute(in[SE->_offset],ptype,perm,lane); \
|
||||||
spProj(chi,tmp); \
|
spProj(chi,tmp); \
|
||||||
} else if ( st.same_node[Dir] ) { \
|
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
|
||||||
chi = coalescedRead(buf[SE->_offset],lane); \
|
Recon(result, Uchi); \
|
||||||
} \
|
} \
|
||||||
acceleratorSynchronise(); \
|
|
||||||
if (SE->_is_local || st.same_node[Dir] ) { \
|
|
||||||
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
|
|
||||||
Recon(result, Uchi); \
|
|
||||||
} \
|
|
||||||
acceleratorSynchronise();
|
acceleratorSynchronise();
|
||||||
|
|
||||||
#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \
|
#define GENERIC_STENCIL_LEG_EXT(Dir,spProj,Recon) \
|
||||||
SE = st.GetEntry(ptype, Dir, sF); \
|
SE = st.GetEntry(ptype, Dir, sF); \
|
||||||
if ((!SE->_is_local) && (!st.same_node[Dir]) ) { \
|
if (!SE->_is_local ) { \
|
||||||
auto chi = coalescedRead(buf[SE->_offset],lane); \
|
auto chi = coalescedRead(buf[SE->_offset],lane); \
|
||||||
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
|
Impl::multLink(Uchi, U[sU], chi, Dir, SE, st); \
|
||||||
Recon(result, Uchi); \
|
Recon(result, Uchi); \
|
||||||
@ -416,19 +411,6 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
|
|||||||
#undef LoopBody
|
#undef LoopBody
|
||||||
}
|
}
|
||||||
|
|
||||||
#define KERNEL_CALL_TMP(A) \
|
|
||||||
const uint64_t NN = Nsite*Ls; \
|
|
||||||
auto U_p = & U_v[0]; \
|
|
||||||
auto in_p = & in_v[0]; \
|
|
||||||
auto out_p = & out_v[0]; \
|
|
||||||
auto st_p = st_v._entries_p; \
|
|
||||||
auto st_perm = st_v._permute_type; \
|
|
||||||
accelerator_forNB( ss, NN, Simd::Nsimd(), { \
|
|
||||||
int sF = ss; \
|
|
||||||
int sU = ss/Ls; \
|
|
||||||
WilsonKernels<Impl>::A(st_perm,st_p,U_p,buf,sF,sU,in_p,out_p); \
|
|
||||||
}); \
|
|
||||||
accelerator_barrier();
|
|
||||||
|
|
||||||
#define KERNEL_CALLNB(A) \
|
#define KERNEL_CALLNB(A) \
|
||||||
const uint64_t NN = Nsite*Ls; \
|
const uint64_t NN = Nsite*Ls; \
|
||||||
@ -448,8 +430,7 @@ void WilsonKernels<Impl>::DhopDirKernel( StencilImpl &st, DoubledGaugeField &U,S
|
|||||||
int sF = ptr[ss]; \
|
int sF = ptr[ss]; \
|
||||||
int sU = ss/Ls; \
|
int sU = ss/Ls; \
|
||||||
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
|
WilsonKernels<Impl>::A(st_v,U_v,buf,sF,sU,in_v,out_v); \
|
||||||
}); \
|
});
|
||||||
accelerator_barrier();
|
|
||||||
|
|
||||||
#define ASM_CALL(A) \
|
#define ASM_CALL(A) \
|
||||||
thread_for( ss, Nsite, { \
|
thread_for( ss, Nsite, { \
|
||||||
@ -471,7 +452,7 @@ void WilsonKernels<Impl>::DhopKernel(int Opt,StencilImpl &st, DoubledGaugeField
|
|||||||
if( interior && exterior ) {
|
if( interior && exterior ) {
|
||||||
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;}
|
if (Opt == WilsonKernelsStatic::OptGeneric ) { KERNEL_CALL(GenericDhopSite); return;}
|
||||||
#ifdef SYCL_HACK
|
#ifdef SYCL_HACK
|
||||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL_TMP(HandDhopSiteSycl); return; }
|
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSiteSycl); return; }
|
||||||
#else
|
#else
|
||||||
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;}
|
if (Opt == WilsonKernelsStatic::OptHandUnroll ) { KERNEL_CALL(HandDhopSite); return;}
|
||||||
#endif
|
#endif
|
||||||
|
@ -91,6 +91,19 @@ struct DDHMCFilter: public MomentumFilterBase<GaugeField>
|
|||||||
U_mu = where(mod(coor,B1)==Integer(B1-4),zzz_mu,U_mu);
|
U_mu = where(mod(coor,B1)==Integer(B1-4),zzz_mu,U_mu);
|
||||||
PokeIndex<LorentzIndex>(U, U_mu, mu);
|
PokeIndex<LorentzIndex>(U, U_mu, mu);
|
||||||
}
|
}
|
||||||
|
if ( Width==4) {
|
||||||
|
U = where(mod(coor,B1)==Integer(B1-4),zzz,U);
|
||||||
|
U = where(mod(coor,B1)==Integer(B1-3),zzz,U);
|
||||||
|
U = where(mod(coor,B1)==Integer(B1-2),zzz,U);
|
||||||
|
U = where(mod(coor,B1)==Integer(B1-1),zzz,U);
|
||||||
|
U = where(mod(coor,B1)==Integer(0) ,zzz,U);
|
||||||
|
U = where(mod(coor,B1)==Integer(1) ,zzz,U);
|
||||||
|
U = where(mod(coor,B1)==Integer(2) ,zzz,U);
|
||||||
|
U = where(mod(coor,B1)==Integer(3) ,zzz,U);
|
||||||
|
auto U_mu = PeekIndex<LorentzIndex>(U,mu);
|
||||||
|
U_mu = where(mod(coor,B1)==Integer(B1-5),zzz_mu,U_mu);
|
||||||
|
PokeIndex<LorentzIndex>(U, U_mu, mu);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -38,6 +38,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
template<typename MomentaField>
|
template<typename MomentaField>
|
||||||
struct MomentumFilterBase{
|
struct MomentumFilterBase{
|
||||||
virtual void applyFilter(MomentaField &P) const = 0;
|
virtual void applyFilter(MomentaField &P) const = 0;
|
||||||
|
virtual ~MomentumFilterBase(){};
|
||||||
};
|
};
|
||||||
|
|
||||||
//Do nothing
|
//Do nothing
|
||||||
@ -83,7 +84,6 @@ struct MomentumFilterApplyPhase: public MomentumFilterBase<MomentaField>{
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
@ -67,6 +67,36 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
|
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template<class Impl,class ImplF>
|
||||||
|
class OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction : public GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF> {
|
||||||
|
public:
|
||||||
|
typedef OneFlavourRationalParams Params;
|
||||||
|
private:
|
||||||
|
static RationalActionParams transcribe(const Params &in){
|
||||||
|
RationalActionParams out;
|
||||||
|
out.inv_pow = 2;
|
||||||
|
out.lo = in.lo;
|
||||||
|
out.hi = in.hi;
|
||||||
|
out.MaxIter = in.MaxIter;
|
||||||
|
out.action_tolerance = out.md_tolerance = in.tolerance;
|
||||||
|
out.action_degree = out.md_degree = in.degree;
|
||||||
|
out.precision = in.precision;
|
||||||
|
out.BoundsCheckFreq = in.BoundsCheckFreq;
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction(FermionOperator<Impl> &_NumOp,
|
||||||
|
FermionOperator<Impl> &_DenOp,
|
||||||
|
FermionOperator<ImplF> &_NumOpF,
|
||||||
|
FermionOperator<ImplF> &_DenOpF,
|
||||||
|
const Params & p, Integer ReliableUpdateFreq
|
||||||
|
) :
|
||||||
|
GeneralEvenOddRatioRationalMixedPrecPseudoFermionAction<Impl,ImplF>(_NumOp, _DenOp,_NumOpF, _DenOpF, transcribe(p),ReliableUpdateFreq){}
|
||||||
|
|
||||||
|
virtual std::string action_name(){return "OneFlavourEvenOddRatioRationalPseudoFermionAction";}
|
||||||
|
};
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -85,7 +85,12 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
PowerNegQuarter.Init(remez,param.tolerance,true);
|
PowerNegQuarter.Init(remez,param.tolerance,true);
|
||||||
};
|
};
|
||||||
|
|
||||||
virtual std::string action_name(){return "OneFlavourRatioRationalPseudoFermionAction";}
|
virtual std::string action_name(){
|
||||||
|
std::stringstream sstream;
|
||||||
|
sstream<<"OneFlavourRatioRationalPseudoFermionAction("
|
||||||
|
<<DenOp.Mass()<<") / det("<<NumOp.Mass()<<")";
|
||||||
|
return sstream.str();
|
||||||
|
}
|
||||||
|
|
||||||
virtual std::string LogParameters(){
|
virtual std::string LogParameters(){
|
||||||
std::stringstream sstream;
|
std::stringstream sstream;
|
||||||
|
@ -53,6 +53,7 @@ struct HMCparameters: Serializable {
|
|||||||
Integer, Trajectories, /* @brief Number of sweeps in this run */
|
Integer, Trajectories, /* @brief Number of sweeps in this run */
|
||||||
bool, MetropolisTest,
|
bool, MetropolisTest,
|
||||||
Integer, NoMetropolisUntil,
|
Integer, NoMetropolisUntil,
|
||||||
|
bool, PerformRandomShift, /* @brief Randomly shift the gauge configuration at the start of a trajectory */
|
||||||
std::string, StartingType,
|
std::string, StartingType,
|
||||||
IntegratorParameters, MD)
|
IntegratorParameters, MD)
|
||||||
|
|
||||||
@ -63,6 +64,7 @@ struct HMCparameters: Serializable {
|
|||||||
StartTrajectory = 0;
|
StartTrajectory = 0;
|
||||||
Trajectories = 10;
|
Trajectories = 10;
|
||||||
StartingType = "HotStart";
|
StartingType = "HotStart";
|
||||||
|
PerformRandomShift = true;
|
||||||
/////////////////////////////////
|
/////////////////////////////////
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -83,6 +85,7 @@ struct HMCparameters: Serializable {
|
|||||||
std::cout << GridLogMessage << "[HMC parameters] Start trajectory : " << StartTrajectory << "\n";
|
std::cout << GridLogMessage << "[HMC parameters] Start trajectory : " << StartTrajectory << "\n";
|
||||||
std::cout << GridLogMessage << "[HMC parameters] Metropolis test (on/off): " << std::boolalpha << MetropolisTest << "\n";
|
std::cout << GridLogMessage << "[HMC parameters] Metropolis test (on/off): " << std::boolalpha << MetropolisTest << "\n";
|
||||||
std::cout << GridLogMessage << "[HMC parameters] Thermalization trajs : " << NoMetropolisUntil << "\n";
|
std::cout << GridLogMessage << "[HMC parameters] Thermalization trajs : " << NoMetropolisUntil << "\n";
|
||||||
|
std::cout << GridLogMessage << "[HMC parameters] Doing random shift : " << std::boolalpha << PerformRandomShift << "\n";
|
||||||
std::cout << GridLogMessage << "[HMC parameters] Starting type : " << StartingType << "\n";
|
std::cout << GridLogMessage << "[HMC parameters] Starting type : " << StartingType << "\n";
|
||||||
MD.print_parameters();
|
MD.print_parameters();
|
||||||
}
|
}
|
||||||
@ -95,6 +98,7 @@ private:
|
|||||||
const HMCparameters Params;
|
const HMCparameters Params;
|
||||||
|
|
||||||
typedef typename IntegratorType::Field Field;
|
typedef typename IntegratorType::Field Field;
|
||||||
|
typedef typename IntegratorType::FieldImplementation FieldImplementation;
|
||||||
typedef std::vector< HmcObservable<Field> * > ObsListType;
|
typedef std::vector< HmcObservable<Field> * > ObsListType;
|
||||||
|
|
||||||
//pass these from the resource manager
|
//pass these from the resource manager
|
||||||
@ -138,26 +142,37 @@ private:
|
|||||||
|
|
||||||
GridBase *Grid = U.Grid();
|
GridBase *Grid = U.Grid();
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
if(Params.PerformRandomShift){
|
||||||
// Mainly for DDHMC perform a random translation of U modulo volume
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
// Mainly for DDHMC perform a random translation of U modulo volume
|
||||||
std::cout << GridLogMessage << "--------------------------------------------------\n";
|
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
std::cout << GridLogMessage << "Random shifting gauge field by [";
|
std::cout << GridLogMessage << "--------------------------------------------------\n";
|
||||||
for(int d=0;d<Grid->Nd();d++) {
|
std::cout << GridLogMessage << "Random shifting gauge field by [";
|
||||||
|
|
||||||
int L = Grid->GlobalDimensions()[d];
|
std::vector<typename FieldImplementation::GaugeLinkField> Umu(Grid->Nd(), U.Grid());
|
||||||
|
for(int mu=0;mu<Grid->Nd();mu++) Umu[mu] = PeekIndex<LorentzIndex>(U, mu);
|
||||||
|
|
||||||
RealD rn_uniform; random(sRNG, rn_uniform);
|
for(int d=0;d<Grid->Nd();d++) {
|
||||||
|
|
||||||
int shift = (int) (rn_uniform*L);
|
int L = Grid->GlobalDimensions()[d];
|
||||||
|
|
||||||
std::cout << shift;
|
RealD rn_uniform; random(sRNG, rn_uniform);
|
||||||
if(d<Grid->Nd()-1) std::cout <<",";
|
|
||||||
else std::cout <<"]\n";
|
|
||||||
|
|
||||||
U = Cshift(U,d,shift);
|
int shift = (int) (rn_uniform*L);
|
||||||
|
|
||||||
|
std::cout << shift;
|
||||||
|
if(d<Grid->Nd()-1) std::cout <<",";
|
||||||
|
else std::cout <<"]\n";
|
||||||
|
|
||||||
|
//shift all fields together in a way that respects the gauge BCs
|
||||||
|
for(int mu=0; mu < Grid->Nd(); mu++)
|
||||||
|
Umu[mu] = FieldImplementation::CshiftLink(Umu[mu],d,shift);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int mu=0;mu<Grid->Nd();mu++) PokeIndex<LorentzIndex>(U,Umu[mu],mu);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "--------------------------------------------------\n";
|
||||||
}
|
}
|
||||||
std::cout << GridLogMessage << "--------------------------------------------------\n";
|
|
||||||
|
|
||||||
TheIntegrator.reset_timer();
|
TheIntegrator.reset_timer();
|
||||||
|
|
||||||
|
@ -63,10 +63,10 @@ public:
|
|||||||
};
|
};
|
||||||
|
|
||||||
/*! @brief Class for Molecular Dynamics management */
|
/*! @brief Class for Molecular Dynamics management */
|
||||||
template <class FieldImplementation, class SmearingPolicy, class RepresentationPolicy>
|
template <class FieldImplementation_, class SmearingPolicy, class RepresentationPolicy>
|
||||||
class Integrator {
|
class Integrator {
|
||||||
protected:
|
protected:
|
||||||
|
typedef FieldImplementation_ FieldImplementation;
|
||||||
typedef typename FieldImplementation::Field MomentaField; //for readability
|
typedef typename FieldImplementation::Field MomentaField; //for readability
|
||||||
typedef typename FieldImplementation::Field Field;
|
typedef typename FieldImplementation::Field Field;
|
||||||
|
|
||||||
@ -143,9 +143,10 @@ protected:
|
|||||||
force = FieldImplementation::projectForce(force); // Ta for gauge fields
|
force = FieldImplementation::projectForce(force); // Ta for gauge fields
|
||||||
double end_force = usecond();
|
double end_force = usecond();
|
||||||
|
|
||||||
|
// DumpSliceNorm("force ",force,Nd-1);
|
||||||
MomFilter->applyFilter(force);
|
MomFilter->applyFilter(force);
|
||||||
std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<<a <<"] "<<name<< std::endl;
|
std::cout << GridLogIntegrator << " update_P : Level [" << level <<"]["<<a <<"] "<<name<< std::endl;
|
||||||
DumpSliceNorm("force ",force,Nd-1);
|
DumpSliceNorm("force filtered ",force,Nd-1);
|
||||||
|
|
||||||
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x])
|
Real force_abs = std::sqrt(norm2(force)/U.Grid()->gSites()); //average per-site norm. nb. norm2(latt) = \sum_x norm2(latt[x])
|
||||||
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
|
Real impulse_abs = force_abs * ep * HMC_MOMENTUM_DENOMINATOR;
|
||||||
@ -153,7 +154,7 @@ protected:
|
|||||||
Real force_max = std::sqrt(maxLocalNorm2(force));
|
Real force_max = std::sqrt(maxLocalNorm2(force));
|
||||||
Real impulse_max = force_max * ep * HMC_MOMENTUM_DENOMINATOR;
|
Real impulse_max = force_max * ep * HMC_MOMENTUM_DENOMINATOR;
|
||||||
|
|
||||||
as[level].actions.at(a)->deriv_log(force_abs,force_max);
|
as[level].actions.at(a)->deriv_log(force_abs,force_max,impulse_abs,impulse_max);
|
||||||
|
|
||||||
std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Force average: " << force_abs <<" "<<name<<std::endl;
|
std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Force average: " << force_abs <<" "<<name<<std::endl;
|
||||||
std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Force max : " << force_max <<" "<<name<<std::endl;
|
std::cout << GridLogIntegrator<< "["<<level<<"]["<<a<<"] Force max : " << force_max <<" "<<name<<std::endl;
|
||||||
@ -285,6 +286,8 @@ public:
|
|||||||
<<"["<<level<<"]["<< actionID<<"] : "
|
<<"["<<level<<"]["<< actionID<<"] : "
|
||||||
<<" force max " << as[level].actions.at(actionID)->deriv_max_average()
|
<<" force max " << as[level].actions.at(actionID)->deriv_max_average()
|
||||||
<<" norm " << as[level].actions.at(actionID)->deriv_norm_average()
|
<<" norm " << as[level].actions.at(actionID)->deriv_norm_average()
|
||||||
|
<<" Fdt max " << as[level].actions.at(actionID)->Fdt_max_average()
|
||||||
|
<<" Fdt norm " << as[level].actions.at(actionID)->Fdt_norm_average()
|
||||||
<<" calls " << as[level].actions.at(actionID)->deriv_num
|
<<" calls " << as[level].actions.at(actionID)->deriv_num
|
||||||
<< std::endl;
|
<< std::endl;
|
||||||
}
|
}
|
||||||
|
@ -92,10 +92,11 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
* P 1/2 P 1/2
|
* P 1/2 P 1/2
|
||||||
*/
|
*/
|
||||||
|
|
||||||
template <class FieldImplementation, class SmearingPolicy, class RepresentationPolicy = Representations<FundamentalRepresentation> >
|
template <class FieldImplementation_, class SmearingPolicy, class RepresentationPolicy = Representations<FundamentalRepresentation> >
|
||||||
class LeapFrog : public Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>
|
class LeapFrog : public Integrator<FieldImplementation_, SmearingPolicy, RepresentationPolicy>
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
typedef FieldImplementation_ FieldImplementation;
|
||||||
typedef LeapFrog<FieldImplementation, SmearingPolicy, RepresentationPolicy> Algorithm;
|
typedef LeapFrog<FieldImplementation, SmearingPolicy, RepresentationPolicy> Algorithm;
|
||||||
INHERIT_FIELD_TYPES(FieldImplementation);
|
INHERIT_FIELD_TYPES(FieldImplementation);
|
||||||
|
|
||||||
@ -135,13 +136,14 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class FieldImplementation, class SmearingPolicy, class RepresentationPolicy = Representations<FundamentalRepresentation> >
|
template <class FieldImplementation_, class SmearingPolicy, class RepresentationPolicy = Representations<FundamentalRepresentation> >
|
||||||
class MinimumNorm2 : public Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>
|
class MinimumNorm2 : public Integrator<FieldImplementation_, SmearingPolicy, RepresentationPolicy>
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
const RealD lambda = 0.1931833275037836;
|
const RealD lambda = 0.1931833275037836;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
typedef FieldImplementation_ FieldImplementation;
|
||||||
INHERIT_FIELD_TYPES(FieldImplementation);
|
INHERIT_FIELD_TYPES(FieldImplementation);
|
||||||
|
|
||||||
MinimumNorm2(GridBase* grid, IntegratorParameters Par, ActionSet<Field, RepresentationPolicy>& Aset, SmearingPolicy& Sm)
|
MinimumNorm2(GridBase* grid, IntegratorParameters Par, ActionSet<Field, RepresentationPolicy>& Aset, SmearingPolicy& Sm)
|
||||||
@ -192,8 +194,8 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class FieldImplementation, class SmearingPolicy, class RepresentationPolicy = Representations<FundamentalRepresentation> >
|
template <class FieldImplementation_, class SmearingPolicy, class RepresentationPolicy = Representations<FundamentalRepresentation> >
|
||||||
class ForceGradient : public Integrator<FieldImplementation, SmearingPolicy, RepresentationPolicy>
|
class ForceGradient : public Integrator<FieldImplementation_, SmearingPolicy, RepresentationPolicy>
|
||||||
{
|
{
|
||||||
private:
|
private:
|
||||||
const RealD lambda = 1.0 / 6.0;
|
const RealD lambda = 1.0 / 6.0;
|
||||||
@ -202,6 +204,7 @@ private:
|
|||||||
const RealD theta = 0.0;
|
const RealD theta = 0.0;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
typedef FieldImplementation_ FieldImplementation;
|
||||||
INHERIT_FIELD_TYPES(FieldImplementation);
|
INHERIT_FIELD_TYPES(FieldImplementation);
|
||||||
|
|
||||||
// Looks like dH scales as dt^4. tested wilson/wilson 2 level.
|
// Looks like dH scales as dt^4. tested wilson/wilson 2 level.
|
||||||
|
@ -31,15 +31,16 @@ directory
|
|||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
|
||||||
struct TopologySmearingParameters : Serializable {
|
struct TopologySmearingParameters : Serializable {
|
||||||
GRID_SERIALIZABLE_CLASS_MEMBERS(TopologySmearingParameters,
|
GRID_SERIALIZABLE_CLASS_MEMBERS(TopologySmearingParameters,
|
||||||
int, steps,
|
|
||||||
float, step_size,
|
|
||||||
int, meas_interval,
|
int, meas_interval,
|
||||||
float, maxTau);
|
float, init_step_size,
|
||||||
|
float, maxTau,
|
||||||
|
float, tolerance);
|
||||||
|
|
||||||
TopologySmearingParameters(int s = 0, float ss = 0.0f, int mi = 0, float mT = 0.0f):
|
TopologySmearingParameters(float ss = 0.0f, int mi = 0, float mT = 0.0f, float tol = 1e-4):
|
||||||
steps(s), step_size(ss), meas_interval(mi), maxTau(mT){}
|
init_step_size(ss), meas_interval(mi), maxTau(mT), tolerance(tol){}
|
||||||
|
|
||||||
template < class ReaderClass >
|
template < class ReaderClass >
|
||||||
TopologySmearingParameters(Reader<ReaderClass>& Reader){
|
TopologySmearingParameters(Reader<ReaderClass>& Reader){
|
||||||
@ -97,8 +98,8 @@ public:
|
|||||||
|
|
||||||
if (Pars.do_smearing){
|
if (Pars.do_smearing){
|
||||||
// using wilson flow by default here
|
// using wilson flow by default here
|
||||||
WilsonFlow<PeriodicGimplR> WF(Pars.Smearing.steps, Pars.Smearing.step_size, Pars.Smearing.meas_interval);
|
WilsonFlowAdaptive<PeriodicGimplR> WF(Pars.Smearing.init_step_size, Pars.Smearing.maxTau, Pars.Smearing.tolerance, Pars.Smearing.meas_interval);
|
||||||
WF.smear_adaptive(Usmear, U, Pars.Smearing.maxTau);
|
WF.smear(Usmear, U);
|
||||||
Real T0 = WF.energyDensityPlaquette(Pars.Smearing.maxTau, Usmear);
|
Real T0 = WF.energyDensityPlaquette(Pars.Smearing.maxTau, Usmear);
|
||||||
std::cout << GridLogMessage << std::setprecision(std::numeric_limits<Real>::digits10 + 1)
|
std::cout << GridLogMessage << std::setprecision(std::numeric_limits<Real>::digits10 + 1)
|
||||||
<< "T0 : [ " << traj << " ] "<< T0 << std::endl;
|
<< "T0 : [ " << traj << " ] "<< T0 << std::endl;
|
||||||
|
@ -33,27 +33,25 @@ directory
|
|||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
template <class Gimpl>
|
template <class Gimpl>
|
||||||
class WilsonFlow: public Smear<Gimpl>{
|
class WilsonFlowBase: public Smear<Gimpl>{
|
||||||
public:
|
public:
|
||||||
//Store generic measurements to take during smearing process using std::function
|
//Store generic measurements to take during smearing process using std::function
|
||||||
typedef std::function<void(int, RealD, const typename Gimpl::GaugeField &)> FunctionType; //int: step, RealD: flow time, GaugeField : the gauge field
|
typedef std::function<void(int, RealD, const typename Gimpl::GaugeField &)> FunctionType; //int: step, RealD: flow time, GaugeField : the gauge field
|
||||||
|
|
||||||
private:
|
protected:
|
||||||
unsigned int Nstep;
|
|
||||||
RealD epsilon; //for regular smearing this is the time step, for adaptive it is the initial time step
|
|
||||||
|
|
||||||
std::vector< std::pair<int, FunctionType> > functions; //The int maps to the measurement frequency
|
std::vector< std::pair<int, FunctionType> > functions; //The int maps to the measurement frequency
|
||||||
|
|
||||||
mutable WilsonGaugeAction<Gimpl> SG;
|
mutable WilsonGaugeAction<Gimpl> SG;
|
||||||
|
|
||||||
//Evolve the gauge field by 1 step and update tau
|
|
||||||
void evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const;
|
|
||||||
//Evolve the gauge field by 1 step and update tau and the current time step eps
|
|
||||||
void evolve_step_adaptive(typename Gimpl::GaugeField&U, RealD &tau, RealD &eps, RealD maxTau) const;
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
INHERIT_GIMPL_TYPES(Gimpl)
|
INHERIT_GIMPL_TYPES(Gimpl)
|
||||||
|
|
||||||
|
explicit WilsonFlowBase(unsigned int meas_interval =1):
|
||||||
|
SG(WilsonGaugeAction<Gimpl>(3.0)) {
|
||||||
|
// WilsonGaugeAction with beta 3.0
|
||||||
|
setDefaultMeasurements(meas_interval);
|
||||||
|
}
|
||||||
|
|
||||||
void resetActions(){ functions.clear(); }
|
void resetActions(){ functions.clear(); }
|
||||||
|
|
||||||
void addMeasurement(int meas_interval, FunctionType meas){ functions.push_back({meas_interval, meas}); }
|
void addMeasurement(int meas_interval, FunctionType meas){ functions.push_back({meas_interval, meas}); }
|
||||||
@ -64,34 +62,11 @@ public:
|
|||||||
//and output to stdout
|
//and output to stdout
|
||||||
void setDefaultMeasurements(int topq_meas_interval = 1);
|
void setDefaultMeasurements(int topq_meas_interval = 1);
|
||||||
|
|
||||||
explicit WilsonFlow(unsigned int Nstep, RealD epsilon, unsigned int interval = 1):
|
void derivative(GaugeField&, const GaugeField&, const GaugeField&) const override{
|
||||||
Nstep(Nstep),
|
|
||||||
epsilon(epsilon),
|
|
||||||
SG(WilsonGaugeAction<Gimpl>(3.0)) {
|
|
||||||
// WilsonGaugeAction with beta 3.0
|
|
||||||
assert(epsilon > 0.0);
|
|
||||||
LogMessage();
|
|
||||||
setDefaultMeasurements(interval);
|
|
||||||
}
|
|
||||||
|
|
||||||
void LogMessage() {
|
|
||||||
std::cout << GridLogMessage
|
|
||||||
<< "[WilsonFlow] Nstep : " << Nstep << std::endl;
|
|
||||||
std::cout << GridLogMessage
|
|
||||||
<< "[WilsonFlow] epsilon : " << epsilon << std::endl;
|
|
||||||
std::cout << GridLogMessage
|
|
||||||
<< "[WilsonFlow] full trajectory : " << Nstep * epsilon << std::endl;
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void smear(GaugeField&, const GaugeField&) const;
|
|
||||||
|
|
||||||
virtual void derivative(GaugeField&, const GaugeField&, const GaugeField&) const {
|
|
||||||
assert(0);
|
assert(0);
|
||||||
// undefined for WilsonFlow
|
// undefined for WilsonFlow
|
||||||
}
|
}
|
||||||
|
|
||||||
void smear_adaptive(GaugeField&, const GaugeField&, RealD maxTau) const;
|
|
||||||
|
|
||||||
//Compute t^2 <E(t)> for time t from the plaquette
|
//Compute t^2 <E(t)> for time t from the plaquette
|
||||||
static RealD energyDensityPlaquette(const RealD t, const GaugeField& U);
|
static RealD energyDensityPlaquette(const RealD t, const GaugeField& U);
|
||||||
|
|
||||||
@ -115,82 +90,63 @@ public:
|
|||||||
std::vector<RealD> flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval = 1);
|
std::vector<RealD> flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval = 1);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
//Basic iterative Wilson flow
|
||||||
|
template <class Gimpl>
|
||||||
|
class WilsonFlow: public WilsonFlowBase<Gimpl>{
|
||||||
|
private:
|
||||||
|
int Nstep; //number of steps
|
||||||
|
RealD epsilon; //step size
|
||||||
|
|
||||||
|
//Evolve the gauge field by 1 step of size eps and update tau
|
||||||
|
void evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const;
|
||||||
|
|
||||||
|
public:
|
||||||
|
INHERIT_GIMPL_TYPES(Gimpl)
|
||||||
|
|
||||||
|
//Integrate the Wilson flow for Nstep steps of size epsilon
|
||||||
|
WilsonFlow(const RealD epsilon, const int Nstep, unsigned int meas_interval = 1): WilsonFlowBase<Gimpl>(meas_interval), Nstep(Nstep), epsilon(epsilon){}
|
||||||
|
|
||||||
|
void smear(GaugeField& out, const GaugeField& in) const override;
|
||||||
|
};
|
||||||
|
|
||||||
|
//Wilson flow with adaptive step size
|
||||||
|
template <class Gimpl>
|
||||||
|
class WilsonFlowAdaptive: public WilsonFlowBase<Gimpl>{
|
||||||
|
private:
|
||||||
|
RealD init_epsilon; //initial step size
|
||||||
|
RealD maxTau; //integrate to t=maxTau
|
||||||
|
RealD tolerance; //integration error tolerance
|
||||||
|
|
||||||
|
//Evolve the gauge field by 1 step and update tau and the current time step eps
|
||||||
|
//
|
||||||
|
//If the step size eps is too large that a significant integration error results,
|
||||||
|
//the gauge field (U) and tau will not be updated and the function will return 0; eps will be adjusted to a smaller
|
||||||
|
//value for the next iteration.
|
||||||
|
//
|
||||||
|
//For a successful integration step the function will return 1
|
||||||
|
int evolve_step_adaptive(typename Gimpl::GaugeField&U, RealD &tau, RealD &eps) const;
|
||||||
|
|
||||||
|
public:
|
||||||
|
INHERIT_GIMPL_TYPES(Gimpl)
|
||||||
|
|
||||||
|
WilsonFlowAdaptive(const RealD init_epsilon, const RealD maxTau, const RealD tolerance, unsigned int meas_interval = 1):
|
||||||
|
WilsonFlowBase<Gimpl>(meas_interval), init_epsilon(init_epsilon), maxTau(maxTau), tolerance(tolerance){}
|
||||||
|
|
||||||
|
void smear(GaugeField& out, const GaugeField& in) const override;
|
||||||
|
};
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
// Implementations
|
// Implementations
|
||||||
////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////
|
||||||
template <class Gimpl>
|
template <class Gimpl>
|
||||||
void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const{
|
RealD WilsonFlowBase<Gimpl>::energyDensityPlaquette(const RealD t, const GaugeField& U){
|
||||||
GaugeField Z(U.Grid());
|
|
||||||
GaugeField tmp(U.Grid());
|
|
||||||
SG.deriv(U, Z);
|
|
||||||
Z *= 0.25; // Z0 = 1/4 * F(U)
|
|
||||||
Gimpl::update_field(Z, U, -2.0*epsilon); // U = W1 = exp(ep*Z0)*W0
|
|
||||||
|
|
||||||
Z *= -17.0/8.0;
|
|
||||||
SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1
|
|
||||||
Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1
|
|
||||||
Gimpl::update_field(Z, U, -2.0*epsilon); // U_= W2 = exp(ep*Z)*W1
|
|
||||||
|
|
||||||
Z *= -4.0/3.0;
|
|
||||||
SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2
|
|
||||||
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
|
|
||||||
Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2
|
|
||||||
tau += epsilon;
|
|
||||||
}
|
|
||||||
|
|
||||||
template <class Gimpl>
|
|
||||||
void WilsonFlow<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD &tau, RealD &eps, RealD maxTau) const{
|
|
||||||
if (maxTau - tau < eps){
|
|
||||||
eps = maxTau-tau;
|
|
||||||
}
|
|
||||||
//std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl;
|
|
||||||
GaugeField Z(U.Grid());
|
|
||||||
GaugeField Zprime(U.Grid());
|
|
||||||
GaugeField tmp(U.Grid()), Uprime(U.Grid());
|
|
||||||
Uprime = U;
|
|
||||||
SG.deriv(U, Z);
|
|
||||||
Zprime = -Z;
|
|
||||||
Z *= 0.25; // Z0 = 1/4 * F(U)
|
|
||||||
Gimpl::update_field(Z, U, -2.0*eps); // U = W1 = exp(ep*Z0)*W0
|
|
||||||
|
|
||||||
Z *= -17.0/8.0;
|
|
||||||
SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1
|
|
||||||
Zprime += 2.0*tmp;
|
|
||||||
Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1
|
|
||||||
Gimpl::update_field(Z, U, -2.0*eps); // U_= W2 = exp(ep*Z)*W1
|
|
||||||
|
|
||||||
|
|
||||||
Z *= -4.0/3.0;
|
|
||||||
SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2
|
|
||||||
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
|
|
||||||
Gimpl::update_field(Z, U, -2.0*eps); // V(t+e) = exp(ep*Z)*W2
|
|
||||||
|
|
||||||
// Ramos
|
|
||||||
Gimpl::update_field(Zprime, Uprime, -2.0*eps); // V'(t+e) = exp(ep*Z')*W0
|
|
||||||
// Compute distance as norm^2 of the difference
|
|
||||||
GaugeField diffU = U - Uprime;
|
|
||||||
RealD diff = norm2(diffU);
|
|
||||||
// adjust integration step
|
|
||||||
|
|
||||||
tau += eps;
|
|
||||||
//std::cout << GridLogMessage << "Adjusting integration step with distance: " << diff << std::endl;
|
|
||||||
|
|
||||||
eps = eps*0.95*std::pow(1e-4/diff,1./3.);
|
|
||||||
//std::cout << GridLogMessage << "New epsilon : " << epsilon << std::endl;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <class Gimpl>
|
|
||||||
RealD WilsonFlow<Gimpl>::energyDensityPlaquette(const RealD t, const GaugeField& U){
|
|
||||||
static WilsonGaugeAction<Gimpl> SG(3.0);
|
static WilsonGaugeAction<Gimpl> SG(3.0);
|
||||||
return 2.0 * t * t * SG.S(U)/U.Grid()->gSites();
|
return 2.0 * t * t * SG.S(U)/U.Grid()->gSites();
|
||||||
}
|
}
|
||||||
|
|
||||||
//Compute t^2 <E(t)> for time from the 1x1 cloverleaf form
|
//Compute t^2 <E(t)> for time from the 1x1 cloverleaf form
|
||||||
template <class Gimpl>
|
template <class Gimpl>
|
||||||
RealD WilsonFlow<Gimpl>::energyDensityCloverleaf(const RealD t, const GaugeField& U){
|
RealD WilsonFlowBase<Gimpl>::energyDensityCloverleaf(const RealD t, const GaugeField& U){
|
||||||
typedef typename Gimpl::GaugeLinkField GaugeMat;
|
typedef typename Gimpl::GaugeLinkField GaugeMat;
|
||||||
typedef typename Gimpl::GaugeField GaugeLorentz;
|
typedef typename Gimpl::GaugeField GaugeLorentz;
|
||||||
|
|
||||||
@ -215,7 +171,7 @@ RealD WilsonFlow<Gimpl>::energyDensityCloverleaf(const RealD t, const GaugeField
|
|||||||
|
|
||||||
|
|
||||||
template <class Gimpl>
|
template <class Gimpl>
|
||||||
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityPlaquette(GaugeField &V, const GaugeField& U, int measure_interval){
|
std::vector<RealD> WilsonFlowBase<Gimpl>::flowMeasureEnergyDensityPlaquette(GaugeField &V, const GaugeField& U, int measure_interval){
|
||||||
std::vector<RealD> out;
|
std::vector<RealD> out;
|
||||||
resetActions();
|
resetActions();
|
||||||
addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){
|
addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){
|
||||||
@ -227,13 +183,13 @@ std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityPlaquette(GaugeFie
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Gimpl>
|
template <class Gimpl>
|
||||||
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityPlaquette(const GaugeField& U, int measure_interval){
|
std::vector<RealD> WilsonFlowBase<Gimpl>::flowMeasureEnergyDensityPlaquette(const GaugeField& U, int measure_interval){
|
||||||
GaugeField V(U);
|
GaugeField V(U);
|
||||||
return flowMeasureEnergyDensityPlaquette(V,U, measure_interval);
|
return flowMeasureEnergyDensityPlaquette(V,U, measure_interval);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Gimpl>
|
template <class Gimpl>
|
||||||
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityCloverleaf(GaugeField &V, const GaugeField& U, int measure_interval){
|
std::vector<RealD> WilsonFlowBase<Gimpl>::flowMeasureEnergyDensityCloverleaf(GaugeField &V, const GaugeField& U, int measure_interval){
|
||||||
std::vector<RealD> out;
|
std::vector<RealD> out;
|
||||||
resetActions();
|
resetActions();
|
||||||
addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){
|
addMeasurement(measure_interval, [&out](int step, RealD t, const typename Gimpl::GaugeField &U){
|
||||||
@ -245,16 +201,52 @@ std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityCloverleaf(GaugeFi
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <class Gimpl>
|
template <class Gimpl>
|
||||||
std::vector<RealD> WilsonFlow<Gimpl>::flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval){
|
std::vector<RealD> WilsonFlowBase<Gimpl>::flowMeasureEnergyDensityCloverleaf(const GaugeField& U, int measure_interval){
|
||||||
GaugeField V(U);
|
GaugeField V(U);
|
||||||
return flowMeasureEnergyDensityCloverleaf(V,U, measure_interval);
|
return flowMeasureEnergyDensityCloverleaf(V,U, measure_interval);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <class Gimpl>
|
||||||
|
void WilsonFlowBase<Gimpl>::setDefaultMeasurements(int topq_meas_interval){
|
||||||
|
addMeasurement(1, [](int step, RealD t, const typename Gimpl::GaugeField &U){
|
||||||
|
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " << step << " " << t << " " << energyDensityPlaquette(t,U) << std::endl;
|
||||||
|
});
|
||||||
|
addMeasurement(topq_meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
|
||||||
|
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " << WilsonLoops<Gimpl>::TopologicalCharge(U) << std::endl;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
//#define WF_TIMING
|
|
||||||
|
template <class Gimpl>
|
||||||
|
void WilsonFlow<Gimpl>::evolve_step(typename Gimpl::GaugeField &U, RealD &tau) const{
|
||||||
|
GaugeField Z(U.Grid());
|
||||||
|
GaugeField tmp(U.Grid());
|
||||||
|
this->SG.deriv(U, Z);
|
||||||
|
Z *= 0.25; // Z0 = 1/4 * F(U)
|
||||||
|
Gimpl::update_field(Z, U, -2.0*epsilon); // U = W1 = exp(ep*Z0)*W0
|
||||||
|
|
||||||
|
Z *= -17.0/8.0;
|
||||||
|
this->SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1
|
||||||
|
Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1
|
||||||
|
Gimpl::update_field(Z, U, -2.0*epsilon); // U_= W2 = exp(ep*Z)*W1
|
||||||
|
|
||||||
|
Z *= -4.0/3.0;
|
||||||
|
this->SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2
|
||||||
|
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
|
||||||
|
Gimpl::update_field(Z, U, -2.0*epsilon); // V(t+e) = exp(ep*Z)*W2
|
||||||
|
tau += epsilon;
|
||||||
|
}
|
||||||
|
|
||||||
template <class Gimpl>
|
template <class Gimpl>
|
||||||
void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const{
|
void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const{
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "[WilsonFlow] Nstep : " << Nstep << std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "[WilsonFlow] epsilon : " << epsilon << std::endl;
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< "[WilsonFlow] full trajectory : " << Nstep * epsilon << std::endl;
|
||||||
|
|
||||||
out = in;
|
out = in;
|
||||||
RealD taus = 0.;
|
RealD taus = 0.;
|
||||||
for (unsigned int step = 1; step <= Nstep; step++) { //step indicates the number of smearing steps applied at the time of measurement
|
for (unsigned int step = 1; step <= Nstep; step++) { //step indicates the number of smearing steps applied at the time of measurement
|
||||||
@ -266,37 +258,93 @@ void WilsonFlow<Gimpl>::smear(GaugeField& out, const GaugeField& in) const{
|
|||||||
std::cout << "Time to evolve " << diff.count() << " s\n";
|
std::cout << "Time to evolve " << diff.count() << " s\n";
|
||||||
#endif
|
#endif
|
||||||
//Perform measurements
|
//Perform measurements
|
||||||
for(auto const &meas : functions)
|
for(auto const &meas : this->functions)
|
||||||
if( step % meas.first == 0 ) meas.second(step,taus,out);
|
if( step % meas.first == 0 ) meas.second(step,taus,out);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
template <class Gimpl>
|
template <class Gimpl>
|
||||||
void WilsonFlow<Gimpl>::smear_adaptive(GaugeField& out, const GaugeField& in, RealD maxTau) const{
|
int WilsonFlowAdaptive<Gimpl>::evolve_step_adaptive(typename Gimpl::GaugeField &U, RealD &tau, RealD &eps) const{
|
||||||
out = in;
|
if (maxTau - tau < eps){
|
||||||
RealD taus = 0.;
|
eps = maxTau-tau;
|
||||||
RealD eps = epsilon;
|
}
|
||||||
unsigned int step = 0;
|
//std::cout << GridLogMessage << "Integration epsilon : " << epsilon << std::endl;
|
||||||
do{
|
GaugeField Z(U.Grid());
|
||||||
step++;
|
GaugeField Zprime(U.Grid());
|
||||||
//std::cout << GridLogMessage << "Evolution time :"<< taus << std::endl;
|
GaugeField tmp(U.Grid()), Uprime(U.Grid()), Usave(U.Grid());
|
||||||
evolve_step_adaptive(out, taus, eps, maxTau);
|
Uprime = U;
|
||||||
//Perform measurements
|
Usave = U;
|
||||||
for(auto const &meas : functions)
|
|
||||||
if( step % meas.first == 0 ) meas.second(step,taus,out);
|
this->SG.deriv(U, Z);
|
||||||
} while (taus < maxTau);
|
Zprime = -Z;
|
||||||
|
Z *= 0.25; // Z0 = 1/4 * F(U)
|
||||||
|
Gimpl::update_field(Z, U, -2.0*eps); // U = W1 = exp(ep*Z0)*W0
|
||||||
|
|
||||||
|
Z *= -17.0/8.0;
|
||||||
|
this->SG.deriv(U, tmp); Z += tmp; // -17/32*Z0 +Z1
|
||||||
|
Zprime += 2.0*tmp;
|
||||||
|
Z *= 8.0/9.0; // Z = -17/36*Z0 +8/9*Z1
|
||||||
|
Gimpl::update_field(Z, U, -2.0*eps); // U_= W2 = exp(ep*Z)*W1
|
||||||
|
|
||||||
|
|
||||||
|
Z *= -4.0/3.0;
|
||||||
|
this->SG.deriv(U, tmp); Z += tmp; // 4/3*(17/36*Z0 -8/9*Z1) +Z2
|
||||||
|
Z *= 3.0/4.0; // Z = 17/36*Z0 -8/9*Z1 +3/4*Z2
|
||||||
|
Gimpl::update_field(Z, U, -2.0*eps); // V(t+e) = exp(ep*Z)*W2
|
||||||
|
|
||||||
|
// Ramos arXiv:1301.4388
|
||||||
|
Gimpl::update_field(Zprime, Uprime, -2.0*eps); // V'(t+e) = exp(ep*Z')*W0
|
||||||
|
|
||||||
|
// Compute distance using Ramos' definition
|
||||||
|
GaugeField diffU = U - Uprime;
|
||||||
|
RealD max_dist = 0;
|
||||||
|
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
typename Gimpl::GaugeLinkField diffU_mu = PeekIndex<LorentzIndex>(diffU, mu);
|
||||||
|
RealD dist_mu = sqrt( maxLocalNorm2(diffU_mu) ) /Nc/Nc; //maximize over sites
|
||||||
|
max_dist = std::max(max_dist, dist_mu); //maximize over mu
|
||||||
|
}
|
||||||
|
|
||||||
|
int ret;
|
||||||
|
if(max_dist < tolerance) {
|
||||||
|
tau += eps;
|
||||||
|
ret = 1;
|
||||||
|
} else {
|
||||||
|
U = Usave;
|
||||||
|
ret = 0;
|
||||||
|
}
|
||||||
|
eps = eps*0.95*std::pow(tolerance/max_dist,1./3.);
|
||||||
|
std::cout << GridLogMessage << "Adaptive smearing : Distance: "<< max_dist <<" Step successful: " << ret << " New epsilon: " << eps << std::endl;
|
||||||
|
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class Gimpl>
|
template <class Gimpl>
|
||||||
void WilsonFlow<Gimpl>::setDefaultMeasurements(int topq_meas_interval){
|
void WilsonFlowAdaptive<Gimpl>::smear(GaugeField& out, const GaugeField& in) const{
|
||||||
addMeasurement(1, [](int step, RealD t, const typename Gimpl::GaugeField &U){
|
std::cout << GridLogMessage
|
||||||
std::cout << GridLogMessage << "[WilsonFlow] Energy density (plaq) : " << step << " " << t << " " << energyDensityPlaquette(t,U) << std::endl;
|
<< "[WilsonFlow] initial epsilon : " << init_epsilon << std::endl;
|
||||||
});
|
std::cout << GridLogMessage
|
||||||
addMeasurement(topq_meas_interval, [](int step, RealD t, const typename Gimpl::GaugeField &U){
|
<< "[WilsonFlow] full trajectory : " << maxTau << std::endl;
|
||||||
std::cout << GridLogMessage << "[WilsonFlow] Top. charge : " << step << " " << WilsonLoops<Gimpl>::TopologicalCharge(U) << std::endl;
|
std::cout << GridLogMessage
|
||||||
});
|
<< "[WilsonFlow] tolerance : " << tolerance << std::endl;
|
||||||
|
out = in;
|
||||||
|
RealD taus = 0.;
|
||||||
|
RealD eps = init_epsilon;
|
||||||
|
unsigned int step = 0;
|
||||||
|
do{
|
||||||
|
int step_success = evolve_step_adaptive(out, taus, eps);
|
||||||
|
step += step_success; //step will not be incremented if the integration step fails
|
||||||
|
|
||||||
|
//Perform measurements
|
||||||
|
if(step_success)
|
||||||
|
for(auto const &meas : this->functions)
|
||||||
|
if( step % meas.first == 0 ) meas.second(step,taus,out);
|
||||||
|
} while (taus < maxTau);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
|
@ -227,26 +227,38 @@ namespace ConjugateBC {
|
|||||||
//shift = -1
|
//shift = -1
|
||||||
//Out(x) = U_\mu(x-mu) | x_\mu != 0
|
//Out(x) = U_\mu(x-mu) | x_\mu != 0
|
||||||
// = U*_\mu(L-1) | x_\mu == 0
|
// = U*_\mu(L-1) | x_\mu == 0
|
||||||
|
//shift = 2
|
||||||
|
//Out(x) = U_\mu(x+2\hat\mu) | x_\mu < L-2
|
||||||
|
// = U*_\mu(1) | x_\mu == L-1
|
||||||
|
// = U*_\mu(0) | x_\mu == L-2
|
||||||
|
//shift = -2
|
||||||
|
//Out(x) = U_\mu(x-2mu) | x_\mu > 1
|
||||||
|
// = U*_\mu(L-2) | x_\mu == 0
|
||||||
|
// = U*_\mu(L-1) | x_\mu == 1
|
||||||
|
//etc
|
||||||
template<class gauge> Lattice<gauge>
|
template<class gauge> Lattice<gauge>
|
||||||
CshiftLink(const Lattice<gauge> &Link, int mu, int shift)
|
CshiftLink(const Lattice<gauge> &Link, int mu, int shift)
|
||||||
{
|
{
|
||||||
GridBase *grid = Link.Grid();
|
GridBase *grid = Link.Grid();
|
||||||
int Lmu = grid->GlobalDimensions()[mu] - 1;
|
int Lmu = grid->GlobalDimensions()[mu];
|
||||||
|
assert(abs(shift) < Lmu && "Invalid shift value");
|
||||||
|
|
||||||
Lattice<iScalar<vInteger>> coor(grid);
|
Lattice<iScalar<vInteger>> coor(grid);
|
||||||
LatticeCoordinate(coor, mu);
|
LatticeCoordinate(coor, mu);
|
||||||
|
|
||||||
Lattice<gauge> tmp(grid);
|
Lattice<gauge> tmp(grid);
|
||||||
if(shift == 1){
|
if(shift > 0){
|
||||||
tmp = Cshift(Link, mu, 1);
|
tmp = Cshift(Link, mu, shift);
|
||||||
tmp = where(coor == Lmu, conjugate(tmp), tmp);
|
tmp = where(coor >= Lmu-shift, conjugate(tmp), tmp);
|
||||||
return tmp;
|
return tmp;
|
||||||
}else if(shift == -1){
|
}else if(shift < 0){
|
||||||
tmp = Link;
|
tmp = Link;
|
||||||
tmp = where(coor == Lmu, conjugate(tmp), tmp);
|
tmp = where(coor >= Lmu+shift, conjugate(tmp), tmp);
|
||||||
return Cshift(tmp, mu, -1);
|
return Cshift(tmp, mu, shift);
|
||||||
}else assert(0 && "Invalid shift value");
|
}
|
||||||
return tmp; //shuts up the compiler fussing about the return type
|
|
||||||
|
//shift == 0
|
||||||
|
return Link;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -72,12 +72,12 @@ public:
|
|||||||
|
|
||||||
//Fix the gauge field Umu
|
//Fix the gauge field Umu
|
||||||
//0 < alpha < 1 is related to the step size, cf https://arxiv.org/pdf/1405.5812.pdf
|
//0 < alpha < 1 is related to the step size, cf https://arxiv.org/pdf/1405.5812.pdf
|
||||||
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) {
|
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,Real alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) {
|
||||||
GridBase *grid = Umu.Grid();
|
GridBase *grid = Umu.Grid();
|
||||||
GaugeMat xform(grid);
|
GaugeMat xform(grid);
|
||||||
SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier,orthog,err_on_no_converge);
|
SteepestDescentGaugeFix(Umu,xform,alpha,maxiter,Omega_tol,Phi_tol,Fourier,orthog,err_on_no_converge);
|
||||||
}
|
}
|
||||||
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform,Real & alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) {
|
static void SteepestDescentGaugeFix(GaugeLorentz &Umu,GaugeMat &xform,Real alpha,int maxiter,Real Omega_tol, Real Phi_tol,bool Fourier=false,int orthog=-1,bool err_on_no_converge=true) {
|
||||||
//Fix the gauge field Umu and also return the gauge transformation from the original gauge field, xform
|
//Fix the gauge field Umu and also return the gauge transformation from the original gauge field, xform
|
||||||
|
|
||||||
GridBase *grid = Umu.Grid();
|
GridBase *grid = Umu.Grid();
|
||||||
|
@ -35,7 +35,7 @@ Author: neo <cossu@post.kek.jp>
|
|||||||
*/
|
*/
|
||||||
// Time-stamp: <2015-06-16 23:27:54 neo>
|
// Time-stamp: <2015-06-16 23:27:54 neo>
|
||||||
//----------------------------------------------------------------------
|
//----------------------------------------------------------------------
|
||||||
|
#include <immintrin.h>
|
||||||
#include <pmmintrin.h>
|
#include <pmmintrin.h>
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
@ -290,6 +290,8 @@ public:
|
|||||||
std::vector<Decompress> DecompressionsSHM;
|
std::vector<Decompress> DecompressionsSHM;
|
||||||
std::vector<CopyReceiveBuffer> CopyReceiveBuffers ;
|
std::vector<CopyReceiveBuffer> CopyReceiveBuffers ;
|
||||||
std::vector<CachedTransfer> CachedTransfers;
|
std::vector<CachedTransfer> CachedTransfers;
|
||||||
|
std::vector<CommsRequest_t> MpiReqs;
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
// Unified Comms buffers for all directions
|
// Unified Comms buffers for all directions
|
||||||
///////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////
|
||||||
@ -357,9 +359,9 @@ public:
|
|||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
|
void CommunicateBegin(std::vector<std::vector<CommsRequest_t> > &reqs)
|
||||||
{
|
{
|
||||||
reqs.resize(Packets.size());
|
accelerator_barrier();
|
||||||
for(int i=0;i<Packets.size();i++){
|
for(int i=0;i<Packets.size();i++){
|
||||||
_grid->StencilSendToRecvFromBegin(reqs[i],
|
_grid->StencilSendToRecvFromBegin(MpiReqs,
|
||||||
Packets[i].send_buf,
|
Packets[i].send_buf,
|
||||||
Packets[i].to_rank,Packets[i].do_send,
|
Packets[i].to_rank,Packets[i].do_send,
|
||||||
Packets[i].recv_buf,
|
Packets[i].recv_buf,
|
||||||
@ -370,41 +372,19 @@ public:
|
|||||||
|
|
||||||
void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs)
|
void CommunicateComplete(std::vector<std::vector<CommsRequest_t> > &reqs)
|
||||||
{
|
{
|
||||||
for(int i=0;i<Packets.size();i++){
|
_grid->StencilSendToRecvFromComplete(MpiReqs,0);
|
||||||
_grid->StencilSendToRecvFromComplete(reqs[i],i);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
// Blocking send and receive. Either sequential or parallel.
|
// Blocking send and receive. Either sequential or parallel.
|
||||||
////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////
|
||||||
void Communicate(void)
|
void Communicate(void)
|
||||||
{
|
{
|
||||||
if ( CartesianCommunicator::CommunicatorPolicy == CartesianCommunicator::CommunicatorPolicySequential ){
|
/////////////////////////////////////////////////////////
|
||||||
/////////////////////////////////////////////////////////
|
// Concurrent and non-threaded asynch calls to MPI
|
||||||
// several way threaded on different communicators.
|
/////////////////////////////////////////////////////////
|
||||||
// Cannot combine with Dirichlet operators
|
std::vector<std::vector<CommsRequest_t> > reqs;
|
||||||
// This scheme is needed on Intel Omnipath for best performance
|
this->CommunicateBegin(reqs);
|
||||||
// Deprecate once there are very few omnipath clusters
|
this->CommunicateComplete(reqs);
|
||||||
/////////////////////////////////////////////////////////
|
|
||||||
int nthreads = CartesianCommunicator::nCommThreads;
|
|
||||||
int old = GridThread::GetThreads();
|
|
||||||
GridThread::SetThreads(nthreads);
|
|
||||||
thread_for(i,Packets.size(),{
|
|
||||||
_grid->StencilSendToRecvFrom(Packets[i].send_buf,
|
|
||||||
Packets[i].to_rank,Packets[i].do_send,
|
|
||||||
Packets[i].recv_buf,
|
|
||||||
Packets[i].from_rank,Packets[i].do_recv,
|
|
||||||
Packets[i].bytes,i);
|
|
||||||
});
|
|
||||||
GridThread::SetThreads(old);
|
|
||||||
} else {
|
|
||||||
/////////////////////////////////////////////////////////
|
|
||||||
// Concurrent and non-threaded asynch calls to MPI
|
|
||||||
/////////////////////////////////////////////////////////
|
|
||||||
std::vector<std::vector<CommsRequest_t> > reqs;
|
|
||||||
this->CommunicateBegin(reqs);
|
|
||||||
this->CommunicateComplete(reqs);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template<class compressor> void HaloExchange(const Lattice<vobj> &source,compressor &compress)
|
template<class compressor> void HaloExchange(const Lattice<vobj> &source,compressor &compress)
|
||||||
@ -484,7 +464,6 @@ public:
|
|||||||
face_table_computed=1;
|
face_table_computed=1;
|
||||||
assert(u_comm_offset==_unified_buffer_size);
|
assert(u_comm_offset==_unified_buffer_size);
|
||||||
|
|
||||||
accelerator_barrier();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/////////////////////////
|
/////////////////////////
|
||||||
@ -499,6 +478,7 @@ public:
|
|||||||
Packets.resize(0);
|
Packets.resize(0);
|
||||||
CopyReceiveBuffers.resize(0);
|
CopyReceiveBuffers.resize(0);
|
||||||
CachedTransfers.resize(0);
|
CachedTransfers.resize(0);
|
||||||
|
MpiReqs.resize(0);
|
||||||
}
|
}
|
||||||
void AddCopy(void *from,void * to, Integer bytes)
|
void AddCopy(void *from,void * to, Integer bytes)
|
||||||
{
|
{
|
||||||
@ -711,7 +691,9 @@ public:
|
|||||||
this->_comms_recv.resize(npoints);
|
this->_comms_recv.resize(npoints);
|
||||||
this->same_node.resize(npoints);
|
this->same_node.resize(npoints);
|
||||||
|
|
||||||
if ( p.dirichlet.size() ) DirichletBlock(p.dirichlet); // comms send/recv set up
|
if ( p.dirichlet.size() ==0 ) p.dirichlet.resize(grid->Nd(),0);
|
||||||
|
|
||||||
|
DirichletBlock(p.dirichlet); // comms send/recv set up
|
||||||
|
|
||||||
_unified_buffer_size=0;
|
_unified_buffer_size=0;
|
||||||
surface_list.resize(0);
|
surface_list.resize(0);
|
||||||
@ -793,7 +775,6 @@ public:
|
|||||||
u_simd_recv_buf[l] = (cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj));
|
u_simd_recv_buf[l] = (cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj));
|
||||||
u_simd_send_buf[l] = (cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj));
|
u_simd_send_buf[l] = (cobj *)_grid->ShmBufferMalloc(_unified_buffer_size*sizeof(cobj));
|
||||||
}
|
}
|
||||||
|
|
||||||
PrecomputeByteOffsets();
|
PrecomputeByteOffsets();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1105,7 +1086,6 @@ public:
|
|||||||
// Gather locally
|
// Gather locally
|
||||||
////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////
|
||||||
assert(send_buf!=NULL);
|
assert(send_buf!=NULL);
|
||||||
|
|
||||||
Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,comm_off,so);
|
Gather_plane_simple_table(face_table[face_idx],rhs,send_buf,compress,comm_off,so);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1212,8 +1192,9 @@ public:
|
|||||||
face_table[face_idx].size()*sizeof(face_table_host[0]));
|
face_table[face_idx].size()*sizeof(face_table_host[0]));
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( comms_send || comms_recv )
|
if ( comms_send || comms_recv ) {
|
||||||
Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type);
|
Gather_plane_exchange_table(face_table[face_idx],rhs,spointers,dimension,sx,cbmask,compress,permute_type);
|
||||||
|
}
|
||||||
face_idx++;
|
face_idx++;
|
||||||
|
|
||||||
//spointers[0] -- low
|
//spointers[0] -- low
|
||||||
@ -1270,10 +1251,6 @@ public:
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ZeroCounters(void) { };
|
|
||||||
|
|
||||||
void Report(void) { };
|
|
||||||
|
|
||||||
};
|
};
|
||||||
NAMESPACE_END(Grid);
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
#include <Grid/GridCore.h>
|
#include <Grid/GridCore.h>
|
||||||
|
|
||||||
NAMESPACE_BEGIN(Grid);
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
int world_rank; // Use to control world rank for print guarding
|
||||||
int acceleratorAbortOnGpuError=1;
|
int acceleratorAbortOnGpuError=1;
|
||||||
uint32_t accelerator_threads=2;
|
uint32_t accelerator_threads=2;
|
||||||
uint32_t acceleratorThreads(void) {return accelerator_threads;};
|
uint32_t acceleratorThreads(void) {return accelerator_threads;};
|
||||||
@ -16,7 +17,7 @@ void acceleratorThreads(uint32_t t) {accelerator_threads = t;};
|
|||||||
#ifdef GRID_CUDA
|
#ifdef GRID_CUDA
|
||||||
cudaDeviceProp *gpu_props;
|
cudaDeviceProp *gpu_props;
|
||||||
cudaStream_t copyStream;
|
cudaStream_t copyStream;
|
||||||
cudaStream_t cpuStream;
|
cudaStream_t computeStream;
|
||||||
void acceleratorInit(void)
|
void acceleratorInit(void)
|
||||||
{
|
{
|
||||||
int nDevices = 1;
|
int nDevices = 1;
|
||||||
@ -24,7 +25,8 @@ void acceleratorInit(void)
|
|||||||
gpu_props = new cudaDeviceProp[nDevices];
|
gpu_props = new cudaDeviceProp[nDevices];
|
||||||
|
|
||||||
char * localRankStr = NULL;
|
char * localRankStr = NULL;
|
||||||
int rank = 0, world_rank=0;
|
int rank = 0;
|
||||||
|
world_rank=0;
|
||||||
if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);}
|
if ((localRankStr = getenv(ENV_RANK_OMPI )) != NULL) { world_rank = atoi(localRankStr);}
|
||||||
if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
|
if ((localRankStr = getenv(ENV_RANK_MVAPICH)) != NULL) { world_rank = atoi(localRankStr);}
|
||||||
if ((localRankStr = getenv(ENV_RANK_SLURM )) != NULL) { world_rank = atoi(localRankStr);}
|
if ((localRankStr = getenv(ENV_RANK_SLURM )) != NULL) { world_rank = atoi(localRankStr);}
|
||||||
@ -99,7 +101,7 @@ void acceleratorInit(void)
|
|||||||
|
|
||||||
cudaSetDevice(device);
|
cudaSetDevice(device);
|
||||||
cudaStreamCreate(©Stream);
|
cudaStreamCreate(©Stream);
|
||||||
cudaStreamCreate(&cpuStream);
|
cudaStreamCreate(&computeStream);
|
||||||
const int len=64;
|
const int len=64;
|
||||||
char busid[len];
|
char busid[len];
|
||||||
if( rank == world_rank ) {
|
if( rank == world_rank ) {
|
||||||
@ -114,7 +116,7 @@ void acceleratorInit(void)
|
|||||||
#ifdef GRID_HIP
|
#ifdef GRID_HIP
|
||||||
hipDeviceProp_t *gpu_props;
|
hipDeviceProp_t *gpu_props;
|
||||||
hipStream_t copyStream;
|
hipStream_t copyStream;
|
||||||
hipStream_t cpuStream;
|
hipStream_t computeStream;
|
||||||
void acceleratorInit(void)
|
void acceleratorInit(void)
|
||||||
{
|
{
|
||||||
int nDevices = 1;
|
int nDevices = 1;
|
||||||
@ -122,7 +124,8 @@ void acceleratorInit(void)
|
|||||||
gpu_props = new hipDeviceProp_t[nDevices];
|
gpu_props = new hipDeviceProp_t[nDevices];
|
||||||
|
|
||||||
char * localRankStr = NULL;
|
char * localRankStr = NULL;
|
||||||
int rank = 0, world_rank=0;
|
int rank = 0;
|
||||||
|
world_rank=0;
|
||||||
// We extract the local rank initialization using an environment variable
|
// We extract the local rank initialization using an environment variable
|
||||||
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
|
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
|
||||||
{
|
{
|
||||||
@ -183,7 +186,7 @@ void acceleratorInit(void)
|
|||||||
#endif
|
#endif
|
||||||
hipSetDevice(device);
|
hipSetDevice(device);
|
||||||
hipStreamCreate(©Stream);
|
hipStreamCreate(©Stream);
|
||||||
hipStreamCreate(&cpuStream);
|
hipStreamCreate(&computeStream);
|
||||||
const int len=64;
|
const int len=64;
|
||||||
char busid[len];
|
char busid[len];
|
||||||
if( rank == world_rank ) {
|
if( rank == world_rank ) {
|
||||||
@ -210,7 +213,8 @@ void acceleratorInit(void)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
char * localRankStr = NULL;
|
char * localRankStr = NULL;
|
||||||
int rank = 0, world_rank=0;
|
int rank = 0;
|
||||||
|
world_rank=0;
|
||||||
|
|
||||||
// We extract the local rank initialization using an environment variable
|
// We extract the local rank initialization using an environment variable
|
||||||
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
|
if ((localRankStr = getenv(ENV_LOCAL_RANK_OMPI)) != NULL)
|
||||||
|
@ -107,7 +107,7 @@ void acceleratorInit(void);
|
|||||||
|
|
||||||
extern int acceleratorAbortOnGpuError;
|
extern int acceleratorAbortOnGpuError;
|
||||||
extern cudaStream_t copyStream;
|
extern cudaStream_t copyStream;
|
||||||
extern cudaStream_t cpuStream;
|
extern cudaStream_t computeStream;
|
||||||
|
|
||||||
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
||||||
#ifdef GRID_SIMT
|
#ifdef GRID_SIMT
|
||||||
@ -135,7 +135,7 @@ inline void cuda_mem(void)
|
|||||||
}; \
|
}; \
|
||||||
dim3 cu_threads(nsimd,acceleratorThreads(),1); \
|
dim3 cu_threads(nsimd,acceleratorThreads(),1); \
|
||||||
dim3 cu_blocks ((num1+nt-1)/nt,num2,1); \
|
dim3 cu_blocks ((num1+nt-1)/nt,num2,1); \
|
||||||
LambdaApply<<<cu_blocks,cu_threads,0,cpuStream>>>(num1,num2,nsimd,lambda); \
|
LambdaApply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,nsimd,lambda); \
|
||||||
}
|
}
|
||||||
|
|
||||||
#define accelerator_for6dNB(iter1, num1, \
|
#define accelerator_for6dNB(iter1, num1, \
|
||||||
@ -154,7 +154,7 @@ inline void cuda_mem(void)
|
|||||||
}; \
|
}; \
|
||||||
dim3 cu_blocks (num1,num2,num3); \
|
dim3 cu_blocks (num1,num2,num3); \
|
||||||
dim3 cu_threads(num4,num5,num6); \
|
dim3 cu_threads(num4,num5,num6); \
|
||||||
Lambda6Apply<<<cu_blocks,cu_threads,0,cpuStream>>>(num1,num2,num3,num4,num5,num6,lambda); \
|
Lambda6Apply<<<cu_blocks,cu_threads,0,computeStream>>>(num1,num2,num3,num4,num5,num6,lambda); \
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename lambda> __global__
|
template<typename lambda> __global__
|
||||||
@ -190,7 +190,7 @@ void Lambda6Apply(uint64_t num1, uint64_t num2, uint64_t num3,
|
|||||||
|
|
||||||
#define accelerator_barrier(dummy) \
|
#define accelerator_barrier(dummy) \
|
||||||
{ \
|
{ \
|
||||||
cudaStreamSynchronize(cpuStream); \
|
cudaStreamSynchronize(computeStream); \
|
||||||
cudaError err = cudaGetLastError(); \
|
cudaError err = cudaGetLastError(); \
|
||||||
if ( cudaSuccess != err ) { \
|
if ( cudaSuccess != err ) { \
|
||||||
printf("accelerator_barrier(): Cuda error %s \n", \
|
printf("accelerator_barrier(): Cuda error %s \n", \
|
||||||
@ -340,7 +340,7 @@ NAMESPACE_BEGIN(Grid);
|
|||||||
#define accelerator_inline __host__ __device__ inline
|
#define accelerator_inline __host__ __device__ inline
|
||||||
|
|
||||||
extern hipStream_t copyStream;
|
extern hipStream_t copyStream;
|
||||||
extern hipStream_t cpuStream;
|
extern hipStream_t computeStream;
|
||||||
/*These routines define mapping from thread grid to loop & vector lane indexing */
|
/*These routines define mapping from thread grid to loop & vector lane indexing */
|
||||||
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
||||||
#ifdef GRID_SIMT
|
#ifdef GRID_SIMT
|
||||||
@ -362,16 +362,15 @@ accelerator_inline int acceleratorSIMTlane(int Nsimd) {
|
|||||||
dim3 hip_blocks ((num1+nt-1)/nt,num2,1); \
|
dim3 hip_blocks ((num1+nt-1)/nt,num2,1); \
|
||||||
if(hip_threads.x * hip_threads.y * hip_threads.z <= 64){ \
|
if(hip_threads.x * hip_threads.y * hip_threads.z <= 64){ \
|
||||||
hipLaunchKernelGGL(LambdaApply64,hip_blocks,hip_threads, \
|
hipLaunchKernelGGL(LambdaApply64,hip_blocks,hip_threads, \
|
||||||
0,cpuStream, \
|
0,computeStream, \
|
||||||
num1,num2,nsimd, lambda); \
|
num1,num2,nsimd, lambda); \
|
||||||
} else { \
|
} else { \
|
||||||
hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads, \
|
hipLaunchKernelGGL(LambdaApply,hip_blocks,hip_threads, \
|
||||||
0,cpuStream, \
|
0,computeStream, \
|
||||||
num1,num2,nsimd, lambda); \
|
num1,num2,nsimd, lambda); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template<typename lambda> __global__
|
template<typename lambda> __global__
|
||||||
__launch_bounds__(64,1)
|
__launch_bounds__(64,1)
|
||||||
void LambdaApply64(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
|
void LambdaApply64(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
|
||||||
@ -400,7 +399,7 @@ void LambdaApply(uint64_t numx, uint64_t numy, uint64_t numz, lambda Lambda)
|
|||||||
|
|
||||||
#define accelerator_barrier(dummy) \
|
#define accelerator_barrier(dummy) \
|
||||||
{ \
|
{ \
|
||||||
hipStreamSynchronize(cpuStream); \
|
hipStreamSynchronize(computeStream); \
|
||||||
auto err = hipGetLastError(); \
|
auto err = hipGetLastError(); \
|
||||||
if ( err != hipSuccess ) { \
|
if ( err != hipSuccess ) { \
|
||||||
printf("After hipDeviceSynchronize() : HIP error %s \n", hipGetErrorString( err )); \
|
printf("After hipDeviceSynchronize() : HIP error %s \n", hipGetErrorString( err )); \
|
||||||
@ -443,7 +442,7 @@ inline void acceleratorMemSet(void *base,int value,size_t bytes) { hipMemset(bas
|
|||||||
|
|
||||||
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
|
inline void acceleratorCopyDeviceToDeviceAsynch(void *from,void *to,size_t bytes) // Asynch
|
||||||
{
|
{
|
||||||
hipMemcpy(to,from,bytes, hipMemcpyDeviceToDevice);
|
hipMemcpyDtoDAsync(to,from,bytes, copyStream);
|
||||||
}
|
}
|
||||||
inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); };
|
inline void acceleratorCopySynchronise(void) { hipStreamSynchronize(copyStream); };
|
||||||
|
|
||||||
|
@ -356,6 +356,11 @@ void Grid_init(int *argc,char ***argv)
|
|||||||
//////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////
|
||||||
CartesianCommunicator::Init(argc,argv);
|
CartesianCommunicator::Init(argc,argv);
|
||||||
|
|
||||||
|
GridLogger::GlobalStopWatch.Stop();
|
||||||
|
CartesianCommunicator::BarrierWorld();
|
||||||
|
GridLogger::GlobalStopWatch.Reset();// Back to zero with synchronised clock
|
||||||
|
GridLogger::GlobalStopWatch.Start();
|
||||||
|
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
// Banner after MPI (unless GPU)
|
// Banner after MPI (unless GPU)
|
||||||
////////////////////////////////////
|
////////////////////////////////////
|
||||||
|
@ -128,8 +128,14 @@ template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, c
|
|||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Make a mixed precision conjugate gradient
|
// Make a mixed precision conjugate gradient
|
||||||
////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
|
#if 1
|
||||||
|
RealD delta=1.e-4;
|
||||||
|
std::cout << GridLogMessage << "Calling reliable update Conjugate Gradient" <<std::endl;
|
||||||
|
ConjugateGradientReliableUpdate<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations*MaxOuterIterations,delta,SinglePrecGrid5,LinOpF,LinOpD);
|
||||||
|
#else
|
||||||
std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
|
std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
|
||||||
|
MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
|
||||||
|
#endif
|
||||||
MPCG(src,psi);
|
MPCG(src,psi);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -141,6 +147,10 @@ int main(int argc, char **argv) {
|
|||||||
using namespace Grid;
|
using namespace Grid;
|
||||||
|
|
||||||
Grid_init(&argc, &argv);
|
Grid_init(&argc, &argv);
|
||||||
|
|
||||||
|
CartesianCommunicator::BarrierWorld();
|
||||||
|
std::cout << GridLogMessage << " Clock skew check" <<std::endl;
|
||||||
|
|
||||||
int threads = GridThread::GetThreads();
|
int threads = GridThread::GetThreads();
|
||||||
|
|
||||||
// Typedefs to simplify notation
|
// Typedefs to simplify notation
|
||||||
@ -161,7 +171,7 @@ int main(int argc, char **argv) {
|
|||||||
// MD.name = std::string("Force Gradient");
|
// MD.name = std::string("Force Gradient");
|
||||||
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
|
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
|
||||||
MD.name = std::string("MinimumNorm2");
|
MD.name = std::string("MinimumNorm2");
|
||||||
MD.MDsteps = 4;
|
MD.MDsteps = 6;
|
||||||
MD.trajL = 1.0;
|
MD.trajL = 1.0;
|
||||||
|
|
||||||
HMCparameters HMCparams;
|
HMCparameters HMCparams;
|
||||||
@ -183,7 +193,7 @@ int main(int argc, char **argv) {
|
|||||||
CPparams.saveInterval = 1;
|
CPparams.saveInterval = 1;
|
||||||
CPparams.format = "IEEE64BIG";
|
CPparams.format = "IEEE64BIG";
|
||||||
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
||||||
|
std::cout << "loaded NERSC checpointer"<<std::endl;
|
||||||
RNGModuleParameters RNGpar;
|
RNGModuleParameters RNGpar;
|
||||||
RNGpar.serial_seeds = "1 2 3 4 5";
|
RNGpar.serial_seeds = "1 2 3 4 5";
|
||||||
RNGpar.parallel_seeds = "6 7 8 9 10";
|
RNGpar.parallel_seeds = "6 7 8 9 10";
|
||||||
@ -204,7 +214,8 @@ int main(int argc, char **argv) {
|
|||||||
Real light_mass = 7.8e-4;
|
Real light_mass = 7.8e-4;
|
||||||
Real strange_mass = 0.02132;
|
Real strange_mass = 0.02132;
|
||||||
Real pv_mass = 1.0;
|
Real pv_mass = 1.0;
|
||||||
std::vector<Real> hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
// std::vector<Real> hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||||
|
std::vector<Real> hasenbusch({ light_mass, 5e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||||
|
|
||||||
// FIXME:
|
// FIXME:
|
||||||
// Same in MC and MD
|
// Same in MC and MD
|
||||||
@ -287,6 +298,7 @@ int main(int argc, char **argv) {
|
|||||||
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
|
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
|
||||||
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
|
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
|
||||||
TheHMC.initializeGaugeFieldAndRNGs(U);
|
TheHMC.initializeGaugeFieldAndRNGs(U);
|
||||||
|
std::cout << "loaded NERSC gauge field"<<std::endl;
|
||||||
|
|
||||||
|
|
||||||
// These lines are unnecessary if BC are all periodic
|
// These lines are unnecessary if BC are all periodic
|
||||||
|
474
HMC/Mobius2p1f_DD_RHMC_96I_mixedmshift.cc
Normal file
474
HMC/Mobius2p1f_DD_RHMC_96I_mixedmshift.cc
Normal file
@ -0,0 +1,474 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./HMC/Mobius2p1f_DD_RHMC_96I_mixedmshift.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015-2016
|
||||||
|
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
Author: Guido Cossu <guido.cossu@ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
NAMESPACE_BEGIN(Grid);
|
||||||
|
|
||||||
|
template<class FermionOperatorD, class FermionOperatorF, class SchurOperatorD, class SchurOperatorF>
|
||||||
|
class MixedPrecisionConjugateGradientOperatorFunction : public OperatorFunction<typename FermionOperatorD::FermionField> {
|
||||||
|
public:
|
||||||
|
typedef typename FermionOperatorD::FermionField FieldD;
|
||||||
|
typedef typename FermionOperatorF::FermionField FieldF;
|
||||||
|
|
||||||
|
using OperatorFunction<FieldD>::operator();
|
||||||
|
|
||||||
|
RealD Tolerance;
|
||||||
|
RealD InnerTolerance; //Initial tolerance for inner CG. Defaults to Tolerance but can be changed
|
||||||
|
Integer MaxInnerIterations;
|
||||||
|
Integer MaxOuterIterations;
|
||||||
|
GridBase* SinglePrecGrid4; //Grid for single-precision fields
|
||||||
|
GridBase* SinglePrecGrid5; //Grid for single-precision fields
|
||||||
|
RealD OuterLoopNormMult; //Stop the outer loop and move to a final double prec solve when the residual is OuterLoopNormMult * Tolerance
|
||||||
|
|
||||||
|
FermionOperatorF &FermOpF;
|
||||||
|
FermionOperatorD &FermOpD;;
|
||||||
|
SchurOperatorF &LinOpF;
|
||||||
|
SchurOperatorD &LinOpD;
|
||||||
|
|
||||||
|
Integer TotalInnerIterations; //Number of inner CG iterations
|
||||||
|
Integer TotalOuterIterations; //Number of restarts
|
||||||
|
Integer TotalFinalStepIterations; //Number of CG iterations in final patch-up step
|
||||||
|
|
||||||
|
MixedPrecisionConjugateGradientOperatorFunction(RealD tol,
|
||||||
|
Integer maxinnerit,
|
||||||
|
Integer maxouterit,
|
||||||
|
GridBase* _sp_grid4,
|
||||||
|
GridBase* _sp_grid5,
|
||||||
|
FermionOperatorF &_FermOpF,
|
||||||
|
FermionOperatorD &_FermOpD,
|
||||||
|
SchurOperatorF &_LinOpF,
|
||||||
|
SchurOperatorD &_LinOpD):
|
||||||
|
LinOpF(_LinOpF),
|
||||||
|
LinOpD(_LinOpD),
|
||||||
|
FermOpF(_FermOpF),
|
||||||
|
FermOpD(_FermOpD),
|
||||||
|
Tolerance(tol),
|
||||||
|
InnerTolerance(tol),
|
||||||
|
MaxInnerIterations(maxinnerit),
|
||||||
|
MaxOuterIterations(maxouterit),
|
||||||
|
SinglePrecGrid4(_sp_grid4),
|
||||||
|
SinglePrecGrid5(_sp_grid5),
|
||||||
|
OuterLoopNormMult(100.)
|
||||||
|
{
|
||||||
|
/* Debugging instances of objects; references are stored
|
||||||
|
std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpF " <<std::hex<< &LinOpF<<std::dec <<std::endl;
|
||||||
|
std::cout << GridLogMessage << " Mixed precision CG wrapper LinOpD " <<std::hex<< &LinOpD<<std::dec <<std::endl;
|
||||||
|
std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpF " <<std::hex<< &FermOpF<<std::dec <<std::endl;
|
||||||
|
std::cout << GridLogMessage << " Mixed precision CG wrapper FermOpD " <<std::hex<< &FermOpD<<std::dec <<std::endl;
|
||||||
|
*/
|
||||||
|
};
|
||||||
|
|
||||||
|
void operator()(LinearOperatorBase<FieldD> &LinOpU, const FieldD &src, FieldD &psi) {
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << " Mixed precision CG wrapper operator() "<<std::endl;
|
||||||
|
|
||||||
|
SchurOperatorD * SchurOpU = static_cast<SchurOperatorD *>(&LinOpU);
|
||||||
|
|
||||||
|
// std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpU " <<std::hex<< &(SchurOpU->_Mat)<<std::dec <<std::endl;
|
||||||
|
// std::cout << GridLogMessage << " Mixed precision CG wrapper operator() FermOpD " <<std::hex<< &(LinOpD._Mat) <<std::dec <<std::endl;
|
||||||
|
// Assumption made in code to extract gauge field
|
||||||
|
// We could avoid storing the LinOpD reference altogether?
|
||||||
|
assert(&(SchurOpU->_Mat)==&(LinOpD._Mat));
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Must snarf a single precision copy of the gauge field in Linop_d argument
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
typedef typename FermionOperatorF::GaugeField GaugeFieldF;
|
||||||
|
typedef typename FermionOperatorF::GaugeLinkField GaugeLinkFieldF;
|
||||||
|
typedef typename FermionOperatorD::GaugeField GaugeFieldD;
|
||||||
|
typedef typename FermionOperatorD::GaugeLinkField GaugeLinkFieldD;
|
||||||
|
|
||||||
|
GridBase * GridPtrF = SinglePrecGrid4;
|
||||||
|
GridBase * GridPtrD = FermOpD.Umu.Grid();
|
||||||
|
GaugeFieldF U_f (GridPtrF);
|
||||||
|
GaugeLinkFieldF Umu_f(GridPtrF);
|
||||||
|
// std::cout << " Dim gauge field "<<GridPtrF->Nd()<<std::endl; // 4d
|
||||||
|
// std::cout << " Dim gauge field "<<GridPtrD->Nd()<<std::endl; // 4d
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Moving this to a Clone method of fermion operator would allow to duplicate the
|
||||||
|
// physics parameters and decrease gauge field copies
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
GaugeLinkFieldD Umu_d(GridPtrD);
|
||||||
|
for(int mu=0;mu<Nd*2;mu++){
|
||||||
|
Umu_d = PeekIndex<LorentzIndex>(FermOpD.Umu, mu);
|
||||||
|
precisionChange(Umu_f,Umu_d);
|
||||||
|
PokeIndex<LorentzIndex>(FermOpF.Umu, Umu_f, mu);
|
||||||
|
}
|
||||||
|
pickCheckerboard(Even,FermOpF.UmuEven,FermOpF.Umu);
|
||||||
|
pickCheckerboard(Odd ,FermOpF.UmuOdd ,FermOpF.Umu);
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Make a mixed precision conjugate gradient
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
#if 1
|
||||||
|
RealD delta=1.e-4;
|
||||||
|
std::cout << GridLogMessage << "Calling reliable update Conjugate Gradient" <<std::endl;
|
||||||
|
ConjugateGradientReliableUpdate<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations*MaxOuterIterations,delta,SinglePrecGrid5,LinOpF,LinOpD);
|
||||||
|
#else
|
||||||
|
std::cout << GridLogMessage << "Calling mixed precision Conjugate Gradient" <<std::endl;
|
||||||
|
MixedPrecisionConjugateGradient<FieldD,FieldF> MPCG(Tolerance,MaxInnerIterations,MaxOuterIterations,SinglePrecGrid5,LinOpF,LinOpD);
|
||||||
|
#endif
|
||||||
|
MPCG(src,psi);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
NAMESPACE_END(Grid);
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
Grid_init(&argc, &argv);
|
||||||
|
int threads = GridThread::GetThreads();
|
||||||
|
|
||||||
|
// Typedefs to simplify notation
|
||||||
|
typedef WilsonImplR FermionImplPolicy;
|
||||||
|
typedef WilsonImplF FermionImplPolicyF;
|
||||||
|
|
||||||
|
typedef MobiusFermionR FermionAction;
|
||||||
|
typedef MobiusFermionF FermionActionF;
|
||||||
|
typedef typename FermionAction::FermionField FermionField;
|
||||||
|
typedef typename FermionActionF::FermionField FermionFieldF;
|
||||||
|
|
||||||
|
typedef Grid::XmlReader Serialiser;
|
||||||
|
|
||||||
|
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
|
||||||
|
IntegratorParameters MD;
|
||||||
|
// typedef GenericHMCRunner<LeapFrog> HMCWrapper;
|
||||||
|
// MD.name = std::string("Leap Frog");
|
||||||
|
// typedef GenericHMCRunner<ForceGradient> HMCWrapper;
|
||||||
|
// MD.name = std::string("Force Gradient");
|
||||||
|
typedef GenericHMCRunner<MinimumNorm2> HMCWrapper;
|
||||||
|
MD.name = std::string("MinimumNorm2");
|
||||||
|
MD.MDsteps = 6;
|
||||||
|
MD.trajL = 1.0;
|
||||||
|
|
||||||
|
HMCparameters HMCparams;
|
||||||
|
HMCparams.StartTrajectory = 1077;
|
||||||
|
HMCparams.Trajectories = 1;
|
||||||
|
HMCparams.NoMetropolisUntil= 0;
|
||||||
|
// "[HotStart, ColdStart, TepidStart, CheckpointStart]\n";
|
||||||
|
// HMCparams.StartingType =std::string("ColdStart");
|
||||||
|
HMCparams.StartingType =std::string("CheckpointStart");
|
||||||
|
HMCparams.MD = MD;
|
||||||
|
HMCWrapper TheHMC(HMCparams);
|
||||||
|
|
||||||
|
// Grid from the command line arguments --grid and --mpi
|
||||||
|
TheHMC.Resources.AddFourDimGrid("gauge"); // use default simd lanes decomposition
|
||||||
|
|
||||||
|
CheckpointerParameters CPparams;
|
||||||
|
CPparams.config_prefix = "ckpoint_DDHMC_lat";
|
||||||
|
CPparams.rng_prefix = "ckpoint_DDHMC_rng";
|
||||||
|
CPparams.saveInterval = 1;
|
||||||
|
CPparams.format = "IEEE64BIG";
|
||||||
|
TheHMC.Resources.LoadNerscCheckpointer(CPparams);
|
||||||
|
|
||||||
|
RNGModuleParameters RNGpar;
|
||||||
|
RNGpar.serial_seeds = "1 2 3 4 5";
|
||||||
|
RNGpar.parallel_seeds = "6 7 8 9 10";
|
||||||
|
TheHMC.Resources.SetRNGSeeds(RNGpar);
|
||||||
|
|
||||||
|
// Construct observables
|
||||||
|
// here there is too much indirection
|
||||||
|
typedef PlaquetteMod<HMCWrapper::ImplPolicy> PlaqObs;
|
||||||
|
TheHMC.Resources.AddObservable<PlaqObs>();
|
||||||
|
//////////////////////////////////////////////
|
||||||
|
|
||||||
|
const int Ls = 12;
|
||||||
|
RealD M5 = 1.8;
|
||||||
|
RealD b = 1.5;
|
||||||
|
RealD c = 0.5;
|
||||||
|
Real beta = 2.31;
|
||||||
|
// Real light_mass = 5.4e-4;
|
||||||
|
Real light_mass = 7.8e-4;
|
||||||
|
Real strange_mass = 0.02132;
|
||||||
|
Real pv_mass = 1.0;
|
||||||
|
// std::vector<Real> hasenbusch({ light_mass, 3.8e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||||
|
std::vector<Real> hasenbusch({ light_mass, 5e-3, 0.0145, 0.045, 0.108, 0.25, 0.51 , pv_mass });
|
||||||
|
|
||||||
|
// FIXME:
|
||||||
|
// Same in MC and MD
|
||||||
|
// Need to mix precision too
|
||||||
|
OneFlavourRationalParams SFRp; // Strange
|
||||||
|
SFRp.lo = 4.0e-3;
|
||||||
|
SFRp.hi = 90.0;
|
||||||
|
SFRp.MaxIter = 60000;
|
||||||
|
SFRp.tolerance= 1.0e-8;
|
||||||
|
SFRp.mdtolerance= 1.0e-6;
|
||||||
|
SFRp.degree = 12;
|
||||||
|
SFRp.precision= 50;
|
||||||
|
SFRp.BoundsCheckFreq=0;
|
||||||
|
|
||||||
|
OneFlavourRationalParams OFRp; // Up/down
|
||||||
|
OFRp.lo = 2.0e-5;
|
||||||
|
OFRp.hi = 90.0;
|
||||||
|
OFRp.MaxIter = 60000;
|
||||||
|
OFRp.tolerance= 1.0e-8;
|
||||||
|
OFRp.mdtolerance= 1.0e-6;
|
||||||
|
// OFRp.degree = 20; converges
|
||||||
|
// OFRp.degree = 16;
|
||||||
|
OFRp.degree = 12;
|
||||||
|
OFRp.precision= 80;
|
||||||
|
OFRp.BoundsCheckFreq=0;
|
||||||
|
|
||||||
|
auto GridPtr = TheHMC.Resources.GetCartesian();
|
||||||
|
auto GridRBPtr = TheHMC.Resources.GetRBCartesian();
|
||||||
|
|
||||||
|
typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
|
||||||
|
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
|
||||||
|
typedef MixedPrecisionConjugateGradientOperatorFunction<MobiusFermionD,MobiusFermionF,LinearOperatorD,LinearOperatorF> MxPCG;
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
// Domain decomposed
|
||||||
|
////////////////////////////////////////////////////////////////
|
||||||
|
Coordinate latt4 = GridPtr->GlobalDimensions();
|
||||||
|
Coordinate mpi = GridPtr->ProcessorGrid();
|
||||||
|
Coordinate shm;
|
||||||
|
|
||||||
|
GlobalSharedMemory::GetShmDims(mpi,shm);
|
||||||
|
|
||||||
|
Coordinate CommDim(Nd);
|
||||||
|
for(int d=0;d<Nd;d++) CommDim[d]= (mpi[d]/shm[d])>1 ? 1 : 0;
|
||||||
|
|
||||||
|
Coordinate NonDirichlet(Nd+1,0);
|
||||||
|
Coordinate Dirichlet(Nd+1,0);
|
||||||
|
Dirichlet[1] = CommDim[0]*latt4[0]/mpi[0] * shm[0];
|
||||||
|
Dirichlet[2] = CommDim[1]*latt4[1]/mpi[1] * shm[1];
|
||||||
|
Dirichlet[3] = CommDim[2]*latt4[2]/mpi[2] * shm[2];
|
||||||
|
Dirichlet[4] = CommDim[3]*latt4[3]/mpi[3] * shm[3];
|
||||||
|
|
||||||
|
Coordinate Block4(Nd);
|
||||||
|
Block4[0] = Dirichlet[1];
|
||||||
|
Block4[1] = Dirichlet[2];
|
||||||
|
Block4[2] = Dirichlet[3];
|
||||||
|
Block4[3] = Dirichlet[4];
|
||||||
|
|
||||||
|
int Width=3;
|
||||||
|
TheHMC.Resources.SetMomentumFilter(new DDHMCFilter<WilsonImplR::Field>(Block4,Width));
|
||||||
|
|
||||||
|
//////////////////////////
|
||||||
|
// Fermion Grids
|
||||||
|
//////////////////////////
|
||||||
|
auto FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtr);
|
||||||
|
auto FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtr);
|
||||||
|
|
||||||
|
Coordinate simdF = GridDefaultSimd(Nd,vComplexF::Nsimd());
|
||||||
|
auto GridPtrF = SpaceTimeGrid::makeFourDimGrid(latt4,simdF,mpi);
|
||||||
|
auto GridRBPtrF = SpaceTimeGrid::makeFourDimRedBlackGrid(GridPtrF);
|
||||||
|
auto FGridF = SpaceTimeGrid::makeFiveDimGrid(Ls,GridPtrF);
|
||||||
|
auto FrbGridF = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,GridPtrF);
|
||||||
|
|
||||||
|
IwasakiGaugeActionR GaugeAction(beta);
|
||||||
|
|
||||||
|
// temporarily need a gauge field
|
||||||
|
LatticeGaugeField U(GridPtr);
|
||||||
|
LatticeGaugeFieldF UF(GridPtrF);
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << " Running the HMC "<< std::endl;
|
||||||
|
TheHMC.ReadCommandLine(argc,argv); // params on CML or from param file
|
||||||
|
TheHMC.initializeGaugeFieldAndRNGs(U);
|
||||||
|
|
||||||
|
|
||||||
|
// These lines are unecessary if BC are all periodic
|
||||||
|
std::vector<Complex> boundary = {1,1,1,-1};
|
||||||
|
FermionAction::ImplParams Params(boundary);
|
||||||
|
Params.dirichlet=NonDirichlet;
|
||||||
|
FermionAction::ImplParams ParamsDir(boundary);
|
||||||
|
ParamsDir.dirichlet=Dirichlet;
|
||||||
|
|
||||||
|
// double StoppingCondition = 1e-14;
|
||||||
|
// double MDStoppingCondition = 1e-9;
|
||||||
|
double StoppingCondition = 1e-10;
|
||||||
|
double MDStoppingCondition = 1e-7;
|
||||||
|
double MDStoppingConditionLoose = 1e-6;
|
||||||
|
double MaxCGIterations = 300000;
|
||||||
|
ConjugateGradient<FermionField> CG(StoppingCondition,MaxCGIterations);
|
||||||
|
ConjugateGradient<FermionField> MDCG(MDStoppingCondition,MaxCGIterations);
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Collect actions
|
||||||
|
////////////////////////////////////
|
||||||
|
ActionLevel<HMCWrapper::Field> Level1(1);
|
||||||
|
ActionLevel<HMCWrapper::Field> Level2(4);
|
||||||
|
ActionLevel<HMCWrapper::Field> Level3(8);
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// Strange action
|
||||||
|
////////////////////////////////////
|
||||||
|
FermionAction StrangeOp (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, Params);
|
||||||
|
FermionAction StrangePauliVillarsOp(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, Params);
|
||||||
|
|
||||||
|
FermionAction StrangeOpDir (U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,strange_mass,M5,b,c, ParamsDir);
|
||||||
|
FermionAction StrangePauliVillarsOpDir(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,pv_mass, M5,b,c, ParamsDir);
|
||||||
|
|
||||||
|
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionBdy(StrangeOpDir,StrangeOp,SFRp);
|
||||||
|
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionLocal(StrangePauliVillarsOpDir,StrangeOpDir,SFRp);
|
||||||
|
OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> StrangePseudoFermionPVBdy(StrangePauliVillarsOp,StrangePauliVillarsOpDir,SFRp);
|
||||||
|
Level1.push_back(&StrangePseudoFermionBdy); // ok
|
||||||
|
Level2.push_back(&StrangePseudoFermionLocal);
|
||||||
|
Level1.push_back(&StrangePseudoFermionPVBdy); //ok
|
||||||
|
|
||||||
|
////////////////////////////////////
|
||||||
|
// up down action
|
||||||
|
////////////////////////////////////
|
||||||
|
std::vector<Real> light_den;
|
||||||
|
std::vector<Real> light_num;
|
||||||
|
std::vector<int> dirichlet_den;
|
||||||
|
std::vector<int> dirichlet_num;
|
||||||
|
|
||||||
|
int n_hasenbusch = hasenbusch.size();
|
||||||
|
light_den.push_back(light_mass); dirichlet_den.push_back(0);
|
||||||
|
for(int h=0;h<n_hasenbusch;h++){
|
||||||
|
light_den.push_back(hasenbusch[h]); dirichlet_den.push_back(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
for(int h=0;h<n_hasenbusch;h++){
|
||||||
|
light_num.push_back(hasenbusch[h]); dirichlet_num.push_back(1);
|
||||||
|
}
|
||||||
|
light_num.push_back(pv_mass); dirichlet_num.push_back(0);
|
||||||
|
|
||||||
|
std::vector<FermionAction *> Numerators;
|
||||||
|
std::vector<FermionActionF *> NumeratorsF;
|
||||||
|
std::vector<FermionAction *> Denominators;
|
||||||
|
std::vector<FermionActionF *> DenominatorsF;
|
||||||
|
std::vector<TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy> *> Quotients;
|
||||||
|
|
||||||
|
#define MIXED_PRECISION
|
||||||
|
#ifdef MIXED_PRECISION
|
||||||
|
std::vector<OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF> *> Bdys;
|
||||||
|
#else
|
||||||
|
std::vector<OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy> *> Bdys;
|
||||||
|
#endif
|
||||||
|
std::vector<MxPCG *> ActionMPCG;
|
||||||
|
std::vector<MxPCG *> MPCG;
|
||||||
|
|
||||||
|
typedef SchurDiagMooeeOperator<FermionActionF,FermionFieldF> LinearOperatorF;
|
||||||
|
typedef SchurDiagMooeeOperator<FermionAction ,FermionField > LinearOperatorD;
|
||||||
|
std::vector<LinearOperatorD *> LinOpD;
|
||||||
|
std::vector<LinearOperatorF *> LinOpF;
|
||||||
|
|
||||||
|
for(int h=0;h<n_hasenbusch+1;h++){
|
||||||
|
std::cout << GridLogMessage
|
||||||
|
<< " 2f quotient Action ";
|
||||||
|
std::cout << "det D("<<light_den[h]<<")";
|
||||||
|
if ( dirichlet_den[h] ) std::cout << "^dirichlet ";
|
||||||
|
std::cout << "/ det D("<<light_num[h]<<")";
|
||||||
|
if ( dirichlet_num[h] ) std::cout << "^dirichlet ";
|
||||||
|
std::cout << std::endl;
|
||||||
|
|
||||||
|
FermionAction::ImplParams ParamsNum(boundary);
|
||||||
|
FermionAction::ImplParams ParamsDen(boundary);
|
||||||
|
FermionActionF::ImplParams ParamsNumF(boundary);
|
||||||
|
FermionActionF::ImplParams ParamsDenF(boundary);
|
||||||
|
|
||||||
|
if ( dirichlet_num[h]==1) ParamsNum.dirichlet = Dirichlet;
|
||||||
|
else ParamsNum.dirichlet = NonDirichlet;
|
||||||
|
Numerators.push_back (new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_num[h],M5,b,c, ParamsNum));
|
||||||
|
|
||||||
|
if ( dirichlet_den[h]==1) ParamsDen.dirichlet = Dirichlet;
|
||||||
|
else ParamsDen.dirichlet = NonDirichlet;
|
||||||
|
|
||||||
|
Denominators.push_back(new FermionAction(U,*FGrid,*FrbGrid,*GridPtr,*GridRBPtr,light_den[h],M5,b,c, ParamsDen));
|
||||||
|
|
||||||
|
ParamsDenF.dirichlet = ParamsDen.dirichlet;
|
||||||
|
DenominatorsF.push_back(new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_den[h],M5,b,c, ParamsDenF));
|
||||||
|
|
||||||
|
ParamsNumF.dirichlet = ParamsNum.dirichlet;
|
||||||
|
NumeratorsF.push_back (new FermionActionF(UF,*FGridF,*FrbGridF,*GridPtrF,*GridRBPtrF,light_num[h],M5,b,c, ParamsNumF));
|
||||||
|
|
||||||
|
LinOpD.push_back(new LinearOperatorD(*Denominators[h]));
|
||||||
|
LinOpF.push_back(new LinearOperatorF(*DenominatorsF[h]));
|
||||||
|
|
||||||
|
double conv = MDStoppingCondition;
|
||||||
|
if (h<3) conv= MDStoppingConditionLoose; // Relax on first two hasenbusch factors
|
||||||
|
const int MX_inner = 5000;
|
||||||
|
MPCG.push_back(new MxPCG(conv,
|
||||||
|
MX_inner,
|
||||||
|
MaxCGIterations,
|
||||||
|
GridPtrF,
|
||||||
|
FrbGridF,
|
||||||
|
*DenominatorsF[h],*Denominators[h],
|
||||||
|
*LinOpF[h], *LinOpD[h]) );
|
||||||
|
|
||||||
|
ActionMPCG.push_back(new MxPCG(StoppingCondition,
|
||||||
|
MX_inner,
|
||||||
|
MaxCGIterations,
|
||||||
|
GridPtrF,
|
||||||
|
FrbGridF,
|
||||||
|
*DenominatorsF[h],*Denominators[h],
|
||||||
|
*LinOpF[h], *LinOpD[h]) );
|
||||||
|
|
||||||
|
|
||||||
|
if(h!=0) {
|
||||||
|
// Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],MDCG,CG));
|
||||||
|
Quotients.push_back (new TwoFlavourEvenOddRatioPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],*MPCG[h],*ActionMPCG[h],CG));
|
||||||
|
} else {
|
||||||
|
#ifdef MIXED_PRECISION
|
||||||
|
Bdys.push_back( new OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF>(
|
||||||
|
*Numerators[h],*Denominators[h],
|
||||||
|
*NumeratorsF[h],*DenominatorsF[h],
|
||||||
|
OFRp, 500) );
|
||||||
|
Bdys.push_back( new OneFlavourEvenOddRatioRationalMixedPrecPseudoFermionAction<FermionImplPolicy,FermionImplPolicyF>(
|
||||||
|
*Numerators[h],*Denominators[h],
|
||||||
|
*NumeratorsF[h],*DenominatorsF[h],
|
||||||
|
OFRp, 500) );
|
||||||
|
#else
|
||||||
|
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
|
||||||
|
Bdys.push_back( new OneFlavourEvenOddRatioRationalPseudoFermionAction<FermionImplPolicy>(*Numerators[h],*Denominators[h],OFRp));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int nquo=Quotients.size();
|
||||||
|
Level1.push_back(Bdys[0]);
|
||||||
|
Level1.push_back(Bdys[1]);
|
||||||
|
for(int h=0;h<nquo-1;h++){
|
||||||
|
Level2.push_back(Quotients[h]);
|
||||||
|
}
|
||||||
|
Level2.push_back(Quotients[nquo-1]);
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
// Gauge action
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
Level3.push_back(&GaugeAction);
|
||||||
|
TheHMC.TheAction.push_back(Level1);
|
||||||
|
TheHMC.TheAction.push_back(Level2);
|
||||||
|
TheHMC.TheAction.push_back(Level3);
|
||||||
|
std::cout << GridLogMessage << " Action complete "<< std::endl;
|
||||||
|
/////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
TheHMC.Run(); // no smearing
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
} // main
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -420,7 +420,6 @@ public:
|
|||||||
FGrid->Broadcast(0,&ncall,sizeof(ncall));
|
FGrid->Broadcast(0,&ncall,sizeof(ncall));
|
||||||
|
|
||||||
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
|
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
|
||||||
Dw.ZeroCounters();
|
|
||||||
|
|
||||||
time_statistics timestat;
|
time_statistics timestat;
|
||||||
std::vector<double> t_time(ncall);
|
std::vector<double> t_time(ncall);
|
||||||
@ -589,7 +588,6 @@ public:
|
|||||||
FGrid->Broadcast(0,&ncall,sizeof(ncall));
|
FGrid->Broadcast(0,&ncall,sizeof(ncall));
|
||||||
|
|
||||||
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
|
// std::cout << GridLogMessage << " Estimate " << ncall << " calls per second"<<std::endl;
|
||||||
Ds.ZeroCounters();
|
|
||||||
|
|
||||||
time_statistics timestat;
|
time_statistics timestat;
|
||||||
std::vector<double> t_time(ncall);
|
std::vector<double> t_time(ncall);
|
||||||
|
@ -186,7 +186,6 @@ int main (int argc, char ** argv)
|
|||||||
|
|
||||||
if (1) {
|
if (1) {
|
||||||
FGrid->Barrier();
|
FGrid->Barrier();
|
||||||
Dw.ZeroCounters();
|
|
||||||
Dw.Dhop(src,result,0);
|
Dw.Dhop(src,result,0);
|
||||||
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
|
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
@ -231,7 +230,6 @@ int main (int argc, char ** argv)
|
|||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
assert (norm2(err)< 1.0e-4 );
|
assert (norm2(err)< 1.0e-4 );
|
||||||
Dw.Report();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (1)
|
if (1)
|
||||||
@ -306,7 +304,6 @@ int main (int argc, char ** argv)
|
|||||||
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
if ( WilsonKernelsStatic::Opt == WilsonKernelsStatic::OptInlineAsm ) std::cout << GridLogMessage<< "* Using Asm Nc=3 WilsonKernels" <<std::endl;
|
||||||
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
std::cout << GridLogMessage<< "*********************************************************" <<std::endl;
|
||||||
{
|
{
|
||||||
Dw.ZeroCounters();
|
|
||||||
FGrid->Barrier();
|
FGrid->Barrier();
|
||||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
@ -328,7 +325,6 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "Deo mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
|
std::cout<<GridLogMessage << "Deo mflop/s per rank "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
|
std::cout<<GridLogMessage << "Deo mflop/s per node "<< flops/(t1-t0)/NN<<std::endl;
|
||||||
Dw.Report();
|
|
||||||
}
|
}
|
||||||
Dw.DhopEO(src_o,r_e,DaggerNo);
|
Dw.DhopEO(src_o,r_e,DaggerNo);
|
||||||
Dw.DhopOE(src_e,r_o,DaggerNo);
|
Dw.DhopOE(src_e,r_o,DaggerNo);
|
||||||
|
@ -93,14 +93,11 @@ int main (int argc, char ** argv)
|
|||||||
int ncall =1000;
|
int ncall =1000;
|
||||||
if (1) {
|
if (1) {
|
||||||
FGrid->Barrier();
|
FGrid->Barrier();
|
||||||
Dw.ZeroCounters();
|
|
||||||
Dw.Dhop(src,result,0);
|
Dw.Dhop(src,result,0);
|
||||||
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
|
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
__SSC_START;
|
|
||||||
Dw.Dhop(src,result,0);
|
Dw.Dhop(src,result,0);
|
||||||
__SSC_STOP;
|
|
||||||
}
|
}
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
FGrid->Barrier();
|
FGrid->Barrier();
|
||||||
@ -114,7 +111,6 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
||||||
Dw.Report();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -136,14 +132,11 @@ int main (int argc, char ** argv)
|
|||||||
GparityDomainWallFermionD DwD(Umu_d,*FGrid_d,*FrbGrid_d,*UGrid_d,*UrbGrid_d,mass,M5);
|
GparityDomainWallFermionD DwD(Umu_d,*FGrid_d,*FrbGrid_d,*UGrid_d,*UrbGrid_d,mass,M5);
|
||||||
if (1) {
|
if (1) {
|
||||||
FGrid_d->Barrier();
|
FGrid_d->Barrier();
|
||||||
DwD.ZeroCounters();
|
|
||||||
DwD.Dhop(src_d,result_d,0);
|
DwD.Dhop(src_d,result_d,0);
|
||||||
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
|
std::cout<<GridLogMessage<<"Called warmup"<<std::endl;
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
for(int i=0;i<ncall;i++){
|
for(int i=0;i<ncall;i++){
|
||||||
__SSC_START;
|
|
||||||
DwD.Dhop(src_d,result_d,0);
|
DwD.Dhop(src_d,result_d,0);
|
||||||
__SSC_STOP;
|
|
||||||
}
|
}
|
||||||
double t1=usecond();
|
double t1=usecond();
|
||||||
FGrid_d->Barrier();
|
FGrid_d->Barrier();
|
||||||
@ -157,7 +150,6 @@ int main (int argc, char ** argv)
|
|||||||
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s = "<< flops/(t1-t0)<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s per rank = "<< flops/(t1-t0)/NP<<std::endl;
|
||||||
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
std::cout<<GridLogMessage << "mflop/s per node = "<< flops/(t1-t0)/NN<<std::endl;
|
||||||
DwD.Report();
|
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
Grid_finalize();
|
Grid_finalize();
|
||||||
|
@ -103,35 +103,30 @@ int main (int argc, char ** argv)
|
|||||||
#define BENCH_DW(A,...) \
|
#define BENCH_DW(A,...) \
|
||||||
Dw. A (__VA_ARGS__); \
|
Dw. A (__VA_ARGS__); \
|
||||||
FGrid->Barrier(); \
|
FGrid->Barrier(); \
|
||||||
Dw.CayleyZeroCounters(); \
|
|
||||||
t0=usecond(); \
|
t0=usecond(); \
|
||||||
for(int i=0;i<ncall;i++){ \
|
for(int i=0;i<ncall;i++){ \
|
||||||
Dw. A (__VA_ARGS__); \
|
Dw. A (__VA_ARGS__); \
|
||||||
} \
|
} \
|
||||||
t1=usecond(); \
|
t1=usecond(); \
|
||||||
FGrid->Barrier(); \
|
FGrid->Barrier(); \
|
||||||
Dw.CayleyReport(); \
|
|
||||||
std::cout<<GridLogMessage << "Called " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
|
std::cout<<GridLogMessage << "Called " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
|
||||||
std::cout<<GridLogMessage << "******************"<<std::endl;
|
std::cout<<GridLogMessage << "******************"<<std::endl;
|
||||||
|
|
||||||
#define BENCH_ZDW(A,in,out) \
|
#define BENCH_ZDW(A,in,out) \
|
||||||
zDw. A (in,out); \
|
zDw. A (in,out); \
|
||||||
FGrid->Barrier(); \
|
FGrid->Barrier(); \
|
||||||
zDw.CayleyZeroCounters(); \
|
|
||||||
t0=usecond(); \
|
t0=usecond(); \
|
||||||
for(int i=0;i<ncall;i++){ \
|
for(int i=0;i<ncall;i++){ \
|
||||||
zDw. A (in,out); \
|
zDw. A (in,out); \
|
||||||
} \
|
} \
|
||||||
t1=usecond(); \
|
t1=usecond(); \
|
||||||
FGrid->Barrier(); \
|
FGrid->Barrier(); \
|
||||||
zDw.CayleyReport(); \
|
|
||||||
std::cout<<GridLogMessage << "Called ZDw " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
|
std::cout<<GridLogMessage << "Called ZDw " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
|
||||||
std::cout<<GridLogMessage << "******************"<<std::endl;
|
std::cout<<GridLogMessage << "******************"<<std::endl;
|
||||||
|
|
||||||
#define BENCH_DW_SSC(A,in,out) \
|
#define BENCH_DW_SSC(A,in,out) \
|
||||||
Dw. A (in,out); \
|
Dw. A (in,out); \
|
||||||
FGrid->Barrier(); \
|
FGrid->Barrier(); \
|
||||||
Dw.CayleyZeroCounters(); \
|
|
||||||
t0=usecond(); \
|
t0=usecond(); \
|
||||||
for(int i=0;i<ncall;i++){ \
|
for(int i=0;i<ncall;i++){ \
|
||||||
__SSC_START ; \
|
__SSC_START ; \
|
||||||
@ -140,7 +135,6 @@ int main (int argc, char ** argv)
|
|||||||
} \
|
} \
|
||||||
t1=usecond(); \
|
t1=usecond(); \
|
||||||
FGrid->Barrier(); \
|
FGrid->Barrier(); \
|
||||||
Dw.CayleyReport(); \
|
|
||||||
std::cout<<GridLogMessage << "Called " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
|
std::cout<<GridLogMessage << "Called " #A " "<< (t1-t0)/ncall<<" us"<<std::endl;\
|
||||||
std::cout<<GridLogMessage << "******************"<<std::endl;
|
std::cout<<GridLogMessage << "******************"<<std::endl;
|
||||||
|
|
||||||
|
@ -155,7 +155,6 @@ int main (int argc, char ** argv)
|
|||||||
//int ncall=1;
|
//int ncall=1;
|
||||||
|
|
||||||
// Counters
|
// Counters
|
||||||
Dw.ZeroCounters();
|
|
||||||
Grid.Barrier();
|
Grid.Barrier();
|
||||||
|
|
||||||
double t0=usecond();
|
double t0=usecond();
|
||||||
@ -201,7 +200,6 @@ int main (int argc, char ** argv)
|
|||||||
err = ref-result;
|
err = ref-result;
|
||||||
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
std::cout<<GridLogMessage << "norm diff "<< norm2(err)<<std::endl;
|
||||||
|
|
||||||
Dw.Report();
|
|
||||||
|
|
||||||
// guard
|
// guard
|
||||||
double err0 = norm2(err);
|
double err0 = norm2(err);
|
||||||
|
20
configure.ac
20
configure.ac
@ -128,6 +128,26 @@ case ${ac_LAPACK} in
|
|||||||
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
|
AC_DEFINE([USE_LAPACK],[1],[use LAPACK]);;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
|
############### tracing
|
||||||
|
AC_ARG_ENABLE([tracing],
|
||||||
|
[AC_HELP_STRING([--enable-tracing=none|nvtx|roctx|timer], [enable tracing])],
|
||||||
|
[ac_TRACING=${enable_tracing}], [ac_TRACING=none])
|
||||||
|
|
||||||
|
case ${ac_TRACING} in
|
||||||
|
nvtx)
|
||||||
|
AC_DEFINE([GRID_TRACING_NVTX],[1],[use NVTX])
|
||||||
|
LIBS="${LIBS} -lnvToolsExt64_1"
|
||||||
|
;;
|
||||||
|
roctx)
|
||||||
|
AC_DEFINE([GRID_TRACING_ROCTX],[1],[use ROCTX])
|
||||||
|
LIBS="${LIBS} -lroctx64"
|
||||||
|
;;
|
||||||
|
timer)
|
||||||
|
AC_DEFINE([GRID_TRACING_TIMER],[1],[use TIMER]);;
|
||||||
|
*)
|
||||||
|
AC_DEFINE([GRID_TRACING_NONE],[1],[no tracing]);;
|
||||||
|
esac
|
||||||
|
|
||||||
############### fermions
|
############### fermions
|
||||||
AC_ARG_ENABLE([fermion-reps],
|
AC_ARG_ENABLE([fermion-reps],
|
||||||
[AC_HELP_STRING([--enable-fermion-reps=yes|no], [enable extra fermion representation support])],
|
[AC_HELP_STRING([--enable-fermion-reps=yes|no], [enable extra fermion representation support])],
|
||||||
|
436
examples/Example_christoph.cc
Normal file
436
examples/Example_christoph.cc
Normal file
@ -0,0 +1,436 @@
|
|||||||
|
/*
|
||||||
|
 * Warning: This code is illustrative only: it is not well tested, and is not meant for production use
|
||||||
|
* without regression / tests being applied
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
RealD LLscale =1.0;
|
||||||
|
RealD LCscale =1.0;
|
||||||
|
|
||||||
|
// Covariant Laplacian operator assembled from the Gimpl covariant shift helpers.
// M() accumulates over the directions 0 .. Nd-2 only (the last direction is not visited).
template<class Gimpl,class Field> class CovariantLaplacianCshift : public SparseMatrixBase<Field>
{
public:
  INHERIT_GIMPL_TYPES(Gimpl);

  GridBase *grid;   // grid taken from the gauge field passed to the constructor
  GaugeField U;     // copy of the gauge field passed to the constructor

  CovariantLaplacianCshift(GaugeField &_U) : grid(_U.Grid()), U(_U) {}

  virtual GridBase *Grid(void) { return grid; }

  // out = sum over directions of ( 2*in - CovShiftForward(in) - CovShiftBackward(in) )
  virtual void M (const Field &in, Field &out)
  {
    out = Zero();
    const int ndir = Nd-1;
    for(int dir=0; dir<ndir; dir++) {
      // The link field is peeked out of U on every call; this is known to be inefficient.
      GaugeLinkField link = PeekIndex<LorentzIndex>(U, dir);
      out = out - Gimpl::CovShiftForward(link,dir,in);
      out = out - Gimpl::CovShiftBackward(link,dir,in);
      out = out + 2.0*in;
    }
  }

  // The Laplacian is hermitian, so the adjoint simply forwards to M().
  virtual void Mdag (const Field &in, Field &out) { M(in,out); }

  // The remaining entry points are unimplemented (only needed for multigrid).
  virtual void Mdiag (const Field &in, Field &out) { assert(0); }
  virtual void Mdir (const Field &in, Field &out,int dir, int disp) { assert(0); }
  virtual void MdirAll (const Field &in, std::vector<Field> &out) { assert(0); }
};
|
||||||
|
|
||||||
|
// Fill `phase` with exp( i * sum_mu (2 pi / L_mu) * mom[mu] * x_mu ),
// where L_mu are the global lattice dimensions of the grid `phase` lives on.
void MakePhase(Coordinate mom,LatticeComplex &phase)
{
  GridBase *grid = phase.Grid();
  auto dims = grid->GlobalDimensions();
  ComplexD imag_unit(0.0,1.0);

  LatticeComplex x_mu(phase.Grid());

  // Accumulate the (real valued) argument of the exponential first.
  phase = Zero();
  for(int mu=0; mu<Nd; mu++){
    RealD two_pi_over_L = M_PI * 2.0/ dims[mu];
    LatticeCoordinate(x_mu,mu);
    phase = phase + (two_pi_over_L * mom[mu]) * x_mu;
  }

  phase = exp(phase*imag_unit);
}
|
||||||
|
|
||||||
|
// Build a point source: zero everywhere except at site `coor`,
// where a SpinColourMatrix assigned the value 1.0 is poked in.
void PointSource(Coordinate &coor,LatticePropagator &source)
{
  SpinColourMatrix unit;
  unit = 1.0;

  source = Zero();
  pokeSite(unit,source,coor);
}
|
||||||
|
// Build a noise wall source on timeslice `tslice`.
// Bernoulli noise drawn from RNG is rescaled as (2*noise - (1,1))/sqrt(2),
// masked to the requested timeslice, and multiplied into a propagator set to 1.0.
void Z2WallSource(GridParallelRNG &RNG,int tslice,LatticePropagator &source)
{
  GridBase *grid = source.Grid();

  LatticeComplex noise(grid);
  LatticeComplex zero_field(grid);
  zero_field = Zero();
  LatticeInteger tcoor(grid);

  RealD nrm = 1.0/sqrt(2);
  bernoulli(RNG, noise); // 0,1 50:50

  noise = (2.*noise - Complex(1,1))*nrm;

  // Keep the noise only on the chosen timeslice.
  LatticeCoordinate(tcoor,Tdir);
  noise = where(tcoor==Integer(tslice), noise, zero_field);

  source = 1.0;
  source = source*noise;
  std::cout << " Z2 wall " << norm2(source) << std::endl;
}
|
||||||
|
template<class Field>
|
||||||
|
void GaussianSmear(LatticeGaugeField &U,Field &unsmeared,Field &smeared)
|
||||||
|
{
|
||||||
|
typedef CovariantLaplacianCshift <PeriodicGimplR,Field> Laplacian_t;
|
||||||
|
Laplacian_t Laplacian(U);
|
||||||
|
|
||||||
|
Integer Iterations = 40;
|
||||||
|
Real width = 2.0;
|
||||||
|
Real coeff = (width*width) / Real(4*Iterations);
|
||||||
|
|
||||||
|
Field tmp(U.Grid());
|
||||||
|
smeared=unsmeared;
|
||||||
|
// chi = (1-p^2/2N)^N kronecker
|
||||||
|
for(int n = 0; n < Iterations; ++n) {
|
||||||
|
Laplacian.M(smeared,tmp);
|
||||||
|
smeared = smeared - coeff*tmp;
|
||||||
|
std::cout << " smear iter " << n<<" " <<norm2(smeared)<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Build a smeared source: a point source at `site` passed through GaussianSmear.
// The norm is printed before and after smearing.
void GaussianSource(Coordinate &site,LatticeGaugeField &U,LatticePropagator &source)
{
  LatticePropagator unsmeared(source.Grid());

  PointSource(site,source);
  std::cout << " GaussianSource Kronecker "<< norm2(source)<<std::endl;

  // Smear from a copy so that `source` can receive the result.
  unsmeared = source;
  GaussianSmear(U,unsmeared,source);
  std::cout << " GaussianSource Smeared "<< norm2(source)<<std::endl;
}
|
||||||
|
// Build a smeared wall source: a Z2 wall on timeslice `tslice` passed through GaussianSmear.
void GaussianWallSource(GridParallelRNG &RNG,int tslice,LatticeGaugeField &U,LatticePropagator &source)
{
  Z2WallSource(RNG,tslice,source);

  // Smear from a copy so that `source` can receive the result.
  LatticePropagator unsmeared = source;
  GaussianSmear(U,unsmeared,source);
}
|
||||||
|
void SequentialSource(int tslice,Coordinate &mom,LatticePropagator &spectator,LatticePropagator &source)
|
||||||
|
{
|
||||||
|
assert(mom.size()==Nd);
|
||||||
|
assert(mom[Tdir] == 0);
|
||||||
|
|
||||||
|
GridBase * grid = spectator.Grid();
|
||||||
|
|
||||||
|
|
||||||
|
LatticeInteger ts(grid);
|
||||||
|
LatticeCoordinate(ts,Tdir);
|
||||||
|
source = Zero();
|
||||||
|
source = where(ts==Integer(tslice),spectator,source); // Stick in a slice of the spectator, zero everywhere else
|
||||||
|
|
||||||
|
LatticeComplex phase(grid);
|
||||||
|
MakePhase(mom,phase);
|
||||||
|
|
||||||
|
source = source *phase;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Action>
|
||||||
|
void MasslessFreePropagator(Action &D,LatticePropagator &source,LatticePropagator &propagator)
|
||||||
|
{
|
||||||
|
GridBase *UGrid = source.Grid();
|
||||||
|
GridBase *FGrid = D.FermionGrid();
|
||||||
|
bool fiveD = true; //calculate 5d free propagator
|
||||||
|
RealD mass = D.Mass();
|
||||||
|
LatticeFermion src4 (UGrid);
|
||||||
|
LatticeFermion result4 (UGrid);
|
||||||
|
LatticeFermion result5(FGrid);
|
||||||
|
LatticeFermion src5(FGrid);
|
||||||
|
LatticePropagator prop5(FGrid);
|
||||||
|
for(int s=0;s<Nd;s++){
|
||||||
|
for(int c=0;c<Nc;c++){
|
||||||
|
|
||||||
|
PropToFerm<Action>(src4,source,s,c);
|
||||||
|
|
||||||
|
D.ImportPhysicalFermionSource(src4,src5);
|
||||||
|
D.FreePropagator(src5,result5,mass,true);
|
||||||
|
std::cout<<GridLogMessage
|
||||||
|
<<"Free 5D prop spin "<<s<<" color "<<c
|
||||||
|
<<" norm2(src5d) " <<norm2(src5)
|
||||||
|
<<" norm2(result5d) "<<norm2(result5)<<std::endl;
|
||||||
|
|
||||||
|
D.ExportPhysicalFermionSolution(result5,result4);
|
||||||
|
|
||||||
|
FermToProp<Action>(prop5,result5,s,c);
|
||||||
|
FermToProp<Action>(propagator,result4,s,c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
LatticePropagator Vector_mu(UGrid);
|
||||||
|
LatticeComplex VV (UGrid);
|
||||||
|
std::vector<TComplex> sumVV;
|
||||||
|
Gamma::Algebra GammaV[3] = {
|
||||||
|
Gamma::Algebra::GammaX,
|
||||||
|
Gamma::Algebra::GammaY,
|
||||||
|
Gamma::Algebra::GammaZ
|
||||||
|
};
|
||||||
|
for( int mu=0;mu<3;mu++ ) {
|
||||||
|
Gamma gV(GammaV[mu]);
|
||||||
|
D.ContractConservedCurrent(prop5,prop5,Vector_mu,source,Current::Vector,mu);
|
||||||
|
VV = trace(gV*Vector_mu); // (local) Vector-Vector conserved current
|
||||||
|
sliceSum(VV,sumVV,Tdir);
|
||||||
|
int Nt = sumVV.size();
|
||||||
|
for(int t=0;t<Nt;t++){
|
||||||
|
RealD Ct = real(TensorRemove(sumVV[t]))*LCscale;
|
||||||
|
RealD Cont=0;
|
||||||
|
if(t) Cont=1.0/(2 * M_PI *M_PI * t*t*t);
|
||||||
|
std::cout<<GridLogMessage <<"VVc["<<mu<<"]["<<t<<"] "<< Ct
|
||||||
|
<< " 2 pi^2 t^3 C(t) "<< Ct/Cont << " delta Ct "<< Ct-Cont <<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template<class Action>
|
||||||
|
void MasslessFreePropagator1(Action &D,LatticePropagator &source,LatticePropagator &propagator)
|
||||||
|
{
|
||||||
|
bool fiveD = false; //calculate 4d free propagator
|
||||||
|
RealD mass = D.Mass();
|
||||||
|
GridBase *UGrid = source.Grid();
|
||||||
|
LatticeFermion src4 (UGrid);
|
||||||
|
LatticeFermion result4 (UGrid);
|
||||||
|
for(int s=0;s<Nd;s++){
|
||||||
|
for(int c=0;c<Nc;c++){
|
||||||
|
PropToFerm<Action>(src4,source,s,c);
|
||||||
|
D.FreePropagator(src4,result4,mass,false);
|
||||||
|
FermToProp<Action>(propagator,result4,s,c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Action>
|
||||||
|
void Solve(Action &D,LatticePropagator &source,LatticePropagator &propagator)
|
||||||
|
{
|
||||||
|
GridBase *UGrid = D.GaugeGrid();
|
||||||
|
GridBase *FGrid = D.FermionGrid();
|
||||||
|
|
||||||
|
LatticeFermion src4 (UGrid);
|
||||||
|
LatticeFermion src5 (FGrid);
|
||||||
|
LatticeFermion result5(FGrid);
|
||||||
|
LatticeFermion result4(UGrid);
|
||||||
|
LatticePropagator prop5(FGrid);
|
||||||
|
|
||||||
|
ConjugateGradient<LatticeFermion> CG(1.0e-7,100000);
|
||||||
|
SchurRedBlackDiagMooeeSolve<LatticeFermion> schur(CG);
|
||||||
|
ZeroGuesser<LatticeFermion> ZG; // Could be a DeflatedGuesser if have eigenvectors
|
||||||
|
for(int s=0;s<Nd;s++){
|
||||||
|
for(int c=0;c<Nc;c++){
|
||||||
|
PropToFerm<Action>(src4,source,s,c);
|
||||||
|
|
||||||
|
D.ImportPhysicalFermionSource(src4,src5);
|
||||||
|
|
||||||
|
result5=Zero();
|
||||||
|
schur(D,src5,result5,ZG);
|
||||||
|
std::cout<<GridLogMessage
|
||||||
|
<<"spin "<<s<<" color "<<c
|
||||||
|
<<" norm2(src5d) " <<norm2(src5)
|
||||||
|
<<" norm2(result5d) "<<norm2(result5)<<std::endl;
|
||||||
|
|
||||||
|
D.ExportPhysicalFermionSolution(result5,result4);
|
||||||
|
|
||||||
|
FermToProp<Action>(prop5,result5,s,c);
|
||||||
|
FermToProp<Action>(propagator,result4,s,c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LatticePropagator Axial_mu(UGrid);
|
||||||
|
LatticePropagator Vector_mu(UGrid);
|
||||||
|
|
||||||
|
LatticeComplex PA (UGrid);
|
||||||
|
LatticeComplex VV (UGrid);
|
||||||
|
LatticeComplex PJ5q(UGrid);
|
||||||
|
LatticeComplex PP (UGrid);
|
||||||
|
|
||||||
|
std::vector<TComplex> sumPA;
|
||||||
|
std::vector<TComplex> sumVV;
|
||||||
|
std::vector<TComplex> sumPP;
|
||||||
|
std::vector<TComplex> sumPJ5q;
|
||||||
|
|
||||||
|
Gamma g5(Gamma::Algebra::Gamma5);
|
||||||
|
D.ContractConservedCurrent(prop5,prop5,Axial_mu,source,Current::Axial,Tdir);
|
||||||
|
PA = trace(g5*Axial_mu); // Pseudoscalar-Axial conserved current
|
||||||
|
sliceSum(PA,sumPA,Tdir);
|
||||||
|
|
||||||
|
int Nt{static_cast<int>(sumPA.size())};
|
||||||
|
|
||||||
|
for(int t=0;t<Nt;t++) std::cout<<GridLogMessage <<"PAc["<<t<<"] "<<real(TensorRemove(sumPA[t]))*LCscale<<std::endl;
|
||||||
|
|
||||||
|
PP = trace(adj(propagator)*propagator); // Pseudoscalar density
|
||||||
|
sliceSum(PP,sumPP,Tdir);
|
||||||
|
for(int t=0;t<Nt;t++) std::cout<<GridLogMessage <<"PP["<<t<<"] "<<real(TensorRemove(sumPP[t]))*LCscale<<std::endl;
|
||||||
|
|
||||||
|
D.ContractJ5q(prop5,PJ5q);
|
||||||
|
sliceSum(PJ5q,sumPJ5q,Tdir);
|
||||||
|
for(int t=0;t<Nt;t++) std::cout<<GridLogMessage <<"PJ5q["<<t<<"] "<<real(TensorRemove(sumPJ5q[t]))<<std::endl;
|
||||||
|
|
||||||
|
Gamma::Algebra GammaV[3] = {
|
||||||
|
Gamma::Algebra::GammaX,
|
||||||
|
Gamma::Algebra::GammaY,
|
||||||
|
Gamma::Algebra::GammaZ
|
||||||
|
};
|
||||||
|
for( int mu=0;mu<3;mu++ ) {
|
||||||
|
Gamma gV(GammaV[mu]);
|
||||||
|
D.ContractConservedCurrent(prop5,prop5,Vector_mu,source,Current::Vector,mu);
|
||||||
|
// auto ss=sliceSum(Vector_mu,Tdir);
|
||||||
|
// for(int t=0;t<Nt;t++) std::cout<<GridLogMessage <<"ss["<<mu<<"]["<<t<<"] "<<ss[t]<<std::endl;
|
||||||
|
VV = trace(gV*Vector_mu); // (local) Vector-Vector conserved current
|
||||||
|
sliceSum(VV,sumVV,Tdir);
|
||||||
|
for(int t=0;t<Nt;t++){
|
||||||
|
RealD Ct = real(TensorRemove(sumVV[t]))*LCscale;
|
||||||
|
RealD Cont=0;
|
||||||
|
if(t) Cont=1.0/(2 * M_PI *M_PI * t*t*t);
|
||||||
|
std::cout<<GridLogMessage <<"VVc["<<mu<<"]["<<t<<"] "<< Ct
|
||||||
|
<< " 2 pi^2 t^3 C(t) "<< Ct/Cont << " delta Ct "<< Ct-Cont <<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
class MesonFile: Serializable {
|
||||||
|
public:
|
||||||
|
GRID_SERIALIZABLE_CLASS_MEMBERS(MesonFile, std::vector<std::vector<Complex> >, data);
|
||||||
|
};
|
||||||
|
|
||||||
|
void MesonTrace(std::string file,LatticePropagator &q1,LatticePropagator &q2,LatticeComplex &phase)
|
||||||
|
{
|
||||||
|
const int nchannel=4;
|
||||||
|
Gamma::Algebra Gammas[nchannel][2] = {
|
||||||
|
{Gamma::Algebra::GammaXGamma5,Gamma::Algebra::GammaXGamma5},
|
||||||
|
{Gamma::Algebra::GammaYGamma5,Gamma::Algebra::GammaYGamma5},
|
||||||
|
{Gamma::Algebra::GammaZGamma5,Gamma::Algebra::GammaZGamma5},
|
||||||
|
{Gamma::Algebra::Identity,Gamma::Algebra::Identity}
|
||||||
|
};
|
||||||
|
|
||||||
|
LatticeComplex meson_CF(q1.Grid());
|
||||||
|
MesonFile MF;
|
||||||
|
|
||||||
|
for(int ch=0;ch<nchannel;ch++){
|
||||||
|
|
||||||
|
Gamma Gsrc(Gammas[ch][0]);
|
||||||
|
Gamma Gsnk(Gammas[ch][1]);
|
||||||
|
|
||||||
|
meson_CF = trace(adj(q1)*Gsnk*q2*adj(Gsrc));
|
||||||
|
|
||||||
|
std::vector<TComplex> meson_T;
|
||||||
|
sliceSum(meson_CF,meson_T, Tdir);
|
||||||
|
|
||||||
|
int nt=meson_T.size();
|
||||||
|
|
||||||
|
std::vector<Complex> corr(nt);
|
||||||
|
for(int t=0;t<nt;t++){
|
||||||
|
corr[t] = TensorRemove(meson_T[t])*LLscale; // Yes this is ugly, not figured a work around
|
||||||
|
RealD Ct = real(corr[t]);
|
||||||
|
RealD Cont=0;
|
||||||
|
if(t) Cont=1.0/(2 * M_PI *M_PI * t*t*t);
|
||||||
|
std::cout << " channel "<<ch<<" t "<<t<<" " <<real(corr[t])<< " 2 pi^2 t^3 C(t) "<< 2 * M_PI *M_PI * t*t*t * Ct
|
||||||
|
<< " deltaC " <<Ct-Cont<<std::endl;
|
||||||
|
}
|
||||||
|
MF.data.push_back(corr);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
XmlWriter WR(file);
|
||||||
|
write(WR,"MesonFile",MF);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
int Ls= atoi(getenv("Ls"));
|
||||||
|
|
||||||
|
// Double precision grids
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
|
||||||
|
GridDefaultSimd(Nd,vComplex::Nsimd()),
|
||||||
|
GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// You can manage seeds however you like.
|
||||||
|
// Recommend SeedUniqueString.
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// std::vector<int> seeds4({1,2,3,4});
|
||||||
|
// GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
|
||||||
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
std::string config;
|
||||||
|
RealD M5=atof(getenv("M5"));
|
||||||
|
RealD mq = atof(getenv("mass"));
|
||||||
|
int point_x = atoi(getenv("point_x"));
|
||||||
|
int point_y = atoi(getenv("point_y"));
|
||||||
|
int point_z = atoi(getenv("point_z"));
|
||||||
|
int point_t = atoi(getenv("point_t"));
|
||||||
|
std::vector<RealD> masses({ mq} ); // u/d, s, c ??
|
||||||
|
if( argc > 1 && argv[1][0] != '-' )
|
||||||
|
{
|
||||||
|
std::cout<<GridLogMessage <<"Loading configuration from "<<argv[1]<<std::endl;
|
||||||
|
FieldMetaData header;
|
||||||
|
NerscIO::readConfiguration(Umu, header, argv[1]);
|
||||||
|
config=argv[1];
|
||||||
|
LLscale = 1.0;
|
||||||
|
LCscale = 1.0;
|
||||||
|
} else {
|
||||||
|
printf("Expected a configuration");
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int nmass = masses.size();
|
||||||
|
|
||||||
|
typedef MobiusFermionR FermionActionR;
|
||||||
|
std::vector<FermionActionR *> FermActs;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage <<"======================"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<"DomainWallFermion action"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<"======================"<<std::endl;
|
||||||
|
|
||||||
|
for(auto mass: masses) {
|
||||||
|
std::vector<Complex> boundary = {1,1,1,-1};
|
||||||
|
FermionActionR::ImplParams Params(boundary);
|
||||||
|
RealD b=1.5;
|
||||||
|
RealD c=0.5;
|
||||||
|
FermActs.push_back(new FermionActionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,b,c));
|
||||||
|
}
|
||||||
|
|
||||||
|
LatticePropagator point_source(UGrid);
|
||||||
|
|
||||||
|
Coordinate Origin({point_x,point_y,point_z,point_t});
|
||||||
|
PointSource (Origin,point_source);
|
||||||
|
|
||||||
|
std::vector<LatticePropagator> PointProps(nmass,UGrid);
|
||||||
|
|
||||||
|
for(int m=0;m<nmass;m++) {
|
||||||
|
Solve(*FermActs[m],point_source ,PointProps[m]);
|
||||||
|
}
|
||||||
|
|
||||||
|
LatticeComplex phase(UGrid);
|
||||||
|
Coordinate mom({0,0,0,0});
|
||||||
|
MakePhase(mom,phase);
|
||||||
|
|
||||||
|
for(int m1=0 ;m1<nmass;m1++) {
|
||||||
|
for(int m2=m1;m2<nmass;m2++) {
|
||||||
|
std::stringstream ssp,ssg,ssz;
|
||||||
|
|
||||||
|
ssp<<config<< "_m" << m1 << "_m"<< m2 << "_point_meson.xml";
|
||||||
|
ssz<<config<< "_m" << m1 << "_m"<< m2 << "_free_meson.xml";
|
||||||
|
|
||||||
|
std::cout << "CG determined VV correlation function"<<std::endl;
|
||||||
|
MesonTrace(ssp.str(),PointProps[m1],PointProps[m2],phase);
|
||||||
|
|
||||||
|
}}
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
479
examples/Example_taku1.cc
Normal file
479
examples/Example_taku1.cc
Normal file
@ -0,0 +1,479 @@
|
|||||||
|
/*
 * Warning: This code is illustrative only; it is not well tested, and it is not
 * meant for production use without regression tests being applied.
 */
|
||||||
|
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
RealD LLscale =1.0;
|
||||||
|
RealD LCscale =1.0;
|
||||||
|
|
||||||
|
// Gauge-covariant Laplacian built from covariant shifts, exposed through the
// SparseMatrixBase interface. M() sums over directions 0 .. Nd-2 only.
template<class Gimpl,class Field> class CovariantLaplacianCshift : public SparseMatrixBase<Field>
{
public:
  INHERIT_GIMPL_TYPES(Gimpl);

  GridBase   *grid;   // grid the gauge field lives on
  GaugeField  U;      // private copy of the gauge field

  CovariantLaplacianCshift(GaugeField &_U) : grid(_U.Grid()), U(_U) {}

  virtual GridBase *Grid(void) { return grid; }

  // out = sum over mu < Nd-1 of ( 2*in - forward-shifted in - backward-shifted in )
  virtual void M (const Field &in, Field &out)
  {
    out=Zero();
    for(int dir=0; dir<Nd-1; ++dir) {
      // NB: inefficient - the link field is extracted again on every call.
      GaugeLinkField link = PeekIndex<LorentzIndex>(U, dir);
      out = out - Gimpl::CovShiftForward(link,dir,in);
      out = out - Gimpl::CovShiftBackward(link,dir,in);
      out = out + 2.0*in;
    }
  }

  // The Laplacian is hermitian, so Mdag simply forwards to M.
  virtual void Mdag (const Field &in, Field &out) { M(in,out); }

  // Unimplemented: needed only for multigrid.
  virtual void Mdiag   (const Field &in, Field &out)                  { assert(0); }
  virtual void Mdir    (const Field &in, Field &out,int dir, int disp){ assert(0); }
  virtual void MdirAll (const Field &in, std::vector<Field> &out)     { assert(0); }
};
|
||||||
|
|
||||||
|
// Fill `phase` with exp( i * sum_mu (2 pi / L_mu) * mom[mu] * x_mu ),
// where L_mu are the global lattice dimensions of phase's grid.
void MakePhase(Coordinate mom,LatticeComplex &phase)
{
  GridBase *grid = phase.Grid();
  auto      dims = grid->GlobalDimensions();
  ComplexD  imag_unit(0.0,1.0);

  LatticeComplex x_mu(phase.Grid());

  // Accumulate the real exponent first, then exponentiate once at the end.
  phase=Zero();
  for(int mu=0; mu<Nd; ++mu){
    RealD two_pi_over_L = M_PI * 2.0/ dims[mu];
    LatticeCoordinate(x_mu,mu);
    phase = phase + (two_pi_over_L * mom[mu]) * x_mu;
  }
  phase = exp(phase*imag_unit);
}
|
||||||
|
|
||||||
|
// Build a point source: zero everywhere except a unit spin-colour matrix
// poked onto the site `coor`.
void PointSource(Coordinate &coor,LatticePropagator &source)
{
  SpinColourMatrix unit;
  unit=1.0;

  source=Zero();
  pokeSite(unit,source,coor);
}
|
||||||
|
// Build a noise wall source on time slice `tslice`: Bernoulli noise is mapped
// through (2*noise - (1+i))/sqrt(2), every other time slice is zeroed, and the
// result multiplies a unit propagator.
void Z2WallSource(GridParallelRNG &RNG,int tslice,LatticePropagator &source)
{
  GridBase *grid = source.Grid();

  LatticeComplex noise(grid);
  LatticeComplex zero_field(grid); zero_field=Zero();
  LatticeInteger tcoor(grid);

  RealD nrm=1.0/sqrt(2);
  bernoulli(RNG, noise); // 0,1 50:50

  noise = (2.*noise - Complex(1,1))*nrm;

  // Keep the noise on the requested time slice only.
  LatticeCoordinate(tcoor,Tdir);
  noise = where(tcoor==Integer(tslice), noise, zero_field);

  source = 1.0;
  source = source*noise;
  std::cout << " Z2 wall " << norm2(source) << std::endl;
}
|
||||||
|
template<class Field>
|
||||||
|
void GaussianSmear(LatticeGaugeField &U,Field &unsmeared,Field &smeared)
|
||||||
|
{
|
||||||
|
typedef CovariantLaplacianCshift <PeriodicGimplR,Field> Laplacian_t;
|
||||||
|
Laplacian_t Laplacian(U);
|
||||||
|
|
||||||
|
Integer Iterations = 40;
|
||||||
|
Real width = 2.0;
|
||||||
|
Real coeff = (width*width) / Real(4*Iterations);
|
||||||
|
|
||||||
|
Field tmp(U.Grid());
|
||||||
|
smeared=unsmeared;
|
||||||
|
// chi = (1-p^2/2N)^N kronecker
|
||||||
|
for(int n = 0; n < Iterations; ++n) {
|
||||||
|
Laplacian.M(smeared,tmp);
|
||||||
|
smeared = smeared - coeff*tmp;
|
||||||
|
std::cout << " smear iter " << n<<" " <<norm2(smeared)<<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Build a smeared point source: a point source at `site` is created, copied,
// and passed through GaussianSmear(); the norm is logged before and after.
void GaussianSource(Coordinate &site,LatticeGaugeField &U,LatticePropagator &source)
{
  LatticePropagator kronecker(source.Grid());

  PointSource(site,source);
  std::cout << " GaussianSource Kronecker "<< norm2(source)<<std::endl;

  // Smear from a copy so the input and output fields are distinct objects.
  kronecker = source;
  GaussianSmear(U,kronecker,source);
  std::cout << " GaussianSource Smeared "<< norm2(source)<<std::endl;
}
|
||||||
|
// Build a smeared wall source: a Z2WallSource() on `tslice` is copied and the
// copy is passed through GaussianSmear() back into `source`.
void GaussianWallSource(GridParallelRNG &RNG,int tslice,LatticeGaugeField &U,LatticePropagator &source)
{
  Z2WallSource(RNG,tslice,source);

  // Smear from a copy so the input and output fields are distinct objects.
  LatticePropagator unsmeared(source);
  GaussianSmear(U,unsmeared,source);
}
|
||||||
|
void SequentialSource(int tslice,Coordinate &mom,LatticePropagator &spectator,LatticePropagator &source)
|
||||||
|
{
|
||||||
|
assert(mom.size()==Nd);
|
||||||
|
assert(mom[Tdir] == 0);
|
||||||
|
|
||||||
|
GridBase * grid = spectator.Grid();
|
||||||
|
|
||||||
|
|
||||||
|
LatticeInteger ts(grid);
|
||||||
|
LatticeCoordinate(ts,Tdir);
|
||||||
|
source = Zero();
|
||||||
|
source = where(ts==Integer(tslice),spectator,source); // Stick in a slice of the spectator, zero everywhere else
|
||||||
|
|
||||||
|
LatticeComplex phase(grid);
|
||||||
|
MakePhase(mom,phase);
|
||||||
|
|
||||||
|
source = source *phase;
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Action>
|
||||||
|
void MasslessFreePropagator(Action &D,LatticePropagator &source,LatticePropagator &propagator)
|
||||||
|
{
|
||||||
|
GridBase *UGrid = source.Grid();
|
||||||
|
GridBase *FGrid = D.FermionGrid();
|
||||||
|
bool fiveD = true; //calculate 5d free propagator
|
||||||
|
RealD mass = D.Mass();
|
||||||
|
LatticeFermion src4 (UGrid);
|
||||||
|
LatticeFermion result4 (UGrid);
|
||||||
|
LatticeFermion result5(FGrid);
|
||||||
|
LatticeFermion src5(FGrid);
|
||||||
|
LatticePropagator prop5(FGrid);
|
||||||
|
for(int s=0;s<Nd;s++){
|
||||||
|
for(int c=0;c<Nc;c++){
|
||||||
|
|
||||||
|
PropToFerm<Action>(src4,source,s,c);
|
||||||
|
|
||||||
|
D.ImportPhysicalFermionSource(src4,src5);
|
||||||
|
D.FreePropagator(src5,result5,mass,true);
|
||||||
|
std::cout<<GridLogMessage
|
||||||
|
<<"Free 5D prop spin "<<s<<" color "<<c
|
||||||
|
<<" norm2(src5d) " <<norm2(src5)
|
||||||
|
<<" norm2(result5d) "<<norm2(result5)<<std::endl;
|
||||||
|
|
||||||
|
D.ExportPhysicalFermionSolution(result5,result4);
|
||||||
|
|
||||||
|
FermToProp<Action>(prop5,result5,s,c);
|
||||||
|
FermToProp<Action>(propagator,result4,s,c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
LatticePropagator Vector_mu(UGrid);
|
||||||
|
LatticeComplex VV (UGrid);
|
||||||
|
std::vector<TComplex> sumVV;
|
||||||
|
Gamma::Algebra GammaV[3] = {
|
||||||
|
Gamma::Algebra::GammaX,
|
||||||
|
Gamma::Algebra::GammaY,
|
||||||
|
Gamma::Algebra::GammaZ
|
||||||
|
};
|
||||||
|
for( int mu=0;mu<3;mu++ ) {
|
||||||
|
Gamma gV(GammaV[mu]);
|
||||||
|
D.ContractConservedCurrent(prop5,prop5,Vector_mu,source,Current::Vector,mu);
|
||||||
|
VV = trace(gV*Vector_mu); // (local) Vector-Vector conserved current
|
||||||
|
sliceSum(VV,sumVV,Tdir);
|
||||||
|
int Nt = sumVV.size();
|
||||||
|
for(int t=0;t<Nt;t++){
|
||||||
|
RealD Ct = real(TensorRemove(sumVV[t]))*LCscale;
|
||||||
|
RealD Cont=0;
|
||||||
|
if(t) Cont=1.0/(2 * M_PI *M_PI * t*t*t);
|
||||||
|
std::cout<<GridLogMessage <<"VVc["<<mu<<"]["<<t<<"] "<< Ct
|
||||||
|
<< " 2 pi^2 t^3 C(t) "<< Ct/Cont << " delta Ct "<< Ct-Cont <<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
template<class Action>
|
||||||
|
void MasslessFreePropagator1(Action &D,LatticePropagator &source,LatticePropagator &propagator)
|
||||||
|
{
|
||||||
|
bool fiveD = false; //calculate 4d free propagator
|
||||||
|
RealD mass = D.Mass();
|
||||||
|
GridBase *UGrid = source.Grid();
|
||||||
|
LatticeFermion src4 (UGrid);
|
||||||
|
LatticeFermion result4 (UGrid);
|
||||||
|
for(int s=0;s<Nd;s++){
|
||||||
|
for(int c=0;c<Nc;c++){
|
||||||
|
PropToFerm<Action>(src4,source,s,c);
|
||||||
|
D.FreePropagator(src4,result4,mass,false);
|
||||||
|
FermToProp<Action>(propagator,result4,s,c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template<class Action>
|
||||||
|
void Solve(Action &D,LatticePropagator &source,LatticePropagator &propagator)
|
||||||
|
{
|
||||||
|
GridBase *UGrid = D.GaugeGrid();
|
||||||
|
GridBase *FGrid = D.FermionGrid();
|
||||||
|
|
||||||
|
LatticeFermion src4 (UGrid);
|
||||||
|
LatticeFermion src5 (FGrid);
|
||||||
|
LatticeFermion result5(FGrid);
|
||||||
|
LatticeFermion result4(UGrid);
|
||||||
|
LatticePropagator prop5(FGrid);
|
||||||
|
|
||||||
|
ConjugateGradient<LatticeFermion> CG(1.0e-10,100000);
|
||||||
|
SchurRedBlackDiagMooeeSolve<LatticeFermion> schur(CG);
|
||||||
|
ZeroGuesser<LatticeFermion> ZG; // Could be a DeflatedGuesser if have eigenvectors
|
||||||
|
for(int s=0;s<Nd;s++){
|
||||||
|
for(int c=0;c<Nc;c++){
|
||||||
|
PropToFerm<Action>(src4,source,s,c);
|
||||||
|
|
||||||
|
D.ImportPhysicalFermionSource(src4,src5);
|
||||||
|
|
||||||
|
result5=Zero();
|
||||||
|
schur(D,src5,result5,ZG);
|
||||||
|
std::cout<<GridLogMessage
|
||||||
|
<<"spin "<<s<<" color "<<c
|
||||||
|
<<" norm2(src5d) " <<norm2(src5)
|
||||||
|
<<" norm2(result5d) "<<norm2(result5)<<std::endl;
|
||||||
|
|
||||||
|
D.ExportPhysicalFermionSolution(result5,result4);
|
||||||
|
|
||||||
|
FermToProp<Action>(prop5,result5,s,c);
|
||||||
|
FermToProp<Action>(propagator,result4,s,c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LatticePropagator Axial_mu(UGrid);
|
||||||
|
LatticePropagator Vector_mu(UGrid);
|
||||||
|
|
||||||
|
LatticeComplex PA (UGrid);
|
||||||
|
LatticeComplex VV (UGrid);
|
||||||
|
LatticeComplex PJ5q(UGrid);
|
||||||
|
LatticeComplex PP (UGrid);
|
||||||
|
|
||||||
|
std::vector<TComplex> sumPA;
|
||||||
|
std::vector<TComplex> sumVV;
|
||||||
|
std::vector<TComplex> sumPP;
|
||||||
|
std::vector<TComplex> sumPJ5q;
|
||||||
|
|
||||||
|
Gamma g5(Gamma::Algebra::Gamma5);
|
||||||
|
D.ContractConservedCurrent(prop5,prop5,Axial_mu,source,Current::Axial,Tdir);
|
||||||
|
PA = trace(g5*Axial_mu); // Pseudoscalar-Axial conserved current
|
||||||
|
sliceSum(PA,sumPA,Tdir);
|
||||||
|
|
||||||
|
int Nt{static_cast<int>(sumPA.size())};
|
||||||
|
|
||||||
|
for(int t=0;t<Nt;t++) std::cout<<GridLogMessage <<"PAc["<<t<<"] "<<real(TensorRemove(sumPA[t]))*LCscale<<std::endl;
|
||||||
|
|
||||||
|
PP = trace(adj(propagator)*propagator); // Pseudoscalar density
|
||||||
|
sliceSum(PP,sumPP,Tdir);
|
||||||
|
for(int t=0;t<Nt;t++) std::cout<<GridLogMessage <<"PP["<<t<<"] "<<real(TensorRemove(sumPP[t]))*LCscale<<std::endl;
|
||||||
|
|
||||||
|
D.ContractJ5q(prop5,PJ5q);
|
||||||
|
sliceSum(PJ5q,sumPJ5q,Tdir);
|
||||||
|
for(int t=0;t<Nt;t++) std::cout<<GridLogMessage <<"PJ5q["<<t<<"] "<<real(TensorRemove(sumPJ5q[t]))<<std::endl;
|
||||||
|
|
||||||
|
Gamma::Algebra GammaV[3] = {
|
||||||
|
Gamma::Algebra::GammaX,
|
||||||
|
Gamma::Algebra::GammaY,
|
||||||
|
Gamma::Algebra::GammaZ
|
||||||
|
};
|
||||||
|
for( int mu=0;mu<3;mu++ ) {
|
||||||
|
Gamma gV(GammaV[mu]);
|
||||||
|
D.ContractConservedCurrent(prop5,prop5,Vector_mu,source,Current::Vector,mu);
|
||||||
|
// auto ss=sliceSum(Vector_mu,Tdir);
|
||||||
|
// for(int t=0;t<Nt;t++) std::cout<<GridLogMessage <<"ss["<<mu<<"]["<<t<<"] "<<ss[t]<<std::endl;
|
||||||
|
VV = trace(gV*Vector_mu); // (local) Vector-Vector conserved current
|
||||||
|
sliceSum(VV,sumVV,Tdir);
|
||||||
|
for(int t=0;t<Nt;t++){
|
||||||
|
RealD Ct = real(TensorRemove(sumVV[t]))*LCscale;
|
||||||
|
RealD Cont=0;
|
||||||
|
if(t) Cont=1.0/(2 * M_PI *M_PI * t*t*t);
|
||||||
|
std::cout<<GridLogMessage <<"VVc["<<mu<<"]["<<t<<"] "<< Ct
|
||||||
|
<< " 2 pi^2 t^3 C(t) "<< Ct/Cont << " delta Ct "<< Ct-Cont <<std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
class MesonFile: Serializable {
|
||||||
|
public:
|
||||||
|
GRID_SERIALIZABLE_CLASS_MEMBERS(MesonFile, std::vector<std::vector<Complex> >, data);
|
||||||
|
};
|
||||||
|
|
||||||
|
void MesonTrace(std::string file,LatticePropagator &q1,LatticePropagator &q2,LatticeComplex &phase)
|
||||||
|
{
|
||||||
|
const int nchannel=4;
|
||||||
|
Gamma::Algebra Gammas[nchannel][2] = {
|
||||||
|
{Gamma::Algebra::GammaXGamma5,Gamma::Algebra::GammaXGamma5},
|
||||||
|
{Gamma::Algebra::GammaYGamma5,Gamma::Algebra::GammaYGamma5},
|
||||||
|
{Gamma::Algebra::GammaZGamma5,Gamma::Algebra::GammaZGamma5},
|
||||||
|
{Gamma::Algebra::Identity,Gamma::Algebra::Identity}
|
||||||
|
};
|
||||||
|
|
||||||
|
LatticeComplex meson_CF(q1.Grid());
|
||||||
|
MesonFile MF;
|
||||||
|
|
||||||
|
for(int ch=0;ch<nchannel;ch++){
|
||||||
|
|
||||||
|
Gamma Gsrc(Gammas[ch][0]);
|
||||||
|
Gamma Gsnk(Gammas[ch][1]);
|
||||||
|
|
||||||
|
meson_CF = trace(adj(q1)*Gsnk*q2*adj(Gsrc));
|
||||||
|
|
||||||
|
std::vector<TComplex> meson_T;
|
||||||
|
sliceSum(meson_CF,meson_T, Tdir);
|
||||||
|
|
||||||
|
int nt=meson_T.size();
|
||||||
|
|
||||||
|
std::vector<Complex> corr(nt);
|
||||||
|
for(int t=0;t<nt;t++){
|
||||||
|
corr[t] = TensorRemove(meson_T[t])*LLscale; // Yes this is ugly, not figured a work around
|
||||||
|
RealD Ct = real(corr[t]);
|
||||||
|
RealD Cont=0;
|
||||||
|
if(t) Cont=1.0/(2 * M_PI *M_PI * t*t*t);
|
||||||
|
std::cout << " channel "<<ch<<" t "<<t<<" " <<real(corr[t])<< " 2 pi^2 t^3 C(t) "<< 2 * M_PI *M_PI * t*t*t * Ct
|
||||||
|
<< " deltaC " <<Ct-Cont<<std::endl;
|
||||||
|
}
|
||||||
|
MF.data.push_back(corr);
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
XmlWriter WR(file);
|
||||||
|
write(WR,"MesonFile",MF);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
const int Ls=10;
|
||||||
|
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
// Double precision grids
|
||||||
|
GridCartesian * UGrid = SpaceTimeGrid::makeFourDimGrid(GridDefaultLatt(),
|
||||||
|
GridDefaultSimd(Nd,vComplex::Nsimd()),
|
||||||
|
GridDefaultMpi());
|
||||||
|
GridRedBlackCartesian * UrbGrid = SpaceTimeGrid::makeFourDimRedBlackGrid(UGrid);
|
||||||
|
GridCartesian * FGrid = SpaceTimeGrid::makeFiveDimGrid(Ls,UGrid);
|
||||||
|
GridRedBlackCartesian * FrbGrid = SpaceTimeGrid::makeFiveDimRedBlackGrid(Ls,UGrid);
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// You can manage seeds however you like.
|
||||||
|
// Recommend SeedUniqueString.
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// std::vector<int> seeds4({1,2,3,4});
|
||||||
|
// GridParallelRNG RNG4(UGrid); RNG4.SeedFixedIntegers(seeds4);
|
||||||
|
|
||||||
|
LatticeGaugeField Umu(UGrid);
|
||||||
|
std::string config;
|
||||||
|
RealD M5=atof(getenv("M5"));
|
||||||
|
RealD mq = atof(getenv("mass"));
|
||||||
|
int tadpole = atof(getenv("tadpole"));
|
||||||
|
std::vector<RealD> masses({ mq} ); // u/d, s, c ??
|
||||||
|
if( argc > 1 && argv[1][0] != '-' )
|
||||||
|
{
|
||||||
|
std::cout<<GridLogMessage <<"Loading configuration from "<<argv[1]<<std::endl;
|
||||||
|
FieldMetaData header;
|
||||||
|
NerscIO::readConfiguration(Umu, header, argv[1]);
|
||||||
|
config=argv[1];
|
||||||
|
LLscale = 1.0;
|
||||||
|
LCscale = 1.0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SU<Nc>::ColdConfiguration(Umu);
|
||||||
|
config="ColdConfig";
|
||||||
|
// RealD P=1.0; // Don't scale
|
||||||
|
// RealD P=0.6388238 // 32Ifine
|
||||||
|
// RealD P=0.6153342; // 64I
|
||||||
|
RealD P=0.5871119; // 48I
|
||||||
|
RealD u0 = sqrt(sqrt(P));
|
||||||
|
RealD w0 = 1 - M5;
|
||||||
|
std::cout<<GridLogMessage <<"For plaquette P="<<P<<" u0= "<<u0<<std::endl;
|
||||||
|
if ( tadpole == 1 ) {
|
||||||
|
Umu = Umu * u0;
|
||||||
|
// LLscale = 1.0/(1-w0*w0)/(1-w0*w0)/u0/u0;
|
||||||
|
// LCscale = 1.0/(1-w0*w0)/(1-w0*w0)/u0/u0;
|
||||||
|
LLscale = 1.0;
|
||||||
|
LCscale = 1.0;
|
||||||
|
std::cout<<GridLogMessage <<"Gauge links are u= u0 "<<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<"M5 = "<<M5<<std::endl;
|
||||||
|
} else if ( tadpole == 2) {
|
||||||
|
std::cout<<GridLogMessage <<"Gauge links are u=1 "<<std::endl;
|
||||||
|
LLscale = 1.0;
|
||||||
|
LCscale = 1.0;
|
||||||
|
std::cout<<GridLogMessage <<"M5 = "<<M5<<std::endl;
|
||||||
|
} else {
|
||||||
|
LLscale = 1.0/u0/u0;
|
||||||
|
LCscale = 1.0/u0/u0;
|
||||||
|
M5 = M5 - 4.0 * (1-u0);
|
||||||
|
std::cout<<GridLogMessage <<"Gauge links are u=1 "<<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<"M5mf = "<<M5<<std::endl;
|
||||||
|
}
|
||||||
|
std::cout<<GridLogMessage <<"mq = "<<mq<<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<"LLscale = "<<LLscale<<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<"LCscale = "<<LCscale<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
int nmass = masses.size();
|
||||||
|
|
||||||
|
typedef DomainWallFermionR FermionActionR;
|
||||||
|
// typedef MobiusFermionR FermionActionR;
|
||||||
|
std::vector<FermionActionR *> FermActs;
|
||||||
|
std::vector<DomainWallFermionR *> DWFActs;
|
||||||
|
|
||||||
|
std::cout<<GridLogMessage <<"======================"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<"DomainWallFermion action"<<std::endl;
|
||||||
|
std::cout<<GridLogMessage <<"======================"<<std::endl;
|
||||||
|
|
||||||
|
for(auto mass: masses) {
|
||||||
|
std::vector<Complex> boundary = {1,1,1,-1};
|
||||||
|
FermionActionR::ImplParams Params(boundary);
|
||||||
|
RealD b=1.5;
|
||||||
|
RealD c=0.5;
|
||||||
|
std::cout<<GridLogMessage <<"Making DomainWallFermion action"<<std::endl;
|
||||||
|
// DWFActs.push_back(new DomainWallFermionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5));
|
||||||
|
FermActs.push_back(new FermionActionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass,M5,Params));
|
||||||
|
// FermActs.push_back(new FermionActionR(Umu,*FGrid,*FrbGrid,*UGrid,*UrbGrid,mass+0.001,M5,b,c));
|
||||||
|
std::cout<<GridLogMessage <<"Made DomainWallFermion action"<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
LatticePropagator point_source(UGrid);
|
||||||
|
|
||||||
|
Coordinate Origin({0,0,0,0});
|
||||||
|
PointSource (Origin,point_source);
|
||||||
|
|
||||||
|
std::vector<LatticePropagator> PointProps(nmass,UGrid);
|
||||||
|
// std::vector<LatticePropagator> FreeProps(nmass,UGrid);
|
||||||
|
// LatticePropagator delta(UGrid);
|
||||||
|
|
||||||
|
for(int m=0;m<nmass;m++) {
|
||||||
|
Solve(*FermActs[m],point_source ,PointProps[m]);
|
||||||
|
// MasslessFreePropagator(*FermActs[m],point_source ,FreeProps[m]);
|
||||||
|
|
||||||
|
// delta = PointProps[m] - FreeProps[m];
|
||||||
|
// std::cout << " delta "<<norm2(delta) << " FFT "<<norm2(FreeProps[m])<< " CG " <<norm2(PointProps[m])<<std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
LatticeComplex phase(UGrid);
|
||||||
|
Coordinate mom({0,0,0,0});
|
||||||
|
MakePhase(mom,phase);
|
||||||
|
|
||||||
|
for(int m1=0 ;m1<nmass;m1++) {
|
||||||
|
for(int m2=m1;m2<nmass;m2++) {
|
||||||
|
std::stringstream ssp,ssg,ssz;
|
||||||
|
|
||||||
|
ssp<<config<< "_m" << m1 << "_m"<< m2 << "_point_meson.xml";
|
||||||
|
ssz<<config<< "_m" << m1 << "_m"<< m2 << "_free_meson.xml";
|
||||||
|
|
||||||
|
std::cout << "CG determined VV correlation function"<<std::endl;
|
||||||
|
MesonTrace(ssp.str(),PointProps[m1],PointProps[m2],phase);
|
||||||
|
|
||||||
|
// std::cout << "FFT derived VV correlation function"<<std::endl;
|
||||||
|
// MesonTrace(ssz.str(),FreeProps[m1],FreeProps[m2],phase);
|
||||||
|
}}
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -95,26 +95,34 @@ int main (int argc, char ** argv)
|
|||||||
std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl;
|
std::cout << GridLogMessage << "::::::::::::: Starting mixed CG" << std::endl;
|
||||||
MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO);
|
MixedPrecisionConjugateGradient<LatticeFermionD,LatticeFermionF> mCG(1.0e-8, 10000, 50, FrbGrid_f, HermOpEO_f, HermOpEO);
|
||||||
double t1,t2,flops;
|
double t1,t2,flops;
|
||||||
|
double MdagMsiteflops = 1452; // Mobius (real coeffs)
|
||||||
|
// CG overhead: 8 inner product, 4+8 axpy_norm, 4+4 linear comb (2 of)
|
||||||
|
double CGsiteflops = (8+4+8+4+4)*Nc*Ns ;
|
||||||
|
std:: cout << " MdagM site flops = "<< 4*MdagMsiteflops<<std::endl;
|
||||||
|
std:: cout << " CG site flops = "<< CGsiteflops <<std::endl;
|
||||||
int iters;
|
int iters;
|
||||||
for(int i=0;i<100;i++){
|
for(int i=0;i<200;i++){
|
||||||
result_o = Zero();
|
result_o = Zero();
|
||||||
t1=usecond();
|
t1=usecond();
|
||||||
mCG(src_o,result_o);
|
mCG(src_o,result_o);
|
||||||
t2=usecond();
|
t2=usecond();
|
||||||
iters = mCG.TotalInnerIterations; //Number of inner CG iterations
|
iters = mCG.TotalInnerIterations; //Number of inner CG iterations
|
||||||
flops = 1320.0*2*FGrid->gSites()*iters;
|
flops = MdagMsiteflops*4*FrbGrid->gSites()*iters;
|
||||||
|
flops+= CGsiteflops*FrbGrid->gSites()*iters;
|
||||||
std::cout << " SinglePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
|
std::cout << " SinglePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
|
||||||
std::cout << " SinglePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
|
std::cout << " SinglePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
|
||||||
}
|
}
|
||||||
std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl;
|
std::cout << GridLogMessage << "::::::::::::: Starting regular CG" << std::endl;
|
||||||
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
|
ConjugateGradient<LatticeFermionD> CG(1.0e-8,10000);
|
||||||
for(int i=0;i<100;i++){
|
for(int i=0;i<1;i++){
|
||||||
result_o_2 = Zero();
|
result_o_2 = Zero();
|
||||||
t1=usecond();
|
t1=usecond();
|
||||||
CG(HermOpEO,src_o,result_o_2);
|
CG(HermOpEO,src_o,result_o_2);
|
||||||
t2=usecond();
|
t2=usecond();
|
||||||
iters = CG.IterationsToComplete;
|
iters = CG.IterationsToComplete;
|
||||||
flops = 1320.0*2*FGrid->gSites()*iters;
|
flops = MdagMsiteflops*4*FrbGrid->gSites()*iters;
|
||||||
|
flops+= CGsiteflops*FrbGrid->gSites()*iters;
|
||||||
|
|
||||||
std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
|
std::cout << " DoublePrecision iterations/sec "<< iters/(t2-t1)*1000.*1000.<<std::endl;
|
||||||
std::cout << " DoublePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
|
std::cout << " DoublePrecision GF/s "<< flops/(t2-t1)/1000.<<std::endl;
|
||||||
}
|
}
|
||||||
|
183
tests/Test_gfield_shift.cc
Normal file
183
tests/Test_gfield_shift.cc
Normal file
@ -0,0 +1,183 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/Test_gfield_shift.cc
|
||||||
|
|
||||||
|
Copyright (C) 2015
|
||||||
|
|
||||||
|
Author: Christopher Kelly <ckelly@bnl.gov>
|
||||||
|
Author: Azusa Yamaguchi <ayamaguc@staffmail.ed.ac.uk>
|
||||||
|
Author: Peter Boyle <paboyle@ph.ed.ac.uk>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
|
||||||
|
//Test the shifting of the gauge field that respects the boundary conditions
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace Grid;
|
||||||
|
;
|
||||||
|
|
||||||
|
typedef ConjugateGimplR Gimpl; //can choose periodic / charge conjugate directions at wil
|
||||||
|
typedef Gimpl::GaugeField GaugeField;
|
||||||
|
typedef Gimpl::GaugeLinkField GaugeLinkField;
|
||||||
|
typedef Gimpl::SiteGaugeField SiteGaugeField;
|
||||||
|
typedef Gimpl::SiteGaugeLink SiteGaugeLink;
|
||||||
|
|
||||||
|
// Return a copy of U in which every Lorentz component has been shifted by
// `shift` along `dir` using Gimpl::CshiftLink.
GaugeField CshiftGaugeField(const GaugeField &U, const int dir, const int shift){
  GridBase *grid = U.Grid();

  GaugeField     shifted(grid);
  GaugeLinkField link(grid);

  // Shift one Lorentz component at a time and reassemble the field.
  for(int mu=0; mu<grid->Nd(); ++mu){
    link = PeekIndex<LorentzIndex>(U, mu);
    link = Gimpl::CshiftLink(link,dir,shift);
    PokeIndex<LorentzIndex>(shifted,link,mu);
  }
  return shifted;
}
|
||||||
|
|
||||||
|
|
||||||
|
int main (int argc, char ** argv)
|
||||||
|
{
|
||||||
|
Grid_init(&argc,&argv);
|
||||||
|
|
||||||
|
auto latt_size = GridDefaultLatt();
|
||||||
|
auto simd_layout = GridDefaultSimd(4,vComplex::Nsimd());
|
||||||
|
auto mpi_layout = GridDefaultMpi();
|
||||||
|
|
||||||
|
std::vector<int> conj_dirs = {1,1,0,0};
|
||||||
|
Gimpl::setDirections(conj_dirs);
|
||||||
|
|
||||||
|
GridCartesian Fine(latt_size,simd_layout,mpi_layout);
|
||||||
|
|
||||||
|
GridParallelRNG FineRNG(&Fine); FineRNG.SeedFixedIntegers(std::vector<int>({45,12,81,9}));
|
||||||
|
|
||||||
|
|
||||||
|
GaugeField U(&Fine);
|
||||||
|
GaugeField ShiftU(&Fine);
|
||||||
|
|
||||||
|
GaugeLinkField link_field(&Fine), link_field_2(&Fine);
|
||||||
|
|
||||||
|
//Like Test_cshift we put the lex coordinate index on each site but make it imaginary
|
||||||
|
//so we can tell when it was complex conjugated
|
||||||
|
LatticeComplex lex(&Fine);
|
||||||
|
lex=Zero();
|
||||||
|
U = Zero();
|
||||||
|
{
|
||||||
|
LatticeComplex coor(&Fine);
|
||||||
|
Integer stride =1;
|
||||||
|
for(int d=0;d<4;d++){
|
||||||
|
LatticeCoordinate(coor,d);
|
||||||
|
lex = lex + coor*stride;
|
||||||
|
stride=stride*latt_size[d];
|
||||||
|
}
|
||||||
|
PokeIndex<ColourIndex>(link_field, lex, 0,0); //place on 0,0 element of link
|
||||||
|
|
||||||
|
for(int mu=0;mu<Nd;mu++){
|
||||||
|
link_field_2 = link_field + mu*stride; //add in lex-mapping of mu
|
||||||
|
link_field_2 = ComplexD(0,1) * link_field_2; //make imaginary
|
||||||
|
PokeIndex<LorentzIndex>(U, link_field_2, mu);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::stringstream ss;
|
||||||
|
ss<<"error";
|
||||||
|
for(int d=0;d<Fine._ndimension;d++){
|
||||||
|
ss<<"."<<Fine._processor_coor[d];
|
||||||
|
}
|
||||||
|
ss<<"_wr_"<<Fine._processor;
|
||||||
|
std::string fname(ss.str());
|
||||||
|
std::ofstream ferr(fname);
|
||||||
|
|
||||||
|
Integer vol4d = latt_size[0]*latt_size[1]*latt_size[2]*latt_size[3];
|
||||||
|
|
||||||
|
bool fail = false;
|
||||||
|
typename SiteGaugeField::scalar_object um;
|
||||||
|
TComplex cm;
|
||||||
|
|
||||||
|
for(int dir=0;dir<4;dir++){
|
||||||
|
for(int shift=-latt_size[dir]+1;shift<latt_size[dir];shift++){
|
||||||
|
if ( Fine.IsBoss() )
|
||||||
|
std::cout<<GridLogMessage<<"Shifting by "<<shift<<" in direction "<<dir
|
||||||
|
<< " dir is conj ? " << conj_dirs[dir] << std::endl;
|
||||||
|
|
||||||
|
ShiftU = CshiftGaugeField(U,dir,shift);
|
||||||
|
|
||||||
|
Coordinate coor(4);
|
||||||
|
|
||||||
|
for(coor[3]=0;coor[3]<latt_size[3];coor[3]++){
|
||||||
|
for(coor[2]=0;coor[2]<latt_size[2];coor[2]++){
|
||||||
|
for(coor[1]=0;coor[1]<latt_size[1];coor[1]++){
|
||||||
|
for(coor[0]=0;coor[0]<latt_size[0];coor[0]++){
|
||||||
|
peekSite(um,ShiftU,coor);
|
||||||
|
|
||||||
|
Coordinate scoor(coor);
|
||||||
|
scoor[dir] = (scoor[dir]+shift + latt_size[dir])%latt_size[dir];
|
||||||
|
|
||||||
|
Integer slex = scoor[0]
|
||||||
|
+ latt_size[0]*scoor[1]
|
||||||
|
+ latt_size[0]*latt_size[1]*scoor[2]
|
||||||
|
+ latt_size[0]*latt_size[1]*latt_size[2]*scoor[3];
|
||||||
|
|
||||||
|
for(int mu = 0 ; mu < 4; mu++){
|
||||||
|
Integer slex_mu = slex + vol4d*mu;
|
||||||
|
Complex scm(0,slex_mu); //imaginary
|
||||||
|
if(
|
||||||
|
( shift > 0 && coor[dir] >= latt_size[dir]-shift && conj_dirs[dir] )
|
||||||
|
||
|
||||||
|
( shift < 0 && coor[dir] <= -shift-1 && conj_dirs[dir] )
|
||||||
|
)
|
||||||
|
scm = conjugate(scm); //CC if pulled over boundary
|
||||||
|
|
||||||
|
cm = um(mu)()(0,0);
|
||||||
|
|
||||||
|
RealD nrm = abs(scm-cm()()());
|
||||||
|
//std::cout << cm << " " << scm << std::endl;
|
||||||
|
|
||||||
|
Coordinate peer(4);
|
||||||
|
Complex tmp =cm;
|
||||||
|
Integer index=real(tmp);
|
||||||
|
|
||||||
|
Integer cm_mu = index / vol4d;
|
||||||
|
index = index % vol4d;
|
||||||
|
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||||
|
|
||||||
|
if (nrm > 0){
|
||||||
|
ferr<<"FAIL mu " << mu << " shift "<< shift<<" in dir "<< dir<<" ["<<coor[0]<<","<<coor[1]<<","<<coor[2]<<","<<coor[3]<<"] = "<< cm()()()<<" expect "<<scm<<" "<<nrm<<std::endl;
|
||||||
|
ferr<<"Got mu "<< cm_mu << " site " <<index<<" : " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||||
|
|
||||||
|
index=real(scm);
|
||||||
|
Integer scm_mu = index / vol4d;
|
||||||
|
index = index % vol4d;
|
||||||
|
Lexicographic::CoorFromIndex(peer,index,latt_size);
|
||||||
|
ferr<<"Expect mu " << scm_mu << " site " <<index<<": " << peer[0]<<","<<peer[1]<<","<<peer[2]<<","<<peer[3]<<std::endl;
|
||||||
|
fail = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}}}}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(fail) std::cout << "Test FAILED : see " << fname << " for more details" << std::endl;
|
||||||
|
else std::cout << "Test Passed" << std::endl;
|
||||||
|
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
153
tests/smearing/Test_WilsonFlow_adaptive.cc
Normal file
153
tests/smearing/Test_WilsonFlow_adaptive.cc
Normal file
@ -0,0 +1,153 @@
|
|||||||
|
/*************************************************************************************
|
||||||
|
|
||||||
|
Grid physics library, www.github.com/paboyle/Grid
|
||||||
|
|
||||||
|
Source file: ./tests/smearing/Test_WilsonFlow_adaptive.cc
|
||||||
|
|
||||||
|
Copyright (C) 2017
|
||||||
|
|
||||||
|
Author: Christopher Kelly <ckelly@bnl.gov>
|
||||||
|
|
||||||
|
This program is free software; you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU General Public License as published by
|
||||||
|
the Free Software Foundation; either version 2 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU General Public License along
|
||||||
|
with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
|
||||||
|
See the full license in the file "LICENSE" in the top level distribution
|
||||||
|
directory
|
||||||
|
*************************************************************************************/
|
||||||
|
/* END LEGAL */
|
||||||
|
#include <Grid/Grid.h>
|
||||||
|
|
||||||
|
using namespace Grid;
|
||||||
|
|
||||||
|
//Linearly interpolate between two nearest times
|
||||||
|
//Linearly interpolate between the two samples whose times are nearest to t_int.
//  t_int : time at which a value is wanted
//  data  : (time, value) samples; at least two entries are required (asserted)
//Returns the value of the straight line through the two nearest samples evaluated at t_int.
//The two nearest samples are not required to bracket t_int.
//If both nearest samples carry the same time the line is undefined, so the value of the
//nearest sample is returned instead of dividing by zero.
RealD interpolate(const RealD t_int, const std::vector<std::pair<RealD,RealD> > &data){
  RealD tdiff1=1e32; int t1_idx=-1; //nearest sample so far
  RealD tdiff2=1e32; int t2_idx=-1; //second nearest sample so far

  const int npoints = static_cast<int>(data.size());
  for(int i=0;i<npoints;i++){
    RealD diff = fabs(data[i].first-t_int);
    //std::cout << "targ " << t_int << " cur " << data[i].first << " diff " << diff << " best diff1 " << tdiff1 << " diff2 " << tdiff2 << std::endl;

    if(diff < tdiff1){
      if(tdiff1 < tdiff2){ //previous best becomes the runner-up
        tdiff2 = tdiff1; t2_idx = t1_idx;
      }
      tdiff1 = diff; t1_idx = i;
    }
    else if(diff < tdiff2){ tdiff2 = diff; t2_idx = i; }
  }
  assert(t1_idx != -1 && t2_idx != -1);

  RealD t2 = data[t2_idx].first, v2 = data[t2_idx].second;
  RealD t1 = data[t1_idx].first, v1 = data[t1_idx].second;

  //Degenerate pair: no slope can be formed
  if(t2 == t1) return v1;

  //v = a + bt
  //v2-v1 = b(t2-t1)
  RealD b = (v2-v1)/(t2-t1);
  RealD a = v1 - b*t1;
  RealD vout = a + b*t_int;

  //std::cout << "Interpolate to " << t_int << " two closest points " << t1 << " " << t2
  //<< " with values " << v1 << " "<< v2 << " : got " << vout << std::endl;
  return vout;
}
|
||||||
|
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
Grid_init(&argc, &argv);
|
||||||
|
GridLogLayout();
|
||||||
|
|
||||||
|
auto latt_size = GridDefaultLatt();
|
||||||
|
auto simd_layout = GridDefaultSimd(Nd, vComplex::Nsimd());
|
||||||
|
auto mpi_layout = GridDefaultMpi();
|
||||||
|
GridCartesian Grid(latt_size, simd_layout, mpi_layout);
|
||||||
|
GridRedBlackCartesian RBGrid(&Grid);
|
||||||
|
|
||||||
|
std::vector<int> seeds({1, 2, 3, 4, 5});
|
||||||
|
GridSerialRNG sRNG;
|
||||||
|
GridParallelRNG pRNG(&Grid);
|
||||||
|
pRNG.SeedFixedIntegers(seeds);
|
||||||
|
|
||||||
|
LatticeGaugeField U(&Grid);
|
||||||
|
SU<Nc>::HotConfiguration(pRNG, U);
|
||||||
|
|
||||||
|
int Nstep = 300;
|
||||||
|
RealD epsilon = 0.01;
|
||||||
|
RealD maxTau = Nstep*epsilon;
|
||||||
|
RealD tolerance = 1e-4;
|
||||||
|
|
||||||
|
for(int i=1;i<argc;i++){
|
||||||
|
std::string sarg(argv[i]);
|
||||||
|
if(sarg == "--tolerance"){
|
||||||
|
std::stringstream ss; ss << argv[i+1]; ss >> tolerance;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
std::cout << "Adaptive smear tolerance " << tolerance << std::endl;
|
||||||
|
|
||||||
|
//Setup iterative Wilson flow
|
||||||
|
WilsonFlow<PeriodicGimplD> wflow(epsilon,Nstep);
|
||||||
|
wflow.resetActions();
|
||||||
|
|
||||||
|
std::vector<std::pair<RealD, RealD> > meas_orig;
|
||||||
|
|
||||||
|
wflow.addMeasurement(1, [&wflow,&meas_orig](int step, RealD t, const LatticeGaugeField &U){
|
||||||
|
std::cout << GridLogMessage << "[WilsonFlow] Computing Cloverleaf energy density for step " << step << std::endl;
|
||||||
|
meas_orig.push_back( {t, wflow.energyDensityCloverleaf(t,U)} );
|
||||||
|
});
|
||||||
|
|
||||||
|
//Setup adaptive Wilson flow
|
||||||
|
WilsonFlowAdaptive<PeriodicGimplD> wflow_ad(epsilon,maxTau,tolerance);
|
||||||
|
wflow_ad.resetActions();
|
||||||
|
|
||||||
|
std::vector<std::pair<RealD, RealD> > meas_adaptive;
|
||||||
|
|
||||||
|
wflow_ad.addMeasurement(1, [&wflow_ad,&meas_adaptive](int step, RealD t, const LatticeGaugeField &U){
|
||||||
|
std::cout << GridLogMessage << "[WilsonFlow] Computing Cloverleaf energy density for step " << step << std::endl;
|
||||||
|
meas_adaptive.push_back( {t, wflow_ad.energyDensityCloverleaf(t,U)} );
|
||||||
|
});
|
||||||
|
|
||||||
|
//Run
|
||||||
|
LatticeGaugeFieldD Vtmp(U.Grid());
|
||||||
|
wflow.smear(Vtmp, U); //basic smear
|
||||||
|
|
||||||
|
Vtmp = Zero();
|
||||||
|
wflow_ad.smear(Vtmp, U);
|
||||||
|
|
||||||
|
//Output values for plotting
|
||||||
|
{
|
||||||
|
std::ofstream out("wflow_t2E_orig.dat");
|
||||||
|
out.precision(16);
|
||||||
|
for(auto const &e: meas_orig){
|
||||||
|
out << e.first << " " << e.second << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
{
|
||||||
|
std::ofstream out("wflow_t2E_adaptive.dat");
|
||||||
|
out.precision(16);
|
||||||
|
for(auto const &e: meas_adaptive){
|
||||||
|
out << e.first << " " << e.second << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
//Compare at times available with adaptive smearing
|
||||||
|
for(int i=0;i<meas_adaptive.size();i++){
|
||||||
|
RealD t = meas_adaptive[i].first;
|
||||||
|
RealD v_adaptive = meas_adaptive[i].second;
|
||||||
|
RealD v_orig = interpolate(t, meas_orig); //should be very precise due to fine timestep
|
||||||
|
std::cout << t << " orig: " << v_orig << " adaptive: " << v_adaptive << " reldiff: " << (v_adaptive-v_orig)/v_orig << std::endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::cout << GridLogMessage << "Done" << std::endl;
|
||||||
|
Grid_finalize();
|
||||||
|
}
|
Reference in New Issue
Block a user